def test_neuroquery_model():
    """Check NeuroQueryModel query results and save/load round trips.

    A model built without corpus information is queried first: the
    ``similar_words`` table is inspected, ``similar_documents`` must be
    ``None``, and a model reloaded from disk must produce the same brain map.
    A second model is then built with ``corpus_info`` and both the in-memory
    model and its reloaded copy must rank the expected document first.
    """
    x, y, voc = _dataset_and_voc()
    vect = tokenization.TextVectorizer.from_vocabulary(voc)
    reg = smoothed_regression.SmoothedRegression(n_components=10).fit(x, y)
    encoder = encoding.NeuroQueryModel(vect,
                                       reg,
                                       mask_img=_mask_img(y.shape[1]))
    text = "feature0 and feature8 compared to feature73"
    res = encoder(text)
    simil = res["similar_words"]
    # feature0 appears in the query and is expected to contribute everywhere.
    assert simil.loc["feature0"]["similarity"] != 0
    assert simil.loc["feature0"]["weight_in_brain_map"] != 0
    assert simil.loc["feature0"]["weight_in_query"] != 0
    # feature8 appears in the query but must not weigh in the brain map.
    assert simil.loc["feature8"]["weight_in_query"] != 0
    assert simil.loc["feature8"]["similarity"] != 0
    assert simil.loc["feature8"]["weight_in_brain_map"] == pytest.approx(0)
    # feature18 is absent from the query text.
    assert simil.loc["feature18"]["weight_in_brain_map"] == pytest.approx(0)
    assert simil.loc["feature18"]["weight_in_query"] == pytest.approx(0)
    # No corpus_info was given, so no similar documents can be reported.
    assert res["similar_documents"] is None
    with tempfile.TemporaryDirectory() as tmp_dir:
        encoder.to_data_dir(tmp_dir)
        loaded = encoding.NeuroQueryModel.from_data_dir(tmp_dir)
        assert not loaded.vectorizer.add_unigrams
        # Query the reloaded model while tmp_dir still exists: the directory
        # is deleted when the block exits, and it is not visible from here
        # whether the loaded model still reads files from it on first use.
        # NOTE(review): if brain_map is a nibabel image, get_data() is
        # deprecated in favour of get_fdata() -- confirm before switching.
        encoded = loaded(text)["brain_map"].get_data()
    assert np.allclose(encoded, res["brain_map"].get_data())
    assert res["z_map"] is res["brain_map"]

    # Document i has a single non-zero tfidf entry, in column i, so querying
    # vocabulary entry i is expected to rank document i first.
    n_docs = 4
    tfidf = np.zeros((n_docs, x.shape[1]))
    tfidf[:n_docs, :n_docs] = np.eye(n_docs)
    metadata = pd.DataFrame.from_dict({"id": np.arange(n_docs)})
    encoder = encoding.NeuroQueryModel(
        vect,
        reg,
        mask_img=_mask_img(y.shape[1]),
        corpus_info={
            "tfidf": tfidf,
            "metadata": metadata
        },
    )
    vocabulary = encoder.full_vocabulary()
    for i in range(n_docs):
        res = encoder(vocabulary[i])
        assert res["similar_documents"]["id"][0] == i
        assert res["similar_words"]["n_documents"][0] == 1

    with tempfile.TemporaryDirectory() as tmp_dir:
        encoder.to_data_dir(tmp_dir)
        loaded = encoding.NeuroQueryModel.from_data_dir(tmp_dir)
        assert not loaded.vectorizer.add_unigrams
        # Exercise the reloaded model (not the in-memory encoder, which the
        # loop above already covers) so the round trip is actually verified.
        for i in range(n_docs):
            res = loaded(vocabulary[i])
            assert res["similar_documents"].id[0] == i
def test_z_maps():
    """Z-maps match the target shape and survive a save/load round trip."""
    random_state = np.random.RandomState(0)
    n_samples, n_features, n_targets = 21, 9, 11
    features = random_state.binomial(
        3, 0.3, size=(n_samples, n_features)).astype("float64")
    targets = random_state.randn(n_samples, n_targets)

    estimator = smoothed_regression.SmoothedRegression(n_components=5)
    fitted = estimator.fit(features, targets)

    z_maps = fitted.transform_to_z_maps(features)
    assert z_maps.shape == targets.shape

    # Persist the fitted model, reload it, and compare the z-maps of both.
    with tempfile.TemporaryDirectory() as model_dir:
        fitted.to_data_dir(model_dir)
        restored = smoothed_regression.SmoothedRegression.from_data_dir(
            model_dir)
        restored_z_maps = restored.transform_to_z_maps(features)
        reference_z_maps = fitted.transform_to_z_maps(features)
        assert np.allclose(restored_z_maps, reference_z_maps)
def test_predictions():
    """SmoothedRegression must outscore RidgeCV in 5-fold cross-validation.

    Both estimators are scored with ``cross_val_score`` on the same data
    produced by ``datasets.make_regression`` (fixed ``random_state``), and
    the mean score of SmoothedRegression must be strictly higher.
    """
    x, y = datasets.make_regression(
        n_samples=102,
        n_informative=5,
        n_features=91,
        n_targets=117,
        effective_rank=9,
        noise=0.5,
        shuffle=False,
        random_state=0,
    )
    # Shift the features so that the smallest entry of x becomes 1.
    x -= x.min() - 1
    reg = smoothed_regression.SmoothedRegression(n_components=5,
                                                 smoothing_weight=1e-3)
    # Smoke check: fitting and predicting on the full data must not raise.
    # The predictions were previously printed, which only cluttered the
    # test output.
    reg.fit(x, y).predict(x)
    sk_reg = RidgeCV()
    score = cross_val_score(reg, x, y, cv=5)
    sk_score = cross_val_score(sk_reg, x, y, cv=5)
    assert score.mean() > sk_score.mean()
# Example #4
# 0
def test_text_to_brain():
    """Check TextToBrain query results and a save/load round trip."""
    x, y, voc = _dataset_and_voc()
    vectorizer = tokenization.TextVectorizer.from_vocabulary(voc)
    regression = smoothed_regression.SmoothedRegression(n_components=10)
    regression = regression.fit(x, y)
    mask = _mask_img(y.shape[1])
    model = text_to_brain.TextToBrain(vectorizer, regression, mask_img=mask)

    query = "feature0 and feature8 but not feature73"
    result = model(query)
    similar_words = result["similar_words"]

    # (word, column) entries that must be non-zero for this query.
    nonzero_entries = (
        ("feature0", "similarity"),
        ("feature0", "weight_in_brain_map"),
        ("feature0", "weight_in_query"),
        ("feature8", "weight_in_query"),
        ("feature8", "similarity"),
    )
    for word, column in nonzero_entries:
        assert similar_words.loc[word][column] != 0

    # (word, column) entries that must be (approximately) zero.
    zero_entries = (
        ("feature8", "weight_in_brain_map"),
        ("feature18", "weight_in_brain_map"),
        ("feature18", "weight_in_query"),
    )
    for word, column in zero_entries:
        assert similar_words.loc[word][column] == pytest.approx(0)

    # The reloaded model is queried while the directory still exists.
    # NOTE(review): if z_map is a nibabel image, get_data() is deprecated in
    # favour of get_fdata() -- confirm before switching.
    with tempfile.TemporaryDirectory() as model_dir:
        model.to_data_dir(model_dir)
        restored = text_to_brain.TextToBrain.from_data_dir(model_dir)
        restored_map = restored(query)["z_map"].get_data()
        assert np.allclose(restored_map, result["z_map"].get_data())