def test_neuroquery_model():
    x, y, voc = _dataset_and_voc()
    vect = tokenization.TextVectorizer.from_vocabulary(voc)
    reg = smoothed_regression.SmoothedRegression(n_components=10).fit(x, y)
    encoder = encoding.NeuroQueryModel(vect, reg, mask_img=_mask_img(y.shape[1]))
    text = "feature0 and feature8 compared to feature73"
    res = encoder(text)
    simil = res["similar_words"]
    assert simil.loc["feature0"]["similarity"] != 0
    assert simil.loc["feature0"]["weight_in_brain_map"] != 0
    assert simil.loc["feature0"]["weight_in_query"] != 0
    assert simil.loc["feature8"]["weight_in_query"] != 0
    assert simil.loc["feature8"]["similarity"] != 0
    assert simil.loc["feature8"]["weight_in_brain_map"] == pytest.approx(0)
    assert simil.loc["feature18"]["weight_in_brain_map"] == pytest.approx(0)
    assert simil.loc["feature18"]["weight_in_query"] == pytest.approx(0)
    assert res["similar_documents"] is None
    # Serialization round-trip: the reloaded model must produce the same map.
    with tempfile.TemporaryDirectory() as tmp_dir:
        encoder.to_data_dir(tmp_dir)
        loaded = encoding.NeuroQueryModel.from_data_dir(tmp_dir)
        assert not loaded.vectorizer.add_unigrams
        encoded = loaded(text)["brain_map"].get_data()
        assert np.allclose(encoded, res["brain_map"].get_data())
    assert res["z_map"] is res["brain_map"]
    # With corpus_info, querying a word that appears in exactly one document
    # should surface that document as the most similar one.
    n_docs = 4
    tfidf = np.zeros((n_docs, x.shape[1]))
    tfidf[:n_docs, :n_docs] = np.eye(n_docs)
    metadata = pd.DataFrame.from_dict({"id": np.arange(n_docs)})
    encoder = encoding.NeuroQueryModel(
        vect,
        reg,
        mask_img=_mask_img(y.shape[1]),
        corpus_info={"tfidf": tfidf, "metadata": metadata},
    )
    for i in range(n_docs):
        res = encoder(encoder.full_vocabulary()[i])
        assert res["similar_documents"]["id"][0] == i
        assert res["similar_words"]["n_documents"][0] == 1
    # The round-trip through a data directory must preserve the corpus info,
    # so the reloaded model is queried here.
    with tempfile.TemporaryDirectory() as tmp_dir:
        encoder.to_data_dir(tmp_dir)
        loaded = encoding.NeuroQueryModel.from_data_dir(tmp_dir)
        assert not loaded.vectorizer.add_unigrams
        for i in range(n_docs):
            res = loaded(loaded.full_vocabulary()[i])
            assert res["similar_documents"].id[0] == i
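

# Illustrative sketch, not part of the tests: the NeuroQueryModel API checked
# above is the same one used with a pretrained model. This assumes the package
# exposes `fetch_neuroquery_model` at the top level (as its README suggests);
# the query string below is arbitrary.
def _example_pretrained_usage():
    from neuroquery import fetch_neuroquery_model, NeuroQueryModel

    encoder = NeuroQueryModel.from_data_dir(fetch_neuroquery_model())
    result = encoder("language comprehension")
    # result contains the same keys asserted on in test_neuroquery_model.
    return result["brain_map"], result["similar_words"]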


def test_z_maps():
    rng = np.random.RandomState(0)
    X = rng.binomial(3, 0.3, size=(21, 9)).astype("float64")
    Y = rng.randn(21, 11)
    reg = smoothed_regression.SmoothedRegression(n_components=5).fit(X, Y)
    z = reg.transform_to_z_maps(X)
    assert z.shape == Y.shape
    with tempfile.TemporaryDirectory() as tmp_dir:
        reg.to_data_dir(tmp_dir)
        loaded = smoothed_regression.SmoothedRegression.from_data_dir(tmp_dir)
        assert np.allclose(
            loaded.transform_to_z_maps(X), reg.transform_to_z_maps(X)
        )
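

# Illustrative sketch, not part of the tests: a row of the matrix returned by
# `transform_to_z_maps` can be projected back into brain space with a nilearn
# masker. The mask image is assumed to contain exactly z.shape[1] voxels;
# `NiftiMasker` lives in `nilearn.input_data` in older nilearn releases.
def _example_z_map_image(reg, X, mask_img):
    from nilearn.maskers import NiftiMasker

    masker = NiftiMasker(mask_img=mask_img).fit()
    z = reg.transform_to_z_maps(X)  # shape (n_samples, n_voxels)
    return masker.inverse_transform(z[0])  # Nifti image for the first sample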


def test_predictions():
    x, y = datasets.make_regression(
        n_samples=102,
        n_informative=5,
        n_features=91,
        n_targets=117,
        effective_rank=9,
        noise=0.5,
        shuffle=False,
        random_state=0,
    )
    x -= x.min() - 1
    reg = smoothed_regression.SmoothedRegression(
        n_components=5, smoothing_weight=1e-3
    )
    print(reg.fit(x, y).predict(x))
    sk_reg = RidgeCV()
    score = cross_val_score(reg, x, y, cv=5)
    sk_score = cross_val_score(sk_reg, x, y, cv=5)
    assert score.mean() > sk_score.mean()
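

# Illustrative sketch, not part of the tests: since SmoothedRegression works
# with `cross_val_score` above, it is assumed to follow the scikit-learn
# estimator API (get_params / set_params / score), so standard tuning tools
# should also apply. The parameter grid below is hypothetical.
def _example_grid_search(x, y):
    from sklearn.model_selection import GridSearchCV

    grid = GridSearchCV(
        smoothed_regression.SmoothedRegression(),
        param_grid={"n_components": [5, 10], "smoothing_weight": [1e-3, 1e-2]},
        cv=3,
    )
    return grid.fit(x, y).best_params_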


def test_text_to_brain():
    x, y, voc = _dataset_and_voc()
    vect = tokenization.TextVectorizer.from_vocabulary(voc)
    reg = smoothed_regression.SmoothedRegression(n_components=10).fit(x, y)
    encoder = text_to_brain.TextToBrain(vect, reg, mask_img=_mask_img(y.shape[1]))
    text = "feature0 and feature8 but not feature73"
    res = encoder(text)
    simil = res["similar_words"]
    assert simil.loc["feature0"]["similarity"] != 0
    assert simil.loc["feature0"]["weight_in_brain_map"] != 0
    assert simil.loc["feature0"]["weight_in_query"] != 0
    assert simil.loc["feature8"]["weight_in_query"] != 0
    assert simil.loc["feature8"]["similarity"] != 0
    assert simil.loc["feature8"]["weight_in_brain_map"] == pytest.approx(0)
    assert simil.loc["feature18"]["weight_in_brain_map"] == pytest.approx(0)
    assert simil.loc["feature18"]["weight_in_query"] == pytest.approx(0)
    with tempfile.TemporaryDirectory() as tmp_dir:
        encoder.to_data_dir(tmp_dir)
        loaded = text_to_brain.TextToBrain.from_data_dir(tmp_dir)
        encoded = loaded(text)["z_map"].get_data()
        assert np.allclose(encoded, res["z_map"].get_data())
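

# Illustrative sketch, not part of the tests: the "z_map" returned by
# TextToBrain (and the "brain_map" of NeuroQueryModel above) is an ordinary
# Nifti image, so it can be displayed with nilearn's plotting helpers. The
# query string and threshold value are arbitrary.
def _example_plot_query(encoder, query="feature0 and feature8"):
    from nilearn import plotting

    z_map = encoder(query)["z_map"]
    return plotting.plot_stat_map(z_map, threshold=3.0)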