def test_sim_mat(self):
    """build_sim_mat with a product similarity yields pairwise products off the
    diagonal; the expected matrix carries 1.0 on the diagonal (self-similarity
    as produced by build_sim_mat — TODO confirm against its implementation)."""
    def product(x, y):
        return x * y

    values = [1, 2, 3, 4]
    want = np.array([
        [1., 2., 3., 4.],
        [2., 1., 6., 8.],
        [3., 6., 1., 12.],
        [4., 8., 12., 1.],
    ])
    got = util.build_sim_mat(values, product)
    np.testing.assert_array_equal(got, want)
def test_sim_mat(self):
    """Check build_sim_mat against a hand-computed product-similarity matrix.

    NOTE(review): this duplicates an identically-named test earlier in the
    file; inside one class the later definition shadows the earlier — consider
    removing one copy.
    """
    def multiply(left, right):
        return left * right

    inputs = [1, 2, 3, 4]
    expected_matrix = np.array([
        [1., 2., 3., 4.],
        [2., 1., 6., 8.],
        [3., 6., 1., 12.],
        [4., 8., 12., 1.],
    ])
    result = util.build_sim_mat(inputs, multiply)
    np.testing.assert_array_equal(result, expected_matrix)
def __call__(self, docs, token_docs):
    """Compute a doc-doc similarity matrix in "bridging" space.

    Each target doc is re-represented as its vector of cosine distances to
    background Wikipedia pages (one fetched per unique token in the corpus);
    pairwise similarities are then computed over those bridge vectors.

    Args:
        docs: raw target documents (presumably text the vectorizer accepts —
            TODO confirm against the vectorizer's interface).
        token_docs: tokenized form of the same documents; each element is an
            iterable of tokens.

    Returns:
        The matrix produced by build_sim_mat over the bridging-space vectors
        using self.compute_bridge_similarity.
    """
    # Unique tokens across the whole corpus determine which background
    # pages are fetched. (Set comprehension replaces set([listcomp]).)
    all_terms = {t for toks in token_docs for t in toks}
    bg_docs = [self.fetch_wikipage(t) for t in all_terms]
    # Filter out empty docs (will mess up cosine similarity)
    bg_docs = [bg for bg in bg_docs if bg]
    n_docs = len(docs)
    all_docs = docs + bg_docs
    vectr = self.vectorizer()
    vecs = vectr.vectorize(all_docs).todense()
    doc_vecs = vecs[:n_docs]  # target doc vecs
    bg_vecs = vecs[n_docs:]  # background doc vecs
    # Bridging space representation of the docs: row i holds doc i's cosine
    # distances to every background doc.
    doc_vecs = cdist(doc_vecs, bg_vecs, metric='cosine')
    return build_sim_mat(doc_vecs, self.compute_bridge_similarity)
def __call__(self, token_docs, entities):
    """Wrap each (tokens, entities) pair in a Document, indexed by position,
    and return their pairwise similarity matrix via build_sim_mat using
    self.similarity."""
    corpus = [
        Document(idx, ents, toks)
        for idx, (toks, ents) in enumerate(zip(token_docs, entities))
    ]
    return build_sim_mat(corpus, self.similarity)