Esempio n. 1
0
    def test_sim_mat(self):
        """Check the matrix returned by ``util.build_sim_mat`` for a product similarity.

        The expected matrix holds the pairwise product of the items in every
        off-diagonal cell and 1.0 in every diagonal cell.
        """
        values = [1, 2, 3, 4]

        def product(x, y):
            return x * y

        # Pairwise products everywhere, then overwrite the diagonal with 1.0.
        want = np.outer(values, values).astype(float)
        np.fill_diagonal(want, 1.)

        got = util.build_sim_mat(values, product)
        np.testing.assert_array_equal(got, want)
Esempio n. 2
0
    def test_sim_mat(self):
        """Verify ``util.build_sim_mat`` against a hand-written expected matrix.

        The similarity function multiplies its two arguments; the expected
        matrix has those products off the diagonal and 1.0 on the diagonal.
        """
        def multiply(left, right):
            return left * right

        numbers = [1, 2, 3, 4]

        # One row per item, in the same order as ``numbers``.
        rows = [
            [1., 2., 3., 4.],
            [2., 1., 6., 8.],
            [3., 6., 1., 12.],
            [4., 8., 12., 1.],
        ]
        want = np.array(rows)

        got = util.build_sim_mat(numbers, multiply)
        np.testing.assert_array_equal(got, want)
Esempio n. 3
0
    def __call__(self, docs, token_docs):
        """Build a similarity matrix for ``docs`` through background pages.

        Every distinct token found in ``token_docs`` is looked up with
        ``self.fetch_wikipage``; the non-empty results act as background
        documents. Target and background documents are vectorized together,
        each target document is re-expressed as its cosine distances to the
        background documents, and those rows are handed to ``build_sim_mat``
        with ``self.compute_bridge_similarity`` as the pairwise function.

        ``docs`` is concatenated with a list, so it is assumed to be a list.
        """
        vocabulary = {term for tokens in token_docs for term in tokens}
        fetched = list(map(self.fetch_wikipage, vocabulary))

        # Empty pages are dropped because they would mess up cosine similarity.
        background = list(filter(None, fetched))

        target_count = len(docs)
        corpus = docs + background

        dense = self.vectorizer().vectorize(corpus).todense()

        # The first ``target_count`` rows belong to the target docs,
        # the remaining rows to the background docs.
        target_rows = dense[:target_count]
        background_rows = dense[target_count:]

        # Bridging space: one row per target doc, one column per background doc.
        bridged = cdist(target_rows, background_rows, metric='cosine')

        return build_sim_mat(bridged, self.compute_bridge_similarity)
Esempio n. 4
0
    def __call__(self, docs, token_docs):
        """Compute the similarity matrix of ``docs`` in a bridging space.

        Steps:
          1. Gather the unique tokens across ``token_docs``.
          2. Fetch one background page per token via ``self.fetch_wikipage``
             and discard the empty ones.
          3. Vectorize ``docs`` followed by the background pages and densify.
          4. Replace each target vector by its cosine distances to all
             background vectors.
          5. Return ``build_sim_mat`` applied to those rows with
             ``self.compute_bridge_similarity``.

        ``docs`` is joined to a list with ``+`` and is therefore assumed to
        be a list itself.
        """
        unique_terms = {tok for doc_tokens in token_docs for tok in doc_tokens}
        pages = [self.fetch_wikipage(term) for term in unique_terms]

        # Keep only non-empty pages (empty ones will mess up cosine similarity).
        pages = [page for page in pages if page]

        split_at = len(docs)
        vectorizer = self.vectorizer()
        matrix = vectorizer.vectorize(docs + pages).todense()

        # Rows before ``split_at`` are target docs; rows after are background.
        targets, backgrounds = matrix[:split_at], matrix[split_at:]

        # Bridging space representation of the docs.
        bridge_vecs = cdist(targets, backgrounds, metric='cosine')

        return build_sim_mat(bridge_vecs, self.compute_bridge_similarity)
Esempio n. 5
0
    def __call__(self, token_docs, entities):
        """Wrap the inputs in ``Document`` objects and build their similarity matrix.

        ``token_docs`` and ``entities`` are paired positionally with ``zip``;
        each pair becomes ``Document(index, entities, tokens)``. The documents
        are then passed to ``build_sim_mat`` with ``self.similarity`` as the
        pairwise function.
        """
        paired = zip(token_docs, entities)
        documents = [
            Document(index, doc_entities, doc_tokens)
            for index, (doc_tokens, doc_entities) in enumerate(paired)
        ]
        return build_sim_mat(documents, self.similarity)