def post(network_data: dict) -> Response:
    """
    The POST method for the vector network REST API. It provides sentences whose content is similar to a given word.

    The target vector is the mean of the vectors of all vocabulary entries matched by the submitted
    regular expression. Every corpus sentence is represented by the mean of its token vectors and ranked
    by the dot product of the unit-normalized sentence and target vectors.

    :param network_data: request payload, parsed through ``VectorNetworkForm.from_dict``.
    :return: JSON response with the best-matching sentences (each as a list of tokens), most similar first.
        The list is empty when the regular expression matches no vocabulary entry.
    """
    vnf: VectorNetworkForm = VectorNetworkForm.from_dict(network_data)
    # Fall back to 10 neighbors when the form does not provide a (non-zero) count.
    nearest_neighbor_count = vnf.nearest_neighbor_count or 10
    w2v: Word2Vec = Word2Vec.load(Config.PANEGYRICI_LATINI_MODEL_PATH)
    vocab = w2v.wv.vocab
    # NOTE(review): the pattern comes straight from the request; an invalid pattern raises re.error here.
    search_regex: Pattern[str] = re.compile(vnf.search_regex)
    relevant_vectors: List[ndarray] = [w2v.wv.get_vector(x) for x in vocab if search_regex.match(x)]
    if not relevant_vectors:
        # Nothing matched: there is no target vector to compare against (avoids a division by zero).
        return NetworkService.make_json_response([])
    # Normalize the target once instead of once per sentence.
    target_unit: ndarray = matutils.unitvec(sum(relevant_vectors) / len(relevant_vectors))
    # Close the corpus file deterministically instead of leaking the handle.
    with open(Config.PANEGYRICI_LATINI_TEXT_PATH) as corpus:
        sentences: List[str] = corpus.readlines()
    sims: List[Tuple[int, float]] = []
    for i, sentence in enumerate(sentences):
        # split() already drops the trailing newline; slicing off the last character would
        # truncate a final line that has no newline. Tokens unknown to the model are skipped.
        vecs: List[ndarray] = [w2v.wv.get_vector(tok) for tok in sentence.split() if tok in vocab]
        if not vecs:
            continue
        sentence_unit: ndarray = matutils.unitvec(sum(vecs) / len(vecs))
        sims.append((i, dot(target_unit, sentence_unit)))
    sims.sort(key=lambda x: x[1], reverse=True)
    return NetworkService.make_json_response(
        [sentences[i].split() for i, _ in sims[:nearest_neighbor_count]])
Example #2
0
def cosine_similarity(vec1: numpy.ndarray, vec2: numpy.ndarray) -> float:
    """Return dot(vec1, vec2) divided by the product of their norms.

    When either vector has a norm of exactly 0.0 the result is 0.0, which
    avoids dividing by zero.
    """
    length_1, length_2 = norm(vec1), norm(vec2)

    if 0.0 in (length_1, length_2):
        return 0.0

    scale = length_1 * length_2
    return dot(vec1, vec2) / scale
Example #3
0
    def test_dot_2args(self):
        from numpy.core.multiarray import dot

        a = np.array([[1, 2], [3, 4]], dtype=float)
        b = np.array([[1, 0], [1, 1]], dtype=float)
        c = np.array([[3, 2], [7, 4]], dtype=float)

        d = dot(a, b)
        assert_allclose(c, d)
Example #4
0
    def test_dot_2args(self):
        from numpy.core.multiarray import dot

        a = np.array([[1, 2], [3, 4]], dtype=float)
        b = np.array([[1, 0], [1, 1]], dtype=float)
        c = np.array([[3, 2], [7, 4]], dtype=float)

        d = dot(a, b)
        assert_allclose(c, d)
Example #5
0
 def test_compute_mean_vector(self):
     """Print the mean vectors of two sample sentences and the dot product of their unit vectors."""
     model = EntityVectorComputeModel()
     model.init_word2vec_model(path="vocab.test.plain.txt", binary=False)

     texts = ("Public internet is very good",
              "Public internet application is better than private")
     # Both mean vectors are computed before anything is printed.
     first, second = [model.compute_mean_vector(text) for text in texts]
     for mean_vector in (first, second):
         print(mean_vector)

     print(dot(matutils.unitvec(first), matutils.unitvec(second)))
Example #6
0
 def dot(a, b):
     """Dot product wrapper that special-cases 1-D operands with a ``complex`` dtype.

     Both operands are converted with ``numpy.asarray``. When both are 1-D and
     at least one has a dtype equal to ``complex``, the product is computed by
     ``numpy.core.multiarray.dot``; every other case is delegated to ``olddot``.
     """
     a = numpy.asarray(a)
     b = numpy.asarray(b)
     if (a.ndim == 1 and b.ndim == 1
             and (a.dtype == complex or b.dtype == complex)):
         # The original flagged this case as a "bad use of dot" but never
         # actually raised (the raising branch was unreachable), so the call
         # is simply routed to the multiarray implementation.
         from numpy.core.multiarray import dot as multiarray_dot
         return multiarray_dot(a, b)
     return olddot(a, b)
Example #7
0
 def dot(a, b):
     """Compute ``a . b``, routing complex 1-D operands away from ``olddot``.

     Inputs are first passed through ``numpy.asarray``. If both are 1-D and
     either dtype equals ``complex``, ``numpy.core.multiarray.dot`` is used;
     otherwise the call is forwarded to ``olddot``.
     """
     a = numpy.asarray(a)
     b = numpy.asarray(b)
     vectors_with_complex = (a.ndim == 1 and b.ndim == 1 and
                             (a.dtype == complex or b.dtype == complex))
     if not vectors_with_complex:
         return olddot(a, b)
     # This case used to carry a disabled "Bad use of dot!" error; the error
     # branch could never run, so only the working path is kept.
     from numpy.core.multiarray import dot as multiarray_dot
     return multiarray_dot(a, b)
    def test_similarity_calculation(self):
        """Print the semantic and the structural similarity of two sample sentences.

        The semantic similarity is the dot product of the unit-normalized mean
        vectors of both sentences; the structural similarity is
        ``textdistance.jaccard`` applied to the raw strings.
        """
        str1 = "AbstractInputMethodService provides a abstract base class for inut methods."
        str2 = "The default implementation in this abstract class returns 1.0 for all components."

        vector_map = EntityVectorModel.load(
            "mean_vector_api_paragraph.plain.txt", binary=False)

        vector1 = vector_map.compute_mean_vector(str1)
        vector2 = vector_map.compute_mean_vector(str2)
        semantic_similarity = dot(matutils.unitvec(vector1),
                                  matutils.unitvec(vector2))
        # The similarities are numbers; concatenating them to a str directly
        # raises TypeError, so convert explicitly.
        print("semantic similarity is " + str(semantic_similarity))
        structure_similarity = textdistance.jaccard(str1, str2)
        print("structure similarity is " + str(structure_similarity))
Example #9
0
    def test_train_vector(self):
        """Load the plain-text test vectors, print a few lookups and the similarity of two words."""
        #entity_vector_model = EntityVectorComputeModel()
        #entity_vector_model.init_word2vec_model(path="vocab.test.plain.txt", binary=False)
        #entity_vector_model.train_mean_vector("entity_description.json", "entity.vector.plain.txt")

        keyvector = EntityVectorModel.load("vocab.test.plain.txt",
                                           binary=False)
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print(keyvector.vocab)
        print("123" in keyvector.vocab)
        vector1 = keyvector["and"]
        vector2 = keyvector["for"]
        print(vector1)
        print(vector2)
        # Compare the two different words; comparing vector1 with itself
        # left vector2 unused and made the result independent of it.
        similarity = dot(matutils.unitvec(vector1), matutils.unitvec(vector2))
        print(similarity)
Example #10
0
    def test_dot_3args(self):
        """Exercise multiarray.dot with a third (output) argument, positional and as ``out=``."""
        from numpy.core.multiarray import dot

        # Fixed seed so the random operands are reproducible.
        np.random.seed(22)
        f = np.random.random_sample((1024, 16))
        v = np.random.random_sample((16, 32))

        # Matrix @ matrix case: r is the preallocated result buffer.
        r = np.empty((1024, 32))
        for i in range(12):
            dot(f, v, r)
        # After the repeated calls r must still have a reference count of 2,
        # i.e. the calls did not leave additional references to it behind.
        assert_equal(sys.getrefcount(r), 2)
        r2 = dot(f, v, out=None)
        assert_array_equal(r2, r)
        # With out=r the very same object must be returned.
        assert_(r is dot(f, v, out=r))

        # Matrix @ vector case, reusing the first column of the previous operands.
        v = v[:, 0].copy()  # v.shape == (16,)
        r = r[:, 0].copy()  # r.shape == (1024,)
        r2 = dot(f, v)
        assert_(r is dot(f, v, r))
        assert_array_equal(r2, r)
Example #11
0
    def test_dot_3args(self):
        """Check multiarray.dot when an output array is supplied as third argument or via ``out=``."""
        from numpy.core.multiarray import dot

        # Seeded so that f and v are the same on every run.
        np.random.seed(22)
        f = np.random.random_sample((1024, 16))
        v = np.random.random_sample((16, 32))

        # 2-D result: write into the preallocated array r a dozen times.
        r = np.empty((1024, 32))
        for i in range(12):
            dot(f, v, r)
        # The reference count of r is expected to be unchanged (2) afterwards.
        assert_equal(sys.getrefcount(r), 2)
        r2 = dot(f, v, out=None)
        assert_array_equal(r2, r)
        # Passing out=r must hand back r itself, not a new array.
        assert_(r is dot(f, v, out=r))

        # 1-D result: repeat the identity and equality checks with a vector operand.
        v = v[:, 0].copy()  # v.shape == (16,)
        r = r[:, 0].copy()  # r.shape == (1024,)
        r2 = dot(f, v)
        assert_(r is dot(f, v, r))
        assert_array_equal(r2, r)
# Segmentation fault with acml's _dotblas.so
import numpy as np
from numpy.core.multiarray import dot
# np.complex was only an alias of the builtin complex and has been removed
# from NumPy; the builtin yields the same dtype.
b = np.ones(13, complex)
dot(b, b)
 def matrix_mul(list_1, list_2):
     """Convert both arguments to numpy arrays and return their dot product."""
     left, right = np.array(list_1), np.array(list_2)
     product = dot(left, right)
     return product
Example #14
0
    nb_scores = []
    pairs = [("animal", "dog"), ("good", "bad"), ("motivation", "inspiration"),
             ("girl", "chick"), ("body", "girl"),
             ("britain", "united_kingdom"), ("warrior", "war"),
             ("car", "table")]
    for pair in pairs:
        pref = "/c/en/"
        c1 = pair[0]
        c2 = pair[1]
        print("Comparing")
        print(c1)
        print(c2)
        truck_index = conceptmap[pref + c1]
        car_index = conceptmap[pref + c2]

        truck_row = X[truck_index].toarray()
        car_row = X[car_index].toarray()

        truck_low_dim = tsvd.transform(truck_row)[:, 0]
        car_low_dim = tsvd.transform(car_row)[:, 0]
        testdotres = dot(truck_low_dim, car_low_dim.transpose())
        skecth_scores.append(testdotres)
        print(testdotres)
        print("Comparing against Numberbatch")
        v1, v2 = find_vectors(c1, c2)
        nbdotres = dot(v1, v2)
        nb_scores.append(nbdotres)
        print(nbdotres)
    print(np.log(np.average(skecth_scores)) / np.log(np.average(nb_scores)))
    print(np.cov([skecth_scores, nb_scores]))
    print(str((end_time - start_time).seconds))
Example #15
0
# Segmentation fault with acml's _dotblas.so
import numpy as np
from numpy.core.multiarray import dot
# The np.complex alias no longer exists in NumPy; the builtin complex is the
# type it referred to, so the array dtype is unchanged.
b = np.ones(13, complex)
dot(b, b)
Example #16
0
 def cosine_similarity(v1, v2):
     """Return the dot product of ``v1`` and ``v2`` after passing each through ``gensim.matutils.unitvec``."""
     unit_v1 = gensim.matutils.unitvec(v1)
     unit_v2 = gensim.matutils.unitvec(v2)
     return dot(unit_v1, unit_v2)
Example #17
0
 def cosine_similarity(v1, v2):
     """Normalize both vectors with ``gensim.matutils.unitvec`` and return their dot product."""
     normalized = [gensim.matutils.unitvec(vec) for vec in (v1, v2)]
     return dot(normalized[0], normalized[1])
def sigmoid(theta=theta, a=a, b=b):
    """Evaluate ``1 / (1 + exp(bs - dot(a, theta)))``.

    ``bs`` is ``b`` reshaped to ``(len(b), 1)`` and then repeated ``numpeople``
    times along axis 1. The defaults are whatever the module-level ``theta``,
    ``a`` and ``b`` were bound to when this function was defined.
    """
    b_column = reshape(b, (len(b), 1))
    b_repeated = repeat(b_column, numpeople, 1)
    exponent = b_repeated - dot(a, theta)
    return 1.0 / (1.0 + exp(exponent))
Example #19
0
 def compute_similarity(self, vector1, vector2):
     """Return the dot product of the two vectors after ``matutils.unitvec`` is applied to each."""
     unit_1 = matutils.unitvec(vector1)
     unit_2 = matutils.unitvec(vector2)
     return dot(unit_1, unit_2)
Example #20
0
def vector_similarity(v1, v2):
    """Return the dot product of ``unitvec(v1)`` and ``unitvec(v2)``."""
    first_unit = unitvec(v1)
    second_unit = unitvec(v2)
    return dot(first_unit, second_unit)
Example #21
0
 def calculate_similarity_between_domain_and_wiki(self, domain_entity_vector, candidate_wiki_vector):
     """Return the dot product of the unit-normalized wiki and domain vectors.

     Returns None when either vector is None.
     """
     if candidate_wiki_vector is None or domain_entity_vector is None:
         return None
     wiki_unit = matutils.unitvec(candidate_wiki_vector)
     domain_unit = matutils.unitvec(domain_entity_vector)
     return dot(wiki_unit, domain_unit)
Example #22
0
def distance(vec1, vec2):
    """Return the dot product of ``matutils.unitvec(vec1)`` and ``matutils.unitvec(vec2)``.

    NOTE(review): despite the name this looks like a similarity (larger means
    closer) rather than a distance - confirm against callers.
    """
    unit_a = matutils.unitvec(vec1)
    unit_b = matutils.unitvec(vec2)
    return dot(unit_a, unit_b)
Example #23
0
    def calc_song_similar(self,
                          positive_songs=[],
                          negative_songs=[],
                          positive_artists=[],
                          negative_artists=[],
                          song_weight=1.0,
                          artist_weight=1.5,
                          topn=10,
                          restrict_vocab=None):
        """
        Compute the additive/subtractive similarity of songs and artists and
        return the top n most similar songs.

        Positive entries are added to and negative entries subtracted from a
        mean query vector; the songs of ``self.song2vec_model`` are then ranked
        by their dot product with that (unit-normalized) query vector.

        Args:
            positive_songs: songs contributing with weight ``+song_weight``;
                each is a vocabulary key or an ``ndarray``.
            negative_songs: songs contributing with weight ``-song_weight``.
            positive_artists: artists contributing with weight ``+artist_weight``;
                each is a vocabulary key or an ``ndarray``.
            negative_artists: artists contributing with weight ``-artist_weight``.
            song_weight: absolute weight applied to every song.
            artist_weight: absolute weight applied to every artist.
            topn: number of results to return; if falsy, the raw score array
                for all considered songs is returned instead.
            restrict_vocab: if not None, only the first ``restrict_vocab`` rows
                of the normalized song vectors are scored.

        Returns:
            A list of at most ``topn`` ``(song, score)`` tuples, best first, or
            the raw score array when ``topn`` is falsy.

        Raises:
            KeyError: a song or artist key is not in the respective vocabulary.
            ValueError: no songs and no artists were supplied.
        """
        # The list defaults are kept for interface compatibility; they are
        # only read here, never mutated.
        try:
            weighted_songs = (
                [(song, song_weight) for song in positive_songs] +
                [(song, -song_weight) for song in negative_songs])
            weighted_artists = (
                [(artist, artist_weight) for artist in positive_artists] +
                [(artist, -artist_weight) for artist in negative_artists])
            all_words, mean = set(), []
            for song, weight in weighted_songs:
                # Raw vectors must be detected before strip(): an ndarray has
                # no strip() method, so stripping first made this branch dead.
                if isinstance(song, ndarray):
                    mean.append(weight * song)
                    continue
                song = song.strip()
                if song not in self.song2vec_model.vocab:
                    raise KeyError("song '%s' not in vocabulary" % song)
                song_index = self.song2vec_model.vocab[song].index
                mean.append(weight * self.song2vec_model.syn0norm[song_index])
                all_words.add(song_index)
            for artist, weight in weighted_artists:
                # Test the loop variable itself (the original checked an
                # unrelated name, `word`).
                if isinstance(artist, ndarray):
                    mean.append(weight * artist)
                    continue
                if artist not in self.artist2vec_model.vocab:
                    raise KeyError("artist '%s' not in vocabulary" % artist)
                artist_index = self.artist2vec_model.vocab[artist].index
                mean.append(
                    weight * self.artist2vec_model.syn0norm[artist_index])
                # NOTE(review): artist indices share the exclusion set with
                # song indices although results are looked up in the song
                # model only - confirm this is intended.
                all_words.add(artist_index)
            if not mean:
                raise ValueError("cannot compute similarity with no input")
            mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL)
            limited = self.song2vec_model.syn0norm if restrict_vocab is None \
                else self.song2vec_model.syn0norm[:restrict_vocab]
            dists = dot(limited, mean)
            if not topn:
                return dists
            # Ask for extra candidates so that topn remain after the inputs
            # themselves are filtered out.
            best = matutils.argsort(dists,
                                    topn=topn + len(all_words),
                                    reverse=True)
            # ignore (don't return) words from the input
            result = [(self.song2vec_model.index2word[sim], float(dists[sim]))
                      for sim in best if sim not in all_words]
            return result[:topn]
        except Exception as e:
            # Report the problem, then re-raise with the original traceback.
            print('error = %s' % e)
            raise