def post(network_data: dict) -> Response:
    """
    The POST method for the vector network REST API.

    It provides sentences whose content is similar to a given word: every
    vocabulary entry matching the submitted regular expression is averaged
    into a target vector, each non-empty line of the text file is averaged
    into a sentence vector, and the lines with the highest cosine similarity
    to the target are returned as lists of tokens.

    :param network_data: raw form data, parsed via VectorNetworkForm.from_dict.
    :return: JSON response holding up to ``nearest_neighbor_count`` tokenized
        sentences (10 when the form gives no count), best match first. The
        list is empty when no vocabulary entry matches the regular expression.
    """
    vnf: VectorNetworkForm = VectorNetworkForm.from_dict(network_data)
    nearest_neighbor_count = vnf.nearest_neighbor_count if vnf.nearest_neighbor_count else 10
    w2v: Word2Vec = Word2Vec.load(Config.PANEGYRICI_LATINI_MODEL_PATH)
    search_regex: Pattern[str] = re.compile(vnf.search_regex)
    keys: List[str] = [x for x in w2v.wv.vocab if search_regex.match(x)]
    if not keys:
        # Nothing matched: averaging zero vectors would divide by zero.
        return NetworkService.make_json_response([])
    relevant_vectors: List[ndarray] = [w2v.wv.get_vector(x) for x in keys]
    # Normalize the target once instead of once per sentence.
    target_unit: ndarray = matutils.unitvec(sum(relevant_vectors) / len(relevant_vectors))
    # split() already discards the line terminator, so no manual slicing is
    # needed (slicing cut a character off a final line without a newline).
    with open(Config.PANEGYRICI_LATINI_TEXT_PATH) as text_file:
        tokenized: List[List[str]] = [line.split() for line in text_file]
    sims: List[Tuple[int, float]] = []
    for i, toks in enumerate(tokenized):
        if not toks:
            continue
        vecs: List[ndarray] = [w2v.wv.get_vector(tok) for tok in toks]
        sentence_vector: ndarray = sum(vecs) / len(vecs)
        sims.append((i, dot(target_unit, matutils.unitvec(sentence_vector))))
    sims.sort(key=lambda x: x[1], reverse=True)
    return NetworkService.make_json_response(
        [tokenized[i] for i, _ in sims[:nearest_neighbor_count]])
def cosine_similarity(vec1: numpy.ndarray, vec2: numpy.ndarray) -> float:
    """Return the cosine of the angle between ``vec1`` and ``vec2``.

    If either vector has zero length the similarity is defined as 0.0
    instead of dividing by zero.
    """
    magnitude_a = norm(vec1)
    magnitude_b = norm(vec2)
    if magnitude_a != 0.0 and magnitude_b != 0.0:
        return dot(vec1, vec2) / (magnitude_a * magnitude_b)
    return 0.0
def test_dot_2args(self): from numpy.core.multiarray import dot a = np.array([[1, 2], [3, 4]], dtype=float) b = np.array([[1, 0], [1, 1]], dtype=float) c = np.array([[3, 2], [7, 4]], dtype=float) d = dot(a, b) assert_allclose(c, d)
def test_compute_mean_vector(self):
    """Print the mean vectors of two sentences and their similarity.

    The similarity is the dot product of the two mean vectors after each
    has been passed through ``matutils.unitvec``. Nothing is asserted; the
    values are only printed for inspection.
    """
    model = EntityVectorComputeModel()
    model.init_word2vec_model(path="vocab.test.plain.txt", binary=False)

    first_mean = model.compute_mean_vector("Public internet is very good")
    second_mean = model.compute_mean_vector(
        "Public internet application is better than private")
    for mean_vector in (first_mean, second_mean):
        print(mean_vector)

    print(dot(matutils.unitvec(first_mean), matutils.unitvec(second_mean)))
def dot(a, b):
    """Dot product that special-cases 1-D vectors with a ``complex`` dtype.

    Both arguments are converted with ``numpy.asarray``. When both are
    one-dimensional and at least one has dtype ``complex``, the product is
    computed by ``numpy.core.multiarray.dot``; every other case is delegated
    to ``olddot``.

    NOTE(review): presumably ``olddot`` is the previously installed ``dot``
    and this wrapper avoids a problem it has with complex vectors -- confirm
    against the code that installs this wrapper.
    """
    a = numpy.asarray(a)
    b = numpy.asarray(b)
    if a.ndim == 1 and b.ndim == 1 and (a.dtype == complex or b.dtype == complex):
        # Imported under an alias so the wrapper's own name is not shadowed.
        from numpy.core.multiarray import dot as multiarray_dot
        return multiarray_dot(a, b)
    return olddot(a, b)
def test_similarity_calculation(self):
    """Print a semantic and a structural similarity score for two sentences.

    The semantic score is the dot product of the two mean vectors after
    ``matutils.unitvec``; the structural score comes from
    ``textdistance.jaccard`` on the raw strings. Nothing is asserted.
    """
    str1 = "AbstractInputMethodService provides a abstract base class for inut methods."
    str2 = "The default implementation in this abstract class returns 1.0 for all components."
    vector_map = EntityVectorModel.load(
        "mean_vector_api_paragraph.plain.txt", binary=False)
    vector1 = vector_map.compute_mean_vector(str1)
    vector2 = vector_map.compute_mean_vector(str2)

    semantic_similarity = dot(matutils.unitvec(vector1), matutils.unitvec(vector2))
    # The scores are numbers; convert explicitly because str + number raises
    # TypeError.
    print("semantic similarity is " + str(semantic_similarity))

    structure_similarity = textdistance.jaccard(str1, str2)
    print("structure similarity is " + str(structure_similarity))
def test_train_vector(self):
    """Load a key-vector file and print the similarity of two word vectors.

    Prints the vocabulary, whether "123" is in it, the vectors for "and"
    and "for", and the dot product of those two vectors after
    ``matutils.unitvec``. Nothing is asserted.
    """
    # Disabled training step, kept for reference:
    # entity_vector_model = EntityVectorComputeModel()
    # entity_vector_model.init_word2vec_model(path="vocab.test.plain.txt", binary=False)
    # entity_vector_model.train_mean_vector("entity_description.json", "entity.vector.plain.txt")
    keyvector = EntityVectorModel.load("vocab.test.plain.txt", binary=False)
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(keyvector.vocab)
    print("123" in keyvector.vocab)

    vector1 = keyvector["and"]
    vector2 = keyvector["for"]
    print(vector1)
    print(vector2)

    # Compare the two different words; comparing vector1 with itself left
    # vector2 unused.
    similarity = dot(matutils.unitvec(vector1), matutils.unitvec(vector2))
    print(similarity)
def test_dot_3args(self): from numpy.core.multiarray import dot np.random.seed(22) f = np.random.random_sample((1024, 16)) v = np.random.random_sample((16, 32)) r = np.empty((1024, 32)) for i in range(12): dot(f, v, r) assert_equal(sys.getrefcount(r), 2) r2 = dot(f, v, out=None) assert_array_equal(r2, r) assert_(r is dot(f, v, out=r)) v = v[:, 0].copy() # v.shape == (16,) r = r[:, 0].copy() # r.shape == (1024,) r2 = dot(f, v) assert_(r is dot(f, v, r)) assert_array_equal(r2, r)
# Segmentation fault with acml's _dotblas.so
import numpy as np
from numpy.core.multiarray import dot

# Use the builtin ``complex``: the ``np.complex`` alias for it has been
# removed from NumPy, so the old spelling raises AttributeError.
b = np.ones(13, complex)
dot(b, b)
def matrix_mul(list_1, list_2):
    """Convert both arguments to arrays and return their ``dot`` product."""
    left, right = np.array(list_1), np.array(list_2)
    return dot(left, right)
# Compare dot-product scores from the truncated-SVD projection of X with
# scores from the vectors returned by find_vectors, for a fixed list of
# concept pairs.
# NOTE(review): skecth_scores, conceptmap, X, tsvd, find_vectors, start_time
# and end_time are not defined in this fragment -- presumably set up earlier
# in the script; confirm.
nb_scores = []
pairs = [
    ("animal", "dog"),
    ("good", "bad"),
    ("motivation", "inspiration"),
    ("girl", "chick"),
    ("body", "girl"),
    ("britain", "united_kingdom"),
    ("warrior", "war"),
    ("car", "table"),
]
pref = "/c/en/"
for pair in pairs:
    c1, c2 = pair
    print("Comparing")
    print(c1)
    print(c2)

    # The "truck"/"car" names are historical: they hold the first and the
    # second concept of the current pair.
    truck_index = conceptmap[pref + c1]
    car_index = conceptmap[pref + c2]
    truck_row = X[truck_index].toarray()
    car_row = X[car_index].toarray()
    truck_low_dim = tsvd.transform(truck_row)[:, 0]
    car_low_dim = tsvd.transform(car_row)[:, 0]
    testdotres = dot(truck_low_dim, car_low_dim.transpose())
    skecth_scores.append(testdotres)
    print(testdotres)

    print("Comparing against Numberbatch")
    v1, v2 = find_vectors(c1, c2)
    nbdotres = dot(v1, v2)
    nb_scores.append(nbdotres)
    print(nbdotres)

# Summary over all pairs: ratio of the logs of the mean scores, then the
# covariance between the two score lists, then the elapsed seconds.
print(np.log(np.average(skecth_scores)) / np.log(np.average(nb_scores)))
print(np.cov([skecth_scores, nb_scores]))
print(str((end_time - start_time).seconds))
# Segmentation fault with acml's _dotblas.so
import numpy as np
from numpy.core.multiarray import dot

# Use the builtin ``complex``: the ``np.complex`` alias for it has been
# removed from NumPy, so the old spelling raises AttributeError.
b = np.ones(13, complex)
dot(b, b)
def cosine_similarity(v1, v2):
    """Return the dot product of ``v1`` and ``v2`` after unit-normalization.

    Each vector is first passed through ``gensim.matutils.unitvec``, so the
    result is their cosine similarity.
    """
    unit_v1 = gensim.matutils.unitvec(v1)
    unit_v2 = gensim.matutils.unitvec(v2)
    return dot(unit_v1, unit_v2)
def sigmoid(theta=theta, a=a, b=b):
    """Return ``1 / (1 + exp(B - dot(a, theta)))`` element-wise.

    ``B`` is ``b`` reshaped to a column of shape ``(len(b), 1)`` and then
    repeated ``numpeople`` times via ``repeat(..., numpeople, 1)``.
    The defaults are the module-level ``theta``, ``a`` and ``b`` as they
    were bound when this function was defined.
    """
    b_column = reshape(b, (len(b), 1))
    b_tiled = repeat(b_column, numpeople, 1)
    exponent = b_tiled - dot(a, theta)
    return 1.0 / (1.0 + exp(exponent))
def compute_similarity(self, vector1, vector2):
    """Return the dot product of the two vectors after ``matutils.unitvec``."""
    unit_first = matutils.unitvec(vector1)
    unit_second = matutils.unitvec(vector2)
    return dot(unit_first, unit_second)
def vector_similarity(v1, v2):
    """Return the dot product of ``unitvec(v1)`` and ``unitvec(v2)``.

    NOTE(review): presumably ``unitvec`` scales a vector to unit length,
    which would make this the cosine similarity -- confirm against the
    import.
    """
    normalized_v1 = unitvec(v1)
    normalized_v2 = unitvec(v2)
    return dot(normalized_v1, normalized_v2)
def calculate_similarity_between_domain_and_wiki(self, domain_entity_vector, candidate_wiki_vector):
    """Return the similarity of a domain entity vector and a wiki vector.

    The similarity is the dot product of the two vectors after each has
    been passed through ``matutils.unitvec``. Returns ``None`` when either
    vector is ``None``.
    """
    if candidate_wiki_vector is None or domain_entity_vector is None:
        return None

    wiki_unit = matutils.unitvec(candidate_wiki_vector)
    domain_unit = matutils.unitvec(domain_entity_vector)
    return dot(wiki_unit, domain_unit)
def distance(vec1, vec2):
    """Return the dot product of the two vectors after ``matutils.unitvec``.

    Despite the name, the value is a similarity score rather than a metric
    distance: it is a plain dot product of the normalized inputs.
    """
    unit_a = matutils.unitvec(vec1)
    unit_b = matutils.unitvec(vec2)
    return dot(unit_a, unit_b)
def calc_song_similar(self, positive_songs=[], negative_songs=[],
                      positive_artists=[], negative_artists=[],
                      song_weight=1.0, artist_weight=1.5, topn=10,
                      restrict_vocab=None):
    """
    Compute the additive/subtractive similarity of songs and artists and
    return the top-n most similar songs.

    Args:
        positive_songs: songs (vocabulary keys or vectors) that contribute
            with weight ``+song_weight``.
        negative_songs: songs that contribute with weight ``-song_weight``.
        positive_artists: artists (vocabulary keys or vectors) that
            contribute with weight ``+artist_weight``.
        negative_artists: artists that contribute with weight
            ``-artist_weight``.
        song_weight: magnitude of the weight applied to every song.
        artist_weight: magnitude of the weight applied to every artist.
        topn: number of results to return; a falsy value returns the raw
            similarity array instead.
        restrict_vocab: when not None, only the first ``restrict_vocab``
            rows of the song vectors are searched.

    Returns:
        A list of ``(song, similarity)`` tuples, best match first, that
        excludes the input songs; or the full similarity array when
        ``topn`` is falsy.

    Raises:
        KeyError: a song or artist key is not in its model's vocabulary.
        ValueError: no song or artist was supplied.
    """
    try:
        # The list defaults are only read, never mutated, so sharing them
        # between calls is harmless.
        weighted_songs = ([(name, song_weight) for name in positive_songs] +
                          [(name, -song_weight) for name in negative_songs])
        weighted_artists = ([(name, artist_weight) for name in positive_artists] +
                            [(name, -artist_weight) for name in negative_artists])
        input_song_indices, mean = set(), []

        for song, weight in weighted_songs:
            # Check for a raw vector first: arrays have no strip().
            if isinstance(song, ndarray):
                mean.append(weight * song)
                continue
            song = song.strip()
            if song not in self.song2vec_model.vocab:
                raise KeyError("song '%s' not in vocabulary" % song)
            song_index = self.song2vec_model.vocab[song].index
            mean.append(weight * self.song2vec_model.syn0norm[song_index])
            input_song_indices.add(song_index)

        for artist, weight in weighted_artists:
            if isinstance(artist, ndarray):
                mean.append(weight * artist)
            elif artist in self.artist2vec_model.vocab:
                # Artist indices refer to the artist vocabulary, so they are
                # deliberately not added to the set that filters song results.
                artist_index = self.artist2vec_model.vocab[artist].index
                mean.append(weight * self.artist2vec_model.syn0norm[artist_index])
            else:
                raise KeyError("artist '%s' not in vocabulary" % artist)

        if not mean:
            raise ValueError("cannot compute similarity with no input")
        mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL)

        if restrict_vocab is None:
            limited = self.song2vec_model.syn0norm
        else:
            limited = self.song2vec_model.syn0norm[:restrict_vocab]
        dists = dot(limited, mean)
        if not topn:
            return dists

        # Fetch extra candidates so that topn remain after the input songs
        # are filtered out.
        best = matutils.argsort(dists, topn=topn + len(input_song_indices),
                                reverse=True)
        # ignore (don't return) songs from the input
        result = [(self.song2vec_model.index2word[sim], float(dists[sim]))
                  for sim in best if sim not in input_song_indices]
        return result[:topn]
    except Exception as e:
        print('error = %s' % e)
        # Bare raise keeps the original traceback.
        raise