def robustness(target, target_expl, neighborhood, neighborhood_expl):
    """Return the largest explanation-to-input distance ratio over a neighborhood.

    For every neighbor, the cosine distance between ``target_expl`` and the
    neighbor's explanation is divided by the cosine distance between
    ``target`` and the neighbor itself.  The maximum of those ratios is
    returned.

    :param target: reference input vector.
    :param target_expl: explanation vector belonging to ``target``.
    :param neighborhood: iterable of neighbor input vectors.
    :param neighborhood_expl: explanations, indexable in the same order as
        ``neighborhood``.
    :return: the maximum ratio.
    :raises ValueError: if ``neighborhood`` is empty (``max`` of an empty list).
    """
    ratios = [
        cosine_dist(target_expl, neighborhood_expl[idx])
        / cosine_dist(target, point)
        for idx, point in enumerate(neighborhood)
    ]
    return max(ratios)
def cosine_sim(vector, centroid):
    """Return ``1 - cosine_dist(vector, centroid)``.

    Both arguments are wrapped in ``np.matrix`` before the distance call.
    According to the original author's note this is done to prevent
    unexpected results, such as a vector being returned instead of a scalar.

    NOTE(review): ``np.matrix`` is no longer recommended by NumPy, and the
    wrapped inputs are 2-D -- confirm that the ``cosine_dist`` bound in this
    module accepts 2-D input.
    """
    # Convert arrays and matrices to the matrix type (see docstring).
    as_matrices = [np.matrix(arg) for arg in (vector, centroid)]
    distance = cosine_dist(*as_matrices)
    return 1 - distance
def _calculate_dist(self, strategy='l2_dist'):
    """Compute row-wise distances between source and target embeddings.

    Reads ``self.source_embeddings`` and ``self.target_embedggins``.

    :param strategy: ``'l2_dist'`` sums the squared element-wise differences
        along axis 1 (i.e. the squared L2 distance, no square root is taken);
        ``'cosine'`` calls ``cosine_dist`` on each pair of rows.
    :return: array with one distance per row.
    :raises AssertionError: if ``strategy`` is not one of the two options.
    """
    assert strategy in ['l2_dist', 'cosine']

    if strategy != 'l2_dist':
        # Cosine branch: one distance per (source row, target row) pair.
        per_row = [
            cosine_dist(src_row, self.target_embedggins[idx])
            for idx, src_row in enumerate(self.source_embeddings)
        ]
        return np.stack(per_row, axis=0)

    delta = np.subtract(self.source_embeddings, self.target_embedggins)
    return np.sum(np.square(delta), 1)
def get_nonego_simmilarity_vec(source_node, data, ind_list, cutoff,
                               simm_fun=lambda x, y: 1 - cosine_dist(x, y),
                               sparseflag=False):
    """
    Returns a row vector holding the similarities of the ``cutoff`` most
    similar nodes from @ind_list; every other entry is zero.

    :param source_node: index into ``data`` of the reference node.
    :param data: 2-D array whose rows are node feature vectors; the result
        has ``data.shape[0]`` columns.
    :param ind_list: iterable of row indices of the candidate nodes.
    :param cutoff: number of top-ranked candidates to keep (applied as a
        slice bound, so ``None`` keeps all candidates).
    :param simm_fun: callable ``(reference, candidate) -> similarity``;
        defaults to ``1 - cosine_dist``.
    :param sparseflag: when true the result is a ``scipy.sparse.dok_matrix``
        instead of a dense numpy array.
    :return: ``1 x data.shape[0]`` vector of similarities.
    """
    f_ref = data[source_node]
    n_nodes = data.shape[0]

    # Build the sparse container straight from its shape instead of
    # allocating a dense zero row first and converting it.
    if sparseflag:
        out = scipy.sparse.dok_matrix((1, n_nodes))
    else:
        out = np.zeros((1, n_nodes))

    # One descending sort over (similarity, index) pairs replaces repeated
    # sorted insertion, which shifts the list on every insert.
    ranked = sorted(
        ((simm_fun(f_ref, data[n, :]), n) for n in ind_list),
        reverse=True,
    )
    for sim, n in ranked[:cutoff]:
        out[0, n] = sim
    return out
def _appearance_comparison(self, bbox0, bbox1, im0, im1, window_sz, model):
    '''
    Computes appearance characteristics using `model` between the objects
    in `bbox0` and `bbox1` in `im0` and `im1` respectively.
    Returns the cosine distance between the two feature vectors.

    Parameters
    ----------
    bbox0 : ndarray.
        bounding box in `im0`, indexed as a single box of 4 values
        (min_row, min_col, max_row, max_col).
    bbox1 : ndarray.
        bounding box in `im1`, same layout as `bbox0`.
    im0, im1 : ndarray.
        images containing `bbox0`, `bbox1` respectively. 2D intensity
        images are stacked into 3 identical channels.
    window_sz : integer.
        number of pixels of reflect-padding added on every side of the
        row/column axes before cropping.
    model : object with a `.predict(im)` method that extracts appearance
        features.

    Returns
    -------
    cd : float.
        cosine distance between the flattened appearance features.

    Raises
    ------
    AssertionError
        if either feature array contains NaN or inf.
    '''
    from scipy.spatial.distance import cosine as cosine_dist
    import keras.backend as K

    def _padded_roi(im, bbox):
        # pad channels dimension as "RGB" for 2D intensity images
        if len(im.shape) < 3:
            im = np.stack([im] * 3, -1)
        # pad image to ensure the bbox never runs off the edge
        padded = np.pad(im,
                        ((window_sz, window_sz), (window_sz, window_sz), (0, 0)),
                        mode='reflect')
        # move bbox to fit the new dimensional indexing of the image
        shifted = (bbox.copy() + window_sz).astype('int32')
        return padded[shifted[0]:shifted[2], shifted[1]:shifted[3], :]

    im0_roi = _padded_roi(im0, bbox0)
    im1_roi = _padded_roi(im1, bbox1)

    if K.backend() == 'tensorflow':
        # tensorflow: (batch, dim00, dim01, channels)
        im0_classif = np.expand_dims(im0_roi, 0)
        im1_classif = np.expand_dims(im1_roi, 0)
    else:
        # theano: (batch, channels, dim00, dim01)
        im0_classif = np.expand_dims(np.rollaxis(im0_roi, -1), 0)
        im1_classif = np.expand_dims(np.rollaxis(im1_roi, -1), 0)

    f0 = model.predict(im0_classif)
    f1 = model.predict(im1_classif)

    assert np.isnan(f0).sum() == 0 and np.isnan(
        f1).sum() == 0, 'appearance feature was NaN'
    assert np.isinf(f0).sum() == 0 and np.isinf(
        f1).sum() == 0, 'appearance feature was inf'

    # The inputs carry a leading batch axis, so the predictions are flattened
    # before the distance call; current SciPy rejects inputs that are not 1-D.
    cd = cosine_dist(np.ravel(f0), np.ravel(f1))
    return cd
def cosine(u, v):
    '''
    Cosine similarity of vectors u and v, computed as one minus their
    cosine distance.
    '''
    distance = cosine_dist(u, v)
    return 1 - distance
def similarity(v1, v2):
    """Return a rescaled cosine similarity of ``v1`` and ``v2``.

    The cosine similarity (``1 - cosine_dist``) is shifted and halved as
    ``(sim + 1) / 2``.  If either vector has no non-zero entry the score is
    ``0.0`` and no distance is computed.
    """
    if np.count_nonzero(v1) == 0 or np.count_nonzero(v2) == 0:
        return 0.0

    cos_sim = 1 - cosine_dist(v1, v2)  # 1 - cosine_dist = similarity.
    return (cos_sim + 1) / 2
start_time = time.time() for row_idx in range(n_rows): if row_idx % 100 == 0: print(row_idx) for col_idx in range(n_cols): curr_grad = (x_grads[row_idx, col_idx], y_grads[row_idx, col_idx]) if col_idx > 0: left_grad = (x_grads[row_idx, col_idx - 1], y_grads[row_idx, col_idx - 1]) left_dist = cosine_dist(curr_grad, left_grad) else: left_dist = 0.0 if col_idx < (n_cols - 1): right_grad = (x_grads[row_idx, col_idx + 1], y_grads[row_idx, col_idx + 1]) right_dist = cosine_dist(curr_grad, right_grad) else: right_dist = 0.0 if row_idx > 0: top_grad = (x_grads[row_idx - 1, col_idx], y_grads[row_idx - 1, col_idx]) top_dist = cosine_dist(curr_grad, top_grad) else:
def cosine_sim(self, w1, w2):
    """Return ``1 - cosine_dist`` of the embeddings of ``w1`` and ``w2``.

    Each word is converted to an index with ``self.stoi`` and that index is
    used to look up its row in ``self.all_embed``.
    """
    first_vec = self.all_embed[self.stoi(w1)]
    second_vec = self.all_embed[self.stoi(w2)]
    return 1 - cosine_dist(first_vec, second_vec)
def cosine_similarity(tag_vactor_a, tag_vector_b):
    """Print and return the cosine similarity of two tag vectors.

    The similarity is ``1 - cosine_dist(tag_vactor_a, tag_vector_b)``.  The
    value is still printed, and is additionally returned so that callers can
    use it.

    :param tag_vactor_a: first vector (parameter name kept as-is for
        backward compatibility with keyword callers).
    :param tag_vector_b: second vector.
    :return: the cosine similarity.
    """
    similarity = 1 - cosine_dist(tag_vactor_a, tag_vector_b)
    # Parenthesised form works as a statement on Python 2 and as a call on 3.
    print(similarity)
    return similarity
def _synset_vector(synset, reverse_mapping, word_to_ix, embeddings, use_UNK):
    """Return the embedding vector used for one WordNet synset."""
    wn_id = "%d%s" % (synset.offset(), synset.pos())
    bn_id = reverse_mapping.get(wn_id, None)
    if bn_id is None:
        if use_UNK:
            return embeddings.get_vector("<UNK>")
    else:
        # First vocabulary entry containing the ID past position 0 wins.
        for word in word_to_ix.keys():
            if word.find(bn_id) > 0:
                return embeddings.get_vector(word)
    # 0-valued vector in case no BabelNet ID / vocabulary entry is found.
    # Builtin float replaces the np.float alias removed from recent NumPy.
    return np.zeros(shape=[EMBEDDING_SIZE], dtype=float)


def score(resource_path, embeddings_path, simtest_path, use_UNK=False):
    """
    Compares scores contained in a test set with the maximum cosine
    similarity between all pairs of vectors associated to two given words'
    synsets, by computing Spearman and Pearson coefficients.

    A word pair for which no synset pair is available keeps the initial
    score of -1.

    :param resource_path: Path to the resource folder
    :param embeddings_path: Path to the embeddings.vec file
    :param simtest_path: Path to the similarity test file
    :param use_UNK: unknown synset vector -> True: uses vectors associated
        to <UNK>; False: 0-valued vector (default)
    :return: None
    """
    print("Loading vocabularies...")
    word_to_ix, _, _ = u.get_vocab(vocab_path=resource_path + "/vocab.txt",
                                   antivocab_path=resource_path + "/antivocab.txt")

    print("Loading mappings...")
    _, reverse_mapping = u.get_WN_mappings(resource_path + "/bn2wn_mapping.txt",
                                           with_reverse=True)

    print("Loading embeddings...")
    embeddings = keyedvectors.KeyedVectors.load_word2vec_format(
        embeddings_path, binary=False)

    print("Computing similarities...")
    gold_scores = []
    scores = []
    for word_pair in u.wordsim_pairs_generator(simtest_path):
        vectors1 = [
            _synset_vector(syn, reverse_mapping, word_to_ix, embeddings, use_UNK)
            for syn in wn.synsets(word_pair.word1)
        ]
        # Resolve the second word's vectors once per pair rather than once
        # per synset of the first word.
        vectors2 = [
            _synset_vector(syn, reverse_mapping, word_to_ix, embeddings, use_UNK)
            for syn in wn.synsets(word_pair.word2)
        ] if vectors1 else []

        curr_score = -1
        for vector1 in vectors1:
            for vector2 in vectors2:
                cos_sim = 1.0 - cosine_dist(vector1, vector2)
                curr_score = max(curr_score, cos_sim)

        gold_scores.append(word_pair.score)
        scores.append(curr_score)

    # compute spearman and pearson coefficients
    print(
        "\nSpearman: %.3f\nPearson: %.3f"
        % (spearmanr(gold_scores, scores)[0], pearsonr(gold_scores, scores)[0]))