def dist_tuple(nc_rep1, nc_rep2):
    '''
    Compare two noun-compound representations one space at a time.

    Both arguments are indexed at positions 0 through 4. According to the
    notes kept with this function, the positions hold:
        0 - domain nouns related to the noun compound
            (a broad measure of similarity)
        1 - domain nouns related to the head
            (space analogous to the head)
        2 - verbs related to the noun compound
            (the action of the modifier on the head)
        3 - adjectives that act on the head
            (space analogous to the modifier)
        4 - adjectives the modifier might be like
            (space analogous to the modifier)

    Returns a 5-tuple holding cosine_dist of each pair of matching
    components, in index order.
    '''
    n_spaces = 5
    return tuple(
        cosine_dist(nc_rep1[space], nc_rep2[space])
        for space in range(n_spaces)
    )
def rank_mean_word_vector(X, k):
    '''
    Rank responses against questions using mean word vectors.

    Each entry of X['t_q'] and X['t_r'] is joined with single spaces and
    passed to mean_word_vector. Every (question, response) pair is then
    scored with cosine_dist on the two vectors reshaped to a single row,
    filling an n x n matrix (rows: questions, columns: responses).

    NOTE(review): the helper is named cosine_dist, while this function was
    originally described as using cosine similarity - confirm which one
    dist_matrix_to_rank_k expects.

    Returns whatever dist_matrix_to_rank_k(matrix, k) produces.
    '''
    def _embed(tokens):
        # Join the entry with spaces before embedding it.
        return mean_word_vector(' '.join(tokens))

    size = X.shape[0]
    scores = np.zeros((size, size))
    question_vecs = X['t_q'].apply(_embed).values
    response_vecs = X['t_r'].apply(_embed).values

    for row in range(size):
        for col in range(size):
            scores[row, col] = cosine_dist(
                question_vecs[row].reshape(1, -1),
                response_vecs[col].reshape(1, -1),
            )

    return dist_matrix_to_rank_k(scores, k)
def rank_cos_tfifd_vector(X, k):
    '''
    Rank responses against questions using tf-idf vectors.

    Entries of X['t_r'] and X['t_q'] are joined with single spaces. The
    vectorizer is fitted on the responses only; the questions are then
    transformed with that fitted vectorizer. Every (question, response)
    pair of rows is scored with cosine_dist, filling an n x n matrix
    (rows: questions, columns: responses).

    NOTE(review): the helper is named cosine_dist, while this function was
    originally described as using cosine similarity - confirm which one
    dist_matrix_to_rank_k expects.

    Returns whatever dist_matrix_to_rank_k(matrix, k) produces.
    '''
    def _as_text(tokens):
        return ' '.join(tokens)

    size = X.shape[0]
    scores = np.zeros((size, size))

    tfidf = TfidfVectorizer()
    # Fit on the responses first, then reuse the fitted vectorizer for the
    # questions.
    response_rows = tfidf.fit_transform(X['t_r'].apply(_as_text))
    question_rows = tfidf.transform(X['t_q'].apply(_as_text))

    for row in range(size):
        for col in range(size):
            scores[row, col] = cosine_dist(
                question_rows[row, :],
                response_rows[col, :],
            )

    return dist_matrix_to_rank_k(scores, k)
def sim_dilation(nc1, nc2):
    '''
    Return cosine_dist between the nc_dils_dict entries stored under the
    keys nc1 and nc2.

    A key missing from nc_dils_dict propagates as the lookup error.
    '''
    first_vec, second_vec = nc_dils_dict[nc1], nc_dils_dict[nc2]
    return cosine_dist(first_vec, second_vec)
def sim_sum(nc1, nc2):
    '''
    Return cosine_dist between the nc_sums_dict entries stored under the
    keys nc1 and nc2.

    A key missing from nc_sums_dict propagates as the lookup error.
    '''
    first_vec, second_vec = nc_sums_dict[nc1], nc_sums_dict[nc2]
    return cosine_dist(first_vec, second_vec)