Ejemplo n.º 1
0
def dist_tuple(nc_rep1, nc_rep2):
    """Compare two noun-compound representations slot by slot.

    Both arguments are indexed at positions 0 through 4, and the matching
    slots are passed pairwise to ``cosine_dist``.  According to the original
    author's notes, the slots hold:

    0. domain nouns related to the noun compound (a broad measure of
       similarity);
    1. domain nouns related to the head (space analogous to the head);
    2. verbs related to the noun compound (the action of the modifier on
       the head);
    3. adjectives that act on the head (space analogous to the modifier);
    4. adjectives the modifier might be like (space analogous to the
       modifier).

    Returns a 5-tuple ``(dist0, dist1, dist2, dist3, dist4)`` with one
    ``cosine_dist`` result per slot, in slot order.
    """
    return tuple(
        cosine_dist(nc_rep1[slot], nc_rep2[slot])
        for slot in range(5)
    )
Ejemplo n.º 2
0
def rank_mean_word_vector(X, k):
    ''' Rank responses against questions using mean word vectors.

    Each entry of ``X['t_q']`` and ``X['t_r']`` is joined with single spaces
    and passed to ``mean_word_vector``.  An ``n x n`` matrix, with
    ``n = X.shape[0]``, is then filled so that cell ``[iq, ir]`` holds
    ``cosine_dist`` of question ``iq`` and response ``ir``.  That matrix and
    ``k`` are handed to ``dist_matrix_to_rank_k``, whose result is returned.

    NOTE(review): ``X`` is presumably a pandas DataFrame whose ``t_q`` and
    ``t_r`` columns hold token sequences - confirm against the caller.
    '''
    def _embed(tokens):
        # Tokens are re-joined into one string before vectorising.
        return mean_word_vector(' '.join(tokens))

    size = X.shape[0]
    question_vecs = X['t_q'].apply(_embed).values
    response_vecs = X['t_r'].apply(_embed).values

    scores = np.zeros((size, size))
    for iq in range(size):
        # cosine_dist is fed single-row 2-D inputs, hence the reshape.
        q_row = question_vecs[iq].reshape(1, -1)
        for ir in range(size):
            r_row = response_vecs[ir].reshape(1, -1)
            scores[iq, ir] = cosine_dist(q_row, r_row)

    return dist_matrix_to_rank_k(scores, k)
Ejemplo n.º 3
0
def rank_cos_tfifd_vector(X, k):
    ''' Rank responses against questions using tf-idf vectors.

    Each entry of ``X['t_r']`` and ``X['t_q']`` is joined with single
    spaces.  A ``TfidfVectorizer`` is fitted on the response texts and then
    used to transform the question texts.  An ``n x n`` matrix, with
    ``n = X.shape[0]``, is filled so that cell ``[iq, ir]`` holds
    ``cosine_dist`` of question row ``iq`` and response row ``ir``.  That
    matrix and ``k`` are handed to ``dist_matrix_to_rank_k``, whose result
    is returned.

    NOTE(review): ``X`` is presumably a pandas DataFrame whose ``t_q`` and
    ``t_r`` columns hold token sequences - confirm against the caller.
    '''
    def _as_text(tokens):
        return ' '.join(tokens)

    size = X.shape[0]

    # The vectorizer is fitted on the responses only; the questions are
    # transformed with that already-fitted vectorizer.
    tfidf = TfidfVectorizer()
    response_rows = tfidf.fit_transform(X['t_r'].apply(_as_text))
    question_rows = tfidf.transform(X['t_q'].apply(_as_text))

    scores = np.zeros((size, size))
    for iq in range(size):
        q_row = question_rows[iq, :]
        for ir in range(size):
            scores[iq, ir] = cosine_dist(q_row, response_rows[ir, :])

    return dist_matrix_to_rank_k(scores, k)
Ejemplo n.º 4
0
def sim_dilation(nc1, nc2):
    """Return ``cosine_dist`` of the ``nc_dils_dict`` entries for two keys.

    ``nc1`` and ``nc2`` are looked up in the module-level ``nc_dils_dict``;
    a missing key propagates the lookup error from that mapping.
    """
    return cosine_dist(nc_dils_dict[nc1], nc_dils_dict[nc2])
Ejemplo n.º 5
0
def sim_sum(nc1, nc2):
    """Return ``cosine_dist`` of the ``nc_sums_dict`` entries for two keys.

    ``nc1`` and ``nc2`` are looked up in the module-level ``nc_sums_dict``;
    a missing key propagates the lookup error from that mapping.
    """
    return cosine_dist(nc_sums_dict[nc1], nc_sums_dict[nc2])