def word_distance(w1, w2, wv):
    """Compare two words through their embedding vectors.

    Args:
        w1: first word (string); must be a key of ``wv``.
        w2: second word (string); must be a key of ``wv``.
        wv: dictionary mapping words to their embedding vectors.

    Returns:
        Whatever ``cs`` yields for the two looked-up vectors. ``cs`` is the
        cosine function imported at the top of this file; the original
        assignment notes describe the result as a floating point number in
        the range (0, 1).
        NOTE(review): whether ``cs`` is a cosine similarity or a cosine
        distance depends on that import, which is not visible here - confirm.

    Raises:
        KeyError: if either word is absent from ``wv``.
    """
    # Look both words up first (w1 before w2), then delegate to cs.
    first_vec, second_vec = wv[w1], wv[w2]
    return cs(first_vec, second_vec)
Example #2
0
def _pair_score(dis, reps1, reps2):
    """Score one pair of representations under the metric named by ``dis``.

    'cos' gives ``1 - cs(...)``, 'sp' gives ``sp.entropy(...)``, 'corr' gives
    the first element of ``pearsonr(...)``; any other name falls back to
    ``eu(...)``.
    """
    if dis == 'cos':
        return 1 - cs(reps1, reps2)
    if dis == 'sp':
        return sp.entropy(reps1, reps2)
    if dis == 'corr':
        # Only the coefficient (first element) is used; per the original
        # note, 1 means correlated and 0 means not correlated.
        return pearsonr(reps1, reps2)[0]
    return eu(reps1, reps2)


def calc_distances(dic1, dic2, task, topks, distances, GT_samples):
    """Compute mean average precision per distance metric and top-k cutoff.

    Every key of ``dic1`` is used as a query. All keys of ``dic2`` other than
    the query key itself are scored against it, ranked, and the ranking is
    handed to ``get_AP``. The per-query results are averaged over the number
    of keys in ``dic1``.

    Args:
        dic1: mapping from sample key to representation (the queries).
        dic2: mapping from sample key to representation (the candidates).
        task: label that is logged and stored in the 'task' column.
        topks: sequence of cutoffs, each passed to ``get_AP`` as ``top_k``.
        distances: sequence of metric names; 'cos', 'sp' and 'corr' are
            handled explicitly, any other name uses ``eu``.
        GT_samples: ground truth, forwarded unchanged to ``get_AP``.

    Returns:
        A pandas DataFrame with one row per entry of ``distances``, one
        column per entry of ``topks`` holding the averaged value, plus the
        columns 'distances' and 'task'.

    Notes:
        The ranking of candidates does not depend on the cutoff, so it is
        built once per (distance, query) and reused for every ``topk``
        instead of being recomputed per cutoff. The sequence of log records
        is the same as before; only their timing relative to the scoring
        work differs.
        NOTE(review): this assumes ``get_AP`` does not depend on the order
        in which (query, top_k) pairs are evaluated - confirm.
        An empty ``dic1`` yields 0.0 instead of raising ZeroDivisionError.
    """
    logger = logging.getLogger("my_logger")
    logger.info('TASK: %s', task)

    # The key orderings are loop-invariant: compute them once.
    queries = sorted(dic1.keys())
    candidates = sorted(dic2.keys())
    n_queries = len(dic1.keys())

    maps = []
    for dis in distances:
        logger.info('distance: %s', dis)
        # 'corr' is ranked in decreasing order; every other score is ranked
        # in increasing order.
        descending = dis == 'corr'
        ap_totals = [0 for _ in topks]

        for sample in queries:
            reps1 = dic1[sample]
            score_samples = []
            for given_sample in candidates:
                if sample == given_sample:
                    continue  # never score a sample against its own key
                given_score = _pair_score(dis, reps1, dic2[given_sample])
                score_samples.append((given_sample, given_score))
            ranked = sorted(score_samples, key=lambda x: x[1],
                            reverse=descending)
            for idx, topk in enumerate(topks):
                # Pass a fresh list each time, as the original did, so a
                # get_AP that modifies its argument cannot leak state
                # between cutoffs.
                ap_totals[idx] = ap_totals[idx] + get_AP(
                    list(ranked), sample, GT_samples, top_k=topk)

        tops_val = []
        for topk, total in zip(topks, ap_totals):
            logger.info('K: %s', topk)
            # Guard the average against an empty query set.
            tops_val.append(total / float(n_queries) if n_queries else 0.0)
            logger.info('mAP %s %s', task, str(tops_val[-1]))
        maps.append(tops_val)

    maps = pd.DataFrame(maps)
    maps.columns = topks
    maps['distances'] = distances
    maps['task'] = task
    return maps