Esempio n. 1
0
def get_keywords_keywords_distance(prod_tuple, q_tuple):
    """Return the mean cosine distance between matched keyword vectors.

    Both arguments must expose a ``kwsVectors`` attribute, which is decoded
    with ``MyUtils_strings.fromlls_toarrays``. A matrix of pairwise cosine
    similarities (``1 - cosine distance``) is built with one row per
    ``prod_tuple`` keyword vector and one column per ``q_tuple`` keyword
    vector. ``MyUtils.pick_maxmatches_matrix`` then selects the similarities
    to keep (presumably the best match per keyword -- confirm against the
    helper); these are turned back into distances and averaged.

    Returns:
        The ``np.average`` of the selected distances, or ``None`` when a
        ``NameError`` is raised anywhere in the computation.
    """
    # NOTE(review): the NameError handler spans the whole body, so a missing
    # import or a misspelled name silently yields None -- confirm intended.
    try:
        prod_vectors = np.array(
            MyUtils_strings.fromlls_toarrays(prod_tuple.kwsVectors))
        q_vectors = np.array(
            MyUtils_strings.fromlls_toarrays(q_tuple.kwsVectors))
        n_rows = len(prod_vectors)
        n_cols = len(q_vectors)

        # Every cell starts at -1 and is overwritten by the loops below.
        similarities = np.full((n_rows, n_cols), -1.0)
        for row, prod_vec in enumerate(prod_vectors):
            for col, q_vec in enumerate(q_vectors):
                cos_dist = distance.cosine(u=prod_vec, v=q_vec)
                similarities[row, col] = 1 - cos_dist

        best_similarities = MyUtils.pick_maxmatches_matrix(similarities)
        best_distances = [1 - sim for sim in best_similarities]
        return np.average(best_distances)
    except NameError:
        return None
def get_kws_similarity(kwsVectors_stringls_1, kwsVectors_stringls_2):
    """Return an aggregated cosine similarity between two keyword-vector sets.

    Each argument is decoded with ``MyUtils_strings.fromlls_toarrays``. A
    matrix of pairwise cosine similarities (``1 - cosine distance``) is built
    with one row per vector of the first set and one column per vector of the
    second set. ``MyUtils.pick_maxmatches_matrix`` then selects the
    similarities to keep (presumably the best match per keyword -- confirm
    against the helper), and their ``numpy.average`` is returned.

    The similarity matrix and the aggregated value are logged at DEBUG level.
    """
    first_vectors = MyUtils_strings.fromlls_toarrays(kwsVectors_stringls_1)
    second_vectors = MyUtils_strings.fromlls_toarrays(kwsVectors_stringls_2)
    n_rows = len(first_vectors)
    n_cols = len(second_vectors)

    # Every cell starts at -1 and is overwritten by the loops below.
    similarities = numpy.full((n_rows, n_cols), -1.0)
    for row, first_vec in enumerate(first_vectors):
        for col, second_vec in enumerate(second_vectors):
            cos_dist = scipy.spatial.distance.cosine(u=first_vec,
                                                     v=second_vec)
            similarities[row, col] = 1 - cos_dist
    logging.debug("\nThe keywords sim.matrix : %s", similarities)

    best_matches = MyUtils.pick_maxmatches_matrix(similarities)
    kws_aggregated_sim = numpy.average(best_matches)
    logging.debug("Aggregated keywords' similarity: %s", kws_aggregated_sim)
    return kws_aggregated_sim