コード例 #1
0
def count_noun(s1, s2):
    """Tell whether both sentences keep the same number of valid words.

    Each sentence is passed through ``convert`` and the result is filtered
    by ``utils.valid_words`` against ``w2v_google``; the lengths of the two
    filtered results are then compared.

    NOTE(review): despite the name, no noun-specific filtering is visible in
    this function -- confirm whether ``convert`` is what restricts to nouns.

    Args:
        s1: First sentence, in whatever form ``convert`` accepts.
        s2: Second sentence, same form as ``s1``.

    Returns:
        ``1`` if both filtered results have the same length, ``0`` if they
        differ, and ``-1`` if any step raises an exception.
    """
    try:
        words1 = utils.valid_words(w2v_google, convert(s1))
        words2 = utils.valid_words(w2v_google, convert(s2))
        # int(...) so the result is a real 0/1 integer, the same type as the
        # -1 failure sentinel, rather than a bool.
        return int(len(words1) == len(words2))
    except Exception:
        # Exception (not a bare except) keeps the best-effort -1 fallback
        # while letting KeyboardInterrupt / SystemExit propagate.
        return -1
コード例 #2
0
def diff_w2v_set_diff(s1, s2):
    """Similarity between the words unique to each sentence.

    Only evaluated when ``'differ'`` occurs in at least one of the inputs
    (tested with ``in``, so for strings this is a substring match). Each
    sentence is passed through ``convert``, filtered by
    ``utils.valid_words`` against ``w2v_google`` and turned into a set; the
    two set differences are then handed to ``w2v_google.n_similarity``.

    Args:
        s1: First sentence.
        s2: Second sentence.

    Returns:
        The value of ``w2v_google.n_similarity`` for the two set
        differences, or ``-1`` when neither input contains ``'differ'`` or
        when any step raises an exception.
    """
    try:
        # The guard is deliberately inside the try: a non-container input
        # (for example a float) makes ``in`` raise TypeError, which must
        # still map to -1.
        if 'differ' not in s1 and 'differ' not in s2:
            return -1
        words1 = set(utils.valid_words(w2v_google, convert(s1)))
        words2 = set(utils.valid_words(w2v_google, convert(s2)))
        only_in_first = words1 - words2
        only_in_second = words2 - words1
        return w2v_google.n_similarity(only_in_first, only_in_second)
    except Exception:
        # Exception (not a bare except) keeps the best-effort -1 fallback
        # while letting KeyboardInterrupt / SystemExit propagate.
        return -1
コード例 #3
0
def diff_w2v_quora(s1, s2):
    """Similarity of two sentences under ``w2v_quora``.

    Only evaluated when ``'differ'`` occurs in at least one of the inputs
    (tested with ``in``, so for strings this is a substring match). Each
    sentence is passed through ``convert`` and filtered by
    ``utils.valid_words`` against ``w2v_quora`` before being handed to
    ``w2v_quora.n_similarity``.

    Args:
        s1: First sentence.
        s2: Second sentence.

    Returns:
        The value of ``w2v_quora.n_similarity`` for the two filtered word
        collections, or ``-1`` when neither input contains ``'differ'`` or
        when any step raises an exception.
    """
    try:
        # The guard is deliberately inside the try: a non-container input
        # (for example a float) makes ``in`` raise TypeError, which must
        # still map to -1.
        if 'differ' not in s1 and 'differ' not in s2:
            return -1
        words1 = utils.valid_words(w2v_quora, convert(s1))
        words2 = utils.valid_words(w2v_quora, convert(s2))
        return w2v_quora.n_similarity(words1, words2)
    except Exception:
        # Exception (not a bare except) keeps the best-effort -1 fallback
        # while letting KeyboardInterrupt / SystemExit propagate.
        return -1
コード例 #4
0
def len_valid(s1, s2):
    """Tell whether both sentences keep the same number of distinct valid words.

    Only evaluated when ``'differ'`` occurs in at least one of the inputs
    (tested with ``in``, so for strings this is a substring match). Each
    sentence is passed through ``convert``, filtered by
    ``utils.valid_words`` against ``w2v_google`` and de-duplicated into a
    set; the set sizes are then compared.

    Args:
        s1: First sentence.
        s2: Second sentence.

    Returns:
        ``1`` if both sets have the same size, ``0`` if they differ, and
        ``-1`` when neither input contains ``'differ'`` or when any step
        raises an exception.
    """
    try:
        # The guard is deliberately inside the try: a non-container input
        # (for example a float) makes ``in`` raise TypeError, which must
        # still map to -1.
        if 'differ' not in s1 and 'differ' not in s2:
            return -1
        words1 = set(utils.valid_words(w2v_google, convert(s1)))
        words2 = set(utils.valid_words(w2v_google, convert(s2)))
        # int(...) yields a plain 0/1 integer, matching the -1 sentinel type.
        return int(len(words1) == len(words2))
    except Exception:
        # Exception (not a bare except) keeps the best-effort -1 fallback
        # while letting KeyboardInterrupt / SystemExit propagate.
        return -1
コード例 #5
0
def most_sim(s1, s2):
    """Collect cross-sentence word pairs whose similarity exceeds ``sim_value``.

    ``stops`` is subtracted from each input with ``-`` (so the inputs are
    presumably sets of words -- confirm against callers), and the remainder
    is filtered by ``utils.valid_words`` against ``w2v_google``. Every
    pairing of a word from the first result with a word from the second is
    scored with ``w2v_google.similarity``.

    Args:
        s1: Words of the first sentence.
        s2: Words of the second sentence.

    Returns:
        A list with one two-element sorted list ``[a, b]`` for each pairing
        whose similarity is strictly greater than ``sim_value``.
    """
    left_words = utils.valid_words(w2v_google, s1 - stops)
    right_words = utils.valid_words(w2v_google, s2 - stops)

    # NOTE: an earlier variant also accepted a pair when
    # w2v_quora.similarity exceeded sim_value; only w2v_google is consulted.
    similar_pairs = []
    for left, right in product(left_words, right_words):
        if w2v_google.similarity(left, right) > sim_value:
            similar_pairs.append(sorted([left, right]))
    return similar_pairs