def count_noun(s1, s2):
    """Report whether both inputs keep the same number of in-vocabulary words.

    Each input is passed through ``convert`` and then filtered with
    ``utils.valid_words`` against ``w2v_google``; the lengths of the two
    filtered results are compared.

    NOTE(review): nothing in this function is noun-specific; presumably
    ``convert`` does the noun extraction -- confirm against its definition.

    Returns:
        1 if the two filtered results have equal length, 0 if they differ,
        and -1 if any step raises.
    """
    try:
        words1 = utils.valid_words(w2v_google, convert(s1))
        words2 = utils.valid_words(w2v_google, convert(s2))
        # int(...) yields a plain 0/1 so the result has the same type as the
        # -1 failure sentinel (and as len_valid's return value).
        return int(len(words1) == len(words2))
    except Exception:
        # Best-effort feature: any failure maps to the -1 sentinel, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return -1
def diff_w2v_set_diff(s1, s2):
    """Score how similar the words unique to each input are.

    The feature is only computed when at least one input contains
    ``'differ'``. Both inputs are passed through ``convert``, filtered with
    ``utils.valid_words`` against ``w2v_google`` and turned into sets; the
    words found only in ``s1`` are then compared with the words found only
    in ``s2`` via ``w2v_google.n_similarity``.

    Returns:
        The ``n_similarity`` score of the two set differences, or -1 when
        neither input contains ``'differ'``, when either set difference is
        empty, or when any step raises.
    """
    try:
        if 'differ' not in s1 and 'differ' not in s2:
            return -1
        words1 = set(utils.valid_words(w2v_google, convert(s1)))
        words2 = set(utils.valid_words(w2v_google, convert(s2)))
        only1 = words1 - words2
        only2 = words2 - words1
        # Nothing to compare on one side. Presumably n_similarity would raise
        # here anyway (gensim raises ZeroDivisionError on an empty list --
        # confirm for the model class in use); return the sentinel directly.
        if not only1 or not only2:
            return -1
        return w2v_google.n_similarity(only1, only2)
    except Exception:
        # Best-effort feature: any failure maps to the -1 sentinel, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return -1
def diff_w2v_quora(s1, s2):
    """Score the similarity of two inputs with the ``w2v_quora`` model.

    The feature is only computed when at least one input contains
    ``'differ'``. Both inputs are passed through ``convert`` and filtered
    with ``utils.valid_words`` against ``w2v_quora`` before being compared
    with ``w2v_quora.n_similarity``.

    Returns:
        The ``n_similarity`` score of the two filtered word collections, or
        -1 when neither input contains ``'differ'`` or when any step raises.
    """
    try:
        if 'differ' not in s1 and 'differ' not in s2:
            return -1
        words1 = utils.valid_words(w2v_quora, convert(s1))
        words2 = utils.valid_words(w2v_quora, convert(s2))
        return w2v_quora.n_similarity(words1, words2)
    except Exception:
        # Best-effort feature: any failure maps to the -1 sentinel, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return -1
def len_valid(s1, s2):
    """Report whether both inputs have equally many distinct valid words.

    The feature is only computed when at least one input contains
    ``'differ'``. Both inputs are passed through ``convert``, filtered with
    ``utils.valid_words`` against ``w2v_google`` and de-duplicated into
    sets; the sizes of the two sets are compared.

    Returns:
        1 if the two sets have the same size, 0 if they do not, and -1 when
        neither input contains ``'differ'`` or when any step raises.
    """
    try:
        if 'differ' not in s1 and 'differ' not in s2:
            return -1
        words1 = set(utils.valid_words(w2v_google, convert(s1)))
        words2 = set(utils.valid_words(w2v_google, convert(s2)))
        return int(len(words1) == len(words2))
    except Exception:
        # Best-effort feature: any failure maps to the -1 sentinel, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return -1
def most_sim(s1, s2):
    """Collect cross-input word pairs whose similarity exceeds ``sim_value``.

    ``stops`` is subtracted from each input (so both are presumably sets --
    confirm against callers) and the remainder is filtered with
    ``utils.valid_words`` against ``w2v_google``. Every pairing of one word
    from each side is scored with ``w2v_google.similarity``.

    Returns:
        A list of two-element lists, each holding a pair of words in sorted
        order whose similarity is strictly greater than ``sim_value``.
    """
    left_words = utils.valid_words(w2v_google, s1 - stops)
    right_words = utils.valid_words(w2v_google, s2 - stops)

    # NOTE: a disabled earlier variant also accepted a pair when its
    # w2v_quora similarity exceeded sim_value, and kept pairs as unsorted
    # tuples; only the w2v_google score is used here.
    similar_pairs = []
    for left, right in product(left_words, right_words):
        if w2v_google.similarity(left, right) > sim_value:
            similar_pairs.append(sorted([left, right]))
    return similar_pairs