Example #1
0
def get_top15(word, vocab_list, func):
    """Return a JSON response with the top-15 most similar and top-15 most
    different vocabulary entries for *word*.

    Args:
        word: query phrase (string).
        vocab_list: iterable of (id, text) pairs.
        func: similarity function ``(word, text) -> float``; results
            <= -100 are treated as "no similarity available" and skipped.
    """
    # Hoist the loop-invariant lemmatization of the query word, and call
    # func once per entry (the original comprehension called it twice).
    word_key = ' '.join(lemmatize_an_idea(word))
    sim_vec = []
    for w in vocab_list:
        # Skip entries that lemmatize to the same phrase as the query.
        if ' '.join(lemmatize_an_idea(w[1])) == word_key:
            continue
        similarity = func(word, w[1])
        if similarity > -100:
            sim_vec.append({'id': w[0], 'text': w[1], 'similarity': similarity})

    sim_vec.sort(key=lambda t: t['similarity'])
    # "Similar" candidates are capped below 0.5 — presumably to avoid
    # near-duplicates of the query; TODO confirm threshold rationale.
    for_sim = [t for t in sim_vec if t['similarity'] < 0.5]
    return jsonify(
            word = word,
            similar = list(reversed(for_sim[-15:])),
            different = sim_vec[:15])
Example #2
0
def spacyPhraseSim(p1, p2):
    """Cosine similarity between two phrases.

    Each phrase is lemmatized, run through spaCy, and represented as the
    mean of its token vectors.
    """
    # TODO: find a more reasonable way to aggregate vector
    lemmas1 = ' '.join(lemmatize_an_idea(p1))
    lemmas2 = ' '.join(lemmatize_an_idea(p2))
    doc1 = nlp(unicode(lemmas1))
    doc2 = nlp(unicode(lemmas2))
    vec1 = np.mean([token.repvec for token in doc1], axis=0)
    vec2 = np.mean([token.repvec for token in doc2], axis=0)
    return float(cossim(vec1, vec2))
Example #3
0
def spacyPhraseSim(p1, p2):
    """Return the cosine similarity of two phrases, each represented as
    the mean of its spaCy token vectors after lemmatization."""
    # TODO: find a more reasonable way to aggregate vector
    mean_vectors = []
    for phrase in (p1, p2):
        text = ' '.join(lemmatize_an_idea(phrase))
        doc = nlp(unicode(text))
        mean_vectors.append(np.mean([t.repvec for t in doc], axis=0))
    return float(cossim(mean_vectors[0], mean_vectors[1]))
Example #4
0
def _resolve_idea_set(entry, vocab_list, set_dict):
    """Pick a random precomputed idea-set for *entry* and map its vocab
    indices back to {'id', 'text'} dicts (one 3-tuple)."""
    s_idx = vocab_list.index((entry['id'], entry['text']))
    triple = random.choice(set_dict[s_idx])
    return tuple({'id': vocab_list[i][0], 'text': vocab_list[i][1]}
                 for i in triple)


def get_glove_sim_set(topic):
    """Return JSON with idea-sets similar to or different from the posted
    word, ranked by GloVe similarity.

    Expects a JSON request body with 'word' ({'id', 'text'}) and
    'operation' ('similar'; anything else means 'different').

    Args:
        topic: 'weddingTheme' selects the theme dictionary, anything
            else the prop dictionary.
    """
    data = request.get_json()
    word = data['word']['text']
    func = gloveSim
    this_dict_set = theme_dict_set if topic == 'weddingTheme' else prop_dict_set
    vocab_list = this_dict_set['words']

    # Hoist the loop-invariant lemmatization of the query word, and call
    # func once per entry (the original comprehension called it twice).
    word_key = ' '.join(lemmatize_an_idea(word))
    sim_vec = []
    for w in vocab_list:
        # Skip entries that lemmatize to the same phrase as the query.
        if ' '.join(lemmatize_an_idea(w[1])) == word_key:
            continue
        similarity = func(word, w[1])
        if similarity > -100:  # <= -100 marks "no similarity available"
            sim_vec.append({'id': w[0], 'text': w[1], 'similarity': similarity})

    sim_vec.sort(key=lambda t: t['similarity'])
    for_sim = [t for t in sim_vec if t['similarity'] < 0.5]
    operation = data['operation']
    similar_sets = []
    different_sets = []
    set_dict = this_dict_set['set_dict']

    # The two branches shared 10 identical lines; extracted to
    # _resolve_idea_set above.
    if operation == 'similar':
        for s in reversed(for_sim[-5:]):
            similar_sets.append(_resolve_idea_set(s, vocab_list, set_dict))
    else:
        for s in sim_vec[:5]:
            different_sets.append(_resolve_idea_set(s, vocab_list, set_dict))

    return jsonify(
            word = data['word'],
            similar = similar_sets,
            different = different_sets)
Example #5
0
 def vec_for_sentence(self, sentence):
     """Return the aggregate vector for *sentence*.

     The sentence is tokenized via lemmatize_an_idea (second argument
     False — presumably disables some filtering; verify in its def) and
     delegated to vec_for_tokens.
     """
     return self.vec_for_tokens(lemmatize_an_idea(sentence, False))
Example #6
0
def get_sorted_similar(word, vocab_list, func):
    """Return vocab entries with similarity to *word* below 0.5, sorted
    ascending by similarity.

    Args:
        word: query phrase (string).
        vocab_list: iterable of (id, text) pairs.
        func: similarity function ``(word, text) -> float``; results
            <= -100 are treated as "no similarity available" and skipped.
    """
    # Hoist the loop-invariant lemmatization of the query word, and call
    # func once per entry (the original comprehension called it twice).
    word_key = ' '.join(lemmatize_an_idea(word))
    sim_vec = []
    for w in vocab_list:
        # Skip entries that lemmatize to the same phrase as the query.
        if ' '.join(lemmatize_an_idea(w[1])) == word_key:
            continue
        similarity = func(word, w[1])
        if similarity > -100:
            sim_vec.append({'id': w[0], 'text': w[1], 'similarity': similarity})
    sim_vec.sort(key=lambda t: t['similarity'])
    return [t for t in sim_vec if t['similarity'] < 0.5]