def hard_e_step(word, vocab, lv, threshold=0):
    """Hard E-step: collect vocabulary entries whose random-index vectors
    are similar to *word*'s vector.

    Parameters
    ----------
    word : str
        The word to compare against the vocabulary.
    vocab : sequence of dict
        Clusters of candidate words; presumably indexed by word length —
        TODO confirm against the caller.
    lv
        Letter-vector table, passed through to ``random_idx.id_vector``.
    threshold : numeric, optional
        Minimum dot-product similarity for a candidate to be kept.

    Returns
    -------
    list
        ``[similarity, key]`` pairs with ``similarity > threshold``.
    """
    # Scan no more clusters than the word has letters
    # (original comment: "if word is > max vocab").
    cluster_size = min(len(vocab), len(word))
    potential_words = []
    # NOTE(review): N, alphabet and ordered are module-level globals not
    # visible in this chunk — confirm they are defined before first call.
    word_vector = random_idx.id_vector(N, word, alphabet, lv, ordered)

    # Walk clusters from largest index down to 0, matching the original order.
    for i in range(cluster_size - 1, -1, -1):
        for key in vocab[i]:  # iterate the dict directly; .keys() is redundant
            key_vector = random_idx.id_vector(N, key, alphabet, lv, ordered)
            similarity = np.dot(np.transpose(word_vector[0]), key_vector[0])
            if similarity > threshold:
                potential_words.append([similarity, key])
    return potential_words
# Exemple #2
# 0
def hard_e_step(word, vocab, lv, threshold=0):
    """Return ``[similarity, candidate]`` pairs for every vocabulary entry
    whose random-index vector's dot product with *word*'s vector exceeds
    *threshold*. Clusters are scanned from the highest index down to 0.
    """
    n_clusters = len(vocab)
    # if word is > max vocab
    if len(word) < n_clusters:
        n_clusters = len(word)
    matches = []
    target = random_idx.id_vector(N, word, alphabet, lv, ordered)

    cluster_idx = n_clusters - 1
    while cluster_idx >= 0:
        for candidate in vocab[cluster_idx].keys():
            cand_vec = random_idx.id_vector(N, candidate, alphabet, lv, ordered)
            score = np.dot(np.transpose(target[0]), cand_vec[0])
            if score > threshold:
                matches.append([score, candidate])
        cluster_idx -= 1
    return matches
def create_meaning_matrix(ldamodel, topicid, topn, dictionary):
    """Build an (N, N) matrix whose leading rows are the random-index
    vectors of the top terms of one LDA topic.

    Parameters
    ----------
    ldamodel
        Model exposing ``get_topic_terms(topicid, topn)`` returning
        (term_id, weight)-style pairs (gensim-like API).
    topicid
        Index of the topic to query.
    topn : int
        Number of top terms requested; assumed <= N — TODO confirm, since
        the matrix only has N rows.
    dictionary
        Object exposing ``id2token`` mapping term ids to token strings.

    Returns
    -------
    numpy.ndarray
        Shape (N, N); rows beyond the returned terms remain zero.
    """
    # token 2 id dictionary
    # print dictionary.token2id
    matrix = np.zeros((N, N))
    id2token = dictionary.id2token
    # Resolve term ids to token strings for the requested topic.
    topic_terms = [str(id2token[tup[0]])
                   for tup in ldamodel.get_topic_terms(topicid, topn)]

    # enumerate over the terms actually returned: unlike range(0, topn),
    # this cannot IndexError if the model yields fewer than topn terms.
    for i, term in enumerate(topic_terms):
        matrix[i] = random_idx.id_vector(N, term, alphabet, RI_letters, ordered)
    return matrix
def create_meaning_matrix(ldamodel, topicid, topn, dictionary):
    """Fill the first *topn* rows of an (N, N) zero matrix with the
    random-index vectors of the top terms of topic *topicid*.
    """
    # token 2 id dictionary
    # print dictionary.token2id
    meaning = np.zeros((N, N))
    lookup = dictionary.id2token
    terms = [str(lookup[pair[0]])
             for pair in ldamodel.get_topic_terms(topicid, topn)]

    row = 0
    while row < topn:
        meaning[row] = random_idx.id_vector(N, terms[row], alphabet,
                                            RI_letters, ordered)
        row += 1
    return meaning