def load_poems_model(file_name, w2v_model, vectorize=True):
    """Read the poems model stored in ``file_name`` and optionally vectorize it.

    The model is obtained from ``read_data_model(file_name)``.  When
    ``vectorize`` is true, two entries are added to it:

    * ``'matrices'``   -- ``sem.bag_to_matrix`` applied to every item of ``'bags'``
    * ``'a_matrices'`` -- ``sem.bag_to_matrix`` applied to every item of
      ``'associations'``

    Progress messages are printed to stdout.

    Args:
        file_name: Passed to ``read_data_model``; also echoed in the final message.
        w2v_model: Forwarded unchanged to ``sem.bag_to_matrix``.
        vectorize: Whether to build the matrix entries described above.

    Returns:
        The model object returned by ``read_data_model`` (mutated in place when
        ``vectorize`` is true).
    """
    pmodel = read_data_model(file_name)
    print("loading model...")
    if vectorize:
        print("vectorizing model...")
        # (destination key, source key) pairs, processed in this order.
        conversions = (('matrices', 'bags'), ('a_matrices', 'associations'))
        for target_key, source_key in conversions:
            converted = []
            for bag in pmodel[source_key]:
                converted.append(sem.bag_to_matrix(bag, w2v_model))
            pmodel[target_key] = converted
    print("poems model '%s' loaded" % file_name)
    return pmodel
Beispiel #2
0
def load_poems_model(file_name, w2v_model, vectorize=True):
    """Load a poems model and, if requested, attach matrix forms of its bags.

    Args:
        file_name: Handed to ``read_data_model`` and shown in the final message.
        w2v_model: Forwarded to ``sem.bag_to_matrix`` for every bag.
        vectorize: When true, store ``'matrices'`` (built from ``'bags'``) and
            ``'a_matrices'`` (built from ``'associations'``) on the model.

    Returns:
        The object produced by ``read_data_model``, with the extra entries set
        when ``vectorize`` is true.
    """

    def to_matrices(bags):
        # One sem.bag_to_matrix call per bag, keeping the original order.
        return [sem.bag_to_matrix(one_bag, w2v_model) for one_bag in bags]

    model = read_data_model(file_name)
    print("loading model...")
    if vectorize:
        print("vectorizing model...")
        model['matrices'] = to_matrices(model['bags'])
        model['a_matrices'] = to_matrices(model['associations'])
    print("poems model '%s' loaded" % file_name)
    return model
Beispiel #3
0
def similar_poems_idx(query: str, poem_model, w2v_model, topn=5, use_associations=False) -> list:
    """Rank poems by similarity to ``query`` and return the best ``topn``.

    The query is split on whitespace, normalised with ``sem.canonize_words``
    and turned into a matrix with ``sem.bag_to_matrix``.  Each poem matrix in
    ``poem_model['matrices']`` is then scored against it with
    ``sem.semantic_similarity_fast``.

    Args:
        query: Free-text query.
        poem_model: Mapping holding ``'matrices'`` and, when
            ``use_associations`` is true, a parallel ``'a_matrices'`` sequence.
        w2v_model: Forwarded unchanged to the ``sem`` helpers.
        topn: Maximum number of results to return.
        use_associations: When true, the query is extended with
            ``sem.semantic_association(...)`` and every non-empty poem matrix
            is stacked with its entry from ``'a_matrices'`` before scoring.

    Returns:
        A list of ``(poem_idx, similarity)`` tuples ordered by similarity,
        highest first.  An empty list is returned when the query produces an
        empty matrix.
    """
    query_bag = sem.canonize_words(query.split())
    if use_associations:
        query_bag += sem.semantic_association(query_bag, w2v_model, topn=5)

    query_mx = sem.bag_to_matrix(query_bag, w2v_model)
    # Nothing to compare against (presumably no query word is known to the
    # w2v model -- confirm against sem.bag_to_matrix); report "no matches"
    # instead of scoring poems against an empty matrix.
    if len(query_mx) == 0:
        return []

    if use_associations:
        # Poems with an empty matrix are skipped in this mode.
        similars = [(i, sem.semantic_similarity_fast(query_mx, np.vstack((mx, poem_model['a_matrices'][i]))))
                    for i, mx in enumerate(poem_model['matrices']) if len(mx) > 0]
    else:
        similars = [(i, sem.semantic_similarity_fast(query_mx, mx))
                    for i, mx in enumerate(poem_model['matrices'])]
    similars.sort(key=lambda x: x[1], reverse=True)
    return similars[:topn]
Beispiel #4
0
def similar_poems_idx(query: str, poem_model, w2v_model, topn=5, use_associations=False) -> list:
    """Return up to ``topn`` ``(poem_idx, sim)`` pairs with the highest similarity.

    The query is split on whitespace, passed through ``sem.canonize_words`` and
    converted with ``sem.bag_to_matrix``.  If that matrix is empty the result
    is ``[]``.

    With ``use_associations`` the query words are first extended by
    ``sem.semantic_association(..., topn=5)``; every non-empty poem matrix is
    stacked with its ``poem_model['a_matrices']`` entry and scored with
    ``sem.semantic_similarity_fast``.  Otherwise every poem matrix is scored
    with ``sem.semantic_similarity_fast_log``.

    Args:
        query: Free-text query.
        poem_model: Mapping providing ``'matrices'`` (and ``'a_matrices'`` when
            associations are used).
        w2v_model: Forwarded unchanged to the ``sem`` helpers.
        topn: Maximum number of pairs to return.
        use_associations: Selects the association-extended scoring path.

    Returns:
        List of ``(poem_idx, sim)`` tuples as selected by ``heapq.nlargest``.
    """
    words = sem.canonize_words(query.split())
    if use_associations:
        words += sem.semantic_association(words, w2v_model, topn=5)

    query_vectors = sem.bag_to_matrix(words, w2v_model)
    if len(query_vectors) == 0:
        return []

    if use_associations:
        scored = []
        for idx, poem_mx in enumerate(poem_model['matrices']):
            if len(poem_mx) > 0:
                # Score against the poem's own rows plus its association rows.
                combined = np.vstack((poem_mx, poem_model['a_matrices'][idx]))
                scored.append((idx, sem.semantic_similarity_fast(query_vectors, combined)))
    else:
        scored = [(idx, sem.semantic_similarity_fast_log(query_vectors, poem_mx))
                  for idx, poem_mx in enumerate(poem_model['matrices'])]

    return heapq.nlargest(topn, scored, key=lambda pair: pair[1])