def load_poems_model(file_name, w2v_model, vectorize=True):
    """Read a poems data model and optionally attach word-vector matrices.

    Args:
        file_name: Value passed to ``read_data_model`` to locate the model.
        w2v_model: Word-vector model forwarded to ``sem.bag_to_matrix``.
        vectorize: When true, ``pmodel['matrices']`` is built from
            ``pmodel['bags']`` and ``pmodel['a_matrices']`` from
            ``pmodel['associations']``, one matrix per bag.

    Returns:
        The object returned by ``read_data_model``; it additionally carries
        the ``'matrices'`` and ``'a_matrices'`` lists when ``vectorize`` is
        true.
    """
    # Announce the load before it starts; the read may be the slow step, and
    # a message printed afterwards tells the user nothing.
    print("loading model...")
    pmodel = read_data_model(file_name)
    if vectorize:
        print("vectorizing model...")
        pmodel['matrices'] = [
            sem.bag_to_matrix(bag, w2v_model) for bag in pmodel['bags']
        ]
        pmodel['a_matrices'] = [
            sem.bag_to_matrix(bag, w2v_model) for bag in pmodel['associations']
        ]
    print("poems model '%s' loaded" % file_name)
    return pmodel
def load_poems_model(file_name, w2v_model, vectorize=True):
    """Read a poems data model and optionally attach word-vector matrices.

    Args:
        file_name: Value passed to ``read_data_model`` to locate the model.
        w2v_model: Word-vector model forwarded to ``sem.bag_to_matrix``.
        vectorize: When true, ``pmodel['matrices']`` is built from
            ``pmodel['bags']`` and ``pmodel['a_matrices']`` from
            ``pmodel['associations']``, one matrix per bag.

    Returns:
        The object returned by ``read_data_model``; it additionally carries
        the ``'matrices'`` and ``'a_matrices'`` lists when ``vectorize`` is
        true.
    """
    # Announce the load before it starts; the read may be the slow step, and
    # a message printed afterwards tells the user nothing.
    print("loading model...")
    pmodel = read_data_model(file_name)
    if vectorize:
        print("vectorizing model...")
        pmodel['matrices'] = [
            sem.bag_to_matrix(bag, w2v_model) for bag in pmodel['bags']
        ]
        pmodel['a_matrices'] = [
            sem.bag_to_matrix(bag, w2v_model) for bag in pmodel['associations']
        ]
    print("poems model '%s' loaded" % file_name)
    return pmodel
def similar_poems_idx(query: str, poem_model, w2v_model, topn=5, use_associations=False) -> list:
    """Score every poem against a free-text query and return the best matches.

    Args:
        query: Query text; it is split on whitespace and passed through
            ``sem.canonize_words``.
        poem_model: Mapping holding ``'matrices'`` (one matrix per poem) and,
            when ``use_associations`` is true, ``'a_matrices'``.
        w2v_model: Word-vector model forwarded to the ``sem`` helpers.
        topn: Maximum number of results to return.
        use_associations: When true, the query bag is extended with
            ``sem.semantic_association`` results and each poem matrix is
            stacked with its association matrix before scoring; poems whose
            own matrix is empty are skipped.

    Returns:
        A list of ``(poem_idx, score)`` tuples sorted by descending score and
        truncated to ``topn``. An empty list is returned when the query
        produces an empty matrix.
    """
    query_bag = sem.canonize_words(query.split())
    if use_associations:
        # The number of associations added to the query is fixed at 5 and is
        # independent of the ``topn`` result limit.
        query_bag += sem.semantic_association(query_bag, w2v_model, topn=5)

    query_mx = sem.bag_to_matrix(query_bag, w2v_model)
    # A query with no vectors has nothing to compare against; bail out
    # before handing an empty matrix to the similarity routines.
    if len(query_mx) == 0:
        return []

    if use_associations:
        similars = [
            (i, sem.semantic_similarity_fast(
                query_mx, np.vstack((mx, poem_model['a_matrices'][i]))))
            for i, mx in enumerate(poem_model['matrices'])
            if len(mx) > 0
        ]
    else:
        similars = [
            (i, sem.semantic_similarity_fast(query_mx, mx))
            for i, mx in enumerate(poem_model['matrices'])
        ]

    similars.sort(key=lambda x: x[1], reverse=True)
    return similars[:topn]
def similar_poems_idx(query: str, poem_model, w2v_model, topn=5, use_associations=False) -> list:
    """Return the ``topn`` highest-scoring poems for a text query.

    The query is split on whitespace, canonized, optionally extended with
    semantic associations, and converted to a matrix. Each poem matrix in
    ``poem_model['matrices']`` is then scored against it.

    With ``use_associations`` the poem matrix is stacked with the matching
    entry of ``poem_model['a_matrices']`` and scored by
    ``sem.semantic_similarity_fast``; poems with an empty matrix are skipped.
    Otherwise every poem is scored by ``sem.semantic_similarity_fast_log``.

    Returns:
        ``[(poem_idx, sim), ...]`` with the largest ``sim`` first, or an
        empty list when the query yields an empty matrix.
    """
    words = sem.canonize_words(query.split())
    if use_associations:
        words += sem.semantic_association(words, w2v_model, topn=5)

    query_vectors = sem.bag_to_matrix(words, w2v_model)
    if len(query_vectors) == 0:
        return []

    scored = []
    if use_associations:
        for idx, poem_mx in enumerate(poem_model['matrices']):
            if len(poem_mx) > 0:
                combined = np.vstack((poem_mx, poem_model['a_matrices'][idx]))
                score = sem.semantic_similarity_fast(query_vectors, combined)
                scored.append((idx, score))
    else:
        for idx, poem_mx in enumerate(poem_model['matrices']):
            score = sem.semantic_similarity_fast_log(query_vectors, poem_mx)
            scored.append((idx, score))

    return heapq.nlargest(topn, scored, key=lambda pair: pair[1])