def __iter__(self):
    """Yield the preprocessed content of each record loaded from ``self.dirname``.

    Records come from ``json_dict_from_file(self.dirname, "content")``. For
    every record, its ``'content'`` entry is passed through ``clean_comment``
    and the result through ``delete_stop_words(..., return_list=True)``;
    whatever that call returns is yielded unchanged.

    NOTE(review): presumably each yielded item is a list of tokens -- confirm
    against ``delete_stop_words``.
    """
    for record in json_dict_from_file(self.dirname, "content"):
        cleaned = clean_comment(record['content'])
        yield delete_stop_words(cleaned, return_list=True)
def __iter__(self):
    """Yield the preprocessed content of each record loaded from ``self.dirname``.

    Records come from ``json_dict_from_file(self.dirname, "content")``. For
    every record, its ``'content'`` entry is passed through ``clean_comment``
    and the result through ``delete_stop_words(..., return_list=True)``;
    whatever that call returns is yielded unchanged.

    NOTE(review): presumably each yielded item is a list of tokens -- confirm
    against ``delete_stop_words``.
    """
    for record in json_dict_from_file(self.dirname, "content"):
        cleaned = clean_comment(record['content'])
        yield delete_stop_words(cleaned, return_list=True)
# Word2Vec settings; each one is forwarded to the Word2Vec(...) call below
# under the keyword named in its trailing comment.
feature_size = 500    # -> size
content_window = 10   # -> window
freq_min_count = 4    # -> min_count
threads_num = 8       # not passed below; workers comes from cpu_count() instead
negative = 6          # -> negative
# Translated original remark: "best: sampling uses the hierarchical softmax
# method (negative sampling, favours frequent words), not the negative
# sampling method (favours rare words)."
# NOTE(review): the call below passes negative=6 and no `hs` argument, which
# appears to contradict this remark -- confirm which method is intended.
t_iter = 60           # -> iter

print("word2vec...")
tic = time.time()
if not os.path.isfile(save_model):
    # No saved model at `save_model`: train one from the loaded content.
    s_list = json_dict_from_file(file_name, "content")
    model = Word2Vec(
        s_list,
        size=feature_size,
        window=content_window,
        iter=t_iter,
        min_count=freq_min_count,
        negative=negative,
        workers=multiprocessing.cpu_count(),
    )
    toc = time.time()
    print("Word2vec completed! Elapsed time is %s." % (toc - tic))
    # NOTE(review): assumes timing and saving belong to the training branch
    # only -- confirm.
    model.save(save_model)
    model.save_word2vec_format(save_model2, binary=False)
    print("Word2vec Saved!")
else:
    # A saved model exists: load it and print its vocabulary.
    model = Word2Vec.load(save_model)
    print(model.vocab)
    print("Loaded word2vec model")

# Section marker string below reads "brand dimensions".
""" 品牌维度 """
# Disabled keyword list follows; its first entry means "performance".
# brand =[u'性能',
# Word2Vec settings; each one is forwarded to the Word2Vec(...) call below
# under the keyword named in its trailing comment.
feature_size = 500    # -> size
content_window = 10   # -> window
freq_min_count = 4    # -> min_count
threads_num = 8       # not passed below; workers comes from cpu_count() instead
negative = 6          # -> negative
# Translated original remark: "best: sampling uses the hierarchical softmax
# method (negative sampling, favours frequent words), not the negative
# sampling method (favours rare words)."
# NOTE(review): the call below passes negative=6 and no `hs` argument, which
# appears to contradict this remark -- confirm which method is intended.
t_iter = 60           # -> iter

print("word2vec...")
tic = time.time()
if not os.path.isfile(save_model):
    # No saved model at `save_model`: train one from the loaded content.
    s_list = json_dict_from_file(file_name, "content")
    model = Word2Vec(
        s_list,
        size=feature_size,
        window=content_window,
        iter=t_iter,
        min_count=freq_min_count,
        negative=negative,
        workers=multiprocessing.cpu_count(),
    )
    toc = time.time()
    print("Word2vec completed! Elapsed time is %s." % (toc - tic))
    # NOTE(review): assumes timing and saving belong to the training branch
    # only -- confirm.
    model.save(save_model)
    model.save_word2vec_format(save_model2, binary=False)
    print("Word2vec Saved!")
else:
    # A saved model exists: load it and print its vocabulary.
    model = Word2Vec.load(save_model)
    print(model.vocab)
    print("Loaded word2vec model")

# Section marker string below reads "brand dimensions".
""" 品牌维度 """