if __name__ == '__main__':
    # Build a word -> emotion-axis-vector model from a word2vec vocabulary and
    # the LJ40K tf-idf vocabulary, then pickle the result.
    # NOTE(review): Python 2 code (`unicode` builtin); kept as Python 2.

    ## for LJ2M word vector model
    w2vmodel = Word2Vec.load('/corpus/LJ2M/exp/model/lj2mStanfordparser_300features_50minwords_10context')
    index2word_set = set(w2vmodel.index2word)

    # # for 42B word vector model
    # w2vmodel = txt_to_wordvecmodel(filepath='glove.42B.300d.txt')
    # index2word_set = set(w2vmodel.keys())

    # The gensim vocab may hold unicode objects while the pickled tf-idf keys
    # are byte strings; peek at one vocab entry to decide whether keys must be
    # decoded before lookup.  isinstance() replaces the non-idiomatic
    # `type(...) == unicode`, and next(iter(...)) avoids materializing the
    # whole set into a list just to read its first element.
    use_unicode = isinstance(next(iter(index2word_set)), unicode)

    model = {}
    # `with` closes the handle deterministically (the original leaked the
    # file object returned by open()).
    with open('/corpus/LJ40K/data/features/tfidf/GlobalInfo.pkl') as pkl_in:
        data = pickle.load(pkl_in)

    LJ40K_axis = build_axis(LJ40K)

    for i, word in enumerate(data):
        if i % 1000 == 0:
            print(i)  # progress indicator
        if use_unicode:
            # match the (unicode) word2vec vocabulary; assumes keys are
            # utf-8 byte strings — TODO confirm against GlobalInfo.pkl
            word = word.decode('utf-8')
        if word not in index2word_set:
            continue  # out-of-vocabulary word: no vector available
        # case1:
        model[word] = map_word_on_axis(word, Feeling_Wheel, model=w2vmodel)
        # case2:
        # model[word] = map_word_on_axis(word, LJ40K_axis, model=w2vmodel)

    print('dump..')
    with open('model_wordvec_semantic_similarity_lemma_63768_LJ2M_Feeling_Wheel.pkl', 'wb') as pkl_out:
        pickle.dump(model, pkl_out)
)) words_set = pickle.load(open('../data/wordset/wordsetlemma_35304.pkl')) ## two pole # path_graph = pickle.load(open('../data/graph/calculate_path/ss_for_path_wv_lemma_42b_50172.pkl')) # score_graph = pickle.load(open('../data/graph/calculate_score/pos_wv_lemma_42b_50172.pkl')) # words_set = pickle.load(open('../data/wordset/wordsetlemma_35304.pkl')) model = {} for i, word in enumerate(words_set): print i ## one pole ##case1 model[word] = map_word_on_axis(word, LJ40K, score_graph=score_graph, path_graph=path_graph) ##case2 # model[word] = map_word_on_axis(word, LJ40K, score_graph=score_graph, path_graph=path_graph, threshold=0.6) ##case3 # model[word] = map_word_on_axis(word, Emotion_wheel, score_graph=score_graph, path_graph=path_graph) ## two pole # model[word] = map_word_on_axis(word, Feeling_Wheel, score_graph=score_graph, path_graph=path_graph) print 'dump..' pickle.dump( model, open(
# NOTE(review): mangled fragment — many Python 2 statements collapsed onto one
# physical line.  The leading `))` appears to close a pickle.dump(...) begun on
# an earlier (unseen) line, and the trailing `pickle.dump( model, open(` is
# continued past this line — confirm against the original, unmangled file.
# Visible logic: load a lemma word set (35304 words), map each word onto the
# LJ40K axes via map_word_on_axis using precomputed score/path graphs
# (graphs referenced here, `score_graph`/`path_graph`, are presumably loaded on
# unseen lines — the only loads visible on this line are commented out), then
# pickle the resulting word -> axis-vector model.
only for using graph to build the model ''' ## one pole path_graph = pickle.load(open('../data/graph/calculate_path/ss_ant_for_path_wv_LJ40k_enhance_lemma_42b_50785.pkl')) score_graph = pickle.load(open('../data/graph/calculate_score/pos_neg_wv_LJ40k_enhance_lemma_42b_50785.pkl')) words_set = pickle.load(open('../data/wordset/wordsetlemma_35304.pkl')) ## two pole # path_graph = pickle.load(open('../data/graph/calculate_path/ss_for_path_wv_lemma_42b_50172.pkl')) # score_graph = pickle.load(open('../data/graph/calculate_score/pos_wv_lemma_42b_50172.pkl')) # words_set = pickle.load(open('../data/wordset/wordsetlemma_35304.pkl')) model = {} for i,word in enumerate(words_set): print i ## one pole ##case1 model[word] = map_word_on_axis(word, LJ40K, score_graph=score_graph, path_graph=path_graph) ##case2 # model[word] = map_word_on_axis(word, LJ40K, score_graph=score_graph, path_graph=path_graph, threshold=0.6) ##case3 # model[word] = map_word_on_axis(word, Emotion_wheel, score_graph=score_graph, path_graph=path_graph) ## two pole # model[word] = map_word_on_axis(word, Feeling_Wheel, score_graph=score_graph, path_graph=path_graph) print 'dump..' pickle.dump(model,open('model_one_minusone_semantic_similarity_lj40k_enhance_lemma_50785_42b.pkl', 'wb'))
# NOTE(review): mangled fragment — the leading text up to `'''` is the tail of
# a docstring/string opened on an earlier (unseen) line, so no comment may be
# inserted before it without corrupting that string literal.
# Visible logic (Python 2): load one-pole path/similarity and score graphs for
# the LJ40K-enhanced lemma set (50785 entries) plus the 35304-word lemma set,
# map every word onto the LJ40K axes with map_word_on_axis (alternative cases
# — threshold 0.6, Emotion_wheel, two-pole Feeling_Wheel — left commented out),
# and pickle the resulting one/minus-one semantic-similarity model.
# NOTE(review): open() handles passed to pickle.load/dump are never closed —
# a `with` block would fix the leak, left unchanged here to keep the fragment
# byte-identical.