Example #1
## Python 2 script; requires gensim (pre-1.0 API) and pickle.  The helpers
## build_axis, map_word_on_axis, txt_to_wordvecmodel and the axis definitions
## LJ40K and Feeling_Wheel are assumed to be defined elsewhere in this project.
import pickle

from gensim.models import Word2Vec


if __name__ == '__main__':
    ## for LJ2M word vector model
    w2vmodel = Word2Vec.load('/corpus/LJ2M/exp/model/lj2mStanfordparser_300features_50minwords_10context')
    index2word_set = set(w2vmodel.index2word)  # vocabulary of the word2vec model

    # # for 42B word vector model
    # w2vmodel = txt_to_wordvecmodel(filepath='glove.42B.300d.txt')
    # index2word_set = set(w2vmodel.keys())

    # remember whether the vocabulary keys are unicode, so the TF-IDF words can be decoded to match
    use_unicode = isinstance(next(iter(index2word_set)), unicode)


    model = {}
    data = pickle.load(open('/corpus/LJ40K/data/features/tfidf/GlobalInfo.pkl'))  # TF-IDF global info for LJ40K; iterated as a word list below
    LJ40K_axis = build_axis(LJ40K)  # axes built from the LJ40K emotion labels

    for i, word in enumerate(data):
        if i % 1000 == 0:
            print i  # progress indicator
        if use_unicode:
            word = word.decode('utf-8')  # match the unicode vocabulary keys
        if word in index2word_set:  # only map words that have a word vector
            # case 1: map the word onto the Feeling_Wheel axes
            model[word] = map_word_on_axis(word, Feeling_Wheel, model=w2vmodel)
            # case 2: map the word onto the LJ40K axes instead
            # model[word] = map_word_on_axis(word, LJ40K_axis, model=w2vmodel)

    print 'dump..'
    pickle.dump(model, open('model_wordvec_semantic_similarity_lemma_63768_LJ2M_Feeling_Wheel.pkl', 'wb'))
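
For context, here is a minimal, hypothetical sketch of the two helpers the commented-out 42B branch relies on: a loader that parses the plain-text GloVe format into a {word: vector} dict, and a map_word_on_axis that scores a word against every axis label by cosine similarity. This is an illustration only, not the project's actual implementation; the real helpers (and the exact return format expected downstream) may differ.

import numpy as np


def txt_to_wordvecmodel(filepath):
    """Hypothetical loader: parse a GloVe-format .txt file into {word: vector}."""
    wordvecs = {}
    with open(filepath) as f:
        for line in f:
            parts = line.rstrip().split(' ')
            wordvecs[parts[0]] = np.asarray(parts[1:], dtype='float32')
    return wordvecs


def cosine_similarity(u, v):
    """Cosine similarity between two dense vectors."""
    return float(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v)))


def map_word_on_axis(word, axis_words, model):
    """Hypothetical scorer: similarity of `word` to every label in `axis_words`.

    `model` maps a token to its vector (a plain dict for GloVe, or a gensim
    model indexed the same way); axis labels missing from the vocabulary
    score 0.0.
    """
    word_vec = model[word]
    scores = []
    for label in axis_words:
        try:
            scores.append(cosine_similarity(word_vec, model[label]))
        except KeyError:
            scores.append(0.0)
    return scores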
Example #2
## Python 2 script; map_word_on_axis and the axis definitions LJ40K,
## Emotion_wheel and Feeling_Wheel are assumed to be defined elsewhere
## in this project.
import pickle


if __name__ == '__main__':
    '''
    only for using graph to build the model
    '''
    ## one pole
    path_graph = pickle.load(open('../data/graph/calculate_path/ss_ant_for_path_wv_LJ40k_enhance_lemma_42b_50785.pkl'))
    score_graph = pickle.load(open('../data/graph/calculate_score/pos_neg_wv_LJ40k_enhance_lemma_42b_50785.pkl'))
    words_set = pickle.load(open('../data/wordset/wordsetlemma_35304.pkl'))

    ## two pole
    # path_graph = pickle.load(open('../data/graph/calculate_path/ss_for_path_wv_lemma_42b_50172.pkl'))
    # score_graph = pickle.load(open('../data/graph/calculate_score/pos_wv_lemma_42b_50172.pkl'))
    # words_set = pickle.load(open('../data/wordset/wordsetlemma_35304.pkl'))

    model = {}
    for i, word in enumerate(words_set):
        print i  # progress indicator
        ## one pole

        ## case 1: map the word onto the LJ40K axes
        model[word] = map_word_on_axis(word, LJ40K, score_graph=score_graph, path_graph=path_graph)

        ## case 2: same as case 1, with a 0.6 threshold
        # model[word] = map_word_on_axis(word, LJ40K, score_graph=score_graph, path_graph=path_graph, threshold=0.6)

        ## case 3: map the word onto the Emotion_wheel axes
        # model[word] = map_word_on_axis(word, Emotion_wheel, score_graph=score_graph, path_graph=path_graph)

        ## two pole: map the word onto the Feeling_Wheel axes
        # model[word] = map_word_on_axis(word, Feeling_Wheel, score_graph=score_graph, path_graph=path_graph)

    print 'dump..'
    pickle.dump(model, open('model_one_minusone_semantic_similarity_lj40k_enhance_lemma_50785_42b.pkl', 'wb'))
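
For context, here is a hypothetical sketch of a graph-based map_word_on_axis matching the call signature used above. It assumes path_graph is a networkx graph whose nodes are words (linked by semantic-similarity/antonym edges), score_graph behaves like a dict mapping a word to a signed polarity weight, and similarity falls off with shortest-path distance; the project's actual scoring over its precomputed graphs may differ.

import networkx as nx


def map_word_on_axis(word, axis_words, score_graph=None, path_graph=None, threshold=None):
    """Hypothetical scorer: rate `word` against each axis label by graph distance."""
    scores = []
    for label in axis_words:
        if word in path_graph and label in path_graph and nx.has_path(path_graph, word, label):
            distance = nx.shortest_path_length(path_graph, word, label)
            score = 1.0 / (1.0 + distance)       # closer in the graph -> higher score
        else:
            score = 0.0                          # unreachable labels get no score
        if score_graph is not None:
            score *= score_graph.get(word, 1.0)  # assumed signed (+1/-1) polarity lookup
        if threshold is not None and score < threshold:
            score = 0.0                          # optional cut-off, cf. case 2 above
        scores.append(score)
    return scores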