コード例 #1
0
def main():
    """Build sentence features from the v1-v3 rating logs and pickle them.

    Loads the dictionary stored in ``dictionary_value.pkl``, reads the rating
    logs under the ``v1``, ``v2`` and ``v3`` directories, merges them into a
    single dict (later versions override earlier ones on key clashes), hands
    the merged logs and the dictionary to ``extract_word2vec_length`` and
    writes its two results to ``sent_100.pkl`` and ``length.pkl``.
    """
    # Pickle data is binary: open in 'rb'/'wb' and let `with` close the
    # handle (the previous bare open() was never closed).
    # NOTE: only unpickle files you trust; pickle can execute arbitrary code.
    with open('dictionary_value.pkl', 'rb') as f:
        dic = pickle.load(f)

    log_root = '/home/ubuntu/zhou/Backend/rating_log/'
    all_logs = {}
    for version in ('v1', 'v2', 'v3'):
        # update() merges regardless of whether items() returns a list
        # (Python 2) or a view (Python 3).
        all_logs.update(readall.readall(log_root + version))

    sent, length = extract_word2vec_length(all_logs, dic)

    with open('sent_100.pkl', 'wb') as f:
        pickle.dump(sent, f)
    with open('length.pkl', 'wb') as f:
        pickle.dump(length, f)
コード例 #2
0
def main():
    """Extract word2vec sentence features and lengths, then pickle both.

    Steps:
      1. Load the dictionary from ``dictionary_value.pkl``.
      2. Read the rating logs of versions v1, v2 and v3 and merge them into
         one dict; on duplicate keys the later version wins.
      3. Call ``extract_word2vec_length`` with the merged logs and the
         dictionary.
      4. Dump the two returned values to ``sent_100.pkl`` and ``length.pkl``.
    """
    # Binary mode is required for pickle files, and the context manager
    # guarantees the handle is closed (it was leaked before).
    # NOTE: unpickling is only safe for trusted files.
    with open('dictionary_value.pkl', 'rb') as dict_file:
        dic = pickle.load(dict_file)

    all_v1 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v1')
    all_v2 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v2')
    all_v3 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v3')

    # Merge with update() instead of concatenating items(): concatenation
    # only works when items() returns lists.
    all_logs = {}
    for logs in (all_v1, all_v2, all_v3):
        all_logs.update(logs)

    sent, length = extract_word2vec_length(all_logs, dic)

    with open('sent_100.pkl', 'wb') as out_file:
        pickle.dump(sent, out_file)
    with open('length.pkl', 'wb') as out_file:
        pickle.dump(length, out_file)
コード例 #3
0
import readall
# Tally the appropriateness value ("app_value", 1..3) of logged turns from
# the v4 rating logs, then print the tallies.
rating_logs = readall.readall("/home/ubuntu/zhou/Backend/rating_log/v4")
writelist = readall.get_log(rating_logs)
strategy_scan = ['init', 'end', 'more', 'switch', 'joke']
strategy_pre = ['continue', 'oov', 'short_answer', 'name_entity', 'no_repeat']
# table_strategy_app[i] counts entries whose app_value is i + 1.
table_strategy_app = [0, 0, 0]
table = {'init': [0, 0, 0], 'end': [0, 0, 0], 'more': [0, 0, 0],
         'switch': [0, 0, 0], 'joke': [0, 0, 0]}
# NOTE(review): `rate` is never used. If get_log() already returns the
# entries of every log, this outer loop repeats the same tally once per log
# and multiplies each count by len(rating_logs) -- confirm and drop it if so.
for rate in rating_logs:
    for tmpdict in writelist:
        strategy = tmpdict["strategy"]
        for stra in strategy:
            if stra in strategy_pre:
                # Stop scanning this entry at the first pre-strategy.
                break
            # These statements used to sit after `break` inside the `if`,
            # which made them unreachable: nothing was ever counted.
            print(tmpdict["strategy"])
            index = int(tmpdict["app_value"]) - 1
            table_strategy_app[index] += 1
            # TODO: per-strategy tallies into `table` are not implemented
            # (the draft referenced an undefined `strategy_real`), so `table`
            # is still printed with all zeros.
print(table_strategy_app)
print(table)

コード例 #4
0
import gensim, logging
import pickle
import readall
import nltk
# Train a 100-dimensional word2vec model on every utterance in the v1, v2,
# v3 and v5 rating logs, and save the model, its vocabulary and the raw user
# inputs.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)

all_v1 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v1')
all_v2 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v2')
all_v3 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v3')
all_v5 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v5')

# Merge with update() rather than concatenating items(): concatenation only
# works when items() returns lists. Later versions win on duplicate keys.
all_logs = {}
for logs in (all_v1, all_v2, all_v3, all_v5):
    all_logs.update(logs)

sentences = []   # lower-cased token lists, one per utterance (both speakers)
user_input = []  # raw, unmodified user utterances
for item in all_logs:
    conv = all_logs[item]["Turns"]
    for turn in conv:
        user_text = conv[turn]["You"]
        system_text = conv[turn]["TickTock"]
        sentences.append(nltk.word_tokenize(user_text.lower()))
        sentences.append(nltk.word_tokenize(system_text.lower()))
        user_input.append(user_text)

# NOTE(review): `size` is the keyword of older gensim releases; gensim 4
# renamed it to `vector_size` -- confirm against the installed version.
model = gensim.models.Word2Vec(sentences, size=100, min_count=1)
# Vocabulary: every distinct token seen in the corpus (order is arbitrary).
dictionary = list({word for sentence in sentences for word in sentence})
model.save('/tmp/word2vec_100_break')

# Pickle files are binary; `with` also closes the handles, which the bare
# open() passed to pickle.dump() previously did not.
with open('dictionary_conv.pkl', 'wb') as f:
    pickle.dump(dictionary, f)
with open('user_input_all.pkl', 'wb') as f:
    pickle.dump(user_input, f)
コード例 #5
0
import readall
# Count how often each appropriateness value ("app_value", 1..3) occurs in
# the entries derived from the v4 rating logs, and print the counts.
rating_logs = readall.readall("/home/ubuntu/zhou/Backend/rating_log/v4")
writelist = readall.get_log(rating_logs)
strategy_scan = ['init', 'end', 'more', 'switch', 'joke']
strategy_pre = ['continue', 'oov', 'short_answer', 'name_entity', 'no_repeat']
# Slot i holds the number of counted entries with app_value == i + 1.
table_strategy_app = [0, 0, 0]
table = {
    'init': [0, 0, 0],
    'end': [0, 0, 0],
    'more': [0, 0, 0],
    'switch': [0, 0, 0],
    'joke': [0, 0, 0]
}
# NOTE(review): the loop variable `rate` is unused. If get_log() flattens all
# logs into `writelist`, iterating it once per log inflates every count by a
# factor of len(rating_logs) -- verify and remove the outer loop if so.
for rate in rating_logs:
    for tmpdict in writelist:
        strategy = tmpdict["strategy"]
        for stra in strategy:
            if stra in strategy_pre:
                # A pre-strategy ends the scan of this entry.
                break
            # Previously these lines were indented under the `if`, after the
            # `break`, so they could never run and the tally stayed at zero.
            print(tmpdict["strategy"])
            index = int(tmpdict["app_value"]) - 1
            table_strategy_app[index] += 1
            # TODO: `table` is never filled; the draft line that would do so
            # referenced an undefined name `strategy_real`.
print(table_strategy_app)
print(table)
コード例 #6
0
import readall
import gensim
import nltk
import numpy as np
import pickle
# We need to extract some features; for now, keep it simple and use only word2vec, with the one previous turn as context.
#
model = gensim.models.Word2Vec.load('/tmp/word2vec_50_break')

all_v1 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v1')
all_v2 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v2')
all_v3 = readall.readall('/home/ubuntu/zhou/Backend/rating_log/v3')
all_logs = dict(all_v1.items() + all_v2.items() + all_v3.items())
sent_vec = None
for item in all_logs:
    print item
    conv = all_logs[item]["Turns"]
    sent_pre = None
    for turn in conv:
        turn_vec_1 = sum(model[nltk.word_tokenize(conv[turn]["You"])])
        if len(nltk.word_tokenize(conv[turn]["TickTock"])) == 0:
            continue
        #print 'TickTock'
        #print conv[turn]["TickTock"]
        turn_vec_2 = sum(model[nltk.word_tokenize(conv[turn]["TickTock"])])
        #print turn_vec_1
        #print turn_vec_2
        if sent_vec is None:
            sent_vec = turn_vec_1 + turn_vec_2 + turn_vec_1 + turn_vec_2
            target = np.array(int(conv[turn]["Appropriateness"]))
        else: