def __init__(self):
    """Set up the RL bot: load pickled corpora and build the DST, EMC and DQN.

    Paths come from the global config object ``c``; the configured model
    name must be one of ``self.models``.
    """
    self.models = {'dqn': DQN}

    def read(fn):
        # Context manager closes the handle promptly — the original
        # `pickle.load(open(...))` leaked the file object.
        # encoding='latin1' lets Python 3 read Python 2 pickles.
        # NOTE(review): pickle.load assumes these files are trusted data.
        with open(get_path(c.c['paths'][fn]), 'rb') as f:
            return pickle.load(f, encoding='latin1')

    self.movie_db = read('movie_db')
    remove_empty_slots(self.movie_db)  # presumably prunes empty slot values in-place — TODO confirm
    self.movie_dict = read('movie_dict')
    self.user_goals = read('user_goals')

    self.dst = DST(self.movie_db)    # dialogue state tracker
    self.emc = EMC(self.movie_dict)  # error model controller

    # Raise instead of `assert`: asserts are stripped under `python -O`,
    # which would turn a config typo into a late KeyError.
    model_name = c.c['run']['model']
    if model_name not in self.models:
        raise ValueError('unknown model: {0!r}'.format(model_name))
    self.dqn = self.models[model_name](self.dst.s_size)
class BotHCN(object):
    """Hybrid Code Network bot: supervised training on dialog data,
    evaluation, and an interactive console loop.

    Modernized from Python 2 (`print` statements, `xrange`, `raw_input`)
    to Python 3 for consistency with the rest of the file, and the
    featurization pipeline that was copy-pasted in train/eval/test is
    factored into ``_featurize``.
    """

    def __init__(self, voc_fn, w2v_fn, w2v_dim, entity_types, entity_dict,
                 action_mask_dict, obs_size, act_size, templates):
        self.nlu = NLU(voc_fn, w2v_fn, w2v_dim, entity_dict)
        self.dst = DST(entity_types)
        self.model = HCN(action_mask_dict, obs_size, act_size)
        self.templates = templates

    def _featurize(self, text):
        """Run NLU on *text*, update the state tracker, and return the
        concatenated feature vector plus the current action mask."""
        feat_bow = self.nlu.get_bow_vector(text)
        feat_emb = self.nlu.get_utter_emb(text)
        self.dst.update(self.nlu.extract_entities(text))
        feat_ctx = self.dst.get_feat()
        feats = np.concatenate((feat_bow, feat_emb, feat_ctx), axis=0)
        action_mask = self.model.get_action_mask(feat_ctx)
        return feats, action_mask

    def train(self, data_fn, epochs=5):
        """Train on dialogs from *data_fn* for *epochs* epochs.

        Uses a fixed 90/10 train/validation split and saves the model
        after the final epoch.
        """
        def train_dialog(dialog):
            # Per-dialog supervised pass; returns accumulated loss.
            loss = 0
            self.dst.clear()
            self.model.reset_state()
            for text, action in dialog:
                feats, action_mask = self._featurize(text)
                loss += self.model.train_step(feats, action, action_mask)[0]
            return loss

        data = list(get_data(data_fn))
        data = convert_train_data(data, self.templates)
        split = int(.9 * len(data))
        data_train = data[:split]
        data_valid = data[split:]
        for epoch in range(epochs):  # was xrange (Python 2)
            loss = sum(train_dialog(dialog) for dialog in data_train)
            accu = self.eval(data_valid)
            print('[{0}/{1}] {2:.4f} {3:.4f}'.format(epoch, epochs, loss, accu))
        self.model.save()

    def eval(self, dialogs):
        """Return mean per-turn action-prediction accuracy over *dialogs*."""
        def eval_dialog(dialog):
            correct = 0
            self.dst.clear()
            self.model.reset_state()
            for text, real in dialog:
                feats, action_mask = self._featurize(text)
                pred = self.model.predict_action(feats, action_mask)
                correct += int(pred == real)
            return 1. * correct / len(dialog)
        return 1. * sum(eval_dialog(dialog) for dialog in dialogs) / len(dialogs)

    def test(self):
        """Interactive console loop against the trained model.

        'clear'/'reset'/'restart' reset the dialog; 'exit'/'quit'/'stop'
        end the loop; an empty line is treated as '<SILENCE>'.
        """
        self.dst.clear()
        self.model.load()
        self.model.reset_state()
        while True:
            text = input(':: ')  # was raw_input (Python 2)
            if text in ('clear', 'reset', 'restart'):
                self.dst.clear()
                self.model.reset_state()
                print('')
            elif text in ('exit', 'quit', 'stop'):
                break
            else:
                text = text or '<SILENCE>'
                feats, action_mask = self._featurize(text)
                pred = self.model.predict_action(feats, action_mask)
                print('>>', self.templates[pred].format(**self.dst.entities))
def __init__(self, voc_fn, w2v_fn, w2v_dim, entity_types, entity_dict,
             action_mask_dict, obs_size, act_size, templates):
    """Wire up the bot's pipeline components.

    NLU front end, dialogue state tracker, HCN policy model, and the
    response templates used to render predicted actions.
    """
    # Templates are stored as-is; action selection happens at predict time.
    self.templates = templates
    # NLU turns raw text into features and extracted entities.
    self.nlu = NLU(voc_fn, w2v_fn, w2v_dim, entity_dict)
    # DST tracks entity slots across turns.
    self.dst = DST(entity_types)
    # HCN maps features (plus an action mask) to a dialog action.
    self.model = HCN(action_mask_dict, obs_size, act_size)
from flask import Flask
from flask_socketio import SocketIO, emit
from nlu_test import predict
from dst import DST
from dpl import DPL
from nlg import NLG

app = Flask(__name__)
app.config["SECRET_KEY"] = "secret!"
socketio = SocketIO(app)

_dst = DST()
_dpl = DPL()
_nlg = NLG()

# Per-process conversation context shared across socket messages.
last_action, last_state = None, None
Hn = list()


@socketio.on("receive")
def receive_msg(msg):  # renamed from `recevie_msg` (typo); only referenced via the decorator
    """Handle one incoming user message: NLU -> DST -> DPL."""
    # BUG FIX: `Hn` was read and then assigned in this function without a
    # `global` declaration, so the assignment made it local and the earlier
    # read raised UnboundLocalError on every message.
    global last_action, last_state, Hn
    print(msg)
    # get intent & slots
    nlu_result = predict(msg)
    # get state (Gn, Un, Hn)
    state = _dst.get_state(nlu_result, last_action, last_state, Hn)
    # get action
    action, Hn = _dpl.get_action(state)
    # Remember context for the next turn (previously declared but never updated).
    last_action, last_state = action, state
class BotRL(object):
    """RL dialog bot: trains a DQN agent against a user simulator (USim)
    and tests against a real User."""

    def __init__(self):
        """Load pickled corpora and build the DST, EMC and DQN from config."""
        self.models = {'dqn': DQN}

        def read(fn):
            # Context manager closes the handle promptly — the original
            # `pickle.load(open(...))` leaked the file object.
            # encoding='latin1' lets Python 3 read Python 2 pickles.
            # NOTE(review): pickle.load assumes these files are trusted data.
            with open(get_path(c.c['paths'][fn]), 'rb') as f:
                return pickle.load(f, encoding='latin1')

        self.movie_db = read('movie_db')
        remove_empty_slots(self.movie_db)
        self.movie_dict = read('movie_dict')
        self.user_goals = read('user_goals')

        self.dst = DST(self.movie_db)    # dialogue state tracker
        self.emc = EMC(self.movie_dict)  # error model controller

        # Raise instead of `assert`: asserts are stripped under `python -O`.
        model_name = c.c['run']['model']
        if model_name not in self.models:
            raise ValueError('unknown model: {0!r}'.format(model_name))
        self.dqn = self.models[model_name](self.dst.s_size)

    def eps_reset(self):
        """Start a new episode: reset tracker, user, and agent state."""
        self.dst.reset()
        u_act = self.usr.reset()
        self.emc.infuse_error(u_act)  # inject simulated understanding errors
        self.dst.update_state_user(u_act)
        self.dqn.reset()

    def next_turn(self, state, warmup=False):
        """Play one agent/user exchange and store it in replay memory.

        During warmup the agent uses its rule policy. Returns
        (next_state, reward, done, success).
        """
        a_aid, a_act = self.dqn.get_action(state, use_rule=warmup)
        self.dst.update_state_agent(a_act)
        u_act, reward, done, succ = self.usr.step(a_act)
        if not done:
            self.emc.infuse_error(u_act)
            self.dst.update_state_user(u_act)
        nstate = self.dst.get_state(done)
        self.dqn.add_exp(state, a_aid, reward, nstate, done)
        return nstate, reward, done, succ

    def warmup(self, cnt=0):
        """Pre-fill replay memory with rule-policy turns before training."""
        while cnt < c.c['run']['warmup_mem'] and not self.dqn.is_mem_full():
            self.eps_reset()
            state = self.dst.get_state()
            done = False
            while not done:
                nstate, _, done, _ = self.next_turn(state, warmup=True)
                state = nstate
                cnt += 1  # counts turns, not episodes

    def train(self):
        """Train the DQN against the user simulator.

        Every `train_freq` episodes: report success rate / average reward,
        flush memory when the rate clears `succ_thres`, checkpoint on a new
        best, then sync the target network and run a training pass.
        """
        self.usr = USim(self.movie_db, self.user_goals)
        self.warmup()
        eps = 0
        succ_total, reward_total = 0, 0
        best = 0.0
        while eps < c.c['run']['ep_run_num']:
            self.eps_reset()
            eps += 1
            done = False
            state = self.dst.get_state()
            while not done:
                nstate, reward, done, succ = self.next_turn(state)
                reward_total += reward
                state = nstate
            succ_total += succ
            if eps % c.c['run']['train_freq'] == 0:
                succ_rate = 1. * succ_total / c.c['run']['train_freq']
                av_reward = 1. * reward_total / c.c['run']['train_freq']
                print('eps:{0} succ:{1} reward:{2}'.format(
                    eps, succ_rate, av_reward))
                if succ_rate >= best and succ_rate >= c.c['run']['succ_thres']:
                    # Memory now holds stale low-quality experience; flush it.
                    self.dqn.empty_mem()
                if succ_rate > best:
                    best = succ_rate
                    self.dqn.save_weights()
                succ_total, reward_total = 0, 0
                self.dqn.copy()  # sync target network
                self.dqn.train()

    def test(self):
        """Run episodes against a real User with the current policy.

        Unlike next_turn(), no experience is recorded here.
        """
        self.usr = User()
        eps = 0
        while eps < c.c['run']['ep_run_num']:
            print('eps:{0}'.format(eps))
            self.eps_reset()
            eps += 1
            done = False
            state = self.dst.get_state()
            while not done:
                a_aid, a_act = self.dqn.get_action(state)
                self.dst.update_state_agent(a_act)
                u_act, reward, done, succ = self.usr.step(a_act)
                if not done:
                    self.emc.infuse_error(u_act)
                    self.dst.update_state_user(u_act)
                state = self.dst.get_state(done)
        print('end')