Ejemplo n.º 1
0
 def __init__(self):
     """Load the pickled resources and build the tracker, EMC and model.

     File locations come from ``c.c['paths']`` (resolved through
     ``get_path``); the model class is selected by ``c.c['run']['model']``
     and is constructed with the tracker's ``s_size``.

     Raises:
         AssertionError: if the configured model name is not a key of
             ``self.models``.
     """
     self.models = {'dqn': DQN}

     def read(fn):
         # Use a context manager so the file handle is closed right after
         # loading instead of being left to the garbage collector.
         # NOTE(review): pickle executes arbitrary code on load -- these
         # files must come from a trusted source.
         with open(get_path(c.c['paths'][fn]), 'rb') as fh:
             return pickle.load(fh, encoding='latin1')

     self.movie_db = read('movie_db')
     remove_empty_slots(self.movie_db)
     self.movie_dict = read('movie_dict')
     self.user_goals = read('user_goals')
     self.dst = DST(self.movie_db)
     self.emc = EMC(self.movie_dict)
     model_name = c.c['run']['model']
     assert model_name in self.models, \
         'unknown model: {0!r}'.format(model_name)
     self.dqn = self.models[model_name](self.dst.s_size)
Ejemplo n.º 2
0
class BotHCN(object):
    """Dialog bot built from an NLU front end, a state tracker and an HCN model.

    Every utterance is turned into a feature vector (bag-of-words,
    utterance embedding and tracker context features concatenated), and the
    model predicts an action index that selects a response template.
    """

    def __init__(self, voc_fn, w2v_fn, w2v_dim, entity_types, entity_dict,
            action_mask_dict, obs_size, act_size, templates):
        """Construct the NLU, tracker and model components.

        The arguments are forwarded unchanged: ``voc_fn``, ``w2v_fn``,
        ``w2v_dim`` and ``entity_dict`` to ``NLU``; ``entity_types`` to
        ``DST``; ``action_mask_dict``, ``obs_size`` and ``act_size`` to
        ``HCN``. ``templates`` is indexed by predicted action in ``test``.
        """
        self.nlu = NLU(voc_fn, w2v_fn, w2v_dim, entity_dict)
        self.dst = DST(entity_types)
        self.model = HCN(action_mask_dict, obs_size, act_size)
        self.templates = templates

    def _reset_dialog(self):
        """Clear the tracker and the model state before a new dialog."""
        self.dst.clear()
        self.model.reset_state()

    def _featurize(self, text):
        """Update the tracker with ``text`` and return ``(feats, action_mask)``.

        ``feats`` is the concatenation of the bag-of-words vector, the
        utterance embedding and the tracker's context features; the action
        mask is derived from the context features alone.
        """
        feat_bow = self.nlu.get_bow_vector(text)
        feat_emb = self.nlu.get_utter_emb(text)
        # The tracker must be updated before its features are read.
        self.dst.update(self.nlu.extract_entities(text))
        feat_ctx = self.dst.get_feat()
        feats = np.concatenate((feat_bow, feat_emb, feat_ctx), axis=0)
        return feats, self.model.get_action_mask(feat_ctx)

    def train(self, data_fn, epochs=5):
        """Train the model on the dialogs in ``data_fn`` and save it.

        The first 90% of the converted dialogs are used for training and the
        remainder for validation. After every epoch the summed training loss
        and the validation accuracy are printed.

        Args:
            data_fn: passed to ``get_data`` to load the raw dialogs.
            epochs: number of passes over the training split.
        """
        def train_dialog(dialog):
            # Sum of the per-turn losses over one dialog.
            self._reset_dialog()
            loss = 0
            for text, action in dialog:
                feats, action_mask = self._featurize(text)
                loss += self.model.train_step(feats, action, action_mask)[0]
            return loss

        data = list(get_data(data_fn))
        data = convert_train_data(data, self.templates)
        split = int(.9 * len(data))
        data_train, data_valid = data[:split], data[split:]
        for epoch in range(epochs):
            loss = sum(train_dialog(dialog) for dialog in data_train)
            accu = self.eval(data_valid)
            print('[{0}/{1}] {2:.4f} {3:.4f}'.format(epoch, epochs, loss, accu))
        self.model.save()

    def eval(self, dialogs):
        """Return the mean per-dialog action accuracy over ``dialogs``.

        Each dialog is a sequence of ``(text, expected_action)`` pairs. An
        empty ``dialogs`` collection, or an empty dialog inside it, scores
        0.0 instead of raising ``ZeroDivisionError``.
        """
        def eval_dialog(dialog):
            # Fraction of turns whose predicted action equals the expected one.
            if len(dialog) == 0:
                return 0.
            self._reset_dialog()
            correct = 0
            for text, real in dialog:
                feats, action_mask = self._featurize(text)
                pred = self.model.predict_action(feats, action_mask)
                correct += int(pred == real)
            return 1. * correct / len(dialog)

        # len() rather than truthiness so array-like inputs also work.
        total = len(dialogs)
        if total == 0:
            return 0.
        return 1. * sum(eval_dialog(dialog) for dialog in dialogs) / total

    def test(self):
        """Run an interactive console loop against the saved model.

        ``clear``/``reset``/``restart`` start a new dialog,
        ``exit``/``quit``/``stop`` (or end of input) leave the loop, and an
        empty line is sent to the model as ``<SILENCE>``.
        """
        # raw_input only exists on Python 2; input is its Python 3 equivalent.
        try:
            read_line = raw_input
        except NameError:
            read_line = input

        self.dst.clear()
        self.model.load()
        self.model.reset_state()
        while True:
            try:
                text = read_line(':: ')
            except EOFError:
                # Closed stdin (e.g. Ctrl-D) ends the session cleanly.
                break
            if text in ('clear', 'reset', 'restart'):
                self._reset_dialog()
                print('')
            elif text in ('exit', 'quit', 'stop'):
                break
            else:
                text = text or '<SILENCE>'
                feats, action_mask = self._featurize(text)
                pred = self.model.predict_action(feats, action_mask)
                print('>> ' + self.templates[pred].format(**self.dst.entities))
Ejemplo n.º 3
0
 def __init__(self, voc_fn, w2v_fn, w2v_dim, entity_types, entity_dict,
              action_mask_dict, obs_size, act_size, templates):
     """Build the NLU, state-tracker and HCN components and keep the templates.

     The arguments are forwarded unchanged to the ``NLU``, ``DST`` and
     ``HCN`` constructors; ``templates`` is stored as given.
     """
     nlu_args = (voc_fn, w2v_fn, w2v_dim, entity_dict)
     self.nlu = NLU(*nlu_args)
     self.dst = DST(entity_types)
     hcn_args = (action_mask_dict, obs_size, act_size)
     self.model = HCN(*hcn_args)
     self.templates = templates
Ejemplo n.º 4
0
from flask import Flask
from flask_socketio import SocketIO, emit

from nlu_test import predict
from dst import DST
from dpl import DPL
from nlg import NLG

# Flask application and the Socket.IO layer wrapped around it.
app = Flask(__name__)
# NOTE(review): hard-coded secret key -- confirm this never reaches production.
app.config["SECRET_KEY"] = "secret!"
socketio = SocketIO(app)

# Shared pipeline components, created once at import time.
_dst, _dpl, _nlg = DST(), DPL(), NLG()

# Module-level conversation state passed to the tracker on each message.
last_action = last_state = None
Hn = []

@socketio.on("receive")
def recevie_msg(msg):
    """Handle a "receive" socket event.

    Runs the message through ``predict``, asks the tracker for the current
    state, then asks the policy for the next action, which also returns the
    new value of the module-level ``Hn``.

    Args:
        msg: payload of the "receive" event, passed as-is to ``predict``.
    """
    # Hn is rebound at the end of this function. Without the declaration
    # Python treats it as a local, and the read in get_state() below raises
    # UnboundLocalError on every call.
    global Hn

    print(msg)
    # get intent & slots
    nlu_result = predict(msg)

    # get state (Gn, Un, Hn)
    state = _dst.get_state(nlu_result, last_action, last_state, Hn)

    # get action
    # NOTE(review): `action` is not used in the visible part of this handler,
    # and last_action / last_state are never updated here -- confirm.
    action, Hn = _dpl.get_action(state)
Ejemplo n.º 5
0
class BotRL(object):
    """Dialog agent that couples a state tracker, an EMC and a DQN model.

    ``train`` runs episodes against a ``USim`` instance and ``test`` against
    a ``User`` instance; both store that counterpart in ``self.usr``.
    Settings are read from the shared configuration ``c.c``.
    """

    def __init__(self):
        """Load the pickled resources and build the tracker, EMC and model.

        File locations come from ``c.c['paths']`` (resolved through
        ``get_path``); the model class is selected by ``c.c['run']['model']``
        and is constructed with the tracker's ``s_size``.

        Raises:
            AssertionError: if the configured model name is not a key of
                ``self.models``.
        """
        self.models = {'dqn': DQN}

        def read(fn):
            # Use a context manager so the file handle is closed right after
            # loading instead of being left to the garbage collector.
            # NOTE(review): pickle executes arbitrary code on load -- these
            # files must come from a trusted source.
            with open(get_path(c.c['paths'][fn]), 'rb') as fh:
                return pickle.load(fh, encoding='latin1')

        self.movie_db = read('movie_db')
        remove_empty_slots(self.movie_db)
        self.movie_dict = read('movie_dict')
        self.user_goals = read('user_goals')
        self.dst = DST(self.movie_db)
        self.emc = EMC(self.movie_dict)
        # Assigned by train() / test(); declared here so the attribute
        # always exists on a constructed instance.
        self.usr = None
        model_name = c.c['run']['model']
        assert model_name in self.models, \
            'unknown model: {0!r}'.format(model_name)
        self.dqn = self.models[model_name](self.dst.s_size)

    def eps_reset(self):
        """Start a new episode.

        Resets the tracker, obtains the first user action from
        ``self.usr.reset()``, passes it through ``emc.infuse_error``, records
        it in the tracker and resets the model. ``self.usr`` must already
        have been set by ``train`` or ``test``.
        """
        self.dst.reset()
        u_act = self.usr.reset()
        self.emc.infuse_error(u_act)
        self.dst.update_state_user(u_act)
        self.dqn.reset()

    def next_turn(self, state, warmup=False):
        """Play one agent/user exchange and store it as an experience.

        Args:
            state: current state, as returned by ``self.dst.get_state``.
            warmup: forwarded to the model as ``use_rule``.

        Returns:
            Tuple ``(nstate, reward, done, succ)``: the next state followed
            by the values reported by ``self.usr.step``.
        """
        a_aid, a_act = self.dqn.get_action(state, use_rule=warmup)
        self.dst.update_state_agent(a_act)
        u_act, reward, done, succ = self.usr.step(a_act)
        # The final user action of an episode is recorded without error.
        if not done:
            self.emc.infuse_error(u_act)
        self.dst.update_state_user(u_act)
        nstate = self.dst.get_state(done)
        self.dqn.add_exp(state, a_aid, reward, nstate, done)
        return nstate, reward, done, succ

    def warmup(self, cnt=0):
        """Fill the model's memory by playing episodes with ``warmup=True``.

        Episodes are played until ``cnt`` turns reach
        ``c.c['run']['warmup_mem']`` or the model reports its memory full.
        The limit is only checked between episodes, so the last episode
        always runs to completion.

        Args:
            cnt: starting value of the turn counter.
        """
        while cnt < c.c['run']['warmup_mem'] and not self.dqn.is_mem_full():
            self.eps_reset()
            state = self.dst.get_state()
            done = False
            while not done:
                state, _, done, _ = self.next_turn(state, warmup=True)
                cnt += 1
            # Debug aid: self.dst.print_history(); print('-' * 10)

    def train(self):
        """Train the model against a ``USim`` user simulator.

        After a warm-up phase, ``c.c['run']['ep_run_num']`` episodes are
        played. Every ``train_freq`` episodes the success rate and average
        reward of that window are printed, the memory is emptied when the
        rate is at least the best so far and at least ``succ_thres``, the
        weights are saved on a new best rate, and the model is copied and
        trained.
        """
        self.usr = USim(self.movie_db, self.user_goals)
        self.warmup()
        eps = 0
        succ_total, reward_total = 0, 0
        best = 0.0
        while eps < c.c['run']['ep_run_num']:
            self.eps_reset()
            eps += 1
            done = False
            state = self.dst.get_state()
            while not done:
                state, reward, done, succ = self.next_turn(state)
                reward_total += reward
            # Debug aid: if succ: self.dst.print_history(); print('-' * 10)
            succ_total += succ

            train_freq = c.c['run']['train_freq']
            if eps % train_freq != 0:
                continue

            succ_rate = 1. * succ_total / train_freq
            av_reward = 1. * reward_total / train_freq
            print('eps:{0} succ:{1} reward:{2}'.format(
                eps, succ_rate, av_reward))
            # Checked before `best` is updated, so a rate equal to the
            # previous best also empties the memory.
            if succ_rate >= best and succ_rate >= c.c['run']['succ_thres']:
                self.dqn.empty_mem()
            if succ_rate > best:
                best = succ_rate
                self.dqn.save_weights()
            succ_total, reward_total = 0, 0
            self.dqn.copy()
            self.dqn.train()

    def test(self):
        """Run ``c.c['run']['ep_run_num']`` episodes against a ``User``.

        No experience is stored and the model is not trained; the episode
        index is printed before each episode and ``end`` after the last.
        """
        self.usr = User()
        eps = 0
        while eps < c.c['run']['ep_run_num']:
            print('eps:{0}'.format(eps))
            self.eps_reset()
            eps += 1
            done = False
            state = self.dst.get_state()
            while not done:
                a_aid, a_act = self.dqn.get_action(state)
                self.dst.update_state_agent(a_act)
                u_act, reward, done, succ = self.usr.step(a_act)
                if not done:
                    self.emc.infuse_error(u_act)
                self.dst.update_state_user(u_act)
                state = self.dst.get_state(done)
                # Debug aid: print('succ:{0} reward:{1}'.format(succ, reward))
        print('end')