user_sim.set_nlg_model(nlg_model)

################################################################################
# Load the trained NLU model; the same instance is handed to both the agent
# and the user simulator.
################################################################################
nlu_model_path = params['nlu_model_path']
nlu_model = nlu()
nlu_model.load_nlu_model(nlu_model_path)

agent.set_nlu_model(nlu_model)
user_sim.set_nlu_model(nlu_model)

################################################################################
# Dialog Manager
################################################################################
dialog_manager = DialogManager(agent, user_sim, act_set, slot_set, kb)

################################################################################
# Run num_episodes Conversation Simulations
################################################################################
# Running counters for the simulation run.
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}

simulation_epoch_size = params['simulation_epoch_size']
batch_size = params['batch_size']  # default = 16
warm_start = params['warm_start']
warm_start_epochs = params['warm_start_epochs']

success_rate_threshold = params['success_rate_threshold']
save_check_point = params['save_check_point']

# Best Model and Performance Records
best_model = {}
def __init__(self, movie_dict=None, act_set=None, slot_set=None, params=None):
    """Configure the agent, build its networks and set up the expert dialogue manager.

    Despite their ``None`` defaults, ``act_set``, ``slot_set`` and ``params``
    are required: their contents are read unconditionally.

    Args:
        movie_dict: stored as ``self.movie_dict``.
        act_set: mapping of dialogue acts; its size feeds the state dimension.
        slot_set: mapping of slots; its size feeds the state dimension.
        params: configuration dict.
            Required keys: ``epsilon``, ``expert_path``, ``max_turn``,
            ``expert_weights``, ``trained_model_path``, ``movie_dictionary``,
            ``act_set``, ``slot_set``, ``goal_set``, ``usersim_params``,
            ``movie_kb``, ``nlg``, ``nlu``.
            Required when ``trained_model_path`` is not ``None``:
            ``trained_actor_model_path``, ``trained_critic_model_path``,
            ``trained_adversarial_critic_model_path``,
            ``trained_discriminator_model_path``.
            Optional keys (default): ``dqn_hidden_size`` (60), ``gamma`` (0.99),
            ``predict_mode`` (False), ``actor_lr`` (0.0005),
            ``critic_lr`` (0.001), ``gan_critic_lr`` (0.001),
            ``discriminator_lr`` (0.0005), ``discriminator_batch_size`` (1),
            ``reg_cost`` (1e-3), ``n`` (50).

    Raises:
        KeyError: if a required key is missing from ``params``.
    """
    # NOTE(review): the extent of the two ``if`` bodies below was inferred from
    # data flow, not from indentation -- confirm against the original layout.

    ## parameters associated with dialogue action and slot filling
    self.movie_dict = movie_dict
    self.act_set = act_set
    self.slot_set = slot_set
    self.act_cardinality = len(act_set)
    self.slot_cardinality = len(slot_set)

    self.feasible_actions = dialog_config.feasible_actions
    self.num_actions = len(self.feasible_actions)

    # rl specific parameters
    self.params = params
    self.epsilon = params['epsilon']

    self.hidden_size = params.get('dqn_hidden_size', 60)
    # gamma : discount factor
    self.gamma = params.get('gamma', 0.99)
    self.predict_mode = params.get('predict_mode', False)

    # learning rates and batch size for the individual networks
    self.actor_lr = params.get('actor_lr', 0.0005)
    self.critic_lr = params.get('critic_lr', 0.001)
    self.gan_critic_lr = params.get('gan_critic_lr', 0.001)
    self.discriminator_lr = params.get('discriminator_lr', 0.0005)
    self.discriminator_batch_size = params.get('discriminator_batch_size', 1)
    self.expert_path = params["expert_path"]
    self.reg_cost = self.params.get('reg_cost', 1e-3)

    ## warm start:
    ## there is no warm start since there is no experience replay

    self.max_turn = params['max_turn'] + 4
    self.state_dimension = (2 * self.act_cardinality
                            + 7 * self.slot_cardinality
                            + 3
                            + self.max_turn)
    self.expert_weights = params['expert_weights']

    # Build models
    self.build_actor_model(self.actor_lr)
    self.build_critic_model(self.critic_lr)
    # second critic, built with the GAN critic learning rate and the flag set
    self.build_critic_model(self.gan_critic_lr, True)
    # NOTE(review): the discriminator is built with gan_critic_lr, while
    # self.discriminator_lr is stored above but not used in this method --
    # confirm which rate is intended.
    self.build_discriminator(self.gan_critic_lr)
    self.n = params.get('n', 50)

    ## load a model if present
    if params['trained_model_path'] is not None:
        # The tag strings are runtime values passed to self.load and are kept
        # verbatim, including the "advesarial_critic" spelling.
        self.load(params['trained_actor_model_path'], "actor")
        self.load(params['trained_critic_model_path'], "critic")
        self.load(params['trained_adversarial_critic_model_path'], "advesarial_critic")
        self.load(params['trained_discriminator_model_path'], "discriminator")
        self.predict_mode = True
        self.warm_start = 2

    self.expert = self.build_expert_model()
    self.cur_bellman_err = 0

    # Prediction Mode: an expert model path was supplied
    if params['expert_path'] is not None:
        self.predict_mode = True
        self.warm_start = 2

    # Rule-based user simulator paired with the expert in its own dialog manager.
    user_sim = RuleSimulator(params['movie_dictionary'], params['act_set'],
                             params['slot_set'], params['goal_set'],
                             params['usersim_params'])
    self.dialog_manager = DialogManager(self.expert, user_sim, params['act_set'],
                                        params['slot_set'], params['movie_kb'])
    user_sim.set_nlg_model(params['nlg'])
    user_sim.set_nlu_model(params['nlu'])
# Per-user, timestamped directory that holds the recorded dialogues.
cdir = "sessions/" + str(uid) + '_' + datetime.datetime.now().strftime(
    '%Y-%m-%d_%H-%M-%S') + "/"
if not os.path.exists(cdir):
    os.makedirs(cdir)
with open(os.path.join(cdir, 'credentials'), 'w') as f:
    f.write(uname)

try:
    for i in range(N):
        # Parenthesised single-expression print: same output under Python 2.
        print("-" * 200 + "\n第{0}次对话:".format(i))

        # Transcript layout: [agent, first utterance, agent act, utterance, ...]
        dia = []
        curr_agent = agent
        dia.append(curr_agent)
        dialog_manager = DialogManager(curr_agent, user_sim, db_full, db_inc,
                                       movie_kb, verbose=False)
        utt = dialog_manager.initialize_episode()
        dia.append(copy.deepcopy(utt))

        total_reward = 0
        while True:
            episode_over, reward, utt, agact = dialog_manager.next_turn()
            dia.append(agact)
            # deep copy so later turns cannot mutate the stored utterance
            dia.append(copy.deepcopy(utt))
            total_reward += reward
            if episode_over:
                break

        # Close the dump file deterministically instead of leaking the handle.
        # NOTE(review): mode 'w' is kept so existing readers stay compatible;
        # binary mode ('wb') is required if the pickle protocol is raised or
        # this code is ported to Python 3.
        with open(cdir + str(i) + ".p", 'w') as dump_file:
            pkl.dump(dia, dump_file)
except KeyboardInterrupt:
    # Ctrl-C ends the recording session.
    sys.exit()
pol_start=params['pol_start'], lr=params['lr'], N=params['featN'], tr=params['tr'], ts=params['ts'], frac=params['frac'], max_req=params['max_req'], upd=params['upd'], name=params['model_name']) else: print "Invalid agent!" sys.exit() dialog_manager = DialogManager(agent, user_sim, db_full, db_inc, movie_kb, verbose=False) dialog_manager_eval = DialogManager(agent_eval, user_sim, db_full, db_inc, movie_kb, verbose=False) def eval_agent(ite, max_perf, best=False): num_iter = 2000 nn = np.sqrt(num_iter) if best: agent_eval.load_model(dialog_config.MODEL_PATH + 'best_' +
################################################################################
# Load the trained NLU model; the same instance is handed to the agent, the
# user simulator and the world model.
################################################################################
nlu_model_path = params['nlu_model_path']
nlu_model = nlu()
nlu_model.load_nlu_model(nlu_model_path)

agent.set_nlu_model(nlu_model)
user_sim.set_nlu_model(nlu_model)
world_model.set_nlu_model(nlu_model)

################################################################################
# Dialog Manager
################################################################################
dialog_manager = DialogManager(agent, user_sim, world_model, act_set, slot_set, movie_kb)

################################################################################
# Run num_episodes Conversation Simulations
################################################################################
# Running counters for the simulation run.
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}

simulation_epoch_size = params['simulation_epoch_size']
batch_size = params['batch_size']  # default = 16
warm_start = params['warm_start']
warm_start_epochs = params['warm_start_epochs']

# The planning step count is also stored on the agent itself.
planning_steps = params['planning_steps']
agent.planning_steps = planning_steps

success_rate_threshold = params['success_rate_threshold']
user_sim_planning.set_nlg_model(nlg_model)

################################################################################
# Load the trained NLU model; the same instance is handed to both the agent
# and the user simulator.
################################################################################
nlu_model_path = params['nlu_model_path']
nlu_model = nlu()
nlu_model.load_nlu_model(nlu_model_path)

agent.set_nlu_model(nlu_model)
user_sim.set_nlu_model(nlu_model)
# NOTE(review): user_sim_planning receives the NLG model above, but no
# set_nlu_model call for it appears in this section -- confirm that is intended.

################################################################################
# Dialog Manager
################################################################################
dialog_manager = DialogManager(agent, user_sim, user_sim_planning, act_set,
                               slot_set, movie_kb, discriminator)

################################################################################
# Run num_episodes Conversation Simulations
################################################################################
# Running counters for the simulation run.
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}

simulation_epoch_size = params['simulation_epoch_size']
batch_size = params['batch_size']  # default = 16
warm_start = params['warm_start']
warm_start_epochs = params['warm_start_epochs']
planning_steps = params['planning_steps']

success_rate_threshold = params['success_rate_threshold']
save_check_point = params['save_check_point']
# load trained NLG model
################################################################################
# The same NLG instance is handed to both the agent and the user simulator.
agent.set_nlg_model(nlg_model)
user_sim.set_nlg_model(nlg_model)

################################################################################
# load trained NLU model
################################################################################
# Likewise, one NLU instance is handed to both sides.
agent.set_nlu_model(nlu_model)
user_sim.set_nlu_model(nlu_model)

################################################################################
# Dialog Manager
################################################################################
dialog_manager = DialogManager(agent, user_sim, act_set, slot_set, movie_kb,
                               params['is_a2c'])

################################################################################
# Run num_episodes Conversation Simulations
################################################################################
# Running counters for the simulation run.
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}

simulation_epoch_size = params['simulation_epoch_size']
batch_size = params['batch_size']  # default = 16
warm_start = params['warm_start']
warm_start_epochs = params['warm_start_epochs']

success_rate_threshold = params['success_rate_threshold']
save_check_point = params['save_check_point']

# Best Model and Performance Records
best_model = {}
else: curr_user_goals["all"] = [] curr_user_goals["all"].extend(copy.deepcopy(train_user_goals)) # create pretrain user simulator pretrain_user_sim = RuleSimulator(mock_dictionary, act_set, slot_set, copy.deepcopy(curr_user_goals), pretrain_usersim_params) # create not a pre-trained user simulator user_sim = RuleSimulator(mock_dictionary, act_set, slot_set, copy.deepcopy(curr_user_goals), usersim_params) # create the pre-trained agent pretrained_agent = AgentDQN(kb, act_set, slot_set, pretrained_agent_params) # create the agent from scratch agent = AgentDQN(kb, act_set, slot_set, agent_params) # create dialogue manager for pre-trained agent pretrain_dialog_manager = DialogManager(pretrained_agent, pretrain_user_sim, act_set, slot_set, kb) # create dialogue manager for not pre-trained agent dialog_manager = DialogManager(agent, user_sim, act_set, slot_set, kb) # the warmup success rate of the pre-trained model pretrain_warmup_succ_rate = 0 # the warmup success rate of the scratch model warmup_succ_rate = 0 # warm-start the pre-trained agent pretrain_warmup_res = warm_start_simulation(pretrain_dialog_manager, pretrained_agent, copy.deepcopy(curr_user_goals["all"]), 2, 8, "pretrain") # warm-start the agent from scartch