def __init__(self, movie_dict=None, act_set=None, slot_set=None, params=None):
    """Initialize an adversarial actor-critic dialogue agent.

    Builds the actor, critic, adversarial critic and discriminator networks,
    optionally restores trained checkpoints, and wires up an expert model
    with a rule-based user simulator for prediction mode.

    :param movie_dict: domain dictionary of slot values
    :param act_set: dict of dialogue acts (act name -> id)
    :param slot_set: dict of slots (slot name -> id)
    :param params: configuration dict (learning rates, paths, etc.)
    """
    # Dialogue action / slot-filling resources.
    self.movie_dict = movie_dict
    self.act_set = act_set
    self.slot_set = slot_set
    self.act_cardinality = len(act_set)    # len(d) == len(d.keys())
    self.slot_cardinality = len(slot_set)

    self.feasible_actions = dialog_config.feasible_actions
    self.num_actions = len(self.feasible_actions)

    # RL-specific hyper-parameters.
    self.params = params
    self.epsilon = params['epsilon']
    self.hidden_size = params.get('dqn_hidden_size', 60)
    self.gamma = params.get('gamma', 0.99)      # discount factor
    self.predict_mode = params.get('predict_mode', False)

    # Learning rates for the individual networks.
    self.actor_lr = params.get('actor_lr', 0.0005)
    self.critic_lr = params.get('critic_lr', 0.001)
    self.gan_critic_lr = params.get('gan_critic_lr', 0.001)
    self.discriminator_lr = params.get('discriminator_lr', 0.0005)
    self.discriminator_batch_size = params.get('discriminator_batch_size', 1)
    self.expert_path = params["expert_path"]
    self.reg_cost = self.params.get('reg_cost', 1e-3)

    # +4 leaves headroom beyond the configured maximum number of turns.
    self.max_turn = params['max_turn'] + 4
    self.state_dimension = 2 * self.act_cardinality + 7 * self.slot_cardinality + 3 + self.max_turn
    self.expert_weights = params['expert_weights']

    # Build models.
    self.build_actor_model(self.actor_lr)
    self.build_critic_model(self.critic_lr)
    self.build_critic_model(self.gan_critic_lr, True)  # adversarial critic
    # NOTE(review): the discriminator is built with gan_critic_lr, so
    # self.discriminator_lr above is never used — confirm this is intended.
    self.build_discriminator(self.gan_critic_lr)
    self.n = params.get('n', 50)

    # Resume from previously trained checkpoints when a path is given.
    if params['trained_model_path'] is not None:
        self.load(params['trained_actor_model_path'], "actor")
        self.load(params['trained_critic_model_path'], "critic")
        # "advesarial_critic" [sic] — kept as-is; load() may match on it.
        self.load(params['trained_adversarial_critic_model_path'], "advesarial_critic")
        self.load(params['trained_discriminator_model_path'], "discriminator")
        self.predict_mode = True
        self.warm_start = 2

    self.expert = self.build_expert_model()
    self.cur_bellman_err = 0

    # Prediction mode: a supplied expert model implies evaluation, not
    # training — set up the simulator and dialogue manager around it.
    if params['expert_path'] is not None:
        self.predict_mode = True
        self.warm_start = 2
        user_sim = RuleSimulator(params['movie_dictionary'], params['act_set'],
                                 params['slot_set'], params['goal_set'],
                                 params['usersim_params'])
        self.dialog_manager = DialogManager(self.expert, user_sim,
                                            params['act_set'], params['slot_set'],
                                            params['movie_kb'])
        user_sim.set_nlg_model(params['nlg'])
        user_sim.set_nlu_model(params['nlu'])
# Parameters for User Simulators
################################################################################
usersim_params = {
    'max_turn': max_turn,
    'slot_err_probability': params['slot_err_prob'],
    'slot_err_mode': params['slot_err_mode'],
    'intent_err_probability': params['intent_err_prob'],
    'simulator_run_mode': params['run_mode'],
    'simulator_act_level': params['act_level'],
    'learning_phase': params['learning_phase'],
}

# Select the simulator implementation for the requested user type.
if usr == 0:
    # real human user
    user_sim = RealUser(movie_dictionary, act_set, slot_set, goal_set, usersim_params)
elif usr == 1:
    # movie-domain rule simulator
    user_sim = RuleSimulator(movie_dictionary, act_set, slot_set, goal_set, usersim_params)
elif usr == 2:
    # restaurant-domain rule simulator
    user_sim = RuleRestaurantSimulator(movie_dictionary, act_set, slot_set, goal_set, usersim_params)
elif usr == 3:
    # taxi-domain rule simulator
    user_sim = RuleTaxiSimulator(movie_dictionary, act_set, slot_set, goal_set, usersim_params)
else:
    ############################################################################
    # Add your user simulator here
    ############################################################################
    pass

################################################################################
# load trained NLG model
# Load dialogue-act and slot vocabularies from disk.
act_set = DictReader()
act_set.load_dict_from_file(params['act_set'])
slot_set = SlotReader(slot_path)

# Movie knowledge base plus two database views: the full DB, and an
# artificially incomplete one (missing fraction controlled by params['unk']).
movie_kb = MovieDict(dict_path)
db_full = Database(db_full_path, movie_kb, name=params['dataset'])
db_inc = Database(db_inc_path, movie_kb,
                  name='incomplete%.2f_' % params['unk'] + params['dataset'])

# Seq2seq natural-language generator used by the user simulator.
nlg = S2SNLG(template_path, params['nlg_slots_path'], params['nlg_model_path'],
             params['nlg_temp'])

# Rule-based user simulator; it queries the *full* database while the agent
# only sees the incomplete one. NOTE(review): the goal-set argument is None
# here — presumably goals are sampled inside RuleSimulator; confirm.
user_sim = RuleSimulator(movie_kb, act_set, slot_set, None, max_turn, nlg,
                         err_prob, db_full, 1.-dk_prob,
                         sub_prob=params['sub_prob'],
                         max_first_turn=params['max_first_turn'])

if agent_type == 'simple-rl-soft':
    # NOTE(review): this constructor call is truncated in this chunk — its
    # remaining keyword arguments continue beyond the visible source.
    agent = AgentSimpleRLAllAct(movie_kb, act_set, slot_set, db_inc,
                                _reload=_reload, n_hid=params['nhid'],
                                batch=params['batch'], ment=params['ment'],
                                inputtype=params['input'],
                                pol_start=params['pol_start'], lr=params['lr'],
                                upd=params['upd'], tr=params['tr'],
################################################################################
# Parameters for User Simulators
################################################################################
usersim_params = {
    'max_turn': max_turn,
    'slot_err_probability': params['slot_err_prob'],
    'slot_err_mode': params['slot_err_mode'],
    'intent_err_probability': params['intent_err_prob'],
    'simulator_run_mode': params['run_mode'],
    'simulator_act_level': params['act_level'],
    'learning_phase': params['learning_phase'],
}

# Instantiate the requested user type.
if usr == 0:
    # real human user
    user_sim = RealUser(movie_dictionary, act_set, slot_set, usersim_params)
elif usr == 1:
    # self-play rule simulator
    user_sim = RuleSimulator(selfPlay, movie_dictionary, act_set, slot_set, usersim_params)
else:
    ############################################################################
    # Add your user simulator here
    ############################################################################
    pass

################################################################################
# load trained NLG model
################################################################################
nlg_model_path = params['nlg_model_path']
diaact_nl_pairs = params['diaact_nl_pairs']
nlg_model = nlg()
nlg_model.load_nlg_model(nlg_model_path)
nlg_model.load_predefine_act_nl_pairs(diaact_nl_pairs)
################################################################################
usersim_params = {
    'max_turn': max_turn,
    'slot_err_probability': params['slot_err_prob'],
    'slot_err_mode': params['slot_err_mode'],
    'intent_err_probability': params['intent_err_prob'],
    'simulator_run_mode': params['run_mode'],
    'simulator_act_level': params['act_level'],
    'learning_phase': params['learning_phase'],
    'hidden_size': params['dqn_hidden_size'],
}

# Only the rule-based simulator (usr == 1) is currently enabled; the real
# user (usr == 0) and the purely model-based simulator (usr == 2) paths
# are disabled in this build.
if usr == 1:
    user_sim = RuleSimulator(movie_dictionary, act_set, slot_set, goal_set, usersim_params)
    # World model for model-based planning, shared with the agent.
    world_model = ModelBasedSimulator(movie_dictionary, act_set, slot_set, goal_set, usersim_params)
    agent.set_user_planning(world_model)
else:
    ############################################################################
    # Add your user simulator here
    ############################################################################
    pass

################################################################################
# load trained NLG model
################################################################################
nlg_model_path = params['nlg_model_path']
################################################################################
# Parameters for User Simulators
################################################################################
usersim_params = {
    'max_turn': max_turn,
    'slot_err_probability': params['slot_err_prob'],
    'slot_err_mode': params['slot_err_mode'],
    'intent_err_probability': params['intent_err_prob'],
    'simulator_run_mode': params['run_mode'],
    'simulator_act_level': params['act_level'],
    'learning_phase': params['learning_phase'],
}

# The real-user path (usr == 0) is disabled in this build.
if usr == 1:
    user_sim = RuleSimulator(movie_dictionary, act_set, slot_set, goal_set, usersim_params)
else:
    ############################################################################
    # Add your user simulator here
    ############################################################################
    pass

# NOTE(review): this unconditional reassignment makes the branch above
# redundant for usr == 1 and is the *only* binding of user_sim for any
# other usr value; the original author flagged it for removal ("added by
# jose remove it... dont know proper working") — kept to preserve behavior.
user_sim = RuleSimulator(movie_dictionary, act_set, slot_set, goal_set, usersim_params)

################################################################################
# load trained NLG model
################################################################################
nlg_model_path = params['nlg_model_path']
diaact_nl_pairs = params['diaact_nl_pairs']
################################################################################
# Randomly select a set of user goals for this run.
curr_user_goals = {}
if num_goals == 120:
    # 120 means "use every training goal".
    curr_user_goals["all"] = list(copy.deepcopy(train_user_goals))
else:
    curr_user_goals = select_random_user_goals(
        train_user_goals_no_req_slots,
        train_user_goals_with_req_slots,
        training_user_goals_no_req_cardinality[i],
        training_user_goals_req_cardinality[i])

# Each simulator receives its own deep copy of the goals so that neither
# run can mutate the other's goal pool.
pretrain_user_sim = RuleSimulator(mock_dictionary, act_set, slot_set,
                                  copy.deepcopy(curr_user_goals),
                                  pretrain_usersim_params)
user_sim = RuleSimulator(mock_dictionary, act_set, slot_set,
                         copy.deepcopy(curr_user_goals), usersim_params)

# Pre-trained agent versus an agent trained from scratch.
pretrained_agent = AgentDQN(kb, act_set, slot_set, pretrained_agent_params)
agent = AgentDQN(kb, act_set, slot_set, agent_params)

# One dialogue manager per (agent, simulator) pair.
pretrain_dialog_manager = DialogManager(pretrained_agent, pretrain_user_sim,
                                        act_set, slot_set, kb)
dialog_manager = DialogManager(agent, user_sim, act_set, slot_set, kb)

# the warmup success rate of the pre-trained model