def __init__(self, movie_dict=None, act_set=None, slot_set=None, params=None):

		## parameters associated with dialogue action and slot filling
		self.movie_dict = movie_dict
		self.act_set = act_set
		self.slot_set = slot_set
		self.act_cardinality = len(act_set.keys())
		self.slot_cardinality = len(slot_set.keys())

		self.feasible_actions = dialog_config.feasible_actions
		self.num_actions = len(self.feasible_actions)

		# rl specific parameters
		# epsilon:
		self.params = params
		self.epsilon = params['epsilon']
		#
		self.agent_run_mode = params['agent_run_mode']
		#
		self.agent_act_level = params['agent_act_level']
		# experience replay
		# self.experience_replay_pool_size = params.get('experience_replay_pool_size', 1000)
		# self.experience_replay_pool = [] #Replay_Memory(self.experience_replay_pool_size)
		self.hidden_size = params.get('dqn_hidden_size', 60)
		# gamma : discount factor
		self.gamma = params.get('gamma', 0.99)
		self.predict_mode = params.get('predict_mode', False)
		self.actor_lr = params.get('actor_lr', 0.0005)
		self.critic_lr = params.get('critic_lr', 0.001)
		self.gan_critic_lr = params.get('gan_critic_lr', 0.001)
		self.discriminator_lr = params.get('discriminator_lr', 0.0005)
		self.discriminator_batch_size = params.get('discriminator_batch_size', 1)
		self.expert_path = params["expert_path"]
                self.reg_cost = self.params.get('reg_cost', 1e-3)

		## warm start:
		## there is no warm start since there are is no experience replay
		# self.warm_start = params.get('warm_start', 0)

		self.max_turn = params['max_turn'] + 4
		self.state_dimension = 2 * self.act_cardinality + 7 * self.slot_cardinality + 3 + self.max_turn
                self.expert_weights = params['expert_weights']
		# Build models
		self.build_actor_model(self.actor_lr)
		self.build_critic_model(self.critic_lr)
		self.build_critic_model(self.gan_critic_lr, True)
		self.build_discriminator(self.gan_critic_lr)
		self.n = params.get('n', 50)

		## load a model if present
		if params['trained_model_path'] != None:
			self.load(params['trained_actor_model_path'], "actor")
			self.load(params['trained_critic_model_path'], "critic")
			self.load(params['trained_adversarial_critic_model_path'], "advesarial_critic")
			self.load(params['trained_discriminator_model_path'], "discriminator")
			self.predict_mode = True
			self.warm_start = 2
		#self.expert = DQN(self.state_dimension, self.hidden_size, self.hidden_size, self.num_actions)
		self.expert = self.build_expert_model()
		# self.clone_dqn = copy.deepcopy(self.expert)
		# self.clone_dqn = keras.models.clone_model(self.expert)
		self.cur_bellman_err = 0
			
		# Prediction Mode: load trained DQN model
                if params['expert_path'] != None:
		    # self.dqn.model = model_from_json(params['expert_path'])
                    # copy.deepcopy(self.load_trained_DQN(params['expert_path']))
		    # self.dqn.model.load_weights(params['expert_weights'])
		    self.predict_mode = True
		    self.warm_start = 2
                user_sim = RuleSimulator(params['movie_dictionary'], 
                        params['act_set'], 
                        params['slot_set'], 
                        params['goal_set'], 
                        params['usersim_params'])
                self.dialog_manager = DialogManager(self.expert, 
                        user_sim, 
                        params['act_set'], 
                        params['slot_set'], 
                        params['movie_kb'])
                user_sim.set_nlg_model(params['nlg'])
                user_sim.set_nlu_model(params['nlu'])
Beispiel #2
0
nlg_model.load_nlg_model(nlg_model_path)
nlg_model.load_predefine_act_nl_pairs(diaact_nl_pairs)

agent.set_nlg_model(nlg_model)
user_sim.set_nlg_model(nlg_model)
world_model.set_nlg_model(nlg_model)

################################################################################
# load trained NLU model
################################################################################
nlu_model_path = params['nlu_model_path']
nlu_model = nlu()
nlu_model.load_nlu_model(nlu_model_path)

agent.set_nlu_model(nlu_model)
user_sim.set_nlu_model(nlu_model)
world_model.set_nlu_model(nlu_model)

################################################################################
# Dialog Manager
################################################################################
dialog_manager = DialogManager(agent, user_sim, world_model, act_set, slot_set, movie_kb)

################################################################################
#   Run num_episodes Conversation Simulations
################################################################################
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}

simulation_epoch_size = params['simulation_epoch_size']
batch_size = params['batch_size']  # default = 16
warm_start = params['warm_start']