Example #1
user_sim.set_nlg_model(nlg_model)

################################################################################
# load trained NLU model
################################################################################
nlu_model_path = params['nlu_model_path']
nlu_model = nlu()
nlu_model.load_nlu_model(nlu_model_path)

agent.set_nlu_model(nlu_model)
user_sim.set_nlu_model(nlu_model)

################################################################################
# Dialog Manager
################################################################################
dialog_manager = DialogManager(agent, user_sim, act_set, slot_set, kb)

################################################################################
#   Run num_episodes Conversation Simulations
################################################################################
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}

simulation_epoch_size = params['simulation_epoch_size']
batch_size = params['batch_size']  # default = 16
warm_start = params['warm_start']
warm_start_epochs = params['warm_start_epochs']

success_rate_threshold = params['success_rate_threshold']
save_check_point = params['save_check_point']
""" Best Model and Performance Records """
best_model = {}
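
The status dict and the epoch/batch settings above typically feed an episode-simulation loop. Below is a minimal sketch of such a loop, assuming a simulation_epoch-style helper and that DialogManager exposes initialize_episode() and next_turn() returning at least (episode_over, reward), as in the session-recording example further down; none of these names are taken from this file.

# Minimal sketch (assumed helper, not from the original project): run a batch of
# simulated dialogues and track success / reward statistics in a status dict.
def simulation_epoch(dialog_manager, num_episodes):
    stats = {'successes': 0, 'count': 0, 'cumulative_reward': 0}
    for _ in range(num_episodes):
        dialog_manager.initialize_episode()
        episode_over, episode_reward = False, 0
        while not episode_over:
            # next_turn() is assumed to return episode_over and reward first
            episode_over, reward = dialog_manager.next_turn()[:2]
            episode_reward += reward
        stats['count'] += 1
        stats['cumulative_reward'] += episode_reward
        if episode_reward > 0:  # assumed convention: positive return means success
            stats['successes'] += 1
    return stats
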
Example #2

	def __init__(self, movie_dict=None, act_set=None, slot_set=None, params=None):

		## parameters associated with dialogue action and slot filling
		self.movie_dict = movie_dict
		self.act_set = act_set
		self.slot_set = slot_set
		self.act_cardinality = len(act_set.keys())
		self.slot_cardinality = len(slot_set.keys())

		self.feasible_actions = dialog_config.feasible_actions
		self.num_actions = len(self.feasible_actions)

		# RL-specific parameters
		self.params = params
		# epsilon: exploration parameter
		self.epsilon = params['epsilon']
		self.agent_run_mode = params['agent_run_mode']
		self.agent_act_level = params['agent_act_level']
		# experience replay
		# self.experience_replay_pool_size = params.get('experience_replay_pool_size', 1000)
		# self.experience_replay_pool = [] #Replay_Memory(self.experience_replay_pool_size)
		self.hidden_size = params.get('dqn_hidden_size', 60)
		# gamma : discount factor
		self.gamma = params.get('gamma', 0.99)
		self.predict_mode = params.get('predict_mode', False)
		self.actor_lr = params.get('actor_lr', 0.0005)
		self.critic_lr = params.get('critic_lr', 0.001)
		self.gan_critic_lr = params.get('gan_critic_lr', 0.001)
		self.discriminator_lr = params.get('discriminator_lr', 0.0005)
		self.discriminator_batch_size = params.get('discriminator_batch_size', 1)
		self.expert_path = params["expert_path"]
		self.reg_cost = self.params.get('reg_cost', 1e-3)

		## warm start:
		## there is no warm start since there is no experience replay
		# self.warm_start = params.get('warm_start', 0)

		self.max_turn = params['max_turn'] + 4
		self.state_dimension = 2 * self.act_cardinality + 7 * self.slot_cardinality + 3 + self.max_turn
		self.expert_weights = params['expert_weights']
		# Build models
		self.build_actor_model(self.actor_lr)
		self.build_critic_model(self.critic_lr)
		self.build_critic_model(self.gan_critic_lr, True)
		self.build_discriminator(self.gan_critic_lr)
		self.n = params.get('n', 50)

		## load a model if present
		if params['trained_model_path'] is not None:
			self.load(params['trained_actor_model_path'], "actor")
			self.load(params['trained_critic_model_path'], "critic")
			self.load(params['trained_adversarial_critic_model_path'], "advesarial_critic")
			self.load(params['trained_discriminator_model_path'], "discriminator")
			self.predict_mode = True
			self.warm_start = 2
		#self.expert = DQN(self.state_dimension, self.hidden_size, self.hidden_size, self.num_actions)
		self.expert = self.build_expert_model()
		# self.clone_dqn = copy.deepcopy(self.expert)
		# self.clone_dqn = keras.models.clone_model(self.expert)
		self.cur_bellman_err = 0
			
		# Prediction Mode: load trained DQN model
		if params['expert_path'] is not None:
			# self.dqn.model = model_from_json(params['expert_path'])
			# copy.deepcopy(self.load_trained_DQN(params['expert_path']))
			# self.dqn.model.load_weights(params['expert_weights'])
			self.predict_mode = True
			self.warm_start = 2
		user_sim = RuleSimulator(params['movie_dictionary'],
				params['act_set'],
				params['slot_set'],
				params['goal_set'],
				params['usersim_params'])
		self.dialog_manager = DialogManager(self.expert,
				user_sim,
				params['act_set'],
				params['slot_set'],
				params['movie_kb'])
		user_sim.set_nlg_model(params['nlg'])
		user_sim.set_nlu_model(params['nlu'])
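
For reference, the constructor above indexes the following params keys directly (the ones read with .get have defaults and can be omitted). The dict below is a placeholder sketch; all values are illustrative, not taken from the original project.

# Placeholder values only; keys mirror the direct params[...] lookups above.
example_params = {
    'epsilon': 0.1,                  # assumed meaning: exploration parameter
    'agent_run_mode': 0,
    'agent_act_level': 0,
    'max_turn': 40,
    'expert_path': None,             # set to a model path to enable prediction mode
    'expert_weights': None,
    'trained_model_path': None,      # set to load actor/critic/discriminator checkpoints
    # used to build the internal RuleSimulator / DialogManager:
    'movie_dictionary': {}, 'act_set': {}, 'slot_set': {},
    'goal_set': {}, 'usersim_params': {}, 'movie_kb': {},
    'nlg': None, 'nlu': None,
}
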
Example #3
cdir = "sessions/" + str(uid) + '_' + datetime.datetime.now().strftime(
    '%Y-%m-%d_%H-%M-%S') + "/"
if not os.path.exists(cdir):
    os.makedirs(cdir)
with open(os.path.join(cdir, 'credentials'), 'w') as f:
    f.write(uname)
try:
    for i in range(N):
        print "-" * 200 + "\n第{0}次对话:".format(i)
        dia = []
        curr_agent = agent
        dia.append(curr_agent)
        dialog_manager = DialogManager(curr_agent,
                                       user_sim,
                                       db_full,
                                       db_inc,
                                       movie_kb,
                                       verbose=False)
        utt = dialog_manager.initialize_episode()
        dia.append(copy.deepcopy(utt))
        total_reward = 0
        while True:
            episode_over, reward, utt, agact = dialog_manager.next_turn()
            dia.append(agact)
            dia.append(copy.deepcopy(utt))
            total_reward += reward
            if episode_over:
                break
        pkl.dump(dia, open(cdir + str(i) + ".p", 'wb'))
except KeyboardInterrupt:
    sys.exit()
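
Each session is dumped to cdir + str(i) + ".p"; reading one back is a matter of unpickling the list in the same layout it was written: the agent object first, then the opening utterance, then alternating agent actions and turn utterances. A minimal sketch, assuming the pkl alias above is the standard pickle/cPickle module:

import pickle as pkl  # assumption: the snippet above aliases pickle (or cPickle) this way

def load_session(path):
    # Layout written above: [agent, opening_utt, agact_1, utt_1, agact_2, utt_2, ...]
    with open(path, 'rb') as f:
        dia = pkl.load(f)
    return dia[0], dia[1], dia[2:]
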
Example #4
                                  pol_start=params['pol_start'],
                                  lr=params['lr'],
                                  N=params['featN'],
                                  tr=params['tr'],
                                  ts=params['ts'],
                                  frac=params['frac'],
                                  max_req=params['max_req'],
                                  upd=params['upd'],
                                  name=params['model_name'])
else:
    print "Invalid agent!"
    sys.exit()

dialog_manager = DialogManager(agent,
                               user_sim,
                               db_full,
                               db_inc,
                               movie_kb,
                               verbose=False)
dialog_manager_eval = DialogManager(agent_eval,
                                    user_sim,
                                    db_full,
                                    db_inc,
                                    movie_kb,
                                    verbose=False)


def eval_agent(ite, max_perf, best=False):
    num_iter = 2000
    nn = np.sqrt(num_iter)
    if best:
        agent_eval.load_model(dialog_config.MODEL_PATH + 'best_' +
Example #5
File: run.py  Project: zxsted/DDQ
################################################################################
# load trained NLU model
################################################################################
nlu_model_path = params['nlu_model_path']
nlu_model = nlu()
nlu_model.load_nlu_model(nlu_model_path)

agent.set_nlu_model(nlu_model)
user_sim.set_nlu_model(nlu_model)
world_model.set_nlu_model(nlu_model)

################################################################################
# Dialog Manager
################################################################################
dialog_manager = DialogManager(agent, user_sim, world_model, act_set, slot_set, movie_kb)

################################################################################
#   Run num_episodes Conversation Simulations
################################################################################
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}

simulation_epoch_size = params['simulation_epoch_size']
batch_size = params['batch_size']  # default = 16
warm_start = params['warm_start']
warm_start_epochs = params['warm_start_epochs']
planning_steps = params['planning_steps']

agent.planning_steps = planning_steps

success_rate_threshold = params['success_rate_threshold']
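
planning_steps is the Deep Dyna-Q knob in this script: besides dialogues with the real user simulator, the agent is trained on additional dialogues rolled out against world_model. A rough sketch of that interleaving follows; the use_environment flag and the helper name are assumptions, not this file's actual API.

# Rough DDQ-style sketch; function/flag names are assumptions, not from this file.
def run_planning_round(dialog_manager, planning_steps, real_episodes=1):
    # dialogues against the real (rule-based) user simulator
    for _ in range(real_episodes):
        dialog_manager.initialize_episode(use_environment=True)
        episode_over = False
        while not episode_over:
            episode_over = dialog_manager.next_turn()[0]
    # extra dialogues against the learned world model
    for _ in range(planning_steps * real_episodes):
        dialog_manager.initialize_episode(use_environment=False)
        episode_over = False
        while not episode_over:
            episode_over = dialog_manager.next_turn()[0]
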
Example #6
user_sim_planning.set_nlg_model(nlg_model)

################################################################################
# load trained NLU model
################################################################################
nlu_model_path = params['nlu_model_path']
nlu_model = nlu()
nlu_model.load_nlu_model(nlu_model_path)

agent.set_nlu_model(nlu_model)
user_sim.set_nlu_model(nlu_model)

################################################################################
# Dialog Manager
################################################################################
dialog_manager = DialogManager(agent, user_sim, user_sim_planning, act_set, slot_set, movie_kb, discriminator)

################################################################################
#   Run num_episodes Conversation Simulations
################################################################################
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}

simulation_epoch_size = params['simulation_epoch_size']
batch_size = params['batch_size'] # default = 16
warm_start = params['warm_start']
warm_start_epochs = params['warm_start_epochs']
planning_steps = params['planning_steps']

success_rate_threshold = params['success_rate_threshold']
save_check_point = params['save_check_point']
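
warm_start and warm_start_epochs usually gate a rule-based experience-collection phase before RL training starts; the flag convention (set warm_start to 2 once the phase is over) matches the agent constructor in Example #2. A compact sketch, with the per-epoch helper passed in as an assumed callable:

# Compact sketch; run_warm_start_epoch is an assumed callable, not from this file.
def maybe_warm_start(agent, run_warm_start_epoch, warm_start, warm_start_epochs):
    if warm_start == 1:
        for _ in range(warm_start_epochs):
            run_warm_start_epoch()   # one epoch of rule-based dialogues filling the buffer
        agent.warm_start = 2         # leave warm-start mode before RL training begins
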
Example #7
# load trained NLG model
################################################################################
agent.set_nlg_model(nlg_model)
user_sim.set_nlg_model(nlg_model)

################################################################################
# load trained NLU model
################################################################################

agent.set_nlu_model(nlu_model)
user_sim.set_nlu_model(nlu_model)

################################################################################
# Dialog Manager
################################################################################
dialog_manager = DialogManager(agent, user_sim, act_set, slot_set, movie_kb,
                               params['is_a2c'])

################################################################################
#   Run num_episodes Conversation Simulations
################################################################################
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}

simulation_epoch_size = params['simulation_epoch_size']
batch_size = params['batch_size']  # default = 16
warm_start = params['warm_start']
warm_start_epochs = params['warm_start_epochs']

success_rate_threshold = params['success_rate_threshold']
save_check_point = params['save_check_point']
""" Best Model and Performance Records """
best_model = {}
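
best_model, success_rate_threshold and save_check_point typically work together: once the evaluated success rate clears the threshold and beats the best seen so far, a copy of the agent is kept (and, every save_check_point episodes, written to disk). A sketch of the bookkeeping half, with the 'success_rate' key and the deepcopy-of-agent convention as assumptions:

import copy

# Sketch of the best-model bookkeeping; key names are assumptions, not from this file.
def maybe_update_best(agent, episode, success_rate, best_model, success_rate_threshold):
    if success_rate > success_rate_threshold and \
            success_rate > best_model.get('success_rate', 0.0):
        best_model['model'] = copy.deepcopy(agent)
        best_model['success_rate'] = success_rate
        best_model['episode'] = episode
    return best_model
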
Example #8
        else:
            curr_user_goals["all"] = []
            curr_user_goals["all"].extend(copy.deepcopy(train_user_goals))

        # create pretrain user simulator
        pretrain_user_sim = RuleSimulator(mock_dictionary, act_set, slot_set, copy.deepcopy(curr_user_goals), pretrain_usersim_params)
        # create a user simulator that is not pre-trained
        user_sim = RuleSimulator(mock_dictionary, act_set, slot_set, copy.deepcopy(curr_user_goals), usersim_params)

        # create the pre-trained agent
        pretrained_agent = AgentDQN(kb, act_set, slot_set, pretrained_agent_params)
        # create the agent from scratch
        agent = AgentDQN(kb, act_set, slot_set, agent_params)

        # create dialogue manager for pre-trained agent
        pretrain_dialog_manager = DialogManager(pretrained_agent, pretrain_user_sim, act_set, slot_set, kb)

        # create dialogue manager for the non-pre-trained agent
        dialog_manager = DialogManager(agent, user_sim, act_set, slot_set, kb)

        # the warmup success rate of the pre-trained model
        pretrain_warmup_succ_rate = 0
        # the warmup success rate of the scratch model
        warmup_succ_rate = 0


        # warm-start the pre-trained agent
        pretrain_warmup_res = warm_start_simulation(pretrain_dialog_manager, pretrained_agent, copy.deepcopy(curr_user_goals["all"]), 2, 8, "pretrain")


        # warm-start the agent from scratch