def main():
    """Fine-tune a pretrained system model with offline reinforcement learning.

    The function:
      1. creates a timestamped ``rl-<start_time>`` experiment folder inside the
         pretrained model's folder,
      2. builds the RL configuration and writes it to ``rl_config.json``,
      3. reloads the supervised configuration and model weights,
      4. wraps the model in an ``OfflineLatentRlAgent`` and runs
         ``OfflineTaskReinforce``.

    All settings are hard-coded below; the function takes no arguments and
    returns nothing.
    """
    time_format = '%Y-%m-%d-%H-%M-%S'
    start_time = time.strftime(time_format, time.localtime(time.time()))
    print('[START]', start_time, '=' * 30)

    # RL configuration
    env = 'gpu'
    pretrained_folder = '2019-06-20-22-49-55-sl_cat'
    pretrained_model_id = 41

    exp_dir = os.path.join('sys_config_log_model', pretrained_folder, 'rl-' + start_time)
    # create exp folder
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)

    rl_config = Pack(
        train_path='../data/norm-multi-woz/train_dials.json',
        valid_path='../data/norm-multi-woz/val_dials.json',
        test_path='../data/norm-multi-woz/test_dials.json',
        sv_config_path=os.path.join('sys_config_log_model', pretrained_folder, 'config.json'),
        sv_model_path=os.path.join('sys_config_log_model', pretrained_folder,
                                   '{}-model'.format(pretrained_model_id)),
        rl_config_path=os.path.join(exp_dir, 'rl_config.json'),
        rl_model_path=os.path.join(exp_dir, 'rl_model'),
        ppl_best_model_path=os.path.join(exp_dir, 'ppl_best.model'),
        reward_best_model_path=os.path.join(exp_dir, 'reward_best.model'),
        record_path=exp_dir,
        record_freq=200,
        sv_train_freq=0,  # TODO pay attention to main.py, cuz it is also controlled there
        use_gpu=env == 'gpu',
        nepoch=10,
        nepisode=0,
        tune_pi_only=False,
        max_words=100,
        temperature=1.0,
        episode_repeat=1.0,
        rl_lr=0.01,
        momentum=0.0,
        nesterov=False,
        gamma=0.99,
        rl_clip=5.0,
        random_seed=100,
    )

    # save configuration
    with open(rl_config.rl_config_path, 'w') as f:
        json.dump(rl_config, f, indent=4)

    # set random seed
    set_seed(rl_config.random_seed)

    # load previous supervised learning configuration and corpus;
    # the context manager closes the file handle as soon as it is parsed
    with open(rl_config.sv_config_path) as f:
        sv_config = Pack(json.load(f))
    sv_config['dropout'] = 0.0
    sv_config['use_gpu'] = rl_config.use_gpu

    corpus = NormMultiWozCorpus(sv_config)

    # TARGET AGENT
    sys_model = SysPerfectBD2Cat(corpus, sv_config)
    if sv_config.use_gpu:
        sys_model.cuda()
    # map_location returns the storage unchanged, so the checkpoint is not
    # remapped to the device it was saved from while being deserialized
    sys_model.load_state_dict(
        th.load(rl_config.sv_model_path, map_location=lambda storage, location: storage))
    sys_model.eval()

    # named sys_agent (not ``sys``) to avoid shadowing the standard-library module name
    sys_agent = OfflineLatentRlAgent(sys_model, corpus, rl_config, name='System',
                                     tune_pi_only=rl_config.tune_pi_only)

    # start RL
    reinforce = OfflineTaskReinforce(sys_agent, corpus, sv_config, sys_model, rl_config,
                                     task_generate)
    reinforce.run()

    end_time = time.strftime(time_format, time.localtime(time.time()))
    print('[END]', end_time, '=' * 30)
improve_threshold = 0.996, patient_increase = 2.0, save_model = True, early_stop = False, gen_type = 'greedy', preview_batch_num = 1, max_dec_len = 40, k = domain_info.input_length(), goal_embed_size = 64, goal_nhid = 64, init_range = 0.1, pretrain_folder = '2018-11-19-21-28-29-sl_latent', forward_only = False ) set_seed(10) if config.forward_only: saved_path = os.path.join(stats_path, config.pretrain_folder) config = Pack(json.load(open(os.path.join(saved_path, 'config.json')))) config['forward_only'] = True else: saved_path = os.path.join(stats_path, start_time+'-'+os.path.basename(__file__).split('.')[0]) if not os.path.exists(saved_path): os.mkdir(saved_path) config.saved_path = saved_path prepare_dirs_loggers(config) logger = logging.getLogger() logger.info('[START]\n{}\n{}'.format(start_time, '='*30))
def main():
    """Fine-tune a pretrained negotiation agent with self-play reinforcement learning.

    The function:
      1. creates a timestamped ``rl-<start_time>`` experiment folder inside the
         pretrained model's folder,
      2. builds the RL configuration and writes it to ``rl_config.json``,
      3. reloads the system model (``GaussHRED``), the user simulator (``HRED``)
         and the FB judger model from their saved configs and weights,
      4. runs ``Reinforce`` over dialogs between the two agents,
      5. saves the system model's state dict to ``rl_config.rl_model_path``.

    All settings are hard-coded below; the function takes no arguments and
    returns nothing.
    """
    time_format = '%Y-%m-%d-%H-%M-%S'
    start_time = time.strftime(time_format, time.localtime(time.time()))
    print('[START]', start_time, '=' * 30)

    # RL configuration
    folder = '2019-06-20-10-24-23-sl_gauss'
    epoch_id = '28'
    env = 'gpu'
    sim_epoch_id = '23'
    simulator_folder = '2019-06-20-09-19-39-sl_word'

    exp_dir = os.path.join('config_log_model', folder, 'rl-' + start_time)
    if not os.path.exists(exp_dir):
        os.mkdir(exp_dir)

    rl_config = Pack(
        train_path='../data/negotiate/train.txt',
        val_path='../data/negotiate/val.txt',
        test_path='../data/negotiate/test.txt',
        selfplay_path='../data/negotiate/selfplay.txt',
        selfplay_eval_path='../data/negotiate/selfplay_eval.txt',
        sim_config_path=os.path.join('config_log_model', simulator_folder, 'config.json'),
        sim_model_path=os.path.join('config_log_model', simulator_folder,
                                    '{}-model'.format(sim_epoch_id)),
        sv_config_path=os.path.join('config_log_model', folder, 'config.json'),
        sv_model_path=os.path.join('config_log_model', folder, '{}-model'.format(epoch_id)),
        rl_config_path=os.path.join(exp_dir, 'rl_config.json'),
        rl_model_path=os.path.join(exp_dir, 'rl_model'),
        ppl_best_model_path=os.path.join(exp_dir, 'ppl_best_model'),
        reward_best_model_path=os.path.join(exp_dir, 'reward_best_model'),
        judger_model_path=os.path.join('../FB', 'sv_model.th'),
        judger_config_path=os.path.join('../FB', 'judger_config.json'),
        record_path=exp_dir,
        record_freq=50,
        use_gpu=env == 'gpu',
        nepoch=4,
        nepisode=0,
        sv_train_freq=0,  # TODO pay attention to main.py, cuz it is also controlled there
        eval_freq=0,
        max_words=100,
        rl_lr=0.2,
        momentum=0.1,
        nesterov=True,
        gamma=0.95,
        rl_clip=1.0,
        ref_text='../data/negotiate/train.txt',
        domain='object_division',
        max_nego_turn=50,
        random_seed=0,
        use_latent_rl=True)

    # save configuration
    with open(rl_config.rl_config_path, 'w') as f:
        json.dump(rl_config, f, indent=4)

    # set random seed
    set_seed(rl_config.random_seed)

    # load previous supervised learning configuration and corpus;
    # context managers close each config file as soon as it is parsed
    with open(rl_config.sv_config_path) as f:
        sv_config = Pack(json.load(f))
    with open(rl_config.sim_config_path) as f:
        sim_config = Pack(json.load(f))

    # TODO revise the use_gpu in the config
    sv_config['use_gpu'] = rl_config.use_gpu
    sim_config['use_gpu'] = rl_config.use_gpu

    corpus = DealCorpus(sv_config)

    # load models for two agents
    # TARGET AGENT
    sys_model = models_deal.GaussHRED(corpus, sv_config)
    if sv_config.use_gpu:  # TODO gpu -> cpu transfer
        sys_model.cuda()
    # map_location returns the storage unchanged, so the checkpoint is not
    # remapped to the device it was saved from while being deserialized
    sys_model.load_state_dict(
        th.load(rl_config.sv_model_path, map_location=lambda storage, location: storage))
    # we don't want to use Dropout during RL
    sys_model.eval()
    # named sys_agent (not ``sys``) to avoid shadowing the standard-library module name
    sys_agent = LatentRlAgent(sys_model, corpus, rl_config, name='System',
                              use_latent_rl=rl_config.use_latent_rl)

    # SIMULATOR we keep usr frozen, i.e. we don't update its parameters
    usr_model = models_deal.HRED(corpus, sim_config)
    if sim_config.use_gpu:  # TODO gpu -> cpu transfer
        usr_model.cuda()
    usr_model.load_state_dict(
        th.load(rl_config.sim_model_path, map_location=lambda storage, location: storage))
    usr_model.eval()
    usr_type = LstmAgent
    usr = usr_type(usr_model, corpus, rl_config, name='User')

    # load FB judger model
    with open(rl_config.judger_config_path) as f:
        judger_config = Pack(json.load(f))
    judger_config['cuda'] = rl_config.use_gpu
    judger_config['data'] = '../data/negotiate'
    judger_device_id = FB_use_cuda(judger_config.cuda)
    judger_word_corpus = FbWordCorpus(judger_config.data,
                                      freq_cutoff=judger_config.unk_threshold,
                                      verbose=True)
    judger_model = FbDialogModel(judger_word_corpus.word_dict,
                                 judger_word_corpus.item_dict,
                                 judger_word_corpus.context_dict,
                                 judger_word_corpus.output_length,
                                 judger_config,
                                 judger_device_id)
    # the judger is only moved to a CUDA device when FB_use_cuda returned an id
    if judger_device_id is not None:
        judger_model.cuda(judger_device_id)
    judger_model.load_state_dict(
        th.load(rl_config.judger_model_path, map_location=lambda storage, location: storage))
    judger_model.eval()
    judger = Judger(judger_model, judger_device_id)

    # initialize communication dialogue between two agents
    dialog = Dialog([sys_agent, usr], judger, rl_config)
    ctx_gen = ContextGenerator(rl_config.selfplay_path)

    # simulation module
    dialog_eval = DialogEval([sys_agent, usr], judger, rl_config)
    ctx_gen_eval = ContextGeneratorEval(rl_config.selfplay_eval_path)

    # start RL
    reinforce = Reinforce(dialog, ctx_gen, corpus, sv_config, sys_model, usr_model,
                          rl_config, dialog_eval, ctx_gen_eval)
    reinforce.run()

    # save sys model
    th.save(sys_model.state_dict(), rl_config.rl_model_path)

    end_time = time.strftime(time_format, time.localtime(time.time()))
    print('[END]', end_time, '=' * 30)
fix_batch=True, fix_train_batch=False, avg_type='word', print_step=300, ckpt_step=1416, improve_threshold=0.996, patient_increase=2.0, save_model=True, early_stop=False, gen_type='greedy', preview_batch_num=None, k=domain_info.input_length(), init_range=0.1, pretrain_folder='2019-06-20-21-43-06-sl_cat', forward_only=False) set_seed(config.seed) start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) stats_path = 'sys_config_log_model' if config.forward_only: saved_path = os.path.join(stats_path, config.pretrain_folder) config = Pack(json.load(open(os.path.join(saved_path, 'config.json')))) config['forward_only'] = True else: saved_path = os.path.join( stats_path, start_time + '-' + os.path.basename(__file__).split('.')[0]) if not os.path.exists(saved_path): os.makedirs(saved_path) config.saved_path = saved_path prepare_dirs_loggers(config)
#forward_only = True, # different batching style seq=True, # use oracle context and proposal parse oracle_context=True, #oracle_context = False, #oracle_parse = False, oracle_parse=True, semisupervised=False, #prop_weight = 0.1, prop_weight=1, #prop_weight = 0, tie_prop_utt_enc=False, ) set_seed(config.random_seed) if config.forward_only: saved_path = os.path.join(stats_path, config.pretrain_folder) config = Pack(json.load(open(os.path.join(saved_path, 'config.json')))) config['forward_only'] = True else: saved_path = os.path.join( stats_path, start_time + '-' + os.path.basename(__file__).split('.')[0]) if not os.path.exists(saved_path): os.mkdir(saved_path) config.saved_path = saved_path prepare_dirs_loggers(config)