def run_rl(self):
    '''
    Run the main RL loop until clock.max_frame.
    Resets the env and agent once up front, then loops: handle an episode boundary if the last step ended one, checkpoint, stop once clock.get() reaches clock.max_frame, otherwise take one env step and update the agent with the transition.
    '''
    logger.info(
        f'Running RL loop for trial {self.spec["meta"]["trial"]} session {self.index}'
    )
    clock = self.env.clock
    obs = self.env.reset()
    clock.tick('t')
    self.agent.reset(obs)
    done = False
    while True:
        if util.epi_done(done):  # before starting another episode
            logger.nl(f'A dialog session is done')
            self.try_ckpt(self.agent, self.env)
            if clock.get() < clock.max_frame:  # reset and continue
                clock.tick('epi')
                obs = self.env.reset()
                self.agent.reset(obs)
                done = False
        # checkpoint on every pass, so an episode boundary calls try_ckpt twice
        # NOTE(review): nesting was reconstructed from a whitespace-collapsed source; confirm this call sits at loop level
        self.try_ckpt(self.agent, self.env)
        if clock.get() >= clock.max_frame:  # finish
            break
        clock.tick('t')
        action = self.agent.act(obs)
        # info is returned by env.step but not used in this loop
        next_obs, reward, done, info = self.env.step(action)
        self.agent.update(obs, action, reward, next_obs, done)
        obs = next_obs
def read_spec_and_run(spec_file, spec_name, lab_mode):
    '''
    Load the spec named spec_name from spec_file and run it under lab_mode.
    A non-train lab_mode written as 'mode@prename' is split, and an eval spec is loaded for prename.
    A spec containing 'spec_params' is expanded into param specs that run as separate processes, chunk by chunk.
    '''
    logger.info(
        f'Running lab spec_file:{spec_file} spec_name:{spec_name} in mode:{lab_mode}'
    )
    # only an eval-style mode carrying an '@' needs the dedicated eval spec
    needs_eval_spec = lab_mode not in TRAIN_MODES and '@' in lab_mode
    if needs_eval_spec:
        lab_mode, prename = lab_mode.split('@')
        spec = spec_util.get_eval_spec(spec_file, spec_name, prename)
    else:
        spec = spec_util.get(spec_file, spec_name)

    if 'spec_params' not in spec:
        run_spec(spec, lab_mode)
        return

    # spec is parametrized; run them in parallel
    param_specs = spec_util.get_param_specs(spec)
    chunk_size = spec['meta']['param_spec_process']
    # can't use Pool since it cannot spawn nested Process, which is needed for VecEnv and parallel sessions. So these will run and wait by chunks
    processes = [
        mp.Process(target=run_spec, args=(param_spec, lab_mode))
        for param_spec in param_specs
    ]
    for process_chunk in ps.chunk(processes, chunk_size):
        for process in process_chunk:
            process.start()
        # wait for the whole chunk before starting the next one
        for process in process_chunk:
            process.join()
def init_global_nets(algorithm):
    '''
    Build the dict of globally shared attributes for Hogwild from one algorithm instance created in an isolated Session.
    spec.meta.distributed selects how each trainable net is keyed:
    - 'shared': keyed by the net's own attribute name, so it overrides the local net of the same name
    - 'synced': keyed as `global_{net}`, kept as a separate reference for syncing
    A net is treated as trainable only if the algorithm has the matching optim attribute.
    An optim whose class name contains 'Global' is shared too, together with its lr_scheduler.
    @returns dict of attribute name to shared object
    '''
    dist_mode = algorithm.agent.spec['meta']['distributed']
    assert dist_mode in ('shared', 'synced'), f'Unrecognized distributed mode'
    global_nets = {}
    for net_name in algorithm.net_names:
        optim_name = net_name.replace('net', 'optim')
        if not hasattr(algorithm, optim_name):
            # no optim means the net is not trainable; nothing to share
            continue
        shared_net = getattr(algorithm, net_name)
        shared_net.share_memory()  # make net global
        # 'shared' reuses the local name, 'synced' stores it under a separate global_ name
        net_key = net_name if dist_mode == 'shared' else f'global_{net_name}'
        global_nets[net_key] = shared_net

        optim = getattr(algorithm, optim_name)
        if 'Global' not in util.get_class_name(optim):
            continue
        # a Global optim overrides the local optim and its scheduler
        optim.share_memory()  # make optim global
        global_nets[optim_name] = optim
        lr_scheduler_name = net_name.replace('net', 'lr_scheduler')
        global_nets[lr_scheduler_name] = getattr(algorithm, lr_scheduler_name)
    logger.info(
        f'Initialized global_nets attr {list(global_nets.keys())} for Hogwild')
    return global_nets
def retro_analyze_sessions(predir):
    '''Run the session retro-analysis in parallel over every session spec file found in predir'''
    logger.info('Running retro_analyze_sessions')
    spec_paths = glob(f'{predir}/*_s*_spec.json')
    # one single-element args tuple per session spec path
    task_args = [(spec_path, ) for spec_path in spec_paths]
    util.parallelize(_retro_analyze_session, task_args, num_cpus=util.NUM_CPUS)
def train(self):
    '''
    Train the net on batches sampled from the warmup memory and the regular memory.
    Each of the training_iter rounds samples from every memory holding at least batch_size items and runs training_batch_iter train steps per sampled batch.
    @returns float total loss divided by (training_iter * training_batch_iter), or np.nan in eval lab modes or when to_train is not 1
    '''
    if util.in_eval_lab_modes():
        return np.nan
    clock = self.body.env.clock
    if self.to_train != 1:
        return np.nan

    total_loss = torch.tensor(0.0)
    for _ in range(self.training_iter):
        batches = []
        # a memory contributes a batch only once it holds a full batch
        if self.body.warmup_memory.size >= self.body.warmup_memory.batch_size:
            batches.append(self.warmup_sample())
        if self.body.memory.size >= self.body.memory.batch_size:
            batches.append(self.sample())
        clock.set_batch_size(sum(map(len, batches)))
        for batch in batches:
            for _ in range(self.training_batch_iter):
                step_loss = self.calc_q_loss(batch)
                self.net.train_step(step_loss, self.optim, self.lr_scheduler, clock=clock, global_net=self.global_net)
                total_loss += step_loss
    loss = total_loss / (self.training_iter * self.training_batch_iter)
    # reset
    self.to_train = 0
    logger.info(
        f'Trained {self.name} at epi: {clock.epi}, warmup_size: {self.body.warmup_memory.size}, memory_size: {self.body.memory.size}, loss: {loss:g}'
    )
    return loss.item()
def retro_analyze_trials(predir):
    '''Run the trial retro-analysis in parallel over every trial spec file found in predir'''
    logger.info('Running retro_analyze_trials')
    session_spec_paths = glob(f'{predir}/*_s*_spec.json')
    trial_pattern_paths = glob(f'{predir}/*_t*_spec.json')
    # remove session spec paths
    trial_spec_paths = ps.difference(trial_pattern_paths, session_spec_paths)
    # one single-element args tuple per trial spec path
    task_args = [(spec_path,) for spec_path in trial_spec_paths]
    util.parallelize(_retro_analyze_trial, task_args, num_cpus=util.NUM_CPUS)
def save(self, ckpt=None):
    '''
    Save the net models listed in self.net_names.
    Only logs a message when self.net_names has not been declared.
    @param ckpt Passed through to net_util.save_algorithm
    '''
    if hasattr(self, 'net_names'):
        net_util.save_algorithm(self, ckpt=ckpt)
        return
    logger.info(
        'No net declared in self.net_names in init_nets(); no models to save.'
    )
def set_global_nets(algorithm, global_nets):
    '''
    For Hogwild, attach the attributes built by init_global_nets to an algorithm. Use in algorithm init.
    Every `global_{net}` attribute is first set to None; the entries of global_nets are then applied on top when it is not None.
    '''
    # default to None so algorithm always has self.global_{net} to pass into train_step
    for net_name in algorithm.net_names:
        setattr(algorithm, f'global_{net_name}', None)
    if global_nets is None:
        return
    # set attr created in init_global_nets
    util.set_attr(algorithm, global_nets)
    logger.info(f'Set global_nets attr {list(global_nets.keys())} for Hogwild')
def __init__(self, spec, aeb_space):
    '''
    Register this env space on aeb_space and create one env per entry of spec['env'].
    '''
    self.spec = spec
    self.aeb_space = aeb_space
    aeb_space.env_space = self
    self.info_space = aeb_space.info_space
    self.envs = []
    num_envs = len(self.spec['env'])
    # append one at a time so self.envs already holds the earlier envs while the next is built
    for env_idx in range(num_envs):
        self.envs.append(make_env(self.spec, env_idx, env_space=self))
    logger.info(util.self_desc(self))
def post_init_nets(self):
    '''
    Conditionally load models: in eval lab modes call self.load(), otherwise only log.
    Call at the end of init_nets() after setting self.net_names.
    '''
    assert hasattr(self, 'net_names')
    if not util.in_eval_lab_modes():
        logger.info(f'Initialized algorithm models for lab_mode: {util.get_lab_mode()}')
        return
    logger.info(f'Loaded algorithm models for lab_mode: {util.get_lab_mode()}')
    self.load()
def log_summary(self, df_mode):
    '''
    Log the last row of this body's `{df_mode}_df` dataframe as 'key: value' pairs
    @param str:df_mode 'train' or 'eval'
    '''
    prefix = self.get_log_prefix()
    latest_row = getattr(self, f'{df_mode}_df').iloc[-1]
    pairs = (f'{k}: {v:g}' for k, v in latest_row.items())
    row_str = ' '.join(pairs)
    logger.info(f'{prefix} [{df_mode}_df] {row_str}')
def load(self):
    '''
    Load the net models listed in self.net_names, or only log a message when none are declared.
    Afterwards, for every attribute of self named `*_scheduler` that has an `end_val`, set the correspondingly named variable on self.body to that end value.
    '''
    if hasattr(self, 'net_names'):
        net_util.load_algorithm(self)
    else:
        logger.info('No net declared in self.net_names in init_nets(); no models to load.')
    # set decayable variables to final values
    for attr_name, scheduler in vars(self).items():
        if not attr_name.endswith('_scheduler'):
            continue
        if hasattr(scheduler, 'end_val'):
            var_name = attr_name.replace('_scheduler', '')
            setattr(self.body, var_name, scheduler.end_val)
def run_eval(self):
    '''
    Evaluate the agent on eval_env for num_eval episodes and log a one-line summary.
    Success rate is reported whenever it is not None; the remaining metrics are reported only when truthy.
    '''
    averages = analysis.gen_avg_result(self.agent, self.eval_env, self.num_eval)
    avg_return, avg_len, avg_success, avg_p, avg_r, avg_f1, avg_book_rate = averages
    pieces = [f'{self.num_eval} episodes, {avg_return:.2f} return']
    if avg_success is not None:
        pieces.append(f', {avg_success*100:.2f}% success rate')
    if avg_len:
        pieces.append(f', {avg_len:.2f} turns')
    if avg_p:
        # recall and F1 are reported together with precision
        pieces.append(f', {avg_p:.2f} P, {avg_r:.2f} R, {avg_f1:.2f} F1')
    if avg_book_rate:
        pieces.append(f', {avg_book_rate*100:.2f}% book rate')
    logger.info(''.join(pieces))
def retro_analyze_experiment(predir):
    '''
    Retro analyze an experiment from the spec and saved trial data found via predir.
    Returns without analyzing when predir holds no experiment spec, or when the experiment's trial_data_dict file does not exist, i.e. no experiment had been ran.
    '''
    logger.info('Running retro_analyze_experiment')
    trial_spec_paths = glob(f'{predir}/*_t*_spec.json')
    # remove trial and session spec paths
    experiment_spec_paths = ps.difference(glob(f'{predir}/*_spec.json'), trial_spec_paths)
    if not experiment_spec_paths:
        # nothing left after removing trial/session specs; indexing [0] would raise IndexError
        logger.info('Skipping retro_analyze_experiment since no experiment spec was found')
        return
    experiment_spec_path = experiment_spec_paths[0]
    spec = util.read(experiment_spec_path)
    info_prepath = spec['meta']['info_prepath']
    trial_data_path = f'{info_prepath}_trial_data_dict.json'
    if not os.path.exists(trial_data_path):
        return  # only run analysis if experiment had been ran
    trial_data_dict = util.read(trial_data_path)
    analysis.analyze_experiment(spec, trial_data_dict)
def analyze_trial(trial_spec, session_metrics_list):
    '''
    Compute trial metrics from the session metrics, plot them, and return them.
    In lab mode 'train' the data directory is also archived to `{predir}.zip`.
    '''
    info_prepath = trial_spec['meta']['info_prepath']
    # calculate metrics
    trial_metrics = calc_trial_metrics(session_metrics_list, info_prepath)
    # plot graphs
    viz.plot_trial(trial_spec, trial_metrics)
    if util.get_lab_mode() != 'train':
        return trial_metrics
    # zip files
    predir, _, _, _, _, _ = util.prepath_split(info_prepath)
    shutil.make_archive(predir, 'zip', predir)
    logger.info(f'All trial data zipped to {predir}.zip')
    return trial_metrics
def analyze_experiment(spec, trial_data_dict):
    '''
    Save the trial data, compute and plot the experiment dataframe, archive the data directory, and return the dataframe.
    '''
    info_prepath = spec['meta']['info_prepath']
    # persist the raw trial data first, before any analysis runs
    trial_data_path = f'{info_prepath}_trial_data_dict.json'
    util.write(trial_data_dict, trial_data_path)
    # calculate experiment df
    experiment_df = calc_experiment_df(trial_data_dict, info_prepath)
    # plot graph
    viz.plot_experiment(spec, experiment_df, METRICS_COLS)
    # zip files
    predir, _, _, _, _, _ = util.prepath_split(info_prepath)
    shutil.make_archive(predir, 'zip', predir)
    logger.info(f'All experiment data zipped to {predir}.zip')
    return experiment_df
def __init__(self, spec, e=None):
    '''
    Set up the MultiWoz environment wrapper: read the dimensions from env_spec, create the underlying MultiWozEnvironment, and expose its evaluator, spaces and attributes.
    '''
    super(MultiWozEnv, self).__init__(spec, e)
    # defaults, overridden below by the matching env_spec entries
    self.action_dim = 0
    self.observation_dim = 0
    util.set_attr(self, self.env_spec, ['observation_dim', 'action_dim'])
    # worker id: the last 4 characters of the pid followed by (e + unique id)
    pid = os.getpid()
    id_offset = self.e + int(ps.unique_id())
    worker_id = int(f'{pid}{id_offset}'[-4:])
    self.u_env = MultiWozEnvironment(self.env_spec, worker_id, self.action_dim)
    self.evaluator = self.u_env.evaluator
    self.patch_gym_spaces(self.u_env)
    self._set_attr_from_u_env(self.u_env)
    logger.info(util.self_desc(self))
def retro_analyze(predir):
    '''
    Method to analyze experiment/trial from files after it ran.
    Runs the session, trial and experiment retro-analyses in that order, with LOG_PREPATH pointed at a dedicated retro_analyze log.
    @param str:predir Data directory of the run, relative or absolute; trailing slashes are ignored
    @example

    yarn retro_analyze data/reinforce_cartpole_2018_01_22_211751/
    '''
    # sanitary: drop trailing slashes only; stripping leading ones would turn an absolute path into a relative one
    predir = predir.rstrip('/')
    os.environ['LOG_PREPATH'] = f'{predir}/log/retro_analyze'  # to prevent overwriting log file
    logger.info(f'Running retro-analysis on {predir}')
    retro_analyze_sessions(predir)
    retro_analyze_trials(predir)
    retro_analyze_experiment(predir)
    logger.info('Finished retro-analysis')
def __init__(self, spec, body, a=None, global_nets=None):
    '''
    Build the agent from spec['agent'][a].
    Optional components (nlu, dst/word_dst, state_encoder, action_decoder, nlg) are created only when their key is present in the agent spec, then the body is linked, and the algorithm and optional memory are created.
    '''
    self.spec = spec
    self.a = a or 0  # for compatibility with agent_space
    self.agent_spec = spec['agent'][self.a]
    self.name = self.agent_spec['name']
    assert not ps.is_list(
        global_nets
    ), f'single agent global_nets must be a dict, got {global_nets}'

    def build_component(module, spec_key):
        # instantiate the class named by the sub-spec's 'name', passing its other entries as kwargs;
        # work on a deep copy so popping 'name' leaves agent_spec untouched
        kwargs = deepcopy(ps.get(self.agent_spec, spec_key))
        component_class = getattr(module, kwargs.pop('name'))
        return component_class(**kwargs)

    self.nlu = None
    if 'nlu' in self.agent_spec:
        self.nlu = build_component(nlu, 'nlu')

    self.dst = None
    if 'dst' in self.agent_spec:
        self.dst = build_component(dst, 'dst')
    # a word_dst entry takes over self.dst when both are given
    if 'word_dst' in self.agent_spec:
        self.dst = build_component(word_dst, 'word_dst')

    self.state_encoder = None
    if 'state_encoder' in self.agent_spec:
        self.state_encoder = build_component(state_encoder, 'state_encoder')

    self.action_decoder = None
    if 'action_decoder' in self.agent_spec:
        self.action_decoder = build_component(action_decoder, 'action_decoder')

    self.nlg = None
    if 'nlg' in self.agent_spec:
        self.nlg = build_component(nlg, 'nlg')

    # link body and agent before the algorithm is constructed with this agent
    self.body = body
    body.agent = self
    algorithm_name = ps.get(self.agent_spec, 'algorithm.name')
    AlgorithmClass = getattr(algorithm, algorithm_name)
    self.algorithm = AlgorithmClass(self, global_nets)
    if ps.get(self.agent_spec, 'memory'):
        memory_name = ps.get(self.agent_spec, 'memory.name')
        MemoryClass = getattr(memory, memory_name)
        self.body.memory = MemoryClass(self.agent_spec['memory'], self.body)
    # a missing or falsy warmup_epi becomes -1
    self.warmup_epi = ps.get(self.agent_spec, 'algorithm.warmup_epi') or -1
    self.body.state, self.body.encoded_state, self.body.action = None, None, None
    logger.info(util.self_desc(self))
def __init__(self, agent, global_nets=None):
    '''
    Read the algorithm, net and optional memory specs from the agent, then initialize algorithm parameters and nets.
    @param {*} agent is the container for algorithm and related components, and interfaces with env.
    @param global_nets Passed through to init_nets
    '''
    self.agent = agent
    agent_spec = agent.agent_spec
    self.algorithm_spec = agent_spec['algorithm']
    self.name = self.algorithm_spec['name']
    self.net_spec = agent_spec.get('net', None)
    # memory_spec is only set when the agent spec has a truthy 'memory' entry
    if ps.get(agent_spec, 'memory'):
        self.memory_spec = agent_spec['memory']
    self.body = self.agent.body
    self.init_algorithm_params()
    self.init_nets(global_nets)
    logger.info(util.self_desc(self))
def __init__(self, spec, global_nets=None):
    '''
    Prepare a session: seed, cuda id and logger from the spec, save the session spec, build the agent and env, and build a separate eval env under the 'eval' lab mode context.
    '''
    self.spec = spec
    self.index = self.spec['meta']['session']
    util.set_random_seed(self.spec)
    util.set_cuda_id(self.spec)
    util.set_logger(self.spec, logger, 'session')
    spec_util.save(spec, unit='session')

    agent, env = make_agent_env(self.spec, global_nets)
    self.agent = agent
    self.env = env
    with util.ctx_lab_mode('eval'):  # env for eval
        eval_env = make_env(self.spec)
        self.eval_env = eval_env
        self.agent.body.eval_env = eval_env
    self.num_eval = ps.get(self.agent.spec, 'meta.num_eval')
    # a missing or falsy warmup_epi becomes -1
    self.warmup_epi = ps.get(self.agent.agent_spec, 'algorithm.warmup_epi') or -1
    logger.info(util.self_desc(self))
def __init__(self, spec, body, a=None, global_nets=None):
    '''
    Build the agent from spec['agent'][a]: link the body, create its memory, then create the algorithm.
    '''
    self.spec = spec
    self.a = a or 0  # for multi-agent
    self.agent_spec = spec['agent'][self.a]
    self.name = self.agent_spec['name']
    assert not ps.is_list(global_nets), f'single agent global_nets must be a dict, got {global_nets}'
    # set components; memory is created before the algorithm
    self.body = body
    body.agent = self
    memory_name = ps.get(self.agent_spec, 'memory.name')
    MemoryClass = getattr(memory, memory_name)
    self.body.memory = MemoryClass(self.agent_spec['memory'], self.body)
    algorithm_name = ps.get(self.agent_spec, 'algorithm.name')
    AlgorithmClass = getattr(algorithm, algorithm_name)
    self.algorithm = AlgorithmClass(self, global_nets)
    logger.info(util.self_desc(self))
def check_fn(*args, **kwargs):
    '''
    Wrapper around `fn` that sanity-checks a training step when to_check_train_step() is true.
    Checks that the loss is not NaN, that parameters changed (or that gradients are zero when the loss is 0), and that gradient norms lie strictly between 0 and 1e5. Returns the loss from `fn`.
    `fn` comes from the enclosing scope, which is not visible here; presumably it is a net's train_step method, since args[0] is used as the net.
    '''
    if not to_check_train_step():
        # checks disabled: call straight through
        return fn(*args, **kwargs)
    net = args[0]  # first arg self
    # get pre-update parameters to compare
    pre_params = [param.clone() for param in net.parameters()]
    # run train_step, get loss
    loss = fn(*args, **kwargs)
    assert not torch.isnan(loss).any(), loss
    # get post-update parameters to compare
    post_params = [param.clone() for param in net.parameters()]
    if loss == 0.0:
        # if loss is 0, there should be no updates
        # TODO if without momentum, parameters should not change too
        for p_name, param in net.named_parameters():
            assert param.grad.norm() == 0
    else:
        # check parameter updates
        try:
            # fails only when every parameter tensor is unchanged
            assert not all(
                torch.equal(w1, w2) for w1, w2 in zip(pre_params, post_params)
            ), f'Model parameter is not updated in train_step(), check if your tensor is detached from graph. Loss: {loss:g}'
            logger.info(
                f'Model parameter is updated in train_step(). Loss: {loss: g}'
            )
        except Exception as e:
            # outside unit tests the failure is only logged, not raised
            logger.error(e)
            if os.environ.get('PY_ENV') == 'test':
                # raise error if in unit test
                raise (e)
        # check grad norms
        min_norm, max_norm = 0.0, 1e5
        for p_name, param in net.named_parameters():
            try:
                grad_norm = param.grad.norm()
                assert min_norm < grad_norm < max_norm, f'Gradient norm for {p_name} is {grad_norm:g}, fails the extreme value check {min_norm} < grad_norm < {max_norm}. Loss: {loss:g}. Check your network and loss computation.'
            except Exception as e:
                # any failure here, including an out-of-range norm, is downgraded to a warning
                logger.warning(e)
        # NOTE(review): logged even when a warning was emitted above, since those failures are caught
        logger.info(f'Gradient norms passed value check.')
    logger.debug('Passed network parameter update check.')
    # store grad norms for debugging
    net.store_grad_norms()
    return loss
def check_all():
    '''
    Check every spec in every spec file under SPEC_DIR.
    Only '.json' files not starting with '_' are considered. The first failing spec is logged and its exception re-raised.
    @returns True when all specs pass
    '''
    spec_files = [
        file_name for file_name in os.listdir(SPEC_DIR)
        if file_name.endswith('.json') and not file_name.startswith('_')
    ]
    for spec_file in spec_files:
        spec_dict = util.read(f'{SPEC_DIR}/{spec_file}')
        for spec_name, raw_spec in spec_dict.items():
            # fill-in info at runtime
            raw_spec['name'] = spec_name
            spec = extend_meta_spec(raw_spec)
            try:
                check(spec)
            except Exception as e:
                logger.exception(f'spec_file {spec_file} fails spec check')
                raise e
    logger.info(f'Checked all specs from: {ps.join(spec_files, ",")}')
    return True
def airl_train(self, training_times=1):
    '''
    Train the discriminator training_times times on the most recent entry of the experience buffer.
    Each round splits that entry into minibatches of 64, takes one optimizer step per minibatch, clamps every discriminator parameter to [-0.1, 0.1] after the step, and logs the mean minibatch loss.
    '''
    for _ in range(training_times):
        latest_batch = self.experience_buffer[-1]
        minibatches = util.split_minibatch(latest_batch, 64)
        total_loss = 0
        for fake_batch in minibatches:
            self.optim_disc.zero_grad()
            loss = self.discriminator.disc_train(fake_batch)
            total_loss += loss.item()
            loss.backward()
            self.optim_disc.step()
            # keep the discriminator weights inside a fixed range after each update
            for param in self.discriminator.parameters():
                param.data.clamp_(-0.1, 0.1)
        mean_loss = total_loss/len(minibatches)
        logger.info("airl training loss: {}".format(mean_loss))
def train(self):
    '''
    Run one policy-gradient training step on a sampled batch when to_train is 1.
    @returns float loss of the step, or np.nan in eval lab modes or when to_train is not 1
    '''
    if util.in_eval_lab_modes():
        return np.nan
    clock = self.body.env.clock
    if self.to_train != 1:
        return np.nan

    batch = self.sample()
    clock.set_batch_size(len(batch))
    pdparams = self.calc_pdparam_batch(batch)
    advs = self.calc_ret_advs(batch)
    loss = self.calc_policy_loss(batch, pdparams, advs)
    self.net.train_step(
        loss, self.optim, self.lr_scheduler,
        clock=clock, global_net=self.global_net)
    # reset
    self.to_train = 0
    logger.info(f'Trained {self.name} at epi: {clock.epi}, frame: {clock.frame}, t: {clock.t}, total_reward so far: {self.body.total_reward}, loss: {loss:g}')
    return loss.item()
def load_algorithm(algorithm):
    '''
    Load all the nets for an algorithm, plus the optimizer of each net that has one.
    Files are read from `{prepath}_{net_name}_model.pt` and `{prepath}_{net_name}_optim.pt`, where prepath is meta.eval_model_prepath in eval lab modes and meta.model_prepath otherwise.
    '''
    agent = algorithm.agent
    net_names = algorithm.net_names
    # load specific model in eval mode
    prepath_key = 'eval_model_prepath' if util.in_eval_lab_modes() else 'model_prepath'
    model_prepath = agent.spec['meta'][prepath_key]
    logger.info(f'Loading algorithm {util.get_class_name(algorithm)} nets {net_names} from {model_prepath}_*.pt')
    for net_name in net_names:
        load(getattr(algorithm, net_name), f'{model_prepath}_{net_name}_model.pt')
        optim = getattr(algorithm, net_name.replace('net', 'optim'), None)
        if optim is None:
            # only trainable net has optim
            continue
        load(optim, f'{model_prepath}_{net_name}_optim.pt')
def run_ray_search(spec):
    '''
    Run a ray tune search for an experiment spec and collect the trial data of every ray trial. Uses RandomSearch now.
    TODO support for other ray search algorithms: https://ray.readthedocs.io/en/latest/tune-searchalg.html
    @returns dict merged from each ray trial's last_result['trial_data'], for Lab Experiment to analyze
    '''
    logger.info(f'Running ray search for spec {spec["name"]}')
    # the trial counter lives at module level so gen_trial_index is passable into ray.run
    global trial_index
    trial_index = -1

    def gen_trial_index():
        # hand out the next trial index, starting from 0
        global trial_index
        trial_index += 1
        return trial_index

    ray.init()
    search_config = {
        "spec": spec,
        "trial_index": tune.sample_from(lambda spec: gen_trial_index()),
        **build_config_space(spec)
    }
    ray_trials = tune.run(
        ray_trainable,
        name=spec['name'],
        config=search_config,
        resources_per_trial=infer_trial_resources(spec),
        num_samples=spec['meta']['max_trial'],
        queue_trials=True,
    )
    trial_data_dict = {}
    for ray_trial in ray_trials:
        trial_data_dict.update(ray_trial.last_result['trial_data'])
    ray.shutdown()
    return trial_data_dict
def train(self):
    '''
    Train the net on batches sampled from the warmup memory and the regular memory, with the reward agent put in eval mode.
    Each of the training_iter rounds samples from every memory holding at least batch_size items and runs training_batch_iter train steps per sampled batch.
    Also logs the mean IRL reward, reward_count / batch_count, which is np.nan when batch_count is still 0.
    @returns float total loss divided by (training_iter * training_batch_iter), or np.nan in eval lab modes or when to_train is not 1
    '''
    if util.in_eval_lab_modes():
        return np.nan
    clock = self.body.env.clock
    if self.to_train == 1:
        self.reward_agent.eval()
        total_loss = torch.tensor(0.0)
        # counters are reset here and not updated in this method; presumably calc_q_loss accumulates them - verify
        self.reward_count = 0
        self.batch_count = 0
        for _ in range(self.training_iter):
            batches = []
            if self.body.warmup_memory.size >= self.body.warmup_memory.batch_size:
                batches.append(self.warmup_sample())
            if self.body.memory.size >= self.body.memory.batch_size:
                batches.append(self.sample())
            clock.set_batch_size(sum(len(batch) for batch in batches))
            for batch in batches:
                for _ in range(self.training_batch_iter):
                    loss = self.calc_q_loss(batch, False)
                    self.net.train_step(loss, self.optim, self.lr_scheduler, clock=clock, global_net=self.global_net)
                    total_loss += loss
        loss = total_loss / (self.training_iter * self.training_batch_iter)
        # batch_count stays 0 when neither memory held a full batch; avoid ZeroDivisionError
        if self.batch_count:
            reward_irl = self.reward_count / self.batch_count
        else:
            reward_irl = np.nan
        logger.info("***********")
        logger.info(reward_irl)
        # reset
        self.to_train = 0
        logger.info(
            f'Trained {self.name} at epi: {clock.epi}, warmup_size: {self.body.warmup_memory.size}, memory_size: {self.body.memory.size}, loss: {loss:g}, irl_reward: {reward_irl}'
        )
        return loss.item()
    else:
        return np.nan
def log_metrics(self, metrics, df_mode):
    '''
    Log session metrics as 'key: value' pairs
    @param dict:metrics Metric name to numeric value
    @param str:df_mode Used to label the log line as `{df_mode}_df metrics`
    '''
    prefix = self.get_log_prefix()
    pairs = (f'{k}: {v:g}' for k, v in metrics.items())
    row_str = ' '.join(pairs)
    logger.info(f'{prefix} [{df_mode}_df metrics] {row_str}')