def _log_stats(self, epoch): logger.log("Epoch {} finished".format(epoch), with_timestamp=True) """ Replay Buffer """ logger.record_dict(self.replay_buffer.get_diagnostics(), prefix='replay_buffer/') """ Trainer """ logger.record_dict(self.trainer.get_diagnostics(), prefix='trainer/') """ Exploration """ logger.record_dict(self.expl_data_collector.get_diagnostics(), prefix='exploration/') expl_paths = self.expl_data_collector.get_epoch_paths() if hasattr(self.expl_env, 'get_diagnostics'): logger.record_dict( self.expl_env.get_diagnostics(expl_paths), prefix='exploration/', ) logger.record_dict( eval_util.get_generic_path_information(expl_paths), prefix="exploration/", ) """ Evaluation """ logger.record_dict( self.eval_data_collector.get_diagnostics(), prefix='evaluation/', ) eval_paths = self.eval_data_collector.get_epoch_paths() if hasattr(self.eval_env, 'get_diagnostics'): logger.record_dict( self.eval_env.get_diagnostics(eval_paths), prefix='evaluation/', ) # Get path information. logger.record_dict( eval_util.get_generic_path_information(eval_paths), prefix="evaluation/", ) """ Misc """ gt.stamp('logging') logger.record_dict(_get_epoch_timings()) logger.record_tabular('Epoch', epoch) logger.dump_tabular(with_prefix=False, with_timestamp=False)
def _try_to_eval(self, epoch):
    logger.save_extra_data(self.get_extra_data_to_save(epoch))
    if self._can_evaluate():
        self.evaluate(epoch)

        params = self.get_epoch_snapshot(epoch)
        logger.save_itr_params(epoch, params)
        table_keys = logger.get_table_key_set()
        if self._old_table_keys is not None:
            # Debug output: show both key sets and their differences before
            # the assertion below fires.
            print('$$$$$$$$$$$$$$$')
            print(table_keys)
            print('\n' * 4)
            print(self._old_table_keys)
            print('$$$$$$$$$$$$$$$')
            print(set(table_keys) - set(self._old_table_keys))
            print(set(self._old_table_keys) - set(table_keys))
            assert table_keys == self._old_table_keys, (
                "Table keys cannot change from iteration to iteration.")
        self._old_table_keys = table_keys

        logger.record_tabular(
            "Number of train steps total",
            self._n_train_steps_total,
        )
        logger.record_tabular(
            "Number of env steps total",
            self._n_env_steps_total,
        )
        logger.record_tabular(
            "Number of rollouts total",
            self._n_rollouts_total,
        )

        times_itrs = gt.get_times().stamps.itrs
        train_time = times_itrs['train'][-1]
        sample_time = times_itrs['sample'][-1]
        eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
        epoch_time = train_time + sample_time + eval_time
        total_time = gt.get_times().total

        logger.record_tabular('Train Time (s)', train_time)
        logger.record_tabular('(Previous) Eval Time (s)', eval_time)
        logger.record_tabular('Sample Time (s)', sample_time)
        logger.record_tabular('Epoch Time (s)', epoch_time)
        logger.record_tabular('Total Train Time (s)', total_time)

        logger.record_tabular("Epoch", epoch)
        logger.dump_tabular(with_prefix=False, with_timestamp=False)
    else:
        logger.log("Skipping eval for now.")
def evaluate(self, epoch): """ Evaluate the policy, e.g. save/print progress. :param epoch: :return: """ statistics = OrderedDict() try: statistics.update(self.eval_statistics) self.eval_statistics = None except: print('No Stats to Eval') logger.log("Collecting samples for evaluation") test_paths = self.eval_sampler.obtain_samples() statistics.update( eval_util.get_generic_path_information( test_paths, stat_prefix="Test", )) statistics.update( eval_util.get_generic_path_information( self._exploration_paths, stat_prefix="Exploration", )) if hasattr(self.env, "log_diagnostics"): self.env.log_diagnostics(test_paths) if hasattr(self.env, "log_statistics"): statistics.update(self.env.log_statistics(test_paths)) if epoch % self.freq_log_visuals == 0: if hasattr(self.env, "log_visuals"): self.env.log_visuals(test_paths, epoch, logger.get_snapshot_dir()) average_returns = eval_util.get_average_returns(test_paths) statistics['AverageReturn'] = average_returns for key, value in statistics.items(): logger.record_tabular(key, value) best_statistic = statistics[self.best_key] if best_statistic > self.best_statistic_so_far: self.best_statistic_so_far = best_statistic if self.save_best and epoch >= self.save_best_starting_from_epoch: data_to_save = {'epoch': epoch, 'statistics': statistics} data_to_save.update(self.get_epoch_snapshot(epoch)) logger.save_extra_data(data_to_save, 'best.pkl') print('\n\nSAVED BEST\n\n')
def step(self, action): """ :param action: joint position controls in action space (action bounds), then scaled to joint space """ assert np.shape(action) == (self.n_actions, ) # action = np.clip(action, self.action_space.low, self.action_space.high) action = self.process_action(action) p.configureDebugVisualizer(p.COV_ENABLE_SINGLE_STEP_RENDERING) forces = np.array([100] * 7 + [60] * 2) p.setJointMotorControlArray(self.pandaUid, list(range(7)) + [9, 10], p.POSITION_CONTROL, action, forces=forces) p.stepSimulation() self.observation, _ = self.get_obs() done = False done, reward, _ = self.get_reward(done) # done here is that we completed, if completed we stay completed until env.reset() self.completed = self.completed or done self.step_counter += 1 if self.step_counter > self._max_episode_steps: reward = 0 done = True info = { "obj_pos": np.array(p.getBasePositionAndOrientation(self.objectUid)[0]), "obj_ori": np.array(p.getBasePositionAndOrientation(self.objectUid)[1]), "hand_pos": np.array(p.getLinkState(self.pandaUid, 11)[0]), "fingers_joint": np.array([ p.getJointState(self.pandaUid, 9)[0], p.getJointState(self.pandaUid, 10)[0] ]), "completed": self.completed, } if self.completed and self.verbose: logger.log("Completed!") return self.observation, reward, done, info
def train(self):
    self.fix_data_set()
    logger.log("Done creating dataset.")
    num_batches_total = 0
    for epoch in range(self.num_epochs):
        for _ in range(self.num_batches_per_epoch):
            self.qf.train(True)
            self._do_training()
            num_batches_total += 1
        logger.push_prefix('Iteration #%d | ' % epoch)
        self.qf.train(False)
        self.evaluate(epoch)
        params = self.get_epoch_snapshot(epoch)
        logger.save_itr_params(epoch, params)
        logger.log("Done evaluating")
        logger.pop_prefix()
def _try_to_eval(self, epoch):
    if self._can_evaluate():
        # save if it's time to save
        if (epoch % self.freq_saving == 0) or (epoch + 1 >= self.num_epochs):
            # if epoch + 1 >= self.num_epochs:
            #     epoch = 'final'
            logger.save_extra_data(self.get_extra_data_to_save(epoch))
            params = self.get_epoch_snapshot(epoch)
            logger.save_itr_params(epoch, params)

        self.evaluate(epoch)

        logger.record_tabular(
            "Number of train calls total",
            self._n_train_steps_total,
        )
        logger.record_tabular(
            "Number of env steps total",
            self._n_env_steps_total,
        )
        logger.record_tabular(
            "Number of rollouts total",
            self._n_rollouts_total,
        )

        times_itrs = gt.get_times().stamps.itrs
        train_time = times_itrs['train'][-1]
        sample_time = times_itrs['sample'][-1]
        eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
        epoch_time = train_time + sample_time + eval_time
        total_time = gt.get_times().total

        logger.record_tabular('Train Time (s)', train_time)
        logger.record_tabular('(Previous) Eval Time (s)', eval_time)
        logger.record_tabular('Sample Time (s)', sample_time)
        logger.record_tabular('Epoch Time (s)', epoch_time)
        logger.record_tabular('Total Train Time (s)', total_time)

        logger.record_tabular("Epoch", epoch)
        logger.dump_tabular(with_prefix=False, with_timestamp=False)
    else:
        logger.log("Skipping eval for now.")
def execute_actions(actions, polygons, asset_path, sim_steps=2000,
                    img_dim=64, hold_last_action=False):
    polygon_map = {ind: ply for ind, ply in enumerate(polygons)}
    xml = XML(asset_path)
    names = []
    for action in actions:
        ply_ind, pos, axangle, scale, rgb = action
        ply = polygon_map[ply_ind]
        rgba = rgb.tolist() + [1]
        # print('Dropping {} | pos: {} | axangle: {} | scale: {} | rgb: {} '.format(
        #     ply, pos, axangle, scale, rgb))
        name = xml.add_mesh(ply, pos=pos, axangle=axangle, scale=scale, rgba=rgba)
        names.append(name)

    xml_str = xml.instantiate()
    model = mjc.load_model_from_xml(xml_str)
    sim = mjc.MjSim(model)

    log_steps = len(actions) + 1
    logger = Logger(xml, sim, steps=log_steps, img_dim=img_dim)
    logger.log(0)

    for act_ind, act in enumerate(actions):
        hold_objects = names[act_ind + 1:]
        drop_object = names[act_ind]
        if act_ind == len(actions) - 1 and hold_last_action:
            logger.hold_drop_execute(hold_objects, drop_object, 1)
        else:
            logger.hold_drop_execute(hold_objects, drop_object, sim_steps)
        logger.log(act_ind + 1, hold_objects)

    data, images, masks = logger.get_logs()
    images = images / 255.
    return data, images, masks
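# Hedged usage sketch (not from the original source): calling execute_actions
# with a tiny hand-built action list. The polygon names, asset directory, and
# action values below are placeholders, assuming each action unpacks to
# (polygon index, xyz position, axis-angle, scale, rgb) as in the loop above.
import numpy as np

polygons = ['cube', 'rect']          # hypothetical mesh names
asset_path = '/path/to/stl/assets'   # hypothetical asset directory
actions = [
    (0, [0.0, 0.0, 0.5], [0.0, 0.0, 1.0, 0.0], 0.4, np.array([1.0, 0.0, 0.0])),
    (1, [0.1, 0.0, 1.0], [0.0, 0.0, 1.0, 0.0], 0.4, np.array([0.0, 1.0, 0.0])),
]
data, images, masks = execute_actions(actions, polygons, asset_path,
                                      sim_steps=2000, img_dim=64)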
def render(self): logger.push_prefix("HighLow(sign={0})\t".format(self._sign)) if self._last_action is None: logger.log("No action taken.") else: if self._last_t == 0: logger.log("--- New Episode ---") logger.push_prefix("t={0}\t".format(self._last_t)) with np_print_options(precision=4, suppress=False): logger.log("Action: {0}".format(self._last_action, )) logger.log("Reward: {0}".format(self._last_reward, )) logger.pop_prefix() logger.pop_prefix()
def _try_to_eval(self, epoch):
    if epoch % self.freq_saving == 0:
        logger.save_extra_data(self.get_extra_data_to_save(epoch))
    if self._can_evaluate():
        self.evaluate(epoch)

        if epoch % self.freq_saving == 0:
            params = self.get_epoch_snapshot(epoch)
            logger.save_itr_params(epoch, params)
        table_keys = logger.get_table_key_set()

        # logger.record_tabular(
        #     "Number of train steps total",
        #     self._n_policy_train_steps_total,
        # )
        logger.record_tabular(
            "Number of env steps total",
            self._n_env_steps_total,
        )
        logger.record_tabular(
            "Number of rollouts total",
            self._n_rollouts_total,
        )

        times_itrs = gt.get_times().stamps.itrs
        train_time = times_itrs['train'][-1]
        sample_time = times_itrs['sample'][-1]
        eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
        epoch_time = train_time + sample_time + eval_time
        total_time = gt.get_times().total

        logger.record_tabular('Train Time (s)', train_time)
        logger.record_tabular('(Previous) Eval Time (s)', eval_time)
        logger.record_tabular('Sample Time (s)', sample_time)
        logger.record_tabular('Epoch Time (s)', epoch_time)
        logger.record_tabular('Total Train Time (s)', total_time)

        logger.record_tabular("Epoch", epoch)
        logger.dump_tabular(with_prefix=False, with_timestamp=False)
    else:
        logger.log("Skipping eval for now.")
def sim_first_step(self, actions):
    asset_path = '/home/jcoreyes/objects/object-oriented-prediction/o2p2/data/stl'
    img_dim = 64
    xml = XML(asset_path)
    names = []
    for action in actions:
        ply_ind, pos, axangle, scale, rgb = (action[0], action[1:4],
                                             action[4:8], action[8],
                                             action[9:12])
        ply = self.polygon_map[ply_ind]
        rgba = rgb.tolist() + [1]
        print('Dropping {} | pos: {} | axangle: {} | scale: {} | rgb: {} '.format(
            ply, pos, axangle, scale, rgb))
        name = xml.add_mesh(ply, pos=pos, axangle=axangle, scale=scale, rgba=rgba)
        names.append(name)

    xml_str = xml.instantiate()
    model = mjc.load_model_from_xml(xml_str)
    sim = mjc.MjSim(model)

    log_steps = 2
    sim_steps = 1
    logger = Logger(xml, sim, steps=log_steps, img_dim=img_dim)
    logger.log(0)
    for act_ind, act in enumerate(actions):
        hold_objects = names[act_ind + 1:]
        drop_object = names[act_ind]
        logger.hold_drop_execute(hold_objects, drop_object, sim_steps)
        logger.log(act_ind + 1, hold_objects)
        break

    data, images, masks = logger.get_logs()
    images = images / 255.
    import pdb
    pdb.set_trace()
def evaluate(self, epoch, eval_paths=None):
    statistics = OrderedDict()
    statistics.update(self.eval_statistics)

    logger.log("Collecting samples for evaluation")
    if eval_paths:
        test_paths = eval_paths
    else:
        test_paths = self.get_eval_paths()

    statistics.update(
        eval_util.get_generic_path_information(
            test_paths,
            stat_prefix="Test",
        ))
    if len(self._exploration_paths) > 0:
        statistics.update(
            eval_util.get_generic_path_information(
                self._exploration_paths,
                stat_prefix="Exploration",
            ))

    if hasattr(self.env, "log_diagnostics"):
        self.env.log_diagnostics(test_paths, logger=logger)
    if hasattr(self.env, "get_diagnostics"):
        statistics.update(self.env.get_diagnostics(test_paths))

    for i in range(len(test_paths)):
        self.env.update_rewards(test_paths[i])

    statistics['AverageReturn'] = eval_util.get_average_returns(test_paths)
    statistics['AverageEnvironmentReturn'] = \
        eval_util.get_average_environment_returns(test_paths)
    statistics['AverageUnsupervisedReturn'] = \
        eval_util.get_average_unsupervised_returns(test_paths)

    for key, value in statistics.items():
        logger.record_tabular(key, value)

    self.need_to_update_eval_statistics = True
def train(self):
    for epoch in range(self.num_epochs):
        logger.push_prefix('Iteration #%d | ' % epoch)

        start_time = time.time()
        for _ in range(self.num_steps_per_epoch):
            batch = self.get_batch()
            train_dict = self.get_train_dict(batch)

            self.policy_optimizer.zero_grad()
            policy_loss = train_dict['Policy Loss']
            policy_loss.backward()
            self.policy_optimizer.step()
        logger.log("Train time: {}".format(time.time() - start_time))

        start_time = time.time()
        self.evaluate(epoch)
        logger.log("Eval time: {}".format(time.time() - start_time))

        params = self.get_epoch_snapshot(epoch)
        logger.save_itr_params(epoch, params)
        logger.pop_prefix()
def run_task(variant):
    from rlkit.core import logger
    print(variant)
    logger.log("Hello from script")
    logger.log("variant: " + str(variant))
    logger.record_tabular("value", 1)
    logger.dump_tabular()
    # logger.log takes a single string, so join the message and the directory.
    logger.log("snapshot_dir: " + logger.get_snapshot_dir())
def _backtracking_line_search(self, params, descent_step, f_loss,
                              f_constraint):
    prev_params = [p.clone() for p in params]
    ratio_list = self._backtrack_ratio**np.arange(self._max_backtracks)
    loss_before = f_loss()

    param_shapes = [p.shape or torch.Size([1]) for p in params]
    descent_step = unflatten_tensors(descent_step, param_shapes)
    assert len(descent_step) == len(params)

    for ratio in ratio_list:
        for step, prev_param, param in zip(descent_step, prev_params, params):
            step = ratio * step
            new_param = prev_param.data - step
            param.data = new_param.data

        loss = f_loss()
        constraint_val = f_constraint()
        if (loss < loss_before
                and constraint_val <= self._max_constraint_value):
            break

    if ((torch.isnan(loss) or torch.isnan(constraint_val)
         or loss >= loss_before
         or constraint_val >= self._max_constraint_value)
            and not self._accept_violation):
        logger.log('Line search condition violated. Rejecting the step!')
        if torch.isnan(loss):
            logger.log('Violated because loss is NaN')
        if torch.isnan(constraint_val):
            logger.log('Violated because constraint is NaN')
        if loss >= loss_before:
            logger.log('Violated because loss not improving')
        if constraint_val >= self._max_constraint_value:
            logger.log('Violated because constraint is violated')
        for prev, cur in zip(prev_params, params):
            cur.data = prev.data
def evaluate(self, epoch, exploration_paths):
    """
    Perform evaluation for this algorithm.

    :param epoch: The epoch number.
    :param exploration_paths: List of dicts, each representing a path.
    """
    logger.log("Collecting samples for evaluation")
    paths = self._sample_eval_paths(epoch)
    statistics = OrderedDict()

    statistics.update(self._statistics_from_paths(paths, "Test"))
    statistics.update(self._get_other_statistics())
    statistics.update(
        self._statistics_from_paths(exploration_paths, "Exploration"))

    statistics['AverageReturn'] = get_average_returns(paths)
    statistics['Epoch'] = epoch

    for key, value in statistics.items():
        logger.record_tabular(key, value)

    self.log_diagnostics(paths)
def train_amortized_goal_chooser(
        goal_chooser,
        goal_conditioned_model,
        argmax_q,
        discount,
        replay_buffer,
        learning_rate=1e-3,
        batch_size=32,
        num_updates=1000,
):
    def get_loss(training=False):
        buffer = replay_buffer.get_replay_buffer(training)
        batch = buffer.random_batch(batch_size)
        obs = ptu.np_to_var(batch['observations'], requires_grad=False)
        goals = ptu.np_to_var(batch['goal_states'], requires_grad=False)
        goal = goal_chooser(obs, goals)
        actions = argmax_q(obs, goal, discount)
        final_state_predicted = goal_conditioned_model(
            obs,
            actions,
            goal,
            discount,
        ) + obs
        rewards = goal_chooser.reward_function(final_state_predicted, goals)
        return -rewards.mean()

    discount = ptu.np_to_var(discount * np.ones((batch_size, 1)))
    optimizer = optim.Adam(goal_chooser.parameters(), learning_rate)
    for i in range(num_updates):
        optimizer.zero_grad()
        # Assumes the training split of the buffer is intended for gradient
        # updates; the validation split is only used for the periodic
        # validation loss below.
        loss = get_loss(training=True)
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            logger.log("Number updates: {}".format(i))
            logger.log("Train loss: {}".format(float(ptu.get_numpy(loss))))
            logger.log("Validation loss: {}".format(
                float(ptu.get_numpy(get_loss(training=False)))))
def _try_to_eval(self, epoch, eval_paths=None):
    if MPI and MPI.COMM_WORLD.Get_rank() == 0:
        if epoch % self.save_extra_data_interval == 0:
            logger.save_extra_data(self.get_extra_data_to_save(epoch))
        if epoch % self.num_epochs_per_param_save == 0:
            print("Attempting itr param save...")
            params = self.get_epoch_snapshot(epoch)
            logger.save_itr_params(epoch, params)
            print(f"Itr{epoch} param saved!")

    if self._can_evaluate():
        self.evaluate(epoch, eval_paths=eval_paths)

        logger.record_tabular(
            "Number of train steps total",
            self._n_train_steps_total,
        )
        logger.record_tabular(
            "Number of env steps total",
            self._n_env_steps_total,
        )
        logger.record_tabular(
            "Number of rollouts total",
            self._n_rollouts_total,
        )

        times_itrs = gt.get_times().stamps.itrs
        # train_time = times_itrs['train'][-1]
        training_loops = ['get_batch', 'update_normalizer', 'forward',
                          'compute_losses', 'qf1_loop', 'policy_loss_forward',
                          'policy_loop', 'vf_loop']
        # Note: this sums every timed section, not just the entries listed in
        # training_loops.
        train_time = sum(times_itrs[loop][-1] for loop in times_itrs.keys())
        sample_time = times_itrs['sample'][-1]
        if epoch > 0:
            eval_time = times_itrs['eval'][-1]
        else:
            # Insert a zero entry so the per-key logging loop below does not
            # fail before the first evaluation has been timed.
            times_itrs['eval'] = [0]
            eval_time = 0
        epoch_time = train_time + sample_time + eval_time
        total_time = gt.get_times().total

        # logger.record_tabular('Get Batch (s)', times_itrs['get_batch'][-1])
        # logger.record_tabular('Update Normalizer (s)', times_itrs['update_normalizer'][-1])
        # logger.record_tabular('Forward (s)', times_itrs['forward'][-1])
        # logger.record_tabular('Compute Losses (s)', times_itrs['compute_losses'][-1])
        # logger.record_tabular('QF1 Loop (s)', times_itrs['qf1_loop'][-1])
        # logger.record_tabular('QF2 Loop (s)', times_itrs['qf2_loop'][-1])
        # logger.record_tabular("Policy Forward (s)", times_itrs['policy_loss_forward'][-1])
        # logger.record_tabular('Policy Loop (s)', times_itrs['policy_loop'][-1])
        # logger.record_tabular('VF Loop (s)', times_itrs['vf_loop'][-1])
        for key in times_itrs.keys():
            logger.record_tabular(key.title(), times_itrs[key][-1])

        logger.record_tabular('Train Time (s) ---', train_time)
        logger.record_tabular('(Previous) Eval Time (s) ---', eval_time)
        logger.record_tabular('Sample Time (s) ---', sample_time)
        logger.record_tabular('Epoch Time (s)', epoch_time)
        logger.record_tabular('Total Train Time (s)', total_time)

        logger.record_tabular("Epoch", epoch)

        table_keys = logger.get_table_key_set()
        if self._old_table_keys is not None and table_keys != self._old_table_keys:
            # assert table_keys == self._old_table_keys, (
            #     "Table keys cannot change from iteration to iteration."
            # )
            print("Table keys have changed. Rewriting header and filling with 0s")
            logger.update_header()
            raise NotImplementedError
        self._old_table_keys = table_keys

        logger.dump_tabular(with_prefix=False, with_timestamp=False)
    else:
        logger.log("Skipping eval for now.")
def example(variant):
    import mujoco_py
    import torch
    logger.log(torch.__version__)
    date_format = '%m/%d/%Y %H:%M:%S %Z'
    date = datetime.now(tz=pytz.utc)
    logger.log("start")
    logger.log('Current date & time is: {}'.format(date.strftime(date_format)))
    if torch.cuda.is_available():
        x = torch.randn(3)
        logger.log(str(x.to(ptu.device)))
    date = date.astimezone(timezone('US/Pacific'))
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))
    for i in range(variant['num_seconds']):
        logger.log("Tick, {}".format(i))
        time.sleep(1)
    logger.log("end")
    logger.log('Local date & time is: {}'.format(date.strftime(date_format)))

    logger.log("start mujoco")
    from gym.envs.mujoco import HalfCheetahEnv
    e = HalfCheetahEnv()
    img = e.sim.render(32, 32)
    logger.log(str(sum(img)))
    logger.log("end mujoco")
time.sleep(1) logger.log("end") logger.log('Local date & time is: {}'.format(date.strftime(date_format))) logger.log("start mujoco") from gym.envs.mujoco import HalfCheetahEnv e = HalfCheetahEnv() img = e.sim.render(32, 32) logger.log(str(sum(img))) logger.log("end mujocoy") if __name__ == "__main__": # noinspection PyTypeChecker date_format = '%m/%d/%Y %H:%M:%S %Z' date = datetime.now(tz=pytz.utc) logger.log("start") variant = dict( num_seconds=10, launch_time=str(date.strftime(date_format)), ) run_experiment( example, exp_prefix='test-gpu-local-singularity', mode='local_singularity', variant=variant, # use_gpu=True, use_gpu=False, verbose=True, )
def setup_logger( exp_prefix="default", variant=None, text_log_file="debug.log", variant_log_file="variant.json", tabular_log_file="progress.csv", snapshot_mode="last", snapshot_gap=1, log_tabular_only=False, log_dir=None, git_infos=None, script_name=None, **create_log_dir_kwargs ): """ Set up logger to have some reasonable default settings. Will save log output to based_log_dir/exp_prefix/exp_name. exp_name will be auto-generated to be unique. If log_dir is specified, then that directory is used as the output dir. :param exp_prefix: The sub-directory for this specific experiment. :param variant: :param text_log_file: :param variant_log_file: :param tabular_log_file: :param snapshot_mode: :param log_tabular_only: :param snapshot_gap: :param log_dir: :param git_infos: :param script_name: If set, save the script name to this. :return: """ if git_infos is None: git_infos = get_git_infos(conf.CODE_DIRS_TO_MOUNT) first_time = log_dir is None if first_time: log_dir = create_log_dir(exp_prefix, **create_log_dir_kwargs) if variant is not None: logger.log("Variant:") logger.log(json.dumps(dict_to_safe_json(variant), indent=2)) variant_log_path = osp.join(log_dir, variant_log_file) logger.log_variant(variant_log_path, variant) tabular_log_path = osp.join(log_dir, tabular_log_file) text_log_path = osp.join(log_dir, text_log_file) logger.add_text_output(text_log_path) if first_time: logger.add_tabular_output(tabular_log_path) else: logger._add_output(tabular_log_path, logger._tabular_outputs, logger._tabular_fds, mode='a') for tabular_fd in logger._tabular_fds: logger._tabular_header_written.add(tabular_fd) logger.set_snapshot_dir(log_dir) logger.set_snapshot_mode(snapshot_mode) logger.set_snapshot_gap(snapshot_gap) logger.set_log_tabular_only(log_tabular_only) exp_name = log_dir.split("/")[-1] logger.push_prefix("[%s] " % exp_name) if git_infos is not None: for ( directory, code_diff, code_diff_staged, commit_hash, branch_name ) in git_infos: if directory[-1] == '/': directory = directory[:-1] diff_file_name = directory[1:].replace("/", "-") + ".patch" diff_staged_file_name = ( directory[1:].replace("/", "-") + "_staged.patch" ) if code_diff is not None and len(code_diff) > 0: with open(osp.join(log_dir, diff_file_name), "w") as f: f.write(code_diff + '\n') if code_diff_staged is not None and len(code_diff_staged) > 0: with open(osp.join(log_dir, diff_staged_file_name), "w") as f: f.write(code_diff_staged + '\n') with open(osp.join(log_dir, "git_infos.txt"), "a") as f: f.write("directory: {}\n".format(directory)) f.write("git hash: {}\n".format(commit_hash)) f.write("git branch name: {}\n\n".format(branch_name)) if script_name is not None: with open(osp.join(log_dir, "script_name.txt"), "w") as f: f.write(script_name) return log_dir
def _log_stats(self, epoch): logger.log(f"Epoch {epoch} finished", with_timestamp=True) """ Replay Buffer """ logger.record_dict( self.replay_buffer.get_diagnostics(), prefix="replay_buffer/" ) """ Trainer """ logger.record_dict(self.trainer.get_diagnostics(), prefix="trainer/") """ Exploration """ logger.record_dict( self.expl_data_collector.get_diagnostics(), prefix="exploration/" ) expl_paths = self.expl_data_collector.get_epoch_paths() if len(expl_paths) > 0: if hasattr(self.expl_env, "get_diagnostics"): logger.record_dict( self.expl_env.get_diagnostics(expl_paths), prefix="exploration/", ) logger.record_dict( eval_util.get_generic_path_information(expl_paths), prefix="exploration/", ) """ Evaluation """ logger.record_dict( self.eval_data_collector.get_diagnostics(), prefix="evaluation/", ) eval_paths = self.eval_data_collector.get_epoch_paths() if hasattr(self.eval_env, "get_diagnostics"): logger.record_dict( self.eval_env.get_diagnostics(eval_paths), prefix="evaluation/", ) logger.record_dict( eval_util.get_generic_path_information(eval_paths), prefix="evaluation/", ) """ Misc """ gt.stamp("logging") timings = _get_epoch_timings() timings["time/training and exploration (s)"] = self.total_train_expl_time logger.record_dict(timings) logger.record_tabular("Epoch", epoch) logger.dump_tabular(with_prefix=False, with_timestamp=False)
def pretrain(self):
    logger.log('Pretraining ...')
    for ep in range(self.num_pretrain_updates):
        for t in range(self.num_update_loops_per_train_call):
            self._do_update_step(ep, use_expert_buffer=True)
def setup_logger( exp_prefix="default", exp_id=0, seed=0, variant=None, base_log_dir=None, text_log_file="debug.log", variant_log_file="variant.json", tabular_log_file="progress.csv", snapshot_mode="last", snapshot_gap=1, log_tabular_only=False, log_dir=None, git_info=None, script_name=None, ): """ Set up logger to have some reasonable default settings. Will save log output to based_log_dir/exp_prefix/exp_name. exp_name will be auto-generated to be unique. If log_dir is specified, then that directory is used as the output dir. :param exp_prefix: The sub-directory for this specific experiment. :param exp_id: The number of the specific experiment run within this experiment. :param variant: :param base_log_dir: The directory where all log should be saved. :param text_log_file: :param variant_log_file: :param tabular_log_file: :param snapshot_mode: :param log_tabular_only: :param snapshot_gap: :param log_dir: :param git_info: :param script_name: If set, save the script name to this. :return: """ first_time = log_dir is None if first_time: log_dir = create_log_dir(exp_prefix, exp_id=exp_id, seed=seed, base_log_dir=base_log_dir) if variant is not None: logger.log("Variant:") logger.log(json.dumps(dict_to_safe_json(variant), indent=2)) variant_log_path = osp.join(log_dir, variant_log_file) logger.log_variant(variant_log_path, variant) tabular_log_path = osp.join(log_dir, tabular_log_file) text_log_path = osp.join(log_dir, text_log_file) logger.add_text_output(text_log_path) if first_time: logger.add_tabular_output(tabular_log_path) else: logger._add_output(tabular_log_path, logger._tabular_outputs, logger._tabular_fds, mode='a') for tabular_fd in logger._tabular_fds: logger._tabular_header_written.add(tabular_fd) logger.set_snapshot_dir(log_dir) logger.set_snapshot_mode(snapshot_mode) logger.set_snapshot_gap(snapshot_gap) logger.set_log_tabular_only(log_tabular_only) exp_name = log_dir.split("/")[-1] logger.push_prefix("[%s] " % exp_name) if git_info is not None: code_diff, commit_hash, branch_name = git_info if code_diff is not None: with open(osp.join(log_dir, "code.diff"), "w") as f: f.write(code_diff) with open(osp.join(log_dir, "git_info.txt"), "w") as f: f.write("git hash: {}".format(commit_hash)) f.write('\n') f.write("git branch name: {}".format(branch_name)) if script_name is not None: with open(osp.join(log_dir, "script_name.txt"), "w") as f: f.write(script_name) return log_dir
def _log_stats(self, epoch): logger.log("Epoch {} finished".format(epoch), with_timestamp=True) """ Replay Buffer """ logger.record_dict( self.replay_buffer.get_diagnostics(), global_step=epoch, prefix="replay_buffer/", ) """ Trainer """ logger.record_dict(self.trainer.get_diagnostics(), global_step=epoch, prefix="trainer/") """ Exploration """ logger.record_dict( self.expl_data_collector.get_diagnostics(), global_step=epoch, prefix="exploration/", ) expl_paths = self.expl_data_collector.get_epoch_paths() if hasattr(self.expl_env, "get_diagnostics"): logger.record_dict( self.expl_env.get_diagnostics(expl_paths), global_step=epoch, prefix="exploration/", ) logger.record_dict( eval_util.get_generic_path_information(expl_paths), global_step=epoch, prefix="exploration/", ) """ Evaluation """ logger.record_dict( self.eval_data_collector.get_diagnostics(), global_step=epoch, prefix="evaluation/", ) eval_paths = self.eval_data_collector.get_epoch_paths() if hasattr(self.eval_env, "get_diagnostics"): logger.record_dict( self.eval_env.get_diagnostics(eval_paths), global_step=epoch, prefix="evaluation/", ) logger.record_dict( eval_util.get_generic_path_information(eval_paths), global_step=epoch, prefix="evaluation/", ) """ Misc """ gt.stamp("logging") logger.record_dict(_get_epoch_timings(), global_step=epoch) logger.record_tabular("Epoch", epoch) logger.dump_tabular(with_prefix=False, with_timestamp=False)
def evaluate(self, epoch):
    statistics = OrderedDict()
    statistics.update(self.eval_statistics)
    self.eval_statistics = None
    # statistics.update(eval_util.get_generic_path_information(
    #     self._exploration_paths, stat_prefix="Exploration",
    # ))

    for mode in ['meta_train', 'meta_test']:
        logger.log("Collecting samples for evaluation")
        test_paths = self.obtain_eval_samples(epoch, mode=mode)

        statistics.update(
            eval_util.get_generic_path_information(
                test_paths,
                stat_prefix="Test " + mode,
            ))
        # print(statistics.keys())
        if hasattr(self.env, "log_diagnostics"):
            self.env.log_diagnostics(test_paths)
        if hasattr(self.env, "log_statistics"):
            log_stats = self.env.log_statistics(test_paths)
            new_log_stats = OrderedDict(
                (k + ' ' + mode, v) for k, v in log_stats.items())
            statistics.update(new_log_stats)

        average_returns = rlkit.core.eval_util.get_average_returns(test_paths)
        statistics['AverageReturn ' + mode] = average_returns

        if self.render_eval_paths:
            self.env.render_paths(test_paths)

    # meta_test_this_epoch = statistics['Percent_Solved meta_test']
    # meta_test_this_epoch = statistics['Avg Run Rew meta_test']
    # meta_test_this_epoch = statistics['L2AverageClosest meta_test']
    meta_test_this_epoch = statistics['Perc Success meta_test']
    # meta_test_this_epoch = 100.0
    # meta_test_this_epoch = statistics['AverageReturn meta_test']

    if meta_test_this_epoch > self.best_meta_test:
        # make sure you set save_algorithm to true then call save_extra_data
        prev_save_alg = self.save_algorithm
        self.save_algorithm = True
        if self.save_best:
            if epoch > self.save_best_after_epoch:
                temp_rb = self.replay_buffer
                self.replay_buffer = None
                logger.save_extra_data(self.get_extra_data_to_save(epoch),
                                       'best_meta_test.pkl')
                self.replay_buffer = temp_rb
                self.best_meta_test = meta_test_this_epoch
                print('\n\nSAVED ALG AT EPOCH %d\n\n' % epoch)
        self.save_algorithm = prev_save_alg

    if epoch in self.custom_save_epoch:
        prev_save_alg = self.save_algorithm
        self.save_algorithm = True
        logger.save_extra_data(self.get_extra_data_to_save(epoch),
                               'custom_save_epoch_%d.pkl' % epoch)
        self.save_algorithm = prev_save_alg

    for key, value in statistics.items():
        logger.record_tabular(key, value)

    if self.plotter:
        self.plotter.draw()
def evaluate(self, epoch): """ Evaluate the policy, e.g. save/print progress. :param epoch: :return: """ statistics = OrderedDict() try: statistics.update(self.eval_statistics) self.eval_statistics = None except: print('No Stats to Eval') logger.log("Collecting random samples for evaluation") eval_steps = self.num_steps_per_eval test_paths = self.eval_sampler.obtain_samples(eval_steps) obs = torch.Tensor( np.squeeze(np.vstack([path["observations"] for path in test_paths]))) acts = torch.Tensor( np.squeeze(np.vstack([path["actions"] for path in test_paths]))) if len(acts.shape) < 2: acts = torch.unsqueeze(acts, 1) random_input = torch.cat([obs, acts], dim=1).to(ptu.device) exp_batch = self.get_batch(eval_steps, keys=['observations', 'actions'], use_expert_buffer=True) # exp_batch = {'observations':torch.Tensor([[0.],[1.],[2.],[3.],[4.],[5.],[6.],[7.],[8.],[9.],[10.]]), 'actions':torch.Tensor([[0.5]]*11)} obs = exp_batch['observations'] acts = exp_batch['actions'] exp_input = torch.cat([obs, acts], dim=1).to(ptu.device) statistics['random_avg_energy'] = self.ebm(random_input).mean().item() statistics['expert_avg_energy'] = self.get_energy( exp_input).mean().item() statistics['expert*20_avg_energy'] = self.get_energy(exp_input * 20).mean().item() statistics["random_expert_diff"] = statistics[ "random_avg_energy"] - statistics["expert_avg_energy"] for key, value in statistics.items(): logger.record_tabular(key, value) best_statistic = statistics[self.best_key] if best_statistic > self.best_statistic_so_far: self.best_statistic_so_far = best_statistic self.best_epoch = epoch self.best_random_avg_energy = statistics['random_avg_energy'] self.best_expert_avg_energy = statistics['expert_avg_energy'] logger.record_tabular("Best Model Epoch", self.best_epoch) logger.record_tabular("Best Random Energy", self.best_random_avg_energy) logger.record_tabular("Best Expert Energy", self.best_expert_avg_energy) if self.save_best and epoch >= self.save_best_starting_from_epoch: data_to_save = {'epoch': epoch, 'statistics': statistics} data_to_save.update(self.get_epoch_snapshot(epoch)) logger.save_extra_data(data_to_save, 'best.pkl') print('\n\nSAVED BEST\n\n') logger.record_tabular("Best Model Epoch", self.best_epoch) logger.record_tabular("Best Random Energy", self.best_random_avg_energy) logger.record_tabular("Best Expert Energy", self.best_expert_avg_energy)
def _try_to_eval(self, epoch):
    if epoch % self.logging_period != 0:
        return
    if epoch in self.save_extra_manual_epoch_set:
        logger.save_extra_data(
            self.get_extra_data_to_save(epoch),
            file_name='extra_snapshot_itr{}'.format(epoch),
            mode='cloudpickle',
        )
    if self._save_extra_every_epoch:
        logger.save_extra_data(self.get_extra_data_to_save(epoch))
    gt.stamp('save-extra')
    if self._can_evaluate():
        self.evaluate(epoch)
        gt.stamp('eval')

        params = self.get_epoch_snapshot(epoch)
        logger.save_itr_params(epoch, params)
        gt.stamp('save-snapshot')
        table_keys = logger.get_table_key_set()
        if self._old_table_keys is not None:
            assert table_keys == self._old_table_keys, (
                "Table keys cannot change from iteration to iteration.")
        self._old_table_keys = table_keys

        logger.record_dict(
            self.trainer.get_diagnostics(),
            prefix='trainer/',
        )
        logger.record_tabular(
            "Number of train steps total",
            self._n_train_steps_total,
        )
        logger.record_tabular(
            "Number of env steps total",
            self._n_env_steps_total,
        )
        logger.record_tabular(
            "Number of rollouts total",
            self._n_rollouts_total,
        )

        times_itrs = gt.get_times().stamps.itrs
        train_time = times_itrs['train'][-1]
        sample_time = times_itrs['sample'][-1]
        save_extra_time = times_itrs['save-extra'][-1]
        save_snapshot_time = times_itrs['save-snapshot'][-1]
        eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
        epoch_time = train_time + sample_time + save_extra_time + eval_time
        total_time = gt.get_times().total

        logger.record_tabular('in_unsupervised_model',
                              float(self.in_unsupervised_phase))
        logger.record_tabular('Train Time (s)', train_time)
        logger.record_tabular('(Previous) Eval Time (s)', eval_time)
        logger.record_tabular('Sample Time (s)', sample_time)
        logger.record_tabular('Save Extra Time (s)', save_extra_time)
        logger.record_tabular('Save Snapshot Time (s)', save_snapshot_time)
        logger.record_tabular('Epoch Time (s)', epoch_time)
        logger.record_tabular('Total Train Time (s)', total_time)

        logger.record_tabular("Epoch", epoch)
        logger.dump_tabular(with_prefix=False, with_timestamp=False)
    else:
        logger.log("Skipping eval for now.")
def example(variant):
    import torch
    import rlkit.torch.pytorch_util as ptu
    print("Starting")
    logger.log(torch.__version__)
    date_format = "%m/%d/%Y %H:%M:%S %Z"
    date = datetime.now(tz=pytz.utc)
    logger.log("start")
    logger.log("Current date & time is: {}".format(date.strftime(date_format)))
    logger.log("Cuda available: {}".format(torch.cuda.is_available()))
    if torch.cuda.is_available():
        x = torch.randn(3)
        logger.log(str(x.to(ptu.device)))
    date = date.astimezone(timezone("US/Pacific"))
    logger.log("Local date & time is: {}".format(date.strftime(date_format)))
    for i in range(variant["num_seconds"]):
        logger.log("Tick, {}".format(i))
        time.sleep(1)
    logger.log("end")
    logger.log("Local date & time is: {}".format(date.strftime(date_format)))

    logger.log("start mujoco")
    from gym.envs.mujoco import HalfCheetahEnv
    e = HalfCheetahEnv()
    img = e.sim.render(32, 32)
    logger.log(str(sum(img)))
    logger.log("end mujoco")

    logger.record_tabular("Epoch", 1)
    logger.dump_tabular()
    logger.record_tabular("Epoch", 2)
    logger.dump_tabular()
    logger.record_tabular("Epoch", 3)
    logger.dump_tabular()
    print("Done")
def _end_epoch(self): logger.log("Epoch Duration: {0}".format(time.time() - self._epoch_start_time)) logger.log("Started Training: {0}".format(self._can_train())) logger.pop_prefix()
def _try_to_eval(self, epoch):
    logger.save_extra_data(self.get_extra_data_to_save(epoch))
    if self._can_evaluate():
        self.evaluate(epoch)

        params = self.get_epoch_snapshot(epoch)
        logger.save_itr_params(epoch, params)
        table_keys = logger.get_table_key_set()
        # print("TABLE KEYS")
        # print(table_keys)
        # if self._old_table_keys is not None:
        #     assert table_keys == self._old_table_keys, (
        #         "Table keys cannot change from iteration to iteration."
        #     )
        self._old_table_keys = table_keys

        logger.record_tabular(
            "Number of train steps total",
            self._n_train_steps_total,
        )
        logger.record_tabular(
            "Number of env steps total",
            self._n_env_steps_total,
        )
        logger.record_tabular(
            "Number of rollouts total",
            self._n_rollouts_total,
        )

        times_itrs = gt.get_times().stamps.itrs
        train_time = times_itrs['train'][-1]
        sample_time = times_itrs['sample'][-1]
        eval_time = times_itrs['eval'][-1] if epoch > 0 else 0
        epoch_time = train_time + sample_time + eval_time
        total_time = gt.get_times().total

        logger.record_tabular('Train Time (s)', train_time)
        logger.record_tabular('(Previous) Eval Time (s)', eval_time)
        logger.record_tabular('Sample Time (s)', sample_time)
        logger.record_tabular('Epoch Time (s)', epoch_time)
        logger.record_tabular('Total Train Time (s)', total_time)
        logger.record_tabular("Epoch", epoch)

        # tensorboard stuff: mirror the tabular values, grouped by key suffix/prefix.
        _writer = self._writer
        for k, v_str in logger._tabular:
            if k == 'Epoch':
                continue
            v = float(v_str)
            if k.endswith('Loss'):
                _writer.add_scalar('Loss/{}'.format(k), v, epoch)
            elif k.endswith('Max'):
                prefix = k[:-4]
                _writer.add_scalar('{}/{}'.format(prefix, k), v, epoch)
            elif k.endswith('Min'):
                prefix = k[:-4]
                _writer.add_scalar('{}/{}'.format(prefix, k), v, epoch)
            elif k.endswith('Std'):
                prefix = k[:-4]
                _writer.add_scalar('{}/{}'.format(prefix, k), v, epoch)
            elif k.endswith('Mean'):
                prefix = k[:-5]
                _writer.add_scalar('{}/{}'.format(prefix, k), v, epoch)
            elif 'Time' in k:
                _writer.add_scalar('Time/{}'.format(k), v, epoch)
            elif k.startswith('Num'):
                _writer.add_scalar('Number/{}'.format(k), v, epoch)
            elif k.startswith('Exploration'):
                _writer.add_scalar('Exploration/{}'.format(k), v, epoch)
            elif k.startswith('Test'):
                _writer.add_scalar('Test/{}'.format(k), v, epoch)
            else:
                _writer.add_scalar(k, v, epoch)
        _writer.file_writer.flush()

        logger.dump_tabular(with_prefix=False, with_timestamp=False)
    else:
        logger.log("Skipping eval for now.")