def setup_pytorch_for_mpi():
    """
    Avoid slowdowns caused by each separate process's PyTorch using
    more than its fair share of CPU resources.
    """
    print('Proc %d: Reporting original number of Torch threads as %d.' %
          (proc_id(), torch.get_num_threads()), flush=True)
    if torch.get_num_threads() == 1:
        return
    fair_num_threads = max(int(torch.get_num_threads() / num_procs()), 1)
    torch.set_num_threads(fair_num_threads)
    print('Proc %d: Reporting new number of Torch threads as %d.' %
          (proc_id(), torch.get_num_threads()), flush=True)
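# Usage sketch (assumes Spinning Up-style MPI helpers; the module path and the
# mpi_fork helper are assumptions, not necessarily this repo's exact layout):
# fork the workers first, then trim each worker's Torch thread budget before
# any networks are built.
from spinup.utils.mpi_tools import mpi_fork

if __name__ == '__main__':
    mpi_fork(4)                # relaunch this script as 4 MPI processes
    setup_pytorch_for_mpi()    # each process keeps roughly 1/4 of the Torch threads
    # ... build networks and start training here ...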
def dump_tabular(self):
    """
    Write all of the diagnostics from the current iteration.

    Writes both to stdout, and to the output file.
    """
    if proc_id() == 0:
        vals = []
        key_lens = [len(key) for key in self.log_headers]
        max_key_len = max(15, max(key_lens))
        keystr = '%' + '%d' % max_key_len
        fmt = "| " + keystr + "s | %15s |"
        n_slashes = 22 + max_key_len
        print("-" * n_slashes)
        for key in self.log_headers:
            val = self.log_current_row.get(key, "")
            valstr = "%8.3g" % val if hasattr(val, "__float__") else val
            print(fmt % (key, valstr))
            vals.append(val)
        print("-" * n_slashes)
        if self.output_file is not None:
            if self.first_row:
                self.output_file.write("\t".join(self.log_headers) + "\n")
            self.output_file.write("\t".join(map(str, vals)) + "\n")
            self.output_file.flush()
    self.log_current_row.clear()
    self.first_row = False
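# Typical call pattern, for reference (EpochLogger is the Spinning Up wrapper
# around this Logger; the metric names below are purely illustrative):
logger = EpochLogger(output_dir='/tmp/experiments/demo')
for epoch in range(10):
    logger.store(EpRet=123.4)            # accumulate values during the epoch
    logger.log_tabular('Epoch', epoch)   # set this row's columns...
    logger.log_tabular('EpRet', average_only=True)
    logger.dump_tabular()                # ...then print them and append to progress.txt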
def save_state(self, state_dict, itr=None):
    """
    Saves the state of an experiment.

    To be clear: this is about saving *state*, not logging diagnostics.
    All diagnostic logging is separate from this function. This function
    will save whatever is in ``state_dict``---usually just a copy of the
    environment---and the most recent parameters for the model you
    previously set up saving for with ``setup_tf_saver``.

    Call with any frequency you prefer. If you only want to maintain a
    single state and overwrite it at each call with the most recent
    version, leave ``itr=None``. If you want to keep all of the states
    you save, provide unique (increasing) values for ``itr``.

    Args:
        state_dict (dict): Dictionary containing essential elements to
            describe the current state of training.

        itr: An int, or None. Current iteration of training.
    """
    if proc_id() == 0:
        fname = 'vars.pkl' if itr is None else 'vars%d.pkl' % itr
        try:
            joblib.dump(state_dict, osp.join(self.output_dir, fname))
        except:
            self.log('Warning: could not pickle state_dict.', color='red')
        if hasattr(self, 'tf_saver_elements'):
            self._tf_simple_save(itr)
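# The two saving modes described in the docstring (env and epoch are illustrative):
logger.save_state({'env': env})              # overwrite a single vars.pkl on every call
logger.save_state({'env': env}, itr=epoch)   # keep every snapshot: vars0.pkl, vars1.pkl, ...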
def launch(net, args):
    env = gym.make(args.env_name)
    # Special function to avoid certain slowdowns from PyTorch + MPI combo.
    setup_pytorch_for_mpi()

    # Make sure different processes use different random seeds!
    seed = args.seed
    seed += 10000 * proc_id()
    env.seed(seed)
    np.random.seed(seed)

    try:
        s_dim = env.observation_space.shape[0]
    except:
        s_dim = env.observation_space.spaces['observation'].shape[0] + \
                env.observation_space.spaces['desired_goal'].shape[0]

    act_dim = env.action_space.shape[0]
    a_bound = env.action_space.high[0]

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    # Note on GPU selection (observed behavior):
    #   torch 1.17.1, gpu_id: 1   -> device cuda:0 uses physical GPU 0;
    #       CUDA indices still follow the physical ordering.
    #   torch 1.3.1,  gpu_id: 1   -> device cuda:0 uses physical GPU 1.
    #   torch 1.3.1,  gpu_id: 1   -> device cuda:1 fails: invalid device ordinal.
    #   torch 1.3.1,  gpu_id: 1,3 -> device cuda:1 uses physical GPU 3.
    # In effect, once CUDA_VISIBLE_DEVICES is set, CUDA renumbers the visible
    # devices starting from 0.
    device = torch.device(
        "cuda:" + str(0) if torch.cuda.is_available() and args.gpu_id != -1
        else 'cpu')
    print("gpu_id:", args.gpu_id, "device:", device)

    net = net(act_dim=act_dim,
              obs_dim=s_dim,
              a_bound=a_bound,
              per_flag=args.per,
              her_flag=args.her,
              action_l2=args.action_l2,
              state_norm=args.state_norm,
              gamma=args.gamma,
              sess_opt=args.sess_opt,
              seed=args.seed,
              clip_return=args.clip_return,
              device=device,
              )
    restore_path = 'HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/'
    net.load_simple_network(restore_path + "actor.pth")
    # net.load_replay_buffer(restore_path + "replay.pkl")  # Disabled: the saved replay buffer file is too large, so it is not shipped by default.
    net.load_norm(restore_path + "norm.pkl")
    trainer(net, env, args)
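# Hedged illustration of the device remapping noted above, relying only on the
# standard CUDA_VISIBLE_DEVICES semantics: set the variable before CUDA is
# initialized, and the visible GPUs are renumbered from 0.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1,3"   # expose physical GPUs 1 and 3
import torch
print(torch.cuda.device_count())             # -> 2
dev = torch.device("cuda:1")                 # maps to physical GPU 3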
def _tf_simple_save(self, itr=None):
    """
    Uses simple_save to save a trained model, plus info to make it easy
    to associate tensors to variables after restore.
    """
    if proc_id() == 0:
        assert hasattr(self, 'tf_saver_elements'), \
            "First have to setup saving with self.setup_tf_saver"
        fpath = 'simple_save' + ('%d' % itr if itr is not None else '')
        fpath = osp.join(self.output_dir, fpath)
        if osp.exists(fpath):
            # simple_save refuses to be useful if fpath already exists,
            # so just delete fpath if it's there.
            shutil.rmtree(fpath)
        tf.saved_model.simple_save(export_dir=fpath, **self.tf_saver_elements)
        joblib.dump(self.tf_saver_info, osp.join(fpath, 'model_info.pkl'))
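# For reference, saving has to be wired up once before save_state() can reach
# this method. A sketch following the Spinning Up TF1 convention; the
# placeholder and output names are illustrative:
logger.setup_tf_saver(sess,
                      inputs={'x': x_ph, 'a': a_ph},
                      outputs={'pi': pi, 'q1': q1, 'q2': q2})
# ... train ...
logger.save_state({'env': env})   # internally calls _tf_simple_save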
def __init__(self, output_dir=None, output_fname='progress.txt', exp_name=None):
    """
    Initialize a Logger.

    Args:
        output_dir (string): A directory for saving results to. If
            ``None``, defaults to a temp directory of the form
            ``/tmp/experiments/somerandomnumber``.

        output_fname (string): Name for the tab-separated-value file
            containing metrics logged throughout a training run.
            Defaults to ``progress.txt``.

        exp_name (string): Experiment name. If you run multiple training
            runs and give them all the same ``exp_name``, the plotter
            will know to group them. (Use case: if you run the same
            hyperparameter configuration with multiple random seeds, you
            should give them all the same ``exp_name``.)
    """
    if proc_id() == 0:
        self.output_dir = output_dir or "/tmp/experiments/%i" % int(time.time())
        if osp.exists(self.output_dir):
            print("Warning: Log dir %s already exists! Storing info there anyway."
                  % self.output_dir)
        else:
            os.makedirs(self.output_dir)
        self.output_file = open(osp.join(self.output_dir, output_fname), 'w')
        atexit.register(self.output_file.close)
        print(colorize("Logging data to %s" % self.output_file.name,
                       'green', bold=True))
    else:
        self.output_dir = None
        self.output_file = None
    self.first_row = True
    self.log_headers = []
    self.log_current_row = {}
    self.exp_name = exp_name
def save_config(self, config, root_dir=__file__):
    """
    Log an experiment configuration.

    Call this once at the top of your experiment, passing in all important
    config vars as a dict. This will serialize the config to JSON, while
    handling anything which can't be serialized in a graceful way (writing
    as informative a string as possible).

    Example use:

    .. code-block:: python

        logger = EpochLogger(**logger_kwargs)
        logger.save_config(locals())
    """
    config_json = convert_json(config)
    if self.exp_name is not None:
        config_json['exp_name'] = self.exp_name
    if proc_id() == 0:
        # Back up the calling script so the exact code that produced this run
        # is stored alongside its results. (Only the root process has a valid
        # output_dir, so the backup is written there.)
        with open(root_dir, 'r') as file_to_read:
            lines = [line.rstrip('\n') for line in file_to_read]
        with open(osp.join(self.output_dir, 'Script_backup.py'), 'w') as backup:
            for line in lines:
                backup.write(str(line) + '\n')

        output = json.dumps(config_json, separators=(',', ':\t'),
                            indent=4, sort_keys=True)
        print(colorize('Saving config:\n', color='cyan', bold=True))
        print(output)
        with open(osp.join(self.output_dir, "config.json"), 'w') as out:
            out.write(output)
def launch(net, args):
    env = gym.make(args.env_name)

    # Make sure different processes use different random seeds!
    seed = args.seed
    seed += 10000 * proc_id()
    env.seed(seed)
    np.random.seed(seed)

    try:
        s_dim = env.observation_space.shape[0]
    except:
        s_dim = env.observation_space.spaces['observation'].shape[0] + \
                env.observation_space.spaces['desired_goal'].shape[0]

    act_dim = env.action_space.shape[0]
    a_bound = env.action_space.high[0]

    net = net(act_dim=act_dim,
              obs_dim=s_dim,
              a_bound=a_bound,
              per_flag=args.per,
              her_flag=args.her,
              action_l2=args.action_l2,
              state_norm=args.state_norm,
              gamma=args.gamma,
              sess_opt=args.sess_opt,
              seed=args.seed,
              clip_return=args.clip_return,
              )
    # Sync the RL network's parameters across processes; if the graph holds
    # variables beyond the RL network itself, this may cause problems.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Sync params across processes
    sess.run(sync_all_params())
    trainer(net, env, args)
def trainer(net, env, args):
    # logger
    exp_name = args.exp_name + '_' + args.RL_name + '_' + args.env_name
    logger_kwargs = setup_logger_kwargs(exp_name=exp_name,
                                        seed=args.seed,
                                        output_dir=args.output_dir + "/")
    logger = EpochLogger(**logger_kwargs)
    if proc_id() == 0:
        sys.stdout = Logger(logger_kwargs["output_dir"] + "/print.log",
                            sys.stdout)
        logger.save_config(locals(), __file__)

    # start running
    start_time = time.time()
    for i in range(args.n_epochs):
        test_ep_reward, logger = net.test_agent(args=args,
                                                env=env,
                                                n=10,
                                                logger=logger,
                                                obs2state=obs2state,
                                                )
        logger.store(TestEpRet=test_ep_reward)

        logger.log_tabular('Epoch', i)
        logger.log_tabular('TestEpRet', average_only=True)
        logger.log_tabular('TestSuccess', average_only=True)
        logger.dump_tabular()

    print(colorize("the experiment %s has ended" % logger.output_file.name,
                   'green', bold=True))
    net.save_simple_network(logger_kwargs["output_dir"])
    net.save_norm(logger_kwargs["output_dir"])
    net.save_replay_buffer(logger_kwargs["output_dir"])
def trainer(net, env, args):
    # logger
    exp_name = args.exp_name + '_' + args.RL_name + '_' + args.env_name
    logger_kwargs = setup_logger_kwargs(exp_name=exp_name,
                                        seed=args.seed,
                                        output_dir=args.output_dir + "/")
    logger = EpochLogger(**logger_kwargs)
    if proc_id() == 0:
        sys.stdout = Logger(logger_kwargs["output_dir"] + "/print.log",
                            sys.stdout)
        logger.save_config(locals(), __file__)

    # start running
    start_time = time.time()
    for i in range(args.n_epochs):
        for c in range(args.n_cycles):
            obs = env.reset()
            episode_trans = []
            s = obs2state(obs)
            ep_reward = 0
            real_ep_reward = 0
            episode_time = time.time()
            success = []
            for j in range(args.n_steps):
                a = net.get_action(s, noise_scale=args.noise_ps)
                # a = net.get_action(s)
                # a = noise.add_noise(a)
                if np.random.random() < args.random_eps:
                    a = np.random.uniform(low=-net.a_bound,
                                          high=net.a_bound,
                                          size=net.act_dim)
                a = np.clip(a, -net.a_bound, net.a_bound)
                # ensure the gripper close!
                try:
                    obs_next, r, done, info = env.step(a)
                    success.append(info["is_success"])
                except Exception as e:
                    success.append(int(done))
                s_ = obs2state(obs_next)

                # visualization
                if args.render and i % 3 == 0 and c % 20 == 0:
                    env.render()

                # Prevent done=True being returned just because gym hit its
                # maximum episode step limit.
                done = False if j == args.n_steps - 1 else done

                if not args.her:
                    net.store_transition((s, a, r, s_, done))

                episode_trans.append([obs, a, r, obs_next, done, info])
                s = s_
                obs = obs_next
                ep_reward += r
                real_ep_reward += r
            if args.her:
                net.save_episode(episode_trans=episode_trans,
                                 reward_func=env.compute_reward,
                                 obs2state=obs2state)
            logger.store(EpRet=ep_reward)
            logger.store(EpRealRet=real_ep_reward)

            for _ in range(40):
                outs = net.learn(args.batch_size,
                                 args.base_lr,
                                 args.base_lr * 2,
                                 )
                if outs[1] is not None:
                    logger.store(Q1=outs[1])
                    logger.store(Q2=outs[2])

            if 0.0 < sum(success) < args.n_steps:
                print("epoch:", i,
                      "\tep:", c,
                      "\tep_rew:", ep_reward,
                      "\ttime:", np.round(time.time() - episode_time, 3),
                      '\tdone:', sum(success))

        test_ep_reward, logger = net.test_agent(args=args,
                                                env=env,
                                                n=10,
                                                logger=logger,
                                                obs2state=obs2state,
                                                )
        logger.store(TestEpRet=test_ep_reward)

        logger.log_tabular('Epoch', i)
        logger.log_tabular('EpRet', average_only=True)
        logger.log_tabular('EpRealRet', average_only=True)
        logger.log_tabular('TestEpRet', average_only=True)
        logger.log_tabular('Q1', with_min_and_max=True)
        logger.log_tabular('Q2', average_only=True)
        logger.log_tabular('TestSuccess', average_only=True)
        logger.log_tabular('TotalEnvInteracts',
                           i * args.n_cycles * args.n_steps + c * args.n_steps + j + 1)
        logger.log_tabular('TotalTime', time.time() - start_time)
        logger.dump_tabular()

    print(colorize("the experiment %s has ended" % logger.output_file.name,
                   'green', bold=True))
    net.save_simple_network(logger_kwargs["output_dir"])
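# The trainer depends on an obs2state() helper that is not shown in this
# section. Judging by how s_dim is computed in launch(), it most likely
# flattens a goal-conditioned observation dict; the reconstruction below is a
# hypothetical sketch, not the repo's actual implementation.
import numpy as np

def obs2state(obs):
    # Assumed behavior: concatenate the raw observation with the desired goal
    # (matching s_dim = obs_dim + goal_dim); pass plain arrays through as-is.
    if isinstance(obs, dict):
        return np.concatenate([obs['observation'], obs['desired_goal']])
    return obs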
def log(self, msg, color='green'):
    """Print a colorized message to stdout."""
    if proc_id() == 0:
        print(colorize(msg, color, bold=True))