Example #1
def setup_pytorch_for_mpi():
    """
    Avoid slowdowns caused by each separate process's PyTorch using
    more than its fair share of CPU resources.
    """
    print('Proc %d: Reporting original number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True)
    if torch.get_num_threads() == 1:
        return
    fair_num_threads = max(int(torch.get_num_threads() / num_procs()), 1)
    torch.set_num_threads(fair_num_threads)
    print('Proc %d: Reporting new number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True)
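Every example in this listing relies on proc_id() and num_procs() from an MPI helper module. For reference, here is a minimal sketch of what those helpers look like, assuming mpi4py (this mirrors Spinning Up's mpi_tools, not necessarily this repository's exact implementation):

from mpi4py import MPI

def proc_id():
    """Rank of the calling process within MPI.COMM_WORLD."""
    return MPI.COMM_WORLD.Get_rank()

def num_procs():
    """Total number of active MPI processes."""
    return MPI.COMM_WORLD.Get_size()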
Example #2
    def dump_tabular(self):
        """
        Write all of the diagnostics from the current iteration.

        Writes both to stdout, and to the output file.
        """
        if proc_id() == 0:
            vals = []
            key_lens = [len(key) for key in self.log_headers]
            max_key_len = max(15, max(key_lens))
            keystr = '%' + '%d' % max_key_len
            fmt = "| " + keystr + "s | %15s |"
            n_slashes = 22 + max_key_len
            print("-" * n_slashes)
            for key in self.log_headers:
                val = self.log_current_row.get(key, "")
                valstr = "%8.3g" % val if hasattr(val, "__float__") else val
                print(fmt % (key, valstr))
                vals.append(val)
            print("-" * n_slashes)
            if self.output_file is not None:
                if self.first_row:
                    self.output_file.write("\t".join(self.log_headers) + "\n")
                self.output_file.write("\t".join(map(str, vals)) + "\n")
                self.output_file.flush()
        self.log_current_row.clear()
        self.first_row = False
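A short usage sketch of what dump_tabular produces, assuming a log_tabular(key, val) method that fills log_current_row as in Spinning Up's Logger (column widths shown approximately):

logger.log_tabular('Epoch', 1)
logger.log_tabular('TestEpRet', -3.21)
logger.dump_tabular()
# stdout:
# -------------------------------------
# |           Epoch |               1 |
# |       TestEpRet |           -3.21 |
# -------------------------------------
# progress.txt gains a tab-separated header row (first call only),
# followed by one tab-separated row of values per call.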
Example #3
    def save_state(self, state_dict, itr=None):
        """
        Saves the state of an experiment.

        To be clear: this is about saving *state*, not logging diagnostics.
        All diagnostic logging is separate from this function. This function
        will save whatever is in ``state_dict``---usually just a copy of the
        environment---and the most recent parameters for the model you
        previously set up saving for with ``setup_tf_saver``.

        Call with any frequency you prefer. If you only want to maintain a
        single state and overwrite it at each call with the most recent
        version, leave ``itr=None``. If you want to keep all of the states you
        save, provide unique (increasing) values for 'itr'.

        Args:
            state_dict (dict): Dictionary containing essential elements to
                describe the current state of training.

            itr: An int, or None. Current iteration of training.
        """
        if proc_id() == 0:
            fname = 'vars.pkl' if itr is None else 'vars%d.pkl' % itr
            try:
                joblib.dump(state_dict, osp.join(self.output_dir, fname))
            except Exception:
                self.log('Warning: could not pickle state_dict.', color='red')
            if hasattr(self, 'tf_saver_elements'):
                self._tf_simple_save(itr)
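To restore such a snapshot later, the pickled dict can simply be read back with joblib; a minimal sketch that mirrors the naming scheme above (load_state is a hypothetical helper, not part of this logger):

import os.path as osp
import joblib

def load_state(output_dir, itr=None):
    # Mirrors the vars.pkl / vars<itr>.pkl convention used by save_state.
    fname = 'vars.pkl' if itr is None else 'vars%d.pkl' % itr
    return joblib.load(osp.join(output_dir, fname))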
Example #4
def launch(net, args):
    env = gym.make(args.env_name)
    # Special function to avoid certain slowdowns from PyTorch + MPI combo.
    setup_pytorch_for_mpi()
    # Make sure different processes get different random seeds!
    seed = args.seed
    seed += 10000 * proc_id()
    env.seed(seed)
    np.random.seed(seed)

    try:
        s_dim = env.observation_space.shape[0]
    except Exception:
        # Goal-based envs expose a Dict observation space instead.
        s_dim = env.observation_space.spaces['observation'].shape[0] + \
                env.observation_space.spaces['desired_goal'].shape[0]

    act_dim = env.action_space.shape[0]
    a_bound = env.action_space.high[0]

    import os
    # Must be set before CUDA is initialized for the remapping to take effect.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    """        
        torch1.17.1,gpu_id: 1 device: cuda:0,用的是物理上的0卡;
        cuda的序号仍然是按照物理序号;
        torch1.3.1,gpu_id: 1 device: cuda:0,用的是物理上的1卡,
        torch1.3.1,gpu_id: 1 device: cuda:1,报错:invalid device ordinal;
        torch1.3.1,gpu_id: 1,3 device: cuda:1,用的是物理上的3卡,
        有点类似于指定GPU-ID后,cuda会重新排序。        
    """

    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu_id != -1 else "cpu")
    print("gpu_id:", args.gpu_id, "device:", device)

    net = net(
        act_dim=act_dim,
        obs_dim=s_dim,
        a_bound=a_bound,
        per_flag=args.per,
        her_flag=args.her,
        action_l2=args.action_l2,
        state_norm=args.state_norm,
        gamma=args.gamma,
        sess_opt=args.sess_opt,
        seed=args.seed,
        clip_return=args.clip_return,
        device=device,
    )
    restore_path = 'HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/'
    net.load_simple_network(restore_path + "actor.pth")
    # net.load_replay_buffer(restore_path + "replay.pkl")  # disabled by default: the buffer file is too large
    net.load_norm(restore_path + "norm.pkl")
    trainer(net, env, args)
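The renumbering described in the notes above is standard CUDA semantics whenever CUDA_VISIBLE_DEVICES is set before CUDA is initialized; a small illustration (GPU indices are hypothetical):

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1,3"  # set before any CUDA call
import torch
# torch.cuda.device_count() == 2 on a 4-GPU machine:
# cuda:0 now maps to physical GPU 1, cuda:1 to physical GPU 3.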
Example #5
    def _tf_simple_save(self, itr=None):
        """
        Uses simple_save to save a trained model, plus info to make it easy
        to associate tensors with variables after restore.
        """
        if proc_id() == 0:
            assert hasattr(self, 'tf_saver_elements'), \
                "First have to setup saving with self.setup_tf_saver"
            fpath = 'simple_save' + ('%d' % itr if itr is not None else '')
            fpath = osp.join(self.output_dir, fpath)
            if osp.exists(fpath):
                # simple_save refuses to be useful if fpath already exists,
                # so just delete fpath if it's there.
                shutil.rmtree(fpath)
            tf.saved_model.simple_save(export_dir=fpath,
                                       **self.tf_saver_elements)
            joblib.dump(self.tf_saver_info, osp.join(fpath, 'model_info.pkl'))
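A model exported this way can be restored in TF1 with the saved-model loader; a hedged sketch, assuming fpath points at one of the simple_save directories written above:

import joblib
import os.path as osp
import tensorflow as tf

sess = tf.Session()
# simple_save exports the graph under the 'serve' tag.
tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], fpath)
# The pickled tf_saver_info makes it easier to map names back to tensors.
info = joblib.load(osp.join(fpath, 'model_info.pkl'))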
Example #6
    def __init__(self,
                 output_dir=None,
                 output_fname='progress.txt',
                 exp_name=None):
        """
        Initialize a Logger.

        Args:
            output_dir (string): A directory for saving results to. If
                ``None``, defaults to a temp directory of the form
                ``/tmp/experiments/somerandomnumber``.

            output_fname (string): Name for the tab-separated-value file
                containing metrics logged throughout a training run.
                Defaults to ``progress.txt``.

            exp_name (string): Experiment name. If you run multiple training
                runs and give them all the same ``exp_name``, the plotter
                will know to group them. (Use case: if you run the same
                hyperparameter configuration with multiple random seeds, you
                should give them all the same ``exp_name``.)
        """
        if proc_id() == 0:
            self.output_dir = output_dir or "/tmp/experiments/%i" % int(
                time.time())
            if osp.exists(self.output_dir):
                print(
                    "Warning: Log dir %s already exists! Storing info there anyway."
                    % self.output_dir)
            else:
                os.makedirs(self.output_dir)
            self.output_file = open(osp.join(self.output_dir, output_fname),
                                    'w')
            atexit.register(self.output_file.close)
            print(
                colorize("Logging data to %s" % self.output_file.name,
                         'green',
                         bold=True))
        else:
            self.output_dir = None
            self.output_file = None
        self.first_row = True
        self.log_headers = []
        self.log_current_row = {}
        self.exp_name = exp_name
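A short usage sketch for the constructor (directory and experiment name are illustrative):

logger = Logger(output_dir='/tmp/experiments/demo', exp_name='ddpg_fetchpush')
# On rank 0 this creates the directory (or warns if it already exists),
# opens progress.txt for writing, and registers the file to be closed at
# exit; on every other rank, output_dir and output_file stay None.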
Example #7
    def save_config(self, config, root_dir=__file__):
        """
        Log an experiment configuration.

        Call this once at the top of your experiment, passing in all important
        config vars as a dict. This will serialize the config to JSON, while
        handling anything which can't be serialized in a graceful way (writing
        as informative a string as possible).

        Example use:

        .. code-block:: python

            logger = EpochLogger(**logger_kwargs)
            logger.save_config(locals())
        """

        if proc_id() == 0:
            # Back up the launching script alongside the experiment results.
            with open(root_dir, 'r') as file_to_read:
                lines = file_to_read.read().splitlines()
            with open(osp.join(self.output_dir, 'Script_backup.py'),
                      'w') as backup:
                for line in lines:
                    backup.write(line + '\n')

        config_json = convert_json(config)
        if self.exp_name is not None:
            config_json['exp_name'] = self.exp_name
        if proc_id() == 0:
            output = json.dumps(config_json,
                                separators=(',', ':\t'),
                                indent=4,
                                sort_keys=True)
            print(colorize('Saving config:\n', color='cyan', bold=True))
            print(output)
            with open(osp.join(self.output_dir, "config.json"), 'w') as out:
                out.write(output)
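convert_json is assumed by this method; a minimal sketch of the graceful-serialization idea (the real Spinning Up version also special-cases tuples, namedtuples, and objects with __name__):

def convert_json(obj):
    # Recursively turn obj into something json.dumps can handle,
    # falling back to an informative string for anything else.
    if isinstance(obj, dict):
        return {convert_json(k): convert_json(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_json(x) for x in obj]
    if isinstance(obj, (str, int, float, bool)) or obj is None:
        return obj
    return str(obj)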
Example #8
def launch(net, args):
    env = gym.make(args.env_name)
    # Make sure different processes get different random seeds!
    seed = args.seed
    seed += 10000 * proc_id()
    env.seed(seed)
    np.random.seed(seed)

    try:
        s_dim = env.observation_space.shape[0]
    except Exception:
        # Goal-based envs expose a Dict observation space instead.
        s_dim = env.observation_space.spaces['observation'].shape[0] + \
                env.observation_space.spaces['desired_goal'].shape[0]

    act_dim = env.action_space.shape[0]
    a_bound = env.action_space.high[0]

    net = net(
        act_dim=act_dim,
        obs_dim=s_dim,
        a_bound=a_bound,
        per_flag=args.per,
        her_flag=args.her,
        action_l2=args.action_l2,
        state_norm=args.state_norm,
        gamma=args.gamma,
        sess_opt=args.sess_opt,
        seed=args.seed,
        clip_return=args.clip_return,
    )
    # Sync the RL network's parameters; if the graph also contains non-RL variables, this may cause problems.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # Sync params across processes
    sess.run(sync_all_params())

    trainer(net, env, args)
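Conceptually, sync_all_params broadcasts the freshly initialized variables from rank 0 so that every process starts from identical weights. A sketch of the underlying idea with mpi4py (the real implementation flattens the TF variables into contiguous arrays first):

from mpi4py import MPI

def broadcast_params(params, root=0):
    """Overwrite each process's parameter arrays with rank 0's copy."""
    for p in params:  # each p: a C-contiguous numpy array
        MPI.COMM_WORLD.Bcast(p, root=root)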
Example #9
def trainer(net, env, args):
    # logger
    exp_name = args.exp_name + '_' + args.RL_name + '_' + args.env_name
    logger_kwargs = setup_logger_kwargs(exp_name=exp_name,
                                        seed=args.seed,
                                        output_dir=args.output_dir + "/")
    logger = EpochLogger(**logger_kwargs)
    if proc_id() == 0:
        sys.stdout = Logger(logger_kwargs["output_dir"] + "/print.log",
                            sys.stdout)
        logger.save_config(locals(), __file__)
    # start running
    start_time = time.time()
    for i in range(args.n_epochs):
        test_ep_reward, logger = net.test_agent(
            args=args,
            env=env,
            n=10,
            logger=logger,
            obs2state=obs2state,
        )
        logger.store(TestEpRet=test_ep_reward)

        logger.log_tabular('Epoch', i)
        logger.log_tabular('TestEpRet', average_only=True)

        logger.log_tabular('TestSuccess', average_only=True)
        logger.dump_tabular()

    if proc_id() == 0:
        print(
            colorize("the experiment %s has ended" % logger.output_file.name,
                     'green',
                     bold=True))
    net.save_simple_network(logger_kwargs["output_dir"])
    net.save_norm(logger_kwargs["output_dir"])
    net.save_replay_buffer(logger_kwargs["output_dir"])
Example #10
def trainer(net, env, args):
    # logger
    exp_name = args.exp_name + '_' + args.RL_name + '_' + args.env_name
    logger_kwargs = setup_logger_kwargs(exp_name=exp_name,
                                        seed=args.seed,
                                        output_dir=args.output_dir + "/")
    logger = EpochLogger(**logger_kwargs)
    if proc_id() == 0:
        sys.stdout = Logger(logger_kwargs["output_dir"] + "/print.log",
                            sys.stdout)
        logger.save_config(locals(), __file__)
    # start running
    start_time = time.time()
    for i in range(args.n_epochs):
        for c in range(args.n_cycles):
            obs = env.reset()
            episode_trans = []
            s = obs2state(obs)
            ep_reward = 0
            real_ep_reward = 0
            episode_time = time.time()

            success = []
            for j in range(args.n_steps):
                a = net.get_action(s, noise_scale=args.noise_ps)
                # a = net.get_action(s)
                # a = noise.add_noise(a)

                if np.random.random() < args.random_eps:
                    a = np.random.uniform(low=-net.a_bound,
                                          high=net.a_bound,
                                          size=net.act_dim)
                a = np.clip(a, -net.a_bound, net.a_bound)
                # ensure the gripper stays closed!
                try:
                    obs_next, r, done, info = env.step(a)
                    success.append(info["is_success"])
                except Exception:
                    # Envs whose info lacks "is_success" fall back to done.
                    success.append(int(done))
                s_ = obs2state(obs_next)

                # visualization
                if args.render and i % 3 == 0 and c % 20 == 0:
                    env.render()

                # Keep gym's max-step TimeLimit from being treated as a true terminal state.
                done = False if j == args.n_steps - 1 else done

                if not args.her:
                    net.store_transition((s, a, r, s_, done))

                episode_trans.append([obs, a, r, obs_next, done, info])
                s = s_
                obs = obs_next
                ep_reward += r
                real_ep_reward += r
            if args.her:
                net.save_episode(episode_trans=episode_trans,
                                 reward_func=env.compute_reward,
                                 obs2state=obs2state)
            logger.store(EpRet=ep_reward)
            logger.store(EpRealRet=real_ep_reward)

            # 40 gradient updates per cycle
            for _ in range(40):
                outs = net.learn(
                    args.batch_size,
                    args.base_lr,
                    args.base_lr * 2,
                )
                if outs[1] is not None:
                    logger.store(Q1=outs[1])
                    logger.store(Q2=outs[2])
            if 0.0 < sum(success) < args.n_steps:
                print("epoch:", i, "\tep:", c, "\tep_rew:",
                      ep_reward, "\ttime:",
                      np.round(time.time() - episode_time, 3), '\tdone:',
                      sum(success))

        test_ep_reward, logger = net.test_agent(
            args=args,
            env=env,
            n=10,
            logger=logger,
            obs2state=obs2state,
        )
        logger.store(TestEpRet=test_ep_reward)

        logger.log_tabular('Epoch', i)
        logger.log_tabular('EpRet', average_only=True)
        logger.log_tabular('EpRealRet', average_only=True)
        logger.log_tabular('TestEpRet', average_only=True)

        logger.log_tabular('Q1', with_min_and_max=True)
        logger.log_tabular('Q2', average_only=True)

        logger.log_tabular('TestSuccess', average_only=True)

        logger.log_tabular(
            'TotalEnvInteracts',
            i * args.n_cycles * args.n_steps + c * args.n_steps + j + 1)
        logger.log_tabular('TotalTime', time.time() - start_time)
        logger.dump_tabular()

    if proc_id() == 0:
        print(
            colorize("the experiment %s has ended" % logger.output_file.name,
                     'green',
                     bold=True))
    net.save_simple_network(logger_kwargs["output_dir"])
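The obs2state helper passed around in these trainers is assumed to flatten a goal-based gym observation dict into a single vector, matching the s_dim computation in launch; a hedged sketch, not necessarily the repository's exact code:

import numpy as np

def obs2state(obs):
    # Goal-based envs return a dict; plain envs return the array itself.
    if isinstance(obs, dict):
        return np.concatenate([obs['observation'], obs['desired_goal']])
    return obs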
Example #11
    def log(self, msg, color='green'):
        """Print a colorized message to stdout."""
        if proc_id() == 0:
            print(colorize(msg, color, bold=True))