Ejemplo n.º 1
0
def run(args, env_fn, policy_fn, get_weights_fn, set_weights_fn):
    """Spawn the explorer, learner and evaluator processes and wait for them.

    Args:
        args: Configuration object. ``args.n_explorer`` is overwritten in
            place: it is forced to 1 when ``args.n_env > 1`` and set to
            ``cpu_count() - 1`` when it is ``None``.
        env_fn: Zero-argument callable returning an environment. It is
            called here once for ``prepare_experiment`` and once each for
            the learner and the evaluator; the callable itself is handed
            to the explorers.
        policy_fn: Forwarded unchanged to every spawned process.
        get_weights_fn: Forwarded to the learner process.
        set_weights_fn: Forwarded to the explorer and evaluator processes.

    Raises:
        AssertionError: If the resolved number of explorers is not positive.
    """
    initialize_logger(
        logging_level=logging.getLevelName(args.logging_level))

    # Resolve how many explorer processes to launch.
    if args.n_env > 1:
        args.n_explorer = 1
    elif args.n_explorer is None:
        args.n_explorer = multiprocessing.cpu_count() - 1
    assert args.n_explorer > 0, "[error] number of explorers must be positive integer"

    env = env_fn()

    shared = prepare_experiment(env, args)
    replay_buffer, weight_queues, done_flag, buffer_lock, step_counter = shared

    # Fixed value handed to every explorer (presumably exploration noise
    # scale -- confirm against `explorer`).
    noise = 0.3

    # With several environments per explorer a single explorer bound to the
    # first queue is used; otherwise one explorer per queue index.
    explorer_count = 1 if args.n_env > 1 else args.n_explorer
    explorer_common = [
        step_counter, done_flag, buffer_lock,
        env_fn, policy_fn, set_weights_fn, noise,
        args.n_env, args.n_thread, args.local_buffer_size,
        args.episode_max_steps, args.gpu_explorer,
    ]

    # Add explorers
    workers = [
        Process(
            target=explorer,
            args=[replay_buffer, weight_queues[idx], *explorer_common])
        for idx in range(explorer_count)
    ]

    # Add learner (receives every queue)
    learner_args = [
        replay_buffer, step_counter, done_flag,
        buffer_lock, env_fn(), policy_fn, get_weights_fn,
        args.n_training, args.param_update_freq,
        args.test_freq, args.gpu_learner, weight_queues,
    ]
    workers.append(Process(target=learner, args=learner_args))

    # Add evaluator (listens on the last queue)
    evaluator_args = [
        done_flag, env_fn(), policy_fn, set_weights_fn,
        weight_queues[-1], args.gpu_evaluator, args.save_model_interval,
    ]
    workers.append(Process(target=evaluator, args=evaluator_args))

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
Ejemplo n.º 2
0
    def __init__(self, policy, env, args, test_env=None):
        """Configure the trainer: settings, environments, logging, checkpoints.

        Args:
            policy: Policy object; its ``policy_name`` becomes part of the
                output directory suffix.
            env: Training environment.
            args: Configuration namespace. ``dir_suffix``, ``logging_level``,
                ``evaluate`` and ``model_dir`` are read here and the whole
                object is passed to ``_set_from_args``.
            test_env: Environment used for testing; ``env`` is reused when
                this is ``None``.
        """
        self._set_from_args(args)
        self._policy = policy
        self._env = env
        self._test_env = test_env if test_env is not None else self._env

        # Observation normalisation needs a Box space to know the shape.
        if self._normalize_obs:
            obs_space = env.observation_space
            assert isinstance(obs_space, Box)
            self._obs_normalizer = EmpiricalNormalizer(shape=obs_space.shape)

        # prepare log directory
        dir_suffix = "{}_{}".format(self._policy.policy_name, args.dir_suffix)
        self._output_dir = prepare_output_dir(
            args=args,
            user_specified_dir=self._logdir,
            suffix=dir_suffix)
        log_level = logging.getLevelName(args.logging_level)
        self.logger = initialize_logger(
            logging_level=log_level,
            output_dir=self._output_dir)

        # Evaluation requires a model directory to load from.
        if args.evaluate:
            assert args.model_dir is not None
        self._set_check_point(args.model_dir)

        # prepare TensorBoard output
        summary_writer = tf.summary.create_file_writer(self._output_dir)
        self.writer = summary_writer
        self.writer.set_as_default()
Ejemplo n.º 3
0
    def __init__(self, policy, env, args, test_env=None):
        """Set up environments, logging, checkpointing and TensorBoard output.

        Args:
            policy: Policy object; it is tracked by the checkpoint and its
                ``policy_name`` becomes part of the output directory suffix.
            env: Training environment.
            args: Configuration namespace. ``dir_suffix``, ``logging_level``
                and ``model_dir`` are read here and the whole object is
                passed to ``_set_from_args``.
            test_env: Environment used for testing; ``env`` is reused when
                this is ``None``.
        """
        self._policy = policy
        self._env = env
        self._test_env = self._env if test_env is None else test_env
        self._set_from_args(args)

        # prepare log directory
        self._output_dir = prepare_output_dir(
            args=args,
            user_specified_dir="./results",
            suffix="{}_{}".format(self._policy.policy_name, args.dir_suffix))
        self.logger = initialize_logger(
            logging_level=logging.getLevelName(args.logging_level),
            output_dir=self._output_dir)

        # Save and restore model
        checkpoint = tf.train.Checkpoint(policy=self._policy)
        self.checkpoint_manager = tf.train.CheckpointManager(
            checkpoint, directory=self._output_dir, max_to_keep=5)
        if args.model_dir is not None:
            assert os.path.isdir(args.model_dir)
            path_ckpt = tf.train.latest_checkpoint(args.model_dir)
            if path_ckpt is None:
                # latest_checkpoint yields None when the directory contains
                # no checkpoint; report that instead of "Restored None".
                self.logger.warning(
                    "No checkpoint found in {}".format(args.model_dir))
            else:
                checkpoint.restore(path_ckpt)
                self.logger.info("Restored {}".format(path_ckpt))

        # prepare TensorBoard output
        self.writer = tf.summary.create_file_writer(self._output_dir)
        self.writer.set_as_default()
Ejemplo n.º 4
0
    def __init__(self, policy, env, params, test_env=None):
        """Build the training instance from a nested parameter dict.

        Args:
            policy: Policy object; its ``policy_name`` becomes part of the
                output directory suffix.
            env: Training environment; ``workspace.shape`` and ``grid_size``
                are read from it here.
            params: Nested configuration dict; the ``"cae"`` and
                ``"trainer"`` sections are read here.
            test_env: Environment used for testing; ``env`` is reused when
                this is ``None``.
        """
        self._params = params
        self._set_from_params()
        self._policy = policy
        self._env = env
        self._test_env = test_env if test_env is not None else self._env
        args = self._get_args_from_params()

        # Convolutional Autoencoder: build it, load its weights, then freeze
        # every layer it reports.
        cae_cfg = self._params["cae"]
        ws_shape = self._env.workspace.shape
        self._CAE = CAE(
            pooling=cae_cfg["pooling"],
            latent_dim=cae_cfg["latent_dim"],
            input_shape=ws_shape,
            conv_filters=cae_cfg["conv_filters"])
        self._CAE.build(input_shape=(1, ws_shape[0], ws_shape[1], 1))
        self._CAE.load_weights(filepath=cae_cfg["weights_path"])
        for layer in self._CAE._get_trainable_state():
            layer.trainable = False

        # Initialize array for trajectory storage
        self.trajectory = []

        # Initialize workspace relabeler:
        trainer_cfg = params["trainer"]
        grid = self._env.grid_size
        self._relabeler = PointrobotRelabeler(
            ws_shape=(grid, grid),
            mode=trainer_cfg["relabeling_mode"],
            remove_zigzaging=trainer_cfg["remove_zigzaging"])

        # prepare log directory
        dir_suffix = "{}_{}".format(
            self._policy.policy_name, trainer_cfg["dir_suffix"])
        self._output_dir = prepare_output_dir(
            args=args,
            user_specified_dir=self._logdir,
            suffix=dir_suffix)
        self.logger = initialize_logger(
            logging_level=logging.getLevelName(trainer_cfg["logging_level"]),
            output_dir=self._output_dir)

        # Optional per-outcome sub-directories, created under the log dir.
        if self._save_test_path_sep:
            for subdir in ('successful_trajs', 'unsuccessful_trajs', 'unfinished_trajs'):
                target = os.path.join(self._logdir, subdir)
                if not os.path.exists(target):
                    os.makedirs(target)

        # Evaluation needs a non-empty model directory.
        if trainer_cfg["mode"] == "evaluate":
            assert glob.glob(os.path.join(trainer_cfg["model_dir"], '*'))
        self._set_check_point(trainer_cfg["model_dir"])

        # prepare TensorBoard output
        summary_writer = tf.summary.create_file_writer(self._output_dir)
        self.writer = summary_writer
        self.writer.set_as_default()

        # relabeling visualization:
        self._relabel_fig = plt.figure(2)
Ejemplo n.º 5
0
    def __init__(self, policy, env, args, test_env=None):
        """
        Set up the Trainer.

        Args:
            policy: Policy to be trained
            env (gym.Env): Environment used for training
            args (Namespace or dict): Configuration. A dict is applied as
                overrides on top of the parser defaults obtained from the
                policy class and ``Trainer``.
            test_env (gym.Env): Environment used for testing; ``env`` is
                reused when this is ``None``.

        Raises:
            ValueError: If ``args`` is a dict containing a key that the
                default namespace does not define.
        """
        if isinstance(args, dict):
            overrides = args
            # Start from the defaults of the combined argument parser.
            parser = policy.__class__.get_argument(Trainer.get_argument())
            args = parser.parse_args([])
            for k, v in overrides.items():
                if not hasattr(args, k):
                    raise ValueError(f"{k} is invalid parameter.")
                setattr(args, k, v)

        self._set_from_args(args)
        self._policy = policy
        self._env = env
        self._test_env = test_env if test_env is not None else self._env

        # Observation normalisation needs a Box space to know the shape.
        if self._normalize_obs:
            obs_space = env.observation_space
            assert isinstance(obs_space, Box)
            self._obs_normalizer = EmpiricalNormalizer(shape=obs_space.shape)

        # prepare log directory
        dir_suffix = "{}_{}".format(self._policy.policy_name, args.dir_suffix)
        self._output_dir = prepare_output_dir(
            args=args,
            user_specified_dir=self._logdir,
            suffix=dir_suffix)
        log_level = logging.getLevelName(args.logging_level)
        self.logger = initialize_logger(
            logging_level=log_level,
            output_dir=self._output_dir)

        # Evaluation requires a model directory to load from.
        if args.evaluate:
            assert args.model_dir is not None
        self._set_check_point(args.model_dir)

        # prepare TensorBoard output
        summary_writer = tf.summary.create_file_writer(self._output_dir)
        self.writer = summary_writer
        self.writer.set_as_default()
Ejemplo n.º 6
0
    def __init__(
            self,
            policy,
            env,
            args,
            test_env=None):
        """Set up settings, environments, logging, checkpointing and TensorBoard.

        Args:
            policy: Policy object; it is tracked by the checkpoint and its
                ``policy_name`` becomes part of the output directory suffix.
            env: Training environment.
            args: Configuration namespace. ``dir_suffix``, ``logging_level``,
                ``evaluate`` and ``model_dir`` are read here and the whole
                object is passed to ``_set_from_args``.
            test_env: Environment used for testing; ``env`` is reused when
                this is ``None``.
        """
        self._set_from_args(args)
        self._policy = policy
        self._env = env
        self._test_env = self._env if test_env is None else test_env
        if self._normalize_obs:
            # Observation normalisation needs a Box space to know the shape.
            assert isinstance(env.observation_space, Box)
            self._obs_normalizer = EmpiricalNormalizer(
                shape=env.observation_space.shape)

        # prepare log directory
        self._output_dir = prepare_output_dir(
            args=args, user_specified_dir=self._logdir,
            suffix="{}_{}".format(self._policy.policy_name, args.dir_suffix))
        self.logger = initialize_logger(
            logging_level=logging.getLevelName(args.logging_level),
            output_dir=self._output_dir)

        # Save and restore model
        self._checkpoint = tf.train.Checkpoint(policy=self._policy)
        self.checkpoint_manager = tf.train.CheckpointManager(
            self._checkpoint, directory=self._output_dir, max_to_keep=5)
        if args.evaluate:
            assert args.model_dir is not None
        if args.model_dir is not None:
            assert os.path.isdir(args.model_dir)
            self._latest_path_ckpt = tf.train.latest_checkpoint(args.model_dir)
            if self._latest_path_ckpt is None:
                # latest_checkpoint yields None when the directory contains
                # no checkpoint; report that instead of "Restored None".
                self.logger.warning(
                    "No checkpoint found in {}".format(args.model_dir))
            else:
                self._checkpoint.restore(self._latest_path_ckpt)
                self.logger.info(
                    "Restored {}".format(self._latest_path_ckpt))

        # prepare TensorBoard output
        self.writer = tf.summary.create_file_writer(self._output_dir)
        self.writer.set_as_default()