Example 1
    def __init__(self, algorithm, obs_dim, act_dim):
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        super(OpenSimAgent, self).__init__(algorithm)

        # Use ParallelExecutor to make the program run faster
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = 4
        build_strategy = fluid.BuildStrategy()
        build_strategy.remove_unnecessary_lock = True

        with fluid.scope_guard(fluid.global_scope().new_scope()):
            self.learn_pe = fluid.ParallelExecutor(
                use_cuda=machine_info.is_gpu_available(),
                main_program=self.learn_program,
                exec_strategy=exec_strategy,
                build_strategy=build_strategy)

        with fluid.scope_guard(fluid.global_scope().new_scope()):
            self.pred_pe = fluid.ParallelExecutor(
                use_cuda=machine_info.is_gpu_available(),
                main_program=self.pred_program,
                exec_strategy=exec_strategy,
                build_strategy=build_strategy)

        # Attention: at the beginning, fully sync the target model (decay=0 copies all weights).
        self.alg.sync_target(decay=0,
                             share_vars_parallel_executor=self.learn_pe)
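
For orientation, a hedged sketch of how the prediction executor built above might be invoked later; the feed name 'obs' and the fetched variable `pred_act` are illustrative assumptions, not attributes shown in this snippet:

    import numpy as np

    # Hypothetical prediction call against pred_program: feed a batch of
    # observations and fetch the action variable defined by build_program().
    obs = np.random.uniform(-1.0, 1.0, size=(1, agent.obs_dim)).astype('float32')
    action = agent.pred_pe.run(
        feed={'obs': obs},                       # assumed feed name
        fetch_list=[agent.pred_act.name])[0]     # assumed fetch variable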
Example 2
    def __init__(self, algorithm, obs_dim, action_dim):
        self._action_dim = action_dim
        self._obs_dim = obs_dim
        self._update_target_steps = 1000

        self._global_step = 0
        self.exploration_ratio = 0.9
        self.exploration_decre = 1e-7
        self.exploration_min = 0.1
        super(ElevatorAgent, self).__init__(algorithm)

        use_cuda = machine_info.is_gpu_available()
        if self.gpu_id >= 0:
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'

        else:
            os.environ['CPU_NUM'] = str(1)

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1
        exec_strategy.num_iteration_per_drop_scope = 10
        build_strategy = fluid.BuildStrategy()
        build_strategy.remove_unnecessary_lock = False

        self.learn_pe = fluid.ParallelExecutor(
            use_cuda=use_cuda,
            main_program=self.learn_program,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy,
        )
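
The three exploration fields above imply a linear epsilon decay with a floor. A minimal sketch of that schedule, assuming a hypothetical helper the agent would call once per sampling step:

    def _decay_exploration(self):
        # Anneal the exploration ratio by `exploration_decre` per step,
        # never dropping below `exploration_min`.
        self.exploration_ratio = max(
            self.exploration_min,
            self.exploration_ratio - self.exploration_decre)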
Example 3
    def __init__(self,
                 algorithm,
                 agent_index=None,
                 obs_dim_n=None,
                 act_dim_n=None,
                 batch_size=None,
                 speedup=False):
        assert isinstance(agent_index, int)
        assert isinstance(obs_dim_n, list)
        assert isinstance(act_dim_n, list)
        assert isinstance(batch_size, int)
        assert isinstance(speedup, bool)
        self.agent_index = agent_index
        self.obs_dim_n = obs_dim_n
        self.act_dim_n = act_dim_n
        self.batch_size = batch_size
        self.speedup = speedup
        self.n = len(act_dim_n)

        self.memory_size = int(1e6)
        self.min_memory_size = batch_size * 25  # batch_size * args.max_episode_len
        self.rpm = ReplayMemory(
            max_size=self.memory_size,
            obs_dim=self.obs_dim_n[agent_index],
            act_dim=self.act_dim_n[agent_index])
        self.global_train_step = 0

        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        super(MAAgent, self).__init__(algorithm)

        # Attention: at the beginning, fully sync the target model (decay=0 copies all weights).
        self.alg.sync_target(decay=0)
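
Since `min_memory_size = batch_size * 25`, learning is presumably gated on the replay memory reaching that size. A hedged sketch of such a guard; the method body and the `ReplayMemory` calls are assumptions:

    def learn(self):
        self.global_train_step += 1
        # Skip training until the replay memory holds enough transitions.
        if self.rpm.size() < self.min_memory_size:
            return
        batch = self.rpm.sample_batch(self.batch_size)
        # ... feed `batch` to the learn program of self.alg ...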
Example 4
    def __init__(self,
                 algorithm,
                 obs_shape,
                 predict_thread_num,
                 learn_data_provider=None):
        """

        Args:
            algorithm (`parl.Algorithm`): A2C algorithm
            obs_shape (list/tuple): observation shape of the Atari environment
            predict_thread_num (int): number of predict threads (each runs a predict parallel executor)
            learn_data_provider: data generator for training
        """

        assert isinstance(obs_shape, (list, tuple))
        assert isinstance(predict_thread_num, int)
        self.obs_shape = obs_shape

        super(AtariAgent, self).__init__(algorithm)

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = 4
        build_strategy = fluid.BuildStrategy()
        build_strategy.remove_unnecessary_lock = True

        # Use ParallelExecutor to make the learn program run faster
        self.learn_exe = fluid.ParallelExecutor(
            use_cuda=machine_info.is_gpu_available(),
            loss_name=self.learn_outputs[0],
            main_program=self.learn_program,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

        self.sample_exes = []
        for _ in range(predict_thread_num):
            with fluid.scope_guard(fluid.global_scope().new_scope()):
                pe = fluid.ParallelExecutor(
                    use_cuda=machine_info.is_gpu_available(),
                    main_program=self.sample_program,
                    build_strategy=build_strategy,
                    exec_strategy=exec_strategy)
                self.sample_exes.append(pe)

        if learn_data_provider:
            self.learn_reader.decorate_tensor_provider(learn_data_provider)
            self.learn_reader.start()
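
`decorate_tensor_provider` expects a generator, so a `learn_data_provider` would look roughly like the following; the queue it drains and the exact batch layout are assumptions:

    def learn_data_provider():
        # Yield one training batch per iteration, as a list of numpy arrays
        # matching the slots declared by the learn program's reader.
        while True:
            batch = sample_data_queue.get()  # hypothetical batch source
            yield [batch['obs'], batch['actions'], batch['advantages']]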
Example 5
    def __init__(self, config):
        self.config = config
        self.sample_data_queue = queue.Queue(
            maxsize=config['sample_queue_max_size'])

        #=========== Create Agent ==========
        env = gym.make(config['env_name'])
        env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
        obs_shape = env.observation_space.shape

        act_dim = env.action_space.n

        model = AtariModel(act_dim)
        algorithm = parl.algorithms.IMPALA(
            model,
            sample_batch_steps=self.config['sample_batch_steps'],
            gamma=self.config['gamma'],
            vf_loss_coeff=self.config['vf_loss_coeff'],
            clip_rho_threshold=self.config['clip_rho_threshold'],
            clip_pg_rho_threshold=self.config['clip_pg_rho_threshold'])
        self.agent = AtariAgent(algorithm, obs_shape, act_dim,
                                self.learn_data_provider)

        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        self.cache_params = self.agent.get_weights()
        self.params_lock = threading.Lock()
        self.params_updated = False
        self.cache_params_sent_cnt = 0
        self.total_params_sync = 0

        #========== Learner ==========
        self.lr, self.entropy_coeff = None, None
        self.lr_scheduler = PiecewiseScheduler(config['lr_scheduler'])
        self.entropy_coeff_scheduler = PiecewiseScheduler(
            config['entropy_coeff_scheduler'])

        self.total_loss_stat = WindowStat(100)
        self.pi_loss_stat = WindowStat(100)
        self.vf_loss_stat = WindowStat(100)
        self.entropy_stat = WindowStat(100)
        self.kl_stat = WindowStat(100)
        self.learn_time_stat = TimeStat(100)
        self.start_time = None

        self.learn_thread = threading.Thread(target=self.run_learn)
        self.learn_thread.setDaemon(True)
        self.learn_thread.start()

        #========== Remote Actor ===========
        self.remote_count = 0

        self.batch_buffer = []
        self.remote_metrics_queue = queue.Queue()
        self.sample_total_steps = 0

        self.create_actors()
Example 6
    def __init__(self, algorithm):
        """Build programs by calling the method ``self.build_program()`` and run initialization function of ``fluid.default_startup_program()``.

        Args:
            algorithm (parl.Algorithm): an instance of `parl.Algorithm`, afterwards accessible as `self.alg`.
        """

        assert isinstance(algorithm, Algorithm)
        super(Agent, self).__init__(algorithm)

        self.gpu_id = 0 if machine_info.is_gpu_available() else -1

        self.build_program()

        self.place = fluid.CUDAPlace(
            0) if machine_info.is_gpu_available() else fluid.CPUPlace()
        self.fluid_executor = fluid.Executor(self.place)
        self.fluid_executor.run(fluid.default_startup_program())
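
Subclasses provide the ``build_program`` hook that this constructor calls. A minimal sketch of such an override, with illustrative shapes and names (the ``self.alg.predict`` call follows the usual PARL pattern but is an assumption here):

    import paddle.fluid as fluid
    from parl import layers

    class MyAgent(Agent):
        def build_program(self):
            # Define a prediction program; __init__ above then runs
            # fluid.default_startup_program() to initialize its parameters.
            self.pred_program = fluid.Program()
            with fluid.program_guard(self.pred_program):
                obs = layers.data(name='obs', shape=[4], dtype='float32')
                self.pred_act = self.alg.predict(obs)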
Example 7
    def __init__(self, args):
        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        else:
            cpu_num = os.environ.get('CPU_NUM')
            assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
                    Please set environment variable:  `export CPU_NUM=1`.'

        model = OpenSimModel(OBS_DIM, VEL_DIM, ACT_DIM)
        algorithm = parl.algorithms.DDPG(
            model,
            gamma=GAMMA,
            tau=TAU,
            actor_lr=ACTOR_LR,
            critic_lr=CRITIC_LR)
        self.agent = OpenSimAgent(algorithm, OBS_DIM, ACT_DIM)

        self.evaluate_result = []

        self.lock = threading.Lock()
        self.model_lock = threading.Lock()
        self.model_queue = queue.Queue()

        self.best_shaping_reward = 0
        self.best_env_reward = 0

        if args.offline_evaluate:
            self.offline_evaluate()
        else:
            t = threading.Thread(target=self.online_evaluate)
            t.start()

        with self.lock:
            while True:
                model_path = self.model_queue.get()
                if not args.offline_evaluate:
                    # online evaluate
                    while not self.model_queue.empty():
                        model_path = self.model_queue.get()
                try:
                    self.agent.restore(model_path)
                    break
                except Exception as e:
                    logger.warn("Agent restore Exception: {} ".format(e))

            self.cur_model = model_path

        self.create_actors()
Example 8
    def __init__(self, config):
        self.config = config

        # The environment is created here solely to obtain the dimension of the game's action space
        env = retro_util.RetroEnv(game=config['env_name'],
                                  use_restricted_actions=retro.Actions.DISCRETE,
                                  resize_shape=config['obs_shape'],
                                  render_preprocess=False)
        obs_dim = env.observation_space.shape
        action_dim = env.action_space.n
        self.config['action_dim'] = action_dim

        # The model created here is the one actually used for learning
        model = Model(action_dim)
        algorithm = parl.algorithms.A3C(model, vf_loss_coeff=config['vf_loss_coeff'])
        self.agent = Agent(algorithm, config, obs_dim)

        # Only a single GPU is supported
        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        # Load a pre-trained model
        if self.config['restore_model']:
            logger.info("Loading pre-trained model...")
            self.agent.restore(self.config['model_path'])

        # Track training statistics for logging
        self.total_loss_stat = WindowStat(100)
        self.pi_loss_stat = WindowStat(100)
        self.vf_loss_stat = WindowStat(100)
        self.entropy_stat = WindowStat(100)
        self.lr = None
        self.entropy_coeff = None

        self.best_loss = None

        self.learn_time_stat = TimeStat(100)
        self.start_time = None

        # ========== Remote Actor ===========
        self.remote_count = 0
        self.sample_data_queue = queue.Queue()

        self.remote_metrics_queue = queue.Queue()
        self.sample_total_steps = 0

        self.params_queues = []
        self.create_actors()
Example 9
    def __init__(self, args):
        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        else:
            cpu_num = os.environ.get('CPU_NUM')
            assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
                    Please set environment variable:  `export CPU_NUM=1`.'

        model = OpenSimModel(OBS_DIM, VEL_DIM, ACT_DIM)
        algorithm = parl.algorithms.DDPG(
            model,
            gamma=GAMMA,
            tau=TAU,
            actor_lr=ACTOR_LR,
            critic_lr=CRITIC_LR)
        self.agent = OpenSimAgent(algorithm, OBS_DIM, ACT_DIM)

        self.rpm = ReplayMemory(args.rpm_size, OBS_DIM, ACT_DIM)

        if args.restore_rpm_path is not None:
            self.rpm.load(args.restore_rpm_path)
        if args.restore_model_path is not None:
            self.restore(args.restore_model_path)

        # add lock between training and predicting
        self.model_lock = threading.Lock()

        # add lock when appending data to rpm or writing scalars to summary
        self.memory_lock = threading.Lock()

        self.ready_actor_queue = queue.Queue()

        self.total_steps = 0
        self.noiselevel = 0.5

        self.critic_loss_stat = WindowStat(500)
        self.env_reward_stat = WindowStat(500)
        self.shaping_reward_stat = WindowStat(500)
        self.max_env_reward = 0

        # thread to keep training
        learn_thread = threading.Thread(target=self.keep_training)
        learn_thread.setDaemon(True)
        learn_thread.start()

        self.create_actors()
Example 10
    def set_weights(self, weights):
        """Copy parameters from ``set_weights()`` to the model.
        
        Args:
            weights (list): a Python list containing the parameters.
        """
        assert len(weights) == len(self.parameters()), \
                'size of input weights should be same as weights number of current model'
        try:
            is_gpu_available = self._is_gpu_available
        except AttributeError:
            self._is_gpu_available = machine_info.is_gpu_available()
            is_gpu_available = self._is_gpu_available

        for (param_name, weight) in list(zip(self.parameters(), weights)):
            set_value(param_name, weight, is_gpu_available)
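
A typical round trip pairs this with ``get_weights()`` (used the same way in Example 5) to copy parameters between two models of identical architecture; a hedged usage sketch:

    # Copy parameters from `model` into `target_model`; both models are
    # assumed to share the same architecture so the weight lists align.
    weights = model.get_weights()
    target_model.set_weights(weights)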
Example 11
    def __init__(self, config, cuda):
        self.cuda = cuda

        self.config = config
        env = gym.make(config['env_name'])
        env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
        obs_shape = env.observation_space.shape
        act_dim = env.action_space.n
        self.config['obs_shape'] = obs_shape
        self.config['act_dim'] = act_dim

        model = ActorCritic(act_dim)
        if self.cuda:
            model = model.cuda()

        algorithm = A2C(model, config)
        self.agent = Agent(algorithm, config)

        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'

        else:
            os.environ['CPU_NUM'] = str(1)

        #========== Learner ==========
        self.total_loss_stat = WindowStat(100)
        self.pi_loss_stat = WindowStat(100)
        self.vf_loss_stat = WindowStat(100)
        self.entropy_stat = WindowStat(100)
        self.lr = None
        self.entropy_coeff = None

        self.learn_time_stat = TimeStat(100)
        self.start_time = None

        #========== Remote Actor ===========
        self.remote_count = 0
        self.sample_total_steps = 0
        self.sample_data_queue = queue.Queue()
        self.remote_metrics_queue = queue.Queue()
        self.params_queues = []

        self.create_actors()
Example 12
    def __init__(self, config):
        self.config = config

        self.sample_data_queue = queue.Queue()
        self.batch_buffer = defaultdict(list)

        #=========== Create Agent ==========
        env = gym.make(config['env_name'])
        env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
        obs_shape = env.observation_space.shape
        act_dim = env.action_space.n

        self.config['obs_shape'] = obs_shape
        self.config['act_dim'] = act_dim

        model = AtariModel(act_dim)
        algorithm = parl.algorithms.A3C(model,
                                        vf_loss_coeff=config['vf_loss_coeff'])
        self.agent = AtariAgent(
            algorithm,
            obs_shape=self.config['obs_shape'],
            predict_thread_num=self.config['predict_thread_num'],
            learn_data_provider=self.learn_data_provider)

        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'

        else:
            cpu_num = os.environ.get('CPU_NUM')
            assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
                    Please set environment variable:  `export CPU_NUM=1`.'

        #========== Learner ==========
        self.lr, self.entropy_coeff = None, None
        self.lr_scheduler = PiecewiseScheduler(config['lr_scheduler'])
        self.entropy_coeff_scheduler = PiecewiseScheduler(
            config['entropy_coeff_scheduler'])

        self.total_loss_stat = WindowStat(100)
        self.pi_loss_stat = WindowStat(100)
        self.vf_loss_stat = WindowStat(100)
        self.entropy_stat = WindowStat(100)

        self.learn_time_stat = TimeStat(100)
        self.start_time = None

        # learn thread
        self.learn_thread = threading.Thread(target=self.run_learn)
        self.learn_thread.setDaemon(True)
        self.learn_thread.start()

        self.predict_input_queue = queue.Queue()

        # predict thread
        self.predict_threads = []
        for i in six.moves.range(self.config['predict_thread_num']):
            predict_thread = threading.Thread(target=self.run_predict,
                                              args=(i, ))
            predict_thread.setDaemon(True)
            predict_thread.start()
            self.predict_threads.append(predict_thread)

        #========== Remote Simulator ===========
        self.remote_count = 0

        self.remote_metrics_queue = queue.Queue()
        self.sample_total_steps = 0

        self.create_actors()
Example 13
    def sync_weights_to(self,
                        target_model,
                        decay=0.0,
                        share_vars_parallel_executor=None):
        """Synchronize parameters of current model to another model.
        
        To speed up synchronization, a program that performs the copy is created implicitly and cached,
        so repeated calls do not rebuild it.

        target_model_weights = decay * target_model_weights + (1 - decay) * current_model_weights

        Args:
            target_model (`parl.Model`): an instance of ``Model`` that has the same neural network architecture as the current model.
            decay (float): the fraction of the target model's weights retained in the update; 0 means the current model's weights are copied over entirely.
            share_vars_parallel_executor (fluid.ParallelExecutor): Optional. If not None, will use ``fluid.ParallelExecutor``
                                                                   to run program instead of ``fluid.Executor``.

        Example:

        .. code-block:: python

            import copy
            # create a model that has the same neural network structure.
            target_model = copy.deepcopy(model)

            # after initializing the parameters ...
            model.sync_weights_to(target_model)

        Note:
            Before calling ``sync_weights_to``, parameters of the model must have been initialized.
        """

        args_hash_id = hashlib.md5('{}_{}'.format(
            id(target_model), decay).encode('utf-8')).hexdigest()
        has_cached = False
        try:
            if self._cached_id == args_hash_id:
                has_cached = True
        except AttributeError:
            has_cached = False

        if not has_cached:
            # Cannot reuse the cached program; need to create a new one
            self._cached_id = args_hash_id

            assert target_model is not self, "cannot copy between identical models"
            assert isinstance(target_model, Model)
            assert self.__class__.__name__ == target_model.__class__.__name__, \
                "must be the same class for params syncing!"
            assert 0 <= decay <= 1

            param_pairs = self._get_parameter_pairs(self, target_model)

            self._cached_sync_weights_program = fluid.Program()

            with fluid.program_guard(self._cached_sync_weights_program):
                for (src_var_name, target_var_name) in param_pairs:
                    src_var = fetch_framework_var(src_var_name)
                    target_var = fetch_framework_var(target_var_name)
                    fluid.layers.assign(
                        decay * target_var + (1 - decay) * src_var, target_var)

            if share_vars_parallel_executor is None:
                # use fluid.Executor
                place = fluid.CUDAPlace(0) if machine_info.is_gpu_available(
                ) else fluid.CPUPlace()
                self._cached_fluid_executor = fluid.Executor(place)
            else:
                # use fluid.ParallelExecutor

                # specify strategy to make ParallelExecutor run faster
                exec_strategy = fluid.ExecutionStrategy()
                exec_strategy.use_experimental_executor = True
                exec_strategy.num_threads = 4
                build_strategy = fluid.BuildStrategy()
                build_strategy.remove_unnecessary_lock = True

                with fluid.scope_guard(fluid.global_scope().new_scope()):
                    self._cached_fluid_executor = fluid.ParallelExecutor(
                        use_cuda=machine_info.is_gpu_available(),
                        main_program=self._cached_sync_weights_program,
                        share_vars_from=share_vars_parallel_executor,
                        exec_strategy=exec_strategy,
                        build_strategy=build_strategy,
                    )
        if share_vars_parallel_executor is None:
            self._cached_fluid_executor.run(self._cached_sync_weights_program)
        else:
            self._cached_fluid_executor.run(fetch_list=[])
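
The update rule in the docstring is a standard Polyak soft update. A self-contained NumPy sketch of the same arithmetic, independent of fluid:

    import numpy as np

    def soft_update(target_weights, source_weights, decay):
        # target = decay * target + (1 - decay) * source
        # decay=0 is a hard copy; decay close to 1 moves the target slowly.
        return [decay * np.asarray(t) + (1 - decay) * np.asarray(s)
                for t, s in zip(target_weights, source_weights)]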