def __init__(self, algorithm, obs_dim, act_dim):
    self.obs_dim = obs_dim
    self.act_dim = act_dim
    super(OpenSimAgent, self).__init__(algorithm)

    # Use ParallelExecutor to make the program run faster
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = 4
    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = True

    with fluid.scope_guard(fluid.global_scope().new_scope()):
        self.learn_pe = fluid.ParallelExecutor(
            use_cuda=machine_info.is_gpu_available(),
            main_program=self.learn_program,
            exec_strategy=exec_strategy,
            build_strategy=build_strategy)

    with fluid.scope_guard(fluid.global_scope().new_scope()):
        self.pred_pe = fluid.ParallelExecutor(
            use_cuda=machine_info.is_gpu_available(),
            main_program=self.pred_program,
            exec_strategy=exec_strategy,
            build_strategy=build_strategy)

    # Attention: In the beginning, sync the target model completely.
    self.alg.sync_target(
        decay=0, share_vars_parallel_executor=self.learn_pe)
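A minimal usage sketch (not from the original file) of the prediction executor created above; the feed key 'obs' and the attribute `self.pred_act` are assumptions for illustration, not the original agent's definitions.

def predict(self, obs):
    # Sketch only: assumes `self.pred_program` defines a data layer named
    # 'obs' and exposes its action output as `self.pred_act`.
    feed = {'obs': obs.astype('float32')}
    act = self.pred_pe.run(
        feed=[feed], fetch_list=[self.pred_act.name])[0]
    return act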
def __init__(self, algorithm, obs_dim, action_dim):
    self._action_dim = action_dim
    self._obs_dim = obs_dim
    self._update_target_steps = 1000
    self._global_step = 0

    self.exploration_ratio = 0.9
    self.exploration_decre = 1e-7
    self.exploration_min = 0.1
    super(ElevatorAgent, self).__init__(algorithm)

    use_cuda = machine_info.is_gpu_available()
    if self.gpu_id >= 0:
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'
    else:
        os.environ['CPU_NUM'] = str(1)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1
    exec_strategy.num_iteration_per_drop_scope = 10
    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = False

    self.learn_pe = fluid.ParallelExecutor(
        use_cuda=use_cuda,
        main_program=self.learn_program,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy,
    )
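A hedged sketch of how the exploration fields initialized above could drive epsilon-greedy sampling; the `self.predict` call and the per-step linear decay are illustrative assumptions, not the original agent's logic.

import numpy as np

def sample(self, obs):
    # Explore with probability `exploration_ratio`, otherwise act greedily.
    if np.random.rand() < self.exploration_ratio:
        act = np.random.randint(self._action_dim)
    else:
        act = self.predict(obs)
    # Anneal the exploration ratio linearly down to its floor.
    self.exploration_ratio = max(
        self.exploration_min,
        self.exploration_ratio - self.exploration_decre)
    return act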
def __init__(self,
             algorithm,
             agent_index=None,
             obs_dim_n=None,
             act_dim_n=None,
             batch_size=None,
             speedup=False):
    assert isinstance(agent_index, int)
    assert isinstance(obs_dim_n, list)
    assert isinstance(act_dim_n, list)
    assert isinstance(batch_size, int)
    assert isinstance(speedup, bool)
    self.agent_index = agent_index
    self.obs_dim_n = obs_dim_n
    self.act_dim_n = act_dim_n
    self.batch_size = batch_size
    self.speedup = speedup
    self.n = len(act_dim_n)

    self.memory_size = int(1e6)
    self.min_memory_size = batch_size * 25  # batch_size * args.max_episode_len
    self.rpm = ReplayMemory(
        max_size=self.memory_size,
        obs_dim=self.obs_dim_n[agent_index],
        act_dim=self.act_dim_n[agent_index])
    self.global_train_step = 0

    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

    super(MAAgent, self).__init__(algorithm)

    # Attention: In the beginning, sync the target model completely.
    self.alg.sync_target(decay=0)
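A hedged construction sketch for the multi-agent case implied by the asserts above: one MAAgent per agent index, all sharing the full dimension lists. The dimensions and the `build_algorithm` factory are hypothetical and used only for illustration.

# Hypothetical 3-agent setup; dimensions and `build_algorithm` are made up.
obs_dim_n = [16, 16, 16]
act_dim_n = [5, 5, 5]
agents = [
    MAAgent(
        build_algorithm(i),  # hypothetical factory returning a parl.Algorithm
        agent_index=i,
        obs_dim_n=obs_dim_n,
        act_dim_n=act_dim_n,
        batch_size=1024,
        speedup=False) for i in range(len(act_dim_n))
]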
def __init__(self,
             algorithm,
             obs_shape,
             predict_thread_num,
             learn_data_provider=None):
    """
    Args:
        algorithm (`parl.Algorithm`): a2c algorithm
        obs_shape (list/tuple): observation shape of the atari environment
        predict_thread_num (int): number of predict threads (one predict ParallelExecutor per thread)
        learn_data_provider: data generator for training
    """
    assert isinstance(obs_shape, (list, tuple))
    assert isinstance(predict_thread_num, int)
    self.obs_shape = obs_shape
    super(AtariAgent, self).__init__(algorithm)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = 4
    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = True

    # Use ParallelExecutor to make the learn program run faster
    self.learn_exe = fluid.ParallelExecutor(
        use_cuda=machine_info.is_gpu_available(),
        loss_name=self.learn_outputs[0],
        main_program=self.learn_program,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    self.sample_exes = []
    for _ in range(predict_thread_num):
        with fluid.scope_guard(fluid.global_scope().new_scope()):
            pe = fluid.ParallelExecutor(
                use_cuda=machine_info.is_gpu_available(),
                main_program=self.sample_program,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
            self.sample_exes.append(pe)

    if learn_data_provider:
        self.learn_reader.decorate_tensor_provider(learn_data_provider)
        self.learn_reader.start()
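A hedged sketch of a `learn_data_provider` generator compatible with the reader started above: it should yield one list of numpy arrays per batch, in the order the learn program's data layers expect. The field names, shapes, and dtypes here are assumptions for illustration; a real provider would pull trajectories from a queue filled by the actors.

import numpy as np

def learn_data_provider():
    # Sketch only: yields dummy batches of fixed size.
    batch_size = 128
    obs_shape = (4, 84, 84)  # assumed NCHW atari observation shape
    while True:
        obs = np.zeros([batch_size] + list(obs_shape), dtype='float32')
        actions = np.zeros([batch_size], dtype='int64')
        advantages = np.zeros([batch_size], dtype='float32')
        target_values = np.zeros([batch_size], dtype='float32')
        yield [obs, actions, advantages, target_values]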
def __init__(self, config):
    self.config = config
    self.sample_data_queue = queue.Queue(
        maxsize=config['sample_queue_max_size'])

    #=========== Create Agent ==========
    env = gym.make(config['env_name'])
    env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
    obs_shape = env.observation_space.shape
    act_dim = env.action_space.n

    model = AtariModel(act_dim)
    algorithm = parl.algorithms.IMPALA(
        model,
        sample_batch_steps=self.config['sample_batch_steps'],
        gamma=self.config['gamma'],
        vf_loss_coeff=self.config['vf_loss_coeff'],
        clip_rho_threshold=self.config['clip_rho_threshold'],
        clip_pg_rho_threshold=self.config['clip_pg_rho_threshold'])
    self.agent = AtariAgent(algorithm, obs_shape, act_dim,
                            self.learn_data_provider)

    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

    self.cache_params = self.agent.get_weights()
    self.params_lock = threading.Lock()
    self.params_updated = False
    self.cache_params_sent_cnt = 0
    self.total_params_sync = 0

    #========== Learner ==========
    self.lr, self.entropy_coeff = None, None
    self.lr_scheduler = PiecewiseScheduler(config['lr_scheduler'])
    self.entropy_coeff_scheduler = PiecewiseScheduler(
        config['entropy_coeff_scheduler'])

    self.total_loss_stat = WindowStat(100)
    self.pi_loss_stat = WindowStat(100)
    self.vf_loss_stat = WindowStat(100)
    self.entropy_stat = WindowStat(100)
    self.kl_stat = WindowStat(100)
    self.learn_time_stat = TimeStat(100)
    self.start_time = None

    self.learn_thread = threading.Thread(target=self.run_learn)
    self.learn_thread.setDaemon(True)
    self.learn_thread.start()

    #========== Remote Actor ===========
    self.remote_count = 0
    self.batch_buffer = []
    self.remote_metrics_queue = queue.Queue()
    self.sample_total_steps = 0

    self.create_actors()
def __init__(self, algorithm):
    """Build programs by calling ``self.build_program()`` and run the
    initialization program ``fluid.default_startup_program()``.

    Args:
        algorithm (parl.Algorithm): an instance of `parl.Algorithm`. This
            algorithm is then passed to `self.alg`.
    """
    assert isinstance(algorithm, Algorithm)
    super(Agent, self).__init__(algorithm)

    self.gpu_id = 0 if machine_info.is_gpu_available() else -1

    self.build_program()

    self.place = fluid.CUDAPlace(
        0) if machine_info.is_gpu_available() else fluid.CPUPlace()
    self.fluid_executor = fluid.Executor(self.place)
    self.fluid_executor.run(fluid.default_startup_program())
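A minimal subclass sketch (assumed, not from the original file) of the `build_program` contract this constructor relies on; the observation shape, layer names, and `MyAgent` class are illustrative only.

class MyAgent(Agent):
    def build_program(self):
        # Called by Agent.__init__ before the startup program runs.
        self.pred_program = fluid.Program()
        with fluid.program_guard(self.pred_program):
            obs = fluid.layers.data(name='obs', shape=[4], dtype='float32')
            self.pred_act = self.alg.predict(obs)

    def predict(self, obs):
        act = self.fluid_executor.run(
            self.pred_program,
            feed={'obs': obs},
            fetch_list=[self.pred_act])[0]
        return act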
def __init__(self, args):
    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'
    else:
        cpu_num = os.environ.get('CPU_NUM')
        assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
            Please set environment variable: `export CPU_NUM=1`.'

    model = OpenSimModel(OBS_DIM, VEL_DIM, ACT_DIM)
    algorithm = parl.algorithms.DDPG(
        model,
        gamma=GAMMA,
        tau=TAU,
        actor_lr=ACTOR_LR,
        critic_lr=CRITIC_LR)
    self.agent = OpenSimAgent(algorithm, OBS_DIM, ACT_DIM)

    self.evaluate_result = []

    self.lock = threading.Lock()
    self.model_lock = threading.Lock()
    self.model_queue = queue.Queue()

    self.best_shaping_reward = 0
    self.best_env_reward = 0

    if args.offline_evaluate:
        self.offline_evaluate()
    else:
        t = threading.Thread(target=self.online_evaluate)
        t.start()

    with self.lock:
        while True:
            model_path = self.model_queue.get()
            if not args.offline_evaluate:
                # online evaluate
                while not self.model_queue.empty():
                    model_path = self.model_queue.get()
            try:
                self.agent.restore(model_path)
                break
            except Exception as e:
                logger.warn("Agent restore Exception: {} ".format(e))

    self.cur_model = model_path

    self.create_actors()
def __init__(self, config):
    self.config = config

    # The game environment is created here only to obtain the dimension of
    # its action space.
    env = retro_util.RetroEnv(
        game=config['env_name'],
        use_restricted_actions=retro.Actions.DISCRETE,
        resize_shape=config['obs_shape'],
        render_preprocess=False)
    obs_dim = env.observation_space.shape
    action_dim = env.action_space.n
    self.config['action_dim'] = action_dim

    # The model created here is the one actually used for learning.
    model = Model(action_dim)
    algorithm = parl.algorithms.A3C(
        model, vf_loss_coeff=config['vf_loss_coeff'])
    self.agent = Agent(algorithm, config, obs_dim)

    # Only a single GPU is supported.
    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

    # Load the pre-trained model if requested.
    if self.config['restore_model']:
        logger.info("Loading pre-trained model...")
        self.agent.restore(self.config['model_path'])

    # Statistics for training logs.
    self.total_loss_stat = WindowStat(100)
    self.pi_loss_stat = WindowStat(100)
    self.vf_loss_stat = WindowStat(100)
    self.entropy_stat = WindowStat(100)
    self.lr = None
    self.entropy_coeff = None
    self.best_loss = None

    self.learn_time_stat = TimeStat(100)
    self.start_time = None

    # ========== Remote Actor ===========
    self.remote_count = 0
    self.sample_data_queue = queue.Queue()
    self.remote_metrics_queue = queue.Queue()
    self.sample_total_steps = 0
    self.params_queues = []

    self.create_actors()
def __init__(self, args):
    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'
    else:
        cpu_num = os.environ.get('CPU_NUM')
        assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
            Please set environment variable: `export CPU_NUM=1`.'

    model = OpenSimModel(OBS_DIM, VEL_DIM, ACT_DIM)
    algorithm = parl.algorithms.DDPG(
        model,
        gamma=GAMMA,
        tau=TAU,
        actor_lr=ACTOR_LR,
        critic_lr=CRITIC_LR)
    self.agent = OpenSimAgent(algorithm, OBS_DIM, ACT_DIM)

    self.rpm = ReplayMemory(args.rpm_size, OBS_DIM, ACT_DIM)
    if args.restore_rpm_path is not None:
        self.rpm.load(args.restore_rpm_path)
    if args.restore_model_path is not None:
        self.restore(args.restore_model_path)

    # add lock between training and predicting
    self.model_lock = threading.Lock()

    # add lock when appending data to rpm or writing scalars to summary
    self.memory_lock = threading.Lock()

    self.ready_actor_queue = queue.Queue()

    self.total_steps = 0
    self.noiselevel = 0.5

    self.critic_loss_stat = WindowStat(500)
    self.env_reward_stat = WindowStat(500)
    self.shaping_reward_stat = WindowStat(500)
    self.max_env_reward = 0

    # thread to keep training
    learn_thread = threading.Thread(target=self.keep_training)
    learn_thread.setDaemon(True)
    learn_thread.start()

    self.create_actors()
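A hedged sketch of one step of the `keep_training` loop started above, using the replay memory and locks; `size`/`sample_batch` follow parl.utils.ReplayMemory's interface, while BATCH_SIZE, the warm-up check, and the `agent.learn` signature are illustrative assumptions.

def train_once(self):
    # Skip learning until the replay memory holds at least one batch.
    if self.rpm.size() < BATCH_SIZE:
        return
    with self.memory_lock:
        obs, act, reward, next_obs, terminal = self.rpm.sample_batch(
            BATCH_SIZE)
    with self.model_lock:
        critic_loss = self.agent.learn(obs, act, reward, next_obs, terminal)
    self.critic_loss_stat.add(critic_loss)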
def set_weights(self, weights):
    """Copy the given parameters into the model; ``weights`` should have the
    same layout as the output of ``get_weights()``.

    Args:
        weights (list): a Python list containing the parameters.
    """
    assert len(weights) == len(self.parameters()), \
        'size of input weights should be the same as the number of parameters of the current model'

    try:
        is_gpu_available = self._is_gpu_available
    except AttributeError:
        self._is_gpu_available = machine_info.is_gpu_available()
        is_gpu_available = self._is_gpu_available

    for (param_name, weight) in list(zip(self.parameters(), weights)):
        set_value(param_name, weight, is_gpu_available)
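A hedged usage sketch pairing this method with ``get_weights()``; `MyModel` is a hypothetical `parl.Model` subclass, and both models are assumed to have already initialized their parameters.

src_model = MyModel()  # hypothetical model class
dst_model = MyModel()
weights = src_model.get_weights()  # list of parameter values, one per parameter
dst_model.set_weights(weights)     # copies them into dst_model's parameters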
def __init__(self, config, cuda):
    self.cuda = cuda
    self.config = config

    env = gym.make(config['env_name'])
    env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
    obs_shape = env.observation_space.shape
    act_dim = env.action_space.n
    self.config['obs_shape'] = obs_shape
    self.config['act_dim'] = act_dim

    model = ActorCritic(act_dim)
    if self.cuda:
        model = model.cuda()
    algorithm = A2C(model, config)
    self.agent = Agent(algorithm, config)

    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'
    else:
        os.environ['CPU_NUM'] = str(1)

    #========== Learner ==========
    self.total_loss_stat = WindowStat(100)
    self.pi_loss_stat = WindowStat(100)
    self.vf_loss_stat = WindowStat(100)
    self.entropy_stat = WindowStat(100)
    self.lr = None
    self.entropy_coeff = None

    self.learn_time_stat = TimeStat(100)
    self.start_time = None

    #========== Remote Actor ===========
    self.remote_count = 0
    self.sample_total_steps = 0
    self.sample_data_queue = queue.Queue()
    self.remote_metrics_queue = queue.Queue()
    self.params_queues = []

    self.create_actors()
def __init__(self, config):
    self.config = config
    self.sample_data_queue = queue.Queue()
    self.batch_buffer = defaultdict(list)

    #=========== Create Agent ==========
    env = gym.make(config['env_name'])
    env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
    obs_shape = env.observation_space.shape
    act_dim = env.action_space.n
    self.config['obs_shape'] = obs_shape
    self.config['act_dim'] = act_dim

    model = AtariModel(act_dim)
    algorithm = parl.algorithms.A3C(
        model, vf_loss_coeff=config['vf_loss_coeff'])
    self.agent = AtariAgent(
        algorithm,
        obs_shape=self.config['obs_shape'],
        predict_thread_num=self.config['predict_thread_num'],
        learn_data_provider=self.learn_data_provider)

    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'
    else:
        cpu_num = os.environ.get('CPU_NUM')
        assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
            Please set environment variable: `export CPU_NUM=1`.'

    #========== Learner ==========
    self.lr, self.entropy_coeff = None, None
    self.lr_scheduler = PiecewiseScheduler(config['lr_scheduler'])
    self.entropy_coeff_scheduler = PiecewiseScheduler(
        config['entropy_coeff_scheduler'])

    self.total_loss_stat = WindowStat(100)
    self.pi_loss_stat = WindowStat(100)
    self.vf_loss_stat = WindowStat(100)
    self.entropy_stat = WindowStat(100)
    self.learn_time_stat = TimeStat(100)
    self.start_time = None

    # learn thread
    self.learn_thread = threading.Thread(target=self.run_learn)
    self.learn_thread.setDaemon(True)
    self.learn_thread.start()

    self.predict_input_queue = queue.Queue()

    # predict threads
    self.predict_threads = []
    for i in six.moves.range(self.config['predict_thread_num']):
        predict_thread = threading.Thread(
            target=self.run_predict, args=(i, ))
        predict_thread.setDaemon(True)
        predict_thread.start()
        self.predict_threads.append(predict_thread)

    #========== Remote Simulator ===========
    self.remote_count = 0
    self.remote_metrics_queue = queue.Queue()
    self.sample_total_steps = 0

    self.create_actors()
def sync_weights_to(self,
                    target_model,
                    decay=0.0,
                    share_vars_parallel_executor=None):
    """Synchronize parameters of the current model to another model.

    To speed up the synchronization, a program is created implicitly to
    finish the process, and it is cached to avoid creating the program
    repeatedly.

    target_model_weights = decay * target_model_weights + (1 - decay) * current_model_weights

    Args:
        target_model (`parl.Model`): an instance of ``Model`` that has the
            same neural network architecture as the current model.
        decay (float): the rate of decline in copying parameters. 0 if no
            parameters decay when synchronizing the parameters.
        share_vars_parallel_executor (fluid.ParallelExecutor): Optional. If
            not None, will use ``fluid.ParallelExecutor`` to run the program
            instead of ``fluid.Executor``.

    Example:

    .. code-block:: python

        import copy
        # create a model that has the same neural network structures.
        target_model = copy.deepcopy(model)

        # after initializing the parameters ...
        model.sync_weights_to(target_model)

    Note:
        Before calling ``sync_weights_to``, parameters of the model must
        have been initialized.
    """

    args_hash_id = hashlib.md5('{}_{}'.format(
        id(target_model), decay).encode('utf-8')).hexdigest()
    has_cached = False
    try:
        if self._cached_id == args_hash_id:
            has_cached = True
    except AttributeError:
        has_cached = False

    if not has_cached:
        # The cached program cannot be reused; create a new one.
        self._cached_id = args_hash_id

        assert not target_model is self, "cannot copy between identical models"
        assert isinstance(target_model, Model)
        assert self.__class__.__name__ == target_model.__class__.__name__, \
            "must be the same class for params syncing!"
        assert (decay >= 0 and decay <= 1)

        param_pairs = self._get_parameter_pairs(self, target_model)

        self._cached_sync_weights_program = fluid.Program()

        with fluid.program_guard(self._cached_sync_weights_program):
            for (src_var_name, target_var_name) in param_pairs:
                src_var = fetch_framework_var(src_var_name)
                target_var = fetch_framework_var(target_var_name)
                fluid.layers.assign(
                    decay * target_var + (1 - decay) * src_var, target_var)

        if share_vars_parallel_executor is None:
            # use fluid.Executor
            place = fluid.CUDAPlace(0) if machine_info.is_gpu_available(
            ) else fluid.CPUPlace()
            self._cached_fluid_executor = fluid.Executor(place)
        else:
            # use fluid.ParallelExecutor
            # specify strategy to make ParallelExecutor run faster
            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.use_experimental_executor = True
            exec_strategy.num_threads = 4
            build_strategy = fluid.BuildStrategy()
            build_strategy.remove_unnecessary_lock = True

            with fluid.scope_guard(fluid.global_scope().new_scope()):
                self._cached_fluid_executor = fluid.ParallelExecutor(
                    use_cuda=machine_info.is_gpu_available(),
                    main_program=self._cached_sync_weights_program,
                    share_vars_from=share_vars_parallel_executor,
                    exec_strategy=exec_strategy,
                    build_strategy=build_strategy,
                )

    if share_vars_parallel_executor is None:
        self._cached_fluid_executor.run(self._cached_sync_weights_program)
    else:
        self._cached_fluid_executor.run(fetch_list=[])
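A short usage sketch extending the docstring example above to a soft target-network update; the decay value 0.995 is illustrative only.

import copy
target_model = copy.deepcopy(model)
# ... after the parameters have been initialized ...
model.sync_weights_to(target_model)               # full copy (decay defaults to 0.0)
model.sync_weights_to(target_model, decay=0.995)  # soft update: target = 0.995 * target + 0.005 * model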