def __init__(self, opt, world):
    super().__init__(opt)
    self.inner_world = world
    self.numthreads = opt['numthreads']

    self.sync = {  # synchronization primitives
        # semaphores for counting queued examples
        'queued_sem': Semaphore(0),  # counts num exs to be processed
        'threads_sem': Semaphore(0),  # counts threads
        'reset_sem': Semaphore(0),  # allows threads to reset

        # flags for communicating with threads
        'reset_flag': Value('b', False),  # threads should reset
        'term_flag': Value('b', False),  # threads should terminate

        # counters
        'epoch_done_ctr': Value('i', 0),  # number of done threads
        'total_parleys': Value('l', 0),  # number of parleys in threads
    }

    self.threads = []
    for i in range(self.numthreads):
        self.threads.append(HogwildProcess(i, opt, world.share(), self.sync))
        time.sleep(0.05)  # delay can help prevent deadlock in thread launches
    for t in self.threads:
        t.start()

    for _ in self.threads:
        # wait for threads to launch
        # this makes sure that no threads get examples before all are set up
        # otherwise they might reset one another after processing some exs
        self.sync['threads_sem'].acquire()

def start(self) -> None:
    shards = glob.glob(self.file_path)
    # Ensure a consistent order before shuffling for testing.
    shards.sort()
    num_shards = len(shards)

    # If we want multiple epochs per read, put shards in the queue multiple times.
    self.input_queue = Queue(num_shards * self.epochs_per_read + self.num_workers)
    for _ in range(self.epochs_per_read):
        np.random.shuffle(shards)
        for shard in shards:
            self.input_queue.put(shard)

    # Then put a None per worker to signify no more files.
    for _ in range(self.num_workers):
        self.input_queue.put(None)

    assert not self.processes, "Process list non-empty! You must call QIterable.join() before restarting."

    self.num_active_workers = Value('i', self.num_workers)
    self.num_inflight_items = Value('i', 0)

    for worker_id in range(self.num_workers):
        process = Process(target=_worker,
                          args=(self.reader, self.input_queue, self.output_queue,
                                self.num_active_workers, self.num_inflight_items,
                                worker_id))
        logger.info(f"starting worker {worker_id}")
        process.start()
        self.processes.append(process)

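The assert above references QIterable.join(); a plausible counterpart is sketched below. This is an assumption about what join() does, not the library's exact code.

def join(self) -> None:
    # Reap the workers, then forget them so start() can be called again.
    # The real implementation may also drain queues before joining.
    for process in self.processes:
        process.join()
    self.processes.clear()
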
def load_saved_model(model: Module, path: str, T: Value, global_reward: Value,
                     model_critic: Module = None) -> None:
    """
    Load a saved model from file.

    :param model: model to load params for
    :param path: path to load parameters from
    :param T: global steps counter, to continue training
    :param global_reward: global reward counter, to continue training
    :param model_critic: possible separate critic model to load if a non-shared network is used
    :return: None
    """
    if os.path.isfile(path):
        print(f"=> loading model checkpoint '{path}'")
        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint['model'])
        T.value = checkpoint['epoch']
        global_reward.value = checkpoint['global_reward']
        if model_critic:
            model_critic.load_state_dict(checkpoint['model_critic'])
        print(f"=> loaded model checkpoint '{path}' (T: {checkpoint['epoch']} "
              f"-- global reward: {checkpoint['global_reward']})")
    else:
        print(f"=> no model checkpoint found at '{path}'")

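For symmetry, a minimal save counterpart might look like this sketch; the checkpoint keys mirror the ones read above, but the function name save_model is an assumption.

def save_model(model: Module, path: str, T: Value, global_reward: Value,
               model_critic: Module = None) -> None:
    # Hypothetical counterpart to load_saved_model; keys mirror the loader above.
    checkpoint = {
        'model': model.state_dict(),
        'epoch': T.value,
        'global_reward': global_reward.value,
    }
    if model_critic:
        checkpoint['model_critic'] = model_critic.state_dict()
    torch.save(checkpoint, path)
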
def __init__(self, opt, world):
    super().__init__(opt)
    self.inner_world = world
    self.numthreads = opt['numthreads']

    self.sync = {  # synchronization primitives
        # semaphores for counting queued examples
        'queued_sem': Semaphore(0),  # counts num exs to be processed
        'threads_sem': Semaphore(0),  # counts threads
        'reset_sem': Semaphore(0),  # allows threads to reset

        # flags for communicating with threads
        'reset_flag': Value('b', False),  # threads should reset
        'term_flag': Value('b', False),  # threads should terminate

        # counters
        'epoch_done_ctr': Value('i', 0),  # number of done threads
        'total_parleys': Value('l', 0),  # number of parleys in threads
    }

    # don't let threads create more threads!
    self.threads = []
    for i in range(self.numthreads):
        self.threads.append(HogwildProcess(i, opt, world, self.sync))
    for t in self.threads:
        t.start()

    for _ in self.threads:
        self.sync['threads_sem'].acquire()

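A sketch of the thread-side handshake implied by these primitives. The run() body below is an assumption for illustration, not the actual HogwildProcess implementation.

def run(self):
    # Release threads_sem so the parent's acquire loop above can proceed.
    self.sync['threads_sem'].release()
    while not self.sync['term_flag'].value:
        # Block until the parent queues an example for processing.
        self.sync['queued_sem'].acquire()
        # ... process one example, then count the parley.
        with self.sync['total_parleys'].get_lock():
            self.sync['total_parleys'].value += 1
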
def _compare_parallel(self, network, opponent_network, device, num_workers):
    q, r = divmod(self.conf.GAMES_PER_COMPARISON, num_workers)
    num_active_workers = Value('i', num_workers)
    evaluator_mgr = BulkEvaluatorManager(
        [network, opponent_network], device, num_workers)
    score = Value('i', 0)

    workers = []
    s = 0
    for worker_id in range(num_workers):
        num_games = q + 1 if worker_id < r else q
        evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
        opponent_evaluator = evaluator_mgr.get_evaluator(worker_id, 1)
        color = BLACK if s % 2 == 0 else WHITE
        s += num_games
        worker = Process(
            target=self._worker_job,
            args=(num_games, num_active_workers, evaluator,
                  opponent_evaluator, color, score),
        )
        workers.append(worker)
        worker.start()

    # start evaluator server
    server = evaluator_mgr.get_server(num_active_workers)
    server.start()

    for worker in workers:
        worker.join()
    server.join()

    return score.value / self.conf.GAMES_PER_COMPARISON

def __init__(self, name, env_kwargs, model_kwargs, **kwargs):
    super().__init__(env_kwargs=env_kwargs, model_kwargs=model_kwargs)
    self.name = name
    self.num_processes = 16

    self._report_queue = Queue(maxsize=16)
    self._shared_global_t = Value('i', 0)
    self._shared_is_stopped = Value('i', False)

def _generate_parallel(self, iteration, network, device, num_workers):
    q, r = divmod(self.remaining_games, num_workers)
    num_active_workers = Value('i', num_workers)
    resign_threshold = Value('d', self.resign_mgr.threshold())
    evaluator_mgr = BulkEvaluatorManager([network], device, num_workers)
    output_queue = SimpleQueue()

    # start the workers
    workers = []
    for worker_id in range(num_workers):
        num_games = q + 1 if worker_id < r else q
        evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
        worker = Process(
            target=self._worker_job,
            args=(worker_id, num_games, num_active_workers,
                  resign_threshold, evaluator, output_queue),
        )
        workers.append(worker)
        worker.start()

    # start evaluator server
    server = evaluator_mgr.get_server(num_active_workers)
    server.start()

    # collect the examples generated by workers
    while num_active_workers.value > 0 or not output_queue.empty():
        examples, resign_value_history, result = output_queue.get()
        self.example_pool += examples
        self.game_length.append(len(examples))

        # add the history into the resignation manager to update the threshold
        if resign_value_history is not None:
            self.resign_mgr.add(resign_value_history, result)
            resign_threshold.value = self.resign_mgr.threshold()

        self.remaining_games -= 1

        # periodically save the progress
        if (self.conf.GAMES_PER_ITERATION - self.remaining_games) \
                % self.conf.EXAMPLE_POOL_SAVE_FREQUENCY == 0:
            self.save(iteration)
            log.info(
                f'[iter={iteration}] ExamplePool: checkpoint saved, '
                f'{self.remaining_games} games remaining'
            )

    for worker in workers:
        worker.join()
    server.join()

def __init__(self, world_class, opt, agents):
    super().__init__(opt)
    self.inner_world = world_class(opt, agents)

    self.queued_items = Semaphore(0)  # counts num exs to be processed
    self.epochDone = Condition()  # notifies when exs are finished
    self.terminate = Value('b', False)  # tells threads when to shut down
    self.cnt = Value('i', 0)  # number of exs that remain to be processed

    self.threads = []
    for i in range(opt['numthreads']):
        self.threads.append(
            HogwildProcess(i, world_class, opt, agents, self.queued_items,
                           self.epochDone, self.terminate, self.cnt))
    for t in self.threads:
        t.start()

def create(cls):
    if not hasattr(cls, 'length_to_eps'):
        # Maps episode length to list of episodes
        cls.length_to_eps = {}
    if not hasattr(cls, 'ep_indices'):
        # Set of episode indices already in the cache
        cls.ep_indices = set()
    if not hasattr(cls, 'batches'):
        # List of batches if popping batches
        cls.batches = []
    if not hasattr(cls, 'load_complete'):
        # If all episodes have been loaded into memory
        cls.load_complete = Value(ctypes.c_bool, False)
    if not hasattr(cls, 'batches_lock'):
        # Lock to access batches
        cls.batches_lock = Lock()
    if not hasattr(cls, 'cache_lock'):
        # Lock to access length_to_eps
        cls.cache_lock = Lock()
    if not hasattr(cls, 'fill_cache_lock'):
        # Lock for condition variables
        cls.fill_cache_lock = RLock()
    if not hasattr(cls, 'add_to_cache_cv'):
        # Condition notifying Loader to add to cache
        cls.add_to_cache_cv = Condition(lock=cls.fill_cache_lock)
    if not hasattr(cls, 'cache_filled_cv'):
        # Condition notifying teacher that cache has episodes
        cls.cache_filled_cv = Condition(lock=cls.fill_cache_lock)

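A sketch of how the two condition variables above pair up: loader and teacher both synchronize on the shared fill_cache_lock, each waiting on its own condition and notifying the other's. The helpers cache_is_full and insert_episode are hypothetical.

def _loader_add(cls, episode):
    with cls.add_to_cache_cv:
        while cache_is_full(cls):        # hypothetical capacity check
            cls.add_to_cache_cv.wait()   # wait for the teacher to make room
        insert_episode(cls, episode)     # hypothetical insert into length_to_eps
        cls.cache_filled_cv.notify()     # wake a teacher waiting for episodes
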
def __init__(self, chk_dir, chk, keep_epoch_chk=True, overwrite=True,
             mode=CFMode.AUTO, chk_prefix='model_v_'):
    self.logger = logging.getLogger(__name__)
    self.chk_dir = chk_dir
    self.chk = chk
    self.keep_epoch_chk = keep_epoch_chk
    self.overwrite = overwrite
    self.chk_prefix = chk_prefix
    self.mode = mode
    self.chk_epoch_subdir = 'epoch'
    self.mp_manager = Manager()
    self.snapshot_copy = None

    self.cpu_side = False
    # Active snapshot, if true, don't snapshot again
    self.active_snapshot = Value('i', 0)
    self.lock = Lock()
    self.in_progress_snapshot = Value('i', 0)

    # Handle to the process performing checkpoint.
    # Can be only one at any instant. A new checkpoint
    # cannot start unless the previous one completes.
    self.chk_process = None

    # `overwrite` supersedes if False
    if self.overwrite is False and self.keep_epoch_chk is False:
        self.keep_epoch_chk = True

    # Global ID of checkpoints being written.
    # Used to format the checkpoint path.
    # Instantiate from chk when restoring.
    self.chk_global_id = -1

    # Sorted list of available checkpoints (fnames)
    self.available_chk_iters = self.mp_manager.list()
    self.available_chk_epochs = self.mp_manager.list()
    self.initalize_chk_dir()

    self.logger.info("Available checkpoints : ")
    for item in self.available_chk_iters:
        self.logger.info(item)

def __init__(self, config: ParamDict, environment: Environment, policy: Policy, filter_op: Filter):
    threads, gpu = config.require("threads", "gpu")
    threads_gpu = config["gpu threads"] if "gpu threads" in config else 2
    super(Agent_async, self).__init__(config, environment, policy, filter_op)

    # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update
    self._sync_signal = Value('i', 0)
    # environment sub-process list
    self._environment_proc = []
    # policy sub-process list
    self._policy_proc = []
    # used for synchronizing policy parameters
    self._param_pipe = None
    self._policy_lock = Lock()
    # used for synchronizing roll-out commands
    self._control_pipe = None
    self._environment_lock = Lock()

    step_pipe = []
    cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True)
    param_pipe_child, param_pipe_parent = Pipe(duplex=False)
    self._control_pipe = cmd_pipe_parent
    self._param_pipe = param_pipe_parent
    for i_envs in range(threads):
        child_name = f"environment_{i_envs}"
        step_pipe_pi, step_pipe_env = Pipe(duplex=True)
        step_lock = Lock()
        worker_cfg = ParamDict({"seed": self.seed + 1024 + i_envs, "gpu": gpu})
        child = Process(target=Agent_async._environment_worker, name=child_name,
                        args=(worker_cfg, cmd_pipe_child, step_pipe_env,
                              self._environment_lock, step_lock, self._sync_signal,
                              deepcopy(environment), deepcopy(filter_op)))
        self._environment_proc.append(child)
        step_pipe.append((step_pipe_pi, step_lock))
        child.start()

    for i_policies in range(threads_gpu):
        child_name = f"policy_{i_policies}"
        worker_cfg = ParamDict({"seed": self.seed + 2048 + i_policies, "gpu": gpu})
        child = Process(target=Agent_async._policy_worker, name=child_name,
                        args=(worker_cfg, param_pipe_child, step_pipe,
                              self._policy_lock, self._sync_signal, deepcopy(policy)))
        self._policy_proc.append(child)
        child.start()
    sleep(5)

def __init__(self, inputs, mode):
    self.mode = mode
    self.inputs = inputs
    self.queue = Queue()
    self.control = Value('i', 1)
    if self.mode == 0:
        self.process = Process(target=self.worker,
                               args=(self.inputs, self.queue, self.control))
        self.process.start()

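A plausible shutdown for this pattern, as a sketch; it assumes the worker loop polls control.value and exits when the flag is cleared.

def stop(self):
    # Clearing the flag is assumed to make the worker loop exit.
    self.control.value = 0
    if self.mode == 0:
        self.process.join()
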
def __init__(self, *args):
    """
    Statistics process saves the statistics obtained from workers.

    In particular, the shared models are saved every
    Config.MODEL_SAVE_FREQUENCY episodes. Moreover, some statistics are
    logged every Config.LOG_STATS_FREQUENCY episodes.
    """
    super(StatProcess, self).__init__()
    self.episode_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
    self.ae_loss_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
    self.episode_count = Value('i', 0)
    self.model_save = Value('i', 0)
    self.exit_flag = Value('i', 0)

    #:obj:`dict`: Dictionary of DPS models for RL.
    self.agents = {}
    for model, env_id in zip(args, Config.ENV_IDS):
        self.agents[env_id] = model

    #float: Time at start for logging.
    self._start_time = time.time()

def __init__(self, args) -> None:
    """
    Constructor

    :param args: Cmd-line arguments
    """
    self.args = args

    # global counter
    self.T = Value('i', 0)
    self.global_reward = Value('d', -np.inf)

    # worker handling
    self.worker_pool = []

    # validity check for input parameter
    if args.optimizer not in ['rmsprop', 'adam']:
        raise Exception(
            'Your given optimizer %s is currently not supported. '
            'Choose either "rmsprop" or "adam"' % args.optimizer)

class Counter(object):
    '''
    A counter used for multiprocessing, simple wrapper around
    multiprocessing.Value
    '''
    def __init__(self):
        from torch.multiprocessing import Value
        self.val = Value('i', 0)

    def increment(self, n=1):
        with self.val.get_lock():
            self.val.value += n

    def reset(self):
        with self.val.get_lock():
            self.val.value = 0

    @property
    def value(self):
        return self.val.value

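Example usage, a minimal sketch: several processes bump one shared Counter, and get_lock() makes each increment atomic.

import torch.multiprocessing as mp

def _bump(counter, n):
    for _ in range(n):
        counter.increment()

if __name__ == '__main__':
    counter = Counter()
    procs = [mp.Process(target=_bump, args=(counter, 1000)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(counter.value)  # 4000; without get_lock() some updates could be lost
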
def _worker(
    reader: DatasetReader,
    input_queue: Queue,
    output_queue: Queue,
    num_active_workers: Value,
    num_inflight_items: Value,
    worker_id: int,
) -> None:
    """
    A worker that pulls filenames off the input queue, uses the dataset reader
    to read them, and places the generated instances on the output queue. When
    there are no filenames left on the input queue, it decrements
    num_active_workers to signal completion.
    """
    logger.info(f"Reader worker: {worker_id} PID: {os.getpid()}")

    # Keep going until you get a file_path that's None.
    while True:
        file_path = input_queue.get()
        if file_path is None:
            # It's important that we close and join the queue here before
            # decrementing num_active_workers. Otherwise our parent may join us
            # before the queue's feeder thread has passed all buffered items to
            # the underlying pipe resulting in a deadlock.
            #
            # See:
            # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#pipes-and-queues
            # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#programming-guidelines
            output_queue.close()
            output_queue.join_thread()

            # Decrementing is not atomic.
            # See https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Value.
            with num_active_workers.get_lock():
                num_active_workers.value -= 1
            logger.info(f"Reader worker {worker_id} finished")
            break

        logger.info(f"reading instances from {file_path}")
        for instance in reader.read(file_path):
            with num_inflight_items.get_lock():
                num_inflight_items.value += 1
            output_queue.put(instance)

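On the consuming side, the two counters support a drain loop along these lines. This is a sketch and the function name is an assumption, not the library's actual consumer.

from queue import Empty

def _drain(output_queue, num_active_workers, num_inflight_items):
    # Keep reading while any worker is alive or items are still in flight;
    # the timeout avoids blocking forever on the race between the last
    # decrement and the last put.
    while num_active_workers.value > 0 or num_inflight_items.value > 0:
        try:
            instance = output_queue.get(timeout=1.0)
        except Empty:
            continue
        with num_inflight_items.get_lock():
            num_inflight_items.value -= 1
        yield instance
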
def init_data(self):
    self.is_working = False
    self.semaphore = True
    # whether the user has dragged the slider; default: False
    self.is_change_bar = Value(c_bool, False)
    self.frame_index = Value('i', 0)
    self.share_lock = Lock()  # shared lock for frame_index
    self.share_lock2 = Lock()  # shared lock for frame_index
    self.mutex = threading.Lock()
    self.timer = QTimer(self)  # used for updating the progress bar
    # used for detecting whether frame_total has been given
    self.temp_timer = QTimer(self)
    self.frame_total = Value('i', -1)
    self.playable = Value(c_bool, True)
    self.is_working = Value(c_bool, False)
    manager = Manager()
    # records the address of the video being played
    self.play_src = manager.Value(c_char_p, '0')
    self.mode = None  # 'online' or 'offline'

def __init__(self, scheduler: scheduler_type, mode: str = 'nearest',
             align_corners: bool = None, preserve_range: bool = False,
             keys: Sequence = ('data',), grad: bool = False, **kwargs):
    """
    Args:
        scheduler: scheduler which determines the current size. The scheduler
            is called with the current iteration of the transform
        mode: one of ``nearest``, ``linear``, ``bilinear``, ``bicubic``,
            ``trilinear``, ``area`` (for more information see
            :func:`torch.nn.functional.interpolate`)
        align_corners: input and output tensors are aligned by the center
            points of their corner pixels, preserving the values at the
            corner pixels
        preserve_range: output tensor has same range as input tensor
        keys: keys which should be augmented
        grad: enable gradient computation inside transformation
        **kwargs: keyword arguments passed to augment_fn

    Warnings:
        When this transformation is used in combination with multiprocessing,
        the step counter is not perfectly synchronized between multiple
        processes. As a result the step count may jump between values in a
        range of the number of processes used.
    """
    super().__init__(size=0, mode=mode, align_corners=align_corners,
                     preserve_range=preserve_range, keys=keys, grad=grad,
                     **kwargs)
    self.scheduler = scheduler
    self._step = Value('i', 0)

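For illustration, a scheduler can be any callable mapping the step count to a size; the linear ramp below is an assumption for the example, not part of the library.

def linear_size_scheduler(step: int) -> int:
    # grow the target size from 16 to 128 over the first 1000 steps
    return min(16 + step * (128 - 16) // 1000, 128)
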
def __init__(self, config, share_batches=True, manager=None, new_process=True):
    if new_process == True and manager is None:
        manager = Manager()
    self.knows = Semaphore(0)  # > 0 if we know if any are coming
                               # == 0 if DatasetReader is processing a command
    self.working = Semaphore(1 if new_process else 100)
    self.finished_reading = Lock()  # locked if we're still reading from file
    # number of molecules that have been sent to the pipe:
    self.in_pipe = Value('i', 0)

    # Tracking what's already been sent through the pipe:
    self._example_number = Value('i', 0)

    # The final kill switch:
    self._close = Value('i', 0)

    self.command_queue = manager.Queue(10)
    self.molecule_pipeline = None
    self.batch_queue = Queue(config.data.batch_queue_cap)  # manager.Queue(config.data.batch_queue_cap)
    self.share_batches = share_batches

    self.dataset_reader = DatasetReader("dataset_reader", self, config,
                                        new_process=new_process)
    if new_process:
        self.dataset_reader.start()

class Signal(object):
    '''
    A signal used for multiprocessing, simple wrapper around
    multiprocessing.Value
    '''
    def __init__(self):
        from torch.multiprocessing import Value
        self.val = Value('i', False)

    def set_signal(self, boolean):
        with self.val.get_lock():
            self.val.value = boolean

    @property
    def value(self):
        return bool(self.val.value)

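Example usage, a minimal sketch: a parent flips the Signal to stop a worker loop. Note the boolean is stored as an int in the underlying Value('i', ...), and the value property converts it back.

import time
import torch.multiprocessing as mp

def _loop(stop):
    while not stop.value:
        time.sleep(0.01)

if __name__ == '__main__':
    stop = Signal()
    p = mp.Process(target=_loop, args=(stop,))
    p.start()
    stop.set_signal(True)  # stored as int 1 in the shared Value
    p.join()
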
def add_agents(self, nb):
    old_length = len(self.agents)
    for index in range(old_length, old_length + nb):
        self.agents.append(
            Agent(id_=index,
                  prediction_queue=self.prediction_queue,
                  training_queue=self.training_queue,
                  states=self.train_set,
                  exit_flag=Value(c_bool, False),
                  statistics_queue=self.statistics_queue,
                  episode_counter=self.nb_episodes,
                  observation_shape=(self.channels, self.height, self.width),
                  action_space=self.n_outputs,
                  device=self.agent_device,
                  step_max=self.sequence_length))

def __init__(self, n_workers, actor, args):
    self._now_episode = Value('i', 0)
    self.queue = Queue()
    self.collect_event = Event()
    self.worker = []
    for i in range(n_workers):
        self.worker.append(
            Worker(self.queue, self.collect_event, actor, args, i))
    self.process = [
        Process(target=self.worker[i].run, args=(self._now_episode,))
        for i in range(n_workers)
    ]
    for p in self.process:
        p.start()
    print(f'Start {n_workers} workers.')

def __init__(self, experience_q, prediction_q, observation_q, env_id,
             episode_log_q, agent_id):
    """
    Workers are the agents interacting with the environment.

    Workers run a copy of the environment with their own specifications.
    They require Predictor processes to make decisions. Gathered experiences
    are submitted to a Queue on which the shared models are trained.

    Args:
        experience_q (mp.Queue): Shared memory queue containing experiences
            across workers of the same type.
        prediction_q (mp.Queue): Shared memory queue containing predictions
            of this worker.
        observation_q (mp.Queue): Shared memory queue containing observations
            across workers of the same type.
        env_id (str): The id of the environment instance this worker is
            interacting with.
        episode_log_q (mp.Queue): Shared memory queue containing the
            experience of past episodes.
        agent_id (int): The id of the worker process.
    """
    super(WorkerProcess, self).__init__()
    self.experience_q = experience_q
    self.prediction_q = prediction_q
    self.observation_q = observation_q
    self.env_id = env_id
    self.episode_log_q = episode_log_q
    self.id = agent_id

    #:class:`memory.ShortTermMemory`: Short term memory where the history is saved and experiences are memorized.
    self.memory = ShortTermMemory(Config.GLOW)
    #int: Signal for process exit.
    self.exit_flag = Value('i', 0)
    #torch.Tensor of float: Array of actions in one-hot encoding.
    self.actions = torch.Tensor(np.eye(Config.NUM_ACTIONS))
    #:class:`gym.Env`: The environment the agent interacts with.
    self.env = gym.make(Config.ENV_NAME, **Config.ENV_PARAMS[self.env_id])
    #bool: Boolean value that signals that an episode is finished.
    self.done = False
    #int: Current size of batches.
    self.batch_size = 0
    #torch.Tensor: Tensor of observation batch.
    self.o_batch = torch.Tensor([0.])
    #torch.Tensor: Tensor of action batch.
    self.a_batch = torch.Tensor([0.])
    #torch.Tensor: Tensor of target batch.
    self.t_batch = torch.Tensor([0.])

def __init__(self, autoencoder, optimizer_ae, agent, optimizer_ps, env_id,
             select_data, experience_q, training_count, ae_loss_log_q,
             trainer_id):
    """
    Trainers gather experiences and train their respective models.

    Args:
        autoencoder (:class:`base_networks.DenseAutoencoder`): The Server.autoencoder model.
        optimizer_ae (:class:`optim.Adam`): The Server.optimizer_ae for the encoder.
        agent (:class:`base_networks.DeepPS`): The deep PS model for RL.
        optimizer_ps (:class:`optim.Adam`): The Server.optimizer_ps for deep PS.
        env_id (str): The id of the environment/agent instance this trainer is using.
        select_data (tuple): The data used for training in 'selection' mode.
        experience_q (:class:`mp.Queue`): Shared memory queue containing experiences for training.
        training_count (:class:`mp.Value`): Shared memory value which counts the number of trainings.
        ae_loss_log_q (:class:`mp.Queue`): Shared memory queue containing the loss of the decoder.
        trainer_id (int): The id of the trainer process.
    """
    super(TrainerProcess, self).__init__()
    self.autoencoder = autoencoder
    self.optimizer_ae = optimizer_ae
    self.agent = agent
    self.optimizer_ps = optimizer_ps
    self.env_id = env_id
    self.experience_q = experience_q
    self.training_count = training_count
    self.ae_loss_log_q = ae_loss_log_q
    self.id = trainer_id

    if Config.TRAIN_MODE == 'selection':
        o_batch, a_batch, t_batch = select_data
        #torch.Tensor: The observation training data set.
        self.o_batch = o_batch.to(Config.DEVICE)
        #torch.Tensor: The action training data set.
        self.a_batch = a_batch.to(Config.DEVICE)
        #torch.Tensor: The target training data set.
        self.t_batch = t_batch.to(Config.DEVICE)

    #int: Signal for process exit.
    self.exit_flag = Value('i', 0)

def __init__(self, agent, observation_q, prediction_qs, env_id, predictor_id):
    """
    Predictors gather observations from agents and make predictions.

    Args:
        agent (:class:`base_networks.DeepPS`): The deep PS model for RL.
        observation_q (:class:`mp.Queue`): Shared memory queue with observations of agents of the same type.
        prediction_qs (:obj:`list` of :class:`mp.Queue`): Shared memory queues containing predictions.
        env_id (str): The identifier for the environment type.
        predictor_id (int): The id of the predictor process.
    """
    super(PredictorProcess, self).__init__()
    self.agent = agent
    self.observation_q = observation_q
    self.prediction_qs = prediction_qs
    self.env_id = env_id
    self.id = predictor_id

    #int: Signal for process exit.
    self.exit_flag = Value('i', 0)
    #torch.Tensor of float: Array of actions in one-hot encoding.
    self.actions = torch.Tensor(np.eye(Config.NUM_ACTIONS)).to(Config.DEVICE)

class Agent_sync(Agent):
    """
    An agent class that maintains multiple policy nets and environments;
    each worker has one environment and one policy.
    Useful for most single-agent RL/IL settings.
    """
    def __init__(self, config: ParamDict, environment: Environment, policy: Policy, filter_op: Filter):
        threads, gpu = config.require("threads", "gpu")
        super(Agent_sync, self).__init__(config, environment, policy, filter_op)

        # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update
        self._sync_signal = Value('i', 0)
        # sampler sub-process list
        self._sampler_proc = []
        # used for synchronizing commands
        self._cmd_pipe = None
        self._param_pipe = None
        self._cmd_lock = Lock()

        cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True)
        param_pipe_child, param_pipe_parent = Pipe(duplex=False)
        self._cmd_pipe = cmd_pipe_parent
        self._param_pipe = param_pipe_parent
        for i_thread in range(threads):
            child_name = f"sampler_{i_thread}"
            worker_cfg = ParamDict({"seed": self.seed + 1024 + i_thread, "gpu": gpu})
            child = Process(target=Agent_sync._sampler_worker, name=child_name,
                            args=(worker_cfg, cmd_pipe_child, param_pipe_child,
                                  self._cmd_lock, self._sync_signal,
                                  deepcopy(policy), deepcopy(environment),
                                  deepcopy(filter_op)))
            self._sampler_proc.append(child)
            child.start()

    def __del__(self):
        """ We should terminate all child processes here """
        self._sync_signal.value = -1
        sleep(1)
        for _proc in self._sampler_proc:
            _proc.join(2)
            if _proc.is_alive():
                _proc.terminate()
        self._cmd_pipe.close()
        self._param_pipe.close()

    def broadcast(self, config: ParamDict):
        policy_state, filter_state, max_step, self._batch_size, fixed_env, fixed_policy, fixed_filter = \
            config.require("policy state dict", "filter state dict", "trajectory max step",
                           "batch size", "fixed environment", "fixed policy", "fixed filter")

        self._replay_buffer = []
        policy_state["fixed policy"] = fixed_policy
        filter_state["fixed filter"] = fixed_filter
        cmd = ParamDict({"trajectory max step": max_step,
                         "fixed environment": fixed_env,
                         "filter state dict": filter_state})
        assert self._sync_signal.value < 1, \
            "Last sync event not finished due to some error, a sub-process may have died; abort"

        # tell sub-processes to reset
        with self._sync_signal.get_lock():
            self._sync_signal.value = len(self._sampler_proc)
        # sync net parameters
        with self._cmd_lock:
            for _ in range(len(self._sampler_proc)):
                self._param_pipe.send(policy_state)
        # wait for all agents' ready feedback
        while self._sync_signal.value > 0:
            sleep(0.01)
        # sync commands
        for _ in range(self._batch_size):
            self._cmd_pipe.send(cmd)

    def collect(self):
        if self._cmd_pipe.poll(0.1):
            self._replay_buffer.append(self._cmd_pipe.recv())
        if len(self._replay_buffer) < self._batch_size:
            return None
        else:
            batch = self._filter.operate_trajectoryList(self._replay_buffer)
            return batch

    @staticmethod
    def _sampler_worker(setups: ParamDict, pipe_cmd, pipe_param, read_lock, sync_signal,
                        policy, environment, filter_op):
        gpu, seed = setups.require("gpu", "seed")
        device = decide_device(gpu)

        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

        environment.init(display=False)
        filter_op.init()
        filter_op.to_device(device)
        policy.init()
        policy.to_device(device)

        # -1: syncing, 0: waiting for new command, 1: sampling
        local_state = 0
        current_step = None
        step_buffer = []
        cmd = None

        def _get_piped_data(pipe):
            with read_lock:
                if pipe.poll(0.001):
                    return pipe.recv()
                else:
                    return None

        while sync_signal.value >= 0:
            # check sync counter for sync event, and wait for new parameters
            if sync_signal.value > 0 and local_state >= 0:
                # receive sync signal, reset all workspace settings, decrease
                # sync counter, and set state machine to -1 for not init again
                while _get_piped_data(pipe_cmd) is not None:
                    pass
                step_buffer.clear()
                _policy_state = _get_piped_data(pipe_param)
                if _policy_state is not None:
                    # set new parameters
                    policy.reset(_policy_state)
                    with sync_signal.get_lock():
                        sync_signal.value -= 1
                    local_state = -1
            # if sync ends, tell state machine to recover from syncing state, and reset environment
            elif sync_signal.value == 0 and local_state == -1:
                local_state = 0
            # waiting for states (states are list of dicts)
            elif sync_signal.value == 0 and local_state == 0:
                cmd = _get_piped_data(pipe_cmd)
                if cmd is not None:
                    step_buffer.clear()
                    cmd.require("filter state dict", "fixed environment", "trajectory max step")
                    current_step = environment.reset(random=not cmd["fixed environment"])
                    filter_op.reset(cmd["filter state dict"])
                    local_state = 1
            # sampling
            elif sync_signal.value == 0 and local_state == 1:
                with torch.no_grad():
                    policy_step = filter_op.operate_currentStep(current_step)
                    last_step = policy.step([policy_step])[0]
                    last_step, current_step, done = environment.step(last_step)
                    record_step = filter_op.operate_recordStep(last_step)
                    step_buffer.append(record_step)
                if len(step_buffer) >= cmd["trajectory max step"] or done:
                    traj = filter_op.operate_stepList(step_buffer, done=done)
                    with read_lock:
                        pipe_cmd.send(traj)
                    local_state = 0

        # finalization
        filter_op.finalize()
        policy.finalize()
        environment.finalize()
        pipe_cmd.close()
        pipe_param.close()
        print("Sampler sub-process exited")

# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# torch.multiprocessing.set_start_method('spawn', force=True)
# torch.multiprocessing.set_sharing_strategy('file_system')

PPO_Transition = namedtuple(
    'PPO_Transition',
    ('obs', 'action', 'reward', 'done', 'value', 'logproba'))
CPPO_Transition = namedtuple(
    'CPPO_Transition',
    ('obs', 'action', 'reward', 'cost', 'sum_cost', 'done', 'value',
     'c_value', 'logproba'))
TD_Transition = namedtuple(
    'TD_Transition', ('obs', 'action', 'obs_next', 'reward', 'done'))
Safe_TD_Transition = namedtuple(
    'Safe_TD_Transition',
    ('obs', 'action', 'obs_next', 'reward', 'cost', 'cost_next', 'done'))

# 0: normal, 1: blocked; after the main process finishes one sample,
# it blocks the sub-processes
Sub_Proc_Blocking = Value('i', 0)


def make_env(config_env, seed, env_index):
    env = gym.make(config_env['id'])
    env.start(str(env_index), path=config_env['path'], gui=config_env['gui'],
              max_step=config_env['max_step'],
              reward_model=config_env['reward_model'],
              is_human_model=config_env['human_model'])
    env.seed(seed)
    return env


def is_on_policy(args_algo):
    on_policys = ['ppo2', 'trpo', 'cppo', 'cppo2']
    if args_algo in on_policys:
        return True
    else:
        return False

def __init__(self, cache_dir, dataset_dir, dataset_list, cuda,
             batch_size=500, num_workers=3, renew_frequency=5,
             rejection_radius_position=0, numpatches=900, numneg=3,
             pos_thr=50.0, reject=True, mode='train', rejection_radius=3000,
             dist_type='3D', patch_radius=None, use_depth=False,
             use_normals=False, use_silhouettes=False, color_jitter=False,
             greyscale=False, maxres=4096, scale_jitter=False,
             photo_jitter=False, uniform_negatives=False, needles=0,
             render_only=False, maxitems=200, cache_once=False):
    super(MultimodalPatchesCache, self).__init__()
    self.cache_dir = cache_dir
    self.dataset_dir = dataset_dir
    # self.images_path = images_path
    self.dataset_list = dataset_list
    self.cuda = cuda
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.renew_frequency = renew_frequency
    self.rejection_radius_position = rejection_radius_position
    self.numpatches = numpatches
    self.numneg = numneg
    self.pos_thr = pos_thr
    self.reject = reject
    self.mode = mode
    self.rejection_radius = rejection_radius
    self.dist_type = dist_type
    self.patch_radius = patch_radius
    self.use_depth = use_depth
    self.use_normals = use_normals
    self.use_silhouettes = use_silhouettes
    self.color_jitter = color_jitter
    self.greyscale = greyscale
    self.maxres = maxres
    self.scale_jitter = scale_jitter
    self.photo_jitter = photo_jitter
    self.uniform_negatives = uniform_negatives
    self.needles = needles
    self.render_only = render_only

    self.cache_done_lock = Lock()
    self.all_done = Value('B', 0)  # 0 is False
    self.cache_done = Value('B', 0)  # 0 is False

    self.wait_for_cache_builder = Event()
    # prepare for wait until initial cache is built
    self.wait_for_cache_builder.clear()
    self.cache_builder_resume = Event()

    self.maxitems = maxitems
    self.cache_once = cache_once
    if self.mode == 'eval':
        self.maxitems = -1

    self.cache_builder = Process(target=self.buildCache, args=[self.maxitems])

    self.current_cache_build = Value('B', 0)  # 0th cache
    self.current_cache_use = Value('B', 1)  # 1th cache

    self.cache_names = ["cache1", "cache2"]  # constant

    rebuild_cache = True
    if self.mode == 'eval':
        validation_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_build.value])
        if os.path.isdir(validation_dir):
            # we don't need to rebuild validation cache
            # TODO: check if cache is VALID
            rebuild_cache = False
    elif cache_once:
        build_dataset_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_build.value])
        if os.path.isdir(build_dataset_dir):
            # we don't need to rebuild the training cache if we are training
            # on a limited subset of the training set
            rebuild_cache = False

    if rebuild_cache:
        # clear the caches if they already exist
        build_dataset_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_build.value])
        if os.path.isdir(build_dataset_dir):
            shutil.rmtree(build_dataset_dir)
        use_dataset_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_use.value])
        if os.path.isdir(use_dataset_dir):
            shutil.rmtree(use_dataset_dir)
        os.makedirs(build_dataset_dir)

        self.cache_builder_resume.set()
        self.cache_builder.start()

        # wait until initial cache is built
        # print("before wait to build")
        # print("wait for cache builder state",
        #       self.wait_for_cache_builder.is_set())
        self.wait_for_cache_builder.wait()
        # print("after wait to build")
        # we have been resumed

    if self.mode != 'eval' and (not self.cache_once):
        # for training, we can set up the cache builder to build
        # the second cache
        self.restart()
    else:
        # else for validation we don't need the second cache;
        # we just need to switch the built cache to the use cache
        # in order to use it
        tmp = self.current_cache_build.value
        self.current_cache_build.value = self.current_cache_use.value
        self.current_cache_use.value = tmp

config = json.load(server_file)
if ('addr' not in config) or ('port' not in config):
    print("IP address (addr) and port number required in config")
address = config['addr']
port = int(config['port'])

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind((address, port))
server.listen()

param_queue = Queue()
param_queue.put(net.state_dict())
shutdown_val = Value('b', 0)
receiver_proc = Process(target=HandleWorkers,
                        args=(server, replay_memory, mem_lock, param_queue,
                              shutdown_val))
receiver_proc.start()

while True:
    try:
        Train(net, replay_memory, mem_lock, args.output_file)
        if param_queue is not None:
            # state_dict must be called; passing the bound method itself
            # would put an unusable object on the queue
            param_queue.put(net.state_dict())
        torch.save(net.state_dict(), args.output_file)
    except KeyboardInterrupt:
        if server is not None:
            assert (shutdown_val is not None and receiver_proc is not None)