def _compare_parallel(self, network, opponent_network, device, num_workers):
    q, r = divmod(self.conf.GAMES_PER_COMPARISON, num_workers)
    num_active_workers = Value('i', num_workers)
    evaluator_mgr = BulkEvaluatorManager(
        [network, opponent_network], device, num_workers)
    score = Value('i', 0)

    workers = []
    s = 0
    for worker_id in range(num_workers):
        num_games = q + 1 if worker_id < r else q
        evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
        opponent_evaluator = evaluator_mgr.get_evaluator(worker_id, 1)
        color = BLACK if s % 2 == 0 else WHITE
        s += num_games
        worker = Process(
            target=self._worker_job,
            args=(num_games, num_active_workers, evaluator,
                  opponent_evaluator, color, score),
        )
        workers.append(worker)
        worker.start()

    # start evaluator server
    server = evaluator_mgr.get_server(num_active_workers)
    server.start()

    for worker in workers:
        worker.join()
    server.join()

    return score.value / self.conf.GAMES_PER_COMPARISON
def __init__(self, opt, world):
    super().__init__(opt)
    self.inner_world = world
    self.numthreads = opt['numthreads']

    self.sync = {  # synchronization primitives
        # semaphores for counting queued examples
        'queued_sem': Semaphore(0),   # counts num exs to be processed
        'threads_sem': Semaphore(0),  # counts threads
        'reset_sem': Semaphore(0),    # allows threads to reset

        # flags for communicating with threads
        'reset_flag': Value('b', False),  # threads should reset
        'term_flag': Value('b', False),   # threads should terminate

        # counters
        'epoch_done_ctr': Value('i', 0),  # number of done threads
        'total_parleys': Value('l', 0),   # number of parleys in threads
    }

    self.threads = []
    for i in range(self.numthreads):
        self.threads.append(
            HogwildProcess(i, opt, world.share(), self.sync))
        time.sleep(0.05)  # delay can help prevent deadlock in thread launches
    for t in self.threads:
        t.start()

    for _ in self.threads:  # wait for threads to launch
        # this makes sure that no threads get examples before all are set up
        # otherwise they might reset one another after processing some exs
        self.sync['threads_sem'].acquire()
def __init__(self, opt, world):
    super().__init__(opt)
    self.inner_world = world
    self.numthreads = opt['numthreads']

    self.sync = {  # synchronization primitives
        # semaphores for counting queued examples
        'queued_sem': Semaphore(0),   # counts num exs to be processed
        'threads_sem': Semaphore(0),  # counts threads
        'reset_sem': Semaphore(0),    # allows threads to reset

        # flags for communicating with threads
        'reset_flag': Value('b', False),  # threads should reset
        'term_flag': Value('b', False),   # threads should terminate

        # counters
        'epoch_done_ctr': Value('i', 0),  # number of done threads
        'total_parleys': Value('l', 0),   # number of parleys in threads
    }

    # don't let threads create more threads!
    self.threads = []
    for i in range(self.numthreads):
        self.threads.append(HogwildProcess(i, opt, world, self.sync))
    for t in self.threads:
        t.start()

    for _ in self.threads:  # wait for threads to launch
        self.sync['threads_sem'].acquire()
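# The startup handshake above relies on each worker releasing 'threads_sem'
# exactly once after it has finished initializing. A minimal sketch of a
# compatible worker, assuming this handshake; this is illustrative only, not
# the actual HogwildProcess implementation:

class _SketchHogwildProcess(Process):
    def __init__(self, tid, opt, shared, sync):
        super().__init__(daemon=True)
        self.tid, self.opt, self.shared, self.sync = tid, opt, shared, sync

    def run(self):
        # ... build the worker's copy of the world from self.shared ...
        self.sync['threads_sem'].release()  # signal "this worker is set up"
        while not self.sync['term_flag'].value:
            # wait for queued examples, parley, update the shared counters,
            # and honor reset_flag / reset_sem as needed
            break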
def __init__(self, config, share_batches=True, manager=None, new_process=True):
    if new_process and manager is None:
        manager = Manager()
    self.knows = Semaphore(0)  # > 0 if we know if any are coming
                               # == 0 if DatasetReader is processing a command
    self.working = Semaphore(1 if new_process else 100)
    self.finished_reading = Lock()  # locked if we're still reading from file
    # number of molecules that have been sent to the pipe:
    self.in_pipe = Value('i', 0)

    # Tracking what's already been sent through the pipe:
    self._example_number = Value('i', 0)

    # The final kill switch:
    self._close = Value('i', 0)

    self.command_queue = manager.Queue(10)
    self.molecule_pipeline = None
    self.batch_queue = Queue(config.data.batch_queue_cap)  # manager.Queue(config.data.batch_queue_cap)
    self.share_batches = share_batches

    self.dataset_reader = DatasetReader(
        "dataset_reader", self, config, new_process=new_process)
    if new_process:
        self.dataset_reader.start()
def start(self) -> None:
    shards = glob.glob(self.file_path)
    # Ensure a consistent order before shuffling for testing.
    shards.sort()
    num_shards = len(shards)

    # If we want multiple epochs per read, put shards in the queue multiple times.
    self.input_queue = Queue(num_shards * self.epochs_per_read + self.num_workers)
    for _ in range(self.epochs_per_read):
        np.random.shuffle(shards)
        for shard in shards:
            self.input_queue.put(shard)

    # Then put a None per worker to signify no more files.
    for _ in range(self.num_workers):
        self.input_queue.put(None)

    assert not self.processes, "Process list non-empty! You must call QIterable.join() before restarting."

    self.num_active_workers = Value('i', self.num_workers)
    self.num_inflight_items = Value('i', 0)

    for worker_id in range(self.num_workers):
        process = Process(
            target=_worker,
            args=(self.reader, self.input_queue, self.output_queue,
                  self.num_active_workers, self.num_inflight_items, worker_id))
        logger.info(f"starting worker {worker_id}")
        process.start()
        self.processes.append(process)
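# The None sentinels and the two shared counters only work if the worker side
# follows the same protocol. A hedged sketch of a compatible _worker loop;
# this is an assumption about the contract, not the library's implementation:

def _sketch_worker(reader, input_queue, output_queue, num_active_workers,
                   num_inflight_items, worker_id):
    while True:
        shard = input_queue.get()
        if shard is None:
            # No more files: this worker retires and decrements the counter
            # so the consumer knows when all workers are done.
            with num_active_workers.get_lock():
                num_active_workers.value -= 1
            break
        for item in reader.read(shard):
            with num_inflight_items.get_lock():
                num_inflight_items.value += 1
            output_queue.put(item)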
def __init__(self, name, env_kwargs, model_kwargs, **kwargs):
    super().__init__(env_kwargs=env_kwargs, model_kwargs=model_kwargs)
    self.name = name
    self.num_processes = 16

    self._report_queue = Queue(maxsize=16)
    self._shared_global_t = Value('i', 0)
    self._shared_is_stopped = Value('i', False)
def _generate_parallel(self, iteration, network, device, num_workers):
    q, r = divmod(self.remaining_games, num_workers)
    num_active_workers = Value('i', num_workers)
    resign_threshold = Value('d', self.resign_mgr.threshold())
    evaluator_mgr = BulkEvaluatorManager([network], device, num_workers)
    output_queue = SimpleQueue()

    # start the workers
    workers = []
    for worker_id in range(num_workers):
        num_games = q + 1 if worker_id < r else q
        evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
        worker = Process(
            target=self._worker_job,
            args=(worker_id, num_games, num_active_workers,
                  resign_threshold, evaluator, output_queue),
        )
        workers.append(worker)
        worker.start()

    # start evaluator server
    server = evaluator_mgr.get_server(num_active_workers)
    server.start()

    # collect the examples generated by workers
    while num_active_workers.value > 0 or not output_queue.empty():
        examples, resign_value_history, result = output_queue.get()
        self.example_pool += examples
        self.game_length.append(len(examples))

        # add the history into the resignation manager to update the threshold
        if resign_value_history is not None:
            self.resign_mgr.add(resign_value_history, result)
            resign_threshold.value = self.resign_mgr.threshold()

        self.remaining_games -= 1

        # periodically save the progress
        if (self.conf.GAMES_PER_ITERATION - self.remaining_games) \
                % self.conf.EXAMPLE_POOL_SAVE_FREQUENCY == 0:
            self.save(iteration)
            log.info(
                f'[iter={iteration}] ExamplePool: checkpoint saved, '
                f'{self.remaining_games} games remaining'
            )

    for worker in workers:
        worker.join()
    server.join()
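# The collection loop above exits only once every worker has decremented
# num_active_workers and the queue has drained, so each worker must decrement
# the shared counter when it finishes its games. A minimal sketch of that
# contract; play_one_game is a hypothetical helper, not part of the source:

def _sketch_worker_job(worker_id, num_games, num_active_workers,
                       resign_threshold, evaluator, output_queue):
    for _ in range(num_games):
        # play one self-play game with `evaluator`, consulting
        # resign_threshold.value, and collect its training examples
        examples, resign_value_history, result = play_one_game(
            evaluator, resign_threshold.value)
        output_queue.put((examples, resign_value_history, result))
    # let the collector and the evaluator server know this worker is done
    with num_active_workers.get_lock():
        num_active_workers.value -= 1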
def __init__(self, world_class, opt, agents):
    super().__init__(opt)
    self.inner_world = world_class(opt, agents)

    self.queued_items = Semaphore(0)  # counts num exs to be processed
    self.epochDone = Condition()  # notifies when exs are finished
    self.terminate = Value('b', False)  # tells threads when to shut down
    self.cnt = Value('i', 0)  # number of exs that remain to be processed

    self.threads = []
    for i in range(opt['numthreads']):
        self.threads.append(
            HogwildProcess(i, world_class, opt, agents, self.queued_items,
                           self.epochDone, self.terminate, self.cnt))
    for t in self.threads:
        t.start()
def __init__(self, config: ParamDict, environment: Environment, policy: Policy, filter_op: Filter): threads, gpu = config.require("threads", "gpu") super(Agent_sync, self).__init__(config, environment, policy, filter_op) # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update self._sync_signal = Value('i', 0) # sampler sub-process list self._sampler_proc = [] # used for synchronize commands self._cmd_pipe = None self._param_pipe = None self._cmd_lock = Lock() cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True) param_pipe_child, param_pipe_parent = Pipe(duplex=False) self._cmd_pipe = cmd_pipe_parent self._param_pipe = param_pipe_parent for i_thread in range(threads): child_name = f"sampler_{i_thread}" worker_cfg = ParamDict({ "seed": self.seed + 1024 + i_thread, "gpu": gpu }) child = Process(target=Agent_sync._sampler_worker, name=child_name, args=(worker_cfg, cmd_pipe_child, param_pipe_child, self._cmd_lock, self._sync_signal, deepcopy(policy), deepcopy(environment), deepcopy(filter_op))) self._sampler_proc.append(child) child.start()
def create(cls):
    if not hasattr(cls, 'length_to_eps'):
        # Maps episode length to list of episodes
        cls.length_to_eps = {}
    if not hasattr(cls, 'ep_indices'):
        # Set of episode indices already in the cache
        cls.ep_indices = set()
    if not hasattr(cls, 'batches'):
        # List of batches if popping batches
        cls.batches = []
    if not hasattr(cls, 'load_complete'):
        # If all episodes have been loaded into memory
        cls.load_complete = Value(ctypes.c_bool, False)
    if not hasattr(cls, 'batches_lock'):
        # Lock to access batches
        cls.batches_lock = Lock()
    if not hasattr(cls, 'cache_lock'):
        # Lock to access length_to_eps
        cls.cache_lock = Lock()
    if not hasattr(cls, 'fill_cache_lock'):
        # Lock for condition variables
        cls.fill_cache_lock = RLock()
    if not hasattr(cls, 'add_to_cache_cv'):
        # Condition notifying Loader to add to cache
        cls.add_to_cache_cv = Condition(lock=cls.fill_cache_lock)
    if not hasattr(cls, 'cache_filled_cv'):
        # Condition notifying teacher that cache has episodes
        cls.cache_filled_cv = Condition(lock=cls.fill_cache_lock)
def __init__(self, chk_dir, chk, keep_epoch_chk=True, overwrite=True,
             mode=CFMode.AUTO, chk_prefix='model_v_'):
    self.logger = logging.getLogger(__name__)
    self.chk_dir = chk_dir
    self.chk = chk
    self.keep_epoch_chk = keep_epoch_chk
    self.overwrite = overwrite
    self.chk_prefix = chk_prefix
    self.mode = mode
    self.chk_epoch_subdir = 'epoch'
    self.mp_manager = Manager()
    self.snapshot_copy = None

    self.cpu_side = False
    # Active snapshot, if true, don't snapshot again
    self.active_snapshot = Value('i', 0)
    self.lock = Lock()
    self.in_progress_snapshot = Value('i', 0)

    # Handle to the process performing checkpoint
    # Can be only one at any instant. A new checkpoint
    # cannot start unless the previous one completes
    self.chk_process = None

    # `overwrite` supersedes if False
    if self.overwrite is False and self.keep_epoch_chk is False:
        self.keep_epoch_chk = True

    # Global ID of checkpoints being written
    # Used to format the checkpoint path
    # Instantiate from chk when restoring
    self.chk_global_id = -1

    # Sorted list of available checkpoints (fnames)
    self.available_chk_iters = self.mp_manager.list()
    self.available_chk_epochs = self.mp_manager.list()
    self.initalize_chk_dir()

    self.logger.info("Available checkpoints : ")
    for item in self.available_chk_iters:
        self.logger.info(item)
def __init__(self, config: ParamDict, environment: Environment, policy: Policy, filter_op: Filter): threads, gpu = config.require("threads", "gpu") threads_gpu = config["gpu threads"] if "gpu threads" in config else 2 super(Agent_async, self).__init__(config, environment, policy, filter_op) # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update self._sync_signal = Value('i', 0) # environment sub-process list self._environment_proc = [] # policy sub-process list self._policy_proc = [] # used for synchronize policy parameters self._param_pipe = None self._policy_lock = Lock() # used for synchronize roll-out commands self._control_pipe = None self._environment_lock = Lock() step_pipe = [] cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True) param_pipe_child, param_pipe_parent = Pipe(duplex=False) self._control_pipe = cmd_pipe_parent self._param_pipe = param_pipe_parent for i_envs in range(threads): child_name = f"environment_{i_envs}" step_pipe_pi, step_pipe_env = Pipe(duplex=True) step_lock = Lock() worker_cfg = ParamDict({ "seed": self.seed + 1024 + i_envs, "gpu": gpu }) child = Process(target=Agent_async._environment_worker, name=child_name, args=(worker_cfg, cmd_pipe_child, step_pipe_env, self._environment_lock, step_lock, self._sync_signal, deepcopy(environment), deepcopy(filter_op))) self._environment_proc.append(child) step_pipe.append((step_pipe_pi, step_lock)) child.start() for i_policies in range(threads_gpu): child_name = f"policy_{i_policies}" worker_cfg = ParamDict({ "seed": self.seed + 2048 + i_policies, "gpu": gpu }) child = Process(target=Agent_async._policy_worker, name=child_name, args=(worker_cfg, param_pipe_child, step_pipe, self._policy_lock, self._sync_signal, deepcopy(policy))) self._policy_proc.append(child) child.start() sleep(5)
def __init__(self, args) -> None:
    """
    Constructor

    :param args: Cmd-line arguments
    """
    self.args = args

    # global counter
    self.T = Value('i', 0)
    self.global_reward = Value('d', -np.inf)

    # worker handling
    self.worker_pool = []

    # validity check for input parameter
    if args.optimizer not in ['rmsprop', 'adam']:
        raise Exception(
            'Your given optimizer %s is currently not supported. '
            'Choose either "rmsprop" or "adam".' % args.optimizer)
def __init__(self, inputs, mode):
    self.mode = mode
    self.inputs = inputs
    self.queue = Queue()
    self.control = Value('i', 1)
    if self.mode == 0:
        self.process = Process(target=self.worker,
                               args=(self.inputs, self.queue, self.control))
        self.process.start()
def __init__(self, *args):
    """
    Statistics process saves the statistics obtained from workers.

    In particular, the shared models are saved every Config.MODEL_SAVE_FREQUENCY
    episodes. Moreover, some statistics are logged every Config.LOG_STATS_FREQUENCY
    episodes.
    """
    super(StatProcess, self).__init__()
    self.episode_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
    self.ae_loss_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
    self.episode_count = Value('i', 0)
    self.model_save = Value('i', 0)
    self.exit_flag = Value('i', 0)

    #:obj:`dict`: Dictionary of DPS models for RL.
    self.agents = {}
    for model, env_id in zip(args, Config.ENV_IDS):
        self.agents[env_id] = model

    #float: Time at start for logging.
    self._start_time = time.time()
def init_data(self):
    self.is_working = False
    self.semaphore = True
    # whether the user has dragged the slider; default: False
    self.is_change_bar = Value(c_bool, False)
    self.frame_index = Value('i', 0)
    self.share_lock = Lock()   # shared lock for frame_index
    self.share_lock2 = Lock()  # shared lock for frame_index
    self.mutex = threading.Lock()
    self.timer = QTimer(self)  # used for updating the progress bar
    # used for detecting whether frame_total has been given
    self.temp_timer = QTimer(self)
    self.frame_total = Value('i', -1)
    self.playable = Value(c_bool, True)
    self.is_working = Value(c_bool, False)
    manager = Manager()
    # records the path of the video being played
    self.play_src = manager.Value(c_char_p, '0')
    self.mode = None  # 'online' or 'offline'
def add_agents(self, nb):
    old_length = len(self.agents)
    for index in range(old_length, old_length + nb):
        self.agents.append(
            Agent(id_=index,
                  prediction_queue=self.prediction_queue,
                  training_queue=self.training_queue,
                  states=self.train_set,
                  exit_flag=Value(c_bool, False),
                  statistics_queue=self.statistics_queue,
                  episode_counter=self.nb_episodes,
                  observation_shape=(self.channels, self.height, self.width),
                  action_space=self.n_outputs,
                  device=self.agent_device,
                  step_max=self.sequence_length))
def __init__(self, n_workers, actor, args):
    self._now_episode = Value('i', 0)
    self.queue = Queue()
    self.collect_event = Event()
    self.worker = []
    for i in range(n_workers):
        self.worker.append(
            Worker(self.queue, self.collect_event, actor, args, i))
    self.process = [
        Process(target=self.worker[i].run, args=(self._now_episode, ))
        for i in range(n_workers)
    ]
    for p in self.process:
        p.start()
    print(f'Start {n_workers} workers.')
def __init__(self, experience_q, prediction_q, observation_q, env_id,
             episode_log_q, agent_id):
    """
    Workers are the agents interacting with the environment.

    Workers run a copy of the environment with their own specifications and
    require Predictor processes to make decisions. Gathered experiences are
    submitted to a Queue on which the shared models are trained.

    Args:
        experience_q (mp.Queue): Shared memory queue containing experiences
            across workers of the same type.
        prediction_q (mp.Queue): Shared memory queue containing predictions
            of this worker.
        observation_q (mp.Queue): Shared memory queue containing observations
            across workers of the same type.
        env_id (str): The id of the environment instance this worker is
            interacting with.
        episode_log_q (mp.Queue): Shared memory queue containing the
            experience of past episodes.
        agent_id (int): The id of the worker process.
    """
    super(WorkerProcess, self).__init__()
    self.experience_q = experience_q
    self.prediction_q = prediction_q
    self.observation_q = observation_q
    self.env_id = env_id
    self.episode_log_q = episode_log_q
    self.id = agent_id

    #:class:`memory.ShortTermMemory`: Short term memory where the history is saved and experiences are memorized.
    self.memory = ShortTermMemory(Config.GLOW)
    #int: Signal for process exit.
    self.exit_flag = Value('i', 0)
    #torch.Tensor of float: Array of actions in one-hot encoding.
    self.actions = torch.Tensor(np.eye(Config.NUM_ACTIONS))
    #:class:`gym.Env`: The environment the agent interacts with.
    self.env = gym.make(Config.ENV_NAME, **Config.ENV_PARAMS[self.env_id])
    #bool: Boolean value that signals that an episode is finished.
    self.done = False
    #int: Current size of batches.
    self.batch_size = 0
    #torch.Tensor: Tensor of observation batch.
    self.o_batch = torch.Tensor([0.])
    #torch.Tensor: Tensor of action batch.
    self.a_batch = torch.Tensor([0.])
    #torch.Tensor: Tensor of target batch.
    self.t_batch = torch.Tensor([0.])
def __init__(self, autoencoder, optimizer_ae, agent, optimizer_ps, env_id,
             select_data, experience_q, training_count, ae_loss_log_q,
             trainer_id):
    """
    Trainers gather experiences and train their respective models.

    Args:
        autoencoder (:class:`base_networks.DenseAutoencoder`): The Server.autoencoder model.
        optimizer_ae (:class:`optim.Adam`): The Server.optimizer_ae for the encoder.
        agent (:class:`base_networks.DeepPS`): The deep PS model for RL.
        optimizer_ps (:class:`optim.Adam`): The Server.optimizer_ps for deep PS.
        env_id (str): The id of the environment/agent instance this trainer is using.
        select_data (tuple): The data used for training in 'selection' mode.
        experience_q (:class:`mp.Queue`): Shared memory queue containing experiences for training.
        training_count (:class:`mp.Value`): Shared memory value which counts the number of trainings.
        ae_loss_log_q (:class:`mp.Queue`): Shared memory queue containing the loss of the decoder.
        trainer_id (int): The id of the trainer process.
    """
    super(TrainerProcess, self).__init__()
    self.autoencoder = autoencoder
    self.optimizer_ae = optimizer_ae
    self.agent = agent
    self.optimizer_ps = optimizer_ps
    self.env_id = env_id
    self.experience_q = experience_q
    self.training_count = training_count
    self.ae_loss_log_q = ae_loss_log_q
    self.id = trainer_id

    if Config.TRAIN_MODE == 'selection':
        o_batch, a_batch, t_batch = select_data
        #torch.Tensor: The observation training data set.
        self.o_batch = o_batch.to(Config.DEVICE)
        #torch.Tensor: The action training data set.
        self.a_batch = a_batch.to(Config.DEVICE)
        #torch.Tensor: The target training data set.
        self.t_batch = t_batch.to(Config.DEVICE)

    #int: Signal for process exit.
    self.exit_flag = Value('i', 0)
def __init__(self, scheduler: scheduler_type, mode: str = 'nearest',
             align_corners: bool = None, preserve_range: bool = False,
             keys: Sequence = ('data', ), grad: bool = False, **kwargs):
    """
    Args:
        scheduler: scheduler which determines the current size. The scheduler
            is called with the current iteration of the transform
        mode: one of ``nearest``, ``linear``, ``bilinear``, ``bicubic``,
            ``trilinear``, ``area`` (for more information see
            :func:`torch.nn.functional.interpolate`)
        align_corners: input and output tensors are aligned by the center
            points of their corner pixels, preserving the values at the
            corner pixels.
        preserve_range: output tensor has the same range as the input tensor
        keys: keys which should be augmented
        grad: enable gradient computation inside the transformation
        **kwargs: keyword arguments passed to augment_fn

    Warnings:
        When this transformation is used in combination with multiprocessing,
        the step counter is not perfectly synchronized between multiple
        processes. As a result the step count may jump between values in a
        range of the number of processes used.
    """
    super().__init__(size=0, mode=mode, align_corners=align_corners,
                     preserve_range=preserve_range, keys=keys, grad=grad,
                     **kwargs)
    self.scheduler = scheduler
    self._step = Value('i', 0)
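# The scheduler is just a callable mapping the transform's step counter to an
# output size, so progressive resizing can be expressed as a plain function.
# A hedged example; the scheduler body and the class name in the usage comment
# are assumptions, not part of the library:

def size_scheduler(step: int) -> int:
    """Grow the output size by 16 px every 1000 steps, capped at 224."""
    return min(224, 64 + 16 * (step // 1000))

# usage sketch (assumed class name):
# transform = ProgressiveResize(scheduler=size_scheduler)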
def __init__(self, agent, observation_q, prediction_qs, env_id, predictor_id):
    """
    Predictors gather observations from agents and make predictions.

    Args:
        agent (:class:`base_networks.DeepPS`): The deep PS model for RL.
        observation_q (:class:`mp.Queue`): Shared memory queue with observations of agents of the same type.
        prediction_qs (:obj:`list` of :class:`mp.Queue`): Shared memory queues containing predictions.
        env_id (str): The identifier for the environment type.
        predictor_id (int): The id of the predictor process.
    """
    super(PredictorProcess, self).__init__()
    self.agent = agent
    self.observation_q = observation_q
    self.prediction_qs = prediction_qs
    self.env_id = env_id
    self.id = predictor_id

    #int: Signal for process exit.
    self.exit_flag = Value('i', 0)
    #torch.Tensor of float: Array of actions in one-hot encoding.
    self.actions = torch.Tensor(np.eye(Config.NUM_ACTIONS)).to(Config.DEVICE)
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# torch.multiprocessing.set_start_method('spawn', force=True)
# torch.multiprocessing.set_sharing_strategy('file_system')

PPO_Transition = namedtuple(
    'PPO_Transition',
    ('obs', 'action', 'reward', 'done', 'value', 'logproba'))
CPPO_Transition = namedtuple(
    'CPPO_Transition',
    ('obs', 'action', 'reward', 'cost', 'sum_cost', 'done', 'value',
     'c_value', 'logproba'))
TD_Transition = namedtuple(
    'TD_Transition',
    ('obs', 'action', 'obs_next', 'reward', 'done'))
Safe_TD_Transition = namedtuple(
    'Safe_TD_Transition',
    ('obs', 'action', 'obs_next', 'reward', 'cost', 'cost_next', 'done'))

# 0: running normally, 1: blocked; after the main process finishes one sample,
# it blocks the sub-processes
Sub_Proc_Blocking = Value('i', 0)


def make_env(config_env, seed, env_index):
    env = gym.make(config_env['id'])
    env.start(str(env_index),
              path=config_env['path'],
              gui=config_env['gui'],
              max_step=config_env['max_step'],
              reward_model=config_env['reward_model'],
              is_human_model=config_env['human_model'])
    env.seed(seed)
    return env


def is_on_policy(args_algo):
    on_policys = ['ppo2', 'trpo', 'cppo', 'cppo2']
    if args_algo in on_policys:
        return True
    else:
        return False
def __init__(self, cache_dir, dataset_dir, dataset_list, cuda,
             batch_size=500, num_workers=3, renew_frequency=5,
             rejection_radius_position=0, numpatches=900, numneg=3,
             pos_thr=50.0, reject=True, mode='train', rejection_radius=3000,
             dist_type='3D', patch_radius=None, use_depth=False,
             use_normals=False, use_silhouettes=False, color_jitter=False,
             greyscale=False, maxres=4096, scale_jitter=False,
             photo_jitter=False, uniform_negatives=False, needles=0,
             render_only=False, maxitems=200, cache_once=False):
    super(MultimodalPatchesCache, self).__init__()
    self.cache_dir = cache_dir
    self.dataset_dir = dataset_dir
    # self.images_path = images_path
    self.dataset_list = dataset_list
    self.cuda = cuda
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.renew_frequency = renew_frequency
    self.rejection_radius_position = rejection_radius_position
    self.numpatches = numpatches
    self.numneg = numneg
    self.pos_thr = pos_thr
    self.reject = reject
    self.mode = mode
    self.rejection_radius = rejection_radius
    self.dist_type = dist_type
    self.patch_radius = patch_radius
    self.use_depth = use_depth
    self.use_normals = use_normals
    self.use_silhouettes = use_silhouettes
    self.color_jitter = color_jitter
    self.greyscale = greyscale
    self.maxres = maxres
    self.scale_jitter = scale_jitter
    self.photo_jitter = photo_jitter
    self.uniform_negatives = uniform_negatives
    self.needles = needles
    self.render_only = render_only

    self.cache_done_lock = Lock()
    self.all_done = Value('B', 0)    # 0 is False
    self.cache_done = Value('B', 0)  # 0 is False

    self.wait_for_cache_builder = Event()
    # prepare for wait until initial cache is built
    self.wait_for_cache_builder.clear()
    self.cache_builder_resume = Event()

    self.maxitems = maxitems
    self.cache_once = cache_once
    if self.mode == 'eval':
        self.maxitems = -1

    self.cache_builder = Process(target=self.buildCache, args=[self.maxitems])

    self.current_cache_build = Value('B', 0)  # 0th cache
    self.current_cache_use = Value('B', 1)    # 1st cache

    self.cache_names = ["cache1", "cache2"]  # constant

    rebuild_cache = True
    if self.mode == 'eval':
        validation_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_build.value])
        if os.path.isdir(validation_dir):
            # we don't need to rebuild validation cache
            # TODO: check if cache is VALID
            rebuild_cache = False
    elif cache_once:
        build_dataset_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_build.value])
        if os.path.isdir(build_dataset_dir):
            # we don't need to rebuild training cache if we are training
            # on a limited subset of the training set
            rebuild_cache = False

    if rebuild_cache:
        # clear the caches if they already exist
        build_dataset_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_build.value])
        if os.path.isdir(build_dataset_dir):
            shutil.rmtree(build_dataset_dir)
        use_dataset_dir = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_use.value])
        if os.path.isdir(use_dataset_dir):
            shutil.rmtree(use_dataset_dir)
        os.makedirs(build_dataset_dir)

        self.cache_builder_resume.set()
        self.cache_builder.start()

    # wait until initial cache is built
    # print("before wait to build")
    # print("wait for cache builder state",
    #       self.wait_for_cache_builder.is_set())
    self.wait_for_cache_builder.wait()
    # print("after wait to build")
    # we have been resumed

    if self.mode != 'eval' and (not self.cache_once):
        # for training, we can set up the cache builder to build
        # the second cache
        self.restart()
    else:
        # else for validation we don't need a second cache;
        # we just switch the built cache to the use cache in order to use it
        tmp = self.current_cache_build.value
        self.current_cache_build.value = self.current_cache_use.value
        self.current_cache_use.value = tmp
config = json.load(server_file)
if ('addr' not in config) or ('port' not in config):
    print("IP address (addr) and port number required in config")

address = config['addr']
port = int(config['port'])
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind((address, port))
server.listen()

param_queue = Queue()
param_queue.put(net.state_dict())
shutdown_val = Value('b', 0)
receiver_proc = Process(target=HandleWorkers,
                        args=(server, replay_memory, mem_lock, param_queue,
                              shutdown_val))
receiver_proc.start()

while True:
    try:
        Train(net, replay_memory, mem_lock, args.output_file)
        if param_queue is not None:
            param_queue.put(net.state_dict())
        torch.save(net.state_dict(), args.output_file)
    except KeyboardInterrupt:
        if server is not None:
            assert (shutdown_val is not None and receiver_proc is not None)
# Data Worker
def work(loader, queue, control):
    while 1:
        if control.value == 0:
            break
        if queue.qsize() < 5:
            batch = opcaffe.Batch()
            loader.load(batch)
            data = torch.tensor(batch.data)
            label = torch.tensor(batch.label)
            queue.put([data, label])
        time.sleep(0.1)


queue = Queue()
control = Value('i', 1)
process = Process(target=work, args=(myClass, queue, control))
process.start()

# Iterate
while 1:
    iterations += 1

    # Get Data from Queue
    data, label = queue.get()

    # LR
    if iterations in lr_half_sets:
        print("Half LR")
        half_lr(optimizer)
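# Shutting the worker down relies on flipping the shared `control` value and
# then joining the process. A minimal sketch of the teardown, assuming the
# training loop above has finished:

# control.value = 0   # tell the worker loop to break
# process.join()      # reclaim the worker process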
def train_ai2thor(model, args, rank=0, b=None):
    seed = args.seed + 10000 * rank
    torch.manual_seed(seed)
    np.random.seed(seed)
    # torch.cuda.set_device(rank)
    # device = torch.device(f'cuda:{rank}')
    device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
    # if torch.cuda.is_available():
    #     os.environ['DISPLAY'] = f':{rank}'
    model = model.to(device)
    model.share_memory()

    # Experience buffer
    storage = PPOBuffer(model.obs_shape, args.steps, args.num_workers,
                        args.state_size, args.gamma, device=device)
    storage.share_memory()

    # torch.multiprocessing.set_start_method('spawn')
    # start multiple processes
    ready_to_works = [Event() for _ in range(args.num_workers)]
    exit_flag = Value('i', 0)
    queue = SimpleQueue()
    processes = []

    # task_config_file = "config_files/multiMugTaskTrain.json"
    task_config_file = "config_files/multiMugTaskTrain.json"

    # start workers
    for worker_id in range(args.num_workers):
        print('START>>>>>>>>>>>>>>>>')
        p = Process(target=worker,
                    args=(worker_id, model, storage,
                          ready_to_works[worker_id], queue, exit_flag,
                          args.use_priors, task_config_file))
        p.start()
        processes.append(p)

    # start trainer
    train_params = {
        "epochs": args.epochs,
        "steps": args.steps,
        "world_size": args.world_size,
        "num_workers": args.num_workers
    }
    ppo_params = {
        "clip_param": args.clip_param,
        "train_iters": args.train_iters,
        "mini_batch_size": args.mini_batch_size,
        "value_loss_coef": args.value_loss_coef,
        "entropy_coef": args.entropy_coef,
        "rnn_steps": args.rnn_steps,
        "lr": args.lr,
        "max_kl": args.max_kl
    }

    distributed = False
    if args.world_size > 1:
        if distributed:
            distributed = True
            # Initialize Process Group, distributed backend type
            dist_backend = 'nccl'
            # Url used to set up distributed training
            dist_url = "tcp://127.0.0.1:23456"
            print("Initialize Process Group... pid:", os.getpid())
            dist.init_process_group(backend=dist_backend,
                                    init_method=dist_url,
                                    rank=rank, world_size=args.world_size)
            # Make model DistributedDataParallel
            model = DistributedDataParallel(model, device_ids=[rank],
                                            output_device=rank)
        else:
            print('Distribution is not allowed')

    learner(model, storage, train_params, ppo_params, ready_to_works, queue,
            exit_flag, rank, distributed, b)

    for p in processes:
        print("process ", p.pid, " joined")
        p.join()
def __init__(self, config_file):
    super(Manager, self).__init__()
    # Setting it as a daemon child
    self.daemon = True

    # Read config file
    self.config = configparser.ConfigParser()
    # Preserve the case of keys in config files
    self.config.optionxform = lambda option: option
    self.config.read(config_file)

    # Initializing the device
    if self.config["settings"]["device"] == "cuda":
        assert torch.cuda.is_available()
    self.device = self.config["settings"]["device"]
    self.agent_device = self.config["settings"]["device"]

    # Test and training sets
    self.train_set, self.test_set = [], []
    for key, value in self.config["levels"].items():
        if value == "train":
            self.train_set.append(key)
        elif value == "test":
            self.test_set.append(key)

    # Dimensions of the view
    self.channels = int(self.config["environnement"]["stacks"])
    self.height = int(self.config["environnement"]["height"])
    self.width = int(self.config["environnement"]["width"])

    # Creating the environment generation function
    self.n_outputs = len(KartMultiDiscretizer.discretized_actions)

    # Impala constants
    self.sequence_length = int(self.config["impala"]["sequence_length"])
    self.rho = float(self.config["impala"]["rho"])
    self.cis = float(self.config["impala"]["cis"])
    self.discount_factor = float(self.config["impala"]["discount_factor"])
    self.entropy_coef = float(self.config["impala"]["entropy_coef"])
    self.value_coef = float(self.config["impala"]["value_coef"])

    # Building the model and sharing it (cf. torch.multiprocessing best practices)
    self.model = torch.jit.script(
        ActorCriticLSTM(c=self.channels,
                        h=self.height,
                        w=self.width,
                        n_outputs=self.n_outputs,
                        sequence_length=self.sequence_length).float()).to(self.device)
    # For a multi-machine case, place the models on different devices and sync them once in a while
    self.impala = torch.jit.script(
        Impala(sequence_length=self.sequence_length,
               entropy_coef=self.entropy_coef,
               value_coef=self.value_coef,
               discount_factor=self.discount_factor,
               model=self.model,
               rho=self.rho,
               cis=self.cis,
               device=self.device))

    # Sharing memory between processes
    self.model.share_memory()
    self.impala.share_memory()

    # Building the optimizer
    self.optimizer = optim.RMSprop(
        self.model.parameters(),
        lr=float(self.config["optimizer"]["lr"]),
        alpha=float(self.config["optimizer"]["alpha"]),
        eps=float(self.config["optimizer"]["eps"]),
        momentum=float(self.config["optimizer"]["momentum"]),
        weight_decay=float(self.config["optimizer"]["weight_decay"]),
        centered=self.config["optimizer"]["centered"] == "True")

    # Checkpoints directory
    self.checkpoint_path = self.config["settings"]["checkpoint_path"]

    # Building the torch.multiprocessing queues
    self.training_queue = Queue(
        maxsize=int(self.config["settings"]["training_queue"]))
    self.prediction_queue = Queue(
        maxsize=int(self.config["settings"]["prediction_queue"]))
    self.statistics_queue = Queue()

    # Building the torch.multiprocessing values
    self.learning_step = Value('i', 0)
    self.nb_episodes = Value('i', 0)
    self.max_nb_steps = int(self.config["settings"]["max_nb_episodes"])

    # Statistics thread
    self.tensorboard = self.config["settings"]["tensorboard"]
    self.statistics = Statistics(writer_dir=self.tensorboard,
                                 statistics_queue=self.statistics_queue,
                                 nb_episodes=self.nb_episodes)

    # Agents, predictors and trainers
    self.training_batch_size = int(
        self.config["settings"]["training_batch_size"])
    self.trainers = []
    self.prediction_batch_size = int(
        self.config["settings"]["prediction_batch_size"])
    self.predictors = []
    self.agents = []

    # Adding the threads and agents
    self.add_trainers(int(self.config["settings"]["trainers"]))
    self.add_agents(int(self.config["settings"]["agents"]))
    self.add_predictors(int(self.config["settings"]["predictors"]))
'''
Maps episode length to dictionary with following keys:
    current_idx: which episode in the list are we at (if simply indexing
        into list)
    ep_list: list of episodes of the length of the key
    bucket_complete: if there are no more episodes left to consider in
        the bucket
'''
# Maps episode length to list of episodes
length_to_eps = {}
# List of batches if popping batches
batches = []
# If all episodes have been loaded into memory
load_complete = Value(ctypes.c_bool, False)
# Lock to access batches
batches_lock = Lock()
# Lock to access length_to_eps
cache_lock = Lock()
# Lock for condition variables
fill_cache_lock = RLock()
# Condition notifying Loader to add to cache
add_to_cache_cv = Condition(lock=fill_cache_lock)
# Condition notifying teacher that cache has episodes
cache_filled_cv = Condition(lock=fill_cache_lock)


def batch_cache(function):
    max_cache_size = 10000  # Max unseen eps
    min_cache_size = 1000   # Min unseen eps
from threading import Thread, Condition, RLock

'''
Maps episode length to dictionary with following keys:
    current_idx: which episode in the list are we at (if simply indexing
        into list)
    ep_list: list of episodes of the length of the key
    bucket_complete: if there are no more episodes left to consider in
        the bucket
'''
length_to_eps = {}                           # Maps episode length to list of episodes
batches = []                                 # List of batches if popping batches
load_complete = Value(ctypes.c_bool, False)  # If all episodes have been loaded into memory
batches_lock = Lock()                        # Lock to access batches
cache_lock = Lock()                          # Lock to access length_to_eps
fill_cache_lock = RLock()                    # Lock for condition variables
add_to_cache_cv = Condition(lock=fill_cache_lock)   # Condition notifying Loader to add to cache
cache_filled_cv = Condition(lock=fill_cache_lock)   # Condition notifying teacher that cache has episodes


def batch_cache(function):
    max_cache_size = 10000  # Max unseen eps
    min_cache_size = 1000   # Min unseen eps

    def get_cache_size():
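# The two condition variables implement a producer/consumer handshake over
# length_to_eps: the loader fills the cache and wakes waiting teachers, while
# a teacher blocks until episodes are available. A minimal hedged sketch of
# that handshake; the function names are illustrative, not the actual helpers:

def _sketch_loader_add_episode(ep):
    # Producer side: add an episode, then wake any teacher waiting for data.
    with cache_lock:
        length_to_eps.setdefault(len(ep), []).append(ep)
    with fill_cache_lock:
        cache_filled_cv.notify_all()

def _sketch_teacher_wait_for_episodes():
    # Consumer side: block until the loader has put something in the cache
    # or loading has finished entirely.
    with fill_cache_lock:
        while not length_to_eps and not load_complete.value:
            cache_filled_cv.wait()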