def _instances(self, file_path: str, manager: Manager, output_queue: Queue) -> Iterator[Instance]:
    """
    A generator that reads instances off the output queue and yields them up
    until none are left (signified by all ``num_workers`` workers putting their
    ids into the queue).
    """
    shards = glob.glob(file_path)
    num_shards = len(shards)

    # If we want multiple epochs per read, put shards in the queue multiple times.
    input_queue = manager.Queue(num_shards * self.epochs_per_read + self.num_workers)
    for _ in range(self.epochs_per_read):
        random.shuffle(shards)
        for shard in shards:
            input_queue.put(shard)

    # Then put a None per worker to signify no more files.
    for _ in range(self.num_workers):
        input_queue.put(None)

    processes: List[Process] = []
    num_finished = 0

    for worker_id in range(self.num_workers):
        process = Process(target=_worker,
                          args=(self.reader, input_queue, output_queue, worker_id))
        logger.info(f"starting worker {worker_id}")
        process.start()
        processes.append(process)

    # Keep going as long as not all the workers have finished.
    while num_finished < self.num_workers:
        item = output_queue.get()
        if isinstance(item, int):
            # Means a worker has finished, so increment the finished count.
            num_finished += 1
            logger.info(f"worker {item} finished ({num_finished}/{self.num_workers})")
        else:
            # Otherwise it's an ``Instance``, so yield it up.
            yield item

    for process in processes:
        process.join()
    processes.clear()

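# A minimal sketch (not the library's actual implementation) of a ``_worker``
# compatible with the protocol above: it drains shard paths from the input
# queue until it sees the ``None`` sentinel, pushes every ``Instance`` onto the
# output queue, and finally puts its own integer id so the parent generator can
# count finished workers. ``reader.read(shard)`` is assumed to yield Instances.
def _worker(reader, input_queue, output_queue, worker_id):
    while True:
        shard = input_queue.get()
        if shard is None:
            break
        for instance in reader.read(shard):
            output_queue.put(instance)
    # Signal completion by putting the integer worker id.
    output_queue.put(worker_id)
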
def dist_train(self):
    gpu_ids_avail = GPUtil.getAvailable(maxMemory=0.02, limit=8)
    shuffle(gpu_ids_avail)
    gpu_ids = gpu_ids_avail[:self.world_size]
    assert len(gpu_ids) == self.world_size, "not enough GPUs"

    processes = []
    for rank, gpu_id in enumerate(gpu_ids):
        p = Process(target=self._dist_train, args=(rank, gpu_id))
        p.start()
        print(f"process {rank} has started")
        processes.append(p)

    for p in processes:
        p.join()

def ansyc_multiple_process_train(args, save_dir):
    q = Queue(10)
    data_lists = [build_datasets(args) for _ in range(args.gpus)]
    p_producer = Process(target=data_producers, args=(args, q))
    p_consumers = [
        Process(target=data_consumers, args=(args, q, save_dir, i, data_lists[i]))
        for i in range(args.gpus)
    ]
    p_producer.start()
    for p in p_consumers:
        p.start()
    p_producer.join()
    for p in p_consumers:
        p.join()

def __init__(self, env_fns, spaces=None):
    """
    envs: list of gym environments to run in subprocesses
    """
    self.waiting = False
    self.closed = False
    nenvs = len(env_fns)
    self.nenvs = nenvs
    self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
    self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn)))
               for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        p.start()
    for remote in self.work_remotes:
        remote.close()

    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)

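# Hypothetical usage sketch for the subprocess vectorized-env wrapper above
# (the class name ``SubprocVecEnv`` and the environment id are assumptions, not
# taken from the snippet): each element of ``env_fns`` must be a zero-argument
# callable that constructs a fresh gym environment inside its own subprocess.
import gym

def make_env(seed):
    def _thunk():
        env = gym.make("CartPole-v1")
        env.seed(seed)
        return env
    return _thunk

vec_env = SubprocVecEnv([make_env(seed) for seed in range(8)])
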
def main():
    print('Starting')
    parser = argparse.ArgumentParser()

    # Configurable hyperparameters
    parser.add_argument('--rows', type=int, default=1,
                        help='Number of rows in the tensor.')
    parser.add_argument('--columns', type=int, default=1,
                        help='Number of columns in the tensor.')
    parser.add_argument('--backend', type=str, default=None,
                        help='backend for distributed operations.')

    # Container environment
    parser.add_argument('--hosts', type=list, default=json.loads(os.environ["SM_HOSTS"]))
    parser.add_argument('--current-host', type=str, default=os.environ["SM_CURRENT_HOST"])
    parser.add_argument('--model-dir', type=str, default=os.environ["SM_MODEL_DIR"])
    parser.add_argument('--num-gpus', type=int, default=os.environ["SM_NUM_GPUS"])
    parser.add_argument('--num-cpus', type=int, default=os.environ["SM_NUM_CPUS"])

    args = parser.parse_args()

    number_of_processes = args.num_gpus if args.num_gpus > 0 else args.num_cpus
    world_size = number_of_processes * len(args.hosts)
    logger.info('Running \'{}\' backend on {} nodes and {} processes. World size is {}.'.format(
        args.backend, len(args.hosts), number_of_processes, world_size))
    host_rank = args.hosts.index(args.current_host)
    master_addr = args.hosts[0]
    master_port = '55555'

    processes = []
    for rank in range(number_of_processes):
        process_rank = host_rank * number_of_processes + rank
        p = Process(
            target=init_processes,
            args=(args.backend, master_addr, master_port, process_rank, world_size,
                  args.rows, args.columns, args.current_host)
        )
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    save('success', args.model_dir)

def _generate_parallel(self, iteration, network, device, num_workers):
    q, r = divmod(self.remaining_games, num_workers)
    num_active_workers = Value('i', num_workers)
    resign_threshold = Value('d', self.resign_mgr.threshold())
    evaluator_mgr = BulkEvaluatorManager([network], device, num_workers)
    output_queue = SimpleQueue()

    # start the workers
    workers = []
    for worker_id in range(num_workers):
        num_games = q + 1 if worker_id < r else q
        evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
        worker = Process(
            target=self._worker_job,
            args=(worker_id, num_games, num_active_workers, resign_threshold,
                  evaluator, output_queue),
        )
        workers.append(worker)
        worker.start()

    # start evaluator server
    server = evaluator_mgr.get_server(num_active_workers)
    server.start()

    # collect the examples generated by workers
    while num_active_workers.value > 0 or not output_queue.empty():
        examples, resign_value_history, result = output_queue.get()
        self.example_pool += examples
        self.game_length.append(len(examples))

        # add the history into resignation manager to update the threshold
        if resign_value_history is not None:
            self.resign_mgr.add(resign_value_history, result)
            resign_threshold.value = self.resign_mgr.threshold()

        self.remaining_games -= 1

        # periodically save the progress
        if (self.conf.GAMES_PER_ITERATION - self.remaining_games) \
                % self.conf.EXAMPLE_POOL_SAVE_FREQUENCY == 0:
            self.save(iteration)
            log.info(
                f'[iter={iteration}] ExamplePool: checkpoint saved, '
                f'{self.remaining_games} games remaining'
            )

    for worker in workers:
        worker.join()
    server.join()

def __init__(self, n_workers, actor, args):
    self._now_episode = Value('i', 0)
    self.queue = Queue()
    self.collect_event = Event()

    self.worker = []
    for i in range(n_workers):
        self.worker.append(Worker(self.queue, self.collect_event, actor, args))
        time.sleep(1)

    self.process = [
        Process(target=self.worker[i].run, args=(self._now_episode, ))
        for i in range(n_workers)
    ]
    for p in self.process:
        p.start()
    print(f'Start {n_workers} workers.')

def run(args):
    # setup
    N, D = args.N, args.D
    n_examples = args.n_train_examples
    n_threads = args.n_threads
    n_examples_per_thread = n_examples // n_threads

    # create policy network
    policy_network = alphatsp.util.get_policy_network(args.policy_network)

    # generate examples
    print("Generating examples and training...")
    manager = Manager()
    train_queue = manager.Queue()
    shared_dict = manager.dict()
    shared_dict["success"] = False

    producers = []
    for _ in range(n_threads):
        producers.append(
            Process(target=generate_examples,
                    args=(n_examples_per_thread, train_queue, args)))

    for p in producers:
        p.start()

    c = Process(target=train, args=(policy_network, train_queue, shared_dict, args))
    c.start()

    for p in producers:
        p.join()
    train_queue.put(None)

    c.join()

    status = shared_dict["success"]
    if not status:
        print("Experiment failed.")
        return -1

def _call_with_instances(self,
                         instances: Iterable[Instance],
                         num_epochs: int,
                         shuffle: bool) -> Iterator[TensorDict]:
    # JoinableQueue needed here as sharing tensors across processes
    # requires that the creating process not exit prematurely.
    output_queue = JoinableQueue(self.output_queue_size)
    input_queue = Queue(self.output_queue_size * self.batch_size)

    # Start process that populates the queue.
    self.queuer = Process(target=_queuer,
                          args=(instances, input_queue, self.num_workers, num_epochs))
    self.queuer.start()

    # Start the tensor-dict workers.
    for i in range(self.num_workers):
        args = (input_queue, output_queue, self.iterator, shuffle, i)
        process = Process(target=_create_tensor_dicts_from_queue, args=args)
        process.start()
        self.processes.append(process)

    num_finished = 0
    while num_finished < self.num_workers:
        item = output_queue.get()
        output_queue.task_done()
        if isinstance(item, int):
            num_finished += 1
            logger.info(f"worker {item} finished ({num_finished} / {self.num_workers})")
        else:
            yield item

    for process in self.processes:
        process.join()
    self.processes.clear()

    if self.queuer is not None:
        self.queuer.join()
        self.queuer = None

def run_in_process_group(world_size, fn, input):
    assert not dist.is_initialized()
    processes = []
    q = Queue()

    for rank in range(world_size - 1):
        p = Process(target=init_process, args=(rank, world_size, fn, input, q))
        p.start()
        processes.append(p)

    if world_size >= 1:
        # run 1 process in current unittest process for debug purpose
        init_process(world_size - 1, world_size, fn, input, q)

    for p in processes:
        p.join()
    return q

def train(self, data_loaders, tb=None, num_updates=5, num_iters=250000):
    data_queue = Queue()
    # for notifying when to receive data
    data_event = Event()
    # for notifying this method when to send new data
    process_event = Event()
    # so doesn't hang on first iteration
    process_event.set()
    num_tasks = len(data_loaders)

    processes = []
    for process_id in range(self.world_size):
        processes.append(
            Process(target=self.init_process,
                    args=(process_id, data_queue, data_event, process_event,
                          num_updates, tb if process_id == 0 else None)))
        processes[-1].start()

    for num_iter in range(num_iters):
        print("at the top of iter loop %d" % num_iter)
        process_event.wait()
        process_event.clear()
        tasks = np.random.randint(0, num_tasks, (self.world_size))
        for task in tasks:
            # placeholder for sampling data from dataset
            task_data = next(data_loaders[task])
            # print(hey[0].shape)
            data_queue.put(
                (task_data[0].numpy()[0], task_data[1].numpy()[0],
                 task_data[2].numpy()[0], task_data[3].numpy()[0]))
        data_event.set()

    new_model = self.meta_learners[0].model.original_state_dict

    for p in processes:
        p.terminate()
        p.join()

def test_mem_share(share_memory):
    # NOTE: `q` is assumed to be a module-level multiprocessing Queue shared
    # with `torch_shared_mem_process`; it is not defined in this snippet.
    p = Process(target=torch_shared_mem_process, args=(share_memory, ))
    p.start()

    start = time.time()

    n = 100
    for i in range(n):
        data = torch.zeros([5, 1280, 720, 3], dtype=torch.float, pin_memory=True)
        if share_memory:
            data.share_memory_()
            q.put(data)
        else:
            q.put(data.numpy())

    q.put(None)
    p.join()
    return time.time() - start

def subprocess_prefetch(generator: Iterable[Union[np.array, Iterable[np.array]]],
                        prefetch_buffer_size: int,
                        ) -> Iterable[Union[np.array, Iterable[np.array]]]:
    """
    Wraps a generator to prefetch batches in a separate subprocess. It can be
    used in a `with` block (which grants proper resource cleanup) or directly
    as a normal generator. It relies on the ability of torch.multiprocessing
    to load Tensors in shared memory; this way, the subprocess loads the numpy
    array from disk, creates a torch Tensor from it and then sends it through
    a Queue to the main process, which consumes it.

    :param generator: Generator to wrap.
    :param prefetch_buffer_size: Size of the prefetch buffer.
    :return: Wrapped generator.
    """
    batch_queue = Queue(prefetch_buffer_size)
    control_queue = Queue()
    Process(target=_enqueue_loader_output,
            args=(batch_queue, control_queue, generator)).start()
    control_queue.put(True)
    return _BatchIterator(batch_queue, control_queue)

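# Hypothetical usage sketch for subprocess_prefetch (not from the original
# source): `load_batches` stands in for any generator yielding numpy arrays
# from disk, and the `with` form is assumed to clean up the worker process on
# exit, as the docstring above describes.
import numpy as np

def load_batches():
    for _ in range(100):
        yield np.random.rand(32, 3, 224, 224).astype(np.float32)

with subprocess_prefetch(load_batches(), prefetch_buffer_size=4) as batches:
    for batch in batches:
        pass  # feed `batch` to the training step
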
def __iter__(self):
    print('Starting processes')
    random.seed(0)
    random.shuffle(self.filepaths)
    filepaths = deque()
    for path in self.filepaths:
        filepaths.append(path)

    self.buffr_processes = []
    args = (self.filepaths, self.buffer, self.partial)
    for i in range(10):
        process = Process(target=fill_buffer, args=args)
        process.daemon = True
        process.start()
        self.buffr_processes.append(process)

    args = (self.buffer, self.batch_queue, self.batch_size)
    self.batch_process = Process(target=fill_batch, args=args)
    self.batch_process.daemon = True
    self.batch_process.start()
    return self

def trace(size: int, model: nn.Module, data_partitions: DataPartitioner,
          criterion: Callable, ip: str):
    processes = []
    queue = Queue()
    for rank in range(size):
        p = Process(target=init_process,
                    args=(rank, size, model, data_partitions.use(rank),
                          criterion, queue, trace_, ip))
        p.start()
        processes.append(p)

    trace = queue.get()

    for p in processes:
        p.join()

    return trace

def main(args):
    args.embedding_output_path = join(args.EMBED_DATA_DIR, args.output_path)
    print('> START ')
    print('> parameter ')
    for k, v in args._get_kwargs():
        print('> {} : {}'.format(k, v))
    print('')
    print('> Action ')

    pool_path = args.data_path
    GPU_NUM = args.gpu_num
    embedding_type_name = args.embedding_type_name
    output_embedding_data = args.embedding_output_path

    sym_line_list = [_.strip() for _ in open(pool_path, mode='r', encoding='utf-8')]

    number_of_processes = GPU_NUM if GPU_NUM < len(sym_line_list) else len(sym_line_list)
    num_of_tasks = len(sym_line_list) // number_of_processes
    tasks = [sym_line_list[_ * num_of_tasks: (_ + 1) * num_of_tasks]
             for _ in range(number_of_processes)]

    tasks_to_accomplish = Manager().Queue()
    for task in tasks:
        tasks_to_accomplish.put(task)
    tasks_finished = Manager().Queue()

    processes = []
    # creating processes
    for i in range(number_of_processes):
        p = Process(target=make_embedding,
                    args=(tasks_to_accomplish, tasks_finished, i, embedding_type_name))
        processes.append(p)
        p.start()

    store_target = []
    for p in processes:
        p.join()

    while not tasks_finished.empty():
        store_target.append(tasks_finished.get_nowait())

    with open(output_embedding_data, 'wb') as f:
        pickle.dump(store_target, f, pickle.HIGHEST_PROTOCOL)

    return True

def ansyc_multiple_process_train(args, save_dir):
    q = Queue(10)
    metann_params = meta_neuralnet_params(args.search_space)
    data_lists = [
        build_datasets(metann_params['search_space']) for _ in range(args.gpus)
    ]
    p_producer = Process(target=data_producers, args=(args, q))
    p_consumers = [
        Process(target=data_consumers, args=(args, q, save_dir, i, data_lists[i]))
        for i in range(args.gpus)
    ]
    p_producer.start()
    for p in p_consumers:
        p.start()
    p_producer.join()
    for p in p_consumers:
        p.join()

def __init__(self, args):
    self.args = args

    ######### Load the trained model ########
    self.test_agent = TestAgent(self.args, 991)

    self.test_bucket = self.test_agent.rollout_actor

    ######### TEST WORKERS ############
    self.test_task_pipes = Pipe()
    self.test_result_pipes = Pipe()
    # test_bucket is the neural network for evo
    self.test_workers = [Process(target=test_rollout_worker,
                                 args=(self.args, 0, 'test', self.test_task_pipes[1],
                                       self.test_result_pipes[0], None, self.test_bucket,
                                       False, RANDOM_BASELINE))]
    for worker in self.test_workers:
        worker.start()

    #### STATS AND TRACKING WHICH ROLLOUT IS DONE ######
    self.best_score = -999
    self.total_frames = 0
    self.gen_frames = 0
    self.test_trace = []

def experiment_join(experiment_name, your_code, skip_tests=False):
    """Join an experiment, passing a function for your experiment code"""
    if not skip_tests:
        experiment_is_enabled = get_global(
            f"experiment:{experiment_name}:enabled", type=bool)
        if not experiment_is_enabled:
            raise RuntimeError(
                "It is too early to run this experiment. Please wait for a bit."
            )
        experiment_is_running = get_global(
            f"experiment:{experiment_name}:running", type=bool)
        if experiment_is_running:
            raise RuntimeError(
                "This experiment is already running. You are too late to join. Sorry."
            )

    # Register ourself
    redis.zadd(f"experiment:{experiment_name}:participants", {WORKER_ID: 1})

    def fn():
        print("Waiting for more workers to join. Thanks for your patience ...")
        # Wait until we receive a rank from the master node
        rank = receive(f"experiment:{experiment_name}:rank", blocking=True, type=int)
        world_size = receive(f"experiment:{experiment_name}:world_size",
                             blocking=True, type=int)
        pytorch_distributed_init(experiment_name, rank, world_size)
        print(
            f"Connected to {world_size} workers. You are worker number {rank}. Starting ..."
        )
        start_time = time()
        your_code()
        duration = time() - start_time
        print(f"Execution finished in {duration:.1f}s.")

    p = Process(target=fn)
    p.start()
    p.join()

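# Hypothetical usage sketch for experiment_join (not from the original source):
# the experiment name "resnet-sweep" and the participant function
# `my_training_run` are assumptions for illustration only.
def my_training_run():
    # the participant's distributed training code goes here; the process group
    # has already been initialized by experiment_join before this is called
    pass

experiment_join("resnet-sweep", my_training_run, skip_tests=False)
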
def density(size: int, model: nn.Module, data_partitions: DataPartitioner,
            criterion: Callable, ip: str):
    processes = []
    queue = Queue()
    for rank in range(size):
        p = Process(target=init_process,
                    args=(rank, size, model, data_partitions.use(rank),
                          criterion, queue, density_, ip))
        p.start()
        processes.append(p)

    eigen_list_full = queue.get()
    weight_list_full = queue.get()

    for p in processes:
        p.join()

    return eigen_list_full, weight_list_full

def collectGameDataParallel(network, useNetwork, T, width, height):
    totalGames = 0
    game_images = []
    game_targets = []
    while totalGames < 80:
        images = Queue()
        targets = Queue()
        ngames = 5
        barrier = Barrier(ngames + 1)
        processes = [Process(target=collectGameData,
                             args=(barrier, play_game, network, useNetwork, T,
                                   width, height, images, targets))
                     for _ in range(ngames)]
        for p in processes:
            p.start()
        for _ in range(ngames):
            im = images.get()
            game_images.append(copy.deepcopy(im))
            del im
            t = targets.get()
            game_targets.append(copy.deepcopy(t))
            del t
        barrier.wait()
        for p in processes:
            p.join()
        totalGames += ngames

    flattened_images = list(itertools.chain.from_iterable(game_images))
    flattened_targets = list(itertools.chain.from_iterable(game_targets))
    batchSize = min(len(flattened_images), 2048)
    sample_indices = numpy.random.choice(range(len(flattened_images)), batchSize)
    sample_images = [flattened_images[i] for i in sample_indices]
    sample_targets = [flattened_targets[i] for i in sample_indices]
    return sample_images, sample_targets

def test_step(self):
    def _run(rank, world_size):
        model = nn.Linear(10, 1)
        optimizer = DistributedAdamW(model.parameters())
        optimizer.zero_grad()
        loss = model(torch.ones(10).float())
        loss.backward()
        optimizer.step()

    processes = []
    world_size = 4
    for rank in range(world_size):
        p = Process(target=init_processes, args=(rank, world_size, _run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

def __init__(self, fn, n_processes=4, max_size=200, batchsize=200):
    ## what needs to happen:
    def consumer(Q):
        iterator = get_supervised_batchsize(fn, batchsize=batchsize)  # todo
        while True:
            try:
                # get a new message
                size = Q.qsize()
                # print(size)
                if size < max_size:
                    # process the data
                    ret = next(iterator)
                    Q.put(ret)
                else:
                    time.sleep(2)
            except ValueError as e:
                print("I think you closed the thing while it was running, but that's okay")
                break
            except Exception as e:
                print("error!", e)
                break

    self.Q = Queue()
    print("started queue ...")

    # instantiate workers
    self.workers = [
        Process(target=consumer, args=(self.Q, ))
        for i in range(n_processes)
    ]

    for w in self.workers:
        w.start()
    print("started parallel workers, ready to work!")

def create_training_processes(training_jobs, createNewEnvironment,
                              checkpoint_at_iterations, agent_queue,
                              results_path, seed):
    """
    :param training_jobs: Array of TrainingJob namedtuples containing a
                          training-scheme, algorithm and name
    :param createNewEnvironment: OpenAI gym environment creation function
    :param checkpoint_at_iterations: array containing the episodes at which the
                                     agents will be cloned for benchmarking
                                     against one another
    :param agent_queue: queue shared among processes to submit agents that will
                        be benchmarked
    :returns: array of process handlers, needed to join processes at the end of
              experiment computation
    """
    episodic_reward_directory = f'{results_path}/episodic_rewards'
    if not os.path.exists(episodic_reward_directory):
        os.mkdir(episodic_reward_directory)

    logger = logging.getLogger('CreateTrainingProcesses')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(
        logging.handlers.SocketHandler(
            host='localhost', port=logging.handlers.DEFAULT_TCP_LOGGING_PORT))
    logger.info('Training {} jobs: [{}]. '.format(
        len(training_jobs), ', '.join(map(lambda job: job.name, training_jobs))))

    menagerie_path = f'{results_path}/menageries'
    if not os.path.exists(menagerie_path):
        os.mkdir(menagerie_path)

    ps = []
    for job in training_jobs:
        p = Process(target=training_process,
                    args=(createNewEnvironment(), job.agent, job.training_scheme,
                          checkpoint_at_iterations, agent_queue, job.name,
                          results_path, seed))
        ps.append(p)
    logger.info("All training jobs submitted")
    return ps

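# Hypothetical usage sketch (not from the original source; the argument values
# are assumptions): the returned process handles are not yet started, so the
# caller starts them and joins them at the end, as the docstring above notes.
ps = create_training_processes(training_jobs, createNewEnvironment,
                               checkpoint_at_iterations, agent_queue,
                               results_path='./results', seed=42)
for p in ps:
    p.start()
# ... consume benchmarked agents from agent_queue while training runs ...
for p in ps:
    p.join()
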
def run_parallel_episodes(self, max_episodes, max_steps):
    for episode in range(max_episodes):
        print("Episode : {}".format(episode + 1))
        agent_list = []
        for i in range(self.num_agents):
            agent_list.append(np.random.randint(C.MIN_NODES, C.MAX_NODES))
        arr = Array('i', agent_list)
        m = Manager()
        printlock = m.Lock()
        synchlock = m.Lock()
        all_processes = [
            Process(target=self.sample_run,
                    args=(30, max_steps, printlock, synchlock, arr, episode, j))
            for j in range(self.num_agents)
        ]
        for p in all_processes:
            p.start()
        for p in all_processes:
            p.join()
        for p in all_processes:
            p.terminate()

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--ip', type=str, default='192.168.0.12')
    parser.add_argument('--port', type=str, default='20000')
    parser.add_argument('--rank', '-r', type=int)
    parser.add_argument('--world-size', '-s', type=int)
    args = parser.parse_args()
    print(args)
    # initialize(args.rank, args.world_size, args.ip, args.port)
    size = 2
    processes = []
    for i in range(size):
        p = Process(target=initialize, args=(i, size, args.ip, args.port))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

def async_macro_model_train(model_data, gpus, save_dir, dataset='cifar10'):
    q = Queue(10)
    manager = multiprocessing.Manager()
    total_data_dict = manager.dict()
    p_producer = Process(target=model_producer, args=(model_data, q, gpus))
    time.sleep(3)
    p_consumers = [
        Process(target=model_consumer,
                args=(q, i, save_dir, total_data_dict, model_data, dataset))
        for i in range(gpus)
    ]
    p_producer.start()
    for p in p_consumers:
        p.start()
    p_producer.join()
    for p in p_consumers:
        p.join()

    data_dict = {}
    for k, v in total_data_dict.items():
        data_dict[v[2]] = (100 - v[0], 100 - v[1])
    return data_dict

def run_in_process_group(world_size, fn, input):
    assert not dist.is_initialized()
    processes = []
    q = Queue()
    port = get_free_tcp_port()
    log.info(f"using tcp port: {port}")
    backend = "gloo"
    for rank in range(world_size - 1):
        p = Process(
            target=init_process,
            args=(rank, world_size, fn, input, q, backend, port)
        )
        p.start()
        processes.append(p)

    if world_size >= 1:
        # run 1 process in current unittest process for debug purpose
        init_process(world_size - 1, world_size, fn, input, q, backend, port)

    for p in processes:
        p.join()
    return q

def multi_process():
    # Define shared variables to be shared between processes
    shared_num = mp.Value('i', 0)
    scores = mp.Queue()
    processes = []
    results = []
    for i in range(4):
        p = Process(target=f, args=(shared_num, scores))
        p.start()
        processes.append(p)

    while True:
        r = scores.get()
        if r is not None:
            results.append(r)
            print("!!! ", results)
        else:
            break

    for p in processes:
        p.join()
    print("END !!! ", results)

def main(args):
    load_config_json(args)
    check_and_update_generation_args(args)
    adjust_multilingual_eval(args)
    set_seed(args)
    args.tasks = list(get_tasks(args.task_names, args).values())
    logger.info(f'Arguments:\n{pformat(vars(args))}')
    logger.info(f'Loading from {args.best_checkpoint}')

    devices = init_devices(args)
    if args.devices is not None:
        devices = [devices[i] for i in args.devices]

    if len(devices) > 1:
        # Independent multi-GPU generation
        all_processes = []
        all_data_folders = split_folder_on_disk(args.data, len(devices))
        for device_id in range(len(devices)):
            copy_args = copy.copy(args)
            copy_args.data = all_data_folders[device_id]
            copy_args.eval_dir = get_part_path(args.eval_dir, device_id)

            p = Process(target=run, args=(copy_args, devices[device_id]))
            all_processes.append(p)
            p.start()

        for p in all_processes:
            p.join()

        for folder in all_data_folders:
            shutil.rmtree(folder)
        combine_folders_on_disk(args.eval_dir, len(devices),
                                line_group_size=1, delete=True)
    else:
        run(args, devices[0])