def main():
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--embedding-model", default="net.pth",
                    help="path to the deep learning face embedding model")
    ap.add_argument("-r", "--recognizer", default="output/recognizer.pickle",
                    help="path to model trained to recognize faces")
    ap.add_argument("-l", "--le", default="output/le.pickle",
                    help="path to label encoder")
    ap.add_argument("-c", "--confidence", type=float, default=0.45,
                    help="minimum probability to filter weak detections")
    ap.add_argument("-d", "--detector", default="face_detection_model",
                    help="path to OpenCV's deep learning face detector")
    args = vars(ap.parse_args())

    fq = Queue()
    pq = Queue()
    frame_length = 1 / 45.0

    pf = Process(target=get_frames, args=(fq,))
    pf.start()

    dpf = Process(target=display_processed_frames, args=(pq, frame_length))
    dpf.start()

    process_frames(fq, pq, args)

class SC2Environment(environment.Environment):
    def __init__(self, env_args):
        super(SC2Environment, self).__init__()
        env = partial(make_sc2env, **env_args)
        self.conn, child_conn = Pipe()
        self.proc = Process(target=worker, args=(child_conn, CloudpickleWrapper(env)))
        self.proc.start()
        self.reset()

    @staticmethod
    def get_action_size():
        return len(FUNCTIONS)

    def reset(self):
        self.conn.send([COMMAND_RESET, None])
        return [self.conn.recv()]

    def close(self):
        self.conn.send([COMMAND_TERMINATE, None])
        self.conn.close()
        self.proc.join()
        print("SC2 environment closed")

    def step(self, actions):
        self.conn.send([COMMAND_STEP, actions])
        obs = self.conn.recv()
        return [obs], obs.reward, obs.last()

def main(args):
    assert args.path is not None, '--path required for generation!'
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert args.replace_unk is None or args.dataset_impl == 'raw', \
        '--replace-unk requires a raw text dataset (--dataset-impl=raw)'
    assert args.results_path is None, 'We do not support setting results_path!'

    # Note: the assert above forces results_path to be None, so this branch is
    # effectively disabled and output always goes to stdout.
    if args.results_path is not None:
        os.makedirs(args.results_path, exist_ok=True)
        output_path = os.path.join(args.results_path,
                                   'generate-{}.txt'.format(args.gen_subset))
        with open(output_path, 'w', buffering=1) as h:
            return _main(args, h)
    else:
        if args.ngpus == 1:
            return _main(args, sys.stdout)
        else:
            from torch.multiprocessing import Process
            processes = []
            world_size = args.ngpus
            backend = args.distributed_backend
            master_addr = args.distributed_master_addr
            master_port = args.distributed_master_port
            for rank in range(args.ngpus):
                p = Process(target=_main,
                            args=(args, sys.stdout, rank, world_size, backend,
                                  master_addr, master_port))
                p.start()
                processes.append(p)
            for p in processes:
                p.join()

def init_processes(f, size):
    processes = []
    for rank in range(size):
        p = Process(target=init_process, args=(rank, size, f))
        p.start()
        processes.append(p)
    return lambda: [p.join() for p in processes]

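# Several snippets in this collection (init_processes above, run_local, test_log_buffer,
# train_dist, ...) hand each worker an init_process helper that is not shown here. A minimal
# sketch of such a helper, assuming a single-machine gloo setup and a per-rank work function
# fn(rank, size); the address, port, and backend are placeholders, not values taken from the
# original code:
import os
import torch.distributed as dist

def init_process(rank, size, fn, backend='gloo'):
    # Every rank rendezvouses at the same master address, then hands off to the work function.
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group(backend, rank=rank, world_size=size)
    fn(rank, size)
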
def _compare_parallel(self, network, opponent_network, device, num_workers):
    q, r = divmod(self.conf.GAMES_PER_COMPARISON, num_workers)
    num_active_workers = Value('i', num_workers)
    evaluator_mgr = BulkEvaluatorManager(
        [network, opponent_network], device, num_workers)
    score = Value('i', 0)
    workers = []
    s = 0
    for worker_id in range(num_workers):
        num_games = q + 1 if worker_id < r else q
        evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
        opponent_evaluator = evaluator_mgr.get_evaluator(worker_id, 1)
        color = BLACK if s % 2 == 0 else WHITE
        s += num_games
        worker = Process(
            target=self._worker_job,
            args=(num_games, num_active_workers, evaluator,
                  opponent_evaluator, color, score),
        )
        workers.append(worker)
        worker.start()

    # start evaluator server
    server = evaluator_mgr.get_server(num_active_workers)
    server.start()

    for worker in workers:
        worker.join()
    server.join()

    return score.value / self.conf.GAMES_PER_COMPARISON

def run_train(self, args):
    print("training...")
    model = self
    sim = Simulator(model)
    games = []
    for i in range(1):
        games.append(
            args.instance_class(args.vizdoom_config, args.wad_path, args.skiprate,
                                actions=args.action_set, id=i))

    for iter in range(100):
        print("iteration: ", iter)

        #
        # generate data
        #
        processes = []
        for game in games:
            process = Process(target=self.generate_data, args=(game, sim, args))
            process.start()
            processes.append(process)
        for process in processes:
            process.join()

        #
        # train model with new data
        #
        self.train_model(model)

def _call_with_qiterable(self, qiterable: QIterable, num_epochs: int,
                         shuffle: bool) -> Iterator[TensorDict]:
    # JoinableQueue needed here as sharing tensors across processes
    # requires that the creating process not exit prematurely.
    output_queue = JoinableQueue(self.output_queue_size)

    for _ in range(num_epochs):
        qiterable.start()

        # Start the tensor-dict workers.
        for i in range(self.num_workers):
            args = (qiterable, output_queue, self.iterator, shuffle, i)
            process = Process(target=_create_tensor_dicts_from_qiterable, args=args)
            process.start()
            self.processes.append(process)

        num_finished = 0
        while num_finished < self.num_workers:
            item = output_queue.get()
            output_queue.task_done()
            if isinstance(item, int):
                num_finished += 1
                logger.info(f"worker {item} finished ({num_finished} / {self.num_workers})")
            else:
                yield item

        for process in self.processes:
            process.join()
        self.processes.clear()

        qiterable.join()

def init_jobs(queue, batch_size, num_features):
    # Several prefetch processes could be started here and kept in a list; for now a single
    # daemonized worker is enough.
    task = Process(target=prefetch_data, args=(queue, batch_size, num_features))
    task.daemon = True
    task.start()
    return task

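# init_jobs above (and init_parallel_jobs further down) assume a prefetch_data target that is
# not part of this collection. A minimal sketch of what such a producer usually looks like;
# sample_batch is a hypothetical helper standing in for whatever builds one batch:
def prefetch_data(queue, batch_size, num_features):
    # Produce batches forever; the daemon flag on the Process lets it die with its parent.
    while True:
        batch = sample_batch(batch_size, num_features)
        queue.put(batch)
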
def main(args):
    args.device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count()

    set_seed(args)
    args.model_type = args.model_type.lower()

    if args.n_gpu > 1:
        # Independent multi-GPU evaluation
        all_processes = []
        all_input_files = split_file_on_disk(args.input_file, args.n_gpu)
        for gpu_idx in range(args.n_gpu):
            copy_args = copy.copy(args)
            if torch.cuda.is_available() and not args.no_cuda:
                copy_args.device = torch.device("cuda:" + str(gpu_idx))
            copy_args.n_gpu = 1
            copy_args.input_file = all_input_files[gpu_idx]
            copy_args.output_file = get_file_part_path(args.output_file, gpu_idx)

            p = Process(target=run_generation, args=(copy_args,))
            all_processes.append(p)
            p.start()

        for p in all_processes:
            p.join()

        combine_files_on_disk(args.output_file, args.n_gpu)
    else:
        run_generation(args)

def __config(self) -> Dict[int, Process]:
    logger.debug(
        f"Configuring {self.num_workers} local workers /"
        f" {self.world_size} world size on node #{self.node_id}...")
    self.__config_master_protocol()
    self.__build_rank_range()
    self.local_workers = {}
    for rank in self.rank_range:
        process = Process(
            target=process_exec,
            args=(
                self.node_id,
                rank,
                self.world_size,
                self.dataset,
                self.neural_network,
                self.training,
                self.num_epochs,
                self.backend,
                self.verbose,
            ),
        )
        self.local_workers[rank] = process
        logger.debug(f"Starting worker {rank}/{self.world_size - 1} process"
                     f" on node #{self.node_id}...")
        process.start()
        logger.debug(f"Worker {rank}/{self.world_size - 1} successfully started"
                     f" on node #{self.node_id}...")
    return self.local_workers

def __init__(self, game_factory: GameExecutorFactory, network: nn.Module,
             device: torch.device, processes: int, batches_ahead: int,
             batch_size: int, states_on_device: bool):
    self._states_on_device = states_on_device
    self._device = device
    self._experience_queue = Queue(maxsize=processes + 1)
    block_size = max(1, batches_ahead - processes)
    self.block_buffer = []
    print('* starting %d workers (batch size: %d, block size: %d)'
          % (processes, batch_size, block_size))
    self._processes = []
    self._request_queues = []
    for i in range(processes):
        request_queue = Queue(maxsize=10)
        # Transferring tensors to the GPU inside the worker process does not work: it does
        # not raise an error, but training no longer converges.
        p = Process(target=_run_game, args=(
            i, game_factory, network, device, request_queue, self._experience_queue,
            batch_size, block_size, False,
        ))
        p.start()
        self._request_queues.append(request_queue)
        self._processes.append(p)

def run(self, nepoch, batchsize=None, loss='variance', ndist=1):
    if ndist == 1:
        self.distributed_training = False
        self._worker(nepoch, batchsize, loss)
    else:
        self.distributed_training = True
        processes = []
        manager = Manager()
        obs_data = manager.list()
        for rank in range(ndist):
            p = Process(target=self.init_process,
                        args=(obs_data, rank, ndist, nepoch, batchsize, loss))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
        self.obs_dict = obs_data

def run_in_process_group(world_size, filename, fn, inputs):
    if torch.distributed.is_initialized():
        torch.distributed.destroy_process_group()
    processes = []
    q = Queue()
    wait_event = Event()

    # run the remaining processes
    # for rank in range(world_size - 1):
    for rank in range(world_size):
        p = Process(
            target=init_and_run_process,
            args=(rank, world_size, filename, fn, inputs[rank], q, wait_event),
        )
        p.start()
        processes.append(p)

    # fetch the results from the queue before joining, the background processes
    # need to be alive if the queue contains tensors. See
    # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847/3  # noqa: B950
    results = []
    for _ in range(len(processes)):
        results.append(q.get())

    wait_event.set()

    for p in processes:
        p.join()
    return results

def __init__(self, config: ParamDict, environment: Environment, policy: Policy, filter_op: Filter):
    threads, gpu = config.require("threads", "gpu")
    threads_gpu = config["gpu threads"] if "gpu threads" in config else 2
    super(Agent_async, self).__init__(config, environment, policy, filter_op)

    # sync signal: -1 = terminate, 0 = normal running, >0 = restart and wait for parameter update
    self._sync_signal = Value('i', 0)
    # environment sub-process list
    self._environment_proc = []
    # policy sub-process list
    self._policy_proc = []
    # used to synchronize policy parameters
    self._param_pipe = None
    self._policy_lock = Lock()
    # used to synchronize roll-out commands
    self._control_pipe = None
    self._environment_lock = Lock()

    step_pipe = []
    cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True)
    param_pipe_child, param_pipe_parent = Pipe(duplex=False)
    self._control_pipe = cmd_pipe_parent
    self._param_pipe = param_pipe_parent

    for i_envs in range(threads):
        child_name = f"environment_{i_envs}"
        step_pipe_pi, step_pipe_env = Pipe(duplex=True)
        step_lock = Lock()
        worker_cfg = ParamDict({"seed": self.seed + 1024 + i_envs, "gpu": gpu})
        child = Process(target=Agent_async._environment_worker, name=child_name,
                        args=(worker_cfg, cmd_pipe_child, step_pipe_env,
                              self._environment_lock, step_lock, self._sync_signal,
                              deepcopy(environment), deepcopy(filter_op)))
        self._environment_proc.append(child)
        step_pipe.append((step_pipe_pi, step_lock))
        child.start()

    for i_policies in range(threads_gpu):
        child_name = f"policy_{i_policies}"
        worker_cfg = ParamDict({"seed": self.seed + 2048 + i_policies, "gpu": gpu})
        child = Process(target=Agent_async._policy_worker, name=child_name,
                        args=(worker_cfg, param_pipe_child, step_pipe,
                              self._policy_lock, self._sync_signal, deepcopy(policy)))
        self._policy_proc.append(child)
        child.start()

    sleep(5)

def main():
    print('Starting')
    parser = argparse.ArgumentParser()

    # Configurable hyperparameters
    parser.add_argument('--rows', type=int, default=1,
                        help='Number of rows in the tensor.')
    parser.add_argument('--columns', type=int, default=1,
                        help='Number of columns in the tensor.')
    parser.add_argument('--backend', type=str, default=None,
                        help='backend for distributed operations.')

    # Container environment
    parser.add_argument('--hosts', type=list, default=json.loads(os.environ["SM_HOSTS"]))
    parser.add_argument('--current-host', type=str, default=os.environ["SM_CURRENT_HOST"])
    parser.add_argument('--model-dir', type=str, default=os.environ["SM_MODEL_DIR"])
    parser.add_argument('--num-gpus', type=int, default=os.environ["SM_NUM_GPUS"])
    parser.add_argument('--num-cpus', type=int, default=os.environ["SM_NUM_CPUS"])

    args = parser.parse_args()

    number_of_processes = args.num_gpus if args.num_gpus > 0 else args.num_cpus
    world_size = number_of_processes * len(args.hosts)
    logger.info(
        'Running \'{}\' backend on {} nodes and {} processes. World size is {}.'.format(
            args.backend, len(args.hosts), number_of_processes, world_size))

    host_rank = args.hosts.index(args.current_host)
    master_addr = args.hosts[0]
    master_port = '55555'

    processes = []
    for rank in range(number_of_processes):
        process_rank = host_rank * number_of_processes + rank
        p = Process(target=init_processes,
                    args=(args.backend, master_addr, master_port, process_rank,
                          world_size, args.rows, args.columns, args.current_host,
                          args.num_gpus))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    save('success', args.model_dir)

def run(self):
    # mp.set_start_method('spawn', force=True)
    with Manager() as manager:
        d = manager.dict()
        self.d = d
        d['train_progress'] = []
        d['best_epoch'] = None
        d['best_epoch_summary'] = None
        d['model'] = None
        d['labels'] = None

        pqueue = mp.Queue()
        out_pqueue = mp.Queue()

        model = self.trainer_kwargs['model']
        # model = copy.deepcopy(model)
        model.share_memory()
        self.trainer_kwargs['model'] = model
        self.trainer_kwargs['num_workers'] = 0

        p = Process(target=main_q, args=(pqueue, out_pqueue, d))
        p.daemon = True
        p.start()
        # pool.apply_async(main_q, args=(pqueue, out_pqueue, d, ))
        # pool.apply_async(main_train, args=(d, self.num_epochs, self.trainer_args,
        #                                    self.trainer_kwargs, self.database_items))
        # pool.starmap(main_q, [(pqueue, out_pqueue, d), ])

        pqueue.put(None)
        pqueue.put(self.num_epochs)
        pqueue.put(self.trainer_args)
        pqueue.put(self.trainer_kwargs)
        pqueue.put(self.database_items)

        p.join()
        # pool.close()
        # pool.join()

        print('Process results: ', len(d.keys()))
        # best_epoch = d['best_epoch']
        # best_epoch_summary = d['best_epoch_summary']
        # model = d['model']
        # labels = d['labels']

        self.d = get_queue_dict(out_pqueue, item_names=[
            'best_epoch',
            'best_epoch_summary',
            'model',
            'labels',
        ])
        best_epoch = self.d['best_epoch']
        best_epoch_summary = self.d['best_epoch_summary']
        # load_state_dict modifies the module in place and does not return it
        model.load_state_dict(self.d['model'])
        labels = self.d['labels']
        self.d = {
            "train_progress": d['train_progress'],
        }

        # best_epoch, best_epoch_summary = self.trainer.train(epochs=self.num_epochs)
        self.complete_func(
            self.host, {
                "best_epoch": best_epoch,
                "best_epoch_summary": best_epoch_summary,
                "model": model,
                "labels": labels,
            })

def init_parallel_jobs(cfg, queue, fn, ped_data=None, emp_data=None):
    tasks = Process(target=prefetch_data, args=(cfg, queue, fn, ped_data, emp_data))
    # for task in tasks:
    #     task.daemon = True
    #     task.start()
    tasks.daemon = True
    tasks.start()
    return tasks

def main(args):
    import torchvision.transforms as transforms
    from torch.multiprocessing import Process
    from lib.utils.process_data import load_data, get_word_frequencies
    from lib.utils.vocabulary import load_vocab
    from lib.utils.data_loader import get_split_data_set

    """Loading Data"""
    train_data, val_data, test_data, image_ids, topic_set = load_data(args.data_dir)
    data = {'train': train_data, 'val': val_data}

    transform = {
        'train': transforms.Compose([
            transforms.Resize(256),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
    }

    vocabs = load_vocab(args.data_dir, min_occurrences=args.min_occurrences)
    word_frequencies = get_word_frequencies(train_data, vocabs['word_vocab'])

    # Round the batch size up to the next multiple of the number of GPUs.
    while args.batch_size % args.num_gpus != 0:
        args.batch_size += 1

    split_data = {
        x: get_split_data_set(data[x], args.batch_size // args.num_gpus, vocabs,
                              args.data_dir, transform[x], args.num_gpus,
                              randomize=True, max_size=args.max_size)
        for x in ['train', 'val']
    }

    if args.num_gpus > 1:
        processes = []
        for rank in range(args.num_gpus):
            p = Process(target=init_processes,
                        args=(rank, args.num_gpus, split_data, vocabs,
                              word_frequencies, args, run))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
    else:
        run(0, 1, split_data, vocabs, args)

def __init__(self, data, batch_size, num_steps=1, sample_coverage=50,
             save_dir=None, num_workers=0, log=True):
    assert data.edge_index is not None
    assert 'node_norm' not in data
    assert 'edge_norm' not in data

    self.N = N = data.num_nodes
    self.E = data.num_edges

    self.adj = SparseTensor(row=data.edge_index[0], col=data.edge_index[1],
                            value=data.edge_attr, sparse_sizes=(N, N))

    self.data = copy.copy(data)
    self.data.edge_index = None
    self.data.edge_attr = None

    self.batch_size = batch_size
    self.num_steps = num_steps
    self.sample_coverage = sample_coverage
    self.num_workers = num_workers
    self.log = log
    self.__count__ = 0

    if self.num_workers > 0:
        self.__sample_queue__ = Queue()
        self.__sample_workers__ = []
        for _ in range(self.num_workers):
            worker = Process(target=self.__put_sample__,
                             args=(self.__sample_queue__,))
            worker.daemon = True
            worker.start()
            self.__sample_workers__.append(worker)

    path = osp.join(save_dir or '', self.__filename__)
    if save_dir is not None and osp.exists(path):  # pragma: no cover
        self.node_norm, self.edge_norm = torch.load(path)
    else:
        self.node_norm, self.edge_norm = self.__compute_norm__()
        if save_dir is not None:  # pragma: no cover
            torch.save((self.node_norm, self.edge_norm), path)

    if self.num_workers > 0:
        self.__data_queue__ = Queue()
        self.__data_workers__ = []
        for _ in range(self.num_workers):
            worker = Process(target=self.__put_data__,
                             args=(self.__data_queue__,))
            worker.daemon = True
            worker.start()
            self.__data_workers__.append(worker)

def init_main(scatter_list, sr):
    size = 10
    processes = []
    port = 29500
    # output = init_processes(port, '127.0.0.1', 0, size, run, scatter_list, sr)
    p = Process(target=init_processes,
                args=(port, '127.0.0.1', 0, size, run, scatter_list, sr))
    # init_processes(port, '127.0.0.1', 0, size, run, scatter_list, sr)
    p.start()

def test_log_buffer(world_size):
    processes = []
    for rank in range(world_size):
        p = Process(target=init_process, args=(rank, world_size, run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

def run_local(size):
    processes = []
    for rank in range(size):
        p = Process(target=init_processes, args=(rank, size, run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

class Actor:
    def __init__(self, inputs):
        self.inputs = inputs
        self.process = TorchProcess(target=self.act, daemon=True)
        self.process.start()

    def act(self):
        # print(torch.ones((12, 23, 42)).sum())
        torch.multiprocessing.set_sharing_strategy('file_system')
        args, experiment_name, i, lock, stats_queue, device, \
            obs, actions, logprobs, rewards, dones, values = self.inputs
        # obs = to_numpy(obs_sm, 5)
        envs = []
        # o = np.ones((210, 160, 3))
        # print(o.sum())
        # print(torch.ones((84, 160, 3)).sum())
        # raise

        def make_env(gym_id, seed, idx):
            env = gym.make(gym_id)
            env = wrap_atari(env)
            env = gym.wrappers.RecordEpisodeStatistics(env)
            env = wrap_deepmind(
                env,
                clip_rewards=True,
                frame_stack=True,
                scale=False,
            )
            env.seed(seed)
            env.action_space.seed(seed)
            env.observation_space.seed(seed)
            return env

        envs = [make_env(args.gym_id, args.seed + i, i) for i in range(args.num_envs)]
        envs = np.array(envs, dtype=object)
        for env_idx, env in enumerate(envs):
            env.reset()
            # print('Process %d finished resetting %d/%d envs', env_idx + 1, len(envs))

        last_report = last_report_frames = total_env_frames = 0
        while True:
            for env_idx, env in enumerate(envs):
                # os = []
                for step in range(args.num_steps):
                    action = env.action_space.sample()
                    o, r, d, info = env.step(action)
                    if d:
                        o = env.reset()
                    obs[i, env_idx, 0, 0, step] = np.array(o)
                    num_frames = 1
                    total_env_frames += num_frames

                    if 'episode' in info.keys():
                        stats_queue.put(info['episode']['l'])

def run(self, *args, **kwargs):
    processes = []
    for rank, mode in enumerate(self.world):
        p = Process(target=self.init_process, args=(rank, args, kwargs, mode))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

def train(self, size=2):
    processes = []
    for rank in range(size):
        p = Process(target=self.init_processes, args=(rank, size, self.run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

def run(self, *args, **kwargs):
    processes = []
    for rank, (mode, device) in enumerate(self.world):
        kwargs.update({"mode": mode, "device": device})
        p = Process(target=self.init_process, args=(rank, args, kwargs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

class DataLoaderMultiFiles(object):
    """DataLoader that iterates over a set of DataSets."""

    def __init__(self, filepaths, partial, batch_s, buffer_s):
        self.filepaths = filepaths
        self.partial = partial
        self.batch_size = batch_s
        self.max_len = buffer_s
        self.buffer = Queue(maxsize=buffer_s)
        self.batch_queue = Queue(maxsize=10)

    def __iter__(self):
        print('Starting processes')
        random.seed(0)
        random.shuffle(self.filepaths)
        filepaths = deque()
        for path in self.filepaths:
            filepaths.append(path)

        # Buffer-filling workers read files and push samples into self.buffer.
        self.buffr_processes = []
        args = (self.filepaths, self.buffer, self.partial)
        for i in range(10):
            process = Process(target=fill_buffer, args=args)
            process.daemon = True
            process.start()
            self.buffr_processes.append(process)

        # A single batching worker assembles samples from the buffer into batches.
        args = (self.buffer, self.batch_queue, self.batch_size)
        self.batch_process = Process(target=fill_batch, args=args)
        self.batch_process.daemon = True
        self.batch_process.start()
        return self

    def done_files(self):
        # Number of buffer-filling workers that are still alive.
        return sum([e.is_alive() for e in self.buffr_processes])

    def __next__(self):
        # print('buffer_queue: {}, batch_queue: {}'.format(self.buffer.qsize(), self.batch_queue.qsize()))  # noqa
        timeout = 1 if self.done_files() == 0 else 60
        try:
            batch = self.batch_queue.get(timeout=timeout)
        except Empty:
            self.kill()
            raise StopIteration
        return LongTensor(batch)

    def kill(self):
        print('Killing processes')
        for process in self.buffr_processes:
            process.terminate()
        self.batch_process.terminate()

    def __del__(self):
        self.kill()

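# A minimal usage sketch for DataLoaderMultiFiles, assuming hypothetical shard paths and a
# hypothetical collate callable for the `partial` argument; each iteration yields one
# LongTensor batch assembled by the background fill_buffer / fill_batch workers:
loader = DataLoaderMultiFiles(filepaths=['data/shard_0.txt', 'data/shard_1.txt'],
                              partial=my_collate_fn,  # hypothetical collate function
                              batch_s=32, buffer_s=100)
for batch in loader:
    print(batch.size())
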
def train(Model, model_args):
    # Run one worker process for each GPU
    gpus = model_args['gpus']
    model_args["distributed"]["world_size"] *= len(gpus)
    processes = []
    for gpu in gpus:
        p = Process(target=launch_worker_thread, args=(gpu, Model, model_args))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

def main():
    size = 4
    processes = []
    for i in range(size):
        p = Process(target=init_processes, args=(i, size, run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

def train_dist(args, myargs):
    myargs.writer.close()
    size = args.world_size
    processes = []
    for rank in range(size):
        p = Process(target=init_processes, args=(rank, size, args, myargs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

def _instances(self, file_path: str, manager: Manager, output_queue: Queue) -> Iterator[Instance]:
    """
    A generator that reads instances off the output queue and yields them up
    until none are left (signified by all ``num_workers`` workers putting their
    ids into the queue).
    """
    shards = glob.glob(file_path)
    num_shards = len(shards)

    # If we want multiple epochs per read, put shards in the queue multiple times.
    input_queue = manager.Queue(num_shards * self.epochs_per_read + self.num_workers)
    for _ in range(self.epochs_per_read):
        random.shuffle(shards)
        for shard in shards:
            input_queue.put(shard)

    # Then put a None per worker to signify no more files.
    for _ in range(self.num_workers):
        input_queue.put(None)

    processes: List[Process] = []
    num_finished = 0

    for worker_id in range(self.num_workers):
        process = Process(target=_worker,
                          args=(self.reader, input_queue, output_queue, worker_id))
        logger.info(f"starting worker {worker_id}")
        process.start()
        processes.append(process)

    # Keep going as long as not all the workers have finished.
    while num_finished < self.num_workers:
        item = output_queue.get()
        if isinstance(item, int):
            # Means a worker has finished, so increment the finished count.
            num_finished += 1
            logger.info(f"worker {item} finished ({num_finished}/{self.num_workers})")
        else:
            # Otherwise it's an ``Instance``, so yield it up.
            yield item

    for process in processes:
        process.join()
    processes.clear()