def log_fn(self, stop_event: Event):
    try:
        self._super_create_loggers()
        # Tell the parent process where the logs live.
        self.response_queue.put({
            k: self.__dict__[k]
            for k in ["save_dir", "tb_logdir", "is_sweep"]
        })
        while True:
            try:
                cmd = self.draw_queue.get(True, 0.1)
            except EmptyQueue:
                if stop_event.is_set():
                    break
                else:
                    continue
            self._super_log(*cmd)
            self.response_queue.put(True)
    except:
        print("Logger process crashed.")
        raise
    finally:
        print("Logger: syncing")
        if self.use_wandb:
            wandb.join()
        stop_event.set()
        print("Logger process terminating...")
class TensorEvent:
    """Basically a tuple of several torch.Tensors and a multiprocessing.Event.

    The Tensors can be used as "shared tensors" for passing intermediate
    tensors across processes. The Event should be used to signal that the
    consumer process has finished reading from the Tensors.

    When writing values to the Tensors, the producer process should first
    check whether the Tensors are free by calling event.wait(). If the
    Tensors are indeed free, event.wait() returns at once; if not, it blocks
    until the consumer process calls event.set(). The consumer must therefore
    call event.set() only AFTER the Tensors' contents have been copied to a
    safe area, such as the consumer's own local tensors.

    This class also includes an Array object living in shared memory,
    consisting of integers that indicate the valid region of each tensor.
    For example, if a process uses only 3 rows of a 4-row tensor, the
    corresponding entry in the Array is set to 3. Later, when another process
    reads values from the tensor, it first checks the Array value and knows
    it can ignore the final row.
    """

    def __init__(self, shapes, device, dtype=torch.float32):
        self.tensors = tuple(
            torch.empty(*shape, dtype=dtype, device=device)
            for shape in shapes)
        self.event = Event()
        self.event.set()  # the tensors start out free for the producer
        self.valid_batch_sizes = Array('i', len(shapes))
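# A minimal producer/consumer sketch of the handshake described in the
# docstring above. The single-tensor indexing, the producer-side
# event.clear(), and the out-of-band "data ready" signal (not shown) are
# illustrative assumptions, not part of the class itself.
import torch

def produce(te: "TensorEvent", batch: torch.Tensor):
    te.event.wait()                    # block until the consumer freed the buffers
    te.event.clear()                   # mark the buffers as occupied (assumed convention)
    n = batch.shape[0]
    te.tensors[0][:n].copy_(batch)     # write into the shared tensor
    te.valid_batch_sizes[0] = n        # record how many rows are valid

def consume(te: "TensorEvent") -> torch.Tensor:
    n = te.valid_batch_sizes[0]
    local = te.tensors[0][:n].clone()  # copy to the consumer's own memory first
    te.event.set()                     # release the buffers only AFTER copying
    return local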
def create_loggers(self):
    self._super_create_loggers = super().create_loggers
    self.stop_event = Event()
    # self.log_fn is a bound method, so `self` is passed implicitly;
    # only the stop event belongs in args.
    self.proc = Process(target=self.log_fn, args=(self.stop_event,))
    self.proc.start()
    atexit.register(self.finish)
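# The `finish` method registered with atexit is not shown in this snippet.
# A plausible counterpart, assuming only the queue/event wiring above, is
# sketched here (the real method may also drain draw_queue or sync wandb):
def finish(self):
    self.stop_event.set()  # let log_fn's loop exit once its queue is empty
    self.proc.join()       # wait for the logger process to terminate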
def run_in_process_group(world_size, filename, fn, inputs):
    if torch.distributed.is_initialized():
        torch.distributed.destroy_process_group()
    processes = []
    q = Queue()
    wait_event = Event()

    # start one process per rank
    for rank in range(world_size):
        p = Process(
            target=init_and_run_process,
            args=(rank, world_size, filename, fn, inputs[rank], q, wait_event),
        )
        p.start()
        processes.append(p)

    # fetch the results from the queue before joining; the background
    # processes need to stay alive while the queue contains tensors. See
    # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847/3  # noqa: B950
    results = []
    for _ in range(len(processes)):
        results.append(q.get())

    wait_event.set()

    for p in processes:
        p.join()
    return results
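# Hypothetical usage sketch. `allreduce_fn` is illustrative, and we assume
# init_and_run_process (not shown) uses `filename` as a file:// rendezvous
# and calls fn(inputs) on each rank before putting the result on the queue.
import tempfile
import torch
import torch.distributed

def allreduce_fn(tensor):
    torch.distributed.all_reduce(tensor)  # element-wise sum across ranks
    return tensor

if __name__ == "__main__":
    world_size = 2
    with tempfile.NamedTemporaryFile() as f:
        results = run_in_process_group(
            world_size, f.name, allreduce_fn,
            inputs=[torch.ones(3) * (rank + 1) for rank in range(world_size)])
    print(results)  # each rank returns tensor([3., 3., 3.])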
def __init__(self, worker_id, args):
    super().__init__()
    self.id = worker_id
    self.args = args
    # for master use, for worker use
    self.pipe_master, self.pipe_worker = Pipe()
    self.exit_event = Event()

    # determine n_e: split n_e environments across n_w workers;
    # the last worker takes the remainder
    q, r = divmod(args.n_e, args.n_w)
    if r:
        print('Warning: n_e % n_w != 0')
    if worker_id == args.n_w - 1:
        self.n_e = n_e = q + r
    else:
        self.n_e = n_e = q
    print('[Worker', self.id, '] n_e = %d' % n_e)

    self.env_start = worker_id * q
    self.env_slice = slice(self.env_start, self.env_start + n_e)
    self.env_range = range(self.env_start, self.env_start + n_e)
    self.envs = None
    self.start()
def _worker_loop(dataset, job_queue: mp.Queue, result_queue: mp.Queue,
                 interrupt_event: mp.Event):
    logger = logging.getLogger("worker_loop")
    logger.debug("Worker started.")
    while True:
        logger.debug("Trying to fetch from job_queue.")
        if interrupt_event.is_set():
            logger.debug("Received interrupt signal, breaking.")
            break
        try:
            # This assumes that the job_queue is fully populated before the
            # worker is started.
            index = job_queue.get_nowait()
            logger.debug("Fetch successful.")
        except Empty:
            logger.debug("Queue empty, setting up poison pill.")
            index = None
        if index is None or interrupt_event.is_set():
            logger.debug(
                "Fetched poison pill or received interrupt signal, breaking.")
            break
        try:
            logger.debug("Sampling index {} from dataset.".format(index))
            sample = dataset[index]
        except Exception:
            logger.debug("Dataset threw an exception at index {}.".format(index),
                         exc_info=1)
            result_queue.put((index, ExceptionWrapper(sys.exc_info())))
        else:
            logger.debug(
                "Putting sample at index {} in the result queue.".format(index))
            result_queue.put((index, sample))
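# A minimal driver sketch for _worker_loop. A plain list stands in for the
# dataset, and the timeout is illustrative. The job queue is fully populated
# before the worker starts, as the loop above assumes.
import multiprocessing as mp

if __name__ == "__main__":
    dataset = list(range(100))  # anything supporting dataset[index]
    job_queue, result_queue = mp.Queue(), mp.Queue()
    interrupt_event = mp.Event()
    for index in range(len(dataset)):
        job_queue.put(index)
    worker = mp.Process(target=_worker_loop,
                        args=(dataset, job_queue, result_queue,
                              interrupt_event))
    worker.start()
    for _ in range(len(dataset)):
        index, sample = result_queue.get(timeout=10)  # may be an ExceptionWrapper
    interrupt_event.set()  # redundant here, but mirrors a real shutdown
    worker.join()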
def __init__(self, port_out, front_sink_addr, verbose=False):
    super().__init__()
    self.port = port_out
    self.exit_flag = Event()
    self.logger = set_logger(colored('SINK', 'green'), verbose)
    self.front_sink_addr = front_sink_addr
    self.is_ready = Event()
    self.verbose = verbose
def test_routine_as_process():
    r = DummyRoutine()
    e = Event()
    r.stop_event = e
    r.as_process()
    r.start()
    e.set()
    r.runner.join()
class DataLoader:
    def __init__(self, data_store, epochs=1):
        """
        Start the batch creator and sample creator. Read the memory config
        file and create the right number of processes.
        """
        self.ds = data_store
        # Events to stop the batch creator and sample creator
        self.stop_sc = Event()
        self.stop_bc = Event()
        # Start separate processes for sample creator(s)
        self.epochs = epochs
        self.sc = SampleCreator(self.ds,
                                event=self.stop_sc,
                                epochs=self.epochs,
                                sampled=self.ds.points_sampled)
        self.sc.start()
        # Start batch creator(s)
        self.bc = BatchCreator(self.ds, self.stop_bc)
        self.bc.start()

    def get_next_batch(self):
        """
        Get the next batch from the queue.
        """
        if self.ds.batch_creator_done.full():
            return None
        # Access batches from data_store.batches and return the batch
        try:
            batch = self.ds.batches.get()
            return batch
        except Exception as e:
            print(e)
            return None

    def stop_batch_creation(self):
        # Attempt to gracefully terminate the processes
        self.stop_bc.set()
        self.stop_sc.set()
        time.sleep(1)
        # Terminate the processes forcefully
        self.bc.terminate()
        self.sc.terminate()
        # Wait for child processes to end
        self.bc.join()
        self.sc.join()
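# Usage sketch. `data_store` is whatever object supplies .batches,
# .points_sampled and .batch_creator_done in this codebase, and
# `train_step` is a hypothetical training function.
loader = DataLoader(data_store, epochs=2)
while True:
    batch = loader.get_next_batch()
    if batch is None:  # all batches consumed (or a queue error)
        break
    train_step(batch)
loader.stop_batch_creation()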
def test_routine_no_runner():
    r = DummyRoutine(name="dummy")
    with pytest.raises(NoRunnerException):
        r.start()
    e = Event()
    r.stop_event = e
    r.as_thread()
    try:
        r.start()
    except NoRunnerException:
        pytest.fail("NoRunnerException was thrown...")
    e.set()
    r.runner.join()
def multiprocess_training_loader(process_number: int, _config,
                                 _queue: mp.Queue, _wait_for_exit: mp.Event,
                                 _local_file,
                                 _fasttext_vocab_cached_mapping,
                                 _fasttext_vocab_cached_data):
    # workflow: we tokenize the data files with the costly spacy before
    # training in a preprocessing step (and concat the tokens with single
    # whitespaces), so here we only split on the whitespaces
    _tokenizer = None
    if _config["preprocessed_tokenized"]:
        _tokenizer = WordTokenizer(word_splitter=JustSpacesWordSplitter())

    if _config["token_embedder_type"] == "embedding":
        _token_indexers = {
            "tokens": SingleIdTokenIndexer(lowercase_tokens=True)
        }
        _vocab = Vocabulary.from_files(_config["vocab_directory"])
    elif _config["token_embedder_type"] == "fasttext":
        _token_indexers = {
            "tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])
        }
        _vocab = FastTextVocab(_fasttext_vocab_cached_mapping,
                               _fasttext_vocab_cached_data,
                               _config["fasttext_max_subwords"])
    elif _config["token_embedder_type"] == "elmo":
        _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
        _vocab = None

    _triple_loader = IrTripleDatasetReader(
        lazy=True,
        tokenizer=_tokenizer,
        token_indexers=_token_indexers,
        max_doc_length=_config["max_doc_length"],
        max_query_length=_config["max_query_length"])

    _iterator = BucketIterator(
        batch_size=int(_config["batch_size_train"]),
        sorting_keys=[("doc_pos_tokens", "num_tokens"),
                      ("doc_neg_tokens", "num_tokens")])
    _iterator.index_with(_vocab)

    for training_batch in _iterator(_triple_loader.read(_local_file),
                                    num_epochs=1):
        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.close()  # indicate this local process is done
    # keep this process alive until all the shared memory is used
    # and no longer needed
    _wait_for_exit.wait()
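# Parent-side wiring sketch. The config dict, file shards, and queue size
# are assumptions; the fasttext vocab arguments are only needed for the
# "fasttext" embedder type and are passed as None here.
import torch.multiprocessing as mp

if __name__ == "__main__":
    config = {}  # fill with the _config keys described above
    queue = mp.Queue(maxsize=8)
    wait_for_exit = mp.Event()
    files = ["triples.part0.tsv", "triples.part1.tsv"]  # hypothetical shards
    procs = [mp.Process(target=multiprocess_training_loader,
                        args=(i, config, queue, wait_for_exit, f, None, None))
             for i, f in enumerate(files)]
    for p in procs:
        p.start()
    # ... consume training batches from `queue` here ...
    wait_for_exit.set()  # release the loaders once all tensors are consumed
    for p in procs:
        p.join()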
def test_routine_crash_message(caplog):
    r = DummyCrashingRoutine()
    e = Event()
    r.stop_event = e
    r.as_thread()
    r.start()
    time.sleep(0.01)
    e.set()
    r.runner.join()
    logs_text_list = [record_tuple[2] for record_tuple in caplog.record_tuples]
    assert any("The routine has crashed" in log for log in logs_text_list)
class Worker:
    def __init__(self, queue: Queue, collect_event: Event, actor_net, args,
                 seed):
        self._queue = queue
        self._collect_event = collect_event
        self._actor = actor_net
        self._args = args
        self.event = Event()
        self.seed = seed

    def run(self, episode):
        env = ManyUavEnv(self._args.agents, self.seed, self._args.reward_type)
        state = env.reset()
        while True:
            self.event.set()
            self._collect_event.wait()
            actions = []
            for i in range(self._args.agents):
                action = self._choose_action_with_exploration(state[i])
                actions.append(action)
            next_state, reward, done, info = env.step(
                np.array(actions) * self._args.action_bound)
            transition = []
            for i in range(self._args.agents):
                transition.append(
                    (state[i], actions[i], reward[i], next_state[i], done))
            self._queue.put(transition)
            state = next_state
            if done:
                state = env.reset()
                with episode.get_lock():
                    episode.value += 1
            if self._queue.qsize() >= self._args.update_interval:
                self._collect_event.clear()

    def _choose_action_with_exploration(self, state):
        action = self._choose_action(state)
        noise = np.random.normal(0, self._args.scale, (2,))
        action = np.clip(action + noise, -1, 1)  # clip action to [-1, 1]
        return action

    def _choose_action(self, state):
        with torch.no_grad():
            state = torch.from_numpy(state).float().to(CHIP)
            action = self._actor(state)
            action = action.detach().cpu().numpy()
        return action
def __init__(self, generator, max_queue_size=5, nb_worker=1):
    self.generator = generator
    self.nb_worker = nb_worker
    self.max_queue_size = max_queue_size

    self._queue = Queue()
    self._signal = Event()
    self._available_cv = Condition()
    self._full_cv = Condition()

    args = (generator, self._queue, self._signal, self._available_cv,
            self._full_cv, self.nb_worker, self.max_queue_size)
    self.working_process = Process(target=self.generator_process, args=args)
    self.working_process.daemon = True
    self.working_process.start()
def __init__(self, component_config, start_component=False):
    self.name = ""
    self.ROUTINES_FOLDER_PATH = "pipert/contrib/routines"
    self.MONITORING_SYSTEMS_FOLDER_PATH = "pipert/contrib/metrics_collectors"
    self.use_memory = False
    self.stop_event = Event()
    self.stop_event.set()
    self.queues = {}
    self._routines = {}
    self.metrics_collector = NullCollector()
    self.parent_logger = None
    self.logger = None
    self.setup_component(component_config)
    self.metrics_collector.setup()
    if start_component:
        self.run_comp()
def __init__(self, n_workers, actor, args):
    self._now_episode = Value('i', 0)
    self.queue = Queue()
    self.collect_event = Event()
    self.worker = []
    for i in range(n_workers):
        self.worker.append(
            Worker(self.queue, self.collect_event, actor, args, i))
    self.process = [
        Process(target=self.worker[i].run, args=(self._now_episode,))
        for i in range(n_workers)
    ]
    for p in self.process:
        p.start()
    print(f'Start {n_workers} workers.')
def _prefetch(in_queue: mp.Queue, out_queue: mp.Queue, batchsize: int,
              shutdown_event: mp.Event, target_device, waiting_time=5):
    """Continuously prefetches complete trajectories dropped by the
    :py:class:`~.TrajectoryStore` for training.

    As long as shutdown is not set, this method pulls :py:attr:`batchsize`
    trajectories from :py:attr:`in_queue`, transforms them into batches using
    :py:meth:`~_to_batch()` and puts them onto the :py:attr:`out_queue`.

    This usually runs as an asynchronous :py:obj:`multiprocessing.Process`.

    Parameters
    ----------
    in_queue: :py:obj:`multiprocessing.Queue`
        A queue that delivers dropped trajectories from
        :py:class:`~.TrajectoryStore`.
    out_queue: :py:obj:`multiprocessing.Queue`
        A queue that delivers batches to :py:meth:`_loop()`.
    batchsize: `int`
        The number of trajectories that shall be processed into a batch.
    shutdown_event: :py:obj:`multiprocessing.Event`
        An event that breaks this method's internal loop.
    target_device: :py:obj:`torch.device`
        The target device of the batch.
    waiting_time: `float`
        Time the method's loop sleeps between each iteration.
    """
    while not shutdown_event.is_set():
        try:
            trajectories = [
                in_queue.get(timeout=waiting_time) for _ in range(batchsize)
            ]
        except queue.Empty:
            continue

        batch = Learner._to_batch(trajectories, target_device)
        # delete Tensors after usage to free memory (see torch multiprocessing)
        del trajectories

        try:
            out_queue.put(batch)
        except (AssertionError, ValueError):  # queue closed
            continue

        # delete Tensors after usage to free memory (see torch multiprocessing)
        del batch

    try:
        del trajectories
    except UnboundLocalError:  # already deleted
        pass
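# Wiring sketch: run _prefetch as a background process. The queue sizes,
# batchsize, and device are illustrative; in the source this function works
# together with the Learner and the TrajectoryStore.
import torch
import torch.multiprocessing as mp

in_queue = mp.Queue()
out_queue = mp.Queue(maxsize=4)
shutdown_event = mp.Event()
prefetcher = mp.Process(
    target=_prefetch,
    args=(in_queue, out_queue, 8, shutdown_event, torch.device("cpu")))
prefetcher.start()
# ... the trajectory store drops trajectories into in_queue,
#     the training loop consumes batches from out_queue ...
shutdown_event.set()
prefetcher.join()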
class Worker:
    def __init__(self, queue: Queue, collect_event: Event, actor_net, args):
        self._env = ManyUavEnv(1, True)
        self._queue = queue
        self._collect_event = collect_event
        self._actor = actor_net
        self._args = args
        self.event = Event()

    def run(self, episode):
        state = self._env.reset()
        while True:
            self.event.set()
            self._collect_event.wait()
            action = self._choose_action_with_exploration(state)
            next_state, reward, done, info = self._env.step(
                action * self._args.action_bound)
            self._queue.put((state, action, reward, next_state, done))
            state = next_state
            if done:
                state = self._env.reset()
                with episode.get_lock():
                    episode.value += 1
            if self._queue.qsize() >= self._args.update_interval:
                self._collect_event.clear()

    def _choose_action_with_exploration(self, state):
        action = self._choose_action(state)
        noise = np.random.normal(0, self._args.scale, (2,))
        action = np.clip(action + noise, -1, 1)  # clip action to [-1, 1]
        return action

    def _choose_action(self, state):
        with torch.no_grad():
            state = torch.from_numpy(state).float()
            action = self._actor(state)
            action = action.detach().numpy()
        return action
def __init__(self, endpoint="tcp://0.0.0.0:4242", name="", *args, **kwargs):
    """
    Args:
        endpoint: the endpoint the component's zerorpc server will listen on.
        *args: TBD
        **kwargs: TBD
    """
    super().__init__()
    self.name = name
    self.stop_event = Event()
    self.endpoint = endpoint
    self._routines = []
    self.zrpc = zerorpc.Server(self)
    self.zrpc.bind(endpoint)
class WorkerManager:
    def __init__(self, n_workers, actor, args):
        self._now_episode = Value('i', 0)
        self.queue = Queue()
        self.collect_event = Event()
        self.worker = []
        for i in range(n_workers):
            self.worker.append(
                Worker(self.queue, self.collect_event, actor, args))
            time.sleep(1)
        self.process = [
            Process(target=self.worker[i].run, args=(self._now_episode,))
            for i in range(n_workers)
        ]
        for p in self.process:
            p.start()
        print(f'Start {n_workers} workers.')

    def collect(self):
        result = []
        self.collect_event.set()
        while self.collect_event.is_set():
            # busy-wait until a worker clears the event (data collection done)
            pass
        for w in self.worker:
            w.event.wait()
        while not self.queue.empty():
            result.append(self.queue.get())
        for w in self.worker:
            w.event.clear()
        return result

    def now_episode(self):
        value = self._now_episode.value
        return value
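# Training-loop sketch around WorkerManager. `actor_net`, `args`,
# `replay_buffer`, and `update_actor` are hypothetical stand-ins.
manager = WorkerManager(n_workers=4, actor=actor_net, args=args)
while manager.now_episode() < args.max_episodes:
    transitions = manager.collect()  # rendezvous: workers pause, queue drained
    for t in transitions:
        replay_buffer.add(t)
    update_actor()                   # learner step, then collect again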
def test_add_event_handler():
    r = DummyRoutine()
    e = Event()
    r.stop_event = e
    r.as_thread()
    r.add_event_handler(Events.BEFORE_LOGIC, dummy_before_handler)
    r.add_event_handler(Events.AFTER_LOGIC, dummy_after_handler)

    @r.on(Events.AFTER_LOGIC)
    def dummy_handler(_):
        pass

    r.start()
    r.runner.join()
    assert r.state.dummy == 667
class DaliIteratorCPU(DaliIterator):
    """Wrapper class to decode the DALI iterator output & provide an iterator
    that functions the same as torchvision's.

    Note that permutation to channels-first, conversion from 8-bit to float,
    and normalization are all performed on the GPU.

    pipelines (Pipeline): DALI pipelines
    size (int): Number of examples in set
    fp16 (bool): Use fp16 as output format, f32 otherwise
    mean (tuple): Image mean value for each channel
    std (tuple): Image standard deviation value for each channel
    pin_memory (bool): Transfer input tensor to pinned memory, before moving to GPU
    """

    def __init__(self, fp16=False, mean=(0., 0., 0.), std=(1., 1., 1.),
                 pin_memory=True, pca_jitter=False, **kwargs):
        super().__init__(**kwargs)
        print('Using DALI CPU iterator')
        self.stream = torch.cuda.Stream()

        self.fp16 = fp16
        self.mean = torch.tensor(mean).cuda().view(1, 3, 1, 1)
        self.std = torch.tensor(std).cuda().view(1, 3, 1, 1)
        self.pin_memory = pin_memory
        self.pca_jitter = pca_jitter

        if self.fp16:
            self.mean = self.mean.half()
            self.std = self.std.half()

        self.proc_next_input = Event()
        self.done_event = Event()
        self.output_queue = queue.Queue(maxsize=5)
        self.preproc_thread = threading.Thread(
            target=_preproc_worker,
            kwargs={
                'dali_iterator': self._dali_iterator,
                'cuda_stream': self.stream,
                'fp16': self.fp16,
                'mean': self.mean,
                'std': self.std,
                'proc_next_input': self.proc_next_input,
                'done_event': self.done_event,
                'output_queue': self.output_queue,
                'pin_memory': self.pin_memory,
                'pca_jitter': self.pca_jitter
            })
        self.preproc_thread.daemon = True
        self.preproc_thread.start()

        self.proc_next_input.set()

    def __next__(self):
        torch.cuda.current_stream().wait_stream(self.stream)
        data = self.output_queue.get()
        self.proc_next_input.set()
        if data is None:
            raise StopIteration
        return data

    def __del__(self):
        self.done_event.set()
        self.proc_next_input.set()
        torch.cuda.current_stream().wait_stream(self.stream)
        self.preproc_thread.join()
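# Consumption sketch. Building the underlying DALI pipelines is omitted, and
# `pipelines`/`n_examples` are assumed to be accepted by the DaliIterator
# base class via **kwargs.
it = DaliIteratorCPU(fp16=False,
                     mean=(0.485, 0.456, 0.406),
                     std=(0.229, 0.224, 0.225),
                     pipelines=pipelines, size=n_examples)
while True:
    try:
        batch = next(it)   # blocks on output_queue, then re-arms the worker
    except StopIteration:  # raised when the preproc thread enqueues None
        break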
class MultimodalPatchesCache(object):
    def __init__(self, cache_dir, dataset_dir, dataset_list, cuda,
                 batch_size=500, num_workers=3, renew_frequency=5,
                 rejection_radius_position=0, numpatches=900, numneg=3,
                 pos_thr=50.0, reject=True, mode='train',
                 rejection_radius=3000, dist_type='3D', patch_radius=None,
                 use_depth=False, use_normals=False, use_silhouettes=False,
                 color_jitter=False, greyscale=False, maxres=4096,
                 scale_jitter=False, photo_jitter=False,
                 uniform_negatives=False, needles=0, render_only=False,
                 maxitems=200, cache_once=False):
        super(MultimodalPatchesCache, self).__init__()
        self.cache_dir = cache_dir
        self.dataset_dir = dataset_dir
        # self.images_path = images_path
        self.dataset_list = dataset_list
        self.cuda = cuda
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.renew_frequency = renew_frequency
        self.rejection_radius_position = rejection_radius_position
        self.numpatches = numpatches
        self.numneg = numneg
        self.pos_thr = pos_thr
        self.reject = reject
        self.mode = mode
        self.rejection_radius = rejection_radius
        self.dist_type = dist_type
        self.patch_radius = patch_radius
        self.use_depth = use_depth
        self.use_normals = use_normals
        self.use_silhouettes = use_silhouettes
        self.color_jitter = color_jitter
        self.greyscale = greyscale
        self.maxres = maxres
        self.scale_jitter = scale_jitter
        self.photo_jitter = photo_jitter
        self.uniform_negatives = uniform_negatives
        self.needles = needles
        self.render_only = render_only

        self.cache_done_lock = Lock()
        self.all_done = Value('B', 0)    # 0 is False
        self.cache_done = Value('B', 0)  # 0 is False
        self.wait_for_cache_builder = Event()
        # prepare for wait until initial cache is built
        self.wait_for_cache_builder.clear()
        self.cache_builder_resume = Event()
        self.maxitems = maxitems
        self.cache_once = cache_once
        if self.mode == 'eval':
            self.maxitems = -1
        self.cache_builder = Process(target=self.buildCache,
                                     args=[self.maxitems])
        self.current_cache_build = Value('B', 0)  # 0th cache
        self.current_cache_use = Value('B', 1)    # 1st cache
        self.cache_names = ["cache1", "cache2"]   # constant

        rebuild_cache = True
        if self.mode == 'eval':
            validation_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(validation_dir):
                # we don't need to rebuild the validation cache
                # TODO: check if cache is VALID
                rebuild_cache = False
        elif cache_once:
            build_dataset_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(build_dataset_dir):
                # we don't need to rebuild the training cache if we are
                # training on a limited subset of the training set
                rebuild_cache = False

        if rebuild_cache:
            # clear the caches if they already exist
            build_dataset_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(build_dataset_dir):
                shutil.rmtree(build_dataset_dir)
            use_dataset_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_use.value])
            if os.path.isdir(use_dataset_dir):
                shutil.rmtree(use_dataset_dir)
            os.makedirs(build_dataset_dir)

            self.cache_builder_resume.set()
            self.cache_builder.start()

            # wait until initial cache is built
            # print("wait for cache builder state",
            #       self.wait_for_cache_builder.is_set())
            self.wait_for_cache_builder.wait()
            # we have been resumed

        if self.mode != 'eval' and (not self.cache_once):
            # for training, we can set up the cache builder to build
            # the second cache
            self.restart()
        else:
            # for validation we don't need a second cache; we just switch
            # the built cache to the use cache in order to use it
            tmp = self.current_cache_build.value
            self.current_cache_build.value = self.current_cache_use.value
            self.current_cache_use.value = tmp
        # initialization finished, now this dataset can be used

    def getCurrentCache(self):
        # Lock should not be needed - cache_done is not touched, and
        # cache_len is read-only for the cache in use, which should not
        # be touched by other threads
        h5_dataset_filename = os.path.join(
            self.cache_dir,
            self.cache_names[self.current_cache_use.value])
        return h5_dataset_filename

    def restart(self):
        self.cache_done_lock.acquire()
        if self.cache_done.value and (not self.cache_once):
            cache_changed = True
            tmp_cache_name = self.current_cache_use.value
            self.current_cache_use.value = self.current_cache_build.value
            self.current_cache_build.value = tmp_cache_name
            # clear the old cache if it exists
            build_dataset_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(build_dataset_dir):
                shutil.rmtree(build_dataset_dir)
            os.makedirs(build_dataset_dir)
            self.cache_done.value = 0  # 0 is False
            self.cache_builder_resume.set()
            # print("Switched cache to:",
            #       self.cache_names[self.current_cache_use.value])
        else:
            cache_changed = False
            # print("New cache not ready, continuing with old cache:",
            #       self.cache_names[self.current_cache_use.value])
        all_done_value = self.all_done.value
        self.cache_done_lock.release()
        # returns True if no more items are available to be loaded;
        # this object should be destroyed and a new dataset created
        # in order to start over
        return cache_changed, all_done_value

    def buildCache(self, limit):
        # print("Building cache:",
        #       self.cache_names[self.current_cache_build.value])
        dataset = MultimodalPatchesDatasetAll(
            self.dataset_dir,
            self.dataset_list,
            rejection_radius_position=self.rejection_radius_position,
            # self.images_path, list=train_sampled,
            numpatches=self.numpatches,
            numneg=self.numneg,
            pos_thr=self.pos_thr,
            reject=self.reject,
            mode=self.mode,
            rejection_radius=self.rejection_radius,
            dist_type=self.dist_type,
            patch_radius=self.patch_radius,
            use_depth=self.use_depth,
            use_normals=self.use_normals,
            use_silhouettes=self.use_silhouettes,
            color_jitter=self.color_jitter,
            greyscale=self.greyscale,
            maxres=self.maxres,
            scale_jitter=self.scale_jitter,
            photo_jitter=self.photo_jitter,
            uniform_negatives=self.uniform_negatives,
            needles=self.needles,
            render_only=self.render_only)
        n_triplets = len(dataset)
        if limit == -1:
            limit = n_triplets
        dataloader = DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=False,
            pin_memory=False,
            num_workers=1,  # self.num_workers
            collate_fn=MultimodalPatchesCache.my_collate)

        qmaxsize = 15
        data_queue = JoinableQueue(maxsize=qmaxsize)

        # cannot load to cuda from background, therefore use cpu device
        preloader_resume = Event()
        preloader = Process(
            target=MultimodalPatchesCache.generateTrainingData,
            args=(data_queue, dataset, dataloader, self.batch_size,
                  qmaxsize, preloader_resume, True, True))
        preloader.do_run_generate = True
        preloader.start()
        preloader_resume.set()

        i_batch = 0
        data = data_queue.get()
        i_batch = data[0]

        counter = 0
        while i_batch != -1:
            self.cache_builder_resume.wait()

            build_dataset_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            batch_fname = os.path.join(
                build_dataset_dir, 'batch_' + str(counter) + '.pt')
            # print("ibatch", i_batch,
            #       "___data___", data[3].shape, data[6].shape)

            anchor = data[1]
            pos = data[2]
            neg = data[3]
            anchor_r = data[4]
            pos_p = data[5]
            neg_p = data[6]
            c1 = data[7]
            c2 = data[8]
            cneg = data[9]
            id = data[10]

            if not (self.use_depth or self.use_normals):
                # no need to store image data as float, convert to uint
                anchor = (anchor * 255.0).to(torch.uint8)
                pos = (pos * 255.0).to(torch.uint8)
                neg = (neg * 255.0).to(torch.uint8)
                anchor_r = (anchor_r * 255.0).to(torch.uint8)
                pos_p = (pos_p * 255.0).to(torch.uint8)
                neg_p = (neg_p * 255.0).to(torch.uint8)

            tosave = {
                'anchor': anchor,
                'pos': pos,
                'neg': neg,
                'anchor_r': anchor_r,
                'pos_p': pos_p,
                'neg_p': neg_p,
                'c1': c1,
                'c2': c2,
                'cneg': cneg,
                'id': id
            }

            try:
                torch.save(tosave, batch_fname)
                torch.load(batch_fname)
                counter += 1
            except Exception as e:
                print("Could not save ", batch_fname, ", due to:", e,
                      "skipping...", file=sys.stderr)
                if os.path.isfile(batch_fname):
                    os.remove(batch_fname)

            data_queue.task_done()

            if counter >= limit:
                self.cache_done_lock.acquire()
                self.cache_done.value = 1  # 1 is True
                self.cache_done_lock.release()
                counter = 0
                # sleep until the calling thread wakes us
                self.cache_builder_resume.clear()
                # resume the calling thread so that it can work
                self.wait_for_cache_builder.set()

            data = data_queue.get()
            i_batch = data[0]
            # print("ibatch", i_batch)
            data_queue.task_done()

        self.cache_done_lock.acquire()
        self.cache_done.value = 1  # 1 is True
        self.all_done.value = 1
        print("Cache done ALL")
        self.cache_done_lock.release()
        # resume the calling thread so that it can work
        self.wait_for_cache_builder.set()
        preloader.join()
        preloader = None
        data_queue = None

    @staticmethod
    def loadBatch(sample_batched, mode, device, keep_all=False):
        if mode == 'eval':
            coords1 = sample_batched[6]
            coords2 = sample_batched[7]
            coords_neg = sample_batched[8]
            keep = sample_batched[10]
            item_id = sample_batched[11]
        else:
            coords1 = sample_batched[6]
            coords2 = sample_batched[7]
            coords_neg = sample_batched[8]
            keep = sample_batched[9]
            item_id = sample_batched[10]

        if keep_all:
            # requested to return the full batch
            batchsize = sample_batched[0].shape[0]
            keep = torch.ones(batchsize).byte()

        keep = keep.reshape(-1)
        keep = keep.bool()

        anchor = sample_batched[0]
        pos = sample_batched[1]
        neg = sample_batched[2]
        # swapped photo to render
        anchor_r = sample_batched[3]
        pos_p = sample_batched[4]
        neg_p = sample_batched[5]

        anchor = anchor[keep].to(device)
        pos = pos[keep].to(device)
        neg = neg[keep].to(device)
        anchor_r = anchor_r[keep]
        pos_p = pos_p[keep]
        neg_p = neg_p[keep]
        coords1 = coords1[keep]
        coords2 = coords2[keep]
        coords_neg = coords_neg[keep]
        item_id = item_id[keep]

        return anchor, pos, neg, anchor_r, pos_p, neg_p, coords1, coords2, \
            coords_neg, item_id

    @staticmethod
    def generateTrainingData(queue, dataset, dataloader, batch_size,
                             qmaxsize, resume, shuffle=True,
                             disable_tqdm=False):
        local_buffer_a = []
        local_buffer_p = []
        local_buffer_n = []
        local_buffer_ar = []
        local_buffer_pp = []
        local_buffer_np = []
        local_buffer_c1 = []
        local_buffer_c2 = []
        local_buffer_cneg = []
        local_buffer_id = []
        nbatches = 10
        # cannot load to cuda in a background process!
        device = torch.device('cpu')
        buffer_size = min(qmaxsize * batch_size, nbatches * batch_size)
        bidx = 0
        for i_batch, sample_batched in enumerate(dataloader):
            # tqdm(dataloader, disable=disable_tqdm)
            resume.wait()
            anchor, pos, neg, anchor_r, \
                pos_p, neg_p, c1, c2, cneg, id = \
                MultimodalPatchesCache.loadBatch(
                    sample_batched, dataset.mode, device)
            if anchor.shape[0] == 0:
                continue
            local_buffer_a.extend(list(anchor))  # [:current_batches]
            local_buffer_p.extend(list(pos))
            local_buffer_n.extend(list(neg))
            local_buffer_ar.extend(list(anchor_r))
            local_buffer_pp.extend(list(pos_p))
            local_buffer_np.extend(list(neg_p))
            local_buffer_c1.extend(list(c1))
            local_buffer_c2.extend(list(c2))
            local_buffer_cneg.extend(list(cneg))
            local_buffer_id.extend(list(id))
            if len(local_buffer_a) >= buffer_size:
                if shuffle:
                    local_buffer_a, local_buffer_p, local_buffer_n, \
                        local_buffer_ar, local_buffer_pp, local_buffer_np, \
                        local_buffer_c1, local_buffer_c2, \
                        local_buffer_cneg, local_buffer_id = \
                        sklearn.utils.shuffle(
                            local_buffer_a, local_buffer_p, local_buffer_n,
                            local_buffer_ar, local_buffer_pp,
                            local_buffer_np, local_buffer_c1,
                            local_buffer_c2, local_buffer_cneg,
                            local_buffer_id)
                curr_nbatches = int(np.floor(len(local_buffer_a) / batch_size))
                for i in range(0, curr_nbatches):
                    queue.put([
                        bidx,
                        torch.stack(local_buffer_a[:batch_size]),
                        torch.stack(local_buffer_p[:batch_size]),
                        torch.stack(local_buffer_n[:batch_size]),
                        torch.stack(local_buffer_ar[:batch_size]),
                        torch.stack(local_buffer_pp[:batch_size]),
                        torch.stack(local_buffer_np[:batch_size]),
                        torch.stack(local_buffer_c1[:batch_size]),
                        torch.stack(local_buffer_c2[:batch_size]),
                        torch.stack(local_buffer_cneg[:batch_size]),
                        torch.stack(local_buffer_id[:batch_size])
                    ])
                    del local_buffer_a[:batch_size]
                    del local_buffer_p[:batch_size]
                    del local_buffer_n[:batch_size]
                    del local_buffer_ar[:batch_size]
                    del local_buffer_pp[:batch_size]
                    del local_buffer_np[:batch_size]
                    del local_buffer_c1[:batch_size]
                    del local_buffer_c2[:batch_size]
                    del local_buffer_cneg[:batch_size]
                    del local_buffer_id[:batch_size]
                    bidx += 1
        # flush any full batches still sitting in the local buffers
        remaining_batches = len(local_buffer_a) // batch_size
        for i in range(0, remaining_batches):
            queue.put([
                bidx,
                torch.stack(local_buffer_a[:batch_size]),
                torch.stack(local_buffer_p[:batch_size]),
                torch.stack(local_buffer_n[:batch_size]),
                torch.stack(local_buffer_ar[:batch_size]),
                torch.stack(local_buffer_pp[:batch_size]),
                torch.stack(local_buffer_np[:batch_size]),
                torch.stack(local_buffer_c1[:batch_size]),
                torch.stack(local_buffer_c2[:batch_size]),
                torch.stack(local_buffer_cneg[:batch_size]),
                torch.stack(local_buffer_id[:batch_size])
            ])
            del local_buffer_a[:batch_size]
            del local_buffer_p[:batch_size]
            del local_buffer_n[:batch_size]
            del local_buffer_ar[:batch_size]
            del local_buffer_pp[:batch_size]
            del local_buffer_np[:batch_size]
            del local_buffer_c1[:batch_size]
            del local_buffer_c2[:batch_size]
            del local_buffer_cneg[:batch_size]
            del local_buffer_id[:batch_size]
        # sentinel: i_batch == -1 tells the consumer no more batches are coming
        ra = torch.randn(batch_size, 3, 64, 64)
        queue.put([-1, ra, ra, ra])
        queue.join()

    @staticmethod
    def my_collate(batch):
        batch = list(filter(lambda x: x is not None, batch))
        return default_collate(batch)
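# Double-buffer usage sketch. Constructor arguments are abbreviated and the
# training code is a placeholder; restart() swaps the two caches whenever the
# background builder has finished the next one.
import os
import torch

cache = MultimodalPatchesCache("cache", dataset_dir, dataset_list, cuda=False)
all_done = 0
while not all_done:
    cache_path = cache.getCurrentCache()       # directory of batch_*.pt files
    for fname in sorted(os.listdir(cache_path)):
        batch = torch.load(os.path.join(cache_path, fname))
        # ... train on batch['anchor'], batch['pos'], batch['neg'] ...
    cache_changed, all_done = cache.restart()  # swap if the next cache is ready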