def __detector_process(detector_cfg, recivq: Queue, sendqs, timeout, run_semaphore, pause_event):
    """Worker loop for a detector stage.

    Pulls keyword-argument dicts from ``recivq``, runs the detector built
    from ``detector_cfg`` on them, and fans the result out to every queue
    in ``sendqs``. The loop ends when ``run_semaphore`` is cleared, when
    ``recivq`` stays empty past ``timeout`` (upstream finished), or on
    Ctrl-C / a full downstream queue.
    """
    detector = __build_detector_component(detector_cfg)
    logger = get_logger()
    logger.info('create ' + str(detector_cfg['type']))
    try:
        while run_semaphore.value:
            pause_event.wait()  # block here while the pipeline is paused
            payload = recivq.get(timeout=timeout)
            payload = detector(**payload)
            # Downstream may be several backbones and/or a tracker, hence
            # a list of outgoing queues.
            for out_q in sendqs:
                out_q.put(payload, timeout=timeout)
        else:
            # Normal loop exit: the run semaphore was cleared.
            logger.info('通过信号量停止了detector')
    except KeyboardInterrupt:
        logger.info('user stop the detector process')
    except Empty:
        logger.info('head不再发送数据detector自动释放')
    except Full:
        logger.exception('通向某一条主干或者跟踪器的队列已满')
    finally:
        logger.info('release the detector source')
        del detector  # drop the detector object
        del logger
        torch.cuda.empty_cache()  # free cached GPU memory so a stopped process does not keep holding it
        recivq.cancel_join_thread()
        for out_q in sendqs:
            out_q.cancel_join_thread()
            out_q.close()
        recivq.close()
    return
def __backbone_process(backbone_cfg: list, recivq: Queue, sendq: Queue, timeout, run_semaphore, pause_event):
    """Worker loop for one backbone pipeline.

    Data pulled from ``recivq`` is passed through every component built
    from ``backbone_cfg`` in order; the per-image results of the final
    component are pushed one by one to ``sendq``. The loop ends when
    ``run_semaphore`` is cleared, on queue timeout, Ctrl-C, or any
    unexpected error (which is logged before the process stops).
    """
    # Build every component of this backbone pipeline up front.
    components = [__build_backbone_component(cfg) for cfg in backbone_cfg]
    logger = get_logger()
    logger.info('create backbone')
    try:
        while run_semaphore.value:
            pause_event.wait()  # block here while the pipeline is paused
            data = recivq.get(timeout=timeout)
            # Feed the data through the components in pipeline order.
            # NOTE(review): the final None-check below suggests a component
            # may return None; an intermediate None would crash on **data —
            # confirm components' contract.
            for component in components:
                data = component(**data)
            if data is not None:
                for img_info in data['imgs_info']:
                    sendq.put(img_info, timeout=timeout)
        else:
            # Normal loop exit: the run semaphore was cleared.
            logger.info('通过信号量停止了backbone')
    except KeyboardInterrupt:
        logger.info('user stop a backbone_process process')
    except Empty:
        logger.info('backbone normal stoped')
    except Full as e:
        logger.exception(e)
        logger.warning('通向主进程的队列已满,请检查主进程是否正常取出数据')
    except Exception as e:
        logger.exception(e)
        logger.info('发生不可忽视的错误,因此强制停止整个后台程序运行,请检查log输出定位错误')
    finally:
        logger.info('release backbone source')
        del logger
        recivq.cancel_join_thread()
        sendq.cancel_join_thread()
        recivq.close()
        sendq.close()
    return
def __tracker_process(tracker_cfg, recivq: Queue, sendqs, timeout, run_semaphore, pause_event):
    """Worker loop for a tracker stage.

    Pulls batches ({'imgs': ..., 'imgs_info': ...}) from ``recivq``, runs
    the tracker on each frame to update its metadata, and fans the tracked
    batch out to every queue in ``sendqs``. The loop ends when
    ``run_semaphore`` is cleared, when ``recivq`` stays empty past
    ``timeout`` (the detector finished), or on Ctrl-C / a full downstream
    queue.
    """
    tracker = __build_tracker_component(tracker_cfg)
    logger = get_logger()
    logger.info('create ' + str(tracker_cfg['type']))
    try:
        while True:
            if not run_semaphore.value:
                logger.info('通过信号量停止了tracker')
                break
            pause_event.wait()  # block here while the pipeline is paused
            kwargs = recivq.get(timeout=timeout)
            imgs, imgs_info = kwargs['imgs'], kwargs['imgs_info']
            # Run the tracker on every frame and update its info in place.
            for index, (img, img_info) in enumerate(zip(imgs, imgs_info)):
                img_info = tracker(img, img_info)
                imgs_info[index] = img_info
            for sendq in sendqs:
                sendq.put({'imgs': imgs, 'imgs_info': imgs_info}, timeout=timeout)
    except KeyboardInterrupt:
        # BUGFIX: this message previously said "detector" (copy-paste).
        logger.info('user stop the tracker process')
    except Empty:
        logger.info('detector不再发送数据tracker自动释放')
    except Full:
        logger.exception('通向某一条主干的队列已满')
    finally:
        logger.info('release the tracker source')
        del tracker  # drop the tracker object
        del logger
        torch.cuda.empty_cache()  # free cached GPU memory so a stopped process does not keep holding it
        recivq.cancel_join_thread()
        for sendq in sendqs:
            sendq.cancel_join_thread()
            sendq.close()
        recivq.close()
    return
class BigDataset(torch.utils.data.Dataset):
    """Dataset for corpora too large to fit in memory.

    A background process reads examples from ``files`` into a bounded
    queue; ``next_buffer`` drains up to ``buffer_size`` of them into
    ``self.examples`` for consumption.

    USAGE:
        big_dataset = BigDataset(...)
        # one epoch
        while True:
            try:
                big_dataset.next_buffer()
            except StopIteration:
                break
            some_iter = Iterator(big_dataset)
            for data in some_iter:
                ...  # training or something
    """

    sort_key = None

    def __init__(self, files, fields, buffer_size=10000,
                 split_fields_func=lambda x: x.split("\t"), filter_pred=None):
        """Create a dataset from files.

        Arguments:
            files: a file name or a list of file names.
            fields (List(tuple(str, Field))): the Fields to use. The string
                is a field name, and the Field is the associated field.
            buffer_size (int): max number of examples held in memory at once.
            split_fields_func (callable): splits a raw line into field parts.
            filter_pred (callable or None): use only examples for which
                filter_pred(example) is True, or use all examples if None.
                Default is None.
        """
        if isinstance(files, str):
            files = [files]
        self.files = files
        self.split_fields_func = split_fields_func
        self.buffer_size = buffer_size
        self.filter_pred = filter_pred
        self.examples = []
        self.is_last_buffer = False

        # BUGFIX: self.fields must be assigned (and tuple keys unpacked)
        # BEFORE the worker process is created. The original referenced
        # self.fields in the Process args while it was still unset, which
        # sent __getattr__ into infinite recursion (RecursionError).
        self.fields = dict(fields)
        # Unpack field tuples: a tuple key maps several names to the
        # corresponding fields.
        for n, f in list(self.fields.items()):
            if isinstance(n, tuple):
                self.fields.update(zip(n, f))
                del self.fields[n]

        self.current_queue = Queue(maxsize=self.buffer_size)
        self.current_queue.cancel_join_thread()
        p = multiprocessing.Process(
            target=_worker,
            args=(self.current_queue, self.files, self.fields,
                  self.split_fields_func))
        p.daemon = True  # don't let the reader block interpreter exit
        p.start()

    def __getitem__(self, i):
        """Return the i-th example of the current buffer."""
        if len(self.examples) == 0:
            raise ValueError("run next_buffer first")
        return self.examples[i]

    def __len__(self):
        try:
            return len(self.examples)
        except TypeError:
            # examples may not support len(); report "very large" instead.
            return 2 ** 32

    def __iter__(self):
        """Iterate over the examples of the current buffer."""
        if len(self.examples) == 0:
            raise ValueError("run next_buffer first")
        for x in self.examples:
            yield x

    def __getattr__(self, attr):
        """For a known field name, yield that field of every buffered example.

        BUGFIX: the original was a generator *function*, so ANY missing
        attribute silently returned a generator instead of raising
        AttributeError (breaking hasattr/copy/pickle). Now only known field
        names return a generator; everything else raises.
        """
        # Read via __dict__ so a lookup before __init__ finishes (or after
        # unpickling) cannot recurse back into __getattr__.
        fields = self.__dict__.get('fields')
        if fields is not None and attr in fields:
            return (getattr(x, attr) for x in self.examples)
        raise AttributeError(attr)

    def next_buffer(self):
        """Refill self.examples with up to buffer_size examples.

        Raises:
            StopIteration: when the previous buffer was the last one. Kept
                as StopIteration (not a custom error) for backward
                compatibility with the documented usage loop.
        """
        if self.is_last_buffer:
            raise StopIteration("empty buffer")
        self.examples = []
        for _ in range(self.buffer_size):
            cur_example = self.current_queue.get()
            if cur_example is None:  # sentinel: reader reached end of files
                self.is_last_buffer = True
                break
            self.examples.append(cur_example)
        if self.filter_pred is not None:
            # self.examples is always a list here, so materialize directly.
            self.examples = list(filter(self.filter_pred, self.examples))

    def filter_examples(self, field_names):
        """Remove unknown words from dataset examples with respect to given field.

        Arguments:
            field_names (list(str)): Within example only the parts with field
                names in field_names will have their unknown words deleted.
        """
        for i, example in enumerate(self.examples):
            for field_name in field_names:
                vocab = set(self.fields[field_name].vocab.stoi)
                text = getattr(example, field_name)
                example_part = [word for word in text if word in vocab]
                setattr(example, field_name, example_part)
            self.examples[i] = example