def main(opts):
    """Measure the average time needed to dequeue blobs from a RoIDataLoader.

    Steps, in order:
      1. Build the combined training roidb and a RoIDataLoader over it.
      2. Create a 'dequeue_net' with one DequeueBlobs op per GPU, creating
         the scoped output blobs in the workspace along the way.
      3. Run ``loader_loop`` on the loader, under cProfile when
         ``opts.profiler`` is truthy.
      4. Start the loader and run the net ``opts.num_batches`` times
         (``opts.x_factor`` runs per batch), logging the running average.
      5. Shut the loader down.

    Args:
        opts: options object. The attributes read here are ``num_loaders``,
            ``minibatch_queue_size``, ``blobs_queue_capacity``, ``profiler``,
            ``num_batches``, ``x_factor`` and ``sleep_time``.
    """
    logger = logging.getLogger(__name__)

    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    logger.info('{:d} roidb entries'.format(len(roidb)))

    # NOTE: keep this local named `roi_data_loader`; the cProfile statement
    # string further down resolves it by name through locals().
    roi_data_loader = RoIDataLoader(
        roidb,
        num_loaders=opts.num_loaders,
        minibatch_queue_size=opts.minibatch_queue_size,
        blobs_queue_capacity=opts.blobs_queue_capacity
    )
    output_names = roi_data_loader.get_output_names()

    net = core.Net('dequeue_net')
    net.type = 'dag'
    all_blobs = []  # only consumed by the inspection hint in the timing loop
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)), \
                core.DeviceScope(muji.OnGPU(gpu_id)):
            for output_name in output_names:
                scoped_blob = core.ScopedName(output_name)
                all_blobs.append(scoped_blob)
                workspace.CreateBlob(scoped_blob)
                logger.info('Creating blob: {}'.format(scoped_blob))
            # One dequeue op per GPU, added while that GPU's scopes are active.
            net.DequeueBlobs(
                roi_data_loader._blobs_queue_name, output_names
            )
    logger.info("Protobuf:\n" + str(net.Proto()))

    if not opts.profiler:
        loader_loop(roi_data_loader)
    else:
        import cProfile
        cProfile.runctx(
            'loader_loop(roi_data_loader)',
            globals(),
            locals(),
            sort='cumulative'
        )

    roi_data_loader.register_sigint_handler()
    roi_data_loader.start(prefill=True)

    total_time = 0
    for batches_done in range(1, opts.num_batches + 1):
        tic = time.time()
        for _ in range(opts.x_factor):
            workspace.RunNetOnce(net)
        elapsed = time.time() - tic
        # Each batch contributes its mean time per net run.
        total_time += elapsed / opts.x_factor

        progress = (
            '{:d}/{:d}: Averge dequeue time: {:.3f}s [{:d}/{:d}]'.format(
                batches_done,
                opts.num_batches,
                total_time / batches_done,
                roi_data_loader._minibatch_queue.qsize(),
                opts.minibatch_queue_size
            )
        )
        logger.info(progress)

        # Sleep to simulate the time taken by running a little network
        time.sleep(opts.sleep_time)
        # To inspect:
        # blobs = workspace.FetchBlobs(all_blobs)
        # from IPython import embed; embed()

    logger.info('Shutting down data loader...')
    roi_data_loader.shutdown()
def add_training_inputs(model, roidb=None):
    """Attach the training input blobs and dequeue ops to an existing model.

    Call this *after* model_builder.create().

    Args:
        model: a model built for training (``model.train`` must be truthy).
        roidb: optional roidb. When given, a new RoIDataLoader over it is
            stored on ``model.roi_data_loader``. When ``None``, the loader
            already attached to the model is used.
    """
    # Why the inputs are added late:
    # The natural order is to build input ops first and the rest of the net
    # afterwards. Building input ops needs the dataset, though, and loading
    # it can take a few minutes for COCO. So that debugging can fail fast,
    # the net is built *without* input ops before the dataset is loaded and
    # the input ops are appended once it is. The appended ops are then
    # rotated to the front of the op list at the end of this function.
    assert model.train, \
        'Training inputs can only be added to a trainable model'

    if roidb is not None:
        # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1
        model.roi_data_loader = RoIDataLoader(
            roidb, num_loaders=cfg.DATA_LOADER.NUM_THREADS
        )

    num_ops_before = len(model.net._net.op)
    minibatch_blobs = roi_data.minibatch.get_minibatch_blob_names(
        is_training=True
    )
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for name in minibatch_blobs:
                workspace.CreateBlob(core.ScopedName(name))
            model.net.DequeueBlobs(
                model.roi_data_loader._blobs_queue_name, minibatch_blobs
            )

    # A little op surgery to move input ops to the start of the net: take the
    # ops appended above, put them first, then rewrite the op list in place.
    ops = model.net._net.op
    num_added = len(ops) - num_ops_before
    reordered = ops[-num_added:] + ops[:-num_added]
    del ops[:]
    ops.extend(reordered)
def add_inputs(model, roidb=None):
    """Add the network input ops to an already-built model.

    Call this *after* model_builder.create().

    Args:
        model: the model to extend; ``model.train`` selects between
            ``add_train_inputs`` and ``add_test_inputs``.
        roidb: optional roidb. When given, a new RoIDataLoader over it is
            stored on ``model.roi_data_loader``.
    """
    # Why the inputs are added late:
    # The natural order is to build input ops first and the rest of the net
    # afterwards. Building input ops needs the dataset, though, and loading
    # it can take a few minutes for COCO. So that debugging can fail fast,
    # the net is built *without* input ops before the dataset is loaded and
    # the input ops are appended once it is. The appended ops are then
    # rotated to the front of the op list at the end of this function.
    if roidb is not None:
        # Make debugging easier when NUM_GPUS is 1 by only using one worker
        # thread for loading mini-batches
        if cfg.NUM_GPUS == 1:
            num_workers = 1
        else:
            num_workers = cfg.NUM_WORKERS
        model.roi_data_loader = RoIDataLoader(
            roidb,
            num_workers=num_workers,
            num_enqueuers=1,
            minibatch_queue_size=cfg.TRAIN.MINIBATCH_QUEUE_SIZE
        )

    num_ops_before = len(model.net._net.op)
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)), \
                core.DeviceScope(muji.OnGPU(gpu_id)):
            # Pick the input builder per GPU, inside the active scopes.
            add_gpu_inputs = (
                add_train_inputs if model.train else add_test_inputs
            )
            add_gpu_inputs(model)

    # A little op surgery to move input ops to the start of the net: take the
    # ops appended above, put them first, then rewrite the op list in place.
    ops = model.net._net.op
    num_added = len(ops) - num_ops_before
    reordered = ops[-num_added:] + ops[:-num_added]
    del ops[:]
    ops.extend(reordered)
def main(opts):
    """Benchmark blob dequeueing from a RoIDataLoader.

    Builds the training roidb and a loader over it, assembles a
    'dequeue_net' containing a DequeueBlobs op for every GPU, runs
    ``loader_loop`` (profiled with cProfile if ``opts.profiler`` is truthy),
    then starts the loader and repeatedly runs the net while logging the
    running average time per run. The loader is shut down at the end.

    Args:
        opts: options object. The attributes read here are ``num_loaders``,
            ``minibatch_queue_size``, ``blobs_queue_capacity``, ``profiler``,
            ``num_batches``, ``x_factor`` and ``sleep_time``.
    """
    logger = logging.getLogger(__name__)

    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS,
        cfg.TRAIN.PROPOSAL_FILES
    )
    logger.info('{:d} roidb entries'.format(len(roidb)))

    # NOTE: the name `roi_data_loader` is load-bearing: the profiler branch
    # evaluates a statement string that looks it up through locals().
    loader_kwargs = dict(
        num_loaders=opts.num_loaders,
        minibatch_queue_size=opts.minibatch_queue_size,
        blobs_queue_capacity=opts.blobs_queue_capacity
    )
    roi_data_loader = RoIDataLoader(roidb, **loader_kwargs)
    names = roi_data_loader.get_output_names()

    net = core.Net('dequeue_net')
    net.type = 'dag'
    all_blobs = []  # kept for the interactive inspection hint below
    for gpu_id in range(cfg.NUM_GPUS):
        name_scope = 'gpu_{}'.format(gpu_id)
        with core.NameScope(name_scope):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for name in names:
                    scoped = core.ScopedName(name)
                    all_blobs.append(scoped)
                    workspace.CreateBlob(scoped)
                    logger.info('Creating blob: {}'.format(scoped))
                # Dequeue op for this GPU, added under its name/device scope.
                net.DequeueBlobs(roi_data_loader._blobs_queue_name, names)
    logger.info("Protobuf:\n" + str(net.Proto()))

    if opts.profiler:
        import cProfile
        profiled_stmt = 'loader_loop(roi_data_loader)'
        cProfile.runctx(
            profiled_stmt, globals(), locals(), sort='cumulative'
        )
    else:
        loader_loop(roi_data_loader)

    roi_data_loader.register_sigint_handler()
    roi_data_loader.start(prefill=True)

    total_time = 0
    report_fmt = '{:d}/{:d}: Averge dequeue time: {:.3f}s [{:d}/{:d}]'
    for i in range(opts.num_batches):
        num_done = i + 1
        start_t = time.time()
        for _ in range(opts.x_factor):
            workspace.RunNetOnce(net)
        # Accumulate this batch's mean time per net run.
        total_time += (time.time() - start_t) / opts.x_factor
        logger.info(
            report_fmt.format(
                num_done,
                opts.num_batches,
                total_time / num_done,
                roi_data_loader._minibatch_queue.qsize(),
                opts.minibatch_queue_size
            )
        )
        # Sleep to simulate the time taken by running a little network
        time.sleep(opts.sleep_time)
        # To inspect:
        # blobs = workspace.FetchBlobs(all_blobs)
        # from IPython import embed; embed()

    logger.info('Shutting down data loader (EnqueueBlob errors are ok)...')
    roi_data_loader.shutdown()
def create_loader_and_network(sample_data, name):
    """Build a started RoIDataLoader and the net that reads from it.

    Args:
        sample_data: sample data passed to ``get_roidb_sample_data`` to
            obtain the roidb the loader is built over.
        name: name forwarded to ``get_net`` for the created net.

    Returns:
        A ``(loader, net)`` tuple. The loader has its SIGINT handler
        registered and has been started with ``prefill=False``.
    """
    roidb = get_roidb_sample_data(sample_data)
    loader = RoIDataLoader(roidb)
    # Honor the caller-supplied name. Previously the argument was ignored and
    # every net was requested under the hard-coded name 'dequeue_net_train'.
    net = get_net(loader, name)
    loader.register_sigint_handler()
    loader.start(prefill=False)
    return loader, net