def test_restore_checkpoint():
    # Create Model
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)
    # Fill input blobs
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    model_builder.add_training_inputs(model, roidb=roidb)
    workspace.CreateNet(model.net)
    # Bookkeeping for checkpoint creation
    iter_num = 0
    checkpoints = {}
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    chk_file_path = os.path.join(output_dir, 'model_iter{}.pkl'.format(iter_num))
    checkpoints[iter_num] = chk_file_path
    # Save model weights
    nu.save_model_to_weights_file(checkpoints[iter_num], model)
    orig_gpu_0_params, orig_all_params = get_params(model)
    # Change the model weights
    init_weights(model)
    # Reload the weights in the model
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    shutil.rmtree(cfg.OUTPUT_DIR)
    _, restored_all_params = get_params(model)
    # Check if all params are loaded correctly
    for scoped_name, blob in orig_all_params.items():
        np.testing.assert_array_equal(blob, restored_all_params[scoped_name])
    # Check if broadcast_parameters works
    for scoped_name, blob in restored_all_params.items():
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        np.testing.assert_array_equal(blob, orig_gpu_0_params[unscoped_name])
def main(opts):
    logger = logging.getLogger(__name__)
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    logger.info('{:d} roidb entries'.format(len(roidb)))
    roi_data_loader = RoIDataLoader(
        roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
    )
    blob_names = roi_data_loader.get_output_names()

    net = core.Net('dequeue_net')
    net.type = 'dag'
    all_blobs = []
    for gpu_id in range(cfg.NUM_GPUS):
        with core.NameScope('gpu_{}'.format(gpu_id)):
            with core.DeviceScope(muji.OnGPU(gpu_id)):
                for blob_name in blob_names:
                    blob = core.ScopedName(blob_name)
                    all_blobs.append(blob)
                    workspace.CreateBlob(blob)
                    logger.info('Creating blob: {}'.format(blob))
                net.DequeueBlobs(
                    roi_data_loader._blobs_queue_name, blob_names)
    logger.info("Protobuf:\n" + str(net.Proto()))

    if opts.profiler:
        import cProfile
        cProfile.runctx(
            'loader_loop(roi_data_loader)', globals(), locals(),
            sort='cumulative')
    else:
        loader_loop(roi_data_loader)

    roi_data_loader.register_sigint_handler()
    roi_data_loader.start(prefill=True)
    total_time = 0
    for i in range(opts.num_batches):
        start_t = time.time()
        for _ in range(opts.x_factor):
            workspace.RunNetOnce(net)
        total_time += (time.time() - start_t) / opts.x_factor
        logger.info(
            '{:d}/{:d}: Averge dequeue time: {:.3f}s  [{:d}/{:d}]'.format(
                i + 1, opts.num_batches, total_time / (i + 1),
                roi_data_loader._minibatch_queue.qsize(),
                cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE
            )
        )
        # Sleep to simulate the time taken by running a little network
        time.sleep(opts.sleep_time)
        # To inspect:
        # blobs = workspace.FetchBlobs(all_blobs)
        # from IPython import embed; embed()
    logger.info('Shutting down data loader...')
    roi_data_loader.shutdown()
Esempio n. 3
0
def add_model_training_inputs(model):
    """Load the training dataset and attach the training inputs to the model."""
    logger = logging.getLogger(__name__)
    logger.info('Loading dataset: {}'.format(cfg.TRAIN.DATASETS))
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    logger.info('{:d} roidb entries'.format(len(roidb)))
    model_builder.add_training_inputs(model, roidb=roidb)