Example #1
def setup_model_for_training(model, weights_file, output_dir):
    """Loaded saved weights and create the network in the C2 workspace."""
    logger = logging.getLogger(__name__)
    # Add network inputs
    add_model_training_inputs(model)

    if weights_file:
        # Override random weight initialization with weights from a saved model
        # Load the pretrained model parameters
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)

    # Create the network
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    # Start loading mini-batches and enqueuing blobs
    # Start loading data
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return output_dir
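# Added sketch (not part of the example above): how setup_model_for_training
# is typically driven from a training script. The call sequence is modeled on
# Detectron's tools/train_net.py; cfg.TRAIN.WEIGHTS, get_output_dir and the
# surrounding imports are assumed to be available, so treat this as an
# illustration rather than verbatim source.
def _training_driver_sketch():
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    setup_model_for_training(model, cfg.TRAIN.WEIGHTS, output_dir)
    for cur_iter in range(cfg.SOLVER.MAX_ITER):
        workspace.RunNet(model.net.Proto().name)  # one SGD step per call
    model.roi_data_loader.shutdown()  # stop the loader threads when done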
Example #2
def main(args):
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    for i, weights_file in enumerate(args.weights_list):
        args.weights_list[i] = cache_url(weights_file, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)

    preffix_list = args.preffix_list if len(args.preffix_list) \
        else [""] * len(args.weights_list)
    model = model_builder.create(cfg.MODEL.TYPE, train=False)
    # Initialize GPU from weights files
    for i, weights_file in enumerate(args.weights_list):
        nu.initialize_gpu_from_weights_file(model,
                                            weights_file,
                                            gpu_id=0,
                                            preffix=preffix_list[i])
    nu.broadcast_parameters(model)
    blobs = {}
    # Save all parameters
    for param in model.params:
        scoped_name = str(param)
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        if unscoped_name not in blobs:
            if workspace.HasBlob(scoped_name):
                blobs[unscoped_name] = workspace.FetchBlob(scoped_name)
    # Save merged weights file
    save_object(dict(blobs=blobs), args.output_wts)
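# Added sketch (assumption): what the scoped -> unscoped mapping above amounts
# to. c2_utils.UnscopeName strips the device name scope (e.g. 'gpu_0/') so
# parameters that exist once per GPU collapse to a single entry. A
# self-contained approximation of that helper:
def _unscope(name, separator='/'):
    # 'gpu_0/conv1_w' -> 'conv1_w'; names without a scope pass through
    return name[name.rfind(separator) + 1:]

assert _unscope('gpu_0/conv1_w') == 'conv1_w'
assert _unscope('conv1_w') == 'conv1_w'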
Example #3
def test_restore_checkpoint():
    # Create Model
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)
    # Fill input blobs
    roidb = combined_roidb_for_training(cfg.TRAIN.DATASETS,
                                        cfg.TRAIN.PROPOSAL_FILES)
    model_builder.add_training_inputs(model, roidb=roidb)
    workspace.CreateNet(model.net)
    # Bookkeeping for checkpoint creation
    iter_num = 0
    checkpoints = {}
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    chk_file_path = os.path.join(output_dir,
                                 'model_iter{}.pkl'.format(iter_num))
    checkpoints[iter_num] = chk_file_path
    # Save model weights
    nu.save_model_to_weights_file(checkpoints[iter_num], model)
    orig_gpu_0_params, orig_all_params = get_params(model)
    # Change the model weights
    init_weights(model)
    # Reload the weights in the model
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    shutil.rmtree(cfg.OUTPUT_DIR)
    _, restored_all_params = get_params(model)
    # Check if all params are loaded correctly
    for scoped_name, blob in orig_all_params.items():
        np.testing.assert_array_equal(blob, restored_all_params[scoped_name])
    # Check if broadcast_parameters works
    for scoped_name, blob in restored_all_params.items():
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        np.testing.assert_array_equal(blob, orig_gpu_0_params[unscoped_name])
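# Added sketch (assumption): the checkpoint written by
# nu.save_model_to_weights_file above is a pickled dict of the form
# {'blobs': {unscoped_name: ndarray}} -- the same structure the save_object
# call in Example #2 produces. A minimal reader; the encoding argument only
# matters when loading Python 2 pickles under Python 3:
import pickle

def _load_weights_pkl(path):
    with open(path, 'rb') as f:
        saved = pickle.load(f, encoding='latin1')
    return saved['blobs']  # {unscoped blob name: numpy array}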
Example #5
def create_model(weights_file):
    """adapted from utils.train.setup_model_for_training
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        optimize_memory(model)
    # Performs random weight initialization as defined by the model
    workspace.RunNetOnce(model.param_init_net)

    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    # To make debugging easier you can set cfg.DATA_LOADER.NUM_THREADS = 1
    model.roi_data_loader = RoIDataLoaderSimple(
        roidb,
        num_loaders=cfg.DATA_LOADER.NUM_THREADS,
        minibatch_queue_size=cfg.DATA_LOADER.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=cfg.DATA_LOADER.BLOBS_QUEUE_CAPACITY
    )
    orig_num_op = len(model.net._net.op)
    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)
    with c2_utils.NamedCudaScope(0):
        for blob_name in blob_names:
            workspace.CreateBlob(core.ScopedName(blob_name))
        model.net.DequeueBlobs(
            model.roi_data_loader._blobs_queue_name, blob_names
        )
    # A little op surgery to move input ops to the start of the net
    diff = len(model.net._net.op) - orig_num_op
    new_op = model.net._net.op[-diff:] + model.net._net.op[:-diff]
    del model.net._net.op[:]
    model.net._net.op.extend(new_op)

    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    nu.broadcast_parameters(model)

    workspace.CreateBlob("gpu_0/track_n_rois_two")
    workspace.CreateNet(model.net)

    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return model
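# Added sketch: the "op surgery" above is a rotation of the op list, moving
# the DequeueBlobs ops appended at the end to the front so inputs are
# produced before any op that consumes them. The same rotation on a plain
# Python list:
ops = ['conv1_op', 'conv2_op', 'dequeue_op']  # stand-ins for model.net._net.op
diff = 1  # number of ops appended after orig_num_op was recorded
ops = ops[-diff:] + ops[:-diff]
assert ops == ['dequeue_op', 'conv1_op', 'conv2_op']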
Example #6
def setup_model_for_training(model, weights_file, output_dir):
    """Loaded saved weights and create the network in the C2 workspace."""
    logger = logging.getLogger(__name__)
    add_model_training_inputs(model)

    if weights_file:
        # Override random weight initialization with weights from a saved model
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return output_dir
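# Added sketch (assumption): a minimal stand-in for the dump_proto_files
# helper used above. Detectron's version writes the text protos of both nets
# to the output directory; the file names here follow detectron.utils.train
# but should be treated as an approximation:
import os

def _dump_proto_files(model, output_dir):
    net_name = model.net.Proto().name
    with open(os.path.join(output_dir, net_name + '.pbtxt'), 'w') as f:
        f.write(str(model.net.Proto()))
    with open(os.path.join(output_dir, 'param_init_net.pbtxt'), 'w') as f:
        f.write(str(model.param_init_net.Proto()))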
Example #7
def setup_model_for_training(model, weights_file, output_dir):
    """Loaded saved weights and create the network in the C2 workspace."""
    logger = logging.getLogger(__name__)
    if cfg.TRAIN.DOMAIN_ADAPTATION:
        add_model_da_training_inputs(model)
    else:
        add_model_training_inputs(model)

    if weights_file:
        # Override random weight initialization with weights from a saved model
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    # Optional debugging aid: visualize the net with net_drawer, e.g.
    #   graph = net_drawer.GetPydotGraphMinimal(
    #       model.net.Proto().op, "da-frcnn", rankdir='LR')
    #   graph.write_png('graph.png')

    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    # Jerome: TODO: set back to True:
    model.roi_data_loader.start(prefill=False)
    return output_dir
Example #8
def setup_model_for_training(model, weights_file, output_dir):
    """Loaded saved weights and create the network in the C2 workspace."""
    logger = logging.getLogger(__name__)
    add_model_training_inputs(model)

    if weights_file:
        # Override random weight initialization with weights from a saved model
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)

    logger.info("{}".format(cfg.TRAIN.Load_SqueezeNetWeights))
    logger.info("{}".format(cfg.TRAIN.SqueezeNetWeightsFile))

    # print (cfg.TRAIN.SqueezeNetWeightsFile)
    if cfg.TRAIN.Load_SqueezeNetWeights:
        prefix = "gpu_0/"
        logger.info('========> Loading Weights For SqueezeNet <========')
        pickle_file = cfg.TRAIN.SqueezeNetWeightsFile
        with open(pickle_file, 'rb') as f:
            weights = pickle.load(f)

        dev = c2_utils.CudaDevice(0)
        for i in weights.keys():
            # workspace.FetchBlob(prefix+i)
            workspace.FeedBlob(prefix + i, weights[i], device_option=dev)

    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    # Start loading mini-batches and enqueuing blobs
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return output_dir
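# Added sketch: feeding one pretrained array into a GPU-scoped blob, the same
# pattern the SqueezeNet branch above uses. c2_utils.CudaDevice builds the
# DeviceOption for the given GPU; the blob name and array below are
# illustrative only.
import numpy as np

def _feed_one(name, array, gpu_id=0):
    dev = c2_utils.CudaDevice(gpu_id)
    workspace.FeedBlob('gpu_{}/{}'.format(gpu_id, name), array,
                       device_option=dev)

# e.g. _feed_one('conv1_w', np.zeros((64, 3, 3, 3), dtype=np.float32))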
Example #9
def main():
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'])
    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    logger.info("cuda version : {}".format(cuda_ver))
    logger.info("cudnn version: {}".format(cudnn_ver))
    logger.info("nvidia-smi output:\n{}".format(smi_output))
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    # Note that while we set the numpy random seed network training will not be
    # deterministic in general. There are sources of non-determinism that cannot
    # be removed with a reasonable execution-speed tradeoff (such as certain
    # non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)
    # eval model used by the data loader (named eval_model so it does not
    # shadow the test_model() helper called at the end of main)
    logger.info("create eval model ...")
    eval_model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS,
                                                       gpu_id=0)
    logger.info("created eval model ...")
    train_data = DataLoader(root,
                            "train_id.txt",
                            cfg,
                            eval_model,
                            is_train=True)
    # create model
    model, weights_file, start_iter, checkpoints = create_model(
        True, cfg, output_dir)
    # test blob
    print(workspace.Blobs())
    # create input blob
    blob_names = ['data_stage2', 'gt_label_stage2']
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))
    # Override random weight initialization with weights from a saved model
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    writer = SummaryWriter(log_dir=output_dir)
    training_stats = TrainingStats(model, writer)
    CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)
    logger.info("start train ...")
    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        # feed data
        # print("{} iter starting feed data...".format(cur_iter))
        data_stage2, gt_label = train_data.next_batch()
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.FeedBlob(core.ScopedName('data_stage2'), data_stage2)
            workspace.FeedBlob(core.ScopedName('gt_label_stage2'), gt_label)

        # print("workspace.RunNet(model.net.Proto().name)")
        training_stats.IterTic()
        lr = model.UpdateWorkspaceLr(cur_iter,
                                     lr_policy.get_lr_at_iter(cur_iter))
        workspace.RunNet(model.net.Proto().name)
        if cur_iter == start_iter:
            nu.print_net(model)
        training_stats.IterToc()
        training_stats.UpdateIterStats(cur_iter)
        training_stats.LogIterStats(cur_iter, lr)
        writer.add_scalar('learning_rate', lr, cur_iter)

        # print("end of RunNet")
        if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter:
            checkpoints[cur_iter] = os.path.join(
                output_dir, 'model_iter{}.pkl'.format(cur_iter))
            nu.save_model_to_weights_file(checkpoints[cur_iter], model)

        if cur_iter == start_iter + training_stats.LOG_PERIOD:
            # Reset the iteration timer to remove outliers from the first few
            # SGD iterations
            training_stats.ResetIterTimer()

        if np.isnan(training_stats.iter_total_loss):
            handle_critical_error(model, 'Loss is NaN')

    # Save the final model
    checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl')
    nu.save_model_to_weights_file(checkpoints['final'], model)
    # save train loss and metric
    state_file = os.path.join(output_dir, 'training_state.json')
    training_stats.SaveTrainingStates(state_file)
    # Test the trained model (test_model here is the testing helper from
    # tools/train_net.py, not the eval model created above)
    if not args.skip_test:
        test_model(checkpoints['final'], args.multi_gpu_testing, args.opts)
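# Added sketch (assumption): resuming from the periodic checkpoints written
# above. The 'model_iter{N}.pkl' naming comes from the training loop; picking
# the newest file this way is illustrative, not part of the source.
import glob
import os
import re

def _latest_checkpoint(output_dir):
    files = glob.glob(os.path.join(output_dir, 'model_iter*.pkl'))
    if not files:
        return None, 0
    iter_of = lambda f: int(re.search(r'model_iter(\d+)\.pkl', f).group(1))
    newest = max(files, key=iter_of)
    return newest, iter_of(newest) + 1  # weights file, iteration to resume at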
Example #10
def init_weights(model):
    # init weights in gpu_id = 0 and then broadcast
    workspace.RunNetOnce(model.param_init_net)
    nu.broadcast_parameters(model)
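# Added sketch: what broadcast_parameters effectively does -- copy every
# parameter blob under the gpu_0 scope to the same name under every other
# GPU scope. In plain Python terms (a dict stands in for the C2 workspace):
def _broadcast_sketch(params, num_gpus):
    for name in [n for n in params if n.startswith('gpu_0/')]:
        for g in range(1, num_gpus):
            params[name.replace('gpu_0/', 'gpu_{}/'.format(g))] = params[name]
    return params

_p = _broadcast_sketch({'gpu_0/conv1_w': 1.0, 'gpu_1/conv1_w': 9.0}, 2)
assert _p['gpu_1/conv1_w'] == _p['gpu_0/conv1_w']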
Example #11
def main(args):
    MINIMAL = False
    TRAIN = False
    FORWARD = False
    SHAPES = False
    HIDE_PARAMS = True
    if args.opts is not None:
        if 'minimal' in args.opts:
            MINIMAL = True
        if 'train' in args.opts:
            TRAIN = True
        if 'forward' in args.opts:
            FORWARD = True
        if 'shapes' in args.opts:
            SHAPES = True
        if 'params' in args.opts:
            HIDE_PARAMS = False

    if SHAPES and args.model_file is None:
        raise ValueError('Specify model file')
    MODEL_FILE = args.model_file
    NET_NAMES = args.net_names

    if MINIMAL:
        get_dot_graph = lambda net, shapes: net_drawer.GetPydotGraphMinimal(
            net, rankdir="BT")
    else:
        get_dot_graph = lambda net, shapes: net_drawer.GetPydotGraph(
            net, rankdir="BT", shapes=shapes, hide_params=HIDE_PARAMS)

    # Get model
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    cfg.NUM_GPUS = 1
    cfg.VIS_NET = True
    if FORWARD:
        cfg.MODEL.FORWARD_ONLY = True
    assert_and_infer_cfg(cache_urls=False)

    if SHAPES and TRAIN:
        raise NotImplementedError

    # Run model to get shape information of all blobs
    if SHAPES:
        model = infer_engine.initialize_model_from_cfg(MODEL_FILE)
        workspace.RunNetOnce(model.param_init_net)
        nu.broadcast_parameters(model)

        dataset = JsonDataset(cfg.TRAIN.DATASETS[0])
        roidb = dataset.get_roidb()

        with c2_utils.NamedCudaScope(0):
            if cfg.MODEL.TRACKING_ON:
                roidb_min = [roidb[0], roidb[1]]
                im_list = [cv2.imread(e['image']) for e in roidb_min]
                infer_engine.multi_im_detect_all(model, im_list, [None, None])
            else:
                infer_engine.im_detect_all(model, roidb[0]['image'], None)
    else:
        model = model_builder.create(cfg.MODEL.TYPE, train=TRAIN)

    subprocess.call(["killall", "xdot"])

    # Visualize all specified nets
    for net_name in NET_NAMES:
        net = getattr(model, net_name, None)
        if net:
            print('processing graph {}...'.format(net_name))
            g = get_dot_graph(net.Proto(), shapes=SHAPES)
            name = net_name
            if TRAIN:
                name_append = 'train'
            else:
                name_append = 'infer'
            # Save graph
            graph_dir = os.path.join(args.output_dir, cfg.MODEL.TYPE)
            if not os.path.exists(graph_dir):
                os.makedirs(graph_dir)
            dot_name = os.path.join(graph_dir,
                                    '{}_{}.dot'.format(net_name, name_append))
            g.write_dot(dot_name)
            subprocess.Popen(['xdot', dot_name])
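# Added sketch: the smallest standalone use of the same drawing API on a toy
# net (net_drawer ships with caffe2.python; write_dot comes from pydot). The
# file path is illustrative.
from caffe2.python import core as _core
from caffe2.python import net_drawer as _net_drawer

_toy = _core.Net('toy')
_toy.Relu(['x'], ['y'])
_graph = _net_drawer.GetPydotGraph(_toy.Proto(), rankdir='BT')
_graph.write_dot('/tmp/toy.dot')  # then view with: xdot /tmp/toy.dot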
Example #12
def main():
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1']
    )
    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    logger.info("cuda version : {}".format(cuda_ver))
    logger.info("cudnn version: {}".format(cudnn_ver))
    logger.info("nvidia-smi output:\n{}".format(smi_output))
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    # Note that while we set the numpy random seed network training will not be
    # deterministic in general. There are sources of non-determinism that cannot
    # be removed with a reasonable execution-speed tradeoff (such as certain
    # non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)
    # test model
    logger.info("creat test model ...")
    test_model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS, gpu_id=0)
    logger.info("created test model ...")
    #cfg.TRAIN.IMS_PER_BATCH = 1
    train_data = DataLoader(root, "val_id.txt", cfg, test_model, is_train=False)
    # create model
    model, weights_file, start_iter, checkpoints = create_model(False, cfg, output_dir)
    # test blob
    print(workspace.Blobs())
    # create input blob
    blob_names = ['data_stage2']
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))
    # Override random weight initialization with weights from a saved model
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))

    logger.info("start test ...")
    save_root = os.path.join(output_dir, 'fusion')
    if not os.path.exists(save_root):
        os.makedirs(save_root)
    for cur_iter in range(10000):
        # feed data
        # print("{} iter starting feed data...".format(cur_iter))
        data_stage2, gt_label, meta = train_data.next_batch()
        '''#
        print('input0-20 sunglasses max score:', np.max(data_stage2[0, 4, :, :]))
        print('input20-40 sunglasses max score:', np.max(data_stage2[0, 24, :, :]))
        print('input0-20 gloves max score:', np.max(data_stage2[0, 3, :, :]))
        print('input20-40 gloves max score:', np.max(data_stage2[0, 23, :, :]))
        #'''
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.FeedBlob(core.ScopedName('data_stage2'), data_stage2)

        # print("workspace.RunNet(model.net.Proto().name)")
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.RunNet(model.net.Proto().name)
            batch_probs = workspace.FetchBlob(core.ScopedName('probs_human_NCHW_stage2'))
            batch_probs = batch_probs.transpose((0, 2, 3, 1))
        assert len(meta) == batch_probs.shape[0]
        #print('batch_probs shape:', batch_probs.shape)
        for i in range(len(meta)):
            probs = cv2.resize(batch_probs[i], (meta[i]['width'], meta[i]['height']), interpolation=cv2.INTER_LINEAR)
            probs = probs.transpose((2, 0, 1))
            print('sunglasses max score:', np.max(probs[4, :, :]))
            print('gloves max score:', np.max(probs[3, :, :]))
            # print('probs shape:', probs.shape)
            cv2.imwrite(os.path.join(save_root, meta[i]['id'] + '.png'), probs.argmax(0))
        print("prossed ", cur_iter)