Beispiel #1
0
def setup_model_for_training(model, weights_file, output_dir):
    """Prepare ``model`` for training and create its net in the C2 workspace.

    Adds the training inputs, optionally loads ``weights_file`` onto GPU 0,
    broadcasts the parameters, creates ``model.net``, calls
    ``dump_proto_files`` for ``output_dir`` and starts the RoI data loader.

    Returns:
        ``output_dir``, unchanged.
    """
    log = logging.getLogger(__name__)

    # Attach the network inputs.
    add_model_training_inputs(model)

    # A saved model, when given, overrides the random weight initialization
    # (this is where pre-trained parameters are loaded).
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)

    # Parameters still have to be synchronized across GPUs, even when they
    # were only randomly initialized.
    nu.broadcast_parameters(model)

    # Create the network.
    workspace.CreateNet(model.net)

    abs_output_dir = os.path.abspath(output_dir)
    log.info('Outputs saved to: {:s}'.format(abs_output_dir))
    dump_proto_files(model, output_dir)

    # Begin loading mini-batches and enqueuing blobs.
    loader = model.roi_data_loader
    loader.register_sigint_handler()
    loader.start(prefill=True)
    return output_dir
def main(args):
    """Load several weights files into one model and save the merged weights.

    Every entry of ``args.weights_list`` is resolved through ``cache_url``
    (the list is updated in place), loaded onto GPU 0 with its matching
    prefix, and the resulting parameters are written to ``args.output_wts``.
    """
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    for idx, location in enumerate(args.weights_list):
        args.weights_list[idx] = cache_url(location, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)

    # Fall back to an empty prefix per weights file when none were supplied.
    if len(args.preffix_list):
        preffix_list = args.preffix_list
    else:
        preffix_list = [""] * len(args.weights_list)

    model = model_builder.create(cfg.MODEL.TYPE, train=False)

    # Initialize GPU 0 from each weights file in turn.
    for idx, weights_path in enumerate(args.weights_list):
        nu.initialize_gpu_from_weights_file(
            model, weights_path, gpu_id=0, preffix=preffix_list[idx]
        )
    nu.broadcast_parameters(model)

    # Collect each parameter once, keyed by its unscoped name; parameters
    # without a blob in the workspace are skipped.
    blobs = {}
    for param in model.params:
        scoped_name = str(param)
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        if unscoped_name in blobs or not workspace.HasBlob(scoped_name):
            continue
        blobs[unscoped_name] = workspace.FetchBlob(scoped_name)

    # Write the merged weights file.
    save_object({'blobs': blobs}, args.output_wts)
Beispiel #3
0
def test_restore_checkpoint():
    """Save the model weights, re-initialize, reload, and compare parameters.

    Verifies that every parameter read back after reloading equals the value
    captured right after saving, and that each restored parameter equals the
    GPU-0 value stored under its unscoped name.
    """
    # Build the model and give it initial weights.
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)

    # Populate the input blobs.
    training_roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    model_builder.add_training_inputs(model, roidb=training_roidb)
    workspace.CreateNet(model.net)

    # Checkpoint bookkeeping.
    iter_num = 0
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    chk_file_path = os.path.join(
        output_dir, 'model_iter{}.pkl'.format(iter_num)
    )
    checkpoints = {iter_num: chk_file_path}

    # Persist the weights and remember what they were.
    nu.save_model_to_weights_file(checkpoints[iter_num], model)
    orig_gpu_0_params, orig_all_params = get_params(model)

    # Overwrite the weights, then restore them from the checkpoint.
    init_weights(model)
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    shutil.rmtree(cfg.OUTPUT_DIR)
    _, restored_all_params = get_params(model)

    # Every parameter must have been loaded back correctly.
    for name, expected in orig_all_params.items():
        np.testing.assert_array_equal(expected, restored_all_params[name])

    # broadcast_parameters must have propagated the GPU-0 values.
    for name, restored in restored_all_params.items():
        np.testing.assert_array_equal(
            restored, orig_gpu_0_params[c2_utils.UnscopeName(name)]
        )
def test_restore_checkpoint():
    """Check that a weights file written by ``nu`` restores the same parameters.

    The model is saved, its weights are re-initialized, the file is loaded
    back onto GPU 0 and broadcast, and the parameters are compared with the
    ones captured immediately after saving.
    """
    # Create the model.
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)

    # Fill the input blobs.
    datasets = cfg.TRAIN.DATASETS
    roidb = combined_roidb_for_training(datasets, cfg.TRAIN.PROPOSAL_FILES)
    model_builder.add_training_inputs(model, roidb=roidb)
    workspace.CreateNet(model.net)

    # Bookkeeping for the checkpoint that is about to be written.
    iter_num = 0
    checkpoints = {}
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    file_name = 'model_iter{}.pkl'.format(iter_num)
    chk_file_path = os.path.join(output_dir, file_name)
    checkpoints[iter_num] = chk_file_path

    # Save, snapshot, perturb, reload.
    nu.save_model_to_weights_file(checkpoints[iter_num], model)
    params_before = get_params(model)
    orig_gpu_0_params = params_before[0]
    orig_all_params = params_before[1]
    init_weights(model)
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    shutil.rmtree(cfg.OUTPUT_DIR)
    restored_all_params = get_params(model)[1]

    # All parameters must match what was saved.
    for scoped, saved_blob in orig_all_params.items():
        np.testing.assert_array_equal(saved_blob, restored_all_params[scoped])

    # Broadcasting must leave every copy equal to the GPU-0 original.
    for scoped, restored_blob in restored_all_params.items():
        unscoped = c2_utils.UnscopeName(scoped)
        np.testing.assert_array_equal(restored_blob, orig_gpu_0_params[unscoped])
def initialize_model_from_cfg(weights_file, gpu_id=0):
    """Build a test-time model from the global cfg and create its nets.

    Loads ``weights_file`` onto ``gpu_id``, adds the inference inputs and
    creates the main and conv-body nets in the Caffe2 workspace, plus the
    mask / keypoint nets when the matching ``cfg.MODEL`` switch is on.

    Returns:
        The created model.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model, weights_file, gpu_id=gpu_id
    )
    model_builder.add_inference_inputs(model)

    workspace.CreateNet(model.net)
    workspace.CreateNet(model.conv_body_net)

    # Optional heads: each switch and net attribute is only looked up when
    # its turn comes, exactly as a chain of ``if`` statements would do.
    optional_nets = (
        ('MASK_ON', 'mask_net'),
        ('KEYPOINTS_ON', 'keypoint_net'),
    )
    for switch, net_attr in optional_nets:
        if getattr(cfg.MODEL, switch):
            workspace.CreateNet(getattr(model, net_attr))
    return model
Beispiel #6
0
def initialize_model_from_cfg(weights_file, gpu_id=0):
    """Create an inference model from the global cfg in the Caffe2 workspace.

    The model is built with ``train=False``, initialized from
    ``weights_file`` on ``gpu_id`` and given its inference inputs before the
    nets are created.

    Returns:
        The created model.
    """
    inference_model = model_builder.create(
        cfg.MODEL.TYPE, train=False, gpu_id=gpu_id
    )
    net_utils.initialize_gpu_from_weights_file(
        inference_model, weights_file, gpu_id=gpu_id
    )
    model_builder.add_inference_inputs(inference_model)

    # The main net and the conv body are always created.
    workspace.CreateNet(inference_model.net)
    workspace.CreateNet(inference_model.conv_body_net)

    # The mask and keypoint nets are created only when enabled in cfg.
    if cfg.MODEL.MASK_ON:
        workspace.CreateNet(inference_model.mask_net)
    if cfg.MODEL.KEYPOINTS_ON:
        workspace.CreateNet(inference_model.keypoint_net)

    return inference_model
Beispiel #7
0
def generate_rpn_on_range(
    weights_file,
    dataset_name,
    _proposal_file_ignored,
    output_dir,
    ind_range=None,
    gpu_id=0
):
    """Generate RPN proposals for a dataset, or an index range of it, on one GPU.

    The proposals are saved together with a YAML dump of ``cfg`` to a pickle
    file inside ``output_dir``.

    Returns:
        ``(boxes, scores, ids, rpn_file)`` where ``rpn_file`` is the path of
        the file that was written.
    """
    assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN

    roidb, start_ind, end_ind, total_num_images = get_roidb(
        dataset_name, ind_range
    )
    abs_output_dir = os.path.abspath(output_dir)
    logger.info('Output will be saved to: {:s}'.format(abs_output_dir))

    # Build the inference model on the requested GPU.
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=gpu_id)
    model_builder.add_inference_inputs(model)
    workspace.CreateNet(model.net)

    boxes, scores, ids = generate_proposals_on_roidb(
        model,
        roidb,
        start_ind=start_ind,
        end_ind=end_ind,
        total_num_images=total_num_images,
        gpu_id=gpu_id,
    )

    cfg_yaml = envu.yaml_dump(cfg)

    # The file name carries the index range when one was requested.
    if ind_range is None:
        rpn_name = 'rpn_proposals.pkl'
    else:
        rpn_name = 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range)
    rpn_file = os.path.join(output_dir, rpn_name)

    payload = dict(boxes=boxes, scores=scores, ids=ids, cfg=cfg_yaml)
    save_object(payload, rpn_file)
    logger.info('Wrote RPN proposals to {}'.format(os.path.abspath(rpn_file)))
    return boxes, scores, ids, rpn_file
Beispiel #8
0
def generate_rpn_on_range(
    weights_file,
    dataset_name,
    _proposal_file_ignored,
    output_dir,
    ind_range=None,
    gpu_id=0
):
    """Run RPN inference over a dataset (or an index range of it) on one GPU.

    Writes the boxes, scores, ids and ``yaml.dump(cfg)`` to a pickle file in
    ``output_dir`` and returns ``(boxes, scores, ids, rpn_file)``.
    """
    assert cfg.MODEL.RPN_ONLY or cfg.MODEL.FASTER_RCNN

    roidb_info = get_roidb(dataset_name, ind_range)
    roidb, start_ind, end_ind, total_num_images = roidb_info
    logger.info(
        'Output will be saved to: {:s}'.format(os.path.abspath(output_dir))
    )

    # Test-time model living on ``gpu_id``.
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=gpu_id)
    model_builder.add_inference_inputs(model)
    workspace.CreateNet(model.net)

    boxes, scores, ids = generate_proposals_on_roidb(
        model,
        roidb,
        start_ind=start_ind,
        end_ind=end_ind,
        total_num_images=total_num_images,
        gpu_id=gpu_id,
    )

    cfg_yaml = yaml.dump(cfg)

    # Ranged runs get the range baked into the file name.
    rpn_name = 'rpn_proposals.pkl'
    if ind_range is not None:
        rpn_name = 'rpn_proposals_range_%s_%s.pkl' % tuple(ind_range)
    rpn_file = os.path.join(output_dir, rpn_name)

    save_object(
        {'boxes': boxes, 'scores': scores, 'ids': ids, 'cfg': cfg_yaml},
        rpn_file,
    )
    written_to = os.path.abspath(rpn_file)
    logger.info('Wrote RPN proposals to {}'.format(written_to))
    return boxes, scores, ids, rpn_file
Beispiel #9
0
def create_model(weights_file):
    """Build a training model wired to a ``RoIDataLoaderSimple`` input queue.

    Adapted from utils.train.setup_model_for_training. The model is created,
    randomly initialized, given dequeue ops at the front of its net, loaded
    from ``weights_file`` on GPU 0 and instantiated in the workspace; the
    data loader is started before returning.

    Returns:
        The created model.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    if cfg.MEMONGER:
        optimize_memory(model)
    # Random weight initialization as defined by the model.
    workspace.RunNetOnce(model.param_init_net)

    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    # Setting cfg.DATA_LOADER.NUM_THREADS = 1 makes debugging easier.
    loader_cfg = cfg.DATA_LOADER
    model.roi_data_loader = RoIDataLoaderSimple(
        roidb,
        num_loaders=loader_cfg.NUM_THREADS,
        minibatch_queue_size=loader_cfg.MINIBATCH_QUEUE_SIZE,
        blobs_queue_capacity=loader_cfg.BLOBS_QUEUE_CAPACITY
    )

    ops_before = len(model.net._net.op)
    blob_names = roi_data_minibatch.get_minibatch_blob_names(is_training=True)
    with c2_utils.NamedCudaScope(0):
        for name in blob_names:
            workspace.CreateBlob(core.ScopedName(name))
        queue_name = model.roi_data_loader._blobs_queue_name
        model.net.DequeueBlobs(queue_name, blob_names)

    # Op surgery: rotate the ops appended above to the start of the net.
    added = len(model.net._net.op) - ops_before
    reordered = model.net._net.op[-added:] + model.net._net.op[:-added]
    del model.net._net.op[:]
    model.net._net.op.extend(reordered)

    nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    nu.broadcast_parameters(model)

    workspace.CreateBlob("gpu_0/track_n_rois_two")
    workspace.CreateNet(model.net)

    # Begin loading mini-batches and enqueuing blobs.
    model.roi_data_loader.register_sigint_handler()
    model.roi_data_loader.start(prefill=True)
    return model
Beispiel #10
0
def setup_model_for_training(model, weights_file, output_dir):
    """Load saved weights (if any) and create the network in the C2 workspace.

    Also calls ``dump_proto_files`` for ``output_dir`` and starts the model's
    RoI data loader. Returns ``output_dir`` unchanged.
    """
    log = logging.getLogger(__name__)
    add_model_training_inputs(model)

    # Weights from a saved model replace the random initialization.
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)

    # Synchronization across GPUs is required even for random initialization.
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    resolved_dir = os.path.abspath(output_dir)
    log.info('Outputs saved to: {:s}'.format(resolved_dir))
    dump_proto_files(model, output_dir)

    # Kick off mini-batch loading and blob enqueuing.
    data_loader = model.roi_data_loader
    data_loader.register_sigint_handler()
    data_loader.start(prefill=True)
    return output_dir
Beispiel #11
0
def setup_model_for_training(model, weights_file, output_dir):
    """Load saved weights (if any) and create the network in the C2 workspace.

    The training inputs are added through ``add_model_da_training_inputs``
    when ``cfg.TRAIN.DOMAIN_ADAPTATION`` is set, and through
    ``add_model_training_inputs`` otherwise. Returns ``output_dir`` unchanged.
    """
    log = logging.getLogger(__name__)

    add_inputs = (
        add_model_da_training_inputs
        if cfg.TRAIN.DOMAIN_ADAPTATION
        else add_model_training_inputs
    )
    add_inputs(model)

    # Weights from a saved model replace the random initialization.
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)

    # Synchronization across GPUs is required even for random initialization.
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    resolved_dir = os.path.abspath(output_dir)
    log.info('Outputs saved to: {:s}'.format(resolved_dir))
    dump_proto_files(model, output_dir)

    # (A disabled debugging snippet that rendered the net graph to
    # 'graph.png' with net_drawer / pydot used to live here.)

    # Kick off mini-batch loading and blob enqueuing.
    model.roi_data_loader.register_sigint_handler()
    # Jerome: TODO: set prefill back to True.
    model.roi_data_loader.start(prefill=False)
    return output_dir
def setup_model_for_training(model, weights_file, output_dir):
    """Load saved weights (if any) and create the network in the C2 workspace.

    When ``cfg.TRAIN.Load_SqueezeNetWeights`` is set, the pickle file named
    by ``cfg.TRAIN.SqueezeNetWeightsFile`` is loaded and each of its entries
    is fed into the workspace as blob ``"gpu_0/" + key``. Returns
    ``output_dir`` unchanged.
    """
    log = logging.getLogger(__name__)
    add_model_training_inputs(model)

    # Weights from a saved model replace the random initialization.
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)

    log.info("{}".format(cfg.TRAIN.Load_SqueezeNetWeights))
    log.info("{}".format(cfg.TRAIN.SqueezeNetWeightsFile))

    if cfg.TRAIN.Load_SqueezeNetWeights:
        scope_prefix = "gpu_0/"
        log.info(
            '\n\n\n\n========> Loading Weights For SqueezeNet<======================\n\n\n\n'
        )
        weights_path = cfg.TRAIN.SqueezeNetWeightsFile
        with open(weights_path, 'rb') as handle:
            weights = pickle.load(handle)

        # Feed every stored array into its GPU-0 blob.
        gpu0 = c2_utils.CudaDevice(0)
        for blob_key in weights:
            workspace.FeedBlob(
                scope_prefix + blob_key, weights[blob_key], device_option=gpu0
            )

    # Synchronization across GPUs is required even for random initialization.
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    resolved_dir = os.path.abspath(output_dir)
    log.info('Outputs saved to: {:s}'.format(resolved_dir))
    dump_proto_files(model, output_dir)

    # Kick off mini-batch loading and blob enqueuing.
    data_loader = model.roi_data_loader
    data_loader.register_sigint_handler()
    data_loader.start(prefill=True)
    return output_dir
Beispiel #13
0
def initialize_mixed_model_from_cfg(weights_list, preffix_list, gpu_id=0):
    """Build a test-time model from the global cfg out of several weights files.

    Each file in ``weights_list`` is loaded with the prefix at the same
    position in ``preffix_list`` (both must have the same length), then the
    nets are created in the Caffe2 workspace.

    Returns:
        The created model.
    """
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)

    assert len(weights_list) == len(preffix_list)

    for weights_path, preffix in zip(weights_list, preffix_list):
        net_utils.initialize_gpu_from_weights_file(
            model, weights_path, gpu_id=gpu_id, preffix=preffix
        )
    model_builder.add_inference_inputs(model)

    workspace.CreateNet(model.net)
    workspace.CreateNet(model.conv_body_net)

    # Optional heads: each switch and net attribute is only looked up when
    # its turn comes, exactly as a chain of ``if`` statements would do.
    optional_nets = (
        ('MASK_ON', 'mask_net'),
        ('KEYPOINTS_ON', 'keypoint_net'),
        ('TRACKING_ON', 'track_net'),
    )
    for switch, net_attr in optional_nets:
        if getattr(cfg.MODEL, switch):
            workspace.CreateNet(getattr(model, net_attr))
    return model
def main():
    """Run the custom training loop fed by ``DataLoader`` batches.

    Steps, in order:
      1. Initialize Caffe2, logging and the global ``cfg`` from the CLI args.
      2. Build a test-time model from ``cfg.TEST.WEIGHTS`` and hand it to the
         training ``DataLoader``.
      3. Create the training model and its ``data_stage2`` /
         ``gt_label_stage2`` input blobs, load weights if any, create the net.
      4. Iterate from ``start_iter`` to ``cfg.SOLVER.MAX_ITER``, feeding one
         batch per iteration, logging stats and writing periodic checkpoints.
      5. Save the final weights and the training state, call
         ``detectron.utils.train.train_model()`` and, unless
         ``args.skip_test`` is set, call ``test_model`` on the final
         checkpoint.

    ``root`` and ``output_dir`` are not defined in this function; they are
    expected to be available from the enclosing module.
    """
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'])
    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    logger.info("cuda version : {}".format(cuda_ver))
    logger.info("cudnn version: {}".format(cudnn_ver))
    logger.info("nvidia-smi output:\n{}".format(smi_output))
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    # Note that while we set the numpy random seed network training will not be
    # deterministic in general. There are sources of non-determinism that cannot
    # be removed with a reasonable execution-speed tradeoff (such as certain
    # non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)

    # Test-time model consumed by the data loader. It is deliberately NOT
    # bound to the name ``test_model``: doing so made ``test_model`` a local
    # variable for the whole function, so the final ``test_model(...)`` call
    # below invoked this model object instead of the test function.
    logger.info("creat test model ...")
    inference_model = test_engine.initialize_model_from_cfg(cfg.TEST.WEIGHTS,
                                                            gpu_id=0)
    logger.info("created test model ...")
    train_data = DataLoader(root,
                            "train_id.txt",
                            cfg,
                            inference_model,
                            is_train=True)

    # Create the training model.
    model, weights_file, start_iter, checkpoints = create_model(
        True, cfg, output_dir)
    # Show which blobs exist at this point.
    print(workspace.Blobs())

    # Create the input blobs on every GPU.
    blob_names = ['data_stage2', 'gt_label_stage2']
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))

    # Override random weight initialization with weights from a saved model
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Even if we're randomly initializing we still need to synchronize
    # parameters across GPUs
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    logger.info('Outputs saved to: {:s}'.format(os.path.abspath(output_dir)))
    dump_proto_files(model, output_dir)

    writer = SummaryWriter(log_dir=output_dir)
    training_stats = TrainingStats(model, writer)
    CHECKPOINT_PERIOD = int(cfg.TRAIN.SNAPSHOT_ITERS / cfg.NUM_GPUS)
    logger.info("start train ...")
    for cur_iter in range(start_iter, cfg.SOLVER.MAX_ITER):
        # Feed one batch.
        data_stage2, gt_label = train_data.next_batch()
        # NOTE(review): ``gpu_id`` is the value left over from the blob
        # creation loop above, i.e. only the last GPU's blobs are fed.
        # Confirm this is intended when cfg.NUM_GPUS > 1.
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.FeedBlob(core.ScopedName('data_stage2'), data_stage2)
            workspace.FeedBlob(core.ScopedName('gt_label_stage2'), gt_label)

        training_stats.IterTic()
        lr = model.UpdateWorkspaceLr(cur_iter,
                                     lr_policy.get_lr_at_iter(cur_iter))
        workspace.RunNet(model.net.Proto().name)
        if cur_iter == start_iter:
            nu.print_net(model)
        training_stats.IterToc()
        training_stats.UpdateIterStats(cur_iter)
        training_stats.LogIterStats(cur_iter, lr)
        writer.add_scalar('learning_rate', lr, cur_iter)

        # Periodic checkpoint (never on the very first iteration of the run).
        if (cur_iter + 1) % CHECKPOINT_PERIOD == 0 and cur_iter > start_iter:
            checkpoints[cur_iter] = os.path.join(
                output_dir, 'model_iter{}.pkl'.format(cur_iter))
            nu.save_model_to_weights_file(checkpoints[cur_iter], model)

        if cur_iter == start_iter + training_stats.LOG_PERIOD:
            # Reset the iteration timer to remove outliers from the first few
            # SGD iterations
            training_stats.ResetIterTimer()

        if np.isnan(training_stats.iter_total_loss):
            handle_critical_error(model, 'Loss is NaN')

    # Save the final model
    checkpoints['final'] = os.path.join(output_dir, 'model_final.pkl')
    nu.save_model_to_weights_file(checkpoints['final'], model)
    # Save the training loss and metrics.
    state_file = os.path.join(output_dir, 'training_state.json')
    training_stats.SaveTrainingStates(state_file)

    # NOTE(review): this calls detectron.utils.train.train_model() after the
    # custom loop above has already finished, and rebinds ``checkpoints`` to
    # its result. Confirm that a second training run is intended.
    checkpoints = detectron.utils.train.train_model()
    # Test the trained model (``test_model`` now resolves to the module-level
    # function rather than to a local model object).
    if not args.skip_test:
        test_model(checkpoints['final'], args.multi_gpu_testing, args.opts)
Beispiel #15
0
def initialize_model_from_cfg(weights_file, gpu_id=0, int8=True):
    """Build a test-time model from the global cfg and create its nets.

    Loads ``weights_file`` onto ``gpu_id``, adds the inference inputs and
    creates the nets in the Caffe2 workspace. Behaviour is steered by
    environment variables:

    * ``INT8PATH``  - directory searched for ``<net>_init_int8`` /
      ``<net>_predict_int8`` files that replace the net's init/predict defs.
    * ``INT8PTXT``  - ``"1"`` selects text-format ``.pbtxt`` files instead
      of binary ``.pb`` files.
    * ``COSIM``     - when set and ``int8`` equals ``False``, ``INT8PATH`` is
      ignored.
    * ``DNOOPT``    - ``"1"`` skips ``tf.optimizeForIDEEP`` for
      ``gpu_id == -2``.
    * ``DEBUGMODE`` - ``"1"`` names unnamed ops and dumps the final net as
      ``<net>_opt_predict_net.pb`` / ``.pbtxt`` in the working directory.
    * ``DPROFILE``  - ``"1"`` attaches a ``"TimeObserver"`` to each net.

    Args:
        weights_file: weights file handed to
            ``net_utils.initialize_gpu_from_weights_file``.
        gpu_id: device id; ``-2`` makes loaded int8 nets use the IDEEP
            device option and enables the IDEEP optimization pass.
        int8: see ``COSIM`` above.

    Returns:
        ``(model, ob, ob_mask, ob_keypoint)``; each observer is ``None``
        unless it was attached.
    """
    ob = None
    ob_mask = None
    ob_keypoint = None
    model = model_builder.create(cfg.MODEL.TYPE, train=False, gpu_id=gpu_id)
    net_utils.initialize_gpu_from_weights_file(
        model,
        weights_file,
        gpu_id=gpu_id,
    )
    model_builder.add_inference_inputs(model)
    int8_path = os.environ.get('INT8PATH')

    def LoadModuleFile(fname):
        """Parse ``fname`` into a NetDef (text or binary per ``INT8PTXT``)."""
        from caffe2.proto import caffe2_pb2
        text_format = os.environ.get('INT8PTXT') == "1"
        # Binary protobufs must be read as bytes: a text-mode handle decodes
        # (and on some platforms newline-translates) the payload, which
        # breaks ParseFromString on Python 3.
        with open(fname, 'r' if text_format else 'rb') as f:
            if text_format:
                import google.protobuf.text_format as ptxt
                net_def = ptxt.Parse(f.read(), caffe2_pb2.NetDef())
            else:
                net_def = caffe2_pb2.NetDef()
                net_def.ParseFromString(f.read())
        if gpu_id == -2:
            # Run every op of the loaded net with the IDEEP device option.
            device_opts = caffe2_pb2.DeviceOption()
            device_opts.device_type = caffe2_pb2.IDEEP
            for op in net_def.op:
                op.device_option.CopyFrom(device_opts)
        return net_def

    def CreateNet(net):
        """Apply int8 overrides / debug options to ``net`` and create it."""
        # ``int8_path`` is read when this helper is called, so the COSIM
        # override below takes effect.
        int8_file_path = int8_path if int8_path else ''
        net_name = net.Proto().name
        extension = '.pbtxt' if os.environ.get('INT8PTXT') == "1" else '.pb'
        file_stem = int8_file_path + '/' + net_name
        int8_predict_file = file_stem + '_predict_int8' + extension
        int8_init_file = file_stem + '_init_int8' + extension

        if os.path.isfile(int8_init_file):
            logging.warning('Loading Int8 init file for module {}'.format(
                net.Proto().name))
            workspace.RunNetOnce(LoadModuleFile(int8_init_file))
        if os.path.isfile(int8_predict_file):
            logging.warning('Loading Int8 predict file for module {}'.format(
                net.Proto().name))
            net.Proto().CopyFrom(LoadModuleFile(int8_predict_file))
        if os.environ.get('DEBUGMODE') == "1":
            # Give every anonymous op a name derived from its type and index.
            for i, op in enumerate(net.Proto().op):
                if len(op.name) == 0:
                    op.name = op.type.lower() + str(i)
        if gpu_id == -2 and os.environ.get('DNOOPT') != "1":
            logging.warning('Optimize module {}....................'.format(
                net.Proto().name))
            tf.optimizeForIDEEP(net)
        if os.environ.get('DEBUGMODE') == "1":
            # SerializeToString() yields bytes, so the .pb dump needs a
            # binary handle; the text dump stays in text mode.
            with open("{}_opt_predict_net.pb".format(net.Proto().name),
                      "wb") as fid:
                fid.write(net.Proto().SerializeToString())
            with open("{}_opt_predict_net.pbtxt".format(net.Proto().name),
                      "w") as fid:
                fid.write(str(net.Proto()))
        workspace.CreateNet(net)

    # Equality (not ``not int8``) is kept on purpose so that non-bool values
    # such as ``None`` behave exactly as before.
    if os.environ.get('COSIM') and int8 == False:  # noqa: E712
        int8_path = None
    CreateNet(model.net)
    if os.environ.get('DPROFILE') == "1":
        logging.warning('need profile, add observer....................')
        ob = model.net.AddObserver("TimeObserver")
    # The conv body is created directly, without int8 / debug handling.
    workspace.CreateNet(model.conv_body_net)
    if cfg.MODEL.MASK_ON:
        CreateNet(model.mask_net)
        if os.environ.get('DPROFILE') == "1":
            ob_mask = model.mask_net.AddObserver("TimeObserver")
    if cfg.MODEL.KEYPOINTS_ON:
        CreateNet(model.keypoint_net)
        if os.environ.get('DPROFILE') == "1":
            ob_keypoint = model.keypoint_net.AddObserver("TimeObserver")
    return model, ob, ob_mask, ob_keypoint
def main():
    """Run the network over ``DataLoader`` batches and write label maps.

    After initializing Caffe2, logging and the global ``cfg``, this builds a
    test-time model for the data loader, creates the model with
    ``create_model(False, ...)`` and then, for 10000 iterations, feeds one
    batch into ``data_stage2``, runs the net, fetches
    ``probs_human_NCHW_stage2`` and writes the per-pixel argmax of every
    sample as ``<id>.png`` under ``<output_dir>/fusion``.

    ``root`` and ``output_dir`` are not defined in this function; they are
    expected to be available from the enclosing module.
    """
    # Initialize C2
    c2_flags = ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1']
    workspace.GlobalInit(c2_flags)

    # Logging and configuration.
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()

    smi_output, cuda_ver, cudnn_ver = c2_utils.get_nvidia_info()
    logger.info("cuda version : {}".format(cuda_ver))
    logger.info("cudnn version: {}".format(cudnn_ver))
    logger.info("nvidia-smi output:\n{}".format(smi_output))
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))

    # Seeding numpy does not make the run deterministic in general: some
    # sources of non-determinism (e.g. certain cudnn functions) cannot be
    # removed at a reasonable execution-speed cost.
    np.random.seed(cfg.RNG_SEED)

    # Test-time model handed to the data loader.
    logger.info("creat test model ...")
    test_model = test_engine.initialize_model_from_cfg(
        cfg.TEST.WEIGHTS, gpu_id=0
    )
    logger.info("created test model ...")
    train_data = DataLoader(
        root, "val_id.txt", cfg, test_model, is_train=False
    )

    # Create the model.
    model, weights_file, start_iter, checkpoints = create_model(
        False, cfg, output_dir
    )
    # Show which blobs exist at this point.
    print(workspace.Blobs())

    # Create the input blob on every GPU.
    blob_names = ['data_stage2']
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for name in blob_names:
                workspace.CreateBlob(core.ScopedName(name))

    # Weights from a saved model replace the random initialization.
    if weights_file:
        nu.initialize_gpu_from_weights_file(model, weights_file, gpu_id=0)
    # Synchronization across GPUs is required even for random initialization.
    nu.broadcast_parameters(model)
    workspace.CreateNet(model.net)

    abs_output_dir = os.path.abspath(output_dir)
    logger.info('Outputs saved to: {:s}'.format(abs_output_dir))

    logger.info("start test ...")
    save_root = os.path.join(output_dir, 'fusion')
    if not os.path.exists(save_root):
        os.makedirs(save_root)

    for cur_iter in range(10000):
        # Feed one batch. (A disabled debug snippet printing the max input
        # scores of channels 4/24 and 3/23 used to sit here.)
        data_stage2, gt_label, meta = train_data.next_batch()
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.FeedBlob(core.ScopedName('data_stage2'), data_stage2)

        # Run the net and fetch the probabilities, moving the second axis
        # to the end.
        with c2_utils.NamedCudaScope(gpu_id):
            workspace.RunNet(model.net.Proto().name)
            probs_blob = core.ScopedName('probs_human_NCHW_stage2')
            batch_probs = workspace.FetchBlob(probs_blob)
            batch_probs = batch_probs.transpose((0, 2, 3, 1))
        assert len(meta) == batch_probs.shape[0]

        for idx in range(len(meta)):
            info = meta[idx]
            # Resize to the size recorded in the sample's metadata.
            probs = cv2.resize(
                batch_probs[idx],
                (info['width'], info['height']),
                interpolation=cv2.INTER_LINEAR,
            )
            probs = probs.transpose((2, 0, 1))
            print('sungalsses max score:', np.max(probs[4, :, :]))
            print('glovess max score:', np.max(probs[3, :, :]))
            out_path = os.path.join(save_root, info['id'] + '.png')
            cv2.imwrite(out_path, probs.argmax(0))
        print("prossed ", cur_iter)