Esempio n. 1
0
def save_model_to_weights_file(weights_file, model):
    """Collect the model's blobs under unscoped names and save them.

    Device-scoped blob names are mapped to their unscoped form (e.g.,
    'gpu_0/conv1_w' -> 'conv1_w'). When several scoped names map to the same
    unscoped name, only the first one encountered is fetched and kept.

    Args:
        weights_file: destination path handed to ``save_object``.
        model: object exposing ``params`` and ``TrainableParams()``.
    """
    logger.info('Saving parameters and momentum to {}'.format(
        os.path.abspath(weights_file)))
    blobs = {}

    def _stash(scoped, log_fmt):
        # First writer wins: later scoped names that collapse to an already
        # stored unscoped name are ignored without being fetched.
        unscoped = c2_utils.UnscopeName(scoped)
        if unscoped in blobs:
            return
        logger.debug(log_fmt.format(scoped, unscoped))
        blobs[unscoped] = workspace.FetchBlob(scoped)

    # Parameters
    for param in model.params:
        _stash(str(param), ' {:s} -> {:s}')
    # Momentum blobs of the trainable parameters
    for param in model.TrainableParams():
        _stash(str(param) + '_momentum', ' {:s} -> {:s}')
    # Blobs kept in the workspace under the '__preserve__/' namescope
    for blob_name in workspace.Blobs():
        if blob_name.startswith('__preserve__/'):
            _stash(blob_name, ' {:s} -> {:s} (preserved)')

    # The active config is stored next to the blobs as a YAML string
    save_object(dict(blobs=blobs, cfg=yaml.dump(cfg)), weights_file)
Esempio n. 2
0
def print_net(model, namescope='gpu_0'):
    """Log the forward-pass ops of ``model.net`` with blob shapes and stats.

    For every op whose first output name starts with ``namescope`` (or for
    every op when ``namescope`` is None), each input that is not in
    ``model.params`` and fetches as a numpy array is logged together with its
    shape, the shape of the op's first output, and the input's mean, max, min
    and full contents.

    Args:
        model: object exposing ``net`` (with ``Name()`` and ``Proto()``) and
            ``params``.
        namescope: required prefix of the first output name; None disables
            the filter.
    """
    logger.info('Printing model: {}'.format(model.net.Name()))
    for op in model.net.Proto().op:
        if not op.output:
            # An op without outputs has nothing to report (and indexing
            # output[0] would fail).
            continue
        # For simplicity: only print the first output
        # Not recommended if there are split layers
        output_name = str(op.output[0])

        if namescope is not None and not output_name.startswith(namescope):
            continue
        # Only print the forward pass network
        if 'grad' in output_name or '__m' in output_name:
            continue

        try:
            # Under some conditions (e.g., dynamic memory optimization)
            # it is possible that the network frees some blobs when they are
            # no longer needed. Handle this case...
            output_shape = workspace.FetchBlob(output_name).shape
        except Exception:
            # Exception (not BaseException) so that Ctrl-C / SystemExit
            # still interrupt the printout.
            output_shape = '<unknown>'

        # The op name is appended after a colon only when it is non-empty
        op_label = op.type + (':' + op.name if op.name else '')
        # The first printed input of an op carries the op label; the
        # following inputs of the same op get a short continuation marker.
        suffix = ' ------- (op: {})'.format(op_label)
        for input_name in op.input:
            if input_name in model.params:
                continue
            input_blob = workspace.FetchBlob(input_name)
            if not isinstance(input_blob, np.ndarray):
                continue
            logger.info('{:28s}: {:20s} => {:28s}: {:20s}{}'.format(
                c2_utils.UnscopeName(str(input_name)),
                '{}'.format(input_blob.shape),
                c2_utils.UnscopeName(str(output_name)),
                '{}'.format(output_shape), suffix))
            logger.info('{:28s} mean: {}'.format(
                str(input_name), input_blob.mean()))
            logger.info('{:28s} max: {}'.format(
                str(input_name), input_blob.max()))
            logger.info('{:28s} min: {}'.format(
                str(input_name), input_blob.min()))
            logger.info('{:28s} data: {}'.format(
                str(input_name), input_blob))
            suffix = ' ------|'
    logger.info('End of model: {}'.format(model.net.Name()))
def main(args):
    """Load several weights files into one model and save the merged blobs.

    Reads ``args.cfg``, ``args.weights_list``, ``args.preffix_list`` and
    ``args.output_wts``. Entries of ``args.weights_list`` are replaced in
    place by the result of ``cache_url``. When ``args.preffix_list`` is
    empty, an empty prefix is used for every weights file.
    """
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    for idx, location in enumerate(args.weights_list):
        args.weights_list[idx] = cache_url(location, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)

    # One prefix per weights file
    if len(args.preffix_list):
        preffix_list = args.preffix_list
    else:
        preffix_list = [""] * len(args.weights_list)

    model = model_builder.create(cfg.MODEL.TYPE, train=False)
    # Load every weights file onto GPU 0, in list order
    for idx, local_path in enumerate(args.weights_list):
        nu.initialize_gpu_from_weights_file(
            model, local_path, gpu_id=0, preffix=preffix_list[idx])
    nu.broadcast_parameters(model)

    # Gather each parameter once under its unscoped name, skipping
    # parameters that have no blob in the workspace
    merged = {}
    for param in model.params:
        scoped = str(param)
        unscoped = c2_utils.UnscopeName(scoped)
        if unscoped in merged or not workspace.HasBlob(scoped):
            continue
        merged[unscoped] = workspace.FetchBlob(scoped)
    # Save merged weights file
    save_object(dict(blobs=merged), args.output_wts)
Esempio n. 4
0
def test_restore_checkpoint():
    """Check that saved weights are restored and broadcast unchanged.

    Saves the freshly initialized model, re-initializes its weights, reloads
    the checkpoint onto GPU 0, broadcasts, and then compares every parameter
    and momentum blob against the values captured right after saving.
    """
    # Build a training model with momentum blobs and initial weights
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)
    # Attach the training inputs, then instantiate the net
    training_roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES)
    model_builder.add_training_inputs(model, roidb=training_roidb)
    workspace.CreateNet(model.net)

    # Checkpoint for iteration 0 inside the training output directory
    start_iter = 0
    chk_file_path = os.path.join(
        get_output_dir(cfg.TRAIN.DATASETS, training=True),
        'model_iter{}.pkl'.format(start_iter))
    nu.save_model_to_weights_file(chk_file_path, model)
    expected_gpu_0, expected_all = get_params(model)

    # Overwrite the weights, then restore them from the checkpoint
    init_weights(model)
    nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
    nu.broadcast_parameters(model)
    # The output directory is removed before the comparisons run
    shutil.rmtree(cfg.OUTPUT_DIR)
    _, restored_all = get_params(model)

    # Every blob must hold the value it had when the checkpoint was written
    for scoped_name, expected in expected_all.items():
        np.testing.assert_array_equal(expected, restored_all[scoped_name])
    # Every restored blob must equal its gpu_0 counterpart (broadcast check)
    for scoped_name, restored in restored_all.items():
        np.testing.assert_array_equal(
            restored, expected_gpu_0[c2_utils.UnscopeName(scoped_name)])
Esempio n. 5
0
def get_params(model):
    """Fetch parameter and momentum blobs of ``model`` from the workspace.

    Returns:
        A pair ``(gpu_0_blobs, all_blobs)``. ``gpu_0_blobs`` maps unscoped
        names to the blobs whose scoped name contains 'gpu_0';
        ``all_blobs`` maps every scoped name to its blob.
    """
    gpu_0_blobs = {}  # unscoped name -> blob, gpu_0 only
    every_blob = {}  # scoped name -> blob

    def _record(scoped):
        unscoped = c2_utils.UnscopeName(scoped)
        if 'gpu_0' in scoped:
            gpu_0_blobs[unscoped] = workspace.FetchBlob(scoped)
        # Fetched separately so the two dictionaries never share an object
        every_blob[scoped] = workspace.FetchBlob(scoped)

    # Parameters first, then the momentum blob of each trainable parameter
    for param in model.params:
        _record(str(param))
    for param in model.TrainableParams():
        _record(str(param) + '_momentum')
    return gpu_0_blobs, every_blob
def save_model_to_weights_file(weights_file, model, cur_iter=None):
    """Stash model weights in a dictionary and pickle them to a file. We map
    GPU device scoped names to unscoped names (e.g., 'gpu_0/conv1_w' ->
    'conv1_w').

    Besides parameters, momentum and '__preserve__/' blobs, two extra entries
    may be stored: 'roidb_state' (only when ``cur_iter`` is given) and
    'weight_db' (only when ``cfg.TRAIN.PADA`` is set).

    Args:
        weights_file: destination path handed to ``save_object``.
        model: object exposing ``params`` and ``TrainableParams()``;
            ``roi_data_loader`` is read when ``cur_iter`` is given and
            ``class_weight_db`` when ``cfg.TRAIN.PADA`` is set.
        cur_iter: current iteration index, or None to skip storing the
            roidb permutation state.
    """
    logger.info('Saving parameters and momentum to {}'.format(
        os.path.abspath(weights_file)))
    blobs = {}

    def _stash(scoped_name, log_fmt=' {:s} -> {:s}'):
        # Keep only the first blob seen for each unscoped name.
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        if unscoped_name not in blobs:
            logger.debug(log_fmt.format(scoped_name, unscoped_name))
            blobs[unscoped_name] = workspace.FetchBlob(scoped_name)

    # Save all parameters
    for param in model.params:
        _stash(str(param))
    # Save momentum
    for param in model.TrainableParams():
        _stash(str(param) + '_momentum')
    # Save preserved blobs
    for scoped_name in workspace.Blobs():
        if scoped_name.startswith('__preserve__/'):
            _stash(scoped_name, ' {:s} -> {:s} (preserved)')

    # Save roidb shuffling. Note the message below is also logged when a
    # blob named 'roidb_state' was already collected above.
    if 'roidb_state' not in blobs and cur_iter is not None:
        blobs['roidb_state'] = model.roi_data_loader.get_perm_state(
            cur_iter + 1)  # give iters_done.
    else:
        logger.info("roidb state not stored")

    if cfg.TRAIN.PADA and 'weight_db' not in blobs:
        blobs['weight_db'] = model.class_weight_db.get_state()

    cfg_yaml = envu.yaml_dump(cfg)
    save_object(dict(blobs=blobs, cfg=cfg_yaml), weights_file)
Esempio n. 7
0
 def AddLosses(self, losses):
     """Register one loss or a list of losses in ``self.losses``."""
     # isinstance tells whether losses is a list (or a list subclass);
     # anything else is treated as a single loss and wrapped in a list.
     loss_items = losses if isinstance(losses, list) else [losses]
     # Conversion to str allows losses to include BlobReferences.
     # UnscopeName is applied to every entry so that each loss is stored
     # under its unscoped name.
     unscoped = [c2_utils.UnscopeName(str(item)) for item in loss_items]
     # Concatenating with self.losses keeps the losses registered earlier;
     # going through a set removes duplicates (the resulting order is
     # whatever the set yields).
     self.losses = list(set(self.losses + unscoped))
Esempio n. 8
0
def convert_mask_net(args, mask_net):
    """Wrap ``mask_net`` with ROI pre-/post-processing ops and save it.

    Two nets are produced and passed to ``save_model`` together with
    ``args.mask_dir``: an init net that fills every weights-file blob that
    is also an (unscoped) external input of ``mask_net``, and a main net
    made of BBoxToRoi, optionally MultiLevelRoi, the ops of ``mask_net``,
    and SegmentMask.

    Args:
        args: object exposing ``wts``, ``net_name`` and ``mask_dir``.
        mask_net: net definition exposing ``external_input`` and ``op``.
    """
    init_net = caffe2_pb2.NetDef()
    net = caffe2_pb2.NetDef()

    # Initialization net: one GivenTensorFill per blob that the weights
    # file provides and the mask net consumes as external input
    blobs = io_utils.load_object(args.wts)['blobs']
    externals = {
        c2_utils.UnscopeName(inp) for inp in mask_net.external_input
    }
    for name in set(blobs.keys()).intersection(externals):
        tensor = blobs[name]
        add_custom_op(init_net,
                      'GivenTensorFill', [], [name],
                      values=tensor.flatten(),
                      shape=tensor.shape)

    # Pre-process the ROIs
    roi_inputs = [
        Constants.nms_outputs[1], Constants.im_info, Constants.nms_outputs[3]
    ]
    add_custom_op(net, 'BBoxToRoi', roi_inputs, [Constants.mask_rois])

    # Group the ROIs based on their FPN level
    if cfg.FPN.MULTILEVEL_ROIS:
        levels = range(cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL + 1)
        outputs = [Constants.mask_rois + Constants.idx_restore_suffix]
        outputs.extend(
            Constants.mask_rois + Constants.fpn_level_suffix(lvl)
            for lvl in levels)
        add_custom_op(net,
                      'MultiLevelRoi', [Constants.mask_rois],
                      outputs,
                      min_level=cfg.FPN.ROI_MIN_LEVEL,
                      canon_scale=cfg.FPN.ROI_CANONICAL_SCALE,
                      canon_level=cfg.FPN.ROI_CANONICAL_LEVEL)

    # Generate the masks
    net.op.extend(mask_net.op)

    # Post-process the masks
    segment_inputs = Constants.nms_outputs[1:-1] + [
        Constants.mask_pred, Constants.im_info
    ]
    add_custom_op(net,
                  'SegmentMask',
                  segment_inputs,
                  [Constants.main_output, Constants.im_info],
                  thresh_bin=cfg.MRCNN.THRESH_BINARIZE)

    net.name = args.net_name + '_mask'
    init_net.name = args.net_name + '_mask_init'
    save_model(net, init_net, args.mask_dir)
def unscope_name(name):
    """Return ``name`` as transformed by ``c2_utils.UnscopeName``."""
    unscoped = c2_utils.UnscopeName(name)
    return unscoped
Esempio n. 10
0
 def AddLosses(self, losses):
     """Add one loss or a list of losses to ``self.losses`` without
     duplicates."""
     incoming = losses if isinstance(losses, list) else [losses]
     # Conversion to str allows losses to include BlobReferences
     merged = set(self.losses + [
         c2_utils.UnscopeName(str(loss)) for loss in incoming
     ])
     self.losses = list(merged)
Esempio n. 11
0
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.

    Blobs that already exist in the workspace must match the shape stored in
    the weights file, except for six output-layer blobs under 'gpu_0/'
    (cls_score_{w,b}, bbox_pred_{w,b}, mask_fcn_logits_{w,b}). Those are
    rebuilt with the workspace blob's shape, selected rows are copied over
    from the stored blob, and a zero momentum entry of the workspace shape
    is stored for them.

    Args:
        model: object exposing ``params``.
        weights_file: path of a pickled weights dictionary.
        gpu_id: id passed to ``c2_utils.NamedCudaScope``.

    Raises:
        AssertionError: if a non-exempt workspace blob has a different shape
            than the blob in the weights file.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    # NOTE: unpickling can execute arbitrary code; only load weights files
    # from a trusted source.
    # Pickle data is binary, so the file has to be opened in 'rb' mode
    # (text mode fails on Python 3 and can corrupt data on some platforms).
    with open(weights_file, 'rb') as f:
        src_blobs = pickle.load(f)
    if 'cfg' in src_blobs:
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']
    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in unscoped_param_names.keys():
            if (unscoped_param_name.find(']_') >= 0
                    and unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[unscoped_param_name.find(']_') +
                                               2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            # Evaluated before the special cases below add a momentum entry,
            # so momentum is only fed when the weights file provided one.
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.debug(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name,
                    src_blobs[src_name].shape))
            if dst_name in ws_blobs:
                # NOTE(review): the exempt names are hard-coded for 'gpu_0'
                # although gpu_id is a parameter -- presumably the special
                # handling is skipped for any other GPU; confirm.
                GPU_NAME = 'gpu_0'
                ws_blob = workspace.FetchBlob(dst_name)
                resized_blob_names = (
                    GPU_NAME + '/cls_score_w',
                    GPU_NAME + '/cls_score_b',
                    GPU_NAME + '/bbox_pred_w',
                    GPU_NAME + '/bbox_pred_b',
                    GPU_NAME + '/mask_fcn_logits_w',
                    GPU_NAME + '/mask_fcn_logits_b',
                )  # add by shan
                if dst_name not in resized_blob_names:
                    # If the blob is already in the workspace, make sure that
                    # it matches the shape of the loaded blob
                    assert ws_blob.shape == src_blobs[src_name].shape, \
                        ('Workspace blob {} with shape {} does not match '
                         'weights file shape {}').format(
                            src_name,
                            ws_blob.shape,
                            src_blobs[src_name].shape)

                else:
                    # Each case builds a freshly initialized array of the
                    # workspace shape and copies selected rows of the stored
                    # blob into it.
                    # NOTE(review): cls_score / mask_fcn_logits copy source
                    # rows 6 and 8 into rows 5 and 6 (row 4 keeps its fresh
                    # initialization), whereas bbox_pred copies source rows
                    # 24:28 and 32:36 into rows 16:20 and 20:24 -- the two
                    # mappings look inconsistent; confirm the intended
                    # class mapping.
                    if dst_name == GPU_NAME + '/cls_score_w':
                        temp = 0.0001 * np.random.randn(*(ws_blob.shape))
                        temp[0:4, :] = src_blobs[src_name][0:4, :]
                        temp[5, :] = src_blobs[src_name][6, :]
                        temp[6, :] = src_blobs[src_name][8, :]
                        src_blobs[src_name] = temp

                    if (dst_name == GPU_NAME + '/cls_score_b'):
                        # The shape is passed as a tuple: unpacking it into
                        # np.ones only works for 1-D shapes.
                        temp = -np.log((1 - 0.00001) / 0.00001) * np.ones(
                            ws_blob.shape)
                        temp[0:4] = src_blobs[src_name][0:4]
                        temp[5] = src_blobs[src_name][6]
                        temp[6] = src_blobs[src_name][8]
                        src_blobs[src_name] = temp

                    if (dst_name == GPU_NAME + '/bbox_pred_w'):
                        temp = 0.0001 * np.random.randn(*(ws_blob.shape))
                        temp[0:16, :] = src_blobs[src_name][0:16, :]
                        temp[16:20, :] = src_blobs[src_name][24:28, :]
                        temp[20:24, :] = src_blobs[src_name][32:36, :]
                        src_blobs[src_name] = temp

                    if (dst_name == GPU_NAME + '/bbox_pred_b'):
                        temp = -np.log((1 - 0.00001) / 0.00001) * np.ones(
                            ws_blob.shape)
                        temp[0:16] = src_blobs[src_name][0:16]
                        temp[16:20] = src_blobs[src_name][24:28]
                        temp[20:24] = src_blobs[src_name][32:36]
                        src_blobs[src_name] = temp

                    if dst_name == GPU_NAME + '/mask_fcn_logits_w':
                        print(src_blobs[src_name].shape)
                        temp = 0.0001 * np.random.randn(*(ws_blob.shape))
                        temp[0:4, :, :, :] = src_blobs[src_name][0:4, :, :, :]
                        temp[5, :, :, :] = src_blobs[src_name][6, :, :, :]
                        temp[6, :, :, :] = src_blobs[src_name][8, :, :, :]
                        src_blobs[src_name] = temp

                    if dst_name == GPU_NAME + '/mask_fcn_logits_b':
                        temp = -np.log((1 - 0.00001) / 0.00001) * np.ones(
                            ws_blob.shape)
                        temp[0:4] = src_blobs[src_name][0:4]
                        temp[5] = src_blobs[src_name][6]
                        temp[6] = src_blobs[src_name][8]
                        src_blobs[src_name] = temp

                    # Stored momentum would have the old shape; replace it
                    # with zeros of the workspace shape.
                    src_blobs[src_name + '_momentum'] = np.zeros(ws_blob.shape)
            workspace.FeedBlob(
                dst_name, src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(np.float32,
                                                             copy=False))

    # We preserve blobs that are in the weights file but not used by the current
    # model. We load these into CPU memory under the '__preserve__/' namescope.
    # These blobs will be stored when saving a model to a weights file. This
    # feature allows for alternating optimization of Faster R-CNN in which blobs
    # unused by one step can still be preserved forward and used to initialize
    # another step.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_param_names
                and not src_name.endswith('_momentum')
                and src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob('__preserve__/{:s}'.format(src_name),
                                   src_blobs[src_name])
                logger.debug(
                    '{:s} preserved in workspace (unused)'.format(src_name))
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Load a weights file into the workspace blobs of one GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.

    Momentum blobs are never restored by this variant: the momentum flag is
    forced to False. A stored 'roidb_state' is handed to
    ``model.roi_data_loader`` when one is set, and a stored 'weight_db' is
    turned into ``model.class_weight_db`` when ``cfg.TRAIN.PADA`` is set;
    both entries are removed from the loaded dictionary so they are not
    preserved as blobs.

    Raises:
        AssertionError: if an existing workspace blob has a different shape
            than the blob in the weights file.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    src_blobs = load_object(weights_file)

    if 'cfg' in src_blobs:
        configure_bbox_reg_weights(model, load_cfg(src_blobs['cfg']))
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']

    # Unscoped parameter names, de-duplicated, in model order
    unscoped_param_names = OrderedDict(
        (c2_utils.UnscopeName(str(param)), True) for param in model.params)

    # Initialize weights on GPU gpu_id only
    with c2_utils.NamedCudaScope(gpu_id):
        for param_name in unscoped_param_names:
            marker = param_name.find(']_')
            if marker >= 0 and param_name not in src_blobs:
                # Special case for sharing initialization from a pretrained
                # model: a parameter named '_[xyz]_foo' that is missing from
                # the weights file is loaded from source blob 'foo'
                src_name = param_name[marker + 2:]
            else:
                src_name = param_name
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue

            dst_name = core.ScopedName(param_name)
            source = src_blobs[src_name]
            # Momentum loading is switched off regardless of file contents
            has_momentum = False
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.info(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name, source.shape))
            if dst_name in ws_blobs:
                # A blob that already lives in the workspace must have the
                # same shape as the one being loaded
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == source.shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name, ws_blob.shape, source.shape)
            workspace.FeedBlob(
                dst_name, source.astype(np.float32, copy=False))
            if has_momentum:
                momentum = src_blobs[src_name + '_momentum']
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    momentum.astype(np.float32, copy=False))

    # let roidb continue with the data that is not seen yet.
    has_roidb_state = 'roidb_state' in src_blobs
    if has_roidb_state and model.roi_data_loader is not None:
        model.roi_data_loader.set_perm_state(src_blobs['roidb_state'])
        del src_blobs['roidb_state']
    else:
        logger.info("roidb state not loaded")
        if has_roidb_state:
            # No data loader to receive the state: just drop the entry
            del src_blobs['roidb_state']

    pada_enabled = cfg.TRAIN.PADA
    if 'weight_db' in src_blobs:
        if pada_enabled:
            import detectron.modeling.PADA as pada
            model.class_weight_db = pada.ClassWeightDB(*src_blobs['weight_db'])
        del src_blobs['weight_db']

    # We preserve blobs that are in the weights file but not used by the current
    # model. We load these into CPU memory under the '__preserve__/' namescope.
    # These blobs will be stored when saving a model to a weights file. This
    # feature allows for alternating optimization of Faster R-CNN in which blobs
    # unused by one step can still be preserved forward and used to initialize
    # another step.
    for blob_name in src_blobs.keys():
        if blob_name in unscoped_param_names:
            continue
        if blob_name.endswith('_momentum') or src_blobs[blob_name] is None:
            continue
        with c2_utils.CpuScope():
            workspace.FeedBlob('__preserve__/{:s}'.format(blob_name),
                               src_blobs[blob_name])
            logger.info(
                '{:s} preserved in workspace (unused)'.format(blob_name))
Esempio n. 13
0
def initialize_gpu_from_weights_file(model, weights_file, gup_id=0,
                                     gpu_id=None):
    """Initialize the model's blobs on one GPU from a weights file.

    Each parameter of ``model`` that is found in the weights file is fed
    into the workspace (as float32) under the name scope of the chosen GPU,
    together with its '<name>_momentum' blob when the file has one. Blobs of
    the file that the model does not use are fed under the '__preserve__/'
    namescope inside a CPU scope.

    Args:
        model: object exposing ``params``.
        weights_file: path handed to ``load_object``.
        gup_id: id passed to ``c2_utils.NamedCudaScope`` (historical,
            misspelled parameter name; kept for existing callers).
        gpu_id: correctly spelled alias of ``gup_id``; takes precedence
            when given.

    Raises:
        AssertionError: if an existing workspace blob has a different shape
            than the blob in the weights file.
    """
    if gpu_id is not None:
        gup_id = gpu_id
    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    src_blobs = load_object(weights_file)

    if 'cfg' in src_blobs:
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # The blobs may be nested under the 'blobs' key or be the top-level
        # dictionary itself
        src_blobs = src_blobs['blobs']

    # Unscoped parameter names, de-duplicated, in model order
    unscoped_para_names = OrderedDict()
    for blob in model.params:
        unscoped_para_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(gup_id):
        for unscoped_para_name in unscoped_para_names.keys():
            marker = unscoped_para_name.find(']_')
            if marker >= 0 and unscoped_para_name not in src_blobs:
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                # (slice off everything up to and including ']_')
                src_name = unscoped_para_name[marker + 2:]
            else:
                src_name = unscoped_para_name
            if src_name not in src_blobs:
                logger.info('{:s} not found.'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_para_name)
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum] ' if has_momentum else ''
            logger.info(
                '{:s}{:} loaded form weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name,
                    src_blobs[src_name].shape))
            if dst_name in ws_blobs:
                # if the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == src_blobs[src_name].shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                    src_name,
                    ws_blob.shape,
                    src_blobs[src_name].shape)
            workspace.FeedBlob(
                dst_name, src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(np.float32,
                                                             copy=False))

    # Keep blobs that the weights file holds but the model does not use.
    # They must live under '__preserve__/' -- the prefix that
    # save_model_to_weights_file looks for -- otherwise they are lost on the
    # next save.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_para_names
                and not src_name.endswith('_momentum')
                and src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob('__preserve__/{:s}'.format(src_name),
                                   src_blobs[src_name])
                logger.info(
                    '{:s} preserved in workspace (unused)'.format(src_name))
Esempio n. 14
0
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    with open(weights_file, 'r') as f:
        src_blobs = pickle.load(f)
    if 'cfg' in src_blobs:
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']
    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in unscoped_param_names.keys():
            if (unscoped_param_name.find(']_') >= 0
                    and unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[unscoped_param_name.find(']_') +
                                               2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.debug(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name,
                    src_blobs[src_name].shape))
            if dst_name in ws_blobs:
                print("dst_name:" + dst_name)
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                print("xhpan:ws_blob.shape:" + str(ws_blob.shape))
                print("xhpan:src_blobs[src_name].shape:" +
                      str(src_blobs[src_name].shape))
                classes_layers_list_w = [
                    'gpu_0/cls_score_w', 'gpu_0/bbox_pred_w',
                    'gpu_0/mask_fcn_logits_w'
                ]
                classes_layers_list_b = [
                    'gpu_0/cls_score_b', 'gpu_0/bbox_pred_b',
                    'gpu_0/mask_fcn_logits_b'
                ]

                # -----------------(11, 1024) - --------------(10, 1024)
                # -----------------(11,) - --------------(10,)
                # -----------------(44, 1024) - --------------(40, 1024)
                # -----------------(44,) - --------------(40,)
                # -----------------(11, 256, 1, 1) - --------------(10, 256, 1, 1)
                # -----------------(11,) - --------------(10,)
                # if ws_blob.shape != src_blobs[src_name].shape:
                #     if dst_name is 'gpu_0/cls_score_w':
                #         if ws_blob.shape[0] > src_blobs[src_name].shape[0]:#(10, 1024)
                #             src_blobs[src_name].extend(0.0001 * np.random.randn(*(ws_blob.shape[0] - src_blobs[src_name].shape[0], ws_blob.shape[1])))
                #         else:
                #             num = src_blobs[src_name].shape[0] - ws_blob.shape[0]
                #             src_blobs[src_name] = src_blobs[src_name][-num]
                #     elif dst_name is

                cfg.MODEL.NUM_CLASSES
                if ws_blob.shape != src_blobs[src_name].shape:
                    print("ws_blob.shape != src_blobs[src_name].shape")
                    print("-----------------" + str(ws_blob.shape) +
                          "---------------" + str(src_blobs[src_name].shape))
                    if dst_name in classes_layers_list_w or dst_name in classes_layers_list_b:
                        if dst_name in classes_layers_list_w:
                            target_shape = [
                                ws_blob.shape[0] - src_blobs[src_name].shape[0]
                            ]
                            target_shape.extend(list(ws_blob.shape[1:]))
                            init_weight = 0.0001 * np.random.randn(
                                *(tuple(target_shape)))
                            src_blobs[src_name] = np.append(
                                src_blobs[src_name], init_weight, axis=0)
                        else:
                            target_shape = [
                                ws_blob.shape[0] - src_blobs[src_name].shape[0]
                            ]
                            target_shape.extend(list(ws_blob.shape[1:]))
                            init_weight = -np.log(
                                (1 - 0.00001) / 0.00001) * np.ones(
                                    *(tuple(target_shape)))
                            src_blobs[src_name] = np.append(
                                src_blobs[src_name], init_weight, axis=0)

                        target_shape = [
                            ws_blob.shape[0] -
                            src_blobs[src_name + '_momentum'].shape[0]
                        ]
                        target_shape.extend(list(ws_blob.shape[1:]))
                        init_weight = np.zeros(target_shape)
                        src_blobs[src_name + '_momentum'] = np.append(
                            src_blobs[src_name + '_momentum'],
                            init_weight,
                            axis=0)

                # if ws_blob.shape != src_blobs[src_name].shape:
                #     print ("ws_blob.shape != src_blobs[src_name].shape")
                #     print ("-----------------" + str(ws_blob.shape) + "---------------" + str(src_blobs[src_name].shape))
                #     if dst_name in classes_layers_list_w or dst_name in classes_layers_list_b:
                #         if dst_name in classes_layers_list_w :
                #             src_blobs[src_name] = 0.0001 * np.random.randn(*(ws_blob.shape))
                #         else:
                #             src_blobs[src_name] = -np.log((1 - 0.00001) / 0.00001) * np.ones(*(ws_blob.shape))
                #
                #         src_blobs[src_name + '_momentum'] = np.zeros(ws_blob.shape)




                assert ws_blob.shape == src_blobs[src_name].shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name,
                        ws_blob.shape,
                        src_blobs[src_name].shape)
            workspace.FeedBlob(
                dst_name, src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(np.float32,
                                                             copy=False))

    # We preserve blobs that are in the weights file but not used by the current
    # model. We load these into CPU memory under the '__preserve__/' namescope.
    # These blobs will be stored when saving a model to a weights file. This
    # feature allows for alternating optimization of Faster R-CNN in which blobs
    # unused by one step can still be preserved forward and used to initialize
    # another step.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_param_names
                and not src_name.endswith('_momentum')
                and src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob('__preserve__/{:s}'.format(src_name),
                                   src_blobs[src_name])
                logger.debug(
                    '{:s} preserved in workspace (unused)'.format(src_name))
Esempio n. 15
0
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Feed the blobs pickled in a weights file into one GPU's workspace.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.

    Args:
        model: object whose ``params`` are the blobs to initialize.
        weights_file: path of the pickle file holding the source blobs.
        gpu_id: logical GPU id handed to ``c2_utils.NamedCudaScope``.

    Raises:
        AssertionError: a blob already in the workspace has a different shape
            than the blob of the same name in the weights file.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    existing_blobs = workspace.Blobs()
    with open(weights_file, 'rb') as fh:
        try:
            # Model Zoo pickles (as of January 2018) seem to be latin1
            # encoded; see https://github.com/tflearn/tflearn/issues/57
            src_blobs = pickle.load(fh, encoding='latin1')
        except TypeError:
            # Python 2's pickle.load does not accept "encoding"
            src_blobs = pickle.load(fh)
    if 'cfg' in src_blobs:
        configure_bbox_reg_weights(model, load_cfg(src_blobs['cfg']))
    if 'blobs' in src_blobs:
        # Backwards compat: older files are the blob dictionary itself, newer
        # ones keep it under the 'blobs' key
        src_blobs = src_blobs['blobs']

    # Ordered so that parameters are handled (and logged) in model order
    unscoped_param_names = OrderedDict(
        (c2_utils.UnscopeName(str(param)), True) for param in model.params)

    # Only the blobs of GPU gpu_id are initialized here
    with c2_utils.NamedCudaScope(gpu_id):
        for param_name in list(unscoped_param_names.keys()):
            src_name = param_name
            marker = param_name.find(']_')
            if marker >= 0 and param_name not in src_blobs:
                # Sharing initialization from a pretrained model: a param
                # named '_[xyz]_foo' that is absent from the weights file is
                # initialized from source blob 'foo'
                src_name = param_name[marker + 2:]
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue

            dst_name = core.ScopedName(param_name)
            momentum_name = src_name + '_momentum'
            has_momentum = momentum_name in src_blobs
            momentum_note = ' [+ momentum]' if has_momentum else ''
            weights = src_blobs[src_name]
            logger.debug(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, momentum_note, dst_name, weights.shape))

            if dst_name in existing_blobs:
                # A blob that already lives in the workspace must agree in
                # shape with the one being loaded over it
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == weights.shape, (
                    'Workspace blob {} with shape {} does not match '
                    'weights file shape {}'
                ).format(src_name, ws_blob.shape, weights.shape)

            workspace.FeedBlob(
                dst_name, weights.astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[momentum_name].astype(np.float32, copy=False))

    # Blobs present in the weights file but unused by the current model are
    # kept in CPU memory under the '__preserve__/' namescope so that they are
    # written out again when the model is saved. This supports alternating
    # optimization of Faster R-CNN, where blobs unused by one step must
    # survive to initialize another step.
    for blob_name in list(src_blobs.keys()):
        if blob_name in unscoped_param_names:
            continue
        if blob_name.endswith('_momentum'):
            continue
        if src_blobs[blob_name] is None:
            continue
        with c2_utils.CpuScope():
            workspace.FeedBlob(
                '__preserve__/{:s}'.format(blob_name), src_blobs[blob_name])
            logger.debug(
                '{:s} preserved in workspace (unused)'.format(blob_name))
Esempio n. 16
0
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0, preffix=''):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.

    Args:
        model: object whose ``params`` are the blobs to initialize.
        weights_file: path handed to ``load_object`` to obtain the source
            blobs.
        gpu_id: logical GPU id handed to ``c2_utils.NamedCudaScope``.
        preffix: when non-empty, every blob name from the weights file is
            renamed to ``preffix + '_' + name`` before being matched against
            the model's parameter names.

    Raises:
        AssertionError: a blob already in the workspace has a different shape
            than the matching blob in the weights file (only checked when
            ``cfg.SKIP_EXISTING_WEIGHTS`` is false).
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    src_blobs = load_object(weights_file)

    if 'cfg' in src_blobs:
        saved_cfg = load_cfg(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']
    if preffix:
        # Build a fresh mapping: popping/inserting keys while iterating over
        # the live keys() view breaks on Python 3 (RuntimeError, or keys that
        # get prefixed more than once).
        src_blobs = {
            preffix + "_" + key: blob for key, blob in src_blobs.items()
        }
    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[c2_utils.UnscopeName(str(blob))] = True
    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in unscoped_param_names:
            if (unscoped_param_name.find(']_') >= 0 and
                    unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[
                    unscoped_param_name.find(']_') + 2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                msg = '{:s} not found'.format(src_name)
                if preffix:
                    msg += ' (prefix: {})'.format(preffix)
                logger.info(msg)
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            if dst_name in ws_blobs:
                if cfg.SKIP_EXISTING_WEIGHTS:
                    # Leave blobs that are already in the workspace untouched
                    msg = 'Workspace blob {} already in workspace, skipping.'.format(
                        src_name)
                    logger.warning(msg)
                    continue
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                if ws_blob.shape != src_blobs[src_name].shape:
                    msg = ('Workspace blob {} with shape {} does not match '
                           'weights file shape {}').format(
                               src_name,
                               ws_blob.shape,
                               src_blobs[src_name].shape)
                    # Raised explicitly (same exception type as the former
                    # assert) so the check also holds under ``python -O``.
                    raise AssertionError(msg)
            logger.info(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name,
                    src_blobs[src_name].shape
                )
            )
            workspace.FeedBlob(
                dst_name,
                src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(
                        np.float32, copy=False))

    # We preserve blobs that are in the weights file but not used by the current
    # model. We load these into CPU memory under the '__preserve__/' namescope.
    # These blobs will be stored when saving a model to a weights file. This
    # feature allows for alternating optimization of Faster R-CNN in which blobs
    # unused by one step can still be preserved forward and used to initialize
    # another step.
    for src_name, src_blob in src_blobs.items():
        if (src_name not in unscoped_param_names and
                not src_name.endswith('_momentum') and
                src_blob is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob(
                    '__preserve__/{:s}'.format(src_name), src_blob)
                msg = '{:s} preserved in workspace (unused)'.format(src_name)
                if preffix:
                    msg += '(prefix: {})'.format(preffix)
                logger.info(msg)