def save_model_to_weights_file(weights_file, model):
    """Stash model weights in a dictionary and pickle them to a file. We map
    GPU device scoped names to unscoped names (e.g., 'gpu_0/conv1_w' ->
    'conv1_w').
    """
    logger.info('Saving parameters and momentum to {}'.format(
        os.path.abspath(weights_file)))
    blobs = {}
    # Save all parameters
    for param in model.params:
        scoped_name = param._name
        unscoped_name = c2_utils.UnscopeGPUName(scoped_name)
        if unscoped_name not in blobs:
            logger.debug(' {:s} -> {:s}'.format(scoped_name, unscoped_name))
            blobs[unscoped_name] = workspace.FetchBlob(scoped_name)
    # Save momentum
    for param in model.TrainableParams():
        scoped_name = param._name + '_momentum'
        unscoped_name = c2_utils.UnscopeGPUName(scoped_name)
        if unscoped_name not in blobs:
            logger.debug(' {:s} -> {:s}'.format(scoped_name, unscoped_name))
            blobs[unscoped_name] = workspace.FetchBlob(scoped_name)
    # Save preserved blobs
    for scoped_name in workspace.Blobs():
        if scoped_name.startswith('__preserve__/'):
            unscoped_name = c2_utils.UnscopeGPUName(scoped_name)
            if unscoped_name not in blobs:
                logger.debug(' {:s} -> {:s} (preserved)'.format(
                    scoped_name, unscoped_name))
                blobs[unscoped_name] = workspace.FetchBlob(scoped_name)
    cfg_yaml = yaml.dump(cfg)
    save_object(dict(blobs=blobs, cfg=cfg_yaml), weights_file)
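# A minimal sketch for inspecting a weights file written by
# save_model_to_weights_file(). It assumes only that save_object() pickles
# the dict (which matches how initialize_gpu_from_weights_file() reads it
# back below); the path is hypothetical.
import pickle

def inspect_weights_file(weights_file='/tmp/model_final.pkl'):
    with open(weights_file, 'rb') as f:
        saved = pickle.load(f)
    # Current format is {'blobs': ..., 'cfg': ...}; older files were a bare
    # blob dictionary.
    blobs = saved['blobs'] if 'blobs' in saved else saved
    for name in sorted(blobs.keys()):
        print('{:40s} {}'.format(name, getattr(blobs[name], 'shape', '')))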
def print_net(model, namescope='gpu_0'):
    """Print the model network."""
    logger.info('Printing model: {}'.format(model.net.Name()))
    op_list = model.net.Proto().op
    for op in op_list:
        input_name = op.input
        # For simplicity: only print the first output
        # Not recommended if there are split layers
        output_name = str(op.output[0])
        op_type = op.type
        op_name = op.name
        if namescope is None or output_name.startswith(namescope):
            # Only print the forward pass network
            if output_name.find('grad') >= 0 or output_name.find('__m') >= 0:
                continue
            try:
                # Under some conditions (e.g., dynamic memory optimization)
                # it is possible that the network frees some blobs when they
                # are no longer needed. Handle this case...
                output_shape = workspace.FetchBlob(output_name).shape
            except BaseException:
                output_shape = '<unknown>'
            first_blob = True
            op_label = op_type + (op_name if op_name == '' else ':' + op_name)
            suffix = ' ------- (op: {})'.format(op_label)
            for j in range(len(input_name)):
                if input_name[j] in model.params:
                    continue
                input_blob = workspace.FetchBlob(input_name[j])
                if isinstance(input_blob, np.ndarray):
                    input_shape = input_blob.shape
                    logger.info('{:28s}: {:20s} => {:28s}: {:20s}{}'.format(
                        c2_utils.UnscopeGPUName(str(input_name[j])),
                        '{}'.format(input_shape),
                        c2_utils.UnscopeGPUName(str(output_name)),
                        '{}'.format(output_shape),
                        suffix))
                    if first_blob:
                        first_blob = False
                        suffix = ' ------|'
    logger.info('End of model: {}'.format(model.net.Name()))
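# Note: print_net() reads shapes with workspace.FetchBlob(), so it is only
# informative after the net has been created and run at least once;
# otherwise most output shapes will print as '<unknown>'.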
def _ctx_roi_align(model, ctx, name_scope):
    """Crop per-RoI context features from the memory blob with the
    configured RoI transform (e.g., RoIAlign)."""
    ctx_name = c2_utils.UnscopeGPUName(ctx._name)
    ctx_crop = model.RoIFeatureTransform(
        ctx_name,
        name_scope + '/ctx_crop',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=cfg.MEM.CROP_SIZE,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=cfg.MEM.SCALE)
    return ctx_crop
def _norm_roi_align(model, norm):
    """Crop per-RoI patches from the normalizer blob with the configured
    RoI transform."""
    norm_name = c2_utils.UnscopeGPUName(norm._name)
    norm_crop = model.RoIFeatureTransform(
        norm_name,
        'norm_crop',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=cfg.MEM.CROP_SIZE,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=cfg.MEM.SCALE)
    return norm_crop
def add_loss(model, cls_score, loss_scale=1.0):
    """Add a softmax cross-entropy classification loss (and an accuracy
    metric) on top of cls_score."""
    cls_score_name = c2_utils.UnscopeGPUName(cls_score._name)
    cls_prob_name = cls_score_name.replace('cls_score', 'cls_prob')
    loss_cls_name = cls_score_name.replace('cls_score', 'loss_cls')
    cls_prob, loss_cls = model.net.SoftmaxWithLoss(
        [cls_score, 'labels_int32'], [cls_prob_name, loss_cls_name],
        scale=model.GetLossScale() * loss_scale)
    loss_gradients = blob_utils.get_loss_gradients(model, [loss_cls])
    model.AddLosses([loss_cls])
    accuracy_cls_name = cls_score_name.replace('cls_score', 'accuracy_cls')
    model.Accuracy([cls_prob_name, 'labels_int32'], accuracy_cls_name)
    model.AddMetrics(accuracy_cls_name)
    return loss_gradients, cls_prob
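# Naming convention used by add_loss() above: companion blobs are derived
# from the score blob by substring replacement, so (names illustrative) a
# score blob 'mem_01/cls_score' yields 'mem_01/cls_prob', 'mem_01/loss_cls'
# and 'mem_01/accuracy_cls'.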
def _add_roi_2mlp_head(model, mem_cls, name_scope, reuse):
    """Add an MLP head (fc6, fc7, ...) on RoI crops of the memory; FC
    weights are shared across iterations when reuse is True."""
    init_weight = ('GaussianFill', {'std': cfg.MEM.STD})
    init_bias = ('ConstantFill', {'value': 0.})
    if cfg.MEM.AT_MIN:
        # minimal design
        min_iter = 1
    else:
        min_iter = 0
    roi_size = cfg.MEM.CROP_SIZE
    ctx_crop = _ctx_roi_align(model, mem_cls, name_scope)
    bl_in = ctx_crop
    dim_in = cfg.MEM.C * roi_size * roi_size
    dim_out = cfg.MEM.FC_C
    for nf in range(cfg.MEM.FC_L):
        suffix = '/fc{}'.format(nf + 6)
        if not reuse:
            bl_out = model.FC(
                bl_in,
                name_scope + suffix,
                dim_in,
                dim_out,
                weight_init=init_weight,
                bias_init=init_bias)
        else:
            bl_out = model.FCShared(
                bl_in,
                name_scope + suffix,
                dim_in,
                dim_out,
                weight='mem_%02d/fc%d_w' % (min_iter, nf + 6),
                bias='mem_%02d/fc%d_b' % (min_iter, nf + 6))
        bl_in = model.Relu(bl_out, bl_out)
        dim_in = dim_out
    return c2_utils.UnscopeGPUName(bl_in._name), dim_out
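# Worked size example for _add_roi_2mlp_head() (the config values below are
# assumed for illustration, not read from this repo): with cfg.MEM.C = 512
# memory channels and cfg.MEM.CROP_SIZE = 7, the flattened RoI crop feeding
# the first FC layer has dim_in = 512 * 7 * 7 = 25088 features, and each of
# the cfg.MEM.FC_L layers maps down to cfg.MEM.FC_C outputs.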
def _single_gpu_build_func(model):
    """Builds the model on a single GPU. Can be called in a loop over GPUs
    with name and device scoping to create a data parallel model."""
    blob_conv, dim_conv, spatial_scale_conv = add_conv_body_func(model)
    if not model.train:
        model.conv_body_net = model.net.Clone('conv_body_net')
    if cfg.FPN.FPN_ON:
        # After adding the RPN head, restrict FPN blobs and scales to those
        # used in the RoI heads
        blob_conv, spatial_scale_conv = _narrow_to_fpn_roi_levels(
            blob_conv, spatial_scale_conv)
    # Break the Fast R-CNN head down
    blob_frcn, dim_frcn = add_roi_box_head_func(model, blob_conv, dim_conv,
                                                spatial_scale_conv)
    fast_rcnn_heads.add_fast_rcnn_outputs_class_only(model, blob_frcn,
                                                     dim_frcn)
    head_loss_gradients = {}
    if model.train:
        head_loss_gradients['base'] = \
            fast_rcnn_heads.add_fast_rcnn_losses_class_only(model)
    image_blob_name = core.ScopedName('data')
    rois_name = core.ScopedName('rois')
    if 'gpu_0' in rois_name:
        model.AddSummaryImageBoxes(image_blob_name, rois_name)
    # Stop gradients so the memory iterations below do not backprop into the
    # backbone or the base classifier
    if cfg.FPN.FPN_ON:
        blob_conv = [
            model.StopGradient(bc, c2_utils.UnscopeGPUName(bc._name + '_nb'))
            for bc in blob_conv
        ]
    else:
        blob_conv = model.StopGradient(
            blob_conv, c2_utils.UnscopeGPUName(blob_conv._name + '_nb'))
    cls_score = u'cls_score'
    cls_score_base = model.StopGradient(cls_score, cls_score + '_nb')
    cls_prob = u'cls_prob'
    cls_prob_base = core.ScopedBlobReference(cls_prob)
    # cls_prob_base = model.StopGradient(cls_prob, cls_prob + '_nb')
    mem = region_memory_model.init(model)
    cls_score_list = [cls_score_base]
    norm = region_memory_model.init_normalizer(model)
    if 'gpu_0' in mem._name:
        model.AddSummaryMem(mem._name)
    if cfg.MEM.AT_MIN:
        cls_attend_list = []
    else:
        cls_attend_list = [
            region_memory_model.init_attenton_prediction(model, mem)
        ]
    cls_score = cls_score_base
    cls_prob = cls_prob_base
    reuse = False
    conv_crop = region_memory_model._roi_align(model, blob_conv,
                                               spatial_scale_conv)
    conv_crop_nb = model.StopGradient(
        conv_crop, c2_utils.UnscopeGPUName(conv_crop._name + '_nb'))
    norm_crop = region_memory_model._norm_roi_align(model, norm)
    norm_diff = model.InvRoIAlign(core.ScopedBlobReference('rois'), norm,
                                  norm_crop)
    if 'gpu_0' in norm_diff._name:
        model.AddSummaryMem(norm_diff._name)
    for iter in range(1, cfg.MEM.ITER + 1):
        mem = region_memory_model.update(model,
                                         mem,
                                         norm_diff,
                                         conv_crop_nb,
                                         dim_conv,
                                         cls_score,
                                         cls_prob,
                                         iter,
                                         reuse=reuse)
        if 'gpu_0' in mem._name:
            model.AddSummaryMem(mem._name)
        # For testing, return cls_prob
        cls_score, cls_prob, cls_attend = region_memory_model.prediction(
            model, mem, cls_score_base, iter, reuse=reuse)
        # For training, it will get cls_prob when getting the loss
        if model.train:
            name = 'mem_%02d' % iter
            head_loss_gradients[name], cls_prob = \
                region_memory_model.add_loss(model, cls_score,
                                             cfg.MEM.WEIGHT)
            cls_score = model.StopGradient(
                cls_score, c2_utils.UnscopeGPUName(cls_score._name + '_nb'))
            # cls_prob = model.StopGradient(
            #     cls_prob, c2_utils.UnscopeGPUName(cls_prob._name + '_nb'))
        cls_score_list.append(cls_score)
        cls_attend_list.append(cls_attend)
        reuse = True
    cls_score_final = region_memory_model.combine(model, cls_score_list,
                                                  cls_attend_list)
    if model.train:
        head_loss_gradients['final'], cls_prob_final = \
            region_memory_model.add_loss(model, cls_score_final,
                                         cfg.MEM.WEIGHT_FINAL)
        loss_gradients = {}
        for lg in head_loss_gradients.values():
            if lg is not None:
                loss_gradients.update(lg)
        return loss_gradients
    else:
        cls_prob_final = region_memory_model.add_final_prob(
            model, cls_score_final)
        return None
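# High-level flow of _single_gpu_build_func() (summary only, no new ops):
#   1. Backbone features feed a class-only Fast R-CNN head and its loss.
#   2. Gradients are stopped (the '*_nb' blobs) before the memory branch, so
#      the reasoning iterations do not backprop into the base detector.
#   3. For iter = 1..cfg.MEM.ITER, the spatial memory is updated from the
#      gradient-stopped RoI features and the previous predictions, then new
#      class scores are predicted from the memory (each with its own loss
#      during training).
#   4. combine() fuses the per-iteration scores and attention blobs into
#      cls_score_final, which gets a final loss (training) or a softmax via
#      add_final_prob() (inference).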
def initialize_gpu_from_weights_file(model, weights_file, gpu_id=0):
    """Initialize a network with ops on a specific GPU.

    If you use CUDA_VISIBLE_DEVICES to target specific GPUs, Caffe2 will
    automatically map logical GPU ids (starting from 0) to the physical GPUs
    specified in CUDA_VISIBLE_DEVICES.
    """
    logger.info('Loading weights from: {}'.format(weights_file))
    ws_blobs = workspace.Blobs()
    with open(weights_file, 'r') as f:
        src_blobs = pickle.load(f)
    if 'cfg' in src_blobs:
        saved_cfg = yaml.load(src_blobs['cfg'])
        configure_bbox_reg_weights(model, saved_cfg)
    if 'blobs' in src_blobs:
        # Backwards compat--dictionary used to be only blobs, now they are
        # stored under the 'blobs' key
        src_blobs = src_blobs['blobs']
    # Initialize weights on GPU gpu_id only
    unscoped_param_names = OrderedDict()  # Print these out in model order
    for blob in model.params:
        unscoped_param_names[c2_utils.UnscopeGPUName(blob._name)] = True
    with c2_utils.NamedCudaScope(gpu_id):
        for unscoped_param_name in unscoped_param_names.keys():
            if (unscoped_param_name.find(']_') >= 0
                    and unscoped_param_name not in src_blobs):
                # Special case for sharing initialization from a pretrained
                # model:
                # If a blob named '_[xyz]_foo' is in model.params and not in
                # the initialization blob dictionary, then load source blob
                # 'foo' into destination blob '_[xyz]_foo'
                src_name = unscoped_param_name[
                    unscoped_param_name.find(']_') + 2:]
            else:
                src_name = unscoped_param_name
            if src_name not in src_blobs:
                logger.info('{:s} not found'.format(src_name))
                continue
            dst_name = core.ScopedName(unscoped_param_name)
            has_momentum = src_name + '_momentum' in src_blobs
            has_momentum_str = ' [+ momentum]' if has_momentum else ''
            logger.debug(
                '{:s}{:} loaded from weights file into {:s}: {}'.format(
                    src_name, has_momentum_str, dst_name,
                    src_blobs[src_name].shape))
            if dst_name in ws_blobs:
                # If the blob is already in the workspace, make sure that it
                # matches the shape of the loaded blob
                ws_blob = workspace.FetchBlob(dst_name)
                assert ws_blob.shape == src_blobs[src_name].shape, \
                    ('Workspace blob {} with shape {} does not match '
                     'weights file shape {}').format(
                        src_name, ws_blob.shape, src_blobs[src_name].shape)
            workspace.FeedBlob(
                dst_name,
                src_blobs[src_name].astype(np.float32, copy=False))
            if has_momentum:
                workspace.FeedBlob(
                    dst_name + '_momentum',
                    src_blobs[src_name + '_momentum'].astype(
                        np.float32, copy=False))
    # We preserve blobs that are in the weights file but not used by the
    # current model. We load these into CPU memory under the '__preserve__/'
    # namescope. These blobs will be stored when saving a model to a weights
    # file. This feature allows for alternating optimization of Faster R-CNN
    # in which blobs unused by one step can still be preserved forward and
    # used to initialize another step.
    for src_name in src_blobs.keys():
        if (src_name not in unscoped_param_names
                and not src_name.endswith('_momentum')
                and src_blobs[src_name] is not None):
            with c2_utils.CpuScope():
                workspace.FeedBlob(
                    '__preserve__/{:s}'.format(src_name), src_blobs[src_name])
                logger.debug(
                    '{:s} preserved in workspace (unused)'.format(src_name))
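# Illustration of the '_[xyz]_foo' sharing convention handled above (the
# blob names here are hypothetical): a destination parameter that is absent
# from the weights file borrows its initialization from the blob named
# after the ']_' marker.
name = '_[shared]_conv1_w'
assert name[name.find(']_') + 2:] == 'conv1_w'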
def _affine(model, conv_crop, dim, logits, name_scope, reuse):
    init_weight = ('GaussianFill', {'std': cfg.MEM.IN_STD})
    init_bias_scale = ('ConstantFill', {'value': cfg.MEM.IN_R})
    init_bias_offset = ('ConstantFill', {'value': 0.})
    cls_pred_dim = model.num_classes
    scaler_name = name_scope + '/affine/scaler'
    offset_name = name_scope + '/affine/offset'
    if not reuse:
        scaler = model.FC(
            logits,
            scaler_name,
            cls_pred_dim,
            dim,
            weight_init=init_weight,
            bias_init=init_bias_scale)
        offset = model.FC(
            logits,
            offset_name,
            cls_pred_dim,
            dim,
            weight_init=init_weight,
            bias_init=init_bias_offset)
    else:
        scaler_weight_name = 'mem_01/affine/scaler_w'
        scaler_bias_name = 'mem_01/affine/scaler_b'
        scaler = model.FCShared(
            logits,
            scaler_name,
            cls_pred_dim,
            dim,
            weight=scaler_weight_name,
            bias=scaler_bias_name)
        offset_weight_name = 'mem_01/affine/offset_w'
        offset_bias_name = 'mem_01/affine/offset_b'
        offset = model.FCShared(
            logits,
            offset_name,
            cls_pred_dim,
            dim,
            weight=offset_weight_name,
            bias=offset_bias_name)
    if cfg.MEM.IN_ACT == 'tanh':
        scaler = model.Tanh(scaler, scaler)
    elif cfg.MEM.IN_ACT == 'none':
        pass
    else:
        raise NotImplementedError
    # Then try to combine them together
    scaled_name = name_scope + '/affine/scaled'
    blobs_in = [c2_utils.UnscopeGPUName(conv_crop._name), scaler_name]
    blobs_out = [scaled_name]
    scaled = model.MulConvFC(blobs_in, blobs_out)
    result_name = name_scope + '/affine/result'
    blobs_in = [c2_utils.UnscopeGPUName(scaled._name), offset_name]
    blobs_out = [result_name]
    result = model.SumConvFC(blobs_in, blobs_out)
    result = model.Relu(result, result)
    if 'gpu_0' in result._name:
        model.AddSummaryHistogram(conv_crop._name)
        model.AddSummaryHistogram(logits._name)
        model.AddSummaryHistogram(scaler._name)
        model.AddSummaryHistogram(offset._name)
        model.AddSummaryHistogram(result._name)
    return result
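# A numpy sketch of the modulation wired up by _affine(): the per-RoI logits
# produce a channel-wise scale and offset applied to the cropped features.
# This assumes MulConvFC / SumConvFC broadcast an FC output of shape (N, C)
# over a feature map of shape (N, C, H, W); shapes and values are toy.
import numpy as np

N, C, H, W = 2, 4, 3, 3
conv_crop = np.random.randn(N, C, H, W).astype(np.float32)
scaler = np.tanh(np.random.randn(N, C).astype(np.float32))  # IN_ACT 'tanh'
offset = np.random.randn(N, C).astype(np.float32)
result = np.maximum(
    conv_crop * scaler[:, :, None, None] + offset[:, :, None, None],
    0.)  # final Relu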
def _inplace_update(model, mem_crop, input_crop, dim, name_scope, reuse):
    input_init = ('GaussianFill', {'std': cfg.MEM.U_STD})
    mem_init = ('GaussianFill', {'std': cfg.MEM.U_STD * cfg.MEM.FM_R})
    input_gate_init = ('GaussianFill', {'std': cfg.MEM.U_STD / cfg.MEM.VG_R})
    mem_gate_init = ('GaussianFill', {
        'std': cfg.MEM.U_STD * cfg.MEM.FM_R / cfg.MEM.VG_R
    })
    bias_init = ('ConstantFill', {'value': 0.})
    mconv = cfg.MEM.CONV
    mpad = (mconv - 1) // 2

    p_input_name = name_scope + '/inplace/input_p'
    p_reset_name = name_scope + '/inplace/reset_p'
    p_update_name = name_scope + '/inplace/update_p'
    m_input_name = name_scope + '/inplace/input_m'
    m_reset_name = name_scope + '/inplace/reset_m'
    m_update_name = name_scope + '/inplace/update_m'
    input_name = name_scope + '/inplace/input'
    reset_name = name_scope + '/inplace/reset'
    update_name = name_scope + '/inplace/update'
    mem_crop_name = c2_utils.UnscopeGPUName(mem_crop._name)
    mult_mem_name = name_scope + '/inplace/mult_mem'
    next_crop_raw_name = name_scope + '/next_crop_raw'
    next_crop_name = name_scope + '/next_crop'

    if not reuse:
        p_input = model.Conv(
            input_crop, p_input_name, dim, cfg.MEM.C, mconv,
            stride=1, pad=mpad, weight_init=input_init, bias_init=bias_init)
        p_reset = model.Conv(
            input_crop, p_reset_name, dim, 1, mconv,
            stride=1, pad=mpad, weight_init=input_gate_init,
            bias_init=bias_init)
        p_update = model.Conv(
            input_crop, p_update_name, dim, 1, mconv,
            stride=1, pad=mpad, weight_init=input_gate_init,
            bias_init=bias_init)
        m_reset = model.Conv(
            mem_crop, m_reset_name, cfg.MEM.C, 1, mconv,
            stride=1, pad=mpad, weight_init=mem_gate_init, no_bias=True)
        m_update = model.Conv(
            mem_crop, m_update_name, cfg.MEM.C, 1, mconv,
            stride=1, pad=mpad, weight_init=mem_gate_init, no_bias=True)
        reset = model.net.Sum([p_reset_name, m_reset_name], reset_name)
        reset = model.net.Sigmoid(reset, reset_name)
        blobs_in = [mem_crop_name, reset_name]
        blobs_out = [mult_mem_name]
        mult_mem = model.MulConvGate(blobs_in, blobs_out)
        m_input = model.Conv(
            mult_mem, m_input_name, cfg.MEM.C, cfg.MEM.C, mconv,
            stride=1, pad=mpad, weight_init=mem_init, no_bias=True)
    else:
        p_input_weight_name = 'mem_01/inplace/input_p_w'
        p_input_bias_name = 'mem_01/inplace/input_p_b'
        p_reset_weight_name = 'mem_01/inplace/reset_p_w'
        p_reset_bias_name = 'mem_01/inplace/reset_p_b'
        p_update_weight_name = 'mem_01/inplace/update_p_w'
        p_update_bias_name = 'mem_01/inplace/update_p_b'
        m_input_weight_name = 'mem_01/inplace/input_m_w'
        m_reset_weight_name = 'mem_01/inplace/reset_m_w'
        m_update_weight_name = 'mem_01/inplace/update_m_w'
        p_input = model.ConvShared(
            input_crop, p_input_name, dim, cfg.MEM.C, mconv,
            stride=1, pad=mpad, weight=p_input_weight_name,
            bias=p_input_bias_name)
        p_reset = model.ConvShared(
            input_crop, p_reset_name, dim, 1, mconv,
            stride=1, pad=mpad, weight=p_reset_weight_name,
            bias=p_reset_bias_name)
        p_update = model.ConvShared(
            input_crop, p_update_name, dim, 1, mconv,
            stride=1, pad=mpad, weight=p_update_weight_name,
            bias=p_update_bias_name)
        m_reset = model.ConvShared(
            mem_crop, m_reset_name, cfg.MEM.C, 1, mconv,
            stride=1, pad=mpad, weight=m_reset_weight_name, no_bias=True)
        m_update = model.ConvShared(
            mem_crop, m_update_name, cfg.MEM.C, 1, mconv,
            stride=1, pad=mpad, weight=m_update_weight_name, no_bias=True)
        reset = model.net.Sum([p_reset_name, m_reset_name], reset_name)
        reset = model.net.Sigmoid(reset, reset_name)
        blobs_in = [mem_crop_name, reset_name]
        blobs_out = [mult_mem_name]
        mult_mem = model.MulConvGate(blobs_in, blobs_out)
        m_input = model.ConvShared(
            mult_mem, m_input_name, cfg.MEM.C, cfg.MEM.C, mconv,
            stride=1, pad=mpad, weight=m_input_weight_name, no_bias=True)

    input = model.net.Sum([p_input_name, m_input_name], input_name)
    if cfg.MEM.ACT == 'tanh':
        input = model.Tanh(input, input)
    elif cfg.MEM.ACT == 'relu':
        input = model.Relu(input, input)
    else:
        raise NotImplementedError
    update = model.net.Sum([p_update_name, m_update_name], update_name)
    update = model.net.Sigmoid(update, update_name)
    next_crop_raw = model.net.Sub([input_name, mem_crop_name],
                                  next_crop_raw_name)
    blobs_in = [next_crop_raw_name, update_name]
    blobs_out = [next_crop_name]
    next_crop = model.MulConvGate(blobs_in, blobs_out)
    if 'gpu_0' in p_input._name:
        model.AddSummaryHistogram(p_input._name)
        model.AddSummaryHistogram(m_input._name)
        model.AddSummaryHistogram(p_reset._name)
        model.AddSummaryHistogram(m_reset._name)
        model.AddSummaryHistogram(p_update._name)
        model.AddSummaryHistogram(m_update._name)
        model.AddSummaryHistogram(input._name)
        model.AddSummaryHistogram(reset._name)
        model.AddSummaryHistogram(update._name)
        model.AddSummaryHistogram(mem_crop._name)
        model.AddSummaryHistogram(next_crop_raw._name)
    return next_crop
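# A numpy sketch of the GRU-like update assembled by _inplace_update().
# It assumes MulConvGate multiplies a single-channel gate into every channel
# of its first input (broadcasting), matching how the 1-channel reset/update
# convolutions are used above; shapes and values are toy.
import numpy as np

def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

N, C, H, W = 2, 4, 3, 3
mem_crop = np.random.randn(N, C, H, W)
p_input = np.random.randn(N, C, H, W)      # conv over input_crop
m_input = np.random.randn(N, C, H, W)      # conv over reset-gated memory
p_reset, m_reset, p_update, m_update = (
    np.random.randn(N, 1, H, W) for _ in range(4))

reset = _sigmoid(p_reset + m_reset)          # 1-channel gate, broadcast
candidate = np.tanh(p_input + m_input)       # cfg.MEM.ACT == 'tanh' branch
update = _sigmoid(p_update + m_update)
next_crop = update * (candidate - mem_crop)  # residual delta returned above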