def spatial_gn(model, blob_in, blob_out, dim_in,
               init_scale=1., init_bias=0.,
               ScaleInitializer=None, BiasInitializer=None,
               RunningMeanInitializer=None, RunningVarianceInitializer=None,
               order="NCHW", **kwargs):
    '''
    Group normalizes the input, cf. https://arxiv.org/abs/1803.08494.
    '''

    blob_out = blob_out or model.net.NextName()
    # Input: input, scale, bias
    # Output: output, group_mean, group_inv_std
    # scale: initialize with init_scale (default 1.)
    # [recommendation: set init_scale = 0. in the last layer for each res block]
    # bias: initialize with init_bias (default 0.)

    if model.init_params:
        scale_init = ("ConstantFill", {'value': init_scale})
        bias_init = ("ConstantFill", {'value': init_bias})

        ScaleInitializer = initializers.update_initializer(
            ScaleInitializer, scale_init, ("ConstantFill", {})
        )
        BiasInitializer = initializers.update_initializer(
            BiasInitializer, bias_init, ("ConstantFill", {})
        )
    else:
        ScaleInitializer = initializers.ExternalInitializer()
        BiasInitializer = initializers.ExternalInitializer()

    scale = model.create_param(
        param_name=blob_out + '_s',
        shape=[dim_in],
        initializer=ScaleInitializer,
        tags=ParameterTags.WEIGHT
    )

    bias = model.create_param(
        param_name=blob_out + '_b',
        shape=[dim_in],
        initializer=BiasInitializer,
        tags=ParameterTags.BIAS
    )

    blob_outs = [blob_out, blob_out + "_mean", blob_out + "_std"]

    blob_outputs = model.net.GroupNorm(
        [blob_in, scale, bias],
        blob_outs,
        **kwargs)
    # Return the output
    return blob_outputs[0]
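# Usage sketch (an addition, not part of the original source): assuming this
# helper is registered with brew, as the stock Caffe2 normalization helpers
# are, a call would look roughly like the following. Blob names and dim_in
# are illustrative; group/epsilon are forwarded to the GroupNorm operator.
from caffe2.python import brew, model_helper

gn_model = model_helper.ModelHelper(name="gn_example")
# init_scale=0. follows the recommendation above for the last layer of a
# residual block.
gn_out = brew.spatial_gn(gn_model, 'conv1', 'conv1_gn', dim_in=32,
                         group=4, epsilon=1e-5, init_scale=0.)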
def moments_with_running_stats(model, blob_in, blob_out, dim_in,
                               RunningMeanInitializer=None,
                               RunningVarianceInitializer=None,
                               order="NCHW", **kwargs):
    if model.init_params:
        rm_init = ("ConstantFill", {'value': 0.0})
        riv_init = ("ConstantFill", {'value': 1.0})

        RunningMeanInitializer = initializers.update_initializer(
            RunningMeanInitializer, rm_init, ("ConstantFill", {}))
        RunningVarianceInitializer = initializers.update_initializer(
            RunningVarianceInitializer, riv_init, ("ConstantFill", {}))
    else:
        RunningMeanInitializer = initializers.ExternalInitializer()
        RunningVarianceInitializer = initializers.ExternalInitializer()

    running_mean = model.create_param(
        param_name=blob_out + '_rm',
        shape=[dim_in],
        initializer=RunningMeanInitializer,
        tags=ParameterTags.COMPUTED_PARAM)

    # this is just the running variance, despite the '_riv' name
    running_inv_var = model.create_param(
        param_name=blob_out + '_riv',
        shape=[dim_in],
        initializer=RunningVarianceInitializer,
        tags=ParameterTags.COMPUTED_PARAM)

    blob_outs = [blob_out + "_sm", blob_out + "_sv"]
    if order == 'NCHW':
        blob_outputs = model.net.Moments(
            [blob_in], blob_outs,
            axes=[0, 2, 3],
            order=order, keepdims=False, **kwargs)
    elif order == 'NHWC':
        blob_outputs = model.net.Moments(
            [blob_in], blob_outs,
            axes=[0, 1, 2],
            order=order, keepdims=False, **kwargs)
    else:
        # guard against returning an undefined blob_outputs
        raise ValueError("order must be 'NCHW' or 'NHWC', got: " + order)
    return blob_outputs
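# Usage sketch (illustrative, not from the original source): for an NCHW
# activation blob the helper above computes per-channel batch moments over
# axes [0, 2, 3]; the created '_rm'/'_riv' params are registered but not
# written by the Moments op itself. Blob names and dim_in are hypothetical.
from caffe2.python import model_helper

mom_model = model_helper.ModelHelper(name="moments_example")
batch_mean, batch_var = moments_with_running_stats(
    mom_model, 'conv1', 'conv1_stats', dim_in=64, order='NCHW')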
def visit_initializer(self, initializer: d5.ops.OnnxTensor, network: Caffe2Network):
    with core.DeviceScope(self.device_option):
        network.train_init_net.AddExternalInput(initializer.name)
        network.test_net.AddExternalInput(initializer.name)

        network.train_model.create_param(
            initializer.name, initializer.dims,
            initializer=initializers.ExternalInitializer())
        network.test_model.create_param(
            initializer.name, initializer.dims,
            initializer=initializers.ExternalInitializer())

        network.workspace.FeedBlob(initializer.name, initializer.get_data(),
                                   self.device_option)
def LoadCustomSqueezenetModel(name, initNetPath, predictNetPath, deviceOps,
                              argScope, learningRate=10**-2,
                              freezeOpsUntil='conv10'):
    model = model_helper.ModelHelper(name, arg_scope=argScope, init_params=False)

    predNetPb = c2p2.NetDef()
    with open(predictNetPath, 'rb') as f:
        predNetPb.ParseFromString(f.read())

    initNetPb = c2p2.NetDef()
    with open(initNetPath, 'rb') as f:
        initNetPb.ParseFromString(f.read())

    opsFreezed = True
    for op in initNetPb.op:
        paramName = op.output[0]
        if opsFreezed and freezeOpsUntil in paramName:
            opsFreezed = False
        # Note the parentheses: without them, `and` binds tighter than `or`,
        # so every '_b' parameter would be registered even while frozen.
        if not opsFreezed and (paramName.endswith('_w') or paramName.endswith('_b')):
            tags = (ParameterTags.WEIGHT if paramName.endswith('_w')
                    else ParameterTags.BIAS)
            model.create_param(param_name=paramName, shape=op.arg[0],
                               initializer=initializers.ExternalInitializer(),
                               tags=tags)

    # removing conv10_w and conv10_b from init net
    # initNetPb.op.pop(51)
    # initNetPb.op.pop(50)
    model.param_init_net = core.Net(initNetPb)
    model.net = core.Net(predNetPb)
    model.Squeeze('softmaxout', 'softmax', dims=[2, 3])
    ScaffoldModelTrainingOperators(model, 'softmax', 'label', learningRate)
    return model, predNetPb
def _FC_or_packed_FC(model, op_call, blob_in, blob_out, dim_in, dim_out,
                     weight_init=None, bias_init=None,
                     WeightInitializer=None, BiasInitializer=None,
                     enable_tensor_core=False, float16_compute=False, **kwargs):
    WeightInitializer = initializers.update_initializer(
        WeightInitializer, weight_init, ("XavierFill", {}))
    BiasInitializer = initializers.update_initializer(
        BiasInitializer, bias_init, ("ConstantFill", {}))
    if not model.init_params:
        WeightInitializer = initializers.ExternalInitializer()
        BiasInitializer = initializers.ExternalInitializer()

    blob_out = blob_out or model.net.NextName()
    bias_tags = [ParameterTags.BIAS]
    if 'freeze_bias' in kwargs:
        bias_tags.append(ParameterTags.COMPUTED_PARAM)

    weight = model.create_param(
        param_name=blob_out + '_w',
        shape=[dim_out, dim_in],
        initializer=WeightInitializer,
        tags=ParameterTags.WEIGHT
    )
    bias = model.create_param(
        param_name=blob_out + '_b',
        shape=[dim_out, ],
        initializer=BiasInitializer,
        tags=bias_tags
    )

    # enable TensorCore by setting appropriate engine
    if enable_tensor_core:
        kwargs['engine'] = 'TENSORCORE'

    # Enable float 16 compute kernel (relevant for CUDA)
    if float16_compute:
        kwargs['float16_compute'] = True

    return op_call([blob_in, weight, bias], blob_out, **kwargs)
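# Usage sketch (illustrative): in Caffe2 this helper backs the public
# brew.fc / brew.packed_fc entry points, so a plain fully connected layer is
# created like this. Blob names and sizes are hypothetical.
from caffe2.python import brew, model_helper

fc_model = model_helper.ModelHelper(name="fc_example")
fc1 = brew.fc(fc_model, 'data', 'fc1', dim_in=256, dim_out=128)
# Weights land in 'fc1_w' (shape [128, 256], XavierFill by default) and
# biases in 'fc1_b' (shape [128], ConstantFill), tagged WEIGHT and BIAS.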
def feed_tensor(self, name, new_value, device_option=None, is_param=False):
    if is_param:
        self.train_model.create_param(
            name, new_value.shape,
            initializer=initializers.ExternalInitializer())
    device_option = device_option if device_option else self.device_option
    return workspace.FeedBlob(name, new_value, device_option)
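# Usage sketch (hypothetical): feeding a fresh weight tensor into the
# workspace and registering it as an externally initialized parameter in one
# call. 'network' stands for an instance of the class defining feed_tensor.
import numpy as np

network.feed_tensor('fc1_w',
                    np.zeros((128, 256), dtype=np.float32),
                    is_param=True)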
def TranslateAlexNetOrVGG19(name, classCount, initNetPath, predictNetPath,
                            devOps, argScope, learningRate=10**-2):
    model = model_helper.ModelHelper(name, arg_scope=argScope, init_params=False)

    initNetPb = c2p2.NetDef()
    with open(initNetPath, 'rb') as f:
        initNetPb.ParseFromString(f.read())

    for op in initNetPb.op:
        if op.output[0] == 'fc8_w':
            for arg in op.arg:
                if arg.name == 'shape':
                    arg.ClearField('ints')
                    arg.ints.extend([classCount, 4096])
                elif arg.name == 'values':
                    arg.ClearField('floats')
                    # reinitialize with Gaussian noise
                    arg.floats.extend(np.random.normal(0, .1, 4096 * classCount))
        elif op.output[0] == 'fc8_b':
            for arg in op.arg:
                if arg.name == 'shape':
                    arg.ClearField('ints')
                    arg.ints.extend([classCount])
                elif arg.name == 'values':
                    arg.ClearField('floats')
                    arg.floats.extend(np.zeros((classCount,)).astype(np.float32))

    for op in initNetPb.op:
        if op.output[0] in ['fc8_w', 'fc8_b']:
            tag = (ParameterTags.BIAS if op.output[0].endswith('_b')
                   else ParameterTags.WEIGHT)
            model.create_param(op.output[0], op.arg[0],
                               initializers.ExternalInitializer(), tags=tag)

    model.param_init_net = core.Net(initNetPb)

    predNetPb = c2p2.NetDef()
    with open(predictNetPath, 'rb') as f:
        predNetPb.ParseFromString(f.read())
    fixInPlaceOps(predNetPb.op)
    model.net = core.Net(predNetPb)

    ScaffoldModelTrainingOperators(model, 'prob', 'label', learningRate, devOps)
    return model, predNetPb
def TranslateSqueezenetModel(name, classCount, initNetPath, predictNetPath,
                             devOps, argScope, learningRate=10**-2):
    predNetPb = c2p2.NetDef()
    with open(predictNetPath, 'rb') as f:
        predNetPb.ParseFromString(f.read())

    initNetPb = c2p2.NetDef()
    with open(initNetPath, 'rb') as f:
        initNetPb.ParseFromString(f.read())

    model = model_helper.ModelHelper(name, arg_scope=argScope, init_params=False)

    for op in initNetPb.op:
        if op.output[0] in ['conv10_w', 'conv10_b']:
            tag = (ParameterTags.WEIGHT if op.output[0].endswith('_w')
                   else ParameterTags.BIAS)
            # create params inside the model
            model.create_param(op.output[0], op.arg[0],
                               initializers.ExternalInitializer(), tags=tag)

    # Remove the conv10_w and conv10_b ops (indices 50 and 51) from the
    # protobuf. Popping index 50 twice works because the second op shifts
    # down after the first pop. Replacement XavierFill/ConstantFill ops are
    # added to the model below.
    initNetPb.op.pop(50)
    initNetPb.op.pop(50)

    fixInPlaceOps(predNetPb.op)
    model.net = core.Net(predNetPb)
    model.Squeeze('softmaxout', 'softmax', dims=[2, 3])

    model.param_init_net = core.Net(initNetPb)
    model.param_init_net.XavierFill([], 'conv10_w', shape=[classCount, 512, 1, 1])
    model.param_init_net.ConstantFill([], 'conv10_b', shape=[classCount])

    ScaffoldModelTrainingOperators(model, 'softmax', 'label', learningRate)
    # InscribeDeviceOptionsToModel(model, devOps)
    return model, core.Net(predNetPb)
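# Worked example (an addition): why popping index 50 twice removes the ops
# at indices 50 and 51 - after the first pop, the element that was at 51
# shifts down to index 50.
ops = list(range(53))
ops.pop(50)
ops.pop(50)
assert ops == list(range(50)) + [52]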
# Populate the model obj with the predict net stuff, which defines the
# structure of the model
predict_net_proto = caffe2_pb2.NetDef()
with open(PREDICT_NET, "rb") as f:
    predict_net_proto.ParseFromString(f.read())
tmp_predict_net = core.Net(predict_net_proto)
# test_model.net = test_model.net.AppendNet(tmp_predict_net)
test_model.net = tmp_predict_net

##### Externally initialize params so we can extract gradients
for i, op in enumerate(init_net_proto.op):
    param_name = op.output[0]
    if param_name != 'data':
        assert op.arg[0].name == "shape"
        tags = (ParameterTags.WEIGHT if param_name.endswith("_w")
                else ParameterTags.BIAS)
        test_model.create_param(param_name=op.output[0],
                                shape=op.arg[0].ints,
                                initializer=initializers.ExternalInitializer(),
                                tags=tags)

##### Add the "training operators" to the model
xent = test_model.LabelCrossEntropy(['softmax', 'label'], 'xent')
loss = test_model.AveragedLoss(xent, 'loss')
test_model.AddGradientOperators([loss])

##################################################################################
### Run
##################################################################################

# Initialize Dataset Object
test_dataset = jdh.Jester_Dataset(dictionary_file=TEST_DICT, seq_size=10)

# Prime the workspace with some data so we can run the init net once
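# A minimal sketch (not from the original script) of the priming step the
# last comment describes; the 'data' blob shape here is hypothetical.
import numpy as np
from caffe2.python import workspace

workspace.FeedBlob('data', np.zeros((1, 3, 227, 227), dtype=np.float32))
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)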
def spatial_bn(model, blob_in, blob_out, dim_in,
               init_scale=1., init_bias=0.,
               ScaleInitializer=None, BiasInitializer=None,
               RunningMeanInitializer=None, RunningVarianceInitializer=None,
               order="NCHW", **kwargs):
    blob_out = blob_out or model.net.NextName()
    # Input: input, scale, bias, est_mean, est_inv_var
    # Output: output, running_mean, running_inv_var, saved_mean,
    #         saved_inv_var
    # scale: initialize with init_scale (default 1.)
    # bias: initialize with init_bias (default 0.)
    # est mean: zero
    # est var: ones

    if model.init_params:
        scale_init = ("ConstantFill", {'value': init_scale})
        bias_init = ("ConstantFill", {'value': init_bias})
        rm_init = ("ConstantFill", {'value': 0.0})
        riv_init = ("ConstantFill", {'value': 1.0})

        ScaleInitializer = initializers.update_initializer(
            ScaleInitializer, scale_init, ("ConstantFill", {})
        )
        BiasInitializer = initializers.update_initializer(
            BiasInitializer, bias_init, ("ConstantFill", {})
        )
        RunningMeanInitializer = initializers.update_initializer(
            RunningMeanInitializer, rm_init, ("ConstantFill", {})
        )
        RunningVarianceInitializer = initializers.update_initializer(
            RunningVarianceInitializer, riv_init, ("ConstantFill", {})
        )
    else:
        ScaleInitializer = initializers.ExternalInitializer()
        BiasInitializer = initializers.ExternalInitializer()
        RunningMeanInitializer = initializers.ExternalInitializer()
        RunningVarianceInitializer = initializers.ExternalInitializer()

    scale = model.create_param(
        param_name=blob_out + '_s',
        shape=[dim_in],
        initializer=ScaleInitializer,
        tags=ParameterTags.WEIGHT
    )

    bias = model.create_param(
        param_name=blob_out + '_b',
        shape=[dim_in],
        initializer=BiasInitializer,
        tags=ParameterTags.BIAS
    )

    running_mean = model.create_param(
        param_name=blob_out + '_rm',
        shape=[dim_in],
        initializer=RunningMeanInitializer,
        tags=ParameterTags.COMPUTED_PARAM
    )

    running_inv_var = model.create_param(
        param_name=blob_out + '_riv',
        shape=[dim_in],
        initializer=RunningVarianceInitializer,
        tags=ParameterTags.COMPUTED_PARAM
    )

    blob_outs = [blob_out, running_mean, running_inv_var,
                 blob_out + "_sm", blob_out + "_siv"]
    if 'is_test' in kwargs and kwargs['is_test']:
        blob_outputs = model.net.SpatialBN(
            [blob_in, scale, bias, blob_outs[1], blob_outs[2]], [blob_out],
            order=order, **kwargs)
        return blob_outputs
    else:
        blob_outputs = model.net.SpatialBN(
            [blob_in, scale, bias, blob_outs[1], blob_outs[2]], blob_outs,
            order=order, **kwargs)
        # Return the output
        return blob_outputs[0]
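# Usage sketch (illustrative): this helper is what brew.spatial_bn calls, so
# a training-mode batch-norm layer looks like the following. Blob names and
# dim_in are hypothetical; epsilon/momentum are forwarded to SpatialBN.
from caffe2.python import brew, model_helper

bn_model = model_helper.ModelHelper(name="bn_example")
bn_out = brew.spatial_bn(bn_model, 'conv1', 'conv1_bn', dim_in=64,
                         epsilon=1e-5, momentum=0.9, is_test=False)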
def _ConvBase(model, is_nd, blob_in, blob_out, dim_in, dim_out, kernel,
              weight_init=None, bias_init=None,
              WeightInitializer=None, BiasInitializer=None,
              group=1, transform_inputs=None, use_cudnn=False,
              order="NCHW", cudnn_exhaustive_search=False,
              ws_nbytes_limit=None, **kwargs):
    kernels = []
    if is_nd:
        if not isinstance(kernel, list):
            kernels = [kernel]
        else:
            kernels = kernel
    else:
        if isinstance(kernel, list):
            assert len(kernel) == 2, "Conv supports only a 2D kernel."
            kernels = kernel
        else:
            kernels = [kernel] * 2

    requested_engine = kwargs.get('engine')
    if requested_engine is not None:
        if use_cudnn and requested_engine != 'CUDNN':
            raise ValueError(
                'When use_cudnn=True, the only engine you can specify is '
                '"CUDNN"')
        elif not use_cudnn and requested_engine == 'CUDNN':
            raise ValueError(
                'When use_cudnn=False, the only engine you can specify is '
                '""')

    if use_cudnn:
        kwargs['engine'] = 'CUDNN'
        kwargs['exhaustive_search'] = cudnn_exhaustive_search
        if ws_nbytes_limit:
            kwargs['ws_nbytes_limit'] = ws_nbytes_limit

    use_bias = \
        False if ("no_bias" in kwargs and kwargs["no_bias"]) else True
    blob_out = blob_out or model.net.NextName()
    weight_shape = [dim_out]
    if order == "NCHW":
        weight_shape.append(int(dim_in / group))
        weight_shape.extend(kernels)
    else:
        weight_shape.extend(kernels)
        weight_shape.append(int(dim_in / group))

    WeightInitializer = initializers.update_initializer(
        WeightInitializer, weight_init, ("XavierFill", {}))
    BiasInitializer = initializers.update_initializer(
        BiasInitializer, bias_init, ("ConstantFill", {}))
    if not model.init_params:
        WeightInitializer = initializers.ExternalInitializer()
        BiasInitializer = initializers.ExternalInitializer()

    weight = model.create_param(
        param_name=blob_out + '_w',
        shape=weight_shape,
        initializer=WeightInitializer,
        tags=ParameterTags.WEIGHT
    )
    if use_bias:
        bias = model.create_param(
            param_name=blob_out + '_b',
            shape=[dim_out, ],
            initializer=BiasInitializer,
            tags=ParameterTags.BIAS
        )

    if use_bias:
        inputs = [blob_in, weight, bias]
    else:
        inputs = [blob_in, weight]

    if transform_inputs is not None:
        transform_inputs(model, blob_out, inputs)

    # For the operator, we no longer need to provide the no_bias field
    # because it can automatically figure this out from the number of
    # inputs.
    if 'no_bias' in kwargs:
        del kwargs['no_bias']
    if group != 1:
        kwargs['group'] = group

    if is_nd:
        return model.net.Conv(
            inputs, blob_out, kernels=kernels, order=order, **kwargs)
    else:
        if isinstance(kernel, list):
            return model.net.Conv(
                inputs, blob_out, kernel_h=kernel[0], kernel_w=kernel[1],
                order=order, **kwargs)
        else:
            return model.net.Conv(
                inputs, blob_out, kernel=kernel, order=order, **kwargs)
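# Usage sketch (illustrative): _ConvBase backs brew.conv (is_nd=False) and
# brew.conv_nd (is_nd=True). A standard 3x3 convolution with hypothetical
# blob names and channel counts:
from caffe2.python import brew, model_helper

conv_model = model_helper.ModelHelper(name="conv_example")
conv1 = brew.conv(conv_model, 'data', 'conv1', dim_in=3, dim_out=16,
                  kernel=3, pad=1, stride=1)
# The weight param 'conv1_w' gets shape [16, 3, 3, 3] in NCHW order.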
init_net_proto.ParseFromString(f.read())

# Define the parameters to learn in the model. Since we are no longer using
# all 1000 classes of ImageNet, we have to reset these two layers to have
# output dimensions = # of classes in UCF11 = 11 classes
params_to_learn = ['loss3/classifier_w', 'loss3/classifier_b']

# Iterate through all of the ops in the init_net
for op in init_net_proto.op:
    param_name = op.output[0]
    # If the current op is a parameter we want to learn
    if param_name in params_to_learn:
        print("here")
        # Tag as WEIGHT or BIAS depending on the parameter's suffix
        tags = (ParameterTags.WEIGHT if param_name.endswith("_w")
                else ParameterTags.BIAS)
        # (?) - why is the shape the same as op.arg[0]; shouldn't we change
        # the dimension here?
        my_model.create_param(param_name=param_name,
                              shape=op.arg[0],
                              initializer=initializers.ExternalInitializer(),
                              tags=tags)

# Walk the ops of the init net; when we find one whose output is in
# params_to_learn, mark its index so we can delete it later.
indx_to_remove = []
for i in range(len(init_net_proto.op)):
    print("\n******************************")
    print("OP: ", i)
    print("******************************")
    print("OP_NAME: ", init_net_proto.op[i].name)
    print("OP_INPUT: ", init_net_proto.op[i].input)
    print("OP_OUTPUT: ", init_net_proto.op[i].output)
    if init_net_proto.op[i].output[0] in params_to_learn:
        indx_to_remove.append(i)
        print("OP_SHAPE: ", init_net_proto.op[i].arg[0])
params_to_learn = ['conv10_w', 'conv10_b']

# Iterate through all of the ops in the init_net
for op in init_net_proto.op:
    param_name = op.output[0]
    # If the current op is a parameter we want to learn (i.e. 'conv10_w' or 'conv10_b')
    if param_name in params_to_learn:
        # Tag as WEIGHT or BIAS depending on whether it is conv10_w or conv10_b
        tags = (ParameterTags.WEIGHT if param_name.endswith("_w")
                else ParameterTags.BIAS)
        # (?) - what is this doing? - what is a parameter in a model?
        # Since we are going to delete these two ops, we must create new
        # versions of these params and specify that they will be initialized
        # externally.
        # (?) - why is the shape the same as op.arg[0]; shouldn't we change
        # the dimension here?
        my_model.create_param(param_name=param_name,
                              shape=op.arg[0],
                              initializer=initializers.ExternalInitializer(),
                              tags=tags)

# Remove the conv10_w and conv10_b initializers at indices (50, 51).
# To get the numbers 50 and 51, inspect the ops in the init_net model:
#   $ python print_pb_verbose.py ../../models/squeezenet/init_net.pb
# op[50] outputs 'conv10_w' and op[51] outputs 'conv10_b', so these are the
# ops we want to remove and replace. A sketch of that step follows below.

# Print the info for the conv10 ops we are about to remove. This verifies
# that ops 50 and 51 in the init_net are in fact conv10_w and conv10_b, and
# it also shows us the shape information, which we must change from 1000 to 11.
print("The ops we are about to remove and replace...")
for i in [50, 51]:
    print("\n******************************")
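# Sketch (an addition, mirroring TranslateSqueezenetModel above) of the
# removal and re-initialization the comments describe; assumes the 11-class
# UCF11 task discussed above and the names already in use in this script.
init_net_proto.op.pop(50)   # former op[51] shifts down to 50, so pop 50 twice
init_net_proto.op.pop(50)
my_model.param_init_net = core.Net(init_net_proto)
# SqueezeNet's conv10 takes 512 input channels with a 1x1 kernel.
my_model.param_init_net.XavierFill([], 'conv10_w', shape=[11, 512, 1, 1])
my_model.param_init_net.ConstantFill([], 'conv10_b', shape=[11])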
def DeformableConv(self, blob_in, offset, prefix, dim_in, dim_out, kernel,
                   stride=1, pad=1, weight_init=None, bias_init=None,
                   WeightInitializer=None, BiasInitializer=None,
                   dilation=1, no_bias=1, group=1, deformable_group=4,
                   order="NCHW", cudnn_exhaustive_search=False, **kwargs):
    kernels = []
    if isinstance(kernel, list):
        assert len(kernel) == 2, "Conv supports only a 2D kernel."
        kernels = kernel
    else:
        kernels = [kernel] * 2

    blob_out = prefix
    weight_shape = [dim_out]
    if order == "NCHW":
        weight_shape.append(int(dim_in / group))
        weight_shape.extend(kernels)
    else:
        weight_shape.extend(kernels)
        weight_shape.append(int(dim_in / group))

    WeightInitializer = initializers.update_initializer(
        WeightInitializer, weight_init, ("XavierFill", {}))
    BiasInitializer = initializers.update_initializer(
        BiasInitializer, bias_init, ("ConstantFill", {}))
    if not self.init_params:
        WeightInitializer = initializers.ExternalInitializer()
        BiasInitializer = initializers.ExternalInitializer()

    weight = self.create_param(
        param_name=blob_out + '_w',
        shape=weight_shape,
        initializer=WeightInitializer,
        tags=ParameterTags.WEIGHT
    )
    if not no_bias:
        bias = self.create_param(
            param_name=blob_out + '_b',
            shape=[dim_out, ],
            initializer=BiasInitializer,
            tags=ParameterTags.BIAS
        )

    if no_bias:
        inputs = [blob_in, offset, weight]
    else:
        inputs = [blob_in, offset, weight, bias]

    if isinstance(kernel, list):
        return self.net.DeformConv(
            inputs, blob_out,
            kernel_h=kernel[0], kernel_w=kernel[1],
            pad=pad, stride=stride, dilation=dilation, order=order,
            deformable_group=deformable_group,
            use_cudnn=self.use_cudnn)
    else:
        return self.net.DeformConv(
            inputs, blob_out,
            kernel=kernel,
            pad=pad, stride=stride, dilation=dilation, order=order,
            deformable_group=deformable_group,
            use_cudnn=self.use_cudnn)
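# Usage sketch (hypothetical; assumes the DeformConv operator is available,
# e.g. from Caffe2's Detectron module, and that 'model' is the object this
# method is defined on). The offset blob is typically produced by a small
# conv whose output has 2 * deformable_group * kernel_h * kernel_w channels.
deform_out = model.DeformableConv(
    'res5_2', 'res5_2_offset', 'res5_2_deform',
    dim_in=512, dim_out=512, kernel=3, pad=1, stride=1,
    deformable_group=4)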