def InferTensorRunAndCompare(self, model):
    """
    Infer blob shapes and types for ``model``, run it, and assert that every
    blob present in the workspace afterwards agrees with the inference.

    Inference is done over ``model.param_init_net`` and ``model.net``. The
    init net is then run once and the main net is created and run before the
    workspace blobs are fetched for comparison.
    """
    inferred_shapes, inferred_types = workspace.InferShapesAndTypes(
        [model.param_init_net, model.net],
    )

    # Execute the model so that real blobs exist in the workspace.
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net, True)
    workspace.RunNet(model.Proto().name)

    # (numpy dtype name, TensorProto member name), probed in this order.
    # BYTE and STRING have no entry. Member names are resolved lazily so
    # that only the matching enum value is ever looked up.
    dtype_to_proto_member = (
        ('float32', 'FLOAT'),
        ('int32', 'INT32'),
        ('bool', 'BOOL'),
        ('uint8', 'UINT8'),
        ('int8', 'INT8'),
        ('uint16', 'UINT16'),
        ('int16', 'INT16'),
        ('int64', 'INT64'),
        ('float16', 'FLOAT16'),
        ('float64', 'DOUBLE'),
    )

    def observed_type(value):
        # Anything that is not exactly an ndarray is described by the string
        # form of its Python type.
        if type(value) is not np.ndarray:
            return str(type(value))
        for dtype_name, member_name in dtype_to_proto_member:
            if value.dtype == np.dtype(dtype_name):
                return getattr(caffe2_pb2.TensorProto, member_name)
        return "unknown {}".format(value.dtype)

    actual_shapes = {}
    actual_types = {}
    for blob_name in workspace.Blobs():
        fetched = workspace.FetchBlob(blob_name)
        actual_shapes[blob_name] = fetched.shape
        actual_types[blob_name] = observed_type(fetched)

    # Compare what was inferred against what was actually produced.
    for blob_name, actual_shape in actual_shapes.items():
        shapes_agree = np.array_equal(
            np.array(inferred_shapes[blob_name]).astype(np.int32),
            np.array(actual_shape).astype(np.int32),
        )
        self.assertTrue(
            shapes_agree,
            "Shape {} mismatch: {} vs. {}".format(
                blob_name, inferred_shapes[blob_name], actual_shape),
        )

        type_missing = (
            blob_name in actual_types and blob_name not in inferred_types
        )
        self.assertFalse(
            type_missing,
            "Type for {} not defined".format(blob_name),
        )

        self.assertEqual(
            inferred_types[blob_name],
            actual_types[blob_name],
            "Type {} mismatch: {} vs. {}".format(
                blob_name,
                inferred_types[blob_name],
                actual_types[blob_name],
            ),
        )
def test_resnet_shared_grads(self, with_shapes, gc, dc):
    """
    Build a ResNet-50 training net, let memonger share its gradient blobs,
    and check that the blob count drops while the fetched output blob and the
    conv1 weight gradient stay (almost) equal between the two nets.

    ``with_shapes`` selects whether the inferred blob shapes are handed to
    ``memonger.share_grad_blobs``. ``gc`` and ``dc`` are not used here.
    """
    data_name = "gpu_0/data"
    label_name = "gpu_0/label"
    conv_name = "gpu_0/conv1_w"
    out_name = "gpu_0/last_out_L1000"

    model = cnn.CNNModelHelper(
        order="NCHW", name="test", cudnn_exhaustive_search=True)
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput(data_name)
        label = model.net.AddExternalInput(label_name)
        (_softmax, loss) = resnet.create_resnet50(
            model,
            data,
            num_input_channels=3,
            num_labels=1000,
            label=label,
            is_test=False,
        )

    param_to_grad = model.AddGradientOperators([loss])
    conv_grad_blob = param_to_grad[conv_name]

    (shapes, types) = workspace.InferShapesAndTypes(
        [model.param_init_net, model.net],
        {'gpu_0/data': [4, 3, 227, 227], 'gpu_0/label': [4]},
    )

    count_before = count_blobs(model.net.Proto())
    blob_shapes = shapes if with_shapes else None
    optim_proto = memonger.share_grad_blobs(
        model.net,
        ["gpu_0/loss"],
        set(model.param_to_grad.values()),
        "gpu_0/",
        share_activations=True,
        dont_share_blobs={str(conv_grad_blob)},
        blob_shapes=blob_shapes,
    )
    count_after = count_blobs(optim_proto)
    self.assertTrue(count_after < count_before)

    # Run the unoptimized net first and remember its results.
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)
    label = (np.random.rand(4) * 1000).astype(np.int32)
    workspace.FeedBlob(data_name, data)
    workspace.FeedBlob(label_name, label)
    workspace.RunNetOnce(model.net)

    net_proto = model.net.Proto()
    net_proto.type = 'dag'
    net_proto.num_workers = 4

    loss1 = workspace.FetchBlob(out_name)
    conv1_w_grad = workspace.FetchBlob(conv_grad_blob)
    # Overwrite the gradient so the second run has to recompute it.
    workspace.FeedBlob(conv_grad_blob, np.array([0.0]))

    # Now run the memory-optimized net on the same inputs.
    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob(out_name)
    optim_conv1_w_grad = workspace.FetchBlob(conv_grad_blob)

    print("before: {} after: {}".format(count_before, count_after))
    np.testing.assert_almost_equal(loss1, optimized_loss1)
    np.testing.assert_almost_equal(conv1_w_grad, optim_conv1_w_grad)
def _run(self, net, param_init_net, param_info):
    """
    Add the Adagrad update for one parameter.

    A zero-filled squared-sum accumulator is created in ``param_init_net``
    and registered as a local auxiliary parameter; the update operator
    itself is added to ``net``. Nothing is added when ``self.alpha <= 0``.

    In row-wise mode the accumulator has one entry per parameter row. The
    row count comes from shape inference when available, otherwise it is
    computed at run time with Shape/Slice operators.
    """
    param, grad = param_info.blob, param_info.grad
    if self.alpha <= 0:
        return

    lr, _ = self.build_lr(
        net,
        param_init_net,
        base_learning_rate=self.alpha,
        policy=self.policy,
        **(self.init_kwargs)
    )

    param_name = str(param)
    if not self.rowWise:
        accumulator = param_init_net.ConstantFill(
            [param], param_name + "_squared_sum", value=0.0)
    else:
        inferred_shapes, _ = workspace.InferShapesAndTypes([param_init_net])
        if param_name in inferred_shapes:
            accumulator = param_init_net.ConstantFill(
                [],
                param_name + "_avg_squared_sum",
                shape=[inferred_shapes[param_name][0]],
                value=0.0,
            )
        else:
            # Type/shape inference is not available for this param, fallback
            # on Shape/Slice logic
            shape_blob = param_init_net.Shape(param, param_name + "_shape")
            row_count = param_init_net.Slice(
                [shape_blob],
                str(shape_blob) + "_numrows",
                starts=[0],
                ends=[1],
            )
            accumulator = param_init_net.ConstantFill(
                row_count,
                param_name + "_avg_squared_sum",
                input_as_shape=1,
                value=0.0,
            )

    self._aux_params.local.append(accumulator)

    is_sparse = isinstance(grad, core.GradientSlice)
    if self.rowWise:
        assert is_sparse,\
            'If SparseAdagrad with rowWise=True, gradient must be '\
            'a gradientslice. PLease ensure that rowWise is not enabled '\
            'for the dense Adagrad optimizer, as it is not supported.'

    if not is_sparse:
        net.Adagrad(
            [param, accumulator, grad, lr],
            [param, accumulator],
            epsilon=self.epsilon,
            decay=float(self.decay),
            engine=self.engine,
        )
        return

    assert self.decay == 1.,\
        'Decay is not implemented for SparseAdagrad and must be set to 1'
    grad = self.dedup(net, self.sparse_dedup_aggregator, grad)
    op_name = 'RowWiseSparseAdagrad' if self.rowWise else 'SparseAdagrad'
    net.__getattr__(op_name)(
        [param, accumulator, grad.indices, grad.values, lr],
        [param, accumulator],
        epsilon=self.epsilon,
        engine=self.engine,
    )
def assertReferenceChecks(
    self,
    device_option,
    op,
    inputs,
    reference,
    input_device_options=None,
    threshold=1e-4,
    output_to_grad=None,
    grad_reference=None,
    atol=None,
    outputs_to_check=None,
):
    """
    Run ``op`` on ``inputs`` and compare its outputs against the Python
    reference implementation ``reference(*inputs)``.

    Numeric outputs are compared with ``np.testing.assert_allclose`` using
    ``rtol=threshold`` and ``atol`` (which defaults to ``threshold``);
    outputs whose dtype kind is ``'S'`` or ``'O'`` must match exactly. When
    shape/type inference succeeds for the op, the inferred values are also
    checked against each fetched output. If ``grad_reference`` is given,
    the gradient is checked as well and ``output_to_grad`` must be set.

    ``reference`` must return a tuple or list of arrays. When
    ``outputs_to_check`` is empty or ``None``, every op output is checked
    and the reference must return exactly one value per output.

    Returns the list of fetched op outputs that were checked.

    Usage example:

        @given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs)
        def test_softsign(self, X, inplace, gc, dc):
            op = core.CreateOperator(
                "Softsign", ["X"], ["X" if inplace else "Y"])

            def softsign(X):
                return (X / (1 + np.abs(X)),)

            self.assertReferenceChecks(gc, op, [X], softsign)
    """
    if input_device_options is None:
        input_device_options = {}

    # Work on a private copy so the caller's operator is left untouched.
    op = copy.deepcopy(op)
    op.device_option.CopyFrom(device_option)

    with temp_workspace():
        if len(op.input) > len(inputs):
            raise ValueError(
                'must supply an input for each input on the op: %s vs %s' %
                (op.input, inputs))

        for blob_name, value in zip(op.input, inputs):
            feed_device = input_device_options.get(blob_name, device_option)
            workspace.FeedBlob(blob_name, value, device_option=feed_device)

        net = core.Net("opnet")
        net.Proto().op.extend([op])

        try:
            (shapes, types) = workspace.InferShapesAndTypes([net])
        except RuntimeError as err:
            # Temporarily catch runtime errors when inferring shape
            # and type info
            test_shape_inference = False
            logging.warning(str(err))
            if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1':
                raise err
        else:
            test_shape_inference = True

        workspace.RunNetOnce(net)
        reference_outputs = reference(*inputs)
        if not isinstance(reference_outputs, (tuple, list)):
            raise RuntimeError(
                "You are providing a wrong reference implementation. A "
                "proper one should return a tuple/list of numpy arrays.")

        if not outputs_to_check:
            self.assertEqual(len(reference_outputs), len(op.output))
            outputs_to_check = list(range(len(op.output)))

        # The absolute tolerance falls back to the relative one.
        effective_atol = threshold if atol is None else atol

        outs = []
        for output_index, expected in zip(outputs_to_check, reference_outputs):
            output_blob_name = op.output[output_index]
            actual = workspace.FetchBlob(output_blob_name)
            if actual.dtype.kind in ('S', 'O'):
                np.testing.assert_array_equal(actual, expected)
            else:
                np.testing.assert_allclose(
                    actual,
                    expected,
                    atol=effective_atol,
                    rtol=threshold,
                    err_msg='Output {0} is not matching the reference'.format(
                        output_blob_name),
                )
            if test_shape_inference:
                self._assertInferTensorChecks(
                    output_blob_name, shapes, types, actual)
            outs.append(actual)

        if grad_reference is not None:
            assert output_to_grad is not None, \
                "If grad_reference is set," \
                "output_to_grad has to be set as well"

            with core.DeviceScope(device_option):
                self._assertGradReferenceChecks(
                    op,
                    inputs,
                    reference_outputs,
                    output_to_grad,
                    grad_reference,
                    threshold=threshold,
                )

        return outs
def test_dnnlowp_fully_connected_int(
    self,
    input_channels,
    output_channels,
    batch_size,
    in_quantized,
    out_quantized,
    weight_quantized,
    prepack_weight,
    preserve_activation_sparsity,
    preserve_weight_sparsity,
    fuse_relu,
    output_packed_bias,
    use_input_qparam,
    gc,
    dc,
):
    """
    Compare fully-connected operators across several (op_type, engine)
    pairs on integer-valued inputs, and check shape/type inference of "Y".

    The first pair, ("FC", ""), is run first; its output (``outputs[0][0]``)
    is used by the later pairs as the source of output quantization ranges.
    The boolean arguments toggle input quantization, output dequantization,
    weight quantization, weight prepacking, sparsity preservation, ReLU
    fusion, packed-bias output and passing quantization parameters as an
    extra operator input. ``gc`` is used as the device option; ``dc`` is
    not used in this function.
    """
    # X and W have scale 1, so exactly represented after quantization
    X_min = 0 if preserve_activation_sparsity else -77
    X_max = X_min + 255
    X = np.round(
        np.random.rand(batch_size, input_channels) * (X_max - X_min) + X_min)
    X = X.astype(np.float32)
    # input channels 0 and 1 are all X_min to avoid overflow from vpmaddubsw
    # when multiplied with W_min and W_max
    X[:, 0] = X_min
    if batch_size != 0:
        X[0, 1] = X_max

    if preserve_weight_sparsity:
        W_min = -128
        W_max = 100
    else:
        W_min = -100
        W_max = W_min + 255
    W = np.round(
        np.random.rand(output_channels, input_channels) * (W_max - W_min) + W_min)
    W = W.astype(np.float32)
    # Pin both range extremes into W.
    # NOTE(review): indexing row 1 assumes output_channels >= 2 - confirm
    # against the test's input strategy.
    W[0, 0] = W_min
    W[1, 0] = W_max

    # Make sure we won't have overflows from vpmaddubsw instruction used in
    # fbgemm
    avoid_vpmaddubsw_overflow_fc(
        batch_size, input_channels, output_channels, X, X_min, X_max, W, W_min, W_max, )

    b = np.random.randn(output_channels).astype(np.float32)

    # NOTE(review): ``Output`` is not referenced again in this function;
    # presumably run_conv_or_fc builds its own records - verify.
    Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])
    outputs = []

    # The plain FC (empty engine) always runs first.
    op_engine_list = [("FC", "")]
    if fuse_relu:
        op_engine_list += [("Int8FCRelu", "DNNLOWP")]
    else:
        op_engine_list += [
            ("FC", "DNNLOWP"),
            ("FC", "DNNLOWP_16"),
            ("Int8FC", "DNNLOWP"),
        ]

    for op_type, engine in op_engine_list:
        init_net = core.Net("test_init_net")
        net = core.Net("test_net")

        # Quantize/dequantize steps apply to any engine whose name contains
        # "DNNLOWP"; weight quantization and prepacking only to the exact
        # "DNNLOWP" engine. Weight quantization additionally needs an
        # earlier result in ``outputs``.
        do_quantize = "DNNLOWP" in engine and in_quantized
        do_dequantize = "DNNLOWP" in engine and out_quantized
        do_quantize_weight = (engine == "DNNLOWP" and weight_quantized and len(outputs) > 0)
        do_prepack_weight = engine == "DNNLOWP" and prepack_weight

        if do_quantize:
            quantize = core.CreateOperator(
                "Quantize",
                ["X"],
                ["X_q"],
                preserve_activation_sparsity=preserve_activation_sparsity,
                engine=engine,
                device_option=gc,
            )
            net.Proto().op.extend([quantize])

        # From here on X_min/X_max hold the observed range of X (0 when X is
        # empty), replacing the nominal range chosen above.
        X_min = 0 if X.size == 0 else X.min()
        X_max = 0 if X.size == 0 else X.max()
        x_q_param = dnnlowp_utils.choose_quantization_params(
            X_min, X_max, preserve_activation_sparsity)
        w_q_param = None
        if do_quantize_weight:
            (
                int8_given_tensor_fill,
                w_q_param,
            ) = dnnlowp_utils.create_int8_given_tensor_fill(
                W, "W_q", preserve_weight_sparsity)
            init_net.Proto().op.extend([int8_given_tensor_fill])

            # Bias
            int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                b, "b_q", x_q_param, w_q_param)
            init_net.Proto().op.extend([int8_bias_tensor_fill])

        if do_prepack_weight:
            inputs = ["W_q" if do_quantize_weight else "W"]
            if do_dequantize:
                inputs += ["b_q" if do_quantize_weight else "b"]
            # "B_q32" is only requested as a second output when the packed
            # bias is asked for and the output is dequantized separately.
            pack = core.CreateOperator(
                "Int8FCPackWeight",
                inputs,
                ["W_packed", "B_q32"] if do_dequantize and output_packed_bias else ["W_packed"],
                preserve_weight_sparsity=preserve_weight_sparsity,
                in_scale=x_q_param.scale,
                engine=engine,
            )
            init_net.Proto().op.extend([pack])

        # The two branches differ only in the extra "quant_param" input.
        if use_input_qparam and do_dequantize and op_type != "FC":
            fc = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else ("W_q" if do_quantize_weight else "W"),
                    "b_q" if do_quantize_weight else "b",
                    "quant_param",
                ],
                ["Y_q" if do_dequantize else "Y"],
                dequantize_output=not do_dequantize,
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                device_option=gc,
            )
        else:
            fc = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else ("W_q" if do_quantize_weight else "W"),
                    "b_q" if do_quantize_weight else "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                dequantize_output=not do_dequantize,
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                device_option=gc,
            )
        if do_quantize_weight or do_prepack_weight:
            # When quantized weight is provided, we can't rescale the
            # output dynamically by looking at the range of output of each
            # batch, so here we provide the range of output observed from
            # fp32 reference implementation
            dnnlowp_utils.add_quantization_param_args(
                fc, outputs[0][0], preserve_activation_sparsity)

        net.Proto().op.extend([fc])
        # Engines without "DNNLOWP" in their name get an explicit in-place
        # Relu on "Y" when fuse_relu is set.
        if fuse_relu and "DNNLOWP" not in engine:
            net.Relu(["Y"], "Y")

        if do_dequantize:
            dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"], engine=engine, device_option=gc)
            net.Proto().op.extend([dequantize])

        if use_input_qparam and do_dequantize and op_type != "FC":
            # Derive output quantization parameters from the range of the
            # first recorded output and pass scale/zero point to the runner.
            ref_output = outputs[0][0]
            ref_output_min = 0 if ref_output.size == 0 else ref_output.min( )
            ref_output_max = 0 if ref_output.size == 0 else ref_output.max( )

            q_param = dnnlowp_utils.choose_quantization_params(
                ref_output_min, ref_output_max, preserve_activation_sparsity)
            run_conv_or_fc(
                self,
                init_net,
                net,
                X,
                W,
                b,
                op_type,
                engine,
                None,
                gc,
                outputs,
                q_param.scale,
                q_param.zero_point,
            )
        else:
            run_conv_or_fc(self, init_net, net, X, W, b, op_type, engine, None, gc, outputs)

        if output_packed_bias and do_prepack_weight and do_dequantize:
            bias_int32 = self.ws.blobs["B_q32"].fetch()
            if do_quantize_weight:
                # The packed bias is expected to equal b rescaled by the
                # product of the input and weight scales, rounded.
                np.testing.assert_equal(
                    bias_int32[0],
                    np.round(b / (x_q_param.scale * w_q_param.scale)))
            np.testing.assert_equal(bias_int32[0].dtype, np.int32)

        # Shape/type inference over both nets must report "Y" as a FLOAT
        # blob of shape [batch_size, output_channels].
        shapes, types = workspace.InferShapesAndTypes(
            [init_net, net],
            blob_dimensions={
                "X": [batch_size, input_channels],
                "W": [output_channels, input_channels],
                "b": [output_channels],
                "quant_param": [1],
            },
            blob_types={
                "X": core.DataType.FLOAT,
                "W": core.DataType.FLOAT,
                "b": core.DataType.FLOAT,
                "quant_param": core.DataType.FLOAT,
            },
        )
        assert ("Y" in shapes and "Y" in types), "Failed to infer the shape or type of Y"
        self.assertEqual(shapes["Y"], [batch_size, output_channels])
        self.assertEqual(types["Y"], core.DataType.FLOAT)

    check_quantized_results_close(outputs, symmetric=preserve_activation_sparsity)
def __init__(self, model, input_record, output_names_or_num, function,
             name='functional', output_dtypes=None, tags=None, **kwargs):
    """
    Create a functional layer whose body is the user-supplied ``function``.

    Args:
        model: layer model helper; its ``net`` is used to allocate the
            output blobs.
        input_record: input schema, coerced with ``schema.as_record``.
        output_names_or_num: one of
            * an integer count - outputs form a ``schema.RawTuple``;
            * a ``schema.Field`` - cloned (keeping blobs) and used as the
              output schema directly, with no type inference;
            * a single name or a list of names - outputs form a
              ``schema.Struct`` of ``np.void`` fields.
        function: callable invoked as
            ``function(net, input_record, output_schema, **kwargs)``.
        name: layer name, also used as the name scope for the outputs.
        output_dtypes: optional dtype, or list of dtypes (one per output),
            applied to the output scalars; when given, inference is skipped.
        tags: forwarded to the base class.
        **kwargs: forwarded both to the base class and to ``function``.

    The output schema is a struct when a list or a count other than 1 is
    given, and the single first field otherwise. If ``output_dtypes`` is
    not given, ``function`` is executed on a temporary net and shape/type
    inference results are used to set output types; problems are logged as
    warnings rather than raised.
    """
    # allow coercion
    input_record = schema.as_record(input_record)

    super(Functional, self).__init__(model, name, input_record, tags=tags,
                                     **kwargs)
    self._function = function
    self._kwargs = kwargs

    # Use one integer test for both the struct decision and the branch
    # below. Testing plain ``int`` in only one of them made a Python 2
    # ``long`` count be treated as a field name.
    is_count = isinstance(output_names_or_num, six.integer_types)
    return_struct = (
        isinstance(output_names_or_num, list) or
        (is_count and output_names_or_num != 1)
    )

    with scope.NameScope(self.name, reset=True):
        if is_count:
            struct_output_schema = schema.NewRecord(
                model.net, schema.RawTuple(output_names_or_num))
        elif isinstance(output_names_or_num, schema.Field):
            self.output_schema = output_names_or_num.clone(keep_blobs=True)
            return
        else:
            if not isinstance(output_names_or_num, list):
                output_names_or_num = [output_names_or_num]
            out_tuple = [(out, np.void) for out in output_names_or_num]
            struct_output_schema = schema.NewRecord(
                model.net, schema.Struct(*out_tuple))

    num_outputs = len(struct_output_schema.field_blobs())

    # functional layer returns Struct if more than one outputs or output is
    # a list, otherwise Scalar
    if return_struct:
        self.output_schema = struct_output_schema
    else:
        self.output_schema = struct_output_schema[0]

    # If output_dtypes is provided, use it for output schema. Otherwise
    # the shape and type will be inferred.
    if output_dtypes is not None:
        if not isinstance(output_dtypes, list):
            output_dtypes = [output_dtypes] * num_outputs
        assert len(output_dtypes) == num_outputs
        for dtype, scalar in zip(output_dtypes,
                                 self.output_schema.all_scalars()):
            scalar.set_type(dtype)
        return

    # TODO(amalevich): Move it to some shared library
    numpy_type_for = {
        caffe2_pb2.TensorProto.DOUBLE: np.float64,
        caffe2_pb2.TensorProto.FLOAT: np.float32,
        caffe2_pb2.TensorProto.INT32: np.int32,
        caffe2_pb2.TensorProto.INT64: np.int64,
        caffe2_pb2.TensorProto.FLOAT16: np.float16,
    }

    # Fake execution of the function to infer shapes and types automatically
    had_issues = False
    try:
        type_net = core.Net('_temp_type_and_shape_inference_net')
        schema.InitEmptyRecord(type_net, input_record, enforce_types=True)

        function(type_net, self.input_record, self.output_schema, **kwargs)
        (shapes, types) = workspace.InferShapesAndTypes([type_net], {})
        for i in range(num_outputs):
            scalar_schema = (self.output_schema[i] if return_struct
                             else self.output_schema)
            blob = scalar_schema()
            if blob not in types or blob not in shapes:
                had_issues = True
                continue
            blob_shape = shapes[blob]
            if blob_shape == []:
                # Scalar type
                shape = tuple()
            elif blob_shape[0] == 0:
                # Leading dimension 0 is taken as the batch dimension and
                # dropped from the per-example shape.
                shape = tuple(blob_shape[1:])
            else:
                logger.warning("unexpected shape: {}".format(blob_shape))
                # If batch dimension is not first - give up on shape
                # inference for that blob
                had_issues = True
                continue

            # Types outside the table are left untouched.
            numpy_type = numpy_type_for.get(types[blob])
            if numpy_type is not None:
                scalar_schema.set_type((numpy_type, shape))
    except TypeError as ex:
        had_issues = True
        logger.warning(str(ex))

    if had_issues:
        logger.warning("Type inference had problems for layer: {}".format(
            self.name))
def __init__(self, model, input_record, output_names_or_num, function,
             name='functional', **kwargs):
    """
    Create a functional layer whose body is the user-supplied ``function``.

    Args:
        model: layer model helper; its ``net`` is used to allocate the
            output blobs.
        input_record: input schema passed to the base class.
        output_names_or_num: an ``int`` count (outputs form a
            ``schema.RawTuple``), or a single name / list of names (outputs
            form a ``schema.Struct`` of ``np.void`` fields).
        function: callable invoked as
            ``function(net, input_record, output_schema)``.
        name: layer name, also used as the name scope for the outputs.
        **kwargs: forwarded to the base class.

    After the output schema is allocated, ``function`` is executed on a
    temporary net and shape/type inference results are used to set the
    output types. Problems are logged as warnings rather than raised.
    """
    super(Functional, self).__init__(model, name, input_record, **kwargs)
    self._function = function

    with scope.NameScope(self.name):
        if isinstance(output_names_or_num, int):
            self.output_schema = schema.NewRecord(
                model.net, schema.RawTuple(output_names_or_num))
        else:
            if not isinstance(output_names_or_num, list):
                output_names_or_num = [output_names_or_num]
            out_tuple = [(out, np.void) for out in output_names_or_num]
            self.output_schema = schema.NewRecord(
                model.net, schema.Struct(*out_tuple))

    num_outputs = len(self.output_schema.field_blobs())

    # Fake execution of the function to infer shapes and types automatically
    had_issues = False
    try:
        type_net = core.Net('_temp_type_and_shape_inference_net')
        schema.InitEmptyRecord(type_net, input_record, enforce_types=True)

        function(type_net, self.input_record, self.output_schema)
        (shapes, types) = workspace.InferShapesAndTypes([type_net], {})
        for i in range(num_outputs):
            output_field = self.output_schema[i]
            blob = output_field()
            if blob not in types or blob not in shapes:
                had_issues = True
                continue
            blob_shape = shapes[blob]
            if blob_shape == []:
                # Scalar type
                shape = tuple()
            elif blob_shape[0] == 0:
                # Leading dimension 0 is taken as the batch dimension and
                # dropped from the per-example shape.
                shape = tuple(blob_shape[1:])
            else:
                # Message typo fixed ("unexpeced" -> "unexpected").
                logger.warning("unexpected shape: {}".format(blob_shape))
                # If batch dimension is not first - give up on shape
                # inference for that blob
                had_issues = True
                continue

            # TODO(amalevich): Move it to some shared library
            blob_type = types[blob]
            dtype = None
            if blob_type == caffe2_pb2.TensorProto.DOUBLE:
                dtype = (np.float64, shape)
            elif blob_type == caffe2_pb2.TensorProto.FLOAT:
                dtype = (np.float32, shape)
            elif blob_type == caffe2_pb2.TensorProto.INT32:
                dtype = (np.int32, shape)
            elif blob_type == caffe2_pb2.TensorProto.INT64:
                dtype = (np.int64, shape)
            elif blob_type == caffe2_pb2.TensorProto.FLOAT16:
                # Half-precision outputs used to be silently left untyped.
                dtype = (np.float16, shape)

            if dtype is not None:
                output_field.set_type(dtype)
    except TypeError as ex:
        had_issues = True
        logger.warning(str(ex))

    if had_issues:
        logger.warning(
            "Type inference had problems for layer: {}".format(self.name))
#i = my_model.StopGradient('inception_5b/pool_proj_w','inception_5b/pool_proj_w') # Print the new init net to see that the shape has changed for i in range(len(my_model.param_init_net.Proto().op)): print "\n******************************" print "OP: ", i print "******************************" print "OP_NAME: ",my_model.param_init_net.Proto().op[i].name print "OP_INPUT: ",my_model.param_init_net.Proto().op[i].input print "OP_OUTPUT: ",my_model.param_init_net.Proto().op[i].output print "OP_SHAPE: ",my_model.param_init_net.Proto().op[i].arg[0] for param in my_model.params: print param tmp = workspace.InferShapesAndTypes([my_model.param_init_net]) for t in tmp[0]: print t #exit() workspace.ResetWorkspace() ################################################################################## # Add the training operators xent = my_model.LabelCrossEntropy(['prob', 'label'], 'xent') loss = my_model.AveragedLoss('xent', 'loss') brew.accuracy(my_model, ['prob', 'label'], 'accuracy') my_model.AddGradientOperators(['loss']) opt = optimizer.build_sgd(my_model, base_learning_rate=0.1) for param in my_model.GetOptimizationParamInfo():
def _run(self, net, param_init_net, param_info):
    """
    Add the Adam update for one parameter.

    First- and second-moment blobs are created (zero-filled) in
    ``param_init_net`` and registered as local auxiliary parameters; the
    iteration blob returned by ``build_lr`` is registered as a shared one.
    The update operator itself is added to ``net``. Nothing is added when
    ``self.alpha <= 0``.

    With ``self.use_lr_adaption`` an extra effective-gradient blob is
    created and a ``LearningRateAdaption`` op is added; the dense ``Adam``
    op then also outputs the effective gradient.

    With ``self.rowWise`` the second moment has one entry per parameter row
    and the gradient must be a ``core.GradientSlice``. The row count comes
    from shape inference when it knows the parameter; otherwise it is
    computed at run time with Shape/Slice operators, as the row-wise
    Adagrad optimizer does, instead of failing with ``KeyError``.
    """
    param = param_info.blob
    grad = param_info.grad

    if self.alpha <= 0:
        return

    lr, iteration = self.build_lr(
        net, param_init_net,
        base_learning_rate=self.alpha,
        policy=self.policy,
        **(self.init_kwargs)
    )

    if self.use_lr_adaption:
        effective_grad = param_init_net.ConstantFill(
            [param], param + "_effgrad", value=0.0)
        self._aux_params.local.append(effective_grad)
        net.LearningRateAdaption(
            [lr, grad, effective_grad], [lr],
            lr_alpha=self.lr_alpha,
            normalized_lr_adaption=self.normalized_lr_adaption)

    m1 = param_init_net.ConstantFill(
        [param], param + "_first_moment", value=0.0)

    if self.rowWise:
        shapes, _ = workspace.InferShapesAndTypes([param_init_net])
        param_name = str(param)
        if param_name in shapes:
            m2 = param_init_net.ConstantFill(
                [], param + "_avg_second_moment",
                shape=[shapes[param_name][0]], value=0.0)
        else:
            # Type/shape inference is not available for this param, fallback
            # on Shape/Slice logic
            shape = param_init_net.Shape(param, param_name + "_shape")
            num_rows = param_init_net.Slice(
                [shape], str(shape) + "_numrows", starts=[0], ends=[1])
            m2 = param_init_net.ConstantFill(
                num_rows, param + "_avg_second_moment",
                input_as_shape=1, value=0.0)
    else:
        m2 = param_init_net.ConstantFill(
            [param], param + "_second_moment", value=0.0)

    self._aux_params.shared.append(iteration)
    self._aux_params.local.append(m1)
    self._aux_params.local.append(m2)

    if self.rowWise:
        assert isinstance(grad, core.GradientSlice),\
            'If SparseAdam with rowWise=True, gradient must be '\
            'a gradientslice. PLease ensure that rowWise is not enabled '\
            'for the dense Adam optimizer, as it is not supported.'

    if isinstance(grad, core.GradientSlice):
        # Sparse gradient: deduplicate, then use the sparse operator.
        grad = self.dedup(net, self.sparse_dedup_aggregator, grad)
        op = 'RowWiseSparseAdam' if self.rowWise else 'SparseAdam'
        net.__getattr__(op)(
            [param, m1, m2, grad.indices, grad.values, lr, iteration],
            [param, m1, m2],
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon)
    else:
        # Dense gradient: the effective gradient is an additional output
        # only when learning-rate adaption is enabled.
        outputs = [param, m1, m2]
        if self.use_lr_adaption:
            outputs.append(effective_grad)
        net.Adam(
            [param, m1, m2, grad, lr, iteration],
            outputs,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon)
def _run(self, net, param_init_net, param_info):
    """
    Add the Adagrad update for one parameter, optionally with LARS.

    A zero-filled squared-sum accumulator is created in ``param_init_net``
    and registered as a local auxiliary parameter; the update operator
    itself is added to ``net``. Nothing is added when ``self.alpha <= 0``.

    When ``self.lars`` is set and the gradient is dense, a ``Lars`` op is
    added and its output registered as a local learning-rate multiplier.
    In row-wise mode the accumulator has one entry per parameter row; the
    row count comes from shape inference when available, otherwise from
    Shape/Slice operators evaluated at run time. The dense ``Adagrad`` op
    may additionally output the effective learning rate and the update,
    depending on ``self.output_effective_lr_and_update`` and
    ``self.output_effective_lr``.
    """
    param, grad = param_info.blob, param_info.grad
    if self.alpha <= 0:
        return

    self._clear_local_lr_multiplier()

    is_sparse = isinstance(grad, core.GradientSlice)
    param_name = str(param)

    if self.lars is not None and not is_sparse:
        assert self.lars >= 0, (
            'Lars offset must be nonnegative, got {}'.format(self.lars))
        lars_blob_name = self.make_unique_blob_name(param_name + "_lars")
        lr_lars_multiplier = net.Lars(
            [param, grad], lars_blob_name, offset=self.lars)
        device_scope = scope.CurrentDeviceScope()
        on_gpu = (device_scope is not None
                  and device_scope.device_type == caffe2_pb2.CUDA)
        self._add_local_lr_multiplier(
            lr_lars_multiplier,
            is_gpu_blob=on_gpu,
        )

    lr, _ = self.build_lr(
        net,
        param_init_net,
        base_learning_rate=self.alpha,
        policy=self.policy,
        **(self.init_kwargs)
    )

    if not self.rowWise:
        accumulator = param_init_net.ConstantFill(
            [param], param_name + "_squared_sum", value=0.0)
    else:
        inferred_shapes, _ = workspace.InferShapesAndTypes([param_init_net])
        if param_name in inferred_shapes:
            accumulator = param_init_net.ConstantFill(
                [],
                param_name + "_avg_squared_sum",
                shape=[inferred_shapes[param_name][0]],
                value=0.0,
            )
        else:
            # Type/shape inference is not available for this param, fallback
            # on Shape/Slice logic
            shape_blob = param_init_net.Shape(param, param_name + "_shape")
            row_count = param_init_net.Slice(
                [shape_blob],
                str(shape_blob) + "_numrows",
                starts=[0],
                ends=[1],
            )
            accumulator = param_init_net.ConstantFill(
                row_count,
                param_name + "_avg_squared_sum",
                input_as_shape=1,
                value=0.0,
            )

    self._aux_params.local.append(accumulator)

    if self.rowWise:
        assert is_sparse,\
            'If SparseAdagrad with rowWise=True, gradient must be '\
            'a gradientslice. PLease ensure that rowWise is not enabled '\
            'for the dense Adagrad optimizer, as it is not supported.'

    if is_sparse:
        assert self.decay == 1.,\
            'Decay is not implemented for SparseAdagrad and must be set to 1'
        grad = self.dedup(net, self.sparse_dedup_aggregator, grad)
        op_name = 'RowWiseSparseAdagrad' if self.rowWise else 'SparseAdagrad'
        net.__getattr__(op_name)(
            [param, accumulator, grad.indices, grad.values, lr],
            [param, accumulator],
            epsilon=self.epsilon,
            engine=self.engine,
        )
        return

    # Dense update: optionally expose the effective lr (and the update).
    output_args = [param, accumulator]
    if self.output_effective_lr_and_update:
        output_args += [param_name + '_effective_lr', param_name + '_update']
    elif self.output_effective_lr:
        output_args += [param_name + '_effective_lr']

    net.Adagrad(
        [param, accumulator, grad, lr],
        output_args,
        epsilon=self.epsilon,
        decay=float(self.decay),
        engine=self.engine,
    )
def test_shared_grads(
    with_shapes,
    create_model,
    conv_blob,
    last_out_blob,
    data_blob='gpu_0/data',
    label_blob='gpu_0/label',
    num_labels=1000,
):
    """
    Build a training net with ``create_model``, let memonger share its
    gradient blobs, run both the original and the optimized net on the same
    random inputs, and return the values a caller needs to compare them.

    Args:
        with_shapes: pass the inferred blob shapes to memonger when true.
        create_model: callable building the model; must return
            ``(softmax, loss)``.
        conv_blob: name of the parameter whose gradient is compared and
            excluded from sharing.
        last_out_blob: name of the output blob fetched after each run.
        data_blob, label_blob: names of the external input blobs.
        num_labels: number of classes, also the upper bound of the random
            labels.

    Returns:
        ``[(count_after, count_before), (loss1, optimized_loss1),
        (conv1_w_grad, optim_conv1_w_grad)]``.
    """
    model = cnn.CNNModelHelper(
        order="NCHW", name="test", cudnn_exhaustive_search=True)
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput(data_blob)
        label = model.net.AddExternalInput(label_blob)
        (_softmax, loss) = create_model(
            model,
            data,
            num_input_channels=3,
            num_labels=num_labels,
            label=label,
            is_test=False,
        )

    param_to_grad = model.AddGradientOperators([loss])
    conv_grad_blob = param_to_grad[conv_blob]

    input_dims = {data_blob: [4, 3, 227, 227], label_blob: [4]}
    (shapes, types) = workspace.InferShapesAndTypes(
        [model.param_init_net, model.net],
        input_dims,
    )

    count_before = count_blobs(model.net.Proto())
    blob_shapes = shapes if with_shapes else None
    optim_proto = memonger.share_grad_blobs(
        model.net,
        ["gpu_0/loss"],
        set(model.param_to_grad.values()),
        "gpu_0/",
        share_activations=True,
        dont_share_blobs={str(conv_grad_blob)},
        blob_shapes=blob_shapes,
    )
    count_after = count_blobs(optim_proto)

    # Run the unoptimized net first and remember its results.
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)
    label = (np.random.rand(4) * num_labels).astype(np.int32)
    workspace.FeedBlob(data_blob, data)
    workspace.FeedBlob(label_blob, label)
    workspace.RunNetOnce(model.net)

    net_proto = model.net.Proto()
    net_proto.type = 'dag'
    net_proto.num_workers = 4

    loss1 = workspace.FetchBlob(last_out_blob)
    conv1_w_grad = workspace.FetchBlob(conv_grad_blob)
    # Overwrite the gradient so the second run has to recompute it.
    workspace.FeedBlob(conv_grad_blob, np.array([0.0]))

    # Now run the memory-optimized net on the same inputs.
    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob(last_out_blob)
    optim_conv1_w_grad = workspace.FetchBlob(conv_grad_blob)

    blob_counts = (count_after, count_before)
    losses = (loss1, optimized_loss1)
    conv_grads = (conv1_w_grad, optim_conv1_w_grad)
    return [blob_counts, losses, conv_grads]