def AddLeNetModel(model, data):
    '''
    Assemble the standard LeNet network, from the input data blob to the
    softmax prediction.

    Each convolution is configured with dim_in (number of input channels)
    and dim_out (number of output channels). Conv and MaxPool layers both
    shrink the image: a kernel of size 5 removes 4 pixels from each side
    length, whereas a MaxPool with kernel and stride both equal to 2 halves
    each side length.

    Returns whatever model.net.Softmax returns for the final layer.
    '''
    # Image size: 28 x 28 -> 24 x 24
    first_conv = brew.conv(
        model, data, 'conv1', dim_in=1, dim_out=20, kernel=5)
    # Image size: 24 x 24 -> 12 x 12
    first_pool = model.net.MaxPool(first_conv, 'pool1', kernel=2, stride=2)
    # Image size: 12 x 12 -> 8 x 8
    second_conv = brew.conv(
        model, first_pool, 'conv2', dim_in=20, dim_out=50, kernel=5)
    # Image size: 8 x 8 -> 4 x 4
    second_pool = model.net.MaxPool(second_conv, 'pool2', kernel=2, stride=2)

    # 50 * 4 * 4 is dim_out of the previous layer times the image size
    hidden = brew.fc(
        model, second_pool, 'fc3', dim_in=50 * 4 * 4, dim_out=500)
    hidden = model.net.Relu(hidden, 'relu3')

    logits = brew.fc(model, hidden, 'pred', 500, 10)
    return model.net.Softmax(logits, 'softmax')
def test_gradient_clipping_using_param_norm(self):
    """Apply by-norm gradient clipping with parameter norms and count ops."""
    helper = model_helper.ModelHelper(name="test")
    input_blob = helper.net.AddExternalInput("data")
    first_fc = brew.fc(helper, input_blob, "fc1", dim_in=4, dim_out=2)

    # no operator name set, will use default
    second_fc = brew.fc(helper, first_fc, "fc2", dim_in=2, dim_out=1)

    activation = helper.net.Sigmoid(second_fc, 'sigm')
    distance = helper.net.SquaredL2Distance([activation, 'label'], 'sq')
    total_loss = helper.net.SumElements(distance, 'loss')

    all_grads = helper.AddGradientOperators([total_loss])

    # Only the two weight gradients are handed to the clipping modifier.
    weight_grads = {}
    for weight_name in ('fc1_w', 'fc2_w'):
        weight_grads[weight_name] = all_grads[weight_name]

    clipper = GradientClipping(
        grad_clip_method='by_norm',
        clip_norm_type='l2_norm',
        clip_threshold=0.1,
        use_parameter_norm=True,
    )
    clipper(helper.net, grad_map=weight_grads)

    workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))
    workspace.FeedBlob('label', np.random.rand(10, 1).astype(np.float32))

    workspace.RunNetOnce(helper.param_init_net)
    workspace.RunNetOnce(helper.net)

    # 5 forward ops + 6 backward ops + 2 * (5 gradient clipping ops)
    op_count = len(helper.net.Proto().op)
    self.assertEqual(op_count, 21)
def MLP(order, cudnn_ws, device):
    """Build the benchmark MLP: parallel stacks of FC layers summed at the top.

    ``order`` and ``cudnn_ws`` are accepted but not read by this builder.
    The averaged loss is added unless ``device`` equals ``'MKL'``.

    Returns a ``(model, d)`` tuple where ``d`` is the layer width (256).
    """
    model = ModelHelper(name="benchmark")
    d = 256
    depth, width = 20, 3
    blob_name = "fc_{}_{}".format

    for layer in range(depth):
        for column in range(width):
            # The first layer of every column reads the shared "data" blob.
            source = blob_name(layer, column) if layer > 0 else "data"
            target = blob_name(layer + 1, column)
            brew.fc(
                model,
                source,
                target,
                dim_in=d,
                dim_out=d,
                weight_init=('XavierFill', {}),
                bias_init=('XavierFill', {}),
            )

    column_tops = [blob_name(depth, column) for column in range(width)]
    brew.sum(model, column_tops, ["sum"])
    brew.fc(
        model,
        "sum",
        "last",
        dim_in=d,
        dim_out=1000,
        weight_init=('XavierFill', {}),
        bias_init=('XavierFill', {}),
    )
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    if device != 'MKL':
        model.AveragedLoss(xent, "loss")
    return model, d
def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
    """Compare loss and gradient before and after optimize_inference_fast."""
    m = model_helper.ModelHelper()
    first = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
    second = brew.fc(m, first, "fc2", dim_in=output_dim, dim_out=output_dim)
    third = brew.fc(m, second, "fc3", dim_in=output_dim, dim_out=output_dim)

    # Same operator chain as a fluent call, spelled out step by step.
    activated = third.Relu([], third)
    prediction = activated.Softmax([], "pred")
    cross_entropy = prediction.LabelCrossEntropy(["label"], ["xent"])
    cross_entropy.AveragedLoss([], "loss")

    input_to_grad = m.AddGradientOperators(["loss"])
    m.net.Proto().device_option.CopyFrom(do)
    m.param_init_net.Proto().device_option.CopyFrom(do)

    # Every output of the init net, plus the blobs read back below.
    static_blobs = []
    for init_op in m.param_init_net.Proto().op:
        static_blobs.extend(init_op.output)
    static_blobs += ["data", "label", "loss", input_to_grad["fc1_w"]]

    optimized_net = memonger.optimize_inference_fast(m.Proto(), static_blobs)

    data = np.random.randn(batch_size, input_dim).astype(np.float32)
    label = np.random.randint(
        low=0, high=output_dim, size=(batch_size,)).astype(np.int32)

    workspace.RunNetOnce(m.param_init_net)
    workspace.FeedBlob("data", data, device_option=do)
    workspace.FeedBlob("label", label, device_option=do)

    grad_blob_name = str(input_to_grad["fc1_w"])

    workspace.RunNetOnce(m.net)
    reference_loss = workspace.FetchBlob("loss")
    reference_grad = workspace.FetchBlob(grad_blob_name)

    workspace.RunNetOnce(optimized_net)
    optimized_loss = workspace.FetchBlob("loss")
    optimized_grad = workspace.FetchBlob(grad_blob_name)

    np.testing.assert_almost_equal(reference_loss, optimized_loss)
    np.testing.assert_almost_equal(reference_grad, optimized_grad)

    self.assertLess(count_blobs(optimized_net), count_blobs(m.Proto()))
def test_registry_invalid(self, input_dim, output_dim, batch_size):
    """Applying a transform with an unknown name must raise RuntimeError."""
    helper = model_helper.ModelHelper()
    brew.fc(helper, "data", "fc1", dim_in=input_dim, dim_out=output_dim)

    bogus_transform = "definitely_not_a_real_transform"
    with self.assertRaises(RuntimeError):
        workspace.ApplyTransform(bogus_transform, helper.net.Proto())
def test_compute_statistics_for_blobs(self):
    """Check the summary blob that ComputeStatisticsForBlobs adds for fc1_w.

    The fetched ``fc1_w_summary`` blob is compared against min, max, mean and
    unbiased standard deviation computed with NumPy, and the net's output
    record is expected to stay unset.
    """
    model = model_helper.ModelHelper(name="test")
    data = model.net.AddExternalInput("data")
    fc1 = brew.fc(model, data, "fc1", dim_in=4, dim_out=2)

    # no operator name set, will use default
    brew.fc(model, fc1, "fc2", dim_in=2, dim_out=1)

    net_modifier = ComputeStatisticsForBlobs(
        blobs=['fc1_w', 'fc2_w'],
        logging_frequency=10,
    )
    net_modifier(model.net)

    workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)

    fc1_w = workspace.FetchBlob('fc1_w')
    fc1_w_summary = workspace.FetchBlob('fc1_w_summary')

    # Flatten once instead of once per statistic.
    flat_w = fc1_w.flatten()
    # std is unbiased here
    stats_ref = np.array(
        [flat_w.min(), flat_w.max(), flat_w.mean(), flat_w.std(ddof=1)])

    self.assertAlmostEqual(
        np.linalg.norm(stats_ref - fc1_w_summary), 0, delta=1e-5)
    self.assertEqual(fc1_w_summary.size, 4)

    # A unittest assertion is used (not a bare ``assert``) so the check still
    # runs when Python is started with -O.
    self.assertIsNone(model.net.output_record())
def test_compute_averaged_norm_for_blobs(self):
    """Check the averaged L2 norm blob that ComputeNormForBlobs adds."""
    helper = model_helper.ModelHelper(name="test")
    input_blob = helper.net.AddExternalInput("data")
    first_fc = brew.fc(helper, input_blob, "fc1", dim_in=4, dim_out=2)

    # no operator name set, will use default
    brew.fc(helper, first_fc, "fc2", dim_in=2, dim_out=1)

    norm_modifier = ComputeNormForBlobs(
        blobs=['fc1_w', 'fc2_w'],
        logging_frequency=10,
        compute_averaged_norm=True,
    )
    norm_modifier(helper.net)

    workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))

    workspace.RunNetOnce(helper.param_init_net)
    workspace.RunNetOnce(helper.net)

    weights = workspace.FetchBlob('fc1_w')
    averaged_norm = workspace.FetchBlob('fc1_w_averaged_l2_norm')

    self.assertEqual(averaged_norm.size, 1)

    # Reference value: squared L2 norm divided by the element count.
    expected_norm = np.linalg.norm(weights)**2 / weights.size
    self.assertAlmostEqual(averaged_norm[0], expected_norm, delta=1e-5)

    self.assertEqual(len(helper.net.Proto().op), 8)
def testGPUDense(self, dtype=core.DataType.FLOAT):
    """Build the dense model under a CUDA device scope and run it once.

    The run is skipped when ``self._skip_gpu`` is truthy.
    """
    gpu_option = core.DeviceOption(caffe2_pb2.CUDA, 0)
    with core.DeviceScope(gpu_option):
        model, _perfect_model, data, label = self._createDense(dtype)

        # fp16 output is converted to fp32 before being copied to the host.
        gpu_source = 'fc'
        if dtype == core.DataType.FLOAT16:
            gpu_source = model.HalfToFloat('fc', 'fc_fp32_for_host')
        model.CopyGPUToCPU(gpu_source, 'fc_cpu')

        workspace.FeedBlob('data', data[0])
        workspace.FeedBlob('label', label[0])

    # Add some CPU ops
    brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)

    # Create optimizer in default device scope
    self.build_optimizer(model)

    if not self._skip_gpu:
        # Run net to see it does not crash
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net, True)
        workspace.RunNet(model.net.Proto().name)
def test_get_entry_from_blobs_modify_output_record(self):
    """Check GetEntryFromBlobs output and that it extends the output record.

    A random (i1, i2) entry of ``fc1_w`` is compared with the blob the
    modifier produces, and both suffixed field names are expected in the
    net's output record.
    """
    model = model_helper.ModelHelper(name="test")
    data = model.net.AddExternalInput("data")
    fc1 = brew.fc(model, data, "fc1", dim_in=4, dim_out=4)

    # no operator name set, will use default
    brew.fc(model, fc1, "fc2", dim_in=4, dim_out=4)

    i1, i2 = np.random.randint(4, size=2)
    net_modifier = GetEntryFromBlobs(
        blobs=['fc1_w', 'fc2_w'],
        logging_frequency=10,
        i1=i1,
        i2=i2,
    )
    net_modifier(model.net, modify_output_record=True)

    workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)

    fc1_w = workspace.FetchBlob('fc1_w')
    fc1_w_entry = workspace.FetchBlob('fc1_w_{0}_{1}'.format(i1, i2))

    self.assertEqual(fc1_w_entry.size, 1)
    self.assertEqual(fc1_w_entry[0], fc1_w[i1][i2])

    # unittest assertions (rather than bare ``assert``) keep these checks
    # active under -O; assertIn reports the container on failure.
    suffix = net_modifier.field_name_suffix()
    for weight_name in ('fc1_w', 'fc2_w'):
        self.assertIn(
            weight_name + suffix,
            model.net.output_record().field_blobs(),
        )
def test_release_blobs_when_used(self):
    """Check which blobs get a Free op from release_blobs_when_used.

    Verifies that no blob is freed twice, that no op reads or writes a blob
    after it was freed, and that exactly x, y and z are freed.
    """
    m = model_helper.ModelHelper()
    fc1 = brew.fc(m, "data", "x", dim_in=2, dim_out=2)
    fc2 = brew.fc(m, fc1, "y", dim_in=2, dim_out=2)
    fc3 = brew.fc(m, fc1, "z", dim_in=2, dim_out=2)
    fc4 = brew.fc(m, fc2, "u", dim_in=2, dim_out=2)
    m.net.Alias(["u"], ["u_alias"])

    brew.sum(m, [fc3, fc4], "out")

    # NOTE: set("data") would yield {'d', 'a', 't'}; a set literal is needed
    # to name the single blob "data" as the one that must not be freed.
    with_frees = memonger.release_blobs_when_used(m.net.Proto(), {"data"})

    # out is external output and u is aliased so cannot be freed
    expect_frees = {"x", "y", "z"}

    found_frees = set()
    for op in with_frees.op:
        if op.type == "Free":
            freed = op.input[0]
            self.assertNotIn(freed, found_frees)  # no double frees
            found_frees.add(freed)
        else:
            # Check a freed blob is not used anymore
            for inp in op.input:
                self.assertNotIn(inp, found_frees)
            for outp in op.output:
                self.assertNotIn(outp, found_frees)

    self.assertEqual(expect_frees, found_frees)
def test_optimizer_context(self):
    """Check per-scope optimizers and weight decay in the built net proto."""
    from caffe2.python import brew, optimizer
    from caffe2.python.model_helper import ModelHelper

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    count = optimizer._optimizer_instance_count['SgdOptimizer']
    conv_optimizer = SgdOptimizer(0.15)
    weight_optimizer = SgdOptimizer(0.2)
    bias_optimizer = SgdOptimizer(0.1)

    with UseOptimizer(conv_optimizer):
        conv_out = brew.conv(model, 'data', 'cnv', 32, 32, 4)
    with UseOptimizer({'WEIGHT': weight_optimizer, 'BIAS': bias_optimizer}):
        hidden = brew.fc(model, conv_out, 'a', 100, 200)
    prediction = brew.fc(model, hidden, 'b', 200, 5)
    (softmax, loss) = model.SoftmaxWithLoss(
        [prediction, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])

    add_weight_decay(model, weight_decay=1e-4)
    # use the following optimizer if none specified in param_info
    build_sgd(model, 0.11)

    pending_weight_grads = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}
    # One learning-rate blob per optimizer instance, numbered from `count`.
    expected_learning_rate = {}
    for offset, base_lr in enumerate((-0.15, -0.2, -0.1, -0.11)):
        lr_blob = "SgdOptimizer_{}_lr_cpu".format(count + offset)
        expected_learning_rate[lr_blob] = base_lr

    for op in model.net.Proto().op:
        # Check the proto that all weights are decayed and not non-weights
        # are decayed.
        if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
            decayed = op.output[0]
            if decayed not in pending_weight_grads:
                print(
                    "Unexpected param for weight_decay: {}".
                    format(decayed)
                )
            self.assertTrue(decayed in pending_weight_grads)
            pending_weight_grads.remove(decayed)
        # Check the learning rate for each parameter
        if op.type == 'LearningRate':
            found_lr = 0
            for op_arg in op.arg:
                if op_arg.name == 'base_lr':
                    found_lr = op_arg.f
            self.assertAlmostEqual(
                found_lr,
                expected_learning_rate[op.output[0]]
            )

    self.assertEqual(
        pending_weight_grads,
        set(),
        "Not all weights were decayed: {}".format(pending_weight_grads)
    )
def test_param_consistence(self):
    """A model built with param_model must share _parameters_info with it."""
    base_model = ModelHelper(name='test_mode')
    conv_out = brew.conv(base_model, 'data', 'cnv', 32, 32, 4)

    step_model = ModelHelper(name='step_model', param_model=base_model)
    hidden = brew.fc(step_model, conv_out, 'a', 100, 200)
    brew.fc(base_model, hidden, 'b', 200, 5)

    # test the _parameters_info is shared between model and step_model
    self.assertEqual(
        base_model._parameters_info, step_model._parameters_info)
def model_build_fun(model, loss_scale):
    """Chain five 8x8 FC layers (fc1..fc5) and sum the last one into "loss".

    ``loss_scale`` is accepted but not read. Returns a one-element list
    holding the loss blob.
    """
    current = "data"
    for index in range(1, 6):
        current = brew.fc(
            model, current, "fc{}".format(index), dim_in=8, dim_out=8)
    return [model.net.SumElements([current], ["loss"])]
def test_mobile_exporter(self):
    """Export a LeNet model and compare exported nets with the original.

    The reference output of the original net is compared against (1) a
    manual run of the exported init/predict nets and (2) a run through
    ``workspace.Predictor`` built from the serialized nets.
    """
    model = ModelHelper(name="mobile_exporter_test_model")
    # Test LeNet
    brew.conv(model, 'data', 'conv1', dim_in=1, dim_out=20, kernel=5)
    brew.max_pool(model, 'conv1', 'pool1', kernel=2, stride=2)
    brew.conv(model, 'pool1', 'conv2', dim_in=20, dim_out=50, kernel=5)
    brew.max_pool(model, 'conv2', 'pool2', kernel=2, stride=2)
    brew.fc(model, 'pool2', 'fc3', dim_in=50 * 4 * 4, dim_out=500)
    brew.relu(model, 'fc3', 'fc3')
    brew.fc(model, 'fc3', 'pred', 500, 10)
    brew.softmax(model, 'pred', 'out')

    # Create our mobile exportable networks
    workspace.RunNetOnce(model.param_init_net)
    init_net, predict_net = mobile_exporter.Export(
        workspace, model.net, model.params
    )

    # Populate the workspace with data
    np_data = np.random.rand(1, 1, 28, 28).astype(np.float32)
    workspace.FeedBlob("data", np_data)

    workspace.CreateNet(model.net)
    workspace.RunNet(model.net)
    ref_out = workspace.FetchBlob("out")

    # Clear the workspace
    workspace.ResetWorkspace()

    # Populate the workspace with data
    workspace.RunNetOnce(init_net)
    # Fake "data" is populated by init_net, we have to replace it
    workspace.FeedBlob("data", np_data)

    # Overwrite the old net
    workspace.CreateNet(predict_net, True)
    workspace.RunNet(predict_net.name)
    manual_run_out = workspace.FetchBlob("out")
    np.testing.assert_allclose(
        ref_out, manual_run_out, atol=1e-10, rtol=1e-10
    )

    # Clear the workspace
    workspace.ResetWorkspace()

    # Predictor interface test (simulates writing to disk)
    predictor = workspace.Predictor(
        init_net.SerializeToString(), predict_net.SerializeToString()
    )

    # Output is a vector of outputs but we only care about the first and
    # only result. assertEqual (not a bare ``assert``) keeps the length
    # check active under -O.
    predictor_out = predictor.run([np_data])
    self.assertEqual(len(predictor_out), 1)
    predictor_out = predictor_out[0]

    np.testing.assert_allclose(
        ref_out, predictor_out, atol=1e-10, rtol=1e-10
    )
def test_fc(self):
    """Build a single FC layer on a fed blob, validate the model and run it."""
    rows = cols = out_dim = 15
    # Random input shifted by -0.5, stored as float32.
    features = np.random.rand(rows, cols).astype(np.float32) - 0.5

    workspace.FeedBlob("x", features)

    model = ModelHelper(name="test_model")
    brew.fc(model, "x", "out_1", cols, out_dim)
    model.Validate()

    for net in (model.param_init_net, model.net):
        workspace.RunNetOnce(net)
def setUp(self):
    """Reset the workspace and create a one-layer FC net stored on self.model."""
    core.GlobalInit(["python", "caffe2"])
    ws.ResetWorkspace()

    helper = model_helper.ModelHelper()
    self.model = helper
    brew.fc(
        helper,
        "data",
        "y",
        dim_in=4,
        dim_out=2,
        weight_init=('ConstantFill', {'value': 1.0}),
        bias_init=('ConstantFill', {'value': 0.0}),
        axis=0,
    )

    ws.FeedBlob("data", np.zeros([4], dtype='float32'))
    ws.RunNetOnce(helper.param_init_net)
    ws.CreateNet(helper.net)
def test_net_conversion_and_append_net(self):
    """Append a converted net inside a data-parallel forward builder.

    Passes if building and running the parallelized model raises nothing.
    """
    other = model_helper.ModelHelper()
    other_fc1 = brew.fc(
        other, "data", "other_fc1", dim_in=3 * 227 * 227, dim_out=10)
    other_fc2 = brew.fc(other, other_fc1, "other_fc2", dim_in=10, dim_out=10)
    brew.fc(other, other_fc2, "other_fc3", dim_in=10, dim_out=10)

    def add_input_ops(model):
        for blob, shape in (("data", [4, 227, 227, 3]), ("label", [4])):
            model.net.UniformFill([], [blob], shape=shape)

    def add_model_ops(model, loss_scale):
        model.NHWC2NCHW("data", "data_nchw")
        model.Conv(
            "data_nchw",
            'conv1',
            3,
            64,
            weight_init=("MSRAFill", {}),
            kernel=7,
            stride=2,
            pad=3,
            no_bias=0,
        )
        model.SpatialBN(
            'conv1', 'conv1_spatbn_relu', 64, epsilon=1e-3, is_test=False)
        model.Relu('conv1_spatbn_relu', 'conv1_spatbn_relu')
        model.MaxPool('conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
        model.FC('pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=10)

        # Append the net and param_init_net of the other model
        converted_net = data_parallel_model.ConvertNetForDevice(other.net)
        model.net.AppendNet(converted_net)

        converted_init = data_parallel_model.ConvertNetForDevice(
            other.param_init_net)
        model.param_init_net.AppendNet(converted_init)

        model.Sigmoid('fc', 'fc_sigm')
        model.Softmax('fc_sigm', 'softmax')
        return [model.AveragedLoss('softmax', 'loss')]

    def add_optimizer(model):
        optimizer.build_sgd(model, 0.1, policy="fixed", momentum=0.9)

    parallel_model = cnn.CNNModelHelper(order="NCHW", name="test")
    data_parallel_model.Parallelize_CPU(
        parallel_model,
        input_builder_fun=add_input_ops,
        forward_pass_builder_fun=add_model_ops,
        optimizer_builder_fun=add_optimizer,
        devices=range(4),
    )

    # Just create and run net and confirm no exception is thrown
    workspace.RunNetOnce(parallel_model.param_init_net)
    workspace.CreateNet(parallel_model.net)
    workspace.RunNet(parallel_model.net)
def test_extract_simple(self):
    """Extract a predictor net and check its ops, renames and blob sets.

    The extracted net is expected to contain only Conv, FC, FC (no Softmax),
    to read the renamed input "image" instead of "xx/data", to export exactly
    the model parameters, and to expose "pred" as its only external output.
    """
    from caffe2.python import brew
    from caffe2.python.model_helper import ModelHelper, ExtractPredictorNet

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    [data, label] = brew.image_input(
        model,
        "reader", ["xx/data", "label"],
        is_test=1,
    )
    cnv = brew.conv(model, data, 'cnv', 32, 32, 4)
    a = brew.fc(model, cnv, 'a', 100, 200)
    pred = brew.fc(model, a, 'pred', 200, 5)
    brew.softmax(model, [pred, label], "softmax")

    (predict_net, export_blobs) = ExtractPredictorNet(
        net_proto=model.net.Proto(),
        input_blobs=["xx/data"],
        output_blobs=["pred"],
        renames={"xx/data": "image"},
    )
    export_blobs = set(export_blobs)
    param_names = {str(p) for p in model.params}

    ops = list(predict_net.Proto().op)
    for op in ops:
        self.assertNotEqual(op.type, "Softmax")
        self.assertNotIn("xx/data", op.input)

    # assertEqual is used throughout: the assertEquals alias is deprecated
    # and no longer exists in Python 3.12+.
    # Note: image input should not be included
    self.assertEqual(ops[0].type, "Conv")
    self.assertEqual(ops[1].type, "FC")
    self.assertEqual(ops[2].type, "FC")
    self.assertEqual(len(ops), 3)

    # test rename happened
    self.assertEqual(ops[0].input[0], "image")

    # Check export blobs
    self.assertNotIn("image", export_blobs)
    self.assertNotIn("xx/data", export_blobs)
    self.assertEqual(param_names, export_blobs)

    # Check external inputs/outputs
    self.assertIn("image", predict_net.Proto().external_input)
    self.assertEqual({"pred"}, set(predict_net.Proto().external_output))
    self.assertEqual(
        set(predict_net.Proto().external_input) - param_names,
        {"image"}
    )
def test_fc_fp16_initializer(self):
    """Build FC layers with pFP16Initializer, with and without weight_init.

    There are no assertions; the test passes if construction does not raise.
    """
    helper = model_helper.ModelHelper(name="test")
    input_blob = helper.net.AddExternalInput("data")
    plain_fc = brew.fc(helper, input_blob, "fc1", dim_in=1, dim_out=1)

    # default operator, pFP16Initializer
    default_op_fc = brew.fc(
        helper,
        plain_fc,
        "fc2",
        dim_in=1,
        dim_out=1,
        WeightInitializer=pFP16Initializer,
    )

    # specified operator, pFP16Initializer
    brew.fc(
        helper,
        default_op_fc,
        "fc3",
        dim_in=1,
        dim_out=1,
        weight_init=("ConstantFill", {}),
        WeightInitializer=pFP16Initializer,
    )
def _createDense(self, dtype=core.DataType.FLOAT):
    """Create a one-FC regression model plus matching data and labels.

    Labels are the dot product of random 0/1 data with a fixed weight
    vector. Returns ``(model, perfect_model, data, label)``.
    """
    perfect_model = np.array([2, 6, 5, 0, 1]).astype(np.float32)
    np.random.seed(123)  # make test deterministic

    if dtype == core.DataType.FLOAT:
        numpy_dtype = np.float32
        initializer = Initializer
    else:
        numpy_dtype = np.float16
        initializer = PseudoFP16Initializer

    raw_data = np.random.randint(2, size=(20, perfect_model.size))
    data = raw_data.astype(numpy_dtype)
    label = np.dot(data, perfect_model)[:, np.newaxis]

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    out = brew.fc(
        model,
        'data',
        'fc',
        perfect_model.size,
        1,
        ('ConstantFill', {}),
        ('ConstantFill', {}),
        axis=0,
        WeightInitializer=initializer,
        BiasInitializer=initializer,
    )
    # fp16 output is converted to fp32 before the loss is computed.
    if dtype == core.DataType.FLOAT16:
        out = model.HalfToFloat(out, out + "_fp32")

    squared_error = model.SquaredL2Distance([out, 'label'])
    avg_loss = model.AveragedLoss(squared_error, "avg_loss")
    grad_map = model.AddGradientOperators([avg_loss])
    self.assertIsInstance(grad_map['fc_w'], core.BlobReference)

    return (model, perfect_model, data, label)
def _calc_attention_logits_from_sum_match(
    model,
    decoder_hidden_encoder_outputs_sum,
    encoder_output_dim,
    scope,
):
    """Turn the summed decoder/encoder blob into transposed attention logits.

    Applies Tanh in place, projects to one logit with a bias-frozen FC along
    axis 2, then swaps the first two axes. Shape comments below are carried
    over from the original code.
    """
    # [encoder_length, batch_size, encoder_output_dim]
    activated_sum = model.net.Tanh(
        decoder_hidden_encoder_outputs_sum,
        decoder_hidden_encoder_outputs_sum,
    )

    # [encoder_length, batch_size, 1]
    logits = brew.fc(
        model,
        activated_sum,
        s(scope, 'attention_logits'),
        dim_in=encoder_output_dim,
        dim_out=1,
        axis=2,
        freeze_bias=True,
    )

    # [batch_size, encoder_length, 1]
    return brew.transpose(
        model,
        logits,
        s(scope, 'attention_logits_transposed'),
        axes=[1, 0, 2],
    )
def testShapeInferenceConvNet(self):
    """Build a small conv net with momentum SGD and compare inferred shapes.

    The final comparison is delegated to ``self.InferTensorRunAndCompare``.
    """
    model = model_helper.ModelHelper(name="convtest")
    model.NHWC2NCHW("data", "data_nchw")
    brew.conv(model, "data_nchw", 'conv1', 3, 64,
              weight_init=("MSRAFill", {}), kernel=7,
              stride=2, pad=3, no_bias=0)
    brew.spatial_bn(model, 'conv1', 'conv1_spatbn_relu', 64,
                    epsilon=1e-3, is_test=False)
    brew.relu(model, 'conv1_spatbn_relu', 'conv1_spatbn_relu')
    brew.max_pool(model, 'conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
    brew.fc(model, 'pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=100)
    brew.dropout(model, 'fc', 'fc_drop', is_test=False)
    model.Sigmoid('fc_drop', 'fc_sigm')
    brew.softmax(model, 'fc_sigm', 'softmax')
    model.LabelCrossEntropy(['softmax', 'label'], 'xent')
    loss = model.AveragedLoss('xent', 'loss')

    model.AddGradientOperators([loss])

    LR = model.param_init_net.ConstantFill(
        [], 'LR', shape=[1], value=0.1
    )

    # One momentum blob and one MomentumSGDUpdate op per parameter.
    for param in model.GetParams():
        param_grad = model.param_to_grad[param]
        param_momentum = model.param_init_net.ConstantFill(
            [param], param + '_momentum', value=0.0
        )
        model.net.MomentumSGDUpdate(
            [param_grad, param_momentum, LR, param],
            [param_grad, param_momentum, param],
        )

    workspace.FeedBlob(
        "data",
        np.random.rand(16, 227, 227, 3).astype(np.float32),
    )
    # "label" is fed exactly once; a second identical feed would only
    # overwrite the blob with fresh random values.
    workspace.FeedBlob(
        "label",
        (100 * np.random.rand(16)).astype(np.int32),
    )

    # Then do automatic comparison test: run the net once to
    # initialize everything
    self.InferTensorRunAndCompare(model)
def test_forward_optim_tree_daggy(self, input_dim, output_dim, batch_size):
    """Optimize a branching DAG net and compare both losses before/after."""
    m = model_helper.ModelHelper()
    m.Proto().type = "dag"
    m.Proto().num_workers = 4

    def square_fc(source, name):
        # FC layer mapping output_dim -> output_dim.
        return brew.fc(m, source, name, dim_in=output_dim, dim_out=output_dim)

    with core.NameScope("name_x"):
        fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        fc2 = square_fc(fc1, "fc2")

        fc3 = square_fc(fc2, "fc3")
        fc4 = square_fc(fc3, "fc4")
        fc5 = square_fc(fc4, "fc5")

        # Branch
        fc3b = square_fc(fc2, "fc3b")
        fc4b = square_fc(fc3b, "fc4b")
        fc5b = square_fc(fc4b, "fc5b")

        fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

        first_relu = fc5.Relu([], fc5sum)
        first_pred = first_relu.Softmax([], "pred1")
        first_xent = first_pred.LabelCrossEntropy(["label"], ["xent1"])
        first_xent.AveragedLoss([], "loss1")

        fc6 = square_fc(fc5, "fc6")
        second_relu = fc6.Relu([], fc6)
        second_pred = second_relu.Softmax([], "pred2")
        second_xent = second_pred.LabelCrossEntropy(["label"], ["xent2"])
        second_xent.AveragedLoss([], "loss2")

    blobs_before = count_blobs(m.net.Proto())
    optim_proto = memonger.optimize_inference_for_dag(
        m.net, ["name_x/data"], "name_x"
    )
    blobs_after = count_blobs(optim_proto)
    self.assertLess(blobs_after, blobs_before)

    # Test networks produce exactly same results
    data = np.random.randn(batch_size, input_dim).astype(np.float32)
    label = np.random.randint(
        low=0, high=output_dim, size=(batch_size,)).astype(np.int32)

    workspace.RunNetOnce(m.param_init_net)
    workspace.FeedBlob("name_x/data", data)
    workspace.FeedBlob("name_x/label", label)

    loss_blobs = ("name_x/loss1", "name_x/loss2")

    workspace.RunNetOnce(m.net)
    reference_losses = [workspace.FetchBlob(name) for name in loss_blobs]

    workspace.RunNetOnce(optim_proto)
    optimized_losses = [workspace.FetchBlob(name) for name in loss_blobs]

    for reference, optimized in zip(reference_losses, optimized_losses):
        np.testing.assert_almost_equal(reference, optimized)
def CreateModel(self):
    """Build the char_rnn training net and store its pieces on self.

    Sets ``hidden_output``, ``cell_state``, ``forward_net``, ``model``,
    ``predictions``, ``loss`` and ``prepare_state``.
    """
    log.debug("Start training")
    model = model_helper.ModelHelper(name="char_rnn")

    external_inputs = model.net.AddExternalInputs(
        'input_blob',
        'seq_lengths',
        'hidden_init',
        'cell_init',
        'target',
    )
    input_blob, seq_lengths, hidden_init, cell_init, target = external_inputs

    hidden_output_all, self.hidden_output, _, self.cell_state = LSTM(
        model,
        input_blob,
        seq_lengths,
        (hidden_init, cell_init),
        self.D,
        self.hidden_size,
        scope="LSTM",
    )
    output = brew.fc(
        model,
        hidden_output_all,
        None,
        dim_in=self.hidden_size,
        dim_out=self.D,
        axis=2,
    )

    # axis is 2 as first two are T (time) and N (batch size).
    # We treat them as one big batch of size T * N
    softmax = model.net.Softmax(output, 'softmax', axis=2)

    softmax_reshaped, _ = model.net.Reshape(
        softmax, ['softmax_reshaped', '_'], shape=[-1, self.D])

    # Snapshot the net here: the copy is used for the forward pass, where
    # neither the loss nor the backward operators are needed.
    self.forward_net = core.Net(model.net.Proto())

    xent = model.net.LabelCrossEntropy([softmax_reshaped, target], 'xent')
    # Loss is averaged both across the batch and through time.
    # That is why the learning rate below is multiplied by self.seq_length
    loss = model.net.AveragedLoss(xent, 'loss')
    model.AddGradientOperators([loss])

    # use the build_sgd function to build an optimizer
    build_sgd(
        model,
        base_learning_rate=0.1 * self.seq_length,
        policy="step",
        stepsize=1,
        gamma=0.9999,
    )

    self.model = model
    self.predictions = softmax
    self.loss = loss

    # Net that copies the final LSTM states back into the initial-state blobs.
    self.prepare_state = core.Net("prepare_state")
    state_pairs = (
        (self.hidden_output, hidden_init),
        (self.cell_state, cell_init),
    )
    for final_state, initial_state in state_pairs:
        self.prepare_state.Copy(final_state, initial_state)
def _create_model(self):
    """Return a ModelHelper with one constant-filled FC layer as output."""
    helper = model_helper.ModelHelper()
    fc_out = brew.fc(
        helper,
        "data",
        "y",
        dim_in=4,
        dim_out=2,
        weight_init=('ConstantFill', {'value': 1.0}),
        bias_init=('ConstantFill', {'value': 0.0}),
        axis=0,
    )
    helper.net.AddExternalOutput(fc_out)
    return helper
def testShapeInferenceSimpleFC(self):
    """Infer shapes for two stacked FC layers from a [64, 96] input."""
    m = model_helper.ModelHelper(name="test_model")

    brew.fc(m, "data", "fc1", dim_in=96, dim_out=32)
    brew.fc(m, "fc1", "fc2", dim_in=32, dim_out=55)

    (shapes, types) = workspace.InferShapesAndTypes(
        [m.param_init_net, m.net],
        {'data': [64, 96]}
    )

    # assertEqual replaces the deprecated assertEquals alias, which no longer
    # exists in Python 3.12+.
    self.assertEqual(shapes['data'], [64, 96])
    self.assertEqual(shapes['fc1_w'], [32, 96])
    self.assertEqual(shapes['fc1_b'], [32])
    self.assertEqual(shapes['fc1'], [64, 32])
    self.assertEqual(shapes['fc2_w'], [55, 32])
    self.assertEqual(shapes['fc2_b'], [55])
    self.assertEqual(shapes['fc2'], [64, 55])
def AddMLPModel(model, data):
    """Stack three FC+Relu layers on ``data`` and finish with a Softmax.

    Layer widths go 784 -> 1568 -> 1568 -> 10. Returns the softmax blob.
    """
    input_size = 28 * 28 * 1
    widths = [input_size, input_size * 2, input_size * 2, 10]

    current = data
    # Pair each width with its successor to get (dim_in, dim_out) per layer.
    for index, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
        dense = brew.fc(
            model,
            current,
            'dense_{}'.format(index),
            dim_in=fan_in,
            dim_out=fan_out,
        )
        current = model.net.Relu(dense, 'relu_{}'.format(index))

    return model.net.Softmax(current, 'softmax')
def prepare_input(self, model, input_blob):
    """Project ``input_blob`` with the scoped 'i2h' FC layer along axis 2.

    The output width is three times ``self.hidden_size``.
    """
    projection_name = self.scope('i2h')
    fan_in = self.input_size
    fan_out = 3 * self.hidden_size
    projected = brew.fc(
        model,
        input_blob,
        projection_name,
        dim_in=fan_in,
        dim_out=fan_out,
        axis=2,
    )
    return projected
def test_compute_histogram_for_blobs_modify_output_record(self):
    """Check ComputeHistogramForBlobs output and the extended output record.

    The fetched normalized histogram for ``fc1_w`` is compared against
    ``self.histogram``, the op count is pinned, and both suffixed field names
    are expected in the net's output record.
    """
    model = model_helper.ModelHelper(name="test")
    data = model.net.AddExternalInput("data")
    fc1 = brew.fc(model, data, "fc1", dim_in=4, dim_out=2)

    # no operator name set, will use default
    brew.fc(model, fc1, "fc2", dim_in=2, dim_out=1)

    num_buckets = 20
    lower_bound = 0.2
    upper_bound = 0.8
    accumulate = False
    net_modifier = ComputeHistogramForBlobs(blobs=['fc1_w', 'fc2_w'],
                                            logging_frequency=10,
                                            num_buckets=num_buckets,
                                            lower_bound=lower_bound,
                                            upper_bound=upper_bound,
                                            accumulate=accumulate)
    net_modifier(model.net, modify_output_record=True)

    workspace.FeedBlob('data', np.random.rand(10, 4).astype(np.float32))

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)

    fc1_w = workspace.FetchBlob('fc1_w')
    fc1_w_curr_normalized_hist = workspace.FetchBlob(
        'fc1_w_curr_normalized_hist')
    # Only the first histogram returned by self.histogram is compared here.
    cur_hist, _ = self.histogram(fc1_w,
                                 lower_bound=lower_bound,
                                 upper_bound=upper_bound,
                                 num_buckets=num_buckets)

    self.assertEqual(fc1_w_curr_normalized_hist.size, num_buckets + 2)
    self.assertAlmostEqual(np.linalg.norm(
        fc1_w_curr_normalized_hist - cur_hist), 0.0, delta=1e-5)
    self.assertEqual(len(model.net.Proto().op), 12)

    # unittest assertions (rather than bare ``assert``) keep these checks
    # active under -O; assertIn reports the container on failure.
    suffix = net_modifier.field_name_suffix()
    for weight_name in ('fc1_w', 'fc2_w'):
        self.assertIn(
            weight_name + suffix,
            model.net.output_record().field_blobs(),
        )
def test_fc_initializer(self):
    """Build FC layers with each WeightInitializer / weight_init combination.

    There are no assertions; the test passes if construction does not raise.
    """
    helper = model_helper.ModelHelper(name="test")
    input_blob = helper.net.AddExternalInput("data")
    plain_fc = brew.fc(helper, input_blob, "fc1", dim_in=1, dim_out=1)

    # no operator name set, will use default
    default_op_fc = brew.fc(
        helper,
        plain_fc,
        "fc2",
        dim_in=1,
        dim_out=1,
        WeightInitializer=Initializer,
    )

    # no operator name set, will use custom
    custom_op_fc = brew.fc(
        helper,
        default_op_fc,
        "fc3",
        dim_in=1,
        dim_out=1,
        WeightInitializer=Initializer,
        weight_init=("ConstantFill", {}),
    )

    # operator name set, no initializer class set
    brew.fc(
        helper,
        custom_op_fc,
        "fc4",
        dim_in=1,
        dim_out=1,
        WeightInitializer=None,
        weight_init=("ConstantFill", {}),
    )
def Inception(order, cudnn_ws, model_path=""):
    """Build the Inception benchmark model.

    ``cudnn_ws``, when truthy, is passed on as the ``ws_nbytes_limit`` arg
    scope entry. ``model_path`` is accepted but not read here.

    Returns ``(model, 224)``.
    """
    arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(name="inception", arg_scope=arg_scope)

    # Stem: conv / relu / pool, a 1x1 conv, then a second conv / relu / pool.
    stem_conv = brew.conv(
        model,
        "data",
        "conv1",
        3,
        64,
        7,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        stride=2,
        pad=3,
    )
    stem_relu = brew.relu(model, stem_conv, "conv1")
    stem_pool = brew.max_pool(
        model, stem_relu, "pool1", kernel=3, stride=2, pad=1)
    reduce_conv = brew.conv(
        model,
        stem_pool,
        "conv2a",
        64,
        64,
        1,
        ('XavierFill', {}),
        ('ConstantFill', {}),
    )
    reduce_conv = brew.relu(model, reduce_conv, reduce_conv)
    wide_conv = brew.conv(
        model,
        reduce_conv,
        "conv2",
        64,
        192,
        3,
        ('XavierFill', {}),
        ('ConstantFill', {}),
        pad=1,
    )
    wide_relu = brew.relu(model, wide_conv, "conv2")
    current = brew.max_pool(
        model, wide_relu, "pool2", kernel=3, stride=2, pad=1)

    # Inception modules. Each row holds the positional arguments passed to
    # _InceptionModule after the input blob, followed by the name of the
    # max-pool to insert after that module (None for no pooling).
    inception_stages = (
        (192, "inc3", 64, [96, 128], [16, 32], 32, None),
        (256, "inc4", 128, [128, 192], [32, 96], 64, "pool5"),
        (480, "inc5", 192, [96, 208], [16, 48], 64, None),
        (512, "inc6", 160, [112, 224], [24, 64], 64, None),
        (512, "inc7", 128, [128, 256], [24, 64], 64, None),
        (512, "inc8", 112, [144, 288], [32, 64], 64, None),
        (528, "inc9", 256, [160, 320], [32, 128], 128, "pool9"),
        (832, "inc10", 256, [160, 320], [32, 128], 128, None),
        (832, "inc11", 384, [192, 384], [48, 128], 128, None),
    )
    for dim_in, name, arg_a, arg_b, arg_c, arg_d, pool_name in inception_stages:
        current = _InceptionModule(
            model, current, dim_in, name, arg_a, arg_b, arg_c, arg_d
        )
        if pool_name is not None:
            current = brew.max_pool(
                model, current, pool_name, kernel=3, stride=2, pad=1)

    final_pool = brew.average_pool(
        model, current, "pool11", kernel=7, stride=1)
    logits = brew.fc(
        model,
        final_pool,
        "fc",
        1024,
        1000,
        ('XavierFill', {}),
        ('ConstantFill', {}),
    )
    # It seems that Soumith's benchmark does not have softmax on top
    # for Inception. We will add it anyway so we can have a proper
    # backward pass.
    prediction = brew.softmax(model, logits, "pred")
    cross_entropy = model.net.LabelCrossEntropy([prediction, "label"], "xent")
    model.net.AveragedLoss(cross_entropy, "loss")
    return model, 224
def Add_Original_CIFAR10_Model(model, data, num_classes, image_height, image_width, image_channels):
    """Build a three-conv CIFAR-10 style network ending in a softmax.

    The spatial size is tracked through ``update_dims`` after every conv and
    pooling layer so the first FC layer gets the matching ``dim_in``.
    Returns the softmax blob.
    """
    # Convolutional layer 1
    conv_a = brew.conv(
        model, data, 'conv1',
        dim_in=image_channels, dim_out=32, kernel=5, stride=1, pad=2)
    cur_h, cur_w = update_dims(
        height=image_height, width=image_width, kernel=5, stride=1, pad=2)
    # Pooling layer 1
    pool_a = brew.max_pool(model, conv_a, 'pool1', kernel=3, stride=2)
    cur_h, cur_w = update_dims(
        height=cur_h, width=cur_w, kernel=3, stride=2, pad=0)
    # ReLU layer 1
    relu_a = brew.relu(model, pool_a, 'relu1')

    # Convolutional layer 2
    conv_b = brew.conv(
        model, relu_a, 'conv2',
        dim_in=32, dim_out=32, kernel=5, stride=1, pad=2)
    cur_h, cur_w = update_dims(
        height=cur_h, width=cur_w, kernel=5, stride=1, pad=2)
    # ReLU layer 2
    relu_b = brew.relu(model, conv_b, 'relu2')
    # Pooling layer 2
    pool_b = brew.average_pool(model, relu_b, 'pool2', kernel=3, stride=2)
    cur_h, cur_w = update_dims(
        height=cur_h, width=cur_w, kernel=3, stride=2, pad=0)

    # Convolutional layer 3
    conv_c = brew.conv(
        model, pool_b, 'conv3',
        dim_in=32, dim_out=64, kernel=5, stride=1, pad=2)
    cur_h, cur_w = update_dims(
        height=cur_h, width=cur_w, kernel=5, stride=1, pad=2)
    # ReLU layer 3
    relu_c = brew.relu(model, conv_c, 'relu3')
    # Pooling layer 3
    pool_c = brew.average_pool(model, relu_c, 'pool3', kernel=3, stride=2)
    cur_h, cur_w = update_dims(
        height=cur_h, width=cur_w, kernel=3, stride=2, pad=0)

    # Fully connected layers
    hidden = brew.fc(
        model, pool_c, 'fc1', dim_in=64 * cur_h * cur_w, dim_out=64)
    logits = brew.fc(model, hidden, 'fc2', dim_in=64, dim_out=num_classes)

    # Softmax layer
    return brew.softmax(model, logits, 'softmax')
def test_net_conversion_and_append_net(self):
    """Append a converted net inside a data-parallel forward builder.

    Passes if building and running the parallelized model raises nothing.
    """
    other = model_helper.ModelHelper()
    other_fc1 = brew.fc(
        other, "data", "other_fc1", dim_in=3 * 227 * 227, dim_out=10)
    other_fc2 = brew.fc(other, other_fc1, "other_fc2", dim_in=10, dim_out=10)
    brew.fc(other, other_fc2, "other_fc3", dim_in=10, dim_out=10)

    def add_input_ops(model):
        for blob, shape in (("data", [4, 227, 227, 3]), ("label", [4])):
            model.net.UniformFill([], [blob], shape=shape)

    def add_model_ops(model, loss_scale):
        model.NHWC2NCHW("data", "data_nchw")
        model.Conv(
            "data_nchw",
            'conv1',
            3,
            64,
            weight_init=("MSRAFill", {}),
            kernel=7,
            stride=2,
            pad=3,
            no_bias=0,
        )
        model.SpatialBN(
            'conv1', 'conv1_spatbn_relu', 64, epsilon=1e-3, is_test=False)
        model.Relu('conv1_spatbn_relu', 'conv1_spatbn_relu')
        model.MaxPool('conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
        model.FC('pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=10)

        # Append the net and param_init_net of the other model
        converted_net = data_parallel_model.ConvertNetForDevice(other.net)
        model.net.AppendNet(converted_net)

        converted_init = data_parallel_model.ConvertNetForDevice(
            other.param_init_net)
        model.param_init_net.AppendNet(converted_init)

        model.Sigmoid('fc', 'fc_sigm')
        model.Softmax('fc_sigm', 'softmax')
        return [model.AveragedLoss('softmax', 'loss')]

    def add_optimizer(model):
        optimizer.build_sgd(model, 0.1, policy="fixed", momentum=0.9)

    parallel_model = cnn.CNNModelHelper(order="NCHW", name="test")
    data_parallel_model.Parallelize_CPU(
        parallel_model,
        input_builder_fun=add_input_ops,
        forward_pass_builder_fun=add_model_ops,
        optimizer_builder_fun=add_optimizer,
        devices=range(4),
    )

    # Just create and run net and confirm no exception is thrown
    workspace.RunNetOnce(parallel_model.param_init_net)
    workspace.CreateNet(parallel_model.net)
    workspace.RunNet(parallel_model.net)
def add_osme_branch(model, config, feature, seq):
    """Add one OSME branch to the model and return its attention feature blob.

    Pipeline:
        feature -> avgpool -> fc1 -> relu -> fc2 -> sigmoid -> mul(feature)
                -> avgpool -> fc3 -> attention feature

    Args:
        model: model helper the operators are added to.
        config: dict; reads ``config['model_arch']`` keys ``last_conv_size``,
            ``feature_dim``, ``r`` and ``attention_dim``.
        feature: input feature blob.
        seq: branch index; used in blob names and to pick the initializer.

    Returns:
        The output blob of the final fully connected layer.
    """
    arch = config['model_arch']

    # Branch 1 uses Xavier initialization, every other branch uses MSRA.
    if seq == 1:
        init_policy = "XavierFill"
    else:
        init_policy = "MSRAFill"

    # Pooling over the last conv map.
    pooled = brew.average_pool(
        model,
        feature,
        'osme_GAP1_{}'.format(seq),
        kernel=arch['last_conv_size'],
        stride=1,
    )

    # Squeeze: feature_dim -> feature_dim // r.
    squeezed = brew.fc(
        model,
        pooled,
        'osme_fc1_{}'.format(seq),
        dim_in=arch['feature_dim'],
        dim_out=arch['feature_dim'] // arch['r'],
        weight_init=(init_policy, {}),
    )
    # The ReLU writes to the blob it reads from.
    squeezed = brew.relu(model, squeezed, squeezed)

    # Excite: feature_dim // r -> feature_dim.
    excited = brew.fc(
        model,
        squeezed,
        'osme_fc2_{}'.format(seq),
        dim_in=arch['feature_dim'] // arch['r'],
        dim_out=arch['feature_dim'],
        weight_init=(init_policy, {}),
    )
    mask = model.net.Sigmoid(excited, 'osme_mask_{}'.format(seq))

    # Broadcast multiply of the feature by the mask.
    attention = model.net.Mul(
        [feature, mask],
        ['osme_excitation_{}'.format(seq)],
        broadcast=1,
        axis=0,
    )

    # Second pooling, then projection to the attention dimension.
    attention_pooled = brew.average_pool(
        model,
        attention,
        'osme_GAP2_{}'.format(seq),
        kernel=arch['last_conv_size'],
        stride=1,
    )
    return brew.fc(
        model,
        attention_pooled,
        'attention_{}'.format(seq),
        dim_in=arch['feature_dim'],
        dim_out=arch['attention_dim'],
        weight_init=(init_policy, {}),
    )
def create_vgg(
    model,
    data,
    num_input_channels,
    num_labels,
    num_layers=11,
    is_test=False,
):
    """Build a VGG network (configuration A, B, D or E) and return its logits.

    Every convolution is 3x3 with pad 1 and is followed by a ReLU that writes
    to the blob it reads from. Each of the five stages ends with a 2x2
    stride-2 max pool, and three fully connected layers follow.

    Args:
        model: model helper the operators are added to.
        data: input blob.
        num_input_channels: channel count of ``data``.
        num_labels: output width of the last fully connected layer.
        num_layers: 11, 13, 16 or 19.
        is_test: accepted for interface compatibility; not used here.

    Returns:
        The blob of the last fully connected layer (``fcxi``).

    Raises:
        NotImplementedError: if ``num_layers`` is not a supported depth.
    """
    if num_layers == 11:
        # VGG configuration A
        first_layers_count = 1
        last_layers_count = 2
    elif num_layers == 13:
        # VGG configuration B
        first_layers_count = 2
        last_layers_count = 2
    elif num_layers == 16:
        # VGG configuration D
        first_layers_count = 2
        last_layers_count = 3
    elif num_layers == 19:
        # VGG configuration E
        first_layers_count = 2
        last_layers_count = 4
    else:
        raise NotImplementedError(
            "not currently supported: try one of {11, 13, 16, 19}, corresponding to VGG A, B, D, and E."
        )

    def conv_relu(blob_in, name, dim_in, dim_out):
        # 3x3 conv (pad 1) plus a ReLU whose output reuses the conv's name.
        conv = brew.conv(
            model,
            blob_in,
            name,
            dim_in,
            dim_out,
            3,
            ('XavierFill', {}),
            ('ConstantFill', {}),
            pad=1,
        )
        return brew.relu(model, conv, name)

    def conv_stage(blob_in, head_name, extra_prefix, dim_in, dim_out, extra_count):
        # One head conv followed by `extra_count` same-width convs. Each
        # extra conv consumes the PREVIOUS layer's output; the earlier code
        # fed the head's output into every extra conv in stages 3-5, leaving
        # all but the last one disconnected for VGG-16/19.
        blob = conv_relu(blob_in, head_name, dim_in, dim_out)
        for i in range(extra_count):
            blob = conv_relu(blob, "{}{}".format(extra_prefix, i), dim_out, dim_out)
        return blob

    first_extra = first_layers_count - 1
    last_extra = last_layers_count - 1

    relu1 = conv_stage(data, "conv1", "conv1", num_input_channels, 64, first_extra)
    pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)

    relu2 = conv_stage(pool1, "conv2", "conv2", 64, 128, first_extra)
    pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)

    relu4 = conv_stage(pool2, "conv3", "conv4", 128, 256, last_extra)
    pool4 = brew.max_pool(model, relu4, "pool4", kernel=2, stride=2)

    relu6 = conv_stage(pool4, "conv5", "conv6", 256, 512, last_extra)
    pool6 = brew.max_pool(model, relu6, "pool6", kernel=2, stride=2)

    relu8 = conv_stage(pool6, "conv7", "conv8", 512, 512, last_extra)
    pool8 = brew.max_pool(model, relu8, "pool8", kernel=2, stride=2)

    # Classifier head; the first FC expects a 512 * 7 * 7 input.
    fcix = brew.fc(model, pool8, "fcix", 512 * 7 * 7, 4096,
                   ('XavierFill', {}), ('ConstantFill', {}))
    reluix = brew.relu(model, fcix, "fcix")
    fcx = brew.fc(model, reluix, "fcx", 4096, 4096,
                  ('XavierFill', {}), ('ConstantFill', {}))
    relux = brew.relu(model, fcx, "fcx")
    fcxi = brew.fc(model, relux, "fcxi", 4096, num_labels,
                   ('XavierFill', {}), ('ConstantFill', {}))
    return fcxi
def simple_fc():
    """Build a model with a single 10 -> 10 FC layer.

    Returns:
        A ``(model, input_shapes)`` pair where ``input_shapes`` is
        ``[(1, 10)]``.
    """
    net_model = ModelHelper(name="r")
    brew.fc(net_model, "data", "fc", 10, 10)
    input_shapes = [(1, 10)]
    return net_model, input_shapes
def AlexNet(order, cudnn_ws, ideep):
    """Build an AlexNet training graph with a cross-entropy loss.

    Args:
        order: storage order placed in the arg scope.
        cudnn_ws: workspace limit; stored in the arg scope as a string.
        ideep: accepted but not used by this builder.

    Returns:
        A ``(model, 224)`` pair.
    """
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': str(cudnn_ws),
    }
    model = ModelHelper(name="alexnet", arg_scope=my_arg_scope)

    def conv_relu(blob_in, name, dim_in, dim_out, kernel, **conv_kwargs):
        # Convolution plus a ReLU whose output reuses the conv's name.
        conv = brew.conv(model, blob_in, name, dim_in, dim_out, kernel,
                         ('XavierFill', {}), ('ConstantFill', {}), **conv_kwargs)
        return brew.relu(model, conv, name)

    def fc_layer(blob_in, name, dim_in, dim_out):
        return brew.fc(model, blob_in, name, dim_in, dim_out,
                       ('XavierFill', {}), ('ConstantFill', {}))

    def pool(blob_in, name):
        return brew.max_pool(model, blob_in, name, kernel=3, stride=2)

    # Feature extractor.
    blob = conv_relu("data", "conv1", 3, 64, 11, stride=4, pad=2)
    blob = pool(blob, "pool1")
    blob = conv_relu(blob, "conv2", 64, 192, 5, pad=2)
    blob = pool(blob, "pool2")
    blob = conv_relu(blob, "conv3", 192, 384, 3, pad=1)
    blob = conv_relu(blob, "conv4", 384, 256, 3, pad=1)
    blob = conv_relu(blob, "conv5", 256, 256, 3, pad=1)
    blob = pool(blob, "pool5")

    # Classifier.
    blob = brew.relu(model, fc_layer(blob, "fc6", 256 * 6 * 6, 4096), "fc6")
    blob = brew.relu(model, fc_layer(blob, "fc7", 4096, 4096), "fc7")
    blob = fc_layer(blob, "fc8", 4096, 1000)

    # Loss.
    pred = brew.softmax(model, blob, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224
def OverFeat(order, cudnn_ws, model_path=""):
    """Build an OverFeat training graph with a cross-entropy loss.

    Args:
        order: storage order placed in the arg scope.
        cudnn_ws: workspace limit; only added to the arg scope when truthy.
        model_path: accepted but not used by this builder.

    Returns:
        A ``(model, 231)`` pair.
    """
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        my_arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(name="overfeat", arg_scope=my_arg_scope)

    def conv_relu(blob_in, name, dim_in, dim_out, kernel, **conv_kwargs):
        # Convolution plus a ReLU whose output reuses the conv's name.
        conv = brew.conv(model, blob_in, name, dim_in, dim_out, kernel,
                         ('XavierFill', {}), ('ConstantFill', {}), **conv_kwargs)
        return brew.relu(model, conv, name)

    def fc_layer(blob_in, name, dim_in, dim_out):
        return brew.fc(model, blob_in, name, dim_in, dim_out,
                       ('XavierFill', {}), ('ConstantFill', {}))

    def pool(blob_in, name):
        return brew.max_pool(model, blob_in, name, kernel=2, stride=2)

    # Feature extractor.
    blob = conv_relu("data", "conv1", 3, 96, 11, stride=4)
    blob = pool(blob, "pool1")
    blob = conv_relu(blob, "conv2", 96, 256, 5)
    blob = pool(blob, "pool2")
    blob = conv_relu(blob, "conv3", 256, 512, 3, pad=1)
    blob = conv_relu(blob, "conv4", 512, 1024, 3, pad=1)
    blob = conv_relu(blob, "conv5", 1024, 1024, 3, pad=1)
    blob = pool(blob, "pool5")

    # Classifier.
    blob = brew.relu(model, fc_layer(blob, "fc6", 1024 * 6 * 6, 3072), "fc6")
    blob = brew.relu(model, fc_layer(blob, "fc7", 3072, 4096), "fc7")
    blob = fc_layer(blob, "fc8", 4096, 1000)

    # Loss.
    pred = brew.softmax(model, blob, "pred")
    xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, 231
def FC(self, *args, **kwargs):
    """Forward to ``brew.fc`` with this object as the model argument."""
    fc_blob = brew.fc(self, *args, **kwargs)
    return fc_blob
def VGGA(order, cudnn_ws, model_path=""):
    """Build a VGG-A training graph with a cross-entropy loss.

    Args:
        order: storage order placed in the arg scope.
        cudnn_ws: workspace limit; only added to the arg scope when truthy.
        model_path: accepted but not used by this builder.

    Returns:
        A ``(model, 231)`` pair.
    """
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
    }
    if cudnn_ws:
        my_arg_scope['ws_nbytes_limit'] = cudnn_ws
    model = model_helper.ModelHelper(name="vgga", arg_scope=my_arg_scope)

    # (conv name, input channels, output channels, pool name or None).
    # Every conv is 3x3 with pad 1; a pool name means a 2x2 stride-2 max pool
    # follows the conv's ReLU.
    conv_plan = (
        ("conv1", 3, 64, "pool1"),
        ("conv2", 64, 128, "pool2"),
        ("conv3", 128, 256, None),
        ("conv4", 256, 256, "pool4"),
        ("conv5", 256, 512, None),
        ("conv6", 512, 512, "pool6"),
        ("conv7", 512, 512, None),
        ("conv8", 512, 512, "pool8"),
    )
    blob = "data"
    for conv_name, dim_in, dim_out, pool_name in conv_plan:
        blob = brew.conv(
            model,
            blob,
            conv_name,
            dim_in,
            dim_out,
            3,
            ('XavierFill', {}),
            ('ConstantFill', {}),
            pad=1,
        )
        # The ReLU output reuses the conv's name.
        blob = brew.relu(model, blob, conv_name)
        if pool_name is not None:
            blob = brew.max_pool(model, blob, pool_name, kernel=2, stride=2)

    # Classifier: two FC + ReLU layers, then the 1000-way output layer.
    for fc_name, dim_in, dim_out in (("fcix", 512 * 7 * 7, 4096), ("fcx", 4096, 4096)):
        blob = brew.fc(model, blob, fc_name, dim_in, dim_out,
                       ('XavierFill', {}), ('ConstantFill', {}))
        blob = brew.relu(model, blob, fc_name)
    fcxi = brew.fc(model, blob, "fcxi", 4096, 1000,
                   ('XavierFill', {}), ('ConstantFill', {}))

    # Loss.
    pred = brew.softmax(model, fcxi, "pred")
    xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, 231
def _apply(
    self,
    model,
    input_t,
    seq_lengths,
    states,
    timestep,
    extra_inputs=None,
):
    """Add the operators for one GRU step and return the new hidden state.

    ``input_t`` is split along axis 2 into reset, update and output parts.
    The previous hidden state goes through one FC per gate, the input parts
    are added, and the concatenated gates are handed to ``GRUUnit``.

    Args:
        model: model helper the operators are added to.
        input_t: input blob for this step.
        seq_lengths: optional sequence-lengths blob; when ``None`` it is
            left out of the ``GRUUnit`` inputs.
        states: sequence whose first element is the previous hidden state.
        timestep: timestep blob passed to ``GRUUnit``.
        extra_inputs: not used by this cell.

    Returns:
        A one-element tuple holding the new hidden state blob.
    """
    scope = self.scope
    net = model.net
    prev_hidden = states[0]

    def hidden_fc(blob_in, name):
        # hidden_size -> hidden_size projection along axis 2.
        return brew.fc(
            model,
            blob_in,
            scope(name),
            dim_in=self.hidden_size,
            dim_out=self.hidden_size,
            axis=2,
        )

    # One input slice per gate.
    in_reset, in_update, in_output = net.Split(
        [input_t],
        [scope('input_t_reset'), scope('input_t_update'), scope('input_t_output')],
        axis=2,
    )

    reset_gate = hidden_fc(prev_hidden, 'reset_gate_t')
    update_gate = hidden_fc(prev_hidden, 'update_gate_t')

    # The reset gate receives its input contribution first, because its
    # sigmoid is needed to build the output gate.
    reset_gate = net.Sum([reset_gate, in_reset], scope('reset_gate_t'))
    reset_sigmoid = net.Sigmoid(reset_gate, scope('reset_gate_t_sigmoid'))

    if self.linear_before_reset:
        # `self.linear_before_reset = True` matches cudnn semantics:
        # project the hidden state first, then apply the reset gate.
        projected = hidden_fc(prev_hidden, 'output_gate_t')
        output_gate = net.Mul(
            [reset_sigmoid, projected],
            scope('output_gate_t_mul'),
        )
    else:
        # Apply the reset gate to the hidden state, then project.
        gated_hidden = net.Mul(
            [reset_sigmoid, prev_hidden],
            scope('modified_hidden_t_prev'),
        )
        output_gate = hidden_fc(gated_hidden, 'output_gate_t')

    # Input contributions for the remaining two gates.
    update_gate = net.Sum([update_gate, in_update], scope('update_gate_t'))
    output_gate = net.Sum([output_gate, in_output], scope('output_gate_t_summed'))

    gates, _concat_dims = net.Concat(
        [reset_gate, update_gate, output_gate],
        [scope('gates_t'), scope('_gates_t_concat_dims')],
        axis=2,
    )

    has_lengths = seq_lengths is not None
    unit_inputs = [prev_hidden, gates]
    if has_lengths:
        unit_inputs.append(seq_lengths)
    unit_inputs.append(timestep)

    new_hidden = net.GRUUnit(
        unit_inputs,
        list(self.get_state_names()),
        forget_bias=self.forget_bias,
        drop_states=self.drop_states,
        sequence_lengths=has_lengths,
    )
    net.AddExternalOutputs(new_hidden)
    return (new_hidden,)
def addModel(model, data):
    """Build a two-stage 1-D style convnet and return its softmax blob.

    Both convolutions use a ``1 x kernel_size`` kernel and are created with
    the raw ``Conv`` operator, so their weight and bias blobs are registered
    on the model by hand. Each convolution is followed by a ``1 x 2``
    stride-2 max pool. The classifier is FC(1000) -> ReLU -> FC(2) -> softmax.

    The FC input size is derived from a hard-coded input width of 41.

    Args:
        model: model helper the operators are added to.
        data: input blob.

    Returns:
        The blob produced by ``brew.softmax``.
    """
    channels = 50
    channels2 = 200
    kernel_size = 3

    def conv_params(name, weight_shape, bias_shape):
        # Create the conv's parameters (or reference them when
        # init_params is off) and register them on the model.
        if model.init_params:
            weight = model.param_init_net.XavierFill(
                [], name + '_w', shape=weight_shape)
            bias = model.param_init_net.ConstantFill(
                [], name + '_b', shape=bias_shape)
        else:
            weight = core.ScopedBlobReference(name + '_w', model.param_init_net)
            bias = core.ScopedBlobReference(name + '_b', model.param_init_net)
        model.params.extend([weight, bias])
        model.weights.append(weight)
        model.biases.append(bias)
        return weight, bias

    weight, bias = conv_params(
        'conv1', [channels, 1, 1, kernel_size], [channels])
    conv1 = model.net.Conv([data, weight, bias], 'conv1', dim_in=1,
                           dim_out=channels, kernel_h=1, kernel_w=kernel_size)
    pool1 = brew.max_pool(model, conv1, 'pool1', kernel_h=1, kernel_w=2, stride=2)
    # Floor division keeps the width an integer; true division would yield
    # floats (and a fractional FC input size) on Python 3.
    pool_dim_out = (41 - kernel_size) // 2

    weight2, bias2 = conv_params(
        'conv2', [channels2, channels, 1, kernel_size], [channels2])
    conv2 = model.net.Conv([pool1, weight2, bias2], 'conv2', dim_in=channels,
                           dim_out=channels2, kernel_h=1, kernel_w=kernel_size)
    pool2 = brew.max_pool(model, conv2, 'pool2', kernel_h=1, kernel_w=2, stride=2)
    pool_dim_out_2 = (pool_dim_out + 1 - kernel_size) // 2

    fc3 = brew.fc(model, pool2, 'fc3', dim_in=pool_dim_out_2 * channels2, dim_out=1000)
    # The ReLU writes to the blob it reads from.
    fc3 = brew.relu(model, fc3, fc3)
    pred = brew.fc(model, fc3, 'pred', 1000, 2)
    softmax = brew.softmax(model, pred, 'softmax')
    return softmax
def create_resnet50(
    model,
    data,
    num_input_channels,
    num_labels,
    label=None,
    is_test=False,
    no_loss=False,
    no_bias=0,
    conv1_kernel=7,
    conv1_stride=2,
    final_avg_kernel=7,
):
    """Build a ResNet-50 graph on ``model``.

    Args:
        model: model helper the operators are added to.
        data: input blob.
        num_input_channels: channel count of ``data``.
        num_labels: output width of the final FC layer.
        label: optional label blob; when given a softmax-with-loss is added.
        is_test: forwarded to the batch-norm layers and the block builder.
        no_loss: when true, return the final FC blob without softmax.
        no_bias: forwarded to the convolutions.
        conv1_kernel: kernel size of the first convolution.
        conv1_stride: stride of the first convolution.
        final_avg_kernel: kernel size given to the final average pool.

    Returns:
        The final FC blob if ``no_loss``; ``(softmax, loss)`` if ``label`` is
        given; otherwise the softmax blob.
    """
    # Stem: conv1 -> batch norm -> ReLU -> max pool.
    brew.conv(
        model,
        data,
        'conv1',
        num_input_channels,
        64,
        weight_init=("MSRAFill", {}),
        kernel=conv1_kernel,
        stride=conv1_stride,
        pad=3,
        no_bias=no_bias,
    )
    brew.spatial_bn(
        model,
        'conv1',
        'conv1_spatbn_relu',
        64,
        epsilon=1e-3,
        momentum=0.1,
        is_test=is_test,
    )
    brew.relu(model, 'conv1_spatbn_relu', 'conv1_spatbn_relu')
    brew.max_pool(model, 'conv1_spatbn_relu', 'pool1', kernel=3, stride=2)

    builder = ResNetBuilder(
        model, 'pool1', no_bias=no_bias, is_test=is_test, spatial_bn_mom=0.1)

    # Residual stages conv2_x .. conv5_x (ref Table 1 in He et al. (2015)):
    # (input dim, bottleneck dim, output dim, follow-up blocks, down-sample).
    stage_plan = (
        (64, 64, 256, 2, False),
        (256, 128, 512, 3, True),
        (512, 256, 1024, 5, True),
        (1024, 512, 2048, 2, True),
    )
    for dim_in, dim_mid, dim_out, follow_ups, down_sample in stage_plan:
        if down_sample:
            builder.add_bottleneck(dim_in, dim_mid, dim_out, down_sampling=True)
        else:
            builder.add_bottleneck(dim_in, dim_mid, dim_out)
        for _ in range(follow_ups):
            builder.add_bottleneck(dim_out, dim_mid, dim_out)

    # Head: average pool followed by the classification layer.
    final_avg = brew.average_pool(
        model,
        builder.prev_blob,
        'final_avg',
        kernel=final_avg_kernel,
        stride=1,
        global_pooling=True,
    )
    last_out = brew.fc(
        model, final_avg, 'last_out_L{}'.format(num_labels), 2048, num_labels)

    if no_loss:
        return last_out

    if label is None:
        # Inference: plain softmax.
        return brew.softmax(model, last_out, "softmax")

    # Training: softmax together with its loss.
    softmax, loss = model.SoftmaxWithLoss(
        [last_out, label],
        ["softmax", "loss"],
    )
    return (softmax, loss)
def test_cpu2gpu_gpu2cpu_gradients(self): model = model_helper.ModelHelper(name="copy_test") batch = 32 cpu_opt = core.DeviceOption(caffe2_pb2.CPU, 0) gpu_opt = core.DeviceOption(workspace.GpuDeviceType, 0) with core.NameScope("cpu"): with core.DeviceScope(cpu_opt): x_cpu = brew.fc(model, 'data', 'x_cpu', 16, 8) with core.NameScope("gpu_0"): with core.DeviceScope(gpu_opt): x_gpu = model.CopyCPUToGPU(x_cpu, "x_gpu") pred_gpu = brew.fc(model, x_gpu, "pred_gpu", 8, 4) pred_cpu = model.CopyGPUToCPU(pred_gpu, "pred_cpu") with core.DeviceScope(cpu_opt): with core.NameScope("cpu"): (softmax, loss) = model.SoftmaxWithLoss( [pred_cpu, "label"], ["softmax", "loss"], ) gradient_map = model.AddGradientOperators([loss]) # Add param updates (for cpu and gpu) init_net = model.param_init_net with core.DeviceScope(cpu_opt): with core.NameScope("cpu"): ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.) LR = init_net.ConstantFill([], "LR", shape=[1], value=-2.0) for param in model.GetParams(): model.WeightedSum( [param, ONE, gradient_map[param], LR], param, ) with core.NameScope("gpu_0"): with core.DeviceScope(gpu_opt): ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.) 
LR = init_net.ConstantFill([], "LR", shape=[1], value=-2.0) for param in model.GetParams(): model.WeightedSum( [param, ONE, gradient_map[param], LR], param, ) with core.DeviceScope(cpu_opt): workspace.FeedBlob( 'cpu/data', np.random.rand(batch, 16).astype(np.float32), ) workspace.FeedBlob( 'cpu/label', np.random.randint(4, size=batch).astype(np.int32), ) workspace.RunNetOnce(model.param_init_net) workspace.CreateNet(model.net) initial_params = {p: workspace.FetchBlob(p) for p in model.GetParams()} workspace.RunNet(model.net.Proto().name) updated_params = {p: workspace.FetchBlob(p) for p in model.GetParams()} for p in model.GetParams(): g = gradient_map[p] expected = initial_params[p] - 2.0 * workspace.FetchBlob(g) actual = updated_params[p] self.assertTrue( np.array_equal(expected, updated_params[p]), "Mismatch: {}: {}, {}".format(p, expected, actual), )
def apply_dot_attention(
    model,
    encoder_output_dim,
    # [batch_size, encoder_output_dim, encoder_length]
    encoder_outputs_transposed,
    # [1, batch_size, decoder_state_dim]
    decoder_hidden_state_t,
    decoder_hidden_state_dim,
    scope,
    encoder_lengths=None,
):
    """Add dot-product attention operators to ``model``.

    When the decoder state width differs from the encoder output width, the
    decoder state is first projected with an FC layer. The attention logits
    are a batched matrix product between the transposed encoder outputs and
    the decoder state. Weights and the weighted context are then computed by
    ``_calc_attention_weights`` and ``_calc_weighted_context``.

    Returns:
        ``(attention_weighted_encoder_context, attention_weights_3d, [])``.
    """
    if decoder_hidden_state_dim == encoder_output_dim:
        # Widths already match, so no projection is needed.
        query = decoder_hidden_state_t
    else:
        query = brew.fc(
            model,
            decoder_hidden_state_t,
            s(scope, 'weighted_decoder_hidden_state'),
            dim_in=decoder_hidden_state_dim,
            dim_out=encoder_output_dim,
            axis=2,
        )

    # Drop dimension 0, then append a new dimension at position 2 so the
    # query can be used as the right-hand side of the batched matmul.
    query_squeezed = model.net.Squeeze(
        query,
        s(scope, 'squeezed_weighted_decoder_hidden_state'),
        dims=[0],
    )
    query_column = model.net.ExpandDims(
        query_squeezed,
        s(scope, 'expanddims_squeezed_weighted_decoder_hidden_state'),
        dims=[2],
    )

    # Swap the last two axes of the encoder outputs.
    encoder_for_dot = model.net.Transpose(
        encoder_outputs_transposed,
        s(scope, 'encoder_outputs_for_dot_product'),
        axes=[0, 2, 1],
    )

    attention_logits_transposed = model.net.BatchMatMul(
        [encoder_for_dot, query_column],
        s(scope, 'attention_logits'),
    )

    attention_weights_3d = _calc_attention_weights(
        model=model,
        attention_logits_transposed=attention_logits_transposed,
        scope=scope,
        encoder_lengths=encoder_lengths,
    )

    attention_weighted_encoder_context = _calc_weighted_context(
        model=model,
        encoder_outputs_transposed=encoder_outputs_transposed,
        encoder_output_dim=encoder_output_dim,
        attention_weights_3d=attention_weights_3d,
        scope=scope,
    )
    return attention_weighted_encoder_context, attention_weights_3d, []
def alexnet():
    """Build an inference-mode AlexNet with LRN and dropout layers.

    Returns:
        A ``(model, input_shapes)`` pair where ``input_shapes`` is
        ``[(1, 3, 224, 224)]``.
    """
    model = ModelHelper(name="r", arg_scope={"order": "NCHW", "is_test": True})

    def conv_relu(blob_in, name, dim_in, dim_out, kernel, **conv_kwargs):
        # Convolution plus a ReLU whose output reuses the conv's name.
        conv = brew.conv(model, blob_in, name, dim_in, dim_out, kernel,
                         ('XavierFill', {}), ('ConstantFill', {}), **conv_kwargs)
        return brew.relu(model, conv, name)

    def lrn(blob_in, name):
        return brew.lrn(model, blob_in, name,
                        size=5, alpha=1.0e-4, beta=0.75, bias=1.0)

    def fc_relu(blob_in, name, dim_in, dim_out):
        # FC plus a ReLU whose output reuses the FC's name.
        fc = brew.fc(model, blob_in, name, dim_in, dim_out,
                     ('XavierFill', {}), ('ConstantFill', {}))
        return brew.relu(model, fc, name)

    # Feature extractor.
    blob = conv_relu("data", "conv1", 3, 64, 11, stride=4, pad=2)
    blob = brew.max_pool(model, blob, "pool1",
                         kernel=3, stride=2, pad=0, legacy_pad=3)
    blob = lrn(blob, "pool1_lrn")
    blob = conv_relu(blob, "conv2", 64, 192, 5, pad=2)
    blob = brew.max_pool(model, blob, "pool2", kernel=3, stride=2)
    blob = lrn(blob, "pool2_lrn")
    blob = conv_relu(blob, "conv3", 192, 384, 3, pad=1)
    blob = conv_relu(blob, "conv4", 384, 256, 3, pad=1)
    blob = conv_relu(blob, "conv5", 256, 256, 3, pad=1)
    blob = brew.max_pool(model, blob, "pool5", kernel=3, stride=2)

    # Classifier.
    blob = fc_relu(blob, "fc6", 256 * 6 * 6, 4096)
    blob = fc_relu(blob, "fc7", 4096, 4096)
    blob = brew.dropout(model, blob, "fc7_dropout", is_test=1, ratio=0.5)
    blob = fc_relu(blob, "fc8", 4096, 1000)
    brew.dropout(model, blob, "fc8_dropout", is_test=1, ratio=0.5)

    return model, [(1, 3, 224, 224)]
def test_fc_external_initializer(self):
    """With ``init_params=False`` an FC layer adds no initializer ops.

    The main net must contain exactly one operator and the param-init net
    must stay empty.
    """
    model = model_helper.ModelHelper(name="test", init_params=False)
    data = model.net.AddExternalInput("data")
    brew.fc(model, data, "fc1", dim_in=1, dim_out=1)

    net_op_count = len(model.net.Proto().op)
    init_op_count = len(model.param_init_net.Proto().op)
    self.assertEqual(net_op_count, 1)
    self.assertEqual(init_op_count, 0)
def test_multiple_optimizers(self):
    """Check learning-rate blob sharing across optimizer instances.

    * One optimizer instance applied to two parameters must reuse the same
      learning-rate blob.
    * A second instance of the same optimizer class must use a different
      learning-rate blob.
    * An optimizer of a different type must use a different one again.
    """
    from caffe2.python import brew, core, optimizer
    from caffe2.python.model_helper import ModelHelper

    model = ModelHelper(name="test")
    fc1 = brew.fc(model, 'data', 'fc1', 100, 50)
    fc2 = brew.fc(model, fc1, 'fc2', 50, 25)
    pred = brew.fc(model, fc2, 'fc3', 25, 10)
    (softmax, loss) = model.SoftmaxWithLoss(
        [pred, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])

    param_to_device = optimizer._get_param_to_device(model)

    def infer_blob_device(blob_name):
        return optimizer.get_param_device(
            blob_name, "{}_grad".format(blob_name), param_to_device)

    def apply_optimizer(opt, param):
        # Run `opt` on `param` under the device inferred for that blob.
        with core.DeviceScope(infer_blob_device(param)):
            opt(model.net, model.param_init_net, param, param + "_grad")

    def lr_blobs_for(op_type, params):
        # Input 3 (the slot this test treats as the learning rate) of every
        # `op_type` op updating one of `params`. The op-type check has to
        # guard BOTH parameter names: the previous
        # `type == T and in0 == w or in0 == b` form parsed as
        # `(type == T and in0 == w) or in0 == b`, so the bias branch
        # ignored the op type and indexed input[0] on every op.
        return [
            op.input[3]
            for op in model.net.Proto().op
            if op.type == op_type and op.input[0] in params
        ]

    sgd_1 = optimizer.SgdOptimizer(base_learning_rate=0.1)
    sgd_2 = optimizer.SgdOptimizer(base_learning_rate=0.2)
    adagrad = optimizer.AdagradOptimizer()

    # Check same optimizer share the same learning rate.
    apply_optimizer(sgd_1, "fc1_w")
    apply_optimizer(sgd_1, "fc1_b")
    fc1_lr_blobs = lr_blobs_for('WeightedSum', ('fc1_w', 'fc1_b'))
    self.assertEqual(fc1_lr_blobs[0], fc1_lr_blobs[1])

    # Check different instance of the same optimizer has a different lr.
    apply_optimizer(sgd_2, "fc2_w")
    apply_optimizer(sgd_2, "fc2_b")
    fc2_lr_blobs = lr_blobs_for('WeightedSum', ('fc2_w', 'fc2_b'))
    for lr_blob in fc2_lr_blobs:
        self.assertTrue(lr_blob not in fc1_lr_blobs)
    self.assertEqual(fc2_lr_blobs[0], fc2_lr_blobs[1])

    # Check different optimizer type case.
    apply_optimizer(adagrad, "fc3_w")
    apply_optimizer(adagrad, "fc3_b")
    fc3_lr_blobs = lr_blobs_for('Adagrad', ('fc3_w', 'fc3_b'))
    for lr_blob in fc3_lr_blobs:
        self.assertTrue(lr_blob not in fc2_lr_blobs)
        self.assertTrue(lr_blob not in fc1_lr_blobs)
    self.assertEqual(fc3_lr_blobs[0], fc3_lr_blobs[1])
def double_matmul():
    """Build a model with two chained 10 -> 10 FC layers.

    Both FC outputs are registered as external outputs of the model.

    Returns:
        A ``(model, input_shapes)`` pair where ``input_shapes`` is
        ``[(1, 10)]``.
    """
    net_model = ModelHelper(name="r")
    first = brew.fc(net_model, "data", "fc0", 10, 10)
    second = brew.fc(net_model, first, "fc1", 10, 10)
    output_names = [str(first), str(second)]
    net_model.Proto().external_output[:] = output_names
    return net_model, [(1, 10)]
def test_simple_model(self):
    """Convert a small convnet to a graph def and normalise it for comparison.

    The model is built in two name scopes (``conv1`` and ``classifier``),
    moved to MKL, given gradient operators and converted with
    ``tb.model_to_graph_def``. Because node order is not guaranteed, both
    the expected text and the produced graph are split on ``node {`` and
    sorted.
    """
    model = model_helper.ModelHelper(name="mnist")
    # how come those inputs don't break the forward pass =.=a
    workspace.FeedBlob("data", np.random.randn(1, 3, 64, 64).astype(np.float32))
    # Builtin `int` replaces the `np.int` alias, which newer NumPy releases
    # no longer provide; the resulting dtype is the same.
    workspace.FeedBlob("label", np.random.randn(1, 1000).astype(int))

    with core.NameScope("conv1"):
        conv1 = brew.conv(model, "data", 'conv1', dim_in=1, dim_out=20, kernel=5)
        pool1 = brew.max_pool(model, conv1, 'pool1', kernel=2, stride=2)
        conv2 = brew.conv(model, pool1, 'conv2', dim_in=20, dim_out=100, kernel=5)
        pool2 = brew.max_pool(model, conv2, 'pool2', kernel=2, stride=2)

    with core.NameScope("classifier"):
        # 100 * 4 * 4 stands for dim_out from the previous layer multiplied
        # by the image size.
        fc3 = brew.fc(model, pool2, 'fc3', dim_in=100 * 4 * 4, dim_out=500)
        relu = brew.relu(model, fc3, fc3)
        pred = brew.fc(model, relu, 'pred', 500, 10)
        softmax = brew.softmax(model, pred, 'softmax')
        xent = model.LabelCrossEntropy([softmax, "label"], 'xent')
        # compute the expected loss
        loss = model.AveragedLoss(xent, "loss")

    model.net.RunAllOnMKL()
    model.param_init_net.RunAllOnMKL()
    model.AddGradientOperators([loss], skip=1)

    blob_name_tracker = {}
    graph = tb.model_to_graph_def(
        model,
        blob_name_tracker=blob_name_tracker,
        shapes={},
        show_simplified=False,
    )
    # Disabled check kept for reference:
    # self.assertEqual(
    #     blob_name_tracker['GRADIENTS/conv1/conv1_b_grad'],
    #     'conv1/conv1_b_grad',
    # )
    self.maxDiff = None

    # We can't guarantee the order in which they appear, so we sort
    # both before we compare them.
    with open('tests/expect/caffe_mnist.expect') as f:
        EXPECTED_MNIST = f.read()
    sep = "node {"

    def normalise(text):
        # Split into node entries, drop blanks, and sort for a stable order.
        return "\n".join(
            sorted(sep + "\n " + part.strip()
                   for part in text.strip().split(sep)
                   if part.strip()))

    # NOTE(review): `expected` and `actual` are computed but never compared;
    # confirm whether an assertion was intentionally left out.
    expected = normalise(EXPECTED_MNIST)
    actual = normalise(str(graph))
def _apply(
    self,
    model,
    input_t,
    seq_lengths,
    states,
    timestep,
    extra_inputs=None,
):
    """Add the operators for one multiplicative-integration LSTM step.

    The gate pre-activation is::

        alpha * input_t * prev_t + beta_h * prev_t + beta_i * input_t + b

    where ``prev_t`` is an FC projection of the previous hidden state
    (concatenated with any extra inputs) and ``alpha``, ``beta_h``,
    ``beta_i`` and ``b`` are learned vectors of size ``4 * hidden_size``.

    Args:
        model: model helper the operators are added to.
        input_t: input blob for this step.
        seq_lengths: sequence-lengths blob passed to ``LSTMUnit``.
        states: pair ``(hidden_t_prev, cell_t_prev)``.
        timestep: timestep blob passed to ``LSTMUnit``.
        extra_inputs: optional iterable of ``(blob, size)`` pairs that are
            concatenated to the hidden state before the FC layer.

    Returns:
        ``(hidden_t, cell_t)`` blobs.
    """
    scope = self.scope
    net = model.net
    prev_hidden, prev_cell = states

    # Input of the recurrent projection: hidden state (+ extra inputs).
    recurrent_in = prev_hidden
    recurrent_dim = self.hidden_size
    if extra_inputs is not None:
        extra_blobs, extra_sizes = zip(*extra_inputs)
        recurrent_in, _ = net.Concat(
            [prev_hidden] + list(extra_blobs),
            [
                scope('gates_concatenated_input_t'),
                scope('_gates_concatenated_input_t_concat_dims'),
            ],
            axis=2,
        )
        recurrent_dim += sum(extra_sizes)

    prev_t = brew.fc(
        model,
        recurrent_in,
        scope('prev_t'),
        dim_in=recurrent_dim,
        dim_out=4 * self.hidden_size,
        axis=2,
    )

    def mi_param(name, value):
        # Constant-initialized MI vector of size 4 * hidden_size.
        return model.param_init_net.ConstantFill(
            [],
            [scope(name)],
            shape=[4 * self.hidden_size],
            value=value,
        )

    # MI parameters; note the blob names differ from the local names.
    alpha = mi_param('alpha', 1.0)
    beta_h = mi_param('beta1', 1.0)
    beta_i = mi_param('beta2', 1.0)
    b = mi_param('b', 0.0)
    model.params.extend([alpha, beta_h, beta_i, b])

    # alpha * input_t + beta_h
    scaled_input = net.ElementwiseLinear(
        [input_t, alpha, beta_h],
        scope('alpha_by_input_t_plus_beta_h'),
        axis=2,
    )
    # (alpha * input_t + beta_h) * prev_t
    #   = alpha * input_t * prev_t + beta_h * prev_t
    multiplicative_term = net.Mul(
        [scaled_input, prev_t],
        scope('alpha_by_input_t_plus_beta_h_by_prev_t'),
    )
    # beta_i * input_t + b
    additive_term = net.ElementwiseLinear(
        [input_t, beta_i, b],
        scope('beta_i_by_input_t_plus_b'),
        axis=2,
    )
    # alpha * input_t * prev_t + beta_h * prev_t + beta_i * input_t + b
    gates_t = net.Sum(
        [multiplicative_term, additive_term],
        scope('gates_t'),
    )

    hidden_t, cell_t = net.LSTMUnit(
        [prev_hidden, prev_cell, gates_t, seq_lengths, timestep],
        [scope('hidden_t_intermediate'), scope('cell_t')],
        forget_bias=self.forget_bias,
        drop_states=self.drop_states,
    )
    net.AddExternalOutputs(cell_t, hidden_t)

    if self.memory_optimization:
        self.recompute_blobs = [gates_t]
    return hidden_t, cell_t
def test_registry_invalid(self, input_dim, output_dim, batch_size):
    """Applying an unregistered transform name must raise ``RuntimeError``."""
    model = model_helper.ModelHelper()
    brew.fc(model, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
    net_proto = model.net.Proto()
    with self.assertRaises(RuntimeError):
        workspace.ApplyTransform("definitely_not_a_real_transform", net_proto)
def _MiniAlexNetNoDropout(self, order):
    """Build a scaled-down AlexNet (no dropout) with loss and gradients.

    Args:
        order: accepted but not used by this builder.

    Returns:
        The model helper, with gradient operators already added.
    """
    model = model_helper.ModelHelper(name="alexnet")

    def xavier(**fill_kwargs):
        return ("XavierFill", fill_kwargs)

    def constant(**fill_kwargs):
        return ("ConstantFill", fill_kwargs)

    def lrn(blob_in, name):
        return brew.lrn(model, blob_in, name, size=5, alpha=0.0001, beta=0.75)

    def max_pool(blob_in, name):
        return brew.max_pool(model, blob_in, name, kernel=3, stride=2)

    # Block 1: conv -> ReLU -> LRN -> pool.
    blob = brew.conv(model, "data", "conv1", 3, 16, 11,
                     xavier(), constant(), stride=4, pad=0)
    blob = brew.relu(model, blob, "relu1")
    blob = lrn(blob, "norm1")
    blob = max_pool(blob, "pool1")

    # Block 2: grouped conv -> ReLU -> LRN -> pool.
    blob = brew.group_conv(model, blob, "conv2", 16, 32, 5,
                           xavier(), constant(value=0.1),
                           group=2, stride=1, pad=2)
    blob = brew.relu(model, blob, "relu2")
    blob = lrn(blob, "norm2")
    blob = max_pool(blob, "pool2")

    # Blocks 3-5: three 3x3 convolutions, then a pool.
    blob = brew.conv(model, blob, "conv3", 32, 64, 3,
                     xavier(std=0.01), constant(), pad=1)
    blob = brew.relu(model, blob, "relu3")
    blob = brew.group_conv(model, blob, "conv4", 64, 64, 3,
                           xavier(), constant(value=0.1), group=2, pad=1)
    blob = brew.relu(model, blob, "relu4")
    blob = brew.group_conv(model, blob, "conv5", 64, 32, 3,
                           xavier(), constant(value=0.1), group=2, pad=1)
    blob = brew.relu(model, blob, "relu5")
    blob = max_pool(blob, "pool5")

    # Classifier.
    blob = brew.fc(model, blob, "fc6", 1152, 1024, xavier(), constant(value=0.1))
    blob = brew.relu(model, blob, "relu6")
    blob = brew.fc(model, blob, "fc7", 1024, 1024, xavier(), constant(value=0.1))
    blob = brew.relu(model, blob, "relu7")
    blob = brew.fc(model, blob, "fc8", 1024, 5, xavier(), constant(value=0.0))

    # Loss and gradients.
    pred = brew.softmax(model, blob, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    loss = model.AveragedLoss([xent], ["loss"])
    model.AddGradientOperators([loss])
    return model
def test_gradient_optim(self, input_dim, output_dim, batch_size):
    """Check that memonger.share_grad_blobs keeps loss and gradient unchanged.

    A chain of five FC layers is built under the "name_x" name scope and
    finished with Relu -> Softmax -> LabelCrossEntropy -> AveragedLoss.
    Two optimized protos are produced from the same net:

    * ``optim_proto``       -- ``share_activations=False``
    * ``optim_proto_wacts`` -- ``share_activations=True`` with the fc1_w
      gradient blob excluded from sharing

    The test asserts that the blob count shrinks, that "name_x/fc5" is
    still present in both protos, and that running each optimized proto
    yields the same loss and fc1_w gradient as the original net.

    Args:
        input_dim: input width of the first FC layer.
        output_dim: width of every FC output; also the exclusive upper
            bound for the random integer labels.
        batch_size: number of rows in the random data / label blobs.
    """
    m = model_helper.ModelHelper()
    with core.NameScope("name_x"):
        fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
        fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
        fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
        fc5.Relu([], fc5)\
           .Softmax([], "pred") \
           .LabelCrossEntropy(["label"], ["xent"]) \
           .AveragedLoss([], "loss")
    input_to_grad = m.AddGradientOperators(["name_x/loss"])

    # Name of the gradient blob that is compared across all runs below.
    fc1_w_grad = str(input_to_grad["name_x/fc1_w"])

    blobs_before = count_blobs(m.net.Proto())
    optim_proto = memonger.share_grad_blobs(
        m.net,
        ["name_x/loss"],
        set(viewvalues(m.param_to_grad)),
        "name_x/",
        share_activations=False,
    )
    blobs_after = count_blobs(optim_proto)
    self.assertLess(blobs_after, blobs_before)

    optim_proto_wacts = memonger.share_grad_blobs(
        m.net,
        ["name_x/loss"],
        set(viewvalues(m.param_to_grad)),
        "name_x/",
        share_activations=True,
        dont_share_blobs={fc1_w_grad},
    )
    blobs_wact_optim = count_blobs(optim_proto_wacts)
    self.assertLessEqual(blobs_wact_optim, blobs_after)

    # Check that the last activations are not shared
    self.assertTrue(has_blob(optim_proto, "name_x/fc5"))
    self.assertTrue(
        has_blob(optim_proto_wacts, "name_x/fc5"),
        "Dont remap final activation",
    )

    # Test networks produce exactly same gradients
    data = np.random.randn(batch_size, input_dim).astype(np.float32)
    label = np.random.randint(
        low=0, high=output_dim, size=(batch_size, )).astype(np.int32)
    workspace.RunNetOnce(m.param_init_net)
    workspace.FeedBlob("name_x/data", data)
    workspace.FeedBlob("name_x/label", label)
    workspace.RunNetOnce(m.net)
    loss = workspace.FetchBlob("name_x/loss")
    grad = workspace.FetchBlob(fc1_w_grad)

    # Overwrite the gradient blob before every optimized run. Without this
    # the comparison could succeed on the value left behind by the
    # reference run even if the optimized net never wrote the gradient.
    workspace.FeedBlob(fc1_w_grad, np.array([0.0]))
    workspace.RunNetOnce(optim_proto)
    optimized_loss = workspace.FetchBlob("name_x/loss")
    optimized_grad = workspace.FetchBlob(fc1_w_grad)
    np.testing.assert_almost_equal(loss, optimized_loss)
    np.testing.assert_almost_equal(grad, optimized_grad)

    workspace.FeedBlob(fc1_w_grad, np.array([0.0]))

    # Run with the forward optimization
    workspace.RunNetOnce(optim_proto_wacts)
    optimized_loss = workspace.FetchBlob("name_x/loss")
    optimized_grad = workspace.FetchBlob(fc1_w_grad)
    np.testing.assert_almost_equal(loss, optimized_loss)
    np.testing.assert_almost_equal(grad, optimized_grad)
def build_mnist_lenet(model, input_blob_name):
    """Build the LeNet network for MNIST.

    Layers are appended to ``model`` through the ``brew`` helpers in this
    order: conv, max-pool, conv, max-pool, fc, relu, fc, softmax.

    Args:
        model: model object the brew helpers add layers to.
        input_blob_name: blob holding the input images; the size comments
            below assume 28x28 grayscale (single-channel) MNIST images.

    Returns:
        The result of the final ``brew.softmax`` call (output blob "softmax").
    """
    # Layer 1 - convolution on the input image.
    # 1 input channel, 20 kernels of size 1x5x5: 28x28 -> 20 channels of 24x24.
    conv_a = brew.conv(
        model,
        input_blob_name,
        "layer_1_conv",
        dim_in=1,
        dim_out=20,
        kernel=5,
    )

    # Layer 2 - max-pooling over 2x2 windows with a stride of 2 pixels.
    # 20 channels of 24x24 -> 20 channels of 12x12.
    pool_a = brew.max_pool(
        model,
        conv_a,
        "layer_2_pool",
        kernel=2,
        stride=2,
    )

    # Layer 3 - convolution on the pooled features.
    # 20 input channels, 50 kernels of size 20x5x5: 12x12 -> 50 channels of 8x8.
    conv_b = brew.conv(
        model,
        pool_a,
        "layer_3_conv",
        dim_in=20,
        dim_out=50,
        kernel=5,
    )

    # Layer 4 - max-pooling over 2x2 windows with a stride of 2 pixels.
    # 50 channels of 8x8 -> 50 channels of 4x4.
    pool_b = brew.max_pool(
        model,
        conv_b,
        "layer_4_pool",
        kernel=2,
        stride=2,
    )

    # Layer 5 - fully connected: 50 channels of 4x4 -> vector of size 500.
    hidden = brew.fc(
        model,
        pool_b,
        "layer_5_fc",
        dim_in=50 * 4 * 4,
        dim_out=500,
    )

    # Layer 6 - ReLU; input and output are both of size 500.
    hidden_act = brew.relu(model, hidden, "layer_6_relu")

    # Layer 7 - fully connected: 500 -> 10, the number of MNIST classes.
    logits = brew.fc(
        model,
        hidden_act,
        "layer_7_fc",
        dim_in=500,
        dim_out=10,
    )

    # Layer 8 - softmax; each of the 10 outputs (digits 0 to 9) is a
    # probability score for that digit.
    return brew.softmax(model, logits, "softmax")
def test_gradient_optim_tree(self, input_dim, output_dim, batch_size):
    """Check memonger.share_grad_blobs on a net with two loss heads.

    Under the "name_x" scope a chain fc1..fc5 feeds a first loss head
    ("loss1"); fc6 is stacked on fc5 and feeds a second head ("loss2").
    The test asserts that sharing reduces the blob count, that
    "name_x/fc6" survives, and that the optimized proto reproduces both
    losses and the fc1_w gradient of the original net.
    """

    def dense(model, source, name, width_in):
        return brew.fc(model, source, name, dim_in=width_in, dim_out=output_dim)

    def loss_head(blob, pred_name, xent_name, loss_name):
        # In-place Relu, then softmax / cross-entropy / averaged loss.
        blob.Relu([], blob) \
            .Softmax([], pred_name) \
            .LabelCrossEntropy(["label"], [xent_name]) \
            .AveragedLoss([], loss_name)

    m = model_helper.ModelHelper()
    with core.NameScope("name_x"):
        fc1 = dense(m, "data", "fc1", input_dim)
        fc2 = dense(m, fc1, "fc2", output_dim)
        fc3 = dense(m, fc2, "fc3", output_dim)
        fc4 = dense(m, fc3, "fc4", output_dim)
        fc5 = dense(m, fc4, "fc5", output_dim)
        loss_head(fc5, "pred1", "xent1", "loss1")
        fc6 = dense(m, fc5, "fc6", output_dim)
        loss_head(fc6, "pred2", "xent2", "loss2")

    loss_names = ["name_x/loss1", "name_x/loss2"]
    input_to_grad = m.AddGradientOperators(
        ["name_x/loss1", "name_x/loss2"])
    grad_blob = str(input_to_grad["name_x/fc1_w"])

    blobs_before = count_blobs(m.net.Proto())
    optim_proto = memonger.share_grad_blobs(
        m.net,
        loss_names,
        set(viewvalues(m.param_to_grad)),
        "name_x",  # "name_x//shared_gradinp_0_shared" if using "name_x/"
        share_activations=True,
        dont_share_blobs={'name_x/fc6', 'name_x/fc5', grad_blob},
    )
    blobs_after = count_blobs(optim_proto)
    self.assertLess(blobs_after, blobs_before)
    self.assertTrue(has_blob(optim_proto, "name_x/fc6"))

    # Reference run on the unoptimized net.
    data = np.random.randn(batch_size, input_dim).astype(np.float32)
    label = np.random.randint(
        low=0, high=output_dim, size=(batch_size, )).astype(np.int32)
    workspace.RunNetOnce(m.param_init_net)
    workspace.FeedBlob("name_x/data", data)
    workspace.FeedBlob("name_x/label", label)
    workspace.RunNetOnce(m.net)
    loss1 = workspace.FetchBlob("name_x/loss1")
    loss2 = workspace.FetchBlob("name_x/loss2")
    grad = workspace.FetchBlob(grad_blob)

    # Overwrite the gradient blob, then run the optimized proto.
    workspace.FeedBlob(grad_blob, np.array([0.0]))
    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob("name_x/loss1")
    optimized_loss2 = workspace.FetchBlob("name_x/loss2")
    optimized_grad = workspace.FetchBlob(grad_blob)

    # The optimized net must produce the same losses and gradient.
    np.testing.assert_almost_equal(loss1, optimized_loss1)
    np.testing.assert_almost_equal(loss2, optimized_loss2)
    np.testing.assert_almost_equal(grad, optimized_grad)
from caffe2.python.model_helper import ModelHelper
from caffe2.python.cnn import CNNModelHelper
import unittest
import numpy as np

# NOTE(review): `workspace` and `brew` are used below but are not imported in
# this snippet -- presumably imported elsewhere in the file; confirm.

# Shapes for one fully connected layer: [m][k] * [k][n] = [m][n]
m = 1
k = 28 * 28
n = 10

# Random float32 input of shape (m, k), shifted down by 0.5.
x = np.random.rand(m, k).astype(np.float32) - 0.5

# Clear the workspace, then feed x as the blob "x".
workspace.ResetWorkspace()
workspace.FeedBlob("x", x)

model = ModelHelper(name="test_model")
# Protocol buffer before any operator is added; the result is discarded,
# so this is only informative in an interactive session.
model.Proto()

# Fully connected layer "x" -> "y" (k inputs, n outputs), then softmax
# "y" -> "z". Per the original note: weight is a k*n 2D tensor, y is m*n.
brew.fc(model, "x", "y", k, n)
brew.softmax(model, "y", "z")
model.Validate()
# Protocol buffer after the operators were added (result discarded).
model.Proto()

# Run the parameter-initialisation net. Per the original note this creates
# the randomized y_w (2D weight) and y_b (1D bias) blobs, which can then be
# inspected with workspace.FetchBlob("y_w") / workspace.FetchBlob("y_b").
workspace.RunNetOnce(model.param_init_net)
workspace.Blobs()  # blob listing of the workspace (result discarded)

# Run the forward net; "y" and "z" can afterwards be read with
# workspace.FetchBlob("y") / workspace.FetchBlob("z").
workspace.RunNetOnce(model.net)
def test_forward_optim_tree_harder(self, input_dim, output_dim, batch_size):
    """Check memonger.optimize_inference_for_dag on a branching forward net.

    The net (type "dag", 4 workers) has a main chain fc1..fc5, a side
    branch fc3b..fc5b that forks off fc2, a sum of fc5 and fc5b feeding the
    first loss head, and fc6 on top of fc5 feeding the second head.

    Asserts that the optimized proto has fewer blobs, that also listing a
    parameter ("name_x/fc1_w") as an input yields the same blob count, and
    that both losses match the unoptimized net.
    """

    def dense(source, name, width_in=output_dim):
        return brew.fc(m, source, name, dim_in=width_in, dim_out=output_dim)

    m = model_helper.ModelHelper()
    m.net.Proto().type = "dag"
    m.net.Proto().num_workers = 4
    for external in ("label", "data"):
        m.net.AddExternalInput(external)

    with core.NameScope("name_x"):
        # Main chain
        fc1 = dense("data", "fc1", input_dim)
        fc2 = dense(fc1, "fc2")
        fc3 = dense(fc2, "fc3")
        fc4 = dense(fc3, "fc4")
        fc5 = dense(fc4, "fc5")

        # Side branch forking off fc2
        fc3b = dense(fc2, "fc3b")
        fc4b = dense(fc3b, "fc4b")
        fc5b = dense(fc4b, "fc5b")

        # First head consumes the sum of both branches
        fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
        fc5sum.Relu([], "relu1") \
              .Softmax([], "pred1") \
              .LabelCrossEntropy(["label"], ["xent1"]) \
              .AveragedLoss([], "loss1")

        # Second head sits on fc6, stacked on fc5
        fc6 = dense(fc5, "fc6")
        fc6.Relu([], fc6) \
           .Softmax([], "pred2") \
           .LabelCrossEntropy(["label"], ["xent2"]) \
           .AveragedLoss([], "loss2")

    blobs_before = count_blobs(m.net.Proto())
    optim_proto = memonger.optimize_inference_for_dag(
        m.net, ["name_x/data"], "name_x/")
    blobs_after = count_blobs(optim_proto)

    # Extra test with when one of the parameters is also an input.
    # This caused a bug before.
    optim_proto_extra_input = memonger.optimize_inference_for_dag(
        m.net, ["name_x/data", "name_x/fc1_w"], "name_x/")
    blobs_after_extra_input = count_blobs(optim_proto_extra_input)
    self.assertEqual(blobs_after, blobs_after_extra_input)

    self.assertLess(blobs_after, blobs_before)

    # Reference run on the unoptimized net.
    data = np.random.randn(batch_size, input_dim).astype(np.float32)
    label = np.random.randint(
        low=0, high=output_dim, size=(batch_size, )).astype(np.int32)
    workspace.RunNetOnce(m.param_init_net)
    workspace.FeedBlob("name_x/data", data)
    workspace.FeedBlob("name_x/label", label)
    workspace.RunNetOnce(m.net)
    loss1 = workspace.FetchBlob("name_x/loss1")
    loss2 = workspace.FetchBlob("name_x/loss2")

    # Optimized run must give the same losses.
    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob("name_x/loss1")
    optimized_loss2 = workspace.FetchBlob("name_x/loss2")
    np.testing.assert_almost_equal(loss1, optimized_loss1)
    np.testing.assert_almost_equal(loss2, optimized_loss2)
def test_forward_optim_tree_daggy(self, input_dim, output_dim, batch_size):
    """Check memonger.optimize_inference_for_dag on a DAG-shaped forward net.

    Same layer layout as a main chain fc1..fc5 plus a side branch
    fc3b..fc5b forking off fc2, but here the first loss head applies Relu
    to fc5 and writes the result into the "fc5sum" blob. A second head
    sits on fc6, stacked on fc5.

    Asserts that the optimized proto passes
    ``memonger.verify_graph_equality`` against the original, that it has
    fewer blobs, and that both losses match the unoptimized net.
    """

    def dense(source, name, width_in=output_dim):
        return brew.fc(m, source, name, dim_in=width_in, dim_out=output_dim)

    m = model_helper.ModelHelper()
    m.Proto().type = "dag"
    m.Proto().num_workers = 4

    with core.NameScope("name_x"):
        # Main chain
        fc1 = dense("data", "fc1", input_dim)
        fc2 = dense(fc1, "fc2")
        fc3 = dense(fc2, "fc3")
        fc4 = dense(fc3, "fc4")
        fc5 = dense(fc4, "fc5")

        # Side branch forking off fc2
        fc3b = dense(fc2, "fc3b")
        fc4b = dense(fc3b, "fc4b")
        fc5b = dense(fc4b, "fc5b")

        fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

        # First head: Relu reads fc5 and writes into the fc5sum blob.
        fc5.Relu([], fc5sum) \
           .Softmax([], "pred1") \
           .LabelCrossEntropy(["label"], ["xent1"]) \
           .AveragedLoss([], "loss1")

        # Second head on top of fc6.
        fc6 = dense(fc5, "fc6")
        fc6.Relu([], fc6) \
           .Softmax([], "pred2") \
           .LabelCrossEntropy(["label"], ["xent2"]) \
           .AveragedLoss([], "loss2")

    blobs_before = count_blobs(m.net.Proto())
    optim_proto = memonger.optimize_inference_for_dag(
        m.net, ["name_x/data"], "name_x")
    self.assertTrue(
        memonger.verify_graph_equality(m.net.Proto(), optim_proto))
    blobs_after = count_blobs(optim_proto)
    self.assertLess(blobs_after, blobs_before)

    # Reference run on the unoptimized net.
    data = np.random.randn(batch_size, input_dim).astype(np.float32)
    label = np.random.randint(
        low=0, high=output_dim, size=(batch_size, )).astype(np.int32)
    workspace.RunNetOnce(m.param_init_net)
    workspace.FeedBlob("name_x/data", data)
    workspace.FeedBlob("name_x/label", label)
    workspace.RunNetOnce(m.net)
    loss1 = workspace.FetchBlob("name_x/loss1")
    loss2 = workspace.FetchBlob("name_x/loss2")

    # Optimized run must give the same losses.
    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob("name_x/loss1")
    optimized_loss2 = workspace.FetchBlob("name_x/loss2")
    np.testing.assert_almost_equal(loss1, optimized_loss1)
    np.testing.assert_almost_equal(loss2, optimized_loss2)
if prediction < np.random.uniform(0, 1): return 0 else: return 1 avg_t = np.array([]) input_data = np.random.rand(1, 4).astype(np.float32) workspace.FeedBlob("input_data", input_data) forward_model = model_helper.ModelHelper(name="forward") forward_init_net = forward_model.param_init_net forward_net = forward_model.net brew.fc(forward_model, 'input_data', 'hidden', 4, HIDDEN_SIZE) brew.relu(forward_model, 'hidden', 'hidden') brew.fc(forward_model, 'hidden', 'prediction', HIDDEN_SIZE, 1) forward_model.Sigmoid('prediction', 'prediction') full_model = model_helper.ModelHelper(name="full") full_init_net = full_model.param_init_net full_net = full_model.net loss = full_net.ConstantFill([], "loss", shape=[1], value=0.0) ONE = full_net.ConstantFill([], "ONE", shape=[1], value=1.) brew.fc(full_model, 'input_data', 'hidden', 4, HIDDEN_SIZE) brew.relu(full_model, 'hidden', 'hidden') brew.fc(full_model, 'hidden', 'prediction', HIDDEN_SIZE, 1) full_model.Sigmoid('prediction', 'prediction') gradient_map = full_net.AddGradientOperators(['loss'])