def test_weight_decay(self):
    """Weight decay must be applied to every weight blob and nothing else."""
    from caffe2.python import brew
    from caffe2.python.model_helper import ModelHelper

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
    a = brew.fc(model, cnv, 'a', 100, 200)
    pred = brew.fc(model, a, 'b', 200, 5)
    (softmax, loss) = model.SoftmaxWithLoss(
        [pred, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])
    add_weight_decay(model, weight_decay=1e-4)
    build_sgd(model, 0.11)

    # Every weight gradient we expect to see decayed; entries are removed
    # as the corresponding WeightedSum op is found.
    remaining = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}

    # A decay op is a WeightedSum that consumes the weight-decay scalar.
    decay_ops = [
        op for op in model.net.Proto().op
        if op.type == 'WeightedSum' and 'wd_0_0' in op.input
    ]
    for op in decay_ops:
        target = op.output[0]
        if target not in remaining:
            print("Unexpected param for weight_decay: {}".format(
                target))
        self.assertTrue(target in remaining)
        remaining.remove(target)

    self.assertEqual(
        remaining, set(),
        "Not all weights were decayed: {}".format(remaining))
def test_optimizer_context(self):
    """UseOptimizer scopes must pick per-layer optimizers; build_sgd covers the rest.

    Also re-checks that weight decay hits exactly the weight gradients.
    """
    from caffe2.python import brew, optimizer
    from caffe2.python.model_helper import ModelHelper

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    # Instance counter snapshot — lr blob names are suffixed with it.
    count = optimizer._optimizer_instance_count['SgdOptimizer']
    cnv_optim = SgdOptimizer(0.15)
    weight_optim = SgdOptimizer(0.2)
    bias_optim = SgdOptimizer(0.1)

    with UseOptimizer(cnv_optim):
        cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
    with UseOptimizer({'WEIGHT': weight_optim, 'BIAS': bias_optim}):
        a = brew.fc(model, cnv, 'a', 100, 200)
    pred = brew.fc(model, a, 'b', 200, 5)
    (softmax, loss) = model.SoftmaxWithLoss(
        [pred, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])
    add_weight_decay(model, weight_decay=1e-4)
    # use the following optimizer if none specified in param_info
    build_sgd(model, 0.11)

    undecayed = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}
    # One lr blob per optimizer instance, created in construction order.
    expected_learning_rate = {
        "SgdOptimizer_{}_lr_cpu".format(count + i): lr
        for i, lr in enumerate([-0.15, -0.2, -0.1, -0.11])
    }

    for op in model.net.Proto().op:
        # Check the proto that all weights are decayed and not non-weights
        # are decayed.
        if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
            target = op.output[0]
            if target not in undecayed:
                print(
                    "Unexpected param for weight_decay: {}".
                    format(target)
                )
            self.assertTrue(target in undecayed)
            undecayed.remove(target)
        # Check the learning rate for each parameter
        if op.type == 'LearningRate':
            base_lr = 0
            for arg in op.arg:
                if arg.name == 'base_lr':
                    base_lr = arg.f
            self.assertAlmostEqual(
                base_lr,
                expected_learning_rate[op.output[0]]
            )

    self.assertEqual(
        undecayed, set(),
        "Not all weights were decayed: {}".format(undecayed)
    )
def test_multiple_optimizers(self):
    """Each optimizer instance must own its own lr blob; shared instances share one.

    BUGFIX: the original filter conditions read
        ``op.type == X and op.input[0] == a or op.input[0] == b``
    which, because ``and`` binds tighter than ``or``, parses as
    ``(op.type == X and op.input[0] == a) or (op.input[0] == b)`` — any op
    whose first input is the bias blob matched regardless of its type, and
    ``op.input[3]`` could then be read from an op without four inputs.
    Fixed by testing ``op.type == X and op.input[0] in (a, b)``.
    """
    from caffe2.python import brew, core, optimizer
    from caffe2.python.model_helper import ModelHelper

    model = ModelHelper(name="test")
    fc1 = brew.fc(model, 'data', 'fc1', 100, 50)
    fc2 = brew.fc(model, fc1, 'fc2', 50, 25)
    pred = brew.fc(model, fc2, 'fc3', 25, 10)
    (softmax, loss) = model.SoftmaxWithLoss(
        [pred, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])

    param_to_device = optimizer._get_param_to_device(model)

    def infer_blob_device(blob_name):
        # Resolve the device a param (and its gradient) lives on.
        return optimizer.get_param_device(
            blob_name, "{}_grad".format(blob_name), param_to_device)

    def collect_lr_blobs(op_type, params):
        # lr is input[3] of both the WeightedSum (SGD) and Adagrad
        # update ops; gather it for each update op touching `params`.
        return [
            op.input[3] for op in model.net.Proto().op
            if op.type == op_type and op.input[0] in params
        ]

    sgd_1 = optimizer.SgdOptimizer(base_learning_rate=0.1)
    sgd_2 = optimizer.SgdOptimizer(base_learning_rate=0.2)
    adagrad = optimizer.AdagradOptimizer()

    # Check same optimizer shares the same learning rate blob.
    with core.DeviceScope(infer_blob_device("fc1_w")):
        sgd_1(model.net, model.param_init_net, "fc1_w", "fc1_w_grad")
    with core.DeviceScope(infer_blob_device("fc1_b")):
        sgd_1(model.net, model.param_init_net, "fc1_b", "fc1_b_grad")
    fc1_lr_blobs = collect_lr_blobs('WeightedSum', ('fc1_w', 'fc1_b'))
    self.assertEqual(fc1_lr_blobs[0], fc1_lr_blobs[1])

    # Check different instances of the same optimizer have different lrs.
    with core.DeviceScope(infer_blob_device("fc2_w")):
        sgd_2(model.net, model.param_init_net, "fc2_w", "fc2_w_grad")
    with core.DeviceScope(infer_blob_device("fc2_b")):
        sgd_2(model.net, model.param_init_net, "fc2_b", "fc2_b_grad")
    fc2_lr_blobs = collect_lr_blobs('WeightedSum', ('fc2_w', 'fc2_b'))
    for lr_blob in fc2_lr_blobs:
        self.assertTrue(lr_blob not in fc1_lr_blobs)
    self.assertEqual(fc2_lr_blobs[0], fc2_lr_blobs[1])

    # Check different optimizer type case.
    with core.DeviceScope(infer_blob_device("fc3_w")):
        adagrad(model.net, model.param_init_net, "fc3_w", "fc3_w_grad")
    with core.DeviceScope(infer_blob_device("fc3_b")):
        adagrad(model.net, model.param_init_net, "fc3_b", "fc3_b_grad")
    fc3_lr_blobs = collect_lr_blobs('Adagrad', ('fc3_w', 'fc3_b'))
    for lr_blob in fc3_lr_blobs:
        self.assertTrue(lr_blob not in fc1_lr_blobs)
        self.assertTrue(lr_blob not in fc2_lr_blobs)
    self.assertEqual(fc3_lr_blobs[0], fc3_lr_blobs[1])