def add_optimizer(model):
    stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    if args.float16_compute:
        # TODO: merge with multi-precision optimizer
        opt = optimizer.build_fp16_sgd(
            model,
            args.base_learning_rate,
            momentum=0.9,
            nesterov=1,
            weight_decay=args.weight_decay,  # weight decay included
            policy="step",
            stepsize=stepsz,
            gamma=0.1
        )
    else:
        optimizer.add_weight_decay(model, args.weight_decay)
        opt = optimizer.build_multi_precision_sgd(
            model,
            args.base_learning_rate,
            momentum=0.9,
            nesterov=1,
            policy="step",
            stepsize=stepsz,
            gamma=0.1
        )
    return opt
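# Builders like add_optimizer above are usually not called directly. In
# Caffe2's data-parallel training setup they are handed to
# data_parallel_model.Parallelize_GPU as optimizer_builder_fun, which calls
# them once for the whole model. A minimal sketch, assuming hypothetical
# add_image_input / create_model helpers and a num_gpus variable:
from caffe2.python import data_parallel_model

data_parallel_model.Parallelize_GPU(
    model,
    input_builder_fun=add_image_input,      # assumed input helper
    forward_pass_builder_fun=create_model,  # assumed network builder
    optimizer_builder_fun=add_optimizer,    # the function above
    devices=list(range(num_gpus)),          # assumed device list
)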
def add_optimizer(model):
    # stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    stepsz = 1
    if args.dtype == 'float16':
        opt = optimizer.build_fp16_sgd(
            model,
            args.base_learning_rate,
            momentum=0.9,
            nesterov=1,
            weight_decay=args.weight_decay,  # weight decay included
            policy="step",
            stepsize=stepsz,
            gamma=0.9999
        )
    else:
        optimizer.add_weight_decay(model, args.weight_decay)
        opt = optimizer.build_multi_precision_sgd(
            model,
            args.base_learning_rate,
            momentum=0.9,
            nesterov=1,
            policy="step",
            stepsize=stepsz,
            gamma=0.9999
        )
    return opt
def test_weight_decay(self):
    from caffe2.python import brew
    from caffe2.python.model_helper import ModelHelper
    from caffe2.python.optimizer import add_weight_decay, build_sgd

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
    a = brew.fc(model, cnv, 'a', 100, 200)
    pred = brew.fc(model, a, 'b', 200, 5)
    (softmax, loss) = model.SoftmaxWithLoss(
        [pred, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])

    add_weight_decay(model, weight_decay=1e-4)
    build_sgd(model, 0.11)

    expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}

    # Check the proto that all weights are decayed and that
    # non-weights are not decayed.
    for op in model.net.Proto().op:
        if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
            if op.output[0] not in expected_weight_grad:
                print("Unexpected param for weight_decay: {}".format(
                    op.output[0]))
            self.assertTrue(op.output[0] in expected_weight_grad)
            expected_weight_grad.remove(op.output[0])

    self.assertEqual(
        expected_weight_grad,
        set(),
        "Not all weights were decayed: {}".format(expected_weight_grad))
def add_optimizer(model):
    '''
    stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    optimizer.add_weight_decay(model, args.weight_decay)
    opt = optimizer.build_multi_precision_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.1
    )
    '''
    optimizer.add_weight_decay(model, args.weight_decay)
    opt = optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate=args.base_learning_rate,
        momentum=model_config['solver']['momentum'],
        nesterov=model_config['solver']['nesterov'],
        policy=model_config['solver']['lr_policy'],
        power=model_config['solver']['power'],
        max_iter=model_config['solver']['max_iter'],
    )
    return opt
def test_optimizer_context(self):
    from caffe2.python import brew, optimizer
    from caffe2.python.model_helper import ModelHelper
    from caffe2.python.optimizer import (
        SgdOptimizer, UseOptimizer, add_weight_decay, build_sgd)

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    count = optimizer._optimizer_instance_count['SgdOptimizer']
    cnv_optim = SgdOptimizer(0.15)
    weight_optim = SgdOptimizer(0.2)
    bias_optim = SgdOptimizer(0.1)

    with UseOptimizer(cnv_optim):
        cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
    with UseOptimizer({'WEIGHT': weight_optim, 'BIAS': bias_optim}):
        a = brew.fc(model, cnv, 'a', 100, 200)
    pred = brew.fc(model, a, 'b', 200, 5)
    (softmax, loss) = model.SoftmaxWithLoss(
        [pred, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])

    add_weight_decay(model, weight_decay=1e-4)
    # use the following optimizer if none is specified in param_info
    build_sgd(model, 0.11)

    expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}
    # Caffe2 registers SGD learning rates negated, hence the minus signs.
    expected_learning_rate = {
        "SgdOptimizer_{}_lr_cpu".format(count): -0.15,
        "SgdOptimizer_{}_lr_cpu".format(count + 1): -0.2,
        "SgdOptimizer_{}_lr_cpu".format(count + 2): -0.1,
        "SgdOptimizer_{}_lr_cpu".format(count + 3): -0.11,
    }

    for op in model.net.Proto().op:
        # Check the proto that all weights are decayed and that
        # non-weights are not decayed.
        if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
            if op.output[0] not in expected_weight_grad:
                print("Unexpected param for weight_decay: {}".format(
                    op.output[0]))
            self.assertTrue(op.output[0] in expected_weight_grad)
            expected_weight_grad.remove(op.output[0])
        # Check the learning rate for each parameter
        if op.type == 'LearningRate':
            val = 0
            for arg in op.arg:
                if arg.name == 'base_lr':
                    val = arg.f
            self.assertAlmostEqual(
                val, expected_learning_rate[op.output[0]])

    self.assertEqual(
        expected_weight_grad,
        set(),
        "Not all weights were decayed: {}".format(expected_weight_grad))
def add_optimizer(model):
    stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    optimizer.add_weight_decay(model, args.weight_decay)
    optimizer.build_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.1
    )
def add_optimizer(model, config):
    optimizer.add_weight_decay(model, config['solver']['weight_decay'])
    optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate=config['solver']['base_learning_rate'],
        policy=config['solver']['lr_policy'],
        stepsize=config['solver']['stepsize'],
        momentum=config['solver']['momentum'],
        gamma=config['solver']['gamma'],
        nesterov=config['solver']['nesterov'],
    )
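# For illustration, a hypothetical `config` dictionary carrying the keys the
# helper above reads; the values are placeholders, not taken from any project:
config = {
    'solver': {
        'weight_decay': 1e-4,
        'base_learning_rate': 0.1,
        'lr_policy': 'step',
        'stepsize': 150000,
        'momentum': 0.9,
        'gamma': 0.1,
        'nesterov': 1,
    },
}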
def AddOptimizerOps(model):
    """Add optimizer ops."""
    optimizer.add_weight_decay(model, 0.004)
    stepsize = TRAIN_ENTRIES * EPOCHS // BATCH_SIZE
    optimizer.build_sgd(
        model,
        0.001,
        policy='step',
        stepsize=stepsize,
        gamma=0.1,
        momentum=0.9,
        nesterov=False
    )
def add_optimizer_rmsprop(model, config):
    optimizer.add_weight_decay(model, config['solver']['weight_decay'])
    optimizer.build_rms_prop(
        model,
        base_learning_rate=config['solver']['base_learning_rate'],
        epsilon=config['solver']['epsilon'],
        decay=config['solver']['decay'],
        momentum=config['solver']['momentum'],
        policy=config['solver']['lr_policy'],
        stepsize=config['solver']['stepsize'],
    )
def add_optimizer(model):
    stepsz = int(30 * epoch_size / batch_size)
    optimizer.add_weight_decay(model, weight_decay)
    opt = optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.1
    )
    return opt
def add_optimizer(model):
    stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    optimizer.add_weight_decay(model, args.weight_decay)
    opt = optimizer.build_multi_precision_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.1
    )
    return opt
def AddTrainingOperators(model, loss):
    model.AddGradientOperators([loss])
    optimizer.add_weight_decay(model, 5e-4)
    stepsz = int(10 * 60000 / 128)
    opt = optimizer.build_sgd(
        model,
        base_learning_rate=0.01,
        policy="step",
        stepsize=stepsz,
        gamma=0.1,
        momentum=0.9
    )
    # opt = optimizer.build_yellowfin(model)
    return opt
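# For reference, Caffe2's "step" learning-rate policy multiplies the base rate
# by gamma once every stepsize iterations. A minimal pure-Python sketch of the
# schedule configured above:
def step_lr(base_lr, gamma, stepsize, it):
    # lr = base_lr * gamma ** floor(it / stepsize)
    return base_lr * gamma ** (it // stepsize)

# With base_lr=0.01, gamma=0.1 and stepsz=4687 (10 epochs of 60000 examples
# at batch size 128), the rate drops to 0.001 at iteration 4687 and to
# 0.0001 at iteration 9374.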
def build_optimizer(model, float16_compute=False):
    if False:  # float16_compute: newer versions of Caffe2 support this
        print("[INFO] Building FP16 SGD optimizer.")
        opt = optimizer.build_fp16_sgd(
            model,
            0.1,
            momentum=0.9,
            policy='step',
            gamma=0.1,
            weight_decay=1e-4
        )
    else:
        print("[INFO] Building Multi-precision SGD optimizer.")
        optimizer.add_weight_decay(model, 1e-4)
        # opt = optimizer.build_sgd(
        opt = optimizer.build_multi_precision_sgd(
            model,
            0.1,
            momentum=0.9,
            policy='fixed',
            gamma=0.1
        )
    return opt
def add_optimizer(model, config):
    # add L2 regularization for all weights
    optimizer.add_weight_decay(model, config['solver']['weight_decay'])
    optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate=config['solver']['base_learning_rate'],
        momentum=config['solver']['momentum'],
        nesterov=config['solver']['nesterov'],
        policy=config['solver']['lr_policy'],
        gamma=config['solver']['gamma'],
        stepsize=config['solver']['stepsize'],
        # power=config['solver']['power'],
        # max_iter=config['solver']['max_iter'],
        # policy="multistep"
        # stepsize=[100, 200, 500]
    )
def build_optimizer(model, float16_compute=False):
    if False:  # float16_compute: newer versions of Caffe2 support this
        print("[INFO] Building FP16 SGD optimizer.")
        opt = optimizer.build_fp16_sgd(
            model,
            0.1,
            momentum=0.9,
            policy='step',
            gamma=0.1,
            weight_decay=1e-4
        )
    else:
        optimizer.add_weight_decay(model, 1e-4)
        try:
            opt_builder = optimizer.build_multi_precision_sgd
            print("[INFO] Building Multi-precision SGD optimizer.")
        except AttributeError:
            opt_builder = optimizer.build_sgd
            print("[INFO] Building SGD optimizer "
                  "(Multi-precision SGD is not available).")
        opt = opt_builder(
            model,
            0.1,
            momentum=0.9,
            policy='fixed',
            gamma=0.1
        )
    return opt
def add_optimizer(model, config):
    # add L2 regularization for all weights
    optimizer.add_weight_decay(model, config['solver']['weight_decay'])
    # add L1 regularization only on the spatial BN weights (the scale params)
    # if config['solver'].has_key('sparse_scale'):
    # if config['solver']['sparse_scale'] != 0:
    if config['solver']['sparse_scale'] is not None:
        print("[INFO] Training with L1 regularization on BN scale...")
        add_l1_normalization_bn(model, config['solver']['sparse_scale'])
    optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate=config['solver']['base_learning_rate'],
        momentum=config['solver']['momentum'],
        nesterov=config['solver']['nesterov'],
        policy=config['solver']['lr_policy'],
        power=config['solver']['power'],
        max_iter=config['solver']['max_iter'],
        # gamma=config['solver']['gamma'],
        # stepsize=config['solver']['stepsize'],
    )
def add_optimizer(model):
    """
    Optimizer function called once for the entire model, as opposed to
    once for each CPU / GPU individually. Builds SGD with weight decay
    and a stepwise learning-rate decay.

    :return: the optimizer
    """
    stepsz = int(30 * args.epoch_size / args.batch_size / args.num_shards)
    stepsz = stepsz if stepsz else 100
    optimizer.add_weight_decay(model, 1e-4)
    # opt = optimizer.build_multi_precision_sgd(
    opt = optimizer.build_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.1
    )
    return opt
def CivilNet(name, train_test_deploy=0):
    arg_scope = {
        'order': 'NCHW',
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': (64 * 1024 * 1024),
    }
    model = model_helper.ModelHelper(name=name, arg_scope=arg_scope)
    model._device_type = caffe2_pb2.CUDA
    model._device_prefix = "gpu"
    model._shared_model = False
    model._devices = [0]
    device_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)

    # for deploy
    if train_test_deploy == 2:
        with core.DeviceScope(device_opt):
            with core.NameScope("{}_{}".format(model._device_prefix, 0)):
                with brew.arg_scope([brew.conv, brew.fc],
                                    WeightInitializer=Initializer,
                                    BiasInitializer=Initializer,
                                    enable_tensor_core=False,
                                    float16_compute=False):
                    resnet.create_resnet50(model, "data",
                                           num_input_channels=3,
                                           num_labels=args.num_labels,
                                           no_bias=True, no_loss=False)
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)
        return model

    reader_name = "reader" if train_test_deploy == 0 else "test_reader"
    reader_data = args.train_data if train_test_deploy == 0 else args.test_data
    reader = model.CreateDB(reader_name, db=reader_data, db_type='lmdb',
                            num_shards=1, shard_id=0)
    is_test = train_test_deploy == 1
    loss = None
    with core.DeviceScope(device_opt):
        with core.NameScope("{}_{}".format(model._device_prefix, 0)):
            AddImageInput(model, reader, batch_size=32, is_test=is_test)
            with brew.arg_scope([brew.conv, brew.fc],
                                WeightInitializer=Initializer,
                                BiasInitializer=Initializer,
                                enable_tensor_core=False,
                                float16_compute=False):
                pred = resnet.create_resnet50(model, "data",
                                              num_input_channels=3,
                                              num_labels=args.num_labels,
                                              no_bias=True, no_loss=True)
            softmax, loss = model.SoftmaxWithLoss([pred, 'label'],
                                                  ['softmax', 'loss'])
            brew.accuracy(model, [softmax, "label"], "accuracy")

    # for test
    if train_test_deploy == 1:
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)
        return model

    # for train
    loss_grad = {}
    losses_by_gpu = {0: [loss]}

    # add grad
    def create_grad(lossp):
        return model.ConstantFill(lossp, str(lossp) + "_grad", value=1.0)

    # Explicitly need to create gradients on GPU 0
    device = core.DeviceOption(model._device_type, 0)
    with core.DeviceScope(device):
        for l in losses_by_gpu[0]:
            lg = create_grad(l)
            loss_grad[str(l)] = str(lg)
    model.AddGradientOperators(loss_grad)
    # end add grad

    optimizer.add_weight_decay(model, args.weight_decay)
    stepsz = int(30 * args.epoch_size / 32)
    opt = optimizer.build_multi_precision_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.1
    )
    model._optimizer = opt

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model
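# A hypothetical driver for the builder above (names and iteration count are
# assumptions): mode 0 builds the training net, 1 the test net, 2 the deploy
# net; workspace.RunNet then executes the net created inside CivilNet.
train_model = CivilNet("civilnet", train_test_deploy=0)
for _ in range(1000):  # assumed number of training iterations
    workspace.RunNet(train_model.net)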
value=1)

# add model
pred = add_resnet50(model, data)

# add loss
softmax, loss = model.net.SoftmaxWithLoss(
    [pred, label],
    ['softmax', 'loss'],
)

# add training operator
model.AddGradientOperators([loss])

# add optimizer
optimizer.add_weight_decay(model, WEIGHT_DECAY)
add_l1_normalization_bn(model, SPARSE_SCALE)
optimizer.build_multi_precision_sgd(
    model,
    base_learning_rate=LEARNING_RATE,
    momentum=MOMENTUM,
    nesterov=1,
    policy='poly',
    power=1.,
    max_iter=MAX_ITER,
)

# initialization
workspace.RunNetOnce(model.param_init_net)
workspace.CreateNet(model.net)
print("hello foo")