def add_optimizer(model, config):
    """Attach L2 weight decay and a multi-precision SGD optimizer to *model*.

    Every hyper-parameter is read from the ``config['solver']`` section.
    """
    solver = config['solver']
    optimizer.add_weight_decay(model, solver['weight_decay'])
    optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate=solver['base_learning_rate'],
        policy=solver['lr_policy'],
        stepsize=solver['stepsize'],
        momentum=solver['momentum'],
        gamma=solver['gamma'],
        nesterov=solver['nesterov'],
    )
def add_optimizer(model):
    """Attach L2 weight decay and a poly-schedule multi-precision SGD optimizer.

    Weight decay and the base learning rate come from the CLI ``args``;
    the remaining solver hyper-parameters come from
    ``model_config['solver']``.

    Returns the optimizer instance produced by
    ``optimizer.build_multi_precision_sgd``.
    """
    # (An older step-policy variant driven entirely by `args` used to live
    # here as a commented-out block; it has been removed as dead code.)
    optimizer.add_weight_decay(model, args.weight_decay)
    opt = optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate=args.base_learning_rate,
        momentum=model_config['solver']['momentum'],
        nesterov=model_config['solver']['nesterov'],
        policy=model_config['solver']['lr_policy'],
        power=model_config['solver']['power'],
        max_iter=model_config['solver']['max_iter'],
    )
    return opt
def add_optimizer(model):
    """Build the training optimizer (SGD, step LR decay) for *model*.

    Returns the optimizer instance.
    """
    # Iterations in 30 epochs of distributed training — the step interval.
    step_size = int(30 * args.epoch_size / total_batch_size / num_shards)
    if args.float16_compute:
        # TODO: merge with multi-precision optimizer.
        # build_fp16_sgd takes weight decay directly.
        return optimizer.build_fp16_sgd(
            model,
            args.base_learning_rate,
            momentum=0.9,
            nesterov=1,
            weight_decay=args.weight_decay,
            policy="step",
            stepsize=step_size,
            gamma=0.1,
        )
    optimizer.add_weight_decay(model, args.weight_decay)
    return optimizer.build_multi_precision_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=step_size,
        gamma=0.1,
    )
def add_optimizer(model):
    """Create the SGD optimizer for *model*, decaying LR by 10x on a step
    schedule sized to roughly 30 epochs of distributed training."""
    stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    if not args.float16_compute:
        optimizer.add_weight_decay(model, args.weight_decay)
        opt = optimizer.build_multi_precision_sgd(
            model,
            args.base_learning_rate,
            momentum=0.9,
            nesterov=1,
            policy="step",
            stepsize=stepsz,
            gamma=0.1,
        )
    else:
        # TODO: merge with multi-precision optimizer
        opt = optimizer.build_fp16_sgd(
            model,
            args.base_learning_rate,
            momentum=0.9,
            nesterov=1,
            weight_decay=args.weight_decay,  # weight decay included
            policy="step",
            stepsize=stepsz,
            gamma=0.1,
        )
    return opt
def add_optimizer(model):
    """Build an SGD optimizer whose LR decays by gamma=0.9999 every single
    iteration (stepsize=1) — a smooth, near-exponential schedule.

    Returns the optimizer instance.
    """
    # Original epoch-based interval, kept for reference:
    # stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    stepsz = 1
    if args.dtype == 'float16':
        # build_fp16_sgd applies weight decay itself.
        opt = optimizer.build_fp16_sgd(
            model,
            args.base_learning_rate,
            momentum=0.9,
            nesterov=1,
            weight_decay=args.weight_decay,
            policy="step",
            stepsize=stepsz,
            gamma=0.9999,
        )
        return opt
    optimizer.add_weight_decay(model, args.weight_decay)
    opt = optimizer.build_multi_precision_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.9999,
    )
    return opt
def add_optimizer(model, config):
    """Attach L2 weight decay on all weights plus a step-policy
    multi-precision SGD optimizer, configured from ``config['solver']``."""
    solver = config['solver']
    # L2 regularization for every weight blob.
    optimizer.add_weight_decay(model, solver['weight_decay'])
    optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate=solver['base_learning_rate'],
        momentum=solver['momentum'],
        nesterov=solver['nesterov'],
        policy=solver['lr_policy'],
        gamma=solver['gamma'],
        stepsize=solver['stepsize'],
        # Alternatives used elsewhere in this project: power/max_iter for a
        # 'poly' policy, or policy="multistep" with a list-valued stepsize.
    )
def add_optimizer(model):
    """Build a multi-precision SGD optimizer with a tiny fixed-base LR
    (1e-8), zero momentum and no weight decay.

    Returns the optimizer instance.
    """
    # Weight decay deliberately left off:
    # optimizer.add_weight_decay(model, 1e-4)
    return optimizer.build_multi_precision_sgd(
        model,
        1e-8,
        momentum=0.0,
        nesterov=0,
        policy="step",
        stepsize=10000,
        gamma=0.1,
    )
def add_optimizer(model):
    """Attach weight decay and a step-decay multi-precision SGD optimizer.

    The LR drops by 10x every ``30 * epoch_size / batch_size`` iterations
    (module-level ``epoch_size``/``batch_size``/``weight_decay``/
    ``base_learning_rate`` are read as globals).
    """
    interval = int(30 * epoch_size / batch_size)
    optimizer.add_weight_decay(model, weight_decay)
    return optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=interval,
        gamma=0.1,
    )
def add_optimizer(model, config):
    """Attach regularizers and a poly-schedule SGD optimizer to *model*.

    Adds L2 weight decay on every weight and, when
    ``config['solver']['sparse_scale']`` is set, an extra L1 penalty on the
    spatial-BN scale parameters.
    """
    solver = config['solver']
    optimizer.add_weight_decay(model, solver['weight_decay'])
    # L1 norm only for the spatial-BN weights (the 'scale' params).
    # NOTE(review): `is not None` admits sparse_scale == 0 — confirm whether
    # a `!= 0` guard was intended (an earlier `has_key`/`!= 0` check was
    # commented out here).
    if solver['sparse_scale'] is not None:
        print("[INFO] Training with L1 regularization on BN scale...")
        add_l1_normalization_bn(model, solver['sparse_scale'])
    optimizer.build_multi_precision_sgd(
        model,
        base_learning_rate=solver['base_learning_rate'],
        momentum=solver['momentum'],
        nesterov=solver['nesterov'],
        policy=solver['lr_policy'],
        power=solver['power'],
        max_iter=solver['max_iter'],
    )
def add_optimizer(model):
    """Weight decay + multi-precision SGD with a 10x LR drop on a step
    interval sized to about 30 epochs of sharded training.

    Returns the optimizer instance.
    """
    decay_every = int(30 * args.epoch_size / total_batch_size / num_shards)
    optimizer.add_weight_decay(model, args.weight_decay)
    return optimizer.build_multi_precision_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=decay_every,
        gamma=0.1,
    )
def build_optimizer(model, float16_compute = False):
    """Build the training optimizer: multi-precision SGD, base LR 0.1,
    momentum 0.9, fixed LR policy, 1e-4 L2 weight decay.

    Returns the optimizer instance.
    """
    # The FP16-SGD path is deliberately disabled with `if False:` — newer
    # versions of Caffe support it, this build does not. `float16_compute`
    # is kept in the signature for callers.
    if False:  # float16_compute:
        print("[INFO] Building FP16 SGD optimizer.")
        opt = optimizer.build_fp16_sgd(
            model, 0.1, momentum=0.9, policy='step', gamma=0.1,
            weight_decay=1e-4,
        )
        return opt
    print("[INFO] Building Multi-precision SGD optimizer.")
    optimizer.add_weight_decay(model, 1e-4)
    # Plain-SGD variant, kept for reference:
    # opt = optimizer.build_sgd(
    opt = optimizer.build_multi_precision_sgd(
        model, 0.1, momentum=0.9, policy='fixed', gamma=0.1,
    )
    return opt
def build_optimizer(self, model, **kwargs):
    """Create a multi-precision SGD optimizer (base LR 0.1) for *model*,
    forwarding any extra keyword arguments to the builder."""
    # Mark that GPU execution should not be skipped for this path.
    self._skip_gpu = False
    return build_multi_precision_sgd(model, base_learning_rate=0.1, **kwargs)
def CivilNet(name, train_test_deplopy=0):
    """Build a single-GPU ResNet-50 Caffe2 model.

    ``train_test_deplopy`` selects the mode (NOTE(review): parameter name
    looks like a typo for "deploy", but it is part of the public interface):
      0 - training: LMDB reader, loss, gradients and SGD optimizer attached;
      1 - testing:  LMDB test reader, loss/accuracy, no gradients;
      2 - deploy:   bare forward net, no reader and no loss.

    In every mode the param-init net is run and the net created in the
    workspace before the ModelHelper is returned.
    """
    # cuDNN-backed NCHW model on GPU 0 only.
    arg_scope = {
        'order': 'NCHW',
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': (64 * 1024 * 1024),
    }
    model = model_helper.ModelHelper(name=name, arg_scope=arg_scope)
    # Fields normally set by data_parallel_model; filled in by hand here
    # since this is a single-device build.
    model._device_type = caffe2_pb2.CUDA
    model._device_prefix = "gpu"
    model._shared_model = False
    model._devices = [0]
    device_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)

    # for deploy
    if train_test_deplopy == 2:
        with core.DeviceScope(device_opt):
            with core.NameScope("{}_{}".format(model._device_prefix, 0)):
                with brew.arg_scope(
                    [brew.conv, brew.fc],
                    WeightInitializer=Initializer,
                    BiasInitializer=Initializer,
                    enable_tensor_core=False,
                    float16_compute=False,
                ):
                    # Forward-only net (no_loss=False here; loss blobs exist
                    # but no reader/label input is attached).
                    resnet.create_resnet50(
                        model, "data", num_input_channels=3,
                        num_labels=args.num_labels, no_bias=True,
                        no_loss=False,
                    )
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)
        return model

    # Train (mode 0) and test (mode 1) read from different LMDBs.
    reader_name = "reader" if train_test_deplopy == 0 else "test_reader"
    reader_data = args.train_data if train_test_deplopy == 0 else args.test_data
    reader = model.CreateDB(
        reader_name, db=reader_data, db_type='lmdb', num_shards=1, shard_id=0,
    )
    is_test = True if train_test_deplopy == 1 else False
    loss = None
    with core.DeviceScope(device_opt):
        with core.NameScope("{}_{}".format(model._device_prefix, 0)):
            AddImageInput(model, reader, batch_size=32, is_test=is_test)
            with brew.arg_scope(
                [brew.conv, brew.fc],
                WeightInitializer=Initializer,
                BiasInitializer=Initializer,
                enable_tensor_core=False,
                float16_compute=False,
            ):
                pred = resnet.create_resnet50(
                    model, "data", num_input_channels=3,
                    num_labels=args.num_labels, no_bias=True, no_loss=True,
                )
                softmax, loss = model.SoftmaxWithLoss(
                    [pred, 'label'], ['softmax', 'loss'],
                )
                brew.accuracy(model, [softmax, "label"], "accuracy")

    # for test
    if train_test_deplopy == 1:
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)
        return model

    # for train
    loss_grad = {}
    losses_by_gpu = {}
    losses_by_gpu[0] = [loss]

    # add grad
    def create_grad(lossp):
        # Seed gradient of 1.0 for each loss blob.
        return model.ConstantFill(lossp, str(lossp) + "_grad", value=1.0)

    # Explicitly need to create gradients on GPU 0
    device = core.DeviceOption(model._device_type, 0)
    with core.DeviceScope(device):
        for l in losses_by_gpu[0]:
            lg = create_grad(l)
            loss_grad[str(l)] = str(lg)
    model.AddGradientOperators(loss_grad)
    # end add grad

    optimizer.add_weight_decay(model, args.weight_decay)
    # Step interval: iterations in ~30 epochs at batch size 32.
    stepsz = int(30 * args.epoch_size / 32)
    opt = optimizer.build_multi_precision_sgd(
        model, args.base_learning_rate, momentum=0.9, nesterov=1,
        policy="step", stepsize=stepsz, gamma=0.1,
    )
    model._optimizer = opt
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model
def build_optimizer(self, model, **kwargs):
    """Return a multi-precision SGD optimizer for *model* with a base
    learning rate of 0.1; remaining options pass through via **kwargs."""
    self._skip_gpu = False  # keep GPU execution enabled for this optimizer
    opt = build_multi_precision_sgd(model, base_learning_rate=0.1, **kwargs)
    return opt
softmax, loss = model.net.SoftmaxWithLoss( [pred, label], ['softmax', 'loss'], ) # add training operator model.AddGradientOperators([loss]) # add optimizer optimizer.add_weight_decay(model, WEIGHT_DECAY) add_l1_normalization_bn(model, SPARSE_SCALE) optimizer.build_multi_precision_sgd( model, base_learning_rate=LEARNING_RATE, momentum=MOMENTUM, nesterov=1, policy='poly', power=1., max_iter=MAX_ITER, ) # initialization workspace.RunNetOnce(model.param_init_net) workspace.CreateNet(model.net) print("hello foo") # ================= DEBUG PRINT ======================= # print(model.net.Proto()) # print(model.param_init_net.Proto())