def AddParameterUpdateOps(
    model, optimizer_input="SGD", base_learning_rate=0.01, *args, **kwargs
):
    if optimizer_input not in OPTIMIZER_DICT:
        raise Exception(
            "Optimizer {} unknown. Valid choices are {}".format(
                optimizer_input, ", ".join(OPTIMIZER_DICT.keys())
            )
        )
    optimizer_rule = OPTIMIZER_DICT[optimizer_input]

    if optimizer_rule == GRAD_OPTIMIZER.SGD:
        build_sgd(
            model,
            base_learning_rate,
            gamma=kwargs['gamma'],
            policy=kwargs['policy'],
            stepsize=1,
        )
    elif optimizer_rule == GRAD_OPTIMIZER.ADAGRAD:
        build_adagrad(model, base_learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.ADAM:
        build_adam(model, base_learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.FTRL:
        build_ftrl(model, base_learning_rate)
    else:
        print(
            "Optimizer rule {} not recognized; falling back to plain SGD".format(
                optimizer_rule
            )
        )
        build_sgd(model, base_learning_rate)
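# NOTE: The function above references OPTIMIZER_DICT and GRAD_OPTIMIZER without
# defining them. Below is a minimal, hypothetical sketch of definitions that
# would satisfy it, assuming a plain name-to-rule mapping; the originating
# project may construct these differently (e.g. via its own enum helper).
import collections

# Hypothetical stand-in for the optimizer-rule enum the snippet assumes.
GRAD_OPTIMIZER = collections.namedtuple(
    "GradOptimizer", ["SGD", "ADAGRAD", "ADAM", "FTRL"]
)(SGD="SGD", ADAGRAD="ADAGRAD", ADAM="ADAM", FTRL="FTRL")

# Map user-facing optimizer names to rule constants.
OPTIMIZER_DICT = {
    "SGD": GRAD_OPTIMIZER.SGD,
    "ADAGRAD": GRAD_OPTIMIZER.ADAGRAD,
    "ADAM": GRAD_OPTIMIZER.ADAM,
    "FTRL": GRAD_OPTIMIZER.FTRL,
}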
def test_weight_decay(self):
    from caffe2.python import brew
    from caffe2.python.model_helper import ModelHelper

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
    a = brew.fc(model, cnv, 'a', 100, 200)
    pred = brew.fc(model, a, 'b', 200, 5)
    (softmax, loss) = model.SoftmaxWithLoss(
        [pred, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])
    add_weight_decay(model, weight_decay=1e-4)
    build_sgd(model, 0.11)

    expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}

    # Check in the proto that all weights are decayed and that no
    # non-weights are decayed.
    for op in model.net.Proto().op:
        if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
            if op.output[0] not in expected_weight_grad:
                print("Unexpected param for weight_decay: {}".format(
                    op.output[0]))
            self.assertTrue(op.output[0] in expected_weight_grad)
            expected_weight_grad.remove(op.output[0])

    self.assertEqual(
        expected_weight_grad,
        set(),
        "Not all weights were decayed: {}".format(expected_weight_grad))
def addParameterUpdateOps(self, model):
    if self.optimizer not in OPTIMIZER_DICT:
        raise Exception(
            "Optimizer {} unknown. Valid choices are {}".format(
                self.optimizer, ", ".join(OPTIMIZER_DICT.keys())))
    optimizer_rule = OPTIMIZER_DICT[self.optimizer]

    if optimizer_rule == GRAD_OPTIMIZER.SGD:
        build_sgd(
            model,
            self.learning_rate,
            gamma=self.lr_decay,
            policy=self.lr_policy,
            stepsize=1,
        )
    elif optimizer_rule == GRAD_OPTIMIZER.ADAGRAD:
        build_adagrad(model, self.learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.ADAM:
        build_adam(model, self.learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.FTRL:
        build_ftrl(model, self.learning_rate)
    else:
        print("Optimizer rule {} not recognized; falling back to plain SGD"
              .format(optimizer_rule))
        build_sgd(model, self.learning_rate)
def CreateModel(self):
    log.debug("Start training")
    model = model_helper.ModelHelper(name="char_rnn")

    input_blob, seq_lengths, hidden_init, cell_init, target = \
        model.net.AddExternalInputs(
            'input_blob',
            'seq_lengths',
            'hidden_init',
            'cell_init',
            'target',
        )

    hidden_output_all, self.hidden_output, _, self.cell_state = LSTM(
        model, input_blob, seq_lengths, (hidden_init, cell_init),
        self.D, self.hidden_size, scope="LSTM")
    output = brew.fc(model, hidden_output_all, None, dim_in=self.hidden_size,
                     dim_out=self.D, axis=2)

    # axis is 2 as the first two are T (time) and N (batch size).
    # We treat them as one big batch of size T * N
    softmax = model.net.Softmax(output, 'softmax', axis=2)

    softmax_reshaped, _ = model.net.Reshape(
        softmax, ['softmax_reshaped', '_'], shape=[-1, self.D])

    # Create a copy of the current net. We will use it on the forward
    # pass where we don't need loss and backward operators
    self.forward_net = core.Net(model.net.Proto())

    xent = model.net.LabelCrossEntropy([softmax_reshaped, target], 'xent')
    # Loss is averaged both across the batch and through time.
    # That's why the learning rate below is multiplied by self.seq_length
    loss = model.net.AveragedLoss(xent, 'loss')
    model.AddGradientOperators([loss])

    # use the build_sgd helper to build an optimizer
    build_sgd(
        model,
        base_learning_rate=0.1 * self.seq_length,
        policy="step",
        stepsize=1,
        gamma=0.9999,
    )

    self.model = model
    self.predictions = softmax
    self.loss = loss

    self.prepare_state = core.Net("prepare_state")
    self.prepare_state.Copy(self.hidden_output, hidden_init)
    self.prepare_state.Copy(self.cell_state, cell_init)
def test_optimizer_context(self):
    from caffe2.python import brew, optimizer
    from caffe2.python.model_helper import ModelHelper

    model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
    count = optimizer._optimizer_instance_count['SgdOptimizer']
    cnv_optim = SgdOptimizer(0.15)
    weight_optim = SgdOptimizer(0.2)
    bias_optim = SgdOptimizer(0.1)

    with UseOptimizer(cnv_optim):
        cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
    with UseOptimizer({'WEIGHT': weight_optim, 'BIAS': bias_optim}):
        a = brew.fc(model, cnv, 'a', 100, 200)
    pred = brew.fc(model, a, 'b', 200, 5)
    (softmax, loss) = model.SoftmaxWithLoss(
        [pred, 'label'],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])

    add_weight_decay(model, weight_decay=1e-4)
    # use the following optimizer if none specified in param_info
    build_sgd(model, 0.11)

    expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}
    expected_learning_rate = {
        "SgdOptimizer_{}_lr_cpu".format(count): -0.15,
        "SgdOptimizer_{}_lr_cpu".format(count + 1): -0.2,
        "SgdOptimizer_{}_lr_cpu".format(count + 2): -0.1,
        "SgdOptimizer_{}_lr_cpu".format(count + 3): -0.11,
    }

    for op in model.net.Proto().op:
        # Check in the proto that all weights are decayed and that no
        # non-weights are decayed.
        if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
            if op.output[0] not in expected_weight_grad:
                print("Unexpected param for weight_decay: {}".format(
                    op.output[0]))
            self.assertTrue(op.output[0] in expected_weight_grad)
            expected_weight_grad.remove(op.output[0])
        # Check the learning rate for each parameter
        if op.type == 'LearningRate':
            val = 0
            for arg in op.arg:
                if arg.name == 'base_lr':
                    val = arg.f
            self.assertAlmostEqual(
                val,
                expected_learning_rate[op.output[0]]
            )

    self.assertEqual(
        expected_weight_grad,
        set(),
        "Not all weights were decayed: {}".format(expected_weight_grad)
    )
def add_optmzer_lossfunc(model, softmax, label):
    cross_entropy = model.LabelCrossEntropy([softmax, label], 'cross_entropy')
    loss = model.AveragedLoss(cross_entropy, "loss")
    model.AddGradientOperators([loss])
    # look at documentation
    optimizer.build_sgd(
        model,
        base_learning_rate=0.01,
    )
def add_optimizer(model):
    stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    optimizer.build_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.1,
    )
def AddOptimizerOps(model):
    """Add optimizer ops."""
    optimizer.build_sgd(model, 0.01,
                        policy='step',
                        stepsize=1,
                        gamma=0.999,
                        momentum=0.9,
                        nesterov=False)
def AddOptimizerOps(model):
    """Add optimizer ops."""
    optimizer.add_weight_decay(model, 0.004)
    stepsize = TRAIN_ENTRIES * EPOCHS // BATCH_SIZE
    optimizer.build_sgd(model, 0.001,
                        policy='step',
                        stepsize=stepsize,
                        gamma=0.1,
                        momentum=0.9,
                        nesterov=False)
def AddTrainingOperators(model, softmax, label):
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # Compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # Use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # Use stochastic gradient descent as the optimization function
    optimizer.build_sgd(
        model,
        base_learning_rate=0.01,
        policy="fixed",
        momentum=0.9,
        weight_decay=0.004,
    )
def AddTrainingOperators(model, softmax, label):
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    loss = model.AveragedLoss(xent, "loss")
    model.AddGradientOperators([loss])
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=10,
        gamma=0.999,
    )
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], "xent", use_cudnn=False)
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss", use_cudnn=False)
    # track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the
    # model
    model.AddGradientOperators([loss])
    optimizer.build_sgd(
        model, base_learning_rate=0.1, policy="step", stepsize=1, gamma=0.999
    )
def AddTrainingOperators(model, softmax, label, save_png=True):
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    loss = model.AveragedLoss(xent, "loss")
    model.AddGradientOperators([loss])
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=10,
        gamma=0.999,
    )
    if save_png:
        graph = net_drawer.GetPydotGraph(model.net, rankdir="LR")
        graph.write_png("CIFAR10_with_Grad.png")
def add_training_operators(model, last_out, device_opts):
    with core.DeviceScope(device_opts):
        softmax, loss = add_softmax_with_loss(model, last_out, device_opts)
        accuracy = add_accuracy(model, softmax, device_opts)
        model.AddGradientOperators([loss])
        opt = optimizer.build_sgd(
            model,
            base_learning_rate=0.1,
            policy="step",
            stepsize=50000 * 80 // args.batch_size,
            weight_decay=1e-4,
            momentum=0.9,
            gamma=0.1,
            nesterov=1,
        )
        # [Optional] feel free to use adam or other optimizers
        # opt = optimizer.build_adam(
        #     model,
        #     base_learning_rate=1e-3,
        #     weight_decay=1e-4,
        # )
        return opt
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=1,
        gamma=0.999,
    )
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model
    model_defs.AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # do a simple stochastic gradient descent
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=1,
        gamma=0.999,
    )
def add_optimizer(model):
    return optimizer.build_sgd(
        model,
        0.1,
        policy="fixed",
        max_gradient_norm=5.0,
        allow_lr_injection=True,
    )
def AddTrainingOperators(model, softmax, label):
    '''Optimize the parameters and train the model.

    Args:
        model: model structure
        softmax: classification scores
        label: image labels

    Returns:
        None
    '''
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    loss = model.AveragedLoss(xent, 'loss')
    AddAccuracy(model, softmax, label)
    model.AddGradientOperators([loss])
    optimizer.build_sgd(model,
                        base_learning_rate=0.1,
                        policy="step",
                        stepsize=1,
                        gamma=0.999)
def ScaffoldModelTrainingOperators(model, softmax, label, learningRate,
                                   devOps=None):
    # with core.DeviceScope(core.DeviceOption(c2p2.PROTO_CUDA, 0)):
    xent = model.LabelCrossEntropy([softmax, label], "xent")
    loss = model.AveragedLoss(xent, "loss")
    ScaffoldModelAccuracyMeter(model, softmax, label)
    model.AddGradientOperators([loss])
    opt = optimizer.build_sgd(model, base_learning_rate=learningRate)
    for param in model.GetOptimizationParamInfo():
        opt(model.net, model.param_init_net, param)
def add_training_operators(softmax, model, device_opts):
    with core.DeviceScope(device_opts):
        xent = model.LabelCrossEntropy([softmax, "label"], 'xent')
        loss = model.AveragedLoss(xent, "loss")
        brew.accuracy(model, [softmax, "label"], "accuracy")
        model.AddGradientOperators([loss])
        opt = optimizer.build_sgd(
            model,
            base_learning_rate=0.01,
            policy="step",
            stepsize=1,
            gamma=0.999,
        )  # , momentum=0.9
def AddTrainingOperators(model, softmax, label, device_opts):
    with core.DeviceScope(device_opts):
        xent = model.LabelCrossEntropy([softmax, label], 'xent')
        # Compute the expected loss
        loss = model.AveragedLoss(xent, "loss")
        brew.accuracy(model, [softmax, label], "accuracy")
        # Use the average loss we just computed to add gradient operators to the model
        model.AddGradientOperators([loss])
        # Use SGD optimizer
        optimizer.build_sgd(
            model,
            base_learning_rate=0.1,
            weight_decay=1e-5,
            gamma=0.999,
            policy='step',
            stepsize=50,
            nesterov=1,
        )
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    # Compute cross entropy between softmax scores and labels
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # Compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # Track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # Use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # Specify the optimization algorithm
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=1,
        gamma=0.999,
    )
def ScaffoldModelBackpropagation(model, softmax, label, learningRate):
    # loss function - tells how wrong the prediction was
    crossEntropy = model.LabelCrossEntropy([softmax, label], 'cross_entropy')
    # expected loss, averaged over the batch
    loss = model.AveragedLoss(crossEntropy, 'loss')
    ScaffoldModelAccuracyMeter(model, softmax, label)
    # add gradient operators used for backpropagation
    model.AddGradientOperators([loss])
    # lastly construct stochastic gradient descent for learning
    optimizer.build_sgd(
        model,
        base_learning_rate=learningRate,
        policy='step',
        stepsize=1,
        gamma=0.999,
        # momentum=0.9,
        # weight_decay=0.004
    )
def main(opt_name):
    workspace.FeedBlob('input', np.random.randn(2, 16).astype(np.float32))
    workspace.FeedBlob('label', np.array([0, 1]).astype(np.float32))

    helper = ModelHelper("sample_model")
    fc = brew.fc(helper, "input", "fc", dim_in=16, dim_out=8)
    relu = helper.Relu(fc, 'relu')
    fc2 = brew.fc(helper, relu, "fc2", dim_in=8, dim_out=1)
    label_ex = helper.ExpandDims("label", "label_ex", dims=[1])
    xent = helper.SigmoidCrossEntropyWithLogits([fc2, label_ex], 'xent')
    loss = helper.AveragedLoss(xent, 'loss')
    helper.AddGradientOperators([loss])

    if opt_name == "manual":
        ONE = helper.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
        LR = helper.param_init_net.ConstantFill([], "LR", shape=[1], value=-0.03)
        for param in helper.params:
            param_grad = helper.param_to_grad[param]
            helper.WeightedSum([param, ONE, param_grad, LR], param)
    elif opt_name == "sgd":
        optimizer.build_sgd(helper, 0.03)
    elif opt_name == "adagrad":
        optimizer.build_adagrad(helper, 0.03)
    # caffe2 does not support rowwise adagrad for dense parameters
    # caffe2 does not seem to have lamb support yet
    elif opt_name == "adam":
        optimizer.build_adam(helper, 0.03)
    else:
        assert False, f"Unsupported optimizer {opt_name}"

    workspace.RunNetOnce(helper.param_init_net)
    workspace.RunNetOnce(helper.net)

    import pdb
    pdb.set_trace()
def add_training_operators(softmax, m, device_opts):
    with core.DeviceScope(device_opts):
        xent = m.LabelCrossEntropy([softmax, "label"], 'xent')
        loss = m.AveragedLoss(xent, "loss")
        # brew.accuracy(m, [softmax, "label"], "accuracy")
        m.AddGradientOperators([loss])
        opt = optimizer.build_sgd(
            m,
            base_learning_rate=LR,
            policy='fixed',
            momentum=MOMENTUM)
def add_optimizer(model):
    stepsz = int(60 * config.TRAIN_IMAGES / args.batch_size / args.gpus)
    return optimizer.build_sgd(
        model,
        base_learning_rate=args.learning_rate,
        policy="step",
        stepsize=stepsz,
        gamma=0.1,
        weight_decay=1e-4,
        momentum=0.9,
        nesterov=1,
    )
def create_train_model(data_folder):
    """Create model for training with MNIST train dataset."""

    # Create the model helper for the train model
    train_model = model_helper.ModelHelper(name="mnist_lenet_train_model")
    # Specify the input is from the train lmdb
    data, label = add_model_inputs(
        train_model,
        batch_size=64,
        db=os.path.join(data_folder, "mnist-train-nchw-lmdb"),
        db_type="lmdb",
    )
    # Build the LeNet-5 network
    softmax_layer = build_mnist_lenet(train_model, data)
    # Compute cross entropy between softmax scores and labels
    cross_entropy = train_model.LabelCrossEntropy(
        [softmax_layer, label], "cross_entropy")
    # Compute the expected loss
    loss = train_model.AveragedLoss(cross_entropy, "loss")
    # Use the average loss we just computed to add gradient operators to the model
    train_model.AddGradientOperators([loss])
    # Specify the optimization algorithm
    optimizer.build_sgd(
        train_model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=1,
        gamma=0.999,
    )
    # Track the accuracy of the model
    add_accuracy_op(train_model, softmax_layer, label)

    return train_model
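# NOTE: None of the snippets here show how the resulting nets are executed.
# The following is a minimal sketch of the run loop, assuming the
# create_train_model function above and an illustrative iteration count;
# it is not part of the original sources.
from caffe2.python import workspace

train_model = create_train_model(data_folder)

# Run the init net once to create and fill the parameter blobs, then
# instantiate the train net so it can be run repeatedly.
workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net, overwrite=True)

for i in range(200):  # illustrative iteration count
    workspace.RunNet(train_model.net)
    if i % 50 == 0:
        print("iter {}: loss = {}".format(i, workspace.FetchBlob("loss")))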
def AddTrainingOperators(model, loss):
    model.AddGradientOperators([loss])
    optimizer.add_weight_decay(model, 5e-4)
    stepsz = int(10 * 60000 / 128)
    opt = optimizer.build_sgd(
        model,
        base_learning_rate=0.01,
        policy="step",
        stepsize=stepsz,
        gamma=0.1,
        momentum=0.9)
    # opt = optimizer.build_yellowfin(model)
    return opt
def AddTrainingOperators(model, softmax):
    # calculate loss
    xent = model.LabelCrossEntropy([softmax, 'label'], 'xent')
    loss = model.AveragedLoss(xent, "loss")
    # calculate accuracy
    AddAccuracy(model, softmax)
    # add gradient operators for backpropagation
    model.AddGradientOperators([loss])
    # init SGD optimizer solver
    opt = optimizer.build_sgd(model,
                              base_learning_rate=0.1,
                              policy="step",
                              stepsize=1,
                              gamma=0.999)
def add_optimizer(model):
    """
    Optimizer function called once for the entire model, as opposed to once
    for each CPU / GPU individually. The optimizer is SGD with momentum,
    stepwise learning-rate decay, and weight decay.
    :return: the optimizer
    """
    stepsz = int(30 * args.epoch_size / args.batch_size / args.num_shards)
    stepsz = stepsz if stepsz else 100
    optimizer.add_weight_decay(model, 1e-4)
    # opt = optimizer.build_multi_precision_sgd(
    opt = optimizer.build_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.1)
    return opt
def build_optimizer(self, model, **kwargs):
    self._skip_gpu = False
    return build_sgd(model, base_learning_rate=0.1, **kwargs)
# #### Add the training operators and prime the workspace
#
# In this **very important** step, we specify the loss function, set up the
# SGD training algorithm, prime and initialize the workspace, and initialize
# our model's weights and biases.

# In[5]:

# The loss function is computed by a squared L2 distance,
# and then averaged over all items.
dist = regression_model.SquaredL2Distance(['Y_gt', y_pred], "dist")
loss = regression_model.AveragedLoss(dist, "loss")

# Add the gradient operators and set up the SGD algorithm
regression_model.AddGradientOperators([loss])
optimizer.build_sgd(regression_model, base_learning_rate=learning_rate)

# Prime the workspace with some data
workspace.FeedBlob("Y_gt", Y_gt.astype(np.float32))
workspace.FeedBlob("X", X.astype(np.float32))

# Run the init net to prepare the workspace then create the net
workspace.RunNetOnce(regression_model.param_init_net)
workspace.CreateNet(regression_model.net)

# Inject our desired initial weights and bias
workspace.FeedBlob("y_pred_w", np.array([initial_weights]).astype(np.float32))
workspace.FeedBlob("y_pred_b", np.array([0.]).astype(np.float32))

# #### Run the training
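# A minimal sketch of the training loop that the heading above leads into,
# assuming an illustrative iteration count; the notebook's actual loop and
# stopping criterion may differ.

for i in range(100):  # illustrative iteration count
    workspace.RunNet(regression_model.net)

# Inspect the learned parameters
print("Learned weights:", workspace.FetchBlob("y_pred_w"))
print("Learned bias:", workspace.FetchBlob("y_pred_b"))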
def add_optimizer(model):
    optimizer.build_sgd(model, 0.1)
def add_optimizer(model):
    return optimizer.build_sgd(model, 0.1, policy="fixed")
def add_optimizer(model):
    optimizer.build_sgd(model, 0.1, policy="fixed", momentum=0.9)