Code Example #1
def AddParameterUpdateOps(
    model, optimizer_input="SGD", base_learning_rate=0.01, *args, **kwargs
):
    if optimizer_input not in OPTIMIZER_DICT:
        raise Exception(
            "Optimizer {} unknown. Valid choices are {}"
            .format(optimizer_input, ', '.join(OPTIMIZER_DICT.keys()))
        )
    optimizer_rule = OPTIMIZER_DICT[optimizer_input]

    if optimizer_rule == GRAD_OPTIMIZER.SGD:
        build_sgd(
            model,
            base_learning_rate,
            gamma=kwargs['gamma'],
            policy=kwargs['policy'],
            stepsize=1
        )
    elif optimizer_rule == GRAD_OPTIMIZER.ADAGRAD:
        build_adagrad(model, base_learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.ADAM:
        build_adam(model, base_learning_rate)
    elif optimizer_rule == GRAD_OPTIMIZER.FTRL:
        build_ftrl(model, base_learning_rate)
    else:
        print(
            "Optimizer rule {} not recognized in caffe2 settings; "
            "falling back to default SGD".format(optimizer_rule)
        )
        build_sgd(model, base_learning_rate)
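
For context, here is a minimal sketch of how the OPTIMIZER_DICT and GRAD_OPTIMIZER names used above might be defined, and how the function could then be called. These definitions are illustrative assumptions, not taken from the example itself.

from enum import Enum

# build_* functions are the ones dispatched to by AddParameterUpdateOps above.
from caffe2.python.optimizer import (
    build_adagrad, build_adam, build_ftrl, build_sgd
)


class GRAD_OPTIMIZER(Enum):
    SGD = 1
    ADAGRAD = 2
    ADAM = 3
    FTRL = 4


# Assumed mapping from user-facing string names to enum members.
OPTIMIZER_DICT = {opt.name: opt for opt in GRAD_OPTIMIZER}

# After model.AddGradientOperators([loss]) has been called, the SGD branch
# above also expects gamma and policy keyword arguments, e.g.:
# AddParameterUpdateOps(model, optimizer_input="SGD", base_learning_rate=0.01,
#                       gamma=0.999, policy="step")
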
Code Example #2
File: optimizer_test.py  Project: aimsky/caffe2
    def test_weight_decay(self):
        from caffe2.python import brew
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        build_sgd(model, 0.11)

        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}

        # Check in the proto that all weights are decayed and that
        # non-weights are not decayed.
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print("Unexpected param for weight_decay: {}".format(
                        op.output[0]))
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])

        self.assertEqual(
            expected_weight_grad, set(),
            "Not all weights were decayed: {}".format(expected_weight_grad))
Code Example #3
File: ml_trainer.py  Project: kanirudh54/BlueWhale
    def addParameterUpdateOps(self, model):
        if self.optimizer not in OPTIMIZER_DICT:
            raise Exception(
                "Optimizer {} unknown. Valid choices are {}".format(
                    self.optimizer, ", ".join(OPTIMIZER_DICT.keys())))
        optimizer_rule = OPTIMIZER_DICT[self.optimizer]

        if optimizer_rule == GRAD_OPTIMIZER.SGD:
            build_sgd(
                model,
                self.learning_rate,
                gamma=self.lr_decay,
                policy=self.lr_policy,
                stepsize=1,
            )
        elif optimizer_rule == GRAD_OPTIMIZER.ADAGRAD:
            build_adagrad(model, self.learning_rate)
        elif optimizer_rule == GRAD_OPTIMIZER.ADAM:
            build_adam(model, self.learning_rate)
        elif optimizer_rule == GRAD_OPTIMIZER.FTRL:
            build_ftrl(model, self.learning_rate)
        else:
            print("Unrecognized in caffe2 setting, using default SGD",
                  optimizer_rule)
            build_sgd(model, self.learning_rate)
Code Example #4
File: char_rnn.py  Project: madieragold1/micro--
    def CreateModel(self):
        log.debug("Start training")
        model = model_helper.ModelHelper(name="char_rnn")

        input_blob, seq_lengths, hidden_init, cell_init, target = \
            model.net.AddExternalInputs(
                'input_blob',
                'seq_lengths',
                'hidden_init',
                'cell_init',
                'target',
            )

        hidden_output_all, self.hidden_output, _, self.cell_state = LSTM(
            model,
            input_blob,
            seq_lengths, (hidden_init, cell_init),
            self.D,
            self.hidden_size,
            scope="LSTM")
        output = brew.fc(model,
                         hidden_output_all,
                         None,
                         dim_in=self.hidden_size,
                         dim_out=self.D,
                         axis=2)

        # axis is 2 because the first two dimensions are T (time) and N
        # (batch size); we treat them as one big batch of size T * N
        softmax = model.net.Softmax(output, 'softmax', axis=2)

        softmax_reshaped, _ = model.net.Reshape(softmax,
                                                ['softmax_reshaped', '_'],
                                                shape=[-1, self.D])

        # Create a copy of the current net. We will use it on the forward
        # pass where we don't need loss and backward operators
        self.forward_net = core.Net(model.net.Proto())

        xent = model.net.LabelCrossEntropy([softmax_reshaped, target], 'xent')
        # The loss is averaged both across the batch and through time.
        # That's why the learning rate below is multiplied by self.seq_length
        loss = model.net.AveragedLoss(xent, 'loss')
        model.AddGradientOperators([loss])

        # use the build_sgd function to build an optimizer
        build_sgd(model,
                  base_learning_rate=0.1 * self.seq_length,
                  policy="step",
                  stepsize=1,
                  gamma=0.9999)

        self.model = model
        self.predictions = softmax
        self.loss = loss

        self.prepare_state = core.Net("prepare_state")
        self.prepare_state.Copy(self.hidden_output, hidden_init)
        self.prepare_state.Copy(self.cell_state, cell_init)
Code Example #5
    def test_optimizer_context(self):
        from caffe2.python import brew, optimizer
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        count = optimizer._optimizer_instance_count['SgdOptimizer']
        cnv_optim = SgdOptimizer(0.15)
        weight_optim = SgdOptimizer(0.2)
        bias_optim = SgdOptimizer(0.1)

        with UseOptimizer(cnv_optim):
            cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        with UseOptimizer({'WEIGHT': weight_optim, 'BIAS': bias_optim}):
            a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        # use the following optimizer if none specified in param_info
        build_sgd(model, 0.11)
        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}
        expected_learning_rate = {
            "SgdOptimizer_{}_lr_cpu".format(count): -0.15,
            "SgdOptimizer_{}_lr_cpu".format(count + 1): -0.2,
            "SgdOptimizer_{}_lr_cpu".format(count + 2): -0.1,
            "SgdOptimizer_{}_lr_cpu".format(count + 3): -0.11
        }

        for op in model.net.Proto().op:
            # Check in the proto that all weights are decayed and that
            # non-weights are not decayed.
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print(
                        "Unexpected param for weight_decay: {}".
                        format(op.output[0])
                    )
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])
            # Check the learning rate for each parameter
            if op.type == 'LearningRate':
                val = 0
                for arg in op.arg:
                    if arg.name == 'base_lr':
                        val = arg.f
                self.assertAlmostEqual(
                    val,
                    expected_learning_rate[op.output[0]]
                )

        self.assertEqual(
            expected_weight_grad,
            set(),
            "Not all weights were decayed: {}".format(expected_weight_grad)
        )
Code Example #6
File: optimizer_test.py  Project: Sissipei/caffe2
    def test_optimizer_context(self):
        from caffe2.python import brew, optimizer
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        count = optimizer._optimizer_instance_count['SgdOptimizer']
        cnv_optim = SgdOptimizer(0.15)
        weight_optim = SgdOptimizer(0.2)
        bias_optim = SgdOptimizer(0.1)

        with UseOptimizer(cnv_optim):
            cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        with UseOptimizer({'WEIGHT': weight_optim, 'BIAS': bias_optim}):
            a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        # use the following optimizer if none specified in param_info
        build_sgd(model, 0.11)
        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}
        expected_learning_rate = {
            "SgdOptimizer_{}_lr_cpu".format(count): -0.15,
            "SgdOptimizer_{}_lr_cpu".format(count + 1): -0.2,
            "SgdOptimizer_{}_lr_cpu".format(count + 2): -0.1,
            "SgdOptimizer_{}_lr_cpu".format(count + 3): -0.11
        }

        for op in model.net.Proto().op:
            # Check in the proto that all weights are decayed and that
            # non-weights are not decayed.
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print(
                        "Unexpected param for weight_decay: {}".
                        format(op.output[0])
                    )
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])
            # Check the learning rate for each parameter
            if op.type == 'LearningRate':
                val = 0
                for arg in op.arg:
                    if arg.name == 'base_lr':
                        val = arg.f
                self.assertAlmostEqual(
                    val,
                    expected_learning_rate[op.output[0]]
                )

        self.assertEqual(
            expected_weight_grad,
            set(),
            "Not all weights were decayed: {}".format(expected_weight_grad)
        )
Code Example #7
def add_optmzer_lossfunc(model, softmax, label):
    cross_entropy = model.LabelCrossEntropy([softmax, label], 'cross_entropy')
    loss = model.AveragedLoss(cross_entropy, "loss")
    model.AddGradientOperators([loss])  # look at documentation
    optimizer.build_sgd(
        model,
        base_learning_rate=0.01,
    )
Code Example #8
File: char_rnn.py  Project: Ralfhund/caffe2
    def CreateModel(self):
        log.debug("Start training")
        model = model_helper.ModelHelper(name="char_rnn")

        input_blob, seq_lengths, hidden_init, cell_init, target = \
            model.net.AddExternalInputs(
                'input_blob',
                'seq_lengths',
                'hidden_init',
                'cell_init',
                'target',
            )

        hidden_output_all, self.hidden_output, _, self.cell_state = LSTM(
            model, input_blob, seq_lengths, (hidden_init, cell_init),
            self.D, self.hidden_size, scope="LSTM")
        output = brew.fc(
            model,
            hidden_output_all,
            None,
            dim_in=self.hidden_size,
            dim_out=self.D,
            axis=2
        )

        # axis is 2 because the first two dimensions are T (time) and N
        # (batch size); we treat them as one big batch of size T * N
        softmax = model.net.Softmax(output, 'softmax', axis=2)

        softmax_reshaped, _ = model.net.Reshape(
            softmax, ['softmax_reshaped', '_'], shape=[-1, self.D])

        # Create a copy of the current net. We will use it on the forward
        # pass where we don't need loss and backward operators
        self.forward_net = core.Net(model.net.Proto())

        xent = model.net.LabelCrossEntropy([softmax_reshaped, target], 'xent')
        # The loss is averaged both across the batch and through time.
        # That's why the learning rate below is multiplied by self.seq_length
        loss = model.net.AveragedLoss(xent, 'loss')
        model.AddGradientOperators([loss])

        # use the build_sgd function to build an optimizer
        build_sgd(
            model,
            base_learning_rate=0.1 * self.seq_length,
            policy="step",
            stepsize=1,
            gamma=0.9999
        )

        self.model = model
        self.predictions = softmax
        self.loss = loss

        self.prepare_state = core.Net("prepare_state")
        self.prepare_state.Copy(self.hidden_output, hidden_init)
        self.prepare_state.Copy(self.cell_state, cell_init)
Code Example #9
    def add_optimizer(model):
        stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
        optimizer.build_sgd(model,
                            args.base_learning_rate,
                            momentum=0.9,
                            nesterov=1,
                            policy="step",
                            stepsize=stepsz,
                            gamma=0.1)
Code Example #10
def AddOptimizerOps(model):
    """Add optimizer ops."""
    optimizer.build_sgd(model,
                        0.01,
                        policy='step',
                        stepsize=1,
                        gamma=0.999,
                        momentum=0.9,
                        nesterov=False)
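
A note on the policy/stepsize/gamma arguments used throughout these examples: with the "step" policy, the LearningRate op multiplies the base rate by gamma once every stepsize iterations (mirroring Caffe's step schedule). A rough sketch of the effective rate, for illustration only and not caffe2 code:

def step_policy_lr(base_lr, gamma, stepsize, iteration):
    """Approximate learning rate under the 'step' decay policy."""
    return base_lr * (gamma ** (iteration // stepsize))

# With the values from the example above (base_lr=0.01, gamma=0.999, stepsize=1):
# step_policy_lr(0.01, 0.999, 1, 1000) -> roughly 0.0037
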
Code Example #11
def AddOptimizerOps(model):
    """Add optimizer ops."""
    optimizer.add_weight_decay(model, 0.004)
    stepsize = TRAIN_ENTRIES * EPOCHS // BATCH_SIZE
    optimizer.build_sgd(model,
                        0.001,
                        policy='step',
                        stepsize=stepsize,
                        gamma=0.1,
                        momentum=0.9,
                        nesterov=False)
Code Example #12
def AddTrainingOperators(model, softmax, label):
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # Compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # Use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # Use stochastic gradient descent as optimization function
    optimizer.build_sgd(model,
                        base_learning_rate=0.01,
                        policy="fixed",
                        momentum=0.9,
                        weight_decay=0.004)
Code Example #13
def AddTrainingOperators(model, softmax, label):
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    loss = model.AveragedLoss(xent, "loss")

    model.AddGradientOperators([loss])
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=10,
        gamma=0.999,
    )
Code Example #14
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], "xent", use_cudnn=False)
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss", use_cudnn=False)
    # track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the
    # model
    model.AddGradientOperators([loss])
    optimizer.build_sgd(
        model, base_learning_rate=0.1, policy="step", stepsize=1, gamma=0.999
    )
Code Example #15
File: cifar01.py  Project: bbclr20/Caffe2-Examples
def AddTrainingOperators(model, softmax, label, save_png=True):
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    loss = model.AveragedLoss(xent, "loss")

    model.AddGradientOperators([loss])
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=10,
        gamma=0.999,
    )

    if save_png:
        graph = net_drawer.GetPydotGraph(model.net, rankdir="LR")
        graph.write_png("CIFAR10_with_Grad.png")
Code Example #16
def add_training_operators(model, last_out, device_opts):

    with core.DeviceScope(device_opts):

        softmax, loss = add_softmax_with_loss(model, last_out, device_opts)
        accuracy = add_accuracy(model, softmax, device_opts)

        model.AddGradientOperators([loss])

        opt = optimizer.build_sgd(
            model,
            base_learning_rate=0.1,
            policy="step",
            stepsize=50000 * 80 // args.batch_size,
            weight_decay=1e-4,
            momentum=0.9,
            gamma=0.1,
            nesterov=1,
        )
        # [Optional] feel free to use adam or other optimizers
        # opt = optimizer.build_adam(
        #     model,
        #     base_learning_rate=1e-3,
        #     weight_decay=1e-4,
        #     )
        return opt
Code Example #17
File: MNIST.py  Project: Yangqing/caffe2
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=1,
        gamma=0.999,
    )
Code Example #18
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model
    model_defs.AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # do a simple stochastic gradient descent
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=1,
        gamma=0.999,
    )
Code Example #19
    def add_optimizer(model):
        return optimizer.build_sgd(
            model,
            0.1,
            policy="fixed",
            max_gradient_norm=5.0,
            allow_lr_injection=True,
        )
Code Example #20
    def add_optimizer(model):
        return optimizer.build_sgd(
            model,
            0.1,
            policy="fixed",
            max_gradient_norm=5.0,
            allow_lr_injection=True,
        )
Code Example #21
def AddTrainingOperators(model, softmax, label):
    '''Optimize the parameters and train the model.
    Args:
        model: model structure
        softmax: classification output
        label: image labels
    Returns:
        None
    '''
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    loss = model.AveragedLoss(xent, 'loss')
    AddAccuracy(model, softmax, label)
    model.AddGradientOperators([loss])
    optimizer.build_sgd(model,
                        base_learning_rate=0.1,
                        policy="step",
                        stepsize=1,
                        gamma=0.999)
Code Example #22
def ScaffoldModelTrainingOperators(model, softmax, label, learningRate, devOps=None):
	# with core.DeviceScope(core.DeviceOption(c2p2.PROTO_CUDA, 0)):
	xent = model.LabelCrossEntropy([softmax, label], "xent")
	loss = model.AveragedLoss(xent, "loss")
	ScaffoldModelAccuracyMeter(model, softmax, label)
	model.AddGradientOperators([loss])
	opt = optimizer.build_sgd(model, base_learning_rate=learningRate)
	for param in model.GetOptimizationParamInfo():
		opt(model.net, model.param_init_net, param)
Code Example #23
def add_training_operators(softmax, model, device_opts) :

    with core.DeviceScope(device_opts):
        xent = model.LabelCrossEntropy([softmax, "label"], 'xent')
        loss = model.AveragedLoss(xent, "loss")
        brew.accuracy(model, [softmax, "label"], "accuracy")

        model.AddGradientOperators([loss])
        opt = optimizer.build_sgd(model, base_learning_rate=0.01, policy="step", stepsize=1, gamma=0.999)  # , momentum=0.9
Code Example #24
def AddTrainingOperators(model, softmax, label, device_opts):
    with core.DeviceScope(device_opts):
        xent = model.LabelCrossEntropy([softmax, label], 'xent')
        # Compute the expected loss
        loss = model.AveragedLoss(xent, "loss")
        brew.accuracy(model, [softmax, label], "accuracy")
        # Use the average loss we just computed to add gradient operators to the model
        model.AddGradientOperators([loss])
        # Use SGD optimizer
        optimizer.build_sgd(
            model,
            base_learning_rate=0.1,
            weight_decay=1e-5,
            gamma=0.999, 
            policy='step', 
            stepsize=50,
            nesterov=1,
        )
Code Example #25
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    # Compute cross entropy between softmax scores and labels
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # Compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # Track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # Use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # Specify the optimization algorithm
    optimizer.build_sgd(
        model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=1,
        gamma=0.999,
    )
Code Example #26
def ScaffoldModelBackpropagation(model, softmax, label, learningRate):
	# loss function - tells how wrong the prediction was
	crossEntropy = model.LabelCrossEntropy([softmax, label], 'cross_entropy')
	# expected loss (how to find out more on this step)
	loss = model.AveragedLoss(crossEntropy, 'loss')
	ScaffoldModelAccuracyMeter(model, softmax, label)
	# add gradient operator used for backpropagation
	model.AddGradientOperators([loss])
	# lastly construct stochastic gradient descent for learning
	optimizer.build_sgd(
		model,
		base_learning_rate=learningRate,
		policy='step',
		stepsize=1,
		gamma=0.999
		# momentum=0.9,
		# weight_decay=0.004
	)
Code Example #27
File: c2.py  Project: shunting314/tmlf
def main(opt_name):
    workspace.FeedBlob('input', np.random.randn(2, 16).astype(np.float32))
    workspace.FeedBlob('label', np.array([0, 1]).astype(np.float32))

    helper = ModelHelper("sample_model")
    fc = brew.fc(helper, "input", "fc", dim_in=16, dim_out=8)
    relu = helper.Relu(fc, 'relu')
    fc2 = brew.fc(helper, relu, "fc2", dim_in=8, dim_out=1)
    label_ex = helper.ExpandDims("label", "label_ex", dims=[1])
    xent = helper.SigmoidCrossEntropyWithLogits([fc2, label_ex], 'xent')
    loss = helper.AveragedLoss(xent, 'loss')
    helper.AddGradientOperators([loss])

    if opt_name == "manual":
        ONE = helper.param_init_net.ConstantFill([],
                                                 "ONE",
                                                 shape=[1],
                                                 value=1.0)
        LR = helper.param_init_net.ConstantFill([],
                                                "LR",
                                                shape=[1],
                                                value=-0.03)

        for param in helper.params:
            param_grad = helper.param_to_grad[param]
            helper.WeightedSum([param, ONE, param_grad, LR], param)
    elif opt_name == "sgd":
        optimizer.build_sgd(helper, 0.03)
    elif opt_name == "adagrad":
        optimizer.build_adagrad(helper, 0.03)
    # caffe2 does not support rowwise adagrad for dense parameters
    # caffe2 seems not have lamb support yet
    elif opt_name == "adam":
        optimizer.build_adam(helper, 0.03)
    else:
        assert False, f"Unsupported optimizer {opt_name}"

    workspace.RunNetOnce(helper.param_init_net)
    workspace.RunNetOnce(helper.net)

    import pdb
    pdb.set_trace()
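
The "manual" branch above is plain fixed-rate SGD written out by hand: WeightedSum computes an elementwise weighted sum of its (blob, scalar) pairs, so each parameter becomes 1.0 * param + (-0.03) * param_grad. A minimal NumPy sketch of the same arithmetic, with illustrative values:

import numpy as np

lr = 0.03
param = np.array([0.5, -1.0], dtype=np.float32)
grad = np.array([0.2, 0.4], dtype=np.float32)

# Equivalent of WeightedSum([param, ONE, param_grad, LR], param)
# with ONE = 1.0 and LR = -lr:
param = 1.0 * param + (-lr) * grad  # i.e. param -= lr * grad
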
Code Example #28
def add_training_operators(softmax, m, device_opts) :
    with core.DeviceScope(device_opts):
        xent = m.LabelCrossEntropy([softmax, "label"], 'xent')
        loss = m.AveragedLoss(xent, "loss")
        #brew.accuracy(m, [softmax, "label"], "accuracy")
        m.AddGradientOperators([loss])
        opt = optimizer.build_sgd(
            m,
            base_learning_rate=LR, 
            policy='fixed',
            momentum=MOMENTUM)
Code Example #29
def add_optimizer(model):
    stepsz = int(60 * config.TRAIN_IMAGES / args.batch_size / args.gpus)
    return optimizer.build_sgd(
        model,
        base_learning_rate=args.learning_rate,
        policy="step",
        stepsize=stepsz,
        gamma=0.1,
        weight_decay=1e-4,
        momentum=0.9,
        nesterov=1,
    )
Code Example #30
def create_train_model(data_folder):
    """Create model for training with MNIST train dataset."""

    # Create the model helper for the train model
    train_model = model_helper.ModelHelper(name="mnist_lenet_train_model")

    # Specify the input is from the train lmdb
    data, label = add_model_inputs(
        train_model,
        batch_size=64,
        db=os.path.join(data_folder, "mnist-train-nchw-lmdb"),
        db_type="lmdb",
    )

    # Build the LeNet-5 network
    softmax_layer = build_mnist_lenet(train_model, data)

    # Compute cross entropy between softmax scores and labels
    cross_entropy = train_model.LabelCrossEntropy([softmax_layer, label],
                                                  "cross_entropy")

    # Compute the expected loss
    loss = train_model.AveragedLoss(cross_entropy, "loss")

    # Use the average loss we just computed to add gradient operators to the model
    train_model.AddGradientOperators([loss])

    # Specify the optimization algorithm
    optimizer.build_sgd(
        train_model,
        base_learning_rate=0.1,
        policy="step",
        stepsize=1,
        gamma=0.999,
    )

    # Track the accuracy of the model
    add_accuracy_op(train_model, softmax_layer, label)

    return train_model
Code Example #31
def AddTrainingOperators(model, loss):
    model.AddGradientOperators([loss])
    optimizer.add_weight_decay(model, 5e-4)
    stepsz = int(10 * 60000 / 128)
    opt = optimizer.build_sgd(model, 
        base_learning_rate=0.01, 
        policy="step", 
        stepsize=stepsz, 
        gamma=0.1, 
        momentum=0.9)
    # opt = optimizer.build_yellowfin(model)

    return opt
Code Example #32
def AddTrainingOperators(model, softmax):
    # calculate Loss
    xent = model.LabelCrossEntropy([softmax, 'label'])
    loss = model.AveragedLoss(xent, "loss")
    # calculate Accuracy
    AddAccuracy(model, softmax)
    # Add loss to gradient for backpropagation
    model.AddGradientOperators([loss])
    # Init SGD optimizer solver
    opt = optimizer.build_sgd(model,
                              base_learning_rate=0.1,
                              policy="step",
                              stepsize=1,
                              gamma=0.999)
Code Example #33
File: optimizer_test.py  Project: Sissipei/caffe2
    def test_weight_decay(self):
        from caffe2.python import brew
        from caffe2.python.model_helper import ModelHelper

        model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
        cnv = brew.conv(model, 'data', 'cnv', 32, 32, 4)
        a = brew.fc(model, cnv, 'a', 100, 200)
        pred = brew.fc(model, a, 'b', 200, 5)
        (softmax, loss) = model.SoftmaxWithLoss(
            [pred, 'label'],
            ['softmax', 'loss'],
        )
        model.AddGradientOperators([loss])

        add_weight_decay(model, weight_decay=1e-4)
        build_sgd(model, 0.11)

        expected_weight_grad = {'b_w_grad', 'a_w_grad', 'cnv_w_grad'}

        # Check in the proto that all weights are decayed and that
        # non-weights are not decayed.
        for op in model.net.Proto().op:
            if op.type == 'WeightedSum' and 'wd_0_0' in op.input:
                if op.output[0] not in expected_weight_grad:
                    print(
                        "Unexpected param for weight_decay: {}".
                        format(op.output[0])
                    )
                self.assertTrue(op.output[0] in expected_weight_grad)
                expected_weight_grad.remove(op.output[0])

        self.assertEqual(
            expected_weight_grad,
            set(),
            "Not all weights were decayed: {}".format(expected_weight_grad)
        )
Code Example #34
    def add_optimizer(model):
        """
        Optimizer function called once for the entire model, as opposed for each 
        CPU / GPU individually. The optimizer will be a stepwise weight decay.

        :return: return the optimizer
        """
        stepsz = int(30 * args.epoch_size / args.batch_size / args.num_shards)
        stepsz = stepsz if stepsz else 100

        optimizer.add_weight_decay(model, 1e-4)
        # opt = optimizer.build_multi_precision_sgd(

        opt = optimizer.build_sgd(model,
                                  args.base_learning_rate,
                                  momentum=0.9,
                                  nesterov=1,
                                  policy="step",
                                  stepsize=stepsz,
                                  gamma=0.1)
        return opt
Code Example #35
File: optimizer_test.py  Project: aimsky/caffe2
    def build_optimizer(self, model, **kwargs):
        self._skip_gpu = False
        return build_sgd(model, base_learning_rate=0.1, **kwargs)
Code Example #36
File: Toy_Regression.py  Project: Yangqing/caffe2
# #### Add the training operators and prime the workspace
# 
# In this **very important** step, we specify the loss function, set up the SGD training algorithm, prime and initialize the workspace, and initialize our model's weights and biases.

# In[5]:


# The loss function is computed by a squared L2 distance, 
#   and then averaged over all items.
dist = regression_model.SquaredL2Distance(['Y_gt', y_pred], "dist")
loss = regression_model.AveragedLoss(dist, "loss")

# Add the gradient operators and setup the SGD algorithm
regression_model.AddGradientOperators([loss])
optimizer.build_sgd(regression_model, base_learning_rate=learning_rate)

# Prime the workspace with some data
workspace.FeedBlob("Y_gt",Y_gt.astype(np.float32))
workspace.FeedBlob("X",X.astype(np.float32))

# Run the init net to prepare the workspace then create the net
workspace.RunNetOnce(regression_model.param_init_net)
workspace.CreateNet(regression_model.net)

# Inject our desired initial weights and bias
workspace.FeedBlob("y_pred_w",np.array([initial_weights]).astype(np.float32))
workspace.FeedBlob("y_pred_b",np.array([0.]).astype(np.float32))


# #### Run the training
Code Example #37
    def add_optimizer(model):
        optimizer.build_sgd(model, 0.1)
Code Example #38
    def add_optimizer(model):
        return optimizer.build_sgd(model, 0.1, policy="fixed")
Code Example #39
    def add_optimizer(model):
        optimizer.build_sgd(model, 0.1, policy="fixed", momentum=0.9)
Code Example #40
File: optimizer_test.py  Project: Sissipei/caffe2
    def build_optimizer(self, model, **kwargs):
        self._skip_gpu = False
        return build_sgd(model, base_learning_rate=0.1, **kwargs)