Example #1
def AddTrainingOperators(model, mbox_layers, gt_label):
    MultiboxInput = mbox_layers
    # MultiboxInput.append(gt_label)

    mbox_loc_cpu = model.net.CopyGPUToCPU(mbox_layers[0], 'mbox_loc_cpu')
    mbox_conf_cpu = model.net.CopyGPUToCPU(mbox_layers[1], 'mbox_conf_cpu')

    loc_pred, loc_gt, conf_pred, conf_gt = model.net.MultiboxLoss(
        [mbox_loc_cpu, mbox_conf_cpu, mbox_layers[2], gt_label],
        ['loc_pred', 'loc_gt', 'conf_pred', 'conf_gt'])
    # loc_pred,loc_gt,conf_pred,conf_gt=model.net.MultiboxLoss(
    # [mbox_layers[0], mbox_layers[1], mbox_layers[2],gt_label],['loc_pred','loc_gt','conf_pred','conf_gt']
    # )
    loc_pred_gpu = model.net.CopyCPUToGPU(loc_pred, 'loc_pred_gpu')
    loc_gt_gpu = model.net.CopyCPUToGPU(loc_gt, 'loc_gt_gpu')
    conf_pred_gpu = model.net.CopyCPUToGPU(conf_pred, 'conf_pred_gpu')
    conf_gt_gpu = model.net.CopyCPUToGPU(conf_gt, 'conf_gt_gpu')
    SmoothL1Loss = model.net.SmoothL1Loss([loc_pred_gpu, loc_gt_gpu],
                                          'SmoothL1Loss')
    P, SoftmaxWithLoss = model.net.SoftmaxWithLoss(
        [conf_pred_gpu, conf_gt_gpu], ["P", "SoftmaxWithLoss"])
    model.AddGradientOperators([SmoothL1Loss, SoftmaxWithLoss])

    ITER = brew.iter(model, "iter")
    #  ITER = model.param_init_net.ConstantFill([],'ITER',shape=[1],value=0,dtype=core.DataType.INT32)
    LR = model.LearningRate(ITER,
                            "LR",
                            base_lr=-0.001,
                            policy="step",
                            stepsize=80000,
                            gamma=0.1)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.WeightedSum([param, ONE, param_grad, LR], param)
def AddCheckpoints(model, checkpoint_iters, db_type):
    ITER = brew.iter(model, "iter")
    model.Checkpoint([ITER] + model.params, [],
                     db=os.path.join(unique_timestamp,
                                     "action_tufts_checkpoint_%05d.lmdb"),
                     db_type=db_type,
                     every=checkpoint_iters)
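
Example #1 (and most of the snippets below) applies the gradient step by hand with `WeightedSum`. As a reference for what that operator writes back into `param`, and why `base_lr` is negative whenever `WeightedSum` is used, here is a minimal NumPy sketch (illustrative only, not Caffe2 code, values made up):

import numpy as np

# Stand-in values; the real blobs are filled by the training net.
param = np.array([0.5, -0.2])
param_grad = np.array([0.1, 0.3])
ONE = 1.0
LR = -0.001  # matches base_lr=-0.001 above; negative so the sum descends

# WeightedSum with inputs [param, ONE, param_grad, LR] writes
# ONE * param + LR * param_grad back into the param blob:
param = ONE * param + LR * param_grad
print(param)  # [ 0.4999 -0.2003] -- one plain SGD step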
Example #3
    def add_parameter_update_ops(model):
        brew.add_weight_decay(model, weight_decay)
        iter = brew.iter(model, "iter")
        lr = model.net.LearningRate(
            [iter],
            "lr",
            base_lr=base_learning_rate,
            policy="step",
            stepsize=stepsize,
            gamma=0.1,
        )
        for param in model.GetParams():
            param_grad = model.param_to_grad[param]
            param_momentum = model.param_init_net.ConstantFill(
                [param], param + '_momentum', value=0.0
            )

            # Update param_grad and param_momentum in place
            model.net.MomentumSGDUpdate(
                [param_grad, param_momentum, lr, param],
                [param_grad, param_momentum, param],
                # almost 100% but with room to grow
                momentum=0.9,
                # Nesterov is a defenseman for the Montreal Canadiens, but
                # Nesterov Momentum works slightly better than standard momentum
                nesterov=1,
            )
Example #4
def AddTrainingOperators(model, output, label):
    loss = model.SquaredL2Distance([output, label], "loss")
    avgloss = model.AveragedLoss([loss], "avgloss")
    model.AddGradientOperators([avgloss])
    ITER = brew.iter(model, "iter")
    stepsize = int(train_iters / 2000)
    if (stepsize > 30):
        stepsize = int(30 + (stepsize - 30) / 5)
    if stepsize < 1:
        stepsize = 1
    assert (LEARN_RATE < 0)
    LR = model.LearningRate(ITER,
                            "LR",
                            base_lr=LEARN_RATE,
                            policy="step",
                            stepsize=stepsize,
                            gamma=0.9995)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.WeightedSum([param, ONE, param_grad, LR], param)
    model.Checkpoint([ITER] + model.params, [],
                     db="checkpoint_%06d.lmdb",
                     db_type="lmdb",
                     every=10000)
Example #5
def AddTrainingOperators(model):
    """
    opt = optimizer.build_sgd(model, base_learning_rate=1e-5, policy="step", stepsize=1, gamma=0.999, momentum=0.9)
#    model.AddWeightDecay(1e-4)
    """
    #    brew.add_weight_decay(model, 1e-4)
    ITER = brew.iter(model, "iter")
    #    ITER = model.Iter("iter")
    LR = model.LearningRate(ITER,
                            "LR",
                            base_lr=0.01,
                            policy="step",
                            stepsize=1,
                            gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.GetParams():
        param_grad = model.param_to_grad[param]
        param_momentum = model.param_init_net.ConstantFill([param],
                                                           param + '_momentum',
                                                           value=0.0)
        model.net.MomentumSGDUpdate(
            [param_grad, param_momentum, LR, param],
            [param_grad, param_momentum, param],
            momentum=0.9,
            nesterov=1,
        )
    return
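
The docstring at the top of Example #5 points to the optimizer helper as the usual alternative to the hand-written MomentumSGDUpdate loop. A minimal, untested sketch of that route, assuming `caffe2.python.optimizer` is available and gradients have already been added to the model:

from caffe2.python import optimizer

def AddTrainingOperatorsWithOptimizer(model):
    # Hypothetical helper, not part of the snippet above: build_sgd creates
    # the iteration counter, the LR blob and the momentum-SGD update ops.
    optimizer.build_sgd(
        model,
        base_learning_rate=0.01,
        policy="step",
        stepsize=1,
        gamma=0.999,
        momentum=0.9,
    )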
Example #6
def add_loss_and_backpropagation(model, probs, labels):
    # define cross-entropy
    x_entropy = model.LabelCrossEntropy([probs, labels], 'x_entropy')
    # compute the expected loss
    loss = model.AveragedLoss(x_entropy, "loss")
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # a counter
    ITER = brew.iter(model, "ITER")
    # set the learning rate schedule: stepsize is the number of iterations between LR updates; each update multiplies LR by gamma
    LR = model.LearningRate(ITER,
                            "LR",
                            base_lr=-0.01,
                            policy="step",
                            stepsize=4,
                            gamma=0.999)
    #regularization
    #brew.add_weight_decay(model, 0.001)
    # ONE is a constant value that is used in the gradient update. We only need
    # to create it once, so it is explicitly placed in param_init_net.
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    # Now, for each parameter, we do the gradient updates.
    for param in model.params:
        # Note how we get the gradient of each parameter - ModelHelper keeps
        # track of that.
        param_grad = model.param_to_grad[param]
        # The update is a simple weighted sum: param = param + param_grad * LR
        model.WeightedSum([param, ONE, param_grad, LR], param)
    '''
def add_parameter_update_ops(model):
    brew.add_weight_decay(model, weight_decay)
    iter = brew.iter(model, "iter")
    lr = model.net.LearningRate(
        [iter],
        "lr",
        base_lr=base_learning_rate,
        policy="step",
        stepsize=stepsize,
        gamma=0.1,
    )
    for param in model.GetParams():
        param_grad = model.param_to_grad[param]
        param_momentum = model.param_init_net.ConstantFill(
            [param], param + '_momentum', value=0.0
        )

        # Update param_grad and param_momentum in place
        model.net.MomentumSGDUpdate(
            [param_grad, param_momentum, lr, param],
            [param_grad, param_momentum, param],
            # almost 100% but with room to grow
            momentum=0.9,
            # Nesterov is a defenseman for the Montreal Canadiens, but
            # Nesterov Momentum works slightly better than standard momentum
            nesterov=1,
        )
    '''
Example #8
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # do a simple stochastic gradient descent
    ITER = brew.iter(model, "iter")
    # set the learning rate schedule
    LR = model.LearningRate(ITER,
                            "LR",
                            base_lr=-0.1,
                            policy="step",
                            stepsize=1,
                            gamma=0.999)
    # ONE is a constant value that is used in the gradient update. We only need
    # to create it once, so it is explicitly placed in param_init_net.
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    # Now, for each parameter, we do the gradient updates.
    for param in model.params:
        # Note how we get the gradient of each parameter - ModelHelper keeps
        # track of that.
        param_grad = model.param_to_grad[param]
        # The update is a simple weighted sum: param = param + param_grad * LR
        model.WeightedSum([param, ONE, param_grad, LR], param)
Example #9
def add_check_points(model, time_stamp, checkpoint_iters, db_type):
    ITER = brew.iter(model, "iter")
    model.Checkpoint([ITER] + model.params, [],
                     db=os.path.join(time_stamp,
                                     "cifar10_checkpoint_%05d.lmdb"),
                     db_type=db_type,
                     every=checkpoint_iters)
Example #10
def AddCheckpoints(model, checkpoint_iters, db_type):
    ITER = brew.iter(model, "iter")
    model.Checkpoint([ITER] + model.params, [],
                     db=os.path.join(unique_timestamp,
                                     "cifar10_checkpoint_%05d.lmdb"),
                     db_type=db_type,
                     every=checkpoint_iters)
def build_conv_model(model_name, batch_size):
    model_gen_map = conv_model_generators()
    assert model_name in model_gen_map, "Model " + model_name + " not found"
    model, input_size = model_gen_map[model_name]("NCHW", None)

    input_shape = [batch_size, 3, input_size, input_size]
    if model_name == "MLP":
        input_shape = [batch_size, input_size]

    model.param_init_net.GaussianFill(
        [],
        "data",
        shape=input_shape,
        mean=0.0,
        std=1.0
    )
    model.param_init_net.UniformIntFill(
        [],
        "label",
        shape=[batch_size, ],
        min=0,
        max=999
    )

    model.AddGradientOperators(["loss"])

    ITER = brew.iter(model, "iter")
    LR = model.net.LearningRate(
        ITER, "LR", base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.net.WeightedSum([param, ONE, param_grad, LR], param)

    return model
def ScaffoldModelCheckpoints(model, checkpointFolder, every):
	newCheckpointFolder = str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
	newCheckpointFolder = join(checkpointFolder, newCheckpointFolder)

	print('checkpoint folder:', newCheckpointFolder)
	makedirs(newCheckpointFolder)

	iter = brew.iter(model, 'iterations')
	model.Checkpoint([iter] + model.params, [], db=join(newCheckpointFolder, 'dataset_checkpoint_%05d.lmdb'), db_type='lmdb', every=every)
def AddParameterUpdate(model):
    """ Simple plain SGD update -- not tuned to actually train the models """
    ITER = brew.iter(model, "iter")
    LR = model.net.LearningRate(
        ITER, "LR", base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.net.WeightedSum([param, ONE, param_grad, LR], param)
def AddParameterUpdate(model):
    """ Simple plain SGD update -- not tuned to actually train the models """
    ITER = brew.iter(model, "iter")
    LR = model.LearningRate(
        ITER, "LR", base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.WeightedSum([param, ONE, param_grad, LR], param)
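
The two AddParameterUpdate variants above differ only in whether the update operators are added through `model.net` or through the helper itself; ModelHelper forwards operator calls to its net, so both spell the same operators. A small illustrative sketch (blob names and values are made up):

from caffe2.python import model_helper

model = model_helper.ModelHelper(name="sgd_demo")
ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
W = model.param_init_net.ConstantFill([], "W", shape=[1], value=2.0)
G = model.param_init_net.ConstantFill([], "G", shape=[1], value=0.5)
LR = model.param_init_net.ConstantFill([], "LR", shape=[1], value=-0.1)

model.net.WeightedSum([W, ONE, G, LR], W)  # added to model.net explicitly
model.WeightedSum([W, ONE, G, LR], W)      # forwarded by the helper to model.net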
Example #15
    def add_parameter_update_ops(model):
        """A simple parameter update code.

        :param model_helper.ModelHelper model: Model to add update parameters operators for.
        """
        iteration = brew.iter(model, "ITER")
        learning_rate = model.net.LearningRate([iteration], "LR", base_lr=0.01, policy="fixed")
        one = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
        for param in model.GetParams():
            grad = model.param_to_grad[param]
            model.WeightedSum([param, one, grad, learning_rate], param)
Example #16
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""

    # LabelCrossEntropy is used to compute the error of the predictions
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # compute the expected loss, with the help of the cross-entropy blob xent
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model, so we can follow how training is going
    AddAccuracy(model, softmax, label)

    # use the average loss we just computed to add gradient operators to the model
    # the loss is what we differentiate, because it is the quantity we want to minimize
    '''
	Inputs:
	ys: a list or a dictionary specifying what blobs we want to compute
	  derivatives of. If the input is a list, we will automatically
	  generate their gradients with all-one values; if the input is a
	  dictionary, for any dictionary entries that are not None, we will
	  take the corresponding blobs as their gradients; for all those
	  that are None, we will auto-fill them with 1.
	'''
    model.AddGradientOperators([loss])

    # do a simple stochastic gradient descent
    # Stores a single integer that gets incremented on each call to Run().
    # Useful for tracking the iteration count during SGD, for example.
    ITER = brew.iter(model, "iter")
    # set the learning rate schedule
    LR = model.LearningRate(ITER,
                            "LR",
                            base_lr=-0.1,
                            policy="step",
                            stepsize=1,
                            gamma=0.999)
    # ONE is a constant value that is used in the gradient update. We only need
    # to create it once, so it is explicitly placed in param_init_net.
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    # Now, for each parameter, we do the gradient updates.
    for param in model.params:
        # Note how we get the gradient of each parameter - ModelHelper keeps
        # track of that.
        param_grad = model.param_to_grad[param]
        # The update is a simple weighted sum: param = param + param_grad * LR
        model.WeightedSum([param, ONE, param_grad, LR], param)
    '''
	We will need to checkpoint the parameters of the model periodically.
	This is achieved via the Checkpoint operator.
	It also takes a parameter `every` so that we don't checkpoint too often.
	In this case, we will checkpoint every 20 iterations, which should be fine.
	'''
    model.Checkpoint([ITER] + model.params, [],
                     db="mnist_lenet_checkpoint_%05d.lmdb",
                     db_type="lmdb",
                     every=20)
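
The checkpoints written above can later be restored with Caffe2's Load operator. A rough sketch (the checkpoint file name is hypothetical; substitute whichever iteration you want to resume from):

from caffe2.python import core, workspace

# Hypothetical checkpoint produced by the Checkpoint op above at iteration 20.
load_op = core.CreateOperator(
    "Load", [], [],
    db="mnist_lenet_checkpoint_00020.lmdb",
    db_type="lmdb",
    load_all=1,  # restore every blob stored in the checkpoint, including "iter"
)
workspace.RunOperatorOnce(load_op)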
Example #17
 def add_parameter_update_ops(model):
     brew.add_weight_decay(model, args.weight_decay)
     ITER = brew.iter(model, "ITER")
     stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
     LR = model.net.LearningRate(
         [ITER],
         "LR",
         base_lr=args.base_learning_rate,
         policy="step",
         stepsize=stepsz,
         gamma=0.1,
     )
     AddMomentumParameterUpdate(model, LR)
Example #18
def AddTrainingParameters(model, softmax, label):
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    loss = model.AveragedLoss(xent, "loss")
    AddAccuracy(model, softmax, label)
    model.AddGradientOperators([loss])
    ITER = brew.iter(model, "iter")
    LR = model.LearningRate(ITER,
                            "LR",
                            base_lr=-0.1,
                            policy="step",
                            stepsize=1,
                            gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)

    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.WeightedSum([param, ONE, param_grad, LR], param)
def add_parameter_update_ops_resnet(model, base_learning_rate, weight_decay):
        brew.add_weight_decay(model, weight_decay)
        iter = brew.iter(model, "iter")
        lr = model.net.LearningRate([iter],
                                    "lr",
                                    base_lr=base_learning_rate,
                                    policy="fixed",
                                    gamma=0.1)
        for param in model.GetParams():
            param_grad = model.param_to_grad[param]
            param_momentum = model.param_init_net.ConstantFill(
                [param], param + '_momentum', value=0.0 )

            model.net.MomentumSGDUpdate(
                [param_grad, param_momentum, lr, param],
                [param_grad, param_momentum, param],
                momentum=0.9,
                nesterov=1)
Example #20
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    ITER = brew.iter(model, "iter")
    LR = model.net.LearningRate(ITER,
                                "LR",
                                base_lr=-0.1,
                                policy="step",
                                stepsize=1,
                                gamma=0.999)
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    for param in model.params:
        param_grad = model.param_to_grad[param]
        model.net.WeightedSum([param, ONE, param_grad, LR], param)
Example #21
 def add_parameter_update(model):
     """
     Add a simple gradient based parameter update with stepwise adaptive learning rate.
     """
     # This counts the number of iterations we have run
     ITER = brew.iter(model, "iter")
     # Adds an LR blob to the model, decayed with the step policy every `stepsize` iterations; gamma is the decay factor
     LR = model.LearningRate(ITER,
                             "LR",
                             base_lr=-args.base_learning_rate,
                             policy="step",
                             stepsize=1000,
                             gamma=0.999)
     # This is a constant used in the following loop
     ONE = model.param_init_net.ConstantFill([],
                                             "ONE",
                                             shape=[1],
                                             value=1.0)
     # Here we are essentially applying the gradients to the weights (using the classical method)
     for param in model.params:
         param_grad = model.param_to_grad[param]
         model.WeightedSum([param, ONE, param_grad, LR], param)
def AddTrainingOperators(model, predict, label, expect, base_lr, log=True):
    """Adds training operators to the model.
        predict: Predicted distribution by Policy Model
        expect: Expected distribution by MCTS, or transformed from Policy Model
        base_lr: Base Learning Rate. Always fixed
    """
    # compute the expected loss
    if label:
        onehot = AddOneHot(model, label)
        softmax, xent = model.SoftmaxWithLoss([predict, onehot],
                                              ['softmax', 'xent'],
                                              label_prob=1)
        AddAccuracy(model, softmax, label, log)
    else:
        softmax, xent = model.SoftmaxWithLoss([predict, expect],
                                              ['softmax', 'xent'],
                                              label_prob=1)
    loss = model.AveragedLoss(xent, "loss")
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # do a simple stochastic gradient descent
    ITER = brew.iter(model, "iter")
    # set the learning rate schedule
    LR = model.LearningRate(
        ITER, "LR", base_lr=base_lr,
        policy="fixed")  # when policy=fixed, stepsize and gamma are ignored
    # ONE is a constant value that is used in the gradient update. We only need
    # to create it once, so it is explicitly placed in param_init_net.
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    # Now, for each parameter, we do the gradient updates.
    for param in model.params:
        # Note how we get the gradient of each parameter - ModelHelper keeps
        # track of that.
        param_grad = model.param_to_grad[param]
        # The update is a simple weighted sum: param = param + param_grad * LR
        model.WeightedSum([param, ONE, param_grad, LR], param)
    if log:
        model.Print('loss', [], to_file=1)
Example #23
def AddParameterUpdate_ops(model):
    brew.add_weight_decay(model, weight_decay)
    iter = brew.iter(model, "iter")
    lr = model.net.LearningRate(
        [iter],
        "lr",
        base_lr=base_learning_rate,
        policy="step",
        stepsize=stepsize,
        gamma=0.1,
    )
    for param in model.GetParams():
        param_grad = model.param_to_grad[param]
        param_momentum = model.param_init_net.ConstantFill([param],
                                                           param + "_momentum",
                                                           value=0.0)

        model.net.MomentumSGDUpdate(
            [param_grad, param_momentum, lr, param],
            [param_grad, param_momentum, param],
            momentum=0.9,
            nesterov=1,
        )
Example #24
def AddTrainingOperators(model, softmax, label):
    """Adds training operators to the model."""
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # do a simple stochastic gradient descent
    ITER = brew.iter(model, "iter")
    # set the learning rate schedule
    LR = model.LearningRate(
        ITER, "LR", base_lr=-0.1, policy="step", stepsize=1, gamma=0.999 )
    # ONE is a constant value that is used in the gradient update. We only need
    # to create it once, so it is explicitly placed in param_init_net.
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    # Now, for each parameter, we do the gradient updates.
    for param in model.params:
        # Note how we get the gradient of each parameter - ModelHelper keeps
        # track of that.
        param_grad = model.param_to_grad[param]
        # The update is a simple weighted sum: param = param + param_grad * LR
        model.WeightedSum([param, ONE, param_grad, LR], param)
def AddCheckpoints(model, checkpoint_iters, db_type):
    ITER = brew.iter(model, "iter")
    model.Checkpoint([ITER] + model.params, [],
                     db="mstar_lenet_checkpoint_%05d.lmdb",
                     db_type=db_type,
                     every=checkpoint_iters)
Example #26
 def Iter(self, *args, **kwargs):
     return brew.iter(self, *args, **kwargs)
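
This thin wrapper only forwards to brew.iter so the counter can be created as a method call on the helper. A small illustrative sketch of how such a wrapper might be used (the subclass name is made up, not from the source):

from caffe2.python import brew, model_helper

class IterModelHelper(model_helper.ModelHelper):
    def Iter(self, *args, **kwargs):
        return brew.iter(self, *args, **kwargs)

model = IterModelHelper(name="demo")
ITER = model.Iter("iter")  # same blob brew.iter(model, "iter") would create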
Example #27
def AddTrainingOperators(model, softmax, label):
    # something very important happens here
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    # compute the expected loss
    loss = model.AveragedLoss(xent, "loss")
    # track the accuracy of the model
    AddAccuracy(model, softmax, label)
    # use the average loss we just computed to add gradient operators to the model
    model.AddGradientOperators([loss])
    # do a simple stochastic gradient descent
    ITER = brew.iter(model, "iter")
    # set the learning rate schedule
    LR = model.LearningRate(ITER,
                            "LR",
                            base_lr=-0.1,
                            policy="step",
                            stepsize=1,
                            gamma=0.999)
    # ONE is a constant value that is used in the gradient update. We only need
    # to create it once, so it is explicitly placed in param_init_net.
    ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
    # Now, for each parameter, we do the gradient updates.
    for param in model.params:
        # Note how we get the gradient of each parameter - CNNModelHelper keeps
        # track of that.
        param_grad = model.param_to_grad[param]
        # The update is a simple weighted sum: param = param + param_grad * LR
        model.WeightedSum([param, ONE, param_grad, LR], param)
    # let's checkpoint every 20 iterations, which should probably be fine.
    # you may need to delete tutorial_files/tutorial-mnist to re-run the tutorial
    model.Checkpoint([ITER] + model.params, [],
                     db="mnist_lenet_checkpoint_%05d.leveldb",
                     db_type="leveldb",
                     every=20)

    arg_scope = {"order": "NCHW"}
    train_model = model_helper.ModelHelper(name="mnist_train",
                                           arg_scope=arg_scope)
    data, label = AddInput(train_model,
                           batch_size=64,
                           db=os.path.join(data_folder,
                                           'mnist-train-nchw-leveldb'),
                           db_type='leveldb')
    softmax = AddLeNetModel(train_model, data)
    AddTrainingOperators(train_model, softmax, label)

    # Testing model. We will set the batch size to 100, so that the testing
    # pass is 100 iterations (10,000 images in total).
    # For the testing model, we need the data input part, the main LeNetModel
    # part, and an accuracy part. Note that init_params is set False because
    # we will be using the parameters obtained from the train model.
    test_model = model_helper.ModelHelper(name="mnist_test",
                                          arg_scope=arg_scope,
                                          init_params=False)
    data, label = AddInput(test_model,
                           batch_size=100,
                           db=os.path.join(data_folder,
                                           'mnist-test-nchw-leveldb'),
                           db_type='leveldb')
    softmax = AddLeNetModel(test_model, data)
    AddAccuracy(test_model, softmax, label)

    # Deployment model. We simply need the main LeNetModel part.
    deploy_model = model_helper.ModelHelper(name="mnist_deploy",
                                            arg_scope=arg_scope,
                                            init_params=False)
    AddLeNetModel(deploy_model, "data")

    graph = net_drawer.GetPydotGraphMinimal(train_model.net.Proto().op,
                                            "mnist",
                                            rankdir="LR",
                                            minimal_dependency=True)
    display.Image(graph.create_png(), width=800)

    with open(os.path.join(root_folder, "train_net.pbtxt"), 'w') as fid:
        fid.write(str(train_model.net.Proto()))
    with open(os.path.join(root_folder, "train_init_net.pbtxt"), 'w') as fid:
        fid.write(str(train_model.param_init_net.Proto()))
    with open(os.path.join(root_folder, "test_net.pbtxt"), 'w') as fid:
        fid.write(str(test_model.net.Proto()))
    with open(os.path.join(root_folder, "test_init_net.pbtxt"), 'w') as fid:
        fid.write(str(test_model.param_init_net.Proto()))
    with open(os.path.join(root_folder, "deploy_net.pbtxt"), 'w') as fid:
        fid.write(str(deploy_model.net.Proto()))
    print("Protocol buffers files have been created in your root folder: " +
          root_folder)

    # The parameter initialization network only needs to be run once.
    workspace.RunNetOnce(train_model.param_init_net)
    # creating the network
    workspace.CreateNet(train_model.net)
    # set the number of iterations and track the accuracy & loss
    total_iters = 200
    accuracy = np.zeros(total_iters)
    loss = np.zeros(total_iters)
    # Now, we will manually run the network for 200 iterations.
    for i in range(total_iters):
        workspace.RunNet(train_model.net.Proto().name)
        accuracy[i] = workspace.FetchBlob('accuracy')
        loss[i] = workspace.FetchBlob('loss')
    # After the execution is done, let's plot the values.
    pyplot.plot(loss, 'b')
    pyplot.plot(accuracy, 'r')
    pyplot.legend(('Loss', 'Accuracy'), loc='upper right')

    # Let's look at some of the data.
    pyplot.figure()
    data = workspace.FetchBlob('data')
    _ = visualize.NCHW.ShowMultiple(data)
    pyplot.figure()
    softmax = workspace.FetchBlob('softmax')
    _ = pyplot.plot(softmax[0], 'ro')
    pyplot.title('Prediction for the first image')

    # run a test pass on the test net
    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)
    test_accuracy = np.zeros(100)
    for i in range(100):
        workspace.RunNet(test_model.net.Proto().name)
        test_accuracy[i] = workspace.FetchBlob('accuracy')
    # After the execution is done, let's plot the values.
    pyplot.plot(test_accuracy, 'r')
    pyplot.title('Accuracy over test batches.')
    print('test_accuracy: %f' % test_accuracy.mean())
Example #28
    def CreateModel(self):
        log.debug("Start training")
        model = model_helper.ModelHelper(name="char_rnn")

        input_blob, seq_lengths, hidden_init, cell_init, target = \
            model.net.AddExternalInputs(
                'input_blob',
                'seq_lengths',
                'hidden_init',
                'cell_init',
                'target',
            )

        hidden_output_all, self.hidden_output, _, self.cell_state = LSTM(
            model,
            input_blob,
            seq_lengths, (hidden_init, cell_init),
            self.D,
            self.hidden_size,
            scope="LSTM")
        output = brew.fc(model,
                         hidden_output_all,
                         None,
                         dim_in=self.hidden_size,
                         dim_out=self.D,
                         axis=2)

        # axis is 2 as first two are T (time) and N (batch size).
        # We treat them as one big batch of size T * N
        softmax = model.net.Softmax(output, 'softmax', axis=2)

        softmax_reshaped, _ = model.net.Reshape(softmax,
                                                ['softmax_reshaped', '_'],
                                                shape=[-1, self.D])

        # Create a copy of the current net. We will use it on the forward
        # pass where we don't need loss and backward operators
        self.forward_net = core.Net(model.net.Proto())

        xent = model.net.LabelCrossEntropy([softmax_reshaped, target], 'xent')
        # Loss is averaged both across the batch and through time.
        # That's why the learning rate below is multiplied by self.seq_length
        loss = model.net.AveragedLoss(xent, 'loss')
        model.AddGradientOperators([loss])

        # Hand made SGD update. Normally one can use helper functions
        # to build an optimizer
        ITER = brew.iter(model, "iter")
        LR = model.LearningRate(ITER,
                                "LR",
                                base_lr=-0.1 * self.seq_length,
                                policy="step",
                                stepsize=1,
                                gamma=0.9999)
        ONE = model.param_init_net.ConstantFill([],
                                                "ONE",
                                                shape=[1],
                                                value=1.0)

        # Update weights for each of the model parameters
        for param in model.params:
            param_grad = model.param_to_grad[param]
            model.net.WeightedSum([param, ONE, param_grad, LR], param)

        self.model = model
        self.predictions = softmax
        self.loss = loss

        self.prepare_state = core.Net("prepare_state")
        self.prepare_state.Copy(self.hidden_output, hidden_init)
        self.prepare_state.Copy(self.cell_state, cell_init)
Example #29
softmax = AddLeNetModel(train_model, data)

##################################################################################
#### Step 3: Add training operators to the model
# TODO: use the optimizer class here instead of doing sgd by hand

xent = train_model.LabelCrossEntropy(['softmax', 'label'], 'xent')
loss = train_model.AveragedLoss(xent, 'loss')
brew.accuracy(train_model, ['softmax', 'label'], 'accuracy')
train_model.AddGradientOperators([loss])
opt = optimizer.build_sgd(train_model, base_learning_rate=0.1)
for param in train_model.GetOptimizationParamInfo():
    opt(train_model.net, train_model.param_init_net, param)

#model.Checkpoint([ITER] + model.params, [], db="mnist_lenet_checkpoint_%05d.lmdb", db_type="lmdb", every=20)
ITER = brew.iter(train_model, "iter")
train_model.Checkpoint([ITER] + train_model.params, [],
                       db="mnist_lenet_checkpoint_%05d.lmdb",
                       db_type="lmdb",
                       every=checkpoint_iters)

##################################################################################
#### Run the training procedure

# run the param init network once
workspace.RunNetOnce(train_model.param_init_net)
# create the network
workspace.CreateNet(train_model.net, overwrite=True)
# Set the total number of iterations and track the accuracy and loss
total_iters = training_iters
accuracy = np.zeros(total_iters)
Example #30
 def Iter(self, *args, **kwargs):
     return brew.iter(self, *args, **kwargs)