Example #1
0
def test_gdm(random_learning_rate, random_momentum_coef, wdecay, nesterov,
             select_variables):
    """Check GradientDescentMomentum against the GDMReference implementation.

    Both optimizers are built from the same keyword arguments and then passed
    to a comparison helper: ``compare_optimizer_variable_select`` when
    ``select_variables`` is truthy, ``compare_optimizer`` otherwise.
    """
    # One shared set of hyper-parameters so both optimizers are configured alike
    gdm_args = dict(learning_rate=random_learning_rate,
                    momentum_coef=random_momentum_coef,
                    wdecay=wdecay,
                    nesterov=nesterov)

    # Reference implementation first, then the optimizer under test
    gdm_ref = GDMReference(**gdm_args)
    gdm = GradientDescentMomentum(**gdm_args)

    # Choose the comparison routine, then run it on (baseline, reference)
    compare = compare_optimizer_variable_select if select_variables else compare_optimizer
    compare(gdm, gdm_ref)
Example #2
0
    Pooling(pool_shape=(7, 7), strides=1, pool_type="avg"),
    Affine(axes=ax.Y,
           weight_init=XavierInit(),
           bias_init=bias_init,
           activation=Softmax())
])

# Learning-rate policy dict, passed to the optimizer below as its first
# positional argument.
# NOTE(review): presumably the rate starts at 'base_lr' and is scaled by
# 'gamma' at each point listed under 'schedule' - confirm against the optimizer.
lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
    # Cube root of 1/250; the schedule below has three entries.
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}

# NOTE(review): the second positional argument (0.0) is presumably the
# momentum coefficient - confirm against the GradientDescentMomentum signature.
optimizer = GradientDescentMomentum(lr_schedule,
                                    0.0,
                                    wdecay=0.0005,
                                    iteration=inputs['iteration'])
# Apply the model to the image input
train_prob = seq1(inputs['image'])
# Cross entropy between the model output and the labels one-hot encoded on ax.Y
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
# Sequence the optimizer applied to the loss with the mean of the loss
# (out_axes=() requests an output with no axes)
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
# Outputs of the training computation, keyed by name
train_outputs = dict(batch_cost=batch_cost)

with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs,
                                               inputs)

    cbs = make_default_callbacks(transformer=transformer,
                                 output_file=args.output_file,
                       Affine(weight_init=init_uni, bias_init=init_uni,
                              activation=Softmax(), axes=out_axis)])

# Optimizer
# The learning rate starts at 0.01 (base_lr) and is multiplied by gamma (0.95)
# at every multiple of `step` below num_iterations, where
# step = num_iterations // 75 (but never less than 1):
#   - at iteration step,     lr = .95 * .01
#   - at iteration 2 * step, lr is reduced by gamma again
#   - and so on.
no_steps = 75
# Clamp to 1: for runs shorter than no_steps iterations the integer division
# yields 0, and np.arange cannot build a range with a zero step.
step = max(1, num_iterations // no_steps)
schedule = list(np.arange(step, num_iterations, step))
learning_rate_policy = {'name': 'schedule',
                        'schedule': schedule,
                        'gamma': 0.95,
                        'base_lr': 0.01}
optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    iteration=inputs['iteration'])
# Define the loss function (cross entropy loss).
# Note that the integer values of inputs['y'] are converted to one-hot
# vectors along out_axis here.
fwd_prop = seq1(inputs['X'])
train_loss = ng.cross_entropy_multi(fwd_prop,
                                    ng.one_hot(inputs['y'], axis=out_axis),
                                    usebits=True)

# Train cost computation: sequence the optimizer applied to the loss with the
# mean of the loss (out_axes=() requests an output with no axes).
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
# Computation returning both the batch cost and the forward-prop output.
# NOTE(review): "all" presumably means every placeholder in the graph is a
# parameter of the computation - confirm against ng.computation.
train_computation = ng.computation([batch_cost, fwd_prop], "all")
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of evaluation set
# Required for correct functioning of batch norm and dropout layers during inference mode
with Layer.inference_mode_on():
                     args.size,
                     en_bottleneck,
                     num_resnet_mods,
                     batch_norm=not args.disable_batch_norm)

# Learning rate placeholder: no axes (axes=()), initial value base_lr
lr_ph = ng.placeholder(axes=(), initial_value=base_lr)

# Optimizer
# The 'provided' learning policy takes the learning rate as an input to the
# graph through a placeholder.
# This allows you to control the learning rate based on various factors of the network
learning_rate_policy = {'name': 'provided', 'lr_placeholder': lr_ph}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=momentum_coef,
                                    wdecay=wdecay,
                                    nesterov=False,
                                    iteration=input_ph['iteration'])
label_indices = input_ph['label']
# Make a prediction
prediction = resnet(input_ph['image'])
# Calculate loss against the labels one-hot encoded along ax.Y
train_loss = ng.cross_entropy_multi(prediction,
                                    ng.one_hot(label_indices, axis=ax.Y))
# Sequence the optimizer applied to the loss with the loss averaged over the batch
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
# Outputs of the training computation, keyed by name
train_outputs = dict(batch_cost=batch_cost)

# Instantiate the Saver object to save weights
Example #5
0
######################
# Model specification


def cifar_mean_subtract(x):
    """Subtract a fixed per-channel mean from ``x`` and divide by 255.

    The mean lives in a persistent tensor defined over the axis of ``x``
    named 'C', initialised to [104., 119., 127.].
    """
    channel_axes = x.axes.find_by_name('C')
    mean_values = np.array([104., 119., 127.])
    bgr_mean = ng.persistent_tensor(axes=channel_axes,
                                    initial_value=mean_values)
    centered = x - bgr_mean
    return centered / 255.


# Model: preprocessing with cifar_mean_subtract, a 200-unit Affine layer with
# Rectlin activation, then an Affine layer over ax.Y with Softmax activation.
# Both Affine layers draw their weights from UniformInit(-0.1, 0.1).
seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1), activation=Softmax())])

# NOTE(review): the positional arguments are presumably learning rate (0.1)
# and momentum coefficient (0.9) - confirm against GradientDescentMomentum.
optimizer = GradientDescentMomentum(0.1, 0.9)
# Training graph: model output, cross entropy against one-hot labels on ax.Y,
# then the optimizer applied to the loss sequenced with the mean of the loss.
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Evaluation graph: the same model applied again with inference mode switched on
with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
eval_loss = ng.cross_entropy_multi(inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
# Evaluation exposes both the model output and the cross-entropy loss
eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
    loss_computation = make_bound_computation(transformer, eval_outputs, inputs)