Example #1
def make_optimizer(name=None, weight_clip_value=None):
    optimizer = Adam(learning_rate=1e-4,
                     beta_1=0.5,
                     beta_2=0.9,
                     epsilon=1e-8,
                     weight_clip_value=weight_clip_value)

    return optimizer
Example #2
def make_optimizer(name=None, weight_clip_value=None, loss_type="WGAN-GP"):
    if loss_type == "WGAN":
        optimizer = RMSProp(learning_rate=5e-5,
                            decay_rate=0.99,
                            epsilon=1e-8,
                            weight_clip_value=weight_clip_value)

    elif loss_type == "WGAN-GP":
        optimizer = Adam(learning_rate=1e-4,
                         beta_1=0.5,
                         beta_2=0.9,
                         epsilon=1e-8,
                         weight_clip_value=weight_clip_value)
    else:
        raise ValueError("Unsupported loss_type: {}".format(loss_type))

    return optimizer
Example #3
def test_adam(random_learning_rate, random_beta_1, random_beta_2, epsilon,
              select_variables):

    # Setup the baseline and reference optimizers to be tested
    adam_args = {'learning_rate': random_learning_rate,
                 'beta_1': random_beta_1,
                 'beta_2': random_beta_2,
                 'epsilon': epsilon}

    adam_reference = AdamReference(**adam_args)
    adam = Adam(**adam_args)

    # test baseline against reference
    if select_variables:
        compare_optimizer_variable_select(adam, adam_reference)
    else:
        compare_optimizer(adam, adam_reference)
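AdamReference and the compare_optimizer* helpers come from the surrounding test suite and are not shown here. For orientation, the following is a minimal NumPy sketch of the standard Adam update (Kingma & Ba) that a reference implementation of this kind typically checks against; the function name and structure are illustrative, not taken from the source.

import numpy as np

def adam_step(param, grad, m, v, t, learning_rate, beta_1, beta_2, epsilon):
    # Biased first/second moment estimates, bias correction, then the step.
    m = beta_1 * m + (1 - beta_1) * grad
    v = beta_2 * v + (1 - beta_2) * grad ** 2
    m_hat = m / (1 - beta_1 ** t)
    v_hat = v / (1 - beta_2 ** t)
    param = param - learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)
    return param, m, v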
Example #4
def test_adam(random_learning_rate, random_beta_1, random_beta_2, epsilon,
              transformer_factory):

    # Setup the baseline and reference optimizers to be tested
    adam_args = {
        'learning_rate': random_learning_rate,
        'beta_1': random_beta_1,
        'beta_2': random_beta_2,
        'epsilon': epsilon
    }

    adam_reference = AdamReference(**adam_args)
    adam = Adam(**adam_args)

    # test baseline against reference
    compare_optimizer(adam, adam_reference)
Example #5
def make_optimizer(name=None):
    optimizer = Adam(learning_rate=5e-4,
                     beta_1=0.5,
                     beta_2=0.999,
                     epsilon=1e-8)
    return optimizer
Example #6
            dropout=dropout).layers +
        [lambda op: ng.map_roles(op, {
            'C': 'F',
            'W': 'REC'
        })] + [affine_layer])
elif args.modeltype == "LSTM":
    model = Sequential(
        recurrent_model.define_model(out_axis,
                                     celltype=args.modeltype,
                                     recurrent_units=hidden_sizes,
                                     return_sequence=True).layers +
        [Logistic()])

# Optimizer
if args.modeltype == "TCN":
    optimizer = Adam(learning_rate=args.lr,
                     gradient_clip_value=args.grad_clip_value)
else:
    optimizer = GradientDescentMomentum(
        learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (binary cross entropy, since each musical key on the piano is encoded as a binary value)
fwd_prop = model(inputs['X'])
fwd_prop = ng.axes_with_order(fwd_prop, out_axes)
train_loss = ng.cross_entropy_binary(fwd_prop, inputs['y'])

with Layer.inference_mode_on():
    preds = model(inputs['X'])
    preds = ng.axes_with_order(preds, out_axes)
eval_loss = ng.mean(ng.cross_entropy_binary(preds, inputs['y']), out_axes=())
eval_computation = ng.computation([eval_loss], "all")
predict_computation = ng.computation([preds], "all")
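The training and evaluation losses above use ng.cross_entropy_binary over the piano-roll targets. Assuming it computes the usual elementwise binary cross entropy, an illustrative NumPy equivalent (not code from the example) would be:

import numpy as np

def binary_cross_entropy(pred, target, eps=1e-12):
    # Each musical key is an independent 0/1 target, so the loss is
    # elementwise binary cross entropy.
    pred = np.clip(pred, eps, 1.0 - eps)
    return -(target * np.log(pred) + (1.0 - target) * np.log(1.0 - pred))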
Example #7
           axes=out_axis)
])

# Optimizer
# Following policy will set the initial learning rate to 0.05 (base_lr)
# At iteration (num_iterations // 5), learning rate is multiplied by gamma (new lr = .005)
# At iteration (num_iterations // 2), it will be reduced by gamma again (new lr = .0005)
schedule = [num_iterations // 5, num_iterations // 2]
learning_rate_policy = {
    'name': 'schedule',
    'schedule': schedule,
    'gamma': 0.1,
    'base_lr': 0.05
}
optimizer = Adam(learning_rate=learning_rate_policy,
                 iteration=inputs['iteration'],
                 gradient_clip_value=1)
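# Illustration only (hypothetical helper, not part of the original example):
# how a 'schedule' policy like the one above resolves to a concrete learning
# rate at iteration t, assuming the rate is multiplied by gamma once for every
# schedule boundary already passed.
def scheduled_lr(t, schedule=schedule, base_lr=0.05, gamma=0.1):
    return base_lr * gamma ** sum(t >= boundary for boundary in schedule)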

# Define the loss function (squared L2 loss)
fwd_prop = seq1(inputs['X'])
train_loss = ng.squared_L2(fwd_prop - inputs['y'])

# Cost calculation
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of test set
# Required for correct functioning of batch norm and dropout layers during inference mode
with Layer.inference_mode_on():
Example #8
inputs = train_set.make_placeholders()
vocab_axis = ng.make_axis(length=wikimovies.vocab_size, name='vocab_axis')

memn2n = KVMemN2N(num_iterations, args.batch_size, args.emb_size, args.nhops,
                  wikimovies.story_length, wikimovies.memory_size,
                  wikimovies.vocab_size, vocab_axis, args.use_v_luts)
# Compute answer predictions
a_pred, _ = memn2n(inputs)

loss = ng.cross_entropy_multi(a_pred,
                              ng.one_hot(inputs['answer'], axis=vocab_axis),
                              usebits=True)

mean_cost = ng.sum(loss, out_axes=[])

optimizer = Adam(learning_rate=args.lr)

updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, _ = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference,
                                       ng.one_hot(inputs['answer'],
                                                  axis=vocab_axis),
                                       usebits=True)
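Both losses in this example pass usebits=True to ng.cross_entropy_multi. Assuming that flag switches the logarithm to base 2 (cost reported in bits), an illustrative NumPy version (not code from the example) would be:

import numpy as np

def cross_entropy_multi(pred, one_hot_target, usebits=False, eps=1e-12):
    # Multiclass cross entropy over the class axis; usebits=True measures it in bits.
    log_fn = np.log2 if usebits else np.log
    return -np.sum(one_hot_target * log_fn(np.clip(pred, eps, 1.0)), axis=0)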
Example #9
                       args.batch_size,
                       use_match_type=args.use_match_type,
                       kb_ents_to_type=babi.kb_ents_to_type,
                       kb_ents_to_cand_idxs=babi.kb_ents_to_cand_idxs,
                       match_type_idxs=babi.match_type_idxs,
                       nhops=args.nhops,
                       eps=args.eps,
                       init=GaussianInit(mean=0.0, std=0.1))

a_pred, attention = memn2n(inputs)

# specify loss function, calculate loss and update weights
loss = ng.cross_entropy_multi(a_pred, inputs['answer'], usebits=True)

mean_cost = ng.sum(loss, out_axes=[])
optimizer = Adam(learning_rate=0.001)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference,
                                       inputs['answer'],
                                       usebits=True)

interactive_outputs = dict(test_preds=a_pred_inference,
                           attention=attention_inference)
Example #10
                             axis=inputs['answer'].axes.feature_axes()[0],
                             idx=1)
labels_concat = [label1, label2]
loss1 = ng.cross_entropy_multi(logits1,
                               ng.one_hot(label1, axis=ax.Y),
                               usebits=False)

loss2 = ng.cross_entropy_multi(logits2,
                               ng.one_hot(label2, axis=ax.Y),
                               usebits=False)

# Total Loss
train_loss = loss1 + loss2

# Set optimizer (no learning rate scheduler used)
optimizer = Adam(learning_rate=2e-3)

print('compiling the graph')
# Cost set up
batch_cost = ng.sequential(
    [optimizer(train_loss),
     ng.mean(train_loss, out_axes=())])

# Predicted class is the one with the max probability out of the 2 classes
# Required Outputs: Batch Cost, Train Probability, misclass train
train_outputs = dict(batch_cost=batch_cost,
                     inps=inputs['answer'],
                     logits=ng.stack(logits_concat, span, 1),
                     labels=inputs['answer'],
                     drop=dropout_val)
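The excerpt is truncated, and logits_concat, span, and dropout_val are defined earlier in the source. The total loss and predicted classes can be sketched in NumPy as follows (illustrative only; names are not taken from the example):

import numpy as np

def two_head_loss_and_predictions(probs1, probs2, label1, label2, eps=1e-12):
    # Total loss is one cross-entropy term per output head; the predicted
    # class for each head is the index of its highest probability.
    loss = (-np.log(np.clip(probs1[label1], eps, 1.0))
            - np.log(np.clip(probs2[label2], eps, 1.0)))
    return loss, (int(np.argmax(probs1)), int(np.argmax(probs2)))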