def make_optimizer(name=None, weight_clip_value=None):
    optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9, epsilon=1e-8,
                     weight_clip_value=weight_clip_value)
    return optimizer
def make_optimizer(name=None, weight_clip_value=None, loss_type="WGAN-GP"):
    # WGAN trains the critic with RMSProp and hard weight clipping; WGAN-GP
    # trains with Adam and relies on the gradient penalty instead of clipping.
    if loss_type == "WGAN":
        optimizer = RMSProp(learning_rate=5e-5, decay_rate=0.99, epsilon=1e-8,
                            weight_clip_value=weight_clip_value)
    elif loss_type == "WGAN-GP":
        optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9, epsilon=1e-8,
                         weight_clip_value=weight_clip_value)
    else:
        # Guard against returning an unbound name for unknown loss types
        raise ValueError("Unsupported loss_type: {}".format(loss_type))
    return optimizer
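For context, a minimal usage sketch of the two branches follows. The import path is an assumption (the snippets here do not show their imports), and the clip value of 0.01 is the one suggested in the original WGAN paper; WGAN enforces the critic's Lipschitz constraint by clipping weights, while WGAN-GP replaces clipping with a gradient penalty, so only the WGAN branch needs a clip value.

# Illustrative usage only; the import location is assumed, not confirmed by
# the snippets above.
from ngraph.frontends.neon import Adam, RMSProp

# WGAN: RMSProp plus hard weight clipping on the critic (clip from the paper).
critic_optimizer = make_optimizer(name='critic_optimizer',
                                  weight_clip_value=0.01,
                                  loss_type="WGAN")

# WGAN-GP: the gradient penalty replaces clipping, so no clip value is passed.
generator_optimizer = make_optimizer(name='generator_optimizer',
                                     loss_type="WGAN-GP")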
def test_adam(random_learning_rate, random_beta_1, random_beta_2, epsilon,
              select_variables):
    # Set up the baseline and reference optimizers to be tested
    adam_args = {'learning_rate': random_learning_rate,
                 'beta_1': random_beta_1,
                 'beta_2': random_beta_2,
                 'epsilon': epsilon}
    adam_reference = AdamReference(**adam_args)
    adam = Adam(**adam_args)

    # Test the baseline against the reference, optionally updating only a
    # selected subset of the variables
    if select_variables:
        compare_optimizer_variable_select(adam, adam_reference)
    else:
        compare_optimizer(adam, adam_reference)
def test_adam(random_learning_rate, random_beta_1, random_beta_2, epsilon,
              transformer_factory):
    # Set up the baseline and reference optimizers to be tested
    adam_args = {'learning_rate': random_learning_rate,
                 'beta_1': random_beta_1,
                 'beta_2': random_beta_2,
                 'epsilon': epsilon}
    adam_reference = AdamReference(**adam_args)
    adam = Adam(**adam_args)

    # Test the baseline against the reference
    compare_optimizer(adam, adam_reference)
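The AdamReference class these tests compare against is not shown here. As a purely illustrative sketch of the update rule being verified, below is a self-contained NumPy version of Adam (Kingma & Ba) with bias-corrected moment estimates; the class name and step interface are hypothetical, not the actual reference implementation.

import numpy as np

class AdamSketch(object):
    # Hypothetical reference: the bias-corrected Adam update rule.
    def __init__(self, learning_rate, beta_1, beta_2, epsilon):
        self.lr, self.b1, self.b2, self.eps = learning_rate, beta_1, beta_2, epsilon
        self.m = self.v = 0.0  # first and second moment estimates
        self.t = 0             # timestep, used for bias correction

    def step(self, param, grad):
        self.t += 1
        self.m = self.b1 * self.m + (1 - self.b1) * grad
        self.v = self.b2 * self.v + (1 - self.b2) * grad ** 2
        m_hat = self.m / (1 - self.b1 ** self.t)  # bias-corrected first moment
        v_hat = self.v / (1 - self.b2 ** self.t)  # bias-corrected second moment
        return param - self.lr * m_hat / (np.sqrt(v_hat) + self.eps)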
def make_optimizer(name=None):
    optimizer = Adam(learning_rate=5e-4, beta_1=0.5, beta_2=0.999, epsilon=1e-8)
    return optimizer
        dropout=dropout).layers + [lambda op: ng.map_roles(op, {
            'C': 'F',
            'W': 'REC'
        })] + [affine_layer])
elif args.modeltype == "LSTM":
    model = Sequential(
        recurrent_model.define_model(out_axis,
                                     celltype=args.modeltype,
                                     recurrent_units=hidden_sizes,
                                     return_sequence=True).layers + [Logistic()])

# Optimizer
if args.modeltype == "TCN":
    optimizer = Adam(learning_rate=args.lr,
                     gradient_clip_value=args.grad_clip_value)
else:
    optimizer = GradientDescentMomentum(
        learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (binary cross entropy, since each musical key on
# the piano is encoded as a binary value)
fwd_prop = model(inputs['X'])
fwd_prop = ng.axes_with_order(fwd_prop, out_axes)
train_loss = ng.cross_entropy_binary(fwd_prop, inputs['y'])

with Layer.inference_mode_on():
    preds = model(inputs['X'])
    preds = ng.axes_with_order(preds, out_axes)
eval_loss = ng.mean(ng.cross_entropy_binary(preds, inputs['y']), out_axes=())
eval_computation = ng.computation([eval_loss], "all")
predict_computation = ng.computation([preds], "all")
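Since each piano key is an independent binary target, the loss above is an elementwise binary cross entropy over a multi-hot vector. As a purely illustrative check of that arithmetic, a self-contained NumPy version (not the ngraph op):

import numpy as np

def binary_cross_entropy(pred, target, eps=1e-12):
    # Elementwise binary cross entropy over a multi-hot target; each output
    # unit is an independent Bernoulli (one per piano key).
    pred = np.clip(pred, eps, 1 - eps)  # guard against log(0)
    return -(target * np.log(pred) + (1 - target) * np.log(1 - pred))

# Example: two keys pressed out of four.
target = np.array([1.0, 0.0, 1.0, 0.0])
pred = np.array([0.9, 0.2, 0.7, 0.1])
print(binary_cross_entropy(pred, target).mean())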
    axes=out_axis)
])

# Optimizer
# The following policy sets the initial learning rate to 0.05 (base_lr).
# At iteration (num_iterations // 5), the learning rate is multiplied by
# gamma (new lr = .005); at iteration (num_iterations // 2), it is reduced
# by gamma again (new lr = .0005).
schedule = [num_iterations // 5, num_iterations // 2]
learning_rate_policy = {'name': 'schedule',
                        'schedule': schedule,
                        'gamma': 0.1,
                        'base_lr': 0.05}
optimizer = Adam(learning_rate=learning_rate_policy,
                 iteration=inputs['iteration'],
                 gradient_clip_value=1)

# Define the loss function (squared L2 loss)
fwd_prop = seq1(inputs['X'])
train_loss = ng.squared_L2(fwd_prop - inputs['y'])

# Cost calculation
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of the test set
# Required for correct functioning of batch norm and dropout layers during
# inference mode
with Layer.inference_mode_on():
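As a sanity check of the policy described in the comments, here is a minimal sketch of how a 'schedule' policy maps an iteration count to a learning rate; this is illustrative only, not the ngraph implementation.

def schedule_lr(iteration, schedule, base_lr, gamma):
    # lr = base_lr * gamma ** (number of schedule boundaries already passed)
    steps_taken = sum(1 for boundary in schedule if iteration >= boundary)
    return base_lr * gamma ** steps_taken

num_iterations = 2000
schedule = [num_iterations // 5, num_iterations // 2]
assert abs(schedule_lr(0, schedule, 0.05, 0.1) - 0.05) < 1e-9     # base_lr
assert abs(schedule_lr(400, schedule, 0.05, 0.1) - 0.005) < 1e-9  # after 1st boundary
assert abs(schedule_lr(1000, schedule, 0.05, 0.1) - 0.0005) < 1e-9  # after 2nd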
inputs = train_set.make_placeholders()
vocab_axis = ng.make_axis(length=wikimovies.vocab_size, name='vocab_axis')

memn2n = KVMemN2N(num_iterations, args.batch_size, args.emb_size, args.nhops,
                  wikimovies.story_length, wikimovies.memory_size,
                  wikimovies.vocab_size, vocab_axis, args.use_v_luts)

# Compute answer predictions
a_pred, _ = memn2n(inputs)

loss = ng.cross_entropy_multi(a_pred,
                              ng.one_hot(inputs['answer'], axis=vocab_axis),
                              usebits=True)
# Despite the name, this is the loss summed over the batch, reduced to a scalar
mean_cost = ng.sum(loss, out_axes=[])

optimizer = Adam(learning_rate=args.lr)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# Provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, _ = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference,
                                       ng.one_hot(inputs['answer'], axis=vocab_axis),
                                       usebits=True)
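For illustration, a self-contained NumPy version of the categorical cross entropy computed above; this assumes usebits=True means the loss is reported in bits (log base 2) rather than nats, which matches the neon frontend's convention as I read it, and is not the actual ngraph op.

import numpy as np

def cross_entropy_multi(pred, one_hot_target, usebits=False):
    # Categorical cross entropy against a one-hot target; usebits=True
    # switches from natural log (nats) to log base 2 (bits) -- an assumed
    # reading of the ngraph flag, for illustration only.
    log_fn = np.log2 if usebits else np.log
    return -np.sum(one_hot_target * log_fn(np.clip(pred, 1e-12, 1.0)))

vocab = 5
target = np.eye(vocab)[2]                  # one-hot answer, like ng.one_hot above
pred = np.full(vocab, 0.1)
pred[2] = 0.6
print(cross_entropy_multi(pred, target, usebits=True))  # -log2(0.6) ~ 0.74 bits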
                args.batch_size,
                use_match_type=args.use_match_type,
                kb_ents_to_type=babi.kb_ents_to_type,
                kb_ents_to_cand_idxs=babi.kb_ents_to_cand_idxs,
                match_type_idxs=babi.match_type_idxs,
                nhops=args.nhops,
                eps=args.eps,
                init=GaussianInit(mean=0.0, std=0.1))

a_pred, attention = memn2n(inputs)

# Specify the loss function, calculate the loss, and update the weights
loss = ng.cross_entropy_multi(a_pred, inputs['answer'], usebits=True)
mean_cost = ng.sum(loss, out_axes=[])

optimizer = Adam(learning_rate=0.001)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# Provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference, inputs['answer'],
                                       usebits=True)

interactive_outputs = dict(test_preds=a_pred_inference,
                           attention=attention_inference)
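Both memory-network snippets wrap the optimizer's update op and the scalar cost in ng.sequential, which, as used here, evaluates the listed ops in order and yields the value of the last one, so the cost is read after the weights are updated. A plain-Python sketch of the same update-then-read pattern (names hypothetical):

def sequential(ops):
    # Run each thunk in order; the value of the whole expression is the
    # value of the last op, mirroring how ng.sequential is used above.
    result = None
    for op in ops:
        result = op()
    return result

state = {'w': 1.0}
updates = lambda: state.update(w=state['w'] - 0.1)  # side effect: gradient step
cost = lambda: state['w'] ** 2                      # read cost after the update
print(sequential([updates, cost]))                  # 0.81: cost sees the new weight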
                axis=inputs['answer'].axes.feature_axes()[0],
                idx=1)
labels_concat = [label1, label2]

loss1 = ng.cross_entropy_multi(logits1,
                               ng.one_hot(label1, axis=ax.Y),
                               usebits=False)
loss2 = ng.cross_entropy_multi(logits2,
                               ng.one_hot(label2, axis=ax.Y),
                               usebits=False)

# Total loss
train_loss = loss1 + loss2

# Set the optimizer (no learning rate scheduler used)
optimizer = Adam(learning_rate=2e-3)

print('compiling the graph')

# Cost setup
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])

# The predicted class is the one with the maximum probability
# Required outputs: batch cost, train probabilities, train misclassification
train_outputs = dict(batch_cost=batch_cost,
                     inps=inputs['answer'],
                     logits=ng.stack(logits_concat, span, 1),
                     labels=inputs['answer'],
                     drop=dropout_val)
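The two losses correspond to independent predictions of an answer span's start and end positions, summed into one training loss. A minimal NumPy illustration of that combination (function and variable names are hypothetical; natural log matches usebits=False above):

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())  # shift for numerical stability
    return e / e.sum()

def span_loss(logits_start, logits_end, start_idx, end_idx):
    # Sum of two categorical cross entropies: one over the predicted start
    # position, one over the predicted end position.
    loss_start = -np.log(softmax(logits_start)[start_idx])
    loss_end = -np.log(softmax(logits_end)[end_idx])
    return loss_start + loss_end

logits1 = np.array([0.2, 2.0, 0.1, -1.0])  # start-pointer logits
logits2 = np.array([-0.5, 0.3, 1.8, 0.0])  # end-pointer logits
print(span_loss(logits1, logits2, start_idx=1, end_idx=2))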