# Learning rate schedule: at iteration (num_iterations // 75) the learning
# rate is reduced by gamma; at iteration (num_iterations * 2 // 75), it is
# reduced by gamma again, and so on.
no_steps = 75
step = num_iterations // no_steps
schedule = list(np.arange(step, num_iterations, step))
learning_rate_policy = {'name': 'schedule',
                        'schedule': schedule,
                        'gamma': 0.95,
                        'base_lr': 0.01}
optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    iteration=inputs['iteration'])

# Define the loss function (cross entropy loss)
# Note that we convert the integer values of inputs['y'] to one-hot here
fwd_prop = seq1(inputs['X'])
train_loss = ng.cross_entropy_multi(fwd_prop,
                                    ng.one_hot(inputs['y'], axis=out_axis),
                                    usebits=True)

# Train cost computation
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_computation = ng.computation([batch_cost, fwd_prop], "all")
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of the evaluation set
# Required for correct functioning of batch norm and dropout layers in inference mode
with Layer.inference_mode_on():
    inference_prop = seq1(inputs['X'])
eval_loss = ng.cross_entropy_multi(inference_prop,
                                   ng.one_hot(inputs['y'], axis=out_axis),
                                   usebits=True)
eval_computation = ng.computation([ng.mean(eval_loss, out_axes=()), inference_prop], "all")
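# Illustrative sketch (not part of the example) of what a 'schedule' policy
# computes: the base rate is multiplied by gamma once per schedule boundary
# already passed. Whether the drop applies at exactly the boundary iteration
# may differ slightly from ngraph's internals.
import bisect

def effective_lr(iteration, schedule, base_lr=0.01, gamma=0.95):
    # Count schedule boundaries at or below the current iteration
    n_drops = bisect.bisect_right(schedule, iteration)
    return base_lr * gamma ** n_drops

# With step = num_iterations // 75, the rate after the k-th boundary is
# 0.01 * 0.95 ** k.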
                   activation=Softmax())
])

lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}
optimizer = GradientDescentMomentum(lr_schedule, 0.0, wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)

    cbs = make_default_callbacks(transformer=transformer,
                                 output_file=args.output_file,
                                 frequency=args.iter_interval,
                                 train_computation=train_computation,
                                 total_iterations=args.num_iterations,
                                 use_progress_bar=args.progress_bar)
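# Quick sanity check (illustrative, not in the example): gamma is chosen so
# that the three scheduled drops shrink the learning rate by 250x overall.
gamma = (1 / 250.) ** (1 / 3.)   # ~0.1587 per drop
final_lr = 0.01 * gamma ** 3     # 0.01 / 250 = 4e-05 after the last drop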
def __init__(self):
    # Wrap ngraph's multiclass cross entropy so the object can be called
    # uniformly as loss(Y, T) elsewhere in the code
    self.ng_computation = lambda Y, T: ng.cross_entropy_multi(Y, T)
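# Reference NumPy sketch (an assumption for intuition, not ngraph's code) of
# the quantity ng.cross_entropy_multi computes over the class axis; the
# usebits flag used elsewhere in these examples switches the logarithm to
# base 2 so the loss is reported in bits rather than nats.
import numpy as np

def cross_entropy_multi_ref(Y, T, usebits=False):
    log = np.log2 if usebits else np.log
    return -np.sum(T * log(Y), axis=0)   # class axis assumed to be axis 0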
# Optimizer
# The 'provided' learning rate policy takes the learning rate as a graph
# input through a placeholder. This lets you control the learning rate based
# on various factors of network training.
learning_rate_policy = {'name': 'provided', 'lr_placeholder': lr_ph}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=momentum_coef,
                                    wdecay=wdecay,
                                    nesterov=False,
                                    iteration=input_ph['iteration'])
label_indices = input_ph['label']

# Make a prediction
prediction = resnet(input_ph['image'])

# Calculate loss
train_loss = ng.cross_entropy_multi(prediction,
                                    ng.one_hot(label_indices, axis=ax.Y))

# Average loss over the batch
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Instantiate the Saver object to save weights
weight_saver = Saver()

with Layer.inference_mode_on():
    # Doing inference
    inference_prob = resnet(input_ph['image'])
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       ng.one_hot(label_indices, axis=ax.Y))

# Computation for inference
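# Illustrative host-side schedule for the 'provided' policy (names here are
# hypothetical, not from the example): compute the rate each step and feed it
# through lr_ph when invoking the bound training computation.
def lr_for_iteration(it, base_lr=0.1, warmup_iters=500):
    # Linear warmup, then a constant rate; any host-side logic works
    return base_lr * min(1.0, (it + 1) / float(warmup_iters))

# e.g. feed the value of lr_for_iteration(it) to lr_ph at iteration `it`.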
                 init, activation=Tanh(), return_sequence=True, sum_out=True)

# model initialization
seq1 = Sequential([
    layer_0,
    rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))
])

optimizer = RMSProp()

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                   usebits=True)
eval_outputs = dict(cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
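# Since usebits=True reports the cross entropy in bits per symbol, a common
# derived metric (not computed in the example itself) is perplexity:
def perplexity_from_bits(mean_loss_bits):
    return 2.0 ** mean_loss_bits   # e.g. 1 bit/symbol -> perplexity 2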
linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))

optimizer = RMSProp(decay_rate=0.95,
                    learning_rate=2e-3,
                    epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

# build network graph
one_hot_enc_out = one_hot_enc(inputs['inp_txt'])
one_hot_dec_out = one_hot_dec(inputs['prev_tgt'])
enc_out = enc(one_hot_enc_out)
dec_out = dec(one_hot_dec_out, init_state=enc_out)
output_prob = linear(dec_out)

loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                              usebits=True)
mean_cost = ng.mean(loss, out_axes=[])
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss)

# inference graph
with Layer.inference_mode_on():
    enc_out_inference = enc(one_hot_enc_out)

    # Create decoder placeholders: replace the recurrent axis with a
    # length-1 axis so the decoder can be fed one step at a time
    axes = one_hot_dec_out.axes
    axes = axes - axes.recurrent_axis() + ng.make_axis(length=1, name="REC")
    decoder_input_inference = ng.placeholder(axes, name="input")
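# Minimal greedy-decoding sketch showing how a single-step decoder input like
# decoder_input_inference is typically used (assumption: `generate_fn` is a
# bound computation mapping the previous token and decoder state to next-step
# probabilities and a new state; all names here are hypothetical).
import numpy as np

def greedy_decode(generate_fn, start_token, state, max_len):
    tokens = [start_token]
    for _ in range(max_len):
        probs, state = generate_fn(tokens[-1], state)
        tokens.append(int(np.argmax(probs)))   # take the most likely token
    return tokens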
           Convolution((5, 5, 16), filter_init=init_xav, activation=Rectlin()),
           Pooling((2, 2), strides=2),
           Convolution((5, 5, 32), filter_init=init_xav, activation=Rectlin()),
           Pooling((2, 2), strides=2),
           Affine(nout=500, weight_init=init_xav, activation=Rectlin()),
           Affine(axes=ax.Y, weight_init=init_xav, activation=Softmax())])

optimizer = GradientDescentMomentum(0.01, 0.9)
train_prob = seq1(inputs['image'])
# Multiclass cross entropy pairs with the Softmax output layer and matches
# the evaluation loss below
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(cross_ent_loss=eval_loss, results=inference_prob)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
    loss_computation = make_bound_computation(transformer, eval_outputs, inputs)

    cbs = make_default_callbacks(transformer=transformer,
                                 output_file=args.output_file,
                                 frequency=args.iter_interval,
                                 train_computation=train_computation,
                                 total_iterations=args.num_iterations,
                                 eval_set=valid_set,
                                 loss_computation=loss_computation,
                                 use_progress_bar=args.progress_bar)
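    # A minimal training-loop sketch (illustrative; the ngraph examples wrap
    # this pattern in a loop_train helper). Assumes train_set yields feed
    # dictionaries compatible with the bound computation.
    for it, data in enumerate(train_set):
        data['iteration'] = it
        outputs = train_computation(data)   # dict with the batch_cost value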