def build_seq2seq_computations():
    # Training loss, optimizer
    train_decoded = recurrent_model.encode_and_decode(encoder, decoder,
                                                      inputs['X'], previous)
    train_loss = ng.squared_L2(target - train_decoded)
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    # Evaluation loss
    with Layer.inference_mode_on():
        eval_decoded = recurrent_model.encode_and_generate(encoder, decoder,
                                                           inputs['X'], in_axes)
        eval_loss = ng.mean(ng.squared_L2(target - eval_decoded), out_axes=())
        loss_computation = ng.computation([eval_loss], "all")
    return train_computation, loss_computation
def build_regressor_computations():
    train_preds = predictions(encoder, affine_layer, inputs['X'])
    train_loss = ng.squared_L2(train_preds - inputs['y'])

    # Cost calculation
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        eval_preds = predictions(encoder, affine_layer, inputs['X'])
        eval_loss = ng.mean(ng.squared_L2(eval_preds - inputs['y']), out_axes=())
        loss_computation = ng.computation([eval_loss], "all")
    return train_computation, loss_computation
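
# A minimal usage sketch for the builders above (an assumption, not part of
# the original source): bind the returned computations to a transformer and
# drive them with a feed dict, mirroring the training loops further down in
# this collection.
train_computation, loss_computation = build_regressor_computations()
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
    loss_function = transformer.add_computation(loss_computation)
    for data in train_set:
        feed_dict = {inputs[k]: data[k] for k in inputs.keys()}
        batch_cost = train_function(feed_dict=feed_dict)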
def run_mini_ds2_benchmark(args, **kwargs):
    device_id = kwargs.get('device_id')
    inputs, train_set, eval_set = generate_ds2_data(args.max_length, args.str_w,
                                                    args.nout, args.nbands,
                                                    args.batch_size,
                                                    args.num_iterations)
    model_out = get_mini_ds2(inputs, args.nfilters, args.filter_width, args.str_w,
                             args.nbands, args.depth, args.hidden_size,
                             args.batch_norm, args.hetr_device, device_id)

    if args.bprop:
        with ng.metadata(device=args.hetr_device, device_id=device_id,
                         parallel=ax.N):
            loss = ng.ctc(model_out, ng.flatten(inputs["char_map"]),
                          inputs["audio_length"], inputs["trans_length"])
            optimizer = GradientDescentMomentum(learning_rate=2e-5,
                                                momentum_coef=0.99,
                                                gradient_clip_norm=400,
                                                nesterov=args.nesterov)
            updates = optimizer(loss)
            mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])
            bprop_computation_op = ng.computation(mean_cost, "all")

        benchmark = Benchmark(bprop_computation_op, train_set, inputs,
                              args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark.time(args.num_iterations, args.skip_iter, 'ds2_bprop',
                           args.visualize, preprocess=True))
    else:
        fprop_computation_op = ng.computation(model_out, "all")
        benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                    args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark_fprop.time(args.num_iterations, args.skip_iter, 'ds2_fprop',
                                 args.visualize, preprocess=True))
def test_persistent_tensor():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    bgr = ng.persistent_tensor(axes=input_axes,
                               initial_value=np.array([113.9, 123.0, 125.3]))
    bgr_comp = ng.computation(bgr, "all")

    results = dict()
    weight_saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        bgr_func = transformer.add_computation(bgr_comp)
        weight_saver.setup_save(transformer=transformer, computation=bgr_comp)
        results['saved'] = bgr_func().copy()
        weight_saver.save(filename="test_persistent_tensor")
    with closing(ngt.make_transformer()) as restore_transformer:
        bgr_refunc = restore_transformer.add_computation(bgr_comp)
        weight_saver.setup_restore(transformer=restore_transformer,
                                   computation=bgr_comp,
                                   filename="test_persistent_tensor")
        weight_saver.restore()
        results['restored'] = bgr_refunc().copy()
    os.remove("test_persistent_tensor.npz")
    assert np.allclose(results['saved'], results['restored'], atol=0)
def run_mini_ds2_benchmark(max_length, nbands, str_w, batch_size, max_iter,
                           skip_iter, nfilters, filter_width, depth, hidden_size,
                           batch_norm, device_id, device, transformer,
                           visualize=False):
    inputs, train_set, eval_set = generate_ds2_data(max_length, str_w, nbands,
                                                    batch_size, max_iter)
    model_out = get_mini_ds2(inputs, nfilters, filter_width, str_w, nbands,
                             depth, hidden_size, batch_norm, device_id)

    fprop_computation_op = ng.computation(model_out, "all")

    benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                transformer, device)
    Benchmark.print_benchmark_results(
        benchmark_fprop.time(max_iter, skip_iter, 'ds2_fprop', visualize))
def run_resnet_benchmark(dataset, num_iterations, n_skip, batch_size, device_id,
                         transformer_type, device, bprop=True, batch_norm=False,
                         visualize=False, stage_depth=1):
    inputs, data, train_set = get_fake_data(dataset, batch_size, num_iterations)

    # Running forward propagation
    model_out = get_mini_resnet(inputs, dataset, device, device_id,
                                batch_norm=batch_norm, stage_depth=stage_depth)

    # Running back propagation
    if bprop:
        with ng.metadata(device=device, device_id=device_id, parallel=ax.N):
            optimizer = GradientDescentMomentum(0.01, 0.9)
            train_loss = ng.cross_entropy_multi(
                model_out, ng.one_hot(inputs['label'], axis=ax.Y))
            batch_cost = ng.sequential(
                [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
            batch_cost_computation_op = ng.computation(batch_cost, "all")

        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_bprop',
                           visualize, 'device_id'))
    else:
        fprop_computation_op = ng.computation(model_out, 'all')
        benchmark = Benchmark(fprop_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_fprop',
                           visualize))
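
# Hypothetical invocation of the benchmark above; the argument values and the
# 'hetr' transformer/device-id format are illustrative assumptions.
run_resnet_benchmark('cifar10', num_iterations=100, n_skip=10, batch_size=64,
                     device_id=['0'], transformer_type='hetr', device='cpu',
                     bprop=True, batch_norm=True, visualize=False, stage_depth=1)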
def __init__(self, tuning_parameters, name="", global_network=None,
             network_is_local=True):
    Architecture.__init__(self, tuning_parameters, name)
    assert tuning_parameters.agent.neon_support, \
        'Neon is not supported for this agent'
    self.clip_error = tuning_parameters.clip_gradients
    self.total_loss = None
    self.epoch = 0
    self.inputs = []
    self.outputs = []
    self.targets = []
    self.losses = []
    self.transformer = tuning_parameters.sess
    self.network = self.get_model(tuning_parameters)
    self.accumulated_gradients = []

    # training and inference ops
    train_output = ng.sequential([self.optimizer(self.total_loss),
                                  self.total_loss])
    placeholders = self.inputs + self.targets
    self.train_op = self.transformer.add_computation(
        ng.computation(train_output, *placeholders))
    self.predict_op = self.transformer.add_computation(
        ng.computation(self.outputs, self.inputs[0]))

    # update weights from array op
    self.weights = [ng.placeholder(w.axes)
                    for w in self.total_loss.variables()]
    self.set_weights_ops = []
    for target_variable, variable in zip(self.total_loss.variables(),
                                         self.weights):
        self.set_weights_ops.append(
            self.transformer.add_computation(
                ng.computation(ng.assign(target_variable, variable), variable)))

    # get weights op
    self.get_variables = self.transformer.add_computation(
        ng.computation(self.total_loss.variables()))
def test_conv1d(transformer_factory, filter_width, num_filters, strides,
                padding, time_steps, feature_dimension, batch_size):
    dilation = 1  # reference conv does not support dilation

    F = ng.make_axis(name='F', length=feature_dimension)
    REC = ng.make_axis(name='REC', length=time_steps)
    N = ng.make_axis(name='N', length=batch_size)
    in_axes = ng.make_axes([F, REC, N])
    inputs = ng.placeholder(axes=in_axes)
    input_vals = np.random.randn(*in_axes.lengths)

    filter_init = GaussianInit()
    conv1d = Convolution((filter_width, num_filters), filter_init,
                         strides=strides, padding=padding, dilation=dilation,
                         bias_init=None, activation=Rectlin(), batch_norm=None)

    result_op = conv1d(inputs, channel_axes='F', spatial_axes={'W': 'REC'})

    with closing(ngt.make_transformer()) as transformer:
        result_comp = transformer.add_computation(
            ng.computation(result_op, inputs))
        filter_vals = transformer.add_computation(
            ng.computation(conv1d.conv.W))()

        result_ng = result_comp(input_vals)
        result_np = np.squeeze(
            reference_conv1d(input_vals, filter_vals,
                             lambda x: np.maximum(0, x)))
        ng.testing.assert_allclose(result_ng, result_np)
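
# Sketch of the kind of naive NumPy 1D convolution a helper like
# `reference_conv1d` presumably implements (valid convolution plus an
# activation). The (F, W, N) input layout, (F, S, K) filter layout, and the
# omission of padding are assumptions for illustration, not the real
# test utility's contract.
def naive_conv1d(inputs, filters, activation, stride=1):
    F, W, N = inputs.shape           # features, width (time steps), batch
    _, S, K = filters.shape          # filter taps, number of filters
    W_out = (W - S) // stride + 1
    out = np.zeros((K, W_out, N))
    for k in range(K):
        for t in range(W_out):
            # slide a (F, S) window over the input and correlate with filter k
            window = inputs[:, t * stride:t * stride + S, :]   # (F, S, N)
            out[k, t, :] = np.tensordot(filters[:, :, k], window,
                                        axes=([0, 1], [0, 1]))
    return activation(out)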
def run_resnet_benchmark(dataset, n_iter, n_skip, batch_size, device_id,
                         transformer_type, device, bprop=False, visualize=False):
    inputs, data, train_set = get_fake_data(dataset, batch_size, n_iter)
    model_out = get_mini_resnet(inputs, dataset, device_id)

    # Running forward propagation
    fprop_computation_op = ng.computation(model_out, 'all')
    benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                transformer_type, device)
    Benchmark.print_benchmark_results(
        benchmark_fprop.time(n_iter, n_skip, dataset + '_msra_fprop', visualize))

    # Running back propagation
    if bprop:
        optimizer = GradientDescentMomentum(0.01, 0.9)
        train_loss = ng.cross_entropy_multi(
            model_out, ng.one_hot(inputs['label'], axis=ax.Y))
        batch_cost = ng.sequential([optimizer(train_loss),
                                    ng.mean(train_loss, out_axes=())])
        batch_cost_computation_op = ng.computation(batch_cost, "all")
        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(n_iter, n_skip, dataset + '_msra_bprop', visualize))
def test_variable():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    var = ng.variable(axes=input_axes)
    assign_val = np.random.rand(10, 3)
    var_assign = ng.AssignOp(tensor=var, val=assign_val)
    var_seq = ng.sequential([var_assign, var])
    var_comp = ng.computation(var_seq, "all")

    results = dict()
    weight_saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        var_func = transformer.add_computation(var_comp)
        weight_saver.setup_save(transformer=transformer, computation=var_comp)
        results['saved'] = var_func().copy()
        weight_saver.save(filename="test_variable")

    reassign_val = np.random.rand(10, 3)
    var_reassign = ng.AssignOp(tensor=var, val=reassign_val)
    var_recomp = ng.computation(var_reassign, "all")
    var_read = ng.computation(var, "all")
    with closing(ngt.make_transformer()) as restore_transformer:
        var_recompfunc = restore_transformer.add_computation(var_recomp)
        weight_saver.setup_restore(transformer=restore_transformer,
                                   computation=var_recomp,
                                   filename="test_variable")
        var_readfunc = restore_transformer.add_computation(var_read)
        var_recompfunc()
        results['reassigned'] = var_readfunc().copy()
        weight_saver.restore()
        results['restored'] = var_readfunc().copy()

    os.remove("test_variable.npz")
    assert np.allclose(results['saved'], assign_val, atol=0)
    assert np.allclose(results['reassigned'], reassign_val, atol=0)
    assert np.allclose(results['saved'], results['restored'], atol=0)
def test_deconv():
    """
    basic test of deconv fprop.
    ngraph/tests/test_conv.py tests ng.deconvolution bprop
    """
    # filter params
    R, S = 5, 5
    fshape = (R, S, 1)
    strides = 2
    filter_val_nz = np.arange(1, R * S + 1).reshape(R, S)
    filter_val = np.zeros(fshape)
    filter_val[:, :, 0] = filter_val_nz

    deconv = Deconvolution(fshape,
                           filter_init=ConstantInit(filter_val),
                           strides=strides,
                           padding=0,
                           dilation=1)

    N = ng.make_axis(name='N', length=1)  # batch
    image_shape = (1, 8, 8)  # CHW
    image_axes = ng.make_axes([ng.make_axis(name=nm, length=l)
                               for nm, l in zip('CHW', image_shape)])
    image_axes |= N
    image = ng.placeholder(axes=image_axes)

    output = deconv(image)

    with closing(ngt.make_transformer()) as transformer:
        comp = transformer.add_computation(ng.computation(output, image))

        input_val = np.zeros(image_shape + (N.length, ), dtype=float)
        input_val[0, 0, 0] = 1
        input_val[0, 5, 5] = 1
        input_val[0, 7, 7] = 1
        result = comp(input_val)
        feature_map = np.squeeze(result)

        assert (feature_map[:5, :5] == filter_val_nz).all()

        result2 = filter_val_nz.copy()
        result2[-1, -1] = 26
        assert (feature_map[10:15, 10:15] == result2).all()

        result3 = filter_val_nz.copy()
        result3[0, 0] = 26
        assert (feature_map[-5:, -5:] == result3).all()
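
# Why the expected overlap value in test_deconv is 26: with stride 2 and a
# 5x5 kernel, the unit impulses at (0, 0), (5, 5) and (7, 7) stamp copies of
# the kernel at output offsets (0, 0), (10, 10) and (14, 14) in the
# 19x19 feature map ((8 - 1) * 2 + 5 = 19). The stamps from (5, 5) and
# (7, 7) share exactly one output pixel, (14, 14), where the contributions
# kernel[-1, -1] = 25 and kernel[0, 0] = 1 sum to 26.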
def run_cifar_benchmark(n_iter=10, n_skip=5, batch_size=4,
                        transformer_type='cpu'):
    inputs, data, train_set = get_fake_cifar(batch_size, n_iter)
    model = get_mini_resnet(inputs)
    optimizer = GradientDescentMomentum(0.01, 0.9)

    train_loss = ng.cross_entropy_multi(model(inputs['image']),
                                        ng.one_hot(inputs['label'], axis=ax.Y))
    batch_cost = ng.sequential(
        [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    batch_cost_computation_op = ng.computation(batch_cost, "all")

    feed_dict = fill_feed_dict(train_set, inputs)
    benchmarks = dict()
    # Note: the timed computation includes the optimizer update, so despite
    # the '_fprop' key this measures a full training step.
    benchmarks['cifar_msra_fprop'] = run_benchmark(batch_cost_computation_op,
                                                   transformer_type, feed_dict,
                                                   n_skip, n_iter)
    print_benchmark_results(benchmarks)
    'schedule': [32000, 48000],
    'gamma': 0.1,
    'base_lr': 0.1
}
optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=0.9,
                                    wdecay=0.0001,
                                    iteration=inputs['iteration'])
label_indices = inputs['label']
train_loss = ng.cross_entropy_multi(resnet(inputs['image']),
                                    ng.one_hot(label_indices, axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

with Layer.inference_mode_on():
    inference_prob = resnet(inputs['image'])
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                          label_indices)
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       ng.one_hot(label_indices, axis=ax.Y))
    eval_loss_names = ['cross_ent_loss', 'misclass']
    eval_computation = ng.computation([eval_loss, errors], "all")

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_function = transformer.add_computation(train_computation)
eval_function = transformer.add_computation(eval_computation)
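
# A sketch of how the two bound functions above are typically driven. The
# feed_dict calling convention mirrors the other training loops in this
# collection; the evaluation interval is an illustrative assumption.
for step, data in enumerate(train_set):
    data['iteration'] = step
    feed_dict = {inputs[k]: data[k] for k in inputs.keys()}
    cost = train_function(feed_dict=feed_dict)
    if step % 100 == 0:
        # eval_computation returns [eval_loss, errors]
        loss_val, misclass = eval_function(feed_dict=feed_dict)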
def build_generator_computation():
    with Layer.inference_mode_on():
        generated = recurrent_model.encode_and_generate(encoder, decoder,
                                                        inputs['X'], in_axes)
    return ng.computation([generated], "all")
def build_regressor_prediction():
    with Layer.inference_mode_on():
        eval_preds = predictions(encoder, affine_layer, inputs['X'])
    return ng.computation([eval_preds], "all")
    'name': 'schedule',
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}
optimizer = GradientDescentMomentum(lr_schedule, 0.0, wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)

    if args.no_progress_bar:
        ncols = 0
    else:
        ncols = 100

    tpbar = tqdm(unit="batches", ncols=ncols, total=args.num_iterations)
    interval_cost = 0.0

    for step, data in enumerate(train_set):
        data['iteration'] = step
        feed_dict = {inputs[k]: data[k] for k in inputs.keys()}
train_loss_main = ng.cross_entropy_multi(train_prob_main, y_onehot,
                                         enable_softmax_opt=False)

train_prob_aux = inception.seq_aux(inception.seq1(inputs['image']))
train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name})
train_loss_aux = ng.cross_entropy_multi(train_prob_aux, y_onehot,
                                        enable_softmax_opt=False)

batch_cost = ng.sequential([
    optimizer(train_loss_main + 0.4 * train_loss_aux),
    ng.mean(train_loss_main, out_axes=())
])
train_computation = ng.computation([batch_cost], 'all')

# Build the computations for inference (evaluation)
with Layer.inference_mode_on():
    inference_prob = inception.seq2(inception.seq1(inputs['image']))
    slices = [0 if cx.name in ("H", "W") else slice(None)
              for cx in inference_prob.axes]
    inference_prob = ng.tensor_slice(inference_prob, slices)
    inference_prob = ng.map_roles(inference_prob, {"C": "Y"})
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                          inputs['label'])
    eval_loss = ng.cross_entropy_multi(inference_prob, y_onehot,
                                       enable_softmax_opt=False)
optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=momentum_coef,
                                    wdecay=wdecay,
                                    nesterov=False,
                                    iteration=input_ops_train['iteration'])
# Make a prediction
prediction = resnet(input_ops_train['image'])
# Calculate loss
train_loss = ng.cross_entropy_multi(
    prediction, ng.one_hot(input_ops_train['label'], axis=ax.Y))
# Average loss over the batch
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

# Instantiate the Saver object to save weights
weight_saver = Saver()

with ng.metadata(device=device_hetr, device_id=device_id, parallel=ax.N):
    # Inference
    with Layer.inference_mode_on():
        # Doing inference
        inference_prob = resnet(input_ops_valid['image'])
        eval_loss = ng.cross_entropy_multi(
            inference_prob, ng.one_hot(input_ops_valid['label'], axis=ax.Y))
    # Computation for inference
    eval_computation = ng.computation(
        [inference_prob, eval_loss, input_ops_valid['label']], "all")
optimizer = GradientDescentMomentum(
    args.lr,
    momentum_coef=args.momentum,
    gradient_clip_norm=args.gradient_clip_norm,
    nesterov=args.nesterov)

start = time.time()
updates = optimizer(loss)
stop = time.time()
logger.debug("Optimizer graph creation took {} seconds".format(stop - start))
mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])

# Create computation and initialize the transformer to allocate weights
train_computation = ng.computation([mean_cost, output], "all")
if inference is True:
    with Layer.inference_mode_on():
        eval_output = ds2(inputs["audio"],
                          spatial_axes={"H": "frequency", "W": "time"})
    eval_computation = ng.computation(eval_output, "all")

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
    if inference is True:
        eval_function = transformer.add_computation(eval_computation)
# inference graph
with Layer.inference_mode_on():
    enc_out_inference = enc(one_hot_enc_out)

    # Create decoder placeholders
    axes = one_hot_dec_out.axes
    axes = axes - axes.recurrent_axis() + ng.make_axis(length=1, name="REC")
    decoder_input_inference = ng.placeholder(axes, name="input")
    decoder_state_inference = ng.placeholder(enc_out_inference.axes,
                                             name="state")
    dec_out_inference = dec(decoder_input_inference,
                            init_state=decoder_state_inference)
    inference_out = linear(dec_out_inference)

encoder_computation = ng.computation(enc_out_inference, inputs["inp_txt"])
decoder_computation = ng.computation([inference_out, dec_out_inference],
                                     decoder_input_inference,
                                     decoder_state_inference)

######################
# Train Loop

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    # training computations
    train_computation = make_bound_computation(transformer, train_outputs,
                                               inputs)
    loss_computation = make_bound_computation(transformer, loss_outputs,
                                              inputs)
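
    # A rough greedy-decoding sketch using the two computations above. This
    # usage is an assumption: it presumes the decoder output can be fed back
    # as the next state, and `start_symbol`, `max_len`, and the one-hot
    # re-encoding of the argmax token are hypothetical.
    encode_fn = transformer.add_computation(encoder_computation)
    decode_fn = transformer.add_computation(decoder_computation)
    state = encode_fn(input_text)          # encode the source sequence
    token = start_symbol                   # hypothetical start-of-sequence input
    for _ in range(max_len):
        probs, dec_state = decode_fn(token, state)
        token = one_hot(np.argmax(probs, axis=0))  # hypothetical helper
        state = dec_state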
    optimizer = Adam(learning_rate=args.lr,
                     gradient_clip_value=args.grad_clip_value)
else:
    optimizer = GradientDescentMomentum(
        learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (binary cross entropy, since each musical key on
# the piano is encoded as a binary value)
fwd_prop = model(inputs['X'])
fwd_prop = ng.axes_with_order(fwd_prop, out_axes)
train_loss = ng.cross_entropy_binary(fwd_prop, inputs['y'])

with Layer.inference_mode_on():
    preds = model(inputs['X'])
    preds = ng.axes_with_order(preds, out_axes)
eval_loss = ng.mean(ng.cross_entropy_binary(preds, inputs['y']), out_axes=())
eval_computation = ng.computation([eval_loss], "all")
predict_computation = ng.computation([preds], "all")

# Cost calculation
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

trainer = TimeseriesTrainer(optimizer, train_computation, eval_computation,
                            predict_computation, inputs,
                            model_graph=[model],
                            tensorboard_dir="./tfboard")
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}
optimizer = GradientDescentMomentum(lr_schedule, 0.0, wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, 'all')

with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)

    if args.no_progress_bar:
        ncols = 0
    else:
        ncols = 100

    tpbar = tqdm(unit="batches", ncols=ncols, total=args.num_iterations)
    interval_cost = 0.0

    for step, data in enumerate(train_set):
        data['iteration'] = step
        feed_dict = {inputs[k]: data[k] for k in inputs.keys()}
def __init__(self, state_axes, action_size, batch_size, model,
             learning_rate=0.0001):
    """
    for now, model must be a function which takes action_axes, and returns
    a neon container
    """
    super(ModelWrapper, self).__init__()

    self.axes = Namespace()
    self.axes.state = make_axes(state_axes, name='state')
    self.axes.action = ng.make_axis(name='action', length=action_size)
    self.axes.n = ng.make_axis(name='N', length=batch_size)
    self.axes.n1 = ng.make_axis(name='N', length=1)

    # placeholders
    self.state = ng.placeholder(self.axes.state + [self.axes.n])
    self.state_single = ng.placeholder(self.axes.state + [self.axes.n1])
    self.target = ng.placeholder([self.axes.action, self.axes.n])

    # these q functions have the same structure but different variables
    self.q_function = model(self.axes.action)
    self.q_function_target = model(self.axes.action)

    # construct inference computation
    with neon.Layer.inference_mode_on():
        inference = self.q_function(self.state)
    inference_computation = ng.computation(inference, self.state)

    # construct inference target computation
    with neon.Layer.inference_mode_on():
        inference_target = self.q_function_target(self.state)
    inference_target_computation = ng.computation(inference_target, self.state)

    # construct inference computation for evaluating a single observation
    with neon.Layer.inference_mode_on():
        inference_single = self.q_function(self.state_single)
    inference_computation_single = ng.computation(inference_single,
                                                  self.state_single)

    # update q function target weights with values from q function
    # assumes that the variables in each are in the same order
    update_computation = ng.computation(ng.doall([
        ng.assign(target_variable,
                  ng.cast_axes(variable, target_variable.axes))
        for target_variable, variable in zip(
            self.q_function_target.variables.values(),
            self.q_function.variables.values())
    ]))

    # construct training computation
    loss = ng.squared_L2(self.q_function(self.state) - self.target)
    optimizer = neon.RMSProp(learning_rate=learning_rate,
                             gradient_clip_value=1)
    train_output = ng.sequential([optimizer(loss), loss])
    train_computation = ng.computation(train_output, self.state, self.target)

    # now bind computations we are interested in
    self.transformer = ng.transformers.make_transformer()
    self.inference_function = self.transformer.add_computation(
        inference_computation)
    self.inference_target_function = self.transformer.add_computation(
        inference_target_computation)
    self.inference_function_single = self.transformer.add_computation(
        inference_computation_single)
    self.train_function = self.transformer.add_computation(train_computation)
    self.update_function = self.transformer.add_computation(update_computation)

    # run a single update to ensure that both q functions have the same
    # initial weights
    self.update()
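
# Note on the target-network sync above: the two model instances own distinct
# axes objects, so ng.cast_axes re-labels each source variable onto its
# target's axes before ng.assign, and ng.doall bundles every assignment into
# one computation so a single call to update_function copies all weights.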
# Cost calculation
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of test set
# Required for correct functioning of batch norm and dropout layers during
# inference mode
with Layer.inference_mode_on():
    inference_prob = seq1(inputs['X'])

eval_loss = ng.squared_L2(inference_prob - inputs['y'])
eval_outputs = dict(l2_loss=eval_loss)

# Define computations
print('Start training')
eval_computation = ng.computation(inference_prob, "all")

with closing(ngt.make_transformer()) as transformer:
    # transformer = ngt.make_transformer()
    train_computation = make_bound_computation(transformer, train_outputs,
                                               inputs)
    loss_computation = make_bound_computation(transformer, eval_outputs,
                                              inputs)
    eval_function = transformer.add_computation(eval_computation)

    # Printout interval of the validation set loss during training
    iter_interval = num_iterations // 10

    cbs = make_default_callbacks(transformer=transformer,
                                 output_file=args.output_file,
                                 frequency=iter_interval,
                                 train_computation=train_computation,
    'base_lr': 0.01
}
optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    iteration=inputs['iteration'])

# Define the loss function (Cross entropy loss)
# Note that we convert the integer values of input['y'] to one hot here
fwd_prop = seq1(inputs['X'])
train_loss = ng.cross_entropy_multi(fwd_prop,
                                    ng.one_hot(inputs['y'], axis=out_axis),
                                    usebits=True)

# Train cost computation
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation([batch_cost, fwd_prop], "all")
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of evaluation set
# Required for correct functioning of batch norm and dropout layers during
# inference mode
with Layer.inference_mode_on():
    inference_prop = seq1(inputs['X'])

eval_loss = ng.cross_entropy_multi(inference_prop,
                                   ng.one_hot(inputs['y'], axis=out_axis),
                                   usebits=True)
eval_computation = ng.computation(
    [ng.mean(eval_loss, out_axes=()), inference_prop], "all")
eval_outputs = dict(x_ent_loss=eval_loss)

# Computation for text generation - this is pure inference (fwd prop)
gen_computation = ng.computation(inference_prop, "all")
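
# Hypothetical text-generation sketch using gen_computation: repeatedly run
# forward prop on a rolling window of characters and append the argmax
# prediction. Binding via a transformer is assumed as in the other examples,
# and `seed_window`, `num_chars`, and the (Y, time, batch) output layout are
# illustrative assumptions.
generate_fn = transformer.add_computation(gen_computation)
window = seed_window                      # assumed (time, batch) int array
for _ in range(num_chars):
    probs = generate_fn(feed_dict={inputs['X']: window})
    next_char = np.argmax(probs[:, -1, 0])    # greedy pick; sampling also works
    window = np.concatenate([window[1:], [[next_char]]], axis=0)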