def test_logreg(transformer_factory):
    # xs: (C, N), y: (N,)
    xs = np.array([[0.52, 0.88, 0.52, 0.74],
                   [1.12, -1.08, 0.06, -2.49],
                   [0.77, 0.15, -1.3, 1.39]])
    ys = np.array([1, 1, 0, 1])
    max_iter = 10
    alpha = 0.1
    thetas = np.array([0., 0., 0.])

    np_logreg = NumpyLogreg(xs, ys, thetas)

    C, N = ng.make_axis(length=3), ng.make_axis(length=4)

    # input tensors
    xs_v = ng.placeholder((C, N))
    ys_v = ng.placeholder([N])
    alpha_v = ng.placeholder(())
    thetas_var = ng.variable([C], initial_value=thetas)

    # define ops
    ys_pred = ng.sigmoid(ng.dot(thetas_var, xs_v))
    log_likelihoods = ng.log(ys_pred) * ys_v + ng.log(1 - ys_pred) * (1 - ys_v)
    loss = -ng.sum(log_likelihoods, reduction_axes=[N])
    grad_comp = ng.deriv(loss, thetas_var)
    weight_update = ng.sequential(
        [ng.assign(thetas_var, thetas_var - alpha_v * grad_comp), thetas_var])

    # transformer
    with ExecutorFactory() as ex:
        train_eval_func = ex.executor([grad_comp, loss, weight_update],
                                      xs_v, ys_v, alpha_v)

        # evaluate
        for i in range(max_iter):
            grad_np, loss_np, thetas_np = np_logreg.optimize(alpha)
            grad_ng, loss_ng, thetas_ng = train_eval_func(xs, ys, alpha)
            ng.testing.assert_allclose(loss_np, loss_ng)
            ng.testing.assert_allclose(grad_np, grad_ng)
            ng.testing.assert_allclose(thetas_np, thetas_ng)
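# --- Hedged sketch: the NumpyLogreg reference class used by the test above
# is not shown in this collection. A minimal version, with the interface
# assumed from the call sites (constructor taking xs, ys, thetas, and an
# optimize(alpha) method returning the pre-update gradient and loss plus the
# post-update weights), could look like this (assumes numpy as np):
class NumpyLogreg(object):

    def __init__(self, xs, ys, thetas):
        self.xs, self.ys, self.thetas = xs, ys, thetas

    def optimize(self, alpha):
        # sigmoid(theta . x) over the batch axis N
        ys_pred = 1.0 / (1.0 + np.exp(-np.dot(self.thetas, self.xs)))
        # negative log-likelihood summed over the batch
        loss = -np.sum(np.log(ys_pred) * self.ys +
                       np.log(1 - ys_pred) * (1 - self.ys))
        # d(loss)/d(thetas) = xs . (ys_pred - ys)
        grad = np.dot(self.xs, ys_pred - self.ys)
        # plain gradient-descent step, mirroring the ng.assign update above
        self.thetas = self.thetas - alpha * grad
        return grad, loss, self.thetas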
def __call__(self, cost_func, variable_scope=None):
    self._pre_call_hook()
    all_updates = []
    batch_cost = ng.sum(cost_func, out_axes=())
    batch_size = cost_func.axes.batch_axis().length

    selected_variables = batch_cost.variables()
    if variable_scope is not None:
        selected_variables = [op for op in selected_variables
                              if op.scope == variable_scope]
    grads = [ng.deriv(batch_cost, v) / batch_size
             for v in selected_variables]
    scale_factor = clip_gradient_norm(grads, self.gradient_clip_norm)

    for variable, grad in zip(selected_variables, grads):
        updates = self.variable_update(variable, grad, scale_factor)
        all_updates.append(updates)
    updates = ng.doall(all_updates)
    grads = ng.doall(grads)

    return ng.sequential([grads, updates, 0])
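# --- Usage sketch: the update op returned by the __call__ above is what the
# training scripts below chain with the cost through ng.sequential, so the
# weight updates execute before the (already computed) loss value is
# returned. A minimal, illustrative example with a toy loss, mirroring the
# patterns used elsewhere in these snippets (axes and values are arbitrary):
C = ng.make_axis(length=4)
N = ng.make_axis(length=8, name='N')
data = ng.placeholder([C, N])
target = ng.placeholder([N])
W = ng.variable([C], initial_value=np.zeros(4))
cost = ng.squared_L2(target - ng.dot(W, data))
optimizer = GradientDescentMomentum(0.1, 0.9)
# updates run first; the mean cost is the value the computation returns
batch_cost = ng.sequential([optimizer(cost), ng.mean(cost, out_axes=())])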
def run_cifar_benchmark(n_iter=10, n_skip=5, batch_size=4,
                        transformer_type='cpu'):
    inputs, data, train_set = get_fake_cifar(batch_size, n_iter)
    model = get_mini_resnet(inputs)
    optimizer = GradientDescentMomentum(0.01, 0.9)

    train_loss = ng.cross_entropy_multi(model(inputs['image']),
                                        ng.one_hot(inputs['label'], axis=ax.Y))

    batch_cost = ng.sequential(
        [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    batch_cost_computation_op = ng.computation(batch_cost, "all")

    feed_dict = fill_feed_dict(train_set, inputs)
    benchmarks = dict()
    benchmarks['cifar_msra_fprop'] = run_benchmark(batch_cost_computation_op,
                                                   transformer_type,
                                                   feed_dict, n_skip, n_iter)
    print_benchmark_results(benchmarks)
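# --- The fill_feed_dict helper is defined elsewhere in the benchmark
# utilities; a plausible minimal version, assuming it simply pairs each
# placeholder in `inputs` with the matching array from one batch of the
# dataset iterator (the real helper may instead build one dict per
# iteration), could be:
def fill_feed_dict(dataset, inputs):
    # Hypothetical sketch: take a single batch and map each placeholder
    # to the array of the same name.
    batch = next(iter(dataset))
    return {inputs[name]: batch[name] for name in inputs}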
def run_resnet_benchmark(dataset, num_iterations, n_skip, batch_size,
                         device_id, transformer_type, device,
                         bprop=True, visualize=False):
    inputs, data, train_set = get_fake_data(dataset, batch_size,
                                            num_iterations)

    # Running forward propagation
    model_out = get_mini_resnet(inputs, dataset, device_id)

    # Running back propagation
    if bprop:
        with ng.metadata(device_id=device_id, parallel=ax.N):
            optimizer = GradientDescentMomentum(0.01, 0.9)
            train_loss = ng.cross_entropy_multi(
                model_out, ng.one_hot(inputs['label'], axis=ax.Y))

            batch_cost = ng.sequential(
                [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
            batch_cost_computation_op = ng.computation(batch_cost, "all")

            benchmark = Benchmark(batch_cost_computation_op, train_set,
                                  inputs, transformer_type, device)
            Benchmark.print_benchmark_results(
                benchmark.time(num_iterations, n_skip,
                               dataset + '_msra_bprop', visualize,
                               'device_id'))
    else:
        fprop_computation_op = ng.computation(model_out, 'all')

        benchmark = Benchmark(fprop_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip,
                           dataset + '_msra_fprop', visualize))
def test_weight_clipping(w_clip, optimizer):
    opt_ng = optimizer(0.1, weight_clip_value=w_clip)
    if isinstance(opt_ng, Adam):
        pytest.config.argon_skip_now("Argon Transformer error")  # TODO triage

    # Set up data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')

    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # params to be updated using the optimizer under test;
    # make sure the initial values lie outside the clip range
    np_W = 10 * w_clip * (2 * np.random.rand(C.length) - 1)
    W = ng.variable([C], initial_value=np_W)

    # double-check the generated initial W values
    assert np.max(np_W) > w_clip
    assert np.min(np_W) < -w_clip

    # Set up op graph
    cost = ng.sum(target - ng.dot(W, data), out_axes=())

    updated_weights = ng.sequential([opt_ng(cost), W])

    epsilon = w_clip * 1e-3
    # Set up the computation and run the "train" loop
    with ExecutorFactory() as ex:
        opt_ng_comp = ex.transformer.computation(updated_weights, data, target)
        mock_dataset = data_generator(20, C.length, N.length)

        for x, y in mock_dataset:
            ng_W = opt_ng_comp(x, y)  # updated weights from the ngraph optimizer

            assert np.max(ng_W) < w_clip + epsilon
            assert np.min(ng_W) > -w_clip - epsilon
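# --- The data_generator helper is not shown; a plausible minimal version,
# assuming it yields (data, target) batches shaped to match the (C, N) data
# placeholder and (N,) target placeholder for the requested number of
# iterations, could be:
def data_generator(n_iter, c_len, n_len):
    # Hypothetical sketch: random batches for the mock "train" loop above.
    for _ in range(n_iter):
        yield (np.random.rand(c_len, n_len).astype(np.float32),
               np.random.rand(n_len).astype(np.float32))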
def test_setting(M):
    with ExecutorFactory() as ex:
        axes = ng.make_axes([M])

        np_x = np.array([1, 2, 3], dtype=np.float32)
        np_y = np.array([1, 3, 5], dtype=np.float32)

        y = ng.constant(np_y, axes)
        v = ng.variable(axes, initial_value=np_x)

        f_v = ex.executor(v)

        # run the assign side effect first, then read v
        vset = ng.sequential([ng.assign(v, v + y), v])
        f_v1 = ex.executor(vset)

        f_v2 = ex.executor(v)

        e_v = f_v().copy()
        assert ng.testing.allclose(e_v, np_x)

        e_v1 = f_v1().copy()
        assert ng.testing.allclose(e_v1, np_x + np_y)

        e_v2 = f_v2().copy()
        assert ng.testing.allclose(e_v2, np_x + np_y)
def test_variable():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    var = ng.variable(axes=input_axes)

    assign_val = np.random.rand(10, 3)
    var_assign = ng.AssignOp(tensor=var, val=assign_val)
    var_seq = ng.sequential([var_assign, var])
    var_comp = ng.computation(var_seq, "all")

    results = dict()
    weight_saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        var_func = transformer.add_computation(var_comp)
        weight_saver.setup_save(transformer=transformer, computation=var_comp)
        results['saved'] = var_func().copy()
        weight_saver.save(filename="test_variable")

    reassign_val = np.random.rand(10, 3)
    var_reassign = ng.AssignOp(tensor=var, val=reassign_val)
    var_recomp = ng.computation(var_reassign, "all")
    var_read = ng.computation(var, "all")

    with closing(ngt.make_transformer()) as restore_transformer:
        var_recompfunc = restore_transformer.add_computation(var_recomp)
        weight_saver.setup_restore(transformer=restore_transformer,
                                   computation=var_recomp,
                                   filename="test_variable")
        var_readfunc = restore_transformer.add_computation(var_read)
        var_recompfunc()
        results['reassigned'] = var_readfunc().copy()
        weight_saver.restore()
        results['restored'] = var_readfunc().copy()

    os.remove("test_variable.npz")
    assert np.allclose(results['saved'], assign_val, atol=0)
    assert np.allclose(results['reassigned'], reassign_val, atol=0)
    assert np.allclose(results['saved'], results['restored'], atol=0)
inputs = make_placeholders(args.batch_size, cs_loader)

model = WideDeepClassifier(cs_loader.parameters['dimensions_embeddings'],
                           cs_loader.parameters['tokens_in_embeddings'],
                           fc_layers_deep,
                           deep_activation_fn=Rectlin())

wide_deep = model(args.batch_size, inputs)

loss = ng.cross_entropy_binary(wide_deep, inputs['Y'])

optimizer = Adagrad(args.learning_rate)

# recall that the optimizer does not generate output
batch_cost = ng.sequential([optimizer(loss), ng.sum(loss, out_axes=())])


def compute_accuracy(data):
    accuracy = 0.0
    total = 0.0

    for value in data.values():
        x_d = value[0]
        x_w = value[1]
        x_e = value[2]
        y = value[3]

        wide_features = x_w
        deep_features = x_d
def __call__(self, in_obj, init_state=None):
    """
    Sets shape-based parameters of this layer given an input tuple, int,
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for the layer
        init_state (tuple of Tensor): object that provides the initial state;
                                      for LSTM this includes the hidden state
                                      and the cell state

    Returns:
        rnn_out (Tensor): output
    """
    # try to understand the axes from the input
    if init_state is not None:
        assert len(init_state) == 2 and init_state[0].axes == init_state[1].axes
        self.interpret_axes(in_obj, init_state[0])
    else:
        self.interpret_axes(in_obj, init_state)

    # initialize the hidden states
    if init_state is not None:
        self.h_init = init_state[0]
        self.c_init = init_state[1]
    else:
        if self.reset_cells:
            self.h_init = ng.temporary(initial_value=0,
                                       axes=self.out_axes).named('h_init')
            self.c_init = ng.temporary(initial_value=0,
                                       axes=self.out_axes).named('c_init')
        else:
            self.h_init = ng.variable(initial_value=0,
                                      axes=self.out_axes).named('h_init')
            self.c_init = ng.variable(initial_value=0,
                                      axes=self.out_axes).named('c_init')

    # params are dictionaries keyed by the i, f, o, g gates
    self.W_input = {k: ng.variable(axes=self.w_in_axes,
                                   initial_value=self.init,
                                   scope=self.scope
                                   ).named("W_in_{}".format(k))
                    for k in self.metadata['gates']}
    self.W_recur = {k: ng.variable(axes=self.w_re_axes,
                                   initial_value=self.init_inner,
                                   scope=self.scope
                                   ).named("W_re_{}".format(k))
                    for k in self.metadata['gates']}
    self.b = {k: ng.variable(axes=self.out_feature_axes,
                             initial_value=0,
                             scope=self.scope
                             ).named("bias_{}".format(k))
              for k in self.metadata['gates']}

    h = self.h_init
    c = self.c_init

    h_list = []
    c_list = []

    # Compute feed-forward weighted inputs.
    # Batch norm is computed only on the weighted inputs,
    # as in https://arxiv.org/abs/1510.01378
    h_ff = dict()
    for k in self.metadata["gates"]:
        h_ff[k] = ng.dot(self.W_input[k], in_obj)
        if self.batch_norm is not None:
            h_ff[k] = self.batch_norm[k](h_ff[k])

    # slice the weighted inputs into time slices
    h_ff = get_steps(h_ff, self.recurrent_axis, self.backward)

    # recurrent computation
    for i in range(self.recurrent_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            [h, c] = self._step(h_ff[i], [h, c])
            h_list.append(h)
            c_list.append(c)

    if self.return_sequence is True:
        if self.backward:
            h_list = h_list[::-1]
            c_list = c_list[::-1]
        lstm_out = ng.stack(h_list, self.recurrent_axis,
                            pos=self.recurrent_axis_idx)
    else:
        lstm_out = h_list[-1]

    if self.reset_cells is True:
        return lstm_out
    else:
        # carry the final states over to the next call before returning
        return ng.sequential([
            ng.doall([
                ng.assign(self.h_init, h_list[-1]),
                ng.assign(self.c_init, c_list[-1])
            ]),
            lstm_out
        ])
def unroll_with_attention(cell, num_steps, H_pr, H_hy, init_states=None,
                          reset_cells=True, return_sequence=True,
                          reverse_mode=False, input_data=None):
    """
    Unroll the cell with attention for num_steps steps.

    Arguments:
    ----------
    cell: the cell to unroll (e.g. MatchLSTMCell_withAttention)
    num_steps: the number of steps needed to unroll
    H_pr: the encoding for the question
    H_hy: the encoding for the passage
    init_states: either None or a dictionary containing states
    reset_cells: determines whether the cell state is reset after unrolling
    return_sequence: whether to return the full sequence of outputs
    reverse_mode: set to True if unrolling in the opposite direction is desired
    input_data: the ArrayIterator object for training data
                (contains the length of each sentence)
    """
    recurrent_axis = H_hy.axes.recurrent_axis()

    if init_states is not None:
        # note: `out_axes` is assumed to be available from the enclosing scope
        states = {k: ng.cast_role(v, out_axes)
                  for (k, v) in init_states.items()}
    else:
        states = init_states

    stepped_inputs = get_steps(H_hy, recurrent_axis, backward=reverse_mode)

    stepped_outputs = []

    for t in range(num_steps):
        with ng.metadata(step=str(t)):
            if t == 0:
                output, states = cell(H_pr, stepped_inputs[t], states,
                                      output=None, input_data=input_data)
            else:
                output, states = cell(H_pr, stepped_inputs[t], states,
                                      output=output, input_data=input_data)
            stepped_outputs.append(output)

    if reverse_mode:
        if return_sequence:
            stepped_outputs.reverse()

    if return_sequence:
        outputs = ng.stack(stepped_outputs, recurrent_axis, pos=1)
    else:
        outputs = stepped_outputs[-1]

    if not reset_cells:
        update_inits = ng.doall([ng.assign(initial, states[name])
                                 for (name, initial) in states.items()])
        outputs = ng.sequential([update_inits, outputs])

    return outputs
# Build the main and auxiliary loss functions
y_onehot = ng.one_hot(inputs['label'], axis=ax.Y)
train_prob_main = inception.seq2(inception.seq1(inputs['image']))
train_prob_main = ng.map_roles(train_prob_main, {"C": ax.Y.name})
train_loss_main = ng.cross_entropy_multi(train_prob_main, y_onehot,
                                         enable_softmax_opt=False)

train_prob_aux = inception.seq_aux(inception.seq1(inputs['image']))
train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name})
train_loss_aux = ng.cross_entropy_multi(train_prob_aux, y_onehot,
                                        enable_softmax_opt=False)

batch_cost = ng.sequential([
    optimizer(train_loss_main + 0.4 * train_loss_aux),
    ng.mean(train_loss_main, out_axes=())
])
train_computation = ng.computation([batch_cost], 'all')

# Build the computations for inference (evaluation)
with Layer.inference_mode_on():
    inference_prob = inception.seq2(inception.seq1(inputs['image']))
    slices = [0 if cx.name in ("H", "W") else slice(None)
              for cx in inference_prob.axes]
    inference_prob = ng.tensor_slice(inference_prob, slices)
    inference_prob = ng.map_roles(inference_prob, {"C": "Y"})
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                          inputs['label'])
def train_network(model, train_set, valid_set, batch_size, epochs, log_file):
    '''
    Trains the predefined network and saves the progress in the log file
    that is given in the arguments.

    model(object): Defines the model in Neon
    train_set(object): Defines the training set
    valid_set(object): Defines the validation set
    args(object): Training arguments
    batch_size(int): Minibatch size
    epochs(int): Number of training epochs
    log_file(string): File name to store training logs for plotting
    '''

    # Form placeholders for inputs to the network;
    # iteration count is needed for the learning rate schedule
    inputs = train_set.make_placeholders(include_iteration=True)

    # Convert labels into one-hot vectors
    one_hot_label = ng.one_hot(inputs['label'], axis=ax.Y)

    learning_rate_policy = {'name': 'schedule',
                            'schedule': list(np.arange(2, epochs, 2)),
                            'gamma': 0.6,
                            'base_lr': 0.001}

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.005,
                                        iteration=inputs['iteration'])

    # Define graph for training
    train_prob = model(inputs['video'])
    train_loss = ng.cross_entropy_multi(train_prob, one_hot_label)
    batch_cost = ng.sequential(
        [optimizer(train_loss), ng.mean(train_loss, out_axes=())])

    with closing(ngt.make_transformer()) as transformer:

        # Define graph for calculating validation set error and
        # misclassification rate. Use inference mode for validation
        # to avoid dropout in the forward pass.
        with Layer.inference_mode_on():
            inference_prob = model(inputs['video'])
            errors = ng.not_equal(ng.argmax(inference_prob), inputs['label'])
            eval_loss = ng.cross_entropy_multi(inference_prob, one_hot_label)
            eval_outputs = {'cross_ent_loss': eval_loss, 'misclass': errors}

            eval_computation = make_bound_computation(transformer,
                                                      eval_outputs, inputs)

        train_outputs = {'batch_cost': batch_cost}
        train_computation = make_bound_computation(transformer,
                                                   train_outputs, inputs)

        interval_cost = 0.0

        # Train in epochs
        logs = {'train': [], 'validation': [], 'misclass': []}
        for epoch in trange(epochs, desc='Epochs'):

            # Set up the training bar
            numBatches = train_set.ndata // batch_size
            tpbar = tqdm(unit='batches', ncols=100, total=numBatches,
                         leave=False)

            train_set.reset()
            valid_set.reset()

            train_log = []
            for step, data in enumerate(train_set):
                data = dict(data)
                data['iteration'] = epoch  # learning schedule is based on epochs
                output = train_computation(data)
                train_log.append(float(output['batch_cost']))
                tpbar.update(1)
                tpbar.set_description("Training {:0.4f}".format(
                    float(output['batch_cost'])))
                interval_cost += float(output['batch_cost'])
            tqdm.write("Epoch {epch} complete. "
                       "Avg Train Cost {cost:0.4f}".format(
                           epch=epoch,
                           cost=interval_cost / step))
            interval_cost = 0.0
            tpbar.close()
            validation_loss = run_validation(valid_set, eval_computation)
            tqdm.write("Avg losses: {}".format(validation_loss))
            logs['train'].append(train_log)
            logs['validation'].append(validation_loss['cross_ent_loss'])
            logs['misclass'].append(validation_loss['misclass'])

            # Save log data and plot at the end of each epoch
            with open(log_file, 'wb') as f:
                pickle.dump(logs, f)
            plot_logs(logs=logs)
learning_rate_policy = {'name': 'schedule',
                        'schedule': [32000, 48000],
                        'gamma': 0.1,
                        'base_lr': 0.1}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=0.9,
                                    wdecay=0.0001,
                                    iteration=inputs['iteration'])
label_indices = inputs['label']
train_loss = ng.cross_entropy_multi(resnet(inputs['image']),
                                    ng.one_hot(label_indices, axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

with Layer.inference_mode_on():
    inference_prob = resnet(inputs['image'])
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                          label_indices)
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       ng.one_hot(label_indices, axis=ax.Y))
    eval_loss_names = ['cross_ent_loss', 'misclass']
    eval_computation = ng.computation([eval_loss, errors], "all")

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_function = transformer.add_computation(train_computation)
                 wikimovies.story_length, wikimovies.memory_size,
                 wikimovies.vocab_size, vocab_axis, args.use_v_luts)

# Compute answer predictions
a_pred, _ = memn2n(inputs)

loss = ng.cross_entropy_multi(a_pred,
                              ng.one_hot(inputs['answer'], axis=vocab_axis),
                              usebits=True)
mean_cost = ng.sum(loss, out_axes=[])

optimizer = Adam(learning_rate=args.lr)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, _ = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference,
                                       ng.one_hot(inputs['answer'],
                                                  axis=vocab_axis),
                                       usebits=True)

eval_outputs = dict(test_cross_ent_loss=eval_loss,
                    test_preds=a_pred_inference)

if args.interactive:
    interactive_outputs = dict(test_preds=a_pred_inference)
                 eps=args.eps,
                 init=GaussianInit(mean=0.0, std=0.1))

# Compute answer predictions
a_pred, attention = memn2n(inputs)

# specify loss function, calculate loss and update weights
loss = ng.cross_entropy_multi(a_pred, inputs['answer'], usebits=True)

mean_cost = ng.sum(loss, out_axes=[])

optimizer = Adam(learning_rate=args.lr)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference, inputs['answer'],
                                       usebits=True)

interactive_outputs = dict(test_preds=a_pred_inference,
                           attention=attention_inference)
eval_outputs = dict(test_cross_ent_loss=eval_loss,
                    test_preds=a_pred_inference)

# Train Loop
def __init__(self, state_axes, action_size, batch_size, model,
             learning_rate=0.0001):
    """
    For now, `model` must be a function which takes action_axes and
    returns a neon container.
    """
    super(ModelWrapper, self).__init__()

    self.axes = Namespace()
    self.axes.state = make_axes(state_axes, name='state')
    self.axes.action = ng.make_axis(name='action', length=action_size)
    self.axes.n = ng.make_axis(name='N', length=batch_size)
    self.axes.n1 = ng.make_axis(name='N', length=1)

    # placeholders
    self.state = ng.placeholder(self.axes.state + [self.axes.n])
    self.state_single = ng.placeholder(self.axes.state + [self.axes.n1])
    self.target = ng.placeholder([self.axes.action, self.axes.n])

    # these q functions have the same structure but different variables
    self.q_function = model(self.axes.action)
    self.q_function_target = model(self.axes.action)

    # construct inference computation
    with neon.Layer.inference_mode_on():
        inference = self.q_function(self.state)
        inference_computation = ng.computation(inference, self.state)

    # construct inference target computation
    with neon.Layer.inference_mode_on():
        inference_target = self.q_function_target(self.state)
        inference_target_computation = ng.computation(inference_target,
                                                      self.state)

    # construct inference computation for evaluating a single observation
    with neon.Layer.inference_mode_on():
        inference_single = self.q_function(self.state_single)
        inference_computation_single = ng.computation(inference_single,
                                                      self.state_single)

    # update q function target weights with values from q function;
    # assumes that the variables in each are in the same order
    update_computation = ng.computation(
        ng.doall([
            ng.assign(target_variable,
                      ng.cast_axes(variable, target_variable.axes))
            for target_variable, variable in zip(
                self.q_function_target.variables.values(),
                self.q_function.variables.values())
        ]))

    # construct training computation
    loss = ng.squared_L2(self.q_function(self.state) - self.target)

    optimizer = neon.RMSProp(
        learning_rate=learning_rate,
        gradient_clip_value=1,
    )

    # run the weight updates, then return the loss value
    train_output = ng.sequential([
        optimizer(loss),
        loss,
    ])

    train_computation = ng.computation(train_output, self.state, self.target)

    # now bind computations we are interested in
    self.transformer = ng.transformers.make_transformer()
    self.inference_function = self.transformer.add_computation(
        inference_computation)
    self.inference_target_function = self.transformer.add_computation(
        inference_target_computation)
    self.inference_function_single = self.transformer.add_computation(
        inference_computation_single)
    self.train_function = self.transformer.add_computation(
        train_computation)
    self.update_function = self.transformer.add_computation(
        update_computation)

    # run a single update to ensure that both q functions have the same
    # initial weights
    self.update()
def train_outputs(self, in_obj, init_state=None):
    """
    Sets shape-based parameters of this layer given an input tuple, int,
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for the layer
        init_state (tuple of Tensor): object that provides the initial state;
                                      for LSTM this includes the hidden state
                                      and the cell state

    Returns:
        rnn_out (Tensor): output
    """
    # try to understand the axes from the input
    if init_state is not None:
        assert len(init_state) == 2 and init_state[0].axes == init_state[1].axes
        self.interpret_axes(in_obj, init_state[0])
    else:
        self.interpret_axes(in_obj, init_state)

    # initialize the hidden states
    if init_state is not None:
        self.h_init = init_state[0]
        self.c_init = init_state[1]
    else:
        if self.reset_cells:
            self.h_init = ng.temporary(
                initial_value=0, axes=self.hidden_state_axes).named('h_init')
            self.c_init = ng.temporary(
                initial_value=0, axes=self.hidden_state_axes).named('c_init')
        else:
            self.h_init = ng.variable(
                initial_value=0, axes=self.hidden_state_axes).named('h_init')
            self.c_init = ng.variable(
                initial_value=0, axes=self.hidden_state_axes).named('c_init')

    # params are dictionaries keyed by the i, f, o, g gates
    self.W_input = {k: ng.variable(axes=self.w_in_axes,
                                   initial_value=self.init
                                   ).named("W_in_{}".format(k))
                    for k in self.metadata['gates']}
    self.W_recur = {k: ng.variable(axes=self.w_re_axes,
                                   initial_value=self.init_inner
                                   ).named("W_re_{}".format(k))
                    for k in self.metadata['gates']}
    self.b = {k: ng.variable(axes=self.hidden_axes,
                             initial_value=0).named("bias_{}".format(k))
              for k in self.metadata['gates']}

    h = self.h_init
    c = self.c_init

    h_list = []
    c_list = []

    # feedforward computation
    in_s = get_steps(in_obj, self.recurrent_axis, self.backward)

    # recurrent computation
    for i in range(self.recurrent_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            [h, c] = self._step(in_s[i], [h, c])
            h_list.append(h)
            c_list.append(c)

    if self.return_sequence is True:
        if self.backward:
            h_list = h_list[::-1]
            c_list = c_list[::-1]
        lstm_out = ng.stack(h_list, self.recurrent_axis,
                            pos=self.recurrent_axis_idx)
    else:
        lstm_out = h_list[-1]

    if self.reset_cells is True:
        return lstm_out
    else:
        # carry the final states over to the next call before returning
        return ng.sequential([
            ng.doall([
                ng.assign(self.h_init, h_list[-1]),
                ng.assign(self.c_init, c_list[-1])
            ]),
            lstm_out
        ])
def assign_ops(ops, values):
    """Wrap pairwise assignments of `values` to `ops` into one op."""
    assign_ops = [ng.AssignOp(op, value) for op, value in zip(ops, values)]
    return ng.sequential(assign_ops)
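# --- Illustrative use of assign_ops (the variables here are hypothetical):
# both assignments are wrapped into a single executable op; running it
# through a transformer computation performs them in order.
C = ng.make_axis(length=3)
w = ng.variable([C], initial_value=np.zeros(3))
b = ng.variable((), initial_value=0.0)
set_params = assign_ops([w, b], [np.ones(3), 0.5])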
loss1 = ng.cross_entropy_multi(logits1,
                               ng.one_hot(label1, axis=ax.Y),
                               usebits=False)
loss2 = ng.cross_entropy_multi(logits2,
                               ng.one_hot(label2, axis=ax.Y),
                               usebits=False)

# Total loss
train_loss = loss1 + loss2

# Set optimizer (no learning rate scheduler used)
optimizer = Adam(learning_rate=2e-3)

print('compiling the graph')
# Cost set up
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])

# Predicted class is the one with maximum probability.
# Required outputs: batch cost, train probability, train misclassification
train_outputs = dict(batch_cost=batch_cost,
                     inps=inputs['answer'],
                     logits=ng.stack(logits_concat, span, 1),
                     labels=inputs['answer'],
                     drop=dropout_val)

# Inference mode for the validation dataset:
with Layer.inference_mode_on():
    eval_outputs = dict(logits=ng.stack(logits_concat, span, 1),
                        labels=inputs['answer'],
                        drop=drop_pointer)

# Now bind the computations we are interested in
print('generating transformer')
def _pre_call_hook(self):
    # advance the time step, then recompute the bias-corrected
    # effective learning rate for this step
    self.t = ng.sequential([ng.assign(self.t, self.t + 1), self.t])
    self.ell = self.lrate * ng.sqrt(1 - self.beta_2 ** self.t) \
        / (1 - self.beta_1 ** self.t)
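# --- This is the efficient form of Adam's bias correction (Kingma & Ba,
# 2014): ell_t = lrate * sqrt(1 - beta_2**t) / (1 - beta_1**t). A standalone
# numeric check of the effective step size (assumes numpy as np):
lrate, beta_1, beta_2 = 0.001, 0.9, 0.999
for t in [1, 10, 100, 1000]:
    ell = lrate * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)
    print(t, ell)  # converges to lrate as both corrections decay to 1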
def mnist_mlp(args):
    # write tensorflow models
    x = tf.placeholder(tf.float32, [args.batch_size, 784])
    t = tf.placeholder(tf.float32, [args.batch_size, 10])
    w = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    y = tf.matmul(x, w) + b
    cost = tf.reduce_mean(-tf.reduce_sum(t * tf.log(tf.nn.softmax(y)),
                                         reduction_indices=[1]))
    init = tf.global_variables_initializer()

    # import graph_def
    importer = TFImporter()
    importer.import_graph_def(tf.get_default_graph().as_graph_def())

    # get handles of the ngraph ops
    x_ng, t_ng, cost_ng, init_op_ng = importer.get_op_handle(
        [x, t, cost, init])

    # transformer and computations
    transformer = ngt.make_transformer()
    updates = CommonSGDOptimizer(args.lrate).minimize(cost_ng,
                                                      cost_ng.variables())
    train_comp = transformer.computation(ng.sequential([updates, cost_ng]),
                                         x_ng, t_ng)
    init_comp = transformer.computation(init_op_ng)
    transformer.initialize()

    # train
    if args.random_data is not None:
        mnist = args.random_data
        mnist.reset(0)
    else:
        mnist = input_data.read_data_sets(args.data_dir, one_hot=True)

    init_comp()
    ng_cost_vals = []
    for idx in range(args.max_iter):
        batch_xs, batch_ys = mnist.train.next_batch(args.batch_size)
        cost_val = train_comp(batch_xs, batch_ys)
        ng_cost_vals.append(float(cost_val))
        print("[Iter %s] Cost = %s" % (idx, cost_val))

    transformer.close()

    # train in tensorflow as comparison
    with tf.Session() as sess:
        train_step = tf.train.GradientDescentOptimizer(
            args.lrate).minimize(cost)
        sess.run(init)

        if args.random_data is not None:
            mnist = args.random_data
            mnist.reset(0)
        else:
            mnist = input_data.read_data_sets(args.data_dir, one_hot=True)

        tf_cost_vals = []
        for idx in range(args.max_iter):
            batch_xs, batch_ys = mnist.train.next_batch(args.batch_size)
            cost_val, _ = sess.run([cost, train_step],
                                   feed_dict={x: batch_xs, t: batch_ys})
            tf_cost_vals.append(float(cost_val))
            print("[Iter %s] Cost = %s" % (idx, cost_val))

    return ng_cost_vals, tf_cost_vals
loss = ng.ctc(output,
              ng.flatten(inputs["char_map"]),
              ng.flatten(inputs["audio_length"]),
              ng.flatten(inputs["char_map_length"]))

optimizer = GradientDescentMomentum(args.lr,
                                    momentum_coef=args.momentum,
                                    gradient_clip_norm=args.gradient_clip_norm,
                                    nesterov=args.nesterov)

start = time.time()
updates = optimizer(loss)
stop = time.time()
logger.debug("Optimizer graph creation took {} seconds".format(stop - start))
mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])

# Create computation and initialize the transformer to allocate weights
train_computation = ng.computation([mean_cost, output], "all")
if inference is True:
    with Layer.inference_mode_on():
        eval_output = ds2(inputs["audio"],
                          spatial_axes={"H": "frequency", "W": "time"})
    eval_computation = ng.computation(eval_output, "all")

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
def train_mnist_mlp(transformer_name, data_dir=None, rng_seed=12,
                    batch_size=128, train_iter=10, eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to the graph regardless of transformer,
    # but it is ignored for the non-HeTr case
    hetr_device_ids = (0, 1)

    # use a consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data, batch_size,
                              total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(),
                   activation=Rectlin()),
            Affine(axes=ax.Y, weight_init=GaussianInit(),
                   activation=Logistic())
        ])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(
            train_prob, ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential(
            [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
            errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                                  inputs['label'])
            eval_loss = ng.cross_entropy_binary(
                inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
            eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(
            ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer,
                                                   train_outputs, inputs)
        loss_computation = make_bound_computation(transformer,
                                                  eval_outputs, inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

    return train_costs, ce_loss