def build_regressor_computations():
    train_preds = predictions(encoder, affine_layer, inputs['X'])
    train_loss = ng.squared_L2(train_preds - inputs['y'])

    # Cost calculation
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        eval_preds = predictions(encoder, affine_layer, inputs['X'])
        eval_loss = ng.mean(ng.squared_L2(eval_preds - inputs['y']), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
def build_seq2seq_computations():
    # Training loss, optimizer
    train_decoded = recurrent_model.encode_and_decode(encoder, decoder,
                                                      inputs['X'], previous)
    train_loss = ng.squared_L2(target - train_decoded)
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    # Evaluation loss
    with Layer.inference_mode_on():
        eval_decoded = recurrent_model.encode_and_generate(encoder, decoder,
                                                           inputs['X'], in_axes)
        eval_loss = ng.mean(ng.squared_L2(target - eval_decoded), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
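# A minimal usage sketch for the two builders above, assuming the surrounding
# script defines `encoder`, `decoder`, `inputs` and `optimizer`, and imports
# `ngt` (ngraph.transformers) and contextlib's `closing`. The returned
# computations are bound to a transformer before use, following the pattern
# used elsewhere in this collection:
train_computation, loss_computation = build_regressor_computations()
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
    loss_function = transformer.add_computation(loss_computation)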
def __call__(self, in_obj):
    in_axes = in_obj.axes
    if in_axes.channel_axis() is None:
        red_axes = ng.make_axes(in_axes.recurrent_axis()) + in_axes.batch_axes()
    else:
        red_axes = in_axes - in_axes.channel_axis()
    out_axes = in_axes - red_axes

    in_obj = ng.flatten(in_obj, out_axes | red_axes.flatten(force=True))
    if self.gamma is None:
        self.gvar = self.gvar or ng.persistent_tensor(axes=out_axes, initial_value=1.0)
        self.gmean = self.gmean or ng.persistent_tensor(axes=out_axes, initial_value=0.0)
        self.gamma = ng.variable(axes=out_axes,
                                 initial_value=self.init_gamma,
                                 scope=self.scope).named('gamma')
        self.beta = ng.variable(axes=out_axes,
                                initial_value=self.init_beta,
                                scope=self.scope).named('beta')

    xmean = ng.mean(in_obj, out_axes=out_axes)
    xvar = ng.variance(in_obj, out_axes=out_axes)

    if Layer.inference_mode:
        return ng.unflatten(self.gamma * ((in_obj - self.gmean) *
                            ng.reciprocal(ng.sqrt(self.gvar + self.eps))) + self.beta)
    else:
        return ng.sequential([
            ng.assign(self.gmean, self.gmean * self.rho + xmean * (1.0 - self.rho)),
            ng.assign(self.gvar, self.gvar * self.rho + xvar * (1.0 - self.rho)),
            ng.unflatten(self.gamma * ((in_obj - xmean) *
                         ng.reciprocal(ng.sqrt(xvar + self.eps))) + self.beta)
        ])
def train_outputs(self, in_obj):
    in_axes = in_obj.axes.sample_axes()
    red_axes = ng.make_axes()

    if len(in_axes.role_axes(ar.features_input)) != 0:
        red_axes += in_axes.sample_axes() - in_axes.role_axes(ar.features_input)
    red_axes += in_obj.axes.batch_axes()
    out_axes = in_axes - red_axes

    self.gamma = self.gamma or ng.variable(axes=out_axes,
                                           initial_value=1.0).named('gamma')
    self.beta = self.beta or ng.variable(axes=out_axes,
                                         initial_value=0.0).named('beta')
    self.gvar = self.gvar or ng.persistent_tensor(axes=out_axes, initial_value=1.0)
    self.gmean = self.gmean or ng.persistent_tensor(axes=out_axes, initial_value=0.0)

    xmean = ng.mean(in_obj, reduction_axes=red_axes)
    xvar = ng.variance(in_obj, reduction_axes=red_axes)

    return ng.sequential([
        ng.assign(self.gmean, self.gmean * self.rho + xmean * (1.0 - self.rho)),
        ng.assign(self.gvar, self.gvar * self.rho + xvar * (1.0 - self.rho)),
        self.gamma * (in_obj - xmean) / ng.sqrt(xvar + self.eps) + self.beta
    ])
def create_loss_and_learner(model, labels, learning_rate, momentum_coef=0.0,
                            wdecay=0.0, nesterov=False, gradient_clip_norm=None,
                            gradient_clip_value=None):
    """
    Auxiliary function to create a loss function (cross entropy with softmax)
    and a trainer using stochastic gradient descent with momentum.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array
        learning_rate - learning rate for trainer
        momentum_coef - coefficient of momentum (default 0.0)
        wdecay - amount of weight decay (default 0.0)
        nesterov - use Nesterov accelerated gradient (default False)
        gradient_clip_norm - target gradient norm (default None)
        gradient_clip_value - value to element-wise clip gradients (default None)

    Returns:
        Loss function (mean for batch)
    """
    if model.axes.lengths != labels.axes.lengths:
        labels = ng.Transpose(labels)
    assert model.axes.lengths == labels.axes.lengths
    model = ng.cast_axes(model, axes=labels.axes)

    loss = ng.cross_entropy_multi(ng.softmax(model), labels)
    optimizer = GradientDescentMomentum(learning_rate, momentum_coef, wdecay,
                                        gradient_clip_norm, gradient_clip_value,
                                        nesterov)
    return ng.sequential([optimizer(loss), ng.mean(loss, out_axes=())])
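# A minimal usage sketch, assuming `model_out` is an imported model op and
# `labels` is a one-hot placeholder (both names are illustrative). The returned
# op is the batch cost with the optimizer's updates sequenced in, so wrapping
# it in a computation mirrors the training setups elsewhere in this file:
batch_cost = create_loss_and_learner(model_out, labels,
                                     learning_rate=0.01, momentum_coef=0.9)
train_computation = ng.computation(batch_cost, "all")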
def _build_module(self, input_layer):
    # Dueling Network
    # state value tower - V
    output_axis = ng.make_axis(self.num_actions, name='q_values')

    state_value = neon.Sequential([
        neon.Affine(nout=256, activation=neon.Rectlin(),
                    weight_init=self.weights_init, bias_init=self.biases_init),
        neon.Affine(nout=1, weight_init=self.weights_init,
                    bias_init=self.biases_init)
    ])(input_layer)

    # action advantage tower - A
    action_advantage_unnormalized = neon.Sequential([
        neon.Affine(nout=256, activation=neon.Rectlin(),
                    weight_init=self.weights_init, bias_init=self.biases_init),
        neon.Affine(axes=output_axis, weight_init=self.weights_init,
                    bias_init=self.biases_init)
    ])(input_layer)
    action_advantage = action_advantage_unnormalized - ng.mean(action_advantage_unnormalized)

    repeated_state_value = ng.expand_dims(
        ng.slice_along_axis(state_value, state_value.axes[0], 0), output_axis, 0)

    # merge to state-action value function Q
    self.output = repeated_state_value + action_advantage
def _build_module(self, input_layer):
    # This is almost exactly the same as the Dueling Network, but we predict
    # the future measurements for each action.
    multistep_measurements_size = self.measurements_size[0] * self.num_predicted_steps_ahead

    # actions expectation tower (expectation stream) - E
    with name_scope("expectation_stream"):
        expectation_stream = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(nout=multistep_measurements_size,
                        weight_init=self.weights_init, bias_init=self.biases_init)
        ])(input_layer)

    # action fine differences tower (action stream) - A
    with name_scope("action_stream"):
        action_stream_unnormalized = neon.Sequential([
            neon.Affine(nout=256, activation=neon.Rectlin(),
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Affine(nout=self.num_actions * multistep_measurements_size,
                        weight_init=self.weights_init, bias_init=self.biases_init),
            neon.Reshape((self.num_actions, multistep_measurements_size))
        ])(input_layer)
        action_stream = action_stream_unnormalized - ng.mean(action_stream_unnormalized)

    # output_axis: assumed to be the action axis created by the Reshape above
    output_axis = action_stream.axes[0]

    repeated_expectation_stream = ng.slice_along_axis(expectation_stream,
                                                      expectation_stream.axes[0], 0)
    repeated_expectation_stream = ng.expand_dims(repeated_expectation_stream,
                                                 output_axis, 0)

    # merge to future measurements predictions
    self.output = repeated_expectation_stream + action_stream
def test_sequential_side(M):
    x1_np = 2
    x2_np = 3
    b_np = 1
    x_np = np.array([1, 2, 3], dtype=np.float32)

    x = ng.variable([M], initial_value=x_np)
    x1 = ng.persistent_tensor(axes=(), initial_value=x1_np)
    x2 = ng.persistent_tensor(axes=(), initial_value=x2_np)
    x1_vo = ng.value_of(x1)
    x2_vo = ng.value_of(x2)
    b = ng.persistent_tensor(axes=(), initial_value=b_np)

    y = ng.sequential([
        x1_vo,
        x2_vo,
        ng.assign(x1, ng.sum(x, out_axes=()) + x1 * b + (1 - b)),
        ng.assign(x2, ng.mean(x, out_axes=()) + x2 * b + (1 - b)),
        x * 2
    ])

    with ExecutorFactory() as ex:
        main_effect = ex.executor((y, x1_vo, x2_vo, x1, x2))
        current_values = ex.executor((x1, x2))

        # Run main path #1
        y_val, x1_init_val, x2_init_val, x1_final_val, x2_final_val = main_effect()
        y_np = x_np * 2

        assert np.allclose(y_val, y_np)
        assert np.allclose(x1_init_val, x1_np)
        assert np.allclose(x2_init_val, x2_np)
        x1_np = np.sum(x_np) + x1_np * b_np + (1 - b_np)
        x2_np = np.mean(x_np) + x2_np * b_np + (1 - b_np)
        assert np.allclose(x1_final_val, x1_np)
        assert np.allclose(x2_final_val, x2_np)

        x1_val, x2_val = current_values()
        assert np.allclose(x1_val, x1_np)
        assert np.allclose(x2_val, x2_np)

        # Run main path #2 (should be the same as before)
        y_val, x1_init_val, x2_init_val, x1_final_val, x2_final_val = main_effect()
        y_np = x_np * 2

        assert np.allclose(y_val, y_np)
        assert np.allclose(x1_init_val, x1_np)
        assert np.allclose(x2_init_val, x2_np)
        x1_np = np.sum(x_np) + x1_np * b_np + (1 - b_np)
        x2_np = np.mean(x_np) + x2_np * b_np + (1 - b_np)
        assert np.allclose(x1_final_val, x1_np)
        assert np.allclose(x2_final_val, x2_np)
def ReduceElements(self, cntk_op, inputs):
    """
    Returns a reduction operation (max, min, mean, sum, prod) or a calculation
    which matches CNTK's LogSum reduction (`reduce_log_sum_exp` function).

    Arguments:
        cntk_op: CNTK operation to be imported.
        inputs: List of inputs to this node.

    Returns:
        A ngraph Op.
    """
    assert len(inputs) == 1

    reduction_op_name = cntk_op.attributes.get('reductionOpName')
    # CNTK API defines a reductionKeepDimensions flag, but we currently don't use it
    # keep_dimensions = cntk_op.attributes.get('reductionKeepDimensions', False)

    cntk_op_attribute_axes = []
    if cntk_op.attributes.get('axisVec'):
        cntk_op_attribute_axes.extend(cntk_op.attributes.get('axisVec'))
    elif cntk_op.attributes.get('axis'):
        cntk_op_attribute_axes.append(cntk_op.attributes.get('axis'))

    # CNTK axes are numbered in reverse order: the last axis is labeled 0,
    # the previous 1, etc.
    reduction_axes_indexes = [len(inputs[0].axes) - 1 - i
                              for (_, _, i) in cntk_op_attribute_axes]
    reduction_ng_axes_list = [axis for (i, axis) in enumerate(inputs[0].axes)
                              if i in reduction_axes_indexes]
    reduction_ng_axes = ng.Axes(axes=reduction_ng_axes_list)

    if reduction_op_name == 'Max':
        return ng.max(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)
    if reduction_op_name == 'Min':
        return ng.min(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)
    if reduction_op_name == 'Mean':
        return ng.mean(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)
    if reduction_op_name == 'Sum':
        return ng.sum(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)
    if reduction_op_name == 'Prod':
        return ng.prod(inputs[0], reduction_axes=reduction_ng_axes).named(cntk_op.uid)
    if reduction_op_name == 'LogSum':
        return ng.log(ng.sum(ng.exp(inputs[0]), reduction_axes=reduction_ng_axes))\
            .named(cntk_op.uid)

    raise NotImplementedError('CNTKImporter: ReduceElements does not support operation %s',
                              reduction_op_name)
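# Illustration of the axis-index reversal above: for an input with ngraph axes
# (C, H, W), CNTK labels W as axis 0, H as axis 1 and C as axis 2, so a CNTK
# reduction over its axis 0 maps to ngraph index len(axes) - 1 - 0 == 2.
ng_axis_names = ['C', 'H', 'W']
cntk_axis = 0  # CNTK's reversed numbering: 0 means the last ngraph axis
assert ng_axis_names[len(ng_axis_names) - 1 - cntk_axis] == 'W'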
def AveragedLoss(self, c2_op, inputs):
    """
    Computes average loss for the batch.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the caffe2 node.
    """
    x = inputs[0]
    reduction_axes = x.axes.batch_axes() if x.axes.batch_axes() else ng.make_axes(x.axes[0])
    return ng.mean(x, reduction_axes=reduction_axes)
def run_mini_ds2_benchmark(args, **kwargs):
    device_id = kwargs.get('device_id')
    inputs, train_set, eval_set = generate_ds2_data(args.max_length, args.str_w,
                                                    args.nout, args.nbands,
                                                    args.batch_size,
                                                    args.num_iterations)
    model_out = get_mini_ds2(inputs, args.nfilters, args.filter_width,
                             args.str_w, args.nbands, args.depth,
                             args.hidden_size, args.batch_norm,
                             args.hetr_device, device_id)

    if args.bprop:
        with ng.metadata(device=args.hetr_device,
                         device_id=device_id,
                         parallel=ax.N):
            loss = ng.ctc(model_out,
                          ng.flatten(inputs["char_map"]),
                          inputs["audio_length"],
                          inputs["trans_length"])

            optimizer = GradientDescentMomentum(learning_rate=2e-5,
                                                momentum_coef=0.99,
                                                gradient_clip_norm=400,
                                                nesterov=args.nesterov)

            updates = optimizer(loss)
            mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])
            bprop_computation_op = ng.computation(mean_cost, "all")

        benchmark = Benchmark(bprop_computation_op, train_set, inputs,
                              args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark.time(args.num_iterations, args.skip_iter, 'ds2_bprop',
                           args.visualize, preprocess=True))
    else:
        fprop_computation_op = ng.computation(model_out, "all")

        benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                    args.backend, args.hetr_device)
        Benchmark.print_benchmark_results(
            benchmark_fprop.time(args.num_iterations, args.skip_iter,
                                 'ds2_fprop', args.visualize, preprocess=True))
def test_mean(transformer_factory, input_tensor):
    inputs = input_tensor
    targets = ng.placeholder(inputs.axes)

    inp_stat = ng.mean(inputs, reduction_axes=inputs.axes.batch_axes())
    err = ng.sum(inp_stat - targets, out_axes=())
    with executor(err, inputs, targets) as comp_func:
        input_value = rng.uniform(-1, 1, inputs.axes)
        target_value = rng.uniform(-1, 1, targets.axes)
        ng_f_res = comp_func(input_value, target_value)

        np_f_res = np.sum(np.mean(input_value, axis=1, keepdims=True) - target_value)

        ng.testing.assert_allclose(np_f_res, ng_f_res, atol=1e-4, rtol=1e-4)
def run_resnet_benchmark(dataset, num_iterations, n_skip, batch_size, device_id,
                         transformer_type, device, bprop=True, batch_norm=False,
                         visualize=False, stage_depth=1):
    inputs, data, train_set = get_fake_data(dataset, batch_size, num_iterations)

    # Running forward propagation
    model_out = get_mini_resnet(inputs, dataset, device, device_id,
                                batch_norm=batch_norm, stage_depth=stage_depth)

    # Running back propagation
    if bprop:
        with ng.metadata(device=device, device_id=device_id, parallel=ax.N):
            optimizer = GradientDescentMomentum(0.01, 0.9)
            train_loss = ng.cross_entropy_multi(
                model_out, ng.one_hot(inputs['label'], axis=ax.Y))

            batch_cost = ng.sequential(
                [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
            batch_cost_computation_op = ng.computation(batch_cost, "all")

        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_bprop',
                           visualize, 'device_id'))
    else:
        fprop_computation_op = ng.computation(model_out, 'all')
        benchmark = Benchmark(fprop_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_fprop',
                           visualize))
def test_mean(transformer_factory):
    ax = ng.name_scope('x')
    ax.N = ng.make_axis(128, batch=True)
    ax.Y = ng.make_axis(100)

    inputs = ng.placeholder([ax.Y, ax.N])
    targets = ng.placeholder([ax.Y, ax.N])

    inp_stat = ng.mean(inputs, reduction_axes=inputs.axes.batch_axes())
    err = ng.sum(inp_stat - targets, out_axes=())
    comp_func = executor(err, inputs, targets)

    input_value = rng.uniform(-1, 1, inputs.axes)
    target_value = rng.uniform(-1, 1, targets.axes)
    ng_f_res = comp_func(input_value, target_value)

    np_f_res = np.sum(np.mean(input_value, axis=1, keepdims=True) - target_value)

    np.testing.assert_allclose(np_f_res, ng_f_res, atol=1e-4, rtol=1e-4)
def cross_entropy_with_softmax(model, labels):
    """
    Auxiliary function to add cross entropy and softmax (loss function)
    to imported model for training.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array

    Returns:
        Loss function (mean for batch)
    """
    if model.axes.lengths != labels.axes.lengths:
        model = ng.Transpose(model)
    assert model.axes.lengths == labels.axes.lengths
    model = ng.cast_axes(model, axes=labels.axes)

    loss = ng.cross_entropy_multi(ng.softmax(model), labels)
    return ng.mean(loss, out_axes=())
def classification_error(model, labels):
    """
    Auxiliary function to add classification error function to
    imported model for testing.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array

    Returns:
        Classification error function (mean for batch)
    """
    try:
        errors = ng.not_equal(
            ng.argmax(model, out_axes=[labels.axes.batch_axis()]),
            ng.argmax(labels, out_axes=[labels.axes.batch_axis()]))
    except ValueError:
        errors = ng.not_equal(ng.argmax(model), ng.argmax(labels))
    return ng.mean(errors, out_axes=())
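# A minimal sketch combining the two helpers above, assuming `model` is an
# imported model op and `labels` a one-hot placeholder. Both helpers return
# batch-mean scalars, so they can be evaluated together in one computation:
mean_loss = cross_entropy_with_softmax(model, labels)
mean_error = classification_error(model, labels)
eval_computation = ng.computation([mean_loss, mean_error], "all")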
def CrossEntropyWithSoftmax(self, cntk_op, inputs):
    """
    Computes the softmax cross entropy between the inputs[0] and inputs[1].

    Arguments:
        cntk_op: CNTK operation to be imported.
        inputs: List of inputs to this node.

    Returns:
        A ngraph Op.
    """
    cast_0, cast_1 = squeeze_axes(inputs)

    if cast_0.axes.lengths != cast_1.axes.lengths:
        cast_0 = ng.Transpose(cast_0)
    assert cast_0.axes.lengths == cast_1.axes.lengths

    cast_0 = ng.cast_axes(cast_0, axes=cast_1.axes)
    loss = ng.cross_entropy_multi(ng.softmax(cast_0), cast_1)

    return ng.mean(loss, out_axes=()).named(cntk_op.uid)
def run_cifar_benchmark(n_iter=10, n_skip=5, batch_size=4, transformer_type='cpu'):
    inputs, data, train_set = get_fake_cifar(batch_size, n_iter)
    model = get_mini_resnet(inputs)
    optimizer = GradientDescentMomentum(0.01, 0.9)

    train_loss = ng.cross_entropy_multi(model(inputs['image']),
                                        ng.one_hot(inputs['label'], axis=ax.Y))

    batch_cost = ng.sequential(
        [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    batch_cost_computation_op = ng.computation(batch_cost, "all")

    feed_dict = fill_feed_dict(train_set, inputs)
    benchmarks = dict()
    benchmarks['cifar_msra_fprop'] = run_benchmark(batch_cost_computation_op,
                                                   transformer_type, feed_dict,
                                                   n_skip, n_iter)
    print_benchmark_results(benchmarks)
def test_multi_computations(hetr_device):
    if hetr_device == 'gpu':
        pytest.xfail("enable after gpu exgraph")
    axes_x = ng.make_axes([ax_A, ax_B])
    x = ng.placeholder(axes=axes_x)
    y = ng.placeholder(())
    with ng.metadata(device_id=('0', '1'), parallel=ax_A):
        f = x ** 2
        out = y - ng.mean(f, out_axes=())

    np_x = np.random.randint(10, size=axes_x.lengths)
    np_y = np.random.randint(10)
    with closing(ngt.make_transformer_factory('hetr', device=hetr_device)()) as t:
        comp = t.computation(out, x, y)
        another_comp = t.computation(f, x)

        res_comp = comp(np_x, np_y)
        res_another_comp = another_comp(np_x)
        ref_comp = np_y - np.mean(np_x ** 2)
        np.testing.assert_array_equal(res_comp, ref_comp)
        np.testing.assert_array_equal(res_another_comp, np_x ** 2)
def run_resnet_benchmark(dataset, n_iter, n_skip, batch_size, device_id,
                         transformer_type, device, bprop=False, visualize=False):
    inputs, data, train_set = get_fake_data(dataset, batch_size, n_iter)
    model_out = get_mini_resnet(inputs, dataset, device_id)

    # Running forward propagation
    fprop_computation_op = ng.computation(model_out, 'all')
    benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                transformer_type, device)
    Benchmark.print_benchmark_results(
        benchmark_fprop.time(n_iter, n_skip, dataset + '_msra_fprop', visualize))

    # Running back propagation
    if bprop:
        optimizer = GradientDescentMomentum(0.01, 0.9)
        train_loss = ng.cross_entropy_multi(model_out,
                                            ng.one_hot(inputs['label'], axis=ax.Y))

        batch_cost = ng.sequential([optimizer(train_loss),
                                    ng.mean(train_loss, out_axes=())])
        batch_cost_computation_op = ng.computation(batch_cost, "all")

        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(n_iter, n_skip, dataset + '_msra_bprop', visualize))
if args.modeltype == "TCN": optimizer = Adam(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value) else: optimizer = GradientDescentMomentum( learning_rate=args.lr, gradient_clip_value=args.grad_clip_value) # Define the loss function (categorical cross entropy, since each musical key on the piano is encoded as a binary value) fwd_prop = model(inputs['X']) fwd_prop = ng.axes_with_order(fwd_prop, out_axes) train_loss = ng.cross_entropy_binary(fwd_prop, inputs['y']) with Layer.inference_mode_on(): preds = model(inputs['X']) preds = ng.axes_with_order(preds, out_axes) eval_loss = ng.mean(ng.cross_entropy_binary(preds, inputs['y']), out_axes=()) eval_computation = ng.computation([eval_loss], "all") predict_computation = ng.computation([preds], "all") # Cost calculation batch_cost = ng.sequential( [optimizer(train_loss), ng.mean(train_loss, out_axes=())]) train_computation = ng.computation(batch_cost, "all") trainer = TimeseriesTrainer(optimizer, train_computation, eval_computation, predict_computation, inputs, model_graph=[model],
learning_rate_policy = {
    'name': 'schedule',
    'schedule': [32000, 48000],
    'gamma': 0.1,
    'base_lr': 0.1
}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=0.9,
                                    wdecay=0.0001,
                                    iteration=inputs['iteration'])
label_indices = inputs['label']
train_loss = ng.cross_entropy_multi(resnet(inputs['image']),
                                    ng.one_hot(label_indices, axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

with Layer.inference_mode_on():
    inference_prob = resnet(inputs['image'])
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                          label_indices)
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       ng.one_hot(label_indices, axis=ax.Y))
    eval_loss_names = ['cross_ent_loss', 'misclass']
    eval_computation = ng.computation([eval_loss, errors], "all")

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_function = transformer.add_computation(train_computation)
eval_function = transformer.add_computation(eval_computation)
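# Illustration of the 'schedule' policy above (assuming the usual neon-style
# step schedule, where the rate is multiplied by gamma at each boundary):
#   iteration <  32000           -> lr = 0.1
#   32000 <= iteration < 48000   -> lr = 0.1 * 0.1   = 0.01
#   iteration >= 48000           -> lr = 0.1 * 0.1^2 = 0.001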
# Input specification
ax.C.length, ax.H.length, ax.W.length = train_set.shapes['image']
ax.D.length = 1
ax.N.length = args.batch_size
ax.Y.length = 10

# placeholders with descriptive names
inputs = dict(image=ng.placeholder([ax.C, ax.H, ax.W, ax.N]),
              label=ng.placeholder([ax.N]))

optimizer = GradientDescentMomentum(0.01, 0.9)
output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
loss_computation = make_bound_computation(transformer, loss_outputs, inputs)

cbs = make_default_callbacks(output_file=args.output_file,
                             frequency=args.iter_interval,
                             train_computation=train_computation,
                             total_iterations=args.num_iterations,
                             eval_set=valid_set,
loss1 = ng.cross_entropy_multi(logits1,
                               ng.one_hot(label1, axis=ax.Y),
                               usebits=False)
loss2 = ng.cross_entropy_multi(logits2,
                               ng.one_hot(label2, axis=ax.Y),
                               usebits=False)

# Total Loss
train_loss = loss1 + loss2

# Set optimizer (no learning rate scheduler used)
optimizer = Adam(learning_rate=2e-3)

print('compiling the graph')
# Cost set up
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])

# Predicted class is the one with maximum probability
# Required Outputs- Batch Cost, Train Probability, misclass train
train_outputs = dict(batch_cost=batch_cost,
                     inps=inputs['answer'],
                     logits=ng.stack(logits_concat, span, 1),
                     labels=inputs['answer'],
                     drop=dropout_val)

# Inference Mode for validation dataset:
with Layer.inference_mode_on():
    eval_outputs = dict(logits=ng.stack(logits_concat, span, 1),
                        labels=inputs['answer'],
                        drop=drop_pointer)

# Now bind the computations we are interested in
print('generating transformer')
def __call__(self, in_obj, channel_axes="C", spatial_axes=("D", "H", "W"), **kwargs): """ Arguments: in_obj (Op): Input op channel_axes (str): name of the expected channel axis type - defaults to "C" spatial_axes (tuple): names of expected depth, height and width axis types - defaults to "D", "H", and "W" """ if isinstance(spatial_axes, dict): spatial_axes = tuple( spatial_axes.get(name, name) for name in ("D", "H", "W")) elif isinstance(spatial_axes, tuple): if len(spatial_axes) < 3: raise ValueError( "spatial_axes must have length 3 (e.g. ('D', 'H', 'W'))") spatial_axes = tuple( name if name else default for name, default in zip(spatial_axes, ("D", "H", "W"))) orig_axes = in_obj.axes in_obj = reorder_spatial_axes(in_obj, channel_axes, spatial_axes) channel_axes = in_obj.axes.get_by_names(channel_axes) spatial_axes = in_obj.axes.get_by_names(*spatial_axes) filter_axes = self._filter_axes(channel_axes, spatial_axes) # mark 'K' as a shadow axis for the initializers. axes_map = shadow_axes_map(filter_axes.find_by_name('K')) filter_axes = ng.make_axes([ axis if axis.name != 'K' else list(axes_map.keys())[0] for axis in filter_axes ]) if not self.initialized: if not self.weight_norm: self.W = ng.variable(axes=filter_axes, initial_value=self.init, metadata={ "label": LABELS["weight"] }).named("W") else: self.v = ng.variable(axes=filter_axes, initial_value=self.init, metadata={ "label": LABELS["weight"] }).named("v") out_axes = ng.make_axes( [filter_axes.get_by_names("K__NG_SHADOW")]) v_norm = ng.mean(ng.square(self.v), out_axes=out_axes) self.g = ng.variable(axes=out_axes, initial_value=self.init, metadata={ "label": LABELS["weight"] }).named("g") self.W = self.g * self.v * ng.reciprocal( ng.sqrt(v_norm + 1e-3)) else: if filter_axes != self.W.axes: raise ValueError( ("{layer_name} layer has already been initialized with an " "input object which has resulted in filter axes: " "{existing_filter_axes}. This new input object has axes: " "{input_axes}, which implies the need for filter axes: " "{new_filter_axes} which are different than the existing " "filter axes.").format( layer_name=self.name, existing_filter_axes=self.W.axes, input_axes=in_obj.axes, new_filter_axes=filter_axes, )) output = ng.map_roles( self._conv_op(in_obj, channel_axes, spatial_axes), axes_map) # Reorder the output to match the input order output_axis_order = ng.make_axes( [output.axes.find_by_name(ax.name)[0] for ax in orig_axes]) # Remove introduced axes. If their length is > 1, then perhaps they should be kept slices = [ 0 if (ax not in orig_axes) and ax.length == 1 else slice(None) for ax in output.axes ] output = ng.tensor_slice(output, slices) # New axes with length > 1 may have been introduced. Add them to the end. output_axis_order = output_axis_order | output.axes return ng.axes_with_order(output, output_axis_order)
    layer_0 = LookupTable(50, 100, init, update=False)
else:
    layer_0 = Preprocess(functor=expand_onehot)

# model initialization
seq1 = Sequential([layer_0,
                   rlayer,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))])

optimizer = RMSProp()

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                   usebits=True)
eval_outputs = dict(cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
    loss_computation = make_bound_computation(transformer, eval_outputs, inputs)

    cbs = make_default_callbacks(output_file=args.output_file,
def __init__(self):
    self.ng_computation = lambda Y, T: ng.mean(ng.square(Y - T), out_axes=()) / 2.
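# Usage sketch: assuming this __init__ belongs to a cost class (hypothetically
# named `MeanSquaredCost`), the stored callable builds a halved-MSE scalar op;
# `preds` and `targets` are illustrative placeholder ops:
cost = MeanSquaredCost()
batch_cost = cost.ng_computation(preds, targets)  # ng.mean(...) / 2, out_axes=()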
# image placeholder
C = ng.make_axis(name='C', length=1)
D = ng.make_axis(name='D', length=1)
H = ng.make_axis(name='H', length=28)
W = ng.make_axis(name='W', length=28)
image_axes = ng.make_axes([C, D, H, W, N])
image = ng.placeholder(axes=image_axes)

# build network graph
generated = generator(z)
D1 = discriminator(image)
D2 = discriminator(generated)

loss_d = -ng.log(D1) - ng.log(1 - D2)
mean_cost_d = ng.mean(loss_d, out_axes=[])

loss_g = -ng.log(D2)
mean_cost_g = ng.mean(loss_g, out_axes=[])

optimizer_d = make_optimizer(name='discriminator_optimizer')
optimizer_g = make_optimizer(name='generator_optimizer')
updates_d = optimizer_d(loss_d, subgraph=discriminator)
updates_g = optimizer_g(loss_g, subgraph=generator)

# compile computations
generator_train_inputs = {'noise': z}
discriminator_train_inputs = {'image': image, 'noise': z}

generator_train_outputs = {'batch_cost': mean_cost_g,
                           'updates': updates_g,
loss = ng.ctc(output,
              ng.flatten(inputs["char_map"]),
              ng.flatten(inputs["audio_length"]),
              ng.flatten(inputs["char_map_length"]))

optimizer = GradientDescentMomentum(args.lr,
                                    momentum_coef=args.momentum,
                                    gradient_clip_norm=args.gradient_clip_norm,
                                    nesterov=args.nesterov)

start = time.time()
updates = optimizer(loss)
stop = time.time()
logger.debug("Optimizer graph creation took {} seconds".format(stop - start))
mean_cost = ng.sequential([updates, ng.mean(loss, out_axes=())])

# Create computation and initialize the transformer to allocate weights
train_computation = ng.computation([mean_cost, output], "all")
if inference is True:
    with Layer.inference_mode_on():
        eval_output = ds2(inputs["audio"],
                          spatial_axes={"H": "frequency", "W": "time"})
    eval_computation = ng.computation(eval_output, "all")

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_function = transformer.add_computation(train_computation)
gradient_penalty = ng.square(grad_norm - 1)

if args.loss_type == "WGAN-GP":
    gp = args.gp_scale * gradient_penalty
    weight_clipping = None
elif args.loss_type == "WGAN":  # standard WGAN with no gradient penalty
    gp = None
    weight_clipping = args.w_clip

if gp:
    loss_d = D1 - D2 + gp
else:
    loss_d = D1 - D2
mean_cost_d = ng.mean(loss_d, out_axes=[])

loss_g = D2
mean_cost_g = ng.mean(loss_g, out_axes=[])

mean_grad_norm = ng.mean(grad_norm, out_axes=[])

optimizer_d = make_optimizer(name='discriminator_optimizer',
                             weight_clip_value=weight_clipping)
optimizer_g = make_optimizer(name='generator_optimizer')
updates_d = optimizer_d(loss_d, subgraph=discriminator)
updates_g = optimizer_g(loss_g, subgraph=generator)

# noise and data generators
train_set = DataGenerator((data_dim, args.batch_size), 0,
                          data_type=args.data_type)
noise_gen = NormalNoise((noise_dim, args.batch_size), 0)
y_onehot = ng.one_hot(inputs['label'], axis=ax.Y)
train_prob_main = inception.seq2(inception.seq1(inputs['image']))
train_prob_main = ng.map_roles(train_prob_main, {"C": ax.Y.name})
train_loss_main = ng.cross_entropy_multi(train_prob_main, y_onehot,
                                         enable_softmax_opt=False)

train_prob_aux = inception.seq_aux(inception.seq1(inputs['image']))
train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name})
train_loss_aux = ng.cross_entropy_multi(train_prob_aux, y_onehot,
                                        enable_softmax_opt=False)

batch_cost = ng.sequential([
    optimizer(train_loss_main + 0.4 * train_loss_aux),
    ng.mean(train_loss_main, out_axes=())
])
train_computation = ng.computation([batch_cost], 'all')

# Build the computations for inference (evaluation)
with Layer.inference_mode_on():
    inference_prob = inception.seq2(inception.seq1(inputs['image']))
    slices = [0 if cx.name in ("H", "W") else slice(None)
              for cx in inference_prob.axes]
    inference_prob = ng.tensor_slice(inference_prob, slices)
    inference_prob = ng.map_roles(inference_prob, {"C": "Y"})
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                          inputs['label'])