def test_dilated_conv(dilation):
    """Test that the dilated convolution layer output matches expectations.

    Builds a single dilated convolution with constant-initialized filters,
    runs it on an all-ones image, and checks that:
      * the maximum output value equals the analytically expected value
        (which depends on the dilation parameter), and
      * the output spatial size matches the size implied by the dilation
        parameter value.
    """
    image_size = 3
    batch_size = 1
    init_val = 0.1
    conv_size = 3
    pad = 3
    N_filters = 1
    image_channels = 3

    model = Sequential([Convolution((conv_size, conv_size, N_filters),
                                    filter_init=ConstantInit(val=init_val),
                                    padding=pad,
                                    dilation=dilation)])

    # Dummy all-ones image: every filter tap contributes init_val per channel.
    X = np.ones(shape=(batch_size, image_channels, image_size, image_size))
    data = {'image': X, 'iteration': 1}

    data_size = OrderedDict([('N', batch_size),
                             ('C', image_channels),
                             ('H', image_size),
                             ('W', image_size)])
    ax = [ng.make_axis(length=data_size[k], name=k)
          for k in list(data_size.keys())]
    p_axes = ng.make_axes(ax)
    named_inputs = {'image': ng.placeholder(p_axes)}
    outputs = model(named_inputs['image'])
    named_outputs = {outputs.name: outputs}

    with closing(ngt.make_transformer()) as transformer:
        m = make_bound_computation(transformer, named_outputs, named_inputs)
        # Run the bound computation exactly once (while the transformer is
        # still open) and grab its single output tensor.
        result = m(data)
        output = result[list(result.keys())[0]]

    # Effective filter size after dilation, and the implied output size.
    filter_size = dilation * (conv_size - 1) + 1
    out_size = (image_size + 2 * pad - filter_size) + 1

    # 1-d dilated filter mask: nonzero taps every `dilation` positions.
    filt_tmp = np.zeros(filter_size)
    filt_tmp[0::dilation] = 1
    # Max overlap between the dilated filter and the image (in 1-d).
    max_overlap = int(np.min([filter_size, image_size]))
    # The conv is separable in H and W (hence the square) and sums over
    # input channels.
    exp_max_output = init_val * image_channels * \
        (np.sum(filt_tmp[0:max_overlap])) ** 2

    # Compare to one decimal place; expected max depends on dilation.
    # BUG FIX: the failure message previously formatted an unrelated
    # expression instead of the expected value actually asserted against.
    assert int(10 * np.max(output)) == int(10 * exp_max_output), \
        ("Dilated conv max outputs do not match expected: "
         "{} != {}").format(np.max(output), exp_max_output)

    assert np.shape(output) == (batch_size, N_filters, out_size, out_size), \
        ("Dilated conv output is not expected size: "
         "{} != {}").format(np.shape(output),
                            (batch_size, N_filters, out_size, out_size))
# placeholders with descriptive names inputs = dict(image=ng.placeholder([ax.C, ax.H, ax.W, ax.N]), label=ng.placeholder([ax.N])) optimizer = GradientDescentMomentum(0.01, 0.9) output_prob = seq1.train_outputs(inputs['image']) errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label']) loss = ng.cross_entropy_multi(output_prob, ng.one_hot(inputs['label'], axis=ax.Y)) mean_cost = ng.mean(loss, out_axes=()) updates = optimizer(loss) train_outputs = dict(batch_cost=mean_cost, updates=updates) loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors) # Now bind the computations we are interested in transformer = ngt.make_transformer() train_computation = make_bound_computation(transformer, train_outputs, inputs) loss_computation = make_bound_computation(transformer, loss_outputs, inputs) cbs = make_default_callbacks(output_file=args.output_file, frequency=args.iter_interval, train_computation=train_computation, total_iterations=args.num_iterations, eval_set=valid_set, loss_computation=loss_computation, use_progress_bar=args.progress_bar) loop_train(train_set, train_computation, cbs)
# compile computations generator_train_inputs = {'noise': z} discriminator_train_inputs = {'image': image, 'noise': z} generator_train_outputs = { 'batch_cost': mean_cost_g, 'updates': updates_g, 'generated': generated } # for plots discriminator_train_outputs = {'batch_cost': mean_cost_d, 'updates': updates_d} with closing(ngt.make_transformer()) as transformer: train_computation_g = make_bound_computation(transformer, generator_train_outputs, generator_train_inputs) train_computation_d = make_bound_computation(transformer, discriminator_train_outputs, discriminator_train_inputs) # train loop k = 1 # variable rate training of discriminator if k > 1 print('start train loop') for mb_idx, (z_samp, datadict) in enumerate(zip(noise_generator, train_set)): image_samp = 2. * (datadict['image'].astype(np.float) / 255.0) - 1.0 # reshape from NHW to DHWN image_samp = np.expand_dims(image_samp.transpose([1, 2, 0]), axis=0) # reshape from DHWN to CDHWN
noise_gen = NormalNoise((noise_dim, args.batch_size), 0) # input and output dictionaries gen_train_inputs = {'noise': z} dis_train_inputs = {'data': data, 'noise': z} gen_train_outputs = {'batch_cost': mean_cost_g, 'updates': updates_g, 'generated': generated} dis_train_outputs = {'batch_cost': mean_cost_d, 'updates': updates_d, 'grad_norm': mean_grad_norm} # training with closing(ngt.make_transformer()) as transformer: train_computation_g = make_bound_computation(transformer, gen_train_outputs, gen_train_inputs) train_computation_d = make_bound_computation(transformer, dis_train_outputs, dis_train_inputs) train_data = {'Discriminator Cost': [], 'Generator Cost': [], 'Log Gradient Norm': []} progress_bar = ProgressBar(unit="iterations", ncols=100, total=args.num_iterations) for iteration in progress_bar(range(int(args.num_iterations))): for iter_g in range(1):
# Mean discriminator cost (loss_d is built outside this chunk).
mean_cost_d = ng.mean(loss_d, out_axes=[])
# Generator loss: maximize log D(G(z)), i.e. minimize -log(D2).
loss_g = -ng.log(D2)
mean_cost_g = ng.mean(loss_g, out_axes=[])

# Separate optimizers so each player only updates its own subgraph.
optimizer_d = make_optimizer(name='discriminator_optimizer')
optimizer_g = make_optimizer(name='generator_optimizer')
updates_d = optimizer_d(loss_d, subgraph=discriminator)
updates_g = optimizer_g(loss_g, subgraph=generator)

discriminator_train_outputs = {'batch_cost': mean_cost_d, 'updates': updates_d}
generator_train_outputs = {'batch_cost': mean_cost_g, 'updates': updates_g}

with closing(ngt.make_transformer()) as transformer:
    # The generator step only consumes the noise sample.
    train_computation_g = make_bound_computation(
        transformer, generator_train_outputs,
        {'noise_sample': inputs['noise_sample']})
    train_computation_d = make_bound_computation(transformer,
                                                 discriminator_train_outputs,
                                                 inputs)
    # Standalone computation for sampling images from the generator.
    generator_inference = transformer.computation(G, z)

    # train loop
    k = 1  # variable rate training of discriminator, if k > 1
    for mb_idx, data in enumerate(train_set):
        # update discriminator (k times per generator step)
        for iter_d in range(k):
            batch_output_d = train_computation_d(data)
        # update generator
        batch_output_g = train_computation_g(
# Inference Mode for validation dataset: with Layer.inference_mode_on(): eval_outputs = dict(logits=ng.stack(logits_concat, span, 1), labels=inputs['answer'], drop=drop_pointer) # Now bind the computations we are interested in print('generating transformer') eval_frequency = 20 val_frequency = np.ceil(len(train['para']['data']) / params_dict['batch_size']) train_error_frequency = 1000 # Create Transformer transformer = ngt.make_transformer() train_computation = make_bound_computation(transformer, train_outputs, inputs) valid_computation = make_bound_computation(transformer, eval_outputs, inputs) ''' TODO: Include feature to Save and load weights ''' #Ensure batch size is greater than 0 assert(params_dict['batch_size'] > 0) # Start Itearting through epoch_no = 0 for idx, data in enumerate(train_set): train_output = train_computation(data)
# Outputs for the training computation (memn2n graph built above).
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

# Build the evaluation graph in inference mode (no training-only behavior).
with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference, inputs['answer'],
                                       usebits=True)

interactive_outputs = dict(test_preds=a_pred_inference,
                           attention=attention_inference)
eval_outputs = dict(test_cross_ent_loss=eval_loss,
                    test_preds=a_pred_inference)

# Train Loop
with closing(ngt.make_transformer()) as transformer:
    # bind the computations
    train_computation = make_bound_computation(transformer, train_outputs,
                                               inputs)
    loss_computation = make_bound_computation(transformer, eval_outputs,
                                              inputs)
    interactive_computation = make_bound_computation(transformer,
                                                     interactive_outputs,
                                                     inputs)

    # Prepare weight saving, then optionally restore a prior checkpoint.
    weight_saver.setup_save(transformer=transformer, computation=train_outputs)
    if args.restore and os.path.exists(weights_save_path):
        print("Loading weights from {}".format(weights_save_path))
        weight_saver.setup_restore(transformer=transformer,
                                   computation=train_outputs,
                                   filename=weights_save_path)
        weight_saver.restore()
    elif args.restore and os.path.exists(weights_save_path) is False:
# NOTE(review): this fragment starts mid-call — `usebits=True)` closes a
# cross-entropy call whose opening parenthesis lies outside this chunk.
    usebits=True)

eval_outputs = dict(test_cross_ent_loss=eval_loss,
                    test_preds=a_pred_inference)

if args.interactive:
    interactive_outputs = dict(test_preds=a_pred_inference)

if model_file is not None:
    # Instantiate the Saver object to save weights
    weight_saver = Saver()

if args.inference is False:
    # Train Loop
    with closing(ngt.make_transformer()) as transformer:
        # bind the computations
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)
        eval_computation = make_bound_computation(transformer, eval_outputs,
                                                  inputs)
        # Optionally restore previously saved weights before training.
        if (model_file is not None and args.restore):
            weight_saver.setup_restore(transformer=transformer,
                                       computation=train_outputs,
                                       filename=model_file)
            # Restore weight
            weight_saver.restore()
        if model_file is not None:
            weight_saver.setup_save(transformer=transformer,
                                    computation=train_outputs)
        for e in range(args.epochs + 1):
            train_error = []
# Apply optimizer updates, then evaluate the mean cost, in sequence.
updates = optimizer(loss)
batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

# Build inference graph (no training-only behavior such as dropout).
with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference, inputs['answer'],
                                       usebits=True)

interactive_outputs = dict(test_preds=a_pred_inference,
                           attention=attention_inference)

with closing(ngt.make_transformer()) as transformer:
    interactive_computation = make_bound_computation(transformer,
                                                     interactive_outputs,
                                                     inputs)
    # Restore weights from the given model file before interacting.
    weight_saver.setup_restore(transformer=transformer,
                               computation=train_outputs,
                               filename=model_file)
    weight_saver.restore()

    # Add interactive mode
    print("Beginning interactive mode...")
    interactive_loop(interactive_computation, babi)
def train_network(model, train_set, valid_set, batch_size, epochs, log_file):
    '''
    Trains the predefined network and saves progress into a log file.

    Arguments:
        model (object): Defines the model in Neon
        train_set (object): Defines the training set
        valid_set (object): Defines the validation set
        batch_size (int): Minibatch size
        epochs (int): Number of training epochs
        log_file (str): File name to store training logs for plotting
    '''
    # Form placeholders for inputs to the network.
    # The iteration placeholder drives the learning-rate schedule.
    inputs = train_set.make_placeholders(include_iteration=True)

    # Convert labels into one-hot vectors.
    one_hot_label = ng.one_hot(inputs['label'], axis=ax.Y)

    # Step-decay schedule: lr is multiplied by gamma every 2 epochs.
    learning_rate_policy = {'name': 'schedule',
                            'schedule': list(np.arange(2, epochs, 2)),
                            'gamma': 0.6,
                            'base_lr': 0.001}
    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.005,
                                        iteration=inputs['iteration'])

    # Define graph for training.
    train_prob = model(inputs['video'])
    train_loss = ng.cross_entropy_multi(train_prob, one_hot_label)
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])

    with closing(ngt.make_transformer()) as transformer:
        # Graph for validation-set error and misclassification rate.
        # Inference mode avoids dropout in the forward pass.
        with Layer.inference_mode_on():
            inference_prob = model(inputs['video'])
        errors = ng.not_equal(ng.argmax(inference_prob), inputs['label'])
        eval_loss = ng.cross_entropy_multi(inference_prob, one_hot_label)
        eval_outputs = {'cross_ent_loss': eval_loss, 'misclass': errors}
        eval_computation = make_bound_computation(transformer, eval_outputs,
                                                  inputs)
        train_outputs = {'batch_cost': batch_cost}
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)

        interval_cost = 0.0

        # Train in epochs.
        logs = {'train': [], 'validation': [], 'misclass': []}
        for epoch in trange(epochs, desc='Epochs'):
            # Setup the per-epoch training progress bar.
            numBatches = train_set.ndata // batch_size
            tpbar = tqdm(unit='batches', ncols=100, total=numBatches,
                         leave=False)
            train_set.reset()
            valid_set.reset()
            train_log = []
            num_steps = 0  # count batches so the average below is exact
            for step, data in enumerate(train_set):
                data = dict(data)
                data['iteration'] = epoch  # learning schedule based on epochs
                output = train_computation(data)
                train_log.append(float(output['batch_cost']))
                tpbar.update(1)
                tpbar.set_description("Training {:0.4f}".format(
                    float(output['batch_cost'])))
                interval_cost += float(output['batch_cost'])
                num_steps = step + 1
            # BUG FIX: previously divided by `step` (last enumerate index,
            # i.e. numBatches - 1), which overstated the average and raised
            # ZeroDivisionError for a single-batch epoch. Divide by the
            # actual number of batches processed instead.
            tqdm.write("Epoch {epch} complete. "
                       "Avg Train Cost {cost:0.4f}".format(
                           epch=epoch,
                           cost=interval_cost / max(num_steps, 1)))
            interval_cost = 0.0
            tpbar.close()

            validation_loss = run_validation(valid_set, eval_computation)
            tqdm.write("Avg losses: {}".format(validation_loss))
            logs['train'].append(train_log)
            logs['validation'].append(validation_loss['cross_ent_loss'])
            logs['misclass'].append(validation_loss['misclass'])

            # Save log data and plot at the end of each epoch.
            with open(log_file, 'wb') as f:
                pickle.dump(logs, f)
            plot_logs(logs=logs)
def train_mnist_mlp(transformer_name, data_dir=None, rng_seed=12,
                    batch_size=128, train_iter=10, eval_iter=10):
    """Train a small MLP on MNIST and return the observed losses.

    Arguments:
        transformer_name (str): backend to use, either 'cpu' or 'hetr'
        data_dir (str, optional): directory holding the MNIST data files
        rng_seed (int): numpy RNG seed, for run-to-run consistency
        batch_size (int): minibatch size
        train_iter (int): number of training steps to run
        eval_iter (int): number of evaluation steps to run

    Returns:
        tuple: (per-step training batch costs, per-step mean CE losses)
    """
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # This metadata is attached to the graph regardless of transformer;
    # it is simply ignored in the non-HeTr case.
    hetr_device_ids = (0, 1)

    # Use a consistent rng seed between runs.
    np.random.seed(rng_seed)

    # Data pipelines.
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data, batch_size,
                              total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model: scale pixels to [0, 1], one ReLU hidden layer, logistic output.
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([
            Preprocess(functor=lambda x: x / 255.),
            Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
            Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())
        ])

        # Training graph: forward pass, binary cross-entropy against the
        # one-hot labels, then SGD-with-momentum updates followed by the
        # mean batch cost.
        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(
            train_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential(
            [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        # Evaluation graph, built in inference mode.
        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(
            inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime: bind the two computations and drive the iterators.
    with closing(
            ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs,
                                                   inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs,
                                                  inputs)

        train_costs = [
            float(train_computation(next(train_set))['batch_cost'])
            for _ in range(train_iter)
        ]
        ce_loss = [
            np.mean(loss_computation(next(valid_set))['cross_ent_loss'])
            for _ in range(eval_iter)
        ]

    return train_costs, ce_loss
# Apply optimizer updates, then evaluate the mean cost, in sequence.
updates = optimizer(loss)
batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

# Build the inference graph (no training-only behavior such as dropout).
with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference, inputs['answer'],
                                       usebits=True)

interactive_outputs = dict(test_preds=a_pred_inference,
                           attention=attention_inference)

with closing(ngt.make_transformer()) as transformer:
    interactive_computation = make_bound_computation(transformer,
                                                     interactive_outputs,
                                                     inputs)
    # Restore weights from the given model file before interacting.
    weight_saver.setup_restore(transformer=transformer,
                               computation=train_outputs,
                               filename=model_file)
    weight_saver.restore()

    # Add interactive mode
    print("Beginning interactive mode...")
    interactive_loop(interactive_computation, babi)
# Inference Mode for validation dataset: with Layer.inference_mode_on(): eval_outputs = dict(logits=ng.stack(logits_concat, span, 1), labels=inputs['answer'], drop=drop_pointer) # Now bind the computations we are interested in print('generating transformer') eval_frequency = 20 val_frequency = np.ceil(len(train['para']['data']) / params_dict['batch_size']) train_error_frequency = 1000 # Create Transformer transformer = ngt.make_transformer() train_computation = make_bound_computation(transformer, train_outputs, inputs) valid_computation = make_bound_computation(transformer, eval_outputs, inputs) ''' TODO: Include feature to Save and load weights ''' #Ensure batch size is greater than 0 assert (params_dict['batch_size'] > 0) # Start Itearting through epoch_no = 0 for idx, data in enumerate(train_set): train_output = train_computation(data) predictions = train_output['logits'] label_batch = train_output['labels']