def run_training(self, iterations):
    sv = tf.train.Supervisor(logdir=os.path.join('logs', time.strftime("%Y%m%d-%H%M%S")),
                             summary_op=None,
                             global_step=self.global_step,
                             save_model_secs=3600)
    with sv.managed_session() as sess:
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)
        tf.logging.log(tf.logging.INFO, "Number of parameters %d" % count_params())

        train_rec_loss = 0
        train_disc_loss = 0
        train_enc_loss = 0
        log_time = 100

        for i in range(1, iterations + 1):
            r = sess.run([self.train_rec_loss, self.rec_optimizer,
                          self.train_disc_loss, self.disc_optimizer,
                          self.train_enc_loss, self.enc_optimizer,
                          self.gan_optimizer, self.critic_optimizer],
                         feed_dict={self.model.is_training: True})
            loss_r, _, loss_d, _, loss_e, _, _, _ = r
            train_rec_loss += loss_r
            train_disc_loss += loss_d
            train_enc_loss += loss_e

            # Compute the training summary on every 100th iteration
            if i % log_time == 0:
                t_s = sess.run(self.train_summaries,
                               feed_dict={
                                   self.model.is_training: False,
                                   self.train_rec_loss_p: train_rec_loss / log_time,
                                   self.train_disc_loss_p: train_disc_loss / log_time,
                                   self.train_enc_loss_p: train_enc_loss / log_time
                               })
                tf.logging.log(tf.logging.INFO, "\nIteration %d" % i)
                tf.logging.log(tf.logging.INFO, "Reconstruction Loss %g" % (train_rec_loss / log_time))
                tf.logging.log(tf.logging.INFO, "Discriminator Loss %g" % (train_disc_loss / log_time))
                tf.logging.log(tf.logging.INFO, "Encoder Loss %g" % (train_enc_loss / log_time))
                #tf.logging.log(tf.logging.INFO, "Gan Loss %g" % (train_enc_loss/log_time))
                #tf.logging.log(tf.logging.INFO, "Critic Loss %g" % (train_enc_loss/log_time))

                sv.summary_computed(sess, t_s)
                train_rec_loss = 0
                train_disc_loss = 0
                train_enc_loss = 0
                self.run_validation(sv, sess)
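# count_params() is called above but not defined in this snippet. A minimal sketch of
# what such a helper might look like for a TF1-style graph is given below; the name and
# behaviour are assumptions for illustration, not the original implementation.
def count_params():
    """Return the total number of scalar values across all trainable variables."""
    total = 0
    for var in tf.trainable_variables():
        n = 1
        for dim in var.get_shape().as_list():
            n *= dim
        total += n
    return total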
def main():
    """Main entrance for evaluation."""
    args = parser.parse_args()
    print(sys.argv)
    #context.set_context(mode=context.GRAPH_MODE)
    context.set_context(mode=context.PYNATIVE_MODE)
    if args.GPU:
        context.set_context(device_target='GPU')

    # parse model argument
    assert args.model.startswith("hournas"), "Only HourNAS models are supported."
    #_, sub_name = args.model.split("_")
    net = hournasnet(args.model,
                     num_classes=args.num_classes,
                     drop_rate=0.0,
                     drop_connect_rate=0.0,
                     global_pool="avg",
                     bn_tf=False,
                     bn_momentum=None,
                     bn_eps=None)
    print(net)
    print("Total number of parameters:", count_params(net))

    cfg = edict({
        'image_height': args.image_size,
        'image_width': args.image_size,
    })
    cfg.batch_size = args.batch_size
    print(cfg)

    #input_size = net.default_cfg['input_size'][1]
    val_data_url = args.data_path  #os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_cifar10(val_data_url,
                                         repeat_num=1,
                                         training=False,
                                         cifar_cfg=cfg)

    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    eval_metrics = {'Validation-Loss': Loss(),
                    'Top1-Acc': Top1CategoricalAccuracy(),
                    'Top5-Acc': Top5CategoricalAccuracy()}

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)

    model = Model(net, loss, metrics=eval_metrics)
    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
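# count_params(net) is used in the MindSpore snippets above and below but is not shown.
# A rough sketch of such a helper for a MindSpore Cell follows; this is an assumed
# implementation for illustration, not the one from the original repository.
def count_params(net):
    """Return the total number of elements across the network's parameters."""
    total = 0
    for param in net.get_parameters():
        n = 1
        for dim in param.shape:
            n *= dim
        total += n
    return total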
def train_and_predict_AR(model, train_data_inputs, train_data_targets, test_data,
                         tr_to_val_split=0.9, tr_verbose=False):
    # Count the number of model parameters
    total_num_params, total_num_trainable_params = count_params(model=model)
    print("The total number of params: {} and the number of trainable params: {}".format(
        total_num_params, total_num_trainable_params))

    # Concatenate the rows that contain the signal columns (not the timestamp)
    train_data_inputs, train_data_targets = concat_data(train_data_inputs), concat_data(train_data_targets)

    tr_losses, val_losses, model = train_armodel(model,
                                                 nepochs=model.num_epochs,
                                                 inputs=train_data_inputs,
                                                 targets=train_data_targets,
                                                 tr_split=tr_to_val_split,
                                                 tr_verbose=tr_verbose)

    if len(test_data) > 0:
        predictions_ar = predict_armodel(model=model,
                                         eval_input=train_data_inputs[-1],
                                         n_predict=len(test_data))
        test_error = mean_squared_error(y_true=test_data[:, -1], y_pred=predictions_ar)
    else:
        # NOTE: Heuristically setting the number of future predictions
        predictions_ar = predict_armodel(model=model,
                                         eval_input=train_data_inputs[-1],
                                         n_predict=132)
        test_error = np.nan

    tr_error = tr_losses[-1]    # latest training error
    val_error = val_losses[-1]  # latest validation error

    #print("**********************************************************************************************************")
    print("{} - {}, {} - {}, {} - {:.8f}, {} - {:.8f}, {} - {:.8f}".format(
        "Model", "AR", "P", model.num_taps,
        "Training Error", tr_error,
        "Validation Error", val_error,
        "Test Error", test_error))
    print("***********************************************************************************************************")
    '''
    with open("results_{}.txt".format(model_type), "a") as fp:
        print("**********************************************************************************************************")
        print("{} - {}, {} - {}, {} - {:.8f}, {} - {:.8f}, {} - {:.8f}".format(
            "Model", "AR", "P", model.num_taps,
            "Training Error", tr_error,
            "Validation Error", val_error,
            "Test Error", test_error), fp)
        print("***********************************************************************************************************")
    '''
    return predictions_ar, test_error, val_error, tr_error
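# The AR and RNN snippets above and below expect count_params(model=...) to return a
# (total, trainable) pair. A minimal PyTorch-style sketch under that assumption
# (illustrative only, not the original helper):
def count_params(model):
    """Return (total number of parameters, number of trainable parameters)."""
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total, trainable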
def main():
    """Main entrance for evaluation."""
    args = parser.parse_args()
    print(sys.argv)
    context.set_context(mode=context.GRAPH_MODE)
    if args.GPU:
        context.set_context(device_target='GPU')

    # parse model argument
    assert args.model.startswith("tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")
    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=0.0,
                  drop_connect_rate=0.0,
                  global_pool="avg",
                  bn_tf=False,
                  bn_momentum=None,
                  bn_eps=None)
    print("Total number of parameters:", count_params(net))

    input_size = net.default_cfg['input_size'][1]
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)
    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {'Validation-Loss': Loss(),
                    'Top1-Acc': Top1CategoricalAccuracy(),
                    'Top5-Acc': Top5CategoricalAccuracy()}

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)

    model = Model(net, loss, metrics=eval_metrics)
    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)
    devid, args.rank_id, args.rank_size = 0, 0, 1

    context.set_context(mode=context.GRAPH_MODE)

    if args.distributed:
        if args.GPU:
            init("nccl")
            context.set_context(device_target='GPU')
        else:
            init()
            devid = int(os.getenv('DEVICE_ID'))
            context.set_context(device_target='Ascend',
                                device_id=devid,
                                reserve_class_name_in_scope=False)
        context.reset_auto_parallel_context()
        args.rank_id = get_rank()
        args.rank_size = get_group_size()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            device_num=args.rank_size)
    else:
        if args.GPU:
            context.set_context(device_target='GPU')

    is_master = not args.distributed or (args.rank_id == 0)

    # parse model argument
    assert args.model.startswith("tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")
    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=args.drop,
                  drop_connect_rate=args.drop_connect,
                  global_pool="avg",
                  bn_tf=args.bn_tf,
                  bn_momentum=args.bn_momentum,
                  bn_eps=args.bn_eps)
    if is_master:
        print("Total number of parameters:", count_params(net))

    # input image size of the network
    input_size = net.default_cfg['input_size'][1]

    train_dataset = val_dataset = None
    train_data_url = os.path.join(args.data_path, 'train')
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    if args.train:
        train_dataset = create_dataset(args.batch_size,
                                       train_data_url,
                                       workers=args.workers,
                                       distributed=args.distributed,
                                       input_size=input_size)
        batches_per_epoch = train_dataset.get_dataset_size()

    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)
    time_cb = TimeMonitor(data_size=batches_per_epoch)
    loss_scale_manager = FixedLossScaleManager(args.loss_scale, drop_overflow_update=False)

    lr_array = get_lr(base_lr=args.lr,
                      total_epochs=args.epochs,
                      steps_per_epoch=batches_per_epoch,
                      decay_epochs=args.decay_epochs,
                      decay_rate=args.decay_rate,
                      warmup_epochs=args.warmup_epochs,
                      warmup_lr_init=args.warmup_lr,
                      global_epoch=0)
    lr = Tensor(lr_array)

    loss_cb = LossMonitor(lr_array,
                          args.epochs,
                          per_print_times=args.per_print_times,
                          start_epoch=0)

    param_group = add_weight_decay(net, weight_decay=args.weight_decay)

    if args.opt == 'sgd':
        if is_master:
            print('Using SGD optimizer')
        optimizer = SGD(param_group,
                        learning_rate=lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay,
                        loss_scale=args.loss_scale)
    elif args.opt == 'rmsprop':
        if is_master:
            print('Using rmsprop optimizer')
        optimizer = RMSProp(param_group,
                            learning_rate=lr,
                            decay=0.9,
                            weight_decay=args.weight_decay,
                            momentum=args.momentum,
                            epsilon=args.opt_eps,
                            loss_scale=args.loss_scale)

    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {'Validation-Loss': Loss(),
                    'Top1-Acc': Top1CategoricalAccuracy(),
                    'Top5-Acc': Top5CategoricalAccuracy()}

    if args.ckpt:
        ckpt = load_checkpoint(args.ckpt)
        load_param_into_net(net, ckpt)
        net.set_train(False)

    model = Model(net,
                  loss,
                  optimizer,
                  metrics=eval_metrics,
                  loss_scale_manager=loss_scale_manager,
                  amp_level=args.amp_level)

    net_ema = copy.deepcopy(net)
    net_ema.set_train(False)
    assert args.ema_decay > 0, "EMA should be used in tinynet training."

    ema_cb = EmaEvalCallBack(network=net,
                             ema_network=net_ema,
                             loss_fn=loss,
                             eval_dataset=val_dataset,
                             decay=args.ema_decay,
                             save_epoch=args.ckpt_save_epoch,
                             dataset_sink_mode=args.dataset_sink,
                             start_epoch=0)

    callbacks = [loss_cb, ema_cb, time_cb] if is_master else []

    if is_master:
        print("Training on " + args.model + " with " + str(args.num_classes) + " classes")

    model.train(args.epochs, train_dataset, callbacks=callbacks,
                dataset_sink_mode=args.dataset_sink)
def train_and_predict_RNN(model, options, train_data_inputs, train_data_targets, test_data,
                          tr_to_val_split=0.9, tr_verbose=False, use_grid_search=0):
    # Count the number of model parameters
    total_num_params, total_num_trainable_params = count_params(model=model)
    print("The total number of params: {} and the number of trainable params: {}".format(
        total_num_params, total_num_trainable_params))

    # Concatenate the rows that contain the signal columns (not the timestamp)
    train_data_inputs, train_data_targets = concat_data(train_data_inputs), concat_data(train_data_targets)

    if len(train_data_inputs.shape) == 2:
        # An extra dimension needs to be added
        N, P = train_data_inputs.shape
        train_data_inputs = train_data_inputs.reshape((N, P, model.input_size))
        #train_data_target = train_data_inputs.reshape((N, P, model.input_size))

    # Train - validation split
    tr_inputs, tr_targets, val_inputs, val_targets = train_validation_split(
        train_data_inputs, train_data_targets, tr_split=tr_to_val_split)

    tr_losses, val_losses, model, best_model_wts, best_val_loss, best_val_epoch = train_rnn(
        model=model,
        nepochs=model.num_epochs,
        tr_inputs=tr_inputs,
        tr_targets=tr_targets,
        val_inputs=val_inputs,
        val_targets=val_targets,
        tr_verbose=tr_verbose)

    print("Model saved at epoch: {} with val loss: {}".format(best_val_epoch, best_val_loss))

    device = get_device()
    model_best = RNN_model(
        input_size=options["input_size"],
        output_size=options["output_size"],
        n_hidden=options["n_hidden"],
        n_layers=options["n_layers"],
        num_directions=options["num_directions"],
        model_type=options["model_type"],
        batch_first=options["batch_first"],
        lr=options["lr"],
        num_epochs=options["num_epochs"],
    ).to(device)
    #model_best = load_model_with_opts(options, model.model_type).to(device)

    # Load the best weights
    model_best.load_state_dict(best_model_wts)

    #if tr_verbose == True:
    #    plot_losses(tr_losses=tr_losses, val_losses=val_losses, logscale=True)

    # Trying to visualise training data predictions
    #predictions_rnn_train = predict_rnn(model=model, eval_input=train_data_inputs[0, :, :].reshape((1, P, -1)), n_predict=len(train_data_targets))
    #plot_training_predictions(ytrain=train_data_targets, predictions=predictions_rnn_train, title="Predictions for Training data")

    eval_input = torch.from_numpy(train_data_inputs[-1, :, :].reshape((1, P, -1)))
    if len(test_data) > 0:
        predictions_rnn = predict_rnn(model=model_best, eval_input=eval_input, n_predict=len(test_data))
        test_error = mean_squared_error(y_true=test_data[:, -1], y_pred=predictions_rnn)
    else:
        # NOTE: Heuristically setting the number of future predictions
        predictions_rnn = predict_rnn(model=model_best, eval_input=eval_input, n_predict=132)
        test_error = np.nan  # No reference to compare against for generating a test error

    tr_error = tr_losses[-1]    # latest training error
    val_error = val_losses[-1]  # latest validation error

    #print("**********************************************************************************************************")
    if use_grid_search == 0:
        print("{} - {}, {} - {}, {} - {}, {} - {}, {} - {}".format(
            "Model", model_best.model_type,
            "Training Error", tr_error,
            "Validation Error", val_error,
            "Best Validation Error", best_val_loss,
            "Test Error", test_error))
        print("***********************************************************************************************************")
    elif use_grid_search == 1:
        print("{} - {}, {} - {}, {} - {}, {} - {}".format(
            "Model", model_best.model_type,
            "Training Error", tr_error,
            "Validation Error", val_error,
            "Best Validation Error", best_val_loss))
        print("***********************************************************************************************************")

    best_val_loss = best_val_loss.cpu().numpy()
    return predictions_rnn, test_error, best_val_loss, tr_error
def train_and_predict_RNN(model, train_data_inputs, train_data_targets, test_data,
                          tr_to_val_split=0.9, tr_verbose=False, use_grid_search=0):
    # Count the number of model parameters
    total_num_params, total_num_trainable_params = count_params(model=model)
    print("The total number of params: {} and the number of trainable params: {}".format(
        total_num_params, total_num_trainable_params))

    # Concatenate the rows that contain the signal columns (not the timestamp)
    train_data_inputs, train_data_targets = concat_data(train_data_inputs), concat_data(train_data_targets)

    if len(train_data_inputs.shape) == 2:
        # An extra dimension needs to be added
        N, P = train_data_inputs.shape
        train_data_inputs = train_data_inputs.reshape((N, P, model.input_size))
        #train_data_target = train_data_inputs.reshape((N, P, model.input_size))

    # Train - validation split
    tr_inputs, tr_targets, val_inputs, val_targets = train_validation_split(
        train_data_inputs, train_data_targets, tr_split=tr_to_val_split)

    tr_losses, val_losses, model = train_rnn(model=model,
                                             nepochs=model.num_epochs,
                                             tr_inputs=tr_inputs,
                                             tr_targets=tr_targets,
                                             val_inputs=val_inputs,
                                             val_targets=val_targets,
                                             tr_verbose=tr_verbose)

    #if tr_verbose == True:
    #    plot_losses(tr_losses=tr_losses, val_losses=val_losses, logscale=True)

    # Trying to visualise training data predictions
    #predictions_rnn_train = predict_rnn(model=model, eval_input=train_data_inputs[0, :, :].reshape((1, P, -1)), n_predict=len(train_data_targets))
    #plot_training_predictions(ytrain=train_data_targets, predictions=predictions_rnn_train, title="Predictions for Training data")

    if len(test_data) > 0:
        predictions_rnn = predict_rnn(model=model,
                                      eval_input=train_data_inputs[-1, :, :].reshape((1, P, -1)),
                                      n_predict=len(test_data))
        test_error = mean_squared_error(y_true=test_data[:, -1], y_pred=predictions_rnn)
    else:
        # NOTE: Heuristically setting the number of future predictions
        predictions_rnn = predict_rnn(model=model,
                                      eval_input=train_data_inputs[-1, :, :].reshape((1, P, -1)),
                                      n_predict=132)
        test_error = np.nan  # No reference to compare against for generating a test error

    tr_error = tr_losses[-1]    # latest training error
    val_error = val_losses[-1]  # latest validation error

    #print("**********************************************************************************************************")
    if use_grid_search == 0:
        print("{} - {}, {} - {}, {} - {}, {} - {}".format(
            "Model", model.model_type,
            "Training Error", tr_error,
            "Validation Error", val_error,
            "Test Error", test_error))
        print("***********************************************************************************************************")
    elif use_grid_search == 1:
        print("{} - {}, {} - {}, {} - {}".format(
            "Model", model.model_type,
            "Training Error", tr_error,
            "Validation Error", val_error))
        print("***********************************************************************************************************")

    return predictions_rnn, test_error, val_error, tr_error
train_func = train
solver_class = AaeSolver

# If restore then start training from latest saved point.
# But if warm and feature matching is selected then restore last saved
# point from pixel matching training.
restore = False
warm = False

# MNIST ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Mnist dense with y labels
if scenario == 1:
    y_dim = 10
    model = ModelDenseMnist(batch_size=128, z_dim=mnist_z_dim, y_dim=y_dim)
    solver = solver_class(model=model)
    print("Number of parameters in model %d" % count_params())
    data = MNIST()
    print('Training Mnist dense with y labels')
    train_func(solver, data, name='Mnist_Dense_y', restore=restore, warm=False)

# Mnist dense without y labels
elif scenario == 2:
    y_dim = None
    model = ModelDenseMnist(batch_size=128, z_dim=mnist_z_dim, y_dim=y_dim)
    solver = solver_class(model=model)
    print("Number of parameters in model %d" % count_params())
    data = MNIST()