def run_training(self, iterations):
        sv = tf.train.Supervisor(
            logdir=os.path.join('logs', time.strftime("%Y%m%d-%H%M%S")),
            summary_op=None,
            global_step=self.global_step,
            save_model_secs=3600)

        with sv.managed_session() as sess:
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(coord=coord, sess=sess)
            tf.logging.log(tf.logging.INFO,
                           "Number of parameters %d" % count_params())

            train_rec_loss = 0
            train_disc_loss = 0
            train_enc_loss = 0
            log_time = 100
            for i in range(1, iterations + 1):
                r = sess.run(
                    [
                        self.train_rec_loss, self.rec_optimizer,
                        self.train_disc_loss, self.disc_optimizer,
                        self.train_enc_loss, self.enc_optimizer,
                        self.gan_optimizer, self.critic_optimizer
                    ],
                    feed_dict={self.model.is_training: True})
                loss_r, _, loss_d, _, loss_e, _, _, _ = r
                train_rec_loss += loss_r
                train_disc_loss += loss_d
                train_enc_loss += loss_e
                # Compute training summaries every log_time (100) iterations
                if i % log_time == 0:
                    t_s = sess.run(
                        self.train_summaries,
                        feed_dict={
                            self.model.is_training: False,
                            self.train_rec_loss_p: train_rec_loss / log_time,
                            self.train_disc_loss_p: train_disc_loss / log_time,
                            self.train_enc_loss_p: train_enc_loss / log_time
                        })
                    tf.logging.log(tf.logging.INFO, "\nIteration %d" % i)
                    tf.logging.log(
                        tf.logging.INFO,
                        "Reconstruction Loss %g" % (train_rec_loss / log_time))
                    tf.logging.log(
                        tf.logging.INFO,
                        "Discriminator Loss %g" % (train_disc_loss / log_time))
                    tf.logging.log(
                        tf.logging.INFO,
                        "Encoder Loss %g" % (train_enc_loss / log_time))
                    #tf.logging.log(tf.logging.INFO, "Gan Loss %g" % (train_enc_loss/log_time))
                    #tf.logging.log(tf.logging.INFO, "Critic Loss %g" % (train_enc_loss/log_time))
                    sv.summary_computed(sess, t_s)
                    train_rec_loss = 0
                    train_disc_loss = 0
                    train_enc_loss = 0

                    self.run_validation(sv, sess)
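
Example #1 logs the result of count_params(), which is called with no arguments; in TF1-style code this is typically the total size of all trainable variables in the default graph. A minimal sketch under that assumption (the actual helper is defined elsewhere in the repository):

import numpy as np
import tensorflow as tf

def count_params():
    # Sketch (assumption): sum of element counts over every trainable
    # variable in the default graph, matching the no-argument call above.
    return int(sum(np.prod(v.get_shape().as_list())
                   for v in tf.trainable_variables()))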
Example #2
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)

    #context.set_context(mode=context.GRAPH_MODE)
    context.set_context(mode=context.PYNATIVE_MODE)

    if args.GPU:
        context.set_context(device_target='GPU')

    # parse model argument
    assert args.model.startswith(
        "hournas"), "Only HourNAS models are supported."
    #_, sub_name = args.model.split("_")
    net = hournasnet(args.model,
                     num_classes=args.num_classes,
                     drop_rate=0.0,
                     drop_connect_rate=0.0,
                     global_pool="avg",
                     bn_tf=False,
                     bn_momentum=None,
                     bn_eps=None)
    print(net)
    print("Total number of parameters:", count_params(net))
    cfg = edict({
        'image_height': args.image_size,
        'image_width': args.image_size,
    })
    cfg.batch_size = args.batch_size
    print(cfg)

    #input_size = net.default_cfg['input_size'][1]
    val_data_url = args.data_path  #os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_cifar10(val_data_url,
                                         repeat_num=1,
                                         training=False,
                                         cifar_cfg=cfg)

    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)

    model = Model(net, loss, metrics=eval_metrics)

    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
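
Here count_params(net) receives a MindSpore Cell. A minimal sketch of such a helper, assuming it simply sums element counts over net.get_parameters() (the real helper may differ):

import numpy as np

def count_params(net):
    # Sketch (assumption): total number of elements across all parameters
    # of a mindspore.nn.Cell.
    return int(sum(np.prod(p.shape) for p in net.get_parameters()))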
Example #3
def train_and_predict_AR(model, train_data_inputs, train_data_targets, test_data, tr_to_val_split=0.9, tr_verbose=False):
    
    # Count number of model parameters
    total_num_params, total_num_trainable_params = count_params(model=model)
    print("The total number of params: {} and the number of trainable params:{}".format(total_num_params, total_num_trainable_params))
    
    # Apply concat data to concatenate the rows that have columns with signal (not the timestamp)
    train_data_inputs, train_data_targets = concat_data(train_data_inputs), concat_data(train_data_targets) 

    tr_losses, val_losses, model = train_armodel(model, nepochs=model.num_epochs, inputs=train_data_inputs,
        targets=train_data_targets, tr_split=tr_to_val_split, tr_verbose=tr_verbose)
    
    if len(test_data) > 0:
        predictions_ar = predict_armodel(model=model, eval_input=train_data_inputs[-1], n_predict=len(test_data))
        test_error = mean_squared_error(y_true=test_data[:, -1], y_pred=predictions_ar)
    else:
        #NOTE: Heuristically setting the number of future predictions
        predictions_ar = predict_armodel(model=model, eval_input=train_data_inputs[-1], n_predict=132)
        test_error = np.nan
    
    tr_error = tr_losses[-1] # latest training error
    val_error = val_losses[-1] # latest validation error
    #print("**********************************************************************************************************")
    print("{} - {},  {} - {},  {} - {:.8f},  {} - {:.8f},  {}, - {:.8f}".format(
                                                                "Model", "AR",
                                                                "P",
                                                                model.num_taps,
                                                                "Training Error",
                                                                tr_error,
                                                                "Validation Error",
                                                                val_error,
                                                                "Test Error",
                                                                test_error))
    print("***********************************************************************************************************")
    '''
    with open("results_{}.txt".format(model_type), "a") as fp:
        print("**********************************************************************************************************")
        print("{} - {},  {} - {},  {} - {:.8f},  {} - {:.8f},  {}, - {:.8f}".format(
                                                                "Model", "AR",
                                                                "P",
                                                                model.num_taps,
                                                                "Training Error",
                                                                tr_error,
                                                                "Validation Error",
                                                                val_error,
                                                                "Test Error",
                                                                test_error), fp)
        print("***********************************************************************************************************")
    '''
    return predictions_ar, test_error, val_error, tr_error
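
In this example count_params(model) returns a (total, trainable) pair for a PyTorch module. A minimal sketch under that assumption:

def count_params(model):
    # Sketch (assumption): (total, trainable) parameter counts for a torch.nn.Module.
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total, trainable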
Example #4
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)

    context.set_context(mode=context.GRAPH_MODE)

    if args.GPU:
        context.set_context(device_target='GPU')

    # parse model argument
    assert args.model.startswith(
        "tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")
    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=0.0,
                  drop_connect_rate=0.0,
                  global_pool="avg",
                  bn_tf=False,
                  bn_momentum=None,
                  bn_eps=None)
    print("Total number of parameters:", count_params(net))

    input_size = net.default_cfg['input_size'][1]
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)

    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }

    ckpt = load_checkpoint(args.ckpt)
    load_param_into_net(net, ckpt)
    net.set_train(False)

    model = Model(net, loss, metrics=eval_metrics)

    metrics = model.eval(val_dataset, dataset_sink_mode=False)
    print(metrics)
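
LabelSmoothingCrossEntropy above mixes the one-hot target with a uniform distribution before taking the cross entropy. A minimal NumPy sketch of the smoothed-target construction (smooth_one_hot is a hypothetical name; the MindSpore loss class handles this internally):

import numpy as np

def smooth_one_hot(labels, num_classes, smooth_factor):
    # Hypothetical helper: convex combination of the one-hot target
    # and a uniform distribution over all classes.
    one_hot = np.eye(num_classes)[labels]
    return (1.0 - smooth_factor) * one_hot + smooth_factor / num_classes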
Example #5
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)
    devid, args.rank_id, args.rank_size = 0, 0, 1

    context.set_context(mode=context.GRAPH_MODE)

    if args.distributed:
        if args.GPU:
            init("nccl")
            context.set_context(device_target='GPU')
        else:
            init()
            devid = int(os.getenv('DEVICE_ID'))
            context.set_context(device_target='Ascend',
                                device_id=devid,
                                reserve_class_name_in_scope=False)
        context.reset_auto_parallel_context()
        args.rank_id = get_rank()
        args.rank_size = get_group_size()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            device_num=args.rank_size)
    else:
        if args.GPU:
            context.set_context(device_target='GPU')

    is_master = not args.distributed or (args.rank_id == 0)

    # parse model argument
    assert args.model.startswith(
        "tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")
    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=args.drop,
                  drop_connect_rate=args.drop_connect,
                  global_pool="avg",
                  bn_tf=args.bn_tf,
                  bn_momentum=args.bn_momentum,
                  bn_eps=args.bn_eps)

    if is_master:
        print("Total number of parameters:", count_params(net))
    # input image size of the network
    input_size = net.default_cfg['input_size'][1]

    train_dataset = val_dataset = None
    train_data_url = os.path.join(args.data_path, 'train')
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    if args.train:
        train_dataset = create_dataset(args.batch_size,
                                       train_data_url,
                                       workers=args.workers,
                                       distributed=args.distributed,
                                       input_size=input_size)
        batches_per_epoch = train_dataset.get_dataset_size()

    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)
    # TimeMonitor assumes training mode: batches_per_epoch is only defined when args.train is set.
    time_cb = TimeMonitor(data_size=batches_per_epoch)
    loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                               drop_overflow_update=False)

    lr_array = get_lr(base_lr=args.lr,
                      total_epochs=args.epochs,
                      steps_per_epoch=batches_per_epoch,
                      decay_epochs=args.decay_epochs,
                      decay_rate=args.decay_rate,
                      warmup_epochs=args.warmup_epochs,
                      warmup_lr_init=args.warmup_lr,
                      global_epoch=0)
    lr = Tensor(lr_array)

    loss_cb = LossMonitor(lr_array,
                          args.epochs,
                          per_print_times=args.per_print_times,
                          start_epoch=0)

    param_group = add_weight_decay(net, weight_decay=args.weight_decay)

    if args.opt == 'sgd':
        if is_master:
            print('Using SGD optimizer')
        optimizer = SGD(param_group,
                        learning_rate=lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay,
                        loss_scale=args.loss_scale)

    elif args.opt == 'rmsprop':
        if is_master:
            print('Using rmsprop optimizer')
        optimizer = RMSProp(param_group,
                            learning_rate=lr,
                            decay=0.9,
                            weight_decay=args.weight_decay,
                            momentum=args.momentum,
                            epsilon=args.opt_eps,
                            loss_scale=args.loss_scale)

    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }

    if args.ckpt:
        ckpt = load_checkpoint(args.ckpt)
        load_param_into_net(net, ckpt)
        net.set_train(False)

    model = Model(net,
                  loss,
                  optimizer,
                  metrics=eval_metrics,
                  loss_scale_manager=loss_scale_manager,
                  amp_level=args.amp_level)

    net_ema = copy.deepcopy(net)
    net_ema.set_train(False)
    assert args.ema_decay > 0, "EMA should be used in tinynet training."

    ema_cb = EmaEvalCallBack(network=net,
                             ema_network=net_ema,
                             loss_fn=loss,
                             eval_dataset=val_dataset,
                             decay=args.ema_decay,
                             save_epoch=args.ckpt_save_epoch,
                             dataset_sink_mode=args.dataset_sink,
                             start_epoch=0)

    callbacks = [loss_cb, ema_cb, time_cb] if is_master else []

    if is_master:
        print("Training on " + args.model + " with " + str(args.num_classes) +
              " classes")

    model.train(args.epochs,
                train_dataset,
                callbacks=callbacks,
                dataset_sink_mode=args.dataset_sink)
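
add_weight_decay in Example #5 builds the grouped-parameter list handed to SGD/RMSProp. A minimal sketch, assuming the usual convention of excluding biases and 1-D (BatchNorm) parameters from weight decay; the real helper in the repository may differ:

def add_weight_decay(net, weight_decay=1e-5):
    # Sketch (assumption): biases and 1-D parameters get weight_decay=0,
    # everything else uses the given value (MindSpore group-parameter format).
    decay, no_decay = [], []
    for param in net.trainable_params():
        if len(param.shape) <= 1 or param.name.endswith('.bias'):
            no_decay.append(param)
        else:
            decay.append(param)
    return [{'params': no_decay, 'weight_decay': 0.0},
            {'params': decay, 'weight_decay': weight_decay}]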
Example #6
def train_and_predict_RNN(model, options, train_data_inputs, train_data_targets, test_data, tr_to_val_split=0.9, tr_verbose=False, use_grid_search=0):

    # Count number of model parameters
    total_num_params, total_num_trainable_params = count_params(model=model)
    print("The total number of params: {} and the number of trainable params:{}".format(total_num_params, total_num_trainable_params))

    # Apply concat data to concatenate the rows that have columns with signal (not the timestamp)
    train_data_inputs, train_data_targets = concat_data(train_data_inputs), concat_data(train_data_targets) 
    
    if len(train_data_inputs.shape) == 2:
        # Extra dimension to be added
        N, P = train_data_inputs.shape
        train_data_inputs = train_data_inputs.reshape((N, P, model.input_size))
        #train_data_target = train_data_inputs.reshape((N, P, model.input_size))

    # Train -  Validation split
    tr_inputs, tr_targets, val_inputs, val_targets = train_validation_split(
                    train_data_inputs, train_data_targets, tr_split=tr_to_val_split)

    tr_losses, val_losses, model, best_model_wts, best_val_loss, best_val_epoch = train_rnn(model=model, nepochs=model.num_epochs, 
                                                                tr_inputs=tr_inputs, tr_targets=tr_targets, 
                                                                val_inputs=val_inputs, val_targets=val_targets, 
                                                                tr_verbose=tr_verbose)

    print("Model saved at epoch:{} with val loss:{}".format(best_val_epoch, best_val_loss))
    device = get_device()
    model_best = RNN_model(
            input_size=options["input_size"],
            output_size=options["output_size"],
            n_hidden=options["n_hidden"],
            n_layers=options["n_layers"],
            num_directions=options["num_directions"],
            model_type=options["model_type"],
            batch_first=options["batch_first"],
            lr=options["lr"],
            num_epochs=options["num_epochs"],
            ).to(device)

    #model_best = load_model_with_opts(options, model.model_type).to(device)
    # Load the best weights
    model_best.load_state_dict(best_model_wts)

    #if tr_verbose == True:
    #    plot_losses(tr_losses=tr_losses, val_losses=val_losses, logscale=True)

    # Trying to visualise training data predictions
    #predictions_rnn_train = predict_rnn(model=model, eval_input=train_data_inputs[0, :, :].reshape((1, P, -1)), n_predict=len(train_data_targets))
    #plot_training_predictions(ytrain=train_data_targets, predictions=predictions_rnn_train, title="Predictions for Training data")
    eval_input = torch.from_numpy(train_data_inputs[-1, :, :].reshape((1, P, -1)))
    if len(test_data) > 0:
        predictions_rnn = predict_rnn(model=model_best, eval_input=eval_input, n_predict=len(test_data))
        test_error = mean_squared_error(y_true=test_data[:, -1], y_pred=predictions_rnn)
    else:
        #NOTE: Heuristically setting the number of future predictions
        predictions_rnn = predict_rnn(model=model_best, eval_input=eval_input, n_predict=132)
        test_error = np.nan # No reference data to compare against for computing the test error

    tr_error = tr_losses[-1] # latest training error
    val_error = val_losses[-1] # latest validation error
    #print("**********************************************************************************************************")
    if use_grid_search == 0:
        print("{} - {}, {} - {},  {} - {}, {} - {}, {} - {}".format(
            "Model", model_best.model_type, "Training Error", tr_error,
            "Validation Error", val_error, "Best Validation Error", best_val_loss,
            "Test Error", test_error))
        print("***********************************************************************************************************")
    elif use_grid_search == 1:
        print("{} - {}, {} - {},  {} - {}, {} - {}".format(
            "Model", model_best.model_type, "Training Error", tr_error,
            "Validation Error", val_error, "Best Validation Error", best_val_loss))
        print("***********************************************************************************************************")
    
    best_val_loss = best_val_loss.cpu().numpy()
    return predictions_rnn, test_error, best_val_loss, tr_error
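
predict_rnn is called with the last training window and a prediction horizon n_predict. Its implementation is not shown here; a minimal sketch of the one-step-ahead rollout this usage implies, assuming a univariate series (input_size == output_size == 1) and a model that returns the next value for a (1, P, 1) window:

import torch

def rollout_predictions(model, eval_input, n_predict):
    # Illustrative sketch (assumption): repeatedly predict one step ahead and
    # slide the input window forward by one position.
    window = eval_input.clone().float()
    preds = []
    model.eval()
    with torch.no_grad():
        for _ in range(n_predict):
            y_next = model(window).reshape(-1)[-1]   # assumed next-step value
            preds.append(float(y_next))
            window = torch.roll(window, shifts=-1, dims=1)
            window[0, -1, 0] = y_next
    return preds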
Example #7
def train_and_predict_RNN(model,
                          train_data_inputs,
                          train_data_targets,
                          test_data,
                          tr_to_val_split=0.9,
                          tr_verbose=False,
                          use_grid_search=0):

    # Count number of model parameters
    total_num_params, total_num_trainable_params = count_params(model=model)
    print(
        "The total number of params: {} and the number of trainable params:{}".
        format(total_num_params, total_num_trainable_params))

    # Apply concat data to concatenate the rows that have columns with signal (not the timestamp)
    train_data_inputs, train_data_targets = concat_data(
        train_data_inputs), concat_data(train_data_targets)

    if len(train_data_inputs.shape) == 2:
        # Extra dimension to be added
        N, P = train_data_inputs.shape
        train_data_inputs = train_data_inputs.reshape((N, P, model.input_size))
        #train_data_target = train_data_inputs.reshape((N, P, model.input_size))

    # Train -  Validation split
    tr_inputs, tr_targets, val_inputs, val_targets = train_validation_split(
        train_data_inputs, train_data_targets, tr_split=tr_to_val_split)

    tr_losses, val_losses, model = train_rnn(model=model,
                                             nepochs=model.num_epochs,
                                             tr_inputs=tr_inputs,
                                             tr_targets=tr_targets,
                                             val_inputs=val_inputs,
                                             val_targets=val_targets,
                                             tr_verbose=tr_verbose)

    #if tr_verbose == True:
    #    plot_losses(tr_losses=tr_losses, val_losses=val_losses, logscale=True)

    # Trying to visualise training data predictions
    #predictions_rnn_train = predict_rnn(model=model, eval_input=train_data_inputs[0, :, :].reshape((1, P, -1)), n_predict=len(train_data_targets))
    #plot_training_predictions(ytrain=train_data_targets, predictions=predictions_rnn_train, title="Predictions for Training data")

    if len(test_data) > 0:
        predictions_rnn = predict_rnn(
            model=model,
            eval_input=train_data_inputs[-1, :, :].reshape((1, P, -1)),
            n_predict=len(test_data))
        test_error = mean_squared_error(y_true=test_data[:, -1],
                                        y_pred=predictions_rnn)
    else:
        #NOTE: Heuristically setting the number of future predictions
        predictions_rnn = predict_rnn(
            model=model,
            eval_input=train_data_inputs[-1, :, :].reshape((1, P, -1)),
            n_predict=132)
        test_error = np.nan  # No reference data to compare against for computing the test error

    tr_error = tr_losses[-1]  # latest training error
    val_error = val_losses[-1]  # latest validation error
    #print("**********************************************************************************************************")
    if use_grid_search == 0:
        print("{} - {}, {} - {},  {} - {},  {} - {}".format(
            "Model", model.model_type, "Training Error", tr_error,
            "Validation Error", val_error, "Test Error", test_error))
        print(
            "***********************************************************************************************************"
        )
    elif use_grid_search == 1:
        print("{} - {}, {} - {},  {} - {}".format("Model", model.model_type,
                                                  "Training Error", tr_error,
                                                  "Validation Error",
                                                  val_error))
        print(
            "***********************************************************************************************************"
        )

    return predictions_rnn, test_error, val_error, tr_error
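
An illustrative call of the function above (the names rnn_model, X_train_list, Y_train_list and test_array are hypothetical placeholders for data prepared elsewhere):

preds, test_err, val_err, tr_err = train_and_predict_RNN(
    model=rnn_model,
    train_data_inputs=X_train_list,
    train_data_targets=Y_train_list,
    test_data=test_array,
    tr_to_val_split=0.9,
    tr_verbose=True,
    use_grid_search=0)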
Example #8
        train_func = train
        solver_class = AaeSolver

    # If restore then start training from latest saved point
    # But if warm and feature matching is selected then restore last saved
    # point from pixel matching training
    restore = False
    warm = False

    # MNIST++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # Mnist dense with y labels
    if scenario == 1:
        y_dim = 10
        model = ModelDenseMnist(batch_size=128, z_dim=mnist_z_dim, y_dim=y_dim)
        solver = solver_class(model=model)
        print("Number of parameters in model %d" % count_params())
        data = MNIST()
        print('Training Mnist dense with y labels')
        train_func(solver,
                   data,
                   name='Mnist_Dense_y',
                   restore=restore,
                   warm=False)

    # Mnist dense without y labels
    elif scenario == 2:
        y_dim = None
        model = ModelDenseMnist(batch_size=128, z_dim=mnist_z_dim, y_dim=y_dim)
        solver = solver_class(model=model)
        print("Number of parameters in model %d" % count_params())
        data = MNIST()