def save_evaluation_plots(training_configs):
    """Create and save learning curves for all models in the batch"""
    for i, training_config in enumerate(training_configs):
        print('Saving plot for Model {}: {}'.format(i + 1,
                                                    training_config.name))
        model = training_config.get_by_model_key(False)
        checkpoint = models.ModelCheckpoint(model)
        checkpoint.load(training_config.get_model_path('checkpoint'))
        if not checkpoint.loaded:
            print('Not evaluated')
            continue
        path = os.path.join(training_config.models_dir,
                            "{}_lc.png".format(training_config.name))
        commons.save_learning_curve(checkpoint.training_losses,
                                    checkpoint.cv_losses, path)


def print_evaluation_report(training_config):
    """Print the training and evaluation results for a model"""
    # Training Config
    print('Training Config')
    for key, val in training_config.__dict__.items():
        print('{}\t{}'.format(key, val))
    print()

    # Checkpoint
    model = training_config.get_by_model_key(False)
    checkpoint = models.ModelCheckpoint(model)
    checkpoint.load(training_config.get_model_path('checkpoint'))
    if not checkpoint.loaded:
        print('Not evaluated')
        return

    print('Last checkpoint stats')
    for key, val in checkpoint.__dict__.items():
        print('{}\t{}'.format(key, val))


def save_evaluation_report(training_configs, config_path):
    """Compile and save hyper-tuning report for all models in the batch"""
    hps = []
    for i, training_config in enumerate(training_configs):
        print('Saving report for Model {}: {}'.format(i + 1,
                                                      training_config.name))
        model = training_config.get_by_model_key(False)
        checkpoint = models.ModelCheckpoint(model)
        checkpoint.load(training_config.get_model_path('checkpoint'))
        if not checkpoint.loaded:
            print('Not evaluated')
            continue
        if training_config.model == 'conv_autoencoder':
            hps.append(_get_hps_for_autoencoder(training_config, checkpoint))
        elif training_config.model == 'cnn_classifier':
            hps.append(_get_hps_for_classifier(training_config, checkpoint))
        else:
            raise Exception('Invalid model code: {}'.format(
                training_config.model))
    with open(os.path.join(os.path.dirname(config_path), 'hps.txt'),
              'w') as rep_file:
        rep_file.write('\n'.join(['\t'.join(hp) for hp in hps]))
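
Example usage of the three reporting helpers above (a minimal sketch; the batch
config path and the list-returning loader load_batch_configs are hypothetical
stand-ins, not part of the code above):

config_path = 'configs/batch.json'                      # hypothetical path
training_configs = load_batch_configs(config_path)      # hypothetical loader -> list of TrainingConfig
save_evaluation_plots(training_configs)                 # one learning-curve PNG per model
save_evaluation_report(training_configs, config_path)   # writes hps.txt next to the config file
for tc in training_configs:
    print_evaluation_report(tc)                         # per-model config and checkpoint dump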
Example #4
def show_plot(train_config_file, opt=1):
    """Plot learning curve for a training process"""
    training_config = train.TrainingConfig.load_from_file(train_config_file)

    # Model initialization
    model = training_config.get_by_model_key(False)
    checkpoint = models.ModelCheckpoint(model)
    checkpoint.load(training_config.get_model_path('checkpoint'))
    if not checkpoint.loaded:
        print('Not evaluated')
        return

    if opt == 1:
        # Training loss vs validation loss
        commons.plot_learning_curve(checkpoint.training_losses,
                                    checkpoint.cv_losses,
                                    close=True)
    elif opt == 2:
        # Validation accuracies vs model-specific polled accuracies
        commons.plot_learning_curve(
            checkpoint.cv_accuracies,
            checkpoint.model_specific['polled_accuracies'],
            close=True)
    else:
        return
    # Keep the process alive so the plot window stays open
    time.sleep(60)
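
A possible invocation of show_plot() (the config file path is hypothetical):

show_plot('configs/cnn_classifier_1.json', opt=1)  # plot training vs validation loss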
Example #5
def train(training_config,
          plot_learning_curves=False,
          cuda=False,
          email=False):
    """Train a model using the specified training configuration
    Arguments:
        training_config: Instance of TrainingConfig
        plot_learning_curves: Whether to plot learning curves at the end of
            each epoch (useful for monitoring training)
        cuda: Use True to train on GPU
        email: Use True to send email notifications on training completion or failure
    """
    print('Training model {} [CUDA = {}, Plot = {}]'.format(
        training_config.name, cuda, plot_learning_curves))
    # Exception block to catch training failures and send email notifications
    try:
        if training_config.ignore:
            print('Ignoring model')
            return

        # Model initialization
        model = training_config.get_by_model_key(cuda)

        # Load checkpoint
        checkpoint = models.ModelCheckpoint(model)
        print('Model Size: {} params'.format(checkpoint.trainable_params))
        if training_config.resume:
            model.load_state(training_config.get_model_path('state'))
            checkpoint.load(training_config.get_model_path('checkpoint'))

        # Data generators for Training and Validation sets
        train_parts, cv_part, test_part = dp.load_created_partitions(
            training_config.dataset_path)
        if len(train_parts) == 0:
            raise Exception('No training partitions found')
        training_set = dp.PartitionBatchGenerator(train_parts,
                                                  training_config.batch_size,
                                                  mode='train')
        training_set_len = len(training_set)
        cv_set = dp.PartitionBatchGenerator(cv_part,
                                            training_config.batch_size,
                                            mode='cv')
        cv_set_len = len(cv_set)

        if checkpoint.epoch >= training_config.num_epochs:
            print('Already completed {} epochs'.format(checkpoint.epoch))
            return

        # Training loop
        for curr_epoch in range(checkpoint.epoch, training_config.num_epochs):

            # Plot learning curves after first epoch
            if plot_learning_curves and curr_epoch > 0:
                commons.plot_learning_curve(checkpoint.training_losses,
                                            checkpoint.cv_losses,
                                            close=True)

            # Train on training set
            model.begin_training()
            loss = 0
            train_start_time = time.time()
            progress = commons.ProgressBar(training_set_len,
                                           status='Training epoch %s' %
                                           str(curr_epoch + 1))
            for i, (x, y) in enumerate(training_set):
                loss += model.train_batch(x, y)
                progress.update(i)
            train_stop_time = time.time()
            training_time = train_stop_time - train_start_time
            checkpoint.training_times.append(training_time)
            progress.complete(
                status='Done training epoch {} in {} seconds'.format(
                    str(curr_epoch + 1), training_time))
            avg_loss = loss / training_set_len
            checkpoint.training_losses.append(avg_loss)
            print('Average training loss per batch:', avg_loss)

            # Evaluate on validation set
            model.begin_evaluation()
            loss_cv = 0
            for i, (x_cv, y_cv) in enumerate(cv_set):
                loss_batch_cv = model.evaluate(x_cv, y_cv)
                loss_cv += loss_batch_cv
            avg_loss_cv = loss_cv / cv_set_len
            checkpoint.cv_losses.append(avg_loss_cv)
            if checkpoint.best_loss is None:
                checkpoint.best_loss = avg_loss_cv
            else:
                checkpoint.best_loss = min(checkpoint.best_loss, avg_loss_cv)
            print('Average validation loss per batch:', avg_loss_cv)
            print('Best Loss:', checkpoint.best_loss)

            # Post evaluation model specific actions
            model.post_evaluation(checkpoint)

            print()

            # Checkpoint
            checkpoint.epoch += 1
            model.save_state(training_config.get_model_path('state'))
            checkpoint.save(training_config.get_model_path('checkpoint'))
            if checkpoint.best_loss == avg_loss_cv:
                model.save_state(training_config.get_model_path('state_best'))
                checkpoint.save(
                    training_config.get_model_path('checkpoint_best'))

        print('Training complete')

        # Success email
        if email:
            emailer.sendmail(
                'Model Training Complete: {}'.format(training_config.name),
                'Model Config: {}\nModel Checkpoint: {}'.format(
                    str(training_config.get_dict()),
                    str(checkpoint.get_dict())))
    except Exception as ex:
        print('Model Training Failed: {}'.format(str(ex)))
        # Failure email
        if email:
            emailer.sendmail(
                'Model Training Failed: {}'.format(training_config.name),
                'Error: {}'.format(traceback.format_exc()))
        raise
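
A minimal driver for the train() routine above (a sketch; the config path is
hypothetical and TrainingConfig is assumed importable as in show_plot()):

config = TrainingConfig.load_from_file('configs/model1.json')  # hypothetical path
train(config, plot_learning_curves=True, cuda=True, email=False)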
Example #6
    n_classes = 100

x_train_pct, y_train_pct = m.sample_train(x_train, y_train, train_pct)

m.print_params(feature_extractor, embedding_dim, n_centers_per_class,
               n_classes, lr, sigma, batch_size, epochs, dataset,
               input_shape, patience)

for i in range(n_trials):

    rbf_model, softmax_model, embeddings = m.construct_models(
        feature_extractor, embedding_dim, n_centers_per_class, n_classes,
        lr, sigma)

    # Callbacks Setup
    callbacks = [m.EarlyStopping(monitor='val_loss', patience=patience)]
    callbacks2 = [m.EarlyStopping(monitor='val_loss', patience=patience),
                  m.ModelCheckpoint(filepath, monitor='val_loss', verbose=0,
                                    save_best_only=True, mode='min')]

    # Training Models

    ''' Softmax Model / Plain Model
    '''
    history_plain = softmax_model.fit(x_train_pct, y_train_pct,
                                      batch_size=batch_size,
                                      epochs=epochs,
                                      verbose=1,
                                      validation_data=(x_test, y_test),
                                      callbacks=callbacks2)

    ''' Pre trained Softmax Model.
Example #7
def main(args):

    history = []


    # Command Line Arguments

    feature_extractor = args.feature_extractor
    filepath = args.file_path
    dataset = args.dataset
    n_trials = args.n_trials

    # Dataset Setup

    if dataset == "CIFAR10":
        x_train, x_test, x_val, y_train, y_test, y_val = adl.load_cifar10()
        n_classes = 10
    elif dataset == "CIFAR100":
        x_train, x_test, x_val, y_train, y_test, y_val = adl.load_cifar100()
        n_classes = 100
    elif dataset == "TinyImagenet":
        x_train, x_test, x_val, y_train, y_test, y_val = adl.load_tiny_imagenet()
        n_classes = 200       


    for pct in ["10", "20", "30"]:
            
        x_train_pct, y_train_pct = x_train[pct], y_train[pct]


        m.print_params(feature_extractor, embedding_dim, n_centers_per_class,
                       n_classes, lr, sigma, batch_size, epochs, dataset,
                       input_shape, patience)

        for i in range(n_trials):

            rbf_model, softmax_model, embeddings = m.construct_models(
                feature_extractor, embedding_dim, n_centers_per_class,
                n_classes, lr, sigma)


            # Callbacks Setup

            callbacks = [m.EarlyStopping(monitor='val_loss', patience=patience)]
            callbacks2 = [m.EarlyStopping(monitor='val_loss', patience=patience),
                          m.ModelCheckpoint(filepath, monitor='val_loss', verbose=0,
                                            save_best_only=True, mode='min')]


            # Training Models

            ''' Softmax Model / Plain Model
            '''
            print("Model with softmax layer")
            history_plain = softmax_model.fit(x_train_pct, y_train_pct,
                                              batch_size=batch_size,
                                              epochs=epochs,
                                              verbose=1,
                                              validation_data=(x_val, y_val),
                                              callbacks=callbacks2)

            # Restore the best checkpoint and score the plain model on the test set
            softmax_model.load_weights(filepath)
            error_softmax = softmax_model.evaluate(x_test, y_test, verbose=0)


            ''' Pre trained Softmax Model.
                With K-Means Initialization.
                With Gauss Kernel.
            '''
            print("Model with gauss kernel and initialization")
            rbf_model, softmax_model, embeddings = m.construct_models(
                feature_extractor, embedding_dim, n_centers_per_class,
                n_classes, lr, sigma, kernel_type="gauss")
            softmax_model.load_weights(filepath)
            init_keys = m.get_initial_weights(embeddings, x_train_pct, y_train_pct,
                                              n_centers_per_class, n_classes,
                                              embedding_dim, init_method="KMEANS")
            rbf_model.layers[-1].set_keys(init_keys)

            history_gauss_kmeans = rbf_model.fit(x_train_pct, y_train_pct,
                                                 batch_size=batch_size,
                                                 epochs=epochs,
                                                 verbose=1,
                                                 validation_data=(x_val, y_val),
                                                 callbacks=callbacks)

            error_rbf_kmeans = rbf_model.evaluate(x_test, y_test, verbose=0)


            ''' Non pre trained Model.
                Without Initialization.
                With Gauss Kernel.
            '''
            print("Model with gauss kernel and without initialization")
            rbf_model, _, _ = m.construct_models(
                feature_extractor, embedding_dim, n_centers_per_class,
                n_classes, lr, sigma, kernel_type="gauss")

            history_gauss = rbf_model.fit(x_train_pct, y_train_pct,
                                          batch_size=batch_size,
                                          epochs=epochs,
                                          verbose=1,
                                          validation_data=(x_val, y_val),
                                          callbacks=callbacks)

            error_rbf = rbf_model.evaluate(x_test, y_test, verbose=0)

            # Record of Highest Validation Accuracies
            highest_plain = np.max(history_plain.history["val_acc"])
            highest_gauss_kmeans = np.max(history_gauss_kmeans.history["val_acc"])
            highest_gauss = np.max(history_gauss.history["val_acc"])

            history.append({"plain": highest_plain,
                            "gauss_means": highest_gauss_kmeans,
                            "gauss": highest_gauss,
                            "plain_error": error_softmax,
                            "error_rbf": error_rbf,
                            "error_rbf_kmeans": error_rbf_kmeans})



            with open("Train_Results_"+feature_extractor+str(int(train_pct*100))+"_trial_"+str(i), "wb") as f:
                pickle.dump(history, f)
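
main() reads only four attributes from args; a minimal argparse wiring
consistent with that (flag names and defaults are assumptions) could be:

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--feature_extractor", default="VGG")    # hypothetical default
    parser.add_argument("--file_path", default="best_model.h5")  # checkpoint path for ModelCheckpoint
    parser.add_argument("--dataset", default="CIFAR10",
                        choices=["CIFAR10", "CIFAR100", "TinyImagenet"])
    parser.add_argument("--n_trials", type=int, default=3)
    main(parser.parse_args())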
Example #8
    feature_extractor = str(sys.argv[1]).upper()
    dataset = str(sys.argv[2]).upper()


# Models Weights Record

model_name_softmax = "model-{}-{}-softmax.h5".format(
    int(train_pct * 100), n_centers_per_class)
model_name_gauss_kmeans = "model-{}-{}-gauss-kmeans.h5".format(
    int(train_pct * 100), n_centers_per_class)
model_name_gauss_kmedoids = "model-{}-{}-gauss-kmedoids.h5".format(
    int(train_pct * 100), n_centers_per_class)
model_name_gauss_no_init = "model-{}-{}-gauss-no-init.h5".format(
    int(train_pct * 100), n_centers_per_class)


# Callbacks Setup

cbs_softmax = [m.EarlyStopping(monitor='val_loss', patience=patience),
               m.ModelCheckpoint(model_name_softmax, monitor='val_loss',
                                 verbose=0, save_best_only=True, mode='min')]

cbs_gauss_kmeans = [m.EarlyStopping(monitor='val_loss', patience=patience),
                    m.ModelCheckpoint(model_name_gauss_kmeans, monitor='val_loss',
                                      verbose=0, save_best_only=True, mode='min')]

cbs_gauss_kmedoids = [m.EarlyStopping(monitor='val_loss', patience=patience),
                      m.ModelCheckpoint(model_name_gauss_kmedoids, monitor='val_loss',
                                        verbose=0, save_best_only=True, mode='min')]

cbs_gauss_no_init = [m.EarlyStopping(monitor='val_loss', patience=patience),
                     m.ModelCheckpoint(model_name_gauss_no_init, monitor='val_loss',
                                       verbose=0, save_best_only=True, mode='min')]
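
# Each list above pairs early stopping with a best-only checkpoint writer, so a
# later fit() can be followed by restoring the best epoch. A sketch (the data
# variables and epoch count are assumed to be defined elsewhere in this script):
#
#   softmax_model.fit(x_train, y_train, validation_data=(x_val, y_val),
#                     epochs=epochs, callbacks=cbs_softmax)
#   softmax_model.load_weights(model_name_softmax)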

# Dataset Setup