Example No. 1
    def test_learning_set(self):
        # Directory iterator option
        train, val = learning_set(path, split=split,
                                  batch_size=batch_size,
                                  iterator_mode=None,
                                  classes=classes)
        assert isinstance(path, str), 'the path should be a string'
        assert isinstance(split, (float, np.float32, int)), \
            'the data split should be a number'
        assert isinstance(classes, list), \
            'the classes should be provided as a list'
        for item in classes:
            assert isinstance(item, str), 'each class should be a string'
        assert isinstance(train, keras.preprocessing.image.DirectoryIterator),\
            'the training set should be an image iterator type of object'
        assert isinstance(val, keras.preprocessing.image.DirectoryIterator),\
            'the validation set should be an image iterator type of object'
        assert isinstance(batch_size, int), \
            'the batch size should be an integer'
        # array iterator option
        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
        data_storage = data_path + 'test_1.pkl'
        catalogue.rgb_list(data_tups, storage_location=data_storage)
        plot_tups = handling.pickled_data_loader(data_path, 'test_1')

        train, val = learning_set(image_list=plot_tups, split=split,
                                  batch_size=batch_size,
                                  iterator_mode='arrays',
                                  classes=classes)
        assert isinstance(train,
                          keras.preprocessing.image.NumpyArrayIterator), \
            'the training set should be an image iterator type of object'
        assert isinstance(val, keras.preprocessing.image.NumpyArrayIterator),\
            'the validation set should be an image iterator type of object'
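These tests rely on module-level fixtures (path, data_path, split, batch_size, classes) defined elsewhere in the test module. A minimal sketch of such definitions, where every path and value is an assumption for illustration rather than the repository's actual configuration:

import numpy as np

# hypothetical fixture values; the real test module defines its own
path = './hardy/test/test_image/'      # folder of class-sorted images
data_path = './hardy/test/test_data/'  # folder holding the raw/pickled data
split = 0.1                            # fraction reserved for validation
batch_size = 1
classes = ['class_1', 'class_2']       # one entry per image subfolder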
Example No. 2
def k_fold_model(k,
                 config_path='./',
                 target_size=(80, 80),
                 classes=['noisy', 'not_noisy'],
                 batch_size=32,
                 color_mode='rgb',
                 iterator_mode='arrays',
                 image_list=None,
                 test_set=None,
                 **kwargs):
    '''
    Performs k-fold cross-validation of the CNN defined by the
    configuration file at config_path: for each of the k folds, a model
    is trained on the remaining folds and scored on the held-out fold.
    The model is then retrained on the entire learning set (split=0)
    and evaluated on the provided test set.

    Returns the average validation accuracy across the k folds, the
    final model, its training history, and its final test-set score.
    '''

    validation_score = []

    for fold in range(k):
        train_data, val_data = to_catalogue.learning_set(
            target_size=target_size,
            classes=classes,
            batch_size=batch_size,
            color_mode=color_mode,
            iterator_mode=iterator_mode,
            image_list=image_list,
            k_fold=True,
            k=k,
            fold=fold,
            **kwargs)
        model, history = build_model(train_data, config_path=config_path)
        validation_score.append(evaluate_model(model, val_data)[1])

    validation_score = np.average(validation_score)
    print('The average model accuracy is {} over {} folds'.format(
        np.round(validation_score, 3), k))

    # Retrain the model with the entirety of the data set
    # and return its performance
    train_data, val_data = to_catalogue.learning_set(target_size=target_size,
                                                     classes=classes,
                                                     batch_size=batch_size,
                                                     color_mode=color_mode,
                                                     iterator_mode=iterator_mode,
                                                     split=0,
                                                     image_list=image_list,
                                                     **kwargs)
    model, history = build_model(train_data, config_path=config_path)
    final_score = evaluate_model(model, test_set)

    print('The final model accuracy is {}'.format(final_score[1]))

    return validation_score, model, history, final_score
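A hedged usage sketch of k_fold_model: it assumes a pickled list of image tuples and a test-set iterator were already built (for instance as in Example No. 1 and with to_catalogue.test_set); the names plot_tups and testing_set are illustrative.

# plot_tups and testing_set are assumed to exist already
validation_score, model, history, final_score = k_fold_model(
    5,                              # number of folds
    config_path='./hardy/test/',
    classes=['noisy', 'not_noisy'],
    image_list=plot_tups,
    test_set=testing_set)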
Example No. 3
    def test_model_analysis(self):

        num_files = 1
        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
        data_storage = data_path + 'test_1.pkl'
        catalogue.rgb_list(data_tups, storage_location=data_storage)
        plot_tups = handling.pickled_data_loader(data_path, 'test_1')

        test_set_filenames = preprocessing.hold_out_test_set(
            data_path, number_of_files_per_class=num_files)

        test_set_list, learning_set_list = catalogue.data_set_split(
            plot_tups, test_set_filenames)
        train, val = catalogue.learning_set(image_list=learning_set_list,
                                            split=split,
                                            classes=['noise', 'one'],
                                            iterator_mode='arrays')
        testing_set = catalogue.test_set(image_list=test_set_list,
                                         classes=['noise', 'one'],
                                         iterator_mode='arrays')
        model, history = cnn.build_model(train,
                                         val,
                                         config_path='./hardy/test/')

        result = reporting.model_analysis(model, testing_set, test_set_list)

        assert isinstance(result, pd.DataFrame)
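model_analysis returns a pandas DataFrame, so its contents can be probed with ordinary pandas calls. A sketch, with the one-row-per-test-file expectation being an assumption rather than something this test asserts:

# assumption: the frame holds one row per entry in the test set list
assert len(result) == len(test_set_list), \
    'the analysis should cover every file in the test set'
print(result.head())   # first few rows of the per-image evaluation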
Example No. 4
    def test_report_on_metrics(self):
        train, val = learning_set(path, split=split, classes=classes,
                                  iterator_mode=None)
        testing = test_set(path, batch_size=batch_size, classes=classes,
                           iterator_mode=None)
        model, history = cnn.build_model(train, val,
                                         config_path='./hardy/test/')
        conf_matrix, report = cnn.report_on_metrics(
                                model, testing,
                                target_names=['noisy', 'not_noisy'])
        assert isinstance(conf_matrix, np.ndarray), \
            'the confusion matrix should be contained in a numpy array'
        assert isinstance(report, str), 'the report should be a string'
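The two return values mirror the usual scikit-learn conventions, a confusion-matrix array and a plain-text classification report, so they can be inspected directly; the row/column reading below is the standard convention, assumed here rather than stated by the test.

# assuming the standard convention: rows are true classes,
# columns are predicted classes
print(conf_matrix)
print(report)   # per-class precision, recall and f1 text summary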
Example No. 5
    def test_evaluate_model(self):
        # define the sets and the model to use for the rest of the testing
        train, val = learning_set(path, split=split, classes=classes,
                                  iterator_mode=None)
        testing = test_set(path, batch_size=batch_size, classes=classes,
                           iterator_mode=None)
        model, history = cnn.build_model(train, val,
                                         config_path='./hardy/test/')
        results = cnn.evaluate_model(model, testing)
        assert isinstance(results, list), \
            'the model performance should be stored in a list'
        assert results[1] <= 1, \
            'the accuracy should be a number no greater than 1'
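evaluate_model appears to return the output of Keras' Model.evaluate, i.e. a list of the loss followed by the metric values. Assuming accuracy is the only configured metric, the result can be unpacked as follows:

# assumes the model was compiled with accuracy as its only metric
loss, accuracy = results
print('loss: {:.3f}, accuracy: {:.3f}'.format(loss, accuracy))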
Example No. 6
    def test_build_model(self):
        train, val = learning_set(path, split=split, classes=classes,
                                  iterator_mode=None)
        model, history = cnn.build_model(train, val,
                                         config_path='./hardy/test/')
        assert isinstance(train, keras.preprocessing.image.DirectoryIterator),\
            'the training set should be an image iterator type of object'
        assert isinstance(val, keras.preprocessing.image.DirectoryIterator),\
            'the validation set should be an image iterator type of object'
        assert isinstance(model, keras.engine.sequential.Sequential),\
            'the CNN model should be a keras sequential model'
        assert isinstance(history, keras.callbacks.callbacks.History), \
            'the history should be the output of a callback function'
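Because build_model returns a plain Keras Sequential model together with its training History, both can be inspected with the standard Keras API; a small follow-up sketch:

# standard keras introspection calls; nothing here is hardy-specific
model.summary()                  # prints the layer-by-layer architecture
print(history.history['loss'])   # training loss recorded for each epoch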
Example No. 7
    def test_plot_history(self):
        train, val = learning_set(path, split=split, classes=classes,
                                  iterator_mode=None)
        model, history = cnn.build_model(train, val,
                                         config_path='./hardy/test/')

        _, ax = plt.subplots(1, 2)
        ax = cnn.plot_history(history)
        epochs, loss = ax[0].lines[0].get_xydata().T
        assert (loss == history.history['loss']).all(), \
            'the plot should contain the loss value per epoch'
        epochs, acc = ax[1].lines[0].get_xydata().T
        assert (acc == history.history['accuracy']).all(), \
            'the plot should contain the accuracy value per epoch'
        pass
Example No. 8
    def test_save_load_model(self):
        train, val = learning_set(path, split=split, classes=classes,
                                  iterator_mode=None)
        model, history = cnn.build_model(train, val,
                                         config_path='./hardy/test/')

        saved_model = cnn.save_load_model(
            model=model, save=True, filepath='./hardy/test/model')
        assert saved_model == 'the model was correctly saved'
        model_loaded = cnn.save_load_model(
            load=True, filepath='./hardy/test/model')
        assert model_loaded, 'the model was not correctly loaded'
        # delete the model file after testing
        os.remove('./hardy/test/model')
        print('the saved model was correctly deleted after testing')

        pass
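The test suggests save_load_model acts as both saver and loader depending on the save/load flags. A hedged round-trip sketch, where the filepath is a placeholder:

# save=True writes the model to disk and returns a confirmation string;
# load=True reads it back and returns the model object (assumption based
# on the assertions in the test above)
cnn.save_load_model(model=model, save=True, filepath='./my_model')
restored = cnn.save_load_model(load=True, filepath='./my_model')
restored.summary()   # the restored object behaves like a keras model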
Example No. 9
    def test_run_tuner(self):
        config_path = './hardy/test/'
        tuner.build_param(config_path)

        train, val = learning_set(path,
                                  split=split,
                                  classes=classes,
                                  iterator_mode=None)
        project_name = 'test_project'
        tuner_model = tuner.run_tuner(train, val, project_name='test_project')
        assert tuner_model.oracle.get_space().space[0].name == \
            'kernel_size', 'the first entry should be the kernel size'
        assert tuner_model.oracle.get_space().space[1].name == 'filters', \
            'the second entry should be the filters'
        assert tuner_model.oracle.get_space().space[2].name == \
            'conv_layers', 'the third entry should be the conv_layers'

        # Deleting the log files

        shutil.rmtree('./' + project_name)

        print('Successfully Deleted the log directory created under test')
        # Generate a test for the BayesianOptimization search function
        # config_path = './hardy/test/test_data/'
        # tuner.build_param(config_path)
        # tuner_model = tuner.run_tuner(train, val,
        #                               project_name='test_project')
        # assert tuner_model.oracle.get_space().space[0].name == \
        #     'kernel_size', 'the first entry should be the kernel size'
        # assert tuner_model.oracle.get_space().space[1].name == 'filters', \
        #     'the second entry should be the filters'
        # assert tuner_model.oracle.get_space().space[2].name == \
        #     'conv_layers', 'the third entry should be the conv_layers'
        # shutil.rmtree('./' + project_name)
        # print('Successfully Deleted the log directory created under test')
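The assertions above go through Keras Tuner's oracle API, which also makes it easy to list the full hyperparameter search space; a small sketch using only that documented interface:

# list every tunable hyperparameter registered with the tuner's oracle
for hyperparameter in tuner_model.oracle.get_space().space:
    print(hyperparameter.name, type(hyperparameter).__name__)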
Example No. 10
    def test_report_generation(self):

        config_path = './hardy/test/'
        train, val = learning_set(path,
                                  split=split,
                                  classes=classes,
                                  iterator_mode=None)
        model, history = cnn.build_model(train, val, config_path=config_path)
        metrics = cnn.evaluate_model(model, val)

        log_dir = './hardy/test/temp_report/'

        tuner.report_generation(model,
                                history,
                                metrics,
                                log_dir,
                                save_model=False,
                                config_path=config_path)

        report_dir = log_dir + 'report/'

        report_location = os.listdir(report_dir)

        for item in report_location:
            if item.endswith('.yaml'):
                with open(report_dir + item, 'r') as file:
                    report = yaml.load(file, Loader=yaml.FullLoader)
                    assert isinstance(report, dict),\
                        'the filetype returned is not a dictionary'
        # remove the report directory after checking its files were
        # correctly created

        shutil.rmtree(log_dir)
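Outside of the test, the generated report can be read back the same way; a sketch assuming report_generation wrote one or more .yaml files under a report/ subfolder of the log directory (the log_dir value is a placeholder):

import os
import yaml

report_dir = './some_run/report/'   # placeholder location
for fname in os.listdir(report_dir):
    if fname.endswith('.yaml'):
        with open(os.path.join(report_dir, fname), 'r') as file:
            report = yaml.load(file, Loader=yaml.FullLoader)
        print(fname, sorted(report.keys()))   # top-level report entries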
Example No. 11
def classifier_wrapper(input_path,
                       test_set_filenames,
                       run_name,
                       config_path,
                       classifier='tuner',
                       iterator_mode='arrays',
                       split=0.1,
                       target_size=(80, 80),
                       color_mode='rgb',
                       batch_size=32,
                       image_path=None,
                       classes=['class_1', 'class_2'],
                       project_name='tuner_run',
                       k_fold=False,
                       k=None,
                       **kwarg):
    '''
    Single "universal" wrapping function to set up and run the CNN and tuner
    on any properly labeled image set.

    Operates in either of two formats:
        "arrays"  : takes the data as a list of image tuples
        "else"    : takes the data from the sorted image folders at image_path

    Parameters:
    -----------
    input_path : str
                 path to the raw .csv files containing the data to classify
    test_set_filenames : list
                         The list containing the strings of filenames
                         randomly selected to be part of the test set.
    run_name : str
               name used to create a folder for storing the results of
               this run
    config_path : str
                  string containing the path to the yaml file
                  representing the classifier hyperparameters
    classifier : str
                 option to use the cnn or the tuner
    iterator_mode : str
                    option to use images from arrays directly or to save
                    the .png files and use a directory iterator mode
    split : float
            the fraction of the learning set to use for the validation step
    target_size : tuple
                  image target size. Presented as a tuple indicating the
                  number of pixels composing the two dimensions of the
                  image (w x h)
    color_mode : str
                 the color mode of the images, e.g. 'rgb'
    batch_size : int
                 The number of files to group up into a batch
    image_path : str
                 path to the folders of sorted images, used when
                 iterator_mode is not 'arrays'
    classes : list
              A list containing strings of the classes the data is divided
              in. The class names represent the folder names the files are
              contained in.
    project_name : str
                   name of the folder to be created for storing the results
                   of the tuning
    k_fold : bool
             option to perform k-fold cross-validation of the cnn
    k : int
        the number of folds to use when k_fold is True
    '''

    if iterator_mode == 'arrays':
        # loading pickled data

        image_data = handling.pickled_data_loader(input_path, run_name)

        assert image_data, 'No image_data list provided'

        test_set_list, learning_set_list = to_catalogue.data_set_split(
            image_data, test_set_filenames)

        if k_fold:
            test_set = to_catalogue.test_set(image_list=test_set_list,
                                             target_size=target_size,
                                             classes=classes,
                                             color_mode=color_mode,
                                             iterator_mode='arrays',
                                             batch_size=batch_size)
        else:
            training_set, validation_set = to_catalogue.learning_set(
                image_list=learning_set_list,
                split=split,
                classes=classes,
                target_size=target_size,
                iterator_mode='arrays',
                batch_size=batch_size,
                color_mode=color_mode)

            test_set = to_catalogue.test_set(image_list=test_set_list,
                                             target_size=target_size,
                                             classes=classes,
                                             color_mode=color_mode,
                                             iterator_mode='arrays',
                                             batch_size=batch_size)
    else:

        assert image_path, 'no path to the image folders was provided'

        training_set, validation_set = to_catalogue.learning_set(
            image_path,
            split=split,
            target_size=target_size,
            iterator_mode='from_directory',
            batch_size=batch_size,
            classes=classes)

        test_set = to_catalogue.test_set(image_path,
                                         target_size=target_size,
                                         classes=classes,
                                         iterator_mode='from_directory',
                                         batch_size=batch_size)
    if k_fold:
        print('test set : {} batches of {} files'.format(
            len(test_set), batch_size))
    else:
        print('training set : {} batches of {} files'.format(
            len(training_set), batch_size))
        print('validation set : {} batches of {} files'.format(
            len(validation_set), batch_size))
        print('test set : {} batches of {} files'.format(
            len(test_set), batch_size))

    if classifier == 'tuner':
        # TODO: warn if no search function is provided and the
        # default RandomSearch is used
        tuner.build_param(config_path)
        output_path = preprocessing.save_to_folder(input_path, project_name,
                                                   run_name)
        tuned_model = tuner.run_tuner(training_set,
                                      validation_set,
                                      project_name=output_path)
        model, history, metrics = tuner.best_model(tuned_model, training_set,
                                                   validation_set, test_set)
        conf_matrix, report = cnn.report_on_metrics(model, test_set)
        tuner.report_generation(model,
                                history,
                                metrics,
                                output_path,
                                tuner=tuned_model,
                                save_model=True)
    else:
        if k_fold:

            assert k, 'the number of folds needs to be provided'
            validation_score, model, history, final_score = \
                cnn.k_fold_model(k, config_path=config_path,
                                 target_size=target_size,
                                 classes=classes, batch_size=batch_size,
                                 color_mode=color_mode,
                                 iterator_mode=iterator_mode,
                                 image_list=learning_set_list,
                                 test_set=test_set)
            output_path = preprocessing.save_to_folder(input_path,
                                                       project_name, run_name)
            conf_matrix, report = cnn.report_on_metrics(model, test_set)
            tuner.report_generation(model,
                                    history,
                                    final_score,
                                    output_path,
                                    tuner=None,
                                    save_model=True,
                                    config_path=config_path,
                                    k_fold=k_fold,
                                    k=k)

        else:
            model, history = cnn.build_model(training_set,
                                             validation_set,
                                             config_path=config_path)
            metrics = cnn.evaluate_model(model, test_set)

            output_path = preprocessing.save_to_folder(input_path,
                                                       project_name, run_name)
            conf_matrix, report = cnn.report_on_metrics(model, test_set)
            tuner.report_generation(model,
                                    history,
                                    metrics,
                                    output_path,
                                    tuner=None,
                                    save_model=True,
                                    config_path=config_path)

    if iterator_mode == 'arrays':
        performance_evaluation = reporting.model_analysis(
            model, test_set, test_set_list)
        performance_evaluation.to_csv(output_path +
                                      'report/model_evaluation.csv')
    else:
        performance_evaluation = reporting.model_analysis(model, test_set)

        performance_evaluation.to_csv(output_path +
                                      'report/model_evaluation.csv')
    return
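For reference, a hedged end-to-end sketch of calling classifier_wrapper in 'arrays' mode. Every path and name below is a placeholder, and it assumes a pickled image list named after run_name already exists under input_path, since that is what handling.pickled_data_loader is given to load:

# all literals below are placeholders, not values from the repository
test_filenames = preprocessing.hold_out_test_set(
    './my_data/', number_of_files_per_class=10)

classifier_wrapper('./my_data/',
                   test_filenames,
                   run_name='run_1',        # assumes ./my_data/run_1.pkl
                   config_path='./config/',
                   classifier='cnn',
                   iterator_mode='arrays',
                   classes=['noisy', 'not_noisy'])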