Ejemplo n.º 1
0
    def test_learning_set(self):
        """Exercise learning_set in both directory- and array-iterator modes."""
        # Directory iterator option
        train, val = learning_set(
            path, split=split, batch_size=batch_size,
            iterator_mode=None, classes=classes)
        assert isinstance(path, str), 'the path should be in a string format'
        assert isinstance(split, (float, np.float32, int)), \
            ' the data split should be a number'
        assert isinstance(classes, list), \
            'the classes should be inputted as a list'
        for label in classes:
            assert isinstance(label, str), 'the class should be a string'
        assert isinstance(train, keras.preprocessing.image.DirectoryIterator),\
            'the training set should be an image iterator type of object'
        assert isinstance(val, keras.preprocessing.image.DirectoryIterator),\
            'the validation set should be an image iterator type of object'
        assert isinstance(batch_size, int), \
            'the batch size should be an integer'
        # array iterator option
        file_tuples = catalogue._data_tuples_from_fnames(input_path=data_path)
        pickle_target = data_path + 'test_1.pkl'
        catalogue.rgb_list(file_tuples, storage_location=pickle_target)
        image_tuples = handling.pickled_data_loader(data_path, 'test_1')

        train, val = learning_set(
            image_list=image_tuples, split=split, batch_size=batch_size,
            iterator_mode='arrays', classes=classes)
        assert isinstance(train,
                          keras.preprocessing.image.NumpyArrayIterator), \
            'the training set should be an image iterator type of object'
        assert isinstance(val, keras.preprocessing.image.NumpyArrayIterator),\
            'the validation set should be an image iterator type of object'
Ejemplo n.º 2
0
    def test_model_analysis(self):
        """End-to-end check that reporting.model_analysis yields a DataFrame."""
        holdout_per_class = 1
        file_tuples = catalogue._data_tuples_from_fnames(input_path=data_path)
        pickle_target = data_path + 'test_1.pkl'
        catalogue.rgb_list(file_tuples, storage_location=pickle_target)
        image_tuples = handling.pickled_data_loader(data_path, 'test_1')

        holdout_names = preprocessing.hold_out_test_set(
            data_path, number_of_files_per_class=holdout_per_class)

        test_list, learn_list = catalogue.data_set_split(
            image_tuples, holdout_names)
        train, val = catalogue.learning_set(image_list=learn_list,
                                            split=split,
                                            classes=['noise', 'one'],
                                            iterator_mode='arrays')
        testing_set = catalogue.test_set(image_list=test_list,
                                         classes=['noise', 'one'],
                                         iterator_mode='arrays')
        model, history = cnn.build_model(train, val,
                                         config_path='./hardy/test/')

        result = reporting.model_analysis(model, testing_set, test_list)

        assert isinstance(result, pd.DataFrame)
Ejemplo n.º 3
0
    def test_save_load_data(self):
        """Round-trip the simple pickling save / load helper."""
        file_tuples = catalogue._data_tuples_from_fnames(input_path=data_path)
        pickle_target = data_path + 'test_1.pkl'

        catalogue.rgb_list(file_tuples, storage_location=pickle_target)
        image_tuples = handling.pickled_data_loader(data_path, 'test_1')

        outcome = catalogue.save_load_data('test_pickled_data',
                                           data=image_tuples,
                                           save=True,
                                           location='./hardy/test/')
        assert outcome == 'Successfully Pickled'
        loaded = catalogue.save_load_data('test_pickled_data',
                                          data=image_tuples,
                                          load=True,
                                          location='./hardy/test/')
        assert isinstance(loaded, list), 'the data was correctly loaded'
        # clean up the artifact written by the save step
        os.remove('./hardy/test/test_pickled_data.npy')
        print('compressed file correctly removed after testing')
Ejemplo n.º 4
0
    def test_data_split(self):
        """Verify the hold-out split returns well-formed lists."""
        files_per_class = 3
        file_tuples = catalogue._data_tuples_from_fnames(input_path=data_path)
        pickle_target = data_path + 'test_1' + '.pkl'
        catalogue.rgb_list(file_tuples, storage_location=pickle_target)
        image_tuples = handling.pickled_data_loader(data_path, 'test_1')

        holdout_names = preprocessing.hold_out_test_set(
            data_path, number_of_files_per_class=files_per_class)
        test_list, learn_list = catalogue.data_set_split(
            image_tuples, holdout_names)
        assert isinstance(holdout_names, list), 'format should be a list'
        # two classes in the fixture data, so expect 2 files per class count
        assert len(holdout_names) == 2 * files_per_class, \
            'the test set is not the correct length'
        assert isinstance(test_list, list), 'format should be a list'
        assert isinstance(learn_list, list), 'format should be a list'
Ejemplo n.º 5
0
    def test_data_wrapper(self):
        """Run the data wrapper and confirm the transform output is pickled."""
        tform_command_list, tform_command_dict = \
            arbitrage.import_tform_config(tform_config_path)

        run.data_wrapper(tform_command_list[0],
                         data_path,
                         tform_command_dict,
                         iterator_mode='arrays',
                         classes=['noise', 'one'],
                         project_name='test_wrapper')
        for entry in os.listdir(data_path):
            if entry.endswith('.pkl'):
                image_data = handling.pickled_data_loader(
                    data_path, tform_command_list[0])
                assert isinstance(image_data, list),\
                    'The images are incorrectly pickled'
Ejemplo n.º 6
0
    def test_regular_plot_list(self):
        """
        Testing the Tuple-List image visualization wrapper.

        Inputs the raw tuple-list from the prior wrapper and performs the
        visualizations as called in the "standard" methods, then checks
        that each pickled entry is a (filename, image_array, label) tuple.
        """

        data_tups = catalogue._data_tuples_from_fnames(input_path=data_path)
        data_storage = data_path + 'test_1.pkl'
        catalogue.regular_plot_list(data_tups, storage_location=data_storage)
        plot_tups = handling.pickled_data_loader(data_path, 'test_1')

        for row in plot_tups:
            # isinstance is the idiomatic type check (was `type(x) is T`)
            assert isinstance(row, tuple), "List-of-Tuples has non-tuple?"
            assert isinstance(row[0], str), \
                "File Name in Tuple is wrong format."
            assert isinstance(row[1], np.ndarray),\
                "List-of-image-Tuples is not in np.ndarray format??"
            assert isinstance(row[2], str), "Class label is not a string?"
Ejemplo n.º 7
0
    def test_test_set(self):
        """Exercise test_set in both directory- and array-iterator modes."""
        # Directory iterator option
        testing = test_set(path, batch_size=batch_size, iterator_mode=None,
                           classes=classes)
        assert isinstance(path, str), 'the path should be in a string format'
        assert isinstance(classes, list), \
            'the classes should be inputted as a list'
        for label in classes:
            assert isinstance(label, str), 'the class should be a string'
        assert isinstance(
            testing, keras.preprocessing.image.DirectoryIterator
        ), 'the training set should be an image iterator'
        # array iterator option
        file_tuples = catalogue._data_tuples_from_fnames(input_path=data_path)
        pickle_target = data_path + 'test_1.pkl'
        catalogue.rgb_list(file_tuples, storage_location=pickle_target)
        image_tuples = handling.pickled_data_loader(data_path, 'test_1')

        testing = test_set(image_list=image_tuples, batch_size=batch_size,
                           iterator_mode='arrays', classes=classes)
        assert isinstance(
            testing, keras.preprocessing.image.NumpyArrayIterator
        ), 'the training set should be an image iterator'
Ejemplo n.º 8
0
def classifier_wrapper(input_path,
                       test_set_filenames,
                       run_name,
                       config_path,
                       classifier='tuner',
                       iterator_mode='arrays',
                       split=0.1,
                       target_size=(80, 80),
                       color_mode='rgb',
                       batch_size=32,
                       image_path=None,
                       classes=None,
                       project_name='tuner_run',
                       k_fold=False,
                       k=None,
                       **kwarg):
    '''
    Single "Universal" wrapping function to set up and run the CNN and Tuner
    on any properly labeled image set.

    Operates in either of two formats:
        "arrays"  : Takes data as "List_of_Image_Tuples"
        "else"    : Takes data as "image_path" of sorted image folders

    Parameters:
    -----------
    input_path : str
                 path to the raw .csv files containing the data to classify
    test_set_filenames : list
                         The list containing the strings of filenames
                         randomly selected to be part of the test set.
    run_name : str
               name used to create a folder for storing the results of this
               run
    config_path : str
                  string containing the path to the yaml file
                  representing the classifier hyperparameters
    classifier : str
                 option cnn or tuner
    iterator_mode : str
                    option to use images from arrays directly or save the
                    .png and use a directory iterator mode
    split : float
            the percentage of the learning set to use for the validation step
    target_size : tuple
                  image target size. Presented as a tuple indicating number
                  of pixels composing the two dimensions of the image (w x h)
    color_mode : str
                 color mode passed to the image iterators (e.g. 'rgb')
    batch_size : int
                 The number of files to group up into a batch
    image_path : str or None
                 path to the sorted image folders; required when
                 iterator_mode is not 'arrays'
    classes : list or None
              A list containing strings of the classes the data is divided
              in. The class name represents the folder name the files are
              contained in. Defaults to ['class_1', 'class_2'].
    project_name : str
                   name of the folder to be created for storing the results
                   of the tuning
    k_fold : bool
             option to run k-fold cross validation instead of a single
             train/validation split (cnn classifier only)
    k : int or None
        number of folds; required when k_fold is True
    '''
    # Avoid the shared-mutable-default pitfall: build the list per call.
    if classes is None:
        classes = ['class_1', 'class_2']

    if iterator_mode == 'arrays':
        # loading pickled data
        image_data = handling.pickled_data_loader(input_path, run_name)

        assert image_data, 'No image_data list provided'

        test_set_list, learning_set_list = to_catalogue.data_set_split(
            image_data, test_set_filenames)

        if k_fold:
            # k-fold builds its own learning folds; only the hold-out test
            # set is materialized here.
            test_set = to_catalogue.test_set(image_list=test_set_list,
                                             target_size=target_size,
                                             classes=classes,
                                             color_mode=color_mode,
                                             iterator_mode='arrays',
                                             batch_size=batch_size)
        else:
            training_set, validation_set = to_catalogue.learning_set(
                image_list=learning_set_list,
                split=split,
                classes=classes,
                target_size=target_size,
                iterator_mode='arrays',
                batch_size=batch_size,
                color_mode=color_mode)

            test_set = to_catalogue.test_set(image_list=test_set_list,
                                             target_size=target_size,
                                             classes=classes,
                                             color_mode=color_mode,
                                             iterator_mode='arrays',
                                             batch_size=batch_size)
    else:

        assert image_path, 'no path to the image folders was provided'

        training_set, validation_set = to_catalogue.learning_set(
            image_path,
            split=split,
            target_size=target_size,
            iterator_mode='from_directory',
            batch_size=batch_size,
            classes=classes)

        test_set = to_catalogue.test_set(image_path,
                                         target_size=target_size,
                                         classes=classes,
                                         iterator_mode='from_directory',
                                         batch_size=batch_size)
    if k_fold:
        print('test set : {} batches of {} files'.format(
            len(test_set), batch_size))
    else:
        print('training set : {} batches of {} files'.format(
            len(training_set), batch_size))
        print('validation set : {} batches of {} files'.format(
            len(validation_set), batch_size))
        print('test set : {} batches of {} files'.format(
            len(test_set), batch_size))

    if classifier == 'tuner':
        # NOTE(review): this path requires training_set/validation_set,
        # which are not built when k_fold is True — combining
        # classifier='tuner' with k_fold=True raises NameError. Confirm
        # callers never combine the two.
        tuner.build_param(config_path)
        output_path = preprocessing.save_to_folder(input_path, project_name,
                                                   run_name)
        tuned_model = tuner.run_tuner(training_set,
                                      validation_set,
                                      project_name=output_path)
        model, history, metrics = tuner.best_model(tuned_model, training_set,
                                                   validation_set, test_set)
        conf_matrix, report = cnn.report_on_metrics(model, test_set)
        tuner.report_generation(model,
                                history,
                                metrics,
                                output_path,
                                tuner=tuned_model,
                                save_model=True)
    else:
        if k_fold:

            assert k, 'the number of folds needs to be provided'
            validation_score, model, history, final_score = \
                cnn.k_fold_model(k, config_path=config_path,
                                 target_size=target_size,
                                 classes=classes, batch_size=batch_size,
                                 color_mode=color_mode,
                                 iterator_mode=iterator_mode,
                                 image_list=learning_set_list,
                                 test_set=test_set)
            output_path = preprocessing.save_to_folder(input_path,
                                                       project_name, run_name)
            conf_matrix, report = cnn.report_on_metrics(model, test_set)
            tuner.report_generation(model,
                                    history,
                                    final_score,
                                    output_path,
                                    tuner=None,
                                    save_model=True,
                                    config_path=config_path,
                                    k_fold=k_fold,
                                    k=k)

        else:
            model, history = cnn.build_model(training_set,
                                             validation_set,
                                             config_path=config_path)
            metrics = cnn.evaluate_model(model, test_set)

            output_path = preprocessing.save_to_folder(input_path,
                                                       project_name, run_name)
            conf_matrix, report = cnn.report_on_metrics(model, test_set)
            tuner.report_generation(model,
                                    history,
                                    metrics,
                                    output_path,
                                    tuner=None,
                                    save_model=True,
                                    config_path=config_path)

    # Per-image evaluation report; array mode additionally passes the raw
    # test-set tuples so filenames can be matched to predictions.
    if iterator_mode == 'arrays':
        performance_evaluation = reporting.model_analysis(
            model, test_set, test_set_list)
    else:
        performance_evaluation = reporting.model_analysis(model, test_set)
    performance_evaluation.to_csv(output_path +
                                  'report/model_evaluation.csv')
    return