Example #1
    def test_pipeline_optimize_works_fine(self):
        pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                            feature_transform_algorithm=Normalizer(),
                            classifier=RandomForest())

        data_reader = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__)) +
            '/tests_files/dataset_header_classes.csv',
            has_header=True,
            contains_classes=True)

        self.assertIsInstance(pipeline.get_classifier(), RandomForest)
        self.assertIsInstance(pipeline.get_feature_selection_algorithm(),
                              SelectKBest)
        self.assertIsInstance(pipeline.get_feature_transform_algorithm(),
                              Normalizer)

        accuracy = pipeline.optimize(data_reader.get_x(), data_reader.get_y(),
                                     20, 40, 'ParticleSwarmAlgorithm',
                                     'Accuracy')

        if accuracy != float('inf'):
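            # optimize minimizes fitness; with the 'Accuracy' criterion the
            # fitness is the negated accuracy, hence a finite result should
            # lie in [-1.0, 0.0]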
            self.assertGreaterEqual(accuracy, -1.0)
            self.assertLessEqual(accuracy, 0.0)

        self.assertIsInstance(pipeline.get_classifier(), RandomForest)
        self.assertIsInstance(pipeline.get_feature_selection_algorithm(),
                              SelectKBest)
        self.assertIsInstance(pipeline.get_feature_transform_algorithm(),
                              Normalizer)
Example #2
def load_pipeline(file_name: str) -> Pipeline:
    """
    Load a NiaAML pipeline from a binary file.

    Args:
        file_name (str):
            path to a binary pipeline file
    Note:
        See NiaAML's documentation for more details
        on the use of the Pipeline class.
    """
    return Pipeline.load(file_name)
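
# a minimal usage sketch of the helper above (the path is hypothetical):
# pipeline = load_pipeline('example_files/pipeline.ppln')
# all of the Pipeline class's methods are then available on the result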
Example #3
    def test_pipeline_export_text_works_fine(self):
        pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                            feature_transform_algorithm=Normalizer(),
                            classifier=RandomForest())

        with tempfile.TemporaryDirectory() as tmp:
            pipeline.export_text(os.path.join(tmp, 'pipeline'))
            self.assertTrue(os.path.exists(os.path.join(tmp, 'pipeline.txt')))
            self.assertEqual(1, len(os.listdir(tmp)))

            pipeline.export_text(os.path.join(tmp, 'pipeline.txt'))
            self.assertTrue(os.path.exists(os.path.join(tmp, 'pipeline.txt')))
            self.assertEqual(1, len(os.listdir(tmp)))
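            # note: as the assertions above suggest, export_text appends the
            # '.txt' extension when it is missing, so both calls write to the
            # same single file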

        self.assertIsNotNone(pipeline.to_string())
        self.assertGreater(len(pipeline.to_string()), 0)
Example #4
    def test_pipeline_run_works_fine(self):
        pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                            feature_transform_algorithm=Normalizer(),
                            classifier=RandomForest())

        data_reader = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__)) +
            '/tests_files/dataset_header_classes.csv',
            has_header=True,
            contains_classes=True)
        pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 20, 40,
                          'ParticleSwarmAlgorithm', 'Accuracy')
        predicted = pipeline.run(
            pandas.DataFrame(
                numpy.random.uniform(low=0.0,
                                     high=15.0,
                                     size=(30, data_reader.get_x().shape[1]))))

        self.assertEqual(predicted.shape, (30, ))

        s1 = set(data_reader.get_y())
        s2 = set(predicted)
        self.assertTrue(s2.issubset(s1))
        self.assertTrue(len(s2) > 0 and len(s2) <= 2)
Example #5
from niaaml import Pipeline
from niaaml.classifiers import AdaBoost
from niaaml.preprocessing.feature_selection import SelectKBest
from niaaml.preprocessing.feature_transform import Normalizer
"""
This example presents how to export a pipeline object into a file that can later be loaded back into a Python program as a Pipeline object.
"""

# instantiate a Pipeline object with the AdaBoost classifier, the SelectKBest
# feature selection algorithm and Normalizer as the feature transform algorithm
pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                    feature_transform_algorithm=Normalizer(),
                    classifier=AdaBoost())

# export the object to a file for later use
pipeline.export('exported_pipeline.ppln')
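
# the exported file can later be restored with Pipeline.load, e.g.
# (a minimal sketch, assuming 'exported_pipeline.ppln' was written above):
# loaded_pipeline = Pipeline.load('exported_pipeline.ppln')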
Example #6
    def test_pipeline_setters_work_fine(self):
        pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                            feature_transform_algorithm=Normalizer(),
                            classifier=RandomForest())

        pipeline.set_classifier(AdaBoost())
        pipeline.set_feature_selection_algorithm(SelectPercentile())
        pipeline.set_feature_transform_algorithm(StandardScaler())
        # mask of selected features: True keeps a feature, False drops it
        pipeline.set_selected_features_mask(
            numpy.array([1, 1, 0, 0], dtype=bool))

        self.__y = numpy.array([
            'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
            'Class 2', 'Class 2', 'Class 2', 'Class 1', 'Class 1', 'Class 2',
            'Class 1', 'Class 2', 'Class 1', 'Class 1', 'Class 1', 'Class 1',
            'Class 2', 'Class 1'
        ])
        self.__predicted = numpy.array([
            'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 2', 'Class 2',
            'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 1',
            'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
            'Class 2', 'Class 2'
        ])
        pipeline.set_stats(OptimizationStats(self.__predicted, self.__y))

        self.assertIsInstance(pipeline.get_classifier(), AdaBoost)
        self.assertIsInstance(pipeline.get_feature_selection_algorithm(),
                              SelectPercentile)
        self.assertIsInstance(pipeline.get_feature_transform_algorithm(),
                              StandardScaler)
        self.assertIsInstance(pipeline.get_stats(), OptimizationStats)
Example #7
import os
import pandas

from niaaml import Pipeline
from niaaml.classifiers import MultiLayerPerceptron
from niaaml.data import CSVDataReader
from niaaml.preprocessing.encoding import encode_categorical_features
from niaaml.preprocessing.feature_selection import VarianceThreshold
from niaaml.preprocessing.feature_transform import Normalizer
from niaaml.preprocessing.imputation import impute_features

# prepare a data reader for a csv file that contains categorical features and
# missing values (the file name below is illustrative)
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) +
                            '/example_files/dataset_categorical_missing.csv',
                            has_header=False,
                            contains_classes=True)

features = data_reader.get_x()

# we use the utility method impute_features to get imputers for the features
# with missing values; you may instead instantiate and fit imputers yourself
# and pass them in as a dictionary (as long as they are implemented as this
# framework suggests), with column names as keys, or column indices if the
# csv has no header
# there should be one imputer for each feature that has missing values
# this example uses SimpleImputer
features, imputers = impute_features(features, 'SimpleImputer')

# exactly the same goes for encoders
_, encoders = encode_categorical_features(features, 'OneHotEncoder')

# instantiate a Pipeline object
pipeline = Pipeline(feature_selection_algorithm=VarianceThreshold(),
                    feature_transform_algorithm=Normalizer(),
                    classifier=MultiLayerPerceptron(),
                    categorical_features_encoders=encoders,
                    imputers=imputers)

# run the pipeline optimization process (returns the fitness value and, along
# the way, sets the best found parameters for the classifier, the feature
# selection algorithm and the feature transform algorithm)
pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 10, 50,
                  'ParticleSwarmAlgorithm', 'Accuracy')

# run the pipeline using dummy data
# you could run the pipeline before the optimization process, but the
# predictions would be meaningless, as nothing in the pipeline has been
# fitted to the given dataset yet
predicted = pipeline.run(
    pandas.DataFrame([[
        10.32440339, 3.195964543, 1.215275549, 3.741461311, 11.6736581,
        6.435247906, 'a'
    ]]))
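
# the trailing 'a' is the value of the categorical feature; the encoder
# produced by encode_categorical_features and passed to the Pipeline
# transforms it before classification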
Example #8
import os
import numpy
import pandas

from niaaml import Pipeline
from niaaml.classifiers import MultiLayerPerceptron
from niaaml.data import CSVDataReader
from niaaml.preprocessing.feature_selection import VarianceThreshold
from niaaml.preprocessing.feature_transform import Normalizer
"""
This example presents how to use the Pipeline class individually. You may use this if you want to test out a specific classification pipeline.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) +
                            '/example_files/dataset.csv',
                            has_header=False,
                            contains_classes=True)

# instantiate a Pipeline object
pipeline = Pipeline(feature_selection_algorithm=VarianceThreshold(),
                    feature_transform_algorithm=Normalizer(),
                    classifier=MultiLayerPerceptron())

# run the pipeline optimization process (returns the fitness value and, along
# the way, sets the best found parameters for the classifier, the feature
# selection algorithm and the feature transform algorithm)
pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 10, 50,
                  'ParticleSwarmAlgorithm', 'Accuracy')

# run the pipeline using dummy data
# you could run the pipeline before the optimization process, but the
# predictions would be meaningless, as nothing in the pipeline has been
# fitted to the given dataset yet
predicted = pipeline.run(
    pandas.DataFrame(
        numpy.random.uniform(low=0.0,
                             high=15.0,
                             size=(30, data_reader.get_x().shape[1]))))

# the pipeline variable now holds a Pipeline object that can be used for
# further classification, exported as an object (to be loaded and reused
# later) or exported as a text file
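# for instance (a minimal sketch; the file names are illustrative):
# pipeline.export('my_pipeline.ppln')   # binary export, restorable via Pipeline.load
# pipeline.export_text('my_pipeline')   # human-readable export, writes my_pipeline.txt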
Example #9
import os
from niaaml import Pipeline

"""
This example presents how to load a saved Pipeline object from a file. You can use all of its methods after it has been loaded successfully.
"""

# load Pipeline object from a file
pipeline = Pipeline.load(os.path.dirname(os.path.abspath(__file__)) +
                         '/example_files/pipeline.ppln')

# all of the Pipeline class's methods can be called after a successful load
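
# for example, predictions can then be made with run (a minimal sketch; the
# hypothetical new_data frame must have the same columns as the dataset the
# pipeline was optimized on):
# predicted = pipeline.run(new_data)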
Example #10
def run(self):
    # read the input csv (classes unknown) and classify it with a previously
    # saved pipeline, emitting the predictions as a string
    dataReader = CSVDataReader(src=self.__data.csvSrc,
                               contains_classes=False,
                               has_header=self.__data.csvHasHeader)
    pipeline = Pipeline.load(self.__data.pipelineSrc)
    predictions = pipeline.run(dataReader.get_x())
    self.ran.emit(str(predictions))