Example #1
    def test_pipeline_optimize_works_fine(self):
        pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                            feature_transform_algorithm=Normalizer(),
                            classifier=RandomForest())

        data_reader = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__)) +
            '/tests_files/dataset_header_classes.csv',
            has_header=True,
            contains_classes=True)

        self.assertIsInstance(pipeline.get_classifier(), RandomForest)
        self.assertIsInstance(pipeline.get_feature_selection_algorithm(),
                              SelectKBest)
        self.assertIsInstance(pipeline.get_feature_transform_algorithm(),
                              Normalizer)

        accuracy = pipeline.optimize(data_reader.get_x(), data_reader.get_y(),
                                     20, 40, 'ParticleSwarmAlgorithm',
                                     'Accuracy')

        if accuracy != float('inf'):
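            # optimize minimizes fitness; with the 'Accuracy' criterion the
            # fitness is the negated accuracy, hence a finite result should
            # lie in [-1.0, 0.0]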
            self.assertGreaterEqual(accuracy, -1.0)
            self.assertLessEqual(accuracy, 0.0)

        self.assertIsInstance(pipeline.get_classifier(), RandomForest)
        self.assertIsInstance(pipeline.get_feature_selection_algorithm(),
                              SelectKBest)
        self.assertIsInstance(pipeline.get_feature_transform_algorithm(),
                              Normalizer)
Example #2
def load_pipeline(file_name: str) -> Pipeline:
    """
    Load a NiaAML pipeline from a binary file.

    Args:
        file_name (str):
            path to a binary pipeline file
    Note:
        See NiaAML's documentation for more details
        on the use of the Pipeline class.
    """
    return Pipeline.load(file_name)
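
# a minimal usage sketch of the helper above (the path is hypothetical):
# pipeline = load_pipeline('example_files/pipeline.ppln')
# all of the Pipeline class's methods are then available on the result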
Example #3
    def test_pipeline_export_text_works_fine(self):
        pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                            feature_transform_algorithm=Normalizer(),
                            classifier=RandomForest())

        with tempfile.TemporaryDirectory() as tmp:
            pipeline.export_text(os.path.join(tmp, 'pipeline'))
            self.assertTrue(os.path.exists(os.path.join(tmp, 'pipeline.txt')))
            self.assertEqual(1, len(os.listdir(tmp)))

            pipeline.export_text(os.path.join(tmp, 'pipeline.txt'))
            self.assertTrue(os.path.exists(os.path.join(tmp, 'pipeline.txt')))
            self.assertEqual(1, len(os.listdir(tmp)))
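            # note: as the assertions above suggest, export_text appends the
            # '.txt' extension when it is missing, so both calls write to the
            # same single file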

        self.assertIsNotNone(pipeline.to_string())
        self.assertGreater(len(pipeline.to_string()), 0)
Example #4
    def test_pipeline_run_works_fine(self):
        pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                            feature_transform_algorithm=Normalizer(),
                            classifier=RandomForest())

        data_reader = CSVDataReader(
            src=os.path.dirname(os.path.abspath(__file__)) +
            '/tests_files/dataset_header_classes.csv',
            has_header=True,
            contains_classes=True)
        pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 20, 40,
                          'ParticleSwarmAlgorithm', 'Accuracy')
        predicted = pipeline.run(
            pandas.DataFrame(
                numpy.random.uniform(low=0.0,
                                     high=15.0,
                                     size=(30, data_reader.get_x().shape[1]))))

        self.assertEqual(predicted.shape, (30, ))

        s1 = set(data_reader.get_y())
        s2 = set(predicted)
        self.assertTrue(s2.issubset(s1))
        self.assertTrue(len(s2) > 0 and len(s2) <= 2)
Example #5
from niaaml import Pipeline
from niaaml.classifiers import AdaBoost
from niaaml.preprocessing.feature_selection import SelectKBest
from niaaml.preprocessing.feature_transform import Normalizer
"""
This example presents how to export a pipeline object into a file that can later be loaded back into a Python program as a Pipeline object.
"""

# instantiate a Pipeline object with the AdaBoost classifier, the SelectKBest
# feature selection algorithm and Normalizer as the feature transform algorithm
pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                    feature_transform_algorithm=Normalizer(),
                    classifier=AdaBoost())

# export the object to a file for later use
pipeline.export('exported_pipeline.ppln')
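
# the exported file can later be restored with Pipeline.load, e.g.
# (a minimal sketch, assuming 'exported_pipeline.ppln' was written above):
# loaded_pipeline = Pipeline.load('exported_pipeline.ppln')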
Example #6
    def test_pipeline_setters_work_fine(self):
        pipeline = Pipeline(feature_selection_algorithm=SelectKBest(),
                            feature_transform_algorithm=Normalizer(),
                            classifier=RandomForest())

        pipeline.set_classifier(AdaBoost())
        pipeline.set_feature_selection_algorithm(SelectPercentile())
        pipeline.set_feature_transform_algorithm(StandardScaler())
        # mask of selected features: True keeps a feature, False drops it
        pipeline.set_selected_features_mask(
            numpy.array([1, 1, 0, 0], dtype=bool))

        self.__y = numpy.array([
            'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
            'Class 2', 'Class 2', 'Class 2', 'Class 1', 'Class 1', 'Class 2',
            'Class 1', 'Class 2', 'Class 1', 'Class 1', 'Class 1', 'Class 1',
            'Class 2', 'Class 1'
        ])
        self.__predicted = numpy.array([
            'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 2', 'Class 2',
            'Class 1', 'Class 1', 'Class 1', 'Class 2', 'Class 1', 'Class 1',
            'Class 2', 'Class 2', 'Class 1', 'Class 2', 'Class 1', 'Class 2',
            'Class 2', 'Class 2'
        ])
        pipeline.set_stats(OptimizationStats(self.__predicted, self.__y))

        self.assertIsInstance(pipeline.get_classifier(), AdaBoost)
        self.assertIsInstance(pipeline.get_feature_selection_algorithm(),
                              SelectPercentile)
        self.assertIsInstance(pipeline.get_feature_transform_algorithm(),
                              StandardScaler)
        self.assertIsInstance(pipeline.get_stats(), OptimizationStats)
Example #7
import os
import pandas

from niaaml import Pipeline
from niaaml.classifiers import MultiLayerPerceptron
from niaaml.data import CSVDataReader
from niaaml.preprocessing.encoding import encode_categorical_features
from niaaml.preprocessing.feature_selection import VarianceThreshold
from niaaml.preprocessing.feature_transform import Normalizer
from niaaml.preprocessing.imputation import impute_features

# prepare a data reader for a csv file that contains categorical features and
# missing values (the file name below is illustrative)
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) +
                            '/example_files/dataset_categorical_missing.csv',
                            has_header=False,
                            contains_classes=True)

features = data_reader.get_x()

# we use the utility method impute_features to get imputers for the features
# with missing values; you may instead instantiate and fit imputers yourself
# and pass them in as a dictionary (as long as they are implemented as this
# framework suggests), with column names as keys, or column indices if the
# csv has no header
# there should be one imputer for each feature that has missing values
# this example uses SimpleImputer
features, imputers = impute_features(features, 'SimpleImputer')

# exactly the same goes for encoders
_, encoders = encode_categorical_features(features, 'OneHotEncoder')

# instantiate a Pipeline object
pipeline = Pipeline(feature_selection_algorithm=VarianceThreshold(),
                    feature_transform_algorithm=Normalizer(),
                    classifier=MultiLayerPerceptron(),
                    categorical_features_encoders=encoders,
                    imputers=imputers)

# run the pipeline optimization process (returns the fitness value and, along
# the way, sets the best found parameters for the classifier, the feature
# selection algorithm and the feature transform algorithm)
pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 10, 50,
                  'ParticleSwarmAlgorithm', 'Accuracy')

# run the pipeline using dummy data
# you could run the pipeline before the optimization process, but the
# predictions would be meaningless, as nothing in the pipeline has been
# fitted to the given dataset yet
predicted = pipeline.run(
    pandas.DataFrame([[
        10.32440339, 3.195964543, 1.215275549, 3.741461311, 11.6736581,
        6.435247906, 'a'
    ]]))
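
# the trailing 'a' is the value of the categorical feature; the encoder
# produced by encode_categorical_features and passed to the Pipeline
# transforms it before classification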
Example #8
import os
import numpy
import pandas

from niaaml import Pipeline
from niaaml.classifiers import MultiLayerPerceptron
from niaaml.data import CSVDataReader
from niaaml.preprocessing.feature_selection import VarianceThreshold
from niaaml.preprocessing.feature_transform import Normalizer
"""
This example presents how to use the Pipeline class individually. You may use this if you want to test out a specific classification pipeline.
"""

# prepare data reader using csv file
data_reader = CSVDataReader(src=os.path.dirname(os.path.abspath(__file__)) +
                            '/example_files/dataset.csv',
                            has_header=False,
                            contains_classes=True)

# instantiate a Pipeline object
pipeline = Pipeline(feature_selection_algorithm=VarianceThreshold(),
                    feature_transform_algorithm=Normalizer(),
                    classifier=MultiLayerPerceptron())

# run the pipeline optimization process (returns the fitness value and, along
# the way, sets the best found parameters for the classifier, the feature
# selection algorithm and the feature transform algorithm)
pipeline.optimize(data_reader.get_x(), data_reader.get_y(), 10, 50,
                  'ParticleSwarmAlgorithm', 'Accuracy')

# run the pipeline using dummy data
# you could run the pipeline before the optimization process, but the
# predictions would be meaningless, as nothing in the pipeline has been
# fitted to the given dataset yet
predicted = pipeline.run(
    pandas.DataFrame(
        numpy.random.uniform(low=0.0,
                             high=15.0,
                             size=(30, data_reader.get_x().shape[1]))))

# the pipeline variable now holds a Pipeline object that can be used for
# further classification, exported as an object (to be loaded and reused
# later) or exported as a text file
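# for instance (a minimal sketch; the file names are illustrative):
# pipeline.export('my_pipeline.ppln')   # binary export, restorable via Pipeline.load
# pipeline.export_text('my_pipeline')   # human-readable export, writes my_pipeline.txt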
Example #9
import os
from niaaml import Pipeline

"""
This example presents how to load a saved Pipeline object from a file. You can use all of its methods after it has been loaded successfully.
"""

# load Pipeline object from a file
pipeline = Pipeline.load(os.path.dirname(os.path.abspath(__file__)) +
                         '/example_files/pipeline.ppln')

# all of the Pipeline class's methods can be called after a successful load
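
# for example, predictions can then be made with run (a minimal sketch; the
# hypothetical new_data frame must have the same columns as the dataset the
# pipeline was optimized on):
# predicted = pipeline.run(new_data)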
Example #10
def run(self):
    # read the input csv (classes unknown) and classify it with a previously
    # saved pipeline, emitting the predictions as a string
    dataReader = CSVDataReader(src=self.__data.csvSrc,
                               contains_classes=False,
                               has_header=self.__data.csvHasHeader)
    pipeline = Pipeline.load(self.__data.pipelineSrc)
    predictions = pipeline.run(dataReader.get_x())
    self.ran.emit(str(predictions))