def test_generate_import_code():
    """Ensure export utils' generate_import_code outputs as expected.

    Builds a legacy list-based TPOT pipeline and checks that the generated
    import preamble matches the expected code verbatim.
    """
    # NOTE(review): this source arrived whitespace-mangled; the line breaks
    # (and blank-line grouping) inside this expected string were reconstructed —
    # verify against the actual generate_import_code() output.
    reference_code = """\
import numpy as np
import pandas as pd

from sklearn.cross_validation import train_test_split
from sklearn.decomposition import RandomizedPCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.linear_model import LogisticRegression

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR')
training_indices, testing_indices = train_test_split(tpot_data.index, stratify = tpot_data['class'].values, train_size=0.75, test_size=0.25)

"""
    # Legacy pipeline representation: list of [result_name, operator, inputs..., params...]
    pipeline = [
        ['result1', '_variance_threshold', 'input_df', '100.0'],
        ['result2', '_pca', 'input_df', '66', '34'],
        ['result3', '_combine_dfs', 'result2', 'result1'],
        ['result4', '_logistic_regression', 'result3', '0.12030075187969924', '0', 'True'],
    ]

    import_code = generate_import_code(pipeline)

    assert reference_code == import_code
def test_generate_import_code_2():
    """Ensure export utils' generate_import_code outputs as expected when using
    multiple classes from the same module.

    FastICA and RandomizedPCA both live in sklearn.decomposition, so they must
    be merged onto a single import line.
    """
    # NOTE(review): line breaks and blank-line grouping inside this expected
    # string were reconstructed from a whitespace-mangled source — verify
    # against the actual generate_import_code() output.
    reference_code = """\
import numpy as np
import pandas as pd

from sklearn.cross_validation import train_test_split
from sklearn.decomposition import FastICA, RandomizedPCA
from sklearn.linear_model import LogisticRegression

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR')
training_indices, testing_indices = train_test_split(tpot_data.index, stratify = tpot_data['class'].values, train_size=0.75, test_size=0.25)

"""
    # Legacy pipeline representation: list of [result_name, operator, inputs..., params...]
    pipeline = [
        ['result1', '_fast_ica', 'input_df', '5', '0.1'],
        ['result2', '_pca', 'input_df', '66', '34'],
        ['result3', '_combine_dfs', 'result2', 'result1'],
        ['result4', '_logistic_regression', 'result3', '0.12030075187969924', '0', 'True'],
    ]

    import_code = generate_import_code(pipeline)

    assert reference_code == import_code
def test_generate_import_code():
    """Assert that generate_import_code() returns the correct set of
    dependencies for a given pipeline.
    """
    # NOTE(review): this definition is byte-identical to a sibling
    # test_generate_import_code below — only the last definition in the module
    # is collected by pytest; consider deduplicating or renaming.
    pipeline = creator.Individual.from_string('GaussianNB(RobustScaler(input_matrix))', tpot_obj._pset)

    # NOTE(review): line breaks inside this expected string were reconstructed
    # from a whitespace-mangled source — verify against generate_import_code().
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
"""
    assert expected_code == generate_import_code(pipeline, tpot_obj.operators)
def test_generate_import_code():
    """Assert that generate_import_code() returns the correct set of
    dependencies for a given pipeline.
    """
    # NOTE(review): this definition is byte-identical to a sibling
    # test_generate_import_code above — only the last definition in the module
    # is collected by pytest; consider deduplicating or renaming.
    pipeline = creator.Individual.from_string('GaussianNB(RobustScaler(input_matrix))', tpot_obj._pset)

    # NOTE(review): line breaks inside this expected string were reconstructed
    # from a whitespace-mangled source — verify against generate_import_code().
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
"""
    assert expected_code == generate_import_code(pipeline, tpot_obj.operators)
def write_pipes(name, tpot):
    """Write TPOT pipelines out to subdirectories.

    Generates the import preamble and pipeline-construction code from a fitted
    TPOT object, stitches them together with module-level helper text
    (``make_func``, ``import_impute``, ``impute_text``), and writes the result
    to ``<name>.py``.

    Parameters
    ----------
    name : str
        Output path stem; the file written is ``name + '.py'``.
    tpot : TPOT estimator
        A fitted TPOT object exposing ``_optimized_pipeline``, ``operators``,
        ``_pset`` and ``_imputed``.
    """
    import_codes = generate_import_code(tpot._optimized_pipeline, tpot.operators)
    pipeline_codes = generate_export_pipeline_code(
        expr_to_tree(tpot._optimized_pipeline, tpot._pset),
        tpot.operators)

    # Swap TPOT's default numpy/pandas imports for the ones the generated
    # wrapper function actually needs.
    pipe_text = import_codes.replace(
        'import numpy as np\nimport pandas as pd\n',
        'from sklearn.preprocessing import FunctionTransformer\nfrom copy import copy\n'
    )

    if tpot._imputed:
        # Add impute code when there is missing data.
        pipe_text += import_impute + make_func + impute_text
    else:
        pipe_text += make_func

    # The generated file uses tab indentation inside the emitted function body.
    pipe_text += '\texported_pipeline = ' + pipeline_codes + "\n\treturn({'train_feat': training_features, 'test_feat': testing_features, 'pipe': exported_pipeline})"

    # Context manager guarantees the handle is closed even if write() raises
    # (the original open/write/close sequence leaked on error).
    with open(name + '.py', 'w') as f:
        f.write(pipe_text)
def test_generate_import_code():
    """Assert that generate_import_code() returns the correct set of
    dependencies for a given pipeline.
    """
    # NOTE(review): this definition is byte-identical to a sibling
    # test_generate_import_code below — only the last definition in the module
    # is collected by pytest; consider deduplicating or renaming.
    tpot_obj = TPOTClassifier()
    pipeline = creator.Individual.from_string('GaussianNB(RobustScaler(input_matrix))', tpot_obj._pset)

    # NOTE(review): line breaks, blank-line grouping, and the continuation-line
    # indent inside this expected string were reconstructed from a
    # whitespace-mangled source — verify against generate_import_code().
    expected_code = """import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64)
features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)
training_features, testing_features, training_classes, testing_classes = \\
    train_test_split(features, tpot_data['class'], random_state=42)
"""

    assert expected_code == generate_import_code(pipeline, tpot_obj.operators)
def test_generate_import_code():
    """Assert that generate_import_code() returns the correct set of
    dependencies for a given pipeline.
    """
    # NOTE(review): this definition is byte-identical to a sibling
    # test_generate_import_code above — only the last definition in the module
    # is collected by pytest; consider deduplicating or renaming.
    tpot_obj = TPOTClassifier()
    pipeline = creator.Individual.from_string('GaussianNB(RobustScaler(input_matrix))', tpot_obj._pset)

    # NOTE(review): line breaks, blank-line grouping, and the continuation-line
    # indent inside this expected string were reconstructed from a
    # whitespace-mangled source — verify against generate_import_code().
    expected_code = """import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64)
features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)
training_features, testing_features, training_classes, testing_classes = \\
    train_test_split(features, tpot_data['class'], random_state=42)
"""

    assert expected_code == generate_import_code(pipeline, tpot_obj.operators)
def test_generate_import_code():
    """Assert that generate_import_code() returns the correct set of
    dependencies for a given pipeline.
    """
    # NOTE(review): this definition is byte-identical to a sibling
    # test_generate_import_code below — only the last definition in the module
    # is collected by pytest; consider deduplicating or renaming.
    tpot_obj = TPOT()
    pipeline = creator.Individual.from_string(
        'DecisionTreeClassifier(SelectKBest(input_matrix, 7), 0.5)', tpot_obj._pset)

    # NOTE(review): line breaks, blank-line grouping, and the continuation-line
    # indent inside this expected string were reconstructed from a
    # whitespace-mangled source — verify against generate_import_code().
    expected_code = """import numpy as np

from sklearn.cross_validation import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import FunctionTransformer
from sklearn.tree import DecisionTreeClassifier

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)
training_features, testing_features, training_classes, testing_classes = \\
    train_test_split(features, tpot_data['class'], random_state=42)
"""

    assert expected_code == generate_import_code(pipeline)
def test_generate_import_code():
    """Assert that generate_import_code() returns the correct set of
    dependencies for a given pipeline.
    """
    # NOTE(review): this definition is byte-identical to a sibling
    # test_generate_import_code above — only the last definition in the module
    # is collected by pytest; consider deduplicating or renaming.
    tpot_obj = TPOT()
    pipeline = creator.Individual.from_string(
        'DecisionTreeClassifier(SelectKBest(input_matrix, 7), 0.5)', tpot_obj._pset)

    # NOTE(review): line breaks, blank-line grouping, and the continuation-line
    # indent inside this expected string were reconstructed from a
    # whitespace-mangled source — verify against generate_import_code().
    expected_code = """import numpy as np

from sklearn.cross_validation import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import FunctionTransformer
from sklearn.tree import DecisionTreeClassifier

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)
training_features, testing_features, training_classes, testing_classes = \\
    train_test_split(features, tpot_data['class'], random_state=42)
"""

    assert expected_code == generate_import_code(pipeline)
def test_generate_import_code_2():
    """Assert that generate_import_code() returns the correct set of
    dependencies and that those dependencies are importable.
    """
    # NOTE(review): this definition duplicates a sibling
    # test_generate_import_code_2 below — only the last definition in the
    # module is collected by pytest; consider deduplicating or renaming.
    # Implicit string concatenation: fragment contents preserved verbatim.
    pipeline_string = (
        'KNeighborsClassifier(CombineDFs('
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5), ZeroCount(input_matrix))'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1,KNeighborsClassifier__weights=uniform'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    import_code = generate_import_code(pipeline, tpot_obj.operators)

    # NOTE(review): line breaks inside this expected string were reconstructed
    # from a whitespace-mangled source — verify against generate_import_code().
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline, make_union
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import StackingEstimator, ZeroCount
"""
    exec(import_code)  # should not raise error
    assert expected_code == import_code
def test_generate_import_code_2():
    """Assert that generate_import_code() returns the correct set of
    dependencies and that those dependencies are importable.
    """
    # NOTE(review): this definition duplicates a sibling
    # test_generate_import_code_2 above — only the last definition in the
    # module is collected by pytest; consider deduplicating or renaming.
    # Implicit string concatenation: fragment contents preserved verbatim.
    pipeline_string = (
        'KNeighborsClassifier(CombineDFs('
        'DecisionTreeClassifier(input_matrix, DecisionTreeClassifier__criterion=gini, '
        'DecisionTreeClassifier__max_depth=8,DecisionTreeClassifier__min_samples_leaf=5,'
        'DecisionTreeClassifier__min_samples_split=5), ZeroCount(input_matrix))'
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1,KNeighborsClassifier__weights=uniform'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    import_code = generate_import_code(pipeline, tpot_obj.operators)

    # NOTE(review): line breaks inside this expected string were reconstructed
    # from a whitespace-mangled source — verify against generate_import_code().
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline, make_union
from sklearn.tree import DecisionTreeClassifier
from tpot.builtins import StackingEstimator, ZeroCount
"""
    exec(import_code)  # should not raise error
    assert expected_code == import_code