def test_generate_code_nonlinear_pipeline_error(nonlinear_binary_pipeline_class):
    """Expect a ValueError when generating code for the fixture's pipeline.

    The pipeline is built from the ``nonlinear_binary_pipeline_class`` fixture
    with an empty parameters dict, outside the ``pytest.raises`` block, so only
    the ``generate_pipeline_code`` call is expected to raise.
    """
    nonlinear_pipeline = nonlinear_binary_pipeline_class({})
    expected_message = "Code generation for nonlinear pipelines is not supported yet"
    with pytest.raises(ValueError, match=expected_message):
        generate_pipeline_code(nonlinear_pipeline)
def test_generate_code_pipeline_with_custom_components():
    """Check the generated code for a pipeline made of locally defined components.

    The component graph holds the custom classes themselves, and the expected
    output refers to them by bare class name.
    """

    class CustomTransformer(Transformer):
        name = "My Custom Transformer"
        hyperparameter_ranges = {}

        def __init__(self, random_seed=0):
            # This transformer takes no parameters of its own.
            super().__init__(parameters={},
                             component_obj=None,
                             random_seed=random_seed)

    class CustomEstimator(Estimator):
        name = "My Custom Estimator"
        hyperparameter_ranges = {}
        supported_problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]
        model_family = ModelFamily.NONE

        def __init__(self, random_arg=False, random_seed=0):
            super().__init__(parameters={'random_arg': random_arg},
                             component_obj=None,
                             random_seed=random_seed)

    custom_pipeline = BinaryClassificationPipeline([CustomTransformer, CustomEstimator])

    expected_code = (
        "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
        "pipeline = BinaryClassificationPipeline(component_graph=[CustomTransformer, CustomEstimator], "
        "parameters={'My Custom Estimator':{'random_arg': False}}, random_seed=0)"
    )

    generated_code = generate_pipeline_code(custom_pipeline)
    assert generated_code == expected_code
def test_generate_code_pipeline():
    """Check the generated code for pipelines built from component-name strings.

    Three cases are exercised in order:
      1. a binary pipeline given ``custom_hyperparameters``,
      2. a regression pipeline given a ``custom_name``,
      3. a regression pipeline given a ``custom_name`` and explicit ``parameters``.
    """
    # Case 1: binary classification pipeline with custom hyperparameters.
    imputer_hyperparameters = {
        "Imputer": {"numeric_impute_strategy": 'most_frequent'},
    }
    classifier_pipeline = BinaryClassificationPipeline(
        ['Imputer', 'Random Forest Classifier'],
        custom_hyperparameters=imputer_hyperparameters,
    )
    expected_binary_code = (
        "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
        "pipeline = BinaryClassificationPipeline(component_graph=['Imputer', 'Random Forest Classifier'], "
        "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': None, 'numeric_fill_value': None}, "
        "'Random Forest Classifier':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_hyperparameters={'Imputer':{'numeric_impute_strategy': 'most_frequent'}}, random_seed=0)"
    )
    generated_code = generate_pipeline_code(classifier_pipeline)
    assert expected_binary_code == generated_code

    # Case 2: regression pipeline with a custom name.
    named_regression_pipeline = RegressionPipeline(
        ['Imputer', 'Random Forest Regressor'],
        custom_name="Mock Regression Pipeline",
    )
    expected_regression_code = (
        "from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
        "pipeline = RegressionPipeline(component_graph=['Imputer', 'Random Forest Regressor'], parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': None, 'numeric_fill_value': None}, "
        "'Random Forest Regressor':{'n_estimators': 100, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
    )
    generated_code = generate_pipeline_code(named_regression_pipeline)
    assert generated_code == expected_regression_code

    # Case 3: regression pipeline with explicit component parameters.
    explicit_parameters = {
        "Imputer": {"numeric_impute_strategy": "most_frequent"},
        "Random Forest Regressor": {"n_estimators": 50},
    }
    parameterized_regression_pipeline = RegressionPipeline(
        ['Imputer', 'Random Forest Regressor'],
        custom_name="Mock Regression Pipeline",
        parameters=explicit_parameters,
    )
    expected_code_params = (
        "from evalml.pipelines.regression_pipeline import RegressionPipeline\n"
        "pipeline = RegressionPipeline(component_graph=['Imputer', 'Random Forest Regressor'], "
        "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'most_frequent', 'categorical_fill_value': None, 'numeric_fill_value': None}, "
        "'Random Forest Regressor':{'n_estimators': 50, 'max_depth': 6, 'n_jobs': -1}}, custom_name='Mock Regression Pipeline', random_seed=0)"
    )
    generated_code = generate_pipeline_code(parameterized_regression_pipeline)
    assert generated_code == expected_code_params
def test_generate_code_pipeline_json_with_objects():
    """Check the generated code when parameter values are arbitrary objects.

    Two cases are exercised against the same component graph:
      1. a numpy array passed as a parameter value,
      2. a component instance (``Imputer()``) passed as a parameter value.
    """

    class CustomEstimator(Estimator):
        name = "My Custom Estimator"
        hyperparameter_ranges = {}
        supported_problem_types = [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]
        model_family = ModelFamily.NONE

        def __init__(self, random_arg=False, numpy_arg=None, random_seed=0):
            # A literal ``[]`` default would be one list object shared by every
            # instance; use a None sentinel and create a fresh list per call.
            # The resulting parameter value is still ``[]`` when the argument
            # is omitted. ``is None`` is also safe for numpy array arguments.
            if numpy_arg is None:
                numpy_arg = []
            parameters = {'random_arg': random_arg, 'numpy_arg': numpy_arg}
            super().__init__(parameters=parameters,
                             component_obj=None,
                             random_seed=random_seed)

    component_graph = ['Imputer', CustomEstimator]

    # Case 1: numpy array as a parameter value.
    pipeline = BinaryClassificationPipeline(
        component_graph,
        custom_name="Mock Binary Pipeline with Transformer",
        parameters={'My Custom Estimator': {'numpy_arg': np.array([0])}},
    )
    generated_pipeline_code = generate_pipeline_code(pipeline)
    assert generated_pipeline_code == (
        "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
        "pipeline = BinaryClassificationPipeline(component_graph=['Imputer', CustomEstimator], "
        "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': None, 'numeric_fill_value': None}, "
        "'My Custom Estimator':{'random_arg': False, 'numpy_arg': array([0])}}, custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
    )

    # Case 2: component instance as a parameter value; numpy_arg is omitted.
    pipeline = BinaryClassificationPipeline(
        component_graph,
        custom_name="Mock Binary Pipeline with Transformer",
        parameters={'My Custom Estimator': {'random_arg': Imputer()}},
    )
    generated_pipeline_code = generate_pipeline_code(pipeline)
    assert generated_pipeline_code == (
        "from evalml.pipelines.binary_classification_pipeline import BinaryClassificationPipeline\n"
        "pipeline = BinaryClassificationPipeline(component_graph=['Imputer', CustomEstimator], "
        "parameters={'Imputer':{'categorical_impute_strategy': 'most_frequent', 'numeric_impute_strategy': 'mean', 'categorical_fill_value': None, 'numeric_fill_value': None}, "
        "'My Custom Estimator':{'random_arg': Imputer(categorical_impute_strategy='most_frequent', numeric_impute_strategy='mean', categorical_fill_value=None, numeric_fill_value=None), 'numpy_arg': []}}, "
        "custom_name='Mock Binary Pipeline with Transformer', random_seed=0)"
    )
def test_generate_code_pipeline_errors():
    """Expect a ValueError for every input that is not a pipeline instance.

    The rejected inputs are pipeline subclasses passed as classes, lists of
    component classes, and a list of component instances.
    """

    class MockBinaryPipeline(BinaryClassificationPipeline):
        name = "Mock Binary Pipeline"
        component_graph = ['Imputer', 'Random Forest Classifier']

    class MockMulticlassPipeline(MulticlassClassificationPipeline):
        name = "Mock Multiclass Pipeline"
        component_graph = ['Imputer', 'Random Forest Classifier']

    class MockRegressionPipeline(RegressionPipeline):
        name = "Mock Regression Pipeline"
        component_graph = ['Imputer', 'Random Forest Regressor']

    # Same order as the original sequence of checks.
    invalid_elements = [
        MockBinaryPipeline,
        MockMulticlassPipeline,
        MockRegressionPipeline,
        [Imputer],
        [Imputer, LogisticRegressionClassifier],
        [Imputer(), LogisticRegressionClassifier()],
    ]

    for element in invalid_elements:
        with pytest.raises(ValueError, match="Element must be a pipeline instance"):
            generate_pipeline_code(element)