def test_component_graph_dataset_with_target_imputer():
    """Check parent edges and null-free predictions with a TargetImputer node.

    The graph routes the 'Target Imputer' x/y outputs into the downstream
    components. The target series used for fitting ends with a NaN, and the
    test requires that predictions on the same features contain no nulls.
    """
    graph = {
        'Target Imputer': [TargetImputer],
        'OneHot': [OneHotEncoder, 'Target Imputer.x', 'Target Imputer.y'],
        'Random Forest': [
            RandomForestClassifier, 'OneHot.x', 'Target Imputer.y'
        ],
        'Elastic Net': [
            ElasticNetClassifier, 'OneHot.x', 'Target Imputer.y'
        ],
        'Logistic Regression': [
            LogisticRegressionClassifier,
            'Random Forest',
            'Elastic Net',
            'Target Imputer.y',
        ],
    }
    component_graph = ComponentGraph(graph)
    component_graph.instantiate({})

    # (component name, expected parents), checked in this order.
    expected_parents = [
        ('Target Imputer', []),
        ('OneHot', ['Target Imputer.x', 'Target Imputer.y']),
        ('Random Forest', ['OneHot.x', 'Target Imputer.y']),
        ('Elastic Net', ['OneHot.x', 'Target Imputer.y']),
    ]
    for component_name, parents in expected_parents:
        assert component_graph.get_parents(component_name) == parents

    features = pd.DataFrame({
        'column_1': ['a', 'b', 'c', 'd', 'a', 'a', 'b', 'c', 'b'],
        'column_2': [1, 2, 3, 4, 5, 6, 5, 4, 3],
    })
    # The last target value is missing on purpose.
    target = pd.Series([1, 0, 1, 0, 1, 1, 0, 0, np.nan])

    component_graph.fit(features, target)
    predictions = component_graph.predict(features)
    null_mask = pd.isnull(predictions.to_series())
    assert not null_mask.any()
def test_component_graph_sampler():
    """Check the parent edges of an instantiated graph containing an Undersampler.

    Both estimators consume the 'Undersampler.x' and 'Undersampler.y' outputs,
    and the final estimator consumes the two estimators.
    """
    graph = {
        'Imputer': [Imputer],
        'OneHot': [OneHotEncoder, 'Imputer.x'],
        'Undersampler': [Undersampler, 'OneHot.x'],
        'Random Forest': [
            RandomForestClassifier, 'Undersampler.x', 'Undersampler.y'
        ],
        'Elastic Net': [
            ElasticNetClassifier, 'Undersampler.x', 'Undersampler.y'
        ],
        'Logistic Regression': [
            LogisticRegressionClassifier, 'Random Forest', 'Elastic Net'
        ],
    }
    component_graph = ComponentGraph(graph)
    component_graph.instantiate({})

    # (component name, expected parents), checked in this order.
    expected_parents = [
        ('Imputer', []),
        ('OneHot', ['Imputer.x']),
        ('Undersampler', ['OneHot.x']),
        ('Random Forest', ['Undersampler.x', 'Undersampler.y']),
        ('Elastic Net', ['Undersampler.x', 'Undersampler.y']),
        ('Logistic Regression', ['Random Forest', 'Elastic Net']),
    ]
    for component_name, parents in expected_parents:
        assert component_graph.get_parents(component_name) == parents
def test_parents(example_graph):
    """Check `get_parents` both before and after the graph is instantiated.

    The same expectations must hold in both states: every known component
    reports its expected parents, and an unknown component name raises
    ValueError with a message matching 'not in the graph'.
    """
    component_graph = ComponentGraph(example_graph)

    # (component name, expected parents), checked in this order.
    expected_parents = [
        ('Imputer', []),
        ('OneHot_RandomForest', ['Imputer.x']),
        ('OneHot_ElasticNet', ['Imputer.x']),
        ('Random Forest', ['OneHot_RandomForest.x']),
        ('Elastic Net', ['OneHot_ElasticNet.x']),
        ('Logistic Regression', ['Random Forest', 'Elastic Net']),
    ]

    def check_parents():
        # Run the full set of parent checks against the graph's current state.
        for component_name, parents in expected_parents:
            assert component_graph.get_parents(component_name) == parents
        with pytest.raises(ValueError, match='not in the graph'):
            component_graph.get_parents('Fake component')

    check_parents()
    component_graph.instantiate({})
    check_parents()