Ejemplo n.º 1
0
def test_component_graph_dataset_with_target_imputer():
    """Fit and predict through a graph whose first node is a TargetImputer.

    The target series ends with a missing value.  The test first checks the
    parent edges reported for the imputer, the encoder and the two
    intermediate estimators, then fits the graph and asserts that the
    predictions contain no nulls.
    """
    feature_columns = {
        'column_1': ['a', 'b', 'c', 'd', 'a', 'a', 'b', 'c', 'b'],
        'column_2': [1, 2, 3, 4, 5, 6, 5, 4, 3],
    }
    X = pd.DataFrame(feature_columns)
    # Last target entry is deliberately missing.
    y = pd.Series([1, 0, 1, 0, 1, 1, 0, 0, np.nan])

    graph = {
        'Target Imputer': [TargetImputer],
        'OneHot': [OneHotEncoder, 'Target Imputer.x', 'Target Imputer.y'],
        'Random Forest': [
            RandomForestClassifier, 'OneHot.x', 'Target Imputer.y',
        ],
        'Elastic Net': [
            ElasticNetClassifier, 'OneHot.x', 'Target Imputer.y',
        ],
        'Logistic Regression': [
            LogisticRegressionClassifier,
            'Random Forest',
            'Elastic Net',
            'Target Imputer.y',
        ],
    }
    component_graph = ComponentGraph(graph)
    component_graph.instantiate({})

    # Component name -> parents expected from get_parents, checked in order.
    expected_parents = {
        'Target Imputer': [],
        'OneHot': ['Target Imputer.x', 'Target Imputer.y'],
        'Random Forest': ['OneHot.x', 'Target Imputer.y'],
        'Elastic Net': ['OneHot.x', 'Target Imputer.y'],
    }
    for component_name, parents in expected_parents.items():
        assert component_graph.get_parents(component_name) == parents

    component_graph.fit(X, y)
    predicted_series = component_graph.predict(X).to_series()
    null_mask = pd.isnull(predicted_series)
    assert not null_mask.any()
Ejemplo n.º 2
0
def test_component_graph_sampler():
    """Check parent edges of an instantiated graph that has an Undersampler.

    The graph is only built and instantiated, never fitted; each component's
    reported parents must equal the edges declared in the graph definition.
    """
    graph = {
        'Imputer': [Imputer],
        'OneHot': [OneHotEncoder, 'Imputer.x'],
        'Undersampler': [Undersampler, 'OneHot.x'],
        'Random Forest': [
            RandomForestClassifier, 'Undersampler.x', 'Undersampler.y',
        ],
        'Elastic Net': [
            ElasticNetClassifier, 'Undersampler.x', 'Undersampler.y',
        ],
        'Logistic Regression': [
            LogisticRegressionClassifier, 'Random Forest', 'Elastic Net',
        ],
    }
    component_graph = ComponentGraph(graph)
    component_graph.instantiate({})

    # (component name, expected parents) pairs, checked in this order.
    expectations = (
        ('Imputer', []),
        ('OneHot', ['Imputer.x']),
        ('Undersampler', ['OneHot.x']),
        ('Random Forest', ['Undersampler.x', 'Undersampler.y']),
        ('Elastic Net', ['Undersampler.x', 'Undersampler.y']),
        ('Logistic Regression', ['Random Forest', 'Elastic Net']),
    )
    for component_name, parents in expectations:
        assert component_graph.get_parents(component_name) == parents
Ejemplo n.º 3
0
def test_parents(example_graph):
    """Verify get_parents before and after the graph is instantiated.

    The same set of checks runs twice, once on the freshly constructed graph
    and once after ``instantiate({})``: every known component must report its
    expected parents, and an unknown name must raise ``ValueError`` with a
    message matching 'not in the graph'.
    """
    component_graph = ComponentGraph(example_graph)

    # Component name -> parents expected from get_parents, checked in order.
    expected_parents = {
        'Imputer': [],
        'OneHot_RandomForest': ['Imputer.x'],
        'OneHot_ElasticNet': ['Imputer.x'],
        'Random Forest': ['OneHot_RandomForest.x'],
        'Elastic Net': ['OneHot_ElasticNet.x'],
        'Logistic Regression': ['Random Forest', 'Elastic Net'],
    }

    def check_parent_lookups():
        # Known components first, then the unknown-component error path.
        for component_name, parents in expected_parents.items():
            assert component_graph.get_parents(component_name) == parents
        with pytest.raises(ValueError, match='not in the graph'):
            component_graph.get_parents('Fake component')

    check_parent_lookups()

    component_graph.instantiate({})
    check_parent_lookups()