Ejemplo n.º 1
0
def test_set_pipeline_step_none():
    # Test setting Pipeline steps to None
    X = np.array([[1]])
    y = np.array([1])
    mult2 = Mult(mult=2)
    mult3 = Mult(mult=3)
    mult5 = Mult(mult=5)

    def make():
        return Pipeline([("m2", mult2), ("m3", mult3), ("last", mult5)])

    pipeline = make()

    exp = 2 * 3 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline.set_params(m3=None)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_dict_equal(
        pipeline.get_params(deep=True),
        {"steps": pipeline.steps, "m2": mult2, "m3": None, "last": mult5, "m2__mult": 2, "last__mult": 5},
    )

    pipeline.set_params(m2=None)
    exp = 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    # for other methods, ensure no AttributeErrors on None:
    other_methods = ["predict_proba", "predict_log_proba", "decision_function", "transform", "score"]
    for method in other_methods:
        getattr(pipeline, method)(X)

    pipeline.set_params(m2=mult2)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline = make()
    pipeline.set_params(last=None)
    # mult2 and mult3 are active
    exp = 6
    assert_array_equal([[exp]], pipeline.fit(X, y).transform(X))
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_raise_message(AttributeError, "'NoneType' object has no attribute 'predict'", getattr, pipeline, "predict")

    # Check None step at construction time
    exp = 2 * 5
    pipeline = Pipeline([("m2", mult2), ("m3", None), ("last", mult5)])
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
def test_column_transformer_get_set_params():
    ct = ColumnTransformer([('trans1', StandardScaler(), [0]),
                            ('trans2', StandardScaler(), [1])])

    exp = {'n_jobs': 1,
           'remainder': 'drop',
           'trans1': ct.transformers[0][1],
           'trans1__copy': True,
           'trans1__with_mean': True,
           'trans1__with_std': True,
           'trans2': ct.transformers[1][1],
           'trans2__copy': True,
           'trans2__with_mean': True,
           'trans2__with_std': True,
           'transformers': ct.transformers,
           'transformer_weights': None}

    assert_dict_equal(ct.get_params(), exp)

    ct.set_params(trans1__with_mean=False)
    assert_false(ct.get_params()['trans1__with_mean'])

    ct.set_params(trans1='passthrough')
    exp = {'n_jobs': 1,
           'remainder': 'drop',
           'trans1': 'passthrough',
           'trans2': ct.transformers[1][1],
           'trans2__copy': True,
           'trans2__with_mean': True,
           'trans2__with_std': True,
           'transformers': ct.transformers,
           'transformer_weights': None}

    assert_dict_equal(ct.get_params(), exp)
def test_column_transformer_get_set_params():
    ct = ColumnTransformer([('trans1', StandardScaler(), [0]),
                            ('trans2', StandardScaler(), [1])])

    exp = {'n_jobs': 1,
           'remainder': 'passthrough',
           'trans1': ct.transformers[0][1],
           'trans1__copy': True,
           'trans1__with_mean': True,
           'trans1__with_std': True,
           'trans2': ct.transformers[1][1],
           'trans2__copy': True,
           'trans2__with_mean': True,
           'trans2__with_std': True,
           'transformers': ct.transformers,
           'transformer_weights': None}

    assert_dict_equal(ct.get_params(), exp)

    ct.set_params(trans1__with_mean=False)
    assert_false(ct.get_params()['trans1__with_mean'])

    ct.set_params(trans1='passthrough')
    exp = {'n_jobs': 1,
           'remainder': 'passthrough',
           'trans1': 'passthrough',
           'trans2': ct.transformers[1][1],
           'trans2__copy': True,
           'trans2__with_mean': True,
           'trans2__with_std': True,
           'transformers': ct.transformers,
           'transformer_weights': None}

    assert_dict_equal(ct.get_params(), exp)
Ejemplo n.º 4
0
def test_pickle():
    """Pickle molecules"""
    mols = list(
        oddt.toolkit.readfile(
            'sdf',
            os.path.join(test_data_dir, 'data/dude/xiap/actives_docked.sdf')))
    pickled_mols = list(map(lambda x: loads(dumps(x)), mols))

    assert_array_equal(list(map(lambda x: x.title, mols)),
                       list(map(lambda x: x.title, pickled_mols)))

    assert_array_equal(list(map(lambda x: x.smiles, mols)),
                       list(map(lambda x: x.smiles, pickled_mols)))

    for mol, pickled_mol in zip(mols, pickled_mols):
        assert_dict_equal(dict(mol.data), dict(pickled_mol.data))

    # Test pickling of atom_dicts
    assert_array_equal(list(map(lambda x: x._atom_dict is None, mols)),
                       [True] * len(mols))
    mols_atom_dict = np.hstack(list(map(lambda x: x.atom_dict, mols)))
    assert_array_equal(list(map(lambda x: x._atom_dict is not None, mols)),
                       [True] * len(mols))
    pickled_mols = list(map(lambda x: loads(dumps(x)), mols))
    assert_array_equal(
        list(map(lambda x: x._atom_dict is not None, pickled_mols)),
        [True] * len(mols))
    pickled_mols_atom_dict = np.hstack(
        list(map(lambda x: x._atom_dict, pickled_mols)))
    for name in mols[0].atom_dict.dtype.names:
        if issubclass(np.dtype(mols_atom_dict[name].dtype).type, np.number):
            assert_array_almost_equal(mols_atom_dict[name],
                                      pickled_mols_atom_dict[name])
        else:
            assert_array_equal(mols_atom_dict[name],
                               pickled_mols_atom_dict[name])

    # Lazy Mols
    mols = list(
        oddt.toolkit.readfile('sdf',
                              os.path.join(
                                  test_data_dir,
                                  'data/dude/xiap/actives_docked.sdf'),
                              lazy=True))
    pickled_mols = list(map(lambda x: loads(dumps(x)), mols))

    assert_array_equal(
        list(map(lambda x: x._source is not None, pickled_mols)),
        [True] * len(mols))

    assert_array_equal(list(map(lambda x: x.title, mols)),
                       list(map(lambda x: x.title, pickled_mols)))

    assert_array_equal(list(map(lambda x: x.smiles, mols)),
                       list(map(lambda x: x.smiles, pickled_mols)))

    for mol, pickled_mol in zip(mols, pickled_mols):
        assert_dict_equal(dict(mol.data), dict(pickled_mol.data))
Ejemplo n.º 5
0
def test_gmd_estimator(data):
    est = GMD(runs=1000, random_state=1234)
    assert est.alpha == 0.1
    assert est.runs == 1000

    est.fit(*data)
    assert hasattr(est, "is_fitted_")

    assert_dict_equal(est.subspaces_, {
        0: [0, 2, 3],
        1: [1, 3, 2],
        2: [2, 3],
        3: [3, 2]
    })
Ejemplo n.º 6
0
def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn():
    try:
        estimator = MultiInheritanceEstimator()
        text = "this attribute should not be pickled"
        estimator._attribute_not_pickled = text
        old_mod = type(estimator).__module__
        type(estimator).__module__ = "notsklearn"

        serialized = estimator.__getstate__()
        assert_dict_equal(serialized, {'_attribute_not_pickled': None,
                                       'attribute_pickled': 5})

        serialized['attribute_pickled'] = 4
        estimator.__setstate__(serialized)
        assert_equal(estimator.attribute_pickled, 4)
        assert_true(estimator._restored)
    finally:
        type(estimator).__module__ = old_mod
Ejemplo n.º 7
0
def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn():
    try:
        estimator = MultiInheritanceEstimator()
        text = "this attribute should not be pickled"
        estimator._attribute_not_pickled = text
        old_mod = type(estimator).__module__
        type(estimator).__module__ = "notsklearn"

        serialized = estimator.__getstate__()
        assert_dict_equal(serialized, {'_attribute_not_pickled': None,
                                       'attribute_pickled': 5})

        serialized['attribute_pickled'] = 4
        estimator.__setstate__(serialized)
        assert_equal(estimator.attribute_pickled, 4)
        assert estimator._restored
    finally:
        type(estimator).__module__ = old_mod
Ejemplo n.º 8
0
def test_set_pipeline_step_passthrough(passthrough):
    X = np.array([[1]])
    y = np.array([1])
    mult2 = Mult(mult=2)
    mult3 = Mult(mult=3)
    mult5 = Mult(mult=5)

    def make():
        return Pipeline([('m2', mult2), ('m3', mult3), ('last', mult5)])

    pipeline = make()

    exp = 2 * 3 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline.set_params(m3=passthrough)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_dict_equal(
        pipeline.get_params(deep=True), {
            'steps': pipeline.steps,
            'm2': mult2,
            'm3': passthrough,
            'last': mult5,
            'memory': None,
            'm2__mult': 2,
            'last__mult': 5,
            'verbose': False
        })

    pipeline.set_params(m2=passthrough)
    exp = 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    # for other methods, ensure no AttributeErrors on None:
    other_methods = [
        'predict_proba', 'predict_log_proba', 'decision_function', 'transform',
        'score'
    ]
    for method in other_methods:
        getattr(pipeline, method)(X)

    pipeline.set_params(m2=mult2)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline = make()
    pipeline.set_params(last=passthrough)
    # mult2 and mult3 are active
    exp = 6
    assert_array_equal([[exp]], pipeline.fit(X, y).transform(X))
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_raise_message(AttributeError,
                         "'str' object has no attribute 'predict'", getattr,
                         pipeline, 'predict')

    # Check 'passthrough' step at construction time
    exp = 2 * 5
    pipeline = Pipeline([('m2', mult2), ('m3', passthrough), ('last', mult5)])
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
Ejemplo n.º 9
0
def test_set_pipeline_step_none():
    # Test setting Pipeline steps to None
    X = np.array([[1]])
    y = np.array([1])
    mult2 = Mult(mult=2)
    mult3 = Mult(mult=3)
    mult5 = Mult(mult=5)

    def make():
        return Pipeline([('m2', mult2), ('m3', mult3), ('last', mult5)])

    pipeline = make()

    exp = 2 * 3 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline.set_params(m3=None)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_dict_equal(
        pipeline.get_params(deep=True), {
            'steps': pipeline.steps,
            'm2': mult2,
            'm3': None,
            'last': mult5,
            'm2__mult': 2,
            'last__mult': 5,
        })

    pipeline.set_params(m2=None)
    exp = 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    # for other methods, ensure no AttributeErrors on None:
    other_methods = [
        'predict_proba', 'predict_log_proba', 'decision_function', 'transform',
        'score'
    ]
    for method in other_methods:
        getattr(pipeline, method)(X)

    pipeline.set_params(m2=mult2)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline = make()
    pipeline.set_params(last=None)
    # mult2 and mult3 are active
    exp = 6
    assert_array_equal([[exp]], pipeline.fit(X, y).transform(X))
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert_raise_message(AttributeError,
                         "'NoneType' object has no attribute 'predict'",
                         getattr, pipeline, 'predict')

    # Check None step at construction time
    exp = 2 * 5
    pipeline = Pipeline([('m2', mult2), ('m3', None), ('last', mult5)])
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))