Code example #1
def test_set_pipeline_step_passthrough(passthrough):
    # Test setting Pipeline steps to 'passthrough'
    X = np.array([[1]])
    y = np.array([1])
    mult2 = Mult(mult=2)
    mult3 = Mult(mult=3)
    mult5 = Mult(mult=5)

    def make():
        return Pipeline([("m2", mult2), ("m3", mult3), ("last", mult5)])

    pipeline = make()

    exp = 2 * 3 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline.set_params(m3=passthrough)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    expected_params = {
        "steps": pipeline.steps,
        "m2": mult2,
        "m3": passthrough,
        "last": mult5,
        "memory": None,
        "m2__mult": 2,
        "last__mult": 5,
        "verbose": False,
    }
    assert pipeline.get_params(deep=True) == expected_params

    pipeline.set_params(m2=passthrough)
    exp = 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    # for other methods, ensure no AttributeError is raised with 'passthrough':
    other_methods = [
        "predict_proba",
        "predict_log_proba",
        "decision_function",
        "transform",
        "score",
    ]
    for method in other_methods:
        getattr(pipeline, method)(X)

    pipeline.set_params(m2=mult2)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline = make()
    pipeline.set_params(last=passthrough)
    # mult2 and mult3 are active
    exp = 6
    pipeline.fit(X, y)
    pipeline.transform(X)
    assert_array_equal([[exp]], pipeline.fit(X, y).transform(X))
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    with pytest.raises(AttributeError, match="has no attribute 'predict'"):
        getattr(pipeline, "predict")

    # Check 'passthrough' step at construction time
    exp = 2 * 5
    pipeline = Pipeline([("m2", mult2), ("m3", passthrough), ("last", mult5)])
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
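For reference, Mult is a helper estimator defined elsewhere in the test module. Below is a minimal sketch consistent with the assertions above; the class body is an assumption, not the suite's actual definition:

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin


class Mult(TransformerMixin, BaseEstimator):
    # Multiplies its input by a fixed factor; it acts as both a transformer
    # and a predictor so every Pipeline method exercised by the test has a
    # target.
    def __init__(self, mult=1):
        self.mult = mult

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return np.asarray(X) * self.mult

    def inverse_transform(self, X):
        return np.asarray(X) / self.mult

    def predict(self, X):
        # 1-D output, matching the [exp] shape the test compares against
        return (np.asarray(X) * self.mult).sum(axis=1)

    # the test calls these on the final step as well; alias them in the sketch
    predict_proba = predict_log_proba = decision_function = predict

    def score(self, X, y=None):
        return np.sum(X)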
Code example #2
def test_set_pipeline_step_passthrough(passthrough):
    X = np.array([[1]])
    y = np.array([1])
    mult2 = Mult(mult=2)
    mult3 = Mult(mult=3)
    mult5 = Mult(mult=5)

    def make():
        return Pipeline([('m2', mult2), ('m3', mult3), ('last', mult5)])

    pipeline = make()

    exp = 2 * 3 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline.set_params(m3=passthrough)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
    assert (pipeline.get_params(deep=True) ==
            {'steps': pipeline.steps,
             'm2': mult2,
             'm3': passthrough,
             'last': mult5,
             'memory': None,
             'm2__mult': 2,
             'last__mult': 5,
             'verbose': False
             })

    pipeline.set_params(m2=passthrough)
    exp = 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    # for other methods, ensure no AttributeError is raised with 'passthrough':
    other_methods = ['predict_proba', 'predict_log_proba',
                     'decision_function', 'transform', 'score']
    for method in other_methods:
        getattr(pipeline, method)(X)

    pipeline.set_params(m2=mult2)
    exp = 2 * 5
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    pipeline = make()
    pipeline.set_params(last=passthrough)
    # mult2 and mult3 are active
    exp = 6
    assert_array_equal([[exp]], pipeline.fit(X, y).transform(X))
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))

    msg = "'str' object has no attribute 'predict'"
    with pytest.raises(AttributeError, match=msg):
        getattr(pipeline, 'predict')

    # Check 'passthrough' step at construction time
    exp = 2 * 5
    pipeline = Pipeline(
        [('m2', mult2), ('m3', passthrough), ('last', mult5)])
    assert_array_equal([[exp]], pipeline.fit_transform(X, y))
    assert_array_equal([exp], pipeline.fit(X).predict(X))
    assert_array_equal(X, pipeline.inverse_transform([[exp]]))
Code example #3
File: test_dummy.py Project: Aathi410/Pro123
def test_string_labels():
    X = [[0]] * 5
    y = ["paris", "paris", "tokyo", "amsterdam", "berlin"]
    clf = DummyClassifier(strategy="most_frequent")
    clf.fit(X, y)
    assert_array_equal(clf.predict(X), ["paris"] * 5)
Code example #4
File: test_dbscan.py Project: obulrdy6881/Drowsinss
def test_dbscan_core_samples_toy(algorithm):
    X = [[0], [2], [3], [4], [6], [8], [10]]
    n_samples = len(X)

    # Degenerate case: every sample is a core sample, either forming its own
    # cluster or grouped with other nearby core samples.
    core_samples, labels = dbscan(X, algorithm=algorithm, eps=1, min_samples=1)
    assert_array_equal(core_samples, np.arange(n_samples))
    assert_array_equal(labels, [0, 1, 1, 1, 2, 3, 4])

    # With eps=1 and min_samples=2 only the 3 samples from the denser area
    # are core samples. All other points are isolated and considered noise.
    core_samples, labels = dbscan(X, algorithm=algorithm, eps=1, min_samples=2)
    assert_array_equal(core_samples, [1, 2, 3])
    assert_array_equal(labels, [-1, 0, 0, 0, -1, -1, -1])

    # Only the sample in the middle of the dense area is core. Its two
    # neighbors are edge samples. Remaining samples are noise.
    core_samples, labels = dbscan(X, algorithm=algorithm, eps=1, min_samples=3)
    assert_array_equal(core_samples, [2])
    assert_array_equal(labels, [-1, 0, 0, 0, -1, -1, -1])

    # It's no longer possible to extract core samples with eps=1:
    # everything is noise.
    core_samples, labels = dbscan(X, algorithm=algorithm, eps=1, min_samples=4)
    assert_array_equal(core_samples, [])
    assert_array_equal(labels, np.full(n_samples, -1.))
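Note that min_samples counts the query point itself, so with eps=1 and min_samples=2 a point is core as soon as a single other sample lies within distance 1; that is why only the samples at 2, 3 and 4 (indices 1-3) qualify in the second case above.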
Code example #5
def test_ward_linkage_tree_return_distance():
    # Test return_distance option on linkage and ward trees

    # test that return_distance, when set to True, gives the same
    # output for both structured and unstructured clustering.
    n, p = 10, 5
    rng = np.random.RandomState(0)

    connectivity = np.ones((n, n))
    for i in range(5):
        X = .1 * rng.normal(size=(n, p))
        X -= 4. * np.arange(n)[:, np.newaxis]
        X -= X.mean(axis=1)[:, np.newaxis]

        out_unstructured = ward_tree(X, return_distance=True)
        out_structured = ward_tree(X, connectivity=connectivity,
                                   return_distance=True)

        # get children
        children_unstructured = out_unstructured[0]
        children_structured = out_structured[0]

        # check if we got the same clusters
        assert_array_equal(children_unstructured, children_structured)

        # check if the distances are the same
        dist_unstructured = out_unstructured[-1]
        dist_structured = out_structured[-1]

        assert_array_almost_equal(dist_unstructured, dist_structured)

        for linkage in ['average', 'complete', 'single']:
            structured_items = linkage_tree(
                X, connectivity=connectivity, linkage=linkage,
                return_distance=True)[-1]
            unstructured_items = linkage_tree(
                X, linkage=linkage, return_distance=True)[-1]
            structured_dist = structured_items[-1]
            unstructured_dist = unstructured_items[-1]
            structured_children = structured_items[0]
            unstructured_children = unstructured_items[0]
            assert_array_almost_equal(structured_dist, unstructured_dist)
            assert_array_almost_equal(
                structured_children, unstructured_children)

    # test on the following dataset where we know the truth
    # taken from scipy/cluster/tests/hierarchy_test_data.py
    X = np.array([[1.43054825, -7.5693489],
                  [6.95887839, 6.82293382],
                  [2.87137846, -9.68248579],
                  [7.87974764, -6.05485803],
                  [8.24018364, -6.09495602],
                  [7.39020262, 8.54004355]])
    # truth
    linkage_X_ward = np.array([[3., 4., 0.36265956, 2.],
                               [1., 5., 1.77045373, 2.],
                               [0., 2., 2.55760419, 2.],
                               [6., 8., 9.10208346, 4.],
                               [7., 9., 24.7784379, 6.]])

    linkage_X_complete = np.array(
        [[3., 4., 0.36265956, 2.],
         [1., 5., 1.77045373, 2.],
         [0., 2., 2.55760419, 2.],
         [6., 8., 6.96742194, 4.],
         [7., 9., 18.77445997, 6.]])

    linkage_X_average = np.array(
        [[3., 4., 0.36265956, 2.],
         [1., 5., 1.77045373, 2.],
         [0., 2., 2.55760419, 2.],
         [6., 8., 6.55832839, 4.],
         [7., 9., 15.44089605, 6.]])

    n_samples, n_features = np.shape(X)
    connectivity_X = np.ones((n_samples, n_samples))

    out_X_unstructured = ward_tree(X, return_distance=True)
    out_X_structured = ward_tree(X, connectivity=connectivity_X,
                                 return_distance=True)

    # check that the labels are the same
    assert_array_equal(linkage_X_ward[:, :2], out_X_unstructured[0])
    assert_array_equal(linkage_X_ward[:, :2], out_X_structured[0])

    # check that the distances are correct
    assert_array_almost_equal(linkage_X_ward[:, 2], out_X_unstructured[4])
    assert_array_almost_equal(linkage_X_ward[:, 2], out_X_structured[4])

    linkage_options = ['complete', 'average', 'single']
    X_linkage_truth = [linkage_X_complete, linkage_X_average]
    for (linkage, X_truth) in zip(linkage_options, X_linkage_truth):
        out_X_unstructured = linkage_tree(
            X, return_distance=True, linkage=linkage)
        out_X_structured = linkage_tree(
            X, connectivity=connectivity_X, linkage=linkage,
            return_distance=True)

        # check that the labels are the same
        assert_array_equal(X_truth[:, :2], out_X_unstructured[0])
        assert_array_equal(X_truth[:, :2], out_X_structured[0])

        # check that the distances are correct
        assert_array_almost_equal(X_truth[:, 2], out_X_unstructured[4])
        assert_array_almost_equal(X_truth[:, 2], out_X_structured[4])
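The repeated out[-1] / out[4] indexing relies on the tuple these helpers return. A short sketch of the assumed layout with return_distance=True; treat the unpacking as an assumption if your scikit-learn version differs:

import numpy as np
from sklearn.cluster import ward_tree

X = np.random.RandomState(0).normal(size=(10, 5))
# assumed 5-tuple layout when return_distance=True:
# (children, n_connected_components, n_leaves, parents, distances)
children, n_components, n_leaves, parents, distances = ward_tree(
    X, return_distance=True)
# hence out[4] and out[-1] in the test refer to the same distances array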
Code example #6
File: test_label.py Project: obulrdy6881/Drowsinss
def test_inverse_binarize_multiclass():
    got = _inverse_binarize_multiclass(
        csr_matrix([[0, 1, 0], [-1, 0, -1], [0, 0, 0]]), np.arange(3))
    assert_array_equal(got, np.array([1, 1, 0]))
Code example #7
def test_predict_equal_labels(algo):
    km = KMeans(random_state=13, n_init=1, max_iter=1, algorithm=algo)
    km.fit(X)
    assert_array_equal(km.predict(X), km.labels_)
Code example #8
def test_the_extract_xi_labels(ordering, clusters, expected):
    labels = _extract_xi_labels(ordering, clusters)

    assert_array_equal(labels, expected)
Code example #9
def test_extract_xi(global_dtype):
    # small and easy test (no clusters around other clusters)
    # but with clear noise data.
    rng = np.random.RandomState(0)
    n_points_per_cluster = 5

    C1 = [-5, -2] + 0.8 * rng.randn(n_points_per_cluster, 2)
    C2 = [4, -1] + 0.1 * rng.randn(n_points_per_cluster, 2)
    C3 = [1, -2] + 0.2 * rng.randn(n_points_per_cluster, 2)
    C4 = [-2, 3] + 0.3 * rng.randn(n_points_per_cluster, 2)
    C5 = [3, -2] + 0.6 * rng.randn(n_points_per_cluster, 2)
    C6 = [5, 6] + 0.2 * rng.randn(n_points_per_cluster, 2)

    X = np.vstack(
        (C1, C2, C3, C4, C5, np.array([[100, 100]]), C6)).astype(global_dtype,
                                                                 copy=False)
    expected_labels = np.r_[[2] * 5, [0] * 5, [1] * 5, [3] * 5, [1] * 5, -1,
                            [4] * 5]
    X, expected_labels = shuffle(X, expected_labels, random_state=rng)

    clust = OPTICS(min_samples=3,
                   min_cluster_size=2,
                   max_eps=20,
                   cluster_method="xi",
                   xi=0.4).fit(X)
    assert_array_equal(clust.labels_, expected_labels)

    # check float min_samples and min_cluster_size
    clust = OPTICS(min_samples=0.1,
                   min_cluster_size=0.08,
                   max_eps=20,
                   cluster_method="xi",
                   xi=0.4).fit(X)
    assert_array_equal(clust.labels_, expected_labels)

    X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]] * 2),
                   C6)).astype(global_dtype, copy=False)
    expected_labels = np.r_[[1] * 5, [3] * 5, [2] * 5, [0] * 5, [2] * 5, -1,
                            -1, [4] * 5]
    X, expected_labels = shuffle(X, expected_labels, random_state=rng)

    clust = OPTICS(min_samples=3,
                   min_cluster_size=3,
                   max_eps=20,
                   cluster_method="xi",
                   xi=0.3).fit(X)
    # this may fail if the predecessor correction is not in effect!
    assert_array_equal(clust.labels_, expected_labels)

    C1 = [[0, 0], [0, 0.1], [0, -0.1], [0.1, 0]]
    C2 = [[10, 10], [10, 9], [10, 11], [9, 10]]
    C3 = [[100, 100], [100, 90], [100, 110], [90, 100]]
    X = np.vstack((C1, C2, C3)).astype(global_dtype, copy=False)
    expected_labels = np.r_[[0] * 4, [1] * 4, [2] * 4]
    X, expected_labels = shuffle(X, expected_labels, random_state=rng)

    clust = OPTICS(min_samples=2,
                   min_cluster_size=2,
                   max_eps=np.inf,
                   cluster_method="xi",
                   xi=0.04).fit(X)
    assert_array_equal(clust.labels_, expected_labels)
Code example #10
def test_isotonic_regression():
    y = np.array([3, 7, 5, 9, 8, 7, 10])
    y_ = np.array([3, 6, 6, 8, 8, 8, 10])
    assert_array_equal(y_, isotonic_regression(y))

    y = np.array([10, 0, 2])
    y_ = np.array([4, 4, 4])
    assert_array_equal(y_, isotonic_regression(y))

    x = np.arange(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    ir.fit(x, y)
    assert_array_equal(ir.fit(x, y).transform(x), ir.fit_transform(x, y))
    assert_array_equal(ir.transform(x), ir.predict(x))

    # check that it is immune to permutation
    perm = np.random.permutation(len(y))
    ir = IsotonicRegression(y_min=0., y_max=1.)
    assert_array_equal(ir.fit_transform(x[perm], y[perm]),
                       ir.fit_transform(x, y)[perm])
    assert_array_equal(ir.transform(x[perm]), ir.transform(x)[perm])

    # check we don't crash when all x are equal:
    ir = IsotonicRegression()
    assert_array_equal(ir.fit_transform(np.ones(len(x)), y), np.mean(y))
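The [10, 0, 2] case shows the pool-adjacent-violators step directly: the sequence cannot be made non-decreasing without pooling all three values, so the whole block is replaced by its mean, (10 + 0 + 2) / 3 = 4, yielding the constant fit [4, 4, 4].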
Code example #11
def test_compare_to_ELKI():
    # Expected values, computed with (future) ELKI 0.7.5 using:
    # java -jar elki.jar cli -dbc.in csv -dbc.filter FixedDBIDsFilter
    #   -algorithm clustering.optics.OPTICSHeap -optics.minpts 5
    # where the FixedDBIDsFilter gives 0-indexed ids.
    r1 = [
        np.inf,
        1.0574896366427478,
        0.7587934993548423,
        0.7290174038973836,
        0.7290174038973836,
        0.7290174038973836,
        0.6861627576116127,
        0.7587934993548423,
        0.9280118450166668,
        1.1748022534146194,
        3.3355455741292257,
        0.49618389254482587,
        0.2552805046961355,
        0.2552805046961355,
        0.24944622248445714,
        0.24944622248445714,
        0.24944622248445714,
        0.2552805046961355,
        0.2552805046961355,
        0.3086779122185853,
        4.163024452756142,
        1.623152630340929,
        0.45315840475822655,
        0.25468325192031926,
        0.2254004358159971,
        0.18765711877083036,
        0.1821471333893275,
        0.1821471333893275,
        0.18765711877083036,
        0.18765711877083036,
        0.2240202988740153,
        1.154337614548715,
        1.342604473837069,
        1.323308536402633,
        0.8607514948648837,
        0.27219111215810565,
        0.13260875220533205,
        0.13260875220533205,
        0.09890587675958984,
        0.09890587675958984,
        0.13548790801634494,
        0.1575483940837384,
        0.17515137170530226,
        0.17575920159442388,
        0.27219111215810565,
        0.6101447895405373,
        1.3189208094864302,
        1.323308536402633,
        2.2509184159764577,
        2.4517810628594527,
        3.675977064404973,
        3.8264795626020365,
        2.9130735341510614,
        2.9130735341510614,
        2.9130735341510614,
        2.9130735341510614,
        2.8459300127258036,
        2.8459300127258036,
        2.8459300127258036,
        3.0321982337972537,
    ]
    o1 = [
        0,
        3,
        6,
        4,
        7,
        8,
        2,
        9,
        5,
        1,
        31,
        30,
        32,
        34,
        33,
        38,
        39,
        35,
        37,
        36,
        44,
        21,
        23,
        24,
        22,
        25,
        27,
        29,
        26,
        28,
        20,
        40,
        45,
        46,
        10,
        15,
        11,
        13,
        17,
        19,
        18,
        12,
        16,
        14,
        47,
        49,
        43,
        48,
        42,
        41,
        53,
        57,
        51,
        52,
        56,
        59,
        54,
        55,
        58,
        50,
    ]
    p1 = [
        -1,
        0,
        3,
        6,
        6,
        6,
        8,
        3,
        7,
        5,
        1,
        31,
        30,
        30,
        34,
        34,
        34,
        32,
        32,
        37,
        36,
        44,
        21,
        23,
        24,
        22,
        25,
        25,
        22,
        22,
        22,
        21,
        40,
        45,
        46,
        10,
        15,
        15,
        13,
        13,
        15,
        11,
        19,
        15,
        10,
        47,
        12,
        45,
        14,
        43,
        42,
        53,
        57,
        57,
        57,
        57,
        59,
        59,
        59,
        58,
    ]

    # Tests against known extraction array
    # Does NOT work with metric='euclidean', because sklearn's euclidean
    # implementation has worse numeric precision. 'minkowski' is slower but
    # more accurate.
    clust1 = OPTICS(min_samples=5).fit(X)

    assert_array_equal(clust1.ordering_, np.array(o1))
    assert_array_equal(clust1.predecessor_[clust1.ordering_], np.array(p1))
    assert_allclose(clust1.reachability_[clust1.ordering_], np.array(r1))
    # ELKI currently does not print the core distances (which are not used
    # much in the literature), but we can at least ensure this consistency:
    for i in clust1.ordering_[1:]:
        assert clust1.reachability_[i] >= clust1.core_distances_[
            clust1.predecessor_[i]]

    # Expected values, computed with (future) ELKI 0.7.5 (same setup as above):
    r2 = [
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        0.27219111215810565,
        0.13260875220533205,
        0.13260875220533205,
        0.09890587675958984,
        0.09890587675958984,
        0.13548790801634494,
        0.1575483940837384,
        0.17515137170530226,
        0.17575920159442388,
        0.27219111215810565,
        0.4928068613197889,
        np.inf,
        0.2666183922512113,
        0.18765711877083036,
        0.1821471333893275,
        0.1821471333893275,
        0.1821471333893275,
        0.18715928772277457,
        0.18765711877083036,
        0.18765711877083036,
        0.25468325192031926,
        np.inf,
        0.2552805046961355,
        0.2552805046961355,
        0.24944622248445714,
        0.24944622248445714,
        0.24944622248445714,
        0.2552805046961355,
        0.2552805046961355,
        0.3086779122185853,
        0.34466409325984865,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
        np.inf,
    ]
    o2 = [
        0,
        1,
        2,
        3,
        4,
        5,
        6,
        7,
        8,
        9,
        10,
        15,
        11,
        13,
        17,
        19,
        18,
        12,
        16,
        14,
        47,
        46,
        20,
        22,
        25,
        23,
        27,
        29,
        24,
        26,
        28,
        21,
        30,
        32,
        34,
        33,
        38,
        39,
        35,
        37,
        36,
        31,
        40,
        41,
        42,
        43,
        44,
        45,
        48,
        49,
        50,
        51,
        52,
        53,
        54,
        55,
        56,
        57,
        58,
        59,
    ]
    p2 = [
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        10,
        15,
        15,
        13,
        13,
        15,
        11,
        19,
        15,
        10,
        47,
        -1,
        20,
        22,
        25,
        25,
        25,
        25,
        22,
        22,
        23,
        -1,
        30,
        30,
        34,
        34,
        34,
        32,
        32,
        37,
        38,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
        -1,
    ]
    clust2 = OPTICS(min_samples=5, max_eps=0.5).fit(X)

    assert_array_equal(clust2.ordering_, np.array(o2))
    assert_array_equal(clust2.predecessor_[clust2.ordering_], np.array(p2))
    assert_allclose(clust2.reachability_[clust2.ordering_], np.array(r2))

    index = np.where(clust1.core_distances_ <= 0.5)[0]
    assert_allclose(clust1.core_distances_[index],
                    clust2.core_distances_[index])
Code example #12
def test_isotonic_regression_reversed():
    y = np.array([10, 9, 10, 7, 6, 6.1, 5])
    y_ = IsotonicRegression(increasing=False).fit_transform(
        np.arange(len(y)), y)
    assert_array_equal(np.ones(y_[:-1].shape), ((y_[:-1] - y_[1:]) >= 0))
Code example #13
def test_pipeline_memory_sampler():
    X, y = make_classification(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=5000,
        random_state=0,
    )
    cachedir = mkdtemp()
    try:
        memory = Memory(cachedir, verbose=10)
        # Test with Transformer + SVC
        clf = SVC(gamma="scale", probability=True, random_state=0)
        transf = DummySampler()
        pipe = Pipeline([("transf", clone(transf)), ("svc", clf)])
        cached_pipe = Pipeline([("transf", transf), ("svc", clf)],
                               memory=memory)

        # Memoize the transformer at the first fit
        cached_pipe.fit(X, y)
        pipe.fit(X, y)
        # Get the time stamp of the transformer in the cached pipeline
        expected_ts = cached_pipe.named_steps["transf"].timestamp_
        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe.predict(X))
        assert_array_equal(pipe.predict_proba(X), cached_pipe.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y))
        assert_array_equal(
            pipe.named_steps["transf"].means_,
            cached_pipe.named_steps["transf"].means_,
        )
        assert not hasattr(transf, "means_")
        # Check that we are reading the cache while fitting
        # a second time
        cached_pipe.fit(X, y)
        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe.predict(X))
        assert_array_equal(pipe.predict_proba(X), cached_pipe.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y))
        assert_array_equal(
            pipe.named_steps["transf"].means_,
            cached_pipe.named_steps["transf"].means_,
        )
        assert cached_pipe.named_steps["transf"].timestamp_ == expected_ts
        # Create a new pipeline with cloned estimators
        # Check that even changing the step name does not affect the cache hit
        clf_2 = SVC(gamma="scale", probability=True, random_state=0)
        transf_2 = DummySampler()
        cached_pipe_2 = Pipeline([("transf_2", transf_2), ("svc", clf_2)],
                                 memory=memory)
        cached_pipe_2.fit(X, y)

        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe_2.predict(X))
        assert_array_equal(pipe.predict_proba(X),
                           cached_pipe_2.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe_2.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe_2.score(X, y))
        assert_array_equal(
            pipe.named_steps["transf"].means_,
            cached_pipe_2.named_steps["transf_2"].means_,
        )
        assert cached_pipe_2.named_steps["transf_2"].timestamp_ == expected_ts
    finally:
        shutil.rmtree(cachedir)
Code example #14
def test_pipeline_memory_transformer():
    iris = load_iris()
    X = iris.data
    y = iris.target
    cachedir = mkdtemp()
    try:
        memory = Memory(cachedir, verbose=10)
        # Test with Transformer + SVC
        clf = SVC(gamma="scale", probability=True, random_state=0)
        transf = DummyTransf()
        pipe = Pipeline([("transf", clone(transf)), ("svc", clf)])
        cached_pipe = Pipeline([("transf", transf), ("svc", clf)],
                               memory=memory)

        # Memoize the transformer at the first fit
        cached_pipe.fit(X, y)
        pipe.fit(X, y)
        # Get the time stamp of the transformer in the cached pipeline
        expected_ts = cached_pipe.named_steps["transf"].timestamp_
        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe.predict(X))
        assert_array_equal(pipe.predict_proba(X), cached_pipe.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y))
        assert_array_equal(
            pipe.named_steps["transf"].means_,
            cached_pipe.named_steps["transf"].means_,
        )
        assert not hasattr(transf, "means_")
        # Check that we are reading the cache while fitting
        # a second time
        cached_pipe.fit(X, y)
        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe.predict(X))
        assert_array_equal(pipe.predict_proba(X), cached_pipe.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y))
        assert_array_equal(
            pipe.named_steps["transf"].means_,
            cached_pipe.named_steps["transf"].means_,
        )
        assert cached_pipe.named_steps["transf"].timestamp_ == expected_ts
        # Create a new pipeline with cloned estimators
        # Check that even changing the step name does not affect the cache hit
        clf_2 = SVC(gamma="scale", probability=True, random_state=0)
        transf_2 = DummyTransf()
        cached_pipe_2 = Pipeline([("transf_2", transf_2), ("svc", clf_2)],
                                 memory=memory)
        cached_pipe_2.fit(X, y)

        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe_2.predict(X))
        assert_array_equal(pipe.predict_proba(X),
                           cached_pipe_2.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe_2.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe_2.score(X, y))
        assert_array_equal(
            pipe.named_steps["transf"].means_,
            cached_pipe_2.named_steps["transf_2"].means_,
        )
        assert cached_pipe_2.named_steps["transf_2"].timestamp_ == expected_ts
    finally:
        shutil.rmtree(cachedir)
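The DummyTransf helper used here (and the DummySampler in the previous example) is defined elsewhere in the test module. A minimal sketch consistent with the attributes the test inspects; the body is assumed, not the suite's actual definition:

import time

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin


class DummyTransf(TransformerMixin, BaseEstimator):
    # Pass-through transformer that records what the test checks:
    # per-feature means (means_) and the moment fit ran (timestamp_).
    def fit(self, X, y=None):
        self.means_ = np.mean(X, axis=0)
        # lets the test detect whether a cached fit was reused or re-run
        self.timestamp_ = time.time()
        return self

    def transform(self, X):
        return X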
Code example #15
File: test_label.py Project: obulrdy6881/Drowsinss
def test_multilabel_binarizer_non_integer_labels():
    tuple_classes = _to_object_array([(1, ), (2, ), (3, )])
    inputs = [
        ([('2', '3'), ('1', ), ('1', '2')], ['1', '2', '3']),
        ([('b', 'c'), ('a', ), ('a', 'b')], ['a', 'b', 'c']),
        ([((2, ), (3, )), ((1, ), ), ((1, ), (2, ))], tuple_classes),
    ]
    indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]])
    for inp, classes in inputs:
        # fit_transform()
        mlb = MultiLabelBinarizer()
        assert_array_equal(mlb.fit_transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        assert_array_equal(mlb.inverse_transform(indicator_mat), inp)

        # fit().transform()
        mlb = MultiLabelBinarizer()
        assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        assert_array_equal(mlb.inverse_transform(indicator_mat), inp)

    mlb = MultiLabelBinarizer()
    with pytest.raises(TypeError):
        mlb.fit_transform([({}), ({}, {'a': 'b'})])
Code example #16
def test_additive_chi2_sampler():
    # test that AdditiveChi2Sampler approximates kernel on random data

    # compute exact kernel
    # abbreviations for easier formula
    X_ = X[:, np.newaxis, :]
    Y_ = Y[np.newaxis, :, :]

    large_kernel = 2 * X_ * Y_ / (X_ + Y_)

    # reduce to n_samples_x x n_samples_y by summing over features
    kernel = large_kernel.sum(axis=2)

    # approximate kernel mapping
    transform = AdditiveChi2Sampler(sample_steps=3)
    X_trans = transform.fit_transform(X)
    Y_trans = transform.transform(Y)

    kernel_approx = np.dot(X_trans, Y_trans.T)

    assert_array_almost_equal(kernel, kernel_approx, 1)

    X_sp_trans = transform.fit_transform(csr_matrix(X))
    Y_sp_trans = transform.transform(csr_matrix(Y))

    assert_array_equal(X_trans, X_sp_trans.A)
    assert_array_equal(Y_trans, Y_sp_trans.A)

    # test error is raised on negative input
    Y_neg = Y.copy()
    Y_neg[0, 0] = -1
    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        transform.transform(Y_neg)

    # test error on invalid sample_steps
    transform = AdditiveChi2Sampler(sample_steps=4)
    msg = re.escape("If sample_steps is not in [1, 2, 3],"
                    " you need to provide sample_interval")
    with pytest.raises(ValueError, match=msg):
        transform.fit(X)

    # test that the default sample_interval is handled correctly
    sample_steps_available = [1, 2, 3]
    for sample_steps in sample_steps_available:

        # test that the sample_interval is initialized correctly
        transform = AdditiveChi2Sampler(sample_steps=sample_steps)
        assert transform.sample_interval is None

        # test that the sample_interval is changed in the fit method
        transform.fit(X)
        assert transform.sample_interval_ is not None

    # test that the sample_interval is set correctly
    sample_interval = 0.3
    transform = AdditiveChi2Sampler(sample_steps=4,
                                    sample_interval=sample_interval)
    assert transform.sample_interval == sample_interval
    transform.fit(X)
    assert transform.sample_interval_ == sample_interval
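Here X and Y are module-level fixtures of the test file: random, non-negative data, since the additive chi2 kernel is only defined for non-negative input (hence the ValueError check on Y_neg above).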
Code example #17
File: test_label.py Project: obulrdy6881/Drowsinss
def test_multilabel_binarizer_non_unique():
    inp = [(1, 1, 1, 0)]
    indicator_mat = np.array([[1, 1]])
    mlb = MultiLabelBinarizer()
    assert_array_equal(mlb.fit_transform(inp), indicator_mat)
Code example #18
def test_one_hot_encoder_inverse_if_binary():
    X = np.array([['Male', 1], ['Female', 3], ['Female', 2]], dtype=object)
    ohe = OneHotEncoder(drop='if_binary', sparse=False)
    X_tr = ohe.fit_transform(X)
    assert_array_equal(ohe.inverse_transform(X_tr), X)
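With drop='if_binary', only features with exactly two categories have their first (sorted) category dropped; features with three or more categories are one-hot encoded in full. Here the 'Female'/'Male' column collapses to a single 0/1 column while the {1, 2, 3} column keeps all three columns, so X_tr has four columns in total.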
Code example #19
def test_minibatch_update_consistency():
    # Check that dense and sparse minibatch update give the same results
    rng = np.random.RandomState(42)
    old_centers = centers + rng.normal(size=centers.shape)

    new_centers = old_centers.copy()
    new_centers_csr = old_centers.copy()

    weight_sums = np.zeros(new_centers.shape[0], dtype=np.double)
    weight_sums_csr = np.zeros(new_centers.shape[0], dtype=np.double)

    x_squared_norms = (X**2).sum(axis=1)
    x_squared_norms_csr = row_norms(X_csr, squared=True)

    buffer = np.zeros(centers.shape[1], dtype=np.double)
    buffer_csr = np.zeros(centers.shape[1], dtype=np.double)

    # extract a small minibatch
    X_mb = X[:10]
    X_mb_csr = X_csr[:10]
    x_mb_squared_norms = x_squared_norms[:10]
    x_mb_squared_norms_csr = x_squared_norms_csr[:10]

    sample_weight_mb = np.ones(X_mb.shape[0], dtype=np.double)

    # step 1: compute the dense minibatch update
    old_inertia, incremental_diff = _mini_batch_step(X_mb,
                                                     sample_weight_mb,
                                                     x_mb_squared_norms,
                                                     new_centers,
                                                     weight_sums,
                                                     buffer,
                                                     1,
                                                     None,
                                                     random_reassign=False)
    assert old_inertia > 0.0

    # compute the new inertia on the same batch to check that it decreased
    labels, new_inertia = _labels_inertia(X_mb, sample_weight_mb,
                                          x_mb_squared_norms, new_centers)
    assert new_inertia > 0.0
    assert new_inertia < old_inertia

    # check that the incremental difference computation matches the
    # final observed value
    effective_diff = np.sum((new_centers - old_centers)**2)
    assert_almost_equal(incremental_diff, effective_diff)

    # step 2: compute the sparse minibatch update
    old_inertia_csr, incremental_diff_csr = _mini_batch_step(
        X_mb_csr,
        sample_weight_mb,
        x_mb_squared_norms_csr,
        new_centers_csr,
        weight_sums_csr,
        buffer_csr,
        1,
        None,
        random_reassign=False)
    assert old_inertia_csr > 0.0

    # compute the new inertia on the same batch to check that it decreased
    labels_csr, new_inertia_csr = _labels_inertia(X_mb_csr, sample_weight_mb,
                                                  x_mb_squared_norms_csr,
                                                  new_centers_csr)
    assert new_inertia_csr > 0.0
    assert new_inertia_csr < old_inertia_csr

    # check that the incremental difference computation matches the
    # final observed value
    effective_diff = np.sum((new_centers_csr - old_centers)**2)
    assert_almost_equal(incremental_diff_csr, effective_diff)

    # step 3: check that sparse and dense updates lead to the same results
    assert_array_equal(labels, labels_csr)
    assert_array_almost_equal(new_centers, new_centers_csr)
    assert_almost_equal(incremental_diff, incremental_diff_csr)
    assert_almost_equal(old_inertia, old_inertia_csr)
    assert_almost_equal(new_inertia, new_inertia_csr)
Code example #20
def test_ordinal_encoder(X):
    enc = OrdinalEncoder()
    exp = np.array([[0, 1, 0], [1, 0, 0]], dtype='int64')
    assert_array_equal(enc.fit_transform(X), exp.astype('float64'))
    enc = OrdinalEncoder(dtype='int64')
    assert_array_equal(enc.fit_transform(X), exp)
Code example #21
File: test_dbscan.py Project: obulrdy6881/Drowsinss
def test_weighted_dbscan():
    # ensure sample_weight is validated
    with pytest.raises(ValueError):
        dbscan([[0], [1]], sample_weight=[2])
    with pytest.raises(ValueError):
        dbscan([[0], [1]], sample_weight=[2, 3, 4])

    # ensure sample_weight has an effect
    assert_array_equal([],
                       dbscan([[0], [1]], sample_weight=None,
                              min_samples=6)[0])
    assert_array_equal([],
                       dbscan([[0], [1]], sample_weight=[5, 5],
                              min_samples=6)[0])
    assert_array_equal([0],
                       dbscan([[0], [1]], sample_weight=[6, 5],
                              min_samples=6)[0])
    assert_array_equal([0, 1],
                       dbscan([[0], [1]], sample_weight=[6, 6],
                              min_samples=6)[0])

    # points within eps of each other:
    assert_array_equal([0, 1],
                       dbscan([[0], [1]],
                              eps=1.5,
                              sample_weight=[5, 1],
                              min_samples=6)[0])
    # and the effect of non-positive and non-integer sample_weight:
    assert_array_equal([],
                       dbscan([[0], [1]],
                              sample_weight=[5, 0],
                              eps=1.5,
                              min_samples=6)[0])
    assert_array_equal([0, 1],
                       dbscan([[0], [1]],
                              sample_weight=[5.9, 0.1],
                              eps=1.5,
                              min_samples=6)[0])
    assert_array_equal([0, 1],
                       dbscan([[0], [1]],
                              sample_weight=[6, 0],
                              eps=1.5,
                              min_samples=6)[0])
    assert_array_equal([],
                       dbscan([[0], [1]],
                              sample_weight=[6, -1],
                              eps=1.5,
                              min_samples=6)[0])

    # for non-negative sample_weight, cores should be identical to repetition
    rng = np.random.RandomState(42)
    sample_weight = rng.randint(0, 5, X.shape[0])
    core1, label1 = dbscan(X, sample_weight=sample_weight)
    assert len(label1) == len(X)

    X_repeated = np.repeat(X, sample_weight, axis=0)
    core_repeated, label_repeated = dbscan(X_repeated)
    core_repeated_mask = np.zeros(X_repeated.shape[0], dtype=bool)
    core_repeated_mask[core_repeated] = True
    core_mask = np.zeros(X.shape[0], dtype=bool)
    core_mask[core1] = True
    assert_array_equal(np.repeat(core_mask, sample_weight), core_repeated_mask)

    # sample_weight should work with precomputed distance matrix
    D = pairwise_distances(X)
    core3, label3 = dbscan(D,
                           sample_weight=sample_weight,
                           metric='precomputed')
    assert_array_equal(core1, core3)
    assert_array_equal(label1, label3)

    # sample_weight should work with estimator
    est = DBSCAN().fit(X, sample_weight=sample_weight)
    core4 = est.core_sample_indices_
    label4 = est.labels_
    assert_array_equal(core1, core4)
    assert_array_equal(label1, label4)

    est = DBSCAN()
    label5 = est.fit_predict(X, sample_weight=sample_weight)
    core5 = est.core_sample_indices_
    assert_array_equal(core1, core5)
    assert_array_equal(label1, label5)
    assert_array_equal(label1, est.labels_)
Code example #22
def test_one_hot_encoder_drop_manual(missing_value):
    cats_to_drop = ['def', 12, 3, 56, missing_value]
    enc = OneHotEncoder(drop=cats_to_drop)
    X = [['abc', 12, 2, 55, 'a'], ['def', 12, 1, 55, 'a'],
         ['def', 12, 3, 56, missing_value]]
    trans = enc.fit_transform(X).toarray()
    exp = [[1, 0, 1, 1, 1], [0, 1, 0, 1, 1], [0, 0, 0, 0, 0]]
    assert_array_equal(trans, exp)
    dropped_cats = [
        cat[feature] for cat, feature in zip(enc.categories_, enc.drop_idx_)
    ]
    X_inv_trans = enc.inverse_transform(trans)
    X_array = np.array(X, dtype=object)

    # last value is np.nan
    if is_scalar_nan(cats_to_drop[-1]):
        assert_array_equal(dropped_cats[:-1], cats_to_drop[:-1])
        assert is_scalar_nan(dropped_cats[-1])
        assert is_scalar_nan(cats_to_drop[-1])
        # do not include the last column which includes missing values
        assert_array_equal(X_array[:, :-1], X_inv_trans[:, :-1])

        # check last column is the missing value
        assert_array_equal(X_array[-1, :-1], X_inv_trans[-1, :-1])
        assert is_scalar_nan(X_array[-1, -1])
        assert is_scalar_nan(X_inv_trans[-1, -1])
    else:
        assert_array_equal(dropped_cats, cats_to_drop)
        assert_array_equal(X_array, X_inv_trans)
Code example #23
def test_agglomerative_clustering():
    # Check that we obtain the correct number of clusters with
    # agglomerative clustering.
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=bool)
    n_samples = 100
    X = rng.randn(n_samples, 50)
    connectivity = grid_to_graph(*mask.shape)
    for linkage in ("ward", "complete", "average", "single"):
        clustering = AgglomerativeClustering(n_clusters=10,
                                             connectivity=connectivity,
                                             linkage=linkage)
        clustering.fit(X)
        # test caching
        try:
            tempdir = mkdtemp()
            clustering = AgglomerativeClustering(
                n_clusters=10, connectivity=connectivity,
                memory=tempdir,
                linkage=linkage)
            clustering.fit(X)
            labels = clustering.labels_
            assert np.size(np.unique(labels)) == 10
        finally:
            shutil.rmtree(tempdir)
        # Turn caching off now
        clustering = AgglomerativeClustering(
            n_clusters=10, connectivity=connectivity, linkage=linkage)
        # Check that we obtain the same solution with early-stopping of the
        # tree building
        clustering.compute_full_tree = False
        clustering.fit(X)
        assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                         labels), 1)
        clustering.connectivity = None
        clustering.fit(X)
        assert np.size(np.unique(clustering.labels_)) == 10
        # Check that a ValueError is raised for a connectivity matrix of the
        # wrong shape
        clustering = AgglomerativeClustering(
            n_clusters=10,
            connectivity=sparse.lil_matrix(
                connectivity.toarray()[:10, :10]),
            linkage=linkage)
        with pytest.raises(ValueError):
            clustering.fit(X)

    # Test that using ward with a metric other than euclidean raises an
    # exception
    clustering = AgglomerativeClustering(
        n_clusters=10,
        connectivity=connectivity.toarray(),
        affinity="manhattan",
        linkage="ward")
    with pytest.raises(ValueError):
        clustering.fit(X)

    # Test that metrics other than euclidean work with complete linkage
    for affinity in PAIRED_DISTANCES.keys():
        # Compare our (structured) implementation to scipy
        clustering = AgglomerativeClustering(
            n_clusters=10,
            connectivity=np.ones((n_samples, n_samples)),
            affinity=affinity,
            linkage="complete")
        clustering.fit(X)
        clustering2 = AgglomerativeClustering(
            n_clusters=10,
            connectivity=None,
            affinity=affinity,
            linkage="complete")
        clustering2.fit(X)
        assert_almost_equal(normalized_mutual_info_score(clustering2.labels_,
                                                         clustering.labels_),
                            1)

    # Test that using a distance matrix (affinity='precomputed') gives the
    # same results (with connectivity constraints)
    clustering = AgglomerativeClustering(n_clusters=10,
                                         connectivity=connectivity,
                                         linkage="complete")
    clustering.fit(X)
    X_dist = pairwise_distances(X)
    clustering2 = AgglomerativeClustering(n_clusters=10,
                                          connectivity=connectivity,
                                          affinity='precomputed',
                                          linkage="complete")
    clustering2.fit(X_dist)
    assert_array_equal(clustering.labels_, clustering2.labels_)
Code example #24
File: test_label.py Project: obulrdy6881/Drowsinss
def test_multilabel_binarizer_empty_sample():
    mlb = MultiLabelBinarizer()
    y = [[1, 2], [1], []]
    Y = np.array([[1, 1], [1, 0], [0, 0]])
    assert_array_equal(mlb.fit_transform(y), Y)
Code example #25
def test_pipeline_memory():
    X = iris.data
    y = iris.target
    cachedir = mkdtemp()
    try:
        if parse_version(joblib.__version__) < parse_version('0.12'):
            # Deal with change of API in joblib
            memory = joblib.Memory(cachedir=cachedir, verbose=10)
        else:
            memory = joblib.Memory(location=cachedir, verbose=10)
        # Test with Transformer + SVC
        clf = SVC(probability=True, random_state=0)
        transf = DummyTransf()
        pipe = Pipeline([('transf', clone(transf)), ('svc', clf)])
        cached_pipe = Pipeline([('transf', transf), ('svc', clf)],
                               memory=memory)

        # Memoize the transformer at the first fit
        cached_pipe.fit(X, y)
        pipe.fit(X, y)
        # Get the time stamp of the transformer in the cached pipeline
        ts = cached_pipe.named_steps['transf'].timestamp_
        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe.predict(X))
        assert_array_equal(pipe.predict_proba(X), cached_pipe.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y))
        assert_array_equal(pipe.named_steps['transf'].means_,
                           cached_pipe.named_steps['transf'].means_)
        assert not hasattr(transf, 'means_')
        # Check that we are reading the cache while fitting
        # a second time
        cached_pipe.fit(X, y)
        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe.predict(X))
        assert_array_equal(pipe.predict_proba(X), cached_pipe.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y))
        assert_array_equal(pipe.named_steps['transf'].means_,
                           cached_pipe.named_steps['transf'].means_)
        assert ts == cached_pipe.named_steps['transf'].timestamp_
        # Create a new pipeline with cloned estimators
        # Check that even changing the step name does not affect the cache hit
        clf_2 = SVC(probability=True, random_state=0)
        transf_2 = DummyTransf()
        cached_pipe_2 = Pipeline([('transf_2', transf_2), ('svc', clf_2)],
                                 memory=memory)
        cached_pipe_2.fit(X, y)

        # Check that cached_pipe and pipe yield identical results
        assert_array_equal(pipe.predict(X), cached_pipe_2.predict(X))
        assert_array_equal(pipe.predict_proba(X),
                           cached_pipe_2.predict_proba(X))
        assert_array_equal(pipe.predict_log_proba(X),
                           cached_pipe_2.predict_log_proba(X))
        assert_array_equal(pipe.score(X, y), cached_pipe_2.score(X, y))
        assert_array_equal(pipe.named_steps['transf'].means_,
                           cached_pipe_2.named_steps['transf_2'].means_)
        assert ts == cached_pipe_2.named_steps['transf_2'].timestamp_
    finally:
        shutil.rmtree(cachedir)
Code example #26
File: test_label.py Project: obulrdy6881/Drowsinss
def test_multilabel_binarizer_given_classes():
    inp = [(2, 3), (1, ), (1, 2)]
    indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]])
    # fit_transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])

    # fit().transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])

    # ensure works with extra class
    mlb = MultiLabelBinarizer(classes=[4, 1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp),
                       np.hstack(([[0], [0], [0]], indicator_mat)))
    assert_array_equal(mlb.classes_, [4, 1, 3, 2])

    # ensure fit is a no-op, as the iterable is not consumed
    inp = iter(inp)
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)

    # ensure a ValueError is thrown if given duplicate classes
    err_msg = "The classes argument contains duplicate classes. Remove " \
              "these duplicates before passing them to MultiLabelBinarizer."
    mlb = MultiLabelBinarizer(classes=[1, 3, 2, 3])
    with pytest.raises(ValueError, match=err_msg):
        mlb.fit(inp)
Code example #27
def test_set_feature_union_step_drop():
    mult2 = Mult(2)
    mult2.get_feature_names = lambda: ['x2']
    mult3 = Mult(3)
    mult3.get_feature_names = lambda: ['x3']
    X = np.asarray([[1]])

    ft = FeatureUnion([('m2', mult2), ('m3', mult3)])
    assert_array_equal([[2, 3]], ft.fit(X).transform(X))
    assert_array_equal([[2, 3]], ft.fit_transform(X))
    assert ['m2__x2', 'm3__x3'] == ft.get_feature_names()

    with pytest.warns(None) as record:
        ft.set_params(m2='drop')
        assert_array_equal([[3]], ft.fit(X).transform(X))
        assert_array_equal([[3]], ft.fit_transform(X))
    assert ['m3__x3'] == ft.get_feature_names()
    assert not record

    with pytest.warns(None) as record:
        ft.set_params(m3='drop')
        assert_array_equal([[]], ft.fit(X).transform(X))
        assert_array_equal([[]], ft.fit_transform(X))
    assert [] == ft.get_feature_names()
    assert not record

    with pytest.warns(None) as record:
        # check we can change back
        ft.set_params(m3=mult3)
        assert_array_equal([[3]], ft.fit(X).transform(X))
    assert not record

    with pytest.warns(None) as record:
        # Check 'drop' step at construction time
        ft = FeatureUnion([('m2', 'drop'), ('m3', mult3)])
        assert_array_equal([[3]], ft.fit(X).transform(X))
        assert_array_equal([[3]], ft.fit_transform(X))
    assert ['m3__x3'] == ft.get_feature_names()
    assert not record
Code example #28
File: test_label.py Project: obulrdy6881/Drowsinss
def test_label_binarizer():
    # one-class case defaults to negative label
    # For dense case:
    inp = ["pos", "pos", "pos", "pos"]
    lb = LabelBinarizer(sparse_output=False)
    expected = np.array([[0, 0, 0, 0]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(lb.classes_, ["pos"])
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)

    # For sparse case:
    lb = LabelBinarizer(sparse_output=True)
    got = lb.fit_transform(inp)
    assert issparse(got)
    assert_array_equal(lb.classes_, ["pos"])
    assert_array_equal(expected, got.toarray())
    assert_array_equal(lb.inverse_transform(got.toarray()), inp)

    lb = LabelBinarizer(sparse_output=False)
    # two-class case
    inp = ["neg", "pos", "pos", "neg"]
    expected = np.array([[0, 1, 1, 0]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(lb.classes_, ["neg", "pos"])
    assert_array_equal(expected, got)

    to_invert = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    assert_array_equal(lb.inverse_transform(to_invert), inp)

    # multi-class case
    inp = ["spam", "ham", "eggs", "ham", "0"]
    expected = np.array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0],
                         [0, 0, 1, 0], [1, 0, 0, 0]])
    got = lb.fit_transform(inp)
    assert_array_equal(lb.classes_, ['0', 'eggs', 'ham', 'spam'])
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
Code example #29
def test_fit_transform(strategy, expected):
    est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy=strategy)
    est.fit(X)
    assert_array_equal(expected, est.transform(X))
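This test, like several above (test_predict_equal_labels, test_the_extract_xi_labels, test_ordinal_encoder, test_one_hot_encoder_drop_manual), receives its arguments through pytest parametrization defined alongside it in the test module. A hedged sketch of the kind of decorator driving it; the real (strategy, expected) pairs are not reproduced here:

import pytest

# Illustrative only: the actual parameter sets live in the test module.
@pytest.mark.parametrize(
    "strategy, expected",
    [
        # the suite supplies pairs such as ("uniform", <expected codes>),
        # ("kmeans", <expected codes>), ("quantile", <expected codes>)
    ],
)
def test_fit_transform(strategy, expected):
    ...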
Code example #30
File: test_multiclass.py Project: rpuegue/CoursA61
def test_unique_labels():
    # Empty iterable
    assert_raises(ValueError, unique_labels)

    # Multiclass problem
    assert_array_equal(unique_labels(range(10)), np.arange(10))
    assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
    assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))

    # Multilabel indicator
    assert_array_equal(
        unique_labels(np.array([[0, 0, 1], [1, 0, 1], [0, 0, 0]])),
        np.arange(3))

    assert_array_equal(unique_labels(np.array([[0, 0, 1], [0, 0, 0]])),
                       np.arange(3))

    # Several arrays passed
    assert_array_equal(unique_labels([4, 0, 2], range(5)), np.arange(5))
    assert_array_equal(unique_labels((0, 1, 2), (0, ), (2, 1)), np.arange(3))

    # Borderline case with binary indicator matrix
    assert_raises(ValueError, unique_labels, [4, 0, 2], np.ones((5, 5)))
    assert_raises(ValueError, unique_labels, np.ones((5, 4)), np.ones((5, 5)))
    assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
                       np.arange(5))