예제 #1
0
def test_C_and_pandas_categorical():
    """Check design matrices built from pandas categorical inputs.

    The same categorical (codes ``[1, 0, 1]`` over categories
    ``["b", "a"]``) is used bare, wrapped in ``C()``, and wrapped in ``C()``
    with explicit ``levels``; each formula is compared against its expected
    matrix. Does nothing when pandas categoricals are unavailable.
    """
    if not have_pandas_categorical:
        return

    categorical = pandas_Categorical_from_codes([1, 0, 1], ["b", "a"])
    candidates = [categorical]
    if have_pandas_categorical_dtype:
        # Also cover the same categorical stored inside a Series.
        candidates.append(pandas.Series(categorical))

    # Expected matrix when the level order is ("b", "a") ...
    b_first = [[1, 1],
               [1, 0],
               [1, 1]]
    # ... and when the level order is explicitly given as ("a", "b").
    a_first = [[1, 0],
               [1, 1],
               [1, 0]]
    cases = [
        ("obj", b_first),
        ("C(obj)", b_first),
        ("C(obj, levels=['b', 'a'])", b_first),
        ("C(obj, levels=['a', 'b'])", a_first),
    ]

    for candidate in candidates:
        data = {"obj": candidate}
        for formula, expected in cases:
            assert np.allclose(dmatrix(formula, data), expected)
예제 #2
0
def test_C_and_pandas_categorical():
    """Verify dmatrix output for a pandas categorical in several spellings.

    Returns immediately when ``have_pandas_categorical`` is false. Otherwise
    the categorical (and, when ``have_pandas_categorical_dtype`` is true, a
    Series wrapping it) is run through four formulas and each resulting
    matrix is compared with ``np.allclose``.
    """
    if not have_pandas_categorical:
        return

    def matches(formula, data, expected):
        # True when the design matrix for `formula` equals `expected`.
        return np.allclose(dmatrix(formula, data), expected)

    base = pandas_Categorical_from_codes([1, 0, 1], ["b", "a"])
    variants = [base]
    if have_pandas_categorical_dtype:
        variants += [pandas.Series(base)]

    for variant in variants:
        env = {"obj": variant}

        # Bare name, C(), and C() with levels in the categorical's own order
        # are all expected to produce the same matrix.
        same_order = [[1, 1], [1, 0], [1, 1]]
        assert matches("obj", env, same_order)
        assert matches("C(obj)", env, same_order)
        assert matches("C(obj, levels=['b', 'a'])", env, same_order)

        # Swapping the explicit level order flips the indicator column.
        swapped_order = [[1, 0], [1, 1], [1, 0]]
        assert matches("C(obj, levels=['a', 'b'])", env, swapped_order)
예제 #3
0
 def Series_from_codes(codes, categories):
     """Return a pandas Series wrapping the categorical built from
     ``codes`` and ``categories`` by ``pandas_Categorical_from_codes``."""
     return pandas.Series(pandas_Categorical_from_codes(codes, categories))
예제 #4
0
def test_CategoricalSniffer():
    """Exercise CategoricalSniffer across the supported input kinds.

    Covers pandas categoricals (bare, boxed in ``C()``, and inside a
    Series), ``C()``-boxed sequences with and without explicit levels,
    NA handling, the boolean special cases, tuples as levels, explicit
    contrasts, unboxed sequences, 0-d data, and the two error cases
    (unhashable levels and >1-d input).
    """
    from patsy.missing import NAAction

    def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
        # Feed each element of `datas` to a fresh sniffer. `sniff` returning
        # a true value must coincide with `exp_finish_fast`, and sniffing
        # stops at that point. Finally compare the reported
        # (levels, contrast) pair with the expected one.
        sniffer = CategoricalSniffer(NAAction(NA_types=NA_types))
        for data in datas:
            done = sniffer.sniff(data)
            if done:
                assert exp_finish_fast
                break
            else:
                assert not exp_finish_fast
        assert sniffer.levels_contrast() == (exp_levels, exp_contrast)

    if have_pandas_categorical:
        # We make sure to test with both boxed and unboxed pandas objects,
        # because we used to have a bug where boxed pandas objects would be
        # treated as categorical, but their levels would be lost...
        preps = [lambda x: x,
                 C]
        if have_pandas_categorical_dtype:
            preps += [pandas.Series,
                      lambda x: C(pandas.Series(x))]
        for prep in preps:
            # Build categoricals with the pandas.Categorical constructor
            # rather than the Categorical.from_array alternate constructor,
            # which newer pandas releases no longer provide.
            t([], [prep(pandas.Categorical([1, 2, None]))],
              True, (1, 2))
            # check order preservation
            t([], [prep(pandas_Categorical_from_codes([1, 0], ["a", "b"]))],
              True, ("a", "b"))
            t([], [prep(pandas_Categorical_from_codes([1, 0], ["b", "a"]))],
              True, ("b", "a"))
            # check that if someone sticks a .contrast field onto our object
            # it is reported back as the contrast
            obj = prep(pandas.Categorical(["a", "b"]))
            obj.contrast = "CONTRAST"
            t([], [obj], True, ("a", "b"), "CONTRAST")

    t([], [C([1, 2]), C([3, 2])], False, (1, 2, 3))
    # check order preservation
    t([], [C([1, 2], levels=[1, 2, 3]), C([4, 2])], True, (1, 2, 3))
    t([], [C([1, 2], levels=[3, 2, 1]), C([4, 2])], True, (3, 2, 1))

    # do some actual sniffing with NAs in
    t(["None", "NaN"], [C([1, np.nan]), C([10, None])],
      False, (1, 10))
    # But 'None' can be a type if we don't make it represent NA:
    sniffer = CategoricalSniffer(NAAction(NA_types=["NaN"]))
    sniffer.sniff(C([1, np.nan, None]))
    # The level order here is different on py2 and py3 :-( Because there's no
    # consistent way to sort mixed-type values on both py2 and py3. Honestly
    # people probably shouldn't use this, but I don't know how to give a
    # sensible error.
    levels, _ = sniffer.levels_contrast()
    assert set(levels) == set([None, 1])

    # bool special cases
    t(["None", "NaN"], [C([True, np.nan, None])],
      True, (False, True))
    t([], [C([10, 20]), C([False]), C([30, 40])],
      False, (False, True, 10, 20, 30, 40))
    # exercise the fast-path
    t([], [np.asarray([True, False]), ["foo"]],
      True, (False, True))

    # check tuples too
    t(["None", "NaN"], [C([("b", 2), None, ("a", 1), np.nan, ("c", None)])],
      False, (("a", 1), ("b", 2), ("c", None)))

    # contrasts
    t([], [C([10, 20], contrast="FOO")], False, (10, 20), "FOO")

    # no box
    t([], [[10, 30], [20]], False, (10, 20, 30))
    t([], [["b", "a"], ["a"]], False, ("a", "b"))

    # 0d
    t([], ["b"], False, ("b",))

    from nose.tools import assert_raises

    # unhashable level error:
    sniffer = CategoricalSniffer(NAAction())
    assert_raises(PatsyError, sniffer.sniff, [{}])

    # >1d is illegal
    assert_raises(PatsyError, sniffer.sniff, np.asarray([["b"]]))
예제 #5
0
 def Series_from_codes(codes, categories):
     """Wrap the categorical for ``codes``/``categories`` in a pandas Series."""
     categorical = pandas_Categorical_from_codes(codes, categories)
     series = pandas.Series(categorical)
     return series
예제 #6
0
def test_CategoricalSniffer():
    """Run CategoricalSniffer through its supported inputs and error cases.

    Each scenario feeds one or more data chunks to a fresh sniffer and
    checks both whether sniffing finished early and the resulting
    ``(levels, contrast)`` pair. The final two checks confirm that
    unhashable levels and >1-d input raise ``PatsyError``.
    """
    from patsy.missing import NAAction

    def check(na_types, chunks, expect_early_stop, expect_levels,
              expect_contrast=None):
        # Sniff the chunks in order; the first true return from `sniff`
        # must be expected and ends the loop, any false return must also
        # be expected. Then compare what the sniffer reports.
        sniffer = CategoricalSniffer(NAAction(NA_types=na_types))
        for chunk in chunks:
            stopped = sniffer.sniff(chunk)
            if not stopped:
                assert not expect_early_stop
                continue
            assert expect_early_stop
            break
        assert sniffer.levels_contrast() == (expect_levels, expect_contrast)

    if have_pandas_categorical:
        # Both boxed and unboxed pandas objects are covered, because of a
        # past bug where boxed pandas objects were treated as categorical
        # but lost their levels.
        def identity(value):
            return value

        def boxed_series(value):
            return C(pandas.Series(value))

        wrappers = [identity, C]
        if have_pandas_categorical_dtype:
            wrappers.extend([pandas.Series, boxed_series])

        for wrap in wrappers:
            with_missing = wrap(pandas.Categorical([1, 2, None]))
            check([], [with_missing], True, (1, 2))

            # Category order must be preserved, whichever way round it is.
            for categories in (["a", "b"], ["b", "a"]):
                coded = pandas_Categorical_from_codes([1, 0], categories)
                check([], [wrap(coded)], True, tuple(categories))

            # A .contrast attribute stuck onto the object must be reported.
            tagged = wrap(pandas.Categorical(["a", "b"]))
            tagged.contrast = "CONTRAST"
            check([], [tagged], True, ("a", "b"), "CONTRAST")

    # Levels accumulate across chunks when none are given explicitly.
    check([], [C([1, 2]), C([3, 2])], False, (1, 2, 3))

    # Explicit levels finish immediately and keep their order.
    for explicit in ([1, 2, 3], [3, 2, 1]):
        check([], [C([1, 2], levels=explicit), C([4, 2])],
              True, tuple(explicit))

    # Actual sniffing with NA values mixed in.
    na_both = ["None", "NaN"]
    check(na_both, [C([1, np.nan]), C([10, None])], False, (1, 10))

    # None is an ordinary level when it is not configured to mean NA.
    nan_only_sniffer = CategoricalSniffer(NAAction(NA_types=["NaN"]))
    nan_only_sniffer.sniff(C([1, np.nan, None]))
    # Level order differs between py2 and py3 here, since mixed-type values
    # have no consistent sort order across both; compare as sets instead.
    found_levels, _ = nan_only_sniffer.levels_contrast()
    assert set(found_levels) == set([None, 1])

    # Boolean special cases.
    check(na_both, [C([True, np.nan, None])], True, (False, True))
    check([],
          [C([10, 20]), C([False]), C([30, 40])],
          False,
          (False, True, 10, 20, 30, 40))
    # Boolean array fast-path: finishes before the second chunk is seen.
    check([], [np.asarray([True, False]), ["foo"]], True, (False, True))

    # Tuples work as levels too.
    tuple_data = C([("b", 2), None, ("a", 1), np.nan, ("c", None)])
    check(na_both, [tuple_data], False,
          (("a", 1), ("b", 2), ("c", None)))

    # Contrast passed through C().
    check([], [C([10, 20], contrast="FOO")], False, (10, 20), "FOO")

    # Plain sequences without a C() box.
    check([], [[10, 30], [20]], False, (10, 20, 30))
    check([], [["b", "a"], ["a"]], False, ("a", "b"))

    # 0-d data.
    check([], ["b"], False, ("b", ))

    from nose.tools import assert_raises

    failing_sniffer = CategoricalSniffer(NAAction())
    # Unhashable levels are rejected.
    assert_raises(PatsyError, failing_sniffer.sniff, [{}])
    # So is data with more than one dimension.
    assert_raises(PatsyError, failing_sniffer.sniff, np.asarray([["b"]]))