def test_C_and_pandas_categorical():
    """Check dmatrix output for pandas categorical inputs, bare and via C().

    The test is a no-op when ``have_pandas_categorical`` is false. The same
    categorical (codes ``[1, 0, 1]`` over categories ``["b", "a"]``) is fed
    in directly and, when ``have_pandas_categorical_dtype`` is true, also
    wrapped in a ``pandas.Series``.
    """
    if not have_pandas_categorical:
        return

    categorical = pandas_Categorical_from_codes([1, 0, 1], ["b", "a"])
    objs = [categorical]
    if have_pandas_categorical_dtype:
        objs.append(pandas.Series(categorical))

    # Expected matrices for the two level orderings exercised below.
    b_then_a = [[1, 1], [1, 0], [1, 1]]
    a_then_b = [[1, 0], [1, 1], [1, 0]]

    # Each formula is paired with the matrix it must produce. The first
    # three must agree: the categorical's own order is ("b", "a").
    cases = [
        ("obj", b_then_a),
        ("C(obj)", b_then_a),
        ("C(obj, levels=['b', 'a'])", b_then_a),
        ("C(obj, levels=['a', 'b'])", a_then_b),
    ]

    for obj in objs:
        data = {"obj": obj}
        for formula, expected in cases:
            assert np.allclose(dmatrix(formula, data), expected)
def Series_from_codes(codes, categories):
    """Return a ``pandas.Series`` wrapping a categorical built from codes.

    ``codes`` and ``categories`` are passed unchanged to
    ``pandas_Categorical_from_codes``; the result is then boxed in a Series.
    """
    return pandas.Series(pandas_Categorical_from_codes(codes, categories))
def test_CategoricalSniffer():
    """Exercise ``CategoricalSniffer.sniff`` and ``levels_contrast``.

    Covers pandas categoricals (boxed and unboxed), explicit ``C(...)``
    wrappers with and without levels/contrast, NA handling, the bool special
    case, tuples, plain lists, a bare string, and the error paths for
    unhashable and >1d data.

    NOTE(review): another function with this same name is defined later in
    this file; if both stay in one module the later one shadows this one.
    """
    from patsy.missing import NAAction

    def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
        """Sniff ``datas`` in order and check the sniffer's conclusions.

        A fresh sniffer is built with ``NAAction(NA_types=NA_types)``. Each
        item is sniffed until ``sniff`` returns a true value; that must
        happen if and only if ``exp_finish_fast`` is true. Finally
        ``levels_contrast()`` must equal ``(exp_levels, exp_contrast)``.
        """
        sniffer = CategoricalSniffer(NAAction(NA_types=NA_types))
        for data in datas:
            done = sniffer.sniff(data)
            if done:
                assert exp_finish_fast
                break
            else:
                assert not exp_finish_fast
        assert sniffer.levels_contrast() == (exp_levels, exp_contrast)

    if have_pandas_categorical:
        # We make sure to test with both boxed and unboxed pandas objects,
        # because we used to have a bug where boxed pandas objects would be
        # treated as categorical, but their levels would be lost...
        preps = [lambda x: x, C]
        if have_pandas_categorical_dtype:
            preps += [pandas.Series, lambda x: C(pandas.Series(x))]
        for prep in preps:
            # Use the Categorical constructor directly:
            # Categorical.from_array was deprecated and then removed from
            # pandas, so calling it fails on current releases.
            t([], [prep(pandas.Categorical([1, 2, None]))],
              True, (1, 2))
            # check order preservation
            t([], [prep(pandas_Categorical_from_codes([1, 0], ["a", "b"]))],
              True, ("a", "b"))
            t([], [prep(pandas_Categorical_from_codes([1, 0], ["b", "a"]))],
              True, ("b", "a"))
            # check that if someone sticks a .contrast field onto our object
            obj = prep(pandas.Categorical(["a", "b"]))
            obj.contrast = "CONTRAST"
            t([], [obj], True, ("a", "b"), "CONTRAST")

    t([], [C([1, 2]), C([3, 2])], False, (1, 2, 3))
    # check order preservation
    t([], [C([1, 2], levels=[1, 2, 3]), C([4, 2])], True, (1, 2, 3))
    t([], [C([1, 2], levels=[3, 2, 1]), C([4, 2])], True, (3, 2, 1))

    # do some actual sniffing with NAs in
    t(["None", "NaN"], [C([1, np.nan]), C([10, None])],
      False, (1, 10))
    # But 'None' can be a type if we don't make it represent NA:
    sniffer = CategoricalSniffer(NAAction(NA_types=["NaN"]))
    sniffer.sniff(C([1, np.nan, None]))
    # The level order here is different on py2 and py3 :-( Because there's no
    # consistent way to sort mixed-type values on both py2 and py3. Honestly
    # people probably shouldn't use this, but I don't know how to give a
    # sensible error.
    levels, _ = sniffer.levels_contrast()
    assert set(levels) == {None, 1}

    # bool special cases
    t(["None", "NaN"], [C([True, np.nan, None])],
      True, (False, True))
    t([], [C([10, 20]), C([False]), C([30, 40])],
      False, (False, True, 10, 20, 30, 40))
    # exercise the fast-path
    t([], [np.asarray([True, False]), ["foo"]],
      True, (False, True))

    # check tuples too
    t(["None", "NaN"], [C([("b", 2), None, ("a", 1), np.nan, ("c", None)])],
      False, (("a", 1), ("b", 2), ("c", None)))

    # contrasts
    t([], [C([10, 20], contrast="FOO")], False, (10, 20), "FOO")

    # no box
    t([], [[10, 30], [20]], False, (10, 20, 30))
    t([], [["b", "a"], ["a"]], False, ("a", "b"))

    # 0d
    t([], ["b"], False, ("b",))

    from nose.tools import assert_raises

    # unhashable level error:
    sniffer = CategoricalSniffer(NAAction())
    assert_raises(PatsyError, sniffer.sniff, [{}])

    # >1d is illegal
    assert_raises(PatsyError, sniffer.sniff, np.asarray([["b"]]))
def test_CategoricalSniffer():
    """Run CategoricalSniffer through its supported inputs and error paths.

    Scenarios, in order: pandas categoricals (raw, wrapped in ``C``, and,
    when available, wrapped in ``pandas.Series``), multi-chunk sniffing with
    and without preset levels, NA filtering, bools, tuples, contrasts,
    unboxed lists, a bare string, and finally inputs that must raise
    ``PatsyError``.
    """
    from patsy.missing import NAAction

    def check_sniff(NA_types, datas, exp_finish_fast, exp_levels,
                    exp_contrast=None):
        """Feed ``datas`` to a new sniffer and verify what it reports.

        Sniffing stops at the first chunk for which ``sniff`` returns a true
        value, which is only allowed when ``exp_finish_fast`` is true; every
        chunk that does not finish requires ``exp_finish_fast`` to be false.
        """
        sniffer = CategoricalSniffer(NAAction(NA_types=NA_types))
        for chunk in datas:
            if sniffer.sniff(chunk):
                assert exp_finish_fast
                break
            assert not exp_finish_fast
        assert sniffer.levels_contrast() == (exp_levels, exp_contrast)

    if have_pandas_categorical:
        # Boxed and unboxed pandas objects are both covered: a past bug
        # treated boxed objects as categorical but dropped their levels.
        preps = [lambda x: x, C]
        if have_pandas_categorical_dtype:
            preps.extend([pandas.Series, lambda x: C(pandas.Series(x))])
        for prep in preps:
            check_sniff([], [prep(pandas.Categorical([1, 2, None]))],
                        True, (1, 2))
            # The category order given to the categorical must survive.
            check_sniff(
                [], [prep(pandas_Categorical_from_codes([1, 0], ["a", "b"]))],
                True, ("a", "b"))
            check_sniff(
                [], [prep(pandas_Categorical_from_codes([1, 0], ["b", "a"]))],
                True, ("b", "a"))
            # A .contrast attribute attached to the object must be reported.
            tagged = prep(pandas.Categorical(["a", "b"]))
            tagged.contrast = "CONTRAST"
            check_sniff([], [tagged], True, ("a", "b"), "CONTRAST")

    check_sniff([], [C([1, 2]), C([3, 2])], False, (1, 2, 3))
    # Explicit levels end sniffing at once and keep their given order.
    check_sniff([], [C([1, 2], levels=[1, 2, 3]), C([4, 2])],
                True, (1, 2, 3))
    check_sniff([], [C([1, 2], levels=[3, 2, 1]), C([4, 2])],
                True, (3, 2, 1))

    # Real sniffing with NA values mixed into the data.
    check_sniff(["None", "NaN"], [C([1, np.nan]), C([10, None])],
                False, (1, 10))
    # When "None" is not listed as an NA type, None is an ordinary level.
    nan_only_sniffer = CategoricalSniffer(NAAction(NA_types=["NaN"]))
    nan_only_sniffer.sniff(C([1, np.nan, None]))
    # Mixed-type values sort differently on py2 and py3, so only the set of
    # levels is compared here, not their order.
    found_levels, _ = nan_only_sniffer.levels_contrast()
    assert set(found_levels) == {None, 1}

    # Bool special cases.
    check_sniff(["None", "NaN"], [C([True, np.nan, None])],
                True, (False, True))
    check_sniff([], [C([10, 20]), C([False]), C([30, 40])],
                False, (False, True, 10, 20, 30, 40))
    # Fast path: a bool array finishes before the second chunk is seen.
    check_sniff([], [np.asarray([True, False]), ["foo"]],
                True, (False, True))

    # Tuples as levels.
    check_sniff(
        ["None", "NaN"],
        [C([("b", 2), None, ("a", 1), np.nan, ("c", None)])],
        False, (("a", 1), ("b", 2), ("c", None)))

    # Contrast passed through C().
    check_sniff([], [C([10, 20], contrast="FOO")], False, (10, 20), "FOO")

    # Plain lists, no C() box.
    check_sniff([], [[10, 30], [20]], False, (10, 20, 30))
    check_sniff([], [["b", "a"], ["a"]], False, ("a", "b"))

    # 0d: a single bare string as the only chunk.
    check_sniff([], ["b"], False, ("b",))

    from nose.tools import assert_raises

    # Unhashable levels are rejected.
    failing_sniffer = CategoricalSniffer(NAAction())
    assert_raises(PatsyError, failing_sniffer.sniff, [{}])

    # Data with more than one dimension is rejected.
    assert_raises(PatsyError, failing_sniffer.sniff, np.asarray([["b"]]))