Beispiel #1
0
 def t(NA_types, datas, exp_finish_fast, exp_levels, exp_contrast=None):
     """Drive a CategoricalSniffer over ``datas`` and check what it reports.

     A sniffer is built from ``NAAction(NA_types=NA_types)`` and fed the items
     of ``datas`` one at a time.  When ``sniff`` returns a true value the
     helper asserts ``exp_finish_fast`` and stops feeding; for every false
     return it asserts that ``exp_finish_fast`` is not set.  Finally the
     result of ``levels_contrast()`` must equal
     ``(exp_levels, exp_contrast)``.
     """
     na_action = NAAction(NA_types=NA_types)
     sniffer = CategoricalSniffer(na_action)
     for chunk in datas:
         if not sniffer.sniff(chunk):
             # Sniffer wants more data: only legal when no early finish
             # was expected.
             assert not exp_finish_fast
             continue
         # Sniffer declared itself done after this chunk.
         assert exp_finish_fast
         break
     expected = (exp_levels, exp_contrast)
     assert sniffer.levels_contrast() == expected
Beispiel #2
0
def _examine_factor_types(factors, factor_states, data_iter_maker, NA_action):
    """Classify each factor as numeric or categorical by scanning the data.

    Every chunk produced by ``data_iter_maker()`` is evaluated for each
    factor that is still undecided.  A factor whose value is judged
    categorical by ``guess_categorical`` (or that already has a sniffer) is
    fed to a ``CategoricalSniffer`` until that sniffer reports it is done;
    any other factor is treated as numeric and settled on the first chunk.
    Scanning stops as soon as no factor is left undecided.

    Returns a 2-tuple ``(num_column_counts, cat_levels_contrasts)``:

    * ``num_column_counts`` maps each numeric factor to ``value.shape[1]``
      of its value after ``atleast_2d_column_default``.
    * ``cat_levels_contrasts`` maps each categorical factor to the result
      of its sniffer's ``levels_contrast()``.
    """
    num_column_counts = {}
    cat_sniffers = {}
    pending = set(factors)
    for data in data_iter_maker():
        # Iterate over a snapshot because factors are dropped from
        # ``pending`` while we go.
        for factor in list(pending):
            value = factor.eval(factor_states[factor], data)
            sniffer = cat_sniffers.get(factor)
            if sniffer is None and not guess_categorical(value):
                # Numeric factor: one chunk is enough to know its width.
                columns = atleast_2d_column_default(value)
                _max_allowed_dim(2, columns, factor)
                num_column_counts[factor] = columns.shape[1]
                pending.remove(factor)
                continue
            if sniffer is None:
                # First categorical chunk seen for this factor.
                sniffer = CategoricalSniffer(NA_action, factor.origin)
                cat_sniffers[factor] = sniffer
            if sniffer.sniff(value):
                pending.remove(factor)
        if not pending:
            break
    # Collect what every categorical sniffer has gathered.
    cat_levels_contrasts = {
        factor: sniffer.levels_contrast()
        for factor, sniffer in six.iteritems(cat_sniffers)
    }
    return (num_column_counts, cat_levels_contrasts)
Beispiel #3
0
def test_CategoricalSniffer():
    """Check CategoricalSniffer levels/contrast detection on assorted inputs.

    Covers pandas ``Categorical`` objects, ``C(...)`` wrapped data spread
    over several chunks, NA handling driven by ``NAAction``, the bool
    special case, tuple levels, explicit contrasts and the error raised for
    unhashable levels.
    """
    patch_patsy()

    # Deliberately imported only after patch_patsy() has been called.
    from patsy.categorical import CategoricalSniffer

    def check(NA_types, datas, exp_finish_fast, exp_levels,
              exp_contrast=None):
        # Feed the chunks one by one; a true return from sniff() must match
        # exp_finish_fast and ends the feeding, a false return must not
        # happen when an early finish was expected.
        probe = CategoricalSniffer(NAAction(NA_types=NA_types))
        for chunk in datas:
            if not probe.sniff(chunk):
                assert not exp_finish_fast
                continue
            assert exp_finish_fast
            break
        expected = (exp_levels, exp_contrast)
        assert probe.levels_contrast() == expected

    check([], [pd.Categorical.from_array([1, 2, None])], True, (1, 2))
    # level order has to be preserved
    check([], [pd.Categorical([1, 0], ["a", "b"])], True, ("a", "b"))
    check([], [pd.Categorical([1, 0], ["b", "a"])], True, ("b", "a"))
    # a .contrast attribute attached to a Categorical object must be
    # picked up
    with_contrast = pd.Categorical.from_array(["a", "b"])
    with_contrast.contrast = "CONTRAST"
    check([], [with_contrast], True, ("a", "b"), "CONTRAST")

    check([], [C([1, 2]), C([3, 2])], False, (1, 2, 3))
    # level order has to be preserved
    check([], [C([1, 2], levels=[1, 2, 3]), C([4, 2])], True, (1, 2, 3))
    check([], [C([1, 2], levels=[3, 2, 1]), C([4, 2])], True, (3, 2, 1))

    # real sniffing with NAs mixed into the data
    check(["None", "NaN"], [C([1, np.nan]), C([10, None])], False, (1, 10))
    # 'None' counts as an ordinary level when it is not declared as NA
    nan_only_sniffer = CategoricalSniffer(NAAction(NA_types=["NaN"]))
    nan_only_sniffer.sniff(C([1, np.nan, None]))
    # The level order differs between py2 and py3 because mixed-type values
    # cannot be sorted consistently on both, so only the set of levels is
    # compared.  People probably shouldn't rely on this, but there is no
    # obvious way to give a sensible error.
    found_levels, _ = nan_only_sniffer.levels_contrast()
    assert set(found_levels) == {None, 1}

    # bool special case
    check(["None", "NaN"], [C([True, np.nan, None])], True, (False, True))
    check([], [C([10, 20]), C([False]), C([30, 40])],
          False, (False, True, 10, 20, 30, 40))

    # tuples work as levels too
    check(["None", "NaN"],
          [C([("b", 2), None, ("a", 1), np.nan, ("c", None)])],
          False, (("a", 1), ("b", 2), ("c", None)))

    # contrasts
    check([], [C([10, 20], contrast="FOO")], False, (10, 20), "FOO")

    # an unhashable level must raise PatsyError
    default_sniffer = CategoricalSniffer(NAAction())
    with pytest.raises(PatsyError):
        default_sniffer.sniff([{}])