# Method types built with namedtuple.  ``EqualWidth`` carries ``k`` and
# ``Custom`` carries ``points``; ``Remove`` has no fields.
EqualWidth = namedtuple("EqualWidth", "k")
Remove = namedtuple("Remove", ())
Custom = namedtuple("Custom", "points")

# One 1-tuple per method type, in a fixed order.
METHODS = [
    (method_type,)
    for method_type in (
        Default, Leave, MDL, EqualFreq, EqualWidth, Remove, Custom
    )
]

# Maps a method type to a callable ``(m, data, var)`` where ``m`` is an
# instance of that type.  Each callable returns whatever the underlying
# ``disc`` call returns, ``var`` itself, or None.
_dispatch = {
    # Default wraps another method in ``m.method``: look that one up by its
    # type and delegate to it.
    Default:
        lambda m, data, var: _dispatch[type(m.method)](m.method, data, var),
    # Leave hands the variable back untouched.
    Leave:
        lambda m, data, var: var,
    MDL:
        lambda m, data, var: disc.EntropyMDL()(data, var),
    # ``m.k`` is forwarded to the corresponding ``disc`` discretizer.
    EqualFreq:
        lambda m, data, var: disc.EqualFreq(m.k)(data, var),
    EqualWidth:
        lambda m, data, var: disc.EqualWidth(m.k)(data, var),
    # Remove yields None instead of a variable.
    Remove:
        lambda m, data, var: None,
    # Custom builds the discretized variable from the stored ``m.points``.
    Custom:
        lambda m, data, var: disc.Discretizer.create_discretized_var(
            var, m.points),
}

# Variable discretization state DState = namedtuple( "DState", [ "method", # discretization method "points", # induced cut points
()), MethodDesc(Methods.Keep, "Keep numeric", "keep", "Keep the variable as is", lambda data, var: var, ()), MethodDesc(Methods.MDL, "Entropy vs. MDL", "entropy", "Split values until MDL exceeds the entropy (Fayyad-Irani)\n" "(requires discrete class variable)", _mdl_discretization, ()), MethodDesc(Methods.EqualFreq, "Equal frequency, intervals: ", "equal freq, k={}", "Create bins with same number of instances", lambda data, var, k: disc.EqualFreq(k)(data, var), ("freq_spin", )), MethodDesc(Methods.EqualWidth, "Equal width, intervals: ", "equal width, k={}", "Create bins of the same width", lambda data, var, k: disc.EqualWidth(k)(data, var), ("width_spin", )), MethodDesc(Methods.Remove, "Remove", "remove", "Remove variable", lambda *_: None, ()), MethodDesc(Methods.Binning, "Natural binning, desired bins: ", "binning, desired={}", "Create bins with nice thresholds; " "try matching desired number of bins",