def test_entropy_constant(self):
    # Every row holds the same value (0.0); the same array is reused as the
    # class column, whose only declared value is "1".
    zeros = np.zeros((100, 1))
    attributes = [ContinuousVariable('v1')]
    class_vars = [DiscreteVariable('c1', values=["1"])]
    constant_table = data.Table(Domain(attributes, class_vars), zeros, zeros)

    discretizer = discretize.EntropyMDL()
    discretized = discretizer(constant_table, constant_table.domain[0])

    # Expect a single value and an empty list of cut points.
    self.assertEqual(len(discretized.values), 1)
    self.assertEqual(discretized.compute_value.points, [])
def test_entropy(self):
    # Feature column: 25 rows each of 0, 1, 2 and 3, in that order.
    feature = np.repeat(np.arange(4), 25).reshape((100, 1))
    # Class column: 0 for the first 25 rows, 1 for the remaining 75, so the
    # class changes only between feature values 0 and 1.
    target = np.repeat([0, 1], [25, 75])
    table = data.Table.from_numpy(None, feature, target)

    discretized = discretize.EntropyMDL()(table, table.domain[0])

    # Expect two values separated by a single cut point at 0.5.
    self.assertEqual(len(discretized.values), 2)
    self.assertEqual(discretized.compute_value.points, [0.5])
def test_entropy_with_two_values(self):
    # Fifty 0s and fifty 1s in random row order; the same column is used
    # both as the feature and as the class.
    shuffled = [0] * 50 + [1] * 50
    random.shuffle(shuffled)
    column = np.array(shuffled).reshape(-1, 1)
    table = data.Table.from_numpy(None, column, column)

    discretizer = discretize.EntropyMDL()
    discretized = discretizer(table, table.domain[0])

    # Expect two values separated by a single cut point at 0.5.
    self.assertEqual(len(discretized.values), 2)
    self.assertEqual(discretized.compute_value.points, [0.5])
# Discretization method descriptors. Each namedtuple type identifies one
# method and carries only the parameters that the matching _dispatch entry
# reads from it.
MDL = namedtuple("MDL", [])  # no parameters
EqualFreq = namedtuple("EqualFreq", ["k"])  # k is passed to disc.EqualFreq
EqualWidth = namedtuple("EqualWidth", ["k"])  # k is passed to disc.EqualWidth
Remove = namedtuple("Remove", [])  # no parameters
Custom = namedtuple("Custom", ["points"])  # points handed to the discretizer

# Every method type, each wrapped in a 1-tuple. Default and Leave are not
# defined in this excerpt; they are expected to exist earlier in the module.
METHODS = [
    (Default, ), (Leave, ), (MDL, ), (EqualFreq, ), (EqualWidth, ),
    (Remove, ), (Custom, )
]

# Maps a method type to a callable taking (m, data, var), where m is an
# instance of that type, and returning whatever the chosen handler yields.
_dispatch = {
    # Re-dispatch on the type of the wrapped m.method.
    Default:
        lambda m, data, var: _dispatch[type(m.method)](m.method, data, var),
    # Hand the variable back unchanged.
    Leave: lambda m, data, var: var,
    MDL: lambda m, data, var: disc.EntropyMDL()(data, var),
    EqualFreq: lambda m, data, var: disc.EqualFreq(m.k)(data, var),
    EqualWidth: lambda m, data, var: disc.EqualWidth(m.k)(data, var),
    # Remove yields None instead of a variable.
    Remove: lambda m, data, var: None,
    # Build the variable directly from the explicitly given m.points.
    Custom:
        lambda m, data, var:
        disc.Discretizer.create_discretized_var(
            var, m.points)
}

# NOTE(review): this excerpt is cut off mid-statement. The trailing text
# below is reproduced exactly as it appears in the excerpt (already behind
# a `#`); the DState definition it starts continues outside this view.
# Variable discretization state DState = namedtuple( "DState", [
def _mdl_discretization( data: Table, var: Union[ContinuousVariable, str, int]) -> Union[DiscreteVariable, str]: if not data.domain.has_discrete_class: return "no discrete class" return disc.EntropyMDL()(data, var)