Ejemplo n.º 1
0
 def test_entropy_constant(self):
     """Check EntropyMDL on an all-zero column.

     The same all-zero array is used as both the attribute and the class
     column; the discretized variable must have a single value and an
     empty list of cut points.
     """
     zeros = np.zeros((100, 1))
     attributes = [ContinuousVariable('v1')]
     class_vars = [DiscreteVariable('c1', values=["1"])]
     table = data.Table(Domain(attributes, class_vars), zeros, zeros)

     discretizer = discretize.EntropyMDL()
     dvar = discretizer(table, table.domain[0])

     self.assertEqual(len(dvar.values), 1)
     self.assertEqual(dvar.compute_value.points, [])
Ejemplo n.º 2
0
 def test_entropy(self):
     """Check EntropyMDL on four value groups with one class change.

     The attribute takes the values 0, 1, 2 and 3, 25 rows each; the class
     is 0 for the first 25 rows and 1 for the remaining 75. The expected
     result is two intervals separated by the single cut point 0.5.
     """
     column = np.repeat([0, 1, 2, 3], 25).reshape((100, 1))
     labels = np.repeat([0, 1], [25, 75])
     table = data.Table.from_numpy(None, column, labels)

     discretizer = discretize.EntropyMDL()
     dvar = discretizer(table, table.domain[0])

     self.assertEqual(len(dvar.values), 2)
     self.assertEqual(dvar.compute_value.points, [0.5])
Ejemplo n.º 3
0
 def test_entropy_with_two_values(self):
     """Check EntropyMDL on a shuffled column holding only 0s and 1s.

     Fifty 0s and fifty 1s are shuffled and used as both the attribute and
     the class column; the expected result is two intervals separated by
     the single cut point 0.5.
     """
     shuffled = [0] * 50 + [1] * 50
     random.shuffle(shuffled)
     column = np.array(shuffled).reshape((100, 1))
     table = data.Table.from_numpy(None, column, column)

     discretizer = discretize.EntropyMDL()
     dvar = discretizer(table, table.domain[0])

     self.assertEqual(len(dvar.values), 2)
     self.assertEqual(dvar.compute_value.points, [0.5])
Ejemplo n.º 4
0
# Descriptors of the individual discretization methods; the fields carry
# the parameters the method needs (none for MDL and Remove).
MDL = namedtuple("MDL", [])
EqualFreq = namedtuple("EqualFreq", ["k"])
EqualWidth = namedtuple("EqualWidth", ["k"])
Remove = namedtuple("Remove", [])
Custom = namedtuple("Custom", ["points"])

# One single-element tuple per method descriptor type.
METHODS = [
    (Default,),
    (Leave,),
    (MDL,),
    (EqualFreq,),
    (EqualWidth,),
    (Remove,),
    (Custom,),
]

# Maps a method descriptor type to a callable taking the descriptor
# instance, the data and the variable, and returning the result of
# applying that method to the variable.
_dispatch = {
    # Unwrap the method carried by the default and dispatch on it again.
    Default: lambda m, data, var: (
        _dispatch[type(m.method)](m.method, data, var)
    ),
    # Return the variable as it is.
    Leave: lambda m, data, var: var,
    MDL: lambda m, data, var: disc.EntropyMDL()(data, var),
    EqualFreq: lambda m, data, var: disc.EqualFreq(m.k)(data, var),
    EqualWidth: lambda m, data, var: disc.EqualWidth(m.k)(data, var),
    # No variable is produced for a removed one.
    Remove: lambda m, data, var: None,
    # Build the discretized variable directly from the given points.
    Custom: lambda m, data, var: (
        disc.Discretizer.create_discretized_var(var, m.points)
    ),
}

# Variable discretization state
DState = namedtuple(
    "DState",
    [
Ejemplo n.º 5
0
def _mdl_discretization(
        data: Table,
        var: Union[ContinuousVariable, str, int]) -> Union[DiscreteVariable, str]:
    """Discretize ``var`` with ``disc.EntropyMDL`` when a discrete class exists.

    Args:
        data: table whose ``domain.has_discrete_class`` flag is checked and
            which is passed on to the discretizer.
        var: the variable to discretize, passed on to the discretizer.

    Returns:
        The result of ``disc.EntropyMDL()(data, var)``, or the string
        ``"no discrete class"`` when the domain has no discrete class.
    """
    if data.domain.has_discrete_class:
        return disc.EntropyMDL()(data, var)
    return "no discrete class"