Ejemplo n.º 1
0
def mine_c45(table, result):
    """ An entry point for C45 algorithm.

        _table_ - a dict representing data table in the following format:
        {
            "<column name>": [<column values>],
            "<column name>": [<column values>],
            ...
        }

        _result_: a string representing a name of column indicating a result.

        Returns a list of branches. Each branch is a list starting with a
        "<column>=<value>" label, followed either by a "<result>=<value>"
        leaf or by the branches of the nested subtree. When no column other
        than _result_ is left, the branches are
        ["probability=<p>", "<result>=<value>"] pairs, one per distinct
        result value.
    """
    attributes = [k for k in table.keys() if k != result]

    # With nothing left to split on (e.g. rows that agree on every attribute
    # but disagree on the result) max() below would raise ValueError on an
    # empty sequence, so report the distribution of results instead.
    if not attributes:
        from collections import Counter

        outcomes = table[result]
        total = len(outcomes)
        return [['probability=%f' % (float(count) / total),
                 '%s=%s' % (result, value)]
                for value, count in Counter(outcomes).items()]

    # Split on the column with the highest gain (first one wins on ties).
    col = max(attributes, key=lambda k: gain(table, k, result))
    tree = []
    for subt in get_subtables(table, col):
        v = subt[col][0]
        if is_mono(subt[result]):
            # Every row of this subtable has the same result: emit a leaf.
            tree.append(
                ['%s=%s' % (col, v),
                 '%s=%s' % (result, subt[result][0])])
        else:
            # Drop the column just used so the recursion cannot pick it again.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))
    return tree
Ejemplo n.º 2
0
def mine_c45(table, result):
    """Build a decision tree for ``table`` as nested lists.

    Args:
        table: dict mapping each column name to the list of its values.
        result: name of the column that holds the outcome.

    Returns:
        A list of branches. Each branch is a list starting with a
        "<column>=<value>" label, followed either by a "<result>=<value>"
        leaf or by the branches of the nested subtree. When no column other
        than ``result`` is left, the branches are
        ["probability=<p>", "<result>=<value>"] pairs, one per distinct
        outcome.
    """
    candidates = [k for k in table.keys() if k != result]

    # Without any candidate column max() would raise ValueError on an empty
    # sequence (this happens once recursion has consumed every attribute but
    # the outcomes still differ). Describe the outcome distribution instead.
    if not candidates:
        from collections import Counter

        outcomes = table[result]
        total = len(outcomes)
        return [['probability=%f' % (float(count) / total),
                 '%s=%s' % (result, value)]
                for value, count in Counter(outcomes).items()]

    # The column with the highest gain becomes the split (first wins on ties).
    col = max(candidates, key=lambda k: gain(table, k, result))
    tree = []
    for subt in get_subtables(table, col):
        v = subt[col][0]
        if is_mono(subt[result]):
            # Single outcome in this subtable: emit a leaf.
            tree.append(['%s=%s' % (col, v),
                         '%s=%s' % (result, subt[result][0])])
        else:
            # Remove the used column before recursing on the remainder.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))
    return tree
Ejemplo n.º 3
0
def mine_c45(table, result):
    """Build a decision tree for ``table`` as nested lists.

    Args:
        table: dict mapping each column name to the list of its values.
        result: name of the column that holds the outcome.

    Returns:
        A list of branches. Each branch is a list starting with a
        "<column>=<value>" label, followed either by a "<result>=<value>"
        leaf or by the branches of the nested subtree. When no column other
        than ``result`` is left, the branches are
        ["probability=<p>", "<result>=<value>"] pairs, one per distinct
        outcome.
    """
    splittable = [k for k in table.keys() if k != result]

    # max() raises ValueError on an empty sequence, which is what happens
    # when only the result column is left (rows identical in every attribute
    # but with different outcomes). Fall back to the outcome distribution.
    if not splittable:
        from collections import Counter

        outcomes = table[result]
        total = len(outcomes)
        return [['probability=%f' % (float(count) / total),
                 '%s=%s' % (result, value)]
                for value, count in Counter(outcomes).items()]

    # Choose the column with the largest gain (first one wins on ties).
    col = max(splittable, key=lambda k: gain(table, k, result))
    tree = []
    for subt in get_subtables(table, col):
        v = subt[col][0]
        if is_mono(subt[result]):
            # All rows share one outcome: emit a leaf.
            tree.append(
                ['%s=%s' % (col, v),
                 '%s=%s' % (result, subt[result][0])])
        else:
            # The split column is spent; drop it before recursing.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))
    return tree
Ejemplo n.º 4
0
def mine_c45(table, result):
    """ An entry point for C45 algorithm.

        _table_ - a dict representing data table in the following format:
        {
            "<column name>": [<column values>],
            "<column name>": [<column values>],
            ...
        }

        _result_: a string representing a name of column indicating a result.

        Returns a list of branches. Each branch is a list starting with a
        "<column>=<value>" label (or "probability=<p>" in the single-column
        case), followed by a "<result>=<value>" leaf or by the branches of
        the nested subtree.
    """
    # Mixed strategy: only one column is left, so describe how the result
    # values are distributed instead of splitting any further.
    if len(table.keys()) == 1:
        # NOTE(review): get_distribution appears to return
        # (total, {value: count}) -- confirm against its definition.
        distribution = get_distribution(table[result])
        return [
            ['probability=%f' % (distribution[1][outcome] / distribution[0]),
             '%s=%s' % (result, outcome)]
            for outcome in distribution[1].keys()
        ]

    # General case: split on the non-result column with the highest gain.
    split_col = max(
        (name for name in table.keys() if name != result),
        key=lambda name: gain(table, name, result))

    branches = []
    for subtable in get_subtables(table, split_col):
        label = '%s=%s' % (split_col, subtable[split_col][0])
        outcomes = subtable[result]
        if is_mono(outcomes):
            # Every row agrees on the result: emit a leaf.
            branch = [label, '%s=%s' % (result, outcomes[0])]
        else:
            # Drop the used column, then grow the subtree recursively.
            del subtable[split_col]
            branch = [label] + mine_c45(subtable, result)
        branches.append(branch)

    return branches
Ejemplo n.º 5
0
def mine_c45(table, result):
    """ An entry point for C45 algorithm.

        _table_ - a dict representing data table in the following format:
        {
            "<column name>": [<column values>],
            "<column name>": [<column values>],
            ...
        }

        _result_: a string representing a name of column indicating a result.

        Returns a list of branches. Each branch is a list starting with a
        "<column>=<value>" label, followed either by a "<result>=<value>"
        leaf or by the branches of the nested subtree. When no column other
        than _result_ is left, the branches are
        ["probability=<p>", "<result>=<value>"] pairs, one per distinct
        result value.
    """
    attributes = [k for k in table.keys() if k != result]

    # Once every attribute has been consumed but the results still differ,
    # there is nothing to pass to max() (it raises ValueError on an empty
    # sequence); return the distribution of results instead.
    if not attributes:
        from collections import Counter

        outcomes = table[result]
        total = len(outcomes)
        return [['probability=%f' % (float(count) / total),
                 '%s=%s' % (result, value)]
                for value, count in Counter(outcomes).items()]

    # Split on the column with the highest gain (first one wins on ties).
    col = max(attributes, key=lambda k: gain(table, k, result))
    tree = []
    for subt in get_subtables(table, col):
        v = subt[col][0]
        if is_mono(subt[result]):
            # Uniform result in this subtable: emit a leaf.
            tree.append(['%s=%s' % (col, v),
                         '%s=%s' % (result, subt[result][0])])
        else:
            # Remove the used column so the recursion works on the rest.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))
    return tree
Ejemplo n.º 6
0
def mine_c45(table, result):
    """ An entry point for C45 algorithm.

        _table_ - a dict representing data table in the following format:
        {
            "<column name>": [<column values>],
            "<column name>": [<column values>],
            ...
        }

        _result_: a string representing a name of column indicating a result.

        Returns a list of branches. Each branch is a list starting with a
        "<column>=<value>" label (or "probability=<p>" in the single-column
        case), followed by a "<result>=<value>" leaf or by the branches of
        the nested subtree.
    """
    if len(table.keys()) == 1:
        # Mixed strategy: a single column remains, so list each result value
        # together with its share of the rows.
        # NOTE(review): get_distribution appears to return
        # (total, {value: count}) -- confirm against its definition.
        distr = get_distribution(table[result])
        return [['probability=%f' % (distr[1][value] / distr[0]),
                 '%s=%s' % (result, value)]
                for value in distr[1].keys()]

    # Otherwise split on the non-result column whose gain is the largest.
    best = max((c for c in table.keys() if c != result),
               key=lambda c: gain(table, c, result))

    nodes = []
    for part in get_subtables(table, best):
        head = '%s=%s' % (best, part[best][0])
        if not is_mono(part[result]):
            # Results still differ: drop the used column and recurse.
            del part[best]
            nodes.append([head] + mine_c45(part, result))
            continue
        # Results are uniform: emit a leaf.
        nodes.append([head, '%s=%s' % (result, part[result][0])])

    return nodes
Ejemplo n.º 7
0
 def test_gain(self):
     """gain() of column 'arg1' against 'result' on self.table must be 0."""
     computed = gain(self.table, 'arg1', 'result')
     self.assertEqual(computed, 0)
Ejemplo n.º 8
0
 def test_gain(self):
     """Check that gain() yields 0 for 'arg1' vs. 'result' on self.table."""
     expected = 0
     actual = gain(self.table, 'arg1', 'result')
     self.assertEqual(actual, expected)