Example #1
0
def mine_c45(table, result):
    """ An entry point for the C45 algorithm.

        _table_ - a dict representing a data table in the following format:
        {
            "<column name>": [<column values>],
            "<column name>": [<column values>],
            ...
        }

        _result_ - a string with the name of the column holding the result.

        Returns a list of branches. Every branch is a list that starts with
        a "<column>=<value>" label and is followed either by a single
        "<result>=<value>" leaf or by the branches of the nested sub-tree.

        Raises ValueError (from max()) when _table_ has no column other
        than _result_.
    """
    # Split on the column with the highest gain(); on ties max() keeps the
    # first such column in the table's iteration order.
    split_col = max((name for name in table.keys() if name != result),
                    key=lambda name: gain(table, name, result))

    branches = []
    for part in get_subtables(table, split_col):
        # The first value of the split column labels the branch.
        label = '%s=%s' % (split_col, part[split_col][0])
        outcomes = part[result]
        if is_mono(outcomes):
            branches.append([label, '%s=%s' % (result, outcomes[0])])
            continue
        # The column is used up: drop it before descending any further.
        del part[split_col]
        branches.append([label] + mine_c45(part, result))
    return branches
Example #2
0
def infox(table, col, res_col):
    """ Calculates the entropy of the table _table_ after it has been
        divided into subtables by column _col_.

        Each subtable contributes info(subtable, _res_col_) weighted by
        the fraction of the rows of _table_ that it contains.
    """
    return sum(
        (float(len(part[col])) / len(table[col])) * info(part, res_col)
        for part in utils.get_subtables(table, col))
Example #3
0
def infox(table, col, res_col):
    """ Calculates the entropy of the table _table_ once it is split
        into subtables by column _col_; _res_col_ is passed on to info().
    """
    weighted_entropy = 0
    for part in utils.get_subtables(table, col):
        # Fraction of the rows of the whole table that landed in this part;
        # float() avoids integer division.
        share = float(len(part[col])) / len(table[col])
        weighted_entropy += share * info(part, res_col)
    return weighted_entropy
Example #4
0
 def test_get_subtables(self):
     # Splitting the fixture table on 'arg1' is expected to yield the
     # 'left' rows first and the 'right' rows second.
     left_rows = {
         'result': ['yes', 'no'],
         'arg1': ['left', 'left'],
         'arg2': ['down', 'up'],
         'arg3': ['no', 'yes'],
     }
     right_rows = {
         'result': ['yes', 'no'],
         'arg1': ['right', 'right'],
         'arg2': ['down', 'down'],
         'arg3': ['yes', 'no'],
     }
     wanted = [left_rows, right_rows]
     self.assertEqual(utils.get_subtables(self.table, 'arg1'), wanted)
Example #5
0
 def test_get_subtables(self):
     # One expected subtable per value of 'arg1', in the order
     # 'left' then 'right'.
     for_left = dict([
         ('result', ['yes', 'no']),
         ('arg1', ['left', 'left']),
         ('arg2', ['down', 'up']),
         ('arg3', ['no', 'yes']),
     ])
     for_right = dict([
         ('result', ['yes', 'no']),
         ('arg1', ['right', 'right']),
         ('arg2', ['down', 'down']),
         ('arg3', ['yes', 'no']),
     ])
     expected = [for_left, for_right]
     self.assertEqual(utils.get_subtables(self.table, 'arg1'), expected)
Example #6
0
def mine_c45(table, result):
    """ Recursively builds a tree for _table_ by splitting on the column
        with the highest gain() with respect to the _result_ column.

        Returns a list of branches; each branch is a list starting with a
        "<column>=<value>" label followed by a "<result>=<value>" leaf or
        by the branches of a nested sub-tree.

        Raises ValueError (from max()) when _result_ is the only column.
    """
    candidates = [name for name in table.keys() if name != result]
    best = max(candidates, key=lambda name: gain(table, name, result))

    branches = []
    for part in get_subtables(table, best):
        label = '%s=%s' % (best, part[best][0])
        if is_mono(part[result]):
            # Every row agrees on the result: close the branch with a leaf.
            branches.append([label, '%s=%s' % (result, part[result][0])])
            continue
        # Otherwise drop the used column and keep splitting.
        del part[best]
        branches.append([label] + mine_c45(part, result))
    return branches
Example #7
0
def mine_c45(table, result):
    """ Builds a tree for _table_: picks the column with the highest
        gain() against the _result_ column, creates one branch per
        subtable of that column and recurses into subtables whose
        results are not uniform.

        Raises ValueError (from max()) when _result_ is the only column.
    """
    def score(name):
        return gain(table, name, result)

    chosen = max([name for name in table.keys() if name != result],
                 key=score)

    nodes = []
    for part in get_subtables(table, chosen):
        head = '%s=%s' % (chosen, part[chosen][0])
        if is_mono(part[result]):
            tail = ['%s=%s' % (result, part[result][0])]
        else:
            # The chosen column must not be offered to the recursive call.
            del part[chosen]
            tail = mine_c45(part, result)
        nodes.append([head] + tail)
    return nodes
Example #8
0
def mine_c45(table, result):
    """ An entry point for the C45 algorithm.

        _table_ - a dict representing a data table in the following format:
        {
            "<column name>": [<column values>],
            "<column name>": [<column values>],
            ...
        }

        _result_ - a string with the name of the column holding the result.

        Returns a list of branches. A regular branch starts with a
        "<column>=<value>" label followed by a "<result>=<value>" leaf or
        by the branches of a nested sub-tree. When only one column is left
        the branches are ["probability=<p>", "<result>=<value>"] pairs.
    """
    tree = []

    # Special case when there is a mixed strategy: a single column is left
    # (assumed to be _result_ - TODO confirm), so there is nothing to split
    # on and the outcomes are reported with their probabilities instead.
    if len(table) == 1:
        # get_distribution() is indexed as [0] = total, [1] = per-value
        # counts - presumably; verify against its definition.
        key_distr = get_distribution(table[result])
        total = key_distr[0]
        counts = key_distr[1]
        for k in counts.keys():
            # float() keeps the ratio from being truncated to 0 by integer
            # division when both operands are ints under Python 2.
            tree.append([
                'probability=%f' % (float(counts[k]) / total),
                '%s=%s' % (result, k)
            ])

        return tree

    # All other cases: split on the column with the highest gain().
    col = max((k for k in table.keys() if k != result),
              key=lambda k: gain(table, k, result))
    for subt in get_subtables(table, col):
        v = subt[col][0]
        if is_mono(subt[result]):
            tree.append(
                ['%s=%s' % (col, v),
                 '%s=%s' % (result, subt[result][0])])
        else:
            # The column is used up: drop it before descending.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))

    return tree
Example #9
0
def mine_c45(table, result):
    """ An entry point for the C45 algorithm.

        _table_ - a dict representing a data table in the following format:
        {
            "<column name>": [<column values>],
            "<column name>": [<column values>],
            ...
        }

        _result_ - a string with the name of the column holding the result.

        Returns a list of branches, each one a list that begins with a
        "<column>=<value>" label and continues with a "<result>=<value>"
        leaf or with the branches of the nested sub-tree.

        Raises ValueError (from max()) when _result_ is the only column.
    """
    attributes = [name for name in table.keys() if name != result]
    # max() keeps the first column among those with the highest gain().
    pivot = max(attributes, key=lambda name: gain(table, name, result))

    tree = []
    for part in get_subtables(table, pivot):
        label = '%s=%s' % (pivot, part[pivot][0])
        if not is_mono(part[result]):
            # Mixed results: remove the pivot column and split further.
            del part[pivot]
            tree.append([label] + mine_c45(part, result))
        else:
            tree.append([label, '%s=%s' % (result, part[result][0])])
    return tree
Example #10
0
def mine_c45(table, result):
    """ An entry point for the C45 algorithm.

        _table_ - a dict representing a data table in the following format:
        {
            "<column name>": [<column values>],
            "<column name>": [<column values>],
            ...
        }

        _result_ - a string with the name of the column holding the result.

        Returns a list of branches. A regular branch starts with a
        "<column>=<value>" label followed by a "<result>=<value>" leaf or
        by the branches of a nested sub-tree. When only one column is left
        the branches are ["probability=<p>", "<result>=<value>"] pairs.
    """
    tree = []

    # Special case when there is a mixed strategy: only one column remains
    # (assumed to be _result_ - TODO confirm), so no split is possible.
    if len(table) == 1:
        # Indexed as [0] = total and [1] = per-value counts - presumably;
        # verify against get_distribution().
        key_distr = get_distribution(table[result])
        for k in key_distr[1].keys():
            # float() prevents integer division from truncating the
            # probability to 0 when both operands are ints (Python 2).
            probability = float(key_distr[1][k]) / key_distr[0]
            tree.append(['probability=%f' % probability,
                         '%s=%s' % (result, k)])

        return tree

    # All other cases: split on the column with the highest gain().
    col = max((k for k in table.keys() if k != result),
              key=lambda k: gain(table, k, result))
    for subt in get_subtables(table, col):
        v = subt[col][0]
        if is_mono(subt[result]):
            tree.append(['%s=%s' % (col, v),
                         '%s=%s' % (result, subt[result][0])])
        else:
            # The column is used up: drop it before descending.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))

    return tree
Example #11
0
def info_x(table, col, res_col):
    """ Sums info(subtable, _res_col_) over the subtables obtained by
        dividing _table_ by column _col_, weighting every term by the
        ratio of the subtable's row count to the row count of _table_.
    """
    weighted = 0
    for part in utils.get_subtables(table, col):
        share = len(part[col]) / len(table[col])
        weighted += share * info(part, res_col)
    return weighted
Example #12
0
 def test_get_subtables(self):
     # Splitting the fixture table on "arg1" is expected to yield the
     # "left" rows first and the "right" rows second.
     expected = [
         {
             "result": ["yes", "no"],
             "arg1": ["left", "left"],
             "arg2": ["down", "up"],
             "arg3": ["no", "yes"],
         },
         {
             "result": ["yes", "no"],
             "arg1": ["right", "right"],
             "arg2": ["down", "down"],
             "arg3": ["yes", "no"],
         },
     ]
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(utils.get_subtables(self.table, "arg1"), expected)