def mine_c45(table, result):
    """
    An entry point for C45 algorithm.

    _table_ - a dict representing data table in the following format:
    {
        "<column name>": [<column values>],
        "<column name>": [<column values>],
        ...
    }

    _result_: a string representing a name of column indicating a result.

    Returns the tree as a list of branches. Every branch is a list that
    starts with a "<column>=<value>" condition and continues either with a
    single "<result>=<value>" leaf or with the branches of the subtree mined
    from the remaining columns.

    When no column other than _result_ is left, the rows cannot be split any
    further; the outcome is then reported as one
    ["probability=<p>", "<result>=<value>"] entry per distinct result value.
    """
    attributes = [k for k in table.keys() if k != result]

    # Base case: nothing left to split on. Without it max() below would be
    # called on an empty sequence and raise ValueError.
    if not attributes:
        # Function-scope import so the block does not depend on file imports.
        from collections import Counter

        counts = Counter(table[result])  # result values must be hashable
        total = float(sum(counts.values()))  # float: true division everywhere
        return [['probability=%f' % (count / total),
                 '%s=%s' % (result, value)]
                for value, count in counts.items()]

    # Split on the attribute with the highest gain; the first one wins ties.
    col = max([(k, gain(table, k, result)) for k in attributes],
              key=lambda x: x[1])[0]

    tree = []
    for subt in get_subtables(table, col):
        # Each subtable is indexed at row 0 to label its branch.
        v = subt[col][0]
        if is_mono(subt[result]):
            tree.append(['%s=%s' % (col, v),
                         '%s=%s' % (result, subt[result][0])])
        else:
            # Drop the used column so the recursion cannot pick it again.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))
    return tree
def mine_c45(table, result):
    """
    Mine a C45 decision tree from a column-oriented table.

    _table_ - a dict mapping each column name to the list of its values.
    _result_ - the name of the column that holds the outcome.

    Returns a list of branches. A branch is a list beginning with a
    "<column>=<value>" condition, followed either by one "<result>=<value>"
    leaf or by the branches of the subtree built from the other columns.

    If _result_ is the only column left, the rows are indistinguishable, so
    one ["probability=<p>", "<result>=<value>"] entry is returned per
    distinct result value instead of a split.
    """
    candidates = [k for k in table.keys() if k != result]

    if not candidates:
        # No attribute to split on: previously this fell through to max()
        # on an empty list and raised ValueError.
        from collections import Counter  # local import, block-contained

        frequency = Counter(table[result])  # result values must be hashable
        rows = float(sum(frequency.values()))  # float avoids integer division
        leaves = []
        for value, count in frequency.items():
            leaves.append(['probability=%f' % (count / rows),
                           '%s=%s' % (result, value)])
        return leaves

    # Attribute with the largest gain; on ties the first candidate is kept.
    col = max([(k, gain(table, k, result)) for k in candidates],
              key=lambda x: x[1])[0]

    tree = []
    for subt in get_subtables(table, col):
        # Row 0 of the split column supplies the branch label.
        v = subt[col][0]
        if is_mono(subt[result]):
            tree.append(['%s=%s' % (col, v),
                         '%s=%s' % (result, subt[result][0])])
        else:
            # The split column is spent; remove it before recursing.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))
    return tree
def mine_c45(table, result):
    """
    Build a C45 decision tree for the given table.

    _table_ - a dict of the form {"<column name>": [<column values>], ...}.
    _result_ - a string naming the column that indicates the result.

    The tree is returned as a list of branches; each branch starts with a
    "<column>=<value>" string and is followed by a "<result>=<value>" leaf
    or by the nested branches produced for the remaining columns.

    A table that contains no column besides _result_ cannot be split, so it
    yields ["probability=<p>", "<result>=<value>"] entries, one for every
    distinct result value.
    """
    splittable = [k for k in table.keys() if k != result]

    # Guard against running out of attributes: max() on an empty list raises
    # ValueError, which used to abort the whole recursion.
    if not splittable:
        from collections import Counter  # kept local to this block

        tally = Counter(table[result])  # result values must be hashable
        size = float(sum(tally.values()))  # force true division
        return [['probability=%f' % (tally[value] / size),
                 '%s=%s' % (result, value)]
                for value in tally]

    # Pick the column with the best gain (first one on ties).
    col = max([(k, gain(table, k, result)) for k in splittable],
              key=lambda x: x[1])[0]

    tree = []
    for subt in get_subtables(table, col):
        # The first row's value in the split column labels the branch.
        v = subt[col][0]
        if is_mono(subt[result]):
            tree.append(
                ['%s=%s' % (col, v), '%s=%s' % (result, subt[result][0])])
        else:
            # Remove the column just used so deeper levels ignore it.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))
    return tree
def mine_c45(table, result):
    """
    An entry point for C45 algorithm.

    _table_ - a dict representing data table in the following format:
    {
        "<column name>": [<column values>],
        "<column name>": [<column values>],
        ...
    }

    _result_: a string representing a name of column indicating a result.

    Returns a list of branches. A branch starts with a "<column>=<value>"
    condition followed by either a "<result>=<value>" leaf or the branches
    of the subtree mined from the remaining columns. A table with a single
    column produces ["probability=<p>", "<result>=<value>"] entries instead.
    """
    # Mixed strategy: a single column is left, so report how the result
    # values are distributed rather than splitting further.
    if len(table.keys()) == 1:
        distribution = get_distribution(table[result])
        # As used here: index 0 is the divisor, index 1 maps value -> count.
        total, counts = distribution[0], distribution[1]
        return [
            ['probability=%f' % (counts[value] / total),
             '%s=%s' % (result, value)]
            for value in counts.keys()
        ]

    # General case: split on the attribute with the highest gain
    # (max keeps the first attribute when gains are equal).
    attributes = [name for name in table.keys() if name != result]
    col = max(attributes, key=lambda name: gain(table, name, result))

    branches = []
    for subtable in get_subtables(table, col):
        condition = '%s=%s' % (col, subtable[col][0])
        outcomes = subtable[result]
        if not is_mono(outcomes):
            # The split column is spent; drop it before recursing.
            del subtable[col]
            branches.append([condition] + mine_c45(subtable, result))
            continue
        branches.append([condition, '%s=%s' % (result, outcomes[0])])
    return branches
def mine_c45(table, result):
    """
    An entry point for C45 algorithm.

    _table_ - a dict representing data table in the following format:
    {
        "<column name>": [<column values>],
        "<column name>": [<column values>],
        ...
    }

    _result_: a string representing a name of column indicating a result.

    Returns a list of branches, each shaped as
    ["<column>=<value>", "<result>=<value>"] for a pure subtable, or as
    ["<column>=<value>", <sub-branch>, ...] when the subtable had to be
    split again.

    Once every attribute column has been used up and only _result_ remains,
    the function returns ["probability=<p>", "<result>=<value>"] entries
    describing how often each result value occurs.
    """
    remaining = [k for k in table.keys() if k != result]

    if not remaining:
        # Base case for exhausted attributes; without it the max() call
        # below receives an empty list and raises ValueError.
        from collections import Counter  # function-scope import

        occurrences = Counter(table[result])  # values must be hashable
        n_rows = float(sum(occurrences.values()))  # float: no int division
        return [['probability=%f' % (hits / n_rows),
                 '%s=%s' % (result, value)]
                for value, hits in occurrences.items()]

    # Highest-gain attribute; the earliest one is chosen on equal gain.
    col = max([(k, gain(table, k, result)) for k in remaining],
              key=lambda x: x[1])[0]

    tree = []
    for subt in get_subtables(table, col):
        # Label the branch with the split column's value in row 0.
        v = subt[col][0]
        if is_mono(subt[result]):
            tree.append(['%s=%s' % (col, v),
                         '%s=%s' % (result, subt[result][0])])
        else:
            # Discard the consumed column so it is not selected again.
            del subt[col]
            tree.append(['%s=%s' % (col, v)] + mine_c45(subt, result))
    return tree
def mine_c45(table, result):
    """
    An entry point for C45 algorithm.

    _table_ - a dict representing data table in the following format:
    {
        "<column name>": [<column values>],
        "<column name>": [<column values>],
        ...
    }

    _result_: a string representing a name of column indicating a result.

    The return value is a list of branches: each one begins with a
    "<column>=<value>" condition and ends in a "<result>=<value>" leaf or in
    nested branches. For a one-column table the list holds
    ["probability=<p>", "<result>=<value>"] entries instead.
    """
    nodes = []

    if len(table.keys()) != 1:
        # Regular case: rank every non-result column by gain and split on
        # the best one (the first one when several share the top gain).
        scored = []
        for name in table.keys():
            if name != result:
                scored.append((name, gain(table, name, result)))
        col = max(scored, key=lambda pair: pair[1])[0]

        for part in get_subtables(table, col):
            label = '%s=%s' % (col, part[col][0])
            if is_mono(part[result]):
                leaf = '%s=%s' % (result, part[result][0])
                nodes.append([label, leaf])
            else:
                # Remove the column just used before mining the subtree.
                del part[col]
                nodes.append([label] + mine_c45(part, result))
        return nodes

    # Special case when there is a mixed strategy: only one column is left,
    # so emit the share of each result value.
    key_distr = get_distribution(table[result])
    for value in key_distr[1].keys():
        share = key_distr[1][value] / key_distr[0]
        nodes.append(['probability=%f' % share, '%s=%s' % (result, value)])
    return nodes
def test_gain(self):
    # Splitting the fixture table on 'arg1' is expected to give zero gain
    # with respect to the 'result' column.
    observed = gain(self.table, 'arg1', 'result')
    self.assertEqual(observed, 0)