def testTennis(self):
    """
    Test the entire program on the tennis data set.

    Fits a decision tree on the enhanced tennis data, then checks that the
    tree reproduces every training label, both one sample at a time and for
    the whole data set in a single ``predict`` call.
    """
    tennis = mu.extract_data('tennis.csv')
    tennis = mu.enhance_data(tennis)
    dt = decisionTree.DecisionTree(tennis['feature_dict'], tennis['feature_names'])
    dt.fit(tennis['data'], tennis['target'])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    for x, y in zip(tennis['data'], tennis['target']):
        self.assertEqual(dt.predict([x]), [y])
    self.assertEqual(dt.predict(tennis['data']), tennis['target'])
# Example #2
# 0

if __name__ == '__main__':
    # Parse the command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("train_file",
                        help="Name of file with training data",
                        type=str)
    parser.add_argument("-k", help="number of folds", type=int, default=5)
    parser.add_argument(
        "--ibm",
        help="Flag to indicate that input is IBM data, else plain CSV",
        action="store_true")
    parser.add_argument("--y_col",
                        help="name of column containing target",
                        type=str)
    args = parser.parse_args()

    # Load the data set: --ibm input is a joblib dump, anything else is CSV.
    if args.ibm:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code; only pass files from a trusted source.
        data = joblib.load(args.train_file)
    elif args.y_col:
        # --y_col names the target column (needed for tennis, for example).
        data = mlUtil.extract_data(fileName=args.train_file,
                                   targetInfo=args.y_col)
    else:
        data = mlUtil.extract_data(args.train_file)
    data = mlUtil.enhance_data(data)

    # A single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(k_fold_eval(data, args.k))
# Example #3
# 0
    # Average each per-fold metric over the k folds.
    # NOTE(review): assumes every list summed below holds one score per fold
    # (k entries), built earlier in this function — not visible here; confirm.
    # NOTE(review): the v_* averages are stored under "test_*" keys and the
    # t_* averages under "train_*" keys; presumably v = held-out fold and
    # t = training folds — confirm.
    v_precision = sum(v_precisions)/k
    v_recall = sum(v_recalls)/k
    v_accuracy = sum(v_accuracys)/k

    result_dict["test_precision"] = v_precision
    result_dict["test_recall"] = v_recall
    result_dict["test_accuracy"] = v_accuracy

    t_precision = sum(t_precisions)/k
    t_recall = sum(t_recalls)/k
    t_accuracy = sum(t_accuracys)/k

    result_dict["train_precision"] = t_precision
    result_dict["train_recall"] = t_recall
    result_dict["train_accuracy"] = t_accuracy

    # Hand back the dictionary holding the six averaged metrics set above.
    return result_dict

if __name__ == '__main__':
    # Evaluate on the nursery data set with 5 folds. To evaluate on the
    # pickled subset instead, load it with:
    #   data = joblib.load("tgmc_stripReal_subset.pkl")
    data = mlUtil.extract_data("nursery.csv")
    data = mlUtil.enhance_data(data)

    # A single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(k_fold_eval(data, 5))




        # Print this node's incoming branch label and its attribute, then
        # recurse into every child one indentation level deeper, wrapped in
        # a { } pair. Presumably the non-leaf branch of printTree — the
        # enclosing definition is not visible here.
        # NOTE(review): val is concatenated without str(); a non-string
        # branch label would raise TypeError — confirm labels are strings.
        print indent+"+-"+val+'-- <'+root.attribute+'>'
        print indent+"{"
        for k in root.children.keys():
            printTree(root.children[k],k,indentNum+1)
        print indent+"}"



if __name__ == '__main__':
    # Parse the command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("train_file", help="Name of file with training data", type=str)
    parser.add_argument("--y_col", help="name of column containing target", type=str)
    parser.add_argument("--ibm", help="Flag to indicate that input is IBM data, else plain CSV", action="store_true")
    args = parser.parse_args()

    # Load the data set: --ibm input is a joblib dump, anything else is CSV.
    if args.ibm:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code; only pass files from a trusted source.
        data = joblib.load(args.train_file)
    elif args.y_col:
        # --y_col names the target column (needed for tennis, for example);
        # previously the flag was parsed but silently ignored.
        data = mlUtil.extract_data(fileName=args.train_file, targetInfo=args.y_col)
    else:
        data = mlUtil.extract_data(args.train_file)
    data = mlUtil.enhance_data(data)

    # Build the tree from the feature metadata produced by enhance_data.
    # NOTE(review): replaces the '***YOU ADD ARGUMENTS HERE***' placeholder;
    # the (feature_dict, feature_names) argument order is an assumption —
    # confirm against DecisionTree.__init__.
    tree = DecisionTree(data['feature_dict'], data['feature_names'])
    tree.fit(data['data'], data['target'])
    # To display the fitted tree, call printTree(tree.clf) here.
    # Test on the training data.
    tree.predict(data['data'])

 def testSelAttRest(self):
     """Check that selectAttribute returns 4 for the restaurant data set."""
     dt = decisionTree.DecisionTree()
     rest = mu.extract_data('restaurant.csv')
     attrib = dt.selectAttribute(rest['data'], rest['target'])
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(attrib, 4)
 def testSelAttTenn(self):
     """Check that selectAttribute returns 0 for the tennis data set."""
     dt = decisionTree.DecisionTree()
     tennis = mu.extract_data('tennis.csv')
     attrib = dt.selectAttribute(tennis['data'], tennis['target'])
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(attrib, 0)

if __name__ == '__main__':
    # Parse the command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("train_file",
                        help="Name of file with training data",
                        type=str)
    parser.add_argument("--y_col",
                        help="name of column containing target",
                        type=str)
    parser.add_argument(
        "--ibm",
        help="Flag to indicate that input is IBM data, else plain CSV",
        action="store_true")
    args = parser.parse_args()

    # Load the data set: --ibm input is a joblib dump, anything else is CSV.
    if args.ibm:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code; only pass files from a trusted source.
        data = joblib.load(args.train_file)
    elif args.y_col:
        # --y_col names the target column (needed for tennis, for example);
        # previously the flag was parsed but silently ignored.
        data = mlUtil.extract_data(fileName=args.train_file,
                                   targetInfo=args.y_col)
    else:
        data = mlUtil.extract_data(args.train_file)
    data = mlUtil.enhance_data(data)

    # Build the tree from the feature metadata produced by enhance_data.
    # NOTE(review): replaces the '***YOU ADD ARGUMENTS HERE***' placeholder;
    # the (feature_dict, feature_names) argument order is an assumption —
    # confirm against DecisionTree.__init__.
    tree = DecisionTree(data['feature_dict'], data['feature_names'])
    tree.fit(data['data'], data['target'])
    # To display the fitted tree, call printTree(tree.clf) here.
    # Test on the training data.
    tree.predict(data['data'])
# Example #8
# 0
def printTree(root, val='Tree', indentNum=0):
    """Print the decision tree rooted at ``root`` in a nested, indented format.

    Usage: printTree(rootNode)

    Args:
        root: Node to print. It must provide ``is_leaf()``; a leaf is read
            through ``value``, any other node through ``attribute`` and
            ``children`` (a mapping of branch label -> child node).
        val: Label of the branch leading to ``root``; 'Tree' for the root.
        indentNum: Nesting depth; each level is indented by one tab.
    """
    indent = "\t" * indentNum
    if root.is_leaf():
        # %-formatting stringifies non-string labels and values, where "+"
        # concatenation would raise TypeError. The single-argument print(...)
        # form works on both Python 2 and Python 3.
        print("%s+-%s-- %s" % (indent, val, root.value))
        return

    print("%s+-%s-- <%s>" % (indent, val, root.attribute))
    print(indent + "{")
    # Each child is printed one level deeper, labelled with its branch key.
    for branch, child in root.children.items():
        printTree(child, branch, indentNum + 1)
    print(indent + "}")


if __name__ == '__main__':
    # Fit a decision tree on the lymphography data set and display it.
    # (No command line arguments are parsed; the data file is fixed.)
    data = mlUtil.extract_data("lymphography.csv")
    data = mlUtil.enhance_data(data)

    tree = DecisionTree(attrib_d=data['feature_dict'],
                        attribs=data['feature_names'],
                        default_v="default")
    tree.fit(data['data'], data['target'])
    printTree(tree.clf)
    # Test on the training data: print the true targets, then the predictions.
    # A single-argument print(...) behaves the same on Python 2 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(data['target'])
    print(tree.predict(data['data']))