def testTennis(self):
    """
    Test the entire program on the tennis data set.

    Loads ``tennis.csv`` through ``mu.extract_data`` and
    ``mu.enhance_data``, fits a decision tree on the full data set, and
    checks that the tree reproduces every training label: first one
    sample at a time, then for the whole data set in a single
    ``predict`` call.
    """
    tennis = mu.extract_data('tennis.csv')
    tennis = mu.enhance_data(tennis)
    dt = decisionTree.DecisionTree(tennis['feature_dict'],
                                   tennis['feature_names'])
    dt.fit(tennis['data'], tennis['target'])

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which was removed from unittest in Python 3.12.
    for x, y in zip(tennis['data'], tennis['target']):
        # Wrap the single sample and its label in lists so the call has
        # the same shape as the whole-data-set check below.
        self.assertEqual(dt.predict([x]), [y])
    self.assertEqual(dt.predict(tennis['data']), tennis['target'])
Example no. 2
0

if __name__ == '__main__':
    # Command line interface:
    #   train_file  path to the training data (positional)
    #   -k          number of folds passed to k_fold_eval (default 5)
    #   --ibm       load train_file with joblib instead of parsing it as CSV
    #   --y_col     name of the column holding the target (CSV input only)
    parser = argparse.ArgumentParser()
    parser.add_argument("train_file",
                        help="Name of file with training data",
                        type=str)
    parser.add_argument("-k", help="number of folds", type=int, default=5)
    parser.add_argument(
        "--ibm",
        help="Flag to indicate that input is IBM data, else plain CSV",
        action="store_true")
    parser.add_argument("--y_col",
                        help="name of column containing target",
                        type=str)
    args = parser.parse_args()

    if args.ibm:
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code; only point this at trusted files.
        data = joblib.load(args.train_file)
    elif args.y_col:
        # An explicit target column was requested (e.g. for the tennis data).
        data = mlUtil.extract_data(fileName=args.train_file,
                                   targetInfo=args.y_col)
    else:
        data = mlUtil.extract_data(args.train_file)
    data = mlUtil.enhance_data(data)

    # Single-argument call form: prints the same text under Python 2 and
    # is valid syntax under Python 3, unlike the bare print statement.
    print(k_fold_eval(data, args.k))
        print indent+"+-"+val+'-- <'+root.attribute+'>'
        print indent+"{"
        for k in root.children.keys():
            printTree(root.children[k],k,indentNum+1)
        print indent+"}"



if __name__ == '__main__':
    # Command line interface:
    #   train_file  path to the training data (positional)
    #   --y_col     name of the column holding the target (CSV input only)
    #   --ibm       load train_file with joblib instead of parsing it as CSV
    parser = argparse.ArgumentParser()
    parser.add_argument("train_file", help="Name of file with training data", type=str)
    parser.add_argument("--y_col", help="name of column containing target", type=str)
    parser.add_argument("--ibm", help="Flag to indicate that input is IBM data, else plain CSV", action="store_true")
    args = parser.parse_args()

    if args.ibm:
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code; only point this at trusted files.
        data = joblib.load(args.train_file)
    elif args.y_col:
        # Honour --y_col: it was previously parsed but silently ignored.
        data = mlUtil.extract_data(fileName=args.train_file,
                                   targetInfo=args.y_col)
    else:
        data = mlUtil.extract_data(args.train_file)
    data = mlUtil.enhance_data(data)

    # Build the tree from the feature metadata in the enhanced data set,
    # replacing the placeholder string the constructor used to receive.
    tree = DecisionTree(data['feature_dict'], data['feature_names'])
    tree.fit(data['data'], data['target'])
    # printTree(tree.clf)
    # Run the fitted tree over the training data; the predictions are
    # currently discarded.
    tree.predict(data['data'])