예제 #1
0
def main():
    """Learn a decision tree from command-line inputs and report how it scores.

    Reads ``sys.argv``. ``argv[1]`` is kept as ``div_funct_name`` and forwarded
    to ``decision_tree_builder.learn_tree``; ``argv[2]``, ``argv[3]`` and
    ``argv[4]`` are passed to ``InputParser``. The learned tree is run through
    ``classify_all`` on the parser's training data and test data, and both
    results, together with the two data-set sizes, go to ``report_outcome``.

    Exits with status 2 when ``args_are_valid`` rejects the arguments.
    """
    args = sys.argv
    if not args_are_valid(args):
        # A bare sys.exit() terminates with status 0 (success). Rejected
        # arguments must be visible to the calling shell as a failure; 2 is
        # the conventional status for command-line usage errors.
        sys.exit(2)

    div_funct_name = args[1]
    parser = InputParser(args[2], args[3], args[4])

    trained_tree = decision_tree_builder.learn_tree(parser.features,
                                                    parser.possible_classification_values,
                                                    div_funct_name,
                                                    parser.train_data)
    train_num_correct = classify_all(trained_tree, parser.train_data)
    test_num_correct = classify_all(trained_tree, parser.test_data)

    report_outcome(trained_tree, div_funct_name, train_num_correct, test_num_correct,
                   len(parser.train_data), len(parser.test_data))
예제 #2
0
def main():
    """Command-line entry point: build a tree, classify both data sets, report.

    Arguments come from ``sys.argv``: position 1 is stored as
    ``div_funct_name`` and handed to ``decision_tree_builder.learn_tree``,
    positions 2 through 4 are given to ``InputParser``. ``classify_all`` is
    applied to the parser's ``train_data`` and ``test_data`` with the learned
    tree, and ``report_outcome`` receives the tree, the function name, both
    classification results and the length of each data set.

    Terminates with exit status 2 if ``args_are_valid`` returns a falsy value.
    """
    args = sys.argv
    if not args_are_valid(args):
        # sys.exit() without an argument means "success" (status 0); use a
        # non-zero status so scripts wrapping this program can detect the
        # invalid invocation. Status 2 is the usual code for usage errors.
        sys.exit(2)

    div_funct_name = args[1]
    parser = InputParser(args[2], args[3], args[4])

    trained_tree = decision_tree_builder.learn_tree(
        parser.features, parser.possible_classification_values, div_funct_name,
        parser.train_data)
    train_num_correct = classify_all(trained_tree, parser.train_data)
    test_num_correct = classify_all(trained_tree, parser.test_data)

    report_outcome(trained_tree, div_funct_name, train_num_correct,
                   test_num_correct, len(parser.train_data),
                   len(parser.test_data))
def main():
    """Compare an unpruned tree with two pruned variants over ``K`` rounds.

    Loads the data via ``diversity_experiment.get_nursery_data()``, shuffles
    its examples in place, and for each ``i`` in ``range(K)``:

    1. splits the examples with ``diversity_experiment.split`` and divides the
       remainder into a test set and a validation set,
    2. learns a tree from the training portion,
    3. prunes deep copies of that tree in ``'conservative'`` and ``'liberal'``
       mode using the validation set,
    4. evaluates all three trees on the test set and records the results.

    The collected outcomes and the list of pairs to compare are finally passed
    to ``diversity_experiment.analyze`` along with ``'prune.csv'`` and ``K``.
    """
    train_data = diversity_experiment.get_nursery_data()
    shuffle(train_data.examples)
    outcomes = {'unpruned': [],
                'conservative': [],
                'liberal': []}

    for i in range(K):
        # Split into training, test, and validation sets
        training_set, the_rest = diversity_experiment.split(train_data.examples, i, K)
        test_set, validation_set = validation_split(the_rest)

        # Learn the original tree
        unpruned = decision_tree_builder.learn_tree(train_data.features,
                                                    train_data.class_vals,
                                                    DIV_FUNCT,
                                                    training_set)

        # Make pruned versions too. Each pruner gets its own deep copy so the
        # unpruned tree stays intact for evaluation.
        # print(...) with a single argument behaves the same on Python 2 and
        # Python 3; the former print statement is a syntax error on Python 3.
        print("Pruning conservatively")
        conservative_tree = TreePruner(copy.deepcopy(unpruned), 'conservative', validation_set).root

        print("Pruning liberally.")
        liberal_tree = TreePruner(copy.deepcopy(unpruned), 'liberal', validation_set).root

        trees = {
            'unpruned': unpruned,
            'liberal':  liberal_tree,
            'conservative':  conservative_tree
        }

        # Test ALL the trees!
        for name, tree in trees.items():
            outcomes[name].append(evaluate(tree, test_set))

    pairwise = [
        ('unpruned', 'liberal'),
        ('unpruned', 'conservative'),
        ('liberal', 'conservative')
    ]

    # Get t-statistics
    diversity_experiment.analyze(outcomes, pairwise, [], 'prune.csv', K)
def main():
    """Run the pruning experiment: unpruned vs. conservative vs. liberal trees.

    The examples returned by ``diversity_experiment.get_nursery_data()`` are
    shuffled in place. Then, once per index in ``range(K)``, the examples are
    split into a training set, a test set and a validation set; a tree is
    learned from the training set; two deep copies of it are pruned (one in
    ``'conservative'`` mode, one in ``'liberal'`` mode) against the validation
    set; and ``evaluate`` is called for each of the three trees on the test
    set, with the result appended to that tree's list in ``outcomes``.

    After the loop, ``diversity_experiment.analyze`` is called with the
    outcomes, the three pairings to compare, ``'prune.csv'`` and ``K``.
    """
    train_data = diversity_experiment.get_nursery_data()
    shuffle(train_data.examples)
    outcomes = {'unpruned': [], 'conservative': [], 'liberal': []}

    for i in range(K):
        # Split into training, test, and validation sets
        training_set, the_rest = diversity_experiment.split(
            train_data.examples, i, K)
        test_set, validation_set = validation_split(the_rest)

        # Learn the original tree
        unpruned = decision_tree_builder.learn_tree(train_data.features,
                                                    train_data.class_vals,
                                                    DIV_FUNCT, training_set)

        # Make pruned versions too; pruning works on deep copies so the
        # original tree can still be evaluated unmodified.
        # The call form of print keeps the output unchanged on Python 2 and
        # also parses on Python 3, unlike the print statement.
        print("Pruning conservatively")
        conservative_pruner = TreePruner(copy.deepcopy(unpruned),
                                         'conservative', validation_set)
        conservative_tree = conservative_pruner.root

        print("Pruning liberally.")
        liberal_pruner = TreePruner(copy.deepcopy(unpruned), 'liberal',
                                    validation_set)
        liberal_tree = liberal_pruner.root

        trees = {
            'unpruned': unpruned,
            'liberal': liberal_tree,
            'conservative': conservative_tree
        }

        # Test ALL the trees!
        for label, candidate in trees.items():
            outcomes[label].append(evaluate(candidate, test_set))

    pairwise = [('unpruned', 'liberal'), ('unpruned', 'conservative'),
                ('liberal', 'conservative')]

    # Get t-statistics
    diversity_experiment.analyze(outcomes, pairwise, [], 'prune.csv', K)
예제 #5
0
 def test_tree(train_examples, test_examples):
     """Learn a tree from ``train_examples`` and classify ``test_examples``.

     ``train_data`` and ``funct`` are not parameters; they are looked up in
     the surrounding scope. Returns whatever ``classify_all`` returns for the
     learned tree and ``test_examples``.
     """
     return classify_all(
         learn_tree(train_data.features, train_data.class_vals,
                    funct, train_examples),
         test_examples)