Example #1
0
        # ('Title', 'categorical'): titles,
        # ('Ticket_Code', 'categorical'): ticket_codes,
        # ('Ticket_Val', 'continuous'): None,
    }
    target_var = ["Age", "continuous"]

    # Only build a tree if necessary.
    if build_tree:
        # Build tree to predict age:
        f = file("data/%s_age.csv" % filename, "r")
        max_depth = 6
        root = construct(f, ind_vars, target_var, max_depth)
        f.close()

        f = file("trees/age.tree", "w")
        write_tree(root, f)
        f.close()

        # Trim tree: TODO

    # Use tree to predict age:
    # Compile list of independent variables used to predict target variable
    tree = read_tree("trees/age.tree")
    f = file("data/%s_no_age.csv" % filename, "r")
    ind_vars[("PassengerId", "continuous")] = None
    data = get_data(f, ind_vars)
    var_dict = simplify_var_dict(ind_vars, None)

    # Output target variable predictions to csv.
    f = file("predictions/ages.csv", "w")
    f.write("PassengerId,%s\n" % target_var[0])
        pass
    elif target_var and target_var[1] == 'continuous':
        datum.append(float(line[var_dict[target_var[0]]]))
    elif target_var:
        raise Exception('Invalid variable type: %s' % target_var[1])

    return datum


if __name__ == '__main__':
    # Script entry point: build a tree for the 'Survived' target from
    # data/train_titles.csv and write it to trees/temp.tree.

    # Independent variables, keyed by (column name, variable kind).
    # Categorical entries carry their list of values; continuous entries
    # carry None.  `titles` and `ticket_codes` are not defined in this block
    # and must already be available at module level.
    ind_vars = {
        ('Sex', 'categorical'): ['male', 'female'],
        ('Pclass', 'categorical'): ['1', '2', '3'],
        ('Embarked', 'categorical'): ['S', 'C', 'Q'],
        ('Title', 'categorical'): titles,
        ('Ticket_Code', 'categorical'): ticket_codes,
        ('SibSp', 'continuous'): None,
        ('Parch', 'continuous'): None,
        ('Fare', 'continuous'): None,
        ('Ticket_Val', 'continuous'): None,
    }
    # [name, kind, ...]; the trailing '0' and '1' are presumably the class
    # labels of the categorical target -- confirm against construct().
    target_var = ['Survived', 'categorical', '0', '1']
    max_depth = 100

    # open() replaces the Python 2-only file() builtin, and the with-blocks
    # guarantee each handle is closed even if construct() or write_tree()
    # raises part-way through.
    with open('data/train_titles.csv', 'r') as f:
        tree = construct(f, ind_vars, target_var, max_depth)

    with open('trees/temp.tree', 'w') as f:
        write_tree(tree, f)