예제 #1
0
        return list_columns
    else:
        return [columns for _ in range(k)]


if __name__ == '__main__':
    # Script entry point: load a CSV dataset, optionally min-max normalize
    # the numeric attributes, then evaluate an ensemble of N_TREE decision
    # trees with 2-fold cross-validation. For each fold a list of
    # (actual_target, predicted_target) tuples is collected, and the list
    # of all folds is printed at the end.
    args = parse()
    data = pd.read_csv(args.path, sep=args.sep)
    if args.numeric_attributes:
        data = normalizer.min_max(data, args.numeric_attributes, True)
    list_train, list_test = splitter.cross_validation(data, args.target, 2)
    results = []
    for train_data, test_data in zip(list_train, list_test):
        # One bootstrap entry per tree; only element [0] of each entry is
        # used for training here.
        sets = splitter.bootstrap(train_data, N_TREE)
        # NOTE(review): dropping the last column assumes the target is the
        # last column of the frame -- confirm against the input data.
        columns = list(train_data.columns[:-1])
        list_columns = random_columns(columns, n_max=len(columns) - 1)

        # Train one tree per bootstrap sample, each on its own column list.
        roots = []
        for i in range(N_TREE):
            root = dt.DecisionNode(sets[i][0], args.target)
            # Pass a copy so fit() cannot alter the shared column list.
            root.fit(list_columns[i].copy())
            roots.append(root)

        fold_result = []
        for test_index in range(test_data.shape[0]):
            row = test_data.iloc[test_index]
            # Collect votes for THIS row only. Previously the vote list was
            # shared by every row of the fold, so the majority vote for a
            # row also counted the votes cast for all earlier rows.
            votes = [root.test(row) for root in roots]
            # value_counts() sorts by frequency, so index[0] is the
            # most-voted label.
            fold_result.append(
                (row[args.target],
                 pd.Series(votes).value_counts().index[0]))
        results.append(fold_result)
    print(results)