return list_columns else: return [columns for _ in range(k)] if __name__ == '__main__': args = parse() data = pd.read_csv(args.path, sep=args.sep) if args.numeric_attributes: data = normalizer.min_max(data, args.numeric_attributes, True) list_train, list_test = splitter.cross_validation(data, args.target, 2) results = [] for train_data, test_data in zip(list_train, list_test): fold_result = [] sets = splitter.bootstrap(train_data, N_TREE) roots = [] columns = list(train_data.columns[:-1]) list_columns = random_columns(columns, n_max=len(columns) - 1) for i in range(N_TREE): roots.append(dt.DecisionNode(sets[i][0], args.target)) roots[-1].fit(list_columns[i].copy()) list_results = [] for test_index in range(test_data.shape[0]): for i in range(N_TREE): list_results.append(roots[i].test(test_data.iloc[test_index])) fold_result.append( (test_data.iloc[test_index][args.target], pd.Series(list_results).value_counts().index[0])) results.append(fold_result) print(results)