def prunning(class_weight='balanced'):
    """Evaluate decision-tree pruning settings on two datasets.

    For each dataset ('breast_cancer', 'kdd') and every combination of
    min_samples_leaf in (1, 25) and max_depth in (None, 4), fits an
    entropy-criterion DTLearner, prints its accuracy and fit/predict
    timings, and renders the fitted tree to a file.

    :param class_weight: forwarded to DTLearner (default 'balanced').
    """
    leaf_sizes = [1, 25]
    depths = [None, 4]
    for dataset in ['breast_cancer', 'kdd']:
        # kdd is large: transform it and subsample 10k rows;
        # breast_cancer is used whole and untransformed.
        is_kdd = dataset == 'kdd'
        x_train, x_test, y_train, y_test = data_service.load_and_split_data(
            scale_data=True,
            transform_data=is_kdd,
            random_slice=10000 if is_kdd else None,
            random_seed=None,
            dataset=dataset,
            test_size=.5)
        for leaf_size in leaf_sizes:
            for depth in depths:
                learner = DTLearner(criterion='entropy',
                                    min_samples_leaf=leaf_size,
                                    max_depth=depth,
                                    class_weight=class_weight)
                score, fit_time, predict_time = learner.fit_predict_score(
                    x_train, y_train, x_test, y_test)
                print('DT set: {0}, min_samples_leaf: {1}, max_depth: {2},  score: {3}, fit_time: {4}, predict_time: {5}'
                      .format(dataset, leaf_size, depth, score, fit_time, predict_time))
                learner.draw_tree('-{0}-min_samples_leaf{1}-max_depth{2}'.format(dataset, leaf_size, depth))
    def gini_vs_entropy():
        """Compare the 'gini' and 'entropy' split criteria for a decision tree.

        Fits an unrestricted (max_depth=None, min_samples_leaf=1),
        class-balanced DTLearner on each dataset ('breast_cancer', 'kdd')
        with each criterion, prints the accuracy and fit/predict timings,
        and renders the fitted tree to a file.
        """
        min_samples_leaf = 1
        max_depth = None          # grow the tree fully; no pruning
        class_weight = 'balanced'

        datasets = ['breast_cancer', 'kdd']
        criterions = ['gini', 'entropy']
        for dataset in datasets:
            if dataset == 'kdd':
                # kdd is large: transform it and subsample 10k rows.
                transform_data = True
                random_slice = 10000
            else:
                transform_data = False
                random_slice = None
            x_train, x_test, y_train, y_test = data_service.load_and_split_data(scale_data=True,
                                                                                transform_data=transform_data,
                                                                                random_slice=random_slice,
                                                                                random_seed=None,
                                                                                dataset=dataset,
                                                                                test_size=.5)
            for criterion in criterions:
                print(dataset, criterion)
                dt_learner = DTLearner(criterion=criterion, min_samples_leaf=min_samples_leaf, max_depth=max_depth,
                                       class_weight=class_weight)

                dt_accuracy_score, dt_fit_time, dt_predict_time = dt_learner.fit_predict_score(x_train, y_train,
                                                                                               x_test, y_test)
                # BUG FIX: the original format string ended with a stray
                # ', {2}' which printed the accuracy score a second time.
                print('DT set: {0}, criterion: {1}  score: {2}, fit_time: {3}, predict_time: {4}'
                      .format(dataset, criterion, dt_accuracy_score, dt_fit_time, dt_predict_time))

                dt_learner.draw_tree('-{0}-{1}'.format(dataset, criterion))
# Beispiel #3  (scraper artifact: example-listing separator, not code)
# 0            (scraper artifact: vote count from the example listing)
# Script snippet: train an NNLearner and inspect the fitted network's weights.
nn_solver = 'lbfgs'

#{'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'learning_rate_init': 0.01, 'solver': 'lbfgs'}

# NOTE(review): nn_hidden_layer_sizes, nn_activation, alpha, nn_learning_rate
# and nn_learning_rate_init are defined outside this view — presumably the
# grid-search best parameters echoed in the dict comment above; confirm
# against the surrounding file.
nn_learner = NNLearner(hidden_layer_sizes=nn_hidden_layer_sizes,
                       max_iter=200,
                       solver=nn_solver,
                       activation=nn_activation,
                       alpha=alpha,
                       learning_rate=nn_learning_rate,
                       learning_rate_init=nn_learning_rate_init)

# NOTE(review): scale_data, transform_data, random_slice, random_seed,
# dataset and test_size are also set outside this view.
x_train, x_test, y_train, y_test = data_service.load_and_split_data(
    scale_data=scale_data,
    transform_data=transform_data,
    random_slice=random_slice,
    random_seed=random_seed,
    dataset=dataset,
    test_size=test_size)

# Fit on the training split, then score/time on the test split.
nn_accuracy_score, nn_fit_time, nn_predict_time = nn_learner.fit_predict_score(
    x_train, y_train, x_test, y_test)

print('nn: {0}, {1}, {2}'.format(nn_accuracy_score, nn_fit_time,
                                 nn_predict_time))
print('---------------------------------------------------------------')
# Dump the fitted weight matrices of the underlying estimator
# (input->hidden and hidden->output layers) and the output count.
print(nn_learner.estimator.coefs_)
print('---------------------------------------------------------------')
print(nn_learner.estimator.coefs_[0].shape)
print(nn_learner.estimator.coefs_[1].shape)
print(nn_learner.estimator.n_outputs_)
# Beispiel #4  (scraper artifact: example-listing separator, not code)
# 0            (scraper artifact: vote count from the example listing)
    # NOTE(review): the enclosing `def` header is outside this view; this is
    # the scoring tail of a regression fit/predict routine. test_predictions
    # and Y_test come from earlier in that function — presumably 1-D arrays
    # of predicted and true targets; confirm against the missing lines.
    print("Mean Absolute Error: ")
    # mse = np.sqrt(np.mean(np.square(test_predictions - Y_test)))
    mae = np.mean(np.abs(test_predictions - Y_test))
    print(mae)

    print("RMSE: ")
    # Root-mean-squared error over the same predictions, printed for reference.
    rmse = np.sqrt(np.mean(np.square(test_predictions - Y_test)))
    print(rmse)

    # Only the MAE is returned; RMSE is display-only.
    return mae


# Script: sweep decision-tree depths on the boston dataset, averaging the
# mean-absolute-error over 5 random splits per depth.
np.set_printoptions(suppress=True, precision=2)  # compact float output

# NOTE(review): this initial split is immediately shadowed by the identical
# reload inside the loop below, so it appears redundant — confirm nothing
# between here and the loop reads it.
X_train, X_test, Y_train, Y_test = \
    data_service.load_and_split_data(scale_data=True, transform_data=False, test_size=0.2, random_slice=None,
                                     random_seed=None, dataset='boston')

mean_absolute_errors = []
max_depths = range(1, 10)  # candidate depths 1..9

for max_depth in max_depths:

    #do it 5 times for the depth
    mean_abs_errors_for_depth = []
    for train_run in range(5):
        # Fresh random split each run (random_seed=None) so the 5 runs differ.
        X_train, X_test, Y_train, Y_test = \
            data_service.load_and_split_data(scale_data=True, transform_data=False, test_size=0.2, random_slice=None,
                                             random_seed=None, dataset='boston')
        mean_abs_error = fit_predict_score(max_depth, X_train, X_test, Y_train,
                                           Y_test)
        mean_abs_errors_for_depth.append(mean_abs_error)
        # NOTE(review): the loop continues past this view — the aggregation of
        # mean_abs_errors_for_depth into mean_absolute_errors is not visible.