Python calc_statistics 예제들, statistical_functions.calc_statistics Python 예제들

예제 #1

0

파일 보기

파일: run_d.py 프로젝트: Cyangray/ML-Project-2

                      lmbd = lmbd,
                      input_activation = 'sigmoid',
                      output_activation = 'linear',
                      cost_function = 'MSE')
 # Add two layers to the network, both configured with 20 and 'sigmoid'.
 # NOTE(review): 20 is presumably the number of nodes per layer - confirm
 # against the add_layer signature.
 ffnn.add_layer(20, activation_method = 'sigmoid')
 ffnn.add_layer(20, activation_method = 'sigmoid')
 
 # Train the network
 ffnn.train()
 
 # Store the predictions for the training and the test inputs
 y_tilde_train = ffnn.predict(X_train)
 y_tilde_test = ffnn.predict(X_test)
 
 # Store the metrics in the [i][j] cell of the exportable matrices.
 # calc_statistics returns a pair that is unpacked as (MSE, R2).
 train_mse[i][j], train_R2[i][j] = statistics.calc_statistics(y_train, y_tilde_train)
 test_mse[i][j], test_R2[i][j] = statistics.calc_statistics(y_test, y_tilde_test)
 
 # Remember the training predictions of the run with the lowest training MSE so far
 if best_train_mse > train_mse[i][j]:
     best_train_mse = train_mse[i][j]
     best_y_tilde_train = y_tilde_train
 
 # Remember the test predictions of the run with the lowest test MSE so far
 if best_test_mse > test_mse[i][j]:
     best_test_mse = test_mse[i][j]
     best_y_tilde_test = y_tilde_test
     
 
 # Print the hyperparameters of this run together with its training metrics
 print('Learning rate: ', eta)
 print('lambda: ', lmbd)
 print('Train. mse = ', train_mse[i][j], 'R2 = ', train_R2[i][j])

예제 #2

0

파일 보기

파일: sampling_methods.py 프로젝트: Loopdiloop/fys-stk4155-project2

    def kfold_cross_validation(self, k, method, deg=5, lambd=1):
        """Run k-fold cross-validation for one regression method.

        For every fold the model is fitted on the training part and evaluated
        on the held-out part. The per-fold results are appended to
        ``self.mse``, ``self.R2``, ``self.mse_train``, ``self.R2_train``,
        ``self.bias`` and ``self.variance`` (all reset at the start of the
        call). The coefficients of the fold with the lowest absolute test MSE
        are stored in ``self.best_predicting_beta``.

        Args:
            k: Kept for interface compatibility. The number of folds that is
                actually iterated over is read from ``self.inst.k``.
            method: ``"least squares"`` (ordinary least squares), ``"ridge"``
                or ``"lasso"``.
            deg: Forwarded as ``deg`` to ``create_design_matrix``.
            lambd: Forwarded to the ridge / lasso fit.

        Raises:
            SystemExit: If ``method`` is not one of the supported names. The
                check runs before any fitting work is done.
        """
        # Validate up front. sys.exit() accepts at most one argument, so the
        # message has to be assembled into a single string.
        if method not in ("least squares", "ridge", "lasso"):
            sys.exit("Wrongly designated method: {} not found".format(method))

        inst = self.inst
        # Start from +inf so that the first fold always becomes the current
        # best, no matter how large its MSE is.
        lowest_mse = float("inf")

        self.mse = []
        self.R2 = []
        self.mse_train = []
        self.R2_train = []
        self.bias = []
        self.variance = []
        design_matrix = fit(inst)
        whole_DM = design_matrix.create_design_matrix(
            deg=deg).copy()  #design matrix for the whole dataset
        # NOTE(review): whole_z is never read in this method.
        whole_z = inst.z_1d.copy()  #save the whole output

        for i in range(self.inst.k):
            #pick the i-th set as test
            inst.sort_training_test_kfold(i)
            inst.fill_array_test_training()

            design_matrix.create_design_matrix(
                deg=deg
            )  #create design matrix for the training set, and evaluate
            if method == "least squares":
                z_train, beta_train = design_matrix.fit_design_matrix_numpy()
            elif method == "ridge":
                z_train, beta_train = design_matrix.fit_design_matrix_ridge(
                    lambd)
            else:  # "lasso"; any other name was rejected above
                z_train, beta_train = design_matrix.fit_design_matrix_lasso(
                    lambd)

            #Find out which values get predicted by the training set
            X_test = design_matrix.create_design_matrix(x=inst.test_x_1d,
                                                        y=inst.test_y_1d,
                                                        z=inst.test_z_1d,
                                                        N=inst.N_testing,
                                                        deg=deg)
            z_pred = design_matrix.test_design_matrix(beta_train, X=X_test)

            #Take the real values from the dataset for comparison
            z_test = inst.test_z_1d

            #Calculate the prediction for the whole dataset
            # NOTE(review): the result is never read; the call is kept in case
            # test_design_matrix has side effects on design_matrix.
            whole_z_pred = design_matrix.test_design_matrix(beta_train,
                                                            X=whole_DM)

            # Statistically evaluate the fitted model against the test fold.
            mse, calc_r2 = statistics.calc_statistics(z_test, z_pred)

            # Statistically evaluate the training set with itself
            mse_train, calc_r2_train = statistics.calc_statistics(
                inst.z_1d, z_train)

            # Get the values for the bias and the variance
            bias, variance = statistics.calc_bias_variance(z_test, z_pred)

            self.mse.append(mse)
            self.R2.append(calc_r2)
            self.mse_train.append(mse_train)
            self.R2_train.append(calc_r2_train)
            self.bias.append(bias)
            self.variance.append(variance)
            # Keep the coefficients of the best-predicting fold so far
            if abs(mse) < lowest_mse:
                lowest_mse = abs(mse)
                self.best_predicting_beta = beta_train

예제 #3

0

파일 보기

    def kfold_cross_validation(self,
                               method,
                               descent_method='SGD-skl',
                               deg=0,
                               Niterations=100,
                               lambd=0.01,
                               eta=0.000005,
                               m=5,
                               verbose=False):
        """Run k-fold cross-validation for one regression/classification method.

        The number of folds is read from ``self.inst.k``. For every fold the
        model is fitted on the training part and evaluated on the held-out
        part. All result lists are reset at the start of the call.

        For ``'logreg'`` the per-fold results are appended to
        ``self.accuracy``, ``self.rocaucs`` and ``self.area_ratios``. For the
        other methods they are appended to ``self.mse``, ``self.R2``,
        ``self.mse_train``, ``self.R2_train``, ``self.bias`` and
        ``self.variance``, and the coefficients of the fold with the lowest
        absolute test MSE are stored in ``self.best_predicting_beta``.

        Args:
            method: ``'OLS'``, ``'Ridge'``, ``'LASSO'`` or ``'logreg'``.
            descent_method: Forwarded to the logistic regression fit.
            deg: Forwarded as ``deg`` to ``create_design_matrix``.
            Niterations: Forwarded to the lasso fit as ``maxiter`` and to the
                logistic regression fit as ``Niteration``.
            lambd: Forwarded to the ridge / lasso fit.
            eta: Forwarded to the logistic regression fit.
            m: Forwarded to the logistic regression fit.
            verbose: Forwarded to the logistic regression fit.

        Raises:
            SystemExit: If ``method`` is not one of the supported names. The
                check runs before any fitting work is done.
        """
        # Validate up front. sys.exit() accepts at most one argument, so the
        # message has to be assembled into a single string.
        if method not in ('OLS', 'Ridge', 'LASSO', 'logreg'):
            sys.exit("Wrongly designated method: {} not found".format(method))

        inst = self.inst
        # Start from +inf so that the first fold always becomes the current
        # best, no matter how large its MSE is.
        lowest_mse = float('inf')

        self.mse = []
        self.R2 = []
        self.mse_train = []
        self.R2_train = []
        self.bias = []
        self.variance = []
        self.accuracy = []
        self.design_matrix = fit(inst)
        self.rocaucs = []
        self.area_ratios = []

        for i in range(self.inst.k):
            #pick the i-th set as test
            inst.sort_training_test_kfold(i)
            inst.fill_array_test_training()
            self.design_matrix.create_design_matrix(
                deg=deg
            )  #create design matrix for the training set, and evaluate

            if method == 'OLS':
                y_train, beta_train = self.design_matrix.fit_design_matrix_numpy(
                )
            elif method == "Ridge":
                y_train, beta_train = self.design_matrix.fit_design_matrix_ridge(
                    lambd)
            elif method == "LASSO":
                y_train, beta_train = self.design_matrix.fit_design_matrix_lasso(
                    lambd, maxiter=Niterations)
            else:  # 'logreg'; any other name was rejected above
                y_train, beta_train = self.design_matrix.fit_design_matrix_logistic_regression(
                    descent_method=descent_method,
                    eta=eta,
                    Niteration=Niterations,
                    m=m,
                    verbose=verbose)

            #Find out which values get predicted by the training set
            X_test = self.design_matrix.create_design_matrix(x=inst.test_x_1d,
                                                             N=inst.N_testing,
                                                             deg=deg)
            y_pred = self.design_matrix.test_design_matrix(beta_train,
                                                           X=X_test)

            #Take the real target values from the test dataset for comparison (and also a rescaled set)
            y_test = inst.test_y_1d
            _, y_test_rescaled = inst.rescale_back(x=inst.test_x_1d,
                                                   y=inst.test_y_1d,
                                                   split=True)
            target = y_test_rescaled.astype(int)

            if method == 'logreg':
                # Evaluate the classifier on the test fold.
                # Two-column form of the prediction: (1 - y_pred, y_pred).
                y_pred_onehot = np.column_stack((1 - y_pred, y_pred))
                accuracy_batch = statistics.calc_accuracy(target, y_pred)
                rocaucs_batch = statistics.calc_rocauc(target, y_pred)

                # Area ratio: cumulative AUC of the prediction relative to the
                # cumulative AUC obtained when the targets themselves are used
                # as the prediction, both measured from the 0.5 baseline.
                max_area_test = statistics.calc_cumulative_auc(
                    target, make_onehot(target))
                area_ratio_batch = (statistics.calc_cumulative_auc(
                    target, y_pred_onehot) - 0.5) / (max_area_test - 0.5)
                self.accuracy.append(accuracy_batch)
                self.rocaucs.append(rocaucs_batch)
                self.area_ratios.append(area_ratio_batch)
            else:

                # Statistically evaluate the fitted model against the test fold.
                mse, calc_r2 = statistics.calc_statistics(y_test, y_pred)

                # Statistically evaluate the training set with itself
                mse_train, calc_r2_train = statistics.calc_statistics(
                    inst.y_1d, y_train)

                # Get the values for the bias and the variance
                bias, variance = statistics.calc_bias_variance(y_test, y_pred)
                self.mse.append(mse)
                self.R2.append(calc_r2)
                self.mse_train.append(mse_train)
                self.R2_train.append(calc_r2_train)
                self.bias.append(bias)
                self.variance.append(variance)
                # Keep the coefficients of the best-predicting fold so far
                if abs(mse) < lowest_mse:
                    lowest_mse = abs(mse)
                    self.best_predicting_beta = beta_train

예제 #4

0

파일 보기

파일: run_a.py 프로젝트: Loopdiloop/fys-stk4155

# Alternative to using an existing dataset: generate one directly.
#dataset = data_generate()
#dataset.generate_franke(n=100, noise=0.2)

# Normalize the dataset
dataset.normalize_dataset()

# Wrap the dataset in a fit object, used below to build and fit the design matrix
fitted_model = fit(dataset)

# Ordinary least squares fitting: build the design matrix for `deg`, then fit.
# The fit returns the model output and the coefficients (beta).
fitted_model.create_design_matrix(deg)
z_model_norm, beta = fitted_model.fit_design_matrix_numpy()

# Statistical evaluation: compare the dataset's z values with the model output
mse, calc_r2 = statistics.calc_statistics(dataset.z_1d, z_model_norm)
print("Mean square error: ", mse, "\n", "R2 score: ", calc_r2)

# Scale the model output back; only element 2 (z) of the result is used here
rescaled_dataset = dataset.rescale_back(z=z_model_norm)
#x_model = rescaled_dataset[0]
#y_model = rescaled_dataset[1]
z_model = rescaled_dataset[2]

# Generate the analytical solution (noise=0) for plotting purposes
analytical = data_generate()
analytical.generate_franke(n, noise=0)

# Plot the model output and the analytical solution for comparison
plot_3d(dataset.x_unscaled, dataset.y_unscaled, z_model, analytical.x_mesh,
        analytical.y_mesh, analytical.z_mesh, ["surface", "scatter"])