Code example #1

# Imports assumed by this module (the TechnicalAnalyzer and ModuleManager import
# paths below are assumptions; they are project-local helpers):
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, kendalltau
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor

from technical_analyzer import TechnicalAnalyzer
from module_manager import ModuleManager

class PreProcessor(object):
    """Preprocessor class. This class has the responsibility to preprocess the data. More specifically, the class
    has the task of simulating random correlated asset paths in the bivariate case. Additionally, the class has the
    responsibility for estimating the uncertainty in the output variable through a bootstrap resampling procedure."""
    def __init__(self):
        """Initializer PreProcessor object."""
        self.ta = TechnicalAnalyzer()
        self.mm = ModuleManager()

    def simulate_random_correlation_ar(self, T, a0, a1):
        """Simulate a random correlation process with highly persistent time-varying correlations following an
           auto-regressive process. Add noise with ar process
        :param T: simulation length
        :param a0:
        :param a1:
        :return: random_corr: correlation process following specified dynamics."""
        eps = 1e-5
        random_corr = np.empty(T)
        random_corr[0] = a0 / (1 - a1)  # initialise random correlation process
        for t in range(1, T):
            eta = np.random.normal(0, 0.2)
            random_corr[t] = np.clip(a0 + a1 * random_corr[t - 1] + eta,
                                     -1 + eps, 1 - eps)  # keep correlation strictly inside (-1, 1)
        return random_corr
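
    # Usage sketch with the values used in the simulation script below: with a0 = 0.1
    # and a1 = 0.8 the process fluctuates around its stationary mean
    # a0 / (1 - a1) = 0.5, and the clipping above keeps it strictly inside (-1, 1):
    #   pp = PreProcessor()
    #   rho = pp.simulate_random_correlation_ar(T=1751, a0=0.1, a1=0.8)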

    def simulate_correlated_asset_paths(self, corr_vector, vol_matrix, T):
        """Simulate asset paths with specified time-varying correlation dynamics.
        :param corr_vector: time-varying correlation vector
        :param vol_matrix: volatility matrix
        :param T: simulation length
        :return: correlated_asset_paths: simulated asset paths with specified correlation dynamics."""
        if corr_vector.ndim == 1:
            size = 2
        else:
            size = corr_vector.shape[1]  # no of columns, i.e. no of assets
        z = np.random.normal(0, 1, (T, size))  # T-by-size draws from a standard normal
        correlated_asset_paths = np.empty([T, size])  # initialise T-by-size array for correlated asset paths
        for t, rho in enumerate(corr_vector):
            corr_matrix = self.construct_correlation_matrix(rho, size)
            cov_matrix = self.construct_covariance_matrix(vol_matrix, corr_matrix)
            cholesky_factor = self.cholesky_factorization(cov_matrix)  # Cholesky decomposition
            correlated_asset_paths[t] = np.dot(cholesky_factor, z[t])  # generate Y_t = H_t^(0.5) * z_t
        return correlated_asset_paths

    def construct_correlation_matrix(self, corr_vec, n):
        """Method for constructing time-varying correlation matrix given a time-varying correlations vector.
        :param corr_vec: time-varying correlation vector
        :param n: dimension correlation matrix
        :return corr_matrix: time-varying correlation matrix"""
        corr_triu = np.zeros((n, n))
        iu1 = np.triu_indices(n, 1)  # indices of the upper triangle with diagonal offset 1
        corr_triu[iu1] = corr_vec  # assign correlations to the corresponding upper-triangle entries
        corr_matrix = corr_triu + corr_triu.T + np.eye(n)  # symmetrise and set a unit diagonal
        return corr_matrix

    def construct_covariance_matrix(self, vol_matrix, corr_matrix):
        """Method for constructing time-varying covariance matrix given a time-varying correlations matrix and asset
        volatility vector.
        :param vol_matrix: diagonal matrix containing asset volatilities
        :param corr_matrix: time-varying correlation matrix
        :return: cov_matrix: time-varying covariance matrix."""
        cov_matrix = np.dot(vol_matrix, np.dot(corr_matrix, vol_matrix))
        return cov_matrix

    def cholesky_factorization(self, cov_matrix):
        """Method for matrix decomposition through Cholesky factorization. The Cholesky factorization states that every
        symmetric positive definite matrix A has a unique factorization A = LL' where L is a lower-triangular matrix and
        L' is its conjugate transpose.
        :param cov_matrix: time-varying positive definite covariance matrix
        :return: cholesky_factor: cholesky decomposition lower-triangular matrix L such that LL' = cov_matrix"""
        cholesky_factor = np.linalg.cholesky(cov_matrix)
        return cholesky_factor

    def determinant_LU_factorization(self, corr_vec, n):
        """Method for determining the determinant of a given matrix. Determinants are computed using
        LU factorization.
        :param corr_vec: time-varying correlation vector
        :param n: dimension correlation matrix
        :return: determinant."""
        cor_matrix = self.construct_correlation_matrix(corr_vec, n)
        det = np.linalg.det(cor_matrix)
        return det
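
    # Note (bivariate case): for n = 2 the determinant reduces to 1 - rho**2, so the
    # correlation matrix is positive definite exactly when |rho| < 1.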

    def generate_bivariate_dataset(self,
                                   ta,
                                   simulated_data_process,
                                   dt,
                                   proxy_type='pearson',
                                   T=500):
        """Method for generating a bivariate dataset with proxies moving window correlation estimates for covariate set
        and true correlation as the output variables.
        :param ta: technical analyzer object
        :param simulated_data_process: bivariate asset process with predefined correlation dynamics.
        :param dt: window length
        :param proxy_type: type definition of proxy for estimates of true correlation
        :param T: length test set
        :return: two datasets, one with the true correlation and one with the proxy as output variable."""
        if proxy_type == 'pearson':
            pearson_estimates = ta.moving_window_correlation_estimation(
                simulated_data_process.iloc[:, :2], dt)
            # Feature set consists of lagged asset price and mw correlation estimate, e.g. x_t = MW_t-1
            dataset = simulated_data_process.iloc[:, :2].shift(
                periods=1, axis='index')  # Dataframe
            dataset['MW_t-1'] = pearson_estimates.shift(periods=1,
                                                        axis='index')
            dataset_proxy = dataset.copy()  # copy feature matrix
            # Dataset with true correlations as target variable and proxies
            dataset['rho_true'] = simulated_data_process['rho']
            dataset_proxy['rho_proxy'] = pearson_estimates
        else:  # Kendall as proxy
            kendall_estimates = ta.moving_window_correlation_estimation(
                simulated_data_process.iloc[:, :2], dt, proxy_type='kendall')
            # Feature set consists of lagged asset price and kendall correlation estimate, e.g. x_t = kendall_t-1
            dataset = simulated_data_process.iloc[:, :2].shift(
                periods=1, axis='index')  # Dataframe
            dataset['Kendall_t-1'] = kendall_estimates.shift(periods=1,
                                                             axis='index')
            dataset_proxy = dataset.copy()  # copy feature matrix
            # Dataset with true correlations as target variable and proxies
            dataset['rho_true'] = simulated_data_process['rho']
            dataset_proxy['rho_proxy'] = kendall_estimates
        return dataset, dataset_proxy

    def generate_multivariate_dataset(self,
                                      ta,
                                      data,
                                      dt,
                                      proxy_type='pearson'):
        """Method for generating a multivariate dataset with moving window estimates as approximation for true
        correlation constructing the set of covariates and output variable.
        :param ta: technical analyzer object
        :param data: dataframe with log returns
        :param dt: window length
        :param proxy_type: type definition of proxy for estimates of true correlation
        :return: dataset with approximated covariates and output variable."""
        correlation_estimates = ta.moving_window_correlation_estimation(
            data, dt, proxy_type=proxy_type)
        # Feature set consists of the lagged correlation estimate and lagged min. and max. asset returns
        dataset = correlation_estimates.shift(periods=1, axis='index')
        dataset['r_min'] = np.min(data, axis=1).shift(periods=1, axis='index')
        dataset['r_max'] = np.max(data, axis=1).shift(periods=1, axis='index')
        # Dataset with proxies
        result = pd.concat([dataset, correlation_estimates],
                           axis=1,
                           join='inner')
        return result

    def bootstrap_moving_window_estimate(self,
                                         data,
                                         delta_t,
                                         T=500,
                                         reps=1000,
                                         ciw=99,
                                         proxy_type='pearson'):
        """Method for measuring the estimation uncertainty associated to the correlation coefficients when moving
        window estimates are used for approximating true correlations.
        :param data: dataset used for the task of bootstrap resampling
        :param delta_t: window length for moving window estimates of the correlation coefficient
        :param T: length of test set
        :param reps: number of bootstrap samples
        :param ciw: confidence interval width
        :param proxy_type: type definition of proxy for estimates of true correlation (pearson, emw, kendall)
        :return: correlation estimates with associated estimation uncertainty."""
        assets_price = data.tail(T + delta_t - 1).iloc[:, :-1]
        assets_price.reset_index(drop=True, inplace=True)
        rho_true = data.tail(T).iloc[:, -1]
        rho_true.reset_index(drop=True, inplace=True)
        rho_estimates = np.full(T, np.nan)
        sd_rho_estimates = np.full(T, np.nan)  # bootstrapped standard error of rho estimates
        lower_percentiles = np.full(T, np.nan)  # lower percentile values
        upper_percentiles = np.full(T, np.nan)  # upper percentile values
        p_low = (100 - ciw) / 2
        p_high = 100 - p_low

        for j, t in enumerate(range(delta_t, T + delta_t)):
            sampling_data = np.asarray(assets_price.iloc[t - delta_t:t, :])
            # Bootstrap resampling procedure:
            # draw sample of size delta_t by randomly extracting time units with uniform probability, with replacement.
            rho_bootstrapped = np.full(reps, np.nan)
            for rep in range(reps):
                indices = np.random.randint(low=0,
                                            high=sampling_data.shape[0],
                                            size=delta_t)
                sample = sampling_data[indices]
                if proxy_type == 'emw':
                    # Bootstrap procedure for exponentially weighted moving window estimates
                    w = self.ta.exponential_weights(delta_t, delta_t / 3)
                    weight_vec_raw = w[indices]
                    sum_w = np.sum(weight_vec_raw)
                    weight_vec_norm = [i / sum_w for i in weight_vec_raw]  # re-normalize weights to sum to one
                    rho_bootstrapped[rep] = \
                        self.ta.pearson_weighted_correlation_estimation(sample[:, 0], sample[:, 1], delta_t,
                                                                        weight_vec_norm)
                elif proxy_type == 'pearson':
                    rho_bootstrapped[rep] = pearsonr(sample[:, 0], sample[:, 1])[0]
                elif proxy_type == 'kendall':
                    rho_bootstrapped[rep] = kendalltau(sample[:, 0], sample[:, 1])[0]
                else:
                    raise ValueError('Please choose an option from the supported set of proxies for true '
                                     'correlations (pearson, emw, kendall).')
            lower, upper = np.nanpercentile(rho_bootstrapped, [p_low, p_high])
            lower_percentiles[j] = lower
            upper_percentiles[j] = upper
            rho_estimates[j] = np.nanmean(rho_bootstrapped)
            sd_rho_estimates[j] = np.nanstd(rho_bootstrapped)
        return rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates
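
    # Usage sketch (mirroring the bootstrap script further below; the last column of
    # the simulated dataset holds the true correlation):
    #   rho_hat, lo, hi, sd = preprocesser.bootstrap_moving_window_estimate(
    #       data=simulated_data_process, delta_t=21, T=500, reps=1000, ciw=99,
    #       proxy_type='pearson')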

    def bootstrap_learner_estimate(self,
                                   data,
                                   T=500,
                                   reps=1000,
                                   ciw=99,
                                   model='knn',
                                   n_neighbors=5):
        """"Method for measuring the estimation uncertainty associated to the correlation coefficients when a learner
        model is used for approximating true correlations.
        :param data: dataset used for the task of bootstrap resampling
        :param T: length of test set
        :param reps: number of bootstrap samples
        :param ciw: confidence interval width
        :param model: learner model (e.g. nearest neighbour or random forest regressors)
        :param n_neighbors: number of multivariate neighbours
        :return: correlation estimates with associated estimation uncertainty."""
        rho_estimates = np.full(T, np.nan)
        sd_rho_estimates = np.full(T, np.nan)  # bootstrapped standard error of rho estimates
        lower_percentiles = np.full(T, np.nan)  # lower percentile values
        upper_percentiles = np.full(T, np.nan)  # upper percentile values
        p_low = (100 - ciw) / 2
        p_high = 100 - p_low
        data.drop(data.head(251).index, inplace=True)  # drop the first 251 observations (longest window; mutates the input)
        data.reset_index(drop=True, inplace=True)
        t_train_init = data.shape[0] - T  # 1000 for T = 500

        for j, t in enumerate(range(t_train_init, data.shape[0])):  # j = {0, ..., 499}, t = {1000, ..., 1499}
            sampling_data = np.asarray(data.iloc[:t, :])  # expanding window; a true rolling window would be [j:t, :]
            x_test = np.asarray(data.iloc[t, 0:-1])  # this is in fact x_{t+1}
            y_test = np.asarray(data.iloc[t, -1])  # this is in fact y_{t+1}
            # Bootstrap resampling procedure:
            # draw sample of size train_set by randomly extracting time units with uniform probability, with replacement
            rho_bootstrapped = np.full(reps, np.nan)
            for rep in range(reps):
                indices = np.random.randint(low=0, high=t, size=t)
                sample = sampling_data[indices]  # use the bootstrap sample for the learner prediction
                # Separate data into feature and response components
                X = np.asarray(sample[:, 0:-1])  # feature matrix (vectorize data for speed-up)
                y = np.asarray(sample[:, -1])  # response vector
                X_train = X[0:t, :]
                y_train = y[0:t]
                # Obtain estimation uncertainty in Pearson correlation estimation rho_t using bootstrap resampling:
                if model == 'knn':
                    knn = KNeighborsRegressor(n_neighbors=n_neighbors)  # alternatively n_neighbors=len(X_train)
                    rho_bootstrapped[rep] = knn.fit(X_train, y_train).predict(x_test.reshape(1, -1))
                elif model == 'rf':
                    rf = RandomForestRegressor(n_jobs=1, n_estimators=10,
                                               max_features=1).fit(X_train, y_train)
                    rho_bootstrapped[rep] = rf.predict(x_test.reshape(1, -1))
                else:
                    raise ValueError('Please choose an option from the supported set of learner algorithms '
                                     '(nearest neighbour: knn, random forest: rf).')
            lower, upper = np.nanpercentile(rho_bootstrapped, [p_low, p_high])
            lower_percentiles[j] = lower
            upper_percentiles[j] = upper
            rho_estimates[j] = np.nanmean(rho_bootstrapped)
            sd_rho_estimates[j] = np.nanstd(rho_bootstrapped)
        return rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates
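
    # Usage sketch (mirroring the learner-uncertainty script further below; `dataset`
    # is one of the generated bivariate datasets, with the output variable in the
    # last column):
    #   rho_hat, lo, hi, sd = preprocesser.bootstrap_learner_estimate(
    #       data=dataset, reps=1000, model='knn', n_neighbors=5)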

    def mse_knn_sensitivity_analysis(self,
                                     proxy_type='pearson',
                                     output_type='true'):
        """Method for creation of a dataframe containing information on MSE decomposition as a function of different
        parameterizations for knn learner model.
        :param proxy_type: type of moving window estimator used as covariate.
        :param output_type: output variable true correlation or proxy.
        :return: dataframe."""
        rho_bias_squared = np.full(1001, np.nan)
        rho_var_vec = np.full(1001, np.nan)
        rho_mse_vec = np.full(1001, np.nan)
        # Load mse decomposition data
        mse_knn5 = self.mm.load_data(
            'bivariate_analysis/%s_cor/mse_results_%s_cor/mse_knn5_%s_%s_cor.pkl'
            % (output_type, output_type, proxy_type, output_type))
        mse_knn10 = self.mm.load_data(
            'bivariate_analysis/%s_cor/mse_results_%s_cor/mse_knn10_%s_%s_cor.pkl'
            % (output_type, output_type, proxy_type, output_type))
        mse_knn25 = self.mm.load_data(
            'bivariate_analysis/%s_cor/mse_results_%s_cor/mse_knn25_%s_%s_cor.pkl'
            % (output_type, output_type, proxy_type, output_type))
        mse_knn50 = self.mm.load_data(
            'bivariate_analysis/%s_cor/mse_results_%s_cor/mse_knn50_%s_%s_cor.pkl'
            % (output_type, output_type, proxy_type, output_type))
        mse_knn_100_to_1000 = self.mm.load_data(
            'bivariate_analysis/%s_cor/mse_results_%s_cor/'
            'mse_knn100_to_1000_%s_%s_cor.pkl' %
            (output_type, output_type, proxy_type, output_type))
        # Creation of dataframe: map each number of neighbours to its MSE decomposition
        for k, frame in [(5, mse_knn5), (10, mse_knn10), (25, mse_knn25), (50, mse_knn50)]:
            rho_mse_vec[k], rho_bias_squared[k], rho_var_vec[k] = frame.iloc[10, :]
        for k in range(100, 1001, 100):
            rho_mse_vec[k], rho_bias_squared[k], rho_var_vec[k] = mse_knn_100_to_1000.iloc[k // 100, :]
        # Dataframe with information on MSE decomposition as a function of different learner parameterizations
        data_frame = pd.DataFrame({
            'bias_squared': rho_bias_squared,
            'variance': rho_var_vec,
            'MSE': rho_mse_vec
        })
        return data_frame

    def mse_rf_sensitivity_analysis(self,
                                    rho_true,
                                    proxy_type='pearson',
                                    output_type='true',
                                    type='trees'):
        """Method for creation of a dataframe containing information on MSE decomposition as a function of different
        parameterizations for rf learner model.
        :param rho_true: vector containing true correlation
        :param proxy_type: type of moving window estimator used as covariate.
        :param output_type: output variable true correlation or proxy.
        :return: dataframe."""
        if type == 'trees':
            rho_bias_squared = np.full(1001, np.nan)
            rho_var_vec = np.full(1001, np.nan)
            rho_mse_vec = np.full(1001, np.nan)
            trees = [10, 100, 300, 600, 1000]
            # Load mse decomposition data
            for tree in trees:
                data = self.mm.load_data(
                    'bivariate_analysis/%s_cor/%s/results_rf_%s_%s_cor/'
                    'rf%i_%s_10_estimate_uncertainty_rep_100_%s_corr.pkl' %
                    (output_type, proxy_type, proxy_type, output_type, tree,
                     proxy_type, output_type))
                rho_estimates = data['Rho_estimate']
                rho_bias_squared[tree] = np.mean(np.power(rho_estimates - rho_true, 2))
                rho_var_vec[tree] = np.power(np.mean(data['std rho estimate']), 2)
            rho_mse_vec = rho_bias_squared + rho_var_vec  # MSE = squared bias + variance
            data_frame = pd.DataFrame({
                'bias_squared': rho_bias_squared,
                'variance': rho_var_vec,
                'MSE': rho_mse_vec
            })
            filename_save = 'mse_rf_%s_%s_cor_sensitivity_analysis_trees.pkl' % (
                proxy_type, output_type)
            self.mm.save_data(
                'bivariate_analysis/%s_cor/mse_results_%s_cor/' %
                (output_type, output_type) + filename_save, data_frame)
        else:
            rho_bias_squared = np.full(4, np.nan)
            rho_var_vec = np.full(4, np.nan)
            rho_mse_vec = np.full(4, np.nan)
            # Load mse decomposition data
            mse_rf300_1_to_3 = self.mm.load_data(
                'bivariate_analysis/%s_cor/mse_results_%s_cor/'
                'mse_rf300_1_to_3_%s_%s_cor.pkl' %
                (output_type, output_type, proxy_type, output_type))
            for k in range(1, 4):
                rho_mse_vec[k], rho_bias_squared[k], rho_var_vec[k] = mse_rf300_1_to_3.iloc[k, :]
            # Dataframe with information on MSE decomposition as a function of different learner parameterizations
            data_frame = pd.DataFrame({
                'bias_squared': rho_bias_squared,
                'variance': rho_var_vec,
                'MSE': rho_mse_vec
            })
            filename_save = 'mse_rf_%s_%s_cor_sensitivity_analysis_covariates.pkl' % (
                proxy_type, output_type)
            self.mm.save_data(
                'bivariate_analysis/%s_cor/mse_results_%s_cor/' %
                (output_type, output_type) + filename_save, data_frame)
        return data_frame
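
As a sanity check, the construction and factorization steps above compose as follows. A minimal standalone sketch (assuming the project-local imports resolve, since PreProcessor() instantiates TechnicalAnalyzer and ModuleManager; the numbers are illustrative and match the simulation script below):

import numpy as np

pp = PreProcessor()
vol_matrix = np.diag([0.08, 0.1])  # bivariate volatility matrix, as in the simulation script below
corr_matrix = pp.construct_correlation_matrix(0.5, 2)  # [[1.0, 0.5], [0.5, 1.0]]
cov_matrix = pp.construct_covariance_matrix(vol_matrix, corr_matrix)  # D R D
L = pp.cholesky_factorization(cov_matrix)
assert np.allclose(np.dot(L, L.T), cov_matrix)  # LL' recovers the covariance matrix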
Code example #2

# Imports assumed by this script (the PreProcessor and ModuleManager import paths
# below are assumptions; they are project-local modules):
import time

import numpy as np
import pandas as pd
from scipy.stats import chi2
from scipy.linalg import sqrtm

from preprocessing import PreProcessor
from module_manager import ModuleManager

def main():
    preprocesser = PreProcessor()
    mm = ModuleManager()

    def generate_random_points_on_hyperellipsoid(vol_data,
                                                 cor_data,
                                                 alpha_vec=np.array(
                                                     [0.9, 0.95, 0.975, 0.99]),
                                                 n_sample=int(1e4),
                                                 dim=30):
        header = alpha_vec
        result = pd.DataFrame(columns=header)
        for i in range(vol_data.shape[0]):
            start_time = time.time()
            var_estimates = []
            vol_mat = np.diag(vol_data.iloc[i, :])
            cor_mat = preprocesser.construct_correlation_matrix(
                corr_vec=cor_data.iloc[i, :], n=dim)
            H = preprocesser.construct_covariance_matrix(vol_matrix=vol_mat,
                                                         corr_matrix=cor_mat)
            r = np.random.randn(H.shape[0], n_sample)
            # u contains random points on the unit hypersphere
            u = r / np.linalg.norm(r, axis=0)
            for alpha in alpha_vec:
                y = np.sqrt(chi2.ppf(q=alpha, df=dim))  # alpha-quantile radius: ||z||^2 ~ chi-squared(dim)
                # Transform points on the unit hypersphere to the hyperellipsoid
                xrandom = sqrtm(H).dot(np.sqrt(y) * u)
                # Compute the largest absolute (equally) weighted average over the random points on the
                # hyperellipsoid. This is the maximum loss with alpha percent probability, i.e. Value-at-Risk
                var_alpha = np.max(np.abs(np.array([np.mean(x) for x in xrandom.T])))
                var_estimates.append(var_alpha)
            result = pd.concat([result,
                                pd.DataFrame(np.asarray(var_estimates).reshape(1, -1), columns=header)],
                               ignore_index=True)  # append this observation's VaR estimates as a new row
            print((i, time.time() - start_time))
        return result

    ##################################################################################################################
    ###                                      Multivariate Quantile Computation                                     ###
    ##################################################################################################################
    dim = 30
    vol_data = mm.load_data(
        'multivariate_analysis/volatilities_garch_norm_DJI30_2000_2001.pkl')
    #cor_data = mm.load_data('multivariate_analysis/cor_DCC_mvnorm_DJI30_1994_1995.pkl')
    cor_data = mm.load_data(
        'multivariate_analysis/pearson/pearson_cor_estimates/cor_knn5_pearson_10_DJI30_2000_2001.pkl'
    )

    result = generate_random_points_on_hyperellipsoid(vol_data=vol_data,
                                                      cor_data=cor_data)
    print(result)
    #mm.save_data('multivariate_analysis/VaR/var_dcc_mvnorm_1994_1995_nsample_1e6.pkl', result)
    #mm.transform_pickle_to_csv('multivariate_analysis/VaR/var_dcc_mvnorm_1994_1995_nsample_1e6.pkl')
    mm.save_data(
        'multivariate_analysis/VaR/var_knn5_pearson_garch_2000_2001_nsample_1e5_sqrt_chi2.pkl',
        result)
    mm.transform_pickle_to_csv(
        'multivariate_analysis/VaR/var_knn5_pearson_garch_2000_2001_nsample_1e5_sqrt_chi2.pkl'
    )
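
The geometric core of generate_random_points_on_hyperellipsoid is the map from the unit hypersphere to the alpha-quantile hyperellipsoid of N(0, H). A self-contained sketch with an illustrative 2-by-2 covariance matrix (note: this standard mapping scales u by the chi-squared quantile radius once, whereas the function above takes an additional square root, cf. the "_sqrt_chi2" suffix in the saved filenames):

import numpy as np
from scipy.stats import chi2
from scipy.linalg import sqrtm

dim, n_sample, alpha = 2, 1000, 0.99
H = np.array([[0.0064, 0.004],
              [0.004, 0.01]])  # illustrative positive definite covariance matrix
r = np.random.randn(dim, n_sample)
u = r / np.linalg.norm(r, axis=0)  # random points on the unit hypersphere
radius = np.sqrt(chi2.ppf(q=alpha, df=dim))  # ||z||^2 ~ chi-squared(dim) for z ~ N(0, I)
x = sqrtm(H).dot(radius * u)  # points on the boundary of the alpha-quantile hyperellipsoid
print(x.shape)  # (2, 1000)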
Code example #3

# Imports assumed by this script, including modules used only by the commented-out
# experiment blocks (project-local import paths below are assumptions):
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from preprocessing import PreProcessor
from module_manager import ModuleManager
from technical_analyzer import TechnicalAnalyzer

def main():

    preprocesser = PreProcessor()
    mm = ModuleManager()
    ta = TechnicalAnalyzer()


    ##################################################################################################################
    ###     Asset path simulation using Cholesky Factorization and predefined time-varying correlation dynamics    ###
    ##################################################################################################################
    """
    T = 1751
    a0 = 0.1
    a1 = 0.8
    random_corr = preprocesser.simulate_random_correlation_ar(T, a0, a1)
    # Simple volatility matrix with randomly chosen volatilities for illustration purposes
    vol_matrix = np.array([[0.08, 0],
                           [0, 0.1]])
    correlated_asset_paths = preprocesser.simulate_correlated_asset_paths(random_corr, vol_matrix, T)
    data = pd.DataFrame(correlated_asset_paths)
    data['rho'] = random_corr
    mm.save_data('/bivariate_analysis/correlated_sim_data.pkl', data)
    # Figure
    correlated_asset_paths = mm.load_data('bivariate_analysis/correlated_sim_data.pkl')
    correlated_asset_paths = correlated_asset_paths.tail(500)
    correlated_asset_paths.reset_index(drop=True, inplace=True)
    plt.plot(correlated_asset_paths.iloc[:, 0], label='$y_{1,t}$', linewidth=1, color='black')
    plt.plot(correlated_asset_paths.iloc[:, 1], label='$y_{2,t}$', linewidth=1, linestyle='--', color='blue')
    plt.plot(correlated_asset_paths.iloc[:, -1], label='$\\rho_t$', linewidth=1, color='red')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 500)
    plt.ylim(-0.5, 1)
    plt.show()
    """
    ##################################################################################################################
    ###     Estimation uncertainty in Pearson and Kendall correlation coefficient using moving window estimates    ###
    ##################################################################################################################
    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    T = 500
    delta_t = [21] #np.arange(3, 252)        # 3, 4, 5, 6, 7, 8, 9, 10, 21, 42, 63, 84, 126, 251
    proxy_type = ['pearson']  # alternatives: 'emw', 'kendall'
    ciw = 99

    """
    for dt, proxy_type in [(x, y) for x in delta_t for y in proxy_type]:
        start_time = time.time()
        print('(%s, %i)' % (proxy_type, dt))
        rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates = \
        preprocesser.bootstrap_moving_window_estimate(data=simulated_data_process, delta_t=dt, T=T, ciw=ciw,
                                                      proxy_type=proxy_type)
        data_frame = pd.DataFrame({'Percentile_low': lower_percentiles, 'Percentile_up': upper_percentiles,
                                   'std rho estimate': sd_rho_estimates, 'Rho_estimate': rho_estimates})
        filename = '%s_%i_estimate_uncertainty.pkl' % (proxy_type, dt)
        mm.save_data('bivariate_analysis/' + filename, data_frame)
        print("%s: %f" % ('Execution time:', (time.time() - start_time)))
    """
    """
    # Figures
    for dt, proxy_type in [(x, y) for x in delta_t for y in proxy_type]:
        data = mm.load_data('bivariate_analysis/results_%s/%s_%i_estimate_uncertainty.pkl' % (proxy_type, proxy_type, dt))
        rho_estimates = data['Rho_estimate']
        lower_percentiles = data['Percentile_low']
        upper_percentiles = data['Percentile_up']
        plt.figure()
        plt.plot(simulated_data_process['rho'], label='true correlation', linewidth=1, color='black')
        plt.plot(rho_estimates, label='%s correlation' % proxy_type.upper(), linewidth=1, color='red')
        plt.plot((upper_percentiles-lower_percentiles)-1, label='%d%% interval (bootstrap)'
                                                                % ciw, linewidth=1, color='magenta')
        #plt.plot(lower_percentiles, label='%d%% interval (bootstrap)' % ciw, linewidth=1, color='magenta')
        #plt.plot(upper_percentiles, label="", linewidth=1, color='magenta')
        plt.xlabel('observation')
        plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
                   edgecolor='black')
        plt.xlim(0, T)
        plt.yticks(np.arange(-1, 1.00000001, 0.2))
        plt.ylim(-1, 1)
        plt.show()
    """
    ##################################################################################################################
    ###       Mean squared error of Pearson and Kendall correlation coefficient using moving window estimates      ###
    ##################################################################################################################
    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    T = 500
    rho_true = simulated_data_process.tail(T).iloc[:, -1]
    rho_true.reset_index(drop=True, inplace=True)
    delta_t_min, delta_t_max = 3, 252
    delta_t = np.arange(3, 252)  # dt = {[3, 10], 21, 42, 63, 126, 251}  (and 84 possibly)
    proxy_type = ['pearson', 'emw', 'kendall']  # run proxies individually; otherwise each run overwrites the previous proxy's saved dataframe.
    rho_bias_squared = np.full(delta_t_max, np.nan)
    rho_var_vec = np.full(delta_t_max, np.nan)
    """
    # Create dataframe with (interpolated) mse results, squared bias, variance for varying window sizes
    for proxy_type, dt in [(x, y) for x in proxy_type for y in delta_t]:
        print('%s, %i' % (proxy_type, dt))
        data = mm.load_data('bivariate_analysis/%s_%i_estimate_uncertainty.pkl'
                            % (proxy_type, dt))
        rho_estimates = data['Rho_estimate']
        rho_bias_squared[dt] = np.mean(np.power(rho_estimates - rho_true, 2))
        rho_var_vec[dt] = np.power(np.mean(data['std rho estimate']), 2)

    rho_mse_vec = np.array([np.sum(pair) for pair in zip(rho_bias_squared, rho_var_vec)])
    data_frame = pd.DataFrame({'bias_squared': rho_bias_squared, 'variance': rho_var_vec,
                               'MSE': rho_mse_vec})
    filename = 'mse_%s.pkl' % proxy_type
    mm.save_data('bivariate_analysis/' + filename, data_frame)
    """
    """
    # Kendall correlation estimate 
        for col1, col2, in IT.combinations(simulated_data_process.columns[:-1], 2):
            def my_tau(idx):
                df_tau = simulated_data_process[[col1, col2]].iloc[idx+len(simulated_data_process)-T-dt+1]
                return kendalltau(df_tau[col1], df_tau[col2])[0]
            kendall_estimates = pd.rolling_apply(np.arange(T+dt-1), dt, my_tau)
        mse_kendall_vec[dt - 1] = mean_squared_error(rho_true, kendall_estimates[-T:])
    mm.save_data('/bivariate_analysis/mse_kendall_true_corr.pkl', mse_kendall_vec)
    print("%s: %f" % ('Execution time:', (time.time() - start_time)))
    """
    """
    # Load MSE data Pearson/ Kendall
    mse_pearson_vec = mm.load_data('bivariate_analysis/mse_pearson.pkl')
    mse_kendall_vec = mm.load_data('bivariate_analysis/mse_kendall.pkl')
    """
    """
    # Figure without interpolation MSE 
    plt.figure(1)
    plt.plot(mse_pearson_vec['MSE'], label='Pearson', color='indigo', linewidth=1)
    plt.plot(mse_kendall_vec['MSE'], label='Kendall', color='aquamarine', linewidth=1, linestyle='--')
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=5, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 250)
    plt.yticks(np.arange(0, 0.61, 0.1))
    plt.ylim(0, 0.6)
    plt.show()
    """
    """
    # Figure without interpolation MSE decomposition 
    plt.figure(2)
    plt.plot(mse_kendall_vec['bias_squared'], label='Squared Bias', color='blue', linewidth=1)
    plt.plot(mse_kendall_vec['variance'], label='Variance', color='red', linewidth=1)
    plt.plot(mse_kendall_vec['MSE'], label='MSE', color='black', linestyle='--', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=5, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 250)
    plt.yticks(np.arange(0, 0.61, 0.1))
    plt.ylim(0, 0.6)
    plt.show()
    """
    """
    # Variance in MSE window sizes
    var_mse_pearson = np.nanvar(mse_pearson_vec['MSE']); print('mse_pearson_var: %f' % var_mse_pearson)
    var_mse_kendall = np.nanvar(mse_kendall_vec['MSE']); print('mse_kendall_var: %f' % var_mse_kendall)

    # Max-min in MSE window sizes
    print('mse_pearson_min_max: (%f, %f)' % (np.nanmin(mse_pearson_vec['MSE']), np.nanmax(mse_pearson_vec['MSE'])))
    print('mse_kendall_min_max: (%f, %f)' % (np.nanmin(mse_kendall_vec['MSE']), np.nanmax(mse_kendall_vec['MSE'])))
    """

    ##################################################################################################################
    ###                         Minimum Determinant Pearson and Kendall Moving Window                              ###
    ##################################################################################################################
    # Get information on the minimum determinants over all correlation estimates for all window sizes [3, 100]
    delta_t = range(3, 101)
    det_min_vec = np.full(101, np.nan)
    proxy_type = 'pearson'
    """
    for dt in delta_t:
        # Load data Pearson/ Kendall
        det_data_vec = np.full(501, np.nan)
        filename = '%s_%i_estimate_uncertainty.pkl' % (proxy_type, dt)
        data = mm.load_data('bivariate_analysis/results_%s/%s' % (proxy_type, filename))
        # Compute determinants for every dataset
        for i, rho in enumerate(data['Rho_estimate']):
            det_data_vec[i+1] = preprocesser.determinant_LU_factorization(rho, 2)
        det_min_vec[dt] = np.nanmin(det_data_vec)
    mm.save_data('bivariate_analysis/determinant_min_%s.pkl' % proxy_type, det_min_vec)
    """
    """
    # Plot minimum determinants of Pearson and Kendall Moving Window estimates of correlation
    det_min_pearson = mm.load_data('bivariate_analysis/determinant_min_pearson.pkl')
    det_min_kendall = mm.load_data('bivariate_analysis/determinant_min_kendall.pkl')
    plt.figure(1)
    plt.plot(det_min_pearson, label='Pearson', linewidth=1, color='orange')
    plt.plot(det_min_kendall, label='Kendall', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('minimum det($R_t)$')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=2, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(-0.1, 1.1, 0.1))
    plt.ylim(-0.1, 1)
    plt.show()
    """

    ##################################################################################################################
    ###                                          Dataset creation                                                  ###
    ##################################################################################################################
    # Pearson and Kendall correlation moving window estimates as covariate and true correlation or moving window
    # estimate as proxy for output variable
    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    delta_t_min = 5
    delta_t_max = 6
    proxy_type = ['kendall']     # ['pearson', 'emw', 'kendall']
    """
    start_time = time.time()
    for dt, proxy_type in [(x,y) for x in range(delta_t_min, delta_t_max) for y in proxy_type]:
        print('(%i, %s)' % (dt, proxy_type))
        dataset, dataset_proxy = \
            preprocesser.generate_bivariate_dataset(ta, simulated_data_process, dt, proxy_type=proxy_type)
        mm.save_data('/bivariate_analysis/true_cor/%s/data/dataset_%s_%d.pkl' % (proxy_type, proxy_type, dt), dataset)
        mm.save_data('/bivariate_analysis/proxy_cor/%s/data/dataset_%s_%d.pkl' % (proxy_type, proxy_type, dt), dataset_proxy)
    print("%s: %f" % ('Execution time:', (time.time() - start_time)))
    """
    ##################################################################################################################
    ###    Estimation uncertainty in Pearson and Kendall correlation coefficient using machine learner estimates   ###
    ##################################################################################################################
    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    T = 500
    rho_true = simulated_data_process.tail(T).iloc[:, -1]
    rho_true.reset_index(drop=True, inplace=True)
    ciw = 99
    reps = 1000
    delta_t = [21]   # dt = {[3, 10], 21, 42, 63, 126, 251}  (and 84 possibly)
    model = ['knn']  # k-nearest neighbour: 'knn', random forest: 'rf'
    proxy_type = ['pearson', 'kendall']
    output_type = ['true', 'proxy']
    n_neighbours = [5]

    """
    for dt, proxy_type, model, k, output_type in [(x, y, z, k, o) for x in delta_t for y in proxy_type
                                     for z in model for k in n_neighbours for o in output_type]:
        start_time = time.time()
        print('(%i, %s, %s, %i)' % (dt, proxy_type, model, k))
        dataset = mm.load_data('bivariate_analysis/%s_cor/%s/data/dataset_mw_%i.pkl' % (output_type, proxy_type, dt))
        rho_estimates, lower_percentiles, upper_percentiles, sd_rho_estimates = \
        preprocesser.bootstrap_learner_estimate(data=dataset, reps=reps, model=model, n_neighbors=k)
        data_frame = pd.DataFrame({'Percentile_low': lower_percentiles, 'Percentile_up': upper_percentiles,
                                   'std rho estimate': sd_rho_estimates, 'Rho_estimate': rho_estimates})
        filename = '%s5_%s_%i_estimate_uncertainty_%s_corr.pkl' % (model, proxy_type, dt, output_type)
        mm.save_data('bivariate_analysis/%s_cor/%s/results_%s_%s_%s_cor/' % (output_type, proxy_type, model, proxy_type,
                                                                             output_type) + filename, data_frame)
        print("%s: %f" % ('Execution time', (time.time() - start_time)))
    """
    """
    # Figure with bootstrap uncertainty Nearest Neighbors
    for dt, proxy_type in [(x, y) for x in delta_t for y in proxy_type]:
        print('(%s, %i)' % (proxy_type, dt))
        data = mm.load_data('bivariate_analysis/proxy_cor/%s/results_knn_%s_proxy_cor/'
                            'knn5_%s_%i_estimate_uncertainty_proxy_corr.pkl' % (proxy_type, proxy_type, proxy_type, dt))
        rho_estimates = data['Rho_estimate']
        lower_percentiles = data['Percentile_low']
        upper_percentiles = data['Percentile_up']
        plt.figure()
        plt.plot(simulated_data_process['rho'], label='true correlation', linewidth=1, color='black')
        plt.plot(rho_estimates, label='KNN correlation', linewidth=1, color='red')
        plt.plot((upper_percentiles - lower_percentiles) - 1, label='%d%% interval (bootstrap)' % ciw,
                 linewidth=1, color='magenta')
        plt.xlabel('observation')
        plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
                   edgecolor='black')
        plt.xlim(0, T)
        plt.yticks(np.arange(-1, 1.00000001, 0.2))
        plt.ylim(-1, 1)
        plt.show()
    """
    """
    # Figure with bootstrap uncertainty Random Forest
    for proxy_type, output_type in [(x, y) for x in proxy_type for y in output_type]:
        filename = 'rf10_%s_21_estimate_uncertainty_rep_1000_%s_corr.pkl' % (proxy_type, output_type)
        print(filename)
        data = mm.load_data('bivariate_analysis/%s_cor/%s/results_rf_%s_%s_cor/%s' % (output_type, proxy_type,
                                                                                      proxy_type, output_type, filename))
        rho_estimates = data['Rho_estimate']
        lower_percentiles = data['Percentile_low']
        upper_percentiles = data['Percentile_up']
        plt.figure(1)
        plt.plot(simulated_data_process['rho'], label='true correlation', linewidth=1, color='black')
        plt.plot(rho_estimates, label='RF correlation', linewidth=1, color='red')
        plt.plot((upper_percentiles - lower_percentiles) - 1, label='%d%% interval (bootstrap)' % ciw,
                 linewidth=1, color='magenta')
        #plt.plot(lower_percentiles, label='%d%% interval (bootstrap)' % ciw, linewidth=1, color='magenta')
        #plt.plot(upper_percentiles, label="", linewidth=1, color='magenta')
        plt.xlabel('observation')
        plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
                   edgecolor='black')
        plt.xlim(0, T)
        plt.yticks(np.arange(-1, 1.1, 0.2))
        plt.ylim(-1, 1)
        plt.show()
     """

    ##################################################################################################################
    ###        Mean squared error of Pearson/Kendall correlation coefficient using machine learner estimates       ###
    ##################################################################################################################

    simulated_data_process = mm.load_data('/bivariate_analysis/correlated_sim_data.pkl')
    T = 500
    rho_true = simulated_data_process.tail(T).iloc[:, -1]
    rho_true.reset_index(drop=True, inplace=True)
    ciw = 99
    reps = 1000
    delta_t = [10]   #  range(3, 101)   # dt = {[3, 10], 21, 42, 63, 126, 251}  (and 84 possibly)
    model = ['rf']  # k-nearest neighbour: 'knn', random forest: 'rf'
    proxy_type = ['pearson']
    output_type = ['true']
    n_neighbour = [10, 100, 300, 600, 1000]  # 5, 10, 25, 50, 100, len_train, IDW
    rho_bias_squared = np.full(1001, np.nan)
    rho_var_vec = np.full(1001, np.nan)
    rho_mse_vec = np.full(1001, np.nan)

    """
    # Create dataframe with (interpolated) mse results, squared bias, variance for varying window lengths
    for model, n_neighbour, proxy_type, dt, output_type in [(w, k, x, y, z) for w in model for k in n_neighbour for
                                                            x in proxy_type for y in delta_t for z in output_type]:
        filename = '%s%i_%s_%i_estimate_uncertainty_rep_100_%s_corr.pkl' % (model, n_neighbour, proxy_type, dt, output_type)
        print(filename)
        data = mm.load_data('bivariate_analysis/%s_cor/%s/results_%s_%s_%s_cor/' % (output_type, proxy_type, model,
                                                                                    proxy_type, output_type) + filename)
        rho_estimates = data['Rho_estimate']
        rho_bias_squared[n_neighbour] = np.mean(np.power(rho_estimates-rho_true, 2))
        rho_var_vec[n_neighbour] = np.power(np.mean(data['std rho estimate']), 2)

    rho_mse_vec = np.array([np.sum(pair) for pair in zip(rho_bias_squared, rho_var_vec)])
    data_frame = pd.DataFrame({'bias_squared': rho_bias_squared, 'variance': rho_var_vec,
                               'MSE': rho_mse_vec})
    filename_save = 'mse_%s_%s_%s_cor_sensitivity_analysis_trees.pkl' % (model, proxy_type, output_type)
    print(filename_save)
    mm.save_data('bivariate_analysis/%s_cor/mse_results_%s_cor/' % (output_type, output_type) + filename_save, data_frame)
    """



    ## Load MSE data Pearson/ Kendall
    mse_pearson_vec = mm.load_data('bivariate_analysis/mse_pearson.pkl')
    mse_kendall_vec = mm.load_data('bivariate_analysis/mse_kendall.pkl')

    ## Load MSE data KNN
    # True Correlation
    mse_knn5_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn5_pearson_true_cor.pkl')
    mse_knn10_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn10_pearson_true_cor.pkl')
    mse_knn25_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn25_pearson_true_cor.pkl')
    mse_knn50_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn50_pearson_true_cor.pkl')
    mse_knn100_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn100_pearson_true_cor.pkl')
    mse_knn_len_train_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_len_train_pearson_true_cor.pkl')
    mse_knn_IDW_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_IDW_pearson_true_cor.pkl')

    mse_knn5_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn5_kendall_true_cor.pkl')
    mse_knn10_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn10_kendall_true_cor.pkl')
    mse_knn25_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn25_kendall_true_cor.pkl')
    mse_knn50_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn50_kendall_true_cor.pkl')
    mse_knn100_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn100_kendall_true_cor.pkl')
    mse_knn_len_train_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_len_train_kendall_true_cor.pkl')
    mse_knn_IDW_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_IDW_kendall_true_cor.pkl')

    # Proxy Correlation
    mse_knn5_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn5_pearson_proxy_cor.pkl')
    mse_knn10_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn10_pearson_proxy_cor.pkl')
    mse_knn25_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn25_pearson_proxy_cor.pkl')
    mse_knn50_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn50_pearson_proxy_cor.pkl')
    mse_knn100_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn100_pearson_proxy_cor.pkl')

    mse_knn_len_train_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_len_train_pearson_proxy_cor.pkl')
    mse_knn_IDW_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_IDW_pearson_proxy_cor.pkl')

    mse_knn5_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn5_kendall_proxy_cor.pkl')
    mse_knn_len_train_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_len_train_kendall_proxy_cor.pkl')
    mse_knn_IDW_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_IDW_kendall_proxy_cor.pkl')

    ## Load MSE data RF
    # True Correlation
    mse_rf10_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf10_pearson_true_cor.pkl')
    mse_rf100_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf100_pearson_true_cor.pkl')
    mse_rf300_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf300_pearson_true_cor.pkl')
    mse_rf1000_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf1000_pearson_true_cor.pkl')


    mse_rf10_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf10_kendall_true_cor.pkl')
    mse_rf100_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf100_kendall_true_cor.pkl')
    mse_rf300_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf300_kendall_true_cor.pkl')
    mse_rf1000_kendall_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf1000_kendall_true_cor.pkl')

    # Proxy Correlation
    mse_rf10_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_rf10_pearson_proxy_cor.pkl')

    mse_rf10_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_rf10_kendall_proxy_cor.pkl')



    # Figure without interpolation MSE
    """
    plt.figure(1)
    plt.plot(mse_pearson_vec['MSE'], label='Pearson', color='indigo', linewidth=1)
    #plt.plot(mse_kendall_vec['MSE'], label='Kendall', color='cyan', linestyle='--', linewidth=1)
    plt.plot(mse_knn5_pearson_proxy['MSE'], label='KNN(5)-Pearson', linewidth=1, color='brown')
    #plt.plot(mse_knn5_kendall_proxy['MSE'], label='KNN(5)-Kendall', linewidth=1, color='xkcd:azure')
    #plt.plot(mse_knn10_pearson_proxy['MSE'], label='KNN(10)', linewidth=1)
    #plt.plot(mse_knn25_pearson_proxy['MSE'], label='KNN(25)', linewidth=1)
    #plt.plot(mse_knn50_pearson_proxy['MSE'], label='KNN(50)', linewidth=1)
    plt.plot(mse_knn100_pearson_proxy['MSE'], label='KNN(100)', linewidth=1)
    plt.plot(mse_knn_IDW_pearson_proxy['MSE'], label='KNN(idw)-Pearson', color='black', linewidth=1)
    plt.plot(mse_rf10_pearson_proxy['MSE'], label='RF(10)', linewidth=1)
    #plt.plot(mse_knn_IDW_kendall_true['MSE'], label='KNN_kendall_idw', linewidth=1, color='xkcd:azure')
    #plt.plot(mse_knn_len_train_pearson_true['MSE'], label='KNN_pearson_len_train', linewidth=1)
    #plt.plot(mse_knn_len_train_pearson_proxy['MSE'], label='KNN_pearson_len_train', color='black', linewidth=1)
    #plt.plot(mse_knn_IDW_pearson_proxy['MSE'], label='KNN_pearson_IDW', color='black', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=7, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(0, 0.61, 0.1))
    plt.ylim(0, 0.60)
    plt.show()
    """
    # Figure without interpolation MSE decomposition
    """
    plt.figure(2)
    plt.plot(mse_knn_IDW_kendall_true['bias_squared'], label='Squared Bias', color='blue', linewidth=1)
    plt.plot(mse_knn_IDW_kendall_true['variance'], label='Variance', color='red', linewidth=1)
    plt.plot(mse_knn_IDW_kendall_true['MSE'], label='MSE', color='black', linestyle='--', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(0, 0.31, 0.02))
    plt.ylim(0, 0.2)
    plt.show()

    """
    # Figure with interpolation MSE decomposition sensitivity analysis
    """
    mse_knn_pearson_true_cor_sa = preprocesser.mse_knn_sensitivity_analysis()
    mm.save_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_pearson_true_cor_sensitivity_analysis.pkl',
                 mse_knn_pearson_true_cor_sa)
    mse_knn_kendall_true_cor_sa = preprocesser.mse_knn_sensitivity_analysis(proxy_type='kendall')
    mm.save_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_knn_kendall_true_cor_sensitivity_analysis.pkl',
                 mse_knn_kendall_true_cor_sa)
    """
    """
    mse_knn_pearson_proxy_cor_sa = preprocesser.mse_knn_sensitivity_analysis(output_type='proxy')
    mm.save_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_pearson_proxy_cor_sensitivity_analysis.pkl',
                 mse_knn_pearson_proxy_cor_sa)
    mse_knn_kendall_proxy_cor_sa = preprocesser.mse_knn_sensitivity_analysis(proxy_type='kendall', output_type='proxy')
    mm.save_data('bivariate_analysis/proxy_cor/mse_results_proxy_cor/mse_knn_kendall_proxy_cor_sensitivity_analysis.pkl',
                 mse_knn_kendall_proxy_cor_sa)

    """
    """
    plt.figure(3)
    xs = np.arange(1001)
    s1mask = np.isfinite(mse_knn_pearson_proxy_cor_sa['bias_squared'])
    s2mask = np.isfinite(mse_knn_pearson_proxy_cor_sa['variance'])
    s3mask = np.isfinite(mse_knn_pearson_proxy_cor_sa['MSE'])
    plt.plot(xs[s1mask], mse_knn_pearson_proxy_cor_sa['bias_squared'][s1mask], label='Squared Bias', color='blue', linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s2mask], mse_knn_pearson_proxy_cor_sa['variance'][s2mask], label='Variance', color='red', linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s3mask], mse_knn_pearson_proxy_cor_sa['MSE'][s3mask], label='MSE', color='black', linestyle='--', linewidth=1, marker='.')

    plt.xlabel('number of neighbours')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
                  edgecolor='black')
    plt.xlim(0, 100)
    plt.xticks([5, 10, 25, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000])
    plt.yticks(np.arange(0, 0.21, 0.02))
    plt.ylim(0, 0.2)
    plt.show()
    """
    """
    # Variance in MSE window sizes for KNN with Pearson/ Kendall as covariates.
    # True Correlation
    #var_mse_knn5_pearson_true = np.nanvar(mse_knn5_pearson_true['MSE']); print('mse_knn5_pearson_var: %.8f' % var_mse_knn5_pearson_true)
    #var_mse_knn5_kendall_true = np.nanvar(mse_knn5_kendall_true['MSE']); print('mse_knn5_kendall_var: %.8f' % var_mse_knn5_kendall_true)
    #var_mse_knn_len_train_pearson_true = np.nanvar(mse_knn_len_train_pearson_true['MSE']); print('mse_knn_len_train_pearson_var: %.13f' % var_mse_knn_len_train_pearson_true)
    #var_mse_knn_IDW_pearson_true = np.nanvar(mse_knn_IDW_pearson_true['MSE']); print('mse_knn_IDW_pearson_var: %.9f' % var_mse_knn_IDW_pearson_true)
    #var_mse_knn_len_train_kendall_true = np.nanvar(mse_knn_len_train_kendall_true['MSE']); print('mse_knn_len_train_kendall_var: %f' % var_mse_knn_len_train_kendall_true)
    #var_mse_knn_IDW_kendall_true = np.nanvar(mse_knn_IDW_kendall_true['MSE']); print('mse_knn_IDW_kendall_var: %f' % var_mse_knn_IDW_kendall_true)
    # Proxy Correlation
    #var_mse_knn5_pearson_proxy = np.nanvar(mse_knn5_pearson_proxy['MSE']); print('mse_knn5_pearson_proxy_var: %.6f' % var_mse_knn5_pearson_proxy)
    #var_mse_knn5_kendall_proxy = np.nanvar(mse_knn5_kendall_proxy['MSE']); print('mse_knn5_kendall_proxy_var: %.6f' % var_mse_knn5_kendall_proxy)
    #var_mse_knn_len_train_pearson_proxy = np.nanvar(mse_knn_len_train_pearson_proxy['MSE']); print('mse_knn_len_train_pearson_proxy_var: %.8f' % var_mse_knn_len_train_pearson_proxy)
    #var_mse_knn_len_train_kendall_proxy = np.nanvar(mse_knn_len_train_kendall_proxy['MSE']); print('mse_knn_len_train_kendall_proxy_var: %.9f' % var_mse_knn_len_train_kendall_proxy)
    #var_mse_knn_IDW_pearson_proxy = np.nanvar(mse_knn_IDW_pearson_proxy['MSE']); print('mse_knn_IDW_pearson_proxy_var: %.8f' % var_mse_knn_IDW_pearson_proxy)
    #var_mse_knn_IDW_kendall_proxy = np.nanvar(mse_knn_IDW_kendall_proxy['MSE']); print('mse_knn_IDW_kendall_proxy_var: %.8f' % var_mse_knn_IDW_kendall_proxy)

    # Min-max of the MSE across window sizes for KNN with Pearson/Kendall as covariates.
    # True Correlation
    #print('mse_knn5_pearson_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn5_pearson_true['MSE']), np.nanmax(mse_knn5_pearson_true['MSE'])))
    #print('mse_knn5_kendall_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn5_kendall_true['MSE']), np.nanmax(mse_knn5_kendall_true['MSE'])))
    #print('mse_knn_len_train_pearson_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_len_train_pearson_true['MSE']), np.nanmax(mse_knn_len_train_pearson_true['MSE'])))
    #print('mse_knn_IDW_pearson_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_IDW_pearson_true['MSE']), np.nanmax(mse_knn_IDW_pearson_true['MSE'])))
    #print('mse_knn_len_train_kendall_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_len_train_kendall_true['MSE']), np.nanmax(mse_knn_len_train_kendall_true['MSE'])))
    #print('mse_knn_IDW_kendall_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_IDW_kendall_true['MSE']), np.nanmax(mse_knn_IDW_kendall_true['MSE'])))
    # Proxy Correlation
    #print('mse_knn5_pearson_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn5_pearson_proxy['MSE']), np.nanmax(mse_knn5_pearson_proxy['MSE'])))
    #print('mse_knn5_kendall_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn5_kendall_proxy['MSE']), np.nanmax(mse_knn5_kendall_proxy['MSE'])))
    #print('mse_knn_len_train_pearson_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_len_train_pearson_proxy['MSE']), np.nanmax(mse_knn_len_train_pearson_proxy['MSE'])))
    #print('mse_knn_len_train_kendall_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_len_train_kendall_proxy['MSE']), np.nanmax(mse_knn_len_train_kendall_proxy['MSE'])))
    #print('mse_knn_IDW_pearson_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_IDW_pearson_proxy['MSE']), np.nanmax(mse_knn_IDW_pearson_proxy['MSE'])))
    #print('mse_knn_IDW_kendall_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_knn_IDW_kendall_proxy['MSE']), np.nanmax(mse_knn_IDW_kendall_proxy['MSE'])))
    """
    """
    # Variance of the MSE across window sizes for RF with Pearson/Kendall as covariates.
    # True Correlation
    #var_mse_rf10_pearson_true = np.nanvar(mse_rf10_pearson_true['MSE']); print('var_mse_rf10_pearson_true: %.8f' % var_mse_rf10_pearson_true)
    #var_mse_rf10_kendall_true = np.nanvar(mse_rf10_kendall_true['MSE']); print('var_mse_rf10_kendall_true: %.8f' % var_mse_rf10_kendall_true)
    # Proxy Correlation
    var_mse_rf10_pearson_proxy = np.nanvar(mse_rf10_pearson_proxy['MSE']); print('var_mse_rf10_pearson_proxy: %.6f' % var_mse_rf10_pearson_proxy)
    var_mse_rf10_kendall_proxy = np.nanvar(mse_rf10_kendall_proxy['MSE']); print('var_mse_rf10_kendall_proxy: %.6f' % var_mse_rf10_kendall_proxy)

    # Min-max of the MSE across window sizes for RF with Pearson/Kendall as covariates.
    # True Correlation
    #print('mse_rf10_pearson_min_max: (%.4f, %.4f)' % (np.nanmin(mse_rf10_pearson_true['MSE']), np.nanmax(mse_rf10_pearson_true['MSE'])))
    #print('mse_rf10_kendall_min_max: (%.4f, %.4f)' % (np.nanmin(mse_rf10_kendall_true['MSE']), np.nanmax(mse_rf10_kendall_true['MSE'])))
    
    # Proxy Correlation
    print('mse_rf10_pearson_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_rf10_pearson_proxy['MSE']), np.nanmax(mse_rf10_pearson_proxy['MSE'])))
    print('mse_rf10_kendall_proxy_min_max: (%.4f, %.4f)' % (np.nanmin(mse_rf10_kendall_proxy['MSE']), np.nanmax(mse_rf10_kendall_proxy['MSE'])))
    """
    """
    # Figure without interpolation MSE
    plt.figure(4)
    plt.plot(mse_knn10_pearson_proxy['MSE'], label='KNN(10)-Pearson', linewidth=1)
    plt.plot(mse_pearson_vec['MSE'], label='Pearson', color='indigo', linewidth=1)
    #plt.plot(mse_kendall_vec['MSE'], label='Kendall', color='cyan', linestyle='--', linewidth=1)
    plt.plot(mse_knn_IDW_pearson_proxy['MSE'], label='KNN(idw)-Pearson', color='black', linewidth=1)
    plt.plot(mse_knn100_pearson_proxy['MSE'], label='KNN(100)-Pearson', color='red', linewidth=1)
    plt.plot(mse_rf10_pearson_proxy['MSE'], label='RF(10)-Pearson', color='goldenrod', linewidth=1)
    #plt.plot(mse_rf10_kendall_proxy['MSE'], label='RF(10)-Kendall', color='xkcd:teal', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.13), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(0, 0.61, 0.1))
    plt.ylim(0, 0.6)
    plt.show()
    """

    # Figure without interpolation MSE decomposition
    """
    mse_dt_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_dt_pearson_true_cor.pkl')
    mse_rf10_2_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf10_2_pearson_true_cor.pkl')
    mse_rf10_3_pearson_true = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf10_3_pearson_true_cor.pkl')
    
    plt.figure(5)
    plt.plot(mse_rf10_kendall_proxy['bias_squared'], label='Squared Bias', color='blue', linewidth=1)
    plt.plot(mse_rf10_kendall_proxy['variance'], label='Variance', color='red', linewidth=1)
    plt.plot(mse_rf10_kendall_proxy['MSE'], label='MSE', color='black', linestyle='--', linewidth=1)
    #plt.plot(mse_dt_pearson_true['bias_squared'], label='DT Squared Bias', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(0, 0.31, 0.02))
    plt.ylim(0, 0.3)
    plt.show()
    """
    """
    # Figure with interpolation MSE decomposition sensitivity analysis number of covariates
    mse_rf_pearson_true_cor_sa = mm.load_data('bivariate_analysis/true_cor/mse_results_true_cor/mse_rf300_1_to_3_pearson_true_cor.pkl')
    plt.figure(3)
    xs = np.arange(4)
    s1mask = np.isfinite(mse_rf_pearson_true_cor_sa['bias_squared'])
    s2mask = np.isfinite(mse_rf_pearson_true_cor_sa['variance'])
    s3mask = np.isfinite(mse_rf_pearson_true_cor_sa['MSE'])
    plt.plot(xs[s1mask], mse_rf_pearson_true_cor_sa['bias_squared'][s1mask], label='Squared Bias', color='blue', linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s2mask], mse_rf_pearson_true_cor_sa['variance'][s2mask], label='Variance', color='red', linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s3mask], mse_rf_pearson_true_cor_sa['MSE'][s3mask], label='MSE', color='black', linestyle='--', linewidth=1, marker='.')

    plt.xlabel('number of covariates')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 3)
    plt.xticks([0, 1, 2, 3])
    plt.yticks(np.arange(0, 0.21, 0.02))
    plt.ylim(0, 0.2)
    plt.show()
    """
    """
    # Figure with interpolation MSE decomposition sensitivity analysis number of trees
    mse_rf_pearson_true_cor_sa_trees = preprocesser.mse_rf_sensitivity_analysis(rho_true=rho_true, type='trees')
    mse_rf_kendall_true_cor_sa_trees = preprocesser.mse_rf_sensitivity_analysis(
        rho_true=rho_true, proxy_type='kendall', output_type='true', type='trees')

    mse_rf_pearson_proxy_cor_sa_trees = preprocesser.mse_rf_sensitivity_analysis(
        rho_true=rho_true, output_type='proxy', type='trees')
    mse_rf_kendall_proxy_cor_sa_trees = preprocesser.mse_rf_sensitivity_analysis(
        rho_true=rho_true, proxy_type='kendall', output_type='proxy', type='trees')
    plt.figure(4)
    xs = np.arange(1001)
    s1mask = np.isfinite(mse_rf_kendall_true_cor_sa_trees['bias_squared'])
    s2mask = np.isfinite(mse_rf_kendall_true_cor_sa_trees['variance'])
    s3mask = np.isfinite(mse_rf_kendall_true_cor_sa_trees['MSE'])
    plt.plot(xs[s1mask], mse_rf_kendall_true_cor_sa_trees['bias_squared'][s1mask], label='Squared Bias', color='blue',
             linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s2mask], mse_rf_kendall_true_cor_sa_trees['variance'][s2mask], label='Variance', color='red',
             linestyle='-', linewidth=1, marker='.')
    plt.plot(xs[s3mask], mse_rf_kendall_true_cor_sa_trees['MSE'][s3mask], label='MSE', color='black', linestyle='--',
             linewidth=1, marker='.')
    plt.xlabel('number of estimators')
    plt.ylabel('MSE')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=3, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 1000)
    plt.xticks([10, 100, 300, 600, 1000])
    plt.yticks(np.arange(0, 0.21, 0.02))
    plt.ylim(0, 0.2)
    plt.show()
    """
    ##################################################################################################################
    ###                                   Minimum Determinant Learning Algorithms                                  ###
    ##################################################################################################################
    # Rho_estimate
    # Collect the minimum determinant over all correlation estimates for each window size in [3, 100]
    delta_t = range(3, 101)
    det_min_vec = np.full(101, np.nan)  # indexed by window size, so entries 0-2 stay NaN
    proxy_type = 'pearson'
    output_type = 'true'
    learner = 'rf'

    """                    
    for dt in delta_t:
        # Load data Pearson/ Kendall
        det_data_vec = np.full(501, np.nan)  # filled 1-based below, so index 0 stays NaN
        filename = '%s10_%s_%i_estimate_uncertainty_rep_1000_%s_corr.pkl' % (learner, proxy_type, dt, output_type)
        print(filename)
        data = mm.load_data('bivariate_analysis/%s_cor/%s/results_%s_%s_%s_cor/%s'
                            % (output_type, proxy_type, learner, proxy_type, output_type, filename))
        # Compute determinants for every dataset
        for i, rho in enumerate(data['Rho_estimate']):
            det_data_vec[i+1] = preprocesser.determinant_LU_factorization(rho, 2)
        det_min_vec[dt] = np.nanmin(det_data_vec)
    filename_save = 'determinant_min_%s10_%s_%s_cor.pkl' % (learner, proxy_type, output_type)
    mm.save_data('bivariate_analysis/%s_cor/det_results_%s_cor/%s' % (output_type, output_type, filename_save), det_min_vec)
    """

    # Plot minimum determinants of the KNN correlation estimates
    # True Cor
    det_min_knn5_pearson = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn5_pearson_true_cor.pkl')
    det_min_knn5_kendall = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn5_kendall_true_cor.pkl')
    det_min_knn_len_train_pearson = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn_len_train_pearson_true_cor.pkl')
    det_min_knn_len_train_kendall = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn_len_train_kendall_true_cor.pkl')
    det_min_knn_IDW_pearson = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn_IDW_pearson_true_cor.pkl')
    det_min_knn_IDW_kendall = mm.load_data('bivariate_analysis/true_cor/det_results_true_cor/determinant_min_knn_IDW_kendall_true_cor.pkl')
    # Proxy Cor
    det_min_knn5_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn5_pearson_proxy_cor.pkl')
    det_min_knn5_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn5_kendall_proxy_cor.pkl')
    det_min_knn_len_train_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn_len_train_pearson_proxy_cor.pkl')
    det_min_knn_len_train_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn_len_train_kendall_proxy_cor.pkl')
    det_min_knn_IDW_pearson_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn_IDW_pearson_proxy_cor.pkl')
    det_min_knn_IDW_kendall_proxy = mm.load_data('bivariate_analysis/proxy_cor/det_results_proxy_cor/determinant_min_knn_IDW_kendall_proxy_cor.pkl')

    """
    plt.figure(1)
    plt.plot(det_min_knn_IDW_pearson_proxy, label='KNN(idw)-Pearson', linewidth=1, color='orange')
    plt.plot(det_min_knn_IDW_kendall_proxy, label='KNN(idw)-Kendall', linewidth=1)
    plt.plot(det_min_knn_len_train_pearson_proxy, label='KNN(unif)-Pearson', linewidth=1)
    plt.plot(det_min_knn_len_train_kendall_proxy, label='KNN(unif)-Kendall', linewidth=1)
    plt.xlabel('window length')
    plt.ylabel('minimum det($R_t$)')
    plt.legend(fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.1), ncol=2, fancybox=True,
               edgecolor='black')
    plt.xlim(0, 100)
    plt.yticks(np.arange(-0.1, 1.1, 0.1))
    plt.ylim(-0.1, 1)
    plt.show()
    """
    """