Example #1
def compute_and_save_cov_hat_vs_sample_size(sample_size_from: int,
                                            sample_size_to: int,
                                            sample_size_by: int,
                                            replication_count: int,
                                            mean: int,
                                            sigma: int,
                                            noise_type: str,
                                            lag: int,
                                            sample_type: str = "ma1"):
    par_list = {
        "sample_size_from": sample_size_from,
        "sample_size_to": sample_size_to,
        "sample_size_by": sample_size_by,
        "replication_count": replication_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "lag": lag,
        "sample_type": sample_type
    }

    sample_size_array = np.arange(start=sample_size_from,
                                  stop=sample_size_to,
                                  step=sample_size_by)
    cov_hat_t_free_array = np.full(shape=(replication_count,
                                          len(sample_size_array)),
                                   fill_value=np.nan)

    if sample_type == "ma1":
        true_cov_array = np.repeat(true_cov_ma1_t_free(lag=lag, sigma=sigma),
                                   len(sample_size_array))
    elif sample_type == "ma3":
        true_cov_array = np.repeat(true_cov_ma3_t_free(lag=lag, sigma=sigma),
                                   len(sample_size_array))

    for index_col, sample_size in enumerate(sample_size_array):
        for replication in range(replication_count):
            if sample_type == "ma1":
                sample = diagonal_sample_tvma1(sample_size=sample_size,
                                               mean=mean,
                                               sigma=sigma,
                                               noise_type=noise_type)
            elif sample_type == "ma3":
                sample = diagonal_sample_tvma3(sample_size=sample_size,
                                               mean=mean,
                                               sigma=sigma,
                                               noise_type=noise_type)
            cov_hat_t_free_array[replication,
                                 index_col] = cov_hat_t_free(sample=sample,
                                                             lag=lag)

    plot_double_array(x_array=sample_size_array,
                      hat_double_array=cov_hat_t_free_array,
                      true_array=true_cov_array,
                      title="Cov hat t free by sample size",
                      x_label="sample size",
                      par_list=par_list,
                      axis='row',
                      true_label="true autocovariance t free",
                      y_label='autocovariance')
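
The estimator cov_hat_t_free used above is defined elsewhere in the project. As a point of reference, here is a minimal sketch of the plain t-free lag-h sample autocovariance it presumably computes (an assumption; the project's own implementation may center or scale differently):

import numpy as np


def cov_hat_t_free_sketch(sample: np.ndarray, lag: int) -> float:
    """Sketch of a plain lag-h sample autocovariance:
    (1/n) * sum over t of (x_t - x_bar) * (x_{t+lag} - x_bar).
    Illustrative only; the project's cov_hat_t_free may differ
    in centering or normalization."""
    n = len(sample)
    centered = sample - sample.mean()
    return float(np.dot(centered[:n - lag], centered[lag:]) / n)


# toy check on i.i.d. noise: lag 0 is close to the variance, lag 1 close to 0
rng = np.random.default_rng(0)
x = rng.normal(size=10_000)
print(cov_hat_t_free_sketch(x, lag=0))
print(cov_hat_t_free_sketch(x, lag=1))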
Example #2
def compute_and_save_v_vs_nw(sample_size,
                             t_par_count,
                             mean,
                             sigma,
                             noise_type,
                             sample_type="ma1"):
    """
    This function is for visual comparison of v double summation formula with regular NW.
    It saves a single image with two curves that must coincide. And they currently do coincide.
    """
    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    if sample_type == "ma1":
        true_lrv_array = true_lrv_ma1_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    elif sample_type == "ma3":
        true_lrv_array = true_lrv_ma3_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma3(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)

    max_lag = int(support_bound(sample_size=sample_size)) + 1

    cov_double_array = cov_double_array_of_t(sample=sample,
                                             t_par_count=t_par_count,
                                             max_lag=max_lag)
    original_nw_array = lrv_hat_nw_of_t(cov_double_array=cov_double_array,
                                        sample_size=sample_size)

    double_sum_nw_array = lrv_hat_nw_2_of_t(sample=sample,
                                            t_par_array=t_par_array)

    arrays_dict = {
        "Double sum Newey-West": double_sum_nw_array,
        "Original Newey-West": original_nw_array
    }

    plot_arrays(x_array=t_par_array,
                arrays_dict=arrays_dict,
                true_array=true_lrv_array,
                title='Double sum vs original Newey-West',
                x_label='t par',
                par_list=par_list)
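
The docstring's claim that the two curves coincide can also be checked numerically instead of visually. Below is a sketch of such a check, under the assumption that the function were adapted to return the pair (double_sum_nw_array, original_nw_array) rather than only plotting it; that return value is hypothetical:

import numpy as np


def curves_coincide(a: np.ndarray, b: np.ndarray, rtol: float = 1e-8) -> bool:
    """True if the two estimate curves agree elementwise up to tolerance."""
    return bool(np.allclose(a, b, rtol=rtol))


# toy illustration with stand-in arrays
a_demo = np.array([0.98, 1.01, 1.00])
b_demo = np.array([0.98, 1.01, 1.00])
print(curves_coincide(a_demo, b_demo))  # True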
Example #3
def compute_and_save_nw_single_n(sample_size: int,
                                 t_par_count: int,
                                 mean: float,
                                 sigma: float,
                                 noise_type: str,
                                 replication_count: int,
                                 sample_type: str = "ma1") -> np.ndarray:
    """
    This function computes r (replication_count) arrays of t-dependent NW estimates,
    first generating r samples of a given fixed n.
    Then it saves a sinle image to output directory.
    :param cov_double_array: covariance double array
    :return: array of newey west
    """
    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "replication_count": replication_count,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)
    if sample_type == "ma1":
        true_lrv_array = true_lrv_ma1_of_t(sigma=sigma, t_par_array=t_par_array)
    elif sample_type == "ma3":
        true_lrv_array = true_lrv_ma3_of_t(sigma=sigma, t_par_array=t_par_array)

    nw_hat_double_array = np.full(shape=(t_par_count, replication_count),
                                  fill_value=np.nan)
    max_lag = int(support_bound(sample_size=sample_size)) + 1

    for r in range(replication_count):
        if sample_type == "ma1":
            sample = diagonal_sample_tvma1(sample_size=sample_size, mean=mean,
                                           sigma=sigma, noise_type=noise_type)
        elif sample_type == "ma3":
            sample = diagonal_sample_tvma3(sample_size=sample_size, mean=mean,
                                           sigma=sigma, noise_type=noise_type)

        cov_double_array = cov_double_array_of_t(sample=sample,
                                                 t_par_count=t_par_count,
                                                 max_lag=max_lag)
        nw_hat_double_array[:, r] = lrv_hat_nw_of_t(
            cov_double_array=cov_double_array,
            sample_size=sample_size)

    plot_double_array(x_array=t_par_array,
                      hat_double_array=nw_hat_double_array,
                      true_array=true_lrv_array,
                      title=" Newey-West vs true lrv",
                      x_label="t par",
                      par_list=par_list)

    return nw_hat_double_array
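
Because compute_and_save_nw_single_n also returns the (t_par_count, replication_count) array, a caller can summarize it directly. A sketch of such a driver, assuming it is appended to the module that defines the function; all parameter values are illustrative only:

import numpy as np

if __name__ == "__main__":
    # illustrative values, not recommendations
    nw_hat = compute_and_save_nw_single_n(sample_size=300,
                                          t_par_count=11,
                                          mean=0.0,
                                          sigma=1.0,
                                          noise_type="gaussian",
                                          replication_count=50,
                                          sample_type="ma1")
    print(nw_hat.shape)                # (11, 50): t values by replications
    print(np.nanmean(nw_hat, axis=1))  # mean NW estimate at each t
    print(np.nanstd(nw_hat, axis=1))   # spread across replications at each t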
Example #4
def compute_and_save_acf(sample_size: int,
                         mean: int,
                         sigma: int,
                         noise_type: str,
                         sd_type: str,
                         sample_type: str = "ma1"):
    """
    It saves to output directory, the sample autocovariance function for several lags
    We generate the sample, given the default process, currently TVMA(1).
    If later we change the default process, we would need to change the call
    diagonal_sample_tvma1 to something else.
    Currently, max lag is the one for threshold.
    Later, if need arises, we may introduce the max_lag argument.
    """
    par_list = {"sample_size": sample_size,
                "mean": mean,
                "sigma": sigma,
                "noise_type": noise_type,
                "sd_type": sd_type,
                "sample_type": sample_type}

    if sample_type == "ma1":
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    elif sample_type == "ma3":
        sample = diagonal_sample_tvma3(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    max_lag = threshold_max_lag(sample_size=sample_size)
    cov_hat = cov_column_t_free(sample=sample,
                                max_lag=max_lag)
    sd_cov_hat_array = np.full(shape=max_lag, fill_value=np.nan)
    for lag in range(max_lag):
        sd_cov_hat_array[lag] = sd_cov_hat(sample_size=sample_size,
                                           lag=lag,
                                           noise_type=noise_type,
                                           sd_type=sd_type,
                                           sample_type=sample_type)
    cloud = sd_cov_hat_array * zhou_treshold(sample_size=sample_size)

    plot_acf(cov_hat=cov_hat,
             cloud=cloud,
             par_list=par_list)
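
If the ±cloud band is read as a significance threshold for the sample autocovariances (an interpretation, not something the code above states), a caller could also list the lags that stick out of it. A small, self-contained sketch:

import numpy as np


def lags_outside_cloud(cov_hat: np.ndarray, cloud: np.ndarray) -> np.ndarray:
    """Indices of lags whose estimated autocovariance exceeds the band.
    Assumes cov_hat and cloud are equal-length 1-D arrays, as in
    compute_and_save_acf above; reading the band as a significance
    threshold is an assumption."""
    return np.nonzero(np.abs(cov_hat) > cloud)[0]


# toy illustration
print(lags_outside_cloud(np.array([1.2, 0.4, 0.05, -0.02]),
                         np.array([0.3, 0.3, 0.3, 0.3])))  # [0 1]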
Example #5
def compute_and_save_cov_hat_hist(sample_size: int,
                                  replication_count: int,
                                  mean: int,
                                  sigma: int,
                                  noise_type: str,
                                  lag: int,
                                  sample_type: str = "ma1"):
    par_list = {
        "sample_size": sample_size,
        "replication_count": replication_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "lag": lag,
        "sample_type": sample_type
    }

    cov_hat_t_free_array = np.full(shape=replication_count, fill_value=np.nan)

    if sample_type == "ma1":
        true_cov = true_cov_ma1_t_free(lag=lag, sigma=sigma)
    elif sample_type == "ma3":
        true_cov = true_cov_ma3_t_free(lag=lag, sigma=sigma)

    for replication in range(replication_count):
        if sample_type == "ma1":
            sample = diagonal_sample_tvma1(sample_size=sample_size,
                                           mean=mean,
                                           sigma=sigma,
                                           noise_type=noise_type)
        elif sample_type == "ma3":
            sample = diagonal_sample_tvma3(sample_size=sample_size,
                                           mean=mean,
                                           sigma=sigma,
                                           noise_type=noise_type)
        cov_hat_t_free_array[replication] = cov_hat_t_free(sample=sample,
                                                           lag=lag)

    arrays_dict = {"Autocovariance": cov_hat_t_free_array}

    plot_histograms(arrays_dict=arrays_dict,
                    true_value=true_cov,
                    title="Autocovariance",
                    par_list=par_list,
                    true_label="true autocovariance")
Example #6
def semi_bootstrap(sample_size: int,
                   lag: int,
                   mean: float,
                   sigma: float,
                   noise_type: str,
                   sample_type: str = "ma1"):
    """
    Generates a sample and computes a block estimate for var(covHat). 
    :param sample_size: size of a sample to be generated. 
    :param lag: lag of autocovariance, whose variance should be generated. 
    :param mean: mean for the noise whose sample should be generated. 
    :param sigma: sigma for the noise whose sample should be generated. 
    :param noise_type: type for the noise whose sample should be generated.
    :param sample_type: type of the sample that should be generated.
    :return: semi_bootstrap_value, a block estimate value. 
    """
    batch_size_value = batch_size(sample_size=sample_size)
    if sample_type == "ma1":
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    elif sample_type == "ma3":
        sample = diagonal_sample_tvma3(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    paired_product_array = paired_products(sample=sample, lag=lag)
    block_sum_array = block_sums(paired_product_array=paired_product_array)

    cum_sum = 0

    for index in range(len(block_sum_array) - batch_size_value):
        cum_sum += ((block_sum_array[index] -
                     block_sum_array[index + batch_size_value]) /
                    np.sqrt(2 * batch_size_value))**2

    semi_bootstrap_value = cum_sum / sample_size

    return semi_bootstrap_value
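
The accumulation loop in semi_bootstrap can also be written as a single vectorized expression. A sketch of the equivalence, assuming block_sum_array is a 1-D NumPy array and batch_size_value is at least 1 (which the loop itself already requires):

import numpy as np


def semi_bootstrap_from_block_sums(block_sum_array: np.ndarray,
                                   batch_size_value: int,
                                   sample_size: int) -> float:
    """Vectorized equivalent of the accumulation loop in semi_bootstrap."""
    # differences of block sums that are batch_size_value positions apart
    diffs = block_sum_array[:-batch_size_value] - block_sum_array[batch_size_value:]
    # ((S_i - S_{i+m}) / sqrt(2m))**2 summed over i, then divided by n
    return float(np.sum(diffs ** 2) / (2 * batch_size_value * sample_size))


# toy check against the original loop on stand-in block sums
rng = np.random.default_rng(0)
s, m, n = rng.normal(size=50), 4, 200
loop_value = sum(((s[i] - s[i + m]) / np.sqrt(2 * m)) ** 2
                 for i in range(len(s) - m)) / n
print(np.isclose(loop_value, semi_bootstrap_from_block_sums(s, m, n)))  # True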
Example #7
def compute_and_save_cov_and_cov_hats(sample_size,
                                      t_par_count,
                                      gamma_count,
                                      mean,
                                      sigma,
                                      lag,
                                      noise_type,
                                      diag_or_horiz,
                                      sample_type: str = "ma1"):
    """
    For all values of t, this function computes the true covariance and its estimates
    using K(t).
    Saves image image file. No CSV.
    If diag_or_horiz is horiz, we generate the 2d horizontal sample,
    and for each line we compute cov hats using the non-kernel formula.
    """
    par_list = {"sample_size": sample_size,
                "t_par_count": t_par_count,
                "gamma_count": gamma_count,
                "mean": mean,
                "sigma": sigma,
                "lag": lag,
                "sample_type": sample_type,
                "noise_type": noise_type,
                "diag_or_horiz": diag_or_horiz}

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    true_gamma_array = np.full(shape=t_par_count, fill_value=np.nan)

    gamma_hat_double_array = np.full(shape=(t_par_count, gamma_count),
                                     fill_value=np.nan)

    if sample_type == "ma1":
        for t_index in range(t_par_count):
            true_gamma_array[t_index] = true_cov_ma1_of_t(t_par=t_par_array[t_index],
                                                          sigma=sigma,
                                                          lag=lag)
    elif sample_type == "ma3":
        for t_index in range(t_par_count):
            true_gamma_array[t_index] = true_cov_ma3_of_t(t_par=t_par_array[t_index],
                                                          sigma=sigma,
                                                          lag=lag)
    """
    For each index, generate a sample (later called replication)
    and compute gamma.
    """
    for index in range(gamma_count):
        if sample_type == "scaled_noise":
            if diag_or_horiz == "diag":
                sample = diagonal_sample_scaled_noise(
                    sample_size=sample_size,
                    mean=mean,
                    sigma=sigma,
                    noise_type=noise_type)
            elif diag_or_horiz == "horiz":
                horizontal = horizontal_sample_scaled_noise(
                    sample_size=sample_size,
                    t_par_count=t_par_count,
                    mean=mean,
                    sigma=sigma,
                    noise_type=noise_type)
        elif sample_type == "ma1":
            if diag_or_horiz == "diag":
                sample = diagonal_sample_tvma1(
                    sample_size=sample_size,
                    mean=mean,
                    sigma=sigma,
                    noise_type=noise_type)
            elif diag_or_horiz == "horiz":
                horizontal = horizontal_sample_tvma1(
                    sample_size=sample_size,
                    t_par_count=t_par_count,
                    mean=mean,
                    sigma=sigma,
                    noise_type=noise_type)
        elif sample_type == "ma3":
            if diag_or_horiz == "diag":
                sample = diagonal_sample_tvma3(
                    sample_size=sample_size,
                    mean=mean,
                    sigma=sigma,
                    noise_type=noise_type)
            elif diag_or_horiz == "horiz":
                horizontal = horizontal_sample_tvma3(
                    sample_size=sample_size,
                    t_par_count=t_par_count,
                    mean=mean,
                    sigma=sigma,
                    noise_type=noise_type)
        for t_index in range(t_par_count):
            if diag_or_horiz == "horiz":
                sample = horizontal[t_index]
                gamma_hat_double_array[t_index, index] = cov_hat_t_free(
                    sample=sample,
                    lag=lag)
            elif diag_or_horiz == "diag":
                gamma_hat_double_array[t_index, index] = cov_hat_of_t(
                    sample=sample,
                    t_par=t_par_array[t_index],
                    lag=lag)

        print("There are", gamma_count - (index + 1), "replications left")

    plot_double_array(x_array=t_par_array,
                      hat_double_array=gamma_hat_double_array,
                      true_array=true_gamma_array,
                      title='Autocovariance',
                      axis='column',
                      x_label='t par',
                      par_list=par_list)
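
A sketch of a driver that exercises both diag_or_horiz modes of the function above, assuming it is run from the module that defines it; the parameter values are illustrative only:

if __name__ == "__main__":
    for mode in ("diag", "horiz"):
        # illustrative values, not recommendations
        compute_and_save_cov_and_cov_hats(sample_size=300,
                                          t_par_count=11,
                                          gamma_count=20,
                                          mean=0.0,
                                          sigma=1.0,
                                          lag=1,
                                          noise_type="gaussian",
                                          diag_or_horiz=mode,
                                          sample_type="ma1")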
Example #8
def compute_and_save_threshold_nw_t_free(sample_size: int,
                                         replication_count: int,
                                         mean: int,
                                         sigma: int,
                                         noise_type: str,
                                         sd_type: str,
                                         lrv_est: str,
                                         sample_type: str = "ma1"):
    """
    Illustrated in
    402 LRV 3a / computing 2 / project 2 / Threshold / M: threshold t free
    Saves a single image file with
    histogram of replicated NW estimate threshold or both.
    True value is marked on all histograms.
    """
    par_list = {
        "sample_size": sample_size,
        "replication_count": replication_count,
        "mean": mean,
        "sigma": sigma,
        "sd_type": sd_type,
        "noise_type": noise_type,
        "sample_type": sample_type
    }

    threshold_t_free_array = np.full(shape=replication_count,
                                     fill_value=np.nan)
    nw_t_free_array = np.full(shape=replication_count, fill_value=np.nan)

    if sample_type == "ma1":
        true_lrv = true_lrv_ma1_t_free(sigma=sigma)
    elif sample_type == "ma3":
        true_lrv = true_lrv_ma3_t_free(sigma=sigma)

    support_bound_value = int(support_bound(sample_size=sample_size)) + 1
    threshold_max_lag_value = threshold_max_lag(sample_size=sample_size)

    max_lag = max(support_bound_value, threshold_max_lag_value)

    for replication in range(replication_count):
        if sample_type == "ma1":
            sample = diagonal_sample_tvma1(sample_size=sample_size,
                                           mean=mean,
                                           sigma=sigma,
                                           noise_type=noise_type)
        elif sample_type == "ma3":
            sample = diagonal_sample_tvma3(sample_size=sample_size,
                                           mean=mean,
                                           sigma=sigma,
                                           noise_type=noise_type)
        cov_column = cov_column_t_free(sample=sample, max_lag=max_lag)
        if lrv_est == "threshold" or lrv_est == "both":
            threshold_t_free_array[replication] = lrv_hat_threshold_t_free(
                cov_hat_column=cov_column[:threshold_max_lag_value],
                sample_size=sample_size,
                noise_type=noise_type,
                sd_type=sd_type,
                sample_type=sample_type)
        if lrv_est == "nw" or lrv_est == "both":
            nw_t_free_array[replication] = lrv_hat_nw_t_free(
                cov_column=cov_column[:support_bound_value],
                sample_size=sample_size)
        print("compute_and_save_threshold_nw_t_free",
              replication_count - (replication + 1), "left")

    arrays_dict = {
        "Newey-West": nw_t_free_array,
        "Threshold": threshold_t_free_array
    }

    plot_histograms(arrays_dict=arrays_dict,
                    true_value=true_lrv,
                    title="{} t free lrv".format(lrv_est),
                    par_list=par_list)
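
A sketch of a driver that runs the function above once per lrv_est option ("nw", "threshold", "both", the values the code branches on), assuming it is run from the defining module; the sd_type value and all numbers are illustrative assumptions:

if __name__ == "__main__":
    for estimator in ("nw", "threshold", "both"):
        compute_and_save_threshold_nw_t_free(sample_size=300,
                                             replication_count=100,
                                             mean=0.0,
                                             sigma=1.0,
                                             noise_type="gaussian",
                                             sd_type="native",  # hypothetical value
                                             lrv_est=estimator,
                                             sample_type="ma1")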
Example #9
def compute_and_save_var_cov_hat_native_matrix(
        replication_count: int,
        sample_size_array: np.array,
        mean: int,
        sigma: int,
        noise_type: str,
        is_data: bool = False,
        fix_number_of_lags: int = None,
        sample_type: str = "ma1") -> np.ndarray:
    """
    N: Returns and saves to CSV the matrix of variance values of covHat
    for different lags and sample sizes.
    The values currently do not match theoretical ones.
    But I think, it is in a minor way, and the values are usable.

    :param replication_count:
    :param sample_size_array:
    :param mean:
    :param sigma:
    :param noise_type: type of noise 'gaussian' or 'bernoulli'
    :param is_data: save in "data" folder or in "output" folder
    :param fix_number_of_lags: how many lags should use
    :param sample_type: 'ma1' or 'ma3'
    :return:
    """

    if fix_number_of_lags:
        date = '_{}_{}_lags'.format(noise_type, fix_number_of_lags)
    else:
        date = '_{}'.format(noise_type)

    # create directory for data if it doesn't exist
    now = datetime.datetime.now()
    parent_dir = dirname(dirname(__file__))
    if is_data:
        data_folder = os.path.join(parent_dir, "data")
    if not is_data:
        data_folder = os.path.join(parent_dir, "output")
        date += '_{}'.format(now.strftime("%H;%M;%S;%f"))
    if not os.path.exists(data_folder):
        os.mkdir(data_folder)

    if fix_number_of_lags:
        max_lag_array = np.arange(0, fix_number_of_lags + 1)
    else:
        max_lag_array = [
            int(support_bound(sample_size))
            for sample_size in sample_size_array
        ]

    # result matrix
    var_cov_hat_native_matrix = np.full(shape=(max_lag_array[-1] + 1,
                                               len(sample_size_array)),
                                        fill_value=np.nan)

    # for DataFrame
    column_names = [
        "sample size " + str(sample_size) for sample_size in sample_size_array
    ]
    index_names = ["lag " + str(lag) for lag in range(max_lag_array[-1] + 1)]

    for i, sample_size in enumerate(sample_size_array):
        # compute every lag up to the overall maximum, so all columns share the same rows
        max_lag = max(max_lag_array)

        print('sample size:', sample_size)
        for lag in range(max_lag + 1):
            cov_array = np.full(shape=replication_count, fill_value=np.nan)

            for r in range(replication_count):
                # sample = horizontal_sample_tvma1(sample_size=sample_size,
                #                                  t_par_count=11,
                #                                  mean=mean,
                #                                  sigma=sigma,
                #                                  noise_type=noise_type)[5, :]
                sample = diagonal_sample_tvma3(sample_size=sample_size,
                                               mean=mean,
                                               sigma=sigma,
                                               noise_type=noise_type,
                                               noise=None)
                cov_array[r] = cov_hat_t_free(sample, lag)

            var_cov_hat_native_matrix[lag, i] = np.var(cov_array)
            print('lags left:', max_lag - lag)

            # convert to Pandas DataFrame
            df_var_cov_hat_native_matrix = pd.DataFrame(
                var_cov_hat_native_matrix,
                index=index_names,
                columns=column_names)
            df_var_cov_hat_native_matrix.index.name = 'lag'

            df_var_cov_hat_native_matrix.to_csv(
                os.path.join(
                    data_folder, "var_cov_hat_native_matrix{}_{}.csv".format(
                        date, sample_type)))

    return var_cov_hat_native_matrix
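
A sketch of a call to the function above, assuming it is run from its defining module; all values are illustrative. With fix_number_of_lags=5, the returned matrix has rows for lags 0 through 5 and one column per sample size:

import numpy as np

if __name__ == "__main__":
    var_matrix = compute_and_save_var_cov_hat_native_matrix(
        replication_count=200,
        sample_size_array=np.array([100, 200, 400]),
        mean=0.0,
        sigma=1.0,
        noise_type="gaussian",
        is_data=False,          # write the CSV to "output" rather than "data"
        fix_number_of_lags=5,
        sample_type="ma3")
    print(var_matrix.shape)     # (6, 3): lags 0..5 by 3 sample sizes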
Example #10
def compute_and_save_threshold_single_n(sample_size: int,
                                        t_par_count: int,
                                        mean: int,
                                        sigma: int,
                                        noise_type: str,
                                        sd_type: str,
                                        replication_count: int,
                                        sample_type: str = "ma1"):
    """
    Saves a straw plot of several replicates of t-dependent threshold estimates
    for the given sample_size.
    Illustrated in
    385 LRV3a / computing 2 / project 2 / tests / ME: Test the function
    """
    par_list = {"sample_size": sample_size,
                "t_par_count": t_par_count,
                "mean": mean,
                "sigma": sigma,
                "noise_type": noise_type,
                "sd_type": sd_type,
                "replication_count": replication_count,
                "sample_type": sample_type}

    threshold_hat_double_array = np.full(shape=(t_par_count, replication_count),
                                         fill_value=np.nan)

    t_par_array = create_t_par_array(t_par_count=t_par_count)
    if sample_type == "ma1":
        true_lrv_array = true_lrv_ma1_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
    elif sample_type == "ma3":
        true_lrv_array = true_lrv_ma3_of_t(sigma=sigma,
                                           t_par_array=t_par_array)

    max_lag = threshold_max_lag(sample_size=sample_size)

    for replication in range(replication_count):
        if sample_type == "ma1":
            sample = diagonal_sample_tvma1(sample_size=sample_size,
                                           mean=mean,
                                           sigma=sigma,
                                           noise_type=noise_type)
        elif sample_type == "ma3":
            sample = diagonal_sample_tvma3(sample_size=sample_size,
                                           mean=mean,
                                           sigma=sigma,
                                           noise_type=noise_type)
        cov_double_array = cov_double_array_of_t(sample=sample,
                                                 t_par_count=t_par_count,
                                                 max_lag=max_lag)
        threshold_hat_double_array[:, replication] = lrv_hat_threshold_of_t(
            cov_double_array=cov_double_array,
            sample_size=sample_size,
            noise_type=noise_type,
            sd_type=sd_type,
            sample_type=sample_type)

    plot_double_array(x_array=t_par_array,
                      hat_double_array=threshold_hat_double_array,
                      true_array=true_lrv_array,
                      title="Thresholds vs true lrv",
                      x_label="t par",
                      par_list=par_list)
Example #11
def compute_and_save_nw_threshold_single_t(sample_size_from: int,
                                           sample_size_to: int,
                                           sample_size_by: int,
                                           replication_count: int,
                                           mean: int,
                                           sigma: int,
                                           noise_type: str,
                                           sd_type: str,
                                           t_par="free",
                                           sample_type="ma1"):
    """
    For a series of sample sizes,
    this function generates r samples for each sample size,
    Illustrated in
    412 LRV3a / computing 2 / project 2 / vs_sample_size / M: NW and T vs sample size
    Saves 6 image files.
    For each of the two estimates, it computes and depicts the base estimates,
    and then all 4 precision indicators.
    It may be either for a given t or for t-free.
    """
    par_list = {
        "replication_count": replication_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sd_type": sd_type,
        "t_par": t_par,
        "sample_type": sample_type
    }

    sample_size_array = np.arange(start=sample_size_from,
                                  stop=sample_size_to,
                                  step=sample_size_by)

    # compute max lags for threshold and Newey-West
    support_bound_array = [
        int(support_bound(sample_size=sample_size)) + 1
        for sample_size in sample_size_array
    ]
    threshold_max_lag_array = [
        threshold_max_lag(sample_size=sample_size)
        for sample_size in sample_size_array
    ]
    # elementwise maximum of the two lag rules (the built-in max would pick one
    # whole list by lexicographic comparison, not the per-element maximum)
    max_lag_array = np.maximum(threshold_max_lag_array, support_bound_array)

    # empty double arrays for estimates
    threshold_double_array = np.full(shape=(replication_count,
                                            len(sample_size_array)),
                                     fill_value=np.nan)
    nw_double_array = np.full(shape=(replication_count,
                                     len(sample_size_array)),
                              fill_value=np.nan)

    # compute one of two true values
    if isinstance(t_par, numbers.Number):
        if sample_type == "ma1":
            true_LRV_array = np.repeat(
                true_lrv_ma1_of_single_t(sigma=sigma, t_par=t_par),
                len(sample_size_array))
        elif sample_type == "ma3":
            true_LRV_array = np.repeat(
                true_lrv_ma3_of_single_t(sigma=sigma, t_par=t_par),
                len(sample_size_array))
    elif t_par == 'free':
        if sample_type == "ma1":
            true_LRV_array = np.repeat(true_lrv_ma1_t_free(sigma=sigma),
                                       len(sample_size_array))
        elif sample_type == "ma3":
            true_LRV_array = np.repeat(true_lrv_ma3_t_free(sigma=sigma),
                                       len(sample_size_array))
    else:
        raise ValueError(
            't_par should be "free" or a number, not {}'.format(t_par))

    for col_index, sample_size in enumerate(sample_size_array):
        max_lag = max_lag_array[col_index]
        threshold_max_lag_value = threshold_max_lag_array[col_index]
        nw_max_lag_value = support_bound_array[col_index]
        for replication in range(replication_count):
            if sample_type == "ma1":
                sample = diagonal_sample_tvma1(sample_size=sample_size,
                                               mean=mean,
                                               sigma=sigma,
                                               noise_type=noise_type)
            elif sample_type == "ma3":
                sample = diagonal_sample_tvma3(sample_size=sample_size,
                                               mean=mean,
                                               sigma=sigma,
                                               noise_type=noise_type)
            if isinstance(t_par, numbers.Number):
                cov_hat_column = cov_column_of_t(sample=sample,
                                                 t_par=t_par,
                                                 max_lag=max_lag)
            elif t_par == 'free':
                cov_hat_column = cov_column_t_free(sample=sample,
                                                   max_lag=max_lag)
            threshold_double_array[replication, col_index] = \
                lrv_hat_threshold_t_free(
                    cov_hat_column=cov_hat_column[:threshold_max_lag_value],
                    sample_size=sample_size,
                    noise_type=noise_type,
                    sd_type=sd_type,
                    sample_type=sample_type)
            nw_double_array[replication, col_index] = lrv_hat_nw_t_free(
                cov_column=cov_hat_column[:nw_max_lag_value],
                sample_size=sample_size)

    col_names = [
        "sample size " + str(sample_size) for sample_size in sample_size_array
    ]
    threshold_double_array_df = pd.DataFrame(threshold_double_array,
                                             columns=col_names)
    nw_double_array_df = pd.DataFrame(nw_double_array, columns=col_names)

    plot_ridgline(hat_double_array=threshold_double_array_df,
                  title="Threshold ridgline",
                  x_label="value",
                  par_list=par_list)

    plot_ridgline(hat_double_array=nw_double_array_df,
                  title="Newey-West ridgline",
                  x_label="value",
                  par_list=par_list)

    plot_double_array(x_array=sample_size_array,
                      hat_double_array=threshold_double_array,
                      true_array=true_LRV_array,
                      title="Threshold LRV t = {0}".format(t_par),
                      x_label="sample size",
                      par_list=par_list,
                      axis='row',
                      true_label='True lrv',
                      y_label='LRV')

    plot_double_array(x_array=sample_size_array,
                      hat_double_array=nw_double_array,
                      true_array=true_LRV_array,
                      title="Newey-West LRV t = {0}".format(t_par),
                      x_label="sample size",
                      par_list=par_list,
                      axis='row',
                      true_label='True lrv',
                      y_label='LRV')

    arrays_dict = {
        "Newey-West": nw_double_array,
        "Threshold": threshold_double_array
    }

    compute_and_save_multi_precision_of_t(true_array=true_LRV_array,
                                          est_dict=arrays_dict,
                                          par_list=par_list,
                                          x_label="sample size",
                                          x_array=sample_size_array)
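
The elementwise combination of the two lag arrays earlier in this function relies on np.maximum; the built-in max would compare the two lists lexicographically and return one of them whole, which is not the per-sample-size maximum. A quick illustration:

import numpy as np

threshold_lags = [3, 9, 4]
support_lags = [5, 2, 8]

print(max(threshold_lags, support_lags))         # [5, 2, 8] (one whole list)
print(np.maximum(threshold_lags, support_lags))  # [5 9 8]   (elementwise)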
Example #12
def compute_and_save_nw_vs_threshold(sample_size: int,
                                     t_par_count: int,
                                     mean: int,
                                     sigma: int,
                                     noise_type: str,
                                     sd_type: str,
                                     sample_type: str = "ma1"):
    """
    Illustrated in
    394 LRV 3a / computing 2 / project 2 / Threshold / N: compute_and_save_nw_vs_threshold
    It saves one image file of 2 t-dependent plots. For two estimates.
    No precision is computed here so far.
    """
    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sd_type": sd_type,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    if sample_type == "ma1":
        true_lrv_array = true_lrv_ma1_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    elif sample_type == "ma3":
        true_lrv_array = true_lrv_ma3_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma3(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)

    support_bound_value = int(support_bound(sample_size=sample_size)) + 1
    threshold_max_lag_value = threshold_max_lag(sample_size=sample_size)

    max_lag = max(support_bound_value, threshold_max_lag_value)

    cov_double_array = cov_double_array_of_t(sample=sample,
                                             t_par_count=t_par_count,
                                             max_lag=max_lag)

    nw_lrv_array = lrv_hat_nw_of_t(
        cov_double_array=cov_double_array[:support_bound_value, :],
        sample_size=sample_size)
    threshold_lrv_array = lrv_hat_threshold_of_t(
        cov_double_array=cov_double_array[:threshold_max_lag_value, :],
        sample_size=sample_size,
        noise_type=noise_type,
        sd_type=sd_type,
        sample_type=sample_type)

    arrays_dict = {
        "Newey-West LRV": nw_lrv_array,
        "Threshold LRV": threshold_lrv_array
    }

    plot_arrays(x_array=t_par_array,
                arrays_dict=arrays_dict,
                title="Threshold vs Newey-West",
                x_label="t par",
                par_list=par_list,
                true_array=true_lrv_array,
                y_label="LRV")