def read_and_plot_var_cov_hat_csv(noise_type: str):
    """
    Plot the block ("bootstrap") estimate of var(covHat) against the natively
    simulated var(covHat), both taken at lag 0, versus the sample size.

    Two CSV files are loaded through ``read_matrix``: the block-estimate
    matrix and the native matrix for ``noise_type``. The sample sizes are the
    digits found in the column labels of the block-estimate matrix. Only the
    native values are multiplied by their sample size; the block estimate is
    plotted as read. Both curves go to ``plot_arrays`` on the same axes.

    Author's note: currently does not work because the file names changed.

    :param noise_type: type of noise, 'gaussian' or 'bernoulli'
    """
    block_table = read_matrix(
        name="var_cov_hat_bootstrap_matrix_batch_size_formula.csv",
        index_col='lag')
    native_table = read_matrix(
        name='var_cov_hat_native_matrix_{}.csv'.format(noise_type),
        index_col='lag')

    # Column labels carry the sample size; keep only their digits.
    sizes = []
    for column_label in block_table.columns:
        sizes.append(int(re.sub("[^0-9]", "", column_label)))

    block_row = np.array(block_table.loc['lag 0'])
    native_row = np.array(native_table.loc['lag 0'])

    # Scale the native values by the sample size, position by position.
    for position, size in enumerate(sizes):
        native_row[position] *= size

    curves = {
        "bootstrap with lag 0": block_row,
        "native matrix with lag 0": native_row,
    }
    title = "bootstrap vs native matrix with {} noise batch size by power".format(
        noise_type)

    plot_arrays(x_array=sizes,
                arrays_dict=curves,
                title=title,
                x_label="sample size")

    print('Made picture "{}"'.format(title))
# Example no. 2
# 0
def compute_and_save_v_vs_nw(sample_size,
                             t_par_count,
                             mean,
                             sigma,
                             noise_type,
                             sample_type="ma1"):
    """
    Visually compare the "v" double-summation formula with regular Newey-West.

    A sample of the requested type is generated, both LRV estimates are
    computed over the same grid of t parameters, and the two curves plus the
    true LRV are handed to ``plot_arrays``. According to the original author
    the two curves must coincide (and currently do).

    :param sample_size: forwarded to the sample generator, ``support_bound``
        and ``lrv_hat_nw_of_t``
    :param t_par_count: number of t parameters; forwarded to
        ``create_t_par_array`` and ``cov_double_array_of_t``
    :param mean: forwarded to the sample generator
    :param sigma: forwarded to the sample generator and the true-LRV function
    :param noise_type: forwarded to the sample generator
    :param sample_type: 'ma1' or 'ma3'
    :raises ValueError: if ``sample_type`` is neither 'ma1' nor 'ma3'
    """
    # Fail fast: without this check an unknown sample_type would leave
    # `sample` and `true_lrv_array` unbound and crash later with an
    # UnboundLocalError that hides the real cause.
    if sample_type not in ("ma1", "ma3"):
        raise ValueError(
            "Unsupported sample_type {!r}: expected 'ma1' or 'ma3'".format(
                sample_type))

    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    if sample_type == "ma1":
        true_lrv_array = true_lrv_ma1_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    else:  # "ma3", guaranteed by the validation above
        true_lrv_array = true_lrv_ma3_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma3(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)

    # Largest lag passed to the covariance computation.
    max_lag = int(support_bound(sample_size=sample_size)) + 1

    cov_double_array = cov_double_array_of_t(sample=sample,
                                             t_par_count=t_par_count,
                                             max_lag=max_lag)
    original_nw_array = lrv_hat_nw_of_t(cov_double_array=cov_double_array,
                                        sample_size=sample_size)

    double_sum_nw_array = lrv_hat_nw_2_of_t(sample=sample,
                                            t_par_array=t_par_array)

    arrays_dict = {
        "Double sum Newey-West": double_sum_nw_array,
        "Original Newey-West": original_nw_array
    }

    plot_arrays(x_array=t_par_array,
                arrays_dict=arrays_dict,
                true_array=true_lrv_array,
                title='Double sum vs original Newey-West',
                x_label='t par',
                par_list=par_list)
# Example no. 3
# 0
def read_and_plot_var_cov_hat_theoretical_and_native_matrixes_csv(
        noise_type: str, what_lag: int, fix_number_of_lags=None) -> None:
    """
    Plot theoretical and native var(covHat) for one lag versus sample size.

    Both matrices are read from CSV through ``read_matrix``; the row labelled
    'lag <what_lag>' is taken from each and multiplied element-wise by the
    sample size (the digits found in the theoretical matrix's column labels).
    Three curves are passed to ``plot_arrays``:
        1. theoretical var cov hat,
        2. native var cov hat,
        3. a constant line at the mean of the scaled native values.

    :param noise_type: type of noise, 'gaussian' or 'bernoulli'
    :param what_lag: lag to plot, for example 0, 1, 2
    :param fix_number_of_lags: accepted but not used by this function
    """
    theory_table = read_matrix(
        name='var_cov_hat_theoretical_matrix_{}.csv'.format(noise_type),
        index_col='lag')
    native_table = read_matrix(
        name='var_cov_hat_native_matrix_{}.csv'.format(noise_type),
        index_col='lag')

    # Column labels carry the sample size; keep only their digits.
    sizes = [
        int(re.sub("[^0-9]", "", column_label))
        for column_label in theory_table.columns
    ]

    theory_row = np.array(theory_table.loc['lag {}'.format(what_lag)])
    native_row = np.array(native_table.loc['lag {}'.format(what_lag)])

    # Scale both rows by the sample size, position by position.
    for position, size in enumerate(sizes):
        theory_row[position] *= size
        native_row[position] *= size

    # Horizontal reference line at the mean of the scaled native values.
    native_mean_row = np.full(shape=native_row.shape,
                              fill_value=float(np.mean(native_row)))

    curves = {
        'theoretical with lag {}'.format(what_lag): theory_row,
        'native matrix with lag {}'.format(what_lag): native_row,
        'native matrix mean with lag {}'.format(what_lag): native_mean_row,
    }
    title = "theoretical vs native matrixes with lag={} and {} noise".format(
        what_lag, noise_type)

    plot_arrays(x_array=sizes,
                arrays_dict=curves,
                title=title,
                x_label="sample size")

    print('Made picture "{}"'.format(title))
def read_and_plot_var_cov_hat_native_matrix_means_csv(lags_array: np.
                                                      ndarray = np.array([]),
                                                      sample_type: str = "ma1",
                                                      is_deg: bool = False
                                                      ) -> None:
    """
    Plot var cov hat means versus lag, one curve per noise type.

    The means matrix is read from CSV through ``read_matrix``. Only its first
    two columns are plotted (per the original notes: gaussian first, bernoulli
    second), in red and blue respectively.

    :param lags_array: array of lags, example: [0,1,2,3,4,5]; when empty, the
        lags are taken from the digits of the matrix's index labels
    :param sample_type: 'ma1' or 'ma3'
    :param is_deg: if True - 'degenerate', if False - 'non degenerate' process
    """
    deg = 'deg' if is_deg else 'non_deg'
    native_matrix = read_matrix(
        name='var_cov_hat_native_matrix_means_{}_{}.csv'.format(
            sample_type, deg),
        index_col='lag')

    # Only the first two columns are plotted.
    noise_type_array = [
        str(noise_type) for noise_type in native_matrix.columns
    ][:2]

    # len() rather than truthiness: lags_array may be a numpy array.
    if not len(lags_array):
        lags_array = [
            int(re.sub("[^0-9]", "", lag)) for lag in native_matrix.index
        ]

    par_list = {'sample_type': sample_type, 'is_deg': deg}

    # create dict for plotting
    arrays_dict = dict()
    for noise_type in noise_type_array:
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
        values = np.full(shape=len(lags_array), fill_value=np.nan)

        for i, lag in enumerate(lags_array):
            # NOTE(review): integer key on what is presumably a string-labelled
            # index ('lag N'), so this looks like positional access - confirm
            # and consider .iloc[lag] or .loc['lag N'].
            values[i] = native_matrix[noise_type][lag]

        arrays_dict[noise_type] = values

    # create title
    sample_title = 'MA(1)' if sample_type == 'ma1' else 'MA(3)' if sample_type == 'ma3' else sample_type
    deg_title = 'degenerate' if is_deg else 'nondegenerate'
    title = 'sample_size*Var(CovHat) means for {} {} sample'.format(
        deg_title, sample_title)

    plot_arrays(x_array=lags_array,
                arrays_dict=arrays_dict,
                title=title,
                x_label="lags",
                y_label="var(covHat)",
                par_list=par_list,
                color_array=['red', 'blue'])
    print('Made picture "{}"'.format(title))
# Example no. 5
# 0
def read_and_plot_var_cov_hat_native_matrix_by_lags_csv(
        noise_type: str,
        fix_number_of_lags=None,
        sample_type: str = "ma1",
        is_deg: bool = False,
        mult_on_sample_size: bool = False) -> None:
    """
    N: Plot var cov hat versus lags, one curve per sample size.

    The native matrix is read from CSV through ``read_matrix``. Every column
    labelled 'sample size <n>' becomes one curve, so len(sample_size_array)
    lines are plotted.

    :param noise_type: type of noise, 'gaussian' or 'bernoulli'
    :param fix_number_of_lags: when truthy, selects the CSV whose name
        contains '_<fix_number_of_lags>_lags'
    :param sample_type: 'ma1' or 'ma3'
    :param is_deg: if True - 'degenerate', if False - 'non degenerate' process
    :param mult_on_sample_size: if True, each cell is multiplied by the sample
        size of its column; if False, the values are plotted as read
    :return: None; the curves are passed to ``plot_arrays``
    """
    # Assemble the CSV name from its optional and mandatory parts.
    deg_tag = 'deg' if is_deg else 'non_deg'
    lags_tag = '_{}_lags'.format(
        fix_number_of_lags) if fix_number_of_lags else ''
    file_name = 'var_cov_hat_native_matrix_{}{}_{}_{}.csv'.format(
        noise_type, lags_tag, sample_type, deg_tag)
    table = read_matrix(name=file_name, index_col='lag')

    # Index and column labels carry numbers; keep only their digits.
    lags = [int(re.sub("[^0-9]", "", label)) for label in table.index]
    sizes = [int(re.sub("[^0-9]", "", label)) for label in table.columns]

    curves = {}
    for size in sizes:
        key = 'sample size {}'.format(size)
        column = table[key]

        if mult_on_sample_size:
            curves[key] = [value * size for value in column]
        else:
            curves[key] = column

    # Human-readable pieces of the plot title.
    if sample_type == 'ma1':
        sample_title = 'MA(1)'
    elif sample_type == 'ma3':
        sample_title = 'MA(3)'
    else:
        sample_title = sample_type
    deg_title = 'degenerate' if is_deg else 'nondegenerate'

    title = 'Var(CovHat) with {} noise and {} {} sample vs lags'.format(
        noise_type, deg_title, sample_title)
    if mult_on_sample_size:
        title = 'sample_size*' + title

    plot_arrays(x_array=lags,
                arrays_dict=curves,
                title=title,
                x_label="lags",
                y_label="var(covHat)")

    print('Made picture "{}"'.format(title))
def read_and_plot_var_cov_hat_native_matrix_by_sample_size_csv(
        noise_type: str,
        count_lags: np.ndarray,
        fix_number_of_lags=None,
        sample_type: str = 'ma1',
        is_deg: bool = False,
        mult_on_sample_size: bool = False) -> None:
    """
    N: Plot var cov hat versus sample size, one curve per requested lag.

    The native matrix is read from CSV through ``read_matrix``. For every
    entry of ``count_lags`` the row at that position is extracted, so
    len(count_lags) lines are plotted.

    :param noise_type: type of noise, 'gaussian' or 'bernoulli'
    :param count_lags: array of lags, example: [0,1,2,3,4,5]; each value is
        used as a row position in the matrix
    :param fix_number_of_lags: when truthy, selects the CSV whose name
        contains '_<fix_number_of_lags>_lags'
    :param sample_type: 'ma1' or 'ma3'
    :param is_deg: if True - 'degenerate', if False - 'non degenerate' process
    :param mult_on_sample_size: if True, each cell is multiplied by the sample
        size of its column; if False, the values are plotted as read
    :return: None; the curves are passed to ``plot_arrays``
    """
    # read csv
    deg = 'deg' if is_deg else 'non_deg'
    name = ''
    if fix_number_of_lags:
        name = '_{}_lags'.format(fix_number_of_lags)
    native_matrix = read_matrix(
        name='var_cov_hat_native_matrix_{}{}_{}_{}.csv'.format(
            noise_type, name, sample_type, deg),
        index_col='lag')

    # Column labels carry the sample size; keep only their digits.
    sample_size_array = [
        int(re.sub("[^0-9]", "", sample_size))
        for sample_size in native_matrix.columns
    ]

    # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
    matrix_for_plot = np.full(shape=(len(count_lags), len(sample_size_array)),
                              fill_value=np.nan)

    # get 1 row from csv
    for i, lag in enumerate(count_lags):
        # Explicit positional slice: a one-row frame for the lag-th row.
        row = native_matrix.iloc[lag:lag + 1]

        if mult_on_sample_size:
            # .iloc[0] takes the single value by position; a plain [0] on a
            # label-indexed Series relies on a deprecated positional fallback.
            matrix_for_plot[i] = [
                row['sample size ' + str(sample_size)].iloc[0] * sample_size
                for sample_size in sample_size_array
            ]
        else:
            matrix_for_plot[i] = row

    # create dict for plotting
    arrays_dict = dict()
    for i, lag in enumerate(count_lags):
        arrays_dict['lag {}'.format(lag)] = matrix_for_plot[i]

    # create title
    sample_title = 'MA(1)' if sample_type == 'ma1' else 'MA(3)' if sample_type == 'ma3' else sample_type
    deg_title = 'degenerate' if is_deg else 'nondegenerate'
    title = 'Var(CovHat) with {} noise and {} {} sample vs sample sizes'.format(
        noise_type, deg_title, sample_title)

    if mult_on_sample_size:
        title = 'sample_size*' + title

    plot_arrays(x_array=sample_size_array,
                arrays_dict=arrays_dict,
                title=title,
                x_label="sample size",
                y_label="var(covHat)")

    print('Made picture "{}"'.format(title))
# Example no. 7
# 0
def compute_and_save_nw_vs_threshold(sample_size: int,
                                     t_par_count: int,
                                     mean: int,
                                     sigma: int,
                                     noise_type: str,
                                     sd_type: str,
                                     sample_type: str = "ma1"):
    """
    Compare the Newey-West and threshold LRV estimates as functions of t.

    Illustrated in
    394 LRV 3a / computing 2 / project 2 / Threshold / N: compute_and_save_nw_vs_threshold
    A sample of the requested type is generated, the covariance array is
    computed once up to the larger of the two lag bounds, and each estimator
    receives the leading slice it needs. Both curves plus the true LRV are
    handed to ``plot_arrays`` (one image of 2 t-dependent plots, per the
    original notes). No precision is computed here so far.

    :param sample_size: forwarded to the sample generator, the lag bounds and
        both estimators
    :param t_par_count: number of t parameters; forwarded to
        ``create_t_par_array`` and ``cov_double_array_of_t``
    :param mean: forwarded to the sample generator
    :param sigma: forwarded to the sample generator and the true-LRV function
    :param noise_type: forwarded to the sample generator and the threshold
        estimator
    :param sd_type: forwarded to the threshold estimator
    :param sample_type: 'ma1' or 'ma3'
    :raises ValueError: if ``sample_type`` is neither 'ma1' nor 'ma3'
    """
    # Fail fast: without this check an unknown sample_type would leave
    # `sample` and `true_lrv_array` unbound and crash later with an
    # UnboundLocalError that hides the real cause.
    if sample_type not in ("ma1", "ma3"):
        raise ValueError(
            "Unsupported sample_type {!r}: expected 'ma1' or 'ma3'".format(
                sample_type))

    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sd_type": sd_type,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    if sample_type == "ma1":
        true_lrv_array = true_lrv_ma1_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    else:  # "ma3", guaranteed by the validation above
        true_lrv_array = true_lrv_ma3_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma3(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)

    support_bound_value = int(support_bound(sample_size=sample_size)) + 1
    threshold_max_lag_value = threshold_max_lag(sample_size=sample_size)

    # Compute covariances once, up to the larger bound, then slice per
    # estimator below.
    max_lag = max(support_bound_value, threshold_max_lag_value)

    cov_double_array = cov_double_array_of_t(sample=sample,
                                             t_par_count=t_par_count,
                                             max_lag=max_lag)

    nw_lrv_array = lrv_hat_nw_of_t(
        cov_double_array=cov_double_array[:support_bound_value, :],
        sample_size=sample_size)
    threshold_lrv_array = lrv_hat_threshold_of_t(
        cov_double_array=cov_double_array[:threshold_max_lag_value, :],
        sample_size=sample_size,
        noise_type=noise_type,
        sd_type=sd_type,
        sample_type=sample_type)

    arrays_dict = {
        "Newey-West LRV": nw_lrv_array,
        "Threshold LRV": threshold_lrv_array
    }

    plot_arrays(x_array=t_par_array,
                arrays_dict=arrays_dict,
                title="Threshold vs Newey-West",
                x_label="t par",
                par_list=par_list,
                true_array=true_lrv_array,
                y_label="LRV")