def read_and_plot_var_cov_hat_csv(noise_type: str):
    """
    M. Compare the block (bootstrap) estimate of var(covHat) with the natively
    simulated var(covHat) at lag 0.

    Two CSV files are loaded through ``read_matrix`` and their 'lag 0' rows
    are handed to ``plot_arrays`` as two curves over the sample sizes (per the
    original note this produces one image with both curves in the same axes).
    Only the native curve is multiplied by the sample size; the bootstrap
    curve is plotted as read.

    Original author's note: currently does not work because the file names
    changed.

    :param noise_type: type of noise 'gaussian' or 'bernoulli'
    """

    def digits_as_int(label):
        # Column headers embed the sample size; keep only the digits.
        return int(re.sub("[^0-9]", "", label))

    bootstrap_table = read_matrix(
        name="var_cov_hat_bootstrap_matrix_batch_size_formula.csv",
        index_col='lag')
    native_table = read_matrix(
        name='var_cov_hat_native_matrix_{}.csv'.format(noise_type),
        index_col='lag')

    # The x axis comes from the bootstrap file's column headers.
    sample_size_array = [
        digits_as_int(column) for column in bootstrap_table.columns
    ]

    bootstrap_curve = np.array(bootstrap_table.loc['lag 0'])
    native_curve = np.array(native_table.loc['lag 0'])

    # Scale the native values in place, one cell per sample size.
    for position, size in enumerate(sample_size_array):
        native_curve[position] *= size

    arrays_dict = dict()
    arrays_dict["bootstrap with lag 0"] = bootstrap_curve
    arrays_dict["native matrix with lag 0"] = native_curve

    plot_arrays(
        x_array=sample_size_array,
        arrays_dict=arrays_dict,
        title="bootstrap vs native matrix with {} noise batch size by power".format(noise_type),
        x_label="sample size")
    print('Made picture "bootstrap vs native matrix with {} noise batch size by power"'.format(noise_type))
def compute_and_save_v_vs_nw(sample_size, t_par_count, mean, sigma, noise_type, sample_type="ma1"):
    """
    Visually compare the "v" double summation formula with the regular
    Newey-West estimate.

    A sample of the requested type is drawn, both estimates are computed over
    the grid returned by ``create_t_par_array`` and passed, together with the
    true LRV curve, to ``plot_arrays``.  Per the original author's note this
    saves a single image with two curves that must coincide (and currently
    do).

    :param sample_size: size of the simulated sample
    :param t_par_count: number of t points, forwarded to ``create_t_par_array``
    :param mean: forwarded to the sample generator
    :param sigma: forwarded to the sample generator and the true LRV function
    :param noise_type: forwarded to the sample generator
    :param sample_type: 'ma1' or 'ma3'
    :raises ValueError: if ``sample_type`` is neither 'ma1' nor 'ma3'
    """
    # Fail fast: previously an unknown sample_type left `sample` and
    # `true_lrv_array` unbound and crashed later with UnboundLocalError.
    if sample_type not in ("ma1", "ma3"):
        raise ValueError(
            "Unsupported sample_type {!r}: expected 'ma1' or 'ma3'".format(
                sample_type))

    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    if sample_type == "ma1":
        true_lrv_array = true_lrv_ma1_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    else:  # sample_type == "ma3"
        true_lrv_array = true_lrv_ma3_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma3(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)

    max_lag = int(support_bound(sample_size=sample_size)) + 1
    cov_double_array = cov_double_array_of_t(sample=sample,
                                             t_par_count=t_par_count,
                                             max_lag=max_lag)

    original_nw_array = lrv_hat_nw_of_t(cov_double_array=cov_double_array,
                                        sample_size=sample_size)
    double_sum_nw_array = lrv_hat_nw_2_of_t(sample=sample,
                                            t_par_array=t_par_array)

    arrays_dict = {
        "Double sum Newey-West": double_sum_nw_array,
        "Original Newey-West": original_nw_array
    }
    plot_arrays(x_array=t_par_array,
                arrays_dict=arrays_dict,
                true_array=true_lrv_array,
                title='Double sum vs original Newey-West',
                x_label='t par',
                par_list=par_list)
def read_and_plot_var_cov_hat_theoretical_and_native_matrixes_csv(
        noise_type: str, what_lag: int, fix_number_of_lags=None) -> None:
    """
    Read the theoretical and the native var cov hat matrices from CSV and
    plot three lines against the sample size for one lag:

    1. theoretical var cov hat,
    2. native var cov hat,
    3. the mean of the native line (a constant line).

    Both the theoretical and the native values are multiplied by the sample
    size before plotting; the mean is taken over the scaled native values.

    :param noise_type: type of noise 'gaussian' or 'bernoulli'
    :param what_lag: for example, 0,1,2
    :param fix_number_of_lags: not used by this function
    """
    theoretical_table = read_matrix(
        name='var_cov_hat_theoretical_matrix_{}.csv'.format(noise_type),
        index_col='lag')
    native_table = read_matrix(
        name='var_cov_hat_native_matrix_{}.csv'.format(noise_type),
        index_col='lag')

    # Sample sizes are the digits embedded in the theoretical column headers.
    sample_size_array = []
    for column in theoretical_table.columns:
        sample_size_array.append(int(re.sub("[^0-9]", "", column)))

    theoretical_curve = np.array(theoretical_table.loc['lag {}'.format(what_lag)])
    native_curve = np.array(native_table.loc['lag {}'.format(what_lag)])

    # Scale both curves in place, cell by cell.
    for position, size in enumerate(sample_size_array):
        theoretical_curve[position] *= size
        native_curve[position] *= size

    # Constant line at the average of the scaled native values.
    native_average = float(np.mean(native_curve))
    native_mean_curve = np.full(shape=native_curve.shape,
                                fill_value=native_average)

    arrays_dict = dict()
    arrays_dict['theoretical with lag {}'.format(what_lag)] = theoretical_curve
    arrays_dict['native matrix with lag {}'.format(what_lag)] = native_curve
    arrays_dict['native matrix mean with lag {}'.format(what_lag)] = native_mean_curve

    plot_arrays(
        x_array=sample_size_array,
        arrays_dict=arrays_dict,
        title="theoretical vs native matrixes with lag={} and {} noise".format(
            what_lag, noise_type),
        x_label="sample size")
    print(
        'Made picture "theoretical vs native matrixes with lag={} and {} noise"'
        .format(what_lag, noise_type))
def read_and_plot_var_cov_hat_native_matrix_means_csv(
        lags_array: np.ndarray = np.array([]),
        sample_type: str = "ma1",
        is_deg: bool = False) -> None:
    """
    Plot var cov hat means versus lag, one line per noise type.

    The means are read from a CSV selected by ``sample_type`` and ``is_deg``.
    The first two columns of that CSV are plotted (per the original note:
    the first is gaussian, the second is bernoulli).

    :param lags_array: array of lags, example: [0,1,2,3,4,5]; when empty (or
        None) the lags are parsed from the digits of the CSV index labels
    :param sample_type: 'ma1' or 'ma3'
    :param is_deg: if True -'degenerate', if False - 'non degenerate' process
    """
    deg = 'deg' if is_deg else 'non_deg'
    native_matrix = read_matrix(
        name='var_cov_hat_native_matrix_means_{}_{}.csv'.format(
            sample_type, deg),
        index_col='lag')

    # Only the first two columns are plotted (two colours are supplied below).
    noise_type_array = [
        str(noise_type) for noise_type in native_matrix.columns
    ][:2]

    if lags_array is None or len(lags_array) == 0:
        lags_array = [
            int(re.sub("[^0-9]", "", lag)) for lag in native_matrix.index
        ]

    par_list = {'sample_type': sample_type, 'is_deg': deg}

    # create dict for plotting
    arrays_dict = dict()
    for noise_type in noise_type_array:
        column = native_matrix[noise_type]
        # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
        values = np.full(shape=len(lags_array), fill_value=np.nan)
        for i, lag in enumerate(lags_array):
            # NOTE(review): integer key on the column is kept as in the
            # original; with string index labels pandas treats it as a
            # position (deprecated fallback) - confirm the index type.
            values[i] = column[lag]
        arrays_dict[noise_type] = values

    # create title
    sample_title = {'ma1': 'MA(1)', 'ma3': 'MA(3)'}.get(sample_type,
                                                        sample_type)
    deg_title = 'degenerate' if is_deg else 'nondegenerate'
    title = 'sample_size*Var(CovHat) means for {} {} sample'.format(
        deg_title, sample_title)

    plot_arrays(x_array=lags_array,
                arrays_dict=arrays_dict,
                title=title,
                x_label="lags",
                y_label="var(covHat)",
                par_list=par_list,
                color_array=['red', 'blue'])
    print('Made picture "{}"'.format(title))
def read_and_plot_var_cov_hat_native_matrix_by_lags_csv(
        noise_type: str,
        fix_number_of_lags=None,
        sample_type: str = "ma1",
        is_deg: bool = False,
        mult_on_sample_size: bool = False) -> None:
    """
    N: Plot var cov hat vs lags.

    Read the native matrix from CSV and plot one line per CSV column (one
    column per sample size).

    :param noise_type: type of noise 'gaussian' or 'bernoulli'
    :param fix_number_of_lags: when truthy, selects the CSV whose name carries
        the '_<n>_lags' suffix
    :param sample_type: 'ma1' or 'ma3'
    :param is_deg: if True -'degenerate', if False - 'non degenerate' process
    :param mult_on_sample_size: if True - mult each cell on current sample
        size, if False - doesn't mult
    :return: None; the lines are handed to ``plot_arrays``
    """
    # read csv
    deg = 'deg' if is_deg else 'non_deg'
    lags_suffix = ''
    if fix_number_of_lags:
        lags_suffix = '_{}_lags'.format(fix_number_of_lags)
    native_matrix = read_matrix(
        name='var_cov_hat_native_matrix_{}{}_{}_{}.csv'.format(
            noise_type, lags_suffix, sample_type, deg),
        index_col='lag')

    # Lags are the digits embedded in the index labels.
    lags_array = [
        int(re.sub("[^0-9]", "", lag)) for lag in native_matrix.index
    ]

    # create dict for plotting
    arrays_dict = dict()
    for column in native_matrix.columns:
        sample_size = int(re.sub("[^0-9]", "", column))
        # Read the column by its real header rather than rebuilding
        # 'sample size N', so other header spellings do not raise KeyError.
        array_for_plot = native_matrix[column]
        if mult_on_sample_size:
            array_for_plot = [x * sample_size for x in array_for_plot]
        arrays_dict['sample size {}'.format(sample_size)] = array_for_plot

    # create title
    sample_title = {'ma1': 'MA(1)', 'ma3': 'MA(3)'}.get(sample_type,
                                                        sample_type)
    deg_title = 'degenerate' if is_deg else 'nondegenerate'
    title = 'Var(CovHat) with {} noise and {} {} sample vs lags'.format(
        noise_type, deg_title, sample_title)
    if mult_on_sample_size:
        title = 'sample_size*' + title

    plot_arrays(x_array=lags_array,
                arrays_dict=arrays_dict,
                title=title,
                x_label="lags",
                y_label="var(covHat)")
    # Single-line literal: the message matches the sibling plotting functions.
    print('Made picture "{}"'.format(title))
def read_and_plot_var_cov_hat_native_matrix_by_sample_size_csv(
        noise_type: str,
        count_lags: np.ndarray,
        fix_number_of_lags=None,
        sample_type: str = 'ma1',
        is_deg: bool = False,
        mult_on_sample_size: bool = False) -> None:
    """
    N: Plot var cov hat vs sample size.

    Read the native matrix from CSV and plot len(count_lags) lines, one per
    requested lag, against the sample sizes parsed from the column headers.

    :param noise_type: type of noise 'gaussian' or 'bernoulli'
    :param count_lags: array of lags, example: [0,1,2,3,4,5]; each value is
        used as the row position in the CSV
    :param fix_number_of_lags: when truthy, selects the CSV whose name carries
        the '_<n>_lags' suffix
    :param sample_type: 'ma1' or 'ma3'
    :param is_deg: if True -'degenerate', if False - 'non degenerate' process
    :param mult_on_sample_size: if True - mult each cell on current sample
        size, if False - doesn't mult
    :return: None; the lines are handed to ``plot_arrays``
    """
    # read csv
    deg = 'deg' if is_deg else 'non_deg'
    lags_suffix = ''
    if fix_number_of_lags:
        lags_suffix = '_{}_lags'.format(fix_number_of_lags)
    native_matrix = read_matrix(
        name='var_cov_hat_native_matrix_{}{}_{}_{}.csv'.format(
            noise_type, lags_suffix, sample_type, deg),
        index_col='lag')

    sample_size_array = [
        int(re.sub("[^0-9]", "", sample_size))
        for sample_size in native_matrix.columns
    ]
    sample_sizes = np.asarray(sample_size_array)

    # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
    matrix_for_plot = np.full(shape=(len(count_lags), len(sample_size_array)),
                              fill_value=np.nan)

    # get 1 row from csv
    for i, lag in enumerate(count_lags):
        # Explicit positional access: the original `df[lag:lag + 1]` slice was
        # positional too, but its follow-up `series[0]` relied on a deprecated
        # integer-key fallback (and is a label lookup on an integer index).
        row_values = native_matrix.iloc[lag].to_numpy(dtype=float)
        if mult_on_sample_size:
            # Columns and sample_sizes share the same order by construction.
            row_values = row_values * sample_sizes
        matrix_for_plot[i] = row_values

    # create dict for plotting
    arrays_dict = dict()
    for i, lag in enumerate(count_lags):
        arrays_dict['lag {}'.format(lag)] = matrix_for_plot[i]

    # create title
    sample_title = {'ma1': 'MA(1)', 'ma3': 'MA(3)'}.get(sample_type,
                                                        sample_type)
    deg_title = 'degenerate' if is_deg else 'nondegenerate'
    title = 'Var(CovHat) with {} noise and {} {} sample vs sample sizes'.format(
        noise_type, deg_title, sample_title)
    if mult_on_sample_size:
        title = 'sample_size*' + title

    plot_arrays(x_array=sample_size_array,
                arrays_dict=arrays_dict,
                title=title,
                x_label="sample size",
                y_label="var(covHat)")
    print('Made picture "{}"'.format(title))
def compute_and_save_nw_vs_threshold(sample_size: int,
                                     t_par_count: int,
                                     mean: int,
                                     sigma: int,
                                     noise_type: str,
                                     sd_type: str,
                                     sample_type: str = "ma1"):
    """
    Illustrated in 394 LRV 3a / computing 2 / project 2 / Threshold /
    N: compute_and_save_nw_vs_threshold

    Draw one sample and plot the Newey-West and the threshold LRV estimates
    over the grid returned by ``create_t_par_array``, together with the true
    LRV curve.  Per the original note this saves one image file of 2
    t-dependent plots; no precision is computed here so far.

    Covariances are computed once up to the larger of the two lag bounds and
    each estimator receives only the leading rows up to its own bound.

    :param sample_size: size of the simulated sample
    :param t_par_count: number of t points, forwarded to ``create_t_par_array``
    :param mean: forwarded to the sample generator
    :param sigma: forwarded to the sample generator and the true LRV function
    :param noise_type: forwarded to the sample generator and the threshold
        estimator
    :param sd_type: forwarded to the threshold estimator
    :param sample_type: 'ma1' or 'ma3'
    :raises ValueError: if ``sample_type`` is neither 'ma1' nor 'ma3'
    """
    # Fail fast: previously an unknown sample_type left `sample` and
    # `true_lrv_array` unbound and crashed later with UnboundLocalError.
    if sample_type not in ("ma1", "ma3"):
        raise ValueError(
            "Unsupported sample_type {!r}: expected 'ma1' or 'ma3'".format(
                sample_type))

    par_list = {
        "sample_size": sample_size,
        "t_par_count": t_par_count,
        "mean": mean,
        "sigma": sigma,
        "noise_type": noise_type,
        "sd_type": sd_type,
        "sample_type": sample_type
    }

    t_par_array = create_t_par_array(t_par_count=t_par_count)

    if sample_type == "ma1":
        true_lrv_array = true_lrv_ma1_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma1(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)
    else:  # sample_type == "ma3"
        true_lrv_array = true_lrv_ma3_of_t(sigma=sigma,
                                           t_par_array=t_par_array)
        sample = diagonal_sample_tvma3(sample_size=sample_size,
                                       mean=mean,
                                       sigma=sigma,
                                       noise_type=noise_type)

    support_bound_value = int(support_bound(sample_size=sample_size)) + 1
    threshold_max_lag_value = threshold_max_lag(sample_size=sample_size)
    # One covariance computation serves both estimators.
    max_lag = max(support_bound_value, threshold_max_lag_value)
    cov_double_array = cov_double_array_of_t(sample=sample,
                                             t_par_count=t_par_count,
                                             max_lag=max_lag)

    nw_lrv_array = lrv_hat_nw_of_t(
        cov_double_array=cov_double_array[:support_bound_value, :],
        sample_size=sample_size)
    threshold_lrv_array = lrv_hat_threshold_of_t(
        cov_double_array=cov_double_array[:threshold_max_lag_value, :],
        sample_size=sample_size,
        noise_type=noise_type,
        sd_type=sd_type,
        sample_type=sample_type)

    arrays_dict = {
        "Newey-West LRV": nw_lrv_array,
        "Threshold LRV": threshold_lrv_array
    }
    plot_arrays(x_array=t_par_array,
                arrays_dict=arrays_dict,
                title="Threshold vs Newey-West",
                x_label="t par",
                par_list=par_list,
                true_array=true_lrv_array,
                y_label="LRV")