def combine_array(file1, file2):
    """Join two stored 2-D results column-wise and save the merged array.

    Both inputs are read with ``loadF``, converted to NumPy arrays and
    concatenated along axis 1. The number of rows and the length of the
    first row of the merged array are printed, and the result is written
    with ``saveF`` to ``400r_CI_result_MMR.pkl``.

    Args:
        file1: Name passed to ``loadF`` for the left-hand block.
        file2: Name passed to ``loadF`` for the right-hand block.
    """
    left, right = (np.array(loadF(path)) for path in (file1, file2))
    merged = np.concatenate((left, right), axis=1)
    n_rows, n_cols = len(merged), len(merged[0])
    print(n_rows, n_cols)
    saveF(merged, "400r_CI_result_MMR.pkl")
def calculate_variance_h1(f_fname="F_0_1_MMR.pkl"):
    """Compute a variance for every (pi_l, pi_t, p) combination and save it.

    The policy lists come from ``dp.get_pil_pit()``, the p values from
    ``h1.pkl`` (its last entry is dropped), and the f0/f1 tables from
    ``f_fname``. For each (l, t) pair the two tables are merged with
    ``change_f01_to_F`` and ``calculate_variance`` is evaluated for the
    first ``k`` p values, where ``k`` is the length of the first pi_l entry.

    The nested result ``Var_list[l][t][p]`` is saved with ``dp.saveF`` under
    ``"var_ltp_h1_" + f_fname[6:-4]`` (for the default name that strips the
    ``F_0_1_`` prefix and the ``.pkl`` suffix) and printed.

    Args:
        f_fname: File holding the ``(f0_list, f1_list)`` pair.
    """
    pi_l_list, pi_t_list = dp.get_pil_pit()
    # ``np.float`` was only an alias of the builtin ``float`` and was removed
    # in NumPy 1.24, so the builtin is used directly.
    pi_l_list = np.array(pi_l_list).astype(float)
    pi_t_list = np.array(pi_t_list).astype(float)
    k = len(pi_l_list[0])

    p_list = np.array(dp.loadF("h1.pkl")[:-1]).astype(float)
    print("p_list", p_list)

    f0_list, f1_list = dp.loadF(f_fname)

    Var_list = []
    for l, pi_l in enumerate(pi_l_list):
        row = []
        for t, pi_t in enumerate(pi_t_list):
            F = change_f01_to_F(f0_list[l][t], f1_list[l][t])
            row.append(
                [calculate_variance(pi_l, pi_t, p_list[p], F) for p in range(k)]
            )
        Var_list.append(row)

    dp.saveF(Var_list, "var_ltp_h1_" + f_fname[6:-4])
    print("var_list", Var_list)
def _ratio_or_nan(numerator, count):
    """Return ``numerator / count``, or NaN when ``count`` is zero."""
    return numerator / count if count else float("nan")


def _report_rho_section(first_var, second_var, n_l, n_t, p_indices):
    """Accumulate and print the rho/diff statistics for one set of p indices."""
    labelled_thresholds = (("10", 0.1), ("25", 0.25), ("50", 0.5), ("75", 0.75))
    above = {label: [] for label, _ in labelled_thresholds}
    below = {label: [] for label, _ in labelled_thresholds}
    pos, neg, all_rho = [], [], []

    # Print order of the regional columns, as in the original report.
    region_names = ("L15_T610", "L610_T15", "L15_T15", "L610_T610", "diag")
    regions = {name: {"rho": [], "diff": [], "pos": 0} for name in region_names}

    def record(name, rho, diff):
        stats_for_region = regions[name]
        stats_for_region["rho"].append(rho)
        stats_for_region["diff"].append(diff)
        if rho >= 0:
            stats_for_region["pos"] += 1

    for l in range(n_l):
        for t in range(n_t):
            for p in p_indices:
                a = first_var[l][t][p]
                b = second_var[l][t][p]
                rho = calculate_rho(a, b)
                diff = a - b
                all_rho.append(rho)

                # Cumulative buckets: a sample with rho >= 0.75 is also
                # counted in the 0.5, 0.25 and 0.1 buckets (and likewise on
                # the negative side).
                if rho >= 0:
                    pos.append(diff)
                    for label, threshold in labelled_thresholds:
                        if rho >= threshold:
                            above[label].append(diff)
                else:
                    neg.append(diff)
                    for label, threshold in labelled_thresholds:
                        if rho <= -threshold:
                            below[label].append(diff)

                l_part = "L15" if l < 5 else "L610"
                t_part = "T15" if t < 5 else "T610"
                record(l_part + "_" + t_part, rho, diff)
                if l == t:
                    record("diag", rho, diff)

    n_samples = len(all_rho)

    # An empty bucket has no mean; skip its line instead of dividing by zero
    # (the original only guarded the "75" bucket this way).
    for prefix, buckets in (("", above), ("-", below)):
        for label, _ in labelled_thresholds:
            bucket = buckets[label]
            if bucket:
                print(prefix + label, len(bucket) / n_samples,
                      sum(bucket) / len(bucket))

    ordered = [regions[name] for name in region_names]
    print("pos", _ratio_or_nan(len(pos), n_samples),
          *[_ratio_or_nan(r["pos"], len(r["rho"])) for r in ordered])
    print("meandiff", _ratio_or_nan(sum(pos) + sum(neg), n_samples),
          *[_ratio_or_nan(sum(r["diff"]), len(r["rho"])) for r in ordered])
    print("meanrho", _ratio_or_nan(sum(all_rho), n_samples),
          *[_ratio_or_nan(sum(r["rho"]), len(r["rho"])) for r in ordered])


def compare_var(first="var_ltp_h1_optReg", second="var_ltp_h1_MMR"):
    """Print a comparison report of two stored variance tables.

    Both tables are loaded with ``dp.loadF`` and indexed as ``[l][t][p]``.
    For every entry, ``calculate_rho`` of the two values and their
    difference (first minus second) are collected. The report is printed
    three times: for all p indices, for p indices 0-4, and for p indices
    5-9. Each report contains:

    * for each threshold 0.1/0.25/0.5/0.75 (and the negated thresholds),
      the fraction of samples beyond it and the mean difference of those
      samples (the line is omitted when no sample qualifies);
    * ``pos``: fraction of samples with rho >= 0, overall and per region;
    * ``meandiff`` / ``meanrho``: mean difference and mean rho, overall and
      per region.

    Regions are the four quadrants split at l < 5 and t < 5, plus the
    diagonal l == t, printed in the order L15_T610, L610_T15, L15_T15,
    L610_T610, diag. A region without samples is reported as NaN.

    NOTE(review): the two half reports hard-code p indices 0-9, so they
    presumably expect exactly 10 p values per (l, t) cell -- confirm.

    Args:
        first: Name of the first variance table for ``dp.loadF``.
        second: Name of the second variance table for ``dp.loadF``.
    """
    first_var_list = dp.loadF(first)
    second_var_list = dp.loadF(second)
    # The original assigned this conversion to a misspelled, unused name.
    first_var_list = np.array(first_var_list)
    second_var_list = np.array(second_var_list)

    L = len(first_var_list)
    T = len(first_var_list[0])
    P = len(first_var_list[0][0])
    print(L * T * P)

    # all 10 p
    _report_rho_section(first_var_list, second_var_list, L, T, range(P))
    # p 1-5
    _report_rho_section(first_var_list, second_var_list, L, T, range(5))
    # p 5-10
    _report_rho_section(first_var_list, second_var_list, L, T, range(5, 10))
def percentage_success(filename):
    """Plot, per method, the binned share of true values that were in range.

    The file read with ``loadF`` must unpack into six sequences. For each
    of the four in-range flag sequences (clt, t, chi, f) the true values
    are split by whether the flag equals 1, both groups are passed to
    ``seperate_to_10`` (presumably a 10-bin count -- confirm against its
    definition), and the element-wise ratio
    ``hits / (hits + misses)`` is handed to ``plot_hist``.

    Args:
        filename: Name passed to ``loadF``.
    """
    (true_value_list, target_vals_list, clt_inrange, t_inrange, chi_inrange,
     f_inrange) = loadF(filename)
    method_list = [clt_inrange, t_inrange, chi_inrange, f_inrange]

    target_data_list = []
    for in_range_flags in method_list:
        covered, missed = [], []
        for i, flag in enumerate(in_range_flags):
            bucket = covered if flag == 1 else missed
            bucket.append(true_value_list[i])
        covered_bins = seperate_to_10(covered)
        missed_bins = seperate_to_10(missed)
        target_data_list.append(covered_bins / (covered_bins + missed_bins))

    plot_hist("Histalgram of percentage success (clt, t, chi, f)",
              len(method_list), target_data_list)
def get_variance_info(l, t, p):
    """Return entry ``[l][t]`` of the variance table stored for index ``p``.

    The table is read with ``loadF`` from
    ``process_data\\var_f_optVar<p>.pkl`` on every call.
    """
    table_path = "process_data\\var_f_optVar" + str(p) + ".pkl"
    table = loadF(table_path)
    row = table[l]
    return row[t]
def analyze_success(filename):
    """Plot, per method, the binned distribution of true values inside the CI.

    For each of the four in-range flag sequences (clt, t, chi, f) read from
    ``filename``, the true values whose flag equals 1 are passed to
    ``seperate_to_10`` and the result is divided in place by the number of
    selected values before being handed to ``plot_hist``.

    Args:
        filename: Name passed to ``loadF``; must unpack into six sequences.
    """
    (true_value_list, target_vals_list, clt_inrange, t_inrange, chi_inrange,
     f_inrange) = loadF(filename)
    method_list = [clt_inrange, t_inrange, chi_inrange, f_inrange]

    target_data_list = []
    for in_range_flags in method_list:
        covered = [
            true_value_list[i]
            for i, flag in enumerate(in_range_flags)
            if flag == 1
        ]
        binned = seperate_to_10(covered)
        binned /= len(covered)
        target_data_list.append(binned)

    plot_hist("Histalgram of true value in CI (clt, t, chi, f)",
              len(method_list), target_data_list, 0, 10, 0, 0.25)
def analyze_value(filename, index=0):
    """Plot the binned distribution of the true values stored in ``filename``.

    The file is always loaded; a histogram is only produced when ``index``
    equals 0. In that case the true values are passed to
    ``seperate_to_10``, divided in place by their count, and plotted with
    ``plot_hist``.

    Args:
        filename: Name passed to ``loadF``; must unpack into six sequences.
        index: Only the value 0 triggers any plotting.
    """
    (true_value_list, target_vals_list, clt_inrange, t_inrange, chi_inrange,
     f_inrange) = loadF(filename)
    if not index == 0:
        return
    binned = seperate_to_10(true_value_list)
    binned /= len(true_value_list)
    plot_hist("Histalgram of true value", 1, [binned])
def _formula_variances():
    """Collect ``get_variance_info(l, t, p)`` in l, p, t nesting order."""
    return [
        get_variance_info(l, t, p)
        for l in range(11)
        for p in range(10)
        for t in range(11)
    ]


def _scatter_save_show(fig, x, y, size, xlabel, ylabel, out_name):
    """Scatter ``x`` vs ``y`` on the current figure, save ``fig``, then show it."""
    plt.scatter(x, y, s=size, color='blue')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Save before showing: once a blocking ``plt.show()`` window is closed
    # the figure may be torn down, and saving afterwards can produce an
    # empty file.
    fig.savefig(out_name, bbox_inches='tight')
    plt.show()


def scatter_plot(coverage_file, seperate_variance=False, variance_file=None):
    """Scatter-plot coverage and/or variance data and print Pearson results.

    The mode is selected by ``seperate_variance``:

    * ``False`` (default): formula variance vs coverage, saved as ``1.pdf``.
    * ``"mean"``: variance loaded from ``variance_file`` vs coverage,
      saved as ``2.pdf``.
    * ``"VvsV"``: variance loaded from ``variance_file`` vs formula
      variance, saved as ``3.pdf``.
    * anything else: treated as a numeric threshold; the cases are split
      into formula variance ``<=`` and ``>`` the threshold and plotted as
      two separate figures, ``4.pdf`` and ``5.pdf``.

    In every mode the result of ``stats.pearsonr`` is printed before
    plotting. Formula variances come from ``get_variance_info`` and are
    paired with ``coverage`` in l, p, t nesting order.

    Args:
        coverage_file: Name passed to ``loadF`` for the coverage values.
        seperate_variance: Mode selector or numeric threshold (see above).
        variance_file: Name passed to ``loadF`` in the ``"mean"`` and
            ``"VvsV"`` modes.
    """
    coverage = loadF(coverage_file)
    fig = plt.figure()

    # ``==`` rather than ``is`` is kept on purpose so that a caller passing
    # 0 still selects the default mode, as before.
    if seperate_variance == False:  # noqa: E712
        x = _formula_variances()
        y = coverage
        # calculate covariance
        covariance = stats.pearsonr(x, y)
        print(covariance)
        _scatter_save_show(fig, x, y, 0.6, 'variance by formula', 'coverage',
                           '1.pdf')
    elif seperate_variance == "mean":
        variance = loadF(variance_file)
        # calculate covariance
        covariance = stats.pearsonr(variance, coverage)
        print(covariance)
        _scatter_save_show(fig, variance, coverage, 0.6,
                           'variance by 400 instance', 'coverage', '2.pdf')
    elif seperate_variance == "VvsV":
        variance_ins = loadF(variance_file)
        variance_formula = _formula_variances()
        # calculate covariance
        covariance = stats.pearsonr(variance_ins, variance_formula)
        print(covariance)
        _scatter_save_show(fig, variance_ins, variance_formula, 0.6,
                           'variance by 400 instance', 'variance by formula',
                           '3.pdf')
    else:
        x1, y1, x2, y2 = [], [], [], []
        for i, variance in enumerate(_formula_variances()):
            if variance <= seperate_variance:
                x1.append(variance)
                y1.append(coverage[i])
            else:
                x2.append(variance)
                y2.append(coverage[i])
        print("len for <= " + str(seperate_variance) + ": " + str(len(x1)))
        print("len for > " + str(seperate_variance) + ": " + str(len(x2)))
        # calculate covariance
        covariance1 = stats.pearsonr(x1, y1)
        covariance2 = stats.pearsonr(x2, y2)
        print("covarance <= " + str(seperate_variance), covariance1)
        print("covarance > " + str(seperate_variance), covariance2)
        # plot
        _scatter_save_show(fig, x1, y1, 0.7,
                           'variance by formula <= ' + str(seperate_variance),
                           'coverage', '4.pdf')
        # The second plot gets its own figure; previously it was saved
        # through the first figure's handle, so 5.pdf never contained only
        # the "> threshold" points.
        second_fig = plt.figure()
        _scatter_save_show(second_fig, x2, y2, 0.7,
                           'variance by formula > ' + str(seperate_variance),
                           'coverage', '5.pdf')
def _report_low_coverage_case(mean_list, case_index):
    """Print diagnostics and plot the rescaled estimates of one case."""
    estimates = mean_list[case_index]
    # normality
    print(stats.normaltest(estimates))
    min_x = min(estimates)
    max_x = max(estimates)
    print("min and max:", min_x, max_x)
    # Rescale to [0, 1]; a new array is built instead of modifying the row
    # of ``mean_list`` in place (the values are the same).
    scaled = (estimates - min_x) / (max_x - min_x)

    [ind_name], l, t, p = get_name_from_index([case_index])
    print(ind_name)
    # get variance
    variance = get_variance_info(l, t, p)
    print("variance:", variance)

    ten_val_list = seperate_to_10(scaled) / len(scaled)
    plot_hist(
        str(len(mean_list[0])) +
        "r Histalgram of mean value with small coverage (" + ind_name[:3] +
        ")", 1, [ten_val_list], min_x, max_x, 0, 1, "Estimates")


def analyze_mean(mean_file, ind_file, ind=None):
    """Plot histograms of the estimates belonging to selected cases.

    ``mean_file`` holds one sequence of estimates per case and ``ind_file``
    holds four index lists (clt, t, chi, f), which are merged with
    duplicates removed while keeping first-seen order.

    * ``ind is None``: the estimates of all merged cases are pooled into a
      single histogram.
    * ``ind == "inf"``: every merged case is reported separately, each
      preceded by a separator line.
    * otherwise: only the case with index ``ind`` is reported.

    A per-case report prints the ``stats.normaltest`` result, the min and
    max, the case name from ``get_name_from_index`` and the variance from
    ``get_variance_info``, then plots the estimates rescaled to [0, 1].

    Args:
        mean_file: Name passed to ``loadF`` for the per-case estimates.
        ind_file: Name passed to ``loadF`` for the four index lists.
        ind: ``None``, ``"inf"``, or a single case index.
    """
    mean_list = loadF(mean_file)
    ind_list_clt, ind_list_t, ind_list_chi, ind_list_f = loadF(ind_file)
    total_list = list(dict.fromkeys(
        ind_list_clt + ind_list_t + ind_list_chi + ind_list_f))
    # ``np.float`` was only an alias of the builtin ``float`` and was removed
    # in NumPy 1.24, so the builtin is used directly.
    mean_list = np.array(mean_list).astype(float)

    if ind is None:
        pooled = np.concatenate(mean_list[total_list])
        print(get_name_from_index(total_list))
        ten_val_list = seperate_to_10(pooled) / len(pooled)
        plot_hist(
            str(len(mean_list[0])) +
            "r Histalgram of mean value with small coverage", 1,
            [ten_val_list])
        return

    if ind == "inf":
        for case_index in total_list:
            print("=====================================================")
            _report_low_coverage_case(mean_list, case_index)
    else:
        _report_low_coverage_case(mean_list, ind)
def get_info_coverage(filename, info_type, estimator="MMR"):
    """Aggregate per-case coverage for four methods and save the extremes.

    Result ``i`` of the loaded file is assigned to case ``i % 1210``. For
    every case the in-range flags of the clt, t, chi and f methods are
    summed and the target values are collected. The function then saves,
    in this order:

    1. the per-case target values (``mean_<N>r_<estimator>.pkl``),
    2. their per-case variance (``var_mean_<N>r_<estimator>.pkl``),
    3. the four coverage arrays, each divided by
       ``len(clt_inrange) / 1210`` (``coverage_<method>_<estimator>.pkl``),

    where ``N`` is ``int(len(target_vals_list) / 1210)``. The mean coverage
    per method is printed.

    Finally, with ``info_type == "min"`` the cases whose coverage is at or
    below the 0.49th percentile are listed per method and saved as
    ``ind_<N>r_<estimator>.pkl``; for any other ``info_type`` the cases at
    or above the 99.5th percentile are saved as
    ``ind_<N>r_max_<estimator>.pkl``.

    Args:
        filename: Name passed to ``loadF``; must unpack into six sequences.
        info_type: ``"min"`` for the low-coverage tail, anything else for
            the high-coverage tail.
        estimator: Label embedded in every output file name.
    """
    n_cases = 1210
    (true_value_list, target_vals_list, clt_inrange, t_inrange, chi_inrange,
     f_inrange) = loadF(filename)

    # Everything below is kept in clt, t, chi, f order.
    flag_lists = (clt_inrange, t_inrange, chi_inrange, f_inrange)
    coverage = [np.zeros(n_cases) for _ in flag_lists]
    case_target_val = [[] for _ in range(n_cases)]

    for i, target_val in enumerate(target_vals_list):
        slot = i % n_cases
        for totals, flags in zip(coverage, flag_lists):
            totals[slot] += flags[i]
        case_target_val[slot].append(target_val)

    reps = str(int(len(target_vals_list) / n_cases))
    saveF(case_target_val, "mean_" + reps + "r_" + estimator + ".pkl")

    # save variance
    var_mean_list = [np.var(values) for values in case_target_val]
    saveF(var_mean_list, "var_mean_" + reps + "r_" + estimator + ".pkl")

    # All four methods are normalised by the clt flag count, as before.
    runs_per_case = len(clt_inrange) / n_cases
    for totals in coverage:
        totals /= runs_per_case

    prefixes = ("coverage_clt_", "coverage_t_", "coverage_chi_", "coverage_f_")
    for prefix, totals in zip(prefixes, coverage):
        saveF(totals, prefix + estimator + ".pkl")

    overall = [np.mean(totals) for totals in coverage]
    print("overall coverage (clt,t,chi,f):", *overall)

    low_tail = info_type == "min"
    if low_tail:
        percentile = 0.49
        extreme = min
        header = "min of clt,t,chi,f ; 0.5 percentile:"
        out_name = "ind_" + reps + "r_" + estimator + ".pkl"
    else:
        percentile = 99.5
        extreme = max
        header = "max of clt,t,chi,f ; 99.5 percentile:"
        out_name = "ind_" + reps + "r_max_" + estimator + ".pkl"

    cut_points = [np.percentile(totals, percentile) for totals in coverage]
    print(header)
    for totals, cut in zip(coverage, cut_points):
        print(extreme(totals), cut)

    ind_lists = []
    for totals, cut in zip(coverage, cut_points):
        if low_tail:
            selected = [i for i in range(n_cases) if totals[i] <= cut]
        else:
            selected = [i for i in range(n_cases) if totals[i] >= cut]
        ind_lists.append(selected)

    print("indlist:", *ind_lists)
    saveF(ind_lists, out_name)
    print(out_name)
def check_data(filename):
    """Print the mean and the count of the target values stored in a file.

    Args:
        filename: Name passed to ``loadF``; the loaded object must unpack
            into six items, of which only the second is used.
    """
    _, target_vals, _, _, _, _ = loadF(filename)
    n_values = len(target_vals)
    print(np.mean(target_vals), n_values)
def get_data(filename, index):
    """Load an object with ``dp.loadF`` and optionally return one element.

    For example, if the stored data is ``[data1, data2]`` and ``index`` is
    0, only ``data1`` is returned.

    Args:
        filename: Name passed to ``dp.loadF``.
        index: Key or position to select from the loaded object, or
            ``None`` to return the whole object.

    Returns:
        The loaded object, or ``loaded[index]`` when ``index`` is not None.
    """
    data = dp.loadF(filename)
    # Identity test: ``index != None`` would invoke ``__ne__`` and fails for
    # array-like indices whose comparison is element-wise.
    if index is None:
        return data
    return data[index]
def off_policy_target_value(a, delta, pi_l, pi_t, p_astar):
    """Look up one entry of the table stored for ``p_astar``.

    The table is read with ``dp.loadF`` from
    ``process_data\\F_0_1_optVar<p_astar>.pkl`` and indexed as
    ``[delta][pi_l][pi_t][a]``.

    Args:
        a: Innermost index.
        delta: Outermost index.
        pi_l: Second index.
        pi_t: Third index.
        p_astar: Value embedded in the file name.

    Returns:
        The selected table entry.
    """
    # The backslash is escaped explicitly: a bare "\F" is an invalid escape
    # sequence that only happens to keep the backslash and triggers a
    # warning on current Python versions. The resulting path is unchanged.
    table = dp.loadF("process_data\\F_0_1_optVar" + str(p_astar) + ".pkl")
    return table[delta][pi_l][pi_t][a]