def main(argv):
    """Run the top3_full GMM experiment in "ground-truth empirical limit" mode.

    For every vote count n in range(n_init, n_stop + 1, n_step) and each of
    the t datasets, the GMM aggregator is called with the dataset's true
    parameters (no rankings are passed).  The WSSE and SSE of the solution
    against the true parameters and the runtimes are averaged per n, saved
    as CSV, pickled and plotted.

    Args:
        argv: command-line argument list of length 12:
            argv[1]   number of alternatives (m)
            argv[2]   (index + 1) of the final dataset (t)
            argv[3]   initial number of votes (n_init, must be > 0)
            argv[4]   final number of votes (n_stop, must be > n_init)
            argv[5]   vote-count increment (n_step, must be > 0)
            argv[6]   dataset base file name, ending in "_<number>"
            argv[7]   WSSE results CSV output file
            argv[8]   SSE results CSV output file
            argv[9]   runtime results CSV output file
            argv[10]  plot image output file
            argv[11]  pickled GMM results output file

    NOTE(review): print_usage is defined outside this block; it presumably
    terminates the program after printing the usage text -- confirm.
    """
    if len(argv) != 12:
        print_usage(argv[0])
    m = int(argv[1])  # number of alternatives
    # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    t = int(argv[2])
    n_init = int(argv[3])  # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])
    n_stop = int(argv[4])  # final experiment number of votes
    if not n_stop > n_init:
        print("Error: Final number of votes must be greater than starting number of votes")
        print_usage(argv[0])
    n_step = int(argv[5])  # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")
    p = ((n_stop - n_init) // n_step) + 1  # always positive and >= 1 by above

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    # "<base>_<number>": the number is the first dataset index and its width
    # is the zero-padding used for every dataset file name.
    data_filename_base, _, index_field = argv[6].rpartition("_")
    d = int(index_field)
    if d < 0:
        print("Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    index_width = len(index_field)
    datasets = []
    for i in range(t):
        infilename = "{0}_{1:0{2}d}.csv".format(data_filename_base, i + d, index_width)
        # Close each input file as soon as it has been parsed.
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later (fail before the long experiment,
    # not after it).  The handles are not needed: np.savetxt and the plot
    # routine re-open the files by name.
    wsse_filename = argv[7]
    sse_filename = argv[8]
    time_filename = argv[9]
    plot_filename = argv[10]
    gmm_solns_filename = argv[11]
    for out_path in (wsse_filename, sse_filename, time_filename, plot_filename):
        with open(out_path, 'w'):
            pass

    wsse_res = np.empty((p, 2))  # columns: n, mean WSSE
    sse_res = np.empty((p, 2))   # columns: n, mean SSE
    time_res = np.empty((p, 4))  # columns: n, mean t0, mean t1, mean overall
    gmm_solns = []
    alts = np.arange(m)

    with open(gmm_solns_filename, 'wb') as gmm_solns_file:  # writable binary mode
        # initialize the aggregators for each class of algorithm
        print("Initializing Aggregator Classes...")
        gmmagg = gmm_mixpl.GMMMixPLAggregator(alts, use_matlab=True)

        print("Starting Experiments...")
        # k_n is the experiment index number, n the number of agents
        for k_n, n in enumerate(range(n_init, n_stop + 1, n_step)):
            print("n =", n)
            print("i = ", end='')
            sys.stdout.flush()
            wsse_vals = np.empty(t)
            sse_vals = np.empty(t)
            time_vals = np.empty((3, t))  # rows: t0, t1, overall
            for i in range(t):
                # Overwrite the previous trial number in place; nothing has
                # been printed yet before the first trial.
                erase = "\b" * len(str(i - 1)) if i > 0 else ""
                print(erase + str(i), end='')
                sys.stdout.flush()

                # Ground-truth empirical limit: only the true parameters are
                # used, the sampled votes are ignored.  To run on sampled
                # votes instead pass rankings=votes[:n],
                # opto="matlab_default", true_params=None and record
                # num_votes=n / opto="matlab_default" in the result.
                params, _votes = datasets[i]

                # DEFAULT: top3_full GMM (20 moments)
                time_val = time.perf_counter()
                soln, t0, t1 = gmmagg.aggregate(
                    rankings=None,
                    algorithm="top3_full",
                    epsilon=None,
                    max_iters=None,
                    approx_step=None,
                    opto="matlab_emp_default",
                    true_params=params)
                time_val = time.perf_counter() - time_val

                wsse_vals[i] = stats.mix2PL_wsse(params, soln, m)
                sse_vals[i] = stats.mix2PL_sse(params, soln, m)
                time_vals[0][i] = t0
                time_vals[1][i] = t1
                time_vals[2][i] = time_val
                gmm_solns.append(gmm_mixpl.GMMMixPLResult(
                    num_alts=m,
                    num_votes=0,  # ground-truth empirical limit
                    num_mix=2,
                    true_params=params,
                    cond="top3_full",
                    opto="matlab_emp_default",
                    soln_params=soln,
                    momnts_runtime=t0,
                    opto_runtime=t1,
                    overall_runtime=time_val))
            print()

            wsse_res[k_n][0] = n
            wsse_res[k_n][1] = np.mean(wsse_vals)  # GMM
            sse_res[k_n][0] = n
            sse_res[k_n][1] = np.mean(sse_vals)  # GMM
            time_res[k_n][0] = n
            time_res[k_n][1] = np.mean(time_vals[0])  # GMM t0 (moment-calc)
            time_res[k_n][2] = np.mean(time_vals[1])  # GMM t1 (optimization)
            time_res[k_n][3] = np.mean(time_vals[2])  # GMM overall time

            # write results intermediately after a full set of trials for
            # each n (each dump appends a snapshot of the whole list so far)
            pickle.dump(gmm_solns, gmm_solns_file)

        # Final snapshot; kept so the layout of the output file is unchanged.
        pickle.dump(gmm_solns, gmm_solns_file)

    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")
    plot.plot_error_time_data(str_error_type="MSE",
                              error_results=sse_res,
                              time_results=time_res,
                              output_img_filename=plot_filename)
def main(argv):
    """Run the EMM experiment for mixtures of two Plackett-Luce models.

    For every vote count n in range(n_init, n_stop + 1, n_step) and each of
    the t datasets, the EMM aggregator is run on the first n votes.  The
    WSSE and SSE of the solution against the true parameters and the runtime
    are averaged per n, saved as CSV, pickled, and plotted together with the
    previously stored GMM results.

    Args:
        argv: command-line argument list of length 14:
            argv[1]   number of alternatives (m)
            argv[2]   (index + 1) of the final dataset (t)
            argv[3]   initial number of votes (n_init, must be > 0)
            argv[4]   final number of votes (n_stop, must be > n_init)
            argv[5]   vote-count increment (n_step, must be > 0)
            argv[6]   total number of iterations (>= 1)
            argv[7]   number of EM iterations (>= 1, <= total)
            argv[8]   dataset base file name, ending in "_<number>"
            argv[9]   WSSE results CSV output file
            argv[10]  SSE results CSV output file
            argv[11]  runtime results CSV output file
            argv[12]  plot image output file
            argv[13]  pickled EMM results output file

    NOTE(review): print_usage is defined outside this block; it presumably
    terminates the program after printing the usage text -- confirm.
    """
    if len(argv) != 14:
        print_usage(argv[0])
    m = int(argv[1])  # number of alternatives
    # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    t = int(argv[2])
    n_init = int(argv[3])  # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])
    n_stop = int(argv[4])  # final experiment number of votes
    if not n_stop > n_init:
        print("Error: Final number of votes must be greater than starting number of votes")
        print_usage(argv[0])
    n_step = int(argv[5])  # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")
    p = ((n_stop - n_init) // n_step) + 1  # always positive and >= 1 by above

    # Set EMM algorithm iterations & convergence parameters:
    emm_epsilon = None  # TODO: add option to specify on command-line
    mm_epsilon = None  # TODO: add option to specify on command-line
    tot_iters = int(argv[6])
    if tot_iters < 1:
        print("Error: Invalid argument for total sum of iterations")
        print_usage(argv[0])
    em_iters = int(argv[7])
    if em_iters < 1:
        print("Error: Invalid argument for EM iterations")
        print_usage(argv[0])
    if em_iters > tot_iters:
        print("Error: Total iterations less than EM iterations")
        print_usage(argv[0])

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    # "<base>_<number>": the number is the first dataset index and its width
    # is the zero-padding used for every dataset file name.
    data_filename_base, _, index_field = argv[8].rpartition("_")
    d = int(index_field)
    if d < 0:
        print("Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    index_width = len(index_field)
    datasets = []
    for i in range(t):
        infilename = "{0}_{1:0{2}d}.csv".format(data_filename_base, i + d, index_width)
        # Close each input file as soon as it has been parsed.
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later (fail before the long experiment,
    # not after it).  The handles are not needed: np.savetxt and the plot
    # routine re-open the files by name.
    wsse_filename = argv[9]
    sse_filename = argv[10]
    time_filename = argv[11]
    plot_filename = argv[12]
    emm_solns_filename = argv[13]
    for out_path in (wsse_filename, sse_filename, time_filename, plot_filename):
        with open(out_path, 'w'):
            pass

    with open(emm_solns_filename, 'wb') as emm_solns_file:  # writable binary mode
        # open previous experiments results files
        orig_error_results = np.loadtxt(
            "../../MixPL_GmmExperiments/mse_mixPL_04-alts_2000-trials_22.csv",
            delimiter=',')
        orig_time_results = np.loadtxt(
            "../../MixPL_GmmExperiments/time_mixPL_04-alts_2000-trials_22.csv",
            delimiter=',')

        wsse_res = np.empty((p, 2))  # columns: n, mean WSSE
        sse_res = np.empty((p, 2))   # columns: n, mean SSE
        time_res = np.empty((p, 2))  # columns: n, mean runtime
        emm_solns = []
        alts = np.arange(m)

        # initialize the aggregators for each class of algorithm
        print("Initializing Aggregator Classes...")
        emmagg = emm.EMMMixPLAggregator(alts)

        print("Starting Experiments...")
        # k_n is the experiment index number, n the number of agents
        for k_n, n in enumerate(range(n_init, n_stop + 1, n_step)):
            print("n =", n)
            print("i = ", end='')
            sys.stdout.flush()
            wsse_vals = np.empty(t)
            sse_vals = np.empty(t)
            time_vals = np.empty(t)
            for i in range(t):
                # Overwrite the previous trial number in place; nothing has
                # been printed yet before the first trial.
                erase = "\b" * len(str(i - 1)) if i > 0 else ""
                print(erase + str(i), end='')
                sys.stdout.flush()

                # get data
                params, votes = datasets[i]
                votes_curr = votes[:n]

                # EMM
                time_val = time.perf_counter()
                emm_pi, emm_p, pi_0, p_0 = emmagg.aggregate(
                    votes_curr,
                    K=2,
                    epsilon=emm_epsilon,
                    tot_iters=tot_iters,
                    epsilon_mm=mm_epsilon,
                    max_iters_em=em_iters)
                time_val = time.perf_counter() - time_val

                # Flatten into the single vector the error statistics are
                # called with: first mixing entry, then both components.
                soln = np.hstack((emm_pi[0], emm_p[0], emm_p[1]))
                wsse_vals[i] = stats.mix2PL_wsse(params, soln, m)
                sse_vals[i] = stats.mix2PL_sse(params, soln, m)
                time_vals[i] = time_val
                emm_solns.append(emm.EMMMixPLResult(
                    num_alts=m,
                    num_votes=n,
                    num_mix=2,
                    true_params=params,
                    epsilon=emm_epsilon,
                    max_iters="total:" + str(tot_iters),
                    epsilon_mm=mm_epsilon,
                    max_iters_mm="actually-EM:" + str(em_iters),
                    init_guess=np.hstack((pi_0[0], p_0[0], p_0[1])),
                    soln_params=soln,
                    runtime=time_val))
            print()

            wsse_res[k_n][0] = n
            wsse_res[k_n][1] = np.mean(wsse_vals)  # EMM
            sse_res[k_n][0] = n
            sse_res[k_n][1] = np.mean(sse_vals)  # EMM
            time_res[k_n][0] = n
            time_res[k_n][1] = np.mean(time_vals)  # EMM

            # write results intermediately after a full set of trials for
            # each n (each dump appends a snapshot of the whole list so far)
            pickle.dump(emm_solns, emm_solns_file)

        # Final snapshot; kept so the layout of the output file is unchanged.
        pickle.dump(emm_solns, emm_solns_file)

    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")
    plot.plot_error_time_data(str_error_type="MSE",
                              iter_1=em_iters,
                              iter_2=tot_iters // em_iters,
                              error_results=sse_res,
                              time_results=time_res,
                              orig_error_results=orig_error_results,
                              orig_time_results=orig_time_results,
                              output_img_filename=plot_filename)
def main(argv):
    """Run the top3_full GMM experiment in "ground-truth empirical limit" mode.

    For every vote count n in range(n_init, n_stop + 1, n_step) and each of
    the t datasets, the GMM aggregator is called with the dataset's true
    parameters (no rankings are passed).  The WSSE and SSE of the solution
    against the true parameters and the runtimes are averaged per n, saved
    as CSV, pickled and plotted.

    Args:
        argv: command-line argument list of length 12:
            argv[1]   number of alternatives (m)
            argv[2]   (index + 1) of the final dataset (t)
            argv[3]   initial number of votes (n_init, must be > 0)
            argv[4]   final number of votes (n_stop, must be > n_init)
            argv[5]   vote-count increment (n_step, must be > 0)
            argv[6]   dataset base file name, ending in "_<number>"
            argv[7]   WSSE results CSV output file
            argv[8]   SSE results CSV output file
            argv[9]   runtime results CSV output file
            argv[10]  plot image output file
            argv[11]  pickled GMM results output file

    NOTE(review): print_usage is defined outside this block; it presumably
    terminates the program after printing the usage text -- confirm.
    """
    if len(argv) != 12:
        print_usage(argv[0])

    m = int(argv[1])  # number of alternatives
    # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    t = int(argv[2])

    n_init = int(argv[3])  # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])

    n_stop = int(argv[4])  # final experiment number of votes
    if not n_stop > n_init:
        print("Error: Final number of votes must be greater than starting number of votes")
        print_usage(argv[0])

    n_step = int(argv[5])  # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")

    p = ((n_stop - n_init) // n_step) + 1  # always positive and >= 1 by above

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    # The trailing "_<number>" of the base name gives the first dataset
    # index; its character width is the zero-padding of all file names.
    data_filename_base, _, index_field = argv[6].rpartition("_")
    d = int(index_field)
    if d < 0:
        print("Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    index_width = len(index_field)

    datasets = []
    for i in range(t):
        infilename = "{0}_{1:0{2}d}.csv".format(data_filename_base, i + d, index_width)
        # The context manager releases the input handle right after parsing.
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later: open and immediately close each
    # text output, since np.savetxt and the plot routine work from the file
    # names and do not need a long-lived handle.
    wsse_filename, sse_filename, time_filename, plot_filename = argv[7:11]
    gmm_solns_filename = argv[11]
    for out_path in (wsse_filename, sse_filename, time_filename, plot_filename):
        with open(out_path, 'w'):
            pass

    wsse_res = np.empty((p, 2))  # columns: n, mean WSSE
    sse_res = np.empty((p, 2))   # columns: n, mean SSE
    time_res = np.empty((p, 4))  # columns: n, mean t0, mean t1, mean overall
    gmm_solns = []
    alts = np.arange(m)

    with open(gmm_solns_filename, 'wb') as gmm_solns_file:  # writable binary mode
        # initialize the aggregators for each class of algorithm
        print("Initializing Aggregator Classes...")
        gmmagg = gmm_mixpl.GMMMixPLAggregator(alts, use_matlab=True)

        print("Starting Experiments...")
        # k_n is the experiment index number, n the number of agents
        for k_n, n in enumerate(range(n_init, n_stop + 1, n_step)):
            print("n =", n)
            print("i = ", end='')
            sys.stdout.flush()

            wsse_vals = np.empty(t)
            sse_vals = np.empty(t)
            time_vals = np.empty((3, t))  # rows: t0, t1, overall

            for i in range(t):
                # Backspace over the previously printed trial number only;
                # on the first trial there is nothing to erase.
                erase = "\b" * len(str(i - 1)) if i > 0 else ""
                print(erase + str(i), end='')
                sys.stdout.flush()

                # Ground-truth empirical limit: the aggregator receives the
                # true parameters and no rankings, so the stored votes are
                # not used.  For a run on sampled votes pass
                # rankings=votes[:n], opto="matlab_default",
                # true_params=None and record num_votes=n /
                # opto="matlab_default" in the result instead.
                params, _votes = datasets[i]

                # DEFAULT: top3_full GMM (20 moments)
                time_val = time.perf_counter()
                soln, t0, t1 = gmmagg.aggregate(
                    rankings=None,
                    algorithm="top3_full",
                    epsilon=None,
                    max_iters=None,
                    approx_step=None,
                    opto="matlab_emp_default",
                    true_params=params)
                time_val = time.perf_counter() - time_val

                wsse_vals[i] = stats.mix2PL_wsse(params, soln, m)
                sse_vals[i] = stats.mix2PL_sse(params, soln, m)
                time_vals[0][i] = t0
                time_vals[1][i] = t1
                time_vals[2][i] = time_val

                gmm_result = gmm_mixpl.GMMMixPLResult(
                    num_alts=m,
                    num_votes=0,  # ground-truth empirical limit
                    num_mix=2,
                    true_params=params,
                    cond="top3_full",
                    opto="matlab_emp_default",
                    soln_params=soln,
                    momnts_runtime=t0,
                    opto_runtime=t1,
                    overall_runtime=time_val)
                gmm_solns.append(gmm_result)
            print()

            wsse_res[k_n][0] = n
            wsse_res[k_n][1] = np.mean(wsse_vals)  # GMM
            sse_res[k_n][0] = n
            sse_res[k_n][1] = np.mean(sse_vals)  # GMM
            time_res[k_n][0] = n
            time_res[k_n][1] = np.mean(time_vals[0])  # GMM t0 (moment-calc)
            time_res[k_n][2] = np.mean(time_vals[1])  # GMM t1 (optimization)
            time_res[k_n][3] = np.mean(time_vals[2])  # GMM overall time

            # write results intermediately after a full set of trials for
            # each n; every dump appends a snapshot of the list so far
            pickle.dump(gmm_solns, gmm_solns_file)

        # Final snapshot after the last n, as in the existing file layout.
        pickle.dump(gmm_solns, gmm_solns_file)

    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")
    plot.plot_error_time_data(str_error_type="MSE",
                              error_results=sse_res,
                              time_results=time_res,
                              output_img_filename=plot_filename)
def main(argv):
    """Run the GMM and EMM algorithms side by side and compare their errors.

    For every vote count n in range(n_init, n_stop + 1, n_step) and each of
    the t datasets, both the top3_full GMM aggregator and the EMM aggregator
    are run on the first n votes.  Per n, the mean WSSE/SSE and runtimes of
    both algorithms and the mean and standard deviation of the paired error
    differences (EMM - GMM) are saved as CSV; all individual results are
    pickled and the WSSE/time results are plotted.

    Args:
        argv: command-line argument list of length 17:
            argv[1]   number of alternatives (m)
            argv[2]   (index + 1) of the final dataset (t)
            argv[3]   initial number of votes (n_init, must be > 0)
            argv[4]   final number of votes (n_stop, must be > n_init)
            argv[5]   vote-count increment (n_step, must be > 0)
            argv[6]   EMM iteration limit; < 1 selects epsilon 1e-8 with a
                      limit of 500
            argv[7]   MM iteration limit; < 1 selects epsilon 1e-8 with a
                      limit of 50
            argv[8]   dataset base file name, ending in "_<number>"
            argv[9]   WSSE results CSV output file
            argv[10]  SSE results CSV output file
            argv[11]  runtime results CSV output file
            argv[12]  plot image output file
            argv[13]  pickled GMM results output file
            argv[14]  pickled EMM results output file
            argv[15]  WSSE paired-difference CSV output file
            argv[16]  SSE paired-difference CSV output file

    NOTE(review): print_usage is defined outside this block; it presumably
    terminates the program after printing the usage text -- confirm.
    """
    # argv[16] is read below, so program name + 16 arguments are required.
    if len(argv) != 17:
        print_usage(argv[0])
    m = int(argv[1])  # number of alternatives
    # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    t = int(argv[2])
    n_init = int(argv[3])  # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])
    n_stop = int(argv[4])  # final experiment number of votes
    if not n_stop > n_init:
        print("Error: Final number of votes must be greater than starting number of votes")
        print_usage(argv[0])
    n_step = int(argv[5])  # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")
    p = ((n_stop - n_init) // n_step) + 1  # always positive and >= 1 by above

    # Set EMM algorithm iterations & convergence parameters:
    emm_epsilon = None  # TODO: add option to specify on command-line
    mm_epsilon = None  # TODO: add option to specify on command-line
    emm_iters = int(argv[6])
    if emm_iters < 1:
        emm_epsilon = 1e-8
        emm_iters = 500
    mm_iters = int(argv[7])
    if mm_iters < 1:
        mm_epsilon = 1e-8
        mm_iters = 50

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    # "<base>_<number>": the number is the first dataset index and its width
    # is the zero-padding used for every dataset file name.
    data_filename_base, _, index_field = argv[8].rpartition("_")
    d = int(index_field)
    if d < 0:
        print("Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    index_width = len(index_field)
    datasets = []
    for i in range(t):
        infilename = "{0}_{1:0{2}d}.csv".format(data_filename_base, i + d, index_width)
        # Close each input file as soon as it has been parsed.
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later (fail before the long experiment,
    # not after it).  The handles are not needed: np.savetxt and the plot
    # routine re-open the files by name.
    wsse_filename = argv[9]
    sse_filename = argv[10]
    time_filename = argv[11]
    plot_filename = argv[12]
    gmm_solns_filename = argv[13]
    emm_solns_filename = argv[14]
    ttest_wsse_filename = argv[15]
    ttest_sse_filename = argv[16]
    for out_path in (wsse_filename, sse_filename, time_filename,
                     plot_filename, ttest_wsse_filename, ttest_sse_filename):
        with open(out_path, 'w'):
            pass

    wsse_res = np.empty((p, 3))  # columns: n, GMM mean, EMM mean
    sse_res = np.empty((p, 3))   # columns: n, GMM mean, EMM mean
    time_res = np.empty((p, 5))  # columns: n, GMM t0, GMM t1, GMM overall, EMM
    ttest_vals = np.empty((2, p, 3))  # 2 t-tests X p points X 3 values (n, mean, std)
    gmm_solns = []
    emm_solns = []
    alts = np.arange(m)

    # writable binary mode for both pickle outputs
    with open(gmm_solns_filename, 'wb') as gmm_solns_file, \
            open(emm_solns_filename, 'wb') as emm_solns_file:
        # initialize the aggregators for each class of algorithm
        print("Initializing Aggregator Classes...")
        gmmagg = gmm_mixpl.GMMMixPLAggregator(alts, use_matlab=True)
        emmagg = emm.EMMMixPLAggregator(alts)

        print("Starting Experiments...")
        # k_n is the experiment index number, n the number of agents
        for k_n, n in enumerate(range(n_init, n_stop + 1, n_step)):
            print("n =", n)
            print("i = ", end='')
            sys.stdout.flush()
            wsse_vals = np.empty((2, t))  # rows: GMM, EMM
            sse_vals = np.empty((2, t))   # rows: GMM, EMM
            time_vals = np.empty((4, t))  # rows: GMM t0, GMM t1, GMM overall, EMM
            diff_vals = np.empty((2, t))  # rows: WSSE diff, SSE diff
            for i in range(t):
                # Overwrite the previous trial number in place; nothing has
                # been printed yet before the first trial.
                erase = "\b" * len(str(i - 1)) if i > 0 else ""
                print(erase + str(i), end='')
                sys.stdout.flush()

                # get data
                params, votes = datasets[i]
                votes_curr = votes[:n]

                # top3_full GMM (20 moments)
                time_val = time.perf_counter()
                soln, t0, t1 = gmmagg.aggregate(
                    rankings=votes_curr,
                    algorithm="top3_full",
                    epsilon=None,
                    max_iters=None,
                    approx_step=None,
                    opto="matlab",
                    true_params=None)
                time_val = time.perf_counter() - time_val
                wsse_vals[0][i] = stats.mix2PL_wsse(params, soln, m)
                sse_vals[0][i] = stats.mix2PL_sse(params, soln, m)
                time_vals[0][i] = t0
                time_vals[1][i] = t1
                time_vals[2][i] = time_val
                gmm_solns.append(gmm_mixpl.GMMMixPLResult(
                    num_alts=m,
                    num_votes=n,
                    num_mix=2,
                    true_params=params,
                    cond="top3_full",
                    opto="matlab",
                    soln_params=soln,
                    momnts_runtime=t0,
                    opto_runtime=t1,
                    overall_runtime=time_val))

                # EMM -- the inner MM step gets its own tolerance
                # (mm_epsilon), which is set independently of emm_epsilon
                # above.
                time_val = time.perf_counter()
                emm_pi, emm_p, pi_0, p_0 = emmagg.aggregate(
                    votes_curr,
                    K=2,
                    epsilon=emm_epsilon,
                    max_iters=emm_iters,
                    epsilon_mm=mm_epsilon,
                    max_iters_mm=mm_iters)
                time_val = time.perf_counter() - time_val
                soln = np.hstack((emm_pi[0], emm_p[0], emm_p[1]))
                wsse_vals[1][i] = stats.mix2PL_wsse(params, soln, m)
                sse_vals[1][i] = stats.mix2PL_sse(params, soln, m)
                time_vals[3][i] = time_val
                emm_solns.append(emm.EMMMixPLResult(
                    num_alts=m,
                    num_votes=n,
                    num_mix=2,
                    true_params=params,
                    epsilon=emm_epsilon,
                    max_iters=emm_iters,
                    epsilon_mm=mm_epsilon,
                    max_iters_mm=mm_iters,
                    init_guess=np.hstack((pi_0[0], p_0[0], p_0[1])),
                    soln_params=soln,
                    runtime=time_val))

                # t-test differences (EMM error minus GMM error)
                diff_vals[0][i] = wsse_vals[1][i] - wsse_vals[0][i]
                diff_vals[1][i] = sse_vals[1][i] - sse_vals[0][i]
            print()

            wsse_res[k_n][0] = n
            wsse_res[k_n][1] = np.mean(wsse_vals[0])  # GMM
            wsse_res[k_n][2] = np.mean(wsse_vals[1])  # EMM
            sse_res[k_n][0] = n
            sse_res[k_n][1] = np.mean(sse_vals[0])  # GMM
            sse_res[k_n][2] = np.mean(sse_vals[1])  # EMM
            time_res[k_n][0] = n
            time_res[k_n][1] = np.mean(time_vals[0])  # GMM t0 (moment-calc)
            time_res[k_n][2] = np.mean(time_vals[1])  # GMM t1 (optimization)
            time_res[k_n][3] = np.mean(time_vals[2])  # GMM overall time
            time_res[k_n][4] = np.mean(time_vals[3])  # EMM time

            # GMM vs EMM WSSE
            ttest_vals[0][k_n][0] = n
            ttest_vals[0][k_n][1] = np.mean(diff_vals[0])
            ttest_vals[0][k_n][2] = np.std(diff_vals[0])
            # GMM vs EMM SSE
            ttest_vals[1][k_n][0] = n
            ttest_vals[1][k_n][1] = np.mean(diff_vals[1])
            ttest_vals[1][k_n][2] = np.std(diff_vals[1])

            # write results intermediately after a full set of trials for
            # each n (each dump appends a snapshot of the whole list so far)
            pickle.dump(gmm_solns, gmm_solns_file)
            pickle.dump(emm_solns, emm_solns_file)

        # Final snapshots; kept so the layout of the output files is unchanged.
        pickle.dump(gmm_solns, gmm_solns_file)
        pickle.dump(emm_solns, emm_solns_file)

    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")
    np.savetxt(ttest_wsse_filename, ttest_vals[0], delimiter=',', newline="\r\n")
    np.savetxt(ttest_sse_filename, ttest_vals[1], delimiter=',', newline="\r\n")
    plot.plot_wsse_time_data(str_error_type="MSE",
                             error_results=wsse_res,
                             time_results=time_res,
                             output_img_filename=plot_filename)
def main(argv):
    """Run the EMM experiment for mixtures of two Plackett-Luce models.

    For every vote count n in range(n_init, n_stop + 1, n_step) and each of
    the t datasets, the EMM aggregator is run on the first n votes.  The
    WSSE and SSE of the solution against the true parameters and the runtime
    are averaged per n, saved as CSV, pickled, and plotted together with the
    previously stored GMM results.

    Args:
        argv: command-line argument list of length 14:
            argv[1]   number of alternatives (m)
            argv[2]   (index + 1) of the final dataset (t)
            argv[3]   initial number of votes (n_init, must be > 0)
            argv[4]   final number of votes (n_stop, must be > n_init)
            argv[5]   vote-count increment (n_step, must be > 0)
            argv[6]   total number of iterations (>= 1)
            argv[7]   number of EM iterations (>= 1, <= total)
            argv[8]   dataset base file name, ending in "_<number>"
            argv[9]   WSSE results CSV output file
            argv[10]  SSE results CSV output file
            argv[11]  runtime results CSV output file
            argv[12]  plot image output file
            argv[13]  pickled EMM results output file

    NOTE(review): print_usage is defined outside this block; it presumably
    terminates the program after printing the usage text -- confirm.
    """
    if len(argv) != 14:
        print_usage(argv[0])

    m = int(argv[1])  # number of alternatives
    # (index + 1) of final dataset (number of datasets if base filename is 0...00)
    t = int(argv[2])

    n_init = int(argv[3])  # initial experiment number of votes
    if not n_init > 0:
        print("Error: Starting number of votes must be greater than 0")
        print_usage(argv[0])

    n_stop = int(argv[4])  # final experiment number of votes
    if not n_stop > n_init:
        print("Error: Final number of votes must be greater than starting number of votes")
        print_usage(argv[0])

    n_step = int(argv[5])  # number of votes to increment by each time
    if not n_step > 0:
        print("Error: Step number of votes must be greater than 0")
        print_usage(argv[0])
    elif (n_stop - n_init) < n_step or (n_stop - n_init) % n_step != 0:
        print("Warning: Step number of votes doesn't fit range")

    p = ((n_stop - n_init) // n_step) + 1  # always positive and >= 1 by above

    # Set EMM algorithm iterations & convergence parameters:
    emm_epsilon = None  # TODO: add option to specify on command-line
    mm_epsilon = None  # TODO: add option to specify on command-line

    tot_iters = int(argv[6])
    if tot_iters < 1:
        print("Error: Invalid argument for total sum of iterations")
        print_usage(argv[0])

    em_iters = int(argv[7])
    if em_iters < 1:
        print("Error: Invalid argument for EM iterations")
        print_usage(argv[0])
    if em_iters > tot_iters:
        print("Error: Total iterations less than EM iterations")
        print_usage(argv[0])

    # read in all data required for experiments
    print("Reading Datasets from Disk...")
    # The trailing "_<number>" of the base name gives the first dataset
    # index; its character width is the zero-padding of all file names.
    data_filename_base, _, index_field = argv[8].rpartition("_")
    d = int(index_field)
    if d < 0:
        print("Error: dataset base file name must not contain a negative number")
        print_usage(argv[0])
    index_width = len(index_field)

    datasets = []
    for i in range(t):
        infilename = "{0}_{1:0{2}d}.csv".format(data_filename_base, i + d, index_width)
        # The context manager releases the input handle right after parsing.
        with open(infilename) as infile:
            datasets.append(pl.read_mix2pl_dataset(infile, numVotes=n_stop))

    # Check files can be written to later: open and immediately close each
    # text output, since np.savetxt and the plot routine work from the file
    # names and do not need a long-lived handle.
    wsse_filename, sse_filename, time_filename, plot_filename = argv[9:13]
    emm_solns_filename = argv[13]
    for out_path in (wsse_filename, sse_filename, time_filename, plot_filename):
        with open(out_path, 'w'):
            pass

    with open(emm_solns_filename, 'wb') as emm_solns_file:  # writable binary mode
        # open previous experiments results files
        orig_error_results = np.loadtxt(
            "../../MixPL_GmmExperiments/mse_mixPL_04-alts_2000-trials_22.csv",
            delimiter=',')
        orig_time_results = np.loadtxt(
            "../../MixPL_GmmExperiments/time_mixPL_04-alts_2000-trials_22.csv",
            delimiter=',')

        wsse_res = np.empty((p, 2))  # columns: n, mean WSSE
        sse_res = np.empty((p, 2))   # columns: n, mean SSE
        time_res = np.empty((p, 2))  # columns: n, mean runtime
        emm_solns = []
        alts = np.arange(m)

        # initialize the aggregators for each class of algorithm
        print("Initializing Aggregator Classes...")
        emmagg = emm.EMMMixPLAggregator(alts)

        print("Starting Experiments...")
        # k_n is the experiment index number, n the number of agents
        for k_n, n in enumerate(range(n_init, n_stop + 1, n_step)):
            print("n =", n)
            print("i = ", end='')
            sys.stdout.flush()

            wsse_vals = np.empty(t)
            sse_vals = np.empty(t)
            time_vals = np.empty(t)

            for i in range(t):
                # Backspace over the previously printed trial number only;
                # on the first trial there is nothing to erase.
                erase = "\b" * len(str(i - 1)) if i > 0 else ""
                print(erase + str(i), end='')
                sys.stdout.flush()

                # get data
                params, votes = datasets[i]
                votes_curr = votes[:n]

                # EMM
                time_val = time.perf_counter()
                emm_pi, emm_p, pi_0, p_0 = emmagg.aggregate(
                    votes_curr,
                    K=2,
                    epsilon=emm_epsilon,
                    tot_iters=tot_iters,
                    epsilon_mm=mm_epsilon,
                    max_iters_em=em_iters)
                time_val = time.perf_counter() - time_val

                # Flatten into the single vector the error statistics are
                # called with: first mixing entry, then both components.
                soln = np.hstack((emm_pi[0], emm_p[0], emm_p[1]))
                wsse_vals[i] = stats.mix2PL_wsse(params, soln, m)
                sse_vals[i] = stats.mix2PL_sse(params, soln, m)
                time_vals[i] = time_val

                emm_result = emm.EMMMixPLResult(
                    num_alts=m,
                    num_votes=n,
                    num_mix=2,
                    true_params=params,
                    epsilon=emm_epsilon,
                    max_iters="total:" + str(tot_iters),
                    epsilon_mm=mm_epsilon,
                    max_iters_mm="actually-EM:" + str(em_iters),
                    init_guess=np.hstack((pi_0[0], p_0[0], p_0[1])),
                    soln_params=soln,
                    runtime=time_val)
                emm_solns.append(emm_result)
            print()

            wsse_res[k_n][0] = n
            wsse_res[k_n][1] = np.mean(wsse_vals)  # EMM
            sse_res[k_n][0] = n
            sse_res[k_n][1] = np.mean(sse_vals)  # EMM
            time_res[k_n][0] = n
            time_res[k_n][1] = np.mean(time_vals)  # EMM

            # write results intermediately after a full set of trials for
            # each n; every dump appends a snapshot of the list so far
            pickle.dump(emm_solns, emm_solns_file)

        # Final snapshot after the last n, as in the existing file layout.
        pickle.dump(emm_solns, emm_solns_file)

    np.savetxt(wsse_filename, wsse_res, delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, sse_res, delimiter=',', newline="\r\n")
    np.savetxt(time_filename, time_res, delimiter=',', newline="\r\n")
    plot.plot_error_time_data(str_error_type="MSE",
                              iter_1=em_iters,
                              iter_2=tot_iters // em_iters,
                              error_results=sse_res,
                              time_results=time_res,
                              orig_error_results=orig_error_results,
                              orig_time_results=orig_time_results,
                              output_img_filename=plot_filename)
def main(argv):
    """Sweep the mixing parameter alpha and measure GMM error at each value.

    For n = 0 .. p-1 the value alpha = a_init * (n + 1) is used.  For each
    alpha, t trials are run: two component parameter vectors are drawn from
    a flat Dirichlet distribution, the top3_full GMM aggregator is called
    with the resulting true parameters (no rankings), and the WSSE and SSE
    of its solution are recorded.  The mean and standard deviation of both
    errors per alpha are saved as CSV and plotted.

    Args:
        argv: command-line argument list with at least 8 entries:
            argv[1]  number of alternatives (m)
            argv[2]  number of trials (t)
            argv[3]  starting and stepping value for alpha (a_init)
            argv[4]  number of alpha values (p)
            argv[5]  WSSE results CSV output file
            argv[6]  SSE results CSV output file
            argv[7]  plot image output file
            argv[8]  optional "-U" flag (currently without effect)

    NOTE(review): print_usage is defined outside this block; it presumably
    terminates the program after printing the usage text -- confirm.
    """
    if len(argv) < 8:
        print_usage(argv[0])
    m = int(argv[1])  # number of alternatives
    t = int(argv[2])  # number of trials
    a_init = float(argv[3])  # starting and stepping value for alpha
    p = int(argv[4])  # number of times to increment by a_init

    # Check files can be written to later:
    wsse_filename = argv[5]
    sse_filename = argv[6]
    plot_filename = argv[7]
    for out_path in (wsse_filename, sse_filename, plot_filename):
        with open(out_path, 'w'):
            pass

    # Data sets are always generated with Dirichlet draws.  The "-U" flag is
    # accepted, but no alternative generator exists in this function, so say
    # so instead of silently ignoring the request.
    if len(argv) > 8 and argv[8] == "-U":
        print("Warning: -U has no effect; ground-truths are drawn from a Dirichlet distribution")

    results = np.empty((2, p, 3))  # 2 statistics X p points X 3 observations
    alts = np.arange(m)

    # initialize the aggregators for each class of algorithm
    gmmagg = gmm_mixpl.GMMMixPLAggregator(alts, use_matlab=True)

    opto_fails = 0
    for n in range(p):
        alpha = a_init * (n + 1)
        # Re-seed for every alpha so each one sees the same draw sequence.
        np.random.seed(0)
        print("n =", n)
        print("i = ", end='')
        sys.stdout.flush()
        wsse_vals = np.empty(t)
        sse_vals = np.empty(t)
        for i in range(t):
            # Overwrite the previous trial number in place; nothing has been
            # printed yet before the first trial.
            erase = "\b" * len(str(i - 1)) if i > 0 else ""
            print(erase + str(i), end='')
            sys.stdout.flush()
            while True:
                # generate ground-truths (outside the try block: an error
                # here is a usage problem, not bad data, and retrying it
                # would never terminate)
                gamma1 = np.random.dirichlet(np.ones(m))
                gamma2 = np.random.dirichlet(np.ones(m))
                params = np.hstack((alpha, gamma1, gamma2))
                try:
                    # MatLab top3_full GMM (20 moments)
                    # NOTE(review): the return value is used directly as the
                    # solution vector -- confirm aggregate returns only the
                    # solution for opto="matlab3".
                    soln = gmmagg.aggregate(rankings=None,
                                            algorithm="top3_full",
                                            epsilon=None,
                                            max_iters=None,
                                            approx_step=None,
                                            opto="matlab3",
                                            true_params=params)
                    wsse_vals[i] = stats.mix2PL_wsse(params, soln, m)
                    sse_vals[i] = stats.mix2PL_sse(params, soln, m)
                except (FloatingPointError, ValueError):
                    # bad data: objective function NaN (FloatingPointError)
                    # or votes arrays number of dimensions mismatch
                    # (ValueError) -- count it and retry with a fresh draw
                    opto_fails += 1
                    continue
                break  # good data!
        print()

        # store WSSE values
        results[0][n][0] = alpha
        results[0][n][1] = np.mean(wsse_vals)
        results[0][n][2] = np.std(wsse_vals)
        # store SSE values
        results[1][n][0] = alpha
        results[1][n][1] = np.mean(sse_vals)
        results[1][n][2] = np.std(sse_vals)

    print("Optimization Failures Count: " + str(opto_fails))
    np.savetxt(wsse_filename, results[0], delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, results[1], delimiter=',', newline="\r\n")
    plot.plot_wsse_sse_data(results[0], results[1], plot_filename)
def main(argv):
    """Sweep the mixing parameter alpha and measure GMM error at each value.

    For n = 0 .. p-1 the value alpha = a_init * (n + 1) is used.  For each
    alpha, t trials are run: two component parameter vectors are drawn from
    a flat Dirichlet distribution, the top3_full GMM aggregator is called
    with the resulting true parameters (no rankings), and the WSSE and SSE
    of its solution are recorded.  The mean and standard deviation of both
    errors per alpha are saved as CSV and plotted.

    Args:
        argv: command-line argument list with at least 8 entries:
            argv[1]  number of alternatives (m)
            argv[2]  number of trials (t)
            argv[3]  starting and stepping value for alpha (a_init)
            argv[4]  number of alpha values (p)
            argv[5]  WSSE results CSV output file
            argv[6]  SSE results CSV output file
            argv[7]  plot image output file
            argv[8]  optional "-U" flag (currently without effect)

    NOTE(review): print_usage is defined outside this block; it presumably
    terminates the program after printing the usage text -- confirm.
    """
    if len(argv) < 8:
        print_usage(argv[0])

    m = int(argv[1])  # number of alternatives
    t = int(argv[2])  # number of trials
    a_init = float(argv[3])  # starting and stepping value for alpha
    p = int(argv[4])  # number of times to increment by a_init

    # Check files can be written to later:
    wsse_filename, sse_filename, plot_filename = argv[5:8]
    for out_path in (wsse_filename, sse_filename, plot_filename):
        with open(out_path, 'w'):
            pass

    # Ground-truths always come from Dirichlet draws.  "-U" is accepted but
    # there is no alternative generator in this function, so report that
    # rather than silently ignoring the flag.
    if len(argv) > 8 and argv[8] == "-U":
        print("Warning: -U has no effect; ground-truths are drawn from a Dirichlet distribution")

    results = np.empty((2, p, 3))  # 2 statistics X p points X 3 observations
    alts = np.arange(m)

    # initialize the aggregators for each class of algorithm
    gmmagg = gmm_mixpl.GMMMixPLAggregator(alts, use_matlab=True)

    opto_fails = 0
    for n in range(p):
        alpha = a_init * (n + 1)
        # Same seed for every alpha, so all alphas share one draw sequence.
        np.random.seed(0)
        print("n =", n)
        print("i = ", end='')
        sys.stdout.flush()

        wsse_vals = np.empty(t)
        sse_vals = np.empty(t)

        for i in range(t):
            # Backspace over the previously printed trial number only; on
            # the first trial there is nothing to erase.
            erase = "\b" * len(str(i - 1)) if i > 0 else ""
            print(erase + str(i), end='')
            sys.stdout.flush()

            while True:
                # generate ground-truths; kept outside the try block so that
                # an error raised here propagates instead of being retried
                # forever as "bad data"
                gamma1 = np.random.dirichlet(np.ones(m))
                gamma2 = np.random.dirichlet(np.ones(m))
                params = np.hstack((alpha, gamma1, gamma2))

                try:
                    # MatLab top3_full GMM (20 moments)
                    # NOTE(review): the return value is used directly as the
                    # solution vector -- confirm aggregate returns only the
                    # solution for opto="matlab3".
                    soln = gmmagg.aggregate(rankings=None,
                                            algorithm="top3_full",
                                            epsilon=None,
                                            max_iters=None,
                                            approx_step=None,
                                            opto="matlab3",
                                            true_params=params)
                    wsse_vals[i] = stats.mix2PL_wsse(params, soln, m)
                    sse_vals[i] = stats.mix2PL_sse(params, soln, m)
                except (FloatingPointError, ValueError):
                    # bad data: objective function NaN (FloatingPointError)
                    # or votes arrays number of dimensions mismatch
                    # (ValueError) -- count the failure and draw again
                    opto_fails += 1
                    continue
                break  # good data!
        print()

        # store WSSE values
        results[0][n][0] = alpha
        results[0][n][1] = np.mean(wsse_vals)
        results[0][n][2] = np.std(wsse_vals)

        # store SSE values
        results[1][n][0] = alpha
        results[1][n][1] = np.mean(sse_vals)
        results[1][n][2] = np.std(sse_vals)

    print("Optimization Failures Count: " + str(opto_fails))
    np.savetxt(wsse_filename, results[0], delimiter=',', newline="\r\n")
    np.savetxt(sse_filename, results[1], delimiter=',', newline="\r\n")
    plot.plot_wsse_sse_data(results[0], results[1], plot_filename)