def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False,
        show_pdf=False, shorten_length=False):
    """Run the end-to-end accuracy experiment for a varying number of classes k.

    Depending on the flags, the function (1) generates synthetic graphs, learns
    a compatibility matrix with each configured method, propagates labels and
    appends the resulting accuracy to a CSV file, (2) aggregates the CSV into
    mean / standard deviation per (option, k, f), and (3) plots accuracy
    against k.

    Args:
        choice: Integer experiment id (500-508) selecting graph size, learning
            methods, label fractions ``f`` and plot limits.
        create_data: If true, start a fresh CSV file (header only) and then
            generate data.
        add_data: If true, generate data and append it to the existing CSV.
        show_plot: If true, display the figure with ``plt.show()``.
        create_pdf: If true, save the figure as a PDF in ``figure_directory``.
        show_pdf: If true, open the saved PDF via ``showfig``.
        shorten_length: Accepted for interface compatibility; not used by this
            function.

    Raises:
        Warning: If ``choice`` is not one of the configured experiment ids.
    """
    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PDF = show_pdf
    SHOW_PLOT = show_plot
    CREATE_PDF = create_pdf
    STD_FILL = True  # draw a +/- one standard deviation band around each curve

    csv_filename = 'Fig_End-to-End_accuracy_VaryK_{}.csv'.format(CHOICE)
    header = ['currenttime', 'option', 'k', 'f', 'accuracy']
    if CREATE_DATA:
        # append=False: start a new file that contains only the header row
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # -- Default graph parameters
    rep_SameGraph = 10          # iterations on same graph
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    EC = True                   # Non-backtracking for learning
    ymin = 0.3
    ymax = 1
    xmax = 8
    xtick_lab = [2, 3, 4, 5, 6, 7, 8]
    xtick_labels = ['2', '3', '4', '5', '6', '7', '8']
    ytick_lab = np.arange(0, 1.1, 0.1)
    f_vec = [0.9 * pow(0.1, 1 / 5) ** x for x in range(21)]
    k_vec = [3, 4, 5]
    rep_DifferentGraphs = 10    # iterations on different graphs
    err = 0
    avoidNeighbors = False
    gradient = False
    pruneRandom = False
    convergencePercentage_W = None
    stratified = True
    label_vec = ['*'] * 10
    clip_on_vec = [False] * 10
    numberOfSplits = 1
    linestyle_vec = ['dashed'] + ['solid'] * 10
    linewidth_vec = [5, 4, 3, 3] + [3] * 10
    marker_vec = [None, None, 'o', 'x', 'o', '^', 'o', 'x', 'o', '^', 'o', 'x', 'o', '^']
    markersize_vec = [0, 0, 4, 8] + [6] * 10

    # Settings that are identical for every configured choice
    facecolor_vec = ['black'] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 3
    weight_vec = [10] * 10
    alpha_vec = [0] * 10
    beta_vec = [0] * 10
    gamma_vec = [0] * 10
    s_vec = [0.5] * 10
    numMaxIt_vec = [10] * 10

    # -- Options with propagation variants
    if CHOICE == 500:       # 1k nodes
        n, h, d = 1000, 8, 25
        option_vec = ['opt1', 'opt2', 'opt3']
        learning_method_vec = ['GS', 'MHE', 'DHE']
        label_vec = ['GS', 'MCE', 'DCEr']
        weight_vec = [10] * 3
        randomize_vec = [False] * 2 + [True]
        xmin = 3.
        ymin = 0.
        ymax = 1.
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [3, 4, 5, 6]

    elif CHOICE == 501:     # 10k nodes
        n, h, d = 10000, 8, 25
        option_vec = ['opt1', 'opt2', 'opt3']
        learning_method_vec = ['GT', 'MHE', 'DHE']
        label_vec = ['GT', 'MCE', 'DCEr']
        weight_vec = [10] * 3
        randomize_vec = [False] * 2 + [True]
        xmin = 2.
        ymin = 0.
        ymax = 1.
        f_vec = [0.03, 0.01, 0.001]
        k_vec = [2, 3, 4, 5]

    elif CHOICE == 502:     # 10k nodes
        n, h, d = 10000, 8, 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.6
        ymax = 1.
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]

    elif CHOICE == 503:     # 10k nodes
        n, h, d = 10000, 3, 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.3
        ymax = 0.9
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10

    elif CHOICE == 504:     # 10k nodes
        n, h, d = 10000, 3, 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        xmax = 7
        ymin = 0.2
        ymax = 0.9
        f_vec = [0.01]
        k_vec = [7]
        clip_on_vec = [True] * 10

    elif CHOICE == 505:     # 10k nodes with f = 0.005
        n, h, d = 10000, 3, 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        xmax = 7
        ymin = 0.2
        ymax = 0.9
        f_vec = [0.005]
        k_vec = [2, 3, 4, 5, 6, 7]
        clip_on_vec = [True] * 10

    elif CHOICE == 506:     # 10k nodes
        n, h, d = 10000, 8, 25
        # Only five options are run; label_vec keeps a sixth, unused entry.
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
        learning_method_vec = ['GT', 'LHE', 'MHE', 'DHE', 'DHE']
        label_vec = ['GT', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        xmax = 7
        ymin = 0.2
        ymax = 0.9
        f_vec = [0.005]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10
        rep_SameGraph = 1
        rep_DifferentGraphs = 1

    elif CHOICE == 507:     # 10k nodes with gradient and PruneRandom
        n, h, d = 10000, 3, 25
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GS', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        label_vec = ['GS', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.1
        ymax = 0.9
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10
        gradient = True
        pruneRandom = True

    elif CHOICE == 508:     # 1k nodes with gradient and PruneRandom
        n, h, d = 1000, 3, 10
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['GS', 'LHE', 'MHE', 'DHE', 'DHE', 'Holdout']
        label_vec = ['GS', 'LCE', 'MCE', 'DCE', 'DCEr', 'Holdout']
        randomize_vec = [False] * 4 + [True] + [False]
        xmin = 2
        ymin = 0.1
        ymax = 0.9
        f_vec = [0.01]
        k_vec = [2, 3, 4, 5, 6, 7, 8]
        clip_on_vec = [True] * 10
        gradient = True
        pruneRandom = True
        rep_DifferentGraphs = 1
        rep_SameGraph = 1

    else:
        # Exception type kept as Warning so existing callers keep working.
        raise Warning("Incorrect choice!")

    RANDOMSEED = None  # For repeatability (None = no fixed seed)
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for _ in range(rep_DifferentGraphs):  # create several graphs with same parameters
            for k in k_vec:
                H0 = create_parameterized_H(k, h, symmetric=True)
                H0c = to_centering_beliefs(H0)

                # uniform class prior over the k classes
                a = [1.] * k
                alpha0 = np.array(a)
                alpha0 = alpha0 / np.sum(alpha0)

                W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                                     distribution=distribution,
                                                     exponent=exponent,
                                                     directed=False,
                                                     debug=False)
                X0 = from_dictionary_beliefs(Xd)

                for _ in range(rep_SameGraph):  # repeat several times for same graph
                    ind = None
                    for f in f_vec:
                        # Remove fraction (1-f) of rows from X0; `ind` from the
                        # previous f is passed on as ind_prior.
                        X1, ind = replace_fraction_of_rows(X0, 1 - f, avoidNeighbors=avoidNeighbors,
                                                           W=W, ind_prior=ind, stratified=stratified)
                        X2 = introduce_errors(X1, ind, err)

                        for option_index, (learning_method, alpha, beta, gamma, s, numMaxIt, weights, randomize) in \
                                enumerate(zip(learning_method_vec, alpha_vec, beta_vec, gamma_vec, s_vec,
                                              numMaxIt_vec, weight_vec, randomize_vec)):

                            # -- Learning
                            if learning_method == 'GT':
                                H2c = H0c  # ground truth: use the planted matrix
                            elif learning_method == 'Holdout':
                                H2 = estimateH_baseline_serial(X2, ind, W, numMax=numMaxIt,
                                                               numberOfSplits=numberOfSplits,
                                                               EC=EC,
                                                               alpha=alpha, beta=beta, gamma=gamma)
                                H2c = to_centering_beliefs(H2)
                            elif learning_method != 'DHE':
                                H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length,
                                               EC=EC, weights=weights, randomize=randomize)
                                H2c = to_centering_beliefs(H2)
                            else:
                                # only DHE receives the gradient / random-restart settings
                                H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length,
                                               EC=EC, weights=weights, randomize=randomize,
                                               gradient=gradient, randomrestarts=pruneRandom)
                                H2c = to_centering_beliefs(H2)

                            # -- Propagation
                            # NOTE(review): X2c is never read below; the call is kept because
                            # the helper's side effects cannot be verified from this file.
                            X2c = to_centering_beliefs(X2, ignoreZeroRows=True)

                            eps_max = eps_convergence_linbp_parameterized(H2c, W, method='noecho',
                                                                          alpha=alpha, beta=beta,
                                                                          gamma=gamma, X=X2)
                            eps = s * eps_max

                            try:
                                F, _, _ = linBP_symmetric_parameterized(
                                    X2, W, H2c * eps, method='noecho',
                                    alpha=alpha, beta=beta, gamma=gamma,
                                    numMaxIt=numMaxIt,
                                    convergencePercentage=convergencePercentage_W,
                                    debug=2)
                            except ValueError as e:
                                # best effort: report the failure and skip this record
                                print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))
                            else:
                                accuracy_X = matrix_difference(X0, F, ignore_rows=ind)
                                record = [str(datetime.datetime.now()),
                                          option_vec[option_index], k, f, accuracy_X]
                                save_csv_record(join(data_directory, csv_filename), record)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))

    # -- Aggregate repetitions
    df2 = df1.groupby(['option', 'k', 'f']).agg(
        {'accuracy': [np.mean, np.std, np.size, np.median]})  # Multiple Aggregates
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)

    # -- Pivot table
    df3 = pd.pivot_table(df2, index=['f', 'k'], columns=['option'],
                         values=['accuracy_mean', 'accuracy_std'])
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy

    # Collect, per label fraction f, the k values and the mean/std series of
    # every option. k is selected per f so that x and y always have the same
    # length, also when the table holds several f values.
    k_hash = {}
    Y_hash = defaultdict(dict)
    Y_hash_std = defaultdict(dict)
    for f in f_vec:
        rows_f = df3.loc[df3['f'] == f]
        k_hash[f] = rows_f['k'].values
        for option in option_vec:
            Y_hash[f][option] = rows_f['accuracy_mean_{}'.format(option)].values
            Y_hash_std[f][option] = rows_f['accuracy_std_{}'.format(option)].values

    if CREATE_PDF or SHOW_PLOT or SHOW_PDF:

        # -- Setup figure
        fig_filename = 'Fig_End-to-End_accuracy_varyK_{}.pdf'.format(CHOICE)
        mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # One curve per (option, f) pair; zip stops at the shortest style vector.
        opt_f_vecs = [(option, f) for option in option_vec for f in f_vec]

        for ((option, f), color, linewidth, clip_on, linestyle, marker, markersize) in \
                zip(opt_f_vecs, facecolor_vec, linewidth_vec, clip_on_vec, linestyle_vec,
                    marker_vec, markersize_vec):

            label = label_vec[option_vec.index(option)]
            X_f = k_hash[f]
            mean = Y_hash[f][option]
            std = Y_hash_std[f][option]

            if STD_FILL:
                ax.fill_between(X_f, mean + std, mean - std,
                                facecolor=color, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X_f, mean + std, linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X_f, mean - std, linewidth=0.5, color='0.8', linestyle='solid')

            ax.plot(X_f, mean, linewidth=linewidth, color=color, linestyle=linestyle,
                    label=label, zorder=4, marker=marker, markersize=markersize,
                    markeredgewidth=1, markeredgecolor='black', clip_on=clip_on)

        # From here on `f`, `X_f` and `clip_on` hold the values of the last
        # plotted series (same as before this block was restructured).
        if CHOICE == 507:
            # reference curve 1/k, labelled 'Random'
            Y_f = [1 / float(i) for i in X_f]
            ax.plot(X_f, Y_f, linewidth=2, color='black', linestyle='dashed',
                    label='Random', zorder=4, marker='x', markersize=8,
                    markeredgewidth=1, markeredgecolor='black', clip_on=clip_on)

        # -- Title and legend
        if distribution == 'uniform':
            distribution_label = ',$uniform'
        else:
            distribution_label = '$'
        if n < 1000:
            n_label = '{}'.format(n)
        else:
            n_label = '{}k'.format(int(n / 1000))

        title(r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.format(n_label, d, h, f, distribution_label))
        handles, legend_labels = ax.get_legend_handles_labels()
        legend = plt.legend(handles, legend_labels,
                            loc='upper right',
                            handlelength=2,
                            labelspacing=0,  # distance between label entries
                            handletextpad=0.3,  # distance between label and the line representation
                            borderaxespad=0.2,  # distance between legend and the outer axes
                            borderpad=0.3,  # padding inside legend box
                            numpoints=1,  # put the marker only once
                            )
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)

        # -- Figure settings and save
        plt.xticks(xtick_lab, xtick_labels)
        plt.yticks(ytick_lab, ytick_lab)
        ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))

        # Only show ticks on the left and bottom spines
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')

        # Visibility is passed positionally: the keyword was renamed from `b`
        # to `visible` in newer matplotlib releases.
        grid(True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)
        grid(True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)
        xlabel(r'Number of Classes $(k)$', labelpad=0)
        ylabel(r'Accuracy', labelpad=0)

        xlim(xmin, xmax)
        ylim(ymin, ymax)

        if CREATE_PDF:
            # `frameon=None` was dropped: newer matplotlib releases no longer
            # accept that keyword in savefig.
            savefig(join(figure_directory, fig_filename), format='pdf',
                    dpi=None,
                    edgecolor='w',
                    orientation='portrait',
                    transparent=False,
                    bbox_inches='tight',
                    pad_inches=0.05)
        if SHOW_PLOT:
            plt.show()
        if SHOW_PDF:
            showfig(join(figure_directory, fig_filename))
def _f_worker_(X0, W, f, f_index):
    """Evaluate every configured learning method for one label fraction ``f``.

    Keeps a fraction ``f`` of the rows of ``X0`` as seed labels, learns (or
    looks up) a compatibility matrix per method, propagates labels with LinBP
    and appends one CSV record per method containing accuracy, precision,
    recall, learning time and propagation time.

    The experiment configuration (``labels``, ``learning_method_vec``,
    ``lambda_vec``, ``H0c``, ``csv_filename``, ...) is not passed in; those
    names must be provided by the surrounding scope.

    Args:
        X0: Full label matrix the results are compared against.
        W: Graph adjacency matrix handed to the learning/propagation helpers.
        f: Fraction of labelled rows to keep.
        f_index: Index of ``f``; selects the entry of ``lambda_vec`` for
            methods whose ``select_lambda`` flag is set.

    Returns:
        The string ``'success'`` once all methods have been processed.

    Raises:
        ValueError: Re-raised (after printing a diagnostic) when propagation,
            scoring or recording fails with a ValueError.
    """
    RANDOMSEED = None  # For repeatability (None = no fixed seed)
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed

    # Drop a fraction (1 - f) of the label rows, then inject label errors.
    X_seed, ind = replace_fraction_of_rows(X0, 1 - f,
                                           avoidNeighbors=avoidNeighbors,
                                           W=W,
                                           stratified=stratified)
    X2 = introduce_errors(X_seed, ind, err)

    settings = zip(labels, select_lambda_vec, learning_method_vec, alpha_vec,
                   beta_vec, gamma_vec, s_vec, numMaxIt_vec, weight_vec,
                   randomize_vec)

    for (label, select_lambda, learning_method, alpha, beta, gamma, s,
         numMaxIt, weights, randomize) in settings:

        method = learning_method
        learn_time = -1  # stays -1 for methods whose learning step is not timed

        # -- Learning
        if method == 'GT':
            H2c = H0c
        elif method == 'Heuristic':
            H2c = H_heuristic
        elif method == 'Holdout':
            H2 = estimateH_baseline_serial(X2, ind, W,
                                           numMax=numMaxIt,
                                           numberOfSplits=numberOfSplits,
                                           EC=EC,
                                           alpha=alpha,
                                           beta=beta,
                                           gamma=gamma,
                                           doubly_stochastic=doubly_stochastic)
            H2c = to_centering_beliefs(H2)
        else:
            # Collapse decorated method names onto the two estimator names.
            if "DCEr" in method:
                method = "DCEr"
            elif "DCE" in method:
                method = "DCE"

            # -- choose optimal lambda: allows to specify different lambda for different f
            weight = lambda_vec[f_index] if select_lambda == True else weights

            # -- learn H
            learn_start = time.time()
            H2 = estimateH(X2, W,
                           method=method,
                           variant=1,
                           distance=length,
                           EC=EC,
                           weights=weight,
                           randomrestarts=num_restarts,
                           randomize=randomize,
                           constraints=constraints,
                           gradient=gradient,
                           doubly_stochastic=doubly_stochastic)
            learn_time = time.time() - learn_start
            H2c = to_centering_beliefs(H2)

        # -- Propagation (the timer includes the eps computation)
        prop_start = time.time()
        eps_max = eps_convergence_linbp_parameterized(H2c, W,
                                                      method='noecho',
                                                      alpha=alpha,
                                                      beta=beta,
                                                      gamma=gamma,
                                                      X=X2)
        eps = s * eps_max

        try:
            F, actualIt, actualPercentageConverged = linBP_symmetric_parameterized(
                X2, W, H2c * eps,
                method='noecho',
                alpha=alpha,
                beta=beta,
                gamma=gamma,
                numMaxIt=numMaxIt,
                convergencePercentage=convergencePercentage_W,
                debug=2)
            prop_time = time.time() - prop_start

            # Class-wise or plain scoring, same call pattern for both.
            score = matrix_difference_classwise if Macro_Accuracy else matrix_difference
            accuracy_X = score(X0, F, ignore_rows=ind)
            precision = score(X0, F, similarity='precision', ignore_rows=ind)
            recall = score(X0, F, similarity='recall', ignore_rows=ind)

            result = [str(datetime.datetime.now()),
                      label, f, accuracy_X, precision, recall,
                      learn_time, prop_time]
            save_csv_record(join(data_directory, csv_filename), result)

        except ValueError as e:
            print("ERROR: {} with {}: d={}, h={}".format(e, method, d, h))
            raise e

    return 'success'
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False, shorten_length=False): # -- Setup CHOICE = choice CREATE_DATA = create_data ADD_DATA = add_data SHOW_PLOT = show_plot SHOW_PDF = show_pdf CREATE_PDF = create_pdf SHOW_ARROWS = False STD_FILL = False CALCULATE_DATA_STATISTICS = False csv_filename = 'Fig_timing_VaryK_{}.csv'.format(CHOICE) header = ['currenttime', 'option', 'k', 'f', 'time'] if CREATE_DATA: save_csv_record(join(data_directory, csv_filename), header, append=False) # -- Default Graph parameters rep_SameGraph = 2 # iterations on same graph initial_h0 = None # initial vector to start finding optimal H distribution = 'powerlaw' exponent = -0.3 length = 5 variant = 1 EC = True # Non-backtracking for learning ymin = 0.0 ymax = 1 xmin = 2 xmax = 7.5 xtick_lab = [2, 3, 4, 5, 6, 7, 8] xtick_labels = ['2', '3', '4', '5', '6', '7', '8'] ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 50] ytick_labels = [ r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$50$' ] f_vec = [0.9 * pow(0.1, 1 / 5)**x for x in range(21)] k_vec = [3, 4, 5] rep_DifferentGraphs = 1000 # iterations on different graphs err = 0 avoidNeighbors = False gradient = False convergencePercentage_W = None stratified = True label_vec = ['*'] * 10 clip_on_vec = [True] * 15 draw_std_vec = range(10) numberOfSplits = 1 linestyle_vec = ['solid'] * 15 linewidth_vec = [3, 2, 4, 2, 3, 2] + [3] * 15 marker_vec = ['^', 's', 'o', 'x', 'o', '+', 's'] * 3 markersize_vec = [8, 7, 8, 10, 7, 6] + [10] * 10 facecolor_vec = [ "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#64B5CD" ] legend_location = 'upper right' # -- Options with propagation variants if CHOICE == 600: ## 1k nodes n = 1000 h = 8 d = 25 option_vec = ['opt1', 'opt2', 'opt3', 'opt4'] learning_method_vec = ['GT', 'MHE', 'DHE', 'Holdout'] weight_vec = [10] * 4 alpha_vec = [0] * 10 beta_vec = [0] * 10 gamma_vec = [0] * 10 s_vec = [0.5] * 10 numMaxIt_vec = [10] * 10 randomize_vec = [False] * 4 + 
[True] xmin = 3. xmax = 10. ymin = 0. ymax = 50. label_vec = ['GT', 'MCE', 'DCE', 'Holdout'] facecolor_vec = [ 'black' ] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 4 f_vec = [0.03, 0.01, 0.001] k_vec = [3, 4, 5, 6] ytick_lab = [0, 1e-3, 1e-2, 1e-1, 1, 10, 50] ytick_labels = [ r'$0$', r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$50$' ] elif CHOICE == 601: ## 10k nodes n = 10000 h = 8 d = 25 option_vec = ['opt1', 'opt2', 'opt3', 'opt4'] learning_method_vec = ['GT', 'MHE', 'DHE', 'Holdout'] weight_vec = [10] * 4 alpha_vec = [0] * 20 beta_vec = [0] * 20 gamma_vec = [0] * 20 s_vec = [0.5] * 20 numMaxIt_vec = [10] * 20 randomize_vec = [False] * 15 + [True] xmin = 3. xmax = 8. ymin = 0. ymax = 500. label_vec = ['GT', 'MCE', 'DCE', 'Holdout'] facecolor_vec = [ 'black' ] + ["#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974"] * 4 f_vec = [0.03, 0.01, 0.001] k_vec = [3, 4, 5] ytick_lab = [0, 1e-3, 1e-2, 1e-1, 1, 10, 100, 300] ytick_labels = [ r'$0$', r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$100$', r'$300$' ] elif CHOICE == 602: ## 10k nodes n = 10000 h = 8 d = 25 weight_vec = [10] * 20 alpha_vec = [0] * 20 beta_vec = [0] * 20 gamma_vec = [0] * 20 s_vec = [0.5] * 20 numMaxIt_vec = [10] * 20 randomize_vec = [False] * 3 + [True] + [False] ymin = 0.01 ymax = 500 label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DHEr'] facecolor_vec = [ "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974" ] * 4 f_vec = [0.01] k_vec = [3, 4, 5] ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500] ytick_labels = [ r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$100$', r'$500$' ] option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4'] learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE'] k_vec = [2, 3, 4, 5, 6, 7, 8] # option_vec = ['opt2', 'opt3', 'opt6'] # learning_method_vec = ['MHE', 'DHE', 'LHE'] # k_vec = [2, 3, 4, 5] elif CHOICE == 603: ## 10k nodes n = 10000 h = 3 d = 25 weight_vec = [10] * 20 alpha_vec = [0] * 20 beta_vec 
= [0] * 20 gamma_vec = [0] * 20 s_vec = [0.5] * 20 numMaxIt_vec = [10] * 20 randomize_vec = [False] * 4 + [True] xmin = 1.8 xmax = 8.2 ymin = 0.01 ymax = 500 label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr'] facecolor_vec = [ "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52" ] * 4 f_vec = [0.01] k_vec = [3, 4, 5] ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500] ytick_labels = [ r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$100$', r'$500$' ] option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4'] learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE'] k_vec = [2, 3, 4, 5, 6, 7, 8] legend_location = 'upper right' # option_vec = ['opt2', 'opt3', 'opt6'] # learning_method_vec = ['MHE', 'DHE', 'LHE'] # k_vec = [2, 3, 4, 5] # option_vec = ['opt4', 'opt3'] # learning_method_vec = ['MHE', 'MHE'] # randomize_vec = [True, False] # k_vec = [2, 3, 4, 5] elif CHOICE == 604: ## 10k nodes with Gradient n = 10000 h = 3 d = 25 weight_vec = [10] * 20 alpha_vec = [0] * 20 beta_vec = [0] * 20 gamma_vec = [0] * 20 s_vec = [0.5] * 20 numMaxIt_vec = [10] * 20 randomize_vec = [False] * 4 + [True] ymin = 0.00 ymax = 800 label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr'] facecolor_vec = [ "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52" ] * 4 f_vec = [0.01] k_vec = [3, 4, 5] ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500] ytick_labels = [ r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$100$', r'$500$' ] option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4'] learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE'] k_vec = [2, 3, 4, 5, 6, 7, 8] # k_vec = [7, 8] gradient = True legend_location = 'center right' elif CHOICE == 605: ## 10k nodes with Gradient with f = 0.005 n = 10000 h = 3 d = 25 weight_vec = [10] * 20 alpha_vec = [0] * 20 beta_vec = [0] * 20 gamma_vec = [0] * 20 s_vec = [0.5] * 20 numMaxIt_vec = [10] * 20 randomize_vec = [False] * 4 + [True] ymin = 0.00 ymax = 800 label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr'] 
facecolor_vec = [ "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52" ] * 4 f_vec = [0.005] k_vec = [3, 4, 5] ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500] ytick_labels = [ r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$100$', r'$500$' ] option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4'] learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE'] k_vec = [2, 3, 4, 5, 6, 7] # k_vec = [7, 8] gradient = True legend_location = 'center right' elif CHOICE == 606: ## 10k nodes with Gradient with f = 0.005 and Gradient and PruneRandom n = 10000 h = 3 d = 25 weight_vec = [10] * 20 alpha_vec = [0] * 20 beta_vec = [0] * 20 gamma_vec = [0] * 20 s_vec = [0.5] * 20 numMaxIt_vec = [10] * 20 randomize_vec = [False] * 4 + [True] xmin = 1.8 xmax = 7.2 ymin = 0.01 ymax = 800 label_vec = ['Holdout', 'LCE', 'MCE', 'DCE', 'DCEr'] facecolor_vec = [ "#CCB974", "#55A868", "#4C72B0", "#8172B2", "#C44E52" ] * 4 f_vec = [0.005] k_vec = [3, 4, 5] ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500] ytick_labels = [ r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$100$', r'$500$' ] option_vec = ['opt5', 'opt6', 'opt2', 'opt3', 'opt4'] learning_method_vec = ['Holdout', 'LHE', 'MHE', 'DHE', 'DHE'] k_vec = [2, 3, 4, 5, 6, 7] gradient = True pruneRandom = True legend_location = 'upper right' elif CHOICE == 607: ## 10k nodes with gradient and PruneRandom n = 10000 h = 3 d = 25 option_vec = ['opt2', 'opt3', 'opt4', 'opt5', 'opt6'] learning_method_vec = ['LHE', 'MHE', 'DHE', 'DHE', 'Holdout'] weight_vec = [10] * 10 alpha_vec = [0] * 10 beta_vec = [0] * 10 gamma_vec = [0] * 10 s_vec = [0.5] * 10 numMaxIt_vec = [10] * 10 randomize_vec = [False] * 3 + [True] + [False] xmin = 1.8 xmax = 7. 
ymin = 0.01 ymax = 800 label_vec = ['LCE', 'MCE', 'DCE', 'DCEr', 'Holdout'] facecolor_vec = [ "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974" ] * 4 legend_location = 'upper left' marker_vec = [None, 's', 'x', 'o', '^', '+'] * 3 markersize_vec = [8, 7, 10, 8, 7, 6] + [10] * 10 f_vec = [0.01] k_vec = [2, 3, 4, 5, 6, 7, 8] clip_on_vec = [True] * 10 gradient = True pruneRandom = True ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500] ytick_labels = [ r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$100$', r'$500$' ] elif CHOICE == 608: ## 10k nodes with gradient and PruneRandom n = 10000 h = 3 d = 25 option_vec = ['opt2', 'opt3', 'opt4', 'opt5', 'opt6'] learning_method_vec = ['LHE', 'MHE', 'DHE', 'DHE', 'Holdout'] weight_vec = [10] * 10 alpha_vec = [0] * 10 beta_vec = [0] * 10 gamma_vec = [0] * 10 s_vec = [0.5] * 10 numMaxIt_vec = [10] * 10 randomize_vec = [False] * 3 + [True] + [False] xmin = 1.8 xmax = 7.2 ymin = 0.01 ymax = 800 label_vec = ['LCE', 'MCE', 'DCE', 'DCEr', 'Holdout'] facecolor_vec = [ "#55A868", "#4C72B0", "#8172B2", "#C44E52", "#CCB974" ] * 4 legend_location = 'upper left' marker_vec = [None, 's', 'x', 'o', '^', '+'] * 3 markersize_vec = [8, 7, 10, 8, 7, 6] + [10] * 10 f_vec = [0.01] k_vec = [2, 3, 4, 5, 6, 7, 8] clip_on_vec = [True] * 10 gradient = True pruneRandom = True ytick_lab = [1e-3, 1e-2, 1e-1, 1, 10, 100, 500] ytick_labels = [ r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$100$', r'$500$' ] rep_DifferentGraphs = 10 else: raise Warning("Incorrect choice!") RANDOMSEED = None # For repeatability random.seed(RANDOMSEED) # seeds some other python random generator np.random.seed( seed=RANDOMSEED ) # seeds the actually used numpy random generator; both are used and thus needed # print("CHOICE: {}".format(CHOICE)) # -- Create data if CREATE_DATA or ADD_DATA: for i in range(rep_DifferentGraphs ): # create several graphs with same parameters # print("\ni: {}".format(i)) for k in k_vec: # print("\nk: {}".format(k)) H0 = 
create_parameterized_H(k, h, symmetric=True) H0c = to_centering_beliefs(H0) a = [1.] * k alpha0 = np.array(a) alpha0 = alpha0 / np.sum(alpha0) W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d, distribution=distribution, exponent=exponent, directed=False, debug=False) X0 = from_dictionary_beliefs(Xd) for j in range( rep_SameGraph): # repeat several times for same graph # print("j: {}".format(j)) ind = None for f in f_vec: # Remove fraction (1-f) of rows from X0 (notice that different from first implementation) X1, ind = replace_fraction_of_rows( X0, 1 - f, avoidNeighbors=avoidNeighbors, W=W, ind_prior=ind, stratified=stratified) X2 = introduce_errors(X1, ind, err) for option_index, (learning_method, alpha, beta, gamma, s, numMaxIt, weights, randomize) in \ enumerate(zip(learning_method_vec, alpha_vec, beta_vec, gamma_vec, s_vec, numMaxIt_vec, weight_vec, randomize_vec)): # -- Learning if learning_method == 'GT': timeTaken = 0.0 elif learning_method == 'Holdout': prev_time = time.time() H2 = estimateH_baseline_serial( X2, ind, W, numMax=numMaxIt, numberOfSplits=numberOfSplits, EC=EC, alpha=alpha, beta=beta, gamma=gamma) timeTaken = time.time() - prev_time else: prev_time = time.time() if gradient and pruneRandom: H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=randomize, gradient=gradient) else: H2 = estimateH(X2, W, method=learning_method, variant=1, distance=length, EC=EC, weights=weights, randomize=randomize) timeTaken = time.time() - prev_time tuple = [str(datetime.datetime.now())] text = [option_vec[option_index], k, f, timeTaken] tuple.extend(text) # print("option: {}, f: {}, timeTaken: {}".format(option_vec[option_index], f, timeTaken)) save_csv_record(join(data_directory, csv_filename), tuple) # -- Read, aggregate, and pivot data for all options df1 = pd.read_csv(join(data_directory, csv_filename)) # print("\n-- df1: (length {}):\n{}".format(len(df1.index), df1.head(15))) # -- 
Aggregate repetitions df2 = df1.groupby(['option', 'k', 'f']).agg \ ({'time': [np.mean, np.std, np.size, np.median], # Multiple Aggregates }) df2.columns = ['_'.join(col).strip() for col in df2.columns.values ] # flatten the column hierarchy df2.reset_index(inplace=True) # remove the index hierarchy df2.rename(columns={'time_size': 'count'}, inplace=True) # print("\n-- df2 (length {}):\n{}".format(len(df2.index), df2.head(15))) # -- Pivot table df3 = pd.pivot_table(df2, index=['f', 'k'], columns=['option'], values=['time_mean', 'time_std', 'time_median']) # Pivot # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(30))) df3.columns = ['_'.join(col).strip() for col in df3.columns.values ] # flatten the column hierarchy df3.reset_index(inplace=True) # remove the index hierarchy # df2.rename(columns={'time_size': 'count'}, inplace=True) # print("\n-- df3 (length {}):\n{}".format(len(df3.index), df3.head(100))) # X_f = k_vec X_f = df3['k'].values # read k from values instead Y_hash = defaultdict(dict) Y_hash_std = defaultdict(dict) for f in f_vec: for option in option_vec: Y_hash[f][option] = list() Y_hash_std[f][option] = list() for f in f_vec: for option in option_vec: Y_hash[f][option] = df3.loc[df3['f'] == f]['time_mean_{}'.format( option)].values # mean # Y_hash[f][option] = df3.loc[df3['f'] == f]['time_median_{}'.format(option)].values # median Y_hash_std[f][option] = df3.loc[df3['f'] == f][ 'time_std_{}'.format(option)].values if SHOW_PLOT or SHOW_PDF or CREATE_PDF: # -- Setup figure fig_filename = 'Fig_Time_varyK_{}.pdf'.format(CHOICE) mpl.rc( 'font', **{ 'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans'] }) mpl.rcParams['axes.labelsize'] = 20 mpl.rcParams['xtick.labelsize'] = 16 mpl.rcParams['ytick.labelsize'] = 16 mpl.rcParams['legend.fontsize'] = 14 mpl.rcParams['grid.color'] = '777777' # grid color mpl.rcParams[ 'xtick.major.pad'] = 2 # padding of tick labels: default = 4 mpl.rcParams[ 'ytick.major.pad'] = 1 # padding of 
tick labels: default = 4 mpl.rcParams['xtick.direction'] = 'out' # default: 'in' mpl.rcParams['ytick.direction'] = 'out' # default: 'in' mpl.rcParams['axes.titlesize'] = 16 mpl.rcParams['figure.figsize'] = [4, 4] fig = figure() ax = fig.add_axes([0.13, 0.17, 0.8, 0.8]) opt_f_vecs = [(option, f) for option in option_vec for f in f_vec] for ((option, f), color, linewidth, clip_on, linestyle, marker, markersize) in \ zip(opt_f_vecs, facecolor_vec, linewidth_vec, clip_on_vec, linestyle_vec, marker_vec, markersize_vec): label = label_vec[option_vec.index(option)] # label = label + " " + str(f) if STD_FILL: ax.fill_between(X_f, Y_hash[f][option] + Y_hash_std[f][option], Y_hash[f][option] - Y_hash_std[f][option], facecolor=color, alpha=0.2, edgecolor=None, linewidth=0) ax.plot(X_f, Y_hash[f][option] + Y_hash_std[f][option], linewidth=0.5, color='0.8', linestyle='solid') ax.plot(X_f, Y_hash[f][option] - Y_hash_std[f][option], linewidth=0.5, color='0.8', linestyle='solid') ax.plot(X_f, Y_hash[f][option], linewidth=linewidth, color=color, linestyle=linestyle, label=label, zorder=4, marker=marker, markersize=markersize, markeredgecolor='black', markeredgewidth=1, clip_on=clip_on) if SHOW_ARROWS: for indx in [2, 3]: ax.annotate(s='', xy=(X_f[indx] - 0.05, Y_hash[f]['opt4'][indx]), xytext=(X_f[indx] - 0.05, Y_hash[f]['opt5'][indx]), arrowprops=dict(facecolor='blue', arrowstyle='<->')) ax.annotate( str( int( np.round(Y_hash[f]['opt5'][indx] / Y_hash[f]['opt4'][indx]))) + 'x', xy=(X_f[indx] - 0.4, (Y_hash[f]['opt5'][indx] + Y_hash[f]['opt4'][indx]) / 10), color='black', va='center', annotation_clip=False, zorder=5) # -- Title and legend if distribution == 'uniform': distribution_label = ',$uniform' else: distribution_label = '$' if n < 1000: n_label = '{}'.format(n) else: n_label = '{}k'.format(int(n / 1000)) title(r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}, f\!=\!{}{}'.format( n_label, d, h, f, distribution_label)) handles, label_vec = ax.get_legend_handles_labels() legend = 
plt.legend( handles, label_vec, loc=legend_location, # 'upper right' handlelength=2, labelspacing=0, # distance between label entries handletextpad= 0.3, # distance between label and the line representation borderaxespad=0.2, # distance between legend and the outer axes borderpad=0.3, # padding inside legend box numpoints=1, # put the marker only once ) # # legend.set_zorder(1) frame = legend.get_frame() frame.set_linewidth(0.0) frame.set_alpha(0.9) # 0.8 # -- Figure settings and save plt.yscale('log') plt.xticks(xtick_lab, xtick_labels) plt.yticks(ytick_lab, ytick_lab) # Only show ticks on the left and bottom spines ax.yaxis.set_ticks_position('left') ax.xaxis.set_ticks_position('bottom') plt.xlim(xmin, xmax) plt.ylim(ymin, ymax) grid(b=True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5) # linestyle='dashed', which='minor', axis='y', grid(b=True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5) # linestyle='dashed', which='minor', axis='y', xlabel(r'Number of Classes $(k)$', labelpad=0) # labelpad=0 ylabel(r'Time [sec]', labelpad=0) if CREATE_PDF: savefig(join(figure_directory, fig_filename), format='pdf', dpi=None, edgecolor='w', orientation='portrait', transparent=False, bbox_inches='tight', pad_inches=0.05, frameon=None) if SHOW_PLOT: plt.show() if SHOW_PDF: showfig(join(figure_directory, fig_filename)) # shows actually created PDF
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False,
        shorten_length=False, show_arrows=False):
    """Run the timing-vs-accuracy experiment for the learning methods and plot it.

    For every configured option a matrix H is obtained (``estimateH``,
    ``estimateH_baseline_serial`` or the planted ``H0`` for 'GS'), labels are
    propagated with ``linBP_symmetric_parameterized``, and the accuracy
    (``matrix_difference``) together with the wall-clock time of the estimation
    call is appended to a CSV file. The CSV is then read back, aggregated per
    (option, lensplit, f) and plotted as accuracy over median time (log x axis).

    Args:
        choice: Integer id of the experiment configuration (1-7).
        create_data: Write the CSV header with ``append=False`` and generate records.
        add_data: Generate records and add them to the existing CSV.
        show_plot: Call ``plt.show()`` at the end.
        create_pdf: Save the figure as PDF into ``figure_directory``.
        show_pdf: Open the saved PDF with ``showfig``.
        shorten_length: Not used by this function.
        show_arrows: Accepted for compatibility. NOTE(review): the original code
            overwrote this flag with ``True`` right before plotting, so it never
            had an effect; that behaviour is kept so existing figures stay the same.

    Raises:
        Warning: If ``choice`` is not a supported configuration.
    """
    # `collections.Container` was removed in Python 3.10; the ABC lives in collections.abc.
    from collections.abc import Container

    # -- Setup
    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    SHOW_PDF = show_pdf
    CREATE_PDF = create_pdf
    SHOW_STD = True         # False: draw plain scatter points without std bands / error bars
    SHOW_ARROWS = True      # see docstring: `show_arrows` was always overridden

    # -- Default graph and learning parameters
    rep_SameGraph = 1       # iterations on same graph
    rep_DifferentGraphs = 1  # iterations on different graphs
    distribution = 'powerlaw'
    exponent = -0.3
    variant = 1
    EC = False
    k = 3
    a = 1
    err = 0
    avoidNeighbors = False
    stratified = True
    convergencePercentage_W = 0.99
    length_vec = [5]
    f = 0.01
    numMaxIt_vec = [10] * 7
    alpha_vec = [0] * 7
    beta_vec = [0] * 7      # TODO: LinBP does not use beta. Also SSLH uses alpha, but not beta for W^row! Now fixed
    gamma_vec = [0] * 7
    s_vec = [0.5] * 7

    # -- Default plot parameters
    ymin, ymax = 0.3, 1
    xmin, xmax = 1e-3, 1e3
    xtick_lab = [1e-3, 0.01, 0.1, 1, 10, 100, 1000]
    xtick_labels = [r'$10^{-3}$', r'$10^{-2}$', r'$10^{-1}$', r'$1$', r'$10$', r'$10^{2}$', r'$10^{3}$']
    ytick_lab = np.arange(0, 1.1, 0.1)
    facecolor_vec = ["#4C72B0", "#55A868", "#8172B2", "#C44E52", "#CCB974", "#64B5CD"]
    linewidth_vec = [4, 3, 1, 2, 2, 1]
    marker_vec = ['s', '^', 'v', 'o', 'x', '+', 'None']
    FILEZNAME = 'Fig_timing_accuracy_learning'

    # -- Main options
    if CHOICE in (1, 4, 5, 6, 7):
        # Six options, including the Holdout baseline
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5', 'opt6']
        learning_method_vec = ['MHE', 'LHE', 'DHE', 'DHE', 'Holdout', 'GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'Holdout', 'GS']
        randomize_vec = [False] * 3 + [True] + [None] * 2
        scaling_vec = [None] * 2 + [10, 100] + [None] * 2
        splits_vec = [1, 2, 4, 8]
    elif CHOICE in (2, 3):
        # Five options, no Holdout baseline (hence no splits_vec)
        option_vec = ['opt1', 'opt2', 'opt3', 'opt4', 'opt5']
        learning_method_vec = ['MHE', 'LHE', 'DHE', 'DHE', 'GS']
        label_vec = ['MCE', 'LCE', 'DCE', 'DCE r', 'GS']
        randomize_vec = [False] * 3 + [True] + [None]
        scaling_vec = [None] * 2 + [10, 100] + [None]
    else:
        raise Warning("Incorrect choice!")

    # Graph size shared by most choices; per-choice overrides follow.
    n, h, d = 1000, 3, 25
    if CHOICE == 3:
        f = 0.02
    elif CHOICE == 4:       # TODO: Overnight Wolfgang
        splits_vec = [1, 2, 4, 8, 16]
    elif CHOICE == 5:       # Toy graph with 100 nodes
        n = 100
        d = 8
        f = 0.05
    elif CHOICE == 6:       # To be run by Prakhar on Cluster
        n = 10000
        label_vec[3] = 'DCEr'
        f = 0.003
        ymin, ymax = 0.2, 0.9
        xmin, xmax = 1e-2, 1e3
    elif CHOICE == 7:
        splits_vec = [1, 2, 4, 8, 16]
        f = 0.009
    # (A CHOICE == 8 variant with f=0.005 and splits up to 16 was tried and marked "not working well".)

    csv_filename = '{}_{}.csv'.format(FILEZNAME, CHOICE)
    header = ['currenttime', 'option', 'lensplit', 'f', 'accuracy', 'timetaken']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for _ in range(rep_DifferentGraphs):  # create several graphs with same parameters
            W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                                 distribution=distribution, exponent=exponent,
                                                 directed=False, debug=False)
            X0 = from_dictionary_beliefs(Xd)

            for _ in range(rep_SameGraph):  # repeat several times for same graph
                ind = None
                X1, ind = replace_fraction_of_rows(X0, 1 - f, avoidNeighbors=avoidNeighbors, W=W,
                                                   ind_prior=ind, stratified=stratified)
                X2 = introduce_errors(X1, ind, err)

                for (learning_method, alpha, beta, gamma, s, numMaxIt, weight, randomize, option) in \
                        zip(learning_method_vec, alpha_vec, beta_vec, gamma_vec, s_vec, numMaxIt_vec,
                            scaling_vec, randomize_vec, option_vec):
                    # Estimated H and estimation time, keyed by number of splits / path length / 'GS'
                    H_est_dict = {}
                    timeTaken_dict = {}

                    # -- Learning (only the estimation call itself is timed)
                    if learning_method == 'Holdout':
                        for numberOfSplits in splits_vec:
                            prev_time = time.time()
                            H_est_dict[numberOfSplits] = estimateH_baseline_serial(
                                X2, ind, W, numMax=numMaxIt,
                                numberOfSplits=numberOfSplits,
                                EC=EC,
                                weights=weight,
                                alpha=alpha, beta=beta, gamma=gamma)
                            timeTaken_dict[numberOfSplits] = time.time() - prev_time
                    elif learning_method in ['LHE', 'MHE', 'DHE']:
                        # TODO: no smartInit, just randomization as option
                        for path_length in length_vec:
                            prev_time = time.time()
                            H_est_dict[path_length] = estimateH(X2, W, method=learning_method, variant=variant,
                                                                randomize=randomize, distance=path_length,
                                                                EC=EC, weights=weight)
                            timeTaken_dict[path_length] = time.time() - prev_time
                    elif learning_method == 'GS':
                        H_est_dict['GS'] = H0

                    # -- Propagation and bookkeeping for every estimate
                    for key, H_est in H_est_dict.items():
                        H2c = to_centering_beliefs(H_est)
                        eps_max = eps_convergence_linbp_parameterized(H2c, W, method='noecho',
                                                                      alpha=alpha, beta=beta, gamma=gamma,
                                                                      X=X2)
                        eps = s * eps_max
                        try:
                            F, actualIt, actualPercentageConverged = \
                                linBP_symmetric_parameterized(X2, W, H2c * eps,
                                                              method='noecho',
                                                              alpha=alpha, beta=beta, gamma=gamma,
                                                              numMaxIt=numMaxIt,
                                                              convergencePercentage=convergencePercentage_W,
                                                              convergenceThreshold=0.99,
                                                              debug=2)
                        except ValueError as e:
                            print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))
                        else:
                            accuracy_X = matrix_difference(X0, F, ignore_rows=ind)

                            record = [str(datetime.datetime.now())]
                            if learning_method == 'Holdout':
                                text = [option, "split{}".format(key), f, accuracy_X, timeTaken_dict[key]]
                            elif learning_method in ['MHE', 'DHE', 'LHE']:
                                text = [option, "len{}".format(key), f, accuracy_X, timeTaken_dict[key]]
                            elif learning_method == 'GS':
                                text = [option, 0, f, accuracy_X, 0]
                            record.extend(text)
                            save_csv_record(join(data_directory, csv_filename), record)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))

    # Aggregate repetitions. String names are used instead of numpy callables:
    # same result and column names, without the pandas deprecation warning.
    group_cols = ['option', 'lensplit', 'f']
    df2 = df1.groupby(group_cols).agg({'accuracy': ['mean', 'std', 'size']})
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)

    df3 = df1.groupby(group_cols).agg({'timetaken': ['median']})
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy

    X_time_median_dict = {}
    Y_acc_dict = {}
    Y_std_dict = {}
    for option in option_vec:
        Y_acc_dict[option] = df2.loc[(df2['option'] == option), "accuracy_mean"].values
        Y_std_dict[option] = df2.loc[(df2['option'] == option), "accuracy_std"].values
        X_time_median_dict[option] = df3.loc[(df3['option'] == option), "timetaken_median"].values

    # -- Setup figure
    fig_filename = '{}_{}.pdf'.format(FILEZNAME, CHOICE)
    mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
    mpl.rcParams['axes.labelsize'] = 18
    mpl.rcParams['xtick.labelsize'] = 16
    mpl.rcParams['ytick.labelsize'] = 16
    mpl.rcParams['axes.titlesize'] = 16
    mpl.rcParams['legend.fontsize'] = 14
    mpl.rcParams['grid.color'] = '777777'  # grid color
    mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
    mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
    mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
    mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
    mpl.rcParams['figure.figsize'] = [4, 4]
    fig = figure()
    ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

    # -- Drawing
    for option, color, learning_method, label, linewidth, marker in \
            zip(option_vec, facecolor_vec, learning_method_vec, label_vec, linewidth_vec, marker_vec):
        times = X_time_median_dict[option]
        accs = Y_acc_dict[option]
        stds = Y_std_dict[option]

        if learning_method == 'Holdout':
            # One point per number of splits, drawn as a line sorted by time
            order = times.argsort()
            X1 = times[order]
            Y1 = accs[order]
            Y2 = stds[order]

            if SHOW_STD:
                ax.fill_between(X1, Y1 + Y2, Y1 - Y2, facecolor=color, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X1, Y1 + Y2, linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X1, Y1 - Y2, linewidth=0.5, color='0.8', linestyle='solid')
                ax.set_ylim(bottom=ymin)
                ax.plot(X1, Y1, linewidth=linewidth, color=color, linestyle='solid', label=label, zorder=20,
                        marker='x', markersize=linewidth + 5, markeredgewidth=1)
                # Label the second-fastest point with its time
                ax.annotate(str(np.round(X1[1], decimals=1)), xy=(X1[1], Y1[1] - 0.05), color=color,
                            va='center', annotation_clip=False, zorder=5)
            else:
                ax.scatter(list(X1), list(Y1), color=color, label=label, marker='x', s=42)

        elif learning_method == 'GS':
            # Horizontal reference line spanning the whole time axis
            ax.plot([1e-4, 1e4], [accs, accs],
                    linewidth=1, color='black',
                    linestyle='dashed', zorder=0,
                    marker=None, label=label)

        else:  # For all other
            if SHOW_STD:
                ax.errorbar(list(times), list(accs), yerr=stds,
                            fmt='-o', linewidth=2, color=color,
                            label=label, marker=marker, markersize=8)
                ax.annotate(str(np.round(times, decimals=2)), xy=(times, accs - 0.05), color=color,
                            va='center', annotation_clip=False, zorder=5)
            else:
                ax.scatter(list(times), list(accs),
                           color=color, label=label, marker=marker, s=42)

    if SHOW_ARROWS:
        dce_opt = 'opt4'
        holdout_opt = 'opt5'
        method_of = dict(zip(option_vec, learning_method_vec))
        # The arrow compares DCEr with the third Holdout entry. Skip it when this
        # configuration has no Holdout option or too little data (previously an IndexError).
        has_arrow_data = (method_of.get(holdout_opt) == 'Holdout'
                          and len(X_time_median_dict[holdout_opt]) > 2
                          and len(X_time_median_dict[dce_opt]) > 0
                          and len(Y_acc_dict[dce_opt]) > 0)
        if has_arrow_data:
            dce_time = X_time_median_dict[dce_opt][0]
            dce_acc = Y_acc_dict[dce_opt][0]
            holdout_time = X_time_median_dict[holdout_opt][2]
            # The text is passed positionally: the keyword was renamed from `s` to `text` in Matplotlib.
            ax.annotate('', xy=(dce_time, dce_acc - 0.3),
                        xytext=(holdout_time + 0.02, dce_acc - 0.3),
                        arrowprops=dict(arrowstyle='<->'))
            ax.annotate(str(int(np.round(holdout_time / dce_time))) + 'x',
                        xy=((dce_time + holdout_time) / 100, dce_acc - 0.28),
                        color='black', va='center',
                        annotation_clip=False, zorder=5)

    # -- Title and legend
    # Only abbreviate to thousands when n is at least 1000 (n=100 used to be shown as "0k").
    if n < 1000:
        n_label = '{}'.format(n)
    else:
        n_label = r'{}\mathrm{{k}}'.format(int(n / 1000))
    title(r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}, f\!=\!{}$'.format(n_label, d, h, f))

    handles, label_vec = ax.get_legend_handles_labels()
    # remove error bars in legend: container handles are replaced by their first element
    handles = [handle[0] if isinstance(handle, Container) else handle for handle in handles]

    # The original built a first legend (fontsize 12) that was always replaced by this one.
    legend = plt.legend(handles, label_vec,
                        loc='upper right',
                        handlelength=2,
                        fontsize=10,
                        labelspacing=0.2,  # distance between label entries
                        handletextpad=0.3,  # distance between label and the line representation
                        borderaxespad=0.2,  # distance between legend and the outer axes
                        borderpad=0.3,  # padding inside legend box
                        numpoints=1,  # put the marker only once
                        scatterpoints=1  # display only one-scatter point in legend
                        )
    frame = legend.get_frame()
    frame.set_linewidth(0.0)
    frame.set_alpha(0.9)  # 0.8

    # -- Figure settings and save
    plt.xscale('log')
    plt.xticks(xtick_lab, xtick_labels)
    plt.yticks(ytick_lab, ytick_lab)
    ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_ylim(bottom=ymin)

    # First argument passed positionally: the keyword was renamed from `b` to `visible` in Matplotlib.
    grid(True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)
    grid(True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)

    xlim(xmin, xmax)
    ylim(ymin, ymax)

    xlabel(r'Time Median (sec)', labelpad=0)
    ylabel(r'Accuracy', labelpad=0)

    if CREATE_PDF:
        # `frameon=None` (the default) is no longer passed; newer Matplotlib rejects the keyword.
        savefig(join(figure_directory, fig_filename), format='pdf',
                dpi=None,
                edgecolor='w',
                orientation='portrait',
                transparent=False,
                bbox_inches='tight',
                pad_inches=0.05)
    if SHOW_PDF:
        showfig(join(figure_directory, fig_filename))
    if SHOW_PLOT:
        plt.show()
def run(choice, create_data=False, add_data=False, show_plot=False, create_pdf=False, show_pdf=False,
        shorten_length=False):
    """Run the homophily-baseline experiment and plot accuracy over label sparsity f.

    For every value in ``f_vec`` and every option, a matrix H is chosen (the
    planted ``H0`` for 'GT', the identity matrix for 'Homophily', or the result
    of ``estimateH`` for 'DHE'), labels are propagated with
    ``linBP_symmetric_parameterized`` and the accuracy
    (``matrix_difference_classwise``) is appended to a CSV file. The CSV is then
    read back, aggregated per (option, f) and plotted on a log x axis.

    Args:
        choice: Integer id of the experiment configuration (only 101 is defined).
        create_data: Write the CSV header with ``append=False`` and generate records.
        add_data: Generate records and add them to the existing CSV.
        show_plot: Call ``plt.show()`` at the end.
        create_pdf: Save the figure as PDF into ``figure_directory``.
        show_pdf: Open the saved PDF with ``showfig``.
        shorten_length: Keep only every second aggregated point when plotting.

    Raises:
        Warning: If ``choice`` is not a supported configuration.
    """
    # Kept from the original: these three names are module-level, so the
    # assignments below (including the legend labels at the end) rebind globals.
    global f_vec, labels, facecolor_vec

    CHOICE = choice
    CREATE_DATA = create_data
    ADD_DATA = add_data
    SHOW_PLOT = show_plot
    SHOW_PDF = show_pdf
    CREATE_PDF = create_pdf
    STD_FILL = True
    # The original assignment was commented out, leaving the name undefined when
    # it is read further down; it is now driven by the `shorten_length` parameter.
    SHORTEN_LENGTH = shorten_length

    fig_filename = 'Fig_homophily_{}.pdf'.format(CHOICE)
    csv_filename = 'Fig_homophily_{}.csv'.format(CHOICE)
    header = ['currenttime', 'option', 'f', 'accuracy']
    if CREATE_DATA:
        save_csv_record(join(data_directory, csv_filename), header, append=False)

    # -- Default graph and learning parameters
    k = 3
    rep_DifferentGraphs = 1
    rep_SameGraph = 2
    distribution = 'powerlaw'
    exponent = -0.3
    length = 5
    constraint = True
    variant = 1
    EC = True  # Non-backtracking for learning
    s = 0.5
    err = 0
    numMaxIt = 10
    avoidNeighbors = False
    convergencePercentage_W = None
    stratified = True

    # -- Default plot parameters
    clip_on_vec = [True] * 10
    ymin, ymax = 0.3, 1
    xmin, xmax = 0.001, 1
    xtick_lab = [0.00001, 0.0001, 0.001, 0.01, 0.1, 1]
    # Raw strings: same characters as before, without invalid-escape warnings
    xtick_labels = ['1e-5', r'0.01\%', r'0.1\%', r'1\%', r'10\%', r'100\%']
    ytick_lab = np.arange(0, 1.1, 0.1)
    linestyle_vec = ['dashed'] + ['solid'] * 10
    linewidth_vec = [5, 2, 3, 3, 3, 3] + [3] * 10
    marker_vec = [None, '^', 'v', 'o', '^'] + [None] * 10
    markersize_vec = [0, 8, 8, 8, 6, 6] + [6] * 10
    facecolor_vec = ['black', "#C44E52", "#64B5CD"]

    # -- Options with propagation variants
    if CHOICE == 101:
        n = 10000
        h = 3
        d = 15
        f_vec = [0.9 * pow(0.1, 1 / 5) ** x for x in range(21)]
        option_vec = ['opt1', 'opt2', 'opt3']
        learning_method_vec = ['GT', 'DHE', 'Homophily']
        weight_vec = [None] + [10] + [None]
        randomize_vec = [None] + [True] + [None]
        xmin = 0.001
        ymin = 0.3
        ymax = 1
        labels = ['GS', 'DCEr', 'Homophily']
    else:
        raise Warning("Incorrect choice!")

    a = 1
    alpha0 = np.array([a, 1., 1.])
    alpha0 = alpha0 / np.sum(alpha0)
    H0 = create_parameterized_H(k, h, symmetric=True)

    RANDOMSEED = None  # For repeatability
    random.seed(RANDOMSEED)  # seeds some other python random generator
    np.random.seed(seed=RANDOMSEED)  # seeds the actually used numpy random generator; both are used and thus needed

    # -- Create data
    if CREATE_DATA or ADD_DATA:
        for _ in range(rep_DifferentGraphs):  # create several graphs with same parameters
            W, Xd = planted_distribution_model_H(n, alpha=alpha0, H=H0, d_out=d,
                                                 distribution=distribution, exponent=exponent,
                                                 directed=False, debug=False)
            X0 = from_dictionary_beliefs(Xd)

            for _ in range(rep_SameGraph):  # repeat several times for same graph
                ind = None  # carried from one f to the next through ind_prior

                for f in f_vec:
                    X1, ind = replace_fraction_of_rows(X0, 1 - f, avoidNeighbors=avoidNeighbors, W=W,
                                                       ind_prior=ind, stratified=stratified)
                    X2 = introduce_errors(X1, ind, err)

                    for option, learning_method, weights, randomize in \
                            zip(option_vec, learning_method_vec, weight_vec, randomize_vec):

                        # -- Learning
                        if learning_method == 'GT':
                            H2 = H0
                        elif learning_method == 'Homophily':
                            H2 = np.identity(k)
                        elif learning_method == 'DHE':
                            H2 = estimateH(X2, W, method=learning_method, variant=variant, distance=length,
                                           EC=EC, weights=weights, randomize=randomize,
                                           constraints=constraint)

                        # -- Propagation
                        H2c = to_centering_beliefs(H2)
                        try:
                            eps_max = eps_convergence_linbp_parameterized(H2c, W, method='noecho', X=X2)
                            eps = s * eps_max
                            F, actualIt, actualPercentageConverged = \
                                linBP_symmetric_parameterized(X2, W, H2c * eps,
                                                              method='noecho',
                                                              numMaxIt=numMaxIt,
                                                              convergencePercentage=convergencePercentage_W,
                                                              debug=2)
                        except ValueError as e:
                            print("ERROR: {} with {}: d={}, h={}".format(e, learning_method, d, h))
                        else:
                            accuracy_X = matrix_difference_classwise(X0, F, ignore_rows=ind)

                            record = [str(datetime.datetime.now())]
                            record.extend([option, f, accuracy_X])
                            save_csv_record(join(data_directory, csv_filename), record)

    # -- Read, aggregate, and pivot data for all options
    df1 = pd.read_csv(join(data_directory, csv_filename))
    desired_decimals = 7
    df1['f'] = df1['f'].apply(lambda x: round(x, desired_decimals))  # rounding due to different starting points

    # Aggregate repetitions. String names are used instead of numpy callables:
    # same result and column names, without the pandas deprecation warning.
    df2 = df1.groupby(['option', 'f']).agg({'accuracy': ['mean', 'std', 'size']})
    df2.columns = ['_'.join(col).strip() for col in df2.columns.values]  # flatten the column hierarchy
    df2.reset_index(inplace=True)  # remove the index hierarchy
    df2.rename(columns={'accuracy_size': 'count'}, inplace=True)

    # Pivot table: one row per f, one mean/std column per option
    df3 = pd.pivot_table(df2, index=['f'], columns=['option'], values=['accuracy_mean', 'accuracy_std'])
    df3.columns = ['_'.join(col).strip() for col in df3.columns.values]  # flatten the column hierarchy
    df3.reset_index(inplace=True)  # remove the index hierarchy

    # Extract values
    X_f = df3['f'].values  # plot x values
    Y = [df3['accuracy_mean_{}'.format(option)].values for option in option_vec]
    Y_std = [df3['accuracy_std_{}'.format(option)].values for option in option_vec] if STD_FILL else []

    if SHORTEN_LENGTH:
        SHORT_FACTOR = 2  # KEEP EVERY Nth ELEMENT
        X_f = X_f[::SHORT_FACTOR].copy()
        Y = [values[::SHORT_FACTOR].copy() for values in Y]
        Y_std = [values[::SHORT_FACTOR].copy() for values in Y_std]

    if CREATE_PDF or SHOW_PLOT or SHOW_PDF:
        # -- Setup figure
        mpl.rc('font', **{'family': 'sans-serif', 'sans-serif': [u'Arial', u'Liberation Sans']})
        mpl.rcParams['axes.labelsize'] = 20
        mpl.rcParams['xtick.labelsize'] = 16
        mpl.rcParams['ytick.labelsize'] = 16
        mpl.rcParams['legend.fontsize'] = 14
        mpl.rcParams['grid.color'] = '777777'  # grid color
        mpl.rcParams['xtick.major.pad'] = 2  # padding of tick labels: default = 4
        mpl.rcParams['ytick.major.pad'] = 1  # padding of tick labels: default = 4
        mpl.rcParams['xtick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['ytick.direction'] = 'out'  # default: 'in'
        mpl.rcParams['font.size'] = 16
        mpl.rcParams['axes.titlesize'] = 16
        mpl.rcParams['figure.figsize'] = [4, 4]
        fig = figure()
        ax = fig.add_axes([0.13, 0.17, 0.8, 0.8])

        # -- Drawing
        if STD_FILL:
            # Shaded band of one standard deviation around each mean curve
            for idx, (option, facecolor) in enumerate(zip(option_vec, facecolor_vec)):
                upper = Y[idx] + Y_std[idx]
                lower = Y[idx] - Y_std[idx]
                ax.fill_between(X_f, upper, lower,
                                facecolor=facecolor, alpha=0.2, edgecolor=None, linewidth=0)
                ax.plot(X_f, upper, linewidth=0.5, color='0.8', linestyle='solid')
                ax.plot(X_f, lower, linewidth=0.5, color='0.8', linestyle='solid')

        for idx, (option, label, color, linewidth, clip_on, linestyle, marker, markersize) in \
                enumerate(zip(option_vec, labels, facecolor_vec, linewidth_vec, clip_on_vec,
                              linestyle_vec, marker_vec, markersize_vec)):
            ax.plot(X_f, Y[idx], linewidth=linewidth, color=color, linestyle=linestyle, label=label,
                    zorder=4, marker=marker, markersize=markersize, markeredgewidth=1, clip_on=clip_on,
                    markeredgecolor='black')

        plt.xscale('log')

        # -- Title and legend
        distribution_label = ',$uniform' if distribution == 'uniform' else '$'
        n_label = '{}'.format(n) if n < 1000 else '{}k'.format(int(n / 1000))
        a_label = r', a\!=\!{}'.format(a) if a != 1 else ''

        titleString = r'$\!\!\!n\!=\!{}, d\!=\!{}, h\!=\!{}{}{}'.format(n_label, d, h, a_label,
                                                                          distribution_label)
        plt.title(titleString)

        handles, labels = ax.get_legend_handles_labels()
        legend = plt.legend(handles, labels,
                            loc='upper left',  # 'upper right'
                            handlelength=2,
                            labelspacing=0,  # distance between label entries
                            handletextpad=0.3,  # distance between label and the line representation
                            borderaxespad=0.2,  # distance between legend and the outer axes
                            borderpad=0.3,  # padding inside legend box
                            numpoints=1,  # put the marker only once
                            )
        frame = legend.get_frame()
        frame.set_linewidth(0.0)
        frame.set_alpha(0.9)  # 0.8

        # -- Figure settings and save
        plt.xticks(xtick_lab, xtick_labels)
        plt.yticks(ytick_lab, ytick_lab)
        ax.yaxis.set_ticks_position('left')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%.1f'))

        # First argument passed positionally: the keyword was renamed from `b` to `visible` in Matplotlib.
        grid(True, which='major', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)
        grid(True, which='minor', axis='both', alpha=0.2, linestyle='solid', linewidth=0.5)
        xlabel(r'Label Sparsity $(f)$', labelpad=0)
        ylabel(r'Accuracy', labelpad=0)

        xlim(xmin, xmax)
        ylim(ymin, ymax)

        if CREATE_PDF:
            # `frameon=None` (the default) is no longer passed; newer Matplotlib rejects the keyword.
            savefig(join(figure_directory, fig_filename), format='pdf',
                    dpi=None,
                    edgecolor='w',
                    orientation='portrait',
                    transparent=False,
                    bbox_inches='tight',
                    pad_inches=0.05)

        if SHOW_PLOT:
            plt.show()

        if SHOW_PDF:
            showfig(join(figure_directory, fig_filename))  # shows actually created PDF