def plot_scrabble_zodiac():
    """Plot the series stored in ``result/scrabble_zodiac.json``.

    The JSON file must provide ``x`` and ``y`` entries.  They are drawn as a
    single line labelled '# of fixed samples' on a one-panel figure, which is
    then handed to ``save_fig`` with the path
    ``FIG_DIR + '/scrabble_zodiac.pdf'``.
    """
    building = 'ebu3b'
    outputfile = FIG_DIR + '/scrabble_zodiac.pdf'
    fig, ax = plt.subplots(1, 1)

    # get_grid_params returns an 11-tuple; the interpolation grid is not used
    # here and the y-label it supplies is replaced below.
    grid = get_grid_params(ymin=0, ymax=35, ydelta=5,
                           xmin=10, xmin2=50, xmax=250, xdelta=50)
    (xticks, xticks_labels, yticks, yticks_labels, xlim, ylim, _,
     xlabel, _, linestyles, xtickRotate) = grid
    ylabel = 'Count'

    with open('result/scrabble_zodiac.json', 'r') as fp:
        res = json.load(fp)
    x = res['x']
    ys = [res['y']]
    legends = ['# of fixed samples']
    title = building_anon_map[building]

    # One line style is consumed from the list and shared by every series.
    style = linestyles.pop()
    plotter.plot_multiple_2dline(
        x, ys, xlabel, ylabel, xticks, xticks_labels,
        yticks, yticks_labels, title, ax, fig, ylim, xlim, legends,
        linestyles=[style] * len(ys), cs=colors,
        xtickRotate=xtickRotate)

    ax.grid(True)
    ax.tick_params(axis='x', pad=-1.5)
    ax.legend(bbox_to_anchor=(1.1, 1.35), ncol=1, frameon=False,
              fontsize='small')
    fig.set_size_inches((1.5, 1.7))
    save_fig(fig, outputfile)
def plot_quiver_zodiac():
    """Compare plain Zodiac against Quiver/Zodiac on building 'ebu3b'.

    For each of the two result families the per-run JSON files are loaded,
    the 'f1' and 'macrof1' metrics are averaged over ``EXP_NUM`` runs with
    ``average_data`` on the interpolation grid from ``get_grid_params``, and
    both curves are drawn on the same axis.  The figure is saved to
    ``FIG_DIR + '/quiver_zodiac.pdf'``.
    """
    EXP_NUM = 2
    building = 'ebu3b'
    outputfile = FIG_DIR + '/quiver_zodiac.pdf'
    fig, ax = plt.subplots(1, 1)
    (xticks, xticks_labels, yticks, yticks_labels, xlim, ylim, interp_x,
     xlabel, ylabel, linestyles, xtickRotate) = get_grid_params()
    title = building_anon_map[building]

    def averaged_scores(path_template):
        """Return [micro-F1, macro-F1] averaged over all runs of a family."""
        sample_counts = []
        micro_runs = []
        macro_runs = []
        for run in range(0, EXP_NUM):
            with open(path_template.format(building, run)) as fp:
                records = json.load(fp)
            sample_counts.append([rec['learning_srcids'] for rec in records])
            micro_runs.append([rec['metrics']['f1'] for rec in records])
            macro_runs.append([rec['metrics']['macrof1'] for rec in records])
        micro = average_data(sample_counts, micro_runs, interp_x)
        macro = average_data(sample_counts, macro_runs, interp_x)
        return [micro, macro]

    # Baseline (naive Zodiac).
    ys = averaged_scores('result/pointonly_notransfer_zodiac_{0}_{1}.json')
    legends = ['MicroF1, {0}'.format('Zodiac'),
               'MacroF1, {0}'.format('Zodiac')]
    plotter.plot_multiple_2dline(
        interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
        yticks, yticks_labels, title, ax, fig, ylim, xlim, legends,
        linestyles=[linestyles.pop()] * len(ys), cs=colors,
        xtickRotate=xtickRotate)

    # Quiver on top of Zodiac; this curve uses a fixed 45 degree tick rotation.
    ys = averaged_scores('result/quiver_zodiac_{0}_{1}.json')
    legends = ['MicroF1, {0}'.format('Quiver/Zodiac'),
               'MacroF1, {0}'.format('Quiver/Zodiac')]
    xtickRotate = 45
    plotter.plot_multiple_2dline(
        interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
        yticks, yticks_labels, title, ax, fig,
        ylim=ylim, xlim=xlim, dataLabels=legends,
        linestyles=[linestyles.pop()] * len(ys), cs=colors,
        xtickRotate=xtickRotate)

    ax.grid(True)
    ax.tick_params(axis='x', pad=-1.5)
    ax.legend(bbox_to_anchor=(1.26, 1.75), ncol=1, frameon=False,
              fontsize='small')
    fig.set_size_inches((1.5, 1.7))
    save_fig(fig, outputfile)
def plot_pointonly_notransfer():
    """Plot point-only, no-transfer F1 curves with one subplot per building.

    For every (building, inferencer) pair that is not skipped, the per-run
    files ``result/pointonly_notransfer_<inferencer>_<building>_<run>.json``
    are loaded, micro- and macro-F1 are averaged with ``average_data`` and
    drawn on the building's axis.  Legend entries are only attached on the
    third subplot.  The figure is saved to
    ``FIG_DIR + '/pointonly_notransfer.pdf'``.
    """
    EXP_NUM = 4
    inferencer_names = ['zodiac', 'al_hong', 'scrabble', 'arka']
    buildings = ['ebu3b', 'uva_cse', 'sdh', 'ghc']
    outputfile = FIG_DIR + '/pointonly_notransfer.pdf'
    linestyle_dict = {
        'arka': ':',
        'scrabble': '--',
    }

    fig, axes = plt.subplots(1, len(buildings))
    xticks = [0, 10] + list(range(50, 251, 50))
    xticks_labels = [''] + [str(n) for n in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(n) for n in yticks]
    xlim = (0, xticks[-1])
    ylim = (yticks[0], yticks[-1])
    xlabel = '# of Samples'
    ylabel = 'Metric (%)'
    xtickRotate = 45

    for ax_num, (ax, building) in enumerate(zip(axes, buildings)):
        # One subfigure per building, each with a fresh line-style stack.
        title = building_anon_map[building]
        linestyles = deepcopy(LINESTYLES)
        for inferencer_name in inferencer_names:
            if building == 'uva_cse' and inferencer_name == 'scrabble':
                continue
            if inferencer_name == 'arka' and building != 'sdh':
                continue
            exp_num = 1 if inferencer_name == 'arka' else EXP_NUM

            # al_hong result files store the metrics under different keys.
            if inferencer_name == 'al_hong':
                micro_key, macro_key = 'f1_micro', 'f1_macro'
            else:
                micro_key, macro_key = 'f1', 'macrof1'

            xss = []
            f1s = []
            mf1s = []
            for run in range(0, exp_num):
                path = 'result/pointonly_notransfer_{0}_{1}_{2}.json'.format(
                    inferencer_name, building, run)
                with open(path) as fp:
                    data = json.load(fp)
                xss.append([datum['learning_srcids'] for datum in data])
                f1s.append([datum['metrics'][micro_key] for datum in data])
                mf1s.append([datum['metrics'][macro_key] for datum in data])

            # Interpolate up to the largest observed sample count, capped
            # at 250.
            largest_x = max([max(run_xs) for run_xs in xss])
            interp_x = list(range(10, min(250, largest_x + 5), 5))
            f1 = average_data(xss, f1s, interp_x)
            mf1 = average_data(xss, mf1s, interp_x)
            ys = [f1, mf1]

            legends = None
            if ax_num == 2:
                display_name = inferencer_display_names[inferencer_name]
                legends = [
                    'micro-F1, {0}'.format(display_name),
                    'Macro-F1, {0}'.format(display_name),
                ]

            plotter.plot_multiple_2dline(
                interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
                yticks, yticks_labels, title, ax, fig, ylim, xlim, legends,
                linestyles=[linestyles.pop()] * len(ys), cs=colors,
                xtickRotate=xtickRotate)

            if ax_num == 2 and inferencer_name in ['scrabble', 'arka']:
                # A single point at (-10, -10), outside xlim/ylim, is plotted
                # so that an 'Accuracy' entry is added to this axis' legend.
                plotter.plot_multiple_2dline(
                    [-10], [[-10]], xlabel, ylabel, xticks, xticks_labels,
                    yticks, yticks_labels, title, ax, fig, ylim, xlim,
                    ['Accuracy, {0}'.format(
                        inferencer_display_names[inferencer_name])],
                    linestyles=[linestyle_dict[inferencer_name]],
                    cs=[ACC_COLOR],
                    xtickRotate=xtickRotate,
                    markers=['.'],
                    markevery=4,
                    markersize=4,
                )

    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y tick labels and y-label.
    for ax in axes[1:]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    # Only the second subplot keeps an x-label, shifted to the right.
    for idx, ax in enumerate(axes):
        ax.tick_params(axis='x', pad=-1.5)
        if idx == 1:
            ax.xaxis.set_label_coords(1.1, -0.2)
        else:
            ax.set_xlabel('')
    axes[2].legend(bbox_to_anchor=(3.9, 1.15), ncol=1, frameon=False)
    fig.set_size_inches((8.5, 2))
    save_fig(fig, outputfile)
def plot_entities():
    """Plot accuracy and macro-F1 over all entities, one subplot per target.

    For each target building and inferencer that is not skipped, the files
    ``result/allentities_notransfer_<inferencer>_<target>_<run>.json`` are
    loaded and the 'accuracy' and 'macrof1-all' metrics are averaged with
    ``average_data`` before being drawn.  The figure is saved to
    ``FIG_DIR + '/entities.pdf'``.
    """
    EXP_NUM = 4
    outputfile = FIG_DIR + '/entities.pdf'
    inferencer_names = ['scrabble', 'arka']
    # The interpolation grid and line styles returned here are not used; the
    # grid is recomputed per curve and line styles come from the dict below.
    (xticks, xticks_labels, yticks, yticks_labels, xlim, ylim, _,
     xlabel, ylabel, _, xtickRotate) = get_grid_params(40, 100, 20)
    target_sources = [
        ('ebu3b', [None, 'ap_m']),
        ('sdh', [None, 'ebu3b']),
    ]
    fig, axes = plt.subplots(1, len(target_sources))
    linestyles = {
        'scrabble': '--',
        'arka': ':',
    }
    # Number of result files available per inferencer; anything not listed
    # falls back to EXP_NUM.
    runs_per_inferencer = {'arka': 1, 'scrabble': 2}

    for ax, (target_building, source_buildings) in zip(axes, target_sources):
        for inferencer_name in inferencer_names:
            # arka is only plotted for 'sdh'.
            if inferencer_name == 'arka' and target_building != 'sdh':
                continue
            exp_num = runs_per_inferencer.get(inferencer_name, EXP_NUM)

            # No-transfer results.
            xss = []
            accs = []
            mf1s = []
            for run in range(0, exp_num):
                path = 'result/allentities_notransfer_{0}_{1}_{2}.json'.format(
                    inferencer_name, target_building, run)
                with open(path) as fp:
                    data = json.load(fp)
                xss.append([datum['learning_srcids'] for datum in data])
                accs.append([datum['metrics']['accuracy'] for datum in data])
                mf1s.append([datum['metrics']['macrof1-all']
                             for datum in data])

            largest_x = max([max(run_xs) for run_xs in xss])
            interp_x = list(range(10, min(250, largest_x + 5), 5))
            acc = average_data(xss, accs, interp_x)
            mf1 = average_data(xss, mf1s, interp_x)
            ys = [acc, mf1]
            legends = None
            title = building_anon_map[target_building]
            plotter.plot_multiple_2dline(
                interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
                yticks, yticks_labels, title, ax, fig,
                ylim=ylim, xlim=xlim,
                dataLabels=legends,
                linestyles=[linestyles[inferencer_name]] * len(ys),
                cs=[ACC_COLOR, colors[1]],
                xtickRotate=xtickRotate,
                markers=['.', None],
                markevery=4,
                markersize=4,
            )
            # NOTE: curves for the transfer setting
            # (result/allentities_transfer_*.json) are currently not drawn;
            # the second entry of each source_buildings list is unused.

    fig.set_size_inches((4, 2))
    for ax in axes:
        ax.grid(True)
        ax.set_ylim(ylim)
    # Only the left-most subplot keeps its y tick labels and y-label.
    for ax in axes[1:]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    # Only the first subplot keeps an x-label, shifted to the right.
    for idx, ax in enumerate(axes):
        ax.tick_params(axis='x', pad=-1.5)
        if idx == 0:
            ax.xaxis.set_label_coords(1.05, -0.2)
        else:
            ax.set_xlabel('')
    save_fig(fig, outputfile)
def plot_pointonly_transfer():
    """Plot point-only transfer F1 curves, one subplot per (target, source).

    For every (target, source) pair and inferencer, the files
    ``result/pointonly_transfer_<inferencer>_<target>_<source>_<run>.json``
    are loaded, micro- and macro-F1 are averaged with ``average_data`` and
    drawn without legend labels.  The figure is saved to
    ``FIG_DIR + '/pointonly_transfer.pdf'``.
    """
    target_sources = [
        ('ebu3b', 'ap_m'),
        ('ebu3b', 'sdh'),
        ('sdh', 'ebu3b'),
    ]
    EXP_NUM = 4
    outputfile = FIG_DIR + '/pointonly_transfer.pdf'
    inferencer_names = ['zodiac', 'al_hong', 'scrabble']

    fig, axes = plt.subplots(1, len(target_sources))
    xticks = [0, 10] + list(range(50, 251, 50))
    xticks_labels = [''] + [str(n) for n in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(n) for n in yticks]
    xlim = (0, xticks[-1])
    ylim = (yticks[0], yticks[-1])
    xlabel = '# of Samples'
    ylabel = 'Metric (%)'
    xtickRotate = 45
    legends = None  # legend labels are disabled for this figure

    # TODO: Update this once finished -- scrabble has fewer completed runs
    # for these (target, source) pairs.
    scrabble_runs = {('ebu3b', 'ap_m'): 1, ('sdh', 'ebu3b'): 2}

    for ax, (target_building, source_building) in zip(axes, target_sources):
        # One subfigure per (target, source) pair.
        title = '{0} -> {1}'.format(building_anon_map[source_building],
                                    building_anon_map[target_building])
        linestyles = deepcopy(LINESTYLES)
        for inferencer_name in inferencer_names:
            if inferencer_name == 'scrabble':
                exp_num = scrabble_runs.get(
                    (target_building, source_building), EXP_NUM)
            else:
                exp_num = EXP_NUM

            # al_hong result files store the metrics under different keys.
            if inferencer_name == 'al_hong':
                micro_key, macro_key = 'f1_micro', 'f1_macro'
            else:
                micro_key, macro_key = 'f1', 'macrof1'

            xss = []
            f1s = []
            mf1s = []
            for run in range(0, exp_num):
                path = 'result/pointonly_transfer_{0}_{1}_{2}_{3}.json'.format(
                    inferencer_name, target_building, source_building, run)
                with open(path) as fp:
                    data = json.load(fp)
                xss.append([datum['learning_srcids'] for datum in data])
                f1s.append([datum['metrics'][micro_key] for datum in data])
                mf1s.append([datum['metrics'][macro_key] for datum in data])

            largest_x = max([max(run_xs) for run_xs in xss])
            interp_x = list(range(10, min(250, largest_x + 5), 5))
            f1 = average_data(xss, f1s, interp_x)
            mf1 = average_data(xss, mf1s, interp_x)
            ys = [f1, mf1]
            plotter.plot_multiple_2dline(
                interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
                yticks, yticks_labels, title, ax, fig, ylim, xlim, legends,
                linestyles=[linestyles.pop()] * len(ys), cs=colors,
                xtickRotate=xtickRotate)

    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y tick labels and y-label.
    for ax in axes[1:]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    # Only the middle subplot keeps an x-label.
    for idx, ax in enumerate(axes):
        ax.tick_params(axis='x', pad=-1.5)
        if idx == 1:
            ax.xaxis.set_label_coords(0.5, -0.2)
        else:
            ax.set_xlabel('')
    fig.set_size_inches((6, 2))
    save_fig(fig, outputfile)
def cls_comp_result():
    """Plot hard-coded accuracy and macro-F1 scores of four classifiers.

    The scores (fractions, converted to percent for plotting) are embedded
    below for the variants 'best', 'ts', 'rf' and 'svc'.  Accuracy curves
    are drawn in 'firebrick', macro-F1 curves in 'deepskyblue'.  The figure
    is saved to 'figs/cls.pdf' and ``./send_figures`` is invoked afterwards.
    """
    keys = ['best', 'ts', 'rf', 'svc']
    xs = list(range(5, 205, 20))
    accuracy_dict = OrderedDict([
        ('best', [
            0.8631033290671848, 0.9024136840401907, 0.9233413507509902,
            0.9500121364579196, 0.9527101078305895, 0.9650918693087369,
            0.9677129764479163, 0.9593822175147483, 0.9711269988378419,
            0.9697809553231241,
        ]),
        ('ts', [
            0.8713471602025806, 0.9166264458433141, 0.9185595580405604,
            0.9428053539499326, 0.9417577736854855, 0.9573296850405294,
            0.9489047766156204, 0.9534092413610498, 0.953262734588037,
            0.9595308306151684,
        ]),
        ('rf', [
            0.756387822351681, 0.854248495814764, 0.8465179398914331,
            0.859092781381938, 0.9137193462494689, 0.9384494020036196,
            0.9460637421480792, 0.9512496873942656, 0.9582711799579264,
            0.9597065919355077,
        ]),
        ('svc', [
            0.7210336660658784, 0.8278964869103078, 0.8371634459716821,
            0.8901948134091584, 0.9289625735354351, 0.9062837090984304,
            0.9072457164626379, 0.9094597402145658, 0.9061470144946531,
            0.9317219571018263,
        ]),
    ])
    mf1_dict = OrderedDict([
        ('best', [
            0.43460544517064525, 0.46207967166726716, 0.60572075680286364,
            0.65253670730553948, 0.71164857967833528, 0.77075401369085861,
            0.77409145497551546, 0.78223293415400674, 0.79434165930991263,
            0.78765666427863568,
        ]),
        ('ts', [
            0.38456663841099153, 0.47135950957306999, 0.50801383768831809,
            0.58379558680943822, 0.61765049559624907, 0.67617354377548211,
            0.66706236361751792, 0.70816840695824457, 0.68736126966336153,
            0.70501274992734486,
        ]),
        ('rf', [
            0.094018355593671443, 0.21622362914898177, 0.2939715246436253,
            0.38083088857608816, 0.45237091518218492, 0.51912845475805691,
            0.56752106411334313, 0.6314794515347395, 0.73066778675441313,
            0.81505177770253923,
        ]),
        ('svc', [
            0.19122967879394315, 0.2501766458806039, 0.27629897715774632,
            0.31374977389303144, 0.35811497520318963, 0.36814352938387473,
            0.37145631338451729, 0.38910680891542943, 0.35511959588962361,
            0.39688667191674587,
        ]),
    ])

    # The first four curves are accuracy, the last four macro-F1; labels and
    # line styles repeat per metric while the colour tells the metrics apart.
    legends = ['SCRBL', 'w/ TS', 'RF', 'w/ SVC'] * 2
    linestyles = ['-', ':', '-.', '--'] * 2
    cs = ['firebrick'] * len(keys) + ['deepskyblue'] * len(keys)

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(4, 1.7)
    axes = [ax]

    # Convert every score from a fraction to a percentage.
    all_series = list(accuracy_dict.values()) + list(mf1_dict.values())
    ys = [[score * 100 for score in series] for series in all_series]

    xlabel = '# of Target Building Samples'
    ylabel = 'Score (%)'
    xtick = list(range(0, 205, 20))
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(n) for n in ytick]
    xlim = (xtick[0] - 2, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None
    _, plots = plotter.plot_multiple_2dline(
        xs, ys, xlabel, ylabel, xtick, xtick_labels, ytick, ytick_labels,
        title, ax, fig, ylim, xlim, None, xtickRotate=0,
        linestyles=linestyles, cs=cs)

    # Interleave accuracy / macro-F1 handles so that, with four columns, each
    # legend column holds both metrics of one classifier.
    legend_order = [0, 4, 1, 5, 2, 6, 3, 7]
    new_handles = []
    new_legends = []
    for position in legend_order:
        new_handles.append(plots[position])
        new_legends.append(legends[position])
    fig.legend(new_handles, new_legends, ncol=4,
               bbox_to_anchor=(-0.1, 1.04, 1, 0.095),
               prop={'size': 7}, frameon=False)

    for ax in axes:
        ax.grid(True)
    # Row captions placed to the left of the figure-level legend.
    plt.text(0.03, 1.135, 'Accuracy: \nMacro $F_1$: ',
             ha='center', va='center',
             transform=ax.transAxes, fontsize=7)
    save_fig(fig, 'figs/cls.pdf')
    subprocess.call('./send_figures')
def crf_result():
    """Plot CRF phrase-level F1 and macro-F1 against target sample counts.

    For each (source, target) building pair one subplot is drawn.  Each
    entry of ``n_list_list`` is a list of (source samples, target samples)
    pairs and yields one pair of curves; the scores are looked up with
    ``get_crf_results``.  The figure is saved to 'figs/crf.pdf' and
    ``./send_figures`` is invoked afterwards.

    Configurations for which ``get_crf_results`` returns nothing are reported
    on stdout and left out of the curve.
    """
    source_target_list = [('ebu3b', 'bml'), ('ghc', 'ebu3b')]
    n_list_list = [
        [(1000, 0), (1000, 5), (1000, 20), (1000, 50), (1000, 100),
         (1000, 150), (1000, 200)],
        [(200, 0), (200, 5), (200, 20), (200, 50), (200, 100),
         (200, 150), (200, 200)],
        [(0, 5), (0, 20), (0, 50), (0, 100), (0, 150), (0, 200)],
    ]
    fig, axes = plt.subplots(1, len(source_target_list))
    if isinstance(axes, Axes):
        axes = [axes]
    fig.set_size_inches(4, 1.5)
    cs = ['firebrick', 'deepskyblue']

    # Axis configuration shared by every curve.
    xlabel = None  # a figure-wide x-label is added with fig.text below
    ylabel = 'Score (%)'
    xtick = list(range(0, 205, 40))
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(n) for n in ytick]
    xlim = (xtick[0] - 2, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None

    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        linestyles = ['--', '-.', '-']
        for n_list in n_list_list:
            # x values are collected together with the scores so both stay
            # the same length when a configuration has no stored result.
            xs = []
            phrase_f1s = []
            phrase_macro_f1s = []
            for (n_s, n_t) in n_list:
                if n_s == 0:
                    building_list = [target]
                    source_sample_num_list = [n_t]
                elif n_t == 0:
                    building_list = [source]
                    source_sample_num_list = [n_s]
                else:
                    building_list = [source, target]
                    source_sample_num_list = [n_s, n_t]
                result_query = {
                    'label_type': 'label',
                    'token_type': 'justseparate',
                    'use_cluster_flag': True,
                    'building_list': building_list,
                    'source_sample_num_list': source_sample_num_list,
                    'target_building': target,
                }
                result = get_crf_results(result_query)
                if not result:
                    print('No CRF result for {0} -> {1} with '
                          '(n_s, n_t) = ({2}, {3}); skipped.'
                          .format(source, target, n_s, n_t))
                    continue
                phrase_recall = result['phrase_recall'] * 100
                phrase_prec = result['phrase_precision'] * 100
                denominator = phrase_prec + phrase_recall
                # F1 is defined as 0 when precision and recall are both 0.
                if denominator:
                    phrase_f1 = 2 * phrase_prec * phrase_recall / denominator
                else:
                    phrase_f1 = 0.0
                xs.append(n_t)
                phrase_f1s.append(phrase_f1)
                phrase_macro_f1s.append(result['phrase_macro_f1'] * 100)

            ys = [phrase_f1s, phrase_macro_f1s]
            if i == 0:
                # Both curves of a group share the label: the number of
                # source samples of the last pair in the group.
                source_n = n_list[-1][0]
                legends = [
                    '#$B_S$:{0}'.format(source_n),
                    '#$B_S$:{0}'.format(source_n),
                ]
            else:
                legends = None
            plotter.plot_multiple_2dline(
                xs, ys, xlabel, ylabel, xtick, xtick_labels,
                ytick, ytick_labels, title, ax, fig,
                ylim, xlim, legends, xtickRotate=0,
                linestyles=[linestyles.pop()] * len(ys), cs=cs)

        # Caption inside the subplot naming the transfer direction.
        text = '{0} $\\Rightarrow$ {1}'.format(
            anon_building_dict[source], anon_building_dict[target])
        ax.text(0.8, 0.1, text, transform=ax.transAxes, ha='right',
                backgroundcolor='white')

    axes[0].legend(bbox_to_anchor=(0.15, 0.96), ncol=3, frameon=False)
    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y tick labels and y-label.
    for ax in axes[1:]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    plt.text(0, 1.16, '$F_1$: \nMacro $F_1$: ', va='center',
             ha='center', transform=axes[0].transAxes)
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center')
    save_fig(fig, 'figs/crf.pdf')
    subprocess.call('./send_figures')
def entity_result_deprecated():
    """Plot entity-classification scores for several query configurations.

    Three query variants (differing in 'metadata.use_brick_flag' and
    'metadata.negative_flag') are evaluated for the sample-count lists in
    ``n_list_list``.  For each list, the last value of the hierarchy
    accuracy, accuracy and macro-F1 histories returned by
    ``get_entity_results`` is collected per sample count and plotted.  The
    figure is saved to 'figs/entity.pdf'.

    Deprecated: kept for reference only.
    """
    source_target_list = [('ebu3b', 'ap_m')]
    n_list_list = [
        [(0, 5), (0, 50), (0, 100), (0, 150), (0, 200)],
        [(200, 5), (200, 50), (200, 100), (0, 150), (200, 200)],
    ]
    ts_flag = False
    eda_flag = False
    default_query = {
        'metadata.label_type': 'label',
        'metadata.token_type': 'justseparate',
        'metadata.use_cluster_flag': True,
        'metadata.building_list': [],
        'metadata.source_sample_num_list': [],
        'metadata.target_building': '',
        'metadata.ts_flag': ts_flag,
        'metadata.eda_flag': eda_flag,
        'metadata.use_brick_flag': True,
    }
    # (use_brick_flag, negative_flag) for each query variant.
    query_list = []
    for use_brick, negative in [(False, False), (False, True), (True, True)]:
        query = deepcopy(default_query)
        query['metadata.use_brick_flag'] = use_brick
        query['metadata.negative_flag'] = negative
        query_list.append(query)

    fig, axes = plt.subplots(1, 3)
    fig.set_size_inches(8, 5)

    # zip() stops at the shorter sequence, so only as many axes as there are
    # (source, target) pairs receive curves.
    for ax, (source, target) in zip(axes, source_target_list):
        for query in query_list:
            for n_list in n_list_list:
                target_n_list = [ns[1] for ns in n_list]
                accuracy_list = []
                hierarchy_accuracy_list = []
                macro_f1_list = []
                for (n_s, n_t) in n_list:
                    if n_s == 0:
                        building_list = [target]
                        source_sample_num_list = [n_t]
                    elif n_t == 0:
                        building_list = [source]
                        source_sample_num_list = [n_s]
                    else:
                        building_list = [source, target]
                        source_sample_num_list = [n_s, n_t]
                    query['metadata.building_list'] = building_list
                    query['metadata.source_sample_num_list'] = \
                        source_sample_num_list
                    query['metadata.target_building'] = target

                    result = get_entity_results(query)
                    if not result:
                        # Report the missing configuration and retry once.
                        print(n_t)
                        result = get_entity_results(query)
                    accuracy_list.append(
                        result['accuracy_history'][-1] * 100)
                    hierarchy_accuracy_list.append(
                        result['hierarchy_accuracy_history'][-1] * 100)
                    macro_f1_list.append(
                        result['macro_f1_history'][-1] * 100)

                xs = target_n_list
                ys = [hierarchy_accuracy_list, accuracy_list, macro_f1_list]
                xlabel = '# of Target Building Samples'
                ylabel = 'Score (%)'
                xtick = target_n_list
                xtick_labels = [str(n) for n in target_n_list]
                ytick = range(0, 102, 10)
                ytick_labels = [str(n) for n in ytick]
                ylim = (ytick[0] - 1, ytick[-1] + 2)
                # All three curves share one label built from the source
                # sample count of the last pair and the use_brick_flag.
                label = '{0}, SA:{1}'.format(
                    n_list[-1][0], query['metadata.use_brick_flag'])
                legends = [label] * len(ys)
                title = None
                # The labels are passed by keyword: the positional slot after
                # ylim is used for xlim by the other callers in this file.
                plotter.plot_multiple_2dline(
                    xs, ys, xlabel, ylabel, xtick, xtick_labels,
                    ytick, ytick_labels, title, ax, fig,
                    ylim=ylim, dataLabels=legends)
                # Only the variant with both flags set is also evaluated on
                # the second sample-count list.
                if not (query['metadata.negative_flag'] and
                        query['metadata.use_brick_flag']):
                    break

    axes[0].set_title('Hierarchical Accuracy')
    axes[1].set_title('Accuracy')
    axes[2].set_title('Macro F1')
    suptitle = 'Multi Label (TagSets) Classification with a Source building.'
    fig.suptitle(suptitle)
    save_fig(fig, 'figs/entity.pdf')
def crf_entity_result():
    """Plot baseline and Scrabble entity scores for several building sets.

    Each building set gets one subplot.  The baseline accuracy / macro-F1
    come from 'result/baseline.json' (keyed by ``str(buildings)``); the
    Scrabble curves come from 'result/crf_entity_iter_<buildings>.json',
    optionally preceded by points from the matching '_zero' and '_five'
    files when those exist.  The figure is saved to 'figs/crf_entity.pdf'
    and ``./send_figures`` is invoked afterwards.

    A building set whose Scrabble result file cannot be read is reported on
    stdout and only its baseline is drawn.
    """
    building_sets = [
        ('ebu3b', 'ap_m'),
        ('ap_m', 'bml'),
        ('ebu3b', 'ghc'),
        ('ghc', 'ebu3b'),
        ('ebu3b', 'bml', 'ap_m'),
    ]  # TODO: this should be changed to use ebu3b,ap_m -> bml
    fig, axes = plt.subplots(1, len(building_sets))
    with open('result/baseline.json', 'r') as fp:
        baseline_results = json.load(fp)
    cs = ['firebrick', 'deepskyblue']
    plot_list = []

    # Axis configuration shared by every curve.
    xlabel = '# Target Building Samples'
    ylabel = 'Score (%)'
    ytick = list(range(0, 105, 20))
    ytick_labels = [str(no) for no in ytick]
    ylim = (-2, 105)
    xlim = (10, 205)
    lw = 1.2

    for i, (ax, buildings) in enumerate(zip(axes, building_sets)):
        print(i)
        joined = ''.join(buildings)

        # Baseline.
        result = baseline_results[str(buildings)]
        sample_numbers = result['sample_numbers']
        ys = [result['avg_acc'], result['avg_mf1']]
        x = sample_numbers
        xtick = sample_numbers
        xtick_labels = [str(no) for no in sample_numbers]
        linestyles = [':', ':']
        if i == 2:
            data_labels = ['Baseline Accuracy', 'Baseline Macro $F_1$']
        else:
            data_labels = None
        # Title: comma-separated sources, an arrow, then the target.
        title = ','.join(anon_building_dict[building]
                         for building in buildings[:-1])
        title += '$\\Rightarrow${0}'.format(anon_building_dict[buildings[-1]])
        _, plot = plotter.plot_multiple_2dline(
            x, ys, xlabel, ylabel, xtick, xtick_labels,
            ytick, ytick_labels, title, ax, fig, ylim, xlim,
            data_labels, 0, linestyles, cs, lw)

        # Scrabble.
        # The offset is subtracted from the learning-set size to obtain the
        # x value; it is larger for the three-building set.
        if joined == 'ebu3bbmlap_m':
            srcids_offset = 400
        else:
            srcids_offset = 200
        iter_file = 'result/crf_entity_iter_{0}.json'.format(joined)
        try:
            with open(iter_file, 'r') as fp:
                result = json.load(fp)[0]
        except (OSError, ValueError, LookupError, TypeError) as err:
            print('Skipping Scrabble curve, cannot load {0}: {1}'
                  .format(iter_file, err))
            continue

        zerofile = 'result/crf_entity_iter_{0}_zero.json'.format(joined)
        if os.path.isfile(zerofile):
            with open(zerofile, 'r') as fp:
                zero_result = json.load(fp)[0]
            x_zero = [0]
            acc_zero = [zero_result['result']['entity'][0]['accuracy'] * 100]
            mf1_zero = [zero_result['result']['entity'][0]['macro_f1'] * 100]
        else:
            x_zero = []
            acc_zero = []
            mf1_zero = []

        fivefile = 'result/crf_entity_iter_{0}_five.json'.format(
            ''.join(list(buildings) + [buildings[-1]]))
        if os.path.isfile(fivefile):
            with open(fivefile, 'r') as fp:
                five_result = json.load(fp)[0]
            x_five = [5]
            acc_five = [five_result['result']['entity'][0]['accuracy'] * 100]
            mf1_five = [five_result['result']['entity'][0]['macro_f1'] * 100]
        else:
            x_five = []
            acc_five = []
            mf1_five = []

        x = x_zero + x_five + [
            len(learning_srcids) - srcids_offset
            for learning_srcids in result['learning_srcids_history'][:-1]
        ]
        accuracy = acc_zero + acc_five + [
            res['accuracy'] * 100 for res in result['result']['entity']
        ]
        mf1s = mf1_zero + mf1_five + [
            res['macro_f1'] * 100 for res in result['result']['entity']
        ]
        ys = [accuracy, mf1s]
        linestyles = ['-', '-']
        if i == 2:
            data_labels = ['Scrabble Accuracy', 'Scrabble Macro $F_1$']
        else:
            data_labels = None
        _, plot = plotter.plot_multiple_2dline(
            x, ys, xlabel, ylabel, xtick, xtick_labels,
            ytick, ytick_labels, title, ax, fig, ylim, xlim,
            data_labels, 0, linestyles, cs, lw)
        if i == 2:
            # The legend is attached to the third subplot only.
            ax.legend(bbox_to_anchor=(3.2, 1.45), ncol=4, frameon=False)
        plot_list.append(plot)

    fig.set_size_inches(9, 1.5)
    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y tick labels and y-label.
    for ax in axes[1:]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    # Only the third subplot keeps an x-label.
    for idx, ax in enumerate(axes):
        if idx != 2:
            ax.set_xlabel('')
    save_fig(fig, 'figs/crf_entity.pdf')
    subprocess.call('./send_figures')
def plot_scrabble():
    """Plot point-only, no-transfer F1 curves with one subplot per building.

    For every building and inferencer (except 'scrabble' on 'uva_cse'), the
    files ``result/pointonly_notransfer_<inferencer>_<building>_<run>.json``
    are loaded, micro- and macro-F1 are averaged with ``average_data`` on a
    fixed interpolation grid and drawn; legend labels are attached on the
    first subplot only.

    NOTE(review): ``inferencer_names`` and ``EXP_NUM`` are not defined in
    this function, so they are presumably module-level names -- confirm.
    NOTE(review): the output path is the same one ``plot_pointonly_notransfer``
    writes to, so the two functions overwrite each other's figure -- confirm
    that this is intended.
    """
    buildings = ['ebu3b', 'uva_cse', 'sdh', 'ghc']
    outputfile = FIG_DIR + '/pointonly_notransfer.pdf'

    fig, axes = plt.subplots(1, len(buildings))
    xticks = [0, 10] + list(range(50, 251, 50))
    xticks_labels = [''] + [str(n) for n in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(n) for n in yticks]
    xlim = (-5, xticks[-1] + 5)
    ylim = (yticks[0] - 2, yticks[-1] + 5)
    interp_x = list(range(10, 250, 5))
    xlabel = '# of Examples'
    ylabel = 'Score (%)'

    for ax_num, (ax, building) in enumerate(zip(axes, buildings)):
        # One subfigure per building, each with a fresh line-style stack.
        title = building_anon_map[building]
        linestyles = deepcopy(LINESTYLES)
        for inferencer_name in inferencer_names:
            if building == 'uva_cse' and inferencer_name == 'scrabble':
                continue

            # al_hong result files store the metrics under different keys.
            if inferencer_name == 'al_hong':
                micro_key, macro_key = 'f1_micro', 'f1_macro'
            else:
                micro_key, macro_key = 'f1', 'macrof1'

            xss = []
            f1s = []
            mf1s = []
            for run in range(0, EXP_NUM):
                path = 'result/pointonly_notransfer_{0}_{1}_{2}.json'.format(
                    inferencer_name, building, run)
                with open(path) as fp:
                    data = json.load(fp)
                xss.append([datum['learning_srcids'] for datum in data])
                f1s.append([datum['metrics'][micro_key] for datum in data])
                mf1s.append([datum['metrics'][macro_key] for datum in data])

            f1 = average_data(xss, f1s, interp_x)
            mf1 = average_data(xss, mf1s, interp_x)
            ys = [f1, mf1]
            legends = None
            if ax_num == 0:
                legends = [
                    'MicroF1, {0}'.format(inferencer_name),
                    'MacroF1, {0}'.format(inferencer_name),
                ]
            plotter.plot_multiple_2dline(
                interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
                yticks, yticks_labels, title, ax, fig, ylim, xlim, legends,
                linestyles=[linestyles.pop()] * len(ys), cs=colors)

    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y tick labels and y-label.
    for ax in axes[1:]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    # Only the second subplot keeps an x-label.
    for idx, ax in enumerate(axes):
        if idx != 1:
            ax.set_xlabel('')
    axes[0].legend(bbox_to_anchor=(3.2, 1.5), ncol=3, frameon=False)
    fig.set_size_inches((8, 2))
    save_fig(fig, outputfile)
def entity_ts_result():
    """Plot accuracy / macro-F1 histories with and without 'ts_flag'.

    Two query variants are evaluated (the first with 'metadata.ts_flag' set
    to True, the second with the default False).  For each, the accuracy and
    macro-F1 histories returned by ``get_entity_results`` are converted to
    percent and plotted against the number of target samples, which starts at
    ``n_t`` and grows by ``inc_num`` per history entry.  The figure is saved
    to 'figs/entity_ts.pdf' and ``./send_figures`` is invoked afterwards.
    """
    source_target_list = [('ebu3b', 'ap_m')]
    n_list_list = [(200, 5)]
    ts_flag = False
    eda_flag = False
    inc_num = 20
    iter_num = 10
    default_query = {
        'metadata.label_type': 'label',
        'metadata.token_type': 'justseparate',
        'metadata.use_cluster_flag': True,
        'metadata.building_list': [],
        'metadata.source_sample_num_list': [],
        'metadata.target_building': '',
        'metadata.ts_flag': ts_flag,
        'metadata.eda_flag': eda_flag,
        'metadata.use_brick_flag': True,
        'metadata.negative_flag': True,
        'metadata.inc_num': inc_num,
    }
    query_list = [deepcopy(default_query), deepcopy(default_query)]
    query_list[0]['metadata.ts_flag'] = True

    fig, ax = plt.subplots(1, len(source_target_list))
    axes = [ax]
    cs = ['firebrick', 'deepskyblue']

    # Axis configuration shared by every curve.
    xlabel = None  # a figure-wide x-label is added with fig.text below
    ylabel = 'Score (%)'
    xtick = range(0, 205, 50)
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 102, 20)
    ytick_labels = [str(n) for n in ytick]
    ylim = (ytick[0] - 1, ytick[-1] + 2)
    title = None

    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        linestyles = [':', '-.', '-']
        for query in query_list:
            for (n_s, n_t) in n_list_list:
                if query['metadata.use_brick_flag'] and n_s == 0:
                    continue
                if n_s == 0:
                    building_list = [target]
                    source_sample_num_list = [n_t]
                elif n_t == 0:
                    building_list = [source]
                    source_sample_num_list = [n_s]
                else:
                    building_list = [source, target]
                    source_sample_num_list = [n_s, n_t]
                query['metadata.building_list'] = building_list
                query['metadata.source_sample_num_list'] = \
                    source_sample_num_list
                query['metadata.target_building'] = target

                # First ask only for results whose accuracy history has
                # exactly iter_num entries.
                q = {'$and': [
                    query,
                    {'$where': 'this.accuracy_history.length=={0}'
                               .format(iter_num)},
                ]}
                result = get_entity_results(q)
                if not result:
                    # Fall back to the query without the length constraint.
                    print(n_t)
                    result = get_entity_results(query)

                accuracy_list = [
                    val * 100 for val in result['accuracy_history']]
                macro_f1_list = [
                    val * 100 for val in result['macro_f1_history']]
                exp_num = len(macro_f1_list)
                xs = list(range(n_t, inc_num * exp_num + 1, inc_num))
                ys = [accuracy_list, macro_f1_list]
                if i == 0:
                    label = '{0}, SA: {1}'.format(
                        n_s, oxer(query['metadata.use_brick_flag']))
                    legends = [label, label]
                else:
                    legends = None
                plotter.plot_multiple_2dline(
                    xs, ys, xlabel, ylabel, xtick, xtick_labels,
                    ytick, ytick_labels, title, ax, fig,
                    ylim, None, legends, xtickRotate=0,
                    linestyles=[linestyles.pop()] * len(ys), cs=cs)

    for ax in axes:
        ax.grid(True)
    for ax, (source, target) in zip(axes, source_target_list):
        # Caption inside the subplot naming the transfer direction.  The
        # backslash is escaped so the literal contains no invalid escape.
        ax.text(0.45, 0.2,
                '{0} $\\Rightarrow$ {1}'.format(
                    anon_building_dict[source], anon_building_dict[target]),
                fontsize=11,
                transform=ax.transAxes)
    # Only the left-most subplot keeps its y tick labels and y-label.
    for ax in axes[1:]:
        ax.set_yticklabels([])
        ax.set_ylabel('')

    ax = axes[0]
    plt.text(0, 1.2, 'Accuracy: \nMacro $F_1$: ', ha='center', va='center',
             transform=ax.transAxes)
    # Drawn fully transparent (alpha=0), so the text itself is not visible.
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center',
             alpha=0)
    for idx, ax in enumerate(axes):
        if idx != 0:
            ax.set_xlabel('')
    fig.set_size_inches(4.4, 1.5)
    save_fig(fig, 'figs/entity_ts.pdf')
    subprocess.call('./send_figures')
def crf_entity_result_dep():
    """Draw baseline vs. Scrabble entity results, one panel per building set.

    For each tuple in ``building_sets`` (source building(s) followed by the
    target building) up to four pairs of accuracy / macro-F1 curves are
    drawn on that tuple's axes:

    1. baseline with source buildings (from ``result/baseline.json``),
    2. baseline with the target building only (same file),
    3. Scrabble without source
       (``result/crf_entity_iter_<t><t>_char2tagset_iter_nosource1.json``),
    4. Scrabble with source, averaged over up to three experiment files via
       ``lin_interpolated_avg``.

    The relative improvement of (4) over (1) at the sample counts listed in
    ``comp_xs`` is printed per panel and averaged over all panels.  The
    figure is saved to ``figs/crf_entity.pdf`` and ``./send_figures`` is
    executed.

    Takes no arguments and returns None.
    """
    # TODO: this should be changed to use ebu3b,ap_m -> bml
    building_sets = [('ebu3b', 'ap_m'), ('ap_m', 'bml'),
                     ('ebu3b', 'ghc'), ('ghc', 'ebu3b'),
                     ('ebu3b', 'bml', 'ap_m')]
    fig, axes = plt.subplots(1, len(building_sets))
    with open('result/baseline.json', 'r') as fp:
        baseline_results = json.load(fp)

    cs = ['firebrick', 'deepskyblue']
    acc_better_list = []
    mf1_better_list = []
    comp_xs = [10, 50, 150]

    # Axis configuration shared by every panel and every curve.
    ylim = (-2, 105)
    xlim = (-2, 205)
    xlabel = '# Target Building Examples'
    ylabel = 'Score (%)'
    xtick = [10] + list(range(40, 205, 40))
    xtick_labels = [str(n) for n in xtick]
    ytick = list(range(0, 105, 20))
    ytick_labels = [str(no) for no in ytick]
    lw = 1.2

    for i, (ax, buildings) in enumerate(zip(axes, building_sets)):
        print(i)
        # Only the third panel (i == 2) supplies legend labels.
        labelled = (i == 2)

        title = anon_building_dict[buildings[0]]
        for building in buildings[1:-1]:
            title += ',{0}'.format(anon_building_dict[building])
        title += '$\\Rightarrow${0}'.format(
            anon_building_dict[buildings[-1]])

        # Baseline with source.  The JSON keys are str() of the tuples.
        result = baseline_results[str(buildings)]
        baseline_x = result['sample_numbers']
        baseline_acc = result['avg_acc']
        baseline_mf1 = result['avg_mf1']
        ys = [baseline_acc, baseline_mf1]
        linestyles = [':', ':']
        if labelled:
            data_labels = ['Baseline Acc w/ $B_s$',
                           'Baseline M-$F_1$ w/ $B_s$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            baseline_x, ys, xlabel, ylabel, xtick, xtick_labels,
            ytick, ytick_labels, title, ax, fig, ylim, xlim,
            data_labels, 0, linestyles, cs, lw)

        # Baseline without source (target building only).
        result = baseline_results[str((buildings[-1],))]
        x = result['sample_numbers']
        ys = [result['avg_acc'], result['avg_mf1']]
        linestyles = ['-.', '-.']
        if labelled:
            data_labels = ['Baseline Acc w/o $B_s$',
                           'Baseline M-$F_1$ w/o $B_s$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            x, ys, xlabel, ylabel, xtick, xtick_labels,
            ytick, ytick_labels, title, ax, fig, ylim, xlim,
            data_labels, 0, linestyles, cs, lw)

        # Scrabble without source.
        buildingfix = ''.join([buildings[-1]] * 2)
        filename = 'result/crf_entity_iter_{0}_char2tagset_iter_nosource1.json'\
            .format(buildingfix)
        if not os.path.exists(filename):
            # NOTE: this skips the rest of the panel as well, including the
            # with-source curves, the comparison and the legend.
            continue
        with open(filename, 'r') as fp:
            res = json.load(fp)
        source_num = 0
        x = [len(r['learning_srcids']) - source_num for r in res]
        accuracy = [r['result']['entity']['accuracy'] * 100 for r in res]
        mf1s = [r['result']['entity']['macro_f1'] * 100 for r in res]
        ys = [accuracy, mf1s]
        linestyles = ['--', '--']
        if labelled:
            data_labels = ['Scrabble Acc w/o $B_s$',
                           'Scrabble M-$F_1$ w/o $B_s$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            x, ys, xlabel, ylabel, xtick, xtick_labels,
            ytick, ytick_labels, title, ax, fig, ylim, xlim,
            data_labels, 0, linestyles, cs, lw)

        # Scrabble with source, averaged over the available experiments.
        buildingfix = ''.join(list(buildings) + [buildings[-1]])
        filename_template = \
            'result/crf_entity_iter_{0}_char2tagset_iter_{1}.json'
        x = range(10, 205, 10)
        x_cands = []
        acc_cands = []
        mf1_cands = []
        for exp_num in range(0, 3):
            filename = filename_template.format(buildingfix, exp_num)
            if not os.path.exists(filename):
                continue
            with open(filename, 'r') as fp:
                res = json.load(fp)
            # 200 is subtracted per source building so that x counts only
            # the remaining (target) examples.
            source_num = 200 * (len(buildings) - 1)
            x_cands.append([len(r['learning_srcids']) - source_num
                            for r in res])
            acc_cands.append([r['result']['entity']['accuracy'] * 100
                              for r in res])
            mf1_cands.append([r['result']['entity']['macro_f1'] * 100
                              for r in res])
        acc = lin_interpolated_avg(x, x_cands, acc_cands)
        mf1 = lin_interpolated_avg(x, x_cands, mf1_cands)
        ys = [acc, mf1]

        # Relative improvement over the with-source baseline.
        print(buildings)
        mf1_betters = []
        acc_betters = []
        for comp_x in comp_xs:
            try:
                comp_idx_target = x.index(comp_x)
                comp_idx_baseline = baseline_x.index(comp_x)
                acc_better = \
                    acc[comp_idx_target] / baseline_acc[comp_idx_baseline] - 1
                mf1_better = \
                    mf1[comp_idx_target] / baseline_mf1[comp_idx_baseline] - 1
                mf1_betters.append(mf1_better)
                acc_betters.append(acc_better)
                print('srouce#: {0}'.format(comp_x))
                print('Acc\t baseline: {0}\t scrbl: {1}\t better: {2}\t'
                      .format(
                          baseline_acc[comp_idx_baseline],
                          acc[comp_idx_target],
                          acc_better
                      ))
                print('MF1\t baseline: {0}\t scrbl: {1}\t better: {2}\t'
                      .format(
                          baseline_mf1[comp_idx_baseline],
                          mf1[comp_idx_target],
                          mf1_better
                      ))
            except Exception:
                # Was a bare ``except:``; narrowed so Ctrl-C / SystemExit are
                # no longer swallowed.  The breakpoint is the original
                # diagnostic hook for bad or missing comparison data.
                pdb.set_trace()
        mf1_better_list.append(mf1_betters)
        acc_better_list.append(acc_betters)

        linestyles = ['-', '-']
        if labelled:
            data_labels = ['Scrabble Acc w/ $B_s$',
                           'Scrabble M-$F_1$ w/ $B_s$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            x, ys, xlabel, ylabel, xtick, xtick_labels,
            ytick, ytick_labels, title, ax, fig, ylim, xlim,
            data_labels, 0, linestyles, cs, lw)
        if labelled:
            ax.legend(bbox_to_anchor=(3.5, 1.53), ncol=4, frameon=False)

    print('====================')
    print('Source nums: {0}'.format(comp_xs))
    # Average each comparison point over all panels that produced numbers.
    mf1_better_avgs = [np.mean(list(map(itemgetter(k), mf1_better_list)))
                       for k, _ in enumerate(comp_xs)]
    acc_better_avgs = [np.mean(list(map(itemgetter(k), acc_better_list)))
                       for k, _ in enumerate(comp_xs)]
    print('MF1 better in average, {0}'.format(mf1_better_avgs))
    print('Acc better in average, {0}'.format(acc_better_avgs))

    fig.set_size_inches(9, 1.5)
    for ax in axes:
        ax.grid(True)
    # Only the first panel keeps y tick labels; only the third keeps the
    # x label.
    for k in range(1, len(building_sets)):
        axes[k].set_yticklabels([])
        axes[k].set_ylabel('')
    for k in range(0, len(building_sets)):
        if k != 2:
            axes[k].set_xlabel('')

    save_fig(fig, 'figs/crf_entity.pdf')
    subprocess.call('./send_figures')
def plot_one_ir2tagsets(target_building, source_building, fig=None, ax=None):
    """Plot ir2tagsets accuracy and macro-F1 curves for one building pair.

    One pair of curves is drawn per configuration returned by
    ``get_ir2tagsets_configs``; when the target is ``'ebu3b'`` an extra
    MLP configuration with ``ts_flag`` enabled is appended.  Each
    configuration's result file (located with
    ``get_filename_for_ir2tagsets``) must provide ``'accuracy'`` and
    ``'macrof1'`` series, which are plotted against ``range(10, 200, 5)``.

    Args:
        target_building: Name of the target building; key into
            ``building_anon_map``.
        source_building: Name of the source building; key into
            ``building_anon_map``.
        fig: Existing figure to draw on, or None.
        ax: Existing axes to draw on, or None.  If either ``fig`` or ``ax``
            is None a fresh 1x1 figure is created and used instead.

    Returns:
        None.  The curves, a title text and a grid are added to ``ax``.
    """
    title = '{0}$\\Rightarrow${1}'.format(
        building_anon_map[source_building],
        building_anon_map[target_building]
    )
    # Copied because one style is popped per configuration below.
    linestyles = deepcopy(LINESTYLES)
    configs = get_ir2tagsets_configs(target_building, source_building)
    if target_building == 'ebu3b':
        configs.append({
            'use_brick_flag': True,
            'negative_flag': True,
            'source_building_list': [source_building, target_building],
            'target_building': target_building,
            'tagset_classifier_type': 'MLP',
            'task': 'ir2tagsets',
            'ts_flag': True,
        })

    xlabel = '# of Target Building Examples'
    ylabel = 'Score (%)'
    if fig is None or ax is None:
        fig, ax = plt.subplots(1, 1)

    # Axis configuration does not depend on the configuration being drawn.
    xticks = [0, 10] + list(range(50, 201, 50))
    xticks_labels = [''] + [str(n) for n in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(n) for n in yticks]
    xlim = (50, xticks[-1])
    ylim = (0, 100)
    interp_x = list(range(10, 200, 5))

    for config in configs:
        filename = get_filename_for_ir2tagsets(target_building, config)
        with open(filename, 'r') as fp:
            res = json.load(fp)
        ys = [res['accuracy'], res['macrof1']]

        if target_building == 'ebu3b':
            # Label pieces: source sample count, SA (use_brick_flag) mark
            # and optional TS suffix.
            n_source = 200 if len(config['source_building_list']) > 1 else 0
            sa_mark = 'O' if config['use_brick_flag'] else 'X'
            ts_mark = ',TS' if config['ts_flag'] else ''
            legends = [
                # Spelling fixed: the label previously read 'Accruracy'.
                'Accuracy, {0},SA:{1}{2}'.format(n_source, sa_mark, ts_mark),
                'Macro-F1, {0},SA:{1}{2}'.format(n_source, sa_mark, ts_mark),
            ]
        else:
            legends = None

        plotter.plot_multiple_2dline(
            interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
            yticks, yticks_labels, None, ax, fig, ylim, xlim, legends,
            linestyles=[linestyles.pop()] * len(ys), cs=colors)

    ax.text(0.9, 0.15, title, transform=ax.transAxes, ha='right',
            backgroundcolor='white')
    ax.grid(True)
def crf_result_acc():
    """Plot CRF phrase-level F1 against the number of target samples.

    For every (source, target) pair and every source sample count in
    ``n_s_list`` up to five experiment files
    ``result/crf_iter_<buildings>_char2ir_iter_<n_s + k>.json`` are read.
    Phrase F1 (computed from phrase precision and recall) and phrase
    macro-F1 are averaged over the experiments with
    ``lin_interpolated_avg``; only the F1 curve is drawn, the initial
    macro-F1 is just printed.  The figure is saved to ``figs/crf_acc.pdf``
    and ``./send_figures`` is executed.

    Takes no arguments and returns None.
    """
    source_target_list = [('ebu3b', 'ap_m'), ('ghc', 'ebu3b')]
    fig, axes = plt.subplots(1, len(source_target_list))
    if isinstance(axes, Axes):
        axes = [axes]
    fig.set_size_inches(4, 1.5)
    cs = ['firebrick', 'deepskyblue']
    filename_template = 'result/crf_iter_{0}_char2ir_iter_{1}.json'
    n_s_list = [1000, 200, 0]

    # Axis configuration shared by all curves.
    xs = [5] + list(range(10, 201, 10))
    xlabel = None
    ylabel = 'Score (%)'
    xtick = [5] + list(range(40, 205, 40))
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(n) for n in ytick]
    xlim = (-5, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None

    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        # One line style is consumed (popped) per source sample count.
        linestyles = ['--', '-.', '-']
        for n_s in n_s_list:
            if n_s == 0:
                buildingfix = ''.join([target, target])
            else:
                buildingfix = ''.join([source, target, target])

            x_cands = []
            f1_cands = []
            mf1_cands = []
            for exp_num in range(0, 5):
                # The file index is the source sample count plus the
                # experiment number.
                filename = filename_template.format(buildingfix,
                                                    n_s + exp_num)
                if not os.path.exists(filename):
                    # Original diagnostic hook for a missing experiment.
                    pdb.set_trace()
                    continue
                with open(filename, 'r') as fp:
                    data = json.load(fp)
                x_cands.append([len(datum['learning_srcids']) - n_s
                                for datum in data])
                f1_cand = []
                for datum in data:
                    prec = datum['result']['crf']['phrase_precision'] * 100
                    rec = datum['result']['crf']['phrase_recall'] * 100
                    # F1 is defined as 0 when precision and recall are both
                    # 0; this used to raise ZeroDivisionError.
                    if prec + rec:
                        f1 = 2 * prec * rec / (prec + rec)
                    else:
                        f1 = 0.0
                    f1_cand.append(f1)
                f1_cands.append(f1_cand)
                mf1_cands.append([
                    datum['result']['crf']['phrase_macro_f1'] * 100
                    for datum in data
                ])
            f1s = lin_interpolated_avg(xs, x_cands, f1_cands)
            mf1s = lin_interpolated_avg(xs, x_cands, mf1_cands)
            ys = [f1s]

            # Print the first interpolated scores for selected settings.
            if n_s == 200 or n_s == 0:
                print('=======')
                print(source, target, n_s)
                print('init F1: {0}'.format(f1s[0]))
                print('init MF1: {0}'.format(mf1s[0]))
                print('=======')

            if i == 0:
                legends = ['#$B_S$:{0}'.format(n_s)]
            else:
                legends = None

            plotter.plot_multiple_2dline(
                xs, ys, xlabel, ylabel, xtick,
                xtick_labels, ytick, ytick_labels, title, ax, fig,
                ylim, xlim, legends, xtickRotate=0,
                linestyles=[linestyles.pop()] * len(ys), cs=cs)

        # Annotate the panel with its source => target pair.
        text = '{0} $\\Rightarrow$ {1}'.format(
            anon_building_dict[source], anon_building_dict[target])
        ax.text(0.8, 0.1, text, transform=ax.transAxes, ha='right',
                backgroundcolor='white')

    axes[0].legend(bbox_to_anchor=(0.15, 0.96), ncol=3, frameon=False)
    for ax in axes:
        ax.grid(True)
    # Every panel but the first drops its y tick labels and y label.  A
    # slice is used instead of the hard-coded axes[1] so a single pair no
    # longer raises IndexError.
    for other_ax in axes[1:]:
        other_ax.set_yticklabels([])
        other_ax.set_ylabel('')
    plt.text(0, 1.16, '$F_1$: \nMacro $F_1$: ', va='center',
             ha='center', transform=axes[0].transAxes)
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center')

    save_fig(fig, 'figs/crf_acc.pdf')
    subprocess.call('./send_figures')
def cls_comp_result():
    """Plot hard-coded accuracy / macro-F1 series for three classifiers.

    The series keyed 'best', 'ts' and 'rf' (legend names 'OCC',
    'OCC w/ TS' and 'RF') are embedded below.  The 'best' series is
    recorded in percent and the other two as fractions, so 'best' is first
    divided by 100 and afterwards every series is multiplied by 100.  Each
    series is truncated to the number of x positions.  The figure is saved
    to ``figs/cls.pdf`` and ``./send_figures`` is executed.

    Takes no arguments and returns None.
    """
    keys = ['best', 'ts', 'rf']
    xs = list(range(10, 205, 10))
    n_points = len(xs)

    accuracy_dict = OrderedDict({
        'best': [
            89.809313820507768, 92.54815950011843, 94.820762260127921,
            95.97224073086943, 96.084653841183666, 96.189745940212362,
            96.621875740345899, 96.767353707652205, 97.25703698768065,
            97.303271588486126, 97.563484660033183, 98.26716491945038,
            97.689250918028904, 98.192926735370776, 98.38512052831085,
            98.332192527621629, 98.393721664943683, 98.662756406459749,
            98.887643256929636, 98.967675573027705
        ],
        'ts': [
            0.8939772861881065, 0.8923213679976736, 0.9123210382072324,
            0.9135980339105342, 0.9189532249466957, 0.9340140813788202,
            0.9352186241411988, 0.9355258676853828, 0.9291091215997943,
            0.9378608124876789, 0.9319247243221132, 0.949146448493464,
            0.9489394545131488, 0.9502468717020965, 0.9567056828950493,
            0.9472988480217964, 0.9615234837716184, 0.966066986496091,
            0.9657838041933192, 0.9655206112295668
        ],
        'rf': [
            0.806640902629711, 0.8715051972281449, 0.8819351901208243,
            0.8936811478322669, 0.9154761904761907, 0.9102993218431644,
            0.9163216654821128, 0.9187111318407958, 0.9251769426676142,
            0.9323353470741529, 0.9335880123193552, 0.9353082059938402,
            0.9385065002369106, 0.9440912994551051, 0.9449197465055669,
            0.9479770048566685, 0.95334636342099, 0.9520936981757874,
            0.9534481609808099, 0.9574993336886989, 0.9613235903814261
        ],
    })
    mf1_dict = OrderedDict({
        'best': [
            49.278915576009666, 54.796766717693828, 62.58888234797125,
            65.516750225788741, 68.292157713216596, 70.178737730933733,
            72.269065905342927, 75.530080228774239, 79.910634234930825,
            83.958759694464149, 86.604737828403415, 89.944532313205116,
            89.509558650993768, 92.646954050881263, 92.840673983293001,
            92.748649991145385, 93.127511989870385, 93.479568639265494,
            94.246971132932828, 94.718836697647319
        ],
        'ts': [
            0.56653458779577659, 0.55708814049375366, 0.5937535218897827,
            0.63466926766986798, 0.653458865790845, 0.64011173425185053,
            0.67281122169885288, 0.68270291522350057, 0.72076990493532245,
            0.71261982497230925, 0.70044729648937165, 0.77730251488642088,
            0.76286044963642097, 0.79628750932789027, 0.81995259322192149,
            0.81512563219291001, 0.83983065742402829, 0.85147624388541865,
            0.85183408423723528, 0.85288622740244369
        ],
        'rf': [
            0.12250376794594604, 0.18942204544104752, 0.22171884155985688,
            0.27069328069179505, 0.30405631973712544, 0.2984466141860372,
            0.3205452968001699, 0.33832465365023096, 0.3891719868291194,
            0.44145987155626004, 0.4629123930116906, 0.4960558419219113,
            0.5335594108556089, 0.5915815154291774, 0.6430516639970087,
            0.6950590411205589, 0.7300801879845085, 0.7553289202919391,
            0.7856917033978976, 0.8454315647144195, 0.8931418245685142
        ],
    })

    def as_percent_series(raw):
        # Truncate each series to the x positions, bring 'best' down to a
        # fraction, then scale every series up to percent.
        scaled = []
        for name, series in raw.items():
            clipped = series[:n_points]
            if name == 'best':
                clipped = [entry / 100 for entry in clipped]
            scaled.append([entry * 100 for entry in clipped])
        return scaled

    # Accuracy curves first, macro-F1 curves second, each in key order.
    ys = as_percent_series(accuracy_dict) + as_percent_series(mf1_dict)

    legends = ['OCC', 'OCC w/ TS', 'RF'] * 2
    linestyles = ['-', ':', '-.'] * 2
    cs = ['firebrick'] * len(keys) + ['deepskyblue'] * len(keys)

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(3, 1.7)
    axes = [ax]

    xlabel = '# of Target Building Samples'
    ylabel = 'Score (%)'
    xtick = [10] + list(range(40, 205, 40))
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(n) for n in ytick]
    xlim = (xtick[0] - 2, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None
    _, plots = plotter.plot_multiple_2dline(
        xs, ys, xlabel, ylabel, xtick,
        xtick_labels, ytick, ytick_labels, title, ax, fig,
        ylim, xlim, None, xtickRotate=0,
        linestyles=linestyles, cs=cs)

    # Interleave accuracy and macro-F1 handles for the three-column legend.
    legend_order = [0, 3, 1, 4, 2, 5]
    ordered_handles = []
    ordered_labels = []
    for idx in legend_order:
        ordered_handles.append(plots[idx])
        ordered_labels.append(legends[idx])
    fig.legend(ordered_handles, ordered_labels, ncol=3,
               bbox_to_anchor=(0.15, 1.08, 0.8, 0.095),
               prop={'size': 7}, frameon=False)

    for ax in axes:
        ax.grid(True)
    plt.text(0.03, 1.135, 'Accuracy: \nMacro $F_1$: ', ha='center',
             va='center', transform=ax.transAxes, fontsize=7)

    save_fig(fig, 'figs/cls.pdf')
    subprocess.call('./send_figures')
def entity_iter_result():
    """Plot iterative entity-recognition accuracy and macro-F1.

    For each (source, target) pair four settings are drawn on the pair's
    axes: with or without source data ('200' or '0' in the legend) combined
    with SA 'O' or 'X', as encoded in the file-name suffixes ``source_sa``,
    ``source_nosa``, ``nosource_sa`` and ``nosource_nosa``.  For each
    setting the experiment files
    ``result/entity_iter_<buildings>_<setting><k>.json`` with k = 1, 2 are
    read and averaged with ``lin_interpolated_avg``.  The figure is saved
    to ``figs/entity_iter.pdf`` and ``./send_figures`` is executed.

    Takes no arguments and returns None.
    """
    source_target_list = [
        ('ebu3b', 'ap_m'),
        ('ghc', 'ap_m')
    ]
    fig, axes = plt.subplots(1, len(source_target_list))
    cs = ['firebrick', 'deepskyblue']
    filename_template = 'result/entity_iter_{0}_{1}{2}.json'

    # Axis configuration shared by all curves.
    x_t = range(10, 201, 10)
    xlabel = None
    ylabel = 'Score (%)'
    xtick = [10] + list(range(50, 205, 50))
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 102, 20)
    ytick_labels = [str(n) for n in ytick]
    ylim = (ytick[0] - 1, ytick[-1] + 2)
    title = None

    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        prefixes = [(''.join([target] * 2), 'nosource_nosa'),
                    (''.join([target] * 2), 'nosource_sa'),
                    (''.join([source, target, target]), 'source_nosa'),
                    (''.join([source, target, target]), 'source_sa')]
        linestyles = [':', '--', '-.', '-']
        for linestyle, (buildingfix, optfix) in zip(linestyles, prefixes):
            sa_flag = 'X' if 'nosa' in optfix else 'O'
            src_flag = '0' if 'nosource' in optfix else '200'
            source_num = int(src_flag)

            acc_cands = []
            mf1_cands = []
            x_cands = []
            for exp_num in range(1, 3):
                filename = filename_template.format(
                    buildingfix, optfix, exp_num)
                if not os.path.exists(filename):
                    continue
                with open(filename, 'r') as fp:
                    data = json.load(fp)
                # x positions come from every entry but the last; the score
                # histories are read from the last entry.
                x = [
                    len(set(datum['learning_srcids'])) - source_num
                    for datum in data[:-1]
                ]
                acc = [val * 100 for val in data[-1]['accuracy_history']]
                mf1 = [val * 100 for val in data[-1]['macro_f1_history']]
                x_cands.append(x)
                acc_cands.append(acc)
                mf1_cands.append(mf1)
            if len(x_cands) == 1:
                # Diagnostic hook: only one of the two experiment files
                # was found.
                pdb.set_trace()
            mf1s = lin_interpolated_avg(x_t, x_cands, mf1_cands)
            accs = lin_interpolated_avg(x_t, x_cands, acc_cands)
            ys = [accs, mf1s]
            # The leftover breakpoints that fired on every 'source_sa' run
            # (and one on 'sa_source', which never occurs) were removed so
            # the figure can be generated without a debugger session.

            if i == 0:
                legends = [
                    '{0},SA:{1}'.format(src_flag, sa_flag),
                    '{0},SA:{1}'.format(src_flag, sa_flag)
                ]
            else:
                legends = None

            plotter.plot_multiple_2dline(
                x_t, ys, xlabel, ylabel, xtick,
                xtick_labels, ytick, ytick_labels, title, ax,
                fig, ylim, None, legends, xtickRotate=0,
                linestyles=[linestyle] * len(ys), cs=cs)

    for ax in axes:
        ax.grid(True)
    for ax, (source, target) in zip(axes, source_target_list):
        # '\\R' spells the same characters the original '\R' produced, but
        # without relying on an invalid escape sequence.
        ax.text(
            0.45,
            0.2,
            '{0} $\\Rightarrow$ {1}'.format(anon_building_dict[source],
                                            anon_building_dict[target]),
            fontsize=11,
            transform=ax.transAxes,
        )

    # Only the left-most panel keeps its y tick labels and y label.
    for i in range(1, len(source_target_list)):
        axes[i].set_yticklabels([])
        axes[i].set_ylabel('')

    ax = axes[0]
    handles, labels = ax.get_legend_handles_labels()
    # Four settings x two metrics = eight legend entries, kept in plot order.
    legend_order = [0, 1, 2, 3, 4, 5, 6, 7]
    new_handles = [handles[i] for i in legend_order]
    new_labels = [labels[i] for i in legend_order]
    ax.legend(new_handles,
              new_labels,
              bbox_to_anchor=(0.15, 0.96),
              ncol=4,
              frameon=False,
              handletextpad=0.15,
              columnspacing=0.7)
    plt.text(-0.0,
             1.18,
             'Accuracy: \nMacro $F_1$: ',
             ha='center',
             va='center',
             transform=ax.transAxes)
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center')

    for i, ax in enumerate(axes):
        if i != 0:
            ax.set_xlabel('')

    fig.set_size_inches(4.4, 1.5)
    save_fig(fig, 'figs/entity_iter.pdf')
    subprocess.call('./send_figures')