def plot_entities():
    """Draw the no-transfer entity-inference curves and save them as a PDF.

    One subplot is created per entry of ``target_sources``.  For every
    inferencer the per-run JSON files under ``result/`` are loaded, the
    accuracy and macro-F1 series are combined through ``average_data`` and
    both curves are drawn on the subplot of the target building.  The figure
    is written to ``FIG_DIR + '/entities.pdf'`` via ``save_fig``.
    """
    EXP_NUM = 4
    outputfile = FIG_DIR + '/entities.pdf'
    inferencer_names = ['scrabble', 'arka']
    # Only some of the grid parameters are used below; the rest are
    # unpacked to keep the call contract with get_grid_params unchanged.
    (xticks, xticks_labels, yticks, yticks_labels, xlim, ylim, _interp_x,
     xlabel, ylabel, _base_linestyles, xtickRotate) = \
        get_grid_params(40, 100, 20)
    target_sources = [
        ('ebu3b', [None, 'ap_m']),
        ('sdh', [None, 'ebu3b']),
    ]
    fig, axes = plt.subplots(1, len(target_sources))
    style_of = {
        'scrabble': '--',
        'arka': ':',
    }
    # Number of result files read per inferencer; anything not listed here
    # falls back to EXP_NUM.
    runs_available = {'arka': 1, 'scrabble': 2}

    for ax, (target_building, _source_buildings) in zip(axes, target_sources):
        for inferencer_name in inferencer_names:
            # TODO (from original): 'arka' results are only plotted for 'sdh'.
            if inferencer_name == 'arka' and target_building != 'sdh':
                continue
            exp_num = runs_available.get(inferencer_name, EXP_NUM)

            # No-transfer results: one JSON file per experiment run.
            sample_counts = []
            accuracies = []
            macro_f1s = []
            for run_idx in range(exp_num):
                path = 'result/allentities_notransfer_{0}_{1}_{2}.json'.format(
                    inferencer_name, target_building, run_idx)
                with open(path) as fp:
                    records = json.load(fp)
                sample_counts.append(
                    [rec['learning_srcids'] for rec in records])
                accuracies.append(
                    [rec['metrics']['accuracy'] for rec in records])
                macro_f1s.append(
                    [rec['metrics']['macrof1-all'] for rec in records])

            # Common x grid: 10, 15, ... capped at 250 or just past the
            # largest x value seen in any run.
            largest_x = max([max(run_xs) for run_xs in sample_counts])
            grid = list(range(10, min(250, largest_x + 5), 5))
            curves = [
                average_data(sample_counts, accuracies, grid),
                average_data(sample_counts, macro_f1s, grid),
            ]
            plotter.plot_multiple_2dline(
                grid, curves, xlabel, ylabel,
                xticks, xticks_labels, yticks, yticks_labels,
                building_anon_map[target_building], ax, fig,
                ylim=ylim,
                xlim=xlim,
                dataLabels=None,
                linestyles=[style_of[inferencer_name]] * len(curves),
                cs=[ACC_COLOR, colors[1]],
                xtickRotate=xtickRotate,
                markers=['.', None],
                markevery=4,
                markersize=4,
            )
            # NOTE: the original carried a disabled (string-quoted) variant
            # here that drew the transfer curves from
            # 'result/allentities_transfer_{0}_{1}_{2}_{3}.json' (run 0 only,
            # source = source_buildings[1], skipped for 'arka') with
            # markers ['x', '|'].  It was never executed.

    fig.set_size_inches((4, 2))
    for ax in axes:
        ax.grid(True)
        ax.set_ylim(ylim)
    # Only the left-most subplot keeps its y tick labels and y label.
    for ax in axes[1:len(target_sources)]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    for idx in range(len(target_sources)):
        ax = axes[idx]
        ax.tick_params(axis='x', pad=-1.5)
        if idx == 0:
            # Shift the single x label so it sits between the subplots.
            ax.xaxis.set_label_coords(1.05, -0.2)
        else:
            ax.set_xlabel('')
    save_fig(fig, outputfile)
def plot_pointonly_transfer():
    """Plot point-only transfer-learning curves, one subplot per building pair.

    For each ``(target, source)`` pair and each inferencer, the per-run JSON
    results are read from ``result/pointonly_transfer_*.json``, the micro and
    macro F1 series are combined with ``average_data`` and both are drawn on
    the pair's subplot.  The figure is saved to
    ``FIG_DIR + '/pointonly_transfer.pdf'``.
    """
    target_sources = [
        ('ebu3b', 'ap_m'),
        ('ebu3b', 'sdh'),
        ('sdh', 'ebu3b'),
    ]
    EXP_NUM = 4
    outputfile = FIG_DIR + '/pointonly_transfer.pdf'
    inferencer_names = ['zodiac', 'al_hong', 'scrabble']
    # TODO (from original): update once the remaining scrabble runs finish.
    # Keyed by (inferencer, target, source); everything else uses EXP_NUM.
    reduced_runs = {
        ('scrabble', 'ebu3b', 'ap_m'): 1,
        ('scrabble', 'sdh', 'ebu3b'): 2,
    }

    fig, axes = plt.subplots(1, len(target_sources))
    xticks = [0, 10] + list(range(50, 251, 50))
    xticks_labels = [''] + [str(tick) for tick in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(tick) for tick in yticks]
    xlim = (0, xticks[-1])
    ylim = (yticks[0], yticks[-1])
    xlabel = '# of Samples'
    ylabel = 'Metric (%)'
    xtickRotate = 45

    for ax, (target_building, source_building) in zip(axes, target_sources):
        title = '{0} -> {1}'.format(building_anon_map[source_building],
                                    building_anon_map[target_building])
        # Fresh copy per subplot: one style is popped off per inferencer.
        remaining_styles = deepcopy(LINESTYLES)
        for inferencer_name in inferencer_names:
            exp_num = reduced_runs.get(
                (inferencer_name, target_building, source_building), EXP_NUM)
            # 'al_hong' stores its scores under different metric keys.
            if inferencer_name == 'al_hong':
                micro_key, macro_key = 'f1_micro', 'f1_macro'
            else:
                micro_key, macro_key = 'f1', 'macrof1'

            sample_counts = []
            micro_f1s = []
            macro_f1s = []
            for run_idx in range(exp_num):
                path = 'result/pointonly_transfer_{0}_{1}_{2}_{3}.json'.format(
                    inferencer_name, target_building, source_building,
                    run_idx)
                with open(path) as fp:
                    records = json.load(fp)
                sample_counts.append(
                    [rec['learning_srcids'] for rec in records])
                micro_f1s.append(
                    [rec['metrics'][micro_key] for rec in records])
                macro_f1s.append(
                    [rec['metrics'][macro_key] for rec in records])

            largest_x = max([max(run_xs) for run_xs in sample_counts])
            grid = list(range(10, min(250, largest_x + 5), 5))
            curves = [
                average_data(sample_counts, micro_f1s, grid),
                average_data(sample_counts, macro_f1s, grid),
            ]
            # Legends are intentionally disabled for this figure.
            plotter.plot_multiple_2dline(
                grid, curves, xlabel, ylabel,
                xticks, xticks_labels, yticks, yticks_labels,
                title, ax, fig, ylim, xlim, None,
                linestyles=[remaining_styles.pop()] * len(curves),
                cs=colors,
                xtickRotate=xtickRotate)

    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y tick labels and y label.
    for ax in axes[1:len(target_sources)]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    for idx in range(len(target_sources)):
        ax = axes[idx]
        ax.tick_params(axis='x', pad=-1.5)
        if idx == 1:
            # The second subplot carries the shared x label.
            ax.xaxis.set_label_coords(0.5, -0.2)
        else:
            ax.set_xlabel('')
    fig.set_size_inches((6, 2))
    save_fig(fig, outputfile)
def plot_scrabble():
    """Plot point-only, no-transfer learning curves for four buildings.

    For each building and inferencer the per-run JSON results are read from
    ``result/pointonly_notransfer_*.json``; micro and macro F1 are combined
    with ``average_data`` on a fixed x grid and drawn on the building's
    subplot.  The figure is saved to
    ``FIG_DIR + '/pointonly_notransfer.pdf'``.

    NOTE(review): ``inferencer_names`` and ``EXP_NUM`` are not defined inside
    this function; they are presumably module-level names - confirm.
    """
    buildings = ['ebu3b', 'uva_cse', 'sdh', 'ghc']
    outputfile = FIG_DIR + '/pointonly_notransfer.pdf'
    fig, axes = plt.subplots(1, len(buildings))

    xticks = [0, 10] + list(range(50, 251, 50))
    xticks_labels = [''] + [str(tick) for tick in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(tick) for tick in yticks]
    xlim = (-5, xticks[-1] + 5)
    ylim = (yticks[0] - 2, yticks[-1] + 5)
    grid = list(range(10, 250, 5))
    xlabel = '# of Examples'
    ylabel = 'Score (%)'

    for ax_num, (ax, building) in enumerate(zip(axes, buildings)):
        title = building_anon_map[building]
        # Fresh copy per subplot: one style is popped per plotted inferencer.
        remaining_styles = deepcopy(LINESTYLES)
        for inferencer_name in inferencer_names:
            # This combination is skipped (and consumes no line style).
            if building == 'uva_cse' and inferencer_name == 'scrabble':
                continue
            # 'al_hong' stores its scores under different metric keys.
            if inferencer_name == 'al_hong':
                micro_key, macro_key = 'f1_micro', 'f1_macro'
            else:
                micro_key, macro_key = 'f1', 'macrof1'

            sample_counts = []
            micro_f1s = []
            macro_f1s = []
            for run_idx in range(0, EXP_NUM):
                path = 'result/pointonly_notransfer_{0}_{1}_{2}.json'.format(
                    inferencer_name, building, run_idx)
                with open(path) as fp:
                    records = json.load(fp)
                sample_counts.append(
                    [rec['learning_srcids'] for rec in records])
                micro_f1s.append(
                    [rec['metrics'][micro_key] for rec in records])
                macro_f1s.append(
                    [rec['metrics'][macro_key] for rec in records])
            # Kept from the original: requires at least one loaded run
            # (all runs are assumed to share the same x values).
            sample_counts[0]

            curves = [
                average_data(sample_counts, micro_f1s, grid),
                average_data(sample_counts, macro_f1s, grid),
            ]
            # Only the first subplot contributes legend entries.
            if ax_num == 0:
                legends = [
                    'MicroF1, {0}'.format(inferencer_name),
                    'MacroF1, {0}'.format(inferencer_name),
                ]
            else:
                legends = None
            plotter.plot_multiple_2dline(
                grid, curves, xlabel, ylabel,
                xticks, xticks_labels, yticks, yticks_labels,
                title, ax, fig, ylim, xlim, legends,
                linestyles=[remaining_styles.pop()] * len(curves),
                cs=colors)

    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y tick labels and y label.
    for ax in axes[1:len(buildings)]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    # Only the second subplot keeps its x label.
    for idx in range(len(buildings)):
        if idx != 1:
            axes[idx].set_xlabel('')
    axes[0].legend(bbox_to_anchor=(3.2, 1.5), ncol=3, frameon=False)
    fig.set_size_inches((8, 2))
    save_fig(fig, outputfile)
def crf_entity_result_dep():
    """Plot baseline vs. Scrabble entity results for several building sets.

    For each tuple in ``building_sets`` (source building(s) followed by the
    target building) up to four pairs of curves (accuracy, macro F1) are
    drawn on one subplot:

    * baseline with source buildings  (``result/baseline.json``),
    * baseline without source         (``result/baseline.json``),
    * Scrabble without source         (``..._iter_nosource1.json``),
    * Scrabble with source            (averaged over runs 0-2 with
      ``lin_interpolated_avg``).

    If the "Scrabble without source" file is missing, the rest of that
    subplot is skipped.  The relative gain of Scrabble-with-source over the
    baseline-with-source is printed at the sample counts in ``comp_xs``.
    The figure is saved to ``figs/crf_entity.pdf`` and ``./send_figures`` is
    invoked afterwards.

    Unlike the earlier revision, a failed comparison (sample count missing
    from one of the x axes, index out of range, zero baseline) no longer
    drops into ``pdb`` through a bare ``except``: the problem is printed and
    NaN is recorded, so the per-building lists stay aligned for averaging.
    """
    # TODO (from original): should be changed to use ebu3b,ap_m -> bml
    building_sets = [('ebu3b', 'ap_m'), ('ap_m', 'bml'),
                     ('ebu3b', 'ghc'), ('ghc', 'ebu3b'),
                     ('ebu3b', 'bml', 'ap_m')]
    fig, axes = plt.subplots(1, len(building_sets))
    with open('result/baseline.json', 'r') as fp:
        baseline_results = json.load(fp)

    cs = ['firebrick', 'deepskyblue']
    acc_better_list = []
    mf1_better_list = []
    comp_xs = [10, 50, 150]

    # Axis configuration shared by every curve.
    ylim = (-2, 105)
    xlim = (-2, 205)
    xlabel = '# Target Building Examples'
    ylabel = 'Score (%)'
    xtick = [10] + list(range(40, 205, 40))
    xtick_labels = [str(n) for n in xtick]
    ytick = list(range(0, 105, 20))
    ytick_labels = [str(no) for no in ytick]
    lw = 1.2
    nan = float('nan')

    def draw(ax, x, ys, title, data_labels, linestyle):
        # Same positional call as before; the literal 0 is passed in the
        # slot following the data labels exactly as the original did.
        plotter.plot_multiple_2dline(
            x, ys, xlabel, ylabel, xtick, xtick_labels, ytick, ytick_labels,
            title, ax, fig, ylim, xlim, data_labels, 0,
            [linestyle] * 2, cs, lw)

    for i, (ax, buildings) in enumerate(zip(axes, building_sets)):
        print(i)
        # Legend entries are only produced on the third subplot.
        labelled = (i == 2)

        title = anon_building_dict[buildings[0]]
        for building in buildings[1:-1]:
            title += ',{0}'.format(anon_building_dict[building])
        title += '$\\Rightarrow${0}'.format(anon_building_dict[buildings[-1]])

        # Baseline with source
        result = baseline_results[str(buildings)]
        baseline_x = result['sample_numbers']
        baseline_acc = result['avg_acc']
        baseline_mf1 = result['avg_mf1']
        draw(ax, baseline_x, [baseline_acc, baseline_mf1], title,
             ['Baseline Acc w/ $B_s$', 'Baseline M-$F_1$ w/ $B_s$']
             if labelled else None,
             ':')

        # Baseline without source (keyed by the 1-tuple of the target).
        result = baseline_results[str((buildings[-1],))]
        draw(ax, result['sample_numbers'],
             [result['avg_acc'], result['avg_mf1']], title,
             ['Baseline Acc w/o $B_s$', 'Baseline M-$F_1$ w/o $B_s$']
             if labelled else None,
             '-.')

        # Scrabble without source
        buildingfix = ''.join([buildings[-1]] * 2)
        filename = \
            'result/crf_entity_iter_{0}_char2tagset_iter_nosource1.json'\
            .format(buildingfix)
        if not os.path.exists(filename):
            # No Scrabble data for this target: skip the rest of the subplot.
            continue
        with open(filename, 'r') as fp:
            res = json.load(fp)
        srcid_lens = [len(r['learning_srcids']) for r in res]
        accuracy = [r['result']['entity']['accuracy'] * 100 for r in res]
        mf1s = [r['result']['entity']['macro_f1'] * 100 for r in res]
        draw(ax, srcid_lens, [accuracy, mf1s], title,
             ['Scrabble Acc w/o $B_s$', 'Scrabble M-$F_1$ w/o $B_s$']
             if labelled else None,
             '--')

        # Scrabble with source
        buildingfix = ''.join(list(buildings) + [buildings[-1]])
        filename_template = \
            'result/crf_entity_iter_{0}_char2tagset_iter_{1}.json'
        x = range(10, 205, 10)
        # Each source building contributes 200 samples that are not counted
        # as target-building examples.
        source_num = 200 * (len(buildings) - 1)
        x_cands = []
        acc_cands = []
        mf1_cands = []
        for exp_num in range(0, 3):
            filename = filename_template.format(buildingfix, exp_num)
            if not os.path.exists(filename):
                continue
            with open(filename, 'r') as fp:
                res = json.load(fp)
            x_cands.append(
                [len(r['learning_srcids']) - source_num for r in res])
            acc_cands.append(
                [r['result']['entity']['accuracy'] * 100 for r in res])
            mf1_cands.append(
                [r['result']['entity']['macro_f1'] * 100 for r in res])
        acc = lin_interpolated_avg(x, x_cands, acc_cands)
        mf1 = lin_interpolated_avg(x, x_cands, mf1_cands)

        # Relative improvement over the baseline-with-source.
        print(buildings)
        mf1_betters = []
        acc_betters = []
        for comp_x in comp_xs:
            try:
                comp_idx_target = x.index(comp_x)
                comp_idx_baseline = baseline_x.index(comp_x)
                acc_better = \
                    acc[comp_idx_target] / baseline_acc[comp_idx_baseline] - 1
                mf1_better = \
                    mf1[comp_idx_target] / baseline_mf1[comp_idx_baseline] - 1
            except (ValueError, IndexError, ZeroDivisionError) as err:
                # Keep one entry per comp_x so the averaging below can index
                # every building's list at the same positions.
                print('cannot compare at {0} samples: {1!r}'
                      .format(comp_x, err))
                mf1_betters.append(nan)
                acc_betters.append(nan)
                continue
            mf1_betters.append(mf1_better)
            acc_betters.append(acc_better)
            print('srouce#: {0}'.format(comp_x))
            print('Acc\t baseline: {0}\t scrbl: {1}\t better: {2}\t'
                  .format(baseline_acc[comp_idx_baseline],
                          acc[comp_idx_target],
                          acc_better))
            print('MF1\t baseline: {0}\t scrbl: {1}\t better: {2}\t'
                  .format(baseline_mf1[comp_idx_baseline],
                          mf1[comp_idx_target],
                          mf1_better))
        mf1_better_list.append(mf1_betters)
        acc_better_list.append(acc_betters)

        draw(ax, x, [acc, mf1], title,
             ['Scrabble Acc w/ $B_s$', 'Scrabble M-$F_1$ w/ $B_s$']
             if labelled else None,
             '-')
        if labelled:
            ax.legend(bbox_to_anchor=(3.5, 1.53), ncol=4, frameon=False)

    print('====================')
    print('Source nums: {0}'.format(comp_xs))
    # Average gain across building sets at each comparison point (NaN if any
    # building set could not be compared there).
    mf1_better_avgs = [np.mean(list(map(itemgetter(pos), mf1_better_list)))
                       for pos, _ in enumerate(comp_xs)]
    acc_better_avgs = [np.mean(list(map(itemgetter(pos), acc_better_list)))
                       for pos, _ in enumerate(comp_xs)]
    print('MF1 better in average, {0}'.format(mf1_better_avgs))
    print('Acc better in average, {0}'.format(acc_better_avgs))

    fig.set_size_inches(9, 1.5)
    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y tick labels and y label.
    for idx in range(1, len(building_sets)):
        axes[idx].set_yticklabels([])
        axes[idx].set_ylabel('')
    # Only the third subplot keeps its x label.
    for idx in range(0, len(building_sets)):
        if idx != 2:
            axes[idx].set_xlabel('')
    save_fig(fig, 'figs/crf_entity.pdf')
    subprocess.call('./send_figures')
def crf_result_acc():
    """Plot CRF phrase-level F1 against the number of target samples.

    One subplot is drawn per ``(source, target)`` pair.  For each source
    sample budget in ``n_s_list`` (1000, 200, 0) up to five runs are read
    from ``result/crf_iter_{buildings}_char2ir_iter_{n}.json``, the phrase F1
    is derived from phrase precision/recall, and the runs are combined with
    ``lin_interpolated_avg``.  The figure is saved to ``figs/crf_acc.pdf``
    and ``./send_figures`` is invoked afterwards.

    Changes relative to the earlier revision:

    * F1 is defined as 0 when precision and recall are both 0 instead of
      raising ``ZeroDivisionError``.
    * A missing result file is reported and skipped rather than stopping in
      ``pdb`` (which blocks non-interactive runs).
    * y labels are cleared on every subplot after the first instead of the
      hard-coded ``axes[1]``, so a single-pair configuration (already
      anticipated by the ``isinstance(axes, Axes)`` wrap) no longer fails.
    """
    source_target_list = [('ebu3b', 'ap_m'), ('ghc', 'ebu3b')]
    fig, axes = plt.subplots(1, len(source_target_list))
    # plt.subplots returns a bare Axes (not an array) for a 1x1 grid.
    if isinstance(axes, Axes):
        axes = [axes]
    fig.set_size_inches(4, 1.5)
    cs = ['firebrick', 'deepskyblue']
    filename_template = 'result/crf_iter_{0}_char2ir_iter_{1}.json'
    n_s_list = [1000, 200, 0]

    # Axis configuration shared by every curve.
    xs = [5] + list(range(10, 201, 10))
    xlabel = None
    ylabel = 'Score (%)'
    xtick = [5] + list(range(40, 205, 40))
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(n) for n in ytick]
    xlim = (-5, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None

    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        # Popped from the end: '-' for 1000, '-.' for 200, '--' for 0.
        linestyles = ['--', '-.', '-']
        for n_s in n_s_list:
            if n_s == 0:
                buildingfix = ''.join([target, target])
            else:
                buildingfix = ''.join([source, target, target])

            x_cands = []
            f1_cands = []
            mf1_cands = []
            for exp_num in range(0, 5):
                # Result files are numbered n_s, n_s + 1, ..., n_s + 4.
                filename = filename_template.format(buildingfix,
                                                    n_s + exp_num)
                if not os.path.exists(filename):
                    print('missing result file, skipped: {0}'
                          .format(filename))
                    continue
                with open(filename, 'r') as fp:
                    data = json.load(fp)
                x_cand = [len(datum['learning_srcids']) - n_s
                          for datum in data]
                f1_cand = []
                for datum in data:
                    prec = datum['result']['crf']['phrase_precision'] * 100
                    rec = datum['result']['crf']['phrase_recall'] * 100
                    denom = prec + rec
                    # Harmonic mean; 0 by convention when both terms are 0.
                    f1_cand.append(2 * prec * rec / denom if denom else 0.0)
                mf1_cand = [datum['result']['crf']['phrase_macro_f1'] * 100
                            for datum in data]
                x_cands.append(x_cand)
                f1_cands.append(f1_cand)
                mf1_cands.append(mf1_cand)

            f1s = lin_interpolated_avg(xs, x_cands, f1_cands)
            mf1s = lin_interpolated_avg(xs, x_cands, mf1_cands)
            # Only F1 is plotted; macro F1 is just reported below.
            ys = [f1s]

            # Print curr result
            if n_s == 200 or n_s == 0:
                print('=======')
                print(source, target, n_s)
                print('init F1: {0}'.format(f1s[0]))
                print('init MF1: {0}'.format(mf1s[0]))
                print('=======')

            # Legend entries come from the first subplot only.
            if i == 0:
                legends = ['#$B_S$:{0}'.format(n_s)]
            else:
                legends = None
            plotter.plot_multiple_2dline(
                xs, ys, xlabel, ylabel, xtick,
                xtick_labels, ytick, ytick_labels, title, ax, fig,
                ylim, xlim, legends, xtickRotate=0,
                linestyles=[linestyles.pop()] * len(ys), cs=cs)
            text = '{0} $\\Rightarrow$ {1}'.format(
                anon_building_dict[source], anon_building_dict[target])
            ax.text(0.8, 0.1, text, transform=ax.transAxes, ha='right',
                    backgroundcolor='white')

    axes[0].legend(bbox_to_anchor=(0.15, 0.96), ncol=3, frameon=False)
    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y tick labels and y label.
    for ax in axes[1:]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
    plt.text(0, 1.16, '$F_1$: \nMacro $F_1$: ', va='center',
             ha='center', transform=axes[0].transAxes)
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center')
    save_fig(fig, 'figs/crf_acc.pdf')
    subprocess.call('./send_figures')
def cls_comp_result():
    """Plot hard-coded classifier comparison scores ('best', 'ts', 'rf').

    Accuracy and macro-F1 series for the three classifiers are embedded
    below.  The 'best' series are stored in percent and the others as
    fractions; all are trimmed to ``len(xs)`` points and brought to percent
    before plotting.  The figure is saved to ``figs/cls.pdf`` and
    ``./send_figures`` is invoked afterwards.
    """
    source_target_list = ('ebu3b', 'ap_m')
    keys = ['best', 'ts', 'rf']
    xs = list(range(10, 205, 10))

    accuracy_raw = {
        'best': [
            89.809313820507768, 92.54815950011843, 94.820762260127921,
            95.97224073086943, 96.084653841183666, 96.189745940212362,
            96.621875740345899, 96.767353707652205, 97.25703698768065,
            97.303271588486126, 97.563484660033183, 98.26716491945038,
            97.689250918028904, 98.192926735370776, 98.38512052831085,
            98.332192527621629, 98.393721664943683, 98.662756406459749,
            98.887643256929636, 98.967675573027705
        ],
        'ts': [
            0.8939772861881065, 0.8923213679976736, 0.9123210382072324,
            0.9135980339105342, 0.9189532249466957, 0.9340140813788202,
            0.9352186241411988, 0.9355258676853828, 0.9291091215997943,
            0.9378608124876789, 0.9319247243221132, 0.949146448493464,
            0.9489394545131488, 0.9502468717020965, 0.9567056828950493,
            0.9472988480217964, 0.9615234837716184, 0.966066986496091,
            0.9657838041933192, 0.9655206112295668
        ],
        'rf': [
            0.806640902629711, 0.8715051972281449, 0.8819351901208243,
            0.8936811478322669, 0.9154761904761907, 0.9102993218431644,
            0.9163216654821128, 0.9187111318407958, 0.9251769426676142,
            0.9323353470741529, 0.9335880123193552, 0.9353082059938402,
            0.9385065002369106, 0.9440912994551051, 0.9449197465055669,
            0.9479770048566685, 0.95334636342099, 0.9520936981757874,
            0.9534481609808099, 0.9574993336886989, 0.9613235903814261
        ],
    }
    mf1_raw = {
        'best': [
            49.278915576009666, 54.796766717693828, 62.58888234797125,
            65.516750225788741, 68.292157713216596, 70.178737730933733,
            72.269065905342927, 75.530080228774239, 79.910634234930825,
            83.958759694464149, 86.604737828403415, 89.944532313205116,
            89.509558650993768, 92.646954050881263, 92.840673983293001,
            92.748649991145385, 93.127511989870385, 93.479568639265494,
            94.246971132932828, 94.718836697647319
        ],
        'ts': [
            0.56653458779577659, 0.55708814049375366, 0.5937535218897827,
            0.63466926766986798, 0.653458865790845, 0.64011173425185053,
            0.67281122169885288, 0.68270291522350057, 0.72076990493532245,
            0.71261982497230925, 0.70044729648937165, 0.77730251488642088,
            0.76286044963642097, 0.79628750932789027, 0.81995259322192149,
            0.81512563219291001, 0.83983065742402829, 0.85147624388541865,
            0.85183408423723528, 0.85288622740244369
        ],
        'rf': [
            0.12250376794594604, 0.18942204544104752, 0.22171884155985688,
            0.27069328069179505, 0.30405631973712544, 0.2984466141860372,
            0.3205452968001699, 0.33832465365023096, 0.3891719868291194,
            0.44145987155626004, 0.4629123930116906, 0.4960558419219113,
            0.5335594108556089, 0.5915815154291774, 0.6430516639970087,
            0.6950590411205589, 0.7300801879845085, 0.7553289202919391,
            0.7856917033978976, 0.8454315647144195, 0.8931418245685142
        ],
    }

    def as_percent(key, series):
        # Trim to the plotted x range.  The 'best' series is first divided
        # by 100 (same arithmetic as before) so every series is a fraction,
        # then everything is scaled to percent.
        fractions = series[:len(xs)]
        if key == 'best':
            fractions = [value / 100 for value in fractions]
        return [value * 100 for value in fractions]

    # Curve order: accuracy for each key, then macro F1 for each key.
    ys = [as_percent(key, accuracy_raw[key]) for key in keys]
    ys += [as_percent(key, mf1_raw[key]) for key in keys]

    legends = ['OCC', 'OCC w/ TS', 'RF'] * 2
    linestyles = ['-', ':', '-.'] * 2
    cs = ['firebrick'] * len(keys) + ['deepskyblue'] * len(keys)

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(3, 1.7)
    axes = [ax]

    xlabel = '# of Target Building Samples'
    ylabel = 'Score (%)'
    xtick = [10] + list(range(40, 205, 40))
    xtick_labels = [str(tick) for tick in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(tick) for tick in ytick]
    xlim = (xtick[0] - 2, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None
    _, plots = plotter.plot_multiple_2dline(
        xs, ys, xlabel, ylabel, xtick,
        xtick_labels, ytick, ytick_labels, title, ax, fig,
        ylim, xlim, None, xtickRotate=0,
        linestyles=linestyles, cs=cs)

    # Interleave accuracy / macro-F1 handles so each legend column holds one
    # classifier.
    legend_order = [0, 3, 1, 4, 2, 5]
    new_handles = []
    new_legends = []
    for position in legend_order:
        new_handles.append(plots[position])
        new_legends.append(legends[position])
    fig.legend(new_handles,
               new_legends,
               ncol=3,
               bbox_to_anchor=(0.15, 1.08, 0.8, 0.095),
               prop={'size': 7},
               frameon=False)
    for ax in axes:
        ax.grid(True)
    # Row captions placed to the left of the two legend rows.
    plt.text(0.03,
             1.135,
             'Accuracy: \nMacro $F_1$: ',
             ha='center',
             va='center',
             transform=ax.transAxes,
             fontsize=7)
    save_fig(fig, 'figs/cls.pdf')
    subprocess.call('./send_figures')
def entity_iter_result():
    """Plot iterative entity-recognition accuracy / macro F1 per building pair.

    For every ``(source, target)`` pair four configurations are drawn on one
    subplot: with / without source data, each with ('sa') and without
    ('nosa') the SA option.  For each configuration runs 1 and 2 are read
    from ``result/entity_iter_{buildings}_{option}{run}.json`` (missing files
    are skipped) and combined with ``lin_interpolated_avg``.  The figure is
    saved to ``figs/entity_iter.pdf`` and ``./send_figures`` is invoked
    afterwards.

    Changes relative to the earlier revision:

    * The leftover ``pdb.set_trace()`` that fired unconditionally for the
      'source_sa' configuration (halting every run) is removed, as is the
      never-true ``optfix == 'sa_source'`` check.
    * Having fewer than two runs for a configuration is reported with a
      printed warning instead of stopping in the debugger.
    * The arrow label uses ``'\\Rightarrow'`` (as ``crf_result_acc`` does)
      rather than the invalid escape sequence ``'\R'``; the resulting text
      is unchanged.
    """
    source_target_list = [
        ('ebu3b', 'ap_m'),
        ('ghc', 'ap_m')
    ]
    fig, axes = plt.subplots(1, len(source_target_list))
    cs = ['firebrick', 'deepskyblue']
    filename_template = 'result/entity_iter_{0}_{1}{2}.json'

    # Axis configuration shared by every curve.
    x_t = range(10, 201, 10)
    xlabel = None
    ylabel = 'Score (%)'
    xtick = [10] + list(range(50, 205, 50))
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 102, 20)
    ytick_labels = [str(n) for n in ytick]
    ylim = (ytick[0] - 1, ytick[-1] + 2)
    title = None

    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        prefixes = [(''.join([target] * 2), 'nosource_nosa'),
                    (''.join([target] * 2), 'nosource_sa'),
                    (''.join([source, target, target]), 'source_nosa'),
                    (''.join([source, target, target]), 'source_sa')]
        linestyles = [':', '--', '-.', '-']
        for linestyle, (buildingfix, optfix) in zip(linestyles, prefixes):
            sa_flag = 'X' if 'nosa' in optfix else 'O'
            src_flag = '0' if 'nosource' in optfix else '200'
            # Source samples are subtracted so x counts target samples only.
            source_num = int(src_flag)

            acc_cands = []
            mf1_cands = []
            x_cands = []
            for exp_num in range(1, 3):
                filename = filename_template.format(buildingfix, optfix,
                                                    exp_num)
                if not os.path.exists(filename):
                    continue
                with open(filename, 'r') as fp:
                    data = json.load(fp)
                # All entries but the last give the x positions; the last
                # entry holds the accuracy / macro-F1 histories.
                x = [
                    len(set(datum['learning_srcids'])) - source_num
                    for datum in data[:-1]
                ]
                acc = [val * 100 for val in data[-1]['accuracy_history']]
                mf1 = [val * 100 for val in data[-1]['macro_f1_history']]
                x_cands.append(x)
                acc_cands.append(acc)
                mf1_cands.append(mf1)
            if len(x_cands) < 2:
                # Formerly a debugger breakpoint for "not enough exp data".
                print('warning: only {0} run(s) found for {1}_{2}'
                      .format(len(x_cands), buildingfix, optfix))
            mf1s = lin_interpolated_avg(x_t, x_cands, mf1_cands)
            accs = lin_interpolated_avg(x_t, x_cands, acc_cands)
            ys = [accs, mf1s]

            # Legend entries come from the first subplot only.
            if i == 0:
                legends = [
                    '{0},SA:{1}'.format(src_flag, sa_flag),
                    '{0},SA:{1}'.format(src_flag, sa_flag)
                ]
            else:
                legends = None
            plotter.plot_multiple_2dline(
                x_t, ys, xlabel, ylabel, xtick,
                xtick_labels, ytick, ytick_labels, title, ax,
                fig, ylim, None, legends, xtickRotate=0,
                linestyles=[linestyle] * len(ys), cs=cs)

    for ax in axes:
        ax.grid(True)
    for ax, (source, target) in zip(axes, source_target_list):
        ax.text(
            0.45,
            0.2,
            '{0} $\\Rightarrow$ {1}'.format(anon_building_dict[source],
                                            anon_building_dict[target]),
            fontsize=11,
            transform=ax.transAxes,
        )
    # Only the left-most subplot keeps its y tick labels and y label.
    for idx in range(1, len(source_target_list)):
        axes[idx].set_yticklabels([])
        axes[idx].set_ylabel('')

    ax = axes[0]
    handles, labels = ax.get_legend_handles_labels()
    # Four configurations x two metrics = eight legend entries.
    legend_order = [0, 1, 2, 3, 4, 5, 6, 7]
    new_handles = [handles[pos] for pos in legend_order]
    new_labels = [labels[pos] for pos in legend_order]
    ax.legend(new_handles,
              new_labels,
              bbox_to_anchor=(0.15, 0.96),
              ncol=4,
              frameon=False,
              handletextpad=0.15,
              columnspacing=0.7)
    plt.text(-0.0,
             1.18,
             'Accuracy: \nMacro $F_1$: ',
             ha='center',
             va='center',
             transform=ax.transAxes)
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center')
    for idx, ax in enumerate(axes):
        if idx != 0:
            ax.set_xlabel('')
    fig.set_size_inches(4.4, 1.5)
    save_fig(fig, 'figs/entity_iter.pdf')
    subprocess.call('./send_figures')