Esempio n. 1
0
def plot_scrabble_zodiac():
    """Draw the Scrabble/Zodiac count curve for building ``ebu3b``.

    The series is read from ``result/scrabble_zodiac.json`` (keys ``x`` and
    ``y``), plotted as a single line labelled '# of fixed samples' against a
    'Count' y-axis, and saved to ``FIG_DIR + '/scrabble_zodiac.pdf'``.
    """
    building = 'ebu3b'
    outputfile = FIG_DIR + '/scrabble_zodiac.pdf'
    fig, ax = plt.subplots(1, 1)
    # The interpolation grid and the default y label returned by the grid
    # helper are not used: the JSON file already holds the series to draw and
    # the y label is fixed to 'Count' below.
    (xticks, xticks_labels, yticks, yticks_labels, xlim, ylim, _interp_x,
     xlabel, _ylabel, linestyles, xtickRotate) = get_grid_params(
         ymin=0, ymax=35, ydelta=5,
         xmin=10, xmin2=50, xmax=250, xdelta=50)

    with open('result/scrabble_zodiac.json', 'r') as fp:
        series = json.load(fp)
    x = series['x']
    ys = [series['y']]
    title = building_anon_map[building]

    plotter.plot_multiple_2dline(
        x, ys, xlabel, 'Count', xticks, xticks_labels,
        yticks, yticks_labels, title, ax, fig, ylim, xlim,
        ['# of fixed samples'],
        linestyles=[linestyles.pop()] * len(ys), cs=colors,
        xtickRotate=xtickRotate)

    ax.grid(True)
    ax.tick_params(axis='x', pad=-1.5)
    ax.legend(bbox_to_anchor=(1.1, 1.35), ncol=1, frameon=False,
              fontsize='small')
    fig.set_size_inches((1.5, 1.7))
    save_fig(fig, outputfile)
Esempio n. 2
0
def plot_quiver_zodiac():
    """Compare plain Zodiac with Quiver/Zodiac on building ``ebu3b``.

    For each of the two configurations the per-run result files are loaded,
    the micro and macro F1 series are averaged over ``EXP_NUM`` runs with
    ``average_data`` on the ``interp_x`` grid supplied by
    ``get_grid_params``, and both curves are drawn on the same axis.  The
    figure is saved to ``FIG_DIR + '/quiver_zodiac.pdf'``.
    """
    EXP_NUM = 2
    building = 'ebu3b'
    outputfile = FIG_DIR + '/quiver_zodiac.pdf'
    fig, ax = plt.subplots(1, 1)
    (xticks, xticks_labels, yticks, yticks_labels, xlim, ylim, interp_x,
     xlabel, ylabel, linestyles, xtickRotate) = get_grid_params()

    title = building_anon_map[building]

    def load_averaged_f1s(file_template):
        """Return ``[micro F1, macro F1]`` averaged over all runs.

        ``file_template`` is formatted with the building name and run index.
        """
        xss, f1s, mf1s = [], [], []
        for run in range(EXP_NUM):
            with open(file_template.format(building, run)) as fp:
                data = json.load(fp)
            xss.append([datum['learning_srcids'] for datum in data])
            f1s.append([datum['metrics']['f1'] for datum in data])
            mf1s.append([datum['metrics']['macrof1'] for datum in data])
        return [average_data(xss, f1s, interp_x),
                average_data(xss, mf1s, interp_x)]

    # (result file template, legend name, x tick rotation).  The baseline is
    # drawn with the rotation from the grid parameters, the Quiver curves
    # with 45 degrees, exactly as the two hand-written passes used to do.
    configurations = [
        ('result/pointonly_notransfer_zodiac_{0}_{1}.json',
         'Zodiac', xtickRotate),
        ('result/quiver_zodiac_{0}_{1}.json',
         'Quiver/Zodiac', 45),
    ]
    for file_template, display_name, rotation in configurations:
        ys = load_averaged_f1s(file_template)
        legends = ['MicroF1, {0}'.format(display_name),
                   'MacroF1, {0}'.format(display_name)]
        plotter.plot_multiple_2dline(
            interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
            yticks, yticks_labels, title, ax, fig,
            ylim=ylim, xlim=xlim,
            dataLabels=legends,
            # One line style per configuration, shared by both metrics.
            linestyles=[linestyles.pop()] * len(ys), cs=colors,
            xtickRotate=rotation)

    ax.grid(True)
    ax.tick_params(axis='x', pad=-1.5)

    ax.legend(bbox_to_anchor=(1.26, 1.75), ncol=1, frameon=False,
              fontsize='small')
    fig.set_size_inches((1.5, 1.7))
    save_fig(fig, outputfile)
Esempio n. 3
0
def plot_pointonly_notransfer():
    """Plot point-only, no-transfer learning curves, one subplot per building.

    For every plotted (building, inferencer) pair the per-run files
    ``result/pointonly_notransfer_<inferencer>_<building>_<run>.json`` are
    loaded and the micro/macro F1 series are averaged with ``average_data``
    on a grid with step 5 starting at 10.  Both curves are drawn on the
    building's axis; the third subplot additionally carries the legend.  The
    figure is saved to ``FIG_DIR + '/pointonly_notransfer.pdf'``.
    """
    EXP_NUM = 4
    inferencer_names = ['zodiac', 'al_hong', 'scrabble', 'arka']
    buildings = ['ebu3b', 'uva_cse', 'sdh', 'ghc']
    outputfile = FIG_DIR + '/pointonly_notransfer.pdf'
    # Line styles for the legend-only "Accuracy" entries.
    accuracy_linestyles = {
        'arka': ':',
        'scrabble': '--'
    }
    # Index of the subplot whose lines are labelled and which owns the legend.
    legend_ax_num = 2

    fig, axes = plt.subplots(1, len(buildings))
    xticks = [0, 10] + list(range(50, 251, 50))
    xticks_labels = [''] + [str(tick) for tick in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(tick) for tick in yticks]
    xlim = (0, xticks[-1])
    ylim = (yticks[0], yticks[-1])
    xlabel = '# of Samples'
    ylabel = 'Metric (%)'
    xtickRotate = 45

    for ax_num, (ax, building) in enumerate(zip(axes, buildings)):
        with_legend = ax_num == legend_ax_num
        title = building_anon_map[building]
        linestyles = deepcopy(LINESTYLES)
        for inferencer_name in inferencer_names:
            # Scrabble is skipped for 'uva_cse'; Arka is only drawn for 'sdh'.
            if inferencer_name == 'scrabble' and building == 'uva_cse':
                continue
            if inferencer_name == 'arka' and building != 'sdh':
                continue

            exp_num = 1 if inferencer_name == 'arka' else EXP_NUM
            # The al_hong result files name their metrics differently.
            if inferencer_name == 'al_hong':
                micro_key, macro_key = 'f1_micro', 'f1_macro'
            else:
                micro_key, macro_key = 'f1', 'macrof1'

            xss, f1s, mf1s = [], [], []
            for run in range(exp_num):
                result_file = 'result/pointonly_notransfer_{0}_{1}_{2}.json' \
                    .format(inferencer_name, building, run)
                with open(result_file) as fp:
                    data = json.load(fp)
                xss.append([datum['learning_srcids'] for datum in data])
                f1s.append([datum['metrics'][micro_key] for datum in data])
                mf1s.append([datum['metrics'][macro_key] for datum in data])

            # Interpolate up to the largest observed x (capped at 250).
            x_stop = min(250, max(max(run_xs) for run_xs in xss) + 5)
            interp_x = list(range(10, x_stop, 5))
            ys = [average_data(xss, f1s, interp_x),
                  average_data(xss, mf1s, interp_x)]

            if with_legend:
                display_name = inferencer_display_names[inferencer_name]
                legends = ['micro-F1, {0}'.format(display_name),
                           'Macro-F1, {0}'.format(display_name)]
            else:
                legends = None

            plotter.plot_multiple_2dline(
                interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
                yticks, yticks_labels, title, ax, fig, ylim, xlim, legends,
                linestyles=[linestyles.pop()] * len(ys), cs=colors,
                xtickRotate=xtickRotate)

            if with_legend and inferencer_name in accuracy_linestyles:
                # A single point outside the axis limits: it only contributes
                # an 'Accuracy' entry to the legend.
                plotter.plot_multiple_2dline(
                    [-10], [[-10]], xlabel, ylabel, xticks, xticks_labels,
                    yticks, yticks_labels, title, ax, fig, ylim, xlim,
                    ['Accuracy, {0}'.format(
                        inferencer_display_names[inferencer_name])],
                    linestyles=[accuracy_linestyles[inferencer_name]],
                    cs=[ACC_COLOR],
                    xtickRotate=xtickRotate,
                    markers=['.'],
                    markevery=4,
                    markersize=4,
                )

    for i, ax in enumerate(axes):
        ax.grid(True)
        if i >= 1:
            # Only the left-most subplot keeps its y tick labels and y label.
            ax.set_yticklabels([])
            ax.set_ylabel('')
        ax.tick_params(axis='x', pad=-1.5)
        if i == 1:
            # The single remaining x label is shifted right of this subplot.
            ax.xaxis.set_label_coords(1.1, -0.2)
        else:
            ax.set_xlabel('')

    axes[legend_ax_num].legend(bbox_to_anchor=(3.9, 1.15), ncol=1,
                               frameon=False)
    fig.set_size_inches((8.5, 2))
    save_fig(fig, outputfile)
Esempio n. 4
0
def plot_entities():
    """Plot all-entities, no-transfer accuracy and macro F1 per target building.

    For every plotted (target building, inferencer) pair the per-run files
    ``result/allentities_notransfer_<inferencer>_<building>_<run>.json`` are
    loaded, the ``accuracy`` and ``macrof1-all`` series are averaged with
    ``average_data`` on a grid with step 5 starting at 10, and both curves
    are drawn on the building's axis.  The figure is saved to
    ``FIG_DIR + '/entities.pdf'``.
    """
    EXP_NUM = 4  # default number of runs per inferencer
    # Inferencers whose number of available runs differs from the default.
    exp_num_overrides = {'scrabble': 2, 'arka': 1}
    outputfile = FIG_DIR + '/entities.pdf'
    inferencer_names = ['scrabble', 'arka']
    # The interpolation grid and line styles from the grid helper are not
    # used here: the grid is derived from the data and the line style is
    # fixed per inferencer below.
    (xticks, xticks_labels, yticks, yticks_labels, xlim, ylim, _interp_x,
     xlabel, ylabel, _base_linestyles, xtickRotate) = \
        get_grid_params(40, 100, 20)

    # (target building, candidate source buildings).  Only the target is
    # used at the moment.
    # TODO: transfer curves (result files named
    # 'result/allentities_transfer_<inferencer>_<target>_<source>_<run>.json')
    # used to be sketched here inside a disabled string block; re-add them
    # as real code once those results are ready.
    target_sources = [
        ('ebu3b', [None, 'ap_m']),
        ('sdh', [None, 'ebu3b']),
    ]
    fig, axes = plt.subplots(1, len(target_sources))

    linestyles = {
        'scrabble': '--',
        'arka': ':'
    }

    for ax, (target_building, _source_buildings) in zip(axes, target_sources):
        title = building_anon_map[target_building]
        for inferencer_name in inferencer_names:
            # Arka is only plotted for 'sdh'.
            if inferencer_name == 'arka' and target_building != 'sdh':
                continue
            exp_num = exp_num_overrides.get(inferencer_name, EXP_NUM)

            xss, accs, mf1s = [], [], []
            for run in range(exp_num):
                result_file = 'result/allentities_notransfer_{0}_{1}_{2}.json' \
                    .format(inferencer_name, target_building, run)
                with open(result_file) as fp:
                    data = json.load(fp)
                xss.append([datum['learning_srcids'] for datum in data])
                accs.append([datum['metrics']['accuracy'] for datum in data])
                mf1s.append([datum['metrics']['macrof1-all']
                             for datum in data])

            # Interpolate up to the largest observed x (capped at 250).
            x_stop = min(250, max(max(run_xs) for run_xs in xss) + 5)
            interp_x = list(range(10, x_stop, 5))
            ys = [average_data(xss, accs, interp_x),
                  average_data(xss, mf1s, interp_x)]

            plotter.plot_multiple_2dline(
                interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
                yticks, yticks_labels, title, ax, fig,
                ylim=ylim, xlim=xlim,
                dataLabels=None,
                linestyles=[linestyles[inferencer_name]] * len(ys),
                # Accuracy gets its own colour and a dot marker; macro F1
                # reuses the second palette colour without a marker.
                cs=[ACC_COLOR, colors[1]],
                xtickRotate=xtickRotate,
                markers=['.', None],
                markevery=4,
                markersize=4,
            )

    fig.set_size_inches((4, 2))
    for i, ax in enumerate(axes):
        ax.grid(True)
        ax.set_ylim(ylim)
        if i >= 1:
            # Only the left-most subplot keeps its y tick labels and y label.
            ax.set_yticklabels([])
            ax.set_ylabel('')
        ax.tick_params(axis='x', pad=-1.5)
        if i == 0:
            # The single remaining x label is shifted right of this subplot.
            ax.xaxis.set_label_coords(1.05, -0.2)
        else:
            ax.set_xlabel('')
    save_fig(fig, outputfile)
Esempio n. 5
0
def plot_pointonly_transfer():
    """Plot point-only transfer learning curves, one subplot per building pair.

    For every (target, source) pair and inferencer the per-run files
    ``result/pointonly_transfer_<inferencer>_<target>_<source>_<run>.json``
    are loaded, the micro/macro F1 series are averaged with ``average_data``
    on a grid with step 5 starting at 10, and both curves are drawn without
    legend labels.  The figure is saved to
    ``FIG_DIR + '/pointonly_transfer.pdf'``.
    """
    target_sources = [
        ('ebu3b', 'ap_m'),
        ('ebu3b', 'sdh'),
        ('sdh', 'ebu3b'),
    ]
    EXP_NUM = 4
    # (inferencer, target, source) combinations with fewer runs than EXP_NUM.
    # TODO: update the ('scrabble', 'ebu3b', 'ap_m') entry once finished.
    exp_num_overrides = {
        ('scrabble', 'ebu3b', 'ap_m'): 1,
        ('scrabble', 'sdh', 'ebu3b'): 2,
    }
    outputfile = FIG_DIR + '/pointonly_transfer.pdf'
    inferencer_names = ['zodiac', 'al_hong', 'scrabble']

    fig, axes = plt.subplots(1, len(target_sources))
    xticks = [0, 10] + list(range(50, 251, 50))
    xticks_labels = [''] + [str(tick) for tick in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(tick) for tick in yticks]
    xlim = (0, xticks[-1])
    ylim = (yticks[0], yticks[-1])
    xlabel = '# of Samples'
    ylabel = 'Metric (%)'
    xtickRotate = 45
    # Legend labels are switched off for this figure.
    legends = None

    for ax, (target_building, source_building) in zip(axes, target_sources):
        title = '{0} -> {1}'.format(building_anon_map[source_building],
                                    building_anon_map[target_building])
        linestyles = deepcopy(LINESTYLES)
        for inferencer_name in inferencer_names:
            exp_num = exp_num_overrides.get(
                (inferencer_name, target_building, source_building), EXP_NUM)
            # The al_hong result files name their metrics differently.
            if inferencer_name == 'al_hong':
                micro_key, macro_key = 'f1_micro', 'f1_macro'
            else:
                micro_key, macro_key = 'f1', 'macrof1'

            xss, f1s, mf1s = [], [], []
            for run in range(exp_num):
                result_file = 'result/pointonly_transfer_{0}_{1}_{2}_{3}.json' \
                    .format(inferencer_name, target_building,
                            source_building, run)
                with open(result_file) as fp:
                    data = json.load(fp)
                xss.append([datum['learning_srcids'] for datum in data])
                f1s.append([datum['metrics'][micro_key] for datum in data])
                mf1s.append([datum['metrics'][macro_key] for datum in data])

            # Interpolate up to the largest observed x (capped at 250).
            x_stop = min(250, max(max(run_xs) for run_xs in xss) + 5)
            interp_x = list(range(10, x_stop, 5))
            ys = [average_data(xss, f1s, interp_x),
                  average_data(xss, mf1s, interp_x)]

            plotter.plot_multiple_2dline(
                interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
                yticks, yticks_labels, title, ax, fig, ylim, xlim, legends,
                linestyles=[linestyles.pop()] * len(ys), cs=colors,
                xtickRotate=xtickRotate)

    for i, ax in enumerate(axes):
        ax.grid(True)
        if i >= 1:
            # Only the left-most subplot keeps its y tick labels and y label.
            ax.set_yticklabels([])
            ax.set_ylabel('')
        ax.tick_params(axis='x', pad=-1.5)
        if i == 1:
            # Only the middle subplot keeps its x label, centred below it.
            ax.xaxis.set_label_coords(0.5, -0.2)
        else:
            ax.set_xlabel('')

    fig.set_size_inches((6, 2))
    save_fig(fig, outputfile)
Esempio n. 6
0
def cls_comp_result():
    """Plot hard-coded accuracy and macro F1 curves of four classifier variants.

    The eight series (accuracy then macro F1 for the keys 'best', 'ts', 'rf'
    and 'svc') are scaled to percent and drawn on one axis, accuracy in
    'firebrick' and macro F1 in 'deepskyblue'.  The figure is saved to
    'figs/cls.pdf' and './send_figures' is invoked afterwards.
    """
    keys = ['best', 'ts', 'rf', 'svc']
    xs = list(range(5, 205, 20))
    accuracy_dict = OrderedDict([
        ('best', [
            0.8631033290671848, 0.9024136840401907, 0.9233413507509902,
            0.9500121364579196, 0.9527101078305895, 0.9650918693087369,
            0.9677129764479163, 0.9593822175147483, 0.9711269988378419,
            0.9697809553231241
        ]),
        ('ts', [
            0.8713471602025806, 0.9166264458433141, 0.9185595580405604,
            0.9428053539499326, 0.9417577736854855, 0.9573296850405294,
            0.9489047766156204, 0.9534092413610498, 0.953262734588037,
            0.9595308306151684
        ]),
        ('rf', [
            0.756387822351681, 0.854248495814764, 0.8465179398914331,
            0.859092781381938, 0.9137193462494689, 0.9384494020036196,
            0.9460637421480792, 0.9512496873942656, 0.9582711799579264,
            0.9597065919355077
        ]),
        ('svc', [
            0.7210336660658784, 0.8278964869103078, 0.8371634459716821,
            0.8901948134091584, 0.9289625735354351, 0.9062837090984304,
            0.9072457164626379, 0.9094597402145658, 0.9061470144946531,
            0.9317219571018263
        ]),
    ])
    mf1_dict = OrderedDict([
        ('best', [
            0.43460544517064525, 0.46207967166726716, 0.60572075680286364,
            0.65253670730553948, 0.71164857967833528, 0.77075401369085861,
            0.77409145497551546, 0.78223293415400674, 0.79434165930991263,
            0.78765666427863568
        ]),
        ('ts', [
            0.38456663841099153, 0.47135950957306999, 0.50801383768831809,
            0.58379558680943822, 0.61765049559624907, 0.67617354377548211,
            0.66706236361751792, 0.70816840695824457, 0.68736126966336153,
            0.70501274992734486
        ]),
        ('rf', [
            0.094018355593671443, 0.21622362914898177, 0.2939715246436253,
            0.38083088857608816, 0.45237091518218492, 0.51912845475805691,
            0.56752106411334313, 0.6314794515347395, 0.73066778675441313,
            0.81505177770253923
        ]),
        ('svc', [
            0.19122967879394315, 0.2501766458806039, 0.27629897715774632,
            0.31374977389303144, 0.35811497520318963, 0.36814352938387473,
            0.37145631338451729, 0.38910680891542943, 0.35511959588962361,
            0.39688667191674587
        ]),
    ])
    # The first four entries belong to the accuracy lines, the last four to
    # the macro F1 lines.
    legends = ['SCRBL', 'w/ TS', 'RF', 'w/ SVC'] * 2
    linestyles = ['-', ':', '-.', '--'] * 2
    cs = ['firebrick'] * len(keys) + ['deepskyblue'] * len(keys)
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(4, 1.7)

    # Scale every series from fractions to percent.
    raw_series = list(accuracy_dict.values()) + list(mf1_dict.values())
    ys = [[value * 100 for value in series] for series in raw_series]

    xlabel = '# of Target Building Samples'
    ylabel = 'Score (%)'
    xtick = list(range(0, 205, 20))
    xtick_labels = [str(tick) for tick in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(tick) for tick in ytick]
    xlim = (xtick[0] - 2, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None
    _, plots = plotter.plot_multiple_2dline(
        xs, ys, xlabel, ylabel, xtick, xtick_labels, ytick, ytick_labels,
        title, ax, fig, ylim, xlim, None,
        xtickRotate=0, linestyles=linestyles, cs=cs)

    # Interleave accuracy and macro F1 handles so that, with four columns,
    # each legend column pairs the two metrics of one classifier.
    legend_order = [0, 4, 1, 5, 2, 6, 3, 7]
    fig.legend([plots[idx] for idx in legend_order],
               [legends[idx] for idx in legend_order],
               ncol=4,
               bbox_to_anchor=(-0.1, 1.04, 1, 0.095),
               prop={'size': 7},
               frameon=False)
    ax.grid(True)
    # Row captions placed next to the two legend rows.
    plt.text(0.03,
             1.135,
             'Accuracy: \nMacro $F_1$: ',
             ha='center',
             va='center',
             transform=ax.transAxes,
             fontsize=7)
    save_fig(fig, 'figs/cls.pdf')
    subprocess.call('./send_figures')
Esempio n. 7
0
def crf_result():
    """Plot CRF phrase F1 and phrase macro F1 against target sample count.

    One subplot is drawn per (source, target) building pair.  For every row
    of ``n_list_list`` (a fixed number of source samples combined with a
    growing number of target samples) the stored result is fetched with
    ``get_crf_results`` and two lines are plotted: phrase F1 (computed from
    ``phrase_precision`` and ``phrase_recall``) and ``phrase_macro_f1``, both
    in percent.  The figure is saved to 'figs/crf.pdf' and './send_figures'
    is invoked afterwards.

    Configurations without a stored result are reported on stdout and left
    out of the plotted series.
    """
    source_target_list = [('ebu3b', 'bml'), ('ghc', 'ebu3b')]
    # Each row: (number of source samples, number of target samples) pairs.
    n_list_list = [[(1000, 0), (1000, 5), (1000, 20), (1000, 50), (1000, 100),
                    (1000, 150), (1000, 200)],
                   [(200, 0), (200, 5), (200, 20), (200, 50), (200, 100),
                    (200, 150), (200, 200)],
                   [(0, 5), (0, 20), (0, 50), (0, 100), (0, 150), (0, 200)]]
    fig, axes = plt.subplots(1, len(source_target_list))
    if isinstance(axes, Axes):
        # plt.subplots returns a bare Axes when only one subplot is requested.
        axes = [axes]
    fig.set_size_inches(4, 1.5)
    cs = ['firebrick', 'deepskyblue']

    xlabel = None  # a shared x label is added with fig.text below
    ylabel = 'Score (%)'
    xtick = list(range(0, 205, 40))
    xtick_labels = [str(tick) for tick in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(tick) for tick in ytick]
    xlim = (xtick[0] - 2, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None

    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        linestyles = ['--', '-.', '-']
        for n_list in n_list_list:
            # x values are collected together with the scores so that a
            # skipped configuration cannot leave xs and ys with different
            # lengths.
            xs = []
            phrase_f1s = []
            phrase_macro_f1s = []
            for n_s, n_t in n_list:
                if n_s == 0:
                    building_list = [target]
                    source_sample_num_list = [n_t]
                elif n_t == 0:
                    building_list = [source]
                    source_sample_num_list = [n_s]
                else:
                    building_list = [source, target]
                    source_sample_num_list = [n_s, n_t]
                result_query = {
                    'label_type': 'label',
                    'token_type': 'justseparate',
                    'use_cluster_flag': True,
                    'building_list': building_list,
                    'source_sample_num_list': source_sample_num_list,
                    'target_building': target
                }
                result = get_crf_results(result_query)
                if not result:
                    print('No CRF result for {0} => {1} with '
                          '(n_s, n_t) = ({2}, {3}); skipping.'
                          .format(source, target, n_s, n_t))
                    continue
                phrase_recall = result['phrase_recall'] * 100
                phrase_prec = result['phrase_precision'] * 100
                denominator = phrase_prec + phrase_recall
                # F1 is defined as 0 when precision and recall are both 0.
                if denominator:
                    phrase_f1 = 2 * phrase_prec * phrase_recall / denominator
                else:
                    phrase_f1 = 0.0
                xs.append(n_t)
                phrase_f1s.append(phrase_f1)
                phrase_macro_f1s.append(result['phrase_macro_f1'] * 100)

            ys = [phrase_f1s, phrase_macro_f1s]
            if i == 0:
                # Every pair in a row shares the same source sample count;
                # both metric lines get the same label.
                source_sample_num = n_list[-1][0]
                legends = ['#$B_S$:{0}'.format(source_sample_num)] * len(ys)
            else:
                legends = None
            plotter.plot_multiple_2dline(
                xs, ys, xlabel, ylabel, xtick, xtick_labels, ytick,
                ytick_labels, title, ax, fig, ylim, xlim, legends,
                xtickRotate=0,
                linestyles=[linestyles.pop()] * len(ys), cs=cs)

        # Label the subplot with the (anonymised) building pair.
        text = '{0} $\\Rightarrow$ {1}'.format(
            anon_building_dict[source],
            anon_building_dict[target])
        ax.text(0.8,
                0.1,
                text,
                transform=ax.transAxes,
                ha='right',
                backgroundcolor='white')

    axes[0].legend(bbox_to_anchor=(0.15, 0.96), ncol=3, frameon=False)
    for ax in axes:
        ax.grid(True)
    for ax in axes[1:]:
        # Only the left-most subplot keeps its y tick labels and y label.
        ax.set_yticklabels([])
        ax.set_ylabel('')
    # Row captions placed next to the two legend rows.
    plt.text(0,
             1.16,
             '$F_1$: \nMacro $F_1$: ',
             va='center',
             ha='center',
             transform=axes[0].transAxes)
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center')

    save_fig(fig, 'figs/crf.pdf')
    subprocess.call('./send_figures')
Esempio n. 8
0
def entity_result_deprecated():
    """Plot stored entity-classification scores for one source/target pair.

    Deprecated, per its name.  For three query variants (differing in the
    ``use_brick_flag`` / ``negative_flag`` metadata) and for each row of
    ``n_list_list``, the stored result is fetched with ``get_entity_results``
    and the last entries of its hierarchy-accuracy, accuracy and macro-F1
    histories (scaled to percent) are plotted against the number of target
    samples.  The figure is saved to 'figs/entity.pdf'.

    If a query yields no result the function drops into ``pdb`` and retries
    the query once after the debugger session ends.
    """
    source_target_list = [('ebu3b', 'ap_m')]  #, ('ap_m', 'ebu3b')]
    # Each row: (number of source samples, number of target samples) pairs.
    # NOTE(review): the second row contains (0, 150) while its neighbours use
    # 200 source samples -- possibly a typo for (200, 150); confirm.
    n_list_list = [[(0, 5), (0, 50), (0, 100), (0, 150), (0, 200)],
                   [(200, 5), (200, 50), (200, 100), (0, 150), (200, 200)]]
    ts_flag = False
    eda_flag = False
    # Template for the result lookup; the building-related fields are filled
    # in per configuration inside the loops below.
    default_query = {
        'metadata.label_type': 'label',
        'metadata.token_type': 'justseparate',
        'metadata.use_cluster_flag': True,
        'metadata.building_list': [],
        'metadata.source_sample_num_list': [],
        'metadata.target_building': '',
        'metadata.ts_flag': ts_flag,
        'metadata.eda_flag': eda_flag,
        'metadata.use_brick_flag': True
    }
    # Three independent copies, one per flag combination set below.
    query_list = [deepcopy(default_query),\
                 deepcopy(default_query),\
                 deepcopy(default_query)]
    query_list[0]['metadata.use_brick_flag'] = False
    query_list[0]['metadata.negative_flag'] = False
    query_list[1]['metadata.use_brick_flag'] = False
    query_list[1]['metadata.negative_flag'] = True
    query_list[2]['metadata.use_brick_flag'] = True
    query_list[2]['metadata.negative_flag'] = True
    # NOTE(review): the two lists below are never used in this function.
    char_precs_list = list()
    phrase_f1s_list = list()
    fig, axes = plt.subplots(1, 3)
    #axes = [ax]
    fig.set_size_inches(8, 5)
    #fig, axes = plt.subplots(1,len(n_list_list))

    # NOTE(review): zip() stops at the shorter input, so with a single
    # source/target pair only axes[0] receives lines even though three
    # subplots are created and titled below.
    for ax, (source, target) in zip(axes, source_target_list):
        for query in query_list:
            for n_list in n_list_list:
                target_n_list = [ns[1] for ns in n_list]
                subset_accuracy_list = list()
                accuracy_list = list()
                hierarchy_accuracy_list = list()
                weighted_f1_list = list()
                macro_f1_list = list()

                for (n_s, n_t) in n_list:
                    # Pick the training buildings and their sample counts:
                    # target only, source only, or both.
                    if n_s == 0:
                        building_list = [target]
                        source_sample_num_list = [n_t]
                    elif n_t == 0:
                        building_list = [source]
                        source_sample_num_list = [n_s]
                    else:
                        building_list = [source, target]
                        source_sample_num_list = [n_s, n_t]
                    query['metadata.building_list'] = building_list
                    query['metadata.source_sample_num_list'] = \
                            source_sample_num_list
                    query['metadata.target_building'] = target

                    result = get_entity_results(query)
                    # A falsy result opens the debugger; the query is then
                    # repeated once and its outcome used unchecked.
                    try:
                        assert result
                    except:
                        print(n_t)
                        pdb.set_trace()
                        result = get_entity_results(query)
                    #point_precs = result['point_precision_history'][-1]
                    #point_recall = result['point_recall'][-1]
                    # Keep only the final value of each metric history, in
                    # percent.
                    subset_accuracy_list.append(
                        result['subset_accuracy_history'][-1] * 100)
                    accuracy_list.append(result['accuracy_history'][-1] * 100)
                    hierarchy_accuracy_list.append(
                        result['hierarchy_accuracy_history'][-1] * 100)
                    weighted_f1_list.append(result['weighted_f1_history'][-1] *
                                            100)
                    macro_f1_list.append(result['macro_f1_history'][-1] * 100)

                xs = target_n_list
                # Subset accuracy and weighted F1 are collected above but not
                # plotted.
                ys = [hierarchy_accuracy_list, accuracy_list, macro_f1_list]
                xlabel = '# of Target Building Samples'
                ylabel = 'Score (%)'
                xtick = target_n_list
                xtick_labels = [str(n) for n in target_n_list]
                ytick = range(0, 102, 10)
                ytick_labels = [str(n) for n in ytick]
                ylim = (ytick[0] - 1, ytick[-1] + 2)
                # n_s here is the value left over from the last iteration of
                # the loop above.
                legends = [
                    '{0}, SA:{1}'\
                    .format(n_s, query['metadata.use_brick_flag']),
                    '{0}, SA:{1}'\
                    .format(n_s, query['metadata.use_brick_flag']),
                    '{0}, SA:{1}'\
                    .format(n_s, query['metadata.use_brick_flag'])
                          ]
                title = None
                # NOTE(review): other calls in this file pass
                # `ylim, xlim, legends` in these positions, so `legends` may
                # be landing in the xlim slot here -- confirm against the
                # current plot_multiple_2dline signature.
                plotter.plot_multiple_2dline(xs, ys, xlabel, ylabel, xtick,\
                                 xtick_labels, ytick, ytick_labels, title, ax, fig, \
                                 ylim, legends)
                #plotter.plot_multiple_2dline(xs, [ys[1]], xlabel, ylabel, xtick,\
                #                 xtick_labels, ytick, ytick_labels, title, axes[1], fig, \
                #                 ylim, [legends[1]])
                #plotter.plot_multiple_2dline(xs, [ys[2]], xlabel, ylabel, xtick,\
                #                 xtick_labels, ytick, ytick_labels, title, axes[2], fig, \
                #                 ylim, [legends[2]])
                # Only the query with both negative_flag and use_brick_flag
                # set goes on to the second row of n_list_list; the other
                # queries stop after the first row.
                if not (query['metadata.negative_flag']
                        and query['metadata.use_brick_flag']):
                    break
    axes[0].set_title('Hierarchical Accuracy')
    axes[1].set_title('Accuracy')
    axes[2].set_title('Macro F1')
    suptitle = 'Multi Label (TagSets) Classification with a Source building.'
    fig.suptitle(suptitle)
    save_fig(fig, 'figs/entity.pdf')
Esempio n. 9
0
def _load_entity_point(path, x_value):
    """Load one optional single-point Scrabble result.

    Returns ``([x_value], [accuracy %], [macro-F1 %])`` taken from the first
    record of the JSON file at ``path``, or three empty lists when the file
    does not exist.
    """
    if not os.path.isfile(path):
        return [], [], []
    with open(path, 'r') as fp:
        entity = json.load(fp)[0]['result']['entity'][0]
    return [x_value], [entity['accuracy'] * 100], [entity['macro_f1'] * 100]


def crf_entity_result():
    """Plot baseline and Scrabble entity scores, one subplot per building set.

    For every building set the baseline accuracy / macro-F1 curves are read
    from ``result/baseline.json`` and drawn dotted; the Scrabble curves are
    read from ``result/crf_entity_iter_<buildings>.json`` (optionally
    prefixed by the ``_zero`` and ``_five`` single-point files) and drawn
    solid.  The figure is written to ``figs/crf_entity.pdf`` and
    ``./send_figures`` is invoked afterwards.

    A building set whose Scrabble result file cannot be loaded is reported
    on stdout and skipped (its subplot keeps only the baseline curves).
    """
    building_sets = [
        ('ebu3b', 'ap_m'), ('ap_m', 'bml'), ('ebu3b', 'ghc'), ('ghc', 'ebu3b'),
        ('ebu3b', 'bml', 'ap_m')
    ]  ### TODO TODO: this should be changed to use ebu3b,ap_m -> bml
    fig, axes = plt.subplots(1, len(building_sets))
    with open('result/baseline.json', 'r') as fp:
        baseline_results = json.load(fp)

    cs = ['firebrick', 'deepskyblue']

    # Axis configuration shared by every subplot.
    xlabel = '# Target Building Samples'
    ylabel = 'Score (%)'
    ytick = list(range(0, 105, 20))
    ytick_labels = [str(no) for no in ytick]
    ylim = (-2, 105)
    xlim = (10, 205)
    lw = 1.2

    for i, (ax, buildings) in enumerate(zip(axes, building_sets)):
        print(i)
        joined = ''.join(buildings)
        # Only the middle subplot carries legend labels.
        with_legend = i == 2

        # Title: "<src>[,<src>...] => <target>" using anonymised names.
        title = ','.join('{0}'.format(anon_building_dict[building])
                         for building in buildings[:-1])
        title += '$\\Rightarrow${0}'.format(anon_building_dict[buildings[-1]])

        # Baseline
        baseline = baseline_results[str(buildings)]
        sample_numbers = baseline['sample_numbers']
        xtick = sample_numbers
        xtick_labels = [str(no) for no in sample_numbers]
        if with_legend:
            data_labels = ['Baseline Accuracy', 'Baseline Macro $F_1$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            sample_numbers, [baseline['avg_acc'], baseline['avg_mf1']],
            xlabel, ylabel, xtick, xtick_labels, ytick, ytick_labels, title,
            ax, fig, ylim, xlim, data_labels, 0, [':', ':'], cs, lw)

        # scrabble
        # The three-building set is offset by 400 source samples, the
        # two-building sets by 200.
        srcids_offset = 400 if joined == 'ebu3bbmlap_m' else 200

        iter_file = 'result/crf_entity_iter_{0}.json'.format(joined)
        try:
            with open(iter_file, 'r') as fp:
                result = json.load(fp)[0]
        except (OSError, ValueError, LookupError, TypeError) as err:
            # Previously a bare ``except`` that dropped into pdb; report the
            # problem and move on so the script can run unattended.
            print('Skipping {0}: cannot load {1} ({2!r})'.format(
                buildings, iter_file, err))
            continue

        x_zero, acc_zero, mf1_zero = _load_entity_point(
            'result/crf_entity_iter_{0}_zero.json'.format(joined), 0)
        x_five, acc_five, mf1_five = _load_entity_point(
            'result/crf_entity_iter_{0}_five.json'.format(
                joined + buildings[-1]), 5)

        x = x_zero + x_five + [
            len(learning_srcids) - srcids_offset
            for learning_srcids in result['learning_srcids_history'][:-1]
        ]
        entity_results = result['result']['entity']
        accuracy = acc_zero + acc_five + [
            res['accuracy'] * 100 for res in entity_results
        ]
        mf1s = mf1_zero + mf1_five + [
            res['macro_f1'] * 100 for res in entity_results
        ]
        if with_legend:
            data_labels = ['Scrabble Accuracy', 'Scrabble Macro $F_1$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            x, [accuracy, mf1s], xlabel, ylabel, xtick, xtick_labels, ytick,
            ytick_labels, title, ax, fig, ylim, xlim, data_labels, 0,
            ['-', '-'], cs, lw)
        if with_legend:
            ax.legend(bbox_to_anchor=(3.2, 1.45), ncol=4, frameon=False)

    fig.set_size_inches(9, 1.5)
    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y labelling.
    for i in range(1, len(building_sets)):
        axes[i].set_yticklabels([])
        axes[i].set_ylabel('')
    # Only the middle subplot keeps its x label.
    for i in range(0, len(building_sets)):
        if i != 2:
            axes[i].set_xlabel('')

    save_fig(fig, 'figs/crf_entity.pdf')
    subprocess.call('./send_figures')
Esempio n. 10
0
def plot_scrabble():
    """Draw micro/macro F1 learning curves, one subplot per building.

    For each building and each entry of the module-level
    ``inferencer_names`` (except ``scrabble`` on ``uva_cse``), the
    ``EXP_NUM`` result files
    ``result/pointonly_notransfer_<inferencer>_<building>_<run>.json`` are
    loaded, averaged onto a common x grid with ``average_data`` and plotted.
    The figure is saved to ``FIG_DIR + '/pointonly_notransfer.pdf'``.
    """
    buildings = ['ebu3b', 'uva_cse', 'sdh', 'ghc']
    outputfile = FIG_DIR + '/pointonly_notransfer.pdf'

    fig, axes = plt.subplots(1, len(buildings))

    # Axis layout shared by all subplots.
    xticks = [0, 10] + list(range(50, 251, 50))
    xticks_labels = [''] + [str(tick) for tick in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(tick) for tick in yticks]
    xlim = (-5, xticks[-1] + 5)
    ylim = (yticks[0] - 2, yticks[-1] + 5)
    interp_x = list(range(10, 250, 5))
    xlabel = '# of Examples'
    ylabel = 'Score (%)'

    # One subplot per building.
    for ax_num, (ax, building) in enumerate(zip(axes, buildings)):
        title = building_anon_map[building]
        linestyles = deepcopy(LINESTYLES)
        for inferencer_name in inferencer_names:
            if (building, inferencer_name) == ('uva_cse', 'scrabble'):
                continue

            # al_hong stores its metrics under different key names.
            if inferencer_name == 'al_hong':
                micro_key, macro_key = 'f1_micro', 'f1_macro'
            else:
                micro_key, macro_key = 'f1', 'macrof1'

            xss, f1s, mf1s = [], [], []
            for run in range(0, EXP_NUM):
                path = 'result/pointonly_notransfer_{0}_{1}_{2}.json'.format(
                    inferencer_name, building, run)
                with open(path) as fp:
                    data = json.load(fp)
                xss.append([datum['learning_srcids'] for datum in data])
                f1s.append([datum['metrics'][micro_key] for datum in data])
                mf1s.append([datum['metrics'][macro_key] for datum in data])

            # Every run is assumed to share the same x grid; the lookup is
            # retained from the original flow although it is not used below.
            first_run_x = xss[0]

            curves = [
                average_data(xss, f1s, interp_x),
                average_data(xss, mf1s, interp_x),
            ]

            # Legend labels are only attached on the first subplot.
            if ax_num == 0:
                legends = ['MicroF1, {0}'.format(inferencer_name),
                           'MacroF1, {0}'.format(inferencer_name)]
            else:
                legends = None

            style = linestyles.pop()
            plotter.plot_multiple_2dline(
                interp_x, curves, xlabel, ylabel, xticks, xticks_labels,
                yticks, yticks_labels, title, ax, fig, ylim, xlim, legends,
                linestyles=[style] * len(curves), cs=colors)

    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y labelling.
    for idx in range(1, len(buildings)):
        axes[idx].set_yticklabels([])
        axes[idx].set_ylabel('')
    # Only the second subplot keeps its x label.
    for idx in range(0, len(buildings)):
        if idx == 1:
            continue
        axes[idx].set_xlabel('')
    axes[0].legend(bbox_to_anchor=(3.2, 1.5), ncol=3, frameon=False)
    fig.set_size_inches((8, 2))
    save_fig(fig, outputfile)
Esempio n. 11
0
def entity_ts_result():
    """Plot entity accuracy / macro-F1 histories with and without ``ts_flag``.

    Two queries are built from a common template (the first with
    ``metadata.ts_flag`` set to True, the second with it False) and resolved
    through ``get_entity_results``.  The ``accuracy_history`` and
    ``macro_f1_history`` of each result are plotted as percentages against
    the number of target-building samples.  The figure is written to
    ``figs/entity_ts.pdf`` and ``./send_figures`` is invoked afterwards.
    """
    source_target_list = [('ebu3b', 'ap_m')]
    n_list_list = [(200, 5)]
    ts_flag = False
    eda_flag = False
    inc_num = 20
    iter_num = 10
    default_query = {
        'metadata.label_type': 'label',
        'metadata.token_type': 'justseparate',
        'metadata.use_cluster_flag': True,
        'metadata.building_list': [],
        'metadata.source_sample_num_list': [],
        'metadata.target_building': '',
        'metadata.ts_flag': ts_flag,
        'metadata.eda_flag': eda_flag,
        'metadata.use_brick_flag': True,
        'metadata.negative_flag': True,
        'metadata.inc_num': inc_num,
    }
    query_list = [deepcopy(default_query), deepcopy(default_query)]
    query_list[0]['metadata.ts_flag'] = True
    fig, ax = plt.subplots(1, len(source_target_list))
    # With a single source/target pair ``plt.subplots`` returns one Axes.
    axes = [ax]
    cs = ['firebrick', 'deepskyblue']

    # Axis configuration shared by every curve.
    xlabel = None
    ylabel = 'Score (%)'
    xtick = range(0, 205, 50)
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 102, 20)
    ytick_labels = [str(n) for n in ytick]
    ylim = (ytick[0] - 1, ytick[-1] + 2)
    title = None

    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        linestyles = [':', '-.', '-']
        for query in query_list:
            for ns in n_list_list:
                n_s, n_t = ns[0], ns[1]
                if query['metadata.use_brick_flag'] and n_s == 0:
                    continue

                if n_s == 0:
                    building_list = [target]
                    source_sample_num_list = [n_t]
                elif n_t == 0:
                    building_list = [source]
                    source_sample_num_list = [n_s]
                else:
                    building_list = [source, target]
                    source_sample_num_list = [n_s, n_t]
                query['metadata.building_list'] = building_list
                query['metadata.source_sample_num_list'] = \
                    source_sample_num_list
                query['metadata.target_building'] = target
                # Prefer a run whose history has exactly ``iter_num`` entries.
                q = {'$and': [query, {'$where':
                                      'this.accuracy_history.length=={0}'
                                      .format(iter_num)}]}

                result = get_entity_results(q)
                if not result:
                    # Explicit check instead of ``assert`` (which is removed
                    # under ``python -O``) and without the old pdb breakpoint:
                    # fall back to the query without the length constraint.
                    print(n_t)
                    result = get_entity_results(query)

                accuracy_list = [
                    val * 100 for val in result['accuracy_history']
                ]
                macro_f1_list = [
                    val * 100 for val in result['macro_f1_history']
                ]
                exp_num = len(macro_f1_list)
                target_n_list = list(range(n_t, inc_num * exp_num + 1,
                                           inc_num))

                xs = target_n_list
                ys = [accuracy_list, macro_f1_list]
                if i == 0:
                    legend = '{0}, SA: {1}'.format(
                        n_s, oxer(query['metadata.use_brick_flag']))
                    # Same label for the accuracy and the macro-F1 line.
                    legends = [legend, legend]
                else:
                    legends = None
                plotter.plot_multiple_2dline(
                    xs, ys, xlabel, ylabel, xtick, xtick_labels, ytick,
                    ytick_labels, title, ax, fig, ylim, None, legends,
                    xtickRotate=0,
                    linestyles=[linestyles.pop()] * len(ys), cs=cs)

    for ax in axes:
        ax.grid(True)
    for ax, (source, target) in zip(axes, source_target_list):
        # Backslash escaped explicitly; the rendered text is unchanged.
        ax.text(0.45,
                0.2,
                '{0} $\\Rightarrow$ {1}'.format(anon_building_dict[source],
                                                anon_building_dict[target]),
                fontsize=11,
                transform=ax.transAxes)

    # Only the left-most subplot keeps its y labelling.
    for extra_ax in axes[1:]:
        extra_ax.set_yticklabels([])
        extra_ax.set_ylabel('')

    ax = axes[0]
    plt.text(0,
             1.2,
             'Accuracy: \nMacro $F_1$: ',
             ha='center',
             va='center',
             transform=ax.transAxes)
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center', alpha=0)

    # Only the left-most subplot keeps its x label.
    for extra_ax in axes[1:]:
        extra_ax.set_xlabel('')

    fig.set_size_inches(4.4, 1.5)
    save_fig(fig, 'figs/entity_ts.pdf')
    subprocess.call('./send_figures')
Esempio n. 12
0
def crf_entity_result_dep():
    """Plot four accuracy / macro-F1 curve pairs per building set.

    Per subplot, in drawing order:

    1. baseline with source buildings (dotted), from ``result/baseline.json``
       keyed by ``str(buildings)``;
    2. baseline without source (dash-dot), keyed by the target building only;
    3. Scrabble without source (dashed), from the ``..._iter_nosource1.json``
       file; when that file is missing the rest of the subplot is skipped;
    4. Scrabble with source (solid), averaged over up to three runs with
       ``lin_interpolated_avg``.

    The relative improvement of (4) over (1) at the sample counts in
    ``comp_xs`` is printed per building set and averaged at the end.  The
    figure is written to ``figs/crf_entity.pdf`` and ``./send_figures`` is
    invoked afterwards.
    """
    building_sets = [('ebu3b', 'ap_m'), ('ap_m', 'bml'),
                     ('ebu3b', 'ghc'), ('ghc', 'ebu3b'),
                     ('ebu3b', 'bml', 'ap_m')]  ### TODO TODO: this should be changed to use ebu3b,ap_m -> bml
    fig, axes = plt.subplots(1, len(building_sets))
    with open('result/baseline.json', 'r') as fp:
        baseline_results = json.load(fp)

    cs = ['firebrick', 'deepskyblue']
    acc_better_list = []
    mf1_better_list = []
    comp_xs = [10, 50, 150]

    # Axis configuration shared by every subplot.
    ylim = (-2, 105)
    xlim = (-2, 205)
    xlabel = '# Target Building Examples'
    ylabel = 'Score (%)'
    xtick = [10] + list(range(40, 205, 40))
    xtick_labels = [str(n) for n in xtick]
    ytick = list(range(0, 105, 20))
    ytick_labels = [str(no) for no in ytick]
    lw = 1.2
    filename_template = 'result/crf_entity_iter_{0}_char2tagset_iter_{1}.json'

    for i, (ax, buildings) in enumerate(zip(axes, building_sets)):
        print(i)
        # Only the middle subplot carries legend labels.
        with_legend = i == 2

        # Title: "<src>[,<src>...] => <target>" using anonymised names.
        title = ','.join('{0}'.format(anon_building_dict[building])
                         for building in buildings[:-1])
        title += '$\\Rightarrow${0}'.format(anon_building_dict[buildings[-1]])

        # Baseline with source
        result = baseline_results[str(buildings)]
        baseline_x = result['sample_numbers']
        baseline_acc = result['avg_acc']
        baseline_mf1 = result['avg_mf1']
        if with_legend:
            data_labels = ['Baseline Acc w/ $B_s$',
                           'Baseline M-$F_1$ w/ $B_s$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            baseline_x, [baseline_acc, baseline_mf1], xlabel, ylabel, xtick,
            xtick_labels, ytick, ytick_labels, title, ax, fig, ylim, xlim,
            data_labels, 0, [':', ':'], cs, lw)

        # Baseline without source
        result = baseline_results[str((buildings[-1],))]
        if with_legend:
            data_labels = ['Baseline Acc w/o $B_s$',
                           'Baseline M-$F_1$ w/o $B_s$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            result['sample_numbers'], [result['avg_acc'], result['avg_mf1']],
            xlabel, ylabel, xtick, xtick_labels, ytick, ytick_labels, title,
            ax, fig, ylim, xlim, data_labels, 0, ['-.', '-.'], cs, lw)

        # Scrabble without source
        buildingfix = ''.join([buildings[-1]] * 2)
        filename = 'result/crf_entity_iter_{0}_char2tagset_iter_nosource1.json'\
                       .format(buildingfix)
        if not os.path.exists(filename):
            # NOTE: this also skips the "with source" curve and the
            # comparison for this building set, as in the original flow.
            continue
        with open(filename, 'r') as fp:
            res = json.load(fp)
        x = [len(r['learning_srcids']) for r in res]
        accuracy = [r['result']['entity']['accuracy'] * 100 for r in res]
        mf1s = [r['result']['entity']['macro_f1'] * 100 for r in res]
        if with_legend:
            data_labels = ['Scrabble Acc w/o $B_s$',
                           'Scrabble M-$F_1$ w/o $B_s$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            x, [accuracy, mf1s], xlabel, ylabel, xtick, xtick_labels, ytick,
            ytick_labels, title, ax, fig, ylim, xlim, data_labels, 0,
            ['--', '--'], cs, lw)

        # Scrabble with source
        buildingfix = ''.join(list(buildings) + [buildings[-1]])
        x = range(10, 205, 10)
        # Each source building contributes 200 samples that are subtracted
        # to obtain the number of target-building samples.
        source_num = 200 * (len(buildings) - 1)
        x_cands = []
        acc_cands = []
        mf1_cands = []
        for exp_num in range(0, 3):
            filename = filename_template.format(buildingfix, exp_num)
            if not os.path.exists(filename):
                continue
            with open(filename, 'r') as fp:
                res = json.load(fp)
            x_cands.append(
                [len(r['learning_srcids']) - source_num for r in res])
            acc_cands.append(
                [r['result']['entity']['accuracy'] * 100 for r in res])
            mf1_cands.append(
                [r['result']['entity']['macro_f1'] * 100 for r in res])
        acc = lin_interpolated_avg(x, x_cands, acc_cands)
        mf1 = lin_interpolated_avg(x, x_cands, mf1_cands)
        ys = [acc, mf1]

        # Relative improvement of Scrabble over the baseline with source.
        print(buildings)
        mf1_betters = []
        acc_betters = []
        for comp_x in comp_xs:
            try:
                comp_idx_target = x.index(comp_x)
                comp_idx_baseline = baseline_x.index(comp_x)
                acc_better = \
                    acc[comp_idx_target] / baseline_acc[comp_idx_baseline] - 1
                mf1_better = \
                    mf1[comp_idx_target] / baseline_mf1[comp_idx_baseline] - 1
            except (ValueError, IndexError, ZeroDivisionError,
                    TypeError) as err:
                # Previously a bare ``except`` that dropped into pdb.  A NaN
                # placeholder keeps these lists positionally aligned with
                # ``comp_xs`` for the per-position averaging further down.
                print('Cannot compare at {0} for {1}: {2!r}'.format(
                    comp_x, buildings, err))
                mf1_betters.append(float('nan'))
                acc_betters.append(float('nan'))
                continue
            mf1_betters.append(mf1_better)
            acc_betters.append(acc_better)
            print('srouce#: {0}'.format(comp_x))
            print('Acc\t baseline: {0}\t scrbl: {1}\t better: {2}\t'
                  .format(
                      baseline_acc[comp_idx_baseline],
                      acc[comp_idx_target],
                      acc_better
                      ))
            print('MF1\t baseline: {0}\t scrbl: {1}\t better: {2}\t'
                  .format(
                      baseline_mf1[comp_idx_baseline],
                      mf1[comp_idx_target],
                      mf1_better
                      ))
        mf1_better_list.append(mf1_betters)
        acc_better_list.append(acc_betters)

        if with_legend:
            data_labels = ['Scrabble Acc w/ $B_s$',
                           'Scrabble M-$F_1$ w/ $B_s$']
        else:
            data_labels = None
        plotter.plot_multiple_2dline(
            x, ys, xlabel, ylabel, xtick, xtick_labels, ytick, ytick_labels,
            title, ax, fig, ylim, xlim, data_labels, 0, ['-', '-'], cs, lw)

        if with_legend:
            ax.legend(bbox_to_anchor=(3.5, 1.53), ncol=4, frameon=False)

    print('====================')
    print('Source nums: {0}'.format(comp_xs))
    # Average the improvement across building sets for each comparison point.
    mf1_better_avgs = [np.mean(list(map(itemgetter(i), mf1_better_list)))
                       for i, _ in enumerate(comp_xs)]
    acc_better_avgs = [np.mean(list(map(itemgetter(i), acc_better_list)))
                       for i, _ in enumerate(comp_xs)]
    print('MF1 better in average, {0}'.format(mf1_better_avgs))
    print('Acc better in average, {0}'.format(acc_better_avgs))

    fig.set_size_inches(9, 1.5)
    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y labelling.
    for i in range(1, len(building_sets)):
        axes[i].set_yticklabels([])
        axes[i].set_ylabel('')
    # Only the middle subplot keeps its x label.
    for i in range(0, len(building_sets)):
        if i != 2:
            axes[i].set_xlabel('')

    save_fig(fig, 'figs/crf_entity.pdf')
    subprocess.call('./send_figures')
Esempio n. 13
0
def plot_one_ir2tagsets(target_building, source_building,
                        fig=None, ax=None):
    """Plot accuracy and macro-F1 curves for every ir2tagsets configuration.

    Args:
        target_building: key of the target building; used for the title,
            for the config lookup and for locating the result files.
        source_building: key of the source building.
        fig: figure to draw into.  A new figure/axes pair is created when
            either ``fig`` or ``ax`` is None.
        ax: axes to draw into (see ``fig``).

    The configurations come from ``get_ir2tagsets_configs``; for target
    ``'ebu3b'`` one extra configuration with ``ts_flag`` enabled is added and
    legend labels are attached to the curves.  Each configuration's result
    file (``get_filename_for_ir2tagsets``) must provide ``accuracy`` and
    ``macrof1`` entries.  Nothing is returned.
    """
    title = '{0}$\\Rightarrow${1}'.format(
        building_anon_map[source_building],
        building_anon_map[target_building]
    )
    linestyles = deepcopy(LINESTYLES)
    # Copy so the extra config below never leaks into a list the helper
    # might keep or share (NOTE(review): helper's ownership not visible here).
    configs = list(get_ir2tagsets_configs(target_building, source_building))
    if target_building == 'ebu3b':
        configs.append({
            'use_brick_flag': True,
            'negative_flag': True,
            'source_building_list': [source_building, target_building],
            'target_building': target_building,
            'tagset_classifier_type': 'MLP',
            'task': 'ir2tagsets',
            'ts_flag': True,
        })
    xlabel = '# of Target Building Examples'
    ylabel = 'Score (%)'
    # Identity checks: only a missing argument should trigger a new figure.
    if fig is None or ax is None:
        fig, ax = plt.subplots(1, 1)

    # Axis configuration shared by every configuration's curves.
    xticks = [0, 10] + list(range(50, 201, 50))
    xticks_labels = [''] + [str(n) for n in xticks[1:]]
    yticks = range(0, 101, 20)
    yticks_labels = [str(n) for n in yticks]
    xlim = (50, xticks[-1])
    ylim = (0, 100)
    interp_x = list(range(10, 200, 5))

    for config in configs:
        filename = get_filename_for_ir2tagsets(target_building, config)
        with open(filename, 'r') as fp:
            res = json.load(fp)
        ys = [res['accuracy'], res['macrof1']]
        if target_building == 'ebu3b':
            # "<#source samples>,SA:<O|X>[,TS]" describes the configuration.
            suffix = '{0},SA:{1}{2}'.format(
                200 if len(config['source_building_list']) > 1 else 0,
                'O' if config['use_brick_flag'] else 'X',
                ',TS' if config['ts_flag'] else '')
            legends = [
                'Accuracy, ' + suffix,  # was misspelled "Accruracy"
                'Macro-F1, ' + suffix,
            ]
        else:
            legends = None

        plotter.plot_multiple_2dline(
            interp_x, ys, xlabel, ylabel, xticks, xticks_labels,
            yticks, yticks_labels, None, ax, fig, ylim, xlim, legends,
            linestyles=[linestyles.pop()] * len(ys), cs=colors)

    # Drawn once instead of once per configuration (identical text was being
    # stacked at the same position).
    ax.text(0.9, 0.15, title, transform=ax.transAxes, ha='right',
            backgroundcolor='white')
    ax.grid(True)
Esempio n. 14
0
def crf_result_acc():
    """Plot CRF phrase-level F1 against the number of target samples.

    One subplot is drawn per source/target pair and one curve per number of
    source samples in ``n_s_list``.  For each curve up to five result files
    ``result/crf_iter_<buildings>_char2ir_iter_<n_s + run>.json`` are read,
    F1 is derived from ``phrase_precision`` / ``phrase_recall`` and the runs
    are averaged with ``lin_interpolated_avg``.  Missing result files are
    reported on stdout and skipped.  The figure is written to
    ``figs/crf_acc.pdf`` and ``./send_figures`` is invoked afterwards.
    """
    source_target_list = [('ebu3b', 'ap_m'), ('ghc', 'ebu3b')]
    fig, axes = plt.subplots(1, len(source_target_list))
    # ``plt.subplots`` returns a bare Axes when only one subplot is requested.
    if isinstance(axes, Axes):
        axes = [axes]
    fig.set_size_inches(4, 1.5)
    cs = ['firebrick', 'deepskyblue']
    filename_template = 'result/crf_iter_{0}_char2ir_iter_{1}.json'
    n_s_list = [1000, 200, 0]

    # Axis configuration shared by every curve.
    xs = [5] + list(range(10, 201, 10))
    xlabel = None
    ylabel = 'Score (%)'
    xtick = [5] + list(range(40, 205, 40))
    xtick_labels = [str(tick) for tick in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(tick) for tick in ytick]
    xlim = (-5, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None

    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        linestyles = ['--', '-.', '-']
        for n_s in n_s_list:
            if n_s == 0:
                buildingfix = ''.join([target, target])
            else:
                buildingfix = ''.join([source, target, target])
            x_cands = []
            f1_cands = []
            mf1_cands = []
            for exp_num in range(0, 5):
                # Result files are numbered n_s, n_s + 1, ... per run.
                filename = filename_template.format(buildingfix,
                                                    n_s + exp_num)
                if not os.path.exists(filename):
                    # Previously dropped into pdb; report and skip instead.
                    print('Missing result file: {0}'.format(filename))
                    continue
                with open(filename, 'r') as fp:
                    data = json.load(fp)
                x_cand = [
                    len(datum['learning_srcids']) - n_s for datum in data
                ]
                f1_cand = []
                for datum in data:
                    prec = datum['result']['crf']['phrase_precision'] * 100
                    rec = datum['result']['crf']['phrase_recall'] * 100
                    # F1 is defined as 0 when precision and recall are both 0
                    # (the plain formula would divide by zero).
                    if prec + rec:
                        f1 = 2 * prec * rec / (prec + rec)
                    else:
                        f1 = 0.0
                    f1_cand.append(f1)
                mf1_cand = [
                    datum['result']['crf']['phrase_macro_f1'] * 100
                    for datum in data
                ]
                x_cands.append(x_cand)
                f1_cands.append(f1_cand)
                mf1_cands.append(mf1_cand)
            f1s = lin_interpolated_avg(xs, x_cands, f1_cands)
            mf1s = lin_interpolated_avg(xs, x_cands, mf1_cands)
            ys = [f1s]  # macro F1 is only printed, not plotted
            # Print curr result
            if n_s == 200 or n_s == 0:
                print('=======')
                print(source, target, n_s)
                print('init F1: {0}'.format(f1s[0]))
                print('init MF1: {0}'.format(mf1s[0]))
                print('=======')

            # Legend labels are only attached on the first subplot.
            if i == 0:
                legends = ['#$B_S$:{0}'.format(n_s)]
            else:
                legends = None

            plotter.plot_multiple_2dline(
                xs, ys, xlabel, ylabel, xtick, xtick_labels, ytick,
                ytick_labels, title, ax, fig, ylim, xlim, legends,
                xtickRotate=0,
                linestyles=[linestyles.pop()] * len(ys), cs=cs)

        # Source => target annotation, drawn once per subplot rather than
        # once per curve.
        text = '{0} $\\Rightarrow$ {1}'.format(
            anon_building_dict[source],
            anon_building_dict[target])
        ax.text(0.8,
                0.1,
                text,
                transform=ax.transAxes,
                ha='right',
                backgroundcolor='white')

    axes[0].legend(bbox_to_anchor=(0.15, 0.96), ncol=3, frameon=False)
    for ax in axes:
        ax.grid(True)
    # Only the left-most subplot keeps its y labelling; slicing also works
    # when there is a single subplot.
    for extra_ax in axes[1:]:
        extra_ax.set_yticklabels([])
        extra_ax.set_ylabel('')
    plt.text(0,
             1.16,
             '$F_1$: \nMacro $F_1$: ',
             va='center',
             ha='center',
             transform=axes[0].transAxes)
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center')

    save_fig(fig, 'figs/crf_acc.pdf')
    subprocess.call('./send_figures')
Esempio n. 15
0
def cls_comp_result():
    """Plot accuracy and macro-F1 curves of three classifiers and save them.

    The hard-coded series below are drawn against the sample counts in
    ``xs`` (10, 20, ..., 200).  Three accuracy curves (firebrick) and three
    macro-F1 curves (deepskyblue) are drawn on one axis, labelled 'OCC',
    'OCC w/ TS' and 'RF'.  The figure is written to ``figs/cls.pdf`` and
    ``./send_figures`` is invoked afterwards.
    """
    keys = ['best', 'ts', 'rf']
    xs = list(range(10, 205, 10))
    # The 'best' series is recorded in percent, whereas 'ts' and 'rf' are
    # recorded as fractions; everything is plotted in percent.
    percent_keys = ('best',)
    accuracy_dict = OrderedDict({
        'best': [
            89.809313820507768, 92.54815950011843, 94.820762260127921,
            95.97224073086943, 96.084653841183666, 96.189745940212362,
            96.621875740345899, 96.767353707652205, 97.25703698768065,
            97.303271588486126, 97.563484660033183, 98.26716491945038,
            97.689250918028904, 98.192926735370776, 98.38512052831085,
            98.332192527621629, 98.393721664943683, 98.662756406459749,
            98.887643256929636, 98.967675573027705
        ],
        'ts': [
            0.8939772861881065, 0.8923213679976736, 0.9123210382072324,
            0.9135980339105342, 0.9189532249466957, 0.9340140813788202,
            0.9352186241411988, 0.9355258676853828, 0.9291091215997943,
            0.9378608124876789, 0.9319247243221132, 0.949146448493464,
            0.9489394545131488, 0.9502468717020965, 0.9567056828950493,
            0.9472988480217964, 0.9615234837716184, 0.966066986496091,
            0.9657838041933192, 0.9655206112295668
        ],
        'rf': [
            0.806640902629711, 0.8715051972281449, 0.8819351901208243,
            0.8936811478322669, 0.9154761904761907, 0.9102993218431644,
            0.9163216654821128, 0.9187111318407958, 0.9251769426676142,
            0.9323353470741529, 0.9335880123193552, 0.9353082059938402,
            0.9385065002369106, 0.9440912994551051, 0.9449197465055669,
            0.9479770048566685, 0.95334636342099, 0.9520936981757874,
            0.9534481609808099, 0.9574993336886989, 0.9613235903814261
        ],
    })
    mf1_dict = OrderedDict({
        'best': [
            49.278915576009666, 54.796766717693828, 62.58888234797125,
            65.516750225788741, 68.292157713216596, 70.178737730933733,
            72.269065905342927, 75.530080228774239, 79.910634234930825,
            83.958759694464149, 86.604737828403415, 89.944532313205116,
            89.509558650993768, 92.646954050881263, 92.840673983293001,
            92.748649991145385, 93.127511989870385, 93.479568639265494,
            94.246971132932828, 94.718836697647319
        ],
        'ts': [
            0.56653458779577659, 0.55708814049375366, 0.5937535218897827,
            0.63466926766986798, 0.653458865790845, 0.64011173425185053,
            0.67281122169885288, 0.68270291522350057, 0.72076990493532245,
            0.71261982497230925, 0.70044729648937165, 0.77730251488642088,
            0.76286044963642097, 0.79628750932789027, 0.81995259322192149,
            0.81512563219291001, 0.83983065742402829, 0.85147624388541865,
            0.85183408423723528, 0.85288622740244369
        ],
        'rf': [
            0.12250376794594604, 0.18942204544104752, 0.22171884155985688,
            0.27069328069179505, 0.30405631973712544, 0.2984466141860372,
            0.3205452968001699, 0.33832465365023096, 0.3891719868291194,
            0.44145987155626004, 0.4629123930116906, 0.4960558419219113,
            0.5335594108556089, 0.5915815154291774, 0.6430516639970087,
            0.6950590411205589, 0.7300801879845085, 0.7553289202919391,
            0.7856917033978976, 0.8454315647144195, 0.8931418245685142
        ],
    })

    def as_percent(key, values):
        """Clip ``values`` to one point per entry of ``xs``, in percent."""
        # The 'rf' series carry one more point than ``xs``; drop the excess.
        clipped = values[:len(xs)]
        if key in percent_keys:
            # Already in percent: avoid a lossy /100 then *100 round trip.
            return list(clipped)
        return [value * 100 for value in clipped]

    # Curve order: the three accuracy series first, then the three macro-F1
    # series, matching ``legends``, ``linestyles`` and ``cs`` below.
    ys = [as_percent(key, accuracy_dict[key]) for key in keys] \
        + [as_percent(key, mf1_dict[key]) for key in keys]

    legends = ['OCC', 'OCC w/ TS', 'RF'] * 2
    linestyles = ['-', ':', '-.'] * 2
    cs = ['firebrick'] * len(keys) + ['deepskyblue'] * len(keys)
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(3, 1.7)

    xlabel = '# of Target Building Samples'
    ylabel = 'Score (%)'
    xtick = [10] + list(range(40, 205, 40))
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 101, 20)
    ytick_labels = [str(n) for n in ytick]
    xlim = (xtick[0] - 2, xtick[-1] + 5)
    ylim = (ytick[0] - 2, ytick[-1] + 5)
    title = None
    _, plots = plotter.plot_multiple_2dline(
        xs, ys, xlabel, ylabel, xtick, xtick_labels, ytick, ytick_labels,
        title, ax, fig, ylim, xlim, None, xtickRotate=0,
        linestyles=linestyles, cs=cs)

    # Interleave accuracy and macro-F1 handles so that, with ncol=3, each
    # legend column holds both curves of one classifier.
    legend_order = [0, 3, 1, 4, 2, 5]
    new_handles = [plots[i] for i in legend_order]
    new_legends = [legends[i] for i in legend_order]
    fig.legend(new_handles,
               new_legends,
               ncol=3,
               bbox_to_anchor=(0.15, 1.08, 0.8, 0.095),
               prop={'size': 7},
               frameon=False)
    ax.grid(True)
    # Row captions placed to the left of the two legend rows.
    plt.text(0.03,
             1.135,
             'Accuracy: \nMacro $F_1$: ',
             ha='center',
             va='center',
             transform=ax.transAxes,
             fontsize=7)
    save_fig(fig, 'figs/cls.pdf')
    subprocess.call('./send_figures')
Esempio n. 16
0
def entity_iter_result():
    """Plot averaged accuracy / macro-F1 curves per (source, target) pair.

    One subplot is drawn for every pair in ``source_target_list``.  Each
    subplot holds four configurations (no source / 200 source samples,
    each without and with 'sa'), distinguished by line style, and for every
    configuration two curves: accuracy (firebrick) and macro-F1
    (deepskyblue).

    Results are read from ``result/entity_iter_<buildings>_<option><run>.json``
    for runs 1 and 2; files that do not exist are skipped.  The runs that
    were found are combined by ``lin_interpolated_avg`` on the common grid
    10, 20, ..., 200.  The figure is written to ``figs/entity_iter.pdf`` and
    ``./send_figures`` is invoked afterwards.
    """
    source_target_list = [
        ('ebu3b', 'ap_m'),
        ('ghc', 'ap_m'),
    ]
    filename_template = 'result/entity_iter_{0}_{1}{2}.json'
    exp_nums = range(1, 3)
    linestyles = [':', '--', '-.', '-']
    cs = ['firebrick', 'deepskyblue']
    x_t = range(10, 201, 10)

    # Axis configuration is identical for every curve, so build it once.
    xlabel = None
    ylabel = 'Score (%)'
    xtick = [10] + list(range(50, 205, 50))
    xtick_labels = [str(n) for n in xtick]
    ytick = range(0, 102, 20)
    ytick_labels = [str(n) for n in ytick]
    ylim = (ytick[0] - 1, ytick[-1] + 2)
    title = None

    fig, axes = plt.subplots(1, len(source_target_list))
    for i, (ax, (source, target)) in enumerate(zip(axes, source_target_list)):
        # (building part of the file name, option part of the file name)
        prefixes = [(target * 2, 'nosource_nosa'),
                    (target * 2, 'nosource_sa'),
                    (source + target * 2, 'source_nosa'),
                    (source + target * 2, 'source_sa')]
        for linestyle, (buildingfix, optfix) in zip(linestyles, prefixes):
            sa_flag = 'X' if 'nosa' in optfix else 'O'
            src_flag = '0' if 'nosource' in optfix else '200'
            source_num = int(src_flag)

            acc_cands = []
            mf1_cands = []
            x_cands = []
            for exp_num in exp_nums:
                filename = filename_template.format(buildingfix, optfix,
                                                    exp_num)
                if not os.path.exists(filename):
                    continue
                with open(filename, 'r') as fp:
                    data = json.load(fp)
                # Every entry but the last supplies an x position: its
                # number of distinct learning ids minus the source samples.
                x_cands.append([
                    len(set(datum['learning_srcids'])) - source_num
                    for datum in data[:-1]
                ])
                # The score histories are read from the last entry only.
                acc_cands.append(
                    [val * 100 for val in data[-1]['accuracy_history']])
                mf1_cands.append(
                    [val * 100 for val in data[-1]['macro_f1_history']])

            if len(x_cands) < len(exp_nums):
                # Report missing experiment data instead of stopping in the
                # debugger, so the figure can be built non-interactively.
                print('entity_iter_result: found {0} of {1} result files '
                      'for {2}_{3}'.format(len(x_cands), len(exp_nums),
                                           buildingfix, optfix))

            mf1s = lin_interpolated_avg(x_t, x_cands, mf1_cands)
            accs = lin_interpolated_avg(x_t, x_cands, acc_cands)
            ys = [accs, mf1s]

            # Only the first subplot contributes legend entries; both curves
            # of a configuration share the same label.
            if i == 0:
                legends = ['{0},SA:{1}'.format(src_flag, sa_flag)] * len(ys)
            else:
                legends = None
            plotter.plot_multiple_2dline(
                x_t, ys, xlabel, ylabel, xtick, xtick_labels, ytick,
                ytick_labels, title, ax, fig, ylim, None, legends,
                xtickRotate=0, linestyles=[linestyle] * len(ys), cs=cs)

    for ax in axes:
        ax.grid(True)
    for ax, (source, target) in zip(axes, source_target_list):
        # Escaped backslash: '\R' is not a valid string escape sequence.
        ax.text(
            0.45,
            0.2,
            '{0} $\\Rightarrow$ {1}'.format(anon_building_dict[source],
                                            anon_building_dict[target]),
            fontsize=11,
            transform=ax.transAxes,
        )

    # Only the left-most subplot keeps its y tick labels and axis labels.
    for ax in axes[1:]:
        ax.set_yticklabels([])
        ax.set_ylabel('')
        ax.set_xlabel('')

    ax = axes[0]
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles,
              labels,
              bbox_to_anchor=(0.15, 0.96),
              ncol=4,
              frameon=False,
              handletextpad=0.15,
              columnspacing=0.7)
    # Row captions placed to the left of the two legend rows.
    plt.text(0.0,
             1.18,
             'Accuracy: \nMacro $F_1$: ',
             ha='center',
             va='center',
             transform=ax.transAxes)
    # Shared x-axis caption for all subplots.
    fig.text(0.5, -0.1, '# of Target Building Samples', ha='center')

    fig.set_size_inches(4.4, 1.5)
    save_fig(fig, 'figs/entity_iter.pdf')
    subprocess.call('./send_figures')