def ops():
    """Plot physical-operator frequency as a horizontal bar chart, one
    EPS figure per workload in the module-level ``workloads`` list."""
    for workload in workloads:
        records = load_data("physops", [workload])[workload]

        sns.set_context("paper", font_scale=font_scale,
                        rc={"lines.linewidth": 2.5})

        fig, ax = plt.subplots(1, figsize=(8, 4))

        # Least-frequent operator first so the longest bar ends on top.
        records.sort(order='count')

        # Convert raw counts to percentages of all queries.
        percentages = records['count'].astype(float)
        percentages /= sum(percentages)
        percentages *= 100
        positions = np.arange(len(records['phys_operator']))
        ppl.barh(ax, positions, percentages,
                 yticklabels=records['phys_operator'],
                 grid='x', annotate=True, color=colors[workload])

        plt.title("Operator Frequency in {}".format(labels[workload]))
        ax.set_xlabel('\% of queries')

        ax.xaxis.grid(False)
        ax.yaxis.grid(False)

        # Nudge the title up and reserve a left margin for the shared
        # y-axis label drawn via fig.text.
        ax.title.set_position((ax.title._x, 1.04))
        fig.tight_layout(rect=[0.03, 0, 1, 1])
        fig.text(0.02, 0.55, 'Physical operator', rotation=90, va='center')

        plt.savefig(root_path + 'plot_ops_%s.eps' % workload, format='eps')
Exemple #2
0
def logical_ops_sqlshare():
    """Plot how often each logical operator appears in SQLShare queries,
    showing the figure and saving PDF/PNG copies."""
    fig, ax = plt.subplots(1, figsize=(8, 5))

    data = read_csv(['logical_ops', 'count'], False)
    # Ascending by count so the most frequent operator is drawn on top.
    data.sort(order='count')

    # Express counts as percentages of all queries.
    shares = data['count'].astype(float)
    shares /= sum(shares)
    shares *= 100
    positions = np.arange(len(data['logical_op']))
    ppl.barh(ax, positions, shares, yticklabels=data['logical_op'],
             grid='x', annotate=True)

    ax.set_xlabel('% of queries')

    # Leave a left margin for the vertical axis label drawn via fig.text.
    fig.tight_layout(rect=[0.03, 0, 1, 1])
    fig.text(0.02, 0.55, 'Logical operator', rotation=90, va='center')

    plt.show()

    fig.savefig('plot_logops_sqlshare.pdf', format='pdf', transparent=True)
    fig.savefig('plot_logops_sqlshare.png', format='png', transparent=True)
def queries_per_table():
    sns.set_context("paper", font_scale=font_scale, rc={"lines.linewidth": 2.5})

    with open('../results/sqlshare/queries_per_table.csv') as f:
        data = np.recfromcsv(f)
    num_queries = data['num_queries'].astype(float)
    tables = data['table']
    p = re.compile(ur'^.*[A-F0-9]{5}$')

    logical_tables = []
    for t in tables:
        short_name = re.findall(p, t)
        if len(short_name) == 0:
            if t not in logical_tables:
                logical_tables.append(t)
        else:
            if short_name[0][0:-5] not in logical_tables:
                logical_tables.append(short_name[0][0:-5])
    num_queries_lt = []
    for lt in logical_tables:
        max_num_queries = 0
        for i,t in enumerate(tables):
            if lt in t:
                if num_queries[i] > max_num_queries:
                    max_num_queries = num_queries[i]
        num_queries_lt.append(max_num_queries)

    c = [0,0,0,0,0]
    for num in num_queries_lt:
        if num == 1.0:
            c[0] += 1
        elif num == 2.0:
            c[1] += 1
        elif num == 3.0:
            c[2] += 1
        elif num == 4.0:
            c[3] += 1
        else:
            c[4] += 1


    fig, ax = plt.subplots(1, figsize=(8, 4))

    ypos = np.arange(len(c))
    ppl.barh(ax, ypos, c, yticklabels=['1', '2', '3', '4', '$>=5$'], grid='x', annotate=True, color=g)

    plt.title("Number of queries per table")
    ax.set_ylabel('Number of queries')
    ax.set_xlabel('Number of tables')

    ax.xaxis.grid(False)
    ax.yaxis.grid(False)
    plt.tight_layout()

    plt.savefig(root_path + 'plot_queries_per_table.eps', format='eps')
def barh_chart(title, yvalues, xvalues, file_name):
    """Render a horizontal bar chart and save it as ``<file_name>.eps``.

    Args:
        title: Chart title.
        yvalues: Tick labels for the bars (one per bar).
        xvalues: Bar lengths.
        file_name: Output path without the ``.eps`` extension.
    """
    fig, ax = plt.subplots(1)

    plt.title(title)
    y_pos = np.arange(len(yvalues))

    # Draw the bars once; the original called ppl.barh twice in a row,
    # painting every bar a second time on top of the first.
    ppl.barh(y_pos, xvalues, grid='x')
    plt.yticks(y_pos, yvalues)
    plt.savefig(file_name + ".eps")
    plt.close()
Exemple #5
0
def barh_chart(title, yvalues, xvalues, file_name):
    """Render a horizontal bar chart and save it as ``<file_name>.eps``.

    Args:
        title: Chart title.
        yvalues: Tick labels for the bars (one per bar).
        xvalues: Bar lengths.
        file_name: Output path without the ``.eps`` extension.
    """
    fig, ax = plt.subplots(1)

    plt.title(title)
    y_pos = np.arange(len(yvalues))

    # Draw the bars once; the original called ppl.barh twice in a row,
    # painting every bar a second time on top of the first.
    ppl.barh(y_pos, xvalues, grid='x')
    plt.yticks(y_pos, yvalues)
    plt.savefig(file_name + ".eps")
    plt.close()
Exemple #6
0
def enrich_plot(res, pre="test", x="-log2( p )", t="Enriched GO Terms"):
    """Horizontal bar plot of enriched GO terms, saved as ``<pre>.svg``.

    Each entry of ``res`` supplies (id, name, ..., score); bars are
    labelled "name(id)" and sized by the last element of the entry.
    """
    term_labels, scores = [], []
    for entry in res:
        term_labels.append("%s(%s)" % (entry[1], entry[0]))
        scores.append(entry[-1])
    # Figure height grows with the number of terms so labels stay legible.
    fig = pylab.figure(figsize=(16, int(8 / 30.0 * len(term_labels)) + 2))
    ax = fig.add_subplot(1, 1, 1)
    positions = np.array(range(len(term_labels)))
    ppl.barh(positions, scores, yticklabels=term_labels, ax=ax,
             grid="x", annotate=False)
    pylab.subplots_adjust(left=0.6, bottom=0.2, top=0.8)
    ax.set_ylim([0, len(term_labels)])
    pylab.xlabel(x, fontsize=16)
    pylab.title(t, fontsize=30)
    fig.tight_layout()
    pylab.savefig(pre + ".svg", dpi=1000, bbox_inches='tight')
Exemple #7
0
def _plot_evaluation(df_csv):
    """Plot sensitivity/precision per caller, stratified by event size.

    Writes a PDF next to ``df_csv`` and returns its path; returns None
    when prettyplotlib is unavailable. The plot is only regenerated when
    the PDF is out of date relative to the CSV.
    """
    if ppl is None:
        return None
    out_file = "%s.pdf" % os.path.splitext(df_csv)[0]
    if not utils.file_uptodate(out_file, df_csv):
        metrics = ["sensitivity", "precision"]
        df = pd.read_csv(df_csv).fillna("0%")
        fig, axs = ppl.subplots(len(_EVENT_SIZES), len(metrics))
        callers = sorted(df["caller"].unique())
        # Move the combined "ensemble" caller to the end so it is drawn
        # as the topmost bar of every panel.
        if "ensemble" in callers:
            callers.remove("ensemble")
            callers.append("ensemble")
        for row, size in enumerate(_EVENT_SIZES):
            size_label = "%s to %sbp" % size
            size_key = "%s-%s" % size
            for col, metric in enumerate(metrics):
                ax = axs[row][col]
                ax.get_xaxis().set_ticks([])
                ax.spines['bottom'].set_visible(False)
                ax.spines['left'].set_visible(False)
                ax.set_xlim(0, 100.0)
                if row == 0:
                    ax.set_title(metric, size=12, y=1.2)
                vals, val_labels = _get_plot_val_labels(df, size_key, metric, callers)
                ppl.barh(ax, np.arange(len(vals)), vals, yticklabels=callers)
                if col == 0:
                    # Only the leftmost column shows caller names and the
                    # event-size annotation.
                    ax.tick_params(axis='y', which='major', labelsize=8)
                    ax.text(80, 4.2, size_label, fontsize=10)
                else:
                    ax.get_yaxis().set_ticks([])
                for idx, (val, text) in enumerate(zip(vals, val_labels)):
                    ax.annotate(text, (val + 1, idx + 0.35), va='center', size=7)
        fig.set_size_inches(7, 6)
        fig.savefig(out_file)
    return out_file
Exemple #8
0
def ops():
    """Produce one EPS bar chart of physical-operator frequency per
    workload, saved under ``root_path``."""
    for name in workloads:
        ops_data = load_data("physops", [name])[name]

        sns.set_context("paper",
                        font_scale=font_scale,
                        rc={"lines.linewidth": 2.5})

        fig, ax = plt.subplots(1, figsize=(8, 4))

        # Ascending sort puts the most common operator at the top bar.
        ops_data.sort(order='count')

        # Normalise counts into percentages of all queries.
        counts = ops_data['count'].astype(float)
        counts /= sum(counts)
        counts *= 100
        bar_pos = np.arange(len(ops_data['phys_operator']))
        ppl.barh(ax,
                 bar_pos,
                 counts,
                 yticklabels=ops_data['phys_operator'],
                 grid='x',
                 annotate=True,
                 color=colors[name])

        plt.title("Operator Frequency in {}".format(labels[name]))
        ax.set_xlabel('\% of queries')

        ax.xaxis.grid(False)
        ax.yaxis.grid(False)

        # Raise the title slightly and keep room on the left for the
        # figure-level y label.
        ax.title.set_position((ax.title._x, 1.04))
        fig.tight_layout(rect=[0.03, 0, 1, 1])
        fig.text(0.02, 0.55, 'Physical operator', rotation=90, va='center')

        plt.savefig(root_path + 'plot_ops_%s.eps' % name, format='eps')
Exemple #9
0
def _plot_evaluation(df_csv):
    """Provide plot of evaluation metrics, stratified by event size.

    Returns the output PDF path, or None when prettyplotlib is missing;
    skips regeneration when the PDF is already up to date.
    """
    if ppl is None:
        return None
    out_file = "%s.pdf" % os.path.splitext(df_csv)[0]
    # Early return: nothing to redraw when the PDF is newer than the CSV.
    if utils.file_uptodate(out_file, df_csv):
        return out_file
    metrics = ["sensitivity", "precision"]
    df = pd.read_csv(df_csv).fillna("0%")
    fig, axs = ppl.subplots(len(_EVENT_SIZES), len(metrics))
    callers = sorted(df["caller"].unique())
    if "ensemble" in callers:
        # Draw the combined caller last (topmost bar).
        callers.remove("ensemble")
        callers.append("ensemble")
    for i, size in enumerate(_EVENT_SIZES):
        label_text = "%s to %sbp" % size
        size = "%s-%s" % size
        for j, metric in enumerate(metrics):
            panel = axs[i][j]
            panel.get_xaxis().set_ticks([])
            panel.spines['bottom'].set_visible(False)
            panel.spines['left'].set_visible(False)
            panel.set_xlim(0, 100.0)
            if i == 0:
                panel.set_title(metric, size=12, y=1.2)
            vals, annotations = _get_plot_val_labels(df, size, metric, callers)
            ppl.barh(panel, np.arange(len(vals)), vals, yticklabels=callers)
            if j == 0:
                panel.tick_params(axis='y', which='major', labelsize=8)
                panel.text(80, 4.2, label_text, fontsize=10)
            else:
                panel.get_yaxis().set_ticks([])
            for k, (val, text) in enumerate(zip(vals, annotations)):
                panel.annotate(text, (val + 1, k + 0.35), va='center', size=7)
    fig.set_size_inches(7, 6)
    fig.savefig(out_file)
    return out_file
Exemple #10
0
def main(argv):
    """Plot per-tool species recovery: a line plot of percent-of-maximum
    species recovered vs. input dataset size, plus a log-scale bar chart
    of each tool's maximum species count, saved to one PDF.

    Command-line flags (parsed via getopt):
        -p  params string (parsed but never used below)
        -t  plot title (parsed but never used below)
        -o  output PDF file name
        -f  comma-separated list of input data files (one column of counts)
        -l  comma-separated tool labels, aligned with -f
        -x  comma-separated integer x-axis values (dataset sizes)
    """

    # get options passed at command line
    try:
        opts, args = getopt.getopt(argv, "p:o:t:f:l:x:")
    except getopt.GetoptError:
        #print helpString
        sys.exit(2)


    #print opts

    # NOTE(review): these locals stay unbound when their flag is absent,
    # which makes the .split(",") calls below raise NameError — confirm
    # callers always pass -o, -f, -l and -x.
    for opt, arg in opts:
        if opt == '-p':
            params = arg
        elif opt == '-t':
            title = arg
        elif opt == '-o':
            outfile_name = arg
        elif opt == '-f':
            file_names = arg
        elif opt == '-l':
            labels = arg
        elif opt == '-x':
            x_vals = arg



    file_names = file_names.split(",")
    #print file_names
    labels = labels.split(",")
    #print x_vals
    x_vals = x_vals.split(",")
    #print x_vals
    x_vals = [int(x) for x in x_vals]

    #colors = sns.color_palette("husl", 20)

    # Fixed RGB (0-255) colour per tool so colours stay stable across runs.
    colour_dict = {'DiamondMegan_filtered':(190,190,190),
             'CLARK':(178,223,138),
             'CLARK-S':(51,160,44),
             'DiamondEnsemble':(227,26,28),
             'BlastEnsemble':(251,154,153),
             'Kraken':(255,127,0),
             'Kraken_filtered':(253,191,111),
             'PhyloSift':(102,205,0),
             'PhyloSift_filtered':(127,255,0),
             'MetaPhlAn':(148,0,211),
             'LMAT':(176,48,96),
             'GOTTCHA':(106,61,154),
             'MetaFlow':(177,89,40),
             'Community':(0,0,0),
             'DiamondMegan_filtered+Kraken_filtered':(202,178,214),
             'CLARK+GOTTCHA':(255,233,0),
             'BlastMegan_filtered+LMAT':(33,160,160),
             'BlastMegan_filtered':(166,206,227),
             'BlastMegan_filtered_liberal':(31,120,180),
             'NBC':(255,0,255)}
    # Rescale 0-255 RGB tuples to matplotlib's expected 0-1 floats.
    for tool in colour_dict:
        colour_dict[tool] = tuple(map(lambda x: x/255., colour_dict[tool]))

    #colors_255=[(102,205,0),(176,48,96),(51,160,44),(255,127,0),(178,223,138),(253,191,111),(255,0,255),(127,255,0),(227,26,28),(31,120,180),(251,154,153),(106,61,154),(148,0,211),(202,178,214),(177,89,40),(166,206,227),(190,190,190)]

    #colors_1 = []
    #for (r,g,b) in colors_255:
    #    colors_1.append((float(r)/255.0, float(g)/255.0, float(b)/255.0))

    #print colors_1

    #colors = colors_1

    figure = plt.figure(figsize=(9, 5), dpi=300)
    font = {'family' : 'Arial',
        'weight' : 'normal',
        'size'   : 8}

    rc('font', **font)  # pass in the font dict as kwargs


    ##### line plot ######

    lineplt = figure.add_subplot(121)

    # Load one column of species counts per tool; first column only.
    data = []
    colors = []
    for file_name,label in zip(file_names,labels):
        colors.append(colour_dict[label])
        data_points = list(np.genfromtxt(file_name, usecols=0))
        print label,data_points
        data.append(data_points)
    # Per-tool maximum, used as the 100% reference for the line plot.
    data_max = []
    for data_points in data:
        data_max.append(int(max(data_points)))

    # Convert each series to percent of that tool's own maximum.
    data_percent = []
    for i, data_points in enumerate(data):
        percents = []
        for val in data_points:
            per = float(val) / data_max[i] * 100
            percents.append(per)
        data_percent.append(percents)

    print data_max

    print labels

    for i, data_percents in enumerate(data_percent):
        ppl.plot(x_vals[0:len(data_percents)], data_percents, '-', color=colors[i])



    xticks(x_vals, rotation=60)
    lineplt.set_xlabel("Input dataset size (Million Reads)")
    yticks(np.arange(0,105,10))
    ylim(top=110)
    lineplt.set_ylabel("Percent max species recovered")
    #plt.show()


    ##### bar plot ######

    barplt = figure.add_subplot(122)


    # Sort tools (with their colours) ascending by max species count so
    # the bar chart reads bottom-up.
    labels, data_max, colors = [list(x) for x in zip(*sorted(zip(labels, data_max, colors), key=lambda pair: pair[1]))]

    # log=1 gives a logarithmic x axis for the species counts.
    ppl.barh(np.arange(0,len(labels)), data_max, log=1, color=colors)
    yticks(np.arange(0,len(labels)), labels)
    barplt.yaxis.label.set_verticalalignment('top')
    barplt.set_xlabel("Maximum number of species predicted")

    #plt.legend(labels, bbox_to_anchor=(0.98,0.68))
    plt.tight_layout()

    plt.savefig(outfile_name, format='pdf')
Exemple #11
0
def test_barh_xticklabels():
    """Smoke-test barh with letter labels on the y ticks."""
    np.random.seed(14)  # deterministic bar heights
    n = 10
    positions = np.arange(n)
    heights = np.abs(np.random.randn(n))
    ppl.barh(positions, heights, yticklabels=UPPERCASE_CHARS[:n])
Exemple #12
0
def test_barh_annotate_user():
    """Smoke-test barh with caller-supplied annotation values."""
    np.random.seed(14)  # deterministic bar heights
    positions = np.arange(10)
    heights = np.abs(np.random.randn(10))
    ppl.barh(positions, heights, annotate=range(10, 20))
Exemple #13
0
def test_barh_annotate():
    """Smoke-test barh with automatic value annotations."""
    np.random.seed(14)  # deterministic bar heights
    positions = np.arange(10)
    heights = np.abs(np.random.randn(10))
    ppl.barh(positions, heights, annotate=True)
Exemple #14
0
def test_barh_grid():
    """Smoke-test barh with vertical grid lines enabled."""
    np.random.seed(14)  # deterministic bar heights
    positions = np.arange(10)
    heights = np.abs(np.random.randn(10))
    ppl.barh(positions, heights, grid='x')
Exemple #15
0
def test_barh():
    """Smoke-test the default barh call."""
    np.random.seed(14)  # deterministic bar heights
    positions = np.arange(10)
    heights = np.abs(np.random.randn(10))
    ppl.barh(positions, heights)
Exemple #16
0
def queries_per_table():
    sns.set_context("paper",
                    font_scale=font_scale,
                    rc={"lines.linewidth": 2.5})

    with open('../results/sqlshare/queries_per_table.csv') as f:
        data = np.recfromcsv(f)
    num_queries = data['num_queries'].astype(float)
    tables = data['table']
    p = re.compile(ur'^.*[A-F0-9]{5}$')

    logical_tables = []
    for t in tables:
        short_name = re.findall(p, t)
        if len(short_name) == 0:
            if t not in logical_tables:
                logical_tables.append(t)
        else:
            if short_name[0][0:-5] not in logical_tables:
                logical_tables.append(short_name[0][0:-5])
    num_queries_lt = []
    for lt in logical_tables:
        max_num_queries = 0
        for i, t in enumerate(tables):
            if lt in t:
                if num_queries[i] > max_num_queries:
                    max_num_queries = num_queries[i]
        num_queries_lt.append(max_num_queries)

    c = [0, 0, 0, 0, 0]
    for num in num_queries_lt:
        if num == 1.0:
            c[0] += 1
        elif num == 2.0:
            c[1] += 1
        elif num == 3.0:
            c[2] += 1
        elif num == 4.0:
            c[3] += 1
        else:
            c[4] += 1

    fig, ax = plt.subplots(1, figsize=(8, 4))

    ypos = np.arange(len(c))
    ppl.barh(ax,
             ypos,
             c,
             yticklabels=['1', '2', '3', '4', '$>=5$'],
             grid='x',
             annotate=True,
             color=g)

    plt.title("Number of queries per table")
    ax.set_ylabel('Number of queries')
    ax.set_xlabel('Number of tables')

    ax.xaxis.grid(False)
    ax.yaxis.grid(False)
    plt.tight_layout()

    plt.savefig(root_path + 'plot_queries_per_table.eps', format='eps')