Esempio n. 1
0
def make_output(tss_cov, out_prefix, upstream, downstream):
    """Dump per-position coverage to a text file and draw two PDF plots.

    Files written, all named from ``out_prefix``:

    * ``<out_prefix>_raw.txt`` -- one ``index<TAB>coverage`` row for every
      index in ``[-upstream, downstream]``.
    * ``<out_prefix>_full.pdf`` -- scatter plot over the whole range.
    * ``<out_prefix>_zoom.pdf`` -- same plot with x limited to [-1000, 1000].

    :param tss_cov: indexable sequence of numbers; the value for relative
        index ``i`` is read from ``tss_cov[upstream + i]``.
    :param out_prefix: prefix (may include a directory) for the output files.
    :param upstream: number of indexes to the left of index 0.
    :param downstream: number of indexes to the right of index 0.
    """
    positions = list(range(-upstream, downstream + 1))

    # dump raw counts to file; the context manager closes the handle even
    # when formatting or indexing raises part-way through
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for i in positions:
            raw_out.write('%d\t%e\n' % (i, tss_cov[upstream + i]))

    # make plot data structures
    tss_i = ro.IntVector(positions)
    cov = ro.FloatVector(tss_cov)
    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov})

    def _plot_to_pdf(pdf_path, x_scale):
        # Build the scatter plot with the given x scale and draw it to pdf_path.
        gp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x='tss_i', y='cov') + \
            ggplot2.geom_point() + \
            x_scale + \
            ggplot2.scale_y_continuous('Coverage')

        grdevices.pdf(file=pdf_path)
        try:
            gp.plot()
        finally:
            # always release the graphics device, even if drawing failed
            grdevices.dev_off()

    # full plot
    _plot_to_pdf('%s_full.pdf' % out_prefix,
                 ggplot2.scale_x_continuous('TSS index'))

    # zoomed plot
    _plot_to_pdf('%s_zoom.pdf' % out_prefix,
                 ggplot2.scale_x_continuous(
                     'TSS index', limits=ro.IntVector([-1000, 1000])))
Esempio n. 2
0
    def test_aes(self):
        """Check that aes() works with keyword and with positional arguments.

        For each calling convention a plot is built from ``mtcars`` with a
        point layer, and the result must still be a ``ggplot2.GGPlot``.
        """
        call_forms = (
            ((), {'x': 'wt', 'y': 'mpg'}),   # keyword form
            (('wt', 'mpg'), {}),             # positional form
        )
        for positional, named in call_forms:
            plot = ggplot2.ggplot(mtcars)
            plot += ggplot2.aes(*positional, **named)
            plot += ggplot2.geom_point()
            assert isinstance(plot, ggplot2.GGPlot)
Esempio n. 3
0
def make_output_and(cov, control_cov, out_prefix, window):
    """Write main and control coverage to a text file and plot both to a PDF.

    Files written:

    * ``<out_prefix>_raw.txt`` -- ``index<TAB>main<TAB>control`` rows.
    * ``<out_prefix>.pdf`` -- scatter plot of both series, coloured by label
      ('Main' / 'Control').

    :param cov: sequence of main coverage values; the value for relative
        position ``i`` is read from ``cov[window // 2 + i]``. It is joined
        to ``control_cov`` with ``+``, so both must support concatenation.
    :param control_cov: control coverage values, indexed like ``cov``.
    :param out_prefix: prefix for the output file names.
    :param window: integer window width; positions span
        ``-window // 2 .. window // 2``.
    """
    half = window // 2
    # ``-window // 2`` floors towards negative infinity, exactly like the
    # Python 2 integer ``-window/2`` this replaces.
    # NOTE(review): for an odd window the first index is
    # half + (-window // 2) == -1, which wraps to the last element --
    # confirm callers only pass even windows.
    positions = list(range(-window // 2, half + 1))

    # dump raw counts to file; ``with`` closes the handle on any error
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for i in positions:
            raw_out.write('%d\t%e\t%e\n'
                          % (i, cov[half + i], control_cov[half + i]))

    # make plot data structures: positions repeated once per series
    splice_i = ro.IntVector(2 * positions)
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(['Main'] * len(cov) + ['Control'] * len(control_cov))
    df = ro.DataFrame({'splice_i': splice_i, 'cov': cov_r, 'label': labels})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov', colour='label') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage') + \
        ggplot2.scale_colour_discrete('')

    # plot to file, always releasing the graphics device
    grdevices.pdf(file='%s.pdf' % out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
Esempio n. 4
0
def make_output_and(cov, control_cov, out_prefix, window):
    """Dump main/control coverage around a splice site and plot it.

    Writes ``<out_prefix>_raw.txt`` (tab-separated position, main value,
    control value) and ``<out_prefix>.pdf`` (scatter plot of both series,
    coloured 'Main' / 'Control').

    :param cov: main coverage values, read as ``cov[window // 2 + i]`` for
        relative position ``i``; concatenated with ``control_cov`` via ``+``.
    :param control_cov: control coverage values, indexed the same way.
    :param out_prefix: prefix for the output file names.
    :param window: integer window width; positions run from
        ``-window // 2`` to ``window // 2`` inclusive.
    """
    centre = window // 2
    # Floor division keeps the Python 2 semantics of the integer ``/`` used
    # before and also works on Python 3.
    # NOTE(review): with an odd window the first lookup is index -1 (wraps
    # to the last element) -- confirm only even windows are passed.
    offsets = list(range(-window // 2, centre + 1))

    # dump raw counts to file (handle is closed even if a lookup fails)
    with open("%s_raw.txt" % out_prefix, "w") as raw_out:
        for i in offsets:
            row = (i, cov[centre + i], control_cov[centre + i])
            raw_out.write("%d\t%e\t%e\n" % row)

    # make plot data structures; the offsets appear once per series
    splice_i = ro.IntVector(2 * offsets)
    cov_r = ro.FloatVector(cov + control_cov)
    labels = ro.StrVector(["Main"] * len(cov) + ["Control"] * len(control_cov))
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov_r, "label": labels})

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov", colour="label")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
        + ggplot2.scale_colour_discrete("")
    )

    # plot to file; the device is closed even when drawing raises
    grdevices.pdf(file="%s.pdf" % out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
Esempio n. 5
0
def make_output(cov, out_prefix, window):
    """Dump coverage around a splice site to text and plot it to a PDF.

    Writes ``<out_prefix>_raw.txt`` (tab-separated position and value) and
    ``<out_prefix>.pdf`` (scatter plot of value against position).

    :param cov: coverage values, read as ``cov[window // 2 + i]`` for
        relative position ``i``.
    :param out_prefix: prefix for the output file names.
    :param window: integer window width; positions run from
        ``-window // 2`` to ``window // 2`` inclusive.
    """
    centre = window // 2
    # Floor division reproduces the Python 2 integer ``/`` used previously
    # and keeps working on Python 3.
    # NOTE(review): an odd window makes the first lookup index -1 (the last
    # element) -- confirm only even windows are passed.
    offsets = list(range(-window // 2, centre + 1))

    # dump raw counts to file; ``with`` guarantees the handle is closed
    with open("%s_raw.txt" % out_prefix, "w") as raw_out:
        for i in offsets:
            raw_out.write("%d\t%e\n" % (i, cov[centre + i]))

    # make plot data structures
    splice_i = ro.IntVector(offsets)
    cov_r = ro.FloatVector(cov)
    df = ro.DataFrame({"splice_i": splice_i, "cov": cov_r})

    # construct plot
    gp = (
        ggplot2.ggplot(df)
        + ggplot2.aes_string(x="splice_i", y="cov")
        + ggplot2.geom_point()
        + ggplot2.scale_x_continuous("Position relative to splice site")
        + ggplot2.scale_y_continuous("Coverage")
    )

    # plot to file; the device is closed even when drawing raises
    grdevices.pdf(file="%s.pdf" % out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
Esempio n. 6
0
def render_plot(gp, args):
  """Render a plot using ggplot

  :gp: A base ggplot2 object
  :args: Plot options; wrapped in ``util.Namespace`` and read through the
    attributes ``x`` and ``y`` (aesthetic expressions handed to
    ``aes_string``), ``type`` ('points', 'lines' or 'boxplot') and
    ``facets`` (formula text for ``facet_grid``, or None for no facets)
  :raises NotImplementedError: when ``args.type`` is not one of the
    supported plot types

  Faceting and drawing stay best-effort: a failure in either is logged as a
  warning (with traceback) instead of being raised.

  """
  import logging

  args = util.Namespace(args)

  import rpy2.robjects.lib.ggplot2 as ggplot2

  log = logging.getLogger(__name__)

  pp = gp + ggplot2.aes_string(x=args.x,
                               y=args.y)

  # plot type -> layer constructor
  geoms = {
    'points': ggplot2.geom_point,
    'lines': ggplot2.geom_line,
    'boxplot': ggplot2.geom_boxplot,
  }
  try:
    make_geom = geoms[args.type]
  except (KeyError, TypeError):
    # NotImplementedError is still an Exception, so existing handlers that
    # catch Exception keep working
    raise NotImplementedError("{0} not implemented".format(args.type))
  pp += make_geom()

  if args.facets is not None:
    try:
      pp += ggplot2.facet_grid(ro.Formula(args.facets))
    except Exception:
      # keep going without facets, but leave a trace of what went wrong
      log.warning("could not apply facets %r", args.facets, exc_info=True)

  try:
    pp.plot()
  except Exception:
    log.warning("could not render plot", exc_info=True)
 def line_plot(self, data, title, ylabel, img_file,
               x='date', y='size', c='type', clabel=''):
     """Draw a line-and-point chart of ``data`` and save it under PLOTDIR.

     The plotting backend is selected by the module-level ``PLOTLIB``
     ('ggplot' or 'rpy2.ggplot2').

     :param data: table with the columns named by ``x``, ``y`` and ``c``.
         With the 'rpy2.ggplot2' backend the ``y`` column is converted to
         float in place, so the caller's object is modified.
     :param title: plot title.
     :param ylabel: y axis label.
     :param img_file: output file name, joined onto ``PLOTDIR``.
     :param x: column mapped to the x axis.
     :param y: column mapped to the y axis.
     :param c: column mapped to colour.
     :param clabel: colour legend title (only used by 'rpy2.ggplot2').
     :return: the plot object that was saved.
     :raises ValueError: if ``PLOTLIB`` names an unsupported backend.
     """
     if PLOTLIB == 'ggplot':
         date_label = "%Y\n%W"  # year + week number
         p = ggplot(data,
                    aes(x=x, y=y, color=c)) \
             + ggtitle(title) \
             + ylab(ylabel) \
             + xlab(' ') \
             + scale_x_date(breaks=date_breaks('3 months'),
                            labels=date_label) \
             + geom_line() + geom_point()
     elif PLOTLIB == 'rpy2.ggplot2':
         # convert y axis to float because R uses 32-bit signed integers,
         # values > 2 bln. (2^31) will overflow
         data[y] = data[y].astype(float)
         p = ggplot2.ggplot(data) \
             + ggplot2.aes_string(x=x, y=y, color=c) \
             + ggplot2.geom_line() + ggplot2.geom_point() \
             + GGPLOT2_THEME \
             + ggplot2.labs(title=title, x='', y=ylabel, color=clabel)
     else:
         # previously this fell through and failed later with an unbound
         # local ``p``; fail early with a message that names the cause
         raise ValueError('unsupported PLOTLIB: %r' % (PLOTLIB,))
     img_path = os.path.join(PLOTDIR, img_file)
     p.save(img_path)
     return p
Esempio n. 8
0
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream,
                    downstream):
    """Write raw counts and PDF plots for a fixed selection of keys.

    Only keys ``te`` whose ``te[0]`` and ``te[1]`` are both in the
    hard-coded allow-lists below are processed. For each such key:

    * ``<out_prefix>_raw/<te0>_<te1>.txt`` gets
      ``index<TAB>main<TAB>control`` rows for every index in
      ``[-upstream, downstream]``;
    * ``<out_prefix>_plot/<te0>_<te1>.pdf`` gets a scatter plot of both
      series, coloured 'Main' / 'Control'.

    Any '/' in ``te[0]`` / ``te[1]`` is replaced by '_' in file names. Both
    output directories are deleted and recreated on every call.

    :param te_tss_cov: mapping from key to a sequence of values; the value
        for relative index ``i`` is read from ``seq[upstream + i]``.
    :param control_te_tss_cov: control mapping with the same keys and
        layout; sequences are concatenated with ``+`` for plotting.
    :param out_prefix: prefix for the two output directories.
    :param upstream: number of indexes to the left of index 0.
    :param downstream: number of indexes to the right of index 0.
    """
    # allowed values for te[0] and te[1]; built once instead of per key
    allowed_first = frozenset([
        'n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7'
    ])
    allowed_second = frozenset([
        'n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
        'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
        'LTR/ERVK', 'DNA/TcMar-Tigger'
    ])

    def _reset_dir(path):
        # Remove path if it exists, then create it empty.
        if os.path.isdir(path):
            shutil.rmtree(path)
        os.mkdir(path)

    def _file_stem(te):
        # File-system-safe "<te0>_<te1>" name for a key.
        return '%s_%s' % (te[0].replace('/', '_'), te[1].replace('/', '_'))

    selected = [te for te in te_tss_cov
                if te[0] in allowed_first and te[1] in allowed_second]
    positions = list(range(-upstream, downstream + 1))

    # clean raw counts dir
    raw_dir = '%s_raw' % out_prefix
    _reset_dir(raw_dir)

    # dump raw counts to file; ``with`` closes each handle even on error
    for te in selected:
        main = te_tss_cov[te]
        control = control_te_tss_cov[te]
        with open('%s/%s.txt' % (raw_dir, _file_stem(te)), 'w') as raw_out:
            for i in positions:
                raw_out.write('%d\t%e\t%e\n'
                              % (i, main[upstream + i], control[upstream + i]))

    # clean plot dirs
    plot_dir = '%s_plot' % out_prefix
    _reset_dir(plot_dir)

    # make data structures shared by every plot
    tss_i = ro.IntVector(2 * positions)
    labels = ro.StrVector(['Main'] * len(positions) +
                          ['Control'] * len(positions))
    for te in selected:
        cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
        df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

        # construct full plot
        gp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
            ggplot2.geom_point() + \
            ggplot2.scale_x_continuous('TSS index') + \
            ggplot2.scale_y_continuous('Coverage') + \
            ggplot2.scale_colour_discrete('')

        # plot to file, always releasing the graphics device
        grdevices.pdf(file='%s/%s.pdf' % (plot_dir, _file_stem(te)))
        try:
            gp.plot()
        finally:
            grdevices.dev_off()
Esempio n. 9
0
def line_plot(pdf_file,
              data,
              x,
              y,
              var,
              null_label="N/A",
              linetype=None,
              title=None,
              xlab=None,
              ylab=None,
              colorname=None,
              linename=None,
              **extra_aes_params):
    """Draw ``y`` against ``x`` as points joined by paths into an A4 PDF.

    ``data`` is modified in place: a 'group' column is always added, and
    when ``x`` contains nulls the ``x`` column is converted to strings
    (nulls become ``null_label``), a 'sortcol' column is added and the rows
    are re-sorted so that ``null_label`` comes first.

    :param pdf_file: path of the PDF to write.
    :param data: pandas DataFrame holding the columns named below.
    :param x: column for the x axis.
    :param y: column for the y axis.
    :param var: column mapped to colour (and used for grouping).
    :param null_label: label shown for null ``x`` values.
    :param linetype: optional column mapped to line type; when it differs
        from ``var`` the path grouping combines both columns.
    :param title: plot title (empty when omitted).
    :param xlab: x axis label (defaults to ``x``).
    :param ylab: y axis label (defaults to ``y``).
    :param colorname: colour legend title (defaults to ``var``).
    :param linename: line type legend title (defaults to ``linetype``).
    :param extra_aes_params: accepted but not used by this function.
    """
    pdf(pdf_file, width=11.7, height=8.3, paper="a4r")
    try:
        if any(data[x].isnull()):
            present = sorted(set(data[data[x].notnull()][x]))
            # build a real list: on Python 3 ``map`` returns an iterator,
            # which cannot be appended to a list with ``+``
            labels = robjects.StrVector(
                [null_label] + [str(value) for value in present])
            nulls = data[x].isnull()
            label_vals = dict(zip(labels, range(len(labels))))
            data[x] = data[x].astype("str")
            # single .loc assignment instead of chained indexing, which may
            # write to a temporary copy
            data.loc[nulls, x] = null_label
            data['sortcol'] = data[x].map(label_vals.__getitem__)
            # DataFrame.sort was removed from newer pandas; prefer
            # sort_values when it is available
            if hasattr(data, 'sort_values'):
                data.sort_values('sortcol', inplace=True)
            else:
                data.sort('sortcol', inplace=True)
        else:
            labels = None

        if linetype and linetype != var:
            data['group'] = data[var].map(str) + data[linetype].map(str)
        else:
            data['group'] = data[var]

        rdata = common.convert_to_r_dataframe(data)
        if labels is not None:
            # make x an ordered factor so the axis follows ``labels``
            ix = rdata.names.index(x)
            rdata[ix] = ordered(rdata[ix], levels=labels)

        gp = gg2.ggplot(rdata)
        pp = (
            gp + gg2.geom_point(size=3) +
            gg2.scale_colour_hue(name=(colorname or var)) +
            gg2.aes_string(x=x, y=y, color=var, variable=var) +
            ggtitle(title or "") + xlabel(xlab or x) + ylabel(ylab or y)
        )

        # line type stuff
        if linetype:
            pp += gg2.geom_path(
                gg2.aes_string(group='group', linetype=linetype), size=0.5)
            pp += gg2.scale_linetype(name=(linename or linetype))
        else:
            pp += gg2.geom_path(gg2.aes_string(group='group'), size=0.5)

        pp.plot()
    finally:
        # close the PDF device even if preparing or drawing the plot failed
        dev_off()
Esempio n. 10
0
def plot_total_bp(parser, args, tot_bp_per_pore):
    """
    Plot the pore performance

    Draws one point per pore of the layout returned by
    ``minion_flowcell_layout()``, arranged as 16 rows by 32 columns and
    coloured by log10 of the pore's total.

    :param parser: not used by this function.
    :param args: options object; ``args.saveas`` is the output path
        ('.pdf' or '.png') or None to draw on the default device and wait
        for the user to press enter.
    :param tot_bp_per_pore: mapping from pore id to a positive total; pores
        missing from the mapping are plotted with value 0.

    An unrecognised ``args.saveas`` extension is logged and the process
    exits with status 1.
    """
    import math

    r = robjects.r
    r.library("ggplot2")
    grdevices = importr("grDevices")

    flowcell_layout = minion_flowcell_layout()

    pore_values = [
        math.log10(tot_bp_per_pore[pore]) if pore in tot_bp_per_pore else 0
        for pore in flowcell_layout
    ]

    # make a data frame of the lists; ``list(range(...))`` keeps the
    # repetition working on Python 3, where a range cannot be multiplied
    # NOTE(review): log10 values are floats but are stored in an IntVector
    # as before -- confirm whether a FloatVector was intended.
    d = {
        "rownum": robjects.IntVector(list(range(1, 17)) * 32),
        "colnum": robjects.IntVector(sorted(list(range(1, 33)) * 16)),
        "log10_tot_bp": robjects.IntVector(pore_values),
        "labels": robjects.IntVector(flowcell_layout),
    }

    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = (
        gp
        + gg.aes_string(y="factor(rownum, rev(rownum))", x="factor(colnum)")
        + gg.geom_point(gg.aes_string(color="log10_tot_bp"), size=7)
        + gg.geom_text(gg.aes_string(label="labels"), colour="white", size=2)
        + gg.scale_colour_gradient2(low="black", mid="black", high="red")
        + gg.coord_fixed(ratio=1.4)
        + gg.labs(x=gg.NULL, y=gg.NULL)
    )

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5, units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # non-zero status so callers and shells can detect the failure
            sys.exit(1)

        try:
            pp.plot()
        finally:
            # close the file device even if drawing failed
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print("Type enter to exit.")
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
Esempio n. 11
0
def plot_start(x, y):
    """Scatter-plot two columns of a CSV file and save it as 'point.png'.

    The data comes from the first path returned by ``glob('*.csv')``, read
    with R's ``utils::read.csv``. The title and axis labels are fixed
    ("mtcars", 'wt', 'mpg') regardless of ``x`` and ``y``.

    :param x: column/expression mapped to the x axis.
    :param y: column/expression mapped to the y axis.
    """
    # This import emits warnings, so it lives here: they only appear when
    # this function is actually called.
    import rpy2.robjects.lib.ggplot2 as ggplot2
    r_utils = importr('utils')
    csv_path = glob('*.csv')[0]
    frame = r_utils.read_csv(csv_path)

    figure = ggplot2.ggplot(frame)
    figure = figure + ggplot2.aes_string(x=x, y=y)
    figure = figure + ggplot2.geom_point()
    figure = figure + ggplot2.scale_colour_gradient(low="yellow", high="red")
    figure = figure + ggplot2.labs(title="mtcars", x='wt', y='mpg')
    figure.save('point.png')
Esempio n. 12
0
    def plot_ROC(self, path):
        """Draw ``self.df`` ('fpr' vs 'tpr') as a blue line with points.

        A 14x8 PDF device is opened at ``path`` and the plot is drawn on it.

        NOTE(review): the device is not closed here -- confirm the caller
        calls dev.off(), otherwise the PDF may stay unfinished.
        """
        open_pdf = robjects.r["pdf"]
        open_pdf(path, width=14, height=8)

        r_frame = convert_to_r_dataframe(self.df, strings_as_factors=True)
        roc = (
            ggplot2.ggplot(r_frame)
            + ggplot2.aes_string(x="fpr", y="tpr")
            + ggplot2.geom_line(color="blue")
            + ggplot2.geom_point(size=2)
        )
        roc.plot()
Esempio n. 13
0
def rank_abundance_plot(counter, name):
    """Plot the rank/abundance data of ``counter`` to a PNG file.

    The image is written to ``analytics_out/<name>_rank_abundance.png``
    with the rank on x and the fraction on a log10 y axis.

    :param counter: object passed to ``rank_abundance_data`` to obtain the
        ``(ranks, fracs)`` pair that is plotted.
    :param name: label used in the output file name.
    """
    # Prepare everything before opening the device, so a failure while
    # computing the data cannot leave a dangling device or an empty PNG.
    ranks, fracs = rank_abundance_data(counter)
    df = robjects.DataFrame({'rank': ranks, 'f': fracs})
    pp = ggplot.ggplot(df) + \
        ggplot.aes_string(x='rank', y='f') + \
        ggplot.geom_point() + \
        ggplot.scale_y_log10(name='fraction of hits')

    grdevices.png('analytics_out/{0}_rank_abundance.png'.format(name))
    try:
        pp.plot()
    finally:
        # close the device even if drawing failed
        grdevices.dev_off()
Esempio n. 14
0
    def plot_ROC(self, path):
        """Print ``self.df`` and draw it ('fpr' vs 'tpr') on a PDF device.

        A 14x8 PDF device is opened at ``path``; the curve is a blue line
        with points on top.

        NOTE(review): the device is not closed here -- confirm the caller
        calls dev.off(), otherwise the PDF may stay unfinished.
        """
        open_pdf = robjects.r['pdf']
        open_pdf(path, width=14, height=8)

        frame = self.df
        print(frame)
        r_frame = convert_to_r_dataframe(frame, strings_as_factors=True)
        roc = (
            ggplot2.ggplot(r_frame)
            + ggplot2.aes_string(x='fpr', y='tpr')
            + ggplot2.geom_line(color='blue')
            + ggplot2.geom_point(size=2)
        )
        roc.plot()
Esempio n. 15
0
def plot_total_bp(parser, args, tot_bp_per_pore):
    """
    Plot the pore performance

    One point is drawn per pore of the layout returned by
    ``minion_flowcell_layout()``, placed on a 16-row by 32-column grid and
    coloured by log10 of the pore's total.

    :param parser: not used by this function.
    :param args: options object; ``args.saveas`` is the output path
        ('.pdf' or '.png') or None to draw on the default device and wait
        for the user to press enter.
    :param tot_bp_per_pore: mapping from pore id to a positive total; pores
        missing from the mapping are plotted with value 0.

    An unrecognised ``args.saveas`` extension is logged and the process
    exits with status 1.
    """
    import math
    r = robjects.r
    r.library("ggplot2")
    grdevices = importr('grDevices')

    flowcell_layout = minion_flowcell_layout()

    pore_values = []
    for pore in flowcell_layout:
        total = tot_bp_per_pore.get(pore) if pore in tot_bp_per_pore else None
        pore_values.append(0 if total is None and pore not in tot_bp_per_pore
                           else math.log10(tot_bp_per_pore[pore]))

    # make a data frame of the lists; wrapping each range in list() keeps
    # the repetition working on Python 3, where a range cannot be multiplied
    # NOTE(review): log10 values are floats but are stored in an IntVector
    # as before -- confirm whether a FloatVector was intended.
    d = {'rownum': robjects.IntVector(list(range(1, 17)) * 32),
         'colnum': robjects.IntVector(sorted(list(range(1, 33)) * 16)),
         'log10_tot_bp': robjects.IntVector(pore_values),
         'labels': robjects.IntVector(flowcell_layout)
         }

    df = robjects.DataFrame(d)
    gp = gg.ggplot(df)
    pp = gp + gg.aes_string(y='factor(rownum, rev(rownum))',
                            x='factor(colnum)') \
        + gg.geom_point(gg.aes_string(color='log10_tot_bp'), size=7) \
        + gg.geom_text(gg.aes_string(label='labels'), colour="white", size=2) \
        + gg.scale_colour_gradient2(low="black", mid="black", high="red") \
        + gg.coord_fixed(ratio=1.4) \
        + gg.labs(x=gg.NULL, y=gg.NULL)

    if args.saveas is not None:
        plot_file = args.saveas
        if plot_file.endswith(".pdf"):
            grdevices.pdf(plot_file, width=11, height=8.5)
        elif plot_file.endswith(".png"):
            grdevices.png(plot_file, width=11, height=8.5,
                          units="in", res=300)
        else:
            logger.error("Unrecognized extension for %s!" % (plot_file))
            # non-zero status so callers and shells can detect the failure
            sys.exit(1)

        try:
            pp.plot()
        finally:
            # close the file device even if drawing failed
            grdevices.dev_off()
    else:
        pp.plot()
        # keep the plot open until user hits enter
        print('Type enter to exit.')
        try:
            wait_for_enter = raw_input  # Python 2
        except NameError:
            wait_for_enter = input  # Python 3
        wait_for_enter()
Esempio n. 16
0
    def plot_all_errors(self, path):
        """Draw every column of ``self.df`` against 'iteration' on a PDF.

        ``self.df`` is melted with 'iteration' as the id column, so each
        remaining column becomes one coloured series of points and lines.
        A 14x8 PDF device is opened at ``path`` for the drawing.

        NOTE(review): the device is not closed here -- confirm the caller
        calls dev.off(), otherwise the PDF may stay unfinished.
        """
        open_pdf = robjects.r["pdf"]
        open_pdf(path, width=14, height=8)

        long_frame = pandas.melt(self.df, id_vars="iteration")
        r_frame = convert_to_r_dataframe(long_frame, strings_as_factors=True)
        chart = (
            ggplot2.ggplot(r_frame)
            + ggplot2.aes_string(x="iteration", y="value", color="variable")
            + ggplot2.geom_point(size=2)
            + ggplot2.geom_line()
        )
        chart.plot()
def plotStats(data,
              outFolder,
              tiles,
              prop="qual",
              prefix="",
              high="yellow",
              low="blue",
              pdf=False,
              detail=True):
    """Save an overview plot and, optionally, one detail plot per tile.

    Every plot shows column ``y`` against ``x`` coloured by ``prop`` and is
    faceted by 'tile'.

    :param data: R data frame with columns 'x', 'y', 'tile' and ``prop``.
    :param outFolder: folder for the overview image; detail images go to
        its 'detailPlots' sub-folder, which is created when missing.
    :param tiles: iterable of integer tile ids to draw detail plots for.
    :param prop: column mapped to colour; also used in titles/file names.
    :param prefix: optional file name prefix.
    :param high: colour for the high end of the gradient.
    :param low: colour for the low end of the gradient.
    :param pdf: when true, each detail plot is also saved as PDF.
    :param detail: when false, only the overview plot is produced.
    """
    def _build(frame, point_size, title):
        # Assemble the faceted, gradient-coloured point plot for frame.
        return ggplot.ggplot(frame) \
            + ggplot.aes_string(x="x", y="y", col=prop) \
            + ggplot.geom_point(size=point_size) \
            + ggplot.facet_wrap(robjects.Formula("~ tile")) \
            + ggplot.scale_colour_gradient(high=high, low=low) \
            + ggplot.ggtitle(title)

    # overview plot
    p = _build(data, 0.1, "Overview %s" % (prop))
    if prefix:
        fileName = "%s_overview_%s.png" % (prefix, prop)
    else:
        fileName = "overview_%s.png" % (prop)
    p.save(os.path.join(outFolder, fileName), scale=2)

    # detail plots
    if detail:
        detailFolder = os.path.join(outFolder, "detailPlots")
        # saving fails if the target folder is missing, so make sure it exists
        if not os.path.isdir(detailFolder):
            os.makedirs(detailFolder)
        for t in tiles:
            tile_rows = data.rx(data.rx2("tile").ro == t, True)
            p = _build(tile_rows, 1, "%i %s" % (t, prop))
            if prefix:
                fileName = "%s_%i_%s.png" % (prefix, t, prop)
            else:
                fileName = "%i_%s.png" % (t, prop)
            p.save(os.path.join(detailFolder, fileName), scale=2)
            if pdf:
                # NOTE(review): unlike the PNG name, no '_' separates the
                # prefix from the tile id here -- kept as is, confirm intent.
                fileName = "%s%i_%s.pdf" % (prefix, t, prop)
                p.save(os.path.join(detailFolder, fileName), scale=2)
Esempio n. 18
0
def single_locus_net_informativeness(locus_table, net_pi_table, locus):
    """Build a scatter plot of 'pi' against 'time' for one locus.

    The rows are fetched by evaluating ``dbGetQuery(con, ...)`` in R, so an
    R object named ``con`` must already exist there.

    :param locus_table: name of the table providing 'locus' and 'id'.
    :param net_pi_table: name of the table joined on 'id'.
    :param locus: locus to select; also used as the plot title.
    :return: the assembled ggplot object (not drawn here).
    """
    # NOTE(review): table names and locus are interpolated straight into
    # the SQL/R text -- only safe for trusted input.
    sql_template = '''"SELECT {0}.locus, time, pi FROM {0}, {1} 
    WHERE {0}.id = {1}.id AND locus = '{2}'"'''
    quoted_sql = sql_template.format(locus_table, net_pi_table, locus)
    r_call = '''dbGetQuery(con, {})'''.format(quoted_sql)
    frame = robjects.r(r_call)

    canvas = ggplot2.ggplot(frame)
    canvas = canvas + ggplot2.aes_string(x='time', y='pi')
    canvas = canvas + ggplot2.geom_point(size=3, alpha=0.4)
    canvas = canvas + ggplot2.scale_x_reverse('years ago')
    canvas = canvas + ggplot2.scale_y_continuous('phylogenetic informativeness')
    canvas = canvas + ggplot2.opts(title=locus)
    return canvas
 def create_plot(filename, data, performance_object):
     """Plot mean +/- standard error of a measure per dimension to a PNG.

     :param filename: path of the PNG file to write.
     :param data: R data frame with the columns 'dimension', 'signature',
         ``mean.<performance_object>`` and ``stderror.<performance_object>``.
     :param performance_object: measure name used to pick those columns.
     """
     mean_col = "mean.%s" % performance_object
     err_col = "stderror.%s" % performance_object

     plot = (ggplot2.ggplot(data) + ggplot2.aes_string(
         x="dimension", y=mean_col) +
             ggplot2.geom_point(ggplot2.aes_string(colour="signature")) +
             ggplot2.geom_errorbar(
                 ggplot2.aes_string(
                     ymax="%s+%s" % (mean_col, err_col),
                     ymin="%s-%s" % (mean_col, err_col),
                 )))

     grdevices.png(file=filename)
     try:
         plot.plot()
     finally:
         # close the device even if drawing failed
         grdevices.dev_off()
Esempio n. 20
0
    def compute(x0, y0, x1, y1):
        """Fit two Gaussians plus an offset to a profile of an image region.

        The region ``img[y0:y1, x0:x1]`` is averaged over axis 2 and
        inverted (255 - value), then averaged over axis 1 to give one value
        per row. R's ``nls`` fits ``off + g1 + g2`` to that profile; the
        data, the full fit and the two individual Gaussians are plotted
        with ggplot2, and the share of the first Gaussian
        (c1*sg1 / (c1*sg1 + c2*sg2)) is passed to ``title`` before ``show``.

        ``img``, ``average``, ``mgrid``, ``title`` and ``show`` come from
        the enclosing scope (presumably numpy/pylab -- TODO confirm).

        Side effect: assigns ``xs`` and ``df`` in R's global environment;
        the R code also creates ``fit``, ``k``, ``fitdat``, ``fitg1`` and
        ``fitg2`` there.
        """
        # Selected square, averaged over axis 2 and inverted
        sel = 255 - average(img[y0:y1,x0:x1],2)
        # Average across x's (axis 1), leaving one value per row
        ysel = average(sel,1)
        line = ysel
        # 0..len(line)-1, used as the x coordinate of the profile
        xs = mgrid[0:line.shape[0]]

        # Pass the data to R
        rxs = robjects.FloatVector(xs)
        rys = robjects.FloatVector(line)
        rdf = robjects.DataFrame({'x': rxs, 'y': rys})
        # the R snippets below refer to these two by name
        robjects.globalenv['xs'] = rxs
        robjects.globalenv['df'] = rdf
        #print(rys.r_repr())

        # Fit an R model: offset plus two Gaussians, with fixed start values
        robjects.r('''fit <- nls(y ~ (off + c1 * exp(-(x-mu1)**2/(2*sg1**2))
                                    + c2 * exp(-(x-mu2)**2/(2*sg2**2))),
                               data=df,
                               start=list(off = 90, c1=120, mu1=30, sg1=10,
                                   c2=120, mu2=60, sg2=10),
                               algorithm='port')''')

        # Get fit results: the full prediction and each Gaussian on its own
        robjects.r('''k <- coef(fit)
                      fitdat <- data.frame(x=xs)
                      fitdat$y <- predict(fit, newdata=fitdat)
                      ## Independent Gaussians
                      fitg1 <- data.frame(x=xs)
                      fitg1$y <- k[['off']] + k[['c1']] * exp(-(xs-k[['mu1']])**2/(2*k[['sg1']]**2))
                      fitg2 <- data.frame(x=xs)
                      fitg2$y <- k[['off']] + k[['c2']] * exp(-(xs-k[['mu2']])**2/(2*k[['sg2']]**2))''')
        
        # Plot R fits on top of the measured points
        fitdat = robjects.globalenv['fitdat']
        fitg1 = robjects.globalenv['fitg1']
        fitg2 = robjects.globalenv['fitg2']
        pp = ggplot2.ggplot(rdf) \
             + ggplot2.aes_string(x='x', y='y') \
             + ggplot2.geom_point() \
             + ggplot2.geom_smooth(data=fitdat, stat="identity", size=1.5) \
             + ggplot2.geom_smooth(data=fitg1, stat="identity") \
             + ggplot2.geom_smooth(data=fitg2, stat="identity")
        pp.plot()

        # Compute the ratio of the Gaussian integrals (amplitude * sigma)
        ratio = robjects.r('''k[['c1']]*k[['sg1']]/(k[['c1']]*k[['sg1']]+k[['c2']]*k[['sg2']])''')
        
        title('ratio='+str(ratio))
        show()
Esempio n. 21
0
def direct_taxon_abundance_box_plot(data, plot_file_path, title, xlabel, ylabel):
    """Draw a box plot of 'abundance' per 'genotype' into a PDF file.

    Jittered points and plain points are drawn on top of the boxes.

    :param data: R data frame with the columns 'genotype' and 'abundance'.
    :param plot_file_path: path of the PDF file to write.
    :param title: plot title.
    :param xlabel: x axis label.
    :param ylabel: y axis label.
    """
    pp = ggplot2.ggplot(data) \
        + ggplot2.aes_string(x='genotype', y='abundance') \
        + ggplot2.geom_boxplot() \
        + ggplot2.ggtitle(title) \
        + ggplot2.labs(x=xlabel, y=ylabel) \
        + ggplot2.geom_jitter(position=ggplot2.position_jitter(w=0.1)) \
        + ggplot2.geom_point()

    # open the device only once the plot is built, and always close it
    grdevices.pdf(file=plot_file_path)
    try:
        pp.plot()
    finally:
        grdevices.dev_off()
Esempio n. 22
0
def generate_step3_5_lrr_acc20_line_chart(subgroups_to_lrrs_acc20mean,
                                          prefix=''):
    """Write the per-subgroup counts and draw the acc20 line chart.

    Two files are written under ``OUTPUT_PATH``:

    * ``<prefix>step3_5_lrr_count.txt`` -- one ``subgroup: count`` line per
      subgroup;
    * ``<prefix>step3_5_lrr_acc20_line.png`` -- one line per subgroup of
      the acc20 value against the 1-based position, with the x axis ticks
      1..24 labelled by the characters of 'LxxLxLxxNxLsGxIPxxLxxLxx'.

    :param subgroups_to_lrrs_acc20mean: mapping from subgroup name to a
        ``(acc20means, count)`` pair, where ``acc20means`` is a sequence of
        numbers ordered by position.
    :param prefix: optional prefix for both output file names.
    """
    pandas2ri.activate()
    subgroups_to_lrr_count = {}
    columns_to_data = {'subgroup': [], 'pos': [], 'acc20': []}
    for subgroup, (acc20means,
                   acc20_count) in subgroups_to_lrrs_acc20mean.items():
        subgroups_to_lrr_count[subgroup] = acc20_count
        for pos, acc20mean in enumerate(acc20means, 1):
            columns_to_data['subgroup'].append(subgroup)
            columns_to_data['pos'].append(pos)
            columns_to_data['acc20'].append(acc20mean)

    # Write the count of LRRs for each subgroup to file
    with open(os.path.join(OUTPUT_PATH, prefix + "step3_5_lrr_count.txt"),
              'w') as f:
        for subgroup, lrr_count in subgroups_to_lrr_count.items():
            f.write("{}: {}\n".format(subgroup, lrr_count))

    # Generate the line chart file
    r_columns_to_data = {
        'subgroup': ro.StrVector(columns_to_data['subgroup']),
        'pos': ro.IntVector(columns_to_data['pos']),
        'acc20': ro.FloatVector(columns_to_data['acc20'])
    }
    df = ro.DataFrame(r_columns_to_data)

    line_chart_file_path = os.path.join(OUTPUT_PATH,
                                        prefix + "step3_5_lrr_acc20_line.png")
    # lazy %-style arguments: the data frame is only rendered to text when
    # debug logging is actually enabled
    logging.debug("The Data Frame for file %s: \n%s", line_chart_file_path,
                  df)
    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
         ggplot2.aes_string(x='pos', y='acc20', group='subgroup', colour='subgroup') + \
         ggplot2.geom_point(size=4, shape=20) + \
         ggplot2.geom_line(size=3) + \
         ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
         ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
         ggplot2.scale_x_continuous(breaks=ro.IntVector(list(range(1, 25))),
                                    labels=ro.StrVector(list('LxxLxLxxNxLsGxIPxxLxxLxx')))

    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    try:
        pp.plot()
        logging.info("Output step3 file %s", line_chart_file_path)
    finally:
        # close the device even if drawing failed
        grdevices.dev_off()
Esempio n. 23
0
def plot_cels(expr, expt_names, expt_name_idx, cel_names, outdir=None):
    """Makes correlation plots between CEL files for the same cell type

    For every experiment name, each pair of its columns in ``expr`` is
    scatter-plotted against each other and their Pearson correlation
    (``np.corrcoef``) is recorded.

    :param expr: 2-D array indexed as ``expr[:, column]``.
    :param expt_names: iterable of experiment names.
    :param expt_name_idx: mapping from experiment name to the sequence of
        column indices of ``expr`` that belong to it.
    :param cel_names: sequence of CEL file names, indexed by column; '_' is
        shown as '.' in axis labels and in the summary.
    :param outdir: folder for the PNG files and 'cor_summary.txt'; created
        when missing. When None, plots are drawn on the current device and
        no summary file is written.
    :return: pandas DataFrame with columns 'name1', 'name2' and 'cor',
        indexed by ``<experiment>-<pair number>``.
    """
    from itertools import combinations

    fsize = 10
    names_1 = []
    names_2 = []
    cors = []
    titles = []

    # Create the output folder once, up front. Doing it only while plotting
    # left it missing (and the summary write failing) whenever no
    # experiment had at least two columns.
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    for ex in expt_names:
        # Indices of CEL files (columns of expr) corresponding to that cell type
        tmp_idx = expt_name_idx[ex]

        # every unordered pair of columns, numbered from 1
        for plot_idx, (col_1, col_2) in enumerate(combinations(tmp_idx, 2), 1):
            name1 = cel_names[col_1].replace('_', '.')
            name2 = cel_names[col_2].replace('_', '.')
            values_1 = expr[:, col_1]
            values_2 = expr[:, col_2]
            cor = np.corrcoef(values_1, values_2)[0, 1]
            plot_title = ex + '-' + str(plot_idx)
            names_1.append(name1)
            names_2.append(name2)
            cors.append(cor)
            titles.append(plot_title)

            df = ro.DataFrame({'x': ro.FloatVector(values_1),
                               'y': ro.FloatVector(values_2)})
            gp = ggplot2.ggplot(df) + ggplot2.aes_string(x='x', y='y') + \
                ggplot2.geom_point(size=1) + \
                ggplot2.scale_x_continuous(name1) + ggplot2.scale_y_continuous(name2) + \
                ggplot2.theme_bw() + ggplot2.ggtitle('{:s}-{:d} ({:.4f})'.format(ex, plot_idx, cor)) + \
                ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=fsize),
                                 'axis.title.x': ggplot2.element_text(size=8),
                                 'axis.text.y': ggplot2.element_text(size=fsize),
                                 'axis.title.y': ggplot2.element_text(size=8, angle=90),
                                 'plot.title': ggplot2.element_text(size=fsize)})

            if outdir is None:
                gp.plot()
            else:
                outfile = os.path.join(outdir, plot_title + '.png')
                # ``units`` is ggsave's actual argument name; ``unit`` only
                # worked through R's partial argument matching
                ro.r.ggsave(filename=outfile, plot=gp, width=85, height=85,
                            units='mm')
    df = pd.DataFrame({'name1': names_1, 'name2': names_2, 'cor': cors},
                      index=titles)
    if outdir is not None:
        df.to_csv(os.path.join(outdir, 'cor_summary.txt'), sep='\t')
    return df
Esempio n. 24
0
def multiple_locus_net_informativeness_scatterplot(locus_table, net_pi_table,
        loci):
    """Build a scatter plot of 'pi' against 'time', coloured by locus.

    The rows are fetched by evaluating ``dbGetQuery(con, ...)`` in R, so an
    R object named ``con`` must already exist there.

    :param locus_table: name of the table providing 'locus' and 'id'.
    :param net_pi_table: name of the table joined on 'id'.
    :param loci: non-empty sequence of locus names; if the first entry is
        'all' (any case) no locus filter is applied.
    :return: the assembled ggplot object (not drawn here).
    """
    # NOTE(review): table names are interpolated straight into the SQL/R
    # text -- only safe for trusted input.
    if loci[0].lower() != 'all':
        # Build the IN (...) list by hand. Formatting ``tuple(loci)`` gave
        # "('x',)" for a single locus (invalid SQL) and u'...' literals for
        # Python 2 unicode names. Single quotes are doubled per SQL rules.
        quoted = ", ".join(
            "'{0}'".format(name.replace("'", "''")) for name in loci)
        in_list = "({0})".format(quoted)
        qry = '''"SELECT {0}.locus, time, pi FROM {0}, {1} 
            WHERE {0}.id = {1}.id and locus in {2}"'''.format(locus_table,
            net_pi_table, in_list)
    else:
        qry = '''"SELECT {0}.locus, time, pi FROM {0}, {1} 
            WHERE {0}.id = {1}.id"'''.format(locus_table,
            net_pi_table)
    frame = robjects.r('''dbGetQuery(con, {})'''.format(qry))
    gg_frame = ggplot2.ggplot(frame)
    plot = gg_frame + ggplot2.aes_string(x = 'time', y = 'pi') + \
            ggplot2.geom_point(ggplot2.aes_string(colour = 'locus'), \
            size = 3, alpha = 0.4) + ggplot2.scale_x_reverse('years ago') + \
            ggplot2.scale_y_continuous('phylogenetic informativeness')
    return plot
Esempio n. 25
0
	def scatter(self, dataframe, filename, parm1, parm2, units1, units2, group,logx,logy):
		"""Save a 512x512 PNG scatter plot of ``parm2`` against ``parm1``.

		:param dataframe: R data frame holding the plotted columns.
		:param filename: path of the PNG file to write.
		:param parm1: column for the x axis.
		:param parm2: column for the y axis.
		:param units1: text appended to ``parm1`` in the x axis label.
		:param units2: text appended to ``parm2`` in the y axis label.
		:param group: column mapped to colour.
		:param logx: True for a log10 x axis.
		:param logy: True for a log10 y axis.
		"""
		gg = ggplot2.ggplot(dataframe) \
			+ ggplot2.aes_string(x=parm1, y=parm2, colour=group) \
			+ ggplot2.geom_point(alpha=0.7) \
			+ ggplot2.labs(x=parm1 + " " + units1, y=parm2 + " " + units2)

		# Explicit comparison kept on purpose: only values equal to True
		# switch a log scale on, exactly as before.
		if logx == True:
			gg = gg + ggplot2.scale_x_log10()
		if logy == True:
			gg = gg + ggplot2.scale_y_log10()

		# open the device only once the plot is built, and always close it
		grdevices.png(file=filename, width=512, height=512)
		try:
			gg.plot()
		finally:
			grdevices.dev_off()
Esempio n. 26
0
def _generate_step3_5_ss_acc20_line_chart(ts_to_acc20s, tname,
                                          line_chart_file_path):
    """Draw the per-type mean acc20 line chart to a PNG file.

    The means come from ``calc_acc20mean_by_types(ts_to_acc20s)``; element
    ``k`` of each sequence is plotted at site ``k - 5``. The x axis has
    ticks -5..5, with 0 labelled 'N'.

    :param ts_to_acc20s: input handed to ``calc_acc20mean_by_types``, which
        must return a mapping from type to a sequence of numbers.
    :param tname: name of the data frame column holding the type; also the
        grouping and colour aesthetic.
    :param line_chart_file_path: path of the PNG file to write.
    """
    # lazy %-style arguments: the payload is only rendered to text when
    # debug logging is actually enabled
    logging.debug("Begin to generate %s, data %s", line_chart_file_path,
                  ts_to_acc20s)
    ts_to_acc20mean = calc_acc20mean_by_types(ts_to_acc20s)
    columns_to_data = {tname: [], 'site': [], 'acc20': []}
    for ss, acc20means in ts_to_acc20mean.items():
        for site, acc20mean in enumerate(acc20means, -5):
            columns_to_data[tname].append(ss)
            columns_to_data['site'].append(site)
            columns_to_data['acc20'].append(acc20mean)

    # Generate the line chart file
    r_columns_to_data = {
        tname: ro.StrVector(columns_to_data[tname]),
        'site': ro.IntVector(columns_to_data['site']),
        'acc20': ro.FloatVector(columns_to_data['acc20'])
    }
    df = ro.DataFrame(r_columns_to_data)

    logging.debug("The Data Frame for file %s: \n%s", line_chart_file_path,
                  df)
    gp = ggplot2.ggplot(df)
    pp = gp + \
         ggplot2.theme_bw() + \
         ggplot2.theme_classic() + \
         ggplot2.theme(**{'axis.text.x': ggplot2.element_text(size=35)}) + \
         ggplot2.theme(**{'axis.text.y': ggplot2.element_text(size=35)}) + \
         ggplot2.aes_string(x='site', y='acc20', group=tname, colour=tname) + \
         ggplot2.geom_point(size=4, shape=20) + \
         ggplot2.geom_line(size=3) + \
         ggplot2.theme(**{'legend.title': ggplot2.element_blank()}) + \
         ggplot2.theme(**{'legend.text': ggplot2.element_text(size=20)}) + \
         ggplot2.scale_x_continuous(breaks=ro.IntVector(list(range(-5, 6))),
                                    labels=ro.StrVector(['-5', '-4', '-3', '-2', '-1', 'N', '1', '2', '3', '4', '5']))

    grdevices.png(file=line_chart_file_path, width=1024, height=512)
    try:
        pp.plot()
        logging.info("Output step3 file %s", line_chart_file_path)
    finally:
        # close the device even if drawing failed
        grdevices.dev_off()
Esempio n. 27
0
    def scatter(self, dataframe, filename, parm1, parm2, units1, units2, group,
                logx, logy):
        """Save a 512x512 PNG scatter plot of ``parm2`` against ``parm1``.

        :param dataframe: R data frame holding the plotted columns.
        :param filename: path of the PNG file to write.
        :param parm1: column for the x axis.
        :param parm2: column for the y axis.
        :param units1: text appended to ``parm1`` in the x axis label.
        :param units2: text appended to ``parm2`` in the y axis label.
        :param group: column mapped to colour.
        :param logx: True for a log10 x axis.
        :param logy: True for a log10 y axis.
        """
        gg = ggplot2.ggplot(dataframe) \
            + ggplot2.aes_string(x=parm1, y=parm2, colour=group) \
            + ggplot2.geom_point(alpha=0.7) \
            + ggplot2.labs(x=parm1 + " " + units1, y=parm2 + " " + units2)

        # Explicit comparison kept on purpose: only values equal to True
        # switch a log scale on, exactly as before.
        if logx == True:
            gg = gg + ggplot2.scale_x_log10()
        if logy == True:
            gg = gg + ggplot2.scale_y_log10()

        # open the device only once the plot is built, and always close it
        grdevices.png(file=filename, width=512, height=512)
        try:
            gg.plot()
        finally:
            grdevices.dev_off()
Esempio n. 28
0
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream, downstream):
    """Dump raw coverage and plot main-vs-control TSS profiles per key.

    Only keys ``te`` of ``te_tss_cov`` whose ``te[0]`` and ``te[1]`` both
    appear in the hard-coded allow-lists below are processed.  For each of
    them a tab-separated table is written under ``<out_prefix>_raw/`` and a
    PDF scatter plot under ``<out_prefix>_plot/``.  Both directories are
    removed and recreated on every call.

    Args:
        te_tss_cov: mapping from key to a coverage sequence, read at index
            ``upstream + i`` for every ``i`` in ``[-upstream, downstream]``.
        control_te_tss_cov: control coverage, looked up with the same keys
            and indices.  The two sequences are joined with ``+``, so they
            are presumably plain lists -- confirm against the caller.
        out_prefix: path prefix of the two output directories.
        upstream: number of positions before index 0.
        downstream: number of positions after index 0.
    """
    # (te[0], te[1]) allow-lists, shared by the dump pass and the plot pass.
    wanted_first = ['n','*','HERVH-int','L2a','AluSx','AluJb','MIRb','LTR7']
    wanted_second = ['n','*','LINE/L1','SINE/Alu','LTR/ERV1','LTR/ERVL-MaLR','LINE/L2','LTR/ERVL','SINE/MIR','DNA/hAT-Charlie','LTR/ERVK','DNA/TcMar-Tigger']

    def _file_stem(te):
        # '/' inside a name would be read as a path separator.
        return '%s_%s' % (te[0].replace('/','_'), te[1].replace('/','_'))

    # Materialised as a list so it can be repeated and re-iterated on both
    # Python 2 and Python 3.
    positions = list(range(-upstream, downstream+1))

    # clean raw counts dir
    raw_dir = '%s_raw' % out_prefix
    if os.path.isdir(raw_dir):
        shutil.rmtree(raw_dir)
    os.mkdir(raw_dir)

    selected = [te for te in te_tss_cov
                if te[0] in wanted_first and te[1] in wanted_second]

    # dump raw counts to file; `with` closes the handle even if a row fails
    for te in selected:
        with open('%s/%s.txt' % (raw_dir, _file_stem(te)), 'w') as raw_out:
            for i in positions:
                raw_out.write('%d\t%e\t%e\n' % (i, te_tss_cov[te][upstream+i], control_te_tss_cov[te][upstream+i]))

    # clean plot dirs
    plot_dir = '%s_plot' % out_prefix
    if os.path.isdir(plot_dir):
        shutil.rmtree(plot_dir)
    os.mkdir(plot_dir)

    # make data structures: the position axis and labels are identical for
    # every key (main block followed by control block), so build them once
    tss_i = ro.IntVector(positions * 2)
    labels = ro.StrVector(['Main']*len(positions) + ['Control']*len(positions))
    for te in selected:
        cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
        df = ro.DataFrame({'tss_i':tss_i, 'cov':cov, 'label':labels})

        # construct full plot
        gp = ggplot2.ggplot(df) + \
            ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
            ggplot2.geom_point() + \
            ggplot2.scale_x_continuous('TSS index') + \
            ggplot2.scale_y_continuous('Coverage') + \
            ggplot2.scale_colour_discrete('')

        # plot to file; the device is closed even when plotting raises so a
        # failure does not leave a half-written PDF device open
        grdevices.pdf(file='%s/%s.pdf' % (plot_dir, _file_stem(te)))
        try:
            gp.plot()
        finally:
            grdevices.dev_off()
Esempio n. 29
0
  def build(self):
    ##print grdevices.palette()
    if self.spec['type'] == 'csv' :
        df = robjects.DataFrame.from_csvfile('./data/' + self.spec['name'] + '.csv')
    else :
        print type(self.spec['name'])
        samplename = self.spec['name'].encode('ascii','ignore')
        df = data(datasets).fetch(samplename)[samplename]

    #print df
    grdevices.png(file=self.sfilename, width=700, height=400)
    pp = ggplot2.ggplot(df)

    ppargs = {}

    if len(self.spec['viz[xaxis]']) != 0 :
        ppargs['x'] = self.spec['viz[xaxis]']

    if len(self.spec['viz[yaxis]']) != 0 :
        ppargs['y'] = self.spec['viz[yaxis]']

    if len(self.spec['viz[color]']) != 0 :
        ppargs['colour'] = self.spec['viz[color]']

    if len(self.spec['viz[shape]']) != 0 :
        ppargs['shape'] = self.spec['viz[shape]']

    player1 = self.spec['viz[layer1]'] if len(self.spec['viz[layer1]']) != 0 else None
    player2 = self.spec['viz[layer2]'] if len(self.spec['viz[layer2]']) != 0 else None 

    pp = pp + ggplot2.aes_string(**ppargs)
    ##pp = pp + ggplot2.geom_bar(stat="identity", fill="white", colour="darkgreen")
    ##pp = pp + ggplot2.scale_fill_brewer(palette="blues")
    ##pp = pp + ggplot2.geom_point() 
    pp = pp + ggplot2.geom_point(size=5) 
    pp.plot()
    grdevices.dev_off()
    return self.cfilename
Esempio n. 30
0
def make_output(cov, out_prefix, window):
    """Write the coverage profile around a splice site as text and as a PDF.

    ``<out_prefix>_raw.txt`` receives one ``position<TAB>coverage`` line per
    position and ``<out_prefix>.pdf`` a scatter plot of the same values.

    Args:
        cov: coverage sequence, read at index ``window // 2 + i`` for each
            relative position ``i``.
        out_prefix: path prefix of both output files.
        window: integer window width; positions run from ``-window // 2``
            to ``window // 2`` inclusive.
    """
    # `//` spells out the floor division the code relied on under Python 2,
    # so the same positions are produced on Python 3.
    half = window // 2
    positions = list(range(-window // 2, half + 1))
    # NOTE(review): for an odd `window` the first position is -(half + 1),
    # which reads cov[-1]; confirm that callers only pass even windows.

    # dump raw counts to file; `with` closes the handle even if a row fails
    with open('%s_raw.txt' % out_prefix, 'w') as raw_out:
        for i in positions:
            raw_out.write('%d\t%e\n' % (i, cov[half + i]))

    # make plot data structures
    splice_i = ro.IntVector(positions)
    cov_r = ro.FloatVector(cov)
    df = ro.DataFrame({'splice_i':splice_i, 'cov':cov_r})

    # construct plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='splice_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('Position relative to splice site') + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file; always close the device, even when plotting raises
    grdevices.pdf(file='%s.pdf' % out_prefix)
    try:
        gp.plot()
    finally:
        grdevices.dev_off()
Esempio n. 31
0
 def plot_domain_cumul(self, crawl):
     """Plot cumulative URL counts against cumulative domain counts.

     Rows of ``self.histogr`` are restricted to ``type == 'domain'``, the
     given ``crawl`` and ``type_counted == 'url'``, sorted by descending
     ``count`` and extended with running totals and percentage columns.
     The table is printed before and after the derived columns are added,
     and the log-log line/point plot is saved below ``PLOTDIR``.

     Args:
         crawl: value compared against the ``crawl`` column.

     Returns:
         The plot object that was saved.
     """
     # -- coverage (cumulative pages) per domain
     data = self.histogr
     data = data[data['type'].isin(['domain'])]
     data = data[data['crawl'] == crawl]
     # Explicit copy: a column is added right below, and assigning into a
     # filtered selection makes pandas emit SettingWithCopyWarning.
     data = data[data['type_counted'].isin(['url'])].copy()
     data['urls'] = data['count']*data['frequency']
     print(data)
     data = data[['urls', 'count', 'frequency']]
     data = data.sort_values(['count'], ascending=False)
     data['cum_domains'] = data['frequency'].cumsum()
     data['cum_urls'] = data['urls'].cumsum()
     # per-column share of the column total, in percent
     data_perc = data.apply(lambda x: round(100.0*x/float(x.sum()), 1))
     data['%domains'] = data_perc['frequency']
     data['%urls'] = data_perc['urls']
     # Column totals are loop-invariant; computing them once avoids
     # re-summing the whole column for every row.
     total_domains = float(data['frequency'].sum())
     total_urls = float(data['urls'].sum())
     data['%cum_domains'] = data['cum_domains'].apply(
         lambda x: round(100.0*x/total_domains, 1))
     data['%cum_urls'] = data['cum_urls'].apply(
         lambda x: round(100.0*x/total_urls, 1))
     with pandas.option_context('display.max_rows', None,
                                'display.max_columns', None,
                                'display.width', 200):
         print(data)
     img_path = os.path.join(PLOTDIR,
                             'crawler/histogr_domain_cumul.png')
     # data.to_csv(img_path + '.csv')
     title = 'Cumulative URLs for Top Domains'
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='cum_domains', y='cum_urls') \
         + ggplot2.geom_line() + ggplot2.geom_point() \
         + GGPLOT2_THEME \
         + ggplot2.labs(title=title, x='domains cumulative',
                        y='URLs cumulative') \
         + ggplot2.scale_y_log10() \
         + ggplot2.scale_x_log10()
     p.save(img_path)
     return p
Esempio n. 32
0
 def plot_domain_cumul(self, crawl):
     """Plot cumulative URL counts against cumulative domain counts.

     Rows of ``self.histogr`` are restricted to ``type == 'domain'``, the
     given ``crawl`` and ``type_counted == 'url'``, sorted by descending
     ``count`` and extended with running totals and percentage columns.
     The table is printed before and after the derived columns are added,
     and the log-log line/point plot is saved below ``PLOTDIR``.

     Args:
         crawl: value compared against the ``crawl`` column.

     Returns:
         The plot object that was saved.
     """
     # -- coverage (cumulative pages) per domain
     data = self.histogr
     data = data[data['type'].isin(['domain'])]
     data = data[data['crawl'] == crawl]
     # Explicit copy: a column is added right below, and assigning into a
     # filtered selection makes pandas emit SettingWithCopyWarning.
     data = data[data['type_counted'].isin(['url'])].copy()
     data['urls'] = data['count'] * data['frequency']
     print(data)
     data = data[['urls', 'count', 'frequency']]
     data = data.sort_values(['count'], ascending=False)
     data['cum_domains'] = data['frequency'].cumsum()
     data['cum_urls'] = data['urls'].cumsum()
     # per-column share of the column total, in percent
     data_perc = data.apply(lambda x: round(100.0 * x / float(x.sum()), 1))
     data['%domains'] = data_perc['frequency']
     data['%urls'] = data_perc['urls']
     # Column totals are loop-invariant; computing them once avoids
     # re-summing the whole column for every row.
     total_domains = float(data['frequency'].sum())
     total_urls = float(data['urls'].sum())
     data['%cum_domains'] = data['cum_domains'].apply(
         lambda x: round(100.0 * x / total_domains, 1))
     data['%cum_urls'] = data['cum_urls'].apply(
         lambda x: round(100.0 * x / total_urls, 1))
     with pandas.option_context('display.max_rows', None,
                                'display.max_columns', None,
                                'display.width', 200):
         print(data)
     img_path = os.path.join(PLOTDIR, 'crawler/histogr_domain_cumul.png')
     # data.to_csv(img_path + '.csv')
     title = 'Cumulative URLs for Top Domains'
     p = ggplot2.ggplot(data) \
         + ggplot2.aes_string(x='cum_domains', y='cum_urls') \
         + ggplot2.geom_line() + ggplot2.geom_point() \
         + GGPLOT2_THEME \
         + ggplot2.labs(title=title, x='domains cumulative',
                        y='URLs cumulative') \
         + ggplot2.scale_y_log10() \
         + ggplot2.scale_x_log10()
     p.save(img_path)
     return p
Esempio n. 33
0
import math, datetime
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr

# R packages used by this demo.
base = importr("base")
datasets = importr("datasets")

# The `mtcars` data set from R's `datasets` package.
mtcars = datasets.data.fetch("mtcars")["mtcars"]

# `mpg` against `wt`, coloured by `factor(cyl)`, with one linear fit per
# `cyl` group.  Layers are added one statement at a time, in draw order.
pp = ggplot2.ggplot(mtcars)
pp = pp + ggplot2.aes_string(x="wt", y="mpg", col="factor(cyl)")
pp = pp + ggplot2.geom_point()
pp = pp + ggplot2.geom_smooth(ggplot2.aes_string(group="cyl"), method="lm")
pp.plot()
Esempio n. 34
0
 def test_vars(self):
     """A plot faceted through ``ggplot2.vars`` is still a ``GGPlot``."""
     gp = ggplot2.ggplot(mtcars)
     gp = gp + ggplot2.aes(x='wt', y='mpg')
     gp = gp + ggplot2.geom_point()
     gp = gp + ggplot2.facet_wrap(ggplot2.vars('gears'))
     assert isinstance(gp, ggplot2.GGPlot)
Esempio n. 35
0
# Each column is the values taken from `combos`/`times` followed by the
# values taken from `combos_r`/`times_r`.
d['code'] = StrVector([combo[0] for combo in combos]) + StrVector([combo[0] for combo in combos_r])
d['sequence'] = StrVector([combo[-2] for combo in combos]) + StrVector([combo[0] for combo in combos_r])
d['time'] = FloatVector(list(times)) + FloatVector(times_r)
d['n_loop'] = IntVector([combo[-1] for combo in combos]) + IntVector([combo[3] for combo in combos_r])
# "<code>:<sequence>" key for every row
d['group'] = StrVector([d['code'][row] + ':' + d['sequence'][row]
                        for row in range(len(d['n_loop']))])
dataf = DataFrame(d)


from rpy2.robjects.lib import ggplot2

# Running time against repeat count, one colour per code, one panel per
# sequence.  The line and point layers each get their own aesthetics object.
p = (ggplot2.ggplot(dataf)
     + ggplot2.geom_line(ggplot2.aes_string(x="n_loop", y="time", colour="code"))
     + ggplot2.geom_point(ggplot2.aes_string(x="n_loop", y="time", colour="code"))
     + ggplot2.facet_wrap(Formula('~sequence'))
     + ggplot2.scale_y_continuous('running time')
     + ggplot2.scale_x_continuous('repeated n times')
     + ggplot2.xlim(0, max(n_loops))
     + ggplot2.labs(title="Benchmark (running time)"))


from rpy2.robjects.packages import importr

# Render the figure into a 712x512 PNG.
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png', width=712, height=512)
p.plot()
grdevices.dev_off()
Esempio n. 36
0
File: test.py Progetto: dvu4/udacity
     ggplot2.scale_fill_gradient(high = 'blue', low = 'red') + \
     ggplot2.scale_fill_continuous(name = "Obama Vote Share") + \
     ggplot2.scale_colour_continuous(name = "Obama Vote Share") + \
     ggplot2.opts(**{'legend.position': 'left', 'legend.key.size': robjects.r.unit(2, 'lines'), 'legend.title' : ggplot2.theme_text(size = 14, hjust=0), \
                     'legend.text': ggplot2.theme_text(size = 12), 'title' : "Obama Vote Share and Distance to Railroads in IL", \
                     'plot.title': ggplot2.theme_text(size = 24), 'plot.margin': robjects.r.unit(robjects.r.rep(0,4),'lines'), \
                     'panel.background': ggplot2.theme_blank(), 'panel.grid.minor': ggplot2.theme_blank(), 'panel.grid.major': ggplot2.theme_blank(), \
                     'axis.ticks': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.title.y': ggplot2.theme_blank(), \
                     'axis.title.x': ggplot2.theme_blank(), 'axis.title.x': ggplot2.theme_blank(), 'axis.text.x': ggplot2.theme_blank(), \
                     'axis.text.y': ggplot2.theme_blank()} ) + \
     ggplot2.geom_line(ggplot2.aes(x='long', y='lat', group='group'), data=IL_railroads, color='grey', size=0.2) + \
     ggplot2.coord_equal()
 
# Draw the map built above on the currently open graphics device.
p_map.plot()
 
## add the scatterplot
## define layout of subplot with viewports

# Viewport for the inset, given by centre position and size.
# Presumably these are fractions of the page (grid's default units) --
# confirm against the grid documentation.
vp_sub = grid.viewport(x = 0.19, y = 0.2, width = 0.32, height = 0.4)
 
# Inset scatter plot of NEAR_DIST against OBAMA_SHAR: points coloured by
# OBAMA_SHAR, a black smoother on top, and the legend turned off.
# NOTE(review): `ggplot2.opts` looks like the pre-`theme()` ggplot2 API;
# confirm the installed ggplot2/rpy2 versions still provide it.
p_sub = ggplot2.ggplot(RR_distance) + \
    ggplot2.aes_string(x = 'OBAMA_SHAR', y= 'NEAR_DIST') + \
    ggplot2.geom_point(ggplot2.aes(color='OBAMA_SHAR')) + \
    ggplot2.stat_smooth(color="black") + \
    ggplot2.opts(**{'legend.position': 'none'}) + \
    ggplot2.scale_x_continuous("Obama Vote Share") + \
    ggplot2.scale_y_continuous("Distance to nearest Railroad")
 
# Draw the inset into the viewport, on the same device as the map.
p_sub.plot(vp=vp_sub)

# Close the device; it was opened before the part of the script shown here.
grdevices.dev_off()
Esempio n. 37
0
 def testAdd(self):
     """Adding aesthetics and a geom to a plot yields a ``GGPlot``."""
     pp = ggplot2.ggplot(mtcars)
     pp = pp + ggplot2.aes_string(x='wt', y='mpg')
     pp = pp + ggplot2.geom_point()
     self.assertTrue(isinstance(pp, ggplot2.GGPlot))
Esempio n. 38
0
File: test.py Progetto: hphp/Kaggle
import math, datetime
import time
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.interactive import process_revents
grdevices = importr('grDevices')
# Start rpy2's R event processing so an interactive plot window stays
# responsive while Python keeps running.
process_revents.start()

base = importr('base')
datasets= importr('datasets')

mtcars = datasets.__rdata__.fetch('mtcars')['mtcars']
# mpg against wt, coloured by factor(cyl), with one linear fit per cyl group
pp = ggplot2.ggplot(mtcars) +  ggplot2.aes_string(x='wt', y='mpg', col='factor(cyl)') +  ggplot2.geom_point() +  ggplot2.geom_smooth(ggplot2.aes_string(group = 'cyl'), method = 'lm') 
#pp.plot()
#process_revents.start()
print(pp)
process_revents.process_revents()

# Keep the process alive until the user interrupts it.  The loop never ends
# on its own, so the stop() call used to be unreachable; `finally` makes
# sure event processing is shut down when Ctrl-C (or any error) ends the loop.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    pass
finally:
    process_revents.stop()
Esempio n. 39
0
# 100 evenly spaced x values spanning the observed range, as a column.
xmin = np.min(x)
xmax = np.max(x)
xs = np.linspace(xmin, xmax, num=100).reshape(100, 1)

# scikit-learn estimators take a 2-D design matrix (rows are observations,
# columns are features), so the 1-D predictor is reshaped into one column.
lm = LinearRegression()
X = corr_nci60.reshape(len(x), 1)
y = corr_sec
lm.fit(X, y)
y_pred = lm.predict(xs)

# Plot the data using the R-bridge rpy and ggplot.  The base plot carries no
# data; each layer brings its own data frame.
p = gg.ggplot(pd.DataFrame())

observed = pd.DataFrame({'r_nci60': corr_nci60, 'r_sec': corr_sec})
p += gg.geom_point(gg.aes_string(x='r_nci60', y='r_sec'), data=observed)

fitted = pd.DataFrame({'x': xs.reshape(-1), 'y': y_pred})
p += gg.geom_line(gg.aes_string(x='x', y='y'), data=fitted, color='red')

p.plot()
Esempio n. 40
0
# The length of the first column of `dataf` is its number of rows.
number_of_peaks = len(dataf[0])


# For every row (R indices are 1-based) drop the first two entries and
# compute `cv` on the rest.
# NOTE(review): `cv` is defined outside this excerpt; the commented-out line
# below suggests it is std/mean (coefficient of variation) -- confirm.
cvI = []
newRow = []  # trimmed rows; not read again in this excerpt
for i in range(1,number_of_peaks+1):
    row = dataf.rx(i,True)
    rowA = np.array(row)
    newRow.append(rowA[2:])
    cvI.append(cv(rowA[2:]))
#cv.append(rowA[2:].std()/rowA[2:].mean())
# Convert the Python list to an R object and wrap it in a one-column frame.
cv_r=robjects.conversion.py2ri(cvI)
df_cv = {'CV' : cv_r}
dataf_cv = robjects.DataFrame(df_cv)
# R's melt() reshapes the frame; the code below relies on its second column
# being named 'value'.
dtf_cv = robjects.r.melt(dataf_cv)
# Append that column to the original data and rename it from 'value' to 'CV'.
d=dataf.cbind(dtf_cv.rx(2))
d.names[tuple(d.colnames).index('value')] = 'CV'
#d = base.merge_data_frame(dataf,dtf_cv.rx(2))
utilis.write_csv(d, options.csv_output)


# Add a 1..number_of_peaks index column to use as the x axis.
dc = dtf_cv.cbind(n_peak = robjects.IntVector(range(1,number_of_peaks+1)))
#n_peak = robjects.IntVector(1,number_of_peaks)
gp = ggplot2.ggplot(dc)
# Scatter plot of the melted 'value' column against the index.
pp=gp+ggplot2.aes_string(x='n_peak',y='value') + ggplot2.geom_point()+ggplot2.theme_bw()+ ggplot2.ggtitle('Coefficient of Variation')+ \
ggplot2.scale_x_continuous("Number of Peaks")+ ggplot2.scale_y_continuous("CV")

# Open an X11 window and draw the plot in it.
r.X11()
pp.plot()

Esempio n. 41
0
from rpy2.robjects.packages import importr
# NOTE(review): the `#-- <name>-begin` / `#-- <name>-end` comments look like
# markers used to cut excerpts out of this script for documentation; keep
# them in place and avoid adding lines between a begin/end pair.
base = importr('base')

datasets = importr('datasets')
mtcars = datasets.mtcars

#-- setupggplot2-end

# Open a 612x612 cairo PNG device for the first figure.
grdevices.png('../../_static/graphics_ggplot2mtcars.png',
              width = 612, height = 612, antialias="subpixel", type="cairo")
#-- ggplot2mtcars-begin
gp = ggplot2.ggplot(mtcars)

pp = gp + \
     ggplot2.aes_string(x='wt', y='mpg') + \
     ggplot2.geom_point()

pp.plot()
#-- ggplot2mtcars-end
grdevices.dev_off()

# Second figure: a 1000x350 device; a viewport with layout(1, 3) is pushed so
# that plots can be placed into individual cells.
grdevices.png('../../_static/graphics_ggplot2geombin2d.png',
              width = 1000, height = 350, antialias="subpixel", type="cairo")
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()

# Viewport addressing the cell at row 1, column 1 of that layout.
vp = grid.viewport(**{'layout.pos.col':1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
gp = ggplot2.ggplot(dataf_rnorm)

pp = gp + \
Esempio n. 42
0
# NOTE(review): the `#-- <name>-begin` / `#-- <name>-end` comments look like
# markers used to cut excerpts out of this script for documentation; keep
# them in place and avoid adding lines between a begin/end pair.
# Fetch the `mtcars` data set from the `datasets` R package.
mtcars = data(datasets).fetch('mtcars')['mtcars']

#-- setupggplot2-end

# Open a 612x612 cairo PNG device for the first figure.
grdevices.png('../../_static/graphics_ggplot2mtcars.png',
              width=612,
              height=612,
              antialias="subpixel",
              type="cairo")
#-- ggplot2mtcars-begin
gp = ggplot2.ggplot(mtcars)

pp = gp + \
     ggplot2.aes_string(x='wt', y='mpg') + \
     ggplot2.geom_point()

pp.plot()
#-- ggplot2mtcars-end
grdevices.dev_off()

# Second figure: a 1000x350 device; a viewport with layout(1, 3) is pushed so
# that plots can be placed into individual cells.
grdevices.png('../../_static/graphics_ggplot2geombin2d.png',
              width=1000,
              height=350,
              antialias="subpixel",
              type="cairo")
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()

# Viewport addressing the cell at row 1, column 1 of that layout.
vp = grid.viewport(**{'layout.pos.col': 1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
Esempio n. 43
0
        # "index" is equivalent to "names" in R
        if obj.ndim == 1:
            res.names = ListVector({'x': ro.conversion.py2ri(obj.index)})
        else:
            res.dimnames = ListVector(ro.conversion.py2ri(obj.index))
        return res
    else:
        return py2ri_orig(obj) 
rpy2.robjects.conversion.py2ri = conversion_pydataframe

# <codecell>

import pandas

# <codecell>

import rpy2.robjects.lib.ggplot2 as ggplot2

# <codecell>

# Two parallel integer columns: a = 0..9 and b = 10..19.
df = pandas.DataFrame({"a": range(10), "b": range(10, 20)})

# <codecell>

# Scatter plot of b against a, built one layer per statement.
pp = ggplot2.ggplot(df)
pp = pp + ggplot2.aes_string(x="a", y="b")
pp = pp + ggplot2.geom_point()
pp.plot()

# <codecell>


def plot_volcano_with_r(
    data,
    xlabel='Estimated effect (change in H/L ratio)',
    title='',
    max_labels=20,
    color_background='#737373',
    color_significant='#252525',
    color_significant_muted='#252525',
    label_only_large_fc=False,
    special_labels=None,
    special_palette=None,
    base_size=12,
    label_size=3,
    x='logFC',
    y='neg_log10_p_adjust',
    special_labels_mode='all',
    xlim=None,
    skip_labels=None,
    nudges=None,
):
    """Draw a labelled volcano plot on the active R graphics device.

    ``data`` is converted by ``transform_data_for_ggplot``; the labelling
    options (``label_only_large_fc``, ``special_labels``, ``max_labels``,
    ``special_labels_mode``, ``skip_labels``, ``nudges``) are forwarded to
    it unchanged.  Points are plotted at columns ``x``/``y`` and coloured by
    the ``group`` column using the palette from ``r_label_palette``.  Guide
    lines mark ``-log10(FDR_THRESHOLD_RESPONSE)`` on the y axis and
    ``+/-FC_THRESHOLD_RESPONSE`` on the x axis.  Labels come from the
    ``label`` column and are placed with ``geom_text_repel``, nudged by the
    ``nudgex``/``nudgey`` columns.

    ``xlim``, when given, is unpacked into the x-axis limits.  Nothing is
    returned; the plot is drawn as a side effect.
    """
    guide_colour = '#BDBDBD'
    font_family = 'Helvetica'

    r_data, r_like_data = transform_data_for_ggplot(
        data,
        label_only_large_fc=label_only_large_fc,
        special_labels=special_labels,
        max_labels=max_labels,
        special_labels_mode=special_labels_mode,
        skip_labels=skip_labels,
        nudges=nudges)

    # --- base plot and theme -------------------------------------------
    plot = r_ggplot2.ggplot(r_data)
    plot += r_ggplot2.theme_minimal(base_size=base_size)

    # no grid lines, black unfilled frame around the panel
    frame_theme = {
        'panel.grid.major': r_ggplot2.element_blank(),
        'panel.grid.minor': r_ggplot2.element_blank(),
        'panel.border': r_ggplot2.element_rect(fill=robjects.rinterface.NA,
                                               color="black"),
    }
    plot += r_ggplot2.theme(**frame_theme)
    plot += r_ggplot2.theme(
        text=r_ggplot2.element_text(family=font_family, face='plain'))
    plot += r_ggplot2.theme(
        **{'plot.title': r_ggplot2.element_text(hjust=0.5)})

    # --- point aesthetics and colour scale -----------------------------
    aes_points = r_ggplot2.aes_string(x=x, y=y, color='group')
    palette = r_label_palette(
        r_like_data,
        special_palette,
        color_background=color_background,
        color_significant=color_significant,
        color_significant_muted=color_significant_muted)
    scale_points = r_ggplot2.scale_colour_manual(aes_points, values=palette)

    plot += aes_points
    plot += scale_points

    # --- axes ----------------------------------------------------------
    # limits are only passed when the caller asked for them
    x_scale_kwargs = {'labels': r_custom.formatterFunTwoDigits}
    if xlim is not None:
        x_scale_kwargs['limits'] = robjects.r.c(*xlim)
    plot += r_ggplot2.scale_x_continuous(**x_scale_kwargs)

    plot += r_ggplot2.scale_y_continuous(labels=r_custom.formatterFunOneDigit)

    # --- threshold guide lines -----------------------------------------
    plot += r_ggplot2.geom_hline(
        yintercept=float(-np.log10(FDR_THRESHOLD_RESPONSE)),
        color=guide_colour,
        alpha=.3)
    for x_cut in (float(FC_THRESHOLD_RESPONSE),
                  -float(FC_THRESHOLD_RESPONSE)):
        plot += r_ggplot2.geom_vline(xintercept=x_cut,
                                     color=guide_colour,
                                     alpha=.3)

    # --- points and repelled labels ------------------------------------
    plot += r_ggplot2.geom_point(**{'show.legend': False})

    aes_text = r_ggplot2.aes_string(label='label')
    plot += aes_text

    repel_options = {
        'show.legend': False,
        'point.padding': 0.25,
        'min.segment.length': 0,
        'segment.color': guide_colour,
    }
    plot += r_ggrepel.geom_text_repel(
        aes_text,
        nudge_x=r_dollar(r_data, 'nudgex'),
        nudge_y=r_dollar(r_data, 'nudgey'),
        size=label_size,
        family=font_family,
        **repel_options)

    plot += r_ggplot2.labs(x=xlabel,
                           y='Adjusted p value (-log10)',
                           title=title)

    plot.plot()
Esempio n. 45
0
File: runDE.py Progetto: wqhf/flair
def main():
    '''
    Run DESeq2 on a count matrix and write QC plots and result tables.

    The tab-separated count matrix and sample table named on the command
    line are loaded, the design ``~ batch + condition`` is used when the
    sample table has a ``batch`` column and ``~ condition`` otherwise, and
    the following files are written into ``<cwd>/<outDir>``:

    * ``<prefix>_QCplots_<group1>_v_<group2>.pdf`` -- PCA, MA plots, p-value
      QQ plot, p-value histogram and dispersion estimates;
    * ``<prefix>_<group1>_v_<group2>_deseq2_results.tsv``;
    * ``<prefix>_<group1>_v_<group2>_deseq2_results_shrinkage.tsv``.
    '''

    # Command Line Stuff...
    myCommandLine = CommandLine()

    outdir     = myCommandLine.args['outDir']
    group1     = myCommandLine.args['group1']
    group2     = myCommandLine.args['group2']
    matrix     = myCommandLine.args['matrix']
    prefix     = myCommandLine.args['prefix']
    formula    = myCommandLine.args['formula']

    print("running DESEQ2 %s" % prefix, file=sys.stderr)

    # make the quant DF
    quantDF  = pd.read_table(matrix, header=0, sep='\t', index_col=0)
    df = pandas2ri.py2ri(quantDF)

    # import formula
    formulaDF     = pd.read_csv(formula,header=0, sep="\t",index_col=0)
    sampleTable = pandas2ri.py2ri(formulaDF)

    # The presence of a "batch" column drives the design, the PCA grouping
    # and the PCA plot aesthetics, so it is decided once.
    has_batch = "batch" in list(formulaDF)

    if has_batch:
        design = Formula("~ batch + condition")
    else:
        design = Formula("~ condition")

    # import DESeq2
    # The handles for methods/DESeq2/qqman are not used directly, but the
    # importr calls are kept because the R code below is evaluated by name.
    from rpy2.robjects.packages import importr
    import rpy2.robjects.lib.ggplot2 as ggplot2
    methods   = importr('methods')
    deseq     = importr('DESeq2')
    grdevices = importr('grDevices')
    qqman     = importr('qqman')

    ### RUN DESEQ2 ###
    R.assign('df', df)
    R.assign('sampleTable', sampleTable)
    R.assign('design',design)
    R('dds <- DESeqDataSetFromMatrix(countData = df, colData = sampleTable, design = design)')
    R('dds <- DESeq(dds)')
    R('name <- grep("condition", resultsNames(dds), value=TRUE)')

    # Get Results and shrinkage values
    res    = R('results(dds, name=name)')
    resLFC = R('lfcShrink(dds, coef=name)')
    vsd    = R('vst(dds,blind=FALSE)')
    resdf  = robjects.r['as.data.frame'](res)
    reslfc = robjects.r['as.data.frame'](resLFC)
    dds    = R('dds')

    ### Plotting section ###
    # plot MA and PC stats for the user
    plotMA    = robjects.r['plotMA']
    plotDisp  = robjects.r['plotDispEsts']
    plotPCA   = robjects.r['plotPCA']
    plotQQ    = robjects.r['qq']

    # get pca data
    if has_batch:
        intgroup = robjects.StrVector(("condition", "batch"))
    else:
        print(vsd)
        intgroup = "condition"
    pcaData    = plotPCA(vsd, intgroup=intgroup, returnData=robjects.r['T'])
    percentVar = robjects.r['attr'](pcaData, "percentVar")

    data_folder = os.path.join(os.getcwd(), outdir)
    qcOut = os.path.join(data_folder, "%s_QCplots_%s_v_%s.pdf"  % (prefix,group1,group2))

    # Axis labels.  The percent sign has to be part of the formatted string:
    # appending "%% variance" after formatting left a literal "%%" in the
    # label.
    x = "PC1: %d%% variance" % int(percentVar[0]*100)
    y = "PC2: %d%% variance" % int(percentVar[1]*100)

    # Points are shaped by batch only when a batch column exists.
    pca_aes = {'x': "PC1", 'y': "PC2", 'color': "condition"}
    if has_batch:
        pca_aes['shape'] = "batch"

    grdevices.pdf(file=qcOut)
    try:
        pp = ggplot2.ggplot(pcaData) + \
                ggplot2.aes_string(**pca_aes) + \
                ggplot2.geom_point(size=3) + \
                robjects.r['xlab'](x) + \
                robjects.r['ylab'](y) + \
                ggplot2.theme_classic() + \
                ggplot2.coord_fixed()
        pp.plot()
        plotMA(res, ylim=robjects.IntVector((-3,3)), main="MA-plot results")
        plotMA(resLFC, ylim=robjects.IntVector((-3,3)), main="MA-plot LFCSrhinkage")
        plotQQ(reslfc.rx2('pvalue'), main="LFCSrhinkage pvalue QQ")
        hh = ggplot2.ggplot(resdf) + \
                ggplot2.aes_string(x="pvalue") + \
                ggplot2.geom_histogram() + \
                ggplot2.theme_classic() + \
                ggplot2.ggtitle("pvalue distribution")
        hh.plot()
        plotDisp(dds, main="Dispersion Estimates")
    finally:
        # Close the PDF device even if one of the plots fails, so the file
        # is finalised and the device is not left open.
        grdevices.dev_off()

    lfcOut = os.path.join(data_folder, "%s_%s_v_%s_deseq2_results_shrinkage.tsv"  % (prefix,group1,group2))
    resOut = os.path.join(data_folder, "%s_%s_v_%s_deseq2_results.tsv"  % (prefix,group1,group2))

    robjects.r['write.table'](reslfc, file=lfcOut, quote=False, sep="\t")
    robjects.r['write.table'](resdf, file=resOut, quote=False, sep="\t")
Esempio n. 46
0
#print "onlysurf"
#print onlysurf

#colours2 = grdevices.topo_colors(10)
# Ten colours from R's cm.colors palette, used for the Temp gradient below.
colours2 = grdevices.cm_colors(10)
#colours2 = grdevices.rainbow(20)
#print colours2
#colours = ggplot2.rainbow(54)
#bins=10
gp = ggplot2.ggplot(onlysurf)
#gp = ggplot2.ggplot(onlyfilts)

# Layers are added one at a time: position and colour mapping, the colour
# gradient, black station labels, jittered points, and the title.
gp=gp+ggplot2.aes_string(x="Lon", y="Lat", col="Temp",label="Station")
gp=gp+ggplot2.scale_colour_gradientn(colours=colours2)
gp=gp+ggplot2.geom_text(col="black",offset = 10)
gp=gp+ggplot2.geom_point(position="jitter")
gp=gp+ggplot2.ggtitle(graphtitle)

# Load the mapping packages on the R side and fetch a map for "Europe".
# NOTE(review): `gp` is not plotted in this excerpt (the gp.plot() call
# further down is commented out), and the value of `ggmap(map)` is not kept;
# confirm whether anything is actually drawn.
robjects.r('library(ggmap)')
robjects.r('library(mapproj)')
robjects.r('map <- get_map(location = "Europe", zoom = 4)')
robjects.r('ggmap(map)')

#robjects.r('library(maps)')

#robjects.r('map("world", interior = FALSE)')

#robjects.r('map("state", boundary = FALSE, col="gray", add = TRUE)')
#gp.plot()

'''
Esempio n. 47
0
print("\nggplot")
print("------")
import numpy as np
import pandas as pd
import rpy2.robjects.packages as packages
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
# Import the mtcars dataset from R
R = ro.r
datasets = packages.importr('datasets')
mtcars = packages.data(datasets).fetch('mtcars')['mtcars']
# Build the plot with ggplot, one layer per statement: mpg against wt,
# points coloured by qsec on a yellow-to-red gradient, plus a smoother
gp = ggplot2.ggplot(mtcars)
pyplot = gp + ggplot2.aes_string(x='wt', y='mpg')
pyplot = pyplot + ggplot2.geom_point(ggplot2.aes_string(colour='qsec'))
pyplot = pyplot + ggplot2.scale_colour_gradient(low="yellow", high="red")
pyplot = pyplot + ggplot2.geom_smooth(method='auto')
pyplot = pyplot + ggplot2.labs(title="mtcars", x='wt', y='mpg')

pyplot.plot()

print("\nAnálise de Variância")
print("--------------------")
import rpy2.robjects as robjects

r = robjects.r

# Measurements of the control group
controle = robjects.FloatVector([
    4.17, 5.58, 5.18, 6.11, 4.50, 4.61,
    5.17, 4.53, 5.33, 5.14,
])
tratamento = robjects.FloatVector([4.81,4.17,4.41,3.59,5.87,3.83,
Esempio n. 48
0
# NOTE(review): the `#-- <name>-begin` / `#-- <name>-end` comments look like
# markers used to cut excerpts out of this script for documentation; keep
# them in place and avoid adding lines between a begin/end pair.
base = importr('base')

# Fetch the `mtcars` data set from the `datasets` R package.
mtcars = data(datasets).fetch('mtcars')['mtcars']

#-- setupggplot2-end

# Open a 612x612 cairo PNG device for the first figure; the antialiasing
# mode comes from the ANTIALIAS constant defined outside this excerpt.
grdevices.png('../../_static/graphics_ggplot2mtcars.png',
              width=612,
              height=612,
              antialias=ANTIALIAS,
              type="cairo")
#-- ggplot2mtcars-begin
gp = ggplot2.ggplot(mtcars)

pp = (gp + ggplot2.aes_string(x='wt', y='mpg') + ggplot2.geom_point())

pp.plot()
#-- ggplot2mtcars-end
grdevices.dev_off()
# Second figure: a 1000x350 device; a viewport with layout(1, 3) is pushed so
# that plots can be placed into individual cells.
grdevices.png('../../_static/graphics_ggplot2geombin2d.png',
              width=1000,
              height=350,
              antialias=ANTIALIAS,
              type="cairo")
grid.newpage()
grid.viewport(layout=grid.layout(1, 3)).push()

# Viewport addressing the cell at row 1, column 1 of that layout.
vp = grid.viewport(**{'layout.pos.col': 1, 'layout.pos.row': 1})
#-- ggplot2geombin2d-begin
gp = ggplot2.ggplot(dataf_rnorm)
Esempio n. 49
0
File: test.py Progetto: hphp/Kaggle
import math, datetime
import time
import rpy2.robjects.lib.ggplot2 as ggplot2
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.interactive import process_revents
grdevices = importr('grDevices')
# Start rpy2's R event processing so an interactive plot window stays
# responsive while Python keeps running.
process_revents.start()

base = importr('base')
datasets = importr('datasets')

mtcars = datasets.__rdata__.fetch('mtcars')['mtcars']
# mpg against wt, coloured by factor(cyl), with one linear fit per cyl group
pp = ggplot2.ggplot(mtcars) + ggplot2.aes_string(
    x='wt', y='mpg',
    col='factor(cyl)') + ggplot2.geom_point() + ggplot2.geom_smooth(
        ggplot2.aes_string(group='cyl'), method='lm')
#pp.plot()
#process_revents.start()
print(pp)
process_revents.process_revents()

# Keep the process alive until the user interrupts it.  The loop never ends
# on its own, so the stop() call used to be unreachable; `finally` makes
# sure event processing is shut down when Ctrl-C (or any error) ends the loop.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    pass
finally:
    process_revents.stop()
Esempio n. 50
0
def as_dataframe (cfg, results, basis):
  """Collect benchmark results into an R data frame and plot speedups.

  One row is produced for every combination of ``cfg.variations``,
  ``cfg.languages`` + 'ideal', ``cfg.problems`` + 'ideal' and
  ``cfg.threads`` (the ideal/ideal pair is skipped).  Rows where either the
  language or the problem is 'ideal' carry a speedup equal to the thread
  count and zero time, interval half-width and memory.

  For real rows the time is the mean of
  ``results[thread][prob][var][lang][0]``; variations whose name contains
  'seq' always read the entry of the last configured thread count and get a
  speedup of 1.  The 'SE' column holds half the width of the 99.9%
  ``t.test`` confidence interval.  Memory is the first line of the file
  named by ``get_mem_output``.  Speedup bounds come from R's ``pairwiseCI``.

  Args:
    cfg: configuration providing ``variations``, ``languages``,
      ``problems`` and ``threads``.
    results: nested mapping indexed as
      ``results[thread][problem][variation][language][0]``.
    basis: baseline selection.  'seq' uses the sequential variation, 'p1'
      the single-thread run, and 'fastest' the single-thread run only when
      its mean is lower than the sequential mean.

  Side effects: assigns ``df`` in the R global environment, saves it to
  ``performance.Rda`` and writes ``speedup-expertpar-all.pdf``.  Nothing is
  returned.
  """
  r = robjects.r
  varis = []
  langs = []
  probs = []
  times = []
  threads = []

  # speedups, with upper and lower bounds below
  speedups = []
  speedup_lowers = []
  speedup_uppers = []

  ses = [] # half-widths of the time confidence interval ('SE' column)
  mems = [] # memory usage

  langs_ideal = list (cfg.languages)
  langs_ideal.append ('ideal')

  probs_ideal = list (cfg.problems)
  probs_ideal.append ('ideal')

  for var in cfg.variations:
    for lang in langs_ideal: # cfg.languages:
      for prob in probs_ideal: # cfg.problems:
        for thread in cfg.threads:

          if lang == 'ideal' and prob == 'ideal':
            continue
          elif lang == 'ideal' or prob == 'ideal':
            # reference row: perfect linear speedup, no measurements
            varis.append (var)
            langs.append (pretty_langs[lang])
            probs.append (prob)
            threads.append (thread)
            speedups.append (thread)
            speedup_lowers.append (thread)
            speedup_uppers.append (thread)
            times.append (0)
            ses.append(0)
            mems.append (0)
            continue

          varis.append (var) # pretty_varis [var])
          langs.append (pretty_langs [lang])
          probs.append (prob)
          threads.append (thread)

          # sequential variations are read from the entry of the last
          # configured thread count, whatever the row's thread count is
          if var.find('seq') >= 0:
            thread = cfg.threads[-1]

          vals = FloatVector (results[thread][prob][var][lang][0])
          time = mean (vals)
          times.append (time)

          #
          # time confidence interval
          #
          # The keyword must be exactly "conf.level": with a leading space R
          # does not match it to t.test's parameter and the requested level
          # is not applied.
          t_result = r['t.test'] (vals,
                                  **{"conf.level": 0.999}).rx ('conf.int')[0]
          ses.append ((t_result[1] - t_result[0])/2)

          #
          # memory usage
          #
          mem_filename = get_mem_output (lang, prob, var)
          with open (mem_filename, 'r') as mem_file:
            mem = mem_file.readline()
            mems.append (float (mem))

          # we include dummy data for the sequential case to avoid the
          # speedup calculation below
          if var.find('seq') >= 0:
            speedups.append (1)
            speedup_lowers.append (1)
            speedup_uppers.append (1)
            continue

          #
          # speedup values and confidence intervals
          #
          seq_vals = results[cfg.threads[-1]][prob][var.replace ('par', 'seq')][lang][0]

          # sequential base
          base = FloatVector (seq_vals)
          # base with p = 1
          base_p1 = FloatVector (results[1][prob][var][lang][0])
          # use fastest sequential program
          if basis == 'fastest' and mean (base_p1) < mean(base):
            base = base_p1
          elif basis == 'seq':
            pass
          elif basis == 'p1':
            base = base_p1

          # ratio confidence interval between the baseline and this run
          labels = ['Base'] * r.length(base)[0] + ['N']*r.length (vals)[0]
          df = DataFrame ({'Times': base + vals,
                           'Type': StrVector(labels)})
          ratio_test = r['pairwiseCI'] (r('Times ~ Type'), data=df,
                                        control='N',
                                        method='Param.ratio',
                                        **{'var.equal': False})[0][0]

          speedups.append (mean(base) / time)
          speedup_lowers.append (ratio_test[1][0])
          speedup_uppers.append (ratio_test[2][0])

  df = robjects.DataFrame({'Language': StrVector (langs),
                           'Problem': StrVector (probs),
                           'Variation' : StrVector (varis),
                           'Threads': IntVector (threads),

                           'Time': FloatVector (times),
                           'SE': FloatVector (ses),

                           'Speedup': FloatVector (speedups),
                           'SpeedupLower': FloatVector (speedup_lowers),
                           'SpeedupUpper': FloatVector (speedup_uppers),

                           'Mem' : FloatVector (mems)
                           })


  r.assign ('df', df)

  r ('save (df, file="performance.Rda")')

  # reshape the data to make variation not a column itself, but a part of
  # the other columns describe ie, time, speedup, etc.
  #
  # also, remove the 'ideal' problem as we don't want it in this plot.
  df = r('''
redf = reshape (df, 
                timevar="Variation", 
                idvar = c("Language","Problem","Threads"), 
                direction="wide")
redf$Problem <- factor(redf$Problem, levels = c("randmat","thresh","winnow","outer","product","chain"))
redf[which(redf$Problem != "ideal"),]
''')

  r.pdf ('speedup-expertpar-all.pdf',
         height=6.5, width=10)

  change_name = 'Language'

  gg = ggplot2.ggplot (df)

  # error-bar limits taken from the reshaped expertpar columns
  limits = ggplot2.aes (ymax = 'SpeedupUpper.expertpar', ymin = 'SpeedupLower.expertpar')

  pp = gg + \
      ggplot2.geom_line() + ggplot2.geom_point(size=2.5) +\
      robjects.r('scale_color_manual(values = c("#ffcb7e", "#1da06b", "#b94646", "#00368a", "#CCCCCC"))') +\
      ggplot2.aes_string(x='Threads', y='Speedup.expertpar', 
                         group=change_name, color=change_name, 
                         shape=change_name) + \
      ggplot2.geom_errorbar (limits, width=0.25) + \
      ggplot2.opts (**{'axis.title.x' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 10, vjust=-0.2),
                       'axis.title.y' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 10, angle=90, vjust=0.2),
                       'axis.text.x' : ggplot2.theme_text(family = 'serif', size = 10),
                       'axis.text.y' : ggplot2.theme_text(family = 'serif', size = 10),
                       'legend.title' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 10),
                       'legend.text' : ggplot2.theme_text(family = 'serif', size = 10),
                       'strip.text.x' : ggplot2.theme_text(family = 'serif', size = 10),
                       'aspect.ratio' : 1,
                       }) + \
      robjects.r('ylab("Speedup")') + \
      robjects.r('xlab("Number of cores")') + \
      ggplot2.facet_wrap ('Problem', nrow = 2)

  pp.plot()

  r['dev.off']()
Esempio n. 51
0
def line_plot (cfg, var, control, change_name, changing, selector, base_selector, basis):
  """Draw a speedup-versus-thread-count line chart on the current R device.

  One line is drawn per entry of `changing`, plus an 'ideal' line whose
  speedup equals the thread count.  Error bars come from a ratio confidence
  interval (conf.level 0.999, method 'Param.ratio') computed by the R
  function `pairwiseCI`.

  cfg           -- object whose `threads` attribute lists the thread counts
  var, control  -- accepted but not used in this function
  change_name   -- name of the data-frame column that identifies each line;
                   when it equals 'Language' the entries of `changing` are
                   relabelled through `pretty_langs`
  changing      -- the values to draw one line for
  selector      -- selector(c) returns a callable that maps a thread count
                   to the timings for c
  base_selector -- base_selector(c) returns the sequential timings for c
  basis         -- 'p1' uses the single-thread timings as baseline,
                   'fastest' uses whichever of the sequential and
                   single-thread timings has the smaller mean; any other
                   value (including 'seq') keeps the sequential timings

  The function ends by calling R's dev.off(); it does not open a graphics
  device itself.
  """
  speedup_vals = []
  thread_counts = []
  series_names = []
  ci_lower = []
  ci_upper = []

  # Reference series: speedup, lower and upper bound all equal the thread count.
  # NOTE(review): probs and langs are not defined in this function;
  # presumably module-level lists -- confirm.
  for nthreads in cfg.threads:
    probs.append ('ideal')
    langs.append ('ideal')
    speedup_vals.append (nthreads)
    thread_counts.append (nthreads)
    series_names.append ('ideal')
    ci_lower.append (nthreads)
    ci_upper.append (nthreads)

  relabel = change_name == 'Language'

  for item in changing:
    timings_for = selector (item)

    seq_times = FloatVector (base_selector (item))
    p1_times = FloatVector (timings_for (1))

    # Pick the baseline the speedup is measured against.
    if basis == 'p1' or (basis == 'fastest' and mean (p1_times) < mean (seq_times)):
      baseline = p1_times
    else:
      baseline = seq_times

    for nthreads in cfg.threads:
      par_times = FloatVector (timings_for (nthreads))

      # Label every sample so pairwiseCI can compare the 'Base' group
      # against the 'N' group.
      kinds = ['Base'] * r.length (baseline)[0] + ['N'] * r.length (par_times)[0]
      samples = DataFrame ({'Times': baseline + par_times,
                            'Type': StrVector (kinds)})
      ci_table = r['pairwiseCI'] (r('Times ~ Type'), data=samples,
                                  control='N',
                                  method='Param.ratio',
                                  **{'var.equal': False,
                                     'conf.level': 0.999})
      interval = ci_table[0][0]

      ci_lower.append (interval[1][0])
      ci_upper.append (interval[2][0])

      par_mean = mean (par_times)
      speedup_vals.append (mean (baseline) / par_mean)
      thread_counts.append (nthreads)
      series_names.append (pretty_langs [item] if relabel else item)

  plot_df = DataFrame ({'Speedup': FloatVector (speedup_vals),
                        'Threads': IntVector (thread_counts),
                        change_name: StrVector (series_names),
                        'Lower': FloatVector (ci_lower),
                        'Upper': FloatVector (ci_upper)
                        })

  # Shape codes 0..k, named after 'ideal' followed by every plotted series.
  if relabel:
    legend_names = ['ideal'] + [pretty_langs [item] for item in changing]
  else:
    legend_names = ['ideal'] + list (changing)

  shape_codes = IntVector (range (len (legend_names)))
  shape_codes.names = StrVector (legend_names)

  base_plot = ggplot2.ggplot (plot_df)

  bar_limits = ggplot2.aes (ymax = 'Upper', ymin = 'Lower')
  # Built but not attached to any layer below.
  dodge = ggplot2.position_dodge (width=0.9)

  chart = base_plot + ggplot2.geom_line ()
  chart = chart + ggplot2.geom_point (size=3)
  chart = chart + ggplot2.aes_string (x='Threads', y='Speedup',
                                      group=change_name, color=change_name,
                                      shape=change_name)
  chart = chart + ggplot2.scale_shape_manual (values=shape_codes)
  chart = chart + ggplot2.geom_errorbar (bar_limits, width=0.25)
  chart = chart + ggplot2_options ()
  chart = chart + ggplot2_colors ()
  chart = chart + ggplot2.opts (**{'axis.title.x' : ggplot2.theme_text(family = 'serif', face = 'bold', size = 15, vjust=-0.2)})
  chart = chart + robjects.r('ylab("Speedup")')
  chart = chart + robjects.r('xlab("Cores")')

  chart.plot()

  r['dev.off']()
Esempio n. 52
0
# Text labels for the two fits.  Each colour group shares one x anchor,
# measured back from the largest area: 30 for the red group, 150 for the
# blue one.
x_red = str(max(areas)-30)
x_blue = str(max(areas)-150)

# Red group, captioned "Mean Annual".
annotate1 = r('annotate("text", x = ' + x_red
              + ', y = 0.5, color = "red", label = "Mean Annual", parse=FALSE)')
annotate2 = r('annotate("text", x = ' + x_red
              + ', y = 0.42, label = "' + r_sq_lab + '", color = "red", parse=TRUE)')
annotate3 = r('annotate("text", x = ' + x_red
              + ', y = 0.34, label = "slope~' + sl + '", color = "red", parse=TRUE)')

# Blue group, captioned "LGM".
annotate4 = r('annotate("text", x = ' + x_blue
              + ', y = 0.7, color = "blue", label = "LGM", parse=FALSE)')
annotate5 = r('annotate("text", x = ' + x_blue
              + ', y = 0.6, color = "blue", label = "' + r_sq_lab_lgm + '", parse=TRUE)')
annotate6 = r('annotate("text", x = ' + x_blue
              + ', y = 0.5, color = "blue", label = "slope~' + sl_lgm + '", parse=TRUE)')

# Log-log plot of discharge against area: dat_frame is drawn in blue with
# error bars and a linear fit, dat_frame2 is overlaid in red with its own fit.
pp = (
    ggplot2.ggplot(dat_frame)
    + ggplot2.aes_string(y='discharge', x='areas')
    + ggplot2.ggtitle('Area vs. Sediment Flux')
    + ggplot2.scale_x_log10(x_lab)
    + ggplot2.theme_bw()
    + ggplot2.stat_smooth(method = "lm", formula = 'y ~ x')
    + ggplot2.scale_y_log10(y_lab)
    + annotate1
    + annotate2
    + annotate3
    + annotate4
    + annotate5
    + annotate6
    + ggplot2.geom_point(color='blue')
    + ggplot2.geom_errorbar(ggplot2.aes_string(ymin='min',ymax='max'),
                            data=dat_frame, width=.02, alpha=.3)
    + ggplot2.geom_point(data=dat_frame2,color='red',show_guide='FALSE' )
    + ggplot2.stat_smooth(data=dat_frame2, method = "lm", formula = 'y ~ x',
                          color='red')
)

# Render the plot into a PDF file.
grdevices = importr('grDevices')

grdevices.pdf(file="area_qs.pdf")
pp.plot()
grdevices.dev_off()
Esempio n. 53
0
    [x[0] for x in combos_r])
from rpy2.robjects.lib import ggplot2
from rpy2.robjects.packages import importr

# Each column combines two vectors with `+`: the first built from
# times/combos, the second from combos_r.
d['time'] = (FloatVector(list(times))
             + FloatVector([row[0] for row in combos_r]))
d['n_loop'] = (IntVector([row[-1] for row in combos])
               + IntVector([row[1] for row in combos_r]))

# One "<code>:<sequence>" label per row.
n_rows = len(d['n_loop'])
d['group'] = StrVector(
    [d['code'][i] + ':' + d['sequence'][i] for i in xrange(n_rows)])
dataf = DataFrame(d)

# Lines and points use the same mapping: running time against loop count,
# coloured by code, with one facet per sequence.
p = (
    ggplot2.ggplot(dataf)
    + ggplot2.geom_line(ggplot2.aes_string(x="n_loop",
                                           y="time",
                                           colour="code"))
    + ggplot2.geom_point(ggplot2.aes_string(x="n_loop",
                                            y="time",
                                            colour="code"))
    + ggplot2.facet_wrap(Formula('~sequence'))
    + ggplot2.scale_y_continuous('running time')
    + ggplot2.scale_x_continuous('repeated n times')
    + ggplot2.xlim(0, max(n_loops))
    + ggplot2.opts(title = "Benchmark (running time)")
)

# Render the figure to a PNG file.
grdevices = importr('grDevices')
grdevices.png('../../_static/benchmark_sum.png', width=712, height=512)
p.plot()
grdevices.dev_off()

stats = importr('stats')
Esempio n. 54
0
def rest():
    """Scratch analysis: run several statistical tests and draw two plots.

    The function takes no arguments and has no return statement; every
    result is only bound to a local name.  It runs, in order, a
    Mann-Whitney U test (on hard-coded sample lists), a Kruskal-Wallis
    test, a Dunn post-hoc test, a one-way ANOVA and a Tukey HSD test, then
    builds two ggplot objects and saves them through ``ut.save_and_display``
    to a hard-coded PNG path.

    It reads names that are not defined inside the function
    (``q1_median_q3_rep_wide``, ``q1_median_q3_rep_long``, ``beta_values``,
    ``product``, ``scipy``, ``pd``, ``np``, ``gg``, ``mh_rpy2_styling``,
    ``rpy2_utils``, ``ut``).  Several statements are bare expressions whose
    value is discarded.
    """
    df = q1_median_q3_rep_wide
    # NOTE: the query strings below refer to this local as "@pops".
    pops = ["pdc", "dc-cd11b", "dc-cd8a"]

    stats_l = []
    # Iterate over every (statistic, population pair) combination.
    for stat, (popa, popb) in product(["Q1", "median", "Q3"],
                                      product(pops, pops)):
        print(stat, popa, popb)

        # The loop variables are overwritten with fixed values here, so every
        # row appended below carries the same stat/popa/popb.
        popa = "hsc"
        popb = "pdc"
        stat = "median"

        # The test runs on two hard-coded samples; the data-driven inputs
        # are commented out.
        mw_u, pvalue = scipy.stats.mannwhitneyu(
            [0.8, 0.81, 0.79],
            [0.4, 0.39, 0.41],
            # df.query("Population == @popa")[stat].to_numpy(),
            # df.query("Population == @popb")[stat].to_numpy(),
            use_continuity=True,
            alternative="two-sided",
        )
        # Bare expression; the value is discarded.
        pvalue

        stats_l.append([stat, popa, popb, mw_u, pvalue])
    # Collect the rows into a frame with named columns.
    stats_df = pd.DataFrame(stats_l).set_axis(
        ["stat", "popA", "popB", "U", "pvalue"], axis=1)

    # Reshape to one row per population and one column per replicate,
    # holding the "mean" values.
    kruskal_format_means = pd.pivot(
        q1_median_q3_rep_wide.query("Population in @pops"),
        index="Population",
        columns="Replicate",
        values="mean",
    )

    import scikit_posthocs

    # Kruskal-Wallis test with one sample per population.
    stat, p_value = scipy.stats.kruskal(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    # Dunn post-hoc test with 'fdr_bh' p-value adjustment.
    dunn_res_df = scikit_posthocs.posthoc_dunn(
        kruskal_format_means.to_numpy(),
        p_adjust='fdr_bh',
        sort=True,
    )

    # One-way ANOVA on the same samples; rebinds `stat` and `pvalue`.
    stat, pvalue = scipy.stats.f_oneway(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    # NOTE(review): only the top-level package is imported, while
    # statsmodels.stats.multicomp is accessed below -- confirm the submodule
    # is available as an attribute at that point.
    import statsmodels

    # Back to long format; the stacked values end up in the column named 0.
    df = kruskal_format_means.stack().reset_index()

    # Bare expression; the value is discarded.
    kruskal_format_means

    res = statsmodels.stats.multicomp.pairwise_tukeyhsd(
        df[0], df['Population'].to_numpy(), alpha=0.05)

    # The results of both expressions are discarded.
    res.pvalues
    res.summary()

    # wilcox.test(c(0.8, 0.79, 0.81), c(0.4, 0.39, 0.41), paired=F, exact=F)

    # NOTE: the query strings below refer to this local as "@plot_pops".
    plot_pops = ["pdc", "dc-cd8a", "dc-cd11b"]

    # Hard-coded output location.
    results_dir = "/icgc/dkfzlsdf/analysis/hs_ontogeny/notebook-data/gNs4xcMJscaLLwlt"
    point_plot_quartiles_png = results_dir + "/point-plot-quartiles.png"

    # Bare expression; the value is discarded.
    q1_median_q3_rep_wide

    # Sort by descending value, then number the rows within each
    # (Population, stat) group starting at 1.
    ggplot_data = (
        q1_median_q3_rep_long.query("Population in @plot_pops").sort_values(
            "value",
            ascending=False,
        ).groupby(["Population", "stat"]).apply(
            lambda df: df.assign(group_order=np.arange(1, df.shape[0] + 1))))

    # Dodged point plot of value per population, coloured by stat.
    g = (gg.ggplot(ggplot_data) + gg.aes_string(
        x="Population", y="value", group="group_order", color="stat") +
         gg.geom_point(position=gg.position_dodge(width=0.5), size=1) +
         mh_rpy2_styling.gg_paper_theme + gg.labs(y='Methylation (%)', x=''))
    # Not read anywhere in this function.
    a = 3

    rpy2_utils.image_png2(g, (ut.cm(6), ut.cm(6)))

    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        # additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(6),
    )

    # Bare expression; the value is discarded.
    q1_median_q3_rep_wide

    # Box plot with precomputed box statistics (stat="identity"), one box
    # per Population+Replicate "sample".
    g = (
        gg.ggplot(
            q1_median_q3_rep_wide.query("Population in @plot_pops").assign(
                sample=lambda df: df["Population"].astype(str) + df[
                    "Replicate"].astype(str))) + gg.geom_boxplot(
                        gg.aes_string(
                            x="Population",
                            fill="Population",
                            group="sample",
                            lower="Q1",
                            upper="Q3",
                            middle="median",
                            ymin="min1",
                            ymax="max99",
                            # position=gg.position_dodge(width=0.5),
                        ),
                        stat="identity",
                    )
        # + mh_rpy2_styling.gg_paper_theme
        + gg.theme(axis_text_x=gg.element_text(angle=90, hjust=1)) +
        gg.scale_fill_brewer(guide=False))
    # Not read anywhere in this function.
    a = 3
    # Same png_path as the first save_and_display call above.
    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(7),
    )
    # image_png2(g, (ut.cm(12), ut.cm(12)))

    # Bare expression; the selected column is discarded.
    beta_values.loc[:, ("hsc", "1")]
Esempio n. 55
0
def main():
    '''
    Run DESeq2 on a count matrix and write QC plots plus result tables.

    Inputs come from the project's CommandLine arguments: outDir, group1,
    group2, batch, matrix, prefix and formula.  The count matrix and the
    sample table are read as tab-separated files with the first column as
    index.  The design is "~ batch + condition" when the sample table has
    a "batch" column and "~ condition" otherwise.

    Files written:
      ./<outDir>/<prefix>_QCplots_<group1>_v_<group2>.pdf
          PCA, two MA plots, two QQ plots, p-value histogram, dispersions
      ./<outDir>/<prefix>_<group1>_v_<group2>_deseq2_results.tsv
      ./<outDir>/<prefix>_<group1>_v_<group2>_deseq2_results_shrinkage.tsv
    '''

    # Command Line Stuff...
    myCommandLine = CommandLine()

    outdir = myCommandLine.args['outDir']
    group1 = myCommandLine.args['group1']
    group2 = myCommandLine.args['group2']
    batch = myCommandLine.args['batch']  # read but not used below
    matrix = myCommandLine.args['matrix']
    prefix = myCommandLine.args['prefix']
    formula = myCommandLine.args['formula']

    # make the quant DF
    quantDF = pd.read_table(matrix, header=0, sep='\t', index_col=0)
    df = pandas2ri.py2ri(quantDF)

    # import formula (the sample table)
    formulaDF = pd.read_csv(formula, header=0, sep="\t", index_col=0)
    sampleTable = pandas2ri.py2ri(formulaDF)

    # The presence of a "batch" column decides the design, the PCA grouping
    # and the PCA point shapes; evaluate it once.
    has_batch = "batch" in formulaDF.columns

    if has_batch:
        design = Formula("~ batch + condition")
    else:
        design = Formula("~ condition")

    # import DESeq2
    from rpy2.robjects.packages import importr
    import rpy2.robjects.lib.ggplot2 as ggplot2
    methods = importr('methods')
    deseq = importr('DESeq2')
    grdevices = importr('grDevices')
    # Presumably supplies the R function 'qq' looked up below -- confirm.
    qqman = importr('qqman')

    dds = deseq.DESeqDataSetFromMatrix(countData=df,
                                       colData=sampleTable,
                                       design=design)

    dds = deseq.DESeq(dds)
    # Name(s) of the fitted coefficients that mention "condition".
    cont = robjects.r["grep"]("condition", robjects.r['resultsNames'](dds), value=True)

    # get results; orient the results for groupA vs B
    res = deseq.results(dds, name=cont)
    # results with shrinkage
    resLFC = deseq.lfcShrink(dds, coef=cont, type="apeglm")
    resdf = robjects.r['as.data.frame'](res)

    # NOTE(review): R is not defined inside this function; presumably a
    # module-level rpy2 handle -- confirm.
    R.assign('res', res)

    reslfc = robjects.r['as.data.frame'](resLFC)

    # plot MA and PC stats for the user
    plotMA = robjects.r['plotMA']
    plotDisp = robjects.r['plotDispEsts']
    plotPCA = robjects.r['plotPCA']
    plotQQ = robjects.r['qq']

    # Python booleans are passed instead of looking up R's T/F variables,
    # matching the quote=False / value=True calls elsewhere in this function.
    vsd = robjects.r['vst'](dds, blind=False)

    # get pca data
    if has_batch:
        intgroup = robjects.StrVector(("condition", "batch"))
    else:
        print(vsd)
        intgroup = "condition"
    pcaData = plotPCA(vsd, intgroup=intgroup, returnData=True)
    percentVar = robjects.r['attr'](pcaData, "percentVar")

    # A single format string yields "PC1: 42% variance".  The previous
    # `"PC1: %s" % n + "%% variance"` concatenated an unformatted "%%",
    # so the label showed a literal double percent sign.
    x = "PC1: %d%% variance" % int(percentVar[0] * 100)
    y = "PC2: %d%% variance" % int(percentVar[1] * 100)

    # Batch, when present, is additionally mapped to the point shape.
    aes_args = {'x': "PC1", 'y': "PC2", 'color': "condition"}
    if has_batch:
        aes_args['shape'] = "batch"

    grdevices.pdf(file="./%s/%s_QCplots_%s_v_%s.pdf" % (outdir, prefix, group1, group2))
    try:
        pp = ggplot2.ggplot(pcaData) + \
            ggplot2.aes_string(**aes_args) + \
            ggplot2.geom_point(size=3) + \
            robjects.r['xlab'](x) + \
            robjects.r['ylab'](y) + \
            ggplot2.theme_classic() + \
            ggplot2.coord_fixed()
        pp.plot()

        plotMA(res, ylim=robjects.IntVector((-3, 3)), main="MA-plot results")
        plotMA(resLFC, ylim=robjects.IntVector((-3, 3)), main="MA-plot LFCSrrhinkage")
        plotQQ(resdf.rx2('pvalue'), main="pvalue QQ")
        plotQQ(reslfc.rx2('pvalue'), main="LFCSrhinkage pvalue QQ")
        hh = ggplot2.ggplot(resdf) + \
            ggplot2.aes_string(x="pvalue") + \
            ggplot2.geom_histogram() + \
            ggplot2.theme_classic()
        hh.plot()
        plotDisp(dds, main="Dispersion Estimates")
    finally:
        # Close the PDF device even when one of the plots fails.
        grdevices.dev_off()

    lfcOut = "./%s/%s_%s_v_%s_deseq2_results_shrinkage.tsv" % (outdir, prefix, group1, group2)
    resOut = "./%s/%s_%s_v_%s_deseq2_results.tsv" % (outdir, prefix, group1, group2)

    robjects.r['write.table'](reslfc, file=lfcOut, quote=False, sep="\t")
    robjects.r['write.table'](resdf, file=resOut, quote=False, sep="\t")