Ejemplo n.º 1
0
def interval(locus_table, interval_table, intervals, loci, boxplot = True):
    """Build a ggplot of 'pi' (phylogenetic informativeness) by interval.

    The query text returned by get_interval_query() is spliced into an R
    ``dbGetQuery(con, ...)`` call. The result is stored in the R variable
    ``data`` and the plot is built from that variable.

    With ``boxplot`` true (the default) the plot has one box per interval
    with the observations jittered on top and coloured by locus. Otherwise
    it is a bar chart with one facet per locus.

    Returns the ggplot object without rendering it.
    """
    query = get_interval_query(intervals, loci, locus_table, interval_table)
    fetched = robjects.r('''data <- dbGetQuery(con, {})'''.format(query))
    # ``interval`` is a factor, so the data is explicitly re-sorted by the
    # first integer value of the interval: the factor levels are replaced
    # with whatever order_intervals() returns for element 1 of the result.
    # Sorting on the R side is cumbersome, hence the generated R statement.
    relevel_cmd = '''data$interval <- factor(data$interval, {})'''.format(
        order_intervals(fetched[1]))
    robjects.r(relevel_cmd)
    canvas = ggplot2.ggplot(robjects.r('''data'''))

    if not boxplot:
        # Bar chart: one panel per locus, legend hidden, x labels rotated.
        return (
            canvas
            + ggplot2.aes_string(x='interval', y='pi', fill='locus')
            + ggplot2.geom_bar()
            + ggplot2.facet_wrap(robjects.Formula('~ locus'))
            + ggplot2.opts(**{
                'axis.text.x': ggplot2.theme_text(angle=-90, hjust=0),
                'legend.position': 'none',
            })
            + ggplot2.scale_y_continuous('phylogenetic informativeness')
            + ggplot2.scale_x_discrete('interval (years ago)')
        )

    # Box plot: outliers are drawn at size 0 because every observation is
    # already shown by the jitter layer.
    return (
        canvas
        + ggplot2.aes_string(x='interval', y='pi')
        + ggplot2.geom_boxplot(**{'outlier.size': 0, 'alpha': 0.3})
        + ggplot2.geom_jitter(
            ggplot2.aes_string(color='locus'),
            size=3,
            alpha=0.6,
            position=ggplot2.position_jitter(width=0.25),
        )
        + ggplot2.scale_y_continuous('phylogenetic informativeness')
        + ggplot2.scale_x_discrete('interval (years ago)')
    )
Ejemplo n.º 2
0
def plot(request):
    """Return a PNG box plot of sentence similarity by grammaticality.

    Sentences whose rating is 'N' are excluded. The remaining similarity
    values are split into grammatical ("GRAM") and ungrammatical ("UNGRAM")
    groups and drawn as one box per group.

    The image is rendered by R into ``data.png`` (relative to the current
    working directory), read back, and returned in an HttpResponse with
    content type ``image/png``. ``request`` is not inspected.
    """
    r = robjects.r

    rated = Sentence.objects.exclude(rating='N')
    ungram = rated.filter(grammatical=False).values_list('similarity',
                                                         flat=True)
    gram = rated.filter(grammatical=True).values_list('similarity',
                                                      flat=True)

    gram_r = robjects.FloatVector(gram)
    ungram_r = robjects.FloatVector(ungram)

    # One R data.frame per group, stacked row-wise into a single frame.
    df = r["data.frame"]
    gram_df = df(gram="GRAM", similarity=gram_r)
    ungram_df = df(gram="UNGRAM", similarity=ungram_r)
    data = r['rbind'](gram_df, ungram_df)

    pp = (ggplot2.ggplot(data)
          + ggplot2.aes_string(x="gram", y="similarity")
          + ggplot2.geom_boxplot())

    grdevices = importr('grDevices')
    grdevices.png(file="data.png", width=580, height=512)
    try:
        pp.plot()
    finally:
        # Always close the device, otherwise a failed render leaves it open
        # for the lifetime of the R session.
        grdevices.dev_off()

    with open("data.png", "rb") as image_file:
        image_data = image_file.read()

    # ``content_type`` replaces the ``mimetype`` keyword, which newer Django
    # releases no longer accept.
    return HttpResponse(image_data, content_type="image/png")
Ejemplo n.º 3
0
def plot(request):
    """Return a PNG box plot of sentence similarity by grammaticality.

    Sentences whose rating is 'N' are excluded. The remaining similarity
    values are split into grammatical ("GRAM") and ungrammatical ("UNGRAM")
    groups and drawn as one box per group.

    The image is rendered by R into ``data.png`` (relative to the current
    working directory), read back, and returned in an HttpResponse with
    content type ``image/png``. ``request`` is not inspected.
    """
    r = robjects.r

    def similarities(grammatical):
        # Similarity values for one grammaticality group, rating 'N' removed.
        return Sentence.objects.filter(grammatical=grammatical).exclude(
            rating='N').values_list('similarity', flat=True)

    ungram_r = robjects.FloatVector(similarities(False))
    gram_r = robjects.FloatVector(similarities(True))

    # One R data.frame per group, stacked row-wise into a single frame.
    df = r["data.frame"]
    gram_df = df(gram="GRAM", similarity=gram_r)
    ungram_df = df(gram="UNGRAM", similarity=ungram_r)
    data = r['rbind'](gram_df, ungram_df)

    pp = (ggplot2.ggplot(data)
          + ggplot2.aes_string(x="gram", y="similarity")
          + ggplot2.geom_boxplot())

    grdevices = importr('grDevices')
    grdevices.png(file="data.png", width=580, height=512)
    try:
        pp.plot()
    finally:
        # Always close the device, otherwise a failed render leaves it open
        # for the lifetime of the R session.
        grdevices.dev_off()

    with open("data.png", "rb") as image_file:
        image_data = image_file.read()

    # ``content_type`` replaces the ``mimetype`` keyword, which newer Django
    # releases no longer accept.
    return HttpResponse(image_data, content_type="image/png")
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals,
             image_file_type):
    """Save a box plot comparing two sample sets to ``plotName``.

    grdevices       -- object exposing the R graphics-device functions
                       ``pdf``, ``png`` and ``dev_off``.
    plotName        -- output file path.
    samp_set1_vals  -- iterable of numeric values, labelled "set1".
    samp_set2_vals  -- iterable of numeric values, labelled "set2".
    image_file_type -- "pdf" selects the PDF device; any other value
                       selects a 512x512 PNG device.

    Each value is drawn as a jittered point over its set's box. Returns None.
    """
    # Copy into lists so that ``+`` below concatenates for any iterable
    # (for numpy arrays ``+`` would add elementwise instead).
    set1_vals = list(samp_set1_vals)
    set2_vals = list(samp_set2_vals)
    samp_vector = ["set1"] * len(set1_vals) + ["set2"] * len(set2_vals)

    dframe = robjects.DataFrame({
        "sample": robjects.StrVector(samp_vector),
        "value": robjects.FloatVector(set1_vals + set2_vals),
    })

    pp = (ggplot2.ggplot(dframe)
          + ggplot2.aes_string(x="sample", y='value')
          + ggplot2.geom_boxplot()
          + ggplot2.geom_jitter()
          + ggplot2.theme_bw())

    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    try:
        pp.plot()
    finally:
        # Close the device even when rendering fails so it is not leaked.
        grdevices.dev_off()
Ejemplo n.º 5
0
def render_plot(gp, args):
  """Render a plot using ggplot

  :gp: A base ggplot2 object
  :args: Plot options, wrapped in ``util.Namespace``. The attributes read
         are ``x`` and ``y`` (axis expressions), ``type`` (one of
         'points', 'lines', 'boxplot') and ``facets`` (a formula string
         for facet_grid, or None for no faceting).

  Raises NotImplementedError for an unknown ``type``. Faceting and the
  final rendering stay best-effort: a failure there is logged as a warning
  instead of being raised.
  """
  import logging

  import rpy2.robjects as ro
  import rpy2.robjects.lib.ggplot2 as ggplot2

  log = logging.getLogger(__name__)
  args = util.Namespace(args)

  pp = gp + ggplot2.aes_string(x=args.x,
                               y=args.y)

  if args.type == 'points':
    pp += ggplot2.geom_point()
  elif args.type == 'lines':
    pp += ggplot2.geom_line()
  elif args.type == 'boxplot':
    pp += ggplot2.geom_boxplot()
  else:
    raise NotImplementedError("{0} not implemented".format(args.type))

  if args.facets is not None:
    try:
      pp += ggplot2.facet_grid(ro.Formula(args.facets))
    except Exception:
      # Best effort: keep the unfaceted plot, but record why faceting failed.
      log.warning("could not facet plot by %r", args.facets, exc_info=True)

  try:
    pp.plot()
  except Exception:
    # Best effort: a rendering failure must not abort the caller.
    log.warning("could not render plot", exc_info=True)
Ejemplo n.º 6
0
 def BoxPlot_One(self, metabolite):
     """Draw a box plot of one metabolite, grouped by ``self.metadata``.

     ``metabolite`` is looked up in ``self.metabolite_dict`` to obtain the
     column of ``self.raw_data`` plotted on the y axis. All open R graphics
     devices are closed first, then the plot is drawn on the current device.
     """
     r('graphics.off()')
     canvas = ggplot2.ggplot(self.raw_data)
     # The column name is wrapped in backticks before being used as the
     # y aesthetic expression.
     y_column = '`' + self.metabolite_dict[metabolite] + '`'
     mapping = ggplot2.aes_string(x=self.metadata, y=y_column)
     figure = canvas + mapping + ggplot2.geom_boxplot()
     figure.plot()
Ejemplo n.º 7
0
	def boxPlot(self, dataframe, filename, x_parm, y_parm):
		"""Save a box plot of ``dataframe`` as a 512x512 PNG at ``filename``.

		x_parm and y_parm are the aesthetic expressions for the x and y
		axes. Boxes are filled with "aquamarine3" at alpha 0.7.
		Returns None.
		"""
		grdevices.png(file=filename, width=512, height=512)
		try:
			gg = (
				ggplot2.ggplot(dataframe)
				+ ggplot2.aes_string(x=x_parm, y=y_parm)
				+ ggplot2.geom_boxplot(alpha=0.7, fill="aquamarine3")
			)
			gg.plot()
		finally:
			# Close the PNG device even if building or rendering the plot
			# fails, so the device is not leaked.
			grdevices.dev_off()
Ejemplo n.º 8
0
def plot_coef(feat_mat_dir,
              model_dir,
              expt_names,
              pref,
              outfile=None,
              height=120,
              fsize=12):
    """Box-plot classification and regression coefficients per feature.

    For every experiment name ``ex`` the model is read from
    ``<model_dir>/<pref><ex>_model.pkl`` and the feature names from
    ``<feat_mat_dir>/<ex>_feat_mat.npz``. The coefficients of all
    experiments are concatenated along axis 1 and plotted as one box per
    feature, in one facet per coefficient kind.

    outfile -- when None the plot is drawn on the current R device,
               otherwise it is saved with ggsave (sizes in mm).
    height  -- saved plot height in mm.
    fsize   -- axis text size; strip text uses ``fsize + 1``.

    Returns the wide pandas DataFrame with columns 'feature',
    'Classification' and 'Regression'.

    Raises ValueError when ``expt_names`` is empty or when an experiment's
    feature names differ from those of the first experiment.
    """
    feat_names = None
    clf_coef = None
    reg_coef = None
    nexpt = 0

    for ex in expt_names:
        feat_mat_file = os.path.join(feat_mat_dir, ex + '_feat_mat.npz')
        model_file = os.path.join(model_dir, pref + ex + '_model.pkl')
        model = read_model(model_file)
        # Only the feature names are needed from the feature matrix file.
        _, _, tmp_feat_names, _ = read_feat_mat(feat_mat_file)

        if nexpt == 0:
            feat_names = tmp_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            # Compare lengths too: zip() alone stops at the shorter sequence
            # and would hide missing or extra features.
            if (len(feat_names) != len(tmp_feat_names)
                    or any(a != b
                           for a, b in zip(feat_names, tmp_feat_names))):
                raise ValueError(
                    "feature names of experiment {0!r} do not match those "
                    "of the first experiment".format(ex))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis=1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis=1)
        nexpt += 1

    if nexpt == 0:
        raise ValueError("expt_names must contain at least one experiment")

    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below reads the data row-first, which matches repeating
    # every feature name nexpt times.
    df = pd.DataFrame({
        'feature': np.repeat(feat_names, nexpt),
        'Classification': np.reshape(clf_coef, (clf_coef.size, )),
        'Regression': np.reshape(reg_coef, (reg_coef.size, )),
    })

    df2 = pd.melt(df, id_vars='feature', var_name='fun')
    r_df = com.convert_to_r_dataframe(df2)
    gp = (
        ggplot2.ggplot(r_df)
        + ggplot2.aes_string(x='factor(feature)', y='value')
        + ggplot2.facet_wrap('fun', scales='free_y')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('Importance')
        + ggplot2.scale_x_discrete('')
        + ggplot2.theme_bw()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize, angle=65,
                                                vjust=1, hjust=1),
            'axis.text.y': ggplot2.element_text(size=fsize),
            'strip.text.x': ggplot2.element_text(size=fsize + 1),
        })
    )
    # Saved width grows with the number of experiments, minimum 80 mm.
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename=outfile,
                    plot=gp,
                    width=w,
                    height=height,
                    unit='mm')
    return df
Ejemplo n.º 9
0
    def boxPlot(self, dataframe, filename, x_parm, y_parm):
        """Save a box plot of ``dataframe`` as a 512x512 PNG at ``filename``.

        x_parm and y_parm are the aesthetic expressions for the x and y
        axes. Boxes are filled with "aquamarine3" at alpha 0.7.
        Returns None.
        """
        grdevices.png(file=filename, width=512, height=512)
        try:
            gg = (
                ggplot2.ggplot(dataframe)
                + ggplot2.aes_string(x=x_parm, y=y_parm)
                + ggplot2.geom_boxplot(alpha=0.7, fill="aquamarine3")
            )
            gg.plot()
        finally:
            # Close the PNG device even if building or rendering the plot
            # fails, so the device is not leaked.
            grdevices.dev_off()
Ejemplo n.º 10
0
def plot_cv_r2(pandas_df, outfile, fsize = 10, height = 120, max_width = 50, xlab = 'Parameters'):
    """Save box plots of cross-validation R-squared, one box per 'title'.

    pandas_df must provide the columns 'title' and 'r2'. The figure is
    written to ``outfile`` with ggsave. Its width in mm is five times the
    number of distinct titles, but never less than ``max_width``; its
    height is ``height`` mm. ``fsize`` sets the axis text size and ``xlab``
    the x-axis label.
    """
    distinct_titles = set(pandas_df['title'])
    r_frame = com.convert_to_r_dataframe(pandas_df)

    figure = (
        ggplot2.ggplot(r_frame)
        + ggplot2.aes_string(x='factor(title)', y='r2')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('R-squared')
        + ggplot2.scale_x_discrete(xlab)
        + ggplot2.theme_bw()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize, angle=65,
                                                vjust=1, hjust=1),
            'axis.text.y': ggplot2.element_text(size=fsize),
        })
    )
    width_mm = max(5 * len(distinct_titles), max_width)
    ro.r.ggsave(filename=outfile, plot=figure, width=width_mm,
                height=height, unit='mm')
def runBoruta():
    """Run the Boruta relevance R script and plot the selected z-scores.

    Loads ``Rcode/zscores.RData``, sources ``borutaRelevance.R``, and calls
    the R function ``findRelevant`` on the ``massallfeatures`` and
    ``nonmassallfeatures`` objects taken from R's global environment. The
    ``masszscore_selected`` element of its result is then drawn as a box
    plot on the current R device. Returns None.
    """
    base.load("Rcode/zscores.RData")
    base.source('Z:/Cristina/MassNonmass/codeProject/codeBase/trainClassifier/Rcode/borutaRelevance.R')
    find_relevant = globalenv['findRelevant']
    boruta_result = find_relevant(globalenv['massallfeatures'],
                                  globalenv['nonmassallfeatures'])

    # Box plot comparing the relevant mass features with the same non-mass
    # features.
    figure = (
        ggplot2.ggplot(boruta_result.rx2("masszscore_selected"))
        + ggplot2.aes_string(x='MorN', y='zscores', fill='factor(MorN)')
        + ggplot2.geom_boxplot()
        + ggplot2.opts(title="Comparison of Z-scores for Mass confirmed features",
                       y="Z-scores")
    )
    figure.plot()
Ejemplo n.º 12
0
def makeDistanceBox( alldata, figurename, feature="distance") :
    """Save a box plot of ``feature`` by continent as a PNG at ``figurename``.

    NOTE: this adds (or overwrites) a ``distance`` column on the caller's
    ``alldata`` object, computed as ``het + hom``, whichever ``feature`` is
    plotted.

    alldata    -- pandas DataFrame providing ``het``, ``hom`` and
                  ``continent`` columns, plus ``feature`` if it is not
                  "distance".
    figurename -- output PNG path.
    feature    -- column plotted on the y axis.
    """
    alldata["distance"] = alldata.het + alldata.hom

    r_dataframe = com.convert_to_r_dataframe(alldata)
    p = (ggplot2.ggplot(r_dataframe)
         + ggplot2.aes_string(x="factor(continent)", y=feature)
         + ggplot2.geom_boxplot()
         + ggplot2.ggtitle("Distance from Reference by Continent")
         + ggplot2.theme(**mytheme))

    grdevices.png(figurename)
    try:
        p.plot()
    finally:
        # Close the device even when rendering fails so it is not leaked.
        grdevices.dev_off()
Ejemplo n.º 13
0
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    """Save a box plot of read counts, split by match status, as a PNG.

    get_vectors() supplies three vectors that become the 'barcode', 'count'
    and 'matched' columns of the plotted data frame. Counts are shown in
    millions (``count / 1000000``) as boxes with jittered points on top,
    and the plot is titled with ``expt_id``.

    The image is written to ``<directory>/<expt_id>.png`` at 4x5 inches
    and 300 dpi. Returns None.
    """
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({'barcode': barcodes, 'count': counts, 'matched': matches})
    p = (ggplot2.ggplot(df)
         + ggplot2.aes_string(x='factor(matched)', y='count / 1000000')
         # Outliers are drawn at size 0; the jitter layer shows every point.
         + ggplot2.geom_boxplot(outlier_size=0)
         + ggplot2.geom_jitter()
         + ggplot2.ggtitle(label=expt_id)
         + ggplot2.ggplot2.xlab(label="")
         + ggplot2.scale_y_continuous(name="Count\n(million reads)"))

    filename = "{0}/{1}.png".format(directory, expt_id)
    grdevices.png(filename=filename, width=4, height=5, unit='in', res=300)
    try:
        p.plot()
    finally:
        # Close the device even when rendering fails so it is not leaked.
        grdevices.dev_off()
Ejemplo n.º 14
0
def direct_taxon_abundance_box_plot(data, plot_file_path, title, xlabel, ylabel):
    """Save a box plot of 'abundance' by 'genotype' as a PDF.

    data           -- data frame with 'genotype' and 'abundance' columns.
    plot_file_path -- output PDF path.
    title          -- plot title.
    xlabel, ylabel -- axis labels.

    Besides the boxes, the observations are drawn twice: once jittered
    horizontally (``w=0.1``) and once at their exact position.
    Returns None.
    """
    grdevices.pdf(file=plot_file_path)
    try:
        pp = (ggplot2.ggplot(data)
              + ggplot2.aes_string(x='genotype', y='abundance')
              + ggplot2.geom_boxplot()
              + ggplot2.ggtitle(title)
              + ggplot2.labs(x=xlabel, y=ylabel)
              + ggplot2.geom_jitter(position=ggplot2.position_jitter(w=0.1))
              + ggplot2.geom_point())
        pp.plot()
    finally:
        # Close the PDF device even if building or rendering the plot
        # fails, so the device is not leaked.
        grdevices.dev_off()
Ejemplo n.º 15
0
def ridge_cv_plot(val_err, lam_range):
    """Draw box plots of validation error, one box per lambda.

    val_err is loaded into a pandas DataFrame whose columns are labelled
    with lam_range, then melted to long form so that every column becomes
    one box. The plot is drawn on the current R graphics device.
    Returns None.

    Source: http://rpy.sourceforge.net/rpy2/doc-2.3/html/graphics.html
    """
    wide = pd.DataFrame(val_err, columns=lam_range)
    # melt() yields the 'variable' / 'value' columns referenced below.
    long_form = pd.melt(wide)
    df_r = com.convert_to_r_dataframe(long_form)

    pp = (ggplot2.ggplot(df_r)
          + ggplot2.aes_string(x='factor(variable)', y='value')
          + ggplot2.geom_boxplot()
          + ggplot2.ggtitle("Validation Error by Lambda"))
    pp.plot()
Ejemplo n.º 16
0
def plot_summary(barcodes_obs, barcode_table, directory, expt_id):
    """Save a box plot of read counts, split by match status, as a PNG.

    get_vectors() supplies three vectors that become the 'barcode', 'count'
    and 'matched' columns of the plotted data frame. Counts are shown in
    millions (``count / 1000000``) as boxes with jittered points on top,
    and the plot is titled with ``expt_id``.

    The image is written to ``<directory>/<expt_id>.png`` at 4x5 inches
    and 300 dpi. Returns None.
    """
    barcodes, counts, matches = get_vectors(barcodes_obs, barcode_table)
    df = DataFrame({
        'barcode': barcodes,
        'count': counts,
        'matched': matches,
    })
    p = (ggplot2.ggplot(df)
         + ggplot2.aes_string(x='factor(matched)', y='count / 1000000')
         # Outliers are drawn at size 0; the jitter layer shows every point.
         + ggplot2.geom_boxplot(outlier_size=0)
         + ggplot2.geom_jitter()
         + ggplot2.ggtitle(label=expt_id)
         + ggplot2.ggplot2.xlab(label="")
         + ggplot2.scale_y_continuous(name="Count\n(million reads)"))

    filename = "{0}/{1}.png".format(directory, expt_id)
    grdevices.png(filename=filename, width=4, height=5, unit='in', res=300)
    try:
        p.plot()
    finally:
        # Close the device even when rendering fails so it is not leaked.
        grdevices.dev_off()
Ejemplo n.º 17
0
    def BoxPlot_All(self):
        """Show a box plot for every metabolite, four per X11 window.

        One plot is built per position ``i`` in ``self.metabolite_list``
        (1-based); its y aesthetic is the backtick-quoted column named
        ``str(i)`` of ``self.raw_data`` and its x aesthetic is
        ``self.metadata``. Plots are laid out on 2x2 grids, a new X11
        window being opened for each full page and for the final partial
        page. All open R graphics devices are closed first.

        Nothing is drawn when ``self.metabolite_list`` is empty.
        """
        r('graphics.off()')
        plots_per_page = 4  # matches the 2x2 grid below
        plots = []
        for i in range(1, len(self.metabolite_list) + 1):
            gp = ggplot2.ggplot(self.raw_data)
            plots.append(gp + \
                 ggplot2.aes_string(x=self.metadata, y='`'+str(i)+'`') + \
                 ggplot2.geom_boxplot())

            # Flush only once the page is full. Flushing before the append
            # left the first page with three plots.
            if len(plots) == plots_per_page:
                grDevices.X11()
                gridExtra.grid_arrange(*plots, nrow=2, ncol=2)
                plots = []

        # Remaining plots that did not fill a whole page.
        if plots:
            grDevices.X11()
            gridExtra.grid_arrange(*plots, nrow=2, ncol=2)
Ejemplo n.º 18
0
def plot_thresh_distr(motif_names, thresh, out_dir, width = 350):
    """Save box plots of the thresholds used with each feature.

    Only rows whose threshold is greater than 1 are kept. The kept rows are
    written tab-separated to ``<out_dir>/count_thresh.txt``, and a
    horizontal box plot (one box per motif, y axis limited to 0-70) is
    saved as ``count_thresh_bar.pdf`` and ``count_thresh_bar.png`` in
    ``out_dir``, ``width`` x 300 mm.
    """
    text_size = 10

    table = pd.DataFrame({'motif':motif_names, 'thresh':thresh})
    table = table[table['thresh'] > 1]
    table.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep = '\t', index = False)

    r_table = com.convert_to_r_dataframe(table)
    figure = (
        ggplot2.ggplot(r_table)
        + ggplot2.aes_string(x='factor(motif)', y='thresh')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('Threshold counts',
                                     limits=ro.IntVector([0, 70]))
        + ggplot2.scale_x_discrete('')
        + ggplot2.theme_bw()
        + ggplot2.coord_flip()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=text_size),
            'axis.text.y': ggplot2.element_text(size=text_size, hjust=1),
            'strip.text.x': ggplot2.element_text(size=text_size + 1),
        })
    )
    # Same figure, once per output format.
    for extension in ('.pdf', '.png'):
        target = os.path.join(out_dir, 'count_thresh_bar' + extension)
        ro.r.ggsave(filename=target, plot=figure, width=width,
                    height=300, unit='mm')
Ejemplo n.º 19
0
def plot_cv_r2(pandas_df,
               outfile,
               fsize=10,
               height=120,
               max_width=50,
               xlab='Parameters'):
    """Save box plots of cross-validation R-squared, one box per 'title'.

    pandas_df must provide the columns 'title' and 'r2'. The figure is
    written to ``outfile`` with ggsave. Its width in mm is five times the
    number of distinct titles, but never less than ``max_width``; its
    height is ``height`` mm. ``fsize`` sets the axis text size and ``xlab``
    the x-axis label.
    """
    distinct_titles = set(pandas_df['title'])
    r_frame = com.convert_to_r_dataframe(pandas_df)

    figure = (
        ggplot2.ggplot(r_frame)
        + ggplot2.aes_string(x='factor(title)', y='r2')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('R-squared')
        + ggplot2.scale_x_discrete(xlab)
        + ggplot2.theme_bw()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=fsize, angle=65,
                                                vjust=1, hjust=1),
            'axis.text.y': ggplot2.element_text(size=fsize),
        })
    )
    width_mm = max(5 * len(distinct_titles), max_width)
    ro.r.ggsave(filename=outfile, plot=figure, width=width_mm,
                height=height, unit='mm')
Ejemplo n.º 20
0
def compare_mean_boxplot(locus_table, interval_table, intervals, loci, names, rows):
    """Build a box plot of mean 'pi' per interval, filled by database.

    The plot is built from the R variable ``data`` -- presumably populated
    by get_r_data_by_top(); verify against that helper. When more than one
    interval is given, the ``interval`` factor is re-levelled in R using
    order_intervals() on element 1 of the returned frame.

    ``loci`` is accepted but not used here. Returns the ggplot object
    without rendering it.
    """
    fetched = get_r_data_by_top(locus_table, interval_table, intervals, names,
            rows)
    if len(intervals) > 1:
        relevel_cmd = '''data$interval <- factor(data$interval, {})'''.format(
            order_intervals(fetched[1]))
        robjects.r(relevel_cmd)

    canvas = ggplot2.ggplot(robjects.r('''data'''))
    box_style = {
        'outlier.size': 3,
        'outlier.colour': '#767676',
        'outlier.alpha': 0.3,
        'alpha': 0.6,
    }
    return (
        canvas
        + ggplot2.aes_string(x='interval', y='pi')
        + ggplot2.geom_boxplot(ggplot2.aes_string(fill='factor(db)'),
                               **box_style)
        + ggplot2.scale_y_continuous('mean phylogenetic informativeness')
        + ggplot2.scale_x_discrete('interval (years ago)')
        + ggplot2.scale_fill_brewer("database", palette='Blues')
    )
Ejemplo n.º 21
0
def plot_thresh_distr(motif_names, thresh, out_dir, width=350):
    """Save box plots of the thresholds used with each feature.

    Only rows whose threshold is greater than 1 are kept. The kept rows are
    written tab-separated to ``<out_dir>/count_thresh.txt``, and a
    horizontal box plot (one box per motif, y axis limited to 0-70) is
    saved as ``count_thresh_bar.pdf`` and ``count_thresh_bar.png`` in
    ``out_dir``, ``width`` x 300 mm.
    """
    text_size = 10

    table = pd.DataFrame({'motif': motif_names, 'thresh': thresh})
    table = table[table['thresh'] > 1]
    table.to_csv(os.path.join(out_dir, 'count_thresh.txt'), sep='\t', index=False)

    r_table = com.convert_to_r_dataframe(table)
    figure = (
        ggplot2.ggplot(r_table)
        + ggplot2.aes_string(x='factor(motif)', y='thresh')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('Threshold counts',
                                     limits=ro.IntVector([0, 70]))
        + ggplot2.scale_x_discrete('')
        + ggplot2.theme_bw()
        + ggplot2.coord_flip()
        + ggplot2.theme(**{
            'axis.text.x': ggplot2.element_text(size=text_size),
            'axis.text.y': ggplot2.element_text(size=text_size, hjust=1),
            'strip.text.x': ggplot2.element_text(size=text_size + 1),
        })
    )
    # Same figure, once per output format.
    for extension in ('.pdf', '.png'):
        target = os.path.join(out_dir, 'count_thresh_bar' + extension)
        ro.r.ggsave(filename=target,
                    plot=figure,
                    width=width,
                    height=300,
                    unit='mm')
Ejemplo n.º 22
0
def plot_coef(feat_mat_dir, model_dir, expt_names, pref, outfile = None, height = 120, fsize = 12):
    """Box-plot classification and regression coefficients per feature.

    For every experiment name ``ex`` the model is read from
    ``<model_dir>/<pref><ex>_model.pkl`` and the feature names from
    ``<feat_mat_dir>/<ex>_feat_mat.npz``. The coefficients of all
    experiments are concatenated along axis 1 and plotted as one box per
    feature, in one facet per coefficient kind.

    outfile -- when None the plot is drawn on the current R device,
               otherwise it is saved with ggsave (sizes in mm).
    height  -- saved plot height in mm.
    fsize   -- axis text size; strip text uses ``fsize + 1``.

    Returns the wide pandas DataFrame with columns 'feature',
    'Classification' and 'Regression'.

    Raises ValueError when ``expt_names`` is empty or when an experiment's
    feature names differ from those of the first experiment.
    """
    feat_names = None
    clf_coef = None
    reg_coef = None
    nexpt = 0

    for ex in expt_names:
        feat_mat_file = os.path.join(feat_mat_dir, ex + '_feat_mat.npz')
        model_file = os.path.join(model_dir, pref + ex + '_model.pkl')
        model = read_model(model_file)
        # Only the feature names are needed from the feature matrix file.
        _, _, tmp_feat_names, _ = read_feat_mat(feat_mat_file)

        if nexpt == 0:
            feat_names = tmp_feat_names
            clf_coef = model.clf_coef()
            reg_coef = model.reg_coef()
        else:
            # Compare lengths too: zip() alone stops at the shorter sequence
            # and would hide missing or extra features.
            names_differ = (
                len(feat_names) != len(tmp_feat_names)
                or any(a != b for a, b in zip(feat_names, tmp_feat_names)))
            if names_differ:
                raise ValueError(
                    "feature names of experiment {0!r} do not match those "
                    "of the first experiment".format(ex))
            clf_coef = np.concatenate((clf_coef, model.clf_coef()), axis = 1)
            reg_coef = np.concatenate((reg_coef, model.reg_coef()), axis = 1)
        nexpt += 1

    if nexpt == 0:
        raise ValueError("expt_names must contain at least one experiment")

    # Now clf_coef has one row per coefficient and one column per experiment.
    # The reshape below reads the data row-first, which matches repeating
    # every feature name nexpt times.
    df = pd.DataFrame({'feature':np.repeat(feat_names, nexpt),
                       'Classification':np.reshape(clf_coef, (clf_coef.size,)),
                       'Regression':np.reshape(reg_coef, (reg_coef.size,))})

    df2 = pd.melt(df, id_vars = 'feature', var_name = 'fun')
    r_df = com.convert_to_r_dataframe(df2)
    gp = (
        ggplot2.ggplot(r_df)
        + ggplot2.aes_string(x = 'factor(feature)', y = 'value')
        + ggplot2.facet_wrap('fun', scales = 'free_y')
        + ggplot2.geom_boxplot()
        + ggplot2.scale_y_continuous('Importance')
        + ggplot2.scale_x_discrete('')
        + ggplot2.theme_bw()
        + ggplot2.theme(**{'axis.text.x':ggplot2.element_text(size = fsize, angle = 65, vjust = 1, hjust = 1),
                           'axis.text.y':ggplot2.element_text(size = fsize),
                           'strip.text.x':ggplot2.element_text(size = fsize + 1)})
    )
    # Saved width grows with the number of experiments, minimum 80 mm.
    w = max(22 * nexpt, 80)
    if outfile is None:
        gp.plot()
    else:
        ro.r.ggsave(filename = outfile, plot = gp, width = w, height = height, unit = 'mm')
    return df
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals,
             image_file_type):
    """Save a box plot comparing two sample sets to ``plotName``.

    grdevices       -- object exposing the R graphics-device functions
                       ``pdf``, ``png`` and ``dev_off``.
    plotName        -- output file path.
    samp_set1_vals  -- iterable of numeric values, labelled "set1".
    samp_set2_vals  -- iterable of numeric values, labelled "set2".
    image_file_type -- "pdf" selects the PDF device; any other value
                       selects a 512x512 PNG device.

    Each value is drawn as a jittered point over its set's box. Returns None.
    """
    # Copy into lists so that ``+`` below concatenates for any iterable
    # (for numpy arrays ``+`` would add elementwise instead).
    first_values = list(samp_set1_vals)
    second_values = list(samp_set2_vals)
    labels = ["set1"] * len(first_values) + ["set2"] * len(second_values)

    dframe = robjects.DataFrame({"sample":robjects.StrVector(labels),
                                 "value":robjects.FloatVector(first_values + second_values)})

    pp = (ggplot2.ggplot(dframe)
          + ggplot2.aes_string(x="sample", y='value')
          + ggplot2.geom_boxplot()
          + ggplot2.geom_jitter()
          + ggplot2.theme_bw())

    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    try:
        pp.plot()
    finally:
        # Close the device even when rendering fails so it is not leaked.
        grdevices.dev_off()
Ejemplo n.º 24
0
pp.plot(vp=vp)
#-- ggplot2geomhexbin-end

# Close the graphics device the plot above was drawn on. That device, and
# the names pp and vp, are set up before this excerpt.
grdevices.dev_off()

# Open a 612x612 PNG device for the box plot example figure.
grdevices.png('../../_static/graphics_ggplot2geomboxplot.png',
              width=612,
              height=612,
              antialias="subpixel",
              type="cairo")
#-- ggplot2geomboxplot-begin
gp = ggplot2.ggplot(mtcars)

pp = gp + \
     ggplot2.aes_string(x='factor(cyl)', y='mpg') + \
     ggplot2.geom_boxplot()

pp.plot()
#-- ggplot2geomboxplot-end
# Close the box plot PNG device.
grdevices.dev_off()

# NOTE(review): the histogram below is built but never rendered here. Its
# plot() call is commented out and no device is opened for it in this
# excerpt. The "#--" begin/end comments look like snippet-extraction
# markers (presumably for the documentation build), so no comments are
# added between them.
#-- ggplot2geomhistogram-begin
gp = ggplot2.ggplot(mtcars)

pp = gp + \
     ggplot2.aes_string(x='wt') + \
     ggplot2.geom_histogram()

#pp.plot()
#-- ggplot2geomhistogram-end
Ejemplo n.º 25
0
pp.plot(vp = vp)
#-- ggplot2geomhexbin-end

# Close the graphics device the plot above was drawn on. That device, and
# the names pp and vp, are set up before this excerpt.
grdevices.dev_off()




# Open a 612x612 PNG device for the box plot example figure.
grdevices.png('../../_static/graphics_ggplot2geomboxplot.png',
              width = 612, height = 612, antialias="subpixel", type="cairo")
#-- ggplot2geomboxplot-begin
gp = ggplot2.ggplot(mtcars)

pp = gp + \
     ggplot2.aes_string(x='factor(cyl)', y='mpg') + \
     ggplot2.geom_boxplot()

pp.plot()
#-- ggplot2geomboxplot-end
# Close the box plot PNG device.
grdevices.dev_off()


# NOTE(review): the histogram below is built but never rendered here. Its
# plot() call is commented out and no device is opened for it in this
# excerpt. The "#--" begin/end comments look like snippet-extraction
# markers (presumably for the documentation build), so no comments are
# added between them.
#-- ggplot2geomhistogram-begin
gp = ggplot2.ggplot(mtcars)

pp = gp + \
     ggplot2.aes_string(x='wt') + \
     ggplot2.geom_histogram()

#pp.plot()
#-- ggplot2geomhistogram-end
Ejemplo n.º 26
0
	def plot(self, fn, x='x', y='y', col=None, group=None, w=1100, h=800, size=2, smooth=True, point=True, jitter=False, boxplot=False, boxplot2=False, title=False, flip=False, se=False, density=False, line=False):
		"""Draw ``self.df`` with ggplot2 and save the figure as a PNG at ``fn``.

		Enabled layers are added in this order: box plot, points, second
		box plot, smoother, density, line.

		fn       -- output PNG path. When ``title`` is falsy, the part after
		            the last '/' is used as the plot title.
		x, y     -- aesthetic expressions for the axes.
		col      -- optional colour aesthetic, also used as the fill of box
		            plots and points.
		group    -- optional group aesthetic.
		w, h     -- width and height passed to the PNG device.
		size     -- point size.
		smooth   -- falsy: no smoother. 'lm': smoother with method 'lm'.
		            Any other truthy value: stat_smooth's default method.
		point    -- add a point layer; ``jitter`` jitters its positions.
		boxplot  -- add a box plot layer before the points.
		boxplot2 -- add a box plot layer after the points.
		title    -- plot title (see ``fn``).
		flip     -- flip the coordinate axes.
		se       -- passed to stat_smooth as ``se``.
		density  -- add a density layer with y='..count..'.
		line     -- add a jittered line layer.

		Prints ">> saved: <fn>" after the device has been closed.
		"""
		df = self.df
		grdevices = importr('grDevices')

		if not title:
			title = fn.split("/")[-1]

		grdevices.png(file=fn, width=w, height=h)
		try:
			# Base aesthetics: col and group are included only when given.
			base_aes = {'x': x, 'y': y}
			if col:
				base_aes['col'] = col
			if group:
				base_aes['group'] = group
			pp = ggplot2.ggplot(df) + ggplot2.aes_string(**base_aes)

			if boxplot:
				if col:
					pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue')
				else:
					pp += ggplot2.geom_boxplot(color='blue')

			if point:
				point_args = {'size': size}
				if jitter:
					point_args['position'] = 'jitter'
				if col:
					pp += ggplot2.geom_point(ggplot2.aes_string(fill=col, col=col), **point_args)
				else:
					pp += ggplot2.geom_point(**point_args)

			if boxplot2:
				if col:
					pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col), color='blue', outlier_colour="NA")
				else:
					# NOTE(review): unlike the col branch, outliers are not
					# hidden here -- confirm whether that is intended.
					pp += ggplot2.geom_boxplot(color='blue')

			if smooth:
				smooth_args = {'size': 1, 'se': se}
				if smooth == 'lm':
					smooth_args['method'] = 'lm'
				if col:
					pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col), **smooth_args)
				else:
					pp += ggplot2.stat_smooth(col='blue', **smooth_args)

			if density:
				pp += ggplot2.geom_density(ggplot2.aes_string(x=x, y='..count..'))

			if line:
				pp += ggplot2.geom_line(position='jitter')

			pp += ggplot2.opts(**{'title': title, 'axis.text.x': ggplot2.theme_text(size=24), 'axis.text.y': ggplot2.theme_text(size=24, hjust=1)})
			pp += ggplot2.scale_colour_hue()
			if flip:
				pp += ggplot2.coord_flip()

			pp.plot()
		finally:
			# Close the PNG device even if building or rendering the plot
			# fails, so the device is not leaked.
			grdevices.dev_off()
		# Single-argument call form prints identically on Python 2 and 3.
		print(">> saved: " + fn)
Ejemplo n.º 27
0
  # print str(a)
   try:
      if dsumFC.has_key(drug):
         dsumFC[drug]['Fold_Change'].append(math.log10(float(val)))
         dsumY[drug]['Year'].append(yr)
      else:
         dsumFC[drug]= {'Fold_Change': [math.log10(float(val)),]}
         dsumY[drug]= {'Year': [yr,]}
   except:
      print "FAILURE: dsumFC="+str(dsumFC)+"\n\ndsumY="+str(dsumY)
      sys.exit()
drugs = dsumFC.keys()

# Open the PDF once, draw one page per drug, and close it once at the end.
# The previous layout was not valid Python (the device call sat at column 0
# between the loop header line and the indented loop body), and it closed
# the device inside the per-drug section while opening it only once.
grdevices.pdf(file="drugs.pdf",width=7,height=7)
try:
   for x in drugs:
      # NOTE(review): FactorVector(x) receives the drug name as a plain
      # string; confirm the resulting 'Drug' column has the intended length.
      od = rlc.OrdDict([('Fold_Change',robjects.FloatVector(dsumFC[x]['Fold_Change'])),('Year',robjects.FactorVector(dsumY[x]['Year'])),('Drug',robjects.FactorVector(x))])

      dataf = robjects.DataFrame(od)
      gp3 = ggplot2.ggplot(dataf)
      # One box per year for this drug's fold changes (log10 values built above).
      pp3 = gp3 + ggplot2.scale_fill_brewer(palette='BrBG',name="Year")+ ggplot2.aes_string(x='Year',y='Fold_Change',fill='factor(Year)') +  ggplot2.geom_boxplot() + ggplot2.opts(title =  x+" Yearly Trend")
      pp3.plot()
finally:
   # Close the device even when a plot fails so the PDF is finalised.
   grdevices.dev_off()

f.close()
# Single-argument call form prints identically on Python 2 and 3.
print("\nfinished\n")






Ejemplo n.º 28
0
def rest():
    """Exploratory statistics and plots for per-population quartile data.

    Reads the module-level frames ``q1_median_q3_rep_wide``,
    ``q1_median_q3_rep_long`` and ``beta_values``. Runs Mann-Whitney,
    Kruskal-Wallis, Dunn, one-way ANOVA and Tukey HSD computations whose
    results are only bound to local names, then builds two ggplot figures
    and passes them to ``ut.save_and_display``. Returns None.

    NOTE(review): several statements are bare expressions with no effect
    (notebook-style inspection residue); they are flagged inline.
    """
    df = q1_median_q3_rep_wide
    pops = ["pdc", "dc-cd11b", "dc-cd8a"]

    stats_l = []
    # Iterates over every (stat, popa, popb) combination ...
    for stat, (popa, popb) in product(["Q1", "median", "Q3"],
                                      product(pops, pops)):
        print(stat, popa, popb)

        # ... but the loop variables are immediately overwritten with fixed
        # values, so every appended row carries the same labels.
        popa = "hsc"
        popb = "pdc"
        stat = "median"

        # The test runs on literal sample values; the DataFrame-based
        # inputs are commented out below.
        mw_u, pvalue = scipy.stats.mannwhitneyu(
            [0.8, 0.81, 0.79],
            [0.4, 0.39, 0.41],
            # df.query("Population == @popa")[stat].to_numpy(),
            # df.query("Population == @popb")[stat].to_numpy(),
            use_continuity=True,
            alternative="two-sided",
        )
        # Bare expression: no effect.
        pvalue

        stats_l.append([stat, popa, popb, mw_u, pvalue])
    # One row per loop iteration; not used again in this function.
    stats_df = pd.DataFrame(stats_l).set_axis(
        ["stat", "popA", "popB", "U", "pvalue"], axis=1)

    # Wide table: one row per population, one column per replicate, holding
    # the "mean" column.
    kruskal_format_means = pd.pivot(
        q1_median_q3_rep_wide.query("Population in @pops"),
        index="Population",
        columns="Replicate",
        values="mean",
    )

    import scikit_posthocs

    # Kruskal-Wallis across the populations in ``pops``.
    stat, p_value = scipy.stats.kruskal(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    # Dunn post-hoc test with 'fdr_bh' p-value adjustment; the result is
    # not used again in this function.
    dunn_res_df = scikit_posthocs.posthoc_dunn(
        kruskal_format_means.to_numpy(),
        p_adjust='fdr_bh',
        sort=True,
    )

    # One-way ANOVA on the same groups; rebinds ``stat`` and ``pvalue``.
    stat, pvalue = scipy.stats.f_oneway(
        *[kruskal_format_means.loc[pop].to_numpy() for pop in pops], )

    # NOTE(review): the attribute chain statsmodels.stats.multicomp used
    # below relies on that submodule having been imported somewhere;
    # a bare ``import statsmodels`` may not be enough -- confirm.
    import statsmodels

    # Long form again; the stacked values end up in the column labelled 0,
    # which is what df[0] reads below.
    df = kruskal_format_means.stack().reset_index()

    # Bare expression: no effect.
    kruskal_format_means

    res = statsmodels.stats.multicomp.pairwise_tukeyhsd(
        df[0], df['Population'].to_numpy(), alpha=0.05)

    # Attribute read and summary() call whose results are discarded.
    res.pvalues
    res.summary()

    # wilcox.test(c(0.8, 0.79, 0.81), c(0.4, 0.39, 0.41), paired=F, exact=F)

    plot_pops = ["pdc", "dc-cd8a", "dc-cd11b"]

    results_dir = "/icgc/dkfzlsdf/analysis/hs_ontogeny/notebook-data/gNs4xcMJscaLLwlt"
    point_plot_quartiles_png = results_dir + "/point-plot-quartiles.png"

    # Bare expression: no effect.
    q1_median_q3_rep_wide

    # Rows for the plotted populations, sorted by descending value; within
    # each (Population, stat) group the rows are numbered 1..n in
    # ``group_order``, which the point plot uses as its group aesthetic.
    ggplot_data = (
        q1_median_q3_rep_long.query("Population in @plot_pops").sort_values(
            "value",
            ascending=False,
        ).groupby(["Population", "stat"]).apply(
            lambda df: df.assign(group_order=np.arange(1, df.shape[0] + 1))))

    # Point plot of value by population, coloured by stat and dodged by
    # group_order.
    g = (gg.ggplot(ggplot_data) + gg.aes_string(
        x="Population", y="value", group="group_order", color="stat") +
         gg.geom_point(position=gg.position_dodge(width=0.5), size=1) +
         mh_rpy2_styling.gg_paper_theme + gg.labs(y='Methylation (%)', x=''))
    # Assigned but never read in this function.
    a = 3

    rpy2_utils.image_png2(g, (ut.cm(6), ut.cm(6)))

    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        # additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(6),
    )

    # Bare expression: no effect.
    q1_median_q3_rep_wide

    # Box plot drawn from precomputed statistics (stat="identity"): the
    # box and whisker positions come from the Q1, Q3, median, min1 and
    # max99 columns, with one box per sample (Population + Replicate).
    g = (
        gg.ggplot(
            q1_median_q3_rep_wide.query("Population in @plot_pops").assign(
                sample=lambda df: df["Population"].astype(str) + df[
                    "Replicate"].astype(str))) + gg.geom_boxplot(
                        gg.aes_string(
                            x="Population",
                            fill="Population",
                            group="sample",
                            lower="Q1",
                            upper="Q3",
                            middle="median",
                            ymin="min1",
                            ymax="max99",
                            # position=gg.position_dodge(width=0.5),
                        ),
                        stat="identity",
                    )
        # + mh_rpy2_styling.gg_paper_theme
        + gg.theme(axis_text_x=gg.element_text(angle=90, hjust=1)) +
        gg.scale_fill_brewer(guide=False))
    # Assigned but never read in this function.
    a = 3
    # NOTE(review): this call is given the same png_path as the point plot
    # above -- confirm that reusing the path is intended.
    ut.save_and_display(
        g,
        png_path=point_plot_quartiles_png,
        additional_formats=tuple(),
        height=ut.cm(6),
        width=ut.cm(7),
    )
    # image_png2(g, (ut.cm(12), ut.cm(12)))

    # Bare expression: the selected column is read and discarded.
    beta_values.loc[:, ("hsc", "1")]
Ejemplo n.º 29
0
    def plot(self,
             fn,
             x='x',
             y='y',
             col=None,
             group=None,
             w=1100,
             h=800,
             size=2,
             smooth=True,
             point=True,
             jitter=False,
             boxplot=False,
             boxplot2=False,
             title=False,
             flip=False,
             se=False,
             density=False,
             line=False):
        """Draw ``self.df`` with ggplot2 and save the figure as a PNG at ``fn``.

        Enabled layers are added in this order: box plot, points, second
        box plot, smoother, density, line.

        fn       -- output PNG path. When ``title`` is falsy, the part after
                    the last '/' is used as the plot title.
        x, y     -- aesthetic expressions for the axes.
        col      -- optional colour aesthetic, also used as the fill of box
                    plots and points.
        group    -- optional group aesthetic.
        w, h     -- width and height passed to the PNG device.
        size     -- point size.
        smooth   -- falsy: no smoother. 'lm': smoother with method 'lm'.
                    Any other truthy value: stat_smooth's default method.
        point    -- add a point layer; ``jitter`` jitters its positions.
        boxplot  -- add a box plot layer before the points.
        boxplot2 -- add a box plot layer after the points.
        title    -- plot title (see ``fn``).
        flip     -- flip the coordinate axes.
        se       -- passed to stat_smooth as ``se``.
        density  -- add a density layer with y='..count..'.
        line     -- add a jittered line layer.

        Prints ">> saved: <fn>" after the device has been closed.
        """
        df = self.df
        grdevices = importr('grDevices')

        if not title:
            title = fn.split("/")[-1]

        grdevices.png(file=fn, width=w, height=h)
        try:
            # Base aesthetics: col and group are included only when given.
            base_aes = {'x': x, 'y': y}
            if col:
                base_aes['col'] = col
            if group:
                base_aes['group'] = group
            pp = ggplot2.ggplot(df) + ggplot2.aes_string(**base_aes)

            if boxplot:
                if col:
                    pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                               color='blue')
                else:
                    pp += ggplot2.geom_boxplot(color='blue')

            if point:
                point_args = {'size': size}
                if jitter:
                    point_args['position'] = 'jitter'
                if col:
                    pp += ggplot2.geom_point(
                        ggplot2.aes_string(fill=col, col=col), **point_args)
                else:
                    pp += ggplot2.geom_point(**point_args)

            if boxplot2:
                if col:
                    pp += ggplot2.geom_boxplot(ggplot2.aes_string(fill=col),
                                               color='blue',
                                               outlier_colour="NA")
                else:
                    # NOTE(review): unlike the col branch, outliers are not
                    # hidden here -- confirm whether that is intended.
                    pp += ggplot2.geom_boxplot(color='blue')

            if smooth:
                smooth_args = {'size': 1, 'se': se}
                if smooth == 'lm':
                    smooth_args['method'] = 'lm'
                if col:
                    pp += ggplot2.stat_smooth(ggplot2.aes_string(col=col),
                                              **smooth_args)
                else:
                    pp += ggplot2.stat_smooth(col='blue', **smooth_args)

            if density:
                pp += ggplot2.geom_density(
                    ggplot2.aes_string(x=x, y='..count..'))

            if line:
                pp += ggplot2.geom_line(position='jitter')

            pp += ggplot2.opts(
                **{
                    'title': title,
                    'axis.text.x': ggplot2.theme_text(size=24),
                    'axis.text.y': ggplot2.theme_text(size=24, hjust=1)
                })
            pp += ggplot2.scale_colour_hue()
            if flip:
                pp += ggplot2.coord_flip()

            pp.plot()
        finally:
            # Close the PNG device even if building or rendering the plot
            # fails, so the device is not leaked.
            grdevices.dev_off()
        # Single-argument call form prints identically on Python 2 and 3.
        print(">> saved: " + fn)