Example #1
 def draw_plots(self,savedir, con,modelname):
     """
     plot the predicted vs. the actual score with different colors for each group
     """
     import matplotlib.pyplot as plt
     import rpy2.robjects as rob
     from rpy2.robjects import FloatVector as FV
     from rpy2.robjects.packages import importr
     import numpy as np
     import os
     stats = importr('stats')
     base = importr('base')
     self.get_rmse()
     predscores, actualscores, meanerr, rmsqerr = self.prediction, self.actual, np.mean(abs(self.pred_errors)), self.rmse
     plt.figure()
     plt.scatter(actualscores,predscores,s=70)
     x = np.array(range(100))
     plt.plot(x,x,'g',label='optimal model')
     # make regression
     rob.globalenv["pred"] = FV(predscores)
     rob.globalenv["act"] = FV(actualscores)
     mult_lm = stats.lm("pred ~ act + 1")
     coeffs = np.array(mult_lm.rx("coefficients")[0])
     rsqrd = base.summary(mult_lm).rx("r.squared")[0][0]
     y = coeffs[1]*x+coeffs[0]
     plt.plot(x,y,'k',label='our model',linewidth=2)
     plt.xlabel("actual lsas delta")
     plt.ylabel("predicted lsas delta")
     plt.title("Predicted vs. Actual LSAS_delta score")
     plt.axis([0,100,0,100])
     axes = plt.gca()  # current axes (plt.axes() would create a new one)
     axes.text(0.05,0.8,"meanerr: %.2f\nrmse: %.2f\nr: %.2f (Rsqrd: %.2f)"%(meanerr,rmsqerr,np.sqrt(rsqrd),rsqrd),transform=axes.transAxes)
     plt.legend()
     plt.savefig(os.path.join(savedir,"%s_%s_lsas_delta_act_pred.pdf"%(modelname,con)),dpi=300,format="pdf")
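# Minimal standalone sketch (not from the original example; data values are
# placeholders): the regression step above, i.e. fit pred ~ act with R's
# stats::lm via rpy2 and pull out the coefficients and R^2.
import numpy as np
import rpy2.robjects as rob
from rpy2.robjects import FloatVector as FV
from rpy2.robjects.packages import importr

stats = importr('stats')
base = importr('base')
rob.globalenv["pred"] = FV([1.0, 2.1, 2.9, 4.2])
rob.globalenv["act"] = FV([1.0, 2.0, 3.0, 4.0])
fit = stats.lm("pred ~ act + 1")
coeffs = np.array(fit.rx("coefficients")[0])   # [intercept, slope]
rsqrd = base.summary(fit).rx("r.squared")[0][0]
print("slope=%.3f intercept=%.3f Rsqrd=%.3f" % (coeffs[1], coeffs[0], rsqrd))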
Example #2
import rpy2.rinterface
from rpy2.robjects import r
from rpy2.robjects.packages import importr


def setup():
    global corpcor
    try:
        corpcor = importr('corpcor')
    except rpy2.rinterface.RRuntimeError:
        r("install.packages('corpcor', repos='http://cran.us.r-project.org')")
        corpcor = importr('corpcor')
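# Hedged generalization of the pattern above (the repos URL is the one the
# original snippet uses): import an R package, installing it on the first
# failure and retrying.
def importr_or_install(name, repos='http://cran.us.r-project.org'):
    try:
        return importr(name)
    except rpy2.rinterface.RRuntimeError:
        r("install.packages('%s', repos='%s')" % (name, repos))
        return importr(name)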
Example #3
	def pretty_hm(self,r_mat=None,file=None):
		# use R pheatmap
		# create heatmap file
				
		if r_mat is None and self.mat is None:
			raise Exception("A matrix needs to be provided at pretty_hm()")
		if file is None and self.file is None:
			raise Exception("A filename needs to be provided at pretty_hm()")
		
		phm = importr("pheatmap")
		rcb = importr("RColorBrewer")
		
		w = 10.5
		h = self.find_h(r_mat)
		cw = 10 #cell width
		ch = 10 #cell height
		fs = 1
		
		# rcolorbrewer, include white for null (0) values
		ncolor = 5
		spectrum = "YlOrRd"
		palette = "brewer.pal("+str(ncolor)+",'"+spectrum+"')"		

		rob.r.pheatmap(r_mat, filename=file, cellwidth=cw, cellheight=ch, color=rob.r.c("#FFFFFF",rob.r(palette)), width=w, height=h, font_size=fs, cluster_cols=False)
				
		return file
Example #4
File: limma.py Project: mfiers/rat
def run_simple(A, B):

    from rpy2.robjects import pandas2ri
    from rpy2.robjects.packages import importr
    import rpy2.robjects as ro
    import numpy as np
    import pandas as pd
    r = ro.r

    pandas2ri.activate()
    
    limma = importr('limma')
    edgeR = importr('edgeR')

    counts = pd.concat([A, B], axis=1)
    groups = r.factor(r.c(*([0] * A.shape[1] + [1] * B.shape[1])))
    ro.globalenv['exp'] = groups
                 
    design = r('model.matrix(~exp)')
    dge = r.DGEList(counts=counts)
    dge = r.calcNormFactors(dge)
    v = r.voom(dge, design, plot=False)
    fit = r.lmFit(v, design)
    fit = r.eBayes(fit)
    tt = r.topTable(fit, coef=r.ncol(design), number=1e12)
    ttidx = r['row.names'](tt)
    tt =  pandas2ri.ri2py(tt)
    cols = tt.columns.to_series()
    cols[0] = 'lfc'
    cols[3] = 'pval'
    cols[4] = 'padj'
    tt.columns = cols
    tt['slp'] = np.log10(tt['pval'])
    tt.loc[tt['lfc'] > 0, 'slp'] = -np.log10(tt.loc[tt['lfc'] > 0, 'pval'])
    tt.index = ttidx
    return tt
Example #5
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr


def getPredictedKLDisByArima(playlist, songDict):
  importr("forecast")
  #get playlist's training list
  trainingList = playlist.getTrainingList()
  count = len(trainingList)
  #define base diatribution and dis list
  baseDict = {}
  disList = []
  #loop every song in training list
  #add distribution of sids to tsDict to construct some time series
  for i in range(0,count):
    sid = trainingList[i]
    sTopicDict = songDict[sid].getTopicDict()
    if len(baseDict) == 0:
      length = len(sTopicDict)
      for t in range(0, length):
        baseDict[t] = 1.0 / length
    disList.append(KLSim(sTopicDict,baseDict))

  #using auto arima to forecast the kl distance
  vec = robjects.FloatVector(disList)
  ts = robjects.r['ts'](vec)
  fit = robjects.r['auto.arima'](ts)
  next = robjects.r['forecast'](fit,h=1)
  return float(next.rx('mean')[0][0])
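# Minimal standalone sketch (assumes the R 'forecast' package is installed;
# data values are placeholders) of the chain used above:
# ts -> auto.arima -> forecast(h=1).
vec = robjects.FloatVector([0.12, 0.15, 0.11, 0.18, 0.16, 0.19])
ts = robjects.r['ts'](vec)
fit = robjects.r['auto.arima'](ts)
nxt = robjects.r['forecast'](fit, h=1)
print(float(nxt.rx('mean')[0][0]))  # one-step-ahead point forecast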
Example #6
 def predict_proba(self, X):
     importr('glmnet')
     pred = R['predict'](self.R_model_, X, type="response")
     pred = np.squeeze(np.asarray(pred))
     if self.binary_classification:
         pred = np.vstack((1-pred, pred)).T
     return pred
Example #7
def av(data, formula, model='', output='', as_strings='', title='Title for Your Output', label='Label for Your Output', pythontex=True):
    if not output:
        output = 'xtable'
    if not model:
        model = 'aov'

    if output == 'stargazer':
        stargazer = importr('stargazer')
    elif output == 'texreg':
        texreg = importr('texreg')

    formula = robjects.Formula(formula)
    dfr = com.convert_to_r_dataframe(data)  # convert from pandas to R and make string columns factors

    if model == 'aov':
        output = 'xtable'  # aov only works with xtable
        av_model = stats.aov(formula, data=dfr)
        av_model_sum = base.summary(av_model)

    if output == 'xtable':
        xtable = importr('xtable')
        latex = xtable.xtable(av_model_sum, caption=title, label=label)
    if pythontex:
        return latex
    else:
        return '\n'.join(np.array(latex))
Example #8
def deseq2_gene_expression_normalization(df_data):
    rpy2.robjects.pandas2ri.activate()
    df_data = df_data.dropna()

    r_data_set = robjects.conversion.py2ri(df_data)

    base = importr("base")
    deseq2 = importr("DESeq2")
    bio_generics = importr("BiocGenerics")
    gr = importr('GenomicRanges')
    vec = importr('S4Vectors')


    conds = vec.DataFrame(condition=base.factor(base.c(base.colnames(r_data_set))))
    print(df_data.head())
    design = robjects.r('formula(~ condition)')

    dds = deseq2.DESeqDataSetFromMatrix(r_data_set, colData=conds, design=design)
    print("dds loaded")
    logs = deseq2.rlog(dds, fast=True)
    logs_count = gr.assay(logs)
    print("logs_count loaded")
    rpy2.robjects.pandas2ri.deactivate()
    res = Result()
    res.frame = pd.DataFrame(numpy.matrix(logs_count), columns=logs_count.colnames, index=logs_count.rownames)
    res.package = "DESeq2"
    res.version = deseq2.__version__

    return res
Example #9
def drawRbarplot(united, Motifs, Nodes, output, top=25):
    if not Nodes:
        return
    grdevices = importr('grDevices')
    graphics = importr('graphics')

    Counts = [len(seqnames) for seqnames in Motifs.itervalues()]
    Counts_names = [united.uid2Motif[int(uid)] for uid in Motifs.iterkeys()]
    Counts = robj.IntVector(Counts)
    Counts.names = robj.StrVector(Counts_names)

    Ps = [node[-1] for node in Nodes if node[-1] < 0.05]
    Ps_names = [node[0] for node in Nodes if node[-1] < 0.05]
    Ps = robj.FloatVector(Ps)
    Ps.names = robj.StrVector(Ps_names)
    Ps = robj.r.sort(Ps, decreasing=True)

    enriched_counts = Counts.rx(Ps.names)
    grdevices.png(file="%s/%s_bar.png" % (output, output),
        width=512, height=512)
    margin = robj.IntVector([3, 9, 4, 2])
    graphics.par(mar=margin)
    bar = graphics.barplot(enriched_counts,
        main="Enriched Motifs Counts", horiz=True, las=1, col='lightblue')
    Ps_lab = robj.r('format(signif(%s ,digits=2), scientific=T)' % Ps.r_repr())
    graphics.text(x=enriched_counts, y=bar,
        label=Ps_lab, po=2)
    #graphics.text(bar,labels=top_counts,pos=4,offset=10)
    grdevices.dev_off()
Example #10
def plot_degree_distrib(adj_mat, i, filepath='.', prefix='ddist_'):
    """
    Compute degree distribution and plot using lattice
    """
    
    lattice = importr('lattice')
    grdevices = importr('grDevices')
    xyplot = lattice.xyplot
    rprint = robjects.globalenv.get("print")
    
    try:
        deg = robjects.r.rowSums(adj_mat)
        degdens = robjects.r.density(deg, **{'na.rm': True})
    except Exception:
        # without a degree density there is nothing to plot
        return ''
    
    myf = robjects.Formula('y ~ x')
    myf.getenvironment()['x'] = degdens.rx2('x')
    myf.getenvironment()['y'] = degdens.rx2('y')
    p = xyplot(myf, type='l', lwd=3, 
               xlab='Node Degree', ylab='Density',
               main='Node degree distribution')
    
    myfname = prefix + str(i) + '.png'
    grdevices.png(file=os.path.join(filepath, myfname), width=512, height=512, type="cairo")
    rprint(p)
    grdevices.dev_off()
    
    return myfname
Example #11
def cummeRbund(input_file, output_file, options):

    # import the grapher and cummerBund
    grdevices = importr('grDevices')
    r_bund = importr("cummeRbund")
    r_plot = robjects.r('plot')


    # read in the diff results
    cuff = r_bund.readCufflinks(options.output)
    grdevices.pdf(file=output_file, width=10, height=10)

    r_plot(r_bund.dispersionPlot(r_bund.genes(cuff)))
    r_plot(r_bund.csBoxplot(r_bund.genes(cuff),replicates=True))
    r_plot(r_bund.csDendro(r_bund.genes(cuff),replicates=True))
    r_plot(r_bund.csBoxplot(r_bund.genes(cuff)))
    r_plot(r_bund.csDistHeat(r_bund.genes(cuff)))
    r_plot(r_bund.csDistHeat(r_bund.genes(cuff), replicates=True))
    r_plot(r_bund.PCAplot(r_bund.genes(cuff),"PC1","PC2"))
    r_plot(r_bund.PCAplot(r_bund.genes(cuff),"PC1","PC2",replicates=True))
    r_plot(r_bund.PCAplot(r_bund.genes(cuff),"PC2","PC2"))
    r_plot(r_bund.PCAplot(r_bund.genes(cuff),"PC3","PC2",replicates=True))

    # close the dev
    grdevices.dev_off()
Example #12
def installRpackages(options):
    """Install R packages that cannot be installed using pip install ..."""
    src = join(sys.prefix, "lib", "R-3.2.3", "lib64", "R", "lib")
    spe = "$"
    cmd = 'export LD_LIBRARY_PATH=%sLD_LIBRARY_PATH:%s' % (spe, src)
    sh(cmd)
    import rpy2.robjects.packages as rpackages

    packageNames = ('ggplot2',)
    have_packages = all(rpackages.isinstalled(x) for x in packageNames)

    if not have_packages:
        packnames_to_install = [x for x in packageNames if not rpackages.isinstalled(x)]
        # alternative: install from CRAN with
        #   utils = rpackages.importr('utils'); utils.chooseCRANmirror(ind=1)
        #   utils.install_packages(StrVector(packnames_to_install))
        if len(packnames_to_install) > 0:
            # install via the Bioconductor installer
            base = rpackages.importr('base')
            base.source("http://www.bioconductor.org/biocLite.R")
            biocinstaller = rpackages.importr("BiocInstaller")
            biocinstaller.biocLite("ggplot2", suppressUpdates=True)
Example #13
def drawRCoDistibution(motif_pair,outpath,pair_name):
    motifA_pos=[]
    motifB_pos=[]
    filename="&".join(pair_name).replace('/','_')
    (motifa,motifb)=pair_name
    for REF_SeqName in motif_pair.keys():
        #print motif_pair[REF_SeqName].keys()
        for motifa_pos,motifb_pos in motif_pair[REF_SeqName].keys():
            motifA_pos.append(motifa_pos)
            motifB_pos.append(motifb_pos)
            #print motifa_pos,motifb_pos
    grdevices = importr('grDevices')
    graphics = importr('graphics')
    geneplotter = importr('geneplotter')
    motifA_pos=-robj.IntVector(motifA_pos).ro
    motifB_pos=-robj.IntVector(motifB_pos).ro
    Pos={motifa:motifA_pos,motifb:motifB_pos}
    Pos=robj.ListVector(Pos)
    grdevices.png(file="%s/%s_distribution.png" % (outpath,filename), width=512, height=512)
    geneplotter.multidensity(Pos.rx(),lwd=3,xlab="Distribution",main="Distribution of \n%s" % filename)
    graphics.rug(motifA_pos,col=4)
    graphics.rug(motifB_pos,col=2)
    grdevices.dev_off()
    #Scatter plot
    grdevices.png(file="%s/%s_Scatter.png" % (outpath,filename), width=512, height=512)
    limit=robj.IntVector([-3000,0])
    graphics.plot(motifA_pos,motifB_pos,main="Position Scatter Plot of\n%s&%s" % (motifa,motifb), \
                    xlab="Positions of %s" % motifa, \
                    ylab="Positions of %s" % motifb, \
                    xlim=limit,ylim=limit)
    graphics.abline(1,1)
    grdevices.dev_off()
Example #14
def process_ccs(cc_in):
    cc = cc_in[:300, :300]
    for i, c in enumerate(cc):
        c[i] = 0
    import rpy2.robjects as robjects
    from rpy2.robjects.packages import importr
    from numpy import array, shape

    r = robjects.r

    base = importr("base")
    # the R matrix mirrors the (square) correlation array cc
    nc = nr = shape(cc)[0]

    from rpy2.robjects.numpy2ri import numpy2ri

    m = numpy2ri(cc)  # robjects.r['matrix'](v, nrow = nr, ncol = nc)

    biclust = importr("biclust")
    mb = biclust.binarize(m, 0.90)

    # hcv = r.hclust(r.dist(mb))
    # hm = r.heatmap(mb)

    out = biclust.biclust(m, method=biclust.BCPlaid())
    n_bc = out.do_slot("Number")
    rows = array(out.do_slot("RowxNumber"))
    cols = array(out.do_slot("NumberxCol")).T

    return rows, cols, array(m), array(mb)
Example #15
def plot_tree(outfile, tree1, tree2=None, root=False):
    """Generate tree(s) plot"""
    import array
    ape = importr('ape')
    graphics = importr('graphics')
    grdevices = importr('grDevices')
    if tree2 is None:
        grdevices.png(file=outfile, width=1024, height=768)
        if root:
            ape.plot_phylo(ape.root(tree1,root),edge_width=2,cex=1,underscore=1)
        else:
            ape.plot_phylo(tree1,edge_width=2,cex=1,underscore=1)
        graphics.title(main='Neighbor Joining')
        grdevices.dev_off()
    elif tree2 is not None:
        grdevices.png(file=outfile, width=1024, height=768)
        graphics.par(mfcol=array.array('i',[1,2]))
        if root:
            ape.plot_phylo(ape.root(tree1,root),edge_width=2,cex=1,underscore=1)
        else:
            ape.plot_phylo(tree1,edge_width=2,cex=1,underscore=1)
        graphics.title(main='Neighbor Joining', cex=1.5, font=2)
        if root:
            ape.plot_phylo(ape.root(tree2,root),edge_width=2,cex=1,underscore=1)
        else:
            ape.plot_phylo(tree2,edge_width=2,cex=1,underscore=1)
        graphics.title(main='Maximum Parsimony',cex=1.5, font=2)
        grdevices.dev_off()
    return
Example #16
def plot_test():
   from rpy2.robjects.packages import importr
   graphics = importr('graphics')
   grdevices = importr('grDevices')
   base = importr('base')
   stats = importr('stats')
   
   import array
   
   x = array.array('i', range(10))
   y = stats.rnorm(10)
   
   grdevices.X11()
   
   graphics.par(mfrow = array.array('i', [2,2]))
   graphics.plot(x, y, ylab = "foo/bar", col = "red")
   
   kwargs = {'ylab':"foo/bar", 'type':"b", 'col':"blue", 'log':"x"}
   graphics.plot(x, y, **kwargs)
   
   
   m = base.matrix(stats.rnorm(100), ncol=5)
   pca = stats.princomp(m)
   graphics.plot(pca, main="Eigen values")
   stats.biplot(pca, main="biplot")
Example #17
	def pca(self,fn='pca.png',col=None,w=1200,h=1200):
		stats    = importr('stats')
		graphics = importr('graphics')

		# 
		# if col:
		# 	df,factors=dataframe(df,factorcol=col)
		df=self.df
		pca = stats.princomp(df)

		grdevices = importr('grDevices')
		ofn=".".join(fn.split(".")[:-1]+["eigens"]+[fn.split(".")[-1]])
		strfacts=str(df.nrow)+" items using "+str(df.ncol)+" features ["+ofn.split("/")[-1]+"]"
		grdevices.png(file=ofn, width=w, height=h)
		graphics.plot(pca, main = "Eigenvalues for "+strfacts)
		# if col:
		# 	graphics.hilight(pca,factors)
		grdevices.dev_off()
		print ">> saved: "+ofn	

		grdevices = importr('grDevices')
		ofn=".".join(fn.split(".")[:-1]+["biplot"]+[fn.split(".")[-1]])
		strfacts=str(df.nrow)+" items using "+str(df.ncol)+" features ["+ofn.split("/")[-1]+"]"
		grdevices.png(file=ofn, width=w, height=h)
		stats.biplot(pca, scale=1,main = "biplot of "+strfacts)
		grdevices.dev_off()
		print ">> saved: "+ofn
Example #18
def goid_annot(goid):
    """
    Get GO annotation for a GO id. If the annotation is empty, you probably need
    to update your GO.db package.

    Depends on the Bioconductor GO.db package.

    >>> get_go_terms('GO:0051649')
    result

    """
    importr('GO.db')
    term = str(robjects.r['Term'](goid)[0])
    ont = str(robjects.r['Ontology'](goid)[0])

    syn = robjects.r['Synonym'](goid)[0]
    if robjects.r['is.null'](syn):
        syn = None
    else:
        syn = [str(i) for i in syn]

    sec = robjects.r['Secondary'](goid)[0]
    if robjects.r['is.null'](sec):
        sec = None
    else:
        sec = [str(i) for i in sec]

    defn = str(robjects.r['Definition'](goid)[0])
    return GoAnnot(goid, term, ont, syn, sec, defn)
Example #19
def agglom(E, k=100, linkage="complete", simdist_function="pearson_correlation"):
    importr("cluster")
    ro.globalenv["distances"] =  simdist(E, simdist_function, similarity=False)
    ro.r("hclust_results = hclust(as.dist(distances), method='{linkage}')".format(**locals()))
    rresults = ro.r("labels = cutree(hclust_results, k={k})".format(**locals()))
    modules = convert_labels2modules(list(rresults), E.columns)
    return modules
Example #20
def create_files(args, dir, loci):
	# where the gene trees are
	subdir = os.path.join(dir, 'gene_trees')
	if not os.path.isdir(subdir):
		os.mkdir(subdir)	

	# where to put the files
	outdir = os.path.join(dir, 'astrid_astral')
	if not os.path.isdir(outdir):
		os.mkdir(outdir)

	# start up r
	ape = importr('ape')
	phangorn = importr('phangorn')

	out = os.path.join(outdir, 'best_trees_miss%s_tol%s_collapse%s.trees' % 
                          (args.miss, args.tol, args.collapse))
	bs = os.path.join(outdir, 'bootstrap_files_miss%s_tol%s_collapse%s.txt' % 
                         (args.miss, args.tol, args.collapse))
	bs_out = open(bs, 'w')

	for locus in loci:
		tree = os.path.join(subdir, '%s.bestTree.tre' % locus)
		if os.path.isfile(tree):
			# deal with best tree
			a = ape.read_tree(tree)
			a = manipulate_gene_tree(ape, phangorn, a, args.tol, args.collapse)
			ape.write_tree(a, file=out, append=True)
	
			bs = os.path.join(subdir, '%s.bootstrap.trees' % locus)
			bs_out.write('%s\n' % bs)

	bs_out.close()
Example #21
def kmedoids(E, number=100, simdist_function="pearson_correlation"):
    importr("cluster")

    distances = simdist(E, simdist_function, similarity=False)
    rresults = ro.r["pam"](distances, diss=True, k=number)
    modules = convert_labels2modules(list(rresults.rx2("clustering")), E.columns)
    return modules
Example #22
def test_mdmr_with_connectir_distances():
    """uses the distances from output of connectir to specifically test the mdmr portion of things"""
    import os

    os.chdir("../C-PAC")

    from CPAC.cwas.mdmr import mdmr
    import numpy as np
    from os import path as op
    import rpy2.robjects as robjects
    from rpy2.robjects.numpy2ri import numpy2ri
    from rpy2.robjects.packages import importr

    robjects.conversion.py2ri = numpy2ri
    from pandas import read_csv

    bigmemory = importr("bigmemory")
    base = importr("base")

    sdir = "/home/data/Projects/CPAC_Regression_Test/2013-05-30_cwas/results_adhd04.r"
    sfile = op.join(sdir, "subdist.desc")
    dmats = np.array(robjects.r("as.matrix(attach.big.matrix('%s'))" % sfile))
    n = np.sqrt(dmats.shape[0])

    rfile = "/home2/data/Projects/CPAC_Regression_Test/2013-05-30_cwas/configs/adhd04_regressors.txt"
    regressors = np.loadtxt(rfile)

    ps, Fs, _, _ = mdmr(dmats[:, :10], regressors, [1], 1000)
Example #23
def rocbees(ARGVS):
   '''
   ROC curves and the beeswarm plot 
   from beeswarm R package
   '''
   beeswarm = importr('beeswarm')
   Cairo = importr('Cairo')
   ROC   = importr('ROC')
   filename = ARGVS['file']
   data     = ARGVS['data']
   title    = ARGVS['title']
   category = ARGVS['opts']
   filewrite = ROOT_PATH + '/media/tmp/' + filename 
   resp    = []
   expr    = []
   names = data.keys()
   for name in names:
      resp.append(data[name]['resp'])
      expr.append(data[name]['expression'])
   robjects.r ('''
    approx3 <- function(x, y = NULL, theta = 0.001) {
     xy <- xy.coords(x, y)
     dx <- diff(xy$x)/(max(xy$x) - min(xy$x))
     dy <- diff(xy$y)/(max(xy$y) - min(xy$y))
     angle <- atan2(dy, dx)
     diff.angle <- diff(angle)%%pi
     abs.diff.angle <- pmin(diff.angle, pi - diff.angle)
     keep <- c(TRUE, abs.diff.angle > theta, TRUE)
     xy$x <- xy$x[keep]
     xy$y <- xy$y[keep]
     xy
    }
    aronroc <- function(x, truth, type = "l", xlab = expression(1 -
     specificity), ylab = "Sensitivity", ...) {
     require(ROC)
     r <- rocdemo.sca(truth, x)
     xy <- list(x = 1 - r@spec, y = r@sens)
     xy.trimmed <- approx3(xy)
     plot(xy.trimmed, type = type, xlab = xlab, ylab = ylab, ...)
     invisible(xy.trimmed)
    }
    plotResps <- function (filename,expr,resp,category, main='') {
     expr  = as.numeric(expr)
     resp  = as.character(resp)
     CairoPNG(filename=filename,width = 800, height = 400)
     par(oma = c(0,0,1,0))
     layout(matrix(1:2, nrow = 1), widths = c(1,1))
     beeswarm(expr ~ resp,col=c(1:length(unique(resp))),
      pch=16,xlab='Response Categories', ylab='Expression')
     par(xpd = NA)
     aronroc (expr, resp == category)
     title(main,outer=TRUE)
     dev.off()
    }    
   ''')
   try:
      robjects.r['plotResps'](filename = filewrite, expr = expr, resp = resp, category = category, main = title)
      return filename
   except:
      return 'Error'
Example #24
def heatmap3py(numDataR, ColSideColors, annoColDicList, fileName=None, outPath=None):
    from rpy2.robjects.packages import importr    
    heatmap = importr("heatmap3")
    grdevices = importr("grDevices")                   
    from rpy2.robjects.functions import SignatureTranslatedFunction
    # explicitly translate the R argument to legal python name 
    heatmap.showLegend = SignatureTranslatedFunction(heatmap.showLegend,
                                           init_prm_translate = {'pt_bg': 'pt.bg'})       
    
    # annoName = ColSideColors.colnames
    # draw heatmap in file
    if fileName is not None:
        grdevices.pdf(file = fileName )
        heatmap.heatmap3(numDataR,ColSideColors=ColSideColors,showRowDendro=False)
        grdevices.dev_off()
        for i in range(len(annoColDicList)):
            anno = robjects.StrVector(annoColDicList[i].keys())
            col =  robjects.StrVector(annoColDicList[i].values())            
            fileName = outPath +"/heatmapLegend" + str(i) + ".pdf"
            grdevices.pdf(file = fileName )
            heatmap.showLegend(legend= anno,col=col,cex=1.5, title="Annotation Legend: "+ColSideColors.colnames[i], pch=22, lwd = robjects.NA_Integer, pt_bg=col)
            grdevices.dev_off()        
    else:    
        # Draw heatmap in R window
        heatmap.heatmap3(numDataR,ColSideColors=ColSideColors,showRowDendro=False)      
        # Plot legends in another window
        for i in range(len(annoColDicList)):
            grdevices.dev_new()
            anno = robjects.StrVector(annoColDicList[i].keys())
            col =  robjects.StrVector(annoColDicList[i].values())
            heatmap.showLegend(legend= anno,col=col,cex=1.5, title="Annotation Legend: "+ ColSideColors.colnames[i], pch=22, lwd = robjects.NA_Integer, pt_bg=col)
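# Minimal sketch of the argument-name translation used above (graphics::legend
# is simply a convenient R function that also takes a 'pt.bg' parameter): R
# allows dots in argument names, Python keywords cannot contain them, so
# rpy2's SignatureTranslatedFunction maps a legal Python name onto the R one.
from rpy2.robjects.functions import SignatureTranslatedFunction
from rpy2.robjects.packages import importr

graphics = importr('graphics')
legend = SignatureTranslatedFunction(graphics.legend,
                                     init_prm_translate={'pt_bg': 'pt.bg'})
# now callable as: legend(x='topright', legend=..., pch=21, pt_bg='red')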
Example #25
def edger_gene_expression_normalization(df_data):
    rpy2.robjects.pandas2ri.activate()
    df_data = df_data.dropna()
    r_data_set = robjects.conversion.py2ri(df_data)


    edger = importr("edgeR")
    base = importr("base")
    mult = robjects.r.get('*')

    factors = base.factor(base.c(base.colnames(r_data_set)))
    dge = edger.DGEList(counts=r_data_set, group=factors)
    y = edger.calcNormFactors(dge)
    y = edger.estimateCommonDisp(y)

    # y[0] is the counts slot and y[1][2] the size factors; rpy2 exposes
    # R's 1-based list slots through Python's 0-based indexing
    normalized = mult(y[0], y[1][2])

    rpy2.robjects.pandas2ri.deactivate()

    print("preparing result")
    res = Result()
    res.frame = pd.DataFrame(numpy.round(numpy.matrix(normalized)), columns=normalized.colnames, index=normalized.rownames)
    res.package = "edgeR"
    res.version = edger.__version__

    return res
Example #26
def test_r_environment():
    """Test if all required R packages are installed to use the NBLAST API.
    """
    setup_is_ok = False
    try:
        rnat = importr('nat')
        relmr = importr('elmr')
        rnblast = importr('nat.nblast')
        rcatmaid = importr('catmaid')
        setup_is_ok = True
    except Exception:
        setup_is_ok = False
        logger.info("""
        Please make sure the following R packages are installed to use CATMAID's
        NBLAST support. This can be done by executing the following in the R
        environment of the user running CATMAID (e.g. www-data):

        if(!require("devtools")) install.packages("devtools")
        devtools::install_github(c("jefferis/nat", "jefferislab/nat.nblast",
                "jefferis/rcatmaid", "jefferis/elmr"))

        This is required to let CATMAID compute NBLAST scores.
        """)

    return JsonResponse({
        'setup_ok': setup_is_ok,
    })
Example #27
def drawRboxplot(united, Motifs, enriched, REF_SeqNames, output):
    '''
    motif_dist[motif][SeqName]=[(start, end),...]
    '''
    grdevices = importr('grDevices')
    graphics = importr('graphics')
    #Merged_dist=dict([(k,list(v)) for k,v in Merged_dist.items()])
    Positions = {}
    for motif in enriched:
        Pos = []
        for seqname in REF_SeqNames:
            Pos.extend([start for start, stop in \
                Motifs[str(united.Motif2uid[motif])][seqname]])
        if Pos:
            Positions[motif] = robj.IntVector(map(lambda x: x - 3000, Pos))

    Positions = robj.ListVector(Positions)
    enriched_names = robj.StrVector(enriched)
    grdevices.png(file="%s/%s_box.png" % \
        (output, output), width=512, height=512)
    margin = robj.IntVector([3, 9, 4, 2])
    graphics.par(mar=margin)
    graphics.boxplot(Positions.rx(enriched_names),
        main="Boxplot of Motif Positions",
        horizontal=True, las=1, col='lightblue')
    grdevices.dev_off()
Example #28
def some_rpy2():
	flash('Loading data...please wait')
	r.load('mtu_inf_111813.RData')
	pm = r['predictor.mats']
	pmm = pm.rx(1)
	dataframe = r['data.frame']
	df = dataframe(pmm)
	firstcol = df.rx(1)
	seccol = df.rx(2)

	lattice = importr('lattice')
	xyplot = lattice.xyplot
	rprint = robjects.globalenv.get("print")

	#formula = Formula('firstcol ~ seccol')
	#formula.getenvironment()['firstcol'] = df.rx2(1)
	#formula.getenvironment()['seccol'] = df.rx2(2)
	#p = lattice.xyplot(formula)

	grdevices = importr('grDevices')

	#filenm = app.config['IMGS_FOLDER'] + 'hist.png'
	filenm = 'hist.png' # why is this in tmp still???

	grdevices.png(file=filenm, width=512, height=512)
	p = r.histogram(df.rx2(1))
	rprint(p) # works
	grdevices.dev_off()

	return render_template("hist.html", image='static/tmp/hist.png')
Example #29
    def plot(self, data_array, width, height):
        """
        Create a plot with R
        """

        # Start R timing
        startTime = time.time()

        rinterface.initr()

        r = robjects.r
        grdevices = importr('grDevices')

        # Import the bfast package
        bfast = importr('bfast')

        b = robjects.FloatVector(data_array)

        # convert the array b into a time-series vector
        b_ts = r.ts(b, start=robjects.IntVector([2000, 4]), frequency=23)

        # calculate bfast
        h = 23.0 / float(len(b_ts))
        b_bfast = r.bfast(b_ts, h=h, season="harmonic", max_iter=2)

        # Get the index names of the ListVector b_bfast
        names = b_bfast.names
        log.debug(names)

        temp_datadir = self.config.get('main', 'temp.datadir')
        temp_url = self.config.get('main', 'temp.url')
        file = NamedTemporaryFile(suffix=".png", dir=temp_datadir, delete=False)

        log.debug(file.name)
        grdevices.png(file=file.name, width=width, height=height)
        # Plotting code here
        r.par(col="black")
        r.plot(b_bfast)
        # Close the device
        grdevices.dev_off()

        # End R timing and log it
        endTime = time.time()
        log.debug('It took ' + str(endTime - startTime) + ' seconds to initialize R and draw a plot.')

        file.close()

        result = {"file": "%s/%s" % (temp_url, file.name.split("/")[-1])}
        try:
            result['magnitude'] = str(tuple(b_bfast[names.index("Magnitude")])[0])
        except ValueError:
            pass
        try:
            result['time'] = str(tuple(b_bfast[names.index("Time")])[0])
        except ValueError:
            pass

        self.outputs['plot']['value'] = json.dumps({"file": "%s/%s" % (temp_url, file.name.split("/")[-1])})
        return SERVICE_SUCCEEDED
Example #30
 def translate(self, ds):
     url = ds.data_url().pop()
     base = importr('base')
     utils = importr('utils')
     utils.download_file(url, destfile="data_source.RData")
     base.load("data_source.RData")
     cds = R('cds')
     print(cds)
Example #31
#         samples2.columns=['GeneID',colV]
#         # print(samples2.head())

#         samples = pd.merge(samples,samples2, on='GeneID', )

# print("\n",'List of file names read to dataframe:','\n')
# samples.set_index("GeneID", inplace = True)
# print(samples.head())
########################################################################
samples.to_csv('matrix.csv', )
# print(colV)
samples_annotated = samples
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

ALL = importr('ALL')
limma = importr('limma')
exprs = robjects.r['exprs']
summary = robjects.r['summary']
matrix = robjects.r['as.matrix']
new = robjects.r['new']
robjects.r('data("ALL")')
data = robjects.globalenv['ALL']
featureNames = robjects.r['featureNames']
ExpressionSet = robjects.r['ExpressionSet']
character = robjects.r['as.character']
pas = robjects.r['paste']
fac = robjects.r['as.factor']
mmax = robjects.r['model.matrix']

from rpy2.robjects import pandas2ri
Example #32
import os
import sys
import pickle
import pandas as pd
import multiprocessing
from joblib import Parallel, delayed
import rpy2.robjects as ro

num_cores = multiprocessing.cpu_count()
import seaborn as sns
homedir = os.environ['HOME']
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()
R = ro.r
import warnings
warnings.filterwarnings("ignore")
ismev = importr('ismev')
gof = importr('gnFit')
base = importr('base')
statis = importr('stats')
eva = importr('eva')
extremes = importr('extRemes')


def save_as_pickled_object(obj, filepath):
    """
    This is a defensive way to write pickle.write, allowing for very large files on all platforms
    """
    max_bytes = 2**31 - 1
    bytes_out = pickle.dumps(obj)
    n_bytes = sys.getsizeof(bytes_out)
    with open(filepath, 'wb') as f_out:
        # write in chunks of at most max_bytes to dodge the 2 GB pickle limit
        for idx in range(0, n_bytes, max_bytes):
            f_out.write(bytes_out[idx:idx + max_bytes])
Example #33
from pandas import read_csv, DataFrame, Series
import statsmodels.api as sm
import rpy2.robjects as R
from rpy2.robjects.packages import importr
import pandas.rpy.common as com
from pandas import date_range
import numpy as np
import csv
import re
import pandas as pd
import matplotlib.pyplot as plt
import time
forecast = importr('forecast')
stats = importr('stats')
tseries = importr('tseries')


def parse_csv(path):
    term = path.split('/')[-1].split('.')[0]
    trend = []
    index = []
    with open(path, 'rb') as new:
        newread = csv.reader(new, delimiter='\n')
        for i, var in enumerate(newread):
            #            if re.findall(r'\d+-\d+-\d+',str(var)) != [] and int(str(var[0])[0:4])>=2007:
            if re.findall(r'\d+-\d+-\d+', str(var)) != []:
                trend.append(var[0].split(',')[1])
                index.append(var[0].split(',')[0])
    if trend == []:
        return
    my_trend = [float(var) for var in trend[1:]]
Example #34
    set_active_unpaused = set_active_gene & (set_total_gene - set_paused_gene)
    set_unactive_paused = (set_total_gene - set_active_gene) & set_paused_gene
    set_unactive_unpaused = (set_total_gene - set_active_gene) & (
        set_total_gene - set_paused_gene)
    return set_active_paused, set_active_unpaused, set_unactive_paused, set_unactive_unpaused


def write_gene_to_file(set_gene, file_name):
    with open(file_name, 'w') as f:
        for i in set_gene:
            f.write(i + '\n')


import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
importr('org.Sc.sgd.db')
importr('clusterProfiler')


def go_analysis(set_gene_file, output):
    DE_list = robjects.StrVector(list(set_gene_file))

    rcode_bp = 'enrichGO(gene= %s,keyType="ENZYME",OrgDb =%s ,ont ="%s", \
    pAdjustMethod = "BH",pvalueCutoff = 0.01, qvalueCutoff = 0.05, \
    readable = TRUE)' % (DE_list.r_repr(), 'org.Sc.sgd.db', 'BP')
    rcode_cc = 'enrichGO(gene= %s,keyType="ENZYME",OrgDb =%s ,ont ="%s", \
    pAdjustMethod = "BH",pvalueCutoff = 0.01, qvalueCutoff = 0.05, \
    readable = TRUE)' % (DE_list.r_repr(), 'org.Sc.sgd.db', 'CC')
    rcode_mf = 'enrichGO(gene= %s,keyType="ENZYME",OrgDb =%s ,ont ="%s", \
    pAdjustMethod = "BH",pvalueCutoff = 0.01, qvalueCutoff = 0.05, \
    readable = TRUE)' % (DE_list.r_repr(), 'org.Sc.sgd.db', 'MF')
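# The generated enrichGO call strings would then be evaluated with
# robjects.r (a hedged continuation sketch; names are illustrative):
# result_bp = robjects.r(rcode_bp)
# result_cc = robjects.r(rcode_cc)
# result_mf = robjects.r(rcode_mf)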
Example #35
import pandas as pd
import numpy as np
import math
from rpy2.robjects.packages import importr
import rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector
from rpy2.robjects import pandas2ri
from rpy2.robjects import Formula
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline

# Install packages
pandas2ri.activate()

utils = rpackages.importr('utils')
utils.chooseCRANmirror(ind=1)

packnames = ['vegan','zoo','stats']
names_to_install = []
# equivalently: names_to_install = [x for x in packnames if not rpackages.isinstalled(x)]

for x in packnames:
    if not rpackages.isinstalled(x):
        names_to_install.append(x)

if len(names_to_install) > 0:
    utils.install_packages(StrVector(names_to_install))

# Setup R packages
rvegan = importr('vegan')
Example #36
import numpy as np
from chainladder.utils.cupy import cp
import pytest
import chainladder as cl
from rpy2.robjects.packages import importr
from rpy2.robjects import r

CL = importr('ChainLadder')


@pytest.fixture
def atol():
    return 1e-5


def mack_r(data, alpha, est_sigma, tail):
    if tail:
        return r(
            'mack<-MackChainLadder({},alpha={}, est.sigma="{}", tail=TRUE)'.
            format(data, alpha, est_sigma))
    else:
        return r('mack<-MackChainLadder({},alpha={}, est.sigma="{}")'.format(
            data, alpha, est_sigma))


def mack_p(data, average, est_sigma, tail):
    if tail:
        return cl.MackChainladder().fit(
            cl.TailCurve(curve='exponential').fit_transform(
                cl.Development(average=average,
                               sigma_interpolation=est_sigma).fit_transform(
Example #37
from __future__ import division
import mne, os
import numpy as np
from scipy.stats import ttest_rel
from scipy import io as spio
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector
from xfrange import xfrange

stats = importr('stats')

markers = ['Obj', 'Subj']
groups = ['kids', 'adults']
basePath = os.environ['DATDIR']
hemispheres = ['lh', 'rh']
metrics = ['signed', 'norm']
channelNames = ['Left-', 'Right-']
locations = ['PAC', 'pSTG', 'aSTG', 'pSTS', 'aSTS', 'BA45', 'BA44', 'BA6v']

# Load list of subjects
subjects = [[], []]
dirEntries = os.listdir(os.environ['LOCDIR'])
for dirEntry in dirEntries:
    if len(dirEntry) == 5 and dirEntry[0:2] == 'dh':
        if int(dirEntry[2:4]) > 50:
            subjects[1].append(dirEntry)
        else:
            subjects[0].append(dirEntry)
timescale = xfrange(-1.0, 4.0, 0.001)

# Create regular time intervals
Example #38
        }
        try:
            x = kinds[x.dtype.kind](x)
        except KeyError:
            pass  # just pass it along
    return numpy2ri(x)


ro.conversion.py2ri = numpy2ri_avoiding_zerodim

# make inline happy
if list(ro.r('Sys.getenv("R_ARCH")'))[0] == '':
    arch = list(ro.r('.Platform$r_arch'))[0]
    ro.r('Sys.setenv(R_ARCH="/{}")'.format(arch))

rstan = importr('rstan')

# TODO: stan_model, stanfit class wrappers


def get_model(filename, cache_filename=None, check_times=True, use_cache=True):
    '''
    Returns a stan_model for the model code in filename.
    If use_cache (by default), tries to load the compiled file from
    cache_filename (default filename + '.model.pkl[2|3].gz') if available,
    otherwise compiles it and saves into the gzipped, pickled cache file.
    '''
    if cache_filename is None and use_cache:
        cache_filename = '{}.model.pkl{}.gz'.format(filename,
                                                    sys.version_info[0])
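    # Hedged sketch of the load-or-compile logic the docstring describes
    # (the calls below are illustrative, not the original function body):
    # import gzip, os, pickle
    # if use_cache and os.path.exists(cache_filename):
    #     with gzip.open(cache_filename, 'rb') as f:
    #         return pickle.load(f)
    # model = rstan.stan_model(file=filename)
    # with gzip.open(cache_filename, 'wb') as f:
    #     pickle.dump(model, f)
    # return model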
Example #39
import pytest
import contextlib
import os
import tempfile

from rpy2.robjects.packages import importr, data
datasets = importr('datasets')
mtcars = data(datasets).fetch('mtcars')['mtcars']
from rpy2.robjects import r

from rpy2.robjects.lib import grdevices


@contextlib.contextmanager
def set_filenames_to_delete():
    todelete = set()
    yield todelete
    for fn in todelete:
        if os.path.exists(fn):
            os.unlink(fn)


def test_rendertobytes_noplot():
    with grdevices.render_to_bytesio(grdevices.png) as b:
        pass
    assert len(b.getvalue()) == 0


def test_rendertofile():
    fn = tempfile.mktemp(suffix=".png")
    with set_filenames_to_delete() as todelete:
Example #40
    def run_fe1(self, params):
        """
        run_fe1: Functional Enrichment One

        required params:
        feature_set_ref: FeatureSet object reference
        workspace_name: the name of the workspace it gets saved to

        optional params:
        propagation: includes is_a relationship to all go terms (default is 1)
        filter_ref_features: filter reference genome features with no go terms (default is 0)
        statistical_significance: parameter for statistical significance.
                                  Select one from left_tailed, right_tailed or two_tailed
                                  (default is left_tailed)
        ignore_go_term_not_in_feature_set: ignore Go term analysis if term is not associated with
                                           FeatureSet (default is 1)

        return:
        result_directory: folder path that holds all files generated by run_fe1
        report_name: report name generated by KBaseReport
        report_ref: report reference generated by KBaseReport
        """
        log('--->\nrunning FunctionalEnrichmentUtil.run_fe1\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self._validate_run_fe1_params(params)
        propagation = params.get('propagation', True)
        filter_ref_features = params.get('filter_ref_features', False)
        statistical_significance = params.get('statistical_significance',
                                              'left_tailed')
        ignore_go_term_not_in_feature_set = params.get(
            'ignore_go_term_not_in_feature_set', True)

        result_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(result_directory)

        feature_set_ids, genome_ref = self._process_feature_set(
            params.get('feature_set_ref'))

        (feature_id_go_id_list_map, go_id_feature_id_list_map,
         go_id_go_term_map, feature_id_feature_info_map
         ) = self._get_go_maps_from_genome(genome_ref)

        if filter_ref_features:
            log('start filtering features with no term')
            feature_ids = []
            for feature_id, go_ids in feature_id_go_id_list_map.iteritems():
                if isinstance(go_ids, list):
                    feature_ids.append(feature_id)
        else:
            feature_ids = feature_id_go_id_list_map.keys()

        ontology_hash = dict()
        ontologies = self.ws.get_objects([{
            'workspace': 'KBaseOntology',
            'name': 'gene_ontology'
        }, {
            'workspace': 'KBaseOntology',
            'name': 'plant_ontology'
        }])
        ontology_hash.update(ontologies[0]['data']['term_hash'])
        ontology_hash.update(ontologies[1]['data']['term_hash'])

        if propagation:
            go_id_parent_ids_map = self._generate_parent_child_map(
                ontology_hash,
                go_id_go_term_map.keys(),
                regulates_relationship=False)
        else:
            go_id_parent_ids_map = {}
            for go_id in go_id_go_term_map.keys():
                go_id_parent_ids_map.update({go_id: []})

        log('including parents to feature id map')
        for go_id, parent_ids in go_id_parent_ids_map.iteritems():
            mapped_features = go_id_feature_id_list_map.get(go_id)

            for parent_id in parent_ids:
                parent_mapped_features = go_id_feature_id_list_map.get(
                    parent_id)

                if not parent_mapped_features:
                    parent_mapped_features = []

                if mapped_features:
                    parent_mapped_features += mapped_features

                go_id_feature_id_list_map.update(
                    {parent_id: list(set(parent_mapped_features))})

        log('start calculating p-values')
        enrichment_map = {}
        go_info_map = {}
        all_raw_p_value = []
        pos = 0
        for go_id, go_term in go_id_go_term_map.iteritems():
            mapped_features = go_id_feature_id_list_map.get(go_id)
            # in feature_set matches go_id
            a = len(set(mapped_features).intersection(feature_set_ids))
            # ignore go term analysis if not associated with FeatureSet
            if ignore_go_term_not_in_feature_set and a == 0:
                continue
            # in feature_set doesn't match go_id
            b = len(feature_set_ids) - a
            # not in feature_set matches go_id
            c = len(mapped_features) - a
            # not in feature_set doesn't match go_id
            d = len(feature_ids) - len(feature_set_ids) - c

            fisher_value = fisher.pvalue(a, b, c, d)
            if statistical_significance == 'left_tailed':
                raw_p_value = self._round(fisher_value.left_tail)
            elif statistical_significance == 'right_tailed':
                raw_p_value = self._round(fisher_value.right_tail)
            elif statistical_significance == 'two_tailed':
                raw_p_value = self._round(fisher_value.two_tail)
            else:
                raise ValueError('Improper statistical_significance value')

            all_raw_p_value.append(raw_p_value)
            go_info_map.update({
                go_id: {
                    'raw_p_value': raw_p_value,
                    'num_in_ref_genome': len(mapped_features),
                    'num_in_subset_feature_set': a,
                    'pos': pos,
                    'mapped_features': mapped_features
                }
            })
            pos += 1

        stats = importr('stats')
        adjusted_p_values = stats.p_adjust(FloatVector(all_raw_p_value),
                                           method='fdr')

        for go_id, go_info in go_info_map.iteritems():
            if go_id not in ontology_hash:
                continue

            adjusted_p_value = self._round(
                adjusted_p_values[go_info.get('pos')])
            namespace = ontology_hash[go_id]['namespace']
            enrichment_map.update({
                go_id: {
                    'raw_p_value':
                    go_info.get('raw_p_value'),
                    'adjusted_p_value':
                    adjusted_p_value,
                    'num_in_ref_genome':
                    go_info.get('num_in_ref_genome'),
                    'num_in_subset_feature_set':
                    go_info.get('num_in_subset_feature_set'),
                    'go_term':
                    go_id_go_term_map.get(go_id),
                    'namespace':
                    namespace.split("_")[1][0].upper(),
                    'mapped_features':
                    go_info.get('mapped_features')
                }
            })

        returnVal = {'result_directory': result_directory}
        report_output = self._generate_report(enrichment_map, result_directory,
                                              params.get('workspace_name'),
                                              feature_id_go_id_list_map,
                                              feature_set_ids, genome_ref,
                                              go_id_parent_ids_map,
                                              feature_ids)

        returnVal.update(report_output)

        return returnVal
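        # Hedged usage sketch (object construction and values are
        # illustrative, based only on the docstring above):
        # fe_util = FunctionalEnrichmentUtil(config)
        # ret = fe_util.run_fe1({'feature_set_ref': '1234/5/6',
        #                        'workspace_name': 'my_workspace',
        #                        'statistical_significance': 'left_tailed'})
        # ret['result_directory'], ret['report_name'], ret['report_ref']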
Example #41
from rpy2.robjects.packages import importr
# Using importr, we import R packages that then work somewhat
# like Python modules

#%%
## RUN THIS if you do not have the igraph package installed (to install it)
## import rpy2's package module
## select a mirror for R packages
#utils = importr('utils')
#utils.chooseCRANmirror(ind=2) # choose where to download the package from
## Install
#from rpy2.robjects.vectors import StrVector
#utils.install_packages(StrVector(['igraph']))
#%%
# Fit the power law
igraph = importr('igraph')
# Create an R vector from the degrees
degrees_r = ro.FloatVector(degrees)
# fit_power_law documentation:
# https://rdrr.io/cran/igraph/man/fit_power_law.html
resultado = igraph.fit_power_law(degrees_r, implementation='plfit')
print(resultado.r_repr())

#%%
# Plot histogram + fit
kmin = resultado.rx2('xmin')[0]
gamma = resultado.rx2('alpha')[0]
ksp = resultado.rx2('KS.p')[0]

from scipy.special import zeta
Example #42
chrom_dict = gdb.get_chromosome_dict()

sys.stderr.write("reading transcripts\n")
trs = genome.transcript.read_transcripts(tr_path, chrom_dict)
tr_dict = dict([(tr.name, tr) for tr in trs])

for tr_name in tr_names:
    if tr_name not in tr_dict:
        sys.stderr.write("WARNING: could not find transcript %s\n" % tr_name)
        continue

    tr = tr_dict[tr_name]

    r = robjects.r

    grdevices = importr('grDevices')

    output_format = "pdf"
    output_filename = "%s.%s" % (tr_name, output_format)
    width = 8
    height = 5

    sys.stderr.write("drawing transcript (filename=%s)\n" % output_filename)

    grdevices.pdf(file=output_filename, width=width, height=height)

    region = tr
    options = {
        'color': "#08306B",
        'utr_color': '#DEEBF7',
        'border': 'false',
Example #43
import numpy as np
from genieclust.genie import *
from genieclust.inequity import *
from genieclust.compare_partitions import *
import time
import gc, os

import scipy.spatial.distance
from rpy2.robjects.packages import importr
stats = importr("stats")
genie = importr("genie")
genieclustr = importr("genieclust")
import numpy as np
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()

verbose = False

np.random.seed(123)
n = 50000
d = 69
X = np.random.normal(size=(n, d))
labels = np.random.choice(np.r_[1, 2, 3, 4, 5, 6, 7, 8], n)

k = len(np.unique(labels[labels >= 0]))
# center X + scale (NOT: standardize!)
X = (X - X.mean(axis=0)) / X.std(axis=None, ddof=1)
g = 0.3
metric = "euclidean"

print("n=%d, d=%d, g=%.2f, k=%d" % (n, d, g, k))
Example #44
def art_two_way_anova(dataDF):
    """
    Performs Aligned rank transform of the data followed by two-way Anova.
    Ref: Wobbrock, J.O., Findlater, L., Gergle, D. and Higgins, J.J. (2011).
    "The Aligned Rank Transform for nonparametric factorial analyses using only ANOVA procedures."
    Proceedings of the ACM Conference on Human Factors in Computing Systems (CHI '11). doi: 10.1145/1978942.1978963
    :param dataDF: pandas.DataFrame with the following column ordering:
                      Column 1. Factor 1
                      Column 2. Factor 2
                      Column 3. Measurements
    :return: correctness_ART, pVal_tuple
             correctness_ART: bool, correctness of ART procedure (see section "Ensuring Correctness" of the reference
                              paper)
             pVal_tuple: tuple, has three members -- pVal for effect of Factor 1, pVal for effect of Factor 2,
                         pVal for the interaction effect between Factor 1 and Factor 2.

    """

    assert isinstance(dataDF, pd.DataFrame), \
        "Input <dataDF> is not a pandas DataFrame as expected"
    assert dataDF.shape[1] == 3, \
        "The number of columns in <dataDF> is not 3 as expected"

    ART_IPDF_r = robjects.DataFrame({
        "f1":
        robjects.FactorVector(dataDF.iloc[:, 0]),
        "f2":
        robjects.FactorVector(dataDF.iloc[:, 1]),
        "m":
        robjects.FloatVector(dataDF.iloc[:, 2])
    })

    rUtils = rpackages.importr("utils")
    rUtils.chooseCRANmirror(ind=1)

    try:
        rpackages.importr("ARTool")
    except RRuntimeError as re:
        if str(re).find("Error in loadNamespace") >= 0:
            print(
                "Installing package \"ARTool\" in the embedded R. This might take a while"
            )
            rUtils.install_packages(robjects.StrVector(["ARTool"]))
            rpackages.importr("ARTool")
        else:
            raise re

    ARTFunc_r = robjects.r["art"]

    modelFormula = robjects.Formula("m~f1*f2")
    ART_OPDF = ARTFunc_r(modelFormula, data=ART_IPDF_r)
    rsummary = robjects.r["summary"]

    try:
        ART_OP_SUM = rsummary(ART_OPDF)
    except RRuntimeError as re:
        if str(re).find("Error in Anova.lm") >= 0:
            return 1, (np.nan, np.nan, np.nan)
        else:
            raise (re)

    columnSums = np.array(ART_OP_SUM[10])
    fVal_aligned_anova = np.array(ART_OP_SUM[11][4])

    ART_success = np.allclose(columnSums, 0) and np.allclose(
        fVal_aligned_anova, 0)

    ranova = robjects.r("anova")
    ART_res = ranova(ART_OPDF)

    return ART_success, tuple(ART_res[6])
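# Hedged usage sketch (column names and data are illustrative; the input
# layout follows the docstring above):
# import numpy as np
# import pandas as pd
# df = pd.DataFrame({'factor1': ['a', 'a', 'b', 'b'] * 5,
#                    'factor2': ['x', 'y'] * 10,
#                    'measure': np.random.rand(20)})
# ok, (p_f1, p_f2, p_interaction) = art_two_way_anova(df)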
Example #45
import rpy2
import numpy as np
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import FloatVector as FV
from rpy2.robjects import numpy2ri
numpy2ri.activate()
from numpy import asarray as npa
import numbers

mvtnorm = importr('mvtnorm')
r'''
We require the R package `mvtnorm` to compute certain high dimensional expectations
involving erfs, by reducing such expectations to Gaussian orthant probabilities
and evaluating them with `mvtnorm`.
'''
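# A tiny concrete instance of that reduction (a hedged sketch, not from the
# original module; the rho = 0.5 orthant probability has the standard closed
# form 1/4 + asin(0.5)/(2*pi) ~ 0.3333 to check against):
# sigma = np.array([[1.0, 0.5], [0.5, 1.0]])
# p = mvtnorm.pmvnorm(lower=np.zeros(2), mean=np.zeros(2), sigma=sigma)
# print(p[0])  # ~0.3333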


def Eerf2(mu, var):
    r'''
    Computes
        E[erf(x)^2: x ~ N(mu, var)]
    when mu and var are scalars, or
        E[erf(x) erf(y): (x, y) ~ N(mu, var)]
    when mu is a length-2 vector and var is a 2x2 matrix
    Example:
        >>> Eerf2([0, 0], [[1, 1], [1, 1]])
        0.4645590543975399
        
    Inputs:
        mu: scalar or length-2 vector
Example #46
import rpy2.rinterface as rinterface
from rpy2.robjects import r, pandas2ri
from rpy2.robjects.methods import RS4
from rpy2.robjects.packages import importr
from scipy.sparse import csr_matrix

import os
import pandas
import numpy
import argparse
import warnings

warnings.filterwarnings("ignore")
pandas2ri.activate()

SingleCellExperimentInterface = importr('SingleCellExperiment')
SummarizedExperimentInterface = importr('SummarizedExperiment')
BiocGenericsInterface = importr('BiocGenerics')
MatrixInterface = importr('Matrix')


class SingleCellExperiment(RS4):
    @classmethod
    def fromRData(sce_class, rdata):
        rs4_object = r.readRDS(rdata)
        sce = sce_class.fromRS4(rs4_object)
        sce.rs4 = rs4_object
        return sce

    def asSummarizedExperiment(self):
        data = r["SummarizedExperiment"](self.rs4)
Example #47
def extract_dataframe_from_R(dataframe_name):
    temp = pandas2ri.ri2py(r(dataframe_name))
    temp_rows = pandas2ri.ri2py(r("rownames(" + dataframe_name + ")"))
    temp_cols = np.float32(
        pandas2ri.ri2py(r("colnames(" + dataframe_name + ")")))

    df = pd.DataFrame(data=temp, columns=temp_cols, index=temp_rows)
    return df


mpl.use("Agg")
mpl.rcParams['pdf.fonttype'] = 42
# mpl.style.use('ggplot')

R = rpy2.robjects.r
DTW = importr('dtw')
DTWclust = importr('dtwclust')

# Load data sets in R
r("""load("/scratch/PI/mcovert/dvanva/sequencing/all_cells_scde_fit_linear.RData")"""
  )
r("""load("/scratch/PI/mcovert/dvanva/sequencing/counts_data.RData")""")

# Load pickle file with cell objects
direc = '/scratch/PI/mcovert/dvanva/sequencing/'
all_cell_file = 'all_cells_qc_complete.pkl'
all_cells_total = pickle.load(open(os.path.join(direc, all_cell_file)))

# Determine which genes to look at
inflammatory_genes = [
    "Cxcl3", "Cxcl2", "Lif", "Ccl4", "Csf3", "Il1f9", "Ccl3", "Ccl5", "Tnf",
###
# Create overlap

overlap_lh = (scan1_lh[:] > 0) * (scan2_lh[:] > 0)
overlap_rh = (scan1_rh[:] > 0) * (scan2_rh[:] > 0)

###

###
# Plot overlap

# Color bar
cbar = load_colorbar(cbarfile)

# Colors for the peaks
colorspace = importr('colorspace')
cols = np.array(
    robjects.r(
        'rbind(col2rgb(rainbow_hcl(4, c=100, l=65, start=15)), rep(255, 4))'))
cols = cols.T / 255

# Input
surf_data = {"lh": overlap_lh, "rh": overlap_rh}

## Just maxima

# Output
oprefix = path.join(odir, "check_overlap_with_maxima")

# Loop
for hemi in hemis:
Example #49
from itertools import groupby
from gensim.models import Word2Vec
import glob
import math
import itertools
from sklearn.metrics import *
import pandas as pd
import csv

np.random.seed(1234)

r = robjects.r
rpy2.robjects.numpy2ri.activate()

#np.set_printoptions(threshold = 1e6)
importr('genlasso')
importr('gsubfn')

def perf_measure(y_true, y_pred):
    TP_FN = np.count_nonzero(y_true)
    FP_TN = y_true.shape[0] * y_true.shape[1] - TP_FN
    FN = np.where((y_true - y_pred) == 1)[0].shape[0]
    TP = TP_FN - FN
    FP = np.count_nonzero(y_pred) - TP
    TN = FP_TN - FP
    Precision = TP / (TP + FP + 1e-9)
    Recall = TP / (TP + FN + 1e-9)
    accuracy = (TP + TN) / (TP_FN + FP_TN + 1e-9)
    F1 = 2 * ((Precision * Recall) / (Precision + Recall))
    return Precision, Recall, accuracy
Example #50
    b_1 = cur_param[J:(2*J)]
    W_2 = cur_param[(2*J):(2*J+J*K)]
    b_2 = cur_param[(2*J+J*K):(2*J+J*K+K)]
    W_3 = cur_param[(2*J+J*K+K):(2*J+J*K+2*K)]
    b_3 = cur_param[2*J+J*K+2*K]
    nn = neural_net(X,W_1,b_1,W_2,b_2,W_3,b_3)
    est_q[i,] = np.reshape((nn+1)*(max_y-min_y)/2+min_y,(len(X),))

est_q = np.mean(est_q, axis = 0)

plt.plot(X_s,y_s,'k.')
plt.plot(X_s,q_true,'r-',label='True')
plt.plot(X_s,est_q,'b-',label='Estimate')
plt.legend()

utils = importr("utils")
utils.install_packages("MASS")
MASS = importr("MASS")
motor = data(MASS).fetch('mcycle')['mcycle']
motor = pandas2ri.ri2py(motor)

plt.plot('times', 'accel', '.', data = motor)
plt.xlabel('Time')
plt.ylabel('Acceleration')

X = np.array(motor['times'])
X = np.reshape(X,(len(X),1))
y = np.array(motor['accel'])
y = np.reshape(y,(len(y),1))

max_X = np.max(X)
Example #51
    def run(self, data, regression, resources=None):
        """
        The method prints out summary of the BMA procedure and creates an imageplot.
        If resources has an entry 'bma_imageplot_filename', the imageplot is sent to this file as pdf.
        The method does not return any useful results - it is a tool for variable selection.
        Once you selected your variables, use estimate_linear_regression for further usage of the coefficients. 
        
        Expects an entry 'outcome' in resources that provides the values of the dependent variable.
        'data' is a 2D numpy array of the actual data (nobservations x ncoefficients),
            it can be created by Dataset.create_regression_data_for_estimation(...).
        'regression' is an instance of a regression class.
        """
        r = robjects.r
        if data.ndim < 2:
            raise StandardError("Argument 'data' must be a 2D numpy array.")

        nobs = data.shape[0]
        nvar = data.shape[1]
        constant_position = resources.get("constant_position",  array([], dtype='int32')) #position for intercept

        if constant_position.size == 0: #position for intercept
            constant_position=-1
            nvalues = nvar
        else:
            constant_position=constant_position[0]
            nvalues = nvar+1

        beta = zeros(nvalues).astype(float32)

        coef_names = resources.get("coefficient_names",  nvar*[])
        data_for_r = {}
        for icoef in range(len(coef_names)):
            data_for_r[coef_names[icoef]] = robjects.FloatVector(data[:, icoef])
        bma = importr("BMA")
        d = robjects.DataFrame(data_for_r)
        try:
            bma_params = {'x': d, 'y': robjects.FloatVector(resources["outcome"]), 'glm.family': "gaussian", 'strict':1}
            #fit = bma.bic_glm(x=d, y=resources["outcome"], glm_family="gaussian", strict=1)
            fit = bma.bic_glm(**bma_params)
            fit[20] = '' # to have less output in the summary
            r.summary(fit)
            estimates = array(fit[11])
            standard_errors = array(fit[12])
            postprob = concatenate((array([1]), array(fit[10])/100.)) # add intercept (always included, therefore 1)
            nmodels = fit[21][0]
            filename = resources.get('bma_imageplot_filename', None)
            subtitle = ''
            submodel = resources.get('submodel', -2)
            if submodel > -2:
                subtitle = "Submodel: %s" % submodel
            plot_params = {'bma.out':fit, 'cex.axis':0.7, 'sub': subtitle}
            if filename is not None:
                r.pdf(file=filename)
                bma.imageplot_bma(**plot_params)
                r['dev.off']()
            else:
                r.X11()
                bma.imageplot_bma(**plot_params)
            result = {"estimators":estimates, "standard_errors":standard_errors,
                      "other_measures":{"post_probability": postprob,
                                        "t_statistic": zeros(estimates.size) # not applicable
                                        },
                      "other_info":{"nmodels": nmodels}}
        except:
            logger.log_warning("Error in BMA procedure.")
            result = {}
        return result
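# For reference, a minimal standalone sketch of the bic_glm call wrapped above
# (it mirrors the commented-out call in the try block; the toy data here is
# hypothetical and assumes R with the BMA package installed):
import numpy as np
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

bma = importr("BMA")
x = robjects.DataFrame({
    "x1": robjects.FloatVector(np.random.rand(50)),
    "x2": robjects.FloatVector(np.random.rand(50)),
})
y = robjects.FloatVector(np.random.rand(50))
fit = bma.bic_glm(x=x, y=y, glm_family="gaussian", strict=1)
robjects.r.summary(fit)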
Example #52
0
import os

import scipy.cluster.hierarchy as sch
from seq_functions import smFISH_cell, smFISH_stim_cell
import rpy2
import rpy2.robjects.numpy2ri
from rpy2.robjects.packages import importr
import cPickle as pickle
import pandas as pd
rpy2.robjects.numpy2ri.activate()
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
matplotlib.rcParams['pdf.fonttype'] = 42
"""
Initialize R instances
"""

R = rpy2.robjects.r
DTW = importr('dtw')
DTWCLUST = importr('dtwclust')
"""
Load excel files
"""

direc = "/scratch/PI/mcovert/dvanva/sequencing/smFISH"

file_name = os.path.join(direc, "12072015", "12072015_ExperMetadata.xlsx")
data_0 = pd.read_excel(file_name, sheetname=0)

file_name = os.path.join(direc, "12162015", "12162015_ExperMetadata.xlsx")
data_1 = pd.read_excel(file_name, sheetname=0)
"""
Load cluster averages
"""
Example #53
0
from utility import genofile_parser  # genofile_parser

import base64
import array
import csv
import itertools

from base import data_set
from base import trait as TRAIT

from utility import helper_functions
from utility.tools import locate

import rpy2.robjects as ro
from rpy2.robjects.packages import importr

utils = importr("utils")

r_library = ro.r["library"]  # Map the library function
r_options = ro.r["options"]  # Map the options function
r_write_table = ro.r["write.table"]  # Map the write.table function
r_head = ro.r["head"]  # Map the head function
r_load = ro.r["load"]  # Map the load function
r_colnames = ro.r["colnames"]  # Map the colnames function
r_list = ro.r["list"]  # Map the list function
r_c = ro.r["c"]  # Map the c (combine) function
r_print = ro.r["print"]  # Map the print function
r_seq = ro.r["seq"]  # Map the seq (sequence) function


class EPheWAS(object):
    def __init__(self):
Example #54
0
def characterize(trk_in, hdr_in, normalize=True):
    '''
    Convert a track tuple to an adehabitatLT ltraj object, segment it, and
    classify each record.
    Returns (status, sgid) lists aligned with trk_in.
    '''

    ##Embed trk normalization
    if normalize:
        print 'Normalize track to hourly bin.'

        if len(trk_in) < 50:
            print 'Track length < 50, abort characterization', len(trk_in)
            sta = ['Pending'] * len(trk_in)
            sgid = [vms_constants.null_val_str] * len(trk_in)
            return sta, sgid

        trk, hdr = vms_tools.vms_track_normalize(trk_in, hdr_in)
        print 'before normalize:', len(trk_in)
        print 'after normalize:', len(trk)
#        vms_visualize.plot_track(trk_in,hdr_in)
#        vms_visualize.plot_track(trk,hdr)

    else:
        trk = trk_in[:]
        hdr = hdr_in[:]

    lat_id = hdr.index('LATITUDE')
    lon_id = hdr.index('LONGITUDE')
    tno_id = hdr.index('TRANSMITTER_NO')
    pdt_id = hdr.index('REPORTDATE')

    #    print trk[0]

    rlat = ro.FloatVector([float(i[lat_id]) for i in trk])
    rlon = ro.FloatVector([float(i[lon_id]) for i in trk])
    rtno = ro.StrVector([str(i[tno_id]) for i in trk])
    rpdt = ro.StrVector([str(i[pdt_id]) for i in trk])

    #    print rpdt

    r.assign('y', rlat)
    r.assign('x', rlon)
    r.assign('tno', rtno)
    r.assign('pdt', rpdt)

    none_list = np.chararray(len(rlat), itemsize=15)
    none_list[:] = vms_constants.null_val_str  #'NA'

    r('xy <- cbind(x,y)')
    #    r('XY <- as.data.frame(xy)')

    ##todo: reproject XY to UTM zones designated by the average longitude,
    ##so that the velocity model can be defined in km/h instead of deg/h
    ##Reference: https://stackoverflow.com/questions/7927863/location-data-format-for-adehabitat-package
    importr('rgdal')
    print 'mean(x)', r('mean(x)')
    print 'max(x)', r('max(x)')
    print 'min(x)', r('min(x)')
    r('utmz <- (floor((mean(x) + 180)/6) %% 60) + 1')  #find utmz
    r('utmxy <- project (xy,paste("+proj=utm +zone=",toString(utmz)," ellps=WGS84"))')
    r('XY <- as.data.frame(utmxy)')

    importr('adehabitatLT')
    importr('miscTools')
    r('pdt <- as.POSIXct(strptime(pdt,"%Y%m%d%H%M%S"),"GMT")')
    r('trl <- as.ltraj(XY,id=tno,date=pdt)')

    #    print r('trl')

    #    r('plotltr(trl,"dist")')

    #####
    ##Take care of the landing records, which will create NA angle and cause partmod.ltraj to fail.

    ##If there is no Active record in the trl, return trl and skip segmentation
    ##Add a stationary detector to avoid a landing being recognized as mod.1 '4926123 2014Apr.'

    #    print r('trl')
    #    vms_visualize.plot_track(trk,hdr)
    #    return r('trl'),None

    sta = check_stationary(r('trl'))
    if 'Active' not in set(sta):
        print 'The track is completely stationary. Skip segmentation and classification.'
        #        return consolidate_trl(r('trl'),r('trl'),sta=sta)
        sg = np.chararray(len(sta), itemsize=15)
        sg[:] = vms_constants.null_val_str
        return denormalize_status(trk_in, hdr_in, trk, hdr, sta, sg)
#    print sta
#    print set(sta)
#    float('a')

#test K
    print 'Begin track segmentation and classification.'

    #check if there are stationary points
    #if true, the track needs to be split
    '''check stationary by looking for dist==0'''
    #    r('length(which.ltraj(trl,"dist==0")[[1]])')

    #remove stationary records

    r('trlo <- trl')  #backup original trl as trlo
    print 'Found Stationary:', 'Stationary' in sta
    print 'Found Landing:', 'Landing' in sta
    if ('Stationary' in sta) or ('Landing' in sta):
        print 'Remove stationary records'
        sta_idx = [
            i + 1 for (i, j) in enumerate(sta)
            if j == 'Stationary' or j == 'Landing'
        ]

        r.assign('sta_idx', ro.IntVector(sta_idx))
        r('trlf <- trl[[1]][-c(sta_idx),]')  # filter active recs with the check_stationary result
        print r('trl <- as.ltraj(trlf[,c("x","y")],trlf[,c("date")],id=id(trl))')  # recast into ltraj

#    print 'paused after remove stationary records'
#    raw_input()
    trl_len = r('length(trl[[1]]$x)')
    print trl_len[0]
    if trl_len[0] < 50:
        print 'Active records less than 50, abort characterization.', trl_len[0]
        print 'The status is marked [Pending] for a later run to pick up.'
        sta[:] = 'Pending'
        sg = np.chararray(len(sta), itemsize=15)
        sg[:] = vms_constants.null_val_str
        status, sgid = denormalize_status(trk_in, hdr_in, trk, hdr, sta, sg)
        return status, sgid
#    print 'force exit'
#    sys.exit(1)
##TODO: change velocity model from degree based to meter based
    r('tested.means <- seq(0,20000,length=10)')
    r('(limod <- as.list(paste("dnorm(dist, mean =",tested.means,", sd = 2000)")))')

    #    r('tested.means <- seq(0, 0.25, length = 10)')
    #    r('(limod <- as.list(paste("dnorm(dist, mean =",tested.means,", sd = 0.03)")))')

    r('mod <- modpartltraj(trl,limod)')
    r('bestmod <- bestpartmod(mod,Km=round(length(trl[[1]]$x)/5),plotit=FALSE)')
    r('k <- which.max(colMedians(bestmod$correction,na.rm=TRUE))')  # requires colMedians from the miscTools package
    r('save(XY,trl,k,mod,limod,file="XY_trl_k_mod_limod.RData")')
    #    r('trl[[1]]$rel.angle[is.na(trl[[1]]$rel.angle)]<-0')
    #    r('trl[[1]]$abs.angle[is.na(trl[[1]]$abs.angle)]<-0')
    #    print r('trl[[1]]')

    ## if only one segment is recognized, put it to pending for future merging
    #    if r('k')[0]==1:
    #        print 'Unable to split track, set to pending.'
    #        sta[:] = 'Pending'
    #        status = denormalize_status(trk_in,hdr_in,trk,hdr,sta)
    #        return status

    while r('k')[0] > 0:
        try:
            if r('k')[0] == 1:
                print 'Unable to split track, set to pending.'
                sta[:] = 'Pending'
                sg = np.chararray(len(sta), itemsize=15)
                sg[:] = vms_constants.null_val_str
                status, sgid = denormalize_status(trk_in, hdr_in, trk, hdr,
                                                  sta, sg)
                return status, sgid
            else:
                print r('pm <- partmod.ltraj(trl,k,mod,na.manage=c("locf"))')
                break
        except:
            r('k<-k-1')  # if the previous attempt fails, nparts (k) was very likely overestimated by 1
            print 'Trying k=', r('k')[0]
            continue

#        print r('pm <- partmod.ltraj(trl,k,mod,na.manage=c("locf"))')

##    print r('pm <- partmod.ltraj(trl,k,mod,na.manage=c("prop.move","locf"))')
#    r('plot(pm)')

#    raw_input()
#    float('a')
#    vms_visualize.plot_track(trk,hdr)

##Check linearity
##Let consolidate_trl do the job
#    linear_list = check_linear(pm)
#    print 'consolidate_trail'
    status, sgid = consolidate_trl(r('trlo'), r('trl'), pm=r('pm'), sta=sta)
    #    print 'len(status)',len(status)
    #    return trk_in,hdr_in,trk,hdr,status
    #    return r('trl'),r('pm'),sta,linear_list,fish_list
    #    for i in range(0,len(status)):
    #        print i+1,status[i]

    #    print len(status),len(sgid)
    #    for i in range(0,len(status)-1):
    #        print [i,status[i],sgid[i]]
    #    raw_input()

    if normalize:
        # expand the hourly bins back to the original binning
        status_tmp = status[:]
        status, sgid = denormalize_status(trk_in, hdr_in, trk, hdr, status,
                                          sgid)


#    for i in range(0,len(status)-1):
#        print [i,status[i],sgid[i]]
#    raw_input()
#    sys.exit(1)#

# if a fishing segment touches a landing segment, the segment will be changed to transit
#    print status
    glist = [list(j) for i, j in groupby(status)]
    gname = np.array([i for i, j in groupby(status)])
    #    print gname
    #do nothing if there is only one segment
    if len(gname) == 1:
        return status, sgid

    for gidx in list(np.where(gname == 'Fishing')[0]):
        #        print gidx
        if gidx == 0:
            #            print gname[gidx+1]
            #            raw_input()
            if gname[gidx + 1] == 'Landing':
                glist[gidx][:] = ['Transit'] * len(glist[gidx][:])
        elif gidx == len(gname) - 1:
            if gname[gidx - 1] == 'Landing':
                glist[gidx][:] = ['Transit'] * len(glist[gidx][:])
        else:
            if gname[gidx + 1] == 'Landing' or gname[gidx - 1] == 'Landing':
                glist[gidx][:] = ['Transit'] * len(glist[gidx][:])

    status = [i for x in glist for i in x]

    return status, sgid
Example #55
0
import pandas as pd
import numpy as np
import statsmodels.api as sm
import rpy2.robjects as robjects
import rpy2.robjects.numpy2ri

from rpy2.robjects import pandas2ri
from scm_analytics import ScmAnalytics, Analytics
from statsmodels.api import families
from rpy2.robjects.packages import importr

base = importr('base')
stats = importr('stats')
broom = importr('broom')

pandas2ri.activate()
robjects.numpy2ri.activate()


# 129636, 38242
def surgery_usage_regression_df(surgery_df,
                                usage_df,
                                item_ids=[],
                                case_cart_df=None,
                                filters=[],
                                common_events=True):
    surgery_df = Analytics.process_filters(surgery_df, filters)
    surgery_df = surgery_df.drop_duplicates(subset=["event_id"])
    usage_df = Analytics.process_filters(usage_df, filters)
    if case_cart_df is not None:
        case_cart_df = Analytics.process_filters(case_cart_df, filters)
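# A hedged sketch of the stats/broom pattern the imports above set up (the toy
# frame and model are assumptions for illustration, not the original analysis):
from rpy2.robjects import Formula

toy = pd.DataFrame({"usage": [3.0, 5.0, 4.0, 8.0],
                    "cases": [1.0, 2.0, 2.0, 4.0]})
fit = stats.glm(Formula("usage ~ cases"), data=pandas2ri.py2ri(toy),
                family="poisson")
print(pandas2ri.ri2py(broom.tidy(fit)))  # tidy coefficient table as a pandas frame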
Example #56
0
import os
os.environ['R_HOME'] = 'C:/Program Files/R/R-3.4.2'
import pandas as pd
import rpy2
print(rpy2.__version__)
from rpy2.robjects.packages import importr

#Import R objects into python
import rpy2.robjects.packages as rpackages
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri

#Activate pandas to R translation
pandas2ri.activate()
from rpy2.robjects.vectors import DataFrame, StrVector, IntVector, ListVector

base = importr('base')

#Import an R script in order to load its R environment (global variables)
Imported_R_Environment = ro.r('source("path_to_R_file.R")')

#Load a particular R function from the freshly sourced environment as a Python-callable object
R_FUN_Loaded = ro.r('R')

def DF_Python_To_R(df, Factor_Cols=["Amount", "AmountUSD"]):
	#Function which translates a pandas DataFrame into an R DataFrame
	#Inputs are:
	#- the DataFrame which is going to be translated
	#- the names of columns that should behave as stringsAsFactors=TRUE columns
	#The purpose is to avoid an rpy2 translation issue where the values of such factor columns get turned into column names
	def df_Str(tab, col):
		return DataFrame({col: base.I(StrVector(tab[col]))})
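# A hedged end-to-end sketch of the same idea (toy frame; assumes the imports
# above): wrap string columns in base.I(StrVector(...)) so R keeps them as
# plain character columns instead of factors.
pdf = pd.DataFrame({"Amount": ["10", "20"], "Qty": [1, 2]})
r_df = DataFrame({
    "Amount": base.I(StrVector([str(v) for v in pdf["Amount"]])),
    "Qty": IntVector([int(v) for v in pdf["Qty"]]),
})
print(base.summary(r_df))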
Example #57
0
    def _exec_r_module(self):
        try:
            import rpy2.robjects
            from rpy2.robjects import numpy2ri
            from rpy2.robjects import pandas2ri
            from rpy2.robjects.packages import importr
        except ImportError:
            raise ImportError('R module cannot be run, because '
                              '"rpy2" package is not installed.')
        module_name = os.path.splitext(os.path.basename(self.source_file))[0]
        if os.path.exists(self.source_file):
            logger.debug('import module "%s" from source file: %s',
                         module_name, self.source_file)
            logger.debug('source module: "%s"', self.source_file)
            rpy2.robjects.r('source("{0}")'.format(self.source_file))
            module = rpy2.robjects.r[module_name]
        else:
            logger.debug('import module "%s" from "jtmodules" package',
                         module_name)
            rpackage = importr('jtmodules')
            module = getattr(rpackage, module_name)
        version = module.get('VERSION')[0]
        if version != self.handles.version:
            raise PipelineRunError(
                'Version of source and handles is not the same.')
        func = module.get('main')
        numpy2ri.activate()  # enables use of numpy arrays
        pandas2ri.activate()  # enable use of pandas data frames
        kwargs = self.keyword_arguments
        logger.debug('evaluate main() function with INPUTS: "%s"',
                     '", "'.join(kwargs.keys()))
        # R doesn't have unsigned integer types
        for k, v in kwargs.iteritems():
            if isinstance(v, np.ndarray):
                if v.dtype == np.uint16 or v.dtype == np.uint8:
                    logger.debug(
                        'module "%s" input argument "%s": '
                        'convert unsigned integer data type to integer',
                        self.name, k)
                    kwargs[k] = v.astype(int)
            elif isinstance(v, pd.DataFrame):
                # TODO: We may have to translate pandas data frames explicitly
                # into the R equivalent.
                # pandas2ri.py2ri(v)
                kwargs[k] = v
        args = rpy2.robjects.ListVector({k: v for k, v in kwargs.iteritems()})
        base = importr('base')
        r_out = base.do_call(func, args)

        for handle in self.handles.output:
            # NOTE: R functions are supposed to return a list. Therefore
            # we can extract the output argument using rx2().
            # The R equivalent would be indexing the list with "[[]]".
            if isinstance(r_out.rx2(handle.name),
                          rpy2.robjects.vectors.DataFrame):
                handle.value = pandas2ri.ri2py(r_out.rx2(handle.name))
                # handle.value = pd.DataFrame(r_out.rx2(handle.name))
            else:
                # NOTE: R doesn't have an unsigned integer data type.
                # So we cast to uint16.
                handle.value = numpy2ri.ri2py(r_out.rx2(handle.name)).astype(
                    np.uint16)
                # handle.value = np.array(r_out.rx2(handle.name), np.uint16)

        return self.handles.output
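# Minimal sketch of the list-extraction idiom used above: an R function that
# returns a named list, read back with rx2() (the R "[[ ]]" operator). The toy
# function is hypothetical, not part of the module.
import rpy2.robjects

f = rpy2.robjects.r('function(x) list(doubled = x * 2, label = "demo")')
out = f(rpy2.robjects.FloatVector([1.0, 2.0]))
print(list(out.rx2('doubled')))  # [2.0, 4.0]
print(out.rx2('label')[0])       # 'demo'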
Example #58
0
import pandas as pd
from rpy2.robjects.packages import importr
from rpy2.robjects import r as R

rsm = importr('rsm')
print(rsm.__rdata__)
print(type(R('pi')[0]))
ChemReact = R['ChemReact']
print(ChemReact)
print(type(ChemReact))

bbd = R['bbd']
print(bbd(3, 2)[5])
Example #59
0
import numpy as np

from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr

r_qmap = importr('qmap')
pandas2ri.activate()


def do_qmap(obs,
            c_mod,
            p_mod=None,
            proj_adj_type='cdf',
            wet_day=False,
            verbose=True):
    """ Quantile mapping
    
    Arguments:
        obs {ndarray} -- observed time series
        c_mod {ndarray} -- model data for the reference period
    
    Keyword Arguments:
        p_mod {ndarray} -- model data for the future scenario (default: {None})
        proj_adj_type {str} -- type of adjustment to be applied to the future scenario
                            -- 'cdf' quantile mapping (default)
                            -- 'edcdf' Equidistant CDF; Li et al. (2010)
                            -- 'dqm' Detrended QM; Cannon et al. (2015)
                            -- 'qdm' Quantile Delta Mapping; Cannon et al. (2015)
        wet_day {bool} -- indicating whether to perform wet day correction or not
                {float} -- threshold below which all values are set to zero
    
Example #60
0
import tensorflow as tf
import keras.backend as K
from mnist import *
from cifar10 import load_data, set_flags, load_model
from attack_utils import gen_grad
from tf_utils_adv import batch_eval
from os.path import basename
import numpy as np

from rpy2.robjects.packages import importr
from rpy2.robjects import FloatVector

from tensorflow.python.platform import flags

FLAGS = flags.FLAGS
multici = importr("MultinomialCI")


def isRobust(prob, sd, epsilon):
    fv = FloatVector(sorted(prob)[::-1])
    ci = np.array(multici.multinomialCI(fv, 0.05))
    qi = ci[0, 0]
    qj = ci[1, 1]
    alpha = np.linspace(1.01, 2.0, 100)
    # pdb.set_trace()
    bound = (-np.log(1 - qi - qj + 2 * (
        (qi**(1 - alpha) + qj**(1 - alpha)) / 2)**(1 / (1 - alpha))) /
             alpha).max()
    # return np.sqrt(bound*2.*sd**2)
    if bound > epsilon**(2.) / 2. / sd**(2.):
        return np.array([True, np.sqrt(bound * 2.) * sd])
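# A small self-contained check of the multinomialCI call used above
# (hypothetical class probabilities): one [lower, upper] row per class,
# simultaneous 95% confidence intervals.
probs = FloatVector(sorted([0.80, 0.15, 0.05])[::-1])
ci_demo = np.array(multici.multinomialCI(probs, 0.05))
print(ci_demo)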