def draw_plots(self, savedir, con, modelname):
    """Plot the predicted vs. the actual score, with different colors for each group."""
    import matplotlib.pyplot as plt
    import rpy2.robjects as rob
    from rpy2.robjects import FloatVector as FV
    from rpy2.robjects.packages import importr
    import numpy as np
    import os
    stats = importr('stats')
    base = importr('base')
    self.get_rmse()
    predscores, actualscores = self.prediction, self.actual
    meanerr, rmsqerr = np.mean(abs(self.pred_errors)), self.rmse
    plt.figure()
    plt.scatter(actualscores, predscores, s=70)
    x = np.array(range(100))
    plt.plot(x, x, 'g', label='optimal model')
    # fit the regression in R: pred ~ act (with intercept)
    rob.globalenv["pred"] = FV(predscores)
    rob.globalenv["act"] = FV(actualscores)
    mult_lm = stats.lm("pred ~ act + 1")
    coeffs = np.array(mult_lm.rx("coefficients")[0])
    rsqrd = base.summary(mult_lm).rx("r.squared")[0][0]
    y = coeffs[1] * x + coeffs[0]
    plt.plot(x, y, 'k', label='our model', linewidth=2)
    plt.xlabel("actual lsas delta")
    plt.ylabel("predicted lsas delta")
    plt.title("Predicted vs. Actual LSAS_delta score")
    plt.axis([0, 100, 0, 100])
    axes = plt.gca()  # plt.axes() would create a new (empty) axes on recent matplotlib
    axes.text(0.05, 0.8,
              "meanerr: %.2f\nrmse: %.2f\nr: %.2f (Rsqrd: %.2f)"
              % (meanerr, rmsqerr, np.sqrt(rsqrd), rsqrd),
              transform=axes.transAxes)
    plt.legend()
    plt.savefig(os.path.join(savedir, "%s_%s_lsas_delta_act_pred.pdf" % (modelname, con)),
                dpi=300, format="pdf")
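# A minimal, self-contained sketch of the lm-via-rpy2 pattern used in
# draw_plots(): fit y ~ x in R, then pull the coefficients and R^2 back into
# Python. The synthetic x/y data below are assumptions for illustration only.
import numpy as np
import rpy2.robjects as ro
from rpy2.robjects import FloatVector
from rpy2.robjects.packages import importr

stats = importr('stats')
base = importr('base')

x = np.linspace(0, 100, 50)
y = 0.8 * x + 5 + np.random.normal(scale=5, size=50)  # assumed synthetic data

fmla = ro.Formula('y ~ x')
fmla.getenvironment()['x'] = FloatVector(x)
fmla.getenvironment()['y'] = FloatVector(y)
fit = stats.lm(fmla)
coeffs = np.array(fit.rx2('coefficients'))           # [intercept, slope]
rsqrd = base.summary(fit).rx2('r.squared')[0]
print('slope=%.3f intercept=%.3f Rsqrd=%.3f' % (coeffs[1], coeffs[0], rsqrd))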
def setup():
    global corpcor
    import rpy2.rinterface
    from rpy2.robjects import r
    from rpy2.robjects.packages import importr
    try:
        corpcor = importr('corpcor')
    except rpy2.rinterface.RRuntimeError:
        # package is missing: install it from CRAN, then import again
        r("install.packages('corpcor', repos='http://cran.us.r-project.org')")
        corpcor = importr('corpcor')
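# A hedged generalization of the pattern above: a small helper (illustrative,
# not part of the original module) that imports an R package and installs it
# from CRAN on first use.
import rpy2.rinterface
from rpy2.robjects import r
from rpy2.robjects.packages import importr

def importr_or_install(pkg, repos='http://cran.us.r-project.org'):
    """Import an R package, installing it from CRAN if it is missing."""
    try:
        return importr(pkg)
    except rpy2.rinterface.RRuntimeError:
        r("install.packages('%s', repos='%s')" % (pkg, repos))
        return importr(pkg)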
def pretty_hm(self, r_mat=None, file=None):
    # use R pheatmap to create a heatmap file
    if r_mat is None and self.mat is None:
        raise Exception("A matrix needs to be provided at pretty_hm()")
    if file is None and self.file is None:
        raise Exception("A filename needs to be provided at pretty_hm()")
    # fall back to the instance's matrix/filename when not passed explicitly
    if r_mat is None:
        r_mat = self.mat
    if file is None:
        file = self.file
    phm = importr("pheatmap")
    rcb = importr("RColorBrewer")
    w = 10.5
    h = self.find_h(r_mat)
    cw = 10  # cell width
    ch = 10  # cell height
    fs = 1
    # RColorBrewer palette; prepend white for null (0) values
    ncolor = 5
    spectrum = "YlOrRd"
    palette = "brewer.pal(" + str(ncolor) + ",'" + spectrum + "')"
    rob.r.pheatmap(r_mat, filename=file, cellwidth=cw, cellheight=ch,
                   color=rob.r.c("#FFFFFF", rob.r(palette)),
                   width=w, height=h, font_size=fs, cluster_cols=False)
    return file
def run_simple(A, B):
    import numpy as np
    import pandas as pd
    from rpy2.robjects import pandas2ri
    from rpy2.robjects.packages import importr
    import rpy2.robjects as ro
    r = ro.r
    pandas2ri.activate()
    limma = importr('limma')
    edgeR = importr('edgeR')
    counts = pd.concat([A, B], axis=1)
    groups = r.factor(r.c(*([0] * A.shape[1] + [1] * B.shape[1])))
    ro.globalenv['exp'] = groups
    design = r('model.matrix(~exp)')
    # standard limma-voom pipeline: normalize, voom-transform, fit, moderate
    dge = r.DGEList(counts=counts)
    dge = r.calcNormFactors(dge)
    v = r.voom(dge, design, plot=False)
    fit = r.lmFit(v, design)
    fit = r.eBayes(fit)
    tt = r.topTable(fit, coef=r.ncol(design), number=1e12)
    ttidx = r['row.names'](tt)
    tt = pandas2ri.ri2py(tt)
    cols = tt.columns.to_series()
    cols[0] = 'lfc'
    cols[3] = 'pval'
    cols[4] = 'padj'
    tt.columns = cols
    # signed log p-value: negative log10(p) for up-regulated genes
    tt['slp'] = np.log10(tt['pval'])
    tt.loc[tt['lfc'] > 0, 'slp'] = -np.log10(tt.loc[tt['lfc'] > 0, 'pval'])
    tt.index = ttidx
    return tt
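# Usage sketch for run_simple() with small synthetic count matrices (assumed
# shapes: genes x samples). Requires the R packages limma and edgeR and the
# rpy2 2.x conversion API used above.
import numpy as np
import pandas as pd

genes = ['g%d' % i for i in range(200)]
A = pd.DataFrame(np.random.poisson(10, size=(200, 4)),
                 index=genes, columns=['a1', 'a2', 'a3', 'a4'])
B = pd.DataFrame(np.random.poisson(12, size=(200, 4)),
                 index=genes, columns=['b1', 'b2', 'b3', 'b4'])
tt = run_simple(A, B)
print(tt.sort_values('pval').head())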
def getPredictedKLDisByArima(playlist, songDict):
    importr("forecast")
    # get playlist's training list
    trainingList = playlist.getTrainingList()
    count = len(trainingList)
    # define base distribution and distance list
    baseDict = {}
    disList = []
    # loop over every song in the training list and record its KL distance
    # from the (uniform) base distribution, building a time series
    for i in range(0, count):
        sid = trainingList[i]
        sTopicDict = songDict[sid].getTopicDict()
        if len(baseDict) == 0:
            length = len(sTopicDict)
            for t in range(0, length):
                baseDict[t] = 1.0 / length
        disList.append(KLSim(sTopicDict, baseDict))
    # use auto.arima to forecast the next KL distance
    vec = robjects.FloatVector(disList)
    ts = robjects.r['ts'](vec)
    fit = robjects.r['auto.arima'](ts)
    next = robjects.r['forecast'](fit, h=1)
    return float(next.rx('mean')[0][0])
def predict_proba(self, X):
    importr('glmnet')
    pred = R['predict'](self.R_model_, X, type="response")
    pred = np.squeeze(np.asarray(pred))
    if self.binary_classification:
        # glmnet returns P(class 1); stack to get one column per class
        pred = np.vstack((1 - pred, pred)).T
    return pred
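# Hedged sketch of how an R_model_ compatible with predict_proba() above might
# be produced with glmnet (binomial lasso). X/y are assumed numpy arrays and
# numpy2ri is assumed active so they convert to R matrices/vectors; the lambda
# value s=0.01 is illustrative only.
import numpy as np
from rpy2.robjects import r as R
from rpy2.robjects.packages import importr
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()

glmnet = importr('glmnet')
X = np.random.normal(size=(100, 5))
y = (X[:, 0] + np.random.normal(size=100) > 0).astype(float)
R_model_ = glmnet.glmnet(X, y, family='binomial')
pred = R['predict'](R_model_, X, type='response', s=0.01)  # s: lambda value
print(np.asarray(pred).shape)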
def av(data, formula, model='', output='', as_strings='',
       title='Title for Your Output', label='Label for Your Output',
       pythontex=True):
    if not output:
        output = 'xtable'
    if not model:
        model = 'aov'
    if output == 'stargazer':
        stargazer = importr('stargazer')
    elif output == 'texreg':
        texreg = importr('texreg')
    formula = robjects.Formula(formula)
    # convert from pandas to R and make string columns factors
    dfr = com.convert_to_r_dataframe(data)
    if model == 'aov':
        output = 'xtable'  # aov only works with xtable
        av_model = stats.aov(formula, data=dfr)
        av_model_sum = base.summary(av_model)
    if output == 'xtable':
        xtable = importr('xtable')
        latex = xtable.xtable(av_model_sum, caption=title, label=label)
        if pythontex:
            return latex
        else:
            return '\n'.join(np.array(latex))
def deseq2_gene_expression_normalization(df_data):
    rpy2.robjects.pandas2ri.activate()
    df_data = df_data.dropna()
    r_data_set = robjects.conversion.py2ri(df_data)
    base = importr("base")
    deseq2 = importr("DESeq2")
    bio_generics = importr("BiocGenerics")
    gr = importr('GenomicRanges')
    vec = importr('S4Vectors')
    # one condition per column, taken from the column names
    conds = vec.DataFrame(condition=base.factor(base.c(base.colnames(r_data_set))))
    print(df_data.head())
    design = robjects.r('formula(~ condition)')
    dds = deseq2.DESeqDataSetFromMatrix(r_data_set, colData=conds, design=design)
    print("dds loaded")
    logs = deseq2.rlog(dds, fast=True)
    logs_count = gr.assay(logs)
    print("logs_count loaded")
    rpy2.robjects.pandas2ri.deactivate()
    res = Result()
    res.frame = pd.DataFrame(numpy.matrix(logs_count),
                             columns=logs_count.colnames,
                             index=logs_count.rownames)
    res.package = "DESeq2"
    res.version = deseq2.__version__
    return res
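# Usage sketch for deseq2_gene_expression_normalization() (assumed synthetic
# integer counts, genes x samples; requires Bioconductor DESeq2). Note that
# the condition factor above is derived from the column names, so samples of
# the same condition are expected to share a column name.
import numpy as np
import pandas as pd

counts = pd.DataFrame(np.random.poisson(20, size=(500, 4)),
                      index=['g%d' % i for i in range(500)],
                      columns=['ctrl', 'ctrl', 'case', 'case'])
res = deseq2_gene_expression_normalization(counts)
print(res.frame.head())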
def drawRbarplot(united, Motifs, Nodes, output, top=25):
    if not Motifs:
        return
    grdevices = importr('grDevices')
    graphics = importr('graphics')
    Counts = [len(seqnames) for seqnames in Motifs.itervalues()]
    Counts_names = [united.uid2Motif[int(uid)] for uid in Motifs.iterkeys()]
    Counts = robj.IntVector(Counts)
    Counts.names = robj.StrVector(Counts_names)
    # keep only motifs whose enrichment p-value is below 0.05
    Ps = [node[-1] for node in Nodes if node[-1] < 0.05]
    Ps_names = [node[0] for node in Nodes if node[-1] < 0.05]
    Ps = robj.FloatVector(Ps)
    Ps.names = robj.StrVector(Ps_names)
    Ps = robj.r.sort(Ps, decreasing=True)
    enriched_counts = Counts.rx(Ps.names)
    grdevices.png(file="%s/%s_bar.png" % (output, output), width=512, height=512)
    margin = robj.IntVector([3, 9, 4, 2])
    graphics.par(mar=margin)
    bar = graphics.barplot(enriched_counts, main="Enriched Motifs Counts",
                           horiz=True, las=1, col='lightblue')
    Ps_lab = robj.r('format(signif(%s ,digits=2), scientific=T)' % Ps.r_repr())
    graphics.text(x=enriched_counts, y=bar, label=Ps_lab, pos=2)
    #graphics.text(bar,labels=top_counts,pos=4,offset=10)
    grdevices.dev_off()
def plot_degree_distrib(adj_mat, i, filepath='.', prefix='ddist_'):
    """Compute degree distribution and plot using lattice."""
    lattice = importr('lattice')
    grdevices = importr('grDevices')
    xyplot = lattice.xyplot
    rprint = robjects.globalenv.get("print")
    try:
        deg = robjects.r.rowSums(adj_mat)
        degdens = robjects.r.density(deg, **{'na.rm': True})
    except Exception:
        # degree/density computation failed: nothing to plot
        return ''
    myf = robjects.Formula('y ~ x')
    myf.getenvironment()['x'] = degdens.rx2('x')
    myf.getenvironment()['y'] = degdens.rx2('y')
    p = xyplot(myf, type='l', lwd=3, xlab='Node Degree', ylab='Density',
               main='Node degree distribution')
    myfname = prefix + str(i) + '.png'
    grdevices.png(file=os.path.join(filepath, myfname), width=512, height=512,
                  type="cairo")
    rprint(p)
    grdevices.dev_off()
    return myfname
def cummeRbund(input_file, output_file, options):
    # import the grapher and cummeRbund
    grdevices = importr('grDevices')
    r_bund = importr("cummeRbund")
    r_plot = robjects.r('plot')
    # read in the diff results
    cuff = r_bund.readCufflinks(options.output)
    grdevices.pdf(file=output_file, width=10, height=10)
    r_plot(r_bund.dispersionPlot(r_bund.genes(cuff)))
    r_plot(r_bund.csBoxplot(r_bund.genes(cuff), replicates=True))
    r_plot(r_bund.csDendro(r_bund.genes(cuff), replicates=True))
    r_plot(r_bund.csBoxplot(r_bund.genes(cuff)))
    r_plot(r_bund.csDistHeat(r_bund.genes(cuff)))
    r_plot(r_bund.csDistHeat(r_bund.genes(cuff), replicates=True))
    r_plot(r_bund.PCAplot(r_bund.genes(cuff), "PC1", "PC2"))
    r_plot(r_bund.PCAplot(r_bund.genes(cuff), "PC1", "PC2", replicates=True))
    r_plot(r_bund.PCAplot(r_bund.genes(cuff), "PC3", "PC2"))
    r_plot(r_bund.PCAplot(r_bund.genes(cuff), "PC3", "PC2", replicates=True))
    # close the dev
    grdevices.dev_off()
def installRpackages(options):
    """Install R packages that cannot be installed using pip install ..."""
    #from rpy2.robjects.packages import importr
    src = join(sys.prefix, "lib", "R-3.2.3", "lib64", "R", "lib")
    spe = "$"
    cmd = 'export LD_LIBRARY_PATH=%sLD_LIBRARY_PATH:%s' % (spe, src)
    sh(cmd)
    # import rpy2.robjects as robjects
    import rpy2.robjects.packages as rpackages
    #from rpy2.robjects.vectors import StrVector
    packageNames = ('ggplot2',)  # note the comma: a 1-tuple, not a bare string
    if all(rpackages.isinstalled(x) for x in packageNames):
        have_packages = True
    else:
        have_packages = False
    if not have_packages:
        #utils = rpackages.importr('utils')
        #utils.chooseCRANmirror(ind=1, useHTTPS=False)
        packnames_to_install = [x for x in packageNames
                                if not rpackages.isinstalled(x)]
        # if len(packnames_to_install) > 0:
        #     utils.install_packages(StrVector(packnames_to_install))
        if len(packnames_to_install) > 0:
            # install through the Bioconductor installer
            base = rpackages.importr('base')
            base.source("http://www.bioconductor.org/biocLite.R")
            biocinstaller = rpackages.importr("BiocInstaller")
            biocinstaller.biocLite("ggplot2", suppressUpdates=True)
def drawRCoDistribution(motif_pair, outpath, pair_name):
    motifA_pos = []
    motifB_pos = []
    filename = "&".join(pair_name).replace('/', '_')
    (motifa, motifb) = pair_name
    for REF_SeqName in motif_pair.keys():
        for motifa_pos, motifb_pos in motif_pair[REF_SeqName].keys():
            motifA_pos.append(motifa_pos)
            motifB_pos.append(motifb_pos)
    grdevices = importr('grDevices')
    graphics = importr('graphics')
    geneplotter = importr('geneplotter')
    # negate positions so they read as distance upstream
    motifA_pos = -robj.IntVector(motifA_pos).ro
    motifB_pos = -robj.IntVector(motifB_pos).ro
    Pos = {motifa: motifA_pos, motifb: motifB_pos}
    Pos = robj.ListVector(Pos)
    # density plot
    grdevices.png(file="%s/%s_distribution.png" % (outpath, filename),
                  width=512, height=512)
    geneplotter.multidensity(Pos.rx(), lwd=3, xlab="Distribution",
                             main="Distribution of \n%s" % filename)
    graphics.rug(motifA_pos, col=4)
    graphics.rug(motifB_pos, col=2)
    grdevices.dev_off()
    # scatter plot
    grdevices.png(file="%s/%s_Scatter.png" % (outpath, filename),
                  width=512, height=512)
    limit = robj.IntVector([-3000, 0])
    graphics.plot(motifA_pos, motifB_pos,
                  main="Position Scatter Plot of\n%s&%s" % (motifa, motifb),
                  xlab="Positions of %s" % motifa,
                  ylab="Positions of %s" % motifb,
                  xlim=limit, ylim=limit)
    graphics.abline(1, 1)
    grdevices.dev_off()
def process_ccs(cc_in):
    import rpy2.robjects as robjects
    from rpy2.robjects.packages import importr
    from rpy2.robjects.numpy2ri import numpy2ri
    from numpy import array, shape

    cc = cc_in[:300, :300]
    # zero out the diagonal (self-correlations)
    for i, c in enumerate(cc):
        c[i] = 0
    r = robjects.r
    base = importr("base")
    nc = nr = shape(cc)[0]
    m = numpy2ri(cc)
    # binarize at threshold 0.90, then run the Plaid biclustering model
    biclust = importr("biclust")
    mb = biclust.binarize(m, 0.90)
    # hcv = r.hclust(r.dist(mb))
    # hm = r.heatmap(mb)
    out = biclust.biclust(m, method=biclust.BCPlaid())
    n_bc = out.do_slot("Number")
    rows = array(out.do_slot("RowxNumber"))
    cols = array(out.do_slot("NumberxCol")).T
    return rows, cols, array(m), array(mb)
def plot_tree(outfile, tree1, tree2=None, root=False):
    """Generate tree(s) plot."""
    import array
    ape = importr('ape')
    graphics = importr('graphics')
    grdevices = importr('grDevices')
    if tree2 is None:
        grdevices.png(file=outfile, width=1024, height=768)
        if root:
            ape.plot_phylo(ape.root(tree1, root), edge_width=2, cex=1, underscore=1)
        else:
            ape.plot_phylo(tree1, edge_width=2, cex=1, underscore=1)
        graphics.title(main='Neighbor Joining')
        grdevices.dev_off()
    else:
        # two trees side by side: NJ on the left, MP on the right
        grdevices.png(file=outfile, width=1024, height=768)
        graphics.par(mfcol=array.array('i', [1, 2]))
        if root:
            ape.plot_phylo(ape.root(tree1, root), edge_width=2, cex=1, underscore=1)
        else:
            ape.plot_phylo(tree1, edge_width=2, cex=1, underscore=1)
        graphics.title(main='Neighbor Joining', cex=1.5, font=2)
        if root:
            ape.plot_phylo(ape.root(tree2, root), edge_width=2, cex=1, underscore=1)
        else:
            ape.plot_phylo(tree2, edge_width=2, cex=1, underscore=1)
        graphics.title(main='Maximum Parsimony', cex=1.5, font=2)
        grdevices.dev_off()
    return
def plot_test():
    from rpy2.robjects.packages import importr
    graphics = importr('graphics')
    grdevices = importr('grDevices')
    base = importr('base')
    stats = importr('stats')

    import array
    x = array.array('i', range(10))
    y = stats.rnorm(10)

    grdevices.X11()
    graphics.par(mfrow=array.array('i', [2, 2]))
    graphics.plot(x, y, ylab="foo/bar", col="red")

    kwargs = {'ylab': "foo/bar", 'type': "b", 'col': "blue", 'log': "x"}
    graphics.plot(x, y, **kwargs)

    m = base.matrix(stats.rnorm(100), ncol=5)
    pca = stats.princomp(m)
    graphics.plot(pca, main="Eigen values")
    stats.biplot(pca, main="biplot")
def pca(self, fn='pca.png', col=None, w=1200, h=1200):
    stats = importr('stats')
    graphics = importr('graphics')
    # if col:
    #     df, factors = dataframe(df, factorcol=col)
    df = self.df
    pca = stats.princomp(df)

    grdevices = importr('grDevices')
    ofn = ".".join(fn.split(".")[:-1] + ["eigens"] + [fn.split(".")[-1]])
    strfacts = (str(df.nrow) + " items using " + str(df.ncol) +
                " features [" + ofn.split("/")[-1] + "]")
    grdevices.png(file=ofn, width=w, height=h)
    graphics.plot(pca, main="Eigenvalues for " + strfacts)
    # if col:
    #     graphics.hilight(pca, factors)
    grdevices.dev_off()
    print ">> saved: " + ofn

    grdevices = importr('grDevices')
    ofn = ".".join(fn.split(".")[:-1] + ["biplot"] + [fn.split(".")[-1]])
    strfacts = (str(df.nrow) + " items using " + str(df.ncol) +
                " features [" + ofn.split("/")[-1] + "]")
    grdevices.png(file=ofn, width=w, height=h)
    stats.biplot(pca, scale=1, main="biplot of " + strfacts)
    grdevices.dev_off()
    print ">> saved: " + ofn
def goid_annot(goid):
    """
    Get GO annotation for a GO id.

    If the annotation is empty, you probably need to update your GO.db
    package. Depends on the Bioconductor GO.db package.

    >>> goid_annot('GO:0051649')
    result
    """
    importr('GO.db')
    term = str(robjects.r['Term'](goid)[0])
    ont = str(robjects.r['Ontology'](goid)[0])
    syn = robjects.r['Synonym'](goid)[0]
    if robjects.r['is.null'](syn):
        syn = None
    else:
        syn = [str(i) for i in syn]
    sec = robjects.r['Secondary'](goid)[0]
    if robjects.r['is.null'](sec):
        sec = None
    else:
        sec = [str(i) for i in sec]
    defn = str(robjects.r['Definition'](goid)[0])
    return GoAnnot(goid, term, ont, syn, sec, defn)
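# goid_annot() assumes a GoAnnot container defined elsewhere in the module; a
# minimal sketch of one plausible definition (hypothetical, not the original):
from collections import namedtuple

GoAnnot = namedtuple('GoAnnot', ['goid', 'term', 'ont', 'syn', 'sec', 'defn'])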
def agglom(E, k=100, linkage="complete", simdist_function="pearson_correlation"):
    importr("cluster")
    ro.globalenv["distances"] = simdist(E, simdist_function, similarity=False)
    # hierarchical clustering on the precomputed distances, cut at k clusters
    ro.r("hclust_results = hclust(as.dist(distances), method='{linkage}')".format(**locals()))
    rresults = ro.r("labels = cutree(hclust_results, k={k})".format(**locals()))
    modules = convert_labels2modules(list(rresults), E.columns)
    return modules
def create_files(args, dir, loci):
    # where the gene trees are
    subdir = os.path.join(dir, 'gene_trees')
    if not os.path.isdir(subdir):
        os.mkdir(subdir)
    # where to put the files
    outdir = os.path.join(dir, 'astrid_astral')
    if not os.path.isdir(outdir):
        os.mkdir(outdir)
    # start up r
    ape = importr('ape')
    phangorn = importr('phangorn')
    out = os.path.join(outdir, 'best_trees_miss%s_tol%s_collapse%s.trees' %
                       (args.miss, args.tol, args.collapse))
    bs = os.path.join(outdir, 'bootstrap_files_miss%s_tol%s_collapse%s.txt' %
                      (args.miss, args.tol, args.collapse))
    bs_out = open(bs, 'w')
    for locus in loci:
        tree = os.path.join(subdir, '%s.bestTree.tre' % locus)
        if os.path.isfile(tree):
            # deal with best tree
            a = ape.read_tree(tree)
            a = manipulate_gene_tree(ape, phangorn, a, args.tol, args.collapse)
            ape.write_tree(a, file=out, append=True)
            # record the path of the matching bootstrap trees
            bs_file = os.path.join(subdir, '%s.bootstrap.trees' % locus)
            bs_out.write('%s\n' % bs_file)
    bs_out.close()
def kmedoids(E, number=100, simdist_function="pearson_correlation"):
    importr("cluster")
    distances = simdist(E, simdist_function, similarity=False)
    # partition around medoids on the precomputed dissimilarities
    rresults = ro.r["pam"](distances, diss=True, k=number)
    modules = convert_labels2modules(list(rresults.rx2("clustering")), E.columns)
    return modules
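# Both agglom() and kmedoids() above rely on convert_labels2modules(), which
# is defined elsewhere; a minimal sketch of one plausible implementation
# (hypothetical, not the original):
def convert_labels2modules(labels, genes):
    """Group gene names by their integer cluster label."""
    modules = {}
    for gene, label in zip(genes, labels):
        modules.setdefault(label, []).append(gene)
    return list(modules.values())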
def test_mdmr_with_connectir_distances():
    """Uses the distances output by connectir to specifically test the MDMR
    portion of things."""
    import os
    os.chdir("../C-PAC")

    from CPAC.cwas.mdmr import mdmr
    import numpy as np
    from os import path as op

    import rpy2.robjects as robjects
    from rpy2.robjects.numpy2ri import numpy2ri
    from rpy2.robjects.packages import importr
    robjects.conversion.py2ri = numpy2ri

    from pandas import read_csv

    bigmemory = importr("bigmemory")
    base = importr("base")

    sdir = "/home/data/Projects/CPAC_Regression_Test/2013-05-30_cwas/results_adhd04.r"
    sfile = op.join(sdir, "subdist.desc")
    dmats = np.array(robjects.r("as.matrix(attach.big.matrix('%s'))" % sfile))
    n = np.sqrt(dmats.shape[0])

    rfile = "/home2/data/Projects/CPAC_Regression_Test/2013-05-30_cwas/configs/adhd04_regressors.txt"
    regressors = np.loadtxt(rfile)

    ps, Fs, _, _ = mdmr(dmats[:, :10], regressors, [1], 1000)
def rocbees(ARGVS):
    '''ROC curves and the beeswarm plot from the beeswarm R package.'''
    beeswarm = importr('beeswarm')
    Cairo = importr('Cairo')
    ROC = importr('ROC')
    filename = ARGVS['file']
    data = ARGVS['data']
    title = ARGVS['title']
    category = ARGVS['opts']
    filewrite = ROOT_PATH + '/media/tmp/' + filename
    resp = []
    expr = []
    names = data.keys()
    for name in names:
        resp.append(data[name]['resp'])
        expr.append(data[name]['expression'])
    robjects.r('''
        approx3 <- function(x, y = NULL, theta = 0.001) {
            xy <- xy.coords(x, y)
            dx <- diff(xy$x)/(max(xy$x) - min(xy$x))
            dy <- diff(xy$y)/(max(xy$y) - min(xy$y))
            angle <- atan2(dy, dx)
            diff.angle <- diff(angle)%%pi
            abs.diff.angle <- pmin(diff.angle, pi - diff.angle)
            keep <- c(TRUE, abs.diff.angle > theta, TRUE)
            xy$x <- xy$x[keep]
            xy$y <- xy$y[keep]
            xy
        }
        aronroc <- function(x, truth, type = "l",
                            xlab = expression(1 - specificity),
                            ylab = "Sensitivity", ...) {
            require(ROC)
            r <- rocdemo.sca(truth, x)
            xy <- list(x = 1 - r@spec, y = r@sens)
            xy.trimmed <- approx3(xy)
            plot(xy.trimmed, type = type, xlab = xlab, ylab = ylab, ...)
            invisible(xy.trimmed)
        }
        plotResps <- function (filename, expr, resp, category, main='') {
            expr = as.numeric(expr)
            resp = as.character(resp)
            CairoPNG(filename=filename, width = 800, height = 400)
            par(oma = c(0,0,1,0))
            layout(matrix(1:2, nrow = 1), widths = c(1,1))
            beeswarm(expr ~ resp, col=c(1:length(unique(resp))),
                     pch=16, xlab='Response Categories', ylab='Expression')
            par(xpd = NA)
            aronroc(expr, resp == category)
            title(main, outer=TRUE)
            dev.off()
        }
    ''')
    try:
        robjects.r['plotResps'](filename=filewrite, expr=expr, resp=resp,
                                category=category, main=title)
        return filename
    except Exception:
        return 'Error'
def heatmap3py(numDataR, ColSideColors, annoColDicList, fileName=None, outPath=None):
    from rpy2.robjects.packages import importr
    heatmap = importr("heatmap3")
    grdevices = importr("grDevices")
    from rpy2.robjects.functions import SignatureTranslatedFunction
    # explicitly translate the R argument 'pt.bg' to a legal python name
    heatmap.showLegend = SignatureTranslatedFunction(
        heatmap.showLegend, init_prm_translate={'pt_bg': 'pt.bg'})

    if fileName is not None:
        # draw heatmap into a pdf file
        grdevices.pdf(file=fileName)
        heatmap.heatmap3(numDataR, ColSideColors=ColSideColors, showRowDendro=False)
        grdevices.dev_off()
        # one legend pdf per annotation track
        for i in range(len(annoColDicList)):
            anno = robjects.StrVector(annoColDicList[i].keys())
            col = robjects.StrVector(annoColDicList[i].values())
            fileName = outPath + "/heatmapLegend" + str(i) + ".pdf"
            grdevices.pdf(file=fileName)
            heatmap.showLegend(legend=anno, col=col, cex=1.5,
                               title="Annotation Legend: " + ColSideColors.colnames[i],
                               pch=22, lwd=robjects.NA_Integer, pt_bg=col)
            grdevices.dev_off()
    else:
        # draw heatmap in an R window
        heatmap.heatmap3(numDataR, ColSideColors=ColSideColors, showRowDendro=False)
        # plot legends in separate windows
        for i in range(len(annoColDicList)):
            grdevices.dev_new()
            anno = robjects.StrVector(annoColDicList[i].keys())
            col = robjects.StrVector(annoColDicList[i].values())
            heatmap.showLegend(legend=anno, col=col, cex=1.5,
                               title="Annotation Legend: " + ColSideColors.colnames[i],
                               pch=22, lwd=robjects.NA_Integer, pt_bg=col)
def edger_gene_expression_normalization(df_data):
    rpy2.robjects.pandas2ri.activate()
    df_data = df_data.dropna()
    r_data_set = robjects.conversion.py2ri(df_data)
    edger = importr("edgeR")
    base = importr("base")
    mult = robjects.r.get('*')
    factors = base.factor(base.c(base.colnames(r_data_set)))
    dge = edger.DGEList(counts=r_data_set, group=factors)
    y = edger.calcNormFactors(dge)
    y = edger.estimateCommonDisp(y)
    # y[0] is the counts slot and y[1][2] the per-sample size factors;
    # rpy2 handles the conversion between Python's 0-based and R's 1-based
    # indexing here
    normalized = mult(y[0], y[1][2])
    rpy2.robjects.pandas2ri.deactivate()
    print("preparing result")
    res = Result()
    res.frame = pd.DataFrame(numpy.round(numpy.matrix(normalized)),
                             columns=normalized.colnames,
                             index=normalized.rownames)
    res.package = "edgeR"
    res.version = edger.__version__
    return res
def test_r_environment():
    """Test if all required R packages are installed to use the NBLAST API."""
    setup_is_ok = False
    try:
        rnat = importr('nat')
        relmr = importr('elmr')
        rnblast = importr('nat.nblast')
        rcatmaid = importr('catmaid')
        setup_is_ok = True
    except Exception:
        setup_is_ok = False
        logger.info("""
        Please make sure the following R packages are installed to use
        CATMAID's NBLAST support. This can be done by executing the
        following in the R environment of the user running CATMAID
        (e.g. www-data):

        if(!require("devtools")) install.packages("devtools")
        devtools::install_github(c("jefferis/nat", "jefferislab/nat.nblast",
                "jefferis/rcatmaid", "jefferis/elmr"))

        This is required to let CATMAID compute NBLAST scores.
        """)
    return JsonResponse({
        'setup_ok': setup_is_ok,
    })
def drawRboxplot(united, Motifs, enriched, REF_SeqNames, output):
    '''motif_dist[motif][SeqName] = [(start, end), ...]'''
    grdevices = importr('grDevices')
    graphics = importr('graphics')
    Positions = {}
    for motif in enriched:
        Pos = []
        for seqname in REF_SeqNames:
            Pos.extend([start for start, stop in
                        Motifs[str(united.Motif2uid[motif])][seqname]])
        if Pos:
            # shift positions so they are relative to the -3000 upstream window
            Positions[motif] = robj.IntVector(map(lambda x: x - 3000, Pos))
    Positions = robj.ListVector(Positions)
    enriched_names = robj.StrVector(enriched)
    grdevices.png(file="%s/%s_box.png" % (output, output), width=512, height=512)
    margin = robj.IntVector([3, 9, 4, 2])
    graphics.par(mar=margin)
    graphics.boxplot(Positions.rx(enriched_names),
                     main="Boxplot of Motif Positions",
                     horizontal=True, las=1, col='lightblue')
    grdevices.dev_off()
def some_rpy2():
    flash('Loading data...please wait')
    r.load('mtu_inf_111813.RData')
    pm = r['predictor.mats']
    pmm = pm.rx(1)
    dataframe = r['data.frame']
    df = dataframe(pmm)
    firstcol = df.rx(1)
    seccol = df.rx(2)
    lattice = importr('lattice')
    xyplot = lattice.xyplot
    rprint = robjects.globalenv.get("print")
    #formula = Formula('firstcol ~ seccol')
    #formula.getenvironment()['firstcol'] = df.rx2(1)
    #formula.getenvironment()['seccol'] = df.rx2(2)
    #p = lattice.xyplot(formula)
    grdevices = importr('grDevices')
    #filenm = app.config['IMGS_FOLDER'] + 'hist.png'
    filenm = 'hist.png'  # why is this in tmp still???
    grdevices.png(file=filenm, width=512, height=512)
    p = r.histogram(df.rx2(1))
    rprint(p)  # works
    grdevices.dev_off()
    return render_template("hist.html", image='static/tmp/hist.png')
def plot(self, data_array, width, height):
    """Create a plot with R."""
    # Start R timing
    startTime = time.time()

    rinterface.initr()
    r = robjects.r
    grdevices = importr('grDevices')
    # Import the bfast package
    bfast = importr('bfast')

    b = robjects.FloatVector(data_array)
    # turn b into a time-series vector (23 observations/year, starting 2000-04)
    b_ts = r.ts(b, start=robjects.IntVector([2000, 4]), frequency=23)
    # calculate bfast
    h = 23.0 / float(len(b_ts))
    b_bfast = r.bfast(b_ts, h=h, season="harmonic", max_iter=2)
    # Get the index names of the ListVector b_bfast
    names = b_bfast.names
    log.debug(names)

    temp_datadir = self.config.get('main', 'temp.datadir')
    temp_url = self.config.get('main', 'temp.url')
    file = NamedTemporaryFile(suffix=".png", dir=temp_datadir, delete=False)
    log.debug(file.name)

    grdevices.png(file=file.name, width=width, height=height)
    # Plotting code here
    r.par(col="black")
    r.plot(b_bfast)
    # Close the device
    grdevices.dev_off()

    # End R timing and log it
    endTime = time.time()
    log.debug('It took ' + str(endTime - startTime) +
              ' seconds to initialize R and draw a plot.')

    file.close()
    result = {"file": "%s/%s" % (temp_url, file.name.split("/")[-1])}
    try:
        result['magnitude'] = str(tuple(b_bfast[names.index("Magnitude")])[0])
    except ValueError:
        pass
    try:
        result['time'] = str(tuple(b_bfast[names.index("Time")])[0])
    except ValueError:
        pass

    self.outputs['plot']['value'] = json.dumps(
        {"file": "%s/%s" % (temp_url, file.name.split("/")[-1])})
    return SERVICE_SUCCEEDED
def translate(self, ds):
    url = ds.data_url().pop()
    base = importr('base')
    utils = importr('utils')
    # download the RData file and load it into the R global environment
    utils.download_file(url, destfile="data_source.RData")
    base.load("data_source.RData")
    cds = R('cds')
    print(cds)
# samples2.columns = ['GeneID', colV]
# # print(samples2.head())
# samples = pd.merge(samples, samples2, on='GeneID')
# print("\n", 'List of file names read to dataframe:', '\n')
# samples.set_index("GeneID", inplace=True)
# print(samples.head())
########################################################################

samples.to_csv('matrix.csv')
# print(colV)
samples_annotated = samples

import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

ALL = importr('ALL')
limma = importr('limma')

exprs = robjects.r['exprs']
summary = robjects.r['summary']
matrix = robjects.r['as.matrix']
new = robjects.r['new']
robjects.r('data("ALL")')
data = robjects.globalenv['ALL']
featureNames = robjects.r['featureNames']
ExpressionSet = robjects.r['ExpressionSet']
character = robjects.r['as.character']
pas = robjects.r['paste']
fac = robjects.r['as.factor']
mmax = robjects.r['model.matrix']

from rpy2.robjects import pandas2ri
import os
import sys
import pickle
import pandas as pd
import multiprocessing
from sklearn.externals.joblib import Parallel, delayed
num_cores = multiprocessing.cpu_count()
import seaborn as sns
homedir = os.environ['HOME']

import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()
R = ro.r

import warnings
warnings.filterwarnings("ignore")

# R packages for extreme-value fitting and goodness of fit
ismev = importr('ismev')
gof = importr('gnFit')
base = importr('base')
statis = importr('stats')
eva = importr('eva')
extremes = importr('extRemes')


def save_as_pickled_object(obj, filepath):
    """
    This is a defensive way to write pickle.write, allowing for very large
    files on all platforms
    """
    max_bytes = 2**31 - 1
    bytes_out = pickle.dumps(obj)
    n_bytes = sys.getsizeof(bytes_out)
    with open(filepath, 'wb') as f_out:
        # write in chunks below the 2 GB limit that some platforms impose
        for idx in range(0, n_bytes, max_bytes):
            f_out.write(bytes_out[idx:idx + max_bytes])
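# Hedged sketch of what the extreme-value imports above are typically used
# for: fit a GEV distribution to block maxima with ismev::gev.fit and read
# back the maximum-likelihood estimates. The Gumbel-distributed sample below
# is an assumption for illustration.
import numpy as np
from rpy2.robjects import FloatVector

maxima = np.random.gumbel(loc=10.0, scale=2.0, size=100)  # assumed data
fit = ismev.gev_fit(FloatVector(maxima))  # rpy2 maps gev.fit -> gev_fit
mle = np.array(fit.rx2('mle'))            # [location, scale, shape]
print(mle)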
from pandas import read_csv, DataFrame, Series
import statsmodels.api as sm
import rpy2.robjects as R
from rpy2.robjects.packages import importr
import pandas.rpy.common as com
from pandas import date_range
import numpy as np
import csv
import re
import pandas as pd
import matplotlib.pyplot as plt
import time

forecast = importr('forecast')
stats = importr('stats')
tseries = importr('tseries')


def parse_csv(path):
    term = path.split('/')[-1].split('.')[0]
    trend = []
    index = []
    with open(path, 'rb') as new:
        newread = csv.reader(new, delimiter='\n')
        for i, var in enumerate(newread):
            # if re.findall(r'\d+-\d+-\d+',str(var)) != [] and int(str(var[0])[0:4])>=2007:
            if re.findall(r'\d+-\d+-\d+', str(var)) != []:
                trend.append(var[0].split(',')[1])
                index.append(var[0].split(',')[0])
    if trend == []:
        return
    my_trend = [float(var) for var in trend[1:]]
    set_active_unpaused = set_active_gene & (set_total_gene - set_paused_gene)
    set_unactive_paused = (set_total_gene - set_active_gene) & set_paused_gene
    set_unactive_unpaused = (set_total_gene - set_active_gene) & (
        set_total_gene - set_paused_gene)
    return (set_active_paused, set_active_unpaused,
            set_unactive_paused, set_unactive_unpaused)


def write_gene_to_file(set_gene, file_name):
    with open(file_name, 'w') as f:
        for i in set_gene:
            f.write(i + '\n')


import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

importr('org.Sc.sgd.db')
importr('clusterProfiler')


def go_analysis(set_gene_file, output):
    DE_list = robjects.StrVector(list(set_gene_file))
    # one enrichGO call per ontology: biological process (BP),
    # cellular component (CC), molecular function (MF)
    rcode_bp = 'enrichGO(gene= %s,keyType="ENZYME",OrgDb =%s ,ont ="%s", \
        pAdjustMethod = "BH",pvalueCutoff = 0.01, qvalueCutoff = 0.05, \
        readable = TRUE)' % (DE_list.r_repr(), 'org.Sc.sgd.db', 'BP')
    rcode_cc = 'enrichGO(gene= %s,keyType="ENZYME",OrgDb =%s ,ont ="%s", \
        pAdjustMethod = "BH",pvalueCutoff = 0.01, qvalueCutoff = 0.05, \
        readable = TRUE)' % (DE_list.r_repr(), 'org.Sc.sgd.db', 'CC')
    rcode_mf = 'enrichGO(gene= %s,keyType="ENZYME",OrgDb =%s ,ont ="%s", \
        pAdjustMethod = "BH",pvalueCutoff = 0.01, qvalueCutoff = 0.05, \
        readable = TRUE)' % (DE_list.r_repr(), 'org.Sc.sgd.db', 'MF')
import pandas as pd
import numpy as np
import math
from rpy2.robjects.packages import importr
import rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector
from rpy2.robjects import pandas2ri
from rpy2.robjects import Formula
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline

# Install packages
pandas2ri.activate()
utils = rpackages.importr('utils')
utils.chooseCRANmirror(ind=1)
packnames = ['vegan', 'zoo', 'stats']
# equivalently: names_to_install = [x for x in packnames if not rpackages.isinstalled(x)]
names_to_install = []
for x in packnames:
    if not rpackages.isinstalled(x):
        names_to_install.append(x)
if len(names_to_install) > 0:
    utils.install_packages(StrVector(names_to_install))

# Setup R packages
rvegan = importr('vegan')
import numpy as np
from chainladder.utils.cupy import cp
import pytest
import chainladder as cl

from rpy2.robjects.packages import importr
from rpy2.robjects import r

CL = importr('ChainLadder')


@pytest.fixture
def atol():
    return 1e-5


def mack_r(data, alpha, est_sigma, tail):
    if tail:
        return r(
            'mack<-MackChainLadder({},alpha={}, est.sigma="{}", tail=TRUE)'.format(
                data, alpha, est_sigma))
    else:
        return r('mack<-MackChainLadder({},alpha={}, est.sigma="{}")'.format(
            data, alpha, est_sigma))


def mack_p(data, average, est_sigma, tail):
    if tail:
        return cl.MackChainladder().fit(
            cl.TailCurve(curve='exponential').fit_transform(
                cl.Development(average=average,
                               sigma_interpolation=est_sigma).fit_transform(
from __future__ import division
import mne, os
import numpy as np
from scipy.stats import ttest_rel
from scipy import io as spio
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector
from xfrange import xfrange

stats = importr('stats')

markers = ['Obj', 'Subj']
groups = ['kids', 'adults']
basePath = os.environ['DATDIR']
hemispheres = ['lh', 'rh']
metrics = ['signed', 'norm']
channelNames = ['Left-', 'Right-']
locations = ['PAC', 'pSTG', 'aSTG', 'pSTS', 'aSTS', 'BA45', 'BA44', 'BA6v']

# Load list of subjects: dh51 and above are adults, the rest kids
subjects = [[], []]
dirEntries = os.listdir(os.environ['LOCDIR'])
for dirEntry in dirEntries:
    if len(dirEntry) == 5 and dirEntry[0:2] == 'dh':
        if int(dirEntry[2:4]) > 50:
            subjects[1].append(dirEntry)
        else:
            subjects[0].append(dirEntry)

# Create regular time intervals
timescale = xfrange(-1.0, 4.0, 0.001)
    }
    try:
        x = kinds[x.dtype.kind](x)
    except KeyError:
        pass  # just pass it along
    return numpy2ri(x)

ro.conversion.py2ri = numpy2ri_avoiding_zerodim

# make inline happy
if list(ro.r('Sys.getenv("R_ARCH")'))[0] == '':
    arch = list(ro.r('.Platform$r_arch'))[0]
    ro.r('Sys.setenv(R_ARCH="/{}")'.format(arch))

rstan = importr('rstan')

# TODO: stan_model, stanfit class wrappers


def get_model(filename, cache_filename=None, check_times=True, use_cache=True):
    '''
    Returns a stan_model for the model code in filename.

    If use_cache (the default), tries to load the compiled file from
    cache_filename (default: filename + '.model.pkl[2|3].gz') if available,
    otherwise compiles it and saves it into the gzipped, pickled cache file.
    '''
    if cache_filename is None and use_cache:
        cache_filename = '{}.model.pkl{}.gz'.format(filename, sys.version_info[0])
import pytest
import contextlib
import os
import tempfile

from rpy2.robjects.packages import importr, data
datasets = importr('datasets')
mtcars = data(datasets).fetch('mtcars')['mtcars']
from rpy2.robjects import r
from rpy2.robjects.lib import grdevices


@contextlib.contextmanager
def set_filenames_to_delete():
    todelete = set()
    yield todelete
    for fn in todelete:
        if os.path.exists(fn):
            os.unlink(fn)


def test_rendertobytes_noplot():
    with grdevices.render_to_bytesio(grdevices.png) as b:
        pass
    assert len(b.getvalue()) == 0


def test_rendertofile():
    fn = tempfile.mktemp(suffix=".png")
    with set_filenames_to_delete() as todelete:
def run_fe1(self, params):
    """
    run_fe1: Functional Enrichment One

    required params:
    feature_set_ref: FeatureSet object reference
    workspace_name: the name of the workspace it gets saved to

    optional params:
    propagation: includes is_a relationship to all go terms (default is 1)
    filter_ref_features: filter reference genome features with no go terms
                         (default is 0)
    statistical_significance: parameter for statistical significance. Select
                              one from left_tailed, right_tailed or two_tailed
                              (default is left_tailed)
    ignore_go_term_not_in_feature_set: ignore GO term analysis if term is not
                                       associated with FeatureSet (default is 1)

    return:
    result_directory: folder path that holds all files generated by run_deseq2_app
    report_name: report name generated by KBaseReport
    report_ref: report reference generated by KBaseReport
    """
    log('--->\nrunning FunctionalEnrichmentUtil.run_fe1\n' +
        'params:\n{}'.format(json.dumps(params, indent=1)))
    self._validate_run_fe1_params(params)
    propagation = params.get('propagation', True)
    filter_ref_features = params.get('filter_ref_features', False)
    statistical_significance = params.get('statistical_significance', 'left_tailed')
    ignore_go_term_not_in_feature_set = params.get(
        'ignore_go_term_not_in_feature_set', True)

    result_directory = os.path.join(self.scratch, str(uuid.uuid4()))
    self._mkdir_p(result_directory)

    feature_set_ids, genome_ref = self._process_feature_set(
        params.get('feature_set_ref'))

    (feature_id_go_id_list_map, go_id_feature_id_list_map,
     go_id_go_term_map, feature_id_feature_info_map
     ) = self._get_go_maps_from_genome(genome_ref)

    if filter_ref_features:
        log('start filtering features with no term')
        feature_ids = []
        for feature_id, go_ids in feature_id_go_id_list_map.iteritems():
            if isinstance(go_ids, list):
                feature_ids.append(feature_id)
    else:
        feature_ids = feature_id_go_id_list_map.keys()

    ontology_hash = dict()
    ontologies = self.ws.get_objects([{'workspace': 'KBaseOntology',
                                       'name': 'gene_ontology'},
                                      {'workspace': 'KBaseOntology',
                                       'name': 'plant_ontology'}])
    ontology_hash.update(ontologies[0]['data']['term_hash'])
    ontology_hash.update(ontologies[1]['data']['term_hash'])

    if propagation:
        go_id_parent_ids_map = self._generate_parent_child_map(
            ontology_hash,
            go_id_go_term_map.keys(),
            regulates_relationship=False)
    else:
        go_id_parent_ids_map = {}
        for go_id in go_id_go_term_map.keys():
            go_id_parent_ids_map.update({go_id: []})

    log('including parents to feature id map')
    for go_id, parent_ids in go_id_parent_ids_map.iteritems():
        mapped_features = go_id_feature_id_list_map.get(go_id)
        for parent_id in parent_ids:
            parent_mapped_features = go_id_feature_id_list_map.get(parent_id)
            if not parent_mapped_features:
                parent_mapped_features = []
            if mapped_features:
                parent_mapped_features += mapped_features
            go_id_feature_id_list_map.update(
                {parent_id: list(set(parent_mapped_features))})

    log('start calculating p-values')
    enrichment_map = {}
    go_info_map = {}
    all_raw_p_value = []
    pos = 0
    for go_id, go_term in go_id_go_term_map.iteritems():
        mapped_features = go_id_feature_id_list_map.get(go_id)
        # in feature_set, matches go_id
        a = len(set(mapped_features).intersection(feature_set_ids))
        # ignore go term analysis if not associated with FeatureSet
        if ignore_go_term_not_in_feature_set and a == 0:
            continue
        # in feature_set, doesn't match go_id
        b = len(feature_set_ids) - a
        # not in feature_set, matches go_id
        c = len(mapped_features) - a
        # not in feature_set, doesn't match go_id
        d = len(feature_ids) - len(feature_set_ids) - c
        fisher_value = fisher.pvalue(a, b, c, d)
        if statistical_significance == 'left_tailed':
            raw_p_value = self._round(fisher_value.left_tail)
        elif statistical_significance == 'right_tailed':
            raw_p_value = self._round(fisher_value.right_tail)
        elif statistical_significance == 'two_tailed':
            raw_p_value = self._round(fisher_value.two_tail)
        else:
            raise ValueError('Improper statistical_significance value')
        all_raw_p_value.append(raw_p_value)
        go_info_map.update({go_id: {'raw_p_value': raw_p_value,
                                    'num_in_ref_genome': len(mapped_features),
                                    'num_in_subset_feature_set': a,
                                    'pos': pos,
                                    'mapped_features': mapped_features}})
        pos += 1

    # BH/FDR correction of the raw Fisher p-values via R's p.adjust
    stats = importr('stats')
    adjusted_p_values = stats.p_adjust(FloatVector(all_raw_p_value),
                                       method='fdr')

    for go_id, go_info in go_info_map.iteritems():
        if go_id not in ontology_hash:
            continue
        adjusted_p_value = self._round(adjusted_p_values[go_info.get('pos')])
        namespace = ontology_hash[go_id]['namespace']
        enrichment_map.update({go_id: {
            'raw_p_value': go_info.get('raw_p_value'),
            'adjusted_p_value': adjusted_p_value,
            'num_in_ref_genome': go_info.get('num_in_ref_genome'),
            'num_in_subset_feature_set': go_info.get('num_in_subset_feature_set'),
            'go_term': go_id_go_term_map.get(go_id),
            'namespace': namespace.split("_")[1][0].upper(),
            'mapped_features': go_info.get('mapped_features')}})

    returnVal = {'result_directory': result_directory}
    report_output = self._generate_report(enrichment_map,
                                          result_directory,
                                          params.get('workspace_name'),
                                          feature_id_go_id_list_map,
                                          feature_set_ids,
                                          genome_ref,
                                          go_id_parent_ids_map,
                                          feature_ids)
    returnVal.update(report_output)
    return returnVal
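# Minimal standalone sketch of the multiple-testing correction used above:
# R's p.adjust (Benjamini-Hochberg FDR) called through rpy2 on assumed
# example p-values.
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector

stats = importr('stats')
raw = [0.001, 0.02, 0.04, 0.30, 0.75]  # assumed raw p-values
adjusted = list(stats.p_adjust(FloatVector(raw), method='fdr'))
print(adjusted)  # BH-adjusted q-values, same order as the input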
from rpy2.robjects.packages import importr
# Using importr, we import R packages, which then work roughly like
# Python modules
#%%
## RUN THIS if you don't have the igraph package installed (to install it)
## import rpy2's package module
## select a mirror for R packages
#utils = importr('utils')
#utils.chooseCRANmirror(ind=2)  # choose where to download the package from
## Install
#from rpy2.robjects.vectors import StrVector
#utils.install_packages(StrVector(['igraph']))
#%%
# Fit the power law
igraph = importr('igraph')
# Build an R vector from the degrees
degrees_r = ro.FloatVector(degrees)
# fit_power_law documentation:
# https://rdrr.io/cran/igraph/man/fit_power_law.html
resultado = igraph.fit_power_law(degrees_r, implementation='plfit')
print(resultado.r_repr())
#%%
# Plot histogram + fit
kmin = resultado.rx2('xmin')[0]
gamma = resultado.rx2('alpha')[0]
ksp = resultado.rx2('KS.p')[0]
from scipy.special import zeta
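# Hedged sketch of the overlay that the zeta import above suggests: the
# discrete power-law pmf p(k) = k^-gamma / zeta(gamma, kmin) drawn over the
# degree histogram. 'degrees', 'kmin' and 'gamma' are the objects from the
# fit above; the plotting choices are illustrative.
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import zeta

ks = np.arange(int(kmin), max(degrees) + 1)
pk = ks ** (-gamma) / zeta(gamma, kmin)  # Hurwitz zeta normalizes the tail
plt.hist(degrees, bins=np.arange(min(degrees), max(degrees) + 2) - 0.5,
         density=True, alpha=0.5, label='degrees')
plt.plot(ks, pk, 'r-', label='plfit: gamma=%.2f, kmin=%d' % (gamma, int(kmin)))
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.show()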
chrom_dict = gdb.get_chromosome_dict()

sys.stderr.write("reading transcripts\n")
trs = genome.transcript.read_transcripts(tr_path, chrom_dict)
tr_dict = dict([(tr.name, tr) for tr in trs])

for tr_name in tr_names:
    if tr_name not in tr_dict:
        sys.stderr.write("WARNING: could not find transcript %s\n" % tr_name)
        continue
    tr = tr_dict[tr_name]

    r = robjects.r
    grdevices = importr('grDevices')

    output_format = "pdf"
    output_filename = "%s.%s" % (tr_name, output_format)
    width = 8
    height = 5

    sys.stderr.write("drawing transcript (filename=%s)\n" % output_filename)
    grdevices.pdf(file=output_filename, width=width, height=height)

    region = tr
    options = {
        'color': "#08306B",
        'utr_color': '#DEEBF7',
        'border': 'false',
import numpy as np
from genieclust.genie import *
from genieclust.inequity import *
from genieclust.compare_partitions import *
import time
import gc, os
import scipy.spatial.distance
from rpy2.robjects.packages import importr
stats = importr("stats")
genie = importr("genie")
genieclustr = importr("genieclust")
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()

verbose = False

np.random.seed(123)
n = 50000
d = 69
X = np.random.normal(size=(n, d))
labels = np.random.choice(np.r_[1, 2, 3, 4, 5, 6, 7, 8], n)
k = len(np.unique(labels[labels >= 0]))

# center X + scale (NOT: standardize!)
X = (X - X.mean(axis=0)) / X.std(axis=None, ddof=1)

g = 0.3
metric = "euclidean"
print("n=%d, d=%d, g=%.2f, k=%d" % (n, d, g, k))
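# Hedged benchmark sketch under the setup above: run the Python Genie
# implementation (imported via genieclust.genie) on X and compare the
# labelling against the random reference labels with the adjusted Rand index
# from genieclust.compare_partitions. Parameter names follow the genieclust
# Python API as I understand it; treat this as a sketch, not the original
# benchmark body.
t0 = time.time()
labels_pred = Genie(n_clusters=k, gini_threshold=g).fit_predict(X)
print("genieclust.Genie: %.2fs, AR=%.3f"
      % (time.time() - t0, adjusted_rand_score(labels, labels_pred)))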
def art_two_way_anova(dataDF):
    """
    Performs Aligned rank transform of the data followed by two-way Anova.
    Ref: Wobbrock, J.O., Findlater, L., Gergle, D. and Higgins, J.J. (2011).
    "The Aligned Rank Transform for nonparametric factorial analyses using
    only ANOVA procedures." Proceedings of the ACM Conference on Human
    Factors in Computing Systems (CHI '11). doi: 10.1145/1978942.1978963

    :param dataDF: pandas.DataFrame with the following column ordering:
        Column 1. Factor 1
        Column 2. Factor 2
        Column 3. Measurements
    :return: correctness_ART, pVal_tuple
        correctness_ART: bool, correctness of ART procedure (see section
            "Ensuring Correctness" of the reference paper)
        pVal_tuple: tuple, has three members -- pVal for effect of Factor 1,
            pVal for effect of Factor 2, pVal for the interaction effect
            between Factor 1 and Factor 2.
    """
    assert type(dataDF) is pd.DataFrame, \
        "Input <dataDF> is not a pandas DataFrame as expected"
    assert dataDF.shape[1] == 3, \
        "The number of columns in <dataDF> is not 3 as expected"

    ART_IPDF_r = robjects.DataFrame({
        "f1": robjects.FactorVector(dataDF.iloc[:, 0]),
        "f2": robjects.FactorVector(dataDF.iloc[:, 1]),
        "m": robjects.FloatVector(dataDF.iloc[:, 2])})

    rUtils = rpackages.importr("utils")
    rUtils.chooseCRANmirror(ind=1)
    try:
        rpackages.importr("ARTool")
    except RRuntimeError as re:
        if str(re).find("Error in loadNamespace") >= 0:
            print("Installing package \"ARTool\" in the embedded R. "
                  "This might take a while")
            rUtils.install_packages(robjects.StrVector(["ARTool"]))
            rpackages.importr("ARTool")
        else:
            raise re

    ARTFunc_r = robjects.r["art"]
    modelFormula = robjects.Formula("m~f1*f2")
    ART_OPDF = ARTFunc_r(modelFormula, data=ART_IPDF_r)

    rsummary = robjects.r["summary"]
    try:
        ART_OP_SUM = rsummary(ART_OPDF)
    except RRuntimeError as re:
        if str(re).find("Error in Anova.lm") >= 0:
            return 1, (np.nan, np.nan, np.nan)
        else:
            raise re

    # "Ensuring Correctness": the aligned-response column sums and the
    # aligned-ANOVA F values must all be ~0
    columnSums = np.array(ART_OP_SUM[10])
    fVal_aligned_anova = np.array(ART_OP_SUM[11][4])
    ART_success = np.allclose(columnSums, 0) and np.allclose(fVal_aligned_anova, 0)

    ranova = robjects.r("anova")
    ART_res = ranova(ART_OPDF)
    return ART_success, tuple(ART_res[6])
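# Usage sketch for art_two_way_anova() with a small synthetic two-factor
# design (assumed data; the embedded R must have, or be able to install,
# the ARTool package):
import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame({
    'f1': np.repeat(['a', 'b'], 20),
    'f2': np.tile(np.repeat(['x', 'y'], 10), 2),
    'm': rng.normal(size=40),
})
ok, (p_f1, p_f2, p_int) = art_two_way_anova(df)
print(ok, p_f1, p_f2, p_int)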
import rpy2
import numpy as np
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import FloatVector as FV
from rpy2.robjects import numpy2ri
numpy2ri.activate()
from numpy import asarray as npa
import numbers

mvtnorm = importr('mvtnorm')
r'''
We require the R package `mvtnorm` to compute certain high dimensional
expectations involving erfs, by reducing such expectations to Gaussian
orthant probabilities and evaluating them with `mvtnorm`.
'''


def Eerf2(mu, var):
    r'''
    Computes
        E[erf(x)^2 : x ~ N(mu, var)]
    when mu and var are scalars, or
        E[erf(x) erf(y) : (x, y) ~ N(mu, var)]
    when mu is a length-2 vector and var is a 2x2 matrix.

    Example:
        >>> Eerf2([0, 0], [[1, 1], [1, 1]])
        0.4645590543975399

    Inputs:
        mu: scalar or length-2 vector
import rpy2.rinterface as rinterface
import rpy2.robjects as robjects
from rpy2.robjects import r, pandas2ri
from rpy2.robjects.methods import RS4
from rpy2.robjects.packages import importr
from scipy.sparse import csr_matrix

import os
import pandas
import numpy
import argparse

import warnings
warnings.filterwarnings("ignore")

pandas2ri.activate()

SingleCellExperimentInterface = importr('SingleCellExperiment')
SummarizedExperimentInterface = importr('SummarizedExperiment')
BiocGenericsInterface = importr('BiocGenerics')
MatrixInterface = importr('Matrix')


class SingleCellExperiment(RS4):

    @classmethod
    def fromRData(sce_class, rdata):
        rs4_object = r.readRDS(rdata)
        sce = sce_class.fromRS4(rs4_object)
        sce.rs4 = rs4_object
        return sce

    def asSummarizedExperiment(self):
        data = robjects.r["SummarizedExperiment"](self.rs4)
def extract_dataframe_from_R(dataframe_name):
    temp = pandas2ri.ri2py(r(dataframe_name))
    temp_rows = pandas2ri.ri2py(r("rownames(" + dataframe_name + ")"))
    temp_cols = np.float32(
        pandas2ri.ri2py(r("colnames(" + dataframe_name + ")")))
    df = pd.DataFrame(data=temp, columns=temp_cols, index=temp_rows)
    return df


mpl.use("Agg")
mpl.rcParams['pdf.fonttype'] = 42
# mpl.style.use('ggplot')

R = rpy2.robjects.r
DTW = importr('dtw')
DTWclust = importr('dtwclust')

# Load data sets in R
r("""load("/scratch/PI/mcovert/dvanva/sequencing/all_cells_scde_fit_linear.RData")""")
r("""load("/scratch/PI/mcovert/dvanva/sequencing/counts_data.RData")""")

# Load pickle file with cell objects
direc = '/scratch/PI/mcovert/dvanva/sequencing/'
all_cell_file = 'all_cells_qc_complete.pkl'
all_cells_total = pickle.load(open(os.path.join(direc, all_cell_file)))

# Determine which genes to look at
inflammatory_genes = [
    "Cxcl3", "Cxcl2", "Lif", "Ccl4", "Csf3", "Il1f9", "Ccl3", "Ccl5", "Tnf",
###
# Create overlap
overlap_lh = (scan1_lh[:] > 0) * (scan2_lh[:] > 0)
overlap_rh = (scan1_rh[:] > 0) * (scan2_rh[:] > 0)
###

###
# Plot overlap

# Color bar
cbar = load_colorbar(cbarfile)

# Colors for the peaks
colorspace = importr('colorspace')
cols = np.array(robjects.r(
    'rbind(col2rgb(rainbow_hcl(4, c=100, l=65, start=15)), rep(255, 4))'))
cols = cols.T / 255

# Input
surf_data = {"lh": overlap_lh, "rh": overlap_rh}

## Just maxima
# Output
oprefix = path.join(odir, "check_overlap_with_maxima")

# Loop
for hemi in hemis:
from itertools import groupby
from gensim.models import Word2Vec
import glob
import math
import itertools
from sklearn.metrics import *
import pandas as pd
import numpy as np
import csv
import rpy2.robjects as robjects
import rpy2.robjects.numpy2ri
from rpy2.robjects.packages import importr

np.random.seed(1234)
r = robjects.r
rpy2.robjects.numpy2ri.activate()
#np.set_printoptions(threshold = 1e6)
importr('genlasso')
importr('gsubfn')


def perf_measure(y_true, y_pred):
    TP_FN = np.count_nonzero(y_true)
    FP_TN = y_true.shape[0] * y_true.shape[1] - TP_FN
    FN = np.where((y_true - y_pred) == 1)[0].shape[0]
    TP = TP_FN - FN
    FP = np.count_nonzero(y_pred) - TP
    TN = FP_TN - FP
    Precision = float(TP) / float(TP + FP + 1e-9)
    Recall = float(TP) / float(TP + FN + 1e-9)
    accuracy = float(TP + TN) / float(TP_FN + FP_TN + 1e-9)
    F1 = 2 * ((Precision * Recall) / (Precision + Recall))
    return Precision, Recall, accuracy
    b_1 = cur_param[J:(2 * J)]
    W_2 = cur_param[(2 * J):(2 * J + J * K)]
    b_2 = cur_param[(2 * J + J * K):(2 * J + J * K + K)]
    W_3 = cur_param[(2 * J + J * K + K):(2 * J + J * K + 2 * K)]
    b_3 = cur_param[2 * J + J * K + 2 * K]
    nn = neural_net(X, W_1, b_1, W_2, b_2, W_3, b_3)
    est_q[i, :] = np.reshape((nn + 1) * (max_y - min_y) / 2 + min_y, (len(X),))

est_q = np.mean(est_q, axis=0)

plt.plot(X_s, y_s, 'k.')
plt.plot(X_s, q_true, 'r-', label='True')
plt.plot(X_s, est_q, 'b-', label='Estimate')
plt.legend()

# fetch the mcycle data set from R's MASS package
utils = importr("utils")
utils.install_packages("MASS")
MASS = importr("MASS")
motor = data(MASS).fetch('mcycle')['mcycle']
motor = pandas2ri.ri2py(motor)

plt.plot('times', 'accel', '.', data=motor)
plt.xlabel('Time')
plt.ylabel('Acceleration')

X = np.array(motor['times'])
X = np.reshape(X, (len(X), 1))
y = np.array(motor['accel'])
y = np.reshape(y, (len(y), 1))
max_X = np.max(X)
def run(self, data, regression, resources=None):
    """
    The method prints out a summary of the BMA procedure and creates an
    imageplot. If resources has an entry 'bma_imageplot_filename', the
    imageplot is written to this file as pdf.
    The method does not return any useful results - it is a tool for
    variable selection. Once you have selected your variables, use
    estimate_linear_regression for further usage of the coefficients.
    Expects an entry 'outcome' in resources that provides the values of the
    dependent variable. 'data' is a 2D numpy array of the actual data
    (nobservations x ncoefficients), which can be created by
    Dataset.create_regression_data_for_estimation(...).
    'regression' is an instance of a regression class.
    """
    r = robjects.r
    if data.ndim < 2:
        raise StandardError, "Argument 'data' must be a 2D numpy array."

    nobs = data.shape[0]
    nvar = data.shape[1]
    constant_position = resources.get(
        "constant_position", array([], dtype='int32'))  # position for intercept

    if constant_position.size == 0:  # no intercept
        constant_position = -1
        nvalues = nvar
    else:
        constant_position = constant_position[0]
        nvalues = nvar + 1

    beta = zeros(nvalues).astype(float32)

    coef_names = resources.get("coefficient_names", nvar * [])
    data_for_r = {}
    for icoef in range(len(coef_names)):
        data_for_r[coef_names[icoef]] = robjects.FloatVector(data[:, icoef])
    bma = importr("BMA")
    d = robjects.DataFrame(data_for_r)
    try:
        bma_params = {'x': d,
                      'y': robjects.FloatVector(resources["outcome"]),
                      'glm.family': "gaussian",
                      'strict': 1}
        fit = bma.bic_glm(**bma_params)
        fit[20] = ''  # to have less output in the summary
        r.summary(fit)
        estimates = array(fit[11])
        standard_errors = array(fit[12])
        # add intercept (always included, therefore 1)
        postprob = concatenate((array([1]), array(fit[10]) / 100.))
        nmodels = fit[21][0]
        filename = resources.get('bma_imageplot_filename', None)
        subtitle = ''
        submodel = resources.get('submodel', -2)
        if submodel > -2:
            subtitle = "Submodel: %s" % submodel
        plot_params = {'bma.out': fit, 'cex.axis': 0.7, 'sub': subtitle}
        if filename is not None:
            r.pdf(file=filename)
            bma.imageplot_bma(**plot_params)
            r['dev.off']()
        else:
            r.X11()
            bma.imageplot_bma(**plot_params)
        result = {"estimators": estimates,
                  "standard_errors": standard_errors,
                  "other_measures": {"post_probability": postprob,
                                     "t_statistic": zeros(estimates.size)  # not applicable
                                     },
                  "other_info": {"nmodels": nmodels}}
    except:
        logger.log_warning("Error in BMA procedure.")
        result = {}
    return result
import os
import pandas as pd
import scipy.cluster.hierarchy as sch
from seq_functions import smFISH_cell, smFISH_stim_cell
import rpy2
import rpy2.robjects.numpy2ri
from rpy2.robjects.packages import importr
import cPickle as pickle
rpy2.robjects.numpy2ri.activate()
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
matplotlib.rcParams['pdf.fonttype'] = 42

"""
Initialize R instances
"""
R = rpy2.robjects.r
DTW = importr('dtw')
DTWCLUST = importr('dtwclust')

"""
Load excel files
"""
direc = "/scratch/PI/mcovert/dvanva/sequencing/smFISH"
file_name = os.path.join(direc, "12072015", "12072015_ExperMetadata.xlsx")
data_0 = pd.read_excel(file_name, sheetname=0)
file_name = os.path.join(direc, "12162015", "12162015_ExperMetadata.xlsx")
data_1 = pd.read_excel(file_name, sheetname=0)

"""
Load cluster averages
"""
from utility import genofile_parser  # genofile_parser

import base64
import array
import csv
import itertools

import rpy2.robjects as ro
from base import data_set
from base import trait as TRAIT

from utility import helper_functions
from utility.tools import locate

from rpy2.robjects.packages import importr
utils = importr("utils")

r_library = ro.r["library"]            # Map the library function
r_options = ro.r["options"]            # Map the options function
r_write_table = ro.r["write.table"]    # Map the write.table function
r_head = ro.r["head"]                  # Map the head function
r_load = ro.r["load"]                  # Map the load function
r_colnames = ro.r["colnames"]          # Map the colnames function
r_list = ro.r["list"]                  # Map the list function
r_c = ro.r["c"]                        # Map the c (combine) function
r_print = ro.r["print"]                # Map the print function
r_seq = ro.r["seq"]                    # Map the seq (sequence) function


class EPheWAS(object):

    def __init__(self):
def characterize(trk_in, hdr_in, normalize=True):
    '''
    Convert a track tuple to an adehabitatLT ltraj object.
    Returns track(r.ltraj), segment(r.ltraj), active(py.str_list),
    linearity(py.str_list), fishing(py.str_list)
    '''
    ##Embedded track normalization
    if normalize:
        print 'Normalize track to hourly bin.'
        if len(trk_in) < 50:
            print 'Track length < 50, abort characterization', len(trk_in)
            sta = ['Pending'] * len(trk_in)
            sgid = [vms_constants.null_val_str] * len(trk_in)
            return sta, sgid
        trk, hdr = vms_tools.vms_track_normalize(trk_in, hdr_in)
        print 'before normalize:', len(trk_in)
        print 'after normalize:', len(trk)
        # vms_visualize.plot_track(trk_in,hdr_in)
        # vms_visualize.plot_track(trk,hdr)
    else:
        trk = trk_in[:]
        hdr = hdr_in[:]

    lat_id = hdr.index('LATITUDE')
    lon_id = hdr.index('LONGITUDE')
    tno_id = hdr.index('TRANSMITTER_NO')
    pdt_id = hdr.index('REPORTDATE')

    rlat = ro.FloatVector([float(i[lat_id]) for i in trk])
    rlon = ro.FloatVector([float(i[lon_id]) for i in trk])
    rtno = ro.StrVector([str(i[tno_id]) for i in trk])
    rpdt = ro.StrVector([str(i[pdt_id]) for i in trk])

    r.assign('y', rlat)
    r.assign('x', rlon)
    r.assign('tno', rtno)
    r.assign('pdt', rpdt)

    none_list = np.chararray(len(rlat), itemsize=15)
    none_list[:] = vms_constants.null_val_str  # 'NA'

    r('xy <- cbind(x,y)')
    # r('XY <- as.data.frame(xy)')
    ##todo: reproject XY to UTM zones designated by the average longitude,
    ##so the velocity model can be defined in km/h instead of deg/h
    ##Reference: https://stackoverflow.com/questions/7927863/location-data-format-for-adehabitat-package
    importr('rgdal')
    print 'mean(x)', r('mean(x)')
    print 'max(x)', r('max(x)')
    print 'min(x)', r('min(x)')
    r('utmz <- (floor((mean(x) + 180)/6) %% 60) + 1')  # find utm zone
    r('utmxy <- project(xy, paste("+proj=utm +zone=", toString(utmz), " ellps=WGS84"))')
    r('XY <- as.data.frame(utmxy)')
    importr('adehabitatLT')
    importr('miscTools')
    r('pdt <- as.POSIXct(strptime(pdt,"%Y%m%d%H%M%S"),"GMT")')
    r('trl <- as.ltraj(XY,id=tno,date=pdt)')
    # print r('trl')
    # r('plotltr(trl,"dist")')

    ##Take care of the landing records, which create NA angles and cause
    ##partmod.ltraj to fail. If there is no Active record in the trl, return
    ##and skip segmentation. The stationary detector avoids landings being
    ##recognized as mod.1 ('4926123 2014Apr.')
    sta = check_stationary(r('trl'))
    if 'Active' not in set(sta):
        print 'The track is completely stationary. Skip segmentation and classification.'
        sg = np.chararray(len(sta), itemsize=15)
        sg[:] = vms_constants.null_val_str
        return denormalize_status(trk_in, hdr_in, trk, hdr, sta, sg)

    print 'Begin track segmentation and classification.'
    #check if there are stationary points; if true, the track needs to be split
    '''check stationary by looking for dist==0'''
    # r('length(which.ltraj(trl,"dist==0")[[1]])')
    #remove stationary records
    r('trlo <- trl')  # back up original trl as trlo
    print 'Found Stationary:', 'Stationary' in sta
    print 'Found Landing:', 'Landing' in sta
    if ('Stationary' in sta) or ('Landing' in sta):
        print 'Remove stationary records'
        sta_idx = [i + 1 for (i, j) in enumerate(sta)
                   if j == 'Stationary' or j == 'Landing']
        r.assign('sta_idx', ro.IntVector(sta_idx))
        # filter active recs with the check_stationary result
        r('trlf <- trl[[1]][-c(sta_idx),]')
        # recast into ltraj
        print r('trl <- as.ltraj(trlf[,c("x","y")],trlf[,c("date")],id=id(trl))')

    trl_len = r('length(trl[[1]]$x)')
    print trl_len[0]
    if trl_len[0] < 50:
        print 'Active record less than 50, abort characterization.', trl_len[0]
        print 'The status is marked [Pending] for a later run to pick up.'
        sta[:] = ['Pending'] * len(sta)
        sg = np.chararray(len(sta), itemsize=15)
        sg[:] = vms_constants.null_val_str
        status, sgid = denormalize_status(trk_in, hdr_in, trk, hdr, sta, sg)
        return status, sgid

    ##TODO: change velocity model from degree based to meter based
    r('tested.means <- seq(0,20000,length=10)')
    r('(limod <- as.list(paste("dnorm(dist, mean =",tested.means,", sd = 2000)")))')
    r('mod <- modpartltraj(trl,limod)')
    r('bestmod <- bestpartmod(mod,Km=round(length(trl[[1]]$x)/5),plotit=FALSE)')
    # colMedians requires the miscTools package
    r('k <- which.max(colMedians(bestmod$correction,na.rm=TRUE))')
    r('save(XY,trl,k,mod,limod,file="XY_trl_k_mod_limod.RData")')

    ## if only one segment is recognized, set it to pending for future merging
    while r('k')[0] > 0:
        try:
            if r('k')[0] == 1:
                print 'Unable to split track, set to pending.'
                sta[:] = ['Pending'] * len(sta)
                sg = np.chararray(len(sta), itemsize=15)
                sg[:] = vms_constants.null_val_str
                status, sgid = denormalize_status(trk_in, hdr_in, trk, hdr, sta, sg)
                return status, sgid
            else:
                print r('pm <- partmod.ltraj(trl,k,mod,na.manage=c("locf"))')
                break
        except:
            # if the previous step fails, nparts(k) is very likely
            # overestimated by 1
            r('k<-k-1')
            print 'Trying k=', r('k')[0]
            continue

    ##Check linearity; consolidate_trl does the job
    status, sgid = consolidate_trl(r('trlo'), r('trl'), pm=r('pm'), sta=sta)

    if normalize:
        # blow the hourly bin back up to the original binning
        status_tmp = status[:]
        status, sgid = denormalize_status(trk_in, hdr_in, trk, hdr, status, sgid)

    # if a fishing segment touches a landing, that segment is changed to transit
    glist = [list(j) for i, j in groupby(status)]
    gname = np.array([i for i, j in groupby(status)])
    # do nothing if there is only one segment
    if len(gname) == 1:
        return status, sgid
    for gidx in list(np.where(gname == 'Fishing')[0]):
        if gidx == 0:
            if gname[gidx + 1] == 'Landing':
                glist[gidx][:] = ['Transit'] * len(glist[gidx][:])
        elif gidx == len(gname) - 1:
            if gname[gidx - 1] == 'Landing':
                glist[gidx][:] = ['Transit'] * len(glist[gidx][:])
        else:
            if gname[gidx + 1] == 'Landing' or gname[gidx - 1] == 'Landing':
                glist[gidx][:] = ['Transit'] * len(glist[gidx][:])
    status = [i for x in glist for i in x]
    return status, sgid
import pandas as pd
import numpy as np
import statsmodels.api as sm
import rpy2.robjects as robjects
import rpy2.robjects.numpy2ri
from rpy2.robjects import pandas2ri
from scm_analytics import ScmAnalytics, Analytics
from statsmodels.api import families
from rpy2.robjects.packages import importr

base = importr('base')
stats = importr('stats')
broom = importr('broom')
pandas2ri.activate()
robjects.numpy2ri.activate()

# 129636, 38242


def surgery_usage_regression_df(surgery_df, usage_df, item_ids=[],
                                case_cart_df=None, filters=[],
                                common_events=True):
    surgery_df = Analytics.process_filters(surgery_df, filters)
    surgery_df = surgery_df.drop_duplicates(subset=["event_id"])
    usage_df = Analytics.process_filters(usage_df, filters)
    #a plain `if case_cart_df:` raises ValueError for DataFrames,
    #whose truth value is ambiguous
    if case_cart_df is not None:
        case_cart_df = Analytics.process_filters(case_cart_df, filters)
import os
#point rpy2 at the local R installation (forward slashes avoid
#backslash-escape issues on Windows)
os.environ['R_HOME'] = 'C:/Program Files/R/R-3.4.2'

import pandas as pd
import rpy2
print(rpy2.__version__)
from rpy2.robjects.packages import importr  #import R packages into Python
import rpy2.robjects.packages as rpackages
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.vectors import DataFrame, StrVector, IntVector, ListVector

#activate pandas-to-R translation
pandas2ri.activate()

base = importr('base')

#source an R script to load the R environment (global variables)
Imported_R_Environment = ro.r('source("path_to_R_file.R")')
#load a particular R function from the sourced environment as a Python callable
R_FUN_Loaded = ro.r('R')


def DF_Python_To_R(df, Factor_Cols=["Amount", "AmountUSD"]):
    #Translates a pandas DataFrame into an R DataFrame.
    #Inputs:
    #- the DataFrame that is going to be translated
    #- names of the columns that behave as stringsAsFactors = TRUE
    #The purpose is to avoid an rpy2 translation issue: with
    #stringsAsFactors = TRUE, column values get turned into column names.
    def df_Str(tab, col):
        return DataFrame({col: base.I(StrVector(tab[col]))})
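# Hypothetical usage of the helper above (the sample frame and its column
# names are made up): wrap each factor-like column with base.I() so its
# string values survive translation unchanged, then cbind the pieces.
import pandas as pd
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import DataFrame, StrVector

base = importr('base')
sample = pd.DataFrame({"Amount": ["10", "20"], "AmountUSD": ["11", "22"]})
pieces = [DataFrame({c: base.I(StrVector(sample[c]))})
          for c in ["Amount", "AmountUSD"]]
r_df = base.cbind(*pieces)
print(base.dim(r_df))  #expected: 2 2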
def _exec_r_module(self):
    try:
        import rpy2.robjects
        from rpy2.robjects import numpy2ri
        from rpy2.robjects import pandas2ri
        from rpy2.robjects.packages import importr
    except ImportError:
        raise ImportError('R module cannot be run, because '
                          '"rpy2" package is not installed.')
    module_name = os.path.splitext(os.path.basename(self.source_file))[0]
    if os.path.exists(self.source_file):
        logger.debug('import module "%s" from source file: %s',
                     module_name, self.source_file)
        rpy2.robjects.r('source("{0}")'.format(self.source_file))
        module = rpy2.robjects.r[module_name]
    else:
        logger.debug('import module "%s" from "jtmodules" package',
                     module_name)
        rpackage = importr('jtmodules')
        module = getattr(rpackage, module_name)
    version = module.get('VERSION')[0]
    if version != self.handles.version:
        raise PipelineRunError(
            'Version of source and handles is not the same.')
    func = module.get('main')
    numpy2ri.activate()   #enables use of numpy arrays
    pandas2ri.activate()  #enables use of pandas data frames
    kwargs = self.keyword_arguments
    logger.debug('evaluate main() function with INPUTS: "%s"',
                 '", "'.join(kwargs.keys()))
    for k, v in kwargs.iteritems():
        if isinstance(v, np.ndarray):
            #R doesn't have unsigned integer types
            if v.dtype == np.uint16 or v.dtype == np.uint8:
                logger.debug(
                    'module "%s" input argument "%s": '
                    'convert unsigned integer data type to integer',
                    self.name, k)
                kwargs[k] = v.astype(int)
        elif isinstance(v, pd.DataFrame):
            # TODO: We may have to translate pandas data frames explicitly
            # into the R equivalent, e.g. via pandas2ri.py2ri(v).
            kwargs[k] = v
    args = rpy2.robjects.ListVector({k: v for k, v in kwargs.iteritems()})
    base = importr('base')
    r_out = base.do_call(func, args)
    for handle in self.handles.output:
        # NOTE: R functions are supposed to return a list, so each output
        # argument can be extracted with rx2(), the rpy2 equivalent of
        # indexing the list with "[[]]" in R.
        if isinstance(r_out.rx2(handle.name),
                      rpy2.robjects.vectors.DataFrame):
            handle.value = pandas2ri.ri2py(r_out.rx2(handle.name))
        else:
            # NOTE: R doesn't have an unsigned integer data type,
            # so we cast back to uint16.
            handle.value = numpy2ri.ri2py(
                r_out.rx2(handle.name)).astype(np.uint16)
    return self.handles.output
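# Why the unsigned-integer cast above is needed (a sketch, assuming an
# rpy2 2.x setup like the module's): R has no unsigned integer type, so
# numpy2ri cannot map np.uint16/np.uint8 arrays directly; casting to a
# signed integer array first makes the conversion well-defined.
import numpy as np
from rpy2.robjects import numpy2ri

numpy2ri.activate()
img = np.array([[0, 65535]], dtype=np.uint16)
r_img = numpy2ri.py2ri(img.astype(int))  #cast to signed int, then convert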
import pandas as pd
from rpy2.robjects.packages import importr
from rpy2.robjects import r as R

rsm = importr('rsm')  #response-surface methods package
print(rsm.__rdata__)
print(type(R('pi')[0]))

ChemReact = R['ChemReact']  #example dataset lazy-loaded by the rsm package
print(ChemReact)
print(type(ChemReact))

bbd = R['bbd']  #Box-Behnken design generator from rsm
print(bbd(3, 2)[5])
import numpy as np
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr

r_qmap = importr('qmap')
pandas2ri.activate()


def do_qmap(obs, c_mod, p_mod=None, proj_adj_type='cdf', wet_day=False,
            verbose=True):
    """Quantile mapping.

    Arguments:
        obs {ndarray} -- observed time series
        c_mod {ndarray} -- model data for the reference period

    Keyword Arguments:
        p_mod {ndarray} -- model data for the future scenario (default: {None})
        proj_adj_type {str} -- type of adjustment to be applied to the
            future scenario:
            'cdf'   -- quantile mapping (default)
            'edcdf' -- equidistant CDF; Li et al. (2010)
            'dqm'   -- detrended QM; Cannon et al. (2015)
            'qdm'   -- quantile delta mapping; Cannon et al. (2015)
        wet_day {bool or float} -- whether to perform wet day correction;
            as a float, the threshold below which all values are set to zero
    """
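# A hedged sketch of the plain 'cdf' path that a wrapper like do_qmap()
# presumably reduces to: the qmap package's fitQmapQUANT()/doQmapQUANT()
# pair (the gamma-distributed inputs here are synthetic):
import numpy as np
from rpy2.robjects import FloatVector
from rpy2.robjects.packages import importr

r_qmap = importr('qmap')
obs = FloatVector(np.random.gamma(2.0, 5.0, 1000))    #observations
c_mod = FloatVector(np.random.gamma(2.5, 4.0, 1000))  #model, reference period
fit = r_qmap.fitQmapQUANT(obs, c_mod, qstep=0.01, wet_day=False)
corrected = np.asarray(r_qmap.doQmapQUANT(c_mod, fit))
print(corrected[:5])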
import tensorflow as tf
import keras.backend as K
from mnist import *
from cifar10 import load_data, set_flags, load_model
from attack_utils import gen_grad
from tf_utils_adv import batch_eval
from os.path import basename
import numpy as np
from rpy2.robjects.packages import importr
from rpy2.robjects import FloatVector
from tensorflow.python.platform import flags

FLAGS = flags.FLAGS
multici = importr("MultinomialCI")


def isRobust(prob, sd, epsilon):
    #simultaneous confidence intervals for the class frequencies,
    #sorted so the top class comes first
    fv = FloatVector(sorted(prob)[::-1])
    ci = np.array(multici.multinomialCI(fv, 0.05))
    qi = ci[0, 0]  #lower bound of the top class
    qj = ci[1, 1]  #upper bound of the runner-up class
    alpha = np.linspace(1.01, 2.0, 100)
    bound = (-np.log(1 - qi - qj + 2 * (
        (qi**(1 - alpha) + qj**(1 - alpha)) / 2)**(1 / (1 - alpha)))
        / alpha).max()
    if bound > epsilon**2. / 2. / sd**2.:
        return np.array([True, np.sqrt(bound * 2.) * sd])
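# Hypothetical call: `prob` is meant to hold the per-class counts of a
# smoothed classifier's predictions under Gaussian noise with std `sd`
# (the numbers below are made up). Note that, as written, the function
# returns None when the bound does not certify radius `epsilon`.
counts = np.array([850, 60, 30, 20, 15, 10, 5, 5, 3, 2])
print(isRobust(counts, sd=0.25, epsilon=0.1))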