def get_data(numgroups):
    with localconverter(ro.default_converter + pandas2ri.converter):
        if numgroups == 2:
            r.source('~/Documents/rscripts/splatter-2.R')
        elif numgroups == 6:
            r.source('~/Documents/rscripts/splatter-6.R')
        counts = r2py(r['counts'])      # cell-by-gene dataframe
        cellinfo = r2py(r['cellinfo'])  # Cell, Batch, Group
        geneinfo = r2py(r['geneinfo'])  # Gene
        sim = sc.AnnData(counts.values, obs=cellinfo, var=geneinfo)
        sim.obs_names = cellinfo.Cell
        sim.var_names = geneinfo.Gene
        if numgroups == 2:
            # omitted in the 6-group case so we can generalize to different dropout percentages
            sc.pp.filter_genes(sim, min_counts=1)
        truecounts = r2py(r['truecounts'])
        dropout = r2py(r['dropout'])
        print("percent dropout: {}".format(np.sum(dropout.values) / (sim.n_obs * sim.n_vars)))
        sim_true = sc.AnnData(truecounts.values, obs=cellinfo, var=geneinfo)
        sim_true.obs_names = cellinfo.Cell
        sim_true.var_names = geneinfo.Gene
        sim_true = sim_true[:, sim.var_names]
    return [sim, sim_true]
def stage2_calculate():
    print('Execute jpsurvRest/stage2_calculate')
    print(OKGREEN + UNDERLINE + BOLD + "****** Stage 2: CALCULATE BUTTON ***** " + ENDC)
    jpsurvDataString = request.args.get('jpsurvData', False)
    jpsurvDataString = fix_jpsurv(jpsurvDataString)
    print(BOLD + "**** jpsurvDataString ****" + ENDC)
    print(jpsurvDataString)
    jpsurvData = json.loads(jpsurvDataString)
    print(BOLD + "**** jpsurvData ****" + ENDC)
    for key, value in jpsurvData.items():
        print("var: %s = %s" % (key, value))
    # Init the R source
    r.source('./JPSurvWrapper.R')
    print(BOLD + "**** Calling getFittedResultsWrapper ****" + ENDC)
    r.getFittedResultWrapper(UPLOAD_DIR, jpsurvDataString)
    # json.dumps on a pre-encoded JSON string would double-encode it; build the dict instead
    out_json = json.dumps({"status": "OK"})
    return current_app.response_class(out_json, mimetype='application/json')
def calc_norm_factors(counts_df, method):
    assert method in ['TMM', 'TMMwsp', 'RLE', 'upperquartile']
    from rpy2.robjects import numpy2ri, pandas2ri, r
    numpy2ri.activate()
    pandas2ri.activate()
    r.source(os.path.join('workflow', 'scripts', 'utils_calcNormFactors.R'))
    return r.calcNormFactors(counts_df, method=method)
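A minimal usage sketch for the function above, under assumptions not stated in the snippet: that utils_calcNormFactors.R exposes a calcNormFactors() entry point (e.g. wrapping edgeR::calcNormFactors), that counts are oriented genes x samples, and that the working directory contains workflow/scripts/. The toy numbers are made up.

import os
import pandas as pd

# Hypothetical toy counts: 4 genes x 2 samples
counts = pd.DataFrame({'s1': [10, 200, 3000, 50],
                       's2': [12, 180, 3300, 40]},
                      index=['g1', 'g2', 'g3', 'g4'])
factors = calc_norm_factors(counts, method='TMM')
print(factors)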
def compute_degs(dataframe, method, samples, controls, constant_threshold=10,
                 filter_low_expressed=False, min_counts=10):
    # Filter lowly expressed genes
    if filter_low_expressed:
        dataframe = dataframe.loc[[index for index, value in dataframe.sum(axis=1).items()
                                   if value > min_counts]]
        print(dataframe.shape)
    # Connect to R
    r.source('scripts/code_library.R')
    pandas2ri.activate()
    # Create design dict
    sample_dict = {'samples': samples, 'controls': controls}
    # Create design dataframe
    design_dataframe = pd.DataFrame({
        group_label: {sample: int(sample in group_samples) for sample in dataframe.columns}
        for group_label, group_samples in sample_dict.items()})
    # Convert to R
    dataframe_r = pandas2ri.py2ri(dataframe)
    design_dataframe_r = pandas2ri.py2ri(design_dataframe)
    # Run
    if method == 'CD':
        signature_dataframe_r = r.apply_characteristic_direction(
            dataframe_r, design_dataframe_r, constant_threshold)
    elif method == 'limma':
        signature_dataframe_r = r.apply_limma(dataframe_r, design_dataframe_r)
    else:
        raise ValueError('Wrong method supplied. Must be limma or CD.')
    # Convert back to pandas and return
    signature_dataframe = pandas2ri.ri2py(signature_dataframe_r)
    return signature_dataframe
def brt(fname, species_name, gbm_opts, weights):
    """
    Takes the name of a CSV file containing a data frame and a dict of
    options for gbm.step, runs gbm.step, and returns the results.
    """
    from rpy2.robjects import r
    import anopheles_brt
    r.source(os.path.join(anopheles_brt.__path__[0], 'brt.functions.R'))
    with open(os.path.join('anopheles-caches', fname)) as f:
        heads = f.readline().split(',')
    weight_str = str(weights.tolist()).replace('[', 'c(').replace(']', ')')
    base_argstr = ('data=read.csv("anopheles-caches/%s"), gbm.x=2:%i, gbm.y=1, '
                   'family="bernoulli", site.weights=%s, silent=TRUE'
                   % (fname, len(heads), weight_str))
    opt_argstr = ', '.join([base_argstr] + ['%s=%s' % t for t in gbm_opts.items()])
    varname = sanitize_species_name(species_name)
    # hashlib needs bytes in Python 3
    brt_fname = hashlib.sha1(opt_argstr.encode()).hexdigest() + '.r'
    if brt_fname in os.listdir('anopheles-caches'):
        r('load')(os.path.join('anopheles-caches', brt_fname))
        return r(varname)
    else:
        r('%s<-gbm.step(%s)' % (varname, opt_argstr))
        if str(r(varname)) == 'NULL':
            raise ValueError('gbm.step returned NULL')
        r('save(%s, file="%s")' % (varname, os.path.join('anopheles-caches', brt_fname)))
        return r(varname)
def compute_signature(rawcount_dataframe, method, experimental_samples, control_samples):
    # Connect to R
    r.source('scripts/signature.R')
    pandas2ri.activate()
    # Create design dict
    sample_dict = {'experimental': experimental_samples, 'control': control_samples}
    # Create design dataframe
    design_dataframe = pd.DataFrame({
        group_label: {sample: int(sample in group_samples) for sample in rawcount_dataframe.columns}
        for group_label, group_samples in sample_dict.items()})
    # Convert to R
    rawcount_dataframe_r = pandas2ri.py2ri(rawcount_dataframe)
    design_dataframe_r = pandas2ri.py2ri(design_dataframe)
    # Run
    if method == 'CD':
        signature_dataframe_r = r.run_characteristic_direction(rawcount_dataframe_r, design_dataframe_r)
    elif method == 'limma':
        signature_dataframe_r = r.run_limma(rawcount_dataframe_r, design_dataframe_r)
    else:
        raise ValueError('Wrong method supplied. Must be limma or CD.')
    # Convert back to pandas and return
    signature_dataframe = pandas2ri.ri2py(signature_dataframe_r)
    return signature_dataframe
def load_active_driver(local_ad=True):
    if local_ad:
        r.source("ActiveDriver/R/ActiveDriver.R")
        # ActiveDriver is in the R global namespace now
        return r
    else:
        return importr("ActiveDriver")
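A hedged usage sketch: both branches are assumed to expose the same entry points, the sourced-script branch via the R global environment and the importr branch via a package module. The argument names below are illustrative only, not the package's documented signature.

ad = load_active_driver(local_ad=False)  # fall back to the installed package
# result = ad.ActiveDriver(sequences, disorder, mutations, sites)  # illustrative call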
def ComBat(X, batch, covariate=None, parametric=False, empirical_bayes=True, save_dir=None):
    # Check X
    if not isinstance(X, (pd.DataFrame, pd.Series)):
        if isinstance(X, (list, tuple, np.ndarray, Mapping)):
            df = pd.DataFrame(X)
        else:
            raise TypeError('X must be an array-like object, dictionary or pandas Dataframe/Series')
    else:
        df = X
    row_names = df.index
    r_df = pandas2ri.py2ri(df)
    # Check covariate
    if covariate is None:
        covariate = np.ones((len(batch), 1))
    else:
        if not isinstance(covariate, (list, tuple, np.ndarray)):
            if isinstance(covariate, pd.DataFrame) or isinstance(covariate, pd.Series):
                covariate = covariate.to_numpy()
            else:
                raise TypeError('covariate array must be an array like or pandas Dataframe/Series')
        else:
            covariate = np.array(covariate)
        if len(covariate.shape) == 1:
            covariate = covariate.reshape(-1, 1)
        elif len(covariate.shape) > 2:
            raise ValueError('covariate array must be 1D or 2D')
    nr, nc = covariate.shape
    r_covariate = r.matrix(covariate, nrow=nr, ncol=nc)
    # Check batch
    if not isinstance(batch, (list, tuple, np.ndarray)):
        if isinstance(batch, pd.DataFrame) or isinstance(batch, pd.Series):
            batch = batch.to_numpy()
        else:
            raise TypeError('batch array must be an array like or pandas Dataframe/Series')
    else:
        batch = np.array(batch)
    if len(batch.shape) != 1:
        if len(batch.shape) == 2 and batch.shape[1] == 1:
            # reshape returns a new array; the original dropped the assignment
            batch = batch.reshape(-1)
        else:
            raise ValueError('batch array must be 1D or 2D with second dimension equal to 1')
    if len(np.unique(batch)) <= 1:
        raise ValueError('batch array must have at least 2 classes')
    r_batch = Vector(batch)
    # cwd = os.path.dirname(sys.argv[0])
    cwd = os.path.dirname(os.path.abspath(__file__))
    r.setwd(cwd)
    # r.source('./Statistical_analysis/R_scripts/ComBat.R')
    r.source('./R_scripts/ComBat.R')
    r_dr_results = r.ComBat_harmonization(r_df, r_covariate, r_batch, parametric, empirical_bayes)
    R_object_dict = {}
    keys = r_dr_results.names
    for i in range(len(keys)):
        R_object_dict[keys[i]] = np.array(r_dr_results[i])
    results = pd.DataFrame(R_object_dict)
    results.index = row_names
    if save_dir is not None:
        results.to_excel(os.path.join(save_dir, 'Features_ComBat.xlsx'))
    return results
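A small synthetic example for the wrapper above, assuming R_scripts/ComBat.R (sourced inside the function) defines a ComBat_harmonization entry point, e.g. around sva::ComBat. The data are random and purely illustrative.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(6, 3)), columns=['f1', 'f2', 'f3'])  # 6 samples x 3 features
batch = [0, 0, 0, 1, 1, 1]  # two hypothetical acquisition batches
harmonized = ComBat(X, batch, parametric=True)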
def plotIDR(output_file, input_prefixes):
    '''create IDR plots.

    This code is taken from the R script batch-consistency-plot.r
    within the IDR package.
    '''
    dirname = os.path.dirname(__file__)
    R.source(os.path.join(dirname, "WrapperIDR.r"))
    R('''df.txt = 10''')
    R('''uri.list <- list()
         uri.list.match <- list()
         ez.list <- list()
         legend.txt <- c()
         em.output.list <- list()
         uri.output.list <- list()''')
    npair = len(input_prefixes)
    for x, input_prefix in enumerate(input_prefixes):
        R.load(input_prefix + "-uri.sav")
        R.load(input_prefix + "-em.sav")
        i = x + 1
        R('''uri.output.list[[%(i)i]] <- uri.output;
             em.output.list[[%(i)i]] <- em.output;  # reverse =T for error rate;''' % locals())
        R('''ez.list[[%(i)i]] <- get.ez.tt.all(em.output,
                 uri.output.list[[%(i)i]]$data12.enrich$merge1,
                 uri.output.list[[%(i)i]]$data12.enrich$merge2);''' % locals())
        R('''# URI for all peaks
             uri.list[[%(i)i]] <- uri.output$uri.n;
             # URI for matched peaks
             uri.match <- get.uri.matched(em.output$data.pruned, df=df.txt);
             uri.list.match[[%(i)i]] <- uri.match$uri.n;''' % locals())
        legend = "%(i)i = %(input_prefix)s" % locals()
        R('''legend.txt[%(i)i] <- '%(legend)s';''' % locals())
    R.pdf(output_file)
    R('''par(mfcol=c(2,3), mar=c(5,6,4,2)+0.1)''')
    R('''plot.uri.group(uri.list, NULL, file.name=NULL, c(1:%(npair)i), title.txt="all peaks");
         plot.uri.group(uri.list.match, NULL, file.name=NULL, c(1:%(npair)i), title.txt="matched peaks");
         plot.ez.group(ez.list, plot.dir=NULL, file.name=NULL, legend.txt=c(1:%(npair)i), y.lim=c(0, 0.6));
         plot(0, 1, type="n", xlim=c(0,1), ylim=c(0,1), xlab="", ylab="", xaxt="n", yaxt="n");
         legend(0, 1, legend.txt, cex=0.6);''' % locals())
    R["dev.off"]()
def load_predict_func(file_path):
    """Load the predict function from an RDS file."""
    LOG.info("Loading predict function from rds file {}".format(file_path))
    try:
        r.source("../mlfmodelserver/deserialize_model.R")
        return r.get(r.deserialize_model(file_path))
    except Exception as generic_exception:
        LOG.error("Exception occurred while unpickling {}".format(generic_exception))
        raise generic_exception
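A hedged usage sketch: it assumes deserialize_model.R returns the name of the predict function loaded from the RDS file, which r.get() then resolves to a callable R object. The path is illustrative.

predict_fn = load_predict_func('/tmp/model.rds')  # hypothetical RDS path
# prediction = predict_fn(input_data_r)           # call with R-converted input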
def init_topGO():
    try:
        topGO = importr("topGO")
    except Exception:
        print("It looks like topGO is not installed. Trying to install topGO via "
              "Bioconductor...")
        try:
            R.source("http://bioconductor.org/biocLite.R")
            R.biocLite("topGO")
            topGO = importr("topGO")
        except Exception:
            print("Problem installing topGO from Bioconductor!")
            print("Please install manually from: "
                  "http://www.bioconductor.org/packages/2.13/bioc/html/topGO.html")
    return topGO
def apply_voom(dataframe):
    # Connect to R
    r.source('scripts/code_library.R')
    pandas2ri.activate()
    # Convert to R
    dataframe_r = pandas2ri.py2ri(dataframe)
    # Run
    signature_dataframe_r = r.apply_voom(dataframe_r)
    # Convert back to pandas and return
    signature_dataframe = pandas2ri.ri2py(signature_dataframe_r)
    return signature_dataframe
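A minimal usage sketch for the wrapper above, assuming scripts/code_library.R defines apply_voom() (e.g. wrapping limma::voom) and that the input holds raw counts with genes as rows and samples as columns. The values are made up.

import pandas as pd

counts = pd.DataFrame({'s1': [5, 120, 800], 's2': [7, 90, 950]},
                      index=['g1', 'g2', 'g3'])  # hypothetical raw counts
voom_df = apply_voom(counts)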
def r(code=None, path=None, rel=True, conda=True, convert=True,
      repo='https://cran.microsoft.com/', **kwargs):
    '''
    Runs the R script and returns the result.

    :arg str code: R code to execute
    :arg str path: R script path. Cannot be used if code is specified
    :arg bool rel: True treats path as relative to the caller function's file
    :arg bool conda: True overrides R_HOME to use the Conda R
    :arg bool convert: True converts R objects to Pandas and vice versa
    :arg str repo: CRAN repo URL

    All other keyword arguments are passed as parameters.
    '''
    # Use Conda R if possible
    if conda:
        r_home = _conda_r_home()
        if r_home:
            os.environ['R_HOME'] = r_home
    # Import the global R session
    try:
        from rpy2.robjects import r, pandas2ri, globalenv
    except ImportError:
        app_log.error('rpy2 not installed. Run "conda install rpy2"')
        raise
    except RuntimeError:
        app_log.error('Cannot find R. Set R_HOME env variable')
        raise
    # Set a repo so that install.packages() need not ask for one
    r('local({r <- getOption("repos"); r["CRAN"] <- "%s"; options(repos = r)})' % repo)
    # Activate or de-activate automatic conversion
    # https://pandas.pydata.org/pandas-docs/version/0.22.0/r_interface.html
    if convert:
        pandas2ri.activate()
    else:
        pandas2ri.deactivate()
    # Pass all other kwargs as global environment variables
    for key, val in kwargs.items():
        globalenv[key] = val
    if code and path:
        raise RuntimeError('Use r(code=...) or r(path=...), not both')
    if path:
        # if rel=True, load path relative to the caller's directory
        if rel:
            stack = inspect.getouterframes(inspect.currentframe(), 2)
            folder = os.path.dirname(os.path.abspath(stack[1][1]))
            path = os.path.join(folder, path)
        result = r.source(path, chdir=True)
        # source() returns a withVisible list: $value and $visible. Use only the first.
        result = result[0]
    else:
        result = r(code)
    return result
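A brief usage sketch of the wrapper above; the inline expression is safe to run anywhere, while the script path is hypothetical.

total = r(code='sum(1:10)')   # returns an R numeric vector; total[0] == 55.0
# df = r(path='analysis.R')   # hypothetical script, resolved relative to the
#                             # calling file because rel=True by default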
def _process(self, *args, **kwargs):
    with localconverter(default_converter + pandas2ri.converter):
        globalenv['get_occurrence_dataframe'] = self.get_occurrence_dataframe
        globalenv['get_plot_dataframe'] = self.get_plot_dataframe
        globalenv['get_plot_occurrence_dataframe'] = self.get_plot_occurrence_dataframe
        globalenv['get_taxon_dataframe'] = self.get_taxon_dataframe
        globalenv['get_raster'] = self.get_raster
        r.source(self.r_script_path)
        process_func = r['process']
        df = pandas2ri.ri2py(process_func())
    if isinstance(df, pd.DataFrame):
        return int32_to_int64(fill_str_empty_with_nan(df)), [], {}
    if len(df) == 1:
        return df[0], [], {}
    return df, [], {}
def init_qvalue():
    global __qvalue
    if __qvalue is None:
        try:
            print("Importing qvalue ...")
            qvalue = importr("qvalue")
        except Exception:
            print("It looks like qvalue is not installed. Trying to install qvalue via "
                  "Bioconductor...")
            try:
                R.source("http://bioconductor.org/biocLite.R")
                R.biocLite("qvalue")
                qvalue = importr("qvalue")
            except Exception:
                print("Problem installing qvalue from Bioconductor!")
                print("Please install manually from: "
                      "http://www.bioconductor.org/packages/release/bioc/html/qvalue.html")
        __qvalue = qvalue
def init_topGO():
    global __topGo
    if __topGo is None:
        try:
            print("Importing topGO ...")
            topGO = importr("topGO")
        except Exception:
            print("It looks like topGO is not installed. Trying to install topGO via "
                  "Bioconductor...")
            try:
                R.source("http://bioconductor.org/biocLite.R")
                R.biocLite("topGO")
                topGO = importr("topGO")
            except Exception:
                print("Problem installing topGO from Bioconductor!")
                print("Please install manually from: "
                      "http://www.bioconductor.org/packages/release/bioc/html/topGO.html")
        __topGo = topGO
def compute_prob(size_sample, prior_pg, r_read_file_name="20k_test_elbos.csv",
                 prob_result_file="20k_test_prob_result.csv", working_folder="./"):
    # Save args in a txt file for the R script to read.
    # TODO: don't run compute_prob in parallel... filename.txt will be changed
    # names_file = prob_result_file[:-4] + '.txt'
    # save_path = 'filenames_for_r/'
    # if not os.path.exists(save_path):
    #     os.makedirs(save_path)
    with open('filename.txt', 'w') as text_file:
        text_file.write(r_read_file_name + '\n')
        text_file.write(prob_result_file + '\n')
        text_file.write(working_folder + '\n')
        text_file.write(str(size_sample) + '\n')
        text_file.write(str(prior_pg) + '\n')
    # kernel and local ELBOs
    # training_results = "20k_test_elbos.pkl"
    # with open(training_results, "rb") as fin:
    #     valid_ker = pickle.load(fin)
    #     elbos = pickle.load(fin)
    # save as a csv file with columns "kernels", "L_i"
    # data = {'kernels': valid_ker,
    #         'L_i': np.array(elbos).reshape(-1)}
    # df = pd.DataFrame(data)
    # df.to_csv(r_read_file_name, index=None)
    # compute probability
    r.setwd('~/BKS/src/R_bks')
    r.source('bks_run_global_python.R')
def hierarchical_clust_parmar(self, X, y=None):
    """
    Consensus clustering with hierarchical clustering as described in:
    Radiomic feature clusters and prognostic signatures specific for Lung and
    Head & Neck cancer. Parmar et al., Scientific Reports, 2015
    """
    df = pd.DataFrame(X)
    r_df = pandas2ri.py2ri(df)
    cwd = os.path.dirname(sys.argv[0])
    r.setwd(cwd)
    r.source('./Statistical_analysis/R_scripts/hierarchical_clustering_Parmar.R')
    if self.cluster_reduction in self.cluster_reduction_methods:
        r_dr_results = r.hierarchical_clustering_parmar(
            r_df, max_k=20, threshold=1 - self.threshold,
            corr_metric=self.corr_metric, cluster_reduction=self.cluster_reduction)
    else:
        raise ValueError('cluster_reduction must be one of: %s. %s was passed'
                         % (self.cluster_reduction_methods, self.cluster_reduction))
    R_object_dict = {}
    keys = r_dr_results.names
    for i in range(len(keys)):
        R_object_dict[keys[i]] = np.array(r_dr_results[i])
    dr_results = pd.DataFrame(R_object_dict).to_numpy()
    self.cluster_labels = dr_results[:, 0]
    nb_cluster = np.amax(dr_results[:, 0]).astype(int)
    # coefficient_matrix has shape (n_features, nb_cluster)
    coefficient_matrix = np.zeros((dr_results.shape[0], nb_cluster))
    for i in range(nb_cluster):
        coefficient_matrix[:, i] = np.where(dr_results[:, 0] == i + 1, dr_results[:, 1], 0)
    coefficient_matrix = coefficient_matrix.T
    return coefficient_matrix
def run_cd(rawcount_dataframe, experimental_samples, control_samples, method, signature_name):
    # Connect to R
    r.source('/Users/denis/Documents/Projects/scripts/Scripts.R')
    pandas2ri.activate()
    # Create design dict
    sample_dict = {'experimental': experimental_samples, 'control': control_samples}
    # Create design dataframe
    design_dataframe = pd.DataFrame({
        group_label: {sample: int(sample in group_samples) for sample in rawcount_dataframe.columns}
        for group_label, group_samples in sample_dict.items()})
    # Convert to R
    rawcount_dataframe_r = pandas2ri.py2ri(rawcount_dataframe)
    design_dataframe_r = pandas2ri.py2ri(design_dataframe)
    # Run
    cd_dataframe_r = r.run_characteristic_direction(rawcount_dataframe_r, design_dataframe_r)
    # Convert to pandas and sort
    cd_dataframe = pandas2ri.ri2py(cd_dataframe_r)
    # Add mean expression
    # signature_dataframe['AveExpr'] = rawcount_dataframe.loc[signature_dataframe.index].apply(np.average, axis=1)
    return cd_dataframe
def stage4_trends_calculate():
    print(OKGREEN + UNDERLINE + BOLD + "****** Stage 4: Trends BUTTON ***** " + ENDC)
    print("Recalculating ...")
    print(BOLD + "**** Calling getTrendsData ****" + ENDC)
    jpsurvDataString = request.args.get('jpsurvData', False)
    jpsurvDataString = fix_jpsurv(jpsurvDataString)
    # Init the R source
    r.source('./JPSurvWrapper.R')
    # The next line executes the R program
    r.getTrendsData(UPLOAD_DIR, jpsurvDataString)
    out_json = json.dumps({"status": "OK"})
    return current_app.response_class(out_json, mimetype='application/json')
def init_biomaRt():
    global __biomaRt
    global __mart
    if __biomaRt is None:
        try:
            print("Importing biomaRt ...")
            biomaRt = importr("biomaRt")
        except Exception:
            print("It looks like biomaRt is not installed. Trying to install biomaRt via "
                  "Bioconductor...")
            try:
                R.source("http://bioconductor.org/biocLite.R")
                R.biocLite("biomaRt")
                biomaRt = importr("biomaRt")
            except Exception:
                print("Problem installing biomaRt from Bioconductor!")
                print("Please install manually from: "
                      "http://www.bioconductor.org/packages/release/bioc/html/biomaRt.html")
        __biomaRt = biomaRt
        __mart = R.useMart(biomart="ensembl", dataset=__mart_dataset)
def univariate_analysis(X, y, adjusted_method='BH', save_dir=None):
    if not isinstance(X, (pd.DataFrame, pd.Series)):
        if isinstance(X, (list, tuple, np.ndarray, Mapping)):
            if len(np.array(X).shape) != 2:
                raise ValueError('X array must be 2D')
            X = pd.DataFrame(X)
        else:
            raise TypeError('X must be an array-like object, dictionary or pandas Dataframe/Series')
    if not isinstance(y, (list, tuple, np.ndarray)):
        if isinstance(y, pd.DataFrame) or isinstance(y, pd.Series):
            y = y.to_numpy()
        else:
            raise TypeError('y array must be an array like or pandas Dataframe/Series')
    else:
        y = np.array(y)
    if len(y.shape) != 1:
        if len(y.shape) == 2 and y.shape[1] == 1:
            # reshape returns a new array; the original dropped the assignment
            y = y.reshape(-1)
        else:
            raise ValueError('y array must be 1D or 2D with second dimension equal to 1')
    if len(np.unique(y)) <= 1:
        raise ValueError('y array must have at least 2 classes')
    r_X = pandas2ri.py2ri(X)
    r_y = Vector(y)
    cwd = os.path.dirname(sys.argv[0])
    r.setwd(cwd)
    r.source('./Statistical_analysis/R_scripts/univariate_analysis.R')
    r_dr_results = r.univariate_analysis(r_X, r_y, adjusted_method=adjusted_method)
    R_object_dict = {}
    keys = r_dr_results.names
    for i in range(len(keys)):
        R_object_dict[keys[i]] = np.array(r_dr_results[i])
    results = pd.DataFrame(R_object_dict)
    if save_dir is not None:
        results.to_excel(os.path.join(save_dir, 'univariate_stats_analysis.xlsx'))
    return results
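A toy call for the function above, assuming R_scripts/univariate_analysis.R defines the univariate_analysis() entry point it sources; the data are random and illustrative.

import numpy as np
import pandas as pd

X = pd.DataFrame(np.random.rand(20, 4), columns=['f1', 'f2', 'f3', 'f4'])
y = np.array([0] * 10 + [1] * 10)  # two-class labels
stats = univariate_analysis(X, y, adjusted_method='BH')  # BH = Benjamini-Hochberg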
def main():
    gn = Granatum()
    assay_df = gn.pandas_from_assay(gn.get_import('assay'))
    grdict = gn.get_import('groupVec')
    phe_dict = pd.Series(gn.get_import('groupVec'))
    groups = set(parse(gn.get_arg('groups')))

    inv_map = {}
    for k, v in grdict.items():
        if v in groups:
            inv_map[v] = inv_map.get(v, []) + [k]
    cells = []
    for k, v in inv_map.items():
        cells.extend(v)

    assay_df = assay_df.loc[:, cells]
    assay_df = assay_df.sparse.to_dense().fillna(0)
    # assay_mat = r['as.matrix'](pandas2ri.py2ri(assay_df))
    # assay_mat = r['as.matrix'](conversion.py2rpy(assay_df))
    phe_vec = phe_dict[assay_df.columns]

    r.source('./drive_DESeq2.R')
    ret_r = r['run_DESeq'](assay_df, phe_vec)
    ret_r_as_df = r['as.data.frame'](ret_r)
    # ret_py_df = pandas2ri.ri2py(ret_r_as_df)

    # TODO: maybe rename the columns to be more self-explanatory?
    result_df = ret_r_as_df
    result_df = result_df.sort_values('padj')
    result_df.index.name = 'gene'

    gn.add_pandas_df(result_df.reset_index(), description='The result table as returned by DESeq2.')
    gn.export(result_df.to_csv(), 'DESeq2_results.csv', raw=True)
    significant_genes = result_df.loc[result_df['padj'] < 0.05]['log2FoldChange'].to_dict()
    gn.export(significant_genes, 'Significant genes', kind='geneMeta')

    gn.commit()
def run_limma(rawcount_dataframe, experimental_samples, control_samples, signature_name):
    # Connect to R
    # r.source('/Users/denis/Documents/Projects/scripts/Scripts.R')
    r.source('/Users/maayanlab/Library/Mobile Documents/com~apple~CloudDocs/Documents/Projects/scripts/Scripts.R')
    pandas2ri.activate()
    # Create design dict
    sample_dict = {'experimental': experimental_samples, 'control': control_samples}
    # Create design dataframe
    design_dataframe = pd.DataFrame({
        group_label: {sample: int(sample in group_samples) for sample in rawcount_dataframe.columns}
        for group_label, group_samples in sample_dict.items()})
    # Convert to R
    rawcount_dataframe_r = pandas2ri.py2ri(rawcount_dataframe)
    design_dataframe_r = pandas2ri.py2ri(design_dataframe)
    # Run
    limma_dataframe_r = r.run_limma(rawcount_dataframe_r, design_dataframe_r)
    # Convert to pandas and sort
    limma_dataframe = pandas2ri.ri2py(limma_dataframe_r).sort_values('P.Value')
    return limma_dataframe
def show1():
    open1()
    r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/head1.r', encoding="utf-8")
    data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/day1.csv')
    # Bar chart of today's time per project (title: "今日项目时间分布图" = "distribution of today's project time";
    # axis labels: "项目" = "project", "时间 (min)" = "time (min)")
    pp = (ggplot2.ggplot(data)
          + ggplot2.aes_string(x='project', y='time', fill='project')
          + ggplot2.geom_bar(stat='identity')
          + ggplot2.ggtitle("今日项目时间分布图")
          + ggplot2.labs(x='项目', y='时间 (min)')
          + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle=45)}))
    pp.plot()
def generate_var():
    # FIXME: make a test?
    import pandas.rpy.common as prp
    from rpy2.robjects import r
    r.source("tests/var.R")
    return prp.convert_robj(r["result"], use_pandas=False)
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])
    parser.add_option("-a", "--gtf-a", dest="gtf_a", type="string",
                      help="supply a gtf file - will compress uncompressed files")
    parser.add_option("-b", "--gtf-b", dest="gtf_b", type="string",
                      help="supply a second gtf file - will compress uncompressed files")
    parser.add_option("-s", "--scripts-dir", dest="scripts_dir", type="string",
                      help="supply a location for accessory scripts")
    parser.add_option("--no-venn", dest="no_venn", action="store_true",
                      help="set if no venn is to be drawn")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    gtf_files = [options.gtf_a, options.gtf_b]

    merged_files = []
    prefices = []
    E.info("merging gtf files")
    for gtf in gtf_files:
        if gtf.endswith(".gtf.gz"):
            outfile = P.snip(gtf, ".gtf.gz") + ".merged.gtf.gz"
            prefices.append(P.snip(gtf, ".gtf.gz"))
            merged_files.append(outfile)
            statement = '''zcat %s | python %s/gtf2gtf.py --merge-transcripts --log=%s.log | gzip > %s''' % (
                gtf, options.scripts_dir, outfile, outfile)
            P.run()
        elif gtf.endswith(".gtf"):
            outfile = P.snip(gtf, ".gtf") + ".merged.gtf.gz"
            prefices.append(P.snip(gtf, ".gtf"))
            merged_files.append(outfile)
            statement = '''cat %s | python %s/gtf2gtf.py --merge-transcripts --log=%s.log | gzip > %s''' % (
                gtf, options.scripts_dir, outfile, outfile)
            P.run()
        else:
            raise ValueError("cannot perform merge on %s: is not a gtf file" % gtf)

    for prefix in prefices:
        if options.gtf_a.find(prefix) != -1:
            gtf_a = prefix + ".merged.gtf.gz"
            prefix_a = prefix
        elif options.gtf_b.find(prefix) != -1:
            gtf_b = prefix + ".merged.gtf.gz"
            prefix_b = prefix

    E.info("intersecting gtf files")
    # intersect the resulting merged files
    scriptsdir = options.scripts_dir
    intersection_out = "_vs_".join([prefix_a, prefix_b]) + ".intersection.gtf.gz"
    statement = '''intersectBed -a %(gtf_a)s -b %(gtf_b)s -s -wa
                   | python %(scriptsdir)s/gtf2gtf.py --merge-transcripts --log=log
                   | gzip > %(intersection_out)s'''
    P.run()

    if not options.no_venn:
        E.info("producing venn diagram for %s vs %s..." % (options.gtf_a, options.gtf_b))

        # produce the venn diagram
        intersection_file = intersection_out
        gtf_a_merged = gtf_a
        gtf_b_merged = gtf_b

        # create dictionary key
        gtf_pair = (gtf_a_merged, gtf_b_merged)

        # containers for counts
        count_gtf_merged_a = 0
        count_gtf_merged_b = 0
        count_intersection = 0

        # create GTF iterator objects
        gtf_iterator_a = GTF.iterator(IOTools.openFile(gtf_pair[0]))
        gtf_iterator_b = GTF.iterator(IOTools.openFile(gtf_pair[1]))
        gtf_iterator_intersection = GTF.iterator(IOTools.openFile(intersection_file))

        # do the counts for each file
        E.info("counting entries in %s" % gtf_a)
        for entry in gtf_iterator_a:
            count_gtf_merged_a += 1
        print("counts for gtf-a: ", count_gtf_merged_a)
        E.info("counting entries in %s" % gtf_b)
        for entry in gtf_iterator_b:
            count_gtf_merged_b += 1
        print("counts for gtf-b: ", count_gtf_merged_b)
        E.info("counting entries in %s" % intersection_file)
        for entry in gtf_iterator_intersection:
            count_intersection += 1
        print("counts for intersection: ", count_intersection)

        # this is the important bit: take an arbitrary list of numbers to represent
        # the list of lincRNA in the refnoncoding set, use the intersection count to
        # represent the overlapping section in the lincRNA set, and pad with random
        # numbers to make up the remaining, non-overlapping set
        result = {}
        E.info("assembling count lists")
        result[gtf_pair] = {
            "gtf-b": list(map(str, range(count_gtf_merged_b))),
            "gtf-a": list(map(str, range(count_intersection))) +
                     [str(random.random()) for i in range(count_intersection, count_gtf_merged_a)]}

        R_source = os.path.join(os.path.abspath(options.scripts_dir), "venn_diagram.R")
        R.source(R_source)
        prefix_a = prefix_a.replace(".", "_").replace("-", "_")
        prefix_b = prefix_b.replace(".", "_").replace("-", "_")
        R('''prefix.a <- "%s"''' % prefix_a)
        R('''prefix.b <- "%s"''' % prefix_b)
        E.info("drawing venn diagram to %s" % (prefix_a + "_vs_" + prefix_b + ".overlap.png"))

        R["venn.diagram2"](R.list(A=result[gtf_pair]["gtf-a"], B=result[gtf_pair]["gtf-b"]),
                           prefix_a + "_vs_" + prefix_b + ".overlap.png",
                           **{'cat.cex': 1.5,
                              'main.fontfamily': "Arial",
                              'cat.pos': FloatVector((0, 0)),
                              'cat.fontfamily': "Arial",
                              'main.cex': 1.8,
                              'height': 1000,
                              'width': 1000,
                              'cex': 2,
                              'fontfamily': "Arial",
                              'lwd': R.c(1, 1),
                              'fill': R.c(R.rgb(0, 0, 0.5, 0.5), R.rgb(0.5, 0, 0, 0.5)),
                              'category.names': R.c(prefix_a, prefix_b),
                              'margin': R.c(0.1, 0.1, 0.1, 0.1)})

    # write footer and output benchmark information
    E.Stop()
#############################################
########## 2. General Setup
#############################################
##### 1. Variables #####
dataset_names = {
    'HMS_Dataset_20303': 'cytosolic-24h',
    'HMS_Dataset_20304': 'nuclear-24h',
    'HMS_Dataset_20305': 'cytosolic-48h',
    'HMS_Dataset_20306': 'nuclear-48h',
    'HMS_Dataset_20307': 'cytosolic-72h',
    'HMS_Dataset_20308': 'nuclear-72h',
}

##### 2. R Connection #####
rSource = 'pipeline/scripts/pipeline-mcf10a-cycif.R'
r.source(rSource)

##### 3. Files #####
concatenatedExpressionFile = 's1-data.dir/HMS_Datasets-merged_filtered.txt'
differentialExpressionFiles = glob.glob('s2-gene_differential_expression.dir/*')

#######################################################
#######################################################
########## S1. Process Data
#######################################################
#######################################################

#############################################
########## 1. Process dataset
#############################################
def _read_source(self):
    r.source(self.source_file)
import argparse
import traceback

from flask import Flask, request
from rpy2.robjects import r

app = Flask(__name__)
r.source('crosstalkWrapper.R')

@app.route('/calculate/', methods=['POST'])
def calculate():
    try:
        return r.calculate(request.stream.read())[0]
    except Exception as e:
        print('------------EXCEPTION------------')
        traceback.print_exc(1)
        return str(e), 400

@app.after_request
def after_request(response):
    response.headers.add('Access-Control-Allow-Origin', '*')
    response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
    response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE')
    return response

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
import base64
import io
import json
import os
import random
import tempfile
import time
import traceback
import uuid
from socket import gethostname

from flask import Flask, send_file, request, jsonify, make_response
# from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
# from rpy2.robjects.vectors import IntVector, FloatVector
from rpy2.robjects import r

from util import *

r.source('LCWrapper.R')

# Initialize the Flask application
if __name__ == '__main__':
    app = Flask(__name__, static_folder='.', static_url_path='', template_folder='.')
else:
    app = Flask(__name__)

@app.route('/')
def index():
    return send_file('{}/index.html'.format(CLIENT_APP_FOLDER))
#################################################################
#################################################################

#############################################
########## 1. Load libraries
#############################################
##### 1. General support #####
import scipy.stats as ss
import numpy as np
import warnings
import os
from rpy2.robjects import r, pandas2ri
pandas2ri.activate()

##### 2. R #####
r.source(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'normalize.R'))

#######################################################
#######################################################
########## S1. Dataset Normalization
#######################################################
#######################################################

#############################################
########## 1. logCPM
#############################################

def logCPM(dataset):
    # Get raw data
def runIDR(options, peakfile1, peakfile2):
    '''run IDR analysis.

    This code is taken from the R script batch-consistency-analysis.r
    '''
    if options.half_width is not None:
        R.assign("half.width", options.half_width)
    else:
        R('''half.width = NULL''')
    R.assign("overlap.ratio", options.overlap_ratio)
    R.assign("is.broadpeak", options.is_broadpeak)
    R.assign("sig.value", options.signal_value)

    dirname = os.path.dirname(__file__)
    R.source(os.path.join(dirname, "WrapperIDR.r"))

    # read the length of the chromosomes, which will be used to concatenate chr's
    R('''chr.size <- read.table('%s', sep='\t')''' % options.filename_chromosome_table)

    output_prefix = options.output_prefix
    output_uri = output_prefix + "-uri.sav"
    output_em = output_prefix + "-em.sav"
    output_overlapped_peaks = output_prefix + "-overlapped-peaks.txt"
    output_peaks_above_idr = output_prefix + "-npeaks-aboveIDR.txt"

    # process data, summit: the representation of the location of summit
    E.info("loading data")
    R('''rep1 <- process.narrowpeak('%(peakfile1)s', chr.size, half.width=half.width, summit="offset", broadpeak=is.broadpeak)''' % locals())
    R('''rep2 <- process.narrowpeak('%(peakfile2)s', chr.size, half.width=half.width, summit="offset", broadpeak=is.broadpeak)''' % locals())

    E.info("replicate 1: read %s: %i peaks, %i after filtering" %
           (peakfile1, R('''nrow(rep1$data.ori)''')[0], R('''nrow(rep1$data.cleaned)''')[0]))
    E.info("replicate 2: read %s: %i peaks, %i after filtering" %
           (peakfile2, R('''nrow(rep2$data.ori)''')[0], R('''nrow(rep2$data.cleaned)''')[0]))

    E.info("computing correspondence profile (URI)")
    R('''uri.output <- compute.pair.uri(rep1$data.cleaned, rep2$data.cleaned, sig.value1=sig.value, sig.value2=sig.value, overlap.ratio=overlap.ratio)''')
    E.info("saving correspondence profile to %s" % output_uri)
    R('''save(uri.output, file='%(output_uri)s') ''' % locals())

    E.info("computing EM procedure for inference")
    R('''em.output <- fit.em(uri.output$data12.enrich, fix.rho2=T)''')
    E.info("saving EM to %s" % output_em)
    R('''save(em.output, file='%(output_em)s') ''' % locals())

    # write em output into a file
    # cat(paste("EM estimation for the following files\n", peakfile1, "\n", peakfile2, "\n", sep=""))
    options.stdout.write("em_estimation\n%s\n" % str(R('''em.output$em.fit$para''')))

    # add on 3-29-10
    # output both local idr and IDR
    E.info("writing overlapped peaks to %s" % output_overlapped_peaks)
    R('''idr.local <- 1-em.output$em.fit$e.z''')
    R('''IDR <- c()''')
    R('''o <- order(idr.local)''')
    R('''IDR[o] <- cumsum(idr.local[o])/c(1:length(o))''')
    R('''
    write.out.data <- data.frame(chr1=em.output$data.pruned$sample1[, "chr"],
                                 start1=em.output$data.pruned$sample1[, "start.ori"],
                                 stop1=em.output$data.pruned$sample1[, "stop.ori"],
                                 sig.value1=em.output$data.pruned$sample1[, "sig.value"],
                                 chr2=em.output$data.pruned$sample2[, "chr"],
                                 start2=em.output$data.pruned$sample2[, "start.ori"],
                                 stop2=em.output$data.pruned$sample2[, "stop.ori"],
                                 sig.value2=em.output$data.pruned$sample2[, "sig.value"],
                                 idr.local=1-em.output$em.fit$e.z,
                                 IDR=IDR)
    ''')
    R('''write.table(write.out.data, file='%(output_overlapped_peaks)s')''' % locals())

    # number of peaks passing IDR range (0.01-0.25)
    E.info("computing number of peaks at various thresholds")
    R('''IDR.cutoff <- seq(0.01, 0.25, by=0.01)''')
    R('''idr.o <- order(write.out.data$idr.local)''')
    R('''idr.ordered <- write.out.data$idr.local[idr.o]''')
    R('''IDR.sum <- cumsum(idr.ordered)/c(1:length(idr.ordered))''')
    R('''
    IDR.count <- c()
    n.cutoff <- length(IDR.cutoff)
    for(i in 1:n.cutoff){
        IDR.count[i] <- sum(IDR.sum <= IDR.cutoff[i])
    }
    ''')

    # write the number of peaks passing various IDR ranges into a file
    E.info("writing number of peaks above IDR cutoffs in range [0.01, 0.25] to %s" % output_peaks_above_idr)
    R('''idr.cut <- data.frame(cutoff=IDR.cutoff, count=IDR.count)''')
    R('''write.table(idr.cut, file='%(output_peaks_above_idr)s', quote=F, row.names=F, col.names=T, sep='\t')''' % locals())

    R('''mar.mean <- get.mar.mean(em.output$em.fit)''')
    options.stdout.write("marginal mean of two components\n%s\n" % R('''print(mar.mean)'''))
def show4():
    open4()
    r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/end.R', encoding="utf-8")
    data = DataFrame.from_csvfile('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/project2.csv')
    # Grouped bar chart comparing the two projects by day (title: "两项目时间对比图" = "time comparison of the two projects";
    # axis labels: "日期" = "date", "时间 (min)" = "time (min)")
    pp = (ggplot2.ggplot(data)
          + ggplot2.aes_string(x='day', y='time', fill='factor(project)')
          + ggplot2.geom_bar(stat='identity', position='dodge')
          + ggplot2.ggtitle("两项目时间对比图")
          + ggplot2.labs(x='日期', y='时间 (min)')
          + ggplot2.theme(**{'axis.text.x': ggplot2.element_text(angle=45)}))
    pp.plot()
def compute_far(plot_pdf, data_rds, yvarname, xvarname,
                f_yhist, f_yrcp, f_xhist, f_xrcp,
                y_compute_ano=True, y_start_ano="1961-01-01T00:00:00", y_end_ano="1990-12-31T23:59:59",
                y_bbox='-127,-65,25,50', y_season='DJF', y_first_spatial=True,
                y_spatial_aggregator="mean", y_time_aggregator="mean",
                x_compute_ano=True, x_start_ano="1961-01-01T00:00:00", x_end_ano="1990-12-31T23:59:59",
                x_bbox='-127,-65,25,50', x_season='DJF', x_first_spatial=True,
                x_spatial_aggregator="mean", x_time_aggregator="mean",
                stat_model="gauss_fit", qthreshold=0.9, nbootstrap=250):
    LOGGER.debug('initialization')
    xname = "x_" + xvarname
    yname = "y_" + yvarname
    run_tokeep = select_run_tokeep(f_xhist, f_xrcp, f_yhist, f_yrcp)
    xvar, xyear = prepare_dat(varname=xvarname, lfhist=f_xhist, lfrcp=f_xrcp,
                              run_tokeep=run_tokeep,
                              compute_ano=x_compute_ano, start_ano=x_start_ano, end_ano=x_end_ano,
                              first_spatial=y_first_spatial,
                              spatial_aggregator=x_spatial_aggregator,
                              time_aggregator=x_time_aggregator,
                              bbox=x_bbox, season=x_season)
    yvar, yyear = prepare_dat(varname=yvarname, lfhist=f_yhist, lfrcp=f_yrcp,
                              run_tokeep=run_tokeep,
                              compute_ano=y_compute_ano, start_ano=y_start_ano, end_ano=y_end_ano,
                              first_spatial=y_first_spatial,
                              spatial_aggregator=y_spatial_aggregator,
                              time_aggregator=y_time_aggregator,
                              bbox=y_bbox, season=y_season)
    LOGGER.info('data prepared')

    dfx = pandas.DataFrame.from_dict({'year': xyear, xname: xvar})
    dfy = pandas.DataFrame.from_dict({'year': yyear, yname: yvar})
    if all(dfx['year'] == dfy['year']):
        df = pandas.concat([dfx, dfy[yname]], axis=1)[['year', yname, xname]]
    else:
        raise Exception("years of x and y not corresponding")

    Rsrc = config.Rsrc_dir()
    # import rpy2's package module
    import rpy2.robjects.packages as rpackages
    from rpy2.robjects import pandas2ri
    pandas2ri.activate()
    from rpy2.robjects import r
    from rpy2.robjects.packages import importr
    # import R's utility package
    utils = importr('utils')
    r.source(join(Rsrc, "compute_and_plot_far.R"))
    farg = importr("FARg")
    LOGGER.debug('rcode prepared')

    if stat_model == "gauss_fit":
        far = r.compute_and_plot_far(mdata=df, yvar=yname, xvar=xname, tvar="year",
                                     xp=1.6, R=nbootstrap, stat_model=farg.gauss_fit,
                                     ci_p=0.9, pdf_name=plot_pdf)
    elif stat_model == "gev_fit":
        far = r.compute_and_plot_far(mdata=df, yvar=yname, xvar=xname, tvar="year",
                                     xp=1.6, R=nbootstrap, stat_model=farg.gev_fit,
                                     ci_p=0.9, pdf_name=plot_pdf)
    elif stat_model == "gpd_fit":
        far = r.compute_and_plot_far(mdata=df, yvar=yname, xvar=xname, tvar="year",
                                     xp=1.6, R=nbootstrap, stat_model=farg.gpd_fit,
                                     ci_p=0.9, qthreshold=qthreshold, pdf_name=plot_pdf)
    LOGGER.debug('far computed')
    r.saveRDS(far, file=data_rds)
#################################################################
############### Generate Signature
#################################################################
#################################################################

#############################################
########## 1. Load libraries
#############################################
##### 1. General support #####
import os
import pandas as pd
from rpy2.robjects import r, pandas2ri
pandas2ri.activate()

##### 2. R #####
r.source(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'R', 'signature.R'))

#######################################################
#######################################################
########## S1. Design Matrix
#######################################################
#######################################################

def make_design_matrix(expression_dataframe, group_A, group_B):
    # Sample names
    group_A = [x.replace(':', '.').replace('-', '.') for x in group_A]
    group_B = [x.replace(':', '.').replace('-', '.') for x in group_B]
    expression_dataframe.columns = [x.replace(':', '.').replace('-', '.') for x in expression_dataframe.columns]
    # Get expression dataframe
def run_cummeRbund_install():
    """Provides an R install of cummeRbund and presents the user with all
    R output and prompts."""
    r.source("http://bioconductor.org/biocLite.R")
    r.biocLite('cummeRbund')
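A hedged note on the installer above: sourcing biocLite.R over HTTP is the legacy Bioconductor (< 3.8) install path; on current R/Bioconductor the equivalent goes through BiocManager, sketched here via rpy2.

from rpy2.robjects import r

# Modern equivalent of run_cummeRbund_install() (Bioconductor >= 3.8)
r('if (!requireNamespace("BiocManager", quietly=TRUE)) install.packages("BiocManager")')
r('BiocManager::install("cummeRbund", update=FALSE, ask=FALSE)')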
import time

import numpy as np
from sklearn.cluster import KMeans
from tabulate import tabulate
from rpy2.robjects import r
from rpy2.robjects import pandas2ri
pandas2ri.activate()

from bag import *

# import warnings
# warnings.simplefilter("error")

t0 = time.perf_counter()  # time.clock() was removed in Python 3.8

# set up the R environment in order to access and call functions there
r.source("gennonnormal.r")

# Set parameters
k = 8          # num centroids
percent = .6   # percent to hold out from center
n = 1

# pull in parameter matrix from gennonnormal.r
params = np.array(r.params())
print(params)

# store results in lists for comparison
prop_ = []
prop_reg_ = []
rsq_ = []
rsq_reg_ = []
from flask import Flask, request
from rpy2.robjects import r

app = Flask(__name__, static_folder='', static_url_path='')
r.source('apcWrapper.R')

@app.route('/calculate/', methods=['POST'], strict_slashes=False)
def calculate():
    return r.calculate(request.data.decode())[0]

@app.route('/apcRest/ping/', strict_slashes=False)
@app.route('/ping/', strict_slashes=False)
def ping():
    return r('"true"')[0]

@app.errorhandler(Exception)
def error_handler(e):
    """Ensure errors are logged and returned"""
    app.logger.error(str(e))
    return str(e), 400

@app.after_request
def after_request(response):
    response.headers.add('Access-Control-Allow-Origin', '*')
    response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
    response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE')
    # an after_request hook must return the response; the parallel handler above does the same
    return response