def mkSimSem(variances=(0.5, 1.1, 0.8, 0.4, 0.4, 0.8, 0.8, 0.5, 0.6)):
    """Build an anonymous R package exposing ``mkdata(n)`` for SEM simulation.

    The R source defines a three-factor lavaan population model (f1..f3,
    three indicators each) and a ``mkdata`` function that calls
    ``simulateData`` on it.

    :param variances: nine residual variances, substituted in order for the
        indicators y1..y9. Any iterable is accepted; a wrong number of
        values makes the ``%`` formatting raise ``TypeError``.
    :returns: the ``SignatureTranslatedAnonymousPackage`` named
        ``"semsimdata"``.
    """
    # The default is an immutable tuple so it cannot be shared-and-mutated
    # between calls; callers may still pass a list.
    string = """
library(lavaan)
mkdata=function(n){
popModel <- "
f1 =~ 1*y1 + 0.6*y2 + 0.7*y3
f2 =~ 1*y4 + 1.1*y5 + 0.9*y6
f3 =~ 1*y7 + 1.2*y8 + 1.1*y9
f1 ~~ 0.8*f1
f2 ~~ 0.9*f2
f3 ~~ 0.4*f3
f1 ~~ 0.4*f2
f1 ~~ 0.2*f3
f2 ~~ 0.3*f3
y1 ~~ %f*y1
y2 ~~ %f*y2
y3 ~~ %f*y3
y4 ~~ %f*y4
y5 ~~ %f*y5
y6 ~~ %f*y6
y7 ~~ %f*y7
y8 ~~ %f*y8
y9 ~~ %f*y9
"
analyzeModel <- "
f1 =~ y1 + y2 + y3
f2 =~ y4 + y5 + y6
f3 =~ y7 + y8 + y9
"
s=simulateData(popModel,sample.nobs=n)
return(s)
}""" % tuple(variances)
    return SignatureTranslatedAnonymousPackage(string, "semsimdata")
def call_r(df):
    '''
    Fit the linear model defined in ``linear_modeler_function.R`` on CSV data.

    Arguments:
        df: A string replicating a CSV file. The observations for the
            dependent variable MUST be in the FIRST COLUMN

    Returns:
        an rpy2 Robject float vector which stores the coefficients of the
        linear regression
    '''
    import os
    from io import StringIO

    from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
    from rpy2.robjects import DataFrame
    import rpy2.rinterface as ri

    ri.initr()

    # `parser` is defined elsewhere in the project; it turns the file-like
    # CSV into the mapping that rpy2's DataFrame constructor accepts.
    file_like_obj = StringIO(df)
    constructor_dict = parser(file_like_obj)
    rpy2_dataframe = DataFrame(constructor_dict)

    # Build the path portably: the original literal used a backslash, which
    # only resolves on Windows and is an invalid escape sequence.
    r_script_path = os.path.join('regression_app',
                                 'linear_modeler_function.R')
    with open(r_script_path) as f:
        r_source = f.read()

    mod = SignatureTranslatedAnonymousPackage(r_source, 'mod')
    return mod.linear_modeler(rpy2_dataframe)
def generate_solutions_tables(self):
    '''
    code from Adam
    use rpy2 to execute rcode which reads out a solutions file to pandas

    For every row of ``self.data_table`` the RData file named in the
    ``absolute_summary_data`` column is loaded in R, its mode table is
    converted to a pandas DataFrame and stored in
    ``self.pp_modes_tables`` under the row's ``pair_id``.
    '''
    col_names = [
        'alpha', 'tau', 'AT', 'b', 'delta', 'LL', 'mode_curv',
        'genome mass', 'sigma.h.hat', 'theta.z.hat', 'sigma.A.hat',
        'theta.Q.hat', 'lambda.hat', 'theta.0', 'frac.het', 'SCNA_LL',
        'entropy', 'Kar_LL', 'WGD', 'combined_LL', 'SSNV_LL',
        'SCNA_Theta_integral', 'dens'
    ]

    # Build R function to be used as a python package
    load_RData_func_str = """
        load_RData <- function(file_path) {
            load(file_path)
            head_name <- ls()[1]
            file_name <- names(`segobj.list`)[1]
            r_data <- `segobj.list`[[file_name]]$mode.res$mode.tab
            return(r_data)
        }
        """
    # Pack the function above as a package
    r_pack = SignatureTranslatedAnonymousPackage(load_RData_func_str,
                                                 "r_pack")

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Generating absolute tables for ' + str(len(self.data_table)) +
          ' samples')

    pandas2ri.activate()
    try:
        for index, row in self.data_table.iterrows():
            if np.mod(index, 100) == 0:
                print(str(index) + '/' + str(len(self.data_table)))
            r_data = r_pack.load_RData(row['absolute_summary_data'])
            abs_table = pd.DataFrame(pandas2ri.ri2py(r_data),
                                     columns=col_names)
            self.pp_modes_tables[row['pair_id']] = abs_table
    finally:
        # Always undo the global conversion switch, even if a file fails.
        pandas2ri.deactivate()
def get_taxon_abundance_box_plot():
    """Return the R function ``taxon_abundance_box_plot`` as a Python callable.

    The R function takes ``(data, plot_file_path, title, xlabel, ylabel)``,
    orders genotypes by ``variant_allele_count`` and writes a jittered
    ggplot2 box plot of abundance per genotype to a PDF.
    """
    r_source = """
    require("dplyr")
    require("ggplot2")
    taxon_abundance_box_plot <- function(data, plot_file_path, title, xlabel, ylabel) {
        temp <- data[order(data$variant_allele_count),] #sort by variant_allele_count
        temp$genotype <- factor(temp$genotype,levels=unique(temp$genotype)) #use reordered genotypes as levels
        pdf(plot_file_path)
        ap <- ggplot(data=temp, aes(x=genotype,y=abundance) )
        ap <- ap + geom_boxplot()
        ap <- ap + ggtitle(title)
        ap <- ap + labs(x=xlabel, y=ylabel)
        ap <- ap + geom_jitter(position=position_jitter(w=0.1))
        print(ap)
        dev.off()
    }
    """
    plotting_package = SignatureTranslatedAnonymousPackage(r_source, 'pck')
    return plotting_package.taxon_abundance_box_plot
def run_hydrology(init_gwstorage, init_C, init_Nash, init_Qq, init_Qs,
                  climate_type):
    """Run the IHACRES groundwater model through its R wrapper script.

    The R source ``WrappableRunIhacresGw.R`` is read from the directory
    configured as ``CONFIG.paths['hydrological']`` (falling back to this
    module's directory), wrapped as an anonymous R package, and its
    ``RunIhacresGw`` function is called.

    ``init_Nash`` is converted to an R ``FloatVector``; all other arguments
    are forwarded unchanged.

    :returns: whatever ``RunIhacresGw`` returns.
    """
    if "hydrological" in CONFIG.paths:
        path = CONFIG.paths['hydrological']
        # The original appended "/" only in the fallback branch (operator
        # precedence), so a configured path without a trailing slash lost
        # its last character. Normalise here instead.
        base = path if path.endswith("/") else path + "/"
    else:
        path = os.path.dirname(__file__)
        base = path + "/"

    r_path = os.path.join(path, 'WrappableRunIhacresGw.R')
    with open(r_path) as r_file:
        # import .R file and call function
        string = r_file.read()
    IhacresGW = SignatureTranslatedAnonymousPackage(string, "IhacresGW")

    datadir = base + "data"
    # Remove last slash as function below expects it to be empty
    workingdir = base[:-1]

    return IhacresGW.RunIhacresGw(workingdir, datadir, init_gwstorage,
                                  init_C, FloatVector(init_Nash), init_Qq,
                                  init_Qs, climate_type)
def get_taxon_abundance_stacked_bar_plot():
    """Return the R function ``taxon_abundance_stacked_bar_plot`` as a callable.

    The R function takes ``(data, plot_file_path, title, xlabel, ylabel)``,
    computes the median abundance per (genotype, gene, taxon) and writes a
    stacked ggplot2 bar chart (filled by taxon) to an 8x4 PDF.
    """
    r_source = """
    require("dplyr")
    require("ggplot2")
    taxon_abundance_stacked_bar_plot <- function(data, plot_file_path, title, xlabel, ylabel) {
        temp <- data[order(data$variant_allele_count),] #sort by variant_allele_count
        temp$genotype <- factor(temp$genotype,levels=unique(temp$genotype)) #use reordered genotypes as levels

        #creates a new data frame with median abundance from each combo
        result <- temp %>%
            group_by(genotype, gene, taxon) %>%
            summarize(medianAbundance = median(abundance))

        #If you want the heights of the bars to represent values in the data,
        #use stat="identity" and map a value to the y aesthetic.
        pdf(plot_file_path, width=8, height=4)
        ap <- ggplot(data=result, aes(x=genotype,y=medianAbundance,fill=taxon)) +
            geom_bar(stat='identity') +
            ggtitle(title)
        ap <- ap + labs(x=xlabel, y=ylabel)
        ap <- ap + theme(legend.direction = 'vertical', legend.position = 'bottom')
        ap <- ap + guides(fill = guide_legend(reverse = TRUE))
        print(ap)
        dev.off()
    }
    """
    plotting_package = SignatureTranslatedAnonymousPackage(r_source, 'pck')
    return plotting_package.taxon_abundance_stacked_bar_plot
def extract_strain_id(busco_result):
    '''
    List the strains whose BUSCO score (column V2) is at least 95.

    input: busco_result, passed to R ``read.table``
    return: list of strain ids; ids that have a ``_PB`` counterpart are
        dropped in R, then three ids are replaced by fixed names.

    Raises ValueError (from ``list.remove``) if "70-15", "HO_busco" or
    "PH42_busco" is not among the selected strains.
    '''
    r_source = """
    extract_95 <- function(busco_result) {
        require(tidyverse)
        in_fl <- read.table(busco_result)
        in_fl_95 <- in_fl %>% filter(V2 >= 95)
        # print(in_fl_95$V1)
        pb_protein_id=in_fl_95[grep("_PB",in_fl_95$V1,ignore.case = F),1]
        need_remove=gsub("_PB","",pb_protein_id)
        in_fl_95_removed_pb=in_fl_95[!(in_fl_95$V1%in%need_remove),]
        # print(in_fl_95_removed_pb$V1)
        return(as.character(in_fl_95_removed_pb$V1))
    }
    """
    extractor = SignatureTranslatedAnonymousPackage(r_source, "R_extract_95")
    strain_95_list = list(extractor.extract_95(busco_result))

    # Drop the BUSCO-specific names first, then append their replacements.
    for obsolete in ("70-15", "HO_busco", "PH42_busco"):
        strain_95_list.remove(obsolete)
    strain_95_list.extend(
        ["magnaporthe_oryzae_70-15_8_proteins_T0", "HO", "PH42"])
    return strain_95_list
def remove_MGG_unpresent_Augustus(
        pav_orthofinder, MGG_unpresent_Augustus_unassianed_list_file_name,
        pav_orthofinder_1574):
    '''
    Remove from pav_orthofinder the 1574 entries that appear among the
    unassigned genes.

    input 1: pav_orthofinder (xlsx file)
    input 2: MGG_unpresent_Augustus_unassianed_list
    output 1: pav_orthofinder_1574 (xlsx file written by R)

    All three arguments are converted with ``str`` before being passed to R.
    '''
    r_source = '''
    R_remove_MGG_unpresent_Augustus=function(
        pav_orthofinder_file_name,
        MGG_unpresent_Augustus_unassianed_list_file_name,
        pav_orthofinder_1574_file_name
    ){
        require(readxl)
        require(WriteXLS)
        require(dplyr)
        MGG_unpresent_Augustus_unassianed_list=read.table(MGG_unpresent_Augustus_unassianed_list_file_name)
        pav_orthofinder=read_xlsx(pav_orthofinder_file_name)
        pav_orthofinder_1574=pav_orthofinder %>% filter(!(protein_id %in% MGG_unpresent_Augustus_unassianed_list$V1))
        pav_orthofinder_1574=pav_orthofinder_1574[,-1]
        pav_orthofinder_1574=pav_orthofinder_1574[,c(1,158,2:157)]
        WriteXLS::WriteXLS(pav_orthofinder_1574,pav_orthofinder_1574_file_name)
    }
    '''
    r_package = SignatureTranslatedAnonymousPackage(
        r_source, "R_remove_MGG_unpresent_Augustus")
    file_names = [
        str(pav_orthofinder),
        str(MGG_unpresent_Augustus_unassianed_list_file_name),
        str(pav_orthofinder_1574),
    ]
    r_package.R_remove_MGG_unpresent_Augustus(*file_names)
def predictPrices(path):
    """Call the R function defined by ``rpy2/project/R/predict.R`` on *path*.

    The script is sourced in R, the textual form of the first element of
    the ``source`` result is re-bound to the name ``predictPrice`` in an
    anonymous package, and that function is invoked with *path*.
    """
    script_path = os.path.abspath("rpy2/project/R/predict.R")
    sourced = robjects.r.source(script_path)

    from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage

    r_definition = "predictPrice <- " + str(sourced[0])
    project = SignatureTranslatedAnonymousPackage(r_definition, "project")
    return project.predictPrice(path)
def convertRtoPandas(file_path):
    """Load an RData file through R and return its table as a DataFrame.

    Relies on the module-level ``load_RData_func_str`` (R source that
    defines ``load_RData``) and ``col_names`` (column labels for the
    result).

    :param file_path: path handed to the R ``load_RData`` function.
    :returns: ``pandas.DataFrame`` with columns ``col_names``.
    """
    # Pack the R function as a package
    r_pack = SignatureTranslatedAnonymousPackage(load_RData_func_str,
                                                 "r_pack")
    pandas2ri.activate()
    try:
        r_data = r_pack.load_RData(file_path)
        return pd.DataFrame(pandas2ri.ri2py(r_data), columns=col_names)
    finally:
        # Restore the global conversion state even when loading fails.
        pandas2ri.deactivate()
def getFDRCorrection(pvals):
    """Adjust p-values with R's ``p.adjust(method = "fdr")``.

    :param pvals: values accepted by ``robjects.FloatVector``.
    :returns: the R vector of adjusted p-values.
    """
    r_source = """
    fdr <- function(pvals) {
        return(p.adjust(pvals, method = "fdr"))
    }
    """
    stats_package = SignatureTranslatedAnonymousPackage(r_source, "rStats")
    return stats_package.fdr(robjects.FloatVector(pvals))
def __init__(self, r_filename):
    """Load the R script *r_filename* and keep it as ``self.r``.

    The source is evaluated twice on purpose, as in the original code:
    once wrapped as an anonymous package (stored on ``self.r``) and once
    directly through ``robjects.r``.
    """
    # Read in r file and init data engine
    with open(r_filename, "r") as script:
        r_source = script.read()
    package = SignatureTranslatedAnonymousPackage(r_source, "r")
    robjects.r(r_source)
    self.r = package
def r_cal(df):
    """Build and plot an R point pattern from the ``x``/``y`` columns of *df*.

    Activates the pandas<->R conversion (it is not deactivated here),
    converts ``df[["x", "y"]]`` to an R data frame and calls the R function
    ``ptsPPP`` on it, which plots the pattern on the window
    [-25, 25] x [-25, 25]. The result is not returned.

    NOTE(review): the R code calls ``ppp`` without loading a library —
    presumably spatstat is expected to be attached already; confirm.
    """
    r_source = """
    ptsPPP <- function(df) {
        X <- with(df, ppp(x, y, c(-25,25), c(-25,25)))
        plot(X)
        return(X)
    }
    """
    point_package = SignatureTranslatedAnonymousPackage(r_source, "powerpack")
    pandas2ri.activate()
    r_coordinates = pandas2ri.py2ri(df[["x", "y"]])
    point_package.ptsPPP(r_coordinates)
def load_r_file(filename, namespace):
    """Return the R package for *namespace*, loading *filename* on first use.

    Loaded packages are cached in the module-level ``r_namespaces`` dict.
    On a cache miss the numpy<->R conversion is activated, and *filename*
    is resolved under ``PROJECT_DIR/r_src/forAndrej`` unless it already
    contains ``PROJECT_DIR``.
    """
    if namespace in r_namespaces:
        return r_namespaces[namespace]

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()

    if PROJECT_DIR not in filename:
        filename = os.path.join(PROJECT_DIR, 'r_src', 'forAndrej', filename)

    with open(filename, 'r') as script:
        r_source = script.read()

    r_namespaces[namespace] = SignatureTranslatedAnonymousPackage(
        r_source, namespace)
    return r_namespaces[namespace]
def arxiv_crawl(crawling_list,
                limit=None,
                batchsize=100,
                submission_range=None,
                update_range=None,
                delay=None):
    """
    This is a python wrapper for the aRxiv "arxiv_search" function.

    If submission_range or update_range are given, the results are filtered according to the date ranges.

    :param crawling_list: The subcategories to crawl. NOT "stat" -> USE "stat.AP" etc...
    :type crawling_list: dict of lists.
    :param limit: Max number of results to return.
    :type limit: int.
    :param batchsize: Number of queries per request.
    :type batchsize: int.
    :param submission_range: The range of submission dates.
    :type submission_range: Tuple (start,end).
    :param update_range: The range of last-update dates.
    :type update_range: Tuple (start,end).
    :param delay: NOTE(review): not used in the code visible here.
    :returns: The created folder

    NOTE(review): the body visible here only prepares the working folder,
    logging and the R package, and has no ``return`` statement — confirm
    against the full function. Exits the process via ``sys.exit`` when the
    R script cannot be loaded.
    """
    # Timestamp of starting datetime
    ts_start = time.time()
    timestamp = datetime.datetime.fromtimestamp(ts_start).strftime(
        '%Y-%m-%d_%H-%M-%S')

    # Create folder structure
    working_folder = base_directory + timestamp
    os.makedirs(working_folder)
    os.makedirs(working_folder + "/temp_files")

    # Setup logging
    config = logging_confdict(working_folder, __name__)
    logging.config.dictConfig(config)
    arxiv_logger = logging.getLogger(__name__)

    arxiv_logger.info("Starting new crawl for {}".format(str(crawling_list)))
    arxiv_logger.info("Created new folder: <<" + working_folder + ">>")

    # Load R-scripts
    arxiv_logger.debug("Loading R-Scripts ...")
    try:
        with open('../r_scripts/arxiv.R', 'r') as f:
            string = f.read()
        arxiv_crawler = SignatureTranslatedAnonymousPackage(
            string, "arxiv_crawler")
    except Exception:
        # `except Exception:` is valid on Python 2 and 3 (the old
        # `except Exception, e` form is Python-2 only); the traceback is
        # recorded by logger.exception.
        arxiv_logger.exception("Error while loading R-Scripts.")
        sys.exit('Could not load R-Scripts!')
def run(self):
    """Run the R calibration ("apprentissage") and update the UI accordingly.

    Loads ``fonctions_apprentissage.r`` as an anonymous R package, derives
    the output folder from ``self.dicoFoldersCalibration["leaf"]``, asks for
    confirmation before overwriting existing output files, calls the R
    function ``apprentissage`` and reports the outcome through dialogs and
    the status bar. Any exception is shown via ``self.displayError``.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source; confirm the indentation of the
    ``else``/``if result`` branches against the original file.
    """
    try:
        result = "0"
        # Grey out the calibration controls while the run is in progress.
        self.disalbledButtonsCalibration()
        self.ui.resetCalibrationPushButton.setDisabled(True)
        with open("fonctions_apprentissage.r", "r", encoding="utf-8") as apprentissageRopen:
            apprentissage = "".join(apprentissageRopen.readlines())
        # The name is rebound from the R source text to the R package.
        apprentissage = SignatureTranslatedAnonymousPackage(apprentissage, "apprentissage")
        # Output folder = parent directory of the "leaf" folder.
        self.CalibrationOutPath = "/".join(str(self.dicoFoldersCalibration["leaf"]).split("/")[:-1])
        self.CalibrationBasename = self.CalibrationOutPath.split("/")[-1]
        self.actualizeOutFiles()
        if debug:
            print("{}\n{}".format(apprentissage, dir(apprentissage)))
        # test if Rdata file already exist, if yes remove file if user say yes, or stop analyse
        if os.path.exists(self.CalibrationFilesOut["RData"]):
            reply = QMessageBox.question(self, 'Warning', 'File will be overwritten.\nDo you still want to proceed?', QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
            if reply == QMessageBox.Yes:
                for key, path in self.CalibrationFilesOut.items():
                    os.remove(path)
                reloadCalibration = True
            elif reply == QMessageBox.No:
                reloadCalibration = False
        else:
            reloadCalibration = True
        if reloadCalibration:
            self.ui.statusbar.showMessage(str("Running Calibration, please waiting ...."),9600)
            #result , self.CalibrationFilesOut["RData"] = apprentissage.apprentissage(self.dicoObjectOpenLineEditCalibration["leaf"],self.dicoObjectOpenLineEditCalibration["symptom"],self.dicoObjectOpenLineEditCalibration["background"]).r_repr().replace('"','').replace("c(","").replace(")","").split(",")
            # The R result is parsed from its textual representation, which
            # is expected to split into exactly two comma-separated fields.
            result , good = apprentissage.apprentissage(self.CalibrationOutPath).r_repr().replace('"','').replace("c(","").replace(")","").split(",")
            self.calibrationFileOpenLineEdit.setText(self.CalibrationFilesOut["RData"])
        if result == "1" and os.path.exists(self.CalibrationFilesOut["RData"]):
            # New calibration produced.
            print(result, self.CalibrationFilesOut["RData"])
            self.infoDialogue(status = "new")
            self.ui.statusbar.showMessage(str("FINISH, files were product on : %s" % self.CalibrationOutPath),9600)
            self.ui.resetCalibrationPushButton.setEnabled(True)
        elif result == "0" and os.path.exists(self.CalibrationFilesOut["RData"]):
            # Nothing new was produced but an RData file is present.
            self.infoDialogue(status = "already")
            print(result, self.CalibrationFilesOut["RData"])
            self.calibrationFileOpenLineEdit.setText("")
            self.ui.resetCalibrationPushButton.setEnabled(True)
            self.resetLoadFolder()
            self.enableButtonsCalibration()
        elif result == "0" and not os.path.exists(self.CalibrationFilesOut["RData"]):
            self.displayError(error = "Error when running R code....")
    except Exception as e:
        self.displayError(error = e)
def file_to_anonymous_package(
        file: str) -> SignatureTranslatedAnonymousPackage:
    """
    Source an R file in rpy2 as an anonymous package.

    The package is named after the file's base name without its extension
    (``some/dir/file.R`` -> ``file``); the name is accessible on the
    returned object as ``package.__rname__``.

    :param file: path to the R source file.
    :returns: the R package object.
    """
    base_name = os.path.basename(file)
    package_name, _extension = os.path.splitext(base_name)
    with open(file, "r") as r_package_file:
        r_source = r_package_file.read()
    return SignatureTranslatedAnonymousPackage(r_source, name=package_name)
def crossref_lookup(working_folder,
                    index,
                    authors,
                    titles,
                    submitted,
                    num_threads=1):
    """Look up documents via the crossref worker threads and collect results.

    Loads ``../r_scripts/doi_lookup.R`` as an R package, queues one
    ``(idx, author, title, date)`` tuple per document (author strings are
    cut to their first 15 ``|``-separated entries), runs ``num_threads``
    ``CrossrefAPIThread`` workers plus one ``ProcessingThread``, and returns
    everything the processing thread put on its result queue.

    :returns: list of items drained from the result queue.
    """
    # Load r-scripts
    print("\nLoading R-Scripts ...")
    with open('../r_scripts/doi_lookup.R', 'r') as script:
        r_source = script.read()
    doi_lookuper = SignatureTranslatedAnonymousPackage(r_source,
                                                       "doi_lookuper")

    cr_input_queue = Queue.Queue()
    cr_to_process = Queue.Queue()
    process_to_result = Queue.Queue()

    doc_count = 0
    for idx, author, title, date in zip(index, authors, titles, submitted):
        author_parts = author.split("|")
        if len(author_parts) >= 15:
            author = "|".join(author_parts[:15])
        cr_input_queue.put((idx, author, title, date))
        doc_count += 1

    process_thread = ProcessingThread(working_folder, cr_to_process,
                                      process_to_result, doc_count)

    print("\nStarting crossref crawl process...")
    crossref_threads = []
    for _ in range(num_threads):
        worker = CrossrefAPIThread(cr_input_queue, cr_to_process,
                                   doi_lookuper)
        worker.start()
        crossref_threads.append(worker)

    process_thread.start()

    # Signal every worker first, then wait for all of them.
    for worker in crossref_threads:
        worker.event.set()
    for worker in crossref_threads:
        worker.join()

    process_thread.event.set()
    process_thread.join()

    results = []
    while not process_to_result.empty():
        results.append(process_to_result.get())
    return results
def Bing_cust(lam1, lam2, lam3):
    """Build an R package whose ``rbingham(n, A)`` uses fixed eigenvalues.

    The three values are written into the R source with ``%f`` (six decimal
    places) as ``lam <- c(lam1, lam2, lam3)``; the eigenvectors still come
    from ``eigen(A)``. Sampling is done in R by rejection sampling.

    :returns: the ``SignatureTranslatedAnonymousPackage`` named
        ``"powerpack"``.
    """
    r_template = """
    rbingham <- function(n, A) {
        p <- ncol(A)  ## dimensionality of A
        eig <- eigen(A)
        V <- eig$vectors  ## eigenvectors
        lam <- c(%f,%f,%f)
        lam <- lam - lam[p]
        lam <- lam[-p]
        ### f.rbing part
        lam <- sort(lam, decreasing = TRUE)  ## sort the eigenvalues in desceding order
        nsamp <- 0
        X <- NULL
        lam.full <- c(lam, 0)
        qa <- length(lam.full)
        mu <- numeric(qa)
        sigacginv <- 1 + 2 * lam.full
        SigACG <- sqrt( 1 / ( 1 + 2 * lam.full ) )
        Ntry <- 0
        while (nsamp < n) {
            x.samp <- FALSE
            while ( !x.samp ) {
                yp <- rnorm(qa, mu, SigACG)
                y <- yp / sqrt( sum( yp^2 ) )
                lratio <- - sum( y^2 * lam.full ) - qa/2 * log(qa) + 0.5 * (qa - 1) + qa/2 * log( sum(y^2 * sigacginv ) )
                if ( log(runif(1) ) < lratio) {
                    X <- c(X, y)
                    x.samp <- TRUE
                    nsamp <- nsamp + 1
                }
                Ntry <- Ntry + 1
            }
        }
        x <- matrix(X, byrow = TRUE, ncol = qa)
        ## the avtry is the estimate of the M in rejection sampling
        ## 1/M is the probability of acceptance
        ## the x contains the simulated values
        tcrossprod(x, V)  ## simulated data
    }
    """
    r_source = r_template % (lam1, lam2, lam3)
    return SignatureTranslatedAnonymousPackage(r_source, "powerpack")
def set_minus_cut(start_point, pav_df_file_name, result_path):
    """Split a presence/absence table on the *start_point* column via R.

    The R function ``Cut`` reads the xlsx file, separates rows where the
    *start_point* column is 1 ("minus" part) from rows where it is 0
    ("add" part), and writes several ``set_minus_*`` text/xlsx files whose
    names are *result_path* directly concatenated with a fixed prefix.

    *pav_df_file_name* is converted with ``str`` before the call.
    """
    r_source = '''
    Cut=function(start_point,pav_df_file_name,result_path){
        require(readxl)
        require(WriteXLS)
        require(tidyverse)
        pav_df=read_xlsx(pav_df_file_name)
        gene_is=pav_df %>%
            filter((!!sym(start_point))==1) %>% # filter(`70-15`)==1
            select("protein_id")
        minus_part=pav_df %>%
            filter((!!sym(start_point))==1) %>%
            column_to_rownames("protein_id")
        # pav_df_colsum=colSums(minus_part_num)
        # pav_df_colsum_sort=sort(pav_df_colsum)

        add_part=pav_df %>%
            filter((!!sym(start_point))==0) %>%
            column_to_rownames("protein_id")
        # add_part=add_part %>%
        #   column_to_rownames(pav_df_raw$...2)
        add_part_num=sapply(add_part[2:157], function(x) as.numeric(x))
        pav_df_colsum=colSums(add_part_num)
        pav_df_colsum_sort=sort(pav_df_colsum)
        write.table(attributes(pav_df_colsum_sort),paste(result_path,sprintf("set_minus_sort_protein_id_%s.txt", start_point),sep = ""),append = F,quote = F,row.names = F,col.names = F)
        write.table(pav_df_colsum_sort,paste(result_path,sprintf("set_minus_sort_protein_id_num_%s.txt", start_point),sep = ""),append = F,quote = F,row.names = T,col.names = F)
        WriteXLS::WriteXLS(
            minus_part,
            paste(result_path,sprintf("set_minus_minus_%s.xlsx", start_point),sep = ""),
            col.names = T,
            row.names = T
        )
        WriteXLS::WriteXLS(
            add_part,
            paste(result_path,sprintf("set_minus_add_%s.xlsx", start_point),sep = ""),
            col.names = T,
            row.names = T
        )
        write.table(gene_is,paste(result_path,sprintf("set_minus_gene_id_%s.txt", start_point),sep = ""),append = F,quote = F,row.names = F,col.names = F)
    }
    '''
    cutter = SignatureTranslatedAnonymousPackage(r_source, "R_set_minus_cut")
    table_path = str(pav_df_file_name)
    cutter.Cut(start_point, table_path, result_path)
def matchit(outcome,
            treatment,
            data,
            method='nearest',
            distance='glm',
            replace=False):
    """Estimate an average treatment effect with R's MatchIt package.

    *data* is written to ``data.csv`` in the current working directory and
    read back in R. ``matchit`` is run once on the data and once with the
    treatment indicator flipped; the outcome differences of matched pairs
    from both runs are averaged.

    :param outcome: name of the outcome column.
    :param treatment: name of the treatment column.
    :param data: pandas DataFrame; every column other than *outcome* and
        *treatment* is used as a covariate.
    :param method: MatchIt ``method`` argument.
    :param distance: accepted for interface compatibility but NOT passed to
        MatchIt by this function.
    :param replace: whether to match with replacement.
    :returns: mean of the matched outcome differences.
    """
    replace = 'TRUE' if replace else 'FALSE'

    data.to_csv('data.csv', index=False)

    # "<treatment> ~ cov1+cov2+..." over all remaining columns.
    covariates = [
        str(cov) for cov in data.columns
        if cov != outcome and cov != treatment
    ]
    formula_cov = treatment + ' ~ ' + '+'.join(covariates)

    string = """
    library('MatchIt')
    data <- read.csv('data.csv')
    r <- matchit( %s,estimand="ATE", method = "%s", data = data, replace = %s)
    matrix <- r$match.matrix[,]
    names <- as.numeric(names(r$match.matrix[,]))
    mtch <- data[as.numeric(names(r$match.matrix[,])),]
    hh <- data[as.numeric(names(r$match.matrix[,])),'%s']- data[as.numeric(r$match.matrix[,]),'%s']

    data2 <- data
    data2$%s <- 1 - data2$%s
    r2 <- matchit( %s, estimand="ATE", method = "%s", data = data2, replace = %s)
    matrix2 <- r2$match.matrix[,]
    names2 <- as.numeric(names(r2$match.matrix[,]))
    mtch2 <- data2[as.numeric(names(r2$match.matrix[,])),]
    hh2 <- data2[as.numeric(r2$match.matrix[,]),'%s'] - data2[as.numeric(names(r2$match.matrix[,])),'%s']
    """ % (formula_cov, method, replace, outcome, outcome, treatment,
           treatment, formula_cov, method, replace, outcome, outcome)

    psnn = SignatureTranslatedAnonymousPackage(string, "powerpack")

    t_hat = pd.DataFrame(np.hstack((np.array(psnn.hh), np.array(psnn.hh2))),
                         index=list(psnn.names.astype(int)) +
                         list(psnn.names2.astype(int)),
                         columns=['CATE'])
    return np.mean(t_hat['CATE'])
def run_mccoil(barcode_file_lines, maf_file_lines=None, verbose=False):
    """Run the categorical McCOIL R routine on a set of barcodes.

    :param barcode_file_lines: lines of a barcode file.
    :param maf_file_lines: optional lines of a minor-allele-frequency file;
        when falsy the frequencies are computed from the barcodes.
    :param verbose: print the zygosity matrix and its R data frame.
    :returns: ``{'mafs': [(site, maf), ...], 'cois': [(sample, coi), ...]}``;
        ``[]`` when the barcodes are invalid and ``{}`` when the allele
        frequencies are invalid (the error message is printed in both
        cases).
    """
    ## init barcode set
    barcode_set = Barcode.SetOfBarcodes()
    barcode_set.readBarcodeFileLines(barcode_file_lines)

    # validate
    is_valid_barcode_set, err_msg = barcode_set.validate()
    if not is_valid_barcode_set:
        print(err_msg)
        return ([])

    ## init mafs
    if not maf_file_lines:
        mafs = barcode_set.computeMAFFromBarcodes(1)
    else:
        mafs = MAF.MAF()
        mafs.readMAFFileLines(maf_file_lines)
    # Built for both branches: the McCOIL call below always needs it.
    mafs_R_vector = robjects.Vector(mafs.minor_allele_freqs())

    # validate
    is_valid_mafs, err_msg = mafs.validate()
    if not is_valid_mafs:
        print(err_msg)
        return ({})

    ## compute zygosity matrix, then convert to R DataFrame
    zygosity_matrix = barcode_set.to_zygosity_matrix(mafs,
                                                     header=True,
                                                     index=True)
    if verbose:
        print(zygosity_matrix)
    data = to_R_zygosity_df(zygosity_matrix)
    if verbose:
        print(data)

    ## import MCCOIL (close the script file deterministically)
    with open(mccoil_R, 'r') as r_file:
        mccoil_R_code = r_file.read()
    mccoil = SignatureTranslatedAnonymousPackage(mccoil_R_code, 'mccoil')

    ## compute result
    result = mccoil.McCOIL_categorical(data, P=mafs_R_vector)

    ## get sites/samples
    sites, samples = list(result[-2]), list(result[-1])

    ## get maf/coi predictions, which map 1-to-1 sites/samples
    # (i.e. maf_prediction[i] is prediction for site[i])
    maf_predictions, coi_predictions = list(result[6]), list(result[5])

    # list(...) keeps the Python 2 return type (lists) on Python 3 as well.
    return ({
        'mafs': list(zip(sites, maf_predictions)),
        'cois': list(zip(samples, coi_predictions))
    })
def get_arima_rsi(prices):
    """Return ARIMA(0,0,0) coefficients and an RSI value computed in R.

    *prices* is wrapped in a DataFrame and handed to the R function
    ``calculate`` together with its length. Activates the pandas<->R
    conversion (it is not deactivated here).

    :returns: the R array ``c(arima$coef, rsi)``.
    """
    price_frame = pd.DataFrame(prices)
    pandas2ri.activate()
    r_source = """
    calculate <- function(x, size=100){
        x <- na.omit(x)
        library(TTR)
        library(stats)
        x <- ts(x)
        f <- function(m) class(try(solve(m),silent=T))=="matrix"
        if(f(x)){
            x[50] = x[50] + 2
        }
        arima <- arima(x, c(0,0,0))
        rsi <- RSI(x, size-1)[size]
        list <-c(arima$coef, rsi)
        return(as.array(list))
    }"""
    calculator = SignatureTranslatedAnonymousPackage(r_source, "calculate")
    return calculator.calculate(price_frame, len(price_frame))
def xml2df(url):
    """Parse the XML document at *url* in R and return it as a pandas DataFrame.

    The R helper ``getXML`` parses the document with ``xmlTreeParse``,
    flattens it with ``xmlToList`` (attributes dropped) and row-binds the
    entries with ``plyr::ldply``.
    """
    from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
    from rpy2.robjects import pandas2ri

    # R side: XML -> list -> data.frame
    r_source = """
    require(XML)
    require(plyr)
    getXML <- function(x) {
        xmlfile <- xmlTreeParse(x)
        temp = xmlToList(xmlfile, addAttributes = F)
        df <- ldply(temp, .fun=function(x) {data.frame(t(unlist(x)))})
        return(df)
    }
    """
    xml_package = SignatureTranslatedAnonymousPackage(r_source, "test")

    # Python side: R data.frame -> pandas DataFrame
    r_frame = xml_package.getXML(url)
    return pandas2ri.ri2py_dataframe(r_frame)
def main():
    """Run the R breakout-detection function on the bundled fuel data.

    Reads the first column of ``fuel_data.csv`` as floats, loads
    ``breakout_function.R`` as an anonymous R package and prints the result
    of its ``Detect`` function.
    """
    # Open and Read the data file from a csv to convert it to a list
    with open('breakout_detection_wraper/fuel_data.csv', 'r') as csvfile:
        mydata = [float(line[0]) for line in csv.reader(csvfile)]

    # Define the parameters to configure the break out detection algoritm
    minsize = 30
    method = 'multi'
    degree = 1

    # Open the R file to run it on the wrapper. read() keeps the script
    # verbatim; joining readlines() with os.linesep doubled every newline.
    with open('breakout_detection_wraper/breakout_function.R') as code:
        rcode = code.read()

    # Create the wrapper as an anonymous package signature
    wrapper = SignatureTranslatedAnonymousPackage(rcode, "breakout_function")

    # Execute the method from the wrapper
    result = wrapper.Detect(FloatVector(mydata), minsize, method, degree)

    # Print the result returned from the R function
    print(result)
def __init__(self):
    """Create the R helper callables and the anonymous package.

    Each helper is built with ``self.func(<R function name>, <R source>)``
    from a class-level source constant and stored on the instance, in the
    same order as listed below.
    """
    self.pd_active()
    self._dotty = self.func('py_dotted_data', self.DOTTED_DATA)
    self.pdata = self.func('py_packdata', self.PACKDATA)
    # The 'py_avail' helper is called immediately; its result is stored.
    self.available = self.func('py_avail', self.AVAIL)()
    self.installed = self.calc('installed.packages')

    # (instance attribute, R function name, name of the source constant)
    # NOTE(review): '_tapply' and '_closest' reuse 'py_r_csvread', and
    # '_some_nums' reuses 'py_show_row' — this looks like copy-paste;
    # confirm against the R sources before changing.
    helper_specs = (
        ('_histlines', 'py_draw_histlines', 'HISTLINES'),
        ('_lines', 'py_draw_lines', 'LINES'),
        ('_hist', 'py_draw_hist', 'HIST'),
        ('_pareto', 'py_draw_pareto', 'PARETO'),
        ('_sleaf', 'py_draw_sleaf', 'SLEAF'),
        ('_csv', 'py_r_csvread', 'CSV'),
        ('_tapply', 'py_r_csvread', 'TAPPLY'),
        ('_closest', 'py_r_csvread', 'CLOSEST'),
        ('_jupyter_opt', 'py_r_jupyter', 'JUPYTER_OPT'),
        ('_readtab', 'py_r_readtab', 'READTAB'),
        ('_mul', 'py_r_mul', 'MUL'),
        ('_exp', 'py_r_exp', 'EXP'),
        ('_div', 'py_r_div', 'DIV'),
        ('_add', 'py_r_add', 'ADD'),
        ('_sub', 'py_r_sub', 'SUB'),
        ('_str', 'py_r_str', 'STR'),
        ('_cond', 'py_r_cond', 'COND'),
        ('_lmplot', 'py_r_lmplot', 'LMPLOT'),
        ('_plot', 'py_r_plot', 'PLOT'),
        ('_splot', 'py_r_splot', 'SPLOT'),
        ('_samps', 'py_sample_size', 'SAMP_SIZE'),
        ('_sample', 'py_sample', 'SAMPLE'),
        ('_randomsample', 'py_randomsample', 'RANDOMSAMPLE'),
        ('_column_ext', 'py_column_extract', 'COLUMN_EXTRACT'),
        ('_serror', 'py_serror_samp', 'SERROR_SAMP'),
        ('_pretty', 'py_pretty_frame', 'PRETTY_FRAME'),
        ('_show_row', 'py_show_row', 'SHOW_ROW'),
        ('_some_nums', 'py_show_row', 'SOME_NUMBERS'),
        ('_packs', 'py_packs', 'PACKS'),
        ('_help', 'py_help', 'HELP'),
        ('_contents', 'py_contents', 'PACK_CONTENTS'),
    )
    for attribute, r_name, source_constant in helper_specs:
        # The constant is looked up right before the call so reads and
        # calls stay interleaved exactly as in a sequence of assignments.
        setattr(self, attribute,
                self.func(r_name, getattr(self, source_constant)))

    self.anon = SignatureTranslatedAnonymousPackage(self.ANON_PACK,
                                                    "anon_pack")
def MGG_unpresent_Augustus_locate_in_pav_orthofinder(
        MGG_unpresent_Augustus_list_file_name, pav_orthofinder_file_name,
        gene_protein_mapping_table_file_name,
        pav_MGG_unpresent_Augustus_file_name,
        orthofinder_unassianed_tsv_file_name,
        MGG_unpresent_Augustus_unassianed_list_file_name):
    """Locate the listed MGG genes in the pav_orthofinder table via R.

    The R function maps the listed gene ids to protein ids, writes the
    matching pav_orthofinder rows to
    *pav_MGG_unpresent_Augustus_file_name* (xlsx) and writes the subset
    that also occurs in the ``70-15_protein`` column of the unassigned
    table to *MGG_unpresent_Augustus_unassianed_list_file_name*.

    All arguments are file names and are passed to R unchanged.
    """
    r_source = '''
    MGG_unpresent_Augustus_locate_in_pav_orthofinder=function(MGG_unpresent_Augustus_list_file_name,pav_orthofinder_file_name,gene_protein_mapping_table_file_name,pav_MGG_unpresent_Augustus_file_name,orthofinder_unassianed_tsv_file_name,MGG_unpresent_Augustus_unassianed_list_file_name){
        require(readxl)
        require(WriteXLS)
        require(dplyr)
        MGG_unpresent_Augustus_list=read.table(MGG_unpresent_Augustus_list_file_name,stringsAsFactors = F)
        pav_orthofinde=read_xlsx(pav_orthofinder_file_name)
        pav_orthofinde=pav_orthofinde[,-1]
        gene_protein_mapping_table=read.table(gene_protein_mapping_table_file_name)
        pav_orthofinde_protein=merge(MGG_unpresent_Augustus_list,gene_protein_mapping_table,by.x = 1,by.y = 1)
        pav_MGG_unpresent_Augustus =pav_orthofinde %>% filter(protein_id %in% pav_orthofinde_protein$V2)
        pav_MGG_unpresent_Augustus=pav_MGG_unpresent_Augustus[,c(158,1:157)]
        WriteXLS::WriteXLS(pav_MGG_unpresent_Augustus,pav_MGG_unpresent_Augustus_file_name)
        orthofinder_unassianed=read.table(orthofinder_unassianed_tsv_file_name,sep = "\t",header = T,check.names = F)
        MGG_unpresent_Augustus_unassianed_list=intersect(pav_MGG_unpresent_Augustus$protein_id,orthofinder_unassianed$`70-15_protein`)
        write.table(MGG_unpresent_Augustus_unassianed_list,MGG_unpresent_Augustus_unassianed_list_file_name,quote = F,row.names = F,col.names = F)
    }
    '''
    locator = SignatureTranslatedAnonymousPackage(
        r_source, "R_MGG_unpresent_Augustus_locate_in_pav_orthofinder")
    r_arguments = (
        MGG_unpresent_Augustus_list_file_name,
        pav_orthofinder_file_name,
        gene_protein_mapping_table_file_name,
        pav_MGG_unpresent_Augustus_file_name,
        orthofinder_unassianed_tsv_file_name,
        MGG_unpresent_Augustus_unassianed_list_file_name,
    )
    locator.MGG_unpresent_Augustus_locate_in_pav_orthofinder(*r_arguments)
def filter_pan_id(pan_id_file_name, length_table_file,
                  filtered_pan_id_file_name):
    '''
    Keep only the pan ids whose length-table value (V2) is greater than 20.

    input 1: pan_id_file_name
    input 2: length_table_file
    output 1: filtered_pan_id_file_name (tab-separated, no header)
    '''
    r_source = '''
    filter_pan_id=function(pan_id_file_name,length_table_file,filtered_pan_id_file_name){
        require(dplyr)
        pan_id=read.table(pan_id_file_name)
        length_table=read.table(length_table_file)
        length_table_filter=length_table %>% filter(V2>20)
        pan_id_filter=merge(pan_id,length_table_filter,by.x = 2,by.y = 1,all.y = T)
        pan_id_filter=pan_id_filter[,-3]
        pan_id_filter=pan_id_filter[,c(2,1)]
        write.table(pan_id_filter,filtered_pan_id_file_name,sep = "\t",quote = F,row.names = F,col.names = F)
    }
    '''
    pan_filter = SignatureTranslatedAnonymousPackage(r_source,
                                                     "R_filter_pan_id")
    pan_filter.filter_pan_id(pan_id_file_name, length_table_file,
                             filtered_pan_id_file_name)
} else{ multi_blast_vlaue_df=R_blast_vlaue_df %>% filter(gene_name %in% multi_copy_gene_Vector) %>% mutate(ok = blast_value >= lower & blast_value <= upper) if(!(all(multi_blast_vlaue_df$ok))){ multi_blast_vlaue_df=multi_blast_vlaue_df[multi_blast_vlaue_df$ok,] } multi_blast_vlaue_df=mutate(multi_blast_vlaue_df,strain_id=strsplit(gene_name,"_")) dsaf=aggregate(blast_value~strain_id,data = multi_blast_vlaue_df,max) best_value_df=filter(multi_blast_vlaue_df,blast_vlaue %in% dsaf) } return(c(single_blast_vlaue_df$gene_name,best_value_df$gene_name)) } ''' R_parse_blast_result = SignatureTranslatedAnonymousPackage( R_code_parse_blast_result, "R_parse_blast_result") R_filter_strain_num = SignatureTranslatedAnonymousPackage( R_code_filter_strain_num, "R_filter_strain_num") def extract_gene_gff(orthogroup_file, strain_db_dir_path, contig_path, all_row_gene_fasta_dir): global_names = globals() for db_file in strain_db_dir_path.iterdir(): global_names[db_file.stem.strip() + "_db"] = gffutils.FeatureDB(db_file) global_names["MGG_db"] = SeqIO.index( "../../70-15_refference_genome/magnaporthe_oryzae_70-15_8_genes.fasta", "fasta") gene_file_path = all_row_gene_fasta_dir / (orthogroup_file.stem + ".fasta") if gene_file_path.exists() is True: return
import warnings
from rpy2.rinterface import RRuntimeWarning

# Suppress the runtime warnings that rpy2 raises on behalf of R.
warnings.filterwarnings("ignore", category=RRuntimeWarning)

#from numba.typing.typeof import typeof
#import datasets
#from joblib import Memory
#import networkx as nx

# Directory of this module; the R script is expected to sit next to it.
path = os.path.dirname(__file__)
#memory = Memory(cachedir=path+"/cache", verbose=0)

# os.path.join keeps the path relative when `path` is empty (plain string
# concatenation would have produced "/ptestglm.R" in the filesystem root).
with open(os.path.join(path, "ptestglm.R"), "r") as pdnfile:
    code = pdnfile.read()
    ptestpdn = SignatureTranslatedAnonymousPackage(code, "ptestpdnglm")

# def getPtestpdnAdjacencyMatrix(data):
#     numpy2ri.activate()
#     try:
#         df = robjects.r["as.data.frame"](data)
#         out = ptestpdn.ptestglmblock(df)
#         #print(out)
#     except Exception as e:
#         #numpy.savetxt("/Users/alejomc/Dropbox/pspn/spyn/bin/data/graphlets/errordata.txt", data)
#         print(e)
#         print(data)
#         raise e
#