Esempio n. 1
0
def mkSimSem(variances=(0.5, 1.1, 0.8, 0.4, 0.4, 0.8, 0.8, 0.5, 0.6)):
    """Build an anonymous R package that simulates data from a 3-factor SEM.

    The generated R source loads lavaan and defines ``mkdata(n)``, which calls
    ``simulateData`` on a population model with three latent factors (f1-f3),
    each measured by three indicators (y1-y9), using ``sample.nobs=n``.

    Args:
        variances: Nine residual variances, substituted in order into the
            ``y1 ~~ ...`` through ``y9 ~~ ...`` lines of the population model.
            Each value is rendered with ``%f``. The default is an immutable
            tuple so it cannot be altered between calls.

    Returns:
        The ``SignatureTranslatedAnonymousPackage`` (R name ``semsimdata``)
        built from the formatted R source.

    Raises:
        TypeError: If ``variances`` does not supply exactly nine numbers
            (raised by the ``%`` string formatting).
    """
    r_source = """
    library(lavaan)
    mkdata=function(n){
    popModel <- "
        f1 =~ 1*y1 + 0.6*y2 + 0.7*y3
    f2 =~ 1*y4 + 1.1*y5 + 0.9*y6
    f3 =~ 1*y7 + 1.2*y8 + 1.1*y9
    f1 ~~ 0.8*f1
    f2 ~~ 0.9*f2
    f3 ~~ 0.4*f3
    f1 ~~ 0.4*f2
    f1 ~~ 0.2*f3
    f2 ~~ 0.3*f3
    y1 ~~ %f*y1
    y2 ~~ %f*y2
    y3 ~~ %f*y3
    y4 ~~ %f*y4
    y5 ~~ %f*y5
    y6 ~~ %f*y6
    y7 ~~ %f*y7
    y8 ~~ %f*y8
    y9 ~~ %f*y9
    "
    analyzeModel <- "
        f1 =~ y1 + y2 + y3
    f2 =~ y4 + y5 + y6
    f3 =~ y7 + y8 + y9
    "

    s=simulateData(popModel,sample.nobs=n)
    return(s)
    }""" % tuple(variances)

    return SignatureTranslatedAnonymousPackage(r_source, "semsimdata")
Esempio n. 2
0
def call_r(df):
    """Run the R ``linear_modeler`` function on CSV data given as a string.

    Arguments:
        df: A string replicating a CSV file. The observations for the
            dependent variable MUST be in the FIRST COLUMN.

    Returns:
        Whatever the R function ``linear_modeler`` returns; per the original
        author, an rpy2 float vector holding the coefficients of the linear
        regression.
    """
    import os
    from io import StringIO

    import rpy2.rinterface as ri
    from rpy2.robjects import DataFrame
    from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage

    ri.initr()
    constructor_dict = parser(StringIO(df))
    rpy2_dataframe = DataFrame(constructor_dict)

    # Build the path portably: the former 'regression_app\linear_...' literal
    # relied on an invalid "\l" escape and only resolved on Windows.
    r_script = os.path.join('regression_app', 'linear_modeler_function.R')
    with open(r_script) as f:
        r_source = f.read()  # no longer shadows the builtin ``str``

    mod = SignatureTranslatedAnonymousPackage(r_source, 'mod')
    return mod.linear_modeler(rpy2_dataframe)
    def generate_solutions_tables(self):
        """Read each sample's mode table from its RData file into pandas.

        For every row of ``self.data_table`` the RData file named in the
        ``absolute_summary_data`` column is loaded through a small R helper,
        which returns ``segobj.list[[<first name>]]$mode.res$mode.tab``.
        The result is wrapped in a ``pandas.DataFrame`` with the column names
        listed below and stored in ``self.pp_modes_tables`` under the row's
        ``pair_id``.

        Progress is printed every time ``index`` is a multiple of 100.
        The pandas<->R conversion is deactivated again even if a file fails
        to load.
        """
        col_names = [
            'alpha', 'tau', 'AT', 'b', 'delta', 'LL', 'mode_curv',
            'genome mass', 'sigma.h.hat', 'theta.z.hat', 'sigma.A.hat',
            'theta.Q.hat', 'lambda.hat', 'theta.0', 'frac.het', 'SCNA_LL',
            'entropy', 'Kar_LL', 'WGD', 'combined_LL', 'SSNV_LL',
            'SCNA_Theta_integral', 'dens'
        ]

        # Build R function to be used as a python package
        load_RData_func_str = """
                       load_RData <- function(file_path) {
                          load(file_path)
                          head_name <- ls()[1]
                          file_name <- names(`segobj.list`)[1]
                          r_data <- `segobj.list`[[file_name]]$mode.res$mode.tab
                          return(r_data)
                      }
                      """
        # Pack the function above as a package
        r_pack = SignatureTranslatedAnonymousPackage(load_RData_func_str,
                                                     "r_pack")
        n_samples = len(self.data_table)
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('Generating absolute tables for ' + str(n_samples) + ' samples')
        pandas2ri.activate()
        try:
            for index, row in self.data_table.iterrows():
                if np.mod(index, 100) == 0:
                    print(str(index) + '/' + str(n_samples))
                r_data = r_pack.load_RData(row['absolute_summary_data'])
                abs_table = pd.DataFrame(pandas2ri.ri2py(r_data),
                                         columns=col_names)
                self.pp_modes_tables[row['pair_id']] = abs_table
        finally:
            # Always restore the conversion state, even when loading fails.
            pandas2ri.deactivate()
Esempio n. 4
0
def get_taxon_abundance_box_plot():
    """Return the R function ``taxon_abundance_box_plot`` as a callable.

    The R function takes ``(data, plot_file_path, title, xlabel, ylabel)``.
    It sorts ``data`` by ``variant_allele_count``, uses the resulting genotype
    order as factor levels, and writes a ggplot2 box plot of ``abundance`` by
    ``genotype`` (with jittered points, title and axis labels) to the PDF at
    ``plot_file_path``.
    """
    r_source = """
        require("dplyr")
        require("ggplot2")

        taxon_abundance_box_plot <- function(data, plot_file_path, title, xlabel, ylabel) {

            temp <- data[order(data$variant_allele_count),] #sort by variant_allele_count
            temp$genotype <- factor(temp$genotype,levels=unique(temp$genotype)) #use reordered genotypes as levels

            pdf(plot_file_path)

            ap <- ggplot(data=temp,
                         aes(x=genotype,y=abundance)
                  )
            ap <- ap + geom_boxplot()
            ap <- ap + ggtitle(title)
            ap <- ap + labs(x=xlabel, y=ylabel)
            ap <- ap + geom_jitter(position=position_jitter(w=0.1))

            print(ap)
            dev.off()
        }
        """
    plot_package = SignatureTranslatedAnonymousPackage(r_source, 'pck')
    return plot_package.taxon_abundance_box_plot
Esempio n. 5
0
def run_hydrology(init_gwstorage, init_C, init_Nash, init_Qq, init_Qs,
                  climate_type):
    """Load ``WrappableRunIhacresGw.R`` and call its ``RunIhacresGw`` function.

    The script directory is ``CONFIG.paths['hydrological']`` when configured,
    otherwise the directory of this module. The R function receives that
    directory without a trailing separator as its working directory, and
    ``<directory>/data`` as its data directory.

    Args:
        init_gwstorage, init_C, init_Qq, init_Qs, climate_type: Passed to the
            R function unchanged.
        init_Nash: Sequence of numbers; wrapped in a ``FloatVector``.

    Returns:
        The value returned by the R function ``RunIhacresGw``.
    """
    if "hydrological" in CONFIG.paths:
        base_dir = CONFIG.paths['hydrological']
        # The original appended "/" only in the fallback branch (operator
        # precedence of the conditional expression), so a configured path
        # without a trailing separator lost its last character and produced
        # "<path>data". Add the separator only when it is missing.
        if base_dir.endswith(("/", os.sep)):
            prefix = base_dir
        else:
            prefix = base_dir + "/"
    else:
        base_dir = os.path.dirname(__file__)
        prefix = base_dir + "/"
    #end if

    # Import the .R file so its functions become attributes of the package.
    r_path = os.path.join(base_dir, 'WrappableRunIhacresGw.R')
    with open(r_path) as r_file:
        r_source = r_file.read()
    IhacresGW = SignatureTranslatedAnonymousPackage(r_source, "IhacresGW")

    datadir = prefix + "data"
    # Remove the last slash: the R function expects no trailing separator.
    workingdir = prefix[:-1]

    return IhacresGW.RunIhacresGw(workingdir, datadir,
                                  init_gwstorage, init_C,
                                  FloatVector(init_Nash), init_Qq, init_Qs,
                                  climate_type)
Esempio n. 6
0
def get_taxon_abundance_stacked_bar_plot():
    """Return the R function ``taxon_abundance_stacked_bar_plot`` as a callable.

    The R function takes ``(data, plot_file_path, title, xlabel, ylabel)``.
    It sorts ``data`` by ``variant_allele_count``, computes the median
    ``abundance`` per (genotype, gene, taxon) group, and writes a stacked bar
    chart of those medians (filled by taxon) to an 8x4 PDF at
    ``plot_file_path``.
    """
    r_source = """
        require("dplyr")
        require("ggplot2")

        taxon_abundance_stacked_bar_plot <- function(data, plot_file_path, title, xlabel, ylabel) {

            temp <- data[order(data$variant_allele_count),] #sort by variant_allele_count
            temp$genotype <- factor(temp$genotype,levels=unique(temp$genotype)) #use reordered genotypes as levels

            #creates a new data frame with median abundance from each combo
            result <- temp %>%
                group_by(genotype, gene, taxon) %>%
                    summarize(medianAbundance = median(abundance))
            #If you want the heights of the bars to represent values in the data,
            #use stat="identity" and map a value to the y aesthetic.

            pdf(plot_file_path, width=8, height=4)

            ap <- ggplot(data=result, aes(x=genotype,y=medianAbundance,fill=taxon)) +
                geom_bar(stat='identity') +
                ggtitle(title)
            ap <- ap + labs(x=xlabel, y=ylabel)
            ap <- ap + theme(legend.direction = 'vertical', legend.position = 'bottom')
            ap <- ap + guides(fill = guide_legend(reverse = TRUE))
            print(ap)
            dev.off()
        }
        """
    plot_package = SignatureTranslatedAnonymousPackage(r_source, 'pck')
    return plot_package.taxon_abundance_stacked_bar_plot
Esempio n. 7
0
def extract_strain_id(busco_result):
    """List the strains whose BUSCO value is at least 95.

    The R helper reads ``busco_result`` with ``read.table``, keeps rows with
    ``V2 >= 95``, and drops every entry whose name equals a ``*_PB`` entry
    with the ``_PB`` suffix removed. The surviving names are then adjusted in
    Python: "70-15", "HO_busco" and "PH42_busco" are removed, and
    "magnaporthe_oryzae_70-15_8_proteins_T0", "HO" and "PH42" are appended.

    input: busco_result (path handed to R's ``read.table``)
    return: list of strain names

    Raises ``ValueError`` (from ``list.remove``) if one of the three names to
    be removed is not in the R result.
    """
    r_source = """
    extract_95 <- function(busco_result) {
        require(tidyverse)
        in_fl <- read.table(busco_result)
        in_fl_95 <- in_fl %>%
        filter(V2 >= 95)
        # print(in_fl_95$V1)
        pb_protein_id=in_fl_95[grep("_PB",in_fl_95$V1,ignore.case = F),1]
        need_remove=gsub("_PB","",pb_protein_id)
        in_fl_95_removed_pb=in_fl_95[!(in_fl_95$V1%in%need_remove),]
        # print(in_fl_95_removed_pb$V1)
        return(as.character(in_fl_95_removed_pb$V1))
    }
    """
    busco_pkg = SignatureTranslatedAnonymousPackage(r_source, "R_extract_95")
    strain_95_list = list(busco_pkg.extract_95(busco_result))

    # Swap the BUSCO-specific labels for the names used downstream.
    for busco_label in ("70-15", "HO_busco", "PH42_busco"):
        strain_95_list.remove(busco_label)
    strain_95_list.extend(
        ["magnaporthe_oryzae_70-15_8_proteins_T0", "HO", "PH42"])
    return strain_95_list
def remove_MGG_unpresent_Augustus(
        pav_orthofinder, MGG_unpresent_Augustus_unassianed_list_file_name,
        pav_orthofinder_1574):
    """Drop the listed unassigned genes from the pav_orthofinder workbook.

    Used to remove from pav_orthofinder the (1574) proteins that appear in
    the unassigned-gene list.

    input 1: pav_orthofinder (xlsx file read with ``read_xlsx``)
    input 2: MGG_unpresent_Augustus_unassianed_list (read with ``read.table``)
    output 1: pav_orthofinder_1574 (xlsx file written with ``WriteXLS``)

    In R, rows whose ``protein_id`` occurs in column ``V1`` of the list are
    filtered out, the first column is dropped, and the columns are reordered
    to ``c(1,158,2:157)`` before writing. All three arguments are converted
    with ``str()`` before being passed to R.
    """
    r_source = '''
    R_remove_MGG_unpresent_Augustus=function(
    pav_orthofinder_file_name,
    MGG_unpresent_Augustus_unassianed_list_file_name,
    pav_orthofinder_1574_file_name
    ){
        require(readxl)
        require(WriteXLS)
        require(dplyr)
        MGG_unpresent_Augustus_unassianed_list=read.table(MGG_unpresent_Augustus_unassianed_list_file_name)
        pav_orthofinder=read_xlsx(pav_orthofinder_file_name)
        pav_orthofinder_1574=pav_orthofinder %>% 
            filter(!(protein_id %in% MGG_unpresent_Augustus_unassianed_list$V1))
        pav_orthofinder_1574=pav_orthofinder_1574[,-1]
        pav_orthofinder_1574=pav_orthofinder_1574[,c(1,158,2:157)]
        WriteXLS::WriteXLS(pav_orthofinder_1574,pav_orthofinder_1574_file_name)
    }
    '''
    remover_pkg = SignatureTranslatedAnonymousPackage(
        r_source, "R_remove_MGG_unpresent_Augustus")
    r_arguments = (
        str(pav_orthofinder),
        str(MGG_unpresent_Augustus_unassianed_list_file_name),
        str(pav_orthofinder_1574),
    )
    remover_pkg.R_remove_MGG_unpresent_Augustus(*r_arguments)
Esempio n. 9
0
def predictPrices(path):
    """Source ``rpy2/project/R/predict.R`` and call its result on ``path``.

    The script is sourced through ``robjects.r.source``; the string form of
    the first element of the result is bound to the R name ``predictPrice``
    inside an anonymous package, and that function is then called with
    ``path``.

    Returns:
        Whatever the R function ``predictPrice`` returns.
    """
    script_path = os.path.abspath("rpy2/project/R/predict.R")
    sourced = robjects.r.source(script_path)
    from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
    r_definition = "predictPrice <- " + str(sourced[0])
    project = SignatureTranslatedAnonymousPackage(r_definition, "project")
    return project.predictPrice(path)
Esempio n. 10
0
def convertRtoPandas(file_path):
    """Load an RData file through R and return its table as a DataFrame.

    The module-level R source ``load_RData_func_str`` is packed as an
    anonymous package and its ``load_RData`` function is called on
    ``file_path``. The result is converted with ``pandas2ri.ri2py`` and
    labelled with the module-level ``col_names``.

    Args:
        file_path: Path of the RData file, passed to R unchanged.

    Returns:
        A ``pandas.DataFrame`` with columns ``col_names``.
    """
    # Pack the R loader function as a package
    r_pack = SignatureTranslatedAnonymousPackage(load_RData_func_str, "r_pack")

    pandas2ri.activate()
    try:
        r_data = r_pack.load_RData(file_path)
        return pd.DataFrame(pandas2ri.ri2py(r_data), columns=col_names)
    finally:
        # Restore the conversion state even if loading or conversion fails.
        pandas2ri.deactivate()
Esempio n. 11
0
    def getFDRCorrection(pvals):
        """Adjust p-values with R's ``p.adjust(..., method = "fdr")``.

        ``pvals`` is converted to an R ``FloatVector`` before the call; the
        value returned by the R function is passed back unchanged.
        """
        r_source = """
            fdr <- function(pvals) {
                return(p.adjust(pvals, method = "fdr"))
            }
            """
        stats_pkg = SignatureTranslatedAnonymousPackage(r_source, "rStats")
        r_pvalues = robjects.FloatVector(pvals)
        adjusted = stats_pkg.fdr(r_pvalues)
        return adjusted
Esempio n. 12
0
    def __init__(self, r_filename):
        """Load the R script at ``r_filename`` and keep it as ``self.r``.

        The script text is wrapped in an anonymous package and is additionally
        evaluated through ``robjects.r``.
        """
        # Read in r file and init data engine
        with open(r_filename, "r") as r_file:
            r_source = r_file.read()

        package = SignatureTranslatedAnonymousPackage(r_source, "r")
        robjects.r(r_source)
        self.r = package
def r_cal(df):
    """Build and plot an R point pattern from the ``x``/``y`` columns of ``df``.

    The R helper ``ptsPPP`` calls ``ppp(x, y, c(-25,25), c(-25,25))`` on the
    data frame and plots the result.

    NOTE(review): the R result is not returned (the function returns
    ``None``), and ``pandas2ri.activate()`` is never undone here. Confirm
    whether both are intended.
    """
    r_source = """
    ptsPPP <- function(df) {
        X <- with(df, ppp(x, y, c(-25,25), c(-25,25)))
        plot(X)
        return(X)
    }
    """
    spatial_pkg = SignatureTranslatedAnonymousPackage(r_source, "powerpack")
    pandas2ri.activate()
    xy_frame = pandas2ri.py2ri(df[["x", "y"]])
    spatial_pkg.ptsPPP(xy_frame)
def load_r_file(filename, namespace):
    """Return the R package for ``namespace``, loading ``filename`` on first use.

    Packages are cached in the module-level ``r_namespaces`` mapping. On a
    cache miss, numpy<->R conversion is activated, ``filename`` is resolved
    under ``PROJECT_DIR/r_src/forAndrej`` unless it already contains
    ``PROJECT_DIR``, and the file's source is wrapped in an anonymous package
    named ``namespace``.
    """
    if namespace in r_namespaces:
        return r_namespaces[namespace]

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()

    r_path = filename
    if PROJECT_DIR not in r_path:
        r_path = os.path.join(PROJECT_DIR, 'r_src', 'forAndrej', r_path)

    with open(r_path, 'r') as handle:
        r_source = handle.read()

    r_namespaces[namespace] = SignatureTranslatedAnonymousPackage(
        r_source, namespace)
    return r_namespaces[namespace]
Esempio n. 15
0
def arxiv_crawl(crawling_list,
                limit=None,
                batchsize=100,
                submission_range=None,
                update_range=None,
                delay=None):
    """
    This is a python wrapper for the aRxiv "arxiv_search" function.

    If submission_range or update_range are given, the results are filtered according to the date ranges.

    :param crawling_list: The subcategories to crawl. NOT "stat" -> USE "stat.AP" etc...
    :type crawling_list: dict of lists.
    :param limit: Max number of results to return.
    :type limit: int.
    :param batchsize: Number of queries per request.
    :type batchsize: int.
    :param submission_range: The range of submission dates.
    :type submission_range: Tuple (start,end).
    :param update_range: The range of last-update dates.
    :type update_range: Tuple (start,end).
    :param delay: Not used in the code visible here -- TODO document.

    :returns:  The created folder

    NOTE(review): the body visible here creates the timestamped working
    folder, configures logging and loads ``../r_scripts/arxiv.R``; it does not
    contain a ``return`` statement and exits the process via ``sys.exit`` if
    the R script cannot be loaded.
    """

    # Timestamp of starting datetime
    ts_start = time.time()
    timestamp = datetime.datetime.fromtimestamp(ts_start).strftime(
        '%Y-%m-%d_%H-%M-%S')

    # Create folder structure
    working_folder = base_directory + timestamp
    os.makedirs(working_folder)
    os.makedirs(working_folder + "/temp_files")

    # Setup logging
    config = logging_confdict(working_folder, __name__)
    logging.config.dictConfig(config)
    arxiv_logger = logging.getLogger(__name__)

    arxiv_logger.info("Starting new crawl for {}".format(str(crawling_list)))
    arxiv_logger.info("Created new folder: <<" + working_folder + ">>")

    # Load R-scripts
    arxiv_logger.debug("Loading R-Scripts ...")
    try:
        with open('../r_scripts/arxiv.R', 'r') as f:
            string = f.read()
        arxiv_crawler = SignatureTranslatedAnonymousPackage(
            string, "arxiv_crawler")
    # "except Exception, e" is Python-2-only syntax and ``e`` was unused;
    # this form is valid on both Python 2 and 3. The traceback is still
    # recorded by ``logger.exception``.
    except Exception:
        arxiv_logger.exception("Error while loading R-Scripts.")
        sys.exit('Could not load R-Scripts!')
Esempio n. 16
0
	def run(self):
		"""Run the R calibration (``apprentissage``) and update the UI accordingly.

		Loads ``fonctions_apprentissage.r`` as an anonymous R package, derives
		the output folder from ``self.dicoFoldersCalibration["leaf"]``, asks
		for confirmation before overwriting existing output files, calls the R
		function and then reports the outcome through the status bar and
		dialogs. Any exception is routed to ``self.displayError``.
		"""
		try:
			# "0" is kept when the R function is not run, e.g. when the user
			# declines to overwrite existing output files.
			result = "0"
			# grey out (disable) the calibration buttons
			self.disalbledButtonsCalibration()
			self.ui.resetCalibrationPushButton.setDisabled(True)

			with open("fonctions_apprentissage.r", "r", encoding="utf-8") as apprentissageRopen:
				apprentissage = "".join(apprentissageRopen.readlines())

			# From here on ``apprentissage`` is the R package, no longer the source text.
			apprentissage = SignatureTranslatedAnonymousPackage(apprentissage, "apprentissage")

			# Output path: the "leaf" folder path with its last component
			# removed; the basename is the last component of that path.
			self.CalibrationOutPath = "/".join(str(self.dicoFoldersCalibration["leaf"]).split("/")[:-1])
			self.CalibrationBasename = self.CalibrationOutPath.split("/")[-1]
			self.actualizeOutFiles()

			if debug: print("{}\n{}".format(apprentissage, dir(apprentissage)))

			# Test whether the RData file already exists; if so, remove the output
			# files when the user confirms, otherwise skip the analysis.
			if os.path.exists(self.CalibrationFilesOut["RData"]):
				reply = QMessageBox.question(self, 'Warning', 'File will be overwritten.\nDo you still want to proceed?', QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
				if reply == QMessageBox.Yes:
					# Every registered output file is deleted, not only the RData file.
					for key, path in self.CalibrationFilesOut.items():
						os.remove(path)
					reloadCalibration = True
				elif reply == QMessageBox.No:
					reloadCalibration = False
			else:
				reloadCalibration = True

			if reloadCalibration:
				self.ui.statusbar.showMessage(str("Running Calibration, please waiting ...."),9600)
				#result , self.CalibrationFilesOut["RData"] = apprentissage.apprentissage(self.dicoObjectOpenLineEditCalibration["leaf"],self.dicoObjectOpenLineEditCalibration["symptom"],self.dicoObjectOpenLineEditCalibration["background"]).r_repr().replace('"','').replace("c(","").replace(")","").split(",")
				# The R return value is parsed from its textual r_repr(): quotes,
				# "c(" and ")" are stripped and the remainder is split on commas
				# into exactly two strings (``good`` is not used afterwards).
				result , good = apprentissage.apprentissage(self.CalibrationOutPath).r_repr().replace('"','').replace("c(","").replace(")","").split(",")
				self.calibrationFileOpenLineEdit.setText(self.CalibrationFilesOut["RData"])
			# result "1" with an RData file on disk: new calibration produced.
			if result == "1" and os.path.exists(self.CalibrationFilesOut["RData"]):
				print(result, self.CalibrationFilesOut["RData"])
				self.infoDialogue(status = "new")
				self.ui.statusbar.showMessage(str("FINISH, files were product on : %s" % self.CalibrationOutPath),9600)
				self.ui.resetCalibrationPushButton.setEnabled(True)
			# result "0" with an RData file on disk: reported as already existing;
			# the line edit is cleared and the folder selection reset.
			elif  result == "0" and os.path.exists(self.CalibrationFilesOut["RData"]):
				self.infoDialogue(status = "already")
				print(result, self.CalibrationFilesOut["RData"])
				self.calibrationFileOpenLineEdit.setText("")
				self.ui.resetCalibrationPushButton.setEnabled(True)
				self.resetLoadFolder()
				self.enableButtonsCalibration()
			# result "0" and no RData file: treated as an R failure.
			elif result == "0" and not os.path.exists(self.CalibrationFilesOut["RData"]):
				self.displayError(error = "Error when running R code....")

		except Exception as e:
			self.displayError(error = e)
Esempio n. 17
0
def file_to_anonymous_package(
        file: str) -> SignatureTranslatedAnonymousPackage:
    """
    Source an R file in rpy2 as an anonymous package and return it.

    The package is named after the file's base name without its extension
    (``some/dir/file.R`` -> ``file``). Per the original author's note, that
    name is accessible as ``package.__rname__``.
    """
    base_name = os.path.basename(file)
    stem, _extension = os.path.splitext(base_name)
    with open(file, "r") as handle:
        r_source = handle.read()
    return SignatureTranslatedAnonymousPackage(r_source, name=stem)
Esempio n. 18
0
def crossref_lookup(working_folder,
                    index,
                    authors,
                    titles,
                    submitted,
                    num_threads=1):
    """Look up documents through Crossref using worker threads.

    ``index``, ``authors``, ``titles`` and ``submitted`` are iterated in
    lockstep; each author string is cut to its first 15 ``|``-separated
    entries and the resulting tuple is queued. ``num_threads``
    ``CrossrefAPIThread`` workers and one ``ProcessingThread`` are started,
    their events are set, and the threads are joined.

    Returns:
        A list with every item left on the processing thread's result queue.
    """
    # Load r-scripts
    print("\nLoading R-Scripts ...")
    with open('../r_scripts/doi_lookup.R', 'r') as r_file:
        r_source = r_file.read()
    doi_lookuper = SignatureTranslatedAnonymousPackage(r_source,
                                                       "doi_lookuper")

    cr_input_queue = Queue.Queue()
    cr_to_process = Queue.Queue()
    process_to_result = Queue.Queue()

    doc_count = 0
    records = zip(index, authors, titles, submitted)
    for doc_count, (idx, author, title, date) in enumerate(records, 1):
        # Keep at most the first 15 authors; shorter strings are unchanged.
        trimmed_author = "|".join(author.split("|")[:15])
        cr_input_queue.put((idx, trimmed_author, title, date))

    process_thread = ProcessingThread(working_folder, cr_to_process,
                                      process_to_result, doc_count)

    print("\nStarting crossref crawl process...")
    crossref_threads = []
    for _ in range(num_threads):
        worker = CrossrefAPIThread(cr_input_queue, cr_to_process,
                                   doi_lookuper)
        worker.start()
        crossref_threads.append(worker)

    process_thread.start()

    # Set every worker's event first, then wait for all of them to finish.
    for worker in crossref_threads:
        worker.event.set()
    for worker in crossref_threads:
        worker.join()

    process_thread.event.set()
    process_thread.join()

    # Drain whatever the processing thread produced.
    results = []
    while not process_to_result.empty():
        item = process_to_result.get()
        results.append(item)
    return results
def Bing_cust(lam1, lam2, lam3):
    """Build an R package whose ``rbingham(n, A)`` uses fixed eigenvalues.

    The three arguments are written (with ``%f``) into the R vector ``lam``
    inside ``rbingham``. Only the eigenvectors of ``A`` are taken from
    ``eigen(A)``; the eigenvalues come from ``lam1``, ``lam2`` and ``lam3``.
    The R function draws ``n`` samples by rejection sampling and returns
    ``tcrossprod(x, V)``.

    Returns:
        The ``SignatureTranslatedAnonymousPackage`` (R name ``powerpack``).
    """
    eigenvalues = (lam1, lam2, lam3)
    r_source = """
    rbingham <- function(n, A) {
       p <- ncol(A)  ## dimensionality of A
       eig <- eigen(A)
       V <- eig$vectors  ## eigenvectors
       lam <- c(%f,%f,%f)
       lam <- lam - lam[p]
       lam <- lam[-p]
       ### f.rbing part
       lam <- sort(lam, decreasing = TRUE)  ## sort the eigenvalues in desceding order
       nsamp <- 0
       X <- NULL
       lam.full <- c(lam, 0)
       qa <- length(lam.full)
       mu <- numeric(qa)
       sigacginv <- 1 + 2 * lam.full
       SigACG <- sqrt( 1 / ( 1 + 2 * lam.full ) )
       Ntry <- 0
    
       while (nsamp < n) {
         x.samp <- FALSE
         while ( !x.samp ) {
           yp <- rnorm(qa, mu, SigACG)
           y <- yp / sqrt( sum( yp^2 ) )
           lratio <-  - sum( y^2 * lam.full ) - qa/2 * log(qa) + 0.5 * (qa - 1) + qa/2 * log( sum(y^2 * sigacginv ) )
           if ( log(runif(1) ) < lratio) {
             X <- c(X, y)
             x.samp <- TRUE
             nsamp <- nsamp + 1
           }
           Ntry <- Ntry + 1
         }
       }
    
       x <- matrix(X, byrow = TRUE, ncol = qa)
       ## the avtry is the estimate of the M in rejection sampling
       ## 1/M is the probability of acceptance
       ## the x contains the simulated values
       tcrossprod(x, V) ## simulated data
     }
    """ % eigenvalues

    return SignatureTranslatedAnonymousPackage(r_source, "powerpack")
def set_minus_cut(start_point, pav_df_file_name, result_path):
    """Split a presence/absence table on one column and write the parts.

    The R function ``Cut`` reads the xlsx file ``pav_df_file_name`` and splits
    its rows on the column named ``start_point``:

    * rows where it equals 1 are written to ``set_minus_minus_<start_point>.xlsx``
      and their ``protein_id`` values to ``set_minus_gene_id_<start_point>.txt``;
    * rows where it equals 0 are written to ``set_minus_add_<start_point>.xlsx``;
      column sums over columns 2:157 of those rows are sorted and written to
      the two ``set_minus_sort_protein_id*`` text files.

    Output names are built by pasting ``result_path`` and the file name with
    no separator, so ``result_path`` must already end with one.
    ``pav_df_file_name`` is converted with ``str()`` before the call.
    """
    r_source = '''
    Cut=function(start_point,pav_df_file_name,result_path){
    require(readxl)
    require(WriteXLS)
    require(tidyverse)
    pav_df=read_xlsx(pav_df_file_name)
    gene_is=pav_df %>% 
        filter((!!sym(start_point))==1) %>%
        # filter(`70-15`)==1
        select("protein_id") 
    
    minus_part=pav_df %>% 
        filter((!!sym(start_point))==1) %>% 
        column_to_rownames("protein_id")
    # pav_df_colsum=colSums(minus_part_num)
    # pav_df_colsum_sort=sort(pav_df_colsum)
    add_part=pav_df %>% 
        filter((!!sym(start_point))==0) %>% 
        column_to_rownames("protein_id")
    # add_part=add_part %>% 
    #   column_to_rownames(pav_df_raw$...2)
    add_part_num=sapply(add_part[2:157], function(x) as.numeric(x))
    pav_df_colsum=colSums(add_part_num)
    pav_df_colsum_sort=sort(pav_df_colsum)
    
    write.table(attributes(pav_df_colsum_sort),paste(result_path,sprintf("set_minus_sort_protein_id_%s.txt", start_point),sep = ""),append = F,quote = F,row.names = F,col.names = F)
    write.table(pav_df_colsum_sort,paste(result_path,sprintf("set_minus_sort_protein_id_num_%s.txt", start_point),sep = ""),append = F,quote = F,row.names = T,col.names = F)
    
    
    WriteXLS::WriteXLS(
        minus_part,
        paste(result_path,sprintf("set_minus_minus_%s.xlsx", start_point),sep = ""),
        col.names = T,
        row.names = T
    )
    WriteXLS::WriteXLS(
        add_part,
        paste(result_path,sprintf("set_minus_add_%s.xlsx", start_point),sep = ""),
        col.names = T,
        row.names = T
    )
    write.table(gene_is,paste(result_path,sprintf("set_minus_gene_id_%s.txt", start_point),sep = ""),append = F,quote = F,row.names = F,col.names = F)
    }
    '''
    cut_pkg = SignatureTranslatedAnonymousPackage(r_source, "R_set_minus_cut")
    workbook_path = str(pav_df_file_name)
    cut_pkg.Cut(start_point, workbook_path, result_path)
Esempio n. 21
0
def matchit(outcome,
            treatment,
            data,
            method='nearest',
            distance='glm',
            replace=False):
    """Estimate an average treatment effect by matching with R's MatchIt.

    ``data`` is written to ``data.csv`` in the current working directory and
    read back in R. ``matchit`` is run twice with ``estimand="ATE"``: once on
    the data as given and once with the treatment indicator flipped
    (``1 - treatment``). For every matched pair the outcome difference is
    taken, and the mean over both runs is returned.

    Args:
        outcome: Name of the outcome column.
        treatment: Name of the treatment column (flipped as ``1 - value`` for
            the second run).
        data: ``pandas.DataFrame``; every column other than ``outcome`` and
            ``treatment`` is used as a covariate in the matching formula.
        method: Passed to R's ``matchit(method=...)``.
        distance: Passed to R's ``matchit(distance=...)``. Previously this
            argument was accepted but never forwarded to R.
        replace: Whether matching is done with replacement.

    Returns:
        The mean of the per-pair outcome differences (column ``CATE``).
    """
    r_replace = 'TRUE' if replace else 'FALSE'
    data.to_csv('data.csv', index=False)

    covariates = [
        str(cov) for cov in data.columns
        if cov != outcome and cov != treatment
    ]
    formula_cov = treatment + ' ~ ' + '+'.join(covariates)

    string = """
    library('MatchIt')
    data <- read.csv('data.csv')
    r <- matchit( %s,estimand="ATE", method = "%s", distance = "%s", data = data, replace = %s)
    matrix <- r$match.matrix[,]
    names <- as.numeric(names(r$match.matrix[,]))
    mtch <- data[as.numeric(names(r$match.matrix[,])),]
    hh <- data[as.numeric(names(r$match.matrix[,])),'%s']- data[as.numeric(r$match.matrix[,]),'%s']

    data2 <- data
    data2$%s <- 1 - data2$%s
    r2 <- matchit( %s, estimand="ATE", method = "%s", distance = "%s", data = data2, replace = %s)
    matrix2 <- r2$match.matrix[,]
    names2 <- as.numeric(names(r2$match.matrix[,]))
    mtch2 <- data2[as.numeric(names(r2$match.matrix[,])),]
    hh2 <- data2[as.numeric(r2$match.matrix[,]),'%s'] - data2[as.numeric(names(r2$match.matrix[,])),'%s']
    """ % (formula_cov, method, distance, r_replace, outcome, outcome,
           treatment, treatment, formula_cov, method, distance, r_replace,
           outcome, outcome)

    psnn = SignatureTranslatedAnonymousPackage(string, "powerpack")
    # Stack the pair-wise differences of both runs, indexed by the row names
    # of the matched units.
    t_hat = pd.DataFrame(np.hstack((np.array(psnn.hh), np.array(psnn.hh2))),
                         index=list(psnn.names.astype(int)) +
                         list(psnn.names2.astype(int)),
                         columns=['CATE'])
    ate = np.mean(t_hat['CATE'])
    return ate
Esempio n. 22
0
def run_mccoil(barcode_file_lines, maf_file_lines=None, verbose=False):
    """Run the R ``McCOIL_categorical`` routine on barcode data.

    Args:
        barcode_file_lines: Lines of a barcode file, parsed into a
            ``Barcode.SetOfBarcodes``.
        maf_file_lines: Optional lines of a MAF file. When falsy, minor
            allele frequencies are computed from the barcodes instead.
        verbose: When true, print the zygosity matrix and its R data frame.

    Returns:
        On success, a dict with keys ``'mafs'`` (site/prediction pairs) and
        ``'cois'`` (sample/prediction pairs), each a list of tuples.
        If the barcode set fails validation, the error message is printed and
        an empty list is returned; if the MAFs fail validation, the message is
        printed and an empty dict is returned (both kept for backward
        compatibility).
    """
    ## init barcode set
    barcode_set = Barcode.SetOfBarcodes()
    barcode_set.readBarcodeFileLines(barcode_file_lines)
    # validate
    is_valid_barcode_set, err_msg = barcode_set.validate()
    if not is_valid_barcode_set:
        print(err_msg)  # single-argument form works on Python 2 and 3
        return []
    ## init mafs
    if not maf_file_lines:
        mafs = barcode_set.computeMAFFromBarcodes(1)
    else:
        mafs = MAF.MAF()
        mafs.readMAFFileLines(maf_file_lines)
    mafs_R_vector = robjects.Vector(mafs.minor_allele_freqs())
    # validate
    is_valid_mafs, err_msg = mafs.validate()
    if not is_valid_mafs:
        print(err_msg)
        return {}
    ## compute zygosity matrix, then convert to R DataFrame
    zygosity_matrix = barcode_set.to_zygosity_matrix(mafs,
                                                     header=True,
                                                     index=True)
    if verbose:
        print(zygosity_matrix)
    data = to_R_zygosity_df(zygosity_matrix)
    if verbose:
        print(data)
    ## import MCCOIL (close the script file once it has been read)
    with open(mccoil_R, 'r') as r_file:
        mccoil_R_code = r_file.read()
    mccoil = SignatureTranslatedAnonymousPackage(mccoil_R_code, 'mccoil')
    ## compute result
    result = mccoil.McCOIL_categorical(data, P=mafs_R_vector)
    ## get sites/samples (last two elements of the R result)
    sites, samples = list(result[-2]), list(result[-1])
    ## get maf/coi predictions, which map 1-to-1 sites/samples
    # (i.e. maf_prediction[i] is prediction for site[i])
    maf_predictions, coi_predictions = list(result[6]), list(result[5])
    # list(...) keeps the Python 2 behaviour (lists of pairs) on Python 3,
    # where zip() alone would be a one-shot iterator.
    return {
        'mafs': list(zip(sites, maf_predictions)),
        'cois': list(zip(samples, coi_predictions))
    }
Esempio n. 23
0
 def get_arima_rsi(prices):
     """Compute ARIMA(0,0,0) coefficients and an RSI value for ``prices`` in R.

     ``prices`` is wrapped in a ``pandas.DataFrame`` and handed to the R
     function ``calculate`` together with its length. The R code drops NAs,
     converts the data to a time series, fits ``arima(x, c(0,0,0))`` and takes
     element ``size`` of ``RSI(x, size-1)``; both are returned as one array.
     pandas<->R conversion is activated and left active.
     """
     price_frame = pd.DataFrame(prices)
     pandas2ri.activate()
     r_source = """ calculate <- function(x, size=100){
                             x <- na.omit(x)
                             library(TTR)
                             library(stats)
                             x <- ts(x)
                             f <- function(m) class(try(solve(m),silent=T))=="matrix"
                             if(f(x)){
                             x[50] = x[50] + 2
                             }
                             arima <- arima(x, c(0,0,0))
                             rsi <- RSI(x, size-1)[size]
                             list <-c(arima$coef, rsi)
                             return(as.array(list))
                             }"""
     model_pkg = SignatureTranslatedAnonymousPackage(r_source, "calculate")
     n_rows = len(price_frame)
     return model_pkg.calculate(price_frame, n_rows)
Esempio n. 24
0
def xml2df(url):
     """Parse the XML at ``url`` in R and return it as a pandas DataFrame.

     The R helper ``getXML`` parses the document with ``xmlTreeParse``,
     converts it to a list without attributes, and flattens each top-level
     element into one data-frame row via ``ldply``. The R data frame is then
     converted with ``pandas2ri.ri2py_dataframe``.
     """
     from rpy2.robjects.packages import SignatureTranslatedAnonymousPackage
     from rpy2.robjects import pandas2ri

     r_source = """
     require(XML)
     require(plyr)

     getXML <- function(x) {
          xmlfile <- xmlTreeParse(x)
          temp = xmlToList(xmlfile, addAttributes = F)
          df <- ldply(temp, .fun=function(x) {data.frame(t(unlist(x)))})
          return(df)
     }
     """
     xml_pkg = SignatureTranslatedAnonymousPackage(r_source, "test")

     # Convert the R data frame into a pandas DataFrame.
     r_frame = xml_pkg.getXML(url)
     return pandas2ri.ri2py_dataframe(r_frame)
Esempio n. 25
0
def main():
    """Run the R breakout-detection function on the fuel data and print it.

    Reads the first column of ``breakout_detection_wraper/fuel_data.csv`` as
    floats, loads ``breakout_detection_wraper/breakout_function.R`` as an
    anonymous R package and calls its ``Detect`` function with the data and
    the parameters configured below.
    """
    # Read the first CSV column as floats; blank rows are skipped instead of
    # raising IndexError.
    with open('breakout_detection_wraper/fuel_data.csv', 'r') as csvfile:
        mydata = [float(row[0]) for row in csv.reader(csvfile) if row]

    # Define the parameters to configure the break out detection algoritm
    minsize = 30
    method = 'multi'
    degree = 1
    # Read the R source verbatim. The former os.linesep.join(readlines())
    # inserted an extra line break after every line, because readlines()
    # already keeps the line terminators.
    with open('breakout_detection_wraper/breakout_function.R') as code:
        rcode = code.read()
    # Create the wrapper as an anonymous package signature
    wrapper = SignatureTranslatedAnonymousPackage(rcode, "breakout_function")
    # Execute the method from the wrapper
    result = wrapper.Detect(FloatVector(mydata), minsize, method, degree)
    # Print the result returned from the R function
    print(result)
Esempio n. 26
0
 def __init__(self):
     """Register the R helpers used by this object.

     After ``self.pd_active()``, each ``self.func(<name>, <source>)`` result
     is stored on a private attribute; the sources are class-level constants
     such as ``self.HIST``. ``self.available`` stores the result of calling
     the ``py_avail`` helper, ``self.installed`` comes from
     ``self.calc('installed.packages')``, and ``self.anon`` wraps
     ``self.ANON_PACK`` as an anonymous R package.

     NOTE(review): the semantics of ``self.func``/``self.calc`` are not
     visible here -- presumably ``func`` defines the R source under the given
     name and returns a callable; confirm against their definitions.
     """
     self.pd_active()
     self._dotty = self.func('py_dotted_data', self.DOTTED_DATA)
     self.pdata = self.func('py_packdata', self.PACKDATA)
     # Note the trailing call: the helper is invoked immediately.
     self.available = self.func('py_avail',self.AVAIL)()
     self.installed = self.calc('installed.packages')
     self._histlines = self.func('py_draw_histlines', self.HISTLINES)
     self._lines = self.func('py_draw_lines', self.LINES)
     self._hist  = self.func('py_draw_hist', self.HIST)
     self._pareto = self.func('py_draw_pareto', self.PARETO)
     self._sleaf = self.func('py_draw_sleaf', self.SLEAF)
     self._csv   = self.func('py_r_csvread', self.CSV)
     # NOTE(review): the next two reuse the name 'py_r_csvread' from the CSV
     # helper above -- looks like a copy-paste; confirm whether distinct
     # names (e.g. for TAPPLY / CLOSEST) were intended.
     self._tapply = self.func('py_r_csvread', self.TAPPLY)
     self._closest = self.func('py_r_csvread', self.CLOSEST)
     self._jupyter_opt = self.func('py_r_jupyter', self.JUPYTER_OPT)
     self._readtab = self.func('py_r_readtab', self.READTAB)
     self._mul   = self.func('py_r_mul', self.MUL)
     self._exp   = self.func('py_r_exp', self.EXP)
     self._div   = self.func('py_r_div', self.DIV)
     self._add   = self.func('py_r_add', self.ADD)
     self._sub   = self.func('py_r_sub', self.SUB)
     self._str   = self.func('py_r_str', self.STR)
     self._cond   = self.func('py_r_cond', self.COND)
     self._lmplot   = self.func('py_r_lmplot', self.LMPLOT)
     self._plot   = self.func('py_r_plot', self.PLOT)
     self._splot   = self.func('py_r_splot', self.SPLOT)
     self._samps = self.func('py_sample_size', self.SAMP_SIZE)
     self._sample = self.func('py_sample', self.SAMPLE)
     self._randomsample = self.func('py_randomsample', self.RANDOMSAMPLE)
     self._column_ext = self.func('py_column_extract', self.COLUMN_EXTRACT)
     self._serror = self.func('py_serror_samp', self.SERROR_SAMP)
     self._pretty = self.func('py_pretty_frame', self.PRETTY_FRAME )
     self._show_row = self.func('py_show_row', self.SHOW_ROW)
     # NOTE(review): same name 'py_show_row' as the line above, with a
     # different source (SOME_NUMBERS) -- confirm this is intended.
     self._some_nums = self.func('py_show_row', self.SOME_NUMBERS)
     self._packs = self.func('py_packs', self.PACKS)
     self._help = self.func('py_help', self.HELP)
     self._contents = self.func('py_contents', self.PACK_CONTENTS)
     self.anon  = SignatureTranslatedAnonymousPackage(self.ANON_PACK, "anon_pack")
def MGG_unpresent_Augustus_locate_in_pav_orthofinder(
        MGG_unpresent_Augustus_list_file_name, pav_orthofinder_file_name,
        gene_protein_mapping_table_file_name,
        pav_MGG_unpresent_Augustus_file_name,
        orthofinder_unassianed_tsv_file_name,
        MGG_unpresent_Augustus_unassianed_list_file_name):
    """Locate the listed genes in the pav_orthofinder table and write results.

    All work is done by an R function of the same name:

    1. The gene list is merged with the gene/protein mapping table on their
       first columns.
    2. Rows of the pav_orthofinder workbook (first column dropped) whose
       ``protein_id`` is in the merged table's ``V2`` are kept, reordered to
       ``c(158,1:157)`` and written to
       ``pav_MGG_unpresent_Augustus_file_name`` with ``WriteXLS``.
    3. The ``protein_id`` values that also occur in the ``70-15_protein``
       column of the tab-separated unassigned table are written, one per line,
       to ``MGG_unpresent_Augustus_unassianed_list_file_name``.

    The first three arguments and the fifth are input file names; the fourth
    and sixth are output file names.
    """
    r_source = '''
    MGG_unpresent_Augustus_locate_in_pav_orthofinder=function(MGG_unpresent_Augustus_list_file_name,pav_orthofinder_file_name,gene_protein_mapping_table_file_name,pav_MGG_unpresent_Augustus_file_name,orthofinder_unassianed_tsv_file_name,MGG_unpresent_Augustus_unassianed_list_file_name){
        require(readxl)
        require(WriteXLS)
        require(dplyr)
        MGG_unpresent_Augustus_list=read.table(MGG_unpresent_Augustus_list_file_name,stringsAsFactors = F)
        pav_orthofinde=read_xlsx(pav_orthofinder_file_name)
        pav_orthofinde=pav_orthofinde[,-1]

        gene_protein_mapping_table=read.table(gene_protein_mapping_table_file_name)

        pav_orthofinde_protein=merge(MGG_unpresent_Augustus_list,gene_protein_mapping_table,by.x = 1,by.y = 1)
        pav_MGG_unpresent_Augustus =pav_orthofinde %>% 
        filter(protein_id %in% pav_orthofinde_protein$V2)
        pav_MGG_unpresent_Augustus=pav_MGG_unpresent_Augustus[,c(158,1:157)]
        WriteXLS::WriteXLS(pav_MGG_unpresent_Augustus,pav_MGG_unpresent_Augustus_file_name)

        orthofinder_unassianed=read.table(orthofinder_unassianed_tsv_file_name,sep = "\t",header = T,check.names = F)
        MGG_unpresent_Augustus_unassianed_list=intersect(pav_MGG_unpresent_Augustus$protein_id,orthofinder_unassianed$`70-15_protein`)
        write.table(MGG_unpresent_Augustus_unassianed_list,MGG_unpresent_Augustus_unassianed_list_file_name,quote = F,row.names = F,col.names = F)
}
    '''
    locate_pkg = SignatureTranslatedAnonymousPackage(
        r_source, "R_MGG_unpresent_Augustus_locate_in_pav_orthofinder")
    locate = locate_pkg.MGG_unpresent_Augustus_locate_in_pav_orthofinder
    locate(MGG_unpresent_Augustus_list_file_name,
           pav_orthofinder_file_name,
           gene_protein_mapping_table_file_name,
           pav_MGG_unpresent_Augustus_file_name,
           orthofinder_unassianed_tsv_file_name,
           MGG_unpresent_Augustus_unassianed_list_file_name)
Esempio n. 28
0
def filter_pan_id(pan_id_file_name, length_table_file,
                  filtered_pan_id_file_name):
    '''
    Filter a pan-id table by running an embedded R routine through rpy2.

    The R code reads both inputs with read.table, keeps the rows of the
    length table whose second column (V2) is greater than 20, merges them
    with the pan-id table (pan-id column 2 against length-table column 1,
    keeping every remaining length-table row), drops the third merged column,
    swaps the two columns that are left and writes them tab-separated with
    no quoting, row names or header.

    input 1: pan_id_file_name
    input 2: length_table_file
    output 1: filtered_pan_id_file_name
    '''
    r_source = '''
    filter_pan_id=function(pan_id_file_name,length_table_file,filtered_pan_id_file_name){
    require(dplyr)
    pan_id=read.table(pan_id_file_name)
    length_table=read.table(length_table_file)
    length_table_filter=length_table %>% 
        filter(V2>20)
    pan_id_filter=merge(pan_id,length_table_filter,by.x = 2,by.y = 1,all.y = T)
    pan_id_filter=pan_id_filter[,-3]
    pan_id_filter=pan_id_filter[,c(2,1)]
    write.table(pan_id_filter,filtered_pan_id_file_name,sep = "\t",quote = F,row.names = F,col.names = F)
    }
    '''
    # Compile the R source into an anonymous package, then look up the R
    # function it defines and call it with the three file names.
    r_package = SignatureTranslatedAnonymousPackage(r_source,
                                                    "R_filter_pan_id")
    run_filter = r_package.filter_pan_id
    run_filter(pan_id_file_name, length_table_file, filtered_pan_id_file_name)
  }
  else{
    multi_blast_vlaue_df=R_blast_vlaue_df %>% 
      filter(gene_name %in% multi_copy_gene_Vector) %>% 
      mutate(ok = blast_value >= lower & blast_value <= upper)
    if(!(all(multi_blast_vlaue_df$ok))){
      multi_blast_vlaue_df=multi_blast_vlaue_df[multi_blast_vlaue_df$ok,]
    }
    multi_blast_vlaue_df=mutate(multi_blast_vlaue_df,strain_id=strsplit(gene_name,"_")) 
    dsaf=aggregate(blast_value~strain_id,data = multi_blast_vlaue_df,max)
    best_value_df=filter(multi_blast_vlaue_df,blast_vlaue %in% dsaf)
  }
  return(c(single_blast_vlaue_df$gene_name,best_value_df$gene_name))
}
'''
# Compile R source strings into rpy2 anonymous packages at module level so the
# R functions they define can be reached as attributes of these objects.
# NOTE(review): R_code_parse_blast_result and R_code_filter_strain_num are
# defined outside this excerpt -- presumably the R snippets above; confirm.
R_parse_blast_result = SignatureTranslatedAnonymousPackage(
    R_code_parse_blast_result, "R_parse_blast_result")
R_filter_strain_num = SignatureTranslatedAnonymousPackage(
    R_code_filter_strain_num, "R_filter_strain_num")


def extract_gene_gff(orthogroup_file, strain_db_dir_path, contig_path,
                     all_row_gene_fasta_dir):
    '''
    Publish database handles as module globals and stop early when the
    FASTA file for this orthogroup is already present.

    input 1: orthogroup_file -- object with a .stem; names the FASTA file
    input 2: strain_db_dir_path -- directory whose entries are each opened
             with gffutils.FeatureDB and stored as "<stem>_db"
    input 3: contig_path -- not used by the statements visible here
    input 4: all_row_gene_fasta_dir -- directory holding "<stem>.fasta"

    Side effect: writes "<stem>_db" entries and "MGG_db" into globals().
    Returns None.
    '''
    module_scope = globals()
    for db_path in strain_db_dir_path.iterdir():
        # The key is the stripped file stem with a "_db" suffix.
        module_scope["{}_db".format(
            db_path.stem.strip())] = gffutils.FeatureDB(db_path)

    reference_fasta = (
        "../../70-15_refference_genome/magnaporthe_oryzae_70-15_8_genes.fasta")
    module_scope["MGG_db"] = SeqIO.index(reference_fasta, "fasta")

    fasta_name = "{}.fasta".format(orthogroup_file.stem)
    gene_file_path = all_row_gene_fasta_dir / fasta_name
    # Nothing else follows in this body, so both outcomes return None.
    if gene_file_path.exists():
        return None
Esempio n. 30
0
import warnings
from rpy2.rinterface import RRuntimeWarning
# Install a global filter that ignores every warning of category
# RRuntimeWarning (the class imported from rpy2 above).
warnings.filterwarnings("ignore", category=RRuntimeWarning)

#from numba.typing.typeof import typeof
#import datasets
#from joblib import Memory
#import networkx as nx


# Directory part of this module's file name; may be "" when __file__ carries
# no directory component.
path = os.path.dirname(__file__)
#memory = Memory(cachedir=path+"/cache", verbose=0)

# Build the script location with os.path.join rather than string
# concatenation: with path == "" the old path + "/ptestglm.R" resolved to
# "/ptestglm.R" (filesystem root) instead of a file in the current directory.
with open(os.path.join(path, "ptestglm.R"), "r") as pdnfile:
    code = pdnfile.read()

# Compile the R source into an rpy2 anonymous package after the file has been
# closed; its R functions become attributes of ptestpdn.
ptestpdn = SignatureTranslatedAnonymousPackage(code, "ptestpdnglm")
 
 

# def getPtestpdnAdjacencyMatrix(data):
#     numpy2ri.activate()
#     try:
#         df = robjects.r["as.data.frame"](data)
#         out = ptestpdn.ptestglmblock(df)
#         #print(out)
#     except Exception as e:
#         #numpy.savetxt("/Users/alejomc/Dropbox/pspn/spyn/bin/data/graphlets/errordata.txt", data)
#         print(e)
#         print(data)
#         raise e 
#