Example #1
def _load_folds_from_rdata(data_file, fold_id):
    """
    (internal) returns the folds stored in an RData file in the pipeline
    :param data_file: path to the RData file containing a `cvindices` object
    :param fold_id: identifier of the fold assignments to load
    :return: vector of fold assignments
    """

    fold_id = validate_fold_id(fold_id)
    r_variables = "data_file='%s'; fold_id='%s'" % (data_file, fold_id)

    import numpy as np
    import rpy2.robjects as rn
    from .rpy2_helper import r_clear

    if is_inner_fold_id(fold_id):
        r_cmd = """raw_data = new.env()
        load(data_file, envir=raw_data)
        folds = raw_data$cvindices[[fold_id]][,1]
        """
    else:
        r_cmd = """raw_data = new.env()
        load(data_file, envir=raw_data)
        folds = raw_data$cvindices[[substring(fold_id, 1, 3)]][, as.double(substr(fold_id, 5, 6))]
        """

    rn.reval(r_variables)
    rn.reval(r_cmd)
    folds = np.array(rn.r['folds'])
    folds = validate_folds(folds, fold_id)
    r_clear()
    return folds
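The R code above indexes cvindices either by the full fold_id or by substring(fold_id, 1, 3) plus a repeat column, which suggests fold ids of the form "KxxNyy" (e.g. repeat 1 of 5-fold CV). A hypothetical call, assuming an RData file that contains a cvindices object (file name and fold id are illustrative only):

    folds = _load_folds_from_rdata(data_file='breastcancer.RData', fold_id='K05N01')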
Example #2
    def create_excel_file(self, filename: str) -> None:
        """
        Create the Excel result file based on the `reactome_obj` in the R session
        :param filename: Path to the Excel file that will be created
        """

        # inject the result path
        ri.globalenv["excel_result_file"] = ri.StrSexpVector([filename])

        ro.reval("""
            # get the pathways table
            pathway_result <- pathways(reactome_obj)

            # create the Excel file
            library(xlsx)

            # add the combined pathway data
            write.xlsx2(pathway_result, file = excel_result_file, sheetName = "Pathways",
                        col.names = T, row.names = T, append = F)

            # add the expression values for every result
            if ("fold_changes" %in% result_types(reactome_obj)) {
                for (dataset_name in names(reactome_obj)) {
                    fold_changes <- get_result(reactome_obj, type = "fold_changes", name = dataset_name)

                    # add the fold-changes to the Excel file
                    write.xlsx2(fold_changes, file = excel_result_file, 
                                sheetName = paste0(dataset_name, " - fold changes"),
                                col.names = T, row.names = F, append = T)
                }
            }
        """)
Example #3
    def test_df_to_str(self):
        # create the test data.frame
        ro.reval("""
          test_frame = data.frame(
              name = c("John", "Doe"),
              age = c(1, 2),
              row.names = c("Id1", "Id2")
          )

          test_frame_2 = data.frame(
              name = c("John", "Doe"),
              age = c(1.12345, 2.12345),
              row.names = c("Id1", "Id2")
          )
      """)

        r_data_frame = ri.globalenv["test_frame"]

        string_df = ReactomeRAnalyser.data_frame_to_string(r_data_frame)

        self.assertIsNotNone(string_df)
        self.assertEqual("\\tname\\tage\\nId1\\tJohn\\t1.0\\nId2\\tDoe\\t2.0",
                         string_df)

        # check the precision
        r_data_frame2 = ri.globalenv["test_frame_2"]

        string_df2 = ReactomeRAnalyser.data_frame_to_string(r_data_frame2)

        self.assertIsNotNone(string_df2)
        self.assertEqual(
            "\\tname\\tage\\nId1\\tJohn\\t1.12345\\nId2\\tDoe\\t2.12345",
            string_df2)
Example #4
    def _mark_timestamp(self, blSegsL):
        """
        mark segs in final sample
        """
        # R is used for the computation here

        # First, compute the set of baseline differences between adjacent data,
        #
        # or simply list all baselines directly.

        # Then, map the non-baseline data onto those baseline differences to
        # determine which difference each point belongs to,
        #
        # or find the largest index that falls within a baseline.

        # Finally, among all data points, the first one to fall into a
        # baseline difference is the target timestamp,
        #
        # and that index is used as the timestamp.

        import numpy as np
        from rpy2.robjects.packages import importr
        from rpy2.robjects import IntVector, StrVector, globalenv
        import rpy2.robjects as robjects

        GR = importr('GenomicRanges')
        IR = importr('IRanges')

        GRL = GR.GRangesList()
        globalenv["GRL"] = GRL
        for blSegs, idx in zip(blSegsL, range(len(blSegsL))):
            chromNames = StrVector([seg.chromName for seg in blSegs])
            starts = IntVector([seg.start for seg in blSegs])
            ends = IntVector([seg.end for seg in blSegs])
            tempGR = GR.GRanges(seqnames = chromNames, ranges=IR.IRanges(starts, ends))
            globalenv["tempGR"] = tempGR
            robjects.r("GRL[[{0}]]=tempGR".format(str(idx+1)))
            GRL = robjects.r["GRL"]

        # since the list stores references to the target Seg objects, updating nonBlSegs is enough
        nonBlSegs = list(set(self._segPoolL[-1].segments) - set(blSegsL[-1]))
        chromNames = StrVector([seg.chromName for seg in nonBlSegs])
        starts = IntVector([seg.start for seg in nonBlSegs])
        ends = IntVector([seg.end for seg in nonBlSegs])
        nonBlGR = GR.GRanges(seqnames = chromNames, ranges=IR.IRanges(starts, ends))

        # fo = IR.findOverlaps(nonBlGR, GRL)
        # For large SCNA
        fo = IR.findOverlaps(nonBlGR, GRL, minoverlap=5000)
        globalenv["fo"] = fo
        robjects.reval("fom <- as.matrix(fo)")
        overlapIdx = np.array(list(robjects.r.fom)).reshape(tuple(reversed(robjects.r.fom.dim))) - 1
        # [[2, 2, 3, 3],
        # [1, 2, 1, 2]]
        #
        print(overlapIdx)

        for index in set(overlapIdx[0,]):
            yIdxes = np.where(overlapIdx[0,]==index)[0]
            ts = np.max(overlapIdx[1,yIdxes]+1)
            nonBlSegs[index].tag = str(ts)
Example #5
def _load_processed_data_rdata(file_name):

    import numpy as np
    import rpy2.robjects as rn

    rn.reval("data = new.env(); load('%s', data)" % file_name)
    r_data = rn.r.data
    data_fields = list(rn.r.data.keys())

    loaded_data = dict()

    for xf, yf, sw in [('X', 'Y', 'sample_weights'),
                       ('X_test', 'Y_test', 'sample_weights_test'),
                       ('X_validation', 'Y_validation',
                        'sample_weights_validation')]:

        if xf in data_fields and yf in data_fields and len(np.array(
                r_data[yf])) > 0:

            loaded_data[yf] = np.array(r_data[yf]).flatten()
            loaded_data[yf][loaded_data[yf] == 0] = -1
            loaded_data[xf] = np.array(r_data[xf])

            if loaded_data[xf].shape[1] == len(loaded_data[yf]):
                loaded_data[xf] = np.transpose(loaded_data[xf])

            if sw in data_fields:
                loaded_data[sw] = np.array(r_data[sw]).flatten()

    if 'variable_names' in data_fields:
        loaded_data['variable_names'] = np.array(
            rn.r.data['variable_names']).tolist()
    elif 'X_headers' in data_fields:
        loaded_data['variable_names'] = np.array(
            rn.r.data['X_headers']).tolist()
    elif 'X_header' in data_fields:
        loaded_data['variable_names'] = np.array(
            rn.r.data['X_header']).tolist()

    if 'outcome_name' in data_fields:
        loaded_data['outcome_name'] = np.array(r_data['outcome_name'])[0]
    elif 'Y_headers' in data_fields:
        loaded_data['outcome_name'] = np.array(r_data['Y_headers'])[0]
    elif 'Y_header' in data_fields:
        loaded_data['outcome_name'] = np.array(r_data['Y_header'])[0]

    if 'format' in data_fields:
        loaded_data['format'] = np.array(r_data['format'])[0]

    if 'partitions' in data_fields:
        loaded_data['partitions'] = np.array(rn.r.data['partitions']).tolist()

    cvindices = _load_cvindices_from_rdata(file_name)
    data = set_defaults_for_data(loaded_data)
    return data, cvindices
Example #6
def r_save_to_disk(file_name, variables_to_save=None):

    import os
    import rpy2.robjects as rn

    if variables_to_save is None:
        save_command = "save(file='%s')" % file_name
    else:
        save_string = ", ".join(variables_to_save)
        save_command = "save(%s, file='%s')" % (save_string, file_name)

    rn.reval(save_command)
    assert (os.path.isfile(file_name))
    return True
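A small usage sketch for this helper (variable names are illustrative):

    import rpy2.robjects as rn

    rn.reval("a = 1; b = 'two'")
    r_save_to_disk('/tmp/ab.RData', variables_to_save=['a', 'b'])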
Example #7
def _save_data_as_rdata(file_name, data, cvindices):

    import pandas as pd
    import rpy2.robjects as rn
    from .rpy2_helper import r_assign, r_save_to_disk
    from rpy2.robjects import pandas2ri
    data = set_defaults_for_data(data)
    assert check_data(data)

    fields_to_save = [
        "format", "Y", "sample_weights", "outcome_name", "variable_names"
    ]

    try:

        for k in fields_to_save:
            r_assign(data[k], k)

    except Exception:

        from eqm.debug import ipsh
        ipsh()

    r_assign(cvindices, "cvindices")

    pandas2ri.activate()

    X_df = pd.DataFrame(data=data['X'])
    X_df.columns = data['variable_names']
    rn.r.assign('X', X_df)

    # test set
    has_test_set = ('X_test' in data) and ('Y_test'
                                           in data) and ('sample_weights_test'
                                                         in data)
    if has_test_set:
        X_test_df = pd.DataFrame(data=data['X_test'])
        X_test_df.columns = data['variable_names']
        rn.r.assign('X_test', pandas2ri.py2ri(X_test_df))
        r_assign(data['Y_test'], 'Y_test')
        r_assign(data['sample_weights_test'], 'sample_weights_test')
    else:
        rn.reval("""
                X_test = matrix(data=NA, nrow = 0, ncol = ncol(X));
                Y_test = matrix(data=NA, nrow = 0, ncol = 1);
                sample_weights_test = matrix(data=1.0, nrow = 0, ncol = 1);
                """)

    pandas2ri.deactivate()
    variables_to_save = fields_to_save + [
        "cvindices", "X", "X_test", "Y_test", "sample_weights_test"
    ]
    r_save_to_disk(file_name, variables_to_save)
    return True
Example #8
    def run(self) -> None:
        try:
            # inject the analysis_result into the R session
            ri.globalenv["analysis_result_json"] = ri.StrSexpVector(
                [self.analysis_result.decode()])

            # inject the metadata
            ri.globalenv["include_interactors"] = ri.BoolSexpVector(
                [self.report_request.include_interactors])
            ri.globalenv["include_disease"] = ri.BoolSexpVector(
                [self.report_request.include_disease])

            # create the analysis result object
            LOGGER.debug("Creating result R object...")
            ro.reval("""
                library(ReactomeGSA)

                # convert the reactome object
                result_obj <- jsonlite::fromJSON(analysis_result_json)
                reactome_obj <- ReactomeGSA:::convert_reactome_result(result_obj)
            """)

            # create the Excel file
            LOGGER.debug("Creating Excel file ...")

            excel_filename = "/tmp/result_" + self.report_request.analysis_id + ".xlsx"
            self.create_excel_file(excel_filename)

            self.result_queue.put(excel_filename)

            # create the PDF report
            LOGGER.debug("Creating PDF report...")

            pdf_filename = "/tmp/result_" + self.report_request.analysis_id + ".pdf"
            self.create_pdf_report(pdf_filename)

            self.result_queue.put(pdf_filename)

            # create the R script
            LOGGER.debug("Creating R script...")

            r_filename = "/tmp/result_" + self.report_request.analysis_id + ".r"
            self.create_r_script(r_filename)

            self.result_queue.put(r_filename)
        except Exception as e:
            # put the error message in the queue
            LOGGER.error("Error during report generation: " + str(e))
            self.result_queue.put(e)

        finally:
            LOGGER.debug("Setting on_complete")
            self.on_complete.set()
Example #9
def install(download_dir):
    import io
    import os
    import urllib.request
    import zipfile

    import scipy.stats
    import rpy2.robjects as robjects

    local_filename = "%s/MEGSA_beta.zip" % download_dir
    if not os.path.exists(local_filename):
        filename, response = urllib.request.urlretrieve(MEGSA_URL, local_filename)
    else:
        filename = local_filename
    nbsupport.util.check_digest(filename, "655e1ec48a67530672303d8ac7fbc925")

    with zipfile.ZipFile(local_filename) as archive:
        with io.TextIOWrapper(archive.open("version beta/MEGSA.R")) as stream:
            robjects.reval(stream.read())

    global megsa

    def megsa(events):
        s = robjects.r.funEstimate(events.T).rx2("S")[0]
        # scipy.stats.chisqprob was removed; chi2.sf is the equivalent survival function
        return 0.5 * scipy.stats.chi2.sf(s, 1) + 0.5 * int(s == 0)
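A heavily hedged usage sketch: funEstimate comes from the sourced MEGSA.R, so its exact input contract is not shown here. The sketch assumes a binary samples-by-genes matrix and an active numpy converter (all names are hypothetical):

    import numpy as np
    from rpy2.robjects import numpy2ri

    numpy2ri.activate()  # assumption: the source module activates a converter elsewhere
    install('/tmp/megsa')
    events = np.random.binomial(1, 0.2, size=(100, 5))  # hypothetical binary event matrix
    p_value = megsa(events)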
Example #10
def scran_normalize(adata):
    import numpy as np
    import rpy2.robjects as ro
    from rpy2.robjects import numpy2ri
    from rpy2.robjects.packages import importr
    importr('scran')
    numpy2ri.activate()
    ro.r.assign('mat', adata.X.T)
    qclust_params = 'mat'
    # qclust_params = f'mat, min.size={min_size}, max.size={max_size}'
    ro.reval(f'cl <- quickCluster({qclust_params})')
    csf_params = 'mat, clusters=cl'
    # csf_params = f'mat, clusters=cl, min.mean={min_mean}'
    sf = np.asarray(ro.reval(f'computeSumFactors({csf_params})'))
    adata.obs['sf'] = sf
    adata.layers['counts'] = adata.X.copy()
    adata.X /= adata.obs['sf'].values[:, None]
    numpy2ri.deactivate()
    return adata
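A usage sketch, assuming scanpy is installed, the scran R package is available, and adata.X holds raw counts (the dataset call is illustrative only):

    import scanpy as sc

    adata = sc.datasets.pbmc3k()  # hypothetical example data
    adata = scran_normalize(adata)
    # size factors end up in adata.obs['sf']; raw counts are preserved in adata.layers['counts']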
Example #11
    def create_pdf_report(self, pdf_filename) -> None:
        """
        Create the PDF report based on the `reactome_obj` in the
        current R session.
        :param pdf_filename: The target filename of the PDF report. Will be overwritten if it exists.
        """
        # inject the path used to store the xlsx file
        ri.globalenv["pdf_result_file"] = ri.StrSexpVector([pdf_filename])

        try:
            ro.reval("""
                library(ReactomeGSA.report)
                
                options(tinytex.verbose = TRUE)
    
                # create the report
                create_pdf_report(reactome_obj, pdf_result_file, include_disease = include_disease, include_interactors = include_interactors)
            """)
        except Exception as e:
            LOGGER.error("RRuntimeError: " + str(e))
Example #12
def getProbFactors(fitted, events, evidence):
    from rpy2.robjects import Environment, reval

    env = Environment()
    env['fitted'] = fitted

    str_cpquery = "cpquery(fitted,({}),({}))"
    suppVals = [
        reval(str_cpquery.format(event, evidence), envir=env)[0]
        for event in events
    ]

    return suppVals
Example #13
def hugeLearnGraph(X, nonPara = True, method = "mb", nTunings = 20, modelSelectCrit = "ric", ebicTuning = 0.5, lambdaRatio = None, verbose = False):
    
    n,d = X.shape
       
    if nonPara:
        #X = hugeR.huge_npn(X, verbose = verbose) # transform the data
        X = transform(X, returnNumpyArray= False, verbose = verbose)
  
    asR = robjects.r['as']
    
    if method == "mb" and modelSelectCrit == "mbDefault":
        
        # single value for the regularization parameter
        alpha = 0.05
        lambbda = 2/np.sqrt(n)*norm.ppf(1 - alpha/(2*d**2))
         
        # cannot pass "lambda = x" argument to hugeR.huge function directly because "lambda" is illegal variable name..
        lambda_r = robjects.FloatVector([lambbda])
        robjects.rinterface.globalenv['lambda_r'] = lambda_r # define variable in R environment
        lambdaArg = robjects.reval('lambda = lambda_r') # this can be passed 

        est= hugeR.huge(X, lambdaArg, method = method, verbose = verbose, sym = "and")
        
        #print(robjects.r['get']('method',est))
        #print(robjects.r['get']('sym',est))
        path = robjects.r['get']('path',est)[0]
        estG = np.array(asR(path,"matrix"),dtype = np.int)
             
    else:
        if lambdaRatio is not None:
            est = hugeR.huge(X, lambda_min_ratio = lambdaRatio, method = method, nlambda = nTunings, verbose = verbose)
        else:
            # estimate graphs for a range of hyperparameters
            est = hugeR.huge(X, method = method, nlambda = nTunings, verbose = verbose)
    
        # set the seed to obtain reproducible results (depends on the numpy global seed);
        # this only has an effect if the criterion is "ric" or "stars"
        seed = np.random.randint(1, 1e9)
        robjects.r['set.seed'](seed)
        
        #ebic_r = robjects.FloatVector([ebicTuning])
        #robjects.rinterface.globalenv['ebicGamma'] = ebic_r # define variable in R environment
        #ebicArg = robjects.reval('ebic.gamma = ebicGamma') # this can be passed 
    
        # do model selection
        methodRes = hugeR.huge_select(est,ebic_gamma = ebicTuning, criterion = modelSelectCrit, verbose = verbose)
        
        G_R = robjects.r['get']('refit',methodRes)
        asR = robjects.r['as']
        estG = np.array(asR(G_R, "matrix"), dtype = int)
    
    # sparsity/ies for the estimated graph(s)
    sparsity = np.array(robjects.r['get']('sparsity',est))
    
    return(estG,sparsity)
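Because "lambda" is a Python keyword, it cannot appear as a literal keyword argument; the code above works around this by evaluating lambda = lambda_r in R and passing the result positionally. An alternative sketch is to evaluate the whole call in R, where the reserved word is unproblematic (assumes the huge R package is installed; the data is random for illustration):

    import numpy as np
    import rpy2.robjects as robjects
    from rpy2.robjects import numpy2ri

    numpy2ri.activate()
    robjects.globalenv['X'] = np.random.randn(50, 4)
    # the entire call is R code, so 'lambda' poses no problem here
    est = robjects.reval('huge::huge(X, lambda = 0.3, method = "mb", sym = "and", verbose = FALSE)')
    numpy2ri.deactivate()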
Example #14
def slingshot(adata, start, n_pcs=5, cl=None):
    import numpy as np
    import pandas as pd
    import rpy2.robjects as ro
    from rpy2.robjects import numpy2ri, pandas2ri
    from rpy2.robjects.packages import importr
    importr('slingshot')
    numpy2ri.activate()
    pandas2ri.activate()
    ro.r.assign('pca', adata.obsm['X_pca'][:, :n_pcs])
    ro.r.assign('cl', adata.obs[cl])
    ro.reval('sds <- newSlingshotDataSet(pca, cl)')
    ro.reval(f'sce <- slingshot(sds, cl, start.clus="{start}")')
    pt = pd.DataFrame(np.asarray(ro.reval('slingPseudotime(sce)')),
                      index=adata.obs_names)
    pt.columns = [f'{cl}_lineage_{c}' for c in pt.columns]
    try:
        adata.obs = adata.obs.drop(pt.columns, axis=1)
    except KeyError:
        print('PT keys not dropped in obs dataframe: Not found.')
    adata.obs = pd.concat([adata.obs, pt], axis=1)
    adata.uns['slingshot'] = {}
    adata.uns['slingshot']['lineages'] = {}
    lineages = np.asarray(ro.reval('sce@lineages'))
    for i, l in enumerate(lineages):
        adata.uns['slingshot']['lineages'][i] = list(np.asarray(l))
    numpy2ri.deactivate()
    pandas2ri.deactivate()
    return adata
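A usage sketch, assuming an AnnData with a PCA embedding in .obsm['X_pca'], a categorical cluster annotation (hypothetically named 'louvain'), and the slingshot R package:

    adata = slingshot(adata, start='0', n_pcs=5, cl='louvain')
    # per-lineage pseudotime is stored as adata.obs['louvain_lineage_...'],
    # lineage membership in adata.uns['slingshot']['lineages']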
Example #15
    def __init__(self, mtype, seed, nmaxcomp):
        if mtype == 1:
            ro.reval("""
            set.seed({})
            y <- rnorm(50, sample(c(-2, 0, 1, 2), 50,
                       TRUE, c(.3, .2, .2, .3)))
            """.format(seed + 1))
            self.y = np.array(ro.r['y'], dtype=np.float64)
            self.transformation = "logit"

            def truedensity(y_):
                #y_unc = scipy.special.logit(y_)
                dnorm = stats.norm.pdf
                results = .3 * dnorm(y_, -2) + .2 * dnorm(y_, 0) +\
                          .2 * dnorm(y_, 1) + .3 * dnorm(y_, 2)
                return results
                #abs jacobian transform
                #return results / abs(y_ - y_**2)

            self.truedensity = truedensity
        elif mtype == 2:
            ro.reval("""
            set.seed({})
            Ns <- as.vector(rmultinom(1, 50, c(.2, .25, .35, .2)))
            y <- c(rbeta(Ns[1], 1.3, 1.3),
                   rbeta(Ns[2], 1.1, 3),
                   rbeta(Ns[3], 5, 1),
                   rbeta(Ns[4], 1.5, 4))
            """.format(seed + 1))
            self.y = np.array(ro.r['y'], dtype=np.float64)
            self.transformation = None

            def truedensity(y_):
                dbeta = stats.beta.pdf
                return (.20 * dbeta(y_, 1.3, 1.3) + .25 * dbeta(y_, 1.1, 3) +
                        .35 * dbeta(y_, 5, 1) + .20 * dbeta(y_, 1.5, 4))

            self.truedensity = truedensity
        elif mtype == 3:
            ro.reval("""
            set.seed({})
            y <- rbeta(50, 2, 5)
            """.format(seed + 1))
            self.y = np.array(ro.r['y'], dtype=np.float64)
            self.transformation = None

            def truedensity(y_):
                return stats.beta.pdf(y_, 2, 5)

            self.truedensity = truedensity
        elif mtype == "real":
            tab = pd.read_csv("data.csv")
            tab.dropna(inplace=True)
            self.y = tab.loc[tab.iloc[:, 0] == seed].iloc[:, 1]
            self.y = np.array(self.y, dtype=np.float64)
            self.n = self.y.size
            self.transformation = {"transf": "fixed", "vmin": 50, "vmax": 107}

        self.phi = npc.fourierseries(self.y, nmaxcomp)
Example #16
def _load_folds_from_rdata(data_file, fold_id):
    """
    (internal) returns the folds stored in an RData file in the pipeline
    :param data_file: path to the RData file containing a `cvindices` object
    :param fold_id: identifier of the fold assignments to load
    :return: vector of fold assignments
    """
    import os

    if os.path.isfile(data_file):
        file_extension = data_file.rsplit('.')[-1]
        assert file_extension.lower() == 'rdata', \
            'unsupported file extension: %r' % file_extension
    else:
        raise IOError('could not find data_file: %s' % data_file)

    fold_id = validate_fold_id(fold_id)
    r_variables = "data_file='%s'; fold_id='%s'" % (data_file, fold_id)

    import numpy as np
    import rpy2.robjects as rn
    from .rpy2_helper import r_clear

    if is_inner_fold_id(fold_id):
        r_cmd = """raw_data = new.env()
        load(data_file, envir=raw_data)
        folds = raw_data$cvindices[[fold_id]][,1]
        """
    else:
        r_cmd = """raw_data = new.env()
        load(data_file, envir=raw_data)
        folds = raw_data$cvindices[[substring(fold_id, 1, 3)]][, as.double(substr(fold_id, 5, 6))]
        """

    rn.reval(r_variables)
    rn.reval(r_cmd)
    folds = np.array(rn.r['folds'])
    folds = validate_folds(folds, fold_id)
    r_clear()
    return folds
Example #17
def r_assign(pval, rname, print_flag=False):

    import rpy2.robjects as rn

    assert len(rname) > 0
    assert type(rname) is str

    if type(pval) is dict:
        return_flag = r_assign_list(pval, rname)
    else:
        return_flag = r_assign_value(pval, rname)

    if print_flag:
        print(rn.reval("print(%s)" % rname)[0])

    return return_flag
Example #18
def r_assign_list(pdict, rname):

    import rpy2.robjects as rn

    rn.reval("%s = list();" % rname)

    for key in pdict:
        r_assign_str(key, "field_name")
        r_assign_value(pdict[key], "field_value")
        rn.reval("%s[[field_name]] = field_value" % rname)

    rn.reval("rm(field_name); rm(field_value);")

    return True
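A small usage sketch for these assignment helpers (assumes r_assign and its r_assign_* companions from this module are importable):

    import rpy2.robjects as rn

    # a Python dict becomes a named R list
    r_assign({'alpha': 0.05, 'labels': ['a', 'b']}, 'params')
    rn.reval('str(params)')  # list of 2: $alpha, $labels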
Example #19
def knitr(absfile):
    import rpy2.robjects as robjects

    i = absfile.rindex('/')
    j = absfile.find('.', i)
    s = absfile[0:j]
    figpath = s + '_img/'
    # r_code renamed from 'reval' to avoid shadowing robjects.reval
    r_code = """
        knitr::opts_chunk$set(echo=FALSE, fig.path='{figpath}')
        library('knitr')
        knit('{absfile}')
        """.format(figpath=figpath, absfile=absfile)
    print('--------------------------------------')
    print(r_code)
    print(type(r_code))
    r2 = robjects.reval(r_code)
    fname = r2[0]
    with open(fname, 'r') as f:
        c = f.read()
    return c
Example #20
def r_clear():
    import rpy2.robjects as rn
    rn.reval('rm(list=ls());')
Example #21
    def analyse_request(self, request, gene_set_mappings, identifier_mappings,
                        gene_set):
        # clean the environment
        ro.reval("rm(list=ls())")

        # get the pathway id to name mapping
        pathway_names = self.dict_of_list_to_r(gene_set.gene_set_names)

        # process every dataset separately
        analysis_results = list()
        previous_progress = 0.3

        for dataset in request.datasets:
            # make sure the dataset has a design
            if dataset.design is None:
                raise AnalysisException(
                    "Dataset '" + dataset.name +
                    "' does not contain an experimental design.")

            LOGGER.debug("Analysing dataset " + dataset.name)

            # get the gene index
            gene_index = self.dict_of_list_to_r(
                gene_set_mappings[dataset.name].gene_set_indices,
                value_type=int)

            # prepare the dataset for the analysis - including pre-processing
            (expression_data, sample_data, design) = \
                self._prepare_dataset(dataset=dataset)

            self._update_status("Analysing dataset '{}' using {}".format(
                dataset.name, request.method_name),
                                complete=previous_progress +
                                (0.3 / len(request.datasets)))

            LOGGER.debug("Starting GSA...")

            result = self._perform_gsa(
                method=request.method_name,
                parameters=getattr(dataset, "parameter_dict", dict()),
                expression_data=expression_data,
                sample_data=sample_data,
                design=design,
                gene_index=gene_index,
                data_type=dataset.type,
                pathway_names=pathway_names,
                comparison_group_1=dataset.design.comparison.group1,
                comparison_group_2=dataset.design.comparison.group2)

            self._update_status("Analysing dataset '{}' using {}".format(
                dataset.name, request.method_name),
                                complete=previous_progress +
                                (0.5 / len(request.datasets)))

            LOGGER.debug("Estimating fold changes...")

            fold_changes = self._estimate_gene_fc(
                method=request.method_name,
                parameters=getattr(dataset, "parameter_dict", dict()),
                expression_data=expression_data,
                sample_data=sample_data,
                design=design,
                data_type=dataset.type,
                comparison_group_1=dataset.design.comparison.group1,
                comparison_group_2=dataset.design.comparison.group2)

            self._update_status("Analysing dataset '{}' using {}".format(
                dataset.name, request.method_name),
                                complete=previous_progress +
                                (0.7 / len(request.datasets)))

            LOGGER.debug("Adding pathway fold changes...")

            # add average fold-changes to the analysis result
            # pylint: disable=no-member
            result = ReactomeRAnalyser.preprocess.add_pathway_foldchanges(
                result, fold_changes, gene_index, expression_data)

            LOGGER.debug("Creating the analysis result...")

            analysis_result = AnalysisResultResults(
                name=dataset.name,
                pathways=ReactomeRAnalyser.data_frame_to_string(result),
                fold_changes=ReactomeRAnalyser.data_frame_to_string(
                    fold_changes))

            analysis_results.append(analysis_result)

            previous_progress += 0.7 / len(request.datasets)

            LOGGER.debug("Dataset analysis complete")

        return analysis_results
"""
import pandas as pd

from rpy2.robjects import pandas2ri
pandas2ri.activate()

from rpy2.robjects import r
r.data('prediction')
df_iris = pandas2ri.ri2py(r['prediction'])



from rpy2 import robjects

Rdir  = "I:/DOCUMENTS/WEGC/02_PhD_research/03_Data/ZAMG/SPARTACUS/TMAX/rda/Tx20130227.rda"
f = r'/Tx20130227.rda'

obj = Rdir + f

m = robjects.r('matrix(1:6, nrow=2, ncol=3)')

m = robjects.reval(obj)

rdf = 'I:/DOCUMENTS/WEGC/02_PhD_research/03_Data/ZAMG/SPARTACUS/TMAX/rda/Tx20130227.rda'

pandas2ri.ri2py(rdf)



test = pd.read_csv(r'C:\Users\Kaddabadda\Documents\test.csv', 
                   index_col = [0])
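The scratch notes above appear to be working out how to read an .rda file into pandas. robjects.reval evaluates R code, not file paths, so the file has to be load()-ed first; load() returns the names of the restored objects. A minimal sketch, assuming the file contains a single data.frame and using the same rpy2 2.x pandas2ri API as above:

    import rpy2.robjects as robjects
    from rpy2.robjects import pandas2ri

    pandas2ri.activate()
    names = robjects.reval("load('Tx20130227.rda')")
    df = pandas2ri.ri2py(robjects.r[str(names[0])])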
Example #23
def _save_data_as_rdata(file_name, data, cvindices):

    import pandas as pd
    import rpy2.robjects as rn
    from .rpy2_helper import r_assign, r_save_to_disk
    from rpy2.robjects import pandas2ri
    data = set_defaults_for_data(data)
    assert check_data(data)

    fields_to_save = [
        "format", "Y", "sample_weights", "outcome_name", "variable_names",
        "variable_types", "variable_orderings"
    ]

    if data['format'] == FORMAT_NAME_RULES:
        fields_to_save += [
            "feature_groups", "feature_names", "feature_types",
            "feature_orderings", "feature_group_limits"
        ]
    elif data['format'] == FORMAT_NAME_DCP:
        fields_to_save += ['partitions']

    try:

        for k in fields_to_save:
            r_assign(data[k], k)

    except Exception:

        from dcptree.debug import ipsh
        ipsh()

    r_assign(cvindices, "cvindices")

    # feature matrix
    var_type_to_col_type = {
        'boolean': 'bool',
        'categorical': 'str',
        'numeric': 'float',
        'ordinal': 'str',
    }
    col_types = {
        n: var_type_to_col_type[data['variable_types'][n]]
        for n in data['variable_names']
    }

    pandas2ri.activate()

    X_df = pd.DataFrame(data=data['X'])
    X_df.columns = data['variable_names']
    X_df = X_df.astype(col_types)
    rn.r.assign('X', X_df)

    # test set
    has_test_set = ('X_test' in data) and ('Y_test'
                                           in data) and ('sample_weights_test'
                                                         in data)
    if has_test_set:
        X_test_df = pd.DataFrame(data=data['X_test'])
        X_test_df.columns = data['variable_names']
        X_test_df = X_test_df.astype(col_types)
        rn.r.assign('X_test', pandas2ri.py2ri(X_test_df))
        r_assign(data['Y_test'], 'Y_test')
        r_assign(data['sample_weights_test'], 'sample_weights_test')
    else:
        rn.reval("""
                X_test = matrix(data=NA, nrow = 0, ncol = ncol(X));
                Y_test = matrix(data=NA, nrow = 0, ncol = 1);
                sample_weights_test = matrix(data=1.0, nrow = 0, ncol = 1);
                """)

    pandas2ri.deactivate()

    variables_to_save = fields_to_save + [
        "cvindices", "X", "X_test", "Y_test", "sample_weights_test"
    ]
    r_save_to_disk(file_name, variables_to_save)
    return True
Example #24
def anova2(x, print_summary=False):
    """
    Uses {car} Anova in R, via rpy2, to compute a two-way repeated measures anova.
    ``x`` should be a pandas dataframe of the form::
        
        
        factor_a    a0            a1       
        factor_b    b0     b1     b0     b1
        0          0.3   0.35   0.44   0.49
        1          0.5   0.47   0.92   1.20
        2         43.7  42.60  18.10  17.40
        3          3.8   4.50   9.20  10.40
        4         18.2  17.60  21.30  21.90
        5         22.4  23.10  19.30  19.80
        ...
        
    In the above, there are two "factors", which we have called "factor_a" and
    "factor_b". Here, each of the two factors has two "levels": ["a0", "a1"]
    and ["b0", "b1"]. Note that it is the fact that we have two *factors* that
    makes this a two-way anova, you *can* have more than two levels in each
    factor.
    
    This module includes ``_dummy_data_anova2``, which you can use here.
    
    Note on sphericity:
        The sphericity requirement is, roughly speaking, that all *pairs* of
        levels within a given factor must have roughly the same covariance.
        i.e. the "information" about a repeated measure is distributed evenly 
        across all the levels rather than some of the levels being more correlated
        to each other than others.  Note that if there are only two levels then
        there is only one covariance, so sphericity must be valid 
        (see http://stats.stackexchange.com/a/59206).
        When sphericity is violated, the way to compensate is to reduce the
        number of degrees of freedom.  There are three similar ways of doing this.
        The most conservative is the "lower bound", then "Greenhouse-Geisser", then
        "Huynh-Feldt". You are recommended to just look at the Greenhouse-Geisser values.
        (see https://youtu.be/wkMwW_2_TzY?t=40m34s).
        
    Returns a namedtuple with three pandas dataframes: 
        univariate_tests, sphericity_tests, and pval_adjustments
    There is also an attribute "full_dict", the values of which are rpy2 objects
    and provide the full output of the anova.
    
    You need to install R, rpy2, and the car package in R.
    Good luck.
    
    DM, Jun 2015.
    """
    import pandas as pd
    from rpy2.robjects import pandas2ri
    from rpy2.robjects.packages import importr
    import rpy2.robjects as R
    pandas2ri.activate()
    car = importr("car")

    level_values = x.columns.to_series().reset_index().drop(0, axis=1)
    level_names = x.columns.names
    x = x.copy()
    x.columns = [xx[0] + xx[1] for xx in x.columns]
    R.globalenv["data_x"] = R.r["as.matrix"](pandas2ri.py2ri(x))
    anova_r = car.Anova(R.r.lm("data_x ~ 1"),
                        idata=pandas2ri.py2ri(level_values),
                        idesign=R.reval("~" + "*".join(level_names)))
    R.r.rm("data_x")
    ret = R.r.summary(anova_r)
    if print_summary:
        print(ret)

    full_dict = {k.replace(".", "_"): v for k, v in ret.items()}

    def to_df(v):
        try:
            return pd.DataFrame(pandas2ri.ri2py(v),
                                columns=v.colnames,
                                index=v.rownames)
        except TypeError:
            return None

    return ANOVA2(univariate_tests=to_df(full_dict["univariate_tests"]),
                  sphericity_tests=to_df(full_dict["sphericity_tests"]),
                  pval_adjustments=to_df(full_dict["pval_adjustments"]),
                  full_dict=full_dict)
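The docstring refers to a _dummy_data_anova2 helper that is not shown on this page; below is a hedged sketch of how such two-factor input could be built with a pandas MultiIndex (a hypothetical reconstruction, not the original helper):

    import numpy as np
    import pandas as pd

    def _dummy_data_anova2(n=20, seed=0):
        # columns are (factor_a, factor_b) pairs, as anova2 expects
        rng = np.random.RandomState(seed)
        cols = pd.MultiIndex.from_product([['a0', 'a1'], ['b0', 'b1']],
                                          names=['factor_a', 'factor_b'])
        return pd.DataFrame(rng.rand(n, 4), columns=cols)

    result = anova2(_dummy_data_anova2())
    # result.univariate_tests, result.sphericity_tests, result.pval_adjustments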
Example #25
    def analyse_request(self, request: AnalysisInput, gene_set_mappings, identifier_mappings, gene_set):
        # clean the environment
        ro.reval("rm(list=ls())")

        # get the pathway id to name mapping
        pathway_names = self.dict_of_list_to_r(gene_set.gene_set_names)

        # load the analyser R code
        LOGGER.debug("Processing data using R analysis code for {}".format(request.method_name.lower()))
        analysis_package = self.methods[request.method_name.lower()]

        LOGGER.debug("Retrieved analysis package")

        # load the libraries
        try:
            analysis_package.load_libraries()
        except Exception as e:
            LOGGER.critical("Failed to load required package: " + str(e))
            raise AnalysisException("Failed to load required R package")

        LOGGER.debug("R libraries loaded")

        # get the analysis-level parameters
        analysis_parameters = getattr(request, "parameter_dict", dict())

        # indicates whether the visualization should be disabled
        disable_visualization = False

        # if pathways are filtered using a list of pathways disable visualization
        if len(analysis_parameters.get("pathways", "")) > 0:
            disable_visualization = True

        # process every dataset separately
        analysis_results = list()
        previous_progress = 0.3

        for dataset in request.datasets:
            # create safe sample names
            org_names = dataset.design.samples if dataset.design else dataset.df.dtype.names[1:]
            sample_names = self._create_save_names(org_names)

            # if there are more than MAX_SAMPLES, disable the visualization
            if len(sample_names) > ReactomeGSVARAnalyser.MAX_SAMPLES:
                disable_visualization = True

            LOGGER.debug("Converting expression data")

            # convert the expression data to an R matrix
            expression_data = self._convert_dataset(dataset, sample_names)

            # convert the fold_changes to the text representation
            # pylint: disable=no-member
            expression_data_id = ReactomeRAnalyser.preprocess.change_first_column(expression_data, 
                                                                               rowname_column=ri.StrSexpVector(["Identifier"]))
            r_fold_change_text = ReactomeRAnalyser.data_frame_to_string(expression_data_id)

            LOGGER.debug("Converting gene_index")

            # get the gene index
            gene_index = self.dict_of_list_to_r(gene_set_mappings[dataset.name].gene_set_indices, value_type=int)

            self._update_status("Analysing dataset '{}' using {}".format(dataset.name, request.method_name),
                                complete=previous_progress + (0.3 / len(request.datasets)))

            # perform the analysis
            LOGGER.debug("Starting GSVA analysis for {}".format(dataset.name))

            # use float before int to support scientific notation for max_size. This happens for large
            # numbers in the R package
            max_size = int(float(analysis_parameters.get("max_size", 1_000_000)))

            result = analysis_package.process(expression_data, gene_index, ri.StrSexpVector([dataset.type]),
                                              # These parameters are currently not visible to the user as
                                              # it might cause inconsistencies in the reactome result conversion
                                              ri.IntSexpVector([int(analysis_parameters.get("min_size", 0))]),
                                              ri.IntSexpVector([max_size]),
                                              ri.StrSexpVector([analysis_parameters.get("pathways", "")]))

            # add the pathway's name
            # pylint: disable=no-member
            result = ReactomeRAnalyser.preprocess.add_pathway_names(result, pathway_names)

            LOGGER.debug("GSVA analysis completed for {}".format(dataset.name))

            self._update_status("Analysing dataset '{}' using {}".format(dataset.name, request.method_name),
                                complete=previous_progress + (0.6 / len(request.datasets)))

            # convert the data.frame to a string
            r_text_result = ReactomeRAnalyser.data_frame_to_string(result)

            # add the result
            analysis_results.append(AnalysisResultResults(name=dataset.name,
                                                          pathways=r_text_result,
                                                          fold_changes=r_fold_change_text))

            previous_progress += 0.7 / len(request.datasets)

        LOGGER.debug("Returning combined analysis result")

        # disable the visualization if set
        if disable_visualization:
            if not hasattr(request, "parameter_dict"):
                request.parameter_dict = dict()

            request.parameter_dict["create_reactome_visualization"] = "False"

        return analysis_results
Example #26
import rpy2.robjects as r
import numpy as np

r.r.require("splines")

r.reval("x = rnorm(5)")
r.reval("x = bs(x, df = 5)")
np.array(list(r.r.x))

m = r.r['bs'](r.r.x, df=5)

r.reval('m1 <- c(1:10)')
r.reval("m2 <- matrix(as.complex(m1), nrow=5)")
np.array(list(r.r.m2)).reshape(tuple(r.r.m2.dim))
Example #27
def make_outfile(stan_file, data_file, author):
    import json
    from datetime import date

    import numpy as np
    import rpy2.robjects as robjects

    print(stan_file)

    model_fname = stan_file[stan_file.rfind('/') + 1:-5]
    data_fname = data_file[data_file.rfind('/') + 1:-5]

    f_stan = open(stan_file, 'r')
    stan_text = f_stan.readlines()

    # Parse stan file
    flag_data = False
    flag_parameters = False
    flag_model = False
    data_num = 0
    param_num = 0
    model_num = 0
    param_type = []
    data_type = []
    param_name = []
    data_name = []
    param_size = []
    data_size = []
    param_bounds = []
    data_bounds = []
    v_mu = []
    v_sigma = []
    v_bounds = []
    calc_order = []
    for text_line in stan_text:
        # highlight relevant sections
        if ('data {' in text_line):
            if ('transformed' not in text_line):
                flag_data = True
        elif ('parameters {' in text_line):
            if ('transformed' not in text_line):
                flag_parameters = True
        elif ('model {' in text_line):
            flag_model = True
        # handle each section
        elif (flag_data == True):
            if ('}' in text_line):
                flag_data = False
            else:
                [data_type, data_size, data_name, data_bounds,
                 data_num] = stan_read(text_line, data_type, data_size,
                                       data_name, data_bounds, data_num)
        elif (flag_parameters == True):
            if ('}' in text_line):
                flag_parameters = False
            else:
                [param_type, param_size, param_name, param_bounds,
                 param_num] = stan_read(text_line, param_type, param_size,
                                        param_name, param_bounds, param_num)

    f_stan.close()

    f_data = open(data_file, 'r')
    data_text = f_data.readlines()

    flag_array = False
    flag_start = False
    data_value = {}

    # Parse data file
    for text_line in data_text:
        if ('<-' in text_line):
            if (flag_array == True):
                flag_array = False
                data_value[index] = np.array(robjects.reval(array_string))
            elif (flag_start == True):
                raise Exception('Variable has unassigned value in data file')
            name_end = text_line.find('<')
            name = remove_whitespace(text_line[0:name_end], False)
            index = try_find(name, data_name)
            if (index != -1):
                if (data_size[index] == 1):
                    if ('c(' in text_line):
                        raise Exception(
                            'Specified data sizes do not match those in the data file'
                        )
                    else:
                        value_start = text_line.rfind('-') + 1
                        value = remove_whitespace(text_line[value_start:],
                                                  False)
                        if (is_number(value) == True):
                            if (value[-1:] == '\n'):
                                data_value[index] = value[:-1]
                            else:
                                data_value[index] = value
                        else:
                            flag_start = True
                else:
                    if ('c(' in text_line):
                        flag_array = True
                        value_start = text_line.find('-') + 1
                        if (text_line[-2:] == '\n'):
                            array_string = text_line[value_start:-3]
                        else:
                            array_string = text_line[value_start:]
                    else:
                        flag_start = True
        elif (flag_array == True):
            if (text_line[-2:] == '\n'):
                array_string += text_line[:-3]
            else:
                array_string += text_line
        elif (flag_start == True):
            if (data_size[index] == 1):
                if ('c(' in text_line):
                    raise Exception(
                        'Specified data sizes do not match those in the data file'
                    )
                else:
                    if (text_line[-1:] == '\n'):
                        value = remove_whitespace(text_line[:-1], False)
                    else:
                        value = remove_whitespace(text_line, False)
                    if (is_number(value) == True):
                        data_value[index] = value
                        flag_start = False
        else:
            if ('c(' in text_line):
                flag_array = True
                #value_start = text_line.rfind('c')
                #array_string = text_line[value_start:]
                if (text_line[-2:] == '\n'):
                    array_string = text_line[:-3]
                else:
                    array_string = text_line
                flag_start = False

    if (flag_array == True):
        data_value[index] = np.array(robjects.reval(array_string))

    f_data.close()

    #if len(data_value) != len(data_name):
    #    raise Exception('Specified data sizes do not match those in the data file')

    eval_param_size = []

    if (param_num > 0):
        for size_string in param_size:
            if (isinstance(size_string, int)):
                eval_param_size.append(size_string)
            elif (isinstance(size_string, str)):
                # not list, only one element to deal with
                eval_param_size.append(
                    eval(get_eval_string(size_string, data_name, data_value)))
            else:
                eval_param_size.append(1)
                for size_string_element in size_string:
                    # evaluate elements and multiply together to get total size
                    #print(get_eval_string(size_string_element,data_name,data_value))
                    eval_param_size[-1] *= eval(
                        get_eval_string(size_string_element, data_name,
                                        data_value))

    # get keywords from directory
    index = -1
    ind = 0
    keywords = []
    url = 'https://github.com/stan-dev/example-models'
    while ind < len(stan_file) - 5:
        if (stan_file[ind] == '/'):
            ind += 1
            index += 1
            if (ind < len(stan_file) - 5):
                keywords.append(stan_file[ind])
                url += '/' + stan_file[ind]
                ind += 1
            else:
                url += '/'
                break
        elif (ind == 0):
            index += 1
            keywords.append(stan_file[ind])
            url += '/' + stan_file[ind]
            ind += 1
        else:
            keywords[index] += stan_file[ind]
            url += stan_file[ind]
            ind += 1

    outf = open("{}.json".format(model_fname), "w")

    # write .json file
    outf.write('{\n')
    outf.write('  "name": "{}",\n'.format(model_fname))
    outf.write('  "keywords": {},\n'.format(json.dumps(keywords)))
    outf.write('  "urls": "{}",\n'.format(url))
    outf.write('  "model_name": "{}",\n'.format(model_fname))
    outf.write('  "data_name": "{}",\n'.format(data_fname))
    outf.write('  "reference_posterior_name": null,\n')
    outf.write('  "references": [],\n')
    outf.write('  "dimensions": {\n')
    if (len(param_name) > 1):
        for n in range(0, len(param_name) - 1):
            outf.write('    "{}": {},\n'.format(param_name[n],
                                                eval_param_size[n]))
        outf.write('    "{}": {}\n'.format(param_name[-1],
                                           eval_param_size[-1]))
    else:
        outf.write('    "{}": {}\n'.format(param_name[0], eval_param_size[0]))
    outf.write('  },\n')
    outf.write('  "added_date": "{}",\n'.format(date.today()))
    outf.write('  "added_by": "{}"\n'.format(author))
    outf.write('}')
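Hypothetical usage, assuming a Stan model with a matching R-dump data file (the paths are illustrative only):

    make_outfile('example-models/misc/eight_schools/eight_schools.stan',
                 'example-models/misc/eight_schools/eight_schools.data.R',
                 'Jane Doe')
    # writes eight_schools.json describing the model's parameters and keywords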
Example #28
def mast_de(adata, key, perc=-1, covs=''):
    import numpy as np
    import pandas as pd
    import rpy2.robjects as ro
    from rpy2.robjects import numpy2ri, pandas2ri
    from rpy2.robjects.packages import importr
    ro.reval('rm(list=ls())')
    importr('MAST')
    numpy2ri.activate()
    pandas2ri.activate()
    ro.reval('options(mc.cores=2)')
    ro.r.assign(
        'mat',
        pd.DataFrame(adata.raw.X,
                     index=adata.obs_names,
                     columns=adata.raw.var_names))
    print('Filtering genes')
    if perc > 0:
        ro.reval('mat <- mat[,colSums(mat > 0) > length(colnames(mat)) * ' +
                 str(perc) + ']; dim(mat);')
    else:
        ro.reval('mat <- mat[,colSums(mat > 0) > ' +
                 str(adata.obs[key].value_counts().min() - 1) + ']; dim(mat);')
    if adata.obs.shape[1] == 0:
        adata.obs['Barcode'] = list(adata.obs.index)
    ro.r.assign('cdat', adata.obs)
    ro.reval('fdat <- as.data.frame(colnames(mat)); \
        row.names(fdat) <- fdat[,1]')
    ro.reval('raw <- FromMatrix(t(mat), cdat, fdat)')
    print('Data loaded')
    de = None
    for group in adata.obs[key].cat.categories:
        print(f'Group {group}')
        cmd = 'group <- colData(raw)$' + key + '; \
            levels(group) <- factor(c(unique(group), "-1")); \
            group[group != "' + group + '"] = "-1"; \
            colData(raw)$group <- group; \
            zlmCond <- zlm(~group + n_genes' + covs + ', raw); \
            summaryCond <- summary(zlmCond, doLRT="group' + group + '"); \
            summaryDT <- summaryCond$datatable; \
            fcHurdle <- merge(summaryDT[contrast=="group' + group + '" & component=="H",.(primerid, `Pr(>Chisq)`)], summaryDT[contrast=="group' + group + '" & component=="logFC", .(primerid, coef, ci.hi, ci.lo)], by="primerid"); \
            fcHurdle[,fdr:=p.adjust(`Pr(>Chisq)`, "fdr")];'

        ro.reval(cmd)
        index = list(ro.reval('data.frame(fcHurdle)$primerid'))
        coef = list(ro.reval('data.frame(fcHurdle)$coef'))
        fdr = list(ro.reval('data.frame(fcHurdle)$fdr'))
        group_de = pd.DataFrame(
            {
                f'{group}_coef': coef,
                f'{group}_fdr': fdr
            },
            index=index,
        )
        de = pd.concat([de, group_de], axis=1)
    return de
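A usage sketch, assuming an AnnData whose .raw holds the unfiltered matrix, an n_genes column in .obs, a categorical grouping column (hypothetically named 'cluster'), and the MAST R package:

    de_table = mast_de(adata, key='cluster', perc=0.1)
    # one '<group>_coef' and '<group>_fdr' column per category, indexed by gene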
Example #29
def _load_cvindices_from_rdata(data_file):
    """
    (internal) returns the cvindices object stored in an RData file in the pipeline
    :param data_file: path to the RData file containing a `cvindices` object
    """
    import warnings

    import numpy as np
    import rpy2.robjects as rn
    from .rpy2_helper import r_clear

    r_variables = "data_file = '%s'" % data_file
    r_cmd = """
    raw_data = new.env()
    load(data_file, envir=raw_data)
    all_fold_ids = names(raw_data$cvindices)
    list2env(raw_data$cvindices, globalenv())
    remove(raw_data, cvindices);
    """
    rn.reval(r_variables)
    rn.reval(r_cmd)
    all_fold_ids = np.array(rn.r['all_fold_ids'])

    cvindices = {}
    max_fold_value = 0
    for key in all_fold_ids:
        try:
            folds = np.array(rn.r[key]).flatten()
            max_fold_value = max(max_fold_value, np.max(folds))
            cvindices[key] = folds
        except Exception:
            warnings.warn('failed to load fold %s from file %s' %
                          (key, data_file))

    #cast as unsigned integers to save storage space
    if max_fold_value < 255:
        storage_type = 'uint8'
    elif max_fold_value < 65535:
        storage_type = 'uint16'
    elif max_fold_value < 4294967295:
        storage_type = 'uint32'
    else:
        storage_type = 'uint64'

    for key in cvindices.keys():
        cvindices[key] = cvindices[key].astype(storage_type)

    #break down matrices to just folds
    all_keys = list(cvindices.keys())
    for key in all_keys:
        if key[0] == 'K' and len(key) == 3:
            fold_matrix = cvindices.pop(key)
            n_repeats = fold_matrix.shape[1]
            for r in range(n_repeats):
                folds = np.array(fold_matrix[:, r])
                folds = folds.flatten()
                folds = folds.astype(storage_type)
                fold_id = '%sN%02d' % (key, r)
                cvindices[fold_id] = folds

    # cleanup in the R environment just in case
    r_clear()
    return cvindices
Example #30
def r_assign_empty_list(rname):
    import rpy2.robjects as rn
    assert len(rname) > 0
    assert type(rname) is str
    rn.reval('%s = list()' % rname)
    return True
Example #31
    def run(self) -> None:
        try:
            # inject the r_file_path into the R session
            ri.globalenv["filename"] = ri.StrSexpVector([self.r_file_path])

            # process the file using the R session
            LOGGER.debug("Extracting expression data from R file")
            ro.reval("""
                # load the file
                load(filename)

                # make sure it only contains 1 experiment
                if (length(experimentSummary) != 1) {
                    stop("Error: Unexpected number of experiments")
                }

                # get the metadata
                metadata <- NA
                data_type <- NA
                expression_values <- NA

                # test if it's RNA-seq or microarray
                if (is(experimentSummary[[1]], "RangedSummarizedExperiment")) {
                    # load the required library
                    library(SummarizedExperiment)

                    data_type <- "rnaseq_counts"
                    metadata <- colData(experimentSummary[[1]])

                    expression_values <- data.frame(assays(experimentSummary[[1]])$counts)
                } else if (is(experimentSummary[[1]], "ExpressionSet")) {
                    # load the required library
                    library(Biobase)

                    # TODO: test for microarray normalisation
                    data_type <- "microarray_norm"
                    metadata <- pData(experimentSummary[[1]])

                    expression_values <- data.frame(exprs(experimentSummary[[1]]))
                } else {
                    stop("Error: Unknown assay type encountered.")
                }

                # add a "sample.id" column for the metadata
                metadata$sample.id <- rownames(metadata)
                metadata <- metadata[, c(ncol(metadata), 1:(ncol(metadata)-1))]
                # convert to a standard data.frame
                metadata <- data.frame(metadata)

                # add a "gene" column for the expression values
                expression_values$gene <- rownames(expression_values)
                expression_values <- expression_values[, c(ncol(expression_values), 1:(ncol(expression_values)-1))]
            """)
            # always check whether to exit
            if self.exit:
                return

            # convert the R objects to python objects
            LOGGER.debug("Converting R objects to python")
            data_type = str(ri.globalenv["data_type"][0])
            self.heartbeat()

            LOGGER.debug("Converting metadata data.frame to string")
            metadata_string = ReactomeRAnalyser.data_frame_to_string(
                ri.globalenv["metadata"])
            self.heartbeat()

            if self.exit:
                return

            LOGGER.debug("Converting expression data.frame to string")
            expression_value_string = ReactomeRAnalyser.data_frame_to_string(
                ri.globalenv["expression_values"])
            self.heartbeat()

            if self.exit:
                return

            # save the result and mark the on_complete event
            LOGGER.debug("Returning results through the queue")
            self.result_queue.put({
                'data_type': data_type,
                'metadata': metadata_string,
                'expression_values': expression_value_string
            })
            self.on_complete.set()

        except Exception as e:
            # put the error message in the queue
            LOGGER.error("Error during loading of R file: " + str(e))
            self.result_queue.put(e)
        finally:
            LOGGER.debug("Setting on_complete")
            self.on_complete.set()