Example #1
0
def R_inegration():
    """Run an irace tuning session through the R bridge and return its result.

    Switches R's working directory to ``~/tuning/``, reads the parameter
    definitions from ``parameters-acotsp.txt`` and the scenario from
    ``scenario.txt`` (layered over ``irace.defaultScenario()``), then runs
    irace with both.

    Returns:
        Whatever the ``irace`` call returns. Earlier versions bound it to an
        unused local and implicitly returned ``None``.
    """
    # Presumably the relative file names below are resolved by R against
    # this directory -- verify if the files live elsewhere.
    r.setwd("~/tuning/")
    parameters = irace.readParameters("parameters-acotsp.txt")
    scenario = irace.readScenario(filename="scenario.txt",
                                  scenario=irace.defaultScenario())

    # NOTE(review): ``irace`` is used both as a namespace (above) and as a
    # callable (here). If it is an rpy2 ``importr`` package object this may
    # need to be ``irace.irace(...)`` -- confirm against how it is bound.
    return irace(scenario=scenario, parameters=parameters)
Example #2
0
def ComBat(X, batch, covariate=None, parametric=False, empirical_bayes=True, save_dir=None):
    """Harmonize features across batches by calling the ComBat R script.

    All Python-side validation runs before any data is handed to R, so
    malformed input fails fast with a ``TypeError``/``ValueError``.

    Args:
        X: Feature table. A pandas DataFrame/Series is used as is; a list,
            tuple, ndarray or mapping is wrapped in a DataFrame.
        batch: Batch label per row of ``X``. List, tuple, ndarray or pandas
            DataFrame/Series; must be 1D, or 2D with a single column.
        covariate: Optional covariates (1D or 2D). Defaults to a single
            column of ones with ``len(batch)`` rows.
        parametric: Forwarded unchanged to the R ``ComBat_harmonization``.
        empirical_bayes: Forwarded unchanged to the R ``ComBat_harmonization``.
        save_dir: If not ``None``, the result is also written to
            ``Features_ComBat.xlsx`` inside this directory.

    Returns:
        pandas.DataFrame built from the named elements of the R result,
        indexed with the row labels of ``X``.

    Raises:
        TypeError: If ``X``, ``covariate`` or ``batch`` has an unsupported type.
        ValueError: If ``covariate`` has more than 2 dimensions, ``batch`` has
            an unsupported shape, or ``batch`` holds fewer than 2 classes.
    """
    # --- Validate X ---
    if isinstance(X, (pd.DataFrame, pd.Series)):
        df = X
    elif isinstance(X, (list, tuple, np.ndarray, Mapping)):
        df = pd.DataFrame(X)
    else:
        raise TypeError('X must be an array-like object, dictionary or pandas Dataframe/Series')
    row_names = df.index

    # --- Validate covariate ---
    if covariate is None:
        covariate = np.ones((len(batch), 1))
    elif isinstance(covariate, (list, tuple, np.ndarray)):
        covariate = np.array(covariate)
    elif isinstance(covariate, (pd.DataFrame, pd.Series)):
        covariate = covariate.to_numpy()
    else:
        raise TypeError('covariate array must be an array like or pandas Dataframe/Series')
    if covariate.ndim == 1:
        covariate = covariate.reshape(-1, 1)
    elif covariate.ndim > 2:
        raise ValueError('covariate array must be 1D or 2D')
    nr, nc = covariate.shape

    # --- Validate batch ---
    if isinstance(batch, (list, tuple, np.ndarray)):
        batch = np.array(batch)
    elif isinstance(batch, (pd.DataFrame, pd.Series)):
        batch = batch.to_numpy()
    else:
        raise TypeError('batch array must be an array like or pandas Dataframe/Series')
    if batch.ndim != 1:
        if batch.ndim == 2 and batch.shape[1] == 1:
            # reshape() returns a new array; the result must be re-bound or
            # the (n, 1) column is passed on to R unflattened.
            batch = batch.reshape(-1)
        else:
            raise ValueError('batch array must be 1D or 2D with second dimension equal to 1')
    if len(np.unique(batch)) <= 1:
        raise ValueError('batch array must have at least 2 classes')

    # --- Convert to R objects ---
    r_df = pandas2ri.py2ri(df)
    r_covariate = r.matrix(covariate, nrow=nr, ncol=nc)
    r_batch = Vector(batch)

    # The R script is located relative to this module, not relative to the
    # process entry point (sys.argv[0]).
    cwd = os.path.dirname(os.path.abspath(__file__))
    r.setwd(cwd)
    r.source('./R_scripts/ComBat.R')
    r_dr_results = r.ComBat_harmonization(r_df, r_covariate, r_batch, parametric, empirical_bayes)

    # Turn each named element of the R result into a DataFrame column.
    keys = r_dr_results.names
    R_object_dict = {keys[i]: np.array(r_dr_results[i]) for i in range(len(keys))}
    results = pd.DataFrame(R_object_dict)
    results.index = row_names
    if save_dir is not None:
        results.to_excel(os.path.join(save_dir, 'Features_ComBat.xlsx'))
    return results
def report(configDict):
    """Render the control R script to HTML with knitr and markdown.

    Each ``configDict`` entry is first assigned in the R session as a string
    variable (``<key> <- "<value>"``). The control script is then spun and
    knitted to Markdown, and the Markdown is converted to HTML. All paths
    are hard-coded to one specific installation.

    Args:
        configDict: Mapping of R variable name to value. Values are
            formatted with ``%s`` and assigned as R character strings.
    """
    r('.libPaths("/home/pilat/R/i686-pc-linux-gnu-library/2.15/")')
    r.setwd('/home/pilat/workspace/web_fEPSPA/media/')
    knitr = importr("knitr")
    md = importr("markdown")
    for name, value in configDict.items():
        # Escape backslashes and double quotes so the value cannot end the R
        # string literal early (broken or injected R code) and so that a
        # backslash, e.g. in a Windows path, reaches R unchanged.
        escaped = ('%s' % (value,)).replace('\\', '\\\\').replace('"', '\\"')
        r('%s<-"%s"' % (name, escaped))
    spun = knitr.spin("/home/pilat/workspace/PostProcessing_v.2/control.R", knit=r('FALSE'))
    knitr.knit(spun, output="/home/pilat/workspace/web_fEPSPA/Rinterface/control.md")
    md.markdownToHTML("/home/pilat/workspace/web_fEPSPA/Rinterface/control.md",
                      "/home/pilat/workspace/web_fEPSPA/media/Routput.html")
Example #4
0
 def hierarchical_clust_parmar(self, X, y=None):
     """
     Consensus clustering with hierarchical clustering, as described in:
         Radiomic feature clusters and Prognostic Signatures specific for Lung and Head & Neck cancer.
         Parmar et al., Scientific Reports, 2015

     Sources the R script, runs ``hierarchical_clustering_parmar`` on ``X``
     and stores the first result column in ``self.cluster_labels``.
     ``y`` is accepted but not used.

     Returns an array of shape (nb cluster, n_features): row ``k`` holds the
     second result column where the label equals ``k + 1`` and 0 elsewhere.

     Raises ValueError when ``self.cluster_reduction`` is not listed in
     ``self.cluster_reduction_methods``.
     """
     r_features = pandas2ri.py2ri(pd.DataFrame(X))

     # The R script path is relative to the directory of the entry script.
     script_root = os.path.dirname(sys.argv[0])
     r.setwd(script_root)
     r.source('./Statistical_analysis/R_scripts/hierarchical_clustering_Parmar.R')

     if self.cluster_reduction not in self.cluster_reduction_methods:
         raise ValueError(
             'cluster_reduction must be one of : %s. '
             '%s was passed' %
             (self.cluster_reduction_methods, self.cluster_reduction))
     r_output = r.hierarchical_clustering_parmar(
         r_features,
         max_k=20,
         threshold=1 - self.threshold,
         corr_metric=self.corr_metric,
         cluster_reduction=self.cluster_reduction)

     # One DataFrame column per named element of the R result.
     column_names = r_output.names
     columns = {
         column_names[pos]: np.array(r_output[pos])
         for pos in range(len(column_names))
     }
     table = pd.DataFrame(columns).to_numpy()

     self.cluster_labels = table[:, 0]
     nb_cluster = np.amax(table[:, 0]).astype(int)
     n_features = table.shape[0]

     # Filled as (n_features, nb cluster), transposed on return.
     coefficient_matrix = np.zeros((n_features, nb_cluster))
     for column, cluster_id in enumerate(range(1, nb_cluster + 1)):
         coefficient_matrix[:, column] = np.where(table[:, 0] == cluster_id,
                                                  table[:, 1], 0)
     return coefficient_matrix.T
Example #5
0
def compute_prob(size_sample,
                 prior_pg,
                 r_read_file_name="20k_test_elbos.csv",
                 prob_result_file="20k_test_prob_result.csv",
                 working_folder="./"):
    """Write the run arguments to ``filename.txt`` and run the R script.

    The arguments are written one per line, in this order: input file name,
    result file name, working folder, sample size, prior. R is then pointed
    at ``~/BKS/src/R_bks`` and ``bks_run_global_python.R`` is sourced; the
    argument file is meant to be read by that script.

    Warning:
        ``filename.txt`` is a fixed path in the current working directory,
        so concurrent calls overwrite each other's arguments. Do not run
        this function in parallel.

    Args:
        size_sample: Written to the argument file via ``str()``.
        prior_pg: Written to the argument file via ``str()``.
        r_read_file_name: First line of the argument file.
        prob_result_file: Second line of the argument file.
        working_folder: Third line of the argument file.
    """
    arguments = (r_read_file_name, prob_result_file, working_folder,
                 str(size_sample), str(prior_pg))
    # The context manager closes the file even if a write fails.
    with open('filename.txt', "w") as text_file:
        text_file.writelines(argument + '\n' for argument in arguments)

    # compute probability
    r.setwd('~/BKS/src/R_bks')
    r.source('bks_run_global_python.R')
Example #6
0
def univariate_analysis(X, y, adjusted_method='BH', save_dir=None):
    """Run the univariate analysis R script on ``X`` against labels ``y``.

    Args:
        X: Feature table. A pandas DataFrame/Series is used as is; a
            mapping, or a 2D list/tuple/ndarray, is wrapped in a DataFrame.
        y: Class label per row of ``X``. List, tuple, ndarray or pandas
            DataFrame/Series; must be 1D, or 2D with a single column (which
            is flattened).
        adjusted_method: Forwarded to the R ``univariate_analysis`` function
            as ``adjusted_method``.
        save_dir: If not ``None``, the result is also written to
            ``univariate_stats_analysis.xlsx`` inside this directory.

    Returns:
        pandas.DataFrame built from the named elements of the R result.

    Raises:
        TypeError: If ``X`` or ``y`` has an unsupported type.
        ValueError: If an array-like ``X`` is not 2D, ``y`` has an
            unsupported shape, or ``y`` holds fewer than 2 classes.
    """
    # --- Validate X ---
    if not isinstance(X, (pd.DataFrame, pd.Series)):
        if isinstance(X, Mapping):
            # np.array(mapping) is a 0-d object array, so a dimensionality
            # check would reject every dict; a DataFrame is 2D by construction.
            X = pd.DataFrame(X)
        elif isinstance(X, (list, tuple, np.ndarray)):
            if np.array(X).ndim != 2:
                raise ValueError('X array must 2D')
            X = pd.DataFrame(X)
        else:
            raise TypeError('X must be an array-like object, dictionary or pandas Dataframe/Series')

    # --- Validate y ---
    if isinstance(y, (list, tuple, np.ndarray)):
        y = np.array(y)
    elif isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.to_numpy()
    else:
        raise TypeError('y array must be an array like or pandas Dataframe/Series')
    if y.ndim != 1:
        if y.ndim == 2 and y.shape[1] == 1:
            # reshape() returns a new array; the result must be re-bound or
            # the (n, 1) column is passed on to R unflattened.
            y = y.reshape(-1)
        else:
            raise ValueError('y array must be 1D or 2D with second dimension equal to 1')
    if len(np.unique(y)) <= 1:
        raise ValueError('y array must have at least 2 classes')

    # --- Convert to R objects and run the script ---
    r_X = pandas2ri.py2ri(X)
    r_y = Vector(y)
    # The R script path is relative to the directory of the entry script.
    cwd = os.path.dirname(sys.argv[0])
    r.setwd(cwd)
    r.source('./Statistical_analysis/R_scripts/univariate_analysis.R')
    r_dr_results = r.univariate_analysis(r_X, r_y, adjusted_method=adjusted_method)

    # Turn each named element of the R result into a DataFrame column.
    keys = r_dr_results.names
    R_object_dict = {keys[i]: np.array(r_dr_results[i]) for i in range(len(keys))}
    results = pd.DataFrame(R_object_dict)
    if save_dir is not None:
        results.to_excel(os.path.join(save_dir, 'univariate_stats_analysis.xlsx'))
    return results
# Copy the R vector `valores` into a plain Python list.
valores_python = list(valores)

he = IntVector([10, 2, 23, 11, 14, 35, 46, 32, 13, 51, 27, 49])
ha = he  # same vector, so the covariance below is that of `he` with itself
# print(...) with a single argument behaves the same on Python 2 and 3;
# the bare `print x` statement form is a syntax error on Python 3.
print(r.var(he)[0])
print(r.cov(ha, he)[0])
print(r.cor(ha, he)[0])

# functions

# R functions defined from source text are callable from Python.
sqr = robjects.r('function(x) x^2')
print(sqr)
print(sqr(2))
print(sqr(IntVector([4])))
print(sqr(IntVector([4,4])))

eleva3 = robjects.r('function(a){ return(a*a*a); }')
print(eleva3)
print(eleva3(2))
print(eleva3(IntVector([4])))
print(eleva3(IntVector([4,4])))


# utilities

r.getwd()
r.setwd("c:/docs/mydir") # failures here raise Python exceptions
r.dir() # Lists the files in the cwd.

# Drop into the interactive debugger.
import ipdb; ipdb.set_trace()
Example #8
0
# Copy the R vector `valores` into a plain Python list.
valores_python = list(valores)

he = IntVector([10, 2, 23, 11, 14, 35, 46, 32, 13, 51, 27, 49])
ha = he  # same vector, so the covariance below is that of `he` with itself
# print(...) with a single argument behaves the same on Python 2 and 3;
# the bare `print x` statement form is a syntax error on Python 3.
print(r.var(he)[0])
print(r.cov(ha, he)[0])
print(r.cor(ha, he)[0])

# functions

# R functions defined from source text are callable from Python.
sqr = robjects.r('function(x) x^2')
print(sqr)
print(sqr(2))
print(sqr(IntVector([4])))
print(sqr(IntVector([4, 4])))

eleva3 = robjects.r('function(a){ return(a*a*a); }')
print(eleva3)
print(eleva3(2))
print(eleva3(IntVector([4])))
print(eleva3(IntVector([4, 4])))

# utilities

r.getwd()
r.setwd("c:/docs/mydir")  # failures here raise Python exceptions
r.dir()  # Lists the files in the cwd.

# Drop into the interactive debugger.
import ipdb
ipdb.set_trace()
Example #9
0
        robjects.globalenv["surv_data"] = pandas2ri.py2rpy(surv_data)
    robjects.globalenv["surv_diff"] = r(
        f"survdiff(Surv({OS}, {Censored})~group,surv_data,rho = 0)")
    Pvalue = r("1 - pchisq(surv_diff$chisq, length(surv_diff$n) -1)")[0]
    if ggsave and Pvalue < pvalue:
        r.ggsave(r(
            f"autoplot(survfit(Surv({OS}, {Censored})~group,surv_data), xlab = 'Time', ylab = 'Survival')+ggtitle('Pvalue = {Pvalue}')"
        ),
                 file=f"{path}/{gene}.pdf")
    return Pvalue


if __name__ == "__main__":
    # Set the data path
    path = "/install/git/Bioinformatics_paper/胶质母细胞瘤微环境预后相关基因的TCGA数据库挖掘/"
    # Point R's working directory at the same folder.
    r.setwd(path)
    # Read the preprocessed sample data
    sample = pd.read_csv(f"{path}sample.txt", sep="\t", index_col=0)
    sample_Group = sample["Stromal_Group"]
    # Read the preprocessed gene expression data (rows with missing values dropped)
    HT_HG_U133A_sample = pd.read_csv(f"{path}HT_HG_U133A_sample.txt",
                                     sep="\t").dropna()

    ################# Analysis of variance (ANOVA) GeneExp_Subtype #################
    # https://www.bioinfo-scrounger.com/archives/588/
    # Convert the two pandas columns to an R object inside a local converter
    # context, then print the summary of Stromal_score ~ GeneExp_Subtype.
    with localconverter(ro.default_converter + pandas2ri.converter):
        ANOVA_data_R = ro.conversion.py2rpy(
            sample[["Stromal_score", "GeneExp_Subtype"]])
        print(
            r.summary(
                r.aov(r("Stromal_score~GeneExp_Subtype"), data=ANOVA_data_R)))