def R_inegration():
    """Launch an irace tuning run from the ``~/tuning/`` directory.

    R's working directory is switched to ``~/tuning/``; the parameter
    space is read from ``parameters-acotsp.txt`` and the scenario from
    ``scenario.txt`` (layered on top of irace's default scenario). The
    outcome of the run is not returned to the caller.
    """
    r.setwd("~/tuning/")

    parameters = irace.readParameters("parameters-acotsp.txt")
    scenario = irace.readScenario(
        filename="scenario.txt",
        scenario=irace.defaultScenario(),
    )

    # NOTE(review): `irace` is used both as a namespace (readParameters,
    # readScenario) and as a callable here -- confirm the callable form is
    # valid for whatever object `irace` is bound to in this module.
    irace(scenario=scenario, parameters=parameters)
def ComBat(X, batch, covariate=None, parametric=False, empirical_bayes=True, save_dir=None):
    """Harmonize ``X`` across batches through the R ``ComBat_harmonization`` function.

    All Python-side validation is done before anything is handed to R, so
    invalid input fails without changing R's working directory or sourcing
    the script.

    Parameters
    ----------
    X : pandas.DataFrame, pandas.Series, array-like or Mapping
        Data to harmonize. Anything that is not already a pandas object is
        wrapped in a ``pandas.DataFrame``.
    batch : array-like, pandas.DataFrame or pandas.Series
        Batch label of each sample; 1D, or 2D with a single column (which
        is flattened). Must contain at least two distinct values.
    covariate : array-like, pandas.DataFrame or pandas.Series, optional
        1D or 2D covariate array. A 1D array is treated as one column.
        When omitted, a single column of ones with ``len(batch)`` rows is
        used.
    parametric, empirical_bayes : bool
        Forwarded unchanged to the R function.
    save_dir : str, optional
        When given, the result is also written to
        ``Features_ComBat.xlsx`` inside this directory.

    Returns
    -------
    pandas.DataFrame
        One column per named element of the R result, indexed like ``X``.

    Raises
    ------
    TypeError
        If ``X``, ``batch`` or ``covariate`` has an unsupported type.
    ValueError
        If ``batch`` or ``covariate`` has an unsupported shape, or
        ``batch`` holds fewer than two classes.
    """
    # ---- Check X
    if isinstance(X, (pd.DataFrame, pd.Series)):
        df = X
    elif isinstance(X, (list, tuple, np.ndarray, Mapping)):
        df = pd.DataFrame(X)
    else:
        raise TypeError('X must be an array-like object, dictionary or pandas Dataframe/Series')
    row_names = df.index

    # ---- Check batch
    if isinstance(batch, (pd.DataFrame, pd.Series)):
        batch = batch.to_numpy()
    elif isinstance(batch, (list, tuple, np.ndarray)):
        batch = np.array(batch)
    else:
        raise TypeError('batch array must be an array like or pandas Dataframe/Series')
    if batch.ndim != 1:
        if batch.ndim == 2 and batch.shape[1] == 1:
            # reshape() returns a new array; the result must be re-bound,
            # otherwise the column vector silently stays 2D.
            batch = batch.reshape(-1)
        else:
            raise ValueError('batch array must be 1D or 2D with second dimension equal to 1')
    if len(np.unique(batch)) <= 1:
        raise ValueError('batch array must have at least 2 classes')

    # ---- Check covariate
    if covariate is None:
        covariate = np.ones((len(batch), 1))
    elif isinstance(covariate, (pd.DataFrame, pd.Series)):
        covariate = covariate.to_numpy()
    elif isinstance(covariate, (list, tuple, np.ndarray)):
        covariate = np.array(covariate)
    else:
        raise TypeError('covariate array must be an array like or pandas Dataframe/Series')
    if covariate.ndim == 1:
        covariate = covariate.reshape(-1, 1)
    elif covariate.ndim != 2:
        raise ValueError('covariate array must be 1D or 2D')
    nr, nc = covariate.shape

    # ---- Hand everything over to R
    r_df = pandas2ri.py2ri(df)
    r_covariate = r.matrix(covariate, nrow=nr, ncol=nc)
    r_batch = Vector(batch)

    # The R script is addressed relative to this module's directory.
    cwd = os.path.dirname(os.path.abspath(__file__))
    r.setwd(cwd)
    r.source('./R_scripts/ComBat.R')
    r_dr_results = r.ComBat_harmonization(r_df, r_covariate, r_batch, parametric, empirical_bayes)

    # Turn the named R result into {name: numpy array}.
    keys = r_dr_results.names
    R_object_dict = {key: np.array(r_dr_results[i]) for i, key in enumerate(keys)}

    results = pd.DataFrame(R_object_dict)
    results.index = row_names
    if save_dir is not None:
        results.to_excel(os.path.join(save_dir, 'Features_ComBat.xlsx'))
    return results
def report(configDict):
    """Render ``control.R`` to HTML through knitr and the R markdown package.

    Every ``configDict`` entry is first defined in the R session as a
    string variable named after its key. ``control.R`` is then spun,
    knitted to ``control.md`` and converted to ``Routput.html``. All paths
    are hard-coded.
    """
    r('.libPaths("/home/pilat/R/i686-pc-linux-gnu-library/2.15/")')
    r.setwd('/home/pilat/workspace/web_fEPSPA/media/')

    knitr = importr("knitr")
    md = importr("markdown")

    # NOTE(review): keys and values are spliced verbatim into R source, so a
    # value containing a double quote or a backslash changes how R parses
    # the statement -- confirm the inputs are trusted.
    for name, value in configDict.items():
        r('%s<-"%s"' % (name, value))

    markdown_path = "/home/pilat/workspace/web_fEPSPA/Rinterface/control.md"
    spun = knitr.spin(
        "/home/pilat/workspace/PostProcessing_v.2/control.R",
        knit=r('FALSE'),
    )
    knitr.knit(spun, output=markdown_path)
    md.markdownToHTML(
        markdown_path,
        "/home/pilat/workspace/web_fEPSPA/media/Routput.html",
    )
def hierarchical_clust_parmar(self, X, y=None):
    """
    Consensus Clustering with hierarchical clustering as described in :
    Radiomic feature clusters and Prognostic Signatures specific for Lung
    and Head & Neck cancer. Parmar et al., Scientific Reports, 2015

    ``self.cluster_reduction`` is validated before R is touched, so an
    invalid setting fails without changing R's working directory.

    Parameters
    ----------
    X : array-like
        Data wrapped in a ``pandas.DataFrame`` and handed to the R routine.
    y : ignored
        Not used by this method.

    Returns
    -------
    numpy.ndarray
        Matrix of shape (nb_cluster, n_rows_of_R_result). Row ``k`` holds
        the second column of the R result where the first column equals
        ``k + 1`` and 0 elsewhere.

    Raises
    ------
    ValueError
        If ``self.cluster_reduction`` is not in
        ``self.cluster_reduction_methods``.

    Side effects
    ------------
    Stores the first column of the R result in ``self.cluster_labels``.
    """
    if self.cluster_reduction not in self.cluster_reduction_methods:
        raise ValueError(
            'cluster_reduction must be one of : %s. '
            '%s was passed' % (self.cluster_reduction_methods,
                               self.cluster_reduction))

    df = pd.DataFrame(X)
    r_df = pandas2ri.py2ri(df)

    # The R script is addressed relative to the launching script's directory.
    cwd = os.path.dirname(sys.argv[0])
    r.setwd(cwd)
    r.source(
        './Statistical_analysis/R_scripts/hierarchical_clustering_Parmar.R'
    )
    r_dr_results = r.hierarchical_clustering_parmar(
        r_df,
        max_k=20,
        threshold=1 - self.threshold,
        corr_metric=self.corr_metric,
        cluster_reduction=self.cluster_reduction)

    # Turn the named R result into a plain 2D numpy array.
    keys = r_dr_results.names
    R_object_dict = {key: np.array(r_dr_results[i]) for i, key in enumerate(keys)}
    dr_results = pd.DataFrame(R_object_dict).to_numpy()

    labels = dr_results[:, 0]
    weights = dr_results[:, 1]
    self.cluster_labels = labels

    # Labels are compared against 1..nb_cluster, so the largest label
    # gives the number of clusters.
    nb_cluster = np.amax(labels).astype(int)
    coefficient_matrix = np.zeros(
        (dr_results.shape[0], nb_cluster))  # Shape of (n_features, nb cluster)
    for i in range(nb_cluster):
        coefficient_matrix[:, i] = np.where(labels == i + 1, weights, 0)

    coefficient_matrix = coefficient_matrix.T
    return coefficient_matrix
def compute_prob(size_sample, prior_pg, r_read_file_name="20k_test_elbos.csv",
                 prob_result_file="20k_test_prob_result.csv", working_folder="./"):
    """Write the run settings to ``filename.txt`` and run the BKS R script.

    The R script takes no arguments; the settings are passed through
    ``filename.txt``, one value per line, in this order:
    ``r_read_file_name``, ``prob_result_file``, ``working_folder``,
    ``size_sample``, ``prior_pg``.

    Parameters
    ----------
    size_sample, prior_pg
        Written to the settings file via ``str()``.
    r_read_file_name : str
        First line of the settings file.
        NOTE(review): earlier disabled code wrote a CSV with columns
        ``kernels`` and ``L_i`` under this name -- confirm that is what
        the R script expects.
    prob_result_file : str
        Second line of the settings file.
    working_folder : str
        Third line of the settings file.
    """
    # TODO: do not run compute_prob in parallel -- every call rewrites the
    # same 'filename.txt'.
    # NOTE(review): the file is written to Python's current directory,
    # while the R script runs from '~/BKS/src/R_bks' -- confirm the two
    # coincide.
    settings = (
        r_read_file_name,
        prob_result_file,
        working_folder,
        str(size_sample),
        str(prior_pg),
    )
    # The context manager closes the file even if a write fails.
    with open('filename.txt', "w") as text_file:
        text_file.writelines(value + '\n' for value in settings)

    # compute probability
    r.setwd('~/BKS/src/R_bks')
    r.source('bks_run_global_python.R')
def univariate_analysis(X, y, adjusted_method='BH', save_dir=None):
    """Run the R ``univariate_analysis`` routine on ``X`` against ``y``.

    Parameters
    ----------
    X : pandas.DataFrame, pandas.Series, 2D array-like or Mapping
        Input data. A list/tuple/ndarray must be 2D; a Mapping is handed
        to ``pandas.DataFrame`` as is.
    y : array-like, pandas.DataFrame or pandas.Series
        Class label of each sample; 1D, or 2D with a single column (which
        is flattened). Must contain at least two distinct values.
    adjusted_method : str
        Forwarded to the R function as ``adjusted_method``.
        NOTE(review): presumably a p-value adjustment method name --
        confirm against the R script.
    save_dir : str, optional
        When given, the result is also written to
        ``univariate_stats_analysis.xlsx`` inside this directory.

    Returns
    -------
    pandas.DataFrame
        One column per named element of the R result.

    Raises
    ------
    TypeError
        If ``X`` or ``y`` has an unsupported type.
    ValueError
        If ``X`` is an array-like that is not 2D, ``y`` has an unsupported
        shape, or ``y`` holds fewer than two classes.
    """
    # ---- Check X
    if not isinstance(X, (pd.DataFrame, pd.Series)):
        if isinstance(X, Mapping):
            # np.array(<dict>) is a 0-d object array, so the 2D check below
            # would reject every mapping; let pandas interpret it instead.
            X = pd.DataFrame(X)
        elif isinstance(X, (list, tuple, np.ndarray)):
            if np.array(X).ndim != 2:
                raise ValueError('X array must 2D')
            X = pd.DataFrame(X)
        else:
            raise TypeError('X must be an array-like object, dictionary or pandas Dataframe/Series')

    # ---- Check y
    if isinstance(y, (pd.DataFrame, pd.Series)):
        y = y.to_numpy()
    elif isinstance(y, (list, tuple, np.ndarray)):
        y = np.array(y)
    else:
        raise TypeError('y array must be an array like or pandas Dataframe/Series')
    if y.ndim != 1:
        if y.ndim == 2 and y.shape[1] == 1:
            # reshape() returns a new array; the result must be re-bound,
            # otherwise the column vector silently stays 2D.
            y = y.reshape(-1)
        else:
            raise ValueError('y array must be 1D or 2D with second dimension equal to 1')
    if len(np.unique(y)) <= 1:
        raise ValueError('y array must have at least 2 classes')

    # ---- Hand everything over to R
    r_X = pandas2ri.py2ri(X)
    r_y = Vector(y)

    # The R script is addressed relative to the launching script's directory.
    cwd = os.path.dirname(sys.argv[0])
    r.setwd(cwd)
    r.source('./Statistical_analysis/R_scripts/univariate_analysis.R')
    r_dr_results = r.univariate_analysis(r_X, r_y, adjusted_method=adjusted_method)

    # Turn the named R result into {name: numpy array}.
    keys = r_dr_results.names
    R_object_dict = {key: np.array(r_dr_results[i]) for i, key in enumerate(keys)}

    results = pd.DataFrame(R_object_dict)
    if save_dir is not None:
        results.to_excel(os.path.join(save_dir, 'univariate_stats_analysis.xlsx'))
    return results
# Demo of calling R from Python: statistics on an integer vector, R
# functions defined from source text, and a few R utility calls.
valores_python = list(valores)

he = IntVector([10, 2, 23, 11, 14, 35, 46, 32, 13, 51, 27, 49])
ha = he  # same vector under a second name

# Single-argument print(...) calls behave the same on Python 2 and 3; the
# former print statements were a SyntaxError on Python 3.
print(r.var(he)[0])
print(r.cov(ha, he)[0])
print(r.cor(ha, he)[0])

# functions
sqr = robjects.r('function(x) x^2')
print(sqr)
print(sqr(2))
print(sqr(IntVector([4])))
print(sqr(IntVector([4, 4])))

eleva3 = robjects.r('function(a){ return(a*a*a); }')
print(eleva3)
print(eleva3(2))
print(eleva3(IntVector([4])))
print(eleva3(IntVector([4, 4])))

# utilities
r.getwd()
r.setwd("c:/docs/mydir")  # these raise Python exceptions
r.dir()  # Lists the files in the cwd.

# Drops into the ipdb debugger at this point.
import ipdb
ipdb.set_trace()
# Demo of calling R from Python: statistics on an integer vector, R
# functions defined from source text, and a few R utility calls.
valores_python = list(valores)

he = IntVector([10, 2, 23, 11, 14, 35, 46, 32, 13, 51, 27, 49])
ha = he  # same vector under a second name

# Single-argument print(...) calls behave the same on Python 2 and 3; the
# former print statements were a SyntaxError on Python 3.
print(r.var(he)[0])
print(r.cov(ha, he)[0])
print(r.cor(ha, he)[0])

# functions
sqr = robjects.r('function(x) x^2')
print(sqr)
print(sqr(2))
print(sqr(IntVector([4])))
print(sqr(IntVector([4, 4])))

eleva3 = robjects.r('function(a){ return(a*a*a); }')
print(eleva3)
print(eleva3(2))
print(eleva3(IntVector([4])))
print(eleva3(IntVector([4, 4])))

# utilities
r.getwd()
r.setwd("c:/docs/mydir")  # these raise Python exceptions
r.dir()  # Lists the files in the cwd.

# Drops into the ipdb debugger at this point.
import ipdb
ipdb.set_trace()
robjects.globalenv["surv_data"] = pandas2ri.py2rpy(surv_data) robjects.globalenv["surv_diff"] = r( f"survdiff(Surv({OS}, {Censored})~group,surv_data,rho = 0)") Pvalue = r("1 - pchisq(surv_diff$chisq, length(surv_diff$n) -1)")[0] if ggsave and Pvalue < pvalue: r.ggsave(r( f"autoplot(survfit(Surv({OS}, {Censored})~group,surv_data), xlab = 'Time', ylab = 'Survival')+ggtitle('Pvalue = {Pvalue}')" ), file=f"{path}/{gene}.pdf") return Pvalue if __name__ == "__main__": # 设置数据路径 path = "/install/git/Bioinformatics_paper/胶质母细胞瘤微环境预后相关基因的TCGA数据库挖掘/" r.setwd(path) # 读取处理好的数据 sample = pd.read_csv(f"{path}sample.txt", sep="\t", index_col=0) sample_Group = sample["Stromal_Group"] # 读取处理好的基因表达数据 HT_HG_U133A_sample = pd.read_csv(f"{path}HT_HG_U133A_sample.txt", sep="\t").dropna() ################# 方差分析(ANOVA) GeneExp_Subtype ################# # https://www.bioinfo-scrounger.com/archives/588/ with localconverter(ro.default_converter + pandas2ri.converter): ANOVA_data_R = ro.conversion.py2rpy( sample[["Stromal_score", "GeneExp_Subtype"]]) print( r.summary( r.aov(r("Stromal_score~GeneExp_Subtype"), data=ANOVA_data_R)))