def SCCA_r(X, Y, n_components, pen):
    """Run R's ``CCA`` on two data blocks and return loadings and correlations.

    ``X`` and ``Y`` are wrapped in DataFrames, converted to R matrices and
    published in the R global environment as ``X`` and ``Y``. ``CCA`` is then
    called with ``K = n_components``, ``penaltyx = pen[0]`` and
    ``penaltyz = pen[1]``.

    NOTE(review): the R function ``CCA`` must already be loaded in the R
    session; presumably it comes from the PMA package -- confirm.

    Returns
    -------
    loadings : tuple
        ``(x_loadings, y_loadings)`` as numpy matrices (the ``u`` and ``v``
        entries of the R result).
    cors : numpy.ndarray
        The ``cors`` entry of the R result.
    """
    r_x, r_y = [com.convert_to_r_matrix(pd.DataFrame(arr)) for arr in (X, Y)]
    ri.globalenv['X'] = r_x
    ri.globalenv['Y'] = r_y

    r_call = """ out <- CCA(x = X, z = Y, K = %i, niter = 100, standardize = FALSE, penaltyx = %f, penaltyz = %f) """
    com.r(r_call % (n_components, pen[0], pen[1]))

    # The R result is read back element by element (positions 1, 2 and 16)
    # and each element is then looked up by its name.
    u_frame = com.convert_robj(com.r('out[1]'))['u']
    v_frame = com.convert_robj(com.r('out[2]'))['v']
    r_cors = com.convert_robj(com.r('out[16]'))['cors']

    return (u_frame.as_matrix(), v_frame.as_matrix()), np.array(r_cors)
def test_convert_r_matrix(self):
    """Check that ``com.convert_to_r_matrix`` keeps labels, NaNs and values."""
    is_na = robj.baseenv.get("is.na")

    value_columns = ["A", "B", "C", "D"]
    frame = pd.DataFrame(tm.getSeriesData(), columns=["D", "C", "B", "A"])

    # Null data
    frame["E"] = [np.nan] * len(frame["A"])

    r_matrix = com.convert_to_r_matrix(frame)

    # Row and column labels must survive the round trip.
    assert np.array_equal(com.convert_robj(r_matrix.rownames), frame.index)
    assert np.array_equal(com.convert_robj(r_matrix.colnames), frame.columns)

    # The all-NaN column must be NA on the R side.
    assert all(is_na(cell) for cell in r_matrix.rx(True, "E"))

    for label in value_columns:
        r_column = r_matrix.rx(True, label)
        assert np.array_equal(com.convert_robj(r_column), frame[label])

    # Pandas bug 1282
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    # A TypeError is tolerated here; any other exception propagates.
    try:
        com.convert_to_r_matrix(frame)
    except TypeError:
        pass
def test_convert_r_matrix(self):
    """Verify the DataFrame -> R matrix conversion done by ``com``.

    Covers row/column names, an all-NaN column, value equality for the
    numeric columns, and the mixed text/NaN column from pandas bug 1282.
    """
    r_is_na = robj.baseenv.get("is.na")
    series_data = tm.getSeriesData()
    frame = pd.DataFrame(series_data, columns=['D', 'C', 'B', 'A'])

    # Null data
    frame["E"] = [np.nan for _ in frame["A"]]

    converted = com.convert_to_r_matrix(frame)
    row_names = com.convert_robj(converted.rownames)
    assert np.array_equal(row_names, frame.index)
    col_names = com.convert_robj(converted.colnames)
    assert np.array_equal(col_names, frame.columns)

    null_column = converted.rx(True, "E")
    assert all(r_is_na(value) for value in null_column)

    for name in ("A", "B", "C", "D"):
        expected = frame[name]
        actual = com.convert_robj(converted.rx(True, name))
        assert np.array_equal(actual, expected)

    # Pandas bug 1282
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    # Only a TypeError may be swallowed; everything else is a failure.
    try:
        com.convert_to_r_matrix(frame)
    except TypeError:
        pass
def main(args):
    """Binomial-test every gene column against every ``hsa*`` column.

    Reads the tab-separated table at ``args.highlow``. For each column whose
    name starts with ``"hsa"`` and each other column that neither starts with
    ``"hsa"`` nor ends with ``"_notest"``, rows are counted as a success when
    the two values agree in this sense: the gene value is 0 when the ``hsa``
    value is 0, otherwise the gene value is 1. The success/fail counts are
    passed to ``stats.binom_test`` and the extracted p-value is stored.

    The resulting table (one column per ``hsa*`` column, one row per gene) is
    written to standard output, tab separated.
    """
    df = pd.io.parsers.read_table(args.highlow, header=0)
    column_names = df.columns

    testing = {}
    for mir in column_names:
        if not mir.startswith("hsa"):
            continue
        testing[mir] = {}

        for gene in column_names:
            if gene.endswith("_notest") or gene.startswith("hsa"):
                continue

            success = fail = 0
            for _, row in df.iterrows():
                # A 0 in the hsa column expects a 0 in the gene column;
                # anything else expects a 1.
                expected = 0 if row[mir] == 0 else 1
                if row[gene] == expected:
                    success += 1
                else:
                    fail += 1

            # create dataframe for testing
            counts = pd.DataFrame({'success': [success], 'fail': [fail]})
            p = stats.binom_test(com.convert_to_r_matrix(counts))[2][0]
            testing[mir][gene] = p

    pd.DataFrame(testing).to_csv(sys.stdout, sep="\t")
def pca_test(exp, block, es):
    """Run the miXGENE R PCA on an expression set and store the result.

    On success the points are saved through ``PcaResult.store_pca``, the
    result is attached to ``block.pca_result`` and the block's ``"success"``
    action is fired. Any exception is appended to ``block.errors`` and the
    ``"error"`` action is fired instead; nothing is re-raised.

    :param exp: experiment object; provides the data folder and is passed to
        ``block.do_action``.
    :param block: workflow block; provides ``uuid``, ``errors`` and
        ``do_action``.
    :param es: expression set, must be an ``ExpressionSet``.
    """
    try:
        importr("miXGENE", lib_loc=R_LIB_CUSTOM_PATH)

        assert isinstance(es, ExpressionSet)

        # Build the R-side 'mixData' object holding the assay matrix.
        dataset = R.r['new']('mixData')
        r_data = com.convert_to_r_matrix(es.get_assay_data_frame())
        dataset.do_slot_assign('data', r_data)

        # Build the R-side 'mixPheno' object from the sample titles.
        dataset_factor = R.r.new('mixPheno')
        pheno_df = es.get_pheno_data_frame()
        r_phenotype = R.r.factor(R.StrVector(pheno_df['Sample_title'].tolist()))
        dataset_factor.do_slot_assign("phenotype", r_phenotype)

        pca = R.r['mixPca'](
            dataset=dataset,
            dataset_factor=dataset_factor,
        )

        # Bring the PCA points back to pandas, labelled like the phenotype rows.
        r_points = pca.do_slot('points')
        np_points = rpyn.ri2numpy(r_points)
        df_points = pd.DataFrame(np_points)
        df_points.index = pheno_df.index

        res = PcaResult(
            base_dir=exp.get_data_folder(),
            base_filename="%s_pca" % block.uuid
        )
        res.store_pca(df_points)

        block.pca_result = res
        block.do_action("success", exp)
    except Exception as e:
        # "as" form is valid on Python 2.6+ and Python 3; the comma form is
        # a syntax error on Python 3.
        block.errors.append(e)
        block.do_action("error", exp)
def apply_ranking(
        exp, block, es, ranking_name,
        result_table, pheno_class_column=None, options=None
):
    """Rank the rows of an expression set with an R ranking function.

    Sources ``ranking.Methods.r`` from ``R_LIB_CUSTOM_PATH``, calls the R
    function named ``ranking_name`` on the transposed assay matrix and the
    phenotype column, and stores a one-column ``"rank"`` table.

    :param exp: experiment object (unused here).
    :param block: workflow block (unused here).
    :param es: expression set providing assay and phenotype data.
    :param ranking_name: name of the R function to call.
    :param result_table: object whose ``store_table`` receives the result.
    :param pheno_class_column: phenotype column to use; defaults to
        ``es.pheno_metadata["user_class_title"]``.
    :param options: extra keyword arguments for the R function.
    :return: ``([result_table], {})``.
    """
    if not options:
        options = {}
    if not pheno_class_column:
        pheno_class_column = es.pheno_metadata["user_class_title"]

    R.r['source'](R_LIB_CUSTOM_PATH + '/ranking.Methods.r')
    func = R.r[ranking_name]

    assay_df = es.get_assay_data_frame()
    x = com.convert_to_r_matrix(assay_df)
    y = es.get_pheno_column_as_r_obj(pheno_class_column)

    log.debug("Computing ranking: `%s` options: `%s`", ranking_name, options)
    with stopwatch(name="Computing ranking: `%s` options: `%s`" % (ranking_name, options),
                   threshold=0.01):
        ranking_list = list(func(R.r['t'](x), y, **options))

    # R returns 1-based row numbers; shift them to 0-based positions.
    ranking_fixed = [int(a - 1) for a in ranking_list]

    # Rows the ranking does not mention keep the default rank len(assay_df).
    n_rows = len(assay_df)
    df = pd.DataFrame(
        index=assay_df.index,
        data=[n_rows] * n_rows,
        columns=["rank"]
    )

    # The values are row positions, so address them positionally. `.ix`
    # guessed between label and position and no longer exists in pandas >= 1.0.
    rank_col = df.columns.get_loc("rank")
    for rank, row_num in enumerate(ranking_fixed):
        df.iloc[row_num, rank_col] = rank

    result_table.store_table(df)
    return [result_table], {}
def combat(df, annotation_col="sample_class"):
    """Batch-correct a combined expression matrix with R's ``sva::ComBat``.

    Samples are keyed by ``gsm_name_gpl_name_gse_name``; the batch label is
    ``gse_name_gpl_name`` and the model matrix is built from
    ``annotation_col``.

    Side effects visible in the code: a ``"code"`` column is added to the
    caller's ``df`` and the filtered matrix is written to ``m.csv`` in the
    current directory.

    Returns ``(combat_matrix, samples)`` where ``combat_matrix`` carries the
    same index and columns as the filtered input matrix.
    """
    import pandas.rpy.common as com

    series_platform_pairs = (
        df[["gse_name", "gpl_name"]].drop_duplicates().to_records(index=False)
    )

    # Key every sample by a combined code and index the annotations by it.
    df["code"] = df["gsm_name"] + "_" + df["gpl_name"] + "_" + df["gse_name"]
    df = df.set_index("code")

    # drop genes with missing data
    combined = combine_matrix(series_platform_pairs)
    m = drop_missing_samples(combined).dropna()

    # Keep only the samples present in both the annotations and the matrix.
    shared = df.index.intersection(m.columns)
    m = m[shared]
    m.to_csv("m.csv")
    samples = df.ix[m.columns].reset_index()

    edata = com.convert_to_r_matrix(m)
    batch = robjects.StrVector(samples["gse_name"] + "_" + samples["gpl_name"])
    pheno = robjects.FactorVector(samples[annotation_col])

    r.library("sva")
    formula = robjects.Formula("~pheno")
    formula.environment["pheno"] = pheno
    design = r["model.matrix"](formula)

    corrected = r.ComBat(dat=edata, batch=batch, mod=design)
    combat_matrix = pd.DataFrame(
        np.asmatrix(corrected), index=m.index, columns=m.columns
    )
    return combat_matrix, samples
def apply_ranking(exp, block, es, ranking_name,
                  result_table, pheno_class_column=None, options=None):
    """Compute a row ranking through an R function and store it.

    The R file ``ranking.Methods.r`` under ``R_LIB_CUSTOM_PATH`` is sourced,
    then the R function ``ranking_name`` is called with the transposed assay
    matrix, the phenotype column and ``options`` as keyword arguments.

    :param exp: experiment object (unused here).
    :param block: workflow block (unused here).
    :param es: expression set providing assay and phenotype data.
    :param ranking_name: name of the R ranking function.
    :param result_table: receives the ``"rank"`` table via ``store_table``.
    :param pheno_class_column: phenotype column; falls back to
        ``es.pheno_metadata["user_class_title"]`` when empty.
    :param options: keyword arguments forwarded to the R function.
    :return: ``([result_table], {})``.
    """
    options = options or {}
    if not pheno_class_column:
        pheno_class_column = es.pheno_metadata["user_class_title"]

    R.r['source'](R_LIB_CUSTOM_PATH + '/ranking.Methods.r')
    func = R.r[ranking_name]

    assay_df = es.get_assay_data_frame()
    x = com.convert_to_r_matrix(assay_df)
    y = es.get_pheno_column_as_r_obj(pheno_class_column)

    log.debug("Computing ranking: `%s` options: `%s`", ranking_name, options)
    timer_name = "Computing ranking: `%s` options: `%s`" % (ranking_name, options)
    with stopwatch(name=timer_name, threshold=0.01):
        ranking_list = list(func(R.r['t'](x), y, **options))

    # Convert R's 1-based row numbers to 0-based positions.
    ranking_fixed = [int(a - 1) for a in ranking_list]

    # Every row starts at the worst rank, len(assay_df).
    total = len(assay_df)
    df = pd.DataFrame(index=assay_df.index,
                      data=[total] * total,
                      columns=["rank"])

    # Positional assignment: `.ix` mixed label and position lookup and was
    # removed from pandas, while these values are row positions.
    rank_col = df.columns.get_loc("rank")
    for rank, row_num in enumerate(ranking_fixed):
        df.iloc[row_num, rank_col] = rank

    result_table.store_table(df)
    return [result_table], {}
def from_dataframe(cls, dataframe, **kwargs):
    '''Build a Rollcall object from a pandas.DataFrame.

    The frame should correspond to the R matrix described in the pscl docs,
    see http://cran.r-project.org/web/packages/pscl/pscl.pdf

    The frame is converted to an R matrix and handed to ``cls.from_matrix``
    together with any extra keyword arguments.
    '''
    return cls.from_matrix(
        rpy_common.convert_to_r_matrix(dataframe),
        **kwargs
    )
def gt_basic(es, gene_sets, pheno_class_column,
             model="logistic", permutations=100):
    """
    Run the global test for the given gene sets and return its result table.

    @param es: Expression set with defined user class in pheno
    @type es: ExpressionSet

    @type gene_sets: environment.structures.GeneSets

    @param pheno_class_column: Column name of target classes in phenotype table
    @type pheno_class_column: string or None

    @param model: forwarded to the global test as ``model``
    @param permutations: forwarded to the global test as ``permutations``

    @return: the ``result`` slot of the test object, converted with
        ``com.convert_robj``
    """
    GlobalTest.gt_init()

    r_assay = com.convert_to_r_matrix(es.get_assay_data_frame())
    response = es.get_pheno_column_as_r_obj(pheno_class_column)

    # Restrict the gene sets to genes present in the expression set.
    known_genes = es.get_assay_data_frame().index.tolist()
    usable_sets = filter_gs_by_genes(gene_sets.get_gs(), known_genes)

    r_assay_t = R.r['t'](r_assay)
    r_subsets = usable_sets.to_r_obj()
    gt_instance = GlobalTest.gt(
        response,
        r_assay_t,
        subsets=r_subsets,
        model=model,
        permutations=permutations,
    )

    return com.convert_robj(gt_instance.do_slot('result'))
def impute_data(data):
    """Impute ``data`` with R's ``impute.knn`` and return a DataFrame.

    The first element of the R result is converted to a numpy array and
    relabelled with the index and columns of ``data``.
    """
    r.library("impute")
    knn_result = r['impute.knn'](com.convert_to_r_matrix(data))
    return pd.DataFrame(
        np.asarray(knn_result[0]),
        index=data.index,
        columns=data.columns,
    )
def _unique_gene_columns(cols):
    """Return column names cut at the first '.' and made unique with '__<position>'."""
    seen = set()
    unique_cols = []
    for i, g in enumerate(cols):
        g = g.split('.')[0]
        if g in seen:
            unique_cols.append(g + '__' + str(i))
        else:
            seen.add(g)
            unique_cols.append(g)
    return unique_cols


def apply_ranking(
        exp, block, es, ranking_name,
        result_table, pheno_class_column=None, options=None
):
    """Rank an expression set with an R ranking function and store the result.

    Sources ``ranking.Methods.r`` from ``R_LIB_CUSTOM_PATH`` and looks up the
    R function ``ranking_name``. The assay columns are renamed to be unique,
    the frame is transposed, converted to an R matrix, and the R function is
    called on ``t(x)`` with the phenotype column and ``options``.

    When ``settings.CELERY_DEBUG`` is set, a pydevd remote-debug session is
    attached first.

    :param exp: experiment object; receives a log line via ``exp.log``.
    :param block: workflow block; its ``uuid`` tags the log line.
    :param es: expression set providing assay and phenotype data.
    :param ranking_name: name of the R ranking function.
    :param result_table: receives the ``"rank"`` table via ``store_table``.
    :param pheno_class_column: phenotype column; defaults to
        ``es.pheno_metadata["user_class_title"]``.
    :param options: keyword arguments forwarded to the R function.
    :return: ``([result_table], {})``.
    """
    if not options:
        options = {}
    if not pheno_class_column:
        pheno_class_column = es.pheno_metadata["user_class_title"]

    R.r['source'](R_LIB_CUSTOM_PATH + '/ranking.Methods.r')
    func = R.r[ranking_name]

    if settings.CELERY_DEBUG:
        import sys
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    assay_df = es.get_assay_data_frame()
    # We must rename cols to be unique for R
    assay_df.columns = _unique_gene_columns(assay_df.columns)
    assay_df = assay_df.T

    x = com.convert_to_r_matrix(assay_df)
    y = es.get_pheno_column_as_r_obj(pheno_class_column)

    exp.log(block.uuid, "Computing ranking: `%s` options: `%s`" % (ranking_name, options))
    log.debug("Computing ranking: `%s` options: `%s`", ranking_name, options)
    with stopwatch(name="Computing ranking: `%s` options: `%s`" % (ranking_name, options),
                   threshold=0.01):
        ranking_list = list(func(R.r['t'](x), y, **options))

    # R returns 1-based row numbers; shift them to 0-based positions.
    ranking_fixed = [int(a - 1) for a in ranking_list]

    # Rows the ranking does not mention keep the default rank len(assay_df).
    n_rows = len(assay_df)
    df = pd.DataFrame(
        index=assay_df.index,
        data=[n_rows] * n_rows,
        columns=["rank"]
    )

    # Positional assignment: `.ix` mixed label and position lookup and was
    # removed from pandas, while these values are row positions.
    rank_col = df.columns.get_loc("rank")
    for rank, row_num in enumerate(ranking_fixed):
        df.iloc[row_num, rank_col] = rank

    result_table.store_table(df)
    return [result_table], {}
def impute_data(data):
    """Fill missing values in ``data`` using R's ``impute.knn``.

    Returns a new DataFrame built from the first element of the R result,
    with the same index and columns as ``data``.
    """
    import rpy2.robjects as robjects
    import pandas.rpy.common as com

    r = robjects.r
    r.library("impute")

    r_matrix = com.convert_to_r_matrix(data)
    imputed_values = np.asarray(r['impute.knn'](r_matrix)[0])

    imputed = pd.DataFrame(imputed_values)
    imputed.index, imputed.columns = data.index, data.columns
    return imputed
def calculate(self, method, data_frame, positive_samples, negative_samples):
    """Run ``siggenes::sam`` on the labelled samples and return its statistics.

    :param method: unused by this implementation.
    :param data_frame: features in rows, samples in columns.
    :param positive_samples: sample names labelled 1.
    :param negative_samples: sample names labelled 0.
    :return: DataFrame indexed by feature with the columns ``Score``,
        ``FalseCalls``, ``Q-value``, ``P-value`` and ``StdDev``.
    """
    r = robjects.r

    ## construct matrix_r
    # Build the labelled list once instead of on every loop iteration.
    labelled = positive_samples + negative_samples
    samples = [sample for sample in data_frame.axes[1] if sample in labelled]

    features = data_frame.axes[0]
    matrix_r = common.convert_to_r_matrix(data_frame[samples])

    ## construct cls_r
    # Every kept sample is positive or negative, so one test is enough.
    cls = {}
    for sample in samples:
        cls[sample] = 1 if sample in positive_samples else 0

    # Pin the column order to `samples`: a frame built from a bare dict may
    # order its columns differently from the matrix, which would pair class
    # labels with the wrong samples.
    cls_r = common.convert_to_r_matrix(pandas.DataFrame([cls], columns=samples))

    ## generate signature with method
    sam_out = self.siggenes.sam(matrix_r, r.c(cls_r))
    sam_attributes = r.attributes(sam_out)
    sam_att = r.cbind(
        r.c(sam_attributes.rx2('d')),
        r.c(sam_attributes.rx2('vec.false')),
        r.c(sam_attributes.rx2('q.value')),
        r.c(sam_attributes.rx2('p.value')),
        r.c(sam_attributes.rx2('s'))
    )

    ## return results as a data_frame
    ocols = ['Score', 'FalseCalls', 'Q-value', 'P-value', 'StdDev']
    output = {}
    for j, col in enumerate(ocols):
        # R indices are 1-based, hence the +1 on both axes.
        output[col] = dict(
            (n, sam_att.rx(i + 1, j + 1)[0]) for i, n in enumerate(features)
        )

    return pandas.DataFrame(output)
def mcnemar_test(results_matrix):
    """Run R's ``mcnemar.test`` on a results matrix.

    Returns
    -------
    c, p : tuple
        First entry of element 0 and first entry of element 2 of the R
        result, i.e. the chi-sq stat and p-value from the Mcnemar test.
    """
    import pandas.rpy.common as rcom

    r_mcnemar = rcom.r["mcnemar.test"]
    r_result = r_mcnemar(rcom.convert_to_r_matrix(results_matrix))

    statistic = r_result[0][0]
    p_value = r_result[2][0]
    return statistic, p_value
def chisq_test(results_matrix):
    """Run R's ``chisq.test`` on a results matrix.

    Returns
    -------
    c, p : tuple
        First entry of element 0 and first entry of element 2 of the R
        result, i.e. the chi-sq stat and p-value from the test.
    """
    import pandas.rpy.common as rcom

    r_chisq = rcom.r["chisq.test"]
    outcome = r_chisq(rcom.convert_to_r_matrix(results_matrix))
    return outcome[0][0], outcome[2][0]
def show_allocated_portfolio(form):
    """Compute portfolio weights in R and render the allocation page.

    Reads ``stock_data.csv``, publishes the selected ticker columns to R as
    ``data``, sources ``calculations.R`` and calls
    ``function_make_everything_work(data, horizon)``. The weights are rounded
    to two decimals; whatever is left to reach 1 is reported as the
    ``'Risk-Free'`` allocation.

    :param form: mapping with ``'ticker_selection'`` (comma-separated
        tickers) and ``'horizon'`` (a number).
    :raises ValueError: if ``form['horizon']`` is not numeric.
    :return: the rendered ``allocated.html`` template.
    """
    selected_tickers = form['ticker_selection'].split(',')
    if selected_tickers[-1] == ' ':
        selected_tickers = selected_tickers[:-1]

    # Parse the horizon up front. The form value used to be spliced into a
    # string of R source, which let a crafted value run arbitrary R code.
    horizon = float(form['horizon'])

    data = pd.read_csv('stock_data.csv').set_index('Date').sort_index()[1:]
    r_data = com.convert_to_r_matrix(data[selected_tickers])
    # Still published as a global in case calculations.R refers to `data`.
    ro.globalenv['data'] = r_data
    ro.r('source("calculations.R")')

    # Call the R function with real arguments instead of building R code.
    raw = ro.r['function_make_everything_work'](r_data, horizon)
    weights = np.around(np.array(raw), 2)

    # list(...) so that .append also works where zip returns an iterator.
    result = list(zip(selected_tickers, weights, weights > 0))
    rfalloc = 1 - weights.sum()
    result.append(('Risk-Free', rfalloc, rfalloc > 0))

    return render_template('allocated.html', horizon=form['horizon'], result=result)
def quantile_norm_with_R(input_df):
    """Quantile-normalize a DataFrame with R's ``normalize.quantiles``.

    The input and the normalized frame are both printed. The returned frame
    carries the index and columns of ``input_df``.
    """
    # normalize.quantiles comes from preprocessCore; load it here so the
    # function does not depend on an earlier cell or caller having done so.
    robjects.r('require(preprocessCore)')

    R_norm_func = robjects.r("""quantnorm <- function(inputmatrix) {
y<-normalize.quantiles(inputmatrix)
return(y)
}""")

    R_matrix = com.convert_to_r_matrix(input_df)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(input_df)

    normed_matrix = R_norm_func(R_matrix)
    normed_df = com.convert_robj(normed_matrix)
    print(normed_df)

    normed_df.index = input_df.index
    normed_df.columns = input_df.columns
    return normed_df
def gt_basic(es, gene_sets, pheno_class_column,
             model="logistic", permutations=100):
    """
    Run R's globaltest ``gt`` on an expression set and return the result table.

    @param es: Expression set with defined user class in pheno
    @type es: ExpressionSet

    @type gene_sets: environment.structures.GeneSets

    @param pheno_class_column: Column name of target classes in phenotype table
    @type pheno_class_column: string or None

    ``model`` and ``permutations`` are accepted but currently not passed on
    to ``gt``.
    """
    import sys

    if settings.CELERY_DEBUG:
        # Attach the PyCharm remote debugger.
        sys.path.append('/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg')
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    source_sets = gene_sets.get_gs()
    assay, filtered_sets = preprocess_df_gs(es.get_assay_data_frame(), source_sets)

    # The frame is transposed in pandas and transposed again in R.
    r_assay = com.convert_to_r_matrix(assay.T)
    response = es.get_pheno_column_as_r_obj(pheno_class_column)
    ds_r = R.r['t'](r_assay)
    gs_r = filtered_sets.to_r_obj()

    try:
        R.r['library']("globaltest")
        gt_instance = R.r['gt'](response, ds_r, subsets=gs_r)
    except:
        # Log whatever went wrong, then let it propagate unchanged.
        log.error("Unexpected error: %s" % sys.exc_info()[0])
        raise

    return com.convert_robj(gt_instance.do_slot('result'))
def convert_to_r_series(self, resampled_df, start_date, data_freq):
    """Convert a resampled DataFrame into an R ``ts`` time series.

    :param resampled_df: data to convert.
    :param start_date: start of the series; translated to an R ``start``
        value by the monthly or weekly helper.
    :param data_freq: 12 (monthly helper) or 52 (weekly helper).
    :raises NotImplementedError: for any other ``data_freq``.
    :return: the R ``ts`` object.
    """
    # Validate the frequency before doing any R conversion work.
    if data_freq == 12:
        start_val = self.get_start_for_r_monthly(start_date)
    elif data_freq == 52:
        start_val = self.get_start_for_r_series_weekly(start_date)
    else:
        raise NotImplementedError(
            "Implemented only for frequencies 12 and 52, got %r" % (data_freq,)
        )

    # convert to R matrix
    r_dataframe = com.convert_to_r_matrix(resampled_df)

    # convert to R time series
    ts = robjects.r['ts']
    return ts(r_dataframe, start=start_val, frequency=data_freq)
def fetch_equities_daily(self, symbols, ohlc=False,
                         r_type=False, returns=False, **kwargs):
    """Load daily Yahoo data for ``symbols``.

    :param symbols: list of symbols or a comma-separated string; an empty
        value returns an empty DataFrame.
    :param ohlc: use ``load_bars_from_yahoo`` instead of ``load_from_yahoo``.
    :param r_type: convert the result with ``convert_to_r_matrix``.
    :param returns: replace the data by ``(x[t] - x[t-1]) / x[t]``,
        back-filling missing values.
    :param kwargs: forwarded to the loader.
    """
    if not len(symbols):
        return pd.DataFrame()

    stocks = symbols.split(",") if isinstance(symbols, str) else symbols

    loader = load_bars_from_yahoo if ohlc else load_from_yahoo
    data = loader(stocks=stocks, **kwargs)

    # NOTE Would it work with a pandas panel ?
    if returns:
        change = data - data.shift(1)
        data = (change / data).fillna(method="bfill")

    return convert_to_r_matrix(data) if r_type else data
def quantile_norm_with_R(input_df):
    """Normalize a DataFrame with R's normalize.quantiles.

    Loads ``preprocessCore``, defines a small R wrapper, runs it on the
    converted frame and returns the result labelled with the index and
    columns of ``input_df``.
    """
    robjects.r('require(preprocessCore)')

    r_wrapper_source = """quantnorm <- function(inputmatrix) {
y<-normalize.quantiles(inputmatrix)
return(y)
}"""
    r_quantnorm = robjects.r(r_wrapper_source)

    r_result = r_quantnorm(com.convert_to_r_matrix(input_df))

    normed_df = com.convert_robj(r_result)
    normed_df.index, normed_df.columns = input_df.index, input_df.columns
    return normed_df
def calculate(self, method, data_frame, positive_samples, negative_samples):
    """Run ``siggenes::sam`` on the labelled samples, in sorted order.

    Samples and features are both sorted before the matrix is built.
    Positive samples are labelled 1 and negative samples 0. ``method`` is not
    used by this implementation.

    Returns a DataFrame indexed by feature with the columns ``Score``,
    ``FalseCalls``, ``Q-value``, ``P-value`` and ``StdDev``.
    """
    r = robjects.r

    ## construct matrix_r
    labelled = positive_samples + negative_samples
    samples = sorted(name for name in data_frame.axes[1] if name in labelled)
    features = sorted(data_frame.axes[0])

    matrix_r = common.convert_to_r_dataframe(data_frame[samples].loc[features])

    ## construct cls_r
    cls = {}
    for name in samples:
        if name in positive_samples:
            cls[name] = 1
        elif name in negative_samples:
            cls[name] = 0
    cls_r = common.convert_to_r_matrix(pandas.DataFrame([cls]))

    ## generate signature with method
    sam_out = self.siggenes.sam(matrix_r, r.c(cls_r))
    attribute_names = ("d", "vec.false", "q.value", "p.value", "s")
    sam_att = r.cbind(
        *[r.c(r.attributes(sam_out).rx2(attr)) for attr in attribute_names]
    )

    ## return results as a data_frame
    ocols = ["Score", "FalseCalls", "Q-value", "P-value", "StdDev"]
    output = {}
    for col_pos, col in enumerate(ocols, 1):
        # R indices are 1-based, so both counters start at 1.
        column_values = {}
        for row_pos, feature in enumerate(features, 1):
            column_values[feature] = sam_att.rx(row_pos, col_pos)[0]
        output[col] = column_values

    return pandas.DataFrame(output)
def fetch_equities_daily(self, equities, ohlc=False,
                         r_type=False, returns=False, **kwargs):
    """Load daily Yahoo data for ``equities``, labelled by the given names.

    :param equities: list of names or a comma-separated string; an empty
        value returns an empty DataFrame.
    :param ohlc: use ``load_bars_from_yahoo`` (labels go on ``items``)
        instead of ``load_from_yahoo`` (labels go on ``columns``).
    :param r_type: convert the result with ``convert_to_r_matrix``.
    :param returns: replace the data by ``(x[t] - x[t-1]) / x[t]``,
        back-filling missing values.
    :param kwargs: forwarded to the loader.
    """
    if not len(equities):
        return pd.DataFrame()

    if isinstance(equities, str):
        equities = equities.split(',')

    # Resolve every name to the symbol the data feed knows.
    symbols = []
    for equity in equities:
        symbols.append(self.datafeed.guess_name(equity))

    if ohlc:
        data = load_bars_from_yahoo(stocks=symbols, **kwargs)
        data.items = equities
    else:
        data = load_from_yahoo(stocks=symbols, **kwargs)
        data.columns = equities

    #NOTE Would it work with a pandas panel ?
    if returns:
        delta = data - data.shift(1)
        data = (delta / data).fillna(method='bfill')

    if not r_type:
        return data
    return convert_to_r_matrix(data)
behavioral_data = all_behavioral_data[:, 1:] rest_data = np.load(expanduser(rscorrfn)) X = rest_data[subjet_subset] Y = behavioral_data #demean S = Y.sum(axis=0) / Y.shape[0] Y -= S[np.newaxis, :] var = (Y**2).sum(axis=0) var[var == 0] = 1 Y /= var X[np.isnan(X)] = 1 df_X = pd.DataFrame(X) df_Y = pd.DataFrame(Y) rmat_X = com.convert_to_r_matrix(df_X) rmat_Y = com.convert_to_r_matrix(df_Y) ri.globalenv['X'] = rmat_X ri.globalenv['Y'] = rmat_Y # explained variable from sklearn.linear_model import LinearRegression limit_exp_var = len(keys) #save for later exp_var_X = [] exp_var_Y = [] for i in range(1, limit_exp_var + 1): n_com = i com.r(""" out <- CCA(x = X, z = Y, K = %i, niter = 100, standardize = FALSE, penaltyx = %f, penaltyz = %f)
def peakFound(cubematrix,t2m,hdr,loc_IS,delta_drug,delta_tiss,hws,hws_t,loc_tiss):
    """Locate internal-standard, drug and tissue peaks in every pixel spectrum.

    For each of the ``hdr[0]*hdr[1]`` columns of ``cubematrix`` the spectrum
    is sent to R, where ``createMassSpectrum`` and ``detectPeaks`` are called
    (presumably from the MALDIquant package -- confirm). Three peaks are then
    searched:

    * internal standard: the most intense detected peak with mass between
      ``loc_IS[1]`` and ``loc_IS[2]`` (R indexing), refined to the
      intensity-weighted mean mass over ``+/- hws`` points;
    * drug: the mass-axis value closest to ``peakIS_mean - delta_drug``,
      searched within ``+/- devst``;
    * tissue: same procedure as the internal standard, using ``loc_tiss``
      and ``hws_t``.

    A pixel is abandoned (its remaining entries stay 0) as soon as no
    internal-standard peak is found, a weighted mean is NaN, or no tissue
    peak is found.

    Parameters: ``cubematrix`` holds one spectrum per column; ``t2m`` is the
    mass axis written into column 0 of the working matrix; ``delta_tiss`` is
    exported to R but not used by any R expression in this function.

    Returns the tuple ``(peakIS_matrix, peakdrug_matrix, peaktissue_matrix,
    range_ISmatrix, range_drugmatrix, range_tissmatrix)``; the peak arrays
    have one column and the range arrays two, with one row per pixel.

    NOTE(review): all intermediate values live in the R global environment,
    so the statement order below matters.
    """
    # Mean spacing between consecutive mass-axis points (the last point is
    # left out by the slices). Used as the half-width of the drug window.
    devst=mean(t2m[1:t2m.size-1]-t2m[0:t2m.size-2])
    # Working two-column matrix: column 0 = mass axis, column 1 = intensities.
    matrix_data=np.zeros([np.size(t2m),2])
    # Per-pixel outputs, zero-initialised.
    peakIS_matrix=np.zeros([hdr[0]*hdr[1],1])
    peakdrug_matrix=np.zeros([hdr[0]*hdr[1],1])
    peaktissue_matrix=np.zeros([hdr[0]*hdr[1],1])
    range_drugmatrix=np.zeros([hdr[0]*hdr[1],2])
    range_tissmatrix=np.zeros([hdr[0]*hdr[1],2])
    range_ISmatrix=np.zeros([hdr[0]*hdr[1],2])
    matrix_data[:,0]=t2m
    for c in range(hdr[1] * hdr[0]):
        matrix_data[:,1]=cubematrix[:,c] # spectrum of single pixel "c"
        matrix_data_DF = pd.DataFrame(matrix_data,dtype='float')
        data = com.convert_to_r_matrix(matrix_data_DF)
        # Export the spectrum and all parameters to the R global environment.
        ro.globalenv['matrix_data_DF']=data
        ro.globalenv['hws']=hws
        ro.globalenv['loc_IS']=loc_IS
        ro.globalenv['hws_t']=hws_t
        ro.globalenv['loc_tiss']=loc_tiss
        ro.globalenv['delta_drug']=delta_drug
        ro.globalenv['delta_tiss']=delta_tiss
        ro.globalenv['devst']=devst
        ro.r('spectra<-createMassSpectrum(mass=matrix_data_DF[,c(1)], intensity=matrix_data_DF[,c(2)])')
        ro.r('peaks<-detectPeaks(spectra, SNR=2,halfWindowSize=hws)')
        ## identification of peak of internal standard
        ro.r('all_peaks_IS<-peaks@mass[(peaks@mass>=loc_IS[1])&(peaks@mass<=loc_IS[2])]')# all the peaks in this m/z range
        ro.r('posISs<-match(all_peaks_IS,spectra@mass)')
        ro.r('NpeaksIS<-length(all_peaks_IS)')
        NpeaksIS=ro.r('NpeaksIS')[0]
        # No internal-standard peak: leave this pixel at its zero defaults.
        if NpeaksIS==0:
            continue
        if NpeaksIS>1:
            # Several candidates: tabulate (position, mass, intensity) and
            # keep the position of the most intense one.
            ro.r('info_peaksIS<-matrix(data = 0,nrow = NpeaksIS ,ncol =3 )')
            ro.r('info_peaksIS[,c(1)]<-posISs')
            ro.r('info_peaksIS[,c(2)]<-all_peaks_IS')
            ro.r('info_peaksIS[,c(3)]<-spectra@intensity[posISs]')
            ro.r('pos_peakIS_moda<-info_peaksIS[match(max(info_peaksIS[,c(3)]),info_peaksIS[,c(3)]),c(1)]')
        else:
            ro.r('pos_peakIS_moda<-posISs')
        # The value fetched on the next line is not stored or used.
        ro.r('pos_peakIS_moda')[0]
        # Window of hws points on each side of the chosen peak:
        # columns are mass, intensity, position, |mass - weighted mean|.
        ro.r('matrix_lav<-matrix(data = 0,nrow = hws*2+1 ,ncol =4 )')
        ro.r('matrix_lav[,c(1)]<-spectra@mass[(pos_peakIS_moda-hws):(pos_peakIS_moda+hws)]')
        ro.r('matrix_lav[,c(2)]<-spectra@intensity[(pos_peakIS_moda-hws):(pos_peakIS_moda+hws)]')
        ro.r('matrix_lav[,c(3)]<-(pos_peakIS_moda-hws):(pos_peakIS_moda+hws)')
        # Intensity-weighted mean mass over the window.
        ro.r('peakIS_mean<-sum(matrix_lav[,c(1)]*matrix_lav[,c(2)])/sum(matrix_lav[,c(2)])')
        ro.r('matrix_lav[,c(4)]<-abs(matrix_lav[,c(1)]-peakIS_mean)')
        # Position of the window point whose mass is closest to that mean.
        pos_peakIS=ro.r('pos_peakIS<-matrix_lav[match(min(matrix_lav[,c(4)]),matrix_lav[,c(4)]),c(3)]')[0]
        peaks_IS=ro.r('peakIS_mean')
        if np.isnan(peaks_IS):
            continue
        # NpeaksIS is never 0 here (that case continued above).
        if NpeaksIS>0:
            ro.r('range_IS<-c(spectra@mass[pos_peakIS-hws], spectra@mass[pos_peakIS+hws])')
            rim=ro.r('range_IS')
            range_ISmatrix[c,0]=rim[0]
            range_ISmatrix[c,1]=rim[1]
            peakIS_matrix[c]=peaks_IS[0]
        # Drug peak: mass-axis points within +/- devst of (IS mean - delta_drug).
        ro.r('all_peaks_drug<-spectra@mass[(spectra@mass>=peakIS_mean-delta_drug-devst)&(spectra@mass<=peakIS_mean-delta_drug+devst)]')
        ro.r('Npeaks_drug<-length(all_peaks_drug)')
        all_peaks_drug=ro.r('all_peaks_drug')
        Npeaks_drug=ro.r('Npeaks_drug')[0]
        if Npeaks_drug>1:
            # Several candidates: pick the one closest to the expected mass.
            peaks_drug=0
            p=0
            min_diff_peaks_drug=ro.r('min(abs(peakIS_mean-delta_drug-all_peaks_drug))')[0]
            # NOTE(review): this relies on exact float equality between a
            # value computed in R and one computed in Python; if they never
            # match, p presumably runs past the end of all_peaks_drug.
            while peaks_drug<=0:
                if abs(peaks_IS[0]-delta_drug-all_peaks_drug[p])==min_diff_peaks_drug:
                    peaks_drug=all_peaks_drug[p]
                p=p+1
        else:
            # Zero or one candidate: the R vector itself is used as is.
            peaks_drug=all_peaks_drug
        peakdrug_matrix[c]= peaks_drug
        ro.globalenv['peaks_drug']=peaks_drug
        ro.r('pos_peakdrug<-match(peaks_drug,spectra@mass)')
        ro.r('range_drug<-c(spectra@mass[pos_peakdrug-hws], spectra@mass[pos_peakdrug+hws])')
        rdm=ro.r('range_drug')
        range_drugmatrix[c,0]=rdm[0]
        range_drugmatrix[c,1]=rdm[1]
        # identification of peak of tissue
        ro.r('all_peaks_tiss<-peaks@mass[(peaks@mass>=loc_tiss[1])&(peaks@mass<=loc_tiss[2])]')# all the peaks in the searched range
        ro.r('pos_tiss<-match(all_peaks_tiss,spectra@mass)')
        ro.r('Npeaks_tiss<-length(all_peaks_tiss)')
        NpeaksTiss=ro.r('Npeaks_tiss')[0]
        # No tissue peak: tissue outputs for this pixel stay at zero.
        if NpeaksTiss==0:
            continue
        if NpeaksTiss>1:
            # Several candidates: keep the most intense one.
            ro.r('info_peaksTiss<-matrix(data = 0,nrow = Npeaks_tiss ,ncol =3 )')
            ro.r('info_peaksTiss[,c(1)]<-pos_tiss')
            ro.r('info_peaksTiss[,c(2)]<-all_peaks_tiss')
            ro.r('info_peaksTiss[,c(3)]<-spectra@intensity[pos_tiss]')
            ro.r('peaks_tiss<-info_peaksTiss[match(max(info_peaksTiss[,c(3)]),info_peaksTiss[,c(3)]),c(2)]')
            ro.r('pos_peakTiss_moda<-info_peaksTiss[match(max(info_peaksTiss[,c(3)]),info_peaksTiss[,c(3)]),c(1)]')
        else:
            ro.r('pos_peakTiss_moda<-pos_tiss')
        # The value fetched on the next line is not stored or used.
        ro.r('pos_peakTiss_moda')[0]
        # Same window refinement as for the internal standard, with hws_t.
        ro.r('matrix_lav<-matrix(data = 0,nrow = hws_t*2+1 ,ncol =4 )')
        ro.r('matrix_lav[,c(1)]<-spectra@mass[(pos_peakTiss_moda-hws_t):(pos_peakTiss_moda+hws_t)]')
        ro.r('matrix_lav[,c(2)]<-spectra@intensity[(pos_peakTiss_moda-hws_t):(pos_peakTiss_moda+hws_t)]')
        ro.r('matrix_lav[,c(3)]<-(pos_peakTiss_moda-hws_t):(pos_peakTiss_moda+hws_t)')
        ro.r('peaktiss_mean<-sum(matrix_lav[,c(1)]*matrix_lav[,c(2)])/sum(matrix_lav[,c(2)])')
        ro.r('matrix_lav[,c(4)]<-abs(matrix_lav[,c(1)]-peaktiss_mean)')
        pos_peakTiss=ro.r('pos_peaktiss<-matrix_lav[match(min(matrix_lav[,c(4)]),matrix_lav[,c(4)]),c(3)]')[0]
        peaks_tiss=ro.r('peaktiss_mean')
        if np.isnan(peaks_tiss):
            continue
        # NpeaksTiss is never 0 here (that case continued above).
        if NpeaksTiss>0:
            ro.r('range_tiss<-c(spectra@mass[pos_peaktiss-hws_t], spectra@mass[pos_peaktiss+hws_t])')
            rtm=ro.r('range_tiss')
            range_tissmatrix[c,0]=rtm[0]
            range_tissmatrix[c,1]=rtm[1]
            peaktissue_matrix[c]=peaks_tiss[0]
    return (peakIS_matrix,peakdrug_matrix,peaktissue_matrix,range_ISmatrix, range_drugmatrix, range_tissmatrix)
# <codecell> robjects.r('require(preprocessCore)') # <codecell> robjects.r("""quantnorm <- function(inputmatrix) { y<-normalize.quantiles(inputmatrix) return(y) }""" ) # <codecell> small_dataframe = com.convert_to_r_matrix(cyto_data[['VEGF', 'HGF', 'Rantes']].T) output = robjects.r['quantnorm'](small_dataframe) # <codecell> normed = com.convert_robj(output).T # <codecell> def quantile_norm_with_R(input_df): R_norm_func = robjects.r("""quantnorm <- function(inputmatrix) { y<-normalize.quantiles(inputmatrix) return(y)
behavioral_data = all_behavioral_data[:, 1:] rest_data = np.load(expanduser(rscorrfn)) X = rest_data[subjet_subset] Y = behavioral_data #demean S = Y.sum(axis=0) / Y.shape[0] Y -= S[np.newaxis, :] var = (Y ** 2).sum(axis=0) var[var == 0] = 1 Y /= var X[np.isnan(X)] = 1 df_X = pd.DataFrame(X) df_Y = pd.DataFrame(Y) rmat_X = com.convert_to_r_matrix(df_X) rmat_Y = com.convert_to_r_matrix(df_Y) ri.globalenv['X'] = rmat_X ri.globalenv['Y'] = rmat_Y # explained variable from sklearn.linear_model import LinearRegression limit_exp_var = len(keys) #save for later exp_var_X = [] exp_var_Y = [] for i in range(1, limit_exp_var+1): n_com = i com.r( """ out <- CCA(x = X, z = Y, K = %i, niter = 100, standardize = FALSE,