def timex_test(events, groups):
    events_r = robjects.Matrix(events.T)
    return [
        timex_package.testCliqueAsGroup(robjects.IntVector(group),
                                        events_r).rx2("pvalueLRT")[0]
        for group in groups
    ]

def pca_impute(data, method='bpca', scale='none', center=True, npcs=5):
    """
    Impute the missing data elements using PCA.

    Requires the 'pcaMethods' Bioconductor package.

    Args:
        * data: numpy.ndarray with missing values.
        * method: Which PCA algorithm to use. One of:
            * 'svd'
            * 'ppca'
            * 'bpca'
            * 'svdImpute'
            * 'nipals'
            * 'robustPca'
        * scale: scaling option passed to the R 'prep' function.
        * center: whether to center the data; passed to 'prep'.
        * npcs: number of principal components.

    """
    r.importr('pcaMethods')
    data = remove_na_rows(data)
    r_data = robjects.Matrix(data)
    prepped = robjects.r['prep'](r_data, scale=scale, center=center)
    result = robjects.r['pca'](prepped, method=method, center=False, nPcs=npcs)
    imputed = numpy.array(robjects.r['completeObs'](result))
    return _same_type_(imputed, data)

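
# A minimal usage sketch (hypothetical helper, not part of the original
# module): impute a random matrix after knocking out one entry with Bayesian
# PCA. Assumes numpy is imported here as 'numpy' and that the 'pcaMethods'
# Bioconductor package is installed.
def _example_pca_impute():
    example = numpy.random.rand(50, 8)
    example[3, 2] = numpy.nan  # introduce a missing value
    return pca_impute(example, method='bpca', npcs=3)
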
def heatmap(data, bicluster=None, local=False, palette=None, **kwargs):
    """
    Plots the dataset as a heatmap, optionally rearranged to highlight a
    bicluster, if one is supplied.

    Args:
        * data: a numpy.ndarray to plot.
        * bicluster: an optional bicluster.
        * local: if True, only plot the bicluster's submatrix; otherwise
            plot the whole dataset, but with rows and columns shuffled so
            that the bicluster is in the top-left corner.
        * palette: The color palette to use. Must be an RPy2 object
            representing a color palette, eg:
            palette=rpy2.r.r("heat.colors(10)")
        * kwargs: any other arguments that the R function 'plot' accepts.
            Common ones include: file, width, height.

    """
    kwargs["local"] = local

    if bicluster is None:
        # hack to keep the visualization from mirroring itself vertically
        data = data[::-1]

    if bicluster is not None:
        nrows, ncols = data.shape
        assert max(bicluster.rows) < nrows
        assert max(bicluster.cols) < ncols
        kwargs["bicResult"] = _get_r_biclust_([bicluster])
        kwargs["number"] = 1

    if palette is not None:
        kwargs["beamercolor"] = True
        kwargs["paleta"] = palette

    _rplot_("drawHeatmap", r.Matrix(data), **kwargs)

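
# A minimal usage sketch (hypothetical helper, not part of the original
# module): draw a plain heatmap of a random matrix, then one rearranged
# around a made-up bicluster covering the first 10 rows and 5 columns.
# Assumes numpy and the Bicluster class used elsewhere in this collection
# are importable here; 'file' is forwarded to the R plotting device.
def _example_heatmap():
    example = numpy.random.rand(100, 30)
    heatmap(example, file="heatmap_full.png")
    bic = Bicluster(list(range(10)), list(range(5)), example)
    heatmap(example, bicluster=bic, local=True, file="heatmap_bicluster.png")
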
def _call_helper_(function_name, data, **kwargs):
    robjects.r.library('fabia')
    r_data = robjects.Matrix(data)
    params = kwargs
    # pass the converted R matrix, not the raw numpy array, as 'X'
    params['X'] = r_data
    func = robjects.r[function_name]
    factorization = func(**params)
    return _extract_biclusters_(factorization)

def _rfunction_(functionname, data, **kwargs):
    """
    Call the named function from the R 'biclust' package on the data and
    return the result as the same type as the input.
    """
    #get an R object for the data
    r_data = robjects.Matrix(data)
    robjects.r.library('biclust')
    #get the function
    func = robjects.r[functionname]
    result = numpy.array(func(r_data, **kwargs))
    return _same_type_(result, data)

def affinity(E, preference_fraction=0.5, simdist_function="pearson_correlation",
             damping=0.5, max_iter=200, **kwargs):
    similarities = simdist(E, simdist_function, **kwargs)
    similarities_max = similarities.values.max()
    similarities_min = similarities.values.min()
    preference = (similarities_max - similarities_min) * preference_fraction

    ro.packages.importr("apcluster")
    rresults = ro.r["apcluster"](s=ro.Matrix(similarities.values), p=preference)

    labels = np.array(ro.r["labels"](rresults, "enum"))
    modules = convert_labels2modules(labels, E.columns)

    return modules

def parallel_coordinates(bicluster, plot='cols', compare=True, info=False,
                         ylab="Value", color=1, **kwargs):
    """
    Parallel coordinate plot of a bicluster.

    Args:
        * bicluster: Bicluster to plot.
        * plot: 'rows', 'cols', or 'both'.
        * compare: If True, also plots the rest of the rows/columns in a
            lighter color.
        * info: If True, include an informative title.
        * ylab: y-axis label.
        * color: foreground color; integer.
        * kwargs: any other arguments that the R function 'plot' accepts.
            Common ones include: file, width, height.

    """
    kwargs.update(locals())

    valid_plots = ['cols', 'rows', 'both']
    if plot not in valid_plots:
        raise Exception("Error: 'plot' argument must be one of: {0}".format(
            " ".join(valid_plots)))

    for k in ('bicluster', 'plot', 'kwargs', 'color'):
        kwargs.pop(k)
    kwargs['col'] = color

    bicResult = _get_r_biclust_([bicluster])
    number = 1

    assert bicluster.data is not None
    data = r.Matrix(bicluster.data)

    if plot == 'rows':
        kwargs["plotcol"] = False
    elif plot == 'both':
        kwargs["plotBoth"] = True

    _rplot_("parallelCoordinates", data, bicResult, number, **kwargs)

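
# A minimal usage sketch (hypothetical helper, not part of the original
# module): plot both the row and column profiles of a made-up bicluster over
# random data. Assumes numpy and the Bicluster class used elsewhere in this
# collection are importable here.
def _example_parallel_coordinates():
    example = numpy.random.rand(60, 20)
    bic = Bicluster(list(range(8)), list(range(4)), example)
    parallel_coordinates(bic, plot='both', compare=True,
                         file="bicluster_profile.png")
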
def clues(E, disMethod="1-corr", n0=300, alpha=0.05, eps=1e-4, itmax=20,
          strengthMethod="sil", strengthIni=-1, **kwargs):
    ro.packages.importr("clues")
    rresults = ro.r["clues"](
        ro.Matrix(standardize(E).T.values),
        disMethod=disMethod,
        n0=n0,
        alpha=alpha,
        eps=eps,
        itmax=itmax,
        strengthMethod=strengthMethod,
        strengthIni=strengthIni,
        quiet=False
    )
    modules = convert_labels2modules(list(rresults.rx2("mem")), E.columns)
    return modules

def bubbleplot(data, biclusters1, biclusters2=None, biclusters3=None,
               projection='mean', show_labels=False, **kwargs):
    """
    A bubbleplot comparison of multiple sets of biclusters, which attempts
    to project them down to two dimensions.

    Args:
        * data: numpy.ndarray on which all biclusters are defined.
        * biclusters1: a list of biclusters.
        * biclusters2: a list of biclusters.
        * biclusters3: a list of biclusters.
        * projection: projection method; one of 'mean', 'isomds', 'cmdscale'.
        * show_labels: if True, label each bicluster in the plot.
        * kwargs: any other arguments that the R function 'plot' accepts.
            Common ones include: file, width, height.

    """
    valid_projections = ['mean', 'isomds', 'cmdscale']
    if projection not in valid_projections:
        raise Exception(
            "Error: 'projection' argument must be one of: {0}".format(
                " ".join(valid_projections)))

    kwargs['projection'] = projection
    kwargs['showLabels'] = show_labels

    bicResult1 = _get_r_biclust_(biclusters1)
    kwargs['bicResult1'] = bicResult1
    if biclusters2 is not None:
        bicResult2 = _get_r_biclust_(biclusters2)
        kwargs['bicResult2'] = bicResult2
    if biclusters3 is not None:
        bicResult3 = _get_r_biclust_(biclusters3)
        kwargs['bicResult3'] = bicResult3

    _rplot_("bubbleplot", r.Matrix(data), **kwargs)

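
# A minimal usage sketch (hypothetical helper, not part of the original
# module): compare two made-up sets of biclusters on random data with the
# default 'mean' projection. Assumes numpy and the Bicluster class used
# elsewhere in this collection are importable here.
def _example_bubbleplot():
    example = numpy.random.rand(80, 30)
    set1 = [Bicluster(list(range(10)), list(range(5)), example)]
    set2 = [Bicluster(list(range(5, 15)), list(range(3, 8)), example)]
    bubbleplot(example, set1, set2, projection='mean', show_labels=True,
               file="bubbleplot.png")
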
def calculatingAovR(tableFactor, Data, Formula):
    """Computes and fits an analysis of variance (ANOVA) model."""
    numpy2ri.activate()
    for t in tableFactor:
        factorName = t[0]
        factorType = t[1]
        factorData = t[2]
        # send the data to global variables in R: a FloatVector for a
        # Covariate, otherwise a factor (Subject, Within, or Between type)
        if factorType == 'Covariate':
            tmp = robjects.FloatVector(factorData)
            robjects.globalenv[factorName] = tmp
        else:
            tmp = robjects.r.factor(factorData)
            robjects.globalenv[factorName] = tmp

    DataR = robjects.Matrix(Data.T)
    robjects.globalenv["DataR"] = DataR
    TextR = 'aov(%s)' % Formula
    express = robjects.r.parse(text=TextR)
    Fit = robjects.r.eval(express)
    robjects.globalenv["Fit"] = Fit
    raw = robjects.r.summary(Fit)

    df = []
    for r in raw:
        for d in r[0][0][:-1]:
            df.append([int(d), int(r[0][0][-1])])

    pValue = np.hstack([np.array([c[4][:-1] for c in r]) for r in raw])
    FValue = np.hstack([np.array([c[3][:-1] for c in r]) for r in raw])

    terms = []
    if len(raw) == 1:
        for r in raw[0]:
            for t in r.rownames[0:-1]:
                terms.append(t.replace(' ', ''))
    else:
        for i in raw:
            for r in i:
                for t in r.rownames[0:-1]:
                    terms.append(t.replace(' ', ''))

    return pValue, FValue, terms, df

def gwr_mth(self, pt, mth, nnghs=None, stns_rm=None):

    if nnghs is None:
        # Get the nnghs to use from the optimal values at surrounding stations
        nnghs = self._GwrTairAnom__get_nnghs(pt, mth, stns_rm)

    self.stn_slct.set_ngh_stns(pt[LAT], pt[LON], nnghs, load_obs=True,
                               stns_rm=stns_rm, obs_mth=mth)

    ngh_obs = self.stn_slct.ngh_obs
    ngh_stns = self.stn_slct.ngh_stns
    ngh_wgt = self.stn_slct.ngh_wgt

    ngh_obs_cntr = ngh_obs - ngh_stns[get_norm_varname(mth)]

    a_pt = np.array(
        [pt[LON], pt[LAT], pt[ELEV], pt[TDI], pt[get_lst_varname(mth)]])

    rslt = r.gwr_anomaly(robjects.FloatVector(ngh_stns[LON]),
                         robjects.FloatVector(ngh_stns[LAT]),
                         robjects.FloatVector(ngh_stns[ELEV]),
                         robjects.FloatVector(ngh_stns[TDI]),
                         robjects.FloatVector(ngh_stns[get_lst_varname(mth)]),
                         robjects.FloatVector(ngh_wgt),
                         robjects.Matrix(ngh_obs_cntr),
                         robjects.FloatVector(a_pt))

    fit_anom = np.array(rslt.rx('fit_anom'))
    nrow = np.array(rslt.rx('fit_nrow'))[0]
    ncol = np.array(rslt.rx('fit_ncol'))[0]
    fit_anom = np.reshape(fit_anom, (nrow, ncol), order='F')

    interp_anom = np.array(rslt.rx('pt_anom')).ravel()
    interp_vals = interp_anom + pt[get_norm_varname(mth)]

    return interp_vals

def make_isa_data(nrows=300, ncols=50, nclusts=3, nclustrows=None,
                  nclustcols=None, noise=0, bicluster_signals=None,
                  bicluster_noise=None, noverlap_rows=0, noverlap_cols=None,
                  shuffle=None):
    """
    Make ISA-style data.

    Generates a dataset using the Bioconductor 'isa2' package's
    make.isa.data function. If an argument is None, it is not included,
    and isa2's defaults are used.

    Requires that 'isa2' be installed.

    Args:
        * nrows: Number of rows in the data matrix.
        * ncols: Number of columns in the data matrix.
        * nclusts: Number of biclusters.
        * nclustrows: Rows in each bicluster.
            Defaults to round(0.5 * num_rows/num_fact).
        * nclustcols: Cols in each bicluster.
            Defaults to round(0.5 * num_cols/num_fact).
        * noise: Standard deviation of normal noise in background.
        * bicluster_signals: List of base signals for each bicluster.
            Defaults to 1's.
        * bicluster_noise: List of noise standard deviations for each
            bicluster. Defaults to 0's.
        * noverlap_rows: Number of bicluster rows that overlap.
        * noverlap_cols: Number of bicluster columns that overlap.
            Defaults to 'noverlap_rows'.
        * shuffle: If True, shuffle rows and columns.

    """
    args = locals()
    isa_map = dict(
        nrows='num_rows',
        ncols='num_cols',
        nclusts='num_fact',
        nclustrows='mod_row_size',
        nclustcols='mod_col_size',
        noise='noise',
        bicluster_signals='mod_signal',
        bicluster_noise='mod_noise',
        noverlap_rows='overlap_row',
        noverlap_cols='overlap_col',
    )
    isa_args = dict()
    for key, argkey in isa_map.items():
        isa_args[argkey] = args[key]

    #remove empty keys
    empty_keys = []
    for key in isa_args:
        if isa_args[key] is None:
            empty_keys.append(key)
    for key in empty_keys:
        isa_args.pop(key)

    for key in ['mod_signal', 'mod_noise']:
        if key in isa_args:
            isa_args[key] = robjects.FloatVector(list(isa_args[key]))

    robjects.r.library('isa2')

    #get data
    func = robjects.r['isa.in.silico']
    result = func(**isa_args)

    #convert to python
    data = numpy.array(robjects.Matrix(result[0])).copy()
    rows = numpy.array(robjects.Matrix(result[1])).copy()
    cols = numpy.array(robjects.Matrix(result[2])).copy()

    nbiclusters = rows.shape[1]
    row_list = []
    for i in range(nbiclusters):
        row = list(rows[:, i].nonzero()[0])
        row_list.append(row)
    col_list = []
    for i in range(nbiclusters):
        col = list(cols[:, i].nonzero()[0])
        col_list.append(col)

    expected = []
    for r, c in zip(row_list, col_list):
        expected.append(Bicluster(r, c, data))

    if shuffle:
        data, expected = _shuffle_(data, expected)
    return data, expected

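
# A minimal usage sketch (hypothetical helper, not part of the original
# module): generate a small shuffled dataset with two planted biclusters.
# Assumes the 'isa2' Bioconductor package is installed, as noted above.
def _example_make_isa_data():
    data, expected = make_isa_data(nrows=100, ncols=30, nclusts=2,
                                   noise=0.1, shuffle=True)
    print(data.shape, len(expected))
    return data, expected
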
    # tail of the prepareCombination helper: append one candidate
    # combination per remaining feature
    for j in range(len(remained)):
        combination = np.zeros(bioN)
        tmpselected = np.append(selected, remained[j])
        combination[tmpselected - 1] = 1
        comb = np.concatenate(([combination], comb))
    return comb


## implement iteration
thresholdN = 10
iterationT = 2
while iterationT < thresholdN:
    iterationT = iterationT + 1
    comb = prepareCombination(topres, bioN)
    rcomb = numpy2ri.py2ri(comb)
    rcomb = robjects.Matrix(rcomb)
    robjects.globalenv['rcomb'] = rcomb
    rscript_calC = '''
    rcomb <- data.frame(rcomb)
    starttimeC <- Sys.time()
    resC <- func_bycb(rcomb)
    endtimeC <- Sys.time()
    ctimeC <- endtimeC - starttimeC
    '''
    robjects.r(rscript_calC)
    #print(robjects.r['head']('rcomb'))
    npresC = np.array(robjects.r['resC'])
    npresC = np.reshape(npresC, newshape=(npresC.shape[0], npresC.shape[1]))
    npresC = np.transpose(npresC)
    npres = np.concatenate((npres, npresC), axis=0)
    tmpres = -1 * npres

def isa(data, thr_row=None, thr_col=None, no_seeds=100,
        direction=['updown', 'updown']):
    """
    ISA biclustering algorithm.

    Args:
        * data: numpy.ndarray.
        * thr_row: threshold value for rows.
        * thr_col: threshold value for cols.
        * no_seeds: number of seeds to generate biclusters.
        * direction: either 'up' for upregulated, 'down' for downregulated,
            or 'updown' for both (default).

    Returns:
        A list of biclusters.

    """
    #load the isa library
    robjects.r.library('isa2')

    #get an R object for the data
    r_data = robjects.Matrix(data)

    def handle_threshold(x):
        if x is None:
            x = robjects.r['seq'](1, 3, by=0.5)
        else:
            if not isiterable(x):
                x = [x]
            x = robjects.FloatVector(list(x))
        return x

    thr_row = handle_threshold(thr_row)
    thr_col = handle_threshold(thr_col)
    direction = robjects.StrVector(direction)

    #run biclustering
    func = robjects.r('isa')
    result = func(r_data, thr_row, thr_col, no_seeds, direction)

    #get rowXnumber array
    row_matrix = numpy.array(robjects.Matrix(result[0]))
    #get columnXnumber array
    col_matrix = numpy.array(robjects.Matrix(result[1]))

    num_biclusters = row_matrix.shape[1]
    assert num_biclusters == col_matrix.shape[1]

    #make list of biclusters
    biclusters = []
    for i in range(num_biclusters):
        row_vals = row_matrix[:, i]
        col_vals = col_matrix[:, i]
        rows = [index for index, elt in enumerate(row_vals) if elt]
        cols = [index for index, elt in enumerate(col_vals) if elt]
        biclusters.append(Bicluster(rows, cols, data=data))
    return biclusters

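
# A minimal usage sketch (hypothetical helper, not part of the original
# module): run ISA on a synthetic dataset from make_isa_data above and
# report how many biclusters were recovered. Assumes the 'isa2' Bioconductor
# package is installed.
def _example_isa():
    data, _ = make_isa_data(nrows=100, ncols=30, nclusts=2, shuffle=True)
    found = isa(data, no_seeds=50)
    print("recovered {0} biclusters".format(len(found)))
    return found
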