def fit_nbinom(samples):
    from rpy import r
    r.library('MASS')
    f = r.fitdistr(samples, 'negative binomial')
    s, m = f['estimate']['size'], f['estimate']['mu']
    qp = r.qnbinom(r.ppoints(samples), size=s, mu=m)
    return qp, s, m
def get_quadprog(lc, trend_set):
    '''
    Return the de-trended lc obtained by quadratic programming.
    The free parameters are constrained to be non-negative.
    See Kim et al. 2008 for more details.

    lc : Original light curve of flux.
    trend_set : Set of trend light curves constructed by the create_trend routine.

    return : De-trended light curve.
    '''
    r.library('quadprog')
    X = transpose(trend_set)
    dmat = r.crossprod(X, X)
    dvec = r.crossprod(lc, X)
    results = r.solve_QP(dmat, dvec, r.diag(len(trend_set)))
    #print results['solution'], results['value']
    return lc - dot(results['solution'], trend_set)
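# A minimal usage sketch (added for illustration, not from the original
# source): detrend a toy light curve against two made-up trends. It assumes
# the module-level numpy and rpy imports used by get_quadprog, and that rpy
# converts numpy arrays; in real use trend_set would come from the
# create_trend routine mentioned in the docstring.
if __name__ == '__main__':
    from numpy import array, sin, linspace
    t = linspace(0.0, 10.0, 200)
    trend_set = array([sin(t), t / 10.0])   # two artificial trend light curves
    lc = 0.5 * sin(t) + 0.1 * t + 0.01      # flux contaminated by both trends
    detrended = get_quadprog(lc, trend_set)
    print detrended[:5]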
def calcKinship(snps):
    """ Requires EMMA to be installed. """
    a = array(snps)
    r.library("emma")
    return r.emma_kinship(a)
def fit_poisson(samples):
    from rpy import r
    r.library('MASS')
    f = r.fitdistr(samples, 'poisson')
    l = f['estimate']['lambda']  #predicted mean
    qp = r.qpois(r.ppoints(samples), l)
    return qp, l
def fit_gamma(samples):
    from rpy import r
    samples = [double(n) for n in samples if n > 0]  #because rpy does not like longs!
    r.library('MASS')
    f = r.fitdistr(samples, 'gamma')
    shap, rat = f['estimate']['shape'], f['estimate']['rate']
    qp = r.qgamma(r.ppoints(samples), shape=shap, rate=rat)
    return qp, shap, rat
def fit_weibull(samples):
    from rpy import r
    #samples = [double(n) for n in samples if n > 0]  #because rpy does not like longs!
    r.library('MASS')
    f = r.fitdistr(samples, 'weibull')
    sc, sh = f['estimate']['scale'], f['estimate']['shape']
    qp = r.qweibull(r.ppoints(samples), scale=sc, shape=sh)
    return qp, sc, sh
def fit_exponential(samples):
    from rpy import r
    samples = [double(n) for n in samples]  #because rpy does not like longs!
    r.library('MASS')
    f = r.fitdistr(samples, 'exponential')
    rat = f['estimate']['rate']
    qp = r.qexp(r.ppoints(samples), rate=rat)
    return qp, rat
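# A minimal usage sketch (added for illustration, not from the original
# source): each fit_* helper above returns theoretical quantiles computed at
# r.ppoints positions plus the fitted parameters, which is exactly the shape
# needed for a quantile-quantile plot against the sorted samples.
if __name__ == '__main__':
    from rpy import r
    samples = r.rpois(500, 4.0)   # toy data drawn from R itself
    qp, lam = fit_poisson(samples)
    print 'fitted lambda:', lam
    # Q-Q pairs: sorted observed values vs. fitted theoretical quantiles
    for obs, theo in zip(sorted(samples), qp)[:5]:
        print obs, theo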
def randomForest_fit(self, known_data, parameter_list, bit_string="1111111"):
    """
    03-17-06
    2006-10-30 add avg_degree(vertex_gradient) and unknown_cut_off
    """
    if self.debug:
        sys.stderr.write("Fitting randomForest...\n")
    mty = parameter_list[0]
    from rpy import r
    r._libPaths(os.path.join(lib_path, "R"))  # better than r.library("randomForest", lib_loc=os.path.join(lib_path, "R")) (see plone doc)
    r.library("randomForest")
    coeff_name_list = [
        "p_value",
        "recurrence",
        "connectivity",
        "cluster_size",
        "gradient",
        "avg_degree",
        "unknown_ratio",
    ]  # 2006-10-30
    formula_list = []
    for i in range(len(bit_string)):
        if bit_string[i] == "1":
            formula_list.append(coeff_name_list[i])
    formula = r("is_correct~%s" % "+".join(formula_list))
    known_data = array(known_data)
    set_default_mode(NO_CONVERSION)
    data_frame = r.as_data_frame(
        {
            "p_value": known_data[:, 0],
            "recurrence": known_data[:, 1],
            "connectivity": known_data[:, 2],
            "cluster_size": known_data[:, 3],
            "gradient": known_data[:, 4],
            "avg_degree": known_data[:, 5],
            "unknown_ratio": known_data[:, 6],
            "is_correct": r.factor(known_data[:, -1]),
        }
    )  # 03-17-06, watch r.factor  # 2006-10-30
    if mty > 0:
        fit = r.randomForest(formula, data=data_frame, mtry=mty)  # mtry is the randomForest argument name
    else:
        fit = r.randomForest(formula, data=data_frame)
    del data_frame
    if self.debug:
        sys.stderr.write("Done fitting randomForest.\n")
    return fit
def runEmma(phed, p_i, k, snps):
    # Assume that the accessions are ordered.
    i = phed.getPhenIndex(p_i)
    r.library("emma")
    phenValues = []
    for vals in phed.phenotypeValues:
        phenValues.append(float(vals[i]))
    phenArray = array([phenValues])
    snpsArray = array(snps)
    res = r.emma_REML_t(phenArray, snpsArray, k)
    # print res
    return res
def compute(self, waveforms, spike_times, minG=1, maxG=32):
    #~ from rpy import r
    if not R_available:
        return None
    r.library('mclust')
    ret = r.Mclust(waveforms, minG=minG, maxG=maxG)
    code = array(ret['classification'], dtype='i')
    return code
def LinearRegression(ls1, ls2, return_rsqrd):
    intercept = 0  ### when forced through the origin
    from rpy import r
    r.library('MASS')
    k = r.options(warn=-1)  ### suppress all warning messages from R
    #print ls1; print ls2
    d = r.data_frame(x=ls1, y=ls2)
    model = r("y ~ x - 1")  ### when not forced through the origin it is r("y ~ x")
    fitted_model = r.rlm(model, data=d)  ### errors: rlm failed to converge in 20 steps - maxit=21
    slope = fitted_model['coefficients']['x']
    #intercept = fitted_model['coefficients']['(Intercept)']
    if return_rsqrd == 'yes':
        from scipy import stats
        rsqrd = math.pow(stats.linregress(ls1, ls2)[2], 2)
        return slope, rsqrd
    else:
        return slope
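# A minimal usage sketch (added for illustration, not from the original
# source): recover a robust slope through the origin for a roughly
# proportional relationship, here y ~ 2x with a little noise. It assumes
# math is imported at module level, as LinearRegression requires.
if __name__ == '__main__':
    ls1 = [1.0, 2.0, 3.0, 4.0, 5.0]
    ls2 = [2.1, 3.9, 6.2, 7.8, 10.1]
    slope, rsqrd = LinearRegression(ls1, ls2, 'yes')
    print 'slope:', slope, 'r^2:', rsqrd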
def compute(self, waveforms, spike_times, centers=16, iter_base=10,
            base_centers=30, hclust_method="ward"):
    if not R_available:
        return None
    r.library('e1071')
    ret = r.bclust(waveforms, centers=centers, iter_base=iter_base,
                   base_centers=base_centers, hclust_method=hclust_method)
    #~ print ret
    code = numpy.array(ret['cluster'], dtype='i')
    #~ print type(code)
    return code
def rpart_fit_and_predict(self, all_data, known_data, rpart_cp, loss_matrix, prior_prob, bit_string='11111'):
    """
    11-09-05
        1st use known_data to get the fit model
        2nd use the fit model to do prediction on all_data, result is prob for each class
    11-09-05 add rpart_cp
    11-17-05 add loss_matrix, prior_prob
        return two pred
    """
    sys.stderr.write("rpart fitting and predicting...\n")
    r.library("rpart")
    coeff_name_list = ['p_value', 'recurrence', 'connectivity', 'cluster_size', 'gradient']
    formula_list = []
    for i in range(len(bit_string)):
        if bit_string[i] == '1':
            formula_list.append(coeff_name_list[i])
    #11-17-05 transform into array
    all_data = array(all_data)
    known_data = array(known_data)
    set_default_mode(NO_CONVERSION)
    data_frame = r.as_data_frame({"p_value": known_data[:, 0], "recurrence": known_data[:, 1],
                                  "connectivity": known_data[:, 2], "cluster_size": known_data[:, 3],
                                  "gradient": known_data[:, 4], "is_correct": known_data[:, -1]})
    if prior_prob:
        prior_prob = [prior_prob, 1 - prior_prob]  #get the full list
        fit = r.rpart(r("is_correct~%s" % '+'.join(formula_list)), data=data_frame, method="class",
                      control=r.rpart_control(cp=rpart_cp),
                      parms=r.list(prior=prior_prob, loss=r.matrix(loss_matrix)))
    else:
        fit = r.rpart(r("is_correct~%s" % '+'.join(formula_list)), data=data_frame, method="class",
                      control=r.rpart_control(cp=rpart_cp),
                      parms=r.list(loss=r.matrix(loss_matrix)))
    set_default_mode(BASIC_CONVERSION)
    pred_training = r.predict(fit, data_frame, type=["class"])
    del data_frame
    set_default_mode(NO_CONVERSION)
    all_data_frame = r.as_data_frame({"p_value": all_data[:, 0], "recurrence": all_data[:, 1],
                                      "connectivity": all_data[:, 2], "cluster_size": all_data[:, 3],
                                      "gradient": all_data[:, 4], "is_correct": all_data[:, -1]})
    set_default_mode(BASIC_CONVERSION)
    pred = r.predict(fit, all_data_frame, type=["class"])  #11-17-05 type=c("class")
    del all_data_frame
    sys.stderr.write("Done rpart fitting and predicting.\n")
    return pred, pred_training
def _mcmc_betas_same_sources(self, tag_list):
    """
    The given tag_list contains tags that all have the same features available.
    Train on the tags in tag_list using only the songs in self.only_these_songs,
    or all available songs if self.only_these_songs is None.
    """
    if not self.production_run:
        self.mcmc_reps = 75  # save time
    rc.library("bayesm")
    data = []
    for tag in tag_list:
        data.append(rc.list(X=self.X[tag], y=self.y[tag]))
    rpy.set_default_mode(rpy.NO_CONVERSION)  # Turn off conversion so that lm returns Robj.
    data = rc.list(*data)
    if self.regtype in ["Hierarchical Linear", "Hierarchical Mixture"]:
        Data = rc.list(regdata=data)
    elif self.regtype == "Hierarchical Logistic":
        Data = rc.list(lgtdata=data)
    if self.regtype == "Hierarchical Mixture":
        Prior = rc.list(ncomp=self.ncomp)
    Mcmc = rc.list(R=self.mcmc_reps)
    rpy.set_default_mode(rpy.BASIC_CONVERSION)
    try:
        if self.regtype == "Hierarchical Linear":
            output = rc.rhierLinearModel(Data=Data, Mcmc=Mcmc)
        elif self.regtype == "Hierarchical Logistic":
            output = rc.rhierBinLogit(Data=Data, Mcmc=Mcmc)
        elif self.regtype == "Hierarchical Mixture":
            output = rc.rhierLinearMixture(Data=Data, Prior=Prior, Mcmc=Mcmc)
    except:
        #pdb.set_trace()
        self._info_about_r_error(tag_list)
        return
    beta_matrix = output['betadraw'].mean(axis=2)  # nregressions x ncoeffs, averaged along the third dim
    matrix_index = 0
    for tag in tag_list:
        cur_tag_beta_vec = beta_matrix[matrix_index, :]
        beta_dict_list = [dict([('beta', coeff)]) for coeff in cur_tag_beta_vec]
        self.beta[tag] = dict(zip(self.sorted_sources[tag], beta_dict_list))
        self.stats[tag] = dict()  # I'm not currently storing any stats for hierarchical regressions.
        matrix_index += 1
def smooth_data(data):
    sample_data = data[0]
    window_size = data[1]
    for rep_num in range(sample_data.get_number_of_replicates()):
        for chrom in sample_data.get_chromosome_list():
            met_manager = sample_data.get_manager_of_chrom(chrom)
            pos = []
            m = []
            cov = []
            for methyl_c in met_manager:
                pos.append(methyl_c.position)
                m.append(methyl_c.get_methylrate(rep_num))
                cov.append(methyl_c.get_coverage(rep_num))
            r.warnings()
            r.library("locfit")
            r.assign("pos", pos)
            r.assign("m", m)
            r.assign("cov", cov)
            r.assign("h", window_size)
            r("posm=data.frame(pos,m)")
            r("fit=locfit(m~lp(pos,h=h),data=posm,maxk=1000000,weights=cov)")
            r("pp=preplot(fit,where='data',band='local',newdata=data.frame(pos=pos))")
            fit = r("pp")["fit"]
            xev_list = r("unlist(pp$xev$xev)")
            for i, each in enumerate(xev_list):
                position = int(each[0])
                methyl_c = met_manager.get_methyl_c(position)
                if methyl_c:
                    # clamp the smoothed rate to [0, 1]
                    if 1 <= fit[i]:
                        smoothedrate = 1
                    elif fit[i] <= 0:
                        smoothedrate = 0
                    else:
                        smoothedrate = fit[i]
                    methyl_c.update_methylrate(rep_num, smoothedrate)
                else:
                    sys.stderr.write("methyl_c doesn't exist at %d\n" % position)
                    sys.exit(1)
def get_linprog(lc, trend_set):
    '''
    Return the de-trended lc obtained by linear programming.
    The free parameters are constrained to be non-negative.

    lc : Original light curve of flux.
    trend_set : Set of trend light curves constructed by the create_trend routine.

    return : De-trended light curve.
    '''
    r.library('linprog')
    X = transpose(trend_set)
    #dmat = r.crossprod(X, X)
    dvec = r.crossprod(lc, X)
    results = r.solveLP(dvec, zeros([len(trend_set)]), r.diag(len(trend_set)))
    print results['opt'], results['solution']
    sys.exit()
# Written by Joyce Tipping <*****@*****.**>
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
#
# This library provides helpful functions for approximating the binomial with the skew normal

from __future__ import division
from rpy import r
import math

r.library('sn')

#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Binomial
#
def binomial_pmf(n, p, pairs=False):
    # Given n and p, returns a binomial pmf as two arrays of xs and ys, respectively.
    # If pairs is True, it returns the pmf as an array of points.
    xs = r.seq(0, n)
    ys = r.dbinom(xs, n, p)
    return pair(xs, ys) if pairs else {'xs': xs, 'ys': ys}

#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Normal Approximation
#
def normal_pdf(mu, sigma, pairs=False):
    # Given mu and sigma, returns a normal pdf as two arrays of xs and ys, respectively.
    # If pairs is True, it returns the pdf as an array of points.
    xs = r.seq(mu - 5*sigma, mu + 5*sigma, 0.01)
    ys = r.dnorm(xs, mu, sigma)
    return pair(xs, ys) if pairs else {'xs': xs, 'ys': ys}
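# The pair() helper used above is not part of this excerpt. A plausible
# minimal sketch (an assumption, not the original implementation) simply
# zips the two coordinate arrays into a list of (x, y) points:
def pair(xs, ys):
    # combine parallel x and y arrays into an array of (x, y) points
    return zip(xs, ys)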
def _heatmap_R(self, labels, filename=None, format='pdf', **kwargs):
    """Plot a clustered heat map using R."""
    # Note on coloring: test if data is z-score normalized and if
    # true, force scale='row' for the heat map so that this is
    # reflected in the legend (in fact, the z-score is recomputed
    # over the data for heatmap.2 coloring but it looks identical
    # and in any case, the clustering is done on the original
    # data --- which is NOT clear from the heatmap docs)
    try:
        from rpy import r, RException
    except ImportError:
        from rpy2.rpy_classic import r, RException
    hm_args = dict(scale='none',
                   margins=(10, 10),  # space for long labels
                   N_colors=32,
                   )
    hm_args.update(kwargs)
    N_colors = hm_args.pop('N_colors')  # N_colors is not a true heatmap argument
    if filename is not None:
        interactive = False
        def r_format():
            r[format](filename)
        def r_dev_off():
            r.dev_off()
    else:
        interactive = True
        def r_format():
            try:
                r.X11()
            except RException:
                r.quartz()
        def r_dev_off():
            msg(1, "Interactive R display. Kill with hop.analysis.kill_R()")
    if self.normalization_method == 'zscore' and hm_args['scale'] != 'row':
        hm_args['scale'] = 'row'
        msg(3, "Forcing scale='row' for z-score normalized heat map.\n")
        # (This only has the effect of putting the label 'Row Z-score' in the graph)
    try:
        r.library('colorRamps')
        r_color = r.matlab_like(N_colors)  # getting somewhat close to matplotlib 'jet'
    except RException:
        msg(1, "For matplotlib-like colors install the R-package 'colorRamps':\n"
            ">>> import rpy\n"
            ">>> rpy.r.install_packages('colorRamps',type='source')")
        r_color = r.topo_colors(N_colors)
    try:
        r.library('gplots')
        r_heatmap = r.heatmap_2
        hm_args.update(dict(key=r.TRUE, symkey=r.FALSE,
                            density_info='histogram', trace='none'))
    except RException:
        msg(1, "For a heatmap with a legend install the R-package 'gplots' via\n"
            ">>> import rpy\n"
            ">>> rpy.r.install_packages('gplots',type='source')")
        r_heatmap = r.heatmap
    r_format()
    r_heatmap(self.heatmap, labRow=labels['observables'], labCol=labels['columns'],
              col=r_color, **hm_args)
    r_dev_off()
def rpart_fit(self, known_data, parameter_list, bit_string="11111"):
    """
    11-09-05
        1st use known_data to get the fit model
        2nd use the fit model to do prediction on all_data, result is prob for each class
    11-09-05 add rpart_cp
    11-17-05 add loss_matrix, prior_prob
        return two pred
    11-23-05 split fit and predict. rpart_fit_and_predict() is split into rpart_fit() and rpart_predict()
    11-27-05 r cleanup
    03-17-06 use parameter_list instead
    """
    if self.debug:
        sys.stderr.write("Doing rpart_fit...\n")
    # 03-17-06
    rpart_cp, loss_matrix, prior_prob = parameter_list
    # 11-27-05 r cleanup
    from rpy import r
    r.library("rpart")
    coeff_name_list = ["p_value", "recurrence", "connectivity", "cluster_size", "gradient"]
    formula_list = []
    for i in range(len(bit_string)):
        if bit_string[i] == "1":
            formula_list.append(coeff_name_list[i])
    # 11-17-05 transform into array
    known_data = array(known_data)
    set_default_mode(NO_CONVERSION)
    data_frame = r.as_data_frame(
        {
            "p_value": known_data[:, 0],
            "recurrence": known_data[:, 1],
            "connectivity": known_data[:, 2],
            "cluster_size": known_data[:, 3],
            "gradient": known_data[:, 4],
            "is_correct": known_data[:, -1],
        }
    )
    if prior_prob:
        prior_prob = [prior_prob, 1 - prior_prob]  # get the full list
        fit = r.rpart(
            r("is_correct~%s" % "+".join(formula_list)),
            data=data_frame,
            method="class",
            control=r.rpart_control(cp=rpart_cp),
            parms=r.list(prior=prior_prob, loss=r.matrix(loss_matrix)),
        )
    else:
        fit = r.rpart(
            r("is_correct~%s" % "+".join(formula_list)),
            data=data_frame,
            method="class",
            control=r.rpart_control(cp=rpart_cp),
            parms=r.list(loss=r.matrix(loss_matrix)),
        )
    del data_frame
    if self.debug:
        sys.stderr.write("Done rpart_fit.\n")
    return fit
def __init__(self, y, design, model_type=r.lm, **kwds):
    ''' Set up and estimate R model with data and design '''
    r.library('MASS')  # still needs to be in test, but also here for
                       # logical tests at the end not to show an error
    self.y = np.array(y)
    self.design = np.array(design)
    self.model_type = model_type
    self._design_cols = ['x.%d' % (i + 1) for i in range(self.design.shape[1])]
    # Note the '-1' for no intercept - this is included in the design
    self.formula = r('y ~ %s-1' % '+'.join(self._design_cols))
    self.frame = r.data_frame(y=y, x=self.design)
    rpy.set_default_mode(rpy.NO_CONVERSION)
    results = self.model_type(self.formula, data=self.frame, **kwds)
    self.robj = results  # keep the Robj model so it can be used in the tests
    rpy.set_default_mode(rpy.BASIC_CONVERSION)
    rsum = r.summary(results)
    self.rsum = rsum
    # Provide compatible interface with scipy models
    self.results = results.as_py()
    #coeffs = self.results['coefficients']
    #self.beta0 = np.array([coeffs[c] for c in self._design_cols])
    self.nobs = len(self.results['residuals'])
    if isinstance(self.results['residuals'], dict):
        self.resid = np.zeros((len(self.results['residuals'].keys())))
        for i in self.results['residuals'].keys():
            self.resid[int(i) - 1] = self.results['residuals'][i]
    else:
        self.resid = self.results['residuals']
    self.fittedvalues = self.results['fitted.values']
    self.df_resid = self.results['df.residual']
    self.params = rsum['coefficients'][:, 0]
    self.bse = rsum['coefficients'][:, 1]
    self.bt = rsum['coefficients'][:, 2]
    try:
        self.pvalues = rsum['coefficients'][:, 3]
    except:
        pass
    self.rsquared = rsum.setdefault('r.squared', None)
    self.rsquared_adj = rsum.setdefault('adj.r.squared', None)
    self.aic_R = rsum.setdefault('aic', None)
    self.fvalue = rsum.setdefault('fstatistic', None)
    if self.fvalue and isinstance(self.fvalue, dict):
        self.fvalue = self.fvalue.setdefault('value', None)  # for wls
    df = rsum.setdefault('df', None)
    if df:  # for RLM, works for other models?
        self.df_model = df[0] - 1  # R counts the intercept
        self.df_resid = df[1]
    self.bcov_unscaled = rsum.setdefault('cov.unscaled', None)
    self.bcov = rsum.setdefault('cov.scaled', None)
    if 'sigma' in rsum:
        self.scale = rsum['sigma']
    elif 'dispersion' in rsum:
        self.scale = rsum['dispersion']
    else:
        self.scale = None
    self.llf = r.logLik(results)
    if model_type == r.glm:
        self.getglm()
    if model_type == r.rlm:
        self.getrlm()
aa = np.genfromtxt(r'gspc_table.csv', skiprows=0, delimiter=',', names=True)
cl = aa['Close']
ret = np.diff(np.log(cl))[-2000:] * 1000.

ggmod = Garch(ret - ret.mean())  #hgjr4[:nobs])  #-hgjr4.mean())  #errgjr4)
ggmod.nar = 1
ggmod.nma = 1
ggmod._start_params = np.array([-0.1, 0.1, 0.1, 0.1])
ggres = ggmod.fit(start_params=np.array([-0.1, 0.1, 0.1, 0.0]),
                  maxiter=1000, method='bfgs')
print 'ggres.params', ggres.params
garchplot(ggmod.errorsest, ggmod.h, title='Garch estimated')

from rpy import r
r.library('fGarch')
f = r.formula('~garch(1, 1)')
fit = r.garchFit(f, data=ret - ret.mean(), include_mean=False)
f = r.formula('~arma(1,1) + ~garch(1, 1)')
fit = r.garchFit(f, data=ret)

ggmod0 = Garch0(ret - ret.mean())  #hgjr4[:nobs])  #-hgjr4.mean())  #errgjr4)
ggmod0.nar = 1
ggmod0.nma = 1
start_params = np.array([-0.1, 0.1, ret.var()])
ggmod0._start_params = start_params  #np.array([-0.6, 0.1, 0.2, 0.0])
ggres0 = ggmod0.fit(start_params=start_params, maxiter=2000)
print 'ggres0.params', ggres0.params

g11res = optimize.fmin(lambda params: -loglike_GARCH11(params, ret - ret.mean())[0],
                       [0.01, 0.1, 0.1])
from util import *
import os
import colors
from rpy import r
import plot_utilities
import copy
import re
import time

r.library("RColorBrewer")

RESULT_FILENAME = '/home/twalter/data/JoanaDruggableResult.csv'
HTML_DIR = '/netshare/mitofiler/Thomas/Joana_HTML/html'
IMAGE_BASE_DIR = '/netshare/mitofiler/Thomas/Joana'
#PICKLERESULTDIR = '/netshare/mitofiler/Thomas/Joana_HTML'
LOCAL_PLOT_DIR = os.path.join(HTML_DIR, 'plots')

NB_ROW = 32
NB_COL = 12

def get_color_brewer_pattern(brew_str):
    hex_pattern = r.brewer_pal(9, brew_str)
    #def hex2int(hexStr): return([int(hexStr[1:3], 16), int(hexStr[3:5], 16), int(hexStr[5:], 16)])
    pattern = [(int(hexStr[1:3], 16) / 255.0,
                int(hexStr[3:5], 16) / 255.0,
                int(hexStr[5:], 16) / 255.0) for hexStr in hex_pattern]
    return pattern

def convert_numeric_table_to_res(filename=RESULT_FILENAME):
    tabD = readTableFromFile(filename, sep=';', header=True)
    lt_idL = ['%s_%s' % (x[2:], y) for x, y in zip(tabD['Labtek'], tabD['Date'])]
    idL = ['%s--%03i' % (x, int(y)) for x, y in zip(lt_idL, tabD['Spot'])]
__use_cython__ = False

__use_weave__ = True
if __use_weave__:
    try:
        from scipy import weave
        weave.inline('std::cout << "weave works!" << std::endl;')
    except:
        print "WARNING: Could not load weave"
        __use_weave__ = False

__use_R__ = True
if __use_R__:
    try:
        from rpy import r as R
        R.library("CircStats")
    except:
        print "WARNING: Could not load R-project"
        __use_R__ = False

#
# load (and reload) modules
#
modules = ['model', 'utils', 'circstats', 'plotlib', 'f_energy']
for name in modules:
    mod = __import__(name, globals(), locals(), [])
    # reload modules (useful during development)
    reload(mod)

# from model import *
# from circstats import *
    return data, is_correct_list

known_fname = '/tmp/hs_fim_92m5x25bfsdfl10q0_7gf1.known'
unknown_fname = '/tmp/hs_fim_92m5x25bfsdfl10q0_7gf1.unknown'
known_data, known_is_correct_list = read_data(known_fname)
unknown_data, unknown_is_correct_list = read_data(unknown_fname)

from numarray import array
from rpy import r, set_default_mode, NO_CONVERSION, BASIC_CONVERSION
set_default_mode(NO_CONVERSION)
#pack data into data frames
known_data = array(known_data)
known_data_frame = r.as_data_frame({"p_value": known_data[:, 0], "recurrence": known_data[:, 1],
                                    "connectivity": known_data[:, 2], "cluster_size": known_data[:, 3],
                                    "gradient": known_data[:, 4]})
unknown_data = array(unknown_data)
unknown_data_frame = r.as_data_frame({"p_value": unknown_data[:, 0], "recurrence": unknown_data[:, 1],
                                      "connectivity": unknown_data[:, 2], "cluster_size": unknown_data[:, 3],
                                      "gradient": unknown_data[:, 4]})
#call randomF.r to run randomForest
r.library('randomForest')
r.source('randomF.r')
#rf_model still needs to be a pure R object
rf_model = r.randomF(known_data_frame, known_data[:, -1])
set_default_mode(BASIC_CONVERSION)
unknown_pred = r.predictRandomF(rf_model, unknown_data_frame)
rf_model = rf_model.as_py(BASIC_CONVERSION)
print rf_model.keys()
print rf_model['confusion']
def krige_to_grid(grid_fname, obs_x, obs_y, obs_data, vgm_par):
    """Interpolate point data onto a grid using Kriging.

    Interpolate point data onto a regular rectangular grid of square cells
    using Kriging with a predefined semi-variogram. The observed data
    locations must be specified in the same projection and coordinate
    system as the grid, which is defined in an ArcGIS raster file.

    Parameters
    ----------
    grid_fname : string
        Filename of an ArcGIS float grid raster defining the required grid
        to Krige onto. All cells are included regardless of their value.
    obs_x : array_like
        The x coordinates of the observation locations.
    obs_y : array_like
        The y coordinates of the observation locations.
    obs_data : array_like
        The data values at the observation locations.
    vgm_par : dict
        A dictionary describing the semi-variogram model. Required keys are:
        'model' can be one of {'Lin', 'Exp', 'Sph', 'Gau'}
        'nugget' must be a scalar
        'range' must be a scalar
        'sill' must be a scalar

    Returns
    -------
    kriged_est : 2darray
        A 2D array containing the Kriged estimates at each point on the
        specified rectangular grid.

    Notes
    -----
    This function requires that R, RPy and the R gstat library are
    correctly installed.

    """
    grid, headers = arcfltgrid.read(grid_fname)
    cols = headers[0]
    rows = headers[1]
    x0 = headers[2]
    y0 = headers[3]
    cell_size = headers[4]
    # TO DO: adjust x0, y0 by 0.5*cell_size if llcorner..

    # define the grid (pixel centres)
    xt, yt = np.meshgrid(np.linspace(x0, x0 + (cols - 1) * cell_size, num=cols),
                         np.linspace(y0 + (rows - 1) * cell_size, y0, num=rows))
    xt = xt.flatten()
    yt = yt.flatten()

    # Krige using gstat via RPy
    r.library('gstat')
    rpy.set_default_mode(rpy.NO_CONVERSION)
    obs_frame = r.data_frame(x=obs_x, y=obs_y, data=obs_data)
    target_grid = r.data_frame(x=xt, y=yt)
    v = r.vgm(vgm_par['sill'], vgm_par['model'], vgm_par['range'], vgm_par['nugget'])
    result = r.krige(r('data ~ 1'), r('~ x + y'), obs_frame, target_grid, model=v)
    rpy.set_default_mode(rpy.BASIC_CONVERSION)
    result = result.as_py()
    kriged_est = np.array(result['var1.pred'])
    kriged_est = kriged_est.reshape(rows, cols)
    return kriged_est
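# A minimal usage sketch (added for illustration, not from the original
# source): Krige three synthetic observations onto a grid defined by a
# hypothetical ArcGIS float raster 'dem.flt'; the spherical variogram
# parameters are illustrative only.
if __name__ == '__main__':
    obs_x = [0.0, 500.0, 1000.0]
    obs_y = [0.0, 500.0, 1000.0]
    obs_data = [1.2, 3.4, 2.1]
    vgm_par = {'model': 'Sph', 'nugget': 0.1, 'range': 400.0, 'sill': 1.0}
    est = krige_to_grid('dem.flt', obs_x, obs_y, obs_data, vgm_par)
    print est.shape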
from plugin import Projections
from rpy import r
from numpy import dot, array
import wx

r.library("fastICA")

class Ica(Projections):
    name = "Ica"

    def Main(self, model):
        # self.model = model
        data = array(model.GetCurrentData()[:])
        k = wx.GetNumberFromUser("ICA Dialog", "Enter number of independent components", "k", 1)
        ica_data = r.fastICA(data, k, alg_typ="deflation", fun="logcosh", alpha=1,
                             method="R", row_norm=0, maxit=200, tol=0.0001, verbose=1)
        fields = ['Comp%02d' % c for c in range(1, k + 1)]
        model.updateHDF('ICA', ica_data['S'], fields=fields)
def find_group_DW(tree, dist_matrix, mes=0, seed_max=0, l_significance=0.1):
    '''
    For more details, see Kim et al. 2008.

    tree : Tree structure returned from the Pycluster module.
    dist_matrix : Distance matrix (= 1. - correlation matrix)
    mes = 0 : Total number of measurements of each light curve.
    seed_max = 0 : To get a tighter seed. 1 ~ 10 are good values;
        '10' gives tighter clusters than '1'.

    return : List of clusters.
    '''
    r.library('nortest')
    clusters = []
    #print tree, len(tree)
    density_list = []
    for i in range(len(dist_matrix) - 1):
        for j in range(i + 1, len(dist_matrix)):
            density_list.append(dist_matrix[i][j])
    density_list_clip = sigma_clipping(density_list, sigma=3.)
    overall_density = (max(density_list_clip) - min(density_list_clip)) / len(dist_matrix)
    #print overall_density, mean(density_list_clip), std(density_list_clip)

    #get highly correlated pairs of elements.
    initial_seed = []
    for i in range(len(tree)):
        #both the left and right elements have to be stars, not links to other clusters.
        if tree[i].left >= 0 and tree[i].right >= 0:
            #to get tighter elements.
            if dist_matrix[tree[i].left][tree[i].right] <= median(density_list_clip) / seed_max:
                if mes == 0:
                    initial_seed.append(i)
                elif dist_matrix[tree[i].left][tree[i].right] <= (1. - 3. / math.sqrt(mes)):
                    initial_seed.append(i)
    #print initial_seed

    #start from a highly correlated initial pair.
    for i in initial_seed:
        #print tree[i]
        current_node = i
        while current_node < len(tree) - 1:
            cluster_1 = []
            cluster_2 = []
            #find the base cluster --> cluster_1
            simplify_group(find_group_with_node_index(tree, current_node), cluster_1)
            #find the cluster that will be merged --> cluster_2
            dummy = find_one_side_group(tree, (current_node + 1) * -1)
            current_node = dummy[0]
            simplify_group(dummy[1], cluster_2)

            #check the density changes against the overall density
            #initial density
            d_1 = []
            for ele_i in range(len(cluster_1) - 1):
                for ele_j in range(ele_i + 1, len(cluster_1)):
                    if ele_i != ele_j:
                        d_1.append(dist_matrix[cluster_1[ele_i]][cluster_1[ele_j]])
            #density after merging
            d_merge = []
            cluster_3 = hstack([cluster_1, cluster_2])
            for ele_i in range(len(cluster_3) - 1):
                for ele_j in range(ele_i + 1, len(cluster_3)):
                    if ele_i != ele_j:
                        d_merge.append(dist_matrix[cluster_3[ele_i]][cluster_3[ele_j]])
            d_1 = array(d_1)
            d_merge = array(d_merge)
            if len(d_merge) < 8:
                continue
            else:
                #the resulting clusters are almost identical; not used anymore.
                #d_merge = array(d_merge)
                #d_merge = .5 * log((1. + d_merge) / (1. - d_merge))
                ad = r.ad_test(d_merge)
                ad_p = ad['p.value']
                p_value = ad_p
                #check the level of significance.
                #if it's out of normality, the previous cluster is the final cluster.
                if p_value < l_significance:
                    #because the AD test needs at least 8 elements.
                    if len(cluster_1) >= 5:
                        #print cluster_1
                        clusters.append(cluster_1)
                    break
                #it's still Gaussian, but if outliers come into the clusters, stop.
                #the resulting clusters are almost identical; not used anymore.
                #elif len(d_1[where(d_1 > mean(density_list_clip))]) > 0:
                #    if len(cluster_1) >= 5:
                #        clusters.append(cluster_1)
                #        break
    return clusters
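# A minimal usage sketch (added for illustration, not from the original
# source): build the hierarchical tree with Pycluster and pass it to
# find_group_DW. The light curves are random toy data, and the helpers used
# inside find_group_DW (sigma_clipping, simplify_group, find_one_side_group,
# find_group_with_node_index) are assumed to be available from the
# surrounding module.
if __name__ == '__main__':
    import Pycluster
    from numpy import corrcoef, random
    light_curves = random.rand(20, 100)   # 20 toy light curves, 100 measurements each
    dist_matrix = 1. - corrcoef(light_curves)
    # treecluster modifies the distance matrix in place, so pass a copy
    tree = Pycluster.treecluster(distancematrix=dist_matrix.copy(), method='m')
    groups = find_group_DW(tree, dist_matrix, mes=100, seed_max=5)
    print groups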
    SS1 = ifft(nx.log10(nx.sqrt(S1)), axis=0)
    SS2 = ifft(nx.log10(nx.sqrt(S2)), axis=0)
    for i in range(n):
        for j in range(m):
            D[i, j] = norm(SS1[:, i] - SS2[:, j])
    return D

if __name__ == "__main__":
    import os
    from dlab import pcmio, labelio
    from pylab import figure, cm, show
    from rpy import r
    r.library('fpc')
    r.library('MASS')

    # FFT parameters
    window = 10.  # ms
    shift = 2.
    padding = 5  # number of frames to use for padding the spectrogram; these are cut off later

    # "standard" DTW cost matrix:
    costs = [[1, 1, 1], [1, 0, 1], [0, 1, 1]]
    # tends to produce smoother paths:
    costs = [[1, 1, 1], [1, 0, 1], [0, 1, 1], [1, 2, 2], [2, 1, 2]]
    # prevents more than one frame from being omitted from either signal
    costs = [[1, 1, 1], [1, 2, 2], [2, 1, 2]]

    # example data
ggmod.nma = 1
start_params = np.array([-0.6, 0.2, 0.1])
ggmod0._start_params = start_params  #np.array([-0.6, 0.1, 0.2, 0.0])
ggres0 = ggmod0.fit(start_params=start_params, method='bfgs', maxiter=2000)
print 'ggres0.params', ggres0.params

g11res = optimize.fmin(
    lambda params: -loglike_GARCH11(params, errgjr4 - errgjr4.mean())[0],
    [0.93, 0.9, 0.2])
print g11res
llf = loglike_GARCH11(g11res, errgjr4 - errgjr4.mean())
print llf[0]

if 'rpyfit' in examples:
    from rpy import r
    r.library('fGarch')
    f = r.formula('~garch(1, 1)')
    fit = r.garchFit(f, data=errgjr4 - errgjr4.mean(), include_mean=False)

if 'rpysim' in examples:
    from rpy import r
    f = r.formula('~garch(1, 1)')
    #fit = r.garchFit(f, data = errgjr4)
    x = r.garchSim(n=500)
    print 'R acf', tsa.acf(np.power(x, 2))[:15]
    arma3 = Arma(np.power(x, 2))
    arma3res = arma3.fit(start_params=[-0.2, 0.1, 0.5], maxiter=5000)
    print arma3res.params
    arma3b = Arma(np.power(x, 2))
    arma3bres = arma3b.fit(start_params=[-0.2, 0.1, 0.5], maxiter=5000,