def regression(data):
    """Calls R's lm to make a linear regression on each of its inputs."""
    # Column 0 becomes the response ('x'), column 1 the predictor ('y'),
    # matching the formula below.
    frame = r.data_frame(x=data[:, 0], y=data[:, 1])
    fit = r.lm(r('x ~ y'), data=frame)
    return fit['coefficients']
def calibrate(self):
    """Perform a calibration based on the available datapoints.

    Returns True on success, False when fewer than two point pairs
    are available.  Sets self.fx/self.fy/self.fz (intercept plus one
    coefficient per input axis) and self.calibrated.
    """
    from rpy import r
    if len(self.pts) < 2:
        # Not enough (input, output) pairs for a regression.
        return False
    # Split the point pairs into per-axis coordinate lists so they can
    # be fed into the R multiple linear regressions.
    in_pts = [pair[0] for pair in self.pts]
    out_pts = [pair[1] for pair in self.pts]
    in_x = [p[0] for p in in_pts]
    in_y = [p[1] for p in in_pts]
    in_z = [p[2] for p in in_pts]
    out_x = [p[0] for p in out_pts]
    out_y = [p[1] for p in out_pts]
    out_z = [p[2] for p in out_pts]
    # Regress each output axis against all three input axes.
    fx = r.lm(r("x ~ a + b + c"),
              data=r.data_frame(a=in_x, b=in_y, c=in_z, x=out_x))["coefficients"]
    fy = r.lm(r("y ~ a + b + c"),
              data=r.data_frame(a=in_x, b=in_y, c=in_z, y=out_y))["coefficients"]
    fz = r.lm(r("z ~ a + b + c"),
              data=r.data_frame(a=in_x, b=in_y, c=in_z, z=out_z))["coefficients"]
    self.fx = fx["(Intercept)"], fx["a"], fx["b"], fx["c"]
    self.fy = fy["(Intercept)"], fy["a"], fy["b"], fy["c"]
    self.fz = fz["(Intercept)"], fz["a"], fz["b"], fz["c"]
    self.calibrated = True
    return True
def LinearRegression_lm(ls1, ls2, return_rsqrd):
    """Fit a least-squares line through the origin using R's lm.

    Parameters
    ----------
    ls1, ls2 : sequences of numbers
        The x and y values, respectively.
    return_rsqrd : str
        'yes' to also return the squared Pearson r (computed with
        scipy.stats.linregress); any other value returns slope only.

    Returns
    -------
    slope, or (slope, rsqrd) when return_rsqrd == 'yes'.
    """
    from rpy import r
    d = r.data_frame(x=ls1, y=ls2)
    # "y ~ x - 1" forces the fit through the origin (no intercept term);
    # use r("y ~ x") for an unconstrained fit.
    fitted_model = r.lm(r("y ~ x - 1"), data=d)
    slope = fitted_model['coefficients']['x']
    if return_rsqrd == 'yes':
        from scipy import stats
        # NOTE: linregress fits WITH an intercept, so this r^2 describes
        # the ordinary regression, not the through-origin fit above.
        # (`** 2` replaces math.pow, which relied on an un-imported
        # module-level `math`.)
        rsqrd = stats.linregress(ls1, ls2)[2] ** 2
        return slope, rsqrd
    return slope
def funcion(dato, variable, caso, opciones):
    """Run a simple y ~ x linear regression on two queried variables.

    Returns a dict with the fitted model ("resultado"), its summary
    ("sumario") and its ANOVA table ("anova").
    """
    from rpy import r  # pylint: disable=import-error
    var_x, var_y = variable[0], variable[1]
    xs = dato.query(var_x, caso)
    ys = dato.query(var_y, caso)
    fit = r.lm(r("y ~ x"), data=r.data_frame(x=xs, y=ys))
    summary = r.summary_lm(fit, True)
    anova = r.anova_lm(fit)
    return {"resultado": fit, "sumario": summary, "anova": anova}
def __init__(self, y, design, model_type=r.lm):
    """Set up and estimate an R model from data and a design matrix."""
    self.y = y
    self.design = design
    self.model_type = model_type
    ncols = self.design.shape[1]
    self._design_cols = ["x.%d" % (col + 1) for col in range(ncols)]
    # The '-1' drops R's implicit intercept; the design matrix is
    # expected to carry its own constant column.
    self.formula = r("y ~ %s-1" % "+".join(self._design_cols))
    self.frame = r.data_frame(y=y, x=self.design)
    self.results = self.model_type(self.formula, data=self.frame)
    # Expose results under the same attribute names as the scipy models.
    fitted = self.results
    coeffs = fitted["coefficients"]
    self.beta = np.array([coeffs[name] for name in self._design_cols])
    self.resid = fitted["residuals"]
    self.predict = fitted["fitted.values"]
    self.df_resid = fitted["df.residual"]
def LinearRegression(ls1, ls2, return_rsqrd):
    """Robustly fit a line through the origin using R's MASS::rlm.

    Parameters
    ----------
    ls1, ls2 : sequences of numbers
        The x and y values, respectively.
    return_rsqrd : str
        'yes' to also return the squared Pearson r (computed with
        scipy.stats.linregress); any other value returns slope only.

    Returns
    -------
    slope, or (slope, rsqrd) when return_rsqrd == 'yes'.
    """
    from rpy import r
    r.library('MASS')
    r.options(warn=-1)  # suppress all warning messages from R
    d = r.data_frame(x=ls1, y=ls2)
    # "y ~ x - 1" forces the fit through the origin; use r("y ~ x") for
    # an unconstrained fit.  rlm can fail to converge in its default 20
    # iterations on hard data (pass maxit to raise the limit).
    fitted_model = r.rlm(r("y ~ x - 1"), data=d)
    slope = fitted_model['coefficients']['x']
    if return_rsqrd == 'yes':
        from scipy import stats
        # NOTE: linregress fits WITH an intercept, so this r^2 describes
        # the ordinary regression, not the through-origin rlm fit.
        # (`** 2` replaces math.pow, which relied on an un-imported
        # module-level `math`; the dead locals `intercept` and `k` from
        # the original are removed.)
        rsqrd = stats.linregress(ls1, ls2)[2] ** 2
        return slope, rsqrd
    return slope
def check_R(model, g):
    """Cross-check a fitted logistic model against R's glm.

    Re-fits the same data in R via rpy (binomial family, logit link) and
    collects R's coefficient vector alongside g.beta so the two
    estimates can be compared.

    NOTE(review): `allclose` is imported but never used in the visible
    body — the comparison step appears to be truncated here.
    """
    import rpy
    from rpy import r
    from numpy import array,allclose
    # R identifiers cannot contain ':', '+', '-' or '_', so sanitise the
    # model's variable names (skipping the first, presumably the
    # intercept column — TODO confirm) before building the formula.
    vars = [ v.replace(':','.').replace('+','p').replace('-','m').replace('_','.') for v in model.vars[1:] ]
    # One data-frame column per sanitised variable, taken from the
    # corresponding design-matrix column (offset by 1 to skip column 0).
    frame = dict( (v,model.X[:,i+1].reshape(-1)) for i,v in enumerate(vars) )
    frame['y'] = model.y.reshape(-1)
    formula = 'y ~ ' + ' + '.join(v.replace(':','.') for v in vars)
    # NO_CONVERSION keeps the fitted model as a raw R object so it can
    # be passed back into r.coefficients below.
    rpy.set_default_mode(rpy.NO_CONVERSION)
    mod = r.glm(r(formula),data=r.data_frame(**frame),family=r.binomial('logit'))
    rpy.set_default_mode(rpy.BASIC_CONVERSION)
    pmod = mod.as_py()
    coef = r.coefficients(mod)
    # Assemble [intercept, coef_1, ..., coef_n] in the same order as the
    # Python-side parameter vector.
    coef = array([coef['(Intercept)']] + [ coef[v] for v in vars ],dtype=float)
    coef2 = g.beta.reshape(-1)
def fitPoly(xarray, yarray, order):
    """Fit an order-`order` polynomial with R's lm; return (x, fitted y)."""
    # Keep r.lm's result as a raw R object (no Python conversion) so it
    # can be handed straight to r.predict.
    r.lm.local_mode(rpy.NO_CONVERSION)
    xl = list(xarray)
    yl = list(yarray)
    modelDef = "y ~ poly(x,%d)" % order
    model = r.lm(r(modelDef), data=r.data_frame(x=xl, y=yl))
    # r.predict converts to a dict keyed '1'..'N' (N == len(xl)); pull
    # the fitted values back out in order.
    pred = r.predict(model)
    predvals = [pred[str(idx + 1)] for idx in range(len(xl))]
    return (xl, predvals)
def krige_to_grid(grid_fname, obs_x, obs_y, obs_data, vgm_par):
    """Interpolate point data onto a grid using Kriging.

    Interpolate point data onto a regular rectangular grid of square cells
    using Kriging with a predefined semi-variogram. The observed data
    locations must be specified in the same projection and coordinate system
    as the grid, which is defined in an ArcGIS raster file.

    Parameters
    ----------
    grid_fname : string
        Filename of an ArcGIS float grid raster defining the required grid
        to Krige onto. All cells are included regardless of their value.
    obs_x : array_like
        The x coordinates of the observation locations.
    obs_y : array_like
        The y coordinates of the observation locations.
    obs_data : array_like
        The data values at the observation locations.
    vgm_par : dict
        A dictionary describing the semi-variogram model. Required keys are:
        'model' can be one of {'Lin', 'Exp', 'Sph', 'Gau'}
        'nugget' must be a scalar
        'range' must be a scalar
        'sill' must be a scalar

    Returns
    -------
    kriged_est : 2darray
        A 2D array containing the Kriged estimates at each point on the
        specified rectangular grid.

    Notes
    -----
    This function requires that R, RPy and the R gstat library are correctly
    installed.
    """
    grid, headers = arcfltgrid.read(grid_fname)
    # Header layout: cols, rows, x-origin, y-origin, cell size.
    cols = headers[0]
    rows = headers[1]
    x0 = headers[2]
    y0 = headers[3]
    cell_size = headers[4]
    # TO DO: adjust x0, y0 by 0.5*cell_size if llcorner..

    # define the grid (pixel centre's); note the y axis runs top-down so
    # row 0 is the northernmost row.
    xt, yt = np.meshgrid(
        np.linspace(x0, x0 + (cols - 1) * cell_size, num=cols),
        np.linspace(y0 + (rows - 1) * cell_size, y0, num=rows))
    xt = xt.flatten()
    yt = yt.flatten()

    # Krige using gstat via RPy
    r.library('gstat')
    # NO_CONVERSION keeps the frames as raw R objects for r.krige.
    rpy.set_default_mode(rpy.NO_CONVERSION)
    obs_frame = r.data_frame(x=obs_x, y=obs_y, data=obs_data)
    target_grid = r.data_frame(x=xt, y=yt)
    v = r.vgm(vgm_par['sill'], vgm_par['model'],
              vgm_par['range'], vgm_par['nugget'])
    # 'data ~ 1' -> ordinary Kriging; '~ x + y' names the coordinates.
    result = r.krige(r('data ~ 1'), r('~ x + y'), obs_frame, target_grid,
                     model=v)
    rpy.set_default_mode(rpy.BASIC_CONVERSION)
    result = result.as_py()
    kriged_est = np.array(result['var1.pred'])
    kriged_est = kriged_est.reshape(rows, cols)
    return kriged_est
There are also R scripts included with most of the datasets to run some
basic models for comparisons of results to statsmodels.
'''
from rpy import r
import numpy as np
import scikits.statsmodels.api as sm

examples = [1, 2]

if 1 in examples:
    # Example 1: OLS on the Longley dataset via R's lm.
    data = sm.datasets.longley.load()
    y, x = data.endog, sm.add_constant(data.exog)
    des_cols = ['x.%d' % (i + 1) for i in range(x.shape[1])]
    # '-1': no intercept, since add_constant already appended one.
    formula = r('y~%s-1' % '+'.join(des_cols))
    frame = r.data_frame(y=y, x=x)
    results = r.lm(formula, data=frame)
    print results.keys()
    print results['coefficients']

if 2 in examples:
    # Example 2: binomial GLM on the star98 dataset via R's glm.
    data2 = sm.datasets.star98.load()
    y2, x2 = data2.endog, sm.add_constant(data2.exog)
    import rpy
    # Convert the two-column (successes, failures) response to a
    # proportion of successes.
    y2 = y2[:, 0] / y2.sum(axis=1)
    des_cols2 = ['x.%d' % (i + 1) for i in range(x2.shape[1])]
    formula2 = r('y~%s-1' % '+'.join(des_cols2))
    frame2 = r.data_frame(y=y2, x=x2)
    results2 = r.glm(formula2, data=frame2, family='binomial')
    # NOTE(review): the list comprehension below is truncated in the
    # source — its closing bracket is missing.
    params_est = [
        results2['coefficients'][k] for k in sorted(results2['coefficients'])
def __init__(self, y, design, model_type=r.lm, **kwds):
    """Set up and estimate an R model with data and design.

    Parameters
    ----------
    y : array_like
        Response variable.
    design : array_like
        Design matrix.  A constant column must already be included,
        since the R formula suppresses the intercept ('-1').
    model_type : callable, optional
        The rpy model function to use (r.lm, r.glm or r.rlm).
    **kwds
        Extra keyword arguments passed through to `model_type`.
    """
    r.library("MASS")  # still needs to be in test, but also here for
    # logical tests at the end not to show an error
    self.y = np.array(y)
    self.design = np.array(design)
    self.model_type = model_type
    self._design_cols = ["x.%d" % (i + 1)
                         for i in range(self.design.shape[1])]
    # Note the '-1' for no intercept - this is included in the design
    self.formula = r("y ~ %s-1" % "+".join(self._design_cols))
    self.frame = r.data_frame(y=y, x=self.design)
    rpy.set_default_mode(rpy.NO_CONVERSION)
    results = self.model_type(self.formula, data=self.frame, **kwds)
    self.robj = results  # keep the Robj model so it can be
    # used in the tests
    rpy.set_default_mode(rpy.BASIC_CONVERSION)
    rsum = r.summary(results)
    self.rsum = rsum
    # Provide compatible interface with scipy models
    self.results = results.as_py()

    # coeffs = self.results['coefficients']
    # self.beta0 = np.array([coeffs[c] for c in self._design_cols])
    self.nobs = len(self.results["residuals"])
    if isinstance(self.results["residuals"], dict):
        # R returned the residuals as a {1-based index: value} dict;
        # unpack into a flat array in index order.
        self.resid = np.zeros((len(list(self.results["residuals"].keys()))))
        for i in list(self.results["residuals"].keys()):
            self.resid[int(i) - 1] = self.results["residuals"][i]
    else:
        self.resid = self.results["residuals"]
    self.fittedvalues = self.results["fitted.values"]
    self.df_resid = self.results["df.residual"]
    self.params = rsum["coefficients"][:, 0]
    self.bse = rsum["coefficients"][:, 1]
    self.bt = rsum["coefficients"][:, 2]
    try:
        self.pvalues = rsum["coefficients"][:, 3]
    except IndexError:
        # Some models (e.g. rlm) report no p-value column.  Narrowed
        # from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        pass
    self.rsquared = rsum.setdefault("r.squared", None)
    self.rsquared_adj = rsum.setdefault("adj.r.squared", None)
    self.aic_R = rsum.setdefault("aic", None)
    self.fvalue = rsum.setdefault("fstatistic", None)
    if self.fvalue and isinstance(self.fvalue, dict):
        self.fvalue = self.fvalue.setdefault("value", None)  # for wls
    df = rsum.setdefault("df", None)
    if df:  # for RLM, works for other models?
        self.df_model = df[0] - 1  # R counts intercept
        self.df_resid = df[1]
    self.bcov_unscaled = rsum.setdefault("cov.unscaled", None)
    self.bcov = rsum.setdefault("cov.scaled", None)
    # Scale estimate: 'sigma' for lm/rlm, 'dispersion' for glm.
    if "sigma" in rsum:
        self.scale = rsum["sigma"]
    elif "dispersion" in rsum:
        self.scale = rsum["dispersion"]
    else:
        self.scale = None
    self.llf = r.logLik(results)

    if model_type == r.glm:
        self.getglm()
    if model_type == r.rlm:
        self.getrlm()
# NOTE(review): this is a fragment from inside a larger loop — it uses
# `i`, `poly_values`, `query`, `c` (a DB cursor) and `exp` from the
# enclosing scope, and `continue` targets the enclosing loop.
# Build one "(id, x1, x2, y1, y2, xy)" tuple per row for a bulk SQL
# VALUES clause.
for id in py_id:  # `id` shadows the builtin; kept as in the original
    x1 = poly_x_vals[i,0]
    x2 = poly_x_vals[i,1]
    y1 = poly_y_vals[i,0]
    y2 = poly_y_vals[i,1]
    xy = poly_xy_vals[i]
    if poly_values:
        poly_values = poly_values + ","
    poly_values += "(%s, %f, %f, %f, %f, %f)" % (id, x1, x2, y1, y2, xy)
    i = i+1
# NOTE(review): values are interpolated directly into the SQL string —
# acceptable only for trusted numeric data; not injection-safe.
query = query + poly_values
# print query
c.execute(query)
# Fit delta as 2nd-order polynomials in x and y plus an x*y cross term,
# weighted by py_wt.
model = r.lm(r("delta ~ poly(x, 2) + poly(y, 2) + poly(x*y, 1)"),
             data=r.data_frame(x=py_x, y=py_y, delta=py_delta),
             weights=py_wt)
model_summary = r.summary(model)
model_coeff = array(model_summary['coefficients'])
# Expect 6 coefficient rows (intercept + 5 terms), each holding
# (estimate, std. error, t value, p value).
if not model_coeff.shape == (6,4):
    print "Bad model for %s" % exp
    continue
# Unpack the estimates and their standard errors.
c0 = model_coeff[0][0]
c0_sigma = model_coeff[0][1]
cx1 = model_coeff[1][0]
cx1_sigma = model_coeff[1][1]
cx2 = model_coeff[2][0]
cx2_sigma = model_coeff[2][1]
cy1 = model_coeff[3][0]
cy1_sigma = model_coeff[3][1]
cy2 = model_coeff[4][0]
some basic models for comparisons of results to statsmodels. ''' from rpy import r import numpy as np import scikits.statsmodels.api as sm examples = [1, 2] if 1 in examples: data = sm.datasets.longley.load() y,x = data.endog, sm.add_constant(data.exog) des_cols = ['x.%d' % (i+1) for i in range(x.shape[1])] formula = r('y~%s-1' % '+'.join(des_cols)) frame = r.data_frame(y=y, x=x) results = r.lm(formula, data=frame) print results.keys() print results['coefficients'] if 2 in examples: data2 = sm.datasets.star98.load() y2,x2 = data2.endog, sm.add_constant(data2.exog) import rpy y2 = y2[:,0]/y2.sum(axis=1) des_cols2 = ['x.%d' % (i+1) for i in range(x2.shape[1])] formula2 = r('y~%s-1' % '+'.join(des_cols2)) frame2 = r.data_frame(y=y2, x=x2) results2 = r.glm(formula2, data=frame2, family='binomial') params_est = [results2['coefficients'][k] for k in sorted(results2['coefficients'])]
# phylip value phy_r = result.mCorrelations[x][y] import rpy from rpy import r as R ## Various ways to calculate r. It is not possible to use ## cor.test or lsfit directly, as you have to perform a ## regression through the origin. ## uncomment to check pearson r against phylip's value ## r = calculateCorrelationCoefficient( columns[x], columns[y] ) ## for significance, use linear regression models in R rpy.set_default_mode(rpy.NO_CONVERSION) linear_model = R.lm(R("y ~ x - 1"), data = R.data_frame(x=columns[x], y=columns[y])) rpy.set_default_mode(rpy.BASIC_CONVERSION) ss = R.summary(linear_model) ## extract the p-value p = ss['coefficients'][-1][-1] if p < 0.001: code = "***" elif p < 0.01: code = "**" elif p < 0.05: code = "*" else: code = ""
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads tab-separated data from stdin (first row: headers, first
    column: taxon names), runs Phylip's 'contrast' program on each input
    tree, and reports correlations, raw contrasts, or recomputed
    contrasts depending on the chosen --method.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option("-t", "--tree-nh-file", dest="filename_tree",
                      type="string",
                      help="filename with tree(s).")
    parser.add_option("--skip-header", dest="add_header",
                      action="store_false",
                      help="do not add header to flat format.")
    parser.add_option("--output-with-header", dest="write_header",
                      action="store_true",
                      help="write header and exit.")
    parser.add_option("--debug", dest="debug", action="store_true",
                      help="debug mode")
    parser.add_option("--display-tree", dest="display_tree",
                      action="store_true",
                      help="display the tree")
    parser.add_option("-m", "--method", dest="methods", type="choice",
                      action="append",
                      choices=("contrasts", "spearman", "pearson", "compute"),
                      help="methods to perform on contrasts.")

    parser.set_defaults(
        columns="all",
        filename_tree=None,
        add_header=True,
        write_header=False,
        debug=False,
        methods=[],
        value_format="%6.4f",
        pvalue_format="%e",
        display_tree=False,
    )

    (options, args) = E.Start(parser, quiet=True)

    # Convert a comma-separated 1-based column list to 0-based indices.
    if options.columns not in ("all", "all-but-first"):
        options.columns = map(lambda x: int(x) - 1, options.columns.split(","))

    phylip = WrapperPhylip.Phylip()

    if options.debug:
        phylip.setLogLevel(options.loglevel)

    phylip.setProgram("contrast")

    ##########################################################
    ##########################################################
    ##########################################################
    # retrieve data and give to phylip
    data = []
    headers = []
    first = True
    for line in sys.stdin:
        if line[0] == "#":
            continue
        d = line[:-1].strip().split("\t")
        if first:
            first = False
            headers = d[1:]
            continue
        data.append(d)

    phylip.setData(data)
    ncolumns = len(headers)
    nrows = len(data)

    ##########################################################
    ##########################################################
    ##########################################################
    # read trees
    nexus = None
    if options.filename_tree:
        nexus = TreeTools.Newick2Nexus(open(options.filename_tree, "r"))

    if not nexus:
        raise ValueError("please provide trees with branchlenghts")

    ##########################################################
    ##########################################################
    ##########################################################
    # set up phylip
    phylip_options = []
    # print out contrasts
    phylip_options.append("C")
    phylip_options.append("Y")
    phylip.setOptions(phylip_options)

    ##########################################################
    ##########################################################
    ##########################################################
    # main loop
    ##########################################################
    for tree in nexus.trees:

        if options.display_tree:
            tree.display()

        # compute this before giving the tree to the phylip module,
        # as it remaps taxon names.
        map_node2data = {}
        for x in range(nrows):
            taxon = data[x][0]
            map_node2data[tree.search_taxon(taxon)] = x

        phylip.setTree(tree)

        result = phylip.run()

        for method in options.methods:

            if method in ("pearson", "spearman"):

                options.stdout.write("header1\theader2\tr\tp\tcode\n")

                # n = len(result.mContrasts)
                # Transpose the contrasts into per-column lists.
                columns = []
                for c in range(ncolumns):
                    columns.append(map(lambda x: x[c], result.mContrasts))

                # Pairwise correlations over all column pairs.
                for x in range(0, ncolumns - 1):
                    for y in range(x + 1, ncolumns):

                        # phylip value
                        phy_r = result.mCorrelations[x][y]

                        import rpy
                        from rpy import r as R

                        # Various ways to calculate r. It is not
                        # possible to use cor.test or lsfit directly,
                        # as you have to perform a regression through
                        # the origin.

                        # uncomment to check pearson r against
                        # phylip's value r =
                        # calculateCorrelationCoefficient(columns[x],
                        # columns[y])

                        # for significance, use linear regression models in R
                        rpy.set_default_mode(rpy.NO_CONVERSION)

                        linear_model = R.lm(
                            R("y ~ x - 1"),
                            data=R.data_frame(x=columns[x], y=columns[y]))

                        rpy.set_default_mode(rpy.BASIC_CONVERSION)

                        ss = R.summary(linear_model)

                        # extract the p-value
                        p = ss['coefficients'][-1][-1]

                        # R-style significance stars.
                        if p < 0.001:
                            code = "***"
                        elif p < 0.01:
                            code = "**"
                        elif p < 0.05:
                            code = "*"
                        else:
                            code = ""

                        options.stdout.write("\t".join(
                            (headers[x], headers[y],
                             options.value_format % phy_r,
                             options.pvalue_format % p, code)) + "\n")

            elif method == "contrasts":

                options.stdout.write("\t".join(headers) + "\n")
                for d in result.mContrasts:
                    options.stdout.write(
                        "\t".join(
                            map(lambda x: options.value_format % x, d)) + "\n")

            elif method == "compute":

                # make room for all internal nodes and one dummy node
                # for unrooted trees.
                max_index = TreeTools.GetMaxIndex(tree) + 2
                variances = [None] * max_index
                # NOTE(review): inner lists are sized nrows but indexed
                # by column (0..ncolumns-1) below — works only while
                # nrows >= ncolumns; confirm intent.
                values = [[None] * nrows for x in range(max_index)]
                contrasts = []
                for x in range(max_index):
                    contrasts.append([None] * ncolumns)
                branchlengths = [None] * max_index

                def update_data(node_id, bl, c1, c2, ):
                    # Felsenstein-style contrast update for an internal
                    # node with children c1, c2 (bl = its branch length).
                    b1, b2 = branchlengths[c1], branchlengths[c2]
                    rb1 = 1.0 / b1
                    rb2 = 1.0 / b2
                    # compute variance
                    variance = math.sqrt(b1 + b2)
                    # extend branch length of this node to create correct
                    # variance for parent
                    branchlengths[node_id] = bl + (b1 * b2) / (b1 + b2)
                    variances[node_id] = variance

                    for c in range(ncolumns):
                        v1, v2 = values[c1][c], values[c2][c]
                        # save ancestral value as weighted mean
                        values[node_id][c] = (
                            (rb1 * v1 + rb2 * v2)) / (rb1 + rb2)
                        # compute normalized contrast
                        contrasts[node_id][c] = (v1 - v2) / variance

                def update_contrasts(node_id):
                    """update contrasts for a node."""
                    node = tree.node(node_id)
                    if node.succ:
                        if len(node.succ) == 2:
                            c1, c2 = node.succ
                            update_data(
                                node_id, node.data.branchlength, c1, c2)
                        else:
                            # Unrooted tree: the root trifurcates; fold
                            # the third child in via the dummy node.
                            assert(node_id == tree.root)
                            assert(len(node.succ) == 3)
                            update_data(
                                node_id, node.data.branchlength,
                                node.succ[0], node.succ[1])
                            update_data(
                                max_index - 1, node.data.branchlength,
                                node_id, node.succ[2])
                    else:
                        # Leaf: seed values from the input data row.
                        for c in range(ncolumns):
                            values[node_id][c] = float(
                                data[map_node2data[node_id]][c + 1])
                        branchlengths[node_id] = node.data.branchlength

                tree.dfs(tree.root, post_function=update_contrasts)

                options.stdout.write(
                    "node_id\tvariance\t%s\n" % "\t".join(headers))
                for node_id in range(max_index):
                    if variances[node_id] is None:
                        continue
                    options.stdout.write("%s\t%s\t%s\n" % (
                        node_id,
                        options.value_format % variances[node_id],
                        "\t".join(
                            map(lambda x: options.value_format % x,
                                contrasts[node_id])),
                    ))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads tab-separated data from stdin (first row: headers, first
    column: taxon names), runs Phylip's 'contrast' program on each input
    tree, and reports correlations, raw contrasts, or recomputed
    contrasts depending on the chosen --method.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: data2phylocontrasts.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-c", "--columns", dest="columns", type="string",
                      help="columns to take for calculating histograms.")
    parser.add_option("-t", "--filename-tree", dest="filename_tree",
                      type="string", help="filename with tree(s).")
    parser.add_option("--skip-header", dest="add_header",
                      action="store_false",
                      help="do not add header to flat format.")
    parser.add_option("--write-header", dest="write_header",
                      action="store_true",
                      help="write header and exit.")
    parser.add_option("--debug", dest="debug", action="store_true",
                      help="debug mode")
    parser.add_option("--display-tree", dest="display_tree",
                      action="store_true",
                      help="display the tree")
    parser.add_option("-m", "--method", dest="methods", type="choice",
                      action="append",
                      choices=("contrasts", "spearman", "pearson", "compute"),
                      help="methods to perform on contrasts.")

    parser.set_defaults(
        columns="all",
        filename_tree=None,
        add_header=True,
        write_header=False,
        debug=False,
        methods=[],
        value_format="%6.4f",
        pvalue_format="%e",
        display_tree=False,
    )

    (options, args) = E.Start(parser, quiet=True)

    # Convert a comma-separated 1-based column list to 0-based indices.
    if options.columns not in ("all", "all-but-first"):
        options.columns = map(lambda x: int(x) - 1, options.columns.split(","))

    phylip = WrapperPhylip.Phylip()

    if options.debug:
        phylip.setLogLevel(options.loglevel)

    phylip.setProgram("contrast")

    ##########################################################
    ##########################################################
    ##########################################################
    # retrieve data and give to phylip
    data = []
    headers = []
    first = True
    for line in sys.stdin:
        if line[0] == "#":
            continue
        d = line[:-1].strip().split("\t")
        if first:
            first = False
            headers = d[1:]
            continue
        data.append(d)

    phylip.setData(data)
    ncolumns = len(headers)
    nrows = len(data)

    ##########################################################
    ##########################################################
    ##########################################################
    # read trees
    nexus = None
    if options.filename_tree:
        nexus = TreeTools.Newick2Nexus(open(options.filename_tree, "r"))

    if not nexus:
        raise ValueError("please provide trees with branchlenghts")

    ##########################################################
    ##########################################################
    ##########################################################
    # set up phylip
    phylip_options = []
    # print out contrasts
    phylip_options.append("C")
    phylip_options.append("Y")
    phylip.setOptions(phylip_options)

    ##########################################################
    ##########################################################
    ##########################################################
    # main loop
    ##########################################################
    for tree in nexus.trees:

        if options.display_tree:
            tree.display()

        # compute this before giving the tree to the phylip module,
        # as it remaps taxon names.
        map_node2data = {}
        for x in range(nrows):
            taxon = data[x][0]
            map_node2data[tree.search_taxon(taxon)] = x

        phylip.setTree(tree)

        result = phylip.run()

        for method in options.methods:

            if method in ("pearson", "spearman"):

                options.stdout.write("header1\theader2\tr\tp\tcode\n")

                n = len(result.mContrasts)
                # Transpose the contrasts into per-column lists.
                columns = []
                for c in range(ncolumns):
                    columns.append(map(lambda x: x[c], result.mContrasts))

                # Pairwise correlations over all column pairs.
                for x in range(0, ncolumns - 1):
                    for y in range(x + 1, ncolumns):

                        # phylip value
                        phy_r = result.mCorrelations[x][y]

                        import rpy
                        from rpy import r as R

                        # Various ways to calculate r. It is not possible to use
                        # cor.test or lsfit directly, as you have to perform a
                        # regression through the origin.

                        # uncomment to check pearson r against phylip's value
                        ## r = calculateCorrelationCoefficient( columns[x], columns[y] )

                        # for significance, use linear regression models in R
                        rpy.set_default_mode(rpy.NO_CONVERSION)

                        linear_model = R.lm(R("y ~ x - 1"),
                                            data=R.data_frame(x=columns[x],
                                                              y=columns[y]))

                        rpy.set_default_mode(rpy.BASIC_CONVERSION)

                        ss = R.summary(linear_model)

                        # extract the p-value
                        p = ss['coefficients'][-1][-1]

                        # R-style significance stars.
                        if p < 0.001:
                            code = "***"
                        elif p < 0.01:
                            code = "**"
                        elif p < 0.05:
                            code = "*"
                        else:
                            code = ""

                        options.stdout.write("\t".join(
                            (headers[x], headers[y],
                             options.value_format % phy_r,
                             options.pvalue_format % p, code)) + "\n")

            elif method == "contrasts":

                options.stdout.write("\t".join(headers) + "\n")
                for d in result.mContrasts:
                    # NOTE(review): the terminator is "\n " (newline plus
                    # a trailing space) in the source — possibly
                    # unintended; kept as-is.
                    options.stdout.write(
                        "\t".join(map(lambda x: options.value_format % x,
                                      d)) + "\n ")

            elif method == "compute":

                # make room for all internal nodes and one dummy node
                # for unrooted trees.
                max_index = TreeTools.GetMaxIndex(tree) + 2
                variances = [None] * max_index
                # NOTE(review): inner lists are sized nrows but indexed
                # by column (0..ncolumns-1) below — works only while
                # nrows >= ncolumns; confirm intent.
                values = [[None] * nrows for x in range(max_index)]
                contrasts = []
                for x in range(max_index):
                    contrasts.append([None] * ncolumns)
                branchlengths = [None] * max_index

                def update_data(
                        node_id,
                        bl,
                        c1,
                        c2,
                ):
                    # Felsenstein-style contrast update for an internal
                    # node with children c1, c2 (bl = its branch length).
                    b1, b2 = branchlengths[c1], branchlengths[c2]
                    rb1 = 1.0 / b1
                    rb2 = 1.0 / b2
                    # compute variance
                    variance = math.sqrt(b1 + b2)
                    # extend branch length of this node to create correct
                    # variance for parent
                    branchlengths[node_id] = bl + (b1 * b2) / (b1 + b2)
                    variances[node_id] = variance

                    for c in range(ncolumns):
                        v1, v2 = values[c1][c], values[c2][c]
                        # save ancestral value as weighted mean
                        values[node_id][c] = (
                            (rb1 * v1 + rb2 * v2)) / (rb1 + rb2)
                        # compute normalized contrast
                        contrasts[node_id][c] = (v1 - v2) / variance

                def update_contrasts(node_id):
                    """update contrasts for a node."""
                    node = tree.node(node_id)
                    if node.succ:
                        if len(node.succ) == 2:
                            c1, c2 = node.succ
                            update_data(node_id, node.data.branchlength,
                                        c1, c2)
                        else:
                            # Unrooted tree: the root trifurcates; fold
                            # the third child in via the dummy node.
                            assert (node_id == tree.root)
                            assert (len(node.succ) == 3)
                            update_data(node_id, node.data.branchlength,
                                        node.succ[0], node.succ[1])
                            update_data(max_index - 1,
                                        node.data.branchlength,
                                        node_id, node.succ[2])
                    else:
                        # Leaf: seed values from the input data row.
                        for c in range(ncolumns):
                            values[node_id][c] = float(
                                data[map_node2data[node_id]][c + 1])
                        branchlengths[node_id] = node.data.branchlength

                tree.dfs(tree.root, post_function=update_contrasts)

                options.stdout.write("node_id\tvariance\t%s\n" %
                                     "\t".join(headers))
                for node_id in range(max_index):
                    if variances[node_id] is None:
                        continue
                    options.stdout.write("%s\t%s\t%s\n" % (
                        node_id,
                        options.value_format % variances[node_id],
                        "\t".join(
                            map(lambda x: options.value_format % x,
                                contrasts[node_id])),
                    ))

    E.Stop()
def krige_to_grid(grid_fname, obs_x, obs_y, obs_data, vgm_par):
    """Interpolate point data onto a grid using Kriging.

    Interpolates point observations onto the regular rectangular grid of
    square cells defined by an ArcGIS float grid raster, using ordinary
    Kriging with a predefined semi-variogram.  The observation
    coordinates must be in the same projection/coordinate system as the
    grid.

    Parameters
    ----------
    grid_fname : string
        Filename of an ArcGIS float grid raster defining the target
        grid.  All cells are included regardless of their value.
    obs_x, obs_y : array_like
        The x and y coordinates of the observation locations.
    obs_data : array_like
        The data values at the observation locations.
    vgm_par : dict
        Semi-variogram model description with keys 'model' (one of
        {'Lin', 'Exp', 'Sph', 'Gau'}), 'nugget', 'range' and 'sill'
        (the last three scalars).

    Returns
    -------
    kriged_est : 2darray
        Kriged estimates at each point of the rectangular grid.

    Notes
    -----
    Requires R, RPy and the R gstat library.
    """
    grid, headers = arcfltgrid.read(grid_fname)
    cols = headers[0]
    rows = headers[1]
    x0 = headers[2]
    y0 = headers[3]
    cell_size = headers[4]
    # TO DO: adjust x0, y0 by 0.5*cell_size if llcorner..

    # Pixel-centre coordinates; the y axis runs top-down so row 0 is the
    # northernmost row.
    x_coords = np.linspace(x0, x0 + (cols - 1) * cell_size, num=cols)
    y_coords = np.linspace(y0 + (rows - 1) * cell_size, y0, num=rows)
    xt, yt = np.meshgrid(x_coords, y_coords)

    # Krige using gstat via RPy; NO_CONVERSION keeps the frames as raw R
    # objects for r.krige.
    r.library('gstat')
    rpy.set_default_mode(rpy.NO_CONVERSION)
    obs_frame = r.data_frame(x=obs_x, y=obs_y, data=obs_data)
    target_grid = r.data_frame(x=xt.flatten(), y=yt.flatten())
    variogram = r.vgm(vgm_par['sill'], vgm_par['model'],
                      vgm_par['range'], vgm_par['nugget'])
    # 'data ~ 1' -> ordinary Kriging; '~ x + y' names the coordinates.
    result = r.krige(r('data ~ 1'), r('~ x + y'), obs_frame, target_grid,
                     model=variogram)
    rpy.set_default_mode(rpy.BASIC_CONVERSION)

    estimates = np.array(result.as_py()['var1.pred'])
    return estimates.reshape(rows, cols)
def __init__(self, y, design, model_type=r.lm, **kwds):
    '''Set up and estimate an R model with data and design.

    Parameters
    ----------
    y : array_like
        Response variable.
    design : array_like
        Design matrix.  A constant column must already be included,
        since the R formula suppresses the intercept ('-1').
    model_type : callable, optional
        The rpy model function to use (r.lm, r.glm or r.rlm).
    **kwds
        Extra keyword arguments passed through to `model_type`.
    '''
    r.library('MASS')  # still needs to be in test, but also here for
    # logical tests at the end not to show an error
    self.y = np.array(y)
    self.design = np.array(design)
    self.model_type = model_type
    self._design_cols = [
        'x.%d' % (i + 1) for i in range(self.design.shape[1])
    ]
    # Note the '-1' for no intercept - this is included in the design
    self.formula = r('y ~ %s-1' % '+'.join(self._design_cols))
    self.frame = r.data_frame(y=y, x=self.design)
    rpy.set_default_mode(rpy.NO_CONVERSION)
    results = self.model_type(self.formula, data=self.frame, **kwds)
    self.robj = results  # keep the Robj model so it can be
    # used in the tests
    rpy.set_default_mode(rpy.BASIC_CONVERSION)
    rsum = r.summary(results)
    self.rsum = rsum
    # Provide compatible interface with scipy models
    self.results = results.as_py()

    # coeffs = self.results['coefficients']
    # self.beta0 = np.array([coeffs[c] for c in self._design_cols])
    self.nobs = len(self.results['residuals'])
    if isinstance(self.results['residuals'], dict):
        # R returned the residuals as a {1-based index: value} dict;
        # unpack into a flat array in index order.
        self.resid = np.zeros((len(self.results['residuals'].keys())))
        for i in self.results['residuals'].keys():
            self.resid[int(i) - 1] = self.results['residuals'][i]
    else:
        self.resid = self.results['residuals']
    self.fittedvalues = self.results['fitted.values']
    self.df_resid = self.results['df.residual']
    self.params = rsum['coefficients'][:, 0]
    self.bse = rsum['coefficients'][:, 1]
    self.bt = rsum['coefficients'][:, 2]
    try:
        self.pvalues = rsum['coefficients'][:, 3]
    except IndexError:
        # Some models (e.g. rlm) report no p-value column.  Narrowed
        # from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        pass
    self.rsquared = rsum.setdefault('r.squared', None)
    self.rsquared_adj = rsum.setdefault('adj.r.squared', None)
    self.aic_R = rsum.setdefault('aic', None)
    self.fvalue = rsum.setdefault('fstatistic', None)
    if self.fvalue and isinstance(self.fvalue, dict):
        self.fvalue = self.fvalue.setdefault('value', None)  # for wls
    df = rsum.setdefault('df', None)
    if df:  # for RLM, works for other models?
        self.df_model = df[0] - 1  # R counts intercept
        self.df_resid = df[1]
    self.bcov_unscaled = rsum.setdefault('cov.unscaled', None)
    self.bcov = rsum.setdefault('cov.scaled', None)
    # Scale estimate: 'sigma' for lm/rlm, 'dispersion' for glm.
    if 'sigma' in rsum:
        self.scale = rsum['sigma']
    elif 'dispersion' in rsum:
        self.scale = rsum['dispersion']
    else:
        self.scale = None
    self.llf = r.logLik(results)

    if model_type == r.glm:
        self.getglm()
    if model_type == r.rlm:
        self.getrlm()