def heritability(self):
    """Regress offspring on mid-parent values with R's ``lm`` and set ``self.h_sqrd``.

    Fits the R model ``y ~ x`` with ``x = self.mid_parent`` and
    ``y = self.offspring``, prints two entries of the model summary, and
    stores ``2 * r**2`` in ``self.h_sqrd``, where ``r`` is the second fitted
    coefficient of the model.
    """
    x = R.FloatVector(self.mid_parent)
    y = R.FloatVector(self.offspring)

    # The formula resolves its variable names in its own environment, so the
    # data vectors are bound there rather than passed to lm() directly.
    formula = R.Formula('y ~ x')
    env = formula.environment
    env['x'] = x
    env['y'] = y

    results = stats.lm(formula)

    # Summarise the fit once and reuse it for both printed values.
    # NOTE(review): element [3] is presumably the coefficient matrix, making
    # [1] the slope estimate and [7] its p-value -- confirm against R's
    # summary.lm layout.
    coefficients = summary(results)[3]
    # A single pre-formatted argument prints "a b" under both Python 2 and 3.
    print('%s %s' % (coefficients[1], coefficients[7]))

    # NOTE(review): r is the second fitted coefficient (presumably the slope),
    # not a correlation coefficient -- confirm that 2*r**2 is the intended
    # heritability estimate.
    r = results[0][1]
    self.h_sqrd = 2 * r ** 2
def linear_model(data, Input, Output, Condition):
    """Fit ``Output ~ Input * Condition`` with R's ``lm`` and return its coefficients.

    Args:
        data: table converted to an R data frame via ``pandas2ri.py2rpy``
            (expected to be a pandas DataFrame).
        Input: name used as ``x`` in the model formula.
        Output: name used as ``y`` (the response) in the model formula.
        Condition: name of the term interacted with ``Input``.

    Returns:
        A pandas DataFrame built from the ``coefficients`` entry of the R
        model summary, labelled with R's row and column names. If any step
        fails, an empty DataFrame is returned instead (best-effort contract).
    """
    try:
        stats = importr('stats')
        base = importr('base')

        # Always undo the global conversion switch, even if conversion fails,
        # so a bad input does not leave pandas2ri activated for later callers.
        pandas2ri.activate()
        try:
            r_df = pandas2ri.py2rpy(data)
        finally:
            pandas2ri.deactivate()

        formula = '{y}~{x}*{condition}'.format(y=Output, x=Input, condition=Condition)
        lm = stats.lm(formula, r_df)
        summary = base.summary(lm)
        results = summary.rx2('coefficients')
        results_df = base.as_data_frame_matrix(results)

        # Transpose the converted frame, then label it with the names R
        # reported for the coefficient table.
        py_results_df = pd.DataFrame(results_df).transpose()
        py_results_df.columns = results_df.colnames
        py_results_df.index = results_df.rownames
        return py_results_df
    except Exception:
        # Best-effort fallback; unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        return pd.DataFrame({})
reanalysis_subset, ilat=lat_i, ilon=lon_i) observations_data, observations_months = collect_monthly_data( observations_subset, ilat=lat_i, ilon=lon_i) for month_i, month in enumerate(range(1, 13)): #print('month: '+str(month)) reanalysis = reanalysis_data[reanalysis_months == month].copy() observations = observations_data[observations_months == month].copy() # The ranking step of the downscaling model reanalysis.sort() observations.sort() slope, intercept, *_ = lm(x=reanalysis, y=observations) #print(slope) #print(intercept) full_array_lat_i = np.where( land_mask.lat.values == land_mask_subset.lat.values[lat_i])[0][0] full_array_lon_i = np.where( land_mask.lon.values == land_mask_subset.lon.values[lon_i])[0][0] model_coef['slope'][month_i, full_array_lat_i, full_array_lon_i] = slope model_coef['intercept'][month_i, full_array_lat_i, full_array_lon_i] = intercept progress += 1
def calc_mefs_helper(data):
    """Run ``lm`` of every ``LABELS`` column against the ``XCOL`` values.

    Args:
        data: table indexable by ``XCOL`` and ``LABELS``
            (presumably a pandas DataFrame -- confirm against callers).

    Returns:
        Whatever ``data[LABELS].apply`` produces when ``lm(x, column)`` is
        applied to each selected column.
    """
    predictor = data[XCOL].values
    responses = data[LABELS]

    def _regress(column):
        # The predictor is shared; only the response column changes per call.
        return lm(predictor, column)

    # One regression per column of the label selection.
    return responses.apply(_regress)
ccallengths.append(allobs[cspcevent].length[j]) ccalHgs.append(allobs[cspcevent].Hg[j]) else: cHg_obs = allobs[cspcevent].Hg[j] clen = allobs[cspcevent].length[j] dat_ofp.close() # perform a linear regression to obtain initial parameters x = np.array(ccallengths) x = np.log(x+1.0) y = np.array(ccalHgs) y = np.log((y*1000.0)+1.0) # check to be sure of uniqueness in both length and Hg if (len(np.unique((x))) >= 1): if (len(np.unique((y))) >= 1): cspclm, ceventlm, r_value, p_value, cstderrlm = lm(x,y) sigma_calc = np.std(y) # ## sigma value, using STD of Hg sig_ofp = open('Hgsigma.dat','w') sig_ofp.write('%f\n' %(sigma_calc)) sig_ofp.close() # ## SpC parameter value spc_ofp = open('Hgspc.srt','w') spc_ofp.write('%d %f\n' %(allobs[cspcevent].SpC,cspclm)) spc_ofp.close() # ## Event parameter value event_ofp = open('Hgevents.srt','w') event_ofp.write('%d %f\n' %(allobs[cspcevent].Event,ceventlm)) event_ofp.close() # ## Write the index file ndx_ofp = open('Hgdata.ndx','w')
# Load the comma-delimited input; field names come from its header row.
indat = np.genfromtxt(infile, delimiter=',', dtype=None, names=True)
Hg = indat['Hg']
lens = indat['length']
# Read the detection-limit flag column. The previous ``DL = ['DL']`` was a
# literal list, so ``DL == 1`` never matched and no value was ever halved.
# NOTE(review): assumes the input has a 'DL' column that marks non-detects
# with 1 -- confirm against the data file.
DL = indat['DL']
SpC_Event = indat['SpC_EVENT']

# set NDs as half detection limit for linear regressions
NDs = np.nonzero(DL == 1)[0]
for cind in NDs:
    Hg[cind] = 0.5 * Hg[cind]

# perform the log transformations
Hg = np.log((Hg * 1000.0) + 1)
lens = np.log(lens + 1.0)

# now perform all the linear regressions, writing the results to a file
allSpC_Events = np.unique(SpC_Event)
allK = len(allSpC_Events)
# The context manager closes the output file even if a regression fails.
with open(outfile, 'w') as ofp:
    ofp.write('%20s' * 6 % ('SpC_EVENT', 'SpC_par', 'Event_par', 'sigma', 'r_squared', 'N') + '\n')
    for k, cspcev in enumerate(allSpC_Events, start=1):
        # A single parenthesised argument prints the same on Python 2 and 3.
        print("rockin' " + cspcev + '-->%d of %d' % (k, allK))
        # Select the rows belonging to this SpC/event group.
        cind = np.nonzero(SpC_Event == cspcev)[0]
        y = Hg[cind]
        x = lens[cind]
        slope, intercept, r_value, p_value, std_err = lm(x, y)
        sigma_calc = np.std(y)
        ofp.write('%19s %19f %19f %19f %19f %19d\n' % (cspcev, slope, intercept, sigma_calc, r_value**2, len(x)))
forecasts = collect_forecast_data(historic_forecasts, month=month, lead_time=lead_time, ilat=lat_i, ilon=lon_i) obs = collect_obs_data(historic_observations, month=month, ilat=lat_i, ilon=lon_i) forecasts, obs = reconcile_forecasts_and_obs( forecasts, obs) # The ranking step of the downscaling model forecasts['data'].sort() obs['data'].sort() slope, intercept, *_ = lm(x=forecasts['data'], y=obs['data']) model_coef['slope'][lead_time_i, month_i, lat_i, lon_i] = slope model_coef['intercept'][lead_time_i, month_i, lat_i, lon_i] = intercept pixel_processing_times.append( round(time.time() - pixel_start_time, 1)) print( str(progress) + '/' + str(total_pixels) + ' pixels, ' + str(pixel_processing_times[-1]) + ' sec') print('avg: ' + str(np.mean(pixel_processing_times)))
warnings.warn(x, RRuntimeWarning) RRuntimeWarning: Error in eval(expr, envir, enclos) : object 'y' not found Traceback (most recent call last): File "<pyshell#140>", line 1, in <module> M=R.lm('y~x') File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 178, in __call__ return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs) File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 106, in __call__ res = super(Function, self).__call__(*new_args, **new_kwargs) RRuntimeError: Error in eval(expr, envir, enclos) : object 'y' not found >>> M=stats.lm(y~x) SyntaxError: invalid syntax >>> M=stats.lm('y~x') Traceback (most recent call last): File "<pyshell#142>", line 1, in <module> M=stats.lm('y~x') File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 178, in __call__ return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs) File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 106, in __call__ res = super(Function, self).__call__(*new_args, **new_kwargs) RRuntimeError: Error in eval(expr, envir, enclos) : object 'y' not found >>> M=stats.lm('y~x', data=(y,x)) Traceback (most recent call last): File "<pyshell#143>", line 1, in <module> M=stats.lm('y~x', data=(y,x))