def get_confidence_interval_for_mean(self, X=None):
    """
    Calculates the confidence interval of the fitted mean for each row of X.

    This is the confidence interval of the model (the mean response), not
    the prediction interval.

    Parameters
    ----------
    X : pandas.DataFrame or array-like
        Rows to evaluate. A DataFrame is first restricted to the columns
        listed in ``self.independent_``; any other input is used as given,
        so its columns must already line up with ``self.X_dash_X``.

    Returns
    -------
    pandas.DataFrame
        One row per row of X with the columns ``y_hat`` (the output of
        ``self.predict``), ``upper_mean`` and ``lower_mean`` (two-sided
        bounds at the 95% level).

    Raises
    ------
    ValueError
        If X is not supplied.
    """
    if X is None:
        raise ValueError(
            "X must be provided to compute the confidence interval")
    if isinstance(X, pd.core.frame.DataFrame):
        X = X[self.independent_]
    y_hat = self.predict(X)

    # Plain 2-D ndarray view of the rows; a 1-D input is treated as one row.
    design = numpy.atleast_2d(numpy.asarray(X))

    # NOTE(review): presumably self.s_y is the residual standard error and
    # self.X_dash_X is X'X of the fitted design matrix -- confirm in the class.
    cov = numpy.power(self.s_y, 2) * numpy.asarray(inv(self.X_dash_X))

    # Only the diagonal of design * cov * design' is needed, so compute the
    # row-wise quadratic forms directly instead of building the n x n matrix.
    s_c_2 = numpy.sum(numpy.dot(design, cov) * design, axis=1)

    # 95% confidence level, i.e. significance alpha = 0.05 (two-sided).
    alpha = 0.05
    # NOTE(review): the "+ 1" on the degrees of freedom is kept from the
    # original code; confirm it matches how self.df_resid is defined.
    t_val = stats.t.ppf(1 - alpha / 2, self.df_resid + 1)

    half_width = t_val * numpy.sqrt(s_c_2)
    upper = y_hat + half_width
    lower = y_hat - half_width

    df = pd.DataFrame({'y_hat': y_hat, 'upper_mean': upper, 'lower_mean': lower})
    return (df)
def gen_simplemodel_data(n=1000, k=1, seed=10):
    """
    Generates a synthetic linear-model data set.

    The design has a constant column ``alpha`` (all ones) and ``k`` columns
    ``X0 .. X{k-1}`` drawn from a standard normal distribution. The response
    ``y`` is the design times randomly drawn coefficients plus a normal
    disturbance whose scale is itself drawn uniformly from [0, 1).

    Note that this seeds numpy's *global* random state as a side effect.

    Parameters
    ----------
    n : int
        Number of rows to generate.
    k : int
        Number of random regressors (in addition to the constant).
    seed : int or None
        Value passed to ``numpy.random.seed``. The default of 10 reproduces
        the historical output of this function.

    Returns
    -------
    tuple
        ``(coefs, df_x)`` where ``coefs`` has length ``k + 1`` (constant
        first) and ``df_x`` holds the columns ``alpha``, ``X0..``, and ``y``.
    """
    numpy.random.seed(seed)
    df_x = pd.DataFrame({'alpha': numpy.ones(n)})
    coefs = numpy.random.rand(k + 1)
    for ii in range(k):
        # each regressor is an independent standard-normal draw
        df_x['X{}'.format(ii)] = numpy.random.normal(0, 1, n)

    # Snapshot the design before 'y' is appended to the frame.
    design = numpy.asarray(df_x)

    # Normal noise scaled by a single uniform draw; the draw order
    # (normal first, then rand) is kept so seeded output is unchanged.
    disturbance = numpy.random.normal(0, 1, n) * numpy.random.rand(1)

    df_x['y'] = numpy.dot(design, coefs) + disturbance
    return (coefs, df_x)