예제 #1
0
파일: lin_model.py 프로젝트: ejokeeffe/ML
    def get_confidence_interval_for_mean(self,X=[],alpha=0.05):
        """
        Return the confidence interval of the fitted mean at each row of X.

        This is the interval for the model's mean response, not the
        prediction interval for a new observation.

        Parameters
        ----------
        X : pandas.DataFrame or array-like
            Rows to evaluate. A DataFrame is first reduced to the columns
            listed in ``self.independent_``; any other input is used as
            given. The empty-list default is kept for interface
            compatibility only; callers are expected to pass data.
        alpha : float, optional
            Two-sided significance level. The default 0.05 yields a 95%
            interval.

        Returns
        -------
        pandas.DataFrame
            Columns ``y_hat`` (the output of ``self.predict``),
            ``upper_mean`` and ``lower_mean``, one row per row of X.
        """
        if isinstance(X,pd.core.frame.DataFrame):
            X=X[self.independent_]
        y_hat=self.predict(X)

        # Work on a plain 2-D float ndarray so the products below are
        # ordinary array operations regardless of the input container.
        design=numpy.atleast_2d(numpy.asarray(X,dtype=float))

        # s_y^2 * inv(X_dash_X); presumably the estimated covariance of the
        # fitted coefficients -- confirm against where s_y is computed.
        coef_cov=numpy.power(self.s_y,2)*numpy.asarray(inv(self.X_dash_X))

        # Only the per-row quadratic form x_i * coef_cov * x_i' is needed.
        # Summing the row-wise product gives exactly the diagonal of
        # design * coef_cov * design' without building the n-by-n matrix.
        s_c_2=numpy.sum(numpy.dot(design,coef_cov)*design,axis=1)

        # NOTE(review): the degrees of freedom are df_resid + 1, kept as in
        # the original; verify this matches how df_resid is defined.
        t_val=stats.t.ppf(1-alpha/2.0,self.df_resid+1)
        half_width=t_val*numpy.sqrt(s_c_2)

        df=pd.DataFrame({'y_hat':y_hat,
                         'upper_mean':y_hat+half_width,
                         'lower_mean':y_hat-half_width})
        return (df)
예제 #2
0
파일: examples.py 프로젝트: ejokeeffe/ML
def gen_simplemodel_data(n=1000,k=1):
	"""
	Generate a reproducible synthetic dataset for a simple linear model.

	The global numpy random seed is set to 10 on every call. The frame
	holds a constant column ``alpha`` of ones and ``k`` columns ``X0`` ..
	``X{k-1}`` drawn from a standard normal. The response ``y`` is the
	regressors times ``k + 1`` uniformly drawn coefficients, plus standard
	normal noise scaled by a single uniform draw.

	Returns a tuple ``(coefs, frame)`` where ``frame`` also contains ``y``.
	"""
	numpy.random.seed(10)
	frame = pd.DataFrame({'alpha': numpy.ones(n)})
	# Coefficients are drawn before the regressors; the order of the random
	# calls determines the generated values.
	coefs = numpy.random.rand(k + 1)
	for col in range(k):
		frame['X{}'.format(col)] = numpy.random.normal(0, 1, n)

	design = numpy.matrix(frame)
	beta = numpy.matrix(coefs).T
	# Disturbance: standard normal draws scaled by one uniform factor.
	disturbance = numpy.random.normal(0, 1, n) * numpy.random.rand(1)
	response = design * beta + numpy.matrix(disturbance).T
	frame['y'] = numpy.array(response)

	return (coefs, frame)