Esempio n. 1
0
def f_Golden(y, x_glob, x_loc, y_off, coords, mType, wType,criterion, maxVal, minVal, tol, maxIter=200,flag=0):
    """
    Golden section search for the bandwidth with the lowest criterion value.

    The search keeps a bracket [a, c] and two interior points b < d. At every
    iteration the interior point with the lower score is recorded as the
    current optimum and the bracket is shrunk around it. Only the two
    interior points are ever scored, and the interior point that survives a
    shrink keeps its already computed kernel and score, so each iteration
    after the first fits a single new model.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable. None selects the
                         purely local model (GWGLM_Base), otherwise the mixed
                         model (semiGWR_Base) is fitted.
        x_loc          : array or None
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian, 2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        tol            : float
                         tolerance used to determine convergence; the loop
                         stops once abs(f_b - f_d) <= tol
        maxIter        : integer
                         maximum number of iteration if convergence cannot arrive at the tolerance
        flag           : integer
                         distance type, forwarded to Kernel.get_pairDist

    Return:
           opt_band   : float
                        optimal bandwidth (None if the loop body never ran,
                        e.g. maxIter <= 0)
           opt_weit   : kernel
                        optimal kernel (None if the loop body never ran)
           output     : list of tuple
                        report searching process, keep bandwidth and score, [(bandwidth, score),(bandwidth, score),...]
    """
    # Pairwise distances are computed once and shared by every kernel below.
    dist = Kernel.get_pairDist(coords, flag)

    # 1 set range of bandwidth
    nVar_glob = 0 if x_glob is None else len(x_glob[0])
    nVar_loc = 0 if x_loc is None else len(x_loc[0])
    nVars = nVar_glob + nVar_loc

    a, c = ini_band_dist(dist, nVars, wType, maxVal, minVal)

    # Adaptive kernels (wType 1 and 3) use a neighbour count as bandwidth,
    # so candidate bandwidths are rounded to whole numbers.
    is_nn = wType == 1 or wType == 3

    def evaluate(bandwidth):
        """Build the kernel for `bandwidth`, fit the model, return (kernel, score)."""
        weit = Kernel.GWR_W(coords, bandwidth, wType, dist)
        if x_glob is None:  # local model
            model = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            model = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        if mType == 0:
            score = getDiag_GWR[criterion](model)
        else:
            score = getDiag_GWGLM[criterion](model)
        return weit, score

    # 2 get initial interior points
    lamda = 0.38197  # 1 - (np.sqrt(5.0)-1.0)/2.0
    b = a + lamda * abs(c - a)
    d = c - lamda * abs(c - a)
    if is_nn:
        b = round(b, 0)
        d = round(d, 0)

    # 3 loop
    output = []
    opt_band = None
    opt_weit = None
    eval_b = None  # cached (kernel, score) of b; None means "not scored yet"
    eval_d = None  # cached (kernel, score) of d
    diff = 1.0e9
    nIter = 0
    while abs(diff) > tol and nIter < maxIter:
        nIter += 1

        # 3.1 score whichever interior point is new in this iteration
        if eval_b is None:
            eval_b = evaluate(b)
        if eval_d is None:
            eval_d = evaluate(d)
        weit_b, f_b = eval_b
        weit_d, f_d = eval_d

        # 3.2 determine next triple
        if f_b <= f_d:
            # b is the current optimum: drop the upper part of the bracket
            opt_weit = weit_b
            opt_band = b
            opt_cri = f_b
            c = d
            d = b
            eval_d = eval_b  # old b becomes the new d, its score is reused
            eval_b = None
            b = a + lamda * abs(c - a)
            if is_nn:
                b = round(b, 0)
        else:
            # d is the current optimum: drop the lower part of the bracket
            opt_weit = weit_d
            opt_band = d
            opt_cri = f_d
            a = b
            b = d
            eval_b = eval_d  # old d becomes the new b, its score is reused
            eval_d = None
            d = c - lamda * abs(c - a)
            if is_nn:
                d = round(d, 0)

        output.append((opt_band, opt_cri))

        # 3.3 convergence is judged on the gap between the two interior scores
        diff = f_b - f_d

    return opt_band, opt_weit, output
Esempio n. 2
0
def f_Golden(y,
             x_glob,
             x_loc,
             y_off,
             coords,
             mType,
             wType,
             criterion,
             maxVal,
             minVal,
             tol,
             maxIter=200,
             flag=0):
    """
    Golden section search for the bandwidth with the lowest criterion value.

    The search keeps a bracket [a, c] and two interior points b < d. At every
    iteration the interior point with the lower score is recorded as the
    current optimum and the bracket is shrunk around it. Only the two
    interior points are ever scored, and the interior point that survives a
    shrink keeps its already computed kernel and score, so each iteration
    after the first fits a single new model.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable. None selects the
                         purely local model (GWGLM_Base), otherwise the mixed
                         model (semiGWR_Base) is fitted.
        x_loc          : array or None
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian, 2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        tol            : float
                         tolerance used to determine convergence; the loop
                         stops once abs(f_b - f_d) <= tol
        maxIter        : integer
                         maximum number of iteration if convergence cannot arrive at the tolerance
        flag           : integer
                         distance type, forwarded to Kernel.get_pairDist

    Return:
           opt_band   : float
                        optimal bandwidth (None if the loop body never ran,
                        e.g. maxIter <= 0)
           opt_weit   : kernel
                        optimal kernel (None if the loop body never ran)
           output     : list of tuple
                        report searching process, keep bandwidth and score, [(bandwidth, score),(bandwidth, score),...]
    """
    # Pairwise distances are computed once and shared by every kernel below.
    dist = Kernel.get_pairDist(coords, flag)

    # 1 set range of bandwidth
    nVar_glob = 0 if x_glob is None else len(x_glob[0])
    nVar_loc = 0 if x_loc is None else len(x_loc[0])
    nVars = nVar_glob + nVar_loc

    a, c = ini_band_dist(dist, nVars, wType, maxVal, minVal)

    # Adaptive kernels (wType 1 and 3) use a neighbour count as bandwidth,
    # so candidate bandwidths are rounded to whole numbers.
    is_nn = wType == 1 or wType == 3

    def evaluate(bandwidth):
        """Build the kernel for `bandwidth`, fit the model, return (kernel, score)."""
        weit = Kernel.GWR_W(coords, bandwidth, wType, dist)
        if x_glob is None:  # local model
            model = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            model = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        if mType == 0:
            score = getDiag_GWR[criterion](model)
        else:
            score = getDiag_GWGLM[criterion](model)
        return weit, score

    # 2 get initial interior points
    lamda = 0.38197  # 1 - (np.sqrt(5.0)-1.0)/2.0
    b = a + lamda * abs(c - a)
    d = c - lamda * abs(c - a)
    if is_nn:
        b = round(b, 0)
        d = round(d, 0)

    # 3 loop
    output = []
    opt_band = None
    opt_weit = None
    eval_b = None  # cached (kernel, score) of b; None means "not scored yet"
    eval_d = None  # cached (kernel, score) of d
    diff = 1.0e9
    nIter = 0
    while abs(diff) > tol and nIter < maxIter:
        nIter += 1

        # 3.1 score whichever interior point is new in this iteration
        if eval_b is None:
            eval_b = evaluate(b)
        if eval_d is None:
            eval_d = evaluate(d)
        weit_b, f_b = eval_b
        weit_d, f_d = eval_d

        # 3.2 determine next triple
        if f_b <= f_d:
            # b is the current optimum: drop the upper part of the bracket
            opt_weit = weit_b
            opt_band = b
            opt_cri = f_b
            c = d
            d = b
            eval_d = eval_b  # old b becomes the new d, its score is reused
            eval_b = None
            b = a + lamda * abs(c - a)
            if is_nn:
                b = round(b, 0)
        else:
            # d is the current optimum: drop the lower part of the bracket
            opt_weit = weit_d
            opt_band = d
            opt_cri = f_d
            a = b
            b = d
            eval_b = eval_d  # old d becomes the new b, its score is reused
            eval_d = None
            d = c - lamda * abs(c - a)
            if is_nn:
                d = round(d, 0)

        output.append((opt_band, opt_cri))

        # 3.3 convergence is judged on the gap between the two interior scores
        diff = f_b - f_d

    return opt_band, opt_weit, output
Esempio n. 3
0
def f_Interval(y, x_glob, x_loc, y_off, coords, mType, wType, criterion, maxVal, minVal, interval,flag=0):
    """
    Interval search, using interval as stepsize

    The two end points minVal and maxVal are scored first, then every
    bandwidth minVal + k*interval that is strictly below maxVal. The
    bandwidth with the lowest criterion value wins; on ties the earlier
    candidate is kept (maxVal is preferred over minVal when those two tie).

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable. None selects the
                         purely local model (GWGLM_Base), otherwise the mixed
                         model (semiGWR_Base) is fitted.
        x_loc          : array
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight evaluation (including point i)
        mType          : integer
                         GWR model type, 0: M_Gaussian, 1: M_Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian, 2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         interval used in interval search; must be positive
                         whenever there is at least one step to take
        flag           : integer
                         distance type, forwarded to Kernel.get_pairDist
    Return:
           opt_band   : float
                        optimal bandwidth
           opt_weit   : kernel
                        optimal kernel
           output     : list of tuple
                        report searching process, keep bandwidth and score, [(bandwidth, score),(bandwidth, score),...]

    Raises:
           ValueError : if interval <= 0 while minVal + interval < maxVal,
                        because the scan could never reach maxVal
    """
    # Forward the caller's distance type instead of always using type 0.
    dist = Kernel.get_pairDist(coords, flag)

    # Adaptive kernels (wType 1 and 3) take a neighbour count as bandwidth.
    is_nn = wType == 1 or wType == 3

    a = minVal
    c = maxVal
    if is_nn:
        a = int(a)
        c = int(c)

    # 1 first interior candidate
    b = a + interval
    if is_nn:
        b = int(b)

    # A non-positive step would keep b below c forever.
    if b < c and interval <= 0:
        raise ValueError("interval must be positive to step from minVal to maxVal")

    def evaluate(bandwidth):
        """Build the kernel for `bandwidth`, fit the model, return (kernel, score)."""
        weit = Kernel.GWR_W(coords, bandwidth, wType, dist)
        if x_glob is None:  # local model
            model = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            model = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        if mType == 0:
            score = getDiag_GWR[criterion](model)
        else:
            score = getDiag_GWGLM[criterion](model)
        return weit, score

    # 2 score both end points
    weit_a, f_a = evaluate(a)
    weit_c, f_c = evaluate(c)
    output = [(a, f_a), (c, f_c)]

    if f_a < f_c:
        opt_weit, opt_band, opt_val = weit_a, a, f_a
    else:
        opt_weit, opt_band, opt_val = weit_c, c, f_c

    # 3 scan the interior in steps of `interval`
    while b < c:
        weit_b, f_b = evaluate(b)
        output.append((b, f_b))

        if f_b < opt_val:
            opt_weit, opt_band, opt_val = weit_b, b, f_b

        b = b + interval

    return opt_band, opt_weit, output
Esempio n. 4
0
def f_Interval(y,
               x_glob,
               x_loc,
               y_off,
               coords,
               mType,
               wType,
               criterion,
               maxVal,
               minVal,
               interval,
               flag=0):
    """
    Interval search, using interval as stepsize

    The two end points minVal and maxVal are scored first, then every
    bandwidth minVal + k*interval that is strictly below maxVal. The
    bandwidth with the lowest criterion value wins; on ties the earlier
    candidate is kept (maxVal is preferred over minVal when those two tie).

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable. None selects the
                         purely local model (GWGLM_Base), otherwise the mixed
                         model (semiGWR_Base) is fitted.
        x_loc          : array
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight evaluation (including point i)
        mType          : integer
                         GWR model type, 0: M_Gaussian, 1: M_Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian, 2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         interval used in interval search; must be positive
                         whenever there is at least one step to take
        flag           : integer
                         distance type, forwarded to Kernel.get_pairDist
    Return:
           opt_band   : float
                        optimal bandwidth
           opt_weit   : kernel
                        optimal kernel
           output     : list of tuple
                        report searching process, keep bandwidth and score, [(bandwidth, score),(bandwidth, score),...]

    Raises:
           ValueError : if interval <= 0 while minVal + interval < maxVal,
                        because the scan could never reach maxVal
    """
    # Forward the caller's distance type instead of always using type 0.
    dist = Kernel.get_pairDist(coords, flag)

    # Adaptive kernels (wType 1 and 3) take a neighbour count as bandwidth.
    is_nn = wType == 1 or wType == 3

    a = minVal
    c = maxVal
    if is_nn:
        a = int(a)
        c = int(c)

    # 1 first interior candidate
    b = a + interval
    if is_nn:
        b = int(b)

    # A non-positive step would keep b below c forever.
    if b < c and interval <= 0:
        raise ValueError("interval must be positive to step from minVal to maxVal")

    def evaluate(bandwidth):
        """Build the kernel for `bandwidth`, fit the model, return (kernel, score)."""
        weit = Kernel.GWR_W(coords, bandwidth, wType, dist)
        if x_glob is None:  # local model
            model = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            model = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        if mType == 0:
            score = getDiag_GWR[criterion](model)
        else:
            score = getDiag_GWGLM[criterion](model)
        return weit, score

    # 2 score both end points
    weit_a, f_a = evaluate(a)
    weit_c, f_c = evaluate(c)
    output = [(a, f_a), (c, f_c)]

    if f_a < f_c:
        opt_weit, opt_band, opt_val = weit_a, a, f_a
    else:
        opt_weit, opt_band, opt_val = weit_c, c, f_c

    # 3 scan the interior in steps of `interval`
    while b < c:
        weit_b, f_b = evaluate(b)
        output.append((b, f_b))

        if f_b < opt_val:
            opt_weit, opt_band, opt_val = weit_b, b, f_b

        b = b + interval

    return opt_band, opt_weit, output
Esempio n. 5
0
def varyTest(y,
             x_glob,
             x_loc,
             kernel,
             mType=0,
             y_off=None,
             criterion=0,
             orig_mod=None):
    """
    Geographical variability test
    All the models use the same bandwidth

    Each column of x_loc is, in turn, moved to the global (fixed) set and the
    mixed model is refitted with the same kernel; the change in fit relative
    to the original model is reported for that column.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable.
        x_loc          : array
                         n*k2, local independent variable, including constant.
        kernel         : kernel
                         weights passed unchanged to GWGLM_Base / semiGWR_Base
                         for every fitted model
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        y_off          : array
                         n*1, offset variable for Poisson model
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        orig_mod       : object of GWR model
                         original model; fitted here when None
    Return:
        geoVary        : list of tuple
                         one entry per column of x_loc, in column order.
                         mType == 0: (f, diffp, df, diffcri)
                         otherwise : (diffdev, diffp, diffcri)
                         where diffp = tr_S(original) - tr_S(new),
                         diffcri = criterion(original) - criterion(new),
                         df = n - tr_S(original),
                         f = ((res2_new - res2_old) / diffp) / (res2_old / df),
                         diffdev = dev_res(original) - dev_res(new).

    """
    nObs = len(y)
    is_gaussian = mType == 0

    def fit_stats(model):
        """Return (tr_S, criterion value, res2 or dev_res) read from `model`."""
        if is_gaussian:
            return model.tr_S, getDiag_GWR[criterion](model), model.res2
        return model.tr_S, getDiag_GWGLM[criterion](model), model.dev_res

    # 1 original model
    if orig_mod is not None:
        gwrMod_old = orig_mod
    elif x_glob is None:
        gwrMod_old = GWGLM_Base(y, x_loc, kernel, mType, y_off)
    else:
        gwrMod_old = semiGWR_Base(y, x_glob, x_loc, kernel, mType, y_off)

    # 2 original statistics
    p_old, cri_old, dev_old = fit_stats(gwrMod_old)

    # 3 move each local variable to the global set, one at a time
    geoVary = []
    for i in range(len(x_loc[0])):
        moved = np.reshape(x_loc[:, i], (-1, 1))
        # local design matrix without column i
        xg = np.delete(x_loc, i, 1)
        # global design matrix with column i appended
        if x_glob is None:
            xf = moved
        else:
            xf = np.hstack((x_glob, moved))

        gwrMod_new = semiGWR_Base(y, xf, xg, kernel, mType, y_off)
        p_new, cri_new, dev_new = fit_stats(gwrMod_new)

        # differences against the original model
        diffp = p_old - p_new
        diffcri = cri_old - cri_new
        if is_gaussian:
            df = nObs - p_old
            f = ((dev_new - dev_old) / diffp) / (dev_old / df)
            geoVary.append((f, diffp, df, diffcri))
        else:
            diffdev = dev_old - dev_new
            geoVary.append((diffdev, diffp, diffcri))

    return geoVary
Esempio n. 6
0
def G2L(y,
        x_glob,
        x_loc,
        coords,
        mType=0,
        wType=3,
        y_off=None,
        orig_mod=None,
        criterion=0,
        bdinfo=0,
        band=0,
        maxVal=0.0,
        minVal=0.0,
        interval=0.0,
        tol=1.0e-2,
        maxIter=50):
    """
    Variable selection: global to local

    Starting from the given split, every global variable is tentatively moved
    to the local set. A move is kept when it lowers the selection criterion.
    Passes over the remaining global variables are repeated until one full
    pass produces no improvement.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array
                         n*k1, fixed independent variable.
        x_loc          : array or None
                         n*k2, local independent variable, including constant.
        coords         : dictionary
                         including (x,y) coordinates involved in the weight evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         weight type
        y_off          : array
                         n*1, offset variable for Poisson model
        orig_mod       : object of GWR model
                         original model; its `kernel` attribute is used as the
                         starting kernel and it is scored with `criterion`
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        bdinfo         : integer
                         bandwidth searching method: 0: golden search 1: interval 2: fixed single bandwidth
        band           : float
                         given bandwidth if bdinfo=2
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         interval used in interval search
        tol            : float
                         tolerance used to determine convergence
        maxIter        : integer
                         maximum number of iteration if convergence cannot arrived to the tolerance

    Return:
        varsL          : list,
                         ids (column indices of x_glob) moved to local
        varsG          : list,
                         ids (column indices of x_glob) kept global
        optband        : list
                         info of optimal bandwidth searching results, one
                         M_selection.Band_Sel result per search performed
        optWeit        : kernel
                         kernel of best model; None when no kernel was ever
                         selected (no global variables, or a global-only
                         start with no accepted move)
        optcri         : float
                         criterion value for optimal model; None when there
                         are no global variables to test
    """
    nObs = len(y)
    nVars_glob = 0 if x_glob is None else len(x_glob[0])
    if x_loc is None:
        nVars_loc = 0
        tmp_loc = np.zeros(shape=(nObs, 0))
    else:
        nVars_loc = len(x_loc[0])
        tmp_loc = x_loc
    nVars = nVars_loc + nVars_glob
    optband = []

    # Defaults keep the return statement valid on every path.
    orilist = list(range(nVars_glob))  # ids of Xs that are still global
    optWeit = None
    cri_old = None

    search_band = bdinfo == 0 or bdinfo == 1  # golden or interval search

    if nVars_glob > 0:
        # 1 baseline model and its criterion value
        if orig_mod is None:
            if x_loc is None:  # global model
                gwrMod_old = GLM_Base(y, x_glob, mType, y_off)
                cri_old = getDiag_GLM[criterion](gwrMod_old)
            else:  # mixed model
                if search_band:
                    rs = M_selection.Band_Sel(y, x_glob, x_loc, coords, mType,
                                              y_off, wType, criterion, bdinfo,
                                              maxVal, minVal, interval, tol,
                                              maxIter)
                    band = rs[0]
                    weit = rs[1]
                    optband.append(rs)
                else:
                    weit = Kernel.GWR_W(coords, band, wType)
                optWeit = weit
                gwrMod_old = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
                if mType == 0:
                    cri_old = getDiag_GWR[criterion](gwrMod_old)
                else:
                    cri_old = getDiag_GWGLM[criterion](gwrMod_old)
        else:
            gwrMod_old = orig_mod
            optWeit = orig_mod.kernel
            # The supplied model must be scored too, otherwise there is no
            # baseline to compare candidate models against.
            if x_loc is None:
                cri_old = getDiag_GLM[criterion](gwrMod_old)
            elif mType == 0:
                cri_old = getDiag_GWR[criterion](gwrMod_old)
            else:
                cri_old = getDiag_GWGLM[criterion](gwrMod_old)

        # 2 repeat passes until one pass moves nothing
        improved = True
        while improved:
            improved = False
            outlist = []  # ids moved from global to local in this pass

            # global design matrix made of the columns still in orilist
            tmp_glob = np.zeros(shape=(nObs, 0))
            for i in orilist:
                tmp_glob = np.hstack(
                    (tmp_glob, np.reshape(x_glob[:, i], (-1, 1))))

            for i, idx in enumerate(orilist):
                x_out = np.reshape(x_glob[:, idx], (-1, 1))
                # Rejected columns are re-inserted at the front of tmp_glob,
                # so the column for idx sits right after them: its position
                # equals the number of rejections so far.
                tmp_glob = np.delete(tmp_glob, i - len(outlist), 1)
                tmp_loc = np.hstack((tmp_loc, x_out))

                # bandwidth / kernel for the candidate split
                if search_band:
                    rs = M_selection.Band_Sel(y, tmp_glob, tmp_loc, coords,
                                              mType, y_off, wType, criterion,
                                              bdinfo, maxVal, minVal, interval,
                                              tol, maxIter)
                    band = rs[0]
                    weit = rs[1]
                    optband.append(rs)
                else:
                    weit = Kernel.GWR_W(coords, band, wType)

                # candidate model: fully local once every X is local
                if len(tmp_loc[0]) == nVars:
                    gwrMod_new = GWGLM_Base(y, tmp_loc, weit, mType, y_off)
                else:
                    gwrMod_new = semiGWR_Base(y, tmp_glob, tmp_loc, weit,
                                              mType, y_off)
                # Score with the same diagnostics family as the baseline so
                # that cri_new and cri_old are comparable.
                if mType == 0:
                    cri_new = getDiag_GWR[criterion](gwrMod_new)
                else:
                    cri_new = getDiag_GWGLM[criterion](gwrMod_new)

                if cri_new < cri_old:  # keep x in the local set
                    outlist.append(idx)
                    cri_old = cri_new
                    improved = True
                    optWeit = weit
                else:  # undo: x goes back to the global set
                    tmp_loc = np.delete(tmp_loc, -1, 1)
                    tmp_glob = np.hstack((x_out, tmp_glob))

            # sorted() keeps the id order deterministic between passes
            orilist = sorted(set(orilist) - set(outlist))

    varsG = orilist
    varsL = sorted(set(range(nVars_glob)) - set(orilist))

    return varsL, varsG, optband, optWeit, cri_old
Esempio n. 7
0
def varyTest(y, x_glob, x_loc, kernel, mType=0, y_off=None, criterion=0, orig_mod=None):
    """
    Geographical variability test
    All the models use the same bandwidth (the same kernel is passed to every fit)

    Each column of x_loc is, in turn, moved from the local set to the global
    set; the resulting semiparametric model is fitted and compared with the
    original model.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable. None when the
                         original model has no global term.
        x_loc          : array
                         n*k2, local independent variable, including constant.
                         Must support 2-D column slicing (x_loc[:, i]).
        kernel         : kernel
                         weight kernel, passed unchanged to GWGLM_Base and
                         semiGWR_Base for every model fitted here
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        y_off          : array
                         n*1, offset variable for Poisson model
        criterion      : integer
                         index into the diagnostics tables (getDiag_GWR for
                         mType 0, getDiag_GWGLM otherwise);
                         0: AICc, 1: AIC, 2: BIC, 3: CV
        orig_mod       : object of GWR model
                         original model; when given it is used as is instead
                         of fitting the original model again
    Return:
        geoVary        : list of tuple
                         one tuple per column of x_loc, in column order.
                         mType == 0: (f, diffp, df, diffcri)
                         otherwise : (diffdev, diffp, diffcri)
                         where diffp = tr_S(original) - tr_S(new),
                         diffcri = criterion(original) - criterion(new),
                         df = n - tr_S(original).

    """
    nObs = len(y)

    # 1 original model
    if orig_mod is not None:
        gwrMod_old = orig_mod
    elif x_glob is None:
        gwrMod_old = GWGLM_Base(y, x_loc, kernel, mType, y_off)
    else:
        gwrMod_old = semiGWR_Base(y, x_glob, x_loc, kernel, mType, y_off)

    # 2 original statistics
    p_old = gwrMod_old.tr_S
    if mType == 0:
        cri_old = getDiag_GWR[criterion](gwrMod_old)
        dev_old = gwrMod_old.res2
    else:
        cri_old = getDiag_GWGLM[criterion](gwrMod_old)
        dev_old = gwrMod_old.dev_res

    # 3 loop over the local variables (an empty x_loc yields an empty result)
    geoVary = []
    nVar_loc = len(x_loc[0])
    for i in range(nVar_loc):
        # candidate column, reshaped to n*1 so it can be stacked
        tmpX = np.reshape(x_loc[:, i], (-1, 1))
        # new x_loc: everything except the candidate
        xg = np.delete(x_loc, i, 1)
        # new x_glob: existing global columns (if any) plus the candidate
        if x_glob is None:
            xf = tmpX
        else:
            xf = np.hstack((x_glob, tmpX))

        # new model
        gwrMod_new = semiGWR_Base(y, xf, xg, kernel, mType, y_off)

        # new statistics
        p_new = gwrMod_new.tr_S
        if mType == 0:
            cri_new = getDiag_GWR[criterion](gwrMod_new)
            dev_new = gwrMod_new.res2
        else:
            cri_new = getDiag_GWGLM[criterion](gwrMod_new)
            dev_new = gwrMod_new.dev_res

        # difference between original and new model
        diffp = p_old - p_new
        diffcri = cri_old - cri_new
        if mType == 0:
            df = nObs - p_old
            f = ((dev_new - dev_old) / diffp) / (dev_old / df)
            geoVary.append((f, diffp, df, diffcri))
        else:
            diffdev = dev_old - dev_new
            geoVary.append((diffdev, diffp, diffcri))

    return geoVary
Esempio n. 8
0
def G2L(y, x_glob, x_loc, coords, mType=0, wType=3, y_off=None, orig_mod=None, criterion=0, bdinfo=0, band=0, maxVal=0.0, minVal=0.0, interval=0.0, tol=1.0e-2, maxIter=50):
    """
    Variable selection: global to local

    Starting from the given global/local split, each global variable is
    tentatively moved to the local set; the move is kept when the selection
    criterion of the refitted model is strictly smaller than the best value
    so far. Passes over the remaining global variables are repeated until a
    whole pass keeps no move.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array
                         n*k1, fixed independent variable (k1 may be 0).
        x_loc          : array or None
                         n*k2, local independent variable, including constant.
                         None when the starting model is a global model.
        coords         : dictionary
                         including (x,y) coordinates involved in the weight evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         weight type
        y_off          : array
                         n*1, offset variable for Poisson model
        orig_mod       : object of GWR model
                         original model; its ``kernel`` attribute is used as
                         the starting kernel and its criterion value as the
                         starting criterion
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        bdinfo         : integer
                         bandwidth searching method: 0: golden search 1: interval 2: fixed single bandwidth
        band           : float
                         given bandwidth if bdinfo=2
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         interval used in interval search
        tol            : float
                         tolerance used to determine convergence
        maxIter        : integer
                         maximum number of iteration if convergence cannot arrived to the tolerance


    Return:
        varsL          : list,
                         ids (column indices of x_glob) moved to local
        varsG          : list,
                         ids (column indices of x_glob) that stay global
        optband        : list
                         info of optimal bandwidth searching results, one
                         entry per bandwidth search performed
        optWeit        : kernel
                         kernel of best model; None when the starting model is
                         global and no variable was moved, or when x_glob has
                         no columns
        optcri         : float
                         criterion value for optimal model; None when x_glob
                         has no columns
    """
    nObs = len(y)
    nVars_glob = len(x_glob[0])
    if x_loc is None:
        nVars_loc = 0
        tmp_loc = np.zeros(shape=(nObs, 0))
    else:
        nVars_loc = len(x_loc[0])
        tmp_loc = x_loc
    nVars = nVars_loc + nVars_glob
    optband = []

    # Defaults so that the return statement is always well defined, even when
    # there is nothing to select from or no candidate improves the criterion.
    optWeit = None
    cri_old = None
    orilist = list(range(nVars_glob))  # ids of original global Xs

    use_search = bdinfo == 0 or bdinfo == 1  # golden or interval search

    if nVars_glob > 0:
        # 1 set original model and its criterion value
        if orig_mod is None:
            if x_loc is None:  # global model
                gwrMod_old = GLM_Base(y, x_glob, mType, y_off)
                cri_old = getDiag_GLM[criterion](gwrMod_old)
            else:  # should be mixed model
                if use_search:
                    rs = M_selection.Band_Sel(y, x_glob, x_loc, coords, mType, y_off, wType, criterion, bdinfo, maxVal, minVal, interval, tol, maxIter)
                    band = rs[0]
                    weit = rs[1]
                    optband.append(rs)
                else:
                    # set original kernel from the given bandwidth
                    weit = Kernel.GWR_W(coords, band, wType)
                optWeit = weit
                gwrMod_old = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
                # get original diagnostics
                if mType == 0:
                    cri_old = getDiag_GWR[criterion](gwrMod_old)
                else:
                    cri_old = getDiag_GWGLM[criterion](gwrMod_old)
        else:
            gwrMod_old = orig_mod
            weit = orig_mod.kernel
            optWeit = weit
            # The starting criterion must come from the supplied model,
            # otherwise the first comparison below has nothing to compare to.
            # Same diagnostics table as for a freshly fitted starting model;
            # assumes orig_mod was fitted on this y / x_glob / x_loc -- TODO confirm.
            if x_loc is None:
                cri_old = getDiag_GLM[criterion](gwrMod_old)
            elif mType == 0:
                cri_old = getDiag_GWR[criterion](gwrMod_old)
            else:
                cri_old = getDiag_GWGLM[criterion](gwrMod_old)

        # 2 loop
        flag = True  # whether any x was moved to local in the last pass
        while flag:  # until no improvement in one loop in orilist
            flag = False
            outlist = []  # ids of Xs moved from global to local in this pass

            # set global x: one column per id in orilist, in that order
            tmp_glob = np.zeros(shape=(nObs, 0))
            for gid in orilist:
                tmp_glob = np.hstack((tmp_glob, np.reshape(x_glob[:, gid], (-1, 1))))

            for i, idx in enumerate(orilist):
                # try to remove ith x from the global set.  Rejected columns
                # are re-inserted at the front and accepted ones are gone, so
                # the candidate always sits at position i - len(outlist).
                x_out = np.reshape(x_glob[:, idx], (-1, 1))
                tmp_glob = np.delete(tmp_glob, i - len(outlist), 1)
                # get new x_loc
                tmp_loc = np.hstack((tmp_loc, x_out))

                # new bandwidth / kernel
                if use_search:
                    rs = M_selection.Band_Sel(y, tmp_glob, tmp_loc, coords, mType, y_off, wType, criterion, bdinfo, maxVal, minVal, interval, tol, maxIter)
                    band = rs[0]
                    weit = rs[1]
                    optband.append(rs)
                else:
                    weit = Kernel.GWR_W(coords, band, wType)

                # decide whether is a local model
                if len(tmp_loc[0]) == nVars:  # every x is local: local model
                    gwrMod_new = GWGLM_Base(y, tmp_loc, weit, mType, y_off)
                    cri_new = getDiag_GWGLM[criterion](gwrMod_new)
                else:  # should be mixed model
                    gwrMod_new = semiGWR_Base(y, tmp_glob, tmp_loc, weit, mType, y_off)
                    if mType == 0:  # get diagnostics
                        cri_new = getDiag_GWR[criterion](gwrMod_new)
                    else:
                        cri_new = getDiag_GWGLM[criterion](gwrMod_new)

                # check improvements
                if cri_new < cri_old:  # keep x in the local set
                    outlist.append(idx)
                    cri_old = cri_new  # update criteria
                    flag = True
                    optWeit = weit
                else:  # undo the move: x goes back to the global set
                    tmp_loc = np.delete(tmp_loc, -1, 1)
                    tmp_glob = np.hstack((x_out, tmp_glob))

            # drop the moved ids, keeping the remaining ones in order
            orilist = [gid for gid in orilist if gid not in outlist]

    varsG = orilist
    varsL = [gid for gid in range(nVars_glob) if gid not in orilist]

    return varsL, varsG, optband, optWeit, cri_old