Example #1
0
def f_Golden(y,
             x_glob,
             x_loc,
             y_off,
             coords,
             mType,
             wType,
             criterion,
             maxVal,
             minVal,
             tol,
             maxIter=200,
             flag=0):
    """
    Golden section search

    The search keeps a bracket [a, c] with two interior bandwidths b < d.
    At every step the interior point with the lower criterion value is
    recorded as the current optimum and the bracket is shrunk towards it.
    Only the two interior points are ever compared, so only they are fitted,
    and the interior point that survives a step is reused (not re-fitted)
    in the next one.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array
                         n*k1, fixed independent variable, None for a purely local model.
        x_loc          : array
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian, 2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        tol            : float
                         tolerance used to determine convergence
        maxIter        : integer
                         maximum number of iteration if convergence cannot arrive at the tolerance
        flag           : integer
                         distance type

    Return:
           opt_band   : float
                        optimal bandwidth
           opt_weit   : kernel
                        optimal kernel
           output     : list of tuple
                        report searching process, keep bandwidth and score, [(bandwidth, score),(bandwidth, score),...]

    Raises:
           ValueError : if maxIter is smaller than 1 (no bandwidth could be evaluated)
    """
    if maxIter < 1:
        # without a single iteration there is no optimum to return
        raise ValueError("maxIter must be at least 1")

    dist = Kernel.get_pairDist(coords,
                               flag)  #get pairwise distance between points

    # 1 set range of bandwidth
    nVar_glob = 0 if x_glob is None else len(x_glob[0])
    nVar_loc = 0 if x_loc is None else len(x_loc[0])
    nVars = nVar_glob + nVar_loc

    a, c = ini_band_dist(dist, nVars, wType, maxVal, minVal)

    adaptive = wType == 1 or wType == 3  # bandwidth is a number of neighbours
    # diagnostic(0: AICc, 1: AIC, 2: BIC, 3: CV), table chosen by model type
    score_model = (getDiag_GWR if mType == 0 else getDiag_GWGLM)[criterion]

    def _evaluate(bandwidth):
        """Fit the model for one bandwidth, return (kernel, criterion value)."""
        weit = Kernel.GWR_W(coords, bandwidth, wType, dist)
        if x_glob is None:  # local model
            model = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            model = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        return weit, score_model(model)

    # 2 get initial b and d values
    output = []

    lamda = 0.38197  #1 - (np.sqrt(5.0)-1.0)/2.0

    b = a + lamda * abs(c - a)  #distance or nn based on wType
    d = c - lamda * abs(c - a)  # golden section
    if adaptive:
        b = round(b, 0)
        d = round(d, 0)

    # 3 loop
    # (kernel, score) already known for the current b / d, None if it still
    # has to be fitted
    known_b = None
    known_d = None
    diff = float('inf')  # guarantees the first iteration for any finite tol
    nIter = 0
    while abs(diff) > tol and nIter < maxIter:
        nIter += 1

        weit_b, f_b = known_b if known_b is not None else _evaluate(b)
        weit_d, f_d = known_d if known_d is not None else _evaluate(d)

        # determine next triple
        if f_b <= f_d:
            # current optimal bandwidth
            opt_weit = weit_b
            opt_band = b
            opt_cri = f_b
            # shrink to [a, d]; the old b becomes the new d
            c = d
            d = b
            known_d = (weit_b, f_b)
            known_b = None
            b = a + lamda * abs(c - a)
            if adaptive:
                b = round(b, 0)
        else:
            # current optimal bandwidth
            opt_weit = weit_d
            opt_band = d
            opt_cri = f_d
            # shrink to [b, c]; the old d becomes the new b
            a = b
            b = d
            known_b = (weit_d, f_d)
            known_d = None
            d = c - lamda * abs(c - a)
            if adaptive:
                d = round(d, 0)

        output.append((opt_band, opt_cri))

        # convergence is judged on the gap between the two interior scores
        diff = f_b - f_d

    return opt_band, opt_weit, output
Example #2
0
def f_Interval(y,
               x_glob,
               x_loc,
               y_off,
               coords,
               mType,
               wType,
               criterion,
               maxVal,
               minVal,
               interval,
               flag=0):
    """
    Interval search, using interval as stepsize

    Both end points (minVal and maxVal) are evaluated first, then every
    bandwidth minVal + k*interval that is strictly smaller than maxVal.
    The bandwidth with the smallest criterion value wins; on ties the
    bandwidth evaluated first among the scanned ones is kept.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array
                         n*k1, fixed independent variable, None for a purely local model.
        x_loc          : array
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight evaluation (including point i)
        mType          : integer
                         GWR model type, 0: M_Gaussian, 1: M_Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian, 2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         interval used in interval search, must be positive
        flag           : integer
                         distance type
    Return:
           opt_band   : float
                        optimal bandwidth
           opt_weit   : kernel
                        optimal kernel
           output     : list of tuple
                        report searching process, keep bandwidth and score, [(bandwidth, score),(bandwidth, score),...]

    Raises:
           ValueError : if interval is not positive while there are
                        bandwidths left to scan (the scan would never end)
    """
    # honour the caller's distance type (was hard-coded to 0 before)
    dist = Kernel.get_pairDist(coords,
                               flag=flag)  #get pairwise distance between points

    adaptive = wType == 1 or wType == 3  # bandwidth is a number of neighbours
    score_model = (getDiag_GWR if mType == 0 else getDiag_GWGLM)[criterion]

    def _evaluate(bandwidth):
        """Fit the model for one bandwidth, return (kernel, criterion value)."""
        weit = Kernel.GWR_W(coords, bandwidth, wType, dist)
        if x_glob is None:  # local model
            model = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            model = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        return weit, score_model(model)

    a = minVal
    c = maxVal

    # add codes to check whether a and c are valid
    #------------------------------------------------------------

    if adaptive:
        a = int(a)
        c = int(c)

    # 1 get initial b value
    b = a + interval  #distance or nn based on wType
    if adaptive:
        b = int(b)

    if b < c and interval <= 0:
        # a non-positive step never reaches c: refuse instead of hanging
        raise ValueError("interval must be positive")

    # 2 evaluate both ends of the range
    weit_a, f_a = _evaluate(a)
    weit_c, f_c = _evaluate(c)

    output = [(a, f_a), (c, f_c)]

    if f_a < f_c:
        opt_weit = weit_a
        opt_band = a
        opt_val = f_a
    else:
        opt_weit = weit_c
        opt_band = c
        opt_val = f_c

    # 3 scan the interior of the range
    while b < c:
        weit_b, f_b = _evaluate(b)
        output.append((b, f_b))

        # keep the best bandwidth seen so far
        if f_b < opt_val:
            opt_weit = weit_b
            opt_band = b
            opt_val = f_b

        # update b
        b = b + interval

    return opt_band, opt_weit, output
Example #3
0
def pred(data,
         refData,
         band,
         y,
         x_local,
         y_hat=None,
         wType=0,
         mType=0,
         flag=0,
         y_offset=None,
         sigma2=1,
         y_fix=None,
         fMatrix=None):
    """
    predict values at unsampled locations

    Arguments:
        data           : dictionary,
                         (x,y) of unsampled locations, keyed 0..n_pred-1
        refData        : dictionary,
                         (x,y) of sampled locations
        band           : float
                         bandwidth
        y              : array
                         n*1, dependent variable
        x_local        : array
                         n*k1, local independent variable
        y_hat          : array
                         n*1, predicted y from original model, to calculate local statistics
                         (needed for mType 0, 1 and 2)
        wType          : integer
                         define which kernel function to use
        mType          : integer
                         model type, 0: Gaussian, 1: Poisson, 2: Logistic
        flag           : dummy,
                         0 or 1, 0: Euclidean distance; 1: spherical distance
        y_offset       : array
                         n*1, offset variable for Poisson model; when None the
                         local deviance is computed with an offset of 1
        sigma2         : float
                         used to calculate std. error of betas for Gaussian model
        y_fix          : array
                         n*1, fixed part of y from global Xs, used in mixed model
        fMatrix        : array
                         n*n, hat matrix for global model, used in mixed model


    Return:
        Betas          : array
                         n*k, Beta estimation
        std_err        : array
                         n*k, standard errors of Beta
        t_stat         : array
                         n*k, local t-statistics
        localR2        : array
                         n*1, local R square or local p-dev
                         (left at zero for any other mType)
    """
    # 1 get W matrix
    n_pred = len(data)
    dicDist = {}
    for i in range(n_pred):
        # distance between unsampled obs i and all sampled obs
        dicDist[i] = Kernel.get_focusDist(data[i], refData, flag)

    weit = Kernel.GWR_W(data, band, wType, dicDist)

    # 2 get predicted local Beta estimation
    mod_loc = GWGLM_Base(y, x_local, weit, mType, y_offset, y_fix, fMatrix)

    pred_betas = mod_loc.Betas[:n_pred]

    # 3 get std errors of Betas
    pred_stdErr = np.sqrt(mod_loc.CCT * sigma2)

    # 4 get t statistics
    pred_tstat = pred_betas / pred_stdErr

    # 5 get local R2 or local p-dev
    localR2 = np.zeros(shape=(n_pred, 1))
    n_reg = len(y)
    if mType == 0:  # Gaussian model
        for i in range(n_pred):
            w_i = np.reshape(np.array(weit.w[i]), (-1, 1))
            ybar = 1.0 * np.sum(y * w_i) / np.sum(w_i)  # weighted mean of y
            rss = np.sum(w_i * (y - y_hat)**2)
            tss = np.sum(w_i * (y - ybar)**2)
            localR2[i] = (tss - rss) / tss
    elif mType == 1:  # Poisson model
        # no offset supplied: use an exposure of 1 for every observation
        if y_offset is None:
            offset = np.ones((n_reg, 1))
        else:
            offset = y_offset
        for i in range(n_pred):
            w_i = np.reshape(np.array(weit.w[i]), (-1, 1))
            # weighted null-model rate
            ybar = 1.0 * np.sum(y * w_i) / np.sum(w_i * offset)
            dev = 0.0
            dev0 = 0.0
            for j in range(n_reg):
                # y*log(y) term vanishes for y == 0 (and log(0) is undefined)
                if y[j] != 0:
                    dev += 2 * y[j] * (np.log(y[j]) -
                                       np.log(y_hat[j])) * w_i[j]
                    dev0 += 2 * y[j] * (np.log(y[j]) -
                                        np.log(ybar * offset[j])) * w_i[j]
                dev -= 2 * (y[j] - y_hat[j]) * w_i[j]
                dev0 -= 2 * (y[j] - ybar * offset[j]) * w_i[j]
            localR2[i] = 1.0 - dev / dev0
    elif mType == 2:  # Logistic model
        for i in range(n_pred):
            w_i = np.reshape(np.array(weit.w[i]), (-1, 1))
            ybar = 1.0 * np.sum(y * w_i) / np.sum(w_i)
            # null-model terms depend on i only, not on j
            nu0 = np.log(ybar / (1 - ybar))
            log_1m_ybar = np.log(1.0 - ybar)
            dev = 0.0
            dev0 = 0.0
            for j in range(n_reg):
                if (1.0 - y_hat[j] < 1e-10):
                    # clamp 1 - y_hat to 1e-10 to avoid log(0)
                    nu = np.log(y_hat[j] / 1e-10)
                    dev += -2 * (y[j] * nu + np.log(1e-10)) * w_i[j]
                else:
                    nu = np.log(y_hat[j] / (1.0 - y_hat[j]))
                    dev += -2 * (y[j] * nu + np.log(1.0 - y_hat[j])) * w_i[j]
                dev0 += -2 * (y[j] * nu0 + log_1m_ybar) * w_i[j]

            localR2[i] = 1.0 - dev / dev0

    return pred_betas, pred_stdErr, pred_tstat, localR2
Example #4
0
    coords = {}
    for i in range(nobs):
        coords[i] = dic_data[i][:2]  # get coordinates
        lst_data.append(dic_data[i][2:])
    arr_data = np.array(lst_data)

    # create x, y
    y = np.reshape(arr_data[:, 0], (-1, 1))
    y_off = np.reshape(arr_data[:, 1], (-1, 1))
    x = arr_data[:, 2:]
    x = np.hstack((np.ones(y.shape), x))

    #**********************************1. GWR Poisson (adaptive bandwithd: bisquare)*************************
    #******************************************************************************************************
    band = 100
    weit = Kernel.GWR_W(coords, band, 3)
    begin_t = datetime.now()
    print begin_t
    myMod = GWGLM(y, x, weit, 1, y_off, False, 1e-6, 200, 'db2564', 'eb2564',
                  ['OCC_TEC', 'OWNH', 'POP65', 'UNEMP'], flePath, True)
    end_t = datetime.now()
    print end_t
    #print myMod.Betas[:5]
    #print myMod.std_err[:5]
    #print myMod.nObs
    #print myMod.nVars

    #print myMod.tr_S
    #print myMod.tr_SWSTW
    #print myMod.tr_STS
    #print myMod.y_pred[:5]
Example #5
0
def G2L(y,
        x_glob,
        x_loc,
        coords,
        mType=0,
        wType=3,
        y_off=None,
        orig_mod=None,
        criterion=0,
        bdinfo=0,
        band=0,
        maxVal=0.0,
        minVal=0.0,
        interval=0.0,
        tol=1.0e-2,
        maxIter=50):
    """
    Variable selection: global to local

    Each global X is tentatively moved to the local part; the move is kept
    when it lowers the selection criterion. Passes over the remaining global
    Xs are repeated until a full pass brings no improvement.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array
                         n*k1, fixed independent variable.
        x_loc          : array
                         n*k2, local independent variable, including constant.
                         None when the starting model is purely global.
        coords         : dictionary
                         including (x,y) coordinates involved in the weight evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         weight type
        y_off          : array
                         n*1, offset variable for Poisson model
        orig_mod       : object of GWR model
                         original model; its `kernel` attribute is used as the
                         starting kernel and its criterion value as baseline
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC, 2: BIC, 3: CV
        bdinfo         : integer
                         bandwidth searching method: 0: golden search 1: interval 2: fixed single bandwidth
        band           : float
                         given bandwidth if bdinfo=2
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         interval used in interval search
        tol            : float
                         tolerance used to determine convergence
        maxIter        : integer
                         maximum number of iteration if convergence cannot arrived to the tolerance


    Return:
        varsL          : list,
                         ids of global Xs that were moved to local
        varsG          : list,
                         ids of Xs that stay global
        optband        : list
                         info of optimal bandwidth searching results
        optWeit        : kernel
                         kernel of best model, None if no kernel was ever needed
        optcri         : float
                         criterion value for optimal model, None if x_glob has no column
    """
    nObs = len(y)
    nVars_glob = len(x_glob[0])
    if x_loc is None:
        nVars_loc = 0
        tmp_loc = np.zeros(shape=(nObs, 0))
    else:
        nVars_loc = len(x_loc[0])
        tmp_loc = x_loc
    nVars = nVars_loc + nVars_glob
    optband = []

    # defaults keep the return statement well defined when there is nothing
    # to move or when no move improves on a purely global starting model
    orilist = list(range(nVars_glob))  # ids of original global Xs
    optWeit = None
    cri_old = None

    def _criterion(model, is_global=False):
        """Criterion value of a model, diagnostics table picked like the
        bandwidth searches do (by model type)."""
        if is_global:
            return getDiag_GLM[criterion](model)
        if mType == 0:
            return getDiag_GWR[criterion](model)
        return getDiag_GWGLM[criterion](model)

    def _kernel(cur_glob, cur_loc):
        """Kernel for the given split: searched bandwidth or the fixed one."""
        if bdinfo == 0 or bdinfo == 1:  # golden or interval search
            rs = M_selection.Band_Sel(y, cur_glob, cur_loc, coords, mType,
                                      y_off, wType, criterion, bdinfo, maxVal,
                                      minVal, interval, tol, maxIter)
            optband.append(rs)
            return rs[1]
        return Kernel.GWR_W(coords, band, wType)

    # loop
    flag = True  # check whether an x was moved to local in the last pass
    if nVars_glob > 0:
        # 1 set original model and its criterion value
        if orig_mod is None:
            if x_loc is None:  # global model
                cri_old = _criterion(GLM_Base(y, x_glob, mType, y_off),
                                     is_global=True)
            else:  # should be mixed model
                optWeit = _kernel(x_glob, x_loc)
                cri_old = _criterion(
                    semiGWR_Base(y, x_glob, x_loc, optWeit, mType, y_off))
        else:
            optWeit = orig_mod.kernel
            # baseline was never computed for a supplied model before, which
            # made the first comparison below fail
            # NOTE(review): assumes the diagnostics accept orig_mod the same
            # way they accept the *_Base models -- confirm
            cri_old = _criterion(orig_mod, is_global=x_loc is None)

        # 2 loop
        while flag:  #  until no improvement in one loop in orilist
            flag = False
            outlist = []  # ids of Xs from global to local
            # set global x
            tmp_glob = np.zeros(shape=(nObs, 0))
            for i in orilist:
                tmp_glob = np.hstack(
                    (tmp_glob, np.reshape(x_glob[:, i], (-1, 1))))
            for i, idx in enumerate(orilist):
                # try to move the ith x out of the global part; rejected
                # columns are re-inserted in front, accepted ones are gone,
                # so the column of idx sits at position i - len(outlist)
                x_out = np.reshape(x_glob[:, idx], (-1, 1))
                tmp_glob = np.delete(tmp_glob, i - len(outlist), 1)
                # get new x_loc
                tmp_loc = np.hstack((tmp_loc, x_out))
                # new bandwidth / kernel
                weit = _kernel(tmp_glob, tmp_loc)

                # decide whether is a local model
                if len(tmp_loc[0]) == nVars:  # local model
                    gwrMod_new = GWGLM_Base(y, tmp_loc, weit, mType, y_off)
                else:  # should be mixed model
                    gwrMod_new = semiGWR_Base(y, tmp_glob, tmp_loc, weit,
                                              mType, y_off)
                # same diagnostics family as the baseline, otherwise the
                # comparison below mixes two different scales
                cri_new = _criterion(gwrMod_new)

                # check improvements
                if cri_new < cri_old:  # keep x in the local part
                    outlist.append(idx)
                    cri_old = cri_new  # update criteria
                    flag = True
                    optWeit = weit
                else:
                    tmp_loc = np.delete(tmp_loc, -1, 1)  # move x back to global
                    tmp_glob = np.hstack((x_out, tmp_glob))
            orilist = [v for v in orilist if v not in outlist]

    varsG = orilist
    varsL = [v for v in range(nVars_glob) if v not in orilist]

    return varsL, varsG, optband, optWeit, cri_old