def f_Golden(y, x_glob, x_loc, y_off, coords, mType, wType, criterion, maxVal,
             minVal, tol, maxIter=200, flag=0):
    """
    Golden section search for the bandwidth with the lowest criterion value

    Only the two interior points (b and d) of the bracket [a, c] are fitted,
    because the end points never take part in the comparison. The interior
    point that survives an iteration is reused instead of being refitted.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable; None for a purely
                         local model.
        x_loc          : array or None
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight
                         evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian,
                         2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC,
                         2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        tol            : float
                         the search stops once |f(b) - f(d)| <= tol
        maxIter        : integer
                         maximum number of iterations if the tolerance is not
                         reached
        flag           : integer
                         distance type, forwarded to Kernel.get_pairDist

    Return:
        opt_band       : float
                         optimal bandwidth
        opt_weit       : kernel
                         optimal kernel
        output         : list of tuple
                         searching process, one (bandwidth, score) entry per
                         iteration

    Raises:
        ValueError     : if tol/maxIter prevent even a single iteration, so
                         that no bandwidth could be evaluated.
    """
    dist = Kernel.get_pairDist(coords, flag)  # pairwise distance between points

    # 1 set range of bandwidth
    nVar_glob = 0 if x_glob is None else len(x_glob[0])
    nVar_loc = 0 if x_loc is None else len(x_loc[0])
    a, c = ini_band_dist(dist, nVar_glob + nVar_loc, wType, maxVal, minVal)

    adaptive = wType == 1 or wType == 3  # bandwidth is a number of neighbours
    # Gaussian models use the GWR diagnostics, all others the GWGLM ones
    diag = getDiag_GWR if mType == 0 else getDiag_GWGLM

    def _evaluate(band):
        # Build the kernel for `band`, fit the model, return (kernel, score).
        weit = Kernel.GWR_W(coords, band, wType, dist)
        if x_glob is None:  # local model
            mod = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            mod = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        return weit, diag[criterion](mod)

    # 2 get initial interior points
    output = []
    lamda = 0.38197  # 1 - (np.sqrt(5.0) - 1.0) / 2.0
    b = a + lamda * abs(c - a)
    d = c - lamda * abs(c - a)
    if adaptive:
        b = round(b, 0)
        d = round(d, 0)

    # 3 loop
    diff = 1.0e9
    nIter = 0
    kept = None  # (bandwidth, kernel, score) of the point carried over
    while abs(diff) > tol and nIter < maxIter:
        nIter += 1

        # 3.1 fit b and d, reusing the point carried over from the last round
        fresh = {}
        for cand in (b, d):
            if cand in fresh:
                continue  # b and d can coincide after rounding
            if kept is not None and kept[0] == cand:
                fresh[cand] = (kept[1], kept[2])
            else:
                fresh[cand] = _evaluate(cand)
        weit_b, f_b = fresh[b]
        weit_d, f_d = fresh[d]

        # 3.2 determine next triple
        if f_b <= f_d:
            # minimum lies in [a, d]: old b becomes the new d
            opt_weit, opt_band, opt_cri = weit_b, b, f_b
            kept = (b, weit_b, f_b)
            c = d
            d = b
            b = a + lamda * abs(c - a)
            if adaptive:
                b = round(b, 0)
        else:
            # minimum lies in [b, c]: old d becomes the new b
            opt_weit, opt_band, opt_cri = weit_d, d, f_d
            kept = (d, weit_d, f_d)
            a = b
            b = d
            d = c - lamda * abs(c - a)
            if adaptive:
                d = round(d, 0)

        output.append((opt_band, opt_cri))

        # 3.3 convergence is judged on the two scores just compared
        diff = f_b - f_d

    if not output:
        raise ValueError("golden section search ran no iteration; "
                         "check tol and maxIter")

    return opt_band, opt_weit, output
def f_Golden(y, x_glob, x_loc, y_off, coords, mType, wType, criterion, maxVal,
             minVal, tol, maxIter=200, flag=0):
    """
    Golden section search for the bandwidth with the lowest criterion value

    Only the two interior points (b and d) of the bracket [a, c] are fitted,
    because the end points never take part in the comparison. The interior
    point that survives an iteration is reused instead of being refitted.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable; None for a purely
                         local model.
        x_loc          : array or None
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight
                         evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian,
                         2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC,
                         2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        tol            : float
                         the search stops once |f(b) - f(d)| <= tol
        maxIter        : integer
                         maximum number of iterations if the tolerance is not
                         reached
        flag           : integer
                         distance type, forwarded to Kernel.get_pairDist

    Return:
        opt_band       : float
                         optimal bandwidth
        opt_weit       : kernel
                         optimal kernel
        output         : list of tuple
                         searching process, one (bandwidth, score) entry per
                         iteration

    Raises:
        ValueError     : if tol/maxIter prevent even a single iteration, so
                         that no bandwidth could be evaluated.
    """
    dist = Kernel.get_pairDist(coords, flag)  # pairwise distance between points

    # 1 set range of bandwidth
    nVar_glob = 0 if x_glob is None else len(x_glob[0])
    nVar_loc = 0 if x_loc is None else len(x_loc[0])
    a, c = ini_band_dist(dist, nVar_glob + nVar_loc, wType, maxVal, minVal)

    adaptive = wType == 1 or wType == 3  # bandwidth is a number of neighbours
    # Gaussian models use the GWR diagnostics, all others the GWGLM ones
    diag = getDiag_GWR if mType == 0 else getDiag_GWGLM

    def _evaluate(band):
        # Build the kernel for `band`, fit the model, return (kernel, score).
        weit = Kernel.GWR_W(coords, band, wType, dist)
        if x_glob is None:  # local model
            mod = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            mod = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        return weit, diag[criterion](mod)

    # 2 get initial interior points
    output = []
    lamda = 0.38197  # 1 - (np.sqrt(5.0) - 1.0) / 2.0
    b = a + lamda * abs(c - a)
    d = c - lamda * abs(c - a)
    if adaptive:
        b = round(b, 0)
        d = round(d, 0)

    # 3 loop
    diff = 1.0e9
    nIter = 0
    kept = None  # (bandwidth, kernel, score) of the point carried over
    while abs(diff) > tol and nIter < maxIter:
        nIter += 1

        # 3.1 fit b and d, reusing the point carried over from the last round
        fresh = {}
        for cand in (b, d):
            if cand in fresh:
                continue  # b and d can coincide after rounding
            if kept is not None and kept[0] == cand:
                fresh[cand] = (kept[1], kept[2])
            else:
                fresh[cand] = _evaluate(cand)
        weit_b, f_b = fresh[b]
        weit_d, f_d = fresh[d]

        # 3.2 determine next triple
        if f_b <= f_d:
            # minimum lies in [a, d]: old b becomes the new d
            opt_weit, opt_band, opt_cri = weit_b, b, f_b
            kept = (b, weit_b, f_b)
            c = d
            d = b
            b = a + lamda * abs(c - a)
            if adaptive:
                b = round(b, 0)
        else:
            # minimum lies in [b, c]: old d becomes the new b
            opt_weit, opt_band, opt_cri = weit_d, d, f_d
            kept = (d, weit_d, f_d)
            a = b
            b = d
            d = c - lamda * abs(c - a)
            if adaptive:
                d = round(d, 0)

        output.append((opt_band, opt_cri))

        # 3.3 convergence is judged on the two scores just compared
        diff = f_b - f_d

    if not output:
        raise ValueError("golden section search ran no iteration; "
                         "check tol and maxIter")

    return opt_band, opt_weit, output
def f_Interval(y, x_glob, x_loc, y_off, coords, mType, wType, criterion,
               maxVal, minVal, interval, flag=0):
    """
    Interval search, using interval as stepsize

    Evaluates the two end points minVal and maxVal first, then every
    bandwidth minVal + k*interval that is strictly below maxVal, and keeps
    the one with the lowest criterion value.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable; None for a purely
                         local model.
        x_loc          : array
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight
                         evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian,
                         2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC,
                         2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         step size of the search, must be positive
        flag           : integer
                         distance type, forwarded to Kernel.get_pairDist

    Return:
        opt_band       : float
                         optimal bandwidth
        opt_weit       : kernel
                         optimal kernel
        output         : list of tuple
                         searching process, [(bandwidth, score), ...]; the two
                         end points come first, then the interior steps

    Raises:
        ValueError     : if interval is not positive (the search could never
                         reach maxVal).
    """
    if interval <= 0:
        raise ValueError("interval must be positive")

    # honour the caller's distance type instead of always using 0
    dist = Kernel.get_pairDist(coords, flag=flag)

    adaptive = wType == 1 or wType == 3  # bandwidth is a number of neighbours
    # Gaussian models use the GWR diagnostics, all others the GWGLM ones
    diag = getDiag_GWR if mType == 0 else getDiag_GWGLM

    def _evaluate(band):
        # Build the kernel for `band`, fit the model, return (kernel, score).
        weit = Kernel.GWR_W(coords, band, wType, dist)
        if x_glob is None:  # local model
            mod = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            mod = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        return weit, diag[criterion](mod)

    a = minVal
    c = maxVal
    # TODO: check whether a and c are valid bandwidths
    if adaptive:
        a = int(a)
        c = int(c)

    # 1 first interior bandwidth
    # NOTE(review): for adaptive kernels only this first step is truncated to
    # an integer; later steps add `interval` as given -- confirm callers pass
    # an integer-valued interval in that case.
    b = a + interval
    if adaptive:
        b = int(b)

    # 2 evaluate both end points
    weit_a, f_a = _evaluate(a)
    weit_c, f_c = _evaluate(c)
    output = [(a, f_a), (c, f_c)]

    if f_a < f_c:
        opt_weit, opt_band, opt_val = weit_a, a, f_a
    else:
        opt_weit, opt_band, opt_val = weit_c, c, f_c

    # 3 walk through the interior of the range
    while b < c:
        weit_b, f_b = _evaluate(b)
        output.append((b, f_b))

        # keep the best bandwidth seen so far (ties keep the earlier one)
        if f_b < opt_val:
            opt_weit, opt_band, opt_val = weit_b, b, f_b

        b = b + interval

    return opt_band, opt_weit, output
def f_Interval(y, x_glob, x_loc, y_off, coords, mType, wType, criterion,
               maxVal, minVal, interval, flag=0):
    """
    Interval search, using interval as stepsize

    Evaluates the two end points minVal and maxVal first, then every
    bandwidth minVal + k*interval that is strictly below maxVal, and keeps
    the one with the lowest criterion value.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable; None for a purely
                         local model.
        x_loc          : array
                         n*k2, local independent variable, including constant.
        y_off          : array
                         n*1, offset variable for Poisson model
        coords         : dictionary
                         including (x,y) coordinates involved in the weight
                         evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         kernel type, 0: fix_Gaussian, 1: adap_Gaussian,
                         2: fix_Bisquare, 3: adap_Bisquare
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC,
                         2: BIC, 3: CV
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         step size of the search, must be positive
        flag           : integer
                         distance type, forwarded to Kernel.get_pairDist

    Return:
        opt_band       : float
                         optimal bandwidth
        opt_weit       : kernel
                         optimal kernel
        output         : list of tuple
                         searching process, [(bandwidth, score), ...]; the two
                         end points come first, then the interior steps

    Raises:
        ValueError     : if interval is not positive (the search could never
                         reach maxVal).
    """
    if interval <= 0:
        raise ValueError("interval must be positive")

    # honour the caller's distance type instead of always using 0
    dist = Kernel.get_pairDist(coords, flag=flag)

    adaptive = wType == 1 or wType == 3  # bandwidth is a number of neighbours
    # Gaussian models use the GWR diagnostics, all others the GWGLM ones
    diag = getDiag_GWR if mType == 0 else getDiag_GWGLM

    def _evaluate(band):
        # Build the kernel for `band`, fit the model, return (kernel, score).
        weit = Kernel.GWR_W(coords, band, wType, dist)
        if x_glob is None:  # local model
            mod = GWGLM_Base(y, x_loc, weit, mType, y_off)
        else:  # mixed model
            mod = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
        return weit, diag[criterion](mod)

    a = minVal
    c = maxVal
    # TODO: check whether a and c are valid bandwidths
    if adaptive:
        a = int(a)
        c = int(c)

    # 1 first interior bandwidth
    # NOTE(review): for adaptive kernels only this first step is truncated to
    # an integer; later steps add `interval` as given -- confirm callers pass
    # an integer-valued interval in that case.
    b = a + interval
    if adaptive:
        b = int(b)

    # 2 evaluate both end points
    weit_a, f_a = _evaluate(a)
    weit_c, f_c = _evaluate(c)
    output = [(a, f_a), (c, f_c)]

    if f_a < f_c:
        opt_weit, opt_band, opt_val = weit_a, a, f_a
    else:
        opt_weit, opt_band, opt_val = weit_c, c, f_c

    # 3 walk through the interior of the range
    while b < c:
        weit_b, f_b = _evaluate(b)
        output.append((b, f_b))

        # keep the best bandwidth seen so far (ties keep the earlier one)
        if f_b < opt_val:
            opt_weit, opt_band, opt_val = weit_b, b, f_b

        b = b + interval

    return opt_band, opt_weit, output
def varyTest(y, x_glob, x_loc, kernel, mType=0, y_off=None, criterion=0,
             orig_mod=None):
    """
    Geographical variability test

    Each local variable is moved in turn to the global (fixed) part and the
    resulting mixed model is compared with the original one.
    All the models use the same bandwidth (the same kernel).

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable.
        x_loc          : array
                         n*k2, local independent variable, including constant.
        kernel         : kernel
                         weights handed unchanged to every fitted model
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        y_off          : array
                         n*1, offset variable for Poisson model
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC,
                         2: BIC, 3: CV
        orig_mod       : object of GWR model
                         original model; fitted here when None

    Return:
        geoVary        : list of tuple
                         one entry per column of x_loc, in column order:
                         Gaussian (mType == 0): (f, diffp, df, diffcri)
                         otherwise            : (diffdev, diffp, diffcri)
                         where diffp = tr_S(old) - tr_S(new),
                         diffcri = criterion(old) - criterion(new),
                         df = n - tr_S(old).
    """
    nObs = len(y)
    gaussian = mType == 0

    def _stats(mod):
        # Return (trace of S, criterion value, deviance measure) of a model.
        if gaussian:
            return mod.tr_S, getDiag_GWR[criterion](mod), mod.res2
        return mod.tr_S, getDiag_GWGLM[criterion](mod), mod.dev_res

    # 1 original model
    if orig_mod is not None:
        gwrMod_old = orig_mod
    elif x_glob is None:
        gwrMod_old = GWGLM_Base(y, x_loc, kernel, mType, y_off)
    else:
        gwrMod_old = semiGWR_Base(y, x_glob, x_loc, kernel, mType, y_off)

    # 2 original statistics
    p_old, cri_old, dev_old = _stats(gwrMod_old)
    df = nObs - p_old  # only used by the Gaussian F statistic

    # 3 loop over the local variables (intercept is local by default)
    geoVary = []
    for i in range(len(x_loc[0])):
        moved = np.reshape(x_loc[:, i], (-1, 1))  # column made global
        xg = np.delete(x_loc, i, 1)  # remaining local columns
        if x_glob is None:
            xf = moved
        else:
            xf = np.hstack((x_glob, moved))  # new global columns

        # new model and its statistics
        gwrMod_new = semiGWR_Base(y, xf, xg, kernel, mType, y_off)
        p_new, cri_new, dev_new = _stats(gwrMod_new)

        # differences
        diffp = p_old - p_new
        diffcri = cri_old - cri_new
        if gaussian:
            # NOTE(review): diffp == 0 makes this a division by zero
            f = ((dev_new - dev_old) / diffp) / (dev_old / df)
            geoVary.append((f, diffp, df, diffcri))
        else:
            diffdev = dev_old - dev_new
            geoVary.append((diffdev, diffp, diffcri))

    return geoVary
def G2L(y, x_glob, x_loc, coords, mType=0, wType=3, y_off=None, orig_mod=None,
        criterion=0, bdinfo=0, band=0, maxVal=0.0, minVal=0.0, interval=0.0,
        tol=1.0e-2, maxIter=50):
    """
    Variable selection: global to local

    Starting from the given model, each global variable is tentatively moved
    to the local part; the move is kept when it lowers the criterion. Passes
    over the remaining global variables are repeated until a full pass brings
    no improvement.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array
                         n*k1, fixed independent variable.
        x_loc          : array or None
                         n*k2, local independent variable, including constant.
        coords         : dictionary
                         including (x,y) coordinates involved in the weight
                         evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         weight type
        y_off          : array
                         n*1, offset variable for Poisson model
        orig_mod       : object of GWR model
                         original model; its `kernel` attribute is read and
                         its criterion value is the baseline to beat
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC,
                         2: BIC, 3: CV
        bdinfo         : integer
                         bandwidth searching method: 0: golden search
                         1: interval 2: fixed single bandwidth
        band           : float
                         given bandwidth if bdinfo=2
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         interval used in interval search
        tol            : float
                         tolerance used to determine convergence
        maxIter        : integer
                         maximum number of iteration if convergence cannot
                         arrive at the tolerance

    Return:
        varsL          : list
                         column ids (in x_glob) of Xs moved to local
        varsG          : list
                         column ids (in x_glob) of Xs kept global
        optband        : list
                         results of every bandwidth search that was run
        optWeit        : kernel
                         kernel of best model; None when the best model is
                         the purely global starting model
        optcri         : float
                         criterion value for optimal model; None when x_glob
                         has no columns
    """
    nObs = len(y)
    nVars_glob = len(x_glob[0])
    if x_loc is None:
        nVars_loc = 0
        tmp_loc = np.zeros(shape=(nObs, 0))
    else:
        nVars_loc = len(x_loc[0])
        tmp_loc = x_loc
    nVars = nVars_loc + nVars_glob

    search_band = bdinfo == 0 or bdinfo == 1  # golden or interval search
    # Gaussian models use the GWR diagnostics, all others the GWGLM ones
    diag = getDiag_GWR if mType == 0 else getDiag_GWGLM

    # defaults so that the return below is defined on every path
    optband = []
    varsL = []
    varsG = []
    optWeit = None
    cri_old = None

    if nVars_glob > 0:
        # 1 baseline criterion from the original model
        if orig_mod is None:
            if x_loc is None:  # global model
                gwrMod_old = GLM_Base(y, x_glob, mType, y_off)
                cri_old = getDiag_GLM[criterion](gwrMod_old)
            else:  # should be mixed model
                if search_band:
                    rs = M_selection.Band_Sel(y, x_glob, x_loc, coords, mType,
                                              y_off, wType, criterion, bdinfo,
                                              maxVal, minVal, interval, tol,
                                              maxIter)
                    band = rs[0]
                    weit = rs[1]
                    optband.append(rs)
                else:  # fixed single bandwidth
                    weit = Kernel.GWR_W(coords, band, wType)
                optWeit = weit
                gwrMod_old = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
                cri_old = diag[criterion](gwrMod_old)
        else:
            optWeit = orig_mod.kernel
            # the supplied model needs a baseline criterion as well
            if x_loc is None:
                cri_old = getDiag_GLM[criterion](orig_mod)
            else:
                cri_old = diag[criterion](orig_mod)

        # 2 loop
        orilist = list(range(nVars_glob))  # ids of Xs that are still global
        flag = True  # whether the last pass moved at least one x
        while flag:
            flag = False
            outlist = []  # ids of Xs moved from global to local in this pass

            # current global design matrix, columns in orilist order
            tmp_glob = np.hstack(
                [np.zeros(shape=(nObs, 0))] +
                [np.reshape(x_glob[:, j], (-1, 1)) for j in orilist])

            for i, idx in enumerate(orilist):
                # try to move the i-th remaining x to the local part; rejected
                # columns are put back at the front, so the candidate always
                # sits at position i - len(outlist)
                x_out = np.reshape(x_glob[:, idx], (-1, 1))
                tmp_glob = np.delete(tmp_glob, i - len(outlist), 1)
                tmp_loc = np.hstack((tmp_loc, x_out))

                # new bandwidth / kernel
                # NOTE(review): once every x is local, tmp_glob is an n*0
                # array rather than None -- confirm Band_Sel accepts that.
                if search_band:
                    rs = M_selection.Band_Sel(y, tmp_glob, tmp_loc, coords,
                                              mType, y_off, wType, criterion,
                                              bdinfo, maxVal, minVal, interval,
                                              tol, maxIter)
                    band = rs[0]
                    weit = rs[1]
                    optband.append(rs)
                else:
                    weit = Kernel.GWR_W(coords, band, wType)

                # fit the candidate model
                if len(tmp_loc[0]) == nVars:  # every x is local now
                    gwrMod_new = GWGLM_Base(y, tmp_loc, weit, mType, y_off)
                else:  # should be mixed model
                    gwrMod_new = semiGWR_Base(y, tmp_glob, tmp_loc, weit,
                                              mType, y_off)
                # same diagnostics family as the baseline, so that the
                # comparison below is between like quantities
                cri_new = diag[criterion](gwrMod_new)

                # check improvement
                if cri_new < cri_old:  # keep x in the local part
                    outlist.append(idx)
                    cri_old = cri_new
                    flag = True
                    optWeit = weit
                else:  # move x back to the global part
                    tmp_loc = np.delete(tmp_loc, -1, 1)
                    tmp_glob = np.hstack((x_out, tmp_glob))

            orilist = [j for j in orilist if j not in outlist]

        varsG = orilist
        varsL = [j for j in range(nVars_glob) if j not in orilist]

    return varsL, varsG, optband, optWeit, cri_old
def varyTest(y, x_glob, x_loc, kernel, mType=0, y_off=None, criterion=0,
             orig_mod=None):
    """
    Geographical variability test

    Each local variable is moved in turn to the global (fixed) part and the
    resulting mixed model is compared with the original one.
    All the models use the same bandwidth (the same kernel).

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array or None
                         n*k1, fixed independent variable.
        x_loc          : array
                         n*k2, local independent variable, including constant.
        kernel         : kernel
                         weights handed unchanged to every fitted model
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        y_off          : array
                         n*1, offset variable for Poisson model
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC,
                         2: BIC, 3: CV
        orig_mod       : object of GWR model
                         original model; fitted here when None

    Return:
        geoVary        : list of tuple
                         one entry per column of x_loc, in column order:
                         Gaussian (mType == 0): (f, diffp, df, diffcri)
                         otherwise            : (diffdev, diffp, diffcri)
                         where diffp = tr_S(old) - tr_S(new),
                         diffcri = criterion(old) - criterion(new),
                         df = n - tr_S(old).
    """
    nObs = len(y)
    gaussian = mType == 0

    def _stats(mod):
        # Return (trace of S, criterion value, deviance measure) of a model.
        if gaussian:
            return mod.tr_S, getDiag_GWR[criterion](mod), mod.res2
        return mod.tr_S, getDiag_GWGLM[criterion](mod), mod.dev_res

    # 1 original model
    if orig_mod is not None:
        gwrMod_old = orig_mod
    elif x_glob is None:
        gwrMod_old = GWGLM_Base(y, x_loc, kernel, mType, y_off)
    else:
        gwrMod_old = semiGWR_Base(y, x_glob, x_loc, kernel, mType, y_off)

    # 2 original statistics
    p_old, cri_old, dev_old = _stats(gwrMod_old)
    df = nObs - p_old  # only used by the Gaussian F statistic

    # 3 loop over the local variables (intercept is local by default)
    geoVary = []
    for i in range(len(x_loc[0])):
        moved = np.reshape(x_loc[:, i], (-1, 1))  # column made global
        xg = np.delete(x_loc, i, 1)  # remaining local columns
        if x_glob is None:
            xf = moved
        else:
            xf = np.hstack((x_glob, moved))  # new global columns

        # new model and its statistics
        gwrMod_new = semiGWR_Base(y, xf, xg, kernel, mType, y_off)
        p_new, cri_new, dev_new = _stats(gwrMod_new)

        # differences
        diffp = p_old - p_new
        diffcri = cri_old - cri_new
        if gaussian:
            # NOTE(review): diffp == 0 makes this a division by zero
            f = ((dev_new - dev_old) / diffp) / (dev_old / df)
            geoVary.append((f, diffp, df, diffcri))
        else:
            diffdev = dev_old - dev_new
            geoVary.append((diffdev, diffp, diffcri))

    return geoVary
def G2L(y, x_glob, x_loc, coords, mType=0, wType=3, y_off=None, orig_mod=None,
        criterion=0, bdinfo=0, band=0, maxVal=0.0, minVal=0.0, interval=0.0,
        tol=1.0e-2, maxIter=50):
    """
    Variable selection: global to local

    Starting from the given model, each global variable is tentatively moved
    to the local part; the move is kept when it lowers the criterion. Passes
    over the remaining global variables are repeated until a full pass brings
    no improvement.

    Arguments
    ----------
        y              : array
                         n*1, dependent variable.
        x_glob         : array
                         n*k1, fixed independent variable.
        x_loc          : array or None
                         n*k2, local independent variable, including constant.
        coords         : dictionary
                         including (x,y) coordinates involved in the weight
                         evaluation (including point i)
        mType          : integer
                         GWR model type, 0: Gaussian, 1: Poisson, 2: Logistic
        wType          : integer
                         weight type
        y_off          : array
                         n*1, offset variable for Poisson model
        orig_mod       : object of GWR model
                         original model; its `kernel` attribute is read and
                         its criterion value is the baseline to beat
        criterion      : integer
                         bandwidth selection criterion, 0: AICc, 1: AIC,
                         2: BIC, 3: CV
        bdinfo         : integer
                         bandwidth searching method: 0: golden search
                         1: interval 2: fixed single bandwidth
        band           : float
                         given bandwidth if bdinfo=2
        maxVal         : float
                         maximum value used in bandwidth searching
        minVal         : float
                         minimum value used in bandwidth searching
        interval       : float
                         interval used in interval search
        tol            : float
                         tolerance used to determine convergence
        maxIter        : integer
                         maximum number of iteration if convergence cannot
                         arrive at the tolerance

    Return:
        varsL          : list
                         column ids (in x_glob) of Xs moved to local
        varsG          : list
                         column ids (in x_glob) of Xs kept global
        optband        : list
                         results of every bandwidth search that was run
        optWeit        : kernel
                         kernel of best model; None when the best model is
                         the purely global starting model
        optcri         : float
                         criterion value for optimal model; None when x_glob
                         has no columns
    """
    nObs = len(y)
    nVars_glob = len(x_glob[0])
    if x_loc is None:
        nVars_loc = 0
        tmp_loc = np.zeros(shape=(nObs, 0))
    else:
        nVars_loc = len(x_loc[0])
        tmp_loc = x_loc
    nVars = nVars_loc + nVars_glob

    search_band = bdinfo == 0 or bdinfo == 1  # golden or interval search
    # Gaussian models use the GWR diagnostics, all others the GWGLM ones
    diag = getDiag_GWR if mType == 0 else getDiag_GWGLM

    # defaults so that the return below is defined on every path
    optband = []
    varsL = []
    varsG = []
    optWeit = None
    cri_old = None

    if nVars_glob > 0:
        # 1 baseline criterion from the original model
        if orig_mod is None:
            if x_loc is None:  # global model
                gwrMod_old = GLM_Base(y, x_glob, mType, y_off)
                cri_old = getDiag_GLM[criterion](gwrMod_old)
            else:  # should be mixed model
                if search_band:
                    rs = M_selection.Band_Sel(y, x_glob, x_loc, coords, mType,
                                              y_off, wType, criterion, bdinfo,
                                              maxVal, minVal, interval, tol,
                                              maxIter)
                    band = rs[0]
                    weit = rs[1]
                    optband.append(rs)
                else:  # fixed single bandwidth
                    weit = Kernel.GWR_W(coords, band, wType)
                optWeit = weit
                gwrMod_old = semiGWR_Base(y, x_glob, x_loc, weit, mType, y_off)
                cri_old = diag[criterion](gwrMod_old)
        else:
            optWeit = orig_mod.kernel
            # the supplied model needs a baseline criterion as well
            if x_loc is None:
                cri_old = getDiag_GLM[criterion](orig_mod)
            else:
                cri_old = diag[criterion](orig_mod)

        # 2 loop
        orilist = list(range(nVars_glob))  # ids of Xs that are still global
        flag = True  # whether the last pass moved at least one x
        while flag:
            flag = False
            outlist = []  # ids of Xs moved from global to local in this pass

            # current global design matrix, columns in orilist order
            tmp_glob = np.hstack(
                [np.zeros(shape=(nObs, 0))] +
                [np.reshape(x_glob[:, j], (-1, 1)) for j in orilist])

            for i, idx in enumerate(orilist):
                # try to move the i-th remaining x to the local part; rejected
                # columns are put back at the front, so the candidate always
                # sits at position i - len(outlist)
                x_out = np.reshape(x_glob[:, idx], (-1, 1))
                tmp_glob = np.delete(tmp_glob, i - len(outlist), 1)
                tmp_loc = np.hstack((tmp_loc, x_out))

                # new bandwidth / kernel
                # NOTE(review): once every x is local, tmp_glob is an n*0
                # array rather than None -- confirm Band_Sel accepts that.
                if search_band:
                    rs = M_selection.Band_Sel(y, tmp_glob, tmp_loc, coords,
                                              mType, y_off, wType, criterion,
                                              bdinfo, maxVal, minVal, interval,
                                              tol, maxIter)
                    band = rs[0]
                    weit = rs[1]
                    optband.append(rs)
                else:
                    weit = Kernel.GWR_W(coords, band, wType)

                # fit the candidate model
                if len(tmp_loc[0]) == nVars:  # every x is local now
                    gwrMod_new = GWGLM_Base(y, tmp_loc, weit, mType, y_off)
                else:  # should be mixed model
                    gwrMod_new = semiGWR_Base(y, tmp_glob, tmp_loc, weit,
                                              mType, y_off)
                # same diagnostics family as the baseline, so that the
                # comparison below is between like quantities
                cri_new = diag[criterion](gwrMod_new)

                # check improvement
                if cri_new < cri_old:  # keep x in the local part
                    outlist.append(idx)
                    cri_old = cri_new
                    flag = True
                    optWeit = weit
                else:  # move x back to the global part
                    tmp_loc = np.delete(tmp_loc, -1, 1)
                    tmp_glob = np.hstack((x_out, tmp_glob))

            orilist = [j for j in orilist if j not in outlist]

        varsG = orilist
        varsL = [j for j in range(nVars_glob) if j not in orilist]

    return varsL, varsG, optband, optWeit, cri_old