def get_x_lag(self, w, regimes_att):
    """
    Return the spatial lag of the regressors.

    Parameters
    ----------
    w : spatial weights object
        Passed straight to ``weights.lag_spatial``.
    regimes_att : dict or None
        When truthy, must provide the keys ``'x'``, ``'regimes'`` and
        ``'cols2regi'``.  When falsy, ``self.x`` is lagged instead.

    Returns
    -------
    The lag of ``self.x`` as produced by ``weights.lag_spatial`` when no
    regimes information is given; otherwise the first element returned by
    ``REGI.Regimes_Frame.__init__`` for the lagged ``regimes_att['x']``,
    converted with ``.toarray()``.
    """
    # Simple case first: no regimes, lag the stored regressors directly.
    if not regimes_att:
        return weights.lag_spatial(w, self.x)

    lagged = weights.lag_spatial(w, regimes_att['x'])
    # Regimes_Frame.__init__ is invoked on this instance; only the first
    # element of what it returns is used here.
    framed = REGI.Regimes_Frame.__init__(
        self,
        lagged,
        regimes_att['regimes'],
        constant_regi=None,
        cols2regi=regimes_att['cols2regi'],
    )
    return framed[0].toarray()
def _calc(self, Y, w, k):
    """
    Compute the movement vectors between the first and last columns of
    ``Y`` and of its spatial lag, and bin their directions.

    Parameters
    ----------
    Y : array
        Two-dimensional; only the first and last columns are differenced.
    w : spatial weights object
        Passed to ``weights.lag_spatial`` together with ``Y``.
    k : unused
        Accepted for interface compatibility; not read in this method.

    Returns
    -------
    dict
        Keys ``'counts'``, ``'theta'``, ``'bins'``, ``'r'``, ``'lag'``,
        ``'dx'`` and ``'dy'``.

    Notes
    -----
    Stores the lag in ``self.wY`` and the input in ``self.Y``; reads the
    histogram bins from ``self.cuts``.
    """
    lagged = weights.lag_spatial(w, Y)

    # Change between the last and the first column, for Y and for its lag.
    delta_x = Y[:, -1] - Y[:, 0]
    delta_y = lagged[:, -1] - lagged[:, 0]

    self.wY = lagged
    self.Y = Y

    length = np.sqrt(delta_x * delta_x + delta_y * delta_y)
    angle = np.arctan2(delta_y, delta_x)

    # Negative angles are shifted by 2*pi before binning; the signed
    # angles are what gets reported under 'theta'.
    below_zero = angle < 0.0
    unsigned = angle * (1 - below_zero) + below_zero * (2 * np.pi + angle)
    counts, bins = np.histogram(unsigned, self.cuts)

    return {
        'counts': counts,
        'theta': angle,
        'bins': bins,
        'r': length,
        'lag': lagged,
        'dx': delta_x,
        'dy': delta_y,
    }
def _calc(self, Y, w, k):
    """
    Derive direction and length of the move from the first to the last
    column of ``Y`` (x component) and of its spatial lag (y component).

    Parameters
    ----------
    Y : array
        Two-dimensional; columns 0 and -1 are the ones used.
    w : spatial weights object
        Forwarded to ``weights.lag_spatial``.
    k : unused
        Kept in the signature but never read here.

    Returns
    -------
    dict
        ``counts`` and ``bins`` from the histogram of the non-negative
        angles, the signed ``theta``, the vector length ``r``, the lagged
        values ``lag`` and the components ``dx`` and ``dy``.

    Notes
    -----
    Side effects: sets ``self.wY`` and ``self.Y``.  The histogram bins come
    from ``self.cuts``.
    """
    spatial_lag = weights.lag_spatial(w, Y)

    step_x = Y[:, -1] - Y[:, 0]
    step_y = spatial_lag[:, -1] - spatial_lag[:, 0]

    self.wY = spatial_lag
    self.Y = Y

    radius = np.sqrt(step_x * step_x + step_y * step_y)
    signed_theta = np.arctan2(step_y, step_x)

    # Add 2*pi to the negative angles only, leaving the others untouched.
    is_negative = signed_theta < 0.0
    wrapped_theta = (signed_theta * (1 - is_negative)
                     + is_negative * (2 * np.pi + signed_theta))
    counts, bins = np.histogram(wrapped_theta, self.cuts)

    results = {
        'counts': counts,
        'theta': signed_theta,
        'bins': bins,
        'r': radius,
        'lag': spatial_lag,
        'dx': step_x,
        'dy': step_y,
    }
    return results
def __init__(self, y, x, w, method='full', epsilon=0.0000001):
    """
    Fit a spatial lag model by maximum likelihood.

    ``rho`` is obtained by minimising the concentrated log-likelihood with
    a bounded scalar search on (-1, 1); the remaining coefficients,
    residuals, predicted values and the variance matrix follow from it.

    Parameters
    ----------
    y : array
        Dependent variable.
        NOTE(review): the optimiser result is indexed as ``res.x[0][0]``,
        which suggests ``y`` is an n x 1 column -- confirm against callers.
    x : array
        Two-dimensional regressor matrix; its shape supplies ``n`` and ``k``.
    w : spatial weights object
        Must be accepted by ``weights.lag_spatial`` and provide ``full()``,
        ``sparse``, ``n`` and ``asymmetry()``.
    method : str
        Case-insensitive.  ``'full'`` uses the dense weights matrix,
        ``'lu'`` the sparse one, ``'ord'`` the eigenvalues of the weights
        matrix.
    epsilon : float
        Tolerance passed to the scalar minimiser, and threshold passed to
        ``inverse_prod`` when computing ``predy_e``.

    Raises
    ------
    Exception
        If ``method`` is not one of the supported names.  Raising keeps
        this constructor in line with the ML error constructor in this
        file, instead of printing a message and returning a
        half-initialised object.

    Notes
    -----
    Sets ``y``, ``x``, ``n``, ``k``, ``method``, ``epsilon``, ``rho``,
    ``logll``, ``betas``, ``u``, ``predy``, ``predy_e``, ``e_pred``,
    ``sig2``, ``vm1`` and ``vm`` on the instance.
    """
    # set up main regression variables and spatial filters
    self.y = y
    self.x = x
    self.n, self.k = self.x.shape
    self.method = method
    self.epsilon = epsilon
    ylag = weights.lag_spatial(w, y)

    # b0, b1, e0 and e1
    xtx = spdot(self.x.T, self.x)
    xtxi = la.inv(xtx)
    xty = spdot(self.x.T, self.y)
    xtyl = spdot(self.x.T, ylag)
    b0 = spdot(xtxi, xty)
    b1 = spdot(xtxi, xtyl)
    e0 = self.y - spdot(x, b0)
    e1 = ylag - spdot(x, b1)

    methodML = method.upper()
    if methodML not in ('FULL', 'LU', 'ORD'):
        # Fail loudly: everything below needs `res` and `W`.
        raise Exception("{0} is an unsupported method".format(methodML))

    # call minimizer using concentrated log-likelihood to get rho
    # Each branch also binds W, which the information matrix below needs;
    # it is built only in the form the chosen method requires.
    if methodML == 'FULL':
        W = w.full()[0]
        res = minimize_scalar(lag_c_loglik, 0.0, bounds=(-1.0, 1.0),
                              args=(self.n, e0, e1, W),
                              method='bounded', tol=epsilon)
    elif methodML == 'LU':
        I = sp.identity(w.n)
        Wsp = w.sparse
        W = Wsp
        res = minimize_scalar(lag_c_loglik_sp, 0.0, bounds=(-1.0, 1.0),
                              args=(self.n, e0, e1, I, Wsp),
                              method='bounded', tol=epsilon)
    else:  # 'ORD'
        # check on symmetry structure
        if w.asymmetry(intrinsic=False) == []:
            ww = symmetrize(w)
            W = np.array(ww.todense())
            evals = la.eigvalsh(W)
        else:
            W = w.full()[0]
            evals = la.eigvals(W)
        res = minimize_scalar(lag_c_loglik_ord, 0.0, bounds=(-1.0, 1.0),
                              args=(self.n, e0, e1, evals),
                              method='bounded', tol=epsilon)

    self.rho = res.x[0][0]

    # compute full log-likelihood, including constants
    ln2pi = np.log(2.0 * np.pi)
    llik = -res.fun - self.n / 2.0 * ln2pi - self.n / 2.0
    self.logll = llik[0][0]

    # b, residuals and predicted values
    b = b0 - self.rho * b1
    self.betas = np.vstack((b, self.rho))   # rho added as last coefficient
    self.u = e0 - self.rho * e1
    self.predy = self.y - self.u

    xb = spdot(x, b)
    self.predy_e = inverse_prod(
        w.sparse, xb, self.rho, inv_method="power_exp", threshold=epsilon)
    self.e_pred = self.y - self.predy_e

    # residual variance
    # NOTE(review): sig2n is not defined in this block; _cache is reset
    # just before it is read, presumably because sig2n uses it -- confirm.
    self._cache = {}
    self.sig2 = self.sig2n  # no allowance for division by n-k

    # information matrix
    # if w should be kept sparse, how can we do the following:
    a = -self.rho * W
    spfill_diagonal(a, 1.0)
    ai = spinv(a)
    wai = spdot(W, ai)
    tr1 = wai.diagonal().sum()  # same for sparse and dense

    wai2 = spdot(wai, wai)
    tr2 = wai2.diagonal().sum()

    waiTwai = spdot(wai.T, wai)
    tr3 = waiTwai.diagonal().sum()
    # to here

    wpredy = weights.lag_spatial(w, self.predy_e)
    wpyTwpy = spdot(wpredy.T, wpredy)
    xTwpy = spdot(x.T, wpredy)

    # order of variables is beta, rho, sigma2
    v1 = np.vstack(
        (xtx / self.sig2, xTwpy.T / self.sig2, np.zeros((1, self.k))))
    v2 = np.vstack(
        (xTwpy / self.sig2, tr2 + tr3 + wpyTwpy / self.sig2,
         tr1 / self.sig2))
    v3 = np.vstack(
        (np.zeros((self.k, 1)), tr1 / self.sig2,
         self.n / (2.0 * self.sig2 ** 2)))

    v = np.hstack((v1, v2, v3))

    self.vm1 = la.inv(v)  # vm1 includes variance for sigma2
    self.vm = self.vm1[:-1, :-1]  # vm is for coefficients only
def __init__(self, y, x, w, method='full', epsilon=0.0000001, regimes_att=None):
    """
    Fit a spatial error model by maximum likelihood.

    ``lam`` is found by minimising the concentrated log-likelihood with a
    bounded scalar search on (-1, 1); coefficients, residuals and the
    variance matrix are then computed from the spatially filtered
    variables.

    Parameters
    ----------
    y : array
        Dependent variable.
    x : array or sparse matrix
        Regressors.  When ``regimes_att`` is truthy ``x.toarray()`` is
        stored, otherwise ``x`` itself.
    w : spatial weights object
        Must be accepted by ``weights.lag_spatial`` and provide ``full()``,
        ``sparse``, ``n`` and ``asymmetry()``.
    method : str
        Case-insensitive: ``'full'``, ``'lu'`` or ``'ord'``.
    epsilon : float
        Tolerance passed to the scalar minimiser.
    regimes_att : dict or None
        Forwarded to ``self.get_x_lag``.

    Raises
    ------
    Exception
        If ``method`` is not one of the supported names.

    Notes
    -----
    Sets ``y``, ``x``, ``n``, ``k``, ``method``, ``epsilon``, ``lam``,
    ``logll``, ``betas``, ``u``, ``predy``, ``e_filtered``, ``sig2``,
    ``vm1`` and ``vm`` on the instance.
    """
    # main regression variables
    self.y = y
    self.x = x.toarray() if regimes_att else x
    self.n, self.k = self.x.shape
    self.method = method
    self.epsilon = epsilon

    # spatial filters
    lag_y = weights.lag_spatial(w, self.y)
    lag_x = self.get_x_lag(w, regimes_att)

    method_key = method.upper()
    if method_key not in ['FULL', 'LU', 'ORD']:
        raise Exception("{0} is an unsupported method".format(method))

    # The three searches share bounds, solver and tolerance; they differ
    # only in the objective and in the trailing objective arguments.
    search = dict(bounds=(-1.0, 1.0), method='bounded', tol=epsilon)
    shared_args = (self.n, self.y, lag_y, self.x, lag_x)

    if method_key == 'FULL':
        W = w.full()[0]  # dense weights
        res = minimize_scalar(err_c_loglik, 0.0,
                              args=shared_args + (W,), **search)
    elif method_key == 'LU':
        eye = sp.identity(w.n)
        W = w.sparse  # sparse weights
        res = minimize_scalar(err_c_loglik_sp, 0.0,
                              args=shared_args + (eye, W), **search)
    else:  # 'ORD'
        if w.asymmetry(intrinsic=False) == []:
            # no asymmetries reported: use the symmetrised matrix and the
            # symmetric eigenvalue routine
            W = np.array(symmetrize(w).todense())
            evals = la.eigvalsh(W)
        else:
            W = w.full()[0]
            evals = la.eigvals(W)
        res = minimize_scalar(err_c_loglik_ord, 0.0,
                              args=shared_args + (evals,), **search)

    self.lam = res.x

    # full log-likelihood, including constants
    self.logll = -res.fun - self.n / 2.0 * np.log(2.0 * np.pi) - self.n / 2.0

    # coefficients from the spatially filtered variables
    y_filt = self.y - self.lam * lag_y
    x_filt = self.x - self.lam * lag_x
    xtx_inv = np.linalg.inv(np.dot(x_filt.T, x_filt))
    beta = np.dot(xtx_inv, np.dot(x_filt.T, y_filt))
    self.betas = np.vstack((beta, self.lam))

    # residuals and predicted values
    self.u = y - np.dot(self.x, beta)
    self.predy = self.y - self.u

    # residual variance from the filtered residuals
    self.e_filtered = self.u - self.lam * weights.lag_spatial(w, self.u)
    self.sig2 = np.dot(self.e_filtered.T, self.e_filtered) / self.n

    # variance-covariance matrix of the betas
    beta_cov = self.sig2 * xtx_inv

    # variance-covariance matrix of lambda and sigma
    a = -self.lam * W
    spfill_diagonal(a, 1.0)
    wai = spdot(W, spinv(a))
    tr1 = wai.diagonal().sum()
    tr2 = spdot(wai, wai).diagonal().sum()
    tr3 = spdot(wai.T, wai).diagonal().sum()

    info = np.hstack((
        np.vstack((tr2 + tr3, tr1 / self.sig2)),
        np.vstack((tr1 / self.sig2, self.n / (2.0 * self.sig2 ** 2))),
    ))
    self.vm1 = np.linalg.inv(info)

    # variance matrix for beta and lambda: block layout with zero
    # off-diagonal blocks
    upper = np.hstack((beta_cov, np.zeros((self.k, 1))))
    lower = np.hstack(
        (np.zeros((1, self.k)), self.vm1[0, 0] * np.ones((1, 1))))
    self.vm = np.vstack((upper, lower))
sa_df = merged.filter([week,'REG_NAME','geometry'], axis=1) #print(week + "Dataframe:", sa_df) # Create output path and files output_path = r"C:\path_to_folder\Maps" filepath = os.path.join(output_path, week+'.png') #Spatial Weights - select one #w = weights.Queen.from_dataframe(sa_df, idVariable="region_name") # Queen Contiguity Matrix #w = weights.Rook.from_dataframe(sa_df, idVariable="region_name") # Rook contiguity Matrix w = weights.distance.KNN.from_dataframe(sa_df, ids="REG_NAME", k=6) # K-Nearest Neighbors w.transform = "R" sa_df["lag_infections"] = weights.lag_spatial(w, sa_df[week]) # Global spatial autocorrelation y = sa_df[week] moran = Moran(y, w) # Local spatial autocorrelation m_local = Moran_Local(y, w) lisa = m_local.Is # set CRS sa_df = sa_df.to_crs("EPSG:3857") #Plot map fig, ax = plt.subplots(figsize=(9,9))
# where $Y$ is an Nx1 vector with the values of the variable. Recall that, in the product of a matrix and a vector, the value for a given row is the sum of the element-by-element products of that row of the matrix with the vector. In terms of the spatial lag:
#
# $$
# y_{sl-i} = \displaystyle \sum_j w_{ij} y_j
# $$
#
# If we are using row-standardized weights, $w_{ij}$ becomes a proportion between zero and one, and $y_{sl-i}$ can be seen as the average value of $Y$ in the neighborhood of $i$.
#
# The spatial lag is a key element of many spatial analysis techniques, as we will see later on and, as such, it is fully supported in `PySAL`. To compute the spatial lag of a given variable, `imd_score` for example:

# In[62]:

# Row-standardize the queen matrix
w_queen.transform = 'R'
# Compute spatial lag of `imd_score`
w_queen_score = weights.lag_spatial(w_queen, imd['imd_score'])
# Print the first five elements
w_queen_score[:5]

# Line 4 contains the actual computation, which is highly optimized in `PySAL`. Note that, despite passing in a `pd.Series` object, the output is a `numpy` array. This, however, can be added directly to the table `imd`:

# In[63]:

imd['w_queen_score'] = w_queen_score

# ---
#
# **[Optional exercise]**
#
# Explore the spatial lag stored in `w_queen_score` by constructing a density/histogram plot similar to those created in Lab 2. Compare it with one for `imd_score`. What differences do you notice?
#
def __init__(self, y, x, w, method='full', epsilon=0.0000001, regimes_att=None):
    """
    Maximum-likelihood estimation of a spatial error model.

    The spatial coefficient ``lam`` minimises the concentrated
    log-likelihood over (-1, 1); the betas come from a regression on the
    spatially filtered ``y`` and ``x``.

    Parameters
    ----------
    y : array
        Dependent variable.
    x : array or sparse matrix
        Regressors; converted with ``toarray()`` when ``regimes_att`` is
        truthy.
    w : spatial weights object
        Used through ``weights.lag_spatial``, ``full()``, ``sparse``, ``n``
        and ``asymmetry()``.
    method : str
        ``'full'``, ``'lu'`` or ``'ord'`` (any letter case).
    epsilon : float
        Tolerance for the scalar minimiser.
    regimes_att : dict or None
        Passed on to ``self.get_x_lag``.

    Raises
    ------
    Exception
        When ``method`` is not supported.

    Notes
    -----
    Attributes written: ``y``, ``x``, ``n``, ``k``, ``method``,
    ``epsilon``, ``lam``, ``logll``, ``betas``, ``u``, ``predy``,
    ``e_filtered``, ``sig2``, ``vm1``, ``vm``.
    """
    # set up main regression variables
    self.y = y
    self.x = x.toarray() if regimes_att else x
    self.n, self.k = self.x.shape
    self.method = method
    self.epsilon = epsilon

    # spatially lagged dependent variable and regressors
    wy = weights.lag_spatial(w, self.y)
    wx = self.get_x_lag(w, regimes_att)

    chosen = method.upper()
    if chosen not in ['FULL', 'LU', 'ORD']:
        raise Exception("{0} is an unsupported method".format(method))

    # Settings common to all three bounded searches.
    solver_opts = dict(bounds=(-1.0, 1.0), method='bounded', tol=epsilon)
    base_args = (self.n, self.y, wy, self.x, wx)

    if chosen == 'FULL':
        W = w.full()[0]  # dense matrix needed here
        res = minimize_scalar(err_c_loglik, 0.0,
                              args=base_args + (W,), **solver_opts)
    elif chosen == 'LU':
        identity = sp.identity(w.n)
        W = w.sparse  # sparse matrix needed here
        res = minimize_scalar(err_c_loglik_sp, 0.0,
                              args=base_args + (identity, W), **solver_opts)
    else:  # 'ORD'
        # choose the eigenvalue routine from the symmetry structure of w
        if w.asymmetry(intrinsic=False) == []:
            W = np.array(symmetrize(w).todense())
            eigenvalues = la.eigvalsh(W)
        else:
            W = w.full()[0]
            eigenvalues = la.eigvals(W)
        res = minimize_scalar(err_c_loglik_ord, 0.0,
                              args=base_args + (eigenvalues,), **solver_opts)

    self.lam = res.x

    # compute full log-likelihood, including constants
    half_n = self.n / 2.0
    self.logll = -res.fun - half_n * np.log(2.0 * np.pi) - self.n / 2.0

    # b, residuals and predicted values
    y_star = self.y - self.lam * wy
    x_star = self.x - self.lam * wx
    cross_inv = np.linalg.inv(np.dot(x_star.T, x_star))
    coef = np.dot(cross_inv, np.dot(x_star.T, y_star))
    self.betas = np.vstack((coef, self.lam))
    self.u = y - np.dot(self.x, coef)
    self.predy = self.y - self.u

    # residual variance
    self.e_filtered = self.u - self.lam * weights.lag_spatial(w, self.u)
    self.sig2 = np.dot(self.e_filtered.T, self.e_filtered) / self.n

    # variance-covariance matrix betas
    cov_betas = self.sig2 * cross_inv

    # variance-covariance matrix lambda, sigma
    a = -self.lam * W
    spfill_diagonal(a, 1.0)
    wai = spdot(W, spinv(a))
    trace_wai = wai.diagonal().sum()
    trace_wai_sq = spdot(wai, wai).diagonal().sum()
    trace_wai_cross = spdot(wai.T, wai).diagonal().sum()

    first_col = np.vstack((trace_wai_sq + trace_wai_cross,
                           trace_wai / self.sig2))
    second_col = np.vstack((trace_wai / self.sig2,
                            self.n / (2.0 * self.sig2 ** 2)))
    self.vm1 = np.linalg.inv(np.hstack((first_col, second_col)))

    # create variance matrix for beta, lambda
    beta_rows = np.hstack((cov_betas, np.zeros((self.k, 1))))
    lam_row = np.hstack(
        (np.zeros((1, self.k)), self.vm1[0, 0] * np.ones((1, 1))))
    self.vm = np.vstack((beta_rows, lam_row))
# Now, because we have row-standardized them, the weight given to each of the four neighbors is 0.2 which, all together, sum up to one.

# In[ ]:

w['E08000012']

# ### Spatial lag
#
# Once we have the data and the spatial weights matrix ready, we can start by computing the spatial lag of the percentage of votes that went to leave the EU. Remember that the spatial lag is the product of the spatial weights matrix and a given variable and that, if $W$ is row-standardized, the result amounts to the average value of the variable in the neighborhood of each observation.
#
# We can calculate the spatial lag for the variable `Pct_Leave` and store it directly in the main table with the following line of code:

# In[ ]:

br['w_Pct_Leave'] = weights.lag_spatial(w, br['Pct_Leave'])

# Let us have a quick look at the resulting variable, as compared to the original one:

# In[ ]:

br[['LAD14NM', 'Pct_Leave', 'w_Pct_Leave']].head()

# The way to interpret the spatial lag (`w_Pct_Leave`) for, say, the first observation is as follows: Hartlepool, where 69.6% of the electorate voted to leave, is surrounded by neighbouring local authorities where, on average, almost 62% of the electorate also voted to leave the EU. For the purpose of illustration, we can in fact check this is correct by querying the spatial weights matrix to find out Hartlepool's neighbors:

# In[ ]:

w.neighbors['E06000001']

# And then checking their values: