Beispiel #1
0
 def get_x_lag(self, w, regimes_att):
     """Return the spatial lag of the regressors.

     Parameters
     ----------
     w : spatial weights object
         Passed straight to ``weights.lag_spatial``.
     regimes_att : dict or None
         When falsy, ``self.x`` is lagged and returned as is. Otherwise the
         keys ``'x'``, ``'regimes'`` and ``'cols2regi'`` are read: ``'x'`` is
         lagged, handed to ``REGI.Regimes_Frame.__init__`` together with the
         other two entries, and the first element of what that call returns
         is converted with ``.toarray()``.

     Returns
     -------
     The lagged regressors.
     """
     if not regimes_att:
         return weights.lag_spatial(w, self.x)

     lagged = weights.lag_spatial(w, regimes_att['x'])
     framed = REGI.Regimes_Frame.__init__(
         self, lagged, regimes_att['regimes'],
         constant_regi=None, cols2regi=regimes_att['cols2regi'])
     # presumably element 0 is the regime-expanded sparse matrix -- TODO confirm
     return framed[0].toarray()
Beispiel #2
0
 def _calc(self, Y, w, k):
     """Compute per-observation movement vectors and their angular histogram.

     Parameters
     ----------
     Y : array
         Two-dimensional array; only its first and last columns are used.
     w : spatial weights object
         Passed to ``weights.lag_spatial`` to obtain the lag of ``Y``.
     k : unused
         Accepted for interface compatibility; not read in this method.

     Returns
     -------
     dict
         Keys ``'counts'``, ``'theta'``, ``'bins'``, ``'r'``, ``'lag'``,
         ``'dx'`` and ``'dy'``.

     Side effects: stores ``Y`` and its spatial lag on ``self.Y`` / ``self.wY``.
     """
     lagged = weights.lag_spatial(w, Y)

     # Change between the first and last column, for Y and for its lag.
     dx = Y[:, -1] - Y[:, 0]
     dy = lagged[:, -1] - lagged[:, 0]
     self.wY = lagged
     self.Y = Y

     length = np.sqrt(dx * dx + dy * dy)
     theta = np.arctan2(dy, dx)
     # Negative angles are shifted by a full turn before binning; the
     # returned ``theta`` keeps the signed values.
     unsigned_theta = np.where(theta < 0.0, 2 * np.pi + theta, theta)
     counts, bins = np.histogram(unsigned_theta, self.cuts)

     return {
         'counts': counts,
         'theta': theta,
         'bins': bins,
         'r': length,
         'lag': lagged,
         'dx': dx,
         'dy': dy,
     }
Beispiel #3
0
 def _calc(self, Y, w, k):
     """Build the movement vectors of ``Y`` against its spatial lag.

     ``dx`` is the last-minus-first column difference of ``Y`` and ``dy`` the
     same difference for ``weights.lag_spatial(w, Y)``. Their length ``r`` and
     angle ``theta`` are returned together with a histogram of the angles
     (shifted by a full turn where negative) over ``self.cuts``.

     ``k`` is accepted but not used here. ``Y`` and its lag are also stored
     on the instance as ``self.Y`` and ``self.wY``.

     Returns a dict with keys ``'counts'``, ``'theta'``, ``'bins'``, ``'r'``,
     ``'lag'``, ``'dx'`` and ``'dy'``.
     """
     wY = weights.lag_spatial(w, Y)
     dx = Y[:, -1] - Y[:, 0]
     dy = wY[:, -1] - wY[:, 0]
     self.wY, self.Y = wY, Y

     r = np.sqrt(dx * dx + dy * dy)
     theta = np.arctan2(dy, dx)

     # Only the histogram sees the wrapped angles.
     full_turn = 2 * np.pi
     wrapped = np.where(theta < 0.0, full_turn + theta, theta)
     counts, bins = np.histogram(wrapped, self.cuts)

     return dict(counts=counts, theta=theta, bins=bins, r=r,
                 lag=wY, dx=dx, dy=dy)
Beispiel #4
0
    def __init__(self, y, x, w, method='full', epsilon=0.0000001):
        """Estimate the spatial lag model by maximum likelihood.

        The autoregressive coefficient ``rho`` is found by minimising a
        concentrated log-likelihood with ``minimize_scalar`` over (-1, 1).
        Coefficients, residuals, predicted values and the variance matrix
        are then derived from it.

        Parameters
        ----------
        y : array
            Dependent variable. The coefficient vector is stacked with the
            scalar ``rho``, so ``y`` has to be a single column, (n, 1).
        x : array
            Two-dimensional array of regressors, (n, k).
        w : spatial weights object
            Must be accepted by ``weights.lag_spatial`` and provide
            ``full()``, ``sparse``, ``n`` and ``asymmetry()``.
        method : str
            ``'full'``, ``'lu'`` or ``'ord'`` (case-insensitive). Selects the
            objective function and the form in which the weights are used.
        epsilon : float
            Tolerance passed to the minimiser and threshold passed to
            ``inverse_prod``.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported names.

        Attributes set: ``y``, ``x``, ``n``, ``k``, ``method``, ``epsilon``,
        ``rho``, ``logll``, ``betas``, ``u``, ``predy``, ``predy_e``,
        ``e_pred``, ``sig2``, ``vm1`` and ``vm``.
        """
        # Fail fast: an unsupported method would otherwise leave a
        # half-initialised object behind that only breaks later on.
        methodML = method.upper()
        if methodML not in ('FULL', 'LU', 'ORD'):
            raise ValueError(
                "{0} is an unsupported method".format(methodML))

        # set up main regression variables and spatial filters
        self.y = y
        self.x = x
        self.n, self.k = self.x.shape
        self.method = method
        self.epsilon = epsilon
        ylag = weights.lag_spatial(w, y)

        # b0, b1: least-squares coefficients of y and of its lag on x;
        # e0, e1: the corresponding residuals
        xtx = spdot(self.x.T, self.x)
        xtxi = la.inv(xtx)
        xty = spdot(self.x.T, self.y)
        xtyl = spdot(self.x.T, ylag)
        b0 = spdot(xtxi, xty)
        b1 = spdot(xtxi, xtyl)
        e0 = self.y - spdot(x, b0)
        e1 = ylag - spdot(x, b1)

        # call minimizer using concentrated log-likelihood to get rho;
        # W is built per method because each one needs a different form
        if methodML == 'FULL':
            W = w.full()[0]  # dense weights
            res = minimize_scalar(lag_c_loglik, 0.0, bounds=(-1.0, 1.0),
                                  args=(self.n, e0, e1, W),
                                  method='bounded', tol=epsilon)
        elif methodML == 'LU':
            identity = sp.identity(w.n)
            Wsp = w.sparse  # sparse weights
            W = Wsp
            res = minimize_scalar(lag_c_loglik_sp, 0.0, bounds=(-1.0, 1.0),
                                  args=(self.n, e0, e1, identity, Wsp),
                                  method='bounded', tol=epsilon)
        else:  # 'ORD'
            # check on symmetry structure
            if w.asymmetry(intrinsic=False) == []:
                ww = symmetrize(w)
                WW = np.array(ww.todense())
                evals = la.eigvalsh(WW)
                W = WW
            else:
                W = w.full()[0]
                evals = la.eigvals(W)
            res = minimize_scalar(lag_c_loglik_ord, 0.0, bounds=(-1.0, 1.0),
                                  args=(self.n, e0, e1, evals),
                                  method='bounded', tol=epsilon)

        # NOTE(review): res.x and the log-likelihood below are indexed as
        # 1x1 arrays -- presumably because the objective returns a 1x1
        # array; confirm against the installed SciPy version.
        self.rho = res.x[0][0]

        # compute full log-likelihood, including constants
        ln2pi = np.log(2.0 * np.pi)
        llik = -res.fun - self.n / 2.0 * ln2pi - self.n / 2.0
        self.logll = llik[0][0]

        # b, residuals and predicted values
        b = b0 - self.rho * b1
        self.betas = np.vstack((b, self.rho))   # rho added as last coefficient
        self.u = e0 - self.rho * e1
        self.predy = self.y - self.u

        xb = spdot(x, b)

        self.predy_e = inverse_prod(
            w.sparse, xb, self.rho, inv_method="power_exp", threshold=epsilon)
        self.e_pred = self.y - self.predy_e

        # residual variance
        # sig2n is not defined in this block -- presumably a property of the
        # class that relies on self._cache; TODO confirm.
        self._cache = {}
        self.sig2 = self.sig2n  # no allowance for division by n-k

        # information matrix
        # if w should be kept sparse, how can we do the following:
        # a is -rho * W with its diagonal overwritten by 1.0
        a = -self.rho * W
        spfill_diagonal(a, 1.0)
        ai = spinv(a)
        wai = spdot(W, ai)
        tr1 = wai.diagonal().sum()  # same for sparse and dense

        wai2 = spdot(wai, wai)
        tr2 = wai2.diagonal().sum()

        waiTwai = spdot(wai.T, wai)
        tr3 = waiTwai.diagonal().sum()

        wpredy = weights.lag_spatial(w, self.predy_e)
        wpyTwpy = spdot(wpredy.T, wpredy)
        xTwpy = spdot(x.T, wpredy)

        # order of variables is beta, rho, sigma2
        v1 = np.vstack(
            (xtx / self.sig2, xTwpy.T / self.sig2, np.zeros((1, self.k))))
        v2 = np.vstack(
            (xTwpy / self.sig2, tr2 + tr3 + wpyTwpy / self.sig2, tr1 / self.sig2))
        v3 = np.vstack(
            (np.zeros((self.k, 1)), tr1 / self.sig2, self.n / (2.0 * self.sig2 ** 2)))

        v = np.hstack((v1, v2, v3))

        self.vm1 = la.inv(v)  # vm1 includes variance for sigma2
        self.vm = self.vm1[:-1, :-1]  # vm is for coefficients only
Beispiel #5
0
    def __init__(self,
                 y,
                 x,
                 w,
                 method='full',
                 epsilon=0.0000001,
                 regimes_att=None):
        """Estimate the spatial error model by maximum likelihood.

        The autoregressive coefficient ``lam`` is obtained by minimising a
        concentrated log-likelihood with ``minimize_scalar`` over (-1, 1).
        The coefficients come from least squares on the spatially filtered
        variables ``y - lam * Wy`` and ``x - lam * Wx``.

        Parameters
        ----------
        y : array
            Dependent variable.
        x : array or sparse matrix
            Regressors; converted with ``.toarray()`` when ``regimes_att``
            is truthy, otherwise used as passed.
        w : spatial weights object
            Must be accepted by ``weights.lag_spatial`` and provide
            ``full()``, ``sparse``, ``n`` and ``asymmetry()``.
        method : str
            ``'full'``, ``'lu'`` or ``'ord'`` (case-insensitive).
        epsilon : float
            Tolerance passed to the minimiser.
        regimes_att : dict or None
            Forwarded to ``self.get_x_lag``.

        Raises
        ------
        Exception
            If ``method`` is not one of the supported names.
        """
        # main regression variables
        self.y = y
        self.x = x.toarray() if regimes_att else x
        self.n, self.k = self.x.shape
        self.method = method
        self.epsilon = epsilon

        # spatial lags of the dependent variable and of the regressors
        ylag = weights.lag_spatial(w, self.y)
        xlag = self.get_x_lag(w, regimes_att)

        # minimise the concentrated log-likelihood to get lambda; the
        # weights are only built in the form the chosen method needs
        methodML = method.upper()
        data = (self.n, self.y, ylag, self.x, xlag)
        search = dict(bounds=(-1.0, 1.0), method='bounded', tol=epsilon)
        if methodML == 'FULL':
            W = w.full()[0]  # dense
            res = minimize_scalar(err_c_loglik, 0.0,
                                  args=data + (W,), **search)
        elif methodML == 'LU':
            eye = sp.identity(w.n)
            W = w.sparse  # sparse
            res = minimize_scalar(err_c_loglik_sp, 0.0,
                                  args=data + (eye, W), **search)
        elif methodML == 'ORD':
            # symmetric structure allows the symmetric eigenvalue routine
            if w.asymmetry(intrinsic=False) == []:
                W = np.array(symmetrize(w).todense())
                evals = la.eigvalsh(W)
            else:
                W = w.full()[0]  # dense
                evals = la.eigvals(W)
            res = minimize_scalar(err_c_loglik_ord, 0.0,
                                  args=data + (evals,), **search)
        else:
            raise Exception("{0} is an unsupported method".format(method))

        self.lam = res.x

        # full log-likelihood, including constants
        self.logll = (-res.fun - self.n / 2.0 * np.log(2.0 * np.pi)
                      - self.n / 2.0)

        # coefficients from the spatially filtered variables
        y_filt = self.y - self.lam * ylag
        x_filt = self.x - self.lam * xlag
        xsxsi = np.linalg.inv(np.dot(x_filt.T, x_filt))
        b = np.dot(xsxsi, np.dot(x_filt.T, y_filt))
        self.betas = np.vstack((b, self.lam))

        # residuals and predicted values on the unfiltered variables
        self.u = y - np.dot(self.x, b)
        self.predy = self.y - self.u

        # residual variance from the filtered residuals
        self.e_filtered = self.u - self.lam * weights.lag_spatial(w, self.u)
        self.sig2 = np.dot(self.e_filtered.T, self.e_filtered) / self.n

        # variance-covariance matrix of the betas
        varb = self.sig2 * xsxsi

        # variance-covariance matrix of lambda and sigma2;
        # a is -lam * W with its diagonal overwritten by 1.0
        a = -self.lam * W
        spfill_diagonal(a, 1.0)
        wai = spdot(W, spinv(a))
        tr1 = wai.diagonal().sum()
        tr2 = spdot(wai, wai).diagonal().sum()
        tr3 = spdot(wai.T, wai).diagonal().sum()

        tr1_scaled = tr1 / self.sig2
        first_col = np.vstack((tr2 + tr3, tr1_scaled))
        second_col = np.vstack((tr1_scaled,
                                self.n / (2.0 * self.sig2 ** 2)))
        self.vm1 = np.linalg.inv(np.hstack((first_col, second_col)))

        # variance matrix for beta and lambda: block diagonal, with the
        # lambda variance taken from the top-left element of vm1
        beta_rows = np.hstack((varb, np.zeros((self.k, 1))))
        lam_row = np.hstack((np.zeros((1, self.k)),
                             self.vm1[0, 0] * np.ones((1, 1))))
        self.vm = np.vstack((beta_rows, lam_row))
Beispiel #6
0
    # Keep only the current week's column plus the region name and geometry.
    sa_df = merged.filter([week,'REG_NAME','geometry'], axis=1)
    #print(week + "Dataframe:", sa_df)    

    # Create output path and files: one PNG per week, named after `week`
    output_path = r"C:\path_to_folder\Maps"
    filepath = os.path.join(output_path, week+'.png')    


    #Spatial Weights - select one
    #w = weights.Queen.from_dataframe(sa_df, idVariable="region_name") # Queen Contiguity Matrix
    #w = weights.Rook.from_dataframe(sa_df, idVariable="region_name")  # Rook contiguity Matrix
    w = weights.distance.KNN.from_dataframe(sa_df, ids="REG_NAME", k=6) # K-Nearest Neighbors
    # NOTE(review): the weights are built before the reprojection further
    # down, so neighbours are found in whatever CRS `merged` arrived in --
    # confirm that this is intended.

    # Row-standardize the weights
    w.transform = "R"
    
    # Spatial lag of the week's values, stored as a new column
    sa_df["lag_infections"] = weights.lag_spatial(w, sa_df[week])
    
    # Global spatial autocorrelation
    y = sa_df[week]
    moran = Moran(y, w)
    
    # Local spatial autocorrelation
    m_local = Moran_Local(y, w)
    # presumably the per-observation local Moran statistics -- TODO confirm
    lisa = m_local.Is
    
    
    # Reproject the geometries to EPSG:3857 (to_crs transforms coordinates,
    # it does not merely label the data)
    sa_df = sa_df.to_crs("EPSG:3857")

    #Plot map
    fig, ax = plt.subplots(figsize=(9,9))
Beispiel #7
0
# where $Y$ is an Nx1 vector with the values of the variable. Recall that, in the product of a matrix and a vector, the value in a given row of the result is the sum of the element-wise products of that row of the matrix and the vector. In terms of the spatial lag:
#
# $$
# y_{sl-i} = \displaystyle \sum_j w_{ij} y_j
# $$
#
# If we are using row-standardized weights, $w_{ij}$ becomes a proportion between zero and one, and $y_{sl-i}$ can be seen as the average value of $Y$ in the neighborhood of $i$.
#
# The spatial lag is a key element of many spatial analysis techniques, as we will see later on and, as such, it is fully supported in `PySAL`. To compute the spatial lag of a given variable, `imd_score` for example:

# In[62]:

# Row-standardize the queen matrix (each weight becomes a proportion of its row)
w_queen.transform = 'R'
# Compute the spatial lag of `imd_score` with the row-standardized weights
w_queen_score = weights.lag_spatial(w_queen, imd['imd_score'])
# Display the first five elements of the lagged values
w_queen_score[:5]

# Line 4 contains the actual computation, which is highly optimized in `PySAL`. Note that, despite passing in a `pd.Series` object, the output is a `numpy` array. This, however, can be added directly to the table `imd`:

# In[63]:

# Store the spatial lag as a new column of `imd`
imd['w_queen_score'] = w_queen_score

# ---
#
# **[Optional exercise]**
#
# Explore the spatial lag of `w_queen_score` by constructing a density/histogram plot similar to those created in Lab 2. Compare these with one for `imd_score`. What differences can you tell?
#
Beispiel #8
0
    def __init__(self, y, x, w, method='full', epsilon=0.0000001, regimes_att=None):
        """Fit the spatial error model by maximum likelihood.

        ``lam`` is estimated by a bounded scalar minimisation of the
        concentrated log-likelihood on (-1, 1). Betas are then computed by
        least squares on ``y - lam * Wy`` and ``x - lam * Wx``, and the
        variance matrices are assembled from traces involving the weights.

        Parameters
        ----------
        y : array
            Dependent variable.
        x : array or sparse matrix
            Regressors; ``.toarray()`` is applied when ``regimes_att`` is
            truthy.
        w : spatial weights object
            Used through ``weights.lag_spatial``, ``full()``, ``sparse``,
            ``n`` and ``asymmetry()``.
        method : str
            ``'full'``, ``'lu'`` or ``'ord'`` (case-insensitive).
        epsilon : float
            Tolerance handed to ``minimize_scalar``.
        regimes_att : dict or None
            Passed on to ``self.get_x_lag``.

        Raises
        ------
        Exception
            For any other value of ``method``.
        """
        # set up main regression variables and spatial filters
        self.y = y
        self.x = x.toarray() if regimes_att else x
        self.n, self.k = self.x.shape
        self.method = method
        self.epsilon = epsilon

        ylag = weights.lag_spatial(w, self.y)
        xlag = self.get_x_lag(w, regimes_att)

        # Pick the objective and the extra arguments it needs; W is kept
        # around because the variance computation below uses it as well.
        methodML = method.upper()
        if methodML == 'FULL':
            W = w.full()[0]      # need dense here
            objective, extra_args = err_c_loglik, (W,)
        elif methodML == 'LU':
            identity = sp.identity(w.n)
            W = w.sparse         # need sparse here
            objective, extra_args = err_c_loglik_sp, (identity, W)
        elif methodML == 'ORD':
            # check on symmetry structure
            if w.asymmetry(intrinsic=False) == []:
                W = np.array(symmetrize(w).todense())
                evals = la.eigvalsh(W)
            else:
                W = w.full()[0]  # need dense here
                evals = la.eigvals(W)
            objective, extra_args = err_c_loglik_ord, (evals,)
        else:
            raise Exception("{0} is an unsupported method".format(method))

        # call minimizer using concentrated log-likelihood to get lambda
        res = minimize_scalar(
            objective, 0.0, bounds=(-1.0, 1.0),
            args=(self.n, self.y, ylag, self.x, xlag) + extra_args,
            method='bounded', tol=epsilon)
        self.lam = res.x

        # compute full log-likelihood, including constants
        half_n = self.n / 2.0
        self.logll = -res.fun - half_n * np.log(2.0 * np.pi) - half_n

        # b from the spatially filtered y and x
        ys = self.y - self.lam * ylag
        xs = self.x - self.lam * xlag
        xsxsi = np.linalg.inv(np.dot(xs.T, xs))
        b = np.dot(xsxsi, np.dot(xs.T, ys))
        self.betas = np.vstack((b, self.lam))

        # residuals and predicted values
        self.u = y - np.dot(self.x, b)
        self.predy = self.y - self.u

        # residual variance
        self.e_filtered = self.u - self.lam * weights.lag_spatial(w, self.u)
        self.sig2 = np.dot(self.e_filtered.T, self.e_filtered) / self.n

        # variance-covariance matrix betas
        varb = self.sig2 * xsxsi

        # variance-covariance matrix lambda, sigma;
        # a is -lam * W with its diagonal overwritten by 1.0
        a = -self.lam * W
        spfill_diagonal(a, 1.0)
        wai = spdot(W, spinv(a))
        tr1 = wai.diagonal().sum()
        tr2 = spdot(wai, wai).diagonal().sum()
        tr3 = spdot(wai.T, wai).diagonal().sum()

        off_diag = tr1 / self.sig2
        v = np.hstack((
            np.vstack((tr2 + tr3, off_diag)),
            np.vstack((off_diag, self.n / (2.0 * self.sig2 ** 2)))))
        self.vm1 = np.linalg.inv(v)

        # create variance matrix for beta, lambda
        upper = np.hstack((varb, np.zeros((self.k, 1))))
        lower = np.hstack(
            (np.zeros((1, self.k)), self.vm1[0, 0] * np.ones((1, 1))))
        self.vm = np.vstack((upper, lower))
Beispiel #9
0
# Now, because we have row-standardized them, the weight given to each of the four neighbors is 0.2 which, all together, sum up to one.

# In[ ]:

# Inspect the entry of the weights object for observation 'E08000012'
w['E08000012']

# ### Spatial lag
#
# Once we have the data and the spatial weights matrix ready, we can start by computing the spatial lag of the percentage of votes that went to leave the EU. Remember the spatial lag is the product of the spatial weights matrix and a given variable and that, if $W$ is row-standardized, the result amounts to the average value of the variable in the neighborhood of each observation.
#
# We can calculate the spatial lag for the variable `Pct_Leave` and store it directly in the main table with the following line of code:

# In[ ]:

# Spatial lag of `Pct_Leave`, stored as a new column of `br`
br['w_Pct_Leave'] = weights.lag_spatial(w, br['Pct_Leave'])

# Let us have a quick look at the resulting variable, as compared to the original one:

# In[ ]:

# First rows of the name, original and lagged columns side by side
br[['LAD14NM', 'Pct_Leave', 'w_Pct_Leave']].head()

# The way to interpret the spatial lag (`w_Pct_Leave`) for, say, the first observation is as follows: Hartlepool, where 69.6% of the electorate voted to leave, is surrounded by neighbouring local authorities where, on average, almost 62% of the electorate also voted to leave the EU. For the purpose of illustration, we can in fact check this is correct by querying the spatial weights matrix to find out Hartlepool's neighbors:

# In[ ]:

# Neighbors recorded in `w` for observation 'E06000001'
w.neighbors['E06000001']

# And then checking their values: