Example #1
# Requires: import copy; import numpy as np; import pysal.
# `norm` is assumed to be a residual-norm helper defined alongside this function.
def inverse_scg(w, data, scalar, transpose=False, symmetric=False,
                threshold=0.00001, max_iterations=None):
    
    #multiplier = SP.identity(w.n) - (scalar*w.sparse)       # A      n x n   
    count = 0                                               # k      scalar (step 1)
    run_tot = copy.copy(data)                               # z_k    n x 1  (step 1)
    #residuals = data - run_tot * multiplier                 # r_k    n x 1  (step 2)
    residuals = data - pysal.lag_spatial(w, scalar*data)
    #test1 = la.norm(residuals)                              # G_k    scalar (step 3)
    test1 = norm(residuals)
    directions = copy.copy(residuals)                       # d_k    n x 1  (step 6)
    while test1 > threshold:                                #               (step 4)
        count += 1                                          #               (step 5)
        #changes = multiplier * directions                   # t      n x 1  (step 7)
        changes = directions - pysal.lag_spatial(w, scalar*directions)
        intensity = test1 / (np.dot(directions.T, changes)) # alpha  scalar (step 8)
        #int_dir = intensity * directions                    #               (step 8)
        run_tot += intensity * directions
        #run_tot += int_dir                                  #               (step 8)
        #residuals -= int_dir                                #               (step 8)
        residuals -= intensity * changes
        #test2 = la.norm(residuals)                          #               (step 3)
        test2 = norm(residuals)
        directions = residuals + ((test2/test1)*directions) #               (step 6)
        test1 = test2
    return run_tot
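
What the loop above computes, in a minimal sketch (assuming the PySAL 1.x API, and that `inverse_scg` and its `norm` helper are importable from this module; the lattice and data are made up): z = (I - scalar*W)^(-1) * data, approximated by conjugate-gradient-style iteration so the dense n x n inverse is never formed.

import numpy as np
import pysal

w = pysal.lat2W(5, 5)              # toy 5x5 lattice, n = 25
w.transform = 'r'                  # row-standardize so |scalar| < 1 is safe
y = np.random.random((w.n, 1))

z = inverse_scg(w, y, 0.5, threshold=1e-10)

# Cross-check against a dense solve (only feasible at toy sizes); agreement
# should be close when `norm` returns the squared Euclidean norm, which makes
# the loop textbook conjugate gradient.
I, W = np.identity(w.n), w.full()[0]
print(np.allclose(z, np.linalg.solve(I - 0.5 * W, y), atol=1e-4))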
Example #2
def set_endog(y, x, w, yend, q, w_lags, lag_q):
    # Create spatial lag of y
    yl = lag_spatial(w, y)
    # spatial and non-spatial instruments
    if issubclass(type(yend), np.ndarray):
        if lag_q:
            lag_vars = sphstack(x, q)
        else:
            lag_vars = x
        spatial_inst = get_lags(w, lag_vars, w_lags)
        q = sphstack(q, spatial_inst)
        yend = sphstack(yend, yl)
    elif yend is None:  # spatial instruments only
        q = get_lags(w, x, w_lags)
        yend = yl
    else:
        raise Exception("invalid value passed to yend")
    return yend, q
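
A hedged usage sketch of `set_endog` (weights and arrays are made up): passing yend=None requests the pure spatial-lag setup, so Wy becomes the endogenous variable and the spatial lags of x serve as the instruments.

import numpy as np
import pysal

w = pysal.lat2W(5, 5)
w.transform = 'r'
x = np.random.random((w.n, 2))
y = np.random.random((w.n, 1))

yend, q = set_endog(y, x, w, yend=None, q=None, w_lags=2, lag_q=True)
# yend is Wy (n x 1); q stacks Wx and W(Wx) (n x 4)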

Example #3
def get_lags(w, x, w_lags):
    '''
    Calculates a given order of spatial lags and all the smaller orders

    Parameters
    ----------
    w       : weight
              PySAL weights instance
    x       : array
              nxk array with the variables to be lagged
    w_lags  : integer
              Maximum order of spatial lag

    Returns
    -------
    rs      : array
              n x (k*w_lags) array with the spatially lagged variables of
              orders 1 through w_lags

    '''
    lag = lag_spatial(w, x)
    spat_lags = lag
    for i in range(w_lags-1):
        lag = lag_spatial(w, lag)
        spat_lags = sphstack(spat_lags, lag)
    return spat_lags
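
A quick shape check (assuming the PySAL 1.x API; the lattice is made up). Note that, per the code, the result stacks only the lags WX through W^(w_lags)X, not the original X:

import numpy as np
import pysal

w = pysal.lat2W(4, 4)
w.transform = 'r'
x = np.random.random((w.n, 3))
print(get_lags(w, x, 2).shape)   # (16, 6): columns are [WX, W(WX)]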
Example #4
 def __calc(self, z):
     # BB, WW and BW join counts for a binary (0/1) indicator z
     zl = pysal.lag_spatial(self.w, z)
     bb = sum(z * zl) / 2.0              # black-black joins
     zw = 1 - z
     zl = pysal.lag_spatial(self.w, zw)
     ww = sum(zw * zl) / 2.0             # white-white joins
     bw = self.J - (bb + ww)             # the rest of the J joins are black-white
     return (bb, ww, bw)
Example #5
 def get_x_lag(self, w, regimes_att):
     if regimes_att:
         xlag = ps.lag_spatial(w, regimes_att['x'])
         xlag = REGI.Regimes_Frame.__init__(self, xlag,
                                            regimes_att['regimes'], constant_regi=None, cols2regi=regimes_att['cols2regi'])[0]
         xlag = xlag.toarray()
     else:
         xlag = ps.lag_spatial(w, self.x)
     return xlag
Example #6
 def get_x_lag(self, w, regimes_att):
     if regimes_att:
         xlag = ps.lag_spatial(w, regimes_att['x'])
         xlag = REGI.Regimes_Frame.__init__(
             self,
             xlag,
             regimes_att['regimes'],
             constant_regi=None,
             cols2regi=regimes_att['cols2regi'])[0]
         xlag = xlag.toarray()
     else:
         xlag = ps.lag_spatial(w, self.x)
     return xlag
Example #7
    def __init__(self, y, w, permutations=0, significance_level=0.05):
        y = y.transpose()
        pml = pysal.Moran_Local

        #################################################################
        # have to optimize conditional spatial permutations over a
        # time series - this is a place holder for the foreclosure paper
        ml = [pml(yi, w, permutations=permutations) for yi in y]
        #################################################################

        q = np.array([mli.q for mli in ml]).transpose()
        classes = np.arange(1, 5)  # no guarantee all 4 quadrants are visited
        Markov.__init__(self, q, classes)
        self.q = q
        self.w = w
        n, k = q.shape
        k -= 1
        self.significance_level = significance_level
        move_types = np.zeros((n, k), int)
        sm = np.zeros((n, k), int)
        if permutations > 0:
            p = np.array([mli.p_z_sim for mli in ml]).transpose()
            self.p_values = p
            pb = p <= significance_level
        else:
            pb = np.zeros_like(y.T)
        for t in range(k):
            origin = q[:, t]
            dest = q[:, t + 1]
            p_origin = pb[:, t]
            p_dest = pb[:, t]
            for r in range(n):
                move_types[r, t] = TT[origin[r], dest[r]]
                key = (origin[r], dest[r], p_origin[r], p_dest[r])
                sm[r, t] = MOVE_TYPES[key]
        if permutations > 0:
            self.significant_moves = sm
        self.move_types = move_types

        # null of own and lag moves being independent

        ybar = y.mean(axis=0)
        r = y / ybar
        ylag = np.array([pysal.lag_spatial(w, yt) for yt in y])
        rlag = ylag / ybar
        rc = r < 1.
        rlagc = rlag < 1.
        markov_y = pysal.Markov(rc)
        markov_ylag = pysal.Markov(rlagc)
        A = np.matrix([[1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], [0, 1, 0, 0]])

        kp = A * np.kron(markov_y.p, markov_ylag.p) * A.T
        trans = self.transitions.sum(axis=1)
        t1 = np.diag(trans) * kp
        t2 = self.transitions
        t1 = t1.getA()
        self.chi_2 = pysal.spatial_dynamics.markov.chi2(t1, t2)
        self.expected_t = t1
        self.permutations = permutations
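
A reading of the null-model block above, taken straight from the code: with P_y and P_Wy the two-state Markov matrices estimated from own moves and lag moves, the joint transition matrix under independence is kp = A (P_y ⊗ P_Wy) A', where A only reorders the four joint (own, lag) states to match the quadrant coding of q. The expected counts t1 spread each observed row total across kp, and chi2() compares them with the observed LISA transitions t2.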
Example #8
 def __calc(self, z, op):
     if op == 'c':  # cross-product
         zl = pysal.lag_spatial(self.w, z)
         g = (z * zl).sum()
     elif op == 's':  # squared difference
         zs = np.zeros(z.shape)
         z2 = z**2
         for i, i0 in enumerate(self.w.id_order):
             neighbors = self.w.neighbor_offsets[i0]
             wijs = self.w.weights[i0]
             zw = zip(neighbors, wijs)
             zs[i] = sum([
                 wij * (z2[i] - 2.0 * z[i] * z[j] + z2[j]) for j, wij in zw
             ])
         g = zs.sum()
     elif op == 'a':  # absolute difference
         zs = np.zeros(z.shape)
         for i, i0 in enumerate(self.w.id_order):
             neighbors = self.w.neighbor_offsets[i0]
             wijs = self.w.weights[i0]
             zw = zip(neighbors, wijs)
             zs[i] = sum([wij * abs(z[i] - z[j]) for j, wij in zw])
         g = zs.sum()
     else:  # any previously defined function op
         zs = np.zeros(z.shape)
         for i, i0 in enumerate(self.w.id_order):
             neighbors = self.w.neighbor_offsets[i0]
             wijs = self.w.weights[i0]
             zw = zip(neighbors, wijs)
             zs[i] = sum([wij * op(z, i, j) for j, wij in zw])
         g = zs.sum()
     return g
Example #9
 def __calc(self, z, op):
     if op == 'c':     # cross-product
         zl = pysal.lag_spatial(self.w, z)
         g = (z * zl).sum()
     elif op == 's':   # squared difference
         zs = np.zeros(z.shape)
         z2 = z ** 2
         for i, i0 in enumerate(self.w.id_order):
             neighbors = self.w.neighbor_offsets[i0]
             wijs = self.w.weights[i0]
             zw = zip(neighbors, wijs)
             zs[i] = sum([wij * (z2[i] - 2.0 * z[i] * z[j] + z2[j])
                          for j, wij in zw])
         g = zs.sum()
     elif op == 'a':    # absolute difference
         zs = np.zeros(z.shape)
         for i, i0 in enumerate(self.w.id_order):
             neighbors = self.w.neighbor_offsets[i0]
             wijs = self.w.weights[i0]
             zw = zip(neighbors, wijs)
             zs[i] = sum([wij * abs(z[i] - z[j]) for j, wij in zw])
         g = zs.sum()
     else:              # any previously defined function op
         zs = np.zeros(z.shape)
         for i, i0 in enumerate(self.w.id_order):
             neighbors = self.w.neighbor_offsets[i0]
             wijs = self.w.weights[i0]
             zw = zip(neighbors, wijs)
             zs[i] = sum([wij * op(z, i, j) for j, wij in zw])
         g = zs.sum()
     return g
Example #10
def moran_dispersao(IM, title='', xlabel='', ylabel=''):

    y_norm = normalizar(IM.y)
    y_lag = ps.lag_spatial(IM.w, IM.y)
    y_lag_norm = normalizar(y_lag)
    dados = pd.DataFrame({
        'y': IM.y,
        'y_norm': y_norm,
        'y_lag': y_lag,
        'y_lag_norm': y_lag_norm
    })

    f, ax = plt.subplots(1, figsize=(7, 5))
    sns.regplot(x='y_norm',
                y='y_lag_norm',
                data=dados,
                ci=None,
                color='black',
                line_kws={'color': 'red'})
    plt.axvline(0, c='gray', alpha=0.7)
    plt.axhline(0, c='gray', alpha=0.7)

    limits = np.array(
        [y_norm.min(),
         y_norm.max(),
         y_lag_norm.min(),
         y_lag_norm.max()])
    limits = np.abs(limits).max()
    border = 0.02
    ax.set_xlim(-limits - border, limits + border)
    ax.set_ylim(-limits - border, limits + border)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()
Example #11
def mplot(m, xlabel='', ylabel='', title='', custom=(7, 7)):
    """
    Produce basic Moran Plot 

    Parameters
    ----------
    m : pysal.Moran instance
        values of Moran's I Global Autocorrelation Statistic
    xlabel : str
        label for x axis
    ylabel : str
        label for y axis
    title : str
        title of plot
    custom : tuple
        dimensions of figure size

    Returns
    -------
    fig : Matplotlib Figure instance
        Moran scatterplot figure

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> import pysal as ps
    >>> from pysal.contrib.pdio import read_files
    >>> from pysal.contrib.viz.plot import mplot

    >>> link = ps.examples.get_path('columbus.shp')
    >>> db = read_files(link)
    >>> y = db['HOVAL'].values
    >>> w = ps.queen_from_shapefile(link)
    >>> w.transform = 'R'

    >>> m = ps.Moran(y, w)
    >>> mplot(m, xlabel='Response', ylabel='Spatial Lag',
    ...       title='Moran Scatterplot', custom=(7,7))

    >>> plt.show()
            
    """
    lag = ps.lag_spatial(m.w, m.z)
    fit = ps.spreg.OLS(m.z[:, None], lag[:, None])

    # Customize plot
    fig = plt.figure(figsize=custom)
    ax = fig.add_subplot(111)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    fig.suptitle(title)

    ax.scatter(m.z, lag, s=60, color='k', alpha=.6)
    ax.plot(lag, fit.predy, color='r')

    ax.axvline(0, alpha=0.5)
    ax.axhline(0, alpha=0.5)

    return fig
Example #12
def moran_plot(IM):

    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    import numpy as np
    import pysal as ps

    y_norm = normalize(IM.y)
    y_lag = ps.lag_spatial(IM.w, IM.y)
    y_lag_norm = normalize(y_lag)
    dados = pd.DataFrame({'y':IM.y, 'y_norm':y_norm,
                          'y_lag':y_lag, 'y_lag_norm':y_lag_norm})

    f, ax = plt.subplots(1, figsize=(7, 5))
    sns.regplot(x='y_norm', y='y_lag_norm', data=dados, ci=None,
                color='black', line_kws={'color':'red'})
    plt.axvline(0, c='gray', alpha=0.7)
    plt.axhline(0, c='gray', alpha=0.7)

    limits = np.array([y_norm.min(), y_norm.max(), y_lag_norm.min(), y_lag_norm.max()])
    limits = np.abs(limits).max()
    border = 0.02
    ax.set_xlim(- limits - border, limits + border)
    ax.set_ylim(- limits - border, limits + border)

    plt.show()
Example #13
 def test_lag_spatial(self):
     yl = pysal.lag_spatial(self.w, self.y)
     np.testing.assert_array_almost_equal(yl, [1., 2., 1.])
     self.w.id_order = ['b', 'c', 'a']
     y = np.array([1, 2, 0])
     yl = pysal.lag_spatial(self.w, y)
     np.testing.assert_array_almost_equal(yl, [2., 1., 1.])
     w = pysal.lat2W(3, 3)
     y = np.arange(9)
     yl = pysal.lag_spatial(w, y)
     ylc = np.array([4., 6., 6., 10., 16., 14., 10., 18., 12.])
     np.testing.assert_array_almost_equal(yl, ylc)
     w.transform = 'r'
     yl = pysal.lag_spatial(w, y)
     ylc = np.array([2., 2., 3., 3.33333333, 4., 4.66666667, 5., 6., 6.])
     np.testing.assert_array_almost_equal(yl, ylc)
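
The expected values in this test follow directly from the weights; a minimal sketch (same PySAL 1.x API as the test):

import numpy as np
import pysal

w = pysal.lat2W(3, 3)               # rook lattice; unit 0 neighbors units 1 and 3
y = np.arange(9)
print(pysal.lag_spatial(w, y)[0])   # 4.0 = y[1] + y[3] (binary weights: neighbor sum)
w.transform = 'r'
print(pysal.lag_spatial(w, y)[0])   # 2.0 = neighbor mean after row-standardizing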
Example #14
 def test_lag_spatial(self):
     yl = pysal.lag_spatial(self.w, self.y)
     np.testing.assert_array_almost_equal(yl, [1., 2., 1.])
     self.w.id_order = ['b', 'c', 'a']
     y = np.array([1, 2, 0])
     yl = pysal.lag_spatial(self.w, y)
     np.testing.assert_array_almost_equal(yl, [2., 1., 1.])
     w = pysal.lat2W(3, 3)
     y = np.arange(9)
     yl = pysal.lag_spatial(w, y)
     ylc = np.array([4., 6., 6., 10., 16., 14., 10., 18., 12.])
     np.testing.assert_array_almost_equal(yl, ylc)
     w.transform = 'r'
     yl = pysal.lag_spatial(w, y)
     ylc = np.array(
         [2., 2., 3., 3.33333333, 4.,
          4.66666667, 5., 6., 6.])
     np.testing.assert_array_almost_equal(yl, ylc)
Example #15
 def rose(self, Y, w, k=8):
     # bin directional movement in (y, Wy) space into k circular sectors
     sw = 2 * np.pi / k
     cuts = np.arange(0.0, 2 * np.pi + sw, sw)
     wY = ps.lag_spatial(w, Y)
     dx = Y[:, -1] - Y[:, 0]        # change in y from first to last period
     dy = wY[:, -1] - wY[:, 0]      # change in the spatial lag of y
     theta = np.arctan2(dy, dx)
     neg = theta < 0.0
     utheta = theta * (1 - neg) + neg * (2 * np.pi + theta)  # map to [0, 2*pi)
     return cuts, utheta, dx, dy
Example #16
def drawW(rel, w, k):
    # sector counts for one random relabeling of rel (a reference draw)
    r = np.random.permutation(rel)
    wy = pysal.lag_spatial(w, r)
    y = wy[:, -1] - wy[:, 0]
    x = r[:, -1] - r[:, 0]
    theta = np.arctan2(y, x)
    neg = theta < 0.0
    utheta = theta * (1 - neg) + neg * (2 * np.pi + theta)
    width = 2 * np.pi / k
    cuts = np.arange(0.0, 2 * np.pi + width, width)
    counts, bin_edges = np.histogram(utheta, cuts)
    return counts
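
How drawW is meant to be used, sketched under the assumption that rel is an n x t array aligned with the weights w: each call randomly relabels the observations and returns sector counts, so repeated calls build a reference distribution for an observed rose diagram.

import numpy as np

# rel (n x t array) and w (matching PySAL weights) are assumed to exist
perm_counts = np.array([drawW(rel, w, 8) for _ in range(99)])
# compare observed sector counts (e.g., from rose()) against perm_counts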
Example #17
def run_sim(df,
            start,
            end,
            sim,
            models=[],
            tsvars=[],
            spatvars=[],
            transformvars=[],
            transformvars_post=[]):

    nunits = len(df.loc[start].index)
    tsstreams = [streamers.init_order(nunits, tsvar) for tsvar in tsvars]
    # Seed the streamers
    for stream in tsstreams:
        for value, streamer in zip(df.loc[start - 1, stream['name']].values,
                                   stream['streamers']):
            streamer.seed(value)

    # load the weight matrices
    for sdict in spatvars:
        with open(sdict['path_weight'], 'rb') as p:
            w = pickle.load(p)
            #print(sdict['name'], "loaded", sdict['path_weight'])
        sdict.update({'w': w})

    for t in range(start, end + 1):

        for stream in tsstreams:
            update = streamers.tick(stream['streamers'],
                                    df.loc[t - 1, stream['var']].values)
            df.loc[t, stream['name']] = update

        for sdict in spatvars:
            update = pysal.lag_spatial(sdict['w'], df.loc[t,
                                                          sdict['var']].values)
            df.loc[t, sdict['name']] = update

        for transform in transformvars:
            df = apply_transform(df, transform)

        for model in models:
            outputs, varnames = model.predict(sim=sim, data=df.loc[t])
            for output, varname in zip(outputs, varnames):
                df.loc[t, varname] = output

        for transform in transformvars_post:
            df = apply_transform(df, transform)

    return df
Example #18
def log_lik_lag(ldet, w, b, X, y):
    n = w.n
    r = b[0]    # ml estimate of rho
    b = b[1:]   # ml for betas
    yl = ps.lag_spatial(w,y)
    ys = y - r * yl
    XX = np.dot(X.T, X)
    iXX = np.linalg.inv(XX)
    b = np.dot(iXX, np.dot(X.T,ys))
    yhat = r * yl + np.dot(X,b)
    e = y - yhat
    e2 = (e**2).sum()
    sig2 = e2 / n
    ln2pi = np.log(2*np.pi)
    return ldet - n/2. * ln2pi - n/2. * np.log(sig2) - e2/(2 * sig2)
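
Read off the code, the return value is the full log-likelihood of the spatial lag model at the given rho and the implied ML betas:

    ln L = ln|I - rho*W| - (n/2) ln(2*pi) - (n/2) ln(sigma^2) - e'e / (2*sigma^2)

with e = y - rho*Wy - Xb, sigma^2 = e'e / n, and ldet supplying the log-Jacobian term ln|I - rho*W|.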
Example #19
def _moran_scatterplot_calc(moran_loc, p):
    lag = ps.lag_spatial(moran_loc.w, moran_loc.z)
    fit = ps.spreg.OLS(moran_loc.z[:, None], lag[:, None])
    if p is not None:
        if not isinstance(moran_loc, Moran_Local):
            raise ValueError("`moran_loc` is not a esda.moran.Moran_Local instance")

        _, _, colors, _ = mask_local_auto(moran_loc, p=p)
    else:
        colors = 'black'

    data = {'moran_z': moran_loc.z, 'lag': lag,
            'colors': colors, 'fit_y': fit.predy.flatten(),
            'moranloc_psim': moran_loc.p_sim, 'moranloc_q': moran_loc.q}
    return data
Example #20
def AddLagVars(dataframe, shp, idfield, SumVars, AvgVars, Adj=None):
    df = dataframe.copy()
    if idfield not in df.columns:
        print('Dataframe missing id field')
    if idfield not in pysal.open(shp[:-3] + 'dbf').header:
        print('Shp missing id field')

    if Adj is None:
        Adj = pysal.queen_from_shapefile(shp, idVariable=idfield)
   
    df.set_index(idfield, inplace=True)
    df=df.reindex(Adj.id_order)
    
    Adj.transform = 'o'   # original binary weights: lag is the neighbor sum
    for Var in SumVars:
        df[Var + '_LAG_SUM'] = pysal.lag_spatial(Adj, np.array(df[Var]))
    Adj.transform = 'r'   # row-standardized weights: lag is the neighbor average
    for Var in AvgVars:
        df[Var + '_LAG_AVG'] = pysal.lag_spatial(Adj, np.array(df[Var]))

    df.reset_index(inplace=True) 
    return df
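
A hedged usage sketch (file and column names are hypothetical): the 'o' transform makes the lag a neighbor sum for the SumVars, while the 'r' transform makes it a neighbor average for the AvgVars, exactly as the two loops above exploit.

df2 = AddLagVars(df, 'tracts.shp', 'GEOID',
                 SumVars=['POP'], AvgVars=['INCOME'])
# adds POP_LAG_SUM and INCOME_LAG_AVG columns, matched on GEOID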
Example #21
    def run(self, path):
        W = self.loadWeights()
        if W is None:
            return False

        if not self.verify():
            return False
        else:
            print("running")
            # print(self.data['wtFiles'][self.data['wtFile']])
            newVars = [var.run() for var in self.newVars]
            names = [v[0] for v in newVars]
            vars = [v[1] for v in newVars]
            db = self.db()
            xid = [db.header.index(i) for i in vars]
            X = [db[:, i] for i in xid]

            lag = [pysal.lag_spatial(W, y) for y in X]
            lag = [list(row) for row in zip(*lag)]  # transpose to one row per record
            new_header = db.header + names

            if path.endswith('.dbf'):
                new_spec = db.field_spec + [('N', 20, 10) for n in names]
                data = db.read()
                db.close()
                newdb = pysal.open(path, 'w')
                newdb.header = new_header
                newdb.field_spec = new_spec
                for i, row in enumerate(data):
                    newdb.write(row + lag[i])
                newdb.close()

            elif path.endswith('.csv'):
                data = db.read()
                db.close()
                newdb = pysal.open(path, 'wb')
                writer = csv.writer(newdb)
                writer.writerow(new_header)
                for i, row in enumerate(data):
                    writer.writerow(row + lag[i])
                newdb.close()
Example #22
def mplot(m, xlabel='', ylabel='', title='', custom=(7,7)):
    '''
    Produce basic Moran Plot
    ...
    Parameters
    ----------
    m            : pysal.Moran instance
                   values of Moran's I Global Autocorrelation Statistic
    xlabel       : str
                   label for x axis
    ylabel       : str
                   label for y axis
    title        : str
                   title of plot
    custom       : tuple
                   dimensions of figure size

    Returns
    -------
    None         : the plot is rendered to screen via plt.show()

    '''
    
    lag = ps.lag_spatial(m.w, m.z)
    fit = ps.spreg.OLS(m.z[:, None], lag[:,None])

    ## Customize plot
    fig = plt.figure(figsize=custom)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.suptitle(title)

    plt.scatter(m.z, lag, s=60, color='k', alpha=.6)
    plt.plot(lag, fit.predy, color='r')

    plt.axvline(0, alpha=0.5)
    plt.axhline(0, alpha=0.5)
    plt.show()

    return None
Example #23
    def _calc(self, y, w, classes, k):
        # lag markov
        ly = pysal.lag_spatial(w, y)
        npm = np.matrix
        npa = np.array
        if self.fixed:
            l_classes = pysal.Quantiles(ly.flatten(), k=k).yb
            l_classes.shape = ly.shape
        else:
            l_classes = npa([
                pysal.Quantiles(ly[:, i], k=k).yb for i in np.arange(self.cols)
            ])
            l_classes = l_classes.transpose()
        l_classic = Markov(l_classes)
        T = np.zeros((k, k, k))
        n, t = y.shape
        for t1 in range(t - 1):
            t2 = t1 + 1
            for i in range(n):
                T[l_classes[i, t1], classes[i, t1], classes[i, t2]] += 1

        P = np.zeros_like(T)
        F = np.zeros_like(T)  # fmpt
        ss = np.zeros_like(T[0])
        for i, mat in enumerate(T):
            row_sum = mat.sum(axis=1)
            row_sum = row_sum + (row_sum == 0)
            p_i = np.matrix(np.diag(1. / row_sum) * np.matrix(mat))
            #print i
            #print mat
            #print p_i
            ss[i] = steady_state(p_i).transpose()
            try:
                F[i] = fmpt(p_i)
            except:
                #pylint; "No exception type(s) specified"
                print("Singular fmpt matrix for class", i)
            P[i] = p_i
        return T, P, ss, F
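
In brief, the conditioning above works slice by slice: T[l, i, j] counts moves from class i to class j among observations whose spatial lag sat in class l when the move began; each slice T[l] is row-standardized into the transition matrix P[l], with ss[l] and F[l] holding its steady state and first mean passage times.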
Example #24
    def _calc(self, y, w, classes, k):
        # lag markov
        ly = pysal.lag_spatial(w, y)
        npm = np.matrix
        npa = np.array
        if self.fixed:
            l_classes = pysal.Quantiles(ly.flatten(), k=k).yb
            l_classes.shape = ly.shape
        else:
            l_classes = npa([pysal.Quantiles(
                ly[:, i], k=k).yb for i in np.arange(self.cols)])
            l_classes = l_classes.transpose()
        l_classic = Markov(l_classes)
        T = np.zeros((k, k, k))
        n, t = y.shape
        for t1 in range(t - 1):
            t2 = t1 + 1
            for i in range(n):
                T[l_classes[i, t1], classes[i, t1], classes[i, t2]] += 1

        P = np.zeros_like(T)
        F = np.zeros_like(T)  # fmpt
        ss = np.zeros_like(T[0])
        for i, mat in enumerate(T):
            row_sum = mat.sum(axis=1)
            row_sum = row_sum + (row_sum == 0)
            p_i = np.matrix(np.diag(1. / row_sum) * np.matrix(mat))
            #print i
            #print mat
            #print p_i
            ss[i] = steady_state(p_i).transpose()
            try:
                F[i] = fmpt(p_i)
            except:
                #pylint; "No exception type(s) specified"
                print("Singular fmpt matrix for class", i)
            P[i] = p_i
        return T, P, ss, F
Example #25
def hac_multi(reg, gwk, constant=False):
    """
    HAC robust estimation of the variance-covariance matrix for multi-regression object 

    Parameters
    ----------

    reg             : Regression object (OLS or TSLS)
                      output instance from a regression model

    gwk             : PySAL weights object
                      Spatial weights based on kernel functions

    Returns
    -------

    psi             : kxk array
                      Robust estimate of the variance-covariance matrix
                      (written into reg.vm in place; nothing is returned)

    """
    if not constant:
        reg.hac_var = check_constant(reg.hac_var)
    xu = spbroadcast(reg.hac_var, reg.u)
    gwkxu = lag_spatial(gwk, xu)
    psi0 = spdot(xu.T, gwkxu)
    counter = 0
    for m in reg.multi:
        reg.multi[m].robust = 'hac'
        reg.multi[m].name_gwk = reg.name_gwk
        try:
            psi1 = spdot(reg.multi[m].varb, reg.multi[m].zthhthi)
            reg.multi[m].vm = spdot(psi1, np.dot(psi0, psi1.T))
        except:
            reg.multi[m].vm = spdot(
                reg.multi[m].xtxi, np.dot(psi0, reg.multi[m].xtxi))
        reg.vm[(counter * reg.kr):((counter + 1) * reg.kr),
               (counter * reg.kr):((counter + 1) * reg.kr)] = reg.multi[m].vm
        counter += 1
Example #26
def hac_multi(reg, gwk, constant=False):
    """
    HAC robust estimation of the variance-covariance matrix for multi-regression object 

    Parameters
    ----------

    reg             : Regression object (OLS or TSLS)
                      output instance from a regression model

    gwk             : PySAL weights object
                      Spatial weights based on kernel functions

    Returns
    -------

    psi             : kxk array
                      Robust estimate of the variance-covariance matrix
                      (written into reg.vm in place; nothing is returned)

    """
    if not constant:
        reg.hac_var = check_constant(reg.hac_var)
    xu = spbroadcast(reg.hac_var, reg.u)
    gwkxu = lag_spatial(gwk, xu)
    psi0 = spdot(xu.T, gwkxu)
    counter = 0
    for m in reg.multi:
        reg.multi[m].robust = 'hac'
        reg.multi[m].name_gwk = reg.name_gwk
        try:
            psi1 = spdot(reg.multi[m].varb, reg.multi[m].zthhthi)
            reg.multi[m].vm = spdot(psi1, np.dot(psi0, psi1.T))
        except:
            reg.multi[m].vm = spdot(reg.multi[m].xtxi,
                                    np.dot(psi0, reg.multi[m].xtxi))
        reg.vm[(counter * reg.kr):((counter + 1) * reg.kr),
               (counter * reg.kr):((counter + 1) * reg.kr)] = reg.multi[m].vm
        counter += 1
Example #27
def moran_scatter_plot(shp, dbf, var, w):
    y = np.array(dbf.by_col[var])
    y_lag = pysal.lag_spatial(w, y)
   
    y_z = (y - y.mean()) / y.std()
    y_lag_z = (y_lag - y_lag.mean()) / y_lag.std()
    
    global SHP_DICT 
    uuid = SHP_DICT[shp]
    
    global WS_SERVER 
    ws = create_connection(WS_SERVER)
    msg = {
        "command": "moran_scatter_plot",
        "uuid":  uuid,
        "title": "Moran Scatter plot for variable [%s]" % var,
        "data": { "x": y_z.tolist(), "y" : y_lag_z.tolist() },
        "fields": [var, "lagged %s" % var]
    }
    str_msg = json.dumps(msg)
    ws.send(str_msg)
    #print "send:", str_msg
    ws.close()
Example #28
def moran_dispersao(IM, title='', xlabel='', ylabel=''):

    y_norm = normalizar(IM.y)
    y_lag = ps.lag_spatial(IM.w, IM.y)
    y_lag_norm = normalizar(y_lag)
    dados = pd.DataFrame({'y':IM.y, 'y_norm':y_norm,
                          'y_lag':y_lag, 'y_lag_norm':y_lag_norm})

    f, ax = plt.subplots(1, figsize=(7, 5))
    sns.regplot(x='y_norm', y='y_lag_norm', data=dados, ci=None,
                color='black', line_kws={'color':'red'})
    plt.axvline(0, c='gray', alpha=0.7)
    plt.axhline(0, c='gray', alpha=0.7)

    limits = np.array([y_norm.min(), y_norm.max(), y_lag_norm.min(), y_lag_norm.max()])
    limits = np.abs(limits).max()
    border = 0.02
    ax.set_xlim(- limits - border, limits + border)
    ax.set_ylim(- limits - border, limits + border)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()
Example #29
    def grafico(self):

        """Scatter plot (Moran scatterplot)"""
        w = self.w
        y = self.y
        ystd = (y - np.mean(y)) / np.std(y)
        w.transform = 'r'
        yl = pysal.lag_spatial(w, ystd)

        colors = np.random.rand(len(ystd))
        # area = np.pi * (15 * np.random.rand(100))**2  # 0 to 15 point radiuses

        fig, ax = plt.subplots()
        m, b = np.polyfit(ystd, yl, deg=1)  # slope and intercept of the fit line
        # ax.plot(self.y, fit[0] * self.y + fit[1], color='blue', alpha=0.4, linewidth=0.3, linestyle='dotted')
        ax.scatter(ystd, yl, c=colors, alpha=0.5)
        ax.plot(ystd, m * ystd + b, color='blue', alpha=0.4, linewidth=0.3)

        fig.suptitle("Moran's I: " + str(round(self.mi.I, 5)))
        plt.xlabel(self.fieldName)
        plt.ylabel('Spatial Lag ' + self.fieldName)

        ax.set_yticks([np.mean(yl)], minor=False)
        ax.yaxis.set_major_locator(FixedLocator([round(np.mean(yl), 5)]))
        ax.yaxis.grid(True)

        ax.set_xticks([np.mean(ystd), np.amax(ystd)], minor=True)
        ax.xaxis.set_major_locator(FixedLocator([round(np.mean(ystd), 5)]))
        ax.xaxis.grid(True)

        # ax.spines['left'].set_position((y, np.mean(y)))
        # ax.spines['bottom'].set_position((yl, np.mean(yl)))
        # ax.set_yticklabels(['Bill', 'Jim'])
        # plt.grid(True)
        # fig.savefig('test.jpg')
        fig.show()
Example #30
    def run(self, path):
        if self.verify():
            print("running")
            # print(self.data['wtFiles'][self.data['wtFile']])
            newVars = [var.run() for var in self.newVars]
            names = [v[0] for v in newVars]
            vars = [v[1] for v in newVars]
            db = self.db()
            xid = [db.header.index(i) for i in vars]
            X = [db[:, i] for i in xid]
            W = self.loadWeights()
            lag = [pysal.lag_spatial(W, y) for y in X]
            lag = [list(row) for row in zip(*lag)]  # transpose to one row per record
            new_header = db.header + names

            if path.endswith('.dbf'):
                new_spec = db.field_spec + [('N', 20, 10) for n in names]
                data = db.read()
                db.close()
                newdb = pysal.open(path, 'w')
                newdb.header = new_header
                newdb.field_spec = new_spec
                for i, row in enumerate(data):
                    newdb.write(row + lag[i])
                newdb.close()

            elif path.endswith('.csv'):
                data = db.read()
                db.close()
                newdb = pysal.open(path, 'wb')
                writer = csv.writer(newdb)
                writer.writerow(new_header)
                for i, row in enumerate(data):
                    writer.writerow(row + lag[i])
                newdb.close()
Example #31
def calculate_lag_value(x):
    # relies on a module-level weights object W being defined
    return ps.lag_spatial(W, x)
Example #32
    def spatial_trend(self,
                      subquery,
                      time_cols,
                      num_classes=7,
                      w_type='knn',
                      num_ngbrs=5,
                      permutations=0,
                      geom_col='the_geom',
                      id_col='cartodb_id'):
        """
            Predict the trends of a unit based on:
            1. history of its transitions to different classes (e.g., 1st
               quantile -> 2nd quantile)
            2. average class of its neighbors

            Inputs:
            @param subquery string: e.g., SELECT the_geom, cartodb_id,
              interesting_time_column FROM table_name
            @param time_cols list of strings: list of strings of column names
            @param num_classes (optional): number of classes to break
              distribution of values into. Currently uses quantile bins.
            @param w_type string (optional): weight type ('knn' or 'queen')
            @param num_ngbrs int (optional): number of neighbors (if knn type)
            @param permutations int (optional): number of permutations for test
              stats
            @param geom_col string (optional): name of column which contains
              the geometries
            @param id_col string (optional): name of column which has the ids
              of the table

            Outputs:
            @param trend_up float: probability that a geom will move to a higher
              class
            @param trend_down float: probability that a geom will move to a
              lower class
            @param trend float: (trend_up - trend_down) / trend_static
            @param volatility float: a measure of the volatility based on
              probability stddev(prob array)
        """

        if len(time_cols) < 2:
            plpy.error('More than one time column needs to be passed')

        params = {
            "id_col": id_col,
            "time_cols": time_cols,
            "geom_col": geom_col,
            "subquery": subquery,
            "num_ngbrs": num_ngbrs
        }

        result = self.data_provider.get_markov(w_type, params)

        # build weight
        weights = pu.get_weight(result, w_type)
        weights.transform = 'r'

        # prep time data
        t_data = get_time_data(result, time_cols)

        sp_markov_result = ps.Spatial_Markov(t_data,
                                             weights,
                                             k=num_classes,
                                             fixed=False,
                                             permutations=permutations)

        # get lag classes
        lag_classes = ps.Quantiles(ps.lag_spatial(weights, t_data[:, -1]),
                                   k=num_classes).yb

        # look up probability distribution for each unit according to class and
        #  lag class
        prob_dist = get_prob_dist(sp_markov_result.P, lag_classes,
                                  sp_markov_result.classes[:, -1])

        # find the ups and down and overall distribution of each cell
        trend_up, trend_down, trend, volatility = get_prob_stats(
            prob_dist, sp_markov_result.classes[:, -1])

        # output the results
        return zip(trend, trend_up, trend_down, volatility, weights.id_order)
Example #33
    def __init__(self, w, target_est_count=None, target_moe_count=None, target_th_count=None,\
                    target_est_prop=None, target_moe_prop=None, target_th_prop=None,\
                    target_est_ratio=None, target_moe_ratio=None, target_th_ratio=None,\
                    target_th_all=None, count_est=None, count_th_min=None, count_th_max=None,\
                    exclude=None, auto_exclude=0, base_solutions=100,\
                    zscore=True, pca=True, local_improvement=True, local_params=None,\
                    compactness=None, points=None, anchor=None, cardinality=False,\
                    cv_exclude_count=0, cv_exclude_prop=0, cv_exclude_ratio=0):

        time1 = time.time()
        time_output = {
            'prep': 0,
            'base': 0,
            'base_wrapup': 0,
            'local': 0,
            'local_wrapup': 0,
            'wrapup': 0,
            'total': 0
        }
        # convert arbitrary IDs in W object to integers
        id2i = w.id2i
        neighbors = {
            id2i[key]: [id2i[neigh] for neigh in w.neighbors[key]]
            for key in w.id_order
        }
        w = ps.W(neighbors)

        # build KDTree for use in finding base solution
        if issubclass(type(points), scipy.spatial.KDTree):
            kd = points
            points = kd.data
        elif type(points).__name__ == 'ndarray':
            kd = ps.common.KDTree(points)
        elif issubclass(type(points),
                        ps.core.IOHandlers.pyShpIO.PurePyShpWrapper):
            #loop to find centroids, need to be sure order matches W and data
            centroids = []
            for i in points:
                centroids.append(i.centroid)
            kd = ps.common.KDTree(centroids)
            points = kd.data
        elif points is None:
            kd = None
        else:
            raise Exception('Unsupported type passed to points')

        # dictionary allowing multivariate and univariate flexibility
        target_parts = {'target_est_count':target_est_count,\
                        'target_est_prop':target_est_prop,\
                        'target_est_ratio':target_est_ratio,\
                        'target_sde_count':target_moe_count ,\
                        'target_sde_prop':target_moe_prop,\
                        'target_sde_ratio':target_moe_ratio}

        # setup the holder for the variables to minimize; later we will put all
        # the count, ratio and proportion variables into this array.
        # Also, convert MOEs to standard errors when appropriate
        total_vars = 0
        rows = 0
        if target_est_count is not None:
            rows, cols = target_est_count.shape
            total_vars += cols
            target_parts['target_est_count'] = target_est_count * 1.0
            target_parts['target_sde_count'] = target_moe_count / 1.645
        if target_est_prop is not None:
            rows, cols = target_est_prop.shape
            total_vars += cols // 2
            target_parts['target_est_prop'] = target_est_prop * 1.0
            target_parts['target_sde_prop'] = target_moe_prop / 1.645
        if target_est_ratio is not None:
            rows, cols = target_est_ratio.shape
            total_vars += cols // 2
            target_parts['target_est_ratio'] = target_est_ratio * 1.0
            target_parts['target_sde_ratio'] = target_moe_ratio / 1.645

        if total_vars == 0:
            target_est = None
            print('warning: optimization steps will not be run since no target_est variables provided')
        else:
            target_est = np.ones((rows, total_vars)) * -999

        # organize and check the input data; prep data for actual computations
        position = 0
        target_th = []
        # IMPORTANT: maintain the order of count then proportion then ratio
        if target_est_count is not None:
            target_est, target_th, position = mv_data_prep(target_est_count,\
                                              target_th_count, target_th_all,\
                                              target_est, target_th, position,\
                                              scale=1, ratio=False)
        if target_est_prop is not None:
            target_est, target_th, position = mv_data_prep(target_est_prop,\
                                              target_th_prop, target_th_all,\
                                              target_est, target_th, position,\
                                              scale=2, ratio=False)
        if target_est_ratio is not None:
            target_est, target_th, position = mv_data_prep(target_est_ratio,\
                                              target_th_ratio, target_th_all,\
                                              target_est, target_th, position,\
                                              scale=2, ratio=True)
        target_th = np.array(target_th)

        # compute zscores
        # NOTE: zscores computed using all data, i.e. we do not screen out
        #       observations in the exclude list.
        if zscore and target_est is not None:
            if pca:
                # Python does not currently have a widely used tool for
                # computing PCA with missing values. In principle,
                # NIPALS (Nonlinear Iterative Partial Least Squares)
                # can accommodate missing values, but the implementation in MDP
                # 3.4 will return a matrix of NAN values if there is an NAN
                # value in the input data.
                # http://sourceforge.net/p/mdp-toolkit/mailman/mdp-toolkit-users/?viewmonth=201111
                # http://stats.stackexchange.com/questions/35561/imputation-of-missing-values-for-pca
                # Therefore, we impute the missing values when the user
                # requests PCA; compute the z-scores on the imputed data; and
                # then pass this on to the PCA step.
                # The imputation replaces a missing value with the average of
                # its neighbors (i.e., its spatial lag). If missing values
                # remain (due to missing values in a missing value's neighbor
                # set), then that value is replaced by the column average.
                w_standardized = copy.deepcopy(w)
                w_standardized.transform = 'r'
                target_est_lag = ps.lag_spatial(w_standardized, target_est)
                # replace troublemakers with their spatial lag
                trouble = np.isfinite(target_est)
                trouble = np.bitwise_not(trouble)
                target_est[trouble] = target_est_lag[trouble]
                del target_est_lag
                del trouble
            # Pandas ignores missing values by default, so we can
            # compute the z-score and retain the missing values
            target_est = pd.DataFrame(target_est)
            target_est = (target_est -
                          target_est.mean(axis=0)) / target_est.std(axis=0)
            target_est = target_est.values
            if pca:
                # For the PCA case we need to replace any remaining missing
                # values with their column average. Since we now have z-scores,
                # we know that the average of every column is zero.
                # If it's not the PCA case, then we can leave the missing
                # values in as they will be ignored down the line.
                if np.isfinite(target_est.sum()) == False:
                    trouble = np.isfinite(target_est)
                    trouble = np.bitwise_not(trouble)
                    target_est[trouble] = 0.
                    del trouble

        # run principal components on target data (skip PCA if pca=False)
        # NOTE: matplotlib has deprecated PCA function, also it only uses SVD
        #       which can get tripped up by bad data
        # NOTE: the logic here is to first identify the principal components and
        #       then weight each component in preparation for future SSD
        #       computations; we weight the data here so that we don't need to
        #       weight the data each time the SSD is computed; in effect we want
        #       to compute the SSD on each raw component and then weight that
        #       component's contribution to the total SSD by the component's share
        #       of total variance explained, since the SSD computation has a
        #       squared term we can take the square root of the data now and then
        #       not have to weight it later
        # NOTE: PCA computed using all data, i.e. we do not screen out
        #       observations in the exclude list.
        if pca and target_est is not None:
            try:
                # eigenvector approach
                pca_node = MDP.nodes.PCANode()
                target_est = pca_node.execute(
                    target_est)  # get principal components
            except:
                try:
                    # singular value decomposition approach
                    pca_node = MDP.nodes.PCANode(svd=True)
                    target_est = pca_node.execute(
                        target_est)  # get principal components
                except:
                    # NIPALS would be a better approach than imputing
                    # missing values entirely, but MDP 3.4 does not handle
                    # missing values. Leaving this code as a place holder in
                    # case MDP is updated later.
                    ###pca_node = MDP.nodes.NIPALSNode()
                    ###target_est = pca_node.execute(target_est)  # get principal components
                    raise Exception("PCA not possible given input data and settings. Set zscore=True to automatically impute missing values or address missing values in advance.")

            pca_variance = np.sqrt(pca_node.d / pca_node.total_variance)
            target_est = target_est * pca_variance  # weighting for SSD

        # NOTE: the target_est variable is passed to the SSD function, and the
        #       target_parts variable is passed to the feasibility test function

        # set the appropriate objective function plan
        build_region, enclave_test, local_test = function_picker(count_est,\
                                    count_th_min, count_th_max, target_th_count,\
                                    target_th_prop, target_th_ratio, target_th_all)

        # setup the CV computation
        get_cv = UTILS.get_mv_cv
        cv_exclude = [cv_exclude_count, cv_exclude_prop, cv_exclude_ratio]

        # setup areas to be excluded from computations
        if exclude:
            exclude = [id2i[j] for j in exclude]
            original_exclude = exclude[:]  # in integer ID form
        else:
            original_exclude = []
        # might consider an automated process to drop observations where
        # count_est=0; at this time the user would be expected to add these
        # observations to the exclude list

        time2 = time.time()
        time_output['prep'] = time2 - time1
        # find the feasible solution with the most number of regions
        regions, id2region, exclude, enclaves = BASE.base_region_iterator(\
                             w, count_th_min, count_th_max, count_est, target_th, target_est,\
                             exclude, auto_exclude, get_cv, base_solutions,\
                             target_parts, build_region, enclave_test, kd, points,
                             anchor, cardinality, cv_exclude)

        time3 = time.time()
        time_output['base'] = time3 - time2
        problem_ids = list(set(exclude).difference(original_exclude))
        if id2region == False:
            # Infeasible base run
            exit = "no feasible solution"
            time3a = time4 = time4a = time.time()
        else:
            if target_est is not None:
                # only compute SSDs if there are target_est variables
                start_ssds = np.array([
                    UTILS.sum_squares(region, target_est) for region in regions
                ])
            else:
                start_ssds = np.ones(len(regions)) * -999.0

            if compactness:
                # capture compactness from base solution
                start_compactness = UTILS.compactness_global(
                    regions, compactness)

            if local_improvement and len(regions) > 1:
                # only run the local improvement if the appropriate flag is set
                # (local_improvement=True) and if there is more then one region to
                # swap areas between
                # swap areas along region borders that improve SSD
                time3a = time.time()
                regions, id2region, exit = \
                              LOCAL.local_search(regions, id2region, w, count_th_min, count_th_max,\
                                                 count_est, target_th, target_parts,\
                                                 target_est, exclude, get_cv,\
                                                 local_test, local_params, cv_exclude)
                time4 = time.time()
                # collect stats on SSD for each region
                end_ssds = np.array([
                    UTILS.sum_squares(region, target_est) for region in regions
                ])
                ssd_improvement = (end_ssds - start_ssds) / start_ssds
                ssd_improvement[np.isnan(
                    ssd_improvement
                )] = 0.0  # makes singleton regions have 0 improvement
                ssds = np.vstack((start_ssds, end_ssds, ssd_improvement)).T
                if compactness:
                    # capture compactness from final solution
                    end_compactness = UTILS.compactness_global(
                        regions, compactness)
                    compact_change = \
                        (end_compactness - start_compactness) / start_compactness
                    compacts = np.vstack(
                        (start_compactness, end_compactness, compact_change)).T
                else:
                    compacts = np.ones((len(regions), 3)) * -999.0
                time4a = time.time()
            else:
                time3a = time4 = time.time()
                # capture start SSDs and compactness, insert -999 for "improvements"
                ssds = np.vstack((start_ssds, np.ones(start_ssds.shape)*-999,\
                                              np.ones(start_ssds.shape)*-999)).T
                if compactness:
                    compacts = np.vstack((start_compactness, np.ones(start_compactness.shape)*-999,\
                                                             np.ones(start_compactness.shape)*-999)).T
                else:
                    compacts = np.ones((len(regions), 3)) * -999.0
                exit = 'no local improvement'
                print("Did not run local improvement")
                time4a = time.time()

        time_output['base_wrapup'] = time3a - time3
        time_output['local'] = time4 - time3a
        time_output['local_wrapup'] = time4a - time4

        ####################
        # process regionalization results for user output
        ####################

        # setup header for the pandas dataframes (estimates, MOEs, CVs)
        header = []
        if target_est_count is not None:
            if 'pandas' in str(type(target_est_count)):
                header.extend(target_est_count.columns.tolist())
            else:
                header.extend([
                    'count_var' + str(i)
                    for i in range(target_est_count.shape[1])
                ])
        if target_est_prop is not None:
            if 'pandas' in str(type(target_est_prop)):
                header.extend(target_est_prop.columns.tolist())
            else:
                header.extend([
                    'prop_var' + str(i)
                    for i in range(target_est_prop.shape[1] // 2)
                ])
        if target_est_ratio is not None:
            if 'pandas' in str(type(target_est_ratio)):
                header.extend(target_est_ratio.columns.tolist())
            else:
                header.extend([
                    'ratio_var' + str(i)
                    for i in range(target_est_ratio.shape[1] // 2)
                ])

        # initialize pandas dataframes (estimates, MOEs, CVs; regions and areas)
        regionID = pd.Index(range(len(regions)), name='regionID')
        ests_region = pd.DataFrame(index=regionID, columns=header)
        moes_region = pd.DataFrame(index=regionID, columns=header)
        cvs_region = pd.DataFrame(index=regionID, columns=header)
        areaID = pd.Index(range(w.n), name='areaID')
        ests_area = pd.DataFrame(index=areaID, columns=header)
        moes_area = pd.DataFrame(index=areaID, columns=header)
        cvs_area = pd.DataFrame(index=areaID, columns=header)

        # setup header and pandas dataframe (count variable, if applicable)
        header = ['count']
        if count_est is not None:
            if 'pandas' in str(type(count_est)):
                header = [count_est.columns[0]]
        counts_region = pd.DataFrame(index=range(len(regions)), columns=header)
        counts_area = pd.DataFrame(index=range(w.n), columns=header)

        # create SSD and compactness dataframes
        if id2region == False:
            # Infeasible base run
            ssds = None
            compacts = None
        else:
            ssds = pd.DataFrame(
                ssds,
                index=regionID,
                columns=['start_ssd', 'end_ssd', 'ssd_improvement'])
            compacts = pd.DataFrame(compacts,
                                    index=regionID,
                                    columns=[
                                        'start_compactness', 'end_compactness',
                                        'compactness_improvement'
                                    ])

        # this one-dimensional list will contain the region IDs (ordered by area)
        ordered_region_ids = np.ones(w.n) * -9999

        for i, region in enumerate(regions):
            if count_est is not None:
                # get region totals for count variable
                counts_region.loc[i] = count_est[region].sum()
                for j in region:
                    counts_area.loc[j] = count_est[j]
            ests = []
            sdes = []
            if target_est_count is not None:
                # est, MOE and CV for count data
                est, sde = UTILS.get_est_sde_count(region, target_parts)
                est[np.isnan(est)] = 0.0  # clean up 0/0 case
                sde[np.isnan(sde)] = 0.0  # clean up 0/0 case
                ests.extend(est)
                sdes.extend(sde)
            if target_est_prop is not None:
                # est, MOE and CV for proportion data
                est, sde = UTILS.get_est_sde_prop(region, target_parts)
                est[np.isnan(est)] = 0.0  # clean up 0/0 case
                sde[np.isnan(sde)] = 0.0  # clean up 0/0 case
                ests.extend(est)
                sdes.extend(sde)
            if target_est_ratio is not None:
                # est, MOE and CV for ratio data
                est, sde = UTILS.get_est_sde_ratio(region, target_parts)
                est[np.isnan(est)] = 0.0  # clean up 0/0 case
                sde[np.isnan(sde)] = 0.0  # clean up 0/0 case
                ests.extend(est)
                sdes.extend(sde)
            ests_region, moes_region, cvs_region = wrapup_region(\
                                i, ests, sdes, target_parts,
                                ests_region, moes_region, cvs_region)
            ests_area, moes_area, cvs_area = wrapup_areas(\
                                region, target_parts,
                                ests_area, moes_area, cvs_area)
            ordered_region_ids[region] = i
        # set excluded areas to region ID -999
        ordered_region_ids[exclude] = -999
        time5 = time.time()
        time_output['wrapup'] = time5 - time4
        time_output['total'] = time5 - time1

        self.exit = exit
        self.time = time_output
        self.enclaves = enclaves
        self.p = len(regions)
        self.regions = regions
        self.region_ids = ordered_region_ids.tolist()
        self.ssds = ssds
        self.compactness = compacts
        self.ests_region = ests_region
        self.moes_region = moes_region
        self.cvs_region = cvs_region
        self.ests_area = ests_area
        self.moes_area = moes_area
        self.cvs_area = cvs_area
        self.counts_region = counts_region
        self.counts_area = counts_area
        self.problem_ids = problem_ids
Example #34
def ml_error(y, X, w, precrit=0.0000001, verbose=False, method='full'):
    """
    Maximum likelihood of spatial error model

    Parameters
    ----------
    y: dependent variable (nx1 array)

    w: spatial weights object

    X: explanatory variables (nxk array)

    precrit: convergence criterion

    verbose: boolean; print iterations during estimation

    method: method used to evaluate the Jacobian term in the concentrated
    likelihood function (FULL|ORD), where FULL = brute force,
    ORD = eigenvalue-based Jacobian

    Returns
    -------

    Results: dictionary with estimates, standard errors, vcv, and z-values

    """
    n = w.n
    n,k = X.shape
    yy = (y**2).sum()
    yl = ps.lag_spatial(w, y)
    ylyl = (yl**2).sum()
    Xy = np.dot(X.T,y)
    Xl = ps.lag_spatial(w, X)
    Xly = np.dot(Xl.T,y) + np.dot(X.T, yl)
    Xlyl = np.dot(Xl.T, yl)
    XX = np.dot(X.T, X)
    XlX = np.dot(Xl.T,X) + np.dot(X.T, Xl)
    XlXl = np.dot(Xl.T, Xl)
    yly = np.dot(yl.T, y)
    yyl = np.dot(y.T, yl)
    ylyl = np.dot(yl.T, yl)


    lam = 0
    dlik, b, sig2, tr, dd = defer(w, lam, yy, yyl, ylyl, Xy, Xly, Xlyl, XX, XlX,
            XlXl)

    roots = np.linalg.eigvals(w.full()[0])
    maxroot = 1./roots.max()
    minroot = 1./roots.min()
    delta = 0.0001



    if dlik > 0:
        ll = lam
        ul = maxroot - delta
    else:
        ul = lam
        ll = minroot + delta

    # bisection
    t = 10

    lam0 = (ll + ul) /2.
    i = 0

    if verbose:
        line = "\nMaximum Likelihood Estimation of Spatial Error Model"
        print(line)
        line = "%-5s\t%12s\t%12s\t%12s\t%12s" % ("Iter.", "LL", "LAMBDA", "UL", "dlik")
        print(line)

    while abs(t - lam0) > precrit:
        if verbose:
            print("%d\t%12.8f\t%12.8f\t%12.8f\t%12.8f" % (i, ll, lam0, ul, dlik))
        i += 1

        dlik, b, sig2, tr, dd = defer(w, lam0, yy, yyl, ylyl, Xy, Xly, Xlyl,
                XX, XlX, XlXl)
        if dlik > 0:
            ll = lam0
        else:
            ul = lam0
        t = lam0
        lam0 = (ul + ll)/ 2.


    ldet = _logJacobian(w, lam0, method)
    llik = log_lik_error(ldet, w, b, lam0, X, y, sig2)

    # Info Matrix 
    # l = lambda
    # B = betas
    # s = sigma2

    # Vl  ClB Cls
    # CBl VB  CBs
    # Csl CsB Vs

    # Vll
    n,k = X.shape
    W = w.full()[0]
    B_inv = np.linalg.inv(np.eye(n) - lam0 * W)
    WB = np.dot(W, B_inv)
    trWB = np.trace(WB)
    Vl = trWB**2 + np.trace(np.dot(WB.T, WB))

    # Cls
    Cls = trWB / sig2

    # Vs
    Vs = n / (2.0 * sig2 * sig2)

    # VB
    XL = X - lam0 * ps.lag_spatial(w, X)
    VB = sig2 * np.linalg.inv(np.dot(XL.T, XL))
    
    #Variance for l and s is inverse of 2x2 information matrix
    Infols = np.zeros((2,2))
    Infols[0,0] = Vl
    Infols[1,0] = Cls
    Infols[0,1] = Cls
    Infols[1,1] = Vs
    Varls = np.linalg.inv(Infols)
    


    results = {}
    results['betas'] = b
    results['lambda'] = lam0
    results['llik'] = llik
    results['sig2'] = sig2
    results['std.error_B'] = np.sqrt(np.diag(VB))
    results['std.error_l'] = np.sqrt(Varls[0,0])
    results['method'] = method
    
    return results
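
# _logJacobian is referenced above but not shown in this example; a minimal
# sketch consistent with the FULL/ORD options described in the docstring
# (the name and signature here are illustrative, not the original helper):
def _log_jacobian_sketch(w, lam, method='full'):
    W = w.full()[0]
    if method.upper() == 'ORD':
        # Ord (1975) eigenvalue form: ln|I - lam*W| = sum_i ln(1 - lam*omega_i)
        evals = np.linalg.eigvals(W)
        return np.log(1.0 - lam * evals).sum().real
    # brute force: log-determinant of (I - lam*W)
    sign, ldet = np.linalg.slogdet(np.eye(w.n) - lam * W)
    return ldet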
Beispiel #35
    def __init__(self, y, x, w, method='full', epsilon=0.0000001):
        # set up main regression variables and spatial filters
        self.y = y
        self.x = x
        self.n, self.k = self.x.shape
        self.method = method
        self.epsilon = epsilon
        #W = w.full()[0]
        #Wsp = w.sparse
        ylag = ps.lag_spatial(w, y)
        # b0, b1, e0 and e1
        xtx = spdot(self.x.T, self.x)
        xtxi = la.inv(xtx)
        xty = spdot(self.x.T, self.y)
        xtyl = spdot(self.x.T, ylag)
        b0 = np.dot(xtxi, xty)
        b1 = np.dot(xtxi, xtyl)
        e0 = self.y - spdot(x, b0)
        e1 = ylag - spdot(x, b1)
        methodML = method.upper()
        # call minimizer using concentrated log-likelihood to get rho
        if methodML in ['FULL', 'LU', 'ORD']:
            if methodML == 'FULL':
                W = w.full()[0]     # moved here
                res = minimize_scalar(lag_c_loglik, 0.0, bounds=(-1.0, 1.0),
                                      args=(
                                          self.n, e0, e1, W), method='bounded',
                                      tol=epsilon)
            elif methodML == 'LU':
                I = sp.identity(w.n)
                Wsp = w.sparse  # moved here
                res = minimize_scalar(lag_c_loglik_sp, 0.0, bounds=(-1.0,1.0),
                                      args=(self.n, e0, e1, I, Wsp),
                                      method='bounded', tol=epsilon)
            elif methodML == 'ORD':
                # check on symmetry structure
                if w.asymmetry(intrinsic=False) == []:
                    ww = symmetrize(w)
                    WW = ww.todense()
                    evals = la.eigvalsh(WW)
                else:
                    W = w.full()[0]     # moved here
                    evals = la.eigvals(W)
                res = minimize_scalar(lag_c_loglik_ord, 0.0, bounds=(-1.0, 1.0),
                                      args=(
                                          self.n, e0, e1, evals), method='bounded',
                                      tol=epsilon)
        else:
            raise ValueError("{0} is an unsupported method".format(methodML))

        self.rho = res.x[0][0]

        # compute full log-likelihood, including constants
        ln2pi = np.log(2.0 * np.pi)
        llik = -res.fun - self.n / 2.0 * ln2pi - self.n / 2.0
        self.logll = llik[0][0]

        # b, residuals and predicted values

        b = b0 - self.rho * b1
        self.betas = np.vstack((b, self.rho))   # rho added as last coefficient
        self.u = e0 - self.rho * e1
        self.predy = self.y - self.u

        xb = spdot(x, b)

        self.predy_e = inverse_prod(
            w.sparse, xb, self.rho, inv_method="power_exp", threshold=epsilon)
        self.e_pred = self.y - self.predy_e

        # residual variance
        self.sig2 = self.sig2n  # no allowance for division by n-k

        # information matrix
        if methodML != 'FULL':
            W = w.full()[0]  # dense W is only built on the FULL path above
        a = -self.rho * W
        np.fill_diagonal(a, 1.0)
        ai = la.inv(a)
        wai = np.dot(W, ai)
        tr1 = np.trace(wai)

        wai2 = np.dot(wai, wai)
        tr2 = np.trace(wai2)

        waiTwai = np.dot(wai.T, wai)
        tr3 = np.trace(waiTwai)

        wpredy = ps.lag_spatial(w, self.predy_e)
        wpyTwpy = np.dot(wpredy.T, wpredy)
        xTwpy = spdot(x.T, wpredy)

        # order of variables is beta, rho, sigma2

        v1 = np.vstack(
            (xtx / self.sig2, xTwpy.T / self.sig2, np.zeros((1, self.k))))
        v2 = np.vstack(
            (xTwpy / self.sig2, tr2 + tr3 + wpyTwpy / self.sig2, tr1 / self.sig2))
        v3 = np.vstack(
            (np.zeros((self.k, 1)), tr1 / self.sig2, self.n / (2.0 * self.sig2 ** 2)))

        v = np.hstack((v1, v2, v3))

        self.vm1 = la.inv(v)  # vm1 includes variance for sigma2
        self.vm = self.vm1[:-1, :-1]  # vm is for coefficients only
Beispiel #36
 '"Far West 3/"']
snames=[name for name in names if name not in out]
sids=[names.index(name) for name in snames]
states=data[sids,:]
us=data[0]

from pylab import *

years=np.arange(1969,2009)
rel=states/(us*1.)

gal=pysal.open('states48.gal')
w=gal.read()
rt=rel.transpose()
w.transform='r'
wrel=pysal.lag_spatial(w,rel)

y1=rel[:,0]
wy1=wrel[:,0]
y2=rel[:,-1]
wy2=wrel[:,-1]


minx,miny=rel.min(),rel.min()
maxx,maxy=rel.max(),rel.max()
import matplotlib.pyplot as plt
import matplotlib.patches as mpp
fig=plt.figure()

dx=y2-y1
dy=wy2-wy1
Beispiel #37
    ## build weight
    weights = pu.get_weight(query_result, w_type)
    weights.transform = "r"

    ## prep time data
    t_data = get_time_data(query_result, time_cols)

    plpy.debug("shape of t_data %d, %d" % t_data.shape)
    plpy.debug("number of weight objects: %d, %d" % (weights.sparse).shape)
    plpy.debug("first num elements: %f" % t_data[0, 0])

    sp_markov_result = ps.Spatial_Markov(t_data, weights, k=num_classes, fixed=False, permutations=permutations)

    ## get lag classes
    lag_classes = ps.Quantiles(ps.lag_spatial(weights, t_data[:, -1]), k=num_classes).yb

    ## look up probability distribution for each unit according to class and lag class
    prob_dist = get_prob_dist(sp_markov_result.P, lag_classes, sp_markov_result.classes[:, -1])

    ## find the ups and downs and overall distribution of each cell
    trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, sp_markov_result.classes[:, -1])

    ## output the results
    return zip(trend, trend_up, trend_down, volatility, weights.id_order)


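# get_prob_dist is called above but not shown in this example; a plausible
# sketch (an assumption inferred from how its output is consumed): for each
# unit, select the row of the Spatial_Markov conditional transition matrix P
# that matches the unit's lag class and current class.
import numpy as np

def get_prob_dist_sketch(transition_matrix, lag_indices, unit_indices):
    # one row of transition probabilities per spatial unit
    return np.array([transition_matrix[lag_indices[i]][unit_indices[i]]
                     for i in range(len(lag_indices))])
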
def get_time_data(markov_data, time_cols):
    """
        Extract the time columns and bin appropriately
    """
Beispiel #38
# I didn't understand QURBRURX
db1['POPDENS'] = db.ACS12_5yr_B01003001 / (db.SE_T02A_002 * 1.)

# if no home value, assign the spatial lag of the estimate and SE
homeval = db1['MHSEVAL_ALT'].copy()
homeval_se = db.ACS12_5yr_B25077001s.copy()
dbf = ps.open(os.path.join(spath, 'USA_Counties_500k.dbf'))

# Rename dbf GEOIDs to match homeval
geoid = dbf.by_col('geoFIPS')

shp_fips = pd.DataFrame(dbf.by_col('geoFIPS'), index=geoid)
shp_fips = shp_fips.join(homeval)
shp_fips = shp_fips.join(homeval_se)
shp_fips['MHSEVAL_ALT_LAG'] = ps.lag_spatial(w, shp_fips.MHSEVAL_ALT)
shp_fips['MHSEVAL_ALT_LAG_SE'] = ps.lag_spatial(w, shp_fips.ACS12_5yr_B25077001s)

mh = shp_fips.ix[shp_fips.MHSEVAL_ALT_LAG == 0].MHSEVAL_ALT.tolist()

# Reassign values to MHSEVAL_ALT_LAG
shp_fips.ix[shp_fips.MHSEVAL_ALT_LAG == 0, 'MHSEVAL_ALT_LAG'] = mh

# Reassign missing standard error values
mhs = shp_fips.ix[shp_fips.MHSEVAL_ALT_LAG_SE == 0].ACS12_5yr_B25077001s.tolist()
shp_fips.ix[shp_fips.MHSEVAL_ALT_LAG_SE == 0, 'MHSEVAL_ALT_LAG_SE'] = mhs

# Get rid of nan values - reassign MHSEVAL_ALT(_SE)
shp_fips.MHSEVAL_ALT_LAG[np.isnan(shp_fips.MHSEVAL_ALT_LAG)] = \
    shp_fips.MHSEVAL_ALT[np.isnan(shp_fips.MHSEVAL_ALT_LAG)]  # replace NA lag with the original estimate
shp_fips.MHSEVAL_ALT_LAG_SE[np.isnan(shp_fips.MHSEVAL_ALT_LAG_SE)] = \
    shp_fips.ACS12_5yr_B25077001s[np.isnan(shp_fips.MHSEVAL_ALT_LAG_SE)]  # SE counterpart, assumed by symmetry with the lag fix above
Beispiel #39
for s in finalSet:
    XVarsdummy.append(s)
    lst[s] = lst[s].astype(float)

XVars = ['median_income', 'LivingArea', 'Age', 'num_trees']
yxs = lst.loc[:, XVars + [YVar]].dropna()
yxs_dummy = lst.loc[:, XVarsdummy + [YVar]].dropna()
y = lst[YVar]

w = pysal.knnW_from_array(lst.loc[\
                               yxs.index, \
                              ['centroid_long', 'centroid_lat']\
                              ].values, k=30)
w.transform = 'R'

yxs = yxs.assign(w_res=pysal.lag_spatial(w, yxs_dummy['residential'].values))
"""
yxs = yxs.assign(w_mixed=pysal.lag_spatial(w, yxs_dummy['mixed'].values))
yxs = yxs.assign(w_retail=pysal.lag_spatial(w, yxs_dummy['retail'].values))
yxs = yxs.assign(w_apt=pysal.lag_spatial(w, yxs_dummy['apt'].values))
yxs = yxs.assign(w_industrial=pysal.lag_spatial(w, yxs_dummy['industrial'].values))
yxs = yxs.assign(w_office=pysal.lag_spatial(w, yxs_dummy['office'].values))
yxs = yxs.assign(w_school=pysal.lag_spatial(w, yxs_dummy['school'].values))
yxs = yxs.assign(w_auto_shop=pysal.lag_spatial(w, yxs_dummy['auto_shop'].values))
yxs = yxs.assign(w_religious=pysal.lag_spatial(w, yxs_dummy['religious'].values))
yxs = yxs.assign(w_food=pysal.lag_spatial(w, yxs_dummy['food'].values))
yxs = yxs.assign(w_charitable=pysal.lag_spatial(w, yxs_dummy['charitable'].values))
yxs = yxs.assign(w_gov=pysal.lag_spatial(w, yxs_dummy['gov'].values))
yxs = yxs.assign(w_medical=pysal.lag_spatial(w, yxs_dummy['medical'].values))
yxs = yxs.assign(w_gas_mart=pysal.lag_spatial(w, yxs_dummy['gas_mart'].values))
"""
Beispiel #40
def ml_lag(y, X, w,  precrit=0.0000001, verbose=False, method='full'):
    """
    Maximum likelihood estimation of spatial lag model

    Parameters
    ----------

    y: dependent variable (nx1 array)

    w: spatial weights object

    X: explanatory variables (nxk array)

    precrit: convergence criterion

    verbose: boolean to print iterations in estimation

    method: method to use for evaluating the Jacobian term in the concentrated likelihood function
    (FULL|ORD), where FULL = brute force, ORD = eigenvalue-based Jacobian

    Returns
    -------

    Results: dictionary with estimates, standard errors, vcv, and z-values
    """

    # step 1 OLS of X on y yields b1
    d = np.linalg.inv(np.dot(X.T, X))
    b1 = np.dot(d, np.dot(X.T, y))

    # step 2 OLS of X on Wy: yields b2
    wy = ps.lag_spatial(w,y)
    b2 = np.dot(d, np.dot(X.T, wy))

    # step 3 compute residuals e1, e2
    e1 = y - np.dot(X,b1)
    e2 = wy - np.dot(X,b2)

    # step 4 given e1, e2 find rho that maximizes Lc

    # ols estimate of rho
    XA = np.hstack((wy,X))
    bols = np.dot(np.linalg.inv(np.dot(XA.T, XA)), np.dot(XA.T,y))
    rols = bols[0][0]

    while np.abs(rols) > 1.0:
        rols = rols/2.0

    if rols > 0.0:
        r1 = rols
        r2 = r1 / 5.0
    else:
        r2 = rols
        r1 = r2 / 5.0

    df1 = 0
    df2 = 0
    tr = 0
    df1, tr = defl_lag(r1, w, e1, e2)
    df2, tr = defl_lag(r2, w, e1, e2)

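    # bracket the root of the concentrated-likelihood derivative: the signs
    # of df1 and df2 at the two trial rho values decide which side of
    # (-0.999, 0.999) to bisect over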
    if df1*df2 <= 0:
        ll = r2
        ul = r1
    elif df1 >= 0.0 and df1 >= df2:
        ll = -0.999
        ul = r2
        df1 = df2
        df2 = -(10.0**10)
    elif df1 >= 0.0 and df1 < df2:
        ll = r1
        ul = 0.999
        df2 = df1
        df1 = -(10.0**10)
    elif df1 < 0.0 and df1 >= df2:
        ul = 0.999
        ll = r1
        df2 = df1
        df1 = 10.0**10
    else:
        ul = r2
        ll = -0.999
        df1 = df2
        df2 = 10.0**10

    # main bisection iteration

    err = 10
    t = rols
    ro = (ll+ul) / 2.
    if verbose:
        line ="\nMaximum Likelihood Estimation of Spatial lag Model"
        print line
        line ="%-5s\t%12s\t%12s\t%12s\t%12s"%("Iter.","LL","RHO","UL","DFR")
        print line

    i = 0
    while err > precrit:
        if verbose:
            print "%d\t%12.8f\t%12.8f\t%12.8f\t%12.8f"  % (i,ll, ro, ul, df1)
        dfr, tr = defl_lag(ro, w, e1, e2)
        if dfr*df1 < 0.0:
            ll = ro
        else:
            ul = ro
            df1 = dfr
        err = np.abs(t-ro)
        t = ro
        ro =(ul+ll)/2.
        i += 1
    ro = t
    bml = b1 - (ro * b2)
    b = [ro, bml]

    xb = np.dot(X, bml)
    eml = y - ro * wy - xb
    sig2 = (eml**2).sum() / w.n
    
    # Likelihood evaluation
    ldet = _logJacobian(w, ro, method)
    llik = log_lik_lag(ldet, w, b, X, y)

    # Information matrix 
    # Ipp IpB  Ips
    # IBp IBB  IBs
    # Isp IsB  Iss

    # Ipp
    n,k = X.shape
    W = w.full()[0]
    A_inv = np.linalg.inv(np.eye(w.n) - ro * W)
    WA = np.dot(W, A_inv)
    tr1 = np.trace(WA)
    tr1 = tr1**2
    tr2 = np.trace(np.dot(WA.T, WA))
    WAXB = np.dot(WA,xb)
    Ipp = tr1 + tr2 + np.dot(WAXB.T, WAXB)/sig2

    IpB = np.dot(X.T, WAXB).T / sig2
    Ips = np.trace(WA) / sig2

    IBp = IpB.T
    IBB = np.dot(X.T,X) / sig2

    Isp = Ips
    Iss = n / (2 * sig2 * sig2)

    results = {}
    results['betas'] = bml
    results['rho'] = ro
    results['llik'] = llik
    results['sig2'] = sig2
    dim = k + 2 
    Info = np.zeros((dim,dim))
    Info[0,0] = Ipp
    Info[0,1:k+1] = IpB.flatten()  # flatten the (1,k) row so it fits the length-k slice
    Info[0,k+1] = Ips
    Info[1:k+1,0] = IpB.flatten()
    Info[1:k+1, 1:k+1] = IBB
    Info[k+1,0] = Ips
    Info[k+1, k+1] = Iss
    VCV = np.linalg.inv(Info)
    se_b = np.sqrt(np.diag(VCV)[1:k+1])
    se_b.shape = (k,1)
    z_b = bml/se_b
    se_rho = np.sqrt(VCV[0,0])
    z_rho = ro / se_rho
    se_sig2 = np.sqrt(VCV[k+1,k+1])
    results['se_b'] = se_b
    results['z_b'] = z_b
    results['se_rho'] = se_rho
    results['z_rho'] = z_rho
    results['se_sig2'] = se_sig2
    results['VCV'] = VCV
    results['method'] = method
    return results
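
# A minimal usage sketch for ml_lag, assuming the helpers used above
# (defl_lag, _logJacobian, log_lik_lag) are importable from this module;
# the Columbus data setup mirrors the doctests elsewhere in this document.
import numpy as np
import pysal as ps

db = ps.open(ps.examples.get_path("columbus.dbf"), "r")
y = np.array(db.by_col("HOVAL")).reshape(-1, 1)
X = np.hstack((np.ones((len(y), 1)),
               np.array([db.by_col("INC"), db.by_col("CRIME")]).T))
w = ps.open(ps.examples.get_path("columbus.gal")).read()
w.transform = 'r'
res = ml_lag(y, X, w, verbose=True)
print res['rho'], res['se_rho']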
Beispiel #41
def robust_vm(reg, gwk=None, sig2n_k=False):
    """
    Robust estimation of the variance-covariance matrix. Estimated by White (default) or HAC (if gwk is provided).

    Parameters
    ----------

    reg             : Regression object (OLS or TSLS)
                      output instance from a regression model

    gwk             : PySAL weights object
                      Optional. Spatial weights based on kernel functions
                      If provided, returns the HAC variance estimation
    sig2n_k         : boolean
                      If True, then use n-k to rescale the vc matrix.
                      If False, use n. (White only)

    Returns
    --------

    psi             : kxk array
                      Robust estimation of the variance-covariance

    Examples
    --------

    >>> import numpy as np
    >>> import pysal
    >>> from ols import OLS
    >>> from twosls import TSLS
    >>> db=pysal.open(pysal.examples.get_path("NAT.dbf"),"r")
    >>> y = np.array(db.by_col("HR90"))
    >>> y = np.reshape(y, (y.shape[0],1))
    >>> X = []
    >>> X.append(db.by_col("RD90"))
    >>> X.append(db.by_col("DV90"))
    >>> X = np.array(X).T                       

    Example with OLS with unadjusted standard errors

    >>> ols = OLS(y,X)
    >>> ols.vm
    array([[ 0.17004545,  0.00226532, -0.02243898],
           [ 0.00226532,  0.00941319, -0.00031638],
           [-0.02243898, -0.00031638,  0.00313386]])

    Example with OLS and White

    >>> ols = OLS(y,X, robust='white')
    >>> ols.vm
    array([[ 0.24515481,  0.01093322, -0.03441966],
           [ 0.01093322,  0.01798616, -0.00071414],
           [-0.03441966, -0.00071414,  0.0050153 ]])

    Example with OLS and HAC

    >>> wk = pysal.kernelW_from_shapefile(pysal.examples.get_path('NAT.shp'),k=15,function='triangular', fixed=False)
    >>> wk.transform = 'o'
    >>> ols = OLS(y,X, robust='hac', gwk=wk)
    >>> ols.vm
    array([[ 0.29213532,  0.01670361, -0.03948199],
           [ 0.01655557,  0.02295829, -0.00116874],
           [-0.03941483, -0.00119077,  0.00568314]])

    Example with 2SLS and White

    >>> yd = []
    >>> yd.append(db.by_col("UE90"))
    >>> yd = np.array(yd).T
    >>> q = []
    >>> q.append(db.by_col("UE80"))
    >>> q = np.array(q).T
    >>> tsls = TSLS(y, X, yd, q=q, robust='white')
    >>> tsls.vm
    array([[ 0.29569954,  0.04119843, -0.02496858, -0.01640185],
           [ 0.04119843,  0.03647762,  0.004702  , -0.00987345],
           [-0.02496858,  0.004702  ,  0.00648262, -0.00292891],
           [-0.01640185, -0.00987345, -0.00292891,  0.0053322 ]])

    Example with 2SLS and HAC

    >>> tsls = TSLS(y, X, yd, q=q, robust='hac', gwk=wk)
    >>> tsls.vm
    array([[ 0.41985329,  0.06823119, -0.02883889, -0.02788116],
           [ 0.06867042,  0.04887508,  0.00497443, -0.01367746],
           [-0.02856454,  0.00501402,  0.0072195 , -0.00321604],
           [-0.02810131, -0.01364908, -0.00318197,  0.00713251]])

    """
    if hasattr(reg, 'h'):  # If reg has H, do 2SLS estimator. OLS otherwise.
        tsls = True
        xu = spbroadcast(reg.h, reg.u)
    else:
        tsls = False
        xu = spbroadcast(reg.x, reg.u)

    if gwk:  # If gwk do HAC. White otherwise.
        gwkxu = lag_spatial(gwk, xu)
        psi0 = spdot(xu.T, gwkxu)
    else:
        psi0 = spdot(xu.T, xu)
        if sig2n_k:
            psi0 = psi0 * (1. * reg.n / (reg.n - reg.k))
    if tsls:
        psi1 = spdot(reg.varb, reg.zthhthi)
        psi = spdot(psi1, np.dot(psi0, psi1.T))
    else:
        psi = spdot(reg.xtxi, np.dot(psi0, reg.xtxi))

    return psi
Beispiel #42
    ...

    Arguments
    ---------

    var             : array
                      values of variable
    w               : pysal weights object
                      spatial weights
    '''

    self.var = var
    self.w = w

    w.transform = 'r'
    slag = ps.lag_spatial(w, var)

    # standardize the variable and its spatial lag
    zx = (var - var.mean()) / var.std()
    zy = (slag - slag.mean()) / slag.std()

    # Moran scatterplot convention: regress the spatial lag (zy) on the variable (zx)
    fit = ps.spreg.OLS(zy[:, None], zx[:, None])

    ## Customize plot
    fig1 = plt.figure(figsize=custom)
    plt.xlabel(xlabel, fontsize=20)
    plt.ylabel(ylabel, fontsize=20)
    plt.suptitle(title, fontsize=30)

    plt.scatter(zx, zy, s=60, color='k', alpha=.6)
    plot(zx, fit.predy, color='r')
Beispiel #43
# In[ ]:

# Now we would like to standardize all the weights. This can be 
# done by specifying 'R' as the matrix transformation.
w_knn3.transform = 'R'
w_knn5.transform = 'R'
w_knn9.transform = 'R'


# In[ ]:

# and then compute the spatial lag for all neighborhoods based
# on the spatial weight matrix. We also store this as a column
# named 'w_percent_knn3' in the original table.
sl = ps.lag_spatial(w_knn3, Y)
data['w_percent_knn3'] = sl


# In[ ]:

data.head()


# In[ ]:

# calculate Moran's I
moran = ps.Moran(Y, w_knn3)


# In[ ]:
Beispiel #44
def rose(Y, w, k=8, permutations=0):
    """
    Calculation of rose diagram for local indicators of spatial association

    Parameters
    ----------

    Y: array (n,2)
       variable observed on n spatial units over 2 time periods

    w: spatial weights object

    k: int
       number of circular sectors in rose diagram

    permutations: int
       number of random spatial permutations for calculation of pseudo
       p-values

    Returns
    -------

    results: dictionary (keys defined below)

    counts:  array (k,1)
        number of vectors with angular movement falling in each sector

    cuts: array (k,1)
        intervals defining circular sectors (in radians)

    random_counts: array (permutations,k)
        counts from random permutations

    pvalues: array (k,1)
        one sided (upper tail) pvalues for observed counts

    Notes
    -----
    Based on Rey, Murray, and Anselin (2011) [1]_

    Examples
    --------

    Constructing data for illustration of directional LISA analytics.
    Data is for the 48 lower US states over the period 1969-2009 and
    includes per capita income normalized to the national average. 

    Load comma delimited data file in and convert to a numpy array

    >>> f=open(pysal.examples.get_path("spi_download.csv"),'r')
    >>> lines=f.readlines()
    >>> f.close()
    >>> lines=[line.strip().split(",") for line in lines]
    >>> names=[line[2] for line in lines[1:-5]]
    >>> data=np.array([map(int,line[3:]) for line in lines[1:-5]])

    Bottom of the file has regional data which we don't need for this example
    so we will subset only those records that match a state name

    >>> sids=range(60)
    >>> out=['"United States 3/"',
    ...      '"Alaska 3/"',
    ...      '"District of Columbia"',
    ...      '"Hawaii 3/"',
    ...      '"New England"',
    ...      '"Mideast"',
    ...      '"Great Lakes"',
    ...      '"Plains"',
    ...      '"Southeast"',
    ...      '"Southwest"',
    ...      '"Rocky Mountain"',
    ...      '"Far West 3/"']
    >>> snames=[name for name in names if name not in out]
    >>> sids=[names.index(name) for name in snames]
    >>> states=data[sids,:]
    >>> us=data[0]
    >>> years=np.arange(1969,2009)

    Now we convert state incomes to express them relative to the national
    average

    >>> rel=states/(us*1.)

    Create our contiguity matrix from an external GAL file and row standardize
    the resulting weights

    >>> gal=pysal.open(pysal.examples.get_path('states48.gal'))
    >>> w=gal.read()
    >>> w.transform='r'

    Take the first and last year of our income data as the interval to do the
    directional analysis

    >>> Y=rel[:,[0,-1]]

    Set the random seed generator which is used in the permutation based
    inference for the rose diagram so that we can replicate our example
    results

    >>> np.random.seed(100)

    Call the rose function to construct the directional histogram for the
    dynamic LISA statistics. We will use four circular sectors for our
    histogram

    >>> r4=rose(Y,w,k=4,permutations=999)

    What are the cut-offs for our histogram - in radians

    >>> r4['cuts']
    array([ 0.        ,  1.57079633,  3.14159265,  4.71238898,  6.28318531])

    How many vectors fell in each sector

    >>> r4['counts']
    array([32,  5,  9,  2])

    What are the pseudo-pvalues for these counts based on 999 random spatial
    permutations of the state income data

    >>> r4['pvalues']
    array([ 0.02 ,  0.001,  0.001,  0.001])

    Repeat the exercise but now for 8 rather than 4 sectors

    >>> r8=rose(Y,w,permutations=999)
    >>> r8['counts']
    array([19, 13,  3,  2,  7,  2,  1,  1])
    >>> r8['pvalues']
    array([ 0.445,  0.042,  0.079,  0.003,  0.005,  0.1  ,  0.269,  0.002])

    References
    ----------

    .. [1] Rey, S.J., A.T. Murray and L. Anselin. 2011. "Visualizing
        regional income distribution dynamics." Letters in Spatial and Resource Sciences, 4: 81-90.

    """
    results = {}
    sw = 2 * np.pi / k
    cuts = np.arange(0.0, 2 * np.pi + sw, sw)
    wY = pysal.lag_spatial(w, Y)
    dx = Y[:, -1] - Y[:, 0]
    dy = wY[:, -1] - wY[:, 0]
    theta = np.arctan2(dy, dx)
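    # arctan2 returns angles in (-pi, pi]; map negatives into [0, 2*pi)
    # so the histogram cuts apply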
    neg = theta < 0.0
    utheta = theta * (1 - neg) + neg * (2 * np.pi + theta)
    counts, bins = np.histogram(utheta, cuts)
    results['counts'] = counts
    results['cuts'] = cuts
    if permutations:
        n, k1 = Y.shape
        ids = np.arange(n)
        all_counts = np.zeros((permutations, k))
        for i in range(permutations):
            rid = np.random.permutation(ids)
            YR = Y[rid, :]
            wYR = pysal.lag_spatial(w, YR)
            dx = YR[:, -1] - YR[:, 0]
            dy = wYR[:, -1] - wYR[:, 0]
            theta = np.arctan2(dy, dx)
            neg = theta < 0.0
            utheta = theta * (1 - neg) + neg * (2 * np.pi + theta)
            c, b = np.histogram(utheta, cuts)
            c.shape = (1, k)
            all_counts[i, :] = c
        larger = sum(all_counts >= counts)
        p_l = permutations - larger
        extreme = (p_l) < larger
        extreme = np.where(extreme, p_l, larger)
        p = (extreme + 1.) / (permutations + 1.)
        results['pvalues'] = p
        results['random_counts'] = all_counts

    return results
Beispiel #45
    def __init__(self, y, w, permutations=0,
                 significance_level=0.05):
        y = y.transpose()
        pml = pysal.Moran_Local

        #################################################################
        # have to optimize conditional spatial permutations over a
        # time series - this is a place holder for the foreclosure paper
        ml = [pml(yi, w, permutations=permutations) for yi in y]
        #################################################################

        q = np.array([mli.q for mli in ml]).transpose()
        classes = np.arange(1, 5)  # no guarantee all 4 quadrants are visited
        Markov.__init__(self, q, classes)
        self.q = q
        self.w = w
        n, k = q.shape
        k -= 1
        self.significance_level = significance_level
        move_types = np.zeros((n, k), int)
        sm = np.zeros((n, k), int)
        if permutations > 0:
            p = np.array([mli.p_z_sim for mli in ml]).transpose()
            self.p_values = p
            pb = p <= significance_level
        else:
            pb = np.zeros_like(y.T)
        for t in range(k):
            origin = q[:, t]
            dest = q[:, t + 1]
            p_origin = pb[:, t]
            p_dest = pb[:, t]
            for r in range(n):
                move_types[r, t] = TT[origin[r], dest[r]]
                key = (origin[r], dest[r], p_origin[r], p_dest[r])
                sm[r, t] = MOVE_TYPES[key]
        if permutations > 0:
            self.significant_moves = sm
        self.move_types = move_types

        # null of own and lag moves being independent

        ybar = y.mean(axis=0)
        r = y / ybar
        ylag = np.array([pysal.lag_spatial(w, yt) for yt in y])
        rlag = ylag / ybar
        rc = r < 1.
        rlagc = rlag < 1.
        markov_y = pysal.Markov(rc)
        markov_ylag = pysal.Markov(rlagc)
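        # A is a permutation matrix that reorders the Kronecker product of
        # the own-series and lag-series transition matrices so the joint
        # states align with the quadrant transition ordering used above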
        A = np.matrix([[1, 0, 0, 0],
                       [0, 0, 1, 0],
                       [0, 0, 0, 1],
                       [0, 1, 0, 0]])

        kp = A * np.kron(markov_y.p, markov_ylag.p) * A.T
        trans = self.transitions.sum(axis=1)
        t1 = np.diag(trans) * kp
        t2 = self.transitions
        t1 = t1.getA()
        self.chi_2 = pysal.spatial_dynamics.markov.chi2(t1, t2)
        self.expected_t = t1
        self.permutations = permutations
Beispiel #46
    plt.savefig(figName, bbox_inches='tight')
    plt.show()

    plt.figure(10)
    #ax8 = plt.subplot(212)
    sns.kdeplot(mi.sim, shade=True)
    plt.vlines(mi.sim, 0, 1)
    plt.vlines(mi.EI + .01, 0, 40, 'r')
    plt.suptitle(filename)
    figName = "lasso\\" + filename + "_LassoMoranStatistics2.png"
    plt.savefig(figName, bbox_inches='tight')
    plt.show()

    #Moran scatterplot with statistically significant LISA values highlighted.
    #spatial lags
    Lag_response = pysal.lag_spatial(w, response)

    #plot the statistically-significant LISA values in a different color than the others
    #find all of the statistically significant LISAs. Since the p-values are in the same
    #order as the I_i statistics, we can do this in the following way
    plt.figure(11)
    sigs = response[lm.p_sim <= .001]
    W_sigs = Lag_response[lm.p_sim <= .001]
    insigs = response[lm.p_sim > .001]
    W_insigs = Lag_response[lm.p_sim > .001]

    b, a = np.polyfit(response, Lag_response, 1)

    #plot the statistically significant points in a dark red color.
    plt.plot(sigs, W_sigs, '.', color='firebrick')
    plt.plot(insigs, W_insigs, '.k', alpha=.2)
Beispiel #47
    def __init__(self, y, x, w, method='full', epsilon=0.0000001, regimes_att=None):
        # set up main regression variables and spatial filters
        self.y = y
        if regimes_att:
            self.x = x.toarray()
        else:
            self.x = x
        self.n, self.k = self.x.shape
        self.method = method
        self.epsilon = epsilon

        #W = w.full()[0] #wait to build pending what is needed
        #Wsp = w.sparse

        ylag = ps.lag_spatial(w, self.y)
        xlag = self.get_x_lag(w, regimes_att)

        # call minimizer using concentrated log-likelihood to get lambda
        methodML = method.upper()
        if methodML in ['FULL', 'LU', 'ORD']:
            if methodML == 'FULL':  
                W = w.full()[0]      # need dense here
                res = minimize_scalar(err_c_loglik, 0.0, bounds=(-1.0, 1.0),
                                      args=(self.n, self.y, ylag, self.x,
                                            xlag, W), method='bounded',
                                      tol=epsilon)
            elif methodML == 'LU':
                I = sp.identity(w.n)
                Wsp = w.sparse   # need sparse here
                res = minimize_scalar(err_c_loglik_sp, 0.0, bounds=(-1.0,1.0),
                                      args=(self.n, self.y, ylag, 
                                            self.x, xlag, I, Wsp),
                                      method='bounded', tol=epsilon)
            elif methodML == 'ORD':
                # check on symmetry structure
                if w.asymmetry(intrinsic=False) == []:
                    ww = symmetrize(w)
                    WW = ww.todense()
                    evals = la.eigvalsh(WW)
                else:
                    W = w.full()[0]      # need dense here
                    evals = la.eigvals(W)
                res = minimize_scalar(
                    err_c_loglik_ord, 0.0, bounds=(-1.0, 1.0),
                    args=(self.n, self.y, ylag, self.x,
                          xlag, evals), method='bounded',
                    tol=epsilon)
        else:
            raise Exception, "{0} is an unsupported method".format(method)

        self.lam = res.x

        # compute full log-likelihood, including constants
        ln2pi = np.log(2.0 * np.pi)
        llik = -res.fun - self.n / 2.0 * ln2pi - self.n / 2.0

        self.logll = llik

        # b, residuals and predicted values

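        # spatial Cochrane-Orcutt / GLS transform: filtering y and X by
        # (I - lam*W) reduces ML estimation of beta to OLS on ys and xs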
        ys = self.y - self.lam * ylag
        xs = self.x - self.lam * xlag
        xsxs = np.dot(xs.T, xs)
        xsxsi = np.linalg.inv(xsxs)
        xsys = np.dot(xs.T, ys)
        b = np.dot(xsxsi, xsys)

        self.betas = np.vstack((b, self.lam))

        self.u = y - np.dot(self.x, b)
        self.predy = self.y - self.u

        # residual variance

        self.e_filtered = self.u - self.lam * ps.lag_spatial(w, self.u)
        self.sig2 = np.dot(self.e_filtered.T, self.e_filtered) / self.n

        # variance-covariance matrix betas

        varb = self.sig2 * xsxsi

        # variance-covariance matrix lambda, sigma

        if methodML != 'FULL':
            W = w.full()[0]  # dense W is only built on the FULL path above
        a = -self.lam * W
        np.fill_diagonal(a, 1.0)
        ai = la.inv(a)
        wai = np.dot(W, ai)
        tr1 = np.trace(wai)

        wai2 = np.dot(wai, wai)
        tr2 = np.trace(wai2)

        waiTwai = np.dot(wai.T, wai)
        tr3 = np.trace(waiTwai)

        v1 = np.vstack((tr2 + tr3,
                        tr1 / self.sig2))
        v2 = np.vstack((tr1 / self.sig2,
                        self.n / (2.0 * self.sig2 ** 2)))

        v = np.hstack((v1, v2))

        self.vm1 = np.linalg.inv(v)

        # create variance matrix for beta, lambda
        vv = np.hstack((varb, np.zeros((self.k, 1))))
        vv1 = np.hstack(
            (np.zeros((1, self.k)), self.vm1[0, 0] * np.ones((1, 1))))

        self.vm = np.vstack((vv, vv1))
Beispiel #48
# In[2]:

QueenWeight = ps.queen_from_shapefile(shape)
QueenWeightMatrix, ids = QueenWeight.full()
QueenWeightMatrix

# In[3]:

#Spatial Lag
#A variable that averages the neighboring values of a location
#Accounts for the autocorrelation in the model with the weight matrix
#
data = ps.pdio.read_files(shape)
Queen = ps.queen_from_shapefile(shape)
Queen.transform = 'r'
percent16Lag = ps.lag_spatial(Queen, data.percent16)

# In[4]:

#This is a spatial lag graph of the percentages of suicide for the year 2016.
#The spatial lag averages, for each area, the values of its neighbors as defined
#by the weight matrix built from the shapefile and the variable you have chosen.

import matplotlib.pyplot as plt
us = file
percent16LagQ16 = ps.Quantiles(percent16Lag, k=10)
f, ax = plt.subplots(1, figsize=(150, 150))
us.assign(cl=percent16LagQ16.yb).plot(column='cl',
                                      categorical=True,
                                      k=10,
                                      cmap='OrRd',
Beispiel #49
    ## prep time data
    t_data = get_time_data(query_result, time_cols)

    plpy.debug('shape of t_data %d, %d' % t_data.shape)
    plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape)
    plpy.debug('first num elements: %f' % t_data[0, 0])

    sp_markov_result = ps.Spatial_Markov(t_data,
                                         weights,
                                         k=num_classes,
                                         fixed=False,
                                         permutations=permutations)

    ## get lag classes
    lag_classes = ps.Quantiles(
        ps.lag_spatial(weights, t_data[:, -1]),
        k=num_classes).yb

    ## look up probability distribution for each unit according to class and lag class
    prob_dist = get_prob_dist(sp_markov_result.P,
                              lag_classes,
                              sp_markov_result.classes[:, -1])

    ## find the ups and downs and overall distribution of each cell
    trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist,
                                                             sp_markov_result.classes[:, -1])

    ## output the results
    return zip(trend, trend_up, trend_down, volatility, weights.id_order)

def get_time_data(markov_data, time_cols):
Beispiel #50
    def __init__(self,
                 y,
                 x,
                 w,
                 method='full',
                 epsilon=0.0000001,
                 regimes_att=None):
        # set up main regression variables and spatial filters
        self.y = y
        if regimes_att:
            self.x = x.toarray()
        else:
            self.x = x
        self.n, self.k = self.x.shape
        self.method = method
        self.epsilon = epsilon

        #W = w.full()[0] #wait to build pending what is needed
        #Wsp = w.sparse

        ylag = ps.lag_spatial(w, self.y)
        xlag = self.get_x_lag(w, regimes_att)

        # call minimizer using concentrated log-likelihood to get lambda
        methodML = method.upper()
        if methodML in ['FULL', 'LU', 'ORD']:
            if methodML == 'FULL':
                W = w.full()[0]  # need dense here
                res = minimize_scalar(err_c_loglik,
                                      0.0,
                                      bounds=(-1.0, 1.0),
                                      args=(self.n, self.y, ylag, self.x, xlag,
                                            W),
                                      method='bounded',
                                      tol=epsilon)
            elif methodML == 'LU':
                I = sp.identity(w.n)
                Wsp = w.sparse  # need sparse here
                res = minimize_scalar(err_c_loglik_sp,
                                      0.0,
                                      bounds=(-1.0, 1.0),
                                      args=(self.n, self.y, ylag, self.x, xlag,
                                            I, Wsp),
                                      method='bounded',
                                      tol=epsilon)
            elif methodML == 'ORD':
                # check on symmetry structure
                if w.asymmetry(intrinsic=False) == []:
                    ww = symmetrize(w)
                    WW = ww.todense()
                    evals = la.eigvalsh(WW)
                else:
                    W = w.full()[0]  # need dense here
                    evals = la.eigvals(W)
                res = minimize_scalar(err_c_loglik_ord,
                                      0.0,
                                      bounds=(-1.0, 1.0),
                                      args=(self.n, self.y, ylag, self.x, xlag,
                                            evals),
                                      method='bounded',
                                      tol=epsilon)
        else:
            raise Exception, "{0} is an unsupported method".format(method)

        self.lam = res.x

        # compute full log-likelihood, including constants
        ln2pi = np.log(2.0 * np.pi)
        llik = -res.fun - self.n / 2.0 * ln2pi - self.n / 2.0

        self.logll = llik

        # b, residuals and predicted values

        ys = self.y - self.lam * ylag
        xs = self.x - self.lam * xlag
        xsxs = np.dot(xs.T, xs)
        xsxsi = np.linalg.inv(xsxs)
        xsys = np.dot(xs.T, ys)
        b = np.dot(xsxsi, xsys)

        self.betas = np.vstack((b, self.lam))

        self.u = y - np.dot(self.x, b)
        self.predy = self.y - self.u

        # residual variance

        self.e_filtered = self.u - self.lam * ps.lag_spatial(w, self.u)
        self.sig2 = np.dot(self.e_filtered.T, self.e_filtered) / self.n

        # variance-covariance matrix betas

        varb = self.sig2 * xsxsi

        # variance-covariance matrix lambda, sigma

        if methodML != 'FULL':
            W = w.full()[0]  # dense W is only built on the FULL path above
        a = -self.lam * W
        np.fill_diagonal(a, 1.0)
        ai = la.inv(a)
        wai = np.dot(W, ai)
        tr1 = np.trace(wai)

        wai2 = np.dot(wai, wai)
        tr2 = np.trace(wai2)

        waiTwai = np.dot(wai.T, wai)
        tr3 = np.trace(waiTwai)

        v1 = np.vstack((tr2 + tr3, tr1 / self.sig2))
        v2 = np.vstack((tr1 / self.sig2, self.n / (2.0 * self.sig2**2)))

        v = np.hstack((v1, v2))

        self.vm1 = np.linalg.inv(v)

        # create variance matrix for beta, lambda
        vv = np.hstack((varb, np.zeros((self.k, 1))))
        vv1 = np.hstack((np.zeros((1, self.k)), self.vm1[0, 0] * np.ones(
            (1, 1))))

        self.vm = np.vstack((vv, vv1))
Beispiel #51
D.head()

mi = ps.Moran(D.n_assaults.values[:, None], qW, two_tailed=False)

mi.I
mi.EI

y = D.n_assaults.values[:, None]
xs = D.n_subwayex.values[:, None]

m1 = ps.spreg.OLS(y, xs, w=qW, spat_diag=True)

print(m1.summary)

sl = ps.lag_spatial(qW, D.n_subwayex.values[:, None])

D_sl = D.assign(w_subway=sl)

m2 = ps.spreg.OLS(D.n_assaults.values[:, None],
                  D_sl[['n_subwayex', 'w_subway']].values,
                  w=qW,
                  spat_diag=True,
                  name_x=D_sl[['n_subwayex', 'w_subway']].columns.tolist(),
                  name_y='assaults')

m3 = ps.spreg.GM_Lag(D.n_assaults.values[:, None],
                     D_sl[['n_subwayex', 'w_subway']].values,
                     w=qW,
                     spat_diag=True,
                     name_x=D_sl[['n_subwayex', 'w_subway']].columns.tolist(),
                     name_y='assaults')  # name_y assumed to mirror the OLS call above
Beispiel #52
    ## prep time data
    t_data = get_time_data(query_result, time_cols)

    plpy.debug('shape of t_data %d, %d' % t_data.shape)
    plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape)
    plpy.debug('first num elements: %f' % t_data[0, 0])

    sp_markov_result = ps.Spatial_Markov(t_data,
                                         weights,
                                         k=num_classes,
                                         fixed=False,
                                         permutations=permutations)

    ## get lag classes
    lag_classes = ps.Quantiles(ps.lag_spatial(weights, t_data[:, -1]),
                               k=num_classes).yb

    ## look up probability distribution for each unit according to class and lag class
    prob_dist = get_prob_dist(sp_markov_result.P, lag_classes,
                              sp_markov_result.classes[:, -1])

    ## find the ups and downs and overall distribution of each cell
    trend_up, trend_down, trend, volatility = get_prob_stats(
        prob_dist, sp_markov_result.classes[:, -1])

    ## output the results
    return zip(trend, trend_up, trend_down, volatility, weights.id_order)


def get_time_data(markov_data, time_cols):