Example #1
0
    def plot_blobs(self, variable=None, k=None):
        """Plot the blobs as a choropleth, colored by blob ID or by a variable.

        Parameters
        ----------
        variable : str, optional
                   column of ``self.blobs_data`` to color the map by. If the
                   name is not a column, ``variable + '_mean'`` is tried as
                   well. When omitted, ``self.regions`` is plotted and the
                   title says 'blob ID'.

        k        : int, optional
                   value passed to ``maps.plot_choropleth`` as ``k``;
                   defaults to ``self.r.p``.

        Returns
        -------
        False when ``variable`` cannot be resolved to a column (the available
        column names are printed); None otherwise.
        """
        if not variable:
            data = self.regions
            variable = 'blob ID'
        else:
            # Resolve the column name: accept it as given, or with '_mean'
            # appended. Only lookup failures are treated as "invalid name";
            # anything else (e.g. KeyboardInterrupt) propagates.
            try:
                self.blobs_data[variable]
            except (KeyError, TypeError):
                try:
                    self.blobs_data[variable + '_mean']
                except (KeyError, TypeError):
                    print("invalid variable name; variables are the following: \n" +
                        ', '.join(self.blobs_data.columns))
                    return False
                variable = variable + '_mean'

            data = []
            for i in self.d.tractID:
                try:
                    # Label-based lookup: row = blob the area belongs to.
                    # (.loc replaces the removed .ix indexer.)
                    blob_id = self.r.area2region[str(i)]
                    data.append(self.blobs_data.loc[blob_id, variable])
                except KeyError:
                    # Area without a blob (or blob without a row) plots as 0.
                    data.append(0)
            data = np.array(data)

        if not k:
            k = self.r.p
        # Trailing comma: under Python 2 this suppresses the newline so the
        # '\r' print below overwrites the progress text.
        print('  Plotting...'),
        maps.plot_choropleth(self.shp_link, data, type='quantiles',
            title='Blobs from Census ' + self.level + 's\nby ' + variable +
                ' (' + str(self.r.p)+' blobs)', k=k, figsize=(6,9))
        print('\r             \n')
Example #2
0
    def plot_map(self, variable=None):
        """Draw the clustered blobs on a choropleth map.

        Without ``variable`` each area is colored by the cluster assignment
        of its blob; with ``variable`` it is colored by that column of
        ``self.centers`` for the blob's cluster.
        """
        blob_of_area = self.b.regions
        values = np.zeros(blob_of_area.shape)
        n_areas = len(blob_of_area)

        if variable:
            # color by the cluster center's value for the requested column
            for pos in range(n_areas):
                cluster_id = self.assignments[blob_of_area[pos]]
                values[pos] = self.centers.ix[cluster_id, variable]
        else:
            # color by the cluster each area's blob was assigned to
            for pos in range(n_areas):
                values[pos] = self.assignments[blob_of_area[pos]]

        heading = 'Clustered blobs from Census ' + self.b.level + 's'
        maps.plot_choropleth(self.b.shp_link, values, type='equal_interval',
            title=heading, k=30, figsize=(6,9))
Example #3
0
def blobs(vars, min_pop, iterations, method='equal votes', weights=None, 
    initial=10, plot=False):
    """Run ``ps.Maxp`` ``iterations`` times and keep the lowest-scoring solution.

    Parameters
    ----------
    vars       : unused by this function; the module-level ``blob_vars`` array
                 is what gets passed to ``format_blobs``.
    min_pop    : passed to ``ps.Maxp`` as ``floor``.
    iterations : int, number of Maxp solutions to generate.
    method     : passed through to ``format_blobs``.
    weights    : passed through to ``format_blobs``; an empty list when
                 omitted.
    initial    : passed to ``ps.Maxp`` as ``initial``.
    plot       : when True, plot the best solution with
                 ``maps.plot_choropleth``.

    Returns
    -------
    dict with per-iteration lists ``times``, ``solutions``, ``top_scores``,
    ``current_time`` and ``iteration``, plus ``best``: the solution with the
    lowest objective function (None when ``iterations`` is 0).
    """
    # Avoid a shared mutable default; downstream still receives a list.
    if weights is None:
        weights = []

    solutions = []
    top_scores = []
    times = []
    current_time = []
    iteration = []
    best_score = None
    best_solution = None
    for i in range(0,iterations):
        start = time.time()
        r=ps.Maxp(w, format_blobs(blob_vars, method, weights=weights), 
            floor=min_pop, floor_variable=calls['pop'], initial=initial)
        end = time.time()
        # Evaluate the objective once per solution instead of on every use.
        score = r.objective_function()
        times.append(end - start)
        current_time.append(end)
        solutions.append(score)
        if best_solution is None or score < best_score:
            best_score = score
            best_solution = r
        top_scores.append(best_score)
        iteration.append(i)
        print('iteration '+str(i+1)+' - score: '+str(round(score,2))+
        ' ('+str(r.k)+' blobs), best: '+str(round(best_score,2))+', '+
        str(round(end-start,1))+'s')
    if plot and best_solution is not None:
        # prep for plotting
        best = best_solution
        ids=np.array(calls['tractce10']).astype(str)
        # best.sort_regions(method='mean')  # uncomment to sort regions by intensity of the variable
        regions=np.empty(calls.shape[0])
        for j in range(0,calls.shape[0]):
            regions[j]=best.area2region[ids[j]]
        # show blobs we created
        maps.plot_choropleth(shp_link, regions, type='quantiles',
            title='Chicago blobs from census tracts\n(min ' + 
                str(best.floor) +' population per blob, ' + 
                str(best.p)+' blobs)', k=best.p, figsize=(6,8))
    # Return the best solution itself, not whatever the last iteration
    # produced (they only coincide when the last run happens to win).
    return dict(times=times, solutions=solutions, top_scores=top_scores, 
        current_time=current_time, iteration=iteration, best=best_solution)
Example #4
0
 def plot_map(self, variable=None, blob_shp=None):
     """Draw the clustered blobs on a choropleth map.

     With ``blob_shp`` one value per entry of ``self.b.contours`` is
     plotted on that shapefile. Otherwise the values are plotted on
     ``self.b.shp_link``: the cluster assignment of each area's blob, or,
     when ``variable`` is given, that column of ``self.centers``.
     """
     per_area = np.zeros(self.b.regions.shape)
     heading = 'Clustered blobs from Census ' + self.b.level + 's'

     if blob_shp:
         # one polygon per contour: color by the cluster of its blob
         n_contours = len(self.b.contours)
         per_contour = np.zeros(n_contours)
         for pos in range(n_contours):
             per_contour[pos] = self.assignments[self.b.contours_to_blobs[pos]]
         maps.plot_choropleth(blob_shp, per_contour, type='unique_values',
             title=heading, k=30, figsize=(6,8))
         return

     n_areas = len(self.b.regions)
     if variable:
         # color by the cluster center's value for the requested column
         for pos in range(n_areas):
             cluster_id = self.assignments[self.b.regions[pos]]
             per_area[pos] = self.centers.ix[cluster_id, variable]
         scheme, size = 'equal_interval', (6,9)
     else:
         # color by cluster assignment
         for pos in range(n_areas):
             per_area[pos] = self.assignments[self.b.regions[pos]]
         scheme, size = 'unique_values', (6,8)

     maps.plot_choropleth(self.b.shp_link, per_area, type=scheme,
         title=heading, k=30, figsize=size)
Example #5
0
    def plot_blobs(self, blob_shp=None, variable=None, k=None, mapType=None):
        """Plot the blobs as a choropleth, colored by blob ID or by a variable.

        Parameters
        ----------
        blob_shp : optional
                   shapefile passed to ``maps.plot_choropleth`` instead of
                   ``self.shp_link``. When given, one value is plotted per
                   entry of ``self.contours``, ``k`` is forced to
                   ``len(self.contours)`` and ``mapType`` is overridden
                   ('unique_values' without a variable, 'quantiles' with one).

        variable : str, optional
                   column of ``self.blobs_data`` to color by; if it is not a
                   column, ``variable + '_mean'`` is tried as well. When
                   omitted the map is colored by blob ID.

        k        : int, optional
                   passed to ``maps.plot_choropleth`` as ``k``; defaults to
                   ``self.r.p``.

        mapType  : str, optional
                   passed to ``maps.plot_choropleth`` as ``type``; defaults
                   to 'quantiles'.

        Returns
        -------
        False when ``variable`` cannot be resolved to a column (the available
        column names are printed); None otherwise.
        """
        if variable:
            # Resolve the column name once for both plotting modes. Only
            # lookup failures count as "invalid name"; other exceptions
            # propagate instead of being swallowed.
            try:
                self.blobs_data[variable]
            except (KeyError, TypeError):
                try:
                    self.blobs_data[variable + '_mean']
                except (KeyError, TypeError):
                    print("invalid variable name; variables are the following: \n" +
                        ', '.join(self.blobs_data.columns))
                    return False
                variable = variable + '_mean'

        def column_values(keys, key_to_blob):
            # Value of `variable` for the blob behind each key; 0 when the
            # key or the blob row is missing. (.loc replaces the removed .ix;
            # the KeyError handling shows the lookup is label-based.)
            values = []
            for key in keys:
                try:
                    values.append(self.blobs_data.loc[key_to_blob[key], variable])
                except KeyError:
                    values.append(0)
            return np.array(values)

        if blob_shp:
            k = len(self.contours)
            if variable:
                data = column_values(range(len(self.contours)), self.contours_to_blobs)
                mapType = 'quantiles'
            else:
                variable = 'blob ID'
                data = np.arange(len(self.contours))
                mapType = 'unique_values'
        elif variable:
            data = column_values((str(i) for i in self.d.tractID), self.r.area2region)
        else:
            data = self.regions
            variable = 'blob ID'

        if not k:
            k = self.r.p
        if not mapType:
            mapType = 'quantiles'
        print('  Plotting...')

        map_shp = blob_shp if blob_shp else self.shp_link
        # NOTE(review): the 'Test' title prefix and the (1,3) figure size look
        # like debugging leftovers -- kept as-is, confirm before changing.
        if mapType == 'unique_values':
            cmap, prefix = 'Paired', 'Blobs from Test '
        else:
            cmap, prefix = 'hot_r', 'Blobs from Census '
        maps.plot_choropleth(map_shp, data, type=mapType, cmap=cmap,
            title=prefix + self.level + 's\nby ' + variable +
                ' (' + str(self.r.p)+' blobs)', k=k, figsize=(1,3))
        print('\r             \n')
def blobs(v, min_pop, floor_var='pop', iterations=10, method='equal votes', weights=None, 
    initial=10, plot=True, savedata=False, plot_values=False, verbose=True):
    """Create a max-p regions solution for a given shapefile and associated 
    dataset. Builds on pysal.Maxp with improvements to the user interface, 
    verbosity, and mapping. 

    Original problem from "The Max-p-Regions Problem," Duque, Anselin, and Rey, 
    JRS, October 2010, available at http://geography.sdsu.edu/Research/
    Projects/IPC/publication/MaxP_authored.pdf.

    Parameters
    ----------
    v           : array
                  array of variables on which to create blobs (for all 
                    variables, use ['all'])

    min_pop     : int
                  minimum population in each blob (passed to Maxp as 
                    ``floor``)

    floor_var   : str
                  column of ``calls`` summed per blob for the output table 
                    and named in the printed summary ('pop' by default)

    iterations  : int
                  number of blobs solutions to create (will return best): 10 by 
                    default; must be at least 1

    method      : {'equal votes', 'default', 'weighted'}
                  'equal votes' by default, can change to 'weighted'

    weights     : array
                  if method='weighted', add weights for variables as an array

    initial     : int
                  number of times to revise each solution (10 by default)

    plot        : boolean
                  will plot the best solution (True by default)

    savedata    : boolean
                  will save a CSV of the blobs data to the root folder (False 
                    by default)

    plot_values : boolean
                  will color-code the plot by the mean of the underlying 
                    variables. only makes sense with one variable. default 
                    False (plots by ID of the blob)

    verbose     : boolean
                  passed through to pysal.Maxp (True by default)

    Returns
    -------
    dict with ``best`` (the lowest-scoring Maxp solution), ``data`` (a 
    DataFrame with one row per blob: ID, score, size, floor variable total, 
    and mean/stdev of each variable) and ``regions`` (``best.area2region``).

    Raises
    ------
    ValueError if ``iterations`` is less than 1.
    
    Sample usage
    ------------

    >>> blobs(['all_calls_per1000'], min_pop=10000, plot_values=True)

    """
    # Without at least one run there is no solution to report; fail up front
    # with a clear message instead of an AttributeError on None later.
    if iterations < 1:
        raise ValueError('iterations must be at least 1, got ' + str(iterations))
    # Avoid a shared mutable default; downstream still receives a list.
    if weights is None:
        weights = []

    times = []
    best_score = None
    best_solution = None
    # NOTE(review): the floor variable handed to Maxp is a column of ones, so
    # min_pop effectively bounds the number of areas per blob rather than the
    # floor_var total -- confirm this is intended.
    floor_var_array = np.ones((calls.shape[0],1))   #################### changed this
    blob_vars = np.array(calls.loc[:,v], np.float64)
    if len(v) == 1:
        # add shape to the array
        blob_vars.shape = (blob_vars.shape[0], 1)
    print('\n### CREATING BLOBS FROM ' + str(len(v)) + 
        ' VARIABLES ###\n    PARAMETERS:\n     # Minimum ' + floor_var + ' in each blob: ' + 
        str(int(min_pop)) + '\n     # Iterations: ' + str(iterations) +
        '\n     # Method: ' + method + '\n     # Plot blobs: ' + str(plot) + 
        '\n     # Save blobs data: ' + str(savedata) + '\n')
    for i in range(0,iterations):
        start = time.time()
        r=ps.Maxp(w, format_blobs(blob_vars, method, weights=weights), 
            floor=min_pop, floor_variable=floor_var_array, initial=initial, verbose=verbose)
        end = time.time()
        times.append(end - start)
        current_score = r.objective_function()
        if best_solution is None or current_score < best_score:
            best_score = current_score
            best_solution = r
        msg = '\r# ITERATION '+str(i+1)+'                 \n  Score: ' + \
            str(round(current_score,2)) + '\n  Created '+str(r.k)+' blobs (' + \
            str(int(calls.shape[0]/r.k)) + ' tracts per blob)\n  Best solution so far: ' + \
            str(round(best_score,2))
        # remaining time is estimated from the mean duration so far
        msg += '\n  Time taken: '+str(round(end-start,1))+' seconds ('+ \
            str(int(np.mean(times)*(iterations-i-1)))+' seconds remaining)\n'
        # call syntax works on both Python 2 and Python 3
        print(msg)
    
    r = best_solution
    print('\r# BEST SOLUTION:                      \n  Score: '+
        str(round(r.objective_function(),2)) + 
        '\n  '+str(r.k)+' blobs ('+str(int(calls.shape[0]/r.k))+
        ' blocks per blob)')
    if plot:
        # Trailing comma: under Python 2 this suppresses the newline so the
        # '\r' print below overwrites the progress text.
        print('  Plotting...'),
        # prep for plotting
        ids=np.array(calls['block_id']).astype(str)
        if plot_values:
            r.sort_regions(method='mean')  # sort regions by intensity of the variable
        regions=np.empty(calls.shape[0])
        for j in range(0,calls.shape[0]):
            regions[j]=r.area2region[ids[j]]
        # show blobs we created
        maps.plot_choropleth(shp_link, regions, type='quantiles',
            title='Chicago blobs from census tracts\n(min ' + 
                str(int(r.floor)) +' population per blob, ' + 
                str(r.p)+' blobs)', k=r.p, figsize=(6,9))
        print('\r             \n')
    
    # build data structure: 4 fixed columns plus mean/stdev per variable
    sr = np.zeros([r.k, len(v)*2+4])
    for region in range(0,r.k):
        # blob ID
        sr[region][0] = region
        # positions of this blob's areas in the weights' id order
        selectionIDs = [r.w.id_order.index(i) for i in r.regions[region]]
        m = r.z[selectionIDs, :]
        # objective function
        var = m.var(axis=0)
        sr[region][1] = sum(np.transpose(var)) * len(r.regions[region])
        # blob size (number of places in blob)
        sr[region][2] = len(r.regions[region])
        # blob population
        sr[region][3] = calls.loc[selectionIDs, floor_var].sum()
        # variable means and standard deviations
        for j in range(0,len(v)):
            sr[region][4+j*2] = m[:,j].mean()
            sr[region][5+j*2] = m[:,j].std()
    srdf = pd.DataFrame(sr)
    cols = ['Blob', 'Score', 'Size', floor_var]
    for name in v:
        cols.append(name+'_mean')
        cols.append(name+'_stdev')
    srdf.columns = cols
    if savedata:
        srdf.to_csv('Blobs data ' + datetime.datetime.now().strftime('%Y%m%d %H%M') + \
            '.csv', index=False)
    return dict(best=r, data=srdf, regions=r.area2region)
Example #7
0
    def plot_blobs(self, blob_shp=None, variable=None, k=None, mapType=None):
        """Plot the blobs as a choropleth, colored by blob ID or by a variable.

        Parameters
        ----------
        blob_shp : optional
                   shapefile passed to ``maps.plot_choropleth`` instead of
                   ``self.shp_link``. When given, one value is plotted per
                   entry of ``self.contours``, ``k`` is forced to
                   ``len(self.contours)`` and ``mapType`` is overridden
                   ('unique_values' without a variable, 'quantiles' with one).

        variable : str, optional
                   column of ``self.blobs_data`` to color by; if it is not a
                   column, ``variable + '_mean'`` is tried as well. When
                   omitted the map is colored by blob ID.

        k        : int, optional
                   passed to ``maps.plot_choropleth`` as ``k``; defaults to
                   ``self.r.p``.

        mapType  : str, optional
                   passed to ``maps.plot_choropleth`` as ``type``; defaults
                   to 'quantiles'.

        Returns
        -------
        False when ``variable`` cannot be resolved to a column (the available
        column names are printed); None otherwise.
        """
        if variable:
            # Resolve the column name once for both plotting modes. Only
            # lookup failures count as "invalid name"; other exceptions
            # propagate instead of being swallowed.
            try:
                self.blobs_data[variable]
            except (KeyError, TypeError):
                try:
                    self.blobs_data[variable + '_mean']
                except (KeyError, TypeError):
                    print("invalid variable name; variables are the following: \n" +
                        ', '.join(self.blobs_data.columns))
                    return False
                variable = variable + '_mean'

        def column_values(keys, key_to_blob):
            # Value of `variable` for the blob behind each key; 0 when the
            # key or the blob row is missing. (.loc replaces the removed .ix;
            # the KeyError handling shows the lookup is label-based.)
            values = []
            for key in keys:
                try:
                    values.append(self.blobs_data.loc[key_to_blob[key], variable])
                except KeyError:
                    values.append(0)
            return np.array(values)

        if blob_shp:
            k = len(self.contours)
            if variable:
                data = column_values(range(len(self.contours)), self.contours_to_blobs)
                mapType = 'quantiles'
            else:
                variable = 'blob ID'
                data = np.arange(len(self.contours))
                mapType = 'unique_values'
        elif variable:
            data = column_values((str(i) for i in self.d.tractID), self.r.area2region)
        else:
            data = self.regions
            variable = 'blob ID'

        if not k:
            k = self.r.p
        if not mapType:
            mapType = 'quantiles'
        print('  Plotting...')

        map_shp = blob_shp if blob_shp else self.shp_link
        # The two map styles differ only in their color map.
        cmap = 'Paired' if mapType == 'unique_values' else 'hot_r'
        maps.plot_choropleth(map_shp, data, type=mapType, cmap=cmap,
            title='Blobs from Census ' + self.level + 's\nby ' + variable +
                ' (' + str(self.r.p)+' blobs)', k=k, figsize=(12,16))
        print('\r             \n')
Example #8
0
from pysal.contrib.viz import mapping as maps

# Path of the shapefile that will hold the clustering output.
shp_link = os.path.join('outputs', 'lsoas_kde.shp')

# Write `sdf` out to that shapefile.
sdf.to_file(shp_link)

# Re-load the `k_var` column from the DBF file that sits next to the
# shapefile (same path, '.dbf' extension) as a numpy array.
values = np.array(ps.open(shp_link.replace('.shp', '.dbf')).by_col(k_var))

# Map the re-loaded values with one color per unique value and save the
# figure to outputs/K-Means.png.
maps.plot_choropleth(shp_link,
                     values,
                     'unique_values',
                     title='K-Means ' + str(k_pref) + ' Cluster Analysis',
                     savein=os.path.join('outputs', 'K-Means.png'),
                     dpi=150,
                     figsize=(8, 6),
                     alpha=0.9)

# Save `data_std` as a pickle for later analysis.
data_std.to_pickle(os.path.join("outputs", "clusters.pickle"))

##################################################
# Trying out DBSCAN clustering for fun. The results look awful, though.
# Start from the pickle written above so this section can run on its own.
data_std = pd.read_pickle(os.path.join('outputs', 'clusters.pickle'))
d_var = 'DBSCAN'

# Quick sanity check in case something hasn't
# run successfully -- these muck up k-means
data_std.drop(list(data_std.columns[data_std.isnull().any().values].values),
Example #9
0
# Count rows whose 'geoid10' repeats an earlier row; duplicates need to be
# removed before continuing.
# NOTE(review): `.ix` no longer exists in current pandas -- confirm the
# pandas version this script targets.
len(df.ix[df.duplicated('geoid10'),:])  # number of duplicate pairs

## create weights (only need to run once)
# Build rook weights from the shapefile and store them as a GAL file so
# later runs can read them back instead of recomputing.
w = ps.rook_from_shapefile(shp_link)
w.n == df.shape[0] # should be true
gal = ps.open('blocks/CensusBlockTIGER2010.gal','w')
gal.write(w)
gal.close()

# Cast the tract code to int and remember the original row order.
df.tractce10 = df.tractce10.astype('int')
df['order'] = df.index

# Plot the tract codes (tractce10) as an initial map of the shapefile.
maps.plot_choropleth(shp_link, np.array(df.tractce10), type='equal_interval',
     title='Initial Map', k=80)

# get spatial weights (read back from the GAL file written above)
w=ps.open('blocks/CensusBlockTIGER2010.gal').read()
# need to fix the ohare island (tracts 980000 and 770602)
# the following was saved to X_fixed.gal
# (kept commented out as a record of the manual neighbor edits)
# w.neighbors['770602'] = ['980000', '090100']
# w.weights['770602'] = [1.0, 1.0]
# w.neighbors['980000'] = ['770602', '760802']
# w.weights['980000'] = [1.0, 1.0]
# w.neighbors['090100'] = ['770602', '090200']
# w.weights['090100'] = [1.0, 1.0]
# w.neighbors['760802'] = ['980000', '760801', '170500', '170600', '760803', '770902']
# w.weights['760802'] = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

Example #10
0
# Normalize column names to lower case, cast the community area to int and
# remember the original row order of `df`.
df.columns = df.columns.map(lambda x: x.lower())
df.commarea = df.commarea.astype('int')
df['order'] = df.index
# Read everything as strings first, then convert every column except the
# first one to float.
calls = pd.read_csv('master311.csv', dtype=object)
for c in calls.columns[1:]:
    calls[c] = calls[c].astype('float')
ordered_tracts = pd.DataFrame(df.loc[:,['tractce10', 'commarea', 'order']])
# Right join keeps every row of `ordered_tracts`; tracts without call data
# get 0 via fillna.
calls = pd.merge(calls, ordered_tracts, how='right', left_on='tract', 
    right_on='tractce10', sort=False).fillna(0)
# Restore the row order of `df` so values line up with the shapefile.
# NOTE(review): DataFrame.sort was removed from later pandas releases
# (sort_values replaces it) -- confirm the pandas version in use.
calls = calls.sort(['order'])

# all calls by census tract
y = np.array(calls['all_calls_per1000'])
# map values
maps.plot_choropleth(shp_link, np.array(calls.all_calls_per1000), type='fisher_jenks',
     title='All 311 Calls by Census Area, 2011-2015\nUnsmoothed', 
     k=20, figsize=(6,9))

# Global Moran's I
# (the bare attribute expressions below only display a value when run
# interactively)
mi = ps.Moran(y, w)
mi.I
mi.EI
mi.p_norm


# Geary's C
gc = ps.Geary(y, w)
gc.C
gc.EC
gc.z_norm