Example #1
0
def merge_map_data(path, featuredf, store=False):
    """
    For each set of features, take results of graph reduction,
    assign cluster numbers, and compute all other data for insertion
    into the final geojson file.

    Args:
        path : path to directory containing graph cut list files
            (named 'CL<mapno>.<ext>'), string
        featuredf : scaled features, pandas dataframe
        store : if True, write/append per-map results to
            'results/geojson.csv' as they are built, bool

    Returns:
        Data for insertion into geojson, pandas dataframe. Also pickled
        to 'results/geojsondf.pkl' as a side effect.
    """
    # get map numbers from filenames: strip the 'CL' prefix and extension
    files = os.listdir(path)
    files = [f[2:-4] for f in files if f[:2] == 'CL']

    # the null map ('xx') is built explicitly below, not in the loop
    files.remove('xx')

    # only allow 3 or fewer features (2 digits encode one feature number)
    mapnos = [f for f in files if len(f) <= 6]

    fnums = [mapno2list(f) for f in mapnos]

    # column names; built as a real list because it is indexed by position
    # in the loop below (map() is lazy and unsubscriptable in Python 3)
    fnames = [[FDICT[n] for n in nums] for nums in fnums]

    # fixed maximum number of clusters
    nclustersmax = 28

    # make null map first: it establishes the common node set
    cnum = cut2cluster('xx', nclustersmax, allowed_nodes=featuredf.index)

    # retain only mutual nodes; .loc replaces the removed DataFrame.ix,
    # and needs a list (it does not accept a set)
    nodelist = list(set(featuredf.index).intersection(set(cnum.index)))
    featuredf = featuredf.loc[nodelist]
    cnum = cnum.loc[nodelist]

    # compute data for the null map

    # similarity colors
    rgmatrix = rg_colormatrix(most_similar(featuredf, cnum))
    # feature bar graph data (all features for the null map)
    fbars = feature_bars(featuredf[list(FDICT.values())], cnum)

    # shape file polygons
    fn = 'data/uscensus/tl_2010_06075_tabblock10/tl_2010_06075_tabblock10.dbf'
    mergedf = merge_shapefiles(featuredf[['lat', 'lon']], fn)
    polys = make_shapefiles(featuredf[['lat', 'lon']], mergedf.polys, cnum)

    # compile into single dataframe (mapno '' marks the null map)
    alldf = _compile_map_df(cnum, polys, rgmatrix, fbars, '')

    # store results
    if store:
        alldf.to_csv('results/geojson.csv')

    # make all other maps
    for i, mapno in enumerate(mapnos):
        cnum = cut2cluster(mapno, nclustersmax, allowed_nodes=featuredf.index)
        rgmatrix = rg_colormatrix(most_similar(featuredf, cnum))

        # only this map's features go into the bar graph data
        fbars = feature_bars(featuredf[fnames[i]], cnum)
        polys = make_shapefiles(featuredf[['lat', 'lon']],
                                mergedf.polys, cnum)

        onedf = _compile_map_df(cnum, polys, rgmatrix, fbars, mapno)

        # append results after each map so partial progress survives a crash
        if store:
            with open('results/geojson.csv', 'a') as storefile:
                onedf.to_csv(storefile, header=False)

        alldf = pd.concat((alldf, onedf), axis=0, ignore_index=True)

    with open('results/geojsondf.pkl', 'wb') as f:
        pickle.dump(alldf, f)

    return alldf


def _compile_map_df(cnum, polys, rgmatrix, fbars, mapno):
    """Compile one map's per-cluster data into a single dataframe.

    Args:
        cnum : cluster number per node, pandas series
        polys : one polygon per cluster, aligned with cnum.unique()
        rgmatrix : similarity color matrix indexed by cluster number
        fbars : per-cluster feature bar values, pandas dataframe
        mapno : map number string ('' for the null map)
    Returns:
        Per-cluster dataframe with cnum/polygon/rgmatrix/mapno/fbars columns.
    """
    df = pd.DataFrame({'cnum': cnum.unique(),
                       'polygon': polys})
    # list comprehensions (not lazy map objects) so pandas stores real lists
    df['rgmatrix'] = [list(rgmatrix.loc[c]) for c in cnum.unique()]
    df['mapno'] = mapno
    df['fbars'] = [list(row) for row in fbars.round(2).values]
    return df
Example #2
0
def merge_map_data(path, featuredf, store=False):
    """
    For each set of features, take results of graph reduction,
    assign cluster numbers, and compute all other data for insertion
    into the final output dataframe.

    Args:
        path : path to directory containing graph cut list files
            (named 'CL<mapno>.<ext>'), string
        featuredf : scaled features, pandas dataframe
        store : if True, write/append per-map results to
            'results/alldf.csv' and pickle the final dataframe to
            'results/alldf.pkl', bool

    Returns:
        Per-cluster map data, pandas dataframe
    """
    # get map numbers from filenames: strip the 'CL' prefix and extension
    files = os.listdir(path)
    files = [f[2:-4] for f in files if f[:2] == 'CL']
    # the null map ('xx') is built explicitly below, not in the loop
    files.remove('xx')

    # only allow 3 or fewer features (2 digits encode one feature number)
    mapnos = [f for f in files if len(f) <= 6]
    # NOTE(review): a debug override `mapnos = ['020408']` previously
    # clobbered the computed list here; removed so all maps are built

    fnums = [mapno2list(f) for f in mapnos]

    # column names; built as a real list because it is indexed by position
    # in the loop below (map() is lazy and unsubscriptable in Python 3)
    fnames = [[FDICT[n] for n in nums] for nums in fnums]

    # fixed maximum number of clusters
    nclustersmax = 28

    # make null map first: it establishes the common node set
    cnum = cut2cluster('xx', nclustersmax, allowed_nodes=featuredf.index)

    # retain only mutual nodes; .loc replaces the removed DataFrame.ix,
    # and needs a list (it does not accept a set)
    nodelist = list(set(featuredf.index).intersection(set(cnum.index)))
    featuredf = featuredf.loc[nodelist]
    cnum = cnum.loc[nodelist]

    # similarity colors and feature bar graph data for the null map
    rgmatrix = rg_colormatrix(most_similar(featuredf, cnum))
    fbars = feature_bars(featuredf[list(FDICT.values())], cnum)

    # shape file polygons
    fn = 'data/uscensus/tl_2010_06075_tabblock10/tl_2010_06075_tabblock10.dbf'
    mergedf = merge_shapefiles(featuredf[['lat', 'lon']], fn)
    polys = make_shapefiles(featuredf[['lat', 'lon']], mergedf.polys, cnum)

    # compile into single dataframe (mapno '' marks the null map)
    alldf = pd.DataFrame({'cnum': cnum.unique(),
                          'polygon': polys})
    alldf['rgmatrix'] = [list(rgmatrix.loc[c]) for c in cnum.unique()]
    alldf['mapno'] = ''
    alldf['fbars'] = [list(row) for row in fbars.round(2).values]

    # store results
    if store:
        alldf.to_csv('results/alldf.csv')

    # make all other maps
    for i, mapno in enumerate(mapnos):
        print(mapno)
        cnum = cut2cluster(mapno, nclustersmax, allowed_nodes=featuredf.index)
        rgmatrix = rg_colormatrix(most_similar(featuredf, cnum))

        # only this map's features go into the bar graph data
        fbars = feature_bars(featuredf[fnames[i]], cnum)
        polys = make_shapefiles(featuredf[['lat', 'lon']],
                                mergedf.polys, cnum)

        onedf = pd.DataFrame({'cnum': cnum.unique(),
                              'polygon': polys})
        onedf['rgmatrix'] = [list(rgmatrix.loc[c]) for c in cnum.unique()]
        onedf['mapno'] = mapno
        onedf['fbars'] = [list(row) for row in fbars.round(2).values]

        # append results after each map so partial progress survives a crash
        if store:
            with open('results/alldf.csv', 'a') as storefile:
                onedf.to_csv(storefile, header=False)

        alldf = pd.concat((alldf, onedf), axis=0, ignore_index=True)

    if store:
        with open('results/alldf.pkl', 'wb') as f:
            pickle.dump(alldf, f)
    return alldf