def load_licenses(filename, property):
    """Load in business license data.

    Reads ``filename`` through ``data.RawReader`` and records, for every row,
    the row's neighborhood together with the year of the requested date.

    Args:
        filename: Path of the license file to open.
        property: Which date the year is taken from, either ``'start_date'``
            or ``'end_date'``. (The name shadows the builtin, but it is part
            of the existing call signature and is therefore kept.)

    Returns:
        A dict mapping ``(account_number, site_number)`` to
        ``{'neighborhood': ..., 'year': ...}``. When several rows share the
        same key, the last row read wins.

    Raises:
        ValueError: If ``property`` is not one of the two supported names.
            Checked before the file is opened, so a typo fails immediately
            instead of surfacing as an unbound ``year`` on the first row.
    """
    if property not in ('start_date', 'end_date'):
        raise ValueError(
            "property must be 'start_date' or 'end_date', got %r" % (property,)
        )

    businesses = {}
    with open(filename) as stream:
        for row in data.RawReader(stream):
            if property == 'start_date':
                year = row.start_date.year
            elif row.end_date:
                year = row.end_date.year
            else:
                # No end date on the row: fall back to ``this_year``, a
                # module-level name defined outside this function
                # (presumably the current calendar year -- confirm).
                year = this_year
            key = (row.account_number, row.site_number)
            businesses[key] = {
                'neighborhood': row.neighborhood,
                'year': year,
            }
    return businesses
import data

# Read the neighborhood boundary shapes from the shapefile named by the first
# command-line argument, keyed by each feature's PRI_NEIGH property.
# NOTE(review): fiona.drivers() is deprecated in newer Fiona releases in favor
# of fiona.Env() -- confirm the installed version before switching.
boundaries = {}
with fiona.drivers():
    with fiona.open(sys.argv[1]) as source:
        # Projection built from the shapefile's own CRS; it is applied to
        # each row's LONGITUDE/LATITUDE pair below before the containment
        # test against the boundary shapes.
        neighborhood_projection = pyproj.Proj(preserve_units=True, **source.crs)
        for thing in source:
            neighborhood = thing['properties']['PRI_NEIGH']
            boundaries[neighborhood] = shape(thing['geometry'])

# add the neighborhood to the row using a brute force search, which could be
# sped up computationally if necessary
reader = data.RawReader(sys.stdin)
rows = []
for row in reader:
    rows.append(row)
    # Rows without usable coordinates keep an empty NEIGHBORHOOD.
    row['NEIGHBORHOOD'] = ''
    if row['LONGITUDE'] and row['LATITUDE']:
        x = float(row['LONGITUDE'])
        y = float(row['LATITUDE'])
        point = Point(neighborhood_projection(x, y))
        neighborhoods = set()
        # .items() rather than .iteritems(): the latter exists only on
        # Python 2, while .items() iterates the same pairs on 2 and 3.
        for neighborhood, geometry in boundaries.items():
            if geometry.contains(point):
                neighborhoods.add(neighborhood)
        # A point is expected to fall inside at most one boundary; more than
        # one match aborts the run with the offending row and matches.
        assert len(neighborhoods) <= 1, "%s\n\n%s" % (row, neighborhoods)
        if neighborhoods:
            row['NEIGHBORHOOD'] = neighborhoods.pop()
class Counter(collections.Counter): year_range = range(2004, 2015) def restrict_to_year_range(self, multiplier=1): output = [] for year in self.year_range: output.append(multiplier * self[year]) return output out_dir = sys.argv[-1] with open(sys.argv[1]) as stream: reader = data.RawReader(stream) neighborhood = None new_counts, old_counts = Counter(), Counter() for row in reader: year = int(row['year']) if neighborhood is None: neighborhood = row['neighborhood'] if neighborhood != row['neighborhood']: if not neighborhood: neighborhood = "unknown" fig = figs.FlowOverTime( Counter.year_range, new_counts.restrict_to_year_range(), old_counts.restrict_to_year_range(multiplier=-1), ) filename = os.path.join(