def parse_regions(database: SqliteUtil, regions_file: str,
        src_epsg: int, prj_epsg: int):
    log.info('Allocating tables for regions.')
    create_tables(database)

    transformer = Transformer.from_crs(f'epsg:{src_epsg}',
        f'epsg:{prj_epsg}', always_xy=True, skip_equivalent=True)
    project = transformer.transform

    log.info('Parsing regions from shapefile.')
    parser = shapefile.Reader(regions_file)
    iter_regions = counter(iter(parser), 'Parsing region %s.')
    regions = []
    for item in iter_regions:
        points = (project(*point) for point in item.shape.points)
        polygon = Polygon(points)
        regions.append((
            item.record.MAZ_ID_10,
            item.record.TAZ_2015,
            item.record.Sq_miles,
            dumps(polygon.centroid),
            dumps(polygon)
        ))
    parser.close()

    log.info('Writing parsed regions to database.')
    database.insert_values('regions', regions, 5)
    database.connection.commit()

    log.info('Creating indexes on new tables.')
    create_indexes(database)
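# A minimal usage sketch (hypothetical paths and EPSG codes; SqliteUtil is
# this package's sqlite wrapper). The shapefile records are assumed to
# expose the MAZ_ID_10, TAZ_2015 and Sq_miles fields read above:
#
#     database = SqliteUtil('input/database.db')
#     parse_regions(database, 'input/maz_regions.shp',
#         src_epsg=4326, prj_epsg=2223)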
def parse_abm(database: SqliteUtil, trips_file: str,
        households_file: str, persons_file: str):
    log.info('Allocating tables for households, persons and trips.')
    create_tables(database)

    log.info('Parsing households.')
    households = load_households(households_file)
    database.insert_values('households', households, 18)
    database.connection.commit()
    del households

    log.info('Parsing persons.')
    persons = load_persons(persons_file)
    database.insert_values('persons', persons, 39)
    database.connection.commit()
    del persons

    log.info('Parsing trips.')
    trips = load_trips(trips_file)
    database.insert_values('trips', trips, 18)
    database.connection.commit()
    del trips

    log.info('Creating indexes on new tables.')
    create_indexes(database)
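# A hedged example of the expected call (hypothetical file names; the ABM
# exports are whatever load_households, load_persons and load_trips read):
#
#     parse_abm(database,
#         trips_file='abm/trips.csv',
#         households_file='abm/households.csv',
#         persons_file='abm/persons.csv')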
def parse_temperatures(database: SqliteUtil, tmin_files: List[str],
        tmax_files: List[str], steps: int, day: int,
        src_epsg: int, prj_epsg: int):
    log.info('Allocating tables for air temperatures.')
    create_tables(database)

    files = zip(tmax_files, tmin_files)
    profile_count = 0
    point_count = 0
    temperatures = []
    points = []
    profiles = {}
    n = 1

    transformer = Transformer.from_crs(f'epsg:{src_epsg}',
        f'epsg:{prj_epsg}', always_xy=True, skip_equivalent=True)
    project = transformer.transform

    def apply(id: int, temp: Callable):
        # Sample the diurnal temperature curve at each simulation step;
        # steps divide the day (86400 s) evenly.
        for step in range(steps):
            prop = step / steps
            row = (id, step, int(86400 * prop), temp(24 * prop))
            yield row

    log.info('Loading temperatures from netCDF4 files.')
    for tmax_file, tmin_file in files:
        tmaxnc = Dataset(tmax_file, 'r')
        tminnc = Dataset(tmin_file, 'r')

        lons = tmaxnc.variables['lon']
        lats = tmaxnc.variables['lat']
        shape = tmaxnc.variables['tmax'].shape

        tmaxs = tmaxnc.variables['tmax'][day]
        tmins = tminnc.variables['tmin'][day]

        for i in range(shape[1]):
            for j in range(shape[2]):
                tmax = tmaxs[i][j]
                tmin = tmins[i][j]

                # -9999.0 is the grid's fill value for missing readings.
                if tmax != -9999.0:
                    x, y = project(lons[i][j], lats[i][j])
                    idx = f'{tmax}-{tmin}'

                    if idx not in profiles:
                        temp = iterpolation(tmin, tmax, 5, 15)
                        temperatures.extend(apply(profile_count, temp))
                        profiles[idx] = profile_count
                        profile_count += 1

                    profile = profiles[idx]
                    point = Point(point_count, x, y, profile)
                    points.append(point)
                    point_count += 1

                    if point_count == n:
                        log.info(f'Loading air temperature reading {point_count}.')
                        n <<= 1

        tmaxnc.close()
        tminnc.close()

    if point_count != n >> 1:
        log.info(f'Loading air temperature reading {point_count}.')

    def load():
        for point in points:
            x, y = point.x, point.y
            yield (point.id, (x, y, x, y), point.profile)

    log.info('Starting network update for air temperatures.')
    log.info('Building spatial index from temperature profile locations.')
    index = Index(load())
    used = set()

    log.info('Loading network links.')
    links = load_links(database)

    log.info('Applying temperature profiles to links.')
    iter_links = counter(links, 'Applying profile to link %s.')
    for link in iter_links:
        result = index.nearest((link.x, link.y, link.x, link.y), objects=True)
        profile = next(result).object
        link.air_temperature = profile
        used.add(profile)

    def dump_links():
        for link in links:
            yield (link.id, link.air_temperature)

    log.info('Writing updated links to database.')
    database.insert_values('temp_links', dump_links(), 2)
    database.connection.commit()
    del links

    log.info('Loading network parcels.')
    parcels = load_parcels(database)

    # Parcels with cooling get one of three constant indoor profiles
    # (26.6667 °C, i.e. 80 °F) by parcel kind.
    residential = profile_count
    temperatures.extend(apply(profile_count, lambda x: 26.6667))
    profile_count += 1
    commercial = profile_count
    temperatures.extend(apply(profile_count, lambda x: 26.6667))
    profile_count += 1
    other = profile_count
    temperatures.extend(apply(profile_count, lambda x: 26.6667))
    profile_count += 1
    used.add(residential)
    used.add(commercial)
    used.add(other)

    log.info('Applying temperature profiles to parcels.')
    iter_parcels = counter(parcels, 'Applying profile to parcel %s.')
    for parcel in iter_parcels:
        if not parcel.cooling:
            x, y = xy(parcel.center)
            result = index.nearest((x, y, x, y), objects=True)
            profile = next(result).object
            parcel.air_temperature = profile
            used.add(profile)
        elif parcel.kind == 'residential':
            parcel.air_temperature = residential
        elif parcel.kind == 'commercial':
            parcel.air_temperature = commercial
        else:
            parcel.air_temperature = other

    def dump_parcels():
        for parcel in parcels:
            yield (parcel.apn, parcel.air_temperature)

    log.info('Writing updated parcels to database.')
    database.insert_values('temp_parcels', dump_parcels(), 2)
    database.connection.commit()
    del parcels

    def dump_temperatures():
        for temp in temperatures:
            if temp[0] in used:
                yield temp

    log.info('Writing parsed air temperatures to database.')
    database.insert_values('air_temperatures', dump_temperatures(), 4)
    database.connection.commit()
    del temperatures

    log.info('Merging, dropping and renaming old tables.')

    query = '''
        CREATE INDEX temp_links_link
        ON temp_links(link_id);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE TABLE temp_links_merged
        AS SELECT
            links.link_id,
            links.source_node,
            links.terminal_node,
            links.length,
            links.freespeed,
            links.capacity,
            links.permlanes,
            links.oneway,
            links.modes,
            temp_links.air_temperature,
            links.mrt_temperature
        FROM links
        INNER JOIN temp_links
        USING(link_id);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE INDEX temp_parcels_parcel
        ON temp_parcels(apn);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE TABLE temp_parcels_merged
        AS SELECT
            parcels.apn,
            parcels.maz,
            parcels.type,
            parcels.cooling,
            temp_parcels.air_temperature,
            parcels.mrt_temperature,
            parcels.center,
            parcels.region
        FROM parcels
        INNER JOIN temp_parcels
        USING(apn);
    '''
    database.cursor.execute(query)

    original = database.count_rows('links')
    merged = database.count_rows('temp_links_merged')
    if original != merged:
        log.error('Original links and updated links tables '
            'do not align; quitting to prevent data loss.')
        raise RuntimeError
    else:
        database.drop_table('links', 'temp_links')
        query = '''
            ALTER TABLE temp_links_merged
            RENAME TO links;
        '''
        database.cursor.execute(query)

    original = database.count_rows('parcels')
    merged = database.count_rows('temp_parcels_merged')
    if original != merged:
        log.error('Original parcels and updated parcels tables '
            'do not align; quitting to prevent data loss.')
        raise RuntimeError
    else:
        database.drop_table('parcels', 'temp_parcels')
        query = '''
            ALTER TABLE temp_parcels_merged
            RENAME TO parcels;
        '''
        database.cursor.execute(query)

    database.connection.commit()

    log.info('Creating indexes on new tables.')
    create_indexes(database)

    log.info('Writing process metadata.')
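# Usage sketch (hypothetical paths; the netCDF files are assumed to be
# Daymet-style daily grids exposing 'lon', 'lat', 'tmax' and 'tmin'
# variables with -9999.0 as the fill value, as read above):
#
#     parse_temperatures(database,
#         tmin_files=['tmin_2018.nc'], tmax_files=['tmax_2018.nc'],
#         steps=96, day=180, src_epsg=4326, prj_epsg=2223)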
def parse_parcels(database: SqliteUtil, residence_file: str,
        commerce_file: str, parcel_file: str, cooling_file: str,
        src_epsg: int, prj_epsg: int):
    boundaries = {}
    cooling = {}
    parcels = []
    apns = set()

    transformer = Transformer.from_crs(f'epsg:{src_epsg}',
        f'epsg:{prj_epsg}', always_xy=True, skip_equivalent=True)
    project = transformer.transform

    log.info('Allocating tables for parcels.')
    create_tables(database)

    log.info('Parsing parcel boundaries from shapefile.')
    parser = shapefile.Reader(parcel_file)
    iter_boundaries = counter(iter(parser), 'Parsing parcel boundary %s.')
    for parcel in iter_boundaries:
        if len(parcel.shape.points):
            apn = parcel.record['APN']
            points = (project(*pt) for pt in parcel.shape.points)
            polygon = Polygon(points)
            boundaries[apn] = polygon
    parser.close()

    log.info('Loading cooling information from csv file.')
    with open(cooling_file, 'r') as open_file:
        lines = csv.reader(open_file, delimiter=',', quotechar='"')
        next(lines)
        for desc, _, cool in lines:
            cooling[desc] = bool(cool)

    log.info('Parsing residential parcels from database file.')
    parser = shapefile.Reader(residence_file)
    iter_parcels = counter(parser.iterRecords(),
        'Parsing residential parcel %s.')
    for record in iter_parcels:
        apn = record['APN']
        if apn in boundaries and apn not in apns:
            cool = True
            polygon = boundaries[apn]
            parcel = Parcel(apn, 'residential', cool, polygon)
            parcels.append(parcel)
            apns.add(apn)
    parser.close()

    log.info('Parsing commercial parcels from database file.')
    parser = shapefile.Reader(commerce_file)
    iter_parcels = counter(parser.iterRecords(),
        'Parsing commercial parcel %s.')
    for record in iter_parcels:
        apn = record['APN']
        if apn in boundaries and apn not in apns:
            desc = record['DESCRIPT']
            cool = cooling[desc]
            polygon = boundaries[apn]
            parcel = Parcel(apn, 'commercial', cool, polygon)
            parcels.append(parcel)
            apns.add(apn)
    parser.close()

    log.info('Parsing extraneous parcels from shapefile.')
    other = set(boundaries.keys()) - apns
    other = counter(other, 'Parsing extraneous parcel %s.')
    for apn in other:
        polygon = boundaries[apn]
        parcel = Parcel(apn, 'other', True, polygon)
        parcels.append(parcel)

    def load():
        for idx, parcel in enumerate(parcels):
            pt = parcel.polygon.centroid
            yield (idx, (pt.x, pt.y, pt.x, pt.y), None)

    log.info('Building spatial index from parcel data.')
    index = Index(load())

    log.info('Loading network region data.')
    regions = load_regions(database)

    log.info('Scanning regions and mapping mazs to parcels.')
    iter_regions = counter(regions, 'Scanning region %s.')
    for region in iter_regions:
        apn = f'maz-{region.maz}'
        parcel = Parcel(apn, 'default', True, region.polygon)
        parcel.maz = region.maz
        parcels.append(parcel)
        result = index.intersection(region.polygon.bounds)
        for idx in result:
            parcel = parcels[idx]
            if region.polygon.contains(parcel.polygon.centroid):
                if parcel.maz is not None:
                    warning = 'Parcel %s is in both region %s and %s; ' \
                        'the latter region will be kept.'
                    log.warning(warning % (parcel.apn, parcel.maz, region.maz))
                parcel.maz = region.maz
    del regions

    def dump():
        for parcel in parcels:
            yield (
                parcel.apn,
                parcel.maz,
                parcel.kind,
                int(parcel.cooling),
                None,
                None,
                dumps(parcel.polygon.centroid),
                dumps(parcel.polygon)
            )

    log.info('Writing parsed parcels to database.')
    database.insert_values('parcels', dump(), 8)
    database.connection.commit()

    log.info('Creating indexes on new tables.')
    create_indexes(database)
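# Usage sketch for the parcel parser (hypothetical file names; per the
# reader above, the cooling csv has three columns with the description
# first and the cooling flag last):
#
#     parse_parcels(database, 'residences.shp', 'commerce.shp',
#         'parcels.shp', 'cooling.csv', src_epsg=2223, prj_epsg=2223)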
def parse_roads(database: SqliteUtil, networkpath: str,
        src_epsg: int, prj_epsg: int):
    log.info('Allocating tables for network links and nodes.')
    create_tables(database)

    log.info('Loading network roads file.')
    network = multiopen(networkpath, mode='rb')
    parser = iter(iterparse(network, events=('start', 'end')))
    evt, root = next(parser)

    transformer = Transformer.from_crs(f'epsg:{src_epsg}',
        f'epsg:{prj_epsg}', always_xy=True, skip_equivalent=True)
    project = transformer.transform

    links = []
    nodes = []
    count, n = 0, 1

    for evt, elem in parser:
        if evt == 'start':
            if elem.tag == 'nodes':
                log.info('Parsing nodes from network file.')
                count, n = 0, 1
                root.clear()
            elif elem.tag == 'links':
                if count != n >> 1:
                    log.info(f'Parsing node {count}.')
                log.info('Parsing links from network file.')
                count, n = 0, 1
                root.clear()
        elif evt == 'end':
            if elem.tag == 'node':
                node_id = str(elem.get('id'))
                x = float(elem.get('x'))
                y = float(elem.get('y'))
                x, y = project(x, y)
                wkt = f'POINT ({x} {y})'
                nodes.append((node_id, None, wkt))
                count += 1
                if count == n:
                    log.info(f'Parsing node {count}.')
                    n <<= 1
                if count % 100000 == 0:
                    root.clear()
            elif elem.tag == 'link':
                source_node = str(elem.get('from'))
                terminal_node = str(elem.get('to'))
                links.append((
                    str(elem.get('id')),
                    source_node,
                    terminal_node,
                    float(elem.get('length')),
                    float(elem.get('freespeed')),
                    float(elem.get('capacity')),
                    float(elem.get('permlanes')),
                    int(elem.get('oneway')),
                    str(elem.get('modes')),
                    None,
                    None
                ))
                count += 1
                if count == n:
                    log.info(f'Parsing link {count}.')
                    n <<= 1
                if count % 100000 == 0:
                    root.clear()

    if count != n >> 1:
        log.info(f'Parsing link {count}.')
    network.close()

    log.info('Writing parsed links and nodes to database.')
    database.insert_values('nodes', nodes, 3)
    database.insert_values('links', links, 11)
    database.connection.commit()

    log.info('Creating indexes on new tables.')
    create_indexes(database)
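# parse_roads walks a MATSim-style network file; an abridged example of the
# structure it expects (attribute values are illustrative):
#
#     <network>
#         <nodes>
#             <node id="1" x="713400.0" y="894700.0"/>
#         </nodes>
#         <links>
#             <link id="10" from="1" to="2" length="100.0" freespeed="13.9"
#                 capacity="1800.0" permlanes="1.0" oneway="1" modes="car"/>
#         </links>
#     </network>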
def parse_mrt(database: SqliteUtil, path: str, src_epsg: int,
        prj_epsg: int, bounds: int = 30, steps: int = 96):
    log.info('Allocating tables for MRT temperature profiles.')
    create_tables(database)

    log.info('Loading network nodes from database.')
    nodes: Dict[str, Node]
    nodes = load_nodes(database)

    log.info('Loading network links from database.')
    links: Dict[str, Link]
    links = load_links(database, nodes)

    log.info(f'Searching for mrt files in {path}.')
    csvfiles = iter(glob(f'{path}/**/*.csv', recursive=True))

    log.info('Handling initial dataset for profile construction.')
    points: List[Point]
    time: int
    points, time = parse_points(next(csvfiles), src_epsg, prj_epsg)

    log.info('Building spatial index on MRT points.')
    index = Index((point.entry() for point in points))

    log.info('Scanning link bounds and building profiles.')
    mapping: Dict[FrozenSet[int], int] = {}
    count = 0
    empty = 0
    iter_links = counter(links.values(), 'Scanning link %s.')
    for link in iter_links:
        # Coefficients of the line through the link's endpoints, used
        # in the point-to-line distance formula below.
        d = link.terminal_node.x * link.source_node.y - \
            link.source_node.x * link.terminal_node.y
        dx = link.terminal_node.x - link.source_node.x
        dy = link.terminal_node.y - link.source_node.y
        l = sqrt(dy * dy + dx * dx)

        nearby = index.intersection(link.bounds(bounds))
        contained = []
        for uuid in nearby:
            point = points[uuid]
            x = point.x
            y = point.y
            if l > 0:
                dist = abs(dy * x - dx * y + d) / l
            else:
                # Degenerate zero-length link: fall back to the distance
                # from the source node.
                px = point.x - link.source_node.x
                py = point.y - link.source_node.y
                dist = sqrt(px * px + py * py)
            if dist <= bounds:
                contained.append(point.id)

        if contained:
            profile = frozenset(contained)
            if profile in mapping:
                link.profile = mapping[profile]
            else:
                mapping[profile] = count
                link.profile = count
                count += 1
        else:
            empty += 1

    profiles: List[Tuple[int]]
    profiles = [tuple(key) for key in mapping.keys()]

    if empty:
        log.warning(f'Found {empty} links without any MRT temperature profile.')

    def dump_points():
        idx = time // (86400 // steps)
        for uuid, profile in enumerate(profiles):
            mrt, pet, utci = 0, 0, 0
            count = len(profile)
            for ptid in profile:
                point = points[ptid]
                mrt += point.mrt
                pet += point.pet
                utci += point.utci
            yield (uuid, idx, time, mrt / count, pet / count, utci / count)

    def dump_links():
        for link in links.values():
            yield (link.id, link.profile)

    log.info('Writing link updates and temperatures to database.')
    database.insert_values('mrt_temperatures', dump_points(), 6)
    database.insert_values('temp_links', dump_links(), 2)

    log.info('Merging, dropping and renaming old tables.')

    query = '''
        CREATE INDEX temp_links_link
        ON temp_links(link_id);
    '''
    database.cursor.execute(query)
    query = '''
        CREATE TABLE temp_links_merged
        AS SELECT
            links.link_id,
            links.source_node,
            links.terminal_node,
            links.length,
            links.freespeed,
            links.capacity,
            links.permlanes,
            links.oneway,
            links.modes,
            links.air_temperature,
            temp_links.mrt_temperature
        FROM links
        INNER JOIN temp_links
        USING(link_id);
    '''
    database.cursor.execute(query)

    original = database.count_rows('links')
    merged = database.count_rows('temp_links_merged')
    if original != merged:
        log.error('Original links and updated links tables '
            'do not align; quitting to prevent data loss.')
        raise RuntimeError
    database.drop_table('links', 'temp_links')
    query = '''
        ALTER TABLE temp_links_merged
        RENAME TO links;
    '''
    database.cursor.execute(query)
    database.connection.commit()

    del links
    del nodes
    del index
    del mapping
    del points

    log.info('Handling remaining temperatures with defined profiles.')

    def dump_temperatures(time: int,
            temperatures: List[Tuple[float, float, float]]):
        idx = time // (86400 // steps)
        for uuid, profile in enumerate(profiles):
            mrt, pet, utci = 0, 0, 0
            count = len(profile)
            for tempid in profile:
                temp = temperatures[tempid]
                mrt += temp[0]
                pet += temp[1]
                utci += temp[2]
            yield (uuid, idx, time, mrt / count, pet / count, utci / count)

    for csvfile in csvfiles:
        time: int
        temperatures: List[Tuple[float, float, float]]
        temperatures, time = parse_temperatures(csvfile)

        log.info('Writing temperature data to database.')
        database.insert_values('mrt_temperatures',
            dump_temperatures(time, temperatures), 6)
        database.connection.commit()

    log.info('Creating indexes on new/updated tables.')
    create_indexes(database)
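# Usage sketch (hypothetical path; the directory is scanned recursively for
# csv files, with the first file seeding the point profiles as above):
#
#     parse_mrt(database, 'input/mrt', src_epsg=4326, prj_epsg=2223,
#         bounds=30, steps=96)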