def test_config_must_be_dag():
    config = ptg.config.default_config()

    assert config.has_edge('routes.txt', 'trips.txt')

    # Make a cycle
    config.add_edge('trips.txt', 'routes.txt')

    path = zip_file('amazon-2017-08-06')

    # Note: the message= kwarg was removed in pytest 5.0; newer pytest
    # uses match= to check the exception text instead.
    with pytest.raises(AssertionError, message='Config must be a DAG'):
        ptg.feed(path, config=config)
def test_filtered_columns(path):
    service_ids_by_date = ptg.read_service_ids_by_date(path)
    service_ids = list(service_ids_by_date.values())[0]

    feed_full = ptg.feed(path)
    feed_view = ptg.feed(path, view={'trips.txt': {'service_id': service_ids}})
    feed_null = ptg.feed(path, view={'trips.txt': {'service_id': 'never-match'}})

    assert set(feed_full.trips.columns) == set(feed_view.trips.columns)
    assert set(feed_full.trips.columns) == set(feed_null.trips.columns)
def test_add_shape_dist_traveled(zip_file, scenario_results, scenario_date):
    service_ids_by_date = ptg.read_service_ids_by_date(zip_file)
    service_ids = service_ids_by_date[scenario_date]

    feed = ptg.feed(zip_file, view={
        'trips.txt': {
            'service_id': service_ids,
        },
    })

    stop_times_df = Trip.add_shape_dist_traveled(feed.stop_times, feed.stops)
    stop_times_df.sort_values(
        [Trip.TRIPS_COLUMN_TRIP_ID, Trip.STOPTIMES_COLUMN_STOP_SEQUENCE],
        inplace=True)

    # Python 3: dict.items() and print() replace the Python 2
    # iteritems()/print-statement forms used originally
    for trip_id, expected_array in scenario_results.items():
        print(stop_times_df[stop_times_df[
            Trip.TRIPS_COLUMN_TRIP_ID] == trip_id][
            Trip.STOPTIMES_COLUMN_SHAPE_DIST_TRAVELED].values.tolist())
        np.testing.assert_allclose(
            stop_times_df[stop_times_df[Trip.TRIPS_COLUMN_TRIP_ID] == trip_id][
                Trip.STOPTIMES_COLUMN_SHAPE_DIST_TRAVELED].values,
            expected_array, rtol=0, atol=0.00001)
def get_representative_feed(file_loc: str, day_type: str = 'busiest'):
    # Extract service ids and then trip counts by those dates
    service_ids_by_date = ptg.read_service_ids_by_date(file_loc)
    trip_counts_by_date = ptg.read_trip_counts_by_date(file_loc)

    # Make sure we have some valid values returned in trips
    if not len(trip_counts_by_date.items()):
        # Otherwise, error out
        raise InvalidGTFS('No valid trip counts by date '
                          'were identified in GTFS.')

    # At this point, different methods can be implemented to help select how
    # to pick which date/schedule id to use
    if day_type == 'busiest':
        # Choose the service id that has the most trips associated with it
        (selected_date,
         trip_count) = max(trip_counts_by_date.items(), key=lambda p: p[1])
    else:
        raise NotImplementedError('Unsupported day type string supplied.')

    log('Selected_date: {}'.format(selected_date))
    log('Number of trips on that date: {}'.format(trip_count))

    all_service_ids = '\n\t'.join(service_ids_by_date[selected_date])
    log('\nAll related service IDs: \n\t{}'.format(all_service_ids))

    sub = service_ids_by_date[selected_date]
    feed_query = {'trips.txt': {'service_id': sub}}

    return ptg.feed(file_loc, view=feed_query)
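# A minimal usage sketch for get_representative_feed; 'gtfs.zip' is a
# placeholder path to a local GTFS archive. The returned partridge feed is
# filtered to the service ids of the busiest date in the archive.
feed = get_representative_feed('gtfs.zip', day_type='busiest')
print(len(feed.trips), 'trips on the busiest service date')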
def test_extract_routes(path):
    fd = ptg.feed(path)

    agencies = fd.agency
    assert len(agencies) == 3

    routes = fd.routes
    assert len(routes) == 14

    route_ids = [routes.iloc[0].route_id]
    agency_ids = set(fd.routes[fd.routes.route_id.isin(route_ids)].agency_id)
    trip_ids = set(fd.trips[fd.trips.route_id.isin(route_ids)].trip_id)
    stop_ids = set(fd.stop_times[fd.stop_times.trip_id.isin(trip_ids)].stop_id)

    assert len(agency_ids)
    assert len(trip_ids)
    assert len(stop_ids)

    try:
        tmpdir = tempfile.mkdtemp()
        outfile = os.path.join(tmpdir, 'test.zip')

        result = ptg.extract_routes(path, outfile, route_ids)
        assert result == outfile

        new_fd = ptg.feed(outfile)
        assert list(new_fd.routes.route_id) == route_ids
        assert set(new_fd.agency.agency_id) == agency_ids
        assert set(new_fd.trips.trip_id) == trip_ids
        assert set(new_fd.stop_times.trip_id) == trip_ids
        assert set(new_fd.stops.stop_id) == stop_ids

        nodes = []
        for node in fd.config.nodes():
            df = fd.get(node)
            if not df.empty:
                nodes.append(node)
        assert len(nodes)

        for node in nodes:
            original_df = fd.get(node)
            new_df = new_fd.get(node)
            assert set(original_df.columns) == set(new_df.columns)
    finally:
        shutil.rmtree(tmpdir)
def test_read_file(path, dates, shapes):
    service_ids_by_date = ptg.read_service_ids_by_date(path)
    service_ids = {
        service_id
        for date in dates
        if date in service_ids_by_date
        for service_id in service_ids_by_date[date]
    }

    if service_ids:
        feed = ptg.feed(path, view={'trips.txt': {'service_id': service_ids}})
    else:
        feed = ptg.feed(path)

    for filename, shape in shapes.items():
        assert feed.get(filename).shape == shape, \
            '{}/{} dataframe shape was incorrect'.format(path, filename)
def get_partridge_feed_by_date(zip_path, date):
    service_ids_by_date = ptg.read_service_ids_by_date(zip_path)
    service_ids = service_ids_by_date[date]

    feed = ptg.feed(zip_path, view={
        'trips.txt': {
            'service_id': service_ids,
        },
    })
    return feed
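# A minimal usage sketch for get_partridge_feed_by_date; the archive path is
# a placeholder, and the date must be one the feed's calendar actually covers
# (otherwise the service_ids_by_date lookup raises KeyError).
feed = get_partridge_feed_by_date('gtfs.zip', datetime.date(2018, 2, 13))
print(feed.trips.shape)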
def gtfs_feed(zip_file, network_date):
    from fasttrips.Assignment import Assignment
    Assignment.NETWORK_BUILD_DATE = network_date

    service_ids_by_date = ptg.read_service_ids_by_date(zip_file)
    service_ids = service_ids_by_date[network_date]

    feed = ptg.feed(os.path.join(zip_file),
                    config=Util.get_fast_trips_config(),
                    view={
                        'trips.txt': {
                            'service_id': service_ids
                        },
                    })
    yield feed
def get_gtfs_feed(network, network_date):
    from fasttrips.Assignment import Assignment
    from fasttrips.Util import Util

    Assignment.NETWORK_BUILD_DATE = network_date

    service_ids_by_date = ptg.read_service_ids_by_date(network)
    service_ids = service_ids_by_date[network_date]

    feed = ptg.feed(network,
                    config=Util.get_fast_trips_config(),
                    view={
                        'trips.txt': {
                            'service_id': service_ids
                        },
                    })
    return feed
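# A hypothetical call to get_gtfs_feed; the network path and build date below
# are placeholders, and fasttrips must be importable so that
# Util.get_fast_trips_config() resolves.
feed = get_gtfs_feed('network.zip', datetime.date(2016, 11, 1))
print(feed.trips.service_id.nunique(), 'service ids on the build date')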
def read_input_files(self):
    """
    Reads in the input network and demand files and initializes the
    relevant data structures.
    """
    self.performance.record_step_start(0, 0, 0, "read_input_files")

    # Read the GTFS files first
    FastTripsLogger.info("Reading GTFS schedule")

    service_ids_by_date = ptg.read_service_ids_by_date(
        Assignment.INPUT_NETWORK_ARCHIVE)
    service_ids = service_ids_by_date[Assignment.NETWORK_BUILD_DATE]

    gtfs_feed = ptg.feed(os.path.join(Assignment.INPUT_NETWORK_ARCHIVE),
                         config=Util.get_fast_trips_config(),
                         view={
                             'trips.txt': {
                                 'service_id': service_ids
                             },
                         })

    # Read stops (GTFS-required)
    self.stops = Stop(Assignment.INPUT_NETWORK_ARCHIVE,
                      Assignment.OUTPUT_DIR,
                      gtfs_feed,
                      Assignment.NETWORK_BUILD_DATE)

    # Read routes, agencies, fares
    self.routes = Route(Assignment.INPUT_NETWORK_ARCHIVE,
                        Assignment.OUTPUT_DIR,
                        gtfs_feed,
                        Assignment.NETWORK_BUILD_DATE,
                        self.stops)

    # Read transfers
    self.transfers = Transfer(Assignment.INPUT_NETWORK_ARCHIVE,
                              Assignment.OUTPUT_DIR,
                              gtfs_feed)

    # Read trips, vehicles, calendar and stoptimes
    self.trips = Trip(Assignment.INPUT_NETWORK_ARCHIVE,
                      Assignment.OUTPUT_DIR,
                      gtfs_feed,
                      Assignment.NETWORK_BUILD_DATE,
                      self.stops,
                      self.routes,
                      Assignment.PREPEND_ROUTE_ID_TO_TRIP_ID)

    # Read the TAZs into a TAZ instance
    self.tazs = TAZ(Assignment.OUTPUT_DIR,
                    gtfs_feed,
                    Assignment.NETWORK_BUILD_DATE,
                    self.stops,
                    self.transfers,
                    self.routes)

    # Read the demand into a passenger_id -> passenger instance mapping
    self.passengers = Passenger(Assignment.INPUT_DEMAND_DIR,
                                Assignment.OUTPUT_DIR,
                                Assignment.NETWORK_BUILD_DATE,
                                self.stops,
                                self.routes,
                                Assignment.CAPACITY_CONSTRAINT)
def get_partridge_feed_by_date(zip_path, date):
    service_ids_by_date = ptg.read_service_ids_by_date(
        zip_path)  # , encoding='utf-8')
    service_ids = service_ids_by_date[date]

    feed = ptg.feed(
        zip_path,
        view={
            'trips.txt': {
                'service_id': service_ids,
            },
        },
        # encoding='utf-8'  # CUSTOM VERSION, NOT YET PUSHED
    )
    return feed
def gtfs_feed(network):
    from fasttrips.Assignment import Assignment
    Assignment.NETWORK_BUILD_DATE = network[2]

    network_dir = os.path.join(HOME_DIR, network[0], "networks", network[1])

    service_ids_by_date = ptg.read_service_ids_by_date(network_dir)
    # Use the build date unpacked from the network tuple above (the original
    # referenced an undefined `network_date` here)
    service_ids = service_ids_by_date[Assignment.NETWORK_BUILD_DATE]

    feed = ptg.feed(network_dir,
                    config=Util.get_fast_trips_config(),
                    view={
                        'trips.txt': {
                            'service_id': service_ids
                        },
                    })
    yield feed
def prepare_partridge_feed(
        date: datetime.date,
        gtfs_file_full_path: str,
        filtered_feeds_directory=configuration.files.full_paths.filtered_feeds):
    if configuration.write_filtered_feed:
        filtered_gtfs_path = join(filtered_feeds_directory,
                                  basename(gtfs_file_full_path))

        logging.info(f'Filtering gtfs feed for {date} from '
                     f'{gtfs_file_full_path} into {filtered_gtfs_path}')
        write_filtered_feed_by_date(gtfs_file_full_path, date,
                                    filtered_gtfs_path)

        logging.info(f'Reading filtered feed for file from path '
                     f'{filtered_gtfs_path}')
        feed = ptg.feed(filtered_gtfs_path)
    else:
        logging.info(f'Creating daily partridge feed for {date} from '
                     f'{gtfs_file_full_path}')
        feed = get_partridge_feed_by_date(gtfs_file_full_path, date)

    logging.debug(f'Finished creating daily partridge feed for {date} from '
                  f'{gtfs_file_full_path}')
    return feed
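# A hedged usage sketch for prepare_partridge_feed; the date and path are
# placeholders, and the `configuration` object is assumed to be the module's
# own settings singleton referenced in the function above.
feed = prepare_partridge_feed(datetime.date(2019, 3, 7),
                              '/data/gtfs/2019-03-07.zip')
print(len(feed.routes))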
trn_stop_labels["TM2 Node"] = trn_stop_labels["TM2 Node"].astype(int) for operator in ["Caltrain", "San Francisco MUNI","Vallejo Baylink Ferry", "Blue and Gold", "Amtrak Capitol Cor. & Reg. Svc", "BART", "ACE", "Golden Gate Ferry", "Alameda Harbor Bay Ferry","Alameda/Oakland Ferry","Vallejo Baylink Ferry", "Santa Clara VTA", "Blue and Gold"]: Wrangler.WranglerLogger.info("Processing operator %s" % operator) # get the stop labels for this operator operator_stop_labels = trn_stop_labels.loc[ trn_stop_labels["Operator"] == operator ] Wrangler.WranglerLogger.debug("operator_stop_labels.head()\n%s" % operator_stop_labels.head()) operator_stop_label_dict = operator_stop_labels.set_index(["TM2 Node"]).to_dict(orient="index") # read GTFS fullpath = os.path.join(GTFS_DIR, GTFS_NETWORKS[operator]) service_ids_by_date = partridge.read_service_ids_by_date(fullpath) service_ids = service_ids_by_date[datetime.date(2015,03,11)] feed = partridge.feed(fullpath, view={'trips.txt':{'service_id':service_ids}}) # lets see the stop_times with the stop names gtfs_stop_times = pandas.merge(left=feed.stop_times, right=feed.stops[["stop_id","stop_name"]]).sort_values(by=["trip_id","stop_sequence"]) # and the route_id and direction_id gtfs_stop_times = pandas.merge(left=gtfs_stop_times, right=feed.trips[["trip_id","route_id","direction_id"]], how="left") # and route_long_name and route_type gtfs_stop_times = pandas.merge(left=gtfs_stop_times, right=feed.routes[["route_id","route_long_name","route_type"]], how="left") # => filter out buses since the travel time comes from traffic gtfs_stop_times = gtfs_stop_times.loc[gtfs_stop_times.route_type != 3,:] # join TM2 node number from both the gtfs cols in the mapping for gtfs_col in ["GTFS stop_id NB/inbound","GTFS stop_id SB/outbound"]:
def get_partridge_feed_by_date(zip_path: str, date: datetime.date):
    return ptg.feed(zip_path,
                    view=get_partridge_filter_for_date(zip_path, date))
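# get_partridge_filter_for_date is referenced above but not shown; a minimal
# sketch, assuming it builds the standard partridge view for a single service
# date (this is an assumption, not the project's actual helper):
def get_partridge_filter_for_date(zip_path: str, date: datetime.date) -> dict:
    service_ids_by_date = ptg.read_service_ids_by_date(zip_path)
    return {'trips.txt': {'service_id': service_ids_by_date[date]}}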
def test_missing_zip():
    # Note: the message= kwarg was removed in pytest 5.0; newer pytest
    # uses match= to check the exception text instead.
    with pytest.raises(AssertionError, message='File or path not found'):
        ptg.feed(fixture('missing.zip'))
def after(data, datadir):
    global join, readZippedShapefile, fastOverlay, ptg, Point, gp, pd, \
        FOOT_TO_METER, ACRE_TO_HECTARE

    print('reprojecting data')
    data = data.to_crs(epsg=26942)

    # M-1, M-1(S) and M-2 zones conditionally permit multifamily housing iff
    # it is in the central city or within 1/4 mile of a light rail stop
    print('loading central city')
    # this file was created by hand based on the description in the code
    centralCity = readZippedShapefile(
        join(datadir, 'sacramento_central_city.zip')).to_crs(epsg=26942)

    print('loading light rail stations from GTFS')
    feed = ptg.feed(join(datadir, 'sacramento_gtfs_20180213.zip'))
    lightRailRoutes = feed.routes.route_id[feed.routes.route_type == 0]
    lightRailTrips = feed.trips.trip_id[feed.trips.route_id.isin(lightRailRoutes)]
    feed.stop_times.set_index(['trip_id', 'stop_sequence'], inplace=True)
    lightRailStopIds = feed.stop_times.loc[lightRailTrips, 'stop_id'].unique()
    feed.stops.set_index('stop_id', inplace=True)
    lightRailStops = feed.stops.loc[lightRailStopIds].copy()
    print(f'found {len(lightRailStops)} light rail stops')

    # convert to geodataframe
    lightRailStops['geometry'] = lightRailStops.apply(
        lambda stop: Point(stop.stop_lon, stop.stop_lat), 1)
    # GTFS is defined to be WGS 84
    lightRailStops = gp.GeoDataFrame(lightRailStops, geometry='geometry',
                                     crs={'init': 'epsg:4326'})
    lightRailStops = lightRailStops.to_crs(epsg=26942)

    # save memory
    del feed

    print('buffering light rail stops')
    lightRailStops['geometry'] = lightRailStops.buffer(
        5280 / 4 * FOOT_TO_METER, resolution=32)

    # For M and RMX-SPD-R St zones, we cut these zones out of the whole file,
    # overlay them with the affected area, and then merge them back in.
    print('adding multifamily as conditional use to industrial zones in '
          'central city and near light rail')
    # the original had a misplaced parenthesis here that dropped the M-2 test;
    # startswith accepts a tuple of prefixes
    industrialZoneLocs = data.zone.apply(
        lambda zone: zone.startswith(('M-1', 'M-1(S)', 'M-2')))
    industrialZones = data[industrialZoneLocs]

    affectedAreas = lightRailStops.loc[:, ['geometry']].copy()
    # and add the central city
    affectedAreas = affectedAreas.append(centralCity)
    # add a flag column so we know which resulting geometries overlapped
    affectedAreas['affected'] = 42

    # Do an overlay so that we split large industrial zones at the boundaries
    # of the affected area
    splitIndustrialAreas = gp.overlay(industrialZones, affectedAreas,
                                      how='union')
    # and set the multiFamily flag
    splitIndustrialAreas['multiFamily'] = splitIndustrialAreas['affected'].apply(
        lambda x: 'conditional' if x == 42 else 'no')

    print('setting density limits in RMX-SPD-R Street Corridor')
    rmxSpdRstLocs = data.zone == 'RMX-SPD-R Street Corridor'
    rmxSpdRst = data[rmxSpdRstLocs]
    affectedAreas = lightRailStops.loc[:, ['geometry']].copy()
    # add a flag column so we know which resulting geometries overlapped
    affectedAreas['affected'] = 42
    splitRmxSpd = gp.overlay(rmxSpdRst, affectedAreas, how='union')
    splitRmxSpd['loMaxUnitsPerHectare'] = splitRmxSpd['hiMaxUnitsPerHectare'] = \
        splitRmxSpd.affected.apply(
            lambda x: 100 / ACRE_TO_HECTARE if x == 42 else 60 / ACRE_TO_HECTARE)

    # put it all back together into a single dataframe
    recombined = gp.GeoDataFrame(
        pd.concat([
            data[~(rmxSpdRstLocs | industrialZoneLocs)],
            splitRmxSpd,
            splitIndustrialAreas
        ]),
        geometry='geometry',
        crs={'init': 'epsg:26942'}
    )

    # drop the 'affected' column we were using as a flag
    del recombined['affected']

    return recombined
def after(data, datadir):
    global join, readZippedShapefile, fastOverlay, ptg, Point, gp, pd, \
        FOOT_TO_METER, ACRE_TO_HECTARE, np

    print('reprojecting data')
    data = data.to_crs(epsg=26942)

    print('adding parking requirements \U0001f697')
    # Parking Districts required a CA Public Records Act request:
    # https://sacramentoca.mycusthelp.com/WEBAPP/_rs/(S(2rztm4xsj04qo445twgqihlj))/RequestArchiveDetails.aspx?rid=8033&view=1
    parkingDistricts = readZippedShapefile(
        join(datadir, 'sacramento_parking.zip')).to_crs(epsg=26942)\
        .rename(columns={'SECTION': 'parkingDist'})
    data = fastOverlay(data, parkingDistricts)
    data['parkingDist'] = data.parkingDist.astype('category')

    # http://www.qcode.us/codes/sacramento/view.php?topic=17-vi-17_608-17_608_030&frames=on
    # Most places have no max parking requirement, overwritten in the CBD below
    data['loMaxParkingPerUnit'] = np.inf
    data['hiMaxParkingPerUnit'] = np.inf

    data.loc[data.parkingDist == 'Central Business District',
             'loMinParkingPerUnit'] = 0
    data.loc[data.parkingDist == 'Central Business District',
             'hiMinParkingPerUnit'] = 0
    data.loc[(data.parkingDist == 'Central Business District') &
             (data.multiFamily == 'yes'), 'loMaxParkingPerUnit'] = 1
    data.loc[(data.parkingDist == 'Central Business District') &
             (data.multiFamily == 'yes'), 'hiMaxParkingPerUnit'] = 1

    data.loc[(data.parkingDist == 'Urban') & (data.multiFamily == 'yes'),
             'loMinParkingPerUnit'] = 0.5
    data.loc[(data.parkingDist == 'Urban') & (data.multiFamily == 'yes'),
             'hiMinParkingPerUnit'] = 0.5
    # != yes: no or conditional
    # NB not encoding exception for lots under 3200 sq feet
    data.loc[(data.parkingDist == 'Urban') & (data.multiFamily != 'yes'),
             'loMinParkingPerUnit'] = 1
    data.loc[(data.parkingDist == 'Urban') & (data.multiFamily != 'yes'),
             'hiMinParkingPerUnit'] = 1

    data.loc[data.parkingDist == 'Traditional', 'loMinParkingPerUnit'] = 1
    data.loc[data.parkingDist == 'Traditional', 'hiMinParkingPerUnit'] = 1

    data.loc[(data.parkingDist == 'Suburban') & (data.multiFamily == 'yes'),
             'loMinParkingPerUnit'] = 1.5
    data.loc[(data.parkingDist == 'Suburban') & (data.multiFamily == 'yes'),
             'hiMinParkingPerUnit'] = 1.5
    data.loc[(data.parkingDist == 'Suburban') & (data.multiFamily != 'yes'),
             'loMinParkingPerUnit'] = 1
    data.loc[(data.parkingDist == 'Suburban') & (data.multiFamily != 'yes'),
             'hiMinParkingPerUnit'] = 1

    # M-1, M-1(S) and M-2 zones conditionally permit multifamily housing iff
    # it is in the central city or within 1/4 mile of a light rail stop
    print('loading central city')
    # this file was created by hand based on the description in the code
    centralCity = readZippedShapefile(
        join(datadir, 'sacramento_central_city.zip')).to_crs(epsg=26942)

    print('loading light rail stations from GTFS')
    feed = ptg.feed(join(datadir, 'sacramento_gtfs_20180213.zip'))
    lightRailRoutes = feed.routes.route_id[feed.routes.route_type == 0]
    lightRailTrips = feed.trips.trip_id[feed.trips.route_id.isin(lightRailRoutes)]
    feed.stop_times.set_index(['trip_id', 'stop_sequence'], inplace=True)
    lightRailStopIds = feed.stop_times.loc[lightRailTrips, 'stop_id'].unique()
    feed.stops.set_index('stop_id', inplace=True)
    lightRailStops = feed.stops.loc[lightRailStopIds].copy()
    print(f'found {len(lightRailStops)} light rail stops')

    # convert to geodataframe
    lightRailStops['geometry'] = lightRailStops.apply(
        lambda stop: Point(stop.stop_lon, stop.stop_lat), 1)
    # GTFS is defined to be WGS 84
    lightRailStops = gp.GeoDataFrame(lightRailStops, geometry='geometry',
                                     crs={'init': 'epsg:4326'})
    lightRailStops = lightRailStops.to_crs(epsg=26942)

    # save memory
    del feed

    print('buffering light rail stops')
    lightRailStops['geometry'] = lightRailStops.buffer(
        5280 / 4 * FOOT_TO_METER, resolution=32)

    # For M and RMX-SPD-R St zones, we cut these zones out of the whole file,
    # overlay them with the affected area, and then merge them back in.
    print('adding multifamily as conditional use to industrial zones near '
          'light rail')
    # the original had a misplaced parenthesis here that dropped the M-2 test;
    # startswith accepts a tuple of prefixes
    industrialZoneLocs = data.zone.apply(
        lambda zone: zone.startswith(('M-1', 'M-1(S)', 'M-2')))
    industrialZones = data[industrialZoneLocs]

    affectedAreas = lightRailStops.loc[:, ['geometry']].copy()
    # add a flag column so we know which resulting geometries overlapped
    affectedAreas['lightRail'] = True

    # Do an overlay so that we split large industrial zones at the boundaries
    # of the affected area, and add the central city
    splitIndustrialAreas = gp.overlay(industrialZones, affectedAreas,
                                      how='identity')
    centralCity['centralCity'] = True
    splitIndustrialAreas = gp.overlay(splitIndustrialAreas, centralCity,
                                      how='identity')
    splitIndustrialAreas['lightRail'] = splitIndustrialAreas.lightRail.fillna(False)
    splitIndustrialAreas['centralCity'] = splitIndustrialAreas.centralCity.fillna(False)

    # and set the multiFamily flag. Conditional at light rail, yes in central city
    # http://qcode.us/codes/sacramento/view.php?topic=17-ii-17_220-i-17_220_110&frames=on
    splitIndustrialAreas['multiFamily'] = splitIndustrialAreas.apply(
        lambda x: 'yes' if x.centralCity
        else 'conditional' if x.lightRail
        else 'no', 'columns')

    print('setting density limits in RMX-SPD-R Street Corridor')
    rmxSpdRstLocs = data.zone == 'RMX-SPD-R Street Corridor'
    rmxSpdRst = data[rmxSpdRstLocs]
    affectedAreas = lightRailStops.loc[:, ['geometry']].copy()
    # add a flag column so we know which resulting geometries overlapped
    affectedAreas['affected'] = 42
    splitRmxSpd = gp.overlay(rmxSpdRst, affectedAreas, how='identity')
    splitRmxSpd['loMaxUnitsPerHectare'] = splitRmxSpd['hiMaxUnitsPerHectare'] = \
        splitRmxSpd.affected.apply(
            lambda x: 100 / ACRE_TO_HECTARE if x == 42 else 60 / ACRE_TO_HECTARE)

    # put it all back together into a single dataframe
    recombined = gp.GeoDataFrame(
        pd.concat([
            data[~(rmxSpdRstLocs | industrialZoneLocs)],
            splitRmxSpd,
            splitIndustrialAreas
        ]),
        geometry='geometry',
        crs={'init': 'epsg:26942'}
    )

    # drop the 'affected' column we were using as a flag
    del recombined['affected']

    return recombined
Wrangler.WranglerLogger.info("Processing operator %s" % operator)

# get the stop labels for this operator
operator_stop_labels = trn_stop_labels.loc[
    trn_stop_labels["Operator"] == operator]
Wrangler.WranglerLogger.debug("operator_stop_labels.head()\n%s" %
                              operator_stop_labels.head())
operator_stop_label_dict = operator_stop_labels.set_index(
    ["TM2 Node"]).to_dict(orient="index")

# read GTFS
fullpath = os.path.join(GTFS_DIR, GTFS_NETWORKS[operator])
service_ids_by_date = partridge.read_service_ids_by_date(fullpath)
service_ids = service_ids_by_date[datetime.date(2015, 3, 11)]
feed = partridge.feed(fullpath,
                      view={'trips.txt': {
                          'service_id': service_ids
                      }})

# let's see the stop_times with the stop names
gtfs_stop_times = pandas.merge(
    left=feed.stop_times,
    right=feed.stops[["stop_id", "stop_name"]]
).sort_values(by=["trip_id", "stop_sequence"])
# and the route_id and direction_id
gtfs_stop_times = pandas.merge(
    left=gtfs_stop_times,
    right=feed.trips[["trip_id", "route_id", "direction_id"]],
    how="left")
# and route_long_name and route_type
gtfs_stop_times = pandas.merge(
    left=gtfs_stop_times,