#!/usr/bin/env python
# coding: utf-8
import gtfstk as gt

path = 'gtfs.zip'
feed = gt.read_gtfs(path, dist_units='km')

# Split the feed into one sub-feed per agency and write each to its own zip.
for agency in feed.agency.agency_id:
    route_ids_for_agency = list(
        feed.routes[feed.routes['agency_id'] == agency]['route_id'])
    little_feed = feed.restrict_to_routes(route_ids=route_ids_for_agency)
    gt.write_gtfs(little_feed, 'output/{}.zip'.format(agency))
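A quick sanity check of the split, as a sketch; 'AGENCY1' is a placeholder for one of the agency ids written above:

little = gt.read_gtfs('output/AGENCY1.zip', dist_units='km')  # placeholder agency id
assert set(little.routes.agency_id) <= {'AGENCY1'}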
import gtfstk as gt
import pandas as pd
from dateutil.parser import parse

# `hp` (helper functions such as validate_feed, feed_obj_to_dict, text2sec)
# and the `Feed` class come from the surrounding package.


def read_gtfs(
        path,
        dt,  # date to validate the feed on, e.g. "Thursday" or "20181201"
        dist_units=None):
    """
    Create a Feed instance from the given path and given distance units.

    The path should be a directory containing GTFS text files or a zip file
    that unzips as a collection of GTFS text files (and not as a directory
    containing GTFS text files).

    The distance units given must lie in :const:`constants.dist_units`

    Notes
    -----
    - Ignore non-GTFS files
    - Automatically strip whitespace from the column names in GTFS files
    - This is based on the gtfstk library
    """
    gt_feed = gt.read_gtfs(path, dist_units)

    # Validate the feed for a specific day (either a date or a day of the week) ====
    if not gt.valid_date(dt):
        dt = gt_feed.get_first_week()[parse(dt).weekday()]
    gt_feed = hp.validate_feed(gt_feed, dt)
    feed_dict = hp.feed_obj_to_dict(gt_feed)
    feed_dict['valid_date'] = dt

    # Calculate PT segments ====
    PT_links_df = feed_dict['stop_times'].copy()

    # Make sure each trip's stops are sorted by stop_sequence
    PT_links_df.sort_values(by=['trip_id', 'stop_sequence'], inplace=True)

    # Convert the stop_times into PT links
    PT_links_df.rename(columns={
        'arrival_time': 'o_time',
        'stop_id': 'o_stop',
        'stop_sequence': 'o_sequence'
    }, inplace=True)
    PT_links_df[['d_time', 'd_stop', 'd_sequence']] = PT_links_df[
        ['o_time', 'o_stop', 'o_sequence']].shift(-1)

    # Drop the last stop of each trip, where the shift wraps to the next trip
    PT_links_df = PT_links_df[
        PT_links_df['o_sequence'] < PT_links_df['d_sequence']].copy()

    # Convert the times into seconds for easier time calculations
    PT_links_df['o_time_sec'] = PT_links_df['o_time'].apply(hp.text2sec)
    PT_links_df['d_time_sec'] = PT_links_df['d_time'].apply(hp.text2sec)
    PT_links_df['duration'] = (
        PT_links_df['d_time_sec'] - PT_links_df['o_time_sec'])

    # Add route_id using the trips table
    PT_links_df = PT_links_df.merge(feed_dict['trips'])

    # Add the route type in text format to the link dataset
    PT_links_df = PT_links_df.merge(feed_dict['routes'])
    route_type = {
        '0': 'Tram, Streetcar, Light rail',
        '1': 'Subway, Metro',
        '2': 'Rail',
        '3': 'Bus',
        '4': 'Ferry',
        '5': 'Cable car',
        '6': 'Gondola, Suspended cable car',
        '7': 'Funicular'
    }
    PT_links_df['route_type'] = PT_links_df['route_type'].astype(str)
    PT_links_df['route_type'].replace(route_type, inplace=True)

    # Add the stop sequence to PT_links_df
    def stop_seq_for_trips(stop_times_df):
        """
        Create a dataframe of the stop sequence for each trip.

        The output format is: the first field is the trip_id; the second
        field is the stop_ids, separated by semicolons, in order of their
        sequence.
        """

        def get_first_trip(group):
            stop_seq = ";".join(group['stop_id'].tolist()) + ";"
            trip_id = group['trip_id'].iat[0]
            trip_dict = {'stop_seq': stop_seq, 'trip_id': trip_id}
            return pd.DataFrame(trip_dict, index=[0])

        stop_seq_df = stop_times_df.groupby('trip_id').apply(
            get_first_trip).reset_index(drop=True)
        return stop_seq_df

    stop_seq_df = stop_seq_for_trips(feed_dict['stop_times'])
    PT_links_df = PT_links_df.merge(stop_seq_df)

    def remaining_stops(row):
        # Keep only the part of the trip's stop sequence after the origin stop
        sid = row['o_stop'] + ";"
        seq = row['stop_seq']
        return seq.split(sid, 1)[-1]

    PT_links_df['stop_seq'] = PT_links_df.apply(remaining_stops, axis=1)

    # Add stop lat and lon
    PT_links_df = PT_links_df.merge(
        feed_dict['stops'][['stop_id', 'stop_lat', 'stop_lon']],
        left_on='o_stop',
        right_on='stop_id',
        how='left').drop('stop_id', axis=1)
    PT_links_df.rename(columns={
        'stop_lat': 'o_stop_lat',
        'stop_lon': 'o_stop_lon'
    }, inplace=True)
    PT_links_df = PT_links_df.merge(
        feed_dict['stops'][['stop_id', 'stop_lat', 'stop_lon']],
        left_on='d_stop',
        right_on='stop_id',
        how='left').drop('stop_id', axis=1)
    PT_links_df.rename(columns={
        'stop_lat': 'd_stop_lat',
        'stop_lon': 'd_stop_lon'
    }, inplace=True)

    feed_dict['feed_segments'] = PT_links_df

    # Drop private gtfstk attributes before building the Feed
    for key in ['_trips_i', '_calendar_i', '_calendar_dates_g']:
        if key in feed_dict:
            del feed_dict[key]

    return Feed(**feed_dict)
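A minimal usage sketch. The `text2sec` body below is an assumption (the real helper lives in `hp`), converting GTFS 'HH:MM:SS' strings into seconds; the call assumes the package's `Feed` class exposes the `feed_segments` table built above:

def text2sec(t):
    # Assumed implementation of hp.text2sec: 'HH:MM:SS' -> seconds.
    # GTFS times past midnight may have hours >= 24, so avoid datetime parsing.
    h, m, s = (int(x) for x in t.split(':'))
    return 3600 * h + 60 * m + s


feed = read_gtfs('gtfs.zip', 'Thursday', dist_units='km')
print(feed.feed_segments[['o_stop', 'd_stop', 'duration']].head())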
import importlib
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
import gtfstk

slow = pytest.mark.skipif(
    not pytest.config.getoption("--runslow"),
    reason="need --runslow option to run")

# Check if GeoPandas is installed
loader = importlib.find_loader('geopandas')
HAS_GEOPANDAS = loader is not None

# Check if Folium is installed
loader = importlib.find_loader('folium')
HAS_FOLIUM = loader is not None

# Load/create test feeds
DATA_DIR = Path('data')
sample = gtfstk.read_gtfs(DATA_DIR / 'sample_gtfs.zip', dist_units='km')
cairns = gtfstk.read_gtfs(DATA_DIR / 'cairns_gtfs.zip', dist_units='km')
cairns_shapeless = cairns.copy()
cairns_shapeless.shapes = None
t = cairns_shapeless.trips
t['shape_id'] = np.nan
cairns_shapeless.trips = t
week = cairns.get_first_week()
cairns_dates = [week[0], week[1]]
cairns_trip_stats = pd.read_csv(
    DATA_DIR / 'cairns_trip_stats.csv', dtype=gtfstk.DTYPE)
import tempfile

import requests
import gtfstk


def url2gtfs(url):
    r = requests.get(url)
    with tempfile.NamedTemporaryFile(delete=False) as f:
        # Use the public `content` attribute rather than `r._content`
        f.write(r.content)
        return gtfstk.read_gtfs(f.name, dist_units='mi')
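A usage sketch; the URL is a placeholder, not a real feed endpoint:

feed = url2gtfs('https://example.com/gtfs.zip')  # placeholder URL
print(feed.agency)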
import gtfstk as gt

from .context import clean_auckland_gtfs, DATA_DIR
from clean_auckland_gtfs import *

feed = gt.read_gtfs(DATA_DIR / 'raw_auckland_gtfs_20161122.zip',
                    dist_units='km')


def test_drop_school_routes():
    n = feed.routes.shape[0]
    feed1 = drop_school_routes(feed)

    # Should drop some routes
    k = feed1.routes.shape[0]
    assert k < n


def test_clean():
    n = feed.routes.shape[0]
    feed1 = clean(feed)

    # Should drop some routes
    k = feed1.routes.shape[0]
    assert k < n

    # Route short names should be unique
    j = feed1.routes.route_short_name.nunique()
    assert j == k
reason="need --runslow option to run", ) # Check if GeoPandas is installed loader = importlib.find_loader("geopandas") if loader is None: HAS_GEOPANDAS = False else: HAS_GEOPANDAS = True # Check if Folium is installed loader = importlib.find_loader("folium") if loader is None: HAS_FOLIUM = False else: HAS_FOLIUM = True # Load/create test feeds DATA_DIR = Path("data") sample = gtfstk.read_gtfs(DATA_DIR / "sample_gtfs.zip", dist_units="km") cairns = gtfstk.read_gtfs(DATA_DIR / "cairns_gtfs.zip", dist_units="km") cairns_shapeless = cairns.copy() cairns_shapeless.shapes = None t = cairns_shapeless.trips t["shape_id"] = np.nan cairns_shapeless.trips = t week = cairns.get_first_week() cairns_dates = [week[0], week[1]] cairns_trip_stats = pd.read_csv(DATA_DIR / "cairns_trip_stats.csv", dtype=gtfstk.DTYPE)
import json
import os
import shutil
import tempfile

import gtfstk

# `route_type_to_mode` and `location_type_to_stop_type` are helper
# functions defined elsewhere in this module.


def export_gtfs_as_geo(input_gtfs_file, output_file_name):
    working_directory = tempfile.TemporaryDirectory()
    feed = gtfstk.read_gtfs(input_gtfs_file, dist_units='km')
    feed_w_shapes = gtfstk.miscellany.create_shapes(feed)

    # Keep only a relevant subset
    feed_w_shapes_selected = feed_w_shapes.trips[[
        'route_id', 'shape_id', 'trip_id'
    ]]
    feed_w_shapes_dedup = feed_w_shapes_selected.drop_duplicates(
        subset=['route_id', 'shape_id'])
    trip_stats = feed_w_shapes.compute_trip_stats()
    trips_full = feed_w_shapes_dedup.merge(trip_stats,
                                           left_on='trip_id',
                                           right_on='trip_id',
                                           suffixes=('', '_'))
    trips_full_selected = trips_full[[
        'route_id', 'shape_id', 'trip_id', 'start_stop_id', 'end_stop_id',
        'num_stops', 'is_loop'
    ]]

    # Turn ids into human-readable info
    trips_full_s1 = trips_full_selected.merge(feed.stops,
                                              left_on='start_stop_id',
                                              right_on='stop_id',
                                              suffixes=('', '_'))
    trips_full_s1 = trips_full_s1[[
        'route_id', 'shape_id', 'trip_id', 'stop_name', 'end_stop_id',
        'num_stops', 'is_loop'
    ]]
    trips_full_s1.rename(columns={"stop_name": "origin_stop_name"},
                         inplace=True)
    trips_full_s2 = trips_full_s1.merge(feed.stops,
                                        left_on='end_stop_id',
                                        right_on='stop_id',
                                        suffixes=('', '_'))
    trips_full_s2 = trips_full_s2[[
        'route_id', 'shape_id', 'trip_id', 'origin_stop_name', 'stop_name',
        'num_stops', 'is_loop'
    ]]
    trips_full_s2.rename(columns={"stop_name": "destination_stop_name"},
                         inplace=True)
    trips_full_w_routes = trips_full_s2.merge(feed.routes, on='route_id')
    trips_full_w_agency = trips_full_w_routes.merge(feed.agency,
                                                    on='agency_id')
    trips_full_w_agency['route_mode'] = trips_full_w_agency[
        'route_type'].apply(lambda x: route_type_to_mode(x))
    trips_full_w_agency['trip_name'] = trips_full_w_agency[
        'route_id'] + "_" + trips_full_w_agency['trip_id']
    trips_full_w_agency['file_name'] = trips_full_w_agency['trip_name'].apply(
        lambda x: x.replace(' ', 'u').replace(':', 'u').replace('/', 'u'))

    # Write outputs
    for id_, elem in trips_full_w_agency.iterrows():
        with open(
                os.path.join(working_directory.name,
                             "{}.geojson".format(elem["file_name"])),
                'w') as fp:
            as_geojson = feed_w_shapes.trip_to_geojson(elem["trip_id"],
                                                       include_stops=True)
            as_geojson['features'][0]['properties'] = json.loads(
                elem.to_json())

            # Put the stops in the right order
            stop_id_s_in_order = list(
                feed_w_shapes.stop_times[
                    feed_w_shapes.stop_times["trip_id"] ==
                    elem["trip_id"]].sort_values(
                        by=['stop_sequence'])['stop_id'])
            new_FeatureCollection = []
            new_FeatureCollection.append(as_geojson['features'][0])
            for stop_id in stop_id_s_in_order:
                feature = [
                    feat for feat in as_geojson['features']
                    if feat['properties'].get('stop_id') == stop_id
                ]
                new_FeatureCollection.append(feature[0])
            as_geojson['features'] = new_FeatureCollection
            json.dump(as_geojson, fp)

    trips_full_w_agency = trips_full_w_agency[[
        'file_name', 'origin_stop_name', 'destination_stop_name', 'num_stops',
        'is_loop', 'route_short_name', 'route_long_name', 'route_mode',
        'route_color', 'agency_name', 'agency_url'
    ]]
    trips_full_w_agency.to_csv(
        os.path.join(working_directory.name, "trips.csv"))
    feed_w_shapes.stops.rename(columns={"stop_lat": "latitude"}, inplace=True)
    feed_w_shapes.stops.rename(columns={"stop_lon": "longitude"},
                               inplace=True)
    if "location_type" in feed_w_shapes.stops.columns:
        feed_w_shapes.stops.fillna({'location_type': 0}, inplace=True)
        feed_w_shapes.stops['stop_type'] = feed_w_shapes.stops[
            'location_type'].apply(lambda x: location_type_to_stop_type(x))
    else:
        feed_w_shapes.stops['stop_type'] = "stops"
    stop_types = feed_w_shapes.stops['stop_type'].unique()
    for stop_type_name in stop_types:
        stops = feed_w_shapes.stops[feed_w_shapes.stops['stop_type'] ==
                                    stop_type_name]
        stops.to_csv(os.path.join(working_directory.name,
                                  "{}.csv".format(stop_type_name)),
                     float_format='%.6f')
    shutil.make_archive(
        output_file_name.split('.')[0], 'zip', working_directory.name)
    working_directory.cleanup()
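The function assumes `route_type_to_mode` and `location_type_to_stop_type` are defined elsewhere in the module; the bodies below are plausible sketches based on the GTFS `route_type` and `location_type` codes, not the author's implementations, followed by an example call with placeholder file names:

def route_type_to_mode(route_type):
    # Hypothetical mapping from GTFS route_type codes to mode names.
    modes = {
        0: 'tram', 1: 'subway', 2: 'rail', 3: 'bus',
        4: 'ferry', 5: 'cable_car', 6: 'gondola', 7: 'funicular',
    }
    return modes.get(int(route_type), 'other')


def location_type_to_stop_type(location_type):
    # Hypothetical mapping from GTFS location_type codes to output file names.
    types = {0: 'stops', 1: 'stations', 2: 'entrances'}
    return types.get(int(location_type), 'stops')


export_gtfs_as_geo('gtfs.zip', 'gtfs_geo.zip')  # placeholder file names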