def test_aggregating_further_by_station_name_with_missing_names():
    """Stop pairs with an empty name on either end aggregate to an empty frame."""
    # Each row has exactly one blank stop name (A is blank in row 0, B in row 5).
    stop_pairs = DataFrame(
        {
            'station_A': ['4', '01'],
            'station_B': ['1', '04'],
            'station_A_name': ['', 'Stop_1'],
            'station_B_name': ['Stop_1', ''],
            'mode': ['bus', 'bus'],
            'routes_in_common': [{'11', '1'}, {'012', '2'}],
            'number_of_trips': [3, 3],
        },
        index=[0, 5],
    )

    result = use_schedule.aggregate_by_stop_names(stop_pairs)

    assert result.empty
def test_aggregating_trips_per_day_per_route_by_end_stop_pairs_with_different_modes():
    """
    Reversed stop-name pairs are merged only when they share a mode.

    The two bus rows (Stop_4 -> Stop_1 and Stop_1 -> Stop_4) are expected to
    collapse into a single Stop_1/Stop_4 row with their trips summed, while
    the A/B pair served by 'rail' and 'horse' is expected to stay as two rows.
    """
    stop_pairs = DataFrame(
        {
            'station_A': ['4', '01', 'a', 'b'],
            'station_B': ['1', '04', 'b', 'a'],
            'station_A_name': ['Stop_4', 'Stop_1', 'A', 'B'],
            'station_B_name': ['Stop_1', 'Stop_4', 'B', 'A'],
            'mode': ['bus', 'bus', 'rail', 'horse'],
            'routes_in_common': [{'11', '1'}, {'012', '2'}, {'lol'}, {''}],
            'number_of_trips': [3, 3, 4, 10],
        },
        index=[0, 1, 2, 3],
    )
    expected = DataFrame(
        {
            'station_A_name': ['A', 'A', 'Stop_1'],
            'station_B_name': ['B', 'B', 'Stop_4'],
            'mode': ['horse', 'rail', 'bus'],
            'number_of_trips': [10, 4, 6],
        },
        index=[0, 1, 2],
    )

    result = use_schedule.aggregate_by_stop_names(stop_pairs)

    assert_frame_equal(result, expected)
def generate_standard_outputs_for_schedule(schedule, output_dir, gtfs_day='19700101', include_shp_files=False):
    """
    Write the standard set of schedule outputs (geodataframes and csvs) under `output_dir`.

    Outputs produced:
      - `vehicles_per_hour/`: one vehicles-per-hour geodataframe per mode, one
        for all modes combined, hour-sliced subsets of the combined frame, and
        csvs of vehicles per hour per service and per stop (departing/arriving).
      - `subgraphs/`: links and nodes geodataframes of the schedule subgraph of
        each mode, re-projected to epsg:4326.
      - csvs of trips per day per service and per route, plus the per-route
        trips aggregated by end stop id pairs and by end stop name pairs.

    :param schedule: schedule object; must provide `to_geodataframe`,
        `route_trips_with_stops_to_dataframe`, `mode_graph_map`, `modes` and `subgraph`
    :param output_dir: directory under which all outputs are written
    :param gtfs_day: day passed to `schedule.route_trips_with_stops_to_dataframe`
    :param include_shp_files: forwarded to every `save_geodataframe` call
    :return: None
    """
    # Function-scope import keeps this block self-contained; pandas is only
    # needed here to combine the per-mode frames.
    import pandas as pd

    logging.info('Generating geojson standard outputs for schedule')
    schedule_links = schedule.to_geodataframe()['links'].to_crs("epsg:4326")
    df = schedule.route_trips_with_stops_to_dataframe(gtfs_day=gtfs_day)
    vph_dir = os.path.join(output_dir, 'vehicles_per_hour')
    subgraph_dir = os.path.join(output_dir, 'subgraphs')
    graph_mode_map = schedule.mode_graph_map()

    # Per-mode frames are collected and concatenated once after the loop.
    # `DataFrame.append` was removed in pandas 2.0, and appending inside the
    # loop re-copied the accumulated frame on every iteration.
    vph_per_mode = []
    for mode in schedule.modes():
        logging.info(f'Generating vehicles per hour for {mode}')
        df_vph = use_schedule.generate_edge_vph_geodataframe(df[df['mode'] == mode], schedule_links)
        save_geodataframe(
            df_vph,
            filename=f'vehicles_per_hour_{mode}',
            output_dir=vph_dir,
            include_shp_files=include_shp_files
        )
        # The mode column is added only after the per-mode file has been
        # saved, so it appears in the combined output alone.
        df_vph['mode'] = mode
        vph_per_mode.append(df_vph)

        logging.info(f'Generating schedule graph for {mode}')
        schedule_subgraph = generate_geodataframes(schedule.subgraph(graph_mode_map[mode]))
        save_geodataframe(
            schedule_subgraph['links'].to_crs("epsg:4326"),
            filename=f'schedule_subgraph_links_{mode}',
            output_dir=subgraph_dir,
            include_shp_files=include_shp_files
        )
        save_geodataframe(
            schedule_subgraph['nodes'].to_crs("epsg:4326"),
            filename=f'schedule_subgraph_nodes_{mode}',
            output_dir=subgraph_dir,
            include_shp_files=include_shp_files
        )

    if vph_per_mode:
        df_all_modes_vph = pd.concat(vph_per_mode)

        logging.info('Saving vehicles per hour for all PT modes')
        save_geodataframe(
            df_all_modes_vph,
            filename='vehicles_per_hour_all_modes',
            output_dir=vph_dir,
            include_shp_files=include_shp_files
        )

        logging.info('Saving vehicles per hour for all PT modes for selected hour slices')
        # NOTE(review): these filenames contain ':' which is not a valid
        # character in Windows file names; kept unchanged so existing
        # consumers of these outputs keep finding them.
        # NOTE(review): the label reads "{h-1}:30-{h}:30" while the filter
        # selects rows whose 'hour' timestamp has hour == h -- confirm that
        # matches the binning used when the vph frame is generated.
        for h in [7, 8, 9, 13, 16, 17, 18]:
            save_geodataframe(
                df_all_modes_vph[df_all_modes_vph['hour'].dt.hour == h],
                filename=f'vph_all_modes_within_{h-1}:30-{h}:30',
                output_dir=vph_dir,
                include_shp_files=include_shp_files
            )
    else:
        # With no modes there is nothing to combine; skip the all-modes
        # outputs instead of failing on a missing frame.
        logging.warning('Schedule has no modes - skipping vehicles per hour outputs for all PT modes')

    logging.info('Generating csv for vehicles per hour for each service')
    use_schedule.vehicles_per_hour(
        df,
        aggregate_by=['service', 'service_name', 'mode'],
        output_path=os.path.join(vph_dir, 'vph_per_service.csv'))

    logging.info('Generating csv for vehicles per hour per stop')
    use_schedule.vehicles_per_hour(
        df,
        aggregate_by=['from_stop', 'from_stop_name', 'mode'],
        output_path=os.path.join(vph_dir, 'vph_per_stop_departing_from.csv'))
    use_schedule.vehicles_per_hour(
        df,
        aggregate_by=['to_stop', 'to_stop_name', 'mode'],
        output_path=os.path.join(vph_dir, 'vph_per_stop_arriving_at.csv'))

    logging.info('Generating csvs for trips per day')
    use_schedule.trips_per_day_per_service(df, output_dir=output_dir)
    df_trips_per_route = use_schedule.trips_per_day_per_route(df, output_dir=output_dir)

    # stop-to-stop trips per day aggregation
    aggregated_per_stops = use_schedule.aggregate_trips_per_day_per_route_by_end_stop_pairs(
        schedule, df_trips_per_route)
    aggregated_per_stops.to_csv(
        os.path.join(output_dir, 'trips_per_day_per_route_aggregated_per_stop_id_pair.csv'))
    use_schedule.aggregate_by_stop_names(aggregated_per_stops).to_csv(
        os.path.join(output_dir, 'trips_per_day_per_route_aggregated_per_stop_name_pair.csv'))