def get_rawnav_data(get_cwd, get_rawnav_summary_dat):
    """Load demo rawnav pings for route H8 on Sundays and return them as a
    projected GeoDataFrame restricted to runs present in the summary data.
    """
    demo_dir = os.path.join(get_cwd, "data", "00-raw", "demo_data",
                            "02_notebook_data")
    pings = wr.read_cleaned_rawnav(
        analysis_routes_=["H8"],
        analysis_days_=["Sunday"],
        path=os.path.join(demo_dir, "rawnav_data_demo.parquet"),
    )
    run_keys = get_rawnav_summary_dat[['filename', 'index_run_start']]
    # Right join keeps only pings belonging to runs that survived the
    # summary-level filtering.
    joined = pings.merge(run_keys,
                         on=['filename', 'index_run_start'],
                         how='right')
    gdf = gpd.GeoDataFrame(
        joined,
        geometry=gpd.points_from_xy(joined.long, joined.lat),
        crs='EPSG:4326',
    )
    # Reproject from WGS84 lat/long to EPSG:2248 (planar CRS used elsewhere
    # in this project for WMATA-area work).
    return gdf.to_crs(epsg=2248)
def get_summary(get_analysis_routes, get_analysis_days, get_cwd):
    """Return cleaned rawnav summary records for the requested routes and
    days, read from the demo summary parquet file.
    """
    summary_path = os.path.join(
        get_cwd,
        "data",
        "00-raw",
        "demo_data",
        "02_notebook_data",
        "rawnav_summary_demo.parquet",
    )
    return wr.read_cleaned_rawnav(
        analysis_routes_=get_analysis_routes,
        analysis_days_=get_analysis_days,
        path=summary_path,
    )
def get_rawnav(get_analysis_route, get_cwd):
    """Return cleaned rawnav ping records for the requested route(s), read
    from the demo data parquet file (no day filter applied).
    """
    data_path = os.path.join(
        get_cwd,
        "data",
        "00-raw",
        "demo_data",
        "03_notebook_data",
        "rawnav_data.parquet",
    )
    return wr.read_cleaned_rawnav(
        analysis_routes_=get_analysis_route,
        path=data_path,
    )
def get_rawnav_gdf(get_analysis_routes, get_analysis_days, get_cwd):
    """Read demo rawnav pings for the requested routes/days and return them
    as a GeoDataFrame projected to EPSG:2248.
    """
    pings = wr.read_cleaned_rawnav(
        analysis_routes_=get_analysis_routes,
        analysis_days_=get_analysis_days,
        path=os.path.join(
            get_cwd,
            "data",
            "00-raw",
            "demo_data",
            "02_notebook_data",
            "rawnav_data_demo.parquet",
        ),
    )
    # Build point geometry from the long/lat columns (WGS84), then reproject
    # to the planar EPSG:2248 CRS.
    geometry = gpd.points_from_xy(pings.long, pings.lat)
    gdf = gpd.GeoDataFrame(pings, geometry=geometry, crs='EPSG:4326')
    return gdf.to_crs(epsg=2248)
def get_rawnav_summary_dat(get_cwd):
    """Read the H8 Sunday demo rawnav summary and drop short runs.

    Runs with a duration under 600 seconds OR an odometer distance under
    2 miles are removed, matching the filtering described elsewhere in this
    project ("runs were removed where distances were less than 2 miles or
    10 minutes long").
    """
    path_parquet = os.path.join(get_cwd, "data", "00-raw", "demo_data",
                                "02_notebook_data")
    rawnav_summary_dat = wr.read_cleaned_rawnav(
        analysis_routes_=["H8"],
        analysis_days_=["Sunday"],
        path=os.path.join(path_parquet, "rawnav_summary_demo.parquet"))
    # FIX: the original expression omitted parentheses around the two
    # comparisons. Inside DataFrame.query(), `|` binds tighter than `<`
    # (Python operator precedence), so
    #   'run_duration_from_sec < 600 | dist_odom_mi < 2'
    # parsed as the chained comparison
    #   run_duration_from_sec < (600 | dist_odom_mi) < 2
    # rather than the intended OR of the two conditions. Each comparison
    # must be parenthesized when combined with `|`.
    rawnav_summary_dat = rawnav_summary_dat.query(
        'not ((run_duration_from_sec < 600) | (dist_odom_mi < 2))')
    return rawnav_summary_dat
# Build (or reuse) the output directory for per-route stop-index parquet data.
path_stop_index = os.path.join(path_processed_data, "stop_index.parquet")
if not os.path.isdir(path_stop_index):
    os.mkdir(path_stop_index)
# Process every analysis route for every analysis day; each (route, day)
# combination is loaded and filtered independently.
for analysis_route in analysis_routes:
    print("*" * 100)
    print('Processing analysis route {}'.format(analysis_route))
    for analysis_day in analysis_days:
        print('Processing analysis route {} for {}...'.format(
            analysis_route, analysis_day))
        # Reload data
        try:
            # Drop columns not needed downstream to cut memory use.
            rawnav_dat = (wr.read_cleaned_rawnav(
                analysis_routes_=analysis_route,
                analysis_days_=analysis_day,
                path=os.path.join(
                    path_processed_data, "rawnav_data.parquet")).drop(
                        columns=['blank', 'lat_raw', 'long_raw', 'sat_cnt']))
        except Exception as e:
            print(e)  # usually no data found or something similar
            # Skip this (route, day) combination and move on.
            continue
        else:
            rawnav_summary_dat = (wr.read_cleaned_rawnav(
                analysis_routes_=analysis_route,
                analysis_days_=analysis_day,
                path=os.path.join(path_processed_data,
                                  "rawnav_summary.parquet")))
            # Subset Rawnav Data to Records Desired
            # NOTE(review): the query expression continues beyond this
            # excerpt; statement is truncated in this view.
            rawnav_summary_dat = rawnav_summary_dat.query(
# Analyze every route in the queue-jump route list across all seven days.
analysis_routes = q_jump_route_list
analysis_days = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
    'Sunday'
]
# EPSG code for WMATA-area work
wmata_crs = 2248
import wmatarawnav as wr
# Summary
# 37720 runs on our analysis routes
rawnav_summary_dat = (wr.read_cleaned_rawnav(analysis_routes_=analysis_routes,
                                             analysis_days_=analysis_days,
                                             path=os.path.join(
                                                 path_processed_data,
                                                 "rawnav_summary.parquet")))
print("Number of rawnav runs in analysis routes is {}".format(
    len(rawnav_summary_dat)))
# Stop summary
stop_summary = (pq.read_table(source=os.path.join(path_processed_data,
                                                  "stop_summary.parquet"),
                              use_pandas_metadata=True).to_pandas())
print("Number of rawnav runs after stop merge is {}".format(len(stop_summary)))
# Report how many runs the short-run filter removed (count difference
# between the raw summary and the stop-merged summary).
print(
    "{} runs were removed where distances were less than 2 miles or 10 minutes long."
    .format(len(rawnav_summary_dat) - len(stop_summary)))
# Segment Summary
# Iterate over each distinct segment id in the segment/pattern/stop crosswalk.
for seg in list(xwalk_seg_pattern_stop.seg_name_id.drop_duplicates()):
    #["eleventh_i_new_york"]: #list(xwalk_seg_pattern_stop.seg_name_id.drop_duplicates()):
    # Iterate over Segments
    print('now on {}'.format(seg))
    # 2.1. Read-in Data
    ###################
    # Reduce rawnav data to runs present in the summary file after filtering.
    # Only the routes that actually serve this segment are loaded.
    xwalk_seg_pattern_stop_fil = xwalk_seg_pattern_stop.query('seg_name_id == @seg')
    seg_routes = list(xwalk_seg_pattern_stop_fil.route.drop_duplicates())
    # Drop columns not needed downstream to cut memory use.
    rawnav_dat = (
        wr.read_cleaned_rawnav(
            analysis_routes_ = seg_routes,
            path = os.path.join(path_processed_data, "rawnav_data.parquet")
        )
        .drop(columns=['blank', 'lat_raw', 'long_raw', 'sat_cnt'])
    )
    # Read only this segment's rows via a parquet-level filter predicate.
    segment_summary = (
        pq.read_table(
            source = os.path.join(path_processed_data,"segment_summary_2017_test.parquet"),
            filters = [['seg_name_id', "=", seg]],
            use_pandas_metadata = True
        )
        .to_pandas()
    )
    # NOTE(review): the expression below continues beyond this excerpt;
    # statement is truncated in this view.
    segment_summary_fil = (
        segment_summary
# 1.3 Import User-Defined Package ############################################ import wmatarawnav as wr executionTime = str(datetime.now() - begin_time).split('.')[0] print( f"Run Time Section 1 Import Libraries and Set Global Parameters : {executionTime}" ) print("*" * 100) # 2.1 Rawnav data ############################################ rawnav_dat = wr.read_cleaned_rawnav( analysis_routes_=analysis_routes, path_processed_route_data=path_processed_route_data, restrict=restrict_n, analysis_days=analysis_days) rawnav_dat = wr.fix_rawnav_names(rawnav_dat) # 2.2 Summary data ############################################ rawnav_summary_dat, rawnav_trips_less_than_600sec_or_2miles = wr.read_summary_rawnav( analysis_routes_=analysis_routes, path_processed_route_data=path_processed_route_data, restrict=restrict_n, analysis_days=analysis_days) rawnav_summary_dat = wr.fix_rawnav_names(rawnav_summary_dat) rawnav_summary_keys_col = rawnav_summary_dat[[ 'filename', 'index_trip_start_in_clean_data' ]]