def get_rawnav_data(get_cwd, get_rawnav_summary_dat):
    """Load the H8/Sunday demo rawnav records as a projected GeoDataFrame.

    The demo parquet file under
    ``<get_cwd>/data/00-raw/demo_data/02_notebook_data`` is read through
    ``wr.read_cleaned_rawnav`` and right-merged onto the run keys
    (``filename``, ``index_run_start``) taken from the summary table.

    Args:
        get_cwd: Base directory that contains the ``data`` folder.
        get_rawnav_summary_dat: Table exposing ``filename`` and
            ``index_run_start`` columns (presumably the filtered summary
            produced by the function of the same name -- TODO confirm).

    Returns:
        GeoDataFrame whose point geometry is built from the ``long`` and
        ``lat`` columns as EPSG:4326 and then reprojected to EPSG:2248.
    """
    demo_file = os.path.join(get_cwd, "data", "00-raw", "demo_data",
                             "02_notebook_data", "rawnav_data_demo.parquet")
    pings = wr.read_cleaned_rawnav(analysis_routes_=["H8"],
                                   analysis_days_=["Sunday"],
                                   path=demo_file)

    run_keys = ['filename', 'index_run_start']
    kept_runs = get_rawnav_summary_dat[run_keys]

    # how='right' keeps one or more rows for every run key in the summary,
    # even when no rawnav record matches that key.
    pings_for_runs = pings.merge(kept_runs, on=run_keys, how='right')

    points = gpd.points_from_xy(pings_for_runs.long, pings_for_runs.lat)
    unprojected = gpd.GeoDataFrame(pings_for_runs,
                                   geometry=points,
                                   crs='EPSG:4326')
    return unprojected.to_crs(epsg=2248)
def get_summary(get_analysis_routes, get_analysis_days, get_cwd):
    """Read the demo rawnav summary table for the given routes and days.

    Args:
        get_analysis_routes: Passed through as ``analysis_routes_``.
        get_analysis_days: Passed through as ``analysis_days_``.
        get_cwd: Base directory that contains the ``data`` folder.

    Returns:
        Whatever ``wr.read_cleaned_rawnav`` returns for
        ``rawnav_summary_demo.parquet``.
    """
    summary_file = os.path.join(get_cwd, "data", "00-raw", "demo_data",
                                "02_notebook_data",
                                "rawnav_summary_demo.parquet")
    return wr.read_cleaned_rawnav(analysis_routes_=get_analysis_routes,
                                  analysis_days_=get_analysis_days,
                                  path=summary_file)
def get_rawnav(get_analysis_route, get_cwd):
    """Read the ``03_notebook_data`` rawnav table for the given route(s).

    Only a route filter is applied; no day filter is passed.

    Args:
        get_analysis_route: Passed through as ``analysis_routes_``.
        get_cwd: Base directory that contains the ``data`` folder.

    Returns:
        Whatever ``wr.read_cleaned_rawnav`` returns for
        ``rawnav_data.parquet``.
    """
    rawnav_file = os.path.join(get_cwd, "data", "00-raw", "demo_data",
                               "03_notebook_data", "rawnav_data.parquet")
    return wr.read_cleaned_rawnav(analysis_routes_=get_analysis_route,
                                  path=rawnav_file)
def get_rawnav_gdf(get_analysis_routes, get_analysis_days, get_cwd):
    """Load demo rawnav records and return them as a projected GeoDataFrame.

    Args:
        get_analysis_routes: Passed through as ``analysis_routes_``.
        get_analysis_days: Passed through as ``analysis_days_``.
        get_cwd: Base directory that contains the ``data`` folder.

    Returns:
        GeoDataFrame whose point geometry is built from the ``long`` and
        ``lat`` columns as EPSG:4326 and then reprojected to EPSG:2248.
    """
    demo_file = os.path.join(get_cwd, "data", "00-raw", "demo_data",
                             "02_notebook_data", "rawnav_data_demo.parquet")
    pings = wr.read_cleaned_rawnav(analysis_routes_=get_analysis_routes,
                                   analysis_days_=get_analysis_days,
                                   path=demo_file)

    points = gpd.points_from_xy(pings.long, pings.lat)
    unprojected = gpd.GeoDataFrame(pings, geometry=points, crs='EPSG:4326')
    return unprojected.to_crs(epsg=2248)
def get_rawnav_summary_dat(get_cwd):
    """Read the H8/Sunday demo summary table and drop short runs.

    Rows are removed when ``run_duration_from_sec < 600`` or
    ``dist_odom_mi < 2``.

    Args:
        get_cwd: Base directory that contains the ``data`` folder.

    Returns:
        The filtered summary table (the result of ``.query`` on what
        ``wr.read_cleaned_rawnav`` returned).
    """
    summary_file = os.path.join(get_cwd, "data", "00-raw", "demo_data",
                                "02_notebook_data",
                                "rawnav_summary_demo.parquet")
    all_runs = wr.read_cleaned_rawnav(analysis_routes_=["H8"],
                                      analysis_days_=["Sunday"],
                                      path=summary_file)

    # Keep only runs that are at least 600 seconds long and 2 miles in length.
    return all_runs.query(
        'not (run_duration_from_sec < 600 | dist_odom_mi < 2)')
# Output location for the stop index. Note that the ".parquet" path is
# created as a directory, not a single file.
path_stop_index = os.path.join(path_processed_data, "stop_index.parquet")
# makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: it has no
# check-then-create race and also creates missing parent directories.
os.makedirs(path_stop_index, exist_ok=True)

for analysis_route in analysis_routes:
    print("*" * 100)
    print('Processing analysis route {}'.format(analysis_route))
    for analysis_day in analysis_days:
        print('Processing analysis route {} for {}...'.format(
            analysis_route, analysis_day))

        # Reload data
        try:
            rawnav_dat = (wr.read_cleaned_rawnav(
                analysis_routes_=analysis_route,
                analysis_days_=analysis_day,
                path=os.path.join(
                    path_processed_data, "rawnav_data.parquet")).drop(
                        columns=['blank', 'lat_raw', 'long_raw', 'sat_cnt']))
        except Exception as e:
            print(e)  # usually no data found or something similar
            continue
        else:

            rawnav_summary_dat = (wr.read_cleaned_rawnav(
                analysis_routes_=analysis_route,
                analysis_days_=analysis_day,
                path=os.path.join(path_processed_data,
                                  "rawnav_summary.parquet")))

            # Subset Rawnav Data to Records Desired
            rawnav_summary_dat = rawnav_summary_dat.query(
# Example #7
# 0
# Routes and days of the week covered by this analysis.
analysis_routes = q_jump_route_list
analysis_days = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]

# EPSG code for WMATA-area work
wmata_crs = 2248

import wmatarawnav as wr

# Summary
# 37720 runs on our analysis routes
rawnav_summary_dat = wr.read_cleaned_rawnav(
    analysis_routes_=analysis_routes,
    analysis_days_=analysis_days,
    path=os.path.join(path_processed_data, "rawnav_summary.parquet"),
)
print(
    "Number of rawnav runs in analysis routes is {}".format(
        len(rawnav_summary_dat)
    )
)

# Stop summary: read the parquet table and convert it to pandas.
stop_summary = pq.read_table(
    source=os.path.join(path_processed_data, "stop_summary.parquet"),
    use_pandas_metadata=True,
).to_pandas()
print(
    "Number of rawnav runs after stop merge is {}".format(len(stop_summary))
)
# The reported number of removed runs is the row-count difference between
# the two tables loaded above.
print(
    "{} runs were removed where distances were less than 2 miles or 10 minutes long."
    .format(len(rawnav_summary_dat) - len(stop_summary))
)
# Segment Summary
# Example #8
# 0
for seg in list(xwalk_seg_pattern_stop.seg_name_id.drop_duplicates()): #["eleventh_i_new_york"]: #list(xwalk_seg_pattern_stop.seg_name_id.drop_duplicates()):
# Iterate over Segments
    print('now on {}'.format(seg))
    # 2.1. Read-in Data 
    ###################
    # Reduce rawnav data to runs present in the summary file after filtering.
    
    xwalk_seg_pattern_stop_fil = xwalk_seg_pattern_stop.query('seg_name_id == @seg')

    seg_routes = list(xwalk_seg_pattern_stop_fil.route.drop_duplicates())
    
    rawnav_dat = (
        wr.read_cleaned_rawnav(
           analysis_routes_ = seg_routes,
           path = os.path.join(path_processed_data, "rawnav_data.parquet")
        )
        .drop(columns=['blank', 'lat_raw', 'long_raw', 'sat_cnt'])
    )
            
    segment_summary = (
        pq.read_table(
            source = os.path.join(path_processed_data,"segment_summary_2017_test.parquet"),
            filters = [['seg_name_id', "=", seg]],
            use_pandas_metadata = True
        )
        .to_pandas()
    )

    segment_summary_fil = (
        segment_summary
# 1.3 Import User-Defined Package
############################################
import wmatarawnav as wr

# Elapsed time since begin_time; everything from the first '.' onward
# (the fractional seconds) is cut off before printing.
executionTime = str(datetime.now() - begin_time).split('.', 1)[0]
print(
    f"Run Time Section 1 Import Libraries and Set Global Parameters : {executionTime}"
)
print("*" * 100)

# 2.1 Rawnav data
############################################
# Load the rawnav records for the analysis routes/days and pass them
# straight through wr.fix_rawnav_names.
rawnav_dat = wr.fix_rawnav_names(
    wr.read_cleaned_rawnav(
        analysis_routes_=analysis_routes,
        path_processed_route_data=path_processed_route_data,
        restrict=restrict_n,
        analysis_days=analysis_days,
    )
)

# 2.2 Summary data
############################################
# read_summary_rawnav returns two objects; judging by its name, the second
# holds the trips shorter than 600 seconds or 2 miles (TODO confirm).
(rawnav_summary_dat,
 rawnav_trips_less_than_600sec_or_2miles) = wr.read_summary_rawnav(
     analysis_routes_=analysis_routes,
     path_processed_route_data=path_processed_route_data,
     restrict=restrict_n,
     analysis_days=analysis_days,
 )
rawnav_summary_dat = wr.fix_rawnav_names(rawnav_summary_dat)

# Two-column view of the summary table holding the per-trip key columns.
rawnav_summary_keys_col = rawnav_summary_dat[
    ['filename', 'index_trip_start_in_clean_data']
]