예제 #1
0
def prep_data(route,
              data_dir='/home/student/ResearchPracticum/data/routesplits'):
    """
    Gather and prepare the data necessary for simple route models.

    Every entry in ``data_dir`` whose name, up to the first underscore,
    equals ``route`` is loaded with ``route_tools.get_munged_route_data``
    and the results are concatenated. These columns are then added:

    * ``traveltime`` -- ``actualtime_arr_to - actualtime_dep_from``
    * ``dwelltime``  -- ``actualtime_dep_from - actualtime_arr_from``
    * ``dt``         -- ``dayofservice`` parsed with ``%d-%b-%y %H:%M:%S``
    * ``dayofweek``, ``month`` -- read from ``dt``
    * ``weekend``    -- True where ``dayofweek > 4`` (pandas numbers Monday
      as 0, so this selects Saturday and Sunday)

    Args:
        route: Route identifier compared against the part of each file name
            before the first underscore.
        data_dir: Directory holding the per-route split files. Defaults to
            the location that used to be hard-coded here.

    Returns:
        The concatenated data with the extra columns described above.
        (Presumably a DataFrame -- depends on what
        ``route_tools.get_munged_route_data`` returns.)

    Raises:
        ValueError: If no entry in ``data_dir`` matches ``route``.
    """

    print('gathering data')
    route_names = os.listdir(data_dir)
    to_get = [r for r in route_names if r.split('_')[0] == route]
    if not to_get:
        # pd.concat([]) would raise a ValueError that says nothing about the
        # route or the directory; fail early with an actionable message.
        raise ValueError(
            'No route files found for route {!r} in {!r}'.format(
                route, data_dir))
    all_stops = pd.concat(
        [route_tools.get_munged_route_data(rt) for rt in to_get])

    print('Assigning target variables')
    all_stops['traveltime'] = (all_stops['actualtime_arr_to']
                               - all_stops['actualtime_dep_from'])
    all_stops['dwelltime'] = (all_stops['actualtime_dep_from']
                              - all_stops['actualtime_arr_from'])

    print('Assigning date times')
    time_format = "%d-%b-%y %H:%M:%S"
    all_stops['dt'] = pd.to_datetime(all_stops['dayofservice'],
                                     format=time_format)
    all_stops['dayofweek'] = all_stops['dt'].dt.dayofweek
    all_stops['month'] = all_stops['dt'].dt.month
    all_stops['weekend'] = all_stops['dayofweek'] > 4
    print('Done')
    return all_stops
예제 #2
0
Every stop should reference the stops it connects to, along with the average dwell time, average lateness, and average travel time for each of those connections.

"""

import dbanalysis.route_tools as rt
import pandas as pd
import json
import os
import pickle
# Folder whose entries are each passed to rt.get_munged_route_data below.
directory = '/home/student/ResearchPracticum/data/routesplits'
# Names of the entries in that folder; the main loop iterates over these.
files = os.listdir(directory)
# Same folder with a trailing slash (not used in the visible part of the script).
directory_root = directory + '/'
# Accumulator keyed by stringified from-stop id. Each value holds a 'tostops'
# dict and a running 'dwelltime' total ('tot') and row count ('num').
d = {}
for route in files:

    df = rt.get_munged_route_data(route)
    df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_dep_from']
    df['dwelltime'] = df['actualtime_dep_from'] - df['actualtime_arr_from']
    df['lateness'] = df['actualtime_arr_to'] - df['plannedtime_arr_to']
    for from_stop in df['fromstop'].unique():
        fs = str(from_stop)
        if fs not in d:
            d[fs] = {'tostops': {}, 'dwelltime': {'tot': 0, 'num': 0}}

        gf = df[df['fromstop'] == from_stop]
        d[fs]['dwelltime']['tot'] += sum(gf['dwelltime'])
        d[fs]['dwelltime']['num'] += gf.shape[0]
        for to_stop in gf['tostop'].unique():
            ts = str(to_stop)
            tf = gf[gf['tostop'] == to_stop]
            if ts not in d[fs]['tostops']:
예제 #3
0
import os
import pandas as pd
from sqlalchemy import create_engine
# Entries of the route-splits folder; each name is handed to
# route_tools.get_munged_route_data in the loop below.
routes = os.listdir('/home/student/data/routesplits')

# Database settings used to be fixed in source. Each part can now be
# overridden through an environment variable; the fallbacks reproduce the
# original connection string exactly, so existing setups keep working.
# NOTE(review): the fallback password is still committed to source control --
# rotate it and remove the default once the environment is configured.
# Values containing characters such as '@' or '/' must be URL-escaped before
# being placed in the environment, since they are inserted into a URL as-is.
db_user = os.environ.get('DUBLINBUS_DB_USER', 'dublinbus')
db_password = os.environ.get('DUBLINBUS_DB_PASSWORD', 'Ucd4dogs!')
db_host = os.environ.get('DUBLINBUS_DB_HOST', '127.0.0.1:3306')
db_name = os.environ.get('DUBLINBUS_DB_NAME', 'researchpracticum')
connstring = 'mysql://{}:{}@{}/{}'.format(
    db_user, db_password, db_host, db_name)
engine = create_engine(connstring)
from dbanalysis import  route_tools

for route in routes:
    df = route_tools.get_munged_route_data(route)
    df['route_id']=route
    df['stopA'] = df['fromstop']
    df['stopB'] = df['tostop']
    df['plannedtime_arr_A']=df['plannedtime_arr_from']
    df['plannedtime_dep_A']=df['plannedtime_dep_from']
    df['actualtime_arr_A']=df['actualtime_arr_from']
    df['actualtime_dep_A']=df['actualtime_dep_from']
    df['plannedtime_arr_B']=df['plannedtime_arr_to']
    df['actualtime_arr_B']=df['actualtime_arr_to']
    time_format = "%d-%b-%y %H:%M:%S"
    df['dt'] = pd.to_datetime(df['dayofservice'],format=time_format)
    df['dt'] = df['dt'].dt.date
    gf = df[['dt','stopA','stopB','plannedtime_arr_A','plannedtime_dep_A','actualtime_arr_A',\
             'actualtime_dep_A','plannedtime_arr_B','actualtime_arr_B','route_id']]
    for i in range (0,10):
        if i<9:
            a = int(gf.shape[0]/10)*i
            b = int(gf.shape[0]/10)*(i+1)
            cf = gf.iloc[a:b]
        else:
            a=int(gf.shape[0]/10)*i