def prep_data(route):
    """Gather and prepare the data needed for simple route models.

    Lists the route-split directory, keeps every entry whose name up to the
    first underscore equals ``route``, loads each one with
    ``route_tools.get_munged_route_data`` and concatenates the results.

    Columns added to the combined frame:

    * ``traveltime`` -- ``actualtime_arr_to - actualtime_dep_from``
    * ``dwelltime`` -- ``actualtime_dep_from - actualtime_arr_from``
    * ``dt`` -- ``dayofservice`` parsed with ``%d-%b-%y %H:%M:%S``
    * ``dayofweek`` and ``month`` -- taken from ``dt``
    * ``weekend`` -- True where ``dayofweek > 4``

    Args:
        route: Route identifier; compared for equality with the part of each
            file name that precedes the first underscore.

    Returns:
        The concatenated DataFrame with the columns listed above added.

    Raises:
        ValueError: If no entry in the directory belongs to ``route``.
    """
    print('gathering data')
    split_dir = '/home/student/ResearchPracticum/data/routesplits'
    # os.listdir returns names in arbitrary order; sorting keeps the row
    # order of the combined frame reproducible between runs and machines.
    matching = sorted(
        name for name in os.listdir(split_dir)
        if name.split('_')[0] == route
    )
    if not matching:
        # pd.concat([]) would raise a ValueError that does not say which
        # route was requested; fail early with the same exception type.
        raise ValueError(
            'No route split files found for route %r in %s'
            % (route, split_dir)
        )
    all_stops = pd.concat(
        [route_tools.get_munged_route_data(name) for name in matching]
    )

    print('Assigning target variables')
    all_stops['traveltime'] = (
        all_stops['actualtime_arr_to'] - all_stops['actualtime_dep_from']
    )
    all_stops['dwelltime'] = (
        all_stops['actualtime_dep_from'] - all_stops['actualtime_arr_from']
    )

    print('Assigning date times')
    time_format = "%d-%b-%y %H:%M:%S"
    all_stops['dt'] = pd.to_datetime(all_stops['dayofservice'],
                                     format=time_format)
    all_stops['dayofweek'] = all_stops['dt'].dt.dayofweek
    all_stops['month'] = all_stops['dt'].dt.month
    # pandas numbers Monday as 0, so values above 4 are Saturday and Sunday.
    all_stops['weekend'] = all_stops['dayofweek'] > 4
    print('Done')
    return all_stops
Every stop should reference the stops it connects to, with values for the average dwell time, average lateness, and average travel time to that connection """ import dbanalysis.route_tools as rt import pandas as pd import json import os import pickle directory = '/home/student/ResearchPracticum/data/routesplits' files = os.listdir(directory) directory_root = directory + '/' d = {} for route in files: df = rt.get_munged_route_data(route) df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_dep_from'] df['dwelltime'] = df['actualtime_dep_from'] - df['actualtime_arr_from'] df['lateness'] = df['actualtime_arr_to'] - df['plannedtime_arr_to'] for from_stop in df['fromstop'].unique(): fs = str(from_stop) if fs not in d: d[fs] = {'tostops': {}, 'dwelltime': {'tot': 0, 'num': 0}} gf = df[df['fromstop'] == from_stop] d[fs]['dwelltime']['tot'] += sum(gf['dwelltime']) d[fs]['dwelltime']['num'] += gf.shape[0] for to_stop in gf['tostop'].unique(): ts = str(to_stop) tf = gf[gf['tostop'] == to_stop] if ts not in d[fs]['tostops']:
import os import pandas as pd from sqlalchemy import create_engine routes = os.listdir('/home/student/data/routesplits') connstring = 'mysql://'+'dublinbus'+':'+'Ucd4dogs!'+'@'+'127.0.0.1:3306'+'/researchpracticum' engine = create_engine(connstring) from dbanalysis import route_tools for route in routes: df = route_tools.get_munged_route_data(route) df['route_id']=route df['stopA'] = df['fromstop'] df['stopB'] = df['tostop'] df['plannedtime_arr_A']=df['plannedtime_arr_from'] df['plannedtime_dep_A']=df['plannedtime_dep_from'] df['actualtime_arr_A']=df['actualtime_arr_from'] df['actualtime_dep_A']=df['actualtime_dep_from'] df['plannedtime_arr_B']=df['plannedtime_arr_to'] df['actualtime_arr_B']=df['actualtime_arr_to'] time_format = "%d-%b-%y %H:%M:%S" df['dt'] = pd.to_datetime(df['dayofservice'],format=time_format) df['dt'] = df['dt'].dt.date gf = df[['dt','stopA','stopB','plannedtime_arr_A','plannedtime_dep_A','actualtime_arr_A',\ 'actualtime_dep_A','plannedtime_arr_B','actualtime_arr_B','route_id']] for i in range (0,10): if i<9: a = int(gf.shape[0]/10)*i b = int(gf.shape[0]/10)*(i+1) cf = gf.iloc[a:b] else: a=int(gf.shape[0]/10)*i