def get_stop_link(stopA, stopB, src='file', merge_weather=False):
    """
    Retrieve the data describing the link between two stops.

    Almost redundant, or possibly still used by the Big Route Model.
    Use stop_tools.stop_data() instead.

    Args:
        stopA: Identifier of the first stop; used as the directory name
            under /home/student/data/stops.
        stopB: Identifier of the second stop; used as the CSV file name
            inside stopA's directory.
        src: 'file' reads the link CSV from disk. 'db' is reserved for a
            database reader that has not been written yet.
        merge_weather: When True, join the link rows with
            /home/student/data/cleanweather.csv on ('date', 'hour') and
            return only a fixed subset of columns.

    Returns:
        A pandas DataFrame with 'stopA' and 'stopB' columns added, or None
        when the link CSV is not on disk.

    Raises:
        NotImplementedError: If src is 'db'.
        ValueError: If src is neither 'file' nor 'db'.
    """
    import os

    if src == 'db':
        # TODO: insert method here for grabbing data from database.
        # Fail loudly instead of falling through with `df` unbound.
        raise NotImplementedError("src='db' is not implemented yet")
    if src != 'file':
        raise ValueError("Unknown src: %r (expected 'file' or 'db')" % (src,))

    # Build the path once so the existence check and the read can never
    # disagree about which file they refer to.
    link_path = os.path.join(
        '/home/student/data/stops', str(stopA), str(stopB) + '.csv')
    if not os.path.exists(link_path):
        print('Error - stop link data not on disk')
        return None

    # Imported only once we know there is something to read.
    import pandas as pd
    from dbanalysis import headers as hds

    df = pd.read_csv(link_path, names=hds.get_stop_link_headers())
    df['stopA'] = stopA
    df['stopB'] = stopB
    len_df_1 = len(df)

    if not merge_weather:
        return df

    # Split the weather timestamp into a calendar date and an hour so it
    # can be matched against the link rows.
    weather = pd.read_csv('/home/student/data/cleanweather.csv')
    weather['date'] = pd.to_datetime(weather['date'])
    weather['hour'] = weather['date'].dt.hour
    weather['date'] = weather['date'].dt.date

    df['dt'] = pd.to_datetime(df['dayofservice'], format="%d-%b-%y %H:%M:%S")
    df['date'] = df['dt'].dt.date
    # Presumably actualtime_arr_from is seconds since midnight, making this
    # the hour of day - TODO confirm against the data source.
    df['hour'] = df['actualtime_arr_from'] // 3600

    cols = [
        'dayofservice', 'tripid', 'plannedtime_arr_from',
        'plannedtime_dep_from', 'actualtime_arr_from', 'actualtime_dep_from',
        'plannedtime_arr_to', 'actualtime_arr_to', 'routeid', 'stopA',
        'stopB', 'hour', 'dewpt', 'msl', 'rain', 'rhum', 'temp', 'vappr',
        'wetb',
    ]
    merged = pd.merge(df, weather, on=['date', 'hour'])[cols]
    len_df2 = len(merged)
    # Row counts before and after the merge (default inner join), so rows
    # lost for lack of a matching weather record are visible.
    print(len_df_1, len_df2)
    return merged
def prep_test_stop(filename, weather, fromstop, tostop):
    """
    Load one stop-link CSV, derive travel features and join weather data.

    Args:
        filename: Path of the headerless stop-link CSV; column names come
            from hds.get_stop_link_headers().
        weather: DataFrame joined on the 'date' and 'hour' columns. Its
            'dt' column, if present, is left out of the join so it does not
            clash with the 'dt' column built here. The caller's frame is
            not modified.
        fromstop: Identifier of the first stop; stored in 'fromstop'.
        tostop: Identifier of the second stop; stored in 'tostop'.

    Returns:
        The merged DataFrame with every row containing a missing value
        removed.
    """
    from dbanalysis import headers as hds

    s_getter = stop_getter()
    df = pd.read_csv(filename, names=hds.get_stop_link_headers())
    df['fromstop'] = fromstop
    df['tostop'] = tostop
    df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_arr_from']
    df['distance'] = s_getter.get_stop_distance(fromstop, tostop)
    # Presumably traveltime is in seconds, so this is distance per hour -
    # TODO confirm units. A zero traveltime yields inf, which dropna()
    # below does NOT remove.
    df['speed'] = df['distance'] / (df['traveltime'] / 3600)

    df['dt'] = pd.to_datetime(df['dayofservice'], format="%d-%b-%y %H:%M:%S")
    df['date'] = df['dt'].dt.date
    df['day'] = df['dt'].dt.dayofweek
    df['month'] = df['dt'].dt.month
    df['hour'] = df['actualtime_arr_from'] // 3600
    df['year'] = df['dt'].dt.year

    # Drop 'dt' on a copy rather than in place: mutating the caller's frame
    # made a second call with the same weather object raise KeyError.
    weather_no_dt = weather.drop(columns='dt', errors='ignore')
    df = pd.merge(df, weather_no_dt, on=['date', 'hour'])
    return df.dropna()
d[str(day)+'_'+str(hour)+'speed']=[] from dbanalysis import headers as hds stop_dirs = os.listdir('/home/student/data/stops') for fromstop in stop_dirs: for tostop in os.listdir('/home/student/data/stops/'+fromstop): if tostop != 'orphans.csv': ts = int(tostop.split('.')[0]) fs=int(fromstop) df = pd.read_csv('/home/student/data/stops/'+fromstop+'/'+tostop,names=hds.get_stop_link_headers()) df=prep_stop2(df,fs,ts) d['fromstop'].append(fs) d['tostop'].append(ts) for day in range(7): for hour in range(24): d[str(day)+'_'+str(hour)+'dwell'].append(df[str(day)+'_'+str(hour)+'dwell']) d[str(day)+'_'+str(hour)+'speed'].append(df[str(day)+'_'+str(hour)+'speed']) import pickle with open('/home/student/dbanalysis/dbanalysis/resources/dwellmatrix.pickle','wb') as handle: pickle.dump(d,handle,protocol=pickle.HIGHEST_PROTOCOL)
for sB in files: count += 1 print(count) if count % 100 == 0: print(count) stopB = sB.split('.')[0] d[stopA][stopB] = {} distance = s_getter.get_stop_distance(stopA, stopB) if (not isinstance(distance, float)) and (not isinstance( distance, int)): continue d[stopA][stopB]['distance'] = distance df = pd.read_csv(base_dir + '/' + stopA + '/' + sB, names=hds.get_stop_link_headers()) df['dt'] = pd.to_datetime(df['dayofservice'], format=time_format) df['hour'] = df['actualtime_arr_from'] // 3600 df['day'] = df['dt'].dt.dayofweek df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_arr_from'] #big mistake here for day in range(0, 7): d[stopA][stopB][day] = {} for hour in range(0, 24): x = df[(df['day'] == day) & (df['hour'] == hour)] if x.shape[0] > 1:
def get_stop_link(stopA, stopB, src='file', merge_weather=False):
    """
    Retrieve the data describing the link between two stops.

    Args:
        stopA: Identifier of the first stop; used as the directory name
            under /home/student/data/stops.
        stopB: Identifier of the second stop; used as the CSV file name
            inside stopA's directory.
        src: 'file' reads the link CSV from disk. 'db' is a placeholder
            for a reader that does not exist yet.
        merge_weather: When True, join the link rows with
            /home/student/data/cleanweather.csv on ('date', 'hour') and
            return only a fixed subset of columns.

    Returns:
        A pandas DataFrame with 'stopA' and 'stopB' columns added, or None
        when the link CSV is not on disk.

    Raises:
        NotImplementedError: If src is 'db'.
        ValueError: If src is neither 'file' nor 'db'.
    """
    import os

    if src == 'db':
        # Previously a bare `pass`, which left `df` unbound and failed
        # later with an unrelated-looking UnboundLocalError.
        raise NotImplementedError("src='db' is not implemented yet")
    if src != 'file':
        raise ValueError("Unknown src: %r (expected 'file' or 'db')" % (src,))

    csv_path = os.path.join(
        '/home/student/data/stops', str(stopA), str(stopB) + '.csv')
    if not os.path.exists(csv_path):
        print('Error - stop link data not on disk')
        return None

    # Imported only once we know there is something to read.
    import pandas as pd
    from dbanalysis import headers as hds

    df = pd.read_csv(csv_path, names=hds.get_stop_link_headers())
    df['stopA'] = stopA
    df['stopB'] = stopB

    if not merge_weather:
        return df

    # Split the weather timestamp into a calendar date and an hour so it
    # can be matched against the link rows.
    weather = pd.read_csv('/home/student/data/cleanweather.csv')
    weather['date'] = pd.to_datetime(weather['date'])
    weather['hour'] = weather['date'].dt.hour
    weather['date'] = weather['date'].dt.date

    df['dt'] = pd.to_datetime(df['dayofservice'], format="%d-%b-%y %H:%M:%S")
    df['date'] = df['dt'].dt.date
    # Presumably actualtime_arr_from is seconds since midnight, making this
    # the hour of day - TODO confirm against the data source.
    df['hour'] = df['actualtime_arr_from'] // 3600

    cols = [
        'dayofservice', 'tripid', 'plannedtime_arr_from',
        'plannedtime_dep_from', 'actualtime_arr_from', 'actualtime_dep_from',
        'plannedtime_arr_to', 'actualtime_arr_to', 'routeid', 'stopA',
        'stopB', 'hour', 'dewpt', 'msl', 'rain', 'rhum', 'temp', 'vappr',
        'wetb',
    ]
    # Default (inner) merge: link rows without a matching weather record
    # are dropped.
    return pd.merge(df, weather, on=['date', 'hour'])[cols]