def stops(tables, static_feed, arg_agency_id):
    """Build the ``Stops`` table from ``static_feed['stops']`` and write it out.

    One output row is produced per input stop, carrying ``agency_id``,
    ``stop_id``, ``stop_code``, ``stop_name``, ``stop_desc``, ``stop_lat``,
    ``stop_lon``, ``location_type``, ``parent_station`` and
    ``wheelchair_boarding``.

    Args:
        tables: Dict-like of table name -> DataFrame; ``tables['Stops']`` is
            replaced with the newly built frame.
        static_feed: Dict-like holding the ``stops`` DataFrame.
        arg_agency_id: Value stored in the ``agency_id`` column of every row.

    Side effects:
        Prints the input column names, calls
        ``helper.write_table(tables, "Stops")`` and prints a success message.
    """
    stops_df = static_feed['stops']
    print(stops_df.columns)

    # Rows are collected in a list and turned into a DataFrame once; growing
    # the frame with DataFrame.append per row copies the whole table each time.
    rows = []
    for i, row in stops_df.iterrows():
        # Prefer the real stop_id column wherever it sits in the header; only
        # fall back to the first column when no column is named exactly
        # 'stop_id' (presumably a header mangled by a BOM -- TODO confirm).
        if 'stop_id' in row.index:
            stop_id = row['stop_id']
        else:
            stop_id = row.iloc[0]

        # NOTE(review): helper.optional_field(index, name, frame[, default])
        # presumably yields the default when the column is absent -- confirm
        # against helper before relying on it.
        rows.append({
            'agency_id': arg_agency_id,
            'stop_id': stop_id,
            'stop_code': str(helper.optional_field(i, 'stop_code', stops_df)),
            'stop_name': str(row['stop_name']),
            'stop_desc': str(helper.optional_field(i, 'stop_desc', stops_df)),
            'stop_lat': float(row['stop_lat']),
            'stop_lon': float(row['stop_lon']),
            'location_type': int(
                helper.optional_field(i, 'location_type', stops_df, 0)),
            'parent_station': int(
                helper.optional_field(i, 'parent_station', stops_df, 0)),
            'wheelchair_boarding': int(
                helper.optional_field(i, 'wheelchair_boarding', stops_df, 0)),
        })

    tables['Stops'] = pd.DataFrame(rows)
    helper.write_table(tables, "Stops")
    print("SUCCESS with stops")
def route_stop_seq(tables, static_feed, trip_update_feed, alert_feed,
                   vehicle_position_feed, agency_id, trip2pattern):
    """Build ``Route_stop_seq`` and the trip -> pattern mapping for all routes.

    For every route, each trip's ordered list of stop ids is treated as a
    pattern; distinct stop lists within a route are numbered from 1 in order
    of first appearance. One output row is produced per stop_time of every
    trip.

    Args:
        tables: Dict-like of table name -> DataFrame;
            ``tables['Route_stop_seq']`` is replaced.
        static_feed: Dict-like holding the ``routes``, ``trips`` and
            ``stop_times`` DataFrames.
        trip_update_feed: Not used by this function.
        alert_feed: Not used by this function.
        vehicle_position_feed: Not used by this function.
        agency_id: Value stored in the ``agency_id`` column of every row.
        trip2pattern: Dict updated in place with ``trip_id -> pattern_id``.

    Side effects:
        Prints each trip id, writes ``Trip2Pattern.csv`` in the working
        directory, calls ``helper.write_table(tables, 'Route_stop_seq')`` and
        prints a success message.
    """
    routes_df = static_feed['routes']
    trips_df = static_feed['trips']
    stop_times_df = static_feed['stop_times']

    # Collected here and converted once; appending to the DataFrame per row
    # re-copies the whole table on every stop_time.
    rows = []
    for i, row in routes_df.iterrows():
        route_id = row['route_id']
        # NOTE(review): the value is stored as route_short_name but is read
        # from the route_long_name field -- kept as-is, confirm intent.
        route_short_name = str(
            helper.optional_field(i, 'route_long_name', routes_df))

        # str(stop id list) -> 1-based pattern number within this route.
        pattern_numbers = {}
        route_trips = trips_df.loc[trips_df['route_id'] == route_id]
        for j, subrow in route_trips.iterrows():
            trip_id = subrow['trip_id']
            print(trip_id)
            # direction_id may be absent from the trips frame; default to 0.
            direction_id = (
                subrow['direction_id'] if 'direction_id' in subrow else 0)

            trip_id_block = stop_times_df.loc[
                stop_times_df['trip_id'] == trip_id]
            sequence_key = str(trip_id_block['stop_id'].tolist())
            pattern_num = pattern_numbers.setdefault(
                sequence_key, len(pattern_numbers) + 1)
            pattern_id = "{0}_{1}_{2}".format(
                route_short_name, direction_id, pattern_num)

            for k, subsubrow in trip_id_block.iterrows():
                rows.append({
                    'trip_id': trip_id,
                    'agency_id': agency_id,
                    'route_short_name': route_short_name,
                    'route_dir': direction_id,
                    'pattern_id': pattern_id,
                    'stop_id': str(subsubrow['stop_id']),
                    'seq': subsubrow['stop_sequence'],
                    'is_time_point': int(helper.optional_field(
                        k, 'timepoint', stop_times_df, 0)),
                    'version': 1,  # replace later
                })
            trip2pattern[trip_id] = pattern_id

    tables['Route_stop_seq'] = pd.DataFrame(rows)

    # 'wb' is the Python 2 csv idiom used by this file.
    with open('Trip2Pattern.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(["trip_id", "pattern_id"])
        for key, value in trip2pattern.items():
            writer.writerow([key, value])

    helper.write_table(tables, 'Route_stop_seq')
    print("SUCCESS with Route Stop Seq")
def route_stop_seq(tables, static_feed, arg_agency_id, arg_route_id,
                   trip2pattern):
    """Build ``Route_stop_seq`` and the trip -> pattern mapping.

    For every processed route, each trip's ordered list of stop ids is
    treated as a pattern; distinct stop lists within a route are numbered
    from 1 in order of first appearance. One output row is produced per
    stop_time of every trip.

    Args:
        tables: Dict-like of table name -> DataFrame;
            ``tables['Route_stop_seq']`` is replaced.
        static_feed: Dict-like holding the ``routes``, ``trips`` and
            ``stop_times`` DataFrames.
        arg_agency_id: Value stored in the ``agency_id`` column of every row.
        arg_route_id: ``'all'`` to process every route, otherwise a single
            route id; it is converted with ``int()`` before matching.
        trip2pattern: Dict updated in place with ``trip_id -> pattern_id``.

    Side effects:
        Prints each route id and trip id, writes ``Trip2Pattern.csv`` in the
        working directory, calls
        ``helper.write_table(tables, 'Route_stop_seq')`` and prints a success
        message.
    """
    routes_df = static_feed['routes']
    trips_df = static_feed['trips']
    stop_times_df = static_feed['stop_times']

    if arg_route_id == 'all':
        selected_routes = routes_df
    else:
        # Pick the routes row that actually belongs to the requested route so
        # the name used in pattern_id is that route's name. Taking the first
        # row of the routes frame regardless of the request would label the
        # patterns with an unrelated route. Only one row is processed.
        wanted_route_id = int(arg_route_id)
        selected_routes = routes_df.loc[
            routes_df['route_id'] == wanted_route_id].iloc[:1]

    # Collected here and converted once; appending to the DataFrame per row
    # re-copies the whole table on every stop_time.
    rows = []
    for i, row in selected_routes.iterrows():
        route_id = row['route_id']
        print(route_id)
        # NOTE(review): the value is stored as route_short_name but is read
        # from the route_long_name field -- kept as-is, confirm intent.
        route_short_name = str(
            helper.optional_field(i, 'route_long_name', routes_df))

        # str(stop id list) -> 1-based pattern number within this route.
        pattern_numbers = {}
        route_trips = trips_df.loc[trips_df['route_id'] == route_id]
        for j, subrow in route_trips.iterrows():
            trip_id = subrow['trip_id']
            print(trip_id)
            # direction_id may be absent from the trips frame; default to 0.
            direction_id = (
                subrow['direction_id'] if 'direction_id' in subrow else 0)

            trip_id_block = stop_times_df.loc[
                stop_times_df['trip_id'] == trip_id]
            sequence_key = str(trip_id_block['stop_id'].tolist())
            pattern_num = pattern_numbers.setdefault(
                sequence_key, len(pattern_numbers) + 1)
            pattern_id = "{0}_{1}_{2}".format(
                route_short_name, direction_id, pattern_num)

            for k, subsubrow in trip_id_block.iterrows():
                rows.append({
                    'trip_id': trip_id,
                    'agency_id': arg_agency_id,
                    'route_short_name': route_short_name,
                    'route_dir': direction_id,
                    'pattern_id': pattern_id,
                    'stop_id': str(subsubrow['stop_id']),
                    'seq': subsubrow['stop_sequence'],
                    'is_time_point': int(helper.optional_field(
                        k, 'timepoint', stop_times_df, 0)),
                    'version': 1,  # replace later
                })
            trip2pattern[trip_id] = pattern_id

    tables['Route_stop_seq'] = pd.DataFrame(rows)

    # 'wb' is the Python 2 csv idiom used by this file.
    with open('Trip2Pattern.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(["trip_id", "pattern_id"])
        for key, value in trip2pattern.items():
            writer.writerow([key, value])

    helper.write_table(tables, 'Route_stop_seq')
    print("SUCCESS with Route Stop Seq")
def runPattern(tables, static_feed, arg_agency_id):
    """Build the ``RunPattern`` table (one row per trip) and write it out.

    Each trip is joined with its route (for the short name), its
    ``calendar`` entry (service dates and weekday flags) and the
    ``Trip2Pattern.csv`` mapping (pattern id). Trips are numbered with a
    ``run`` counter that starts at 1 and increases independently for every
    ``(route_short_name, service_id, route_dir)`` combination.

    Args:
        tables: Dict-like of table name -> DataFrame;
            ``tables['RunPattern']`` is replaced.
        static_feed: Dict-like holding the ``trips``, ``routes`` and
            ``calendar`` DataFrames.
        arg_agency_id: Value stored in the ``agency_id`` column of every row.

    Raises:
        IndexError: If a trip's route, calendar entry or pattern mapping
            cannot be found (the lookups take the first match).

    Side effects:
        Reads ``Trip2Pattern.csv`` through ``helper.csv2df``, calls
        ``helper.write_table(tables, 'RunPattern')`` and prints a success
        message.
    """
    columns = ['agency_id', 'route_short_name', 'start_date', 'end_date',
               'service_id', 'day', 'route_dir', 'run', 'pattern_id',
               'trip_headsign', 'trip_id', 'version']
    routes_df = static_feed['routes']
    trips_df = static_feed['trips']
    calendar_df = static_feed['calendar']

    # load the trip2pattern csv
    trip2pattern = helper.csv2df("Trip2Pattern.csv")

    # (route_short_name, service_id, route_dir) -> number of runs seen so far.
    # A flat tuple key keeps every combination's counter independent; nested
    # dicts that are re-created when a new service or direction shows up would
    # wipe the counters of the sibling combinations and repeat run numbers.
    run_count = {}

    # Rows are built as plain dicts and turned into a frame once, instead of
    # writing through a row Series taken from the table, which only updates
    # the table when pandas happens to hand back a view.
    rows = []
    for i, row in trips_df.iterrows():
        # positional index of the trip's route in the routes frame
        j = np.where(routes_df['route_id'] == row['route_id'])[0][0]
        route_short_name = str(helper.optional_field(
            j, 'route_short_name', routes_df,
            routes_df.iloc[j]['route_long_name']))

        # use calendar.txt
        service_id = row['service_id']
        calendar = calendar_df.loc[
            calendar_df['service_id'] == service_id].iloc[0]
        route_dir = int(
            helper.optional_field(i, 'direction_id', trips_df, 0))

        # Todo: order runs by time of first stop in pattern
        run_key = (route_short_name, service_id, route_dir)
        run = run_count.get(run_key, 0) + 1
        run_count[run_key] = run

        pattern_id = str(trip2pattern[
            trip2pattern['trip_id'] == row['trip_id']].iloc[0]['pattern_id'])

        # 'trip_headsign' is intentionally not filled in; it stays empty.
        rows.append({
            'agency_id': arg_agency_id,
            'route_short_name': route_short_name,
            'service_id': service_id,
            'start_date': datetime.datetime.strptime(
                str(calendar['start_date']), "%Y%m%d"),
            'end_date': datetime.datetime.strptime(
                str(calendar['end_date']), "%Y%m%d"),
            'route_dir': route_dir,
            'day': "{0}{1}{2}{3}{4}{5}{6}".format(
                calendar['monday'], calendar['tuesday'],
                calendar['wednesday'], calendar['thursday'],
                calendar['friday'], calendar['saturday'],
                calendar['sunday']),
            'run': run,
            'pattern_id': pattern_id,
            'trip_id': str(row['trip_id']),
            'version': 1,
        })

    tables['RunPattern'] = pd.DataFrame(rows, columns=columns)
    helper.write_table(tables, 'RunPattern')
    print("SUCCESS with RunPatterns")
def agencies(tables, static_feed, arg_agency_id):
    """Build the ``Agency`` table from ``static_feed['agency']`` and write it.

    Args:
        tables: Dict-like of table name -> DataFrame; ``tables['Agency']`` is
            replaced.
        static_feed: Dict-like holding the ``agency`` DataFrame.
        arg_agency_id: Value stored in the ``agency_id`` column of every row.

    Raises:
        pytz.UnknownTimeZoneError: If an ``agency_timezone`` value is not a
            timezone name known to pytz.

    Side effects:
        Calls ``helper.write_table(tables, 'Agency')`` and prints a success
        message.
    """
    columns = ['agency_id', 'agency_name', 'agency_url', 'agency_timezone',
               'agency_lang', 'agency_phone', 'timezone_name']
    agency_df = static_feed['agency']

    # Rows are built as plain dicts and turned into a frame once, instead of
    # writing through a row Series taken from the table, which only updates
    # the table when pandas happens to hand back a view.
    rows = []
    for i, row in agency_df.iterrows():
        rows.append({
            'agency_id': arg_agency_id,
            'agency_name': row['agency_name'],
            'agency_url': row['agency_url'],
            # the tzinfo object itself is stored; the name goes in
            # 'timezone_name' below
            'agency_timezone': pytz.timezone(row['agency_timezone']),
            'agency_lang': helper.optional_field(
                i, 'agency_lang', agency_df),
            'agency_phone': helper.optional_field(
                i, 'agency_phone', agency_df),
            'timezone_name': row['agency_timezone'],
        })

    tables['Agency'] = pd.DataFrame(rows, columns=columns)
    helper.write_table(tables, 'Agency')
    print("SUCCESS finished with agencies")
def stops(tables, static_feed, trip_update_feed, alert_feed,
          vehicle_position_feed, agency_id):
    """Build the ``Stops`` table from ``static_feed['stops']`` and write it out.

    One output row is produced per input stop, carrying ``agency_id``,
    ``stop_id``, ``stop_code``, ``stop_name``, ``stop_desc``, ``stop_lat``,
    ``stop_lon``, ``location_type``, ``parent_station`` and
    ``wheelchair_boarding``.

    Args:
        tables: Dict-like of table name -> DataFrame; ``tables['Stops']`` is
            replaced with the newly built frame.
        static_feed: Dict-like holding the ``stops`` DataFrame.
        trip_update_feed: Not used by this function.
        alert_feed: Not used by this function.
        vehicle_position_feed: Not used by this function.
        agency_id: Value stored in the ``agency_id`` column of every row.

    Side effects:
        Prints the input column names, calls
        ``helper.write_table(tables, "Stops")`` and prints a success message.
    """
    stops_df = static_feed['stops']
    print(stops_df.columns)

    # Rows are collected in a list and turned into a DataFrame once; growing
    # the frame with DataFrame.append per row copies the whole table each time.
    rows = []
    for i, row in stops_df.iterrows():
        new_row = {'agency_id': agency_id}

        # Prefer the real stop_id column wherever it sits in the header; only
        # fall back to the first column when no column is named exactly
        # 'stop_id' (presumably a header mangled by a BOM -- TODO confirm).
        if 'stop_id' in row.index:
            new_row['stop_id'] = row['stop_id']
        else:
            new_row['stop_id'] = row.iloc[0]

        # NOTE(review): helper.optional_field(index, name, frame[, default])
        # presumably yields the default when the column is absent -- confirm
        # against helper before relying on it.
        new_row['stop_code'] = str(
            helper.optional_field(i, 'stop_code', stops_df))
        new_row['stop_name'] = str(row['stop_name'])
        new_row['stop_desc'] = str(
            helper.optional_field(i, 'stop_desc', stops_df))
        new_row['stop_lat'] = float(row['stop_lat'])
        new_row['stop_lon'] = float(row['stop_lon'])
        new_row['location_type'] = int(
            helper.optional_field(i, 'location_type', stops_df, 0))
        new_row['parent_station'] = int(
            helper.optional_field(i, 'parent_station', stops_df, 0))
        new_row['wheelchair_boarding'] = int(
            helper.optional_field(i, 'wheelchair_boarding', stops_df, 0))
        rows.append(new_row)

    tables['Stops'] = pd.DataFrame(rows)
    helper.write_table(tables, "Stops")
    print("SUCCESS with stops")
def agencies(tables, static_feed, arg_agency_id):
    """Build the ``Agency`` table from ``static_feed['agency']`` and write it.

    Args:
        tables: Dict-like of table name -> DataFrame; ``tables['Agency']`` is
            replaced.
        static_feed: Dict-like holding the ``agency`` DataFrame.
        arg_agency_id: Value stored in the ``agency_id`` column of every row.

    Raises:
        pytz.UnknownTimeZoneError: If an ``agency_timezone`` value is not a
            timezone name known to pytz.

    Side effects:
        Calls ``helper.write_table(tables, 'Agency')`` and prints a success
        message.
    """
    columns = [
        'agency_id', 'agency_name', 'agency_url', 'agency_timezone',
        'agency_lang', 'agency_phone', 'timezone_name'
    ]
    agency_df = static_feed['agency']

    # Each record is assembled as a dict and the frame is created in one go.
    # Filling a pre-allocated frame through the Series returned by .loc[i]
    # depends on pandas returning a view, which it does not guarantee.
    records = []
    for i, row in agency_df.iterrows():
        record = {}
        record['agency_id'] = arg_agency_id
        record['agency_name'] = row['agency_name']
        record['agency_url'] = row['agency_url']
        # the tzinfo object itself is stored; its name goes in timezone_name
        record['agency_timezone'] = pytz.timezone(row['agency_timezone'])
        record['agency_lang'] = helper.optional_field(
            i, 'agency_lang', agency_df)
        record['agency_phone'] = helper.optional_field(
            i, 'agency_phone', agency_df)
        record['timezone_name'] = row['agency_timezone']
        records.append(record)

    tables['Agency'] = pd.DataFrame(records, columns=columns)
    helper.write_table(tables, 'Agency')
    print("SUCCESS finished with agencies")
def routes(tables, static_feed, trip_update_feed, alert_feed,
           vehicle_position_feed, agency_id):
    """Build the ``Routes`` table and write it out.

    One row is emitted for every (route, direction_id) pair, where the
    directions are the distinct ``direction_id`` values of the trips that
    reference the route.

    Args:
        tables: Dict-like of table name -> DataFrame; ``tables['Routes']`` is
            replaced.
        static_feed: Dict-like holding the ``routes`` and ``trips``
            DataFrames.
        trip_update_feed: Not used by this function.
        alert_feed: Not used by this function.
        vehicle_position_feed: Not used by this function.
        agency_id: Value stored in the ``agency_id`` column of every row.

    Side effects:
        Calls ``helper.write_table(tables, 'Routes')`` and prints a success
        message.
    """
    tables['Routes'] = pd.DataFrame()
    route_frame = static_feed['routes']
    trip_frame = static_feed['trips']

    for i, row in route_frame.iterrows():
        serves_route = trip_frame['route_id'] == row['route_id']
        directions = trip_frame.loc[serves_route]['direction_id'].unique()

        for direction_id in directions:
            # short and long name each fall back to the other one
            record = pd.Series({
                'agency_id': agency_id,
                'route_short_name': str(helper.optional_field(
                    i, 'route_short_name', route_frame,
                    route_frame.iloc[i]['route_long_name'])),
                'route_dir': direction_id,
                'route_type': int(row['route_type']),
                'route_long_name': str(helper.optional_field(
                    i, 'route_long_name', route_frame,
                    route_frame.iloc[i]['route_short_name'])),
                'route_desc': helper.optional_field(
                    i, 'route_desc', route_frame),
                'route_url': helper.optional_field(
                    i, 'route_url', route_frame),
                'route_color': helper.optional_field(
                    i, 'route_color', route_frame,
                    default='FFFFFF').upper(),
                'route_text_color': helper.optional_field(
                    i, 'route_text_color', route_frame,
                    default='000000').upper(),
                'route_id': str(row['route_id']),
                'version': 1,
            })
            tables['Routes'] = tables['Routes'].append(
                record, ignore_index=True)

    helper.write_table(tables, 'Routes')
    print("SUCCESS with routes")
def routes(tables, static_feed, arg_agency_id):
    """Build the ``Routes`` table and write it out.

    One row is emitted for every (route, direction_id) pair, where the
    directions are the distinct ``direction_id`` values of the trips that
    reference the route.

    Args:
        tables: Dict-like of table name -> DataFrame; ``tables['Routes']`` is
            replaced.
        static_feed: Dict-like holding the ``routes`` and ``trips``
            DataFrames.
        arg_agency_id: Value stored in the ``agency_id`` column of every row.

    Side effects:
        Calls ``helper.write_table(tables, 'Routes')`` and prints a success
        message.
    """
    tables['Routes'] = pd.DataFrame()
    all_routes = static_feed['routes']
    all_trips = static_feed['trips']

    for i, row in all_routes.iterrows():
        trips_of_route = all_trips.loc[
            all_trips['route_id'] == row['route_id']]

        for direction_id in trips_of_route['direction_id'].unique():
            entry = {}
            entry['agency_id'] = arg_agency_id

            # the short name falls back to the long name ...
            long_name_fallback = all_routes.iloc[i]['route_long_name']
            entry['route_short_name'] = str(helper.optional_field(
                i, 'route_short_name', all_routes, long_name_fallback))

            entry['route_dir'] = direction_id
            entry['route_type'] = int(row['route_type'])

            # ... and the long name falls back to the short name
            short_name_fallback = all_routes.iloc[i]['route_short_name']
            entry['route_long_name'] = str(helper.optional_field(
                i, 'route_long_name', all_routes, short_name_fallback))

            entry['route_desc'] = helper.optional_field(
                i, 'route_desc', all_routes)
            entry['route_url'] = helper.optional_field(
                i, 'route_url', all_routes)

            background = helper.optional_field(
                i, 'route_color', all_routes, default='FFFFFF')
            entry['route_color'] = background.upper()
            foreground = helper.optional_field(
                i, 'route_text_color', all_routes, default='000000')
            entry['route_text_color'] = foreground.upper()

            entry['route_id'] = str(row['route_id'])
            entry['version'] = 1

            tables['Routes'] = tables['Routes'].append(
                pd.Series(entry), ignore_index=True)

    helper.write_table(tables, 'Routes')
    print("SUCCESS with routes")
def schedules(tables, static_feed, arg_agency_id, trip2pattern):
    """Build the ``Schedules`` table (one row per stop_time) and write it out.

    Every stop_time is combined with trip-level data looked up by
    ``trip_id``: the pattern id from ``Trip2Pattern.csv``, the route name and
    direction from the stored ``Route_stop_seq`` table, and the service
    dates, day string and run number from the stored ``RunPattern`` table.
    Trip-level values are omitted from a row when the corresponding table has
    no entry for the trip.

    Args:
        tables: Dict-like of table name -> DataFrame; ``tables['Schedules']``
            is replaced.
        static_feed: Dict-like holding the ``stop_times`` DataFrame.
        arg_agency_id: Value stored in the ``agency_id`` column of every row.
        trip2pattern: Ignored; the mapping is always reloaded from
            ``Trip2Pattern.csv``.

    Raises:
        Exception: Whatever the helper loaders raise; it is printed and
            re-raised, since nothing below can run without the lookup tables.
        IndexError: If a trip has no entry in ``Trip2Pattern.csv``.

    Side effects:
        Prints progress for each stop_time, calls
        ``helper.write_table(tables, "Schedules")`` and prints a success
        message.
    """
    login = {'host': "localhost", 'user': SQL_USER, 'passwd': SQL_PWD,
             'db': "PATHTransit"}
    try:
        run_pattern_df = helper.sql2df('RunPattern', login)
        # load the trip2pattern csv
        trip2pattern = helper.csv2df("Trip2Pattern.csv")
        route_stop_seq_df = helper.sql2df('Route_stop_seq', login)
    except Exception as e:
        # Report and stop here: continuing would only fail further down with
        # an unrelated NameError on the frames that were never loaded.
        print(e)
        raise

    stop_times_df = static_feed['stop_times']

    # trip_id -> dict of the values shared by every stop_time of that trip,
    # so the three table lookups run once per trip instead of once per row.
    trip_fields = {}

    # Rows are collected in a list and turned into a DataFrame once; growing
    # the frame with DataFrame.append per row copies the whole table each time.
    rows = []
    for counter, (a, row) in enumerate(stop_times_df.iterrows()):
        print(counter)
        current_trip_id = row['trip_id']

        if current_trip_id in trip_fields:
            print(current_trip_id)
        else:
            print("current trip id: " + str(current_trip_id))
            # access trip2pattern.csv to match the trip_id with the pattern
            fields = {
                'pattern_id': trip2pattern[
                    trip2pattern['trip_id'] == current_trip_id
                ].iloc[0]['pattern_id'],
            }

            # route name and direction come from route_stop_seq
            seq_match = route_stop_seq_df[
                route_stop_seq_df['trip_id'] == current_trip_id]
            if not seq_match.empty:
                first_seq = seq_match.iloc[0]
                fields['route_short_name'] = first_seq['route_short_name']
                fields['route_dir'] = first_seq['route_dir']

            # service dates, day string and run come from run pattern
            run_match = run_pattern_df[
                run_pattern_df['trip_id'] == current_trip_id]
            if not run_match.empty:
                first_run = run_match.iloc[0]
                fields['start_date'] = first_run['start_date']
                fields['end_date'] = first_run['end_date']
                fields['day'] = first_run['day']
                fields['run'] = first_run['run']

            trip_fields[current_trip_id] = fields

        new_row = {'agency_id': arg_agency_id}
        new_row.update(trip_fields[current_trip_id])

        # all remaining information is in stop_times.txt
        new_row['seq'] = row['stop_sequence']
        new_row['stop_id'] = row['stop_id']
        # The optional GTFS field is spelled 'timepoint' (no underscore).
        new_row['is_time_point'] = helper.optional_field(
            a, 'timepoint', stop_times_df)
        new_row['pickup_type'] = helper.optional_field(
            a, 'pickup_type', stop_times_df)
        # drop_off_type gets its own column rather than overwriting
        # pickup_type.
        new_row['drop_off_type'] = helper.optional_field(
            a, 'drop_off_type', stop_times_df)
        new_row['arrival_time'] = row['arrival_time']
        new_row['departure_time'] = row['departure_time']
        new_row['stop_headsign'] = helper.optional_field(
            a, 'stop_headsign', stop_times_df)
        new_row['trip_id'] = current_trip_id
        rows.append(new_row)

    tables["Schedules"] = pd.DataFrame(rows)
    helper.write_table(tables, "Schedules")
    print("Sucess with Schedules")
def schedules(tables, static_feed, trip_update_feed, alert_feed,
              vehicle_position_feed, agency_id, trip2pattern):
    """Build the ``Schedules`` table (one row per stop_time) and write it out.

    Every stop_time is combined with trip-level data looked up by
    ``trip_id``: the pattern id from ``Trip2Pattern.csv``, the route name and
    direction from the stored ``route_stop_seq`` table, and the service
    dates, day string and run number from the stored ``RunPattern`` table.
    Trip-level values are omitted from a row when the corresponding table has
    no entry for the trip.

    Args:
        tables: Dict-like of table name -> DataFrame; ``tables['Schedules']``
            is replaced.
        static_feed: Dict-like holding the ``stop_times`` DataFrame.
        trip_update_feed: Not used by this function.
        alert_feed: Not used by this function.
        vehicle_position_feed: Not used by this function.
        agency_id: Value stored in the ``agency_id`` column of every row.
        trip2pattern: Ignored; the mapping is always reloaded from
            ``Trip2Pattern.csv``.

    Raises:
        Exception: Whatever the helper loaders raise; it is printed and
            re-raised, since nothing below can run without the lookup tables.
        IndexError: If a trip has no entry in ``Trip2Pattern.csv``.

    Side effects:
        Prints progress for each stop_time, calls
        ``helper.write_table(tables, "Schedules")`` and prints a success
        message.
    """
    # NOTE(review): database credentials are hard-coded here; they should
    # come from configuration rather than source code.
    login = {
        'host': "localhost",
        'user': "******",
        'passwd': "root",
        'db': "TrafficTransit"
    }
    try:
        run_pattern_df = helper.sql2df('RunPattern', login)
        # load the trip2pattern csv
        trip2pattern = helper.csv2df("Trip2Pattern.csv")
        route_stop_seq_df = helper.sql2df('route_stop_seq', login)
    except Exception as e:
        # Report and stop here: continuing would only fail further down with
        # an unrelated NameError on the frames that were never loaded.
        print(e)
        raise

    stop_times_df = static_feed['stop_times']

    # trip_id -> dict of the values shared by every stop_time of that trip,
    # so the three table lookups run once per trip instead of once per row.
    per_trip = {}

    # Rows are collected in a list and turned into a DataFrame once; growing
    # the frame with DataFrame.append per row copies the whole table each time.
    rows = []
    counter = 0
    for a, row in stop_times_df.iterrows():
        print(counter)
        counter += 1
        current_trip_id = row['trip_id']

        shared = per_trip.get(current_trip_id)
        if shared is not None:
            print(current_trip_id)
        else:
            print("current trip id: " + str(current_trip_id))
            shared = {}
            # access trip2pattern.csv to match the trip_id with the pattern
            pattern_match = trip2pattern[
                trip2pattern['trip_id'] == current_trip_id]
            shared['pattern_id'] = pattern_match.iloc[0]['pattern_id']

            # route name and direction come from route_stop_seq
            seq_match = route_stop_seq_df[
                route_stop_seq_df['trip_id'] == current_trip_id]
            if not seq_match.empty:
                first_seq = seq_match.iloc[0]
                for name in ('route_short_name', 'route_dir'):
                    shared[name] = first_seq[name]

            # service dates, day string and run come from run pattern
            run_match = run_pattern_df[
                run_pattern_df['trip_id'] == current_trip_id]
            if not run_match.empty:
                first_run = run_match.iloc[0]
                for name in ('start_date', 'end_date', 'day', 'run'):
                    shared[name] = first_run[name]

            per_trip[current_trip_id] = shared

        new_row = dict(shared)
        new_row['agency_id'] = agency_id

        # all remaining information is in stop_times.txt
        new_row['seq'] = row['stop_sequence']
        new_row['stop_id'] = row['stop_id']
        # The optional GTFS field is spelled 'timepoint' (no underscore).
        new_row['is_time_point'] = helper.optional_field(
            a, 'timepoint', stop_times_df)
        new_row['pickup_type'] = helper.optional_field(
            a, 'pickup_type', stop_times_df)
        # drop_off_type gets its own column rather than overwriting
        # pickup_type.
        new_row['drop_off_type'] = helper.optional_field(
            a, 'drop_off_type', stop_times_df)
        new_row['arrival_time'] = row['arrival_time']
        new_row['departure_time'] = row['departure_time']
        new_row['stop_headsign'] = helper.optional_field(
            a, 'stop_headsign', stop_times_df)
        new_row['trip_id'] = current_trip_id
        rows.append(new_row)

    tables["Schedules"] = pd.DataFrame(rows)
    helper.write_table(tables, "Schedules")
    print("Sucess with Schedules")
def runPattern(tables, static_feed, agency_id):
    """Build the ``RunPattern`` table (one row per trip) and write it out.

    Each trip is joined with its route (for the short name), its
    ``calendar`` entry (service dates and weekday flags) and the
    ``Trip2Pattern.csv`` mapping (pattern id). Trips are numbered with a
    ``run`` counter that starts at 1 and increases independently for every
    ``(route_short_name, service_id, route_dir)`` combination.

    Args:
        tables: Dict-like of table name -> DataFrame;
            ``tables['RunPattern']`` is replaced.
        static_feed: Dict-like holding the ``trips``, ``routes`` and
            ``calendar`` DataFrames.
        agency_id: Value stored in the ``agency_id`` column of every row.

    Raises:
        IndexError: If a trip's route, calendar entry or pattern mapping
            cannot be found (the lookups take the first match).

    Side effects:
        Reads ``Trip2Pattern.csv`` through ``helper.csv2df``, calls
        ``helper.write_table(tables, 'RunPattern')`` and prints a success
        message.
    """
    columns = [
        'agency_id', 'route_short_name', 'start_date', 'end_date',
        'service_id', 'day', 'route_dir', 'run', 'pattern_id',
        'trip_headsign', 'trip_id', 'version'
    ]
    weekdays = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday',
                'saturday', 'sunday')
    routes_df = static_feed['routes']
    trips_df = static_feed['trips']
    calendar_df = static_feed['calendar']

    # load the trip2pattern csv
    trip2pattern = helper.csv2df("Trip2Pattern.csv")

    # Runs already handed out, keyed by (route_short_name, service_id,
    # route_dir). One flat key per combination means a newly seen service or
    # direction can never reset the counters of the other combinations on
    # the same route, which would hand out duplicate run numbers.
    runs_seen = {}

    # Records are assembled as dicts and the frame is created in one go.
    # Filling a pre-allocated frame through the Series returned by .loc[i]
    # depends on pandas returning a view, which it does not guarantee.
    records = []
    for i, row in trips_df.iterrows():
        record = {'agency_id': agency_id}

        # positional index of the trip's route in the routes frame
        j = np.where(routes_df['route_id'] == row['route_id'])[0][0]
        record['route_short_name'] = str(helper.optional_field(
            j, 'route_short_name', routes_df,
            routes_df.iloc[j]['route_long_name']))

        # use calendar.txt
        record['service_id'] = row['service_id']
        calendar = calendar_df.loc[
            calendar_df['service_id'] == row['service_id']].iloc[0]
        record['start_date'] = datetime.datetime.strptime(
            str(calendar['start_date']), "%Y%m%d")
        record['end_date'] = datetime.datetime.strptime(
            str(calendar['end_date']), "%Y%m%d")
        record['route_dir'] = int(
            helper.optional_field(i, 'direction_id', trips_df, 0))
        # seven weekday flags concatenated, monday first
        record['day'] = "".join(
            "{0}".format(calendar[name]) for name in weekdays)

        # Todo: order runs by time of first stop in pattern
        run_key = (record['route_short_name'], record['service_id'],
                   record['route_dir'])
        runs_seen[run_key] = runs_seen.get(run_key, 0) + 1
        record['run'] = runs_seen[run_key]

        record['pattern_id'] = str(trip2pattern[
            trip2pattern['trip_id'] == row['trip_id']].iloc[0]['pattern_id'])
        # 'trip_headsign' is intentionally not filled in; it stays empty.
        record['trip_id'] = str(row['trip_id'])
        record['version'] = 1
        records.append(record)

    tables['RunPattern'] = pd.DataFrame(records, columns=columns)
    helper.write_table(tables, 'RunPattern')
    print("SUCCESS with RunPatterns")