def analyze_average_commute(date_val):
    """Plot commute time to the target station as a function of time of day.

    Builds the train-stop graph for ``date_val``, runs
    ``dynamic_all_to_one`` towards station 4600 over a 24 * 60 step
    horizon, displays the graph, and plots the resulting ``dist_vec``
    curves for three hard-coded source stations.

    Args:
        date_val: Passed through unchanged to ``create_train_graph``.

    Returns:
        None. The function only produces plots (ends with ``plt.show()``).
    """
    # Get DB data
    session = manage.get_session()
    stop_names = manage.get_stop_names()
    station_id_dict = station_id2name('../data/stops_ids_and_names.txt')

    # Create a graph representation of the train stops
    G = create_train_graph(session, stop_names, station_id_dict, date_val)

    target = 4600  # station_id for Tel-Aviv Hashalom
    time_len = 60 * 24  # one step per minute of the day
    first_minute = 240  # plotting starts at minute 240 (04:00)
    tick_step = 240  # one x tick every 240 minutes (4 hours)
    source_stations = (5410, 3500, 8700)

    # Build 'HHMM' labels for every minute of the day.
    # Floor division is required: with true division the hour component
    # becomes fractional and '%04d' would render wrong labels.
    minute_of_day = np.arange(time_len)
    hour_vec = minute_of_day // 60
    minute_vec = minute_of_day % 60
    # A real list (not a lazy map object) so len() and slicing work.
    time_indx_real = ['%04d' % x for x in hour_vec * 100 + minute_vec]

    # Calculation of the DOT graph algorithms
    # (the second return value, the path vector, is not used here)
    dist_vec, _ = dynamic_all_to_one(G, target, time_len)
    display_graph(G, draw_edge_label=False)

    xindx = list(range(len(time_indx_real)))
    plt.xticks(xindx[first_minute::tick_step],
               time_indx_real[first_minute::tick_step])

    # One curve per source station.
    # NOTE(review): assumes dist_vec[station_id] holds one value per minute
    # of the day - confirm against dynamic_all_to_one.
    for station_id in source_stations:
        plt.plot(xindx[first_minute:],
                 dist_vec[station_id][first_minute:],
                 label=station_id_dict[station_id],
                 linewidth=2.5)

    plt.grid()
    plt.ylim(0, 150)
    plt.xlabel('time of day')
    plt.ylabel('time of commute in minutes')
    plt.title('Commute Time to %s' % station_id_dict[target])
    plt.legend()
    plt.show()
def create(self):
    """Create the bolts of the station-statistics topology and wire them up.

    All ``TrainStop`` rows are loaded from the database session and fed,
    together with the station-names file, through a chain of bolts:
    line parsing, no-name filtering, rush-hour time filtering, delay
    calculation and station statistics, ending in a CSV sink.
    """
    # Load every TrainStop row. A date filter
    # (TrainStop.date == datetime.date(2013, 1, 1)) was left disabled
    # in the original code.
    db_session = manage.get_session()
    all_stops = db_session.query(TrainStop).all()

    # --- creating the bolts (registration order preserved) ---
    # The raw-line source reads from the DB rows; a FileSpout over
    # self._args.input_db_file[0] was the disabled alternative.
    db_spout = self.addbolt(
        bolts.SQLAlchemyToLineSpout('rawlines', all_stops))
    names_spout = self.addbolt(
        bolts.FileSpout('rawnames', self._args.station_names_file[0]))
    parser = self.addbolt(bolts.LineParserBolt())
    noname_filter = self.addbolt(bolts.StationNonameFilter())
    rush_windows = [('07:00', '10:00'), ('17:00', '19:00')]
    rush_filter = self.addbolt(bolts.TimeFilter(rush_windows))
    delay_bolt = self.addbolt(bolts.DelayCalculatorBolt())
    stats_bolt = self.addbolt(bolts.StationsStatsBolt())
    csv_sink = self.addbolt(
        bolts.CSVPrintingSink('stationsstats',
                              self._args.output_station_stats[0]))
    # TrainStationStatsBolt / TrainStationAvgDelayBolt were disabled in
    # the original code and are not registered here either.

    # --- connecting them: (stream name, source bolt, destination bolt) ---
    wiring = (
        ('rawlines', db_spout, parser),
        ('rawnames', names_spout, parser),
        ('parsedlines', parser, noname_filter),
        ('parsedlines', noname_filter, rush_filter),
        ('parsedlines', rush_filter, delay_bolt),
        ('parsedlines_delay', delay_bolt, stats_bolt),
        ('stationsstats', stats_bolt, csv_sink),
    )
    for stream_name, source, destination in wiring:
        ministorm.Stream(stream_name, source, destination)
def analyze_average_commute(date_val):
    """Plot commute time to the target station as a function of time of day.

    Builds the train-stop graph for ``date_val``, runs
    ``dynamic_all_to_one`` towards station 4600 over a 24 * 60 step
    horizon, displays the graph, and plots the resulting ``dist_vec``
    curves for three hard-coded source stations.

    Args:
        date_val: Passed through unchanged to ``create_train_graph``.

    Returns:
        None. The function only produces plots (ends with ``plt.show()``).
    """
    # Get DB data
    session = manage.get_session()
    stop_names = manage.get_stop_names()
    station_id_dict = station_id2name('../data/stops_ids_and_names.txt')

    # Create a graph representation of the train stops
    G = create_train_graph(session, stop_names, station_id_dict, date_val)

    target = 4600  # station_id for Tel-Aviv Hashalom
    time_len = 60 * 24  # one step per minute of the day
    first_minute = 240  # plotting starts at minute 240 (04:00)
    tick_step = 240  # one x tick every 240 minutes (4 hours)
    source_stations = (5410, 3500, 8700)

    # Build 'HHMM' labels for every minute of the day.
    # Floor division is required: with true division the hour component
    # becomes fractional and '%04d' would render wrong labels.
    minute_of_day = np.arange(time_len)
    hour_vec = minute_of_day // 60
    minute_vec = minute_of_day % 60
    # A real list (not a lazy map object) so len() and slicing work.
    time_indx_real = ['%04d' % x for x in hour_vec * 100 + minute_vec]

    # Calculation of the DOT graph algorithms
    # (the second return value, the path vector, is not used here)
    dist_vec, _ = dynamic_all_to_one(G, target, time_len)
    display_graph(G, draw_edge_label=False)

    xindx = list(range(len(time_indx_real)))
    plt.xticks(xindx[first_minute::tick_step],
               time_indx_real[first_minute::tick_step])

    # One curve per source station.
    # NOTE(review): assumes dist_vec[station_id] holds one value per minute
    # of the day - confirm against dynamic_all_to_one.
    for station_id in source_stations:
        plt.plot(xindx[first_minute:],
                 dist_vec[station_id][first_minute:],
                 label=station_id_dict[station_id],
                 linewidth=2.5)

    plt.grid()
    plt.ylim(0, 150)
    plt.xlabel('time of day')
    plt.ylabel('time of commute in minutes')
    plt.title('Commute Time to %s' % station_id_dict[target])
    plt.legend()
    plt.show()
stop_names_list_ordered = [stop_names_list[i] for i in order] title = 'Percentage of on-time trains per station. On time is when delay < {} minutes'.format(minutes) vals_dict = {"all":all_vals_ordered, "rush":rush_ordered} display_station_delay_bar_graph(vals_dict, stop_names_list_ordered, title, (22.0, 16.0)) print title print "Name\tAll\tRush-hour" for val in reversed(zip(stop_names_list_ordered, all_vals_ordered, rush_ordered)): print val[0] + '\t' + str(val[1]) + '\t' + str(val[2]) print "" passenger_weighted_all, passenger_weighted_rush = calc_passenger_weighted_ontime_percent(stops, stop_passenger_ratio, data) print "Passenger ontime={}%, rush={}% (up to {} minutes delay)".format(passenger_weighted_all, passenger_weighted_rush, minutes) if __name__ == "__main__": session = manage.get_session() # excluding stops with less than 1% of passengers: exclude_stops = [4170, 8700, 7000, 6300, 4100, 4250, 5410, 4690, 9100, 9000, 700, 4660, 5150, 300, 6700, 5010, 5300, 1300, 8550, 4640, 4800, 2500, 6500, 7500] for minutes in [1, 5]: get_ontime_percentage_report_for_given_minutes(minutes, session, exclude_stops) ## snippet of 2d visualization of lateness on stops vs hours axis: #hours = range(0,24) #d = {} #for hour in hours: #d[hour] = Series([0] * len(stops), index=stops) #df = DataFrame(d, dtype=float) ##df.loc[9900][0] = [0,1,2] ##DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])
(22.0, 16.0)) print title print "Name\tAll\tRush-hour" for val in reversed( zip(stop_names_list_ordered, all_vals_ordered, rush_ordered)): print val[0] + '\t' + str(val[1]) + '\t' + str(val[2]) print "" passenger_weighted_all, passenger_weighted_rush = calc_passenger_weighted_ontime_percent( stops, stop_passenger_ratio, data) print "Passenger ontime={}%, rush={}% (up to {} minutes delay)".format( passenger_weighted_all, passenger_weighted_rush, minutes) if __name__ == "__main__": session = manage.get_session() # excluding stops with less than 1% of passengers: exclude_stops = [ 4170, 8700, 7000, 6300, 4100, 4250, 5410, 4690, 9100, 9000, 700, 4660, 5150, 300, 6700, 5010, 5300, 1300, 8550, 4640, 4800, 2500, 6500, 7500 ] for minutes in [1, 5]: get_ontime_percentage_report_for_given_minutes(minutes, session, exclude_stops) ## snippet of 2d visualization of lateness on stops vs hours axis: #hours = range(0,24) #d = {} #for hour in hours: #d[hour] = Series([0] * len(stops), index=stops)