def analyze_average_commute(date_val):
    """Plot commute time to the target station over the day.

    Builds the train graph for ``date_val``, runs ``dynamic_all_to_one``
    towards the target station, draws the graph with ``display_graph`` and
    then plots one commute-time curve per source station with matplotlib,
    finishing with ``plt.show()``.

    Args:
        date_val: Passed unchanged to ``create_train_graph``.
            NOTE(review): presumably the date to analyse -- confirm.

    Returns:
        None.
    """
    # Get DB data
    session = manage.get_session()
    stop_names = manage.get_stop_names()
    station_id_dict = station_id2name('../data/stops_ids_and_names.txt')
    # Create a graph representation of the train stops
    G = create_train_graph(session, stop_names, station_id_dict, date_val)

    target = 4600  # station_id for Tel-Aviv Hashalom
    source_station_ids = (5410, 3500, 8700)
    time_len = 60 * 24  # number of minutes in a day
    first_minute = 240  # plotting starts at index 240 (label '0400')
    tick_step = 240  # one x tick every 240 indices

    # 'HHMM' label for every minute index. divmod keeps the hour an integer
    # and the comprehension yields a real list, so len() and slicing behave
    # the same on Python 2 and Python 3 (the former `/` + map() version
    # only worked on Python 2).
    time_indx_real = ['%02d%02d' % divmod(minute, 60)
                      for minute in range(time_len)]

    # Calculation of the DOT graph algorithms; the second return value
    # (paths) is not needed for this plot.
    dist_vec, _ = dynamic_all_to_one(G, target, time_len)

    display_graph(G, draw_edge_label=False)

    xindx = list(range(len(time_indx_real)))
    plt.xticks(xindx[first_minute::tick_step],
               time_indx_real[first_minute::tick_step])
    # NOTE(review): dist_vec is assumed to map station_id to a per-minute
    # sequence of commute times -- confirm against dynamic_all_to_one.
    for station_id in source_station_ids:
        plt.plot(xindx[first_minute:],
                 dist_vec[station_id][first_minute:],
                 label=station_id_dict[station_id],
                 linewidth=2.5)
    plt.grid()
    plt.ylim(0, 150)
    plt.xlabel('time of day')
    plt.ylabel('time of commute in minutes')
    # Use `target` so the title cannot drift from the station analysed.
    plt.title('Commute Time to %s' % station_id_dict[target])
    plt.legend()
    plt.show()
    def create(self):
        """Register the bolts of this topology and connect them with streams.

        Loads every ``TrainStop`` row through the session returned by
        ``manage.get_session()``, registers each bolt via ``self.addbolt``
        and then links them with ``ministorm.Stream`` in pipeline order.
        Reads ``self._args.station_names_file[0]`` and
        ``self._args.output_station_stats[0]``.
        """
        add = self.addbolt

        # --- bolts -------------------------------------------------------
        # All rows are loaded; a date filter such as
        #   .filter(TrainStop.date == datetime.date(2013, 1, 1))
        # was left disabled in the original.
        all_stops = manage.get_session().query(TrainStop).all()

        # Disabled alternative source:
        #   bolts.FileSpout('rawlines', self._args.input_db_file[0])
        lines_source = add(bolts.SQLAlchemyToLineSpout('rawlines', all_stops))
        names_source = add(
            bolts.FileSpout('rawnames', self._args.station_names_file[0]))
        parser = add(bolts.LineParserBolt())
        noname_filter = add(bolts.StationNonameFilter())
        rush_filter = add(
            bolts.TimeFilter([('07:00', '10:00'), ('17:00', '19:00')]))
        delay_calc = add(bolts.DelayCalculatorBolt())
        stats = add(bolts.StationsStatsBolt())
        stats_sink = add(
            bolts.CSVPrintingSink('stationsstats',
                                  self._args.output_station_stats[0]))

        # Disabled bolts kept for reference:
        #   bolts.TrainStationStatsBolt()
        #   bolts.TrainStationAvgDelayBolt()

        # --- streams -----------------------------------------------------
        # (stream name, producer, consumer), created in this exact order.
        wiring = (
            ('rawlines', lines_source, parser),
            ('rawnames', names_source, parser),
            ('parsedlines', parser, noname_filter),
            ('parsedlines', noname_filter, rush_filter),
            ('parsedlines', rush_filter, delay_calc),
            ('parsedlines_delay', delay_calc, stats),
            ('stationsstats', stats, stats_sink),
        )
        for stream_name, producer, consumer in wiring:
            ministorm.Stream(stream_name, producer, consumer)
# Example #3
# 0
def analyze_average_commute(date_val):
    """Plot commute time to the target station over the day.

    Builds the train graph for ``date_val``, runs ``dynamic_all_to_one``
    towards the target station, draws the graph with ``display_graph`` and
    then plots one commute-time curve per source station with matplotlib,
    finishing with ``plt.show()``.

    Args:
        date_val: Passed unchanged to ``create_train_graph``.
            NOTE(review): presumably the date to analyse -- confirm.

    Returns:
        None.
    """
    # Get DB data
    session = manage.get_session()
    stop_names = manage.get_stop_names()
    station_id_dict = station_id2name('../data/stops_ids_and_names.txt')
    # Create a graph representation of the train stops
    G = create_train_graph(session, stop_names, station_id_dict, date_val)

    target = 4600  # station_id for Tel-Aviv Hashalom
    source_station_ids = (5410, 3500, 8700)
    time_len = 60 * 24  # number of minutes in a day
    first_minute = 240  # plotting starts at index 240 (label '0400')
    tick_step = 240  # one x tick every 240 indices

    # 'HHMM' label for every minute index. divmod keeps the hour an integer
    # and the comprehension yields a real list, so len() and slicing behave
    # the same on Python 2 and Python 3 (the former `/` + map() version
    # only worked on Python 2).
    time_indx_real = ['%02d%02d' % divmod(minute, 60)
                      for minute in range(time_len)]

    # Calculation of the DOT graph algorithms; the second return value
    # (paths) is not needed for this plot.
    dist_vec, _ = dynamic_all_to_one(G, target, time_len)

    display_graph(G, draw_edge_label=False)

    xindx = list(range(len(time_indx_real)))
    plt.xticks(xindx[first_minute::tick_step],
               time_indx_real[first_minute::tick_step])
    # NOTE(review): dist_vec is assumed to map station_id to a per-minute
    # sequence of commute times -- confirm against dynamic_all_to_one.
    for station_id in source_station_ids:
        plt.plot(xindx[first_minute:],
                 dist_vec[station_id][first_minute:],
                 label=station_id_dict[station_id],
                 linewidth=2.5)
    plt.grid()
    plt.ylim(0, 150)
    plt.xlabel('time of day')
    plt.ylabel('time of commute in minutes')
    # Use `target` so the title cannot drift from the station analysed.
    plt.title('Commute Time to %s' % station_id_dict[target])
    plt.legend()
    plt.show()
  stop_names_list_ordered = [stop_names_list[i] for i in order]

  title = 'Percentage of on-time trains per station. On time is when delay < {} minutes'.format(minutes)
  vals_dict = {"all":all_vals_ordered, "rush":rush_ordered}  
  display_station_delay_bar_graph(vals_dict, stop_names_list_ordered, title, (22.0, 16.0))
  print title
  print "Name\tAll\tRush-hour"
  for val in reversed(zip(stop_names_list_ordered, all_vals_ordered, rush_ordered)):
    print val[0] + '\t' + str(val[1]) + '\t' + str(val[2])
  print ""

  passenger_weighted_all, passenger_weighted_rush = calc_passenger_weighted_ontime_percent(stops, stop_passenger_ratio, data)
  print "Passenger ontime={}%, rush={}% (up to {} minutes delay)".format(passenger_weighted_all, passenger_weighted_rush, minutes)

if __name__ == "__main__":
  # Script entry point: produce one on-time report per delay threshold.

  # excluding stops with less than 1% of passengers:
  exclude_stops = [
    4170, 8700, 7000, 6300, 4100, 4250, 5410, 4690,
    9100, 9000, 700, 4660, 5150, 300, 6700, 5010,
    5300, 1300, 8550, 4640, 4800, 2500, 6500, 7500,
  ]
  session = manage.get_session()

  # Delay thresholds, in minutes, passed as the first argument.
  for minutes in (1, 5):
    get_ontime_percentage_report_for_given_minutes(
      minutes, session, exclude_stops)
  
  
  ## snippet of 2d visualization of lateness on stops vs hours axis:
  #hours = range(0,24)
  #d = {}
  #for hour in hours:
    #d[hour] = Series([0] * len(stops), index=stops)
  #df = DataFrame(d, dtype=float)
  ##df.loc[9900][0] = [0,1,2]
  ##DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])
                                    (22.0, 16.0))
    print title
    print "Name\tAll\tRush-hour"
    for val in reversed(
            zip(stop_names_list_ordered, all_vals_ordered, rush_ordered)):
        print val[0] + '\t' + str(val[1]) + '\t' + str(val[2])
    print ""

    passenger_weighted_all, passenger_weighted_rush = calc_passenger_weighted_ontime_percent(
        stops, stop_passenger_ratio, data)
    print "Passenger ontime={}%, rush={}% (up to {} minutes delay)".format(
        passenger_weighted_all, passenger_weighted_rush, minutes)


if __name__ == "__main__":
    # Script entry point: produce one on-time report per delay threshold.

    # excluding stops with less than 1% of passengers:
    exclude_stops = [
        4170, 8700, 7000, 6300, 4100, 4250, 5410, 4690,
        9100, 9000, 700, 4660, 5150, 300, 6700, 5010,
        5300, 1300, 8550, 4640, 4800, 2500, 6500, 7500,
    ]
    session = manage.get_session()

    # Delay thresholds, in minutes, passed as the first argument.
    for minutes in (1, 5):
        get_ontime_percentage_report_for_given_minutes(
            minutes, session, exclude_stops)

    ## snippet of 2d visualization of lateness on stops vs hours axis:
    #hours = range(0,24)
    #d = {}
    #for hour in hours:
    #d[hour] = Series([0] * len(stops), index=stops)