Esempio n. 1
0
def lateness_observed( trip_id, stop_id, day_of_week, stop_sequence,
                       lateness_seconds, auto_create=True ):
  """
  Records one lateness observation for a scheduled stop.

  Increments num_observations on the simplified_lateness_observations
  row for this stop's observed_stop_id and for lateness_seconds rounded
  to the nearest whole minute, and returns True. If no such row exists
  it is created with a count of 1 when auto_create is True; otherwise
  nothing is written and False is returned.

  lateness_seconds may be negative (an early arrival) or None; None is
  passed through to the query as a NULL minutes_late.

  Raises an Exception if the update touched more than one row.
  """
  import math  # function-scope import keeps this block self-contained

  sql = """\
update simplified_lateness_observations
set num_observations = num_observations+1
where minutes_late=%(lateness)s
  and observed_stop_id=%(osid)s
"""
  # Lookup only: a missing ID makes the update match nothing, and the
  # create path below then goes through create_observation_row.
  osid = get_observation_stop_id( trip_id,stop_id,day_of_week,stop_sequence,
                                  auto_create = False )

  if lateness_seconds is None:
    # NOTE(review): a NULL lateness can never satisfy the "=" comparison
    # in the update, so every such call falls through to the create
    # path - confirm whether that is intended.
    lateness_minutes = None
  else:
    # Round to the nearest minute. floor(x + 0.5) is used rather than
    # int(x + 0.5) because int() truncates toward zero, which rounds
    # early (negative) arrivals the wrong way, e.g. -50s -> 0 not -1.
    lateness_minutes = int( math.floor( lateness_seconds/60.0 + 0.5 ) )

  ret = True
  cur = get_cursor()
  try:
    SQLExec(cur,sql, {'osid':osid,'lateness':lateness_minutes})

    if cur.rowcount == 0:
      if auto_create:
        create_observation_row(trip_id,stop_id,day_of_week,stop_sequence,
                               lateness_minutes,initial_num_obs=1)
      else:
        ret = False
    elif cur.rowcount > 1:
      raise Exception("Redundant rows in observations table")
  finally:
    # Release the cursor on the error path as well.
    cur.close()

  return ret
Esempio n. 2
0
def get_route_for_dirtag(dirtag,routetag=None):
  """
  Given a nextbus dirtag, returns the GTFS route ID if known.
  If routetag is provided, and the GTFS route ID is not known,
  attempts to make a match based off the routetag alone.
  """
  cur = get_cursor();
  SQLExec(cur,
          """Select route_id from routeid_dirtag where dirtag=%(dirtag)s""",
          {'dirtag':dirtag});
  ret = [r[0] for r in cur];

  if not ret and routetag:
    SQLExec(cur,
            """select route_id from gtf_routes gr 
               where gr.route_short_name=%(routetag)s""",
            {'routetag':routetag})
    ret = [r[0] for r in cur]  

  cur.close()

  if len(ret) > 1:
    print "MORE THAN ONE ROUTE PER DIRTAG"
    print "  dirtag:",dirtag
    print "  routes:",ret
  if len(ret) == 0:
    print "No routes mapped for dirtag",dirtag
    
    return None
  return ret[0]
Esempio n. 3
0
def get_shapes_for_route(route_short_name):
  """
  Fetches every shape point belonging to the named route.

  Returns a list of dictlike rows ordered by ascending shape ID and,
  within a shape, by ascending point sequence. Each row carries the
  gtf_shapes columns plus the shape's dirtag:
  'shape_id',
  'shape_pt_lat',
  'shape_pt_lon',
  'shape_pt_sequence',
  'shape_dist_traveled',
  'dirtag'
  """
  query = """SELECT gtf_shapes.*,shape_dirtag.dirtag 
                    FROM gtf_shapes,shape_dirtag
                    WHERE gtf_shapes.shape_id = shape_dirtag.shape_id 
                      and gtf_shapes.shape_id IN 
                        (select distinct(shape_id) from gtf_trips 
                           where route_id IN
                             (select route_id from gtf_routes 
                                where route_short_name = %(route_short_name)s
                             )
                        )
                    ORDER BY gtf_shapes.shape_id asc, 
                             gtf_shapes.shape_pt_sequence asc"""
  shape_cursor = get_cursor()
  SQLExec(shape_cursor, query, {'route_short_name':route_short_name})
  shape_rows = list(shape_cursor)
  shape_cursor.close()
  return shape_rows
Esempio n. 4
0
def conditional_lateness_gained(rows=None,
                               degrees_sep=1,
                               conds=(-6,0,6,60,120)):
  
  if rows is None:
    print "Selecting..."
    cur = db.get_cursor()
    db.SQLExec(cur,"""select d1.lateness_gained as cond,d2.lateness_gained,
trip_stop_weight 
from datamining_table d1 inner join datamining_table d2
  on d1.gps_segment_id=d2.gps_segment_id
  and d1.stop_number+%(deg_sep)s=d2.stop_number
  and d1.lateness_gained in (""" + ",".join(map(str,conds)) + """)
  and d2.lateness_gained is not null
inner join trip_stop_weights tsw on d2.gtfs_trip_id = tsw.gtfs_trip_id
  and d2.stop_id = tsw.stop_id
""",
               {'deg_sep':degrees_sep});
    print "Retrieving..."
    rows = cur.fetchall()
    cur.close()
    print len(rows),"rows retrieved."

  try:
    compare_ecdfs(('cond',),rows,col_name='lateness_gained',
                  plot_CIs=True,plot_Es=False,plot_E_CIs=False)
  except e:
    print e
  return rows
Esempio n. 5
0
def measure_prob_mass( trip_id, stop_id, day_of_week, stop_sequence,
                       lateness_bounds ):
  """
  Measures what fraction of the recorded observations fall inside each
  of the given lateness ranges.

  Observations are selected by trip_id, stop_sequence and day_of_week;
  stop_id is accepted but not used by the query.

  lateness_bounds is a sequence of (low, high) pairs in minutes; both
  ends are inclusive.

  Returns a list of floats, one per pair in lateness_bounds, or None
  when the total observation count is zero.
  """
  sql = """\
select num_observations, minutes_late
from simplified_lateness_observations slo
  inner join observation_attributes oa
    on slo.observed_stop_id = oa.observed_stop_id
      and oa.trip_id=%(tid)s
      and oa.stop_sequence=%(seq)s
      and oa.day_of_week=%(dow)s
"""

  cur = get_cursor()
  try:
    SQLExec(cur, sql, {'tid':trip_id,'seq':stop_sequence,'dow':day_of_week})
    # Built as a real list because it is scanned once per bound below.
    rows = [ (r['num_observations'],r['minutes_late'])
             for r in cur.fetchall() ]
  finally:
    cur.close()

  total = sum( count for count,_ in rows )
  if total == 0:
    # Nothing observed: no distribution to report (and no division by 0).
    return None

  return [ float( sum( count for count,minutes in rows
                       if low<=minutes<=high ) ) / total
           for low,high in lateness_bounds ]
Esempio n. 6
0
def create_observation_id( trip_id, stop_id, day_of_week, stop_sequence ):
  """
  Adds a row to observation_attributes under a freshly allocated
  observed_stop_id and returns that ID.

  The new ID is one more than the largest existing observed_stop_id,
  or 0 when the table holds none.
  """
  insert_sql = """\
insert into observation_attributes
  (observed_stop_id, trip_id, stop_id, stop_sequence, day_of_week)
values
  ( %(osid)s, %(tid)s, %(sid)s, %(seq)s, %(dow)s )
"""

  ## observed_stop_id is not necessarily unique, across service intervals
  cursor = get_cursor()
  SQLExec(cursor,"select max(observed_stop_id) from observation_attributes")
  max_rows = list(cursor)

  # max() is NULL on an empty table, so start numbering at 0 then.
  if max_rows and max_rows[0][0] is not None:
    next_id = max_rows[0][0] + 1
  else:
    next_id = 0

  attributes = {'osid':next_id, 'tid':trip_id, 'sid':stop_id,
                'seq':stop_sequence, 'dow':day_of_week}
  SQLExec(cursor, insert_sql, attributes)
  cursor.close()

  return next_id
Esempio n. 7
0
def get_vehicle_reports(dirtags,tzdiff=0):
  """
  Given a list of dirtags, returns a list of dictlike rows of 
  vehicle tracking reports, sorted in ascending order of update time.

  tzdiff is a whole number of hours added to each reported update time.

  Returns an empty list, without querying, when dirtags is empty.

  Keys:
  'id',
  'lat',
  'lon',
  'routetag',
  'dirtag',
  'reported_update_time'
  """
  if len(dirtags) == 0:
    return []

  # One named placeholder (k0, k1, ...) per dirtag, so that the values
  # themselves are passed as query parameters.
  params = dict( ('k'+str(i), tag) for i,tag in enumerate(dirtags) )
  placeholders = ','.join( "%("+name+")s" for name in params )

  # The shifted timestamp is aliased back to reported_update_time:
  # without an alias the expression column does not carry that name,
  # so the documented key would be missing from the rows.
  sql = """SELECT id,lat,lon,routetag,dirtag,
               reported_update_time + interval '%d hours'
                 as reported_update_time
             from vehicle_track 
             where dirtag IN ( %s )
           order by reported_update_time asc""" \
      % (int(tzdiff), placeholders)

  cur = get_cursor()
  try:
    print("Executing...")
    SQLExec(cur, sql, params)
    print("Retrieving...")
    ret = cur.fetchall()
    print("...done.")
  finally:
    cur.close()

  return ret
Esempio n. 8
0
def conditional_lateness_plots(rows=None,
                               degrees_sep=1,
                               conds=(0,60,300,600,1200)):
  """
  Plots the (weighted) conditional lateness distribution as
  
    F( lateness at stop | lateness at Dth stop previous )
    
  where D = degrees_sep. This is plotted as conditioned on each of
  the latenesses provided in conds.
  """

  if rows is None:
    cur = db.get_cursor();
    print "Selecting..."
    db.SQLExec(cur,"""select d1.lateness as cond,d2.lateness,trip_stop_weight 
from datamining_table d1 inner join datamining_table d2
  on d1.gps_segment_id=d2.gps_segment_id
  and d1.stop_number+%(deg_sep)s=d2.stop_number
  and d1.lateness in (""" + ",".join(map(str,conds)) + """)
  and d2.lateness is not null
inner join trip_stop_weights tsw on d2.gtfs_trip_id = tsw.gtfs_trip_id
  and d2.stop_id = tsw.stop_id
""",
               {'deg_sep':degrees_sep});
    print "Retrieving..."
    rows = cur.fetchall()
    cur.close()
    print len(rows),"rows retrieved."

  try:
    compare_ecdfs(('cond',),rows)
  except e:
    print e
  return rows
Esempio n. 9
0
def load_gps_schedule(segment_id):
  """
  Loads the arrival schedule recorded for a GPS segment from the
  gps_stop_times table.

  Returns a list of dictlike rows in order of increasing stop
  sequence. Each row has the following keys:
      'stop_id'
      'stop_sequence'
      'stop_headsign'
      'pickup_type'
      'drop_off_type',
      'shape_dist_traveled',
      'timepoint',
      'arrival_time_seconds',
      'departure_time_seconds',
      'actual_arrival_time_seconds'
      'seconds_since_last_stop'
  """
  query = """select * from gps_stop_times 
             where gps_segment_id=%(segid)s
             order by stop_sequence asc"""
  params = {'segid':segment_id}

  schedule_cursor = get_cursor()
  SQLExec(schedule_cursor,query,params)
  schedule_rows = [row for row in schedule_cursor]
  schedule_cursor.close()
  return schedule_rows
Esempio n. 10
0
def get_stop_info( stop_id, day_of_week ):
  """
  Returns the scheduled stop times at a stop for the given day of week.

  day_of_week is 0-6; 0-4 map to service '1', 5 to service '2' and
  6 to service '3'.

  Returns a list of dicts, one per scheduled stop time, ordered by
  route short name and then arrival time. Each dict holds the columns
  of gtf_stop_times, gtf_trips and gtf_routes plus 'observed_stop_id',
  which is None when no observation attributes exist yet for that
  trip, stop sequence and day of week.

  Raises an Exception if day_of_week is outside 0-6.
  """

  ## SF hack here, for now.
  ## Need to define a way of handling the "day of week"
  ## problem in terms of service IDs.
  if 0 <= day_of_week <= 4:
    service_id = '1'
  elif day_of_week == 5:
    service_id = '2'
  elif day_of_week == 6:
    service_id = '3'
  else:
    raise Exception("Not a day of week")

  sql = """\
select gst.*, gt.*, gr.*, oa.observed_stop_id 
from gtf_stop_times gst
  inner join gtf_trips gt on gst.trip_id = gt.trip_id
  inner join gtf_routes gr on gt.route_id = gr.route_id
  left outer join observation_attributes oa
    on oa.trip_id = gst.trip_id 
      and oa.stop_sequence = gst.stop_sequence
      and oa.day_of_week=%(dow)s
where gst.stop_id=%(stopid)s
  and gt.service_id=%(sid)s
order by gr.route_short_name, gst.arrival_time_seconds
"""

  cur = get_cursor()
  try:
    SQLExec(cur,sql,{'stopid':stop_id,'sid':service_id,'dow':day_of_week})
    rows = cur.fetchall()
  finally:
    # Release the cursor even if the query fails.
    cur.close()

  return [dict(row) for row in rows]
Esempio n. 11
0
def simplified_lateness_counts():
  """
  This is a one-time function to translate all data from datamining_table
  into simplified_lateness_observations.
  """
  sql = """
select dm.lateness, dm.gtfs_trip_id, dm.stop_id, dm.stop_sequence, 
  ((EXTRACT(DOW FROM gs.trip_date) + 6)::integer % 7) as dow
from datamining_table dm
  inner join gps_segments gs on gs.gps_segment_id = dm.gps_segment_id
"""

  cur = get_cursor()
  SQLExec(cur,sql);

  tot = cur.rowcount
  i=1
  for row in cur:
    if row['lateness'] is None:
      continue
    if i%1000 == 0:
      print i,"/",tot
    i+=1
    lateness_observed( row['gtfs_trip_id'], row['stop_id'],
                       row['dow'], 
                       row['stop_sequence'], row['lateness'],
                       auto_create = True );
  cur.close()
Esempio n. 12
0
def load_gps_route(segment_id):
  """
  Loads the tracked GPS points of a segment from the tracked_routes
  table, in order of increasing report time, together with the
  segment's header information.

  Returns (trip_id, trip_date, vehicle_id, schedule_error, offset, route)
  where trip_id is the gtfs trip ID, trip_date is the date on whose
  schedule the trip took place, vehicle_id is the gps vehicle's ID,
  schedule_error is the measured error between the GPS route and the
  GTFS schedule, offset is the number of seconds to subtract from any
  GTFS schedule times, and route is a list of
  [lat,lon,reported_update_time] triples.
  """
  query = """select lat, lon, reported_update_time
           from tracked_routes
           where gps_segment_id=%(segID)s
           order by reported_update_time"""

  point_cursor = get_cursor()
  SQLExec(point_cursor,query,{'segID':segment_id})
  points = list(point_cursor)
  point_cursor.close()

  header = load_gps_segment_header(segment_id)
  trip_id,trip_date,veh_id,sched_err,sched_off = header

  route = []
  for point in points:
    route.append([point['lat'],point['lon'],point['reported_update_time']])

  return trip_id,trip_date,veh_id,sched_err,sched_off,route
Esempio n. 13
0
def get_observation_stop_id( trip_id, stop_id, day_of_week, stop_sequence,
                             auto_create = True):
  """
  Returns the observed_stop_id recorded in observation_attributes for
  the given trip_id, stop_sequence and day_of_week.

  When no entry exists, a new one is created through
  create_observation_id if auto_create is True; otherwise None is
  returned. stop_id is only used when creating the entry.

  Raises an Exception if more than one entry matches.
  """
  sql="""\
select observed_stop_id
from observation_attributes oa
where oa.trip_id=%(tid)s
  and oa.stop_sequence=%(seq)s
  and oa.day_of_week=%(dow)s
"""

  cur = get_cursor()
  try:
    SQLExec(cur, sql, {'tid':trip_id,'seq':stop_sequence,'dow':day_of_week})
    rows = [r[0] for r in cur]
  finally:
    # The original never closed this cursor.
    cur.close()

  if len(rows) > 1:
    raise Exception("Redundant observation IDs")
  if rows:
    return rows[0]
  if auto_create:
    return create_observation_id(trip_id,stop_id,day_of_week,stop_sequence)
  return None
Esempio n. 14
0
def conditional_lateness_prediction_intervals(rows=None,
                                              degrees_sep=(1,5,10,20,50),
                                              alpha=0.05):

  if rows is None:
    print "Selecting..."
    cur = db.get_cursor()
    db.SQLExec(cur,"""
select 30*(d1.lateness/30.0)::int as conditional, d2.stop_number-d1.stop_number as sepdegree,
  d2.lateness, trip_stop_weight
from datamining_table d2
natural join trip_stop_weights
inner join datamining_table d1
  on d1.gps_segment_id = d2.gps_segment_id
  and d2.stop_number-d1.stop_number in (""" + \
                 ",".join(map(str,degrees_sep)) + """)
  and d2.lateness is not null and d1.lateness is not null
""")
    print "Retrieving..."
    rows = cur.fetchall()
    cur.close()
    print len(rows),"rows retrieved."

  figure()

  sep_split = DM.split_on_attributes(('sepdegree',),rows)
  sds = array([k[0] for k in sep_split.keys()])
  sds.sort()
  for i,sd in enumerate(reversed(sds)):
    sdrows = sep_split[(sd,)]
    cond_split = DM.split_on_attributes(('conditional',),sdrows)
    conds = array([k[0] for k in cond_split.keys()])
    conds.sort()
    
    upper_preds = []
    lower_preds = []
    upup_preds = []
    lolo_preds = []
    for cond in conds:
      cond_rows = array([(r['lateness'],r['trip_stop_weight']) 
                         for r in cond_split[(cond,)]])
      x,p,a_n = ecdf(cond_rows,weighted=True)
      (lower,upper),(lolo,hihi) = find_pred_interval(x,p,a_n,alpha=alpha)
      upper_preds.append(upper)
      lower_preds.append(lower)
      

      upup_preds.append(hihi)
      lolo_preds.append(lolo)

    #plot(conds,upper_preds,pcolors[i],label="D.o.S="+str(sd))
    #plot(conds,lower_preds,pcolors[i],label=None)
    plot(conds,upup_preds,pcolors[i]+'+-',label="D.o.S="+str(sd))
    plot(conds,lolo_preds,pcolors[i]+'+-',label=None)
      
  legend()
  xlabel("Conditional Lateness")
  ylabel("Lateness Prediction Interval")
  title("%d%% Prediction Intervals vs. Stop Separation, Prev Lateness"%(100*(1-alpha),))
Esempio n. 15
0
def get_segment_IDs(scheduled_only=True):
  """
  Returns the list of gps_segment_id values in gps_segments.

  With scheduled_only set, only segments whose trip_id is not null
  are included.
  """
  if scheduled_only:
    query = "select gps_segment_id from gps_segments where trip_id is not null"
  else:
    query = "select gps_segment_id from gps_segments"

  segment_cursor = get_cursor()
  SQLExec(segment_cursor,query)
  segment_ids = []
  for row in segment_cursor:
    segment_ids.append(row['gps_segment_id'])
  segment_cursor.close()
  return segment_ids
Esempio n. 16
0
def export_gps_route( trip_id, trip_date, vehicle_id, 
                      gtfs_error, offset_seconds,
                      gps_data ):
  """
  Stores one GPS segment: a header row in "gps_segments" and one row
  per GPS report in "tracked_routes". The latter table caches the
  results of finding and filtering only the valid routes as
  represented in the GPS dataset.

  Returns segment_id, the identifier generated for this GPS segment.

  trip_id: the GTFS trip id
  trip_date: the date of the trip
  vehicle_id: as reported in the GPS data
  gtfs_error: The distance from the matched GTFS trip as measured by
              the GPSBusTrack metric
  offset_seconds: Number of seconds to subtract from GTFS trip to normalize.

  gps_data: A list of (lat, lon, reported_update_time) values, exactly as
            reported in the GPS data. Note that reported_update_time
            should be a timestamp.


  WARNING: No effort is made to prevent duplicate entries! If you do this
  more than once for the same route then YOU MUST DELETE IT FIRST!
  """

  insert_segment = """insert into gps_segments (
              trip_id, trip_date, vehicle_id,
              schedule_error, schedule_offset_seconds
         ) VALUES (
               %(trip_id)s,%(trip_date)s,%(vehicle_id)s,
               %(gtfs_error)s, %(offset)s
         ) RETURNING gps_segment_id"""

  insert_point = """insert into tracked_routes (
               gps_segment_id, lat, lon, reported_update_time
             ) VALUES (
               %(seg_id)s,%(lat)s,%(lon)s,%(reported_update_time)s
             )"""

  header_params = {'trip_id':trip_id,'trip_date':trip_date,
                   'vehicle_id':vehicle_id,
                   'gtfs_error':str(gtfs_error),'offset':offset_seconds}

  cur = get_cursor()
  SQLExec(cur,insert_segment,header_params)
  # The insert hands back the generated ID through RETURNING.
  segment_id = cur.fetchall()[0][0]

  seg_id_param = str(segment_id)
  for point in gps_data:
    lat,lon,reported_update_time = point
    SQLExec(cur,insert_point,
            {'lat':lat,'lon':lon,
             'reported_update_time':reported_update_time,
             'seg_id':seg_id_param})

  cur.close()
  return segment_id
Esempio n. 17
0
def get_rows(rsubset=0.1):
    print "Selecting..."
    cur = db.get_cursor()
    if rsubset is not None:
        db.SQLExec(
            cur, """select * from datamining_table dm
               where random() < %f""" % (rsubset, ))
    else:
        db.SQLExec(cur, "select * from datamining_table")

    return cur
Esempio n. 18
0
def compare_route_portion(rows=None):
  """Compares lateness distributions between portions of the route"""

  if rows is None:
    cur = db.get_cursor()
    print "Selecting..."
    sql = """
select stop_number, total_num_stops, total_num_stops-stop_number as stops_before_end, (100*stop_number::numeric/total_num_stops)::int as route_portion, lateness, trip_stop_weight 
from datamining_table dm 
  natural join trip_stop_weights 
  natural join gps_segments 
  inner join (select count(*) as total_num_stops, trip_id 
              from gtf_stop_times 
              group by trip_id) ns 
    on ns.trip_id = dm.gtfs_trip_id 
where lateness is not null
"""
    db.SQLExec(cur,sql)
    print "Retrieving..."
    rows = cur.fetchall()
    cur.close()
    print len(rows),'rows fetched.'

  # Plot ECDF comparisons
  stop_num_split = DM.split_on_attributes(('stop_number',),rows)
  end_num_split = DM.split_on_attributes(('stops_before_end',),rows)
  halfway_split = DM.split_on_attributes(('route_portion',),rows)

  cdf_dict = { "Second stop" : stop_num_split[(1,)],
               "Middle stop" : halfway_split[(50,)]+halfway_split[(51,)],
               "Next to last stop" : end_num_split[(1,)] }
  compare_ecdfs("Stop Position",cdf_dict);

  # Plot E vs stop number
  Es = []
  moes = []
  sns = array([k[0] for k in stop_num_split.keys()])
  sns.sort()
  for sn in sns:
    rowdata = array([(r['lateness'],r['trip_stop_weight']) for r in stop_num_split[(sn,)]])
    Eval,moe = E(rowdata,weighted=True)
    Es.append(Eval)
    moes.append(moe)
  Es = array(Es)
  moes = array(moes)
  
  figure()
  plot(sns,Es,'k-',label="Estimated expectation")
  plot(sns,Es+moes,'k--',label=None)
  plot(sns,Es-moes,'k--',label=None)
  #legend()
  xlabel("Stop Number")
  ylabel("Expected Latenes")
  title("Expected Lateness vs Stop Number")
Esempio n. 19
0
def depict_predinterval_calculation(rows=None,degsep=1,cond=60,alpha=0.05):
  """
  Creates a plot explaining how the prediction interval calculations
  work.
  """
  
  if rows is None:
    print "Selecting..."
    cur = db.get_cursor()
    db.SQLExec(cur,"""
select d2.lateness, trip_stop_weight
from datamining_table d2
natural join trip_stop_weights
inner join datamining_table d1
  on d1.gps_segment_id = d2.gps_segment_id
  and d2.stop_number-d1.stop_number=%(degsep)s
  and d1.lateness = %(cond)s
  and d2.lateness is not null and d1.lateness is not null
""",  {'degsep':degsep,'cond':cond});

    print "Retrieving..."
    rows = cur.fetchall();
    cur.close()
    print len(rows),"rows retrieved."


  figure()

  rowdata = array([(r['lateness'],r['trip_stop_weight']) for r in rows])
  x,p,a_n = ecdf(rowdata,weighted=True)
  
  plot(x,p,'k-',label="Conditional ECDF")
  plot(x,p+a_n,'k--',label="ECDF 95% CI")
  plot(x,p-a_n,'k--',label=None)
  
  (lower,upper),(lolo,upup) = find_pred_interval(x,p,a_n,alpha=alpha)

  plot( (lower,lower),(0,alpha/2), 'r-',label="Lower interval bound")
  plot( (-2000,lower),(alpha/2,alpha/2), 'r-',label=None)

  plot( (upper,upper),(0,1-alpha/2),'g-',label="Upper interval bound")
  plot( (-2000,upper),(1-alpha/2,1-alpha/2),'g-',label=None)

  plot( (lolo,lolo),(0,alpha/2), 'c-',label="Lower bound CI")
  #plot( (-2000,lolo),(alpha/2,alpha/2), 'c-',label=None)

  plot( (upup,upup),(0,1-alpha/2),'m-',label="Upper bound CI")
  #plot( (-2000,upup),(1-alpha/2,1-alpha/2),'m-',label=None)

  legend()
  xlabel("Lateness")
  ylabel("ECDF(Lateness)")
  title("Prediction Interval Calculation")
Esempio n. 20
0
def correct_gps_schedule( segment_id, trip_id, gtfs_error, offset_seconds,
                       gps_data ):
  """
  Re-assigns a GPS segment to a different GTFS trip.

  Updates the segment's trip_id, schedule_error and
  schedule_offset_seconds in gps_segments, deletes the segment's
  existing rows from gps_stop_times, and then calls
  export_gps_schedule with the segment ID and gps_data.
  """
  update_header = """update gps_segments set trip_id=%(tid)s,schedule_error=%(gerr)s,
            schedule_offset_seconds=%(os)s
          where gps_segment_id=%(gid)s"""
  delete_stop_times = """delete from gps_stop_times where gps_segment_id=%(gid)s"""

  header_values = {'tid':trip_id,'gerr':gtfs_error,'os':offset_seconds,
                   'gid':segment_id}

  cursor = get_cursor()
  SQLExec(cursor,update_header,header_values)
  SQLExec(cursor,delete_stop_times,{'gid':segment_id})
  cursor.close()

  export_gps_schedule( segment_id, gps_data )
Esempio n. 21
0
def get_previous_trip_ID(trip_id, start_date, offset, numtrips=10):
  """
  Given GTFS trip ID, the date it ran on, and the schedule's offset in
  seconds, finds the preceding GTFS trips with the same direction and
  route. The start_date is necessary in cases close to midnight.

  Candidates are taken from the services running on start_date and,
  shifted back by 86400 seconds, from the services running on the day
  before.

  Returns a list of at most numtrips (trip_id, offset) pairs ordered
  by how close the trip's first departure is to this trip's start
  (closest first). offset is 0 for a trip from start_date's services
  and 86400 for one from the previous day's. Returns None when no
  earlier trip is found.

  Raises IndexError if trip_id is not present in gtf_trips or
  gtf_trip_information.
  """

  def placeholder_list(prefix, service_ids, params):
    # Registers every service ID in params under a generated name and
    # returns the matching comma-separated placeholder list.
    names = []
    for i,sid in enumerate(service_ids):
      key = prefix+str(i)
      # Passed as strings, matching the quoted literals used before.
      params[key] = str(sid)
      names.append("%("+key+")s")
    # "in ()" is a syntax error; NULL keeps the statement valid while
    # matching nothing when a date has no services.
    return ','.join(names) or "NULL"

  cur = get_cursor()
  try:
    SQLExec(cur,"select route_id,direction_id from gtf_trips where trip_id=%(tid)s",
            {'tid':trip_id})
    routeinfo = list(cur)[0]
    route_id = routeinfo['route_id']
    dir_id = routeinfo['direction_id']

    SQLExec(cur,"""select first_departure as mintime
                   from gtf_trip_information where trip_id=%(tid)s""",
            {'tid':trip_id})

    # start_time is the time the bus started for the date start_date
    start_time = list(cur)[0]['mintime'] - offset

    params = {'start_time':start_time,'dir_id':dir_id,'route_id':route_id}
    yesterday_ids = placeholder_list(
      'yday',
      get_serviceIDs_for_date(start_date - datetime.timedelta(days=1)),
      params)
    today_ids = placeholder_list(
      'today', get_serviceIDs_for_date(start_date), params)

    sql = """(select trip_id, 0 as offset,
                  abs(first_departure- %(start_time)s) as diff 
             from gtf_trips natural join gtf_trip_information
             where direction_id=%(dir_id)s and route_id=%(route_id)s
               and service_id in (""" + today_ids + """)
               and first_departure < %(start_time)s
           union
           select trip_id, 86400 as offset,
                 abs(first_departure-86400- %(start_time)s ) as diff
             from gtf_trips natural join gtf_trip_information
             where direction_id=%(dir_id)s and route_id=%(route_id)s
               and service_id in (""" + yesterday_ids + """)
               and first_departure-86400 < %(start_time)s
           ) order by diff limit """ + str(int(numtrips))

    SQLExec(cur,sql,params)

    ret = [(r['trip_id'],r['offset']) for r in cur]
  finally:
    # Release the cursor on every path, including lookup failures.
    cur.close()

  if len(ret) == 0:
    return None
  return ret
Esempio n. 22
0
def export_lateness_data( gpssched, sched_error ):
  """
  Given a GPSBusSchedule gpssched, adds entries into the datamining_table
  which records observations of lateness along with their attributes.
  """
  
  sql = """
insert into datamining_table dt
( gps_segment_id, gtfs_trip_id, rms_schedule_error, vehicle_id,
  route_name, vehicle_type, service_id, direction_id,
  stop_lat, stop_lon, stop_id, stop_sequence,
  scheduled_arrival_time, scheduled_departure_time,
  actual_arrival_time, lateness, prev_stop_id )
values
( null, %(trip_id)s, %(sched_err)s, %(vehid)s,
  %(routename)s, %(vehtype)s, %(service_id)s, %(dir_id)s,
  %(stoplat)s,%(stoplon)s,%(stopid)s,%(stopseq)s,
  %(sched_arr)s,%(sched_dep)s,%(actual_arr)s,%(lateness)s,
  %(prev_stop_id)s )
"""
  
  gtfs = gpssched.getGTFSSchedule()

  basedict = { 'trip_id' : gtfs.trip_id,
               'sched_err' : sched_error,
               'vehid' : gpssched.segment.vehicle_id,
               'routename' : gtfs.route_short_name,
               'vehtype' : gtfs.route_type,
               'service_id' : gtfs.service_id,
               'dir_id' : gtfs.direction_id
               }

  cur = get_cursor()
  
  for arrival in gpssched.getGPSSchedule():
    print dict(arrival)
    stopdict = dict(basedict)
    stopdict.update ( { 'stoplat' : arrival['stop_lat'],
                        'stoplon' : arrival['stop_lon'],
                        'stopid' : arrival['stop_id'],
                        'stopseq' : arrival['stop_sequence'],
                        'sched_arr' : arrival['arrival_time_seconds'],
                        'sched_dep' : arrival['departure_time_seconds'],
                        'actual_arr' : arrival['actual_arrival_time_seconds'],
                        'lateness' : arrival['actual_arrival_time_seconds'] \
                          - arrival['departure_time_seconds'],
                        'prev_stop_id' : arrival['prev_stop_id']
                        } )
    SQLExec(cur, sql, stopdict)


  cur.close()
Esempio n. 23
0
def create_observation_row( trip_id, stop_id, day_of_week, stop_sequence, 
                             lateness_minutes, initial_num_obs=1 ):
  """
  Inserts a new row into simplified_lateness_observations for the
  given stop and lateness (in minutes), starting its observation
  count at initial_num_obs.

  The stop's observed_stop_id is looked up, and created if it does
  not exist yet.
  """
  insert_sql = """\
insert into simplified_lateness_observations 
  ( minutes_late, num_observations, observed_stop_id )
values
  ( %(minutes)s, %(init)s, %(obsid)s )
"""
  observed_stop_id = get_observation_stop_id(trip_id,stop_id,day_of_week,
                                             stop_sequence,
                                             auto_create = True)
  values = {'minutes':lateness_minutes,
            'init':initial_num_obs,
            'obsid':observed_stop_id}

  cursor = get_cursor()
  SQLExec(cursor, insert_sql, values)
  cursor.close()
Esempio n. 24
0
def get_rows(rsubset=0.1):
    print "Selecting..."
    cur = db.get_cursor()
    if rsubset is not None:
        db.SQLExec(
            cur,
            """select * from datamining_table dm
               where random() < %f"""
            % (rsubset,),
        )
    else:
        db.SQLExec(cur, "select * from datamining_table")

    return cur
Esempio n. 25
0
def get_joined_rows(prev_attrs=(), degree_of_sep=1):
    print "Selecting..."
    if prev_attrs:
        sql = """select d2.*,""" + ",".join(
            map(lambda a: "d1." + a, prev_attrs))
    else:
        sql = """select d2.* """
    sql += """
from datamining_table d1 inner join datamining_table d2
on d1.gps_segment_id = d2.gps_segment_id
and d1.stop_number+""" + str(degree_of_sep) + """=d2.stop_number"""

    cur = db.get_cursor()
    db.SQLExec(cur, sql)
    return cur
Esempio n. 26
0
def get_route_dirtags(route_short_name):
  """
  Returns the list of dirtags mapped to the route(s) carrying the
  given short name.
  """
  query = """SELECT dirtag FROM routeid_dirtag
               WHERE route_id IN (select route_id from gtf_routes 
                                    where route_short_name = %(rsn)s)"""
  dirtag_cursor = get_cursor()
  SQLExec(dirtag_cursor, query, {'rsn':route_short_name})
  dirtags = [row[0] for row in dirtag_cursor]

  dirtag_cursor.close()
  return dirtags
Esempio n. 27
0
def criticalmass_compare_rows():
  """
  Selects and pairs rows from the same trip where one is from
  March 27 and the others are not (this is a many-to-one pairing),
  and subtracts their latenesses as the column 'ldiff'.

  Also selects all latenesses with a column 'is_cmass' indicating
  0 for not-cmass days and 1 for cmass days, for an ecdf comparison
  split.
  """

  print "Selecting..."
  cur = db.get_cursor()
  db.SQLExec(cur,
             """
select (dm_cm.lateness - dm.lateness) as ldiff, trip_stop_weight
from 
datamining_table dm_cm natural join trip_stop_weights
inner join gps_segments gs_cm on dm_cm.gps_segment_id=gs_cm.gps_segment_id
  and gs_cm.trip_date = '2009-03-27'
inner join datamining_table dm on dm.gtfs_trip_id=dm_cm.gtfs_trip_id
  and dm.stop_sequence=dm_cm.stop_sequence
inner join gps_segments gs on dm.gps_segment_id=gs.gps_segment_id
  and gs.trip_date != '2009-03-27'
where dm_cm.lateness is not null and dm.lateness is not null
and dm_cm.route_name in ('1','9','19')
             """);
  print "Retrieving..."
  diffrows=cur.fetchall();
  print len(diffrows),"rows retrieved."

  print "Selecting..."
  db.SQLExec(cur,
             """
select lateness, trip_stop_weight,
case when trip_date='2009-03-27' then 1
     else                             0
end as is_cmass
from datamining_table natural join gps_segments 
  natural join trip_stop_weights
where lateness is not null and service_id='1'
""");
  print "Retrieving..."
  rows = cur.fetchall();
  print len(rows),'rows retrieved.'
  cur.close();

  return diffrows,rows
Esempio n. 28
0
def get_stops( min_lat, max_lat, min_lon, max_lon ):
  """
  Returns the gtf_stops rows lying inside the given latitude and
  longitude bounds (bounds inclusive), each converted to a dict.
  """
  query = """\
select * from gtf_stops 
where stop_lat >= %(min_lat)s
  and stop_lat <= %(max_lat)s
  and stop_lon >= %(min_lon)s
  and stop_lon <= %(max_lon)s
"""
  bounds = {'min_lat':min_lat,
            'max_lat':max_lat,
            'min_lon':min_lon,
            'max_lon':max_lon}

  stop_cursor = get_cursor()
  SQLExec(stop_cursor,query,bounds)
  stop_rows = stop_cursor.fetchall()
  stop_cursor.close()
  return [dict(stop) for stop in stop_rows]
Esempio n. 29
0
def get_direction_for_dirtag(dirtag):
  """
  Given a nextbus dirtag, returns the GTFS direction ID.

  A dirtag containing "OB" is treated as "Outbound", any other as
  "Inbound"; the ID is then looked up by that description in
  gtfs_directions.

  Raises StopIteration if gtfs_directions has no row for the
  description.
  """
  if "OB" in dirtag:
    description = "Outbound"
  else:
    description = "Inbound"

  cur = get_cursor()
  try:
    SQLExec(cur,
            """Select direction_id from gtfs_directions 
               where description=%(dir)s""",
            {'dir':description})
    # The next() builtin works wherever the cursor is an iterator,
    # unlike the Python-2-only cur.next() method.
    return next(cur)[0]
  finally:
    # Close the cursor even when no matching row exists.
    cur.close()
Esempio n. 30
0
def get_routes_for_stop( stop_id ):
  """
  Returns the distinct short names of the routes that have at least
  one trip with a stop time at the given stop.
  """
  query = """\
select distinct(route_short_name) 
from gtf_routes gr 
  inner join gtf_trips gt 
    on gr.route_id = gt.route_id 
  inner join gtf_stop_times gst 
    on gst.trip_id = gt.trip_id 
where gst.stop_id = %(stopid)s
"""

  route_cursor = get_cursor()
  SQLExec(route_cursor,query,{'stopid':stop_id})
  name_rows = route_cursor.fetchall()
  route_cursor.close()

  short_names = []
  for name_row in name_rows:
    short_names.append(name_row[0])
  return short_names
Esempio n. 31
0
    def __init__(self, dbconn, autoFill=False, autoCommit=False):
        """
        Creates a ServiceDateHandler, loading the calendar tables and
        the existing service combinations from the database.

        The data is read through db.get_cursor(); dbconn is only
        passed on to fill_unique_service_combinations. If autoFill is
        True, then any missing service combinations are added to the
        database. If autoCommit is True, these changes will be
        committed immediately.

        Attributes set:
          calendar_rows       rows of gtf_calendar
          calendar_date_rows  rows of gtf_calendar_dates
          combos              combination_id -> list of service IDs
          existing_combos     tuple of service IDs -> combination_id
        """
        cur = db.get_cursor()
        try:
            ## Prepare calendar data

            db.SQLExec(
                cur, """select monday,tuesday,wednesday,thursday,friday,saturday,
                 sunday, service_id, start_date, end_date from gtf_calendar""")
            self.calendar_rows = cur.fetchall()
            db.SQLExec(cur, """select * from gtf_calendar_dates""")
            self.calendar_date_rows = cur.fetchall()

            ## Load existing combos

            db.SQLExec(
                cur, """select * from service_combinations 
                   order by combination_id, service_id""")
            service_combo_rows = cur.fetchall()
        finally:
            # Release the cursor even if one of the queries fails.
            cur.close()

        self.combos = {}
        # map from combo_id to combo
        # note that combo lists are sorted by service_id
        for row in service_combo_rows:
            combo_id = int(row['combination_id'])
            # setdefault replaces the deprecated dict.has_key() test.
            self.combos.setdefault(combo_id, []).append(row['service_id'])

        # map from combo to combo_id (reverse of self.combos)
        self.existing_combos = {}
        for combo_id, services in self.combos.items():
            self.existing_combos[tuple(services)] = int(combo_id)

        ## Fill in missing combos

        if autoFill:
            self.fill_unique_service_combinations(dbconn, autoCommit)
0
def compare_hour_of_weekday(rows=None):
  """Compares lateness distributions between hours of the weekday"""
  
  if rows is None:
    cur = db.get_cursor()
    sql = """
select scheduled_hour_of_arrival as hoa, lateness, trip_stop_weight
from datamining_table natural join trip_stop_weights
where lateness is not null and service_id='1'
"""
    print "Selecting..."
    db.SQLExec(cur,sql)
    print "Retrieving..."
    rows = cur.fetchall()
    cur.close()
    print len(rows),"rows retrieved."

  compare_ecdfs(('hoa',),rows)
Esempio n. 33
0
def get_joined_rows(prev_attrs=(), degree_of_sep=1):
    print "Selecting..."
    if prev_attrs:
        sql = """select d2.*,""" + ",".join(map(lambda a: "d1." + a, prev_attrs))
    else:
        sql = """select d2.* """
    sql += (
        """
from datamining_table d1 inner join datamining_table d2
on d1.gps_segment_id = d2.gps_segment_id
and d1.stop_number+"""
        + str(degree_of_sep)
        + """=d2.stop_number"""
    )

    cur = db.get_cursor()
    db.SQLExec(cur, sql)
    return cur