def _query_journeys(service_id, direction, date):
    """ Build a query for the IDs of all journeys that run on a given day.

        Journeys are included or excluded by matching special dates, bank
        holidays, date ranges associated with organisations, weeks of month
        and days of week.

        :param service_id: ID of the service to match journey patterns on.
        :param direction: Direction of the journey patterns to match.
        :param date: Date (bound as a SQL ``DATE``) the journeys run on.
        :returns: Query with columns ``journey_id`` and ``departure``.
    """
    # Bind values once as SQL parameters so the dates are not repeated
    # throughout the generated statement
    service_param = db.bindparam("service", service_id)
    direction_param = db.bindparam("direction", direction)
    date_param = db.bindparam("date", date, type_=db.Date)

    generated = _get_departure_range(
        date_param + models.Journey.departure, "departures"
    )
    dep_col = db.column("departures")

    # Collect all journeys along with their generated departure times
    matched = db.session.query(
        models.Journey.id.label("journey_id"),
        dep_col.label("departure"),
    )
    matched = matched.select_from(models.JourneyPattern)
    matched = matched.join(models.JourneyPattern.journeys)
    # SQLAlchemy has no CROSS JOIN construct; INNER JOIN ON TRUE is equivalent
    matched = matched.join(generated, db.true())
    matched = matched.filter(
        # Journey patterns must belong to this service and direction
        models.JourneyPattern.service_ref == service_param,
        models.JourneyPattern.direction.is_(direction_param),
        # Drop generated times falling an hour before or after the departure
        db.extract("HOUR", db.func.timezone(_GB_TZ, dep_col))
        == db.extract("HOUR", models.Journey.departure),
    )
    matched = matched.group_by(models.Journey.id, dep_col)

    # Apply the date-based inclusion/exclusion filters before returning
    return _filter_journey_dates(matched, date_param)
def record_set(cls, column=None):
    """ Get the defined record set from the JSON data in the links column.

    :param column: Optional JSONB column expression to expand; defaults to
        ``cls.data``.
    :returns: A table-valued expression named ``data`` with typed columns
        for each record in the JSON array.
    """
    # BUG FIX: ``column or cls.data`` evaluated ``bool(column)``, but
    # SQLAlchemy clause elements raise TypeError ("Boolean value of this
    # clause is not defined") when truth-tested, so passing any column
    # expression would crash. Compare against None instead.
    source = cls.data if column is None else column
    return (
        db.func.jsonb_to_recordset(source)
        .table_valued(
            db.column("stop_point_ref", db.Text),
            db.column("timing_point", db.Boolean),
            db.column("stopping", db.Boolean),
            db.column("sequence", db.Integer),
            db.column("arrive", db.Interval),
            db.column("depart", db.Interval)
        )
        .render_derived("data", with_types=True)
    )
def matching_groups(cls, query, admin_areas=None):
    """ Finds all admin areas and table names covering matching results for
        a query.

        The areas and groups are sorted into two sets, to be used for
        filtering.

        :param query: web search query as string.
        :param admin_areas: Filter by admin areas to get matching groups
        :returns: A tuple with a dict of groups and a dict with
        administrative area references and names
    """
    if not _test_query(query):
        return None

    # Aggregate the distinct table names of all matching rows
    tables_agg = pg.array_agg(db.distinct(cls.table_name))
    if admin_areas is not None:
        # Only aggregate rows whose admin areas overlap those already
        # selected by the caller
        tables_agg = tables_agg.filter(cls.admin_areas.overlap(admin_areas))

    # Aggregate distinct (code, name) pairs for matching admin areas
    area_pair = pg.array((AdminArea.code, AdminArea.name))
    areas_agg = pg.array_agg(db.distinct(area_pair))

    row = (
        db.session.query(
            tables_agg.label("tables"),
            areas_agg.label("areas"),
        )
        .select_from(db.func.unnest(cls.admin_areas).alias("unnest_areas"))
        .join(AdminArea, db.column("unnest_areas") == AdminArea.code)
        .filter(cls.match(query))
        # Aggregation collapses all matching data into a single row
        .one()
    )

    found_tables = set() if row.tables is None else set(row.tables)
    groups = {}
    for group, name in cls.GROUP_NAMES.items():
        if found_tables & cls.GROUPS[group]:
            groups[group] = name
    areas = {} if row.areas is None else dict(row.areas)

    return groups, areas
def _query_next_services(atco_code, timestamp=None, interval=None):
    """ Creates query for getting all services stopping at this stop point
        in an interval.

        :param atco_code: ATCO code of the stop point, passed through to
            ``_query_journeys_at_stop``.
        :param timestamp: Start of the interval. ``None`` uses the database
            server's ``now()``; a naive datetime is assumed to be GB local
            time; an aware datetime is bound as-is.
        :param interval: Length of the interval, cast to a SQL ``INTERVAL``;
            defaults to ``_ONE_HOUR``.
        :returns: Query ordered by expected departure with service, pattern
            and operator details plus seconds until departure.
    """
    if timestamp is None:
        p_timestamp = db.func.now()
    elif timestamp.tzinfo is None:
        # Assume this is a local timestamp with GB timezone.
        p_timestamp = db.func.timezone(_GB_TZ,
                                       db.bindparam("timestamp", timestamp))
    else:
        p_timestamp = db.bindparam("timestamp", timestamp)

    if interval is None:
        p_interval = _ONE_HOUR
    else:
        param = db.bindparam("interval", interval)
        p_interval = db.cast(param, db.Interval)

    # All journeys stopping here; ``t_offset`` is the offset of this stop
    # within the journey (presumably time from journey start -- TODO confirm
    # against _query_journeys_at_stop)
    journey_match = _query_journeys_at_stop(atco_code).cte("journey_match")

    # Shift the interval window back by each journey's stop offset so that
    # journeys already underway are still included
    time_start = p_timestamp - journey_match.c.t_offset
    time_end = time_start + p_interval
    # Lateral subquery exposing both UTC and GB-local bounds of the window
    times = (
        db.select([
            time_start.label("utc_start"),
            time_end.label("utc_end"),
            db.func.timezone(_GB_TZ, time_start).label("local_start"),
            db.func.timezone(_GB_TZ, time_end).label("local_end"),
        ])
        .correlate(journey_match)
        .lateral("times")
    )

    local_start_date = db.cast(times.c.local_start, db.Date)
    local_end_date = db.cast(times.c.local_end, db.Date)
    local_start_time = db.cast(times.c.local_start, db.Time)
    local_end_time = db.cast(times.c.local_end, db.Time)

    # Pick the local date each departure falls on, handling windows that
    # straddle midnight: a departure later than the window's start time
    # belongs to the start date, otherwise the end date
    journey_departure = (
        db.session.query(
            journey_match.c.id,
            journey_match.c.t_offset,
            times.c.utc_start,
            times.c.utc_end,
            db.case(
                (
                    (local_start_date == local_end_date) |
                    (journey_match.c.departure > local_start_time),
                    local_start_date
                ),
                else_=local_end_date,
            ).label("date"),
            journey_match.c.departure.label("time"),
        )
        .select_from(journey_match)
        # No CROSS JOIN construct available; INNER JOIN ON TRUE is equivalent
        .join(times, db.true())
        .filter(
            # Same-day window: departure time must lie within the window;
            # midnight-straddling window: departure is after the start time
            # or before the end time
            (local_start_date == local_end_date) &
            db.between(
                journey_match.c.departure, local_start_time, local_end_time
            ) |
            (local_start_date < local_end_date) & (
                (journey_match.c.departure > local_start_time) |
                (journey_match.c.departure < local_end_time)
            )
        )
        .cte("journey_departure")
    )

    # Expand each (date, time) pair into candidate UTC departures
    utc_departures = _get_departure_range(
        journey_departure.c.date + journey_departure.c.time,
        "utc_departure",
    )
    utc_departure = db.column("utc_departure")

    # Keep journeys running on the computed date and within the UTC window;
    # ``expected`` is the UTC departure shifted by the stop's offset
    journey_filter = _filter_journey_dates(
        db.session.query(
            journey_departure.c.id,
            (utc_departure + journey_departure.c.t_offset).label("expected")
        )
        .select_from(journey_departure)
        .join(
            utc_departures,
            db.between(
                utc_departure,
                journey_departure.c.utc_start,
                journey_departure.c.utc_end,
            )
        )
        .join(models.Journey, journey_departure.c.id == models.Journey.id)
        .join(models.Journey.pattern)
        .group_by(
            journey_departure.c.id,
            journey_departure.c.t_offset,
            utc_departure
        ),
        journey_departure.c.date,
    ).cte("journey_filter")

    # Final projection: service line, pattern endpoints, operator details,
    # the expected departure and seconds remaining until it
    query = (
        db.session.query(
            models.Service.line.label("line"),
            models.JourneyPattern.origin.label("origin"),
            models.JourneyPattern.destination.label("destination"),
            models.Operator.code.label("op_code"),
            models.Operator.name.label("op_name"),
            journey_filter.c.expected,
            db.cast(
                db.extract("EPOCH", journey_filter.c.expected - p_timestamp),
                db.Integer
            ).label("seconds")
        )
        .select_from(journey_filter)
        .join(models.Journey, journey_filter.c.id == models.Journey.id)
        .join(models.Journey.pattern)
        .join(models.JourneyPattern.service)
        .join(models.JourneyPattern.operator)
        .order_by(journey_filter.c.expected)
    )

    return query
def insert_service_pairs(connection):
    """ Uses existing service data to update list of pairs of similar
        services.

        :param connection: Open DBAPI/SQLAlchemy connection used to create
            and fill the temporary table.
        :returns: A list with one SELECT statement producing similarity
            rows, presumably consumed by an INSERT elsewhere -- TODO
            confirm against the caller.
    """
    service = Service.__table__
    pattern = JourneyPattern.__table__
    link = JourneyLink.__table__

    # Temporary table for all services, direction and stops they call at;
    # dropped automatically at the end of the transaction
    service_stops = db.Table(
        "service_stops", db.MetaData(),
        db.Column("id", db.Integer, autoincrement=False, index=True),
        db.Column("stop_point_ref", db.Text, nullable=True, index=True),
        db.Column("outbound", db.Boolean, nullable=False, index=True),
        db.Column("inbound", db.Boolean, nullable=False, index=True),
        prefixes=["TEMPORARY"],
        postgresql_on_commit="DROP"
    )
    # One row per (service, stop): outbound/inbound flag whether any pattern
    # calling there has direction false/true respectively
    fill_service_stops = service_stops.insert().from_select(
        ["id", "stop_point_ref", "outbound", "inbound"],
        db.select([
            service.c.id,
            db.cast(link.c.stop_point_ref, db.Text),
            db.func.bool_or(~pattern.c.direction),
            db.func.bool_or(pattern.c.direction)
        ]).select_from(
            service.join(pattern, service.c.id == pattern.c.service_ref).join(
                link, pattern.c.id == link.c.pattern_ref)).where(
            link.c.stop_point_ref.isnot(None)).group_by(
            service.c.id, db.cast(link.c.stop_point_ref, db.Text))
    )

    # Find all services sharing at least one stop; id0 < id1 so each pair
    # appears once
    ss0 = service_stops.alias("ss0")
    ss1 = service_stops.alias("ss1")
    shared = (db.select([
        ss0.c.id.label("id0"), ss1.c.id.label("id1")
    ]).distinct().select_from(
        ss0.join(ss1, (ss0.c.id < ss1.c.id) &
                 (ss0.c.stop_point_ref == ss1.c.stop_point_ref))).alias("t"))

    # Iterate over possible combinations of directions: d in 0..3, with bit 1
    # selecting the first service's direction and bit 2 the second's
    directions = (db.select([db.column("d", db.Integer)]).select_from(
        db.func.generate_series(0, 3).alias("d")).alias("d"))

    # For each service, find all stops and count them. Bit 1 of d chooses
    # inbound or outbound stops for the first service of the pair...
    select_a = db.select([
        service_stops.c.stop_point_ref
    ]).where((service_stops.c.id == shared.c.id0) &
             ((directions.c.d.op("&")(1) > 0) & service_stops.c.inbound |
              (directions.c.d.op("&")(1) == 0) &
              service_stops.c.outbound)).correlate(shared, directions)
    # ...and bit 2 of d does the same for the second service
    select_b = db.select([
        service_stops.c.stop_point_ref
    ]).where((service_stops.c.id == shared.c.id1) &
             ((directions.c.d.op("&")(2) > 0) & service_stops.c.inbound |
              (directions.c.d.op("&")(2) == 0) &
              service_stops.c.outbound)).correlate(shared, directions)
    # Stops common to both services in the chosen directions
    select_c = select_a.intersect(select_b)

    # Lateral counts of stops for each side and their intersection
    count = db.func.count(db.literal_column("*")).label("count")
    la = db.select([count]).select_from(select_a.alias("a")).lateral("a")
    lb = db.select([count]).select_from(select_b.alias("b")).lateral("b")
    lc = db.select([count]).select_from(select_c.alias("c")).lateral("c")

    utils.logger.info(
        "Querying all services and stops they call at to find similar services"
    )
    service_stops.create(connection)
    connection.execute(fill_service_stops)

    # Similarity is |intersection| / min(|a|, |b|); pairs with no overlap
    # (or no stops on either side) are excluded by the WHERE clause
    return [
        db.select([
            db.func.row_number().over(),
            shared.c.id0,
            (directions.c.d.op("&")(1) > 0).label("dir0"),
            la.c.count,
            shared.c.id1,
            (directions.c.d.op("&")(2) > 0).label("dir1"),
            lb.c.count,
            db.cast(lc.c.count, db.Float) /
            db.func.least(la.c.count, lb.c.count)
        ]).where((la.c.count > 0) & (lb.c.count > 0) & (lc.c.count > 0))
    ]
""" Models for the nextbus database. """ import re import sqlalchemy.dialects.postgresql as pg from nextbus import db, location from nextbus.models import utils MIN_GROUPED = 72 MAX_DIST = 500 # Aliases for tables or views not yet defined _stop_point = db.table("stop_point", db.column("atco_code"), db.column("stop_area_ref"), db.column("active")) _service = db.table("service", db.column("id"), db.column("line")) _pattern = db.table("journey_pattern", db.column("id"), db.column("service_ref")) _link = db.table("journey_link", db.column("pattern_ref"), db.column("stop_point_ref")) _pair = db.table("service_pair", db.column("service0"), db.column("service1"), db.column("direction0"), db.column("direction1"), db.column("similarity")) class ServiceMode(db.Model): """ Lookup table for service modes, eg bus and tram. """ __tablename__ = "service_mode" id = db.Column(db.Integer, primary_key=True, autoincrement=False)