Ejemplo n.º 1
0
class WMTSettings():
    """
    Persistent key/value settings store for a When's My Transport instance, used to remember data between sessions.

    Values are pickled when written and unpickled when read. They are kept in a table named
    <instance_name>_settings, accessed through a WMTDatabase created for '<instance_name>.settings.db'
    """
    def __init__(self, instance_name):
        self.instance_name = instance_name
        database_filename = '%s.settings.db' % self.instance_name
        self.settingsdb = WMTDatabase(database_filename)
        # Make sure the settings table is there before anything tries to read from or write to it
        create_table_sql = "create table if not exists %s_settings (setting_name unique, setting_value)" % self.instance_name
        self.settingsdb.write_query(create_table_sql)

    def get_setting(self, setting_name):
        """
        Return the stored value for setting_name, or None if the database lookup yields None.

        The stored value is unpickled where possible; if unpickling fails for any reason the raw
        stored value is returned instead (to deal with legacy databases)
        """
        # pylint: disable=W0703
        select_sql = "select setting_value from %s_settings where setting_name = ?" % self.instance_name
        stored_value = self.settingsdb.get_value(select_sql, (setting_name,))
        if stored_value is None:
            return None
        try:
            return pickle.loads(stored_value.encode('utf-8'))
        except Exception:  # Pickle can throw loads of weird exceptions, so fall back to the raw value for all of them
            return stored_value

    def update_setting(self, setting_name, setting_value):
        """
        Pickle setting_value and store it under setting_name, replacing any value already stored under that name
        """
        pickled_value = pickle.dumps(setting_value)
        upsert_sql = "insert or replace into %s_settings (setting_name, setting_value) values (?, ?)" % self.instance_name
        self.settingsdb.write_query(upsert_sql, (setting_name, pickled_value))
Ejemplo n.º 2
0
def scrape_odd_platform_directions(write_file=False):
    """
    Check TfL Tube API for Underground platforms that are not designated with a *-bound direction, and (optionally)
    generates a blank CSV template for those stations with Inner/Outer Rail designations.

    Platforms that are neither *-bound nor Inner/Outer Rail are printed to stdout as they are found. The CSV
    template has one row per (station, line) pair with empty Inner Rail / Outer Rail columns.

    If write_file is True the CSV is written to ./sourcedata/circle_platform_data.csv, otherwise it goes to stdout.
    Stations in the template whose name cannot be found in the locations table are reported at the end
    """
    database = WMTDatabase("whensmytrain.geodata.db")
    print("Platforms without a Inner/Outer Rail specification:")
    # Maps (station name, station code) -> set of line codes with Inner/Outer Rail platforms at that station
    station_platforms = {}
    all_train_data = get_tfl_prediction_summaries()
    for (line_code, train_data) in all_train_data.items():
        for station in train_data.findall('S'):
            station_code = station.attrib['Code'][:3]
            # Drop the final character of the name attribute, and any " Circle" qualifier
            station_name = station.attrib['N'][:-1].replace(" Circle", "")

            for platform in station.findall('P'):
                platform_name = platform.attrib['N']
                if re.search("(North|East|South|West)bound", platform_name, re.I):
                    continue
                if re.search("(Inner|Outer) Rail", platform_name, re.I):
                    station_platforms.setdefault((station_name, station_code), set()).add(line_code)
                else:
                    print("%s %s" % (station_name, platform_name))

    print("")
    if write_file:
        outputfile = open('./sourcedata/circle_platform_data.csv', 'w')
    else:
        outputfile = sys.stdout

    errors = []
    try:
        writer = csv.writer(outputfile)
        writer.writerow(['Station Code', 'Station Name', 'Line Code', 'Inner Rail', 'Outer Rail'])
        for (station_name, station_code) in sorted(station_platforms):
            for line_code in sorted(station_platforms[(station_name, station_code)]):
                writer.writerow([station_code, station_name, line_code, '', ''])
            if not database.get_value("SELECT name FROM locations WHERE name=?", (station_name,)):
                errors.append("%s is not in the station database" % station_name)
        outputfile.flush()
    finally:
        # Only close a file we opened ourselves - closing sys.stdout would break every later print
        if write_file:
            outputfile.close()

    print("")
    for error in errors:
        print(error)
def scrape_tfl_destination_codes():
    """
    Scrape destination codes from TfL's TrackerNet and save them to a database.

    Every train in the prediction summaries has its (destination code, line code, destination name) inserted into
    the destination_codes table, ignoring rows that conflict with existing ones. A warning is printed whenever a
    code other than '0' turns up with a different name from the one last seen for it during this run, and the
    final code -> name mapping is pretty-printed at the end
    """
    database = WMTDatabase("whensmytube.destinationcodes.db")
    # Most recent destination name seen for each destination code during this run
    names_by_code = {}
    for (line_code, train_data) in get_tfl_prediction_summaries().items():
        for train in train_data.findall('.//T'):
            attributes = train.attrib
            destination = attributes['DE']
            destination_code = attributes['D']

            # A code we have not seen yet cannot mismatch, so default to the name we have just read
            previously_seen = names_by_code.get(destination_code, destination)
            if destination_code != '0' and previously_seen != destination:
                message = "Error - mismatching destinations: %s (existing) and %s (new) with code %s" \
                          % (previously_seen, destination, destination_code)
                print(message)

            database.write_query("INSERT OR IGNORE INTO destination_codes VALUES (?, ?, ?)", (destination_code, line_code, destination))
            names_by_code[destination_code] = destination
    pprint(names_by_code)
def check_tfl_destination_codes():
    """
    Audit the destination codes we have recorded and report any that cannot be matched to a known location.

    For every recorded destination accepted by filter_tube_train, prints a message if the destination (unless
    it ends in "Train") or its "via" station has no fuzzy match in the locations database
    """
    geodata = RailStationLocations()
    database = WMTDatabase("whensmytube.destinationcodes.db")

    for (destination_name, destination_code, line_code) in database.get_rows("SELECT destination_name, destination_code, line_code FROM destination_codes"):
        # Hack: Fake a ElementTree object to use the XML parser's tube train filter function
        fake_tag = lambda x: 1
        fake_tag.attrib = {'Destination': destination_name, 'DestCode': str(destination_code)}
        if filter_tube_train(fake_tag):
            # Only the destination name varies here; the other constructor arguments are fixed placeholder values
            train = TubeTrain(destination_name, "Northbound", "1200", "C", "001")

            destination = train.get_destination_no_via()
            if not (destination.endswith("Train") or geodata.find_fuzzy_match(destination, {})):
                print("Destination %s (%s) on %s not found in locations database" % (destination, destination_code, line_code))

            via = train.get_via()
            if via:
                if not geodata.find_fuzzy_match(via, {}):
                    print("Via %s (%s) on %s not found in locations database" % (via, destination_code, line_code))
Ejemplo n.º 5
0
def import_network_data_to_graph():
    """
    Import data from a file describing the edges of the Tube network and turn it into a graph object which we pickle and save.

    Reads ./sourcedata/tube-connections.csv (rows of station1, station2, line), cross-checks the stations and lines
    it finds against the locations table and prints any discrepancies, then builds one graph per line plus a
    combined graph under the key 'All'. The resulting dictionary of graphs is pickled to ./db/whensmytrain.network.gr
    """
    database = WMTDatabase("whensmytrain.geodata.db")

    # First we organise our data so that each station knows which lines it is on, and which stations it connects to
    stations_neighbours = {}
    interchanges_by_foot = []

    # Adapted from https://github.com/smly/hubigraph/blob/fa23adc07c87dd2a310a20d04f428f819d43cbdb/test/LondonUnderground.txt
    # which is a CSV of all edges in the network
    with open('./sourcedata/tube-connections.csv') as connections_file:
        reader = csv.reader(connections_file)
        next(reader)  # Discard the first row of the file (presumably a header row) before reading the edges
        for (station1, station2, line) in reader:
            if line in ("National Rail", "East London"):
                continue
            if line == "Walk":
                interchanges_by_foot.append((station1, station2))
                continue
            # When a line splits into two branches, we don't want people being able to travel from one branch to another without
            # changing. So for these special cases, we mark the transitions as being in a particular direction in the CSV, with the
            # direction coming after a colon (e.g. "Leytonstone:Northbound","Wanstead","Central" and "Snaresbrook","Leytonstone:Southbound","Central"
            # Effectively the Central Line station has become two nodes, and now you cannot go directly from Snaresbrook to Wanstead.
            (station1, _, direction) = station1.partition(':')  # Direction is blank for most

            station_data = stations_neighbours.setdefault(station1, [])
            if (station2, direction, line) not in station_data:
                station_data.append((station2, direction, line))

    # Sanity-check our data and make sure it matches database
    canonical_data = database.get_rows("SELECT * FROM locations")
    canonical_station_names = unique_values([canonical['name'] for canonical in canonical_data])
    for station in sorted(stations_neighbours):
        if station not in canonical_station_names:
            print("Error! %s is not in the canonical database of station names" % station)
        for (neighbour, direction, line) in stations_neighbours[station]:
            line_code = get_line_code(line)
            if not database.get_value("SELECT name FROM locations WHERE name=? AND line=?", (station, line_code)):
                print("Error! %s is mistakenly labelled as being on the %s line in list of nodes" % (station, line))
    for station in sorted(canonical_station_names):
        if station not in stations_neighbours:
            print("Error! %s is not in the list of station nodes" % station)
            continue
        database_lines = database.get_rows("SELECT line FROM locations WHERE name=?", (station,))
        # Work out this station's line codes once, rather than again for every database row
        node_line_codes = [get_line_code(line) for (neighbour, direction, line) in stations_neighbours[station]]
        for row in database_lines:
            if row['line'] not in node_line_codes:
                print("Error! %s is not shown as being on the %s line in the list of nodes" % (station, row['line']))

    # Produce versions of the graphs for unique lines
    graphs = {}
    lines = unique_values([line for station in stations_neighbours.values() for (neighbour, direction, line) in station])
    for line in lines:
        this_line_only = {}
        for (station_name, neighbours) in stations_neighbours.items():
            neighbours_for_this_line = [neighbour for neighbour in neighbours if neighbour[2] == line]
            if neighbours_for_this_line:
                this_line_only[station_name] = neighbours_for_this_line
        graphs[get_line_code(line)] = create_graph_from_dict(this_line_only, database, interchanges_by_foot)
    graphs['All'] = create_graph_from_dict(stations_neighbours, database, interchanges_by_foot)

    # Use a context manager so the pickle is flushed and the file closed even if dumping fails part-way
    with open("./db/whensmytrain.network.gr", "w") as graph_file:
        pickle.dump(graphs, graph_file)
Ejemplo n.º 6
0
 def __init__(self, instance_name):
     """
     Remember the instance name, create a WMTDatabase for '<instance_name>.settings.db' and make sure the
     <instance_name>_settings table exists in it
     """
     self.instance_name = instance_name
     settings_filename = '%s.settings.db' % self.instance_name
     self.settingsdb = WMTDatabase(settings_filename)
     create_table_sql = "create table if not exists %s_settings (setting_name unique, setting_value)" % self.instance_name
     self.settingsdb.write_query(create_table_sql)
Ejemplo n.º 7
0
 def __init__(self, instance_name):
     """
     Create a WMTDatabase for '<instance_name>.geodata.db', start with no network loaded, and return
     results as Location objects
     """
     geodata_filename = '%s.geodata.db' % instance_name
     self.database = WMTDatabase(geodata_filename)
     self.network = None
     self.returned_object = Location
Ejemplo n.º 8
0
class WMTLocations():
    """
    Service object used to look up stops or stations (locations) by position, exact match or fuzzy match,
    returning the best matching one. Subclassed and not called directly
    """
    def __init__(self, instance_name):
        geodata_filename = '%s.geodata.db' % instance_name
        self.database = WMTDatabase(geodata_filename)
        self.network = None
        self.returned_object = Location

    def find_closest(self, position, params):
        """
        Find the location closest to the (lat, long) position given, restricted to rows of the locations table
        matching the dictionary params, which is of the format { Column Name : value }.

        Returns an object of class returned_object with its Distance set, or None if the query finds no row
        """
        # GPSes use WGS84 model of Globe, but Easting/Northing based on OSGB36, so convert to an easting/northing
        logging.debug("Position in WGS84 determined as lat/long: %s %s", position[0], position[1])
        easting, northing = convertWGS84toOSEastingNorthing(*position)
        logging.debug("Translated into OS Easting %s, Northing %s", easting, northing)

        # Pythagoras gives us the distance, but sqlite has no square root. We don't need one to rank the rows
        # though - the row with the smallest squared distance is also the closest, so order by that and take
        # the first row, leaving the square root until we have it
        (where_statement, where_values) = self.database.make_where_statement('locations', params)
        query = """
                SELECT (location_easting - %d)*(location_easting - %d) + (location_northing - %d)*(location_northing - %d) AS dist_squared,
                      *
                FROM locations
                WHERE %s
                ORDER BY dist_squared
                LIMIT 1
                """ % (easting, easting, northing, northing, where_statement)
        nearest_row = self.database.get_row(query, where_values)
        if not nearest_row:
            logging.debug("No location found near %s, sorry", position)
            return None

        nearest = self.returned_object(Distance=sqrt(nearest_row['dist_squared']), **nearest_row)
        logging.debug("Have found nearest location %s", nearest)
        return nearest

    def find_fuzzy_match(self, stop_or_station_name, params):
        """
        Find the best match for stop_or_station_name among the rows of the locations table matching the
        dictionary params, which is of the format { Column Name : value, }. An exact match on name is preferred;
        failing that the best fuzzy match is used.

        Returns an object of class returned_object, or None if the name is empty or "Unknown", or nothing matches
        """
        if not stop_or_station_name or stop_or_station_name == "Unknown":
            return None

        # An exact match on the name beats anything fuzzy, so try that first (on a copy, to leave params untouched)
        exact_params = params.copy()
        exact_params['name'] = stop_or_station_name
        exact_match = self.find_exact_match(exact_params)
        if exact_match:
            return exact_match

        # Users may not give exact details, so fall back to fuzzy matching against every candidate row
        (where_statement, where_values) = self.database.make_where_statement('locations', params)
        candidates = []
        for row in self.database.get_rows("SELECT * FROM locations WHERE %s" % where_statement, where_values):
            candidates.append(self.returned_object(**row))
        return get_best_fuzzy_match(stop_or_station_name, candidates) or None

    def find_exact_match(self, params):
        """
        Find the first row of the locations table matching the dictionary params exactly.

        Returns an object of class returned_object, or None if no row matches
        """
        (where_statement, where_values) = self.database.make_where_statement('locations', params)
        row = self.database.get_row("SELECT * FROM locations WHERE %s LIMIT 1" % where_statement, where_values)
        return self.returned_object(**row) if row else None