Example #1
0
    def __str__(self):
        """
        Return a formatted string representing this data for use in a Tweet

        Slots are output in sorted order and separated by semi-colons. Within each slot only the first five unique
        departures are considered; these are grouped by destination, the destinations are ordered by their earliest
        departure time and separated by commas, and at most three times are listed for each destination.

        e.g. "Upminster 1200 1201 1204, Tower Hill 1203; Wimbledon 1200, Ealing Bdwy 1202 1204, Richmond 1208"

        Returns an empty string if there is no departure data at all.
        """
        if not self.departure_data:
            return ""

        # Slots are visited in sorted order, so appending as we go yields the output already ordered by slot
        slot_outputs = []
        for slot in sorted(self.departure_data.keys()):

            # Group departure times by destination in a single pass. The departures are already sorted, so each
            # destination's list of times keeps that order
            departures = unique_values(sorted(self.departure_data[slot]))[:5]
            times_by_destination = {}
            for departure in departures:
                times_by_destination.setdefault(departure.get_destination(True), []).append(departure.get_departure_time())

            # Then sort grouped departures, earliest first within the slot. A key function is used rather than a
            # comparison function so that this works on both Python 2 and Python 3
            destinations_and_times = sorted(times_by_destination.items(), key=lambda item: item[1][0])
            departures_for_this_slot = ["%s %s" % (destination, ' '.join(times[:3])) for (destination, times) in destinations_and_times]
            slot_output = ', '.join([departure.strip() for departure in departures_for_this_slot])

            # Bus stops get their names included as well, if there is a departure
            if isinstance(slot, BusStop) and not slot_output.startswith("None shown"):
                slot_output = "%s to %s" % (slot.get_clean_name(), slot_output)
            slot_outputs.append(slot_output)

        # Return slots separated by semi-colons
        return '; '.join(slot_outputs)
Example #2
0
 def test_listutils(self):
     """
     Check that unique_values() keeps every distinct value of a list, and keeps it exactly once
     """
     # One hundred random integers from 0 to 10 inclusive, so there are bound to be duplicates
     original = []
     for _i in range(100):
         original.append(random.Random().randint(0, 10))
     deduplicated = unique_values(original)

     # Nothing may appear in the result that was not in the input
     for item in deduplicated:
         self.assertTrue(item in original)

     # Every input value, however often it was repeated, must appear in the result precisely once
     for item in original:
         occurrences = deduplicated.count(item)
         self.assertEqual(occurrences, 1)
Example #3
0
    def merge_common_slots(self):
        """
        Combine the first pair of slots found to share a destination into a single slot

        We are not told which slots send departures the same way (as happens at termini, for instance), so this is inferred:
        every pair of slots is examined, and a pair counts as common if at least one destination appears on both.
        To be safe, only the first such pair is merged. Its departures are pooled (with duplicates removed) under
        a new "slot1 & slot2" key, and the two original slots are removed
        """
        overlapping_pairs = []
        for first in self.departure_data.keys():
            for second in self.departure_data.keys():
                # Keep each unordered pair once only, and never pair a slot with itself
                if not first < second:
                    continue
                first_destinations = set([train.get_destination() for train in self.departure_data[first]])
                second_destinations = [train.get_destination() for train in self.departure_data[second]]
                if first_destinations.intersection(second_destinations):
                    overlapping_pairs.append((first, second))

        if not overlapping_pairs:
            return

        (first, second) = overlapping_pairs[0]
        logging.debug("Merging platforms %s and %s", first, second)
        merged_departures = unique_values(self.departure_data[first] + self.departure_data[second])
        self.departure_data[first + ' & ' + second] = merged_departures
        del self.departure_data[first]
        del self.departure_data[second]
def create_graph_from_dict(stations, database, interchanges_by_foot):
    """
    Take a dictionary of stations and their neighbours and return a digraph object

    stations is a dictionary mapping each station name to a list of (neighbour, direction, line) tuples. The neighbour
    may carry a direction of its own after a colon (e.g. "Leytonstone:Southbound"), and direction is an empty string for
    platforms that do not need to be split by direction.
    database must offer get_row(sql, params); it is used to look up the easting and northing of each station by name.
    interchanges_by_foot is an iterable of (station1, station2) pairs that can be walked between; pairs naming a station
    absent from stations are ignored.

    Returns the graph, with every edge weighted by its estimated travel time in minutes.
    """
    # Start creating our directed graph - first by adding all the nodes. Each station is represented by multiple nodes: one to represent
    # the entrance, and one the exit, and then at least one for every line the station serves (i.e. the platforms). This seems complicated, but is
    # designed so that we can accurately simulate the extra delay an interchange takes by adding weighted edges between the platforms
    #
    # If the station needs to have directional info handled (e.g. the line is splitting, or looping on itself), we have one node for each
    # direction on each line that needs to be split. Else the direction is an empty string and so both directions are handled by the same node
    graph = digraph()

    def platform_node(name, direction, line):
        # Each platform node is encoded as a Name:Direction:Line string
        return "%s:%s:%s" % (name, direction, line)

    # Work out the distinct (direction, line) platforms of each station once, as both the nodes and the interchanges need them
    platforms = {}
    for (station, station_data) in stations.items():
        platforms[station] = unique_values([(direction, line) for (_neighbour, direction, line) in station_data])

    for (station, station_data) in stations.items():
        graph.add_node("%s:entrance" % station)
        graph.add_node("%s:exit" % station)
        for (direction, line) in platforms[station]:
            graph.add_node(platform_node(station, direction, line))

    # Station positions are needed once for every edge touching the station, so remember each one rather than querying it again
    sql = "SELECT location_easting, location_northing FROM locations WHERE name=?"
    positions = {}

    def position_of(name):
        if name not in positions:
            positions[name] = database.get_row(sql, (name,))
        return positions[name]

    # Now we add the edges for each line - connecting each set of platforms for each station to the neighbouring stations
    # If a node's Direction is an empty string, we treat it as serving both directions
    for (station, station_data) in stations.items():
        for (neighbour, direction, line) in station_data:
            (neighbour_name, _separator, neighbour_direction) = neighbour.partition(':')
            departure = platform_node(station, direction, line)
            arrival = platform_node(neighbour_name, neighbour_direction, line)

            station_position = position_of(station)
            neighbour_position = position_of(neighbour_name)
            distance = sqrt((station_position[0] - neighbour_position[0]) ** 2 + (station_position[1] - neighbour_position[1]) ** 2)
            # Assume 36km/h for tube trains, which is 36000 m/h or 600 m/min, plus 30 secs for stopping
            travel_time = 0.5 + distance / 600
            graph.add_edge((departure, arrival), wt=travel_time)

            return_connections = [(s.partition(':')[0], l) for (s, _d, l) in stations[neighbour_name]]
            if (station, line) not in return_connections:
                # Note, for Heathrow Terminal 4 (the only one-way station on the network), this is fine
                print("Warning! Connection from %s to %s but not %s to %s on %s line" % (station, neighbour_name, neighbour_name, station, line))

    # After that, we can add the interchanges between each line at each station, and the movements from entrance and to the exit.
    # Walking straight from entrance to exit, or from a platform to the exit, is free; getting from the entrance to a platform costs 2.
    # Because the graph is directed, it is not possible for us to change trains by going to an exit and then back to a platform
    # (or likewise with an entrance); we are forced to use the interchange edge, which has an expensive travel time of 6 minutes
    for (station, station_data) in stations.items():
        graph.add_edge(("%s:entrance" % station, "%s:exit" % station), wt=0)
        for (direction, line) in platforms[station]:
            this_platform = platform_node(station, direction, line)
            graph.add_edge(("%s:entrance" % station, this_platform), wt=2)
            graph.add_edge((this_platform, "%s:exit" % station), wt=0)
            for (other_direction, other_line) in platforms[station]:
                if line != other_line or direction != other_direction:
                    graph.add_edge((this_platform, platform_node(station, other_direction, other_line)), wt=6)

    # Add in interchanges by foot between different stations
    for (station1, station2) in interchanges_by_foot:
        if station1 in stations and station2 in stations:
            graph.add_edge(("%s:exit" % station1, "%s:entrance" % station2), wt=10)

    # Some changes are more expensive than the default. For each platform listed here, every edge to or from another node
    # of the same station is removed (other than those to the exit or from the entrance); if a weight is given the edge
    # is then put back with that weight, and if the weight is None the change is removed altogether
    expensive_interchanges = (
        ('Edgware Road', '', 'Bakerloo', None),
        ('Paddington', 'Hammersmith Branch', 'Hammersmith & City', 10),
        ('Paddington', 'Hammersmith Branch', 'Circle', 10)
    )
    for (station, direction, line, weight) in expensive_interchanges:
        node = platform_node(station, direction, line)
        if not graph.has_node(node):
            continue
        # Match on the name plus its colon, so that a different station whose name merely begins with this one's is left alone
        same_station_prefix = station + ':'
        # Take copies of the neighbour lists, as we change the graph's edges while looping over them
        for outbound_node in list(graph.neighbors(node)):
            if outbound_node.startswith(same_station_prefix) and not outbound_node.endswith('exit'):
                graph.del_edge((node, outbound_node))
                if weight:
                    graph.add_edge((node, outbound_node), wt=weight)
        for inbound_node in list(graph.incidents(node)):
            if inbound_node.startswith(same_station_prefix) and not inbound_node.endswith('entrance'):
                graph.del_edge((inbound_node, node))
                if weight:
                    graph.add_edge((inbound_node, node), wt=weight)
    return graph
def import_network_data_to_graph():
    """
    Import data from a file describing the edges of the Tube network and turn it into a graph object which we pickle and save

    The edges are read from ./sourcedata/tube-connections.csv and cross-checked against the locations table of
    whensmytrain.geodata.db; every mismatch is printed as an error, but processing carries on regardless. A dictionary of
    graphs is then pickled to ./db/whensmytrain.network.gr: one graph for each line, keyed by its line code, plus a graph
    of the whole network under the key 'All'
    """
    database = WMTDatabase("whensmytrain.geodata.db")

    # First we organise our data so that each station knows which lines it is on, and which stations it connects to
    stations_neighbours = {}
    interchanges_by_foot = []

    # Adapted from https://github.com/smly/hubigraph/blob/fa23adc07c87dd2a310a20d04f428f819d43cbdb/test/LondonUnderground.txt
    # which is a CSV of all edges in the network
    with open('./sourcedata/tube-connections.csv') as connections_file:
        reader = csv.reader(connections_file)
        # Discard the first row (presumably the column headings); an empty file simply yields no edges
        next(reader, None)

        for (station1, station2, line) in reader:
            if line in ("National Rail", "East London"):
                continue
            if line == "Walk":
                interchanges_by_foot.append((station1, station2))
                continue

            # When a line splits into two branches, we don't want people being able to travel from one branch to another without
            # changing. So for these special cases, we mark the transitions as being in a particular direction in the CSV, with the
            # direction coming after a colon (e.g. "Leytonstone:Northbound","Wanstead","Central" and "Snaresbrook","Leytonstone:Southbound","Central"
            # Effectively the Central Line station has become two nodes, and now you cannot go directly from Snaresbrook to Wanstead.
            # The direction is blank for most stations, and the station name becomes just e.g. Leytonstone
            (station1, _separator, direction) = station1.partition(':')

            station_data = stations_neighbours.setdefault(station1, [])
            if (station2, direction, line) not in station_data:
                station_data.append((station2, direction, line))

    # Sanity-check our data and make sure it matches database
    canonical_data = database.get_rows("SELECT * FROM locations")
    canonical_station_names = set(canonical['name'] for canonical in canonical_data)
    for station in sorted(stations_neighbours.keys()):
        if station not in canonical_station_names:
            print("Error! %s is not in the canonical database of station names" % station)
        for (neighbour, direction, line) in stations_neighbours[station]:
            line_code = get_line_code(line)
            if not database.get_value("SELECT name FROM locations WHERE name=? AND line=?", (station, line_code)):
                print("Error! %s is mistakenly labelled as being on the %s line in list of nodes" % (station, line))
    for station in sorted(canonical_station_names):
        if station not in stations_neighbours:
            print("Error! %s is not in the list of station nodes" % station)
            continue
        # The line codes this station has in the CSV, worked out once rather than again for every database row
        node_line_codes = set(get_line_code(line) for (_neighbour, _direction, line) in stations_neighbours[station])
        database_lines = database.get_rows("SELECT line FROM locations WHERE name=?", (station,))
        for row in database_lines:
            if row['line'] not in node_line_codes:
                print("Error! %s is not shown as being on the %s line in the list of nodes" % (station, row['line']))

    # Produce versions of the graphs for unique lines
    graphs = {}
    lines = unique_values([line for station in stations_neighbours.values() for (neighbour, direction, line) in station])
    for line in lines:
        this_line_only = {}
        for (station_name, neighbours) in stations_neighbours.items():
            neighbours_for_this_line = [neighbour for neighbour in neighbours if neighbour[2] == line]
            if neighbours_for_this_line:
                this_line_only[station_name] = neighbours_for_this_line
        graphs[get_line_code(line)] = create_graph_from_dict(this_line_only, database, interchanges_by_foot)
    graphs['All'] = create_graph_from_dict(stations_neighbours, database, interchanges_by_foot)

    # Pickles are binary data, so the file is opened in binary mode; the with block makes sure it is flushed and closed
    with open("./db/whensmytrain.network.gr", "wb") as graph_file:
        pickle.dump(graphs, graph_file)