def __str__(self):
    """
    Return a formatted string representing this data for use in a Tweet

    Slots appear in sorted order, separated by semi-colons. Within each slot, destinations are ordered by their
    earliest departure and separated by commas, with multiple times for the same destination grouped together e.g.
    "Upminster 1200 1201 1204, Tower Hill 1203; Wimbledon 1200, Ealing Bdwy 1202 1204, Richmond 1208"

    Only the first five distinct departures of each slot (after sorting) are considered, and at most three times are
    shown for any one destination. Returns an empty string if there is no departure data at all.
    """
    if not self.departure_data:
        return ""

    # This is a dictionary, each key a slot, each value the formatted string of departures for that slot
    departures_output = {}
    for slot in sorted(self.departure_data.keys()):
        departures = unique_values(sorted(self.departure_data[slot]))[:5]

        # Group the departure times by destination in a single pass, remembering the order in which each destination
        # is first seen so that the ordering of ties below does not depend on dictionary ordering
        destinations = []
        times_by_destination = {}
        for departure in departures:
            destination = departure.get_destination(True)
            if destination not in times_by_destination:
                destinations.append(destination)
                times_by_destination[destination] = []
            times_by_destination[destination].append(departure.get_departure_time())

        # Then sort grouped departures, earliest first within the slot. A key function is used rather than a cmp-style
        # comparator taking tuple-unpacking arguments, as the latter is only valid on Python 2
        destinations_and_times = sorted([(destination, times_by_destination[destination]) for destination in destinations],
                                        key=lambda destination_and_times: destination_and_times[1][0])

        # Different destinations are separated by commas, and no more than three times are shown for each
        departures_for_this_slot = ["%s %s" % (destination, ' '.join(times[:3])) for (destination, times) in destinations_and_times]
        departures_output[slot] = ', '.join([departure.strip() for departure in departures_for_this_slot])

        # Bus stops get their names included as well, if there is a departure
        if isinstance(slot, BusStop) and not departures_output[slot].startswith("None shown"):
            departures_output[slot] = "%s to %s" % (slot.get_clean_name(), departures_output[slot])

    # Return slots separated by semi-colons
    return '; '.join([departures_output[slot] for slot in sorted(departures_output.keys())])
def test_listutils(self):
    """
    Unit test for listutils methods: checks unique_values() neither invents values nor leaves any duplicated
    """
    # 100 random integers between 0 and 10 inclusive, so the list is certain to contain duplicates
    test_list = []
    for _i in range(0, 100):
        test_list.append(random.Random().randint(0, 10))
    unique_list = unique_values(test_list)

    # Nothing should be in the new list that was not in the old list
    for value in unique_list:
        self.assertTrue(value in test_list)

    # And everything in the old list should now be in the new list exactly once
    for value in test_list:
        occurrences = unique_list.count(value)
        self.assertEqual(occurrences, 1)
def merge_common_slots(self):
    """
    Merges the first pair of slots that serve the same destinations

    Some slots run departures the same way (e.g. at termini). The DLR doesn't tell us if this is the case, so we
    look at the destinations on each pair of slots and see if there is any overlap, using sets of destinations.
    To be safe, only the first overlapping pair found is merged: its departures are combined (with duplicates
    removed) under a new "<slot1> & <slot2>" key in self.departure_data and the two original keys are deleted.

    Slots are examined in sorted order, so which pair counts as "first" is deterministic and does not depend on
    the iteration order of the dictionary. Modifies self.departure_data in place and returns None.
    """
    slots = sorted(self.departure_data)

    # Work out the set of destinations for each slot just once, rather than once for every pair it appears in
    destinations_by_slot = {}
    for slot in slots:
        destinations_by_slot[slot] = set([t.get_destination() for t in self.departure_data[slot]])

    # Every pair is visited with slot1 < slot2, as the slots are sorted and dictionary keys are unique
    for (index, slot1) in enumerate(slots):
        for slot2 in slots[index + 1:]:
            if not destinations_by_slot[slot1].intersection(destinations_by_slot[slot2]):
                continue
            logging.debug("Merging platforms %s and %s", slot1, slot2)
            self.departure_data[slot1 + ' & ' + slot2] = unique_values(self.departure_data[slot1] + self.departure_data[slot2])
            del self.departure_data[slot1], self.departure_data[slot2]
            # Only ever merge the first overlapping pair
            return
def create_graph_from_dict(stations, database, interchanges_by_foot):
    """
    Take a dictionary of stations and their neighbours and return a digraph object

    stations maps each station name to a list of (neighbour, direction, line) tuples; a neighbour may carry its own
    direction after a colon (i.e. "Name:Direction"). database must provide get_row(sql, params), which is used to
    look up the easting and northing of each station in the locations table. interchanges_by_foot is an iterable of
    (station1, station2) pairs of station names; pairs involving a station not in stations are ignored.

    Edge weights are travel times in minutes. A warning is printed for every connection that has no matching
    connection in the reverse direction on the same line.
    """
    graph = digraph()

    def platform_node(station_name, direction, line):
        """ Return the name of the node for a station's platforms, encoded as a Name:Direction:Line string """
        return "%s:%s:%s" % (station_name, direction, line)

    station_positions = {}

    def get_position(station_name):
        """ Return the (easting, northing) row for a station, querying the database only once per station """
        if station_name not in station_positions:
            sql = "SELECT location_easting, location_northing FROM locations WHERE name=?"
            station_positions[station_name] = database.get_row(sql, (station_name,))
        return station_positions[station_name]

    def remove_or_reweight_edge(edge, weight):
        """ Delete an edge from the graph and, if a weight is given, put it back with that weight instead """
        graph.del_edge(edge)
        if weight:
            graph.add_edge(edge, wt=weight)

    # Start creating our directed graph - first by adding all the nodes. Each station is represented by multiple nodes: one to represent
    # the entrance, and one the exit, and then at least one for every line the station serves (i.e. the platforms). This seems complicated, but is
    # designed so that we can accurately simulate the extra delay an interchange takes by adding weighted edges between the platforms
    #
    # If the station needs to have directional info handled (e.g. the line is splitting, or looping on itself), we have one node for each
    # direction on each line that needs to be split. Else the direction is an empty string and so both directions are handled by the same node
    directions_and_lines_by_station = {}
    for (station, station_data) in stations.items():
        graph.add_node("%s:entrance" % station)
        graph.add_node("%s:exit" % station)
        directions_and_lines = unique_values([(direction, line) for (_neighbour, direction, line) in station_data])
        directions_and_lines_by_station[station] = directions_and_lines
        for (direction, line) in directions_and_lines:
            graph.add_node(platform_node(station, direction, line))

    # Now we add the edges for each line - connecting each set of platforms for each station to the neighbouring stations
    # Each node is encoded as a Name:Direction:Line string. If Direction is an empty string, we treat it as both directions
    for (station, station_data) in stations.items():
        for (neighbour, direction, line) in station_data:
            (neighbour_name, _separator, neighbour_direction) = neighbour.partition(':')
            departure = platform_node(station, direction, line)
            arrival = platform_node(neighbour_name, neighbour_direction, line)

            station_position = get_position(station)
            neighbour_position = get_position(neighbour_name)
            distance = sqrt((station_position[0] - neighbour_position[0]) ** 2 + (station_position[1] - neighbour_position[1]) ** 2)
            # Assume 36km/h for tube trains, which is 36000 m/h or 600 m/min, plus 30 secs for stopping
            travel_time = 0.5 + distance / 600
            graph.add_edge((departure, arrival), wt=travel_time)

            # Note, for Heathrow Terminal 4 (the only one-way station on the network), this warning is fine
            return_connections = [(s.partition(':')[0], l) for (s, _d, l) in stations[neighbour_name]]
            if (station, line) not in return_connections:
                print("Warning! Connection from %s to %s but not %s to %s on %s line" % (station, neighbour_name, neighbour_name, station, line))

    # After that, we can add the interchanges between each line at each station, and the movements from entrance and to the exit.
    # Entrances and exits have zero travel time; because the graph is directed, it is not possible for us to change trains by
    # going to an exit and then back to a platform (or likewise with an entrance); we are forced to use the interchange edge,
    # which has an expensive travel time of 6 minutes
    for station in stations:
        entrance = "%s:entrance" % station
        station_exit = "%s:exit" % station
        graph.add_edge((entrance, station_exit), wt=0)
        directions_and_lines = directions_and_lines_by_station[station]
        for (direction, line) in directions_and_lines:
            platform = platform_node(station, direction, line)
            graph.add_edge((entrance, platform), wt=2)
            graph.add_edge((platform, station_exit), wt=0)
            for (other_direction, other_line) in directions_and_lines:
                if line != other_line or direction != other_direction:
                    graph.add_edge((platform, platform_node(station, other_direction, other_line)), wt=6)

    # Add in interchanges by foot between different stations
    for (station1, station2) in interchanges_by_foot:
        if station1 in stations and station2 in stations:
            graph.add_edge(("%s:exit" % station1, "%s:entrance" % station2), wt=10)

    # Make some changes more expensive (Paddington), or remove them altogether where the weight is None (Edgware Road)
    expensive_interchanges = (
        ('Edgware Road', '', 'Bakerloo', None),
        ('Paddington', 'Hammersmith Branch', 'Hammersmith & City', 10),
        ('Paddington', 'Hammersmith Branch', 'Circle', 10),
    )
    for (station, direction, line, weight) in expensive_interchanges:
        node = platform_node(station, direction, line)
        if not graph.has_node(node):
            continue
        # Include the separator, so only nodes of this exact station match and not those of any other station whose
        # name merely begins with the same text
        station_prefix = station + ':'
        # Copies of the neighbour lists are taken, as we alter the graph's edges while looping over them
        for outbound_node in list(graph.neighbors(node)):
            if outbound_node.startswith(station_prefix) and not outbound_node.endswith('exit'):
                remove_or_reweight_edge((node, outbound_node), weight)
        for inbound_node in list(graph.incidents(node)):
            if inbound_node.startswith(station_prefix) and not inbound_node.endswith('entrance'):
                remove_or_reweight_edge((inbound_node, node), weight)

    return graph
def import_network_data_to_graph():
    """
    Import data from a file describing the edges of the Tube network and turn it into graph objects, which we pickle and save

    Reads the edges from ./sourcedata/tube-connections.csv, checks them against the locations table of the
    whensmytrain.geodata.db database (printing an error for every inconsistency found, but carrying on regardless),
    then builds one graph for each line plus a graph of the whole network under the key 'All'. The resulting
    dictionary of graphs is pickled to ./db/whensmytrain.network.gr
    """
    database = WMTDatabase("whensmytrain.geodata.db")

    # First we organise our data so that each station knows which lines it is on, and which stations it connects to
    stations_neighbours = {}
    interchanges_by_foot = []

    # Adapted from https://github.com/smly/hubigraph/blob/fa23adc07c87dd2a310a20d04f428f819d43cbdb/test/LondonUnderground.txt
    # which is a CSV of all edges in the network
    with open('./sourcedata/tube-connections.csv') as csv_file:
        reader = csv.reader(csv_file)
        next(reader)  # Discard the first row (presumably a header row)
        for (station1, station2, line) in reader:
            if line in ("National Rail", "East London"):
                continue
            if line == "Walk":
                interchanges_by_foot.append((station1, station2))
                continue
            # When a line splits into two branches, we don't want people being able to travel from one branch to another without
            # changing. So for these special cases, we mark the transitions as being in a particular direction in the CSV, with the
            # direction coming after a colon (e.g. "Leytonstone:Northbound","Wanstead","Central" and "Snaresbrook","Leytonstone:Southbound","Central"
            # Effectively the Central Line station has become two nodes, and now you cannot go directly from Snaresbrook to Wanstead.
            # The direction is blank for most stations, and the station name becomes just e.g. Leytonstone
            (station1, _separator, direction) = station1.partition(':')
            station_data = stations_neighbours.setdefault(station1, [])
            connection = (station2, direction, line)
            if connection not in station_data:
                station_data.append(connection)

    # Sanity-check our data and make sure it matches database
    canonical_data = database.get_rows("SELECT * FROM locations")
    canonical_station_names = unique_values([canonical['name'] for canonical in canonical_data])

    for station in sorted(stations_neighbours.keys()):
        if station not in canonical_station_names:
            print("Error! %s is not in the canonical database of station names" % station)
        for (_neighbour, _direction, line) in stations_neighbours[station]:
            line_code = get_line_code(line)
            if not database.get_value("SELECT name FROM locations WHERE name=? AND line=?", (station, line_code)):
                print("Error! %s is mistakenly labelled as being on the %s line in list of nodes" % (station, line))

    for station in sorted(canonical_station_names):
        if station not in stations_neighbours:
            print("Error! %s is not in the list of station nodes" % station)
            continue
        # The line codes this station has in the list of nodes are the same for every database row, so work them out once
        node_line_codes = [get_line_code(line) for (_neighbour, _direction, line) in stations_neighbours[station]]
        database_lines = database.get_rows("SELECT line FROM locations WHERE name=?", (station,))
        for row in database_lines:
            if row['line'] not in node_line_codes:
                print("Error! %s is not shown as being on the %s line in the list of nodes" % (station, row['line']))

    # Produce versions of the graphs for unique lines
    graphs = {}
    lines = unique_values([line for station in stations_neighbours.values() for (_neighbour, _direction, line) in station])
    for line in lines:
        this_line_only = {}
        for (station_name, neighbours) in stations_neighbours.items():
            neighbours_for_this_line = [neighbour for neighbour in neighbours if neighbour[2] == line]
            if neighbours_for_this_line:
                this_line_only[station_name] = neighbours_for_this_line
        graphs[get_line_code(line)] = create_graph_from_dict(this_line_only, database, interchanges_by_foot)
    graphs['All'] = create_graph_from_dict(stations_neighbours, database, interchanges_by_foot)

    # Pickles are written in binary mode so the data is not altered by newline translation, and the with block makes
    # sure the file is flushed and closed once the dump has finished
    with open("./db/whensmytrain.network.gr", "wb") as graph_file:
        pickle.dump(graphs, graph_file)