def segment_stream(bb):
    """Split a stream of bus position records into contiguous "runs".

    A segment is a maximal stretch of consecutive records that agree on all
    fields listed in PARAM['segment_indicators'] and contain no time gap
    larger than PARAM['segment_timegap_minutes'].  Each segment is collapsed
    into a single "run" record (via commons.index_dicts_by_key), tagged with
    a fresh 'RunUUID', and yielded.

    NOTE(review): relies on module-level PARAM, KEYS, gpstime, BUSID_OF,
    commons, uuid, dt -- not visible in this chunk.
    """
    # Sort records by GPS time
    if PARAM['segment_sort_by_gpstime']:
        bb = sorted(bb, key=gpstime)
    # Reverse list for easy pop-ing (pop() from the end is O(1))
    bb = list(reversed(list(bb)))
    while bb:
        # Initiate a new segment
        segment = [bb.pop()]
        # Build segment while no indicators change
        while bb and all((bb[-1].get(k, '?') == segment[-1].get(k, '?')) for k in PARAM['segment_indicators']):
            # None of the indicators have changed: continue the segment record
            (t0, t1) = (gpstime(b) for b in [segment[-1], bb[-1]])
            assert (t0 <= t1), "The records should be sorted by GPS time"
            # ... unless there is a large time gap
            if ((t1 - t0) > dt.timedelta(minutes=PARAM['segment_timegap_minutes'])):
                break
            b = bb.pop()
            # If the timestamp is the same ignore this record
            # Note: sometimes the rest of the fields may change,
            # in particular the location (even over 100m),
            # possibly due to the GPS-time field precision of 1 sec
            if (t0 == t1):
                continue
            segment.append(b)
        # Sanity check: only one bus tracked in the segment
        assert (1 == len(set(map(BUSID_OF, segment))))
        # Collapse into one record
        run = next(
            iter(
                commons.index_dicts_by_key(
                    segment,
                    BUSID_OF,
                    keys_dont_collapse=PARAM['keys_dont_collapse'],
                    keys_singletons_ok=PARAM['keys_singletons_ok']).values()))
        # Attach an ad-hoc ID tag
        run['RunUUID'] = uuid.uuid4().hex
        # Optionally drop runs where the position never changed
        if PARAM['drop_stationary']:
            if (1 == len(set(run[KEYS['pos']]))):
                continue
        yield run
def generate_timetables():
    """Compute per-route ETA timetables from segmented bus runs.

    For each 'segment_by_route' file: keep only quality ("+") runs, compute
    the estimated arrival time of each run at each stop of the route
    (bus_at_stops, parallelized with joblib), and write the resulting
    Busrun x Stop table as a JSON file (pandas DataFrame serialized via
    to_json).  Existing output files are skipped.

    NOTE(review): relies on module-level IFILE, OFILE, PARAM, ROUTE_KEY,
    commons, bus_at_stops, progressbar -- not visible in this chunk.
    """
    # MOTC route info, indexed by ROUTE_KEY (hardcoded to Kaohsiung here)
    motc_routes = commons.index_dicts_by_key(
        commons.zipjson_load(IFILE['MOTC_routes'].format(City="Kaohsiung")),
        ROUTE_KEY)
    run_files = commons.ls(IFILE['segment_by_route'].format(scenario="**", routeid="*", dir="*"))
    print("Found {} route files.".format(len(run_files)))
    for run_file in run_files:
        print("===")
        print("Analyzing route file {}.".format(run_file))
        # Recover (scenario, routeid, dir) by matching the filename template
        (scenario, routeid, dir) = re.fullmatch(
            IFILE['segment_by_route'].format(scenario="(.*)", routeid="(.*)", dir="(.*)"),
            run_file).groups()
        case = {'scenario': scenario, 'routeid': routeid, 'dir': int(dir)}
        print("Route: {routeid}, direction: {dir} (from scenario: {scenario})".format(**case))
        # # DEBUG: trouble case
        # if not ((routeid, int(dir)) == ('KHH116', 1)) : continue
        fn_output = commons.makedirs(OFILE['timetable_json'].format(**case))
        # Do not recompute existing timetables
        if os.path.isfile(fn_output):
            print("Output file exists ({}) -- skipping".format(fn_output))
            continue
        # Load all bus run segments for this case
        runs = commons.zipjson_load(run_file)
        print("Number of runs: {} ({})".format(len(runs), "total"))
        runs = [run for run in runs if (run.get('quality') == "+")]
        print("Number of runs: {} ({})".format(len(runs), "quality"))
        try:
            route = motc_routes[(case['routeid'], case['dir'])]
            stops = route['Stops']
        except KeyError:
            print("Warning: No stops info for route {routeid}, direction {dir}".format(**case))
            continue
        # ETA table of Busrun x Stop
        ETA = np.vstack(
            Parallel(n_jobs=PARAM['n_parallel_jobs'])(
                delayed(bus_at_stops)(run, stops) for run in progressbar(runs))
            # bus_at_stops(run, stops) for run in progressbar(runs)
        )
        # 2018-12-12: pandas does not digest dt.datetime with timezones
        # https://github.com/pandas-dev/pandas/issues/13287
        # Note: datetime64 automatically converts to UTC,
        # i.e. these are the same:
        # np.datetime64(dt.datetime.utcnow())
        # np.datetime64(dt.datetime.now().astimezone())
        ETA = ETA.astype('datetime64[ms]')
        # Timetable as DataFrame, one column per stop
        df = pd.DataFrame(data=ETA, columns=[s['StopUID'] for s in stops])
        J = {
            'case': case,
            'route': route,
            'run_file': run_file,
            'timetable_df': df.to_json(),
        }
        with open(fn_output, 'w') as fd:
            json.dump(J, fd)
def match_routes():
    """Match OSM bus routes against MOTC route records (exploratory).

    For each bus route relation in the OSM extract, ranks candidate MOTC
    routes by (Method 0) route-name similarity and (Method 1) similarity of
    the terminal-stop names, printing the ranked candidates.

    Fixes applied:
      * strip_brackets: the pattern '(?P<name>\\w+)+' quantified the capture
        group redundantly (group keeps only the last repetition; the nested
        quantifier invites catastrophic backtracking) -- '(?P<name>\\w+)'
        matches exactly the same strings.
      * Removed a no-op 'try: ... except: raise' around Method 0.
      * Dropped the unused binding in 'except KeyError as e'.

    NOTE(review): the bare 'continue' below intentionally keeps "Method 2"
    (all-stops matching) unreachable, as in the original -- apparently a
    development leftover; preserved to keep behavior identical.
    """
    # MOTC route info, indexed by SubRouteUID
    motc_routes = commons.index_dicts_by_key(
        commons.zipjson_load(IFILE['MOTC_routes']),
        (lambda r: r['SubRouteUID']))

    # for (route_id, route) in route_stops.items() :
    #     stops = dict(zip(route['Direction'], route['Stops']))

    OSM = pickle.load(open(IFILE['OSM'], 'rb'))

    for (route_id, route) in OSM['rels']['route'].items():

        # Skip non-bus routes
        if not (route['t'].get('route') == 'bus'):
            continue

        # Note: most routes have relations in route['r']
        (route_tags, route_stops, route_ways) = (route['t'], route['n'], route['w'])

        # https://wiki.openstreetmap.org/wiki/Buses
        route_name = route_tags['name']

        # Common routines

        def strip_brackets(s):
            # Keep the leading word; tolerate a trailing bracketed remark
            return re.match(r'(?P<name>\w+)[ ]*(?P<extra>\(\w+\))*', s).group('name')

        def matchratio_stop_names(name1, name2):
            return difflib.SequenceMatcher(None, strip_brackets(name1), strip_brackets(name2)).ratio()

        # Method 0: Match route names; keep the best few candidates
        top_namematch_motc_ids = sorted(
            motc_routes.keys(),
            key=(lambda j: matchratio_stop_names(route_name, motc_routes[j]['RouteName']['Zh_tw'])),
            reverse=True
        )[0:6]
        #print("Route {} best matches: {}".format(route_name, ",".join([motc_routes[j]['RouteName']['Zh_tw'] for j in top_namematch_motc_ids])))

        # Method 1: Match route start/end stops

        def zip_listify(a, b):
            # MOTC 'Direction'/'Stops' may be scalars or parallel lists
            return zip(a, b) if (type(a) is list) else zip([a], [b])

        try:
            (route_a, route_b) = (route_tags['from'], route_tags['to'])

            def matchratio_ab(motc_route):
                # motc_name = motc_route['RouteName']['Zh_tw']
                for (direction, stops) in zip_listify(motc_route['Direction'], motc_route['Stops']):
                    (motc_a, motc_b) = map(commons.inspect({'StopName': 'Zh_tw'}), [stops[0], stops[-1]])
                    # Average similarity of the two terminal stop names
                    ab_ratio = (matchratio_stop_names(route_a, motc_a) + matchratio_stop_names(route_b, motc_b)) / 2
                    assert ((0 <= ab_ratio) and (ab_ratio <= 1))
                    yield (ab_ratio, {'SubRouteUID': motc_route['SubRouteUID'], 'Direction': direction})

            ab_matchratios = sorted(
                chain.from_iterable([matchratio_ab(motc_routes[j]) for j in top_namematch_motc_ids]),
                key=(lambda p: p[0]),
                reverse=True
            )

            print(route_name, ab_matchratios)

        except KeyError:
            # Route relation lacks 'from'/'to' tags (or record incomplete)
            #print("Method 1 failed on route {}".format(route_name))
            continue

        #print(route_tags)
        # NOTE(review): dead code below this point (preserved from original)
        continue

        if (len(route_stops) < 2):
            #print("Route {} has fewer than two stops".format(route_name))
            #print(route_ways)
            continue

        # Method 2: Match all stops

        # Get stop info
        if not all(OSM['node_tags'].get(i) for i in route_stops):
            print("Nodes of route {} not found".format(route_tags['name']))
            continue

        route_stops = {i: OSM['node_tags'].get(i) for i in route_stops}
        print(route_stops)
        #print(route['n'])
        #time.sleep(1)

    return
def vis1():
    """Interactively visualize bus runs of a hardcoded route ('KHH24').

    Loads per-bus run files, groups runs by SubRouteUID, draws the route
    stops over a Mapbox background map, then animates each run's trace and
    invokes bus_at_stops on it.

    NOTE(review): relies on module-level IFILE, PARAM, commons, maps, plt,
    defaultdict, bus_at_stops -- not visible in this chunk.
    """
    # OSM = pickle.load(open(IFILE['OSM'], 'rb'))
    # for (route_id, route) in OSM['rels']['route'].items():
    #     # Skip non-bus routes
    #     if not (route['t'].get('route') == 'bus'): continue
    #     route_name = route['t'].get('name')
    #     route_ref = route['t']['ref']
    #     #if (route_ref == '88') :
    #     print(route_name, route_id, route['t'])
    # exit(39)

    routeid_of = (lambda r: r['SubRouteUID'])

    # List of filenames, one file per physical bus, identified by plate number
    bus_files = commons.ls(IFILE['busses'].format(busid="*"))

    # Refile bus runs by their route ID
    runs_by_route = defaultdict(list)
    for fn in bus_files:
        runs = commons.zipjson_load(fn)
        for run in runs:
            runs_by_route[routeid_of(run)].append(run)

    #
    route_stops = commons.index_dicts_by_key(commons.zipjson_load(IFILE['route-stops']), routeid_of)

    # Are those valid route ID that can be found among the routes?
    unknown_route_ids = sorted(set(runs_by_route.keys()) - set(route_stops.keys()))
    if unknown_route_ids:
        print("The following route IDs from bus records are unknown:")
        print(", ".join(unknown_route_ids))
        raise KeyError("Unkown route IDs in bus records")

    # Hardcoded route of interest
    route_uid = 'KHH24'

    runs = runs_by_route[route_uid]
    route = route_stops[route_uid]

    # Kaohsiung (left, bottom, right, top)
    bbox = (120.2593, 22.5828, 120.3935, 22.6886)
    (left, bottom, right, top) = bbox

    # Download the background map
    i = maps.get_map_by_bbox(bbox, token=PARAM['mapbox_api_token'])

    # Show the background map
    (fig, ax) = plt.subplots()
    plt.ion()
    ax.axis([left, right, bottom, top])
    ax.imshow(i, extent=(left, right, bottom, top), interpolation='quadric')
    #fig.canvas.draw_idle()
    plt.pause(0.1)

    stops_by_direction = dict(zip(route['Direction'], route['Stops']))

    # Draw stops for both route directions
    for (dir, stops) in stops_by_direction.items():
        # Stop locations (note: lat first, lon second)
        (y, x) = zip(*[
            commons.inspect({'StopPosition': ('PositionLat', 'PositionLon')})(stop)
            for stop in stops
        ])
        # Plot as dots, color-coded by direction
        ax.scatter(x, y, c=('b' if dir else 'g'), marker='o', s=4)

    # Show bus location
    for run in runs:
        # Trace bus: full path, start marker, end marker
        (y, x) = (run['PositionLat'], run['PositionLon'])
        h1 = ax.plot(x, y, '--+', c='r', linewidth=1)
        h2 = ax.plot(x[0], y[0], 'o', c='r')
        h3 = ax.plot(x[-1], y[-1], 's', c='r')
        plt.title(run['PlateNumb'])
        #plt.savefig("{}.png".format(route_uid), dpi=180)
        plt.pause(0.1)
        bus_at_stops(run, stops_by_direction[run['Direction']])
        plt.pause(0.1)
        # Clear this run's artists before drawing the next run
        [h[0].remove() for h in [h1, h2, h3]]

    return
def debug_compare_two():
    """Debug helper: compare two saved transit-map computations.

    Loads two 'transit_map' JSON results (hardcoded UUIDs, same origin),
    finds geo-locations whose sets of reach times differ between the two,
    and for the few earliest-differing locations retraces and plots the
    predecessor ("origin") graphs of both runs side by side.

    NOTE(review): relies on module-level OFILE, commons, nx, ll2xy, maps --
    not visible in this chunk.  The reconstruction of statement nesting in
    this function (from whitespace-mangled source) is best-effort; confirm
    against the original file.
    """
    uuids = [
        "16b767f12ac841fea47ad9b735df1504",
        "69e47ef6a81a4a3aae0529b8b974896b"
    ]
    (J1, J2) = (commons.zipjson_load(OFILE['transit_map'].format(uuid=uuid, ext="json")) for uuid in uuids)

    # NOTE(review): 'o' appears unused at this scope (shadowed in retrace)
    o = tuple(J1['origin']['x'])
    # Both computations must start from the same origin
    assert (J1['origin'] == J2['origin'])

    (H1, H2) = ({}, {})
    (O1, O2) = ({}, {})
    for (J, H, O) in zip([J1, J2], [H1, H2], [O1, O2]):
        # Location --> Transit time in minutes ; keep track of duplicates
        J['gohere'] = commons.index_dicts_by_key(
            J['gohere'],
            key_func=(lambda __: tuple(__['x'])),
            collapse_repetitive=False)
        # Keep the *time* field
        H.update({x: attr['s'] for (x, attr) in J['gohere'].items()})
        # Keep the *origin* field
        O.update({x: attr['o'] for (x, attr) in J['gohere'].items()})

    # The two datasets cover the same geo-locations
    assert (set(H1) == set(H2))

    # Differing locations, earliest (smallest combined reach time) first
    X = sorted([x for x in H1 if (set(H1[x]) != set(H2[x]))], key=(lambda x: sum(H1[x]) + sum(H2[x])))
    # commons.logger.debug("Earliest differing location: {}".format(items[0]))

    for x in X[0:4]:
        g1 = nx.DiGraph()
        g2 = nx.DiGraph()

        def retrace(O, g, x):
            # Walk predecessor links backwards, building edges o -> x;
            # has_edge check prevents re-walking shared ancestry
            for o in O[x]:
                if o:
                    o = tuple(o)
                    if not g.has_edge(o, x):
                        g.add_edge(o, x)
                        retrace(O, g, o)
            # Record plotting position for this node
            g.nodes[x]['xy'] = ll2xy(x)

        retrace(O1, g1, x)
        retrace(O2, g2, x)

        commons.logger.debug("Graph 1: {}".format(g1.nodes))
        commons.logger.debug("Graph 2: {}".format(g2.nodes))

        import matplotlib as mpl
        mpl.use("TkAgg")
        import matplotlib.pyplot as plt
        (fig, ax) = plt.subplots()

        # # "Inner" Kaohsiung
        # bbox = (120.2593, 22.5828, 120.3935, 22.6886)
        # # Set plot view to the bbox
        # ax.axis(maps.mb2ax(*bbox))
        # ax.autoscale(enable=False)

        # Overlay the two predecessor graphs (blue vs green)
        nx.draw_networkx(g1, ax=ax, pos=nx.get_node_attributes(g1, 'xy'), edge_color='b', node_size=1, with_labels=False)
        nx.draw_networkx(g2, ax=ax, pos=nx.get_node_attributes(g2, 'xy'), edge_color='g', node_size=1, with_labels=False)

        plt.show()
def make_transit_img(J, backend='Agg') -> bytes:
    """Render a transit-time contour map for one computation as PNG bytes.

    From a saved transit-map result J: takes the minimal reach time per
    geo-location (in minutes), draws a filled contour plot over a Mapbox
    background of "inner" Kaohsiung with the origin marked, and returns the
    rendered figure as the raw bytes of a PNG.

    :param J: loaded 'transit_map' JSON dict with 'origin' and 'gohere'.
    :param backend: matplotlib backend; 'TkAgg' additionally shows the
        figure interactively.
    :return: PNG image data as bytes.

    NOTE(review): relies on module-level commons, maps, PARAM, ll2xy, np, io
    -- not visible in this chunk.
    """
    import matplotlib as mpl
    mpl.use(backend)
    # Silence the "too many open figures" warning threshold
    mpl.rcParams['figure.max_open_warning'] = 10
    import dateutil.parser as dateparser
    import matplotlib.pyplot as plt

    ax: plt.Axes
    (fig, ax) = plt.subplots()

    origin = {
        'x': J['origin']['x'],
        't': dateparser.parse(J['origin']['t']),
        'desc': J['origin'].get('desc'),
    }

    # Location --> Transit time in minutes ; keep track of duplicates
    gohere = commons.index_dicts_by_key(J['gohere'], key_func=(lambda __: tuple(__['x'])), collapse_repetitive=False)

    # Keep only the minimal reach time, convert to minutes
    gohere = {x: (min(attr['s']) / 60) for (x, attr) in gohere.items()}

    # # Cut-off (and normalize)
    T = 60  # Minutes
    # gohere = { p : s for (p, s) in gohere.items() if (s <= T) }

    # Reindex datapoints by (x, y) pairs
    contour_pts = dict(zip(map(ll2xy, gohere.keys()), gohere.values()))
    #boxes = dict(boxify(gohere, maxinbox=10))

    # "Inner" Kaohsiung
    bbox = (120.2593, 22.5828, 120.3935, 22.6886)

    # Set plot view to the bbox
    ax.axis(maps.mb2ax(*bbox))
    ax.autoscale(enable=False)

    # Background map is best-effort (e.g. no network)
    try:
        background_map = maps.get_map_by_bbox(bbox, **PARAM['mapbox'])
        ax.imshow(background_map, interpolation='quadric', extent=maps.mb2ax(*bbox), zorder=-100)
    except Exception as e:
        commons.logger.warning("No background map ({})".format(e))

    # Cross at the origin
    ((ox, oy), ot) = (ll2xy(origin['x']), origin['t'].strftime("%Y-%m-%d / %H:%M"))
    ax.plot(ox, oy, 'gx', ms=3, mew=0.2)
    ax.text(ox, oy, s="\n{}".format(ot), ha='center', va='top', fontsize=2)

    # Show all datapoints
    #ax.scatter(*zip(*contour_pts), marker='o', c='k', s=0.1, lw=0, edgecolor='none')

    # # Hack! for corners
    # for (x, y) in product(ax.axis()[:2], ax.axis()[2:]) :
    #     contour_pts[(x, y)] = max(gohere.values())

    cmap = plt.get_cmap('Purples')
    # NOTE(review): set_over is applied to the original colormap, but the
    # cmap is rebuilt as a ListedColormap just below -- the 'k' over-color
    # is presumably lost there; confirm whether that is intended
    cmap.set_over('k')

    # https://stackoverflow.com/questions/37327308/add-alpha-to-an-existing-matplotlib-colormap
    from matplotlib.colors import ListedColormap
    # Rebuild the colormap with a linear alpha ramp from 0 to 0.5
    cmap = ListedColormap(
        np.vstack(
            [cmap(np.arange(cmap.N))[:, 0:3].T, np.linspace(0, 0.5, cmap.N)]).T)

    # Keys of contour_pts are (x, y) pairs
    (x, y) = zip(*contour_pts)
    levels = list(range(0, T, 5))
    c = ax.tricontourf(x, y, list(contour_pts.values()), levels=levels, zorder=100, cmap=cmap, extent=maps.mb2ax(*bbox), extend='max')

    # Reduce fontsize
    ax.tick_params(axis='both', which='both', labelsize='xx-small')
    fig.colorbar(c).ax.tick_params(axis='both', which='both', labelsize='xx-small')

    # import matplotlib.patches as patches
    # boxes = dict(boxify(contour_pts, maxinbox=20))
    # for (bb, gohere_part) in list(boxes.items()) :
    #     (y, x) = bb[0:2]
    #     (h, w) = (bb[2]-bb[0], bb[3]-bb[1])
    #     ax.add_patch(patches.Rectangle((x, y), w, h, linewidth=0.5, edgecolor='k', facecolor='none'))
    #     # for (latlon, s) in list(gohere_part.items()) :
    #     #     ax.plot(*ll2xy(latlon), 'o', c=plt.get_cmap('Purples')(s), markersize=3)

    # Render to an in-memory PNG buffer
    buffer = io.BytesIO()
    fig.savefig(buffer, bbox_inches='tight', pad_inches=0, dpi=300)
    buffer.seek(0)

    # Interactive backends: also show the figure briefly
    if backend.lower() in ["tkagg"]:
        plt.ion()
        plt.show()
        plt.pause(0.1)

    plt.close(fig)

    return buffer.read()