def segment_by_route_img():
    for fn in progressbar(commons.ls(OFILE['segment_by_route'].format(routeid="*", dir="*", ext="json"))):
        with open(fn, 'r') as fd:
            segments = json.load(fd)

        waypoints_by_quality = {
            q: list(s[KEYS['pos']] for s in g)
            for (q, g) in groupby(segments, key=(lambda s: s[PARAM['quality_key']]))
        }

        assert ({'-', '+'}.issuperset(waypoints_by_quality.keys()))

        # commons.logger.debug("\n".join(waypoints_by_quality.items()))

        waypoints = [] + list(chain.from_iterable(waypoints_by_quality.get('-', [])))
        tracks = [[]] + waypoints_by_quality.get('+', [])

        with open(commons.reformat(OFILE['segment_by_route'], fn, {'ext': "png"}), 'wb') as fd:
            maps.write_track_img(waypoints=waypoints, tracks=tracks, fd=fd, mapbox_api_token=PARAM['mapbox_api_token'])

def get_all_logs():
    return [
        fn
        for fn in commons.ls(IFILE['realtime_log_file'].format(d="*", t="*"))
        if PARAM['datetime_filter']['func'](
            dt.datetime.strptime(fn, IFILE['realtime_log_file'].format(d="%Y%m%d", t="%H%M%S"))
        )
    ]

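# Illustration only (not used by the pipeline): get_all_logs relies on the fact that
# the same filename template can both glob the log files (with "*" placeholders) and
# recover their timestamp via strptime (with "%Y%m%d" / "%H%M%S" placeholders).
# A minimal sketch with a hypothetical template:
def _demo_parse_log_timestamp():
    template = "logs/{d}/{t}.json"                   # hypothetical template
    fn = template.format(d="20181201", t="093000")   # "logs/20181201/093000.json"
    stamp = dt.datetime.strptime(fn, template.format(d="%Y%m%d", t="%H%M%S"))
    assert stamp == dt.datetime(2018, 12, 1, 9, 30)
    return stamp
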
def img_transit():
    for fn in commons.ls(OFILE['transit_map'].format(uuid="*", ext="json")):
        with open(commons.reformat(OFILE['transit_map'], fn, {'ext': "png"}), 'wb') as fd:
            J = commons.zipjson_load(fn)
            fd.write(make_transit_img(J))

def segment_by_bus_img():
    for fn in progressbar(commons.ls(OFILE['segment_by_bus'].format(busid="*", ext="json"))):
        with open(fn, 'r') as fd:
            tracks = [b[KEYS['pos']] for b in json.load(fd)]

        commons.logger.debug(tracks)

        with open(commons.reformat(OFILE['segment_by_bus'], fn, {'ext': "png"}), 'wb') as fd:
            maps.write_track_img(waypoints=[], tracks=tracks, fd=fd, mapbox_api_token=PARAM['mapbox_api_token'])

def segment_by_route():
    # A "case" is the result of "RUN_KEY", i.e. a pair (routeid, direction)
    commons.logger.info("Collecting cases...")

    # Associate to each case the set of files that contain instances of it
    # case_directory : run_key --> set of filenames
    case_directory = {
        case: set(r[1] for r in g)
        for (case, g) in groupby(
            sorted(
                (RUN_KEY(s), busfile)
                for busfile in commons.ls(IFILE['segment_by_bus'].format(busid="*", ext="json"))
                for s in commons.zipjson_load(busfile)
            ),
            key=(lambda r: r[0])
        )
    }

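# Illustration only (hypothetical data): case_directory above uses the sort-then-groupby
# pattern -- pairs (run_key, filename) are sorted so that itertools.groupby can collect,
# for each run_key, the set of files containing it. A minimal sketch of the same pattern:
def _demo_case_directory():
    pairs = [
        (('KHH100', 0), "busA.json"),
        (('KHH100', 0), "busB.json"),
        (('KHH239', 1), "busA.json"),
    ]
    directory = {
        case: set(fn for (_, fn) in g)
        for (case, g) in groupby(sorted(pairs), key=(lambda r: r[0]))
    }
    assert directory[('KHH100', 0)] == {"busA.json", "busB.json"}
    return directory
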
def map_routes():
    commons.seed()

    # Dictionary of key-values like
    # ('Kaohsiung/TIME', 'KHH144', '0') --> List of files [PATHTO]/Kaohsiung/TIME/KHH144/0/*.json
    files_by_case = {
        case: list(g)
        for (case, g) in groupby(
            commons.ls(IFILE['mapmatched'].format(scenario="**", routeid="*", direction="*", mapmatch_uuid="*", ext="json")),
            key=(lambda s: re.fullmatch(
                IFILE['mapmatched'].format(scenario="(.*)", routeid="([A-Z0-9]+)", direction="([01])", mapmatch_uuid=".*", ext="json"),
                s
            ).groups())
        )
    }

def generate_timetables():
    motc_routes = commons.index_dicts_by_key(
        commons.zipjson_load(IFILE['MOTC_routes'].format(City="Kaohsiung")),
        ROUTE_KEY
    )

    run_files = commons.ls(IFILE['segment_by_route'].format(scenario="**", routeid="*", dir="*"))
    print("Found {} route files.".format(len(run_files)))

    for run_file in run_files:
        print("===")
        print("Analyzing route file {}.".format(run_file))

        (scenario, routeid, dir) = re.fullmatch(
            IFILE['segment_by_route'].format(scenario="(.*)", routeid="(.*)", dir="(.*)"),
            run_file
        ).groups()
        case = {'scenario': scenario, 'routeid': routeid, 'dir': int(dir)}
        print("Route: {routeid}, direction: {dir} (from scenario: {scenario})".format(**case))

        # # DEBUG: trouble case
        # if not ((routeid, int(dir)) == ('KHH116', 1)): continue

        fn_output = commons.makedirs(OFILE['timetable_json'].format(**case))
        if os.path.isfile(fn_output):
            print("Output file exists ({}) -- skipping".format(fn_output))
            continue

        # Load all bus run segments for this case
        runs = commons.zipjson_load(run_file)
        print("Number of runs: {} ({})".format(len(runs), "total"))
        runs = [run for run in runs if (run.get('quality') == "+")]
        print("Number of runs: {} ({})".format(len(runs), "quality"))

        try:
            route = motc_routes[(case['routeid'], case['dir'])]
            stops = route['Stops']
        except KeyError:
            print("Warning: No stops info for route {routeid}, direction {dir}".format(**case))
            continue

        # ETA table of Busrun x Stop
        ETA = np.vstack(
            Parallel(n_jobs=PARAM['n_parallel_jobs'])(
                delayed(bus_at_stops)(run, stops) for run in progressbar(runs)
            )
            # bus_at_stops(run, stops) for run in progressbar(runs)
        )

        # 2018-12-12: pandas does not digest dt.datetime with timezones
        # https://github.com/pandas-dev/pandas/issues/13287
        # Note: datetime64 automatically converts to UTC,
        # i.e. these are the same:
        #     np.datetime64(dt.datetime.utcnow())
        #     np.datetime64(dt.datetime.now().astimezone())
        ETA = ETA.astype('datetime64[ms]')

        # Timetable as DataFrame
        df = pd.DataFrame(data=ETA, columns=[s['StopUID'] for s in stops])

        J = {
            'case': case,
            'route': route,
            'run_file': run_file,
            'timetable_df': df.to_json(),
        }

        with open(fn_output, 'w') as fd:
            json.dump(J, fd)

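# Illustration only: the datetime64 cast in generate_timetables relies on numpy
# normalizing timezone-aware datetimes to UTC, so a naive utcnow() and an aware
# now().astimezone() map to essentially the same datetime64 value (newer numpy
# versions may emit a DeprecationWarning for timezone-aware input). A minimal
# sanity check of that claim:
def _demo_datetime64_utc():
    a = np.datetime64(dt.datetime.utcnow())
    b = np.datetime64(dt.datetime.now().astimezone())
    # The two calls are at most a few microseconds apart
    assert abs(a - b) < np.timedelta64(1, 's')
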
def mapmatch_all():
    commons.seed()

    PARAM['graph_bbox'] = maps.bbox_for_points(
        nx.get_node_attributes(
            trim_graph_to_busable(pickle.load(open(IFILE['OSM_graph_file'], 'rb'))['main_component_with_knn']['g']),
            'pos'
        ).values()
    )

    for route_file_template in IFILE['segment_by_route']:
        route_files = commons.ls(route_file_template.format(scenario="**", routeid="*", direction="*"))

        commons.logger.info("Route file template: {}".format(route_file_template))
        commons.logger.info("Found {} route files".format(len(route_files)))

        for route_file in route_files:
            # time.sleep(2)

            commons.logger.info("===")
            commons.logger.info("Analyzing route file {}.".format(route_file))

            case = commons.unformat(route_file_template, route_file)
            commons.logger.info("Route: {routeid}, direction: {direction} (from scenario: {scenario})".format(**case))

            # # DEBUG
            # if not ("KHH239-0" == "{routeid}-{direction}".format(**case)):
            #     continue

            # Load all bus run segments for this case
            runs = commons.zipjson_load(route_file)
            commons.logger.info("Number of runs: {} ({})".format(len(runs), "total"))

            # Check that the file indeed contains only one type of route
            assert ({(case['routeid'], int(case['direction']))} == set(RUN_KEY(r) for r in runs))

            # Remove runs that have a negative quality flag
            runs = [run for run in runs if not (run.get('quality') == "-")]
            commons.logger.info("Number of runs: {} ({})".format(len(runs), "not marked as bad quality"))

            # Keep only runs within the map
            runs = [run for run in runs if all(is_in_map(*p) for p in run[KEYS.pos])]
            commons.logger.info("Number of runs: {} ({})".format(len(runs), "within the map bbox"))

            if (len(runs) > PARAM['max_runs_to_mapmatch']):
                commons.logger.info("Out of {} available runs, will mapmatch only random {}".format(len(runs), PARAM['max_runs_to_mapmatch']))
                runs = commons.random_subset(runs, k=PARAM['max_runs_to_mapmatch'])

            if (len(runs) < PARAM['min_runs_to_mapmatch']):
                commons.logger.warning("Skipping mapmatch: too few runs.")
                continue

            # Q: clustering here?

            # Existing mapmatched runs for this route
            existing = commons.ls(OFILE['mapmatched'].format(**case, mapmatch_uuid="*", ext="json"))
            if existing:
                commons.logger.warning("Skipping mapmatch: {} mapmatched files found".format(len(existing)))
                continue

            try:
                mapmatch_runs(case['scenario'], runs)
            except Exception as e:
                commons.logger.error("Mapmatch failed ({}) \n{}".format(e, traceback.format_exc()))
                commons.logger.warning("Mapmatch incomplete on route {routeid}-{direction} from scenario '{scenario}'".format(**case))
                time.sleep(5)

def vis1():
    # OSM = pickle.load(open(IFILE['OSM'], 'rb'))
    # for (route_id, route) in OSM['rels']['route'].items():
    #     # Skip non-bus routes
    #     if not (route['t'].get('route') == 'bus'): continue
    #
    #     route_name = route['t'].get('name')
    #
    #     route_ref = route['t']['ref']
    #     # if (route_ref == '88'):
    #     print(route_name, route_id, route['t'])
    # exit(39)

    routeid_of = (lambda r: r['SubRouteUID'])

    # List of filenames, one file per physical bus, identified by plate number
    bus_files = commons.ls(IFILE['busses'].format(busid="*"))

    # Refile bus runs by their route ID
    runs_by_route = defaultdict(list)
    for fn in bus_files:
        runs = commons.zipjson_load(fn)
        for run in runs:
            runs_by_route[routeid_of(run)].append(run)

    #
    route_stops = commons.index_dicts_by_key(commons.zipjson_load(IFILE['route-stops']), routeid_of)

    # Are those valid route IDs that can be found among the routes?
    unknown_route_ids = sorted(set(runs_by_route.keys()) - set(route_stops.keys()))

    if unknown_route_ids:
        print("The following route IDs from bus records are unknown:")
        print(", ".join(unknown_route_ids))
        raise KeyError("Unknown route IDs in bus records")

    #
    route_uid = 'KHH24'

    runs = runs_by_route[route_uid]
    route = route_stops[route_uid]

    # Kaohsiung (left, bottom, right, top)
    bbox = (120.2593, 22.5828, 120.3935, 22.6886)
    (left, bottom, right, top) = bbox

    # Download the background map
    i = maps.get_map_by_bbox(bbox, token=PARAM['mapbox_api_token'])

    # Show the background map
    (fig, ax) = plt.subplots()
    plt.ion()
    ax.axis([left, right, bottom, top])
    ax.imshow(i, extent=(left, right, bottom, top), interpolation='quadric')
    # fig.canvas.draw_idle()
    plt.pause(0.1)

    stops_by_direction = dict(zip(route['Direction'], route['Stops']))

    # Draw stops for both route directions
    for (dir, stops) in stops_by_direction.items():
        # Stop locations
        (y, x) = zip(*[
            commons.inspect({'StopPosition': ('PositionLat', 'PositionLon')})(stop)
            for stop in stops
        ])
        # Plot as dots
        ax.scatter(x, y, c=('b' if dir else 'g'), marker='o', s=4)

    # Show bus location
    for run in runs:
        # Trace bus
        (y, x) = (run['PositionLat'], run['PositionLon'])
        h1 = ax.plot(x, y, '--+', c='r', linewidth=1)
        h2 = ax.plot(x[0], y[0], 'o', c='r')
        h3 = ax.plot(x[-1], y[-1], 's', c='r')

        plt.title(run['PlateNumb'])
        # plt.savefig("{}.png".format(route_uid), dpi=180)
        plt.pause(0.1)

        bus_at_stops(run, stops_by_direction[run['Direction']])

        plt.pause(0.1)

        [h[0].remove() for h in [h1, h2, h3]]

    return

def compress():
    realtime_files = commons.ls(IFILE['realtime'].format(city=PARAM['city'], date="*", time="*"))
    # commons.logger.debug(realtime_files)

    # Allow for pending write operations
    time.sleep(1)

    if True:
        # Brutal compression step
        commons.logger.info("COMPRESSION 0: Zip all")

        for fn in commons.progressbar(realtime_files):
            try:
                # See if file is in a valid format
                commons.zipjson_load(fn)

                try:
                    commons.zipjson_load(fn, insist=True)
                    # commons.logger.info("File {}: compressed already".format(fn))
                except RuntimeError:
                    commons.zipjson_dump(commons.zipjson_load(fn), fn)
                    commons.logger.info("File {}: compressed".format(fn))
                except:
                    commons.logger.exception("File {}: unexpected error".format(fn))
            except:
                commons.logger.warning("File {}: reading error".format(fn))

    if False:
        commons.logger.info("COMPRESSION I: Remove duplicates in back-to-back records")

        for (fn1, fn2) in zip(realtime_files[:-1], realtime_files[1:]):

            def hashable(J):
                assert (type(J) is list)
                return list(map(json.dumps, J))

            def unhashable(J):
                assert (type(J) is list)
                return list(map(json.loads, J))

            try:
                J1 = set(hashable(commons.zipjson_load(fn1)))
                J2 = set(hashable(commons.zipjson_load(fn2)))
            except EOFError:
                # Raised by zipjson_load if a file is empty
                continue
            except Exception as e:
                commons.logger.warning("Cannot open {}/{} ({})".format(fn1, fn2, e))
                continue

            if not J1.intersection(J2):
                continue

            J1 = J1.difference(J2)

            J1 = list(unhashable(list(J1)))
            J2 = list(unhashable(list(J2)))

            commons.logger.info("Compressing {}".format(fn1))
            commons.zipjson_dump(J1, fn1)

    if False:
        commons.logger.info("COMPRESSION II: Remove redundancies from individual records")

        unknown_subroutes = set()

        # Route meta
        R = commons.zipjson_load(IFILE['routes'].format(city=PARAM['city']))

        # Reindex by subroute-direction
        S = defaultdict(dict)
        for r in R:
            for s in r['SubRoutes']:
                sid = s['SubRouteUID']
                dir = s['Direction']
                assert (dir not in S[sid])
                S[sid][dir] = s
        #
        S = dict(S)

        # Reindex by RouteUID
        assert (commons.all_distinct([g['RouteUID'] for g in R]))
        R = {g['RouteUID']: g for g in R}

        def remove_single_route_redundancies(j):
            subroute_id = j['SubRouteUID']

            if not (subroute_id in S):
                if not (subroute_id in unknown_subroutes):
                    commons.logger.warning("Unknown subroute {} [warning will not be repeated]".format(subroute_id))
                    unknown_subroutes.add(subroute_id)
                return j

            assert (j['Direction'] in S[subroute_id])
            s = S[subroute_id][j['Direction']]

            for key in ['SubRouteName', 'SubRouteID']:
                if key in j:
                    if (j[key] == s[key]):
                        del j[key]
                    else:
                        # commons.logger.warning("Unexpected attribute value {}={}".format(key, j[key]))
                        pass

            if ('RouteUID' in j):
                route_id = j['RouteUID']
                assert (route_id in R)
                r = R[route_id]

                for key in ['RouteName', 'RouteID']:
                    if key in j:
                        if not (j[key] == r[key]):
                            commons.logger.warning("Unexpected attribute value {}={}".format(key, j[key]))
                        else:
                            del j[key]

                if (j['RouteUID'] == j['SubRouteUID']):
                    del j['RouteUID']

            assert ('GPSTime' in j)

            for key in ['SrcUpdateTime', 'UpdateTime']:
                if key in j:
                    del j[key]

            # Note:
            #  - we keep the 'OperatorID' field, even if s['OperatorIDs'] has length 1
            #  - of the time stamps, we keep 'GPSTime' which is the bus on-board time

            return j

        for fn in realtime_files:
            try:
                J = commons.zipjson_load(fn)
            except EOFError:
                commons.logger.warning("{} appears empty".format(fn))
                continue
            except Exception:
                commons.logger.warning("Failed to open {}".format(fn))
                continue

            b = len(json.dumps(J))  # Before compression

            try:
                J = list(map(remove_single_route_redundancies, J))
            except ValueError as e:
                commons.logger.exception("ValueError at {} -- {}".format(fn, e))
                continue
            except AssertionError as e:
                commons.logger.exception("Assertion error at {} -- {}".format(fn, e))
                continue
            except Exception as e:
                commons.logger.exception("Warning: Compression attempt failed for {} -- {}".format(fn, e))
                continue

            # J = remove_global_route_redundancies(J)

            a = len(json.dumps(J))  # After compression

            assert (a <= b)

            if (a == b):
                continue

            commons.logger.info("Compressing {}".format(fn))
            commons.zipjson_dump(J, fn)

    commons.logger.info("DONE")

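# Illustration only (hypothetical records): "COMPRESSION I" in compress() deduplicates
# back-to-back record lists by serializing each record with json.dumps, so that the
# records become hashable and set operations apply. A minimal sketch of the same idea:
def _demo_dedup_records():
    older = [{'PlateNumb': "AAA-111", 'GPSTime': "t0"}, {'PlateNumb': "BBB-222", 'GPSTime': "t0"}]
    newer = [{'PlateNumb': "AAA-111", 'GPSTime': "t0"}, {'PlateNumb': "BBB-222", 'GPSTime': "t1"}]
    (J1, J2) = (set(map(json.dumps, older)), set(map(json.dumps, newer)))
    # Keep in the older batch only the records that do not reappear in the newer one
    kept = list(map(json.loads, J1 - J2))
    assert kept == [{'PlateNumb': "BBB-222", 'GPSTime': "t0"}]
    return kept
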
def osf_upload():
    file_lists = {
        # Downloaded real-time logs
        'a': get_all_logs(),
        # Segmented by bus
        'b': commons.ls(OFILE['segment_by_bus'].format(busid="*", ext="json")),
        # Segmented by route
        'c': commons.ls(OFILE['segment_by_route'].format(routeid="*", dir="*", ext="*")),
    }

    # 1. Create new folder in the OSF repo
    # https://files.osf.io/v1/resources/nr2yz/providers/osfstorage/?meta=
    # https://developer.osf.io/#operation/files_detail

    headers = {
        'Authorization': "Bearer {}".format(PARAM['osf_dump']['token']),
    }

    params = {
        'kind': "folder",
        'name': "{}__{}".format(
            PARAM['datetime_filter']['path'].replace("/", "_"),
            dt.datetime.utcnow().strftime('%Y%m%d-%H%M%S')
        ),
    }

    response = requests.put(PARAM['osf_dump']['base_url'], headers=headers, params=params)

    # https://waterbutler.readthedocs.io/en/latest/api.html#actions
    upload_url = response.json()['data']['links']['upload']

    # 2. Upload zipfiles
    for (k, file_list) in file_lists.items():
        commons.logger.info("Step {}".format(k).upper())

        params = {
            'name': PARAM['osf_dump'][k],
        }

        commons.logger.info("Archiving...")

        s = io.BytesIO()
        with zipfile.ZipFile(file=s, mode='a', compression=zipfile.ZIP_DEFLATED) as z:
            for f in progressbar(file_list):
                z.write(f)

        commons.logger.info("Uploading...")

        response = requests.put(upload_url, data=s.getvalue(), headers=headers, params=params)

        commons.logger.info("Response: {}".format(response.json()))

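# Illustration only: osf_upload builds each zip archive fully in memory (io.BytesIO),
# so its raw bytes can be handed directly to requests.put. A minimal sketch of that
# pattern with hypothetical content:
def _demo_inmemory_zip():
    s = io.BytesIO()
    with zipfile.ZipFile(file=s, mode='a', compression=zipfile.ZIP_DEFLATED) as z:
        z.writestr("hello.txt", "hello, world")
    payload = s.getvalue()  # bytes ready for, e.g., requests.put(url, data=payload)
    assert zipfile.ZipFile(io.BytesIO(payload)).namelist() == ["hello.txt"]
    return payload
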
def map_transit_from(tt: List[dt.datetime], xx: List[Tuple]) -> None:
    graph_with_knn = load_walkable_graph_with_knn()

    # True/False filter for timetable files
    # (Note: the early "return True" disables the filter; the remaining body is kept for reference)
    def keep_ttfile(fn):
        return True

        J = commons.zipjson_load(fn)

        # "Inner" Kaohsiung
        bbox = (120.2593, 22.5828, 120.3935, 22.6886)
        (left, bottom, right, top) = bbox

        (lat, lon) = map(
            np.asarray,
            zip(*map(commons.inspect({'StopPosition': ('PositionLat', 'PositionLon')}), J['route']['Stops']))
        )
        return all([bottom <= lat, lat <= top, left <= lon, lon <= right])

    # A* INITIALIZE
    commons.logger.info("Initializing transit...")
    with commons.Timer('transit_prepare'):
        tr = transit.Transit(
            filter(keep_ttfile, commons.ls(IFILE['timetable_json'].format(routeid="*", dir="*"))),
            graph_with_knn=graph_with_knn
        )

    for (t, x) in product(tt, xx):

        # Transit computation callback
        def tr_callback(result):
            if (result['status'] in {"zero", "init"}):
                return

            # Next callback update
            if (result['status'] == "opti"):
                if (result.get('ncu', dt.datetime.min) > dt.datetime.now()):
                    return

            g: nx.DiGraph
            g = result['astar_graph']

            J = {
                'origin': {
                    'x': x,
                    't': t.isoformat(),
                },
                'gohere': [
                    {
                        'x': g.nodes[n]['loc'].x,
                        's': (g.nodes[n]['loc'].t - t.astimezone(dt.timezone.utc).replace(tzinfo=None)).total_seconds(),
                        'o': (g.nodes[next(iter(g.pred[n]))]['loc'].x if g.pred[n] else None),
                    }
                    for n in list(g.nodes)
                ],
            }

            # Preserve the UUID and the filename between callbacks
            fn = OFILE['transit_map'].format(uuid=result.setdefault('file_uuid', uuid.uuid4().hex), ext="json")

            with open(fn, 'w') as fd:
                json.dump(J, fd)

            commons.logger.info("Number of locations mapped is {}".format(g.number_of_nodes()))

            # make_transit_img(J, backend='TkAgg')

            # Next callback update
            result['ncu'] = dt.datetime.now() + dt.timedelta(seconds=10)

        # A* COMPUTE
        commons.logger.info("Computing transit from {} at {}...".format(x, t))
        with commons.Timer('transit_execute'):
            tr.connect(transit.Loc(t=t, x=x), callback=tr_callback)

    commons.Timer.report()