def main(): """Main worker entrypoint.""" LOG.warning("worker online") # until we can resume jobs for state in (Keys.processing.value, Keys.pending.value): CACHE.delete_many(*utils.list_keys(state)) while True: prune = [] for glet in WORKERS: if glet.successful(): prune.append(glet) elif glet.dead: LOG.warning( "worker crashed: %s", "".join(format_exception(*glet.exc_info)).strip(), ) prune.append(glet) for glet in prune: WORKERS.remove(glet) process_new() gc.collect() gevent.sleep(10)
def write_data(uuid, data): """Try to store the data, log errors.""" try: _write_data(uuid, data) except Exception as error: LOG.warning("Failed to save data: %r", error)
def knife(uuid, token, verify, roles):  # pylint: disable=R0914
    """Pull all ESI data for a character_id.

    Args:
        uuid: string uuid token
        token: SSO access token
        verify: dictionary return from /verify/
        roles: list of corporation roles
    """

    character_id = verify["CharacterID"]
    LOG.warning("knife run started for character: %s", character_id)
    scopes = verify["Scopes"]

    _, _, public = utils.request_or_wait("{}/latest/characters/{}/".format(
        ESI,
        character_id,
    ))

    if isinstance(public, str):
        CACHE.delete("{}{}".format(Keys.processing.value, uuid))
        utils.write_data(uuid, {"public info failure": public})
        return

    headers = {"Authorization": "Bearer {}".format(token)}

    results = get_results(public, character_id, scopes, roles, headers)

    utils.write_data(uuid, results)
    CACHE.delete("{}{}".format(Keys.processing.value, uuid))
    CACHE.cache.inc(Keys.alltime.value, 1)
    LOG.warning("completed character: %r", character_id)

def request_or_wait(url, *args, _as_res=False, page=None, method="get",
                    **kwargs):
    """Request the URL, or wait if we're error limited."""

    check_x_pages = True
    if page:
        kwargs["params"] = kwargs.get("params", {})
        kwargs["params"]["page"] = page
        check_x_pages = False
        LOG.warning("requesting: %s (page %d)", url, page)
    else:
        LOG.warning("requesting: %s", url)

    try:
        res = getattr(SESSION, method)(url, *args, **kwargs)
        res.raise_for_status()
    except Exception as err:
        try:
            if res.status_code == 420:
                wait = int(res.headers.get("X-Esi-Error-Limit-Reset", 1)) + 1
                APP.error_limited = True
                LOG.warning("hit the error limit, waiting %d seconds", wait)
                # error limited. wait out the window then carry on
                gevent.sleep(wait)
                return request_or_wait(url, *args, _as_res=_as_res, page=page,
                                       method=method, **kwargs)
        except Exception as error:
            LOG.warning("error handling error: %r: %r", err, error)

        try:
            content = res.json()
        except Exception:
            content = res.text

        # /shrug some other error, can't win em all
        return None, url, \
            res if _as_res else "Error fetching data: {} {}".format(
                res.status_code,
                content,
            )
    else:
        if check_x_pages:
            try:
                # pages run 1..X-Pages; page 1 is this response, so the
                # remaining pages are 2..X-Pages inclusive
                pages = list(range(2, int(res.headers.get("X-Pages", 0)) + 1))
            except Exception as error:
                LOG.warning("error checking x-pages for %s: %r", url, error)
                pages = None
        else:
            pages = page

        return pages, url, res if _as_res else res.json()

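# --- illustrative example, not from the project source ---
# Callers of the paged request_or_wait above get back (pages, url, data):
# `pages` is the list of remaining page numbers on a first call, the requested
# page number on follow-up calls, or None on error. fetch_all_pages is a
# hypothetical helper showing one way to consume that tuple serially.
def fetch_all_pages(url, headers=None):
    """Fetch page 1, then every remaining page from the X-Pages header."""

    pages, url, data = request_or_wait(url, headers=headers)
    if not isinstance(data, list):
        return data  # error string, or a single non-paged object

    for page in pages or []:
        _, _, page_data = request_or_wait(url, page=page, headers=headers)
        if isinstance(page_data, list):
            data.extend(page_data)

    return data
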
def process_new(): """Process all new tokens, verify or we're done early.""" for new_key in utils.list_keys(Keys.new.value): uuid = new_key.split(".")[-1] LOG.warning("processing new uuid: %r", uuid) token = CACHE.get(new_key) CACHE.delete(new_key) if not token: LOG.warning("no token stored for uuid: %r", uuid) continue pending_key = "{}{}".format(Keys.pending.value, uuid) CACHE.set( pending_key, "1", timeout=70, ) headers = {"Authorization": "Bearer {}".format(token)} _, res = utils.request_or_wait( "{}/verify/".format(ESI), headers=headers, ) failed = False if isinstance(res, str) or "CharacterID" not in res: utils.write_data(uuid, {"auth failure": res}) failed = True else: _, roles = utils.request_or_wait( "{}/latest/characters/{}/roles/".format( ESI, res["CharacterID"], ), headers=headers, ) if isinstance(roles, str): utils.write_data(uuid, {"roles failure": roles}) failed = True CACHE.delete(pending_key) if not failed: CACHE.set( "{}{}".format(Keys.processing.value, uuid), res["CharacterID"], timeout=7200, ) WORKERS.append( gevent.spawn(knife, uuid, token, res, roles) )
def write_data(uuid, data): """Try to store the data, log errors.""" try: CACHE.set( "{}{}".format(Keys.complete.value, uuid), codecs.decode( base64.b64encode( gzip.compress(codecs.encode( ujson.dumps(data), "utf-8", ))), "utf-8", ), timeout=EXPIRY, ) except Exception as error: LOG.warning("Failed to save data: %r", error)
def get_data(filename, decompress=True):
    """Open and return the content from file."""

    filepath = os.path.join(DATA, filename)
    try:
        with open(filepath, "r") as openfile:
            content = openfile.read()
    except Exception as error:
        LOG.warning("failed to open %s: %r", filename, error)
    else:
        try:
            if decompress:
                return ujson.loads(gzip.decompress(base64.b64decode(content)))
            return ujson.loads(content)
        except Exception as error:
            LOG.warning("failed to decode %s: %r", filename, error)

    return None

def _get_names(ids): """Resolve ids to names.""" resolved = {} failed = [] for i in range(0, len(ids), 1000): batch = ids[i:i+1000] _, _, res = utils.request_or_wait( "{}/latest/universe/names/".format(ESI), method="post", json=batch, ) if isinstance(res, list): for _res in res: resolved[_res["id"]] = _res["name"] else: failed.extend(batch) while failed: still_failed = [] random.shuffle(failed) batch_size = max(min(int(len(failed) / 2), 500), 1) for i in range(0, len(failed), batch_size): batch = failed[i:i+batch_size] _, _, res = utils.request_or_wait( "{}/latest/universe/names/".format(ESI), method="post", json=batch, ) if isinstance(res, list): for _res in res: resolved[_res["id"]] = _res["name"] else: still_failed.extend(batch) failed = still_failed if batch_size == 1 and still_failed: LOG.warning("failed to resolve: %r", still_failed) break return resolved
def refresh_spec(): """Refresh the ESI spec. Returns: dictionary: JSON loaded swagger spec """ try: spec_details = CACHE.get(Keys.spec.value) except redis.exceptions.ConnectionError: spec_details = None save_results = False else: save_results = True if spec_details is None: spec_details = {"timestamp": 0} if time.time() - spec_details["timestamp"] > 300: headers = {} if spec_details.get("etag"): headers["If-None-Match"] = spec_details["etag"] _, _, res = request_or_wait( "{}/latest/swagger.json".format(ESI), _as_res=True, headers=headers, ) if isinstance(res, str): LOG.warning("failed to refresh spec: %s", res) return spec_details.get("spec", {}) spec_details["timestamp"] = time.time() if res.status_code != 304: spec_details["etag"] = res.headers.get("ETag") spec_details["spec"] = JsonDeref().deref(res.json()) if save_results: CACHE.set(Keys.spec.value, spec_details, timeout=3600) return spec_details["spec"]
def request_or_wait(url, *args, _as_res=False, **kwargs):
    """Request the URL, or wait if we're error limited."""

    try:
        LOG.warning("requesting: %s", url)
        res = SESSION.get(url, *args, **kwargs)
        res.raise_for_status()
        return url, res if _as_res else res.json()
    except Exception as err:
        try:
            if res.status_code == 420:
                wait = int(res.headers.get("X-Esi-Error-Limit-Reset", 1)) + 1
                LOG.warning("hit the error limit, waiting %d seconds", wait)
                # error limited. wait out the window then carry on
                gevent.sleep(wait)
                return request_or_wait(url, *args, _as_res=_as_res, **kwargs)
        except Exception as error:
            LOG.warning("error handling error: %r: %r", err, error)

        try:
            content = res.json()
        except Exception:
            content = res.text

        # /shrug some other error, can't win em all
        return url, res if _as_res else "Error fetching data: {} {}".format(
            res.status_code,
            content,
        )

def refresh_spec(): """Refresh the ESI spec. Returns: dictionary: JSON loaded swagger spec """ spec_file = os.path.join(DATA, ".esi.json") if os.path.isfile(spec_file): with open(spec_file, "r") as open_spec: spec_details = ujson.loads(open_spec.read()) else: spec_details = {"timestamp": 0} if time.time() - spec_details["timestamp"] > 300: headers = {} if spec_details.get("etag"): headers["If-None-Match"] = spec_details["etag"] _, res = request_or_wait( "{}/latest/swagger.json".format(ESI), _as_res=True, headers=headers, ) if isinstance(res, str): LOG.warning("failed to refresh spec: %s", res) return spec_details.get("spec", {}) spec_details["timestamp"] = time.time() if res.status_code != 304: spec_details["etag"] = res.headers.get("ETag") spec_details["spec"] = JsonDeref().deref(res.json()) with open(spec_file, "w") as new_spec: new_spec.write(ujson.dumps(spec_details)) return spec_details["spec"]
def get_data(uuid): """Open and return the character's data.""" cache_key = "{}{}".format(Keys.complete.value, uuid) try: content = CACHE.get(cache_key) except Exception as error: LOG.warning("failed to get %s: %r", cache_key, error) else: if content is None: return None try: return ujson.loads(gzip.decompress(base64.b64decode(content))) except Exception as error: LOG.warning("failed to decode %s: %r", content, error) else: CACHE.cache._client.expire( # pylint: disable=protected-access cache_key, EXPIRY, ) return None
def knife(uuid, token, verify, roles):  # pylint: disable=R0914
    """Pull all ESI data for a character_id.

    Args:
        uuid: string uuid token
        token: SSO access token
        verify: dictionary return from /verify/
        roles: list of corporation roles
    """

    character_id = verify["CharacterID"]
    LOG.warning("knife run started for character: %s", character_id)
    scopes = verify["Scopes"]

    _, public = utils.request_or_wait(
        "{}/latest/characters/{}/".format(ESI, character_id)
    )

    if isinstance(public, str):
        CACHE.delete("{}{}".format(Keys.processing.value, uuid))
        utils.write_data(uuid, {"public info failure": public})
        return

    all_params = copy.deepcopy(ADDITIONAL_PARAMS)
    known_params = {"character_id": character_id}

    if public["corporation_id"] > 2000000:
        known_params["corporation_id"] = public["corporation_id"]
    else:
        all_params.pop("corporation_id")

    if "alliance_id" in public:
        known_params["alliance_id"] = public["alliance_id"]

    spec = utils.refresh_spec()
    headers = {"Authorization": "Bearer {}".format(token)}

    results = expand_params(
        scopes,
        roles,
        spec,
        known_params,
        all_params,
        headers,
    )

    urls = build_urls(scopes, roles, spec, known_params, all_params)

    with ThreadPoolExecutor(max_workers=20) as pool:
        futures = []
        for url in urls:
            futures.append(pool.submit(
                utils.request_or_wait,
                url,
                headers=headers,
            ))

        for future in as_completed(futures):
            url, result = future.result()
            results[url] = result

    utils.write_data(uuid, results)
    CACHE.delete("{}{}".format(Keys.processing.value, uuid))
    LOG.warning("completed character: %r", character_id)

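# --- illustrative sketch, not from the project source ---
# knife() above deep-copies ADDITIONAL_PARAMS and hands it to expand_params(),
# which expects a mapping of parent ID name -> {ID placeholder -> templated
# route whose GET response yields those IDs}. A tiny assumed subset of that
# shape; the real constant lives elsewhere in the project and covers far more
# routes.
ADDITIONAL_PARAMS = {
    "character_id": {
        "planet_id": "/characters/{character_id}/planets/",
        "contract_id": "/characters/{character_id}/contracts/",
    },
    "corporation_id": {
        "contract_id": "/corporations/{corporation_id}/contracts/",
    },
}
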
def expand_params(scopes, roles, spec,  # pylint: disable=R0914,R0913
                  known_params, all_params, headers):
    """Gather IDs from all_params into known_params."""

    errors = []
    purge = {x: [] for x in all_params}
    transform = {
        "/characters/{character_id}/mail/labels/": \
            lambda x: [i["label_id"] for i in x["labels"]],
        "/characters/{character_id}/planets/": \
            lambda x: [i["planet_id"] for i in x],
        "/characters/{character_id}/calendar/": \
            lambda x: [i["event_id"] for i in x],
        "/characters/{character_id}/contracts/": \
            lambda x: [i["contract_id"] for i in x],
        "/corporations/{corporation_id}/calendar/": \
            lambda x: [i["event_id"] for i in x],
        "/corporations/{corporation_id}/contracts/": \
            lambda x: [i["contract_id"] for i in x],
    }
    expansion_results = {}

    with ThreadPoolExecutor(max_workers=20) as pool:
        futures = {}
        for parent, id_types in all_params.items():
            for id_type, url in id_types.items():
                oper = spec["paths"][url]["get"]

                required_roles = oper.get("x-required-roles", [])
                if any(x not in roles for x in required_roles):
                    # we don't have the corporate roles for this route
                    purge[parent].append(id_type)
                    continue

                required_sso = oper.get("security", [{}])[0].get("evesso", [])
                if any(x not in scopes for x in required_sso):
                    # our access token doesn't have this scope
                    purge[parent].append(id_type)
                    continue

                path = "https://esi.evetech.net/latest{}".format(
                    url.format(**known_params)
                )
                futures[pool.submit(
                    utils.request_or_wait,
                    path,
                    headers=headers,
                )] = (url, parent, id_type)

        for future in as_completed(futures):
            templated_url, parent, id_type = futures[future]
            url, data = future.result()

            if templated_url in transform:
                expansion_results[url] = data
                all_params[parent][id_type] = transform[templated_url](data)
            elif isinstance(data, list):
                all_params[parent][id_type] = data
            else:
                LOG.warning("worker expansion error: %r", data)

    for parent, purged_ids in purge.items():
        for purged_id in purged_ids:
            all_params[parent].pop(purged_id)

    if errors:
        LOG.warning("worker errors: %s", " ".join(errors))

    return expansion_results

def expand_params(scopes, roles, spec,  # pylint: disable=R0914,R0913
                  known_params, all_params, headers):
    """Gather IDs from all_params into known_params."""

    errors = []
    purge = {x: [] for x in all_params}
    transform = {
        "/characters/{character_id}/mail/labels/": \
            lambda x: [i["label_id"] for i in x["labels"]],
        "/characters/{character_id}/planets/": \
            lambda x: [i["planet_id"] for i in x],
        "/characters/{character_id}/calendar/": \
            lambda x: [i["event_id"] for i in x],
        "/characters/{character_id}/contracts/": \
            lambda x: [i["contract_id"] for i in x],
        "/characters/{character_id}/mail/": \
            lambda x: [i["mail_id"] for i in x],
        "/corporations/{corporation_id}/calendar/": \
            lambda x: [i["event_id"] for i in x],
        "/corporations/{corporation_id}/contracts/": \
            lambda x: [i["contract_id"] for i in x],
    }
    expansion_results = {}

    with ThreadPoolExecutor(max_workers=20) as pool:
        futures = {}
        for parent, id_types in all_params.items():
            for id_type, url in id_types.items():
                oper = spec["paths"][url]["get"]

                required_roles = oper.get("x-required-roles", [])
                if any(x not in roles for x in required_roles):
                    # we don't have the corporate roles for this route
                    purge[parent].append(id_type)
                    continue

                required_sso = oper.get("security", [{}])[0].get("evesso", [])
                if any(x not in scopes for x in required_sso):
                    # our access token doesn't have this scope
                    purge[parent].append(id_type)
                    continue

                path = "https://esi.evetech.net/latest{}".format(
                    url.format(**known_params)
                )
                futures[pool.submit(
                    utils.request_or_wait,
                    path,
                    headers=headers,
                )] = (url, parent, id_type)

        pages = {}
        while True:
            completed = []
            expansion = {}
            for future in as_completed(futures):
                completed.append(future)
                templated_url, parent, id_type = futures[future]
                page, url, data = future.result()
                page_key = (templated_url, parent, id_type, url)

                if page and isinstance(page, list):
                    pages[page_key] = {1: data}
                    for _page in page:
                        expansion[pool.submit(
                            utils.request_or_wait,
                            url,
                            page=_page,
                            headers=headers,
                        )] = (templated_url, parent, id_type)
                elif isinstance(page, int):
                    if isinstance(data, list):
                        pages[page_key][page] = data
                    else:
                        LOG.warning("worker page expansion error: %r", data)
                else:
                    if templated_url in transform:
                        expansion_results[url] = data
                        all_params[parent][id_type] = transform[templated_url](
                            data
                        )
                    elif isinstance(data, list):
                        all_params[parent][id_type] = data
                    else:
                        LOG.warning("worker expansion error: %r", data)

            for complete in completed:
                futures.pop(complete)
            futures.update(expansion)

            if not futures:
                break

    for details, page_data in pages.items():
        templated_url, parent, id_type, url = details
        data = []
        for page in sorted(page_data):
            data.extend(page_data[page])

        if not data:
            continue

        if templated_url in transform:
            expansion_results[url] = data
            try:
                all_params[parent][id_type] = transform[templated_url](
                    data
                )
            except Exception as error:
                LOG.warning(
                    "failed to transform %s. error: %r data: %r",
                    url,
                    error,
                    data,
                )
        else:
            all_params[parent][id_type] = data

    for parent, purged_ids in purge.items():
        for purged_id in purged_ids:
            all_params[parent].pop(purged_id)

    if errors:
        LOG.warning("worker errors: %s", " ".join(errors))

    return expansion_results
