def update_local_database(config, deviant_summary, candidates, since):
    if isinstance(deviant_summary, bigquery.Table):
        Log.note("Only the ETL process should fill the bigquery table")
        return

    # GET EVERYTHING WE HAVE SO FAR
    exists = deviant_summary.query({
        "select": ["signature_hash", "last_updated"],
        "where": {
            "and": [
                {"in": {"signature_hash": candidates.signature_hash}},
                {"exists": "num_pushes"},
            ]
        },
        "sort": "last_updated",
        "limit": 100000,
        "format": "list",
    }).data

    # CHOOSE MISSING, THEN OLDEST, UP TO "RECENT"
    missing = list(set(candidates.signature_hash) - set(exists.signature_hash))
    too_old = Date.today() - parse(LOCAL_RETENTION)
    needs_update = missing + [
        e.signature_hash for e in exists if e.last_updated < too_old.unix
    ]
    Log.alert("{{num}} series are candidates for local update", num=len(needs_update))

    limited_update = Queue("sigs")
    limited_update.extend(
        left(needs_update, coalesce(config.display.download_limit, 100))
    )
    Log.alert("Updating local database with {{num}} series", num=len(limited_update))

    with Timer("Updating local database"):

        def loop(please_stop):
            while not please_stop:
                signature_hash = limited_update.pop_one()
                if not signature_hash:
                    return
                process(
                    signature_hash,
                    since,
                    source=config.database,
                    deviant_summary=deviant_summary,
                )

        threads = [Thread.run(text(i), loop) for i in range(3)]
        for t in threads:
            t.join()

    Log.note("Local database is up to date")
def __init__(self, flask_app, db, cookie, table="sessions"):
    global SINGLTON
    if SINGLTON:
        Log.error("Can only handle one session manager at a time")
    SINGLTON = self

    if is_data(db):
        self.db = Sqlite(db)
    else:
        self.db = db
    self.table = table
    self.cookie = cookie
    self.cookie.max_lifetime = parse(self.cookie.max_lifetime)
    self.cookie.inactive_lifetime = parse(self.cookie.inactive_lifetime)

    if not self.db.about(self.table):
        self.setup()
    Thread.run("session monitor", self.monitor)
def schema_type(value):
    jt = python_type_to_json_type[value.__class__]
    if jt == TIME:
        v = parse(value).format(TIMESTAMP_FORMAT)
    elif jt == NUMBER:
        v = float(value)
    else:
        v = value
    return v, json_type_to_inserter_type[jt], jt
def device_register(self, path=None):
    """
    EXPECTING A SIGNED REGISTRATION REQUEST
    RETURN JSON WITH url FOR LOGIN
    """
    now = Date.now()
    expires = now + parse(self.device.register.session['max-age'])
    request_body = request.get_data()
    signed = json2value(request_body.decode("utf8"))
    command = json2value(base642bytes(signed.data).decode("utf8"))
    session.public_key = command.public_key
    rsa_crypto.verify(signed, session.public_key)

    self.session_manager.create_session(session)
    session.expires = expires.unix
    session.state = bytes2base64URL(crypto.bytes(32))

    with self.device.db.transaction() as t:
        t.execute(
            sql_insert(
                self.device.table,
                {"state": session.state, "session_id": session.session_id},
            )
        )

    body = value2json(
        Data(
            session_id=session.session_id,
            interval="5second",
            expires=session.expires,
            url=URL(
                self.device.home,
                path=self.device.endpoints.login,
                query={"state": session.state},
            ),
        )
    )

    response = Response(body, headers={"Content-Type": mimetype.JSON}, status=200)
    response.set_cookie(
        self.device.register.session.name,
        session.session_id,
        path=self.device.login.session.path,
        domain=self.device.login.session.domain,
        expires=expires.format(RFC1123),
        secure=self.device.login.session.secure,
        httponly=self.device.login.session.httponly,
    )
    return response
def device_status(self, path=None):
    """
    AUTOMATION CAN CALL THIS ENDPOINT TO FIND OUT THE LOGIN STATUS
    RESPOND WITH {"try_again":false, "status":"verified"} WHEN USER HAS
    LOGGED IN, AND user IS ASSOCIATED WITH SESSION
    """
    now = Date.now().unix
    session_id = request.cookies.get(self.device.register.session.name)
    if not session_id:
        return Response('{"try_again":false, "status":"no session id"}', status=401)

    device_session = self.session_manager.get_session(session_id)
    request_body = request.get_data()
    signed = json2value(request_body.decode("utf8"))
    command = rsa_crypto.verify(signed, device_session.public_key)

    time_sent = parse(command.timestamp).unix
    if not (now - LEEWAY <= time_sent < now + LEEWAY):
        return Response(
            '{"try_again":false, "status":"timestamp is not recent"}', status=401
        )
    if device_session.expires < now:
        return Response(
            '{"try_again":false, "status":"session is too old"}', status=401
        )
    if device_session.user:
        device_session.public_key = None
        return Response('{"try_again":false, "status":"verified"}', status=200)

    state_info = self.device.db.query(
        sql_query({
            "select": "session_id",
            "from": self.device.table,
            "where": {"eq": {"state": device_session.state}},
        })
    )
    if not state_info.data:
        return Response(
            '{"try_again":false, "status":"State has been lost"}', status=401
        )

    return Response('{"try_again":true, "status":"still waiting"}', status=200)
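# The status endpoint above speaks a small JSON protocol; a sketch of the
# possible responses, taken from the handler's return statements:
#
#     {"try_again": true,  "status": "still waiting"}            # 200, keep polling
#     {"try_again": false, "status": "verified"}                 # 200, login complete
#     {"try_again": false, "status": "no session id"}            # 401, missing cookie
#     {"try_again": false, "status": "timestamp is not recent"}  # 401, outside LEEWAY
#     {"try_again": false, "status": "session is too old"}       # 401, session expired
#     {"try_again": false, "status": "State has been lost"}      # 401, stop polling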
def schema_type(value):
    clazz = value.__class__
    if clazz.__class__ == EnumMeta:
        return value.name, json_type_to_inserter_type[STRING], STRING

    jt = python_type_to_json_type[clazz]
    if jt == TIME:
        v = parse(value).format(TIMESTAMP_FORMAT)
    elif jt == NUMBER:
        if mo_math.is_finite(value):
            v = float(value)
        else:
            v = None
    else:
        v = value
    return v, json_type_to_inserter_type[jt], jt
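# A minimal usage sketch for schema_type above; the exact inserter constants
# come from json_type_to_inserter_type, and SomeEnum is hypothetical:
#
#     schema_type(42)             # -> (42.0, <NUMBER inserter>, NUMBER)
#     schema_type(float("nan"))   # -> (None, <NUMBER inserter>, NUMBER)  non-finite becomes None
#     schema_type(SomeEnum.RED)   # -> ("RED", <STRING inserter>, STRING)  enums encode by name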
def update_local_database():
    # GET EVERYTHING WE HAVE SO FAR
    exists = summary_table.query({
        "select": ["id", "last_updated"],
        "where": {
            "and": [
                {"in": {"id": candidates.id}},
                {"exists": "num_pushes"},
            ]
        },
        "sort": "last_updated",
        "limit": 100000,
        "format": "list",
    }).data

    # CHOOSE MISSING, THEN OLDEST, UP TO "RECENT"
    missing = list(set(candidates.id) - set(exists.id))
    too_old = Date.today() - parse(LOCAL_RETENTION)
    needs_update = missing + [
        e.id for e in exists if e.last_updated < too_old.unix
    ]
    Log.alert("{{num}} series are candidates for local update", num=len(needs_update))

    limited_update = Queue("sigs")
    limited_update.extend(
        left(needs_update, coalesce(config.analysis.download_limit, 100))
    )
    Log.alert("Updating local database with {{num}} series", num=len(limited_update))

    with Timer("Updating local database"):

        def loop(please_stop):
            while not please_stop:
                sig_id = limited_update.pop_one()
                if not sig_id:
                    return
                process(sig_id)

        threads = [Thread.run(text(i), loop) for i in range(3)]
        for t in threads:
            t.join()

    Log.note("Local database is up to date")
def device_login(self, path=None):
    """
    REDIRECT BROWSER TO AUTH0 LOGIN
    """
    now = Date.now()
    expires = now + parse(self.device.login.session['max-age'])
    state = request.args.get("state")

    self.session_manager.create_session(session)
    session.expires = expires.unix
    session.code_verifier = bytes2base64URL(crypto.bytes(32))
    code_challenge = bytes2base64URL(
        sha256(session.code_verifier.encode("utf8"))
    )

    query = Data(
        client_id=self.device.auth0.client_id,
        redirect_uri=self.device.auth0.redirect_uri,
        state=state,
        nonce=bytes2base64URL(crypto.bytes(32)),
        code_challenge=code_challenge,
        response_type="code",
        code_challenge_method="S256",
        response_mode="query",
        audience=self.device.auth0.audience,
        scope=self.device.auth0.scope,
    )
    url = str(
        URL("https://" + self.device.auth0.domain + "/authorize", query=query)
    )
    Log.note("Forward browser to {{url}}", url=url)

    response = redirect(url, code=302)
    response.set_cookie(
        self.device.login.session.name,
        session.session_id,
        path=self.device.login.session.path,
        domain=self.device.login.session.domain,
        expires=expires.format(RFC1123),
        secure=self.device.login.session.secure,
        httponly=self.device.login.session.httponly,
    )
    return response
def device_register(self, path=None):
    """
    EXPECTING A SIGNED REGISTRATION REQUEST
    RETURN JSON WITH url FOR LOGIN
    """
    now = Date.now().unix
    request_body = request.get_data().strip()
    signed = json2value(request_body.decode("utf8"))
    command = json2value(base642bytes(signed.data).decode("utf8"))
    session.public_key = command.public_key
    rsa_crypto.verify(signed, session.public_key)

    self.session_manager.setup_session(session)
    session.expires = now + parse("10minute").seconds
    session.state = bytes2base64URL(Random.bytes(32))

    with self.device.db.transaction() as t:
        t.execute(
            sql_insert(
                self.device.table,
                {"state": session.state, "session_id": session.session_id},
            )
        )

    response = value2json(
        Data(
            session_id=session.session_id,
            interval="5second",
            expiry=session.expires,
            url=URL(
                self.device.home,
                path=self.device.endpoints.login,
                query={"state": session.state},
            ),
        )
    )

    return Response(
        response, headers={"Content-Type": "application/json"}, status=200
    )
def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Snagged from unittest/case.py, then modified (Aug2014)
    """
    if is_null_op(expected):
        if test == None:  # pandas dataframes reject any comparison with an exception!
            return
        else:
            raise AssertionError(expand_template("{{test|json}} != NULL", locals()))

    if expected == None:  # None has no expectations
        return
    if test == expected:
        # shortcut
        return

    if isinstance(expected, (dates.Date, datetime.datetime, datetime.date)):
        return assertAlmostEqualValue(
            dates.Date(test).unix,
            dates.Date(expected).unix,
            msg=msg,
            digits=digits,
            places=places,
            delta=delta,
        )

    if not is_number(expected):
        # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
        if is_list(expected) and len(expected) == 0 and test == None:
            return
        if is_data(expected) and not expected.keys() and test == None:
            return
        if test != expected:
            raise AssertionError(
                expand_template("{{test|json}} != {{expected|json}}", locals())
            )
        return
    elif not is_number(test):
        try:
            # ASSUME IT IS A UTC DATE
            test = dates.parse(test).unix
        except Exception as e:
            raise AssertionError(
                expand_template("{{test|json}} != {{expected}}", locals())
            )

    num_param = 0
    if digits != None:
        num_param += 1
    if places != None:
        num_param += 1
    if delta != None:
        num_param += 1
    if num_param > 1:
        raise TypeError("specify only one of digits, places or delta")

    if digits is not None:
        with suppress_exception:
            diff = log10(abs(test - expected))
            if diff < digits:
                return
        standardMsg = expand_template(
            "{{test|json}} != {{expected|json}} within {{digits}} decimal places",
            locals(),
        )
    elif delta is not None:
        if abs(test - expected) <= delta:
            return
        standardMsg = expand_template(
            "{{test|json}} != {{expected|json}} within {{delta}} delta", locals()
        )
    else:
        if places is None:
            places = 15
        with suppress_exception:
            diff = mo_math.log10(abs(test - expected))
            if diff == None:
                return  # Exactly the same
            if diff < mo_math.ceiling(mo_math.log10(abs(test))) - places:
                return
        standardMsg = expand_template(
            "{{test|json}} != {{expected|json}} within {{places}} places", locals()
        )

    raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")
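# A minimal usage sketch (hypothetical values; only one of digits, places,
# or delta may be given per call):
#
#     assertAlmostEqualValue(3.14159, 3.14158, places=4)   # passes: equal to 4 significant places
#     assertAlmostEqualValue(100.0, 101.0, delta=2)        # passes: |100 - 101| <= 2
#     assertAlmostEqualValue(100.0, 103.0, delta=2)        # raises AssertionError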
def login(self, please_stop=None):
    """
    WILL REGISTER THIS DEVICE, AND SHOW A QR-CODE TO LOGIN
    WILL POLL THE SERVICE ENDPOINT UNTIL LOGIN IS COMPLETED, OR FAILED

    :param please_stop: SIGNAL TO STOP EARLY
    :return: SESSION THAT CAN BE USED TO SEND AUTHENTICATED REQUESTS
    """
    # SEND PUBLIC KEY
    now = Date.now().unix
    self.session = requests.Session()
    signed = rsa_crypto.sign(
        Data(public_key=self.public_key, timestamp=now), self.private_key
    )
    DEBUG and Log.note(
        "register (unsigned)\n{{request|json}}",
        request=rsa_crypto.verify(signed, self.public_key),
    )
    DEBUG and Log.note("register (signed)\n{{request|json}}", request=signed)

    try:
        response = self.session.request(
            "POST",
            str(URL(self.config.service) / self.config.endpoints.register),
            data=value2json(signed),
        )
    except Exception as e:
        raise Log.error("problem registering device", cause=e)

    device = wrap(response.json())
    DEBUG and Log.note("response:\n{{response}}", response=device)
    device.interval = parse(device.interval).seconds
    expires = Till(till=parse(device.expiry).unix)

    cookie = self.session.cookies.get(self.config.cookie.name)
    if not cookie:
        Log.error("expecting a session cookie")

    # SHOW URL AS QR CODE
    image = text2QRCode(device.url)

    sys.stdout.write("\n\nLogin using this URL:\n")
    sys.stdout.write(device.url + CR)
    sys.stdout.write(image)

    while not please_stop and not expires:
        Log.note("waiting for login...")
        try:
            now = Date.now()
            signed = rsa_crypto.sign(
                Data(timestamp=now, session=cookie), self.private_key
            )
            url = URL(self.config.service) / self.config.endpoints.status
            DEBUG and Log.note(
                "ping (unsigned) {{url}}\n{{request|json}}",
                url=url,
                request=rsa_crypto.verify(signed, self.public_key),
            )
            response = self.session.request("POST", url, data=value2json(signed))
            ping = wrap(response.json())
            DEBUG and Log.note("response\n{{response|json}}", response=ping)
            if ping.status == "verified":
                return self.session
            if not ping.try_again:
                Log.note("Failed to login {{reason}}", reason=ping.status)
                return
        except Exception as e:
            Log.warning(
                "problem calling {{url}}",
                url=URL(self.config.service) / self.config.endpoints.status,
                cause=e,
            )
        (Till(seconds=device.interval) | please_stop | expires).wait()
    return self.session
from mo_times.dates import parse
from pyLibrary.env import http
from pyLibrary.env.flask_wrappers import cors_wrapper, add_flask_rule
from pyLibrary.sql import SQL_DELETE, SQL_WHERE, SQL_FROM
from pyLibrary.sql.sqlite import (
    Sqlite,
    sql_create,
    quote_column,
    sql_eq,
    sql_query,
    sql_insert,
)
from vendor.mo_logs import Log

DEBUG = False
LEEWAY = parse("minute").seconds


def get_token_auth_header():
    """
    Obtains the Access Token from the Authorization Header
    """
    try:
        auth = request.headers.get("Authorization", None)
        bearer, token = auth.split()
        if bearer.lower() == "bearer":
            return token
    except Exception as e:
        pass
    Log.error('Expecting "Authorization = Bearer <token>" in header')
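# A sketch of the header get_token_auth_header expects (token value is
# hypothetical); within a Flask request context:
#
#     Authorization: Bearer eyJhbGciOi...
#
#     token = get_token_auth_header()   # -> "eyJhbGciOi..."
#
# Any other shape (missing header, wrong scheme, extra parts) falls through
# to Log.error, which raises.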
def test_13_weeks(self):
    self.assertAlmostEqual(parse("13week").seconds, (WEEK * 13).seconds)
def test_end_of_month(self):
    self.assertAlmostEqual(
        parse("today|month+month").unix, Date.today().floor(MONTH).add(MONTH).unix
    )
def test_beginning_of_month(self):
    self.assertAlmostEqual(parse("today|month").unix, Date.today().floor(MONTH).unix)
def test_next_week(self):
    self.assertAlmostEqual(parse("today+7day").unix, (Date.today() + DAY * 7).unix)
def run(self, force=False, restart=False, merge=False):
    # SETUP LOGGING
    settings = startup.read_settings(filename=CONFIG_FILE)
    constants.set(settings.constants)
    Log.start(settings.debug)

    if not settings.extractor.app_name:
        Log.error("Expecting an extractor.app_name in config file")

    # SETUP DESTINATION
    destination = bigquery.Dataset(
        dataset=settings.extractor.app_name, kwargs=settings.destination
    ).get_or_create_table(settings.destination)

    try:
        if merge:
            with Timer("merge shards"):
                destination.merge_shards()

        # RECOVER LAST SQL STATE
        redis = Redis()
        state = redis.get(settings.extractor.key)

        if restart or not state:
            state = (0, 0)
            redis.set(settings.extractor.key, value2json(state).encode("utf8"))
        else:
            state = json2value(state.decode("utf8"))

        last_modified, job_id = state

        # SCAN SCHEMA, GENERATE EXTRACTION SQL
        extractor = MySqlSnowflakeExtractor(settings.source)
        canonical_sql = extractor.get_sql(SQL("SELECT 0"))

        # ENSURE SCHEMA HAS NOT CHANGED SINCE LAST RUN
        old_sql = redis.get(settings.extractor.sql)
        if old_sql and old_sql.decode("utf8") != canonical_sql.sql:
            if force:
                Log.warning("Schema has changed")
            else:
                Log.error("Schema has changed")
        redis.set(settings.extractor.sql, canonical_sql.sql.encode("utf8"))

        # SETUP SOURCE
        source = MySQL(settings.source.database)

        while True:
            Log.note(
                "Extracting jobs for last_modified={{last_modified|datetime|quote}}, job.id={{job_id}}",
                last_modified=last_modified,
                job_id=job_id,
            )

            # Example: job.id == 283890114
            # get_ids = ConcatSQL(
            #     (SQL_SELECT, sql_alias(quote_value(283890114), "id"))
            # )
            # get_ids = sql_query(
            #     {
            #         "from": "job",
            #         "select": ["id"],
            #         "where": {
            #             "or": [
            #                 {"gt": {"last_modified": parse(last_modified)}},
            #                 {
            #                     "and": [
            #                         {"eq": {"last_modified": parse(last_modified)}},
            #                         {"gt": {"id": job_id}},
            #                     ]
            #                 },
            #             ]
            #         },
            #         "sort": ["last_modified", "id"],
            #         "limit": settings.extractor.chunk_size,
            #     }
            # )
            get_ids = SQL(str(
                (
                    Job.objects.filter(
                        Q(last_modified__gt=parse(last_modified).datetime)
                        | (
                            Q(last_modified=parse(last_modified).datetime)
                            & Q(id__gt=job_id)
                        )
                    )
                    .annotate()
                    .values("id")
                    .order_by("last_modified", "id")[: settings.extractor.chunk_size]
                ).query
            ))
            sql = extractor.get_sql(get_ids)

            # PULL FROM source, AND PUSH TO destination
            acc = []
            with source.transaction():
                cursor = source.query(sql, stream=True, row_tuples=True)
                extractor.construct_docs(cursor, acc.append, False)
            if not acc:
                break
            destination.extend(acc)

            # RECORD THE STATE
            last_doc = acc[-1]
            last_modified, job_id = last_doc.last_modified, last_doc.id
            redis.set(
                settings.extractor.key,
                value2json((last_modified, job_id)).encode("utf8"),
            )

            if len(acc) < settings.extractor.chunk_size:
                break

    except Exception as e:
        Log.warning("problem with extraction", cause=e)

    Log.note("done job extraction")

    try:
        with Timer("merge shards"):
            destination.merge_shards()
    except Exception as e:
        Log.warning("problem with merge", cause=e)

    Log.note("done job merge")
def test_last_week(self):
    self.assertAlmostEqual(parse("today-7day").unix, (Date.today() - DAY * 7).unix)
def test_yesterday(self):
    self.assertAlmostEqual(parse("today-day").unix, (Date.today() - DAY).unix)
def test_today(self):
    self.assertAlmostEqual(parse("today").unix, Date.today().unix)
def test_now(self):
    self.assertAlmostEqual(
        parse("now").unix, Date.now().unix, places=9
    )  # IGNORE THE LEAST SIGNIFICANT MILLISECOND
def run(self, force=False, restart=False, merge=False):
    # SETUP LOGGING
    settings = startup.read_settings(filename=CONFIG_FILE)
    constants.set(settings.constants)
    Log.start(settings.debug)

    if not settings.extractor.app_name:
        Log.error("Expecting an extractor.app_name in config file")

    # SETUP DESTINATION
    destination = bigquery.Dataset(
        dataset=settings.extractor.app_name, kwargs=settings.destination
    ).get_or_create_table(settings.destination)

    try:
        if merge:
            with Timer("merge shards"):
                destination.merge_shards()

        # RECOVER LAST SQL STATE
        redis = Redis()
        state = redis.get(settings.extractor.key)

        if restart or not state:
            state = (0, 0)
            redis.set(settings.extractor.key, value2json(state).encode("utf8"))
        else:
            state = json2value(state.decode("utf8"))

        last_modified, alert_id = state
        last_modified = parse(last_modified)

        # SCAN SCHEMA, GENERATE EXTRACTION SQL
        extractor = MySqlSnowflakeExtractor(settings.source)
        canonical_sql = extractor.get_sql(SQL("SELECT 0"))

        # ENSURE SCHEMA HAS NOT CHANGED SINCE LAST RUN
        old_sql = redis.get(settings.extractor.sql)
        if old_sql and old_sql.decode("utf8") != canonical_sql.sql:
            if force:
                Log.warning("Schema has changed")
            else:
                Log.error("Schema has changed")
        redis.set(settings.extractor.sql, canonical_sql.sql.encode("utf8"))

        # SETUP SOURCE
        source = MySQL(settings.source.database)

        while True:
            Log.note(
                "Extracting alerts for last_modified={{last_modified|datetime|quote}}, alert.id={{alert_id}}",
                last_modified=last_modified,
                alert_id=alert_id,
            )

            # ONLY YOUNG RECORDS CAN GO INTO BIGQUERY
            last_year = Date.today() - YEAR + DAY

            # THE DJANGO QUERY BELOW IS EQUIVALENT TO:
            # SELECT
            #     s.id
            # FROM
            #     treeherder.performance_alert_summary s
            # LEFT JOIN
            #     treeherder.performance_alert a ON s.id = a.summary_id
            # WHERE
            #     s.created > {last_year} AND
            #     (s.last_updated > {last_modified} OR a.last_updated > {last_modified})
            # GROUP BY
            #     s.id
            # ORDER BY
            #     s.id
            # LIMIT
            #     {settings.extractor.chunk_size}
            get_ids = SQL(str(
                (
                    PerformanceAlertSummary.objects.filter(
                        Q(created__gt=last_year.datetime)
                        & (
                            Q(last_updated__gt=last_modified.datetime)
                            | Q(alerts__last_updated__gt=last_modified.datetime)
                        )
                    )
                    .annotate()
                    .values("id")
                    .order_by("id")[: settings.extractor.chunk_size]
                ).query
            ))
            sql = extractor.get_sql(get_ids)

            # PULL FROM source, AND PUSH TO destination
            acc = []
            with source.transaction():
                cursor = source.query(sql, stream=True, row_tuples=True)
                extractor.construct_docs(cursor, acc.append, False)
            if not acc:
                break
            destination.extend(acc)

            # RECORD THE STATE
            last_doc = acc[-1]
            last_modified, alert_id = last_doc.created, last_doc.id
            redis.set(
                settings.extractor.key,
                value2json((last_modified, alert_id)).encode("utf8"),
            )

            if len(acc) < settings.extractor.chunk_size:
                break

    except Exception as e:
        Log.warning("problem with extraction", cause=e)

    Log.note("done alert extraction")

    try:
        with Timer("merge shards"):
            destination.merge_shards()
    except Exception as e:
        Log.warning("problem with merge", cause=e)

    Log.note("done alert merge")
    Log.stop()
def test_week_before(self):
    self.assertAlmostEqual(parse("today-2week").unix, (Date.today() - WEEK * 2).unix)
def _typed_encode(value, schema):
    if is_many(value):
        output = []
        update = {}
        nest_added = False
        child_schema = schema.get(NESTED_TYPE)
        if not child_schema:
            child_schema = schema[NESTED_TYPE] = {}

        for r in value:
            v, m, n = _typed_encode(r, child_schema)
            output.append(v)
            if m:
                update.update(m)
            nest_added |= n

        if update:
            return {text(REPEATED): output}, {NESTED_TYPE: update}, True
        else:
            return {text(REPEATED): output}, None, nest_added
    elif NESTED_TYPE in schema:
        if not value:
            return {text(REPEATED): []}, None, False
        else:
            return _typed_encode([value], schema)
    elif is_data(value):
        output = {}
        update = {}
        nest_added = False
        for k, v in value.items():
            child_schema = schema.get(k)
            if not child_schema:
                child_schema = schema[k] = {}
            result, more_update, n = _typed_encode(v, child_schema)
            output[text(escape_name(k))] = result
            if more_update:
                update.update({k: more_update})
            nest_added |= n
        return output, update, nest_added
    elif is_text(schema):
        v, inserter_type, json_type = schema_type(value)
        if schema != json_type:
            Log.error(
                "Can not convert {{existing_type}} to {{expected_type}}",
                existing_type=json_type,
                expected_type=schema,
            )
        return v, None, False
    elif value is None:
        return {
            text(escape_name(t)): None for t, child_schema in schema.items()
        }, None, False
    else:
        v, inserter_type, json_type = schema_type(value)
        child_schema = schema.get(inserter_type)
        update = None
        if not child_schema:
            if schema.get(TIME_TYPE):
                # ATTEMPT TO CONVERT TO TIME, IF EXPECTING TIME
                try:
                    v = parse(v).format(TIMESTAMP_FORMAT)
                    return {text(escape_name(TIME_TYPE)): v}, update, False
                except Exception as e:
                    Log.warning(
                        "Failed attempt to convert {{value}} to TIMESTAMP string",
                        value=v,
                        cause=e,
                    )

            schema[inserter_type] = json_type
            update = {inserter_type: json_type}
        return {text(escape_name(inserter_type)): v}, update, False
def _typed_encode(value, schema): """ RETURN TRIPLE output - THE ENCODED VALUE update - THE ADDITIONAL SCHEMA OVER schema PROVIDED nested - True IF NESTING IS REQUIRED (CONSIDERED SERIOUS SCHEMA CHANGE) """ if is_many(value): if len(value) == 0: return None, None, False output = [] update = {} nest_added = False child_schema = schema.get(NESTED_TYPE) if not child_schema: nest_added = True child_schema = schema[NESTED_TYPE] = {} for r in value: v, m, n = _typed_encode(r, child_schema) output.append(v) set_default(update, m) nest_added |= n if update: return {text(REPEATED): output}, {NESTED_TYPE: update}, nest_added else: return {text(REPEATED): output}, None, nest_added elif NESTED_TYPE in schema: if not value: return {text(REPEATED): []}, None, False else: return _typed_encode([value], schema) elif is_data(value): output = {} update = {} nest_added = False for k, v in value.items(): child_schema = schema.get(k) if not child_schema: child_schema = schema[k] = {} result, more_update, n = _typed_encode(v, child_schema) if result != None: output[text(escape_name(k))] = result set_default(update, {k: more_update}) nest_added |= n return output, update or None, nest_added elif is_text(schema): v, inserter_type, json_type = schema_type(value) if schema != json_type: Log.error( "Can not convert {{existing_type}} to {{expected_type}}", existing_type=json_type, expected_type=schema, ) return v, None, False elif value == None: return { text(escape_name(t)): None for t, child_schema in schema.items() } or None, None, False else: try: v, inserter_type, json_type = schema_type(value) except Exception as e: # LAST DESPERATE ATTEMPT return _typed_encode(value.__data__(), schema) child_schema = schema.get(inserter_type) update = None if not child_schema: if schema.get(TIME_TYPE): # ATTEMPT TO CONVERT TO TIME, IF EXPECTING TIME try: v = parse(v).format(TIMESTAMP_FORMAT) return {text(escape_name(TIME_TYPE)): v}, update, False except Exception as e: Log.warning( "Failed attempt to convert {{value}} to TIMESTAMP string", value=v, cause=e, ) schema[inserter_type] = json_type update = {inserter_type: json_type} return {text(escape_name(inserter_type)): v}, update, False
def test_last_year(self):
    self.assertAlmostEqual(
        parse("today-12month").unix, (Date.today() - MONTH * 12).unix
    )