def test_extract_job(complex_job, extract_job_settings, now):
    """
    Extract a single job through MySqlSnowflakeExtractor and compare the
    resulting documents with the JOB fixture.

    `now` is requested but never referenced in the body; presumably it is a
    fixture wanted for its side effect - TODO confirm.
    """
    source = MySQL(extract_job_settings.source.database)
    extractor = MySqlSnowflakeExtractor(extract_job_settings.source)

    id_query = SQL("SELECT " + text(complex_job.id) + " as id")
    sql = extractor.get_sql(id_query)

    docs = []
    with source.transaction():
        rows = list(source.query(sql, stream=True, row_tuples=True))
        extractor.construct_docs(rows, docs.append, False)

    # THE FIRST DOCUMENT GETS THE guid OF THE FIXTURE JOB BEFORE COMPARING
    first = docs[0]
    first.guid = complex_job.guid

    # TH MIXES LOCAL TIMEZONE WITH GMT: https://bugzilla.mozilla.org/show_bug.cgi?id=1612603
    assertAlmostEqual(docs, JOB, places=4)
def indent(value, prefix=u"\t", indent=None):
    """
    Indent the given string, using prefix * indent as the prefix for each line.

    :param value: the value to indent; it is converted with toString() first
    :param prefix: the text placed in front of every line
    :param indent: optional number of times prefix is repeated
    :return: the indented text; trailing whitespace of the value is re-attached
             (not indented) at the end
    :raises Exception: when the value can not be indented
    """
    # `!= None` IS KEPT (NOT `is not None`); presumably callers may pass a
    # null-like object that only compares equal to None - TODO confirm
    if indent != None:
        prefix = prefix * indent

    value = toString(value)
    try:
        content = value.rstrip()
        suffix = value[len(content):]
        lines = content.splitlines()
        return prefix + (CR + prefix).join(lines) + suffix
    except Exception as e:
        # Exception.message DOES NOT EXIST IN PYTHON 3, SO READING IT WOULD
        # HIDE THE REAL PROBLEM BEHIND AN AttributeError; text(e) WORKS IN BOTH
        raise Exception(
            u"Problem with indent of value (" + text(e) + u")\n" + text(toString(value))
        )
def execute_sql(host, username, password, sql, schema=None, param=None, kwargs=None):
    """
    EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?)

    The sql is piped to the `mysql` command line client; a non-zero return
    code is reported through Log.error together with the client output.

    :param host: passed to the client as -h
    :param username: passed to the client as -u
    :param password: passed to the client as -p
    :param sql: the sql text (or bytes) to run
    :param schema: optional schema name appended to the client arguments
    :param param: optional parameters expanded into the sql template
    :param kwargs: settings object; NOTE(review): it is dereferenced
                   unconditionally, so presumably a decorator fills it in -
                   confirm
    """
    kwargs.schema = coalesce(kwargs.schema, kwargs.database)

    if param:
        # A CONNECTION IS HELD OPEN WHILE THE TEMPLATE IS EXPANDED
        with MySQL(kwargs) as temp:
            sql = expand_template(sql, quote_param(param))

    # We have no way to execute an entire SQL file in bulk, so we
    # have to shell out to the commandline client.
    command = ["mysql"]
    command.append("-h{0}".format(host))
    command.append("-u{0}".format(username))
    command.append("-p{0}".format(password))
    if schema:
        command.append("{0}".format(schema))

    try:
        proc = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            bufsize=-1,
        )
        if is_text(sql):
            sql = sql.encode("utf8")
        (output, _) = proc.communicate(sql)
    except Exception as e:
        raise Log.error("Can not call \"mysql\"", e)

    if not proc.returncode:
        return

    # DO NOT DUMP HUGE SCRIPTS INTO THE LOG
    if len(sql) > 10000:
        sql = "<" + text(len(sql)) + " bytes of sql>"
    Log.error(
        "Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n",
        sql=indent(sql),
        return_code=proc.returncode,
        output=output,
    )
def ujson_encode(value, pretty=False):
    """
    Serialize value to JSON text.

    :param value: the value to serialize
    :param pretty: when truthy, delegate to pretty_json() instead
    :return: result of pretty_json(), or the utf8-decoded output of ujson_dumps()
    :raises: the wrapped exception, after a warning was logged
    """
    if pretty:
        return pretty_json(value)

    try:
        encoded = ujson_dumps(
            scrub(value),
            ensure_ascii=False,
            sort_keys=True,
            escape_forward_slashes=False,
        )
        return encoded.decode('utf8')
    except Exception as e:
        # IMPORTED HERE, AS IN THE ORIGINAL; presumably to avoid an import cycle
        from mo_logs.exceptions import Except
        from mo_logs import Log

        wrapped = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text(repr(value)), cause=wrapped)
        raise wrapped
def _decode(v):
    """
    Replace each "%" plus the two characters after it with hex2chr() of those
    two characters, then try to read the result as JSON.

    :param v: the encoded text
    :return: json2value() of the decoded text, or the decoded text itself when
             json2value() raises
    """
    pieces = []
    pos = 0
    end = len(v)
    while pos < end:
        char = v[pos]
        if char == "%":
            pieces.append(hex2chr(v[pos + 1:pos + 3]))
            pos += 3
        else:
            pieces.append(char)
            pos += 1

    decoded = text("".join(pieces))
    try:
        return json2value(decoded)
    except Exception:
        # BEST EFFORT: NOT JSON, SO HAND BACK THE PLAIN DECODED TEXT
        return decoded
def do_all(self):
    """
    Run every command of this transaction that has not been run yet, after
    asking the parent transaction (if any) to do the same.

    Any failure is reported through Log.error, along with the command that
    was current at the time.
    """
    command = None
    try:
        # ENSURE PARENT TRANSACTION IS UP TO DATE
        if self.parent == self:
            Log.warning("Transactions parent is equal to itself.")
        if self.parent:
            self.parent.do_all()

        # GET THE REMAINING COMMANDS
        with self.locker:
            start = self.complete
            pending = self.todo[start:]
            self.complete = len(self.todo)

        # RUN THEM
        for command in pending:
            if self.db.debug:
                Log.note(FORMAT_COMMAND, command=command.command)
            self.db.db.execute(text(command.command))
    except Exception as e:
        Log.error("problem running commands", current=command, cause=e)
def raise_error(e, packed):
    """
    Re-raise e (wrapped by Except.wrap); when the message looks like a bad
    call signature of func_name and parameters are missing from packed, log
    a more helpful error first.

    :param e: the exception caught while calling the function
    :param packed: mapping of the parameters that were supplied
    """
    err = text(e)
    e = Except.wrap(e)

    signature_hints = ("takes at least", "takes exactly ", "required positional argument")
    if not (err.startswith(func_name) and any(hint in err for hint in signature_hints)):
        raise e

    missing = []
    given = []
    for p in params:
        if str(p) in packed:
            given.append(p)
        else:
            missing.append(p)

    if missing:
        get_logger().error(
            "Problem calling {{func_name}}:  Expecting parameter {{missing}}, given {{given}}",
            func_name=func_name,
            missing=missing,
            given=given,
            stack_depth=2,
            cause=e,
        )
    raise e
def matches(self, testString, parseAll=True):
    """
    Quick check of this parser against a test string. Handy for small inline
    microtests of sub expressions while building up a larger parser.

    Parameters:
     - testString - the text to test against this expression
     - parseAll - (default= ``True``) - flag passed on to `parseString`

    Returns ``True`` when `parseString` succeeds, ``False`` when it raises
    ParseException.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(text(testString), parseAll=parseAll)
    except ParseException:
        return False
    else:
        return True
def remove_instances(self, net_new_utility):
    """
    Tear down and terminate running instances whose combined utility covers
    -net_new_utility.

    :param net_new_utility: the utility change wanted; its negation is the
                            amount of utility to remove
    :return: net_new_utility, adjusted by whatever was removed beyond (or
             short of) the request; unchanged when nothing is removed
    """
    instances = self.running_instances()

    # FIND COMBO THAT WILL SHUTDOWN WHAT WE NEED EXACTLY, OR MORE
    remove_list = []
    for acceptable_error in range(0, 8):
        remaining_utility = -net_new_utility
        remove_list = FlatList()
        for s in instances:
            utility = coalesce(s.markup.type.utility, 0)
            if utility <= remaining_utility + acceptable_error:
                remove_list.append(s)
                remaining_utility -= utility
        if remaining_utility <= 0:
            net_new_utility = -remaining_utility
            break

    if not remove_list:
        return net_new_utility

    # SEND SHUTDOWN TO EACH INSTANCE
    Log.note("Shutdown {{instances}}", instances=remove_list.id)
    # KEEP EACH INSTANCE WITH ITS THREAD, SO A FAILED TEARDOWN IS REPORTED
    # AGAINST THE RIGHT INSTANCE (THE COMPREHENSION VARIABLE IS NOT VISIBLE
    # OUTSIDE THE COMPREHENSION IN PYTHON 3)
    teardowns = [
        (
            instance,
            Thread.run(
                "teardown for " + text(instance.id),
                self.instance_manager.teardown,
                instance,
            ),
        )
        for instance in remove_list
    ]
    for instance, thread in teardowns:
        try:
            thread.join()
        except Exception as e:
            Log.warning("Teardown of {{id}} failed", id=instance.id, cause=e)

    remove_spot_requests = remove_list.spot_instance_request_id

    # TERMINATE INSTANCES
    self.ec2_conn.terminate_instances(instance_ids=remove_list.id)

    # TERMINATE SPOT REQUESTS
    self.ec2_conn.cancel_spot_instance_requests(request_ids=remove_spot_requests)

    return net_new_utility
def test_extract_alert(extract_alert_settings, test_perf_alert_summary, test_perf_alert):
    """
    If you find this test failing, then copy the JSON in the test failure into
    the test_extract_alerts.json file, then you may use the diff to review the
    changes.
    """
    with MySQL(extract_alert_settings.source.database) as source, MySqlSnowflakeExtractor(
        extract_alert_settings.source
    ) as extractor:
        id_query = SQL("SELECT " + text(test_perf_alert_summary.id) + " as id")
        sql = extractor.get_sql(id_query)

        docs = []
        with source.transaction():
            rows = list(source.query(sql, stream=True, row_tuples=True))
            extractor.construct_docs(rows, docs.append, False)

    # TH MIXES LOCAL TIMEZONE WITH GMT: https://bugzilla.mozilla.org/show_bug.cgi?id=1612603
    assertAlmostEqual(docs, ALERT, places=3)
def dispatch(self, json):
    """
    Route a piece of the parsed query to the formatter for its shape.

    :param json: list, dict, string or any other value
    :return: the formatted text
    """
    if isinstance(json, list):
        return self.delimited_list(json)

    if isinstance(json, dict):
        if len(json) == 0:
            return ''
        if 'value' in json:
            return self.value(json)
        if 'from' in json or 'select' in json:
            # Nested queries
            return '({})'.format(self.format(json))
        return self.op(json)

    if isinstance(json, string_types):
        return escape(json, self.ansi_quotes, self.should_quote)

    return text(json)
def quote_sql(value, param=None):
    """
    USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT

    :param value: SQL object, text, data, iterable or other value
    :param param: optional parameters; only used when value is a SQL object
    :return: the quoted form; the branch taken depends on the type of value
    """
    try:
        if isinstance(value, SQL):
            if not param:
                return value
            quoted = {}
            for name, v in param.items():
                quoted[name] = quote_sql(v)
            return SQL(expand_template(value, quoted))

        if is_text(value):
            return SQL(value)
        if is_data(value):
            return quote_value(json_encode(value))
        if hasattr(value, '__iter__'):
            return quote_list(value)
        return text(value)
    except Exception as e:
        Log.error("problem quoting SQL", e)
def round(value, decimal=None, digits=None, places=None):
    """
    Format value as text, rounded as requested.

    :param value: THE VALUE TO ROUND
    :param decimal: NUMBER OF DECIMAL PLACES TO ROUND (NEGATIVE IS LEFT-OF-DECIMAL);
                    DEFAULTS TO 0 WHEN NEITHER decimal NOR digits/places IS GIVEN
    :param digits: ROUND TO SIGNIFICANT NUMBER OF digits
    :param places: SAME AS digits
    :return: THE ROUNDED VALUE AS TEXT ("0" FOR ZERO)
    """
    value = float(value)
    if value == 0.0:
        return "0"

    digits = coalesce(digits, places)
    # `!= None` IS KEPT (NOT `is not None`); presumably coalesce() can return
    # a null-like object that only compares equal to None - TODO confirm
    if digits != None:
        left_of_decimal = int(math.ceil(math.log10(abs(value))))
        decimal = digits - left_of_decimal

    # SAME DEFAULT AS percent(): WITHOUT IT max(None, 0) FAILS WHEN THE CALLER
    # GIVES NO ROUNDING INSTRUCTION AT ALL
    decimal = coalesce(decimal, 0)

    right_of_decimal = max(decimal, 0)
    template = "{:." + text(right_of_decimal) + "f}"
    return template.format(_round(value, decimal))
def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
    """
    Build the token from either pattern text or an already compiled RE.

    ``pattern`` and ``flags`` are handed to ``re.compile()`` as-is. See the
    Python `re module <https://docs.python.org/3/library/re.html>`_
    for an explanation of the acceptable patterns and flags.

    ``asGroupList`` and ``asMatch`` are accepted but not referenced here.
    """
    super(Regex, self).__init__()

    if isinstance(pattern, Regex.compiledREtype):
        # ALREADY COMPILED: USE AS-IS
        self.re = pattern
        self.pattern = str(pattern)
        self.flags = flags
    elif isinstance(pattern, text):
        if not pattern:
            warnings.warn(
                "null string passed to Regex; use Empty() instead",
                SyntaxWarning,
                stacklevel=2,
            )

        self.pattern = pattern
        self.flags = flags
        try:
            self.re = re.compile(self.pattern, self.flags)
        except sre_constants.error as cause:
            Log.error(
                "invalid pattern {{pattern}} passed to Regex",
                pattern=pattern,
                cause=cause,
            )
    else:
        Log.error("Regex may only be constructed with a string or a compiled RE object")

    self.parser_name = text(self)
    config = self.parser_config
    config.mayIndexError = False
    config.mayReturnEmpty = True
def raise_error(e, a, k):
    """
    Re-raise e; when the message looks like a bad call signature of func_name
    and some parameters were not supplied, log a more helpful error first.

    :param e: the exception caught while calling the function
    :param a: the positional arguments that were used
    :param k: the keyword arguments that were used
    """
    packed = k.copy()
    packed.update(dict(zip(params, a)))
    err = text(e)

    signature_hints = ("takes at least", "takes exactly ", "required positional argument")
    if not (err.startswith(func_name) and any(hint in err for hint in signature_hints)):
        raise e

    missing = []
    given = []
    for p in params:
        if str(p) in packed:
            given.append(p)
        else:
            missing.append(p)

    if missing:
        get_logger().error(
            "Problem calling {{func_name}}:  Expecting parameter {{missing}}, given {{given}}",
            func_name=func_name,
            missing=missing,
            given=given,
            stack_depth=2,
            cause=e,
        )
    raise e
def __init__(self, notChars, min=1, max=0, exact=0):
    """
    Token matching a run of characters that are NOT in notChars.

    :param notChars: the characters that end the run
    :param min: minimum run length; must be at least 1
    :param max: maximum run length; 0 (or less) means no upper limit
    :param exact: when non-zero, overrides both min and max
    :raises ValueError: when min < 1
    """
    Token.__init__(self)
    not_chars = "".join(sorted(set(notChars)))

    if min < 1:
        raise ValueError(
            "cannot specify a minimum length < 1; use "
            "Optional(CharsNotIn()) if zero-length char group is permitted"
        )

    max = max if max > 0 else MAX_INT
    if exact:
        min = exact
        max = exact

    if len(notChars) == 1:
        regex = "[^" + regex_range(notChars) + "]"
    else:
        # presumably regex_range() returns an already bracketed class for
        # several characters, so its first character is replaced by "[^"
        # - TODO confirm
        regex = "[^" + regex_range(notChars)[1:]

    # REGEX REPETITION IS WRITTEN {m,n} / {m,} - A COLON IS NOT A QUANTIFIER
    # AND WOULD BE MATCHED AS LITERAL TEXT
    # NOTE(review): assumes regex_compile() accepts standard `re` syntax
    if not max or max == MAX_INT:
        if min == 0:
            suffix = "*"
        elif min == 1:
            suffix = "+"
        else:
            suffix = "{" + str(min) + ",}"
    elif min == 1 and max == 1:
        suffix = ""
    else:
        suffix = "{" + str(min) + "," + str(max) + "}"

    self.set_config(
        regex=regex_compile(regex + suffix),
        min_len=min,
        max_len=max,
        not_chars=not_chars,
    )
    self.parser_name = text(self)
def __exit__(self, exc_type, exc_val, exc_tb):
    """
    Restore the previous ParserElement._parse and write the collected timing,
    sorted by total_parse descending, as a tab-delimited profile to a
    timestamp-suffixed copy of self.file.
    """
    ParserElement._parse = self.previous_parse

    rows = []
    for parser, (cache, match, fail, parse, total) in timing.items():
        parsed_calls = match + fail
        all_calls = parsed_calls + cache
        overhead = total - parse
        rows.append({
            "parser": text(parser),
            "cache_hits": cache,
            "matches": match,
            "failures": fail,
            "call_count": all_calls,
            "total_parse": parse,
            "total_overhead": overhead,
            "per_parse": parse / parsed_calls,
            "per_overhead": overhead / all_calls,
        })

    profile = jx.sort(rows, {"total_parse": "desc"})
    stamp = Date.now().format("%Y%m%d_%H%M%S")
    self.file.add_suffix(stamp).write(convert.list2tab(profile))
def __init__(self, rate=None, amortization_period=None, source=None, database=None, kwargs=None):
    """
    Set up the queues, the Sqlite-backed cache table and the background
    threads (workers, limiter, cleaner).

    :param rate: defaults to HG_REQUEST_PER_SECOND
    :param amortization_period: defaults to AMORTIZATION_PERIOD; rate times its
                                `seconds` sizes the requests queue
    :param source: settings with a `url` attribute
    :param database: passed to Sqlite()
    :param kwargs: not referenced in this constructor
    """
    self.amortization_period = coalesce(amortization_period, AMORTIZATION_PERIOD)
    self.rate = coalesce(rate, HG_REQUEST_PER_SECOND)
    self.cache_locker = Lock()
    self.cache = {}  # MAP FROM url TO (ready, headers, response, timestamp) PAIR
    self.no_cache = {}  # VERY SHORT TERM CACHE
    self.workers = []
    self.todo = Queue(APP_NAME + " todo")
    max_requests = int(self.rate * self.amortization_period.seconds)
    self.requests = Queue(APP_NAME + " requests", max=max_requests)
    self.url = URL(source.url)
    self.db = Sqlite(database)
    self.inbound_rate = RateLogger("Inbound")
    self.outbound_rate = RateLogger("hg.mo")

    # CREATE THE cache TABLE ONLY WHEN THE DATABASE HAS NO TABLES AT ALL
    tables = self.db.query("SELECT name FROM sqlite_master WHERE type='table'").data
    if not tables:
        with self.db.transaction() as txn:
            txn.execute(
                "CREATE TABLE cache ("
                " path TEXT PRIMARY KEY, "
                " headers TEXT, "
                " response TEXT, "
                " timestamp REAL "
                ")"
            )

    workers = []
    for n in range(CONCURRENCY):
        workers.append(Thread.run(APP_NAME + " worker" + text(n), self._worker))
    self.threads = workers
    self.limiter = Thread.run(APP_NAME + " limiter", self._rate_limiter)
    self.cleaner = Thread.run(APP_NAME + " cleaner", self._cache_cleaner)
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.

    The pattern has the form::

        integer expr expr expr...

    where the leading integer says how many ``expr`` follow. The matched
    tokens are the ``expr`` tokens as a list; the leading count token is
    suppressed.

    If ``intExpr`` is given, it should be a mo_parsing expression that
    produces an integer value.

    Example::

        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').addParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    if intExpr is None:
        intExpr = Word(nums).addParseAction(lambda t: int(t[0]))

    array_expr = Forward()

    def countFieldParseAction(t, l, s):
        # THE COUNT JUST PARSED DECIDES HOW MANY expr THE FORWARD MUST MATCH
        count = t[0]
        array_expr << Group(Many(expr, exact=count))
        return []

    named_count = intExpr.set_parser_name("arrayLen")
    intExpr = named_count.addParseAction(countFieldParseAction, callDuringTry=True)

    combined = intExpr + array_expr
    return combined.set_parser_name("(len) " + text(expr) + "...")
def percent(value, decimal=None, digits=None, places=None):
    """
    Display value as a percent (1 = 100%).

    :param value: the value to show
    :param decimal: number of decimal places of the percentage; defaults to 0
    :param digits: number of significant digits; overrides decimal when given
    :param places: same as digits
    :return: the text of the percentage ("0%" for zero)
    """
    number = float(value)
    if number == 0.0:
        return "0%"

    significant = coalesce(digits, places)
    # `!= None` IS KEPT (NOT `is not None`); presumably coalesce() can return
    # a null-like object that only compares equal to None - TODO confirm
    if significant != None:
        magnitude = int(math.ceil(math.log10(abs(number))))
        decimal = significant - (magnitude + 2)

    decimal = coalesce(decimal, 0)
    template = "{:." + text(max(decimal, 0)) + "%}"
    return template.format(_round(number, decimal + 2))
def test_bulk_aggs_list_no_records(self):
    """
    A bulk groupby whose filter matches nothing: the result posted with
    destination "url" must be an empty list in "list" format.
    """
    data = wrap([{"a": "test" + text(i)} for i in range(10111)])
    expected = []

    query = {
        "from": TEST_TABLE,
        "groupby": "a",
        "where": {"eq": {"a": "not exists"}},
        "limit": len(data),
        "chunk_size": 1000,
        "sort": "a",
    }
    test = wrap({
        "data": data,
        "query": query,
        "expecting_list": {"data": expected[:MAX_LIMIT]},  # DUMMY, TO ENSURE LOADED
    })
    self.utils.execute_tests(test)

    # ASK AGAIN, THIS TIME FOR THE RESULT TO BE DELIVERED AT A url
    test.query.format = "list"
    test.query.destination = "url"
    result = http.post_json(url=self.utils.testing.query, json=test.query)
    self.assertEqual(result.meta.format, "list")

    @self.retry(result.url)
    def get_content():
        content = http.get_json(result.url)
        self.assertEqual(content.meta.format, "list")
        self.assertEqual(jx.sort(content.data, "a"), jx.sort(expected, "a"))
def add_version(flask_app):
    """
    ADD ROUTING TO HANDLE REQUEST FOR /__version__

    The response body is built once, when this function is called.

    :param flask_app: THE (Flask) APP
    :return: None
    """
    try:
        rev = coalesce(git.get_revision(), "")
        branch = "https://github.com/mozilla/ActiveData/tree/" + coalesce(git.get_branch())

        # APPEND THE NEWLINE BEFORE ENCODING: bytes + text IS A TypeError IN PYTHON 3
        version_json = value2json(
            {
                "source": "https://github.com/mozilla/ActiveData/tree/" + rev,
                "branch": branch,
                "commit": rev,
            },
            pretty=True,
        ) + text("\n")
        version_info = version_json.encode('utf8')

        Log.note("Using github version\n{{version}}", version=version_json)

        @register_thread
        @cors_wrapper
        def version():
            return Response(
                version_info, status=200, headers={"Content-Type": mimetype.JSON}
            )

        flask_app.add_url_rule(
            str("/__version__"),
            None,
            version,
            defaults={},
            methods=[str("GET"), str("POST")],
        )
    except Exception as e:
        Log.error("Problem setting up listeners for dockerflow", cause=e)
def get(url):
    """
    USE json.net CONVENTIONS TO LINK TO INLINE OTHER JSON

    :param url: where to load from; must declare a protocol, and must be
                absolute unless it is a file:// url
    :return: the wrapped document, after $ref and local references are replaced
    """
    url = text(url)
    scheme_end = url.find("://")
    if scheme_end == -1:
        Log.error("{{url}} must have a prototcol (eg http://) declared", url=url)

    base = URL("")
    if url.startswith("file://") and url[7] != "/":
        # RELATIVE FILE: RESOLVE AGAINST THE CURRENT WORKING DIRECTORY
        cwd = os.getcwd()
        if os.sep == "\\":
            base = URL("file:///" + cwd.replace(os.sep, "/").rstrip("/") + "/.")
        else:
            base = URL("file://" + cwd.rstrip("/") + "/.")
    elif url[scheme_end + 3] != "/":
        Log.error("{{url}} must be absolute", url=url)

    # BLANK URL ONLY WORKS IF url IS ABSOLUTE
    phase1 = _replace_ref(wrap({"$ref": url}), base)
    try:
        return wrap(_replace_locals(phase1, [phase1]))
    except Exception as e:
        Log.error("problem replacing locals in\n{{phase1}}", phase1=phase1, cause=e)
def test_extract_job(complex_job, extract_job_settings, now):
    """
    If you find this test failing, then copy the JSON in the test failure into
    the test_extract_job.json file, then you may use the diff to review the
    changes.
    """
    source = MySQL(extract_job_settings.source.database)
    extractor = MySqlSnowflakeExtractor(extract_job_settings.source)

    id_query = SQL("SELECT " + text(complex_job.id) + " as id")
    sql = extractor.get_sql(id_query)

    docs = []
    with source.transaction():
        rows = list(source.query(sql, stream=True, row_tuples=True))
        extractor.construct_docs(rows, docs.append, False)

    # THE FIRST DOCUMENT GETS THE guid OF THE FIXTURE JOB BEFORE COMPARING
    first = docs[0]
    first.guid = complex_job.guid

    # TH MIXES LOCAL TIMEZONE WITH GMT: https://bugzilla.mozilla.org/show_bug.cgi?id=1612603
    assertAlmostEqual(docs, JOB, places=4)
def dispatch(self, json):
    """
    Route a piece of the parsed query to the formatter for its shape.

    :param json: list, dict, string or any other value
    :return: the formatted text
    """
    if isinstance(json, list):
        return self.delimited_list(json)

    if isinstance(json, dict):
        if len(json) == 0:
            return ""
        if "value" in json:
            return self.value(json)
        if any(key in json for key in ("from", "select", "select_distinct")):
            # Nested queries
            return "({})".format(self.format(json))
        return self.op(json)

    if isinstance(json, string_types):
        return escape(json, self.ansi_quotes, self.should_quote)

    return text(json)
def parseImpl(self, string, loc, doActions=True):
    """
    Try each expression in self.exprs in order and return the result of the
    first one that parses.

    :return: (loc, ParseResults) of the first expression that matched
    :raises ParseException: when none matched; the exception of the attempt
            that got the furthest is re-raised with an "Expecting ..." message
    """
    furthest_loc = -1
    furthest_exc = None

    for e in self.exprs:
        try:
            loc, ret = e._parse(string, loc, doActions)
            return loc, ParseResults(self, [ret])
        except ParseException as err:
            if err.loc > furthest_loc:
                furthest_exc = err
                furthest_loc = err.loc
        except IndexError:
            end = len(string)
            if end > furthest_loc:
                # NOTE(review): the argument order here differs from the
                # ParseException raised at the bottom - confirm which one
                # agrees with the ParseException signature
                furthest_exc = ParseException(string, end, self)
                furthest_loc = end

    # only got here if no expression matched, raise exception for match that made it the furthest
    if furthest_exc is None:
        raise ParseException(self, loc, string)

    furthest_exc.msg = "Expecting " + text(self)
    raise furthest_exc
def _find_revision(self, revision):
    """
    Look for the revision in the try, mozilla-inbound and autoland branches
    of the default locale, using three threads.

    :param revision: the changeset id to look for
    :return: list of whatever self.get_revision() returned for each branch
             where it did not raise
    """
    please_stop = False
    locker = Lock()
    output = []
    candidate_names = ["try", "mozilla-inbound", "autoland"]

    queue = Queue("repo branches", max=2000)
    queue.extend(
        b
        for b in self.branches
        if b.locale == DEFAULT_LOCALE and b.name in candidate_names
    )
    queue.add(THREAD_STOP)

    # FAILURES ARE COLLECTED HERE, BUT NOTHING IN THIS METHOD READS THEM
    problems = []

    def _find(please_stop):
        for b in queue:
            if please_stop:
                return
            try:
                url = b.url + "json-info?node=" + revision
                rev = self.get_revision(Revision(branch=b, changeset={"id": revision}))
                with locker:
                    output.append(rev)
                Log.note("Revision found at {{url}}", url=url)
            except Exception as f:
                problems.append(f)

    threads = [
        Thread.run("find changeset " + text(i), _find, please_stop=please_stop)
        for i in range(3)
    ]
    for t in threads:
        with assert_no_exception:
            t.join()

    return output
def __init__(self, flask_app, auth0, permissions, session_manager, device=None):
    """
    Keep the auth0 configuration and attach the login/logout/keep_alive
    endpoints (and, when device is given, the device endpoints) to the app.

    :param flask_app: the app that receives the url rules
    :param auth0: configuration; must have `domain` and `endpoints`
    :param permissions: stored on the instance
    :param session_manager: stored on the instance
    :param device: optional configuration for the device endpoints
    """
    if not auth0.domain:
        Log.error("expecting auth0 configuration")

    self.auth0 = auth0
    self.permissions = permissions
    self.session_manager = session_manager

    # ATTACH ENDPOINTS TO FLASK APP
    endpoints = auth0.endpoints
    if not (endpoints.login and endpoints.logout and endpoints.keep_alive):
        Log.error("Expecting paths for login, logout and keep_alive")

    add_flask_rule(flask_app, endpoints.login, self.login)
    add_flask_rule(flask_app, endpoints.logout, self.logout)
    add_flask_rule(flask_app, endpoints.keep_alive, self.keep_alive)

    if not device:
        return

    self.device = device
    # THE db SETTING IS REPLACED BY THE OPENED DATABASE
    db = self.device.db = Sqlite(device.db)
    if not db.about("device"):
        with db.transaction() as txn:
            txn.execute(sql_create(
                "device",
                {"state": "TEXT PRIMARY KEY", "session_id": "TEXT"},
            ))

    expected_redirect = text(URL(device.home, path=device.endpoints.callback))
    if device.auth0.redirect_uri != expected_redirect:
        Log.error("expecting home+endpoints.callback == auth0.redirect_uri")

    device_endpoints = device.endpoints
    add_flask_rule(flask_app, device_endpoints.register, self.device_register)
    add_flask_rule(flask_app, device_endpoints.status, self.device_status)
    add_flask_rule(flask_app, device_endpoints.login, self.device_login)
    add_flask_rule(flask_app, device_endpoints.callback, self.device_callback)
def __init__(
    self,
    testing,  # location of the ActiveData server endpoints we are testing
    backend_es,  # the ElasticSearch settings for filling the backend
    fast_testing=False,
    kwargs=None
):
    """
    Record the test settings and choose the server used by the tests: the
    real `http` client, or FakeHttp() when fast_testing is set.

    NOTE(review): kwargs.backend_es is read in the fast_testing branch, so
    presumably a decorator fills kwargs with all the settings - confirm.
    """
    # `== None` IS KEPT (NOT `is None`); presumably schema may be a null-like
    # object that only compares equal to None - TODO confirm
    if backend_es.schema == None:
        Log.error("Expecting backed_es to have a schema defined")

    # THE LETTER CHANGES EVERY 30 SECONDS
    letters = text(ascii_lowercase)
    slot = int(Date.now().unix / 30) % 26
    self.random_letter = letters[slot]

    self.testing = testing
    self.backend_es = backend_es
    self.settings = kwargs
    self._es_test_settings = None
    self._es_cluster = None
    self._index = None

    if not jx_containers.config.default:
        jx_containers.config.default = {
            "type": "elasticsearch",
            "settings": backend_es
        }

    if fast_testing:
        Log.alert("TESTS WILL RUN FAST, BUT NOT ALL TESTS ARE RUN!\nEnsure the `file://tests/config/elasticsearch.json#fastTesting=true` to turn on the network response tests.")
        # WE WILL USE THE ActiveServer CODE, AND CONNECT TO ES DIRECTLY.
        # THIS MAKES FOR SLIGHTLY FASTER TEST TIMES BECAUSE THE PROXY IS
        # MISSING
        self.server = FakeHttp()
        jx_containers.config.default = {
            "type": "elasticsearch",
            "settings": kwargs.backend_es.copy()
        }
    else:
        self.server = http
def __init__(self, name, target, *args, **kwargs):
    """
    Prepare (but do not start) a thread that will call target.

    :param name: thread name; a name built from the object hash is used when
                 coalesce() finds none
    :param target: the function to run
    :param args: positional arguments kept for the target
    :param kwargs: keyword arguments kept for the target; the PLEASE_STOP
                   entry is created when missing, the PARENT_THREAD entry is
                   removed from the kept copy and used as the parent
    """
    fallback_name = "thread_" + text(object.__hash__(self))
    BaseThread.__init__(self, -1, coalesce(name, fallback_name))
    self.target = target
    self.end_of_thread = Data()
    self.args = args

    # ENSURE THERE IS A SHARED please_stop SIGNAL
    self.kwargs = copy(kwargs)
    stop_signal = self.kwargs.get(PLEASE_STOP)
    if stop_signal is None:
        stop_signal = Signal("please_stop for " + self.name)
        self.kwargs[PLEASE_STOP] = stop_signal
    self.please_stop = stop_signal

    self.thread = None
    self.ready_to_stop = Signal("joining with " + self.name)
    self.stopped = Signal("stopped signal for " + self.name)

    if PARENT_THREAD not in kwargs:
        # NO EXPLICIT PARENT: THE CURRENT THREAD ADOPTS THIS ONE
        self.parent = Thread.current()
        self.parent.add_child(self)
    else:
        del self.kwargs[PARENT_THREAD]
        self.parent = kwargs[PARENT_THREAD]