def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None): """ Snagged from unittest/case.py, then modified (Aug2014) """ if expected is NULL: if test == None: # pandas dataframes reject any comparison with an exception! return else: raise AssertionError(expand_template("{{test}} != {{expected}}", locals())) if expected == None: # None has no expectations return if test == expected: # shortcut return if not is_number(expected): # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL if is_list(expected) and len(expected) == 0 and test == None: return if is_data(expected) and not expected.keys() and test == None: return if test != expected: raise AssertionError(expand_template("{{test}} != {{expected}}", locals())) return num_param = 0 if digits != None: num_param += 1 if places != None: num_param += 1 if delta != None: num_param += 1 if num_param > 1: raise TypeError("specify only one of digits, places or delta") if digits is not None: with suppress_exception: diff = log10(abs(test-expected)) if diff < digits: return standardMsg = expand_template("{{test}} != {{expected}} within {{digits}} decimal places", locals()) elif delta is not None: if abs(test - expected) <= delta: return standardMsg = expand_template("{{test}} != {{expected}} within {{delta}} delta", locals()) else: if places is None: places = 15 with suppress_exception: diff = mo_math.log10(abs(test-expected)) if diff < mo_math.ceiling(mo_math.log10(abs(test)))-places: return standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{places}} places", locals()) raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")
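# A hedged usage sketch for assertAlmostEqualValue above (the test harness around
# it is assumed): at most one of digits, places, or delta may be supplied.
assertAlmostEqualValue(3.14159, 3.14159265, places=6)  # passes: agreement to 6 significant places
assertAlmostEqualValue(100.0, 101.0, delta=2)          # passes: |test - expected| <= delta
# assertAlmostEqualValue(1.0, 2.0, places=3, delta=1)  # TypeError: specify only one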
def _setup_grcov(self): sudo("apt-get install -y gcc") response = http.get_json("https://api.github.com/repos/marco-c/grcov/releases/latest") with cd("~/ActiveData-ETL"): for asset in response.assets: if self.settings.grcov.platform in asset.browser_download_url: run("wget " + asset.browser_download_url) run(expand_template("tar xf grcov-{{platform}}.tar.bz2", self.settings.grcov)) run(expand_template("rm grcov-{{platform}}.tar.bz2", self.settings.grcov))
def _aggop(self, query): """ SINGLE ROW RETURNED WITH AGGREGATES """ if isinstance(query.select, list): # RETURN SINGLE OBJECT WITH AGGREGATES for s in query.select: if s.aggregate not in aggregates: Log.error("Expecting all columns to have an aggregate: {{select}}", select=s) selects = FlatList() for s in query.select: selects.append(sql_alias(aggregates[s.aggregate].replace("{{code}}", s.value), quote_column(s.name))) sql = expand_template(""" SELECT {{selects}} FROM {{table}} {{where}} """, { "selects": SQL(",\n".join(selects)), "table": self._subquery(query["from"])[0], "where": self._where2sql(query.filter) }) return sql, lambda sql: self.db.column(sql)[0] # RETURNING SINGLE OBJECT WITH AGGREGATE VALUES else: # RETURN SINGLE VALUE s0 = query.select if s0.aggregate not in aggregates: Log.error("Expecting all columns to have an aggregate: {{select}}", select=s0) select = sql_alias(aggregates[s0.aggregate].replace("{{code}}", s0.value), quote_column(s0.name)) sql = expand_template(""" SELECT {{selects}} FROM {{table}} {{where}} """, { "selects": SQL(select), "table": self._subquery(query["from"])[0], "where": self._where2sql(query.where) }) def post(sql): result = self.db.column_query(sql) return result[0][0] return sql, post # RETURN SINGLE VALUE
def column_query(self, sql, param=None): """ RETURN RESULTS IN [column][row_num] GRID """ self._execute_backlog() try: old_cursor = self.cursor if not old_cursor: # ALLOW NON-TRANSACTIONAL READS self.cursor = self.db.cursor() self.cursor.execute("SET TIME_ZONE='+00:00'") self.cursor.close() self.cursor = self.db.cursor() if param: sql = expand_template(sql, quote_param(param)) sql = self.preamble + outdent(sql) self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) self.cursor.execute(sql) grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor] # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])] result = transpose(*grid) if not old_cursor: # CLEANUP AFTER NON-TRANSACTIONAL READS self.cursor.close() self.cursor = None return result except Exception as e: if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0: Log.error("Did you close the db connection?", e) Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
def execute( self, command, param=None, retry=True # IF command FAILS, JUST THROW ERROR ): if param: command = expand_template(command, self.quote_param(param)) output = None done = False while not done: try: with self.locker: if not self.connection: self._connect() with Closer(self.connection.cursor()) as curs: curs.execute(command) if curs.rowcount >= 0: output = curs.fetchall() self.connection.commit() done = True except Exception as e: with suppress_exception: self.connection.rollback() # TODO: FIGURE OUT WHY rollback() DOES NOT HELP self.connection.close() self.connection = None self._connect() if not retry: Log.error("Problem with command:\n{{command|indent}}", command=command, cause=e) return output
def write(self, template, params): value = expand_template(template, params) self.locker.acquire() try: self.writer(value + CR) finally: self.locker.release()
def _send_email(self): try: if not self.accumulation: return with Emailer(self.settings) as emailer: # WHO ARE WE SENDING TO emails = Data() for template, params in self.accumulation: content = expand_template(template, params) emails[literal_field(self.settings.to_address)] += [content] for c in self.cc: if any(d in params.params.error for d in c.contains): emails[literal_field(c.to_address)] += [content] # SEND TO EACH for to_address, content in emails.items(): emailer.send_email( from_address=self.settings.from_address, to_address=listwrap(to_address), subject=self.settings.subject, text_data="\n\n".join(content) ) self.accumulation = [] except Exception as e: Log.warning("Could not send", e) finally: self.next_send = Date.now() + self.settings.average_interval * (2 * Random.float())
def write(self, template, params): try: with self.file_lock: self.file.append(expand_template(template, params)) except Exception as e: Log.warning("Problem writing to file {{file}}, waiting...", file=self.file.name, cause=e) time.sleep(5)
def query(self, sql, param=None, stream=False, row_tuples=False): """ RETURN LIST OF dicts """ if not self.cursor: # ALLOW NON-TRANSACTIONAL READS Log.error("must perform all queries inside a transaction") self._execute_backlog() try: if param: sql = expand_template(sql, quote_param(param)) sql = self.preamble + outdent(sql) self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) self.cursor.execute(sql) if row_tuples: if stream: result = self.cursor else: result = wrap(list(self.cursor)) else: columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])] if stream: result = (wrap({c: utf8_to_unicode(v) for c, v in zip(columns, row)}) for row in self.cursor) else: result = wrap([{c: utf8_to_unicode(v) for c, v in zip(columns, row)} for row in self.cursor]) return result except Exception as e: e = Except.wrap(e) if "InterfaceError" in e: Log.error("Did you close the db connection?", e) Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
def _send_email(self): try: if not self.accumulation: return with Closer(connect_to_region( self.settings.region, aws_access_key_id=unwrap(self.settings.aws_access_key_id), aws_secret_access_key=unwrap(self.settings.aws_secret_access_key) )) as conn: # WHO ARE WE SENDING TO emails = Data() for template, params in self.accumulation: content = expand_template(template, params) emails[literal_field(self.settings.to_address)] += [content] for c in self.cc: if any(d in params.params.error for d in c.contains): emails[literal_field(c.to_address)] += [content] # SEND TO EACH for to_address, content in emails.items(): conn.send_email( source=self.settings.from_address, to_addresses=listwrap(to_address), subject=self.settings.subject, body="\n\n".join(content), format="text" ) self.next_send = Date.now() + self.settings.max_interval self.accumulation = [] except Exception as e: self.next_send = Date.now() + self.settings.max_interval Log.warning("Could not send", e)
def forall(self, sql, param=None, _execute=None): assert _execute num = 0 self._execute_backlog() try: old_cursor = self.cursor if not old_cursor: # ALLOW NON-TRANSACTIONAL READS self.cursor = self.db.cursor() if param: sql = expand_template(sql, quote_param(param)) sql = self.preamble + outdent(sql) self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) self.cursor.execute(sql) columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description]) for r in self.cursor: num += 1 _execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r])))) if not old_cursor: # CLEANUP AFTER NON-TRANSACTIONAL READS self.cursor.close() self.cursor = None except Exception as e: Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1) return num
def quote_value(self, value): """ convert values to mysql code for the same mostly delegate directly to the mysql lib, but some exceptions exist """ try: if value == None: return SQL("NULL") elif isinstance(value, SQL): if not value.param: # value.template CAN BE MORE THAN A TEMPLATE STRING return self.quote_sql(value.template) param = {k: self.quote_sql(v) for k, v in value.param.items()} return SQL(expand_template(value.template, param)) elif isinstance(value, basestring): return SQL(self.db.literal(value)) elif isinstance(value, Mapping): return SQL(self.db.literal(json_encode(value))) elif Math.is_number(value): return SQL(unicode(value)) elif isinstance(value, datetime): return SQL("str_to_date('" + value.strftime("%Y%m%d%H%M%S.%f") + "', '%Y%m%d%H%i%s.%f')") elif isinstance(value, Date): return SQL("str_to_date('"+value.format("%Y%m%d%H%M%S.%f")+"', '%Y%m%d%H%i%s.%f')") elif hasattr(value, '__iter__'): return SQL(self.db.literal(json_encode(value))) else: return self.db.literal(value) except Exception as e: Log.error("problem quoting SQL", e)
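# Hedged examples of the quote_value() conversions above (db is a hypothetical
# instance; the comments describe intent, not verified output literals):
db.quote_value(None)      # -> SQL("NULL")
db.quote_value("it's")    # -> SQL escaped by the mysql lib's literal()
db.quote_value({"a": 1})  # -> JSON-encoded, then quoted as a string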
def to_es_script(self, schema, not_null=False, boolean=False, many=True): term = FirstOp(self.term).partial_eval() value = term.to_es_script(schema) if is_op(value.frum, CoalesceOp_): return CoalesceOp( [StringOp(t).partial_eval() for t in value.frum.terms] ).to_es_script(schema) if value.miss is TRUE or value.type is IS_NULL: return empty_string_script elif value.type == BOOLEAN: return EsScript( miss=self.term.missing().partial_eval(), type=STRING, expr=value.expr + ' ? "T" : "F"', frum=self, schema=schema, ) elif value.type == INTEGER: return EsScript( miss=self.term.missing().partial_eval(), type=STRING, expr="String.valueOf(" + value.expr + ")", frum=self, schema=schema, ) elif value.type == NUMBER: return EsScript( miss=self.term.missing().partial_eval(), type=STRING, expr=expand_template(NUMBER_TO_STRING, {"expr": value.expr}), frum=self, schema=schema, ) elif value.type == STRING: return value else: return EsScript( miss=self.term.missing().partial_eval(), type=STRING, expr=expand_template(NUMBER_TO_STRING, {"expr": value.expr}), frum=self, schema=schema, )
def execute(self, sql, param=None): if self.transaction_level == 0: Log.error("Expecting transaction to be started before issuing queries") if param: sql = expand_template(sql, quote_param(param)) sql = outdent(sql) self.backlog.append(sql) if self.debug or len(self.backlog) >= MAX_BATCH_SIZE: self._execute_backlog()
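# The parameter convention used by execute()/query() in this module: values are
# escaped by quote_param() and spliced into {{name}} slots by expand_template.
# A hypothetical call (db_config, table, and column names are illustrative only):
with MySQL(db_config) as db:  # entering the context starts the required transaction
    db.execute(
        "INSERT INTO log_entries (severity, message) VALUES ({{severity}}, {{message}})",
        {"severity": 3, "message": "it's quoted safely"},
    )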
def to_es_script(self, schema, not_null=False, boolean=False, many=True): return EsScript( miss=FALSE, type=INTEGER, expr=expand_template( _count_template, {"expr": Painless[self.terms].partial_eval().to_es_script(schema).expr}, ), frum=self, schema=schema, )
def write(self, template, params): try: log_line = expand_template(template, params) level = max(self.min_level, MAP[params.context]) self.logger.log(level, log_line) self.count += 1 except Exception as cause: cause = exceptions.Except.wrap(cause) import sys sys.stderr.write("can not write to logger: " + text(cause))
def write(self, template, params): try: with self.file_lock: self.file.append(expand_template(template, params)) except Exception as e: Log.warning( "Problem writing to file {{file}}, waiting...", file=self.file.name, cause=e, ) time.sleep(5)
def append_query(self, es_query, start): self.start = start es_field = self.query.frum.schema.leaves(self.var)[0].es_column es_query = wrap({"aggs": { "_match": set_default({"terms": { "script": expand_template(LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'}) }}, es_query) }}) return es_query
def execute(self, sql, param=None): if self.transaction_level == 0: Log.error( "Expecting transaction to be started before issuing queries") if param: sql = expand_template(sql, self.quote_param(param)) sql = outdent(sql) self.backlog.append(sql) if self.debug or len(self.backlog) >= MAX_BATCH_SIZE: self._execute_backlog()
def query(self, sql, param=None, stream=False, row_tuples=False): """ RETURN A LIST OF dicts :param sql: SQL TEMPLATE TO SEND :param param: PARAMETERS TO INJECT INTO SQL TEMPLATE :param stream: STREAM OUTPUT :param row_tuples: DO NOT RETURN dicts """ if not self.cursor: # ALLOW NON-TRANSACTIONAL READS Log.error("must perform all queries inside a transaction") self._execute_backlog() try: if isinstance(sql, SQL): sql = text(sql) if param: sql = expand_template(sql, quote_param(param)) sql = self.preamble + outdent(sql) self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) self.cursor.execute(sql) if row_tuples: if stream: result = self.cursor else: result = wrap(list(self.cursor)) else: columns = tuple( utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])) def streamer(): for row in self.cursor: output = Data() for c, v in zip(columns, row): output[c] = v yield output if stream: result = streamer() else: result = wrap(streamer()) return result except Exception as e: e = Except.wrap(e) if "InterfaceError" in e: Log.error("Did you close the db connection?", e) Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
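# Hedged sketch of the stream=True path above: streamer() yields one Data row at a
# time, so large result sets are never materialized. db_config and the table are
# illustrative; entering the MySQL context supplies the transaction query() requires.
with MySQL(db_config) as db:
    for row in db.query("SELECT id, name FROM users WHERE id > {{min_id}}", {"min_id": 10}, stream=True):
        print(row.id, row.name)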
def append_query(self, query_path, es_query): es_field = first(self.query.frum.schema.leaves(self.var)).es_column return Aggs().add( TermsAggs( "_match", { "script": expand_template( LIST_TO_PIPE, {"expr": 'doc[' + quote(es_field) + '].values'}), "size": self.limit }, self).add(es_query))
def inner(changeset_id): if self.es.cluster.version.startswith("1.7."): query = { "query": {"filtered": { "query": {"match_all": {}}, "filter": {"and": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]} }}, "size": 1 } else: query = { "query": {"bool": {"must": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]}}, "size": 1 } try: # ALWAYS TRY ES FIRST with self.es_locker: response = self.es.search(query) json_diff = response.hits.hits[0]._source.changeset.diff if json_diff: return json_diff except Exception as e: pass url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id}) if DEBUG: Log.note("get unified diff from {{url}}", url=url) try: response = http.get(url) diff = response.content.decode("utf8", "replace") json_diff = diff_to_json(diff) num_changes = _count(c for f in json_diff for c in f.changes) if json_diff: if num_changes < MAX_DIFF_SIZE: return json_diff elif revision.changeset.description.startswith("merge "): return None # IGNORE THE MERGE CHANGESETS else: Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes) for file in json_diff: file.changes = None return json_diff except Exception as e: Log.warning("could not get unified diff", cause=e)
def table2csv(table_data): """ :param table_data: expecting a list of tuples :return: text in nice formatted csv """ text_data = [tuple(value2json(vals, pretty=True) for vals in rows) for rows in table_data] col_widths = [max(len(text) for text in cols) for cols in zip(*text_data)] template = ", ".join( "{{" + unicode(i) + "|left_align(" + unicode(w) + ")}}" for i, w in enumerate(col_widths) ) text = "\n".join(expand_template(template, d) for d in text_data) return text
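# Hedged example for table2csv above: each value is JSON-encoded, then every
# column is padded to its widest cell via the left_align template filter.
print(table2csv([
    (1, "a"),
    (22, "bb"),
]))
# 1 , "a"
# 22, "bb"    (exact padding depends on left_align)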
def fill_container(self, subtest, typed=True): """ RETURN SETTINGS THAT CAN BE USED TO POINT TO THE INDEX THAT'S FILLED """ subtest = wrap(subtest) _settings = self._es_test_settings # ALREADY COPIED AT setUp() try: url = "file://resources/schema/basic_schema.json.template?{{.|url}}" url = expand_template(url, { "type": _settings.type, "metadata": subtest.metadata }) _settings.schema = mo_json_config.get(url) # MAKE CONTAINER container = self._es_cluster.get_or_create_index( typed=typed, schema=subtest.schema or _settings.schema, kwargs=_settings ) container.add_alias(_settings.index) _settings.alias = container.settings.alias _settings.index = container.settings.index ESUtils.indexes.append(_settings.index) # INSERT DATA if '"null"' in value2json(subtest.data): Log.error("not expected") container.extend([{"value": d} for d in subtest.data]) container.flush() now = Date.now() namespace = ElasticsearchMetadata(self._es_cluster.settings) namespace.get_columns(_settings.alias, after=now) # FORCE A RELOAD # ENSURE query POINTS TO CONTAINER frum = subtest.query["from"] if frum == None: subtest.query["from"] = _settings.alias elif is_text(frum): subtest.query["from"] = frum.replace(test_jx.TEST_TABLE, _settings.alias) else: Log.error("Do not know how to handle") except Exception as e: Log.error("can not load {{data}} into container", data=subtest.data, cause=e) return _settings
def execute_sql( host, username, password, sql, schema=None, param=None, kwargs=None ): """EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?)""" kwargs.schema = coalesce(kwargs.schema, kwargs.database) if param: with MySQL(kwargs) as temp: sql = expand_template(sql, quote_param(param)) # We have no way to execute an entire SQL file in bulk, so we # have to shell out to the commandline client. args = [ "mysql", "-h{0}".format(host), "-u{0}".format(username), "-p{0}".format(password) ] if schema: args.append("{0}".format(schema)) try: proc = subprocess.Popen( args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=-1 ) if is_text(sql): sql = sql.encode("utf8") (output, _) = proc.communicate(sql) except Exception as e: raise Log.error("Can not call \"mysql\"", e) if proc.returncode: if len(sql) > 10000: sql = "<" + text(len(sql)) + " bytes of sql>" Log.error( "Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n", sql=indent(sql), return_code=proc.returncode, output=output )
def inner(changeset_id): if self.es.cluster.version.startswith("1.7."): query = { "query": {"filtered": { "query": {"match_all": {}}, "filter": {"and": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]} }}, "size": 1 } else: query = { "query": {"bool": {"must": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]}}, "size": 1 } try: # ALWAYS TRY ES FIRST with self.es_locker: response = self.es.search(query) json_diff = response.hits.hits[0]._source.changeset.diff if json_diff: return json_diff except Exception as e: pass url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id}) DEBUG and Log.note("get unified diff from {{url}}", url=url) try: response = http.get(url) diff = response.content.decode("utf8") json_diff = diff_to_json(diff) num_changes = _count(c for f in json_diff for c in f.changes) if json_diff: if revision.changeset.description.startswith("merge "): return None # IGNORE THE MERGE CHANGESETS elif num_changes < MAX_DIFF_SIZE: return json_diff else: Log.warning("Revision at {{url}} has a diff with {{num}} changes, ignored", url=url, num=num_changes) for file in json_diff: file.changes = None return json_diff except Exception as e: Log.warning("could not get unified diff from {{url}}", url=url, cause=e)
def table2csv(table_data): """ :param table_data: expecting a list of tuples :return: text in nice formatted csv """ text_data = [tuple(value2json(vals, pretty=True) for vals in rows) for rows in table_data] col_widths = [max(len(t) for t in cols) for cols in zip(*text_data)] template = ", ".join( "{{" + text(i) + "|left_align(" + text(w) + ")}}" for i, w in enumerate(col_widths) ) output = "\n".join(expand_template(template, d) for d in text_data) return output
def execute_sql( host, username, password, sql, schema=None, param=None, kwargs=None ): """EXECUTE MANY LINES OF SQL (FROM SQLDUMP FILE, MAYBE?)""" kwargs.schema = coalesce(kwargs.schema, kwargs.database) if param: with MySQL(kwargs) as temp: sql = expand_template(sql, quote_param(param)) # We have no way to execute an entire SQL file in bulk, so we # have to shell out to the commandline client. args = [ "mysql", "-h{0}".format(host), "-u{0}".format(username), "-p{0}".format(password) ] if schema: args.append("{0}".format(schema)) try: proc = subprocess.Popen( args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=-1 ) if is_text(sql): sql = sql.encode("utf8") (output, _) = proc.communicate(sql) except Exception as e: raise Log.error("Can not call \"mysql\"", e) if proc.returncode: if len(sql) > 10000: sql = "<" + text_type(len(sql)) + " bytes of sql>" Log.error( "Unable to execute sql: return code {{return_code}}, {{output}}:\n {{sql}}\n", sql=indent(sql), return_code=proc.returncode, output=output )
def to_ruby(self, schema): term = FirstOp("first", self.term).partial_eval() value = term.to_ruby(schema) if isinstance(value.frum, CoalesceOp): return CoalesceOp("coalesce", [StringOp("string", t).partial_eval() for t in value.frum.terms]).to_ruby(schema) if value.type == BOOLEAN: return Ruby( miss=self.term.missing().partial_eval(), type=STRING, expr=value.expr + ' ? "T" : "F"', frum=self ) elif value.type == INTEGER: return Ruby( miss=self.term.missing().partial_eval(), type=STRING, expr="String.valueOf(" + value.expr + ")", frum=self ) elif value.type == NUMBER: return Ruby( miss=self.term.missing().partial_eval(), type=STRING, expr=expand_template(TO_STRING, {"expr":value.expr}), frum=self ) elif value.type == STRING: return value else: return Ruby( miss=self.term.missing().partial_eval(), type=STRING, expr=expand_template(TO_STRING, {"expr":value.expr}), frum=self )
def to_es_script(self, schema): term = FirstOp("first", self.term).partial_eval() value = term.to_es_script(schema) if isinstance(value.frum, CoalesceOp): return CoalesceOp("coalesce", [StringOp("string", t).partial_eval() for t in value.frum.terms]).to_es_script(schema) if value.type == BOOLEAN: return EsScript( miss=self.term.missing().partial_eval(), type=STRING, expr=value.expr + ' ? "T" : "F"', frum=self ) elif value.type == INTEGER: return EsScript( miss=self.term.missing().partial_eval(), type=STRING, expr="String.valueOf(" + value.expr + ")", frum=self ) elif value.type == NUMBER: return EsScript( miss=self.term.missing().partial_eval(), type=STRING, expr=expand_template(TO_STRING, {"expr":value.expr}), frum=self ) elif value.type == STRING: return value else: return EsScript( miss=self.term.missing().partial_eval(), type=STRING, expr=expand_template(TO_STRING, {"expr":value.expr}), frum=self )
def compileString2Term(edge): if edge.esscript: Log.error("edge script not supported yet") value = edge.value if is_variable_name(value): value = strings.expand_template("getDocValue({{path}})", {"path": quote(value)}) else: Log.error("not handled") def fromTerm(value): return edge.domain.getPartByKey(value) return Data(toTerm={"head": "", "body": value}, fromTerm=fromTerm)
def test_round(self): expected = [ "0.0000000003142", "0.000000003142", "0.00000003142", "0.0000003142", "0.000003142", "0.00003142", "0.0003142", "0.003142", "0.03142", "0.3142", "3.142", "31.42", "314.2", "3142", "31420", "314200", "3142000", "31420000", "314200000", "3142000000", "31420000000", "314200000000", "3142000000000", "31420000000000", "314200000000000", "3142000000000000", "31420000000000000", "314200000000000000" ] start = -10 for order in range(start, 18): value = pi * (10**order) test = expand_template("{{value|round(places=4)}}", value={"value": value}) self.assertEqual(test, expected[order - start])
def query(self, sql, param=None, stream=False, row_tuples=False): """ RETURN LIST OF dicts """ if not self.cursor: # ALLOW NON-TRANSACTIONAL READS Log.error("must perform all queries inside a transaction") self._execute_backlog() try: if param: sql = expand_template(sql, self.quote_param(param)) sql = self.preamble + outdent(sql) if self.debug: Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) self.cursor.execute(sql) if row_tuples: if stream: result = self.cursor else: result = wrap(list(self.cursor)) else: columns = [ utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, []) ] if stream: result = (wrap( {c: utf8_to_unicode(v) for c, v in zip(columns, row)}) for row in self.cursor) else: result = wrap( [{c: utf8_to_unicode(v) for c, v in zip(columns, row)} for row in self.cursor]) return result except Exception as e: if isinstance( e, InterfaceError) or e.message.find("InterfaceError") >= 0: Log.error("Did you close the db connection?", e) Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
def __unicode__(self): output = self.type + ": " + self.template + "\n" if self.params: output = expand_template(output, self.params) if self.trace: output += indent(format_trace(self.trace)) if self.cause: cause_strings = [] for c in listwrap(self.cause): with suppress_exception: cause_strings.append(text_type(c)) output += "caused by\n\t" + "and caused by\n\t".join(cause_strings) return output
def time_delta_pusher(please_stop, appender, queue, interval): """ appender - THE FUNCTION THAT ACCEPTS A STRING queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE interval - timedelta USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL """ next_run = time() + interval while not please_stop: profiler = Thread.current().cprofiler profiler.disable() (Till(till=next_run) | please_stop).wait() profiler.enable() next_run = time() + interval logs = queue.pop_all() if not logs: continue lines = [] for log in logs: try: if log is THREAD_STOP: please_stop.go() next_run = time() else: expanded = expand_template(log.get("template"), log.get("params")) lines.append(expanded) except Exception as e: location = log.get('params', {}).get('location', {}) Log.warning("Trouble formatting log from {{location}}", location=location, cause=e) # SWALLOW ERROR, GOT TO KEEP RUNNING try: appender(u"\n".join(lines) + u"\n") except Exception as e: sys.stderr.write( str("Trouble with appender: ") + str(e.__class__.__name__) + str("\n"))
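# Hypothetical wiring for time_delta_pusher (the queue and thread helpers named
# here are assumptions, not a confirmed API): a worker drains log_queue every
# 5 seconds and hands the joined batch to the appender.
#
#   Thread.run("log batcher", time_delta_pusher,
#              appender=sys.stdout.write, queue=log_queue, interval=5)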
def __unicode__(self): output = self.context + ": " + self.template + CR if self.params: output = expand_template(output, self.params) if self.trace: output += indent(format_trace(self.trace)) if self.cause: cause_strings = [] for c in listwrap(self.cause): try: cause_strings.append(text_type(c)) except Exception as e: sys.stderr.write("Problem serializing cause " + text_type(c)) output += "caused by\n\t" + "and caused by\n\t".join(cause_strings) return output
def quote_sql(self, value, param=None): """ USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT """ try: if isinstance(value, SQL): if not param: return value param = {k: self.quote_sql(v) for k, v in param.items()} return expand_template(value, param) elif isinstance(value, text_type): return value elif isinstance(value, Mapping): return self.db.literal(json_encode(value)) elif hasattr(value, '__iter__'): return sql_iso(sql_list([self.quote_sql(vv) for vv in value])) else: return text_type(value) except Exception as e: Log.error("problem quoting SQL", e)
def quote_sql(self, value, param=None): """ USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT """ try: if isinstance(value, SQL): if not param: return value param = {k: self.quote_sql(v) for k, v in param.items()} return expand_template(value, param) elif isinstance(value, basestring): return value elif isinstance(value, Mapping): return self.db.literal(json_encode(value)) elif hasattr(value, '__iter__'): return "(" + ",".join([self.quote_sql(vv) for vv in value]) + ")" else: return unicode(value) except Exception as e: Log.error("problem quoting SQL", e)
def quote_sql(value, param=None): """ USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT """ try: if isinstance(value, SQL): if not param: return value param = {k: quote_sql(v) for k, v in param.items()} return SQL(expand_template(value, param)) elif is_text(value): return SQL(value) elif is_data(value): return quote_value(json_encode(value)) elif hasattr(value, '__iter__'): return quote_list(value) else: return text_type(value) except Exception as e: Log.error("problem quoting SQL", e)
def quote_sql(value, param=None): """ USED TO EXPAND THE PARAMETERS TO THE SQL() OBJECT """ try: if isinstance(value, SQL): if not param: return value param = {k: quote_sql(v) for k, v in param.items()} return SQL(expand_template(value, param)) elif is_text(value): return SQL(value) elif is_data(value): return quote_value(json_encode(value)) elif hasattr(value, '__iter__'): return quote_list(value) else: return text(value) except Exception as e: Log.error("problem quoting SQL", e)
def query(self, sql, param=None): """ RETURN LIST OF dicts """ self._execute_backlog() try: old_cursor = self.cursor if not old_cursor: # ALLOW NON-TRANSACTIONAL READS self.cursor = self.db.cursor() self.cursor.execute("SET TIME_ZONE='+00:00'") self.cursor.close() self.cursor = self.db.cursor() if param: sql = expand_template(sql, self.quote_param(param)) sql = self.preamble + outdent(sql) if self.debug: Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) self.cursor.execute(sql) columns = [ utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, []) ] fixed = [[utf8_to_unicode(c) for c in row] for row in self.cursor] result = convert.table2list(columns, fixed) if not old_cursor: # CLEANUP AFTER NON-TRANSACTIONAL READS self.cursor.close() self.cursor = None return result except Exception as e: if isinstance( e, InterfaceError) or e.message.find("InterfaceError") >= 0: Log.error("Did you close the db connection?", e) Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
def inner(changeset_id): if self.es.cluster.version.startswith("1.7."): query = { "query": {"filtered": { "query": {"match_all": {}}, "filter": {"and": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]} }}, "size": 1 } else: query = { "query": {"bool": {"must": [ {"prefix": {"changeset.id": changeset_id}}, {"range": {"etl.timestamp": {"gt": MIN_ETL_AGE}}} ]}}, "size": 1 } try: # ALWAYS TRY ES FIRST with self.es_locker: response = self.es.search(query) moves = response.hits.hits[0]._source.changeset.moves if moves: return moves except Exception as e: pass url = expand_template(DIFF_URL, {"location": revision.branch.url, "rev": changeset_id}) DEBUG and Log.note("get unified diff from {{url}}", url=url) try: moves = http.get(url).content.decode('latin1') # THE ENCODING DOES NOT MATTER BECAUSE WE ONLY USE THE '+', '-' PREFIXES IN THE DIFF return diff_to_moves(text_type(moves)) except Exception as e: Log.warning("could not get unified diff from {{url}}", url=url, cause=e)
def column_query(self, sql, param=None): """ RETURN RESULTS IN [column][row_num] GRID """ self._execute_backlog() try: old_cursor = self.cursor if not old_cursor: # ALLOW NON-TRANSACTIONAL READS self.cursor = self.db.cursor() self.cursor.execute("SET TIME_ZONE='+00:00'") self.cursor.close() self.cursor = self.db.cursor() if param: sql = expand_template(sql, quote_param(param)) sql = self.preamble + outdent(sql) self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql)) self.cursor.execute(sql) grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor] # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])] result = transpose(*grid) if not old_cursor: # CLEANUP AFTER NON-TRANSACTIONAL READS self.cursor.close() self.cursor = None return result except Exception as e: e = Except.wrap(e) if "InterfaceError" in e: Log.error("Did you close the db connection?", e) Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
def fill_container(self, subtest, tjson=False): """ RETURN SETTINGS THAT CAN BE USED TO POINT TO THE INDEX THAT'S FILLED """ subtest = wrap(subtest) _settings = self._es_test_settings # ALREADY COPIED AT setUp() # _settings.index = "testing_" + Random.hex(10).lower() # settings.type = "test_result" try: url = "file://resources/schema/basic_schema.json.template?{{.|url}}" url = expand_template(url, { "type": _settings.type, "metadata": subtest.metadata }) _settings.schema = mo_json_config.get(url) # MAKE CONTAINER container = self._es_cluster.get_or_create_index(tjson=tjson, kwargs=_settings) container.add_alias(_settings.index) # INSERT DATA container.extend([ {"value": v} for v in subtest.data ]) container.flush() # ENSURE query POINTS TO CONTAINER frum = subtest.query["from"] if frum == None: subtest.query["from"] = _settings.index elif isinstance(frum, basestring): subtest.query["from"] = frum.replace(TEST_TABLE, _settings.index) else: Log.error("Do not know how to handle") except Exception as e: Log.error("can not load {{data}} into container", {"data": subtest.data}, e)
def replace_vars(text, params=None): """ REPLACE {{vars}} WITH ENVIRONMENTAL VALUES """ start = 0 var = strings.between(text, "{{", "}}", start) while var: replace = "{{" + var + "}}" index = text.find(replace, 0) if index == -1: Log.error("could not find {{var}} (including quotes)", var=replace) end = index + len(replace) try: replacement = text_type(Date(var).unix) text = text[:index] + replacement + text[end:] start = index + len(replacement) except Exception as _: start += 1 var = strings.between(text, "{{", "}}", start) text = expand_template(text, coalesce(params, {})) return text
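# Hedged sketch of replace_vars above: a {{...}} span that parses as a Date becomes
# its unix timestamp; anything else falls through to expand_template with params.
sql = replace_vars(
    "SELECT * FROM events WHERE ts > {{now-week}} AND source = {{source}}",
    {"source": "etl"},  # hypothetical table and parameter
)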
def es_aggsop(es, frum, query): query = query.copy() # WE WILL MARK UP THIS QUERY schema = frum.schema select = listwrap(query.select) es_query = Data() new_select = Data() # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING formula = [] for s in select: if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".": if schema.query_path == ".": s.pull = jx_expression_to_function("doc_count") else: s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]}) elif isinstance(s.value, Variable): if s.aggregate == "count": new_select["count_" + literal_field(s.value.var)] += [s] else: new_select[literal_field(s.value.var)] += [s] elif s.aggregate: formula.append(s) for canonical_name, many in new_select.items(): for s in many: columns = frum.schema.values(s.value.var) if s.aggregate == "count": canonical_names = [] for column in columns: cn = literal_field(column.es_column + "_count") if column.jx_type == EXISTS: canonical_names.append(cn + ".doc_count") es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}} else: canonical_names.append(cn + ".value") es_query.aggs[cn].value_count.field = column.es_column if len(canonical_names) == 1: s.pull = jx_expression_to_function(canonical_names[0]) else: s.pull = jx_expression_to_function({"add": canonical_names}) elif s.aggregate == "median": if len(columns) > 1: Log.error("Do not know how to count columns with more than one type (script probably)") # ES USES DIFFERENT METHOD FOR PERCENTILES key = literal_field(canonical_name + " percentile") es_query.aggs[key].percentiles.field = columns[0].es_column es_query.aggs[key].percentiles.percents += [50] s.pull = jx_expression_to_function(key + ".values.50\\.0") elif s.aggregate == "percentile": if len(columns) > 1: Log.error("Do not know how to count columns with more than one type (script probably)") # ES USES DIFFERENT METHOD FOR PERCENTILES key = literal_field(canonical_name + " percentile") if isinstance(s.percentile, text_type) or s.percentile < 0 or 1 < s.percentile: Log.error("Expecting percentile to be a float from 0.0 to 1.0") percent = Math.round(s.percentile * 100, decimal=6) es_query.aggs[key].percentiles.field = columns[0].es_column es_query.aggs[key].percentiles.percents += [percent] es_query.aggs[key].percentiles.tdigest.compression = 2 s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent))) elif s.aggregate == "cardinality": canonical_names = [] for column in columns: cn = literal_field(column.es_column + "_cardinality") canonical_names.append(cn) es_query.aggs[cn].cardinality.field = column.es_column if len(columns) == 1: s.pull = jx_expression_to_function(canonical_names[0] + ".value") else: s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0}) elif s.aggregate == "stats": if len(columns) > 1: Log.error("Do not know how to count columns with more than one type (script probably)") # REGULAR STATS stats_name = literal_field(canonical_name) es_query.aggs[stats_name].extended_stats.field = columns[0].es_column # GET MEDIAN TOO! median_name = literal_field(canonical_name + "_percentile") es_query.aggs[median_name].percentiles.field = columns[0].es_column es_query.aggs[median_name].percentiles.percents += [50] s.pull = get_pull_stats(stats_name, median_name) elif s.aggregate == "union": pulls = [] for column in columns: script = {"scripted_metric": { 'init_script': 'params._agg.terms = new HashSet()', 'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v);', 'combine_script': 'return params._agg.terms.toArray()', 'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()', }} stats_name = encode_property(column.es_column) if column.nested_path[0] == ".": es_query.aggs[stats_name] = script pulls.append(jx_expression_to_function(stats_name + ".value")) else: es_query.aggs[stats_name] = { "nested": {"path": column.nested_path[0]}, "aggs": {"_nested": script} } pulls.append(jx_expression_to_function(stats_name + "._nested.value")) if len(pulls) == 0: s.pull = NULL elif len(pulls) == 1: s.pull = pulls[0] else: s.pull = lambda row: UNION(p(row) for p in pulls) else: if len(columns) > 1: Log.error("Do not know how to count columns with more than one type (script probably)") elif len(columns) < 1: # PULL VALUE OUT OF THE stats AGGREGATE s.pull = jx_expression_to_function({"null":{}}) else: # PULL VALUE OUT OF THE stats AGGREGATE es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]}) for i, s in enumerate(formula): canonical_name = literal_field(s.name) if isinstance(s.value, TupleOp): if s.aggregate == "count": # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY s.pull = "doc_count" elif s.aggregate in ('max', 'maximum', 'min', 'minimum'): if s.aggregate in ('max', 'maximum'): dir = 1 op = "max" else: dir = -1 op = 'min' nully = TupleOp("tuple", [NULL]*len(s.value.terms)).partial_eval().to_es_script(schema).expr selfy = s.value.partial_eval().to_es_script(schema).expr script = {"scripted_metric": { 'init_script': 'params._agg.best = ' + nully + ';', 'map_script': 'params._agg.best = ' + expand_template(MAX_OF_TUPLE, {"expr1": "params._agg.best", "expr2": selfy, "dir": dir, "op": op}) + ";", 'combine_script': 'return params._agg.best', 'reduce_script': 'return params._aggs.stream().max(' + expand_template(COMPARE_TUPLE, {"dir": dir, "op": op}) + ').get()', }} if schema.query_path[0] == ".": es_query.aggs[canonical_name] = script s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value") else: es_query.aggs[canonical_name] = { "nested": {"path": schema.query_path[0]}, "aggs": {"_nested": script} } s.pull = jx_expression_to_function(literal_field(canonical_name) + "._nested.value") else: Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate) elif s.aggregate == "count": es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema) s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value") elif s.aggregate == "median": # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT key = literal_field(canonical_name + " percentile") es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema) es_query.aggs[key].percentiles.percents += [50] s.pull = jx_expression_to_function(key + ".values.50\\.0") elif s.aggregate == "percentile": # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT key = literal_field(canonical_name + " percentile") percent = Math.round(s.percentile * 100, decimal=6) es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema) es_query.aggs[key].percentiles.percents += [percent] s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent))) elif s.aggregate == "cardinality": # ES USES DIFFERENT METHOD FOR CARDINALITY key = canonical_name + " cardinality" es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema) s.pull = jx_expression_to_function(key + ".value") elif s.aggregate == "stats": # REGULAR STATS stats_name = literal_field(canonical_name) es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema) # GET MEDIAN TOO! median_name = literal_field(canonical_name + " percentile") es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema) es_query.aggs[median_name].percentiles.percents += [50] s.pull = get_pull_stats(stats_name, median_name) elif s.aggregate == "union": # USE TERMS AGGREGATE TO SIMULATE union stats_name = literal_field(canonical_name) es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema) s.pull = jx_expression_to_function(stats_name + ".buckets.key") else: # PULL VALUE OUT OF THE stats AGGREGATE s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate]) es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema) decoders = get_decoders_by_depth(query) start = 0 # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested split_where = split_expression_by_depth(query.where, schema=frum.schema) if len(split_field(frum.name)) > 1: if any(split_where[2::]): Log.error("Where clause is too deep") for d in decoders[1]: es_query = d.append_query(es_query, start) start += d.num_columns if split_where[1]: #TODO: INCLUDE FILTERS ON EDGES filter_ = AndOp("and", split_where[1]).to_esfilter(schema) es_query = Data( aggs={"_filter": set_default({"filter": filter_}, es_query)} ) es_query = wrap({ "aggs": {"_nested": set_default( {"nested": {"path": schema.query_path[0]}}, es_query )} }) else: if any(split_where[1::]): Log.error("Where clause is too deep") if decoders: for d in jx.reverse(decoders[0]): es_query = d.append_query(es_query, start) start += d.num_columns if split_where[0]: #TODO: INCLUDE FILTERS ON EDGES filter = AndOp("and", split_where[0]).to_esfilter(schema) es_query = Data( aggs={"_filter": set_default({"filter": filter}, es_query)} ) # </TERRIBLE SECTION> if not es_query: es_query = wrap({"query": {"match_all": {}}}) es_query.size = 0 with Timer("ES query time") as es_duration: result = es_post(es, es_query, query.limit) try: format_time = Timer("formatting") with format_time: decoders = [d for ds in decoders for d in ds] result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total) # IT APPEARS THE OLD doc_count IS GONE formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format] if query.edges: output = formatter(decoders, result.aggregations, start, query, select) elif query.groupby: output = groupby_formatter(decoders, result.aggregations, start, query, select) else: output = aggop_formatter(decoders, result.aggregations, start, query, select) output.meta.timing.formatting = format_time.duration output.meta.timing.es_search = es_duration.duration output.meta.content_type = mime_type output.meta.es_query = es_query return output except Exception as e: if query.format not in format_dispatch: Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e) Log.error("Some problem", cause=e)
def json2value(json_string, params=Null, flexible=False, leaves=False): """ :param json_string: THE JSON :param params: STANDARD JSON PARAMS :param flexible: REMOVE COMMENTS :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED :return: Python value """ if isinstance(json_string, str): Log.error("only unicode json accepted") try: if flexible: # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58 json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE) json_string = "\n".join( remove_line_comment(l) for l in json_string.split("\n")) # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA json_string = re.sub(r",\s*\}", r"}", json_string) # ALLOW LISTS TO END WITH COMMA json_string = re.sub(r",\s*\]", r"]", json_string) if params: # LOOKUP REFERENCES json_string = expand_template(json_string, params) try: value = wrap(json_decoder(unicode(json_string))) except Exception as e: Log.error("can not decode\n{{content}}", content=json_string, cause=e) if leaves: value = wrap_leaves(value) return value except Exception as e: e = Except.wrap(e) if not json_string.strip(): Log.error("JSON string is only whitespace") c = e while "Expecting '" in c.cause and "' delimiter: line" in c.cause: c = c.cause if "Expecting '" in c and "' delimiter: line" in c: line_index = int(strings.between(c.message, " line ", " column ")) - 1 column = int(strings.between(c.message, " column ", " ")) - 1 line = json_string.split("\n")[line_index].replace("\t", " ") if column > 20: sample = "..." + line[column - 20:] pointer = " " + (" " * 20) + "^" else: sample = line pointer = (" " * column) + "^" if len(sample) > 43: sample = sample[:43] + "..." Log.error("Can not decode JSON at:\n\t" + sample + "\n\t" + pointer + "\n") base_str = strings.limit(json_string, 1000).encode('utf8') hexx_str = bytes2hex(base_str, " ") try: char_str = " " + " ".join( (c.decode("latin1") if ord(c) >= 32 else ".") for c in base_str) except Exception as e: char_str = " " Log.error("Can not decode JSON:\n" + char_str + "\n" + hexx_str + "\n", e)
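# Hedged example of json2value's flexible mode: comments and trailing commas are
# stripped, and {{params}} are expanded before decoding.
doc = json2value(
    u'{"a": 1, "who": "{{name}}", }',  # trailing comma tolerated in flexible mode
    params={"name": "etl"},
    flexible=True,
)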
def message(self): return expand_template(self.template, self.params)
def format_trace(tbs, start=0): trace = [] for d in tbs[start::]: item = expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d) trace.append(item) return "".join(trace)
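# format_trace consumes the list-of-dicts trace shape used elsewhere in this
# module; a minimal sketch:
print(format_trace([
    {"file": "app.py", "line": 42, "method": "main"},
    {"file": "lib.py", "line": 7, "method": "helper"},
]))
# File "app.py", line 42, in main
# File "lib.py", line 7, in helper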
def DataClass(name, columns, constraint=None): """ Use the DataClass to define a class, but with some extra features: 1. restrict the datatype of property 2. restrict if `required`, or if `nulls` are allowed 3. generic constraints on object properties It is expected that this class become a real class (or be removed) in the long term because it is expensive to use and should only be good for verifying program correctness, not user input. :param name: Name of the class we are creating :param columns: Each columns[i] has properties { "name", - (required) name of the property "required", - False if it must be defined (even if None) "nulls", - True if property can be None, or missing "default", - A default value, if none is provided "type" - a Python datatype } :param constraint: a JSON query Expression for extra constraints (return true if all constraints are met) :return: The class that has been created """ columns = wrap([{ "name": c, "required": True, "nulls": False, "type": object } if is_text(c) else c for c in columns]) slots = columns.name required = wrap( filter(lambda c: c.required and not c.nulls and not c.default, columns)).name nulls = wrap(filter(lambda c: c.nulls, columns)).name defaults = {c.name: coalesce(c.default, None) for c in columns} types = {c.name: coalesce(c.jx_type, object) for c in columns} code = expand_template( """ from __future__ import unicode_literals from mo_future import is_text, is_binary from collections import Mapping meta = None types_ = {{types}} defaults_ = {{defaults}} class {{class_name}}(Mapping): __slots__ = {{slots}} def _constraint(row, rownum, rows): try: return {{constraint_expr}} except Exception as e: Log.error( "constraint\\n{" + "{code}}\\nnot satisfied {" + "{expect}}\\n{" + "{value|indent}}", code={{constraint_expr|quote}}, expect={{constraint}}, value=row, cause=e ) def __init__(self, **kwargs): if not kwargs: return for s in {{slots}}: object.__setattr__(self, s, kwargs.get(s, {{defaults}}.get(s, None))) missed = {{required}}-set(kwargs.keys()) if missed: Log.error("Expecting properties {"+"{missed}}", missed=missed) illegal = set(kwargs.keys())-set({{slots}}) if illegal: Log.error("{"+"{names}} are not a valid properties", names=illegal) self._constraint(0, [self]) def __getitem__(self, item): return getattr(self, item) def __setitem__(self, item, value): setattr(self, item, value) return self def __setattr__(self, item, value): if item not in {{slots}}: Log.error("{"+"{item|quote}} not valid attribute", item=item) object.__setattr__(self, item, value) self._constraint(0, [self]) def __getattr__(self, item): Log.error("{"+"{item|quote}} not valid attribute", item=item) def __hash__(self): return object.__hash__(self) def __eq__(self, other): if isinstance(other, {{class_name}}) and dict(self)==dict(other) and self is not other: Log.error("expecting to be same object") return self is other def __dict__(self): return {k: getattr(self, k) for k in {{slots}}} def items(self): return ((k, getattr(self, k)) for k in {{slots}}) def __copy__(self): _set = object.__setattr__ output = object.__new__({{class_name}}) {{assign}} return output def __iter__(self): return {{slots}}.__iter__() def __len__(self): return {{len_slots}} def __str__(self): return str({{dict}}) """, { "class_name": name, "slots": "(" + (", ".join(quote(s) for s in slots)) + ")", "required": "{" + (", ".join(quote(s) for s in required)) + "}", "nulls": "{" + (", ".join(quote(s) for s in nulls)) + "}", "defaults": Literal(defaults).to_python(), "len_slots": len(slots), "dict": "{" + (", ".join(quote(s) + ": self." + s for s in slots)) + "}", "assign": "; ".join("_set(output, " + quote(s) + ", self." + s + ")" for s in slots), "types": "{" + (",".join(quote(k) + ": " + v.__name__ for k, v in types.items())) + "}", "constraint_expr": Python[jx_expression(not ENABLE_CONSTRAINTS or constraint)].to_python(), "constraint": value2json(constraint), }, ) output = _exec(code, name) register_data(output) return output
def format_trace(tbs, start=0): return "".join( expand_template('File "{{file}}", line {{line}}, in {{method}}\n', d) for d in tbs[start::] )
def _get_source_code_from_hg(self, revision, file_path): response = http.get(expand_template(FILE_URL, {"location": revision.branch.url, "rev": revision.changeset.id, "path": file_path})) return response.content.decode("utf8", "replace")
def get_dataum(db_config, signature_id): db = MySQL(db_config) with db: return db.query(expand_template(datum_sql, quote_list(listwrap(signature_id))))