def parse(*args):
    """
    CONVERT THE GIVEN ARGUMENTS TO A DATE

    ONE ARGUMENT: A datetime/date, A Date, A NUMBER, OR A STRING.  NUMBERS
    (AND INTEGER STRINGS OF 9, 10, 12 OR 13 CHARACTERS) LARGER THAN
    9999999999 ARE DIVIDED BY 1000 BEFORE BEING GIVEN TO unix2Date.
    MANY ARGUMENTS: GIVEN TO unicode2Date WHEN THE FIRST IS A STRING,
    OTHERWISE TO THE datetime CONSTRUCTOR.
    ANY EXCEPTION IS REPORTED THROUGH Log.error
    """
    try:
        if len(args) != 1:
            if isinstance(args[0], basestring):
                return unicode2Date(*args)
            return unix2Date(datetime2unix(datetime(*args)))

        a0 = args[0]
        if isinstance(a0, (datetime, date)):
            return unix2Date(datetime2unix(a0))
        if isinstance(a0, Date):
            return unix2Date(a0.unix)

        # REAL NUMBERS, AND STRINGS THAT LOOK LIKE TIMESTAMPS, SHARE ONE PATH
        looks_numeric = isinstance(a0, (int, long, float, Decimal)) or (
            isinstance(a0, basestring)
            and len(a0) in [9, 10, 12, 13]
            and Math.is_integer(a0)
        )
        if looks_numeric:
            stamp = float(a0)
            if stamp > 9999999999:
                # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
                stamp = stamp / 1000
            return unix2Date(stamp)

        if isinstance(a0, basestring):
            return unicode2Date(a0)
        return unix2Date(datetime2unix(datetime(*args)))
    except Exception as e:
        from pyLibrary.debugs.logs import Log

        Log.error("Can not convert {{args}} to Date", args=args, cause=e)
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    ACCEPTS ONE SORT CLAUSE, OR A LIST OF THEM.  EACH CLAUSE MAY BE
    * A STRING (PARSED WITH jx_expression, ASCENDING)
    * AN Expression (USED AS-IS, ASCENDING)
    * AN INTEGER (WRAPPED IN OffsetOp, ASCENDING)
    * THE SHORT FORM {name: direction, ...}
    * THE LONG FORM {"value" (OR "field"): ..., "sort": direction}

    RETURNS A DictList OF {"value": ..., "sort": ...}; DictList.EMPTY WHEN
    sort IS MISSING
    """
    # NOTE(review): `== None` IS KEPT ON PURPOSE; PRESUMABLY THE LIBRARY'S
    # Null OBJECT MUST ALSO MATCH HERE - CONFIRM BEFORE CHANGING TO `is None`
    if sort == None:
        return DictList.EMPTY

    output = DictList()
    for s in listwrap(sort):
        if isinstance(s, basestring):
            output.append({"value": jx_expression(s), "sort": 1})
        elif isinstance(s, Expression):
            output.append({"value": s, "sort": 1})
        elif Math.is_integer(s):
            output.append({"value": OffsetOp("offset", s), "sort": 1})
        elif all(d in sort_direction for d in s.values()) and not s.sort and not s.value:
            # SHORT FORM: HONOUR THE DIRECTION GIVEN FOR EACH NAME
            # (IT WAS PREVIOUSLY FORCED TO -1, WHATEVER WAS REQUESTED)
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        else:
            output.append({
                "value": jx_expression(coalesce(s.value, s.field)),
                "sort": coalesce(sort_direction[s.sort], 1)
            })
    return output
def wrap(query, schema=None):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    A QueryOp (OR A MISSING QUERY) IS RETURNED UNCHANGED; ANYTHING ELSE IS
    COPIED, CLAUSE BY CLAUSE, INTO A NEW QueryOp
    """
    if isinstance(query, QueryOp) or query == None:
        return query

    # NOTE(review): `wrap` HERE PRESUMABLY RESOLVES TO THE LIBRARY'S GENERIC
    # wrap() HELPER, NOT TO THIS FUNCTION - CONFIRM AGAINST THE ENCLOSING SCOPE
    query = wrap(query)

    output = QueryOp("from", None)
    output.format = query.format
    output.frum = wrap_from(query["from"], schema=schema)
    if not schema and isinstance(output.frum, Schema):
        # NO SCHEMA GIVEN, SO USE THE ONE THE from CLAUSE RESOLVED TO
        schema = output.frum

    if query.select:
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    elif query.edges or query.groupby:
        # AGGREGATING WITHOUT A select MEANS "COUNT"
        output.select = Dict(name="count", value=jx_expression("."), aggregate="count", default=0)
    else:
        output.select = _normalize_selects(".", query["from"])

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    else:
        # AT MOST ONE OF edges/groupby IS GIVEN; THE OTHER BECOMES Null
        output.edges = _normalize_edges(query.edges, schema=schema) if query.edges else Null
        output.groupby = _normalize_groupby(query.groupby, schema=schema) if query.groupby else Null

    output.where = _normalize_where(query.where, schema=schema)
    output.window = [_normalize_window(w) for w in listwrap(query.window)]
    output.having = None
    output.sort = _normalize_sort(query.sort)

    output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    if not Math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean
    return output
def _convert_query(self, query):
    """
    CONVERT A QUERY, CLAUSE BY CLAUSE, INTO A NEW Query USING THIS
    OBJECT'S _convert_* METHODS

    COMPLAINS (Log.error) WHEN BOTH groupby AND edges ARE GIVEN, WHEN THE
    LIMIT IS NOT A NON-NEGATIVE INTEGER, OR WHEN A REFERENCED COLUMN HAS
    A nested_path
    """
    query = wrap(query)

    output = Query()
    output["from"] = self._convert_from(query["from"])
    output.format = query.format

    if query.select:
        output.select = convert_list(self._convert_select, query.select)
    elif query.edges or query.groupby:
        output.select = {"name": "count", "value": ".", "aggregate": "count"}
    else:
        output.select = {"name": "__all__", "value": "*", "aggregate": "none"}

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = convert_list(self._convert_edge, query.edges)
        output.groupby = None
    elif query.groupby:
        output.edges = None
        output.groupby = convert_list(self._convert_group, query.groupby)
    else:
        output.edges = []
        output.groupby = None

    output.where = self.convert(query.where)
    output.window = convert_list(self._convert_window, query.window)
    output.sort = self._convert_sort(query.sort)

    output.limit = coalesce(query.limit, DEFAULT_LIMIT)
    if not Math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean

    # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
    # THE from SOURCE IS.  VARIABLES IN THE where CLAUSE ARE EXCLUDED
    referenced = get_all_vars(output, exclude_where=True)
    for column in query.columns:
        if column.name not in referenced:
            continue
        if column.nested_path:
            Log.error("This query, with variable {{var_name}} is too deep", var_name=column.name)

    output.having = convert_list(self._convert_having, query.having)
    return output
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    STRINGS AND INTEGERS BECOME ASCENDING SORTS ON THAT FIELD; ANYTHING ELSE
    IS READ AS {"field" (OR "value"): ..., "sort": direction}.
    RETURNS A WRAPPED LIST OF {"field": ..., "sort": ...}
    """
    if not sort:
        return DictList.EMPTY

    output = DictList()
    for clause in listwrap(sort):
        if isinstance(clause, basestring) or Math.is_integer(clause):
            field = clause
            direction = 1
        else:
            field = coalesce(clause.field, clause.value)
            # DIRECTION DEFAULTS TO ASCENDING
            direction = coalesce(sort_direction[clause.sort], 1)
        output.append({"field": field, "sort": direction})
    return wrap(output)
def wrap(query, schema=None):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    A QueryOp (OR A MISSING QUERY) IS RETURNED UNCHANGED; ANYTHING ELSE IS
    COPIED, CLAUSE BY CLAUSE, INTO A NEW QueryOp
    """
    if isinstance(query, QueryOp) or query == None:
        return query

    # NOTE(review): `wrap` HERE PRESUMABLY RESOLVES TO THE LIBRARY'S GENERIC
    # wrap() HELPER, NOT TO THIS FUNCTION - CONFIRM AGAINST THE ENCLOSING SCOPE
    query = wrap(query)

    output = QueryOp("from", None)
    output.format = query.format
    output.frum = wrap_from(query["from"], schema=schema)
    if not schema and isinstance(output.frum, Schema):
        # NO SCHEMA GIVEN, SO USE THE ONE THE from CLAUSE RESOLVED TO
        schema = output.frum

    if query.select:
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    elif query.edges or query.groupby:
        # AGGREGATING WITHOUT A select MEANS "COUNT"
        output.select = Dict(name="count", value=jx_expression("."), aggregate="count", default=0)
    else:
        output.select = _normalize_selects(".", query["from"])

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    else:
        # AT MOST ONE OF edges/groupby IS GIVEN; THE OTHER BECOMES Null
        output.edges = _normalize_edges(query.edges, schema=schema) if query.edges else Null
        output.groupby = _normalize_groupby(query.groupby, schema=schema) if query.groupby else Null

    output.where = _normalize_where(query.where, schema=schema)
    output.window = [_normalize_window(w) for w in listwrap(query.window)]
    output.having = None
    output.sort = _normalize_sort(query.sort)

    output.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    if not Math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean
    return output
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    EACH CLAUSE MAY BE
    * A STRING OR INTEGER (ASCENDING SORT ON THAT VALUE)
    * THE SHORT FORM {name: direction, ...}
    * THE LONG FORM {"value" (OR "field"): ..., "sort": direction}

    RETURNS A WRAPPED LIST OF {"value": ..., "sort": ...}
    """
    if not sort:
        return DictList.EMPTY

    output = DictList()
    for s in listwrap(sort):
        if isinstance(s, basestring) or Math.is_integer(s):
            output.append({"value": s, "sort": 1})
        elif not s.sort and not s.value and all(d in sort_direction for d in s.values()):
            # SHORT FORM.  THE CHEAP CHECKS COME FIRST SO THE LONG FORM NEVER
            # HAS ITS VALUES INSPECTED, AND EVERY DIRECTION KNOWN TO
            # sort_direction IS HONOURED (NOT JUST "desc")
            for v, d in s.items():
                output.append({"value": v, "sort": sort_direction[d]})
        else:
            output.append({
                "value": coalesce(s.value, s.field),
                "sort": coalesce(sort_direction[s.sort], 1)
            })
    return wrap(output)
def normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    STRINGS AND INTEGERS SORT ASCENDING; A CLAUSE WITH NO field, value OR
    sort IS ASSUMED TO BE {name: sort}; ANYTHING ELSE IS THE LONG FORM.
    RETURNS A WRAPPED LIST OF {"value": ..., "sort": ...}
    """
    if not sort:
        return DictList.EMPTY

    output = DictList()
    for clause in listwrap(sort):
        if isinstance(clause, basestring) or Math.is_integer(clause):
            output.append({"value": clause, "sort": 1})
            continue

        if clause.field or clause.value or not (clause.sort == None):
            # LONG FORM, DIRECTION DEFAULTS TO ASCENDING
            output.append({
                "value": coalesce(clause.field, clause.value),
                "sort": coalesce(sort_direction[clause.sort], 1)
            })
            continue

        # ASSUME {name: sort} FORM
        for name, direction in clause.items():
            output.append({"value": name, "sort": sort_direction[direction]})
    return wrap(output)
def _normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    ACCEPTS ONE SORT CLAUSE, OR A LIST OF THEM.  EACH CLAUSE MAY BE
    * A STRING (PARSED WITH jx_expression, ASCENDING)
    * AN INTEGER (WRAPPED IN OffsetOp, ASCENDING)
    * THE SHORT FORM {name: direction, ...}
    * THE LONG FORM {"value" (OR "field"): ..., "sort": direction}

    RETURNS A DictList OF {"value": ..., "sort": ...}; DictList.EMPTY WHEN
    sort IS MISSING
    """
    # NOTE(review): `== None` IS KEPT ON PURPOSE; PRESUMABLY THE LIBRARY'S
    # Null OBJECT MUST ALSO MATCH HERE - CONFIRM BEFORE CHANGING TO `is None`
    if sort == None:
        return DictList.EMPTY

    output = DictList()
    for s in listwrap(sort):
        if isinstance(s, basestring):
            output.append({"value": jx_expression(s), "sort": 1})
        elif Math.is_integer(s):
            output.append({"value": OffsetOp("offset", s), "sort": 1})
        elif all(d in sort_direction for d in s.values()) and not s.sort and not s.value:
            # SHORT FORM: HONOUR THE DIRECTION GIVEN FOR EACH NAME
            # (IT WAS PREVIOUSLY FORCED TO -1, WHATEVER WAS REQUESTED)
            for v, d in s.items():
                output.append({"value": jx_expression(v), "sort": sort_direction[d]})
        else:
            output.append({
                "value": jx_expression(coalesce(s.value, s.field)),
                "sort": coalesce(sort_direction[s.sort], 1)
            })
    return output
def get_last_updated(es):
    """
    RETURN THE LARGEST config.primary_field VALUE FOUND IN es

    ASKS FOR THE SINGLE RECORD THAT SORTS FIRST WHEN ORDERED DESCENDING.
    A unicode VALUE IS RETURNED AS-IS, AN INTEGER-LIKE VALUE AS int.
    ON ANY FAILURE A WARNING IS LOGGED AND None IS RETURNED
    """
    try:
        newest_first = {
            "query": {"match_all": {}},
            "from": 0,
            "size": 1,
            "sort": {config.primary_field: "desc"}
        }
        newest = es.search(newest_first).hits.hits[0]
        latest = newest._source[config.primary_field]
        if not isinstance(latest, unicode) and Math.is_integer(latest):
            latest = int(latest)
        return latest
    except Exception as e:
        Log.warning("Can not get_last_updated from {{host}}/{{index}}", {
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
        return None
def normalize_sort(sort=None):
    """
    CONVERT SORT PARAMETERS TO A NORMAL FORM SO EASIER TO USE

    STRINGS AND INTEGERS SORT ASCENDING; A CLAUSE WITH NO field, value OR
    sort IS ASSUMED TO BE {name: sort}; ANYTHING ELSE IS THE LONG FORM.
    RETURNS A WRAPPED LIST OF {"value": ..., "sort": ...}
    """
    if not sort:
        return DictList.EMPTY

    output = DictList()
    for clause in listwrap(sort):
        if isinstance(clause, basestring) or Math.is_integer(clause):
            output.append({"value": clause, "sort": 1})
            continue

        if clause.field or clause.value or not (clause.sort == None):
            # LONG FORM, DIRECTION DEFAULTS TO ASCENDING
            output.append({
                "value": coalesce(clause.field, clause.value),
                "sort": coalesce(sort_direction[clause.sort], 1)
            })
            continue

        # ASSUME {name: sort} FORM
        for name, direction in clause.items():
            output.append({"value": name, "sort": sort_direction[direction]})
    return wrap(output)
def _cpython_value2date(*args):
    """
    CONVERT THE GIVEN ARGUMENTS TO A datetime

    ONE ARGUMENT: A datetime/date (RETURNED AS-IS), A Date (ITS value),
    A NUMBER, OR A STRING.  NUMBERS (AND INTEGER STRINGS OF 9, 10, 12 OR 13
    CHARACTERS) LARGER THAN 9999999999 ARE DIVIDED BY 1000 BEFORE BEING
    GIVEN TO datetime.utcfromtimestamp.
    MANY ARGUMENTS: GIVEN TO unicode2datetime WHEN THE FIRST IS A STRING,
    OTHERWISE TO THE datetime CONSTRUCTOR.
    ANY EXCEPTION IS REPORTED THROUGH Log.error
    """
    try:
        if len(args) != 1:
            if isinstance(args[0], basestring):
                return unicode2datetime(*args)
            return datetime(*args)

        a0 = args[0]
        if isinstance(a0, (datetime, date)):
            return a0
        if isinstance(a0, Date):
            return a0.value

        if isinstance(a0, (int, long, float, Decimal)):
            stamp = a0
        elif isinstance(a0, basestring) and len(a0) in [9, 10, 12, 13] and Math.is_integer(a0):
            stamp = long(a0)
        elif isinstance(a0, basestring):
            return unicode2datetime(a0)
        else:
            return datetime(*args)

        if stamp == 9999999999000:
            # PYPY BUG https://bugs.pypy.org/issue1697
            return Date.MAX
        if stamp > 9999999999:
            # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
            return datetime.utcfromtimestamp(stamp / 1000)
        return datetime.utcfromtimestamp(stamp)
    except Exception as e:
        from pyLibrary.debugs.logs import Log

        Log.error("Can not convert {{args}} to Date", args=args, cause=e)
def _cpython_value2date(*args):
    """
    CONVERT THE GIVEN ARGUMENTS TO A datetime

    ONE ARGUMENT: A datetime/date (RETURNED AS-IS), A Date (ITS value),
    A NUMBER, OR A STRING.  NUMBERS (AND INTEGER STRINGS OF 9, 10, 12 OR 13
    CHARACTERS) LARGER THAN 9999999999 ARE DIVIDED BY 1000 BEFORE BEING
    GIVEN TO datetime.utcfromtimestamp.
    MANY ARGUMENTS: GIVEN TO unicode2datetime WHEN THE FIRST IS A STRING,
    OTHERWISE TO THE datetime CONSTRUCTOR.
    ANY EXCEPTION IS REPORTED THROUGH Log.error
    """
    try:
        if len(args) != 1:
            if isinstance(args[0], basestring):
                return unicode2datetime(*args)
            return datetime(*args)

        a0 = args[0]
        if isinstance(a0, (datetime, date)):
            return a0
        if isinstance(a0, Date):
            return a0.value

        if isinstance(a0, (int, long, float, Decimal)):
            stamp = a0
        elif isinstance(a0, basestring) and len(a0) in [9, 10, 12, 13] and Math.is_integer(a0):
            stamp = long(a0)
        elif isinstance(a0, basestring):
            return unicode2datetime(a0)
        else:
            return datetime(*args)

        if stamp == 9999999999000:
            # PYPY BUG https://bugs.pypy.org/issue1697
            return Date.MAX
        if stamp > 9999999999:
            # WAY TOO BIG IF IT WAS A UNIX TIMESTAMP
            return datetime.utcfromtimestamp(stamp / 1000)
        return datetime.utcfromtimestamp(stamp)
    except Exception as e:
        from pyLibrary.debugs.logs import Log

        Log.error("Can not convert {{args}} to Date", args=args, cause=e)
def _get_from_hg(self, revision, locale=None):
    """
    READ THE DETAILS OF ONE REVISION FROM ITS BRANCH'S json-info URL

    :param revision: HAS changeset.id AND branch.name; ITS branch IS REPLACED
                     WITH THE ENTRY FOUND IN self.branches
    :param locale: USED WITH THE BRANCH NAME TO FIND THE BRANCH
                   (DEFAULT_LOCALE WHEN MISSING)
    :return: A Revision BUILT FROM THE SINGLE ENTRY OF THE RESPONSE
    """
    rev = revision.changeset.id
    if len(rev) < 12 and Math.is_integer(rev):
        # SHORT, ALL-DIGIT IDS ARE LEFT-PADDED WITH ZEROS TO 12 CHARACTERS
        padding = "0" * (12 - len(rev))
        rev = padding + rev

    branch_key = (revision.branch.name.lower(), coalesce(locale, DEFAULT_LOCALE))
    revision.branch = self.branches[branch_key]
    url = revision.branch.url.rstrip("/") + "/json-info?node=" + rev

    try:
        Log.note("Reading details from {{url}}", {"url": url})
        response = self._get_and_retry(url)
        revs = convert.json2value(response.all_content.decode("utf8"))

        if revs.startswith("unknown revision "):
            Log.error(revs)
        if len(revs.keys()) != 1:
            Log.error("Do not know how to handle")

        r = list(revs.values())[0]
        changeset = Changeset(
            id=r.node,
            author=r.user,
            description=r.description,
            date=Date(r.date),
            files=r.files
        )
        return Revision(
            branch=revision.branch,
            index=r.rev,
            changeset=changeset,
            parents=r.parents,
            children=r.children,
        )
    except Exception as e:
        Log.error("Can not get revision info from {{url}}", {"url": url}, e)
def _get_from_hg(self, revision, locale=None):
    """
    READ THE DETAILS OF ONE REVISION FROM ITS BRANCH'S json-info URL

    :param revision: HAS changeset.id AND branch.name; ITS branch IS REPLACED
                     WITH THE ENTRY FOUND IN self.branches
    :param locale: USED WITH THE BRANCH NAME TO FIND THE BRANCH
                   (DEFAULT_LOCALE WHEN MISSING)
    :return: A Revision BUILT FROM THE SINGLE ENTRY OF THE RESPONSE
    """
    rev = revision.changeset.id
    if len(rev) < 12 and Math.is_integer(rev):
        # SHORT, ALL-DIGIT IDS ARE LEFT-PADDED WITH ZEROS TO 12 CHARACTERS
        padding = "0" * (12 - len(rev))
        rev = padding + rev

    branch_key = (revision.branch.name.lower(), coalesce(locale, DEFAULT_LOCALE))
    revision.branch = self.branches[branch_key]
    url = revision.branch.url.rstrip("/") + "/json-info?node=" + rev

    try:
        Log.note("Reading details from {{url}}", {"url": url})
        response = self._get_and_retry(url)
        revs = convert.json2value(response.all_content.decode("utf8"))

        if revs.startswith("unknown revision "):
            Log.error(revs)
        if len(revs.keys()) != 1:
            Log.error("Do not know how to handle")

        r = list(revs.values())[0]
        changeset = Changeset(
            id=r.node,
            author=r.user,
            description=r.description,
            date=Date(r.date),
            files=r.files
        )
        return Revision(
            branch=revision.branch,
            index=r.rev,
            changeset=changeset,
            parents=r.parents,
            children=r.children,
        )
    except Exception as e:
        Log.error("Can not get revision info from {{url}}", {"url": url}, e)
def get_pending(source, since, pending_bugs, please_stop):
    """
    ADD THE _id OF EVERY RECORD WITH config.primary_field >= since TO
    pending_bugs, ONE BATCH (OF BATCH_SIZE) AT A TIME

    :param source: SEARCHED FOR RECORDS
    :param since: WHERE TO START; ALL RECORDS WHEN MISSING
    :param pending_bugs: RECEIVES THE IDS (VIA extend)
    :param please_stop: CHECKED BEFORE EACH BATCH; go() IS CALLED ON FAILURE
    """
    try:
        while not please_stop:
            if since == None:
                Log.note("Get all records")
                record_filter = {"exists": {"field": config.primary_field}}
            else:
                Log.note(
                    "Get records with {{primary_field}} >= {{max_time|datetime}}",
                    primary_field=config.primary_field,
                    max_time=since
                )
                record_filter = {"range": {config.primary_field: {"gte": since}}}

            result = source.search({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": record_filter
                }},
                "fields": ["_id", config.primary_field],
                "from": 0,
                "size": BATCH_SIZE,
                "sort": [config.primary_field]
            })

            new_max_value = MAX([
                unwraplist(h.fields[literal_field(config.primary_field)])
                for h in result.hits.hits
            ])

            if since == new_max_value:
                # THE WHOLE BATCH SHARES ONE VALUE: GET ALL WITH THIS TIMESTAMP,
                # THEN STEP JUST PAST IT SO THE NEXT BATCH MAKES PROGRESS
                result = source.search({
                    "query": {"filtered": {
                        "query": {"match_all": {}},
                        "filter": {"term": {config.primary_field: since}},
                    }},
                    "fields": ["_id", config.primary_field],
                    "from": 0,
                    "size": 100000
                })
                if Math.is_integer(new_max_value):
                    since = int(new_max_value) + 1
                elif Math.is_number(new_max_value):
                    since = float(new_max_value) + 0.5
                else:
                    since = unicode(new_max_value) + "a"
            else:
                since = new_max_value

            ids = result.hits.hits._id
            Log.note("Adding {{num}} to pending queue", num=len(ids))
            pending_bugs.extend(ids)

            if len(result.hits.hits) < BATCH_SIZE:
                # A SHORT BATCH MEANS THERE IS NOTHING LEFT
                break

        Log.note("No more ids")
    except Exception as e:
        please_stop.go()
        Log.error("Problem while copying records", cause=e)
def __init__(self, query, schema=None):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    NOTHING IS DONE WHEN query IS ALREADY A Query, OR IS MISSING.
    OTHERWISE EACH CLAUSE IS NORMALIZED AND STORED ON self, THEN THE
    REFERENCED COLUMNS ARE CHECKED AGAINST THE QUERY PATH OF THE SOURCE
    """
    if isinstance(query, Query) or query == None:
        return

    object.__init__(self)
    query = wrap(query)

    self.format = query.format
    self.frum = wrap_from(query["from"], schema=schema)

    select = query.select
    if isinstance(select, list):
        seen_names = set()
        normalized = []
        for clause in select:
            ns = _normalize_select(clause, schema=schema)
            if ns.name in seen_names:
                Log.error("two select have the same name")
            seen_names.add(ns.name)
            normalized.append(unwrap(ns))
        self.select = wrap(normalized)
    elif select:
        self.select = _normalize_select(select, schema=schema)
    elif query.edges or query.groupby:
        # AGGREGATING WITHOUT A select MEANS "COUNT"
        self.select = Dict(name="count", value=".", aggregate="count")
    else:
        self.select = Dict(name=".", value=".", aggregate="none")

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        self.edges = _normalize_edges(query.edges, schema=schema)
        self.groupby = None
    elif query.groupby:
        self.edges = None
        self.groupby = _normalize_groupby(query.groupby, schema=schema)
    else:
        self.edges = []
        self.groupby = None

    self.where = _normalize_where(query.where, schema=schema)
    self.window = [_normalize_window(w) for w in listwrap(query.window)]
    self.having = None
    self.sort = _normalize_sort(query.sort)

    self.limit = Math.min(MAX_LIMIT, coalesce(query.limit, DEFAULT_LIMIT))
    if not Math.is_integer(self.limit) or self.limit < 0:
        Log.error("Expecting limit >= 0")

    self.isLean = query.isLean

    # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
    # THE from SOURCE IS.
    # TODO: IGNORE REACHING INTO THE NON-NESTED TYPES
    if isinstance(self.frum, list):
        if not qb:
            _late_import()
        columns = qb.get_columns(self.frum)
    elif isinstance(self.frum, Container):
        columns = self.frum.get_columns(table=self.frum.name)
    else:
        columns = []

    query_path = coalesce(self.frum.query_path, ".")
    # WE WILL EXCLUDE where VARIABLES
    referenced = query_get_all_vars(self, exclude_where=True)
    for column in columns:
        if column.name not in referenced:
            continue
        outermost = coalesce(listwrap(column.nested_path)[0], "")
        if not query_path.startswith(outermost):
            Log.error("This query, with variable {{var_name}} is too deep", var_name=column.name)
def _normalize_job_result(self, branch, revision, job, details, notes, stars):
    """
    REARRANGE THE FLAT job RECORD INTO A NESTED Dict (build, job, machine,
    repo, task, notes, stars, etl)

    :param branch: USED FOR repo.branch WHEN job HAS NO result_set, AND TO
                   LOOK UP REVISIONS MENTIONED IN NOTES
    :param revision: USED FOR repo.revision WHEN job HAS NO result_set
    :param job: THE RECORD TO NORMALIZE (A COPY IS WORKED ON)
    :param details: LOOKED UP BY job GUID
    :param notes: LOOKED UP BY job ID
    :param stars: LOOKED UP BY job ID
    :return: THE NESTED Dict; ANY EXCEPTION IS REPORTED THROUGH Log.error
    """
    output = Dict()
    try:
        job = wrap(copy(job))

        # ORGANIZE PROPERTIES
        # NOTE(review): _scrub PRESUMABLY REMOVES THE PROPERTY FROM job AS IT
        # RETURNS IT (SEE THE "are not used" CHECK BELOW) - CONFIRM
        output.build.architecture = _scrub(job, "build_architecture")
        output.build.os = _scrub(job, "build_os")
        output.build.platform = _scrub(job, "build_platform")
        output.build.type = _scrub(job, "platform_option")

        output.build_system_type = _scrub(job, "build_system_type")

        output.job.id = _scrub(job, "id")
        output.job.guid = _scrub(job, "job_guid")
        if job.job_group_symbol != "?":
            output.job.group.name = _scrub(job, "job_group_name")
            output.job.group.description = _scrub(job, "job_group_description")
            output.job.group.symbol = _scrub(job, "job_group_symbol")
        else:
            # "?" MEANS NO GROUP: DISCARD THE GROUP PROPERTIES
            job.job_group_name = None
            job.job_group_description = None
            job.job_group_symbol = None
        output.job.type.description = _scrub(job, "job_type_description")
        output.job.type.name = _scrub(job, "job_type_name")
        output.job.type.symbol = _scrub(job, "job_type_symbol")

        output.ref_data_name = _scrub(job, "ref_data_name")

        output.machine.name = _scrub(job, "machine_name")
        # A MACHINE NAME ENDING IN -<integer> BELONGS TO THE POOL NAMED BY
        # EVERYTHING BEFORE THAT SUFFIX
        if Math.is_integer(output.machine.name.split("-")[-1]):
            output.machine.pool = "-".join(output.machine.name.split("-")[:-1])
        output.machine.platform = _scrub(job, "machine_platform_architecture")
        output.machine.os = _scrub(job, "machine_platform_os")

        output.job.reason = _scrub(job, "reason")
        output.job.state = _scrub(job, "state")
        output.job.tier = _scrub(job, "tier")
        output.job.who = _scrub(job, "who")
        output.job.result = _scrub(job, "result")

        fcid = _scrub(job, "failure_classification_id")
        if fcid not in [0, 1]:  # 0 is unknown, and 1 is "not classified"
            output.job.failure_classification = self.failure_classification.get(fcid)

        if job.result_set:
            output.repo.push_date = job.result_set.push_timestamp
            output.repo.branch = self.repo[job.result_set.repository_id]
            output.repo.revision = job.result_set.revision
        else:
            output.repo.branch = branch
            output.repo.revision = revision
            output.repo.revision12=revision[:12]

        output.job.timing.submit = Date(_scrub(job, "submit_timestamp"))
        output.job.timing.start = Date(_scrub(job, "start_timestamp"))
        output.job.timing.end = Date(_scrub(job, "end_timestamp"))
        output.job.timing.last_modified = Date(_scrub(job, "last_modified"))

        # IGNORED
        job.job_group_id = None
        job.job_type_id = None
        job.result_set = None
        job.build_platform_id = None
        job.job_coalesced_to_guid = None
        job.option_collection_hash = None
        job.platform = None
        job.result_set_id = None
        job.running_eta = None
        job.signature = None

        # ANYTHING LEFT IN job WAS NEITHER USED NOR EXPLICITLY IGNORED
        if job.keys():
            Log.error("{{names|json}} are not used", names=job.keys())

        # ATTACH DETAILS (AND SCRUB OUT REDUNDANT VALUES)
        output.details = details.get(output.job.guid, Null)
        for d in output.details:
            d.job_guid = None
            d.job_id = None

        output.task.id = coalesce(*map(_extract_task_id, output.details.url))

        # ATTACH NOTES (RESOLVED BY BUG...)
        for n in notes.get(output.job.id, Null):
            note = coalesce(n.note.strip(), n.text.strip())
            if note:
                # LOOK UP REVISION IN REPO
                # (THE FIRST RUN OF 12 HEX DIGITS IN THE NOTE IS TAKEN AS THE ID)
                fix = re.findall(r'[0-9A-Fa-f]{12}', note)
                if fix:
                    rev = self.hg.get_revision(Dict(
                        changeset={"id": fix[0]},
                        branch={"name": branch}
                    ))
                    n.revision = rev.changeset.id
                    n.bug_id = self.hg._extract_bug_id(rev.changeset.description)
            else:
                note = None

            output.notes += [{
                "note": note,
                "status": coalesce(n.active_status, n.status),
                "revision": n.revision,
                "bug_id": n.bug_id,
                "who": n.who,
                "failure_classification": self.failure_classification[n.failure_classification_id],
                "timestamp": Date(coalesce(n.note_timestamp, n.timestamp, n.created))
            }]

        # ATTACH STAR INFO
        for s in stars.get(output.job.id, Null):
            # LOOKUP BUG DETAILS
            output.stars += [{
                "bug_id": s.bug_id,
                "who": s.who,
                "timestamp": s.submit_timestamp
            }]

        output.etl = {"timestamp": Date.now()}
        return output
    except Exception, e:
        Log.error("Problem with normalization of job {{job_id}}", job_id=coalesce(output.job.id, job.id), cause=e)
def __init__(self, query, schema=None):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON

    NOTHING IS DONE WHEN query IS ALREADY A Query.  OTHERWISE EACH CLAUSE
    IS NORMALIZED AND STORED ON self, THEN THE REFERENCED COLUMNS ARE
    CHECKED FOR DEPTH

    :param query: THE QUERY TO NORMALIZE
    :param schema: PASSED TO THE _normalize_* HELPERS AND TO wrap_from
    """
    if isinstance(query, Query):
        return

    object.__init__(self)
    query = wrap(query)

    self.format = query.format
    self.frum = wrap_from(query["from"], schema=schema)

    select = query.select
    if isinstance(select, list):
        names = set()
        new_select = []
        for s in select:
            ns = _normalize_select(s, schema=schema)
            if ns.name in names:
                Log.error("two select have the same name")
            names.add(ns.name)
            new_select.append(unwrap(ns))
        self.select = wrap(new_select)
    elif select:
        self.select = _normalize_select(select, schema=schema)
    else:
        if query.edges or query.groupby:
            # AGGREGATING WITHOUT A select MEANS "COUNT"
            self.select = {"name": "count", "value": ".", "aggregate": "count"}
        else:
            self.select = {"name": "__all__", "value": "*", "aggregate": "none"}

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        self.edges = _normalize_edges(query.edges, schema=schema)
        self.groupby = None
    elif query.groupby:
        self.edges = None
        self.groupby = _normalize_groupby(query.groupby, schema=schema)
    else:
        self.edges = []
        self.groupby = None

    self.where = _normalize_where(query.where, schema=schema)
    self.window = [_normalize_window(w) for w in listwrap(query.window)]
    self.sort = _normalize_sort(query.sort)
    self.limit = coalesce(query.limit, DEFAULT_LIMIT)
    if not Math.is_integer(self.limit) or self.limit < 0:
        Log.error("Expecting limit >= 0")

    self.isLean = query.isLean

    # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
    # THE from SOURCE IS.
    # TODO: IGNORE REACHING INTO THE NON-NESTED TYPES
    if isinstance(self.frum, list):
        if not qb:
            _late_import()
        columns = qb.get_columns(self.frum)
    elif isinstance(self.frum, Container):
        columns = self.frum.get_columns()
    else:
        columns = []

    vars = get_all_vars(self)
    for c in columns:
        if c.name in vars and c.depth:
            # NAME THE OFFENDING COLUMN; THE TEMPLATE EXPECTS var_name
            Log.error("This query, with variable {{var_name}} looks too deep", var_name=c.name)
def _normalize_job_result(self, branch, revision, job, details, notes, stars):
    """
    REARRANGE THE FLAT job RECORD INTO A NESTED Dict (build, job, machine,
    repo, task, notes, stars, etl)

    :param branch: USED FOR repo.branch WHEN job HAS NO result_set, AND TO
                   LOOK UP REVISIONS MENTIONED IN NOTES
    :param revision: USED FOR repo.revision WHEN job HAS NO result_set
    :param job: THE RECORD TO NORMALIZE (A COPY IS WORKED ON)
    :param details: LOOKED UP BY job GUID
    :param notes: LOOKED UP BY job ID
    :param stars: LOOKED UP BY job ID
    :return: THE NESTED Dict; ANY EXCEPTION IS REPORTED THROUGH Log.error
    """
    output = Dict()
    try:
        job = wrap(copy(job))

        # ORGANIZE PROPERTIES
        # NOTE(review): _scrub PRESUMABLY REMOVES THE PROPERTY FROM job AS IT
        # RETURNS IT (SEE THE "are not used" CHECK BELOW) - CONFIRM
        output.build.architecture = _scrub(job, "build_architecture")
        output.build.os = _scrub(job, "build_os")
        output.build.platform = _scrub(job, "build_platform")
        output.build.type = _scrub(job, "platform_option")

        output.build_system_type = _scrub(job, "build_system_type")

        output.job.id = _scrub(job, "id")
        output.job.guid = _scrub(job, "job_guid")
        if job.job_group_symbol != "?":
            output.job.group.name = _scrub(job, "job_group_name")
            output.job.group.description = _scrub(job, "job_group_description")
            output.job.group.symbol = _scrub(job, "job_group_symbol")
        else:
            # "?" MEANS NO GROUP: DISCARD THE GROUP PROPERTIES
            job.job_group_name = None
            job.job_group_description = None
            job.job_group_symbol = None
        output.job.type.description = _scrub(job, "job_type_description")
        output.job.type.name = _scrub(job, "job_type_name")
        output.job.type.symbol = _scrub(job, "job_type_symbol")

        output.ref_data_name = _scrub(job, "ref_data_name")

        output.machine.name = _scrub(job, "machine_name")
        # A MACHINE NAME ENDING IN -<integer> BELONGS TO THE POOL NAMED BY
        # EVERYTHING BEFORE THAT SUFFIX
        if Math.is_integer(output.machine.name.split("-")[-1]):
            output.machine.pool = "-".join(
                output.machine.name.split("-")[:-1])
        output.machine.platform = _scrub(job, "machine_platform_architecture")
        output.machine.os = _scrub(job, "machine_platform_os")

        output.job.reason = _scrub(job, "reason")
        output.job.state = _scrub(job, "state")
        output.job.tier = _scrub(job, "tier")
        output.job.who = _scrub(job, "who")
        output.job.result = _scrub(job, "result")

        fcid = _scrub(job, "failure_classification_id")
        if fcid not in [0, 1]:  # 0 is unknown, and 1 is "not classified"
            output.job.failure_classification = self.failure_classification.get(
                fcid)

        if job.result_set:
            output.repo.push_date = job.result_set.push_timestamp
            output.repo.branch = self.repo[job.result_set.repository_id]
            output.repo.revision = job.result_set.revision
        else:
            output.repo.branch = branch
            output.repo.revision = revision
            output.repo.revision12 = revision[:12]

        output.job.timing.submit = Date(_scrub(job, "submit_timestamp"))
        output.job.timing.start = Date(_scrub(job, "start_timestamp"))
        output.job.timing.end = Date(_scrub(job, "end_timestamp"))
        output.job.timing.last_modified = Date(_scrub(
            job, "last_modified"))

        # IGNORED
        job.job_group_id = None
        job.job_type_id = None
        job.result_set = None
        job.build_platform_id = None
        job.job_coalesced_to_guid = None
        job.option_collection_hash = None
        job.platform = None
        job.result_set_id = None
        job.running_eta = None
        job.signature = None

        # ANYTHING LEFT IN job WAS NEITHER USED NOR EXPLICITLY IGNORED
        if job.keys():
            Log.error("{{names|json}} are not used", names=job.keys())

        # ATTACH DETAILS (AND SCRUB OUT REDUNDANT VALUES)
        output.details = details.get(output.job.guid, Null)
        for d in output.details:
            d.job_guid = None
            d.job_id = None

        output.task.id = coalesce(
            *map(_extract_task_id, output.details.url))

        # ATTACH NOTES (RESOLVED BY BUG...)
        for n in notes.get(output.job.id, Null):
            note = coalesce(n.note.strip(), n.text.strip())
            if note:
                # LOOK UP REVISION IN REPO
                # (THE FIRST RUN OF 12 HEX DIGITS IN THE NOTE IS TAKEN AS THE ID)
                fix = re.findall(r'[0-9A-Fa-f]{12}', note)
                if fix:
                    rev = self.hg.get_revision(
                        Dict(changeset={"id": fix[0]},
                             branch={"name": branch}))
                    n.revision = rev.changeset.id
                    n.bug_id = self.hg._extract_bug_id(
                        rev.changeset.description)
            else:
                note = None

            output.notes += [{
                "note": note,
                "status": coalesce(n.active_status, n.status),
                "revision": n.revision,
                "bug_id": n.bug_id,
                "who": n.who,
                "failure_classification": self.failure_classification[n.failure_classification_id],
                "timestamp": Date(coalesce(n.note_timestamp, n.timestamp, n.created))
            }]

        # ATTACH STAR INFO
        for s in stars.get(output.job.id, Null):
            # LOOKUP BUG DETAILS
            output.stars += [{
                "bug_id": s.bug_id,
                "who": s.who,
                "timestamp": s.submit_timestamp
            }]

        output.etl = {"timestamp": Date.now()}
        return output
    except Exception, e:
        Log.error("Problem with normalization of job {{job_id}}",
                  job_id=coalesce(output.job.id, job.id),
                  cause=e)
def _convert_query(self, query):
    """
    CONVERT A QUERY, CLAUSE BY CLAUSE, INTO A NEW QueryOp USING THIS
    OBJECT'S _convert_* METHODS

    COMPLAINS (Log.error) WHEN BOTH groupby AND edges ARE GIVEN, WHEN THE
    LIMIT IS NOT A NON-NEGATIVE INTEGER, OR WHEN A REFERENCED COLUMN HAS
    A nested_path
    """
    query = wrap(query)

    output = QueryOp("from", None)
    output["from"] = self._convert_from(query["from"])
    output.format = query.format

    if query.select:
        output.select = convert_list(self._convert_select, query.select)
    elif query.edges or query.groupby:
        output.select = {
            "name": "count",
            "value": ".",
            "aggregate": "count",
            "default": 0
        }
    else:
        output.select = {
            "name": "__all__",
            "value": "*",
            "aggregate": "none"
        }

    if query.groupby and query.edges:
        Log.error("You can not use both the `groupby` and `edges` clauses in the same query!")
    elif query.edges:
        output.edges = convert_list(self._convert_edge, query.edges)
        output.groupby = None
    elif query.groupby:
        output.edges = None
        output.groupby = convert_list(self._convert_group, query.groupby)
    else:
        output.edges = []
        output.groupby = None

    output.where = self.convert(query.where)
    output.window = convert_list(self._convert_window, query.window)
    output.sort = self._convert_sort(query.sort)

    output.limit = coalesce(query.limit, DEFAULT_LIMIT)
    if not Math.is_integer(output.limit) or output.limit < 0:
        Log.error("Expecting limit >= 0")

    output.isLean = query.isLean

    # DEPTH ANALYSIS - LOOK FOR COLUMN REFERENCES THAT MAY BE DEEPER THAN
    # THE from SOURCE IS.  VARIABLES IN THE where CLAUSE ARE EXCLUDED
    referenced = get_all_vars(output, exclude_where=True)
    for column in query.columns:
        if column.name not in referenced:
            continue
        if column.nested_path:
            Log.error("This query, with variable {{var_name}} is too deep", var_name=column.name)

    output.having = convert_list(self._convert_having, query.having)
    return output
payload.locale = None payload.logurl = None payload.os = None payload.platform = None payload.product = None payload.release = None payload.revision = None payload.slave = None payload.status = None payload.talos = None payload.test = None payload.timestamp = None payload.tree = None path = output.run.suite.split("-") if Math.is_integer(path[-1]): output.run.chunk = int(path[-1]) output.run.suite = "-".join(path[:-1]) output.run.files = [ {"name": name, "url": url} for name, url in output.run.files.items() if filename is None or name == filename ] try: rev = Revision(branch={"name": output.build.branch}, changeset=Changeset(id=output.build.revision)) output.repo = resources.hg.get_revision(rev, output.build.locale.replace("en-US", DEFAULT_LOCALE)) except Exception, e: Log.warning("Can not get revision for\n{{details|json|indent}}", details=output, cause=e) # resources.hg.find_changeset(output.build.revision)