def _upgrade():
    """
    ON WINDOWS, REPLACE PYTHON'S BUNDLED sqlite3.dll WITH THE VENDORED COPY
    (32- OR 64-BIT AS APPROPRIATE), THEN IMPORT sqlite3 AND MARK THE MODULE
    AS UPGRADED.  BEST-EFFORT: ANY FAILURE IS LOGGED AS A WARNING, NOT RAISED.
    """
    global _upgraded
    global _sqlite3
    try:
        import sys
        import platform
        if "windows" in platform.system().lower():
            # THE DLL PYTHON ACTUALLY LOADS LIVES UNDER THE INTERPRETER PREFIX
            original_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
            if platform.architecture()[0] == "32bit":
                source_dll = File("vendor/pyLibrary/vendor/sqlite/sqlite3_32.dll")
            else:
                source_dll = File("vendor/pyLibrary/vendor/sqlite/sqlite3_64.dll")
            # BYTE-FOR-BYTE COMPARE (zip_longest SO A LENGTH DIFFERENCE ALSO MISMATCHES)
            if not all(a == b for a, b in zip_longest(source_dll.read_bytes(), original_dll.read_bytes())):
                original_dll.backup()  # KEEP A COPY OF THE DLL WE ARE OVERWRITING
                File.copy(source_dll, original_dll)
        else:
            pass  # NON-WINDOWS PLATFORMS USE THE SYSTEM SQLITE AS-IS
    except Exception as e:
        Log.warning("could not upgrade python's sqlite", cause=e)

    # IMPORT HAPPENS AFTER THE (POSSIBLE) DLL SWAP SO THE NEW LIBRARY IS LOADED
    import sqlite3 as _sqlite3
    _ = _sqlite3
    _upgraded = True
def __gt__(self, other):
    """
    STRICT "GREATER THAN" BY COMPONENT-WISE COMPARISON.
    A VERSION THAT RUNS OUT OF COMPONENTS FIRST IS THE SMALLER ONE.
    """
    rhs = Version(other)
    for mine, theirs in zip_longest(self.version, rhs.version):
        # zip_longest NEVER YIELDS (None, None); A None MEANS THAT SIDE IS EXHAUSTED
        if mine is None:
            return False  # self IS A PREFIX OF other -> NOT GREATER
        if theirs is None:
            return True  # other IS A PREFIX OF self -> GREATER
        if mine != theirs:
            return mine > theirs
    return False  # IDENTICAL VERSIONS ARE NOT STRICTLY GREATER
def __eq__(self, other):
    """
    EQUALITY FOR A SEQUENCE-LIKE OBJECT.
    None MATCHES EMPTINESS; TEXT MATCHES THE JOINED CONTENT; OTHER SEQUENCES
    MATCH ELEMENT-WISE; A SINGLETON MATCHES ITS ONLY ELEMENT.
    """
    if other == None:
        # EQUAL TO None ONLY WHEN EMPTY (mo-dots CONVENTION: == None, NOT is None)
        return not self.__bool__()
    elif is_text(other):
        try:
            return "".join(self) == other
        except Exception as e:
            # NOTE(review): elements presumably text fragments; any join failure
            # is treated as "not equal" rather than an error — deliberate best-effort
            return False
    elif is_many(other):
        # ELEMENT-WISE; zip_longest SO A LENGTH MISMATCH COMPARES ELEMENT TO None
        return all(s == o for s, o in zip_longest(self, other))
    elif self.length() == 1:
        # SINGLETON UNWRAPS TO ITS ONLY ELEMENT
        return self[0] == other
    elif not self:
        return False
    else:
        Log.error("do not know how to handle")
def format(self):
    """
    EMIT THE SOURCE TOKENS FOR A FUNCTION DEFINITION:
    DECORATORS, "def name(", PARAMETERS (WITH DEFAULTS), "):", AND BODY.
    """
    yield from emit_lines(self.decorator_list)
    yield "def"
    yield SPACE
    yield self.node.name
    yield "("
    yield from format_comment(self.line_comment)
    # BUG FIX: ast ALIGNS defaults WITH THE *TAIL* OF args, NOT THE HEAD.
    # zip_longest(args, defaults) PAIRED THE FIRST DEFAULT WITH THE FIRST
    # PARAMETER (def f(a, b=1) EMITTED "a=1").  LEFT-PAD defaults INSTEAD.
    args = self.args.args
    defaults = [None] * (len(args) - len(self.args.defaults)) + list(self.args.defaults)
    for i, (a, d) in enumerate(zip(args, defaults)):
        if i:
            yield ", "  # BUG FIX: SEPARATOR BETWEEN PARAMETERS WAS NEVER EMITTED
        yield a.arg
        if d:
            yield "="
            yield from d.format()
    yield "):"
    yield from format_comment(self.line_comment)
    yield CR
    yield from indent_body(self.body)
def _upgrade():
    """
    OLDER sqlite UPGRADE: REPLACE PYTHON'S BUNDLED sqlite3.dll WITH THE
    VENDORED COPY.  BEST-EFFORT: FAILURES ARE LOGGED AS WARNINGS.
    """
    global _upgraded
    global sqlite3
    try:
        import sys
        # DELIBERATE PLACEHOLDER: Log.error RAISES, SO THE COPY BELOW NEVER RUNS
        # AND THE except CONVERTS IT TO A WARNING — 64-BIT SUPPORT STILL MISSING
        Log.error("Fix to work with 64bit windows too")
        original_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
        source_dll = File("vendor/pyLibrary/vendor/sqlite/sqlite3.dll")
        # BYTE-FOR-BYTE COMPARE; zip_longest SO LENGTH DIFFERENCES ALSO MISMATCH
        if not all(a == b for a, b in zip_longest(source_dll.read_bytes(), original_dll.read_bytes())):
            backup = original_dll.backup()
            File.copy(source_dll, original_dll)
    except Exception as e:
        Log.warning("could not upgrade python's sqlite", cause=e)

    import sqlite3
    _ = sqlite3
    _upgraded = True
def apply_diff(text, diff, reverse=False, verify=True):
    """
    APPLY A UNIFIED diff (LIST OF LINES) TO text (ALSO A LIST OF LINES),
    WALKING THE HUNKS IN ORDER BY INDEX.

    :param text: LIST OF LINES TO TRANSFORM
    :param diff: LIST OF DIFF LINES (@@ HEADERS FOLLOWED BY +/- BODY LINES)
    :param reverse: TRUE TO UN-APPLY THE DIFF
    :param verify: TRUE TO ROUND-TRIP THE RESULT AS A SELF-CHECK

    SOME EXAMPLES OF diff
    #@@ -1 +1 @@
    #-before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    #+before china goes live (end January developer release, June general audience release) , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -0,0 +1,3 @@
    +before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +
    +kward has the details.
    @@ -1 +1 @@
    -before china goes live (end January developer release, June general audience release), the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +before china goes live , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -3 +3 ,6 @@
    -kward has the details.+kward has the details.
    +
    +Target Release Dates :
    +https://mana.mozilla.org/wiki/display/PM/Firefox+OS+Wave+Launch+Cross+Functional+View
    +
    +Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
    """
    output = text
    if not diff:
        return output
    start_of_hunk = 0
    while True:
        if start_of_hunk >= len(diff):
            break
        header = diff[start_of_hunk]
        start_of_hunk += 1
        if not header.strip():
            continue  # SKIP BLANK LINES BETWEEN HUNKS
        matches = DIFF_PREFIX.match(header.strip())
        if not matches:
            if not _Log:
                _late_import()
            _Log.error("Can not handle \n---\n{{diff}}\n---\n", diff=diff)

        remove = tuple(int(i.strip()) for i in matches.group(1).split(","))  # EXPECTING start_line, length TO REMOVE
        remove = Data(start=remove[0], length=1 if len(remove) == 1 else remove[1])  # ASSUME FIRST LINE
        add = tuple(int(i.strip()) for i in matches.group(2).split(","))  # EXPECTING start_line, length TO ADD
        add = Data(start=add[0], length=1 if len(add) == 1 else add[1])

        # ZERO-LENGTH SIDES CARRY A ZERO START; BORROW THE OTHER SIDE'S START
        if remove.start == 0 and remove.length == 0:
            remove.start = add.start
        if add.start == 0 and add.length == 0:
            add.start = remove.start
        if remove.start != add.start:
            if not _Log:
                _late_import()
            _Log.warning("Do not know how to handle")

        def repair_hunk(diff):
            # THE LAST DELETED LINE MAY MISS A "\n" MEANING THE FIRST
            # ADDED LINE WILL BE APPENDED TO THE LAST DELETED LINE
            # EXAMPLE: -kward has the details.+kward has the details.
            # DETECT THIS PROBLEM FOR THIS HUNK AND FIX THE DIFF
            problem_line = diff[start_of_hunk + remove.length - 1]
            if reverse:
                if add.length == 0:
                    return diff
                first_added_line = output[add.start - 1]
                if problem_line.endswith('+' + first_added_line):
                    split_point = len(problem_line) - len(first_added_line) - 1
                else:
                    return diff
            else:
                if remove.length == 0:
                    return diff
                last_removed_line = output[remove.start - 1]
                if problem_line.startswith('-' + last_removed_line + "+"):
                    split_point = len(last_removed_line) + 1
                else:
                    return diff
            # SPLIT THE FUSED LINE BACK INTO ITS "-" AND "+" HALVES
            new_diff = (
                diff[:start_of_hunk + remove.length - 1]
                + [problem_line[:split_point], problem_line[split_point:]]
                + diff[start_of_hunk + remove.length:]
            )
            return new_diff

        diff = repair_hunk(diff)
        diff = [d for d in diff if d != "\\ no newline at end of file"]  # ANOTHER REPAIR

        if reverse:
            # UN-APPLY: PUT BACK THE "-" LINES WHERE THE "+" LINES ARE NOW
            new_output = (
                output[:add.start - 1]
                + [d[1:] for d in diff[start_of_hunk:start_of_hunk + remove.length]]
                + output[add.start + add.length - 1:]
            )
        else:
            # APPLYING DIFF FORWARD REQUIRES WE APPLY THE HUNKS IN REVERSE TO GET THE LINE NUMBERS RIGHT?
            new_output = (
                output[:remove.start - 1]
                + [d[1:] for d in diff[start_of_hunk + remove.length:start_of_hunk + remove.length + add.length]]
                + output[remove.start + remove.length - 1:]
            )
        start_of_hunk += remove.length + add.length
        output = new_output

    if verify:
        # ROUND-TRIP: UN-APPLY WHAT WE APPLIED AND COMPARE TO THE INPUT
        original = apply_diff(output, diff, not reverse, False)
        if any(t != o for t, o in zip_longest(text, original)):
            if not _Log:
                _late_import()
            _Log.error("logical verification check failed")

    return output
def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    RECURSIVE STRUCTURAL COMPARISON: DISPATCH ON THE SHAPE OF expected/test
    (TEXT, SETS, MAPPINGS, SEQUENCES, CALLABLE PREDICATES) AND DELEGATE LEAF
    VALUES TO assertAlmostEqualValue.  RAISES (VIA Log.error) ON MISMATCH.

    :param digits: NUMBER OF SIGNIFICANT DIGITS FOR NUMERIC COMPARISON
    :param places: NUMBER OF DECIMAL PLACES FOR NUMERIC COMPARISON
    :param msg: MESSAGE TO INCLUDE IN FAILURE
    :param delta: MAXIMUM ABSOLUTE DIFFERENCE ALLOWED
    """
    show_detail = True
    test = unwrap(test)
    expected = unwrap(expected)
    try:
        if test is None and expected is None:
            return
        elif test is expected:
            return
        elif is_text(expected):
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
        elif isinstance(test, UniqueIndex):
            # SYMMETRIC DIFFERENCE IS TRUTHY WHEN THE INDEXES DISAGREE
            if test ^ expected:
                Log.error("Sets do not match")
        elif is_data(expected) and is_data(test):
            # COMPARE ONLY THE KEYS expected MENTIONS (SUBSET SEMANTICS)
            for k, v2 in unwrap(expected).items():
                v1 = test.get(k)
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif is_data(expected):
            # test IS NOT A MAPPING; USE DOTTED-PATH ACCESS FOR TEXT KEYS
            for k, v2 in expected.items():
                if is_text(k):
                    v1 = mo_dots.get_attr(test, literal_field(k))
                else:
                    v1 = test[k]
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif is_container(test) and isinstance(expected, set):
            test = set(wrap(t) for t in test)
            if len(test) != len(expected):
                Log.error(
                    "Sets do not match, element count different:\n{{test|json|indent}}\nexpecting{{expectedtest|json|indent}}",
                    test=test,
                    expected=expected
                )
            # EACH EXPECTED ELEMENT MUST MATCH AT LEAST ONE TEST ELEMENT
            for e in expected:
                for t in test:
                    try:
                        assertAlmostEqual(t, e, msg=msg, digits=digits, places=places, delta=delta)
                        break
                    except Exception as _:
                        pass  # TRY THE NEXT CANDIDATE
                else:
                    Log.error("Sets do not match. {{value|json}} not found in {{test|json}}", value=e, test=test)
        elif isinstance(expected, types.FunctionType):
            # expected IS A PREDICATE; LET IT JUDGE test
            return expected(test)
        elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"):
            # NORMALIZE numpy/pandas CONTAINERS TO PLAIN LISTS (DUCK-TYPED BY CLASS NAME)
            if test.__class__.__name__ == "ndarray":  # numpy
                test = test.tolist()
            elif test.__class__.__name__ == "DataFrame":  # pandas
                test = test[test.columns[0]].values.tolist()
            elif test.__class__.__name__ == "Series":  # pandas
                test = test.values.tolist()

            if not expected and test == None:
                return
            if expected == None:
                expected = []  # REPRESENT NOTHING
            for a, b in zip_longest(test, expected):
                assertAlmostEqual(a, b, msg=msg, digits=digits, places=places, delta=delta)
        else:
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
    except Exception as e:
        Log.error(
            "{{test|json|limit(10000)}} does not match expected {{expected|json|limit(10000)}}",
            test=test if show_detail else "[can not show]",
            expected=expected if show_detail else "[can not show]",
            cause=e
        )
def apply_diff(text, diff, reverse=False, verify=True):
    """
    APPLY A UNIFIED diff (LIST OF LINES) TO text (ALSO A LIST OF LINES) BY
    FIRST SPLITTING THE DIFF INTO (header, body) HUNKS.

    :param text: LIST OF LINES TO TRANSFORM
    :param diff: LIST OF DIFF LINES (@@ HEADERS FOLLOWED BY +/- BODY LINES)
    :param reverse: TRUE TO UN-APPLY THE DIFF (HUNKS PROCESSED IN REVERSE)
    :param verify: TRUE TO ROUND-TRIP THE RESULT AS A SELF-CHECK

    SOME EXAMPLES OF diff
    #@@ -1 +1 @@
    #-before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    #+before china goes live (end January developer release, June general audience release) , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -0,0 +1,3 @@
    +before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +
    +kward has the details.
    @@ -1 +1 @@
    -before china goes live (end January developer release, June general audience release), the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +before china goes live , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -3 +3 ,6 @@
    -kward has the details.+kward has the details.
    +
    +Target Release Dates :
    +https://mana.mozilla.org/wiki/display/PM/Firefox+OS+Wave+Launch+Cross+Functional+View
    +
    +Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
    """
    if not diff:
        return text
    output = text
    # PARTITION THE (CLEANED) DIFF INTO (header, body) PAIRS; THE SENTINEL "@@"
    # CLOSES THE LAST HUNK SO pairwise COVERS IT
    hunks = [
        (new_diff[start_hunk], new_diff[start_hunk + 1:end_hunk])
        for new_diff in [[
            d.lstrip() for d in diff if d.lstrip() and d != "\\ No newline at end of file"
        ] + ["@@"]]  # ANOTHER REPAIR
        for start_hunk, end_hunk in pairwise(
            i for i, l in enumerate(new_diff) if l.startswith("@@"))
    ]
    for header, hunk_body in reversed(hunks) if reverse else hunks:
        matches = DIFF_PREFIX.match(header.strip())
        if not matches:
            if not _Log:
                _late_import()
            _Log.error("Can not handle \n---\n{{diff}}\n---\n", diff=diff)

        removes = tuple(int(i.strip()) for i in matches.group(1).split(","))  # EXPECTING start_line, length TO REMOVE
        remove = Data(start=removes[0], length=1 if len(removes) == 1 else removes[1])  # ASSUME FIRST LINE
        adds = tuple(int(i.strip()) for i in matches.group(2).split(","))  # EXPECTING start_line, length TO ADD
        add = Data(start=adds[0], length=1 if len(adds) == 1 else adds[1])

        # A PURE-DELETE HUNK CARRIES add.start == 0; BORROW THE REMOVE POSITION
        if add.length == 0 and add.start == 0:
            add.start = remove.start

        def repair_hunk(hunk_body):
            # THE LAST DELETED LINE MAY MISS A "\n" MEANING THE FIRST
            # ADDED LINE WILL BE APPENDED TO THE LAST DELETED LINE
            # EXAMPLE: -kward has the details.+kward has the details.
            # DETECT THIS PROBLEM FOR THIS HUNK AND FIX THE DIFF
            if reverse:
                last_lines = [
                    o for b, o in zip(reversed(hunk_body), reversed(output))
                    if b != "+" + o
                ]
                if not last_lines:
                    return hunk_body
                last_line = last_lines[0]
                for problem_index, problem_line in enumerate(hunk_body):
                    if problem_line.startswith("-") and problem_line.endswith(
                            "+" + last_line):
                        split_point = len(problem_line) - (len(last_line) + 1)
                        break
                    elif problem_line.startswith("+" + last_line + "-"):
                        split_point = len(last_line) + 1
                        break
                else:
                    return hunk_body
            else:
                if not output:
                    return hunk_body
                last_line = output[-1]
                for problem_index, problem_line in enumerate(hunk_body):
                    if problem_line.startswith("+") and problem_line.endswith(
                            "-" + last_line):
                        split_point = len(problem_line) - (len(last_line) + 1)
                        break
                    elif problem_line.startswith("-" + last_line + "+"):
                        split_point = len(last_line) + 1
                        break
                else:
                    return hunk_body

            # SPLIT THE FUSED LINE BACK INTO ITS TWO HALVES
            new_hunk_body = (
                hunk_body[:problem_index]
                + [problem_line[:split_point], problem_line[split_point:]]
                + hunk_body[problem_index + 1:])
            return new_hunk_body

        hunk_body = repair_hunk(hunk_body)

        if reverse:
            # UN-APPLY: KEEP THE "-" LINES, DROP THE "+" LINES
            new_output = (output[:add.start - 1]
                          + [d[1:] for d in hunk_body if d and d[0] == "-"]
                          + output[add.start + add.length - 1:])
        else:
            # APPLY: KEEP THE "+" LINES, DROP THE "-" LINES
            new_output = (output[:add.start - 1]
                          + [d[1:] for d in hunk_body if d and d[0] == "+"]
                          + output[add.start + remove.length - 1:])
        output = new_output

    if verify:
        # ROUND-TRIP: UN-APPLY WHAT WE APPLIED AND COMPARE TO THE INPUT
        original = apply_diff(output, diff, not reverse, False)
        if set(text) != set(original):  # bugzilla-etl diffs are a jumble
            for t, o in zip_longest(text, original):
                if t in ["reports: https://goo.gl/70o6w6\r"]:
                    break  # KNOWN INCONSISTENCIES
                if t != o:
                    if not _Log:
                        _late_import()
                    _Log.error("logical verification check failed")
                    break
    return output
def es_deepop(es, query):
    """
    EXECUTE A JX QUERY AGAINST NESTED ("DEEP") DOCUMENTS IN ELASTICSEARCH:
    BUILD AN inner_hits QUERY, OPTIONALLY A SECOND QUERY FOR DOCUMENTS WITH
    NO NESTED CHILDREN, RUN BOTH, AND FORMAT THE MERGED RESULT.
    """
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER: ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es_query_template(query_path)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, schema)
    for f, w in zip_longest(es_filters, wheres):
        script = ES52[AndOp(w)].partial_eval().to_esfilter(schema)
        set_default(f, script)

    if not wheres[1]:
        # INCLUDE DOCS WITH NO NESTED DOCS
        more_filter = {
            "bool": {
                "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)],
                "must_not": {
                    "nested": {
                        "path": query_path,
                        "query": MATCH_ALL
                    }
                }
            }
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)

    # MAP QUERY COLUMN NAMES TO ES COLUMN NAMES BEFORE BUILDING THE SORT
    map_to_es_columns = schema.map_to_es()
    query_for_es = query.map(map_to_es_columns)
    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)

    es_query.stored_fields = []

    is_list = is_list_(query.select)
    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    put_index = 0
    for select in selects:
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
            leaves = schema.leaves(select.value.term.var)
            col_names = set()
            for c in leaves:
                if c.nested_path[0] == ".":
                    if c.jx_type == NESTED:
                        continue
                    es_query.stored_fields += [c.es_column]
                c_name = untype_path(relative_field(c.name, query_path))
                col_names.add(c_name)
                new_select.append({
                    "name": concat_field(select.name, c_name),
                    "nested_path": c.nested_path[0],
                    "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."},
                    "pull": get_pull_function(c)
                })
                put_index += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            for n in new_select:
                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                    n.put.name = n.name = n.name.lstrip(".")
                    col_names.add(n.name)
        elif is_op(select.value, Variable):
            net_columns = schema.leaves(select.value.var)
            if not net_columns:
                # COLUMN DOES NOT EXIST; SELECT NULL SO OUTPUT SHAPE IS STABLE
                new_select.append({
                    "name": select.name,
                    "nested_path": ".",
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": NULL
                })
            else:
                for n in net_columns:
                    pull = get_pull_function(n)
                    if n.nested_path[0] == ".":
                        if n.jx_type == NESTED:
                            continue
                        es_query.stored_fields += [n.es_column]

                    # WE MUST FIGURE OUT WHICH NAMESSPACE s.value.var IS USING SO WE CAN EXTRACT THE child
                    for np in n.nested_path:
                        c_name = untype_path(relative_field(n.name, np))
                        if startswith_field(c_name, select.value.var):
                            # PREFER THE MOST-RELATIVE NAME
                            child = relative_field(c_name, select.value.var)
                            break
                    else:
                        continue

                    new_select.append({
                        "name": select.name,
                        "pull": pull,
                        "nested_path": n.nested_path[0],
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": child
                        }
                    })
            put_index += 1
        else:
            # GENERAL EXPRESSION: FETCH ITS SOURCE COLUMNS, EVALUATE CLIENT-SIDE
            expr = select.value
            for v in expr.vars():
                for c in schema[v.var]:
                    if c.nested_path[0] == ".":
                        es_query.stored_fields += [c.es_column]
                    # else:
                    #     Log.error("deep field not expected")

            pull_name = EXPRESSION_PREFIX + select.name
            map_to_local = MapToLocal(schema)
            pull = jx_expression_to_function(pull_name)
            post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local))

            new_select.append({
                "name": select.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1

    es_query.stored_fields = sorted(es_query.stored_fields)

    # <COMPLICATED> ES needs two calls to get all documents
    more = []

    def get_more(please_stop):
        # SECOND QUERY: PARENT DOCUMENTS WITH NO NESTED CHILDREN
        more.append(es_post(
            es,
            Data(
                query=more_filter,
                stored_fields=es_query.stored_fields
            ),
            query.limit
        ))

    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # EACH A HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            # WAIT FOR THE CONCURRENT "no nested docs" QUERY, THEN YIELD ITS HITS
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    # </COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
def dominator_tree(graph):
    """
    RETURN DOMINATOR FOREST
    THERE ARE TWO TREES, "ROOTS" and "LOOPS"
    ROOTS HAVE NO PARENTS
    LOOPS ARE NODES THAT ARE A MEMBER OF A CYCLE THAT HAS NO EXTRNAL PARENT

    roots = dominator_tree(graph).get_children(ROOTS)

    :param graph: OBJECT WITH nodes AND get_parents(node) -> SET OF PARENTS
    :return: Tree MAPPING EACH NODE TO ITS DOMINATOR (UNDER ROOTS OR LOOPS)
    """
    todo = Queue()
    done = set()
    dominator = Tree(None)
    nodes = list(graph.nodes)

    while True:
        # FIGURE OUT NET ITEM TO WORK ON
        if todo:
            node = todo.pop()
        elif nodes:
            node = nodes.pop()
            if len(nodes) % 1000 == 0:
                Log.note("{{num}} nodes remaining", num=len(nodes))
        else:
            break

        if node in done:
            continue

        parents = graph.get_parents(node) - {node}  # SELF-LOOPS IGNORED
        if not parents:
            # node WITHOUT parents IS A ROOT
            done.add(node)
            dominator.add_edge(Edge(ROOTS, node))
            continue

        not_done = parents - done
        if not_done:
            # THERE ARE MORE parents TO DO FIRST
            more_todo = not_done - todo
            if not more_todo:
                # ALL PARENTS ARE PART OF A CYCLE, MAKE node A ROOT
                done.add(node)
                dominator.add_edge(Edge(LOOPS, node))
            else:
                # DO THE PARENTS BEFORE node
                todo.push(node)
                for p in more_todo:
                    todo.push(p)
            continue

        # WE CAN GET THE DOMINATORS FOR ALL parents
        if len(parents) == 1:
            # SHORTCUT
            dominator.add_edge(Edge(list(parents)[0], node))
            done.add(node)
            continue

        paths_from_roots = [
            list(reversed(dominator.get_path_to_root(p)))
            for p in parents
        ]
        if any(p[0] is ROOTS for p in paths_from_roots):
            # THIS OBJECT CAN BE REACHED FROM A ROOT, IGNORE PATHS FROM LOOPS
            paths_from_roots = [p for p in paths_from_roots if p[0] is ROOTS]
            if len(paths_from_roots) == 1:
                # SHORTCUT
                dom = paths_from_roots[0][-1]
                dominator.add_edge(Edge(dom, node))
                done.add(node)
                continue

        # FIND COMMON PATH FROM root
        num_paths = len(paths_from_roots)
        for i, x in enumerate(zip_longest(*paths_from_roots)):
            # FIRST POSITION WHERE THE PATHS DISAGREE; DOMINATOR IS THE STEP BEFORE
            if x.count(x[0]) != num_paths:
                dom = paths_from_roots[0][i-1]
                if dom is LOOPS:
                    # CAN BE REACHED FROM MORE THAN ONE LOOP, PICK ONE TO BLAME
                    dom = paths_from_roots[0][-1]
                break
        else:
            # ALL PATHS IDENTICAL
            dom = paths_from_roots[0][-1]

        dominator.add_edge(Edge(dom, node))
        done.add(node)

    return dominator
def __eq__(self, other):
    """TWO AndOp EXPRESSIONS ARE EQUAL WHEN THEIR TERMS MATCH PAIRWISE."""
    if not is_op(other, AndOp):
        return False
    # zip_longest PADS WITH None, SO A LENGTH MISMATCH COMPARES term == None
    pairs = zip_longest(self.terms, other.terms)
    return all(mine == theirs for mine, theirs in pairs)
def apply_diff(text, diff, reverse=False, verify=True):
    """
    APPLY A UNIFIED diff (LIST OF LINES) TO text (ALSO A LIST OF LINES) BY
    FIRST SPLITTING THE DIFF INTO (header, body) HUNKS.

    :param text: LIST OF LINES TO TRANSFORM
    :param diff: LIST OF DIFF LINES (@@ HEADERS FOLLOWED BY +/- BODY LINES)
    :param reverse: TRUE TO UN-APPLY THE DIFF (HUNKS PROCESSED IN REVERSE)
    :param verify: TRUE TO ROUND-TRIP THE RESULT AS A SELF-CHECK

    SOME EXAMPLES OF diff
    #@@ -1 +1 @@
    #-before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    #+before china goes live (end January developer release, June general audience release) , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -0,0 +1,3 @@
    +before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +
    +kward has the details.
    @@ -1 +1 @@
    -before china goes live (end January developer release, June general audience release), the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +before china goes live , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -3 +3 ,6 @@
    -kward has the details.+kward has the details.
    +
    +Target Release Dates :
    +https://mana.mozilla.org/wiki/display/PM/Firefox+OS+Wave+Launch+Cross+Functional+View
    +
    +Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
    """
    if not diff:
        return text
    output = text
    # PARTITION THE (CLEANED) DIFF INTO (header, body) PAIRS; THE SENTINEL "@@"
    # CLOSES THE LAST HUNK SO pairwise COVERS IT
    hunks = [
        (new_diff[start_hunk], new_diff[start_hunk+1:end_hunk])
        for new_diff in [[d.lstrip() for d in diff if d.lstrip() and d != "\\ No newline at end of file"] + ["@@"]]  # ANOTHER REPAIR
        for start_hunk, end_hunk in pairwise(i for i, l in enumerate(new_diff) if l.startswith('@@'))
    ]
    for header, hunk_body in (reversed(hunks) if reverse else hunks):
        matches = DIFF_PREFIX.match(header.strip())
        if not matches:
            if not _Log:
                _late_import()
            _Log.error("Can not handle \n---\n{{diff}}\n---\n", diff=diff)

        removes = tuple(int(i.strip()) for i in matches.group(1).split(","))  # EXPECTING start_line, length TO REMOVE
        remove = Data(start=removes[0], length=1 if len(removes) == 1 else removes[1])  # ASSUME FIRST LINE
        adds = tuple(int(i.strip()) for i in matches.group(2).split(","))  # EXPECTING start_line, length TO ADD
        add = Data(start=adds[0], length=1 if len(adds) == 1 else adds[1])

        # A PURE-DELETE HUNK CARRIES add.start == 0; BORROW THE REMOVE POSITION
        if add.length == 0 and add.start == 0:
            add.start = remove.start

        def repair_hunk(hunk_body):
            # THE LAST DELETED LINE MAY MISS A "\n" MEANING THE FIRST
            # ADDED LINE WILL BE APPENDED TO THE LAST DELETED LINE
            # EXAMPLE: -kward has the details.+kward has the details.
            # DETECT THIS PROBLEM FOR THIS HUNK AND FIX THE DIFF
            if reverse:
                last_lines = [
                    o for b, o in zip(reversed(hunk_body), reversed(output))
                    if b != "+" + o
                ]
                if not last_lines:
                    return hunk_body
                last_line = last_lines[0]
                for problem_index, problem_line in enumerate(hunk_body):
                    if problem_line.startswith('-') and problem_line.endswith('+' + last_line):
                        split_point = len(problem_line) - (len(last_line) + 1)
                        break
                    elif problem_line.startswith('+' + last_line + "-"):
                        split_point = len(last_line) + 1
                        break
                else:
                    return hunk_body
            else:
                if not output:
                    return hunk_body
                last_line = output[-1]
                for problem_index, problem_line in enumerate(hunk_body):
                    if problem_line.startswith('+') and problem_line.endswith('-' + last_line):
                        split_point = len(problem_line) - (len(last_line) + 1)
                        break
                    elif problem_line.startswith('-' + last_line + "+"):
                        split_point = len(last_line) + 1
                        break
                else:
                    return hunk_body

            # SPLIT THE FUSED LINE BACK INTO ITS TWO HALVES
            new_hunk_body = (
                hunk_body[:problem_index]
                + [problem_line[:split_point], problem_line[split_point:]]
                + hunk_body[problem_index + 1:]
            )
            return new_hunk_body

        hunk_body = repair_hunk(hunk_body)

        if reverse:
            # UN-APPLY: KEEP THE "-" LINES, DROP THE "+" LINES
            new_output = (
                output[:add.start - 1]
                + [d[1:] for d in hunk_body if d and d[0] == '-']
                + output[add.start + add.length - 1:]
            )
        else:
            # APPLY: KEEP THE "+" LINES, DROP THE "-" LINES
            new_output = (
                output[:add.start - 1]
                + [d[1:] for d in hunk_body if d and d[0] == '+']
                + output[add.start + remove.length - 1:]
            )
        output = new_output

    if verify:
        # ROUND-TRIP: UN-APPLY WHAT WE APPLIED AND COMPARE TO THE INPUT
        original = apply_diff(output, diff, not reverse, False)
        if set(text) != set(original):  # bugzilla-etl diffs are a jumble
            for t, o in zip_longest(text, original):
                if t in ['reports: https://goo.gl/70o6w6\r']:
                    break  # KNOWN INCONSISTENCIES
                if t != o:
                    if not _Log:
                        _late_import()
                    _Log.error("logical verification check failed")
                    break
    return output
def es_deepop(es, query):
    """
    EXECUTE A JX QUERY AGAINST NESTED ("DEEP") DOCUMENTS IN ELASTICSEARCH:
    BUILD AN inner_hits QUERY, OPTIONALLY A SECOND QUERY FOR DOCUMENTS WITH
    NO NESTED CHILDREN, RUN BOTH, AND FORMAT THE MERGED RESULT.
    """
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER: ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es_query_template(query_path)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, schema)
    for f, w in zip_longest(es_filters, wheres):
        script = ES52[AndOp(w)].partial_eval().to_esfilter(schema)
        set_default(f, script)

    if not wheres[1]:
        # INCLUDE DOCS WITH NO NESTED DOCS
        more_filter = {
            "bool": {
                "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)],
                "must_not": {
                    "nested": {
                        "path": query_path,
                        "query": {
                            "match_all": {}
                        }
                    }
                }
            }
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)

    # es_query.sort = jx_sort_to_es_sort(query.sort)
    # MAP QUERY COLUMN NAMES TO ES COLUMN NAMES BEFORE BUILDING THE SORT
    map_to_es_columns = schema.map_to_es()
    # {c.name: c.es_column for c in schema.leaves(".")}
    query_for_es = query.map(map_to_es_columns)
    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)

    es_query.stored_fields = []

    is_list = is_list_(query.select)
    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    put_index = 0
    for select in selects:
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
            leaves = schema.leaves(select.value.term.var)
            col_names = set()
            for c in leaves:
                if c.nested_path[0] == ".":
                    if c.jx_type == NESTED:
                        continue
                    es_query.stored_fields += [c.es_column]
                c_name = untype_path(relative_field(c.name, query_path))
                col_names.add(c_name)
                new_select.append({
                    "name": concat_field(select.name, c_name),
                    "nested_path": c.nested_path[0],
                    "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."},
                    "pull": get_pull_function(c)
                })
                put_index += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            for n in new_select:
                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                    n.put.name = n.name = n.name.lstrip(".")
                    col_names.add(n.name)
        elif is_op(select.value, Variable):
            net_columns = schema.leaves(select.value.var)
            if not net_columns:
                # COLUMN DOES NOT EXIST; SELECT NULL SO OUTPUT SHAPE IS STABLE
                new_select.append({
                    "name": select.name,
                    "nested_path": ".",
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": NULL
                })
            else:
                for n in net_columns:
                    pull = get_pull_function(n)
                    if n.nested_path[0] == ".":
                        if n.jx_type == NESTED:
                            continue
                        es_query.stored_fields += [n.es_column]

                    # WE MUST FIGURE OUT WHICH NAMESSPACE s.value.var IS USING SO WE CAN EXTRACT THE child
                    for np in n.nested_path:
                        c_name = untype_path(relative_field(n.name, np))
                        if startswith_field(c_name, select.value.var):
                            child = relative_field(c_name, select.value.var)
                            break
                    else:
                        continue
                        # REMOVED BECAUSE SELECTING INNER PROPERTIES IS NOT ALLOWED
                        # child = relative_field(untype_path(relative_field(n.name, n.nested_path[0])), s.value.var)

                    new_select.append({
                        "name": select.name,
                        "pull": pull,
                        "nested_path": n.nested_path[0],
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": child
                        }
                    })
            put_index += 1
        else:
            # GENERAL EXPRESSION: FETCH ITS SOURCE COLUMNS, EVALUATE CLIENT-SIDE
            expr = select.value
            for v in expr.vars():
                for c in schema[v.var]:
                    if c.nested_path[0] == ".":
                        es_query.stored_fields += [c.es_column]
                    # else:
                    #     Log.error("deep field not expected")

            pull_name = EXPRESSION_PREFIX + select.name
            map_to_local = MapToLocal(schema)
            pull = jx_expression_to_function(pull_name)
            post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local))

            new_select.append({
                "name": select.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []

    def get_more(please_stop):
        # SECOND QUERY: PARENT DOCUMENTS WITH NO NESTED CHILDREN
        more.append(es_post(
            es,
            Data(
                query=more_filter,
                stored_fields=es_query.stored_fields
            ),
            query.limit
        ))

    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # EACH A HIT IS RETURNED MULTIPLE TIMES FOR EACH INNER HIT, WITH INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            # WAIT FOR THE CONCURRENT "no nested docs" QUERY, THEN YIELD ITS HITS
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    # </COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
def dominator_tree(graph):
    """
    RETURN DOMINATOR FOREST
    THERE ARE TWO TREES, "ROOTS" and "LOOPS"
    ROOTS HAVE NO PARENTS
    LOOPS ARE NODES THAT ARE A MEMBER OF A CYCLE THAT HAS NO EXTRNAL PARENT

    roots = dominator_tree(graph).get_children(ROOTS)

    :param graph: OBJECT WITH nodes AND get_parents(node) -> SET OF PARENTS
    :return: Tree MAPPING EACH NODE TO ITS DOMINATOR (UNDER ROOTS OR LOOPS)
    """
    todo = Queue()
    done = set()
    dominator = Tree(None)
    nodes = list(graph.nodes)

    while True:
        # FIGURE OUT NET ITEM TO WORK ON
        if todo:
            node = todo.pop()
        elif nodes:
            node = nodes.pop()
            if len(nodes) % 1000 == 0:
                Log.note("{{num}} nodes remaining", num=len(nodes))
        else:
            break

        if node in done:
            continue

        parents = graph.get_parents(node) - {node}  # SELF-LOOPS IGNORED
        if not parents:
            # node WITHOUT parents IS A ROOT
            done.add(node)
            dominator.add_edge(Edge(ROOTS, node))
            continue

        not_done = parents - done
        if not_done:
            # THERE ARE MORE parents TO DO FIRST
            more_todo = not_done - todo
            if not more_todo:
                # ALL PARENTS ARE PART OF A CYCLE, MAKE node A ROOT
                done.add(node)
                dominator.add_edge(Edge(LOOPS, node))
            else:
                # DO THE PARENTS BEFORE node
                todo.push(node)
                for p in more_todo:
                    todo.push(p)
            continue

        # WE CAN GET THE DOMINATORS FOR ALL parents
        if len(parents) == 1:
            # SHORTCUT
            dominator.add_edge(Edge(list(parents)[0], node))
            done.add(node)
            continue

        paths_from_roots = [
            list(reversed(dominator.get_path_to_root(p)))
            for p in parents
        ]
        if any(p[0] is ROOTS for p in paths_from_roots):
            # THIS OBJECT CAN BE REACHED FROM A ROOT, IGNORE PATHS FROM LOOPS
            paths_from_roots = [p for p in paths_from_roots if p[0] is ROOTS]
            if len(paths_from_roots) == 1:
                # SHORTCUT
                dom = paths_from_roots[0][-1]
                dominator.add_edge(Edge(dom, node))
                done.add(node)
                continue

        # FIND COMMON PATH FROM root
        num_paths = len(paths_from_roots)
        for i, x in enumerate(zip_longest(*paths_from_roots)):
            # FIRST POSITION WHERE THE PATHS DISAGREE; DOMINATOR IS THE STEP BEFORE
            if x.count(x[0]) != num_paths:
                dom = paths_from_roots[0][i - 1]
                if dom is LOOPS:
                    # CAN BE REACHED FROM MORE THAN ONE LOOP, PICK ONE TO BLAME
                    dom = paths_from_roots[0][-1]
                break
        else:
            # ALL PATHS IDENTICAL
            dom = paths_from_roots[0][-1]

        dominator.add_edge(Edge(dom, node))
        done.add(node)

    return dominator
def array_add(A, B):
    """ELEMENT-WISE SUM OF TWO SEQUENCES; MISSING ENTRIES COUNT AS ZERO."""
    padded_pairs = zip_longest(A, B)
    return tuple(coalesce(left, 0) + coalesce(right, 0) for left, right in padded_pairs)