Example #1
from itertools import zip_longest

from mo_files import File  # ASSUMED HOME OF File IN THIS CODEBASE
from mo_logs import Log    # ASSUMED HOME OF Log IN THIS CODEBASE


def _upgrade():
    global _upgraded
    global _sqlite3

    try:
        import sys
        import platform

        if "windows" in platform.system().lower():
            original_dll = File.new_instance(sys.exec_prefix,
                                             "dlls/sqlite3.dll")
            if platform.architecture()[0] == "32bit":
                source_dll = File(
                    "vendor/pyLibrary/vendor/sqlite/sqlite3_32.dll")
            else:
                source_dll = File(
                    "vendor/pyLibrary/vendor/sqlite/sqlite3_64.dll")

            if not all(a == b for a, b in zip_longest(
                    source_dll.read_bytes(), original_dll.read_bytes())):
                original_dll.backup()
                File.copy(source_dll, original_dll)
    except Exception as e:
        Log.warning("could not upgrade python's sqlite", cause=e)

    import sqlite3 as _sqlite3

    _ = _sqlite3
    _upgraded = True
Example #2
    def __gt__(self, other):
        other = Version(other)
        # zip_longest PADS THE SHORTER VERSION WITH None, SO 1.2 IS NOT GREATER THAN 1.2.1
        for s, o in zip_longest(self.version, other.version):
            if s is None and o is not None:
                return False
            elif s is not None and o is None:
                return True
            elif s < o:
                return False
            elif s > o:
                return True

        return False
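For illustration, the same ordering restated as a standalone function on plain integer tuples (a sketch, not part of the class):

from itertools import zip_longest

def version_gt(a, b):
    # SAME LOGIC AS __gt__ ABOVE: THE SHORTER VERSION PADS WITH None
    for s, o in zip_longest(a, b):
        if s is None:
            return False  # a IS A PREFIX OF b, SO NOT GREATER
        if o is None:
            return True   # b IS A PREFIX OF a
        if s != o:
            return s > o
    return False

print(version_gt((1, 2, 1), (1, 2)))  # True
print(version_gt((1, 2), (1, 2, 1)))  # False
print(version_gt((1, 10), (1, 9)))    # True: NUMERIC, NOT LEXICOGRAPHIC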
Example #3
    def __eq__(self, other):
        if other == None:
            return not self.__bool__()
        elif is_text(other):
            try:
                return "".join(self) == other
            except Exception:
                return False
        elif is_many(other):
            return all(s == o for s, o in zip_longest(self, other))
        elif self.length() == 1:
            return self[0] == other
        elif not self:
            return False
        else:
            Log.error("do not know how to handle")
Example #4
    def format(self):

        yield from emit_lines(self.decorator_list)
        yield "def"
        yield SPACE
        yield self.node.name
        yield "("
        # ast ALIGNS defaults WITH THE LAST ARGUMENTS; PAD THEM ON THE LEFT
        # SO zip_longest PAIRS EACH DEFAULT WITH ITS OWN ARGUMENT
        defaults = [None] * (len(self.args.args) - len(self.args.defaults)) + list(self.args.defaults)
        for i, (a, d) in enumerate(zip_longest(self.args.args, defaults)):
            if i:
                yield ", "
            yield a.arg
            if d:
                yield "="
                yield from d.format()
        yield "):"
        yield from format_comment(self.line_comment)
        yield CR
        yield from indent_body(self.body)
Example #5
def _upgrade():
    global _upgraded
    global sqlite3

    try:
        import sys
        Log.error("Fix to work with 64bit windows too")
        original_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
        source_dll = File("vendor/pyLibrary/vendor/sqlite/sqlite3.dll")
        if not all(a == b for a, b in zip_longest(source_dll.read_bytes(),
                                                  original_dll.read_bytes())):
            original_dll.backup()
            File.copy(source_dll, original_dll)
    except Exception as e:
        Log.warning("could not upgrade python's sqlite", cause=e)

    import sqlite3
    _ = sqlite3
    _upgraded = True
Example #6
def apply_diff(text, diff, reverse=False, verify=True):
    """
    SOME EXAMPLES OF diff
    #@@ -1 +1 @@
    #-before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    #+before china goes live (end January developer release, June general audience release) , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -0,0 +1,3 @@
    +before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +
    +kward has the details.
    @@ -1 +1 @@
    -before china goes live (end January developer release, June general audience release), the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +before china goes live , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -3 +3 ,6 @@
    -kward has the details.+kward has the details.
    +
    +Target Release Dates :
    +https://mana.mozilla.org/wiki/display/PM/Firefox+OS+Wave+Launch+Cross+Functional+View
    +
    +Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
    """

    output = text
    if not diff:
        return output

    start_of_hunk = 0
    while True:
        if start_of_hunk >= len(diff):
            break
        header = diff[start_of_hunk]
        start_of_hunk += 1
        if not header.strip():
            continue

        matches = DIFF_PREFIX.match(header.strip())
        if not matches:
            if not _Log:
                _late_import()

            _Log.error("Can not handle \n---\n{{diff}}\n---\n", diff=diff)

        remove = tuple(int(i.strip()) for i in matches.group(1).split(
            ","))  # EXPECTING start_line, length TO REMOVE
        remove = Data(
            start=remove[0],
            length=1 if len(remove) == 1 else remove[1])  # ASSUME FIRST LINE
        add = tuple(int(i.strip()) for i in matches.group(2).split(
            ","))  # EXPECTING start_line, length TO ADD
        add = Data(start=add[0], length=1 if len(add) == 1 else add[1])

        if remove.start == 0 and remove.length == 0:
            remove.start = add.start
        if add.start == 0 and add.length == 0:
            add.start = remove.start

        if remove.start != add.start:
            if not _Log:
                _late_import()
            _Log.warning("Do not know how to handle")

        def repair_hunk(diff):
            # THE LAST DELETED LINE MAY MISS A "\n" MEANING THE FIRST
            # ADDED LINE WILL BE APPENDED TO THE LAST DELETED LINE
            # EXAMPLE: -kward has the details.+kward has the details.
            # DETECT THIS PROBLEM FOR THIS HUNK AND FIX THE DIFF
            problem_line = diff[start_of_hunk + remove.length - 1]
            if reverse:
                if add.length == 0:
                    return diff
                first_added_line = output[add.start - 1]
                if problem_line.endswith('+' + first_added_line):
                    split_point = len(problem_line) - len(first_added_line) - 1
                else:
                    return diff
            else:
                if remove.length == 0:
                    return diff
                last_removed_line = output[remove.start - 1]
                if problem_line.startswith('-' + last_removed_line + "+"):
                    split_point = len(last_removed_line) + 1
                else:
                    return diff

            new_diff = (
                diff[:start_of_hunk + remove.length - 1] +
                [problem_line[:split_point], problem_line[split_point:]] +
                diff[start_of_hunk + remove.length:])
            return new_diff

        diff = repair_hunk(diff)
        diff = [d for d in diff
                if d != "\\ no newline at end of file"]  # ANOTHER REPAIR

        if reverse:
            new_output = (output[:add.start - 1] + [
                d[1:]
                for d in diff[start_of_hunk:start_of_hunk + remove.length]
            ] + output[add.start + add.length - 1:])
        else:
            # APPLYING DIFF FORWARD REQUIRES WE APPLY THE HUNKS IN REVERSE TO GET THE LINE NUMBERS RIGHT?
            new_output = (output[:remove.start - 1] + [
                d[1:]
                for d in diff[start_of_hunk + remove.length:start_of_hunk +
                              remove.length + add.length]
            ] + output[remove.start + remove.length - 1:])
        start_of_hunk += remove.length + add.length
        output = new_output

    if verify:
        original = apply_diff(output, diff, not reverse, False)
        if any(t != o for t, o in zip_longest(text, original)):
            if not _Log:
                _late_import()
            _Log.error("logical verification check failed")

    return output
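DIFF_PREFIX is not defined in this excerpt. A plausible reconstruction that accepts every header shown in the docstring, including the malformed "@@ -3 +3 ,6 @@" (an assumption about the pattern, not the library's actual definition):

import re

# HYPOTHETICAL: group(1) IS THE "-start,length" RANGE, group(2) THE
# "+start,length" RANGE; LENGTH IS OPTIONAL AND STRAY WHITESPACE AROUND
# THE COMMA IS TOLERATED
DIFF_PREFIX = re.compile(r"@@ -(\d+(?:\s*,\s*\d+)?) \+(\d+(?:\s*,\s*\d+)?)\s*@@")

for header in ("@@ -1 +1 @@", "@@ -0,0 +1,3 @@", "@@ -3 +3 ,6 @@"):
    m = DIFF_PREFIX.match(header)
    print(tuple(int(i.strip()) for i in m.group(1).split(",")),
          tuple(int(i.strip()) for i in m.group(2).split(",")))
# (1,) (1,)
# (0, 0) (1, 3)
# (3,) (3, 6)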
Example #7
def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta=None):
    show_detail = True
    test = unwrap(test)
    expected = unwrap(expected)
    try:
        if test is None and expected is None:
            return
        elif test is expected:
            return
        elif is_text(expected):
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
        elif isinstance(test, UniqueIndex):
            if test ^ expected:
                Log.error("Sets do not match")
        elif is_data(expected) and is_data(test):
            for k, v2 in unwrap(expected).items():
                v1 = test.get(k)
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif is_data(expected):
            for k, v2 in expected.items():
                if is_text(k):
                    v1 = mo_dots.get_attr(test, literal_field(k))
                else:
                    v1 = test[k]
                assertAlmostEqual(v1, v2, msg=msg, digits=digits, places=places, delta=delta)
        elif is_container(test) and isinstance(expected, set):
            test = set(wrap(t) for t in test)
            if len(test) != len(expected):
                Log.error(
                    "Sets do not match, element count different:\n{{test|json|indent}}\nexpecting{{expectedtest|json|indent}}",
                    test=test,
                    expected=expected
                )

            for e in expected:
                for t in test:
                    try:
                        assertAlmostEqual(t, e, msg=msg, digits=digits, places=places, delta=delta)
                        break
                    except Exception:
                        pass
                else:
                    Log.error("Sets do not match. {{value|json}} not found in {{test|json}}", value=e, test=test)

        elif isinstance(expected, types.FunctionType):
            return expected(test)
        elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"):
            if test.__class__.__name__ == "ndarray":  # numpy
                test = test.tolist()
            elif test.__class__.__name__ == "DataFrame":  # pandas
                test = test[test.columns[0]].values.tolist()
            elif test.__class__.__name__ == "Series":  # pandas
                test = test.values.tolist()

            if not expected and test == None:
                return
            if expected == None:
                expected = []  # REPRESENT NOTHING
            for a, b in zip_longest(test, expected):
                assertAlmostEqual(a, b, msg=msg, digits=digits, places=places, delta=delta)
        else:
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
    except Exception as e:
        Log.error(
            "{{test|json|limit(10000)}} does not match expected {{expected|json|limit(10000)}}",
            test=test if show_detail else "[can not show]",
            expected=expected if show_detail else "[can not show]",
            cause=e
        )
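The iterable branch pairs test against expected with zip_longest, so a test sequence missing trailing expected elements is caught instead of passing on a prefix:

from itertools import zip_longest

test, expected = [1, 2], [1, 2, 3]
print(list(zip_longest(test, expected)))  # [(1, 1), (2, 2), (None, 3)]
# THE RECURSIVE CALL SEES (None, 3) AND FAILS: THE EXPECTED THIRD ELEMENT
# IS MISSING; PLAIN zip WOULD HAVE STOPPED AT (2, 2) AND PASSED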
Example #8
def apply_diff(text, diff, reverse=False, verify=True):
    """
    SOME EXAMPLES OF diff
    #@@ -1 +1 @@
    #-before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    #+before china goes live (end January developer release, June general audience release) , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -0,0 +1,3 @@
    +before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +
    +kward has the details.
    @@ -1 +1 @@
    -before china goes live (end January developer release, June general audience release), the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +before china goes live , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -3 +3 ,6 @@
    -kward has the details.+kward has the details.
    +
    +Target Release Dates :
    +https://mana.mozilla.org/wiki/display/PM/Firefox+OS+Wave+Launch+Cross+Functional+View
    +
    +Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
    """

    if not diff:
        return text
    output = text
    hunks = [
        (new_diff[start_hunk], new_diff[start_hunk + 1:end_hunk])
        for new_diff in [[
            d.lstrip()
            for d in diff if d.lstrip() and d != "\\ No newline at end of file"
        ] + ["@@"]]  # ANOTHER REPAIR
        for start_hunk, end_hunk in pairwise(
            i for i, l in enumerate(new_diff) if l.startswith("@@"))
    ]
    for header, hunk_body in reversed(hunks) if reverse else hunks:
        matches = DIFF_PREFIX.match(header.strip())
        if not matches:
            if not _Log:
                _late_import()

            _Log.error("Can not handle \n---\n{{diff}}\n---\n", diff=diff)

        removes = tuple(int(i.strip()) for i in matches.group(1).split(
            ","))  # EXPECTING start_line, length TO REMOVE
        remove = Data(
            start=removes[0],
            length=1 if len(removes) == 1 else removes[1])  # ASSUME FIRST LINE
        adds = tuple(int(i.strip()) for i in matches.group(2).split(
            ","))  # EXPECTING start_line, length TO ADD
        add = Data(start=adds[0], length=1 if len(adds) == 1 else adds[1])

        if add.length == 0 and add.start == 0:
            add.start = remove.start

        def repair_hunk(hunk_body):
            # THE LAST DELETED LINE MAY MISS A "\n" MEANING THE FIRST
            # ADDED LINE WILL BE APPENDED TO THE LAST DELETED LINE
            # EXAMPLE: -kward has the details.+kward has the details.
            # DETECT THIS PROBLEM FOR THIS HUNK AND FIX THE DIFF
            if reverse:
                last_lines = [
                    o for b, o in zip(reversed(hunk_body), reversed(output))
                    if b != "+" + o
                ]
                if not last_lines:
                    return hunk_body

                last_line = last_lines[0]
                for problem_index, problem_line in enumerate(hunk_body):
                    if problem_line.startswith("-") and problem_line.endswith(
                            "+" + last_line):
                        split_point = len(problem_line) - (len(last_line) + 1)
                        break
                    elif problem_line.startswith("+" + last_line + "-"):
                        split_point = len(last_line) + 1
                        break
                else:
                    return hunk_body
            else:
                if not output:
                    return hunk_body
                last_line = output[-1]
                for problem_index, problem_line in enumerate(hunk_body):
                    if problem_line.startswith("+") and problem_line.endswith(
                            "-" + last_line):
                        split_point = len(problem_line) - (len(last_line) + 1)
                        break
                    elif problem_line.startswith("-" + last_line + "+"):
                        split_point = len(last_line) + 1
                        break
                else:
                    return hunk_body

            new_hunk_body = (
                hunk_body[:problem_index] +
                [problem_line[:split_point], problem_line[split_point:]] +
                hunk_body[problem_index + 1:])
            return new_hunk_body

        hunk_body = repair_hunk(hunk_body)

        if reverse:
            new_output = (output[:add.start - 1] +
                          [d[1:] for d in hunk_body if d and d[0] == "-"] +
                          output[add.start + add.length - 1:])
        else:
            new_output = (output[:add.start - 1] +
                          [d[1:] for d in hunk_body if d and d[0] == "+"] +
                          output[add.start + remove.length - 1:])
        output = new_output

    if verify:
        original = apply_diff(output, diff, not reverse, False)
        if set(text) != set(original):  # bugzilla-etl diffs are a jumble

            for t, o in zip_longest(text, original):
                if t in ["reports: https://goo.gl/70o6w6\r"]:
                    break  # KNOWN INCONSISTENCIES
                if t != o:
                    if not _Log:
                        _late_import()
                    _Log.error("logical verification check failed")
                    break

    return output
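The hunk-splitting comprehension leans on a pairwise helper that is not shown; presumably it yields overlapping neighbor pairs, like itertools.pairwise in Python 3.10+. A reduced sketch of how the "@@" indices become (header, body) slices:

from itertools import pairwise  # PYTHON 3.10+; OLDER CODE USED AN EQUIVALENT HELPER

diff = ["@@ -1 +1 @@", "-old line", "+new line", "@@ -3 +3 @@", "+tail"]
new_diff = [d for d in diff if d.lstrip()] + ["@@"]  # SENTINEL CLOSES THE LAST HUNK
starts = [i for i, l in enumerate(new_diff) if l.startswith("@@")]
hunks = [(new_diff[a], new_diff[a + 1:b]) for a, b in pairwise(starts)]
print(hunks)
# [('@@ -1 +1 @@', ['-old line', '+new line']), ('@@ -3 +3 @@', ['+tail'])]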
Example #9
def es_deepop(es, query):
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es_query_template(query_path)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, schema)
    for f, w in zip_longest(es_filters, wheres):
        script = ES52[AndOp(w)].partial_eval().to_esfilter(schema)
        set_default(f, script)

    if not wheres[1]:
        # INCLUDE DOCS WITH NO NESTED DOCS
        more_filter = {
            "bool": {
                "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)],
                "must_not": {
                    "nested": {
                        "path": query_path,
                        "query": MATCH_ALL
                    }
                }
            }
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)

    map_to_es_columns = schema.map_to_es()
    query_for_es = query.map(map_to_es_columns)
    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)

    es_query.stored_fields = []

    is_list = is_list_(query.select)
    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    put_index = 0
    for select in selects:
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
            leaves = schema.leaves(select.value.term.var)
            col_names = set()
            for c in leaves:
                if c.nested_path[0] == ".":
                    if c.jx_type == NESTED:
                        continue
                    es_query.stored_fields += [c.es_column]
                c_name = untype_path(relative_field(c.name, query_path))
                col_names.add(c_name)
                new_select.append({
                    "name": concat_field(select.name, c_name),
                    "nested_path": c.nested_path[0],
                    "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."},
                    "pull": get_pull_function(c)
                })
                put_index += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            for n in new_select:
                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                    n.put.name = n.name = n.name.lstrip(".")
                    col_names.add(n.name)
        elif is_op(select.value, Variable):
            net_columns = schema.leaves(select.value.var)
            if not net_columns:
                new_select.append({
                    "name": select.name,
                    "nested_path": ".",
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": NULL
                })
            else:
                for n in net_columns:
                    pull = get_pull_function(n)
                    if n.nested_path[0] == ".":
                        if n.jx_type == NESTED:
                            continue
                        es_query.stored_fields += [n.es_column]

                    # WE MUST FIGURE OUT WHICH NAMESPACE select.value.var IS USING SO WE CAN EXTRACT THE child
                    for np in n.nested_path:
                        c_name = untype_path(relative_field(n.name, np))
                        if startswith_field(c_name, select.value.var):
                            # PREFER THE MOST-RELATIVE NAME
                            child = relative_field(c_name, select.value.var)
                            break
                    else:
                        continue

                    new_select.append({
                        "name": select.name,
                        "pull": pull,
                        "nested_path": n.nested_path[0],
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": child
                        }
                    })
            put_index += 1
        else:
            expr = select.value
            for v in expr.vars():
                for c in schema[v.var]:
                    if c.nested_path[0] == ".":
                        es_query.stored_fields += [c.es_column]
                    # else:
                    #     Log.error("deep field not expected")

            pull_name = EXPRESSION_PREFIX + select.name
            map_to_local = MapToLocal(schema)
            pull = jx_expression_to_function(pull_name)
            post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local))

            new_select.append({
                "name": select.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1

    es_query.stored_fields = sorted(es_query.stored_fields)

    # <COMPLICATED> ES needs two calls to get all documents
    more = []
    def get_more(please_stop):
        more.append(es_post(
            es,
            Data(
                query=more_filter,
                stored_fields=es_query.stored_fields
            ),
            query.limit
        ))
    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # EACH HIT IS RETURNED MULTIPLE TIMES, ONCE FOR EACH INNER HIT, WITH THE INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    # </COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Example #10
def dominator_tree(graph):
    """
    RETURN DOMINATOR FOREST
    THERE ARE TWO TREES, "ROOTS" and "LOOPS"
    ROOTS HAVE NO PARENTS
    LOOPS ARE NODES THAT ARE MEMBERS OF A CYCLE WITH NO EXTERNAL PARENT

    roots = dominator_tree(graph).get_children(ROOTS)
    """
    todo = Queue()
    done = set()
    dominator = Tree(None)
    nodes = list(graph.nodes)

    while True:
        # FIGURE OUT THE NEXT ITEM TO WORK ON
        if todo:
            node = todo.pop()
        elif nodes:
            node = nodes.pop()
            if len(nodes) % 1000 == 0:
                Log.note("{{num}} nodes remaining", num=len(nodes))
        else:
            break
        if node in done:
            continue

        parents = graph.get_parents(node) - {node}
        if not parents:
            # node WITHOUT parents IS A ROOT
            done.add(node)
            dominator.add_edge(Edge(ROOTS, node))
            continue

        not_done = parents - done
        if not_done:
            # THERE ARE MORE parents TO DO FIRST
            more_todo = not_done - todo
            if not more_todo:
                # ALL PARENTS ARE PART OF A CYCLE, MAKE node A ROOT
                done.add(node)
                dominator.add_edge(Edge(LOOPS, node))
            else:
                # DO THE PARENTS BEFORE node
                todo.push(node)
                for p in more_todo:
                    todo.push(p)
            continue

        # WE CAN GET THE DOMINATORS FOR ALL parents
        if len(parents) == 1:
            # SHORTCUT
            dominator.add_edge(Edge(list(parents)[0], node))
            done.add(node)
            continue

        paths_from_roots = [
            list(reversed(dominator.get_path_to_root(p)))
            for p in parents
        ]

        if any(p[0] is ROOTS for p in paths_from_roots):
            # THIS OBJECT CAN BE REACHED FROM A ROOT, IGNORE PATHS FROM LOOPS
            paths_from_roots = [p for p in paths_from_roots if p[0] is ROOTS]
            if len(paths_from_roots) == 1:
                # SHORTCUT
                dom = paths_from_roots[0][-1]
                dominator.add_edge(Edge(dom, node))
                done.add(node)
                continue

        # FIND COMMON PATH FROM root
        num_paths = len(paths_from_roots)
        for i, x in enumerate(zip_longest(*paths_from_roots)):
            if x.count(x[0]) != num_paths:
                dom = paths_from_roots[0][i-1]
                if dom is LOOPS:
                    # CAN BE REACHED FROM MORE THAN ONE LOOP, PICK ONE TO BLAME
                    dom = paths_from_roots[0][-1]
                break
        else:
            # ALL PATHS IDENTICAL
            dom = paths_from_roots[0][-1]

        dominator.add_edge(Edge(dom, node))
        done.add(node)

    return dominator
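The common-dominator scan walks all root paths in lockstep; zip_longest pads exhausted paths with None, so a path that is a strict prefix of another ends the scan at the first disagreeing column instead of raising. The prefix scan on plain lists:

from itertools import zip_longest

paths_from_roots = [
    ["ROOTS", "a", "b", "c"],
    ["ROOTS", "a", "b"],
    ["ROOTS", "a", "d"],
]

num_paths = len(paths_from_roots)
for i, x in enumerate(zip_longest(*paths_from_roots)):
    if x.count(x[0]) != num_paths:  # FIRST COLUMN WHERE THE PATHS DISAGREE
        dom = paths_from_roots[0][i - 1]  # LAST NODE THEY ALL SHARE
        break
else:
    dom = paths_from_roots[0][-1]  # ALL PATHS IDENTICAL
print(dom)  # a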
Example #11
    def __eq__(self, other):
        if is_op(other, AndOp):
            # zip_longest PADS THE SHORTER TERM LIST WITH None, SO A
            # DIFFERENT NUMBER OF TERMS NEVER COMPARES EQUAL
            return all(a == b for a, b in zip_longest(self.terms, other.terms))
        return False
Example #12
def apply_diff(text, diff, reverse=False, verify=True):
    """
    SOME EXAMPLES OF diff
    #@@ -1 +1 @@
    #-before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    #+before china goes live (end January developer release, June general audience release) , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -0,0 +1,3 @@
    +before china goes live, the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +
    +kward has the details.
    @@ -1 +1 @@
    -before china goes live (end January developer release, June general audience release), the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    +before china goes live , the content team will have to manually update the settings for the china-ready apps currently in marketplace.
    @@ -3 +3 ,6 @@
    -kward has the details.+kward has the details.
    +
    +Target Release Dates :
    +https://mana.mozilla.org/wiki/display/PM/Firefox+OS+Wave+Launch+Cross+Functional+View
    +
    +Content Team Engagement & Tasks : https://appreview.etherpad.mozilla.org/40
    """

    if not diff:
        return text
    output = text
    hunks = [
        (new_diff[start_hunk], new_diff[start_hunk+1:end_hunk])
        for new_diff in [[d.lstrip() for d in diff if d.lstrip() and d != "\\ No newline at end of file"] + ["@@"]]  # ANOTHER REPAIR
        for start_hunk, end_hunk in pairwise(i for i, l in enumerate(new_diff) if l.startswith('@@'))
    ]
    for header, hunk_body in (reversed(hunks) if reverse else hunks):
        matches = DIFF_PREFIX.match(header.strip())
        if not matches:
            if not _Log:
                _late_import()

            _Log.error("Can not handle \n---\n{{diff}}\n---\n",  diff=diff)

        removes = tuple(int(i.strip()) for i in matches.group(1).split(","))  # EXPECTING start_line, length TO REMOVE
        remove = Data(start=removes[0], length=1 if len(removes) == 1 else removes[1])  # ASSUME FIRST LINE
        adds = tuple(int(i.strip()) for i in matches.group(2).split(","))  # EXPECTING start_line, length TO ADD
        add = Data(start=adds[0], length=1 if len(adds) == 1 else adds[1])

        if add.length == 0 and add.start == 0:
            add.start = remove.start

        def repair_hunk(hunk_body):
            # THE LAST DELETED LINE MAY MISS A "\n" MEANING THE FIRST
            # ADDED LINE WILL BE APPENDED TO THE LAST DELETED LINE
            # EXAMPLE: -kward has the details.+kward has the details.
            # DETECT THIS PROBLEM FOR THIS HUNK AND FIX THE DIFF
            if reverse:
                last_lines = [
                    o
                    for b, o in zip(reversed(hunk_body), reversed(output))
                    if b != "+" + o
                ]
                if not last_lines:
                    return hunk_body

                last_line = last_lines[0]
                for problem_index, problem_line in enumerate(hunk_body):
                    if problem_line.startswith('-') and problem_line.endswith('+' + last_line):
                        split_point = len(problem_line) - (len(last_line) + 1)
                        break
                    elif problem_line.startswith('+' + last_line + "-"):
                        split_point = len(last_line) + 1
                        break
                else:
                    return hunk_body
            else:
                if not output:
                    return hunk_body
                last_line = output[-1]
                for problem_index, problem_line in enumerate(hunk_body):
                    if problem_line.startswith('+') and problem_line.endswith('-' + last_line):
                        split_point = len(problem_line) - (len(last_line) + 1)
                        break
                    elif problem_line.startswith('-' + last_line + "+"):
                        split_point = len(last_line) + 1
                        break
                else:
                    return hunk_body

            new_hunk_body = (
                hunk_body[:problem_index] +
                [problem_line[:split_point], problem_line[split_point:]] +
                hunk_body[problem_index + 1:]
            )
            return new_hunk_body
        hunk_body = repair_hunk(hunk_body)

        if reverse:
            new_output = (
                output[:add.start - 1] +
                [d[1:] for d in hunk_body if d and d[0] == '-'] +
                output[add.start + add.length - 1:]
            )
        else:
            new_output = (
                output[:add.start - 1] +
                [d[1:] for d in hunk_body if d and d[0] == '+'] +
                output[add.start + remove.length - 1:]
            )
        output = new_output

    if verify:
        original = apply_diff(output, diff, not reverse, False)
        if set(text) != set(original):  # bugzilla-etl diffs are a jumble

            for t, o in zip_longest(text, original):
                if t in ['reports: https://goo.gl/70o6w6\r']:
                    break  # KNOWN INCONSISTENCIES
                if t != o:
                    if not _Log:
                        _late_import()
                    _Log.error("logical verification check failed")
                    break

    return output
Example #13
def es_deepop(es, query):
    schema = query.frum.schema
    query_path = schema.query_path[0]

    # TODO: FIX THE GREAT SADNESS CAUSED BY EXECUTING post_expressions
    # THE EXPRESSIONS SHOULD BE PUSHED TO THE CONTAINER:  ES ALLOWS
    # {"inner_hit":{"script_fields":[{"script":""}...]}}, BUT THEN YOU
    # LOSE "_source" BUT GAIN "fields", FORCING ALL FIELDS TO BE EXPLICIT
    post_expressions = {}
    es_query, es_filters = es_query_template(query_path)

    # SPLIT WHERE CLAUSE BY DEPTH
    wheres = split_expression_by_depth(query.where, schema)
    for f, w in zip_longest(es_filters, wheres):
        script = ES52[AndOp(w)].partial_eval().to_esfilter(schema)
        set_default(f, script)

    if not wheres[1]:
        # INCLUDE DOCS WITH NO NESTED DOCS
        more_filter = {
            "bool": {
                "filter": [AndOp(wheres[0]).partial_eval().to_esfilter(schema)],
                "must_not": {
                    "nested": {
                        "path": query_path,
                        "query": {
                            "match_all": {}
                        }
                    }
                }
            }
        }
    else:
        more_filter = None

    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)

    # es_query.sort = jx_sort_to_es_sort(query.sort)
    map_to_es_columns = schema.map_to_es()
    # {c.name: c.es_column for c in schema.leaves(".")}
    query_for_es = query.map(map_to_es_columns)
    es_query.sort = jx_sort_to_es_sort(query_for_es.sort, schema)

    es_query.stored_fields = []

    is_list = is_list_(query.select)
    selects = wrap([unwrap(s.copy()) for s in listwrap(query.select)])
    new_select = FlatList()

    put_index = 0
    for select in selects:
        if is_op(select.value, LeavesOp) and is_op(select.value.term, Variable):
            # IF THERE IS A *, THEN INSERT THE EXTRA COLUMNS
            leaves = schema.leaves(select.value.term.var)
            col_names = set()
            for c in leaves:
                if c.nested_path[0] == ".":
                    if c.jx_type == NESTED:
                        continue
                    es_query.stored_fields += [c.es_column]
                c_name = untype_path(relative_field(c.name, query_path))
                col_names.add(c_name)
                new_select.append({
                    "name": concat_field(select.name, c_name),
                    "nested_path": c.nested_path[0],
                    "put": {"name": concat_field(select.name, literal_field(c_name)), "index": put_index, "child": "."},
                    "pull": get_pull_function(c)
                })
                put_index += 1

            # REMOVE DOTS IN PREFIX IF NAME NOT AMBIGUOUS
            for n in new_select:
                if n.name.startswith("..") and n.name.lstrip(".") not in col_names:
                    n.put.name = n.name = n.name.lstrip(".")
                    col_names.add(n.name)
        elif is_op(select.value, Variable):
            net_columns = schema.leaves(select.value.var)
            if not net_columns:
                new_select.append({
                    "name": select.name,
                    "nested_path": ".",
                    "put": {"name": select.name, "index": put_index, "child": "."},
                    "pull": NULL
                })
            else:
                for n in net_columns:
                    pull = get_pull_function(n)
                    if n.nested_path[0] == ".":
                        if n.jx_type == NESTED:
                            continue
                        es_query.stored_fields += [n.es_column]

                    # WE MUST FIGURE OUT WHICH NAMESPACE select.value.var IS USING SO WE CAN EXTRACT THE child
                    for np in n.nested_path:
                        c_name = untype_path(relative_field(n.name, np))
                        if startswith_field(c_name, select.value.var):
                            child = relative_field(c_name, select.value.var)
                            break
                    else:
                        continue
                        # REMOVED BECAUSE SELECTING INNER PROPERTIES IS NOT ALLOWED
                        # child = relative_field(untype_path(relative_field(n.name, n.nested_path[0])), s.value.var)

                    new_select.append({
                        "name": select.name,
                        "pull": pull,
                        "nested_path": n.nested_path[0],
                        "put": {
                            "name": select.name,
                            "index": put_index,
                            "child": child
                        }
                    })
            put_index += 1
        else:
            expr = select.value
            for v in expr.vars():
                for c in schema[v.var]:
                    if c.nested_path[0] == ".":
                        es_query.stored_fields += [c.es_column]
                    # else:
                    #     Log.error("deep field not expected")

            pull_name = EXPRESSION_PREFIX + select.name
            map_to_local = MapToLocal(schema)
            pull = jx_expression_to_function(pull_name)
            post_expressions[pull_name] = jx_expression_to_function(expr.map(map_to_local))

            new_select.append({
                "name": select.name if is_list else ".",
                "pull": pull,
                "value": expr.__data__(),
                "put": {"name": select.name, "index": put_index, "child": "."}
            })
            put_index += 1

    # <COMPLICATED> ES needs two calls to get all documents
    more = []
    def get_more(please_stop):
        more.append(es_post(
            es,
            Data(
                query=more_filter,
                stored_fields=es_query.stored_fields
            ),
            query.limit
        ))
    if more_filter:
        need_more = Thread.run("get more", target=get_more)

    with Timer("call to ES") as call_timer:
        data = es_post(es, es_query, query.limit)

    # EACH HIT IS RETURNED MULTIPLE TIMES, ONCE FOR EACH INNER HIT, WITH THE INNER HIT INCLUDED
    def inners():
        for t in data.hits.hits:
            for i in t.inner_hits[literal_field(query_path)].hits.hits:
                t._inner = i._source
                for k, e in post_expressions.items():
                    t[k] = e(t)
                yield t
        if more_filter:
            Thread.join(need_more)
            for t in more[0].hits.hits:
                yield t
    # </COMPLICATED>

    try:
        formatter, groupby_formatter, mime_type = format_dispatch[query.format]

        output = formatter(inners(), new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
Example #14
def dominator_tree(graph):
    """
    RETURN DOMINATOR FOREST
    THERE ARE TWO TREES, "ROOTS" and "LOOPS"
    ROOTS HAVE NO PARENTS
    LOOPS ARE NODES THAT ARE MEMBERS OF A CYCLE WITH NO EXTERNAL PARENT

    roots = dominator_tree(graph).get_children(ROOTS)
    """
    todo = Queue()
    done = set()
    dominator = Tree(None)
    nodes = list(graph.nodes)

    while True:
        # FIGURE OUT THE NEXT ITEM TO WORK ON
        if todo:
            node = todo.pop()
        elif nodes:
            node = nodes.pop()
            if len(nodes) % 1000 == 0:
                Log.note("{{num}} nodes remaining", num=len(nodes))
        else:
            break
        if node in done:
            continue

        parents = graph.get_parents(node) - {node}
        if not parents:
            # node WITHOUT parents IS A ROOT
            done.add(node)
            dominator.add_edge(Edge(ROOTS, node))
            continue

        not_done = parents - done
        if not_done:
            # THERE ARE MORE parents TO DO FIRST
            more_todo = not_done - todo
            if not more_todo:
                # ALL PARENTS ARE PART OF A CYCLE, MAKE node A ROOT
                done.add(node)
                dominator.add_edge(Edge(LOOPS, node))
            else:
                # DO THE PARENTS BEFORE node
                todo.push(node)
                for p in more_todo:
                    todo.push(p)
            continue

        # WE CAN GET THE DOMINATORS FOR ALL parents
        if len(parents) == 1:
            # SHORTCUT
            dominator.add_edge(Edge(list(parents)[0], node))
            done.add(node)
            continue

        paths_from_roots = [
            list(reversed(dominator.get_path_to_root(p))) for p in parents
        ]

        if any(p[0] is ROOTS for p in paths_from_roots):
            # THIS OBJECT CAN BE REACHED FROM A ROOT, IGNORE PATHS FROM LOOPS
            paths_from_roots = [p for p in paths_from_roots if p[0] is ROOTS]
            if len(paths_from_roots) == 1:
                # SHORTCUT
                dom = paths_from_roots[0][-1]
                dominator.add_edge(Edge(dom, node))
                done.add(node)
                continue

        # FIND COMMON PATH FROM root
        num_paths = len(paths_from_roots)
        for i, x in enumerate(zip_longest(*paths_from_roots)):
            if x.count(x[0]) != num_paths:
                dom = paths_from_roots[0][i - 1]
                if dom is LOOPS:
                    # CAN BE REACHED FROM MORE THAN ONE LOOP, PICK ONE TO BLAME
                    dom = paths_from_roots[0][-1]
                break
        else:
            # ALL PATHS IDENTICAL
            dom = paths_from_roots[0][-1]

        dominator.add_edge(Edge(dom, node))
        done.add(node)

    return dominator
Example #15
from itertools import zip_longest

from mo_dots import coalesce  # ASSUMED HOME OF coalesce (FIRST NOT-None ARGUMENT)


def array_add(A, B):
    # ELEMENT-WISE SUM; zip_longest PADS THE SHORTER TUPLE WITH None,
    # WHICH coalesce TURNS INTO 0, SO THE RESULT HAS THE LONGER LENGTH
    return tuple(coalesce(a, 0) + coalesce(b, 0) for a, b in zip_longest(A, B))
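A quick check of the padding arithmetic:

print(array_add((1, 2), (10, 20, 30)))  # (11, 22, 30): MISSING ELEMENTS COUNT AS 0
print(array_add((), (5,)))              # (5,)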