コード例 #1
0
ファイル: convert.py プロジェクト: mozilla/ChangeDetector
def json2value(json_string, params={}, flexible=False, leaves=False):
    """
    :param json_string: THE JSON
    :param params: STANDARD JSON PARAMS
    :param flexible: REMOVE COMMENTS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py# L58
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(l) for l in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            json_string = expand_template(json_string, params)


        # LOOKUP REFERENCES
        value = wrap(json_decoder(json_string))

        if leaves:
            value = wrap_leaves(value)

        return value

    except Exception, e:
        e = Except.wrap(e)
        if "Expecting '" in e and "' delimiter: line" in e:
            line_index = int(strings.between(e.message, " line ", " column ")) - 1
            column = int(strings.between(e.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                sample = "..." + line[column - 20:]
                pointer = "   " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error("Can not decode JSON at:\n\t" + sample + "\n\t" + pointer + "\n")

        base_str = unicode2utf8(strings.limit(json_string, 1000))
        hexx_str = bytes2hex(base_str, " ")
        try:
            char_str = " " + ("  ".join(c.decode("latin1") if ord(c) >= 32 else ".") for c in base_str)
        except Exception:
            char_str = " "
        Log.error("Can not decode JSON:\n" + char_str + "\n" + hexx_str + "\n", e)
コード例 #2
0
ファイル: rollover_index.py プロジェクト: davehunt/ActiveData
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}',
                        '{"code": "bb"}').replace('{"id": "tc"}',
                                                  '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = convert.json2value(suite_json)
                suite = convert.value2json(suite.name)
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > 100000:
            value.result.subtests = [
                s for s in value.result.subtests if s.ok is False
            ]
            value.result.missing_subtests = True

        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > 100000:
        value = convert.json2value(line)
        value.result.subtests = [
            s for s in value.result.subtests if s.ok is False
        ]
        value.result.missing_subtests = True
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find("\"resource_usage\":") != -1:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
コード例 #3
0
def _get_url(url, branch, **kwargs):
    with Explanation("get push from {{url}}", url=url):
        response = http.get(url, **kwargs)
        data = convert.json2value(response.content.decode("utf8"))
        if isinstance(data, basestring) and data.startswith("unknown revision"):
            Log.error("Unknown push {{revision}}", revision=strings.between(data, "'", "'"))
        branch.url = _trim(url)  #RECORD THIS SUCCESS IN THE BRANCH
        return data
コード例 #4
0
def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name":repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url, File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error("Can not clone {{repos.url}}, because {{problem}}", {
                            "repos": repo,
                            "problem": line
                        })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()


    else:
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name":repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd", File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1
            )
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error("Problem pulling repos, try \"hg recover\"\n{{reason|indent}}", {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}", {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}", {"pull_results": output})
コード例 #5
0
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = convert.json2value(suite_json)
                suite = convert.value2json(suite.name)
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = convert.json2value(line)
        if len(line) > 100000:
            value.result.subtests = [s for s in value.result.subtests if s.ok is False]
            value.result.missing_subtests = True

        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > 100000:
        value = convert.json2value(line)
        value.result.subtests = [s for s in value.result.subtests if s.ok is False]
        value.result.missing_subtests = True
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find("\"resource_usage\":") != -1:
        value = convert.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
コード例 #6
0
ファイル: query.py プロジェクト: davehunt/ActiveData
def replace_vars(text, params=None):
    """
    REPLACE {{vars}} WITH ENVIRONMENTAL VALUES
    """
    start = 0
    var = strings.between(text, "{{", "}}", start)
    while var:
        replace = "{{" + var + "}}"
        index = text.find(replace, 0)
        if index == -1:
            Log.error("could not find {{var}} (including quotes)", var=replace)
        end = index + len(replace)

        try:
            replacement = unicode(Date(var).unix)
            text = text[:index] + replacement + text[end:]
            start = index + len(replacement)
        except Exception, _:
            start += 1

        var = strings.between(text, "{{", "}}", start)
コード例 #7
0
 def query(self, query):
     try:
         with self.esq:
             self.esq.query(query)
             return None
     except Exception, e:
         f = Except(ERROR, unicode(e), trace=extract_tb(1))
         try:
             details = str(f)
             query = convert.json2value(strings.between(details, ">>>>", "<<<<"))
             return query
         except Exception, g:
             Log.error("problem", f)
コード例 #8
0
    def _load_all_in_push(self, revision, locale=None):
        # http://hg.mozilla.org/mozilla-central/json-pushes?full=1&changeset=57c461500a0c

        if isinstance(revision.branch, basestring):
            lower_name = revision.branch.lower()
        else:
            lower_name = revision.branch.name.lower()

        revision.branch = self.branches[lower_name, locale]
        if not revision.branch:
            Log.error("can not find branch ({{branch}}, {{locale}})",
                      name=lower_name,
                      locale=locale)

        Log.note(
            "Reading pushlog for revision ({{branch}}, {{locale}}, {{changeset}})",
            branch=revision.branch.name,
            locale=locale,
            changeset=revision.changeset.id)

        url = revision.branch.url.rstrip(
            "/") + "/json-pushes?full=1&changeset=" + revision.changeset.id
        try:
            response = self._get_and_retry(url)
            data = convert.json2value(response.all_content.decode("utf8"))
            if isinstance(data,
                          basestring) and data.startswith("unknown revision"):
                Log.error("Unknown push {{revision}}",
                          revision=strings.between(data, "'", "'"))
            for index, _push in data.items():
                push = Push(id=int(index), date=_push.date, user=_push.user)
                self.current_push = push
                revs = []
                for c in _push.changesets:
                    changeset = Changeset(id=c.node, **c)
                    rev = self.get_revision(
                        Revision(branch=revision.branch, changeset=changeset),
                        locale)
                    rev.push = push
                    _id = coalesce(rev.changeset.id12,
                                   "") + "-" + rev.branch.name
                    revs.append({"id": _id, "value": rev})
                self.es.extend(revs)
        except Exception, e:
            Log.error("Problem pulling pushlog from {{url}}", url=url, cause=e)
コード例 #9
0
    def copy(self, keys, source, sample_only_filter=None, sample_size=None):
        num_keys = 0
        for key in keys:
            try:
                for rownum, line in enumerate(
                        source.read_lines(strip_extension(key))):
                    if rownum == 0:
                        value = convert.json2value(line)
                        if len(line) > 1000000:
                            # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests)", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                            value.result.subtests = None
                            value.result.missing_subtests = True

                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                        if sample_only_filter and Random.int(
                                int(1.0 / coalesce(
                                    sample_size, 0.01))) != 0 and qb.filter(
                                        [value], sample_only_filter):
                            # INDEX etl.id==0, BUT NO MORE
                            if value.etl.id != 0:
                                Log.error("Expecting etl.id==0")
                            num_keys += 1
                            self.queue.add(row)
                            break
                    elif len(line) > 1000000:
                        value = convert.json2value(line)
                        # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests).", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                        value.result.subtests = None
                        value.result.missing_subtests = True
                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                    else:
                        #FAST
                        _id = strings.between(line, "_id\": \"",
                                              "\"")  # AVOID DECODING JSON
                        row = {"id": _id, "json": line}
                    num_keys += 1
                    self.queue.add(row)
            except Exception, e:
                Log.warning("Could not get queue for {{key}}",
                            key=key,
                            cause=e)
コード例 #10
0
    def _load_all_in_push(self, revision, locale=None):
        # http://hg.mozilla.org/mozilla-central/json-pushes?full=1&changeset=57c461500a0c


        if isinstance(revision.branch, basestring):
            lower_name = revision.branch.lower()
        else:
            lower_name = revision.branch.name.lower()

        revision.branch = self.branches[lower_name, locale]
        if not revision.branch:
            Log.error("can not find branch ({{branch}}, {{locale}})", name=lower_name, locale=locale)

        Log.note(
            "Reading pushlog for revision ({{branch}}, {{locale}}, {{changeset}})",
            branch=revision.branch.name,
            locale=locale,
            changeset=revision.changeset.id
        )

        url = revision.branch.url.rstrip("/") + "/json-pushes?full=1&changeset=" + revision.changeset.id
        try:
            response = self._get_and_retry(url)
            data = convert.json2value(response.all_content.decode("utf8"))
            if isinstance(data, basestring) and data.startswith("unknown revision"):
                Log.error("Unknown push {{revision}}", revision=strings.between(data, "'", "'"))
            for index, _push in data.items():
                push = Push(id=int(index), date=_push.date, user=_push.user)
                self.current_push = push
                revs = []
                for c in _push.changesets:
                    changeset = Changeset(id=c.node, **c)
                    rev = self.get_revision(Revision(branch=revision.branch, changeset=changeset), locale)
                    rev.push = push
                    _id = coalesce(rev.changeset.id12, "") + "-" + rev.branch.name
                    revs.append({"id": _id, "value": rev})
                self.es.extend(revs)
        except Exception, e:
            Log.error("Problem pulling pushlog from {{url}}", url=url, cause=e)
コード例 #11
0
    def copy(self, keys, source, sample_only_filter=None, sample_size=None):
        num_keys = 0
        for key in keys:
            try:
                for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                    if rownum == 0:
                        value = convert.json2value(line)
                        if len(line) > 1000000:
                            # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests)", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                            value.result.subtests = None
                            value.result.missing_subtests = True

                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and qb.filter([value], sample_only_filter):
                            # INDEX etl.id==0, BUT NO MORE
                            if value.etl.id != 0:
                                Log.error("Expecting etl.id==0")
                            num_keys += 1
                            self.queue.add(row)
                            break
                    elif len(line) > 1000000:
                        value = convert.json2value(line)
                        # Log.warning("Line {{num}} for key {{key}} is too long ({{length|comma}} bytes, {{num_tests}} subtests).", key=key, length=len(line), num=rownum, num_tests=len(value.result.subtests))
                        value.result.subtests = None
                        value.result.missing_subtests = True
                        _id, value = _fix(value)
                        row = {"id": _id, "value": value}
                    else:
                        #FAST
                        _id = strings.between(line, "_id\": \"", "\"")  # AVOID DECODING JSON
                        row = {"id": _id, "json": line}
                    num_keys += 1
                    self.queue.add(row)
            except Exception, e:
                Log.warning("Could not get queue for {{key}}", key=key, cause=e)
コード例 #12
0
ファイル: convert.py プロジェクト: davehunt/ActiveData
            value = wrap_leaves(value)

        return value

    except Exception, e:
        e = Except.wrap(e)

        if not json_string.strip():
            Log.error("JSON string is only whitespace")

        c = e
        while "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause

        if "Expecting '" in c and "' delimiter: line" in c:
            line_index = int(strings.between(c.message, " line ", " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                sample = "..." + line[column - 20:]
                pointer = "   " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error("Can not decode JSON at:\n\t" + sample + "\n\t" + pointer + "\n")

        base_str = unicode2utf8(strings.limit(json_string, 1000))
コード例 #13
0
def fix_locale(locale):
    # compensate for bug https://bugzilla.mozilla.org/show_bug.cgi?id=1174979
    if locale.find("\"") == -1:
        return locale
    return strings.between(locale, "\"", "\"")
コード例 #14
0
ファイル: mercurial.py プロジェクト: mozilla/ActiveData-ETL
def pull_repo(repo):
    if not File(os.path.join(repo.directory, ".hg")).exists:
        File(repo.directory).delete()

        # REPO DOES NOT EXIST, CLONE IT
        with Timer("Clone hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "clone", repo.url,
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            try:
                while True:
                    line = proc.stdout.readline()
                    if line.startswith("abort:"):
                        Log.error(
                            "Can not clone {{repos.url}}, because {{problem}}",
                            {
                                "repos": repo,
                                "problem": line
                            })
                    if line == '':
                        break
                    Log.note("Mercurial cloning: {{status}}", {"status": line})
            finally:
                proc.wait()

    else:
        hgrc_file = File(os.path.join(repo.directory, ".hg", "hgrc"))
        if not hgrc_file.exists:
            hgrc_file.write("[paths]\ndefault = " + repo.url + "\n")

        # REPO EXISTS, PULL TO UPDATE
        with Timer("Pull hg log for {{name}}", {"name": repo.name}):
            proc = subprocess.Popen(
                ["hg", "pull", "--cwd",
                 File(repo.directory).filename],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                bufsize=-1)
            (output, _) = proc.communicate()

            if output.find("abort: repository default not found!") >= 0:
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: abandoned transaction found") >= 0:
                Log.error(
                    "Problem pulling repos, try \"hg recover\"\n{{reason|indent}}",
                    {"reason": output})
                File(repo.directory).delete()
                pull_repo(repo)
                return
            if output.find("abort: ") >= 0:
                Log.error("Problem with pull {{reason}}",
                          {"reason": between(output, "abort:", "\n")})

            Log.note("Mercurial pull results:\n{{pull_results}}",
                     {"pull_results": output})