コード例 #1
0
def _scrub(r):
    try:
        if r == None:
            return None
        elif isinstance(r, basestring):
            if r == "":
                return None
            return r
        elif Math.is_number(r):
            return convert.value2number(r)
        elif isinstance(r, Mapping):
            if isinstance(r, Dict):
                r = object.__getattribute__(r, "_dict")
            output = {}
            for k, v in r.items():
                v = _scrub(v)
                if v != None:
                    output[k.lower()] = v
            if len(output) == 0:
                return None
            return output
        elif hasattr(r, '__iter__'):
            if isinstance(r, DictList):
                r = r.list
            output = []
            for v in r:
                v = _scrub(v)
                if v != None:
                    output.append(v)
            if not output:
                return None
            if len(output) == 1:
                return output[0]
            try:
                return sort(output)
            except Exception:
                return output
        else:
            return r
    except Exception, e:
        Log.warning("Can not scrub: {{json}}",  json= r)
コード例 #2
0
def _scrub(r):
    try:
        if r == None:
            return None
        elif isinstance(r, basestring):
            if r == "":
                return None
            return r
        elif Math.is_number(r):
            return convert.value2number(r)
        elif isinstance(r, Mapping):
            if isinstance(r, Dict):
                r = object.__getattribute__(r, "_dict")
            output = {}
            for k, v in r.items():
                v = _scrub(v)
                if v != None:
                    output[k.lower()] = v
            if len(output) == 0:
                return None
            return output
        elif hasattr(r, '__iter__'):
            if isinstance(r, DictList):
                r = r.list
            output = []
            for v in r:
                v = _scrub(v)
                if v != None:
                    output.append(v)
            if not output:
                return None
            if len(output) == 1:
                return output[0]
            try:
                return sort(output)
            except Exception:
                return output
        else:
            return r
    except Exception, e:
        Log.warning("Can not scrub: {{json}}",  json= r)
コード例 #3
0
    def iterator():
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':
                        proc.wait()
                        if proc.returncode:
                            Log.error("Unable to pull hg log: return code {{return_code}}", {
                                "return_code": proc.returncode
                            })
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)


                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (
                    date,
                    node,
                    rev,
                    author,
                    branches,
                    files,
                    file_adds,
                    file_dels,
                    p1rev,
                    p1node,
                    parents,
                    children,
                    tags,
                    desc
                ) = (urllib.unquote(c) for c in line.split("\t"))

                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - set()
                doc = {
                    "repos": repo.name,
                    "date": convert.unix2datetime(convert.value2number(date.split(" ")[0])),
                    "node": node,
                    "revision": rev,
                    "author": author,
                    "branches": set(branches.split("\n")) - {""},
                    "file_changes": files - file_adds - file_dels - {""},
                    "file_adds": file_adds,
                    "file_dels": file_dels,
                    "parents": set(parents.split("\n")) - {""} | {p1rev+":"+p1node},
                    "children": set(children.split("\n")) - {""},
                    "tags": set(tags.split("\n")) - {""},
                    "description": desc
                }
                doc = elasticsearch.scrub(doc)
                yield doc
        except Exception, e:
            if isinstance(e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})", {
                    "message": line
                }, e)


            Log.error("Problem iterating through log", e)
コード例 #4
0
ファイル: mercurial.py プロジェクト: mozilla/ActiveData-ETL
    def iterator():
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    if line == '':
                        proc.wait()
                        if proc.returncode:
                            Log.error(
                                "Unable to pull hg log: return code {{return_code}}",
                                {"return_code": proc.returncode})
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)

                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                (date, node, rev, author, branches, files, file_adds,
                 file_dels, p1rev, p1node, parents, children, tags,
                 desc) = (urllib.unquote(c) for c in line.split("\t"))

                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                files = set(files.split("\n")) - set()
                doc = {
                    "repos":
                    repo.name,
                    "date":
                    convert.unix2datetime(
                        convert.value2number(date.split(" ")[0])),
                    "node":
                    node,
                    "revision":
                    rev,
                    "author":
                    author,
                    "branches":
                    set(branches.split("\n")) - {""},
                    "file_changes":
                    files - file_adds - file_dels - {""},
                    "file_adds":
                    file_adds,
                    "file_dels":
                    file_dels,
                    "parents":
                    set(parents.split("\n")) - {""} | {p1rev + ":" + p1node},
                    "children":
                    set(children.split("\n")) - {""},
                    "tags":
                    set(tags.split("\n")) - {""},
                    "description":
                    desc
                }
                doc = elasticsearch.scrub(doc)
                yield doc
        except Exception, e:
            if isinstance(
                    e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})",
                          {"message": line}, e)

            Log.error("Problem iterating through log", e)