コード例 #1
0
def unix(value):
    """
    Return `value` expressed as a unix timestamp, rendered with `str()`.

    `date` / `builtin_datetime` instances are passed straight to
    `_convert.datetime2unix`.  Any other value is first compared with
    10000000000: smaller values go through `_convert.unix2datetime`,
    everything else through `_convert.milli2datetime`.
    """
    # `_convert` is falsy until it has been loaded; presumably
    # `_late_import()` populates it (defined outside this block)
    if not _convert:
        _late_import()

    if not isinstance(value, (date, builtin_datetime)):
        # presumably values below the threshold are seconds and the rest
        # milliseconds -- inferred from the converter names
        if value < 10000000000:
            value = _convert.unix2datetime(value)
        else:
            value = _convert.milli2datetime(value)

    return str(_convert.datetime2unix(value))
コード例 #2
0
ファイル: strings.py プロジェクト: davehunt/ActiveData
def unix(value):
    """
    Format `value` as a unix timestamp string.

    A `date` or `builtin_datetime` is used unchanged.  Any other value is
    converted to a datetime first: with `_convert.unix2datetime` when it is
    below 10000000000, otherwise with `_convert.milli2datetime`.  The result
    of `_convert.datetime2unix` is returned wrapped in `str()`.
    """
    if not _convert:
        # presumably loads the `_convert` module global on first use
        _late_import()

    needs_conversion = not isinstance(value, (date, builtin_datetime))
    if needs_conversion:
        # the comparison is evaluated before the converter is looked up,
        # exactly as in an if/else statement
        to_datetime = (
            _convert.unix2datetime
            if value < 10000000000
            else _convert.milli2datetime
        )
        value = to_datetime(value)

    return str(_convert.datetime2unix(value))
コード例 #3
0
def _scrub(value, is_done):
    """
    Recursively reduce `value` to plain, JSON-friendly data.

    :param value: the object to convert
    :param is_done: set of `id()`s of the mappings currently being scrubbed;
                    used to detect a mapping that contains itself
    :return: the converted value; `None` when no branch below applies
    """
    kind = value.__class__

    if kind in (NoneType, NullType):
        return None
    if kind in (date, datetime):
        return float(datetime2unix(value))
    if kind is timedelta:
        return value.total_seconds()
    if kind is Date:
        return float(value.unix)
    if kind is Duration:
        return value.seconds
    if kind is str:
        return utf82unicode(value)
    if kind is Decimal:
        return float(value)

    if isinstance(value, Mapping):
        marker = id(value)
        if marker in is_done:
            Log.error("possible loop in structure detected")
        is_done.add(marker)

        scrubbed = {}
        for key, item in value.iteritems():
            if not isinstance(key, basestring):
                Log.error("keys must be strings")
            item = _scrub(item, is_done)
            # `!= None` (not `is not None`) is deliberate: it relies on the
            # scrubbed value's own equality with None
            if item != None:
                scrubbed[key] = item

        # the mapping is only "in progress" while its children are visited
        is_done.discard(marker)
        return scrubbed

    if kind in (list, DictList):
        return [_scrub(item, is_done) for item in value]

    # Numpy has its own boolean type; it is matched by class name and
    # turned into a builtin bool
    if kind.__name__ == "bool_":
        return not (value == False)

    if hasattr(value, '__json__'):
        try:
            return json._default_decoder.decode(value.__json__())
        except Exception as e:
            Log.error("problem with calling __json__()", e)
コード例 #4
0
def get_changesets(date_range=None, revision_range=None, repo=None):
    # Run `hg log` for `repo` and parse every output line into a changeset document.
    #
    #   date_range     - optional; its `min`/`max` attributes become a `--date` filter
    #   revision_range - optional; its `min`/`max` attributes become `-r min:max`
    #   repo           - provides `directory` (passed to --cwd) and `name` (stored in each document)
    #
    # NOTE(review): `repo` defaults to None, yet `repo.directory` is read unconditionally below,
    #               so a repo must effectively always be supplied.
    # NOTE(review): in the code visible here the nested `iterator()` generator is defined but never
    #               called or returned -- presumably the tail of this function is missing; confirm.
    # GET ALL CHANGESET INFO
    args = [
        "hg",
        "log",
        "--cwd",
        File(repo.directory).filename,
        "-v",
        # "-p",   # TO GET PATCH CONTENTS
        "--style",
        TEMPLATE_FILE.filename
    ]

    if date_range is not None:
        # Build the --date argument as "<unix seconds> <offset>" pairs with the offset fixed at 0.
        # `== None` is kept as written (rather than `is None`); 1 is subtracted from `max`,
        # presumably to make the upper bound exclusive -- TODO confirm.
        if date_range.max == None:
            if date_range.min == None:
                drange = ">0 0"
            else:
                drange = ">" + unicode(convert.datetime2unix(date_range.min)) + " 0"
        else:
            if date_range.min == None:
                drange = "<" + unicode(convert.datetime2unix(date_range.max) - 1) + " 0"
            else:
                drange = unicode(convert.datetime2unix(date_range.min)) + " 0 to " + unicode(convert.datetime2unix(date_range.max) - 1) + " 0"

        args.extend(["--date", drange])


    if revision_range is not None:
        args.extend(["-r", str(revision_range.min) + ":" + str(revision_range.max)])

    # stderr is merged into stdout, so any hg error text arrives on the same stream as the log lines
    proc = subprocess.Popen(
        args,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=-1
    )

    def iterator():
        # Generator: yields one scrubbed document per non-blank line of hg output.
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    # readline() returns '' only at end of stream
                    if line == '':
                        proc.wait()
                        if proc.returncode:
                            Log.error("Unable to pull hg log: return code {{return_code}}", {
                                "return_code": proc.returncode
                            })
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)


                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                # Each line must split into exactly 14 tab-separated fields; every field is
                # URL-unquoted.  A different field count raises ValueError (handled below).
                (
                    date,
                    node,
                    rev,
                    author,
                    branches,
                    files,
                    file_adds,
                    file_dels,
                    p1rev,
                    p1node,
                    parents,
                    children,
                    tags,
                    desc
                ) = (urllib.unquote(c) for c in line.split("\t"))

                # Multi-valued fields are newline-separated after unquoting; "" is dropped from the sets
                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                # subtracting an empty set is a no-op here; "" is removed later in "file_changes"
                files = set(files.split("\n")) - set()
                doc = {
                    "repos": repo.name,
                    # only the part of `date` before the first space is converted
                    "date": convert.unix2datetime(convert.value2number(date.split(" ")[0])),
                    "node": node,
                    "revision": rev,
                    "author": author,
                    "branches": set(branches.split("\n")) - {""},
                    "file_changes": files - file_adds - file_dels - {""},
                    "file_adds": file_adds,
                    "file_dels": file_dels,
                    # `-` binds tighter than `|`: "" is removed first, then "p1rev:p1node" is added
                    "parents": set(parents.split("\n")) - {""} | {p1rev+":"+p1node},
                    "children": set(children.split("\n")) - {""},
                    "tags": set(tags.split("\n")) - {""},
                    "description": desc
                }
                doc = elasticsearch.scrub(doc)
                yield doc
        except Exception, e:
            # "need more than N values to unpack" is the Python 2 message for a failed tuple
            # unpack, i.e. the line had too few fields; the offending line is reported.
            if isinstance(e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})", {
                    "message": line
                }, e)


            Log.error("Problem iterating through log", e)
コード例 #5
0
ファイル: __init__.py プロジェクト: klahnakoski/TestFailures
def _scrub(value, is_done):
    """
    Recursively reduce `value` to plain, JSON-friendly data.

    :param value: the object to convert
    :param is_done: set of `id()`s of the mappings currently being scrubbed;
                    used to detect a mapping that contains itself
    :return: the converted value; `None` when no branch below applies
    """
    kind = value.__class__

    if kind in (NoneType, NullType):
        return None
    if kind is unicode:
        # whitespace-only text is reported as None
        stripped = value.strip()
        return stripped if stripped else None
    if kind is float:
        # NaN and the infinities are reported as None
        if math.isnan(value) or math.isinf(value):
            return None
        return value
    if kind in (int, long, bool):
        return value
    if kind in (date, datetime):
        return float(datetime2unix(value))
    if kind is timedelta:
        return value.total_seconds()
    if kind is Date:
        return float(value.unix)
    if kind is Duration:
        return float(value.seconds)
    if kind is str:
        return utf82unicode(value)
    if kind is Decimal:
        return float(value)
    if kind is Dict:
        return _scrub(unwrap(value), is_done)

    if isinstance(value, Mapping):
        marker = id(value)
        if marker in is_done:
            _Log.warning("possible loop in structure detected")
            return '"<LOOP IN STRUCTURE>"'
        is_done.add(marker)

        scrubbed = {}
        for key, item in value.iteritems():
            if not isinstance(key, basestring):
                if hasattr(key, "__unicode__"):
                    key = unicode(key)
                else:
                    _Log.error("keys must be strings")
            item = _scrub(item, is_done)
            # `!= None` (not `is not None`) is deliberate; the isinstance
            # test keeps mappings even when they compare equal to None
            if item != None or isinstance(item, Mapping):
                scrubbed[key] = item

        # the mapping is only "in progress" while its children are visited
        is_done.discard(marker)
        return scrubbed

    if kind in (tuple, list, DictList):
        return [_scrub(item, is_done) for item in value]

    if kind is type:
        # classes are represented by their name
        return value.__name__

    # Numpy has its own boolean type; it is matched by class name and
    # turned into a builtin bool
    if kind.__name__ == "bool_":
        return not (value == False)

    if hasattr(value, '__json__'):
        try:
            return json._default_decoder.decode(value.__json__())
        except Exception as e:
            _Log.error("problem with calling __json__()", e)
コード例 #6
0
ファイル: __init__.py プロジェクト: davehunt/ActiveData
def _scrub(value, is_done):
    """
    Convert `value`, recursively, into plain JSON-friendly data.

    :param value: the object to convert
    :param is_done: set holding the `id()` of every mapping that is currently
                    being converted, so a self-containing mapping is noticed
    :return: the converted value; `None` when no branch below applies
    """
    cls = value.__class__

    if cls in (NoneType, NullType):
        return None
    if cls is unicode:
        # whitespace-only text is reported as None
        trimmed = value.strip()
        return trimmed if trimmed else None
    if cls is float:
        # NaN and the infinities are reported as None
        not_finite = math.isnan(value) or math.isinf(value)
        return None if not_finite else value
    if cls in (int, long, bool):
        return value
    if cls in (date, datetime):
        return float(datetime2unix(value))
    if cls is timedelta:
        return value.total_seconds()
    if cls is Date:
        return float(value.unix)
    if cls is Duration:
        return float(value.seconds)
    if cls is str:
        return utf82unicode(value)
    if cls is Decimal:
        return float(value)
    if cls is Dict:
        return _scrub(unwrap(value), is_done)

    if isinstance(value, Mapping):
        ident = id(value)
        if ident in is_done:
            _Log.warning("possible loop in structure detected")
            return '"<LOOP IN STRUCTURE>"'
        is_done.add(ident)

        result = {}
        for name, child in value.iteritems():
            if not isinstance(name, basestring):
                if hasattr(name, "__unicode__"):
                    name = unicode(name)
                else:
                    _Log.error("keys must be strings")
            child = _scrub(child, is_done)
            # `!= None` (not `is not None`) is deliberate; the isinstance
            # test keeps mappings even when they compare equal to None
            if child != None or isinstance(child, Mapping):
                result[name] = child

        # the mapping is only "in progress" while its children are visited
        is_done.discard(ident)
        return result

    if cls in (tuple, list, DictList):
        return [_scrub(child, is_done) for child in value]

    if cls is type:
        # classes are represented by their name
        return value.__name__

    # Numpy has its own boolean type; it is matched by class name and
    # turned into a builtin bool
    if cls.__name__ == "bool_":
        return not (value == False)

    if hasattr(value, '__json__'):
        try:
            return json._default_decoder.decode(value.__json__())
        except Exception as e:
            _Log.error("problem with calling __json__()", e)
コード例 #7
0
ファイル: mercurial.py プロジェクト: mozilla/ActiveData-ETL
def get_changesets(date_range=None, revision_range=None, repo=None):
    # Run `hg log` for `repo` and parse every output line into a changeset document.
    #
    #   date_range     - optional; its `min`/`max` attributes become a `--date` filter
    #   revision_range - optional; its `min`/`max` attributes become `-r min:max`
    #   repo           - provides `directory` (passed to --cwd) and `name` (stored in each document)
    #
    # NOTE(review): `repo` defaults to None, yet `repo.directory` is read unconditionally below,
    #               so a repo must effectively always be supplied.
    # NOTE(review): in the code visible here the nested `iterator()` generator is defined but never
    #               called or returned -- presumably the tail of this function is missing; confirm.
    # GET ALL CHANGESET INFO
    args = [
        "hg",
        "log",
        "--cwd",
        File(repo.directory).filename,
        "-v",
        # "-p",   # TO GET PATCH CONTENTS
        "--style",
        TEMPLATE_FILE.filename
    ]

    if date_range is not None:
        # Build the --date argument as "<unix seconds> <offset>" pairs with the offset fixed at 0.
        # `== None` is kept as written (rather than `is None`); 1 is subtracted from `max`,
        # presumably to make the upper bound exclusive -- TODO confirm.
        if date_range.max == None:
            if date_range.min == None:
                drange = ">0 0"
            else:
                drange = ">" + unicode(convert.datetime2unix(
                    date_range.min)) + " 0"
        else:
            if date_range.min == None:
                drange = "<" + unicode(
                    convert.datetime2unix(date_range.max) - 1) + " 0"
            else:
                drange = unicode(convert.datetime2unix(
                    date_range.min)) + " 0 to " + unicode(
                        convert.datetime2unix(date_range.max) - 1) + " 0"

        args.extend(["--date", drange])

    if revision_range is not None:
        args.extend(
            ["-r",
             str(revision_range.min) + ":" + str(revision_range.max)])

    # stderr is merged into stdout, so any hg error text arrives on the same stream as the log lines
    proc = subprocess.Popen(args,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            bufsize=-1)

    def iterator():
        # Generator: yields one scrubbed document per non-blank line of hg output.
        try:
            while True:
                try:
                    line = proc.stdout.readline()
                    # readline() returns '' only at end of stream
                    if line == '':
                        proc.wait()
                        if proc.returncode:
                            Log.error(
                                "Unable to pull hg log: return code {{return_code}}",
                                {"return_code": proc.returncode})
                        return
                except Exception, e:
                    Log.error("Problem getting another line", e)

                if line.strip() == "":
                    continue
                Log.note(line)

                # changeset = "{date|hgdate|urlescape}\t{node}\t{rev}\t{author|urlescape}\t{branches}\t\t\t\t{p1rev}\t{p1node}\t{parents}\t{children}\t{tags}\t{desc|urlescape}\n"
                # branch = "{branch}%0A"
                # parent = "{parent}%0A"
                # tag = "{tag}%0A"
                # child = "{child}%0A"
                # Each line must split into exactly 14 tab-separated fields; every field is
                # URL-unquoted.  A different field count raises ValueError (handled below).
                (date, node, rev, author, branches, files, file_adds,
                 file_dels, p1rev, p1node, parents, children, tags,
                 desc) = (urllib.unquote(c) for c in line.split("\t"))

                # Multi-valued fields are newline-separated after unquoting; "" is dropped from the sets
                file_adds = set(file_adds.split("\n")) - {""}
                file_dels = set(file_dels.split("\n")) - {""}
                # subtracting an empty set is a no-op here; "" is removed later in "file_changes"
                files = set(files.split("\n")) - set()
                doc = {
                    "repos":
                    repo.name,
                    # only the part of `date` before the first space is converted
                    "date":
                    convert.unix2datetime(
                        convert.value2number(date.split(" ")[0])),
                    "node":
                    node,
                    "revision":
                    rev,
                    "author":
                    author,
                    "branches":
                    set(branches.split("\n")) - {""},
                    "file_changes":
                    files - file_adds - file_dels - {""},
                    "file_adds":
                    file_adds,
                    "file_dels":
                    file_dels,
                    # `-` binds tighter than `|`: "" is removed first, then "p1rev:p1node" is added
                    "parents":
                    set(parents.split("\n")) - {""} | {p1rev + ":" + p1node},
                    "children":
                    set(children.split("\n")) - {""},
                    "tags":
                    set(tags.split("\n")) - {""},
                    "description":
                    desc
                }
                doc = elasticsearch.scrub(doc)
                yield doc
        except Exception, e:
            # "need more than N values to unpack" is the Python 2 message for a failed tuple
            # unpack, i.e. the line had too few fields; the offending line is reported.
            if isinstance(
                    e, ValueError) and e.message.startswith("need more than "):
                Log.error("Problem iterating through log ({{message}})",
                          {"message": line}, e)

            Log.error("Problem iterating through log", e)