def get_file(ref, url):
    from pyLibrary.env.files import File

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]
        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1
            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")
    try:
        if DEBUG:
            _Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception, e:
        content = None
        _Log.error("Could not read file {{filename}}", filename=path, cause=e)
def safe_size(source):
    """
    READ THE source UP TO SOME LIMIT, THEN COPY TO A FILE IF TOO BIG
    RETURN A str() OR A FileString()
    """
    if source is None:
        return None

    total_bytes = 0
    bytes = []
    b = source.read(MIN_READ_SIZE)
    while b:
        total_bytes += len(b)
        bytes.append(b)
        if total_bytes > MAX_STRING_SIZE:
            try:
                data = FileString(TemporaryFile())
                for bb in bytes:
                    data.write(bb)
                del bytes
                del bb
                b = source.read(MIN_READ_SIZE)
                while b:
                    total_bytes += len(b)
                    data.write(b)
                    b = source.read(MIN_READ_SIZE)
                data.seek(0)
                Log.note("Using file of size {{length}} instead of str()", length=total_bytes)
                return data
            except Exception, e:
                Log.error("Could not write file > {{num}} bytes", num=total_bytes, cause=e)
        b = source.read(MIN_READ_SIZE)
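# A minimal, self-contained sketch of the same spill-to-disk idea using only the
# standard library. This is illustration, not the library's implementation; the
# _MIN_READ/_MAX_STRING constants below are hypothetical stand-ins for
# MIN_READ_SIZE/MAX_STRING_SIZE.
from tempfile import TemporaryFile

_MIN_READ = 8192
_MAX_STRING = 10 * 1024 * 1024

def _safe_size_sketch(source):
    buffered = []
    total = 0
    chunk = source.read(_MIN_READ)
    while chunk:
        total += len(chunk)
        buffered.append(chunk)
        if total > _MAX_STRING:
            spill = TemporaryFile()
            spill.write(b"".join(buffered))
            for chunk in iter(lambda: source.read(_MIN_READ), b""):
                spill.write(chunk)
            spill.seek(0)
            return spill  # file-like object instead of one huge string
        chunk = source.read(_MIN_READ)
    return b"".join(buffered)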
def groupby_Multiset(data, min_size, max_size):
    # GROUP multiset BASED ON POPULATION OF EACH KEY, TRYING TO STAY IN min/max LIMITS
    if min_size == None:
        min_size = 0

    total = 0
    i = 0
    g = list()
    for k, c in data.items():
        if total < min_size or total + c < max_size:
            total += c
            g.append(k)
        elif total < max_size:
            yield (i, g)
            i += 1
            total = c
            g = [k]

        if total >= max_size:
            Log.error(
                "({{min}}, {{max}}) range is too strict given step of {{increment}}",
                min=min_size,
                max=max_size,
                increment=c
            )

    if g:
        yield (i, g)
def _convert_edge(self, edge):
    if isinstance(edge, basestring):
        return Dict(
            name=edge,
            value=edge,
            domain=self._convert_domain()
        )
    else:
        edge = wrap(edge)
        if not edge.name and not isinstance(edge.value, basestring):
            Log.error("You must name compound edges: {{edge}}", edge=edge)

        if isinstance(edge.value, (Mapping, list)) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = self._convert_domain()
            domain.dimension = Dict(fields=edge.value)

            return Dict(
                name=edge.name,
                allowNulls=False if edge.allowNulls is False else True,
                domain=domain
            )

        domain = self._convert_domain(edge.domain)
        return Dict(
            name=coalesce(edge.name, edge.value),
            value=edge.value,
            range=edge.range,
            allowNulls=False if edge.allowNulls is False else True,
            domain=domain
        )
def execute(self, requests):
    """
    RETURN A GENERATOR THAT HAS len(requests) RESULTS (ANY ORDER)
    EXPECTING requests TO BE A list OF dicts, EACH dict IS USED AS kwargs TO GIVEN functions
    """
    if not isinstance(requests, (list, tuple, GeneratorType, Iterable)):
        Log.error("Expecting requests to be a list or generator", stack_depth=1)
    else:
        requests = list(requests)

    # FILL QUEUE WITH WORK
    self.inbound.extend(requests)

    num = len(requests)

    def output():
        for i in xrange(num):
            result = self.outbound.pop()
            if "exception" in result:
                raise result["exception"]
            else:
                yield result["response"]

    if self.outbound is not None:
        return output()
    else:
        return
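# Hedged usage sketch (not from the original source): each request dict becomes the
# kwargs for one call of the worker function, and results come back in any order.
# The pool name "Multithread" and the worker signature are assumptions for illustration.
#
#     with Multithread(convert_file, threads=4) as pool:
#         for result in pool.execute([{"filename": f} for f in filenames]):
#             print result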
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    RETURN LIST OF (keys, values) PAIRS, WHERE
        keys IS THE SET OF key VALUES FOR THE GROUP
        values IS THE LIST OF ALL data THAT HAS THOSE keys
    contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    """
    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Container):
        return data.groupby(keys)

    try:
        keys = listwrap(keys)
        get_key = jx_expression_to_function(keys)
        if not contiguous:
            data = sorted(data, key=get_key)

        def _output():
            for g, v in itertools.groupby(data, get_key):
                group = Dict()
                for k, gg in zip(keys, g):
                    group[k] = gg
                yield (group, wrap(v))

        return _output()
    except Exception, e:
        Log.error("Problem grouping", e)
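# A minimal, self-contained sketch of the grouping semantics above, using only the
# standard library (the wrap()/Dict() containers and jx expressions of the real
# function are omitted; _groupby_sketch is a hypothetical name for illustration):
import itertools

def _groupby_sketch(data, keys):
    get_key = lambda row: tuple(row.get(k) for k in keys)
    for g, v in itertools.groupby(sorted(data, key=get_key), get_key):
        yield dict(zip(keys, g)), list(v)

# list(_groupby_sketch([{"a": 1, "b": 2}, {"a": 1, "b": 3}], ["a"]))
# -> [({'a': 1}, [{'a': 1, 'b': 2}, {'a': 1, 'b': 3}])]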
def groupby_size(data, size):
    if hasattr(data, "next"):
        iterator = data
    elif hasattr(data, "__iter__"):
        iterator = data.__iter__()
    else:
        Log.error("do not know how to handle this type")

    done = DictList()

    def more():
        output = DictList()
        for i in range(size):
            try:
                output.append(iterator.next())
            except StopIteration:
                done.append(True)
                break
        return output

    # THIS IS LAZY
    i = 0
    while True:
        output = more()
        yield (i, output)
        if len(done) > 0:
            break
        i += 1
def get_index(self, row):
    if self.computed_domain:
        try:
            part = row[self.start]
            return self.domain.getIndexByKey(part["key"])
        except Exception, e:
            Log.error("problem", cause=e)
def process_test_result(source_key, source, destination, please_stop=None):
    path = key2path(source_key)
    destination.delete({"and": [
        {"term": {"etl.source.id": path[1]}},
        {"term": {"etl.source.source.id": path[0]}}
    ]})

    lines = source.read_lines()

    keys = []
    data = []
    for l in lines:
        record = convert.json2value(l)
        if record._id == None:
            continue
        record.result.crash_result = None  # TODO: Remove me after May 2015
        keys.append(record._id)
        data.append({
            "id": record._id,
            "value": record
        })
        record._id = None
    if data:
        try:
            destination.extend(data)
        except Exception, e:
            if "Can not decide on index by build.date" in e:
                if source.bucket.name == "ekyle-test-result":
                    # KNOWN CORRUPTION
                    # TODO: REMOVE LATER (today = Mar2015)
                    delete_list = source.bucket.keys(prefix=key_prefix(source_key))
                    for d in delete_list:
                        source.bucket.delete_key(d)
            Log.error("Can not add to sink", e)
def _decode(index, parent_path, path, name2index, expected_vars=NO_VARS):
    c, index = skip_whitespace(index)

    if not path:
        if c != b"[":
            # TREAT VALUE AS SINGLE-VALUE ARRAY
            yield _decode_token(index, c, parent_path, path, name2index, None, expected_vars)
        else:
            c, index = skip_whitespace(index)
            if c == b']':
                return  # EMPTY ARRAY

            while True:
                value, index = _decode_token(index, c, parent_path, path, name2index, None, expected_vars)
                c, index = skip_whitespace(index)
                if c == b']':
                    yield value, index
                    return
                elif c == b',':
                    c, index = skip_whitespace(index)
                    yield value, index
    else:
        if c != b'{':
            Log.error("Expecting all objects to at least have {{path}}", path=path[0])

        for j, i in _decode_object(index, parent_path, path, name2index, expected_vars=expected_vars):
            yield j, i
def _decode_token(index, c, full_path, path, name2index, destination, expected_vars):
    if c == b'{':
        if not expected_vars:
            index = jump_to_end(index, c)
            value = None
        elif expected_vars[0] == ".":
            json.mark(index - 1)
            index = jump_to_end(index, c)
            value = json_decoder(json.release(index).decode("utf8"))
        else:
            count = 0
            for v, i in _decode_object(index, full_path, path, name2index, destination, expected_vars=expected_vars):
                index = i
                value = v
                count += 1
            if count != 1:
                Log.error("Expecting object, nothing nested")
    elif c == b'[':
        if not expected_vars:
            index = jump_to_end(index, c)
            value = None
        else:
            json.mark(index - 1)
            index = jump_to_end(index, c)
            value = json_decoder(json.release(index).decode("utf8"))
    else:
        if expected_vars and expected_vars[0] == ".":
            value, index = simple_token(index, c)
        else:
            index = jump_to_end(index, c)
            value = None

    return value, index
def create(self):
    try:
        os.makedirs(self._filename)
    except Exception, e:
        from pyLibrary.debugs.logs import Log
        Log.error("Could not make directory {{dir_name}}", dir_name=self._filename, cause=e)
def simple_token(index, c):
    if c == b'"':
        json.mark(index - 1)
        while True:
            c = json[index]
            index += 1
            if c == b"\\":
                index += 1
            elif c == b'"':
                break
        return json_decoder(json.release(index).decode("utf8")), index
    elif c in b"{[":
        Log.error("Expecting a primitive value")
    elif c == b"t" and json.slice(index, index + 3) == "rue":
        return True, index + 3
    elif c == b"n" and json.slice(index, index + 3) == "ull":
        return None, index + 3
    elif c == b"f" and json.slice(index, index + 4) == "alse":
        return False, index + 4
    else:
        json.mark(index - 1)
        while True:
            c = json[index]
            if c in b',]}':
                break
            index += 1
        return float(json.release(index)), index
def write(self, data):
    if not self.parent.exists:
        self.parent.create()
    with open(self._filename, "wb") as f:
        if isinstance(data, list) and self.key:
            from pyLibrary.debugs.logs import Log
            Log.error("list of data and keys are not supported, encrypt before sending to file")

        if isinstance(data, list):
            pass
        elif isinstance(data, basestring):
            data = [data]
        elif hasattr(data, "__iter__"):
            pass

        for d in data:
            if not isinstance(d, unicode):
                from pyLibrary.debugs.logs import Log
                Log.error("Expecting unicode data only")
            if self.key:
                f.write(crypto.encrypt(d, self.key).encode("utf8"))
            else:
                f.write(d.encode("utf8"))
def __init__(self, filename, buffering=2 ** 14, suffix=None):
    """
    YOU MAY SET filename TO {"path":p, "key":k} FOR CRYPTO FILES
    """
    if filename == None:
        from pyLibrary.debugs.logs import Log
        Log.error("File must be given a filename")
    elif isinstance(filename, basestring):
        self.key = None
        if filename.startswith("~"):
            home_path = os.path.expanduser("~")
            if os.sep == "\\":
                home_path = home_path.replace(os.sep, "/")
            if home_path.endswith("/"):
                home_path = home_path[:-1]
            filename = home_path + filename[1::]
        self._filename = filename.replace(os.sep, "/")  # USE UNIX STANDARD
    else:
        self.key = convert.base642bytearray(filename.key)
        self._filename = "/".join(filename.path.split(os.sep))  # USE UNIX STANDARD

    while self._filename.find(".../") >= 0:
        # LET ... REFER TO GRANDPARENT, .... REFER TO GREAT-GRAND-PARENT, etc...
        self._filename = self._filename.replace(".../", "../../")
    self.buffering = buffering

    if suffix:
        self._filename = File.add_suffix(self._filename, suffix)
def latin12unicode(value):
    if isinstance(value, unicode):
        Log.error("can not convert unicode from latin1")
    try:
        return unicode(value.decode('iso-8859-1'))
    except Exception, e:
        Log.error("Can not convert {{value|quote}} to unicode", value=value)
def json2value(json_string, params={}, flexible=False, leaves=False):
    """
    :param json_string: THE JSON
    :param params: STANDARD JSON PARAMS
    :param flexible: REMOVE COMMENTS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(l) for l in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        try:
            value = wrap(json_decoder(unicode(json_string)))
        except Exception, e:
            Log.error("can not decode\n{{content}}", content=json_string, cause=e)

        if leaves:
            value = wrap_leaves(value)

        return value
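# Hedged usage sketch (not from the original source): flexible=True tolerates comments
# and trailing commas, and leaves=True expands dot-delimited keys into nested objects.
# A self-contained illustration of the leaves expansion (_wrap_leaves_sketch is a
# hypothetical helper, not the library's wrap_leaves):
def _wrap_leaves_sketch(flat):
    out = {}
    for dotted, value in flat.items():
        node = out
        parts = dotted.split(".")
        for p in parts[:-1]:
            node = node.setdefault(p, {})
        node[parts[-1]] = value
    return out

# _wrap_leaves_sketch({"build.date": 1423499581}) -> {"build": {"date": 1423499581}}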
def datetime2string(value, format="%Y-%m-%d %H:%M:%S"):
    try:
        return value.strftime(format)
    except Exception, e:
        from pyLibrary.debugs.logs import Log
        Log.error("Can not format {{value}} with {{format}}", value=value, format=format, cause=e)
def string2url(value):
    if isinstance(value, unicode):
        return "".join([_map2url[c] for c in unicode2latin1(value)])
    elif isinstance(value, str):
        return "".join([_map2url[c] for c in value])
    else:
        Log.error("Expecting a string")
def __init__(
    self,
    exchange,  # name of the Pulse exchange
    topic,  # message name pattern to subscribe to ('#' is wildcard)
    target=None,  # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETED WITHOUT EXCEPTION
    target_queue=None,  # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS
    host='pulse.mozilla.org',  # url to connect
    port=5671,  # tcp port
    user=None,
    password=None,
    vhost="/",
    start=0,  # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
    ssl=True,
    applabel=None,
    heartbeat=False,  # True to also get the Pulse heartbeat message
    durable=False,  # True to keep queue after shutdown
    serializer='json',
    broker_timezone='GMT',
    settings=None
):
    self.target_queue = target_queue
    self.pulse_target = target
    if (target_queue == None and target == None) or (target_queue != None and target != None):
        Log.error("Expecting a queue (for fast digesters) or a target (for slow digesters)")

    Thread.__init__(self, name="Pulse consumer for " + settings.exchange, target=self._worker)
    self.settings = settings
    settings.callback = self._got_result
    settings.user = coalesce(settings.user, settings.username)
    settings.applabel = coalesce(settings.applable, settings.queue, settings.queue_name)
    settings.topic = topic

    self.pulse = ModifiedGenericConsumer(settings, connect=True, **settings)
    self.count = coalesce(start, 0)
    self.start()
def _convert_in(op, term):
    if not term:
        Log.error("Expecting a term")
    if not isinstance(term, Mapping):
        Log.error("Expecting {{op}} to have dict value", op=op)
    var, val = term.items()[0]

    if isinstance(val, list):
        v2 = [vv for vv in val if vv != None]

        if len(v2) == 0:
            if len(val) == 0:
                return False
            else:
                return {"missing": {"field": var}}

        if len(v2) == 1:
            output = {"term": {var: v2[0]}}
        else:
            output = {"terms": {var: v2}}

        if len(v2) != len(val):
            output = {"or": [
                {"missing": {"field": var}},
                output
            ]}
        return output
    else:
        return {"term": term}
def __init__(self, **desc):
    Domain.__init__(self, **desc)
    self.type = "range"
    self.NULL = Null

    if self.partitions:
        # IGNORE THE min, max, interval
        if not self.key:
            Log.error("Must have a key value")

        parts = listwrap(self.partitions)
        for i, p in enumerate(parts):
            self.min = Math.min(self.min, p.min)
            self.max = Math.max(self.max, p.max)
            if p.dataIndex != None and p.dataIndex != i:
                Log.error("Expecting `dataIndex` to agree with the order of the parts")
            if p[self.key] == None:
                Log.error("Expecting all parts to have {{key}} as a property", key=self.key)
            p.dataIndex = i

        # VERIFY PARTITIONS DO NOT OVERLAP, HOLES ARE FINE
        # (skip comparing a partition with itself, otherwise every part "overlaps")
        for p, q in itertools.product(parts, parts):
            if p is not q and p.min <= q.min and q.min < p.max:
                Log.error("partitions overlap!")

        self.partitions = parts
        return
    elif any([self.min == None, self.max == None, self.interval == None]):
        Log.error("Can not handle missing parameter")

    self.key = "min"
    self.partitions = wrap([
        {"min": v, "max": v + self.interval, "dataIndex": i}
        for i, v in enumerate(frange(self.min, self.max, self.interval))
    ])
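# Hedged illustration (not from the original source): with min=0, max=10, interval=5 and
# no explicit partitions, the constructor above would generate parts along the lines of
# (assuming frange() yields the interval start points):
#
#     [{"min": 0, "max": 5, "dataIndex": 0},
#      {"min": 5, "max": 10, "dataIndex": 1}]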
def execute(
    self,
    command,
    param=None,
    retry=True  # IF command FAILS, JUST THROW ERROR
):
    if param:
        command = expand_template(command, self.quote_param(param))

    output = None
    done = False
    while not done:
        try:
            with self.locker:
                if not self.connection:
                    self._connect()
                with Closer(self.connection.cursor()) as curs:
                    curs.execute(command)
                    if curs.rowcount >= 0:
                        output = curs.fetchall()
                self.connection.commit()
                done = True
        except Exception, e:
            try:
                self.connection.rollback()
                # TODO: FIGURE OUT WHY rollback() DOES NOT HELP
                self.connection.close()
            except Exception, f:
                pass
            self.connection = None
            self._connect()
            if not retry:
                Log.error("Problem with command:\n{{command|indent}}", command=command, cause=e)
def _get_from_elasticsearch(self, revision, locale=None):
    rev = revision.changeset.id
    query = {
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [
                {"prefix": {"changeset.id": rev[0:12]}},
                {"term": {"branch.name": revision.branch.name}},
                {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}}
            ]}
        }},
        "size": 2000,
    }
    try:
        docs = self.es.search(query, timeout=120).hits.hits
        if len(docs) > 1:
            for d in docs:
                if d._id.endswith(d._source.branch.locale):
                    return d._source
            Log.warning("expecting no more than one document")

        return docs[0]._source
    except Exception, e:
        Log.warning("Bad ES call", e)
        return None
def read_settings(filename=None, defs=None):
    # READ SETTINGS
    if filename:
        settings_file = File(filename)
        if not settings_file.exists:
            Log.error("Can not file settings file {{filename}}", {
                "filename": settings_file.abspath
            })
        settings = ref.get("file:///" + settings_file.abspath)
        if defs:
            settings.args = argparse(defs)
        return settings
    else:
        defs = listwrap(defs)
        defs.append({
            "name": ["--settings", "--settings-file", "--settings_file"],
            "help": "path to JSON file with settings",
            "type": str,
            "dest": "filename",
            "default": "./settings.json",
            "required": False
        })
        args = argparse(defs)
        settings = ref.get("file://" + args.filename.replace(os.sep, "/"))
        settings.args = args
        return settings
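# Hedged usage sketch (not from the original source): typical startup code lets the
# --settings command-line flag pick the JSON file and merges any extra argument defs.
#
#     settings = read_settings(defs=[{"name": "--id", "help": "run id", "type": str}])
#     Log.start(settings.debug)   # settings.args carries the parsed command line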
def insert_list(self, table_name, records):
    if not records:
        return

    columns = set()
    for r in records:
        columns |= set(r.keys())
    columns = jx.sort(columns)

    try:
        self.execute(
            "DELETE FROM " + self.quote_column(table_name) + " WHERE _id IN {{ids}}",
            {"ids": self.quote_column([r["_id"] for r in records])}
        )

        command = \
            "INSERT INTO " + self.quote_column(table_name) + "(" + \
            ",".join([self.quote_column(k) for k in columns]) + \
            ") VALUES " + ",\n".join([
                "(" + ",".join([self.quote_value(r.get(k, None)) for k in columns]) + ")"
                for r in records
            ])
        self.execute(command)
    except Exception, e:
        Log.error("problem with insert", e)
def _convert_from(self, frum):
    if isinstance(frum, basestring):
        return Dict(name=frum)
    elif isinstance(frum, (Container, Query)):
        return frum
    else:
        Log.error("Expecting from clause to be a name, or a container")
def disconnect():
    with suppress_exception:
        self.target_queue.close()
        Log.note("stop put into queue")

    self.pulse.disconnect()
    Log.note("pulse listener was given a disconnect()")
def send(self, topic, message):
    """Publishes a pulse message to the proper exchange."""
    if not message:
        Log.error("Expecting a message")

    message._prepare()

    if not self.connection:
        self.connect()

    producer = Producer(
        channel=self.connection,
        exchange=Exchange(self.settings.exchange, type='topic'),
        routing_key=topic
    )

    # The message is actually a simple envelope format with a payload and
    # some metadata.
    final_data = Dict(
        payload=message.data,
        _meta=set_default({
            'exchange': self.settings.exchange,
            'routing_key': message.routing_key,
            'serializer': self.settings.serializer,
            'sent': time_to_string(datetime.datetime.now(timezone(self.settings.broker_timezone))),
            'count': self.count
        }, message.metadata)
    )

    producer.publish(jsons.scrub(final_data), serializer=self.settings.serializer)
    self.count += 1
def quote_value(self, value):
    """
    convert values to mysql code for the same
    mostly delegate directly to the mysql lib, but some exceptions exist
    """
    try:
        if value == None:
            return "NULL"
        elif isinstance(value, SQL):
            if not value.param:
                # value.template CAN BE MORE THAN A TEMPLATE STRING
                return self.quote_sql(value.template)
            param = {k: self.quote_sql(v) for k, v in value.param.items()}
            return expand_template(value.template, param)
        elif isinstance(value, basestring):
            return self.db.literal(value)
        elif isinstance(value, datetime):
            return "str_to_date('" + value.strftime("%Y%m%d%H%M%S") + "', '%Y%m%d%H%i%s')"
        elif hasattr(value, '__iter__'):
            return self.db.literal(json_encode(value))
        elif isinstance(value, Mapping):
            return self.db.literal(json_encode(value))
        elif Math.is_number(value):
            return unicode(value)
        else:
            return self.db.literal(value)
    except Exception, e:
        Log.error("problem quoting SQL", e)
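# Hedged illustration (not from the original source) of what the branches above produce,
# assuming a MySQLdb-style literal(); exact escaping depends on the driver:
#
#     quote_value(None)                      -> "NULL"
#     quote_value(42)                        -> "42"
#     quote_value(datetime(2015, 3, 1))      -> "str_to_date('20150301000000', '%Y%m%d%H%i%s')"
#     quote_value({"a": 1})                  -> the JSON string '{"a": 1}', quoted by literal()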
def __str__(self):
    Log.error("not implemented")
def remove(self, x):
    Log.error("not implemented")
def parse_columns(parent_path, esProperties):
    """
    RETURN THE COLUMN DEFINITIONS IN THE GIVEN esProperties OBJECT
    """
    columns = DictList()
    for name, property in esProperties.items():
        if parent_path:
            path = join_field(split_field(parent_path) + [name])
        else:
            path = name

        if property.type == "nested" and property.properties:
            # NESTED TYPE IS A NEW TYPE DEFINITION
            # MARKUP CHILD COLUMNS WITH THE EXTRA DEPTH
            child_columns = deepcopy(parse_columns(path, property.properties))
            self_columns = deepcopy(child_columns)
            for c in self_columns:
                c.depth += 1
            columns.extend(self_columns)
            columns.append({
                "name": join_field(split_field(path)[1::]),
                "type": "nested",
                "useSource": False
            })

            if path not in INDEX_CACHE:
                pp = split_field(parent_path)
                for i in qb.reverse(range(len(pp))):
                    c = INDEX_CACHE.get(join_field(pp[:i + 1]), None)
                    if c:
                        INDEX_CACHE[path] = c.copy()
                        break
                else:
                    Log.error("Can not find parent")

                INDEX_CACHE[path].name = path
            INDEX_CACHE[path].columns = child_columns
            continue

        if property.properties:
            child_columns = parse_columns(path, property.properties)
            columns.extend(child_columns)
            columns.append({
                "name": join_field(split_field(path)[1::]),
                "type": "object",
                "useSource": False
            })

        if property.dynamic:
            continue
        if not property.type:
            continue
        if property.type == "multi_field":
            property.type = property.fields[name].type  # PULL DEFAULT TYPE
            # ITERATE (name, property) PAIRS; THE ORIGINAL `for i, n, p in enumerate(...)` WOULD NOT UNPACK
            for i, (n, p) in enumerate(property.fields.items()):
                if n == name:
                    # DEFAULT
                    columns.append({
                        "name": join_field(split_field(path)[1::]),
                        "type": p.type,
                        "useSource": p.index == "no"
                    })
                else:
                    columns.append({
                        "name": join_field(split_field(path)[1::]) + "\\." + n,
                        "type": p.type,
                        "useSource": p.index == "no"
                    })
            continue

        if property.type in ["string", "boolean", "integer", "date", "long", "double"]:
            columns.append({
                "name": join_field(split_field(path)[1::]),
                "type": property.type,
                "useSource": property.index == "no"
            })
            if property.index_name and name != property.index_name:
                columns.append({
                    "name": property.index_name,
                    "type": property.type,
                    "useSource": property.index == "no"
                })
        elif property.enabled == None or property.enabled == False:
            columns.append({
                "name": join_field(split_field(path)[1::]),
                "type": "object",
                "useSource": True
            })
        else:
            Log.warning("unknown type {{type}} for property {{path}}", type=property.type, path=path)

    return columns
def compileEdges2Term(mvel_compiler, edges, constants):
    """
    TERMS ARE ALWAYS ESCAPED SO THEY CAN BE COMPOUNDED WITH PIPE (|)

    GIVE MVEL CODE THAT REDUCES A UNIQUE TUPLE OF PARTITIONS DOWN TO A UNIQUE TERM
    GIVE LAMBDA THAT WILL CONVERT THE TERM BACK INTO THE TUPLE
    RETURNS TUPLE OBJECT WITH "type" and "value" ATTRIBUTES.
    "type" CAN HAVE A VALUE OF "script", "field" OR "count"
    CAN USE THE constants (name, value pairs)
    """

    # IF THE QUERY IS SIMPLE ENOUGH, THEN DO NOT USE TERM PACKING
    edge0 = edges[0]

    if len(edges) == 1 and edge0.domain.type in ["set", "default"]:
        # THE TERM RETURNED WILL BE A MEMBER OF THE GIVEN SET
        def temp(term):
            return DictList([edge0.domain.getPartByKey(term)])

        if edge0.value and isKeyword(edge0.value):
            return Dict(field=edge0.value, term2parts=temp)
        elif COUNT(edge0.domain.dimension.fields) == 1:
            return Dict(field=edge0.domain.dimension.fields[0], term2parts=temp)
        elif not edge0.value and edge0.domain.partitions:
            script = mvel_compiler.Parts2TermScript(edge0.domain)
            return Dict(expression=script, term2parts=temp)
        else:
            return Dict(expression=mvel_compiler.compile_expression(edge0.value, constants), term2parts=temp)

    mvel_terms = []      # FUNCTION TO PACK TERMS
    fromTerm2Part = []   # UNPACK TERMS BACK TO PARTS
    for e in edges:
        domain = e.domain
        fields = domain.dimension.fields

        if not e.value and fields:
            code, decode = mvel_compiler.Parts2Term(e.domain)
            t = Dict(toTerm=code, fromTerm=decode)
        elif fields:
            Log.error("not expected")
        elif e.domain.type == "time":
            t = compileTime2Term(e)
        elif e.domain.type == "duration":
            t = compileDuration2Term(e)
        elif e.domain.type in domains.ALGEBRAIC:
            t = compileNumeric2Term(e)
        elif e.domain.type == "set" and not fields:
            def fromTerm(term):
                return e.domain.getPartByKey(term)

            code, decode = mvel_compiler.Parts2Term(e.domain)
            t = Dict(toTerm=code, fromTerm=decode)
        else:
            t = compileString2Term(e)

        if not t.toTerm.body:
            mvel_compiler.Parts2Term(e.domain)
            Log.unexpected("what?")

        fromTerm2Part.append(t.fromTerm)
        mvel_terms.append(t.toTerm.body)

    # REGISTER THE DECODE FUNCTION
    def temp(term):
        terms = term.split('|')
        output = DictList([t2p(t) for t, t2p in zip(terms, fromTerm2Part)])
        return output

    return Dict(expression=mvel_compiler.compile_expression("+'|'+".join(mvel_terms), constants), term2parts=temp)
def __delslice__(self, i, j):
    _Log.error(
        "Can not perform del on slice: modulo arithmetic was performed on the parameters. You can try using clear()"
    )
def select(self, key):
    if not _Log:
        _late_import()
    _Log.error("Not supported. Use `get()`")
def pop(self):
    Log.error("not implemented")
def extend(self, values):
    Log.error("not implemented")
def sub(self, value):
    if value != self.agg[0]:
        Log.error("Not a sliding window")
    self.agg = self.agg[1:]
def __getslice__(self, i, j):
    Log.error(
        "slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes. Use [start:stop:step]"
    )
        else:
            new_value = old_value.__class__(value)  # TRY TO MAKE INSTANCE OF SAME CLASS
    except Exception, e:
        old_value = None
        new_value = value

    try:
        setattr(obj, attr_name, new_value)
        return old_value
    except Exception, e:
        try:
            obj[attr_name] = new_value
            return old_value
        except Exception, f:
            from pyLibrary.debugs.logs import Log
            Log.error(PATH_NOT_FOUND)


def lower_match(value, candidates):
    return [v for v in candidates if v.lower() == value.lower()]


def wrap(v):
    type_ = _get(v, "__class__")

    if type_ is dict:
        m = Dict(v)
        return m
        # m = object.__new__(Dict)
        # object.__setattr__(m, "_dict", v)
        # return m
def append(self, val):
    Log.error("not implemented")
def pe_filter(filter, data, depth):
    """
    PARTIAL EVALUATE THE filter BASED ON data GIVEN
    """
    if filter is TRUE_FILTER:
        return True
    if filter is FALSE_FILTER:
        return False

    filter = wrap(filter)

    if filter["and"]:
        result = True
        output = DictList()
        for a in filter[u"and"]:
            f = pe_filter(a, data, depth)
            if f is False:
                result = False
            elif f is not True:
                output.append(f)
        if result and output:
            return {"and": output}
        else:
            return result
    elif filter["or"]:
        output = DictList()
        for o in filter[u"or"]:
            f = pe_filter(o, data, depth)
            if f is True:
                return True
            elif f is not False:
                output.append(f)
        if output:
            return {"or": output}
        else:
            return False
    elif filter["not"]:
        f = pe_filter(filter["not"], data, depth)
        if f is True:
            return False
        elif f is False:
            return True
        else:
            return {"not": f}
    elif filter.term or filter.eq:
        eq = coalesce(filter.term, filter.eq)
        result = True
        output = {}
        for col, val in eq.items():
            first, rest = parse_field(col, data, depth)
            d = data[first]
            if not rest:
                if d != val:
                    result = False
            else:
                output[rest] = val

        if result and output:
            return {"term": output}
        else:
            return result
    elif filter.equal:
        a, b = filter["equal"]
        first_a, rest_a = parse_field(a, data, depth)
        first_b, rest_b = parse_field(b, data, depth)
        val_a = data[first_a]
        val_b = data[first_b]

        if not rest_a:
            if not rest_b:
                if val_a != val_b:
                    return False
                else:
                    return True
            else:
                return {"term": {rest_b: val_a}}
        else:
            if not rest_b:
                return {"term": {rest_a: val_b}}
            else:
                return {"equal": [rest_a, rest_b]}
    elif filter.terms:
        result = True
        output = {}
        for col, vals in filter["terms"].items():
            first, rest = parse_field(col, data, depth)
            d = data[first]
            if not rest:
                if d not in vals:
                    result = False
            else:
                output[rest] = vals
        if result and output:
            return {"terms": output}
        else:
            return result
    elif filter.range:
        result = True
        output = {}
        for col, ranges in filter["range"].items():
            first, rest = parse_field(col, data, depth)
            d = data[first]
            if not rest:
                for sign, val in ranges.items():
                    if sign in ("gt", ">") and d <= val:
                        result = False
                    if sign == "gte" and d < val:
                        result = False
                    if sign == "lte" and d > val:
                        result = False
                    if sign == "lt" and d >= val:
                        result = False
            else:
                output[rest] = ranges
        if result and output:
            return {"range": output}
        else:
            return result
    elif filter.missing:
        if isinstance(filter.missing, basestring):
            field = filter["missing"]
        else:
            field = filter["missing"]["field"]

        first, rest = parse_field(field, data, depth)
        d = data[first]
        if not rest:
            if d == None:
                return True
            return False
        else:
            return {"missing": rest}
    elif filter.prefix:
        result = True
        output = {}
        for col, val in filter["prefix"].items():
            first, rest = parse_field(col, data, depth)
            d = data[first]
            if not rest:
                if d == None or not d.startswith(val):
                    result = False
            else:
                output[rest] = val
        if result and output:
            return {"prefix": output}
        else:
            return result
    elif filter.exists:
        if isinstance(filter["exists"], basestring):
            field = filter["exists"]
        else:
            field = filter["exists"]["field"]

        first, rest = parse_field(field, data, depth)
        d = data[first]
        if not rest:
            if d != None:
                return True
            return False
        else:
            return {"exists": rest}
    else:
        Log.error(u"Can not interpret esfilter: {{esfilter}}", {u"esfilter": filter})
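# Hedged illustration (not from the original source): pe_filter() returns True when the
# given data already satisfies the whole filter, False when it can never satisfy it, and
# otherwise a smaller residual filter covering only the clauses that refer to fields not
# yet available at this depth. For example, an {"and": [...]} where one clause is decided
# True by the data and another is undecided collapses to {"and": [<undecided clause>]}.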
def merge(self, agg):
    Log.error("Do not know how to handle")
            return output
        except Exception, e:
            Log.error("Expecting a dict with lists in codomain", e)
    else:
        try:
            # relation[d] is expected to be a list
            # return set(cod for d in data for cod in relation[d])
            output = set()
            for d in data:
                cod = relation(d)
                if cod == None:
                    continue
                output.add(cod)
            return output
        except Exception, e:
            Log.error("Expecting a dict with lists in codomain", e)
    return Null


def tuple(data, field_name):
    """
    RETURN LIST OF TUPLES
    """
    if isinstance(data, Cube):
        Log.error("not supported yet")

    if isinstance(data, FlatList):
        Log.error("not supported yet")

    if isinstance(field_name, Mapping) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST'])
@app.route('/<path:path>', methods=['GET', 'POST'])
def catch_all(path):
    return Response(
        b"",
        status=400,
        headers={
            "access-control-allow-origin": "*",
            "content-type": "text/html"
        }
    )


if __name__ == "__main__":
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        # SETUP TREEHERDER CACHE
        hg = HgMozillaOrg(use_cache=True, settings=config.hg)
        th = TreeherderService(hg, settings=config.treeherder)
        app.add_url_rule('/treeherder', None, th.get_treeherder_job, methods=['GET'])

        HeaderRewriterFix(app, remove_headers=['Date', 'Server'])
        app.run(**config.flask)
    except Exception, e:
        Log.error("Serious problem with service construction! Shutdown!", cause=e)
class PersistentQueue(object):
    """
    THREAD-SAFE, PERSISTENT QUEUE

    CAN HANDLE MANY PRODUCERS, BUT THE pop(), commit() IDIOM CAN HANDLE ONLY ONE CONSUMER.

    IT IS IMPORTANT YOU commit() or close(), OTHERWISE NOTHING COMES OFF THE QUEUE
    """

    def __init__(self, _file):
        """
        file - USES FILE FOR PERSISTENCE
        """
        self.file = File.new_instance(_file)
        self.lock = Lock("lock for persistent queue using file " + self.file.name)
        self.please_stop = Signal()
        self.db = Dict()
        self.pending = []

        if self.file.exists:
            for line in self.file:
                try:
                    delta = convert.json2value(line)
                    apply_delta(self.db, delta)
                except:
                    pass
            if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
                self.db.status.start = 0
            self.start = self.db.status.start

            # SCRUB LOST VALUES
            lost = 0
            for k in self.db.keys():
                try:
                    if k != "status" and int(k) < self.start:
                        self.db[k] = None
                        lost += 1
                except Exception:
                    pass  # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
            if lost:
                Log.warning("queue file had {{num}} items lost", num=lost)

            if DEBUG:
                Log.note("Persistent queue {{name}} found with {{num}} items", name=self.file.abspath, num=len(self))
        else:
            self.db.status = Dict(
                start=0,
                end=0
            )
            self.start = self.db.status.start
            if DEBUG:
                Log.note("New persistent queue {{name}}", name=self.file.abspath)

    def _add_pending(self, delta):
        delta = wrap(delta)
        self.pending.append(delta)

    def _apply_pending(self):
        for delta in self.pending:
            apply_delta(self.db, delta)
        self.pending = []

    def __iter__(self):
        """
        BLOCKING ITERATOR
        """
        while not self.please_stop:
            try:
                value = self.pop()
                if value is not Thread.STOP:
                    yield value
            except Exception, e:
                Log.warning("Tell me about what happened here", cause=e)
        if DEBUG:
            Log.note("queue iterator is done")
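# Hedged usage sketch (not from the original source): the single-consumer pop()/commit()
# idiom the class docstring describes; add() and the shape of the work item are assumptions.
#
#     queue = PersistentQueue("./work_queue.json")
#     queue.add({"bucket": "ekyle-test-result", "key": "123.45"})
#     work = queue.pop()      # blocks until an item is available
#     process(work)
#     queue.commit()          # the persisted entry may now be scrubbed
#     queue.close()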
def assign(source, destination):
    try:
        destination[name] = source.get(f)
    except Exception, e:
        Log.error("{{value}} does not have {{field}} property", value=source, field=f, cause=e)
def add(self, record):
    if isinstance(record, list):
        Log.error("add() has changed to only accept one record, no lists")
    self.extend([record])
def transform(self, *args, **kwargs):
    Log.error("Not implemented")
["1.4.", "1.5."])): if item.index.status not in [200, 201]: Log.error( "{{num}} {{error}} while loading line into {{index}}:\n{{line}}", num=item.index.status, error=item.index.error, line=lines[i * 2 + 1], index=self.settings.index) else: Log.error("version not supported {{version}}", version=self.cluster.version) if self.debug: Log.note("{{num}} documents added", num=len(items)) except Exception, e: Log.error("problem sending to ES", e) # RECORDS MUST HAVE id AND json AS A STRING OR # HAVE id AND value AS AN OBJECT def add(self, record): if isinstance(record, list): Log.error("add() has changed to only accept one record, no lists") self.extend([record]) # -1 FOR NO REFRESH def set_refresh_interval(self, seconds): if seconds <= 0: interval = -1 else: interval = unicode(seconds) + "s"
def _post(self, path, **kwargs):
    url = self.settings.host + ":" + unicode(self.settings.port) + path

    try:
        wrap(kwargs).headers["Accept-Encoding"] = "gzip,deflate"

        if "data" in kwargs and not isinstance(kwargs["data"], str):
            Log.error("data must be utf8 encoded string")

        if self.debug:
            sample = kwargs.get("data", "")[:300]
            Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)

        response = http.post(url, **kwargs)
        if response.status_code not in [200, 201]:
            Log.error(response.reason + ": " + response.all_content)
        if self.debug:
            Log.note("response: {{response}}", response=utf82unicode(response.all_content)[:130])
        details = convert.json2value(utf82unicode(response.all_content))
        if details.error:
            Log.error(convert.quote2string(details.error))
        if details._shards.failed > 0:
            Log.error(
                "Shard failures {{failures|indent}}",
                failures="---\n".join(r.replace(";", ";\n") for r in details._shards.failures.reason)
            )
        return details
    except Exception, e:
        if url[0:4] != "http":
            suggestion = " (did you forget \"http://\" prefix on the host name?)"
        else:
            suggestion = ""

        if kwargs.get("data"):
            Log.error(
                "Problem with call to {{url}}" + suggestion + "\n{{body|left(10000)}}",
                url=url,
                body=kwargs["data"][0:10000] if self.debug else kwargs["data"][0:100],
                cause=e
            )
        else:
            Log.error("Problem with call to {{url}}" + suggestion, {"url": url}, e)
    try:
        py_result = strangman.stats.chisquare(f_obs, f_exp)
    except Exception, e:
        Log.error("problem with call", e)

    if DEBUG_STRANGMAN:
        from pyLibrary.testing.fuzzytestcase import assertAlmostEqualValue
        sp_result = scipy.stats.chisquare(np.array(f_obs), f_exp=np.array(f_exp))
        if not assertAlmostEqualValue(sp_result[0], py_result[0], digits=9) and assertAlmostEqualValue(sp_result[1], py_result[1], delta=1e-8):
            Log.error("problem with stats lib")

    return py_result


def Stats2ZeroMoment(stats):
    # MODIFIED FROM http://statsmodels.sourceforge.net/devel/_modules/statsmodels/stats/moment_helpers.html
    # ADDED count
    mc0, mc1, mc2, skew, kurt = stats.count, coalesce(stats.mean, 0), coalesce(stats.variance, 0), coalesce(stats.skew, 0), coalesce(stats.kurtosis, 0)

    mz0 = mc0
    mz1 = mc1 * mc0
    mz2 = (mc2 + mc1 * mc1) * mc0
    mc3 = coalesce(skew, 0) * (mc2 ** 1.5)  # 3rd central moment
def extend(self, records):
    """
    records - MUST HAVE FORM OF
        [{"value":value}, ... {"value":value}] OR
        [{"json":json}, ... {"json":json}]
        OPTIONAL "id" PROPERTY IS ALSO ACCEPTED
    """
    lines = []
    try:
        for r in records:
            id = r.get("id")
            if id == None:
                id = Random.hex(40)

            if "json" in r:
                # if id != coalesce(wrap(convert.json2value(r["json"])).value._id, id):
                #     Log.error("expecting _id to match")
                json = r["json"]
            elif "value" in r:
                # if id != coalesce(wrap(r).value._id, id):
                #     Log.error("expecting _id to match")
                json = convert.value2json(r["value"])
            else:
                json = None
                Log.error("Expecting every record given to have \"value\" or \"json\" property")

            lines.append('{"index":{"_id": ' + convert.value2json(id) + '}}')
            lines.append(json)
        del records

        if not lines:
            return

        try:
            data_bytes = "\n".join(lines) + "\n"
            data_bytes = data_bytes.encode("utf8")
        except Exception, e:
            Log.error("can not make request body from\n{{lines|indent}}", lines=lines, cause=e)

        response = self.cluster._post(
            self.path + "/_bulk",
            data=data_bytes,
            headers={"Content-Type": "text"},
            timeout=self.settings.timeout
        )
        items = response["items"]

        for i, item in enumerate(items):
            if self.cluster.version.startswith("0.90."):
                if not item.index.ok:
                    Log.error(
                        "{{error}} while loading line:\n{{line}}",
                        error=item.index.error,
                        line=lines[i * 2 + 1]
                    )
            elif any(map(self.cluster.version.startswith, ["1.4.", "1.5."])):
                if item.index.status not in [200, 201]:
                    Log.error(
                        "{{num}} {{error}} while loading line into {{index}}:\n{{line}}",
                        num=item.index.status,
                        error=item.index.error,
                        line=lines[i * 2 + 1],
                        index=self.settings.index
                    )
            else:
                Log.error("version not supported {{version}}", version=self.cluster.version)

        if self.debug:
            Log.note("{{num}} documents added", num=len(items))
    except Exception, e:
        Log.error("problem sending to ES", e)
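# Hedged illustration (not from the original source): the two record shapes extend()
# accepts, per the docstring above; records without an "id" get a random 40-char hex id.
#
#     index.extend([
#         {"id": "doc1", "value": {"name": "first", "count": 2}},    # serialized here
#         {"id": "doc2", "json": u'{"name": "second", "count": 3}'}  # already-encoded JSON
#     ])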
def _dispatch_work(self, source_block):
    """
    source_block POINTS TO THE bucket AND key TO PROCESS
    :return: False IF THERE IS NOTHING LEFT TO DO
    """
    source_keys = listwrap(coalesce(source_block.key, source_block.keys))

    if not isinstance(source_block.bucket, basestring):  # FIX MISTAKE
        source_block.bucket = source_block.bucket.bucket
    bucket = source_block.bucket
    work_actions = [w for w in self.settings.workers if w.source.bucket == bucket]
    if not work_actions:
        Log.note(
            "No worker defined for records from {{bucket}}, {{action}}.\n{{message|indent}}",
            bucket=source_block.bucket,
            message=source_block,
            action="skipping" if self.settings.keep_unknown_on_queue else "deleting"
        )
        return not self.settings.keep_unknown_on_queue

    for action in work_actions:
        try:
            source_key = unicode(source_keys[0])
            if len(source_keys) > 1:
                multi_source = action._source
                source = ConcatSources([multi_source.get_key(k) for k in source_keys])
                source_key = MIN(source_keys)  # USE THE SMALLEST OF THE COMBINED KEYS
            else:
                source = action._source.get_key(source_key)
                source_key = source.key

            Log.note(
                "Execute {{action}} on bucket={{source}} key={{key}}",
                action=action.name,
                source=source_block.bucket,
                key=source_key
            )

            if action.transform_type == "bulk":
                old_keys = set()
            else:
                old_keys = action._destination.keys(prefix=source_block.key)

            new_keys = set(action._transformer(
                source_key,
                source,
                action._destination,
                resources=self.resources,
                please_stop=self.please_stop
            ))

            # VERIFY KEYS
            if len(new_keys) == 1 and list(new_keys)[0] == source_key:
                pass  # ok
            else:
                etls = map(key2etl, new_keys)
                etls = qb.sort(etls, "id")
                for i, e in enumerate(etls):
                    if i != e.id:
                        Log.error("expecting keys to have dense order: {{ids}}", ids=etls.id)

            # VERIFY KEYS EXIST
            if hasattr(action._destination, "get_key"):
                for k in new_keys:
                    action._destination.get_key(k)

            for n in action._notify:
                for k in new_keys:
                    n.add(k)

            if action.transform_type == "bulk":
                continue

            # DUE TO BUGS THIS INVARIANT IS NOW BROKEN
            # TODO: FIGURE OUT HOW TO FIX THIS (CHANGE NAME OF THE SOURCE BLOCK KEY?)
            # for n in new_keys:
            #     if not n.startswith(source_key):
            #         Log.error("Expecting new keys ({{new_key}}) to start with source key ({{source_key}})", new_key=n, source_key=source_key)

            if not new_keys and old_keys:
                Log.alert(
                    "Expecting some new keys after etl of {{source_key}}, especially since there were old ones\n{{old_keys}}",
                    old_keys=old_keys,
                    source_key=source_key
                )
                continue
            elif not new_keys:
                Log.alert(
                    "Expecting some new keys after processing {{source_key}}",
                    old_keys=old_keys,
                    source_key=source_key
                )
                continue

            for k in new_keys:
                if len(k.split(".")) == 3 and action.destination.type != "test_result":
                    Log.error("two dots have not been needed yet, this is a consistency check")

            delete_me = old_keys - new_keys
            if delete_me:
                if action.destination.bucket == "ekyle-test-result":
                    for k in delete_me:
                        action._destination.delete_key(k)
                else:
                    Log.note("delete keys?\n{{list}}", list=sorted(delete_me))
                    # for k in delete_me:

            # WE DO NOT PUT KEYS ON WORK QUEUE IF ALREADY NOTIFYING SOME OTHER
            # AND NOT GOING TO AN S3 BUCKET
            if not action._notify and isinstance(action._destination, (aws.s3.Bucket, S3Bucket)):
                for k in old_keys | new_keys:
                    self.work_queue.add(Dict(
                        bucket=action.destination.bucket,
                        key=k
                    ))
        except Exception, e:
            if "Key {{key}} does not exist" in e:
                err = Log.warning
            elif "multiple keys in {{bucket}}" in e:
                err = Log.warning
                if source_block.bucket == "ekyle-test-result":
                    for k in action._source.list(prefix=key_prefix(source_key)):
                        action._source.delete_key(strip_extension(k.key))
            elif "expecting keys to have dense order" in e:
                err = Log.warning
                if source_block.bucket == "ekyle-test-result":
                    # WE KNOW OF THIS ETL MISTAKE, REPROCESS
                    self.work_queue.add({
                        "key": unicode(key_prefix(source_key)),
                        "bucket": "ekyle-pulse-logger"
                    })
            elif "Expecting a pure key" in e:
                err = Log.warning
            else:
                err = Log.error

            err(
                "Problem transforming {{action}} on bucket={{source}} key={{key}} to destination={{destination}}",
                {
                    "action": action.name,
                    "source": source_block.bucket,
                    "key": source_key,
                    "destination": coalesce(action.destination.name, action.destination.index)
                },
                e
            )
def chisquare(f_obs, f_exp):
    try:
        py_result = strangman.stats.chisquare(f_obs, f_exp)
    except Exception, e:
        Log.error("problem with call", e)
        hg = HgMozillaOrg(settings=settings.hg)
        resources = Dict(hg=dictwrap(hg))

        stopper = Signal()
        for i in range(coalesce(settings.param.threads, 1)):
            ETL(
                name="ETL Loop " + unicode(i),
                work_queue=settings.work_queue,
                resources=resources,
                workers=settings.workers,
                settings=settings.param,
                please_stop=stopper
            )

        Thread.wait_for_shutdown_signal(stopper, allow_exit=True)
    except Exception, e:
        Log.error("Problem with etl", e)
    finally:
        Log.stop()


def etl_one(settings):
    queue = Queue("temp work queue")
    queue.__setattr__(b"commit", Null)
    queue.__setattr__(b"rollback", Null)

    settings.param.wait_forever = False
    already_in_queue = set()
    for w in settings.workers:
        source = get_container(w.source)
        # source.settings.fast_forward = True
        if id(source) in already_in_queue:
            continue
        try:
def value(self):
    if self.num:
        Log.error("can not get value of with dimension")
    return self.cube