def process(source_key, source, destination, resources, please_stop=None):
    lines = source.read_lines()

    etl_header = convert.json2value(lines[0])
    if etl_header.etl or etl_header.locale or etl_header._meta:
        start = 0
    else:
        start = 1

    keys = []
    records = []
    stats = Dict()
    for i, line in enumerate(lines[start:]):
        pulse_record = Null
        try:
            pulse_record = scrub_pulse_record(source_key, i, line, stats)
            if not pulse_record:
                continue

            with Profiler("transform_buildbot"):
                record = transform_buildbot(pulse_record.payload, resources=resources)
                record.etl = {
                    "id": i,
                    "source": pulse_record.etl,
                    "type": "join",
                    "revision": get_git_revision()
                }
            key = etl2key(record.etl)
            keys.append(key)
            records.append({"id": key, "value": record})
        except Exception as e:
            Log.warning("Problem with pulse payload {{pulse|json}}", pulse=pulse_record.payload, cause=e)
    def event_loop(self, please_stop):
        got_stop_message = False
        while not please_stop.is_go():
            with Timer("get more work", debug=DEBUG):
                request = self.in_queue.pop()
            if request == Thread.STOP:
                if DEBUG:
                    Log.note("{{name}} got a stop message", name=self.name)
                got_stop_message = True
                if self.in_queue:
                    Log.warning(
                        "programmer error, queue not empty. {{num}} requests lost:\n{{requests}}",
                        num=len(self.in_queue.queue),
                        requests=list(self.in_queue.queue)[:5] + list(self.in_queue.queue)[-5:],
                    )
                break
            if please_stop.is_go():
                break

            with Timer("run {{function}}", {"function": get_function_name(self.function)}, debug=DEBUG):
                try:
                    result = self.function(**request)
                    if self.out_queue != None:
                        self.out_queue.add({"response": result})
                except Exception as e:
                    Log.warning("Can not execute with params={{params}}", params=request, cause=e)
                    if self.out_queue != None:
                        self.out_queue.add({"exception": e})
                finally:
                    self.num_runs += 1
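
The loop above is the stop-sentinel worker pattern: block on an input queue, exit when a STOP message arrives, and report results or exceptions on an output queue. A minimal standard-library sketch of the same idea (the names are illustrative, not pyLibrary's):

import threading
from Queue import Queue  # Python 2; use `queue` on Python 3

STOP = object()  # UNIQUE SENTINEL, SAME ROLE AS Thread.STOP ABOVE

def worker(in_queue, out_queue):
    while True:
        request = in_queue.get()
        if request is STOP:
            break  # GOT THE STOP MESSAGE; DRAIN NO FURTHER
        try:
            out_queue.put({"response": request * 2})  # STAND-IN FOR self.function(**request)
        except Exception as e:
            out_queue.put({"exception": e})

in_q, out_q = Queue(), Queue()
t = threading.Thread(target=worker, args=(in_q, out_q))
t.start()
in_q.put(21)
in_q.put(STOP)
t.join()
print(out_q.get())  # {'response': 42}
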
Example #3
    def _worker(self, please_stop):
        curr = "0.0"
        acc = []
        last_count_written = -1
        next_write = Date.now()

        while not please_stop:
            d = self.temp_queue.pop(timeout=MINUTE)
            if d == None:
                if not acc:
                    continue
                # WRITE THE INCOMPLETE DATA TO S3, BUT NOT TOO OFTEN
                next_write = Date.now() + MINUTE
                try:
                    if last_count_written != len(acc):
                        if DEBUG:
                            Log.note("write incomplete data ({{num}} lines) to {{uid}} in S3 next (time = {{next_write}})", uid=curr, next_write=next_write, num=len(acc))
                        self.bucket.write_lines(curr, (convert.value2json(a) for a in acc))
                        last_count_written = len(acc)
                except Exception as e:
                    Log.note("Problem with write to S3", cause=e)
            elif d[UID_PATH] != curr:
                # WRITE acc TO S3 IF WE ARE MOVING TO A NEW KEY
                try:
                    if acc:
                        if DEBUG:
                            Log.note("write complete data ({{num}} lines) to {{curr}} in S3", num=len(acc), curr=curr)
                        self.bucket.write_lines(curr, (convert.value2json(a) for a in acc))
                        last_count_written = 0
                    curr = d[UID_PATH]
                    acc = [d]
                except Exception as e:
                    Log.warning("Can not store data", cause=e)
                    Thread.sleep(30*MINUTE)
Example #4
    def loop(self, please_stop):
        with self.work_queue:
            while not please_stop:
                if self.settings.wait_forever:
                    todo = None
                    while not please_stop and not todo:
                        if isinstance(self.work_queue, aws.Queue):
                            todo = self.work_queue.pop(wait=EXTRA_WAIT_TIME)
                        else:
                            todo = self.work_queue.pop()
                else:
                    if isinstance(self.work_queue, aws.Queue):
                        todo = self.work_queue.pop()
                    else:
                        todo = self.work_queue.pop(till=Date.now())
                    if todo == None:
                        please_stop.go()
                        return

                try:
                    is_ok = self._dispatch_work(todo)
                    if is_ok:
                        self.work_queue.commit()
                    else:
                        self.work_queue.rollback()
                except Exception as e:
                    self.work_queue.rollback()
                    Log.warning(
                        "could not process {{key}}. Returned back to work queue.",
                        key=todo.key,
                        cause=e)
Example #5
    def _get_from_elasticsearch(self, revision, locale=None):
        rev = revision.changeset.id
        query = {
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": [
                    {"prefix": {"changeset.id": rev[0:12]}},
                    {"term": {"branch.name": revision.branch.name}},
                    {"term": {"branch.locale": coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)}}
                ]}
            }},
            "size": 2000,
        }
        try:
            docs = self.es.search(query, timeout=120).hits.hits
            if len(docs) > 1:
                for d in docs:
                    if d._id.endswith(d._source.branch.locale):
                        return d._source
                Log.warning("expecting no more than one document")

            return docs[0]._source
        except Exception as e:
            Log.warning("Bad ES call", e)
            return None
Example #6
    def monitor(self, please_stop):
        please_stop.on_go(lambda: self.todo.add(Thread.STOP))
        while not please_stop:
            try:
                if not self.todo:
                    with self.columns.locker:
                        old_columns = filter(
                            lambda c: (c.last_updated == None or c.last_updated < Date.now()-TOO_OLD) and c.type not in ["object", "nested"],
                            self.columns
                        )
                        if old_columns:
                            self.todo.extend(old_columns)
                            # TEST CONSISTENCY
                            for c, d in product(list(self.todo.queue), list(self.todo.queue)):
                                if c.abs_name == d.abs_name and c.table == d.table and c != d:
                                    Log.error("")
                        else:
                            Log.note("no more metadata to update")

                column = self.todo.pop(timeout=10*MINUTE)
                if column:
                    if column.type in ["object", "nested"]:
                        continue
                    elif column.last_updated >= Date.now()-TOO_OLD:
                        continue
                    try:
                        self._update_cardinality(column)
                        Log.note("updated {{column.name}}", column=column)
                    except Exception as e:
                        Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
            except Exception as e:
                Log.warning("problem in cardinality monitor", cause=e)
Example #7
def record_request(request, query_, data, error):
    try:
        if request_log_queue == None:
            return

        if data and len(data) > 10000:
            data = data[:10000]

        log = wrap({
            "timestamp": Date.now(),
            "http_user_agent": request.headers.get("user_agent"),
            "http_accept_encoding": request.headers.get("accept_encoding"),
            "path": request.headers.environ["werkzeug.request"].full_path,
            "content_length": request.headers.get("content_length"),
            "remote_addr": request.remote_addr,
            "query": query_,
            "data": data,
            "error": error
        })
        log["from"] = request.headers.get("from")
        request_log_queue.add({"value": log})
    except Exception as e:
        Log.warning("Can not record", cause=e)
Example #8
def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        aws_args = dict(
            region_name=settings.aws.region,
            aws_access_key_id=unwrap(settings.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key)
        )
        ec2_conn = boto_ec2.connect_to_region(**aws_args)

        instances = _get_managed_instances(ec2_conn, settings.name)

        for i in instances:
            Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", insance_id=i.id, name=i.tags["Name"], ip=i.ip_address)
            _config_fabric(settings.fabric, i)
            try:
                _refresh_etl()  # TODO: UPON FAILURE, TERMINATE INSTANCE AND SPOT REQUEST
            except Exception as e:
                ec2_conn.terminate_instances([i.id])
                Log.warning("Problem resetting {{instance}}, terminated", instance=i.id, cause=e)
    except Exception as e:
        Log.error("Problem with etl", e)
Example #9
    def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None):
        """
        :param keys: THE KEYS TO LOAD FROM source
        :param source: THE SOURCE (USUALLY S3 BUCKET)
        :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
        :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
        :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
        :return: LIST OF SUB-keys PUSHED INTO ES
        """
        num_keys = 0
        queue = None
        for key in keys:
            timer = Timer("key")
            try:
                with timer:
                    for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                        if not line:
                            continue

                        if rownum > 0 and rownum % 1000 == 0:
                            Log.note("Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name)

                        row, please_stop = fix(rownum, line, source, sample_only_filter, sample_size)
                        num_keys += 1

                        if queue == None:
                            queue = self._get_queue(row)
                        queue.add(row)

                        if please_stop:
                            break
            except Exception as e:
                done_copy = None
                Log.warning("Could not process {{key}} after {{duration|round(places=2)}} seconds", key=key, duration=timer.duration.seconds, cause=e)
Example #10
def time_delta_pusher(please_stop, appender, queue, interval):
    """
    appender - THE FUNCTION THAT ACCEPTS A STRING
    queue - FILLED WITH LOG ENTRIES {"template":template, "params":params} TO WRITE
    interval - timedelta
    USE IN A THREAD TO BATCH LOGS BY TIME INTERVAL
    """

    next_run = time() + interval

    while not please_stop:
        (Till(till=next_run) | please_stop).wait()
        next_run = time() + interval
        logs = queue.pop_all()
        if not logs:
            continue

        lines = []
        for log in logs:
            try:
                if log is Thread.STOP:
                    please_stop.go()
                    next_run = time()
                else:
                    expanded = expand_template(log.get("template"), log.get("params"))
                    lines.append(expanded)
            except Exception as e:
                Log.warning("Trouble formatting logs", cause=e)
                # SWALLOW ERROR, GOT TO KEEP RUNNING
        try:
            appender(u"\n".join(lines) + u"\n")
        except Exception as e:
            sys.stderr.write(b"Trouble with appender: " + str(e.message) + b"\n")
Example #11
 def write(self, template, params):
     try:
         with self.file_lock:
             self.file.append(expand_template(template, params))
     except Exception as e:
         _Log.warning("Problem writing to file {{file}}, waiting...", file=self.file.name, cause=e)
         _Till(seconds=5).wait()
Example #12
def main():
    """
    CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE
    """
    settings = startup.read_settings(defs=[
        {
            "name": ["--bucket"],
            "help": "bucket to scan",
            "type": str,
            "dest": "bucket",
            "required": True
        }
    ])
    Log.start(settings.debug)

    source = Connection(settings.aws).get_bucket(settings.args.bucket)

    for k in qb.sort(source.keys()):
        try:
            data = source.read_bytes(k)
            if convert.ascii2unicode(data).find("2e2834fa7ecd8d3bb1ad49ec981fdb89eb4df95e18") >= 0:
                Log.note("Found at {{key}}", key=k)
        except Exception as e:
            Log.warning("Problem with {{key}}", key=k, cause=e)
Example #13
    def replacer(found):
        ops = found.group(1).split("|")

        path = ops[0]
        var = path.lstrip(".")
        depth = min(len(seq), max(1, len(path) - len(var)))
        try:
            val = seq[-depth]
            if var:
                val = val[var]
            for func_name in ops[1:]:
                parts = func_name.split('(')
                if len(parts) > 1:
                    val = eval(parts[0] + "(val, " + ("(".join(parts[1:])))
                else:
                    val = globals()[func_name](val)
            val = toString(val)
            return val
        except Exception as e:
            try:
                if e.message.find("is not JSON serializable") >= 0:
                    # WORK HARDER
                    val = toString(val)
                    return val
            except Exception as f:
                if not _Log:
                    _late_import()

                _Log.warning("Can not expand " + "|".join(ops) +
                             " in template: {{template_|json}}",
                             template_=template,
                             cause=e)
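
Both replacer functions in this listing are meant to be handed to re.sub, which calls them once per {{...}} match in the template. A self-contained sketch of that mechanism with a simplified pattern and no pipe-functions (the pattern and helper are assumptions, not the library's):

import re

def expand(template, params):
    def replacer(found):
        # CALLED ONCE PER MATCH; found.group(1) IS THE VARIABLE NAME
        var = found.group(1)
        try:
            return str(params[var])
        except Exception:
            return found.group(0)  # LEAVE UNKNOWN NAMES UNTOUCHED
    return re.sub(r"\{\{(\w+)\}\}", replacer, template)

print(expand("Can not expand {{name}} in template", {"name": "value"}))
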
Example #14
    def replacer(found):
        ops = found.group(1).split("|")

        path = ops[0]
        var = path.lstrip(".")
        depth = min(len(seq), max(1, len(path) - len(var)))
        try:
            val = seq[-depth]
            if var:
                val = val[var]
            for func_name in ops[1:]:
                parts = func_name.split('(')
                if len(parts) > 1:
                    val = eval(parts[0] + "(val, " + ("(".join(parts[1:])))
                else:
                    val = globals()[func_name](val)
            val = toString(val)
            return val
        except Exception as e:
            try:
                if e.message.find("is not JSON serializable") >= 0:
                    # WORK HARDER
                    val = toString(val)
                    return val
            except Exception as f:
                if not _Log:
                    _late_import()

                _Log.warning(
                    "Can not expand " + "|".join(ops) + " in template: {{template_|json}}",
                    template_=template,
                    cause=e
                )
Example #15
def main():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        constants.set(settings.constants)

        with startup.SingleInstance(flavor_id=settings.args.filename):
            with aws.s3.Bucket(settings.destination) as bucket:

                if settings.param.debug:
                    if settings.source.durable:
                        Log.error("Can not run in debug mode with a durable queue")
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                else:
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                    if settings.source.durable:
                        synch.startup()

                queue = PersistentQueue(settings.param.queue_file)
                if queue:
                    last_item = queue[len(queue) - 1]
                    synch.source_key = last_item._meta.count + 1

                with pulse.Consumer(settings=settings.source, target=None, target_queue=queue, start=synch.source_key):
                    Thread.run("pulse log loop", log_loop, settings, synch, queue, bucket)
                    Thread.wait_for_shutdown_signal(allow_exit=True)
                    Log.warning("starting shutdown")

                queue.close()
                Log.note("write shutdown state to S3")
                synch.shutdown()

    except Exception as e:
        Log.error("Problem with etl", e)
Example #16
    def write_lines(self, key, lines):
        self._verify_key_format(key)
        storage = self.bucket.new_key(key + ".json.gz")

        buff = TemporaryFile()
        archive = gzip.GzipFile(fileobj=buff, mode='w')
        count = 0
        for l in lines:
            if hasattr(l, "__iter__"):
                for ll in l:
                    archive.write(ll.encode("utf8"))
                    archive.write(b"\n")
                    count += 1
            else:
                archive.write(l.encode("utf8"))
                archive.write(b"\n")
                count += 1
        archive.close()
        file_length = buff.tell()

        retry = 3
        while retry:
            try:
                with Timer("Sending {{count}} lines in {{file_length|comma}} bytes", {"file_length": file_length, "count": count}, debug=self.settings.debug):
                    buff.seek(0)
                    storage.set_contents_from_file(buff)
                break
            except Exception as e:
                Log.warning("could not push data to s3", cause=e)
                retry -= 1
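
write_lines compresses every line into a temporary file first, measures the result, then rewinds before each upload attempt; the seek(0) inside the retry loop is what makes a retry send the same bytes again. The compression-and-rewind half is pure stdlib and can be tried in isolation:

import gzip
from tempfile import TemporaryFile

buff = TemporaryFile()
archive = gzip.GzipFile(fileobj=buff, mode='w')
for line in [u"line 1", u"line 2"]:
    archive.write(line.encode("utf8"))
    archive.write(b"\n")
archive.close()   # MUST CLOSE: FLUSHES THE GZIP TRAILER BEFORE MEASURING
file_length = buff.tell()

buff.seek(0)      # REWIND SO THE UPLOADER READS FROM BYTE 0
print(file_length, gzip.GzipFile(fileobj=buff).read())
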
Example #17
    def go(self):
        """
        ACTIVATE SIGNAL (DOES NOTHING IF SIGNAL IS ALREADY ACTIVATED)
        """
        if DEBUG:
            if not _Log:
                _late_import()
            _Log.note("GO! {{name|quote}}", name=self.name)

        with self.lock:
            if DEBUG:
                _Log.note("internal GO! {{name|quote}}", name=self.name)
            if self._go:
                return
            self._go = True
            jobs, self.job_queue = self.job_queue, []
            threads, self.waiting_threads = self.waiting_threads, []

        for t in threads:
            if DEBUG:
                _Log.note("Release")
            t.release()

        for j in jobs:
            try:
                j()
            except Exception as e:
                if not _Log:
                    _late_import()
                _Log.warning("Trigger on Signal.go() failed!", cause=e)
Example #18
def pypy_json_encode(value, pretty=False):
    """
    pypy DOES NOT OPTIMIZE GENERATOR CODE WELL
    """
    global _dealing_with_problem
    if pretty:
        return pretty_json(value)

    try:
        _buffer = UnicodeBuilder(2048)
        _value2json(value, _buffer)
        output = _buffer.build()
        return output
    except Exception as e:
        # THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS
        from pyLibrary.debugs.logs import Log

        if _dealing_with_problem:
            Log.error("Serialization of JSON problems", e)
        else:
            Log.warning("Serialization of JSON problems", e)
        _dealing_with_problem = True
        try:
            return pretty_json(value)
        except Exception as f:
            Log.error("problem serializing object", f)
Example #19
    def daemon(cls, please_stop):
        global next_ping

        Till.enabled = True
        try:
            while not please_stop:
                now = _time()
                with Till.locker:
                    if next_ping > now:
                        _sleep(min(next_ping - now, INTERVAL))
                        continue

                    next_ping = now + INTERVAL
                    work = None
                    if Till.all_timers:
                        Till.all_timers.sort(key=lambda r: r[0])
                        for i, (t, s) in enumerate(Till.all_timers):
                            if now < t:
                                work, Till.all_timers[:i] = Till.all_timers[:i], []
                                next_ping = min(next_ping, Till.all_timers[0][0])
                                break
                        else:
                            work, Till.all_timers = Till.all_timers, []

                if work:
                    for t, s in work:
                        s.go()

        except Exception as e:
            from pyLibrary.debugs.logs import Log

            Log.warning("timer shutdown", cause=e)
Example #20
def solve():
    try:
        data = convert.json2value(convert.utf82unicode(flask.request.data))
        solved = noop.solve(data)
        response_data = convert.unicode2utf8(convert.value2json(solved))

        return Response(
            response_data,
            direct_passthrough=True,  # FOR STREAMING
            status=200,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("Could not process", cause=e)
        e = e.as_dict()
        return Response(
            convert.unicode2utf8(convert.value2json(e)),
            status=400,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )
Example #21
    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                Thread.sleep(seconds=1)
                messages = wrap(self.queue.pop_all())
                if not messages:
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):
                    scrubbed = []
                    try:
                        for i, message in enumerate(mm):
                            if message is Thread.STOP:
                                please_stop.go()
                                return
                            scrubbed.append(_deep_json_to_string(message, depth=3))
                    finally:
                        self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as e:
                Log.warning("Problem inserting logs into ES", cause=e)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index)
                Thread.sleep(seconds=30)
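
jx.groupby(messages, size=self.batch_size) above yields fixed-size batches so each bulk insert stays bounded; the chunking itself reduces to one slice loop (a sketch, not jx's implementation):

def batches(items, size):
    # YIELD CONSECUTIVE FIXED-SIZE CHUNKS; THE LAST MAY BE SHORTER
    for start in range(0, len(items), size):
        yield items[start:start + size]

print(list(batches([1, 2, 3, 4, 5], 2)))  # [[1, 2], [3, 4], [5]]
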
Example #22
def _leap_logging(clock_time, utc_time):
    global _Log
    global _leap_log

    diff = clock_time - LEAP_SECOND
    with _leap_lock:
        if not _Log:
            from pyLibrary.debugs.logs import Log as _Log
            _Log.warning("preparing for leap seconds near {{date|datetime}}", date=LEAP_SECOND)

    if abs(diff) < 30:  # RECORD AROUND THE LEAP SECOND
        with _leap_lock:
            _leap_log.append((clock_time, utc_time))
    if diff > 60 and _leap_log:
        with _leap_lock:
            temp, _leap_log = _leap_log, Null
        if temp:
            clocks, utcs = zip(*temp)
            _Log.warning(
                "Leap second logging results\ntime.time\n{{clocks|json}}\ntime.time diff\n{{clocks_diff|json}}\nutcnow\n{{utc|json}}\nutc diff\n{{utc_diff|json}}",
                clocks=clocks,
                clocks_diff=[b - a for a, b in zip(clocks, clocks[1:])],
                utc=utcs,
                utc_diff=[b - a for a, b in zip(utcs, utcs[1:])]
            )
Example #23
 def _worker(self, please_stop):
     while not please_stop:
         try:
             self.pulse.listen()
         except Exception as e:
             if not please_stop:
                 Log.warning("pulse had problem", e)
Example #24
    def loop(self, please_stop):
        with self.work_queue:
            while not please_stop:
                if self.settings.wait_forever:
                    todo = None
                    while not please_stop and not todo:
                        if isinstance(self.work_queue, aws.Queue):
                            todo = self.work_queue.pop(wait=EXTRA_WAIT_TIME)
                        else:
                            todo = self.work_queue.pop()
                else:
                    if isinstance(self.work_queue, aws.Queue):
                        todo = self.work_queue.pop()
                    else:
                        todo = self.work_queue.pop(till=Date.now())
                    if todo == None:
                        please_stop.go()
                        return

                try:
                    is_ok = self._dispatch_work(todo)
                    if is_ok:
                        self.work_queue.commit()
                    else:
                        self.work_queue.rollback()
                except Exception as e:
                    self.work_queue.rollback()
                    Log.warning("could not process {{key}}. Returned back to work queue.", key=todo.key, cause=e)
Example #25
 def __init__(self,
              host,
              index,
              type="query",
              max_size=10,
              batch_size=10,
              settings=None):
     """
     settings ARE FOR THE ELASTICSEARCH INDEX
     """
     es = Cluster(settings).get_or_create_index(
         schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
         limit_replicas=True,
         settings=settings
     )
     #ENSURE THE TYPE EXISTS FOR PROBING
     try:
         es.add({
             "id": "dummy",
             "value": {
                 "hash": "dummy",
                 "create_time": Date.now(),
                 "last_used": Date.now(),
                 "query": {}
             }
         })
     except Exception as e:
         Log.warning("Problem saving query", cause=e)
Example #26
    def get_treeherder_job(self):
        try:
            with Timer("Process Request"):
                args = Dict(**flask.request.args)

                # IS THE branch/revision PENDING?

                result = self.get_markup(unwraplist(args.branch),
                                         unwraplist(args.revision),
                                         unwraplist(args.task_id),
                                         unwraplist(args.buildername),
                                         unwraplist(args.timestamp))

                response_data = convert.unicode2utf8(
                    convert.value2json(result))
                return Response(response_data,
                                status=200,
                                headers={
                                    "access-control-allow-origin": "*",
                                    "content-type": "text/plain"
                                })
        except Exception as e:
            e = Except.wrap(e)
            Log.warning("Could not process", cause=e)
            e = e.as_dict()

            return Response(convert.unicode2utf8(convert.value2json(e)),
                            status=400,
                            headers={
                                "access-control-allow-origin": "*",
                                "content-type": "application/json"
                            })
Example #27
def etl_one(settings):
    queue = Queue("temp work queue")
    queue.__setattr__(b"commit", Null)
    queue.__setattr__(b"rollback", Null)

    settings.param.wait_forever = False
    already_in_queue = set()
    for w in settings.workers:
        source = get_container(w.source)
        # source.settings.fast_forward = True
        if id(source) in already_in_queue:
            continue
        try:
            for i in parse_id_argument(settings.args.id):
                data = source.get_key(i)
                if data != None:
                    already_in_queue.add(id(source))
                    queue.add(Dict(
                        bucket=w.source.bucket,
                        key=i
                    ))
        except Exception as e:
            if "Key {{key}} does not exist" in e:
                already_in_queue.add(id(source))
                queue.add(Dict(
                    bucket=w.source.bucket,
                    key=settings.args.id
                ))
            Log.warning("Problem", cause=e)
Example #28
    def go(self):
        """
        ACTIVATE SIGNAL (DOES NOTHING IF SIGNAL IS ALREADY ACTIVATED)
        """
        if DEBUG:
            if not _Log:
                _late_import()
            _Log.note("GO! {{name|quote}}", name=self.name)

        with self.lock:
            if DEBUG:
                _Log.note("internal GO! {{name|quote}}", name=self.name)
            if self._go:
                return
            self._go = True
            jobs, self.job_queue = self.job_queue, []
            threads, self.waiting_threads = self.waiting_threads, []

        for t in threads:
            if DEBUG:
                _Log.note("Release")
            t.release()

        for j in jobs:
            try:
                j()
            except Exception as e:
                if not _Log:
                    _late_import()
                _Log.warning("Trigger on Signal.go() failed!", cause=e)
Example #29
def pypy_json_encode(value, pretty=False):
    """
    pypy DOES NOT OPTIMIZE GENERATOR CODE WELL
    """
    global _dealing_with_problem
    if pretty:
        return pretty_json(value)

    try:
        _buffer = UnicodeBuilder(1024)
        _value2json(value, _buffer)
        output = _buffer.build()
        return output
    except Exception as e:
        # THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS
        from pyLibrary.debugs.logs import Log

        if _dealing_with_problem:
            Log.error("Serialization of JSON problems", e)
        else:
            Log.warning("Serialization of JSON problems", e)
        _dealing_with_problem = True
        try:
            return pretty_json(value)
        except Exception as f:
            Log.error("problem serializing object", f)
Example #30
    def add_log(self, logger):
        if logger == None:
            if not _Log:
                _delayed_imports()

            _Log.warning("Expecting a non-None logger")

        self.many.append(logger)
        return self
Example #31
    def _kill(self):
        try:
            self.service.kill()
        except Exception as e:
            ee = Except.wrap(e)
            if "The operation completed successfully" in ee:
                return

            Log.warning("Failure to kill process {{process|quote}}", process=self.name, cause=ee)
Example #32
 def _delete_old_indexes(self, candidates):
     for c in candidates:
         timestamp = unicode2Date(c.index[-15:], "%Y%m%d_%H%M%S")
         if timestamp + self.rollover_interval < Date.today() - self.rollover_max:
             # Log.warning("Will delete {{index}}", index=c.index)
             try:
                 self.cluster.delete_index(c.index)
             except Exception as e:
                 Log.warning("could not delete index {{index}}", index=c.index, cause=e)
def log_loop(settings, synch, queue, bucket, please_stop):
    with aws.Queue(settings.work_queue) as work_queue:
        for i, g in qb.groupby(queue, size=settings.param.size):
            Log.note(
                "Preparing {{num}} pulse messages to bucket={{bucket}}",
                num=len(g),
                bucket=bucket.name
            )

            full_key = unicode(synch.next_key) + ":" + unicode(MIN(g.select("_meta.count")))
            try:
                output = [
                    set_default(
                        d,
                        {"etl": {
                            "name": "Pulse block",
                            "bucket": settings.destination.bucket,
                            "timestamp": Date.now().unix,
                            "id": synch.next_key,
                            "source": {
                                "name": "pulse.mozilla.org",
                                "id": d._meta.count,
                                "count": d._meta.count,
                                "message_id": d._meta.message_id,
                                "sent": Date(d._meta.sent),
                            },
                            "type": "aggregation"
                        }}
                    )
                    for i, d in enumerate(g)
                    if d != None  # HAPPENS WHEN PERSISTENT QUEUE FAILS TO LOG start
                ]
                bucket.write(full_key, "\n".join(convert.value2json(d) for d in output))
                synch.advance()
                synch.source_key = MAX(g.select("_meta.count")) + 1

                now = Date.now()
                work_queue.add({
                    "bucket": bucket.name,
                    "key": full_key,
                    "timestamp": now.unix,
                    "date/time": now.format()
                })

                synch.ping()
                queue.commit()
                Log.note("Wrote {{num}} pulse messages to bucket={{bucket}}, key={{key}} ",
                    num= len(g),
                    bucket= bucket.name,
                    key= full_key)
            except Exception as e:
                queue.rollback()
                if not queue.closed:
                    Log.warning("Problem writing {{key}} to S3", key=full_key, cause=e)

            if please_stop:
                break
Example #34
def _shorten(value, source):
    value.result.subtests = [s for s in value.result.subtests if s.ok is False]
    value.result.missing_subtests = True
    if source.name.startswith("active-data-test-result"):
        value.repo.changeset.files = None

    shorter_length = len(convert.value2json(value))
    if shorter_length > MAX_RECORD_LENGTH:
        Log.warning("Monstrous {{name}} record {{id}} of length {{length}}", id=value._id, name=source.name, length=shorter_length)
Example #35
    def now():
        candidate = _time()
        temp = datetime.utcnow()
        unix = datetime2unix(temp)
        if abs(candidate - unix) > 0.1:
            from pyLibrary.debugs.logs import Log

            Log.warning("_time() and datetime.utcnow() is off by {{amount}}", amount=candidate - unix)
        return unix2Date(datetime2unix(temp))
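
now() cross-validates two clocks, time.time() against datetime.utcnow(), and warns when they disagree by more than 100ms. The comparison itself is two stdlib calls, sketched here with a plain datetime2unix (an assumption; the library supplies its own):

import time
import calendar
from datetime import datetime

def datetime2unix(d):
    # SECONDS SINCE EPOCH, TREATING d AS UTC
    return calendar.timegm(d.timetuple()) + d.microsecond / 1e6

candidate = time.time()
unix = datetime2unix(datetime.utcnow())
print(abs(candidate - unix) < 0.1)  # True WHEN BOTH CLOCKS AGREE
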
Example #36
    def add_log(self, logger):
        if logger == None:
            if not _Log:
                _delayed_imports()

            _Log.warning("Expecting a non-None logger")

        self.many.append(logger)
        return self
Example #37
    def __getitem__(self, item):
        c = self.schema.get(item)
        if c:
            return c

        e = self.edges[item]
        if not c:
            Log.warning("Column with name {{column|quote}} can not be found in {{table}}", column=item, table=self.name)
        return e
Example #38
def setup(settings=None):
    global config

    try:
        config = startup.read_settings(
            defs={
                "name": ["--process_num", "--process"],
                "help": "Additional port offset (for multiple Flask processes)",
                "type": int,
                "dest": "process_num",
                "default": 0,
                "required": False
            },
            filename=settings
        )
        constants.set(config.constants)
        Log.start(config.debug)

        if config.args.process_num and config.flask.port:
            config.flask.port += config.args.process_num

        # PIPE REQUEST LOGS TO ES DEBUG
        if config.request_logs:
            request_logger = elasticsearch.Cluster(
                config.request_logs).get_or_create_index(config.request_logs)
            active_data.request_log_queue = request_logger.threaded_queue(
                max_size=2000)

        # SETUP DEFAULT CONTAINER, SO THERE IS SOMETHING TO QUERY
        containers.config.default = {
            "type": "elasticsearch",
            "settings": config.elasticsearch.copy()
        }

        # TURN ON /exit FOR WINDOWS DEBUGGING
        if config.flask.debug or config.flask.allow_exit:
            config.flask.allow_exit = None
            Log.warning("ActiveData is in debug mode")
            app.add_url_rule('/exit', 'exit', _exit)

        # TRIGGER FIRST INSTANCE
        FromESMetadata(config.elasticsearch)
        if config.saved_queries:
            setattr(save_query, "query_finder",
                    SaveQueries(config.saved_queries))
        HeaderRewriterFix(app, remove_headers=['Date', 'Server'])

        if config.flask.ssl_context:
            if config.args.process_num:
                Log.error(
                    "can not serve ssl and multiple Flask instances at once")
            setup_ssl()

        return app
    except Exception as e:
        Log.error(
            "Serious problem with ActiveData service construction!  Shutdown!",
            cause=e)
Example #39
    def __eq__(self, other):
        Log.warning("expensive")

        from pyLibrary.testing.fuzzytestcase import assertAlmostEqual

        try:
            assertAlmostEqual(convert.json2value(self.json), other)
            return True
        except Exception:
            return False
Example #40
    def __getslice__(self, i, j):
        global _emit_slice_warning

        if _emit_slice_warning:
            _emit_slice_warning = False
            if not _Log:
                _late_import()

            _Log.warning("slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes.  Use [start:stop:step] (see https://github.com/klahnakoski/pyLibrary/blob/master/pyLibrary/dot/README.md#the-slice-operator-in-python27-is-inconsistent)")
        return self[i:j:]
Example #41
    def __exit__(self, exc_type, exc_val, exc_tb):
        if isinstance(exc_val, Exception):
            from pyLibrary.debugs.logs import Log

            Log.warning(template="Ignored failure while " + self.template,
                        default_params=self.more_params,
                        cause=exc_val,
                        stack_depth=1)

            return True
Example #42
class TextLog_usingElasticSearch(TextLog):
    @use_settings
    def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(settings).get_or_create_index(
            schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=True,
            settings=settings
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(settings.alias, settings.index))
        self.queue = Queue("debug logs to es", max=max_size, silent=True)
        self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
        self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
        Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        if params.get("template"):
            # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
            self.queue.add({"value": params})
        else:
            template = strings.limit(template, 2000)
            self.queue.add({"value": {"template": template, "params": params}}, timeout=3 * MINUTE)
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                Thread.sleep(seconds=1)
                messages = wrap(self.queue.pop_all())
                if messages:
                    # for m in messages:
                    #     m.value.params = leafer(m.value.params)
                    #     m.value.error = leafer(m.value.error)
                    for g, mm in jx.groupby(messages, size=self.batch_size):
                        self.es.extend(mm)
                    bad_count = 0
            except Exception as e:
                Log.warning("Problem inserting logs into ES", cause=e)
                bad_count += 1
                if bad_count > 5:
                    break
        Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index)

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Thread.sleep(seconds=1)
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)
Example #43
 def _drain_events_loop(self):
     while True:
         try:
             self.connection.drain_events(timeout=self.timeout)
         except socket_timeout as e:
             Log.warning("timeout! Restarting pulse consumer.", cause=e)
             try:
                 self.disconnect()
             except Exception as f:
                 Log.warning("Problem with disconnect()", cause=f)
             break
Example #44
    def __exit__(self, exc_type, exc_val, exc_tb):
        Log.note("clean pulse exit")
        self.please_stop.go()
        with suppress_exception:
            self.target_queue.close()
            Log.note("stop put into queue")

        try:
            self.pulse.disconnect()
        except Exception as e:
            Log.warning("Can not disconnect during pulse exit, ignoring", e)
Example #45
 def __iter__(self):
     """
     BLOCKING ITERATOR
     """
     while not self.please_stop:
         try:
             value = self.pop()
             if value is not Thread.STOP:
                 yield value
         except Exception as e:
             Log.warning("Tell me about what happened here", cause=e)
Example #46
 def output(*args, **kwargs):
     while True:
         try:
             return func(*args, **kwargs)
         except Exception as e:
             e = Except.wrap(e)
             if "Request limit exceeded" in e:
                 Log.warning("AWS Problem", cause=e)
                 continue
             else:
                 Log.error("Problem with call to AWS", cause=e)
Example #47
    def create_index(
        self,
        index,
        alias=None,
        create_timestamp=None,
        schema=None,
        limit_replicas=None,
        read_only=False,
        tjson=False,
        settings=None
    ):
        if not alias:
            alias = settings.alias = settings.index
            index = settings.index = proto_name(alias, create_timestamp)

        if settings.alias == index:
            Log.error("Expecting index name to conform to pattern")

        if settings.schema_file:
            Log.error('schema_file attribute not supported.  Use {"$ref":<filename>} instead')

        if schema == None:
            Log.error("Expecting a schema")
        elif isinstance(schema, basestring):
            schema = convert.json2value(schema, leaves=True)
        else:
            schema = convert.json2value(convert.value2json(schema), leaves=True)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = self.get("/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized",
                    {"from": schema.settings.index.number_of_replicas},
                    to= health.number_of_nodes - 1
                )
                schema.settings.index.number_of_replicas = health.number_of_nodes - 1

        self.post(
            "/" + index,
            data=schema,
            headers={"Content-Type": "application/json"}
        )

        # CONFIRM INDEX EXISTS
        while True:
            try:
                state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
                if index in state.metadata.indices:
                    break
                Log.note("Waiting for index {{index}} to appear", index=index)
            except Exception as e:
                Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
            Thread.sleep(seconds=1)
Example #48
def _test_mode_wait(query):
    """
    WAIT FOR METADATA TO ARRIVE ON INDEX
    :param query: dict() OF REQUEST BODY
    :return: nothing
    """
    try:
        m = meta.singlton
        now = Date.now()
        end_time = now + MINUTE

        # MARK COLUMNS DIRTY
        m.meta.columns.update({
            "clear": ["partitions", "count", "cardinality", "last_updated"],
            "where": {
                "eq": {
                    "table": join_field(split_field(query["from"])[0:1])
                }
            }
        })

        # BE SURE THEY ARE ON THE todo QUEUE FOR RE-EVALUATION
        cols = [
            c for c in m.get_columns(table_name=query["from"], force=True)
            if c.type not in STRUCT
        ]
        for c in cols:
            Log.note("Mark {{column}} dirty at {{time}}",
                     column=c.name,
                     time=now)
            c.last_updated = now - TOO_OLD
            m.todo.push(c)

        while end_time > now:
            # GET FRESH VERSIONS
            cols = [
                c for c in m.get_columns(table_name=query["from"])
                if c.type not in STRUCT
            ]
            for c in cols:
                if not c.last_updated or c.cardinality == None:
                    Log.note(
                        "wait for column (table={{col.table}}, name={{col.name}}) metadata to arrive",
                        col=c)
                    break
            else:
                break
            Thread.sleep(seconds=1)
        for c in cols:
            Log.note(
                "fresh column name={{column.name}} updated={{column.last_updated|date}} parts={{column.partitions}}",
                column=c)
    except Exception as e:
        Log.warning("could not pickup columns", cause=e)
Example #49
    def close(self):
        if self.transaction_level > 0:
            Log.error("expecting commit() or rollback() before close")
        self.cursor = None  # NOT NEEDED
        try:
            self.db.close()
        except Exception as e:
            if e.message.find("Already closed") >= 0:
                return

            Log.warning("can not close()", e)
Example #50
def _exit():
    Log.note("Got request to shutdown")
    shutdown = flask.request.environ.get('werkzeug.server.shutdown')
    if shutdown:
        shutdown()
    else:
        Log.warning("werkzeug.server.shutdown does not exist")

    return Response(convert.unicode2utf8(OVERVIEW),
                    status=400,
                    headers={"Content-Type": "text/html"})
Example #51
    def __getslice__(self, i, j):
        global _emit_slice_warning

        if _emit_slice_warning:
            _emit_slice_warning = False
            if not _Log:
                _late_import()

            _Log.warning(
                "slicing is broken in Python 2.7: a[i:j] == a[i+len(a), j] sometimes.  Use [start:stop:step] (see https://github.com/klahnakoski/pyLibrary/blob/master/pyLibrary/dot/README.md#the-slice-operator-in-python27-is-inconsistent)"
            )
        return self[i:j:]
Example #52
 def _delete_old_indexes(self, candidates):
     for c in candidates:
         timestamp = unicode2Date(c.index[-15:], "%Y%m%d_%H%M%S")
         if timestamp + self.rollover_interval < Date.today() - self.rollover_max:
             # Log.warning("Will delete {{index}}", index=c.index)
             try:
                 self.cluster.delete_index(c.index)
             except Exception as e:
                 Log.warning("could not delete index {{index}}", index=c.index, cause=e)
Example #53
    def __exit__(self, type, value, traceback):
        if self.readonly:
            self.close()
            return

        if isinstance(value, BaseException):
            try:
                if self.cursor: self.cursor.close()
                self.cursor = None
                self.rollback()
            except Exception as e:
                Log.warning(u"can not rollback()", cause=[value, e])
Example #54
def _upgrade():
    global _upgraded
    _upgraded = True
    try:
        import sys

        sqlite_dll = File.new_instance(sys.exec_prefix, "dlls/sqlite3.dll")
        python_dll = File("pyLibrary/vendor/sqlite/sqlite3.dll")
        if python_dll.read_bytes() != sqlite_dll.read_bytes():
            backup = sqlite_dll.backup()
            File.copy(python_dll, sqlite_dll)
    except Exception as e:
        Log.warning("could not upgrade python's sqlite", cause=e)
Example #55
def set_attr(obj, path, value):
    """
    SAME AS object.__setattr__(), BUT USES DOT-DELIMITED path
    RETURN OLD VALUE
    """
    try:
        return _set_attr(obj, split_field(path), value)
    except Exception as e:
        from pyLibrary.debugs.logs import Log
        if PATH_NOT_FOUND in e:
            Log.warning(PATH_NOT_FOUND + ": {{path}}", path=path)
        else:
            Log.error("Problem setting value", e)
Example #56
 def _pinger(self, please_stop):
     Log.note("pinger started")
     while not please_stop:
         Thread.sleep(till=self.ping_time + PING_PERIOD,
                      please_stop=please_stop)
         if please_stop:  # EXIT EARLY, OTHERWISE WE MAY OVERWRITE THE shutdown
             break
         if Date.now() < self.ping_time + PING_PERIOD:
             continue
         try:
             self.ping()
         except Exception as e:
             Log.warning("synchro.py could not ping", e)