Example #1
    def get_columns(self, table_name, column_name=None, force=False):
        """
        RETURN METADATA COLUMNS
        """
        try:
            # LAST TIME WE GOT INFO FOR THIS TABLE
            short_name = join_field(split_field(table_name)[0:1])
            table = self.get_table(short_name)[0]

            if not table:
                table = Table(
                    name=short_name,
                    url=None,
                    query_path=None,
                    timestamp=Date.now()
                )
                with self.meta.tables.locker:
                    self.meta.tables.add(table)
                self._get_columns(table=short_name)
            elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
                table.timestamp = Date.now()
                self._get_columns(table=short_name)

            with self.meta.columns.locker:
                columns = self.meta.columns.find(table_name, column_name)
            if columns:
                columns = jx.sort(columns, "name")
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    Log.note("waiting for columns to update {{columns|json}}", columns=[c.table+"."+c.es_column for c in columns if not c.last_updated])
                    Thread.sleep(seconds=1)
                return columns
        except Exception, e:
            Log.error("Not expected", cause=e)
Example #2
    def pop(self, timeout=None):
        """
        :param timeout: OPTIONAL DURATION
        :return: None, IF timeout PASSES
        """
        with self.lock:
            while not self.please_stop:
                if self.db.status.end > self.start:
                    value = self.db[str(self.start)]
                    self.start += 1
                    return value

                if timeout is not None:
                    try:
                        self.lock.wait(timeout=timeout)
                        if self.db.status.end <= self.start:
                            return None
                    except Exception:
                        pass
                else:
                    try:
                        self.lock.wait()
                    except Exception:
                        pass
            if DEBUG:
                Log.note("persistent queue already stopped")
            return Thread.STOP
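
A short sketch of the timeout contract described in the docstring (the queue object is hypothetical; MINUTE is the duration constant used elsewhere in these examples):

# HYPOTHETICAL USAGE - None ON TIMEOUT, Thread.STOP WHEN THE QUEUE IS CLOSED
value = queue.pop(timeout=MINUTE)
if value is None:
    Log.note("no work arrived within the timeout")
elif value is Thread.STOP:
    Log.note("queue was closed")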
Example #3
def main():
    """
    CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE
    """
    settings = startup.read_settings(defs=[
        {
            "name": ["--bucket"],
            "help": "bucket to scan",
            "type": str,
            "dest": "bucket",
            "required": True
        }
    ])
    Log.start(settings.debug)

    source = Connection(settings.aws).get_bucket(settings.args.bucket)

    for k in qb.sort(source.keys()):
        try:
            data = source.read_bytes(k)
            if convert.ascii2unicode(data).find("2e2834fa7ecd8d3bb1ad49ec981fdb89eb4df95e18") >= 0:
                Log.note("Found at {{key}}", key=k)
        except Exception, e:
            Log.warning("Problem with {{key}}", key=k, cause=e)
Example #4
def pretty_json(value):
    try:
        if scrub(value) is None:
            return "null"
        elif isinstance(value, basestring):
            if isinstance(value, str):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception, e:
                from pyLibrary.debugs.logs import Log

                try:
                    Log.note(
                        "try explicit convert of string with length {{length}}",
                        length=len(value))
                    acc = [u"\""]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception, h:
                                c2 = c
                            c3 = unicode(c2)
                            acc.append(c3)
                        except BaseException, g:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.",  ord= ord(c)}, cause=g)
                    acc.append(u"\"")
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}",
                             length=len(output))
                    return output
Example #5
    def get_revision(self, revision, locale=None):
        """
        EXPECTING INCOMPLETE revision
        RETURNS revision
        """
        rev = revision.changeset.id
        if not rev:
            return Null
        elif rev == "None":
            return Null
        elif revision.branch.name == None:
            return Null
        locale = coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)
        doc = self._get_from_elasticsearch(revision, locale=locale)
        if doc:
            Log.note(
                "Got hg ({{branch}}, {{locale}}, {{revision}}) from ES",
                branch=doc.branch.name,
                locale=locale,
                revision=doc.changeset.id,
            )
            return doc

        output = self._load_all_in_push(revision, locale=locale)
        return output
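
A hypothetical call, using a branch and changeset that appear elsewhere in these examples (`hg` is an invented name for the object owning this method):

# HYPOTHETICAL USAGE - PASS AN INCOMPLETE revision, GET A FULL ONE BACK
rev = hg.get_revision(wrap({
    "changeset": {"id": "971c1ee26cad"},
    "branch": {"name": "mozilla-inbound"}
}))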
Example #6
    def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None):
        """
        :param keys: THE KEYS TO LOAD FROM source
        :param source: THE SOURCE (USUALLY S3 BUCKET)
        :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
        :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
        :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
        :return: LIST OF SUB-keys PUSHED INTO ES
        """
        num_keys = 0
        queue = None
        for key in keys:
            timer = Timer("key")
            try:
                with timer:
                    for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                        if not line:
                            continue

                        if rownum > 0 and rownum % 1000 == 0:
                            Log.note("Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name)

                        row, please_stop = fix(rownum, line, source, sample_only_filter, sample_size)
                        num_keys += 1

                        if queue == None:
                            queue = self._get_queue(row)
                        queue.add(row)

                        if please_stop:
                            break
            except Exception, e:
                done_copy = None
                Log.warning("Could not process {{key}} after {{duration|round(places=2)}}seconds", key=key, duration=timer.duration.seconds, cause=e)
Example #7
    def monitor(self, please_stop):
        please_stop.on_go(lambda: self.todo.add(Thread.STOP))
        while not please_stop:
            try:
                if not self.todo:
                    with self.columns.locker:
                        old_columns = filter(
                            lambda c: (c.last_updated == None or c.last_updated < Date.now()-TOO_OLD) and c.type not in ["object", "nested"],
                            self.columns
                        )
                        if old_columns:
                            self.todo.extend(old_columns)
                            # TEST CONSISTENCY
                            for c, d in product(list(self.todo.queue), list(self.todo.queue)):
                                if c.abs_name == d.abs_name and c.table == d.table and c != d:
                                    Log.error("duplicate column {{name}} in todo queue", name=c.abs_name)
                        else:
                            Log.note("no more metatdata to update")

                column = self.todo.pop(timeout=10*MINUTE)
                if column:
                    if column.type in ["object", "nested"]:
                        continue
                    elif column.last_updated >= Date.now()-TOO_OLD:
                        continue
                    try:
                        self._update_cardinality(column)
                        Log.note("updated {{column.name}}", column=column)
                    except Exception, e:
                        Log.warning("problem getting cardinality for  {{column.name}}", column=column, cause=e)
            except Exception, e:
                Log.warning("problem in cardinality monitor", cause=e)
Example #8
def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        aws_args = dict(
            region_name=settings.aws.region,
            aws_access_key_id=unwrap(settings.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key)
        )
        ec2_conn = boto_ec2.connect_to_region(**aws_args)

        instances = _get_managed_instances(ec2_conn, settings.name)

        for i in instances:
            Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", insance_id=i.id, name=i.tags["Name"], ip=i.ip_address)
            _config_fabric(settings.fabric, i)
            try:
                _refresh_etl()  # TODO: UPON FAILURE, TERMINATE INSTANCE AND SPOT REQUEST
            except Exception, e:
                ec2_conn.terminate_instances([i.id])
                Log.warning("Problem resetting {{instance}}, terminated", instance=i.id, cause=e)
    except Exception, e:
        Log.error("Problem with etl", e)
Example #9
def _refresh_etl():
    with cd("~/TestLog-ETL/"):
        result = run("git pull origin etl")
        if result.find("Already up-to-date.") != -1:
            Log.note("No change required")
            return
        sudo("supervisorctl restart all")
Example #10
    def test_multiple_agg_on_same_field(self):
        if self.not_real_service():
            return

        test = wrap({
            "query": {
                "from": {
                    "type": "elasticsearch",
                    "settings": {
                        "host": ES_CLUSTER_LOCATION,
                        "index": "unittest",
                        "type": "test_result"
                    }
                },
                "select": [{
                    "name": "max_bytes",
                    "value": "run.stats.bytes",
                    "aggregate": "max"
                }, {
                    "name": "count",
                    "value": "run.stats.bytes",
                    "aggregate": "count"
                }]
            }
        })

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #11
    def on_go(self, target):
        """
        RUN target WHEN SIGNALED
        """
        if not target:
            if not _Log:
                _late_import()
            _Log.error("expecting target")

        with self.lock:
            if self._go:
                if DEBUG_SIGNAL:
                    if not _Log:
                        _late_import()
                    _Log.note(
                        "Signal {{name|quote}} already triggered, running job immediately",
                        name=self.name)
                target()
            else:
                if DEBUG:
                    if not _Log:
                        _late_import()
                    _Log.note("Adding target to signal {{name|quote}}",
                              name=self.name)
                self.job_queue.append(target)
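
The contract in miniature: targets registered before the signal goes are queued; after it goes they run immediately. Signal is the same class exercised in the test examples below:

# SKETCH - on_go() BEFORE go() QUEUES THE JOB; AFTER go() IT RUNS AT ONCE
done = Signal("done")
done.on_go(lambda: Log.note("runs when done.go() is called"))
done.go()
done.on_go(lambda: Log.note("signal already went; this runs immediately"))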
Example #12
def safe_size(source):
    """
    READ THE source UP TO SOME LIMIT, THEN COPY TO A FILE IF TOO BIG
    RETURN A str() OR A FileString()
    """

    if source is None:
        return None

    total_bytes = 0
    bytes = []
    b = source.read(MIN_READ_SIZE)
    while b:
        total_bytes += len(b)
        bytes.append(b)
        if total_bytes > MAX_STRING_SIZE:
            try:
                data = FileString(TemporaryFile())
                for bb in bytes:
                    data.write(bb)
                del bytes
                del bb
                b = source.read(MIN_READ_SIZE)
                while b:
                    total_bytes += len(b)
                    data.write(b)
                    b = source.read(MIN_READ_SIZE)
                data.seek(0)
                Log.note("Using file of size {{length}} instead of str()",  length= total_bytes)

                return data
            except Exception, e:
                Log.error("Could not write file > {{num}} bytes",  num= total_bytes, cause=e)
        b = source.read(MIN_READ_SIZE)

    # DID NOT EXCEED MAX_STRING_SIZE: RETURN AS A SINGLE str, PER THE DOCSTRING
    return b"".join(bytes)
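
A usage sketch (hypothetical file name; MIN_READ_SIZE and MAX_STRING_SIZE are module constants):

# HYPOTHETICAL USAGE - SMALL INPUT COMES BACK AS str, BIG INPUT AS FileString
with open("results.json", "rb") as f:
    content = safe_size(f)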
Example #13
    def _reader(self, pipe, recieve, please_stop):
        try:
            while not please_stop:
                line = pipe.readline()
                if self.service.returncode is not None:
                    # GRAB A FEW MORE LINES
                    for i in range(100):
                        try:
                            line = pipe.readline()
                            if line:
                                recieve.add(line)
                                if self.debug:
                                    Log.note("FROM {{process}}: {{line}}",
                                             process=self.name,
                                             line=line.rstrip())
                        except Exception:
                            break
                    return

                recieve.add(line)
                if self.debug:
                    Log.note("FROM {{process}}: {{line}}",
                             process=self.name,
                             line=line.rstrip())
        finally:
            pipe.close()
Example #14
    def get_revision(self, revision, locale=None):
        """
        EXPECTING INCOMPLETE revision
        RETURNS revision
        """
        rev = revision.changeset.id
        if not rev:
            return Null
        elif rev == "None":
            return Null
        elif revision.branch.name == None:
            return Null

        if not self.current_push:
            doc = self._get_from_elasticsearch(revision, locale=locale)
            if doc:
                Log.note("Got hg ({{branch}}, {{locale}}, {{revision}}) from ES", branch=doc.branch.name, locale=locale, revision=doc.changeset.id)
                return doc

            self._load_all_in_push(revision, locale=locale)

            # THE cache IS FILLED, CALL ONE LAST TIME...
            return self.get_revision(revision, locale)

        output = self._get_from_hg(revision, locale=locale)
        output.changeset.id12 = output.changeset.id[0:12]
        output.branch = {
            "name": output.branch.name,
            "url": output.branch.url,
            "locale": output.branch.locale
        }
        return output
Example #15
def verify_blobber_file(line_number, name, url):
    """
    :param line_number:  for debugging
    :param name:  for debugging
    :param url:  TO BE READ
    :return:  RETURNS BYTES **NOT** UNICODE
    """
    if name in ["emulator-5554.log", "qemu.log"] or any(map(name.endswith, [".png", ".html"])):
        return None, 0

    with Timer("Read {{name}}: {{url}}", {"name": name, "url": url}, debug=DEBUG):
        response = http.get(url)
        try:
            logs = response.all_lines
        except Exception, e:
            if name.endswith("_raw.log"):
                Log.error(
                    "Line {{line}}: {{name}} = {{url}} is NOT structured log",
                    line=line_number,
                    name=name,
                    url=url,
                    cause=e
                )
            if DEBUG:
                Log.note(
                    "Line {{line}}: {{name}} = {{url}} is NOT structured log",
                    line=line_number,
                    name=name,
                    url=url
                )
            return None, 0
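
A hypothetical call, assuming the truncated tail of the function returns the log lines on success (matching the None, 0 returns on skip and failure):

# HYPOTHETICAL USAGE - SKIP LIST/IMAGE ARTIFACTS, READ STRUCTURED LOGS
lines, num_lines = verify_blobber_file(1, "log_info.log", "http://example.com/blobs/abc")
if lines:
    Log.note("read {{num}} lines", num=num_lines)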
Example #16
    def forall(self, sql, param=None, _execute=None):
        assert _execute
        num = 0

        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
            self.cursor.execute(sql)

            columns = tuple(
                [utf8_to_unicode(d[0]) for d in self.cursor.description])
            for r in self.cursor:
                num += 1
                _execute(
                    wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))

            if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

        except Exception, e:
            Log.error("Problem executing SQL:\n{{sql|indent}}",
                      sql=sql,
                      cause=e,
                      stack_depth=1)
Example #17
def test_simple(filename):
    with Timer("simple time"):
        with codecs.open(filename, "r", encoding="utf-8") as f:
            for line in f:
                id = int(line.split("\t")[0])
                if id % 10000 == 0:
                    Log.note(str(id))
Example #18
def main():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        constants.set(settings.constants)

        with startup.SingleInstance(flavor_id=settings.args.filename):
            with aws.s3.Bucket(settings.destination) as bucket:

                if settings.param.debug:
                    if settings.source.durable:
                        Log.error("Can not run in debug mode with a durable queue")
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                else:
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                    if settings.source.durable:
                        synch.startup()

                queue = PersistentQueue(settings.param.queue_file)
                if queue:
                    last_item = queue[len(queue) - 1]
                    synch.source_key = last_item._meta.count + 1

                with pulse.Consumer(settings=settings.source, target=None, target_queue=queue, start=synch.source_key):
                    Thread.run("pulse log loop", log_loop, settings, synch, queue, bucket)
                    Thread.wait_for_shutdown_signal(allow_exit=True)
                    Log.warning("starting shutdown")

                queue.close()
                Log.note("write shutdown state to S3")
                synch.shutdown()

    except Exception, e:
        Log.error("Problem with etl", e)
Example #19
def verify_blobber_file(line_number, name, url):
    """
    :param line_number:  for debugging
    :param name:  for debugging
    :param url:  TO BE READ
    :return:  RETURNS BYTES **NOT** UNICODE
    """
    if name in ["emulator-5554.log", "qemu.log"] or any(
            map(name.endswith, [".png", ".html"])):
        return None, 0

    with Timer("Read {{name}}: {{url}}", {
            "name": name,
            "url": url
    },
               debug=DEBUG):
        response = http.get(url)
        try:
            logs = response.all_lines
        except Exception, e:
            if name.endswith("_raw.log"):
                Log.error(
                    "Line {{line}}: {{name}} = {{url}} is NOT structured log",
                    line=line_number,
                    name=name,
                    url=url,
                    cause=e)
            if DEBUG:
                Log.note(
                    "Line {{line}}: {{name}} = {{url}} is NOT structured log",
                    line=line_number,
                    name=name,
                    url=url)
            return None, 0
Example #20
def test_buffered(filename):
    with Timer("buffered time"):
        with codecs.open(filename, "r", encoding="utf-8", buffering=2 ** 25) as f:
            for line in f:
                id = int(line.split("\t")[0])
                if id % 10000 == 0:
                    Log.note(str(id))
Example #21
def get_file(ref, url):
    from pyLibrary.env.files import File

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            _Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception, e:
        content = None
        _Log.error("Could not read file {{filename}}", filename=path, cause=e)
Example #22
    def event_loop(self, please_stop):
        got_stop_message = False
        while not please_stop.is_go():
            with Timer("get more work", debug=DEBUG):
                request = self.in_queue.pop()
            if request == Thread.STOP:
                if DEBUG:
                    Log.note("{{name}} got a stop message", name=self.name)
                got_stop_message = True
                if self.in_queue:
                    Log.warning(
                        "programmer error, queue not empty. {{num}} requests lost:\n{{requests}}",
                        num=len(self.in_queue.queue),
                        requests=list(self.in_queue.queue)[:5:] + list(self.in_queue.queue)[-5::],
                    )
                break
            if please_stop.is_go():
                break

            with Timer("run {{function}}", {"function": get_function_name(self.function)}, debug=DEBUG):
                try:
                    result = self.function(**request)
                    if self.out_queue != None:
                        self.out_queue.add({"response": result})
                except Exception, e:
                    Log.warning("Can not execute with params={{params}}", params=request, cause=e)
                    if self.out_queue != None:
                        self.out_queue.add({"exception": e})
                finally:
                    self.num_runs += 1
Example #23
    def extend(self, records):
        """
        records - MUST HAVE FORM OF
            [{"value":value}, ... {"value":value}] OR
            [{"json":json}, ... {"json":json}]
            OPTIONAL "id" PROPERTY IS ALSO ACCEPTED
        """
        lines = []
        try:
            for r in records:
                id = r.get("id")
                if id == None:
                    id = Random.hex(40)

                if "json" in r:
                    json = r["json"]
                elif "value" in r:
                    json = convert.value2json(r["value"])
                else:
                    json = None
                    Log.error("Expecting every record given to have \"value\" or \"json\" property")

                lines.append('{"index":{"_id": ' + convert.value2json(id) + '}}')
                lines.append(json)
            del records

            if not lines:
                return

            try:
                data_bytes = "\n".join(lines) + "\n"
                data_bytes = data_bytes.encode("utf8")
            except Exception, e:
                Log.error("can not make request body from\n{{lines|indent}}",  lines= lines, cause=e)


            response = self.cluster._post(
                self.path + "/_bulk",
                data=data_bytes,
                headers={"Content-Type": "text"},
                timeout=self.settings.timeout
            )
            items = response["items"]

            for i, item in enumerate(items):
                if self.cluster.version.startswith("0.90."):
                    if not item.index.ok:
                        Log.error("{{error}} while loading line:\n{{line}}",
                            error= item.index.error,
                            line= lines[i * 2 + 1])
                elif self.cluster.version.startswith("1.4."):
                    if item.index.status not in [200, 201]:
                        Log.error("{{error}} while loading line:\n{{line}}",
                            error= item.index.error,
                            line= lines[i * 2 + 1])
                else:
                    Log.error("version not supported {{version}}",  version=self.cluster.version)

            if self.debug:
                Log.note("{{num}} documents added",  num= len(items))
Example #24
def test_wrap_3():
    switch = [
        lambda: Random.string(20),
        lambda: {"i": Random.int(2000)},
        lambda: Data(i=Random.int(2000)),
        lambda: FlatList([{"i": Random.int(2000)}]),
        lambda: [{"i": Random.int(2000)}]
    ]

    inputs = [switch[min(len(switch) - 1, int(floor(-log(Random.float(), 2))))]() for i in range(NUM_INPUT)]

    for i in range(NUM_REPEAT):
        results = []
        gc.collect()
        with Profiler("more string: slow_wrap"):
            for v in inputs:
                results.append(slow_wrap(v))

        results = []
        gc.collect()
        with Profiler("more string: wrap"):
            for v in inputs:
                results.append(wrap(v))

        results = []
        gc.collect()
        with Profiler("more string: baseline"):
            for v in inputs:
                results.append(baseline(v))

        Log.note("Done {{i}} of {{num}}", {"i": i, "num": NUM_REPEAT})
Example #25
    def close(self):
        self.please_stop.go()
        with self.lock:
            if self.db is None:
                return

            self.add(Thread.STOP)

            if self.db.status.end == self.start:
                if DEBUG:
                    Log.note("persistent queue clear and closed")
                self.file.delete()
            else:
                if DEBUG:
                    Log.note("persistent queue closed with {{num}} items left", num=len(self))
                try:
                    self._add_pending({"add": {"status.start": self.start}})
                    for i in range(self.db.status.start, self.start):
                        self._add_pending({"remove": str(i)})
                    self.file.write(
                        convert.value2json({"add": self.db})
                        + "\n"
                        + ("\n".join(convert.value2json(p) for p in self.pending))
                        + "\n"
                    )
                    self._apply_pending()
                except Exception, e:
                    raise e
            self.db = None
Example #26
    def commit(self):
        with self.lock:
            if self.closed:
                Log.error("Queue is closed, commit not allowed")

            try:
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})

                if (
                    self.db.status.end - self.start < 10 or Random.range(0, 1000) == 0
                ):  # FORCE RE-WRITE TO LIMIT FILE SIZE
                    # SIMPLY RE-WRITE FILE
                    if DEBUG:
                        Log.note(
                            "Re-write {{num_keys}} keys to persistent queue", num_keys=self.db.status.end - self.start
                        )

                        for k in self.db.keys():
                            if k == "status" or int(k) >= self.db.status.start:
                                continue
                            Log.error("Not expecting {{key}}", key=k)
                    self._commit()
                    self.file.write(convert.value2json({"add": self.db}) + "\n")
                else:
                    self._commit()
            except Exception, e:
                raise e
Example #27
def get_raw_json(path):
    active_data_timer = Timer("total duration")
    body = flask.request.get_data()
    try:
        with active_data_timer:
            args = wrap(Dict(**flask.request.args))
            limit = args.limit if args.limit else 10
            args.limit = None
            frum = wrap_from(path)
            result = jx.run(
                {
                    "from": path,
                    "where": {
                        "eq": args
                    },
                    "limit": limit,
                    "format": "list"
                }, frum)

            if isinstance(result, Container):  # TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                result = result.format("list")

        result.meta.active_data_response_time = active_data_timer.duration

        response_data = convert.unicode2utf8(
            convert.value2json(result.data, pretty=True))
        Log.note("Response is {{num}} bytes", num=len(response_data))
        return Response(response_data, status=200)
    except Exception, e:
        e = Except.wrap(e)
        return _send_error(active_data_timer, body, e)
Example #28
    def delete_index(self, index_name):
        if not isinstance(index_name, unicode):
            Log.error("expecting an index name")

        if self.debug:
            Log.note("Deleting index {{index}}", index=index_name)

        # REMOVE ALL ALIASES TOO
        aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
        if aliases:
            self.post(
                path="/_aliases",
                data={"actions": [{"remove": a} for a in aliases]}
            )

        url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
        try:
            response = http.delete(url)
            if response.status_code != 200:
                Log.error("Expecting a 200, got {{code}}", code=response.status_code)
            details = convert.json2value(utf82unicode(response.content))
            if self.debug:
                Log.note("delete response {{response}}", response=details)
            return response
        except Exception, e:
            Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #29
        def disconnect():
            with suppress_exception:
                self.target_queue.close()
                Log.note("stop put into queue")

            self.pulse.disconnect()
            Log.note("pulse listener was given a disconnect()")
Example #30
def get_all_in_es(es):
    in_es = set()

    all_indexes = es.es.cluster.get_metadata().indices
    for name, index in all_indexes.items():
        if "unittest" not in index.aliases:
            continue

        result = elasticsearch.Index(index=name, alias="unittest", settings=es.es.settings).search({
            "aggs": {
                "_match": {
                    "terms": {
                        "field": "etl.source.source.id",
                        "size": 200000
                    }

                }
            }
        })

        good_es = []
        for k in result.aggregations._match.buckets.key:
            try:
                good_es.append(int(k))
            except Exception, e:
                pass

        Log.note("got {{num}} from {{index}}",
            num= len(good_es),
            index= name)
        in_es |= set(good_es)
Example #31
    def commit(self):
        with self.lock:
            if self.closed:
                Log.error("Queue is closed, commit not allowed")

            try:
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})

                if self.db.status.end - self.start < 10 or Random.range(
                        0, 1000) == 0:  # FORCE RE-WRITE TO LIMIT FILE SIZE
                    # SIMPLY RE-WRITE FILE
                    if DEBUG:
                        Log.note(
                            "Re-write {{num_keys}} keys to persistent queue",
                            num_keys=self.db.status.end - self.start)

                        for k in self.db.keys():
                            if k == "status" or int(k) >= self.db.status.start:
                                continue
                            Log.error("Not expecting {{key}}", key=k)
                    self._commit()
                    self.file.write(
                        convert.value2json({"add": self.db}) + "\n")
                else:
                    self._commit()
            except Exception, e:
                raise e
Example #33
    def not_monitor(self, please_stop):
        Log.alert("metadata scan has been disabled")
        please_stop.on_go(lambda: self.todo.add(Thread.STOP))
        while not please_stop:
            c = self.todo.pop()
            if c == Thread.STOP:
                break

            if not c.last_updated or c.last_updated >= Date.now() - TOO_OLD:
                continue

            with self.meta.columns.locker:
                self.meta.columns.update({
                    "set": {
                        "last_updated": Date.now()
                    },
                    "clear": [
                        "count",
                        "cardinality",
                        "partitions",
                    ],
                    "where": {
                        "eq": {
                            "es_index": c.es_index,
                            "es_column": c.es_column
                        }
                    }
                })
            Log.note("Could not get {{col.es_index}}.{{col.es_column}} info",
                     col=c)
Example #34
    def delete_record(self, filter):
        if self.settings.read_only:
            Log.error("Index opened in read only mode, no changes allowed")
        self.cluster.get_metadata()

        if self.cluster.cluster_state.version.number.startswith("0.90"):
            query = {"filtered": {
                "query": {"match_all": {}},
                "filter": filter
            }}
        elif self.cluster.cluster_state.version.number.startswith("1.0"):
            query = {"query": {"filtered": {
                "query": {"match_all": {}},
                "filter": filter
            }}}
        else:
            raise NotImplementedError

        if self.debug:
            Log.note("Delete bugs:\n{{query}}",  query= query)

        result = self.cluster.delete(
            self.path + "/_query",
            data=convert.value2json(query),
            timeout=60
        )

        for name, status in result._indices.items():
            if status._shards.failed > 0:
                Log.error("Failure to delete from {{index}}", index=name)
Example #35
    def test_public_request(self):
        # MAKE SOME DATA
        data = {
            "a": {  # MATCHES SERVER PATTERN
                "b": "good",
                "c": [
                    {"k": "good", "m": 1},
                    {"k": 2, "m": 2}
                ]
            },
            "constant": "this is a test",
            "random-data": convert.bytes2base64(Random.bytes(100))
        }

        content = json.dumps(data)

        response = requests.post(
            url=settings.url,
            data=content,
            headers={
                'Content-Type': CONTENT_TYPE
            }
        )

        self.assertEqual(response.status_code, 200, "Expecting 200")

        about = json.loads(response.content)
        link, id = about['link'], about['etl']['id']
        Log.note("Data located at {{link}} id={{id}}", link=link, id=id)
        return link, id
Example #36
    def delete_index(self, index_name):
        if not isinstance(index_name, unicode):
            Log.error("expecting an index name")

        if self.debug:
            Log.note("Deleting index {{index}}", index=index_name)

        # REMOVE ALL ALIASES TOO
        aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
        if aliases:
            self.post(
                path="/_aliases",
                data={"actions": [{"remove": a} for a in aliases]}
            )

        url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
        try:
            response = http.delete(url)
            if response.status_code != 200:
                Log.error("Expecting a 200")
            details = convert.json2value(utf82unicode(response.content))
            if self.debug:
                Log.note("delete response {{response}}", response=details)
            return response
        except Exception, e:
            Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #37
    def test_big_result_works(self):
        result = http.post_json(global_settings.service_url, data={
            "from": "unittest",
            "where": {"and": [
                {"gte": {"run.timestamp": Date.today() - DAY}},
                {"lt": {"run.timestamp": Date.today()}},
                {"eq": {"result.ok": False}}
            ]},
            "format": "list",
            "limit": 10000
        })
        if result.template:
            result = Except.new_instance(result)
            Log.error("problem with call", cause=result)
        Log.note("Got {{num}} test failures", num=len(result.data))
Example #38
    def _upsert_column(self, c):
        # ASSUMING THE  self.meta.columns.locker IS HAD
        existing_columns = [
            r for r in self.meta.columns.data
            if r.table == c.table and r.name == c.name
        ]
        if not existing_columns:
            self.meta.columns.add(c)
            Log.note("todo: {{table}}.{{column}}",
                     table=c.table,
                     column=c.es_column)
            self.todo.add(c)

            # MARK meta.columns AS DIRTY TOO
            cols = [
                r for r in self.meta.columns.data if r.table == "meta.columns"
            ]
            for cc in cols:
                cc.partitions = cc.cardinality = None
                cc.last_updated = Date.now()
            self.todo.extend(cols)
        else:
            canonical = existing_columns[0]
            if canonical.relative and not c.relative:
                return  # RELATIVE COLUMNS WILL SHADOW ABSOLUTE COLUMNS

            for key in Column.__slots__:
                canonical[key] = c[key]
            Log.note("todo: {{table}}.{{column}}",
                     table=canonical.table,
                     column=canonical.es_column)
            self.todo.add(canonical)
Example #39
    def test_queue_speed(self):
        SCALE = 1000*10

        done = Signal("done")
        slow = Queue()
        q = ThreadedQueue("test queue", queue=slow)

        def empty(please_stop):
            while not please_stop:
                item = q.pop()
                if item is Thread.STOP:
                    break

            done.go()

        Thread.run("empty", empty)

        timer = Timer("add {{num}} to queue", param={"num": SCALE})
        with timer:
            for i in range(SCALE):
                q.add(i)
            q.add(Thread.STOP)
            Log.note("Done insert")
            done.wait()

        self.assertLess(timer.duration.seconds, 1.5, "Expecting queue to be fast")
Example #40
    def test_thread_wait(self):
        NUM = 100
        locker = Lock("test")
        phase1 = []
        phase2 = []

        def work(value, please_stop):
            with locker:
                phase1.append(value)
                locker.wait()
                phase2.append(value)

        with locker:
            threads = [Thread.run(unicode(i), work, i) for i in range(NUM)]

        # CONTINUE TO USE THE locker SO WAITS GET TRIGGERED

        while len(phase2) < NUM:
            with locker:
                pass
        for t in threads:
            t.join()

        self.assertEqual(len(phase1), NUM, "expecting "+unicode(NUM)+" items")
        self.assertEqual(len(phase2), NUM, "expecting "+unicode(NUM)+" items")
        for i in range(NUM):
            self.assertTrue(i in phase1, "expecting "+unicode(i))
            self.assertTrue(i in phase2, "expecting "+unicode(i))
        Log.note("done")
Example #41
    def close(self):
        self.please_stop.go()
        with self.lock:
            if self.db is None:
                return

            self.add(Thread.STOP)

            if self.db.status.end == self.start:
                if DEBUG:
                    Log.note("persistent queue clear and closed")
                self.file.delete()
            else:
                if DEBUG:
                    Log.note("persistent queue closed with {{num}} items left",
                             num=len(self))
                try:
                    self._add_pending({"add": {"status.start": self.start}})
                    for i in range(self.db.status.start, self.start):
                        self._add_pending({"remove": str(i)})
                    self.file.write(
                        convert.value2json({"add": self.db}) + "\n" +
                        ("\n".join(
                            convert.value2json(p)
                            for p in self.pending)) + "\n")
                    self._apply_pending()
                except Exception, e:
                    raise e
            self.db = None
Example #42
    def forall(self, sql, param=None, _execute=None):
        assert _execute
        num = 0

        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor: # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}",  sql= indent(sql))
            self.cursor.execute(sql)

            columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
            for r in self.cursor:
                num += 1
                _execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))

            if not old_cursor:   # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

        except Exception, e:
            Log.error("Problem executing SQL:\n{{sql|indent}}",  sql= sql, cause=e, stack_depth=1)
Example #44
    def column_query(self, sql, param=None):
        """
        RETURN RESULTS IN [column][row_num] GRID
        """
        self._execute_backlog()
        try:
            old_cursor = self.cursor
            if not old_cursor: # ALLOW NON-TRANSACTIONAL READS
                self.cursor = self.db.cursor()
                self.cursor.execute("SET TIME_ZONE='+00:00'")
                self.cursor.close()
                self.cursor = self.db.cursor()

            if param:
                sql = expand_template(sql, self.quote_param(param))
            sql = self.preamble + outdent(sql)
            if self.debug:
                Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

            self.cursor.execute(sql)
            grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
            # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
            result = zip(*grid)

            if not old_cursor:   # CLEANUP AFTER NON-TRANSACTIONAL READS
                self.cursor.close()
                self.cursor = None

            return result
        except Exception, e:
            if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
                Log.error("Did you close the db connection?", e)
            Log.error("Problem executing SQL:\n{{sql|indent}}",  sql= sql, cause=e,stack_depth=1)
Example #45
def get_all_in_es(es):
    in_es = set()

    all_indexes = es.es.cluster.get_metadata().indices
    for name, index in all_indexes.items():
        if "unittest" not in index.aliases:
            continue

        result = elasticsearch.Index(index=name, alias="unittest", settings=es.es.settings).search({
            "aggs": {
                "_match": {
                    "terms": {
                        "field": "etl.source.source.id",
                        "size": 200000
                    }
                }
            }
        })

        good_es = []
        for k in result.aggregations._match.buckets.key:
            try:
                good_es.append(int(k))
            except Exception, e:
                pass

        Log.note("got {{num}} from {{index}}", num=len(good_es), index=name)
        in_es |= set(good_es)
Example #47
    def get_columns(self, table_name, column_name=None, fail_when_not_found=False):
        """
        RETURN METADATA COLUMNS
        """
        try:
            with self.meta.columns.locker:
                columns = [
                    c
                    for c in self.meta.columns.data
                    if c.table == table_name and (column_name is None or c.name == column_name)
                ]
            if columns:
                columns = jx.sort(columns, "name")
                if fail_when_not_found:
                    # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                    while len(self.todo) and not all(columns.get("last_updated")):
                        Log.note("waiting for columns to update {{columns|json}}", columns=[c.table + "." + c.es_column for c in columns if not c.last_updated])
                        Thread.sleep(seconds=1)
                    return columns
                elif all(columns.get("last_updated")):
                    return columns
        except Exception, e:
            Log.error("Not expected", cause=e)
Example #48
def find_some_work(th):
    # th.get_markup("fx-team", "036f62007472", "B8kS5IJ5Rom8l-kcSIRIlA")
    # th.get_markup("mozilla-inbound", "971c1ee26cad", "fNuzNmZxS6m3i_p9jDh8iA")

    # GET SOME TASKS
    result = http.post_json(url="http://activedata.allizom.org/query", data={
        "from": "task",
        "select": ["build.branch", "build.revision", "task.id"],
        "where": {"and": [
            {"gt": {"task.run.start_time": (Date.today() - DAY).unix}},
            {"exists": "build.revision"},
            {"exists": "build.branch"}
        ]},
        "format": "list"
    })

    # TRY TO GET THEM OUT OF OUR CACHE
    for r in result.data:
        Log.note("look for task {{task_id}}", task_id=r.task.id)
        th.get_markup(r.build.branch, r.build.revision, r.task.id)
Example #49
def pretty_json(value):
    try:
        if scrub(value) is None:
            return "null"
        elif isinstance(value, basestring):
            if isinstance(value, str):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception, e:
                from pyLibrary.debugs.logs import Log

                try:
                    Log.note("try explicit convert of string with length {{length}}",  length= len(value))
                    acc = [u"\""]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception, h:
                                c2 = c
                            c3 = unicode(c2)
                            acc.append(c3)
                        except BaseException, g:
                            pass
                            # Log.warning("odd character {{ord}} found in string.  Ignored.",  ord= ord(c)}, cause=g)
                    acc.append(u"\"")
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}",  length= len(output))
                    return output
Example #50
    def __enter__(self):
        if self.debug:
            if not self.silent:
                Log.note("Timer start: " + self.template, stack_depth=1, **self.param)
        self.start = time()
        return self
Example #51
    def flush(self):
        try:
            self.cluster.post("/" + self.settings.index + "/_flush", data={"wait_if_ongoing": True, "forced": False})
        except Exception, e:
            if "FlushNotAllowedEngineException" in e:
                Log.note("Flush is ignored")
            else:
                Log.error("Problem flushing", cause=e)
Example #52
    def tearDownClass(self):
        cluster = elasticsearch.Cluster(global_settings.backend_es)
        for i in ESUtils.indexes:
            try:
                cluster.delete_index(i.settings.index)
                Log.note("remove index {{index}}", index=i)
            except Exception, e:
                pass
Example #53
    def _find(b, please_stop):
        try:
            url = b.url + "rev/" + revision
            response = http.get(url)
            if response.status_code == 200:
                Log.note("{{revision}} found at {{url}}", url=url, revision=revision)
        except Exception, e:
            pass
Example #54
    def add(self, doc, queue=None):
        if queue == None:
            queue = self._get_queue(doc)
            if queue == None:
                Log.note("Document not added: Too old")
                return

        queue.add(doc)
Example #56
def _replace_ref(node, url):
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise _Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            new_value = dot.get_attr(new_value, ref.fragment)

        if DEBUG:
            _Log.note("Replace {{ref}} with {{new_value}}",
                      ref=ref,
                      new_value=new_value)

        if not output:
            output = new_value
        else:
            output = unwrap(set_default(output, new_value))

        if DEBUG:
            _Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node
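
A hypothetical expansion, showing a $ref node being replaced and sibling keys merged over the loaded value (paths invented):

# HYPOTHETICAL USAGE - EXPAND $ref NODES RELATIVE TO THE DOCUMENT'S OWN URL
doc = {"constants": {"$ref": "file:///etc/app/constants.json"}, "limit": 10}
expanded = _replace_ref(doc, URL("file:///etc/app/settings.json"))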
Example #57
    def test_timing(self):
        if self.not_real_service():
            return

        test = wrap({
            "query": {
                "from": {
                    "type": "elasticsearch",
                    "settings": {
                        "host": ES_CLUSTER_LOCATION,
                        "index": "unittest",
                        "type": "test_result"
                    }
                },
                "select": [{
                    "name": "count",
                    "value": "run.duration",
                    "aggregate": "count"
                }, {
                    "name": "total",
                    "value": "run.duration",
                    "aggregate": "sum"
                }],
                "edges": [{
                    "name": "chunk",
                    "value": ["run.suite", "run.chunk"]
                }, "result.ok"],
                "where": {
                    "and": [{
                        "lt": {
                            "timestamp": Date.floor(Date.now()).milli / 1000
                        }
                    }, {
                        "gte": {
                            "timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000
                        }
                    }]
                },
                "format": "cube",
                "samples": {
                    "limit": 30
                }
            }
        })

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #58
    def join(self):
        try:
            # WAIT FOR FINISH
            for t in self.threads:
                t.join()
        except (KeyboardInterrupt, SystemExit):
            Log.note("Shutdown started, please be patient")
        except Exception, e:
            Log.error("Unusual shutdown!", cause=e)
Example #59
        def disconnect():
            try:
                self.target_queue.close()
                Log.note("stop put into queue")
            except:
                pass

            self.pulse.disconnect()
            Log.note("pulse listener was given a disconnect()")