Example #1
class StructuredLogger_usingThread(StructuredLogger):
    def __init__(self, logger):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.queue = Queue("Queue for " + self.__class__.__name__,
                           max=10000,
                           silent=True,
                           allow_add_after_close=True)
        self.logger = logger

        def worker(logger, please_stop):
            try:
                while not please_stop:
                    logs = self.queue.pop_all()
                    if not logs:
                        (Till(seconds=1) | please_stop).wait()
                        continue
                    for log in logs:
                        if log is THREAD_STOP:
                            please_stop.go()
                        else:
                            logger.write(**log)
            except Exception as e:
                print("problem in " + StructuredLogger_usingThread.__name__ +
                      ": " + str(e))
            finally:
                Log.note("stop the child")
                logger.stop()

        self.thread = Thread("Thread for " + self.__class__.__name__, worker,
                             logger)
        self.thread.parent.remove_child(
            self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            e = Except.wrap(e)
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(
                THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            Log.note("joined on thread")
        except Exception as e:
            Log.note("problem in threaded logger" + str(e))

        with suppress_exception:
            self.queue.close()
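
A minimal usage sketch for the threaded wrapper above, assuming the mo-logs conventions shown; the inner destination logger and the template values are placeholders, not part of the original example. The point of the wrapper is that write() only enqueues, so callers never block on a slow destination.

# Hedged usage sketch; SomeDestinationLogger is a placeholder for any concrete StructuredLogger subclass.
inner = SomeDestinationLogger()
log = StructuredLogger_usingThread(inner)
log.write("processed {{num}} records", {"num": 42})  # enqueued; the worker thread calls inner.write()
log.stop()                                            # sends THREAD_STOP, joins the worker, closes the queue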
Example #2
class StructuredLogger_usingThread(StructuredLogger):

    def __init__(self, logger):
        if not isinstance(logger, StructuredLogger):
            Log.error("Expecting a StructuredLogger")

        self.queue = Queue("Queue for " + self.__class__.__name__, max=10000, silent=True, allow_add_after_close=True)
        self.logger = logger

        def worker(logger, please_stop):
            try:
                while not please_stop:
                    logs = self.queue.pop_all()
                    if not logs:
                        (Till(seconds=1) | please_stop).wait()
                        continue
                    for log in logs:
                        if log is THREAD_STOP:
                            please_stop.go()
                        else:
                            logger.write(**log)
            except Exception as e:
                print("problem in " + StructuredLogger_usingThread.__name__ + ": " + str(e))
            finally:
                Log.note("stop the child")
                logger.stop()

        self.thread = Thread("Thread for " + self.__class__.__name__, worker, logger)
        self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
        self.thread.start()

    def write(self, template, params):
        try:
            self.queue.add({"template": template, "params": params})
            return self
        except Exception as e:
            e = Except.wrap(e)
            raise e  # OH NO!

    def stop(self):
        try:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT
            self.thread.join()
            Log.note("joined on thread")
        except Exception as e:
            Log.note("problem in threaded logger" + str(e))

        with suppress_exception:
            self.queue.close()
class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
        kwargs.host = Random.sample(listwrap(host), 1)[0]

        rollover_interval = coalesce(kwargs.rollover.interval, kwargs.rollover.max, "year")
        rollover_max = coalesce(kwargs.rollover.max, kwargs.rollover.interval, "year")

        schema = set_default(
            kwargs.schema,
            {"mappings": {kwargs.type: {"properties": {"~N~": {"type": "nested"}}}}},
            json2value(value2json(SCHEMA), leaves=True)
        )

        self.es = RolloverIndex(
            rollover_field={"get": [{"first": "."}, {"literal": "timestamp"}]},
            rollover_interval=rollover_interval,
            rollover_max=rollover_max,
            schema=schema,
            limit_replicas=True,
            typed=True,
            read_only=False,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        try:
            params.template = strings.limit(params.template, 2000)
            params.format = None
            self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
        except Exception as e:
            sys.stdout.write(text(Except.wrap(e)))
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                messages = wrap(self.queue.pop_all())
                if not messages:
                    Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                    continue

                for g, mm in jx.chunk(messages, size=self.batch_size):
                    scrubbed = []
                    for i, message in enumerate(mm):
                        if message is THREAD_STOP:
                            please_stop.go()
                            continue
                        try:
                            chain = flatten_causal_chain(message.value)
                            scrubbed.append(
                                {
                                    "value": [
                                        _deep_json_to_string(link, depth=3)
                                        for link in chain
                                    ]
                                }
                            )
                        except Exception as e:
                            Log.warning("Problem adding to scrubbed list", cause=e)

                    self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as f:
                Log.warning("Problem inserting logs into ES", cause=f)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning(
                        "Given up trying to write debug logs to ES index {{index}}",
                        index=self.es.settings.index,
                    )
                    break
                Till(seconds=PAUSE_AFTER_BAD_INSERT).wait()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

    def stop(self):
        with suppress_exception:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT

        with suppress_exception:
            self.queue.close()
        self.worker.join()
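
A hedged construction sketch for the Elasticsearch logger above, assuming mo-kwargs @override behaviour (settings may be passed as keywords or as a single settings object). The host, index, and sizes below are illustrative only, and params is normally a mo-dots Data in this codebase.

# Hedged usage sketch; all values are assumptions for illustration.
es_log = StructuredLogger_usingElasticSearch(
    host="http://localhost",
    index="debug-logs",
    queue_size=1000,
    batch_size=100,
)
es_log.write("problem in {{module}}", {"module": "etl"})  # queued; _insert_loop batches into the rollover index
es_log.stop()                                             # drains remaining messages, then joins the worker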
Example #4
class ColumnList(Table, jx_base.Container):
    """
    OPTIMIZED FOR THE PARTICULAR ACCESS PATTERNS USED
    """
    def __init__(self, es_cluster):
        Table.__init__(self, META_COLUMNS_NAME)
        self.data = {}  # MAP FROM ES_INDEX TO (abs_column_name to COLUMNS)
        self.locker = Lock()
        self._schema = None
        self.dirty = False
        self.es_cluster = es_cluster
        self.es_index = None
        self.last_load = Null
        self.todo = Queue(
            "update columns to es"
        )  # HOLD (action, column) PAIR, WHERE action in ['insert', 'update']
        self._db_load()
        Thread.run("update " + META_COLUMNS_NAME,
                   self._update_from_es,
                   parent_thread=MAIN_THREAD)

    def _query(self, query):
        result = Data()
        curr = self.es_cluster.execute(query)
        result.meta.format = "table"
        result.header = [d[0] for d in curr.description] if curr.description else None
        result.data = curr.fetchall()
        return result

    def _db_create(self):
        schema = {
            "settings": {
                "index.number_of_shards": 1,
                "index.number_of_replicas": 6
            },
            "mappings": {
                META_COLUMNS_TYPE_NAME: {}
            },
        }

        self.es_index = self.es_cluster.create_index(id=ID,
                                                     index=META_COLUMNS_NAME,
                                                     schema=schema)
        self.es_index.add_alias(META_COLUMNS_NAME)

        for c in META_COLUMNS_DESC.columns:
            self._add(c)
            self.es_index.add({"value": c.__dict__()})

    def _db_load(self):
        self.last_load = Date.now()

        try:
            self.es_index = self.es_cluster.get_index(
                id=ID,
                index=META_COLUMNS_NAME,
                type=META_COLUMNS_TYPE_NAME,
                read_only=False)

            result = self.es_index.search({
                "query": {
                    "bool": {
                        "should": [
                            {
                                "bool": {
                                    "must_not": {
                                        "exists": {
                                            "field": "cardinality.~n~"
                                        }
                                    }
                                }
                            },
                            {  # ASSUME UNUSED COLUMNS DO NOT EXIST
                                "range": {
                                    "cardinality.~n~": {
                                        "gt": 0
                                    }
                                }
                            },
                        ]
                    }
                },
                "sort": ["es_index.~s~", "name.~s~", "es_column.~s~"],
                "size":
                10000,
            })

            Log.note("{{num}} columns loaded", num=result.hits.total)
            with self.locker:
                for r in result.hits.hits._source:
                    self._add(doc_to_column(r))

        except Exception as e:
            Log.warning("no {{index}} exists, making one",
                        index=META_COLUMNS_NAME,
                        cause=e)
            self._db_create()

    def _update_from_es(self, please_stop):
        try:
            last_extract = Date.now()
            while not please_stop:
                now = Date.now()
                try:
                    if (now - last_extract).seconds > COLUMN_EXTRACT_PERIOD:
                        result = self.es_index.search({
                            "query": {
                                "range": {
                                    "last_updated.~n~": {
                                        "gte": self.last_load
                                    }
                                }
                            },
                            "sort":
                            ["es_index.~s~", "name.~s~", "es_column.~s~"],
                            "from":
                            0,
                            "size":
                            10000,
                        })
                        last_extract = now

                        with self.locker:
                            for r in result.hits.hits._source:
                                c = doc_to_column(r)
                                self._add(c)
                                self.last_load = MAX(
                                    (self.last_load, c.last_updated))

                    while not please_stop:
                        updates = self.todo.pop_all()
                        if not updates:
                            break

                        DEBUG and updates and Log.note(
                            "{{num}} columns to push to db", num=len(updates))
                        self.es_index.extend([{
                            "value": column.__dict__()
                        } for column in updates])
                except Exception as e:
                    Log.warning("problem updating database", cause=e)

                (Till(seconds=COLUMN_LOAD_PERIOD) | please_stop).wait()
        finally:
            Log.note("done")

    def find(self, es_index, abs_column_name=None):
        with self.locker:
            if es_index.startswith("meta."):
                self._update_meta()

            if not abs_column_name:
                return [
                    c for cs in self.data.get(es_index, {}).values()
                    for c in cs
                ]
            else:
                return self.data.get(es_index, {}).get(abs_column_name, [])

    def extend(self, columns):
        self.dirty = True
        with self.locker:
            for column in columns:
                self._add(column)

    def add(self, column):
        self.dirty = True
        with self.locker:
            canonical = self._add(column)
        if canonical == None:
            return column  # ALREADY ADDED
        self.todo.add(canonical)
        return canonical

    def remove_table(self, table_name):
        del self.data[table_name]

    def _add(self, column):
        """
        :param column: ANY COLUMN OBJECT
        :return:  None IF column IS canonical ALREADY (NET-ZERO EFFECT)
        """
        columns_for_table = self.data.setdefault(column.es_index, {})
        existing_columns = columns_for_table.setdefault(column.name, [])

        for canonical in existing_columns:
            if canonical is column:
                return None
            if canonical.es_type == column.es_type:
                if column.last_updated > canonical.last_updated:
                    for key in Column.__slots__:
                        old_value = canonical[key]
                        new_value = column[key]
                        if new_value == old_value:
                            pass  # NO NEED TO UPDATE WHEN NO CHANGE MADE (COMMON CASE)
                        else:
                            canonical[key] = new_value
                return canonical
        existing_columns.append(column)
        return column

    def _update_meta(self):
        if not self.dirty:
            return

        now = Date.now()
        for mc in META_COLUMNS_DESC.columns:
            count = 0
            values = set()
            objects = 0
            multi = 1
            for column in self._all_columns():
                value = column[mc.name]
                if value == None:
                    pass
                else:
                    count += 1
                    if is_list(value):
                        multi = max(multi, len(value))
                        try:
                            values |= set(value)
                        except Exception:
                            objects += len(value)
                    elif is_data(value):
                        objects += 1
                    else:
                        values.add(value)
            mc.count = count
            mc.cardinality = len(values) + objects
            mc.partitions = jx.sort(values)
            mc.multi = multi
            mc.last_updated = now

        META_COLUMNS_DESC.last_updated = now
        self.dirty = False

    def _all_columns(self):
        return [
            column for t, cs in self.data.items() for _, css in cs.items()
            for column in css
        ]

    def __iter__(self):
        with self.locker:
            self._update_meta()
            return iter(self._all_columns())

    def __len__(self):
        return self.data[META_COLUMNS_NAME]["es_index"].count

    def update(self, command):
        self.dirty = True
        try:
            command = wrap(command)
            DEBUG and Log.note(
                "Update {{timestamp}}: {{command|json}}",
                command=command,
                timestamp=Date(command["set"].last_updated),
            )
            eq = command.where.eq
            if eq.es_index:
                if len(eq) == 1:
                    if unwraplist(command.clear) == ".":
                        d = self.data
                        i = eq.es_index
                        with self.locker:
                            cols = d[i]
                            del d[i]

                        for c in cols:
                            mark_as_deleted(c)
                            self.todo.add(c)
                        return

                    # FASTEST
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [c for cs in all_columns for c in cs]
                elif eq.es_column and len(eq) == 2:
                    # FASTER
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [
                            c for cs in all_columns for c in cs
                            if c.es_column == eq.es_column
                        ]

                else:
                    # SLOWER
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [
                            c for cs in all_columns for c in cs
                            if all(c[k] == v for k, v in
                                   eq.items())  # THIS LINE IS VERY SLOW
                        ]
            else:
                columns = list(self)
                columns = jx.filter(columns, command.where)

            with self.locker:
                for col in columns:
                    DEBUG and Log.note(
                        "update column {{table}}.{{column}}",
                        table=col.es_index,
                        column=col.es_column,
                    )
                    for k in command["clear"]:
                        if k == ".":
                            mark_as_deleted(col)
                            self.todo.add(col)
                            lst = self.data[col.es_index]
                            cols = lst[col.name]
                            cols.remove(col)
                            if len(cols) == 0:
                                del lst[col.name]
                                if len(lst) == 0:
                                    del self.data[col.es_index]
                            break
                        else:
                            col[k] = None
                    else:
                        # DID NOT DELETE COLUMN ("."), CONTINUE TO SET PROPERTIES
                        for k, v in command.set.items():
                            col[k] = v
                        self.todo.add(col)

        except Exception as e:
            Log.error("should not happen", cause=e)

    def query(self, query):
        # NOT EXPECTED TO BE RUN
        Log.error("not")
        with self.locker:
            self._update_meta()
            if not self._schema:
                self._schema = Schema(".", [
                    c for cs in self.data[META_COLUMNS_NAME].values()
                    for c in cs
                ])
            snapshot = self._all_columns()

        from jx_python.containers.list_usingPythonList import ListContainer

        query.frum = ListContainer(META_COLUMNS_NAME, snapshot, self._schema)
        return jx.run(query)

    def groupby(self, keys):
        with self.locker:
            self._update_meta()
            return jx.groupby(self.__iter__(), keys)

    def window(self, window):
        raise NotImplementedError()

    @property
    def schema(self):
        if not self._schema:
            with self.locker:
                self._update_meta()
                self._schema = Schema(".", [
                    c for cs in self.data[META_COLUMNS_NAME].values()
                    for c in cs
                ])
        return self._schema

    @property
    def namespace(self):
        return self

    def get_table(self, table_name):
        if table_name != META_COLUMNS_NAME:
            Log.error("this container has only the " + META_COLUMNS_NAME)
        return self

    def get_columns(self, table_name):
        if table_name != META_COLUMNS_NAME:
            Log.error("this container has only the " + META_COLUMNS_NAME)
        return self._all_columns()

    def denormalized(self):
        """
        THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
        THE DENORMALIZED PERSPECTIVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES
        """
        with self.locker:
            self._update_meta()
            output = [
                {
                    "table": c.es_index,
                    "name": untype_path(c.name),
                    "cardinality": c.cardinality,
                    "es_column": c.es_column,
                    "es_index": c.es_index,
                    "last_updated": c.last_updated,
                    "count": c.count,
                    "nested_path": [unnest_path(n) for n in c.nested_path],
                    "es_type": c.es_type,
                    "type": c.jx_type,
                } for tname, css in self.data.items()
                for cname, cs in css.items() for c in cs
                if c.jx_type not in STRUCT  # and c.es_column != "_id"
            ]

        from jx_python.containers.list_usingPythonList import ListContainer

        return ListContainer(
            self.name,
            data=output,
            schema=jx_base.Schema(META_COLUMNS_NAME, SIMPLE_METADATA_COLUMNS),
        )
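
A hedged sketch of how the column-metadata container above might be exercised; `cluster` and `col` stand in for a real Elasticsearch cluster wrapper and a Column object, neither of which appears in the original example.

# Hedged usage sketch; `cluster` and `col` are assumptions for illustration.
columns = ColumnList(es_cluster=cluster)
hits = columns.find("task", "build.type")   # all known columns for one table/name
canonical = columns.add(col)                # merged into the canonical set; queued for push to ES
columns.update({                            # forget everything known about one index
    "clear": ".",
    "where": {"eq": {"es_index": "task"}},
})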
Example #5
class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(self,
                 host,
                 index,
                 port=9200,
                 type="log",
                 max_size=1000,
                 batch_size=100,
                 kwargs=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(kwargs).get_or_create_index(
            schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=True,
            kwargs=kwargs)
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=max_size, silent=True)
        self.es.settings.retry.times = coalesce(self.es.settings.retry.times,
                                                3)
        self.es.settings.retry.sleep = Duration(
            coalesce(self.es.settings.retry.sleep, MINUTE))
        Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        if params.get("template"):
            # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
            self.queue.add({"value": params})
        else:
            template = strings.limit(template, 2000)
            self.queue.add({"value": {
                "template": template,
                "params": params
            }},
                           timeout=3 * MINUTE)
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                Till(seconds=1).wait()
                messages = wrap(self.queue.pop_all())
                if not messages:
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):
                    scrubbed = []
                    try:
                        for i, message in enumerate(mm):
                            if message is THREAD_STOP:
                                please_stop.go()
                                return
                            scrubbed.append(
                                _deep_json_to_string(message, depth=3))
                    finally:
                        self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as e:
                Log.warning("Problem inserting logs into ES", cause=e)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning(
                        "Given up trying to write debug logs to ES index {{index}}",
                        index=self.es.settings.index)
                Till(seconds=30).wait()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=1).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

    def stop(self):
        with suppress_exception:
            self.queue.add(
                THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT

        with suppress_exception:
            self.queue.close()
Example #6
class Table(BaseFacts):
    @override
    def __init__(
        self,
        table,
        typed,
        read_only,
        sharded,
        container,
        id=Null,
        partition=Null,
        cluster=Null,
        top_level_fields=Null,
        kwargs=None,
    ):
        self.short_name = table
        self.typed = typed
        self.read_only = read_only
        self.cluster = cluster
        self.id = id
        self.top_level_fields = top_level_fields
        self.config = Data(  # USED TO REPLICATE THIS
            typed=typed,
            read_only=read_only,
            sharded=sharded,
            id=id,
            partition=partition,
            cluster=cluster,
            top_level_fields=top_level_fields,
        )

        esc_name = escape_name(table)
        self.full_name = container.full_name + esc_name
        self.alias_view = alias_view = container.client.get_table(text(self.full_name))
        self.partition = partition
        self.container = container

        if not sharded:
            if not read_only and alias_view.table_type == "VIEW":
                Log.error("Expecting a table, not a view")
            self.shard = alias_view
            self._flake = Snowflake.parse(
                alias_view.schema,
                text(self.full_name),
                self.top_level_fields,
                partition,
            )
        else:
            if alias_view.table_type != "VIEW":
                Log.error("Sharded tables require a view")
            current_view = container.client.get_table(text(self.full_name))
            view_sql = current_view.view_query
            shard_name = _extract_primary_shard_name(view_sql)
            try:
                self.shard = container.client.get_table(
                    text(container.full_name + shard_name)
                )
                self._flake = Snowflake.parse(
                    alias_view.schema,
                    text(self.full_name),
                    self.top_level_fields,
                    partition,
                )
            except Exception as e:
                Log.warning("view {{name}} is invalid", name=shard_name, cause=e)
                self._flake = Snowflake.parse(
                    alias_view.schema,
                    text(self.full_name),
                    self.top_level_fields,
                    partition,
                )
                # REMOVE STALE VIEW
                container.client.delete_table(current_view)

                # MAKE NEW VIEW POINTING TO NEW SHARD
                self._create_new_shard()
                container.create_view(
                    self.full_name,
                    self.container.full_name + ApiName(self.shard.table_id),
                )

        self.last_extend = Date.now() - EXTEND_LIMIT
        self.extend_locker = Lock()
        self.extend_queue = Queue("wait for extend")

    def all_records(self):
        """
        MOSTLY FOR TESTING, RETURN ALL RECORDS IN TABLE
        :return:
        """
        return self.sql_query(sql_query({"from": text(self.full_name)}, self.schema))

    def jx_query(self, jx_query):
        docs = self.sql_query(
            sql_query(
                dict_to_data({"from": text(self.full_name)}) | jx_query, self.schema
            )
        )
        data = []
        for d in docs:
            u = untyped(from_data(leaves_to_data(d)))
            data.append(u)

        return Data(data=data, format="list")

    @property
    def schema(self):
        return self._flake

    def sql_query(self, sql):
        """
        :param sql: SQL QUERY
        :return: GENERATOR OF DOCUMENTS as dict
        """
        query_job = self.container.query_and_wait(sql)
        # WE WILL REACH INTO THE _flake, SINCE THIS IS THE FIRST PLACE WE ARE ACTUALLY PULLING RECORDS OUT
        # TODO: WITH MORE CODE THIS LOGIC GOES ELSEWHERE
        _ = self._flake.columns  # ENSURE schema HAS BEEN PROCESSED
        if not self._flake._top_level_fields.keys():
            for row in query_job:
                yield untyped(dict(row))
        else:
            top2deep = {
                name: path for path, name in self._flake._top_level_fields.items()
            }
            for row in query_job:
                output = {}
                doc = dict(row)
                # COPY ALL BUT TOP LEVEL FIELDS
                for k, v in doc.items():
                    deep = top2deep.get(k)
                    if deep is None:
                        output[k] = v
                # INSERT TOP LEVEL FIELDS
                reach = wrap(output)
                for k, p in top2deep.items():
                    try:
                        reach[p] = doc.get(k)
                    except Exception as cause:
                        raise cause
                yield untyped(output)

    @property
    def flake(self):
        return self._flake

    def _create_new_shard(self):
        primary_shard = self.container.create_table(
            table=self.short_name + "_" + "".join(randoms.sample(ALLOWED, 20)),
            sharded=False,
            schema=self._flake.schema,
            kwargs=self.config,
        )
        self.shard = primary_shard.shard

    def extend(self, docs):
        self.extend_queue.extend(docs)
        with self.extend_locker:
            docs = self.extend_queue.pop_all()
            self._extend(docs)

    def _extend(self, rows):
        if self.read_only:
            Log.error("not for writing")
        if len(rows) == 0:
            return

        try:
            update = {}
            with Timer("encoding", verbose=DEBUG):
                while True:
                    typed_rows = []
                    for rownum, row in enumerate(rows):
                        typed_row, more, add_nested = typed_encode(row, self.flake)
                        set_default(update, more)
                        if add_nested:
                            # row HAS NEW NESTED COLUMN!
                            # GO OVER THE rows AGAIN SO "RECORD" GET MAPPED TO "REPEATED"
                            DEBUG and Log.note("New nested documnet found, retrying")
                            break
                        typed_rows.append(typed_row)
                    else:
                        break

            if update or not self.shard:
                # BATCH HAS ADDITIONAL COLUMNS!!
                # WE CAN NOT USE THE EXISTING SHARD, MAKE A NEW ONE:
                self._create_new_shard()
                DEBUG and Log.note(
                    "added new shard with name: {{shard}}", shard=self.shard.table_id
                )
            with Timer(
                "insert {{num}} rows to bq", param={"num": len(rows)}, verbose=DEBUG
            ):
                failures = self.container.client.insert_rows_json(
                    self.shard,
                    json_rows=typed_rows,
                    row_ids=[None] * len(typed_rows),
                    skip_invalid_rows=False,
                    ignore_unknown_values=False,
                )
            if failures:
                if all(r == "stopped" for r in wrap(failures).errors.reason):
                    self._create_new_shard()
                    DEBUG and Log.note(
                        "STOPPED encountered: Added new shard with name: {{shard}}",
                        shard=self.shard.table_id,
                    )
                Log.error(
                    "Got {{num}} failures:\n{{failures|json}}",
                    num=len(failures),
                    failures=failures[:5],
                )
            else:
                self.last_extend = Date.now()
                DEBUG and Log.note("{{num}} rows added", num=len(typed_rows))
        except Exception as cause:
            cause = Except.wrap(cause)
            if (
                len(typed_rows) < 2
                and "Your client has issued a malformed or illegal request." in cause
            ):
                Log.error(
                    "big query complains about:\n{{data|json}}",
                    data=typed_rows,
                    cause=cause,
                )
            elif len(rows) > 1 and (
                "Request payload size exceeds the limit" in cause
                or "An existing connection was forcibly closed by the remote host"
                in cause
                or "Your client has issued a malformed or illegal request." in cause
                or "BrokenPipeError(32, 'Broken pipe')" in cause
                or "ConnectionResetError(104, 'Connection reset by peer')" in cause
            ):
                Log.warning(
                    "problem with batch of size {{size}}", size=len(rows), cause=cause
                )
                batch_size = ceiling(len(rows) / 10)
                try:
                    DEBUG and Log.note(
                        "attempt smaller batches of size {{batch_size}}",
                        batch_size=batch_size,
                    )
                    for _, chunk in jx.chunk(rows, batch_size):
                        self._extend(chunk)
                    return
                except Exception as cause2:
                    Log.error(
                        "smaller batches of size {{batch_size}} did not work",
                        batch_size=batch_size,
                        cause=cause2,
                    )
            elif len(rows) == 1:
                Log.error(
                    "Could not insert document\n{{doc|json|indent}}",
                    doc=rows[0],
                    cause=cause,
                )
            else:
                Log.error("Do not know how to handle", cause=cause)

    def add(self, row):
        self.extend([row])

    def merge_shards(self):
        shards = []
        tables = list(self.container.client.list_tables(self.container.dataset))
        current_view = Null  # VIEW THAT POINTS TO PRIMARY SHARD
        primary_shard_name = None  # PRIMARY SHARD
        api_name = escape_name(self.short_name)

        for table_item in tables:
            table = table_item.reference
            table_api_name = ApiName(table.table_id)
            if text(table_api_name).startswith(text(api_name)):
                if table_api_name == api_name:
                    if table_item.table_type != "VIEW":
                        Log.error("expecting {{table}} to be a view", table=api_name)
                    current_view = self.container.client.get_table(table)
                    view_sql = current_view.view_query
                    primary_shard_name = _extract_primary_shard_name(view_sql)
                elif SUFFIX_PATTERN.match(text(table_api_name)[len(text(api_name)) :]):
                    try:
                        known_table = self.container.client.get_table(table)
                        shards.append(known_table)
                    except Exception as e:
                        Log.warning(
                            "could not merge table {{table}}", table=table, cause=e
                        )

        if not current_view:
            Log.error(
                "expecting {{table}} to be a view pointing to a table", table=api_name
            )

        shard_flakes = [
            Snowflake.parse(
                big_query_schema=shard.schema,
                es_index=text(self.container.full_name + ApiName(shard.table_id)),
                top_level_fields=self.top_level_fields,
                partition=self.partition,
            )
            for shard in shards
        ]
        total_flake = snowflakes.merge(
            shard_flakes,
            es_index=text(self.full_name),
            top_level_fields=self.top_level_fields,
            partition=self.partition,
        )

        for i, s in enumerate(shards):
            if ApiName(s.table_id) == primary_shard_name:
                if total_flake == shard_flakes[i]:
                    # USE THE CURRENT PRIMARY SHARD AS A DESTINATION
                    del shards[i]
                    del shard_flakes[i]
                    break
        else:
            name = self.short_name + "_" + "".join(randoms.sample(ALLOWED, 20))
            primary_shard_name = escape_name(name)
            self.container.create_table(
                table=name,
                schema=total_flake.schema,
                sharded=False,
                read_only=False,
                kwargs=self.config,
            )

        primary_full_name = self.container.full_name + primary_shard_name

        selects = []
        for flake, table in zip(shard_flakes, shards):
            q = ConcatSQL(
                SQL_SELECT,
                JoinSQL(ConcatSQL(SQL_COMMA, SQL_CR), gen_select(total_flake, flake)),
                SQL_FROM,
                quote_column(ApiName(table.dataset_id, table.table_id)),
            )
            selects.append(q)

        DEBUG and Log.note(
            "inserting into table {{table}}", table=text(primary_shard_name)
        )
        matched = []
        unmatched = []
        for sel, shard, flake in zip(selects, shards, shard_flakes):
            if flake == total_flake:
                matched.append((sel, shard, flake))
            else:
                unmatched.append((sel, shard, flake))

        # EVERYTHING THAT IS IDENTICAL TO PRIMARY CAN BE MERGED WITH SIMPLE UNION ALL
        if matched:
            for g, merge_chunk in jx.chunk(matched, MAX_MERGE):
                command = ConcatSQL(
                    SQL_INSERT,
                    quote_column(primary_full_name),
                    JoinSQL(
                        SQL_UNION_ALL,
                        (
                            sql_query(
                                {
                                    "from": text(
                                        self.container.full_name
                                        + ApiName(shard.table_id)
                                    )
                                },
                                schema,
                            )
                            for _, shard, schema in merge_chunk
                        ),
                    ),
                )
                DEBUG and Log.note("{{sql}}", sql=text(command))
                job = self.container.query_and_wait(command)
                DEBUG and Log.note(
                    "job {{id}} state = {{state}}", id=job.job_id, state=job.state
                )

                if job.errors:
                    Log.error(
                        "\n{{sql}}\nDid not fill table:\n{{reason|json|indent}}",
                        sql=command.sql,
                        reason=job.errors,
                    )
                for _, shard, _ in merge_chunk:
                    self.container.client.delete_table(shard)

        # ALL OTHER SCHEMAS MISMATCH
        for s, shard, _ in unmatched:
            try:
                command = ConcatSQL(SQL_INSERT, quote_column(primary_full_name), s)
                DEBUG and Log.note("{{sql}}", sql=text(command))
                job = self.container.query_and_wait(command)
                DEBUG and Log.note(
                    "from {{shard}}, job {{id}}, state {{state}}",
                    id=job.job_id,
                    shard=shard.table_id,
                    state=job.state,
                )

                if job.errors:
                    if all(
                        " does not have a schema." in m
                        for m in wrap(job.errors).message
                    ):
                        pass  # NOTHING TO DO
                    else:
                        Log.error(
                            "\n{{sql}}\nDid not fill table:\n{{reason|json|indent}}",
                            sql=command.sql,
                            reason=job.errors,
                        )

                self.container.client.delete_table(shard)
            except Exception as e:
                Log.warning("failure to merge {{shard}}", shard=shard, cause=e)

        # REMOVE OLD VIEW
        view_full_name = self.container.full_name + api_name
        if current_view:
            self.container.client.delete_table(current_view)

        # CREATE NEW VIEW
        self.container.create_view(view_full_name, primary_full_name)

    def condense(self):
        """
        :return:
        """
        # MAKE NEW SHARD
        partition = JoinSQL(
            SQL_COMMA,
            [
                quote_column(c.es_field)
                for f in listwrap(self.id.field)
                for c in self.flake.leaves(f)
            ],
        )
        order_by = JoinSQL(
            SQL_COMMA,
            [
                ConcatSQL(quote_column(c.es_field), SQL_DESC)
                for f in listwrap(self.id.version)
                for c in self.flake.leaves(f)
            ],
        )
        # WRAP WITH etl.timestamp BEST SELECTION
        self.container.query_and_wait(
            ConcatSQL(
                SQL(  # SOME KEYWORDS: ROWNUM RANK
                    "SELECT * EXCEPT (_rank) FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY "
                ),
                partition,
                SQL_ORDERBY,
                order_by,
                SQL(") AS _rank FROM "),
                quote_column(self.full_name),
                SQL(") a WHERE _rank=1"),
            )
        )
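
A hedged sketch of how the BigQuery-backed Table above is typically driven; `container` stands in for the dataset wrapper the constructor expects, and the row content is illustrative, not taken from the original example.

# Hedged usage sketch; `container` and the row content are assumptions.
facts = Table(
    table="events",
    typed=True,
    read_only=False,
    sharded=False,
    container=container,
)
facts.add({"id": 1, "etl": {"timestamp": 1600000000}})  # typed_encode, then insert_rows_json on the shard
facts.merge_shards()  # union shards that share the primary schema, then repoint the view
# condense() additionally assumes an `id` setting with `field` and `version` configured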
class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(self, host, index, port=9200, type="log", max_size=1000, batch_size=100, kwargs=None):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        self.es = Cluster(kwargs).get_or_create_index(
            schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            tjson=True,
            kwargs=kwargs
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=max_size, silent=True)
        self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
        self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
        Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        if params.get("template"):
            # DETECTED INNER TEMPLATE, ASSUME TRACE IS ON, SO DO NOT NEED THE OUTER TEMPLATE
            self.queue.add({"value": params})
        else:
            template = strings.limit(template, 2000)
            self.queue.add({"value": {"template": template, "params": params}}, timeout=3 * MINUTE)
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                Till(seconds=1).wait()
                messages = wrap(self.queue.pop_all())
                if not messages:
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):
                    scrubbed = []
                    try:
                        for i, message in enumerate(mm):
                            if message is THREAD_STOP:
                                please_stop.go()
                                return
                            scrubbed.append(_deep_json_to_string(message, depth=3))
                    finally:
                        self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as e:
                Log.warning("Problem inserting logs into ES", cause=e)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning("Given up trying to write debug logs to ES index {{index}}", index=self.es.settings.index)
                Till(seconds=30).wait()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=1).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

    def stop(self):
        with suppress_exception:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT

        with suppress_exception:
            self.queue.close()
class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds

        self.es = Cluster(kwargs).get_or_create_index(
            schema=json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            typed=True,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
        try:
            params.template = strings.limit(params.template, 2000)
            params.format = None
            self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
        except Exception as e:
            sys.stdout.write(text_type(Except.wrap(e)))
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
        while not please_stop:
            try:
                messages = wrap(self.queue.pop_all())
                if not messages:
                    Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):
                    scrubbed = []
                    for i, message in enumerate(mm):
                        if message is THREAD_STOP:
                            please_stop.go()
                            continue
                        try:
                            messages = flatten_causal_chain(message.value)
                            scrubbed.append(
                                {"value": [_deep_json_to_string(m, depth=3) for m in messages]}
                            )
                        except Exception as e:
                            Log.warning("Problem adding to scrubbed list", cause=e)

                    self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as f:
                Log.warning("Problem inserting logs into ES", cause=f)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning(
                        "Given up trying to write debug logs to ES index {{index}}",
                        index=self.es.settings.index,
                    )
                Till(seconds=PAUSE_AFTER_BAD_INSERT).wait()

        self.es.flush()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

    def stop(self):
        with suppress_exception:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT

        with suppress_exception:
            self.queue.close()
        self.worker.join()
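
The _insert_loop variants above all share one retry shape; the following is a hedged distillation of that pattern, not code from any single example. The names `queue`, `sink`, `please_stop`, and the constants are placeholders.

# Hedged sketch of the shared pattern: batch, back off on failure, give up after
# MAX_BAD_COUNT, then fall through to a drain-only loop so producers never block.
bad_count = 0
while not please_stop:
    batch = queue.pop_all()
    if not batch:
        (Till(seconds=1) | please_stop).wait()
        continue
    try:
        sink.extend(batch)          # one bulk insert per batch
        bad_count = 0               # any success resets the failure counter
    except Exception as cause:
        Log.warning("insert failed", cause=cause)
        bad_count += 1
        if bad_count > MAX_BAD_COUNT:
            break                   # stop trying; keep draining the queue elsewhere
        Till(seconds=30).wait()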
Example #9
class ColumnList(Table, jx_base.Container):
    """
    OPTIMIZED FOR THE PARTICULAR ACCESS PATTERNS USED
    """

    def __init__(self, name):
        Table.__init__(self, "meta.columns")
        self.db_file = File("metadata." + name + ".sqlite")
        self.data = {}  # MAP FROM ES_INDEX TO (abs_column_name to COLUMNS)
        self.locker = Lock()
        self._schema = None
        self.db = sqlite3.connect(
            database=self.db_file.abspath, check_same_thread=False, isolation_level=None
        )
        self.last_load = Null
        self.todo = Queue(
            "update columns to db"
        )  # HOLD (action, column) PAIR, WHERE action in ['insert', 'update']
        self._db_load()
        Thread.run("update " + name, self._db_worker)

    @contextmanager
    def _db_transaction(self):
        self.db.execute(str("BEGIN"))
        try:
            yield
            self.db.execute(str("COMMIT"))
        except Exception as e:
            e = Except.wrap(e)
            self.db.execute(str("ROLLBACK"))
            Log.error("Transaction failed", cause=e)

    def _query(self, query):
        result = Data()
        curr = self.db.execute(query)
        result.meta.format = "table"
        result.header = [d[0] for d in curr.description] if curr.description else None
        result.data = curr.fetchall()
        return result

    def _db_create(self):
        with self._db_transaction():
            self.db.execute(
                "CREATE TABLE "
                + db_table_name
                + sql_iso(
                    sql_list(
                        [
                            quote_column(c.name)
                            + " "
                            + json_type_to_sqlite_type[c.jx_type]
                            for c in METADATA_COLUMNS
                        ]
                        + [
                            "PRIMARY KEY"
                            + sql_iso(
                                sql_list(map(quote_column, ["es_index", "es_column"]))
                            )
                        ]
                    )
                )
            )

            for c in METADATA_COLUMNS:
                self._add(c)
                self._db_insert_column(c)

    def _db_load(self):
        self.last_load = Date.now()

        result = self._query(
            SQL_SELECT
            + "name"
            + SQL_FROM
            + "sqlite_master"
            + SQL_WHERE
            + SQL_AND.join(["name=" + db_table_name, "type=" + quote_value("table")])
        )
        if not result.data:
            self._db_create()
            return

        result = self._query(
            SQL_SELECT
            + all_columns
            + SQL_FROM
            + db_table_name
            + SQL_ORDERBY
            + sql_list(map(quote_column, ["es_index", "name", "es_column"]))
        )

        with self.locker:
            for r in result.data:
                c = row_to_column(result.header, r)
                self._add(c)

    def _db_worker(self, please_stop):
        while not please_stop:
            try:
                with self._db_transaction():
                    result = self._query(
                        SQL_SELECT
                        + all_columns
                        + SQL_FROM
                        + db_table_name
                        + SQL_WHERE
                        + "last_updated > "
                        + quote_value(self.last_load)
                        + SQL_ORDERBY
                        + sql_list(map(quote_column, ["es_index", "name", "es_column"]))
                    )

                with self.locker:
                    for r in result.data:
                        c = row_to_column(result.header, r)
                        self._add(c)
                        if c.last_updated > self.last_load:
                            self.last_load = c.last_updated

                updates = self.todo.pop_all()
                DEBUG and updates and Log.note(
                    "{{num}} columns to push to db", num=len(updates)
                )
                for action, column in updates:
                    while not please_stop:
                        try:
                            with self._db_transaction():
                                DEBUG and Log.note(
                                    "{{action}} db for {{table}}.{{column}}",
                                    action=action,
                                    table=column.es_index,
                                    column=column.es_column,
                                )
                                if action is EXECUTE:
                                    self.db.execute(column)
                                elif action is UPDATE:
                                    self.db.execute(
                                        "UPDATE"
                                        + db_table_name
                                        + "SET"
                                        + sql_list(
                                            [
                                                "count=" + quote_value(column.count),
                                                "cardinality="
                                                + quote_value(column.cardinality),
                                                "multi=" + quote_value(column.multi),
                                                "partitions="
                                                + quote_value(
                                                    value2json(column.partitions)
                                                ),
                                                "last_updated="
                                                + quote_value(column.last_updated),
                                            ]
                                        )
                                        + SQL_WHERE
                                        + SQL_AND.join(
                                            [
                                                "es_index = "
                                                + quote_value(column.es_index),
                                                "es_column = "
                                                + quote_value(column.es_column),
                                                "last_updated < "
                                                + quote_value(column.last_updated),
                                            ]
                                        )
                                    )
                                elif action is DELETE:
                                    self.db.execute(
                                        "DELETE FROM"
                                        + db_table_name
                                        + SQL_WHERE
                                        + SQL_AND.join(
                                            [
                                                "es_index = "
                                                + quote_value(column.es_index),
                                                "es_column = "
                                                + quote_value(column.es_column),
                                            ]
                                        )
                                    )
                                else:
                                    self._db_insert_column(column)
                            break
                        except Exception as e:
                            e = Except.wrap(e)
                            if "database is locked" in e:
                                Log.note("metadata database is locked")
                                Till(seconds=1).wait()
                                break
                            else:
                                Log.warning("problem updating database", cause=e)

            except Exception as e:
                Log.warning("problem updating database", cause=e)

            (Till(seconds=10) | please_stop).wait()

    def _db_insert_column(self, column):
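        # nested_path AND partitions ARE LISTS, SO THEY ARE STORED AS JSON; A
        # UNIQUE-CONSTRAINT FAILURE MEANS THE ROW ALREADY EXISTS, SO AN UPDATE IS QUEUED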
        try:
            self.db.execute(
                "INSERT INTO"
                + db_table_name
                + sql_iso(all_columns)
                + "VALUES"
                + sql_iso(
                    sql_list(
                        [
                            quote_value(column[c.name])
                            if c.name not in ("nested_path", "partitions")
                            else quote_value(value2json(column[c.name]))
                            for c in METADATA_COLUMNS
                        ]
                    )
                )
            )
        except Exception as e:
            e = Except.wrap(e)
            if "UNIQUE constraint failed" in e or " are not unique" in e:
                # THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
                self.todo.add((UPDATE, column), force=True)
            else:
                Log.error("do not know how to handle", cause=e)

    def __copy__(self):
        output = object.__new__(ColumnList)
        Table.__init__(output, "meta.columns")
        output.data = {
            t: {c: list(cs) for c, cs in dd.items()} for t, dd in self.data.items()
        }
        output.locker = Lock()
        output._schema = None
        return output

    def find(self, es_index, abs_column_name=None):
        with self.locker:
            if es_index.startswith("meta."):
                self._update_meta()

            if not abs_column_name:
                return [c for cs in self.data.get(es_index, {}).values() for c in cs]
            else:
                return self.data.get(es_index, {}).get(abs_column_name, [])

    def extend(self, columns):
        self.dirty = True
        with self.locker:
            for column in columns:
                self._add(column)

    def add(self, column):
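        # MERGE column INTO THE CACHE; QUEUE AN INSERT (IF NEW) OR UPDATE (IF MERGED
        # INTO AN EXISTING canonical COLUMN) FOR THE DB WORKER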
        self.dirty = True
        with self.locker:
            canonical = self._add(column)
        if canonical == None:
            return column  # ALREADY ADDED
        self.todo.add((INSERT if canonical is column else UPDATE, canonical))
        return canonical

    def remove_table(self, table_name):
        del self.data[table_name]

    def _add(self, column):
        """
        ADD column TO THE IN-MEMORY CACHE (CALLERS HOLD self.locker)
        :param column: ANY COLUMN OBJECT
        :return: None IF column IS THE canonical COLUMN ALREADY (NET-ZERO EFFECT),
                 THE EXISTING canonical COLUMN IF column WAS MERGED INTO IT,
                 OR column ITSELF IF IT IS NEW
        """
        columns_for_table = self.data.setdefault(column.es_index, {})
        existing_columns = columns_for_table.setdefault(column.name, [])

        for canonical in existing_columns:
            if canonical is column:
                return None
            if canonical.es_type == column.es_type:
                if column.last_updated > canonical.last_updated:
                    for key in Column.__slots__:
                        old_value = canonical[key]
                        new_value = column[key]
                        if new_value == None:
                            pass  # DO NOT BOTHER CLEARING OLD VALUES (LIKE cardinality AND partitions)
                        elif new_value == old_value:
                            pass  # NO NEED TO UPDATE WHEN NO CHANGE MADE (COMMON CASE)
                        else:
                            canonical[key] = new_value
                return canonical
        existing_columns.append(column)
        return column

    def _update_meta(self):
        if not self.dirty:
            return
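        # RECOMPUTE count, cardinality, partitions AND multi FOR THE meta.columns
        # COLUMNS THEMSELVES, BASED ON THE COLUMNS CURRENTLY IN MEMORY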

        for mcl in self.data.get("meta.columns").values():
            for mc in mcl:
                count = 0
                values = set()
                objects = 0
                multi = 1
                for column in self._all_columns():
                    value = column[mc.name]
                    if value == None:
                        pass
                    else:
                        count += 1
                        if is_list(value):
                            multi = max(multi, len(value))
                            try:
                                values |= set(value)
                            except Exception:
                                objects += len(value)
                        elif is_data(value):
                            objects += 1
                        else:
                            values.add(value)
                mc.count = count
                mc.cardinality = len(values) + objects
                mc.partitions = jx.sort(values)
                mc.multi = multi
                mc.last_updated = Date.now()
        self.dirty = False

    def _all_columns(self):
        return [
            column
            for t, cs in self.data.items()
            for _, css in cs.items()
            for column in css
        ]

    def __iter__(self):
        with self.locker:
            self._update_meta()
            return iter(self._all_columns())

    def __len__(self):
        return self.data["meta.columns"]["es_index"].count

    def update(self, command):
        self.dirty = True
        try:
            command = wrap(command)
            DEBUG and Log.note(
                "Update {{timestamp}}: {{command|json}}",
                command=command,
                timestamp=Date(command["set"].last_updated),
            )
            eq = command.where.eq
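            # COMMANDS ARE EXPECTED TO FILTER ON es_index (AND MAYBE es_column);
            # ANY OTHER where FALLS BACK TO A FULL SCAN WITH jx.filter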
            if eq.es_index:
                if len(eq) == 1:
                    if unwraplist(command.clear) == ".":
                        with self.locker:
                            del self.data[eq.es_index]
                        self.todo.add(
                            (
                                EXECUTE,
                                "DELETE FROM "
                                + db_table_name
                                + SQL_WHERE
                                + " es_index="
                                + quote_value(eq.es_index),
                            )
                        )
                        return

                    # FASTEST
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [c for cs in all_columns for c in cs]
                elif eq.es_column and len(eq) == 2:
                    # FASTER
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [
                            c
                            for cs in all_columns
                            for c in cs
                            if c.es_column == eq.es_column
                        ]

                else:
                    # SLOWER
                    all_columns = self.data.get(eq.es_index, {}).values()
                    with self.locker:
                        columns = [
                            c
                            for cs in all_columns
                            for c in cs
                            if all(
                                c[k] == v for k, v in eq.items()
                            )  # THIS LINE IS VERY SLOW
                        ]
            else:
                columns = list(self)
                columns = jx.filter(columns, command.where)

            with self.locker:
                for col in columns:
                    DEBUG and Log.note(
                        "update column {{table}}.{{column}}",
                        table=col.es_index,
                        column=col.es_column,
                    )
                    for k in command["clear"]:
                        if k == ".":
                            self.todo.add((DELETE, col))
                            lst = self.data[col.es_index]
                            cols = lst[col.name]
                            cols.remove(col)
                            if len(cols) == 0:
                                del lst[col.name]
                                if len(lst) == 0:
                                    del self.data[col.es_index]
                            break
                        else:
                            col[k] = None
                    else:
                        # DID NOT DELETE COLUMN ("."), CONTINUE TO SET PROPERTIES
                        for k, v in command.set.items():
                            col[k] = v
                        self.todo.add((UPDATE, col))

        except Exception as e:
            Log.error("should not happen", cause=e)

    def query(self, query):
        # NOT EXPECTED TO BE RUN; Log.error() RAISES, SO THE REST OF THIS METHOD IS UNREACHABLE
        Log.error("not")
        with self.locker:
            self._update_meta()
            if not self._schema:
                self._schema = Schema(
                    ".", [c for cs in self.data["meta.columns"].values() for c in cs]
                )
            snapshot = self._all_columns()

        from jx_python.containers.list_usingPythonList import ListContainer

        query.frum = ListContainer("meta.columns", snapshot, self._schema)
        return jx.run(query)

    def groupby(self, keys):
        with self.locker:
            self._update_meta()
            return jx.groupby(self.__iter__(), keys)

    @property
    def schema(self):
        if not self._schema:
            with self.locker:
                self._update_meta()
                self._schema = Schema(
                    ".", [c for cs in self.data["meta.columns"].values() for c in cs]
                )
        return self._schema

    @property
    def namespace(self):
        return self

    def get_table(self, table_name):
        if table_name != "meta.columns":
            Log.error("this container has only the meta.columns")
        return self

    def denormalized(self):
        """
        THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
        THE DENORMALIZED PERSPECTIVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES
        """
        with self.locker:
            self._update_meta()
            output = [
                {
                    "table": c.es_index,
                    "name": untype_path(c.name),
                    "cardinality": c.cardinality,
                    "es_column": c.es_column,
                    "es_index": c.es_index,
                    "last_updated": c.last_updated,
                    "count": c.count,
                    "nested_path": [unnest_path(n) for n in c.nested_path],
                    "es_type": c.es_type,
                    "type": c.jx_type,
                }
                for tname, css in self.data.items()
                for cname, cs in css.items()
                for c in cs
                if c.jx_type not in STRUCT  # and c.es_column != "_id"
            ]

        from jx_python.containers.list_usingPythonList import ListContainer

        return ListContainer(
            self.name,
            data=output,
            schema=jx_base.Schema("meta.columns", SIMPLE_METADATA_COLUMNS),
        )
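
# A MINIMAL USAGE SKETCH (HYPOTHETICAL VALUES; THE ColumnList CONSTRUCTOR AND THE
# EXACT Column FIELDS ARE ASSUMPTIONS, SHOWN ONLY TO ILLUSTRATE add() AND find()):
#
#   columns = ColumnList(db)
#   col = Column(
#       name="status",
#       es_column="status.~s~",
#       es_index="task",
#       es_type="keyword",
#       jx_type="string",
#       nested_path=["."],
#       last_updated=Date.now(),
#   )
#   canonical = columns.add(col)             # QUEUES AN INSERT (OR UPDATE) ON self.todo
#   print(columns.find("task", "status"))    # -> [canonical]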

class StructuredLogger_usingElasticSearch(StructuredLogger):
    @override
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
        kwargs.host = Random.sample(listwrap(host), 1)[0]
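        # THE DEFAULTS ABOVE: 30-SECOND TIMEOUT, 3 RETRIES, ONE-MINUTE RETRY SLEEP, AND
        # ONE HOST PICKED AT RANDOM FROM host; BELOW, THE MODULE SCHEMA IS COPIED AND
        # ITS TYPED NESTED FIELD ("~N~") IS FORCED TO A nested MAPPING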

        schema = json2value(value2json(SCHEMA), leaves=True)
        schema.mappings[type].properties["~N~"].type = "nested"
        self.es = Cluster(kwargs).get_or_create_index(
            schema=schema,
            limit_replicas=True,
            typed=True,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)

    def write(self, template, params):
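        # TEMPLATES ARE LIMITED TO 2000 CHARACTERS AND QUEUED; THE BACKGROUND
        # _insert_loop THREAD DOES THE ACTUAL WRITE TO ES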
        try:
            params.template = strings.limit(params.template, 2000)
            params.format = None
            self.queue.add({"value": _deep_json_to_string(params, 3)}, timeout=3 * 60)
        except Exception as e:
            sys.stdout.write(text_type(Except.wrap(e)))
        return self

    def _insert_loop(self, please_stop=None):
        bad_count = 0
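        # DRAIN THE QUEUE IN BATCHES OF batch_size; ON FAILURE, WAIT
        # PAUSE_AFTER_BAD_INSERT SECONDS, AND AFTER MAX_BAD_COUNT CONSECUTIVE
        # FAILURES LOG A "GIVEN UP" WARNING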
        while not please_stop:
            try:
                messages = wrap(self.queue.pop_all())
                if not messages:
                    Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                    continue

                for g, mm in jx.groupby(messages, size=self.batch_size):
                    scrubbed = []
                    for i, message in enumerate(mm):
                        if message is THREAD_STOP:
                            please_stop.go()
                            continue
                        try:
                            chain = flatten_causal_chain(message.value)
                            scrubbed.append(
                                {
                                    "value": [
                                        _deep_json_to_string(m, depth=3)
                                        for m in chain
                                    ]
                                }
                            )
                        except Exception as e:
                            Log.warning("Problem adding to scrubbed list", cause=e)

                    self.es.extend(scrubbed)
                    bad_count = 0
            except Exception as f:
                Log.warning("Problem inserting logs into ES", cause=f)
                bad_count += 1
                if bad_count > MAX_BAD_COUNT:
                    Log.warning(
                        "Given up trying to write debug logs to ES index {{index}}",
                        index=self.es.settings.index,
                    )
                Till(seconds=PAUSE_AFTER_BAD_INSERT).wait()

        self.es.flush()

        # CONTINUE TO DRAIN THIS QUEUE
        while not please_stop:
            try:
                Till(seconds=PAUSE_AFTER_GOOD_INSERT).wait()
                self.queue.pop_all()
            except Exception as e:
                Log.warning("Should not happen", cause=e)

    def stop(self):
        with suppress_exception:
            self.queue.add(THREAD_STOP)  # BE PATIENT, LET REST OF MESSAGE BE SENT

        with suppress_exception:
            self.queue.close()
        self.worker.join()
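
# A MINIMAL USAGE SKETCH (HYPOTHETICAL host/index; SETTINGS NORMALLY ARRIVE VIA THE
# @override kwargs, AND params IS USUALLY A WRAPPED Data BUILT BY THE Log MACHINERY):
#
#   log_to_es = StructuredLogger_usingElasticSearch(
#       host="http://localhost",
#       index="debug-logs",
#   )
#   log_to_es.write("hello {{name}}", wrap({"name": "world"}))
#   log_to_es.stop()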