def denormalized(self):
    """
    THE COLUMN METADATA IS HELD INTERNALLY IN A NESTED STRUCTURE THAT IS VERY
    DIFFERENT FROM THE DENORMALIZED PERSPECTIVE. THIS BUILDS THE DENORMALIZED
    (ONE FLAT RECORD PER COLUMN) PERSPECTIVE FOR QUERIES

    :return: ListContainer NAMED self.name, HOLDING ONE RECORD FOR EVERY
             COLUMN WHOSE jx_type IS NOT IN STRUCT
    """
    rows = []
    with self.locker:
        self._update_meta()
        # self.data IS WALKED TWO LEVELS DEEP; ONLY THE VALUES ARE USED
        for _outer_key, columns_by_name in self.data.items():
            for _inner_key, columns in columns_by_name.items():
                for column in columns:
                    if column.jx_type in STRUCT:
                        # STRUCT-TYPED COLUMNS ARE LEFT OUT
                        # (A FURTHER FILTER, es_column != "_id", WAS DISABLED HERE)
                        continue
                    rows.append({
                        "table": column.es_index,
                        "name": untype_path(column.name),
                        "cardinality": column.cardinality,
                        "es_column": column.es_column,
                        "es_index": column.es_index,
                        "last_updated": column.last_updated,
                        "count": column.count,
                        "nested_path": [
                            unnest_path(step) for step in column.nested_path
                        ],
                        "es_type": column.es_type,
                        "type": column.jx_type,
                    })

    # IMPORTED HERE, NOT AT MODULE LEVEL, AS IN THE ORIGINAL
    from jx_python.containers.list_usingPythonList import ListContainer

    schema = jx_base.Schema(META_COLUMNS_NAME, SIMPLE_METADATA_COLUMNS)
    return ListContainer(self.name, data=rows, schema=schema)
def __init__(self, host, index, sql_file='metadata.sqlite', alias=None, name=None, port=9200, kwargs=None):
    """
    SET UP THE CLUSTER CONNECTION, THE EMPTY METADATA CONTAINERS AND THE
    BACKGROUND WORKER THREAD

    :param host: NOT READ DIRECTLY IN THIS BODY (PRESUMABLY CARRIED IN kwargs - TODO CONFIRM)
    :param index: LAST FALLBACK FOR default_name
    :param sql_file: NOT READ DIRECTLY IN THIS BODY
    :param alias: SECOND CHOICE FOR default_name
    :param name: FIRST CHOICE FOR default_name
    :param port: NOT READ DIRECTLY IN THIS BODY (PRESUMABLY CARRIED IN kwargs - TODO CONFIRM)
    :param kwargs: KEPT AS self.settings AND HANDED TO elasticsearch.Cluster
    """
    if hasattr(self, "settings"):
        # settings IS ASSIGNED BELOW, SO THIS INSTANCE HAS ALREADY BEEN SET UP
        return

    self.too_old = TOO_OLD
    self.settings = kwargs
    self.default_name = coalesce(name, alias, index)
    self.es_cluster = elasticsearch.Cluster(kwargs=kwargs)
    self.index_does_not_exist = set()
    self.todo = Queue("refresh metadata", max=100000, unique=True)
    self.index_to_alias = Relation_usingList()
    self.es_metadata = Null
    # BACKDATED BY OLD_METADATA
    self.metadata_last_updated = Date.now() - OLD_METADATA

    self.meta = Data()
    self.meta.columns = ColumnList()

    # BOTH META TABLES START WITH THE SINGLE QUERY PATH "."
    meta_names = ("meta.columns", "meta.tables")
    self.alias_to_query_paths = {meta_name: [['.']] for meta_name in meta_names}
    self.alias_last_updated = {meta_name: Date.now() for meta_name in meta_names}

    columns_of_tables = metadata_tables()
    # THE TABLE LIST STARTS EMPTY (THE TableDesc ENTRIES FOR THE TWO META
    # TABLES WERE COMMENTED OUT IN THE ORIGINAL)
    self.meta.tables = ListContainer(
        "meta.tables",
        [],
        jx_base.Schema(".", columns_of_tables)
    )
    self.meta.columns.extend(columns_of_tables)

    # TODO: fix monitor so it does not bring down ES
    target = self.monitor if ENABLE_META_SCAN else self.not_monitor
    self.worker = Thread.run("refresh metadata", target)
def __init__(self, host, index, alias=None, name=None, port=9200, kwargs=None):
    """
    SET UP THE CLUSTER CONNECTION, REGISTER A TABLE DESCRIPTOR FOR THE META
    TABLES AND FOR EVERY INDEX THE CLUSTER REPORTS, THEN START THE BACKGROUND
    WORKER THREAD

    :param host: NOT READ DIRECTLY IN THIS BODY (PRESUMABLY CARRIED IN kwargs - TODO CONFIRM)
    :param index: NOT READ DIRECTLY IN THIS BODY
    :param alias: INCOMING VALUE IS NOT READ; THE NAME IS REBOUND PER INDEX IN THE LOOP BELOW
    :param name: NOT READ DIRECTLY IN THIS BODY
    :param port: NOT READ DIRECTLY IN THIS BODY (PRESUMABLY CARRIED IN kwargs - TODO CONFIRM)
    :param kwargs: KEPT AS self.settings AND HANDED TO elasticsearch.Cluster
    """
    if hasattr(self, "settings"):
        # settings IS ASSIGNED BELOW, SO THIS INSTANCE HAS ALREADY BEEN SET UP
        return

    self.settings = kwargs
    self.too_old = TOO_OLD
    self.es_cluster = elasticsearch.Cluster(kwargs=kwargs)
    self.index_does_not_exist = set()
    self.todo = Queue("refresh metadata", max=100000, unique=True)

    self.meta = Data()
    self.meta.columns = ColumnList(self.es_cluster)
    self.meta.columns.extend(META_TABLES_DESC.columns)
    self.meta.tables = ListContainer(
        META_TABLES_NAME,
        [],
        jx_base.Schema(".", META_TABLES_DESC.columns)
    )
    # FIX: THE ORIGINAL CALLED self.meta.table.extend(...) - "table" IS NEVER
    # ASSIGNED (ONLY "tables" IS), SO THE META TABLE DESCRIPTORS WERE NOT
    # REGISTERED. add() IS THE SAME METHOD USED FOR INDEX DESCRIPTORS BELOW
    for meta_desc in (META_COLUMNS_DESC, META_TABLES_DESC):
        self.meta.tables.add(meta_desc)

    self.alias_to_query_paths = {}
    for index_name, index_settings in self.es_cluster.get_metadata().indices.items():
        alias_count = len(index_settings.aliases)
        if alias_count == 0:
            # NO ALIAS: THE INDEX NAME STANDS IN FOR IT
            alias = index_name
        elif alias_count == 1:
            alias = first(index_settings.aliases)
        else:
            # ASSUMES Log.error RAISES, SO alias IS NEVER STALE BELOW - TODO CONFIRM
            Log.error("expecting only one alias per index")

        desc = TableDesc(
            name=alias,
            url=None,
            query_path=ROOT_PATH,
            last_updated=Date.MIN,
            columns=[]
        )
        self.meta.tables.add(desc)
        # REGISTER THE QUERY PATH UNDER BOTH THE NAME AND WHAT IT RESOLVES TO
        self.alias_to_query_paths[alias] = [desc.query_path]
        self.alias_to_query_paths[self._find_alias(alias)] = [desc.query_path]

    # WE MUST PAUSE?

    # TODO: fix monitor so it does not bring down ES
    if ENABLE_META_SCAN:
        self.worker = Thread.run("refresh metadata", self.monitor)
    else:
        self.worker = Thread.run("not refresh metadata", self.not_monitor)