def create_table_column_stats_by_name(metastore_name, data):
    """Batch add/update table column stats.

    The metastore is looked up by ``metastore_name`` and checked with
    ``api_assert`` and ``verify_metastore_permission``. Then, for every entry
    in ``data``, the column is resolved by schema/table/column name and each
    of its stats is upserted on behalf of ``current_user``. Entries whose
    column cannot be found are skipped.

    Every entry of ``data`` is read through the keys ``schema_name``,
    ``table_name``, ``column_name`` and ``stats``; every item of ``stats`` is
    read through the keys ``key`` and ``value``.
    """
    # TODO: verify user is a service account
    with DBSession() as session:
        metastore = admin_logic.get_query_metastore_by_name(
            metastore_name, session=session
        )
        api_assert(metastore, "Invalid metastore")
        verify_metastore_permission(metastore.id, session=session)

        with DataTableFinder(metastore.id) as t_finder:
            for entry in data:
                column = t_finder.get_table_column_by_name(
                    schema_name=entry["schema_name"],
                    table_name=entry["table_name"],
                    column_name=entry["column_name"],
                    session=session,
                )
                # Unknown columns are ignored rather than treated as errors.
                if column is None:
                    continue

                for stat in entry["stats"]:
                    logic.upsert_table_column_stat(
                        column_id=column.id,
                        key=stat["key"],
                        value=stat["value"],
                        uid=current_user.id,
                        session=session,
                    )
    return
def search_tables(
    metastore_id,
    keywords,
    filters=None,
    fields=None,
    sort_key=None,
    sort_order=None,
    limit=1000,
    offset=0,
    concise=False,
):
    """Search the tables index for tables belonging to one metastore.

    Args:
        metastore_id: Metastore to search in; access is checked with
            ``verify_metastore_permission`` before anything else happens.
        keywords: Search text, forwarded unchanged to the query builder.
        filters: Optional list of ``[field, value]`` filters. It is never
            modified; a ``["metastore_id", metastore_id]`` filter is added to
            a private copy. ``None`` means no extra filters.
        fields: Optional list of fields forwarded to the query builder.
            ``None`` means an empty list.
        sort_key, sort_order, limit, offset, concise: Forwarded unchanged to
            ``_construct_tables_query``.

    Returns:
        A dict with the keys ``"count"`` and ``"results"``.
    """
    verify_metastore_permission(metastore_id)

    # Build a fresh list on every call. Appending to the argument itself would
    # leak the metastore filter into the caller's list and, with a shared
    # default list, pile up filters from earlier calls.
    scoped_filters = list(filters) if filters is not None else []
    scoped_filters.append(["metastore_id", metastore_id])

    query = _construct_tables_query(
        keywords=keywords,
        filters=scoped_filters,
        fields=fields if fields is not None else [],
        limit=limit,
        offset=offset,
        concise=concise,
        sort_key=sort_key,
        sort_order=sort_order,
    )
    # NOTE(review): the trailing positional True presumably asks for the total
    # hit count as well -- confirm against _get_matching_objects.
    results, count = _get_matching_objects(
        query,
        ES_CONFIG["tables"]["index_name"],
        ES_CONFIG["tables"]["type_name"],
        True,
    )
    return {"count": count, "results": results}
def search_tables(
    metastore_id,
    keywords,
    filters=None,
    fields=None,
    sort_key=None,
    sort_order=None,
    limit=1000,
    offset=0,
    concise=False,
):
    """Search the tables index for tables belonging to one metastore.

    Args:
        metastore_id: Metastore to search in; access is checked with
            ``verify_metastore_permission`` before anything else happens.
        keywords: Search text. It is split on ``-``, ``_`` and ``.`` and the
            pieces are re-joined with spaces before the query is built.
        filters: Optional list of ``[field, value]`` filters. It is never
            modified; a ``["metastore_id", metastore_id]`` filter is added to
            a private copy. ``None`` means no extra filters.
        fields: Optional list of fields forwarded to the query builder.
            ``None`` means an empty list.
        sort_key, sort_order, limit, offset, concise: Forwarded unchanged to
            ``_construct_tables_query``.

    Returns:
        A dict with the keys ``"count"`` and ``"results"``.
    """
    verify_metastore_permission(metastore_id)

    # Build a fresh list on every call. Appending to the argument itself would
    # leak the metastore filter into the caller's list and, with a shared
    # default list, pile up filters from earlier calls.
    scoped_filters = list(filters) if filters is not None else []
    scoped_filters.append(["metastore_id", metastore_id])

    # Unfortunately we currently can't search including underscore,
    # so split.
    # TODO: Allow for both.
    parsed_keywords = " ".join(re.split(r"-|_|\.", keywords))

    query = _construct_tables_query(
        keywords=parsed_keywords,
        filters=scoped_filters,
        fields=fields if fields is not None else [],
        limit=limit,
        offset=offset,
        concise=concise,
        sort_key=sort_key,
        sort_order=sort_order,
    )
    # NOTE(review): the trailing positional True presumably asks for the total
    # hit count as well -- confirm against _get_matching_objects.
    results, count = _get_matching_objects(
        query,
        ES_CONFIG["tables"]["index_name"],
        ES_CONFIG["tables"]["type_name"],
        True,
    )
    return {"count": count, "results": results}
def get_if_schema_and_table_exists(metastore_id, schema_name, table_name) -> Tuple[bool, bool]:
    """
    Report whether a schema and a table exist for the given metastore.

    Lookup order: table in the DataTableFinder cache, table in the actual
    metastore (via the metastore loader), schema in the cache, schema in the
    actual metastore. The first hit decides the answer.

    Returns [schema_exists, table_exists] (a two-element list, despite the
    Tuple annotation).
    """
    verify_metastore_permission(metastore_id)

    with DataTableFinder(metastore_id) as t_finder:
        if t_finder.get_table_by_name(schema_name, table_name):
            return [True, True]

        # The loader is only created once the cache lookup has missed.
        loader = get_metastore_loader(metastore_id)
        if loader.check_if_table_exists(schema_name, table_name):
            return [True, True]

        # Table is absent; fall back to the schema, cache first.
        if t_finder.get_schema_by_name(schema_name):
            return [True, False]
        if loader.check_if_schema_exists(schema_name):
            return [True, False]

    return [False, False]
def get_schema(schema_id, include_metastore=False, include_table=False):
    """Fetch one schema by id and return its dict form.

    The schema must exist (checked with ``api_assert``) and the caller must
    pass ``verify_metastore_permission`` for the schema's metastore. The two
    ``include_*`` flags are handed positionally to ``schema.to_dict``.
    """
    with DBSession() as session:
        schema = logic.get_schema_by_id(schema_id, session=session)
        api_assert(schema, "Invalid schema")
        verify_metastore_permission(schema.metastore_id, session=session)
        # Serialize while the session is still open.
        return schema.to_dict(include_metastore, include_table)
def get_schemas(metastore_id, limit=5, offset=0, sort_key="name", sort_order="desc"):
    """Return one page of schemas for a metastore.

    Permission on the metastore is verified first. The result is a dict with
    ``"results"`` (the page returned by ``logic.get_all_schemas``) and
    ``"done"``, which is True when the page holds fewer than ``limit`` items.
    """
    verify_metastore_permission(metastore_id)

    page = logic.get_all_schemas(metastore_id, offset, limit, sort_key, sort_order)
    is_last_page = len(page) < limit
    return {"results": page, "done": is_last_page}
def suggest_tables(metastore_id, prefix, limit=10):
    """Return ``"schema.name"`` suggestions for tables matching ``prefix``.

    ``limit`` must be ``None`` or at most 100 (checked with ``api_assert``),
    and the caller must pass ``verify_metastore_permission``. Each suggestion
    is built from the ``schema`` and ``name`` entries of an option's
    ``_source``; missing entries become empty strings.
    """
    api_assert(limit is None or limit <= 100, "Requesting too many tables")
    verify_metastore_permission(metastore_id)

    suggest_query = construct_suggest_table_query(prefix, limit, metastore_id)
    index_name = ES_CONFIG["tables"]["index_name"]
    options = get_matching_suggestions(suggest_query, index_name)

    full_names = []
    for option in options:
        source = option.get("_source", {})
        full_names.append(
            "{}.{}".format(source.get("schema", ""), source.get("name", ""))
        )
    return full_names
def suggest_tables(metastore_id, prefix, limit=10):
    """Return ``{"data": [...]}`` with ``"schema.name"`` table suggestions.

    Runs a completion suggester on the ``completion_name`` field of the
    tables index, with ``metastore_id`` as context and ``limit`` as size.
    A failing search is logged at info level and yields an empty list.
    """
    verify_metastore_permission(metastore_id)

    # NOTE: the prefix is sent to the suggester as-is; it is not split on
    # "-" or "_".
    # TODO: Allow searching both with and without those separators.
    completion = {
        "field": "completion_name",
        "size": limit,
        "contexts": {"metastore_id": metastore_id},
    }
    query = {"suggest": {"suggest": {"text": prefix, "completion": completion}}}

    index_name = ES_CONFIG["tables"]["index_name"]
    type_name = ES_CONFIG["tables"]["type_name"]

    response = None
    try:
        response = get_hosted_es().search(index_name, type_name, body=query)
    except Exception as e:
        # Best effort: a failing search degrades to "no suggestions".
        LOG.info(e)
    if response is None:
        response = {}

    # Only the first entry of the "suggest" list is consulted.
    suggest_entries = response.get("suggest", {}).get("suggest", [])
    first_entry = next(iter(suggest_entries), {})
    options = first_entry.get("options", [])

    texts = []
    for option in options:
        source = option.get("_source", {})
        texts.append("{}.{}".format(source.get("schema", ""), source.get("name", "")))
    return {"data": texts}
def upsert_table_boost_score_by_name(metastore_name, data):
    """Batch update the score of tables identified by schema/table name.

    The metastore is looked up by ``metastore_name`` and checked with
    ``api_assert`` and ``verify_metastore_permission``. Every entry of
    ``data`` is read through the keys ``schema_name``, ``table_name`` and
    ``boost_score``. Entries whose table cannot be found are skipped.
    """
    # TODO: verify user is a service account
    with DBSession() as session:
        metastore = admin_logic.get_query_metastore_by_name(
            metastore_name, session=session
        )
        api_assert(metastore, "Invalid metastore")
        verify_metastore_permission(metastore.id, session=session)

        with DataTableFinder(metastore.id) as t_finder:
            for entry in data:
                table = t_finder.get_table_by_name(
                    schema_name=entry["schema_name"],
                    table_name=entry["table_name"],
                    session=session,
                )
                # Unknown tables are ignored rather than treated as errors.
                if table is None:
                    continue
                logic.update_table(
                    id=table.id, score=entry["boost_score"], session=session
                )
    return
def suggest_tables(metastore_id, prefix, limit=10):
    """Suggest table names for ``prefix`` within one metastore.

    Sends a completion-suggester query (field ``completion_name``, size
    ``limit``, context ``metastore_id``) to the tables index and returns
    ``{"data": [...]}`` where each item is ``"<schema>.<name>"`` taken from
    the option's ``_source``. If the search raises, the exception is logged
    at info level and the returned list is empty.
    """
    verify_metastore_permission(metastore_id)

    suggester = {
        "text": prefix,
        "completion": {
            "field": "completion_name",
            "size": limit,
            "contexts": {"metastore_id": metastore_id},
        },
    }
    query = {"suggest": {"suggest": suggester}}

    tables_config = ES_CONFIG["tables"]
    index_name = tables_config["index_name"]
    type_name = tables_config["type_name"]

    raw_result = None
    try:
        raw_result = get_hosted_es().search(index_name, type_name, body=query)
    except Exception as e:
        # Best effort: fall through with no result instead of failing the call.
        LOG.info(e)
    if raw_result is None:
        raw_result = {}

    # Only the first element of the suggester's result list is used.
    suggester_results = raw_result.get("suggest", {}).get("suggest", [])
    options = next(iter(suggester_results), {}).get("options", [])

    def _qualified_name(option):
        source = option.get("_source", {})
        return "{}.{}".format(source.get("schema", ""), source.get("name", ""))

    return {"data": [_qualified_name(option) for option in options]}