Ejemplo n.º 1
0
def create_table_column_stats_by_name(metastore_name, data):
    """Batch add/update table column stats for columns looked up by name.

    Args:
        metastore_name: Name used to look up the metastore; the request is
            rejected via ``api_assert`` when no metastore is found.
        data: Iterable of dicts, each read for the keys ``schema_name``,
            ``table_name``, ``column_name`` and ``stats``. Every item of
            ``stats`` is read for the keys ``key`` and ``value``.

    Returns:
        None. Entries whose column cannot be found are skipped silently.
    """
    # TODO: verify user is a service account
    with DBSession() as session:
        metastore = admin_logic.get_query_metastore_by_name(
            metastore_name, session=session
        )
        api_assert(metastore, "Invalid metastore")
        verify_metastore_permission(metastore.id, session=session)

        with DataTableFinder(metastore.id) as finder:
            for entry in data:
                column = finder.get_table_column_by_name(
                    schema_name=entry["schema_name"],
                    table_name=entry["table_name"],
                    column_name=entry["column_name"],
                    session=session,
                )
                # Unknown columns are ignored rather than treated as errors.
                if column is None:
                    continue

                for stat in entry["stats"]:
                    logic.upsert_table_column_stat(
                        column_id=column.id,
                        key=stat["key"],
                        value=stat["value"],
                        uid=current_user.id,
                        session=session,
                    )
Ejemplo n.º 2
0
def search_tables(
    metastore_id,
    keywords,
    filters=None,
    fields=None,
    sort_key=None,
    sort_order=None,
    limit=1000,
    offset=0,
    concise=False,
):
    """Search the tables index, restricted to a single metastore.

    Args:
        metastore_id: Metastore to search. The caller's permission on it is
            verified first and it is always added as a filter.
        keywords: Search text forwarded to the query builder.
        filters: Optional extra filters. They are copied before the
            ``["metastore_id", metastore_id]`` filter is appended, so neither
            the caller's list nor a shared default is ever mutated.
        fields: Optional list of fields forwarded to the query builder.
        sort_key: Forwarded to the query builder.
        sort_order: Forwarded to the query builder.
        limit: Forwarded to the query builder.
        offset: Forwarded to the query builder.
        concise: Forwarded to the query builder.

    Returns:
        A dict with the keys ``count`` and ``results``.
    """
    verify_metastore_permission(metastore_id)

    # Build a fresh list on every call: appending to a mutable default (or to
    # the caller's own list) would make filters pile up across requests.
    all_filters = list(filters) if filters is not None else []
    all_filters.append(["metastore_id", metastore_id])
    if fields is None:
        fields = []

    query = _construct_tables_query(
        keywords=keywords,
        filters=all_filters,
        fields=fields,
        limit=limit,
        offset=offset,
        concise=concise,
        sort_key=sort_key,
        sort_order=sort_order,
    )
    results, count = _get_matching_objects(
        query,
        ES_CONFIG["tables"]["index_name"],
        ES_CONFIG["tables"]["type_name"],
        True,
    )
    return {"count": count, "results": results}
Ejemplo n.º 3
0
def search_tables(
    metastore_id,
    keywords,
    filters=None,
    fields=None,
    sort_key=None,
    sort_order=None,
    limit=1000,
    offset=0,
    concise=False,
):
    """Search the tables index, restricted to a single metastore.

    Args:
        metastore_id: Metastore to search. The caller's permission on it is
            verified first and it is always added as a filter.
        keywords: Search text. It is split on ``-``, ``_`` and ``.`` and
            re-joined with spaces before being given to the query builder.
        filters: Optional extra filters. They are copied before the
            ``["metastore_id", metastore_id]`` filter is appended, so neither
            the caller's list nor a shared default is ever mutated.
        fields: Optional list of fields forwarded to the query builder.
        sort_key: Forwarded to the query builder.
        sort_order: Forwarded to the query builder.
        limit: Forwarded to the query builder.
        offset: Forwarded to the query builder.
        concise: Forwarded to the query builder.

    Returns:
        A dict with the keys ``count`` and ``results``.
    """
    verify_metastore_permission(metastore_id)

    # Build a fresh list on every call: appending to a mutable default (or to
    # the caller's own list) would make filters pile up across requests.
    all_filters = list(filters) if filters is not None else []
    all_filters.append(["metastore_id", metastore_id])
    if fields is None:
        fields = []

    # Unfortunately we currently can't search including underscore,
    # so split. # TODO: Allow for both.
    parsed_keywords = " ".join(re.split(r"[-_.]", keywords))

    query = _construct_tables_query(
        keywords=parsed_keywords,
        filters=all_filters,
        fields=fields,
        limit=limit,
        offset=offset,
        concise=concise,
        sort_key=sort_key,
        sort_order=sort_order,
    )
    results, count = _get_matching_objects(
        query,
        ES_CONFIG["tables"]["index_name"],
        ES_CONFIG["tables"]["type_name"],
        True,
    )
    return {"count": count, "results": results}
Ejemplo n.º 4
0
def get_if_schema_and_table_exists(metastore_id, schema_name,
                                   table_name) -> Tuple[bool, bool]:
    """
    Report whether a schema and a table exist in a metastore.

    Lookups go from cheapest to most expensive: the table is looked up in the
    cache and then through the metastore loader; only if both miss is the
    schema looked up, again cache first and loader second. A found table
    implies its schema exists.

    Returns a two-element list [schema_exists, table_exists]
    """
    verify_metastore_permission(metastore_id)

    with DataTableFinder(metastore_id) as finder:
        if finder.get_table_by_name(schema_name, table_name):
            return [True, True]

        # The loader is only created once the cache has missed.
        loader = get_metastore_loader(metastore_id)
        if loader.check_if_table_exists(schema_name, table_name):
            return [True, True]

        # Short-circuit keeps the loader call from running on a cache hit.
        if (finder.get_schema_by_name(schema_name)
                or loader.check_if_schema_exists(schema_name)):
            return [True, False]

    return [False, False]
Ejemplo n.º 5
0
def get_schema(schema_id, include_metastore=False, include_table=False):
    """Fetch one schema by id and return it as a dict.

    The request is rejected via ``api_assert`` when no schema is found, and
    the caller's permission on the schema's metastore is verified before the
    schema is serialized.

    Args:
        schema_id: Id of the schema to load.
        include_metastore: Passed through to ``to_dict``.
        include_table: Passed through to ``to_dict``.

    Returns:
        The result of the schema's ``to_dict`` call.
    """
    with DBSession() as session:
        found = logic.get_schema_by_id(schema_id, session=session)
        api_assert(found, "Invalid schema")
        verify_metastore_permission(found.metastore_id, session=session)

        # Serialize while the session is still open.
        return found.to_dict(include_metastore, include_table)
Ejemplo n.º 6
0
def get_schemas(
    metastore_id,
    limit=5,
    offset=0,
    sort_key="name",
    sort_order="desc",
):
    """Return one page of schemas for a metastore.

    Args:
        metastore_id: Metastore whose schemas are listed; the caller's
            permission on it is verified first.
        limit: Page size passed to the schema lookup.
        offset: Offset passed to the schema lookup.
        sort_key: Sort key passed to the schema lookup.
        sort_order: Sort order passed to the schema lookup.

    Returns:
        A dict with ``results`` (the fetched schemas) and ``done``, which is
        True when fewer than ``limit`` schemas came back.
    """
    verify_metastore_permission(metastore_id)

    page = logic.get_all_schemas(
        metastore_id, offset, limit, sort_key, sort_order
    )
    # A page shorter than the requested size is reported as the last one.
    is_done = len(page) < limit
    return {"results": page, "done": is_done}
Ejemplo n.º 7
0
def suggest_tables(metastore_id, prefix, limit=10):
    """Suggest table names for a prefix within one metastore.

    Args:
        metastore_id: Metastore to search; the caller's permission on it is
            verified before querying.
        prefix: Text the suggestions are requested for.
        limit: Number of suggestions requested. Values above 100 are
            rejected via ``api_assert``; ``None`` is allowed.

    Returns:
        A list of ``"<schema>.<name>"`` strings, one per returned option. A
        missing ``schema`` or ``name`` is rendered as an empty string.
    """
    limit_ok = limit is None or limit <= 100
    api_assert(limit_ok, "Requesting too many tables")
    verify_metastore_permission(metastore_id)

    suggest_query = construct_suggest_table_query(prefix, limit, metastore_id)
    index_name = ES_CONFIG["tables"]["index_name"]
    options = get_matching_suggestions(suggest_query, index_name)

    suggestions = []
    for option in options:
        source = option.get("_source", {})
        suggestions.append(
            "{}.{}".format(source.get("schema", ""), source.get("name", ""))
        )
    return suggestions
Ejemplo n.º 8
0
def suggest_tables(metastore_id, prefix, limit=10):
    """Suggest table names for a prefix using a completion suggest query.

    The search is best-effort: any ``Exception`` raised by the search call is
    logged at info level and treated as "no suggestions".

    Args:
        metastore_id: Metastore used as the suggest context; the caller's
            permission on it is verified first.
        prefix: Text the suggestions are requested for.
        limit: Requested number of suggestions (the ``size`` of the query).

    Returns:
        A dict ``{"data": [...]}`` holding ``"<schema>.<name>"`` strings. A
        missing ``schema`` or ``name`` is rendered as an empty string.
    """
    verify_metastore_permission(metastore_id)

    completion = {
        "field": "completion_name",
        "size": limit,
        "contexts": {
            "metastore_id": metastore_id
        },
    }
    query = {
        "suggest": {
            "suggest": {
                "text": prefix,
                "completion": completion,
            }
        },
    }

    # Config lookups stay outside the try so a bad config is not swallowed.
    index_name = ES_CONFIG["tables"]["index_name"]
    type_name = ES_CONFIG["tables"]["type_name"]

    try:
        response = get_hosted_es().search(index_name, type_name, body=query)
    except Exception as e:
        LOG.info(e)
        response = None
    if response is None:
        response = {}

    # Only the first entry of the "suggest" list is used.
    suggest_entries = response.get("suggest", {}).get("suggest", [])
    first_entry = next(iter(suggest_entries), {})

    texts = []
    for option in first_entry.get("options", []):
        source = option.get("_source", {})
        texts.append(
            "{}.{}".format(source.get("schema", ""), source.get("name", ""))
        )
    return {"data": texts}
Ejemplo n.º 9
0
def upsert_table_boost_score_by_name(metastore_name, data):
    """Batch update the score of tables looked up by name.

    Args:
        metastore_name: Name used to look up the metastore; the request is
            rejected via ``api_assert`` when no metastore is found.
        data: Iterable of dicts, each read for the keys ``schema_name``,
            ``table_name`` and ``boost_score``.

    Returns:
        None. Entries whose table cannot be found are skipped silently.
    """
    # TODO: verify user is a service account
    with DBSession() as session:
        metastore = admin_logic.get_query_metastore_by_name(
            metastore_name, session=session
        )
        api_assert(metastore, "Invalid metastore")
        verify_metastore_permission(metastore.id, session=session)

        with DataTableFinder(metastore.id) as finder:
            for entry in data:
                table = finder.get_table_by_name(
                    schema_name=entry["schema_name"],
                    table_name=entry["table_name"],
                    session=session,
                )
                # Unknown tables are ignored rather than treated as errors.
                if table is None:
                    continue

                logic.update_table(
                    id=table.id,
                    score=entry["boost_score"],
                    session=session,
                )
Ejemplo n.º 10
0
def suggest_tables(metastore_id, prefix, limit=10):
    """Return table-name suggestions for a prefix within one metastore.

    A completion suggest query is sent to the tables index. The lookup is
    best-effort: any ``Exception`` from the search call is logged at info
    level and yields an empty suggestion list.

    Args:
        metastore_id: Metastore used as the suggest context; the caller's
            permission on it is verified first.
        prefix: Text the suggestions are requested for.
        limit: Requested number of suggestions (the ``size`` of the query).

    Returns:
        A dict ``{"data": [...]}`` holding ``"<schema>.<name>"`` strings. A
        missing ``schema`` or ``name`` is rendered as an empty string.
    """
    verify_metastore_permission(metastore_id)

    suggest_body = {
        "text": prefix,
        "completion": {
            "field": "completion_name",
            "size": limit,
            "contexts": {
                "metastore_id": metastore_id
            },
        },
    }
    query = {"suggest": {"suggest": suggest_body}}

    # Read the config before the try so a config error still propagates.
    tables_config = ES_CONFIG["tables"]
    index_name = tables_config["index_name"]
    type_name = ES_CONFIG["tables"]["type_name"]

    response = None
    try:
        response = get_hosted_es().search(index_name, type_name, body=query)
    except Exception as e:
        LOG.info(e)
    if response is None:
        response = {}

    # Only the first entry of the "suggest" list carries the options we use.
    entries = response.get("suggest", {}).get("suggest", [])
    options = next(iter(entries), {}).get("options", [])

    def _qualified_name(option):
        # Render one option as "<schema>.<name>".
        source = option.get("_source", {})
        return "{}.{}".format(source.get("schema", ""), source.get("name", ""))

    return {"data": [_qualified_name(option) for option in options]}