Example #1
0
def get_physical_table_metadata(
    database: Database, table_name: str, schema_name: Optional[str] = None,
) -> List[Dict[str, str]]:
    """Use SQLAlchemy inspector to get table metadata.

    :param database: The database model to inspect
    :param table_name: Name of the table to describe
    :param schema_name: Optional schema the table lives in
    :return: List of column dicts carrying ``type``, ``type_generic``
        and ``is_dttm`` keys (plus whatever the inspector returned)
    :raises NoSuchTableError: if the table does not exist or is not
        visible to the connection
    """
    db_engine_spec = database.db_engine_spec
    db_dialect = database.get_dialect()
    # Normalize an empty-string schema to None so the inspector
    # falls back to the connection's default schema.
    _schema_name = schema_name if schema_name else None
    # Table does not exist or is not visible to a connection.
    if not database.has_table_by_name(table_name, schema=_schema_name):
        raise NoSuchTableError

    cols = database.get_columns(table_name, schema=_schema_name)
    for col in cols:
        try:
            if isinstance(col["type"], TypeEngine):
                db_type = db_engine_spec.column_datatype_to_string(
                    col["type"], db_dialect
                )
                type_spec = db_engine_spec.get_column_spec(db_type)
                col.update(
                    {
                        "type": db_type,
                        "type_generic": type_spec.generic_type if type_spec else None,
                        "is_dttm": type_spec.is_dttm if type_spec else None,
                    }
                )
        # Broad exception catch, because there are multiple possible exceptions
        # from different drivers that fall outside CompileError
        except Exception:  # pylint: disable=broad-except
            # BUG FIX: this branch previously wrote the key "generic_type",
            # inconsistent with the success branch's "type_generic", so
            # callers saw different keys depending on whether the type
            # stringification succeeded.
            col.update(
                {"type": "UNKNOWN", "type_generic": None, "is_dttm": None}
            )
    return cols
Example #2
0
def get_table_metadata(database: Database, table_name: str,
                       schema_name: Optional[str]) -> Dict:
    """
    Get table metadata information, including type, pk, fks.

    This function raises SQLAlchemyError when a schema is not found.

    :param database: The database model
    :param table_name: Table name
    :param schema_name: schema name
    :return: Dict table metadata ready for API response
    """
    keys: List = []
    columns = database.get_columns(table_name, schema_name)
    # Column-name -> comment text; only populated for presto/hive below.
    comment_dict: Dict = {}
    primary_key = database.get_pk_constraint(table_name, schema_name)
    if primary_key and primary_key.get("constrained_columns"):
        primary_key["column_names"] = primary_key.pop("constrained_columns")
        primary_key["type"] = "pk"
        keys += [primary_key]
    # Some drivers return the dialect name as bytes.
    dialect_name = database.get_dialect().name
    if isinstance(dialect_name, bytes):
        dialect_name = dialect_name.decode()
    # Presto and Hive do not expose column comments through the inspector,
    # so issue a raw DESC and parse the rows ourselves.
    if dialect_name in ("presto", "hive"):
        db_engine_spec = database.db_engine_spec
        # NOTE(review): schema/table names are interpolated directly into the
        # SQL text; confirm upstream validation covers these identifiers.
        sql = ParsedQuery("desc {a}.{b}".format(a=schema_name,
                                                b=table_name)).stripped()
        engine = database.get_sqla_engine(schema_name)
        conn = engine.raw_connection()
        try:
            cursor = conn.cursor()
            # A bare Query object is used only for its default row limit;
            # the previous Session/commit machinery mutated a never-persisted
            # object and committed an empty session, so it was dropped.
            query = Query()
            db_engine_spec.execute(cursor, sql, async_=False)
            data = db_engine_spec.fetch_data(cursor, query.limit)
            # Presto puts the comment in column 3, Hive in column 2.
            comment_idx = 3 if dialect_name == "presto" else 2
            for row in data:
                comment_dict[row[0]] = row[comment_idx]
            conn.commit()
        finally:
            # Return the raw connection to the pool (was previously leaked).
            conn.close()

    foreign_keys = get_foreign_keys_metadata(database, table_name, schema_name)
    indexes = get_indexes_metadata(database, table_name, schema_name)
    keys += foreign_keys + indexes
    payload_columns: List[Dict] = []
    for col in columns:
        dtype = get_col_type(col)
        # Build the shared part of the payload once instead of repeating it
        # in every branch.
        column_payload: Dict = {
            "name": col["name"],
            "type": dtype.split("(")[0] if "(" in dtype else dtype,
            "longType": dtype,
            "keys": [k for k in keys if col["name"] in k.get("column_names")],
        }
        if comment_dict:
            # .get() so a column missing from the DESC output yields None
            # instead of raising KeyError.
            column_payload["comment"] = comment_dict.get(col["name"])
        elif dialect_name == "mysql":
            column_payload["comment"] = col["comment"]
        payload_columns.append(column_payload)
    return {
        "name": table_name,
        "columns": payload_columns,
        "selectStar": database.select_star(
            table_name,
            schema=schema_name,
            show_cols=True,
            indent=True,
            cols=columns,
            latest_partition=True,
        ),
        "primaryKey": primary_key,
        "foreignKeys": foreign_keys,
        # Preserves original behavior: "indexes" also carries pk and fks.
        "indexes": keys,
    }