def create_database(
    name: str,
    description: Optional[str] = None,
    catalog_id: Optional[str] = None,
    exist_ok: bool = False,
    boto3_session: Optional[boto3.Session] = None,
) -> None:
    """Create a database in AWS Glue Catalog.

    Parameters
    ----------
    name : str
        Database name.
    description : str, optional
        A description for the Database.
    catalog_id : str, optional
        The ID of the Data Catalog in which the Database is looked up and created.
        If none is provided, the AWS account ID is used by default.
    exist_ok : bool
        If set to True will not raise an Exception if a Database with the same name already exists.
        In this case the description will be updated if it is different from the current one.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    None
        None.

    Raises
    ------
    exceptions.AlreadyExists
        If the database already exists and `exist_ok` is False.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.catalog.create_database(
    ...     name='awswrangler_test'
    ... )
    """
    client_glue: boto3.client = _utils.client(service_name="glue",
                                              session=boto3_session)
    args: Dict[str, str] = {"Name": name}
    if description is not None:
        args["Description"] = description

    try:
        # The existence check must target the same catalog as the update/create
        # calls below, otherwise a database in another catalog is never found.
        r = client_glue.get_database(
            **_catalog_id(catalog_id=catalog_id, Name=name))
        if not exist_ok:
            raise exceptions.AlreadyExists(
                f"Database {name} already exists and <exist_ok> is set to False."
            )
        # Only touch the catalog when a non-empty description actually differs.
        if description and description != r["Database"].get("Description", ""):
            client_glue.update_database(**_catalog_id(
                catalog_id=catalog_id, Name=name, DatabaseInput=args))
    except client_glue.exceptions.EntityNotFoundException:
        # Database is missing: create it from scratch.
        client_glue.create_database(
            **_catalog_id(catalog_id=catalog_id, DatabaseInput=args))
def _update_table_objects(
    catalog_id: Optional[str],
    database: str,
    table: str,
    transaction_id: str,
    boto3_session: Optional[boto3.Session],
    add_objects: Optional[List[Dict[str, Any]]] = None,
    del_objects: Optional[List[Dict[str, Any]]] = None,
) -> None:
    """Register Governed Table Objects changes to Lake Formation Engine.

    Sends one ``update_table_objects`` request whose ``WriteOperations`` list
    holds an ``AddObject`` entry per item of `add_objects` followed by a
    ``DeleteObject`` entry per item of `del_objects` (with the ``Size`` key
    handed to ``_without_keys`` for removal).
    """
    lf_client: boto3.client = _utils.client(
        service_name="lakeformation",
        session=_utils.ensure_session(session=boto3_session),
    )

    txn_kwargs: Dict[str, Any] = _transaction_id(
        transaction_id=transaction_id,
        DatabaseName=database,
        TableName=table,
    )
    request: Dict[str, Any] = _catalog_id(catalog_id=catalog_id, **txn_kwargs)

    # Additions come first, deletions second; a missing/empty list contributes nothing.
    additions: List[Dict[str, Dict[str, Any]]] = [
        {"AddObject": item} for item in (add_objects or [])
    ]
    deletions: List[Dict[str, Dict[str, Any]]] = [
        {"DeleteObject": _without_keys(item, ["Size"])} for item in (del_objects or [])
    ]
    request["WriteOperations"] = additions + deletions

    lf_client.update_table_objects(**request)
def _overwrite_table(
    client_glue: boto3.client,
    catalog_id: Optional[str],
    database: str,
    table: str,
    table_input: Dict[str, Any],
    transaction_id: Optional[str],
    boto3_session: boto3.Session,
) -> None:
    """Drop the table if it exists, then create it again from `table_input`."""
    delete_table_if_exists(
        database=database,
        table=table,
        transaction_id=transaction_id,
        boto3_session=boto3_session,
        catalog_id=catalog_id,
    )
    txn_kwargs: Dict[str, Any] = _transaction_id(
        transaction_id=transaction_id,
        DatabaseName=database,
        TableInput=table_input,
    )
    create_kwargs: Dict[str, Any] = _catalog_id(catalog_id=catalog_id, **txn_kwargs)
    client_glue.create_table(**create_kwargs)
def get_table_number_of_versions(
    database: str, table: str, catalog_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None
) -> int:
    """Get total number of versions.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    int
        Total number of versions.

    Examples
    --------
    >>> import awswrangler as wr
    >>> num = wr.catalog.get_table_number_of_versions(database="...", table="...")

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    pages = glue.get_paginator("get_table_versions").paginate(
        **_catalog_id(DatabaseName=database, TableName=table, catalog_id=catalog_id)
    )
    # Every page carries a slice of the versions; add up the slice sizes.
    return sum(len(page["TableVersions"]) for page in pages)
Beispiel #5
0
def _add_partitions(
    database: str,
    table: str,
    boto3_session: Optional[boto3.Session],
    inputs: List[Dict[str, Any]],
    catalog_id: Optional[str] = None,
) -> None:
    """Create partitions in batches of at most 100 inputs per request.

    Errors whose code is ``AlreadyExistsException`` are tolerated; any other
    reported error code raises ``exceptions.ServiceApiError`` carrying the
    whole error list of that batch.
    """
    batches: List[List[Dict[str, Any]]] = _utils.chunkify(lst=inputs, max_length=100)
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    for batch in batches:
        response: Dict[str, Any] = glue.batch_create_partition(
            **_catalog_id(
                catalog_id=catalog_id,
                DatabaseName=database,
                TableName=table,
                PartitionInputList=batch,
            )
        )
        errors = response.get("Errors")
        if not errors:
            continue
        for error in errors:
            # Entries without an error code cannot be classified and are ignored.
            has_code = "ErrorDetail" in error and "ErrorCode" in error["ErrorDetail"]
            if has_code and error["ErrorDetail"]["ErrorCode"] != "AlreadyExistsException":
                raise exceptions.ServiceApiError(str(errors))
def get_databases(
    catalog_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None
) -> Iterator[Dict[str, Any]]:
    """Get an iterator of databases.

    Parameters
    ----------
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Iterator[Dict[str, Any]]
        Iterator of Databases.

    Examples
    --------
    >>> import awswrangler as wr
    >>> dbs = wr.catalog.get_databases()

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    pages = glue.get_paginator("get_databases").paginate(**_catalog_id(catalog_id=catalog_id))
    for page in pages:
        # Lazily hand out each database entry of the current page.
        yield from page["DatabaseList"]
Beispiel #7
0
def get_table_description(
    database: str, table: str, catalog_id: Optional[str] = None, boto3_session: Optional[boto3.Session] = None
) -> Optional[str]:
    """Get table description.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Optional[str]
        Description if exists, otherwise None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> desc = wr.catalog.get_table_description(database="...", table="...")

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    lookup_kwargs: Dict[str, Any] = _catalog_id(catalog_id=catalog_id, DatabaseName=database, Name=table)
    table_details: Dict[str, Any] = glue.get_table(**lookup_kwargs)["Table"]
    # The "Description" key is absent when the table has no description.
    return table_details.get("Description")
Beispiel #8
0
def delete_database(name: str,
                    catalog_id: Optional[str] = None,
                    boto3_session: Optional[boto3.Session] = None) -> None:
    """Delete a database in AWS Glue Catalog.

    Parameters
    ----------
    name : str
        Database name.
    catalog_id : str, optional
        The ID of the Data Catalog that holds the Database.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    None
        None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.catalog.delete_database(
    ...     name='awswrangler_test'
    ... )
    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    delete_kwargs: Dict[str, Any] = _catalog_id(Name=name, catalog_id=catalog_id)
    glue.delete_database(**delete_kwargs)
def _get_partitions(
    database: str,
    table: str,
    expression: Optional[str] = None,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Dict[str, List[str]]:
    """Collect the partitions of a table, optionally filtered by `expression`.

    Pages through ``get_partitions`` (1,000 results per request) and lets
    ``_append_partitions`` fold every response into the returned dictionary.
    Paging stops once ``_append_partitions`` returns None.
    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)

    request: Dict[str, Any] = _catalog_id(
        catalog_id=catalog_id,
        DatabaseName=database,
        TableName=table,
        MaxResults=1_000,
        Segment={"SegmentNumber": 0, "TotalSegments": 1},
    )
    if expression is not None:
        request["Expression"] = expression

    collected: Dict[str, List[str]] = {}
    _logger.debug("Starting pagination...")

    while True:
        page: Dict[str, Any] = glue.get_partitions(**request)
        token: Optional[str] = _append_partitions(partitions_values=collected, response=page)
        if token is None:
            break
        # Ask for the page that follows the one just consumed.
        request["NextToken"] = page["NextToken"]

    _logger.debug("Pagination done.")
    return collected
def _get_table_input(
    database: str,
    table: str,
    boto3_session: Optional[boto3.Session],
    transaction_id: Optional[str] = None,
    catalog_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Fetch a table and reduce its definition to a fixed set of fields.

    Returns None when Glue reports the table as not found; otherwise a new
    dictionary holding only the whitelisted keys present in the response.
    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    lookup_kwargs: Dict[str, Any] = _catalog_id(
        catalog_id=catalog_id, **_transaction_id(transaction_id=transaction_id, DatabaseName=database, Name=table)
    )
    try:
        details: Dict[str, Any] = glue.get_table(**lookup_kwargs)
    except glue.exceptions.EntityNotFoundException:
        return None

    # Only these fields of the table definition are carried over.
    kept_fields = (
        "Name",
        "Description",
        "Owner",
        "LastAccessTime",
        "LastAnalyzedTime",
        "Retention",
        "StorageDescriptor",
        "PartitionKeys",
        "ViewOriginalText",
        "ViewExpandedText",
        "TableType",
        "Parameters",
        "TargetTable",
    )
    return {field: value for field, value in details["Table"].items() if field in kept_fields}
def get_columns_comments(
    database: str,
    table: str,
    transaction_id: Optional[str] = None,
    query_as_of_time: Optional[str] = None,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Dict[str, str]:
    """Get all columns comments.

    Note
    ----
    If reading from a governed table, pass only one of `transaction_id` or `query_as_of_time`.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    transaction_id: str, optional
        The ID of the transaction (i.e. used with GOVERNED tables).
    query_as_of_time: str, optional
        The time as of when to read the table contents. Must be a valid Unix epoch timestamp.
        Cannot be specified alongside transaction_id.
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Dict[str, str]
        Columns comments. e.g. {"col1": "foo boo bar"}.
        Regular and partition columns are both included; a column without
        a comment maps to None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> pars = wr.catalog.get_columns_comments(database="...", table="...")

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    txn_kwargs: Dict[str, Any] = _transaction_id(
        transaction_id=transaction_id, query_as_of_time=query_as_of_time, DatabaseName=database, Name=table
    )
    details: Dict[str, Any] = glue.get_table(**_catalog_id(catalog_id=catalog_id, **txn_kwargs))["Table"]

    comments: Dict[str, str] = {
        column["Name"]: column.get("Comment") for column in details["StorageDescriptor"]["Columns"]
    }
    # Partition keys are listed apart from regular columns and may be absent.
    if "PartitionKeys" in details:
        comments.update({key["Name"]: key.get("Comment") for key in details["PartitionKeys"]})
    return comments
Beispiel #12
0
def table(database: str,
          table: str,
          catalog_id: Optional[str] = None,
          boto3_session: Optional[boto3.Session] = None) -> pd.DataFrame:
    """Get table details as Pandas DataFrame.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    pandas.DataFrame
        One row per column with "Column Name", "Type", "Partition" and
        "Comment"; regular columns come first, partition keys after them.

    Examples
    --------
    >>> import awswrangler as wr
    >>> df_table = wr.catalog.table(database='default', table='my_table')

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    details = glue.get_table(**_catalog_id(
        catalog_id=catalog_id, DatabaseName=database, Name=table))["Table"]

    # Pair every column definition with its partition flag, regular columns first.
    flagged_columns = [(col, False) for col in details["StorageDescriptor"]["Columns"]]
    if "PartitionKeys" in details:
        flagged_columns.extend((col, True) for col in details["PartitionKeys"])

    df_dict: Dict[str, List[Union[str, bool]]] = {
        "Column Name": [col["Name"] for col, _ in flagged_columns],
        "Type": [col["Type"] for col, _ in flagged_columns],
        "Partition": [is_partition for _, is_partition in flagged_columns],
        # Columns without a comment get an empty string.
        "Comment": [col.get("Comment", "") for col, _ in flagged_columns],
    }
    return pd.DataFrame(data=df_dict)
def get_table_types(
    database: str,
    table: str,
    transaction_id: Optional[str] = None,
    query_as_of_time: Optional[str] = None,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Optional[Dict[str, str]]:
    """Get all columns and types from a table.

    Note
    ----
    If reading from a governed table, pass only one of `transaction_id` or `query_as_of_time`.

    Parameters
    ----------
    database: str
        Database name.
    table: str
        Table name.
    transaction_id: str, optional
        The ID of the transaction (i.e. used with GOVERNED tables).
    query_as_of_time: str, optional
        The time as of when to read the table contents. Must be a valid Unix epoch timestamp.
        Cannot be specified alongside transaction_id.
    catalog_id: str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session: boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Optional[Dict[str, str]]
        If table exists, a dictionary like {'col name': 'col data type'}. Otherwise None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.catalog.get_table_types(database='default', table='my_table')
    {'col0': 'int', 'col1': 'double'}

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    try:
        txn_kwargs: Dict[str, Any] = _transaction_id(
            transaction_id=transaction_id, query_as_of_time=query_as_of_time, DatabaseName=database, Name=table
        )
        details: Dict[str, Any] = glue.get_table(**_catalog_id(catalog_id=catalog_id, **txn_kwargs))
    except glue.exceptions.EntityNotFoundException:
        # A missing table is reported as None instead of an exception.
        return None
    return _extract_dtypes_from_table_details(response=details)
def get_table_location(
    database: str,
    table: str,
    transaction_id: Optional[str] = None,
    query_as_of_time: Optional[str] = None,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> str:
    """Get table's location on Glue catalog.

    Note
    ----
    If reading from a governed table, pass only one of `transaction_id` or `query_as_of_time`.

    Parameters
    ----------
    database: str
        Database name.
    table: str
        Table name.
    transaction_id: str, optional
        The ID of the transaction (i.e. used with GOVERNED tables).
    query_as_of_time: str, optional
        The time as of when to read the table contents. Must be a valid Unix epoch timestamp.
        Cannot be specified alongside transaction_id.
    catalog_id: str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session: boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    str
        Table's location.

    Raises
    ------
    exceptions.InvalidTable
        If the table details carry no storage location.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.catalog.get_table_location(database='default', table='my_table')
    's3://bucket/prefix/'

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    txn_kwargs: Dict[str, Any] = _transaction_id(
        transaction_id=transaction_id, query_as_of_time=query_as_of_time, DatabaseName=database, Name=table
    )
    details: Dict[str, Any] = glue.get_table(**_catalog_id(catalog_id=catalog_id, **txn_kwargs))
    try:
        location = details["Table"]["StorageDescriptor"]["Location"]
    except KeyError as ex:
        # Any missing level of the nested lookup means there is no usable location.
        raise exceptions.InvalidTable(f"{database}.{table}") from ex
    return cast(str, location)
def get_table_description(
    database: str,
    table: str,
    transaction_id: Optional[str] = None,
    query_as_of_time: Optional[str] = None,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Optional[str]:
    """Get table description.

    Note
    ----
    If reading from a governed table, pass only one of `transaction_id` or `query_as_of_time`.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    transaction_id: str, optional
        The ID of the transaction (i.e. used with GOVERNED tables).
    query_as_of_time: str, optional
        The time as of when to read the table contents. Must be a valid Unix epoch timestamp.
        Cannot be specified alongside transaction_id.
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Optional[str]
        Description if exists, otherwise None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> desc = wr.catalog.get_table_description(database="...", table="...")

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    txn_kwargs: Dict[str, Any] = _transaction_id(
        transaction_id=transaction_id, query_as_of_time=query_as_of_time, DatabaseName=database, Name=table
    )
    table_details: Dict[str, Any] = glue.get_table(**_catalog_id(catalog_id=catalog_id, **txn_kwargs))["Table"]
    # The "Description" key is absent when the table has no description.
    return table_details.get("Description")
Beispiel #16
0
def delete_column(
    database: str,
    table: str,
    column_name: str,
    boto3_session: Optional[boto3.Session] = None,
    catalog_id: Optional[str] = None,
) -> None:
    """Delete a column in a AWS Glue Catalog table.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    column_name : str
        Column name
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
    catalog_id : str, optional
        The ID of the Data Catalog that holds the table.
        If none is provided, the AWS account ID is used by default.

    Returns
    -------
    None
        None

    Raises
    ------
    exceptions.ServiceApiError
        If the update response lists an error that carries an error code.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.catalog.delete_column(
    ...     database='my_db',
    ...     table='my_table',
    ...     column_name='my_col',
    ... )
    """
    client_glue: boto3.client = _utils.client(service_name="glue",
                                              session=boto3_session)
    # Read the definition from the same catalog the update is written to;
    # otherwise a non-default catalog_id would mix two different catalogs.
    table_res: Dict[str, Any] = client_glue.get_table(**_catalog_id(
        catalog_id=catalog_id, DatabaseName=database, Name=table))
    table_input: Dict[str, Any] = _update_table_definition(table_res)
    # Keep every column except the one being deleted.
    table_input["StorageDescriptor"]["Columns"] = [
        i for i in table_input["StorageDescriptor"]["Columns"]
        if i["Name"] != column_name
    ]
    res: Dict[str, Any] = client_glue.update_table(**_catalog_id(
        catalog_id=catalog_id, DatabaseName=database, TableInput=table_input))
    if ("Errors" in res) and res["Errors"]:
        for error in res["Errors"]:
            # Only entries carrying an error code are treated as failures.
            if "ErrorDetail" in error and "ErrorCode" in error["ErrorDetail"]:
                raise exceptions.ServiceApiError(str(res["Errors"]))
Beispiel #17
0
def delete_partitions(
    table: str,
    database: str,
    partitions_values: List[List[str]],
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> None:
    """Delete specified partitions in a AWS Glue Catalog table.

    Parameters
    ----------
    table : str
        Table name.
    database : str
        Database name.
    partitions_values : List[List[str]]
        List of lists of partitions values as strings.
        (e.g. [['2020', '10', '25'], ['2020', '11', '16'], ['2020', '12', '19']]).
    catalog_id : str, optional
        The ID of the Data Catalog that holds the table.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    None
        None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.catalog.delete_partitions(
    ...     table='my_table',
    ...     database='awswrangler_test',
    ...     partitions_values=[['2020', '10', '25'], ['2020', '11', '16'], ['2020', '12', '19']]
    ... )
    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    # Requests are issued in batches of at most 25 partitions.
    batches: List[List[List[str]]] = _utils.chunkify(lst=partitions_values, max_length=25)
    for batch in batches:
        to_delete = [{"Values": values} for values in batch]
        request: Dict[str, Any] = _catalog_id(
            catalog_id=catalog_id,
            DatabaseName=database,
            TableName=table,
            PartitionsToDelete=to_delete,
        )
        glue.batch_delete_partition(**request)
Beispiel #18
0
def delete_table_if_exists(
    database: str,
    table: str,
    transaction_id: Optional[str] = None,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> bool:
    """Delete Glue table if exists.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    transaction_id: str, optional
        The ID of the transaction (i.e. used with GOVERNED tables).
    catalog_id : str, optional
        The ID of the Data Catalog that holds the table.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    bool
        True if deleted, otherwise False.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.catalog.delete_table_if_exists(database='default', table='my_table')  # deleted
    True
    >>> wr.catalog.delete_table_if_exists(database='default', table='my_table')  # Nothing to be deleted
    False

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    try:
        # catalog_id travels through _transaction_id's keyword arguments before
        # reaching _catalog_id (presumably passed through untouched; verify
        # against the helper's definition).
        txn_kwargs: Dict[str, Any] = _transaction_id(
            transaction_id=transaction_id, DatabaseName=database, Name=table, catalog_id=catalog_id
        )
        glue.delete_table(**_catalog_id(**txn_kwargs))
    except glue.exceptions.EntityNotFoundException:
        # Nothing to delete.
        return False
    return True
def _overwrite_table_parameters(
    parameters: Dict[str, str],
    database: str,
    catalog_versioning: bool,
    catalog_id: Optional[str],
    table_input: Dict[str, Any],
    boto3_session: Optional[boto3.Session],
) -> Dict[str, str]:
    """Replace the table's parameters with `parameters` and push the update.

    `table_input` is modified in place (its ``Parameters`` entry is replaced).
    ``SkipArchive`` is sent as the negation of `catalog_versioning`.
    Returns the `parameters` argument unchanged.
    """
    table_input["Parameters"] = parameters
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    update_kwargs: Dict[str, Any] = _catalog_id(
        catalog_id=catalog_id,
        DatabaseName=database,
        TableInput=table_input,
        SkipArchive=not catalog_versioning,
    )
    glue.update_table(**update_kwargs)
    return parameters
def _get_table_objects(
    catalog_id: Optional[str],
    database: str,
    table: str,
    transaction_id: str,
    boto3_session: Optional[boto3.Session],
    partition_cols: Optional[List[str]] = None,
    partitions_types: Optional[Dict[str, str]] = None,
    partitions_values: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
    """Get Governed Table Objects from Lake Formation Engine.

    Pages through ``get_table_objects`` (100 results per request, each request
    going through ``_utils.try_it`` with ``ResourceNotReadyException`` as the
    error code) and flattens the grouped response into a single list. Objects
    of a group with non-empty partition values get a ``PartitionValues`` key.
    A partition predicate is sent only when all three partition arguments are
    provided and non-empty.
    """
    lf_client: boto3.client = _utils.client(
        service_name="lakeformation",
        session=_utils.ensure_session(session=boto3_session),
    )

    txn_kwargs: Dict[str, Any] = _transaction_id(
        transaction_id=transaction_id,
        DatabaseName=database,
        TableName=table,
        MaxResults=100,
    )
    request: Dict[str, Union[str, int]] = _catalog_id(catalog_id=catalog_id, **txn_kwargs)
    if partition_cols and partitions_types and partitions_values:
        request["PartitionPredicate"] = _build_partition_predicate(
            partition_cols=partition_cols,
            partitions_types=partitions_types,
            partitions_values=partitions_values,
        )

    collected: List[Dict[str, Any]] = []
    while True:
        page = _utils.try_it(
            f=lf_client.get_table_objects,
            ex=botocore.exceptions.ClientError,
            ex_code="ResourceNotReadyException",
            base=1.0,
            max_num_tries=5,
            **request,
        )
        for group in page["Objects"]:
            for entry in group["Objects"]:
                # Tag each object with the partition values of its group, if any.
                if group["PartitionValues"]:
                    entry["PartitionValues"] = group["PartitionValues"]
                collected.append(entry)
        continuation = page.get("NextToken", None)
        if not continuation:
            # No (or empty) token: this was the last page.
            return collected
        request["NextToken"] = continuation
Beispiel #21
0
def get_connection(
        name: str,
        catalog_id: Optional[str] = None,
        boto3_session: Optional[boto3.Session] = None) -> Dict[str, Any]:
    """Get Glue connection details.

    The connection is requested with ``HidePassword=False``. When its
    properties contain ``ENCRYPTED_PASSWORD``, that value is base64-decoded,
    decrypted through KMS and stored under ``PASSWORD`` as UTF-8 text.

    Parameters
    ----------
    name : str
        Connection name.
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve Databases.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Dict[str, Any]
        API Response for:
        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/glue.html#Glue.Client.get_connection

    Examples
    --------
    >>> import awswrangler as wr
    >>> res = wr.catalog.get_connection(name='my_connection')

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)

    # Up to three attempts; ThrottlingException is the error code given to try_it.
    response = _utils.try_it(
        f=glue.get_connection,
        ex=botocore.exceptions.ClientError,
        ex_code="ThrottlingException",
        max_num_tries=3,
        **_catalog_id(catalog_id=catalog_id, Name=name, HidePassword=False),
    )
    connection = response["Connection"]

    properties = connection["ConnectionProperties"]
    if "ENCRYPTED_PASSWORD" in properties:
        kms = _utils.client(service_name="kms", session=boto3_session)
        ciphertext = base64.b64decode(properties["ENCRYPTED_PASSWORD"])
        plaintext = kms.decrypt(CiphertextBlob=ciphertext)["Plaintext"]
        properties["PASSWORD"] = plaintext.decode("utf-8")
    return cast(Dict[str, Any], connection)
Beispiel #22
0
def search_tables(
    text: str,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Iterator[Dict[str, Any]]:
    """Get an iterator of tables filtered by a search string.

    Note
    ----
    Search feature is not supported for Governed tables.

    Parameters
    ----------
    text : str
        Select only tables with the given string in table's properties.
    catalog_id : str, optional
        The ID of the Data Catalog to search.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Iterator[Dict[str, Any]]
        Iterator of tables (one dict per table, as returned by the Glue API).

    Examples
    --------
    >>> import awswrangler as wr
    >>> for tbl in wr.catalog.search_tables(text='my_property'):
    ...     print(tbl["Name"])

    """
    client_glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    args: Dict[str, Any] = _catalog_id(catalog_id=catalog_id, SearchText=text)
    while True:
        response: Dict[str, Any] = client_glue.search_tables(**args)
        yield from response.get("TableList", [])
        # Stop on a missing *or empty* token: re-sending an empty NextToken
        # would never make progress.
        next_token = response.get("NextToken")
        if not next_token:
            break
        args["NextToken"] = next_token
Beispiel #23
0
def get_table_versions(
    database: str,
    table: str,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> List[Dict[str, Any]]:
    """Get all versions of a Glue table.

    Every page produced by the ``get_table_versions`` paginator is consumed
    and the ``TableVersions`` entries are collected into a single list.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    catalog_id : str, optional
        The ID of the Data Catalog where the table resides.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    List[Dict[str, Any]]
        List of table versions:
        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/glue.html#Glue.Client.get_table_versions

    Examples
    --------
    >>> import awswrangler as wr
    >>> tables_versions = wr.catalog.get_table_versions(database="...", table="...")

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    pager = glue.get_paginator("get_table_versions")
    request = _catalog_id(DatabaseName=database, TableName=table, catalog_id=catalog_id)
    collected: List[Dict[str, Any]] = []
    for chunk in pager.paginate(**request):
        collected.extend(chunk["TableVersions"])
    return collected
Beispiel #24
0
def get_columns_comments(
    database: str,
    table: str,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Dict[str, str]:
    """Get all columns comments.

    Both regular columns (``StorageDescriptor.Columns``) and partition keys
    (``PartitionKeys``) are included. Columns without a comment are reported
    with an empty string instead of raising ``KeyError``.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    catalog_id : str, optional
        The ID of the Data Catalog where the table resides.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Dict[str, str]
        Columns comments. e.g. {"col1": "foo boo bar"}.

    Examples
    --------
    >>> import awswrangler as wr
    >>> comments = wr.catalog.get_columns_comments(database="...", table="...")

    """
    client_glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    response: Dict[str, Any] = client_glue.get_table(
        **_catalog_id(catalog_id=catalog_id, DatabaseName=database, Name=table)
    )
    table_def: Dict[str, Any] = response["Table"]
    columns = table_def.get("StorageDescriptor", {}).get("Columns", [])
    partition_keys = table_def.get("PartitionKeys", [])

    comments: Dict[str, str] = {}
    # Regular columns first, then partition keys (a later entry wins on a name clash).
    for group in (columns, partition_keys):
        for col in group:
            # "Comment" is optional in the Glue column definition.
            comments[col["Name"]] = col.get("Comment", "")
    return comments
Beispiel #25
0
def get_connection(
    name: str,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Dict[str, Any]:
    """Get Glue connection details.

    The connection is requested with ``HidePassword=False`` and the
    ``Connection`` element of the response is returned as-is.

    Parameters
    ----------
    name : str
        Connection name.
    catalog_id : str, optional
        The ID of the Data Catalog in which the connection resides.
        If none is provided, the AWS account ID is used by default.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Dict[str, Any]
        API Response for:
        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/glue.html#Glue.Client.get_connection

    Examples
    --------
    >>> import awswrangler as wr
    >>> res = wr.catalog.get_connection(name='my_connection')

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    request = _catalog_id(catalog_id=catalog_id, Name=name, HidePassword=False)
    response = glue.get_connection(**request)
    return cast(Dict[str, Any], response["Connection"])
Beispiel #26
0
def add_column(
    database: str,
    table: str,
    column_name: str,
    column_type: str = "string",
    column_comment: Optional[str] = None,
    transaction_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
    catalog_id: Optional[str] = None,
) -> None:
    """Add a column in a AWS Glue Catalog table.

    The current table definition is fetched, the new column is appended to
    ``StorageDescriptor.Columns`` and the table is updated in place.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    column_name : str
        Column name
    column_type : str
        Column type.
    column_comment : str, optional
        Column Comment. When omitted, the column is created without a comment.
    transaction_id: str, optional
        The ID of the transaction (i.e. used with GOVERNED tables).
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.
    catalog_id : str, optional
        The ID of the Data Catalog where the table resides.
        If none is provided, the AWS account ID is used by default.

    Returns
    -------
    None
        None

    Raises
    ------
    exceptions.ServiceApiError
        If the update response reports an error carrying an ``ErrorCode``.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.catalog.add_column(
    ...     database='my_db',
    ...     table='my_table',
    ...     column_name='my_col',
    ...     column_type='int'
    ... )
    """
    # NOTE(review): `_check_column_type` presumably raises for unsupported types;
    # a falsy result leaves the table untouched, as before.
    if not _check_column_type(column_type):
        return

    client_glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    table_res: Dict[str, Any] = client_glue.get_table(
        **_catalog_id(
            catalog_id=catalog_id,
            **_transaction_id(transaction_id=transaction_id, DatabaseName=database, Name=table),
        )
    )
    table_input: Dict[str, Any] = _update_table_definition(table_res)

    new_column: Dict[str, str] = {"Name": column_name, "Type": column_type}
    # Only send "Comment" when there is one: the Glue API expects a string
    # there, so passing None would be rejected by request validation.
    if column_comment is not None:
        new_column["Comment"] = column_comment
    table_input["StorageDescriptor"]["Columns"].append(new_column)

    res: Dict[str, Any] = client_glue.update_table(
        **_catalog_id(
            catalog_id=catalog_id,
            **_transaction_id(transaction_id=transaction_id, DatabaseName=database, TableInput=table_input),
        )
    )
    errors = res.get("Errors") or []
    if any("ErrorCode" in error.get("ErrorDetail", {}) for error in errors):
        raise exceptions.ServiceApiError(str(errors))
Beispiel #27
0
def read_sql_query(
    sql: str,
    database: str,
    transaction_id: Optional[str] = None,
    query_as_of_time: Optional[str] = None,
    catalog_id: Optional[str] = None,
    categories: Optional[List[str]] = None,
    safe: bool = True,
    map_types: bool = True,
    use_threads: bool = True,
    boto3_session: Optional[boto3.Session] = None,
    params: Optional[Dict[str, Any]] = None,
) -> pd.DataFrame:
    """Execute PartiQL query on AWS Glue Table (Transaction ID or time travel timestamp). Return Pandas DataFrame.

    Note
    ----
    ORDER BY operations are not honoured.
    i.e. sql="SELECT * FROM my_table ORDER BY my_column" is NOT valid

    Note
    ----
    The database must NOT be explicitly defined in the PartiQL statement.
    i.e. sql="SELECT * FROM my_table" is valid
    but sql="SELECT * FROM my_db.my_table" is NOT valid

    Note
    ----
    Pass one of `transaction_id` or `query_as_of_time`, not both.
    When neither is given, a read-only transaction is started for the query
    and committed once the result has been read.

    Parameters
    ----------
    sql : str
        partiQL query.
    database : str
        AWS Glue database name
    transaction_id : str, optional
        The ID of the transaction at which to read the table contents.
        Cannot be specified alongside query_as_of_time
    query_as_of_time : str, optional
        The time as of when to read the table contents. Must be a valid Unix epoch timestamp.
        Cannot be specified alongside transaction_id
    catalog_id : str, optional
        The ID of the Data Catalog where the database resides.
        If none is provided, the AWS account ID is used by default.
    categories: Optional[List[str]], optional
        List of columns names that should be returned as pandas.Categorical.
        Recommended for memory restricted environments.
    safe : bool, default True
        For certain data types, a cast is needed in order to store the
        data in a pandas DataFrame or Series (e.g. timestamps are always
        stored as nanoseconds in pandas). This option controls whether it
        is a safe cast or not.
    map_types : bool, default True
        True to convert pyarrow DataTypes to pandas ExtensionDtypes. It is
        used to override the default pandas type for conversion of built-in
        pyarrow types or in absence of pandas_metadata in the Table schema.
    use_threads : bool
        True to enable concurrent requests, False to disable multiple threads.
        When enabled, os.cpu_count() is used as the max number of threads.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session is used if boto3_session receives None.
    params: Dict[str, any], optional
        Dict of parameters used to format the partiQL query. Only named parameters are supported.
        The dict must contain the information in the form {"name": "value"} and the SQL query must contain
        `:name;` (the trailing semicolon is part of the placeholder). Values are inserted verbatim via
        ``str(value)``, so string literals must carry their own quotes.

    Returns
    -------
    pd.DataFrame
        Pandas DataFrame.

    Examples
    --------
    >>> import awswrangler as wr
    >>> df = wr.lakeformation.read_sql_query(
    ...     sql="SELECT * FROM my_table;",
    ...     database="my_db",
    ...     catalog_id="111111111111"
    ... )

    >>> import awswrangler as wr
    >>> df = wr.lakeformation.read_sql_query(
    ...     sql="SELECT * FROM my_table LIMIT 10;",
    ...     database="my_db",
    ...     transaction_id="1b62811fa3e02c4e5fdbaa642b752030379c4a8a70da1f8732ce6ccca47afdc9"
    ... )

    >>> import awswrangler as wr
    >>> df = wr.lakeformation.read_sql_query(
    ...     sql="SELECT * FROM my_table WHERE name=:name; AND city=:city;",
    ...     database="my_db",
    ...     query_as_of_time="1611142914",
    ...     params={"name": "'filtered_name'", "city": "'filtered_city'"}
    ... )

    """
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    client_lakeformation: boto3.client = _utils.client(service_name="lakeformation", session=session)

    # Plain textual substitution of every `:key;` placeholder.
    for key, value in (params or {}).items():
        sql = sql.replace(f":{key};", str(value))

    commit_trans: bool = False
    if not (transaction_id or query_as_of_time):
        _logger.debug("Neither `transaction_id` nor `query_as_of_time` were specified, starting transaction")
        transaction_id = start_transaction(read_only=True, boto3_session=session)
        commit_trans = True

    args: Dict[str, Optional[str]] = _catalog_id(
        catalog_id=catalog_id,
        **_transaction_id(
            transaction_id=transaction_id,
            query_as_of_time=query_as_of_time,
            DatabaseName=database,
        ),
    )
    query_id: str = client_lakeformation.start_query_planning(QueryString=sql, QueryPlanningContext=args)["QueryId"]
    df = _resolve_sql_query(
        query_id=query_id,
        categories=categories,
        safe=safe,
        map_types=map_types,
        use_threads=use_threads,
        boto3_session=session,
    )
    if commit_trans:
        # Commit with the same session that started the transaction; falling
        # back to the default session could target other credentials/region.
        commit_transaction(transaction_id=transaction_id, boto3_session=session)  # type: ignore
    return df
def table(
    database: str,
    table: str,
    transaction_id: Optional[str] = None,
    query_as_of_time: Optional[str] = None,
    catalog_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> pd.DataFrame:
    """Describe a Glue table's columns as a Pandas DataFrame.

    The result has one row per column with the fields ``Column Name``,
    ``Type``, ``Partition`` and ``Comment``. Regular columns come first
    (``Partition`` is False), followed by partition keys (``Partition`` is
    True). A column without a comment gets an empty string.

    Note
    ----
    If reading from a governed table, pass only one of `transaction_id` or `query_as_of_time`.

    Parameters
    ----------
    database: str
        Database name.
    table: str
        Table name.
    transaction_id: str, optional
        The ID of the transaction (i.e. used with GOVERNED tables).
    query_as_of_time: str, optional
        The time as of when to read the table contents. Must be a valid Unix epoch timestamp.
        Cannot be specified alongside transaction_id.
    catalog_id: str, optional
        The ID of the Data Catalog where the table resides.
        If none is provided, the AWS account ID is used by default.
    boto3_session: boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    pandas.DataFrame
        Pandas DataFrame filled by formatted infos.

    Examples
    --------
    >>> import awswrangler as wr
    >>> df_table = wr.catalog.table(database='default', table='my_table')

    """
    glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    lookup = _catalog_id(
        catalog_id=catalog_id,
        **_transaction_id(
            transaction_id=transaction_id, query_as_of_time=query_as_of_time, DatabaseName=database, Name=table
        ),
    )
    definition = glue.get_table(**lookup)["Table"]

    names: List[Union[str, bool]] = []
    dtypes: List[Union[str, bool]] = []
    partition_flags: List[Union[str, bool]] = []
    remarks: List[Union[str, bool]] = []

    regular = definition["StorageDescriptor"].get("Columns", {}) if "StorageDescriptor" in definition else ()
    partitions = definition["PartitionKeys"] if "PartitionKeys" in definition else ()

    # Regular columns are listed before partition keys.
    for is_partition, group in ((False, regular), (True, partitions)):
        for entry in group:
            names.append(entry["Name"])
            dtypes.append(entry["Type"])
            partition_flags.append(is_partition)
            remarks.append(entry["Comment"] if "Comment" in entry else "")

    return pd.DataFrame(
        data={"Column Name": names, "Type": dtypes, "Partition": partition_flags, "Comment": remarks}
    )
def get_tables(
    catalog_id: Optional[str] = None,
    database: Optional[str] = None,
    transaction_id: Optional[str] = None,
    name_contains: Optional[str] = None,
    name_prefix: Optional[str] = None,
    name_suffix: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Iterator[Dict[str, Any]]:
    """Get an iterator of tables.

    Note
    ----
    Do not filter using name_contains and name_prefix/name_suffix at the same time.
    Only name_prefix and name_suffix can be combined together.

    Parameters
    ----------
    catalog_id : str, optional
        The ID of the Data Catalog from which to retrieve the tables.
        If none is provided, the AWS account ID is used by default.
    database : str, optional
        Database name. When omitted, every database of the catalog is scanned.
    transaction_id: str, optional
        The ID of the transaction (i.e. used with GOVERNED tables).
    name_contains : str, optional
        Select by a specific string on table name
    name_prefix : str, optional
        Select by a specific prefix on table name
    name_suffix : str, optional
        Select by a specific suffix on table name
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receive None.

    Returns
    -------
    Iterator[Dict[str, Any]]
        Iterator of tables.

    Raises
    ------
    exceptions.InvalidArgumentCombination
        If name_contains is combined with name_prefix and/or name_suffix.
        As this is a generator, the error surfaces on first iteration.

    Examples
    --------
    >>> import awswrangler as wr
    >>> tables = wr.catalog.get_tables()

    """
    client_glue: boto3.client = _utils.client(service_name="glue", session=boto3_session)
    paginator = client_glue.get_paginator("get_tables")

    # name_contains is exclusive with *either* of prefix/suffix, not only with
    # both at once; otherwise one of the filters would be silently dropped.
    if (name_contains is not None) and ((name_prefix is not None) or (name_suffix is not None)):
        raise exceptions.InvalidArgumentCombination(
            "Please, does not filter using name_contains and "
            "name_prefix/name_suffix at the same time. Only "
            "name_prefix and name_suffix can be combined together."
        )

    args: Dict[str, str] = {}
    if name_contains is not None:
        args["Expression"] = f"*{name_contains}*"
    elif (name_prefix is not None) and (name_suffix is not None):
        args["Expression"] = f"{name_prefix}*{name_suffix}"
    elif name_prefix is not None:
        args["Expression"] = f"{name_prefix}*"
    elif name_suffix is not None:
        args["Expression"] = f"*{name_suffix}"

    if database is not None:
        dbs: List[str] = [database]
    else:
        # Enumerate databases with the caller's session, not the default one.
        dbs = [x["Name"] for x in get_databases(catalog_id=catalog_id, boto3_session=boto3_session)]

    for db in dbs:
        args["DatabaseName"] = db
        response_iterator = paginator.paginate(
            **_catalog_id(catalog_id=catalog_id, **_transaction_id(transaction_id=transaction_id, **args))
        )
        try:
            for page in response_iterator:
                yield from page["TableList"]
        except client_glue.exceptions.EntityNotFoundException:
            # A database that cannot be found is skipped; move on to the next one.
            continue
Beispiel #30
0
def _create_table(  # pylint: disable=too-many-branches,too-many-statements
    database: str,
    table: str,
    description: Optional[str],
    parameters: Optional[Dict[str, str]],
    mode: str,
    catalog_versioning: bool,
    boto3_session: Optional[boto3.Session],
    table_input: Dict[str, Any],
    table_exist: bool,
    projection_enabled: bool,
    partitions_types: Optional[Dict[str, str]],
    columns_comments: Optional[Dict[str, str]],
    projection_types: Optional[Dict[str, str]],
    projection_ranges: Optional[Dict[str, str]],
    projection_values: Optional[Dict[str, str]],
    projection_intervals: Optional[Dict[str, str]],
    projection_digits: Optional[Dict[str, str]],
    catalog_id: Optional[str],
) -> None:
    """Finalize ``table_input`` and create or update the table in the Glue Catalog.

    ``table_input`` is modified in place: the description, the custom
    parameters, the partition-projection parameters and the column comments
    are applied through ``_update_if_necessary``, whose return value replaces
    ``mode`` after every call. Depending on ``table_exist`` and the resulting
    ``mode`` the table is then updated, created, or left alone.

    NOTE(review): ``_update_if_necessary`` is defined elsewhere; it presumably
    writes the value when it differs and may switch ``mode`` to "update" --
    confirm against its implementation.

    Parameters
    ----------
    database : str
        Database name.
    table : str
        Table name.
    description : str, optional
        Table description.
    parameters : Dict[str, str], optional
        Key/value pairs applied to ``table_input["Parameters"]``.
    mode : str
        One of 'overwrite', 'append', 'overwrite_partitions' or 'update'.
    catalog_versioning : bool
        ``SkipArchive`` is sent as the negation of this flag on table updates.
    boto3_session : boto3.Session(), optional
        Boto3 Session, resolved through ``_utils.ensure_session``.
    table_input : Dict[str, Any]
        Glue ``TableInput`` structure; must already contain "Parameters",
        "StorageDescriptor" -> "Columns" and (when comments are given) "PartitionKeys".
    table_exist : bool
        Whether the caller found the table to exist already.
    projection_enabled : bool
        Sets the "projection.enabled" parameter to "true" or "false".
    partitions_types : Dict[str, str], optional
        Partition column name -> type; every projected column must appear here.
    columns_comments : Dict[str, str], optional
        Column name -> comment (names are sanitized before matching).
    projection_types, projection_ranges, projection_values, projection_intervals, projection_digits : Dict[str, str], optional
        Per-column projection settings written as ``projection.<column>.<setting>``.
    catalog_id : str, optional
        Data Catalog ID forwarded to every Glue call through ``_catalog_id``.

    Raises
    ------
    exceptions.InvalidArgumentCombination
        If a projected column is not present in ``partitions_types``.
    exceptions.InvalidArgument
        If ``mode`` is not one of the accepted values after the updates above.
    """
    # Description
    mode = _update_if_necessary(dic=table_input,
                                key="Description",
                                value=description,
                                mode=mode)

    # Parameters
    parameters = parameters if parameters else {}
    for k, v in parameters.items():
        mode = _update_if_necessary(dic=table_input["Parameters"],
                                    key=k,
                                    value=v,
                                    mode=mode)

    # Projection
    if projection_enabled is True:
        table_input["Parameters"]["projection.enabled"] = "true"
        # Normalise every optional mapping to a dict so it can be iterated.
        partitions_types = partitions_types if partitions_types else {}
        projection_types = projection_types if projection_types else {}
        projection_ranges = projection_ranges if projection_ranges else {}
        projection_values = projection_values if projection_values else {}
        projection_intervals = projection_intervals if projection_intervals else {}
        projection_digits = projection_digits if projection_digits else {}
        # Projection settings are keyed by sanitized column name so they can be
        # looked up in `partitions_types` and embedded in parameter keys.
        projection_types = {
            sanitize_column_name(k): v
            for k, v in projection_types.items()
        }
        projection_ranges = {
            sanitize_column_name(k): v
            for k, v in projection_ranges.items()
        }
        projection_values = {
            sanitize_column_name(k): v
            for k, v in projection_values.items()
        }
        projection_intervals = {
            sanitize_column_name(k): v
            for k, v in projection_intervals.items()
        }
        projection_digits = {
            sanitize_column_name(k): v
            for k, v in projection_digits.items()
        }
        # Every projected column must be a partition column; date/timestamp
        # partitions additionally get default format (and interval) parameters.
        for k, v in projection_types.items():
            dtype: Optional[str] = partitions_types.get(k)
            if dtype is None:
                raise exceptions.InvalidArgumentCombination(
                    f"Column {k} appears as projected column but not as partitioned column."
                )
            if dtype == "date":
                table_input["Parameters"][
                    f"projection.{k}.format"] = "yyyy-MM-dd"
            elif dtype == "timestamp":
                table_input["Parameters"][
                    f"projection.{k}.format"] = "yyyy-MM-dd HH:mm:ss"
                table_input["Parameters"][
                    f"projection.{k}.interval.unit"] = "SECONDS"
                table_input["Parameters"][f"projection.{k}.interval"] = "1"
        # Caller-supplied projection settings, written as projection.<column>.<setting>.
        for k, v in projection_types.items():
            mode = _update_if_necessary(dic=table_input["Parameters"],
                                        key=f"projection.{k}.type",
                                        value=v,
                                        mode=mode)
        for k, v in projection_ranges.items():
            mode = _update_if_necessary(dic=table_input["Parameters"],
                                        key=f"projection.{k}.range",
                                        value=v,
                                        mode=mode)
        for k, v in projection_values.items():
            mode = _update_if_necessary(dic=table_input["Parameters"],
                                        key=f"projection.{k}.values",
                                        value=v,
                                        mode=mode)
        # Intervals and digits are stringified before being stored.
        for k, v in projection_intervals.items():
            mode = _update_if_necessary(dic=table_input["Parameters"],
                                        key=f"projection.{k}.interval",
                                        value=str(v),
                                        mode=mode)
        for k, v in projection_digits.items():
            mode = _update_if_necessary(dic=table_input["Parameters"],
                                        key=f"projection.{k}.digits",
                                        value=str(v),
                                        mode=mode)
    else:
        table_input["Parameters"]["projection.enabled"] = "false"

    # Column comments
    columns_comments = columns_comments if columns_comments else {}
    columns_comments = {
        sanitize_column_name(k): v
        for k, v in columns_comments.items()
    }
    if columns_comments:
        # Comments are applied to regular columns and to partition keys alike.
        for col in table_input["StorageDescriptor"]["Columns"]:
            name: str = col["Name"]
            if name in columns_comments:
                mode = _update_if_necessary(dic=col,
                                            key="Comment",
                                            value=columns_comments[name],
                                            mode=mode)
        for par in table_input["PartitionKeys"]:
            name = par["Name"]
            if name in columns_comments:
                mode = _update_if_necessary(dic=par,
                                            key="Comment",
                                            value=columns_comments[name],
                                            mode=mode)

    _logger.debug("table_input: %s", table_input)

    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    client_glue: boto3.client = _utils.client(service_name="glue",
                                              session=session)
    skip_archive: bool = not catalog_versioning
    # `mode` is validated only here, after the `_update_if_necessary` calls above.
    # NOTE(review): "update" is accepted but not listed in the error message.
    if mode not in ("overwrite", "append", "overwrite_partitions", "update"):
        raise exceptions.InvalidArgument(
            f"{mode} is not a valid mode. It must be 'overwrite', 'append' or 'overwrite_partitions'."
        )
    if table_exist is True and mode == "overwrite":
        # Existing table + overwrite: drop all partitions, then replace the definition.
        delete_all_partitions(table=table,
                              database=database,
                              catalog_id=catalog_id,
                              boto3_session=session)
        _logger.debug("Updating table (%s)...", mode)
        client_glue.update_table(**_catalog_id(catalog_id=catalog_id,
                                               DatabaseName=database,
                                               TableInput=table_input,
                                               SkipArchive=skip_archive))
    elif (table_exist is True) and (mode in ("append", "overwrite_partitions",
                                             "update")):
        # Existing table: the definition is only pushed when mode is "update";
        # "append" and "overwrite_partitions" make no Glue call here.
        if mode == "update":
            _logger.debug("Updating table (%s)...", mode)
            client_glue.update_table(**_catalog_id(catalog_id=catalog_id,
                                                   DatabaseName=database,
                                                   TableInput=table_input,
                                                   SkipArchive=skip_archive))
    elif table_exist is False:
        try:
            _logger.debug("Creating table (%s)...", mode)
            client_glue.create_table(**_catalog_id(catalog_id=catalog_id,
                                                   DatabaseName=database,
                                                   TableInput=table_input))
        except client_glue.exceptions.AlreadyExistsException:
            # The table appeared even though the caller reported it missing;
            # only "overwrite" reacts, by running `_overwrite_table` through `try_it`.
            if mode == "overwrite":
                _utils.try_it(
                    f=_overwrite_table,
                    ex=client_glue.exceptions.AlreadyExistsException,
                    client_glue=client_glue,
                    catalog_id=catalog_id,
                    database=database,
                    table=table,
                    table_input=table_input,
                    boto3_session=boto3_session,
                )
    # NOTE(review): this message is logged on every path, including after an
    # update or create -- confirm whether it was meant for the no-op branches only.
    _logger.debug("Leaving table as is (%s)...", mode)