Example #1
def _validate_datetimes(
    last_modified_begin: Optional[datetime.datetime] = None, last_modified_end: Optional[datetime.datetime] = None
) -> None:
    if (last_modified_begin is not None) and (last_modified_begin.tzinfo is None):
        raise exceptions.InvalidArgumentValue("Timezone is not defined for last_modified_begin.")
    if (last_modified_end is not None) and (last_modified_end.tzinfo is None):
        raise exceptions.InvalidArgumentValue("Timezone is not defined for last_modified_end.")
    if (last_modified_begin is not None) and (last_modified_end is not None):
        if last_modified_begin > last_modified_end:
            raise exceptions.InvalidArgumentValue("last_modified_begin is after last_modified_end.")
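A short usage sketch (the call sites below are hypothetical; it assumes `_validate_datetimes` and the module's `exceptions` are importable):

import datetime

# Naive datetimes are rejected: the validator requires tzinfo to be set on both bounds.
try:
    _validate_datetimes(last_modified_begin=datetime.datetime(2021, 1, 1))
except Exception as err:  # exceptions.InvalidArgumentValue in this module
    print(err)  # Timezone is not defined for last_modified_begin.

# Timezone-aware, correctly ordered bounds pass silently.
_validate_datetimes(
    last_modified_begin=datetime.datetime(2021, 1, 1, tzinfo=datetime.timezone.utc),
    last_modified_end=datetime.datetime(2021, 6, 1, tzinfo=datetime.timezone.utc),
)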
Example #2
def _resolve_query_with_cache(
    cache_info: _CacheInfo,
    categories: Optional[List[str]],
    chunksize: Optional[Union[int, bool]],
    use_threads: bool,
    session: Optional[boto3.Session],
) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]:
    """Fetch cached data and return it as a pandas DataFrame (or list of DataFrames)."""
    _logger.debug("cache_info:\n%s", cache_info)
    if cache_info.query_execution_id is None:
        raise RuntimeError("Trying to resolve with cache but w/o any query execution ID.")
    query_metadata: _QueryMetadata = _get_query_metadata(
        query_execution_id=cache_info.query_execution_id,
        boto3_session=session,
        categories=categories,
        query_execution_payload=cache_info.query_execution_payload,
    )
    if cache_info.file_format == "parquet":
        return _fetch_parquet_result(
            query_metadata=query_metadata,
            keep_files=True,
            categories=categories,
            chunksize=chunksize,
            use_threads=use_threads,
            boto3_session=session,
        )
    if cache_info.file_format == "csv":
        return _fetch_csv_result(
            query_metadata=query_metadata,
            keep_files=True,
            chunksize=chunksize,
            use_threads=use_threads,
            boto3_session=session,
        )
    raise exceptions.InvalidArgumentValue(f"Invalid data type: {cache_info.file_format}.")
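A hedged sketch of consuming the two possible return shapes; `cache_info` is a hypothetical value produced by the module's cache lookup, and the chunk size is arbitrary:

import pandas as pd

result = _resolve_query_with_cache(
    cache_info=cache_info,  # hypothetical: obtained from the cache-checking helper
    categories=None,
    chunksize=1_000_000,  # with a chunksize, the fetch helpers return an iterator of DataFrames
    use_threads=True,
    session=None,  # falls back to the default boto3 session
)
if isinstance(result, pd.DataFrame):
    total_rows = len(result)
else:  # iterator of DataFrame chunks
    total_rows = sum(len(chunk) for chunk in result)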
Example #3
def _resolve_query_without_cache_ctas(
    sql: str,
    database: Optional[str],
    data_source: Optional[str],
    s3_output: Optional[str],
    keep_files: bool,
    chunksize: Union[int, bool, None],
    categories: Optional[List[str]],
    encryption: Optional[str],
    workgroup: Optional[str],
    kms_key: Optional[str],
    wg_config: _WorkGroupConfig,
    name: Optional[str],
    use_threads: bool,
    s3_additional_kwargs: Optional[Dict[str, Any]],
    boto3_session: boto3.Session,
) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]:
    path: str = f"{s3_output}/{name}"
    ext_location: str = "\n" if wg_config.enforced is True else f",\n    external_location = '{path}'\n"
    sql = (
        f'CREATE TABLE "{name}"\n'
        f"WITH(\n"
        f"    format = 'Parquet',\n"
        f"    parquet_compression = 'SNAPPY'"
        f"{ext_location}"
        f") AS\n"
        f"{sql}"
    )
    _logger.debug("sql: %s", sql)
    try:
        query_id: str = _start_query_execution(
            sql=sql,
            wg_config=wg_config,
            database=database,
            data_source=data_source,
            s3_output=s3_output,
            workgroup=workgroup,
            encryption=encryption,
            kms_key=kms_key,
            boto3_session=boto3_session,
        )
    except botocore.exceptions.ClientError as ex:
        error: Dict[str, Any] = ex.response["Error"]
        if error["Code"] == "InvalidRequestException" and "Exception parsing query" in error["Message"]:
            raise exceptions.InvalidCtasApproachQuery(
                "It is not possible to wrap this query into a CTAS statement. Please use ctas_approach=False."
            )
        if error["Code"] == "InvalidRequestException" and "extraneous input" in error["Message"]:
            raise exceptions.InvalidCtasApproachQuery(
                "It is not possible to wrap this query into a CTAS statement. Please use ctas_approach=False."
            )
        raise ex
    _logger.debug("query_id: %s", query_id)
    try:
        query_metadata: _QueryMetadata = _get_query_metadata(
            query_execution_id=query_id,
            boto3_session=boto3_session,
            categories=categories,
            metadata_cache_manager=_cache_manager,
        )
    except exceptions.QueryFailed as ex:
        msg: str = str(ex)
        if "Column name" in msg and "specified more than once" in msg:
            raise exceptions.InvalidCtasApproachQuery(
                f"Please, define distinct names for your columns OR pass ctas_approach=False. Root error message: {msg}"
            )
        if "Column name not specified" in msg:
            raise exceptions.InvalidArgumentValue(
                "Please, define all columns names in your query. (E.g. 'SELECT MAX(col1) AS max_col1, ...')"
            )
        if "Column type is unknown" in msg:
            raise exceptions.InvalidArgumentValue(
                "Please, don't leave undefined columns types in your query. You can cast to ensure it. "
                "(E.g. 'SELECT CAST(NULL AS INTEGER) AS MY_COL, ...')")
        raise ex
    return _fetch_parquet_result(
        query_metadata=query_metadata,
        keep_files=keep_files,
        categories=categories,
        chunksize=chunksize,
        use_threads=use_threads,
        s3_additional_kwargs=s3_additional_kwargs,
        boto3_session=boto3_session,
    )
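For illustration, given the hypothetical inputs name="temp_table_abc123", s3_output="s3://bucket/athena", a non-enforcing workgroup, and sql="SELECT * FROM my_table", the wrapper statement built above comes out as:

CREATE TABLE "temp_table_abc123"
WITH(
    format = 'Parquet',
    parquet_compression = 'SNAPPY',
    external_location = 's3://bucket/athena/temp_table_abc123'
) AS
SELECT * FROM my_table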
Example #4
def create_ctas_table(  # pylint: disable=too-many-locals
    sql: str,
    database: str,
    ctas_table: Optional[str] = None,
    ctas_database: Optional[str] = None,
    s3_output: Optional[str] = None,
    storage_format: Optional[str] = None,
    write_compression: Optional[str] = None,
    partitioning_info: Optional[List[str]] = None,
    bucketing_info: Optional[Tuple[List[str], int]] = None,
    field_delimiter: Optional[str] = None,
    schema_only: bool = False,
    workgroup: Optional[str] = None,
    data_source: Optional[str] = None,
    encryption: Optional[str] = None,
    kms_key: Optional[str] = None,
    categories: Optional[List[str]] = None,
    wait: bool = False,
    boto3_session: Optional[boto3.Session] = None,
) -> Dict[str, Union[str, _QueryMetadata]]:
    """Create a new table populated with the results of a SELECT query.

    https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html

    Parameters
    ----------
    sql : str
        SELECT SQL query.
    database : str
        The name of the database where the original table is stored.
    ctas_table : Optional[str], optional
        The name of the CTAS table.
        If None, a name with a random string is used.
    ctas_database : Optional[str], optional
        The name of the alternative database where the CTAS table should be stored.
        If None, `database` is used; that is, the CTAS table is stored in the same database as the original table.
    s3_output : Optional[str], optional
        The output Amazon S3 path.
        If None, either the Athena workgroup or client-side location setting is used.
        If a workgroup enforces a query results location, then it overrides this argument.
    storage_format : Optional[str], optional
        The storage format for the CTAS query results, such as ORC, PARQUET, AVRO, JSON, or TEXTFILE.
        PARQUET by default.
    write_compression : Optional[str], optional
        The compression type to use for any storage format that allows compression to be specified.
    partitioning_info : Optional[List[str]], optional
        A list of columns by which the CTAS table will be partitioned.
    bucketing_info : Optional[Tuple[List[str], int]], optional
        Tuple consisting of the column names used for bucketing as the first element and the number of buckets as the
        second element.
        Only `str`, `int` and `bool` are supported as column data types for bucketing.
    field_delimiter : Optional[str], optional
        The single-character field delimiter for files in CSV, TSV, and text files.
    schema_only : bool, optional
        If True, `WITH NO DATA` is appended to the query so that only the table schema is created,
        without populating it with the query results. By default False.
    workgroup : Optional[str], optional
        Athena workgroup.
    data_source : Optional[str], optional
        Data Source / Catalog name. If None, 'AwsDataCatalog' is used.
    encryption : str, optional
        Valid values: [None, 'SSE_S3', 'SSE_KMS']. Note: 'CSE_KMS' is not supported.
    kms_key : str, optional
        For SSE-KMS, this is the KMS key ARN or ID.
    categories : List[str], optional
        List of columns names that should be returned as pandas.Categorical.
        Recommended for memory restricted environments.
    wait : bool, default False
        Whether to wait for the query to finish and return a dictionary with the Query metadata.
    boto3_session : Optional[boto3.Session], optional
        Boto3 Session. The default boto3 session is used if boto3_session is None.

    Returns
    -------
    Dict[str, Union[str, _QueryMetadata]]
        A dictionary with the CTAS database and table names.
        If `wait` is `False`, the query ID is included, otherwise a Query metadata object is added instead.

    Examples
    --------
    Select all into a new table and encrypt the results

    >>> import awswrangler as wr
    >>> wr.athena.create_ctas_table(
    ...     sql="select * from table",
    ...     database="default",
    ...     encryption="SSE_KMS",
    ...     kms_key="1234abcd-12ab-34cd-56ef-1234567890ab",
    ... )
    {'ctas_database': 'default', 'ctas_table': 'temp_table_5669340090094....', 'ctas_query_id': 'cc7dfa81-831d-...'}

    Create a table with schema only

    >>> wr.athena.create_ctas_table(
    ...     sql="select col1, col2 from table",
    ...     database="default",
    ...     ctas_table="my_ctas_table",
    ...     schema_only=True,
    ...     wait=True,
    ... )

    Partition data and save to alternative CTAS database

    >>> wr.athena.create_ctas_table(
    ...     sql="select * from table",
    ...     database="default",
    ...     ctas_database="my_ctas_db",
    ...     storage_format="avro",
    ...     write_compression="snappy",
    ...     partitioning_info=["par0", "par1"],
    ...     wait=True,
    ... )

    """
    ctas_table = catalog.sanitize_table_name(ctas_table) if ctas_table else f"temp_table_{uuid.uuid4().hex}"
    ctas_database = ctas_database if ctas_database else database
    fully_qualified_name = f'"{ctas_database}"."{ctas_table}"'

    wg_config: _WorkGroupConfig = _get_workgroup_config(session=boto3_session, workgroup=workgroup)
    s3_output = _get_s3_output(s3_output=s3_output, wg_config=wg_config, boto3_session=boto3_session)
    s3_output = s3_output[:-1] if s3_output[-1] == "/" else s3_output
    # If the workgroup enforces an external location, then it overrides the user-supplied argument
    external_location_str: str = (
        f"    external_location = '{s3_output}/{ctas_table}',\n" if (not wg_config.enforced) and s3_output else ""
    )

    # At least one property must be specified within `WITH()` in the query. We default to `PARQUET` for `storage_format`.
    storage_format_str: str = f"""    format = '{storage_format.upper() if storage_format else "PARQUET"}'"""
    write_compression_str: str = (
        f"    write_compression = '{write_compression.upper()}',\n" if write_compression else ""
    )
    partitioning_str: str = f"    partitioned_by = ARRAY{partitioning_info},\n" if partitioning_info else ""
    bucketing_str: str = (
        f"    bucketed_by = ARRAY{bucketing_info[0]},\n    bucket_count = {bucketing_info[1]},\n"
        if bucketing_info
        else ""
    )
    field_delimiter_str: str = f"    field_delimiter = '{field_delimiter}',\n" if field_delimiter else ""
    schema_only_str: str = "\nWITH NO DATA" if schema_only else ""

    ctas_sql = (f"CREATE TABLE {fully_qualified_name}\n"
                f"WITH(\n"
                f"{external_location_str}"
                f"{partitioning_str}"
                f"{bucketing_str}"
                f"{field_delimiter_str}"
                f"{write_compression_str}"
                f"{storage_format_str}"
                f")\n"
                f"AS {sql}"
                f"{schema_only_str}")
    _logger.debug("ctas sql: %s", ctas_sql)

    try:
        query_execution_id: str = _start_query_execution(
            sql=ctas_sql,
            wg_config=wg_config,
            database=database,
            data_source=data_source,
            s3_output=s3_output,
            workgroup=workgroup,
            encryption=encryption,
            kms_key=kms_key,
            boto3_session=boto3_session,
        )
    except botocore.exceptions.ClientError as ex:
        error: Dict[str, Any] = ex.response["Error"]
        if error["Code"] == "InvalidRequestException" and "Exception parsing query" in error["Message"]:
            raise exceptions.InvalidCtasApproachQuery(
                f"It is not possible to wrap this query into a CTAS statement. Root error message: {error['Message']}"
            )
        if error["Code"] == "InvalidRequestException" and "extraneous input" in error["Message"]:
            raise exceptions.InvalidCtasApproachQuery(
                f"It is not possible to wrap this query into a CTAS statement. Root error message: {error['Message']}"
            )
        raise ex

    response: Dict[str, Union[str, _QueryMetadata]] = {
        "ctas_database": ctas_database,
        "ctas_table": ctas_table
    }
    if wait:
        try:
            response["ctas_query_metadata"] = _get_query_metadata(
                query_execution_id=query_execution_id,
                boto3_session=boto3_session,
                categories=categories,
                metadata_cache_manager=_cache_manager,
            )
        except exceptions.QueryFailed as ex:
            msg: str = str(ex)
            if "Column name" in msg and "specified more than once" in msg:
                raise exceptions.InvalidCtasApproachQuery(
                    f"Please, define distinct names for your columns. Root error message: {msg}"
                )
            if "Column name not specified" in msg:
                raise exceptions.InvalidArgumentValue(
                    "Please, define all columns names in your query. (E.g. 'SELECT MAX(col1) AS max_col1, ...')"
                )
            if "Column type is unknown" in msg:
                raise exceptions.InvalidArgumentValue(
                    "Please, don't leave undefined columns types in your query. You can cast to ensure it. "
                    "(E.g. 'SELECT CAST(NULL AS INTEGER) AS MY_COL, ...')")
            raise ex
    else:
        response["ctas_query_id"] = query_execution_id
    return response
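For illustration, the docstring's partitioned example assembles roughly the following statement (bucket path and table suffix are hypothetical; the property order follows the concatenation above):

CREATE TABLE "my_ctas_db"."temp_table_abc123"
WITH(
    external_location = 's3://bucket/athena-results/temp_table_abc123',
    partitioned_by = ARRAY['par0', 'par1'],
    write_compression = 'SNAPPY',
    format = 'AVRO'
)
AS select * from table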