Example #1
def delete_all_templates(
        account_id: Optional[str] = None,
        boto3_session: Optional[boto3.Session] = None) -> None:
    """Delete all templates.

    Parameters
    ----------
    account_id : str, optional
        If None, the account ID will be inferred from your boto3 session.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    None
        None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.quicksight.delete_all_templates()
    """
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    for template in list_templates(account_id=account_id,
                                   boto3_session=session):
        delete_template(template_id=template["TemplateId"],
                        account_id=account_id,
                        boto3_session=session)
Example #2
def create_athena_bucket(boto3_session: Optional[boto3.Session] = None) -> str:
    """Create the default Athena bucket if it doesn't exist.

    Parameters
    ----------
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    str
        Bucket s3 path (E.g. s3://aws-athena-query-results-ACCOUNT-REGION/)

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.athena.create_athena_bucket()
    's3://aws-athena-query-results-ACCOUNT-REGION/'

    """
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    account_id: str = _utils.get_account_id(boto3_session=session)
    region_name: str = str(session.region_name).lower()
    bucket_name: str = f"aws-athena-query-results-{account_id}-{region_name}"
    s3_output: str = f"s3://{bucket_name}/"
    s3_resource = session.resource("s3")
    # Bucket() only builds a local handle; create() actually creates the bucket
    # (outside us-east-1 the region must be passed as a LocationConstraint).
    args = {} if region_name == "us-east-1" else {"CreateBucketConfiguration": {"LocationConstraint": region_name}}
    try:
        s3_resource.Bucket(bucket_name).create(**args)
    except s3_resource.meta.client.exceptions.BucketAlreadyOwnedByYou:
        pass  # The bucket already exists and is owned by this account.
    return s3_output
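The returned path is the same default location Athena uses for query results, so it can be fed straight back into the Athena readers. A minimal usage sketch (``wr.athena.read_sql_query`` and its ``s3_output`` parameter exist in awswrangler, but the query and database names here are placeholders):

import awswrangler as wr

# Ensure the default results bucket exists, then reuse its path for a query.
s3_output = wr.athena.create_athena_bucket()
df = wr.athena.read_sql_query(
    sql="SELECT 1 AS col",  # placeholder query
    database="default",     # placeholder database
    s3_output=s3_output,    # where Athena writes the query results
)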
Example #3
def create_ingestion(
    dataset_name: Optional[str] = None,
    dataset_id: Optional[str] = None,
    ingestion_id: Optional[str] = None,
    account_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> str:
    """Create and starts a new SPICE ingestion on a dataset.

    Note
    ----
    You must pass the ``dataset_name`` OR the ``dataset_id`` argument.

    Parameters
    ----------
    dataset_name : str, optional
        Dataset name.
    dataset_id : str, optional
        Dataset ID.
    ingestion_id : str, optional
        Ingestion ID.
    account_id : str, optional
        If None, the account ID will be inferred from your boto3 session.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    str
        Ingestion ID

    Examples
    --------
    >>> import awswrangler as wr
    >>> status = wr.quicksight.create_ingestion("my_dataset")
    """
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    if (dataset_name is None) and (dataset_id is None):
        raise exceptions.InvalidArgument(
            "You must pass a not None dataset_name or dataset_id argument.")
    if (dataset_id is None) and (dataset_name is not None):
        dataset_id = get_dataset_id(name=dataset_name,
                                    account_id=account_id,
                                    boto3_session=session)
    if ingestion_id is None:
        ingestion_id = uuid.uuid4().hex
    client: boto3.client = _utils.client(service_name="quicksight",
                                         session=session)
    response: Dict[str, Any] = client.create_ingestion(
        DataSetId=dataset_id, IngestionId=ingestion_id, AwsAccountId=account_id
    )
    return response["IngestionId"]
Example #4
def _delete(func_name: str,
            account_id: Optional[str] = None,
            boto3_session: Optional[boto3.Session] = None,
            **kwargs) -> None:
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    client: boto3.client = _utils.client(service_name="quicksight",
                                         session=session)
    func: Callable = getattr(client, func_name)
    func(AwsAccountId=account_id, **kwargs)
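# For reference, the concrete delete_* helpers are thin wrappers around _delete.
# The sketch below is an assumption, not verbatim library code: it shows how
# delete_template (used in Example #1) could be built on top of it; TemplateId
# and VersionNumber mirror the QuickSight DeleteTemplate API parameters.
def delete_template(template_id: str,
                    version_number: Optional[int] = None,
                    account_id: Optional[str] = None,
                    boto3_session: Optional[boto3.Session] = None) -> None:
    args: Dict[str, Any] = {
        "func_name": "delete_template",
        "account_id": account_id,
        "boto3_session": boto3_session,
        "TemplateId": template_id,
    }
    if version_number is not None:
        args["VersionNumber"] = version_number
    _delete(**args)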
def describe_ingestion(
    ingestion_id: str,
    dataset_name: Optional[str] = None,
    dataset_id: Optional[str] = None,
    account_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Dict[str, Any]:
    """Describe a QuickSight ingestion by ID.

    Note
    ----
    You must pass a non-None value for the ``dataset_name`` or ``dataset_id`` argument.

    Parameters
    ----------
    ingestion_id : str
        Ingestion ID.
    dataset_name : str, optional
        Dataset name.
    dataset_id : str, optional
        Dataset ID.
    account_id : str, optional
        If None, the account ID will be inferred from your boto3 session.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    Dict[str, Any]
        Ingestion Description.

    Examples
    --------
    >>> import awswrangler as wr
    >>> description = wr.quicksight.describe_dataset(ingestion_id="...", dataset_name="...")
    """
    if (dataset_name is None) and (dataset_id is None):
        raise exceptions.InvalidArgument(
            "You must pass a not None name or dataset_id argument.")
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    if (dataset_id is None) and (dataset_name is not None):
        dataset_id = get_dataset_id(name=dataset_name,
                                    account_id=account_id,
                                    boto3_session=session)
    client: boto3.client = _utils.client(service_name="quicksight",
                                         session=session)
    return client.describe_ingestion(IngestionId=ingestion_id,
                                     AwsAccountId=account_id,
                                     DataSetId=dataset_id)["Ingestion"]
Example #6
def _get_default_logging_path(
    subnet_id: Optional[str] = None,
    account_id: Optional[str] = None,
    region: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> str:
    """Get EMR default logging path.

    E.g. "s3://aws-logs-{account_id}-{region}/elasticmapreduce/"

    Parameters
    ----------
    subnet_id : str, optional
        Subnet ID. If not provided, you must pass `account_id` and `region` explicitly.
    account_id: str, optional
        Account ID.
    region: str, optional
        Region e.g. 'us-east-1'
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    str
        Default logging path.
        E.g. "s3://aws-logs-{account_id}-{region}/elasticmapreduce/"

    Examples
    --------
    >>> import awswrangler as wr
    >>> state = wr.emr._get_default_logging_path("subnet-id")
    's3://aws-logs-{account_id}-{region}/elasticmapreduce/'

    """
    if account_id is None:
        boto3_session = _utils.ensure_session(session=boto3_session)
        _account_id: str = _utils.get_account_id(boto3_session=boto3_session)
    else:
        _account_id = account_id
    if (region is None) and (subnet_id is not None):
        boto3_session = _utils.ensure_session(session=boto3_session)
        _region: str = _utils.get_region_from_session(
            boto3_session=boto3_session)
    elif (region is None) and (subnet_id is None):
        raise exceptions.InvalidArgumentCombination(
            "You must pass region or subnet_id or both.")
    else:
        _region = region  # type: ignore
    return f"s3://aws-logs-{_account_id}-{_region}/elasticmapreduce/"
def describe_dashboard(
    name: Optional[str] = None,
    dashboard_id: Optional[str] = None,
    account_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> Dict[str, Any]:
    """Describe a QuickSight dashboard by name or ID.

    Note
    ----
    You must pass a non-None ``name`` or ``dashboard_id`` argument.

    Parameters
    ----------
    name : str, optional
        Dashboard name.
    dashboard_id : str, optional
        Dashboard ID.
    account_id : str, optional
        If None, the account ID will be inferred from your boto3 session.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    Dict[str, Any]
        Dashboard Description.

    Examples
    --------
    >>> import awswrangler as wr
    >>> description = wr.quicksight.describe_dashboard(name="my-dashboard")
    """
    if (name is None) and (dashboard_id is None):
        raise exceptions.InvalidArgument(
            "You must pass a not None name or dashboard_id argument.")
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    if (dashboard_id is None) and (name is not None):
        dashboard_id = get_dashboard_id(name=name,
                                        account_id=account_id,
                                        boto3_session=session)
    client: boto3.client = _utils.client(service_name="quicksight",
                                         session=session)
    return client.describe_dashboard(AwsAccountId=account_id,
                                     DashboardId=dashboard_id)["Dashboard"]
def list_ingestions(
    dataset_name: Optional[str] = None,
    dataset_id: Optional[str] = None,
    account_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> List[Dict[str, Any]]:
    """List the history of SPICE ingestions for a dataset.

    Parameters
    ----------
    dataset_name : str, optional
        Dataset name.
    dataset_id : str, optional
        The ID of the dataset used in the ingestion.
    account_id : str, optional
        If None, the account ID will be inferred from your boto3 session.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    List[Dict[str, Any]]
        List of SPICE ingestions.

    Examples
    --------
    >>> import awswrangler as wr
    >>> ingestions = wr.quicksight.list_ingestions()
    """
    if (dataset_name is None) and (dataset_id is None):
        raise exceptions.InvalidArgument(
            "You must pass a not None name or dataset_id argument.")
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    if (dataset_id is None) and (dataset_name is not None):
        dataset_id = get_dataset_id(name=dataset_name,
                                    account_id=account_id,
                                    boto3_session=session)
    return _list(
        func_name="list_ingestions",
        attr_name="Ingestions",
        account_id=account_id,
        boto3_session=boto3_session,
        DataSetId=dataset_id,
    )
def _list(
    func_name: str,
    attr_name: str,
    account_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
    **kwargs,
) -> List[Dict[str, Any]]:
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    client: boto3.client = _utils.client(service_name="quicksight",
                                         session=session)
    func: Callable = getattr(client, func_name)
    response = func(AwsAccountId=account_id, **kwargs)
    next_token: Optional[str] = response.get("NextToken", None)
    result: List[Dict[str, Any]] = response[attr_name]
    while next_token is not None:
        response = func(AwsAccountId=account_id,
                        NextToken=next_token,
                        **kwargs)
        next_token = response.get("NextToken", None)
        result += response[attr_name]
    return result
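The public ``list_*`` helpers delegate to this paginating ``_list``. Below is a minimal sketch (an assumption, not verbatim library code) of what ``list_templates`` from Example #1 could look like; ``TemplateSummaryList`` is the attribute holding results in the QuickSight ListTemplates response:

def list_templates(account_id: Optional[str] = None,
                   boto3_session: Optional[boto3.Session] = None) -> List[Dict[str, Any]]:
    return _list(
        func_name="list_templates",
        attr_name="TemplateSummaryList",
        account_id=account_id,
        boto3_session=boto3_session,
    )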
Example #10
def _build_cluster_args(**pars):  # pylint: disable=too-many-branches,too-many-statements
    account_id: str = _utils.get_account_id(boto3_session=pars["boto3_session"])
    region: str = _utils.get_region_from_subnet(subnet_id=pars["subnet_id"], boto3_session=pars["boto3_session"])

    # S3 Logging path
    if pars.get("logging_s3_path") is None:
        pars["logging_s3_path"] = _get_default_logging_path(
            subnet_id=None, account_id=account_id, region=region, boto3_session=pars["boto3_session"]
        )

    spark_env: Optional[Dict[str, str]] = None
    yarn_env: Optional[Dict[str, str]] = None
    livy_env: Optional[Dict[str, str]] = None

    if pars["spark_pyarrow"] is True:
        if pars["spark_defaults"] is None:
            pars["spark_defaults"] = {"spark.sql.execution.arrow.enabled": "true"}
        else:  # pragma: no cover
            pars["spark_defaults"]["spark.sql.execution.arrow.enabled"] = "true"
        spark_env = {"ARROW_PRE_0_15_IPC_FORMAT": "1"}
        yarn_env = {"ARROW_PRE_0_15_IPC_FORMAT": "1"}
        livy_env = {"ARROW_PRE_0_15_IPC_FORMAT": "1"}

    if pars["python3"] is True:
        if spark_env is None:
            spark_env = {"PYSPARK_PYTHON": "/usr/bin/python3"}  # pragma: no cover
        else:
            spark_env["PYSPARK_PYTHON"] = "/usr/bin/python3"

    if pars["spark_jars_path"] is not None:
        paths: str = ",".join(pars["spark_jars_path"])
        if pars["spark_defaults"] is None:  # pragma: no cover
            pars["spark_defaults"] = {"spark.jars": paths}
        else:
            pars["spark_defaults"]["spark.jars"] = paths

    args: Dict[str, Any] = {
        "Name": pars["cluster_name"],
        "LogUri": pars["logging_s3_path"],
        "ReleaseLabel": pars["emr_release"],
        "VisibleToAllUsers": pars["visible_to_all_users"],
        "JobFlowRole": pars["emr_ec2_role"],
        "ServiceRole": pars["emr_role"],
        "Instances": {
            "KeepJobFlowAliveWhenNoSteps": pars["keep_cluster_alive_when_no_steps"],
            "TerminationProtected": pars["termination_protected"],
            "Ec2SubnetId": pars["subnet_id"],
            "InstanceFleets": [],
        },
    }

    # EC2 Key Pair
    if pars["key_pair_name"] is not None:  # pragma: no cover
        args["Instances"]["Ec2KeyName"] = pars["key_pair_name"]

    # Security groups
    if pars["security_group_master"] is not None:  # pragma: no cover
        args["Instances"]["EmrManagedMasterSecurityGroup"] = pars["security_group_master"]
    if pars["security_groups_master_additional"] is not None:  # pragma: no cover
        args["Instances"]["AdditionalMasterSecurityGroups"] = pars["security_groups_master_additional"]
    if pars["security_group_slave"] is not None:  # pragma: no cover
        args["Instances"]["EmrManagedSlaveSecurityGroup"] = pars["security_group_slave"]
    if pars["security_groups_slave_additional"] is not None:  # pragma: no cover
        args["Instances"]["AdditionalSlaveSecurityGroups"] = pars["security_groups_slave_additional"]
    if pars["security_group_service_access"] is not None:  # pragma: no cover
        args["Instances"]["ServiceAccessSecurityGroup"] = pars["security_group_service_access"]

    # Configurations
    args["Configurations"] = [
        {"Classification": "spark-log4j", "Properties": {"log4j.rootCategory": f"{pars['spark_log_level']}, console"}}
    ]
    if pars["docker"] is True:
        if pars.get("extra_registries") is None:
            extra_registries: List[str] = []
        else:  # pragma: no cover
            extra_registries = pars["extra_registries"]
        registries: str = f"local,centos,{account_id}.dkr.ecr.{region}.amazonaws.com,{','.join(extra_registries)}"
        registries = registries[:-1] if registries.endswith(",") else registries
        args["Configurations"].append(
            {
                "Classification": "container-executor",
                "Properties": {},
                "Configurations": [
                    {
                        "Classification": "docker",
                        "Properties": {
                            "docker.privileged-containers.registries": registries,
                            "docker.trusted.registries": registries,
                        },
                        "Configurations": [],
                    }
                ],
            }
        )
    if spark_env is not None:
        args["Configurations"].append(
            {
                "Classification": "spark-env",
                "Properties": {},
                "Configurations": [{"Classification": "export", "Properties": spark_env, "Configurations": []}],
            }
        )
    if yarn_env is not None:
        args["Configurations"].append(
            {
                "Classification": "yarn-env",
                "Properties": {},
                "Configurations": [{"Classification": "export", "Properties": yarn_env, "Configurations": []}],
            }
        )
    if livy_env is not None:
        args["Configurations"].append(
            {
                "Classification": "livy-env",
                "Properties": {},
                "Configurations": [{"Classification": "export", "Properties": livy_env, "Configurations": []}],
            }
        )
    if pars["spark_glue_catalog"] is True:
        args["Configurations"].append(
            {
                "Classification": "spark-hive-site",
                "Properties": {
                    "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"  # noqa
                },
                "Configurations": [],
            }
        )
    if pars["hive_glue_catalog"] is True:
        hive_conf: Dict[str, Any] = {
            "Classification": "hive-site",
            "Properties": {
                "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"  # noqa
            },
            "Configurations": [],
        }
        args["Configurations"].append(hive_conf)
    if pars["presto_glue_catalog"] is True:
        args["Configurations"].append(
            {
                "Classification": "presto-connector-hive",
                "Properties": {"hive.metastore.glue.datacatalog.enabled": "true"},
                "Configurations": [],
            }
        )
    if pars["consistent_view"] is True:
        args["Configurations"].append(
            {
                "Classification": "emrfs-site",
                "Properties": {
                    "fs.s3.consistent.retryPeriodSeconds": str(pars.get("consistent_view_retry_seconds", "10")),
                    "fs.s3.consistent": "true",
                    "fs.s3.consistent.retryCount": str(pars.get("consistent_view_retry_count", "5")),
                    "fs.s3.consistent.metadata.tableName": pars.get("consistent_view_table_name", "EmrFSMetadata"),
                },
            }
        )
    if pars["maximize_resource_allocation"] is True:
        args["Configurations"].append({"Classification": "spark", "Properties": {"maximizeResourceAllocation": "true"}})
    if pars["spark_defaults"] is not None:
        spark_defaults: Dict[str, Union[str, Dict[str, str]]] = {
            "Classification": "spark-defaults",
            "Properties": pars["spark_defaults"],
        }
        args["Configurations"].append(spark_defaults)
    if pars.get("custom_classifications") is not None:
        for c in pars["custom_classifications"]:
            args["Configurations"].append(c)

    # Applications
    if pars["applications"]:
        args["Applications"] = [{"Name": x} for x in pars["applications"]]

    # Bootstraps
    if pars["bootstraps_paths"]:  # pragma: no cover
        args["BootstrapActions"] = [{"Name": x, "ScriptBootstrapAction": {"Path": x}} for x in pars["bootstraps_paths"]]

    # Debugging and Steps
    if (pars["debugging"] is True) or (pars["steps"] is not None):
        args["Steps"] = []
        if pars["debugging"] is True:
            args["Steps"].append(
                {
                    "Name": "Setup Hadoop Debugging",
                    "ActionOnFailure": "TERMINATE_CLUSTER",
                    "HadoopJarStep": {"Jar": "command-runner.jar", "Args": ["state-pusher-script"]},
                }
            )
        if pars["steps"] is not None:
            args["Steps"] += pars["steps"]

    # Master Instance Fleet
    timeout_action_master: str = (
        "SWITCH_TO_ON_DEMAND" if pars["spot_timeout_to_on_demand_master"] else "TERMINATE_CLUSTER"
    )
    fleet_master: Dict = {
        "Name": "MASTER",
        "InstanceFleetType": "MASTER",
        "TargetOnDemandCapacity": pars["instance_num_on_demand_master"],
        "TargetSpotCapacity": pars["instance_num_spot_master"],
        "InstanceTypeConfigs": [
            {
                "InstanceType": pars["instance_type_master"],
                "WeightedCapacity": 1,
                "BidPriceAsPercentageOfOnDemandPrice": pars["spot_bid_percentage_of_on_demand_master"],
                "EbsConfiguration": {
                    "EbsBlockDeviceConfigs": [
                        {
                            "VolumeSpecification": {"SizeInGB": pars["instance_ebs_size_master"], "VolumeType": "gp2"},
                            "VolumesPerInstance": 1,
                        }
                    ],
                    "EbsOptimized": True,
                },
            }
        ],
    }
    if pars["instance_num_spot_master"] > 0:  # pragma: no cover
        fleet_master["LaunchSpecifications"] = {
            "SpotSpecification": {
                "TimeoutDurationMinutes": pars["spot_provisioning_timeout_master"],
                "TimeoutAction": timeout_action_master,
            }
        }
    args["Instances"]["InstanceFleets"].append(fleet_master)

    # Core Instance Fleet
    if (pars["instance_num_spot_core"] > 0) or pars["instance_num_on_demand_core"] > 0:
        timeout_action_core = "SWITCH_TO_ON_DEMAND" if pars["spot_timeout_to_on_demand_core"] else "TERMINATE_CLUSTER"
        fleet_core: Dict = {
            "Name": "CORE",
            "InstanceFleetType": "CORE",
            "TargetOnDemandCapacity": pars["instance_num_on_demand_core"],
            "TargetSpotCapacity": pars["instance_num_spot_core"],
            "InstanceTypeConfigs": [
                {
                    "InstanceType": pars["instance_type_core"],
                    "WeightedCapacity": 1,
                    "BidPriceAsPercentageOfOnDemandPrice": pars["spot_bid_percentage_of_on_demand_core"],
                    "EbsConfiguration": {
                        "EbsBlockDeviceConfigs": [
                            {
                                "VolumeSpecification": {
                                    "SizeInGB": pars["instance_ebs_size_core"],
                                    "VolumeType": "gp2",
                                },
                                "VolumesPerInstance": 1,
                            }
                        ],
                        "EbsOptimized": True,
                    },
                }
            ],
        }
        if pars["instance_num_spot_core"] > 0:
            fleet_core["LaunchSpecifications"] = {
                "SpotSpecification": {
                    "TimeoutDurationMinutes": pars["spot_provisioning_timeout_core"],
                    "TimeoutAction": timeout_action_core,
                }
            }
        args["Instances"]["InstanceFleets"].append(fleet_core)

    # Task Instance Fleet
    if (pars["instance_num_spot_task"] > 0) or pars["instance_num_on_demand_task"] > 0:
        timeout_action_task: str = "SWITCH_TO_ON_DEMAND" if pars[
            "spot_timeout_to_on_demand_task"
        ] else "TERMINATE_CLUSTER"
        fleet_task: Dict = {
            "Name": "TASK",
            "InstanceFleetType": "TASK",
            "TargetOnDemandCapacity": pars["instance_num_on_demand_task"],
            "TargetSpotCapacity": pars["instance_num_spot_task"],
            "InstanceTypeConfigs": [
                {
                    "InstanceType": pars["instance_type_task"],
                    "WeightedCapacity": 1,
                    "BidPriceAsPercentageOfOnDemandPrice": pars["spot_bid_percentage_of_on_demand_task"],
                    "EbsConfiguration": {
                        "EbsBlockDeviceConfigs": [
                            {
                                "VolumeSpecification": {
                                    "SizeInGB": pars["instance_ebs_size_task"],
                                    "VolumeType": "gp2",
                                },
                                "VolumesPerInstance": 1,
                            }
                        ],
                        "EbsOptimized": True,
                    },
                }
            ],
        }
        if pars["instance_num_spot_task"] > 0:
            fleet_task["LaunchSpecifications"] = {
                "SpotSpecification": {
                    "TimeoutDurationMinutes": pars["spot_provisioning_timeout_task"],
                    "TimeoutAction": timeout_action_task,
                }
            }
        args["Instances"]["InstanceFleets"].append(fleet_task)

    # Tags
    if pars["tags"] is not None:
        args["Tags"] = [{"Key": k, "Value": v} for k, v in pars["tags"].items()]

    _logger.debug("args: \n%s", pprint.pformat(args))
    return args
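``_build_cluster_args`` only assembles the request payload; the caller still has to submit it to EMR. A minimal sketch of that final step, assuming the same ``_utils`` helpers (``run_job_flow`` is the real boto3 EMR operation; the ``create_cluster`` wrapper shown here is an assumption):

def create_cluster(**pars) -> str:
    args: Dict[str, Any] = _build_cluster_args(**pars)
    client: boto3.client = _utils.client(service_name="emr", session=pars["boto3_session"])
    response: Dict[str, Any] = client.run_job_flow(**args)
    return response["JobFlowId"]  # The new EMR cluster ID.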
Example #11
def create_athena_dataset(
    name: str,
    database: Optional[str] = None,
    table: Optional[str] = None,
    sql: Optional[str] = None,
    sql_name: str = "CustomSQL",
    data_source_name: Optional[str] = None,
    data_source_arn: Optional[str] = None,
    import_mode: str = "DIRECT_QUERY",
    allowed_to_use: Optional[List[str]] = None,
    allowed_to_manage: Optional[List[str]] = None,
    logical_table_alias: str = "LogicalTable",
    rename_columns: Optional[Dict[str, str]] = None,
    cast_columns_types: Optional[Dict[str, str]] = None,
    tags: Optional[Dict[str, str]] = None,
    account_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> None:
    """Create a QuickSight dataset.

    Note
    ----
    You will not be able to see the dataset in the console
    if you do not pass your user to one of the ``allowed_*`` arguments.

    Note
    ----
    You must pass ``database``/``table`` OR ``sql`` argument.

    Note
    ----
    You must pass ``data_source_name`` OR ``data_source_arn`` argument.

    Parameters
    ----------
    name : str
        Dataset name.
    database : str, optional
        Athena's database name. Required (together with ``table``) unless ``sql`` is used.
    table : str, optional
        Athena's table name. Required (together with ``database``) unless ``sql`` is used.
    sql : str, optional
        Use a SQL query to define your table.
    sql_name : str
        Query name.
    data_source_name : str, optional
        QuickSight data source name.
    data_source_arn : str, optional
        QuickSight data source ARN.
    import_mode : str
        Indicates whether you want to import the data into SPICE.
        'SPICE'|'DIRECT_QUERY'
    tags : Dict[str, str], optional
        Key/Value collection to put on the dataset.
        e.g. {"foo": "boo", "bar": "xoo"}
    allowed_to_use : List[str], optional
        List of principals that will be allowed to see and use the dataset.
        e.g. ["john", "Mary"]
    allowed_to_manage : List[str], optional
        List of principals that will be allowed to see, use, update and delete the dataset.
        e.g. ["Mary"]
    logical_table_alias : str
        A display name for the logical table.
    rename_columns : Dict[str, str], optional
        Dictionary to map column renames. e.g. {"old_name": "new_name", "old_name2": "new_name2"}
    cast_columns_types : Dict[str, str], optional
        Dictionary to map column casts. e.g. {"col_name": "STRING", "col_name2": "DECIMAL"}
        Valid types: 'STRING'|'INTEGER'|'DECIMAL'|'DATETIME'
    account_id : str, optional
        If None, the account ID will be inferred from your boto3 session.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    None
        None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.quicksight.create_athena_dataset(
    ...     name="...",
    ...     database="..."
    ...     table="..."
    ...     data_source_name="..."
    ...     allowed_to_manage=["Mary"]
    ... )
    """
    if (data_source_name is None) and (data_source_arn is None):
        raise exceptions.InvalidArgument(
            "You must pass a not None data_source_name or data_source_arn argument."
        )
    if ((database is None) and (table is None)) and (sql is None):
        raise exceptions.InvalidArgument(
            "You must pass database/table OR sql argument.")
    if (database is not None) and (sql is not None):
        raise exceptions.InvalidArgument(
            "If you provide the sql argument, include the database name inside the sql statement. "
            "Do NOT pass it in through the database argument.")
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    client: boto3.client = _utils.client(service_name="quicksight",
                                         session=session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    if (data_source_arn is None) and (data_source_name is not None):
        data_source_arn = get_data_source_arn(name=data_source_name,
                                              account_id=account_id,
                                              boto3_session=session)
    if sql is not None:
        physical_table: Dict[str, Dict[str, Any]] = {
            "CustomSql": {
                "DataSourceArn": data_source_arn,
                "Name": sql_name,
                "SqlQuery": sql,
                "Columns": extract_athena_query_columns(
                    sql=sql,
                    data_source_arn=data_source_arn,  # type: ignore
                    account_id=account_id,
                    boto3_session=session,
                ),
            }
        }
    else:
        physical_table = {
            "RelationalTable": {
                "DataSourceArn": data_source_arn,
                "Schema": database,
                "Name": table,
                "InputColumns": extract_athena_table_columns(
                    database=database,  # type: ignore
                    table=table,  # type: ignore
                    boto3_session=session,
                ),
            }
        }
    table_uuid: str = uuid.uuid4().hex
    args: Dict[str, Any] = {
        "AwsAccountId": account_id,
        "DataSetId": name,
        "Name": name,
        "ImportMode": import_mode,
        "PhysicalTableMap": {table_uuid: physical_table},
        "LogicalTableMap": {table_uuid: {"Alias": logical_table_alias, "Source": {"PhysicalTableId": table_uuid}}},
    }
    trans: List[Dict[str, Dict[str, Any]]] = _generate_transformations(
        rename_columns=rename_columns, cast_columns_types=cast_columns_types)
    if trans:
        args["LogicalTableMap"][table_uuid]["DataTransforms"] = trans
    permissions: List[Dict[str, Union[str, List[str]]]] = _generate_permissions(
        resource="dataset",
        account_id=account_id,
        boto3_session=session,
        allowed_to_use=allowed_to_use,
        allowed_to_manage=allowed_to_manage,
    )
    if permissions:
        args["Permissions"] = permissions
    if tags is not None:
        args["Tags"] = [{"Key": k, "Value": v} for k, v in tags.items()]
    client.create_data_set(**args)
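Creating a SPICE-backed dataset pairs naturally with ``create_ingestion`` from Example #3 to load the data right away. A hedged usage sketch with placeholder names:

import awswrangler as wr

# Create a SPICE dataset on top of an existing Athena data source...
wr.quicksight.create_athena_dataset(
    name="my_dataset",
    database="my_database",
    table="my_table",
    data_source_name="my_data_source",
    import_mode="SPICE",
    allowed_to_manage=["Mary"],
)
# ...then trigger the first SPICE refresh.
ingestion_id = wr.quicksight.create_ingestion(dataset_name="my_dataset")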
Example #12
def create_athena_data_source(
    name: str,
    workgroup: str = "primary",
    allowed_to_use: Optional[List[str]] = None,
    allowed_to_manage: Optional[List[str]] = None,
    tags: Optional[Dict[str, str]] = None,
    account_id: Optional[str] = None,
    boto3_session: Optional[boto3.Session] = None,
) -> None:
    """Create a QuickSight data source pointing to an Athena/Workgroup.

    Note
    ----
    You will not be able to see the data source in the console
    if you do not pass your user to one of the ``allowed_*`` arguments.

    Parameters
    ----------
    name : str
        Data source name.
    workgroup : str
        Athena workgroup.
    tags : Dict[str, str], optional
        Key/Value collection to put on the data source.
        e.g. {"foo": "boo", "bar": "xoo"}
    allowed_to_use : List[str], optional
        List of principals that will be allowed to see and use the data source.
        e.g. ["John"]
    allowed_to_manage : List[str], optional
        List of principals that will be allowed to see, use, update and delete the data source.
        e.g. ["Mary"]
    account_id : str, optional
        If None, the account ID will be inferred from your boto3 session.
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session will be used if boto3_session receives None.

    Returns
    -------
    None
        None.

    Examples
    --------
    >>> import awswrangler as wr
    >>> wr.quicksight.create_athena_data_source(
    ...     name="...",
    ...     allowed_to_manage=["john"]
    ... )
    """
    session: boto3.Session = _utils.ensure_session(session=boto3_session)
    client: boto3.client = _utils.client(service_name="quicksight",
                                         session=session)
    if account_id is None:
        account_id = _utils.get_account_id(boto3_session=session)
    args: Dict[str, Any] = {
        "AwsAccountId": account_id,
        "DataSourceId": name,
        "Name": name,
        "Type": "ATHENA",
        "DataSourceParameters": {"AthenaParameters": {"WorkGroup": workgroup}},
        "SslProperties": {"DisableSsl": True},
    }
    permissions: List[Dict[str, Union[str, List[str]]]] = _generate_permissions(
        resource="data_source",
        account_id=account_id,
        boto3_session=session,
        allowed_to_use=allowed_to_use,
        allowed_to_manage=allowed_to_manage,
    )
    if permissions:
        args["Permissions"] = permissions
    if tags is not None:
        args["Tags"] = [{"Key": k, "Value": v} for k, v in tags.items()]
    client.create_data_source(**args)
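In practice this is the first step of the flow in Example #11: register the data source once, then point datasets at it by name. A short sketch with placeholder names:

import awswrangler as wr

# One-time setup: expose an Athena workgroup to QuickSight.
wr.quicksight.create_athena_data_source(name="my_data_source", allowed_to_manage=["Mary"])

# Datasets can now reference the data source by name instead of by ARN.
wr.quicksight.create_athena_dataset(
    name="my_dataset",
    database="my_database",
    table="my_table",
    data_source_name="my_data_source",
)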