Exemple #1
0
    def _ingest_single_batch(
        data_frame: DataFrame,
        feature_group_name: str,
        client_config: Config,
        start_index: int,
        end_index: int,
        profile_name: str = None,
    ) -> List[int]:
        """Ingest a single batch of DataFrame rows into FeatureStore.

        Each row in ``data_frame[start_index:end_index]`` is converted to a list
        of ``FeatureValue`` objects (cells for which ``pd.notna`` is false are
        skipped) and sent with one ``PutRecord`` call. A failing row is logged
        and recorded; it does not abort the rest of the batch.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            feature_group_name (str): name of the Feature Group.
            client_config (Config): Configuration for the sagemaker feature store runtime
                client to perform boto calls. If it specifies neither
                ``max_attempts`` nor ``total_max_attempts`` in its retry settings,
                a copy with ``{"max_attempts": 10, "mode": "standard"}`` is used
                instead; the object passed in is never modified.
            start_index (int): starting position to ingest in this batch.
            end_index (int): ending position to ingest in this batch.
            profile_name (str): the profile credential should be used for ``PutRecord``
                (default: None).

        Returns:
            List of row indices that failed to be ingested.
        """
        # ``retries`` is ``None`` on a Config that never configured retries;
        # treat that the same as an empty retry configuration instead of
        # failing on the membership tests below.
        retry_config = client_config.retries or {}
        if "max_attempts" not in retry_config and "total_max_attempts" not in retry_config:
            # Copy first so the caller's Config is left untouched.
            client_config = copy.deepcopy(client_config)
            client_config.retries = {"max_attempts": 10, "mode": "standard"}
        sagemaker_featurestore_runtime_client = boto3.Session(
            profile_name=profile_name
        ).client(
            service_name="sagemaker-featurestore-runtime",
            config=client_config,
        )

        logger.info("Started ingesting index %d to %d", start_index, end_index)
        failed_rows = []
        # Column labels do not change between rows; look them up once.
        column_names = data_frame.columns
        for row in data_frame[start_index:end_index].itertuples():
            # ``row[0]`` is the index label; the remaining fields line up
            # one-to-one with ``column_names``.
            record = [
                FeatureValue(feature_name=column_name, value_as_string=str(cell))
                for column_name, cell in zip(column_names, row[1:])
                if pd.notna(cell)
            ]
            try:
                sagemaker_featurestore_runtime_client.put_record(
                    FeatureGroupName=feature_group_name,
                    Record=[value.to_dict() for value in record],
                )
            except Exception as e:  # pylint: disable=broad-except
                # ``%s`` rather than ``%d``: the index label is not guaranteed
                # to be an integer, and ``%d`` would break log formatting.
                logger.error("Failed to ingest row %s: %s", row[0], e)
                failed_rows.append(row[0])
        return failed_rows
Exemple #2
0
    def __init__(  # type: ignore
        self,
        agent_config_id: str = None,
        name: str = None,
        labels: Iterable[str] = None,
        env_vars: dict = None,
        max_polls: int = None,
        agent_address: str = None,
        no_cloud_logs: bool = False,
        task_definition_path: str = None,
        run_task_kwargs_path: str = None,
        aws_access_key_id: str = None,
        aws_secret_access_key: str = None,
        aws_session_token: str = None,
        region_name: str = None,
        cluster: str = None,
        launch_type: str = None,
        task_role_arn: str = None,
        botocore_config: dict = None,
    ) -> None:
        """Set up the agent: boto clients, default task definition and run kwargs.

        Args:
            agent_config_id, name, labels, env_vars, max_polls, agent_address,
                no_cloud_logs: forwarded unchanged to the base class constructor.
            task_definition_path (str): location of the default task definition
                (YAML). Falls back to ``DEFAULT_TASK_DEFINITION_PATH`` when empty.
            run_task_kwargs_path (str): location of a YAML document with default
                ``run_task`` kwargs. When empty, an empty dict is used.
            aws_access_key_id, aws_secret_access_key, aws_session_token,
                region_name: passed to ``get_boto_client`` for both clients.
            cluster (str): stored on the agent as ``self.cluster``.
            launch_type (str): upper-cased and stored; defaults to ``"FARGATE"``.
            task_role_arn (str): when set, written into the loaded task
                definition under ``"taskRoleArn"``.
            botocore_config (dict): keyword arguments for ``botocore.config.Config``.

        Raises:
            Exception: whatever loading either YAML document raises; the error is
                logged and then re-raised unchanged.
        """
        super().__init__(
            agent_config_id=agent_config_id,
            name=name,
            labels=labels,
            env_vars=env_vars,
            max_polls=max_polls,
            agent_address=agent_address,
            no_cloud_logs=no_cloud_logs,
        )

        from botocore.config import Config
        from prefect.utilities.aws import get_boto_client

        self.cluster = cluster
        if launch_type:
            self.launch_type = launch_type.upper()
        else:
            self.launch_type = "FARGATE"
        self.task_role_arn = task_role_arn

        # Build the boto configuration. Boto's own default retry mode is kept
        # for backwards compatibility, so we opt in to "standard" ourselves,
        # but only when no retry settings were supplied, which leaves room for
        # the user to choose via `botocore_config` or their aws config file.
        #
        # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
        # for more info.
        config_options = botocore_config or {}
        boto_config = Config(**config_options)
        if not boto_config.retries:
            boto_config.retries = {"mode": "standard"}

        self.boto_kwargs = {
            "aws_access_key_id": aws_access_key_id,
            "aws_secret_access_key": aws_secret_access_key,
            "aws_session_token": aws_session_token,
            "region_name": region_name,
            "config": boto_config,
        }  # type: Dict[str, Any]

        self.ecs_client = get_boto_client("ecs", **self.boto_kwargs)
        self.rgtag_client = get_boto_client(
            "resourcegroupstaggingapi", **self.boto_kwargs
        )

        # Default task definition: fall back to the packaged one if no path
        # was given.
        task_definition_path = task_definition_path or DEFAULT_TASK_DEFINITION_PATH
        try:
            definition_bytes = read_bytes_from_path(task_definition_path)
            self.task_definition = yaml.safe_load(definition_bytes)
        except Exception:
            self.logger.error(
                "Failed to load default task definition from %r",
                task_definition_path,
                exc_info=True,
            )
            raise

        # Default `run_task` kwargs: empty unless a path was supplied.
        if not run_task_kwargs_path:
            self.run_task_kwargs = {}
        else:
            try:
                kwargs_bytes = read_bytes_from_path(run_task_kwargs_path)
                self.run_task_kwargs = yaml.safe_load(kwargs_bytes)
            except Exception:
                self.logger.error(
                    "Failed to load default `run_task` kwargs from %r",
                    run_task_kwargs_path,
                    exc_info=True,
                )
                raise

        # An agent-level `task_role_arn` is written into the default template
        # only; it is applied solely when the agent's default template is used.
        if self.task_role_arn:
            self.task_definition["taskRoleArn"] = self.task_role_arn

        # On fargate, fill in `networkConfiguration` automatically when the
        # user did not provide one.
        wants_inferred_network = (
            self.launch_type == "FARGATE"
            and not self.run_task_kwargs.get("networkConfiguration")
        )
        if wants_inferred_network:
            inferred = self.infer_network_configuration()
            self.run_task_kwargs["networkConfiguration"] = inferred
Exemple #3
0
        self.task_role_arn = task_role_arn
<<<<<<< HEAD
        self.execution_role_arn = execution_role_arn
=======
>>>>>>> prefect clone

        # Load boto configuration. We want to use the standard retry mode by
        # default (which isn't boto's default due to backwards compatibility).
        # The logic below lets the user override our default retry mode either
        # in `botocore_config` or in their aws config file.
        #
        # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
        # for more info.
        boto_config = Config(**botocore_config or {})
        if not boto_config.retries:
            boto_config.retries = {"mode": "standard"}

        self.boto_kwargs = dict(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            aws_session_token=aws_session_token,
            region_name=region_name,
            config=boto_config,
        )  # type: Dict[str, Any]

        self.ecs_client = get_boto_client("ecs", **self.boto_kwargs)
        self.rgtag_client = get_boto_client(
            "resourcegroupstaggingapi", **self.boto_kwargs
        )

        # Load default task definition