def _ingest_single_batch(
    data_frame: DataFrame,
    feature_group_name: str,
    client_config: Config,
    start_index: int,
    end_index: int,
    profile_name: str = None,
) -> List[int]:
    """Ingest a single batch of DataFrame rows into FeatureStore.

    Args:
        data_frame (DataFrame): source DataFrame to be ingested.
        feature_group_name (str): name of the Feature Group.
        client_config (Config): Configuration for the sagemaker feature store
            runtime client to perform boto calls.
        start_index (int): starting position to ingest in this batch.
        end_index (int): ending position to ingest in this batch.
        profile_name (str): the profile credential should be used for
            ``PutRecord`` (default: None).

    Returns:
        List of row indices that failed to be ingested.
    """
    # botocore's `Config.retries` defaults to None, so guard against that
    # before membership-testing it; only inject our retry default when the
    # caller did not configure retries themselves.
    retry_config = client_config.retries
    if not retry_config or (
        "max_attempts" not in retry_config
        and "total_max_attempts" not in retry_config
    ):
        # Deep-copy so the caller's Config object is not mutated.
        client_config = copy.deepcopy(client_config)
        client_config.retries = {"max_attempts": 10, "mode": "standard"}
    sagemaker_featurestore_runtime_client = boto3.Session(
        profile_name=profile_name
    ).client(
        service_name="sagemaker-featurestore-runtime", config=client_config
    )

    logger.info("Started ingesting index %d to %d", start_index, end_index)
    failed_rows = []
    # Hoist the column lookup out of the per-row loop.
    columns = data_frame.columns
    for row in data_frame[start_index:end_index].itertuples():
        # itertuples() yields (index, col0, col1, ...): position 0 is the
        # DataFrame index, so feature i lives at row[i + 1]. NaN values are
        # skipped rather than serialized as the string "nan".
        record = [
            FeatureValue(
                feature_name=columns[index - 1],
                value_as_string=str(row[index]),
            )
            for index in range(1, len(row))
            if pd.notna(row[index])
        ]
        try:
            sagemaker_featurestore_runtime_client.put_record(
                FeatureGroupName=feature_group_name,
                Record=[value.to_dict() for value in record],
            )
        except Exception as e:  # pylint: disable=broad-except
            # Best-effort ingestion: record the failing row index and
            # continue so one bad row doesn't abort the whole batch.
            logger.error("Failed to ingest row %d: %s", row[0], e)
            failed_rows.append(row[0])
    return failed_rows
def __init__(  # type: ignore
    self,
    agent_config_id: str = None,
    name: str = None,
    labels: Iterable[str] = None,
    env_vars: dict = None,
    max_polls: int = None,
    agent_address: str = None,
    no_cloud_logs: bool = False,
    task_definition_path: str = None,
    run_task_kwargs_path: str = None,
    aws_access_key_id: str = None,
    aws_secret_access_key: str = None,
    aws_session_token: str = None,
    region_name: str = None,
    cluster: str = None,
    launch_type: str = None,
    task_role_arn: str = None,
    botocore_config: dict = None,
) -> None:
    """Configure the agent: boto clients, default task definition, and
    default ``run_task`` kwargs."""
    super().__init__(
        agent_config_id=agent_config_id,
        name=name,
        labels=labels,
        env_vars=env_vars,
        max_polls=max_polls,
        agent_address=agent_address,
        no_cloud_logs=no_cloud_logs,
    )

    from botocore.config import Config
    from prefect.utilities.aws import get_boto_client

    self.cluster = cluster
    self.launch_type = "FARGATE" if not launch_type else launch_type.upper()
    self.task_role_arn = task_role_arn

    # Build the botocore Config. We want the "standard" retry mode by
    # default (boto itself still defaults to "legacy" for backwards
    # compatibility); a retry mode supplied by the user — either through
    # `botocore_config` or their aws config file — takes precedence.
    #
    # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
    # for more info.
    config = Config(**(botocore_config or {}))
    if not config.retries:
        config.retries = {"mode": "standard"}

    self.boto_kwargs = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
        "aws_session_token": aws_session_token,
        "region_name": region_name,
        "config": config,
    }  # type: Dict[str, Any]

    self.ecs_client = get_boto_client("ecs", **self.boto_kwargs)
    self.rgtag_client = get_boto_client(
        "resourcegroupstaggingapi", **self.boto_kwargs
    )

    # Load the default task definition template, falling back to the
    # packaged default path when none was given.
    definition_path = task_definition_path or DEFAULT_TASK_DEFINITION_PATH
    try:
        self.task_definition = yaml.safe_load(
            read_bytes_from_path(definition_path)
        )
    except Exception:
        self.logger.error(
            "Failed to load default task definition from %r",
            definition_path,
            exc_info=True,
        )
        raise

    # Load default `run_task` kwargs, if a path was provided.
    if not run_task_kwargs_path:
        self.run_task_kwargs = {}
    else:
        try:
            self.run_task_kwargs = yaml.safe_load(
                read_bytes_from_path(run_task_kwargs_path)
            )
        except Exception:
            self.logger.error(
                "Failed to load default `run_task` kwargs from %r",
                run_task_kwargs_path,
                exc_info=True,
            )
            raise

    # An agent-level `task_role_arn` only applies to the agent's default
    # template, so it is injected into the template here.
    if self.task_role_arn:
        self.task_definition["taskRoleArn"] = self.task_role_arn

    # On Fargate, fill in `networkConfiguration` automatically when the
    # user hasn't configured one themselves.
    if self.launch_type == "FARGATE":
        if not self.run_task_kwargs.get("networkConfiguration"):
            self.run_task_kwargs[
                "networkConfiguration"
            ] = self.infer_network_configuration()
self.task_role_arn = task_role_arn
# Resolved unresolved VCS merge-conflict markers (<<<<<<< / ======= / >>>>>>>)
# that made this fragment a syntax error: the HEAD side is kept, since it adds
# the `execution_role_arn` assignment and the other side added nothing here.
self.execution_role_arn = execution_role_arn

# Load boto configuration. We want to use the standard retry mode by
# default (which isn't boto's default due to backwards compatibility).
# The logic below lets the user override our default retry mode either
# in `botocore_config` or in their aws config file.
#
# See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
# for more info.
boto_config = Config(**botocore_config or {})
if not boto_config.retries:
    boto_config.retries = {"mode": "standard"}

self.boto_kwargs = dict(
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    aws_session_token=aws_session_token,
    region_name=region_name,
    config=boto_config,
)  # type: Dict[str, Any]

self.ecs_client = get_boto_client("ecs", **self.boto_kwargs)
self.rgtag_client = get_boto_client(
    "resourcegroupstaggingapi", **self.boto_kwargs
)

# Load default task definition