Ejemplo n.º 1
0
    def _ingest_single_batch(
        data_frame: DataFrame,
        feature_group_name: str,
        sagemaker_session: Session,
        start_index: int,
        end_index: int,
    ):
        """Send one slice of a DataFrame to a Feature Group, one row per call.

        The rows of ``data_frame[start_index:end_index]`` are visited in order.
        Every cell of a row is converted with ``str()`` and paired with the
        name of the column it came from; the resulting record is passed to
        ``sagemaker_session.put_record``. Nothing is returned, and an exception
        raised by ``put_record`` propagates to the caller.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            feature_group_name (str): name of the Feature Group.
            sagemaker_session (Session): session whose ``put_record`` is called
                once per row.
            start_index (int): lower bound of the slice taken for this batch.
            end_index (int): upper bound of the slice taken for this batch.
        """
        logger.info("Started ingesting index %d to %d", start_index, end_index)
        column_names = data_frame.columns
        batch = data_frame[start_index:end_index]
        # index=False keeps the DataFrame index out of the tuples, so tuple
        # positions line up one-to-one with column positions.
        for cells in batch.itertuples(index=False):
            features = []
            for position, cell in enumerate(cells):
                features.append(
                    FeatureValue(
                        feature_name=column_names[position],
                        value_as_string=str(cell),
                    )
                )
            sagemaker_session.put_record(
                feature_group_name=feature_group_name,
                record=[feature.to_dict() for feature in features],
            )
Ejemplo n.º 2
0
    def _ingest_single_batch(
        data_frame: DataFrame,
        feature_group_name: str,
        sagemaker_session: Session,
        start_index: int,
        end_index: int,
    ) -> List[int]:
        """Ingest a single batch of DataFrame rows into FeatureStore.

        Each row of ``data_frame[start_index:end_index]`` is sent with its own
        ``put_record`` call. Cells for which ``pd.notna`` is false are left out
        of the record instead of being stringified. A row whose ``put_record``
        call raises is logged and recorded as failed; the remaining rows of the
        batch are still attempted.

        Args:
            data_frame (DataFrame): source DataFrame to be ingested.
            feature_group_name (str): name of the Feature Group.
            sagemaker_session (Session): session instance to perform boto calls.
            start_index (int): starting position to ingest in this batch.
            end_index (int): ending position to ingest in this batch.

        Returns:
            List of row indices that failed to be ingested. The entries are the
            DataFrame index labels of the failed rows (the first element that
            ``itertuples()`` yields for each row), in the order encountered.
        """
        logger.info("Started ingesting index %d to %d", start_index, end_index)
        failed_rows = []
        column_names = data_frame.columns
        # itertuples() yields (index_label, cell_0, cell_1, ...); splitting the
        # label off keeps the cells aligned with the column names.
        for row_label, *cells in data_frame[start_index:end_index].itertuples():
            record = [
                FeatureValue(feature_name=name, value_as_string=str(cell))
                for name, cell in zip(column_names, cells)
                if pd.notna(cell)
            ]
            try:
                sagemaker_session.put_record(
                    feature_group_name=feature_group_name,
                    record=[value.to_dict() for value in record],
                )
            except Exception as e:  # pylint: disable=broad-except
                # The label is formatted with %s, not %d: it is whatever the
                # DataFrame index holds and is not guaranteed to be numeric.
                logger.error("Failed to ingest row %s: %s", row_label, e)
                failed_rows.append(row_label)
        return failed_rows