Code example #1
def test_feature_store_create(sagemaker_session_mock, role_arn,
                              feature_group_dummy_definitions, s3_uri):
    feature_group = FeatureGroup(name="MyFeatureGroup",
                                 sagemaker_session=sagemaker_session_mock)
    feature_group.feature_definitions = feature_group_dummy_definitions
    feature_group.create(
        s3_uri=s3_uri,
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        role_arn=role_arn,
        enable_online_store=True,
    )
    sagemaker_session_mock.create_feature_group.assert_called_with(
        feature_group_name="MyFeatureGroup",
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        feature_definitions=[
            fd.to_dict() for fd in feature_group_dummy_definitions
        ],
        role_arn=role_arn,
        description=None,
        tags=None,
        online_store_config={"EnableOnlineStore": True},
        offline_store_config={
            "DisableGlueTableCreation": False,
            "S3StorageConfig": {
                "S3Uri": s3_uri
            },
        },
    )
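The fixtures this unit test depends on are not shown. A minimal sketch of what they might look like (all values are illustrative; only FractionalFeatureDefinition comes from the SageMaker SDK):

import pytest
from unittest.mock import Mock

from sagemaker.feature_store.feature_definition import FractionalFeatureDefinition


@pytest.fixture
def sagemaker_session_mock():
    return Mock()


@pytest.fixture
def role_arn():
    return "arn:aws:iam::123456789012:role/dummy-role"


@pytest.fixture
def s3_uri():
    return "s3://dummy-bucket/dummy-prefix"


@pytest.fixture
def feature_group_dummy_definitions():
    # A single fractional feature is enough for the create() call to serialize.
    return [FractionalFeatureDefinition(feature_name="feature1")]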
Code example #2
def save_to_feature_store():
    logger.info("Save to FeatureStore started")
    global feature_group

    df_data = pd.read_csv(feature_s3_url)
    logger.info("Read data from S3: %s", df_data.head())

    feature_store_session = Session(
        boto_session=boto_session,
        sagemaker_client=sagemaker_client,
        sagemaker_featurestore_runtime_client=featurestore_runtime)
    # You can modify the following to use a bucket of your choosing
    logger.info("Default bucket: %s", default_bucket)

    # record identifier and event time feature names
    record_identifier_feature_name = "IDpol"
    event_time_feature_name = "EventTime"
    current_time_sec = int(round(time.time()))
    # cast object dtype to string. The SageMaker FeatureStore Python SDK will then map the string dtype to String feature type.
    cast_object_to_string(df_data)
    df_data[event_time_feature_name] = pd.Series([current_time_sec] *
                                                 len(df_data),
                                                 dtype="float64")

    feature_group_name = 'insurance-policy-feature-group-' + strftime(
        '%d-%H-%M-%S', gmtime())
    logger.info("Feature Group Name: %s", feature_group_name)

    # Check if the feature group already exists; create one if it doesn't.
    if not feature_group_exist(feature_group_name):
        logger.info("Feature Group: %s doesn't exist. Create a new one.",
                    feature_group_name)

        feature_group = FeatureGroup(name=feature_group_name,
                                     sagemaker_session=feature_store_session)

        # append EventTime feature
        # load feature definitions to the feature group. SageMaker FeatureStore Python SDK will auto-detect the data schema based on input data.
        feature_group.load_feature_definitions(data_frame=df_data)
        # output is suppressed
        feature_group.create(
            s3_uri=f"s3://{default_bucket}/{prefix}",
            record_identifier_name=record_identifier_feature_name,
            event_time_feature_name=event_time_feature_name,
            role_arn=get_execution_role(),
            enable_online_store=True)

        wait_for_feature_group_creation_complete(feature_group=feature_group)
        feature_group.describe()
    else:
        logger.info("Feature Group: %s exits", feature_group)
        # Init feature group object if already exists
        feature_group = FeatureGroup(name=feature_group_name,
                                     sagemaker_session=feature_store_session)

    # ingest data into feature store
    feature_group.ingest(data_frame=df_data, max_workers=5, wait=True)
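Example #2 relies on two helpers it does not define, cast_object_to_string and feature_group_exist. A plausible sketch, assuming the first casts object columns in place and the second checks the name against the ListFeatureGroups API (both are reconstructions, not the original code):

def cast_object_to_string(data_frame):
    # Cast object dtype columns to string so the SDK maps them to the String feature type.
    for label in data_frame.columns:
        if data_frame.dtypes[label] == "object":
            data_frame[label] = data_frame[label].astype("str").astype("string")


def feature_group_exist(feature_group_name):
    # Look for an exact name match among feature groups with a matching prefix.
    response = sagemaker_client.list_feature_groups(NameContains=feature_group_name)
    names = [fg["FeatureGroupName"] for fg in response["FeatureGroupSummaries"]]
    return feature_group_name in names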
Code example #3
def create_or_load_feature_group(prefix, feature_group_name):

    # Feature Definitions for our records
    feature_definitions = [
        FeatureDefinition(feature_name="input_ids", feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name="input_mask", feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name="segment_ids", feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name="label_id", feature_type=FeatureTypeEnum.INTEGRAL),
        FeatureDefinition(feature_name="review_id", feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name="date", feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name="label", feature_type=FeatureTypeEnum.INTEGRAL),
        #        FeatureDefinition(feature_name='review_body', feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name="split_type", feature_type=FeatureTypeEnum.STRING),
    ]

    feature_group = FeatureGroup(
        name=feature_group_name, feature_definitions=feature_definitions, sagemaker_session=sagemaker_session
    )

    print("Feature Group: {}".format(feature_group))

    try:
        print(
            "Waiting for existing Feature Group to become available if it is being created by another instance in our cluster..."
        )
        wait_for_feature_group_creation_complete(feature_group)
    except Exception as e:
        print("Before CREATE FG wait exeption: {}".format(e))
    #        pass

    try:
        record_identifier_feature_name = "review_id"
        event_time_feature_name = "date"

        print("Creating Feature Group with role {}...".format(role))
        feature_group.create(
            s3_uri=f"s3://{bucket}/{prefix}",
            record_identifier_name=record_identifier_feature_name,
            event_time_feature_name=event_time_feature_name,
            role_arn=role,
            enable_online_store=True,
        )
        print("Creating Feature Group. Completed.")

        print("Waiting for new Feature Group to become available...")
        wait_for_feature_group_creation_complete(feature_group)
        print("Feature Group available.")
        feature_group.describe()

    except Exception as e:
        print("Exception: {}".format(e))

    return feature_group
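Several of these examples poll with wait_for_feature_group_creation_complete (or the test-suite variant _wait_for_feature_group_create), which is never shown. A plausible implementation, mirroring the polling loop in example #14:

import time


def wait_for_feature_group_creation_complete(feature_group):
    # Poll DescribeFeatureGroup until the group leaves the Creating state.
    status = feature_group.describe().get("FeatureGroupStatus")
    while status == "Creating":
        print("Waiting for Feature Group creation...")
        time.sleep(5)
        status = feature_group.describe().get("FeatureGroupStatus")
    if status != "Created":
        raise RuntimeError(f"Failed to create feature group {feature_group.name}")
    print(f"FeatureGroup {feature_group.name} successfully created.")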
Code example #4
def test_ingest_without_string_feature(
    feature_store_session,
    role,
    feature_group_name,
    offline_store_s3_uri,
    pandas_data_frame_without_string,
):
    feature_group = FeatureGroup(name=feature_group_name,
                                 sagemaker_session=feature_store_session)
    feature_group.load_feature_definitions(
        data_frame=pandas_data_frame_without_string)

    with cleanup_feature_group(feature_group):
        output = feature_group.create(
            s3_uri=offline_store_s3_uri,
            record_identifier_name="feature1",
            event_time_feature_name="feature2",
            role_arn=role,
            enable_online_store=True,
        )
        _wait_for_feature_group_create(feature_group)

        ingestion_manager = feature_group.ingest(
            data_frame=pandas_data_frame_without_string,
            max_workers=3,
            wait=False)
        ingestion_manager.wait()

    assert output["FeatureGroupArn"].endswith(
        f"feature-group/{feature_group_name}")
Code example #5
def test_ingest_multi_process(
    feature_store_session,
    role,
    feature_group_name,
    offline_store_s3_uri,
    pandas_data_frame,
):
    feature_group = FeatureGroup(name=feature_group_name,
                                 sagemaker_session=feature_store_session)
    feature_group.load_feature_definitions(data_frame=pandas_data_frame)

    with cleanup_feature_group(feature_group):
        output = feature_group.create(
            s3_uri=offline_store_s3_uri,
            record_identifier_name="feature1",
            event_time_feature_name="feature3",
            role_arn=role,
            enable_online_store=True,
        )
        _wait_for_feature_group_create(feature_group)

        feature_group.ingest(data_frame=pandas_data_frame,
                             max_workers=3,
                             max_processes=2,
                             wait=True)

    assert output["FeatureGroupArn"].endswith(
        f"feature-group/{feature_group_name}")
Code example #6
def create_feature_group(
    feature_group_name,
    feature_group_description,
    df,
    id_name,
    event_time_name,
    offline_feature_group_bucket,
    sagemaker_session,
    role,
):
    """
    Create a new FeatureGroup.

    :param feature_group_name: str
    :param feature_group_description: str
    :param df: pandas.DataFrame
    :param id_name: str
    :param event_time_name: str
    :param offline_feature_group_bucket: str
    :param sagemaker_session: sagemaker.Session()
    :param role: str
    :return: tuple(FeatureGroup, bool)
    """
    feature_group = FeatureGroup(name=feature_group_name,
                                 sagemaker_session=sagemaker_session)
    feature_definitions = get_feature_definitions(df, feature_group)
    feature_group.feature_definitions = feature_definitions
    feature_group_already_exists = False
    try:
        print(f"Trying to create feature group {feature_group_description} \n")
        feature_group.create(
            description=feature_group_description,
            record_identifier_name=id_name,
            event_time_feature_name=event_time_name,
            role_arn=role,
            s3_uri=offline_feature_group_bucket,
            enable_online_store=True,
        )
        wait_for_feature_group_creation_complete(feature_group)
    except Exception as e:
        # Only a botocore ClientError carries a response dict; re-raise anything else.
        code = getattr(e, "response", {}).get("Error", {}).get("Code")
        if code == "ResourceInUse":
            print(f"Using existing feature group: {feature_group_name}")
            feature_group_already_exists = True
        else:
            raise
    return feature_group, feature_group_already_exists
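Example #6 builds its schema with a get_feature_definitions helper that is not shown. A minimal sketch that maps pandas dtypes to Feature Store types (the mapping is an assumption, the feature_group argument is unused here, and unknown dtypes fall back to String):

from sagemaker.feature_store.feature_definition import (
    FeatureDefinition,
    FeatureTypeEnum,
)


def get_feature_definitions(df, feature_group):
    # Map pandas dtypes to Feature Store feature types.
    dtype_to_type = {
        "int64": FeatureTypeEnum.INTEGRAL,
        "float64": FeatureTypeEnum.FRACTIONAL,
    }
    return [
        FeatureDefinition(
            feature_name=column,
            feature_type=dtype_to_type.get(str(df[column].dtype),
                                           FeatureTypeEnum.STRING),
        )
        for column in df.columns
    ]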
Code example #7
def test_create_feature_store(
    feature_store_session,
    role,
    feature_group_name,
    offline_store_s3_uri,
    pandas_data_frame,
    record,
    create_table_ddl,
):
    feature_group = FeatureGroup(name=feature_group_name,
                                 sagemaker_session=feature_store_session)
    feature_group.load_feature_definitions(data_frame=pandas_data_frame)

    with cleanup_feature_group(feature_group):
        output = feature_group.create(
            s3_uri=offline_store_s3_uri,
            record_identifier_name="feature1",
            event_time_feature_name="feature3",
            role_arn=role,
            enable_online_store=True,
        )
        _wait_for_feature_group_create(feature_group)

        # Ingest data
        feature_group.put_record(record=record)
        ingestion_manager = feature_group.ingest(data_frame=pandas_data_frame,
                                                 max_workers=3,
                                                 wait=False)
        ingestion_manager.wait()
        assert 0 == len(ingestion_manager.failed_rows)

        # Query the integrated Glue table.
        athena_query = feature_group.athena_query()
        df = DataFrame()
        with timeout(minutes=10):
            while df.shape[0] < 11:
                athena_query.run(
                    query_string=f'SELECT * FROM "{athena_query.table_name}"',
                    output_location=f"{offline_store_s3_uri}/query_results",
                )
                athena_query.wait()
                assert "SUCCEEDED" == athena_query.get_query_execution().get(
                    "QueryExecution").get("Status").get("State")
                df = athena_query.as_dataframe()
                print(f"Found {df.shape[0]} records.")
                time.sleep(60)

        assert df.shape[0] == 11
        nans = pd.isna(df.loc[df["feature1"].isin([5, 6, 7, 8,
                                                   9])]["feature4"])
        # items() yields (index, value) pairs; asserting the tuple itself is always true.
        for _, is_na in nans.items():
            assert is_na
        assert (create_table_ddl.format(
            feature_group_name=feature_group_name,
            region=feature_store_session.boto_session.region_name,
            account=feature_store_session.account_id(),
        ) == feature_group.as_hive_ddl())
    assert output["FeatureGroupArn"].endswith(
        f"feature-group/{feature_group_name}")
Code example #8
def test_feature_store_create(
    sagemaker_session_mock, role_arn, feature_group_dummy_definitions, s3_uri
):
    feature_group = FeatureGroup(name="MyFeatureGroup", sagemaker_session=sagemaker_session_mock)
    feature_group.feature_definitions = feature_group_dummy_definitions
    feature_group.create(
        s3_uri=s3_uri,
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        role_arn=role_arn,
        enable_online_store=True,
    )
    # Mock's `called_with` is not an assertion (it returns a truthy Mock); use assert_called_with.
    sagemaker_session_mock.create_feature_group.assert_called_with(
        feature_group_name="MyFeatureGroup",
        record_identifier_name="feature1",
        event_time_feature_name="feature2",
        role_arn=role_arn,
        online_store_config={"EnableOnlineStore": True},
        feature_definitions=[fd.to_dict() for fd in feature_group_dummy_definitions],
    )
Code example #9
def test_create_feature_store_online_only(
    feature_store_session,
    role,
    feature_group_name,
    pandas_data_frame,
):
    feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=feature_store_session)
    feature_group.load_feature_definitions(data_frame=pandas_data_frame)

    with cleanup_feature_group(feature_group):
        output = feature_group.create(
            s3_uri=False,
            record_identifier_name="feature1",
            event_time_feature_name="feature3",
            role_arn=role,
            enable_online_store=True,
        )
        _wait_for_feature_group_create(feature_group)

    assert output["FeatureGroupArn"].endswith(f"feature-group/{feature_group_name}")
Code example #10
data['review_id']     = data['review_id'].astype('str').astype('string')
data['product_id']    = data['product_id'].astype('str').astype('string')
data['review_body']   = data['review_body'].astype('str').astype('string')
data['label']         = data['label'].astype('str').astype('string')
data['star_rating']   = data['star_rating'].astype('int64')
data['event_time']    = data['event_time'].astype('float64')
# Load feature definitions
feature_group.load_feature_definitions(data_frame=data)
# Create feature group
feature_group.create(
    s3_uri='s3://{}/{}'.format(bucket, prefix),
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name=event_time_feature_name,
    role_arn=role,
    enable_online_store=True,
    description="1.8M+ tokenized camera reviews from the Amazon Customer Reviews dataset",
    tags=[
        { 'Key': 'Dataset', 'Value': 'amazon customer reviews' },
        { 'Key': 'Subset', 'Value': 'cameras' },
        { 'Key': 'Owner', 'Value': 'Julien Simon' }
    ]
)
# Wait for feature group to be ready
while feature_group.describe().get("FeatureGroupStatus") != 'Created':
    sleep(1)
print('Feature group created')

# Ingest data
print('Ingesting data...')
try:
    feature_group.ingest(data_frame=data, max_workers=max_workers, wait=True)
except Exception as e:
    # The original snippet is truncated here; surface ingestion failures.
    print('Ingestion failed: {}'.format(e))
Code example #11
def create_or_load_feature_group(prefix, feature_group_name):

    # Feature Definitions for our records
    feature_definitions = [
        FeatureDefinition(feature_name='input_ids',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='input_mask',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='segment_ids',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='label_id',
                          feature_type=FeatureTypeEnum.INTEGRAL),
        FeatureDefinition(feature_name='review_id',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='date',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='label',
                          feature_type=FeatureTypeEnum.INTEGRAL),
        #        FeatureDefinition(feature_name='review_body', feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='split_type',
                          feature_type=FeatureTypeEnum.STRING)
    ]

    feature_group = FeatureGroup(name=feature_group_name,
                                 feature_definitions=feature_definitions,
                                 sagemaker_session=sagemaker_session)

    print('Feature Group: {}'.format(feature_group))

    try:
        print(
            'Waiting for existing Feature Group to become available if it is being created by another instance in our cluster...'
        )
        wait_for_feature_group_creation_complete(feature_group)
    except Exception as e:
        print('Before CREATE FG wait exception: {}'.format(e))
#        pass

    try:
        record_identifier_feature_name = "review_id"
        event_time_feature_name = "date"

        print('Creating Feature Group with role {}...'.format(role))
        feature_group.create(
            s3_uri=f"s3://{bucket}/{prefix}",
            record_identifier_name=record_identifier_feature_name,
            event_time_feature_name=event_time_feature_name,
            role_arn=role,
            enable_online_store=True)
        print('Creating Feature Group. Completed.')

        print('Waiting for new Feature Group to become available...')
        wait_for_feature_group_creation_complete(feature_group)
        print('Feature Group available.')
        feature_group.describe()

    except Exception as e:
        print('Exception: {}'.format(e))


#        pass

#         print('FAILED - NOW Creating Feature Group with service-role {}...'.format('arn:aws:iam::231218423789:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole'))
#         feature_group.create(
#             s3_uri=f"s3://{bucket}/{prefix}",
#             record_identifier_name=record_identifier_feature_name,
#             event_time_feature_name=event_time_feature_name,
#             role_arn='arn:aws:iam::231218423789:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole',
#             enable_online_store=True
#         )
#         print('Creating Feature Group. Completed.')

#    feature_group.describe()

    return feature_group
Code example #12
File: prepare_data.py  Project: damiansp/aws_learn
def create_or_load_feature_group(prefix, feature_group_name):

    # Feature Definitions for the records
    feature_definitions = [
        FeatureDefinition(feature_name='review_id',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='date',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='sentiment',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='label_id',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='input_ids',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='review_body',
                          feature_type=FeatureTypeEnum.STRING),
        FeatureDefinition(feature_name='split_type',
                          feature_type=FeatureTypeEnum.STRING)
    ]

    # setup the Feature Group
    feature_group = FeatureGroup(name=feature_group_name,
                                 feature_definitions=feature_definitions,
                                 sagemaker_session=sagemaker_session)

    print('Feature Group: {}'.format(feature_group))

    try:
        print(
            'Waiting for existing Feature Group to become available if it is being created by another instance in our cluster...'
        )
        wait_for_feature_group_creation_complete(feature_group)
    except Exception as e:
        print('Before CREATE FG wait exception: {}'.format(e))

    try:
        record_identifier_feature_name = "review_id"
        event_time_feature_name = "date"

        print('Creating Feature Group with role {}...'.format(role))

        # create Feature Group
        feature_group.create(
            s3_uri=f"s3://{bucket}/{prefix}",
            record_identifier_name=record_identifier_feature_name,
            event_time_feature_name=event_time_feature_name,
            role_arn=role,
            enable_online_store=False)
        print('Creating Feature Group. Completed.')

        print('Waiting for new Feature Group to become available...')
        wait_for_feature_group_creation_complete(feature_group)
        print('Feature Group available.')

        # the information about the Feature Group
        feature_group.describe()

    except Exception as e:
        print('Exception: {}'.format(e))

    return feature_group
Code example #13
def test_one_step_ingestion_pipeline(sagemaker_session, feature_store_session,
                                     feature_definitions, role, pipeline_name):
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.4xlarge")

    input_name = "features.csv"
    input_file_path = os.path.join(DATA_DIR, "workflow", "features.csv")
    input_data_uri = os.path.join("s3://", sagemaker_session.default_bucket(),
                                  "py-sdk-ingestion-test-input/features.csv")
    with open(input_file_path, "r") as data:
        body = data.read()
        S3Uploader.upload_string_as_file_body(
            body=body,
            desired_s3_uri=input_data_uri,
            sagemaker_session=sagemaker_session)

    inputs = [
        ProcessingInput(
            input_name=input_name,
            source=input_data_uri,
            destination="/opt/ml/processing/features.csv",
        )
    ]

    feature_group_name = f"py-sdk-integ-fg-{int(time.time() * 10**7)}"
    feature_group = FeatureGroup(
        name=feature_group_name,
        feature_definitions=feature_definitions,
        sagemaker_session=feature_store_session,
    )

    ingestion_only_flow, output_name = generate_data_ingestion_flow_from_s3_input(
        input_name,
        input_data_uri,
        s3_content_type="csv",
        s3_has_header=True,
    )

    outputs = [
        ProcessingOutput(
            output_name=output_name,
            app_managed=True,
            feature_store_output=FeatureStoreOutput(
                feature_group_name=feature_group_name),
        )
    ]

    temp_flow_path = "./ingestion.flow"
    with cleanup_feature_group(feature_group):
        with open(temp_flow_path, "w") as f:
            json.dump(ingestion_only_flow, f)

        data_wrangler_processor = DataWranglerProcessor(
            role=role,
            data_wrangler_flow_source=temp_flow_path,
            instance_count=instance_count,
            instance_type=instance_type,
            sagemaker_session=sagemaker_session,
            max_runtime_in_seconds=86400,
        )

        data_wrangler_step = ProcessingStep(name="ingestion-step",
                                            processor=data_wrangler_processor,
                                            inputs=inputs,
                                            outputs=outputs)

        pipeline = Pipeline(
            name=pipeline_name,
            parameters=[instance_count, instance_type],
            steps=[data_wrangler_step],
            sagemaker_session=sagemaker_session,
        )

        try:
            response = pipeline.create(role)
            create_arn = response["PipelineArn"]

            offline_store_s3_uri = os.path.join(
                "s3://", sagemaker_session.default_bucket(),
                feature_group_name)
            feature_group.create(
                s3_uri=offline_store_s3_uri,
                record_identifier_name="f11",
                event_time_feature_name="f10",
                role_arn=role,
                enable_online_store=False,
            )
            _wait_for_feature_group_create(feature_group)

            execution = pipeline.start()
            response = execution.describe()
            assert response["PipelineArn"] == create_arn

            try:
                execution.wait(delay=60, max_attempts=10)
            except WaiterError:
                pass

            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            assert execution_steps[0]["StepName"] == "ingestion-step"
            assert execution_steps[0]["StepStatus"] == "Succeeded"

            athena_query = feature_group.athena_query()
            with timeout(minutes=10):
                athena_query.run(
                    query_string=f'SELECT * FROM "{athena_query.table_name}"',
                    output_location=f"{offline_store_s3_uri}/query_results",
                )
                athena_query.wait()
                assert "SUCCEEDED" == athena_query.get_query_execution().get(
                    "QueryExecution").get("Status").get("State")

                df = athena_query.as_dataframe()
                assert pd.read_csv(input_file_path).shape[0] == df.shape[0]
        finally:
            try:
                pipeline.delete()
            except Exception as e:
                print(f"Delete pipeline failed with error: {e}")
            os.remove(temp_flow_path)
Code example #14
class FeatureGroupDataSet(AbstractDataSet):
    def __init__(
        self,
        name: str,
        s3_uri: str,
        record_identifier_name: str,
        event_time_name: str,
        query: str,
        description: str = None,
    ):

        region = boto3.Session().region_name
        boto_session = boto3.Session(region_name=region)

        sagemaker_client = boto_session.client(
            service_name="sagemaker", region_name=region
        )
        featurestore_runtime = boto_session.client(
            service_name="sagemaker-featurestore-runtime", region_name=region
        )

        feature_store_session = Session(
            boto_session=boto_session,
            sagemaker_client=sagemaker_client,
            sagemaker_featurestore_runtime_client=featurestore_runtime,
        )

        iam = boto3.client("iam")
        role = iam.get_role(RoleName="AmazonSageMaker-ExecutionRole")["Role"]["Arn"]

        # you can also suffix the feature group name with pipeline git version
        self._feature_group = FeatureGroup(
            name=name, sagemaker_session=feature_store_session
        )
        self._description = description
        self._s3_uri = s3_uri
        self._role = role
        self._record_identifier_name = record_identifier_name
        self._event_time_name = event_time_name
        self._query = query

    def _wait_for_feature_group_creation_complete(self):
        status = self._feature_group.describe().get("FeatureGroupStatus")
        while status == "Creating":
            logger.info("Waiting for Feature Group Creation")
            time.sleep(5)
            status = self._feature_group.describe().get("FeatureGroupStatus")
        if status != "Created":
            raise RuntimeError(
                f"Failed to create feature group {self._feature_group.name}"
            )
        logger.info("FeatureGroup %s successfully created.", self._feature_group.name)

    def _describe(self):
        return dict(feature_group=self._feature_group)

    def _save(self, data):
        self._feature_group.load_feature_definitions(data)
        try:
            self._feature_group.create(
                description=self._description,
                s3_uri=self._s3_uri,
                record_identifier_name=self._record_identifier_name,
                event_time_feature_name=self._event_time_name,
                role_arn=self._role,
                enable_online_store=True,
            )

            self._wait_for_feature_group_creation_complete()
        except Exception as exc:
            if (
                f"Resource Already Exists: FeatureGroup with name {self._feature_group.name} already exists"
                in str(exc)
            ):
                pass
            else:
                raise

        self._feature_group.ingest(data[:10])  # just for demo purpose

    def _load(self) -> pd.DataFrame:
        query = self._feature_group.athena_query()
        print(self._query.format(table_name=query.table_name))
        query.run(
            self._query.format(table_name=query.table_name),
            output_location=f"{self._s3_uri}/query_results/",
        )
        query.wait()
        return query.as_dataframe()
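Because Kedro's AbstractDataSet exposes _save and _load through its public save and load methods, the data set might be used like this (all argument values are illustrative, and df is assumed to be a pandas DataFrame):

dataset = FeatureGroupDataSet(
    name="my-feature-group",
    s3_uri="s3://my-bucket/feature-store",
    record_identifier_name="record_id",
    event_time_name="event_time",
    query='SELECT * FROM "{table_name}" LIMIT 100',
)
dataset.save(df)          # creates the feature group if needed, then ingests df
df_back = dataset.load()  # runs the Athena query and returns a DataFrame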