Example 1
    def test_export_data(self, shared_state):
        """Get an existing dataset, export data to a newly created folder in
        Google Cloud Storage, then verify data was successfully exported."""

        assert shared_state["staging_bucket"]
        assert shared_state["storage_client"]

        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=f"gs://{shared_state['staging_bucket']}",
        )

        text_dataset = aiplatform.TextDataset(dataset_name=_TEST_TEXT_DATASET_ID)

        exported_files = text_dataset.export_data(
            output_dir=f"gs://{shared_state['staging_bucket']}"
        )

        assert len(exported_files)  # Ensure at least one GCS path was returned

        exported_file = exported_files[0]
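        # Split the "gs://<bucket>/<prefix>" export path into bucket name and object prefix.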
        bucket_name, prefix = utils.extract_bucket_and_prefix_from_gcs_path(exported_file)

        storage_client = shared_state["storage_client"]

        bucket = storage_client.get_bucket(bucket_name)
        blob = bucket.get_blob(prefix)

        assert blob  # Verify the returned GCS export path exists
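
This test relies on module-level imports (aiplatform, the SDK's utils helpers, and the _TEST_* constants) and on a shared_state pytest fixture populated by an earlier setup step. A minimal sketch of what such a fixture might look like in conftest.py; the fixture scope and the way the keys get filled in are assumptions, not the suite's actual definitions:

import pytest
from google.cloud import storage


@pytest.fixture(scope="class")
def shared_state():
    # Mutable dict shared across the tests in a class; an earlier test or
    # setup step is expected to populate it, e.g.:
    #     shared_state["storage_client"] = storage.Client()
    #     shared_state["staging_bucket"] = "my-staging-bucket"  # placeholder
    state = {}
    yield state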
Example 2
    def test_get_new_dataset_and_import(self, dataset_gapic_client, shared_state):
        """Retrieve new, empty dataset and import a text dataset using import().
        Then verify data items were successfully imported."""

        assert shared_state["dataset_name"]
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

        my_dataset = aiplatform.TextDataset(dataset_name=shared_state["dataset_name"])

        data_items_pre_import = dataset_gapic_client.list_data_items(
            parent=my_dataset.resource_name
        )

        assert len(list(data_items_pre_import)) == 0

        # Blocking call to import
        my_dataset.import_data(
            gcs_source=_TEST_TEXT_ENTITY_EXTRACTION_GCS_SOURCE,
            import_schema_uri=_TEST_TEXT_ENTITY_IMPORT_SCHEMA,
        )

        data_items_post_import = dataset_gapic_client.list_data_items(
            parent=my_dataset.resource_name
        )

        assert len(list(data_items_post_import)) == 469
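
The dataset_gapic_client fixture is likewise defined elsewhere in the suite. One plausible construction, sketched here only as an assumption (it reuses the _TEST_LOCATION constant from the test module; the endpoint format is the standard regional one), uses the SDK's low-level GAPIC client:

import pytest
from google.cloud import aiplatform


@pytest.fixture(scope="class")
def dataset_gapic_client():
    # Low-level DatasetServiceClient; the regional endpoint must match the
    # location passed to aiplatform.init().
    yield aiplatform.gapic.DatasetServiceClient(
        client_options={"api_endpoint": f"{_TEST_LOCATION}-aiplatform.googleapis.com"}
    )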
Example 3
from typing import Optional

from google.cloud import aiplatform


def create_training_pipeline_text_sentiment_analysis_sample(
    project: str,
    location: str,
    display_name: str,
    dataset_id: int,
    model_display_name: Optional[str] = None,
    sentiment_max: int = 10,
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    budget_milli_node_hours: int = 8000,
    disable_early_stopping: bool = False,
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

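    # sentiment_max is the highest sentiment score present in the training
    # data; Vertex AI expects consecutive integer scores from 0 up to
    # sentiment_max (at most 10).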
    job = aiplatform.AutoMLTextTrainingJob(
        display_name=display_name,
        prediction_type="sentiment",
        sentiment_max=sentiment_max,
    )

    text_dataset = aiplatform.TextDataset(dataset_id)

    model = job.run(
        dataset=text_dataset,
        model_display_name=model_display_name,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        budget_milli_node_hours=budget_milli_node_hours,
        disable_early_stopping=disable_early_stopping,
        sync=sync,
    )

    model.wait()

    print(model.display_name)
    print(model.resource_name)
    print(model.uri)
    return model
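
A minimal invocation sketch; every value below is a placeholder, not something taken from the original sample:

model = create_training_pipeline_text_sentiment_analysis_sample(
    project="my-project",  # placeholder project ID
    location="us-central1",
    display_name="sentiment-training-job",
    dataset_id=1234567890,  # placeholder TextDataset ID
    budget_milli_node_hours=8000,  # 8000 milli node hours = 8 node hours
)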
Example 4
from typing import List, Union

from google.cloud import aiplatform


def import_data_text_entity_extraction_sample(
    project: str,
    location: str,
    dataset: str,
    src_uris: Union[str, List[str]],
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    ds = aiplatform.TextDataset(dataset)
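    # import_data starts a long-running operation; with sync=False it returns
    # immediately and ds.wait() below blocks until the import finishes.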
    ds.import_data(
        gcs_source=src_uris,
        import_schema_uri=aiplatform.schema.dataset.ioformat.text.extraction,
        sync=sync,
    )

    ds.wait()

    print(ds.display_name)
    print(ds.resource_name)
    return ds
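
And a matching usage sketch for the import sample, again with placeholder values; the source must be JSONL that follows the text entity-extraction import schema:

ds = import_data_text_entity_extraction_sample(
    project="my-project",  # placeholder project ID
    location="us-central1",
    dataset="1234567890",  # placeholder dataset ID or full resource name
    src_uris="gs://my-bucket/extraction.jsonl",  # placeholder GCS source
)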