def export_dataset(project: str, dataset: str, bucket: str):
    """Export an AutoML dataset to GCS, then download the resulting CSV.

    If the dataset CSV already exists locally, the export round-trip is
    skipped entirely. Returns the local path of the dataset CSV file.

    NOTE(review): relies on module-level globals defined elsewhere in this
    file — `dataset_file_name`, `compute_region`, `AutoMlClient`, and
    `download_training_csv`.
    """
    # Cached copy on disk short-circuits the (slow) cloud export.
    if os.path.isfile(dataset_file_name):
        logging.info('Dataset already downloaded, no export done.')
        return dataset_file_name

    automl = AutoMlClient()
    destination = 'gs://{}/export/export_{}'.format(bucket, dataset)
    gcs_config = {"gcs_destination": {"output_uri_prefix": destination}}
    full_dataset_name = automl.dataset_path(project, compute_region, dataset)

    operation = automl.export_data(full_dataset_name, gcs_config)
    logging.info('Waiting for the export to complete...')
    # Block until the long-running export operation finishes.
    operation.result()

    logging.info('Downloading exported csv...')
    download_training_csv(
        bucket,
        'export/export_{}/export.csv'.format(dataset),
        dataset_file_name,
    )
    return dataset_file_name
# --- Shared fixtures for the AutoML hook tests ---
GCP_LOCATION = "test-location"  # region used when building resource paths
MODEL_NAME = "test_model"
# NOTE(review): MODEL_ID is a fully-qualified resource path while DATASET_ID
# is a bare id; model_path() below will embed the full path verbatim inside
# another path — confirm this is intentional.
MODEL_ID = "projects/198907790164/locations/us-central1/models/TBL9195602771183665152"
DATASET_ID = "TBL123456789"
# Minimal model spec forwarded to the hook under test.
MODEL = {
    "display_name": MODEL_NAME,
    "dataset_id": DATASET_ID,
    "tables_model_metadata": {
        "train_budget_milli_node_hours": 1000
    },
}
# Precomputed resource path strings; GCP_PROJECT_ID is defined elsewhere
# in this file.
LOCATION_PATH = AutoMlClient.location_path(GCP_PROJECT_ID, GCP_LOCATION)
MODEL_PATH = PredictionServiceClient.model_path(GCP_PROJECT_ID, GCP_LOCATION, MODEL_ID)
DATASET_PATH = AutoMlClient.dataset_path(GCP_PROJECT_ID, GCP_LOCATION, DATASET_ID)
# Opaque payloads — the hook passes these through unchanged, so any dict works.
INPUT_CONFIG = {"input": "value"}
OUTPUT_CONFIG = {"output": "value"}
PAYLOAD = {"test": "payload"}
DATASET = {"dataset_id": "data"}
MASK = {"field": "mask"}


# NOTE(review): "AuoML" looks like a typo for "AutoML", but the class name is
# kept as-is — it is the test class's discoverable public name.
class TestAuoMLHook(unittest.TestCase):
    def setUp(self) -> None:
        # Patch the base GCP hook's __init__ so constructing CloudAutoMLHook
        # does not require a default project id or real credentials.
        with mock.patch(
            "airflow.providers.google.cloud.hooks.automl.GoogleBaseHook.__init__",
            new=mock_base_gcp_hook_no_default_project_id,
        ):
            self.hook = CloudAutoMLHook()