Example no. 1
0
def export_dataset(project: str, dataset: str, bucket: str):
    """Export an AutoML dataset to GCS and download the resulting CSV.

    If the dataset file is already present locally, the export is skipped
    and the cached path is returned immediately.

    Relies on module-level names `dataset_file_name`, `compute_region`
    and `download_training_csv` defined elsewhere in this file.
    """
    # Short-circuit: reuse a previously downloaded copy when one exists.
    if os.path.isfile(dataset_file_name):
        logging.info('Dataset already downloaded, no export done.')
        return dataset_file_name

    automl = AutoMlClient()
    destination_prefix = 'gs://{}/export/export_{}'.format(bucket, dataset)
    gcs_config = {"gcs_destination": {"output_uri_prefix": destination_prefix}}
    qualified_name = automl.dataset_path(project, compute_region, dataset)

    operation = automl.export_data(qualified_name, gcs_config)
    logging.info('Waiting for the export to complete...')
    # Block until the long-running export operation finishes.
    operation.result()

    logging.info('Downloading exported csv...')
    download_training_csv(bucket,
                          'export/export_{}/export.csv'.format(dataset),
                          dataset_file_name)
    return dataset_file_name
Example no. 2
0
# Fixture constants shared by the AutoML hook tests below.
GCP_LOCATION = "test-location"
MODEL_NAME = "test_model"
# NOTE(review): this is a fully-qualified model resource path, not a bare
# model id, despite the variable name — confirm callers expect that.
MODEL_ID = "projects/198907790164/locations/us-central1/models/TBL9195602771183665152"
DATASET_ID = "TBL123456789"
# Minimal model spec as passed to the AutoML create-model API.
MODEL = {
    "display_name": MODEL_NAME,
    "dataset_id": DATASET_ID,
    "tables_model_metadata": {
        "train_budget_milli_node_hours": 1000
    },
}

# Pre-built resource paths (GCP_PROJECT_ID is defined elsewhere in this file).
LOCATION_PATH = AutoMlClient.location_path(GCP_PROJECT_ID, GCP_LOCATION)
MODEL_PATH = PredictionServiceClient.model_path(GCP_PROJECT_ID, GCP_LOCATION,
                                                MODEL_ID)
DATASET_PATH = AutoMlClient.dataset_path(GCP_PROJECT_ID, GCP_LOCATION,
                                         DATASET_ID)

# Placeholder payloads — the tests only check these are passed through verbatim.
INPUT_CONFIG = {"input": "value"}
OUTPUT_CONFIG = {"output": "value"}
PAYLOAD = {"test": "payload"}
DATASET = {"dataset_id": "data"}
MASK = {"field": "mask"}

class TestAuoMLHook(unittest.TestCase):
    def setUp(self) -> None:
        """Build the hook under test with base-hook initialisation stubbed out."""
        # Replace GoogleBaseHook.__init__ so constructing the hook needs no
        # real GCP credentials or default project id.
        patcher = mock.patch(
            "airflow.providers.google.cloud.hooks.automl.GoogleBaseHook.__init__",
            new=mock_base_gcp_hook_no_default_project_id,
        )
        with patcher:
            self.hook = CloudAutoMLHook()