예제 #1
0
 def execute(self, context: "Context") -> dict:
     """
     Create a Dataproc Metastore service and return it as a dictionary.

     The create request is sent through ``DataprocMetastoreHook`` and the
     returned operation is awaited. If the request fails with HTTP status 409,
     the already existing service is fetched instead; any other ``HttpError``
     is re-raised unchanged.

     :param context: task context forwarded to ``DataprocMetastoreLink.persist``.
     :return: ``Service.to_dict`` of the created or pre-existing service.
     :raises HttpError: when creation fails with a status other than 409.
     """
     hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     # Report the service being created rather than the project it lives in.
     self.log.info("Creating Dataproc Metastore service: %s", self.service_id)
     try:
         operation = hook.create_service(
             region=self.region,
             project_id=self.project_id,
             service=self.service,
             service_id=self.service_id,
             request_id=self.request_id,
             retry=self.retry,
             timeout=self.timeout,
             metadata=self.metadata,
         )
         service = hook.wait_for_operation(self.timeout, operation)
         self.log.info("Service %s created successfully", self.service_id)
     except HttpError as err:
         # Accept the 409 status in either its int or its str form.
         if err.resp.status not in (409, '409'):
             raise
         self.log.info("Service %s already exists", self.service_id)
         service = hook.get_service(
             region=self.region,
             project_id=self.project_id,
             service_id=self.service_id,
             retry=self.retry,
             timeout=self.timeout,
             metadata=self.metadata,
         )
     DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK)
     return Service.to_dict(service)
예제 #2
0
    def execute(self, context: "Context"):
        """
        Export metadata from a Dataproc Metastore service and return the export.

        The export is requested through ``DataprocMetastoreHook``; completion is
        then detected by ``_wait_for_export_metadata``. Afterwards the export
        link and a storage link for the export's ``destination_gcs_uri`` are
        persisted.

        :param context: task context forwarded to the link ``persist`` calls.
        :return: ``MetadataExport.to_dict`` of the finished export.
        """
        metastore_hook = DataprocMetastoreHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Exporting metadata from Dataproc Metastore service: %s", self.service_id)

        export_request = {
            "destination_gcs_folder": self.destination_gcs_folder,
            "project_id": self.project_id,
            "region": self.region,
            "service_id": self.service_id,
            "request_id": self.request_id,
            "database_dump_type": self.database_dump_type,
            "retry": self.retry,
            "timeout": self.timeout,
            "metadata": self.metadata,
        }
        # The value returned by the export call is not used; the helper below polls instead.
        metastore_hook.export_metadata(**export_request)
        finished_export = self._wait_for_export_metadata(metastore_hook)
        self.log.info("Metadata from service %s exported successfully", self.service_id)

        DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_EXPORT_LINK)
        destination = MetadataExport.to_dict(finished_export)["destination_gcs_uri"]
        gcs_uri = self._get_uri_from_destination(destination)
        StorageLink.persist(context=context, task_instance=self, uri=gcs_uri)
        return MetadataExport.to_dict(finished_export)
예제 #3
0
 def execute(self, context: "Context"):
     """
     Restore a Dataproc Metastore service from a backup.

     The restore is requested through ``DataprocMetastoreHook``; completion is
     then awaited by ``_wait_for_restore_service`` before the service link is
     persisted.

     :param context: task context forwarded to ``DataprocMetastoreLink.persist``.
     """
     metastore_hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     self.log.info(
         "Restoring Dataproc Metastore service: %s from backup: %s", self.service_id, self.backup_id
     )

     restore_request = {
         "project_id": self.project_id,
         "region": self.region,
         "service_id": self.service_id,
         "backup_project_id": self.backup_project_id,
         "backup_region": self.backup_region,
         "backup_service_id": self.backup_service_id,
         "backup_id": self.backup_id,
         "restore_type": self.restore_type,
         "request_id": self.request_id,
         "retry": self.retry,
         "timeout": self.timeout,
         "metadata": self.metadata,
     }
     metastore_hook.restore_service(**restore_request)
     # The helper's return value is intentionally discarded here.
     self._wait_for_restore_service(metastore_hook)
     self.log.info("Service %s restored from backup %s", self.service_id, self.backup_id)
     DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK)
예제 #4
0
    def execute(self, context: dict) -> dict:
        """
        Create a Dataproc Metastore backup and return it as a dictionary.

        The create request is sent through ``DataprocMetastoreHook`` and the
        returned operation is awaited. If the request fails with HTTP status
        409, the existing backup is fetched instead; any other ``HttpError`` is
        re-raised unchanged.

        :param context: task context (not used by this method).
        :return: ``Backup.to_dict`` of the created or pre-existing backup.
        :raises HttpError: when creation fails with a status other than 409.
        """
        metastore_hook = DataprocMetastoreHook(
            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
        )
        self.log.info("Creating Dataproc Metastore backup: %s", self.backup_id)

        try:
            create_op = metastore_hook.create_backup(
                project_id=self.project_id,
                region=self.region,
                service_id=self.service_id,
                backup=self.backup,
                backup_id=self.backup_id,
                request_id=self.request_id,
                retry=self.retry,
                timeout=self.timeout,
                metadata=self.metadata,
            )
            result = metastore_hook.wait_for_operation(self.timeout, create_op)
            self.log.info("Backup %s created successfully", self.backup_id)
        except HttpError as http_err:
            # Accept the 409 status in either its int or its str form.
            conflict = http_err.resp.status in (409, '409')
            if not conflict:
                raise
            self.log.info("Backup %s already exists", self.backup_id)
            result = metastore_hook.get_backup(
                project_id=self.project_id,
                region=self.region,
                service_id=self.service_id,
                backup_id=self.backup_id,
                retry=self.retry,
                timeout=self.timeout,
                metadata=self.metadata,
            )
        return Backup.to_dict(result)
예제 #5
0
    def execute(self, context: "Context"):
        """
        Create a Dataproc Metastore metadata import and return it as a dictionary.

        The create request is sent through ``DataprocMetastoreHook``, the
        returned operation is awaited, and a detailed link for the import is
        persisted.

        :param context: task context forwarded to
            ``DataprocMetastoreDetailedLink.persist``.
        :return: ``MetadataImport.to_dict`` of the awaited operation result.
        """
        metastore_hook = DataprocMetastoreHook(
            gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
        )
        self.log.info("Creating Dataproc Metastore metadata import: %s", self.metadata_import_id)

        import_request = {
            "project_id": self.project_id,
            "region": self.region,
            "service_id": self.service_id,
            "metadata_import": self.metadata_import,
            "metadata_import_id": self.metadata_import_id,
            "request_id": self.request_id,
            "retry": self.retry,
            "timeout": self.timeout,
            "metadata": self.metadata,
        }
        import_op = metastore_hook.create_metadata_import(**import_request)
        created_import = metastore_hook.wait_for_operation(self.timeout, import_op)
        self.log.info("Metadata import %s created successfully", self.metadata_import_id)

        DataprocMetastoreDetailedLink.persist(
            context=context,
            task_instance=self,
            url=METASTORE_IMPORT_LINK,
            resource=self.metadata_import_id,
        )
        return MetadataImport.to_dict(created_import)
예제 #6
0
 def execute(self, context: 'Context') -> dict:
     """
     Fetch a single Dataproc Metastore service and return it as a dictionary.

     :param context: task context (not used by this method).
     :return: ``Service.to_dict`` of the service returned by the hook.
     """
     hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     # Log the service being fetched; the project id alone does not identify it.
     self.log.info("Gets the details of a single Dataproc Metastore service: %s", self.service_id)
     result = hook.get_service(
         region=self.region,
         project_id=self.project_id,
         service_id=self.service_id,
         retry=self.retry,
         timeout=self.timeout,
         metadata=self.metadata,
     )
     return Service.to_dict(result)
예제 #7
0
 def execute(self, context: "Context"):
     """
     Delete a Dataproc Metastore service and wait for the deletion to finish.

     The delete request is sent through ``DataprocMetastoreHook`` and the
     returned operation is awaited with ``self.timeout``.

     :param context: task context (not used by this method).
     """
     hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     # Both messages name the service, so they report the service id, not the project id.
     self.log.info("Deleting Dataproc Metastore service: %s", self.service_id)
     operation = hook.delete_service(
         region=self.region,
         project_id=self.project_id,
         service_id=self.service_id,
         retry=self.retry,
         timeout=self.timeout,
         metadata=self.metadata,
     )
     hook.wait_for_operation(self.timeout, operation)
     self.log.info("Service %s deleted successfully", self.service_id)
예제 #8
0
 def execute(self, context: "Context") -> dict:
     """
     Fetch a single Dataproc Metastore service and return it as a dictionary.

     After the service is fetched, the service link is persisted.

     :param context: task context forwarded to ``DataprocMetastoreLink.persist``.
     :return: ``Service.to_dict`` of the service returned by the hook.
     """
     hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     # Log the service being fetched; the project id alone does not identify it.
     self.log.info("Gets the details of a single Dataproc Metastore service: %s", self.service_id)
     result = hook.get_service(
         region=self.region,
         project_id=self.project_id,
         service_id=self.service_id,
         retry=self.retry,
         timeout=self.timeout,
         metadata=self.metadata,
     )
     DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK)
     return Service.to_dict(result)
예제 #9
0
 def execute(self, context: "Context") -> None:
     """
     Delete a Dataproc Metastore backup and wait for the deletion to finish.

     The delete request is sent through ``DataprocMetastoreHook`` and the
     returned operation is awaited with ``self.timeout``.

     :param context: task context (not used by this method).
     """
     hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     self.log.info("Deleting Dataproc Metastore backup: %s", self.backup_id)
     operation = hook.delete_backup(
         project_id=self.project_id,
         region=self.region,
         service_id=self.service_id,
         backup_id=self.backup_id,
         request_id=self.request_id,
         retry=self.retry,
         timeout=self.timeout,
         metadata=self.metadata,
     )
     hook.wait_for_operation(self.timeout, operation)
     # Report the same backup id that the start message used, not the project id.
     self.log.info("Backup %s deleted successfully", self.backup_id)
예제 #10
0
 def execute(self, context: 'Context') -> List[dict]:
     """
     List the backups of a Dataproc Metastore service.

     The listing is requested through ``DataprocMetastoreHook`` using the
     operator's paging, filter and ordering settings.

     :param context: task context (not used by this method).
     :return: one ``Backup.to_dict`` dictionary per backup yielded by the hook.
     """
     metastore_hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     self.log.info("Listing Dataproc Metastore backups: %s", self.service_id)

     listing_request = {
         "project_id": self.project_id,
         "region": self.region,
         "service_id": self.service_id,
         "page_size": self.page_size,
         "page_token": self.page_token,
         "filter": self.filter,
         "order_by": self.order_by,
         "retry": self.retry,
         "timeout": self.timeout,
         "metadata": self.metadata,
     }
     found = metastore_hook.list_backups(**listing_request)

     as_dicts = []
     for item in found:
         as_dicts.append(Backup.to_dict(item))
     return as_dicts
예제 #11
0
 def execute(self, context: 'Context'):
     """
     Create a Dataproc Metastore metadata import and return it as a dictionary.

     The create request is sent through ``DataprocMetastoreHook`` and the
     returned operation is awaited with ``self.timeout``.

     :param context: task context (not used by this method).
     :return: ``MetadataImport.to_dict`` of the awaited operation result.
     """
     metastore_hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     self.log.info("Creating Dataproc Metastore metadata import: %s", self.metadata_import_id)

     import_request = {
         "project_id": self.project_id,
         "region": self.region,
         "service_id": self.service_id,
         "metadata_import": self.metadata_import,
         "metadata_import_id": self.metadata_import_id,
         "request_id": self.request_id,
         "retry": self.retry,
         "timeout": self.timeout,
         "metadata": self.metadata,
     }
     import_op = metastore_hook.create_metadata_import(**import_request)
     created_import = metastore_hook.wait_for_operation(self.timeout, import_op)
     self.log.info("Metadata import %s created successfully", self.metadata_import_id)
     return MetadataImport.to_dict(created_import)
예제 #12
0
 def execute(self, context: 'Context'):
     """
     Export metadata from a Dataproc Metastore service and return the export.

     The export is requested through ``DataprocMetastoreHook``; completion is
     then detected by ``_wait_for_export_metadata``.

     :param context: task context (not used by this method).
     :return: ``MetadataExport.to_dict`` of the finished export.
     """
     metastore_hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     self.log.info("Exporting metadata from Dataproc Metastore service: %s", self.service_id)

     export_request = {
         "destination_gcs_folder": self.destination_gcs_folder,
         "project_id": self.project_id,
         "region": self.region,
         "service_id": self.service_id,
         "request_id": self.request_id,
         "database_dump_type": self.database_dump_type,
         "retry": self.retry,
         "timeout": self.timeout,
         "metadata": self.metadata,
     }
     # The value returned by the export call is not used; the helper below polls instead.
     metastore_hook.export_metadata(**export_request)
     finished_export = self._wait_for_export_metadata(metastore_hook)
     self.log.info("Metadata from service %s exported successfully", self.service_id)
     return MetadataExport.to_dict(finished_export)
예제 #13
0
    def execute(self, context: 'Context'):
        """
        Update a Dataproc Metastore service and wait for the update to finish.

        The update request is sent through ``DataprocMetastoreHook`` with the
        operator's ``service`` payload and ``update_mask``; the returned
        operation is awaited with ``self.timeout``.

        :param context: task context (not used by this method).
        """
        metastore_hook = DataprocMetastoreHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Updating Dataproc Metastore service: %s", self.service.get("name"))

        update_request = {
            "project_id": self.project_id,
            "region": self.region,
            "service_id": self.service_id,
            "service": self.service,
            "update_mask": self.update_mask,
            "request_id": self.request_id,
            "retry": self.retry,
            "timeout": self.timeout,
            "metadata": self.metadata,
        }
        update_op = metastore_hook.update_service(**update_request)
        metastore_hook.wait_for_operation(self.timeout, update_op)
        self.log.info("Service %s updated successfully", self.service.get("name"))
예제 #14
0
 def execute(self, context: "Context") -> List[dict]:
     """
     List the backups of a Dataproc Metastore service.

     The listing is requested through ``DataprocMetastoreHook`` using the
     operator's paging, filter and ordering settings; the backups link is
     persisted before the result is converted.

     :param context: task context forwarded to ``DataprocMetastoreLink.persist``.
     :return: one ``Backup.to_dict`` dictionary per backup yielded by the hook.
     """
     metastore_hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
     )
     self.log.info("Listing Dataproc Metastore backups: %s", self.service_id)

     listing_request = {
         "project_id": self.project_id,
         "region": self.region,
         "service_id": self.service_id,
         "page_size": self.page_size,
         "page_token": self.page_token,
         "filter": self.filter,
         "order_by": self.order_by,
         "retry": self.retry,
         "timeout": self.timeout,
         "metadata": self.metadata,
     }
     found = metastore_hook.list_backups(**listing_request)
     DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_BACKUPS_LINK)

     as_dicts = []
     for item in found:
         as_dicts.append(Backup.to_dict(item))
     return as_dicts
예제 #15
0
    def execute(self, context: "Context"):
        """
        Update a Dataproc Metastore service and wait for the update to finish.

        The update request is sent through ``DataprocMetastoreHook`` with the
        operator's ``service`` payload and ``update_mask``; the returned
        operation is awaited with ``self.timeout`` and the service link is
        persisted afterwards.

        :param context: task context forwarded to ``DataprocMetastoreLink.persist``.
        """
        metastore_hook = DataprocMetastoreHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Updating Dataproc Metastore service: %s", self.service.get("name"))

        update_request = {
            "project_id": self.project_id,
            "region": self.region,
            "service_id": self.service_id,
            "service": self.service,
            "update_mask": self.update_mask,
            "request_id": self.request_id,
            "retry": self.retry,
            "timeout": self.timeout,
            "metadata": self.metadata,
        }
        update_op = metastore_hook.update_service(**update_request)
        metastore_hook.wait_for_operation(self.timeout, update_op)
        self.log.info("Service %s updated successfully", self.service.get("name"))
        DataprocMetastoreLink.persist(
            context=context,
            task_instance=self,
            url=METASTORE_SERVICE_LINK,
        )
예제 #16
0
 def execute(self, context: 'Context'):
     """
     Restore a Dataproc Metastore service from a backup.

     The restore is requested through ``DataprocMetastoreHook``; completion is
     then awaited by ``_wait_for_restore_service``.

     :param context: task context (not used by this method).
     """
     metastore_hook = DataprocMetastoreHook(
         gcp_conn_id=self.gcp_conn_id,
         impersonation_chain=self.impersonation_chain,
     )
     self.log.info(
         "Restoring Dataproc Metastore service: %s from backup: %s",
         self.service_id,
         self.backup_id,
     )

     restore_request = {
         "project_id": self.project_id,
         "region": self.region,
         "service_id": self.service_id,
         "backup_project_id": self.backup_project_id,
         "backup_region": self.backup_region,
         "backup_service_id": self.backup_service_id,
         "backup_id": self.backup_id,
         "restore_type": self.restore_type,
         "request_id": self.request_id,
         "retry": self.retry,
         "timeout": self.timeout,
         "metadata": self.metadata,
     }
     metastore_hook.restore_service(**restore_request)
     # The helper's return value is intentionally discarded here.
     self._wait_for_restore_service(metastore_hook)
     self.log.info("Service %s restored from backup %s", self.service_id, self.backup_id)
예제 #17
0
 def _wait_for_restore_service(self, hook: DataprocMetastoreHook):
     """
     Poll the service until the first entry of its ``restores`` list finishes.

     This is a workaround for checking that the restore completed: parsing the
     operation result into ``Restore`` inside the SDK was found to be
     problematic. Each iteration sleeps first, using delays produced by
     ``exponential_sleep_generator(initial=10, maximum=120)``, and then
     re-fetches the service.

     :param hook: hook used to fetch the service.
     :return: the ``Restore`` entry once its state is ``SUCCEEDED``.
     :raises AirflowException: when the restore state is ``FAILED``.
     """
     for delay in exponential_sleep_generator(initial=10, maximum=120):
         sleep(delay)
         current = hook.get_service(
             region=self.region,
             project_id=self.project_id,
             service_id=self.service_id,
             retry=self.retry,
             timeout=self.timeout,
             metadata=self.metadata,
         )
         first_restore = current.metadata_management_activity.restores[0]
         if first_restore.state == Restore.State.SUCCEEDED:
             return first_restore
         if first_restore.state == Restore.State.FAILED:
             raise AirflowException("Restoring service FAILED")
         # Any other state: keep polling with the next delay.
예제 #18
0
 def _wait_for_export_metadata(self, hook: DataprocMetastoreHook):
     """
     Poll the service until the first entry of its ``metadata_exports`` list finishes.

     This is a workaround for checking that the export was created: parsing the
     operation result into ``MetadataExport`` inside the SDK was found to be
     problematic. Each iteration sleeps first, using delays produced by
     ``exponential_sleep_generator(initial=10, maximum=120)``, and then
     re-fetches the service.

     :param hook: hook used to fetch the service.
     :return: the ``MetadataExport`` entry once its state is ``SUCCEEDED``.
     :raises AirflowException: when the export state is ``FAILED``.
     """
     for delay in exponential_sleep_generator(initial=10, maximum=120):
         sleep(delay)
         current = hook.get_service(
             region=self.region,
             project_id=self.project_id,
             service_id=self.service_id,
             retry=self.retry,
             timeout=self.timeout,
             metadata=self.metadata,
         )
         first_export = current.metadata_management_activity.metadata_exports[0]
         if first_export.state == MetadataExport.State.SUCCEEDED:
             return first_export
         if first_export.state == MetadataExport.State.FAILED:
             message = f"Exporting metadata from Dataproc Metastore {first_export.name} FAILED"
             raise AirflowException(message)
         # Any other state: keep polling with the next delay.