Example #1
    def execute(self, context):
        hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to)

        hook.create_bucket(bucket_name=self.bucket_name,
                           resource=self.resource,
                           storage_class=self.storage_class,
                           location=self.location,
                           project_id=self.project_id,
                           labels=self.labels)
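
This execute method belongs to a bucket-creation operator, so it only runs once the operator is placed in a DAG. Below is a minimal sketch of how such an operator could be wired up; the operator name and import path follow the Airflow 1.x contrib layout, and the bucket name and project ID are placeholders, none of which come from the example above.

    # Minimal sketch, assuming the Airflow 1.x contrib layout; bucket name and
    # project ID are placeholders.
    from datetime import datetime

    from airflow import DAG
    from airflow.contrib.operators.gcs_operator import (
        GoogleCloudStorageCreateBucketOperator,
    )

    with DAG(dag_id="gcs_create_bucket_example",
             start_date=datetime(2021, 1, 1),
             schedule_interval=None) as dag:
        create_bucket = GoogleCloudStorageCreateBucketOperator(
            task_id="create_bucket",
            bucket_name="my-example-bucket",
            storage_class="MULTI_REGIONAL",
            location="US",
            project_id="my-gcp-project",
            labels={"env": "dev"},
            google_cloud_storage_conn_id="google_cloud_default",
        )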
Example #2
    def execute(self, context):
        if self.labels is not None:
            self.labels.update(
                {'airflow-version': 'v' + version.replace('.', '-').replace('+', '-')}
            )

        hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to
        )

        hook.create_bucket(bucket_name=self.bucket_name,
                           storage_class=self.storage_class,
                           location=self.location,
                           project_id=self.project_id,
                           labels=self.labels)
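
The in-place update of self.labels sanitizes the Airflow version string before it is attached as a bucket label, since GCS label values only allow lowercase letters, digits, hyphens and underscores. A quick illustration of that transformation, using a made-up version string:

    # Illustration of the label-value sanitization above; the version string
    # here is a hypothetical example.
    version = "1.10.12+composer"
    label_value = 'v' + version.replace('.', '-').replace('+', '-')
    print(label_value)  # v1-10-12-composer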
Example #3
    def execute(self, context):
        if self.labels is not None:
            self.labels.update({
                'airflow-version':
                'v' + version.replace('.', '-').replace('+', '-')
            })

        hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to)

        hook.create_bucket(bucket_name=self.bucket_name,
                           storage_class=self.storage_class,
                           location=self.location,
                           project_id=self.project_id,
                           labels=self.labels)
Example #4
    def create_env(run_id, **context):
        """Run id should be some random UUID.
        """

        ghook = GoogleCloudStorageHook()  # uses default gcp connection
        bucket_name = context["dag_run"].conf.get('source')
        project_id = context["dag_run"].conf.get("project_id")
        if not TEST_MODE:
            """
            # _process bucket could already exist
            try:
                subprocess.check_output([f"gsutil mb -p {project_id} -l US-EAST4 -b on gs://{bucket_name + '_process'}"], shell=True).decode()
            except Exception:
                pass

            # other buckets should not have been created before

            # this data can be used for chunk-based image processing
            try:
                subprocess.check_output([f"gsutil mb -p {project_id} -l US-EAST4 -b on gs://{bucket_name + '_chunk_' + run_id}"], shell=True).decode()
            except Exception:
                pass

            # will be auto deleted
            try:
                subprocess.check_output([f"gsutil mb -p {project_id} -l US-EAST4 -b on gs://{bucket_name + '_tmp_' + run_id}"], shell=True).decode()
            except Exception:
                pass

            # will be made public readable
            try:
                subprocess.check_output([f"gsutil mb -p {project_id} -l US-EAST4 -b on gs://{bucket_name + '_ng_' + run_id}"], shell=True).decode()
            except Exception:
                pass
            """

            # note: the hook interface does not support enabling uniform IAM.
            # create bucket for configs (ignore if it already exists)
            try:
                ghook.create_bucket(bucket_name=bucket_name + "_process",
                                    project_id=project_id,
                                    storage_class="REGIONAL",
                                    location="US-EAST4")
            except AirflowException as e:
                # ignore the error only if the bucket already exists (409)
                if not str(e).startswith("409"):
                    raise

            # other buckets should not have been created before

            # this data can be used for chunk-based image processing
            ghook.create_bucket(bucket_name=bucket_name + "_chunk_" + run_id,
                                project_id=project_id,
                                storage_class="REGIONAL",
                                location="US-EAST4")

            # will be auto deleted
            ghook.create_bucket(
                bucket_name=bucket_name + "_tmp_" + run_id,
                project_id=project_id
            )  #, storage_class="REGIONAL", location="US-EAST4")

            # will be made public readable
            ghook.create_bucket(bucket_name=bucket_name + "_ng_" + run_id,
                                project_id=project_id,
                                storage_class="REGIONAL",
                                location="US-EAST4")

            # dump configuration
            client = ghook.get_conn()
            source = context["dag_run"].conf.get("source")
            bucket = client.bucket(source + "_process")
            blob = bucket.blob(
                blob_name=f"{context['dag_run'].run_id}/init.json")

            data = context["dag_run"].conf
            data["execution_date"] = str(context.get("execution_date"))
            data = json.dumps(data)
            blob.upload_from_string(data)
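
In an Airflow 1.x DAG file, a callable like create_env is typically attached to the DAG through a PythonOperator. The sketch below shows one way to do that; the DAG id, task id and the use of the rendered {{ run_id }} macro as the run_id argument are assumptions, and create_env, TEST_MODE, json and subprocess are presumed to be defined or imported in the same file.

    # Minimal sketch, assuming Airflow 1.x (provide_context); names are placeholders.
    from datetime import datetime

    from airflow import DAG
    from airflow.operators.python_operator import PythonOperator

    with DAG(dag_id="create_env_example",
             start_date=datetime(2021, 1, 1),
             schedule_interval=None) as dag:
        create_env_task = PythonOperator(
            task_id="create_env",
            python_callable=create_env,
            provide_context=True,                  # Airflow 1.x style kwargs passing
            # op_kwargs is templated, so the DAG run id is rendered at runtime;
            # a random UUID generated upstream would also satisfy the docstring.
            op_kwargs={"run_id": "{{ run_id }}"},
        )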