def execute(self, context):
    if self.sa360_hook is None:
        self.sa360_hook = GoogleSearchAds360Hook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to)
    if self.gcs_hook is None:
        self.gcs_hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to)

    request = self.sa360_hook.get_service().reports().get(
        reportId=self.report_id)
    response = request.execute()

    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        self._download_report(self.report_id, temp_file,
                              len(response['files']))
        destination_object_name = self._get_destination_uri(
            self.destination_object, temp_file)
        self.gcs_hook.upload(bucket=self.destination_bucket,
                             object=destination_object_name,
                             filename=temp_file.name,
                             multipart=True)
        context['task_instance'].xcom_push('destination_bucket',
                                           self.destination_bucket)
        context['task_instance'].xcom_push('destination_object',
                                           destination_object_name)
    finally:
        temp_file.close()
        os.unlink(temp_file.name)
def execute(self, context):
    bq_hook = BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
                           delegate_to=self.delegate_to)

    if not self.schema_fields and self.gcs_schema_object:
        gcs_bucket, gcs_object = _parse_gcs_url(self.gcs_schema_object)
        gcs_hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to)
        schema_fields = json.loads(
            gcs_hook.download(gcs_bucket, gcs_object).decode("utf-8"))
    else:
        schema_fields = self.schema_fields

    conn = bq_hook.get_conn()
    cursor = conn.cursor()

    cursor.create_empty_table(project_id=self.project_id,
                              dataset_id=self.dataset_id,
                              table_id=self.table_id,
                              schema_fields=schema_fields,
                              time_partitioning=self.time_partitioning,
                              labels=self.labels)
def execute(self, context):
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id
    )

    hook.insert_object_acl(bucket_name=self.bucket,
                           object_name=self.object_name,
                           entity=self.entity,
                           role=self.role,
                           generation=self.generation,
                           user_project=self.user_project)
def execute(self, context):
    logging.info('Exporting data to Cloud Storage bucket ' + self.bucket)

    if self.overwrite_existing and self.namespace:
        gcs_hook = GoogleCloudStorageHook(self.cloud_storage_conn_id)
        objects = gcs_hook.list(self.bucket, prefix=self.namespace)
        for o in objects:
            gcs_hook.delete(self.bucket, o)

    ds_hook = DatastoreHook(self.datastore_conn_id, self.delegate_to)
    result = ds_hook.export_to_storage_bucket(
        bucket=self.bucket,
        namespace=self.namespace,
        entity_filter=self.entity_filter,
        labels=self.labels)
    operation_name = result['name']
    result = ds_hook.poll_operation_until_done(
        operation_name, self.polling_interval_in_seconds)

    state = result['metadata']['common']['state']
    if state != 'SUCCESSFUL':
        raise AirflowException(
            'Operation failed: result={}'.format(result))

    if self.xcom_push:
        return result
def execute(self, context):
    bq_hook = BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
                           delegate_to=self.delegate_to)

    if not self.schema_fields and self.schema_object \
            and self.source_format != 'DATASTORE_BACKUP':
        gcs_hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to)
        schema_fields = json.loads(gcs_hook.download(
            self.bucket, self.schema_object).decode("utf-8"))
    else:
        schema_fields = self.schema_fields

    source_uris = ['gs://{}/{}'.format(self.bucket, source_object)
                   for source_object in self.source_objects]

    conn = bq_hook.get_conn()
    cursor = conn.cursor()

    cursor.create_external_table(
        external_project_dataset_table=self.destination_project_dataset_table,
        schema_fields=schema_fields,
        source_uris=source_uris,
        source_format=self.source_format,
        compression=self.compression,
        skip_leading_rows=self.skip_leading_rows,
        field_delimiter=self.field_delimiter,
        max_bad_records=self.max_bad_records,
        quote_character=self.quote_character,
        allow_quoted_newlines=self.allow_quoted_newlines,
        allow_jagged_rows=self.allow_jagged_rows,
        src_fmt_configs=self.src_fmt_configs,
        labels=self.labels
    )
def execute(self, context):
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to)
    bq_hook = BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
                           delegate_to=self.delegate_to)

    # Use the explicit schema if one was given, otherwise read it from GCS.
    schema_fields = self.schema_fields if self.schema_fields \
        else json.loads(gcs_hook.download(self.bucket, self.schema_object))

    source_uris = ['gs://{}/{}'.format(self.bucket, source_object)
                   for source_object in self.source_objects]

    conn = bq_hook.get_conn()
    cursor = conn.cursor()
    cursor.run_load(
        destination_project_dataset_table=self.destination_project_dataset_table,
        schema_fields=schema_fields,
        source_uris=source_uris,
        source_format=self.source_format,
        create_disposition=self.create_disposition,
        skip_leading_rows=self.skip_leading_rows,
        write_disposition=self.write_disposition,
        field_delimiter=self.field_delimiter)

    if self.max_id_key:
        cursor.execute('SELECT MAX({}) FROM {}'.format(
            self.max_id_key, self.destination_project_dataset_table))
        row = cursor.fetchone()
        max_id = row[0] if row[0] else 0
        logging.info('Loaded BQ data with max {}.{}={}'.format(
            self.destination_project_dataset_table, self.max_id_key, max_id))
        return max_id
def copy_docs_to_gcs(self, bucket: str, bucket_path: str, project_path: str):
    """
    Copy doc files generated with dbt docs generate to GCS

    :param bucket: Bucket where the doc files will be copied
    :param bucket_path: Path in the bucket
    :param project_path: Local project folder
    """
    hook = GoogleCloudStorageHook()
    for doc_file in DBT_DOC_FILES:
        doc_file_path = f"{project_path}/{DBT_DOC_FOLDER}/{doc_file}"
        if os.path.exists(doc_file_path):
            logging.info(
                f"{doc_file} found. Copying to gs://{bucket}/{bucket_path}"
            )
            hook.upload(
                bucket,
                object=f"{bucket_path}/{doc_file}" if bucket_path else doc_file,
                filename=doc_file_path,
                mime_type="text/html" if doc_file.endswith(".html")
                else "application/json",
            )
        else:
            logging.warning(f"{doc_file} not found. Skipping")
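# A minimal sketch (not part of the source) of the module-level names that
# copy_docs_to_gcs above assumes. The folder and file list are assumptions
# based on what `dbt docs generate` typically writes to a project's target/
# directory; adjust them to the actual project layout.
import logging
import os

from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook

DBT_DOC_FOLDER = "target"  # assumed dbt docs output folder
DBT_DOC_FILES = ["index.html", "manifest.json", "catalog.json"]  # assumed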
def data_to_GCS(csv_name: str, folder_name: str,
                bucket_name="task_ts_data", **kwargs):
    hook = GoogleCloudStorageHook()
    data = load_data()
    df = pd.DataFrame(data=data)
    df.to_csv('corona_data.csv', index=False)

    columns_to_consider_for_uniqueness = ['country', 'region', 'sub_region']
    unique_column_name = 'full_county'
    minimum_datapoints_threshold = 60

    # Split the data-frame based on state or county.
    unique_df_list = []
    for col in columns_to_consider_for_uniqueness:
        df[col] = df[col].fillna('').apply(lambda x: x.replace(" ", "_"))
    df[unique_column_name] = df[columns_to_consider_for_uniqueness[0]].str.cat(
        df[columns_to_consider_for_uniqueness[1:]], sep="__")

    for i, g in df.groupby('full_county'):
        df_code = g.copy()
        ts_count = len(df_code)
        if ts_count > minimum_datapoints_threshold:
            df_code.reset_index(
                drop=True).loc[:, ~df.columns.str.contains('^Unnamed')].to_csv(
                    '{}.csv'.format(i), index=False)
            hook.upload(bucket_name,
                        object='{}/{}.csv'.format(folder_name, i),
                        filename='{}.csv'.format(i),
                        mime_type='text/csv')


"""
Function for full data pull
def poke(self, context):
    self.log.info('Sensor checks existence of objects: %s, %s',
                  self.bucket, self.prefixes)
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_conn_id,
        delegate_to=self.delegate_to)

    objects = []
    for prefix in self.prefixes:
        objects.extend(list(hook.list(self.bucket, prefix=prefix)))
    self.log.info(f'Objects list: {objects}')

    names, files, objects = names_match(objects)
    if names:
        ti = context['ti']
        self.__download(hook, objects, files, ti)
        ti.xcom_push(key='names', value=names)
        ti.xcom_push(key='files', value=files)
        ti.xcom_push(key='objects', value=objects)
        for name, fil in zip(names, files):
            ti.xcom_push(key=f'{name}', value=fil)
        self.log.info(
            f'names: {names}\nfiles: {files}\nobjects: {objects}')
        data_timestamp = current_datetime().isoformat()
        ti.xcom_push(key='data_timestamp', value=data_timestamp)
        return True
    return False
def execute(self, context):
    """
    1. Prepare data from the Dimension table, clean & store in CSV on local
    2. Upload the CSV to GCS
    """
    # depending on the flow name, execute the task
    if self.flow_name == 'dimension_currency':
        # prepare dimension data
        dimension_currency_to_csv(self.raw_data_filepath,
                                  self.clean_filepath)
    elif self.flow_name == 'exchange_rate_history':
        # prepare exchange rate history data
        exchange_rate_history_to_csv(self.raw_data_filepath,
                                     self.clean_filepath)
    else:
        raise AirflowException("Incorrect Flow name")

    # upload file to GCS
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id)
    hook.upload(bucket=self.gcs_bucket,
                object=self.gcs_filepath,
                filename=self.clean_filepath)
    logging.info("File uploaded to GCS")

    # remove local file
    if os.path.exists(self.clean_filepath):
        os.remove(self.clean_filepath)
        logging.info(f"{self.clean_filepath} : File deleted from local")
def execute(self, context):
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to
    )
    s3_hook = S3Hook(aws_conn_id=self.dest_aws_conn_id,
                     verify=self.dest_verify)

    if not gcs_hook.exists(self.gcs_source_bucket, self.gcs_source_uri):
        self.log.error('Skip object not found: gs://%s/%s',
                       self.gcs_source_bucket, self.gcs_source_uri)
        raise AirflowException(
            'Skip object not found: gs://{}/{}'.format(
                self.gcs_source_bucket, self.gcs_source_uri))

    tmp = tempfile.NamedTemporaryFile()

    self.log.info('Download gs://%s/%s',
                  self.gcs_source_bucket, self.gcs_source_uri)
    gcs_hook.download(
        bucket=self.gcs_source_bucket,
        object=self.gcs_source_uri,
        filename=tmp.name,
    )

    self.log.info('Upload s3://%s/%s',
                  self.s3_destination_bucket, self.s3_destination_uri)
    s3_hook.load_file(
        filename=tmp.name,
        bucket_name=self.s3_destination_bucket,
        key=self.s3_destination_uri,
        replace=True,
        acl_policy=self.s3_acl_policy
    )

    tmp.close()
def outputManager(self, context, output, key, bucket):
    if output is None or len(output) == 0:
        if self.total_output_files == 0:
            logging.info("No records pulled from Hubspot.")

            downstream_tasks = context['task'].get_flat_relatives(
                upstream=False)

            logging.info('Skipping downstream tasks...')
            logging.debug("Downstream task_ids %s", downstream_tasks)

            if downstream_tasks:
                self.skip(context['dag_run'],
                          context['ti'].execution_date,
                          downstream_tasks)
    else:
        logging.info('Logging {0} to GCS...'.format(key))

        # flatten() and boa.constrict() are helpers assumed to be imported
        # elsewhere: they flatten nested records and snake_case the keys
        # before serializing each record as a JSON line.
        output = [flatten(e) for e in output]
        output = '\n'.join([json.dumps({boa.constrict(k): v
                                        for k, v in i.items()})
                            for i in output])

        gcs = GoogleCloudStorageHook(self.gcs_conn_id)

        with open("__temp__", "w") as fid:
            fid.write(output)

        gcs.upload(self.gcs_bucket, self.gcs_object, "__temp__")

        self.total_output_files += 1
def execute(self, context):
    self.log.info('Executing copy - Source_Bucket: %s, Source_directory: %s, '
                  'Destination_bucket: %s, Destination_directory: %s',
                  self.source_bucket, self.source_object,
                  self.destination_bucket or self.source_bucket,
                  self.destination_directory or self.source_object)

    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to)
    self.log.info('Getting list of the files to copy. '
                  'Source Bucket: %s; Source Object: %s',
                  self.source_bucket, self.source_object)

    # Create a list of objects to copy from the source bucket. The function
    # uses the prefix keyword to pass the name of the object to copy.
    self.files_to_copy = hook.list(bucket=self.source_bucket,
                                   prefix=self.source_object,
                                   delimiter=self.source_files_delimiter)

    # Log the names of all objects to be copied
    self.log.info('Files to copy: %s', self.files_to_copy)

    if self.files_to_copy is not None:
        for file_to_copy in self.files_to_copy:
            self.log.info('Source_Bucket: %s, Source_Object: %s, '
                          'Destination_bucket: %s, Destination_Directory: %s',
                          self.source_bucket, file_to_copy,
                          self.destination_bucket or self.source_bucket,
                          self.destination_directory + file_to_copy)
            hook.copy(self.source_bucket, file_to_copy,
                      self.destination_bucket,
                      self.destination_directory + file_to_copy)
    else:
        self.log.info('No files to copy.')
def gcs_to_psql_import(**kwargs):
    fd, tmp_filename = tempfile.mkstemp(text=True)

    # download file locally
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=kwargs['gcp_conn_id'])
    gcs_hook.download(bucket=kwargs['bucket'],
                      object=kwargs['object'],
                      filename=tmp_filename)
    del gcs_hook

    # load the file into postgres
    pg_hook = PostgresHook(postgres_conn_id=kwargs['postgres_conn_id'],
                           schema=kwargs['database'])
    pg_hook.bulk_load(
        '{schema}.{table}'.format(schema=kwargs['schema'],
                                  table=kwargs['table']),
        tmp_filename)

    # output errors
    for output in pg_hook.conn.notices:
        print(output)

    # remove temp file
    os.close(fd)
    os.unlink(tmp_filename)
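# Hedged usage sketch (not from the source): one way the gcs_to_psql_import
# callable above could be wired into an Airflow 1.x DAG via a PythonOperator.
# All connection ids, bucket/object names, and table names are placeholder
# assumptions.
from airflow.operators.python_operator import PythonOperator

load_csv_into_postgres = PythonOperator(
    task_id="gcs_to_psql_import",
    python_callable=gcs_to_psql_import,
    op_kwargs={
        "gcp_conn_id": "google_cloud_default",   # assumed connection id
        "bucket": "example-bucket",              # assumed source bucket
        "object": "exports/data.tsv",            # assumed object (tab-separated for bulk_load)
        "postgres_conn_id": "postgres_default",  # assumed connection id
        "database": "analytics",                 # assumed database
        "schema": "public",                      # assumed schema
        "table": "imported_data",                # assumed table
    },
    dag=dag,  # assumes a `dag` object defined elsewhere
)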
def do_copy_model_to_final(**kwargs):
    gcs = GoogleCloudStorageHook()

    # Returns all the objects within the bucket. All sub-buckets are
    # considered as prefix of the leaves. List does not differentiate
    # files from subbuckets.
    all_jobs_files = gcs.list(
        bucket=COMPOSER_BUCKET_NAME,
        prefix='{}/export/estimate'.format(PREFIX_JOBS_EXPORT))

    # Extract the latest model bucket, parent of variables/ and
    # saved_model.pbtxt. The max() string contains the latest model folder
    # in 1234567; we extract that using a regex.
    # ex: jobs/clv-composer/export/estimate/1234567890/variables/variables.index
    # returns /1234567890/
    latest_model_bucket = re.findall(r'/\d+/', max(all_jobs_files))[0]

    # List all the files that need to be copied (only files in the latest
    # bucket, skipping the ones that are not files but sub buckets).
    for c in [
        f for f in all_jobs_files
        if latest_model_bucket in f and f[-1] != '/'
    ]:
        # The model used for training is saved into a 'final' sub bucket of
        # the export bucket.
        dest_object = c.split(latest_model_bucket)[1]
        dest_object = '{}/{}'.format(PREFIX_FINAL_MODEL, dest_object)

        logging.info("Copying {} to {} ...".format(dest_object,
                                                   COMPOSER_BUCKET_NAME))

        gcs.copy(source_bucket=COMPOSER_BUCKET_NAME,
                 source_object=c,
                 destination_object=dest_object)
def execute(self, context):
    if self.gcs_hook is None:
        self.gcs_hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to)
    if self.cm_hook is None:
        self.cm_hook = GoogleCampaignManagerHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to)

    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        report_file_name = self._download_report(self.report_id,
                                                 self.file_id,
                                                 temp_file,
                                                 self.chunk_size)
        destination_object_name = self._get_destination_uri(
            self.destination_object, report_file_name)
        self.gcs_hook.upload(bucket=self.destination_bucket,
                             object=destination_object_name,
                             filename=temp_file.name,
                             gzip=True,
                             multipart=True)

        context['task_instance'].xcom_push('destination_bucket',
                                           self.destination_bucket)
        context['task_instance'].xcom_push('destination_object',
                                           destination_object_name)
    finally:
        temp_file.close()
        os.unlink(temp_file.name)
def execute(self, context):
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to)

    if '*' in self.source_object:
        wildcard_position = self.source_object.index('*')
        objects = hook.list(
            self.source_bucket,
            prefix=self.source_object[:wildcard_position],
            delimiter=self.source_object[wildcard_position + 1:])

        for source_object in objects:
            self.log.info('Executing copy of gs://{0}/{1} to '
                          'gs://{2}/{3}/{1}'.format(
                              self.source_bucket, source_object,
                              self.destination_bucket,
                              self.destination_object))
            hook.copy(self.source_bucket, source_object,
                      self.destination_bucket,
                      "{}/{}".format(self.destination_object, source_object))

            if self.move_object:
                hook.delete(self.source_bucket, source_object)
    else:
        self.log.info('Executing copy: %s, %s, %s, %s',
                      self.source_bucket, self.source_object,
                      self.destination_bucket or self.source_bucket,
                      self.destination_object or self.source_object)
        hook.copy(self.source_bucket, self.source_object,
                  self.destination_bucket, self.destination_object)

        if self.move_object:
            hook.delete(self.source_bucket, self.source_object)
def execute(self, context):
    # use the super to list all files in a Google Cloud Storage bucket
    files = super(GoogleCloudStorageToS3Operator, self).execute(context)

    s3_hook = S3Hook(aws_conn_id=self.dest_aws_conn_id,
                     verify=self.dest_verify)

    if not self.replace:
        # if we are not replacing -> list all files in the S3 bucket
        # and only keep those files which are present in
        # Google Cloud Storage and not in S3
        bucket_name, _ = S3Hook.parse_s3_url(self.dest_s3_key)
        existing_files = s3_hook.list_keys(bucket_name)
        files = set(files) - set(existing_files)

    if files:
        hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
            delegate_to=self.delegate_to
        )

        for file in files:
            file_bytes = hook.download(self.bucket, file)

            dest_key = self.dest_s3_key + file
            self.log.info("Saving file to %s", dest_key)

            s3_hook.load_bytes(file_bytes,
                               key=dest_key,
                               replace=self.replace)

        self.log.info("All done, uploaded %d files to S3", len(files))
    else:
        self.log.info("In sync, no files needed to be uploaded to S3")

    return files
def execute(self, context):
    """
    See class definition.
    """
    # Get columns from Salesforce
    sf_cols = self.fetch_sf_columns(self.sf_conn_id, self.sf_object)
    print('this is SF data')
    print(sf_cols)
    self.xcom_push(context, key='sf_cols',
                   value=[col['sf_name'] for col in sf_cols])

    # Get columns from BigQuery
    # bq_cols = self.fetch_bq_columns(self.bq_table)
    bq_cols = self.patch_bq_cols(self.bq_table, sf_cols)

    gcs = GoogleCloudStorageHook(self.gcs_conn_id)
    with NamedTemporaryFile("w") as tmp:
        tmp.file.write(str(bq_cols).replace("'", '"'))
        tmp.file.flush()
        gcs.upload(bucket=self.gcs_bucket,
                   object=self.gcs_key,
                   filename=tmp.name)

    self.xcom_push(context, key='bq_cols', value=str(bq_cols))
def execute(self, context):
    if self.gcs_hook is None:
        self.gcs_hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to)
    if self.ga_hook is None:
        self.ga_hook = GoogleAnalyticsManagementHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
        )

    try:
        temp_ga_upload_file = tempfile.NamedTemporaryFile(delete=False)
        self._get_file_from_cloud_storage(self.gcs_hook,
                                          self.storage_bucket,
                                          self.storage_name_object,
                                          temp_ga_upload_file)
        self.ga_hook.upload_file(temp_ga_upload_file.name,
                                 self.account_id,
                                 self.web_property_id,
                                 self.custom_data_source_id,
                                 self.mime_type)
    finally:
        temp_ga_upload_file.close()
        os.unlink(temp_ga_upload_file.name)
def execute(self, context):
    if self.hook is None:
        self.hook = GoogleCloudStorageHook(
            google_cloud_storage_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to)

    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        # TODO(efolgar): Directly stream to storage instead of temp file
        self._download_report(self.report_url, temp_file, self.chunk_size)
        destination_object_name = self._get_destination_uri(
            self.destination_object, self.report_url)
        self.hook.upload(
            bucket=self.destination_bucket,
            object=destination_object_name,
            filename=temp_file.name,
            multipart=True)

        context['task_instance'].xcom_push(
            'destination_bucket', self.destination_bucket)
        context['task_instance'].xcom_push(
            'destination_object', destination_object_name)
    finally:
        temp_file.close()
        os.unlink(temp_file.name)
def execute(self, context):
    self.log.info("Fetching launch data")
    launch_hook = LaunchHook(conn_id=self._launch_conn_id)
    result = launch_hook.get_launches(
        start_date=self._start_date, end_date=self._end_date
    )
    self.log.info("Fetched data for %d launches", len(result))

    self.log.info(
        "Uploading data to gcs://%s/%s", self._output_bucket, self._output_path
    )
    gcs_hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self._gcp_conn_id
    )
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "result.json")
        with open(tmp_path, "w") as file_:
            json.dump(result, file_)

        gcs_hook.upload(
            bucket=self._output_bucket,
            object=self._output_path,
            filename=tmp_path
        )
def _upload_to_gcs(self, files_to_upload):
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to)
    for object, tmp_file_handle in files_to_upload.items():
        hook.upload(self.bucket, object, tmp_file_handle.name,
                    'application/json')
def _get_data_from_gcs(gcp_conn_id, bucket, input):
    hook = GoogleCloudStorageHook(google_cloud_storage_conn_id=gcp_conn_id)
    tmp_file = NamedTemporaryFile(delete=False)
    hook.download(bucket, input, tmp_file.name)
    filename = tmp_file.name
    return filename
def poke(self, context):
    self.log.info('Sensor checks existence of : %s, %s',
                  self.bucket, self.object)
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_conn_id,
        delegate_to=self.delegate_to)
    return hook.exists(self.bucket, self.object)
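# Hedged usage sketch (not from the source): the poke above matches the stock
# GoogleCloudStorageObjectSensor shipped in Airflow 1.x contrib, so a task
# built on it could look like the following. Bucket, object, and task_id are
# placeholder assumptions.
from airflow.contrib.sensors.gcs_sensor import GoogleCloudStorageObjectSensor

wait_for_object = GoogleCloudStorageObjectSensor(
    task_id="wait_for_object",                    # assumed task id
    bucket="example-bucket",                      # assumed bucket
    object="exports/data.csv",                    # assumed object path
    google_cloud_conn_id="google_cloud_default",  # default GCP connection
    poke_interval=60,
    timeout=60 * 60,
    dag=dag,  # assumes a `dag` object defined elsewhere
)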
def execute(self, context):
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.gcp_conn_id)

    hook.insert_bucket_acl(bucket_name=self.bucket,
                           entity=self.entity,
                           role=self.role,
                           user_project=self.user_project)
def poke(self, context):
    self.log.info('Sensor checks existence of objects: %s, %s',
                  self.bucket, self.prefix)
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_conn_id,
        delegate_to=self.delegate_to)
    return bool(hook.list(self.bucket, prefix=self.prefix))
def execute(self, context):
    logging.info('Executing download: %s, %s, %s',
                 self.bucket, self.object, self.filename)
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_storage_conn_id,
        delegate_to=self.delegate_to)
    print(hook.download(self.bucket, self.object, self.filename))
def schema(self):
    hook = GoogleCloudStorageHook()
    objs = hook.download(
        self.config['bucket_name'],
        '{}/{}.json'.format(self.config['schemas_clean_path'], self.table))
    return json.loads(objs)
def poke(self, context):
    logging.info('Sensor checks existence of : %s, %s',
                 self.bucket, self.object)
    hook = GoogleCloudStorageHook(
        google_cloud_storage_conn_id=self.google_cloud_conn_id,
        delegate_to=self.delegate_to)
    return hook.is_updated_after(self.bucket, self.object,
                                 self.ts_func(context))