Example #1
    def __init__(self):
        """
        Attempt to create hook with airflow[gcloud] (and set
        use_gcloud = True), otherwise uses airflow[gcp_api]
        """
        remote_conn_id = configuration.get('core', 'REMOTE_LOG_CONN_ID')
        self.use_gcloud = False

        try:
            from airflow.contrib.hooks import GCSHook
            self.hook = GCSHook(remote_conn_id)
            self.use_gcloud = True
        except:
            try:
                from airflow.contrib.hooks import GoogleCloudStorageHook
                self.hook = GoogleCloudStorageHook(
                    scope='https://www.googleapis.com/auth/devstorage.read_write',
                    google_cloud_storage_conn_id=remote_conn_id)
            except:
                self.hook = None
                logging.error(
                    'Could not create a GCSHook with connection id "{}". '
                    'Please make sure that either airflow[gcloud] or '
                    'airflow[gcp_api] is installed and the GCS connection '
                    'exists.'.format(remote_conn_id))
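
The constructor above pulls the connection id from the [core] section of airflow.cfg. A minimal configuration sketch for pointing task logs at GCS with an Airflow 1.x-era config; the bucket path and connection id are placeholders, not taken from the example:

[core]
remote_base_log_folder = gs://my-airflow-logs/logs
remote_log_conn_id = google_cloud_default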
Example #2
    def __init__(self):
        """
        Attempt to create hook with airflow[gcp_api].
        """
        remote_conn_id = configuration.get('core', 'REMOTE_LOG_CONN_ID')
        self.hook = None

        try:
            from airflow.contrib.hooks import GoogleCloudStorageHook
            self.hook = GoogleCloudStorageHook(
                google_cloud_storage_conn_id=remote_conn_id)
        except:
            logging.error(
                'Could not create a GoogleCloudStorageHook with connection id '
                '"{}". Please make sure that airflow[gcp_api] is installed '
                'and the GCS connection exists.'.format(remote_conn_id))
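
Once the hook is constructed, the surrounding log handler typically uses it to fetch the remote copy of a log file. A hedged sketch of such a read path, assuming the remote location is a gs://bucket/path URL; the method below is illustrative and not part of the original class:

    def read(self, remote_log_location):
        """Return the contents of the remote log, or '' if it cannot be read."""
        if self.hook is None:
            return ''
        try:
            # Split "gs://bucket/path/to/log" into bucket name and object path.
            bucket, blob = remote_log_location[len('gs://'):].split('/', 1)
            # GoogleCloudStorageHook.download() returns the object's contents
            # when no local filename is given.
            return self.hook.download(bucket, blob)
        except Exception:
            logging.error('Could not read logs from %s', remote_log_location)
            return ''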
Example #3
def do_list_predictions_files(**kwargs):
    """ Retrieves all the predictions files that should be loaded to BigQuery.
    Can not do a GoogleCloudStorageToBigQueryOperator directly due to the possible
    multiple files.
    """
    # List all relevant files
    # TODO Add when Composer is on Airflow 2.0
    # predictions_files = gcs_list_operator.GoogleCloudStorageListOperator(
    #     task_id='predictions_files',
    #     bucket=COMPOSER_BUCKET_NAME,
    #     prefix='predictions/output/prediction.results-'
    # )
    # TODO Remove when Composer is on Airflow 2.0
    gcs = GoogleCloudStorageHook()
    predictions_files = gcs.list(
        bucket=COMPOSER_BUCKET_NAME,
        prefix='predictions/output/prediction.results-')

    logging.info("Predictions files are: {}".format(predictions_files))

    # Create a variable that can be used in the next task
    kwargs['ti'].xcom_push(key='predictions_files', value=predictions_files)
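
The file list pushed to XCom above is meant to be consumed by a downstream task. A minimal sketch of such a consumer, assuming the listing task runs under a hypothetical task id 'list_predictions_files' and that COMPOSER_BUCKET_NAME is the same module-level constant used above:

def do_load_predictions_files(**kwargs):
    """Pull the file list from XCom and log what would be loaded to BigQuery."""
    predictions_files = kwargs['ti'].xcom_pull(
        task_ids='list_predictions_files', key='predictions_files')
    for predictions_file in predictions_files:
        # Each entry is an object path inside COMPOSER_BUCKET_NAME.
        logging.info("Would load gs://%s/%s to BigQuery",
                     COMPOSER_BUCKET_NAME, predictions_file)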
Example #4
    def output_manager(self, file_name):
        """
        Takes output and uploads to corresponding destination.
        """
        if self.destination.lower() == 's3':
            s3 = S3Hook(self.dest_conn_id)

            s3.load_file(filename=file_name,
                         key=self.key,
                         bucket_name=self.bucket,
                         replace=True)

        elif self.destination.lower() == 'gcs':
            print("Uploading File!")
            gcs = GoogleCloudStorageHook(self.dest_conn_id)

            gcs.upload(
                bucket=self.bucket,
                object=self.key,
                filename=file_name,
            )
            print("Uploaded file to  {0}/{1}".format(self.bucket, self.key))

        os.remove(file_name)
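
A hedged sketch of how output_manager() might be driven from the operator's execute(); the NamedTemporaryFile handling and the get_records() helper are illustrative assumptions, not part of the original code (it also assumes from tempfile import NamedTemporaryFile and import json):

    def execute(self, context):
        # Dump the extracted records to a local temp file, then hand it to
        # output_manager(), which uploads it and removes the local copy.
        with NamedTemporaryFile('w', suffix='.json', delete=False) as tmp:
            json.dump(self.get_records(), tmp)  # get_records() is hypothetical
            file_name = tmp.name
        self.output_manager(file_name)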
Example #5
    def execute(self, context):
        # Call the hook's get_results() and dump the JSON-serialised result
        # to a local file.
        hook = HttpHook(self.t1, self.t2)
        with open("launches.json", "w+t") as f:
            f.write(json.dumps(hook.get_results()))
        # Upload the local file to the "launchbucket" GCS bucket.
        GoogleCloudStorageHook().upload(
            bucket="launchbucket", object="launches.json", filename="launches.json")