Example #1
0
def do_list_predictions_files(**kwargs):
    """List the predictions files that should be loaded to BigQuery.

    A GoogleCloudStorageToBigQueryOperator cannot be used directly because
    the predictions may be spread over multiple files. Instead, the matching
    object names are listed here and published through XCom under the key
    ``'predictions_files'`` so that the next task can consume them.

    Args:
        **kwargs: Task context. Must contain ``'ti'``, an object exposing
            ``xcom_push`` (presumably the Airflow task instance -- confirm
            that the calling operator passes the context).

    Returns:
        None. The result is delivered through XCom, not the return value.

    Raises:
        KeyError: If ``'ti'`` is not present in ``kwargs``.
    """
    # TODO: Once Composer runs Airflow 2.0, replace the hook call below with a
    # GoogleCloudStorageListOperator task (task_id='predictions_files') that
    # uses the same bucket and prefix.
    gcs = GoogleCloudStorageHook()
    predictions_files = gcs.list(
        bucket=COMPOSER_BUCKET_NAME,
        prefix='predictions/output/prediction.results-')

    # Hand the value to logging as an argument so the message is only
    # formatted when the record is actually emitted.
    logging.info("Predictions files are: %s", predictions_files)

    # Create a variable that can be used in the next task.
    kwargs['ti'].xcom_push(key='predictions_files', value=predictions_files)