# Standard-library and third-party imports used by the functions below,
# assuming a Django project (settings, connection) with python-dateutil available.
import csv
import logging

from dateutil.parser import parse
from dateutil.rrule import DAILY, rrule
from django.conf import settings
from django.db import connection

# AwsS3Uploader, NamedTemporaryGZip, and get_upload_path are project-local
# helpers assumed to be importable from the surrounding package.
LOG = logging.getLogger(__name__)

# Rows fetched per cursor.fetchmany() call; the real value is project-defined,
# 2000 here is only illustrative.
_DB_FETCH_BATCH_SIZE = 2000


def query_and_upload_to_s3(schema, table_export_setting, date_range):
    """
    Query the database and upload the results to s3.

    Args:
        schema (str): Account schema name in which to execute the query.
        table_export_setting (TableExportSetting): Settings for the table export.
        date_range (tuple): Pair of date objects of inclusive start and end dates.

    """
    uploader = AwsS3Uploader(settings.S3_BUCKET_NAME)
    start_date, end_date = date_range
    iterate_daily = table_export_setting.iterate_daily
    dates_to_iterate = rrule(DAILY, dtstart=start_date, until=end_date if iterate_daily else start_date)

    with connection.cursor() as cursor:
        cursor.db.set_schema(schema)
        for the_date in dates_to_iterate:
            upload_path = get_upload_path(
                schema,
                table_export_setting.provider,
                the_date,
                table_export_setting.output_name,
                iterate_daily,
            )
            cursor.execute(
                table_export_setting.sql.format(schema=schema),
                {
                    'start_date': the_date,
                    'end_date': the_date if iterate_daily else end_date,
                },
            )
            # Don't upload if the result set is empty.
            if cursor.rowcount == 0:
                return
            with NamedTemporaryGZip() as temp_file:
                writer = csv.writer(temp_file, quotechar='"', quoting=csv.QUOTE_MINIMAL)
                writer.writerow([field.name for field in cursor.description])
                for row in cursor.fetchall():
                    writer.writerow(row)
                temp_file.close()
                uploader.upload_file(temp_file.name, upload_path)

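# The NamedTemporaryGZip helper used above is not shown in this snippet. A
# minimal stand-in, assuming it yields a text-mode, gzip-compressed temporary
# file whose .name attribute remains readable after close(), might look like
# the sketch below; it is illustrative, not the project's actual helper.
import gzip
import os
import tempfile
from contextlib import contextmanager


@contextmanager
def NamedTemporaryGZip():
    """Yield a named, gzip-wrapped temporary text file; delete it on exit."""
    raw = tempfile.NamedTemporaryFile(suffix='.csv.gz', delete=False)
    raw.close()
    gzipped = gzip.open(raw.name, 'wt')
    try:
        yield gzipped
    finally:
        gzipped.close()  # Closing twice is harmless if the caller already closed.
        os.remove(raw.name)
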
def query_and_upload_to_s3(schema_name, provider_uuid, table_export_setting, start_date, end_date):
    """
    Query the database and upload the results to s3.

    Args:
        schema_name (str): Account schema name in which to execute the query.
        provider_uuid (UUID): Provider UUID for filtering the query.
        table_export_setting (dict): Settings for the table export.
        start_date (str): Start date (inclusive).
        end_date (str): End date (inclusive).

    """
    if not settings.ENABLE_S3_ARCHIVING:
        LOG.info("S3 Archiving is disabled. Not running task.")
        return

    LOG.info(
        "query_and_upload_to_s3: schema %s provider_uuid %s table.output_name %s for %s",
        schema_name,
        provider_uuid,
        table_export_setting["output_name"],
        (start_date, end_date),
    )
    if isinstance(start_date, str):
        start_date = parse(start_date)
    if isinstance(end_date, str):
        end_date = parse(end_date)

    uploader = AwsS3Uploader(settings.S3_BUCKET_NAME)
    iterate_daily = table_export_setting["iterate_daily"]
    dates_to_iterate = rrule(DAILY, dtstart=start_date, until=end_date if iterate_daily else start_date)

    for the_date in dates_to_iterate:
        with NamedTemporaryGZip() as temp_file:
            with connection.cursor() as cursor:
                cursor.db.set_schema(schema_name)
                upload_path = get_upload_path(
                    schema_name,
                    table_export_setting["provider"],
                    provider_uuid,
                    the_date,
                    table_export_setting["output_name"],
                    iterate_daily,
                )
                cursor.execute(
                    table_export_setting["sql"].format(schema=schema_name),
                    {
                        "start_date": the_date,
                        "end_date": the_date if iterate_daily else end_date,
                        "provider_uuid": provider_uuid,
                    },
                )
                # Don't upload if the result set is empty.
                if cursor.rowcount == 0:
                    continue
                writer = csv.writer(temp_file, quotechar='"', quoting=csv.QUOTE_MINIMAL)
                writer.writerow([field.name for field in cursor.description])
                # Fetch in batches to keep memory use bounded for large tables.
                while True:
                    records = cursor.fetchmany(size=_DB_FETCH_BATCH_SIZE)
                    if not records:
                        break
                    for row in records:
                        writer.writerow(row)
                temp_file.close()
                uploader.upload_file(temp_file.name, upload_path)

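# get_upload_path is also project-local and not shown here. A hypothetical
# sketch matching the six-argument call used in the two later variants; the
# key prefix and ordering below are assumptions, not taken from the source.
def get_upload_path(schema_name, provider_type, provider_uuid, the_date, table_name, daily=False):
    """Build an S3 object key for one exported table and date."""
    date_part = the_date.strftime('%Y/%m/%d') if daily else the_date.strftime('%Y/%m')
    return (
        f'data_archive/{schema_name}/{provider_type}/{provider_uuid}/'
        f'{date_part}/{table_name}.csv.gz'
    )
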
def query_and_upload_to_s3(schema_name, provider_uuid, table_export_setting, date_range):
    """
    Query the database and upload the results to s3.

    Args:
        schema_name (str): Account schema name in which to execute the query.
        provider_uuid (UUID): Provider UUID for filtering the query.
        table_export_setting (TableExportSetting): Settings for the table export.
        date_range (tuple): Pair of date objects of inclusive start and end dates.

    """
    LOG.info(
        'query_and_upload_to_s3: schema %s provider_uuid %s table.output_name %s for %s',
        schema_name,
        provider_uuid,
        table_export_setting.output_name,
        date_range,
    )
    uploader = AwsS3Uploader(settings.S3_BUCKET_NAME)
    start_date, end_date = date_range
    iterate_daily = table_export_setting.iterate_daily
    dates_to_iterate = rrule(DAILY, dtstart=start_date, until=end_date if iterate_daily else start_date)

    for the_date in dates_to_iterate:
        with NamedTemporaryGZip() as temp_file:
            with connection.cursor() as cursor:
                cursor.db.set_schema(schema_name)
                upload_path = get_upload_path(
                    schema_name,
                    table_export_setting.provider,
                    provider_uuid,
                    the_date,
                    table_export_setting.output_name,
                    iterate_daily,
                )
                cursor.execute(
                    table_export_setting.sql.format(schema=schema_name),
                    {
                        'start_date': the_date,
                        'end_date': the_date if iterate_daily else end_date,
                        'provider_uuid': provider_uuid,
                    },
                )
                # Don't upload if the result set is empty.
                if cursor.rowcount == 0:
                    continue
                writer = csv.writer(temp_file, quotechar='"', quoting=csv.QUOTE_MINIMAL)
                writer.writerow([field.name for field in cursor.description])
                while True:
                    records = cursor.fetchmany(size=_DB_FETCH_BATCH_SIZE)
                    if not records:
                        break
                    for row in records:
                        writer.writerow(row)
                temp_file.close()
                uploader.upload_file(temp_file.name, upload_path)

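# Illustrative call of the last variant above. TableExportSetting, the SQL
# text, and all identifiers below are hypothetical placeholders for the
# project's real export settings, not values taken from the source.
from collections import namedtuple
from datetime import date

TableExportSetting = namedtuple(
    'TableExportSetting', ['provider', 'output_name', 'sql', 'iterate_daily']
)

example_setting = TableExportSetting(
    provider='aws',
    output_name='reporting_example_table',
    sql=(
        'SELECT * FROM {schema}.reporting_example_table '
        'WHERE usage_start >= %(start_date)s '
        'AND usage_start <= %(end_date)s '
        'AND provider_id = %(provider_uuid)s'
    ),
    iterate_daily=True,
)

query_and_upload_to_s3(
    'acct10001',
    '00000000-0000-0000-0000-000000000000',
    example_setting,
    (date(2019, 1, 1), date(2019, 1, 31)),
)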