Example #1
    def test_get_upload_path(self):
        """Assert get_upload_path produces an appropriate S3 path for the month."""

        report_date = date(2018, 4, 1)
        account = 'test_acct'
        provider_type = 'test'
        table_name = 'test_table'
        with self.settings(S3_BUCKET_PATH='bucket'):
            path = get_upload_path(account, provider_type, report_date, table_name)
            self.assertEqual('bucket/test_acct/test/2018/04/00/test_table.csv.gz', path)
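The assertion above pins down the path layout: bucket prefix, account, provider type, zero-padded year and month, a fixed '00' day segment for month-level files, and the gzipped CSV name. A minimal sketch of a helper that would satisfy it, reconstructed from the expected string rather than taken from the project's actual implementation:

# Sketch inferred from the assertion above; the real get_upload_path may differ.
from django.conf import settings


def get_upload_path(account, provider_type, report_date, table_name):
    """Build the S3 key for a month-level table export (day segment fixed at '00')."""
    return (
        f"{settings.S3_BUCKET_PATH}/{account}/{provider_type}/"
        f"{report_date.year}/{report_date.month:02d}/00/{table_name}.csv.gz"
    )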
Example #2
    def test_get_upload_path(self):
        """Assert get_upload_path produces an appropriate S3 path for the month."""
        report_date = date(2018, 4, 1)
        schema_name = 'test_acct'
        provider_type = 'test_type'
        provider_uuid = uuid.UUID('de4db3ef-a185-4bad-b33f-d15ea5edc0de',
                                  version=4)
        table_name = 'test_table'
        with self.settings(S3_BUCKET_PATH='bucket'):
            path = get_upload_path(schema_name, provider_type, provider_uuid,
                                   report_date, table_name)
            self.assertEqual(
                'bucket/test_acct/test_type/de4db3ef-a185-4bad-b33f-d15ea5edc0de/2018/04/00/test_table.csv.gz',
                path,
            )
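Example #2 tests a later revision of the same helper: the key now carries the provider UUID between the provider type and the date, and Example #3 below passes an additional daily flag. A sketch of that shape, again inferred from the expected path rather than copied from the project (the daily day-segment behaviour in particular is an assumption):

# Sketch of the extended helper; the daily handling below is assumed, not confirmed.
from django.conf import settings


def get_upload_path(schema_name, provider_type, provider_uuid, report_date,
                    table_name, daily=False):
    """Build the S3 key; the day segment stays '00' unless a daily export is requested."""
    day_segment = f"{report_date.day:02d}" if daily else "00"
    return (
        f"{settings.S3_BUCKET_PATH}/{schema_name}/{provider_type}/{provider_uuid}/"
        f"{report_date.year}/{report_date.month:02d}/{day_segment}/{table_name}.csv.gz"
    )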
Example #3
def query_and_upload_to_s3(schema_name, provider_uuid, table_export_setting,
                           start_date, end_date):
    """
    Query the database and upload the results to s3.

    Args:
        schema_name (str): Account schema name in which to execute the query.
        provider_uuid (UUID): Provider UUID for filtering the query.
        table_export_setting (dict): Settings for the table export.
        start_date (string): start date (inclusive)
        end_date (string): end date (inclusive)

    """
    if not settings.ENABLE_S3_ARCHIVING:
        LOG.info("S3 Archiving is disabled. Not running task.")
        return

    LOG.info(
        "query_and_upload_to_s3: schema %s provider_uuid %s table.output_name %s for %s",
        schema_name,
        provider_uuid,
        table_export_setting["output_name"],
        (start_date, end_date),
    )
    if isinstance(start_date, str):
        start_date = parse(start_date)
    if isinstance(end_date, str):
        end_date = parse(end_date)

    uploader = AwsS3Uploader(settings.S3_BUCKET_NAME)
    iterate_daily = table_export_setting["iterate_daily"]
    # Daily exports iterate one day at a time; otherwise a single query covers
    # the whole start_date/end_date range.
    dates_to_iterate = rrule(DAILY,
                             dtstart=start_date,
                             until=end_date if iterate_daily else start_date)

    for the_date in dates_to_iterate:
        with NamedTemporaryGZip() as temp_file:
            with connection.cursor() as cursor:
                cursor.db.set_schema(schema_name)
                upload_path = get_upload_path(
                    schema_name,
                    table_export_setting["provider"],
                    provider_uuid,
                    the_date,
                    table_export_setting["output_name"],
                    iterate_daily,
                )
                cursor.execute(
                    table_export_setting["sql"].format(schema=schema_name),
                    {
                        "start_date": the_date,
                        "end_date": the_date if iterate_daily else end_date,
                        "provider_uuid": provider_uuid,
                    },
                )
                # Don't upload if result set is empty
                if cursor.rowcount == 0:
                    continue
                writer = csv.writer(temp_file,
                                    quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)
                writer.writerow([field.name for field in cursor.description])
                while True:
                    records = cursor.fetchmany(size=_DB_FETCH_BATCH_SIZE)
                    if not records:
                        break
                    for row in records:
                        writer.writerow(row)
            # Close to flush and finalize the gzip stream before uploading.
            temp_file.close()
            uploader.upload_file(temp_file.name, upload_path)
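A caller needs a table_export_setting dict with the keys the task reads ("provider", "output_name", "iterate_daily", "sql"). The values below are hypothetical, made up only to show the shape of a call; the table and column names do not come from the project:

import uuid

# Hypothetical export setting; the SQL placeholders match what the task expects:
# {schema} is filled via str.format(), the %(...)s parameters via cursor.execute().
table_export_setting = {
    "provider": "aws",
    "output_name": "example_lineitems",
    "iterate_daily": False,  # one month-level file instead of one file per day
    "sql": (
        "SELECT * FROM {schema}.example_lineitem_table "
        "WHERE usage_start >= %(start_date)s "
        "AND usage_start <= %(end_date)s "
        "AND provider_id = %(provider_uuid)s"
    ),
}

query_and_upload_to_s3(
    "acct10001",  # hypothetical account schema
    uuid.UUID("de4db3ef-a185-4bad-b33f-d15ea5edc0de"),
    table_export_setting,
    start_date="2018-04-01",
    end_date="2018-04-30",
)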