import unittest


class FormatFileSizeTests(unittest.TestCase):

    def test_format_file_size_terabytes(self):
        self.assertEqual(format_file_size(1024 * 9000000000), '8 TB')

    def test_format_file_size_kilobytes(self):
        self.assertEqual(format_file_size(1024 * 900), '900 KB')

    def test_format_file_size_gigabytes(self):
        self.assertEqual(format_file_size(1024 * 9000000), '9 GB')

    def test_format_file_size_bytes(self):
        self.assertEqual(format_file_size(124), '124 B')

    def test_format_file_size_one_kilobyte(self):
        self.assertEqual(format_file_size(1024), '1 KB')
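
# The assertions above pin down the rounding behaviour of format_file_size.
# The sketch below is a minimal, hypothetical implementation that satisfies
# those tests; the project's real helper may differ in detail.
def format_file_size(num_bytes):
    """Return a human-readable size, rounded to the nearest whole unit."""
    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        # Stop once the value fits in the current unit (or units run out).
        if num_bytes < 1024 or unit == 'TB':
            return "{} {}".format(int(round(num_bytes)), unit)
        num_bytes = num_bytes / 1024.0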
def export_downloadable_csv(geo_type, late_value):
    """
    Export a dataset to S3 as a UTF-8 CSV file, adding single quotes
    to FIPS codes so that Excel doesn't strip leading zeros.

    geo_types are County, MetroArea or State.
    late_values are percent_30_60 or percent_90.
    Non-Metro areas are added to the MetroArea CSV.

    Each CSV is to start with a National row for comparison.

    CSVs are posted at
    https://files.consumerfinance.gov/data/mortgage-performance/downloads/  # noqa: E501

    The script also stores URLs and file sizes for use in page footnotes.
    """
    date_list = FIPS.short_dates
    thru_date = FIPS.dates[-1]
    thru_month = thru_date[:-3]
    geo_dict = {
        'County': {
            'queryset':
            CountyMortgageData.objects.filter(county__valid=True),
            'headings': ['RegionType', 'State', 'Name', 'FIPSCode'],
            'fips_list':
            sorted(
                [county.fips for county in County.objects.filter(valid=True)])
        },
        'MetroArea': {
            'queryset':
            MSAMortgageData.objects.filter(msa__valid=True),
            'headings': ['RegionType', 'Name', 'CBSACode'],
            'fips_list':
            sorted(
                [metro.fips for metro in MetroArea.objects.filter(valid=True)])
        },
        'NonMetroArea': {
            'queryset':
            NonMSAMortgageData.objects.filter(state__non_msa_valid=True),
            'headings': ['RegionType', 'Name', 'CBSACode'],
            'fips_list':
            sorted([
                "{}-non".format(state.fips)
                for state in State.objects.filter(non_msa_valid=True)
            ])
        },
        'State': {
            'queryset':
            StateMortgageData.objects.all(),
            'headings': ['RegionType', 'Name', 'FIPSCode'],
            'fips_list':
            sorted([
                state.fips
                for state in State.objects.exclude(fips__in=STATES_TO_IGNORE)
            ])
        },
    }
    slug = "{}Mortgages{}DaysLate-thru-{}".format(geo_type,
                                                  LATE_VALUE_TITLE[late_value],
                                                  thru_month)
    _map = geo_dict.get(geo_type)
    fips_list = _map['fips_list']
    csvfile = BytesIO()
    writer = unicodecsv.writer(csvfile)
    writer.writerow(_map['headings'] + date_list)
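    # Per the docstring, each CSV leads with a National row for comparison.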
    nation_starter = [NATION_STARTER[heading] for heading in _map['headings']]
    nation_ender = FIPS.nation_row[late_value]
    writer.writerow(nation_starter + nation_ender)
    for fips in fips_list:
        records = _map['queryset'].filter(fips=fips)
        record_starter = row_starter(geo_type, records.first())
        record_ender = [
            round_pct(getattr(record, late_value)) for record in records
        ]
        writer.writerow(record_starter + record_ender)
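    # Non-metro areas are appended to the MetroArea CSV, per the docstring.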
    if geo_type == 'MetroArea':
        non_map = geo_dict['NonMetroArea']
        for fips in non_map['fips_list']:
            records = non_map['queryset'].filter(fips=fips)
            record_starter = row_starter('NonMetroArea', records.first())
            record_ender = [
                round_pct(getattr(record, late_value)) for record in records
            ]
            writer.writerow(record_starter + record_ender)
    bake_csv_to_s3(slug,
                   csvfile,
                   sub_bucket="{}/downloads".format(MORTGAGE_SUB_BUCKET))
    logger.info("Baked {} to S3".format(slug))
    csvfile.seek(0, 2)
    bytecount = csvfile.tell()
    csv_size = format_file_size(bytecount)
    save_metadata(csv_size, slug, thru_month, late_value, geo_type)
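

# Hypothetical driver, based only on the docstring above: bake one CSV for
# each supported geo_type / late_value combination. Not part of the original
# module; shown here to illustrate how export_downloadable_csv is invoked.
def export_all_downloadable_csvs():
    for geo_type in ('County', 'MetroArea', 'State'):
        for late_value in ('percent_30_60', 'percent_90'):
            export_downloadable_csv(geo_type, late_value)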