def test_format_file_size_terabytes(self):
    """A multi-terabyte byte count is rendered as whole terabytes."""
    byte_count = 1024 * 9000000000
    self.assertEqual(format_file_size(byte_count), '8 TB')
def test_format_file_size_kilobytes(self):
    """A sub-megabyte byte count is rendered in kilobytes."""
    byte_count = 1024 * 900
    self.assertEqual(format_file_size(byte_count), '900 KB')
def test_format_file_size_gigabytes(self):
    """A multi-gigabyte byte count is rendered as whole gigabytes."""
    byte_count = 1024 * 9000000
    self.assertEqual(format_file_size(byte_count), '9 GB')
def test_format_file_size_bytes(self):
    """A count below one kilobyte is rendered in plain bytes."""
    byte_count = 124
    self.assertEqual(format_file_size(byte_count), '124 B')
def test_format_file_size_one_kilobyte(self):
    """Exactly 1024 bytes is rendered as one kilobyte."""
    byte_count = 1024
    self.assertEqual(format_file_size(byte_count), '1 KB')
def export_downloadable_csv(geo_type, late_value):
    """Bake a mortgage-performance dataset to S3 as a UTF-8 CSV file.

    FIPS codes are written with single quotes so that Excel keeps their
    leading zeros.  ``geo_type`` is County, MetroArea or State;
    ``late_value`` is percent_30_60 or percent_90.  Non-Metro areas are
    folded into the MetroArea CSV, and every CSV leads with a National
    row for comparison.  CSVs are posted at
    https://files.consumerfinance.gov/data/mortgage-performance/downloads/ # noqa: E501
    Each file's URL and size are stored for use in page footnotes.
    """
    date_list = FIPS.short_dates
    # Last covered date, truncated to 'YYYY-MM' for slugs and metadata.
    thru_month = FIPS.dates[-1][:-3]
    geo_dict = {
        'County': {
            'queryset': CountyMortgageData.objects.filter(county__valid=True),
            'headings': ['RegionType', 'State', 'Name', 'FIPSCode'],
            'fips_list': sorted(
                county.fips
                for county in County.objects.filter(valid=True)),
        },
        'MetroArea': {
            'queryset': MSAMortgageData.objects.filter(msa__valid=True),
            'headings': ['RegionType', 'Name', 'CBSACode'],
            'fips_list': sorted(
                metro.fips
                for metro in MetroArea.objects.filter(valid=True)),
        },
        'NonMetroArea': {
            'queryset': NonMSAMortgageData.objects.filter(
                state__non_msa_valid=True),
            'headings': ['RegionType', 'Name', 'CBSACode'],
            'fips_list': sorted(
                "{}-non".format(state.fips)
                for state in State.objects.filter(non_msa_valid=True)),
        },
        'State': {
            'queryset': StateMortgageData.objects.all(),
            'headings': ['RegionType', 'Name', 'FIPSCode'],
            'fips_list': sorted(
                state.fips
                for state in State.objects.exclude(
                    fips__in=STATES_TO_IGNORE)),
        },
    }
    slug = "{}Mortgages{}DaysLate-thru-{}".format(
        geo_type, LATE_VALUE_TITLE[late_value], thru_month)
    geo_map = geo_dict.get(geo_type)
    # unicodecsv emits UTF-8 encoded bytes, hence the binary buffer.
    csvfile = BytesIO()
    writer = unicodecsv.writer(csvfile)

    def _write_region_rows(mapping, region_type):
        # One CSV row per FIPS code: region columns, then one value per date.
        for fips in mapping['fips_list']:
            rows = mapping['queryset'].filter(fips=fips)
            values = [round_pct(getattr(row, late_value)) for row in rows]
            writer.writerow(row_starter(region_type, rows.first()) + values)

    writer.writerow(geo_map['headings'] + date_list)
    # National comparison row comes first.
    national_row = [NATION_STARTER[heading]
                    for heading in geo_map['headings']]
    writer.writerow(national_row + FIPS.nation_row[late_value])
    _write_region_rows(geo_map, geo_type)
    if geo_type == 'MetroArea':
        # Non-metro areas ride along in the MetroArea CSV.
        _write_region_rows(geo_dict['NonMetroArea'], 'NonMetroArea')
    bake_csv_to_s3(
        slug, csvfile, sub_bucket="{}/downloads".format(MORTGAGE_SUB_BUCKET))
    logger.info("Baked {} to S3".format(slug))
    # Seek to the end so tell() reports the total byte count for footnotes.
    csvfile.seek(0, 2)
    csv_size = format_file_size(csvfile.tell())
    save_metadata(csv_size, slug, thru_month, late_value, geo_type)
def export_downloadable_csv(geo_type, late_value):
    """Export a dataset to S3 as a UTF-8 CSV file, adding single quotes
    to FIPS codes so that Excel doesn't strip leading zeros.

    geo_types are County, MetroArea or State.
    late_values are percent_30_60 or percent_90.
    Non-Metro areas are added to the MetroArea CSV.
    Each CSV is to start with a National row for comparison.
    CSVs are posted at
    https://files.consumerfinance.gov/data/mortgage-performance/downloads/ # noqa: E501
    The script also stores URLs and file sizes for use in page footnotes.
    """
    # NOTE(review): this definition shadows a nearly identical
    # export_downloadable_csv earlier in the file; the duplicate should be
    # removed — TODO confirm which copy is meant to survive.
    date_list = FIPS.short_dates
    thru_date = FIPS.dates[-1]
    # Truncate 'YYYY-MM-DD' to 'YYYY-MM' for the slug and metadata.
    thru_month = thru_date[:-3]
    geo_dict = {
        'County': {
            'queryset': CountyMortgageData.objects.filter(
                county__valid=True),
            'headings': ['RegionType', 'State', 'Name', 'FIPSCode'],
            'fips_list': sorted(
                [county.fips
                 for county in County.objects.filter(valid=True)])
        },
        'MetroArea': {
            'queryset': MSAMortgageData.objects.filter(msa__valid=True),
            'headings': ['RegionType', 'Name', 'CBSACode'],
            'fips_list': sorted(
                [metro.fips
                 for metro in MetroArea.objects.filter(valid=True)])
        },
        'NonMetroArea': {
            'queryset': NonMSAMortgageData.objects.filter(
                state__non_msa_valid=True),
            'headings': ['RegionType', 'Name', 'CBSACode'],
            'fips_list': sorted(
                ["{}-non".format(state.fips)
                 for state in State.objects.filter(non_msa_valid=True)])
        },
        'State': {
            'queryset': StateMortgageData.objects.all(),
            'headings': ['RegionType', 'Name', 'FIPSCode'],
            # Exclude ignored FIPS codes, consistent with the sibling
            # definition of this function elsewhere in the file.
            'fips_list': sorted(
                [state.fips for state in
                 State.objects.exclude(fips__in=STATES_TO_IGNORE)])
        },
    }
    slug = "{}Mortgages{}DaysLate-thru-{}".format(
        geo_type, LATE_VALUE_TITLE[late_value], thru_month)
    _map = geo_dict.get(geo_type)
    fips_list = _map['fips_list']
    # unicodecsv encodes each row to UTF-8 bytes, so the buffer must be
    # binary; the previous StringIO buffer breaks under Python 3.
    csvfile = BytesIO()
    writer = unicodecsv.writer(csvfile)
    writer.writerow(_map['headings'] + date_list)
    # National comparison row precedes all regional rows.
    nation_starter = [NATION_STARTER[heading]
                      for heading in _map['headings']]
    nation_ender = FIPS.nation_row[late_value]
    writer.writerow(nation_starter + nation_ender)
    for fips in fips_list:
        records = _map['queryset'].filter(fips=fips)
        record_starter = row_starter(geo_type, records.first())
        record_ender = [round_pct(getattr(record, late_value))
                       for record in records]
        writer.writerow(record_starter + record_ender)
    if geo_type == 'MetroArea':
        # Non-metro areas are appended to the MetroArea CSV.
        non_map = geo_dict['NonMetroArea']
        for fips in non_map['fips_list']:
            records = non_map['queryset'].filter(fips=fips)
            record_starter = row_starter('NonMetroArea', records.first())
            record_ender = [round_pct(getattr(record, late_value))
                           for record in records]
            writer.writerow(record_starter + record_ender)
    bake_csv_to_s3(
        slug, csvfile,
        sub_bucket="{}/downloads".format(MORTGAGE_SUB_BUCKET))
    logger.info("Baked {} to S3".format(slug))
    # Seek to the end so tell() reports total bytes for the footnote size.
    csvfile.seek(0, 2)
    bytecount = csvfile.tell()
    csv_size = format_file_size(bytecount)
    save_metadata(csv_size, slug, thru_month, late_value, geo_type)