LOGGY.info("Reading: %s" % infile.name) d = {} for r in DictReader(infile, delimiter="\t"): row = {k: v.strip() for k, v in r.items()} seriesid = row['series_id'] areatype = AREA_TYPES_MAP.get(seriesid[3:5]) valtype = seriesid[-2:] if areatype and valtype in ['03', '06']: fips = seriesid[5:7] if areatype == 'State' else seriesid[5:10] year = row['year'] month = row['period'][1:] key = (fips, year, month) if not d.get(key): d[key] = { 'fips': fips, 'area_type': areatype, 'year': year, 'month': month, } v = unemp_header if valtype == '03' else 'labor_force' d[key][v] = None if row['value'] == '-' else row['value'] csvout = DictWriter(stdout, fieldnames=[ 'fips', 'area_type', 'year', 'month', unemp_header, 'labor_force' ]) csvout.writeheader() for k, row in sorted(d.items(), key=lambda x: x[0]): csvout.writerow(row)
"line_srvc_cnt": "line_item_service_count", "bene_unique_cnt": "beneficiary_unique_count", 'bene_day_srvc_cnt': "unique_beneficiary_per_day_services_count", } if __name__ == '__main__': parser = argparse.ArgumentParser("Normalizes payments data") parser.add_argument('infile', type=argparse.FileType('r')) args = parser.parse_args() infile = args.infile LOGGY.info("Reading: %s" % infile.name) rawheaders = infile.readline().strip().split('\t') newheaders = [h.lower() for h in rawheaders] for i, h in enumerate(newheaders): if CLEANED_PAYMENT_HEADER_MAP.get(h): newheaders[i] = CLEANED_PAYMENT_HEADER_MAP[h] csvin = DictReader(infile, delimiter='\t', fieldnames=newheaders) csvout = DictWriter(stdout, fieldnames=CLEANED_PAYMENT_HEADERS) for row in csvin: d = {} for h in CLEANED_PAYMENT_HEADERS: if 'amount' in h: d[h] = round(float(row[h]), 2) if row.get(h) else None else: d[h] = row[h] csvout.writerow(d)