def main(): """ Entry point when run from command line """ # pylint: disable=C0103 cmd_args = [{ 'short': "-i", "long": "--infile", 'dest': "infile", 'type': str, 'help': """CSV file with calendar data""", 'required': True }] ui = Interface("BlockePlaces", "Find out where events took place", commandline_args=cmd_args) ui.info(ui.args.infile) output = [] with open(ui.args.infile) as infile: csv_reader = DictReader(infile) places = {} for row in csv_reader: nation = row["country"] person = row["person"] location_category = None if row["location"]: location_category, location = palaces.get_location_category( row["location"], nation) if not location_category: location_category, location = palaces.get_location_category( row["title"], nation) if not location_category: location_category, location = palaces.get_location_category( row["description"], nation) if person not in places: places[person] = { palaces.UNKNOWN: 0, palaces.AT_HOME: 0, palaces.DOMESTIC: 0, palaces.ABROAD: 0 } places[person][location_category] += 1 outputrow = row outputrow["location_category"] = palaces.category_names[ location_category] outputrow["location_name"] = location output.append(outputrow) print places with open('output.csv', 'w') as csvfile: fieldnames = [ "id", "description", "title", "country", "date_end", "date_start", "person", "link", "location", "location_category", "location_name" ] writer = DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() writer.writerows(output)
def export_report():
    """Group sources by a deduplication slug and export collisions."""
    sources_by_slug = {}
    # Join the non-empty identifying fields with underscores.
    new_slug = lambda line: "_".join(
        v for v in [line["acronym"], line["country"], line["dates"],
                    line["edition_date"], line["volume"], line["pages"]]
        if v != "")
    with open("../csv_data/sources.csv", "r") as f:
        sources = DictReader(f)
        for s in sources:
            ns = new_slug(s)
            s["new_slug"] = ns
            sources_by_slug.setdefault(ns, []).append(s)
    with open("new_slug_colisions.csv", "w") as f:
        # sources.fieldnames is cached by DictReader, so it is still
        # available here even though the input file is closed.
        new_slug_colisions = DictWriter(
            f, ["keep", "new_slug"] + sources.fieldnames)
        new_slug_colisions.writeheader()
        for k, dups in sources_by_slug.items():
            if len(dups) > 1:
                new_slug_colisions.writerows(dups)
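# Quick, self-contained check of the slug rule above, using a
# hypothetical source row (empty fields are dropped before joining):
def _demo_new_slug():
    sample = {"acronym": "ABC", "country": "France", "dates": "1850",
              "edition_date": "", "volume": "2", "pages": ""}
    slug = "_".join(v for v in [sample["acronym"], sample["country"],
                                sample["dates"], sample["edition_date"],
                                sample["volume"], sample["pages"]]
                    if v != "")
    assert slug == "ABC_France_1850_2"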
def main(): """ Entry point when run from command line """ # pylint: disable=C0103 cmd_args = [{ 'short': "-i", "long": "--infile", 'dest': "infile", 'type': str, 'help': """CSV file with calendar data""", 'required': True }] ui = Interface("BlockePlaces", "Find out where events took place", commandline_args=cmd_args) ui.info(ui.args.infile) output = [] with open(ui.args.infile) as infile: csv_reader = DictReader(infile) places = {} for row in csv_reader: nation = row["country"] person = row["person"] location_category = None if row["location"]: location_category, location = palaces.get_location_category(row["location"], nation) if not location_category: location_category, location = palaces.get_location_category(row["title"], nation) if not location_category: location_category, location = palaces.get_location_category(row["description"], nation) if person not in places: places[person] = { palaces.UNKNOWN: 0, palaces.AT_HOME: 0, palaces.DOMESTIC: 0, palaces.ABROAD: 0 } places[person][location_category] += 1 outputrow = row outputrow["location_category"] = palaces.category_names[location_category] outputrow["location_name"] = location output.append(outputrow) print places with open('output.csv', 'w') as csvfile: fieldnames = ["id", "description", "title", "country", "date_end", "date_start", "person", "link", "location", "location_category", "location_name"] writer = DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() writer.writerows(output)
def import_correction():
    """Read back the collision report and build patch files."""
    with open("out_data/new_slug_colisions.csv", "r") as f:
        corrections = list(DictReader(f))
    # groupby only groups adjacent keys, so sort by new_slug first.
    sort_newslug = lambda e: e["new_slug"]
    corrections.sort(key=sort_newslug)
    slugs_to_keep = []
    for new_slug, lines in itertools.groupby(corrections, sort_newslug):
        correction_lines = list(lines)

        # Lines marked "x" in the keep column win; the other slugs in
        # the group are redirected to the kept one.
        index_to_keep = [i for i, l in enumerate(correction_lines)
                         if l["keep"] == "x"]
        if len(index_to_keep) > 1:
            print("WARNING: multiple keep orders for group %s" % new_slug)
        elif len(index_to_keep) == 1:
            print("correcting group %s" % new_slug)
            slug_to_keep = correction_lines[index_to_keep[0]]["slug"]
            slugs_to_replace = [{"Source": line["slug"],
                                 "Target": slug_to_keep}
                                for line in correction_lines
                                if line["keep"] != "x"]
            # Append the replacements to patch_sources.csv.
            with open("in_data/patchs/patch_sources.csv", "a") as psf:
                psfw = DictWriter(psf, ["Source", "Target"])
                psfw.writerows(slugs_to_replace)
            print("added %s in patch to be removed"
                  % " | ".join(s["Source"] for s in slugs_to_replace))
            slugs_to_keep.append((slug_to_keep, index_to_keep[0]))

        # Notes flagged for copying go to a separate patch file.
        notes_to_copy = [{"Source": line["slug"], "Target": line["notes"]}
                         for line in correction_lines
                         if line["copy_notes_to_rate"] == "x"]
        with open("in_data/patchs/patch_sources_copy.csv", "a") as psfc:
            psfcw = DictWriter(psfc, ["Source", "Target"])
            psfcw.writerows(notes_to_copy)
        print("added %s in patch to be copied"
              % " | ".join(s["Source"] for s in notes_to_copy))
        # Intended SQL patch, roughly:
        #   UPDATE flows/exchange_rates SET notes=%notes% WHERE source=%slug%
        #   UPDATE sources SET notes="" WHERE slug=%slug%

        # Notes flagged for removal from the source.
        notes_to_remove = [{"Source": line["slug"], "Target": line["notes"]}
                           for line in correction_lines
                           if line["remove_notes_from_source"] == "x"]
        with open("in_data/patchs/patch_sources_remove.csv", "a") as psfr:
            psfrw = DictWriter(psfr, ["Source", "Target"])
            psfrw.writerows(notes_to_remove)
        print("added %s in patch to be removed"
              % " | ".join(s["Source"] for s in notes_to_remove))
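# itertools.groupby only groups *adjacent* items, which is why
# import_correction() sorts by new_slug before grouping. A minimal,
# self-contained illustration with made-up slugs:
import itertools

_rows = [{"new_slug": "b"}, {"new_slug": "a"}, {"new_slug": "a"}]
_rows.sort(key=lambda e: e["new_slug"])
_groups = {k: len(list(g))
           for k, g in itertools.groupby(_rows, lambda e: e["new_slug"])}
assert _groups == {"a": 2, "b": 1}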
LOGGY.info("Reading: %s" % infile.name) d = {} for r in DictReader(infile, delimiter="\t"): row = {k: v.strip() for k, v in r.items()} seriesid = row['series_id'] areatype = AREA_TYPES_MAP.get(seriesid[3:5]) valtype = seriesid[-2:] if areatype and valtype in ['03', '06']: fips = seriesid[5:7] if areatype == 'State' else seriesid[5:10] year = row['year'] month = row['period'][1:] key = (fips, year, month) if not d.get(key): d[key] = { 'fips': fips, 'area_type': areatype, 'year': year, 'month': month, } v = unemp_header if valtype == '03' else 'labor_force' d[key][v] = None if row['value'] == '-' else row['value'] csvout = DictWriter(stdout, fieldnames=[ 'fips', 'area_type', 'year', 'month', unemp_header, 'labor_force' ]) csvout.writeheader() for k, row in sorted(d.items(), key=lambda x: x[0]): csvout.writerow(row)
"line_srvc_cnt": "line_item_service_count", "bene_unique_cnt": "beneficiary_unique_count", 'bene_day_srvc_cnt': "unique_beneficiary_per_day_services_count", } if __name__ == '__main__': parser = argparse.ArgumentParser("Normalizes payments data") parser.add_argument('infile', type=argparse.FileType('r')) args = parser.parse_args() infile = args.infile LOGGY.info("Reading: %s" % infile.name) rawheaders = infile.readline().strip().split('\t') newheaders = [h.lower() for h in rawheaders] for i, h in enumerate(newheaders): if CLEANED_PAYMENT_HEADER_MAP.get(h): newheaders[i] = CLEANED_PAYMENT_HEADER_MAP[h] csvin = DictReader(infile, delimiter='\t', fieldnames=newheaders) csvout = DictWriter(stdout, fieldnames=CLEANED_PAYMENT_HEADERS) for row in csvin: d = {} for h in CLEANED_PAYMENT_HEADERS: if 'amount' in h: d[h] = round(float(row[h]), 2) if row.get(h) else None else: d[h] = row[h] csvout.writerow(d)