Esempio n. 1
0
def main():
    """ Entry point when run from command line

    Reads calendar rows from the CSV given via ``-i/--infile``, classifies
    each row's location via the ``palaces`` helper module, tallies the
    per-person category counts, and writes the annotated rows to
    ``output.csv``.  Python 2 code (uses a ``print`` statement).
    """
    # pylint: disable=C0103

    cmd_args = [{
        'short': "-i",
        "long": "--infile",
        'dest': "infile",
        'type': str,
        'help': """CSV file with calendar data""",
        'required': True
    }]
    ui = Interface("BlockePlaces",
                   "Find out where events took place",
                   commandline_args=cmd_args)
    ui.info(ui.args.infile)

    output = []
    with open(ui.args.infile) as infile:
        csv_reader = DictReader(infile)
        places = {}  # person -> {category constant: event count}
        for row in csv_reader:
            nation = row["country"]
            person = row["person"]

            # Probe increasingly generic text fields (location, title,
            # description) until one yields a category.
            # NOTE(review): assumes get_location_category always returns a
            # (category, name) pair and a falsy category on no match —
            # confirm against the palaces module.
            location_category = None
            if row["location"]:
                location_category, location = palaces.get_location_category(
                    row["location"], nation)
            if not location_category:
                location_category, location = palaces.get_location_category(
                    row["title"], nation)
            if not location_category:
                location_category, location = palaces.get_location_category(
                    row["description"], nation)

            if person not in places:
                places[person] = {
                    palaces.UNKNOWN: 0,
                    palaces.AT_HOME: 0,
                    palaces.DOMESTIC: 0,
                    palaces.ABROAD: 0
                }
            places[person][location_category] += 1
            # outputrow aliases row; safe because DictReader yields a fresh
            # dict per iteration.
            outputrow = row
            outputrow["location_category"] = palaces.category_names[
                location_category]
            outputrow["location_name"] = location
            output.append(outputrow)
        # Debug dump of the per-person tallies (Python 2 print statement).
        print places

    with open('output.csv', 'w') as csvfile:
        fieldnames = [
            "id", "description", "title", "country", "date_end", "date_start",
            "person", "link", "location", "location_category", "location_name"
        ]
        writer = DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(output)
Esempio n. 2
0
def export_report():
    """ Group source rows by a recomputed slug and dump colliding groups.

    Reads ``../csv_data/sources.csv``, rebuilds each row's slug from its
    identifying columns, and writes every group of rows sharing the same
    new slug (i.e. collisions) to ``new_slug_colisions.csv`` for manual
    review.  Python 2 code (uses ``dict.iteritems``).
    """
    sources_by_slug = {}

    # Named function instead of a lambda assigned to a name (PEP8 E731).
    def new_slug(line):
        # Join the non-empty identifying parts with "_" to form the slug.
        parts = [line["acronym"], line["country"], line["dates"],
                 line["edition_date"], line["volume"], line["pages"]]
        return "_".join(p for p in parts if p != "")

    # Distinct handle names: the original shadowed the outer file as ``f``.
    with open("../csv_data/sources.csv", "r") as sources_file:
        sources = DictReader(sources_file)
        for s in sources:
            ns = new_slug(s)
            s["new_slug"] = ns
            # Grow the group in place instead of rebuilding the list
            # (the original did d[k] = d[k] + [s], quadratic per group).
            sources_by_slug.setdefault(ns, []).append(s)

        with open("new_slug_colisions.csv", "w") as collisions_file:
            new_slug_colisions = DictWriter(
                collisions_file, ["keep", "new_slug"] + sources.fieldnames)
            new_slug_colisions.writeheader()
            # Only groups holding more than one row are real collisions.
            for k, dups in sources_by_slug.iteritems():
                if len(dups) > 1:
                    new_slug_colisions.writerows(dups)
Esempio n. 3
0
def main():
    """ Entry point when run from command line
    """
    # pylint: disable=C0103

    cmd_args = [{
        'short': "-i", "long": "--infile",
        'dest': "infile",
        'type': str,
        'help': """CSV file with calendar data""",
        'required': True
    }]
    ui = Interface("BlockePlaces",
                   "Find out where events took place",
                   commandline_args=cmd_args)
    ui.info(ui.args.infile)

    output = []
    with open(ui.args.infile) as infile:
        csv_reader = DictReader(infile)
        places = {}
        for row in csv_reader:
            nation = row["country"]
            person = row["person"]

            location_category = None
            if row["location"]:
                location_category, location = palaces.get_location_category(row["location"], nation)
            if not location_category:
                location_category, location = palaces.get_location_category(row["title"], nation)
            if not location_category:
                location_category, location = palaces.get_location_category(row["description"], nation)

            if person not in places:
                places[person] = {
                    palaces.UNKNOWN: 0,
                    palaces.AT_HOME: 0,
                    palaces.DOMESTIC: 0,
                    palaces.ABROAD: 0
                }
            places[person][location_category] += 1
            outputrow = row
            outputrow["location_category"] = palaces.category_names[location_category]
            outputrow["location_name"] = location
            output.append(outputrow)
        print places

    with open('output.csv', 'w') as csvfile:
        fieldnames = ["id",
                      "description",
                      "title",
                      "country",
                      "date_end",
                      "date_start",
                      "person",
                      "link",
                      "location",
                      "location_category",
                      "location_name"]
        writer = DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(output)
Esempio n. 4
0
def import_correction():
    """ Apply the manual de-duplication decisions from the collisions file.

    Reads ``out_data/new_slug_colisions.csv`` (the file produced for manual
    review), groups its rows by ``new_slug``, and for each group appends
    slug-replacement / note-copy / note-removal (Source, Target) pairs to
    the patch CSV files under ``in_data/patchs/``.  Python 2 code (uses
    ``print`` statements).
    """
    with open("out_data/new_slug_colisions.csv", "r") as f:
        corrections_file = DictReader(f)
        # Pre-sort on the group key so itertools.groupby sees each
        # new_slug as a single contiguous run.
        sort_newslug = lambda e: e["new_slug"]
        corrections = [c for c in corrections_file]
        corrections.sort(key=sort_newslug)

        correction_lines = []
        slugs_to_keep = []
        for new_slug, lines in itertools.groupby(corrections, sort_newslug):
            correction_lines = list(lines)
            # keep
            # Rows marked "x" in the "keep" column are the ones to retain.
            index_to_keep = [
                i for i, l in enumerate(correction_lines) if l["keep"] == "x"
            ]
            if len(index_to_keep) > 1:
                print "WARNING: multiple keep order for group %s" % new_slug
            elif len(index_to_keep) == 1:
                print "correcting group %s" % new_slug
                # Map every non-kept slug onto the kept one.
                slug_to_keep = correction_lines[index_to_keep[0]]["slug"]
                slugs_to_replace = [{
                    "Source": line["slug"],
                    "Target": slug_to_keep
                } for line in correction_lines if line["keep"] != "x"]
                # add correction_lines in patch_sources.csv
                # NOTE(review): append mode, no header — presumably the
                # patch file already exists with a header; confirm.
                with open("in_data/patchs/patch_sources.csv", "a") as psf:
                    psfw = DictWriter(psf, ["Source", "Target"])
                    psfw.writerows(slugs_to_replace)
                    print "added %s in patch to be removed" % " | ".join(
                        s["Source"] for s in slugs_to_replace)
                slugs_to_keep.append((slug_to_keep, index_to_keep[0]))
            # check if index_to_keep not empty

            # output a SQL script patch
            # by copy_notes lines
            notes_to_copy = [{
                "Source": line["slug"],
                "Target": line["notes"]
            } for line in correction_lines
                             if line["copy_notes_to_rate"] == "x"]
            # add correction_lines in patch_sources.csv
            with open("in_data/patchs/patch_sources_copy.csv", "a") as psfc:
                psfcw = DictWriter(psfc, ["Source", "Target"])
                psfcw.writerows(notes_to_copy)
                print "added %s in patch to be copied" % " | ".join(
                    s["Source"] for s in notes_to_copy)
            ## UPDATE ON flows/echange_rates SET notes=%notes% WHERE source=%slug%
            ## UPDATE ON sourcse SET notes="" WHERE slug=%slug%
            # by remove notes
            notes_to_remove = [{
                "Source": line["slug"],
                "Target": line["notes"]
            } for line in correction_lines
                               if line["remove_notes_from_source"] == "x"]
            # add correction_lines in patch_sources.csv
            with open("in_data/patchs/patch_sources_remove.csv", "a") as psfr:
                psfrw = DictWriter(psfr, ["Source", "Target"])
                psfrw.writerows(notes_to_remove)
                print "added %s in patch to be removed" % " | ".join(
                    s["Source"] for s in notes_to_remove)
    # NOTE(review): everything below looks like a fragment of a different
    # script spliced in at this indent level — it reads ``infile``,
    # ``AREA_TYPES_MAP``, ``unemp_header`` and ``stdout``, none of which
    # are defined in this function.  Confirm provenance before relying on
    # it; code is left byte-identical.
    LOGGY.info("Reading: %s" % infile.name)
    d = {}
    for r in DictReader(infile, delimiter="\t"):
        # Strip whitespace from every value of the tab-separated row.
        row = {k: v.strip() for k, v in r.items()}
        seriesid = row['series_id']
        # Presumably BLS-style series ids: chars 3:5 select the area type,
        # the last two digits the measure code — TODO confirm.
        areatype = AREA_TYPES_MAP.get(seriesid[3:5])
        valtype = seriesid[-2:]
        if areatype and valtype in ['03', '06']:
            # State FIPS is 2 chars, other area codes 5 — per the slicing.
            fips = seriesid[5:7] if areatype == 'State' else seriesid[5:10]
            year = row['year']
            month = row['period'][1:]
            key = (fips, year, month)
            if not d.get(key):
                d[key] = {
                    'fips': fips,
                    'area_type': areatype,
                    'year': year,
                    'month': month,
                }
            # '03' rows feed the unemp_header column, '06' the labor force;
            # '-' marks a missing value.
            v = unemp_header if valtype == '03' else 'labor_force'
            d[key][v] = None if row['value'] == '-' else row['value']

    csvout = DictWriter(stdout,
                        fieldnames=[
                            'fips', 'area_type', 'year', 'month', unemp_header,
                            'labor_force'
                        ])
    csvout.writeheader()
    # Emit rows sorted by (fips, year, month).
    for k, row in sorted(d.items(), key=lambda x: x[0]):
        csvout.writerow(row)
    "line_srvc_cnt": "line_item_service_count",
    "bene_unique_cnt": "beneficiary_unique_count",
    'bene_day_srvc_cnt': "unique_beneficiary_per_day_services_count",
}

if __name__ == '__main__':
    # Normalize a tab-separated payments file given on the command line
    # and stream the cleaned rows to stdout as CSV.
    parser = argparse.ArgumentParser("Normalizes payments data")
    parser.add_argument('infile', type=argparse.FileType('r'))
    args = parser.parse_args()
    infile = args.infile
    LOGGY.info("Reading: %s" % infile.name)

    # Lower-case the raw header names, then substitute the cleaned alias
    # wherever the map provides one.
    raw_header_line = infile.readline().strip().split('\t')
    lowered = [name.lower() for name in raw_header_line]
    newheaders = [CLEANED_PAYMENT_HEADER_MAP.get(name) or name
                  for name in lowered]

    csvin = DictReader(infile, delimiter='\t', fieldnames=newheaders)
    csvout = DictWriter(stdout, fieldnames=CLEANED_PAYMENT_HEADERS)

    for raw_row in csvin:
        cleaned = {}
        for header in CLEANED_PAYMENT_HEADERS:
            if 'amount' in header:
                # Monetary columns: round to cents; empty/missing -> None.
                value = raw_row.get(header)
                cleaned[header] = round(float(value), 2) if value else None
            else:
                cleaned[header] = raw_row[header]

        csvout.writerow(cleaned)