Example #1
import datetime
from csv import DictReader as cdr, writer as ckw  # shorthand aliases used below
from subprocess import call


def prep(filename):
    """Cut a raw voter file down to the needed columns, normalize the
    race and party codes, sort by last name, and write a load-ready CSV."""
    prepstart = datetime.datetime.now()
    slug = filename[:3]
    rawfile = "%s/%s" % (rawbase, filename)
    tempfile = "%s/%s_temp.csv" % (temp, slug)
    prepfile = "%s/%s_prep.csv" % (prepbase, slug)
    loadfile = "%s/%s.csv" % (loadbase, slug)
    call("cp %s %s" % (rawheader, tempfile), shell=True)
    call('cat %s/%s >> %s' % (rawbase, filename, tempfile), shell=True)
    call("csvcut -t -c %s %s > %s" % (columns, tempfile, prepfile), shell=True)
    with open(prepfile, 'r') as f:
        biglist = []
        reader = cdr(f)
        header = reader.fieldnames  # column order comes from the csvcut call above
        for row in reader:
            # map raw race/party codes to standardized values; blank if unknown
            RACE = raced.get(row['race'], '')
            PARTY = partyd.get(row['party'], '')
            biglist.append([
                    row['lname'].strip(),
                    row['fname'].strip(),
                    row['mname'].strip(),
                    row['suffix'].strip(),
                    ' '.join(row['addr1'].split()),  # collapse extra interior whitespace
                    row['addr2'].strip(),
                    row['city'].strip(),
                    row['zip'].strip(),
                    row['gender'].strip(),
                    RACE,
                    row['birthdate'].strip(),
                    PARTY,
                    row['areacode'].strip(),
                    row['phone'].strip(),
                    row['email'].strip(),
                    row['voter_ID'].strip()
                    ])
        biglist = sorted(biglist)  # sort rows by the first field, which is last name
        with open(loadfile, 'w', newline='') as lf:  # newline='' per the csv module docs
            writer = ckw(lf)
            writer.writerow(header)
            writer.writerows(biglist)
    print "%s ready for loading; prepping took %s" % (loadfile, (datetime.datetime.now()-prepstart))
Example #2
import datetime

import requests
from csv import writer as ckw  # shorthand alias used below


def export_spreadsheet(year):
    """Traverse all result pages, collect each school's requested fields,
    and export them to schools_<year>.csv."""
    starter = datetime.datetime.now()
    fields = build_string(year)  # comma-separated list of dotted API field names
    headings = fields.replace(".", "_").split(",")  # dots become underscores for CSV headers
    collector = {}
    url = "%s?api_key=%s&per_page=%s&fields=%s" % (schools_root, api_key, page_max, fields)
    data = requests.get(url).json()
    # initial pass
    if "results" not in data:
        print("no results")
        return data
    for school in data["results"]:
        # start each school with blank values for every heading, then fill in
        collector[school["id"]] = {key: "" for key in headings}
        for key, value in school.items():
            collector[school["id"]][key.replace(".", "_")] = value
    next_page = data["metadata"]["page"] + 1
    more_data = True
    # harvest the remaining pages
    while more_data:
        print("getting page %s" % next_page)
        next_url = "%s&page=%s" % (url, next_page)
        nextdata = requests.get(next_url).json()
        # stop on an empty page; .get() also guards against a missing "results" key
        if not nextdata.get("results"):
            more_data = False
            print("no more pages; exporting ...")
        else:
            for school in nextdata["results"]:
                collector[school["id"]] = {key: "" for key in headings}
                for key, value in school.items():
                    collector[school["id"]][key.replace(".", "_")] = value
            next_page = nextdata["metadata"]["page"] + 1
    with open("schools_%s.csv" % year, "w") as f:
        writer = ckw(f)
        writer.writerow(headings)
        for school_id in collector:
            writer.writerow([collector[school_id][field] for field in headings])
    print "export_spreadsheet took %s to process schools\
    for the year %s" % (
        (datetime.datetime.now() - starter),
        year,
    )
    return data
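
Similarly, export_spreadsheet() assumes an API root, key, page size, and a build_string() helper defined elsewhere. A hedged sketch of that surrounding setup, where the endpoint, key, and field names are invented for illustration and not the actual API configuration:

# Hypothetical setup assumed by export_spreadsheet(); the endpoint, key,
# and field names are placeholders, not real values.
schools_root = "https://api.example.gov/v1/schools.json"
api_key = "YOUR_API_KEY"
page_max = 100  # results per page


def build_string(year):
    # assemble the comma-separated, dotted field list for one year
    return "id,school.name,%s.student.size" % year


export_spreadsheet(2013)  # writes schools_2013.csv

Because build_string() controls the field list and headings are derived from it, changing that one helper is enough to reshape the exported spreadsheet.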