# Parse the remaining address/contact fields for the current county and
# append its record to `result`.
# NOTE(review): these statements were collapsed onto one physical line in the
# source; they presumably sit inside a per-county loop whose header is not
# visible here -- confirm the indentation level when merging.

# The third element of each address list holds the "city, state zip" text
# that the city/state/zip regexes are applied to.
po_state = state_re.findall(po_address[2])[0].strip()
address_state = state_re.findall(address[2])[0].strip()
city = city_re.findall(address[2])[0].strip()
zip_code = zip_re.findall(address[2])[0].strip()
street = address[0].strip()

# Contact details are pulled from the raw `county` text.
phone = dogcatcher.find_phone(phone_re, county)
fax = dogcatcher.find_phone(fax_re, county)
email = dogcatcher.find_emails(email_re, county)
hours = hours_re.findall(county)[0].strip(" \r\n")

# A PO street that does not mention "PO Box" gets a "c" appended to the
# review flags.
if "PO Box" not in po_street:
    review += "c"

fips = dogcatcher.find_fips(county_name, voter_state)
if fips == "":
    # A single parenthesised argument prints identically under the Python 2
    # print statement used by the rest of this script.
    print(county_name + " has no findable FIPS. It may be a spellling difference.")
    # Abort with a non-zero status so callers can tell the run failed;
    # a bare sys.exit() would report success.
    sys.exit(1)

result.append([
    authority_name, first_name, last_name, county_name, fips,
    street, city, address_state, zip_code,
    po_street, po_city, po_state, po_zip_code,
    reg_authority_name, reg_first, reg_last,
    reg_street, reg_city, reg_state, reg_zip_code,
    reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code,
    reg_phone, reg_fax, reg_email, reg_website, reg_hours,
    phone, fax, email, website, hours,
    voter_state, source, review,
])

#This outputs the results to a separate text file.
# NOTE(review): the line below is several statements collapsed onto one
# physical line; the original line breaks and indentation are not recoverable
# from this view, so the code is left untouched.
# - The leading `po_zip_code = ...` assignment and the `else:` that follows
#   belong to an `if` whose header lies outside this fragment. The `else`
#   branch parses city, address_state and zip_code from `csz`.
# - Next a separator line is printed, the website is looked up with
#   dogcatcher.find_website() and printed, and Genesee county gets a
#   hard-coded PO street of "P.O. Box 284".
# - The FIPS code is looked up and one 37-field record is appended to
#   `result`; the final dogcatcher.output() call writes `result` out
#   (presumably once, after the per-county loop -- TODO confirm).
po_zip_code = zip_re.findall(csz)[0].strip() else: city = city_re.findall(csz)[0].strip() address_state = state_re.findall(csz)[0].strip() zip_code = zip_re.findall(csz)[0].strip() print "_______________________________________________________" website = dogcatcher.find_website(website_re, county) print website if county_name == "Genesee": po_street = "P.O. Box 284" fips = dogcatcher.find_fips(county_name, voter_state) result.append([authority_name, first_name, last_name, county_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, voter_state, source, review]) #This outputs the results to a separate text file. dogcatcher.output(result, voter_state, cdir)
# NOTE(review): the line below is several statements collapsed onto one
# physical line; the original line breaks and indentation are not recoverable
# from this view, so the code is left untouched.
# - The leading `reg_zip_code = ...` assignment (parsed from `other_csz`) and
#   the `else:` that follows belong to an `if` whose header lies outside this
#   fragment. The `else` branch takes the registration street from
#   `address[0]` and parses city/state/zip from `reg_csz`.
# - Phone numbers are then extracted separately from the `abse` and `reg`
#   texts with dogcatcher.find_phone().
# - County names containing "-" are cut at " -" before the FIPS lookup (see
#   the inline comment about Jefferson County); other names are looked up
#   unchanged.
# - Finally a separator is printed and one 37-field record is appended to
#   `result`.
reg_zip_code = zip_re.findall(other_csz)[0].strip() else: reg_street = address[0] reg_city = city_re.findall(reg_csz)[0].strip(", ") reg_state = state_re.findall(reg_csz)[0].strip() reg_zip_code = zip_re.findall(reg_csz)[0].strip() phone = dogcatcher.find_phone(phone_re, abse) reg_phone = dogcatcher.find_phone(phone_re, reg) #There are two items for different parts of Jefferson County. They contain dashes followed by the part of the county in the county name, so we need to cut out the dashed section so that the FIPs can effectively match them. if "-" in county_name: fips = dogcatcher.find_fips(county_name.partition(" -")[0], voter_state) else: fips = dogcatcher.find_fips(county_name, voter_state) print "__________________________________" result.append([authority_name, first_name, last_name, county_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, voter_state, source, review])
# Split the column-oriented text in `data_1` into parallel lists.
# NOTE(review): `streets` and `counties` are appended to below but are not
# initialised in this fragment -- presumably that happens just above; confirm.
names = []
cities = []
zips = []
phones = []
emails = []

# The block under the "ADDRESS / NAME / COUNTY" headings mixes three kinds of
# lines; each one is classified by its content.
# Regex literals are raw strings so that "\d", "\/" and "\s" are not treated
# as (invalid) string escapes.
county_name_address_re = re.compile(r"ADDRESS\nNAME\nCOUNTY\n(.+?)\n\n", re.DOTALL)
digit_re = re.compile(r"\d")
county_name_address = county_name_address_re.findall(data_1)[0].split("\n")
for item in county_name_address:
    if digit_re.search(item) or "Box" in item:
        # Anything containing a digit or "Box" is a street / PO box line.
        streets.append(item)
    elif dogcatcher.find_fips(item, voter_state):
        # A line that resolves to a FIPS code is a county name. Truthiness is
        # used so that both None and "" count as a miss.
        # NOTE(review): other scripts test a miss as `fips == ""`; confirm
        # which value find_fips() actually returns.
        counties.append(item)
    else:
        names.append(item)

# Each "CITY/STATE/ZIP" line is split around the state abbreviation.
cities_re = re.compile(r"CITY\/STATE\/ZIP\n(.+?)\n\n", re.DOTALL)
cities_and_zips = cities_re.findall(data_1)[0].split("\n")
for item in cities_and_zips:
    # Split on the LAST "MT" so a city whose text itself contains "MT"
    # is not cut in the wrong place.
    city_zip = item.rsplit("MT", 1)
    cities.append(city_zip[0].strip())
    zips.append(city_zip[1].strip())

phones_re = re.compile(r"PHONE\n(.+?)\n\n", re.DOTALL)
phones = phones_re.findall(data_1)[0].split("\n")

# Captures whatever follows the first whitespace run after a 4-digit group.
fax_email_re = re.compile(r".*\d{4}\s+(.*)")
# Split the column-oriented text in `data_1` into parallel lists.
# NOTE(review): `streets`, `counties` and `names` are appended to below but
# are not initialised in this fragment -- presumably that happens just above;
# confirm.
cities = []
zips = []
phones = []
emails = []

# The block under the "ADDRESS / NAME / COUNTY" headings mixes three kinds of
# lines; each one is classified by its content.
# Regex literals are raw strings so that "\d", "\/" and "\s" are not treated
# as (invalid) string escapes.
county_name_address_re = re.compile(r"ADDRESS\nNAME\nCOUNTY\n(.+?)\n\n", re.DOTALL)
digit_re = re.compile(r"\d")
county_name_address = county_name_address_re.findall(data_1)[0].split("\n")
for item in county_name_address:
    if digit_re.search(item) or "Box" in item:
        # Anything containing a digit or "Box" is a street / PO box line.
        streets.append(item)
    elif dogcatcher.find_fips(item, voter_state):
        # A line that resolves to a FIPS code is a county name. Truthiness is
        # used so that both None and "" count as a miss.
        # NOTE(review): other scripts test a miss as `fips == ""`; confirm
        # which value find_fips() actually returns.
        counties.append(item)
    else:
        names.append(item)

# Each "CITY/STATE/ZIP" line is split around the state abbreviation.
cities_re = re.compile(r"CITY\/STATE\/ZIP\n(.+?)\n\n", re.DOTALL)
cities_and_zips = cities_re.findall(data_1)[0].split("\n")
for item in cities_and_zips:
    # Split on the LAST "MT" so a city whose text itself contains "MT"
    # is not cut in the wrong place.
    city_zip = item.rsplit("MT", 1)
    cities.append(city_zip[0].strip())
    zips.append(city_zip[1].strip())

phones_re = re.compile(r"PHONE\n(.+?)\n\n", re.DOTALL)
phones = phones_re.findall(data_1)[0].split("\n")

# Captures whatever follows the first whitespace run after a 4-digit group.
fax_email_re = re.compile(r".*\d{4}\s+(.*)")
# Column names for the output table; this list becomes the first row of
# `cities`.
header_row = [
    "authority_name", "first_name", "last_name", "town_name", "county_name",
    "fips", "street", "city", "address_state", "zip_code",
    "po_street", "po_city", "po_state", "po_zip_code",
    "reg_authority_name", "reg_first", "reg_last",
    "reg_street", "reg_city", "reg_state", "reg_zip_code",
    "reg_po_street", "reg_po_city", "reg_po_state", "reg_po_zip_code",
    "reg_phone", "reg_fax", "reg_email", "reg_website", "reg_hours",
    "phone", "fax", "email", "website", "hours",
    "voter_state", "source", "review", "town_name_full"]
cities = [header_row]

# "<name> COUNTY " rows start a new county; "TOWN|CITY|VILLAGE OF <name> - ..."
# rows describe a municipality inside the most recently seen county.
county_name_re = re.compile(r'(.+?)\sCOUNTY\s')
town_name_re = re.compile(r'^(TOWN|CITY|VILLAGE)\sOF\s(.+?)\s-.*', re.DOTALL)

# process XLS, separate counties from cities
# Row 0 is not visited (presumably a header row -- TODO confirm).
for curr_row in range(1, worksheet.nrows):
    row = worksheet.row_values(curr_row)

    county_name = county_name_re.search(row[1])
    if county_name:
        # County heading: remember it (and its FIPS) for the rows that follow.
        current_county = county_name.group(1)
        current_fips = dogcatcher.find_fips(current_county, voter_state)
        continue

    town_name = town_name_re.search(row[1])
    if not town_name:
        # Neither a county heading nor a municipality row.
        # Parenthesised single-argument print is identical under Python 2.
        print("skipping: " + row[1])
        continue

    cities.append(city_data(current_county, current_fips, town_name.group(2), row))

dogcatcher.output(cities, voter_state, base_dir(), "cities")