zip_code = zip_re.findall(address[2])[0].strip() street = address[0].strip() else: po_street = po_address[0].strip() po_city = city_re.findall(po_address[2])[0].strip() po_zip_code = zip_re.findall(po_address[2])[0].strip() po_state = state_re.findall(po_address[2])[0].strip() address_state = state_re.findall(address[2])[0].strip() city = city_re.findall(address[2])[0].strip() zip_code = zip_re.findall(address[2])[0].strip() street = address[0].strip() phone = dogcatcher.find_phone(phone_re, county) fax = dogcatcher.find_phone(fax_re, county) email = dogcatcher.find_emails(email_re, county) hours = hours_re.findall(county)[0].strip(" \r\n") if "PO Box" not in po_street: review = review + "c" fips = dogcatcher.find_fips(county_name, voter_state) if fips == "": print county_name + " has no findable FIPS. It may be a spellling difference." sys.exit() result.append([ authority_name, first_name, last_name, county_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city,
street = street.replace(po_street, "").replace(", , ",", ").strip(", ") city = city_re.findall(address)[0].strip() address_state = state_re.findall(address)[0].strip() zip_code = zip_re.findall(address)[0].strip() print street + ", " + city + ", " + address_state + " " + zip_code print po_street + ", " + po_city + ", " + po_state + " " + po_zip_code phone = dogcatcher.find_phone(phone_re, county) fax = dogcatcher.find_phone(fax_re,county) email = dogcatcher.find_emails(email_re, county) try: if county_name == "Staunton City": website = "http://www.staunton.va.us/directory/departments-h-z/registrar/how-to-vote" else: website = dogcatcher.website_find(website_re, county) except: website = "" fips = dogcatcher.find_fips(county_name, voter_state) county_result.append([authority_name, first_name, last_name, county_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code,
# Every county here lists exactly one address (physical or mailing), always on two lines and without a state.
# address_re captures that address; the presence of "PO " decides whether it fills the mailing (po_*) fields
# or the physical ones.
# City and zip are labelled explicitly in the county data (as City: Fooville \n Zip: 11111), so both are read
# directly from the county text rather than being carved out of the full address.
address = address_re.findall(county)[0].strip()

if "PO " not in address:
    # Physical address.
    street = address
    city = city_re.findall(county)[0].strip()
    zip_code = zip_re.findall(county)[0].strip()
else:
    # Mailing (PO Box) address.
    po_street = address
    po_city = city_re.findall(county)[0].strip()
    po_zip_code = zip_re.findall(county)[0].strip()

# Drop URL-encoded spaces ("%20") from whatever the email finder returns.
raw_email = dogcatcher.find_emails(email_re, county)
email = raw_email.replace("%20", "")

phone = dogcatcher.find_phone(phone_re, county)
fax = dogcatcher.find_phone(fax_re, county)

fips = dogcatcher.find_fips(county_name, voter_state)

# One output row per county; the column order below is the order written to result.
county_record = [
    authority_name,
    first_name,
    last_name,
    county_name,
    fips,
    street,
    city,
    address_state,
    zip_code,
    po_street,
    po_city,
    po_state,
    po_zip_code,
    reg_authority_name,
    reg_first,
    reg_last,
    reg_street,
    reg_city,
    reg_state,
    reg_zip_code,
    reg_po_street,
    reg_po_city,
    reg_po_state,
    reg_po_zip_code,
    reg_phone,
    reg_fax,
    reg_email,
    reg_website,
    reg_hours,
    phone,
    fax,
    email,
    website,
    hours,
    voter_state,
    source,
    review,
]
result.append(county_record)
reg_po_state = state_re.findall(reg_csz)[0].strip() reg_po_zip_code = zip_re.findall(reg_csz)[0].strip().title() if reg_street: reg_city = city_re.findall(reg_csz)[0].strip().title() reg_state = state_re.findall(reg_csz)[0].strip() reg_zip_code = zip_re.findall(reg_csz)[0].strip().title() phone = dogcatcher.find_phone(phone_re, town, areacode = "203") if ("(203) 203-") in phone: phone = dogcatcher.clean_phone(phone.partition(" ")[2]) print phone email = dogcatcher.find_emails(email_re, town) fax = dogcatcher.find_phone(fax_re, town) official_name = name_re.findall(town)[0].title() first_name, last_name, review = dogcatcher.split_name(official_name, review) #This section finds the full address for the town clerk. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists. #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage. #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and state. address = abs_address_re.findall(town)[0] csz = csz_re.findall(address)[0] if not address.replace(csz,""):
#They split in a neat place, so we're able to use partition instead of a regular expression. #Another part of the content is the county website, so we split that off as well. #This yields three pieces of data: absentee (absentee voting official), registration (registration official), and county_web (website official). offices = county.partition("div >Voter Registration") absentee = offices[0] regweb = offices[2].partition("County Website") registration = regweb[0] county_web = regweb[2] website = dogcatcher.find_website(website_re, county_web) reg_website = website email = dogcatcher.find_emails(email_re, absentee) reg_email = dogcatcher.find_emails(email_re, registration) phone = dogcatcher.find_phone(phone_re, absentee) reg_phone = dogcatcher.find_phone(phone_re, registration) # print absentee # print registration print name_re.findall(absentee) absentee_official = name_re.findall(absentee)[0].replace("</div>","") #In one county, the regular expression used yields </div> as a response. The other easy fix creates more problems, so we just remove the </div>. first_name, last_name, review = dogcatcher.split_name(absentee_official, review) if absentee_official: authority_name = direct_re.findall(absentee)[2].replace(county_name + " County","").replace(county_name + " Co","").replace(county_name,"").replace(" . "," ").strip(", .") else:
# Look for a PO Box in the jurisdiction text. findall() returns an empty list when nothing
# matches, so indexing [0] raises IndexError; that is the only failure treated as "no PO Box".
# (Previously a bare except also hid KeyboardInterrupt, SystemExit and genuine coding errors.)
try:
    po_street = po_re.findall(jurisdiction)[0].replace("v: ","")
except IndexError:
    po_street = ""

# Split mailing_address between the physical street and the PO Box:
#   * no PO Box found             -> the whole mailing address is the street;
#   * PO Box found and what is left after removing it still matches digit_re
#                                 -> that remainder is the street;
#   * otherwise                   -> the whole mailing address is kept as the PO address.
if not po_street:
    street = mailing_address.replace("v: ","")
else:
    street_remainder = mailing_address.replace(po_street,"")
    if digit_re.findall(street_remainder):
        street = street_remainder
    else:
        po_street = mailing_address

# NOTE(review): in the PO-only case above, street is not assigned in this section, so this line
# relies on street having been initialised earlier - confirm against the surrounding code.
street = street.rstrip(",")

email = dogcatcher.find_emails(email_re, jurisdiction)
phone = dogcatcher.phone_find(phone_re, jurisdiction)
fax = dogcatcher.phone_find(fax_re, jurisdiction)

# A missing city is not an error; fall back to an empty string when city_re finds nothing.
try:
    city = city_re.findall(jurisdiction)[0].replace("v: ","")
except IndexError:
    city = ""

# With two zip-code matches the second one is used; with exactly one, that one is used;
# any other count (none, or three or more) leaves the zip code blank.
zip_code_all = zip_code_re.findall(jurisdiction)
if len(zip_code_all) == 2:
    zip_code = zip_code_all[1].replace("v: ","")
elif len(zip_code_all) == 1:
    zip_code = zip_code_all[0].replace("v: ","")
else:
    zip_code = ""
authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(voter_state) print reg_po_state county_name = county_names[county_data.index(county)] print "___________________________________" print county print "+++++++++++++++++++++++++++++++++++" name = name_re.findall(county)[0].strip() first_name, last_name, authority_name, review = dogcatcher.make_name(name, ", ", review) email = h.unescape(dogcatcher.find_emails(email_re, county)) website = dogcatcher.find_website(website_re, county) phone = dogcatcher.find_phone(phone_re, county) fax = dogcatcher.find_phone(fax_re, county, area_re.findall(phone)[0]) #The fax #s don't come with area codes. #We know that there are either one or two address-shaped things in any given county. #So we first find all of the addresses, and then proceed based on whether there's one or two. addresses = address_re.findall(county) if len(addresses)==1: #This section finds the full address. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists. #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage. #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and state.
# There's only a single physical or mailing address, and all addresses are only two lines. State is not included. # Address_re finds the address. We then check whether it's a physical or mailing address, and assign the other variables accordingly. # Since the city and zip code are explicitly set out in the data (as City: Foovile \n Zip: 11111), this gets both directly from the county data, instead of extracting it from the complete address. address = address_re.findall(county)[0].strip() if "PO " in address: po_street = address po_city = city_re.findall(county)[0].strip() po_zip_code = zip_re.findall(county)[0].strip() else: street = address city = city_re.findall(county)[0].strip() zip_code = zip_re.findall(county)[0].strip() email = dogcatcher.find_emails(email_re, county).replace("%20", "") phone = dogcatcher.find_phone(phone_re, county) fax = dogcatcher.find_phone(fax_re, county) fips = dogcatcher.find_fips(county_name, voter_state) result.append( [ authority_name, first_name, last_name, county_name, fips, street,