# The authority name is uniform across counties.
# Westchester's fax numbers are hard-coded and the record is flagged with "b"
# in the review string; every other county's fax is parsed from its entry.
if county_name != "Westchester":
    fax = dogcatcher.find_phone(fax_re, county)
else:
    fax = "914-995-3190, 914-995-7753"
    review += "b"

official_name = official_name_re.findall(county)[0]
# A "<br>" inside the captured name means the pattern grabbed too much:
# print the offending entry and stop so it can be inspected.
name_has_break = "<br>" in official_name.lower()
if name_has_break:
    print(county)
    print(official_name)
    sys.exit()
first_name, last_name, review = dogcatcher.split_name(official_name, review)

# Address handling: take the full address, pick out the city/state/zip (csz)
# and, when present, a PO Box. Both are then removed from the full address so
# that only the street portion (plus some markup residue) is left, and the
# "<br>" separators are turned into commas.
address = address_re.findall(county)[0]
print(address)
csz = csz_re.findall(address)[0]

po_matches = po_re.findall(address)
if po_matches:
    # Drop the line break, trim stray commas/spaces, collapse whitespace.
    po_fragment = po_matches[0].replace("<br>", "").strip(", ")
    po_street = " ".join(po_fragment.split())

# NOTE(review): relies on po_street already holding a value when no PO Box
# was matched -- confirm against the initialisation earlier in the loop.
street = address.replace(po_street, "").replace(csz, "")
for br_variant in ("<br>", "<BR>", "<Br>"):
    street = street.replace(br_variant, ", ")
county_data = county_data_re.findall(data)
# Lists have no ``.length`` attribute; ``len()`` is the way to count matches.
# The doubled space mirrors what ``print "County number ", n`` would emit.
print("County number  %d" % len(county_data))

for county in county_data:
    # Start every county from the values handed back by dogcatcher.begin()
    # (presumably blank defaults for each output field -- TODO confirm).
    (
        authority_name, first_name, last_name, county_name, town_name, fips,
        street, city, address_state, zip_code,
        po_street, po_city, po_state, po_zip_code,
        reg_authority_name, reg_first, reg_last,
        reg_street, reg_city, reg_state, reg_zip_code,
        reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code,
        reg_phone, reg_fax, reg_email, reg_website, reg_hours,
        phone, fax, email, website, hours, review,
    ) = dogcatcher.begin(voter_state)

    county_data_item = county_data_item_re.findall(county)
    county_name = county_name_re.findall(county)[0]

    # Strip the embedded line breaks and leading whitespace from the name.
    official_name = name_re.findall(county)[0].replace("\r\n", "").lstrip()
    first_name, last_name, review = dogcatcher.split_name(
        official_name, review)

    # NC gives two addresses: a mailing address and a street address, each
    # formatted "Street <br /> City, State Zip". The mailing address may be
    # identical to the street address.
    # Each one is whitespace-normalised and split at the "<br />", giving a
    # (street line, "<br />", city/state/zip line) tuple.
    # The two tuples are then compared; when they are identical the city,
    # state, and zip are read from the street address.
    po_address = " ".join(
        county_data_item[1].replace("\r\n", "").split()).partition("<br />")
    address = " ".join(
        county_data_item[2].replace("\r\n", "").split()).partition("<br />")

    print("__________________________________________________")

    if po_address == address:
        address_state = state_re.findall(address[2])[0].strip()
        city = city_re.findall(address[2])[0].strip()
        zip_code = zip_re.findall(address[2])[0].strip()
# Start this county from the values handed back by dogcatcher.begin()
# (presumably blank defaults for each output field -- TODO confirm).
(
    authority_name, first_name, last_name, county_name, town_name, fips,
    street, city, address_state, zip_code,
    po_street, po_city, po_state, po_zip_code,
    reg_authority_name, reg_first, reg_last,
    reg_street, reg_city, reg_state, reg_zip_code,
    reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code,
    reg_phone, reg_fax, reg_email, reg_website, reg_hours,
    phone, fax, email, website, hours, review,
) = dogcatcher.begin(voter_state)

# county_names runs parallel to county_list: look the name up by position.
county_name = county_names[county_list.index(county)].title().replace(" County","")
authority_name = "Voter Registration Office"

print("______________________________________________________________________\n" + county_name)

# Some entries carry no official's name; an empty findall() result is the
# only failure handled here, so catch IndexError rather than everything.
try:
    name = name_re.findall(county)[0]
except IndexError:
    name = ""
first_name, last_name, review = dogcatcher.split_name(name, review)

mailing_address = mailing_address_re.findall(county)[0]

# A PO Box is optional; leave po_street empty when none is found.
try:
    po_street = po_re.findall(mailing_address)[0].strip()
except IndexError:
    po_street = ""

# Chain every street fragment into one comma-separated line.
street = ""
for item in street_re.findall(mailing_address):
    street = (street + ", " + item).strip(", ")
# Remove the PO Box text from the street line and tidy the leftover commas.
street = street.replace(po_street, "").replace(", , ",", ").strip(", ")
website = dogcatcher.find_website(website_re, county_web) reg_website = website email = dogcatcher.find_emails(email_re, absentee) reg_email = dogcatcher.find_emails(email_re, registration) phone = dogcatcher.find_phone(phone_re, absentee) reg_phone = dogcatcher.find_phone(phone_re, registration) # print absentee # print registration print name_re.findall(absentee) absentee_official = name_re.findall(absentee)[0].replace("</div>","") #In one county, the regular expression used yields </div> as a response. The other easy fix creates more problems, so we just remove the </div>. first_name, last_name, review = dogcatcher.split_name(absentee_official, review) if absentee_official: authority_name = direct_re.findall(absentee)[2].replace(county_name + " County","").replace(county_name + " Co","").replace(county_name,"").replace(" . "," ").strip(", .") else: authority_name = direct_re.findall(absentee)[1].replace(county_name + " County","").replace(county_name + " Co","").replace(county_name,"").replace(" . "," ").strip(", .") reg_official = name_re.findall(registration)[0].replace("</div>","") #In one county, the regular expression used yields </div> as a response. The other easy fix creates more problems, so we just remove the </div>. reg_first, reg_last, review = dogcatcher.split_name(reg_official, review) print ["0", reg_official] print ["1", absentee] print ["2", regweb] if reg_official and direct_re.findall(registration):
data = dogcatcher.po_standardize(data)
county_data = county_data_re.findall(data)

# In each county, there are separate offices for registration and absentee
# ballots. This separates those offices and then applies essentially
# identical procedures to both.
for county in county_data:
    (
        authority_name, first_name, last_name, county_name, town_name, fips,
        street, city, address_state, zip_code,
        po_street, po_city, po_state, po_zip_code,
        reg_authority_name, reg_first, reg_last,
        reg_street, reg_city, reg_state, reg_zip_code,
        reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code,
        reg_phone, reg_fax, reg_email, reg_website, reg_hours,
        phone, fax, email, website, hours, review,
    ) = dogcatcher.begin(voter_state)

    county_name = county_name_re.findall(county)[0]

    # Isolate the county clerk's portion of the full county entry.
    clerk = clerk_re.findall(county)[0]

    clerk_name = name_re.findall(clerk)[0]
    first_name, last_name, review = dogcatcher.split_name(clerk_name, review)

    phone = dogcatcher.find_phone(phone_re, clerk)
    fax = dogcatcher.find_phone(fax_re, clerk)
    website = dogcatcher.find_website(website_re, clerk)

    # Turn "<br>\n" breaks into spaces, then collapse all whitespace runs.
    raw_hours = hours_re.findall(clerk)[0]
    hours = " ".join(raw_hours.replace("<br>\n"," ").split())

    # It's hard to get the address without also getting the clerk's name.
    # So we first find the address, remove the clerk's name, and clean up a
    # few html tags. That can leave a mess of commas, so we clean that up.
    # We then extract the City, State, and Zip (CSZ) and check for a PO Box.
    # We then remove the CSZ and PO box from the address to form the street,
    # and check whether there's anything left to be a street address.
    # If there is, we clean it and trim it down to one line.
# Start this county from the values handed back by dogcatcher.begin()
# (presumably blank defaults for each output field -- TODO confirm).
(
    authority_name, first_name, last_name, county_name, town_name, fips,
    street, city, address_state, zip_code,
    po_street, po_city, po_state, po_zip_code,
    reg_authority_name, reg_first, reg_last,
    reg_street, reg_city, reg_state, reg_zip_code,
    reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code,
    reg_phone, reg_fax, reg_email, reg_website, reg_hours,
    phone, fax, email, website, hours, review,
) = dogcatcher.begin(voter_state)

# county_names runs parallel to county_list: look the name up by position.
county_name = county_names[county_list.index(county)].title().replace(
    " County", "")
authority_name = "Voter Registration Office"

print("______________________________________________________________________\n" + county_name)

# Some entries carry no official's name; an empty findall() result is the
# only failure handled here, so catch IndexError rather than everything.
try:
    name = name_re.findall(county)[0]
except IndexError:
    name = ""
first_name, last_name, review = dogcatcher.split_name(name, review)

mailing_address = mailing_address_re.findall(county)[0]

# A PO Box is optional; leave po_street empty when none is found.
try:
    po_street = po_re.findall(mailing_address)[0].strip()
except IndexError:
    po_street = ""

# Chain every street fragment into one comma-separated line.
street = ""
for item in street_re.findall(mailing_address):
    street = (street + ", " + item).strip(", ")
# Remove the PO Box text from the street line and tidy the leftover commas.
street = street.replace(po_street, "").replace(", , ", ", ").strip(", ")
# The absentee entry and the registration entry at this position must refer
# to the same county. Extract each name once and reuse it below.
county_label = county_name_re.findall(county)[0]
reg_county_label = county_name_re.findall(reg_county)[0]

if county_label.strip() != reg_county_label.strip():
    print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
    print("The lists don't match. Breaking the code.")
    print("%s %s" % (county, reg_county))
    print("%s %s" % (county_label, reg_county_label))
    print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
    # A bare sys.exit() reports success (status 0); this is an error abort,
    # so exit non-zero to let callers and shell scripts detect the failure.
    sys.exit(1)

authority_name = county_data_item[0]
reg_authority_name = reg_county_data_item[0]

county_name = county_label.title().replace(" County","")

official_name = county_data_item[1]
first_name, last_name, review = dogcatcher.split_name(official_name, review)

reg_official_name = reg_county_data_item[1]
reg_first, reg_last, review = dogcatcher.split_name(reg_official_name, review)

# The registration entry uses its own fax/phone patterns.
fax = dogcatcher.find_phone(fax_re, county)
reg_fax = dogcatcher.find_phone(reg_fax_re, reg_county)
phone = dogcatcher.find_phone(phone_re, county)
reg_phone = dogcatcher.find_phone(reg_phone_re, reg_county)

print("_____________________________________")

# This section finds the address for the absentee official.