if po_address == address:
        address_state = state_re.findall(address[2])[0].strip()
        city = city_re.findall(address[2])[0].strip()
        zip_code = zip_re.findall(address[2])[0].strip()
        street = address[0].strip()
    else:
        po_street = po_address[0].strip()
        po_city = city_re.findall(po_address[2])[0].strip()
        po_zip_code = zip_re.findall(po_address[2])[0].strip()
        po_state = state_re.findall(po_address[2])[0].strip()
        address_state = state_re.findall(address[2])[0].strip()
        city = city_re.findall(address[2])[0].strip()
        zip_code = zip_re.findall(address[2])[0].strip()
        street = address[0].strip()

    phone = dogcatcher.find_phone(phone_re, county)
    fax = dogcatcher.find_phone(fax_re, county)

    email = dogcatcher.find_emails(email_re, county)
    hours = hours_re.findall(county)[0].strip(" \r\n")

    if "PO Box" not in po_street:
        review = review + "c"

    fips = dogcatcher.find_fips(county_name, voter_state)

    if fips == "":
        print county_name + " has no findable FIPS. It may be a spellling difference."
        sys.exit()

    result.append([
Beispiel #2
0
		if reg_other:
			other_content = content_re.findall(reg_other)
			other_address = other_content[0].partition("<BR>")
			other_csz = other_address[2]
			reg_street = other_address[0]
			reg_city = city_re.findall(other_csz)[0].strip(", ")
			reg_address_state = state_re.findall(other_csz)[0].strip()
			reg_zip_code = zip_re.findall(other_csz)[0].strip()
	else:
		reg_street = address[0]
		reg_city = city_re.findall(reg_csz)[0].strip(", ")
		reg_state = state_re.findall(reg_csz)[0].strip()
		reg_zip_code = zip_re.findall(reg_csz)[0].strip()


	phone = dogcatcher.find_phone(phone_re, abse)

	reg_phone = dogcatcher.find_phone(phone_re, reg)

	#There are two entries for different parts of Jefferson County. Their county names carry a dash followed by the part of the county, so we cut off the dashed suffix before the FIPS lookup so that the name can be matched.

	if "-" in county_name:
		fips = dogcatcher.find_fips(county_name.partition(" -")[0], voter_state)
	else:
		fips = dogcatcher.find_fips(county_name, voter_state)

	print "__________________________________"


	result.append([authority_name, first_name, last_name, county_name, fips,
	street, city, address_state, zip_code,
Beispiel #3
0
	data = urllib.urlopen(county_url).read()
	output = open(file_name,"w")
	output.write(data)
	output.close()

	#Re-reading the saved file below is usually unnecessary, but it is present so that the download lines above can be commented out and the script can run from the saved copy.


	print file_name
	data = open(file_name).read()

	county = data

	#Once we have the data, we start parsing.

	phone = dogcatcher.find_phone(phone_re, county)

	#The Authority name is uniform across counties.

	if county_name == "Westchester":
		fax = "914-995-3190, 914-995-7753"
		review = review + "b"
	else:
		fax = dogcatcher.find_phone(fax_re, county)

	official_name = official_name_re.findall(county)[0]
	if "<br>" in official_name.lower():
		print county
		print official_name
		sys.exit()
Beispiel #4
0
)  #this carries the phones and names from the website in the same order as in the PDF.

for county in county_data:

    authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(
        voter_state)

    phone_name = phone_name_data[county_data.index(county)]

    official_name = name_re.findall(phone_name)[0]
    first_name, last_name, review = dogcatcher.split_name(
        official_name, review)

    authority_name = "County Clerk"

    phone = dogcatcher.find_phone(phone_re, phone_name)

    print "_________________________________________________"

    #This section finds the full address. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists.
    #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage.
    #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and state.

    county_name = county_name_re.findall(county)[0].title()

    address = address_re.findall(county)[0]

    csz = csz_re.findall(address)[0]

    try:
        po_street = po_re.findall(address)[0].replace(csz, "").strip(", \n")
#This breaks the complete dataset into a list of strings, each of which is a town.

town_data = town_data_re.findall(data)


for town in town_data:
  authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(voter_state)

  town_data_item = town_data_item_re.findall(town)

  town_name = town_name_re.findall(town)[0].title().strip()

  #The MA data is highly variable in what it includes per town. So the code attempts to grab every secondary piece of info that might be available, but accepts that it may not be able to get everything.

  fax = dogcatcher.find_phone(fax_re, town)
  phone = dogcatcher.find_phone(phone_re, town)
  try:
    email = email_re.findall(town)[0].lower()
  except:
  	email = ""
  try:
    website = dogcatcher.find_website(website_re, town)
  except:
  	website = ""


  if town_name == "Ware":
    town_data_item.insert(0, "TOWN CLERK")

  authority_name = town_data_item[0].title().replace("'S","'s").replace(town_name.upper(),"").strip(", ")
Beispiel #6
0
	if town_name == "Rockwood Strip":
		town_name = town_name.replace(" Strip","")
	if town_name == "Dennistown Plantation" or "Oxbow P" in town_name:
		town_name = town_name.replace(" Plantation","")
	if "Pleasant Point" in town_name:
		town_name = town_name.replace(" Voting District","")

	#This separates the person who handles registrations and the person who handles absentee ballot requests.
	absentee = absentee_re.findall(county)[0]
	registrar = registrar_re.findall(county)[0]

	official_name = name_re.findall(absentee)[0].strip()
	first_name, last_name, review = dogcatcher.split_name(official_name, review)

	phone = dogcatcher.find_phone(phone_re, absentee)

	fax = dogcatcher.find_phone(fax_re, absentee)

	#This section finds the address. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists.
    #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage.
    #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and mailing address.

	address = address_re.findall(absentee)[0]

	csz = csz_re.findall(address)[0].strip()

	try:
		po_street = po_re.findall(address)[0]
	except:
		po_street = ""
    reg_street = reg_address.replace(reg_po_street,"").replace(reg_csz,"")
    reg_street = reg_street.replace("\n",", ").replace(" ,",",").strip(" \n/,").title()

    if reg_po_street:
        reg_po_city = city_re.findall(reg_csz)[0].strip().title()
        reg_po_state = state_re.findall(reg_csz)[0].strip()
        reg_po_zip_code = zip_re.findall(reg_csz)[0].strip().title()
    if reg_street:
        reg_city = city_re.findall(reg_csz)[0].strip().title()
        reg_state = state_re.findall(reg_csz)[0].strip()
        reg_zip_code = zip_re.findall(reg_csz)[0].strip().title()



    phone = dogcatcher.find_phone(phone_re, town, areacode = "203")

    if ("(203) 203-") in phone:
        phone = dogcatcher.clean_phone(phone.partition(" ")[2])
        print phone

    email = dogcatcher.find_emails(email_re, town)
    fax = dogcatcher.find_phone(fax_re, town)

    official_name = name_re.findall(town)[0].title()
    first_name, last_name, review = dogcatcher.split_name(official_name, review)

    #This section finds the full address for the town clerk. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists.
    #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage.
    #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and state.
Beispiel #8
0
county_data = county_data_re.findall(data)

#In each county, there are separate offices for registration and absentee ballots. This separates those offices and then applies essentially identical procedures to both.
for county in county_data:
	authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(voter_state)


	county_name = county_name_re.findall(county)[0]

	#This isolates the county clerk data from the complete county.
	clerk = clerk_re.findall(county)[0]

	clerk_name = name_re.findall(clerk)[0]
	first_name, last_name, review = dogcatcher.split_name(clerk_name, review)

	phone = dogcatcher.find_phone(phone_re, clerk)

	fax = dogcatcher.find_phone(fax_re, clerk)

	website = dogcatcher.find_website(website_re, clerk)

	hours = " ".join(hours_re.findall(clerk)[0].replace("<br>\n"," ").split())

	#It's hard to get the address without also getting the clerk's name.
	#So we first find the address, remove the clerk's name, and clean up a few html tags.
	#That can leave a mess of commas, so we clean that up.
	#We then extract the City, State, and Zip (CSZ) and check for a PO Box.
	#We then remove the CSZ and PO box from the address to form the street, and check whether it exists.
	#We then check whether there's anything left to be a street address. If there is, we clean it and trim it down to one line.
	#Based on whether there's a street address and a PO Box at this point, we assign the city, state, and zip accordingly.
	offices = county.partition("div  >Voter Registration")
	absentee = offices[0]
	regweb = offices[2].partition("County Website")
	registration = regweb[0]
	county_web = regweb[2]


	website = dogcatcher.find_website(website_re, county_web)
	reg_website = website


	email = dogcatcher.find_emails(email_re, absentee)
	reg_email = dogcatcher.find_emails(email_re, registration)

	phone = dogcatcher.find_phone(phone_re, absentee)
	reg_phone = dogcatcher.find_phone(phone_re, registration)


	# print absentee
	# print registration
	print name_re.findall(absentee)
	absentee_official = name_re.findall(absentee)[0].replace("</div>","") #In one county, the regular expression used yields </div> as a response. The other easy fix creates more problems, so we just remove the </div>.
	first_name, last_name, review = dogcatcher.split_name(absentee_official, review)

	if absentee_official:
		authority_name = direct_re.findall(absentee)[2].replace(county_name + " County","").replace(county_name + " Co","").replace(county_name,"").replace(" . "," ").strip(", .")
	else:
		authority_name = direct_re.findall(absentee)[1].replace(county_name + " County","").replace(county_name + " Co","").replace(county_name,"").replace(" . "," ").strip(", .")

Beispiel #10
0
	first_name, last_name, authority_name, review = dogcatcher.make_name(name_line, ",", review)

	#Some of the authority names break in mid-line; this cleans them.

	for item in space_re.findall(authority_name):
		authority_name = authority_name.replace(item," ")

	town_name = town_name_re.findall(town)[0]


	hours = hours_re.findall(town)[0]
	hours = " ".join(hours.replace("\r\n","").replace("<br />"," ").split())

	email = dogcatcher.find_emails(email_re, town)

	phone = dogcatcher.find_phone(phone_re, town)
	fax = dogcatcher.find_phone(fax_re, town)

    #This section finds the address. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists.
    #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage.
    #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and state.

	address = address_re.findall(town)[0]

	csz = csz_re.findall(address)[0]

	if po_re.findall(address):
		po_street = " ".join(po_re.findall(address)[0].replace("<br />","").strip(", ").split())
	else:
		po_street = ""
Beispiel #11
0
		print county_name_re.findall(county)[0], county_name_re.findall(reg_county)[0]
		print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
		sys.exit()

	authority_name = county_data_item[0]
	reg_authority_name = reg_county_data_item[0]

	county_name = county_name_re.findall(county)[0].title().replace(" County","")

	official_name = county_data_item[1]
	first_name, last_name, review = dogcatcher.split_name(official_name, review)

	reg_official_name = reg_county_data_item[1]
	reg_first, reg_last, review = dogcatcher.split_name(reg_official_name, review)

	fax = dogcatcher.find_phone(fax_re, county)

	reg_fax = dogcatcher.find_phone(reg_fax_re, reg_county)

	phone = dogcatcher.find_phone(phone_re, county)

	reg_phone = dogcatcher.find_phone(reg_phone_re, reg_county)

	print "_____________________________________"

	#This section finds the address for the absentee official.
	#These are all comma separated, so don't need to rely on the maps API.
	#Todo: document.


	address = county_data_item[2].replace("\r\n", " ").strip()
Beispiel #12
0
    print reg_po_state

    county_name = county_names[county_data.index(county)]

    print "___________________________________"
    print county
    print "+++++++++++++++++++++++++++++++++++"

    name = name_re.findall(county)[0].strip()
    first_name, last_name, authority_name, review = dogcatcher.make_name(name, ", ", review)


    email = h.unescape(dogcatcher.find_emails(email_re, county))
    website = dogcatcher.find_website(website_re, county)
    phone = dogcatcher.find_phone(phone_re, county)
    fax = dogcatcher.find_phone(fax_re, county, area_re.findall(phone)[0]) #The fax #s don't come with area codes.


    #We know that there are either one or two address-shaped things in any given county.
    #So we first find all of the addresses, and then proceed based on whether there's one or two.

    addresses = address_re.findall(county)
    if len(addresses)==1:

        #This section finds the full address. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists.
        #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage.
        #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and state.

        address = addresses[0]
        csz = csz_re.findall(address)[0]
Beispiel #13
0
    if town_name == "Rockwood Strip":
        town_name = town_name.replace(" Strip", "")
    if town_name == "Dennistown Plantation" or "Oxbow P" in town_name:
        town_name = town_name.replace(" Plantation", "")
    if "Pleasant Point" in town_name:
        town_name = town_name.replace(" Voting District", "")

    #This separates the person who handles registrations and the person who handles absentee ballot requests.
    absentee = absentee_re.findall(county)[0]
    registrar = registrar_re.findall(county)[0]

    official_name = name_re.findall(absentee)[0].strip()
    first_name, last_name, review = dogcatcher.split_name(
        official_name, review)

    phone = dogcatcher.find_phone(phone_re, absentee)

    fax = dogcatcher.find_phone(fax_re, absentee)

    #This section finds the address. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists.
    #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage.
    #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and mailing address.

    address = address_re.findall(absentee)[0]

    csz = csz_re.findall(address)[0].strip()

    try:
        po_street = po_re.findall(address)[0]
    except:
        po_street = ""
Beispiel #14
0
phone_name_data = county_2_re.findall(htmldata) #this carries the phones and names from the website in the same order as in the PDF.

for county in county_data:

    authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(voter_state)

    phone_name = phone_name_data[county_data.index(county)]

    official_name = name_re.findall(phone_name)[0]
    first_name, last_name, review = dogcatcher.split_name(official_name, review)

    authority_name = "County Clerk"


    phone = dogcatcher.find_phone(phone_re, phone_name)


    print "_________________________________________________"
   
    #This section finds the full address. After finding the address, it identifies a city/state/zip (csz) combination and a PO Box number if that exists.
    #It removes both the CSZ and the PO Address (if it exists) from the full address, leaving behind a street address with some garbage.
    #It then cleans up the street address and pulls the city, state, and zip out of the csz, and assigns them as appropriate to the street address and state.

    county_name = county_name_re.findall(county)[0].title()

    address = address_re.findall(county)[0]

    csz = csz_re.findall(address)[0]

    try: