예제 #1
0
# Matches a ZIP code: five digits followed by any run of non-whitespace
# characters (so "27601-1234" is captured whole).
# Raw string so the regex escapes \d and \s reach the re module untouched
# instead of relying on Python passing unknown string escapes through.
zip_re = re.compile(r"\d{5}[^\s]*")

# One-off patch of the fetched page: the Henderson address runs street and
# city together, so insert the "<br />" separator between them (presumably so
# the later split at "<br />" treats it like every other address).
if "300 S. Garnett St Henderson" in data:
    data = data.replace("300 S. Garnett St Henderson",
                        "300 S. Garnett St <br /> Henderson")
else:
    # The literal is no longer on the page, so the patch above is dead code;
    # stop the run so that whoever maintains this removes it.
    print("This is no longer a useful piece of code. Remove it.")
    sys.exit()

# Split the page into one chunk of text per county.
county_data = county_data_re.findall(data)

# findall() returns a list, which has no ".length" attribute; len() is the
# way to count its items. The single pre-formatted argument prints the same
# text ("County number", two spaces, the count) under Python 2 and Python 3.
print("County number  %d" % len(county_data))

# Walk every county chunk found on the page and pull the official's name and
# the mailing address out of it.
for county in county_data:

    # Re-bind every per-record variable from dogcatcher.begin() at the start of
    # each iteration (presumably blank defaults, so values from the previous
    # county cannot leak into this one -- TODO confirm against dogcatcher).
    authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(
        voter_state)

    # All data items found in this county's chunk, and the county's name
    # (first match; raises IndexError if the pattern finds nothing).
    county_data_item = county_data_item_re.findall(county)
    county_name = county_name_re.findall(county)[0]

    # First name match with embedded CRLF line breaks removed and leading
    # whitespace stripped.
    official_name = name_re.findall(county)[0].replace("\r\n", "").lstrip()

    # split_name() hands back the first and last name plus the (possibly
    # updated) review value -- NOTE(review): exact semantics live in dogcatcher.
    first_name, last_name, review = dogcatcher.split_name(
        official_name, review)

    # NC gives two addresses: a mailing address and a street address, each
    # formatted "Street <br /> City, State Zip". The mailing address may be
    # identical to the street address.
    # This gets the address by running a RE to grab each and split it at the
    # "<br />".
    # It then checks whether the mailing and non-mailing addresses are
    # identical. If not, ... (the original note is cut off here -- TODO restore)

    # Item 1 (presumably the mailing address): drop CRLFs, collapse every run
    # of whitespace to a single space, then partition at the first "<br />",
    # giving a 3-tuple of (text before, "<br />", text after).
    po_address = " ".join(county_data_item[1].replace(
        "\r\n", "").split()).partition("<br />")
예제 #2
0
# Patterns used to pull address parts out of a county page.
# Every pattern is a raw string so regex escapes (\d, \s, \.) reach the re
# module untouched; the compiled patterns are unchanged.

# Text between the "Board of Elections<BR>" heading and the first "<br>" that
# directly follows a ZIP code (five digits plus optional digits/hyphens).
address_re = re.compile(r"Board of Elections<BR>(.+?\d{5}[-\d]*?)<br>")
# A "<br>" followed by "City, ST 12345[-6789]" with no tags inside; captures
# the city/state/zip text.
csz_re = re.compile(r"<br>([^<>]+?, [A-Z]{2,2} +?\d{5}[\d-]*)")
# Everything up to the first comma.
city_re = re.compile(r"(.+?),")
# Two capital letters with a space on each side (the spaces are part of the
# match).
state_re = re.compile(r" [A-Z][A-Z] ")
# Five digits followed by any run of digits and hyphens.
zip_re = re.compile(r"\d{5}[\d-]*")
# Text starting with "P.O." (optional whitespace after "P.") up to the next
# "<br>".
po_re = re.compile(r"(P\.\s*O\..+?)<br>")
# Any run of two or more commas and/or spaces.
comma_re = re.compile(r"[, ]{2,}")

# HREF value of the "Visit" link; the first character must not be "m"
# (presumably to skip mailto: links -- TODO confirm).
website_re = re.compile(r'HREF="([^m].+?)">Visit')
# This reduces the web page grabbed earlier to a simple list of county names.
# For each county name, we then turn it into a URL, grab an associated county
# webpage, extract the data, add that data to the Results matrix, and move on
# to the next county name.

county_names = county_name_re.findall(data)

for item in county_names:

	# Re-bind every per-record variable from dogcatcher.begin() at the start of
	# each iteration (presumably blank defaults -- TODO confirm against
	# dogcatcher).
	authority_name, first_name, last_name, county_name, town_name, fips, street, city, address_state, zip_code, po_street, po_city, po_state, po_zip_code, reg_authority_name, reg_first, reg_last, reg_street, reg_city, reg_state, reg_zip_code, reg_po_street, reg_po_city, reg_po_state, reg_po_zip_code, reg_phone, reg_fax, reg_email, reg_website, reg_hours, phone, fax, email, website, hours, review = dogcatcher.begin(voter_state)

	county_name = item

	# The authority name is the same fixed string for every county.
	authority_name = "Board of Elections"

	# We need to define a slightly different version of the county name to be
	# used in the URL; there's only a distinction in two counties, St. Lawrence
	# and New York.

	# Default: the URL form is the county name itself.
	county_name_use = county_name

	# These two names arrive as "St" and "New" (presumably county_name_re only
	# captures the first word -- TODO confirm); restore the full display name
	# and set the form the URL expects.
	if county_name == "St":
		county_name = "St. Lawrence"
		county_name_use = "St.Lawrence"
	if county_name == "New":
		county_name = "New York"
		county_name_use = "New+York"