def _scrape_upper(self, roster_page, term):
    """Scrape every senator linked from the roster, then the Lt. Governor."""
    def district_number(link):
        # The district number is embedded in the link filename before ".htm".
        return int(re.search(r'\d+(?=\.htm)', link).group())

    senator_links = roster_page.xpath('(//table[caption])[1]//a/@href')
    # Sort by district for easier spotting of omissions:
    senator_links.sort(key=district_number)
    for link in senator_links:
        self._scrape_senator(link, term)

    # Handle Lt. Governor (President of the Senate) separately:
    lt_gov_url = 'http://www.senate.state.tx.us/75r/LtGov/Ltgov.htm'
    lt_gov_page = lxml.html.fromstring(self.get(lt_gov_url).text)
    title_text = lt_gov_page.xpath('//div[@class="memtitle"]/text()')[0]
    lt_gov_name = title_text.replace('Lieutenant Governor', '').strip()
    # A safe assumption for lack of information on official member page or
    # party listings:
    lt_governor = Person(lt_gov_name)
    lt_governor.add_role('Lt. Governor', term, party='Republican')
    lt_governor.add_source(lt_gov_url)
    self.save_legislator(lt_governor)
def _scrape_upper(self, roster_page, term):
    """Scrape the upper-chamber roster, then the Lt. Governor's page."""
    # Sort by district for easier spotting of omissions:
    ordered = sorted(
        roster_page.xpath('(//table[caption])[1]//a/@href'),
        key=lambda href: int(re.search(r'\d+(?=\.htm)', href).group()))
    for href in ordered:
        self._scrape_senator(href, term)

    # Handle Lt. Governor (President of the Senate) separately:
    source_url = 'http://www.senate.state.tx.us/75r/LtGov/Ltgov.htm'
    doc = lxml.html.fromstring(self.get(source_url).text)
    raw_title = doc.xpath('//div[@class="memtitle"]/text()')[0]
    # A safe assumption for lack of information on official member page or
    # party listings:
    party = 'Republican'
    person = Person(raw_title.replace('Lieutenant Governor', '').strip())
    person.add_role('Lt. Governor', term, party=party)
    person.add_source(source_url)
    self.save_legislator(person)
def scrape_member(self, chamber, term, member_url):
    """Scrape one member detail page; save a Person for the Lt. Governor
    or a Legislator (with committee roles) for everyone else.
    """
    with self.urlopen(member_url) as page:
        root = lxml.html.fromstring(page)
        root.make_links_absolute(member_url)

        sdiv = root.xpath('//div[@class="subtitle"]')[0]
        table = sdiv.getnext()

        photo_url = table.xpath('//img[@id="ctl00_ContentPlaceHolder1'
                                '_imgMember"]')[0].attrib['src']

        td = table.xpath('//td[@valign="top"]')[0]

        # NOTE(review): these 'string(//...)' XPaths are document-rooted,
        # so they search the whole page rather than just `td` — presumably
        # the page layout makes the first match the right one; confirm.
        type = td.xpath('string(//div[1]/strong)').strip()

        full_name = td.xpath('string(//div[2]/strong)').strip()
        full_name = re.sub(r'\s+', ' ', full_name)  # collapse whitespace runs

        district = td.xpath('string(//div[3])').strip()
        district = district.replace('District ', '')

        # Build the capitol/district address strings by concatenating the
        # text trailing the labeled <span> and each following <br>.
        addrs = {}
        for atype, text in (('capital_address', 'Capitol address:'),
                            ('district_address', 'District address:')):
            aspan = root.xpath("//span[. = '%s']" % text)
            addrs[atype] = None
            if aspan:
                # NOTE(review): if the span's tail is None this stays None
                # and the `+=` below would raise TypeError — confirm the
                # pages always carry trailing text here.
                addrs[atype] = aspan[0].tail
                elem = aspan[0].getnext()
                while elem is not None and elem.tag == 'br':
                    if elem.tail:
                        addrs[atype] += "\n" + elem.tail
                    elem = elem.getnext()

        # First character of the party <div>: 'D' or 'R'.
        party = td.xpath('string(//div[4])').strip()[0]
        if party == 'D':
            party = 'Democratic'
        elif party == 'R':
            party = 'Republican'

        if type == 'Lt. Gov.':
            # The Lt. Governor is not a chamber member; store as a Person.
            leg = Person(full_name)
            leg.add_role('Lt. Governor', term, party=party, **addrs)
        else:
            leg = Legislator(term, chamber, district, full_name,
                             party=party, photo_url=photo_url, **addrs)

        leg.add_source(urlescape(member_url))

        # Committee memberships are links in the div following the
        # "Committee Membership:" header.
        comm_div = root.xpath('//div[string() = "Committee Membership:"]'
                              '/following-sibling::div'
                              '[@class="rcwcontent"]')[0]

        for link in comm_div.xpath('*/a'):
            name = link.text
            if '(Vice Chair)' in name:
                mtype = 'vice chair'
            elif '(Chair)' in name:
                mtype = 'chair'
            else:
                mtype = 'member'
            name = clean_committee_name(link.text)

            # There's no easy way to determine whether a committee
            # is joint or not using the mobile legislator directory
            # (without grabbing a whole bunch of pages, at least)
            # so for now we will hard-code the one broken case
            if (name == "Oversight of HHS Eligibility System" and
                    term == '82'):
                comm_chamber = 'joint'
            else:
                comm_chamber = chamber

            if name.startswith('Appropriations-S/C on '):
                # Appropriations subcommittees are encoded in the name.
                sub = name.replace('Appropriations-S/C on ', '')
                leg.add_role('committee member', term,
                             chamber=comm_chamber,
                             committee='Appropriations',
                             subcommittee=sub,
                             position=mtype)
            else:
                leg.add_role('committee member', term,
                             chamber=comm_chamber,
                             committee=name,
                             position=mtype)

        if type == 'Lt. Gov.':
            self.save_person(leg)
        else:
            # Only save when a district was parsed.
            if district:
                self.save_legislator(leg)
def scrape_member(self, chamber, term, member_url):
    """Scrape one member detail page; save a Person for the Lt. Governor
    or a Legislator (with offices and committee roles) otherwise.
    """
    page = self.urlopen(member_url)
    root = lxml.html.fromstring(page)
    root.make_links_absolute(member_url)
    sdiv = root.xpath('//div[@class="subtitle"]')[0]
    table = sdiv.getnext()
    photo_url = table.xpath('//img[@id="ctl00_ContentPlaceHolder1'
                            '_imgMember"]')[0].attrib['src']
    td = table.xpath('//td[@valign="top"]')[0]
    # NOTE(review): 'string(//...)' is document-rooted, not relative to
    # `td` — presumably the first match is the right one; confirm.
    type = td.xpath('string(//div[1]/strong)').strip()
    full_name = td.xpath('string(//div[2]/strong)').strip()
    full_name = re.sub(r'\s+', ' ', full_name)  # collapse whitespace runs
    district = td.xpath('string(//div[3])').strip()
    district = district.replace('District ', '')
    # First character of the party <div>: 'D' or 'R'.
    party = td.xpath('string(//div[4])').strip()[0]
    if party == 'D':
        party = 'Democratic'
    elif party == 'R':
        party = 'Republican'

    if type == 'Lt. Gov.':
        # The Lt. Governor is not a chamber member; store as a Person.
        leg = Person(full_name)
        leg.add_role('Lt. Governor', term, party=party)
    else:
        leg = Legislator(term, chamber, district, full_name,
                         party=party, photo_url=photo_url,
                         url=member_url)
    leg.add_source(urlescape(member_url))

    # add addresses
    for atype, text in (('capitol', 'Capitol address'),
                        ('district', 'District address')):
        aspan = root.xpath("//span[. = '%s:']" % text)
        addr = ''
        phone = None
        if aspan:
            # cycle through brs
            addr = aspan[0].tail.strip()
            elem = aspan[0].getnext()
            while elem is not None and elem.tag == 'br':
                if elem.tail:
                    # A line matching phone_re is the phone number;
                    # anything else is another address line.
                    if not phone_re.match(elem.tail):
                        addr += "\n" + elem.tail
                    else:
                        phone = elem.tail
                elem = elem.getnext()
        # now add the addresses
        # NOTE(review): an office is added even when the labeled span is
        # absent (empty address) — confirm this is intentional.
        leg.add_office(atype, text, address=addr, phone=phone)

    # add committees
    comm_div = root.xpath('//div[string() = "Committee Membership:"]'
                          '/following-sibling::div'
                          '[@class="rcwcontent"]')[0]
    for link in comm_div.xpath('*/a'):
        name = link.text
        if '(Vice Chair)' in name:
            mtype = 'vice chair'
        elif '(Chair)' in name:
            mtype = 'chair'
        else:
            mtype = 'member'
        name = clean_committee_name(link.text)

        # There's no easy way to determine whether a committee
        # is joint or not using the mobile legislator directory
        # (without grabbing a whole bunch of pages, at least)
        # so for now we will hard-code the one broken case
        if (name == "Oversight of HHS Eligibility System" and
                term == '82'):
            comm_chamber = 'joint'
        else:
            comm_chamber = chamber

        if name.startswith('Appropriations-S/C on '):
            # Appropriations subcommittees are encoded in the name.
            sub = name.replace('Appropriations-S/C on ', '')
            leg.add_role('committee member', term,
                         chamber=comm_chamber,
                         committee='Appropriations',
                         subcommittee=sub,
                         position=mtype)
        else:
            leg.add_role('committee member', term,
                         chamber=comm_chamber,
                         committee=name,
                         position=mtype)

    if type == 'Lt. Gov.':
        self.save_object(leg)
    else:
        # Only save when a district was parsed.
        if district:
            self.save_legislator(leg)
def scrape(self, term, chambers):
    """Scrape Philadelphia's mayor and all city council members.

    Pulls the mayor's name and contact info from phila.gov, then walks
    each councilmember's profile and Contact page, parsing the name,
    district, photo, and up to two offices out of loosely structured text.
    """
    # The mayor doesn't sit on council.
    url = 'http://www.phila.gov/'
    doc = lxml.html.fromstring(self.urlopen(url))
    doc.make_links_absolute(url)

    # The mayor's name doesn't appear on the mayor's page!
    full_name = re.search('Mayor (.+)',
                          doc.xpath('//title/text()')[0].strip()).group(1)
    first_name, middle_name, last_name = parse_full_name(full_name)
    mayor = Person(full_name, first_name, last_name, middle_name)
    mayor.add_source(url)

    url = 'http://www.phila.gov/mayor/'
    doc = lxml.html.fromstring(self.urlopen(url))
    doc.make_links_absolute(url)

    # Mailing address is the text siblings after the "Mailing Address" div.
    lines = map(clean_string, doc.xpath('//div[contains(text(),"Mailing Address")]/following-sibling::text()')[1:])
    address = '\n'.join(lines)
    phone = '-'.join(tel_regex.search(doc.xpath('//strong[contains(text(),"Phone")]/following-sibling::text()[1]')[0]).groups())
    fax = '-'.join(tel_regex.search(doc.xpath('//strong[contains(text(),"Fax")]/following-sibling::text()[1]')[0]).groups())
    email = clean_string(doc.xpath('//strong[contains(text(),"Email")]/following-sibling::text()[1]')[0])

    mayor.update(dict(url=url, email=email))
    mayor.add_office('capitol', 'Office of the Mayor', address=address,
                     phone=phone, fax=fax)
    mayor.add_role('Mayor', term)
    mayor.add_source(url)
    self.save_object(mayor)

    council_url = 'http://philadelphiacitycouncil.net/council-members/'
    doc = lxml.html.fromstring(self.urlopen(council_url))
    doc.make_links_absolute(council_url)
    urls = set(doc.xpath('//a[contains(@href, "/council-members/council")]/@href'))
    # NOTE(review): the condition allows fewer than 17 although the message
    # says "expected 17" — confirm intent.
    assert len(urls) <= 17, 'expected 17 unique councilmember URLs, found %d' % len(urls)

    for url in urls:
        doc = lxml.html.fromstring(self.urlopen(url))
        doc.make_links_absolute(url)

        optional = dict()  # fields not all legislators will have
        full_name = []
        first_name = ''
        middle_name = ''
        last_name = ''
        suffixes = ''
        roles = []
        # Address lines for the City Hall office and a possible second office.
        lines = []
        lines_office2 = []
        has_office2 = bool(False)
        reached_contact_form = bool(False)
        phone1 = None
        phone1_office2 = None
        phone2 = None
        phone2_office2 = None
        fax = None
        fax_office2 = None
        office_name = None
        district = 'At-Large'  # default

        # Try several image selectors, in decreasing order of size.
        photo_url = (
            doc.xpath('//img[contains(@title, "brian picture")]/@src') or  # Special case for BRIAN J. O’NEILL
            doc.xpath('//img[contains(@class, "size-full")]/@src') or
            doc.xpath('//img[contains(@class, "size-medium")]/@src') or
            doc.xpath('//img[contains(@class, "size-thumbnail")]/@src')
        )[0]

        # That's an en dash, not a hyphen.
        parts = re.split(u'[,–]', doc.xpath('//h3/text()')[0])
        for index, part in enumerate(filter(None, parts)):
            part = clean_string(part)
            if index == 0:
                # First fragment carries the title and the name.
                if 'Councilman' in part:
                    optional['gender'] = 'Male'
                elif 'Councilwoman' in part:
                    optional['gender'] = 'Female'
                elif 'Council President' in part:
                    roles.append('Council President')
                part = re.sub('^Council(?:man|woman| President)\s+', '', part)
                full_name.append(part)
                first_name, middle_name, last_name = parse_full_name(full_name[0])
            elif part in ('Jr.', 'Sr.'):
                full_name.append(part)
                suffixes = part
            elif 'District' in part:
                district = part
            else:
                roles.append(part)
        full_name = ', '.join(full_name)

        contact_url = doc.xpath('//a[text()="Contact"]/@href')[0]
        doc = lxml.html.fromstring(self.urlopen(contact_url))
        doc.make_links_absolute(contact_url)

        # @todo email, personal_url are sometimes in another paragraph.
        parts = doc.xpath('//div[@class="post-entry"]//text()')
        parts = map(clean_string, parts)
        # State machine over the contact page's text fragments:
        # `consuming_address_lines` is True while inside an address block.
        consuming_address_lines = bool(False)
        for part in filter(None, parts):
            # Special case for Curtis Jones Jr.
            if re.match(r'^Local Office:', part):
                consuming_address_lines = True
                has_office2 = True
                office_name = 'Local Office'
            # Headers, hours, social links, and everything after the contact
            # form are ignored.
            if re.match(r'City Hall Office', part) or re.match(r'^Hours', part) or re.match(r'.*facebook', part) or re.match(r'.*twitter', part) or reached_contact_form:
                continue
            elif re.match(r'^Contact Council.*man', part) or re.match(r'^Contact CMAL', part):
                reached_contact_form = True
                continue
            elif re.match(r'^City Hall.+Room', part):
                consuming_address_lines = True
                lines.append(part)
            elif re.match(r'^FAX:', part, re.I) or re.match(r'^F:', part, re.I):
                consuming_address_lines = False
                if has_office2 and fax_office2 == None:
                    fax_office2 = '-'.join(tel_regex.search(part).groups())
                elif fax == None:
                    fax = '-'.join(tel_regex.search(part).groups())
            elif tel_regex.search(part):
                consuming_address_lines = False
                if has_office2 and phone1_office2 == None and phone2_office2 == None:
                    phone1_office2, phone2_office2 = parse_phones(part)
                elif phone1 == None and phone2 == None:
                    phone1, phone2 = parse_phones(part)
            elif '@' in part:
                consuming_address_lines = False
                optional['email'] = re.search('\S+@\S+', part).group()
            elif re.match(r'^Neighborhood Office.*', part):
                consuming_address_lines = False
                has_office2 = True
            elif re.match(r'.*Office.*', part) or re.match(r'.*Heroes Hall.*', part):
                # Special case for Curtis Jones Jr.
                if re.match(r'.*Local Office.*', part):
                    continue
                if len(lines_office2) > 0:
                    consuming_address_lines = False
                else:
                    consuming_address_lines = True
                # NOTE(review): `string.strip` is the Python 2 string-module
                # helper (removed in Py3); equivalent to part.strip(':;,.').
                office_name = string.strip(part, ':;,.')
            elif consuming_address_lines:
                if has_office2:
                    lines_office2.append(cleanup_address(part, False))
                else:
                    lines.append(cleanup_address(part))
            elif re.match(r'^(?:, )?Philadelphia, PA(?: 19107(?:-3290)?)?$', part):
                pass
            else:
                self.logger.warning('Skipped: ' + part)

        # Some Councilmembers have no zip code or only a 5-digit zip code.
        # All that changes between them is a room number.
        address = '\n'.join(lines)
        address_office2 = '\n'.join(lines_office2)

        legislator = Legislator(term, 'upper', district, full_name,
                                first_name, last_name, middle_name,
                                suffixes=suffixes, url=url,
                                photo_url=photo_url, party=None)
        legislator.update(optional)
        # Only add offices whose address has non-whitespace content.
        if re.search('.*\S.*', address):
            legislator.add_office('capitol', 'City Hall Office',
                                  address=address, phone=phone1,
                                  secondary_phone=phone2, fax=fax)
        if re.search('.*\S.*', address_office2):
            legislator.add_office('district', office_name,
                                  address=address_office2,
                                  phone=phone1_office2,
                                  secondary_phone=phone2_office2,
                                  fax=fax_office2)
        legislator.add_source(url)
        for role in roles:
            legislator.add_role(role, term)
        self.save_legislator(legislator)
def scrape_member(self, chamber, term, member_url):
    """Scrape one member detail page; save a Person for the Lt. Governor
    or a Legislator (with committee roles) otherwise.
    """
    with self.urlopen(member_url) as page:
        root = lxml.html.fromstring(page)
        root.make_links_absolute(member_url)
        sdiv = root.xpath('//div[@class="subtitle"]')[0]
        table = sdiv.getnext()
        photo_url = table.xpath('//img[@id="ctl00_ContentPlaceHolder1'
                                '_imgMember"]')[0].attrib['src']
        td = table.xpath('//td[@valign="top"]')[0]
        # NOTE(review): 'string(//...)' is document-rooted, not relative
        # to `td` — presumably the first match is the right one; confirm.
        type = td.xpath('string(//div[1]/strong)').strip()
        full_name = td.xpath('string(//div[2]/strong)').strip()
        full_name = re.sub(r'\s+', ' ', full_name)  # collapse whitespace
        district = td.xpath('string(//div[3])').strip()
        district = district.replace('District ', '')

        # Build the capitol/district address strings by concatenating the
        # text trailing the labeled <span> and each following <br>.
        addrs = {}
        for atype, text in (('capital_address', 'Capitol address:'),
                            ('district_address', 'District address:')):
            aspan = root.xpath("//span[. = '%s']" % text)
            addrs[atype] = None
            if aspan:
                addrs[atype] = aspan[0].tail
                elem = aspan[0].getnext()
                while elem is not None and elem.tag == 'br':
                    if elem.tail:
                        addrs[atype] += "\n" + elem.tail
                    elem = elem.getnext()

        # First character of the party <div>: 'D' or 'R'.
        party = td.xpath('string(//div[4])').strip()[0]
        if party == 'D':
            party = 'Democratic'
        elif party == 'R':
            party = 'Republican'

        if type == 'Lt. Gov.':
            # The Lt. Governor is not a chamber member; store as a Person.
            leg = Person(full_name)
            leg.add_role('Lt. Governor', term, party=party, **addrs)
        else:
            leg = Legislator(term, chamber, district, full_name,
                             party=party, photo_url=photo_url, **addrs)
        leg.add_source(member_url)

        # Committee memberships are links in the div following the header.
        comm_div = root.xpath('//div[string() = "Committee Membership:"]'
                              '/following-sibling::div'
                              '[@class="rcwcontent"]')[0]
        for link in comm_div.xpath('*/a'):
            name = link.text
            if '(Vice Chair)' in name:
                mtype = 'vice chair'
            elif '(Chair)' in name:
                mtype = 'chair'
            else:
                mtype = 'member'
            name = clean_committee_name(link.text)
            if name.startswith('Appropriations-S/C on '):
                # Appropriations subcommittees are encoded in the name.
                sub = name.replace('Appropriations-S/C on ', '')
                leg.add_role('committee member', term,
                             chamber=chamber,
                             committee='Appropriations',
                             subcommittee=sub,
                             position=mtype)
            else:
                leg.add_role('committee member', term,
                             chamber=chamber,
                             committee=name,
                             position=mtype)

        if type == 'Lt. Gov.':
            self.save_person(leg)
        else:
            # Only save when a district was parsed.
            if district:
                self.save_legislator(leg)
def scrape_member(self, chamber, term, member_url):
    """Scrape one member detail page; save a Person for the Lt. Governor
    or a Legislator (with offices and committee roles) otherwise.
    """
    page = self.get(member_url).text
    root = lxml.html.fromstring(page)
    root.make_links_absolute(member_url)
    sdiv = root.xpath('//div[@class="subtitle"]')[0]
    table = sdiv.getnext()
    photo_url = table.xpath('//img[@id="ctl00_ContentPlaceHolder1'
                            '_imgMember"]')[0].attrib['src']
    td = table.xpath('//td[@valign="top"]')[0]
    # NOTE(review): 'string(//...)' is document-rooted, not relative to td.
    type = td.xpath('string(//div[1]/strong)').strip()
    # Collect all <div><strong> texts; the member's name is taken from the
    # last entry. Bail out if nothing was found.
    full_name = td.xpath('//div/strong/text()')
    full_name = [re.sub(r'\s+', ' ', x).strip() for x in full_name]
    if full_name == []:
        self.warning("ERROR: CAN'T GET FULL NAME")
        return
    full_name = full_name[-1]
    district = td.xpath('string(//div[3])').strip()
    district = district.replace('District ', '')
    # First character of the party <div>: 'D' or 'R'.
    party = td.xpath('string(//div[4])').strip()[0]
    if party == 'D':
        party = 'Democratic'
    elif party == 'R':
        party = 'Republican'

    if type == 'Lt. Gov.':
        # The Lt. Governor is not a chamber member; store as a Person.
        leg = Person(full_name)
        leg.add_role('Lt. Governor', term, party=party)
    else:
        leg = Legislator(term, chamber, district, full_name,
                         party=party, photo_url=photo_url,
                         url=member_url)
    leg.add_source(urlescape(member_url))

    # add addresses
    for atype, text in (('capitol', 'Capitol address'),
                        ('district', 'District address')):
        aspan = root.xpath("//span[. = '%s:']" % text)
        addr = ''
        phone = None
        if aspan:
            # cycle through brs
            addr = aspan[0].tail.strip()
            elem = aspan[0].getnext()
            while elem is not None and elem.tag == 'br':
                if elem.tail:
                    # A line matching phone_re is the phone number;
                    # anything else is another address line.
                    if not phone_re.match(elem.tail):
                        addr += "\n" + elem.tail
                    else:
                        phone = elem.tail
                elem = elem.getnext()
        # now add the addresses
        # NOTE(review): an office is added even when the labeled span is
        # absent (empty address) — confirm this is intentional.
        leg.add_office(atype, text, address=addr, phone=phone)

    # add committees
    comm_div = root.xpath('//div[string() = "Committee Membership:"]'
                          '/following-sibling::div'
                          '[@class="rcwcontent"]')[0]
    for link in comm_div.xpath('*/a'):
        name = link.text
        if '(Vice Chair)' in name:
            mtype = 'vice chair'
        elif '(Chair)' in name:
            mtype = 'chair'
        else:
            mtype = 'member'
        name = clean_committee_name(link.text)

        # There's no easy way to determine whether a committee
        # is joint or not using the mobile legislator directory
        # (without grabbing a whole bunch of pages, at least)
        # so for now we will hard-code the one broken case
        if (name == "Oversight of HHS Eligibility System" and
                term == '82'):
            comm_chamber = 'joint'
        else:
            comm_chamber = chamber

        if name.startswith('Appropriations-S/C on '):
            # Appropriations subcommittees are encoded in the name.
            sub = name.replace('Appropriations-S/C on ', '')
            leg.add_role('committee member', term,
                         chamber=comm_chamber,
                         committee='Appropriations',
                         subcommittee=sub,
                         position=mtype)
        else:
            leg.add_role('committee member', term,
                         chamber=comm_chamber,
                         committee=name,
                         position=mtype)

    if type == 'Lt. Gov.':
        self.save_object(leg)
    else:
        # Only save when a district was parsed.
        if district:
            self.save_legislator(leg)
def scrape_member(self, chamber, term, member_url):
    """Scrape one legislator detail page and save the resulting record."""
    with self.urlopen(member_url) as page:
        doc = lxml.html.fromstring(page)
        doc.make_links_absolute(member_url)

        subtitle = doc.xpath('//div[@class="subtitle"]')[0]
        info_table = subtitle.getnext()

        photo_url = info_table.xpath(
            '//img[@id="ctl00_ContentPlaceHolder1_imgMember"]'
        )[0].attrib['src']

        cell = info_table.xpath('//td[@valign="top"]')[0]
        member_type = cell.xpath('string(//div[1]/strong)').strip()
        full_name = re.sub(r'\s+', ' ',
                           cell.xpath('string(//div[2]/strong)').strip())
        district = cell.xpath('string(//div[3])').strip().replace(
            'District ', '')

        # Expand the single-letter party code from the fourth div.
        party = cell.xpath('string(//div[4])').strip()[0]
        party = {'D': 'Democratic', 'R': 'Republican'}.get(party, party)

        if member_type == 'Lt. Gov.':
            # The Lt. Governor is recorded as a Person, not a Legislator.
            record = Person(full_name)
            record.add_role('Lt. Governor', term, party=party)
        else:
            record = Legislator(term, chamber, district, full_name,
                                party=party, photo_url=photo_url,
                                url=member_url)
        record.add_source(urlescape(member_url))

        # Addresses: walk the <br>-separated lines after each labeled span;
        # a line matching phone_re becomes the phone number instead.
        for office_type, label in (('capitol', 'Capitol address'),
                                   ('district', 'District address')):
            spans = doc.xpath("//span[. = '%s:']" % label)
            address, phone = '', None
            if spans:
                address = spans[0].tail.strip()
                node = spans[0].getnext()
                while node is not None and node.tag == 'br':
                    if node.tail:
                        if phone_re.match(node.tail):
                            phone = node.tail
                        else:
                            address += "\n" + node.tail
                    node = node.getnext()
            record.add_office(office_type, label, address=address,
                              phone=phone)

        # Committee memberships follow the "Committee Membership:" header.
        committees = doc.xpath('//div[string() = "Committee Membership:"]'
                               '/following-sibling::div'
                               '[@class="rcwcontent"]')[0]
        for link in committees.xpath('*/a'):
            raw = link.text
            if '(Vice Chair)' in raw:
                position = 'vice chair'
            elif '(Chair)' in raw:
                position = 'chair'
            else:
                position = 'member'
            committee = clean_committee_name(link.text)

            # There's no easy way to determine whether a committee is joint
            # using the mobile legislator directory (without grabbing a
            # whole bunch of pages), so hard-code the one broken case.
            comm_chamber = chamber
            if committee == "Oversight of HHS Eligibility System" \
                    and term == "82":
                comm_chamber = 'joint'

            prefix = 'Appropriations-S/C on '
            if committee.startswith(prefix):
                record.add_role('committee member', term,
                                chamber=comm_chamber,
                                committee='Appropriations',
                                subcommittee=committee.replace(prefix, ''),
                                position=position)
            else:
                record.add_role('committee member', term,
                                chamber=comm_chamber,
                                committee=committee,
                                position=position)

        if member_type == 'Lt. Gov.':
            self.save_object(record)
        elif district:
            self.save_legislator(record)
def scrape(self, term, chambers):
    """Scrape Philadelphia's mayor and city council members."""
    # The mayor doesn't sit on council.
    url = 'http://www.phila.gov/'
    doc = lxml.html.fromstring(self.urlopen(url))
    doc.make_links_absolute(url)

    # The mayor's name doesn't appear on the mayor's page!
    name = re.search('Mayor (.+)',
                     doc.xpath('//title/text()')[0].strip()).group(1)
    mayor = Person(name)
    mayor.add_source(url)

    url = 'http://www.phila.gov/mayor/'
    doc = lxml.html.fromstring(self.urlopen(url))
    doc.make_links_absolute(url)
    # Mailing address is the text siblings after the "Mailing Address" div.
    lines = map(clean_string, doc.xpath('//div[contains(text(),"Mailing Address")]/following-sibling::text()')[1:])
    address = '\n'.join(lines)
    phone = '-'.join(tel_regex.search(doc.xpath('//strong[contains(text(),"Phone")]/following-sibling::text()[1]')[0]).groups())
    fax = '-'.join(tel_regex.search(doc.xpath('//strong[contains(text(),"Fax")]/following-sibling::text()[1]')[0]).groups())
    email = clean_string(doc.xpath('//strong[contains(text(),"Email")]/following-sibling::text()[1]')[0])
    mayor.update(dict(url=url, email=email))
    mayor.add_office('capitol', 'Office of the Mayor', address=address,
                     phone=phone, fax=fax)
    mayor.add_role('Mayor', term)
    mayor.add_source(url)
    self.save_object(mayor)

    council_url = 'http://philadelphiacitycouncil.net/council-members/'
    doc = lxml.html.fromstring(self.urlopen(council_url))
    doc.make_links_absolute(council_url)
    urls = set(doc.xpath('//a[contains(@href, "/council-members/council")]/@href'))
    # NOTE(review): the condition allows fewer than 17 although the message
    # says "expected 17" — confirm intent.
    assert len(urls) <= 17, 'expected 17 unique councilmember URLs, found %d' % len(urls)

    for url in urls:
        doc = lxml.html.fromstring(self.urlopen(url))
        doc.make_links_absolute(url)

        optional = dict()  # fields not all legislators will have
        name = []
        roles = []
        lines = []  # address lines for the council office
        phone1 = None
        phone2 = None
        fax = None
        district = 'At-Large'  # default
        # Try several image classes, in decreasing order of size.
        photo_url = (
            doc.xpath('//img[contains(@class, "size-full")]/@src') or
            doc.xpath('//img[contains(@class, "size-medium")]/@src') or
            doc.xpath('//img[contains(@class, "size-thumbnail")]/@src')
        )[0]

        # That's an en dash, not a hyphen.
        parts = re.split(u'[,–]', doc.xpath('//h3/text()')[0])
        for index, part in enumerate(filter(None, parts)):
            part = clean_string(part)
            if index == 0:
                # First fragment carries the title and the name.
                if 'Councilman' in part:
                    optional['gender'] = 'Male'
                elif 'Councilwoman' in part:
                    optional['gender'] = 'Female'
                elif 'Council President' in part:
                    roles.append('Council President')
                part = re.sub('^Council(?:man|woman| President)\s+', '', part)
                name.append(part)
            elif part in ('Jr.', 'Sr.'):
                name.append(part)
            elif 'District' in part:
                district = part
            else:
                roles.append(part)
        name = ', '.join(name)

        contact_url = doc.xpath('//a[text()="Contact"]/@href')[0]
        doc = lxml.html.fromstring(self.urlopen(contact_url))
        doc.make_links_absolute(contact_url)

        # @todo email, second office, personal_url are sometimes in another paragraph.
        if len(doc.xpath('//div[@class="post-entry"]/p')) > 1:
            self.logger.warning('Skipped paragraphs:\n' + '\n'.join(lxml.html.tostring(html) for html in doc.xpath('//div[@class="post-entry"]/p[position()>1]')))
        # Prefer the first paragraph's text; fall back to the whole div.
        parts = doc.xpath('//div[@class="post-entry"]/p[position()=1]//text()') or doc.xpath('//div[@class="post-entry"]//text()')
        parts = map(clean_string, parts)
        for part in filter(None, parts):
            if re.match(r'^City Hall', part):
                # Normalize the City Hall line to just the room number.
                lines.append('City Hall, Room %s' % re.search('Room (\d+)', part).group(1))
            elif re.match(r'^FAX:', part, re.I):
                fax = '-'.join(tel_regex.search(part).groups())
            elif tel_regex.search(part):
                if phone1:
                    self.logger.warning('Already have phone numbers for one office: ' + part)
                else:
                    phones = tel_regex.findall(part)
                    phone1 = '-'.join(phones[0])
                    if len(phones) == 2:
                        phone2 = '-'.join(phones[1])
                    else:
                        # Second number given as a 4-digit suffix after
                        # " or " or "/".
                        # NOTE(review): searches parts[2], not the current
                        # `part` — confirm this index is intended.
                        phone2 = phone1[:8] + re.search(r'(?: or |/)(\d{4})$', parts[2]).group(1)
            elif '@' in part:
                optional['email'] = re.search('\S+@\S+', part).group()
            elif re.match(r'^(?:, )?Philadelphia, PA(?: 19107(?:-3290)?)?$', part):
                pass
            else:
                # @todo second office is sometimes in the same paragraph.
                self.logger.warning('Skipped: ' + part)

        # Some Councilmembers have no zip code or only a 5-digit zip code.
        # All that changes between them is a room number.
        lines.append('Philadelphia, PA 19107-3290')
        address = '\n'.join(lines)

        legislator = Legislator(term, 'upper', district, name, url=url,
                                photo_url=photo_url, party=None)
        legislator.update(optional)
        legislator.add_office('capitol', 'Council Office', address=address,
                              phone=phone1, secondary_phone=phone2, fax=fax)
        legislator.add_source(url)
        for role in roles:
            legislator.add_role(role, term)
        self.save_legislator(legislator)