Ejemplo n.º 1
0
    def handle_list_item(self, item):
        """Yield a ``Person`` built from one entry of the member list.

        ``item`` must support ``.xpath()`` (presumably an lxml element --
        confirm against the caller).  The heading text has to match
        ``Name (NNx, PARTY)``: a two-digit district number followed by ``A``
        or ``B``, then an upper-case key that is looked up in ``PARTIES``.

        The address is always attached; the phone number and e-mail address
        are attached only when they pass ``validate_phone_number`` and
        ``validate_email_address`` respectively.  No recovery is attempted
        when the markup deviates: a missing node or an unparsable heading
        raises.
        """
        photo_url = item.xpath('./img/@src')[0]
        url = item.xpath('.//h5/a/@href')[0]
        name_text = item.xpath('.//h5/a/b/text()')[0]

        name_match = re.match(r'^(.+)\(([0-9]{2}[AB]), ([A-Z]+)\)$', name_text)
        name = name_match.group(1).strip()
        # Drop leading zeros from the district number ("01A" -> "1A").
        district = name_match.group(2).lstrip('0').upper()
        party = PARTIES[name_match.group(3)]

        # Non-blank text lines of the info <div>: two address lines followed
        # by the phone number.
        info_texts = [x.strip() for x in item.xpath(
            './div/text()[normalize-space()]'
        ) if x.strip()]
        address = '\n'.join((info_texts[0], info_texts[1]))

        # Fall back to None so a value that fails validation is skipped
        # below instead of leaving the name unbound (UnboundLocalError).
        phone_text = info_texts[2]
        phone = phone_text if validate_phone_number(phone_text) else None

        # The second link inside the item is the "mailto:" link.
        email_text = item.xpath('.//a/@href')[1].replace('mailto:', '').strip()
        email = email_text if validate_email_address(email_text) else None

        rep = Person(name=name, district=district, party=party,
                     primary_org='lower', role='Representative',
                     image=photo_url)
        rep.add_link(url)
        rep.add_contact_detail(type='address', value=address, note='capitol')
        if phone is not None:
            rep.add_contact_detail(type='voice', value=phone, note='capitol')
        if email is not None:
            rep.add_contact_detail(type='email', value=email, note='capitol')
        rep.add_source(self.url)

        yield rep
Ejemplo n.º 2
0
    def handle_list_item(self, item):
        """Yield a ``Person`` built from one row of the member table.

        ``item`` must support ``.xpath()`` (presumably an lxml ``<tr>``
        element -- confirm against the caller).  The first link in the second
        cell carries the profile URL and a bold heading that has to match
        ``Name (NNx, PARTY)``: a two-digit district number followed by ``A``
        or ``B``, then an upper-case key that is looked up in ``PARTIES``.
        The second link carries the text ``Email: <address>``.

        The address is always attached; the phone number and e-mail address
        are attached only when they pass ``validate_phone_number`` and
        ``validate_email_address`` respectively.  A missing node or an
        unparsable heading raises.
        """
        photo_url = item.xpath('./td[1]/a/img/@src')[0]
        info_nodes = item.xpath('./td[2]/p/a')
        name_text = info_nodes[0].xpath('./b/text()')[0]
        url = info_nodes[0].get('href')

        name_match = re.match(r'^(.+)\(([0-9]{2}[AB]), ([A-Z]+)\)$', name_text)
        name = name_match.group(1).strip()
        # Drop leading zeros from the district number ("01A" -> "1A").
        district = name_match.group(2).lstrip('0').upper()
        party = PARTIES[name_match.group(3)]

        # Non-blank text lines that follow a <br>: two address lines and
        # then the phone number.
        info_texts = [
            x.strip() for x in item.xpath(
                './td[2]/p/text()[normalize-space() and preceding-sibling::br]'
            ) if x.strip()
        ]
        address = '\n'.join((info_texts[0], info_texts[1]))

        # Fall back to None so a value that fails validation is skipped
        # below instead of leaving the name unbound (UnboundLocalError).
        phone_text = info_texts[2]
        phone = phone_text if validate_phone_number(phone_text) else None

        # ``.text`` is None for a link without leading text; treat that as
        # an empty (and therefore invalid) address.
        email_text = (info_nodes[1].text or '').replace('Email: ', '').strip()
        email = email_text if validate_email_address(email_text) else None

        rep = Person(name=name,
                     district=district,
                     party=party,
                     primary_org='lower',
                     role='Representative',
                     image=photo_url)
        rep.add_link(url)
        rep.add_contact_detail(type='address', value=address, note='capitol')
        if phone is not None:
            rep.add_contact_detail(type='voice', value=phone, note='capitol')
        if email is not None:
            rep.add_contact_detail(type='email', value=email, note='capitol')
        rep.add_source(self.url)

        yield rep
Ejemplo n.º 3
0
    def handle_list_item(self, item):
        """Yield a ``Person`` built from one entry of the member list.

        ``item`` must support ``.xpath()`` (presumably an lxml element --
        confirm against the caller).  The heading text has to match
        ``Name (NNx, PARTY)``: a two-digit district number followed by ``A``
        or ``B``, then an upper-case key that is looked up in ``PARTIES``.

        The address is always attached; the phone number and e-mail address
        are attached only when they pass ``validate_phone_number`` and
        ``validate_email_address`` respectively.  A missing node or an
        unparsable heading raises.
        """
        photo_url = item.xpath("./img/@src")[0]
        url = item.xpath(".//h5/a/@href")[0]
        name_text = item.xpath(".//h5/a/b/text()")[0]

        name_match = re.match(r"^(.+)\(([0-9]{2}[AB]), ([A-Z]+)\)$", name_text)
        name = name_match.group(1).strip()
        # Drop leading zeros from the district number ("01A" -> "1A").
        district = name_match.group(2).lstrip("0").upper()
        party = PARTIES[name_match.group(3)]

        # Non-blank text lines of the info <div>: two address lines followed
        # by the phone number.
        info_texts = [
            x.strip()
            for x in item.xpath("./div/text()[normalize-space()]")
            if x.strip()
        ]
        address = "\n".join((info_texts[0], info_texts[1]))

        # Fall back to None so a value that fails validation is skipped
        # below instead of leaving the name unbound (UnboundLocalError).
        phone_text = info_texts[2]
        phone = phone_text if validate_phone_number(phone_text) else None

        # The second link inside the item is the "mailto:" link.
        email_text = item.xpath(".//a/@href")[1].replace("mailto:", "").strip()
        email = email_text if validate_email_address(email_text) else None

        rep = Person(
            name=name,
            district=district,
            party=party,
            primary_org="lower",
            role="Representative",
            image=photo_url,
        )
        rep.add_link(url)
        rep.add_contact_detail(type="address", value=address, note="capitol")
        if phone is not None:
            rep.add_contact_detail(type="voice", value=phone, note="capitol")
        if email is not None:
            rep.add_contact_detail(type="email", value=email, note="capitol")
        rep.add_source(self.url)

        yield rep
Ejemplo n.º 4
0
    def handle_list_item(self, item):
        """Yield a ``Person`` built from one row of the member table.

        ``item`` must support ``.xpath()`` (presumably an lxml ``<tr>``
        element -- confirm against the caller).  The first link in the second
        cell carries the profile URL and a bold heading that has to match
        ``Name (NNx, PARTY)``: a two-digit district number followed by ``A``
        or ``B``, then an upper-case key that is looked up in ``PARTIES``.
        The second link carries the text ``Email: <address>``.

        The address is always attached; the phone number and e-mail address
        are attached only when they pass ``validate_phone_number`` and
        ``validate_email_address`` respectively.  A missing node or an
        unparsable heading raises.
        """
        photo_url = item.xpath('./td[1]/a/img/@src')[0]
        info_nodes = item.xpath('./td[2]/p/a')
        name_text = info_nodes[0].xpath('./b/text()')[0]
        url = info_nodes[0].get('href')

        name_match = re.match(r'^(.+)\(([0-9]{2}[AB]), ([A-Z]+)\)$', name_text)
        name = name_match.group(1).strip()
        # Drop leading zeros from the district number ("01A" -> "1A").
        district = name_match.group(2).lstrip('0').upper()
        party = PARTIES[name_match.group(3)]

        # Non-blank text lines that follow a <br>: two address lines and
        # then the phone number.
        info_texts = [x.strip() for x in item.xpath(
            './td[2]/p/text()[normalize-space() and preceding-sibling::br]'
        ) if x.strip()]
        address = '\n'.join((info_texts[0], info_texts[1]))

        # Fall back to None so a value that fails validation is skipped
        # below instead of leaving the name unbound (UnboundLocalError).
        phone_text = info_texts[2]
        phone = phone_text if validate_phone_number(phone_text) else None

        # ``.text`` is None for a link without leading text; treat that as
        # an empty (and therefore invalid) address.
        email_text = (info_nodes[1].text or '').replace('Email: ', '').strip()
        email = email_text if validate_email_address(email_text) else None

        rep = Person(name=name, district=district, party=party,
                     primary_org='lower', role='Representative',
                     image=photo_url)
        rep.add_link(url)
        rep.add_contact_detail(type='address', value=address)
        if phone is not None:
            rep.add_contact_detail(type='voice', value=phone)
        if email is not None:
            rep.add_contact_detail(type='email', value=email)
        rep.add_source(self.url)

        yield rep
Ejemplo n.º 5
0
    def _scrape_lower_chamber(self, term):
        """Scrape the House member list and save one ``Legislator`` per row.

        Every table row yields a photo URL, a bold heading from which the
        name (everything before the last ``(``), the district (two digits
        plus one upper-case letter right after a ``(``) and the party key
        (upper-case letters right before the closing ``)``) are extracted,
        two address lines, a phone number and an ``Email: ...`` link text.

        ``term`` is passed straight through to ``Legislator``.  Phone and
        e-mail are recorded only when they pass validation.
        """
        url = 'http://www.house.leg.state.mn.us/members/hmem.asp'

        page = self.lxmlize(url)

        legislator_nodes = self.get_nodes(
            page,
            '//div[@id="hide_show_alpha_all"]/table/tr/td/table/tr')

        for legislator_node in legislator_nodes:
            photo_url = self.get_node(
                legislator_node,
                './td[1]/a/img/@src')

            info_nodes = self.get_nodes(
                legislator_node,
                './td[2]/p/a')

            name_text = self.get_node(
                info_nodes[0],
                './b/text()')

            name_match = re.search(r'^.+\(', name_text)
            name = name_match.group(0).replace('(', '').strip()

            district_match = re.search(r'\([0-9]{2}[A-Z]', name_text)
            district_text = district_match.group(0)
            # Drop the parenthesis and leading zeros ("(01A" -> "1A").
            district = district_text.replace('(', '').lstrip('0').strip()

            party_match = re.search(r'[A-Z]+\)$', name_text)
            party_text = party_match.group(0).replace(')', '').strip()
            party = self._parties[party_text]

            info_texts = self.get_nodes(
                legislator_node,
                './td[2]/p/text()[normalize-space() and preceding-sibling'
                '::br]')
            address = '\n'.join((info_texts[0], info_texts[1]))

            # Assign on every row.  Binding these only on successful
            # validation left them unbound for the first row and, worse,
            # silently reused the previous legislator's value afterwards.
            phone_text = info_texts[2]
            phone = phone_text if validate_phone_number(phone_text) else None

            # ``.text`` is None for a link without leading text.
            email_text = (info_nodes[1].text or '')
            email_text = email_text.replace('Email: ', '').strip()
            email = email_text if validate_email_address(email_text) else None

            # Only hand an e-mail to the constructor when one validated.
            optional_fields = {'email': email} if email else {}
            legislator = Legislator(
                term=term,
                chamber='lower',
                district=district,
                full_name=name,
                party=party,
                photo_url=photo_url,
                **optional_fields
            )
            legislator.add_source(url)

            legislator.add_office(
                type='capitol',
                name="Capitol Office",
                address=address,
                phone=phone,
                email=email,
            )

            self.save_legislator(legislator)
Ejemplo n.º 6
0
    def scrape_lower_chamber(self, term):
        """Yield a ``Person`` for every representative on the listing page.

        For each ``<li class="selectionRep">`` the name and district come
        from the list entry itself; party, address, phone and fax come from
        the representative's own page, which is fetched per member.

        ``term`` is accepted but not used in this method.

        ``self.get_node`` is treated as returning ``None`` when nothing
        matches (the original falsy checks imply this -- confirm against the
        helper).  Results are compared with ``is None`` because an lxml
        element without child elements is falsy, so a plain truth test would
        discard text-only ``<span>`` nodes.
        """
        # E-mail contact is now hidden behind webforms. Sadness.

        party_map = {'PNP': 'Partido Nuevo Progresista',
                     'PPD': u'Partido Popular Democr\xe1tico',
                     'PIP': u'Partido Independentista Puertorrique\u00F1o',
                     }

        url = 'http://www.tucamarapr.org/dnncamara/ComposiciondelaCamara/Biografia.aspx'
        page = self.lxmlize(url)

        member_nodes = self.get_nodes(page, '//li[@class="selectionRep"]')
        for member_node in member_nodes:
            member_info = member_node.text_content().strip().split("\n")

            # First line is the name (with an honorific), last line is the
            # district description.
            name = re.sub(r'^Hon\.', '', member_info[0]).strip()
            district_text = member_info[-1].strip()
            if district_text == 'Representante por Acumulación':
                district = 'At-Large'
            else:
                district = district_text.replace("Representante del Distrito ", "").strip()
            photo_url = self.get_node(member_node, './/img/@src')

            rep_link = self.get_node(member_node, ".//a/@href")
            rep_page = self.lxmlize(rep_link)

            party_node = self.get_node(rep_page, '//span[@class="partyBio"]')
            party_text = ''
            if party_node is not None:
                party_text = party_node.text_content().strip()
            # Albelo doesn't seem to have a "partyBio" as an independent, but we
            # expect this to exist for all other members; an unknown or
            # missing party for anyone else still fails with a KeyError.
            if not party_text and name == "Manuel A. Natal Albelo":
                party = "Independent"
            else:
                party = party_map[party_text]

            # First line of the first <h6> is used as the address.
            address = None
            address_node = self.get_node(rep_page, '//h6')
            if address_node is not None and address_node.text:
                address = address_node.text.strip().split("\n")[0].strip()

            # Only grabs the first validated phone number found.
            # Typically, representatives have multiple phone numbers.
            phone_node = self.get_node(
                rep_page,
                '//span[@class="data-type" and contains(text(), "Tel.")]')
            phone = None
            if phone_node is not None and phone_node.text:
                for phone_attempt in phone_node.text.strip().split("\n"):
                    phone_text = re.sub(r'^Tel\.[\s]*', '', phone_attempt).strip()
                    if validate_phone_number(phone_text):
                        phone = phone_text
                        break

            fax_node = self.get_node(
                rep_page,
                '//span[@class="data-type" and contains(text(), "Fax.")]')
            fax = None
            if fax_node is not None and fax_node.text:
                fax_text = re.sub(r'^Fax\.[\s]*', '', fax_node.text.strip()).strip()
                if validate_phone_number(fax_text):
                    fax = fax_text

            person = Person(primary_org='lower',
                            district=district,
                            name=name,
                            party=party,
                            image=photo_url)

            person.add_link(rep_link)
            person.add_source(rep_link)
            person.add_source(url)

            if address:
                person.add_contact_detail(type='address',
                                          value=address,
                                          note='Capitol Office')
            if phone:
                person.add_contact_detail(type='voice',
                                          value=phone,
                                          note='Capitol Office')
            if fax:
                person.add_contact_detail(type='fax',
                                          value=fax,
                                          note='Capitol Office')

            yield person
Ejemplo n.º 7
0
    def scrape_lower_chamber(self, term):
        """Yield a ``Person`` for every representative on the listing page.

        For each ``<li class="selectionRep">`` the name and district come
        from the list entry itself; party, address, phone and fax come from
        the representative's own page, which is fetched per member.

        ``term`` is accepted but not used in this method.

        ``self.get_node`` is treated as returning ``None`` when nothing
        matches (the original falsy checks imply this -- confirm against the
        helper).  Results are compared with ``is None`` because an lxml
        element without child elements is falsy, so a plain truth test would
        discard text-only ``<span>`` nodes.
        """
        # E-mail contact is now hidden behind webforms. Sadness.

        party_map = {
            "PNP": "Partido Nuevo Progresista",
            "PPD": u"Partido Popular Democr\xe1tico",
            "PIP": u"Partido Independentista Puertorrique\u00F1o",
        }

        url = "http://www.tucamarapr.org/dnncamara/ComposiciondelaCamara/Biografia.aspx"
        page = self.lxmlize(url)

        member_nodes = self.get_nodes(page, '//li[@class="selectionRep"]')
        for member_node in member_nodes:
            member_info = member_node.text_content().strip().split("\n")

            # First line is the name (with an honorific), last line is the
            # district description.
            name = re.sub(r"^Hon\.", "", member_info[0]).strip()
            district_text = member_info[-1].strip()
            if district_text == "Representante por Acumulación":
                district = "At-Large"
            else:
                district = district_text.replace("Representante del Distrito ",
                                                 "").strip()
            photo_url = self.get_node(member_node, ".//img/@src")

            rep_link = self.get_node(member_node, ".//a/@href")
            rep_page = self.lxmlize(rep_link)

            party_node = self.get_node(rep_page, '//span[@class="partyBio"]')
            party_text = ""
            if party_node is not None:
                party_text = party_node.text_content().strip()
            # Albelo doesn't seem to have a "partyBio" as an independent, but we
            # expect this to exist for all other members; an unknown or
            # missing party for anyone else still fails with a KeyError.
            if not party_text and name == "Manuel A. Natal Albelo":
                party = "Independent"
            else:
                party = party_map[party_text]

            # First line of the first <h6> is used as the address.
            address = None
            address_node = self.get_node(rep_page, "//h6")
            if address_node is not None and address_node.text:
                address = address_node.text.strip().split("\n")[0].strip()

            # Only grabs the first validated phone number found.
            # Typically, representatives have multiple phone numbers.
            phone_node = self.get_node(
                rep_page,
                '//span[@class="data-type" and contains(text(), "Tel.")]')
            phone = None
            if phone_node is not None and phone_node.text:
                for phone_attempt in phone_node.text.strip().split("\n"):
                    phone_text = re.sub(r"^Tel\.[\s]*", "",
                                        phone_attempt).strip()
                    if validate_phone_number(phone_text):
                        phone = phone_text
                        break

            fax_node = self.get_node(
                rep_page,
                '//span[@class="data-type" and contains(text(), "Fax.")]')
            fax = None
            if fax_node is not None and fax_node.text:
                fax_text = re.sub(r"^Fax\.[\s]*", "",
                                  fax_node.text.strip()).strip()
                if validate_phone_number(fax_text):
                    fax = fax_text

            person = Person(
                primary_org="lower",
                district=district,
                name=name,
                party=party,
                image=photo_url,
            )

            person.add_link(rep_link)
            person.add_source(rep_link)
            person.add_source(url)

            if address:
                person.add_contact_detail(type="address",
                                          value=address,
                                          note="Capitol Office")
            if phone:
                person.add_contact_detail(type="voice",
                                          value=phone,
                                          note="Capitol Office")
            if fax:
                person.add_contact_detail(type="fax",
                                          value=fax,
                                          note="Capitol Office")

            yield person
Ejemplo n.º 8
0
    def _scrape_lower_chamber(self, term):
        """Scrape the House member list and save one ``Legislator`` per row.

        Every table row yields a photo URL, a bold heading from which the
        name (everything before the last ``(``), the district (two digits
        plus one upper-case letter right after a ``(``) and the party key
        (upper-case letters right before the closing ``)``) are extracted,
        two address lines, a phone number and an ``Email: ...`` link text.

        ``term`` is passed straight through to ``Legislator``.  Phone and
        e-mail are recorded only when they pass validation.
        """
        url = 'http://www.house.leg.state.mn.us/members/hmem.asp'

        page = self.lxmlize(url)

        legislator_nodes = self.get_nodes(
            page,
            '//div[@id="hide_show_alpha_all"]/table/tr/td/table/tr')

        for legislator_node in legislator_nodes:
            photo_url = self.get_node(
                legislator_node,
                './td[1]/a/img/@src')

            info_nodes = self.get_nodes(
                legislator_node,
                './td[2]/p/a')

            name_text = self.get_node(
                info_nodes[0],
                './b/text()')

            name_match = re.search(r'^.+\(', name_text)
            name = name_match.group(0).replace('(', '').strip()

            district_match = re.search(r'\([0-9]{2}[A-Z]', name_text)
            district_text = district_match.group(0)
            # Drop the parenthesis and leading zeros ("(01A" -> "1A").
            district = district_text.replace('(', '').lstrip('0').strip()

            party_match = re.search(r'[A-Z]+\)$', name_text)
            party_text = party_match.group(0).replace(')', '').strip()
            party = self._parties[party_text]

            info_texts = self.get_nodes(
                legislator_node,
                './td[2]/p/text()[normalize-space() and preceding-sibling'
                '::br]')
            address = '\n'.join((info_texts[0], info_texts[1]))

            # Assign on every row.  Binding these only on successful
            # validation left them unbound for the first row and, worse,
            # silently reused the previous legislator's value afterwards.
            phone_text = info_texts[2]
            phone = phone_text if validate_phone_number(phone_text) else None

            # ``.text`` is None for a link without leading text.
            email_text = (info_nodes[1].text or '')
            email_text = email_text.replace('Email: ', '').strip()
            email = email_text if validate_email_address(email_text) else None

            # Only hand an e-mail to the constructor when one validated.
            optional_fields = {'email': email} if email else {}
            legislator = Legislator(
                term=term,
                chamber='lower',
                district=district,
                full_name=name,
                party=party,
                photo_url=photo_url,
                **optional_fields
            )
            legislator.add_source(url)

            legislator.add_office(
                type='capitol',
                name="Capitol Office",
                address=address,
                phone=phone,
                email=email,
            )

            self.save_legislator(legislator)
Ejemplo n.º 9
0
    def legislators(self, latest_only):
        """Build a mapping of legislator name to ``(Person, terms)``.

        Walks ``self._memberships(latest_only)``; each entry provides a
        member table row, a chamber, a term and the listing URL.  Rows with
        an empty party cell and names ending in ``*`` (inactive members) are
        skipped.  For every remaining member the detail page is fetched to
        fill in the photo, e-mail and office contact details, unless the
        page carries the "biography not available" notice.

        Returns:
            dict: ``name -> (Person, [(chamber, district, term, party)])``.
            A name seen in several memberships keeps a single ``Person``
            and gains one tuple per membership.
        """
        # Loop-invariant lookup tables, built once.
        party_names = {'D': 'Democratic', 'R': 'Republican', 'I': 'Independent'}
        hotgarbage = (
            'Senate Biography Information for the 98th General '
            'Assembly is not currently available.')
        offices = {'capitol': '//table[contains(string(), "Springfield Office")]',
                   'district': '//table[contains(string(), "District Office")]'}

        legs = {}

        for member, chamber, term, url in self._memberships(latest_only):
            name, _, _, district, party = member.xpath('td')
            district = district.text
            detail_url = name.xpath('a/@href')[0]

            if party.text_content().strip() == "":
                self.warning("Garbage party: Skipping!")
                continue

            party = party_names[party.text]
            name = name.text_content().strip()

            # inactive legislator, skip them for now
            if name.endswith('*'):
                continue

            name = AKA.get(name, name)

            if name in legs:
                p, terms = legs[name]
                terms.append((chamber, district, term, party))
            else:
                p = Person(name, party=party)
                legs[name] = p, [(chamber, district, term, party)]

            p.add_source(url)
            p.add_source(detail_url)
            p.add_link(detail_url)

            birth_date = BIRTH_DATES.get(name, None)
            if birth_date:
                p.birth_date = birth_date

            leg_html = self.get(detail_url).text
            leg_doc = lxml.html.fromstring(leg_html)
            leg_doc.make_links_absolute(detail_url)

            if hotgarbage in leg_html:
                # The legislator's bio isn't available yet.
                self.logger.warning('No legislator bio available for ' + name)
                continue

            photo_url = leg_doc.xpath('//img[contains(@src, "/members/")]/@src')[0]
            p.image = photo_url

            # Start from an empty list; ``p`` may be a Person already seen
            # for an earlier membership (presumably to avoid duplicates).
            p.contact_details = []
            # email
            email = leg_doc.xpath('//b[text()="Email: "]')
            if email:
                # ``tail`` is None when nothing follows the <b> label.
                email_value = (email[0].tail or '').strip()
                if email_value:
                    p.add_contact_detail(type='email', value=email_value,
                                         note='capitol')

            for location, xpath in offices.items():
                # The XPath matches every table whose text contains the
                # label, enclosing tables included; the fourth match is the
                # one holding the office rows.
                tables = leg_doc.xpath(xpath)
                if not tables:
                    continue
                if len(tables) < 4:
                    self.logger.warning(
                        'Unexpected %s office markup for %s' % (location, name))
                    continue

                for kind, value in self._table_to_office(tables[3]):
                    if kind in ('fax', 'voice') and not validate_phone_number(value):
                        continue

                    p.add_contact_detail(type=kind, value=value, note=location)

        return legs
Ejemplo n.º 10
0
    def legislators(self, latest_only):
        """Build a mapping of legislator name to ``(Person, terms)``.

        Walks ``self._memberships(latest_only)``; each entry provides a
        member table row, a chamber, a term and the listing URL.  Rows with
        an empty party cell and names ending in ``*`` (inactive members) are
        skipped.  For every remaining member the detail page is fetched to
        fill in the photo, e-mail and office contact details, unless the
        page carries the "biography not available" notice.

        Returns:
            dict: ``name -> (Person, [(chamber, district, term, party)])``.
            A name seen in several memberships keeps a single ``Person``
            and gains one tuple per membership.
        """
        # Loop-invariant lookup tables, built once.
        party_names = {
            "D": "Democratic",
            "R": "Republican",
            "I": "Independent"
        }
        hotgarbage = ("Senate Biography Information for the 98th General "
                      "Assembly is not currently available.")
        offices = {
            "capitol": '//table[contains(string(), "Springfield Office")]',
            "district": '//table[contains(string(), "District Office")]',
        }

        legs = {}

        for member, chamber, term, url in self._memberships(latest_only):
            name, _, _, district, party = member.xpath("td")
            district = district.text
            detail_url = name.xpath("a/@href")[0]

            if party.text_content().strip() == "":
                self.warning("Garbage party: Skipping!")
                continue

            party = party_names[party.text]
            name = name.text_content().strip()

            # inactive legislator, skip them for now
            if name.endswith("*"):
                continue

            name = AKA.get(name, name)

            if name in legs:
                p, terms = legs[name]
                terms.append((chamber, district, term, party))
            else:
                p = Person(name, party=party)
                legs[name] = p, [(chamber, district, term, party)]

            p.add_source(url)
            p.add_source(detail_url)
            p.add_link(detail_url)

            birth_date = BIRTH_DATES.get(name, None)
            if birth_date:
                p.birth_date = birth_date

            leg_html = self.get(detail_url).text
            leg_doc = lxml.html.fromstring(leg_html)
            leg_doc.make_links_absolute(detail_url)

            if hotgarbage in leg_html:
                # The legislator's bio isn't available yet.
                self.logger.warning("No legislator bio available for " + name)
                continue

            photo_url = leg_doc.xpath(
                '//img[contains(@src, "/members/")]/@src')[0]
            p.image = photo_url

            # Start from an empty list; ``p`` may be a Person already seen
            # for an earlier membership (presumably to avoid duplicates).
            p.contact_details = []
            # email
            email = leg_doc.xpath('//b[text()="Email: "]')
            if email:
                # ``tail`` is None when nothing follows the <b> label.
                email_value = (email[0].tail or "").strip()
                if email_value:
                    p.add_contact_detail(type="email",
                                         value=email_value,
                                         note="capitol")

            for location, xpath in offices.items():
                # The XPath matches every table whose text contains the
                # label, enclosing tables included; the fourth match is the
                # one holding the office rows.
                tables = leg_doc.xpath(xpath)
                if not tables:
                    continue
                if len(tables) < 4:
                    self.logger.warning(
                        "Unexpected %s office markup for %s" %
                        (location, name))
                    continue

                for kind, value in self._table_to_office(tables[3]):
                    if (kind in ("fax", "voice")
                            and not validate_phone_number(value)):
                        continue

                    p.add_contact_detail(type=kind,
                                         value=value,
                                         note=location)

        return legs
Ejemplo n.º 11
0
    def scrape_lower_chamber(self, term):
        """Yield a ``Person`` for every representative on the listing page.

        For each ``<li class="selectionRep">`` the name and district come
        from the list entry itself; party, address, phone and fax come from
        the representative's own page, which is fetched per member.

        ``term`` is accepted but not used in this method.

        Optional nodes are compared with ``is None`` (``self.get_node`` is
        assumed to return ``None`` when nothing matches -- confirm against
        the helper) because an lxml element without child elements is
        falsy, so a plain truth test would discard text-only ``<span>``
        nodes.  The party node is required: a missing one still raises.
        """
        # E-mail contact is now hidden behind webforms. Sadness.

        party_map = {'PNP': 'Partido Nuevo Progresista',
                     'PPD': u'Partido Popular Democr\xe1tico',
                     'PIP': u'Partido Independentista Puertorrique\u00F1o',
                     }

        url = 'http://www.tucamarapr.org/dnncamara/ComposiciondelaCamara/Biografia.aspx'
        page = self.lxmlize(url)

        member_nodes = self.get_nodes(page, '//li[@class="selectionRep"]')
        for member_node in member_nodes:
            member_info = member_node.text_content().strip().split("\n")

            # First line is the name (with an honorific), last line is the
            # district description.
            name = re.sub(r'^Hon\.', '', member_info[0]).strip()
            district_text = member_info[-1].strip()
            if district_text == 'Representante por Acumulación':
                district = 'At-Large'
            else:
                district = district_text.replace("Representante del Distrito ", "").strip()
            photo_url = self.get_node(member_node, './/img/@src')

            rep_link = self.get_node(member_node, ".//a/@href")
            rep_page = self.lxmlize(rep_link)

            party_node = self.get_node(rep_page, '//span[@class="partyBio"]')
            party_text = party_node.text_content().strip()
            party = party_map[party_text]

            # First line of the first <h6> is used as the address.
            address = None
            address_node = self.get_node(rep_page, '//h6')
            if address_node is not None and address_node.text:
                address = address_node.text.strip().split("\n")[0].strip()

            # Only grabs the first validated phone number found.
            # Typically, representatives have multiple phone numbers.
            phone_node = self.get_node(
                rep_page,
                '//span[@class="data-type" and contains(text(), "Tel.")]')
            phone = None
            if phone_node is not None and phone_node.text:
                for phone_attempt in phone_node.text.strip().split("\n"):
                    phone_text = re.sub(r'^Tel\.[\s]*', '', phone_attempt).strip()
                    if validate_phone_number(phone_text):
                        phone = phone_text
                        break

            fax_node = self.get_node(
                rep_page,
                '//span[@class="data-type" and contains(text(), "Fax.")]')
            fax = None
            if fax_node is not None and fax_node.text:
                fax_text = re.sub(r'^Fax\.[\s]*', '', fax_node.text.strip()).strip()
                if validate_phone_number(fax_text):
                    fax = fax_text

            person = Person(primary_org='lower',
                            district=district,
                            name=name,
                            party=party,
                            image=photo_url)

            person.add_link(rep_link)
            person.add_source(rep_link)
            person.add_source(url)

            if address:
                person.add_contact_detail(type='address',
                                          value=address,
                                          note='Capitol Office')
            if phone:
                person.add_contact_detail(type='voice',
                                          value=phone,
                                          note='Capitol Office')
            if fax:
                person.add_contact_detail(type='fax',
                                          value=fax,
                                          note='Capitol Office')

            yield person
Ejemplo n.º 12
0
    def _scrape_lower_chamber(self, term):
        """Scrape the House member list and save one ``Legislator`` per row.

        Every table row yields a photo URL, a bold heading from which the
        name (everything before the last ``(``), the district (two digits
        plus one upper-case letter right after a ``(``) and the party key
        (upper-case letters right before the closing ``)``) are extracted,
        two address lines, a phone number and an e-mail address.

        The e-mail normally sits in the row's second link; when that link
        is absent the fourth text line is used instead.  If no row needed
        that fallback a warning is logged so the special case can be
        removed.

        ``term`` is passed straight through to ``Legislator``.  Phone and
        e-mail are recorded only when they pass validation.
        """
        url = 'http://www.house.leg.state.mn.us/members/hmem.asp'

        page = self.lxmlize(url)

        legislator_nodes = self.get_nodes(
            page,
            '//div[@id="hide_show_alpha_all"]/table/tr/td/table/tr')

        need_special_email_case = False

        for legislator_node in legislator_nodes:
            photo_url = self.get_node(
                legislator_node,
                './td[1]/a/img/@src')

            info_nodes = self.get_nodes(
                legislator_node,
                './td[2]/p/a')

            name_text = self.get_node(
                info_nodes[0],
                './b/text()')

            name_match = re.search(r'^.+\(', name_text)
            name = name_match.group(0).replace('(', '').strip()

            district_match = re.search(r'\([0-9]{2}[A-Z]', name_text)
            district_text = district_match.group(0)
            # Drop the parenthesis and leading zeros ("(01A" -> "1A").
            district = district_text.replace('(', '').lstrip('0').strip()

            party_match = re.search(r'[A-Z]+\)$', name_text)
            party_text = party_match.group(0).replace(')', '').strip()
            party = self._parties[party_text]

            info_texts = self.get_nodes(
                legislator_node,
                './td[2]/p/text()[normalize-space() and preceding-sibling'
                '::br]')
            address = '\n'.join((info_texts[0], info_texts[1]))

            # Assign on every row.  Binding these only on successful
            # validation left them unbound for the first row and, worse,
            # silently reused the previous legislator's value afterwards.
            phone_text = info_texts[2]
            phone = phone_text if validate_phone_number(phone_text) else None

            # E-mail markup is screwed-up and inconsistent.
            if len(info_nodes) > 1:
                email_text = info_nodes[1].text
            else:
                # Primarily for Dan Fabian.  The fallback has to feed
                # ``email_text``; assigning it to another name left
                # ``email_text`` unbound or stale from the previous row.
                email_text = info_texts[3]
                need_special_email_case = True

            # ``.text`` is None for a link without leading text.
            email_text = (email_text or '').replace('Email: ', '').strip()
            email = email_text if validate_email_address(email_text) else None

            # Only hand an e-mail to the constructor when one validated.
            optional_fields = {'email': email} if email else {}
            legislator = Legislator(
                term=term,
                chamber='lower',
                district=district,
                full_name=name,
                party=party,
                photo_url=photo_url,
                **optional_fields
            )
            legislator.add_source(url)

            legislator.add_office(
                type='capitol',
                name="Capitol Office",
                address=address,
                phone=phone,
                email=email,
            )

            self.save_legislator(legislator)

        if not need_special_email_case:
            self.logger.warning('Special e-mail handling no longer required.')
Ejemplo n.º 13
0
    def _scrape_lower_chamber(self, term):
        """Scrape the House member list and save one ``Legislator`` per row.

        Every table row yields a photo URL, a bold heading from which the
        name (everything before the last ``(``), the district (two digits
        plus one upper-case letter right after a ``(``) and the party key
        (upper-case letters right before the closing ``)``) are extracted,
        two address lines, a phone number and an e-mail address.

        The e-mail normally sits in the row's second link; when that link
        is absent the fourth text line is used instead.  If no row needed
        that fallback a warning is logged so the special case can be
        removed.

        ``term`` is passed straight through to ``Legislator``.  Phone and
        e-mail are recorded only when they pass validation.
        """
        url = 'http://www.house.leg.state.mn.us/members/hmem.asp'

        page = self.lxmlize(url)

        legislator_nodes = self.get_nodes(
            page, '//div[@id="hide_show_alpha_all"]/table/tr/td/table/tr')

        need_special_email_case = False

        for legislator_node in legislator_nodes:
            photo_url = self.get_node(legislator_node, './td[1]/a/img/@src')

            info_nodes = self.get_nodes(legislator_node, './td[2]/p/a')

            name_text = self.get_node(info_nodes[0], './b/text()')

            name_match = re.search(r'^.+\(', name_text)
            name = name_match.group(0).replace('(', '').strip()

            district_match = re.search(r'\([0-9]{2}[A-Z]', name_text)
            district_text = district_match.group(0)
            # Drop the parenthesis and leading zeros ("(01A" -> "1A").
            district = district_text.replace('(', '').lstrip('0').strip()

            party_match = re.search(r'[A-Z]+\)$', name_text)
            party_text = party_match.group(0).replace(')', '').strip()
            party = self._parties[party_text]

            info_texts = self.get_nodes(
                legislator_node,
                './td[2]/p/text()[normalize-space() and preceding-sibling'
                '::br]')
            address = '\n'.join((info_texts[0], info_texts[1]))

            # Assign on every row.  Binding these only on successful
            # validation left them unbound for the first row and, worse,
            # silently reused the previous legislator's value afterwards.
            phone_text = info_texts[2]
            phone = phone_text if validate_phone_number(phone_text) else None

            # E-mail markup is screwed-up and inconsistent.
            if len(info_nodes) > 1:
                email_text = info_nodes[1].text
            else:
                # Primarily for Dan Fabian.  The fallback has to feed
                # ``email_text``; assigning it to another name left
                # ``email_text`` unbound or stale from the previous row.
                email_text = info_texts[3]
                need_special_email_case = True

            # ``.text`` is None for a link without leading text.
            email_text = (email_text or '').replace('Email: ', '').strip()
            email = email_text if validate_email_address(email_text) else None

            # Only hand an e-mail to the constructor when one validated.
            optional_fields = {'email': email} if email else {}
            legislator = Legislator(
                term=term,
                chamber='lower',
                district=district,
                full_name=name,
                party=party,
                photo_url=photo_url,
                **optional_fields
            )
            legislator.add_source(url)

            legislator.add_office(
                type='capitol',
                name="Capitol Office",
                address=address,
                phone=phone,
                email=email,
            )

            self.save_legislator(legislator)

        if not need_special_email_case:
            self.logger.warning('Special e-mail handling no longer required.')