Example #1
    def process_item(self, item):
        website, district, name, party, office, phone, email = item.getchildren()

        # skip header row
        if website.tag == "th":
            self.skip()

        office = office.text_content()
        for abbr, full in self.office_names.items():
            office = office.replace(abbr, full)

        p = ScrapePerson(
            name=name.text_content(),
            state="mi",
            chamber="lower",
            district=district.text_content().lstrip("0"),
            party=party.text_content(),
            email=email.text_content(),
        )
        link = CSS("a").match_one(website).get("href")
        # repair hrefs that are missing the second slash after the scheme
        if link.startswith("http:/r"):
            link = link.replace(":/", "://")
        p.add_link(link)
        p.add_source(self.source.url)
        p.capitol_office.voice = phone.text_content()
        p.capitol_office.address = office
        return p
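
A process_item method like this one normally sits on a spatula HtmlListPage subclass that supplies the listing URL and a per-row selector; each matched row is handed to process_item, and every ScrapePerson it returns becomes part of the page's output. The class name, URL, and selector below are illustrative assumptions rather than code from the example above - a minimal sketch of that scaffolding:

from spatula import CSS, HtmlListPage, URL


class RepList(HtmlListPage):
    # hypothetical listing page and row selector
    source = URL("https://example.com/house/representatives")
    selector = CSS("table tr")

    def process_item(self, item):
        # per-row parsing as in the example above
        ...

During development, spatula's CLI can be pointed at such a class to run it directly and inspect what a single pass yields.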
Example #2
    def process_item(self, item):
        name = CSS("td a").match(item)[1].text_content().strip()
        if name == "Vacant":
            self.skip()

        party = CSS("td").match(item)[1].text_content().strip()
        if party == "Democrat":
            party = "Democratic"

        district = CSS("td").match(item)[2].text_content().strip().lstrip("0")

        p = ScrapePerson(
            name=name,
            state="wv",
            chamber=self.chamber,
            district=district,
            party=party,
        )

        p.add_source(self.source.url)

        email = CSS("td").match(item)[4].text_content().strip()
        p.email = email

        # the capitol address is split across several text nodes; join it into one line
        capp_addr_txt = XPath("td[4]/text()").match(item)
        capp_addr = " ".join(line.strip() for line in capp_addr_txt).strip()
        p.capitol_office.address = capp_addr

        phone = CSS("td").match(item)[5].text_content().strip()
        p.capitol_office.voice = phone

        detail_link = CSS("td a").match(item)[1].get("href")
        detail_link = detail_link.replace(" ", "%20")
        p.add_source(detail_link)
        p.add_link(detail_link, note="homepage")

        return LegDetail(p, source=URL(detail_link, timeout=30))
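
Returning another page object here is spatula's chaining pattern: the framework fetches detail_link and the first constructor argument (the partially built ScrapePerson) is exposed on the detail page as self.input. LegDetail itself is not shown in this example, so the body below is only an assumed sketch of that pattern:

from spatula import HtmlPage


class LegDetail(HtmlPage):
    def process_page(self):
        p = self.input  # the ScrapePerson passed in by the list page
        # ... enrich p with fields scraped from the detail page ...
        return p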
Example #3
    def process_page(self):
        party = CSS("i").match(self.root)[0].text_content().strip()

        # Check whether this rep holds a specific position
        position = ""
        try:
            position = CSS("i font").match_one(self.root).text_content().strip()
            party = party.replace(position, "")

        except SelectorError:
            pass

        # match the literal "(D)" / "(R)" party markers
        if re.search(r"\(D\)", party):
            party = "Democrat"
        elif re.search(r"\(R\)", party):
            party = "Republican"
        else:
            self.warn(f"unrecognized party value: {party}")

        phone_numbers = XPath("//font[@size='2']").match(self.root)[10].text_content()

        district_office = CSS("p").match(self.root)[13].getchildren()

        image = XPath("//img[contains(@src, 'memberphotos')]").match_one(self.root).get("src")

        district = CSS("font b").match(self.root)[26].text_content().split(" ")[1]

        # All emails should still be Sen<lastname>@njleg.org and Asm<lastname>@njleg.org -
        # many reps have these emails on their personal pages
        fullname_email = self.input.name.split("\xa0")
        lastname_email = fullname_email[-1]

        if self.input.chamber == "upper":
            email = f"Sen{lastname_email}@njleg.org"
        elif self.input.chamber == "lower":
            email = f"Asm{lastname_email}@njleg.org"

        p = ScrapePerson(
            name=self.input.name,
            state="nj",
            chamber=self.input.chamber,
            party=party,
            image=image,
            district=district,
            email=email,
        )

        p.add_source(self.input.url)
        p.add_source(self.source.url)
        if position != "":
            p.extras["role"] = position.replace("(", "").replace(")",
                                                                 "").strip()

        try:
            fax_match = phone_fax_pattern.findall(
                XPath("//font[@size='2']").match(self.root)[12].text_content())

            for okay in district_office:
                address = okay.text_content()
                if fax_match:
                    process_address(address, phone_numbers, p, fax_number=fax_match)
                else:
                    process_address(address, phone_numbers, p)

        except SelectorError:
            pass

        return p
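
This process_page only reads name, chamber, and url from self.input, so the list page that chains to it evidently passes a small partial record rather than a full ScrapePerson (phone_fax_pattern and process_address are likewise module-level helpers defined outside the snippet). A hypothetical shape for that handoff, assuming a simple dataclass and an assumed detail page class name:

from dataclasses import dataclass


@dataclass
class PartialMember:
    name: str
    chamber: str
    url: str


# in the list page's process_item (hypothetical):
#     return MemberDetail(
#         PartialMember(name=name, chamber=self.chamber, url=detail_url),
#         source=detail_url,
#     )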