Ejemplo n.º 1
0
    def process_item(self, item):
        """Turn one committee listing entry into a lower-chamber committee.

        The committee name and homepage URL are both read from the single
        ``a`` element matched inside ``item``.

        Entries are skipped (via ``self.skip()``) when:

        * the link is the "sublegislativeanalyst" page, which has no members;
        * the name starts with "Joint Committee" or "Joint Legislative",
          because those were already collected by ``SenateCommitteeList()``.

        Names starting with "Subcommittee" become subcommittees whose parent
        is the text of the first child of ``item``'s grandparent element.

        Returns:
            ``ChooseType(committee, source=URL(homepage))`` for every entry
            that is not skipped.
        """
        anchor = CSS("a").match_one(item)
        title = anchor.text_content()
        homepage = anchor.get("href")

        # NOTE(review): the code below relies on self.skip() not returning
        # normally (presumably it raises) -- confirm against the base class.

        # This page lists no members, so there is nothing to scrape from it.
        memberless_url = (
            "https://jtlegbudget.legislature.ca.gov/sublegislativeanalyst"
        )
        if homepage == memberless_url:
            self.skip()

        # Joint bodies are dropped here to avoid duplicates: they were already
        # grabbed during SenateCommitteeList().
        joint_prefixes = ("Joint Committee", "Joint Legislative")
        if title.startswith(joint_prefixes):
            self.skip()
        elif title.startswith("Subcommittee"):
            # The parent committee's name is the first child of the element
            # two levels above this entry.
            grandparent = item.getparent().getparent()
            parent_title = grandparent.getchildren()[0].text_content()
            committee = ScrapeCommittee(
                name=title,
                classification="subcommittee",
                chamber="lower",
                parent=parent_title,
            )
        else:
            committee = ScrapeCommittee(
                name=title,
                classification="committee",
                chamber="lower",
            )

        # Record both the listing page and the committee's own page as sources.
        for source_url in (self.source.url, homepage):
            committee.add_source(source_url)
        committee.add_link(homepage, note="homepage")

        return ChooseType(committee, source=URL(homepage))
Ejemplo n.º 2
0
    def process_item(self, item):
        """Build a lower-chamber ``ScrapePerson`` (state "mi") from one row.

        ``item`` must have exactly seven children, in this order: website,
        district, name, party, office, phone, email. Any other count makes
        the unpacking raise ``ValueError``.

        Rows whose first cell is a ``th`` are treated as the header row and
        skipped via ``self.skip()``.

        Returns:
            The populated ``ScrapePerson`` with link, source, capitol office
            phone and capitol office address set.
        """
        cells = item.getchildren()
        (
            website_cell,
            district_cell,
            name_cell,
            party_cell,
            office_cell,
            phone_cell,
            email_cell,
        ) = cells

        # Header row: nothing to scrape.
        # NOTE(review): presumably self.skip() raises to abandon this row --
        # confirm against the base class.
        if website_cell.tag == "th":
            self.skip()

        # Replace every key of self.office_names found in the office text
        # with its mapped value.
        address = office_cell.text_content()
        for short_form, long_form in self.office_names.items():
            address = address.replace(short_form, long_form)

        # Leading zeros are dropped from the district text.
        district_label = district_cell.text_content().lstrip("0")
        person = ScrapePerson(
            name=name_cell.text_content(),
            state="mi",
            chamber="lower",
            district=district_label,
            party=party_cell.text_content(),
            email=email_cell.text_content(),
        )

        href = CSS("a").match_one(website_cell).get("href")
        # Some links start with "http:/r" (a single slash after the scheme);
        # restore the missing slash.
        homepage = (
            href.replace(":/", "://") if href.startswith("http:/r") else href
        )

        person.add_link(homepage)
        person.add_source(self.source.url)

        capitol = person.capitol_office
        capitol.voice = phone_cell.text_content()
        capitol.address = address
        return person