コード例 #1
0
    def process_item(self, item):
        """Turn one committee link element into a committee record.

        The link text is used as the committee name and the ``href``
        attribute as its homepage. The name prefix decides how the committee
        is classified:

        * ``"Joint..."`` -> classification "committee", chamber "legislature"
        * ``"Subcommittee..."`` -> classification "subcommittee", chamber
          "upper", with a parent name read from the surrounding markup
        * anything else -> classification "committee", chamber "upper"

        Returns a ``ChooseType`` built from the committee, with the homepage
        URL as its source.
        """
        name = XPath("text()").match_one(item)

        # Links on the listing page that are not committees at all.
        non_committee_links = (
            "Teleconference How-To Information",
            "Legislative Process",
        )
        if name in non_committee_links:
            # NOTE(review): presumably skip() raises to abandon this item;
            # confirm against the page base class.
            self.skip()

        homepage = XPath("@href").match_one(item)

        if name.startswith("Joint"):
            details = {"classification": "committee", "chamber": "legislature"}
        elif name.startswith("Subcommittee"):
            # Climb four levels up from the link; the first child of that
            # ancestor holds the text used as the parent committee name.
            ancestor = item
            for _ in range(4):
                ancestor = ancestor.getparent()
            details = {
                "classification": "subcommittee",
                "chamber": "upper",
                "parent": ancestor.getchildren()[0].text_content(),
            }
        else:
            details = {"classification": "committee", "chamber": "upper"}

        committee = ScrapeCommittee(name=name, **details)

        # Record both the listing page and the committee's own page.
        for source_url in (self.source.url, homepage):
            committee.add_source(source_url)
        committee.add_link(homepage, note="homepage")

        return ChooseType(committee, source=URL(homepage))
コード例 #2
0
 def process_item(self, item):
     """Build the detail-page request for one race result link.

     Reads the ``href`` attribute of ``item``, resolves it against the
     ultrasignup.com site root, and takes the text after the last ``=`` in
     the resulting URL as the race id.

     Returns a ``RaceResultDetail`` whose input is a dict containing
     ``race_id``, ``race_results_url`` and every entry of ``self.input``,
     and whose source is the resolved URL.
     """
     # Function-scope import so this block does not depend on the
     # file-level import list.
     from urllib.parse import urljoin

     # urljoin leaves absolute URLs untouched and correctly resolves
     # root-relative ("/x"), document-relative ("x") and protocol-relative
     # ("//host/x") hrefs; plain string concatenation only handled the
     # root-relative form.
     href = urljoin("https://ultrasignup.com", XPath("@href").match_one(item))

     # The id is whatever follows the final "=" of the URL.
     race_id = href.split("=")[-1]

     return RaceResultDetail(
         dict(race_id=race_id, race_results_url=href, **self.input),
         source=href,
     )