def process_page(self):
    """Build a ScrapeCommittee from a committee detail page.

    Reads the committee name from the page's ``h2.committeeName`` heading,
    then collects members from the ``div#members`` section.

    Returns:
        The populated ``ScrapeCommittee``, or ``None`` when the heading starts
        with "Appropriations Subcommittee" (those pages are currently skipped).

    Raises:
        IndexError: if the page has no ``h2.committeeName`` heading, or a
            ``<dt>`` role entry has no following ``<dd>``.
    """
    # xpath() returns a list of matches; the heading is the first match.
    # Indexing [1] would demand a second heading and raise IndexError on a
    # page that has only one.
    name = self.root.xpath('//h2[@class="committeeName"]')[0].text

    if name.startswith("Appropriations Subcommittee"):
        # TODO: restore scraping of Appropriations Subcommittees
        # name = name.replace('Appropriations ', '')
        # parent = {'name': 'Appropriations', 'classification': 'upper'}
        # chamber = None
        return None

    if name.startswith("Committee on"):
        name = name.replace("Committee on ", "")
    parent = None
    chamber = "upper"  # every committee built here is assigned to the upper chamber

    # NOTE(review): looks like a leftover debug print; kept to preserve
    # existing output. Consider replacing with a logger call.
    print(name)

    comm = ScrapeCommittee(
        name=name,
        classification="committee",
        chamber=chamber,
        parent=parent,
    )

    # <dl> entries: each <dt> holds a role label ("Chair: ") and the first
    # following <dd> sibling holds the member holding that role.
    for dt in self.root.xpath('//div[@id="members"]/dl/dt'):
        role = dt.text.replace(": ", "").strip().lower()
        member = dt.xpath("./following-sibling::dd")[0].text_content()
        member = self.clean_name(member)
        comm.add_member(member, role=role)

    # <ul> entries: members listed without an explicit role.
    for li in self.root.xpath('//div[@id="members"]/ul/li'):
        member = self.clean_name(li.text_content())
        comm.add_member(member)

    comm.add_source(self.source.url)
    return comm
def process_item(self, item):
    """Convert one committee record from the JSON listing into a ScrapeCommittee.

    Args:
        item: mapping for a single committee. The keys read here are
            ``CommitteeName``, ``LegislativeBody``, ``IsSubCommittee``,
            ``Members`` (each with ``FirstName``, ``LastName``, ``IsChair``,
            ``IsViceChair``), ``CommitteeId``, ``CommitteeShortName`` and
            ``TypeName``.

    Returns:
        The populated ``ScrapeCommittee``.

    Raises:
        KeyError: if ``item`` or one of its members lacks an expected key.
    """
    name = item["CommitteeName"]

    body = item["LegislativeBody"]
    if body == "H":
        chamber = "lower"
    elif body == "S":
        chamber = "upper"
    else:
        # a few Ad Hoc Committees don't have chambers, but are not included
        # in the Standing Committees Scrape anyway
        # NOTE(review): such committees are filed under "lower" — confirm
        # this fallback is intended.
        self.logger.warning("Committee not assigned to chamber")
        chamber = "lower"

    # Identity check on False is deliberate: any other value (including None)
    # takes the subcommittee path, exactly as before.
    if item["IsSubCommittee"] is False:
        com = ScrapeCommittee(name=name, chamber=chamber)
    else:
        try:
            # Expected form: "<parent> Subcommittee on <name>".
            parent, name = name.split(" Subcommittee on ")
        except ValueError:
            self.logger.warning(f"No parent listed for {name}")
            # Without this binding the ScrapeCommittee call below raised
            # UnboundLocalError right after logging the warning.
            parent = None
        com = ScrapeCommittee(
            name=name,
            classification="subcommittee",
            chamber=chamber,
            parent=parent,
        )

    # As of now, the API lists all members twice, so we must check for
    # duplicates; a set of (name, position) pairs gives O(1) lookups.
    seen = set()
    for member in item["Members"]:
        member_name = member["FirstName"] + " " + member["LastName"]
        if member["IsChair"]:
            position = "Chair"
        elif member["IsViceChair"]:
            position = "Vice Chair"
        else:
            position = "member"

        key = (member_name, position)
        if key in seen:
            continue
        seen.add(key)
        com.add_member(member_name, position)

    com.extras["Committee ID"] = item["CommitteeId"]
    com.extras["Committee Short Name"] = item["CommitteeShortName"]
    com.extras["Committee Type"] = item["TypeName"]
    com.add_source(self.source.url)
    return com