def process_item(self, item):
    """Build a NM legislator from one list entry and chain to the detail page."""
    # The first <span> reads "Name - (PARTY)".
    pieces = CSS("span").match(item)[0].text_content().strip().split(" - ")
    name = pieces[0].strip()
    party = pieces[1].strip()
    party = {"(D)": "Democratic", "(R)": "Republican", "(DTS)": "Independent"}.get(
        party, party
    )

    # The second <span> reads "District: <number>".
    district_text = CSS("span").match(item)[1].text_content().strip()
    district = re.search(r"District:\s(.+)", district_text).groups()[0].strip()

    p = ScrapePerson(
        name=name,
        state="nm",
        chamber=self.chamber,
        district=district,
        party=party,
    )

    detail_link = CSS("a").match_one(item).get("href")
    p.add_source(self.source.url)
    p.add_source(detail_link)
    p.add_link(detail_link, note="homepage")
    p.image = CSS("img").match_one(item).get("src")
    return LegDetail(p, source=detail_link)
def process_page(self):
    """Scrape NM committee membership plus clerk/room details.

    Raises:
        SkipItem: when the member table contains no links at all.
    """
    com = self.input
    members = list(CSS("table.Grid a").match(self.root))
    if not members:
        raise SkipItem("empty committee")

    for member in members:
        name = member.text_content().strip()
        # Names are "Last, First (Role)" or plain "Last, First".
        if re.search(r"\(", name):
            name_split = re.search(r"(.+),\s(.+)\s\((.+)\)", name).groups()
            first_name = name_split[1]
            last_name = name_split[0]
            role = name_split[2]
        else:
            name_split = re.search(r"(.+),\s(.+)", name).groups()
            first_name = name_split[1]
            last_name = name_split[0]
            role = "member"
        # BUGFIX: the replacement pattern was a garbled text artifact of the
        # "&quot;" HTML entity (and not valid Python as written); decode the
        # entity back to a literal double quote in nicknames.
        first_name = re.sub("&quot;", '"', first_name)
        name = f"{first_name} {last_name}"
        com.add_member(name, role)

    # Clerk/room rows: "Label: value" rows go to extras by label; a bare
    # row (no colon) is the room designation.
    extra_info = CSS("div table#ContentPlaceHolder1_gvClerk tr").match(self.root)
    for info in extra_info:
        text = info.text_content().strip()
        if ":" in text:
            idx, val = text.split(":")
            com.extras[idx.strip()] = val.strip()
        else:
            com.extras["Room"] = text
    return com
def process_page(self):
    """Build membership for an NM committee, skipping non-legislator members.

    Raises SkipItem for an empty membership list or one with only
    public (non-legislator) members.
    """
    com = self.input

    # An empty Membership fieldset means no members at all.
    membership_text = (
        CSS("div.Membership fieldset").match_one(self.root).text_content().strip()
    )
    if membership_text == "":
        raise SkipItem("empty committee")

    rows = CSS("fieldset div.area-holder ul.list li span.col01").match(self.root)
    legislator_count = 0
    for row in rows:
        role = row.getnext().text_content().strip()
        # Public members are not legislators; skip them.
        if role == "Public Member":
            continue
        if role == "Member":
            role = "member"
        legislator_count += 1
        full_title = CSS("span span").match_one(row).text_content().strip()
        # Drop the "Representative"/"Senator" title prefix.
        member_name = re.search(
            r"(Representative|Senator)\s(.+)", full_title
        ).groups()[1]
        com.add_member(member_name, role)

    if not legislator_count:
        raise SkipItem("only public members")
    return com
def process_item(self, item):
    """Classify a CA Assembly committee row and hand off to the detail page."""
    link = CSS("a").match_one(item)
    comm_name = link.text_content()
    comm_url = link.get("href")

    # This particular page has no member listing at all.
    if comm_url == "https://jtlegbudget.legislature.ca.gov/sublegislativeanalyst":
        self.skip()

    # Joint committees were already collected during SenateCommitteeList();
    # skip them here to avoid duplicates.
    if comm_name.startswith(("Joint Committee", "Joint Legislative")):
        self.skip()
    elif comm_name.startswith("Subcommittee"):
        # The parent committee's name is the first child two levels up.
        parent_comm = item.getparent().getparent().getchildren()[0].text_content()
        com = ScrapeCommittee(
            name=comm_name,
            classification="subcommittee",
            chamber="lower",
            parent=parent_comm,
        )
    else:
        com = ScrapeCommittee(
            name=comm_name, classification="committee", chamber="lower"
        )

    com.add_source(self.source.url)
    com.add_source(comm_url)
    com.add_link(comm_url, note="homepage")
    return ChooseType(com, source=URL(comm_url))
def process_item(self, item):
    """Turn one IL member-table row into a ScrapePerson; header rows are skipped."""
    cells = CSS("td").match(item)
    # Header rows have a single cell or carry the "header" class.
    if len(cells) == 1 or cells[0].get("class") == "header":
        self.skip()

    first_link = CSS("td a").match(item)[0]
    name = first_link.text_content()
    detail_link = first_link.get("href")

    district = cells[3].text_content()
    party = {"D": "Democratic", "R": "Republican", "I": "Independent"}[
        cells[4].text_content()
    ]

    p = ScrapePerson(
        name=name,
        state="il",
        party=party,
        chamber=self.chamber,
        district=district,
    )
    p.add_source(self.source.url)
    p.add_source(detail_link)
    p.add_link(detail_link, note="homepage")
    return LegDetail(p, source=detail_link)
class SenateDetail(HtmlPage):
    """Detail page for an OK senator: party, district, photo, and contacts."""

    name_css = CSS(".field--name-title")
    image_css = CSS(".bSenBio__media-btn")
    district_css = CSS(".bDistrict h2")
    address_css = CSS(".bSenBio__address p")
    phone_css = CSS(".bSenBio__tel a")
    contact_link_sel = SimilarLink(r"https://oksenate.gov/contact-senator\?sid=")

    def process_page(self):
        # BUGFIX: `party` was only assigned inside the loop, so a page with
        # no "Party:" bio row raised NameError below. Default to "" and stop
        # scanning at the first match.
        party = ""
        for bio in CSS(".bSenBio__infoIt").match(self.root):
            if "Party:" in bio.text_content():
                party = bio.text_content().split(":")[1].strip()
                break

        p = ScrapePerson(
            name=self.name_css.match_one(self.root).text,
            state="ok",
            chamber="upper",
            party=party,
            image=self.image_css.match_one(self.root).get("href"),
            # District header reads "District <n>"; keep the number.
            district=self.district_css.match_one(self.root).text.strip().split()[1],
        )
        p.capitol_office.address = self.address_css.match_one(self.root).text
        p.capitol_office.phone = self.phone_css.match_one(self.root).text
        p.add_link(
            self.contact_link_sel.match_one(self.root).get("href"), "Contact Form"
        )
        return p
def process_page(self):
    """Add the chair, vice-chair, and remaining members to the committee."""
    com = self.input

    # The role labels ("Chair:", "Vice Chair:", ...) are bare text nodes.
    role_labels = XPath("//*[@id='form1']/div/div/div/div/div[1]/text()").match(
        self.root
    )
    officer_links = CSS("#form1 div div div div div a").match(self.root)

    # First link/label pair: the chair.
    chair_name = officer_links[0].text_content().strip()
    chair_role = role_labels[0].replace(":", "").strip()
    com.add_member(chair_name, chair_role)

    # Second link/label pair: the vice chair.
    vice_chair_name = officer_links[1].text_content().strip()
    vice_chair_role = role_labels[1].replace(":", "").strip()
    com.add_member(vice_chair_name, vice_chair_role)

    # Everything after the first seven card-body links is a regular member.
    for link in CSS("#form1 div div.card-body div a").match(self.root)[7:]:
        com.add_member(link.text_content().strip(), "Member")
    return com
def process_item(self, item):
    """Create a Senate committee; Joint committees and task forces are skipped."""
    link = CSS("a").match(item)[0]
    comm_name = link.text_content().strip()

    # Walk backwards to the nearest childless sibling, which holds the
    # section heading ("Senate Committees", "Joint Committees", ...).
    for sib in item.getparent().itersiblings(preceding=True):
        if len(sib.getchildren()) == 0:
            chamber_type = sib.text_content().strip()
            break

    if chamber_type == "Senate Committees":
        chamber = "upper"
    elif chamber_type in ("Joint Committees", "Task Forces"):
        self.skip()
    # NOTE(review): an unrecognized heading (or no childless sibling) leaves
    # `chamber` unbound and raises NameError below — confirm the page only
    # ever uses the headings handled above.

    com = ScrapeCommittee(
        name=comm_name,
        classification="committee",
        chamber=chamber,
    )

    detail_link = link.get("href")
    com.add_source(self.source.url)
    com.add_source(detail_link)
    com.add_link(detail_link, note="homepage")
    return SenDetail(com, source=detail_link)
class HouseDetail(HtmlPage):
    """Detail page for an OK House member (okhouse.gov district pages)."""

    image_selector = SimilarLink(
        "https://www.okhouse.gov/Members/Pictures/HiRes/")
    prefix = "#ctl00_ContentPlaceHolder1_lbl"
    name_css = CSS(prefix + "Name")
    district_css = CSS(prefix + "District")
    party_css = CSS(prefix + "Party")

    def process_page(self):
        # The label text is "Representative Jane Doe" — drop the leading title.
        name = self.name_css.match_one(self.root).text.split(maxsplit=1)[1]
        p = Person(
            name=name,
            state="ok",
            # BUGFIX: these are okhouse.gov House members (see the
            # okhouse.gov assert below), so the chamber is "lower";
            # it was incorrectly set to "upper".
            chamber="lower",
            party=self.party_css.match_one(self.root).text,
            district=self.district_css.match_one(self.root).text.split()[1],
        )
        p.image = self.image_selector.match_one(self.root).get("href")

        # The contact form lives at the same URL with a different page name.
        contact_url = self.source.url.replace("District.aspx", "Contact.aspx")
        assert contact_url.startswith(
            "https://www.okhouse.gov/Members/Contact.aspx?District=")
        p.add_link(contact_url, note="Contact Form")

        # The capitol address block is only present for some members.
        check_capitol_address = CSS(".districtheadleft").match(
            self.root)[0].text_content().strip()
        if check_capitol_address == "Capitol Address:":
            capitol_address_div = (CSS(".districtheadleft + div").match(
                self.root)[0].text_content().strip().splitlines())
            # Last line is the phone number; everything before it is address.
            p.capitol_office.address = "; ".join(
                [ln.strip() for ln in capitol_address_div[:-1]])
            p.capitol_office.phone = capitol_address_div[-1].strip()
        return p
def process_item(self, item):
    """Build a HI legislator from one roster row.

    Rows without a second link (non-member rows) are skipped.
    """
    try:
        link = CSS("a").match(item)[1]
    except SelectorError:
        self.skip()

    data = {
        "last_name": link.text_content(),
        "url": link.get("href"),
    }
    # Remaining fields are located by the suffix of their element ids.
    for key, label in self.LABELS.items():
        data[key] = CSS(f"[id$={label}]").match_one(item).text_content().strip()

    party = {"(D)": "Democratic", "(R)": "Republican"}[data["party"]]
    chamber = "upper" if data["chamber"] == "S" else "lower"

    p = ScrapePerson(
        name=data["first_name"] + " " + data["last_name"],
        state="hi",
        chamber=chamber,
        district=data["district"],
        given_name=data["first_name"],
        family_name=data["last_name"],
        party=party,
        email=data["email"],
    )
    p.capitol_office.address = "Hawaii State Capitol, Room " + data["room"]
    p.capitol_office.voice = data["voice"]
    p.capitol_office.fax = data["fax"]
    p.add_source(data["url"])
    p.add_link(data["url"])
    return p
def process_page(self):
    """Fill committee membership from the member grid, tolerating an empty page."""
    com = self.input
    com.add_source(self.source.url)
    homepage = self.source.url.replace(
        "MemberGridCluster.aspx?filter=compage&category=committee&", ""
    )
    com.add_link(homepage, note="homepage")

    # As of now, one committee page is empty. In case it is filled in later,
    # scrape whatever rows exist and quietly accept none.
    try:
        for row in CSS("#theTable tr").match(self.root):
            # The header row renders as a single "NamePartyPosition" string.
            if row.text_content().strip() == "NamePartyPosition":
                continue
            _, name_cell, _, position_cell = CSS("td").match(row)
            parts = (
                name_cell.text_content()
                .replace("Rep. ", "")
                .replace("Sen. ", "")
                .split(",")
            )
            position = position_cell.text_content() or "member"
            com.add_member(parts[1] + " " + parts[0], position)
    except SelectorError:
        pass
    return com
def process_item(self, item):
    """Create an IN House member from a roster card and follow its /full page."""
    district_text = CSS("div.district").match_one(item).text_content()
    district = re.search(r"House\sDistrict\s(\d+)", district_text).groups()[0]

    p = ScrapePerson(
        name=CSS("header").match_one(item).text_content(),
        state="in",
        chamber=self.chamber,
        district=district,
        party=self.party,
        image=CSS("img").match_one(item).get("src"),
    )
    p.extras["city"] = CSS("div.city").match_one(item).text_content()

    homepage = item.get("href")
    p.add_link(homepage, note="homepage")
    # The "/full" variant of the member page carries the complete bio.
    full_url = homepage + "/full"
    p.add_source(full_url)
    p.add_source(self.source.url)
    return BlueRepDetail(p, source=full_url)
def process_page(self):
    """Build an OH House member from list-page data (self.input), then add
    capitol-office contact details scraped from this detail page."""
    # Construct the person from the details gathered by the list page.
    p = Person(
        state="oh",
        chamber="lower",
        district=self.input.district,
        name=self.input.name,
        party=self.input.party,
        image=self.input.image,
    )
    p.add_source(self.input.url)
    p.add_link(self.input.url)

    divs = CSS(".member-info-bar-module").match(self.root)
    # The last info-bar module holds the contact details.
    contact_details = CSS(".member-info-bar-value").match(divs[-1])
    for div in contact_details:
        dtc = div.text_content()
        if ", OH" in dtc:
            # The address is split across child elements' text/tail
            # fragments; stitch them into one "; "-joined string.
            # NOTE(review): assumes exactly two children whose text/tail
            # carry the three address parts — confirm against the markup.
            children = div.getchildren()
            p.capitol_office.address = "; ".join(
                [
                    children[0].text.strip(),
                    children[0].tail.strip(),
                    children[1].tail.strip(),
                ]
            )
        elif "Phone:" in dtc:
            p.capitol_office.voice = dtc.split(": ")[1]
        elif "Fax:" in dtc:
            p.capitol_office.fax = dtc.split(": ")[1]
    return p
def process_item(self, item):
    """Build a KY legislator from a roster card; vacant seats are skipped."""
    name = CSS("h3").match_one(item).text_content()
    if name == " - Vacant Seat":
        self.skip()

    party = CSS("small").match_one(item).text_content()
    if party == "Democrat":
        party = "Democratic"

    raw_district = CSS("p").match(item)[0].text_content()
    # District numbers are zero-padded on the page; strip the padding.
    district = (
        re.search(r"District:\r\n(.+)", raw_district).groups()[0].strip().lstrip("0")
    )

    p = ScrapePerson(
        name=name,
        state="ky",
        party=party,
        chamber=self.chamber,
        district=district,
    )
    detail_link = item.get("href")
    p.add_source(self.source.url)
    p.add_source(detail_link)
    p.add_link(detail_link, note="homepage")
    return LegDetail(p, source=detail_link)
def process_item(self, item):
    """Create an IN senator from a grid card and follow the detail page."""
    links = CSS("div a").match(item)
    name = links[1].text_content()

    # The grid element packs "<name> | District <n>" — keep the number.
    district_blob = (
        CSS("div .esg-content.eg-senators-grid-element-1")
        .match_one(item)
        .text_content()
        .split("|")[1]
        .strip()
        .lower()
    )
    district = re.search(r"district\s(\d+)", district_blob).groups()[0]

    p = ScrapePerson(
        name=name,
        state="in",
        chamber=self.chamber,
        district=district,
        party=self.party,
        # Images are lazy-loaded; the real URL lives in data-lazysrc.
        image=CSS("div img").match_one(item).get("data-lazysrc"),
    )
    p.extras["city"] = (
        CSS("div .esg-content.eg-senators-grid-element-27")
        .match_one(item)
        .text_content()
    )

    detail_link = links[1].get("href")
    p.add_link(detail_link, note="homepage")
    p.add_source(self.source.url)
    p.add_source(detail_link)
    return BlueSenDetail(p, source=detail_link)
def process_page(self):
    """Scrape a MN committee: meeting time/room plus members with roles."""
    com = self.input
    com.add_source(self.source.url)

    # Header reads "Meets: <time> in <room>".
    time, room = (
        CSS(".border-0 .pl-2").match(self.root)[0].text_content().split("in ")
    )
    time = time.split("Meets:")[1]
    com.extras["room"] = room.strip()
    com.extras["meeting schedule"] = time.strip()

    leadership_titles = ["committee chair", "vice chair", "republican lead"]
    for row in XPath('//div[@class="media pl-2 py-4"]').match(self.root):
        # Strip the title and party suffixes from the bolded name.
        name = (
            XPath(".//div[@class='media-body']/span/b/text()")
            .match(row)[0]
            .replace("Rep.", "")
            .split("(R)")[0]
            .split("(DFL")[0]
            .strip()
        )
        if name:
            # BUGFIX: `role` was previously unbound (NameError, or a stale
            # value from the prior iteration) whenever the bolded/underlined
            # position existed but was not a recognized leadership title,
            # and also when it was absent on the very first row. Default to
            # "member" and only upgrade on a recognized title.
            role = "member"
            try:
                position = CSS("span b u").match(row)[0].text_content().lower()
                if position in leadership_titles:
                    role = position
            except SelectorError:
                pass
            com.add_member(name, role)
    return com
def process_page(self):
    """Augment a legislator with capitol address, fax, and staff contacts."""
    p = self.input

    # The capitol address is split across several text nodes; rejoin them
    # with single spaces.
    address_lines = XPath(".//*[@id='district']/span[1]/text()").match(self.root)
    p.capitol_office.address = " ".join(
        line.strip() for line in address_lines
    ).strip()

    # Fax is optional; the number is the last line of the span.
    try:
        fax_lines = (
            CSS("span.info.fax")
            .match_one(self.root)
            .text_content()
            .strip()
            .split("\n")
        )
        p.capitol_office.fax = fax_lines[-1].strip()
    except SelectorError:
        pass

    # Staff spans alternate name/email after the heading span; pair them up.
    try:
        staff_spans = CSS("span.info.staff span").match(self.root)
        for idx, pair in enumerate(grouper(staff_spans[1:], 2), start=1):
            p.extras["staff" + str(idx)] = pair[0].text_content().strip()
            p.extras["staff_email" + str(idx)] = pair[1].text_content().strip()
    except SelectorError:
        pass
    return p
def process_page(self):
    """Build an OK House member from their okhouse.gov district page."""
    # The label text is "Representative Jane Doe" — drop the leading title.
    name = self.name_css.match_one(self.root).text.split(maxsplit=1)[1]
    p = ScrapePerson(
        name=name,
        state="ok",
        # BUGFIX: these are okhouse.gov House members (see the okhouse.gov
        # assert below), so the chamber is "lower"; it was incorrectly
        # set to "upper".
        chamber="lower",
        party=self.party_css.match_one(self.root).text,
        district=self.district_css.match_one(self.root).text.split()[1],
    )
    p.image = self.image_selector.match_one(self.root).get("href")

    # The contact form lives at the same URL with a different page name.
    contact_url = self.source.url.replace("District.aspx", "Contact.aspx")
    assert contact_url.startswith(
        "https://www.okhouse.gov/Members/Contact.aspx?District=")
    p.add_link(contact_url, note="Contact Form")

    # The capitol address block is only present for some members.
    check_capitol_address = (CSS(".districtheadleft").match(
        self.root)[0].text_content().strip())
    if check_capitol_address == "Capitol Address:":
        capitol_address_div = (CSS(".districtheadleft + div").match(
            self.root)[0].text_content().strip().splitlines())
        # Last line is the phone number; everything before it is address.
        p.capitol_office.address = "; ".join(
            [ln.strip() for ln in capitol_address_div[:-1]])
        p.capitol_office.phone = capitol_address_div[-1].strip()
    return p
def process_item(self, item):
    """Build an SC legislator from one roster entry."""
    member_link = CSS("a.membername").match_one(item)
    # Drop the "Senator"/"Representative" title from the display name.
    name = re.search(
        r"(Senator|Representative)\s(.+)", member_link.text_content()
    ).groups()[1]

    # The party abbreviation is the tail text after the name link.
    party = member_link.tail.strip()
    party = {"(D)": "Democratic", "(R)": "Republican"}.get(party, party)

    district_link = CSS("div.district a").match_one(item)
    district = re.search(
        r"District\s(.+)", district_link.text_content().strip()
    ).groups()[0]

    p = ScrapePerson(
        name=name,
        state="sc",
        chamber=self.chamber,
        district=district,
        party=party,
    )
    detail_link = district_link.get("href")
    p.add_source(self.source.url)
    p.add_source(detail_link)
    p.add_link(detail_link, note="homepage")
    p.image = CSS("img").match_one(item).get("src")
    # Detail pages respond slowly; allow a longer timeout.
    return LegDetail(p, source=URL(detail_link, timeout=20))
def process_item(self, item):
    """Build a PA legislator from a roster row ("Last, First" name format)."""
    parts = CSS("a").match_one(item).text_content().strip().split(", ")
    name = parts[1] + " " + parts[0]

    # The district line is the tail text after the last <br>.
    district_text = CSS("br").match(item)[-1].tail.strip()
    district = re.search(r"District\s(.+)", district_text).groups()[0]

    party = CSS("b").match_one(item).tail.strip()
    party = {"(D)": "Democratic", "(R)": "Republican", "(I)": "Independent"}.get(
        party, party
    )

    p = ScrapePerson(
        name=name,
        state="pa",
        chamber=self.chamber,
        district=district,
        party=party,
    )
    detail_link = CSS("a").match_one(item).get("href")
    p.add_source(self.source.url)
    p.add_source(detail_link)
    p.add_link(detail_link, note="homepage")
    # Detail pages respond slowly; allow a longer timeout.
    return LegDetail(p, source=URL(detail_link, timeout=10))
def process_page(self):
    """Add the chair (if any) and member list to a NY Senate committee."""
    com = self.input
    com.add_source(self.source.url)
    com.add_link(self.source.url, note="homepage")

    # A few committees have no chair block at all.
    try:
        chair_role = (
            CSS(".c-chair-block--position")
            .match_one(self.root)
            .text_content()
            .lower()
        )
        chair_name = CSS(".c-chair--title").match_one(self.root).text_content()
        com.add_member(chair_name, chair_role)
    except SelectorError:
        pass

    member_rows_xpath = (
        "//div[contains(@class, 'c-senators-container')]"
        "//div[@class='view-content']"
        "/div[contains(@class, 'odd') or contains(@class, 'even')]"
    )
    try:
        for row in XPath(member_rows_xpath).match(self.root):
            name = CSS(".nys-senator--name").match_one(row).text_content()
            role = (
                CSS(".nys-senator--position").match_one(row).text_content().lower()
            )
            # An empty position string means a rank-and-file member.
            com.add_member(name, role or "member")
    except SelectorError:
        pass
    return com
def process_item(self, item):
    """Create a committee from one listing row; the header row is skipped."""
    name = CSS("strong").match(item)[0].text_content()
    if name == "Committees":
        self.skip()  # header row, not a committee

    com = ScrapeCommittee(
        name=name,
        chamber=self.chamber,
    )

    # The first paragraph packs secretary, email, and phone on one blob.
    details = CSS("p").match(item)[0].text_content().strip()
    secretary, email, phone = re.search(
        r"\n?Secretary:(.+)\n?Email:(.+)\n?Phone:(.+)", details
    ).groups()
    com.extras["secretary"] = secretary.strip()
    com.extras["email"] = email.strip()
    com.extras["phone"] = phone.strip()

    detail_link = CSS("a").match(item)[0].get("href")
    com.add_source(self.source.url)
    com.add_source(detail_link)
    com.add_link(detail_link, note="homepage")
    return DetailCommitteePage(com, source=detail_link)
def process_item(self, item):
    """Build a MI senator from one roster-table row."""
    (
        member_cell,
        party_cell,
        district_cell,
        contact_cell,
        phone_cell,
        office_cell,
    ) = item.getchildren()
    name = member_cell.text_content()
    district = district_cell.text_content()

    # Vacant districts are listed as "Interim District ..." — skip them.
    if "Interim District" in name:
        self.skip()

    # Each of these cells wraps exactly one link.
    leg_url = CSS("a").match_one(member_cell).get("href")
    contact_url = CSS("a").match_one(contact_cell).get("href")

    # Portrait URLs follow a fixed pattern observed elsewhere on
    # senate.michigan.gov (e.g. ".../12th.jpg").
    image_url = (
        f"https://senate.michigan.gov/_images/{district}{ord_suffix(district)}.jpg"
    )

    p = ScrapePerson(
        **split_name(name),
        state="mi",
        chamber="upper",
        district=district,
        party=self.PARTY_MAP[party_cell.text],
        image=image_url,
    )
    p.capitol_office.voice = str(phone_cell.text_content())
    p.capitol_office.address = str(office_cell.text_content())
    p.add_source(self.source.url)
    p.add_link(leg_url)
    p.add_link(contact_url, note="Contact")
    return p
def process_item(self, item):
    """Build an AZ legislator from one roster-table row.

    Link-less (header) rows are skipped. A "Name -- Title" cell yields an
    extra `title` stored in p.extras.
    """
    try:
        # Keep the raw cell text (name_title) alongside the working name.
        name = name_title = CSS("a").match(item)[0].text_content()
    except SelectorError:
        self.skip("header row")
    if "--" in name_title:
        # e.g. "Jane Doe -- Speaker of the House"
        name, title = [word.strip() for word in name.split("--")]
    _, district, party, email, room, capitol_phone = item.getchildren()
    district = district.text_content()
    party = party.text_content()
    if party == "R":
        party = "Republican"
    elif party == "D":
        party = "Democratic"
    email = email.text_content()
    # The cell shows only a "Email: <local-part>" prefix form; append the
    # legislature's mail domain. Anything else is treated as no email.
    if email.startswith("Email: "):
        email = email.replace("Email: ", "").lower() + "@azleg.gov"
    else:
        email = ""
    room = room.text_content()
    # Build the capitol street address from the chamber and room number.
    if self.chamber == "lower":
        address = "House of Representatives\n "
    elif self.chamber == "upper":
        address = "Senate\n "
    address = address + "1700 West Washington\n " + room + "\nPhoenix, AZ 85007"
    capitol_phone = capitol_phone.text_content()
    image = CSS("td a img").match(item)
    if image:
        image = image[0].get("src")
    p = ScrapePerson(
        name=name,
        state="az",
        chamber=self.chamber,
        district=district,
        party=party,
        email=email,
        image=image,
    )
    p.capitol_office.address = address
    p.capitol_office.voice = capitol_phone
    p.add_source(self.source.url)
    p.add_link(CSS("a").match(item)[0].get("href"))
    if "--" in name_title:
        p.extras["title"] = title
    return p
def process_page(self):
    """Augment a ND legislator with photo, bio bullets, district-office
    address/phones, email, and fax from their detail page."""
    p = self.input

    img = CSS("div.field-person-photo img").match_one(self.root).get("src")
    p.image = img

    bio_info = CSS("div.pane-content ul li").match(self.root)
    if len(bio_info) > 0:
        # BUGFIX: the list previously accumulated raw lxml elements via
        # `+= info` (which iterates an element's *children*, typically
        # nothing for a plain <li>) — store each bullet's visible text.
        p.extras["bio info"] = [info.text_content().strip() for info in bio_info]

    # District-office mailing address, when present.
    try:
        street = (
            CSS("div.street-address").match_one(self.root).text_content().strip()
        )
        town = CSS("span.locality").match_one(self.root).text_content().strip()
        zip_code = (
            CSS("span.postal-code").match_one(self.root).text_content().strip()
        )
        p.district_office.address = street + ", " + town + ", ND " + zip_code
    except SelectorError:
        pass

    # Phone rows are "<label div><number div>" pairs; route by label.
    try:
        phones = XPath(
            "//*[@id='block-system-main']//div[contains(text(), 'phone')]"
        ).match(self.root)
        for phone in phones:
            phone_type = phone.text_content().strip()
            phone_number = phone.getnext().text_content().strip()
            if phone_type == "Cellphone:":
                p.extras["Cell phone"] = phone_number
            elif phone_type == "Home Telephone:":
                p.extras["Home phone"] = phone_number
            elif phone_type == "Office Telephone:":
                p.district_office.voice = phone_number
    except SelectorError:
        pass

    # Email is expected on every page; fax is optional.
    p.email = (
        XPath("//*[@id='block-system-main']//div[contains(text(), 'Email')]")
        .match_one(self.root)
        .getnext()
        .text_content()
        .strip()
    )

    try:
        p.district_office.fax = (
            XPath("//*[@id='block-system-main']//div[contains(text(), 'Fax')]")
            .match_one(self.root)
            .getnext()
            .text_content()
            .strip()
        )
    except SelectorError:
        pass
    return p
def process_page(self):
    """Add a ME legislator's photo, extra address, capitol phone, and website."""
    p = self.input
    img = CSS("div#content p img").match_one(self.root).get("src")
    p.image = img

    # District-22's page lays the address out after a different <strong>.
    if self.source.url == "https://legislature.maine.gov/District-22":
        addr = CSS("div#content p strong").match(self.root)[2].tail.strip()
    else:
        addr = (
            CSS("div#content p strong")
            .match(self.root)[1]
            .tail.strip()
            .lstrip(":")
            .strip()
        )
    # Only record an address that differs from the known district office.
    if addr != p.district_office.address:
        p.extras["Additional address"] = addr

    # The "State House ..." phone label may be wrapped in <strong> or <b>,
    # depending on the page; try both markups (the second, if present,
    # overwrites the first).
    try:
        state_phone = (
            XPath("//*[@id='content']/p/strong[contains(text(), 'State')]")
            .match_one(self.root)
            .tail.strip()
        )
        state_phone = state_phone.lstrip(":").strip()
        p.capitol_office.voice = state_phone
    except SelectorError:
        pass
    try:
        state_phone = (
            XPath("//*[@id='content']/p/b[contains(text(), 'State')]")
            .match_one(self.root)
            .tail.strip()
        )
        state_phone = state_phone.lstrip(":").strip()
        p.capitol_office.voice = state_phone
    except SelectorError:
        pass

    # The website link usually directly follows the "Website" label; some
    # pages nest it one element further along.
    website = (
        XPath("//*[@id='content']/p/strong[contains(text(), 'Website')]")
        .match_one(self.root)
        .getnext()
    )
    if website.get("href") is None:
        website = website.getnext().get("href")
    else:
        website = website.get("href")
    p.add_link(website, note="website")
    return p
def process_page(self):
    """Populate a committee: chair(s), member list, address, and press link.

    Raises:
        SkipItem: when the page has no chair-info block at all.
    """
    com = self.input
    com.add_source(self.source.url)
    com.add_link(self.source.url, note="homepage")

    try:
        chairs = CSS(".chair-info").match(self.root)
    except SelectorError:
        raise SkipItem("skipping committee without full information")

    # In case there are co-chairs: the role header sits num_chairs sibling
    # <header>s before each chair block.
    # NOTE(review): this preceding-sibling index assumes one header per
    # chair in document order — confirm against a co-chaired committee.
    num_chairs = len(chairs)
    for chair in chairs:
        chair_name = CSS(".comm-chair-name").match_one(chair).text_content().strip()
        chair_role = (
            XPath(f"..//preceding-sibling::header[{num_chairs}]")
            .match_one(chair)
            .text_content()
            .strip()
            .lower()
        )
        com.add_member(chair_name, chair_role)

    # Some committees only have chairs and no members list.
    try:
        for p in CSS("#comm-membership ul li").match(self.root):
            name = p.text_content().strip()
            role = "member"
            com.add_member(name, role)
    except SelectorError:
        pass

    # Some committees have a temporary-address line before room/zip; others
    # have just the permanent room and zip (the 3-way unpack then fails).
    try:
        temp, room, zip = XPath(
            "//section[@id='comm-addr']/div[@class='mod-inner']//text()"
        ).match(self.root)
        com.extras["address"] = f"{temp}: {room}; {zip}"
    except ValueError:
        room, zip = XPath(
            "//section[@id='comm-addr']/div[@class='mod-inner']//text()"
        ).match(self.root)
        com.extras["address"] = f"{room}; {zip}"

    # Some committees publish press releases; link the first one if present.
    try:
        news_link = CSS("#page-content .read-more").match(self.root)[0].get("href")
        com.add_link(news_link)
    except SelectorError:
        pass
    return com
def process_page(self):
    """Build a NV Bill from its overview page, then yield the bill-text tab.

    Carryover bills are marked with stars ("*") in the identifier; these
    are rewritten to a "-<session>" suffix via the CARRYOVERS map. This is
    a generator: it yields a BillTabText page wrapping the finished bill,
    or nothing when the carryover marker is unrecognized.
    """
    # Senate bills/resolutions start with "S"; everything else is Assembly.
    chamber = "upper" if self.input.identifier.startswith("S") else "lower"
    short_title = self.get_column_div("Summary").text
    long_title = CSS("#title").match_one(self.root).text

    if "*" in self.input.identifier:
        stars = re.search(r"\*+", self.input.identifier).group()
        if (
            self.input.session in CARRYOVERS
            and stars in CARRYOVERS[self.input.session]
        ):
            self.input.identifier = re.sub(
                r"\*+",
                "-" + CARRYOVERS[self.input.session][stars],
                self.input.identifier,
            )
        else:
            # Unknown star pattern: log and bail instead of emitting a bad ID.
            self.logger.error(
                f"Unidentified carryover bill {self.input.identifier}. Update CARRYOVERS dict in bills.py"
            )
            return

    bill = Bill(
        identifier=self.input.identifier,
        legislative_session=self.input.session,
        title=short_title,
        chamber=chamber,
    )
    bill.subject = self.input.subjects
    # use the pretty source URL
    bill.add_source(self.input.source_url)
    bill.add_title(long_title)

    try:
        sponsors = self.get_column_div("Primary Sponsor")
        self.add_sponsors(bill, CSS("a").match(sponsors), primary=True)
    except SelectorError:
        pass
    try:
        cosponsors = self.get_column_div("Co-Sponsor")
        self.add_sponsors(bill, CSS("a").match(cosponsors), primary=False)
    except SelectorError:
        pass
    # TODO: figure out cosponsor div name, can't find any as of Feb 2021
    self.add_actions(bill, chamber)

    # BDR (Bill Draft Request) number is embedded in the summary text.
    bdr = extract_bdr(short_title)
    if bdr:
        bill.extras["BDR"] = bdr

    text_url = self.source.url.replace("Overview", "Text")
    yield BillTabText(bill, source=text_url)
def process_item(self, item):
    """Build a partial PR senator record and chain to the detail page."""
    # Names are rendered in all caps with an optional "Hon." prefix;
    # normalize to title case.
    raw_name = CSS("span.name").match_one(item).text_content().strip()
    name = re.sub(r"^Hon\.", "", raw_name, flags=re.IGNORECASE).strip().title()

    party = CSS("span.partido").match_one(item).text_content().strip()
    # Translate to English since being an Independent is a universal construct.
    if party == "Independiente":
        party = "Independent"

    detail_link = CSS("a").match_one(item).get("href")
    partial = PartialSen(name=name, party=party, source=self.source.url)
    return SenDetail(partial, source=detail_link)
def process_page(self):
    """Scrape a PA committee: up to two chairs (majority and minority),
    then both party member lists. Missing chairs or missing position
    labels degrade gracefully to fewer chairs / "member"."""
    com = self.input

    # Chair name links and their role labels appear in parallel lists.
    try:
        chair_links = CSS(
            "div.MemberInfoList-MemberWrapper.ChairWrapper div.ChairNameText a"
        ).match(self.root)
        chair_roles = CSS(
            "div.MemberInfoList-MemberWrapper.ChairWrapper div.ChairNameText div"
        ).match(self.root)
    except SelectorError:
        chair_links, chair_roles = [], []
    # BUGFIX: the old code added the majority chair inside a *second* try
    # block that referenced names bound in the first one, so a page without
    # chairs raised an uncaught NameError. Pair up to two chair links with
    # their role labels instead (majority chair, then minority/democratic
    # chair when present).
    for link, role in zip(chair_links[:2], chair_roles[:2]):
        com.add_member(fix_name(link.text.strip()), role.text.strip())

    for list_selector in (
        ".Widget.CteeInfo-MajorityList .MemberInfoList-MemberWrapper.Member",
        ".Widget.CteeInfo-MinorityList .MemberInfoList-MemberWrapper.Member",
    ):
        for mem in CSS(list_selector).match(self.root):
            # BUGFIX: the name lookup used to sit inside the same try/except
            # as the position lookup, so a failed name match silently reused
            # a stale (or unbound) name. Only the position is optional.
            name = CSS("div a").match_one(mem).text.strip()
            try:
                position = CSS(".position").match_one(mem).text.strip()
            except SelectorError:
                position = "member"
            com.add_member(fix_name(name), position)
    return com