def get_session_list(self):
    """Return WV session names from the bill-status year picker."""
    from utils import url_xpath

    return url_xpath(
        "http://www.legis.state.wv.us/Bill_Status/Bill_Status.cfm",
        '//select[@name="year"]/option/text()',
    )
def _add_agenda_list(self, url, event):
    """Follow every table-cell link on the page at *url* and merge each
    linked agenda into *event* via ``_add_agenda_real``.

    Returns the (possibly updated) event object.
    """
    for row in url_xpath(url, "//tr"):
        for link in row.xpath("./td/a"):
            event = self._add_agenda_real(link.attrib["href"], event)
    return event
def get_session_list(self):
    """Return MN session names from the House bill-status search form."""
    search_url = "https://www.revisor.mn.gov/bills/" "status_search.php?body=House"
    # Third positional argument disables SSL verification: MN's server is
    # missing an intermediate certificate, so verification fails.
    return url_xpath(
        search_url,
        '//select[@name="session"]/option/text()',
        False,
    )
def get_session_list(self):
    """Return MI session labels, trimmed, with blank entries dropped."""
    options = url_xpath(
        "http://www.legislature.mi.gov/mileg.aspx?page=LegBasicSearch",
        "//option/text()",
    )
    sessions = []
    for option in options:
        cleaned = option.strip()
        if cleaned:
            sessions.append(cleaned)
    return sessions
def get_session_list(self):
    """Return every <option> text on the FL Senate home page.

    Works around a "dh key too small" TLS handshake failure against
    flsenate.gov by tightening urllib3's default cipher list; see
    https://stackoverflow.com/questions/38015537/python-requests-exceptions-sslerror-dh-key-too-small
    """
    import requests

    requests.packages.urllib3.disable_warnings()
    extra_ciphers = ":HIGH:!DH:!aNULL"
    current = requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS
    # Bug fix: the original appended unconditionally, so every call grew
    # the cipher string; only append when not already present.
    if extra_ciphers not in current:
        requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = current + extra_ciphers
    return url_xpath("https://flsenate.gov", "//option/text()", False)
def get_session_list(self):
    """Return VT session names from the bill-search session selector.

    Commas are stripped from each name and whitespace trimmed.
    """
    sessions = url_xpath(
        "http://legislature.vermont.gov/bill/search/2016",
        '//fieldset/div[@id="Form_SelectSession_selected_session_Holder"]'
        "/div/select/option/text()",
    )
    # Return a concrete list rather than a one-shot generator so callers
    # can iterate more than once / take len(), consistent with the other
    # get_session_list implementations in this file.
    return [session.replace(",", "").strip() for session in sessions]
def get_session_list(self):
    """Return PR quadrennium names from the Sutra search form."""
    from utils import url_xpath

    # This URL should work even for future sessions.
    return url_xpath(
        "https://sutra.oslpr.org/osl/esutra/",
        '//select[@id="ctl00_CPHBody_Tramites_lovCuatrienio"]/option/text()',
    )
def get_session_list(self):
    """Return CA session names from the bill-info session selector.

    Each option contains a parenthesized session name; the text inside
    the parentheses is returned.
    """
    sessions = url_xpath(
        "http://www.leginfo.ca.gov/bilinfo.html",
        "//select[@name='sess']/option/text()",
    )
    names = []
    for session in sessions:
        # Bug fix: the original did re.findall(...)[0], which raised
        # IndexError for any option without parentheses; skip those.
        match = re.search(r"\((.*)\)", session)
        if match:
            names.append(match.group(1))
    return names
def get_session_list(self):
    """Return the ``_scraped_name`` of every configured AK session.

    Sessions come from the statically configured
    ``self.legislative_sessions``. The previous fallback that scraped
    https://www.akleg.gov/basis/Home/Archive sat after the return
    statement and was unreachable dead code; it has been removed.
    """
    return [session["_scraped_name"] for session in self.legislative_sessions]
def get_session_list(self):
    """Return HI archive session names, minus the "Archives Main" nav link.

    NOTE: the archive page does not include the current session; that
    still needs handling elsewhere.
    """
    links = url_xpath(
        "http://www.capitol.hawaii.gov/archives/main.aspx",
        "//div[@class='roundedrect gradientgray shadow']/a/text()",
    )
    # Deliberately raises ValueError if the link vanishes — that would
    # signal a page-layout change worth noticing.
    links.remove("Archives Main")
    return links
def get_session_list(self):
    """Return OH general-assembly labels from the legislation search page.

    NOTE: the archive does not include the current session.
    """
    search_url = (
        "https://www.legislature.ohio.gov/legislation/search"
        "?generalAssemblies=133&pageSize=10&start=1&isInitial=true"
    )
    return url_xpath(
        search_url,
        '//div[@id="generalAssemblyValues"]//'
        'div[contains(@class, "optionLabel")]/text()',
    )
def get_session_list(self):
    """Return non-blank TN session entries from the archives page.

    Special sessions are available in the archive but not as a current
    session, so specials are scraped as part of the regular session.
    """
    entries = url_xpath(
        "http://www.capitol.tn.gov/legislation/archives.html",
        '//h2[text()="Bills and Resolutions"]/following-sibling::ul/li/text()',
    )
    return [entry for entry in entries if entry.strip()]
def get_session_list(self):
    """Return OK session labels with any "(Mainsys)" suffix stripped."""
    from utils import url_xpath

    raw = url_xpath(
        "http://webserver1.lsb.state.ok.us/WebApplication2/WebForm1.aspx",
        "//select[@name='cbxSession']/option/text()",
    )
    # OK sometimes appends "(Mainsys)" to its session listings.
    return [label.replace("(Mainsys)", "").strip() for label in raw]
def get_postable_subjects():
    """Return a mapping of subject label -> form option value.

    Scrapes the RI legislature category dropdown once and caches the
    result in the module-level ``subjects`` global.
    """
    global subjects
    if subjects is None:
        subs = url_xpath(
            "https://status.rilegislature.gov/",
            "//select[@id='rilinContent_cbCategory']",
        )[0].xpath("./*")
        subjects = {o.text: o.attrib["value"] for o in subs}
        # Drop the textless placeholder <option> (keyed None). Bug fix:
        # pass a default so pop() no longer raises KeyError when no such
        # option exists on the page.
        subjects.pop(None, None)
    return subjects
def get_session_list(self):
    """Return PA session names from the bills page.

    PA keeps slowly adding backdata, so 1800-1999 regular and special
    sessions are added to the ignore list en masse first.
    """
    for year in range(1800, 2000):
        self.ignored_scraped_sessions.append("{} Regular Session".format(year))
        self.ignored_scraped_sessions.append("{} Special Session #1".format(year))
    return url_xpath(
        "http://www.legis.state.pa.us/cfdocs/legis/home/bills/",
        '//select[@id="billSessions"]/option/text()',
    )
def get_session_list(self):
    """Return WI session names, trimmed of surrounding spaces and dashes.

    Uses a desktop-browser user agent and skips SSL verification.
    NOTE(review): presumably the site rejects the default client UA —
    confirm before removing the spoofed agent.
    """
    user_agent = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/93.0.4577.82 Safari/537.36"
    )
    raw = url_xpath(
        "http://docs.legis.wisconsin.gov/search",
        "//select[@name='sessionNumber']/option/text()",
        verify=False,
        user_agent=user_agent,
    )
    return [label.strip(" -") for label in raw]
def _add_agenda_real(self, url, event):
    """Parse the agenda table at *url* and attach each row to *event*.

    Each table row contributes one agenda item whose description is the
    last paragraph in the row; the first cell's id attribute is used as
    the bill identifier (TN uses bill_ids as the id).
    """
    for row in url_xpath(url, "//tr"):
        cells = row.xpath("./*")
        bill_id = cells[0].attrib["id"]  # TN uses bill_ids as the id
        description = row.xpath("./td//p")[-1].text_content()
        agenda_item = event.add_agenda_item(description)
        agenda_item.add_bill(bill_id, id=bill_id)
    event.add_source(url)
    event.add_document("Agenda", url)
    return event
def get_session_list(self):
    """Return KY session names with escaped whitespace runs collapsed."""
    raw = url_xpath(
        "https://apps.legislature.ky.gov/record/pastses.html",
        "//td/div/a/text()",
    )
    # Collapse multi-whitespace runs to a single space, then trim.
    return [re.sub(r"\s\s+", " ", name).strip() for name in raw]
def get_session_list(self):
    """Return CO session names matching "2xxx ... Session" on the archive page."""
    pattern = r"2[0-9][0-9][0-9]\ .*\ Session"
    tags = url_xpath(
        "http://www.leg.state.co.us/clics/cslFrontPages.nsf/PrevSessionInfo?OpenForm",
        "//font/text()",
    )
    return [match for tag in tags for match in re.findall(pattern, tag)]
def get_session_list(self):
    """Return IA session labels that contain "Assembly: <number>".

    Each returned label is the prefix of the link text up through the
    assembly number.
    """
    raw = url_xpath(
        "https://www.legis.iowa.gov/legislation/findLegislation",
        "//section[@class='grid_6']//li/a/text()[normalize-space()]",
    )
    matched = []
    for label in raw:
        # Anchored at the string start, so at most one match per label.
        found = re.search(r"^.*Assembly: [0-9]+", label)
        if found:
            matched.append(found.group(0))
    return matched
def get_default_headers(page):
    """Collect every named form field on *page* into an ASP.NET postback dict.

    A field's value comes from its ``value`` attribute, falling back to the
    element text; falsy values are stored as "". The three ``__EVENT*``
    postback keys are always present and blank.
    """
    headers = {}
    for element in url_xpath(page, "//*[@name]"):
        name = element.attrib["name"]
        value = element.attrib.get("value")
        if value is None:
            value = element.text
        if value:
            value = value.strip()
        headers[name] = value or ""
    headers["__EVENTTARGET"] = ""
    headers["__EVENTARGUMENT"] = ""
    headers["__LASTFOCUS"] = ""
    return headers
def get_session_list(self):
    """Return ME session labels from the LawMakerWeb advanced-search form."""
    return url_xpath(
        "http://www.mainelegislature.org/LawMakerWeb/advancedsearch.asp",
        '//select[@name="LegSession"]/option/text()',
    )
def get_session_list(self):
    """Return MO session labels from the bill-central search form.

    Patches ``http.client.parse_headers`` first.
    NOTE(review): presumably MO's server emits headers the stdlib parser
    rejects — confirm against parse_headers_override's definition.
    """
    http.client.parse_headers = parse_headers_override
    search_url = "https://www.house.mo.gov/billcentral.aspx?year=2019&code=S1&q=&id="
    return url_xpath(
        search_url,
        '//select[@id="SearchSession"]/option/text()',
    )
def get_session_list(self):
    """Return UT session names with internal whitespace collapsed."""
    raw = url_xpath(
        "http://le.utah.gov/Documents/bills.htm",
        '//ul[contains(@class,"bills-alternate")]/li/a[contains(@href, "BillList")]/text()',
    )
    return [re.sub(r"\s+", " ", name.strip()) for name in raw]
def get_session_list(self):
    """Return LA session links whose text contains "Session"."""
    return url_xpath(
        "http://www.legis.la.gov/Legis/SessionInfo/SessionInfo.aspx",
        '//table[@id="ctl00_ctl00_PageBody_DataListSessions"]//a[contains'
        '(text(), "Session")]/text()',
    )
def get_vote_dates(self, page, session):
    """Return the vote-date option labels belonging to *session*.

    Matches each option's text against the last two characters of
    *session* (the two-digit year suffix).
    """
    options = url_xpath(page, "//select[@name='votedate']")[0].xpath("./*")
    suffix = session[-2:]
    # Bug fix: guard against textless options — a.text is None for an
    # empty <option>, and None.endswith(...) raised AttributeError.
    return [a.text for a in options if a.text and a.text.endswith(suffix)]
def get_session_list(self):
    """Return every link text on MS's sessions index page."""
    return url_xpath(
        "http://billstatus.ls.state.ms.us/sessions.htm",
        "//a/text()",
    )
def get_session_list(self):
    """Return UT bills-intro session names with whitespace normalized."""
    raw = url_xpath(
        "https://le.utah.gov/asp/billsintro/index.asp?year=2021X1",
        "//select[@id='Listbox1']/option/text()",
    )
    return [re.sub(r"\s+", " ", name.strip()) for name in raw]
def get_session_list(self):
    """Return MD session labels from the legislation search form."""
    return url_xpath(
        "http://mgaleg.maryland.gov/mgawebsite/Search/Legislation",
        '//select[@id="valueSessions"]/option/text()',
    )
def get_session_list(self):
    """Return every <option> text on the FL Senate home page."""
    return url_xpath("http://flsenate.gov", "//option/text()")