Python url_xpath Examples, utils.url_xpath Python Examples

Example #1

0

Show file

File: __init__.py Project: csnardi/openstates

    def get_session_list(self):
        """Return what ``url_xpath`` finds for the ``year`` select options.

        The page queried is the Bill_Status page on legis.state.wv.us; the
        XPath selects the text of every ``<option>`` under the select named
        ``year``.
        """
        from utils import url_xpath

        page_url = "http://www.legis.state.wv.us/Bill_Status/Bill_Status.cfm"
        year_option_texts = '//select[@name="year"]/option/text()'
        return url_xpath(page_url, year_option_texts)

Example #2

0

Show file

File: events.py Project: vikrantmygamma/openstates-scrapers

 def _add_agenda_list(self, url, event):
     """Add the agenda behind every table-cell link found at ``url``.

     Each ``//tr`` result for ``url`` is searched for ``./td/a`` anchors. The
     ``href`` of every anchor is passed to ``_add_agenda_real`` together with
     the current event, and the value that call returns becomes the event
     used for the next link.

     Returns the event from the last ``_add_agenda_real`` call, or ``event``
     unchanged when no links were found.
     """
     for row in url_xpath(url, "//tr"):
         for link in row.xpath("./td/a"):
             agenda_url = link.attrib["href"]
             event = self._add_agenda_real(agenda_url, event)
     return event

Example #3

0

Show file

File: __init__.py Project: washabstract/openstates-scrapers

 def get_session_list(self):
     """Return what ``url_xpath`` finds for the ``session`` select options.

     The page queried is the House status search on www.revisor.mn.gov.
     """
     search_url = "https://www.revisor.mn.gov/bills/status_search.php?body=House"
     session_option_texts = '//select[@name="session"]/option/text()'
     # SSL verification failing in MN due to missing intermediate cert,
     # hence the False third argument.
     return url_xpath(search_url, session_option_texts, False)

Example #4

0

Show file

File: __init__.py Project: hiteshgarg14/openstates

 def get_session_list(self):
     """Return the stripped, non-blank ``<option>`` texts of the MI search page.

     Every ``//option/text()`` result is stripped of surrounding whitespace;
     entries that are empty after stripping are dropped.
     """
     raw_options = url_xpath(
         "http://www.legislature.mi.gov/mileg.aspx?page=LegBasicSearch",
         "//option/text()",
     )
     sessions = []
     for raw in raw_options:
         label = raw.strip()
         if label:
             sessions.append(label)
     return sessions

Example #5

0

Show file

    def get_session_list(self):
        """Return what ``url_xpath`` finds for ``//option/text()`` on flsenate.gov.

        Before fetching, urllib3 warnings are disabled and the urllib3 default
        cipher string is extended. Both changes are made on module-level
        state reached through the ``requests`` package, and the cipher string
        grows by the same suffix on every call.
        """
        # from https://stackoverflow.com/questions/38015537/python-requests-exceptions-sslerror-dh-key-too-small
        import requests

        urllib3 = requests.packages.urllib3
        urllib3.disable_warnings()
        # Appended to (not replacing) the existing default cipher string.
        urllib3.util.ssl_.DEFAULT_CIPHERS += ":HIGH:!DH:!aNULL"
        return url_xpath("https://flsenate.gov", "//option/text()", False)

Example #6

0

Show file

 def get_session_list(self):
     """Return the Vermont session labels with commas removed and whitespace stripped.

     The labels are the option texts of the session select box on the
     legislature.vermont.gov bill search page.

     Returns:
         list: the cleaned labels. A list is returned rather than a generator
         expression so that callers can test it for emptiness, take its
         length, and iterate it more than once; a generator is always truthy
         and is exhausted after a single pass.
     """
     raw_sessions = url_xpath(
         "http://legislature.vermont.gov/bill/search/2016",
         '//fieldset/div[@id="Form_SelectSession_selected_session_Holder"]'
         "/div/select/option/text()",
     )
     return [session.replace(",", "").strip() for session in raw_sessions]

Example #7

0

Show file

File: __init__.py Project: vikrantmygamma/openstates-scrapers

    def get_session_list(self):
        """Return what ``url_xpath`` finds for the ``lovCuatrienio`` select options.

        The page queried is the esutra page on sutra.oslpr.org.
        """
        from utils import url_xpath

        # this URL should work even for future sessions
        sutra_url = "https://sutra.oslpr.org/osl/esutra/"
        option_texts = (
            '//select[@id="ctl00_CPHBody_Tramites_lovCuatrienio"]/option/text()'
        )
        return url_xpath(sutra_url, option_texts)

Example #8

0

Show file

File: __init__.py Project: jessemortenson/openstates

 def get_session_list(self):
     """Return the parenthesised part of each ``sess`` option label.

     The labels come from the ``sess`` select box on the leginfo.ca.gov bill
     info page. For every label, the first ``(...)`` match is taken and its
     first and last characters (the parentheses) are dropped.

     Raises:
         IndexError: if a label contains no parenthesised text.
     """
     option_labels = url_xpath(
         "http://www.leginfo.ca.gov/bilinfo.html",
         "//select[@name='sess']/option/text()",
     )
     names = []
     for label in option_labels:
         parenthesised = re.findall(r"\(.*\)", label)[0]
         names.append(parenthesised[1:-1])
     return names

Example #9

0

Show file

File: __init__.py Project: jessemortenson/openstates

 def get_session_list(self):
     """Return the ``_scraped_name`` of every entry in ``self.legislative_sessions``.

     Nothing is fetched: the names are read from the session metadata already
     held on the instance. An earlier ``url_xpath`` scrape of
     https://www.akleg.gov/basis/Home/Archive followed the return statement
     and could never execute, so that dead code has been removed.

     Returns:
         list: one ``_scraped_name`` value per session, in the order of
         ``self.legislative_sessions``.

     Raises:
         KeyError: if a session entry has no ``_scraped_name`` key.
     """
     return [session["_scraped_name"] for session in self.legislative_sessions]

Example #10

0

Show file

File: __init__.py Project: jessemortenson/openstates

 def get_session_list(self):
     """Return the archive link texts from capitol.hawaii.gov, minus "Archives Main".

     The texts are the anchors inside the ``roundedrect gradientgray shadow``
     div of the archives page. The "Archives Main" entry is removed in place
     from the result before it is returned, so the call fails if that entry
     is absent (``ValueError``, assuming ``url_xpath`` returns a list).
     """
     # doesn't include current session, we need to change it
     archive_links = url_xpath(
         "http://www.capitol.hawaii.gov/archives/main.aspx",
         "//div[@class='roundedrect gradientgray shadow']/a/text()",
     )
     archive_links.remove("Archives Main")
     return archive_links

Example #11

0

Show file

 def get_session_list(self):
     """Return what ``url_xpath`` finds for the general assembly option labels.

     The page queried is the legislation search on www.legislature.ohio.gov;
     the XPath selects the text of every ``optionLabel`` div inside the
     ``generalAssemblyValues`` container.
     """
     search_url = (
         "https://www.legislature.ohio.gov/legislation/search"
         "?generalAssemblies=133&pageSize=10&start=1&isInitial=true"
     )
     label_texts = (
         '//div[@id="generalAssemblyValues"]//'
         'div[contains(@class, "optionLabel")]/text()'
     )
     # Archive does not include current session
     return url_xpath(search_url, label_texts)

Example #12

0

Show file

File: __init__.py Project: vikrantmygamma/openstates-scrapers

 def get_session_list(self):
     """Return the non-blank list items under "Bills and Resolutions".

     The items come from the archives page on www.capitol.tn.gov. Entries
     that are empty after stripping are dropped; the kept entries are
     returned as scraped, without being stripped.
     """
     # Special sessions are available in the archive, but not in current session.
     # Solution is to scrape special session as part of regular session
     list_items = url_xpath(
         "http://www.capitol.tn.gov/legislation/archives.html",
         '//h2[text()="Bills and Resolutions"]/following-sibling::ul/li/text()',
     )
     sessions = []
     for item in list_items:
         if item.strip():
             sessions.append(item)
     return sessions

Example #13

0

Show file

File: __init__.py Project: PrinceCarter/openstates-scrapers

    def get_session_list(self):
        """Return the ``cbxSession`` option labels with "(Mainsys)" removed.

        The labels come from the WebForm1.aspx page on
        webserver1.lsb.state.ok.us. Each one has every "(Mainsys)" occurrence
        deleted and is then stripped of surrounding whitespace.
        """
        from utils import url_xpath

        option_labels = url_xpath(
            "http://webserver1.lsb.state.ok.us/WebApplication2/WebForm1.aspx",
            "//select[@name='cbxSession']/option/text()",
        )
        cleaned = []
        for label in option_labels:
            # OK Sometimes appends (Mainsys) to their session listings
            cleaned.append(label.replace("(Mainsys)", "").strip())
        return cleaned

Example #14

0

Show file

File: bills.py Project: csnardi/openstates

def get_postable_subjects():
    """Return the cached mapping of subject label to form value.

    On the first call (while the module-level ``subjects`` is ``None``) the
    children of the ``rilinContent_cbCategory`` select on
    status.rilegislature.gov are read and stored in ``subjects`` as
    ``{child.text: child.attrib["value"]}``. Later calls return the cached
    dict without fetching again.

    Children whose text is ``None`` are left out while the dict is built.
    The previous ``subjects.pop(None)`` raised ``KeyError`` when every child
    had text. Because the cache had already been assigned by then, the first
    call failed and later calls succeeded.

    Returns:
        dict: subject label -> value attribute.

    Raises:
        IndexError: if the select element is not found on the page.
        KeyError: if a child with text has no ``value`` attribute.
    """
    global subjects
    if subjects is None:
        options = url_xpath(
            "https://status.rilegislature.gov/",
            "//select[@id='rilinContent_cbCategory']",
        )[0].xpath("./*")
        subjects = {
            option.text: option.attrib["value"]
            for option in options
            if option.text is not None
        }
    return subjects

Example #15

0

Show file

    def get_session_list(self):
        """Return the ``billSessions`` option labels from www.legis.state.pa.us.

        As a side effect, a "Regular Session" and a "Special Session #1"
        label for every year from 1800 through 1999 are appended to
        ``self.ignored_scraped_sessions`` on each call.
        """
        # PA keeps slowly adding backdata, so just ignore it en masse
        label_templates = ("{} Regular Session", "{} Special Session #1")
        for year in range(1800, 2000):
            for template in label_templates:
                self.ignored_scraped_sessions.append(template.format(year))

        bills_url = "http://www.legis.state.pa.us/cfdocs/legis/home/bills/"
        return url_xpath(bills_url, '//select[@id="billSessions"]/option/text()')

Example #16

0

Show file

File: __init__.py Project: jealob/openstates-scrapers

    def get_session_list(self):
        """Return the ``sessionNumber`` option labels with spaces and dashes trimmed.

        The labels come from the search page on docs.legis.wisconsin.gov,
        requested with ``verify=False`` and a desktop Chrome user-agent
        string. Leading and trailing spaces and ``-`` characters are stripped
        from each label.
        """
        browser_ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36"

        option_labels = url_xpath(
            "http://docs.legis.wisconsin.gov/search",
            "//select[@name='sessionNumber']/option/text()",
            verify=False,
            user_agent=browser_ua,
        )
        trimmed = []
        for label in option_labels:
            trimmed.append(label.strip(" -"))
        return trimmed

Example #17

0

Show file

File: events.py Project: vikrantmygamma/openstates-scrapers

 def _add_agenda_real(self, url, event):
     """Add one agenda item per table row at ``url`` to ``event``.

     For every ``//tr`` result, the ``id`` attribute of the row's first child
     is used as the bill identifier and the text content of the last
     ``./td//p`` element as the item description. After all rows are
     processed, ``url`` is recorded on the event as a source and as an
     "Agenda" document.

     Returns the same ``event`` object that was passed in.
     """
     for row in url_xpath(url, "//tr"):
         cells = row.xpath("./*")
         bill_id = cells[0].attrib["id"]  # TN uses bill_ids as the id
         description = row.xpath("./td//p")[-1].text_content()
         item = event.add_agenda_item(description)
         item.add_bill(bill_id, id=bill_id)
     event.add_source(url)
     event.add_document("Agenda", url)
     return event

Example #18

0

Show file

    def get_session_list(self):
        """Return the past-session link texts from apps.legislature.ky.gov, tidied.

        In each link text, every run of two or more whitespace characters is
        replaced by a single space, and the result is stripped of surrounding
        whitespace.
        """
        link_texts = url_xpath(
            "https://apps.legislature.ky.gov/record/pastses.html",
            "//td/div/a/text()",
        )
        # Remove escaped whitespace characters.
        return [re.sub(r"\s\s+", " ", text).strip() for text in link_texts]

Example #19

0

Show file

    def get_session_list(self):
        """Return every "2xxx ... Session" match found in the page's font texts.

        The texts are the ``//font/text()`` results of the PrevSessionInfo
        form on www.leg.state.co.us. All regex matches from each text are
        collected, in page order.
        """
        session_pattern = r"2[0-9][0-9][0-9]\ .*\ Session"
        font_texts = url_xpath(
            "http://www.leg.state.co.us/clics/cslFrontPages.nsf/PrevSessionInfo?OpenForm",
            "//font/text()",
        )

        sessions = []
        for text in font_texts:
            sessions.extend(re.findall(session_pattern, text))
        return sessions

Example #20

0

Show file

    def get_session_list(self):
        """Return the "...Assembly: <number>" prefix of each matching link text.

        The texts are the non-blank link texts in the ``grid_6`` section of
        the findLegislation page on www.legis.iowa.gov. Texts that do not
        match the pattern are skipped; for the others the first match is kept.
        """
        link_texts = url_xpath(
            "https://www.legis.iowa.gov/legislation/findLegislation",
            "//section[@class='grid_6']//li/a/text()[normalize-space()]",
        )

        assemblies = []
        for text in link_texts:
            found = re.findall(r"^.*Assembly: [0-9]+", text)
            if found:
                assemblies.append(found[0])
        return assemblies

Example #21

0

Show file

File: bills.py Project: csnardi/openstates

def get_default_headers(page):
    """Build a name -> value dict from every element at ``page`` that has a ``name``.

    For each ``//*[@name]`` result, the value is the element's ``value``
    attribute, or its text when that attribute is missing. The value is
    stripped of surrounding whitespace, and anything falsy (``None`` or an
    empty string) is stored as ``""``.

    The keys ``__EVENTTARGET``, ``__EVENTARGUMENT`` and ``__LASTFOCUS`` are
    always set to ``""`` at the end, overriding any scraped value.
    """
    headers = {}
    for element in url_xpath(page, "//*[@name]"):
        field_name = element.attrib["name"]
        try:
            raw = element.attrib["value"]
        except KeyError:
            raw = element.text

        headers[field_name] = raw.strip() if raw else ""

    for forced_blank in ("__EVENTTARGET", "__EVENTARGUMENT", "__LASTFOCUS"):
        headers[forced_blank] = ""
    return headers

Example #22

0

Show file

File: __init__.py Project: recipefordisaster/openstates-scrapers

 def get_session_list(self):
     """Return what ``url_xpath`` finds for the ``LegSession`` select options.

     The page queried is the advanced search on www.mainelegislature.org.
     """
     search_url = "http://www.mainelegislature.org/LawMakerWeb/advancedsearch.asp"
     return url_xpath(search_url, '//select[@name="LegSession"]/option/text()')

Example #23

0

Show file

File: __init__.py Project: jessemortenson/openstates

 def get_session_list(self):
     """Return what ``url_xpath`` finds for the ``SearchSession`` select options.

     Before fetching the billcentral page on www.house.mo.gov,
     ``http.client.parse_headers`` is replaced with
     ``parse_headers_override``. The replacement is made on the module, so it
     remains in place after this call.
     """
     http.client.parse_headers = parse_headers_override
     billcentral_url = (
         "https://www.house.mo.gov/billcentral.aspx?year=2019&code=S1&q=&id="
     )
     session_option_texts = '//select[@id="SearchSession"]/option/text()'
     return url_xpath(billcentral_url, session_option_texts)

Example #24

0

Show file

 def get_session_list(self):
     """Return the BillList link texts from le.utah.gov with whitespace normalised.

     Each text is stripped, then every run of whitespace inside it is
     replaced by a single space.
     """
     link_texts = url_xpath(
         "http://le.utah.gov/Documents/bills.htm",
         '//ul[contains(@class,"bills-alternate")]/li/a[contains(@href, "BillList")]/text()',
     )
     normalised = []
     for text in link_texts:
         normalised.append(re.sub(r"\s+", " ", text.strip()))
     return normalised

Example #25

0

Show file

 def get_session_list(self):
     """Return the texts of links containing "Session" in the LA sessions table.

     The page queried is SessionInfo.aspx on www.legis.la.gov; the XPath is
     limited to the ``ctl00_ctl00_PageBody_DataListSessions`` table.
     """
     info_url = "http://www.legis.la.gov/Legis/SessionInfo/SessionInfo.aspx"
     session_link_texts = (
         '//table[@id="ctl00_ctl00_PageBody_DataListSessions"]//a[contains'
         '(text(), "Session")]/text()'
     )
     return url_xpath(info_url, session_link_texts)

Example #26

0

Show file

File: bills.py Project: csnardi/openstates

 def get_vote_dates(self, page, session):
     """Return the ``votedate`` option texts that end with the session's last two characters.

     Args:
         page: passed to ``url_xpath`` as the location to query.
         session: session identifier; only ``session[-2:]`` is used.

     Returns:
         list: the text of each child of the first ``votedate`` select whose
         text ends with ``session[-2:]``.

     Raises:
         IndexError: if no ``votedate`` select is found.
     """
     options = url_xpath(page, "//select[@name='votedate']")[0].xpath("./*")
     suffix = session[-2:]
     return [
         option.text
         for option in options
         # A child without text has ``text`` of None; skip it instead of
         # failing on ``None.endswith``.
         if option.text is not None and option.text.endswith(suffix)
     ]

Example #27

0

Show file

File: __init__.py Project: jessemortenson/openstates

 def get_session_list(self):
     """Return what ``url_xpath`` finds for every link text on the MS sessions page."""
     sessions_url = "http://billstatus.ls.state.ms.us/sessions.htm"
     return url_xpath(sessions_url, "//a/text()")

Example #28

0

Show file

File: __init__.py Project: NewAgeAirbender/openstates-scrapers

 def get_session_list(self):
     """Return the ``Listbox1`` option labels from le.utah.gov with whitespace normalised.

     Each label is stripped, then every run of whitespace inside it is
     replaced by a single space.
     """
     option_labels = url_xpath(
         "https://le.utah.gov/asp/billsintro/index.asp?year=2021X1",
         "//select[@id='Listbox1']/option/text()",
     )
     normalised = []
     for label in option_labels:
         normalised.append(re.sub(r"\s+", " ", label.strip()))
     return normalised

Example #29

0

Show file

File: __init__.py Project: mollycode/openstates-scrapers

 def get_session_list(self):
     """Return what ``url_xpath`` finds for the ``valueSessions`` select options.

     The page queried is the legislation search on mgaleg.maryland.gov.
     """
     search_url = "http://mgaleg.maryland.gov/mgawebsite/Search/Legislation"
     return url_xpath(search_url, '//select[@id="valueSessions"]/option/text()')

Example #30

0

Show file

 def get_session_list(self):
     """Return what ``url_xpath`` finds for every ``<option>`` text on flsenate.gov."""
     site_url = "http://flsenate.gov"
     option_texts = "//option/text()"
     return url_xpath(site_url, option_texts)