def get_session_list(self):
    """Return WV session names from the bill-status year picker."""
    from utils import url_xpath

    return url_xpath(
        "http://www.legis.state.wv.us/Bill_Status/Bill_Status.cfm",
        '//select[@name="year"]/option/text()',
    )
def _add_agenda_list(self, url, event):
    """Follow every table-cell link on the page at *url* and merge each
    linked agenda into *event* via ``_add_agenda_real``.

    Returns the (possibly updated) event object.
    """
    for row in url_xpath(url, "//tr"):
        for link in row.xpath("./td/a"):
            event = self._add_agenda_real(link.attrib["href"], event)
    return event
def get_session_list(self):
    """Return MN session names from the House bill-status search form."""
    search_url = "https://www.revisor.mn.gov/bills/" "status_search.php?body=House"
    # Third positional argument disables SSL verification: MN's server is
    # missing an intermediate certificate, so verification fails.
    return url_xpath(
        search_url,
        '//select[@name="session"]/option/text()',
        False,
    )
def get_session_list(self):
    """Return MI session labels, trimmed, with blank entries dropped."""
    options = url_xpath(
        "http://www.legislature.mi.gov/mileg.aspx?page=LegBasicSearch",
        "//option/text()",
    )
    sessions = []
    for option in options:
        cleaned = option.strip()
        if cleaned:
            sessions.append(cleaned)
    return sessions
def get_session_list(self):
    """Return every <option> text on the FL Senate home page.

    Works around a "dh key too small" TLS handshake failure against
    flsenate.gov by tightening urllib3's default cipher list; see
    https://stackoverflow.com/questions/38015537/python-requests-exceptions-sslerror-dh-key-too-small
    """
    import requests

    requests.packages.urllib3.disable_warnings()
    extra_ciphers = ":HIGH:!DH:!aNULL"
    current = requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS
    # Bug fix: the original appended unconditionally, so every call grew
    # the cipher string; only append when not already present.
    if extra_ciphers not in current:
        requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = current + extra_ciphers
    return url_xpath("https://flsenate.gov", "//option/text()", False)
def get_session_list(self):
    """Return VT session names from the bill-search session selector.

    Commas are stripped from each name and whitespace trimmed.
    """
    sessions = url_xpath(
        "http://legislature.vermont.gov/bill/search/2016",
        '//fieldset/div[@id="Form_SelectSession_selected_session_Holder"]'
        "/div/select/option/text()",
    )
    # Return a concrete list rather than a one-shot generator so callers
    # can iterate more than once / take len(), consistent with the other
    # get_session_list implementations in this file.
    return [session.replace(",", "").strip() for session in sessions]
def get_session_list(self):
    """Return PR quadrennium names from the Sutra search form."""
    from utils import url_xpath

    # This URL should work even for future sessions.
    return url_xpath(
        "https://sutra.oslpr.org/osl/esutra/",
        '//select[@id="ctl00_CPHBody_Tramites_lovCuatrienio"]/option/text()',
    )
def get_session_list(self):
    """Return CA session names from the bill-info session selector.

    Each option contains a parenthesized session name; the text inside
    the parentheses is returned.
    """
    sessions = url_xpath(
        "http://www.leginfo.ca.gov/bilinfo.html",
        "//select[@name='sess']/option/text()",
    )
    names = []
    for session in sessions:
        # Bug fix: the original did re.findall(...)[0], which raised
        # IndexError for any option without parentheses; skip those.
        match = re.search(r"\((.*)\)", session)
        if match:
            names.append(match.group(1))
    return names
def get_session_list(self):
    """Return the ``_scraped_name`` of every configured AK session.

    Sessions come from the statically configured
    ``self.legislative_sessions``. The previous fallback that scraped
    https://www.akleg.gov/basis/Home/Archive sat after the return
    statement and was unreachable dead code; it has been removed.
    """
    return [session["_scraped_name"] for session in self.legislative_sessions]
def get_session_list(self):
    """Return HI archive session names, minus the "Archives Main" nav link.

    NOTE: the archive page does not include the current session; that
    still needs handling elsewhere.
    """
    links = url_xpath(
        "http://www.capitol.hawaii.gov/archives/main.aspx",
        "//div[@class='roundedrect gradientgray shadow']/a/text()",
    )
    # Deliberately raises ValueError if the link vanishes — that would
    # signal a page-layout change worth noticing.
    links.remove("Archives Main")
    return links
def get_session_list(self):
    """Return OH general-assembly labels from the legislation search page.

    NOTE: the archive does not include the current session.
    """
    search_url = (
        "https://www.legislature.ohio.gov/legislation/search"
        "?generalAssemblies=133&pageSize=10&start=1&isInitial=true"
    )
    return url_xpath(
        search_url,
        '//div[@id="generalAssemblyValues"]//'
        'div[contains(@class, "optionLabel")]/text()',
    )
def get_session_list(self):
    """Return non-blank TN session entries from the archives page.

    Special sessions are available in the archive but not as a current
    session, so specials are scraped as part of the regular session.
    """
    entries = url_xpath(
        "http://www.capitol.tn.gov/legislation/archives.html",
        '//h2[text()="Bills and Resolutions"]/following-sibling::ul/li/text()',
    )
    return [entry for entry in entries if entry.strip()]
def get_session_list(self):
    """Return OK session labels with any "(Mainsys)" suffix stripped."""
    from utils import url_xpath

    raw = url_xpath(
        "http://webserver1.lsb.state.ok.us/WebApplication2/WebForm1.aspx",
        "//select[@name='cbxSession']/option/text()",
    )
    # OK sometimes appends "(Mainsys)" to its session listings.
    return [label.replace("(Mainsys)", "").strip() for label in raw]
def get_postable_subjects():
    """Return a mapping of subject label -> form option value.

    Scrapes the RI legislature category dropdown once and caches the
    result in the module-level ``subjects`` global.
    """
    global subjects
    if subjects is None:
        subs = url_xpath(
            "https://status.rilegislature.gov/",
            "//select[@id='rilinContent_cbCategory']",
        )[0].xpath("./*")
        subjects = {o.text: o.attrib["value"] for o in subs}
        # Drop the textless placeholder <option> (keyed None). Bug fix:
        # pass a default so pop() no longer raises KeyError when no such
        # option exists on the page.
        subjects.pop(None, None)
    return subjects
def get_session_list(self):
    """Return PA session names from the bills page.

    PA keeps slowly adding backdata, so 1800-1999 regular and special
    sessions are added to the ignore list en masse first.
    """
    for year in range(1800, 2000):
        self.ignored_scraped_sessions.append("{} Regular Session".format(year))
        self.ignored_scraped_sessions.append("{} Special Session #1".format(year))
    return url_xpath(
        "http://www.legis.state.pa.us/cfdocs/legis/home/bills/",
        '//select[@id="billSessions"]/option/text()',
    )
def get_session_list(self):
    """Return WI session names, trimmed of surrounding spaces and dashes.

    Uses a desktop-browser user agent and skips SSL verification.
    NOTE(review): presumably the site rejects the default client UA —
    confirm before removing the spoofed agent.
    """
    user_agent = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/93.0.4577.82 Safari/537.36"
    )
    raw = url_xpath(
        "http://docs.legis.wisconsin.gov/search",
        "//select[@name='sessionNumber']/option/text()",
        verify=False,
        user_agent=user_agent,
    )
    return [label.strip(" -") for label in raw]
def _add_agenda_real(self, url, event):
    """Parse the agenda table at *url* and attach each row to *event*.

    Each table row contributes one agenda item whose description is the
    last paragraph in the row; the first cell's id attribute is used as
    the bill identifier (TN uses bill_ids as the id).
    """
    for row in url_xpath(url, "//tr"):
        cells = row.xpath("./*")
        bill_id = cells[0].attrib["id"]  # TN uses bill_ids as the id
        description = row.xpath("./td//p")[-1].text_content()
        agenda_item = event.add_agenda_item(description)
        agenda_item.add_bill(bill_id, id=bill_id)
    event.add_source(url)
    event.add_document("Agenda", url)
    return event
def get_session_list(self):
    """Return KY session names with escaped whitespace runs collapsed."""
    raw = url_xpath(
        "https://apps.legislature.ky.gov/record/pastses.html",
        "//td/div/a/text()",
    )
    # Collapse multi-whitespace runs to a single space, then trim.
    return [re.sub(r"\s\s+", " ", name).strip() for name in raw]
def get_session_list(self):
    """Return CO session names matching "2xxx ... Session" on the archive page."""
    pattern = r"2[0-9][0-9][0-9]\ .*\ Session"
    tags = url_xpath(
        "http://www.leg.state.co.us/clics/cslFrontPages.nsf/PrevSessionInfo?OpenForm",
        "//font/text()",
    )
    return [match for tag in tags for match in re.findall(pattern, tag)]
def get_session_list(self):
    """Return IA session labels that contain "Assembly: <number>".

    Each returned label is the prefix of the link text up through the
    assembly number.
    """
    raw = url_xpath(
        "https://www.legis.iowa.gov/legislation/findLegislation",
        "//section[@class='grid_6']//li/a/text()[normalize-space()]",
    )
    matched = []
    for label in raw:
        # Anchored at the string start, so at most one match per label.
        found = re.search(r"^.*Assembly: [0-9]+", label)
        if found:
            matched.append(found.group(0))
    return matched
def get_default_headers(page):
    """Collect every named form field on *page* into an ASP.NET postback dict.

    A field's value comes from its ``value`` attribute, falling back to the
    element text; falsy values are stored as "". The three ``__EVENT*``
    postback keys are always present and blank.
    """
    headers = {}
    for element in url_xpath(page, "//*[@name]"):
        name = element.attrib["name"]
        value = element.attrib.get("value")
        if value is None:
            value = element.text
        if value:
            value = value.strip()
        headers[name] = value or ""
    headers["__EVENTTARGET"] = ""
    headers["__EVENTARGUMENT"] = ""
    headers["__LASTFOCUS"] = ""
    return headers
def get_session_list(self):
    """Return ME session labels from the LawMakerWeb advanced-search form."""
    return url_xpath(
        "http://www.mainelegislature.org/LawMakerWeb/advancedsearch.asp",
        '//select[@name="LegSession"]/option/text()',
    )
def get_session_list(self):
    """Return MO session labels from the bill-central search form.

    Patches ``http.client.parse_headers`` first.
    NOTE(review): presumably MO's server emits headers the stdlib parser
    rejects — confirm against parse_headers_override's definition.
    """
    http.client.parse_headers = parse_headers_override
    search_url = "https://www.house.mo.gov/billcentral.aspx?year=2019&code=S1&q=&id="
    return url_xpath(
        search_url,
        '//select[@id="SearchSession"]/option/text()',
    )
def get_session_list(self):
    """Return UT session names with internal whitespace collapsed."""
    raw = url_xpath(
        "http://le.utah.gov/Documents/bills.htm",
        '//ul[contains(@class,"bills-alternate")]/li/a[contains(@href, "BillList")]/text()',
    )
    return [re.sub(r"\s+", " ", name.strip()) for name in raw]
def get_session_list(self):
    """Return LA session links whose text contains "Session"."""
    return url_xpath(
        "http://www.legis.la.gov/Legis/SessionInfo/SessionInfo.aspx",
        '//table[@id="ctl00_ctl00_PageBody_DataListSessions"]//a[contains'
        '(text(), "Session")]/text()',
    )
def get_vote_dates(self, page, session):
    """Return the vote-date option labels belonging to *session*.

    Matches each option's text against the last two characters of
    *session* (the two-digit year suffix).
    """
    options = url_xpath(page, "//select[@name='votedate']")[0].xpath("./*")
    suffix = session[-2:]
    # Bug fix: guard against textless options — a.text is None for an
    # empty <option>, and None.endswith(...) raised AttributeError.
    return [a.text for a in options if a.text and a.text.endswith(suffix)]
def get_session_list(self):
    """Return every link text on MS's sessions index page."""
    return url_xpath(
        "http://billstatus.ls.state.ms.us/sessions.htm",
        "//a/text()",
    )
def get_session_list(self):
    """Return UT bills-intro session names with whitespace normalized."""
    raw = url_xpath(
        "https://le.utah.gov/asp/billsintro/index.asp?year=2021X1",
        "//select[@id='Listbox1']/option/text()",
    )
    return [re.sub(r"\s+", " ", name.strip()) for name in raw]
def get_session_list(self):
    """Return MD session labels from the legislation search form."""
    return url_xpath(
        "http://mgaleg.maryland.gov/mgawebsite/Search/Legislation",
        '//select[@id="valueSessions"]/option/text()',
    )
def get_session_list(self):
    """Return every <option> text on the FL Senate home page."""
    return url_xpath("http://flsenate.gov", "//option/text()")