def session_list():
    """Return Colorado session names scraped from the legislature's pages.

    Scrapes both the previous-session index and the current-session home
    page, collecting any text that looks like "<year> ... Session".
    The original duplicated the scan loop per URL; it is now a single loop,
    and the pattern is a raw string (the old non-raw literal relied on the
    invalid "\\ " escape surviving).
    """
    import re
    from billy.scrape.utils import url_xpath

    # "\ " in the pattern matches a literal space, e.g. "2014 Regular Session".
    session_re = re.compile(r"2[0-9][0-9][0-9]\ .*\ Session")
    urls = (
        'http://www.leg.state.co.us/clics/clics2014a/cslFrontPages.nsf/PrevSessionInfo?OpenForm',
        'http://www.leg.state.co.us/CLICS/CLICS2016A/csl.nsf/Home?OpenForm&BaseTarget=Bottom',
    )
    sessions = []
    for url in urls:
        # There is no single current-session index, so scan the page text.
        for tag in url_xpath(url, "//font/text()"):
            sessions.extend(session_re.findall(tag))
    return sessions
def session_list():
    """Return Maine session names from the LawMakerWeb search form."""
    from billy.scrape.utils import url_xpath

    options = url_xpath(
        'http://www.mainelegislature.org/LawMakerWeb/advancedsearch.asp',
        '//select[@name="LegSession"]/option/text()')
    # Drop the site's test entry and the unusable 2001-2002 session.
    for unwanted in ('jb-Test', '2001-2002'):
        options.remove(unwanted)
    return options
def session_list():
    """Return Delaware session names from the gSession dropdown."""
    from billy.scrape.utils import url_xpath

    options = url_xpath('http://legis.delaware.gov/',
                        "//select[@name='gSession']/option/text()")
    cleaned = [option.strip() for option in options]
    # "Session" is the dropdown's placeholder label, not a real session.
    cleaned.remove("Session")
    return cleaned
def session_list():
    """Return Rhode Island session years from the bill-history year picker."""
    from billy.scrape.utils import url_xpath

    history_url = "http://status.rilin.state.ri.us/bill_history.aspx?mode=previous"
    year_xpath = "//select[@name='ctl00$rilinContent$cbYear']/option/text()"
    return url_xpath(history_url, year_xpath)
def session_list():
    """Return Hawaii session names from the archives landing page."""
    from billy.scrape.utils import url_xpath

    archive_url = 'http://www.capitol.hawaii.gov/archives/main.aspx'
    links = url_xpath(
        archive_url,
        "//div[@class='roundedrect gradientgray shadow']/a/text()")
    # Drop the navigation link back to the archives page itself.
    links.remove('Archives Main')
    return links
def session_list():
    """Return the current Kansas session slug, derived from a bill-list link."""
    from billy.scrape.utils import url_xpath

    href = url_xpath(
        'http://www.kslegislature.org/li',
        '//a[contains(text(), "Senate Bills")]/@href')[0]
    # The session slug is the second path component of the link target.
    return [href.split('/')[2]]
def session_list():
    """Return Oregon session names from the OLIS sessions page."""
    from billy.scrape.utils import url_xpath

    names = url_xpath('https://olis.leg.state.or.us/liz/sessions/',
                      '//a[contains(@href, "/liz/")]/text()')
    return [name.strip() for name in names]
def session_list():
    """Return Ohio session numbers from the archive's bill-search form."""
    from billy.scrape.utils import url_xpath

    values = url_xpath(
        'http://archives.legislature.state.oh.us',
        '//form[@action="bill_search.cfm"]//input[@type="radio" and @name="SESSION"]/@value')
    # The archive omits the current (131st) session, so add it by hand.
    values.append('131')
    return values
def session_list():
    """Return Arkansas session names from the previous-legislatures page."""
    from billy.scrape.utils import url_xpath

    anchors = url_xpath(
        'http://www.arkleg.state.ar.us/assembly/2013/2013R/Pages/Previous%20Legislatures.aspx',
        '//div[@id="ctl00_ctl15_g_91c28874_44ca_4b3e_9969_7202c1ca63dd_panel"]//a')
    # Keep only anchors that actually carry text.
    return [anchor.text_content() for anchor in anchors if anchor.text_content()]
def session_list():
    """Return Arkansas session names, identified by 'Session' in link titles."""
    from billy.scrape.utils import url_xpath

    anchors = url_xpath(
        'http://www.arkleg.state.ar.us/assembly/2013/2013R/Pages/Previous%20Legislatures.aspx',
        '//a')
    return [anchor.text_content() for anchor in anchors
            if 'Session' in anchor.attrib.get('title', '')]
def session_list():
    """Return Minnesota sessions from the revisor's status-search form."""
    from billy.scrape.utils import url_xpath

    search_url = ("https://www.revisor.mn.gov/revisor/pages/search_status/"
                  "status_search.php?body=House")
    return url_xpath(search_url, '//select[@name="session"]/option/text()')
def session_list():
    """Return Arkansas session names from the previous-legislatures page."""
    from billy.scrape.utils import url_xpath

    anchors = url_xpath(
        'http://www.arkleg.state.ar.us/assembly/2011/2011R/Pages/Previous%20Legislatures.aspx',
        '//div[@id="ctl00_ctl15_g_95338513_84cb_48ec_85d1_4e6a889e8035_panel"]//a')
    # Anchors without text are layout artifacts, not sessions.
    return [anchor.text_content() for anchor in anchors if anchor.text_content()]
def session_list():
    """Return Utah session names from the bills page.

    Fixes a NameError: the original used ``re`` without importing it.
    """
    import re
    from billy.scrape.utils import url_xpath

    sessions = url_xpath(
        'http://le.utah.gov/Documents/bills.htm',
        '//p/a[contains(@href, "session")]/text()')
    # Collapse internal runs of whitespace to single spaces.
    return [re.sub(r'\s+', ' ', session.strip()) for session in sessions]
def session_list():
    """Return Louisiana session names from the session-info page.

    The original's unused ``import re`` has been removed.
    """
    from billy.scrape.utils import url_xpath

    return url_xpath(
        "http://www.legis.la.gov/Legis/SessionInfo/SessionInfo.aspx",
        '//a[contains(text(), "Session")]/text()')
def session_list():
    """Return Louisiana session names, whitespace-normalized.

    The trailing element (a non-session heading) is dropped.
    """
    import re
    from billy.scrape.utils import url_xpath

    headings = url_xpath("http://www.legis.state.la.us/session.htm", "//strong")
    # Raw string avoids the invalid "\s" escape in the original pattern.
    return [re.sub(r"\s+", " ", heading.text_content())
            for heading in headings][:-1]
def session_list():
    """Return Oregon session names from the bills/laws index."""
    from billy.scrape.utils import url_xpath

    links = url_xpath('http://www.leg.state.or.us/bills_laws/billsinfo.htm',
                      '//a[contains(@href, "measures")]/text()')
    return [link.strip() for link in links]
def session_list():
    """Return California session identifiers from the bill-info form.

    Option labels include a parenthesized span like "(2013-2014)"; that
    span is extracted and the parentheses stripped.
    """
    import re
    from billy.scrape.utils import url_xpath

    options = url_xpath("http://www.leginfo.ca.gov/bilinfo.html",
                        "//select[@name='sess']/option/text()")
    # Raw string avoids the invalid "\(" escape in the original pattern.
    return [re.findall(r"\(.*\)", option)[0][1:-1] for option in options]
def session_list():
    """Return Oklahoma session names from the bill-search form."""
    from billy.scrape.utils import url_xpath

    options = url_xpath(
        'http://webserver1.lsb.state.ok.us/WebApplication2/WebForm1.aspx',
        "//select[@name='cbxSession']/option/text()")
    # OK sometimes appends "(Mainsys)" to its session listings; strip it.
    return [option.replace('(Mainsys)', '').strip() for option in options]
def session_list():
    """Return Alberta session names from the bill-document page.

    Bug fix: the query string had been mangled by HTML-entity decoding --
    "&section" had collapsed into the single character "\xa7" + "ion"
    ("&sect;" is U+00A7) -- breaking the request URL. The regex is also a
    raw string now.
    """
    import re
    from billy.scrape.utils import url_xpath

    url = ('http://www.assembly.ab.ca/net/index.aspx?'
           'p=bill&section=doc&legl=28&session=1')
    options = url_xpath(url, '//option/text()')
    # Collapse runs of whitespace to single spaces.
    return [re.sub(r'\s+', ' ', option.strip()) for option in options]
def session_list():
    """Return Wisconsin session names from the proposal-session dropdown."""
    from billy.scrape.utils import url_xpath

    option_xpath = ("//select[@name='ctl00$PlaceHolderLeftNavBar"
                    "$ctl01$ctl00$ddlPropSess']/option/text()")
    options = url_xpath('http://legis.wisconsin.gov/', option_xpath)
    return [option.strip() for option in options]
def session_list():
    """Return Delaware session names from the gSession dropdown.

    Fixes a NameError: the original used ``url_xpath`` without importing it.
    """
    from billy.scrape.utils import url_xpath

    url = ('http://legis.delaware.gov/Legislature.nsf/7CD69CCAB66992B285256EE0'
           '005E0727/78346509610C835385257F2B004F2590?OpenDocument')
    options = url_xpath(url, '//select[@name="gSession"]/option/text()')
    # Trim whitespace and drop empty placeholder entries.
    return [option.strip() for option in options if option.strip()]
def session_list():
    """Return Louisiana session names, whitespace-normalized; the final
    element (a non-session heading) is dropped."""
    import re
    from billy.scrape.utils import url_xpath

    headings = url_xpath('http://www.legis.state.la.us/session.htm',
                         '//strong')
    # Raw string avoids the invalid "\s" escape in the original pattern.
    return [re.sub(r'\s+', ' ', heading.text_content())
            for heading in headings][:-1]
def session_list():
    """Return Delaware session names from the gSession dropdown."""
    from billy.scrape.utils import url_xpath

    url = ("http://legis.delaware.gov/Legislature.nsf/"
           "7CD69CCAB66992B285256EE0005E0727/FC256764B3B3DCAE85257E0E005F9CD8")
    raw_options = url_xpath(url, "//select[@name='gSession']/option/text()")
    # Trim whitespace and drop empty placeholder entries.
    return [option.strip() for option in raw_options if option.strip()]
def session_list():
    """Return the current Kansas session slug from the site navigation."""
    from billy.scrape.utils import url_xpath

    href = url_xpath(
        'http://www.kslegislature.org/li',
        '//div[@id="nav"]//a[contains(text(), "Senate Bills")]/@href')[0]
    # The slug is the second path component of the bills link.
    slug = href.split('/')[2]
    return [slug]
def session_list():
    """Return Oregon session names from the OLIS sessions index."""
    from billy.scrape.utils import url_xpath

    texts = url_xpath("https://olis.leg.state.or.us/liz/sessions/",
                      '//a[contains(@href, "/liz/")]/text()')
    return [text.strip() for text in texts]
def session_list():
    """Return Arkansas session names from the previous-legislatures page."""
    from billy.scrape.utils import url_xpath

    anchors = url_xpath(
        "http://www.arkleg.state.ar.us/assembly/2011/2011R/Pages/Previous%20Legislatures.aspx",
        '//div[@id="ctl00_ctl15_g_95338513_84cb_48ec_85d1_4e6a889e8035_panel"]//a')
    # Skip anchors with no text content.
    return [anchor.text_content() for anchor in anchors if anchor.text_content()]
def session_list():
    """Return Hawaii session names from the archives landing page."""
    from billy.scrape.utils import url_xpath

    link_texts = url_xpath(
        "http://www.capitol.hawaii.gov/archives/main.aspx",
        "//div[@class='roundedrect gradientgray shadow']/a/text()")
    # Drop the self-referential navigation entry.
    link_texts.remove("Archives Main")
    return link_texts
def session_list():
    """Return Wisconsin session names from the proposal-session dropdown."""
    from billy.scrape.utils import url_xpath

    dropdown_xpath = ("//select[@name='ctl00$PlaceHolderLeftNavBar"
                      "$ctl01$ctl00$ddlPropSess']/option/text()")
    raw = url_xpath("http://legis.wisconsin.gov/", dropdown_xpath)
    return [entry.strip() for entry in raw]
def session_list():
    """Return Saskatchewan session names from the bills page navigation."""
    from billy.scrape.utils import url_xpath

    nav_xpath = ('//div[@class="navigation insert"]/'
                 'descendant::div[@class="xrm-attribute-value"]/p/text()')
    names = url_xpath(
        'http://www.legassembly.sk.ca/legislative-business/bills/', nav_xpath)
    # Replace non-breaking spaces with ordinary spaces.
    return [name.replace(u'\xa0', ' ') for name in names]
def session_list():
    """Return Oklahoma session names from the bill-search form."""
    from billy.scrape.utils import url_xpath

    raw = url_xpath(
        'http://webserver1.lsb.state.ok.us/WebApplication2/WebForm1.aspx',
        "//select[@name='cbxSession']/option/text()")
    # OK sometimes appends "(Mainsys)" to its session listings; drop it.
    return [entry.replace('(Mainsys)', '').strip() for entry in raw]
def session_list():
    """Return Hawaii session names from the archives landing page.

    NOTE: this does not include the current session; that needs changing.
    """
    from billy.scrape.utils import url_xpath

    entries = url_xpath('http://www.capitol.hawaii.gov/archives/main.aspx',
                        "//div[@class='roundedrect gradientgray shadow']/a/text()")
    # Remove the self-referential navigation link.
    entries.remove("Archives Main")
    return entries
def session_list():
    """Return Hawaii session names from the archives landing page.

    NOTE: the current session is not listed here; that needs changing.
    """
    from billy.scrape.utils import url_xpath

    archive = 'http://www.capitol.hawaii.gov/archives/main.aspx'
    names = url_xpath(
        archive, "//div[@class='roundedrect gradientgray shadow']/a/text()")
    # The archives home link is not a session.
    names.remove("Archives Main")
    return names
def session_list():
    """Return Nevada session names from the session index.

    Fixes: the ``ur'...'`` string literal is invalid syntax on Python 3,
    and ``url_xpath`` was used without being imported. The plain raw
    string below leaves ``\\xa0`` for the regex engine to interpret, which
    matches U+00A0 exactly as the original pattern did.
    """
    import re
    from billy.scrape.utils import url_xpath

    headings = url_xpath('http://www.leg.state.nv.us/Session/',
                         '//*[@class="MainHeading"]')
    # Strip non-breaking spaces and the expand/collapse hints.
    junk = re.compile(r'(\xa0|\(click to close\)|\(click to open\))')
    return [junk.sub('', heading.text_content()) for heading in headings]
def get_postable_subjects():
    """Return the {subject name: form value} mapping for RI bill categories.

    Lazily populates the module-level ``subjects`` cache on first call.
    Relies on a module-level ``url_xpath`` import. Fixes the ``== None``
    comparison (identity check is the correct idiom).
    """
    global subjects
    if subjects is None:
        options = url_xpath(
            "http://status.rilin.state.ri.us/",
            "//select[@id='rilinContent_cbCategory']")[0].xpath("./*")
        subjects = {option.text: option.attrib['value'] for option in options}
        # Drop the placeholder option, whose text is None.
        subjects.pop(None)
    return subjects
def session_list():
    """Return Ohio session numbers from the archive's bill-search form."""
    from billy.scrape.utils import url_xpath

    session_values = url_xpath(
        'http://archives.legislature.state.oh.us',
        '//form[@action="bill_search.cfm"]//input[@type="radio" and @name="SESSION"]/@value')
    # The archive does not include the current session; append the 131st.
    session_values.append('131')
    return session_values
def session_list():
    """Return Kentucky session names from the legislation index.

    Fixes NameErrors: the original used ``url_xpath`` and ``re`` without
    importing them (this file's convention is function-local imports).
    """
    import re
    from billy.scrape.utils import url_xpath

    sessions = url_xpath(
        'http://www.lrc.ky.gov/legislation.htm',
        '//a[contains(@href, "record.htm")]/text()[normalize-space()]')
    # Remove escaped whitespace characters.
    return [re.sub(r'[\r\n\t]+', '', session) for session in sessions]
def session_list():
    """Return Massachusetts General Court names from the bill-search form.

    Labels like "188th (2013-2014)" are trimmed to the part before the
    parenthesized years.
    """
    import re
    from billy.scrape.utils import url_xpath

    options = url_xpath('http://www.malegislature.gov/Bills/Search',
                        "//select[@id='Input_GeneralCourtId']/option/text()")
    # The site used to include a "--Select Value--" placeholder; it is gone.
    # Raw string avoids the invalid "\(" escape in the original pattern.
    return [re.sub(r"\(.*$", "", option).strip() for option in options]
def session_list():
    """Return Massachusetts General Court names, trimmed of the
    parenthesized year spans in the option labels."""
    import re
    from billy.scrape.utils import url_xpath

    labels = url_xpath('http://www.malegislature.gov/Bills/Search',
                       "//select[@id='Input_GeneralCourtId']/option/text()")
    # The "--Select Value--" placeholder was removed by the site.
    # Raw string avoids the invalid "\(" escape in the original pattern.
    return [re.sub(r"\(.*$", "", label).strip() for label in labels]
def session_list():
    """Return British Columbia session names from the documents index."""
    import re
    from billy.scrape.utils import url_xpath

    texts = url_xpath('http://www.leg.bc.ca/documents/4-1-0.htm',
                      '//table[3]//a[contains(@href, "index.htm")]/text()')
    # Raw string avoids the invalid "\s" escape in the original pattern.
    names = [re.sub(r'\s+', ' ', text).strip() for text in texts]
    # Drop a leading empty entry and the "Home" navigation link, if present.
    for junk in ['', 'Home']:
        if junk in names:
            names.remove(junk)
    return names
def session_list():
    """Return California session identifiers (the parenthesized year spans
    of the session dropdown, with the parentheses stripped)."""
    import re
    from billy.scrape.utils import url_xpath

    options = url_xpath('http://www.leginfo.ca.gov/bilinfo.html',
                        "//select[@name='sess']/option/text()")
    # Raw string avoids the invalid "\(" escape in the original pattern;
    # the needless line-continuation backslash is gone too.
    return [re.findall(r'\(.*\)', option)[0][1:-1] for option in options]
def session_list():
    """Return Iowa General Assembly names from the find-legislation page.

    Link labels look like "85th General Assembly (2013-2014)"; the text
    before the parenthesized years is kept.
    """
    import re
    from billy.scrape.utils import url_xpath

    links = url_xpath(
        'https://www.legis.iowa.gov/Legislation/Find/findLegislation.aspx',
        "//div[@id='ctl00_ctl00_ctl00_cphMainContent_cphCenterCol_cphCenterCol_ucGASelect_divLinks']/ul/li/a/text()")
    # Raw string avoids the invalid "\(" escape in the original pattern.
    return [re.findall(r".*\(", link)[0][:-1].strip() for link in links]
def session_list():
    """Return Iowa General Assembly names, trimmed of the parenthesized
    year span in each link label."""
    import re
    from billy.scrape.utils import url_xpath

    labels = url_xpath(
        'https://www.legis.iowa.gov/Legislation/Find/findLegislation.aspx',
        "//div[@id='ctl00_ctl00_ctl00_cphMainContent_cphCenterCol_cphCenterCol_ucGASelect_divLinks']/ul/li/a/text()")
    # Raw string avoids the invalid "\(" escape in the original pattern.
    return [re.findall(r".*\(", label)[0][:-1].strip() for label in labels]
def session_list():
    """Return Alabama regular-session names from the legislature home page."""
    import re
    from billy.scrape.utils import url_xpath

    paragraphs = url_xpath('http://www.legislature.state.al.us/',
                           '//div/p/text()')
    # Raw string; "\ " in the pattern matches a literal space,
    # e.g. "2014 Regular Session".
    pattern = re.compile(r'2[0-9][0-9][0-9]\ Regular\ Session')
    sessions = []
    for paragraph in paragraphs:
        sessions.extend(pattern.findall(paragraph))
    return sessions
def session_list():
    """Return Alabama regular-session names found in home-page paragraphs."""
    import re
    from billy.scrape.utils import url_xpath

    blocks = url_xpath('http://www.legislature.state.al.us/', '//div/p/text()')
    # Raw string avoids the invalid "\ " escapes of the original; the
    # "\ " sequences match literal spaces in the regex.
    session_re = re.compile(r'2[0-9][0-9][0-9]\ Regular\ Session')
    found = []
    for block in blocks:
        found.extend(session_re.findall(block))
    return found
def session_list():
    """Return Tennessee session names from the legislation archives.

    Special sessions are available in the archive but not as a current
    session, so they are scraped as part of the regular session.
    """
    from billy.scrape.utils import url_xpath

    items = url_xpath(
        'http://www.capitol.tn.gov/legislation/archives.html',
        '//h2[text()="Bills and Resolutions"]/following-sibling::ul/li/text()')
    # Skip whitespace-only list items.
    return [item for item in items if item.strip()]
def get_default_headers(page):
    """Collect the default form-field values found on *page*.

    Every element carrying a ``name`` attribute contributes an entry: the
    ``value`` attribute is preferred, falling back to the element text,
    and missing/empty values become empty strings. Relies on a
    module-level ``url_xpath`` import.
    """
    headers = {}
    for element in url_xpath(page, "//*[@name]"):
        field = element.attrib['name']
        try:
            content = element.attrib['value']
        except KeyError:
            content = element.text
        headers[field] = content or ""
    return headers
def session_list():
    """Return Colorado session names scraped from the 2011 front pages.

    The original duplicated the scan loop per URL; it is now a single loop
    over both pages, and the pattern is a raw string.
    """
    import re
    from billy.scrape.utils import url_xpath

    # "\ " in the pattern matches a literal space, e.g. "2011 Regular Session".
    session_re = re.compile(r"2[0-9][0-9][0-9]\ .*\ Session")
    urls = (
        'http://www.leg.state.co.us/clics/clics2011a/cslFrontPages.nsf/PrevSessionInfo?OpenForm',
        'http://www.leg.state.co.us/CLICS/CLICS2011A/csl.nsf/Home?OpenForm&BaseTarget=Bottom',
    )
    sessions = []
    for url in urls:
        for tag in url_xpath(url, "//font/text()"):
            sessions.extend(session_re.findall(tag))
    return sessions
def session_list():
    """Return Tennessee session names plus the current 109th General Assembly.

    Special sessions are available only in the archive, so they are
    scraped as part of the regular session.
    """
    from billy.scrape.utils import url_xpath

    entries = url_xpath(
        'http://www.capitol.tn.gov/legislation/archives.html',
        '//h2[text()="Bills and Resolutions"]/following-sibling::ul/li/text()')
    sessions = [entry for entry in entries if entry.strip()]
    # The archive does not list the current assembly; add it explicitly.
    sessions.append("109th General Assembly")
    return sessions
def session_list():
    """Return Georgia session names from the legislation search page.

    The original bound the select's ID to an unused local; it is kept here
    as documentation only.
    """
    from billy.scrape.utils import url_xpath

    # XXX: If this breaks, it's because of this wonky xpath thing: the
    # select's ID seemed to change between visits, so the second <select>
    # on the page is used instead of addressing it by ID. Last known ID:
    # ctl00_SPWebPartManager1_g_3ddc9629_a44e_4724_ae40_c80247107bd6_Session
    selects = url_xpath('http://www.legis.ga.gov/Legislation/en-US/Search.aspx',
                        "//select")
    options = selects[1].xpath("option/text()")
    return [option.strip() for option in options]
def get_default_headers(page):
    """Collect default form-field values from *page*, adding the standard
    ASP.NET postback fields.

    Each named element contributes its ``value`` attribute (falling back
    to element text), stripped of surrounding whitespace; missing values
    become empty strings. Relies on a module-level ``url_xpath`` import.
    """
    headers = {}
    for element in url_xpath(page, "//*[@name]"):
        field = element.attrib['name']
        try:
            content = element.attrib['value']
        except KeyError:
            content = element.text
        if content:
            content = content.strip()
        headers[field] = content or ""
    # ASP.NET postbacks expect these fields to be present, even if empty.
    for postback_field in ('__EVENTTARGET', '__EVENTARGUMENT', '__LASTFOCUS'):
        headers[postback_field] = ""
    return headers
def session_list():
    """Return Virginia session names from the LIS navigation dropdown.

    The dropdown mixes navigation entries with actual sessions; each known
    non-session entry is removed once per listing below (separators appear
    several times, matching their repetition in the dropdown).
    """
    from billy.scrape.utils import url_xpath

    non_sessions = [
        'QUICK LINKS',
        '- - - - - - - - - - - - - -',
        'Log in',
        'LIS Home',
        'General Assembly Home',
        '- - - - - - - - - - - - - -',
        'Session Tracking:',
        'Bills & Resolutions',
        'Members',
        'Committees',
        'Meetings',
        'Calendars',
        'Communications',
        'Minutes',
        'Statistics',
        'Lobbyist-in-a-Box',
        'Personal lists',
        '- - - - - - - - - - - - - -',
        'Search:',
        'Code of Virginia',
        'Administrative Code',
        'Bills & Resolutions',
        'Summaries',
        u'OTHER SESSIONS',
        '- - - - - - - - - - - - - -',
    ]
    entries = url_xpath('http://lis.virginia.gov/121/lis.htm',
                        "//select[@name='val']/option/text()")
    entries = [entry.strip() for entry in entries]
    for junk in non_sessions:
        entries.remove(junk)
    return entries
def session_list():
    """Return Puerto Rico session names from the bill-search form."""
    from billy.scrape.utils import url_xpath

    # This URL should keep working even for future sessions.
    search_url = 'http://www.oslpr.org/legislatura/tl2013/buscar_2013.asp'
    return url_xpath(search_url, '//select[@name="URL"]/option/text()')
def session_list():
    """Return Tennessee session names from the legislation archives.

    Special sessions are available in the archive but not as a current
    session; the workaround is to scrape them as part of the regular
    session.
    """
    from billy.scrape.utils import url_xpath

    archive_url = 'http://www.capitol.tn.gov/legislation/archives.html'
    return url_xpath(archive_url,
                     "//div[@class='col1']/ul/li[@class='show']/text()")