예제 #1
0
 def parse_reload(self, response):
     """Re-request the query page stashed in meta after a settings change.

     ``dont_filter`` lets the repeated URL through scrapy's dedup filter.
     """
     original_url = response.meta['url']
     return response.follow(
         original_url,
         callback=self.parse_query,
         dont_filter=True,
         headers=common.headers(self.county_abbr))
예제 #2
0
 def parse_login(self, response):
     """After login, follow the '提案' (proposals) menu link."""
     menu_href = response.xpath(u'//a[re:test(., "^提案$")]/@href').extract_first()
     return response.follow(
         menu_href,
         callback=self.parse_unordered,
         headers=common.headers(self.county_abbr))
예제 #3
0
 def parse_unordered(self, response):
     """Ask the site to sort results in descending order, then reload."""
     sort_payload = dict(act='search_set', field='SET_OrderByMethod', value='DESC')
     yield scrapy.FormRequest(
         response.urljoin('application.php'),
         formdata=sort_payload,
         callback=self.parse_reload,
         meta={'url': response.url},
         headers=common.headers(self.county_abbr))
예제 #4
0
 def parse_list(self, response):
     """Yield a profile request per grid-row link, then page forward.

     Pagination submits the ASP.NET ``__doPostBack`` target parsed out of
     the "next page" href.
     """
     for link in response.css('.GridItem a::attr(href),.GridAlternatingItem a::attr(href)').extract():
         # NOTE(review): time.sleep blocks scrapy's reactor; DOWNLOAD_DELAY
         # in settings is the non-blocking equivalent. Kept to preserve behavior.
         time.sleep(60)
         yield response.follow(link, callback=self.parse_profile)
     next_page = response.css(u'.GridPager span ~ a::attr(href)').extract_first()
     if next_page:
         # Raw string: '\(' in a non-raw literal is an invalid escape sequence.
         payload = {'__EVENTTARGET': re.search(r"doPostBack\('([^']*)'", next_page).group(1)}
         yield scrapy.FormRequest.from_response(response, formdata=payload, callback=self.parse_list, dont_filter=True, dont_click=True, headers=common.headers(self.county_abbr))
예제 #5
0
 def parse_query(self, response):
     """Submit the search form once per available session ("sid") option."""
     sid_values = response.xpath(u'//select[@name="sid"]/option/@value').extract()
     for sid in sid_values:
         yield scrapy.FormRequest.from_response(
             response,
             formdata={'sid': sid},
             callback=self.parse_list,
             dont_filter=True,
             dont_click=True,
             headers=common.headers(self.county_abbr))
예제 #6
0
 def parse_unordered(self, response):
     """Switch the result ordering to DESC before re-reading the list."""
     form_url = response.urljoin('application.php')
     request_meta = {'url': response.url}
     yield scrapy.FormRequest(
         form_url,
         formdata={'act': 'search_set', 'field': 'SET_OrderByMethod', 'value': 'DESC'},
         callback=self.parse_reload,
         meta=request_meta,
         headers=common.headers(self.county_abbr))
예제 #7
0
 def parse_query(self, response):
     """Query every (bill type, council) combination.

     Bill types are restricted to 提案 (proposals) and 請願 (petitions).
     """
     bill_types = response.xpath(u'//select[@name="Type"]/option[re:test(., "(提案|請願)")]/@value').extract()
     councils = response.xpath(u'//select[@name="Council"]/option/@value').extract()
     for bill_type in bill_types:
         for council in councils:
             yield scrapy.FormRequest.from_response(
                 response,
                 formdata={'Type': bill_type, 'Council': council},
                 callback=self.parse_list,
                 dont_filter=True,
                 headers=common.headers(self.county_abbr))
예제 #8
0
 def parse_query(self, response):
     """Submit the form once per value of the rbtnMKind radio group."""
     field = 'ctl00$ContentPlaceHolder1$rbtnMKind'
     for kind in response.xpath(u'//input[@name="ctl00$ContentPlaceHolder1$rbtnMKind"]/@value').extract():
         yield scrapy.FormRequest.from_response(
             response,
             formdata={field: kind},
             callback=self.parse_list,
             dont_filter=True,
             dont_click=True,
             headers=common.headers(self.county_abbr))
예제 #9
0
 def parse_list(self, response):
     """Walk list rows of the target term (屆) and follow profile links.

     Rows appear to be ordered by term, newest first: a row older than
     ``self.ad`` stops the scan, a newer one is skipped.
     """
     pages = response.css('#ctl00_ContentPlaceHolder1_gvIndex_ctl13_lblPageCount::text').extract_first()
     print(pages)  # parenthesized: works on py2 and py3 (was a py2-only print statement)
     # Initialize so the pagination check below cannot raise NameError
     # when the row selector matches nothing (previously unbound).
     node_ad = self.ad
     for node in response.css('.main3_3_04,.main3_3_05'):
         node_ad = int(node.xpath('td[2]/text()').re(u'(\d+)\s*屆')[0])
         if node_ad < self.ad:
             break
         if node_ad > self.ad:
             continue
         yield response.follow(node.xpath('td[6]/span/a/@href').extract_first(), callback=self.parse_profile)
     next_page = response.xpath(u'//a[re:test(.,"下一頁")]/@href').extract_first()
     if next_page and node_ad >= self.ad:
         # Raw string: '\(' in a non-raw literal is an invalid escape sequence.
         payload = {'__EVENTTARGET': re.search(r"doPostBack\('([^']*)'", next_page).group(1)}
         yield scrapy.FormRequest.from_response(response, formdata=payload, callback=self.parse_list, dont_filter=True, dont_click=True, headers=common.headers(self.county_abbr))
예제 #10
0
 def parse_login(self, response):
     """Once logged in, navigate to the '提案' (proposals) section."""
     return response.follow(response.xpath(u'//a[re:test(., "^提案$")]/@href').extract_first(), callback=self.parse_unordered, headers=common.headers(self.county_abbr))
예제 #11
0
 def parse_reload(self, response):
     """Reload the URL saved in meta and continue with parse_query."""
     return response.follow(response.meta['url'], callback=self.parse_query, dont_filter=True, headers=common.headers(self.county_abbr))
예제 #12
0
 def parse_list(self, response):
     """Follow each row's popup link, then fan out over all later pages.

     Pagination only runs from page 1 (selected page label matches '1$'),
     where every following page link is submitted in one pass.
     """
     for node in response.xpath('//table[@id="dg_List"]/tr[position()>1 and position()<last()]'):
         yield response.follow(node.xpath('td[1]/input/@onclick').re(u"open\('(.*?=\d+)")[0], callback=self.parse_profile)
     if response.css('.MultiPageButtonFont span::text').re('1$'):
         # Was '////input', which is not valid XPath ('//' cannot be followed
         # by another '/') and makes lxml raise XPathEvalError.
         payload = {name: None for name in response.xpath('//input[not(@type="hidden")]/@name').extract()}
         for page in response.css('.MultiPageButtonFont').xpath('descendant::span[1]/following-sibling::a'):
             # Raw string: '\(' in a non-raw literal is an invalid escape sequence.
             payload['__EVENTTARGET'] = page.xpath('@href').re(r"doPostBack\('([^']*)'")[0]
             yield scrapy.FormRequest.from_response(response, formdata=payload, callback=self.parse_list, dont_filter=True, dont_click=True, headers=common.headers(self.county_abbr))
예제 #13
0
 def parse_list(self, response):
     """Build an item (with id parsed from the link) per row and follow it.

     Paging continues only while the page <select> still has options after
     the currently selected one.
     """
     for node in response.css('table#dg tr')[1:]:
         # The same href is both parsed for the id and followed; fetch it once.
         href = node.xpath('td[1]/descendant::a/@href').extract_first()
         item = {'id': re.search(u'Fmotion_instanceOS=([^&]*)', href).group(1)}
         yield response.follow(href, callback=self.parse_profile, meta={'item': item})
     next_page = response.xpath(u'//a[re:test(.,"下一頁")]/@href').extract_first()
     has_next_page = response.xpath(u'//select[@name="page"]/option[@selected]/following-sibling::option').extract()
     if next_page and has_next_page:
         # Raw string: '\(' in a non-raw literal is an invalid escape sequence.
         payload = {'__EVENTTARGET': re.search(r"doPostBack\('([^']*)'", next_page).group(1)}
         yield scrapy.FormRequest.from_response(response, formdata=payload, callback=self.parse_list, dont_filter=True, dont_click=True, headers=common.headers(self.county_abbr))
예제 #14
0
 def parse_query(self, response):
     """Parse a search-result page: emit one item per result and follow it.

     The 302/dont_redirect meta lets parse_profile see redirect responses
     instead of scrapy following them.
     """
     # Digit-only page counter from the result summary (currently only printed).
     pages = re.sub('\D', '', response.css('.result_select').xpath('string()').extract_first())
     print(pages)  # parenthesized: works on py2 and py3 (was a py2-only print statement)
     for node in response.css('.result_content'):
         item = {}
         item['election_year'] = self.election_year
         link = node.css('.acc_link a::attr(href)').extract_first()
         item['id'] = node.css('.acc_type::text').extract_first().split('@')[0].strip()
         # NOTE(review): this queries the whole response, so every node gets
         # the page's first 類別階層 value; a sibling parse_query scopes the
         # same lookup to the node — confirm which is intended here.
         level = response.xpath(u'string((//span[re:test(., "類別階層")]/following-sibling::span)[1])').extract_first()
         item['type'], item['category'] = re.search(u'/([^/]+)/?(.*)$', level).groups()
         item['abstract'] = re.sub('\s', '', node.css('.result_text::text').extract_first())
         yield response.follow(link, callback=self.parse_profile, meta={'item': item, 'handle_httpstatus_list': [302], 'dont_redirect': True}, headers=common.headers(self.county_abbr))
         # NOTE(review): blocks scrapy's reactor; kept to preserve behavior.
         time.sleep(.5)
     next_page = response.css('.page_botton.pb_pagedw::attr(href)').extract_first()
     if next_page:
         yield response.follow(next_page, callback=self.parse_query)
예제 #15
0
 def parse_query(self, response):
     """Parse a result page, emitting an item per result of the target term.

     Results appear to be ordered by term: the first row whose link text
     no longer matches ``self.ad`` aborts the whole crawl via CloseSpider.
     """
     # Digit-only page counter from the result summary (currently unused).
     pages = re.sub('\D', '', response.css('.result_select').xpath('string()').extract_first())
     for node in response.css('.result_content'):
         link_node = node.css('.acc_link a')
         # NOTE(review): self.ad is used as a regex pattern here, so it is
         # presumably a string — confirm against the spider's __init__.
         if link_node.xpath('text()').re(self.ad):
             item = {}
             item['election_year'] = self.election_year
             link = link_node.xpath('@href').extract_first()
             # Result id precedes the '@' in the .acc_type label.
             item['id'] = node.css('.acc_type::text').extract_first().split('@')[0].strip()
             # First span following the "類別階層" (category hierarchy) label,
             # scoped to this node.
             level = node.xpath(u'string((descendant::span[re:test(., "類別階層")]/following-sibling::span)[1])').extract_first()
             item['type'], item['category'] = re.search(u'/([^/]+)/?(.*)$', level).groups()
             item['abstract'] = re.sub('\s', '', node.css('.result_text::text').extract_first())
             # Keep 302 responses visible to parse_profile instead of
             # letting scrapy auto-follow the redirect.
             yield response.follow(link, callback=self.parse_profile, meta={'item': item, 'handle_httpstatus_list': [302], 'dont_redirect': True}, headers=common.headers(self.county_abbr))
         else:
             # Out of the requested range: stop the spider entirely.
             raise scrapy.exceptions.CloseSpider('out of date range')
         # Throttle between detail requests (blocks the reactor, though).
         time.sleep(.5)
     next_page = response.css('.page_botton.pb_pagedw::attr(href)').extract_first()
     if next_page:
         yield response.follow(next_page, callback=self.parse_query)
예제 #16
0
 def parse_list(self, response):
     """Follow rows of the requested term, then press the "next page" button."""
     pages = response.css('#BodyContent_PageHelpWuc1_lbTotalInFo::text').extract_first()
     for row in response.css('table.list3 tbody tr'):
         row_ad = int(row.xpath('td[1]/text()').re(u'(\d+)\s*屆')[0])
         if row_ad < self.ad:
             break
         elif row_ad == self.ad:
             profile_url = row.xpath('@onclick').re("href='(.*)'")[0]
             yield response.follow(profile_url, callback=self.parse_profile)
     next_button = response.xpath(u'//input[re:test(@value, "下一頁")][not(@disabled)]')
     if next_button:
         payload = {
             next_button.xpath('@name').extract_first(): next_button.xpath('@value').extract_first()
         }
         yield scrapy.FormRequest.from_response(
             response,
             formdata=payload,
             callback=self.parse_list,
             dont_filter=True,
             dont_click=True,
             headers=common.headers(self.county_abbr))
예제 #17
0
 def parse_query(self, response):
     """Search once per department, always for term self.ad.

     The menu1 value for the requested term is loop-invariant, so it is
     looked up once up front.
     """
     term_value = response.xpath(u'//select[@name="menu1"]/option[re:test(., "第\s*%d\s*屆")]/@value' % self.ad).extract_first()
     for dept in response.xpath(u'//select[@name="motiondept"]/option[not(@value="")]/@value').extract():
         yield scrapy.FormRequest.from_response(
             response,
             formdata={'menu1': term_value, 'motiondept': dept},
             callback=self.parse_list,
             dont_filter=True,
             dont_click=True,
             headers=common.headers(self.county_abbr))
예제 #18
0
 def parse_query(self, response):
     """Select the session for term self.ad and trigger the search postback."""
     session_xpath = u'//select[@name="ctl00$ContentPlaceHolder1$uscPeriodSessionMeeting$ddlSession"]/option[re:test(., "%s屆")]/@value' % self.ad
     payload = {
         'ctl00$ContentPlaceHolder1$uscPeriodSessionMeeting$ddlSession': response.xpath(session_xpath).extract_first(),
         'ctl00$ContentPlaceHolder1$uscPeriodSessionMeeting$ddlMeeting': '',
         # Raw string: '\(' in a non-raw literal is an invalid escape sequence.
         '__EVENTTARGET': re.search(r'_PostBackOptions\("([^"]*)', response.css('#ContentPlaceHolder1_LinkButton1::attr(href)').extract_first()).group(1)
     }
     yield scrapy.FormRequest.from_response(response, formdata=payload, callback=self.parse_type, dont_filter=True, dont_click=True, headers=common.headers(self.county_abbr))