Ejemplo n.º 1
0
 def parse(self, response):
     """Scrape the Heilongjiang provincial government leadership page.

     Yields one ``GovPeopleItem`` for the governor block, one per entry of
     the vice-governor list, and finally a request for the linked page of
     other leaders, which is handled by ``self.other_leader``.

     A governor link whose text is missing or is not of the form
     ``<position>:<name>`` is logged and skipped, so the vice-governor
     list and the follow-up request are still processed.

     Args:
         response: Response for the leadership overview page.

     Yields:
         ``GovPeopleItem`` objects, then at most one ``scrapy.Request``.
     """
     import logging  # local import: the file's import block is not visible

     governor_div = response.xpath(
         "//div[@class='twolbta']/div[2]")  # governor block
     if governor_div:
         link_text = governor_div.xpath("./a/text()").extract_first()
         # extract_first() returns None when the node is absent.
         names = link_text.split(":") if link_text else []
         if len(names) >= 2:
             item = GovPeopleItem()
             item['name'] = names[1]
             item['position'] = names[0]
             item['department'] = '省政府领导'
             item['people_url'] = governor_div.xpath(
                 "./a/@href").extract_first()
             item['province'] = '黑龙江省'
             item['city'] = ''
             yield item
         else:
             logging.getLogger(__name__).warning(
                 "Unexpected governor link text %r on %s",
                 link_text, response.url)

     vice_governor_items = response.xpath(
         "//div[@class='twolbtb']/ul/li")  # vice-governor list
     for li in vice_governor_items:
         item = GovPeopleItem()
         item['name'] = li.xpath("./a/text()").extract_first()
         item['position'] = '副省长'
         item['department'] = '省政府领导'
         item['people_url'] = li.xpath("./a/@href").extract_first()
         item['province'] = '黑龙江省'
         item['city'] = ''
         yield item

     other_url = response.xpath(
         "//div[@class='twolbbot']/a/@href").extract_first()
     if other_url:
         # urljoin leaves an absolute URL untouched and resolves a relative
         # href, which scrapy.Request would otherwise reject.
         yield scrapy.Request(
             response.urljoin(other_url), callback=self.other_leader)
Ejemplo n.º 2
0
 def parse(self, response):
     """Scrape the Jilin provincial government leadership page.

     All nodes live under the ``province_list ld_ys_inherit`` container.
     When its first ``dl`` exists, one item is yielded for the leader in
     the first child ``div`` (position text from the ``dt``). The ``dl``
     at position 2 gates the entries of the first two ``ul`` lists, and
     the ``dl`` at position 4 gates the entries of the third ``ul``; each
     group gets a fixed position label.

     Args:
         response: Response for the leadership overview page.

     Yields:
         One ``GovPeopleItem`` per leader found.
     """
     base = "//div[@class='province_list ld_ys_inherit']"

     head_dl = response.xpath(base + "/dl[1]")
     if head_dl:
         head_link = base + "/div[1]//h2/a"
         item = GovPeopleItem()
         item['position'] = head_dl.xpath("./dt/text()").extract_first()
         item['name'] = response.xpath(head_link + "/text()").extract_first()
         item['people_url'] = response.urljoin(
             response.xpath(head_link + "/@href").extract_first())
         item['department'] = '省政府领导'
         item['province'] = '吉林省'
         item['city'] = ''
         yield item

     # (heading that must exist, list entries to read, position label)
     sections = (
         ("/dl[position()=2]", "/ul[position()<=2]/li", "副省长"),
         ("/dl[position()=4]", "/ul[position()=3]/li", "秘书长"),
     )
     for heading_path, entries_path, title in sections:
         if not response.xpath(base + heading_path):
             continue
         for entry in response.xpath(base + entries_path):
             item = GovPeopleItem()
             item['name'] = entry.xpath("./a/p/text()").extract_first()
             item['position'] = title
             item['people_url'] = response.urljoin(
                 entry.xpath("./a/@href").extract_first())
             item['department'] = '省政府领导'
             item['province'] = '吉林省'
             item['city'] = ''
             yield item
Ejemplo n.º 3
0
 def parse(self, response):
     """Scrape the Inner Mongolia government leadership page.

     The left panel holds a single leader: the position comes from a
     ``span`` and the name and link from the neighbouring ``a``. Each of
     the first two blocks of the right panel has its position heading in
     its first child ``div`` and either a list of ``li`` links in its
     second child ``div`` or, when no ``li`` is present, a single link
     directly under the block.

     A right-panel block without heading text gets an empty position
     instead of raising and aborting the remaining blocks.

     Args:
         response: Response for the leadership overview page.

     Yields:
         One ``GovPeopleItem`` per leader found.
     """
     def build_item(position, name, href):
         # Shared item construction; only these three values vary.
         item = GovPeopleItem()
         item['position'] = position
         item['name'] = name
         item['people_url'] = response.urljoin(href)
         item['department'] = '自治区政府'
         item['province'] = '内蒙古'
         item['city'] = ''
         return item

     left_position = response.xpath(
         "//div[@class='zwgk_ldjl_zx_detail left']/div/span/text()"
     ).extract_first()  # position title shown in the left panel
     if left_position:
         left_link = "//div[@class='zwgk_ldjl_zx_detail left']/div/a"
         yield build_item(
             left_position.replace(":", ""),
             response.xpath(left_link + "/text()").extract_first(),
             response.xpath(left_link + "/@href").extract_first())

     right_div_list = response.xpath(
         "//div[@class='zwgk_ldjl_zx_right left']/div[position()<=2]")
     for div in right_div_list:
         heading = div.xpath("./div[1]/text()").extract_first()
         # extract_first() returns None when the heading has no text.
         position = (heading or "").replace(":", "")
         li_list = div.xpath("./div[2]//li")
         # Without any <li>, the block's own link is the single leader.
         for node in (li_list or [div]):
             yield build_item(
                 position,
                 node.xpath("./a/text()").extract_first(),
                 node.xpath("./a/@href").extract_first())
Ejemplo n.º 4
0
 def parse_detai(self, response):
     """Build one item from a Beijing leader detail page.

     The name and position are read from the page ``<title>``, which is
     expected to read ``<name>-<position>`` followed by a fixed site
     suffix. The department is taken from ``response.meta['department']``.

     A page whose title is missing or does not contain both parts is
     logged and yields nothing, instead of raising AttributeError or
     IndexError.

     Args:
         response: Detail-page response whose ``meta`` carries
             ``'department'``.

     Yields:
         At most one ``GovPeopleItem``.

     Raises:
         KeyError: If ``'department'`` is not present in ``response.meta``.
     """
     import logging  # local import: the file's import block is not visible

     department = response.meta['department']
     title = response.xpath("//title/text()").extract_first()
     # Strip the fixed site suffix, leaving "<name>-<position>".
     name_and_position = title.replace(
         "-领导-首都之窗-北京市政务门户网站", "").split('-') if title else []
     if len(name_and_position) < 2:
         logging.getLogger(__name__).warning(
             "Unexpected page title %r on %s", title, response.url)
         return
     item = GovPeopleItem()
     item['name'] = name_and_position[0]
     item['position'] = name_and_position[1]
     item['province'] = '北京'
     item['city'] = '北京'
     item['department'] = department
     item['people_url'] = response.url
     yield item
Ejemplo n.º 5
0
 def other_leader(self, response):
     """Scrape the page of other Heilongjiang provincial leaders.

     Each ``f000 twolmain`` block carries a ``<position> <name>`` text in
     its first child ``div``. Blocks whose text is missing or has no
     space-separated second part are logged and skipped, so one malformed
     block no longer drops all the blocks after it.

     Args:
         response: Response for the "other leaders" page.

     Yields:
         One ``GovPeopleItem`` per well-formed block; ``people_url`` is
         left empty.
     """
     import logging  # local import: the file's import block is not visible

     div_list = response.xpath("//div[@class='f000 twolmain']")
     for div in div_list:
         text = div.xpath("./div[1]/text()").extract_first()
         # extract_first() returns None when the node has no text.
         msg = text.split(' ') if text else []
         if len(msg) < 2:
             logging.getLogger(__name__).warning(
                 "Skipping leader entry with text %r on %s",
                 text, response.url)
             continue
         item = GovPeopleItem()
         item['name'] = msg[1]
         item['position'] = msg[0]
         item['department'] = '省政府领导'
         item['people_url'] = ''
         item['province'] = '黑龙江省'
         item['city'] = ''
         yield item
Ejemplo n.º 6
0
 def parse(self, response):
     """Scrape the Liaoning provincial government leadership list.

     Each ``li`` link text is expected to read ``<name> <position>``;
     ideographic spaces (U+3000) are removed from both parts. Entries
     whose text is missing or has no space-separated second part are
     logged and skipped, so one malformed entry no longer drops all the
     entries after it.

     Args:
         response: Response for the leadership overview page.

     Yields:
         One ``GovPeopleItem`` per well-formed list entry.
     """
     import logging  # local import: the file's import block is not visible

     li_list = response.xpath("//div[@class='l-box-right']/ul/li")
     for li in li_list:
         text = li.xpath("./a/text()").extract_first()
         # extract_first() returns None when the link has no text.
         names = text.split(" ") if text else []
         if len(names) < 2:
             logging.getLogger(__name__).warning(
                 "Skipping leader entry with text %r on %s",
                 text, response.url)
             continue
         item = GovPeopleItem()
         item['position'] = names[1].replace(u'\u3000', '')
         item['department'] = '省政府领导'
         item['name'] = names[0].replace(u'\u3000', '')
         item['people_url'] = response.urljoin(
             li.xpath("./a/@href").extract_first())
         item['province'] = '辽宁省'
         item['city'] = ''
         yield item
Ejemplo n.º 7
0
 def parse(self, response):
     """Scrape the Hebei provincial government leadership list.

     Each ``li`` link text is expected to read ``<position> <name>``.
     Entries whose text is missing or has no space-separated second part
     are logged and skipped, so one malformed entry no longer drops all
     the entries after it.

     Args:
         response: Response for the leadership overview page.

     Yields:
         One ``GovPeopleItem`` per well-formed list entry.
     """
     import logging  # local import: the file's import block is not visible

     li_list = response.xpath("//div[@class='left_zhong']/ul/li")
     for li in li_list:
         text = li.xpath("./a/text()").extract_first()
         # extract_first() returns None when the link has no text.
         names = text.split(' ') if text else []
         if len(names) < 2:
             logging.getLogger(__name__).warning(
                 "Skipping leader entry with text %r on %s",
                 text, response.url)
             continue
         item = GovPeopleItem()
         item['name'] = names[1]
         item['position'] = names[0]
         item['department'] = '省政府领导'
         item['province'] = '河北省'
         item['city'] = ''
         # The path must be relative ("./a"): a leading "/" is evaluated
         # from the document root, never matches the entry's own link, and
         # made every people_url collapse to the page URL.
         item['people_url'] = response.urljoin(
             li.xpath("./a/@href").extract_first())
         yield item
Ejemplo n.º 8
0
 def parse(self, response):
     """Scrape the Shanghai municipal government leadership tab.

     The first three paragraphs of ``Tab1-1`` each hold a position heading
     as text followed by one link per leader. Paragraphs four and five
     hold plain text of the form ``<position>:<names>``, with the names
     separated by runs of five non-breaking spaces; these leaders have no
     link, so ``people_url`` is left empty.

     Missing heading text yields an empty position. Links without text and
     paragraphs without the ``:`` separator are logged and skipped, so one
     malformed node no longer drops everything after it.

     Args:
         response: Response for the leadership overview page.

     Yields:
         One ``GovPeopleItem`` per leader found.
     """
     import logging  # local import: the file's import block is not visible
     log = logging.getLogger(__name__)

     p_list = response.xpath("//div[@id='Tab1-1']/p[position()<=3]")
     for p in p_list:
         heading = p.xpath("./text()").extract_first()
         # extract_first() returns None when the paragraph has no text.
         position = (heading or '').replace(':', '').replace(' ', '')
         a_list = p.xpath("./a")
         for a in a_list:
             name = a.xpath("./text()").extract_first()
             if name is None:
                 log.warning(
                     "Skipping leader link without text on %s",
                     response.url)
                 continue
             item = GovPeopleItem()
             item['name'] = name.replace(u'\u3000', '')
             item['people_url'] = a.xpath("./@href").extract_first()
             item['position'] = position
             item['department'] = "市政府领导"
             item['province'] = '上海'
             item['city'] = '上海'
             yield item

     p_list2 = response.xpath(
         "//div[@id='Tab1-1']/p[position()=4 or position()=5]")
     for p in p_list2:
         text = p.xpath("./text()").extract_first()
         p_str = text.split(':') if text else []
         if len(p_str) < 2:
             log.warning(
                 "Skipping leader paragraph with text %r on %s",
                 text, response.url)
             continue
         position2 = p_str[0]
         # Drop ideographic spaces (U+3000), then split the names on the
         # five non-breaking spaces (U+00A0) that separate them.
         name_list = p_str[1].replace(u'\u3000',
                                      '').split(u"\xa0\xa0\xa0\xa0\xa0")
         for name in name_list:
             item = GovPeopleItem()
             item['name'] = name
             item['people_url'] = ''
             item['position'] = position2
             item['department'] = "市政府领导"
             item['province'] = '上海'
             item['city'] = '上海'
             yield item
Ejemplo n.º 9
0
 def parse(self, response):
     """Scrape the Shanxi provincial government leadership list.

     Each ``li`` of the ``provincial-leaders-inner oflow-hd`` list carries
     a position heading in its ``div`` and one ``dl`` per leader with the
     name and link in ``dd/a``. Non-breaking spaces are removed from the
     heading and ideographic spaces from the name.

     Missing heading text yields an empty position. A ``dl`` without name
     text is logged and skipped, so one malformed node no longer drops
     everything after it.

     Args:
         response: Response for the leadership overview page.

     Yields:
         One ``GovPeopleItem`` per leader found.
     """
     import logging  # local import: the file's import block is not visible

     li_list = response.xpath(
         "//ul[@class='provincial-leaders-inner oflow-hd']/li")
     for li in li_list:
         heading = li.xpath("./div/text()").extract_first()
         # extract_first() returns None when the heading has no text.
         position = (heading or "").replace(u"\xa0", "")
         dl_list = li.xpath("./dl")
         for dl in dl_list:
             name = dl.xpath("./dd/a/text()").extract_first()
             if name is None:
                 logging.getLogger(__name__).warning(
                     "Skipping leader entry without name on %s",
                     response.url)
                 continue
             item = GovPeopleItem()
             item['people_url'] = response.urljoin(
                 dl.xpath("./dd/a/@href").extract_first())
             item['name'] = name.replace(u"\u3000", "")
             item['province'] = '山西省'
             item['city'] = ''
             item['department'] = '省政府领导'
             item['position'] = position
             yield item
Ejemplo n.º 10
0
 def parse(self, response):
     """Scrape the Jiangsu provincial government leadership page.

     Each ``ld-205`` block has an optional position heading in its
     ``ld-zw`` child and the leaders as ``li`` entries under its ``ld-xx``
     child. A block without heading text is labelled with the fixed
     default position. Non-breaking spaces are removed from both the
     position and the name.

     An entry without name text is logged and skipped, so one malformed
     entry no longer drops all the entries after it.

     Args:
         response: Response for the leadership overview page.

     Yields:
         One ``GovPeopleItem`` per leader found.
     """
     import logging  # local import: the file's import block is not visible

     div_list = response.xpath("//div[@class='ld-205']")
     for div in div_list:
         position = div.xpath(
             "./div[@class='ld-zw']/text()").extract_first()
         if position is None:
             position = '副省长'
         # Cleaned once per block rather than once per entry.
         position = position.replace(u'\xa0', '')
         li_list = div.xpath("./div[@class='ld-xx']//li")
         for li in li_list:
             name = li.xpath("./a/div[2]/text()").extract_first()
             if name is None:
                 logging.getLogger(__name__).warning(
                     "Skipping leader entry without name on %s",
                     response.url)
                 continue
             item = GovPeopleItem()
             item['name'] = name.replace(u"\xa0", "")
             item['position'] = position
             item['department'] = '省政府领导'
             item['people_url'] = response.urljoin(
                 li.xpath("./a/@href").extract_first())
             item['province'] = '江苏省'
             item['city'] = ''
             yield item