Beispiel #1
0
    def parse_page_detail(self, response):
        """Build one ``ThinkTankItem`` from a detail-page response.

        The response is handed to ``parse_item.parse_response`` together
        with ``self.urls_data['tag']``; the result is passed through
        ``parse_item.processing_data`` and ``parse_item.parse_common_field``
        and finally yielded inside an item that also carries the spider's
        site and tag.

        :param response: response of the detail page being parsed
        :return: generator yielding a single ``ThinkTankItem``
        """
        source = self.urls_data
        tag = source['tag']
        extracted = parse_item.parse_response(tag, response)

        # Assign values to the fields that are not obtained by parsing.
        parse_item.processing_data(extracted)

        site = source['site']
        record = parse_item.parse_common_field(response, extracted, site)

        item = ThinkTankItem()
        # Fill the item in the same order as before: data, site, tag.
        for field, value in (('data', record), ('site', site), ('tag', tag)):
            item[field] = value
        yield item
Beispiel #2
0
    def parse_page_detail(self, response):
        """
        Parse a detail page and yield ``self.item`` filled with its data.

        :param response: response of the detail page being parsed
        :return: generator yielding ``self.item`` once
        """
        site = self.urls_data['site']
        # Parse the corresponding fields via the xpath obtained from the
        # database.
        fields = parse_item.parse_response(site, response)

        # ``svg_data`` always exists as a list; it gets one entry only when
        # the parsed ``svg_url`` is truthy.
        fields['svg_data'] = []
        svg_url = fields['svg_url']
        if svg_url:
            svg_payload = parse_item.parse_svg_url(svg_url)
            fields['svg_data'].append(svg_payload)

        # Assign values to the fields that are not obtained by parsing.
        parse_item.processing_data(fields)
        record = parse_item.parse_common_field(response, fields, site)

        # NOTE(review): this fills and yields ``self.item`` itself rather
        # than a fresh object, so unless ``self.item`` is replaced elsewhere
        # between calls, every call hands out the same instance -- confirm
        # this sharing is intended.
        item = self.item
        item['data'] = record
        item['tag'] = self.urls_data['tag']
        item['site'] = site
        yield item
Beispiel #3
0
 def parse_page_detail(self, response):
     """
     Parse a detail page into a single ``ThinkTankItem``.

     The common-field data has its ``'expertDV'`` entry recomputed by
     ``parse_item.parse_expert_DV`` and is then passed to
     ``parse_item.parse_check_data`` before being wrapped in the item.

     :param response: response of the detail page being parsed
     :return: generator yielding the populated item
     """
     settings = self.urls_data
     tag = settings['tag']
     parsed = parse_item.parse_response(tag, response)

     # Assign values to the fields that are not obtained by parsing.
     parse_item.processing_data(parsed)

     site = settings['site']
     data = parse_item.parse_common_field(response, parsed, site)

     # Replace the 'expertDV' entry with the value derived from the
     # response and the current entry.
     expert_dv = parse_item.parse_expert_DV(response, data['expertDV'])
     data['expertDV'] = expert_dv
     parse_item.parse_check_data(data)

     item = ThinkTankItem()
     item['data'] = data
     item['site'] = site
     item['tag'] = tag
     yield item