def parse_yhd_item(self, reponse):
        """Parse a YHD (1号店) product page and fill in the pending item.

        The item is taken from ``reponse.meta['item']``. ``title``, ``brand``
        and ``source`` are always set; ``product_name`` is set only for
        self-operated goods. ``source`` is 1 when the page states the product
        is supplied by YHD itself and 0 otherwise.

        Yields:
            The populated item.
        """
        soup = BeautifulSoup(reponse.body, "html5lib")
        item = reponse.meta['item']

        item['title'] = soup.find('h1', id="productMainName").text

        # The seller notice is optional markup: a page without it is treated
        # as a third-party listing instead of failing on ``None.text``.
        source_tag = soup.find('p', attrs={'class': 'add_02'})
        is_proprietary_trading = (
            source_tag is not None and
            source_tag.text.strip().startswith(u'本商品由1号店自营提供'))

        if is_proprietary_trading:
            ul_tag = soup.find('ul', attrs={'class': 'ull'})

            # The string 'None' is the "not found" marker this code checks
            # for, so try each known label until one yields a real value.
            brand_name = 'None'
            for label in (u'【产品品牌】:', u'【品牌名称】:'):
                brand_name = Parse_Util.get_parse_value(ul_tag, label)
                if brand_name != 'None':
                    break
            if brand_name == 'None':
                # Last resort: read the brand from the description block.
                dl_tag = soup.find('dl', attrs={'class': 'des_info clearfix'})
                brand_name = self.get_brand(dl_tag)

            product_name = 'None'
            for label in (u'【产品名称】:', u'【商品名称】:', u'【名称】:'):
                product_name = Parse_Util.get_parse_value(ul_tag, label)
                if product_name != 'None':
                    break

            item['brand'] = brand_name
            item['product_name'] = product_name
            item['source'] = 1
        else:
            good_tag = soup.find('dl', class_="des_info clearfix")
            item['brand'] = self.get_brand(good_tag)
            item['source'] = 0

        yield item
    def parse_kaola_item(self, reponse):
        """Parse a Kaola product page and attach its details to the item.

        Builds a ``KaoLaMMItem`` holding the page title, brand, product name
        and the parameter dictionary, stores it under
        ``item['other_parameter']`` of the item carried in
        ``reponse.meta['item']`` and yields that item.
        """
        page = BeautifulSoup(reponse.body, "html5lib")
        item = reponse.meta['item']

        details = KaoLaMMItem()
        details['title'] = soup_title = page.find(
            'dt', class_="product-title").text

        # Every remaining field is read from the goods-parameter list.
        params_tag = page.find('ul', class_='goods_parameter')
        labelled_fields = (
            ('brand', u'商品品牌:'),
            ('product_name', u'品名:'),
        )
        for field, label in labelled_fields:
            details[field] = Parse_Util.get_parse_value(params_tag, label)
        details['good_detail'] = Parse_Util.make_up_dic(params_tag)

        item['other_parameter'] = details
        yield item
    # Example no. 3
    # 0
    def parse_word_wide_item(self, response):
        """Parse a global-purchase (全球购) product page.

        Fills a ``JDMMItem`` with the title, product name, brand and
        parameter dictionary, stores it under ``item['other_parameter']`` of
        the item carried in ``response.meta['item']``, then yields a request
        for the comment page built from ``response.meta['id']``. The item
        travels on in that request's ``meta``.
        """
        page = BeautifulSoup(response.body, "html5lib")
        item = response.meta['item']
        item_id = response.meta['id']

        name_tag = page.find('div', id="name")

        details = JDMMItem()
        details['title'] = self.delete_node_content(name_tag, 'span')

        # Product name and brand are both read from the same parameter list.
        params_tag = page.find('ul', id="parameter2")
        labelled_fields = (
            ('product_name', u'商品名称:'),
            ('brand', u'品牌:'),
        )
        for field, label in labelled_fields:
            details[field] = Parse_Util.get_parse_value(params_tag, label)
        details['good_detail'] = Parse_Util.make_up_dic(params_tag)
        item['other_parameter'] = details

        comment_url = comment_origin_url % int(item_id)
        yield Request(comment_url,
                      callback=self.parse_comment_detail,
                      meta={'item': item})
    # Example no. 4
    # 0
    def parse_jd_item(self, response):
        """Parse a regular JD product page.

        Fills a ``JDMMItem`` with the title, product name, brand and
        parameter dictionary, stores it under ``item['other_parameter']`` of
        the item carried in ``response.meta['item']``, then yields a request
        for the comment page built from ``response.meta['id']``. The item
        travels on in that request's ``meta``.

        The title falls back to ``'error'`` and the brand to ``'None'`` when
        the corresponding markup is missing.
        """
        soup = BeautifulSoup(response.body, "html5lib")
        item = response.meta['item']
        item_id = response.meta['id']

        # The title is the text of the <h1> sitting directly inside div#name.
        # Keep the 'error' sentinel if the div, the <h1> or its single string
        # is missing rather than failing on None.
        title = 'error'
        title_tag = soup.find('div', id="name")
        if title_tag is not None:
            h1_tag = title_tag.find(u"h1", recursive=False)
            if h1_tag is not None and h1_tag.string is not None:
                title = h1_tag.string
        jd_item = JDMMItem()
        jd_item['title'] = title.encode('utf-8')

        good_tag = soup.find('ul', attrs={'id': 'parameter2'})
        jd_item['product_name'] = Parse_Util.get_parse_value(
            good_tag, u'商品名称:')
        jd_item['good_detail'] = Parse_Util.make_up_dic(good_tag)

        jd_item['brand'] = 'None'
        ul_tag = soup.find('ul', id="parameter-brand")
        first_li = ul_tag.find('li') if ul_tag is not None else None
        if first_li is not None:
            jd_item['brand'] = first_li.get("title")

            # The entry text is "<label>:<value>", optionally followed by a
            # '♥' and further text; drop whitespace and that trailing part.
            brand_str = re.sub(r'\s+', '', first_li.text)
            brand_str = brand_str.split(u'♥')[0]
            brand_str_list = brand_str.split(u':')
            # Only record the pair when a value follows the colon separator.
            if len(brand_str_list) > 1:
                jd_item['good_detail'][brand_str_list[0]] = brand_str_list[1]

        item['other_parameter'] = jd_item

        item_comment_link = comment_origin_url % int(item_id)
        yield Request(item_comment_link,
                      callback=self.parse_comment_detail,
                      meta={'item': item})