Beispiel #1
0
    def saveProduct(self, productInfo, checkExisted=True):
        """Persist a product and its related rows inside one transaction.

        ``productInfo`` is stamped with ``self.mark``. When ``checkExisted``
        is true and ``checkProductWithCode`` reports a positive count for the
        product code, nothing is written. Otherwise the product, image, spec,
        SKU and comment tables are written between ``db.begin()`` and
        ``db.commit()``.

        Args:
            productInfo: a ``DbObject`` exposing ``product_code``,
                ``main_img``, ``detail_img``, ``specs``, ``skus``,
                ``comments`` and ``saveableObj()``.
            checkExisted: skip the save when the product code is already
                stored.

        Raises:
            Whatever ``raiseIf`` raises when ``productInfo`` is not a
            ``DbObject``; any exception raised while saving is re-raised
            after the transaction has been rolled back.
        """
        raiseIf(not isinstance(productInfo, DbObject))
        productInfo.mark = self.mark
        connection = self.getDb()

        if checkExisted and self.checkProductWithCode(
                productInfo.product_code) > 0:
            print('已经存在无需保存:' + productInfo.product_code)
            return

        def stamp(record):
            # Image and SKU rows carry the same mark as their product.
            record.mark = self.mark
            return record

        allImages = productInfo.main_img + productInfo.detail_img
        imageRows = self.createSaveObject(map(stamp, allImages))

        # (table, row factory) pairs. The factories are only invoked inside
        # the transaction, one at a time, right before their table is written.
        writers = (
            ('product', lambda: productInfo.saveableObj()),
            ('product_images', lambda: imageRows),
            ('product_spec',
             lambda: self.createSaveObject(productInfo.specs)),
            ('product_sku',
             lambda: self.createSaveObject(map(stamp, productInfo.skus))),
            ('product_comment',
             lambda: self.createSaveObject(productInfo.comments)),
        )

        connection.begin()
        try:
            for tableName, buildRows in writers:
                self.saveData(connection, tableName, buildRows())

            connection.commit()
            print('成功保存商品数据:' + productInfo.product_code)
        except BaseException:
            # BaseException on purpose: an interrupt must roll back as well.
            connection.rollback()
            print('保存商品数据失败,已回滚')
            raise
Beispiel #2
0
    def saveCategory(self, categoryInfo, db=None):
        """Insert a category row unless one with the same URL is known.

        Args:
            categoryInfo: category record supporting ``append(key, value)``
                and ``['c_url']`` lookup. It is mutated: ``'mark'`` is set to
                ``self.mark``.
            db: optional ``MySQLCommand`` to insert with; defaults to
                ``self.db``.

        Raises:
            Whatever ``raiseIf`` raises when ``db`` is given but is not a
            ``MySQLCommand``.
        """
        # Validate the connection first so a rejected call leaves the
        # caller's categoryInfo untouched.
        if db is None:
            db = self.db
        else:
            raiseIf(not isinstance(db, MySQLCommand))

        categoryInfo.append('mark', self.mark)

        categoryUrl = categoryInfo['c_url']
        # NOTE(review): the existence check goes through self, not through
        # the db argument -- confirm both look at the same database.
        if self.checkCategoryWithUrl(categoryUrl):
            print('分类已存在' + categoryUrl)
            return

        db.insert('category', categoryInfo)
Beispiel #3
0
    def saveImageWithInfo(self, productInfo, db=None):
        """Download a product's images to disk and record the outcome.

        Main images go to ``<product dir>/主图/`` and detail images to
        ``<product dir>/详情图/``, where the product directory is
        ``createImagePathTree(category_path) + product_code``. Afterwards the
        ``image_saved`` column of the matching ``product`` row is set to:

        * ``'1'`` - every download succeeded,
        * ``'2'`` - a failure occurred within 5 seconds of starting,
        * ``'3'`` - a failure occurred after more than 5 seconds.

        Failures are reported via ``print`` and the status column; the
        exception itself is not re-raised.

        Args:
            productInfo: product record exposing ``category_path``,
                ``product_code``, ``main_img`` and ``detail_img`` (sequences
                of objects with an ``image_url`` attribute).
            db: optional ``MySQLCommand``; defaults to ``self.getDb()``.

        Raises:
            Whatever ``raiseIf`` raises when ``db`` is given but is not a
            ``MySQLCommand``.
        """
        if db is None:
            db = self.getDb()
        else:
            raiseIf(not isinstance(db, MySQLCommand))

        productCode = str(productInfo.product_code)
        # Both the success and the failure path update the same row.
        # NOTE(review): the clause is assembled by string concatenation; if
        # MySQLCommand.update supports bound parameters, prefer them.
        whereClause = ("mark = '" + self.mark + "' and product_code = '" +
                       productCode + "'")

        def downloadAll(images, targetDir):
            # Create targetDir and fetch every image of the group into it.
            mkDir(targetDir)
            for dbImage in images:
                imageUrl = dbImage.image_url
                downloadImg(imageUrl, targetDir + self.getShortName(imageUrl))

        start = time.time()
        try:
            nextUrl = self.createImagePathTree(
                productInfo.category_path) + productInfo.product_code

            mkDir(nextUrl)

            if productInfo.main_img:
                downloadAll(productInfo.main_img, nextUrl + '/主图/')

            if productInfo.detail_img:
                # Fixed: this loop used to iterate main_img again, so detail
                # images were never downloaded.
                downloadAll(productInfo.detail_img, nextUrl + '/详情图/')

            db.update('product', whereClause, {'image_saved': '1'})
            print('保存图片成功:' + productCode)
        except Exception as ex:
            print('保存商品图片失败' + str(ex))
            span = time.time() - start
            # Slow failures get their own status so they can be told apart
            # from quick ones.
            save_status = '3' if span > 5 else '2'
            db.update('product', whereClause, {'image_saved': save_status})
            print('保存图片失败,' + '耗时' + str(int(span)) + 's, 已标记为' + save_status +
                  ' ' + productCode)
Beispiel #4
0
    def getProductOne(self, url):
        """Scrape one SPU product page into a ``Product`` record.

        Collects the name, category path, price, brand, description,
        specifications, main images, detail images and the SKU table of the
        page. ``unit_name``, ``market_price``, ``buy_code`` and ``brand_img``
        are always left empty here and ``model`` is the fixed placeholder
        ``'分SKU'``.

        Args:
            url: address of the SPU page.

        Returns:
            A ``Product`` with ``product_type`` ``'SPU'`` and
            ``image_saved`` ``'0'``.

        Raises:
            Whatever ``raiseIf`` raises when ``url`` fails the SPU check.
            Lookups on page elements that are not explicitly guarded below
            raise ``AttributeError``/``TypeError``/``IndexError`` if the
            markup differs from what is expected.
        """
        raiseIf(url.replace('item.grainger.cn', '').find('g') <= 0, '传入的URL不属于SPU')

        soup = getHtmlAsSoup(url)

        # Second-to-last path segment; presumably the URL ends with '/'.
        productId = url.split('/')[-2]
        productCode = productId

        introTag = soup.find('div', id='product-intro')
        productName = introTag.find('h1').string

        # The first breadcrumb link is skipped; the product name is appended
        # as the last path element.
        categoryPathTag = soup.find('div', class_='node_path').find_all('a')[1:]
        categoryPath = ''.join(
            cpt.string.strip() + '>' for cpt in categoryPathTag) + productName

        # Name of the direct parent category: the element before the product
        # name in the path.
        categoryName = categoryPath.split('>')[-2]

        summaryItems = introTag.find('ul', id='summary').find_all('li')

        price = ''
        unit_name = ''
        markedPrice = ''
        buyNo = ''
        brandName = ''
        brandUrl = ''
        brandImg = ''
        model = '分SKU'

        for mainInfo in summaryItems:
            label = mainInfo.find('div').string
            if label == '价  格:':
                # Drop the leading character (presumably the currency sign).
                price = mainInfo.find('strong', class_='p-price').string[1:]
            elif label == '品  牌:':
                brandTag = mainInfo.find('a')
                brandName = brandTag.string
                # has_attr() checks the HTML attribute. The builtin hasattr()
                # is always true on a BeautifulSoup Tag, because unknown
                # attribute access falls back to a child-tag search.
                if brandTag.has_attr('href'):
                    brandUrl = "http://item.grainger.cn/" + brandTag['href'][1:]

        productDetailTag = soup.find(id='content_product')

        descriptionTag = productDetailTag.find('div', class_='property')
        description = str(descriptionTag).replace('<br/>', '')

        # Specifications: every "name:value" div becomes one ProductSpec.
        specInfo = []
        specTag = soup.find('ul', class_='specifications')
        if specTag is not None:
            for div in specTag.find_all('div'):
                specText = div.string
                if specText is None:
                    # .string is None when the div does not wrap exactly one
                    # text node; such entries cannot be a name:value pair.
                    continue
                specPair = specText.split(':')
                if len(specPair) == 2:
                    specInfo.append(ProductSpec({
                        'product_code': productCode,
                        'product_id': productId,
                        'spec_name': str(specPair[0]).strip(),
                        'spec_value': str(specPair[1]).strip()}))

        # Main images (type '2').
        mainImage = []
        mainImageTags = soup.find('ul', class_='lh imageThumb')
        if mainImageTags is not None:
            for mtag in mainImageTags.find_all('a'):
                # Fifth rel token without its first and last two characters;
                # presumably the quoted image URL -- verify against the page.
                mainUrl = mtag['rel'][4][1:][:-2]
                mainImage.append(ProductImage({
                    'product_code': productCode,
                    'product_id': productId,
                    'image_url': mainUrl,
                    'type': '2'}))

        # Detail images (type '1').
        imageInfo = []
        detailImageTags = productDetailTag.find('div', class_='group-picture')
        if detailImageTags is not None:
            for imageTag in detailImageTags.find_all('img'):
                imageInfo.append(ProductImage({
                    'product_code': productCode,
                    'product_id': productId,
                    'image_url': imageTag['data-original'],
                    'type': '1'}))

        # SKU rows. The table lookup is guarded so that a page without a SKU
        # table yields an empty list instead of an AttributeError.
        skuInfos = []
        skuTable = soup.find(id='pd_table')
        skuTags = skuTable.tbody if skuTable is not None else None

        if skuTags is not None:
            for skuTr in skuTags.find_all('tr'):
                skuTag = skuTr.find('a', target='_blank')
                # Drop the first two characters of the href (presumably a
                # protocol-relative '//') and prepend the scheme.
                skuUrl = 'http://' + skuTag['href'][2:]
                skuModel = skuTag.string

                stopSale = ''
                if skuTr.find('span', class_='iconOutOfOrder') is not None:
                    stopSale = '停止销售'

                alternativeProductUrl = ''
                alternativeTag = skuTr.find('td', class_='alternative')
                if alternativeTag is not None and alternativeTag.a is not None:
                    alternativeProductUrl = alternativeTag.a['href'][2:]

                skuInfos.append(ProductSku({
                    'product_code': productCode,
                    'product_id': productId,
                    'product_model': skuModel,
                    'model_url': skuUrl,
                    'remark': stopSale,
                    'can_replace': alternativeProductUrl,
                    'info_saved': '0'}))

        dbProduct = Product({'category_path': categoryPath,
                             'product_id': productId,
                             'product_code': productCode,
                             'product_url': url,
                             'product_name': productName,
                             'price': price,
                             'model': model,
                             'description': description,
                             'buy_code': buyNo,
                             'brand_name': brandName,
                             'brand_img': brandImg,
                             'brand_url': brandUrl,
                             'unit_name': unit_name,
                             'market_price': markedPrice,
                             'image_saved': '0',
                             'product_type': 'SPU',
                             'category_name': categoryName,

                             'main_img': mainImage,
                             'detail_img': imageInfo,
                             'specs': specInfo,
                             'skus': skuInfos,
                             'comments': []})

        return dbProduct
Beispiel #5
0
    def getSkuOne(self, skuUrl):
        """Scrape one SKU product page into a ``Product`` record.

        Collects the category path, name, price and unit, list price, order
        number, brand, manufacturer model, description, specifications, the
        main image and the detail images. ``brand_img`` is always left empty
        and ``skus``/``comments`` are empty lists.

        Args:
            skuUrl: address of the SKU page.

        Returns:
            A ``Product`` with ``product_type`` ``'SKU'``, ``image_saved``
            ``'0'`` and ``mark`` set to ``self.mark``.

        Raises:
            Whatever ``raiseIf`` raises when ``skuUrl`` fails the SKU check.
            Lookups on page elements that are not explicitly guarded below
            raise ``AttributeError``/``TypeError``/``IndexError`` if the
            markup differs from what is expected.
        """
        raiseIf(skuUrl.replace('item.grainger.cn', '').find('u') <= 0, '传入的URL不属于SKU')

        soup = getHtmlAsSoup(skuUrl)

        # The first breadcrumb link is skipped; the rest is joined with '>'.
        categoryPathTag = soup.find('div', class_='node_path').find_all('a')
        categoryPath = '>'.join(cpt.string.strip() for cpt in categoryPathTag[1:])

        # Second-to-last path segment; presumably the URL ends with '/'.
        productId = skuUrl.split('/')[-2]
        productCode = productId

        # Direct category name: the second-to-last element of the path.
        categoryName = categoryPath.split('>')[-2]

        introTag = soup.find('div', id='product-intro')
        productName = introTag.find('h1').string

        summaryItems = introTag.find('div', class_='line').find_all('dl')
        price = ''
        unit_name = ''
        markedPrice = ''
        buyNo = ''
        brandName = ''
        brandUrl = ''
        brandImg = ''
        model = ''

        for mainInfo in summaryItems:
            # Look the label up once; each <dl> matches at most one branch.
            label = mainInfo.find('dt').string
            if label == '价  格':
                priceTag = mainInfo.find('span', class_='p-price')
                # First child is the price text, second child holds the unit;
                # the leading character of each is dropped.
                price = priceTag.contents[0][1:]
                unit_name = priceTag.contents[1].string[1:]
            elif label == '面  价':
                markedPrice = mainInfo.find('dd', class_='p-price-del').string[1:]
            elif label == '订 货 号':
                buyNo = mainInfo.find('span').string
            elif label == '品  牌':
                brandTag = mainInfo.find('a')
                brandName = brandTag.string
                # NOTE(review): "http:" + href[1:] only yields a valid URL for
                # particular href shapes; confirm against the page markup.
                brandUrl = "http:" + brandTag['href'][1:]
            elif label == '制造商型号':
                model = mainInfo.find('dd').string

        productDetailTag = soup.find(id='content_product')
        descriptionTag = productDetailTag.find('div', class_='property')
        description = str(descriptionTag).replace('<br/>', '')

        # Specifications: every "name:value" div becomes one ProductSpec.
        specInfo = []
        specTag = soup.find('ul', class_='specifications')
        if specTag is not None:
            for div in specTag.find_all('div'):
                specText = div.string
                if specText is None:
                    # .string is None when the div does not wrap exactly one
                    # text node; such entries cannot be a name:value pair.
                    continue
                specPair = specText.split(':')
                if len(specPair) == 2:
                    specInfo.append(ProductSpec({
                        'product_code': productCode,
                        'product_id': productId,
                        'spec_name': str(specPair[0]).strip(),
                        'spec_value': str(specPair[1]).strip()}))

        # Main image (type '2'): the link inside the spec-n1 box. Both the
        # box and its link are checked so a page without either yields no
        # main image instead of a TypeError.
        mainImage = []
        mainImageBox = soup.find('div', id='spec-n1')
        mainImageLink = mainImageBox.find('a') if mainImageBox is not None else None
        if mainImageLink is not None:
            mainImage.append(ProductImage({
                'product_code': productCode,
                'product_id': productId,
                'image_url': mainImageLink['href'],
                'type': '2'}))

        # Detail images (type '1').
        imageInfo = []
        detailImageTags = productDetailTag.find('div', class_='group-picture')
        if detailImageTags is not None:
            for imageTag in detailImageTags.find_all('img'):
                imageInfo.append(ProductImage({
                    'product_code': productCode,
                    'product_id': productId,
                    'image_url': imageTag['data-original'],
                    'type': '1'}))

        dbSkuProduct = Product({'mark': self.mark,
                                'category_path': categoryPath,
                                'product_id': productId,
                                'product_code': productCode,
                                'product_url': skuUrl,
                                'product_name': productName,
                                'price': price,
                                'model': model,
                                'description': description,
                                'buy_code': buyNo,
                                'brand_name': brandName,
                                'brand_img': brandImg,
                                'brand_url': brandUrl,
                                'unit_name': unit_name,
                                'market_price': markedPrice,
                                'image_saved': '0',
                                'product_type': 'SKU',
                                'category_name': categoryName,

                                'main_img': mainImage,
                                'detail_img': imageInfo,
                                'specs': specInfo,
                                'skus': [],
                                'comments': []})

        return dbSkuProduct
Beispiel #6
0
 def __checkAll(self):
     """Validate that the instance is fully configured.

     Passes a true condition to ``raiseIf`` when ``mark``, ``url``,
     ``conf`` or ``base_path`` is ``None``, or when ``base_path`` does not
     exist on disk.
     """
     # Identity comparison: "is None" cannot be fooled by a custom __eq__.
     raiseIf(self.mark is None)
     raiseIf(self.url is None)
     raiseIf(self.conf is None)
     raiseIf(self.base_path is None)
     raiseIf(not os.path.exists(self.base_path))