Exemple #1
0
    def parse_entry(self, driver):
        """Return the product links found on a listing page that loads lazily.

        The page is scrolled repeatedly; after each scroll the link selectors
        are re-queried, and scrolling stops once the number of matched links
        no longer grows.

        Returns:
            list: stripped ``href`` values of the links matched on the last
            pass.
        """
        driver.implicitly_wait(15)
        # Candidate selectors, tried in order; the first one that yields
        # anything wins for that pass.
        selectors = (
            'div.productItemContainer > a',
            'li.productItemContainer > a',
            'li.productItem > a',
            'li.lookItem > a',
            'ul.lv-list-container a',
        )
        seen = 0
        while True:
            for selector in selectors:
                elements = of_utils.find_elements_by_css_selector(
                    driver, selector)
                if elements:
                    break
            if len(elements) <= seen:
                # No new links appeared since the previous scroll.
                break
            seen = len(elements)
            driver.execute_script(
                'window.scrollBy(0, document.body.scrollHeight);')
            of_utils.sleep(4)
        links = []
        for anchor in elements:
            links.append(anchor.get_attribute('href').strip())
        return links
Exemple #2
0
 def parse_product(self, driver):
     """Build a product record from the page currently loaded in ``driver``.

     Starts from a copy of ``of_spider.empty_product`` and fills in title,
     code, price_cny, images and detail using fixed CSS selectors.

     Raises:
         Exception: if the title node is not found.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     title_node = of_utils.find_element_by_css_selector(
         driver, 'div.goods_about > p.about_tit')
     if not title_node:
         raise Exception('Title not found')
     item['title'] = title_node.text.strip().replace('\n', ' ')
     # code
     code_node = of_utils.find_element_by_css_selector(
         driver, 'p.bianhao > span')
     if code_node:
         item['code'] = code_node.text.strip()
     # price_cny
     price_node = of_utils.find_element_by_css_selector(
         driver, 'p.about_cost > span')
     if price_node:
         # The first character is dropped before parsing (presumably a
         # currency symbol).
         raw_price = price_node.text.strip()[1:].strip()
         item['price_cny'] = int(float(raw_price))
     # images: swap the 80X80 token in each src for 540X540
     image_urls = []
     for img in of_utils.find_elements_by_css_selector(
             driver, 'ul.swiper-wrapper > li > img'):
         image_urls.append(
             img.get_attribute('src').strip().replace('80X80', '540X540'))
     item['images'] = ';'.join(image_urls)
     # detail
     detail_lines = []
     for span in of_utils.find_elements_by_css_selector(
             driver, 'div.word > div > p > span'):
         detail_lines.append(span.text.strip())
     item['detail'] = '\n'.join(detail_lines)
     return item
Exemple #3
0
 def parse_product(self, driver):
     """Scrape title, code, images and specification rows from a watch page.

     Starts from a copy of ``of_spider.empty_product``. No price is
     extracted by this method.

     Specification rows that lack either the title or the value paragraph
     are skipped, so one incomplete row does not abort the whole product
     with an ``AttributeError``.

     Raises:
         Exception: if the title node is not found.
     """
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'p.tdr-watch-details__header-watch-name > span')
     if element:
         product['title'] = element.text.strip().replace('\n', '')
     else:
         raise Exception('Title not found')
     # code: keep only the part after the last ':'
     element = of_utils.find_element_by_css_selector(
         driver, 'p.tdr-watch-details__header-watch-reference')
     if element:
         product['code'] = element.text.strip().split(':')[-1]
     # price_cny N/A
     # images
     elements = of_utils.find_elements_by_css_selector(
         driver, 'div.tdr-variations__main-image-canvas-wrapper > img')
     images = [element.get_attribute('src').strip() for element in elements]
     product['images'] = ';'.join(images)
     # detail: one "key:value" line per specification row
     texts = []
     elements = of_utils.find_elements_by_css_selector(
         driver,
         'ul.tdr-watch-details__column > li > div.tdr-watch-details__text')
     for element in elements:
         k_element = of_utils.find_element_by_css_selector(
             element, 'p.tdr-watch-details__title')
         v_element = of_utils.find_element_by_css_selector(
             element, 'p.tdr-watch-details__spectext')
         if not k_element or not v_element:
             # Incomplete row: nothing sensible to record.
             continue
         texts.append(k_element.text.strip() + ':' + v_element.text.strip())
     product['detail'] = '\n'.join(texts)
     return product
Exemple #4
0
 def parse_product(self, driver):
     """Scrape a product page whose price is hidden behind a reveal control.

     Starts from a copy of ``of_spider.empty_product`` and fills in title,
     code, price_cny, images and detail.

     Raises:
         Exception: if the title node is not found.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     name_node = of_utils.find_element_by_css_selector(driver, 'span.product.attribute.name')
     if not name_node:
         raise Exception('Title not found')
     item['title'] = name_node.text.strip()
     # code
     sku_node = of_utils.find_element_by_css_selector(driver, 'span.value[itemprop=sku]')
     if sku_node:
         item['code'] = sku_node.text.strip()
     # price_cny: click the reveal control via JS, wait, then read the price
     reveal = of_utils.find_element_by_css_selector(driver, 'span.product-price-reveal__action__show')
     if reveal:
         driver.execute_script('arguments[0].click();', reveal)
         of_utils.sleep(2)
         price_node = of_utils.find_element_by_css_selector(driver, 'span.price')
         if price_node:
             # Strip the leading ¥ and the thousands separators.
             digits = price_node.text.strip()[1:].strip().replace(',', '')
             item['price_cny'] = int(float(digits))
     # images
     image_urls = []
     for img in of_utils.find_elements_by_css_selector(driver, 'div.fotorama__stage__shaft > div > img'):
         image_urls.append(img.get_attribute('src').strip())
     item['images'] = ';'.join(image_urls)
     # detail: the raw innerHTML of each paragraph is kept
     paragraphs = []
     for node in of_utils.find_elements_by_css_selector(driver, 'div.product-info-details-content > div.value > p'):
         paragraphs.append(node.get_attribute('innerHTML').strip())
     item['detail'] = '\n'.join(paragraphs)
     return item
Exemple #5
0
 def parse_entry(self, driver):
     """Return product links from a listing page, trying several layouts.

     Three static grid layouts are tried first. If none matches, an optional
     "load more" control is clicked and the page is scrolled until the number
     of matched links stops growing.

     Returns:
         list: stripped ``href`` values of the matched links.
     """
     static_layouts = (
         # handbags
         'div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__illu > a',
         # handbags, second layout
         'div.fs-products-grid > div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__wrapper > a',
         # make-up
         'div.fnb_col-wd6.fnb_product-img > a',
     )
     for selector in static_layouts:
         elements = of_utils.find_elements_by_css_selector(driver, selector)
         if elements:
             break
     # watches
     if not elements:
         # The second selector (rings) is only queried when the first finds nothing.
         load_more = (
             of_utils.find_element_by_css_selector(driver, 'div.pd-action-btns > button[role=button]')
             or of_utils.find_element_by_css_selector(driver, 'div.display-all > a')
         )
         if load_more:
             driver.execute_script('arguments[0].click();', load_more)
         of_utils.sleep(5)
         seen = 0
         while True:
             elements = of_utils.find_elements_by_css_selector(driver, 'div.products > div.row > div > ul > li > div.product-item-wrapper > a')
             if len(elements) <= seen:
                 # Scrolling no longer reveals new items.
                 break
             seen = len(elements)
             driver.execute_script('window.scrollBy(0, document.body.scrollHeight);')
             of_utils.sleep(4)
     links = []
     for anchor in elements:
         links.append(anchor.get_attribute('href').strip())
     return links
Exemple #6
0
 def parse_product(self, driver):
     """Scrape title, code, price_cny, images and detail from a product page.

     Starts from a copy of ``of_spider.empty_product``. The detail field is
     only written when its node is present, so a page without a description
     no longer fails with ``AttributeError`` after the other fields were
     collected.

     Raises:
         Exception: if the title node is not found.
     """
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'div.frabic-detail-right > h3')
     if element:
         product['title'] = element.text.strip()
     else:
         raise Exception('Title not found')
     # code + price_cny: the first <strong> is read as the code, the second
     # as the price
     elements = of_utils.find_elements_by_css_selector(
         driver, 'div.detail-chose-box > div > strong')
     if len(elements) >= 2:
         product['code'] = elements[0].text.strip()
         product['price_cny'] = int(float(elements[1].text.strip()))
     # images
     elements = of_utils.find_elements_by_css_selector(
         driver, 'div.frabic-detail-left > img')
     images = [element.get_attribute('src').strip() for element in elements]
     product['images'] = ';'.join(images)
     # detail (optional)
     element = of_utils.find_element_by_css_selector(
         driver, 'table.detail > tbody > tr > td > span.detail')
     if element:
         product['detail'] = element.text.strip()
     return product
 def parse_product(self, driver):
     """Scrape title, images and detail from a product page.

     Starts from a copy of ``of_spider.empty_product``. No code or price is
     extracted by this method.

     The detail text is the ``div.readmore`` text followed by one
     "key:value" line per detail row. A missing ``div.readmore`` node, or a
     row with fewer than two ``span`` children, is skipped instead of
     raising ``AttributeError`` / ``IndexError``.

     Raises:
         Exception: if the title node is not found.
     """
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'div.product-title.desktop > div.inner > h1[lang=en]')
     if element:
         product['title'] = element.text.strip().replace('\n', ' ')
     else:
         raise Exception('Title not found')
     # code N/A
     # price_cny N/A
     # images
     elements = of_utils.find_elements_by_css_selector(
         driver, 'img.main-image')
     images = [element.get_attribute('src').strip() for element in elements]
     product['images'] = ';'.join(images)
     # detail
     texts = []
     element = of_utils.find_element_by_css_selector(driver, 'div.readmore')
     if element:
         texts.append(element.text.strip())
     elements = of_utils.find_elements_by_css_selector(
         driver, 'div.list-details > div.row')
     for element in elements:
         spans = of_utils.find_elements_by_css_selector(element, 'span')
         if not spans or len(spans) < 2:
             # Row does not have both a label and a value.
             continue
         texts.append(spans[0].text.strip() + ':' + spans[1].text.strip())
     product['detail'] = '\n'.join(texts)
     return product
Exemple #8
0
 def parse_entry(self, driver):
     """Return the stripped ``href`` of every product link on a listing page.

     Two layouts are supported; the second selector is only queried when the
     first one yields nothing.
     """
     anchors = (
         of_utils.find_elements_by_css_selector(
             driver, 'div.product > div.product-image > a')
         or of_utils.find_elements_by_css_selector(
             driver, 'div.product > a.product-link')
     )
     links = []
     for anchor in anchors:
         links.append(anchor.get_attribute('href').strip())
     return links
 def parse_product(self, driver):
     """Scrape title, price_cny, images and detail from a product page.

     Starts from a copy of ``of_spider.empty_product``. No code is extracted
     by this method. The detail field is only written when
     ``div#product-description`` exists, so a page without a description no
     longer fails with ``AttributeError``.

     Raises:
         Exception: if the title node is not found.
     """
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'div.product-title > h1')
     if element:
         product['title'] = element.text.strip()
     else:
         raise Exception('Title not found')
     # code N/A
     # price_cny: drop the first character (presumably the currency sign)
     # and the thousands separators
     element = of_utils.find_element_by_css_selector(
         driver, 'span.regular-price > span.price')
     if element:
         price_text = element.text.strip()[1:].strip().replace(',', '')
         product['price_cny'] = int(float(price_text))
     # images: primary layout first, then the fallback container
     elements = of_utils.find_elements_by_css_selector(
         driver,
         'div#product-page-top > div > div > div > div > div > div > img')
     if not elements:
         elements = of_utils.find_elements_by_css_selector(
             driver, 'div.img-cont > img')
     images = [element.get_attribute('src').strip() for element in elements]
     product['images'] = ';'.join(images)
     # detail (optional)
     element = of_utils.find_element_by_css_selector(
         driver, 'div#product-description')
     if element:
         product['detail'] = element.text.strip()
     return product
Exemple #10
0
 def parse_product(self, driver):
     """Scrape a product page where title and price share one text block.

     Every line of the block except the last forms the title; the last line
     carries the price, which is stored as ``price_hkd``. Images and the
     non-empty cells of the details table are collected as well.

     Raises:
         Exception: if the title/price block is not found.
     """
     item = of_spider.empty_product.copy()
     # title + price_hkd
     header = of_utils.find_element_by_css_selector(
         driver, 'div.right[align=left] > div')
     if not header:
         raise Exception('Title not found')
     lines = header.text.split('\n')
     item['title'] = ' '.join(lines[:-1])
     # Price: text before the first '/', then whatever follows the last '$'.
     amount = lines[-1].split('/')[0].split('$')[-1].strip()
     item['price_hkd'] = int(float(amount))
     # code N/A
     # images
     image_urls = []
     for img in of_utils.find_elements_by_css_selector(
             driver, 'div.left[align=center] > div >img'):
         image_urls.append(img.get_attribute('src').strip())
     item['images'] = ';'.join(image_urls)
     # detail: keep only non-empty table cells
     cells = of_utils.find_elements_by_css_selector(
         driver, 'table.table-productdetails > tbody > tr > td')
     stripped = (cell.text.strip() for cell in cells)
     item['detail'] = '\n'.join([text for text in stripped if text])
     return item
Exemple #11
0
    def parse_product(self, driver):
        """Scrape one product out of a list page, selected by URL index.

        The text after the last ``?`` in ``driver.current_url`` is parsed as
        an integer index into the product tiles on the page.

        The matched lists are now bounds-checked before indexing. Previously
        the ``[flag]`` lookup ran before the truthiness test, so a missing
        tile surfaced as ``IndexError`` rather than 'Title not found', and a
        missing price or image aborted the whole product.

        Raises:
            ValueError: if the URL suffix is not an integer.
            Exception: if no title node exists at the requested index.
        """
        flag = int(driver.current_url.split('?')[-1])
        product = of_spider.empty_product.copy()

        def pick(selector):
            # Return the flag-th match for selector, or None when absent.
            matches = of_utils.find_elements_by_css_selector(
                driver, selector) or []
            try:
                return matches[flag]
            except IndexError:
                return None

        # title
        element = pick('.product-list .name')
        if element:
            product['title'] = element.text.strip()
        else:
            raise Exception('Title not found')
        # code N/A
        # price_cny
        element = pick('.product-list .value')
        if element:
            product['price_cny'] = of_utils.convert_price(element.text.strip())
        # images
        images = []
        element = pick('.product-list .product-img img')
        if element:
            src = element.get_attribute('src')
            # get_attribute yields None for a missing attribute; joining
            # None would raise TypeError.
            if src:
                images.append(src)
        product['images'] = ';'.join(images)
        # detail N/A
        return product
 def parse_product(self, driver):
     """Scrape title, code, price_cny, images and detail from a product page.

     Starts from a copy of ``of_spider.empty_product``. The price is
     best-effort: text that cannot be parsed as a number leaves the price
     unset. Only number-parsing errors are suppressed; the previous bare
     ``except`` also swallowed unrelated failures such as
     ``KeyboardInterrupt`` and driver errors.

     Raises:
         Exception: if the title node is not found.
     """
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'div.line1 > h1')
     if element:
         product['title'] = element.text.strip()
     else:
         raise Exception('Title not found')
     # code: last space-separated token of the reference text
     element = of_utils.find_element_by_css_selector(
         driver, 'div.reference > span')
     if element:
         product['code'] = element.text.strip().split(' ')[-1]
     # price_cny
     element = of_utils.find_element_by_css_selector(
         driver, 'div#priceDisplayValue')
     if element:
         # Drop the first character (presumably the currency sign) and the
         # thousands separators.
         price_text = element.text.strip()[1:].replace(",", '')
         try:
             product['price_cny'] = int(float(price_text))
         except (ValueError, OverflowError):
             # Non-numeric price text: keep the default price.
             pass
     # images
     elements = of_utils.find_elements_by_css_selector(
         driver, 'div.callage > img.pngfix')
     images = [element.get_attribute('src').strip() for element in elements]
     product['images'] = ';'.join(images)
     # detail
     elements = of_utils.find_elements_by_css_selector(
         driver, 'ul.listInfos.specifications > li')
     texts = [element.text.strip() for element in elements]
     product['detail'] = '\n'.join(texts).strip()
     return product
Exemple #13
0
 def parse_product(self, driver):
     """Scrape title, code, price_cny, images and detail from a watch page.

     When a reference node is present, its text is stored as the code and
     also appended to the title, separated by a space. Image URLs are built
     by prefixing each thumbnail's ``data-srcset`` value with the site
     origin.

     Raises:
         Exception: if the title node is not found.
     """
     driver.implicitly_wait(10)
     item = of_spider.empty_product.copy()
     # title: the trailing wish-list button label is removed from the text
     title_node = of_utils.find_element_by_css_selector(driver, '.iwc-buying-options-title')
     if not title_node:
         raise Exception('Title not found')
     item['title'] = title_node.text.strip().replace('\n添加至我的愿望清单','')
     # code (also appended to the title)
     ref_node = of_utils.find_element_by_css_selector(driver, '.iwc-buying-options-reference')
     if ref_node:
         item['code'] = ref_node.text.strip()
         item['title'] = ' '.join([item['title'], ref_node.text.strip()])
     # price_cny
     price_node = of_utils.find_element_by_css_selector(driver, '.iwc-buying-options-price')
     if price_node:
         item['price_cny'] = of_utils.convert_price(price_node.text.strip())
     # images: slick-cloned thumbnail containers are excluded by the selector
     image_urls = []
     for thumb in of_utils.find_elements_by_css_selector(driver, '.iwc-buying-option-thumbnails .iwc-watch-thumbnail-container:not(.slick-cloned) .iwc-watch-thumbnail'):
         image_urls.append('https://www.iwc.cn' + thumb.get_attribute('data-srcset').strip())
     item['images'] = ';'.join(image_urls)
     # detail
     rows = []
     for entry in of_utils.find_elements_by_css_selector(driver, 'ul.detailList > li'):
         rows.append(entry.text.strip())
     item['detail'] = '\n'.join(rows).strip()
     return item
Exemple #14
0
 def parse_product(self, driver):
     """Scrape title, code, price_cny, images and detail from a watch page.

     Starts from a copy of ``of_spider.empty_product``. The price is
     best-effort: text that cannot be parsed as a number leaves the price
     unset. Only number-parsing errors are suppressed; the previous bare
     ``except`` also swallowed unrelated failures such as
     ``KeyboardInterrupt`` and driver errors.

     Raises:
         Exception: if the title node is not found.
     """
     driver.implicitly_wait(10)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(driver, 'h2.pageTitle.watch-title')
     if element:
         product['title'] = element.text.strip()
     else:
         raise Exception('Title not found')
     # code
     element = of_utils.find_element_by_css_selector(driver, 'li.variation-reference > span.val')
     if element:
         product['code'] = element.text.strip()
     # price_cny
     element = of_utils.find_element_by_css_selector(driver, 'li.price > span.val')
     if element:
         # Drop the first character (presumably the currency sign).
         price_text = element.text.strip()[1:].strip()
         try:
             product['price_cny'] = int(float(price_text))
         except (ValueError, OverflowError):
             # Non-numeric price text: keep the default price.
             pass
     # images
     elements = of_utils.find_elements_by_css_selector(driver, 'ul.frontBack > li > img')
     images = [element.get_attribute('src').strip() for element in elements]
     product['images'] = ';'.join(images)
     # detail
     elements = of_utils.find_elements_by_css_selector(driver, 'ul.detailList > li')
     texts = [element.text.strip() for element in elements]
     product['detail'] = '\n'.join(texts).strip()
     return product
Exemple #15
0
    def parse_product(self, driver):
        """Scrape title, code, price_cny and images from a product page.

        Missing nodes never raise here: title and code fall back to ``''``
        and the price to ``0``. Image URLs have the substring ``cache`` and
        any 32-character path segment removed.
        """
        of_utils.sleep(2)
        item = of_spider.empty_product.copy()

        # title
        name_node = of_utils.find_element_by_css_selector(driver, 'div.product-name>h1')
        if name_node:
            item['title'] = name_node.text.strip()
        else:
            item['title'] = ''

        # code
        sku_node = of_utils.find_element_by_css_selector(driver, 'div[itemprop=sku]')
        if sku_node:
            item['code'] = sku_node.text.strip()
        else:
            item['code'] = ''

        # price_cny, read from the meta tag's content attribute
        price_node = of_utils.find_element_by_css_selector(driver, 'meta[itemprop=price]')
        if price_node:
            item['price_cny'] = of_utils.convert_price(price_node.get_attribute('content').strip())
        else:
            item['price_cny'] = 0

        # images: thumbnails first, stage frames as the fallback
        thumbs = of_utils.find_elements_by_css_selector(driver, '.fotorama__thumb>img')
        if not thumbs:
            thumbs = of_utils.find_elements_by_css_selector(driver, '.fotorama__stage__frame>img')
        urls = []
        for thumb in thumbs:
            url = thumb.get_attribute('src').strip().replace('cache', '')
            # Segments come from the URL as it was before any removal below.
            for segment in url.split('/'):
                if len(segment) == 32:
                    # Presumably a cache hash directory — TODO confirm.
                    url = url.replace(segment, '')
            urls.append(url)
        item['images'] = ';'.join(urls)

        return item
Exemple #16
0
 def parse_product(self, driver):
     """Scrape title, price_gbp and images from a product page.

     The price is stored as the raw text minus its first character; it is
     not converted to a number here. No code or detail is extracted.

     Raises:
         Exception: if the title node is not found.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     heading = of_utils.find_element_by_css_selector(
         driver, 'h1.prd-ProductContent_Title')
     if not heading:
         raise Exception('Title not found')
     item['title'] = heading.text.strip()
     # code N/A
     # price_gbp
     price_node = of_utils.find_element_by_css_selector(
         driver, '.prd-ProductPrice_Price>span')
     if price_node:
         item['price_gbp'] = price_node.text.strip()[1:]
     # images: thumbnail link targets when present, otherwise the img sources
     thumb_links = of_utils.find_elements_by_css_selector(
         driver, '.prd-ProductImage_Thumbs a.prd-ProductImage_Link')
     if thumb_links:
         urls = [link.get_attribute('href').strip() for link in thumb_links]
     else:
         pictures = of_utils.find_elements_by_css_selector(
             driver, '.prd-ProductImage img')
         urls = [pic.get_attribute('src').strip() for pic in pictures]
     item['images'] = ';'.join(urls)
     # detail N/A
     return item
Exemple #17
0
 def parse_product(self, driver):
     """Scrape title, price_hkd, images and detail from a product page.

     The title is the space-joined text of every ``.item-name-line`` node.
     The price is stored as raw stripped text, with no numeric conversion.

     Raises:
         Exception: if no title line is found.
     """
     item = of_spider.empty_product.copy()
     # title, assembled from one or more lines
     name_parts = []
     for line in of_utils.find_elements_by_css_selector(
             driver, '.item-name-line'):
         name_parts.append(line.text.strip())
     if not name_parts:
         raise Exception('Title not found')
     item['title'] = ' '.join(name_parts)
     # code N/A
     # price_hkd
     price_node = of_utils.find_element_by_css_selector(
         driver, '.item-price .itemPrice .price .value')
     if price_node:
         item['price_hkd'] = price_node.text.strip()
     # images
     image_urls = []
     for img in of_utils.find_elements_by_css_selector(
             driver, '.item-zoom-images>ul.alternativeImages>li>img'):
         image_urls.append(img.get_attribute('src').strip())
     item['images'] = ';'.join(image_urls)
     # detail
     description = of_utils.find_element_by_css_selector(
         driver, '.editorialdescription>span.value')
     if description:
         item['detail'] = description.text.strip()
     return item
Exemple #18
0
 def parse_product(self, driver):
     """Scrape title, code, images and detail from a watch page.

     No price is extracted by this method. The detail text is made of the
     non-empty paragraphs under the details section.

     Raises:
         Exception: if the title node is not found.
     """
     driver.implicitly_wait(10)
     item = of_spider.empty_product.copy()
     # title (mandatory)
     name_node = of_utils.find_element_by_css_selector(
         driver, 'h1.hidden-xs[itemprop=name]')
     if not name_node:
         raise Exception('Title not found')
     item['title'] = name_node.text.strip()
     # code
     ref_node = of_utils.find_element_by_css_selector(
         driver, 'p.reference > span.ezstring-field')
     if ref_node:
         item['code'] = ref_node.text.strip()
     # price_cny N/A
     # images
     # NOTE(review): 'itemrprop' looks like a typo for 'itemprop' — confirm
     # against the page markup before changing the selector.
     image_urls = []
     for img in of_utils.find_elements_by_css_selector(
             driver, 'div.watch > img[itemrprop=image]'):
         image_urls.append(img.get_attribute('src').strip())
     item['images'] = ';'.join(image_urls)
     # detail: skip empty paragraphs
     paragraphs = of_utils.find_elements_by_css_selector(
         driver, 'div.details > div >  div.ezxmltext-field > p')
     stripped = (paragraph.text.strip() for paragraph in paragraphs)
     item['detail'] = '\n'.join([text for text in stripped if text])
     return item
 def parse_entry(self, driver):
     """Return the stripped ``href`` of every product link on a listing page.

     The handbag grid is tried first; the make-up layout is only queried
     when the grid yields nothing.
     """
     anchors = (
         # handbags
         of_utils.find_elements_by_css_selector(driver, 'div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__illu > a')
         # make-up
         or of_utils.find_elements_by_css_selector(driver, 'div.fnb_col-wd6.fnb_product-img > a')
     )
     links = []
     for anchor in anchors:
         links.append(anchor.get_attribute('href').strip())
     return links
Exemple #20
0
 def parse_product(self, driver):
     """Scrape title, price_cny, images and detail from a product page.

     The price text has every ``ml`` removed before being handed to
     ``of_utils.convert_price``. No code is extracted.

     Raises:
         Exception: if the title node is not found.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     heading = of_utils.find_element_by_css_selector(
         driver, 'h1.product-info-h1')
     if not heading:
         raise Exception('Title not found')
     item['title'] = heading.text.strip()
     # code N/A
     # price_cny
     price_node = of_utils.find_element_by_css_selector(
         driver, '.product-info .price')
     if price_node:
         cleaned = price_node.text.strip().replace('ml', '')
         item['price_cny'] = of_utils.convert_price(cleaned)
     # images: the second layout is only queried if the first finds nothing
     pictures = (
         of_utils.find_elements_by_css_selector(
             driver, '.product-pic .item img')
         or of_utils.find_elements_by_css_selector(
             driver, '.productpage-images .productpage-image img')
     )
     image_urls = []
     for pic in pictures:
         image_urls.append(pic.get_attribute('src').strip())
     item['images'] = ';'.join(image_urls)
     # detail
     info = of_utils.find_element_by_css_selector(
         driver, '.tab-product-info')
     if info:
         item['detail'] = info.text.strip()
     return item
Exemple #21
0
 def parse_product(self, driver):
     """Scrape title, code, price_cny, images and detail from a product page.

     The price is read from the node's ``innerHTML`` rather than its visible
     text.

     Raises:
         Exception: if the title node is not found.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     heading = of_utils.find_element_by_css_selector(
         driver, 'h1.entry-title')
     if not heading:
         raise Exception('Title not found')
     item['title'] = heading.text.strip()
     # code
     sku_node = of_utils.find_element_by_css_selector(
         driver, 'div.container > div.pdp-name > p.pdp-sku')
     if sku_node:
         item['code'] = sku_node.text.strip()
     # price_cny
     price_node = of_utils.find_element_by_css_selector(
         driver, 'div.container > div.pdp-name > p.pdp-price')
     if price_node:
         markup = price_node.get_attribute('innerHTML').strip()
         # Strip the leading ¥ and the thousands separators.
         digits = markup[1:].strip().replace(',', '')
         item['price_cny'] = int(float(digits))
     # images
     image_urls = []
     for img in of_utils.find_elements_by_css_selector(
             driver, 'div.stiky-style-images > a.inventoryVariant > img'):
         image_urls.append(img.get_attribute('src').strip())
     item['images'] = ';'.join(image_urls)
     # detail
     bullets = of_utils.find_elements_by_css_selector(
         driver, 'div.pdp-tab-longdesc > ul > li')
     item['detail'] = '\n'.join([bullet.text.strip() for bullet in bullets])
     return item
Exemple #22
0
 def parse_product(self, driver):
     """Scrape title, images and detail from a product page.

     Starts from a copy of ``of_spider.empty_product``. No code or price is
     extracted by this method.

     The detail text is the ``p.product-information`` text followed by one
     line per table row (first and second cell concatenated). A missing
     information paragraph, or a row with fewer than two cells, is skipped
     instead of raising ``AttributeError`` / ``IndexError``.

     Raises:
         Exception: if the title node is not found.
     """
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'p.product-title')
     if element:
         product['title'] = element.text.strip()
     else:
         raise Exception('Title not found')
     # code N/A
     # price_cny N/A
     # images
     elements = of_utils.find_elements_by_css_selector(
         driver, 'ul > li.slide > a > img')
     images = [element.get_attribute('src').strip() for element in elements]
     product['images'] = ';'.join(images)
     # detail
     texts = []
     element = of_utils.find_element_by_css_selector(
         driver, 'p.product-information')
     if element:
         texts.append(element.text.strip())
     elements = of_utils.find_elements_by_css_selector(
         driver, 'table.gridtable > tbody > tr')
     for element in elements:
         _elements = of_utils.find_elements_by_css_selector(element, 'td')
         if not _elements or len(_elements) < 2:
             # Header or spacer row without a label/value pair.
             continue
         texts.append(_elements[0].text.strip() + _elements[1].text.strip())
     product['detail'] = '\n'.join(texts)
     return product
Exemple #23
0
 def parse_product(self, driver):
     """Scrape title, images, code and detail from a product page.

     Starts from a copy of ``of_spider.empty_product``. No price is
     extracted by this method.

     Changes from the previous version: the leftover debug ``print`` of the
     image elements is gone, and both the model-name node and the accordion
     button are checked before use, so a page lacking either no longer fails
     with ``AttributeError`` or passes ``None`` to the click script.

     Raises:
         Exception: if the title node is not found.
     """
     of_utils.sleep(5)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(driver, 'div.breadcrumbLeaf > p.attributesUpdater.Title > span.value')
     if element:
         product['title'] = element.text.strip()
     else:
         raise Exception('Title not found')
     # price_cny N/A
     # images: swap the '_10_' token of each data-origin URL for '_20_'
     elements = of_utils.find_elements_by_css_selector(driver, 'div.item-alternativeImages-shots > ul > li > img')
     images = []
     for element in elements:
         image = element.get_attribute('data-origin').strip().replace('_10_', '_20_')
         images.append(image)
     product['images'] = ';'.join(images)
     # code, taken from the model-name node's innerHTML
     element = of_utils.find_element_by_css_selector(driver, 'span.modelName')
     if element:
         product['code'] = element.get_attribute('innerHTML').strip()
     # detail: click the accordion toggle via JS before reading the list
     btn = of_utils.find_element_by_css_selector(driver, 'ul.itemDetails-info-accordion > li > h2 > div.plusIcon')
     if btn:
         driver.execute_script('arguments[0].click();', btn)
     elements = of_utils.find_elements_by_css_selector(driver, 'div.itemdescription > ul > li')
     texts = [element.get_attribute('innerHTML').strip() for element in elements]
     product['detail'] = '\n'.join(texts)
     return product
Exemple #24
0
 def parse_product(self, driver):
     """Scrape title, code, images and detail from a jewelry page.

     No price is extracted by this method. Each detail row becomes
     "label:value", or just the value when the row has no label span.

     Raises:
         Exception: if the title node is not found.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     name_node = of_utils.find_element_by_css_selector(
         driver, 'div.product-card > span')
     if not name_node:
         raise Exception('Title not found')
     item['title'] = name_node.text.strip()
     # code
     ref_node = of_utils.find_element_by_css_selector(
         driver, 'span.reference-jewelry')
     if ref_node:
         item['code'] = ref_node.text.strip()
     # price_cny N/A
     # images
     image_urls = []
     for img in of_utils.find_elements_by_css_selector(
             driver, 'div.content > img.carousel-slide__media'):
         image_urls.append(img.get_attribute('src').strip())
     item['images'] = ';'.join(image_urls)
     # detail
     lines = []
     for row in of_utils.find_elements_by_css_selector(
             driver, 'ul.fiche-details__left > li'):
         label = of_utils.find_element_by_css_selector(row, 'span')
         value = of_utils.find_element_by_css_selector(row, 'p')
         if label:
             line = label.text.strip() + ':' + value.text.strip()
         else:
             line = value.text.strip()
         lines.append(line)
     item['detail'] = '\n'.join(lines)
     return item
Exemple #25
0
 def parse_product(self, driver):
     """Scrape title, code, price_cny and images from a product page.

     Image URLs come from the ``srcset`` attribute. No detail is extracted.

     Raises:
         Exception: if the title node is not found.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     name_node = of_utils.find_element_by_css_selector(driver, 'div.product-detail h1.product-name')
     if not name_node:
         raise Exception('Title not found')
     item['title'] = name_node.text.strip()
     # code
     number_node = of_utils.find_element_by_css_selector(driver, '.giv-ProductContent-productNumber>span')
     if number_node:
         item['code'] = number_node.text.strip()
     # price_cny
     price_node = of_utils.find_element_by_css_selector(driver, 'div.product-price > span.price-sales')
     if price_node:
         # Strip the leading ¥ and the thousands separators.
         digits = price_node.text.strip()[1:].strip().replace(',', '')
         item['price_cny'] = int(float(digits))
     # images: the second layout is only queried if the first finds nothing
     sources = (
         of_utils.find_elements_by_css_selector(driver, 'div.swiper-slide > a.fullscreen > picture > img')
         or of_utils.find_elements_by_css_selector(driver, '.giv-ProductImageContainer .slick-list .slick-track a picture source')
     )
     image_urls = []
     for source in sources:
         image_urls.append(source.get_attribute('srcset').strip())
     item['images'] = ';'.join(image_urls)
     # detail N/A
     return item
Exemple #26
0
 def parse_product(self, driver):
     """Scrape title, price_cny, images and detail from a product page.

     The price is the second space-separated token of the price text, cut at
     the first '/'. The detail comes from a single ``<p>`` when present,
     otherwise from the ``<div>`` children of the description block. No code
     is extracted.

     Raises:
         Exception: if the title node is not found.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     subtitle = of_utils.find_element_by_css_selector(
         driver, 'div.p-name > h3.subtitle')
     if not subtitle:
         raise Exception('Title not found')
     item['title'] = subtitle.text.strip()
     # code N/A
     # price_cny
     price_node = of_utils.find_element_by_css_selector(
         driver, 'div.p-name > div.price')
     if price_node:
         second_token = price_node.text.strip().split(' ')[1]
         amount = second_token.split('/')[0].strip()
         item['price_cny'] = int(amount)
     # images
     image_urls = []
     for img in of_utils.find_elements_by_css_selector(
             driver, 'ul.swiper-wrapper > li.swiper-slide > img'):
         image_urls.append(img.get_attribute('src').strip())
     item['images'] = ';'.join(image_urls)
     # detail
     paragraph = of_utils.find_element_by_css_selector(
         driver, 'div.product-point > div#p-intro > div.des > p')
     if paragraph:
         item['detail'] = paragraph.text.strip()
         return item
     sections = of_utils.find_elements_by_css_selector(
         driver, 'div.product-point > div#p-intro > div.des > div')
     item['detail'] = '\n'.join(
         [section.text.strip() for section in sections])
     return item
Exemple #27
0
    def parse_product(self, driver):
        """Open one product's popup on a list page and scrape it.

        The text after the last ``?`` in ``driver.current_url`` is parsed as
        an integer. It selects which product link to click and also forms
        the ``.popup_product_<n>`` class used to scope the later lookups.
        The price is stored as ``price_hkd`` text, with no numeric
        conversion. No code or detail is extracted.

        Raises:
            Exception: if the popup title is not found.
        """
        tiles = of_utils.find_elements_by_css_selector(
            driver, '.product_list .product_1 a')
        flag = int(driver.current_url.split('?')[-1])
        # Click through JS to open the popup of the selected tile.
        driver.execute_script('arguments[0].click();', tiles[flag])
        of_utils.sleep(2)

        item = of_spider.empty_product.copy()
        popup = '.popup_product_%s' % flag
        # title (mandatory)
        heading = of_utils.find_element_by_css_selector(
            driver, popup + ' .product_detail_content .jspPane h2')
        if not heading:
            raise Exception('Title not found')
        item['title'] = heading.text.strip()
        # code N/A
        # price_hkd: text before the first '/', without '$' and ','
        price_node = of_utils.find_element_by_css_selector(
            driver, popup + ' .product_photo h3')
        if price_node:
            amount = price_node.text.strip().split('/')[0].strip()
            item['price_hkd'] = amount.replace('$', '').replace(',', '')
        # images
        image_urls = []
        for img in of_utils.find_elements_by_css_selector(
                driver, popup + ' .product_photo img'):
            image_urls.append(img.get_attribute('src').strip())
        item['images'] = ';'.join(image_urls)
        # detail N/A
        return item
 def parse_product(self, driver):
     """Scrape title, price_cny, images and detail from a product page.

     The price comes from the ``data-pricevalue`` attribute and is passed to
     ``of_utils.convert_price``. Image URLs are link targets (``href``). No
     code is extracted.

     Raises:
         Exception: if the title node is not found.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     subtitle = of_utils.find_element_by_css_selector(
         driver, '#pdpMain h1.product_subtitle')
     if not subtitle:
         raise Exception('Title not found')
     item['title'] = subtitle.text.strip()
     # code N/A
     # price_cny
     price_node = of_utils.find_element_by_css_selector(
         driver, '.product_add_to_cart .product_price')
     if price_node:
         raw_value = price_node.get_attribute('data-pricevalue')
         item['price_cny'] = of_utils.convert_price(raw_value)
     # images: thumbnails first, the primary image link as the fallback
     anchors = (
         of_utils.find_elements_by_css_selector(
             driver, '.product_thumbnails ul li.thumb a')
         or of_utils.find_elements_by_css_selector(
             driver, '.product_primary_image a')
     )
     image_urls = []
     for anchor in anchors:
         image_urls.append(anchor.get_attribute('href').strip())
     item['images'] = ';'.join(image_urls)
     # detail
     description = of_utils.find_element_by_css_selector(
         driver, '#pdpMain .product_description_box>div>span')
     if description:
         item['detail'] = description.text.strip()
     return item
Exemple #29
0
 def parse_entry(self, driver):
     """Return the stripped ``href`` of every product link on a listing page.

     If a "browse all" button exists it is clicked through JS first. The
     second link layout is only queried when the first one yields nothing.
     """
     browse_all = of_utils.find_element_by_css_selector(driver, 'div.spice-looks-grid-button > a')
     if browse_all:
         # Click "browse all".
         driver.execute_script('arguments[0].click();', browse_all)
     anchors = (
         of_utils.find_elements_by_css_selector(driver, 'ul.spice-float-clearfix > li > div > div > a.spice-item-grid')
         or of_utils.find_elements_by_css_selector(driver, 'div#pdlist > div.grid-cell > div.product-tiles-box > a.spice-item-grid')
     )
     links = []
     for anchor in anchors:
         links.append(anchor.get_attribute('href').strip())
     return links
Exemple #30
0
 def parse_product(self, driver):
     """Scrape title, code, price_cny, images and detail from a product page.

     If the first title contains '中国官网', the title is rebuilt from
     ``h1.title`` plus the SKU text. The price is looked up through three
     selectors in turn and parsed from the node's ``innerHTML``. Images use
     the zoom gallery first and the plain gallery as a fallback.

     Raises:
         Exception: if the title node is not found, including during the
             title rebuild.
     """
     item = of_spider.empty_product.copy()
     # title (mandatory)
     heading = of_utils.find_element_by_css_selector(
         driver, '.product-name h1.title')
     if not heading:
         raise Exception('Title not found')
     item['title'] = heading.text.strip()
     # code: second space-separated token of the footer paragraph
     footer_node = of_utils.find_element_by_css_selector(driver, 'footer > p')
     if footer_node:
         item['code'] = footer_node.text.split(' ')[1].strip()
     if '中国官网' in item['title']:
         # The first title looks like the generic site title; rebuild it
         # as "<h1 text>-<sku text>".
         heading = of_utils.find_element_by_css_selector(driver, 'h1.title')
         if not heading:
             raise Exception('Title not found')
         rebuilt = heading.text.strip() + '-'
         sku_node = of_utils.find_element_by_css_selector(
             driver, 'div.product-sku')
         if sku_node:
             rebuilt += sku_node.text.strip()
         item['title'] = rebuilt
     # price_cny: the first selector that matches wins
     price_selectors = (
         'div.prices > span.price > span > span.price',
         'div.prices > span.price > p.old-price > span',
         'div.product-price > span.price > span > span.price',
     )
     for selector in price_selectors:
         price_node = of_utils.find_element_by_css_selector(driver, selector)
         if price_node:
             break
     if price_node:
         markup = price_node.get_attribute('innerHTML')
         # The amount follows the first ';' in the markup (presumably the
         # end of an HTML entity for the currency sign).
         digits = markup.split(';')[1].strip().replace(',', '')
         item['price_cny'] = int(float(digits))
     # images
     anchors = of_utils.find_elements_by_css_selector(
         driver, 'ul.images > li > a')
     image_urls = [
         anchor.get_attribute('data-zoom-image').strip() for anchor in anchors
     ]
     if not image_urls:
         anchors = of_utils.find_elements_by_css_selector(
             driver,
             'div.product-gallery > div.gallery > div.gallery-item > a')
         image_urls = [
             anchor.get_attribute('href').strip() for anchor in anchors
         ]
     item['images'] = ';'.join(image_urls)
     # detail
     info = of_utils.find_element_by_css_selector(
         driver, 'div#product-info')
     if info:
         item['detail'] = info.text.strip()
     return item