Beispiel #1
0
 def parse_product(self, driver):
     """Collect product fields from the page currently loaded in ``driver``.

     Title, price, images and detail are each looked up with a primary XPath
     and then a second XPath if the first matched nothing.

     Args:
         driver: WebDriver-like object; only ``implicitly_wait`` is called on
             it directly, everything else goes through ``of_utils``.

     Returns:
         A copy of ``of_spider.empty_product`` with every field that was
         found filled in.

     Raises:
         Exception: If neither title XPath matches.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_xpath(driver, '//div[@id="variant-info"]/h1')
     if not element:
         element = of_utils.find_element_by_xpath(driver, '//p[@class="product-title"]')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code
     element = of_utils.find_element_by_xpath(driver, '//div[@id="product-detail"]//div[@class="commerce-product-sku"]/p/span')
     if element:
         product['code'] = element.text.strip()
     # price_cny
     element = of_utils.find_element_by_xpath(driver, '//div[@id="variant-info"]/p[@class="field-type-commerce-price"]')
     if not element:
         element = of_utils.find_element_by_xpath(driver, '//p[@class="product-price"]')
     if element:
         product['price_cny'] = of_utils.convert_price(element.text.strip())
     # images
     elements = of_utils.find_elements_by_xpath(driver, '//picture[@class="product-item-picture"]/img')
     if not elements:
         elements = of_utils.find_elements_by_xpath(driver, '//img[contains(@class,"main-product-image")]')
     if elements:
         images = []
         for element in elements:
             # Read the attribute once; an <img> without src yields None,
             # which previously crashed the substring test below.
             src = element.get_attribute('src')
             if src and 'data:image/gif' not in src:
                 images.append(src.strip())
         product['images'] = ';'.join(images)
     # detail
     element = of_utils.find_element_by_xpath(driver, '//div[@class="field-name-field-description"]/div/p')
     if not element:
         element = of_utils.find_element_by_xpath(driver, '//p[@class="product-attribute-font-description"]')
     if element:
         product['detail'] = element.text.strip()
     return product
Beispiel #2
0
 def parse_product(self, driver):
     """Collect product fields from the page currently loaded in ``driver``.

     Images come from the slideshow ``<source srcset=...>`` entries when
     present, otherwise from the front carousel ``<img>`` elements.

     Args:
         driver: WebDriver-like object; only ``implicitly_wait`` is called on
             it directly, everything else goes through ``of_utils``.

     Returns:
         A copy of ``of_spider.empty_product`` with every field that was
         found filled in.

     Raises:
         Exception: If neither title XPath matches.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_xpath(driver,
                                              '//h1[@id="productName"]')
     if not element:
         element = of_utils.find_element_by_xpath(
             driver, '//h1[@class="fc-product-title"]')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code
     element = of_utils.find_element_by_xpath(driver,
                                              '//span[@class="sku"]')
     if element:
         product['code'] = element.text.strip()
     # price_cny
     element = of_utils.find_element_by_xpath(driver,
                                              '//div[@class="priceValue"]')
     if not element:
         element = of_utils.find_element_by_xpath(
             driver, '//span[@class="fc-price-container fc-show"]')
     if element:
         product['price_cny'] = of_utils.convert_price(element.text.strip())
     # images
     elements = of_utils.find_elements_by_xpath(
         driver,
         '//div[@id="productSheetSlideshow"]//ul//li//button//picture//source'
     )
     if elements:
         images = []
         for element in elements:
             srcset = element.get_attribute('srcset')
             if not srcset:
                 continue
             # Keep the URL of the first srcset candidate. Splitting on
             # whitespace drops whatever descriptor follows it ("1600w",
             # "2x", ...) instead of listing known widths one by one.
             candidate = srcset.split(',')[0].split()
             if candidate:
                 images.append(candidate[0])
         # dict.fromkeys de-duplicates while preserving first-seen order.
         product['images'] = ';'.join(dict.fromkeys(images))
     else:
         elements = of_utils.find_elements_by_xpath(
             driver,
             '//div[@class="fc-model-container"]//div[@class="carousel-active fc-display-image carousel-key-Front"]/img'
         )
         if elements:
             images = []
             for element in elements:
                 src = element.get_attribute('src')
                 if src:  # a missing src would put None into the join
                     images.append(src)
             product['images'] = ';'.join(dict.fromkeys(images))
     # detail
     element = of_utils.find_element_by_xpath(
         driver, '//div[@id="productDescription"]')
     if element:
         product['detail'] = element.text.strip()
     return product
Beispiel #3
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_node = of_utils.find_element_by_xpath(
         driver, '//h2[@class="product-title"]')
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # code
     code_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="col-xs-12 mt-20"]/p[2]')
     if code_node:
         product['code'] = code_node.text.strip()

     # price_cny: taken from the element's "content" attribute
     price_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="product-price"]')
     if price_node:
         raw_price = price_node.get_attribute('content')
         product['price_cny'] = of_utils.convert_price(raw_price)

     # images: unique src values, first-seen order, ';'-separated
     image_nodes = of_utils.find_elements_by_xpath(
         driver, '//img[@itemprop="image"]')
     sources = [node.get_attribute('src').strip() for node in image_nodes]
     product['images'] = ';'.join(dict.fromkeys(sources))

     # detail
     detail_node = of_utils.find_element_by_xpath(
         driver, '//div[@id="product_information_Advices"]')
     if detail_node:
         product['detail'] = detail_node.text.strip()
     return product
Beispiel #4
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     No price is extracted by this parser.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, images and
         detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_node = of_utils.find_element_by_xpath(
         driver, '//h1[@class="product-name"]')
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # code: value of the hidden "pid" input
     pid_input = of_utils.find_element_by_xpath(driver, '//input[@id="pid"]')
     if pid_input:
         product['code'] = pid_input.get_attribute('value')

     # images: unique src values, first-seen order
     image_nodes = of_utils.find_elements_by_xpath(
         driver, '//img[contains(@class,"primary-image")]')
     sources = [node.get_attribute('src').strip() for node in image_nodes]
     product['images'] = ';'.join(dict.fromkeys(sources))

     # detail
     description = of_utils.find_element_by_xpath(
         driver, '//p[@itemprop="description"]')
     if description:
         product['detail'] = description.text.strip()
     return product
Beispiel #5
0
 def parse_entry(self, driver):
     """Expand the product listing and return the product URLs found on it.

     The "list more" button is clicked via JavaScript until the current-page
     counter equals the total-page counter, then every product thumbnail's
     ``onclick`` handler is turned into an absolute URL.

     Args:
         driver: WebDriver-like object for the listing page.

     Returns:
         list[str]: One URL per thumbnail that carries an ``onclick`` value.
     """
     of_utils.sleep(5)
     while True:
         btn = of_utils.find_element_by_xpath(
             driver, '//div[contains(@class,"etude-btn-listMore")]')
         page_all = of_utils.find_element_by_xpath(
             driver, '//span[contains(@class,"goods_page_all")]')
         page_now = of_utils.find_element_by_xpath(
             driver, '//span[contains(@class,"goods_page_now")]')
         # The button lookup is already None-checked; the counters need the
         # same guard, otherwise `.text` fails when a span is missing.
         if not (btn and page_all and page_now):
             break
         if page_all.text.strip() == page_now.text.strip():
             break
         driver.execute_script('arguments[0].click();', btn)
         print('click btn')
         of_utils.sleep(5)

     urls = []
     elements = of_utils.find_elements_by_xpath(
         driver, '//div[@class="product_cell_thumbBox"]')
     for e in elements:
         onclick = e.get_attribute('onclick')
         if not onclick:
             # No handler means no target path to extract.
             continue
         # Strip the window.open('...') wrapper, leaving the site-relative path.
         path = onclick.strip().replace('window.open(\'', '').replace('\')', '')
         urls.append('http://www.etude.cn' + path)
     return urls
Beispiel #6
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     No product code is extracted by this parser.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, price_cny, images
         and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     title_xpath = '//div[@class="etude_detail_good_title select-text"]'
     price_xpath = '//div[contains(@class,"etude-product-detail")]//span[contains(@class,"price-new")]'
     image_xpath = '//div[contains(@class,"etude_detail_abbreviations")]/a/img'
     detail_xpath = '//div[contains(@class,"etude_detail_good_titDesc")]'

     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_node = of_utils.find_element_by_xpath(driver, title_xpath)
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # price_cny
     price_node = of_utils.find_element_by_xpath(driver, price_xpath)
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images: unique src values, first-seen order
     thumbnails = of_utils.find_elements_by_xpath(driver, image_xpath)
     product['images'] = ';'.join(
         dict.fromkeys(img.get_attribute('src').strip() for img in thumbnails))

     # detail
     detail_node = of_utils.find_element_by_xpath(driver, detail_xpath)
     if detail_node:
         product['detail'] = detail_node.text.strip()
     return product
Beispiel #7
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them. Image
         URLs are joined in page order without de-duplication.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_node = of_utils.find_element_by_xpath(
         driver,
         '//h1[@class="c-pdp__cta-section--product-title js-pdp__cta-section--product-title"]'
     )
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # code: reference text with its label prefix removed
     ref_node = of_utils.find_element_by_xpath(driver,
                                               '//span[@class="local-ref"]')
     if ref_node:
         reference = ref_node.text.strip()
         product['code'] = reference.replace('编号: ', '')

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="price js-product-price-formatted"]')
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images
     carousel_images = of_utils.find_elements_by_xpath(
         driver,
         '//div[@class="c-pdp__product-carousel js-pdp__product-carousel carousel slide"]//img[@class="image js-adaptiveImage c-pdp__image image"]'
     )
     product['images'] = ';'.join(
         img.get_attribute('src').strip() for img in carousel_images)

     # detail
     detail_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="tabbed-content__content-column"][1]/p')
     if detail_node:
         product['detail'] = detail_node.text.strip()
     return product
Beispiel #8
0
 def parse_product(self, driver):
     """Collect product fields from the page currently loaded in ``driver``.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_xpath(
         driver, '//div[@class="prod_detzone_info"]/h3')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code
     element = of_utils.find_element_by_xpath(
         driver, '//div[@class="prod_detzone_info"]/div[@class="dec"]')
     if element:
         product['code'] = element.text.strip().replace('款号:', '')
     # price_cny
     element = of_utils.find_element_by_xpath(
         driver, '//div[@class="prod_detzone_info"]/h4')
     if element:
         product['price_cny'] = of_utils.convert_price(element.text.strip())
     # images
     elements = of_utils.find_elements_by_xpath(
         driver, '//div[@class="pic_all texiao"]/div')
     images = []
     for element in elements:
         # The XPath matches every child <div>; one without a data-bigimg
         # attribute yields None and must be skipped rather than stripped.
         big_image = element.get_attribute('data-bigimg')
         if big_image:
             images.append(big_image.strip())
     # dict.fromkeys de-duplicates while preserving first-seen order.
     product['images'] = ';'.join(dict.fromkeys(images))
     # detail
     element = of_utils.find_element_by_xpath(
         driver, '//div[@class="note"]/small[1]')
     if element:
         product['detail'] = element.text.strip()
     return product
Beispiel #9
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Title and price are read from the elements' ``innerText`` attribute;
     no product code is extracted.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, price_cny, images
         and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     heading = of_utils.find_element_by_xpath(
         driver, '//div[@class="product-name"]/h1')
     if not heading:
         raise Exception('Title not found')
     product['title'] = heading.get_attribute('innerText').strip()

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="price-box"]//span[@class="price"]')
     if price_node:
         price_text = price_node.get_attribute('innerText').strip()
         product['price_cny'] = of_utils.convert_price(price_text)

     # images: unique src values, first-seen order
     main_images = of_utils.find_elements_by_xpath(
         driver, '//img[@id="image-main"]')
     sources = [img.get_attribute('src').strip() for img in main_images]
     product['images'] = ';'.join(dict.fromkeys(sources))

     # detail
     summary = of_utils.find_element_by_xpath(
         driver, '//div[@class="short-description"]/div')
     if summary:
         product['detail'] = summary.text.strip()
     return product
Beispiel #10
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Price and image URLs are read from ``content`` attributes (the price
     element and the ``og:image`` meta tags); no product code is extracted.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, price_cny, images
         and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     heading = of_utils.find_element_by_xpath(
         driver, '//h1[@class="product-name"]')
     if not heading:
         raise Exception('Title not found')
     product['title'] = heading.text.strip()

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver, '//span[@itemprop="price"]')
     if price_node:
         raw_price = price_node.get_attribute('content')
         product['price_cny'] = of_utils.convert_price(raw_price)

     # images: unique og:image values, first-seen order
     meta_tags = of_utils.find_elements_by_xpath(
         driver, '//meta[@property="og:image"]')
     urls = [tag.get_attribute('content').strip() for tag in meta_tags]
     product['images'] = ';'.join(dict.fromkeys(urls))

     # detail
     description = of_utils.find_element_by_xpath(
         driver, '//p[@id="collapseDescription"]')
     if description:
         product['detail'] = description.text.strip()
     return product
Beispiel #11
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     The price is stored under ``price_usd`` after the ``$`` sign is removed.
     No code or detail is extracted.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, price_usd and
         images filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     name_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="product-name alternate"]')
     if not name_node:
         raise Exception('Title not found')
     product['title'] = name_node.text.strip()

     # price_usd
     price_node = of_utils.find_element_by_xpath(
         driver, '//span[@class="price-sales"]')
     if price_node:
         amount = price_node.text.strip().replace('$', '')
         product['price_usd'] = of_utils.convert_price(amount)

     # images: the raw srcset attribute of each main image, de-duplicated
     pictures = of_utils.find_elements_by_xpath(
         driver, '//a[@class="product-image main-image"]/picture/img')
     srcsets = [img.get_attribute('srcset').strip() for img in pictures]
     product['images'] = ';'.join(dict.fromkeys(srcsets))
     return product
Beispiel #12
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     title_xpath = '//h1[@class="product-name-title"]'
     code_xpath = '//li[@class="product-id"]'
     price_xpath = '//div[@class="primary-category-and-price"]//span[@class="sales "]'
     image_xpath = '//div[@class="primary-images"]//div[@class="swiper-wrapper"]//img'
     detail_xpath = '//div[@class="info-and-care product-attributes"]/ul'

     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_node = of_utils.find_element_by_xpath(driver, title_xpath)
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # code
     code_node = of_utils.find_element_by_xpath(driver, code_xpath)
     if code_node:
         product['code'] = code_node.text.strip()

     # price_cny
     price_node = of_utils.find_element_by_xpath(driver, price_xpath)
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images: unique src values, first-seen order
     slides = of_utils.find_elements_by_xpath(driver, image_xpath)
     sources = [img.get_attribute('src').strip() for img in slides]
     product['images'] = ';'.join(dict.fromkeys(sources))

     # detail
     detail_node = of_utils.find_element_by_xpath(driver, detail_xpath)
     if detail_node:
         product['detail'] = detail_node.text.strip()
     return product
Beispiel #13
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them. Image
         URLs are joined in page order without de-duplication.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_node = of_utils.find_element_by_xpath(
         driver,
         '//div[@class="product-info-section"]//h1[@class="product-name"]')
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # code
     sku_node = of_utils.find_element_by_xpath(
         driver, '//span[@class="product-sku"]')
     if sku_node:
         product['code'] = sku_node.text.strip()

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver,
         '//div[@class="product-info-section"]//span[@class="price bold-family"]')
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images
     slide_images = of_utils.find_elements_by_xpath(
         driver,
         '//div[contains(@class,"swiper-item cursor-zoom swiper-slide")]/img')
     product['images'] = ';'.join(
         img.get_attribute('src').strip() for img in slide_images)

     # detail
     detail_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="item-description"]/div')
     if detail_node:
         product['detail'] = detail_node.text.strip()
     return product
Beispiel #14
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     No product code is extracted by this parser.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, price_cny, images
         and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     name_node = of_utils.find_element_by_xpath(
         driver,
         '//div[@class="product-title "]/h2[@class="product-name"][1]')
     if not name_node:
         raise Exception('Title not found')
     product['title'] = name_node.text.strip()

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="price-row"]//h2[@class="price-sales"][1]')
     if price_node:
         price_text = price_node.text.strip()
         product['price_cny'] = of_utils.convert_price(price_text)

     # images: unique thumbnail src values, first-seen order
     thumbnails = of_utils.find_elements_by_xpath(
         driver, '//li[contains(@class,"productthumbnail")]/img')
     sources = [img.get_attribute('src').strip() for img in thumbnails]
     product['images'] = ';'.join(dict.fromkeys(sources))

     # detail
     tab_text = of_utils.find_element_by_xpath(
         driver, '//div[@class="pdp-tab-content"]/p')
     if tab_text:
         product['detail'] = tab_text.text.strip()
     return product
Beispiel #15
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Uses a fixed two-second ``of_utils.sleep`` before reading the page.
     No price is extracted by this parser.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, images and
         detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     of_utils.sleep(2)
     product = of_spider.empty_product.copy()

     # title (mandatory); line breaks inside the heading become spaces
     heading = of_utils.find_element_by_xpath(
         driver, '//h1[contains(@class,"c-product-name-pdp")]')
     if not heading:
         raise Exception('Title not found')
     product['title'] = heading.text.strip().replace('\n', ' ')

     # code: utility text with its label prefix removed
     utility = of_utils.find_element_by_xpath(
         driver, '//span[contains(@class,"o-utility")]')
     if utility:
         label_removed = utility.text.strip().replace('商品编号 :', '')
         product['code'] = label_removed.strip()

     # images: unique src values, first-seen order
     first_images = of_utils.find_elements_by_xpath(
         driver, '//div[contains(@class,"product-image-first")]/img')
     sources = [img.get_attribute('src').strip() for img in first_images]
     product['images'] = ';'.join(dict.fromkeys(sources))

     # detail
     blurb = of_utils.find_element_by_xpath(
         driver, '//p[@class="text--center c-collection-desc"]')
     if blurb:
         product['detail'] = blurb.text.strip()
     return product
Beispiel #16
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     No detail text is extracted by this parser.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny
         and images filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_node = of_utils.find_element_by_xpath(
         driver, '//h2[@class="goods-title"]')
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # code: item text with its label prefix removed
     item_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="goods-bn"]')
     if item_node:
         item_text = item_node.text.strip()
         product['code'] = item_text.replace('ITEM: ', '')

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver,
         '//span[@class="price-primary sp-price sp-price__default clearfix"]/span[@class="price__int"]'
     )
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images: unique src values, first-seen order
     pictures = of_utils.find_elements_by_xpath(
         driver, '//div[@class="img-container"]/img')
     sources = [img.get_attribute('src').strip() for img in pictures]
     product['images'] = ';'.join(dict.fromkeys(sources))
     return product
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them. Image
         URLs are joined in page order without de-duplication.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     headline = of_utils.find_element_by_xpath(
         driver,
         '//h3[@class="component-products-head-line__title font-bemboStd"]')
     if not headline:
         raise Exception('Title not found')
     product['title'] = headline.text.strip()

     # code: text with its label prefix removed
     code_node = of_utils.find_element_by_xpath(
         driver, '//p[@class="page-products-id__code"]')
     if code_node:
         code_text = code_node.text.strip()
         product['code'] = code_text.replace('商品代码:', '')

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="component-products-head-line__price"]')
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images
     pictures = of_utils.find_elements_by_xpath(
         driver, '//li[@class="component-products-pictures__item"]/img')
     product['images'] = ';'.join(
         img.get_attribute('src').strip() for img in pictures)

     # detail
     describe_node = of_utils.find_element_by_xpath(
         driver, '//p[@class="page-products-id__describe"]')
     if describe_node:
         product['detail'] = describe_node.text.strip()
     return product
Beispiel #18
0
    def parse_entry(self, driver):
        """Scroll the listing until no new products appear; return their links.

        A visible "load more" button is clicked once via JavaScript. After
        that the page is scrolled from the last product link with five
        PAGE_DOWN key presses per round, for as long as each round finds more
        links than the previous one.

        Args:
            driver: WebDriver-like object for the listing page.

        Returns:
            list[str]: The stripped ``href`` of every product link found in
            the final lookup.
        """
        driver.implicitly_wait(15)
        load_more_button = of_utils.find_element_by_xpath(
            driver, '//div[@class="loadMore isVisible"]/button')
        if load_more_button:
            driver.execute_script('arguments[0].click();', load_more_button)
            of_utils.sleep(4)

        link_xpath = '//a[@class="productQB__wrapperImage js-product-qb-link"]'
        known_count = 0
        while True:
            links = of_utils.find_elements_by_xpath(driver, link_xpath)
            if len(links) <= known_count:
                # Nothing new was loaded by the previous scroll.
                break
            known_count = len(links)
            scroll = ActionChains(driver).move_to_element(links[-1])
            for _ in range(5):
                scroll.send_keys(Keys.PAGE_DOWN)
            scroll.perform()
            of_utils.sleep(4)
        return [link.get_attribute('href').strip() for link in links]
Beispiel #19
0
def getContent(url):
    """Open ``url`` in Chrome and scrape citation metadata from the page.

    Args:
        url: Address of the page to load.

    Returns:
        dict: May contain ``author`` (names joined with ';'), ``journal``,
        ``publishAt``, ``doi`` and ``fullTextUrl`` when the matching elements
        are present; always contains ``seq`` set to an empty string.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    driver = webdriver.Chrome(chrome_options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        driver.implicitly_wait(15)
        d = {}

        eles = of_utils.find_elements_by_xpath(driver, '//div[@class="auths"]/a')
        if eles:
            d['author'] = ';'.join(e.text.strip() for e in eles)

        ele = of_utils.find_element_by_xpath(
            driver, '//div[@class="rprt abstract"]/div[@class="cit"]')
        if ele:
            # The citation text is split on '. ' into journal, publication
            # date and DOI. zip() stops at the shorter side, so a citation
            # with fewer parts fills only the leading keys instead of
            # raising IndexError.
            parts = ele.text.split('. ')
            citation = dict(zip(('journal', 'publishAt', 'doi'), parts))
            if 'doi' in citation:
                citation['doi'] = citation['doi'].replace('doi: ', '')
            d.update(citation)
        d['seq'] = ''

        ele = of_utils.find_element_by_xpath(
            driver, '//div[@class="icons portlet"]/a')
        if not ele:
            ele = of_utils.find_element_by_xpath(
                driver, '//a[@ref="aid_type=doi"]')
        if ele:
            d['fullTextUrl'] = ele.get_attribute('href')
        return d
    finally:
        # The driver is local to this call; without quit() every invocation
        # leaves a browser process running.
        driver.quit()
Beispiel #20
0
 def parse_product(self, driver):
     """Collect product fields from the page currently loaded in ``driver``.

     The description's inner HTML is stored as the detail; the product code
     is whatever follows the '商品编号' marker (plus one separator character)
     in that same HTML, or an empty string when the marker is absent.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, price_cny, images,
         detail and code filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_xpath(driver, '//h1[@class="productName "]')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code: derived from the description below
     # price_cny
     element = of_utils.find_element_by_xpath(driver, '//div[@class="itemBoxPrice"]//span[@class="price"]//span[@class="value"]')
     if element:
         product['price_cny'] = of_utils.convert_price(element.text.strip())
     # images
     elements = of_utils.find_elements_by_xpath(driver, '//ul[@class="alternativeImages"]//li//img')
     images = []
     for element in elements:
         # Prefer src and fall back to data-origin. If both are missing the
         # image is skipped, since a None entry would break the join below.
         source = element.get_attribute('src') or element.get_attribute('data-origin')
         if source:
             images.append(source.strip())
     # dict.fromkeys de-duplicates while preserving first-seen order.
     product['images'] = ';'.join(dict.fromkeys(images))
     # detail
     element = of_utils.find_element_by_xpath(driver, '//div[@class="attributesUpdater itemdescription"]//span[@class="value"]')
     if element:
         txt = element.get_attribute('innerHTML').strip()
         product['detail'] = txt
         marker = txt.find('商品编号')
         # +5 skips the four-character marker and the one character after it.
         product['code'] = txt[marker + 5:] if marker >= 0 else ''
     return product
Beispiel #21
0
    def parse_product(self, driver):
        """Collect product fields from the page currently loaded in ``driver``.

        The product code is the last path segment of the current URL with
        everything from ``.html`` onwards removed.

        Args:
            driver: WebDriver-like object for the product page.

        Returns:
            A copy of ``of_spider.empty_product`` with title, code, price_cny,
            images and detail filled in where the page provides them.

        Raises:
            Exception: If the title element is not found.
        """
        from urllib.parse import urlsplit

        driver.implicitly_wait(15)
        product = of_spider.empty_product.copy()
        # title
        element = of_utils.find_element_by_xpath(
            driver, '//span[@id="capds-js-product-name"]')
        if not element:
            raise Exception('Title not found')
        product['title'] = element.text.strip()

        # code
        # Work on the URL path only, so a '/' in the query string or fragment
        # cannot be mistaken for the last path separator. partition() returns
        # the whole segment when '.html' is absent, where slicing up to
        # find() == -1 silently dropped the last character.
        page_name = urlsplit(driver.current_url).path.rsplit('/', 1)[-1]
        product['code'] = page_name.partition('.html')[0]

        # price_cny
        element = of_utils.find_element_by_xpath(
            driver, '//span[@class="capds-product__price--active"]')
        if element:
            product['price_cny'] = of_utils.convert_price(element.text.strip())

        # images
        elements = of_utils.find_elements_by_xpath(
            driver, '//div[@class="swiper-slide"]//img')
        images = [e.get_attribute('src').strip() for e in elements]
        # dict.fromkeys de-duplicates while preserving first-seen order.
        product['images'] = ';'.join(dict.fromkeys(images))

        # detail
        element = of_utils.find_element_by_xpath(
            driver, '//div[@class="capds-product__description"]/p')
        if element:
            product['detail'] = element.text.strip()
        return product
Beispiel #22
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Neither code nor price is extracted by this parser.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, images and detail
         filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     heading = of_utils.find_element_by_xpath(
         driver, '//h1[@class="h1-title"]')
     if not heading:
         raise Exception('Title not found')
     product['title'] = heading.text.strip()

     # images: unique src values, first-seen order
     gallery = of_utils.find_elements_by_xpath(
         driver,
         '//div[@class="product-gallery__col-item product-gallery__main-gallery"]//img'
     )
     sources = [img.get_attribute('src').strip() for img in gallery]
     product['images'] = ';'.join(dict.fromkeys(sources))

     # detail
     details = of_utils.find_element_by_xpath(
         driver,
         '//div[@class="product-detail__col--item product-detail__details"]/p'
     )
     if details:
         product['detail'] = details.text.strip()
     return product
Beispiel #23
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     The detail field holds the description's raw ``innerHTML`` (not its
     text, and not stripped).

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_span = of_utils.find_element_by_xpath(
         driver, '//div[@class="product-titles"]/h1/span')
     if not title_span:
         raise Exception('Title not found')
     product['title'] = title_span.text.strip()

     # code: reference text with its label prefix removed
     ref_node = of_utils.find_element_by_xpath(
         driver, '//p[@class="product-titles-ref"]')
     if ref_node:
         reference = ref_node.text.strip()
         product['code'] = reference.replace('编号: ', '')

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver, '//span[@class="price-line"]')
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images: unique src values, first-seen order
     media = of_utils.find_elements_by_xpath(
         driver, '//div[@class="image product-media__image"]/img')
     sources = [img.get_attribute('src').strip() for img in media]
     product['images'] = ';'.join(dict.fromkeys(sources))

     # detail
     description = of_utils.find_element_by_xpath(
         driver, '//div[@class="couture-description__html"]')
     if description:
         product['detail'] = description.get_attribute('innerHTML')
     return product
Beispiel #24
0
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()
     # title (mandatory)
     element = of_utils.find_element_by_xpath(
         driver, '//h1[@class="product-detail_name"]')
     if element:
         product['title'] = element.text.strip()
     else:
         raise Exception('Title not found')
     # code: item-number text with its 'ITEM: ' label removed
     element = of_utils.find_element_by_xpath(
         driver, '//p[contains(@class,"itemNum")]')
     if element:
         product['code'] = element.text.strip().replace('ITEM: ', '')
     # price_cny: currency prefix removed before conversion
     # NOTE(review): the prefix literal below appears to contain an invisible
     # character after the yen sign - confirm before editing this string.
     element = of_utils.find_element_by_xpath(
         driver, '//div[@class="product-detail_price"]')
     if element:
         product['price_cny'] = of_utils.convert_price(
             element.text.strip().replace('CN¥‌', ''))
     # images: unique src values in first-seen order, ';'-separated
     elements = of_utils.find_elements_by_xpath(
         driver, '//img[@class="product-imagery_picture-image"]')
     images = [element.get_attribute('src').strip() for element in elements]
     product['images'] = ';'.join({}.fromkeys(images).keys())
     # detail
     element = of_utils.find_element_by_xpath(
         driver, '//div[contains(@class,"description")]/p')
     if element:
         product['detail'] = element.text.strip()
     return product
Beispiel #25
0
 def parse_product(self, driver):
     """Collect product fields from the page currently loaded in ``driver``.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_xpath(
         driver, '//h1[@class="pDetails__title"]')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code: taken from the main content container's data-partnumber attribute
     element = of_utils.find_element_by_xpath(
         driver, '//div[@id="mainPdpContent"]')
     if element:
         product['code'] = element.get_attribute('data-partnumber')
     # price_cny
     element = of_utils.find_element_by_xpath(
         driver,
         '//div[@class=" pDetails__priceItem"]//span[@class="price__value"]'
     )
     if element:
         product['price_cny'] = of_utils.convert_price(element.text.strip())
     # images
     elements = of_utils.find_elements_by_xpath(
         driver,
         '//div[@class="pDetails__slide js-imgProduct slick-slide"]//img')
     images = []
     for element in elements:
         src = element.get_attribute('src')
         # A slide image without src yields None, which would make the join
         # below raise TypeError; skip such entries.
         if src:
             images.append(src)
     # dict.fromkeys de-duplicates while preserving first-seen order.
     product['images'] = ';'.join(dict.fromkeys(images))
     # detail
     element = of_utils.find_element_by_xpath(
         driver, '//p[@class="pDetails__desc"]')
     if element:
         product['detail'] = element.text.strip()
     return product
 def parse_product(self, driver):
     """Build a product record from the page loaded in ``driver``.

     Uses a fixed five-second ``of_utils.sleep`` before reading the page.
     Image URLs come from ``data-src`` with everything from
     ``?x-oss-process`` onwards cut off.

     Args:
         driver: WebDriver-like object for the product page.

     Returns:
         A copy of ``of_spider.empty_product`` with title, code, price_cny,
         images and detail filled in where the page provides them.

     Raises:
         Exception: If the title element is not found.
     """
     of_utils.sleep(5)
     product = of_spider.empty_product.copy()

     # title (mandatory): text of the share icon's parent element
     share_parent = of_utils.find_element_by_xpath(
         driver, '//i[@class="iconfont icon-ICON_share"]/..')
     if not share_parent:
         raise Exception('Title not found')
     product['title'] = share_parent.text.strip()

     # code
     code_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="code"]')
     if code_node:
         product['code'] = code_node.text.strip()

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="product-price"]/i')
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images: unique URLs, first-seen order
     thumbnails = of_utils.find_elements_by_xpath(
         driver, '//ul[@class="small-img-list"]/li/img')
     urls = []
     for thumb in thumbnails:
         lazy_src = thumb.get_attribute('data-src').strip()
         urls.append(lazy_src.split('?x-oss-process')[0])
     product['images'] = ';'.join(dict.fromkeys(urls))

     # detail
     content = of_utils.find_element_by_xpath(
         driver, '//div[@class="desc"]//div[@class="content"]')
     if content:
         product['detail'] = content.text.strip()
     return product
Beispiel #27
0
    def parse_product(self, driver):
        """Scrape a product page from www.ysl.cn or www.yslbeautycn.com.

        The two sites use different markup, so the host found in
        ``driver.current_url`` selects which set of XPaths is applied. For any
        other host the unfilled product copy is returned.

        Args:
            driver: browser driver with the product page already loaded
                (presumably a Selenium WebDriver -- confirm against the caller).

        Returns:
            A copy of ``of_spider.empty_product`` with each field that could
            be located on the page filled in. ``detail`` is only looked up on
            www.yslbeautycn.com pages.

        Raises:
            Exception: when the title element cannot be located on a
                recognised site.
        """
        driver.implicitly_wait(15)
        product = of_spider.empty_product.copy()
        page_url = driver.current_url

        if 'www.ysl.cn' in page_url:
            # title (mandatory)
            ele = of_utils.find_element_by_xpath(driver, '//h2[@class="page-products-id__title"]')
            if not ele:
                raise Exception('Title not found')
            product['title'] = ele.text.strip()

            # code: read via innerHTML rather than .text
            ele = of_utils.find_element_by_xpath(driver, '//ul[@class="page-products-id__text__material-color"]/li/span[2]')
            if ele:
                product['code'] = ele.get_attribute('innerHTML').strip()

            # price_cny
            ele = of_utils.find_element_by_xpath(driver, '//div[@class="page-products-id__price"]/span')
            if ele:
                product['price_cny'] = of_utils.convert_price(ele.text.strip())

            # images: fetch each src once, skip images without a src and
            # inline base64 data, then de-duplicate keeping first-seen order.
            eles = of_utils.find_elements_by_xpath(driver, '//ul[@class="component-products-pictures__desktop layout-desktop-large-desktop-only"]//li//img')
            sources = (e.get_attribute('src') for e in eles)
            images = [src for src in sources if src and 'base64,' not in src]
            product['images'] = ';'.join(dict.fromkeys(images))

        elif 'www.yslbeautycn.com' in page_url:
            # title (mandatory)
            ele = of_utils.find_element_by_xpath(driver, '//div[@class="product-top"]//div[@class="product-tit"]//h1')
            if not ele:
                raise Exception('Title not found')
            product['title'] = ele.text.strip()

            # code: the XPath needs the leading '//' -- a bare 'input[...]' is
            # resolved relative to the document node and can never match.
            ele = of_utils.find_element_by_xpath(driver, '//input[@id="hide-currentItemCode"]')
            if ele:
                product['code'] = ele.get_attribute('value')

            # price_cny
            ele = of_utils.find_element_by_xpath(driver, '//div[@class="detail-item is-active current-item"]//p[@class="product-price"]')
            if ele:
                product['price_cny'] = of_utils.convert_price(ele.text.strip())

            # images: skip images without a src instead of crashing on
            # None.strip(), then de-duplicate keeping first-seen order.
            eles = of_utils.find_elements_by_xpath(driver, '//div[@class="swiper-container e-main-scroll swiper-container-horizontal swiper-container-fade"]//div[@class="swiper-wrapper"]//img')
            sources = (e.get_attribute('src') for e in eles)
            images = [src.strip() for src in sources if src]
            product['images'] = ';'.join(dict.fromkeys(images))

            # detail
            ele = of_utils.find_element_by_xpath(driver, '//div[@class="product-description none-sm"]/p')
            if ele:
                product['detail'] = ele.text.strip()

        return product
Beispiel #28
0
 def parse_entry(self, driver):
     """Return the product links found on a listing page.

     Scrolls the window down by the document height, clicks the
     ``pagination__all`` link when it is present (then waits via
     ``of_utils.sleep(5)``), and finally collects the ``href`` of every
     ``thumb-link`` anchor inside a ``product-image`` div.

     Args:
         driver: browser driver with the listing page already loaded
             (presumably a Selenium WebDriver -- confirm against the caller).

     Returns:
         list of stripped ``href`` strings, in document order.
     """
     driver.execute_script('window.scrollBy(0, document.body.scrollHeight);')

     show_all_link = of_utils.find_element_by_xpath(driver,'//a[@class="pagination__all"]')
     if show_all_link:
         # Click through JavaScript instead of calling a click method on the element.
         driver.execute_script('arguments[0].click();', show_all_link)
         of_utils.sleep(5)

     anchors = of_utils.find_elements_by_xpath(driver, '//div[@class="product-image"]//a[@class="thumb-link"]')
     links = []
     for anchor in anchors:
         links.append(anchor.get_attribute('href').strip())
     return links
Beispiel #29
0
 def parse_product(self, driver):
     """Collect image URLs and the description from a product page.

     Only ``images`` and ``detail`` are extracted here; no other product
     field is looked up by this method.

     Args:
         driver: browser driver with the product page already loaded
             (presumably a Selenium WebDriver -- confirm against the caller).

     Returns:
         A copy of ``of_spider.empty_product`` with ``images`` set and
         ``detail`` set when the description paragraph is found.
     """
     # The original body used `product` without ever creating it, which
     # raised NameError; start from the shared empty template instead.
     product = of_spider.empty_product.copy()
     # images: de-duplicated while keeping first-seen order
     elements = of_utils.find_elements_by_xpath(
         driver, '//img[@class="attachment-shop_single size-shop_single"]')
     images = [element.get_attribute('src').strip() for element in elements]
     product['images'] = ';'.join(dict.fromkeys(images))
     # detail
     element = of_utils.find_element_by_xpath(
         driver, '//p[@class="product-detail-description"]')
     if element:
         product['detail'] = element.text.strip()
     return product
Beispiel #30
0
    def parse_entry(self, driver):
        """Expand a "load more" listing and return its product links.

        Repeatedly clicks the ``btn-load-more`` anchor, waiting via
        ``of_utils.sleep(5)`` after each click, until the anchor is missing or
        its ``style`` attribute contains ``display: none;``. Then collects the
        ``href`` of every ``component-product-card`` anchor.

        Args:
            driver: browser driver with the listing page already loaded
                (presumably a Selenium WebDriver -- confirm against the caller).

        Returns:
            list of stripped ``href`` strings, in document order.
        """
        driver.implicitly_wait(15)

        while True:
            more_button = of_utils.find_element_by_xpath(driver,'//a[@class="btn-load-more"]')
            # Stop when the button is missing or has been hidden by the page.
            if not more_button or "display: none;" in more_button.get_attribute('style'):
                break
            driver.execute_script('arguments[0].click();', more_button)
            of_utils.sleep(5)

        cards = of_utils.find_elements_by_xpath(driver,'//a[@class="component-product-card"]')
        links = []
        for card in cards:
            links.append(card.get_attribute('href').strip())
        return links