def parse_entry(self, driver):
    """Collect product URLs from the listing page after expanding it.

    Clicks the "list more" button until the current-page counter equals the
    total-page counter, or until the button or either counter cannot be
    found. Then builds one absolute URL per product thumbnail from its
    ``onclick`` handler.

    Returns:
        list of str: product URLs prefixed with ``http://www.etude.cn``.
    """
    of_utils.sleep(5)
    while True:
        btn = of_utils.find_element_by_xpath(
            driver, '//div[contains(@class,"etude-btn-listMore")]')
        page_all = of_utils.find_element_by_xpath(
            driver, '//span[contains(@class,"goods_page_all")]')
        page_now = of_utils.find_element_by_xpath(
            driver, '//span[contains(@class,"goods_page_now")]')
        # The finder's result is tested for falsiness at other call sites,
        # so a missing button or counter ends paging here instead of raising
        # AttributeError on ``.text``.
        if not (btn and page_all and page_now):
            break
        if page_all.text.strip() == page_now.text.strip():
            break
        driver.execute_script('arguments[0].click();', btn)
        print('click btn')
        of_utils.sleep(5)

    elements = of_utils.find_elements_by_xpath(
        driver, '//div[@class="product_cell_thumbBox"]')
    urls = []
    for e in elements:
        handler = e.get_attribute('onclick')
        if not handler:
            # No handler means there is no link to extract for this cell.
            continue
        # The handler looks like window.open('<path>'); keep only <path>.
        path = handler.strip().replace('window.open(\'', '').replace('\')', '')
        urls.append('http://www.etude.cn' + path)
    return urls
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    price_cny, images (';'-joined, de-duplicated) and detail when the
    matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    title_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="etude_detail_good_title select-text"]')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver,
        '//div[contains(@class,"etude-product-detail")]//span[contains(@class,"price-new")]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//div[contains(@class,"etude_detail_abbreviations")]/a/img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[contains(@class,"etude_detail_good_titDesc")]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (taken from the current URL), price_cny, images (';'-joined,
    de-duplicated) and detail when the matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_xpath(
        driver, '//span[@id="capds-js-product-name"]')
    if element:
        product['title'] = element.text.strip()
    else:
        raise Exception('Title not found')

    # code: last path segment of the URL, up to '.html'
    url = driver.current_url
    start = url.rfind('/') + 1
    end = url.find('.html', start)
    # find() returns -1 when '.html' is absent; slicing to -1 would silently
    # drop the last character, so fall back to the whole segment.
    product['code'] = url[start:end] if end >= 0 else url[start:]

    # price_cny
    element = of_utils.find_element_by_xpath(
        driver, '//span[@class="capds-product__price--active"]')
    if element:
        product['price_cny'] = of_utils.convert_price(element.text.strip())

    # images
    elements = of_utils.find_elements_by_xpath(
        driver, '//div[@class="swiper-slide"]//img')
    images = [e.get_attribute('src').strip() for e in elements]
    product['images'] = ';'.join(dict.fromkeys(images))

    # detail
    element = of_utils.find_element_by_xpath(
        driver, '//div[@class="capds-product__description"]/p')
    if element:
        product['detail'] = element.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title and
    price_cny (both read from ``innerText``), images (';'-joined,
    de-duplicated) and detail when the matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//div[@class="product-name"]/h1')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.get_attribute('innerText').strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="price-box"]//span[@class="price"]')
    if price_node:
        price_text = price_node.get_attribute('innerText').strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//img[@id="image-main"]')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="short-description"]/div')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code, price_cny, images (';'-joined in page order, duplicates kept) and
    detail when the matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//div[@class="product-info-section"]//h1[@class="product-name"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    sku_node = of_utils.find_element_by_xpath(
        driver, '//span[@class="product-sku"]')
    if sku_node:
        product['code'] = sku_node.text.strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver,
        '//div[@class="product-info-section"]//span[@class="price bold-family"]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver,
        '//div[contains(@class,"swiper-item cursor-zoom swiper-slide")]/img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(sources)

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="item-description"]/div')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code, price_cny (from the price element's ``content`` attribute), images
    (';'-joined, de-duplicated) and detail when the matching elements are
    found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//h2[@class="product-title"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    code_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="col-xs-12 mt-20"]/p[2]')
    if code_node:
        product['code'] = code_node.text.strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="product-price"]')
    if price_node:
        raw_price = price_node.get_attribute('content')
        product['price_cny'] = of_utils.convert_price(raw_price)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//img[@itemprop="image"]')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[@id="product_information_Advices"]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    price_cny, images (';'-joined, de-duplicated) and detail. The code is
    cut out of the detail HTML: everything after the '商品编号' marker plus
    one following character, or '' when the marker is absent.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_xpath(driver, '//h1[@class="productName "]')
    if element:
        product['title'] = element.text.strip()
    else:
        raise Exception('Title not found')

    # price_cny
    element = of_utils.find_element_by_xpath(
        driver,
        '//div[@class="itemBoxPrice"]//span[@class="price"]//span[@class="value"]')
    if element:
        product['price_cny'] = of_utils.convert_price(element.text.strip())

    # images
    elements = of_utils.find_elements_by_xpath(
        driver, '//ul[@class="alternativeImages"]//li//img')
    images = []
    for img in elements:
        src = img.get_attribute('src')
        if src:
            images.append(src.strip())
            continue
        # Without a src, fall back to data-origin (presumably a lazy-load
        # placeholder -- confirm against the site markup).
        fallback = img.get_attribute('data-origin')
        if fallback:
            # Skip images that carry neither attribute: a None entry would
            # make ';'.join raise TypeError.
            images.append(fallback)
    product['images'] = ';'.join(dict.fromkeys(images))

    # detail and code
    element = of_utils.find_element_by_xpath(
        driver,
        '//div[@class="attributesUpdater itemdescription"]//span[@class="value"]')
    if element:
        txt = element.get_attribute('innerHTML').strip()
        product['detail'] = txt
        marker_at = txt.find('商品编号')
        # +5 skips the 4-character marker and the one character after it.
        product['code'] = txt[marker_at + 5:] if marker_at >= 0 else ''
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (with the 'ITEM: ' label removed), price_cny (with 'CN¥' removed),
    images (';'-joined, de-duplicated) and detail when the matching elements
    are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//h1[@class="product-detail_name"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    code_node = of_utils.find_element_by_xpath(
        driver, '//p[contains(@class,"itemNum")]')
    if code_node:
        code_text = code_node.text.strip()
        product['code'] = code_text.replace('ITEM: ', '')

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="product-detail_price"]')
    if price_node:
        price_text = price_node.text.strip().replace('CN¥', '')
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//img[@class="product-imagery_picture-image"]')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[contains(@class,"description")]/p')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (the ``value`` of the ``pid`` input), images (';'-joined,
    de-duplicated) and detail when the matching elements are found. No price
    is read.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//h1[@class="product-name"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    pid_input = of_utils.find_element_by_xpath(driver, '//input[@id="pid"]')
    if pid_input:
        product['code'] = pid_input.get_attribute('value')

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//img[contains(@class,"primary-image")]')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//p[@itemprop="description"]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (with the '款号:' label removed), price_cny, images (from each
    tile's ``data-bigimg`` attribute, ';'-joined, de-duplicated) and detail
    when the matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//div[@class="prod_detzone_info"]/h3')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    code_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="prod_detzone_info"]/div[@class="dec"]')
    if code_node:
        code_text = code_node.text.strip()
        product['code'] = code_text.replace('款号:', '')

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="prod_detzone_info"]/h4')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    tiles = of_utils.find_elements_by_xpath(
        driver, '//div[@class="pic_all texiao"]/div')
    sources = []
    for tile in tiles:
        sources.append(tile.get_attribute('data-bigimg').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="note"]/small[1]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code, price_cny, images (';'-joined, de-duplicated) and detail when the
    matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//h1[@class="product-name-title"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    code_node = of_utils.find_element_by_xpath(
        driver, '//li[@class="product-id"]')
    if code_node:
        product['code'] = code_node.text.strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver,
        '//div[@class="primary-category-and-price"]//span[@class="sales "]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver,
        '//div[@class="primary-images"]//div[@class="swiper-wrapper"]//img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="info-and-care product-attributes"]/ul')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    price_cny (from the price element's ``content`` attribute), images (the
    ``content`` of each ``og:image`` meta tag, ';'-joined, de-duplicated)
    and detail when the matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//h1[@class="product-name"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//span[@itemprop="price"]')
    if price_node:
        raw_price = price_node.get_attribute('content')
        product['price_cny'] = of_utils.convert_price(raw_price)

    # images
    meta_nodes = of_utils.find_elements_by_xpath(
        driver, '//meta[@property="og:image"]')
    sources = []
    for meta in meta_nodes:
        sources.append(meta.get_attribute('content').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//p[@id="collapseDescription"]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    price_cny, images (';'-joined, de-duplicated) and detail when the
    matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//div[@class="product-title "]/h2[@class="product-name"][1]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="price-row"]//h2[@class="price-sales"][1]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//li[contains(@class,"productthumbnail")]/img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="pdp-tab-content"]/p')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    price_usd (with '$' removed) and images (each image's ``srcset`` value,
    ';'-joined, de-duplicated) when the matching elements are found. No code
    or detail is read.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//div[@class="product-name alternate"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # price_usd
    price_node = of_utils.find_element_by_xpath(
        driver, '//span[@class="price-sales"]')
    if price_node:
        price_text = price_node.text.strip().replace('$', '')
        product['price_usd'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//a[@class="product-image main-image"]/picture/img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('srcset').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (with the '编号: ' label removed), price_cny, images (';'-joined in
    page order, duplicates kept) and detail when the matching elements are
    found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver,
        '//h1[@class="c-pdp__cta-section--product-title js-pdp__cta-section--product-title"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    ref_node = of_utils.find_element_by_xpath(
        driver, '//span[@class="local-ref"]')
    if ref_node:
        ref_text = ref_node.text.strip()
        product['code'] = ref_text.replace('编号: ', '')

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="price js-product-price-formatted"]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver,
        '//div[@class="c-pdp__product-carousel js-pdp__product-carousel carousel slide"]//img[@class="image js-adaptiveImage c-pdp__image image"]')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(sources)

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="tabbed-content__content-column"][1]/p')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (the ``data-partnumber`` attribute of the main content div),
    price_cny, images (raw ``src`` values, ';'-joined, de-duplicated) and
    detail when the matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//h1[@class="pDetails__title"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    content_node = of_utils.find_element_by_xpath(
        driver, '//div[@id="mainPdpContent"]')
    if content_node:
        product['code'] = content_node.get_attribute('data-partnumber')

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver,
        '//div[@class=" pDetails__priceItem"]//span[@class="price__value"]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images (src values are used as-is, without stripping)
    image_nodes = of_utils.find_elements_by_xpath(
        driver,
        '//div[@class="pDetails__slide js-imgProduct slick-slide"]//img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src'))
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//p[@class="pDetails__desc"]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (with the '商品代码:' label removed), price_cny, images (';'-joined
    in page order, duplicates kept) and detail when the matching elements
    are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver,
        '//h3[@class="component-products-head-line__title font-bemboStd"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    code_node = of_utils.find_element_by_xpath(
        driver, '//p[@class="page-products-id__code"]')
    if code_node:
        code_text = code_node.text.strip()
        product['code'] = code_text.replace('商品代码:', '')

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="component-products-head-line__price"]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//li[@class="component-products-pictures__item"]/img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(sources)

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//p[@class="page-products-id__describe"]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Sleeps 5 seconds, then starts from a copy of ``of_spider.empty_product``
    and fills in title (text of the share icon's parent element), code,
    price_cny, images and detail when the matching elements are found.
    Image URLs come from ``data-src`` with everything from
    '?x-oss-process' onward cut off, ';'-joined and de-duplicated.

    Raises:
        Exception: if the title element is not found.
    """
    of_utils.sleep(5)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    title_node = of_utils.find_element_by_xpath(
        driver, '//i[@class="iconfont icon-ICON_share"]/..')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code
    code_node = of_utils.find_element_by_xpath(driver, '//div[@class="code"]')
    if code_node:
        product['code'] = code_node.text.strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="product-price"]/i')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    thumb_nodes = of_utils.find_elements_by_xpath(
        driver, '//ul[@class="small-img-list"]/li/img')
    sources = []
    for thumb in thumb_nodes:
        raw_src = thumb.get_attribute('data-src').strip()
        sources.append(raw_src.split('?x-oss-process')[0])
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="desc"]//div[@class="content"]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def getContent(url):
    """Open ``url`` in a fresh Chrome session and scrape article metadata.

    Returns:
        dict: may contain 'author' (';'-joined author names), 'journal',
        'publishAt', 'doi', 'seq' (always '') and 'fullTextUrl'. A key is
        present only when the corresponding element or citation part was
        found.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    driver = webdriver.Chrome(chrome_options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        driver.implicitly_wait(15)
        d = {}

        eles = of_utils.find_elements_by_xpath(driver, '//div[@class="auths"]/a')
        if eles:
            auths = [e.text.strip() for e in eles]
            d['author'] = ';'.join(auths)

        ele = of_utils.find_element_by_xpath(
            driver, '//div[@class="rprt abstract"]/div[@class="cit"]')
        if ele:
            # The citation is split on '. ' into journal, date, doi. A short
            # citation keeps whatever parts exist instead of raising
            # IndexError.
            arr = ele.text.split('. ')
            d['journal'] = arr[0]
            if len(arr) > 1:
                d['publishAt'] = arr[1]
            if len(arr) > 2:
                d['doi'] = arr[2].replace('doi: ', '')
            d['seq'] = ''

        ele = of_utils.find_element_by_xpath(
            driver, '//div[@class="icons portlet"]/a')
        if not ele:
            ele = of_utils.find_element_by_xpath(
                driver, '//a[@ref="aid_type=doi"]')
        if ele:
            d['fullTextUrl'] = ele.get_attribute('href')
        return d
    finally:
        # Always end the session so each call does not leave a browser
        # process behind, even when scraping raises.
        driver.quit()
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    images (';'-joined, de-duplicated) and detail when the matching elements
    are found. No code or price is read.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(driver, '//h1[@class="h1-title"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver,
        '//div[@class="product-gallery__col-item product-gallery__main-gallery"]//img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver,
        '//div[@class="product-detail__col--item product-detail__details"]/p')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (with the '编号: ' label removed), price_cny, images (';'-joined,
    de-duplicated) and detail (the raw ``innerHTML`` of the description
    block) when the matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//div[@class="product-titles"]/h1/span')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    ref_node = of_utils.find_element_by_xpath(
        driver, '//p[@class="product-titles-ref"]')
    if ref_node:
        ref_text = ref_node.text.strip()
        product['code'] = ref_text.replace('编号: ', '')

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//span[@class="price-line"]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//div[@class="image product-media__image"]/img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail (kept as HTML, not stripped)
    detail_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="couture-description__html"]')
    if detail_node:
        product['detail'] = detail_node.get_attribute('innerHTML')
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (with the 'ITEM: ' label removed), price_cny and images (';'-joined,
    de-duplicated) when the matching elements are found. No detail is read.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//h2[@class="goods-title"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # code
    code_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="goods-bn"]')
    if code_node:
        code_text = code_node.text.strip()
        product['code'] = code_text.replace('ITEM: ', '')

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver,
        '//span[@class="price-primary sp-price sp-price__default clearfix"]/span[@class="price__int"]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//div[@class="img-container"]/img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Sleeps 2 seconds, then starts from a copy of ``of_spider.empty_product``
    and fills in title (newlines replaced by spaces), code (with the
    '商品编号 :' label removed), images (';'-joined, de-duplicated) and
    detail when the matching elements are found. No price is read.

    Raises:
        Exception: if the title element is not found.
    """
    of_utils.sleep(2)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//h1[contains(@class,"c-product-name-pdp")]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip().replace('\n', ' ')

    # code
    code_node = of_utils.find_element_by_xpath(
        driver, '//span[contains(@class,"o-utility")]')
    if code_node:
        code_text = code_node.text.strip()
        product['code'] = code_text.replace('商品编号 :', '').strip()

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver, '//div[contains(@class,"product-image-first")]/img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//p[@class="text--center c-collection-desc"]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_entry(self, driver):
    """Collect product links from a listing page that grows on scroll.

    Clicks the visible "load more" button once if it is found, then keeps
    moving to the last product link and sending five PAGE_DOWN keys until a
    pass finds no more links than the previous one.

    Returns:
        list of str: stripped ``href`` of every product link found in the
        final pass.
    """
    driver.implicitly_wait(15)

    more_button = of_utils.find_element_by_xpath(
        driver, '//div[@class="loadMore isVisible"]/button')
    if more_button:
        driver.execute_script('arguments[0].click();', more_button)
        of_utils.sleep(4)

    link_xpath = '//a[@class="productQB__wrapperImage js-product-qb-link"]'
    seen_count = 0
    links = of_utils.find_elements_by_xpath(driver, link_xpath)
    while len(links) > seen_count:
        seen_count = len(links)
        scroll = ActionChains(driver).move_to_element(links[-1])
        for _ in range(5):
            scroll.send_keys(Keys.PAGE_DOWN)
        scroll.perform()
        of_utils.sleep(4)
        links = of_utils.find_elements_by_xpath(driver, link_xpath)

    hrefs = []
    for link in links:
        hrefs.append(link.get_attribute('href').strip())
    return hrefs
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    price_cny, images (';'-joined in page order, duplicates kept) and detail
    when the matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(driver, 'h1#title')
    if element:
        product['title'] = element.text.strip()
    else:
        raise Exception('Title not found')

    # price_cny
    element = of_utils.find_element_by_xpath(
        driver, '//span[@class="price-sales"]')
    if element:
        product['price_cny'] = of_utils.convert_price(element.text.strip())

    # images
    elements = of_utils.find_elements_by_css_selector(
        driver, 'ul.productSlide > li > a > div.zoomPad > img')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail
    element = of_utils.find_element_by_css_selector(
        driver, 'h2.detaildesc')
    # Guarded like the other optional fields: a page without a description
    # must not fail with AttributeError on ``.text``.
    if element:
        product['detail'] = element.text.strip()
    return product
def parse_entry(self, driver):
    """Collect product links after expanding the listing via "view more".

    Keeps clicking the link inside the ``o-viewMore`` container until the
    container is missing, its inline style contains 'display: none;', or the
    link inside it cannot be found.

    Returns:
        list of str: stripped ``href`` of every ``js-product-link`` anchor.
    """
    driver.implicitly_wait(15)
    while True:
        container = of_utils.find_element_by_xpath(
            driver, '//div[@class="o-viewMore"]')
        if not container:
            break
        # The style attribute may come back as None; treat that as "no
        # inline style" rather than failing the ``in`` test with TypeError.
        inline_style = container.get_attribute('style') or ''
        if 'display: none;' in inline_style:
            break
        link = of_utils.find_element_by_xpath(
            driver, '//div[@class="o-viewMore"]/a')
        if not link:
            # Nothing to click; stop instead of running the click script
            # with a null argument.
            break
        driver.execute_script('arguments[0].click();', link)
        of_utils.sleep(5)

    elements = of_utils.find_elements_by_xpath(
        driver, '//a[@class="js-product-link"]')
    return [element.get_attribute('href').strip() for element in elements]
def parse_entry(self, driver):
    """Collect product links from the listing page loaded in ``driver``.

    Scrolls by the document height, clicks the "pagination all" link if it
    is found (then sleeps 5 seconds), and gathers the thumbnail links.

    Returns:
        list of str: stripped ``href`` of every product thumbnail link.
    """
    driver.execute_script('window.scrollBy(0, document.body.scrollHeight);')

    show_all = of_utils.find_element_by_xpath(
        driver, '//a[@class="pagination__all"]')
    if show_all:
        driver.execute_script('arguments[0].click();', show_all)
        of_utils.sleep(5)

    anchors = of_utils.find_elements_by_xpath(
        driver, '//div[@class="product-image"]//a[@class="thumb-link"]')
    hrefs = []
    for anchor in anchors:
        hrefs.append(anchor.get_attribute('href').strip())
    return hrefs
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Title, price, images and detail each try a primary XPath and fall back
    to a second one when the first finds nothing (presumably two page
    layouts -- confirm against the site). Starts from a copy of
    ``of_spider.empty_product``. Images are ';'-joined in page order,
    skipping inline ``data:image/gif`` sources.

    Raises:
        Exception: if neither title element is found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_xpath(driver, '//div[@id="variant-info"]/h1')
    if not element:
        element = of_utils.find_element_by_xpath(driver, '//p[@class="product-title"]')
    if element:
        product['title'] = element.text.strip()
    else:
        raise Exception('Title not found')

    # code
    element = of_utils.find_element_by_xpath(
        driver,
        '//div[@id="product-detail"]//div[@class="commerce-product-sku"]/p/span')
    if element:
        product['code'] = element.text.strip()

    # price_cny
    element = of_utils.find_element_by_xpath(
        driver, '//div[@id="variant-info"]/p[@class="field-type-commerce-price"]')
    if not element:
        element = of_utils.find_element_by_xpath(driver, '//p[@class="product-price"]')
    if element:
        product['price_cny'] = of_utils.convert_price(element.text.strip())

    # images
    elements = of_utils.find_elements_by_xpath(
        driver, '//picture[@class="product-item-picture"]/img')
    if not elements:
        elements = of_utils.find_elements_by_xpath(
            driver, '//img[contains(@class,"main-product-image")]')
    if elements:
        images = []
        for img in elements:
            # Read src once. An image without a src is skipped rather than
            # raising TypeError in the substring test.
            src = img.get_attribute('src')
            if src and 'data:image/gif' not in src:
                images.append(src.strip())
        product['images'] = ';'.join(images)

    # detail
    element = of_utils.find_element_by_xpath(
        driver, '//div[@class="field-name-field-description"]/div/p')
    if not element:
        element = of_utils.find_element_by_xpath(
            driver, '//p[@class="product-attribute-font-description"]')
    if element:
        product['detail'] = element.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    After reading the title, moves to the title element, sends five
    PAGE_DOWN keys and sleeps 4 seconds before reading the remaining fields.
    Starts from a copy of ``of_spider.empty_product`` and fills in title,
    code (with '货号' removed), price_cny, images (';'-joined in page order,
    duplicates kept) and detail when the matching elements are found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_xpath(
        driver, '//div[@class="c-product-details"]//h1[@class="c-title"]')
    if not heading:
        raise Exception('Title not found')
    product['title'] = heading.text.strip()

    # scroll down from the title before reading the rest of the page
    scroll = ActionChains(driver).move_to_element(heading)
    for _ in range(5):
        scroll.send_keys(Keys.PAGE_DOWN)
    scroll.perform()
    of_utils.sleep(4)

    # code
    code_node = of_utils.find_element_by_xpath(driver, '//div[@class="c-cod"]')
    if code_node:
        product['code'] = code_node.text.replace('货号', '').strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//span[@class="c-realprice"]')
    if price_node:
        price_text = price_node.text.strip()
        product['price_cny'] = of_utils.convert_price(price_text)

    # images
    image_nodes = of_utils.find_elements_by_xpath(
        driver,
        '//div[@class="c-vertical-scroll js-init-slick"]/div[@class="c-slide"]/span/img')
    sources = []
    for img in image_nodes:
        sources.append(img.get_attribute('src').strip())
    product['images'] = ';'.join(sources)

    # detail
    detail_node = of_utils.find_element_by_xpath(
        driver, '//h2[@class="c-description"]')
    if detail_node:
        product['detail'] = detail_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from the product page loaded in ``driver``.

    Starts from a copy of ``of_spider.empty_product`` and fills in images
    (';'-joined, de-duplicated) and, when the description element is found,
    detail.
    """
    # ``product`` was used without being defined, so every call raised
    # NameError. Initialise it the same way the sibling parsers do.
    # NOTE(review): this parser reads no title, code or price -- confirm
    # whether those steps were lost from this function.
    product = of_spider.empty_product.copy()

    # images
    elements = of_utils.find_elements_by_xpath(
        driver, '//img[@class="attachment-shop_single size-shop_single"]')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(dict.fromkeys(images))

    # detail
    element = of_utils.find_element_by_xpath(
        driver, '//p[@class="product-detail-description"]')
    if element:
        product['detail'] = element.text.strip()
    return product