def parse_entry(self, driver):
    """Collect product links from a listing page that loads more on scroll.

    Several page layouts are supported: the selectors are tried in order and
    the first one that yields anything wins. The page is scrolled down and
    re-queried until the number of matched links stops growing.

    Returns:
        The stripped ``href`` of every link found on the final pass.
    """
    driver.implicitly_wait(15)
    layout_selectors = (
        'div.productItemContainer > a',
        'li.productItemContainer > a',
        'li.productItem > a',
        'li.lookItem > a',
        'ul.lv-list-container a',
    )
    seen = 0
    while True:
        for selector in layout_selectors:
            anchors = of_utils.find_elements_by_css_selector(driver, selector)
            if anchors:
                break
        # Stop as soon as a scroll brings in nothing new.
        if len(anchors) <= seen:
            break
        seen = len(anchors)
        driver.execute_script(
            'window.scrollBy(0, document.body.scrollHeight);')
        of_utils.sleep(4)
    return [anchor.get_attribute('href').strip() for anchor in anchors]
def parse_product(self, driver):
    """Scrape title, code, price, images and detail text from a product page.

    Starts from a copy of ``of_spider.empty_product`` and fills in the keys
    ``title``, ``code``, ``price_cny``, ``images`` (``;``-joined URLs) and
    ``detail`` (newline-joined paragraphs).

    Raises:
        Exception: if the title element is missing.
    """
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver, 'div.goods_about > p.about_tit')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip().replace('\n', ' ')

    # code
    element = of_utils.find_element_by_css_selector(
        driver, 'p.bianhao > span')
    if element:
        product['code'] = element.text.strip()

    # price_cny
    element = of_utils.find_element_by_css_selector(
        driver, 'p.about_cost > span')
    if element:
        # Drop the leading currency symbol, and any thousands separators so
        # that a price such as "1,280" no longer raises ValueError.
        price_text = element.text.strip()[1:].strip().replace(',', '')
        product['price_cny'] = int(float(price_text))

    # images: request the 540X540 rendition instead of the 80X80 thumbnail
    elements = of_utils.find_elements_by_css_selector(
        driver, 'ul.swiper-wrapper > li > img')
    images = [
        element.get_attribute('src').strip().replace('80X80', '540X540')
        for element in elements
    ]
    product['images'] = ';'.join(images)

    # detail
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.word > div > p > span')
    texts = [element.text.strip() for element in elements]
    product['detail'] = '\n'.join(texts)
    return product
def parse_product(self, driver):
    """Scrape title, reference code, images and spec list from a watch page.

    Starts from a copy of ``of_spider.empty_product`` and fills in ``title``,
    ``code``, ``images`` (``;``-joined URLs) and ``detail`` (one ``key:value``
    line per specification). No price is read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver, 'p.tdr-watch-details__header-watch-name > span')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip().replace('\n', '')

    # code: keep only what follows the last ':' of the reference line
    element = of_utils.find_element_by_css_selector(
        driver, 'p.tdr-watch-details__header-watch-reference')
    if element:
        product['code'] = element.text.strip().split(':')[-1]

    # price_cny N/A

    # images
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.tdr-variations__main-image-canvas-wrapper > img')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail
    texts = []
    elements = of_utils.find_elements_by_css_selector(
        driver,
        'ul.tdr-watch-details__column > li > div.tdr-watch-details__text')
    for element in elements:
        k_element = of_utils.find_element_by_css_selector(
            element, 'p.tdr-watch-details__title')
        v_element = of_utils.find_element_by_css_selector(
            element, 'p.tdr-watch-details__spectext')
        # A spec entry may lack its title or its value; keep whatever is
        # present instead of failing on a missing element.
        if k_element and v_element:
            texts.append(
                k_element.text.strip() + ':' + v_element.text.strip())
        elif v_element:
            texts.append(v_element.text.strip())
        elif k_element:
            texts.append(k_element.text.strip())
    product['detail'] = '\n'.join(texts)
    return product
def parse_product(self, driver):
    """Build a product record from a product page with a reveal-price button.

    Fills a copy of ``of_spider.empty_product`` with ``title``, ``code``,
    ``price_cny``, ``images`` (``;``-joined URLs) and ``detail`` (the
    innerHTML of each description paragraph, newline-joined).

    Raises:
        Exception: if the title element is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, 'span.product.attribute.name')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code
    sku_node = find_one(driver, 'span.value[itemprop=sku]')
    if sku_node:
        product['code'] = sku_node.text.strip()

    # price_cny: click the "show price" control first when it is present
    reveal_node = find_one(driver, 'span.product-price-reveal__action__show')
    if reveal_node:
        driver.execute_script('arguments[0].click();', reveal_node)
        of_utils.sleep(2)
    price_node = find_one(driver, 'span.price')
    if price_node:
        # Drop the leading ¥ and the thousands separators.
        amount = price_node.text.strip()[1:].strip().replace(',', '')
        product['price_cny'] = int(float(amount))

    # images
    image_nodes = find_all(driver, 'div.fotorama__stage__shaft > div > img')
    product['images'] = ';'.join(
        node.get_attribute('src').strip() for node in image_nodes)

    # detail
    paragraph_nodes = find_all(
        driver, 'div.product-info-details-content > div.value > p')
    product['detail'] = '\n'.join(
        node.get_attribute('innerHTML').strip() for node in paragraph_nodes)
    return product
def parse_entry(self, driver):
    """Collect product links from a category page in one of several layouts.

    Three static grid layouts are tried in order. If none matches, the page
    is treated as a lazily loaded list: an optional "load more" control is
    clicked, then the page is scrolled until the link count stops growing.

    Returns:
        The stripped ``href`` of every product link found.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector

    static_layouts = (
        # handbags
        'div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__illu > a',
        # handbags, second layout
        'div.fs-products-grid > div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__wrapper > a',
        # make-up
        'div.fnb_col-wd6.fnb_product-img > a',
    )
    for selector in static_layouts:
        links = find_all(driver, selector)
        if links:
            break

    if not links:
        # watches
        trigger = find_one(driver, 'div.pd-action-btns > button[role=button]')
        if not trigger:
            # rings
            trigger = find_one(driver, 'div.display-all > a')
        if trigger:
            driver.execute_script('arguments[0].click();', trigger)
            of_utils.sleep(5)
        seen = 0
        while True:
            links = find_all(driver, 'div.products > div.row > div > ul > li > div.product-item-wrapper > a')
            if len(links) <= seen:
                break
            seen = len(links)
            driver.execute_script('window.scrollBy(0, document.body.scrollHeight);')
            of_utils.sleep(4)
    return [link.get_attribute('href').strip() for link in links]
def parse_product(self, driver):
    """Scrape title, code, price, images and detail from a product page.

    Starts from a copy of ``of_spider.empty_product``. Code and price come
    from the first two ``<strong>`` entries of the option box and are only
    set when both are present.

    Raises:
        Exception: if the title element is missing.
    """
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver, 'div.frabic-detail-right > h3')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code + price_cny
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.detail-chose-box > div > strong')
    if len(elements) >= 2:
        product['code'] = elements[0].text.strip()
        product['price_cny'] = int(float(elements[1].text.strip()))

    # images
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.frabic-detail-left > img')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail: optional, like the other non-title fields; a page without the
    # detail cell no longer aborts the whole product.
    element = of_utils.find_element_by_css_selector(
        driver, 'table.detail > tbody > tr > td > span.detail')
    if element:
        product['detail'] = element.text.strip()
    return product
def parse_product(self, driver):
    """Scrape title, images and detail text from a product page.

    Starts from a copy of ``of_spider.empty_product`` and fills in ``title``,
    ``images`` (``;``-joined URLs) and ``detail``. The detail is the
    "read more" text followed by one ``key:value`` line per details row.
    Neither code nor price is read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver, 'div.product-title.desktop > div.inner > h1[lang=en]')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip().replace('\n', ' ')

    # code N/A
    # price_cny N/A

    # images
    elements = of_utils.find_elements_by_css_selector(
        driver, 'img.main-image')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail
    texts = []
    element = of_utils.find_element_by_css_selector(driver, 'div.readmore')
    if element:  # the description block is optional
        texts.append(element.text.strip())
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.list-details > div.row')
    for element in elements:
        spans = of_utils.find_elements_by_css_selector(element, 'span')
        if not spans:
            continue
        # Normally a row is a key span plus a value span. A row with a single
        # span contributes that text alone instead of raising IndexError.
        texts.append(':'.join(span.text.strip() for span in spans[:2]))
    product['detail'] = '\n'.join(texts)
    return product
def parse_entry(self, driver):
    """Return the product links of a listing page.

    The image-wrapped link layout is tried first; the plain product-link
    layout is the fallback.
    """
    find_all = of_utils.find_elements_by_css_selector
    links = find_all(driver, 'div.product > div.product-image > a')
    if not links:
        links = find_all(driver, 'div.product > a.product-link')
    hrefs = []
    for link in links:
        hrefs.append(link.get_attribute('href').strip())
    return hrefs
def parse_product(self, driver):
    """Scrape title, price, images and description from a product page.

    Starts from a copy of ``of_spider.empty_product`` and fills in ``title``,
    ``price_cny``, ``images`` (``;``-joined URLs) and ``detail``. No product
    code is read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver, 'div.product-title > h1')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code N/A

    # price_cny: drop the leading currency symbol and thousands separators
    element = of_utils.find_element_by_css_selector(
        driver, 'span.regular-price > span.price')
    if element:
        price_text = element.text.strip()[1:].strip().replace(',', '')
        product['price_cny'] = int(float(price_text))

    # images: primary gallery layout first, then the fallback container
    elements = of_utils.find_elements_by_css_selector(
        driver,
        'div#product-page-top > div > div > div > div > div > div > img')
    if not elements:
        elements = of_utils.find_elements_by_css_selector(
            driver, 'div.img-cont > img')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail: optional; a page without a description no longer aborts the
    # whole product.
    element = of_utils.find_element_by_css_selector(
        driver, 'div#product-description')
    if element:
        product['detail'] = element.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record from a page whose header holds title and price.

    The header block text is split on newlines: the last line carries the
    price (stored under ``price_hkd``) and every line before it forms the
    title. Images and the non-empty cells of the detail table are collected
    as well. No product code is read on this page.

    Raises:
        Exception: if the header block is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title + price_hkd
    header = find_one(driver, 'div.right[align=left] > div')
    if not header:
        raise Exception('Title not found')
    *title_lines, price_line = header.text.split('\n')
    product['title'] = ' '.join(title_lines)
    # Keep what lies before the first '/' and after the last '$'.
    amount = price_line.partition('/')[0].rpartition('$')[2].strip()
    product['price_hkd'] = int(float(amount))

    # code N/A

    # images
    image_nodes = find_all(driver, 'div.left[align=center] > div >img')
    product['images'] = ';'.join(
        node.get_attribute('src').strip() for node in image_nodes)

    # detail: one line per non-empty table cell
    cells = find_all(driver, 'table.table-productdetails > tbody > tr > td')
    stripped = (cell.text.strip() for cell in cells)
    product['detail'] = '\n'.join([text for text in stripped if text])
    return product
def parse_product(self, driver):
    """Scrape one product out of a list page, selected by index.

    The product index is the integer after the last ``?`` in the current
    URL. Title, price and image are each taken from the matching position
    of their own element list. Neither code nor detail is read here.

    Raises:
        ValueError: if the URL suffix is not an integer.
        Exception: if no title element exists at the requested index.
    """
    flag = int(driver.current_url.split('?')[-1])

    def pick(selector):
        """Return the ``flag``-th match for *selector*, or None if absent."""
        matches = of_utils.find_elements_by_css_selector(driver, selector)
        try:
            return matches[flag]
        except (IndexError, TypeError):
            # Shorter list than expected (or no list at all): report
            # "not found" so the guards below can act on it.
            return None

    product = of_spider.empty_product.copy()

    # title
    element = pick('.product-list .name')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code N/A

    # price_cny
    element = pick('.product-list .value')
    if element:
        product['price_cny'] = of_utils.convert_price(element.text.strip())

    # images
    images = []
    element = pick('.product-list .product-img img')
    if element:
        src = element.get_attribute('src')
        if src:  # a missing attribute would break the join below
            images.append(src)
    product['images'] = ';'.join(images)

    # detail N/A
    return product
def parse_product(self, driver):
    """Scrape title, reference, price, images and specs from a product page.

    Starts from a copy of ``of_spider.empty_product`` and fills in ``title``,
    ``code``, ``price_cny`` (best effort), ``images`` (``;``-joined URLs)
    and ``detail`` (one line per specification item).

    Raises:
        Exception: if the title element is missing.
    """
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver, 'div.line1 > h1')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code: last space-separated token of the reference line
    element = of_utils.find_element_by_css_selector(
        driver, 'div.reference > span')
    if element:
        product['code'] = element.text.strip().split(' ')[-1]

    # price_cny
    element = of_utils.find_element_by_css_selector(
        driver, 'div#priceDisplayValue')
    if element:
        # Drop the leading currency symbol and thousands separators.
        price_text = element.text.strip()[1:].replace(",", '')
        try:
            product['price_cny'] = int(float(price_text))
        except (ValueError, OverflowError):
            # Best effort: leave the price unset when the text is not a
            # number. Only conversion errors are swallowed, not everything.
            pass

    # images
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.callage > img.pngfix')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail
    elements = of_utils.find_elements_by_css_selector(
        driver, 'ul.listInfos.specifications > li')
    texts = [element.text.strip() for element in elements]
    product['detail'] = '\n'.join(texts).strip()
    return product
def parse_product(self, driver):
    """Build a product record from a watch page with a buying-options panel.

    Fills a copy of ``of_spider.empty_product`` with ``title`` (the wish-list
    label is removed and the reference appended when present), ``code``,
    ``price_cny``, ``images`` (absolute URLs, ``;``-joined) and ``detail``.

    Raises:
        Exception: if the title element is missing.
    """
    driver.implicitly_wait(10)
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, '.iwc-buying-options-title')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip().replace('\n添加至我的愿望清单','')

    # code (also appended to the title)
    reference_node = find_one(driver, '.iwc-buying-options-reference')
    if reference_node:
        product['code'] = reference_node.text.strip()
        product['title'] = product['title'] + ' ' + reference_node.text.strip()

    # price_cny
    price_node = find_one(driver, '.iwc-buying-options-price')
    if price_node:
        product['price_cny'] = of_utils.convert_price(price_node.text.strip())

    # images: thumbnails, skipping the carousel's cloned slides
    thumbnails = find_all(driver, '.iwc-buying-option-thumbnails .iwc-watch-thumbnail-container:not(.slick-cloned) .iwc-watch-thumbnail')
    urls = []
    for thumbnail in thumbnails:
        urls.append('https://www.iwc.cn' + thumbnail.get_attribute('data-srcset').strip())
    product['images'] = ';'.join(urls)

    # detail
    items = find_all(driver, 'ul.detailList > li')
    product['detail'] = '\n'.join([item.text.strip() for item in items]).strip()
    return product
def parse_product(self, driver):
    """Scrape title, reference, price, images and details from a watch page.

    Starts from a copy of ``of_spider.empty_product`` and fills in ``title``,
    ``code``, ``price_cny`` (best effort), ``images`` (``;``-joined URLs)
    and ``detail`` (one line per detail item).

    Raises:
        Exception: if the title element is missing.
    """
    driver.implicitly_wait(10)
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver, 'h2.pageTitle.watch-title')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code
    element = of_utils.find_element_by_css_selector(
        driver, 'li.variation-reference > span.val')
    if element:
        product['code'] = element.text.strip()

    # price_cny
    element = of_utils.find_element_by_css_selector(
        driver, 'li.price > span.val')
    if element:
        # Drop the leading currency symbol. Thousands separators are removed
        # too, so "12,300" is parsed instead of being silently discarded.
        price_text = element.text.strip()[1:].strip().replace(',', '')
        try:
            product['price_cny'] = int(float(price_text))
        except (ValueError, OverflowError):
            # Best effort: leave the price unset when the text is not a
            # number. Only conversion errors are swallowed, not everything.
            pass

    # images
    elements = of_utils.find_elements_by_css_selector(
        driver, 'ul.frontBack > li > img')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail
    elements = of_utils.find_elements_by_css_selector(
        driver, 'ul.detailList > li')
    texts = [element.text.strip() for element in elements]
    product['detail'] = '\n'.join(texts).strip()
    return product
def parse_product(self, driver):
    """Build a product record from a page that uses a fotorama gallery.

    Unlike the raising variants, a missing title or code is stored as an
    empty string and a missing price as 0. Image URLs are rewritten: the
    substring ``cache`` and every 32-character path segment are removed.
    """
    of_utils.sleep(2)
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    name_node = find_one(driver, 'div.product-name>h1')
    if name_node:
        product['title'] = name_node.text.strip()
    else:
        product['title'] = ''

    sku_node = find_one(driver, 'div[itemprop=sku]')
    if sku_node:
        product['code'] = sku_node.text.strip()
    else:
        product['code'] = ''

    price_node = find_one(driver, 'meta[itemprop=price]')
    if price_node:
        product['price_cny'] = of_utils.convert_price(
            price_node.get_attribute('content').strip())
    else:
        product['price_cny'] = 0

    # images: thumbnails first, stage frames as the fallback
    image_nodes = find_all(driver, '.fotorama__thumb>img')
    if not image_nodes:
        image_nodes = find_all(driver, '.fotorama__stage__frame>img')
    urls = []
    for node in image_nodes:
        url = node.get_attribute('src').strip().replace('cache', '')
        # 32-character segments are presumably cache hash directories
        # (assumption); they are removed from the URL.
        hashed_segments = [part for part in url.split('/') if len(part) == 32]
        for part in hashed_segments:
            url = url.replace(part, '')
        urls.append(url)
    product['images'] = ';'.join(urls)
    return product
def parse_product(self, driver):
    """Build a product record with title, price and images.

    The price text, minus its first character, is stored as a string under
    ``price_gbp``. Images come from the thumbnail links when there are any,
    otherwise from the main image tags. Neither code nor detail is read.

    Raises:
        Exception: if the title element is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, 'h1.prd-ProductContent_Title')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code N/A

    # price_gbp
    price_node = find_one(driver, '.prd-ProductPrice_Price>span')
    if price_node:
        product['price_gbp'] = price_node.text.strip()[1:]

    # images
    thumb_links = find_all(
        driver, '.prd-ProductImage_Thumbs a.prd-ProductImage_Link')
    if thumb_links:
        urls = [link.get_attribute('href').strip() for link in thumb_links]
    else:
        image_nodes = find_all(driver, '.prd-ProductImage img')
        urls = [node.get_attribute('src').strip() for node in image_nodes]
    product['images'] = ';'.join(urls)

    # detail N/A
    return product
def parse_product(self, driver):
    """Build a product record whose title spans several name lines.

    All ``.item-name-line`` texts are joined with spaces to form the title.
    The raw price text is stored as a string under ``price_hkd``. Images
    and the editorial description are collected too. No code is read.

    Raises:
        Exception: if no title line is found.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    name_lines = [
        node.text.strip() for node in find_all(driver, '.item-name-line')
    ]
    if not name_lines:
        raise Exception('Title not found')
    product['title'] = ' '.join(name_lines)

    # code N/A

    # price_hkd
    price_node = find_one(driver, '.item-price .itemPrice .price .value')
    if price_node:
        product['price_hkd'] = price_node.text.strip()

    # images
    image_nodes = find_all(
        driver, '.item-zoom-images>ul.alternativeImages>li>img')
    product['images'] = ';'.join(
        node.get_attribute('src').strip() for node in image_nodes)

    # detail
    description_node = find_one(driver, '.editorialdescription>span.value')
    if description_node:
        product['detail'] = description_node.text.strip()
    return product
def parse_product(self, driver):
    """Scrape title, reference, images and description from a watch page.

    Starts from a copy of ``of_spider.empty_product`` and fills in ``title``,
    ``code``, ``images`` (``;``-joined URLs) and ``detail`` (non-empty
    description paragraphs, newline-joined). No price is read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    driver.implicitly_wait(10)
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver, 'h1.hidden-xs[itemprop=name]')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code
    element = of_utils.find_element_by_css_selector(
        driver, 'p.reference > span.ezstring-field')
    if element:
        product['code'] = element.text.strip()

    # price_cny N/A

    # images
    # The original selector spelled the attribute "itemrprop", which looks
    # like a typo for "itemprop" (the title selector above uses "itemprop").
    # A selector list matches the correct attribute and still accepts the
    # old spelling in case the site's markup really contains it.
    elements = of_utils.find_elements_by_css_selector(
        driver,
        'div.watch > img[itemprop=image], div.watch > img[itemrprop=image]')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail: keep only paragraphs that contain text
    texts = []
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.details > div > div.ezxmltext-field > p')
    for element in elements:
        text = element.text.strip()
        if text:
            texts.append(text)
    product['detail'] = '\n'.join(texts)
    return product
def parse_entry(self, driver):
    """Return the product links of a category page.

    The handbag grid layout is tried first; the make-up layout is the
    fallback.
    """
    layouts = (
        # handbags
        'div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__illu > a',
        # make-up
        'div.fnb_col-wd6.fnb_product-img > a',
    )
    for selector in layouts:
        links = of_utils.find_elements_by_css_selector(driver, selector)
        if links:
            break
    return [link.get_attribute('href').strip() for link in links]
def parse_product(self, driver):
    """Build a product record with title, price, images and description.

    The price text has ``ml`` removed before it is handed to
    ``of_utils.convert_price``. Images come from the primary gallery, or
    from the alternative gallery when the primary one is empty. No code is
    read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, 'h1.product-info-h1')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code N/A

    # price_cny
    price_node = find_one(driver, '.product-info .price')
    if price_node:
        raw_price = price_node.text.strip().replace('ml', '')
        product['price_cny'] = of_utils.convert_price(raw_price)

    # images
    image_nodes = find_all(driver, '.product-pic .item img')
    if not image_nodes:
        image_nodes = find_all(
            driver, '.productpage-images .productpage-image img')
    product['images'] = ';'.join(
        node.get_attribute('src').strip() for node in image_nodes)

    # detail
    info_node = find_one(driver, '.tab-product-info')
    if info_node:
        product['detail'] = info_node.text.strip()
    return product
def parse_product(self, driver):
    """Build a product record with title, SKU, price, images and detail.

    The price is read from the element's ``innerHTML`` rather than its
    visible text. The detail is the long-description bullet list, one line
    per item.

    Raises:
        Exception: if the title element is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, 'h1.entry-title')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code
    sku_node = find_one(driver, 'div.container > div.pdp-name > p.pdp-sku')
    if sku_node:
        product['code'] = sku_node.text.strip()

    # price_cny
    price_node = find_one(
        driver, 'div.container > div.pdp-name > p.pdp-price')
    if price_node:
        markup = price_node.get_attribute('innerHTML').strip()
        # Drop the leading ¥ and the thousands separators.
        amount = markup[1:].strip().replace(',', '')
        product['price_cny'] = int(float(amount))

    # images
    image_nodes = find_all(
        driver, 'div.stiky-style-images > a.inventoryVariant > img')
    product['images'] = ';'.join(
        node.get_attribute('src').strip() for node in image_nodes)

    # detail
    bullet_nodes = find_all(driver, 'div.pdp-tab-longdesc > ul > li')
    product['detail'] = '\n'.join(
        [node.text.strip() for node in bullet_nodes])
    return product
def parse_product(self, driver):
    """Scrape title, images and detail text from a product page.

    Starts from a copy of ``of_spider.empty_product`` and fills in ``title``,
    ``images`` (``;``-joined URLs) and ``detail``. The detail is the product
    information paragraph followed by one line per table row, each line
    being the first two cells concatenated. Neither code nor price is read.

    Raises:
        Exception: if the title element is missing.
    """
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver, 'p.product-title')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code N/A
    # price_cny N/A

    # images
    elements = of_utils.find_elements_by_css_selector(
        driver, 'ul > li.slide > a > img')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail
    texts = []
    element = of_utils.find_element_by_css_selector(
        driver, 'p.product-information')
    if element:  # the information paragraph is optional
        texts.append(element.text.strip())
    elements = of_utils.find_elements_by_css_selector(
        driver, 'table.gridtable > tbody > tr')
    for element in elements:
        cells = of_utils.find_elements_by_css_selector(element, 'td')
        if not cells:
            # Rows without <td> cells are skipped instead of raising
            # IndexError.
            continue
        # First two cells concatenated, as before; a one-cell row
        # contributes that cell alone.
        texts.append(''.join(cell.text.strip() for cell in cells[:2]))
    product['detail'] = '\n'.join(texts)
    return product
def parse_product(self, driver):
    """Scrape title, model code, images and description from a product page.

    Starts from a copy of ``of_spider.empty_product`` and fills in ``title``,
    ``images`` (``;``-joined URLs with ``_10_`` replaced by ``_20_``),
    ``code`` (innerHTML of the model-name span) and ``detail`` (innerHTML of
    each description bullet). No price is read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    of_utils.sleep(5)
    product = of_spider.empty_product.copy()

    # title
    element = of_utils.find_element_by_css_selector(
        driver,
        'div.breadcrumbLeaf > p.attributesUpdater.Title > span.value')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # price_cny N/A

    # images (the leftover debug print of the element list was removed)
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.item-alternativeImages-shots > ul > li > img')
    images = [
        element.get_attribute('data-origin').strip().replace('_10_', '_20_')
        for element in elements
    ]
    product['images'] = ';'.join(images)

    # code: optional, like the non-title fields of the sibling parsers
    element = of_utils.find_element_by_css_selector(driver, 'span.modelName')
    if element:
        product['code'] = element.get_attribute('innerHTML').strip()

    # detail: expand the accordion when its toggle exists. Clicking a missing
    # element would make the script fail on a null argument.
    btn = of_utils.find_element_by_css_selector(
        driver, 'ul.itemDetails-info-accordion > li > h2 > div.plusIcon')
    if btn:
        driver.execute_script('arguments[0].click();', btn)
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.itemdescription > ul > li')
    texts = [
        element.get_attribute('innerHTML').strip() for element in elements
    ]
    product['detail'] = '\n'.join(texts)
    return product
def parse_product(self, driver):
    """Build a product record with title, reference, images and spec list.

    Each detail entry becomes ``label:value`` when the entry has a label
    span, or just the value otherwise. No price is read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, 'div.product-card > span')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code
    reference_node = find_one(driver, 'span.reference-jewelry')
    if reference_node:
        product['code'] = reference_node.text.strip()

    # price_cny N/A

    # images
    image_nodes = find_all(driver, 'div.content > img.carousel-slide__media')
    product['images'] = ';'.join(
        node.get_attribute('src').strip() for node in image_nodes)

    # detail
    lines = []
    for entry in find_all(driver, 'ul.fiche-details__left > li'):
        label_node = find_one(entry, 'span')
        value_node = find_one(entry, 'p')
        if label_node:
            line = label_node.text.strip() + ':' + value_node.text.strip()
        else:
            line = value_node.text.strip()
        lines.append(line)
    product['detail'] = '\n'.join(lines)
    return product
def parse_product(self, driver):
    """Build a product record with title, product number, price and images.

    Image URLs are taken from the ``srcset`` attribute, using the swiper
    gallery first and the slick gallery's ``<source>`` tags as the fallback.
    No detail text is read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, 'div.product-detail h1.product-name')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code
    number_node = find_one(driver, '.giv-ProductContent-productNumber>span')
    if number_node:
        product['code'] = number_node.text.strip()

    # price_cny
    price_node = find_one(driver, 'div.product-price > span.price-sales')
    if price_node:
        # Drop the leading ¥ and the thousands separators.
        amount = price_node.text.strip()[1:].strip().replace(',', '')
        product['price_cny'] = int(float(amount))

    # images
    sources = find_all(driver, 'div.swiper-slide > a.fullscreen > picture > img')
    if not sources:
        sources = find_all(driver, '.giv-ProductImageContainer .slick-list .slick-track a picture source')
    product['images'] = ';'.join(
        source.get_attribute('srcset').strip() for source in sources)

    # detail N/A
    return product
def parse_product(self, driver):
    """Build a product record with title, price, images and introduction.

    The price is the second space-separated token of the price line, cut at
    the first ``/``. The detail comes from the introduction paragraph when
    it exists, otherwise from the introduction ``<div>`` blocks. No code is
    read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, 'div.p-name > h3.subtitle')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code N/A

    # price_cny
    price_node = find_one(driver, 'div.p-name > div.price')
    if price_node:
        second_token = price_node.text.strip().split(' ')[1]
        amount = second_token.split('/')[0].strip()
        product['price_cny'] = int(amount)

    # images
    image_nodes = find_all(
        driver, 'ul.swiper-wrapper > li.swiper-slide > img')
    product['images'] = ';'.join(
        node.get_attribute('src').strip() for node in image_nodes)

    # detail
    intro_node = find_one(
        driver, 'div.product-point > div#p-intro > div.des > p')
    if intro_node:
        detail = intro_node.text.strip()
    else:
        blocks = find_all(
            driver, 'div.product-point > div#p-intro > div.des > div')
        detail = '\n'.join([block.text.strip() for block in blocks])
    product['detail'] = detail
    return product
def parse_product(self, driver):
    """Open one product's popup on a list page and scrape it.

    The product index is the integer after the last ``?`` in the current
    URL. The matching list link is clicked, then title, price (stored as a
    string under ``price_hkd``) and images are read from that product's
    popup. Neither code nor detail is read.

    Raises:
        Exception: if the popup title is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector

    links = find_all(driver, '.product_list .product_1 a')
    flag = int(driver.current_url.split('?')[-1])
    driver.execute_script('arguments[0].click();', links[flag])
    of_utils.sleep(2)

    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(
        driver,
        '.popup_product_%s .product_detail_content .jspPane h2' % flag)
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code N/A

    # price_hkd: text before the first '/', without '$' and ','
    price_node = find_one(
        driver, '.popup_product_%s .product_photo h3' % flag)
    if price_node:
        before_slash = price_node.text.strip().split('/')[0].strip()
        product['price_hkd'] = before_slash.replace('$', '').replace(',', '')

    # images
    image_nodes = find_all(
        driver, '.popup_product_%s .product_photo img' % flag)
    product['images'] = ';'.join(
        node.get_attribute('src').strip() for node in image_nodes)

    # detail N/A
    return product
def parse_product(self, driver):
    """Build a product record with title, price, images and description.

    The price comes from the ``data-pricevalue`` attribute, passed through
    ``of_utils.convert_price``. Image URLs are the ``href`` of the thumbnail
    links, or of the primary image link when there are no thumbnails. No
    code is read on this page.

    Raises:
        Exception: if the title element is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, '#pdpMain h1.product_subtitle')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code N/A

    # price_cny
    price_node = find_one(driver, '.product_add_to_cart .product_price')
    if price_node:
        raw_price = price_node.get_attribute('data-pricevalue')
        product['price_cny'] = of_utils.convert_price(raw_price)

    # images
    image_links = find_all(driver, '.product_thumbnails ul li.thumb a')
    if not image_links:
        image_links = find_all(driver, '.product_primary_image a')
    product['images'] = ';'.join(
        link.get_attribute('href').strip() for link in image_links)

    # detail
    description_node = find_one(
        driver, '#pdpMain .product_description_box>div>span')
    if description_node:
        product['detail'] = description_node.text.strip()
    return product
def parse_entry(self, driver):
    """Return the product links of a listing page.

    When a "browse all" button is present it is clicked first. Links are
    then read from the look grid, or from the product-list grid when the
    look grid is empty.
    """
    find_all = of_utils.find_elements_by_css_selector

    browse_all = of_utils.find_element_by_css_selector(driver, 'div.spice-looks-grid-button > a')
    if browse_all:
        # Click "browse all".
        driver.execute_script('arguments[0].click();', browse_all)

    links = find_all(driver, 'ul.spice-float-clearfix > li > div > div > a.spice-item-grid')
    if not links:
        links = find_all(driver, 'div#pdlist > div.grid-cell > div.product-tiles-box > a.spice-item-grid')

    hrefs = []
    for link in links:
        hrefs.append(link.get_attribute('href').strip())
    return hrefs
def parse_product(self, driver):
    """Build a product record with title, code, price, images and detail.

    When the scraped title contains the site-name marker it is rebuilt from
    the plain ``h1.title`` text plus ``-`` plus the SKU text, if any. The
    price is looked up in three possible locations and parsed from the part
    of the element's ``innerHTML`` after the first ``;`` (presumably an HTML
    entity for the currency sign; assumption). Images come from the zoom
    gallery, or from the alternative gallery when the first one is empty.

    Raises:
        Exception: if the title element is missing.
    """
    find_one = of_utils.find_element_by_css_selector
    find_all = of_utils.find_elements_by_css_selector
    product = of_spider.empty_product.copy()

    # title
    title_node = find_one(driver, '.product-name h1.title')
    if not title_node:
        raise Exception('Title not found')
    product['title'] = title_node.text.strip()

    # code: second space-separated token of the footer paragraph
    footer_node = find_one(driver, 'footer > p')
    if footer_node:
        product['code'] = footer_node.text.split(' ')[1].strip()

    if '中国官网' in product['title']:
        heading_node = find_one(driver, 'h1.title')
        if not heading_node:
            raise Exception('Title not found')
        rebuilt_title = heading_node.text.strip() + '-'
        sku_node = find_one(driver, 'div.product-sku')
        if sku_node:
            rebuilt_title += sku_node.text.strip()
        product['title'] = rebuilt_title

    # price_cny: first location that yields an element wins
    price_node = find_one(
        driver, 'div.prices > span.price > span > span.price')
    if not price_node:
        price_node = find_one(
            driver, 'div.prices > span.price > p.old-price > span')
    if not price_node:
        price_node = find_one(
            driver, 'div.product-price > span.price > span > span.price')
    if price_node:
        markup = price_node.get_attribute('innerHTML')
        amount = markup.split(';')[1].strip().replace(',', '')
        product['price_cny'] = int(float(amount))

    # images
    zoom_links = find_all(driver, 'ul.images > li > a')
    urls = [
        link.get_attribute('data-zoom-image').strip() for link in zoom_links
    ]
    if not urls:
        gallery_links = find_all(
            driver,
            'div.product-gallery > div.gallery > div.gallery-item > a')
        urls = [link.get_attribute('href').strip() for link in gallery_links]
    product['images'] = ';'.join(urls)

    # detail
    info_node = find_one(driver, 'div#product-info')
    if info_node:
        product['detail'] = info_node.text.strip()
    return product