def parse(driver, url):
    """Load *url* and return the product link targets found on the page.

    Selectors are tried in order and the first one that yields any elements
    is used. The ``href`` attribute of every matched element is stripped of
    surrounding whitespace and the values are joined with ``;``.
    """
    driver.get(url)
    selectors = (
        'a.product-item',
        # Fallback selector. Example page recorded by the original author:
        # http://www.louisvuitton.cn/zhs-cn/women/ready-to-wear/furs/_/N-f8lvb3
        'li.listing > a.product-img',
    )
    for selector in selectors:
        anchors = util.find_elements_by_css_selector(driver, selector)
        if anchors:
            break
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def parse(driver, url):
    """Load *url*, expand the listing, and return the product link targets.

    Every displayed ``a.shelf_view-all`` element is clicked first, then the
    ``href`` of each ``li.product > div > a`` element is stripped and the
    values are joined with ``;``.
    """
    driver.get(url)
    for toggle in util.find_elements_by_css_selector(driver, 'a.shelf_view-all'):
        if not toggle.is_displayed():
            continue
        # The click is dispatched through JavaScript rather than
        # WebElement.click(); the pause that follows presumably gives the
        # page time to render the expanded shelf (unit of util.sleep is
        # assumed to be seconds -- confirm against util).
        driver.execute_script('arguments[0].click();', toggle)
        util.sleep(3)
    links = util.find_elements_by_css_selector(driver, 'li.product > div > a')
    return ';'.join([link.get_attribute('href').strip() for link in links])
def parse(driver, url):
    """Load *url* and return the product link targets found on the page.

    Four selectors are tried in order; the first one that yields any
    elements is used. The ``href`` of every matched element is stripped and
    the values are joined with ``;``.
    """
    driver.get(url)
    selectors = (
        'div.product > div > div > a',
        # Example page recorded by the original author for this fallback:
        # http://www.dior.cn/beauty/zh_cn/%E9%A6%99%E6%B0%9B%E4%B8%8E%E7%BE%8E%E5%AE%B9/%E5%BD%A9%E5%A6%86/%E7%9C%BC%E9%83%A8/%E7%9C%BC%E5%BD%B1/fr-eyeshadows-%E7%9C%BC%E5%BD%B1.html
        'div.column > div.push-pic > a',
        # Example page recorded by the original author for this fallback:
        # http://www.dior.cn/couture/zh_cn/%E5%A5%B3%E5%A3%AB%E6%97%B6%E8%A3%85/%E5%A4%AA%E9%98%B3%E7%9C%BC%E9%95%9C
        '[id|=push-produit] > div > div > a',
        # Example page recorded by the original author for this fallback:
        # http://www.dior.cn/couture/zh_cn/%E7%94%B7%E5%A3%AB%E6%97%B6%E8%A3%85/dior-homme-x-sennheiser
        'span.univers-part--product > div > div > a',
    )
    for selector in selectors:
        anchors = util.find_elements_by_css_selector(driver, selector)
        if anchors:
            break
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def parse(driver, url):
    """Load *url*, scroll down, and return the product link targets.

    After scrolling and pausing, two selectors are tried in order and the
    first one that yields any elements is used. The ``href`` of every
    matched element is stripped and the values are joined with ``;``.
    """
    driver.get(url)
    # Scroll far down and wait before querying; presumably this lets
    # lazily loaded items appear (not verifiable from this block alone).
    driver.execute_script('window.scrollBy(0,50000)')
    util.sleep(3)
    selectors = (
        'section.products > article.item > div > a',
        # Fallback selector. Example page recorded by the original author:
        # https://www.chloe.cn/cn/chloe/%E5%A5%B3%E5%A3%AB/subhome/accessories_section
        'article.product >a',
    )
    for selector in selectors:
        anchors = util.find_elements_by_css_selector(driver, selector)
        if anchors:
            break
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def parse(driver, url):
    """Load *url* and return the product link targets found on the page.

    Three selectors are tried in order; the first one that yields any
    elements is used. The ``href`` of every matched element is stripped and
    the values are joined with ``;``.
    """
    driver.get(url)
    selectors = (
        'h3.fnb_product-title > a',
        # Example page recorded by the original author for this fallback:
        # https://www.chanel.com/zh_CN/watches-jewelry/watches/c/jewelry-watches/W009
        'div.product-item-wrapper > a',
        # Example page recorded by the original author for this fallback:
        # https://www.chanel.cn/zh_CN/fragrance-beauty/skincare/c/le-lift.html
        'figure.fnb_prd-info > a',
    )
    for selector in selectors:
        anchors = util.find_elements_by_css_selector(driver, selector)
        if anchors:
            break
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def parse(driver, url):
    """Load *url* and return the product link targets found on the page.

    When ``a.prod-link`` elements exist, the ``href`` of each one whose
    ``style`` attribute is not exactly ``display: none;`` is collected.
    Otherwise the anchors under ``div.comp-rich-text > p`` are used, leaving
    out the last one. The stripped values are joined with ``;``.
    """
    driver.get(url)
    anchors = util.find_elements_by_css_selector(driver, 'a.prod-link')

    if not anchors:
        # Fallback layout. Example page recorded by the original author:
        # http://www.cartier.cn/zh-cn/pages/fragrances/les-heures-de-parfum%E7%B3%BB%E5%88%97.html
        anchors = util.find_elements_by_css_selector(
            driver, 'div.comp-rich-text > p > a')
        # The final anchor is skipped, exactly as the original index loop
        # did. NOTE(review): presumably it is not a product link -- confirm.
        return ';'.join(
            [anchor.get_attribute('href').strip() for anchor in anchors[:-1]])

    hrefs = []
    for anchor in anchors:
        if anchor.get_attribute('style') == 'display: none;':
            continue
        hrefs.append(anchor.get_attribute('href').strip())
    return ';'.join(hrefs)
def parse(driver, url):
    """Load *url* and return the ``;``-joined product link targets.

    Each ``a.item-url.open-productdetail`` element contributes its ``href``
    attribute with surrounding whitespace removed.
    """
    driver.get(url)
    anchors = util.find_elements_by_css_selector(driver, 'a.item-url.open-productdetail')
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def parse(driver, url):
    """Load *url* and return the ``;``-joined product link targets.

    Each ``a.item-display-image-container`` element contributes its ``href``
    attribute with surrounding whitespace removed.
    """
    driver.get(url)
    anchors = util.find_elements_by_css_selector(driver, 'a.item-display-image-container')
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def parse(driver, url):
    """Load *url* and return the ``;``-joined product link targets.

    Each ``div.product > div > a`` element contributes its ``href``
    attribute with surrounding whitespace removed.
    """
    driver.get(url)
    anchors = util.find_elements_by_css_selector(driver, 'div.product > div > a')
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def parse(driver, url):
    """Load *url* and return the ``;``-joined product link targets.

    Each ``article.search-item > header > a`` element contributes its
    ``href`` attribute with surrounding whitespace removed.
    """
    driver.get(url)
    selector = 'article.search-item > header > a'
    anchors = util.find_elements_by_css_selector(driver, selector)
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def parse(driver, url):
    """Load *url*, expand the listing, and return the product link targets.

    Displayed ``a.bul-btn-more`` elements are clicked repeatedly until a
    pass finds none displayed. Afterwards the ``href`` of every
    ``a.product-link`` element is stripped and the values are joined with
    ``;``.
    """
    driver.get(url)

    # Run at least one pass; repeat for as long as the previous pass clicked
    # something. There is no upper bound on the number of passes.
    clicked = True
    while clicked:
        clicked = False
        for button in util.find_elements_by_css_selector(driver, 'a.bul-btn-more'):
            if button.is_displayed():
                clicked = True
                # The click is dispatched through JavaScript, followed by a
                # pause before the next element/pass is examined.
                driver.execute_script('arguments[0].click();', button)
                util.sleep(3)

    anchors = util.find_elements_by_css_selector(driver, 'a.product-link')
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def parse(driver, url):
    """Load *url* and return the ``;``-joined product link targets.

    Each ``figure.product-image > a.thumb-link`` element contributes its
    ``href`` attribute with surrounding whitespace removed.
    """
    driver.get(url)
    selector = 'figure.product-image > a.thumb-link'
    anchors = util.find_elements_by_css_selector(driver, selector)
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def get_images(driver):
    """Return the slideshow image URLs of the current page, joined with ``;``.

    For every ``img`` inside the ``#ItemSlideshowViewport`` slider the first
    candidate of its ``srcset`` attribute is taken (the text before the
    first comma, then the first whitespace-separated token). Duplicate URLs
    are dropped.

    Unlike a plain ``set``, the de-duplication keeps the order in which the
    images appear in the document, so the result is stable between runs.
    Images without a usable ``srcset`` value are skipped instead of raising
    or contributing an empty entry.

    Returns an empty string when no image URL is found.
    """
    slides = util.find_elements_by_css_selector(driver, '#ItemSlideshowViewport > div.bx-wrapper > div.bx-viewport > ul > li > img')

    seen = set()
    ordered_urls = []
    for slide in slides:
        srcset = slide.get_attribute('srcset')
        if not srcset:
            # get_attribute yields None when the attribute is absent.
            continue
        # First srcset candidate is "<url> <descriptor>"; keep only the URL.
        tokens = srcset.split(',')[0].split()
        if not tokens:
            continue
        image_url = tokens[0]
        if image_url not in seen:
            seen.add(image_url)
            ordered_urls.append(image_url)
    return ';'.join(ordered_urls)
def parse(driver, url):
    """Load *url*, scroll down, and return the product link targets.

    Returns a list (not a joined string) holding the stripped ``href`` of
    every ``a.js-producttile_link`` element.
    """
    driver.get(url)
    # Scroll far down and wait three seconds before querying; presumably
    # this lets lazily loaded tiles appear (not verifiable from this block).
    driver.execute_script('window.scrollBy(0,50000)')
    time.sleep(3)
    tiles = util.find_elements_by_css_selector(driver, 'a.js-producttile_link')
    return [tile.get_attribute('href').strip() for tile in tiles]
def parse(driver, url):
    """Load *url*, scroll down in steps, and return the product link targets.

    The ``href`` of every ``li.item > div> div.product-image-box >a``
    element is stripped and the values are joined with ``;``.
    """
    driver.get(url)
    # Make sure the page is scrolled all the way to the bottom so that
    # every product gets displayed.
    for _ in range(10):
        driver.execute_script('window.scrollBy(0, 10000)')
        util.sleep(1)
    anchors = util.find_elements_by_css_selector(driver, 'li.item > div> div.product-image-box >a')
    return ';'.join([anchor.get_attribute('href').strip() for anchor in anchors])
def get_intro(driver):
    """Return the item description lines of the current page.

    If the ``item_detail`` accordion's plus icon is found it is clicked
    first. The stripped text of every
    ``div.item-description > ul > li > span.value`` element is then joined
    with newlines; the result is an empty string when there are none.
    """
    toggle = util.find_element_by_css_selector(driver, 'div[data-accordionpanel=item_detail] > span.icon-plus')
    if toggle:
        # Presumably expands the detail panel so its text becomes readable.
        toggle.click()
    values = util.find_elements_by_css_selector(driver, 'div.item-description > ul > li > span.value')
    return '\n'.join([value.text.strip() for value in values])