def scrape_products(self):
    """Yield a ``ShopItem`` for every product box in ``self.item_boxes``.

    Per box, the title and image URL are read from the ``img`` element
    (``alt`` / ``src``), the price from the ``product_price`` element and
    the product URL from the ``product_name_link`` element. The store
    product id is the integer after the last ``-`` in that URL. Promotion
    text is optional and ``promo_url`` is always ``None``.

    Yields:
        ShopItem: one per box, in the order of ``self.item_boxes``.

    Raises:
        Any exception raised while reading a required field (element
        lookup, ``float``/``int`` conversion) propagates to the caller;
        only the promotion lookup is best-effort.
    """
    # Selectors
    price_class = 'product_price'
    url_class = 'product_name_link'
    image_css = 'img'
    promo_class = 'plp-promotion-redesign'

    for box in self.item_boxes:
        # The same <img> supplies both the title (alt) and the image URL.
        image = box.find_element_by_css_selector(image_css)
        title = image.get_attribute('alt')
        # Drop the first character of the price text before parsing
        # (presumably a currency symbol -- confirm against the page).
        price = float(box.find_element_by_class_name(price_class).text[1:])
        url = box.find_element_by_class_name(url_class).get_attribute('href')
        store_product_id = int(url.split('-')[-1])
        item_image = image.get_attribute('src')

        # The promotion banner is optional: any lookup failure means
        # "no promo". ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt/SystemExit propagating.
        try:
            promo = box.find_element_by_class_name(promo_class).text
        except Exception:
            promo = None
        promo_url = None

        # NOTE: the store id is hard-coded to 3 in this scraper.
        product = ShopItem(title, price, store_product_id, url, item_image,
                           3, self.date, self.time, promo, promo_url)
        print(product)
        yield product
def scrape_products(self):
    """Yield a ``ShopItem`` for each parsable box in ``self.item_boxes``.

    Per box, the title and image URL come from the product image
    (``alt`` / ``src``), the URL from the first ``a`` element, and the
    store product id from the text after the last ``-`` in the URL with
    anything from the first ``.`` onwards removed. The price text has its
    first character dropped before being parsed as a float. ``promo`` and
    ``promo_url`` are always ``None``.

    Boxes whose fields cannot be read or parsed are skipped silently.

    Yields:
        ShopItem: one per successfully parsed box.
    """
    # Selectors
    prices_css = 'div > div.c-product-grid__product-item__price.ng-binding'
    image_css = 'a > div > div.c-product-grid__product-item__img-wrapper > img'
    href_css = 'a'

    for box in self.item_boxes:
        try:
            image = box.find_element_by_css_selector(image_css)
            title = image.get_attribute('alt')
            item_image = image.get_attribute('src')
            url = box.find_element_by_css_selector(href_css).get_attribute('href')
            store_product_id = url.split('-')[-1].split('.')[0]
            price = float(box.find_element_by_css_selector(prices_css).text[1:])
        except Exception:
            # Best-effort scraping: skip a box that fails to parse, but let
            # KeyboardInterrupt/SystemExit through (a bare except would not).
            continue

        yield ShopItem(title, price, store_product_id, url, item_image,
                       self.store_id, self.date, self.time, None, None)
def scrape_products(self):
    """Yield a ``ShopItem`` for each parsable box in ``self.item_boxes``.

    Per box, the title is the text of the name element, the image URL is
    the ``src`` of the article image, the URL is the ``href`` of the
    article link, and the store product id is whatever follows the first
    ``prod_`` in that URL. The price element's text is parsed as a float
    as-is. ``promo`` and ``promo_url`` are always ``None``.

    Boxes that fail at any step, including ``ShopItem`` construction, are
    skipped silently.

    Yields:
        ShopItem: one per successfully parsed box.
    """
    # Selectors
    name_css = 'div > article > div > div.u-display-inline-block.u-width-full > a > div'
    prices_css = 'div > article > div > div:nth-child(2) > div.dbh-now.u-text-weight-bold.u-color-neutral-70 > span'
    image_css = 'div > article > a > div > img'
    href_css = 'div > article > a'

    for box in self.item_boxes:
        try:
            title = box.find_element_by_css_selector(name_css).text
            item_image = box.find_element_by_css_selector(image_css).get_attribute('src')
            url = box.find_element_by_css_selector(href_css).get_attribute('href')
            store_product_id = url.split('prod_')[1]
            price = float(box.find_element_by_css_selector(prices_css).text)
            product = ShopItem(title, price, store_product_id, url, item_image,
                               self.store_id, self.date, self.time, None, None)
        except Exception:
            # Best-effort scraping: skip a box that fails to parse, but let
            # KeyboardInterrupt/SystemExit through (a bare except would not).
            continue

        yield product
def scrape_products(self):
    """Yield a ``ShopItem`` for every product box in ``self.item_boxes``.

    Per box, the title is the ``alt`` of the product image, the URL is the
    ``href`` of the name link, and the store product id is the integer
    after the last ``/`` in that URL. The price is parsed from the
    ``textContent`` of the ``items__prices`` element. The promotion
    (``title`` / ``href`` of the ``item__promotion`` element) is optional.

    When the image has no ``src``, its ``data-src`` is used instead,
    prefixed with the site origin.

    Yields:
        ShopItem: one per box, in the order of ``self.item_boxes``.

    Raises:
        Any exception raised while reading a required field (element
        lookup, ``float``/``int`` conversion, unexpected price layout)
        propagates to the caller; only the promotion lookup is best-effort.
    """
    # Selectors
    name_css = 'div > div.item__text > a'
    price_class = 'items__prices'
    image_css = 'div > a > img'
    promo_class = 'item__promotion'

    for box in self.item_boxes:
        image = box.find_element_by_css_selector(image_css)
        title = image.get_attribute('alt')

        # textContent is used rather than .text; after removing tabs, the
        # second line holds the price, whose first character is dropped
        # (presumably a currency symbol -- confirm against the page).
        raw_price = box.find_element_by_class_name(price_class).get_attribute("textContent")
        price_line = raw_price.replace('\t', '').split('\n')[1]
        price = float(price_line[1:])

        url = box.find_element_by_css_selector(name_css).get_attribute('href')
        store_product_id = int(url.split('/')[-1])

        item_image = image.get_attribute('src')

        # The promotion is optional: any lookup failure means "no promo".
        # ``Exception`` (not a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating.
        try:
            promo_element = box.find_element_by_class_name(promo_class)
            promo = promo_element.get_attribute('title')
            promo_url = promo_element.get_attribute('href')
        except Exception:
            promo = None
            promo_url = None

        if item_image is None:
            # No src on the image: fall back to data-src, which is joined
            # to the site origin. The origin must not carry a leading
            # space, or the resulting URL is malformed.
            lazy_src = image.get_attribute('data-src')
            if lazy_src is not None:
                item_image = 'https://www.superdrug.com' + lazy_src

        yield ShopItem(title, price, store_product_id, url, item_image,
                       self.store_id, self.date, self.time, promo, promo_url)
def scrape_products(self):
    """Yield a ``ShopItem`` for each box that has no stock badge element.

    A box is scraped only when looking up the stock element raises
    ``NoSuchElementException``; boxes where that element exists are
    skipped. For scraped boxes, the title is the stripped description
    text, the store product id is the ``content`` of the ``meta`` element,
    the price element's text is parsed as a float, and the image URL is
    kept only when its ``src`` starts with ``https`` (otherwise ``None``).
    ``promo`` and ``promo_url`` are always ``None``.

    Boxes that fail at any step are skipped silently.

    Yields:
        ShopItem: one per successfully parsed box without a stock badge.
    """
    # Selectors
    stock_css = 'div > div > a > div:nth-child(2) > div.tablet-up > div > span'
    image_css = 'div > a.productLink.link--plain.js-nextPreviousStorage > div > img'
    title_css = 'div > div > a > div.product-tile__description'
    sku_css = 'div > meta'
    price_css = 'div > div > a > div:nth-child(2) > div.product-tile__pricing-container > div > div > p > span > span.product-tile__price-value'
    url_css = 'div > div > a'

    for box in self.item_boxes:
        # NOTE(review): any box that has the stock element is skipped,
        # whatever its text says. Presumably the element only appears for
        # out-of-stock products -- confirm against the page.
        try:
            box.find_element_by_css_selector(stock_css)
        except NoSuchElementException:
            pass  # no stock badge: go on and scrape this box
        except Exception:
            continue
        else:
            continue

        try:
            title = box.find_element_by_css_selector(title_css).text.strip()
            image_src = box.find_element_by_css_selector(image_css).get_attribute('src')
            # A missing src (None) raises here and the box is skipped.
            item_image = image_src if image_src[:5] == 'https' else None
            store_product_id = box.find_element_by_css_selector(sku_css).get_attribute('content')
            price = float(box.find_element_by_css_selector(price_css).text)
            url = box.find_element_by_css_selector(url_css).get_attribute('href')
            product = ShopItem(title, price, store_product_id, url, item_image,
                               self.store_id, self.date, self.time, None, None)
        except Exception:
            # Best-effort scraping: skip a box that fails to parse.
            continue

        # Yield outside the try block so that GeneratorExit (raised at the
        # yield when the generator is closed) is never swallowed.
        yield product
def scrape_products(self):
    """Yield a ``ShopItem`` for every product box in ``self.item_boxes``.

    Per box, the store product id is the box's ``data-id`` attribute, the
    title is the stripped ``title`` attribute of the name element, and the
    price is the ``content`` attribute of the price element parsed as a
    float. If the specific price selector matches nothing, the first
    ``d-inline`` element is used instead. The image URL and product URL
    are both read from inside the ``listing-product-image`` wrapper; an
    image whose ``src`` ends in ``.gif`` is reported as ``None``.

    ``ShopItem`` is built without promotion arguments here.

    Yields:
        ShopItem: one per box, in the order of ``self.item_boxes``.

    Raises:
        Any exception raised while reading a field (other than the handled
        price-selector miss) propagates to the caller.
    """
    # Selectors
    title_css = 'div:nth-child(9) > div.listing-product-name > p'
    price_css = 'div:nth-child(9) > div.price.sale-price.price-listing-page > div.d-inline'
    img_src_class = 'listing-product-image'

    for box in self.item_boxes:
        store_product_id = box.get_attribute('data-id')
        title = box.find_element_by_css_selector(title_css).get_attribute('title').strip()

        try:
            price_element = box.find_element_by_css_selector(price_css)
        except NoSuchElementException:
            price_element = box.find_element_by_class_name('d-inline')
        price = float(price_element.get_attribute('content'))

        # One wrapper holds both the image and the product link.
        image_wrapper = box.find_element_by_class_name(img_src_class)

        item_image = image_wrapper.find_element_by_class_name('img-fluid').get_attribute('src')
        # get_attribute returns None for a missing src, so guard before
        # endswith. GIF sources are dropped (presumably placeholder images
        # -- confirm against the page).
        if item_image is not None and item_image.endswith('.gif'):
            item_image = None

        url = image_wrapper.find_element_by_css_selector('a:nth-child(1)').get_attribute('href')

        yield ShopItem(title, price, store_product_id, url, item_image,
                       self.store_id, self.date, self.time)
def scrape_products(self):
    """Yield a ``ShopItem`` for each parsable box in ``self.item_boxes``.

    Per box, the title is the text of the first title selector that
    matches (a second selector is tried when the first finds nothing).
    The price text has its first character dropped before being parsed as
    a float. The store product id is the box's ``data-product-id``
    attribute, the URL is the ``href`` of the element at XPath ``a[1]``,
    and the image URL is the ``src`` of the card's ``picture > img``.

    ``ShopItem`` is built without promotion arguments here. Boxes that
    fail at any step are skipped silently.

    Yields:
        ShopItem: one per successfully parsed box.
    """
    # Selectors: two alternative locations for the product title.
    title_css = 'a.ac-product-link.ac-product-card__details > div.ac-product-card__title > div > div'
    title_css_fallback = 'a.ac-product-link.ac-product-card__details > div.ac-product-card__title-and-rating > div.ac-product-card__title > div > div'
    price_css = 'a.ac-product-link.ac-product-card__details > div.ac-product-card__prices-and-offers > div.ac-product-price.xs-row.ac-product-card__prices > span.ac-product-price__amount'
    product_url_xpath = 'a[1]'
    image_css = 'a.ac-product-link.ac-product-card__image > div > div > picture > img'

    for box in self.item_boxes:
        try:
            try:
                title = box.find_element_by_css_selector(title_css).text
            except NoSuchElementException:
                title = box.find_element_by_css_selector(title_css_fallback).text
            price = float(box.find_element_by_css_selector(price_css).text[1:])
            store_product_id = box.get_attribute('data-product-id')
            url = box.find_element_by_xpath(product_url_xpath).get_attribute('href')
            item_image = box.find_element_by_css_selector(image_css).get_attribute('src')
            product = ShopItem(title, price, store_product_id, url, item_image,
                               self.store_id, self.date, self.time)
        except Exception:
            # Best-effort scraping: skip a box that fails to parse.
            continue

        # Yield outside the try block so that GeneratorExit (raised at the
        # yield when the generator is closed) is never swallowed.
        yield product