Example #1
0
    def scrape_products(self):
        """Yield a ``ShopItem`` for every product box in ``self.item_boxes``.

        For each box the title and image URL are read from its ``<img>``
        element (``alt`` / ``src``), the price from the ``product_price``
        element and the store product id from the text after the last ``-``
        in the product link.  The promotion text is optional.

        Yields:
            ShopItem: one per product box, in iteration order.

        Raises:
            Exception: failures while reading the mandatory fields (image,
                price, link, integer id) are not handled here and propagate
                to whoever consumes the generator.
        """
        # Selectors, all resolved relative to a single product box.
        price_class = 'product_price'
        url_class = 'product_name_link'
        image_css = 'img'
        promo_class = 'plp-promotion-redesign'

        for box in self.item_boxes:
            # One lookup serves both the title (alt) and the image URL (src).
            image = box.find_element_by_css_selector(image_css)
            title = image.get_attribute('alt')
            # The first character of the price text is dropped before the
            # conversion (presumably a currency symbol -- confirm).
            price = float(box.find_element_by_class_name(price_class).text[1:])
            url = box.find_element_by_class_name(url_class).get_attribute('href')
            store_product_id = int(url.split('-')[-1])
            item_image = image.get_attribute('src')

            try:
                promo = box.find_element_by_class_name(promo_class).text
            except Exception:
                # Best effort: a box whose promotion cannot be read is
                # reported without one.  ``Exception`` (not a bare except)
                # keeps KeyboardInterrupt/SystemExit working.
                promo = None
            promo_url = None

            # NOTE(review): the literal 3 is passed in the position where the
            # other scrapers pass ``self.store_id`` -- confirm it is this
            # store's id.
            yield ShopItem(title, price, store_product_id, url, item_image, 3,
                           self.date, self.time, promo, promo_url)
Example #2
0
    def scrape_products(self):
        """Yield a ``ShopItem`` for each product box that can be parsed.

        Title and image URL come from the box's product image (``alt`` /
        ``src``), the link from its first ``<a>``, the store product id from
        the text between the last ``-`` of the link and the following ``.``,
        and the price from the price element with its first character
        dropped.  Promotion fields are always ``None``.

        Boxes for which any lookup or conversion fails are skipped.

        Yields:
            ShopItem: one per successfully parsed product box.
        """
        # Selectors, all resolved relative to a single product box.
        prices_css = 'div > div.c-product-grid__product-item__price.ng-binding'
        image_css = 'a > div > div.c-product-grid__product-item__img-wrapper > img'
        href_css = 'a'

        for box in self.item_boxes:
            try:
                # One lookup serves both the title (alt) and image URL (src).
                image = box.find_element_by_css_selector(image_css)
                title = image.get_attribute('alt')
                item_image = image.get_attribute('src')
                url = box.find_element_by_css_selector(href_css).get_attribute('href')
                store_product_id = url.split('-')[-1].split('.')[0]
                price = float(box.find_element_by_css_selector(prices_css).text[1:])
            except Exception:
                # Deliberate best effort: skip boxes that do not parse.
                # ``Exception`` rather than a bare except so that
                # KeyboardInterrupt/SystemExit still stop the scrape.
                continue

            yield ShopItem(title, price, store_product_id, url, item_image,
                           self.store_id, self.date, self.time, None, None)
Example #3
0
    def scrape_products(self):
        """Yield a ``ShopItem`` for each product box that can be parsed.

        The title is the text of the name element, the image URL the ``src``
        of the product image, the link the ``href`` of the product anchor,
        the store product id whatever follows the first ``prod_`` in that
        link, and the price the text of the price element converted to
        ``float``.  Promotion fields are always ``None``.

        Boxes for which any lookup, conversion or ``ShopItem`` construction
        fails are skipped.

        Yields:
            ShopItem: one per successfully parsed product box.
        """
        # Selectors, all resolved relative to a single product box.
        name_css = 'div > article > div > div.u-display-inline-block.u-width-full > a > div'
        prices_css = 'div > article > div > div:nth-child(2) > div.dbh-now.u-text-weight-bold.u-color-neutral-70 > span'
        image_css = 'div > article > a > div > img'
        href_css = 'div > article > a'

        for box in self.item_boxes:
            try:
                title = box.find_element_by_css_selector(name_css).text
                item_image = box.find_element_by_css_selector(image_css).get_attribute('src')
                url = box.find_element_by_css_selector(href_css).get_attribute('href')
                store_product_id = url.split('prod_')[1]
                price = float(box.find_element_by_css_selector(prices_css).text)
                product = ShopItem(title, price, store_product_id, url,
                                   item_image, self.store_id, self.date,
                                   self.time, None, None)
            except Exception:
                # Deliberate best effort: skip boxes that do not parse.
                # ``Exception`` rather than a bare except so that
                # KeyboardInterrupt/SystemExit still stop the scrape.
                continue
            yield product
Example #4
0
    def scrape_products(self):
        """Yield a ``ShopItem`` for every product box in ``self.item_boxes``.

        For each box the title and image URL are read from its product image,
        the price from the ``textContent`` of the ``items__prices`` element,
        the link from the name anchor and the store product id from the last
        ``/``-separated segment of that link.  Promotion title and link are
        optional.

        Yields:
            ShopItem: one per product box, in iteration order.

        Raises:
            Exception: failures while reading the mandatory fields (image,
                price, link, integer id) are not handled here and propagate
                to whoever consumes the generator.
        """
        # Selectors, all resolved relative to a single product box.
        name_css = 'div > div.item__text > a'
        price_class = 'items__prices'
        image_css = 'div > a > img'
        promo_class = 'item__promotion'

        for box in self.item_boxes:
            # One lookup serves alt, src and the data-src fallback below.
            image = box.find_element_by_css_selector(image_css)
            title = image.get_attribute('alt')

            # Strip tabs from the raw text content, take its second line and
            # drop that line's first character (presumably a currency
            # symbol -- confirm) before converting.
            price_text = box.find_element_by_class_name(price_class).get_attribute("textContent")
            price_line = price_text.replace('\t', '').split('\n')[1]
            price = float(price_line[1:])

            url = box.find_element_by_css_selector(name_css).get_attribute('href')
            store_product_id = int(url.split('/')[-1])
            item_image = image.get_attribute('src')

            try:
                promo_element = box.find_element_by_class_name(promo_class)
                promo = promo_element.get_attribute('title')
                promo_url = promo_element.get_attribute('href')
            except Exception:
                # Best effort: a box whose promotion cannot be read is
                # reported without one.  ``Exception`` (not a bare except)
                # keeps KeyboardInterrupt/SystemExit working.
                promo, promo_url = None, None

            if item_image is None:
                # No ``src`` on the image: build the URL from ``data-src``
                # instead.  The prefix must not start with a space, or the
                # stored value is not a usable URL.
                item_image = 'https://www.superdrug.com' + image.get_attribute('data-src')

            yield ShopItem(title, price, store_product_id, url, item_image,
                           self.store_id, self.date, self.time, promo,
                           promo_url)
Example #5
0
    def scrape_products(self):
        """Yield a ``ShopItem`` for each available product box that parses.

        A box is skipped when its stock element reads "Out of stock"
        (compared case-insensitively, ignoring surrounding whitespace).  A
        box without a stock element is treated as in stock.  For the
        remaining boxes the title, image URL, SKU (``content`` attribute of
        the ``<meta>`` element), price and link are read; promotion fields
        are always ``None``.

        The image URL is kept only when the ``src`` attribute starts with
        ``https``; otherwise (including a missing ``src``) it is ``None``.

        Boxes for which any lookup, conversion or ``ShopItem`` construction
        fails are skipped.

        Yields:
            ShopItem: one per in-stock, successfully parsed product box.
        """
        # Selectors, all resolved relative to a single product box.
        stock_css = 'div > div > a > div:nth-child(2) > div.tablet-up > div > span'
        image_css = 'div > a.productLink.link--plain.js-nextPreviousStorage > div > img'
        title_css = 'div > div > a > div.product-tile__description'
        sku_css = 'div > meta'
        price_css = 'div > div > a > div:nth-child(2) > div.product-tile__pricing-container > div > div > p > span > span.product-tile__price-value'
        url_css = 'div > div > a'

        for box in self.item_boxes:
            try:
                try:
                    stock = box.find_element_by_css_selector(stock_css).text
                except NoSuchElementException:
                    # No stock element on this box: assume it is available.
                    stock = 'In Stock'
                if stock.strip().lower() == 'out of stock':
                    continue

                title = box.find_element_by_css_selector(title_css).text.strip()

                image_src = box.find_element_by_css_selector(image_css).get_attribute('src')
                if image_src and image_src.startswith('https'):
                    item_image = image_src
                else:
                    item_image = None

                store_product_id = box.find_element_by_css_selector(sku_css).get_attribute('content')
                price = float(box.find_element_by_css_selector(price_css).text)
                url = box.find_element_by_css_selector(url_css).get_attribute('href')
                product = ShopItem(title, price, store_product_id, url,
                                   item_image, self.store_id, self.date,
                                   self.time, None, None)
            except Exception:
                # Deliberate best effort: skip boxes that do not parse.
                continue

            # Yield outside the try block: a GeneratorExit raised here when
            # the consumer closes the generator must not be swallowed by the
            # handler above.
            yield product
Example #6
0
    def scrape_products(self):
        """Yield a ``ShopItem`` for every product box in ``self.item_boxes``.

        The store product id is the box's ``data-id`` attribute and the title
        the stripped ``title`` attribute of the name element.  The price is
        the ``content`` attribute of the price element, falling back to the
        first ``d-inline`` element when the specific selector does not match.
        Image URL and product link are both read from inside the
        ``listing-product-image`` container.

        An image URL ending in ``.gif`` is replaced by ``None``; a missing
        ``src`` is left as ``None``.

        Yields:
            ShopItem: one per product box, in iteration order.

        Raises:
            Exception: lookup and conversion failures other than the handled
                price-selector miss propagate to whoever consumes the
                generator.
        """
        # Selectors, all resolved relative to a single product box.
        title_css = 'div:nth-child(9) > div.listing-product-name > p'
        price_css = 'div:nth-child(9) > div.price.sale-price.price-listing-page > div.d-inline'
        image_box_class = 'listing-product-image'

        for box in self.item_boxes:
            store_product_id = box.get_attribute('data-id')
            title = box.find_element_by_css_selector(title_css).get_attribute('title').strip()

            try:
                price_element = box.find_element_by_css_selector(price_css)
            except NoSuchElementException:
                # The positional selector did not match this box; fall back
                # to the class alone.
                price_element = box.find_element_by_class_name('d-inline')
            price = float(price_element.get_attribute('content'))

            # One container lookup serves both the image and the link.
            image_box = box.find_element_by_class_name(image_box_class)
            item_image = image_box.find_element_by_class_name('img-fluid').get_attribute('src')
            # Guard against a missing ``src`` so one box cannot abort the
            # whole scrape.  ``.gif`` sources are discarded (presumably
            # placeholders -- confirm).
            if item_image is not None and item_image.endswith('.gif'):
                item_image = None
            url = image_box.find_element_by_css_selector('a:nth-child(1)').get_attribute('href')

            yield ShopItem(title, price, store_product_id, url, item_image,
                           self.store_id, self.date, self.time)
Example #7
0
    def scrape_products(self):
        """Yield a ``ShopItem`` for each product box that can be parsed.

        The title is read with one selector and, when that does not match,
        with a second one that goes through the ``title-and-rating`` wrapper.
        The price is the text of the price amount element with its first
        character dropped, the store product id the box's
        ``data-product-id`` attribute, the link the ``href`` of the box's
        first ``<a>`` child and the image URL the ``src`` of the card image.

        Boxes for which any lookup, conversion or ``ShopItem`` construction
        fails are skipped.

        Yields:
            ShopItem: one per successfully parsed product box.
        """
        # Selectors, all resolved relative to a single product box.
        title_css = 'a.ac-product-link.ac-product-card__details > div.ac-product-card__title > div > div'
        title_css_fallback = 'a.ac-product-link.ac-product-card__details > div.ac-product-card__title-and-rating > div.ac-product-card__title > div > div'
        price_css = 'a.ac-product-link.ac-product-card__details > div.ac-product-card__prices-and-offers > div.ac-product-price.xs-row.ac-product-card__prices > span.ac-product-price__amount'
        url_xpath = 'a[1]'
        image_css = 'a.ac-product-link.ac-product-card__image > div > div > picture > img'

        for box in self.item_boxes:
            try:
                try:
                    title = box.find_element_by_css_selector(title_css).text
                except NoSuchElementException:
                    title = box.find_element_by_css_selector(title_css_fallback).text
                price = float(box.find_element_by_css_selector(price_css).text[1:])
                store_product_id = box.get_attribute('data-product-id')
                url = box.find_element_by_xpath(url_xpath).get_attribute('href')
                item_image = box.find_element_by_css_selector(image_css).get_attribute('src')
                product = ShopItem(title, price, store_product_id, url,
                                   item_image, self.store_id, self.date,
                                   self.time)
            except Exception:
                # Deliberate best effort: skip boxes that do not parse.
                continue

            # Yield outside the try block: a GeneratorExit raised here when
            # the consumer closes the generator must not be swallowed by the
            # handler above.
            yield product