Example #1
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return its products.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: ``[]`` when the page has no ``div.product-price`` element,
            otherwise a one-element list holding the parsed ``Product``
            (priced in CLP, stock reported as -1).
        """
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')

        name = soup.find("h1", "title").text

        price_box = soup.find("div", "product-price")
        if price_box is None:
            return []

        # The first run of digits in the price box id is used as SKU and key.
        sku = re.search(r'(\d+)', price_box['id']).group(1)

        price_text = price_box.find('div', 'PricesalesPrice').span.text
        price = Decimal(remove_words(price_text))

        # The short description is converted from the tag's plain text, the
        # long one from the tag's full markup.
        short_description = html_to_markdown(
            str(soup.find('div', 's_desc').text))
        long_description = html_to_markdown(str(soup.find('div', 'desc')))
        description = short_description + '\n\n' + long_description

        # The page links a resized thumbnail; dropping the "_WxH" suffix from
        # its file name gives the name used to build the picture URL.
        thumbnail_name = soup.find(
            'ul', 'pagination2').img['src'].split('/')[-1]
        size_suffix = re.search(r'(_\d+x\d+)', thumbnail_name).group(1)
        picture_urls = [
            'http://www.airecenter.cl/images/stories/virtuemart/product/' +
            thumbnail_name.replace(size_suffix, '')
        ]

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            -1,
            price,
            price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls
        )

        return [product]
Example #2
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url``, retrying the download.

        The page is requested up to five times. A 404 answer returns ``[]``
        immediately and a 200 answer ends the retries; if neither happens
        within five attempts an ``Exception`` is raised.

        :param url: address of the product page.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: a one-element list with the parsed ``Product``, or ``[]``.
        """
        print(url)
        session = session_with_proxy(extra_args)

        attempts_left = 5
        while attempts_left:
            response = session.get(url)
            status = response.status_code

            if status == 404:
                return []
            if status == 200:
                break

            attempts_left -= 1
        else:
            # Reached only when every attempt finished without a "break"
            raise Exception('Could not bypass Incapsulata')

        page = response.text
        soup = BeautifulSoup(page, 'html.parser')

        name = soup.find('span', {'itemprop': 'name'}).text.strip()
        sku = soup.find('div', {'itemprop': 'sku'}).text.strip()
        price_meta = soup.find('meta', {'itemprop': 'price'})
        price = Decimal(price_meta['content'].strip())

        # The lazy match consumes the gallery object's closing brace as part
        # of the delimiter, so it is added back before parsing the JSON.
        gallery_match = re.search(
            r'"mage/gallery/gallery": ([\s\S]*?)\}\n', page)
        gallery = json.loads(gallery_match.group(1) + '}')
        picture_urls = [entry['full'] for entry in gallery['data']]

        attributes_md = html_to_markdown(
            str(soup.find('div', 'additional-attributes-wrapper')))
        details_md = html_to_markdown(str(soup.find('div', 'description')))
        description = '{}\n\n{}'.format(attributes_md, details_md)

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          -1,
                          price,
                          price,
                          cls.currency_iso,
                          sku=sku,
                          picture_urls=picture_urls,
                          description=description)

        return [product]
Example #3
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list with one ``Product`` priced in ARS, stock set to -1.
        """
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')

        name = soup.find('div', 'product-name').text.strip()
        sku = soup.find('input', {'name': 'product'})['value'].strip()

        # Presumably '.' is a thousands separator and ',' the decimal mark:
        # dots and the '$' sign are dropped, then the comma becomes a dot.
        price_text = soup.find('span', 'price').text
        for old, new in (('.', ''), ('$', ''), (',', '.')):
            price_text = price_text.replace(old, new)
        price = Decimal(price_text)

        collateral = soup.find('div', 'product-collateral')
        description = html_to_markdown(str(collateral))

        picture_urls = []
        for image in soup.findAll('img', {'id': 'image'}):
            picture_urls.append(image['src'])

        return [
            Product(
                name,
                cls.__name__,
                category,
                url,
                url,
                sku,
                -1,
                price,
                price,
                'ARS',
                sku=sku,
                description=description,
                picture_urls=picture_urls
            )
        ]
Example #4
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Page through a JSON product listing and return every product.

        The listing is requested in windows of 50 entries by appending
        ``&_from=<first>&_to=<last>`` to ``url`` (so ``url`` presumably
        already carries a query string) until an empty page comes back.

        :param url: base address of the JSON listing.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list of ``Product`` objects, one per listing entry, priced
            in CLP.
        :raises Exception: when the very first page is already empty.
        """
        print(url)
        # &_from=0&_to=49
        session = session_with_proxy(extra_args)

        page = 0
        page_size = 50
        products = []

        while True:
            target_url = '{}&_from={}&_to={}'.format(
                url, page * page_size, (page + 1) * page_size - 1
            )
            json_data = json.loads(session.get(target_url).text)

            if not json_data:
                if page == 0:
                    raise Exception('Empty category: ' + target_url)
                break

            for product in json_data:
                name = product['productName']
                sku = product['productReference']
                product_url = product['link']

                # Only the first item and its first seller are considered.
                item = product['items'][0]
                offer = item['sellers'][0]['commertialOffer']
                stock = offer['AvailableQuantity']

                # json.loads yields a float for fractional prices, and
                # Decimal(float) would capture its binary expansion
                # (e.g. 1999.9900000000000090949...). Going through str()
                # keeps the value as written in the JSON document.
                price = Decimal(str(offer['Price']))

                picture_urls = [
                    picture['imageUrl'] for picture in item['images']
                ]

                description = html_to_markdown(product['description'])

                products.append(Product(
                    name,
                    cls.__name__,
                    category,
                    product_url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'CLP',
                    sku=sku,
                    part_number=sku,
                    description=description,
                    picture_urls=picture_urls
                ))

            page += 1

        return products
Example #5
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        Stock is read from the block following an ``h6`` heading that
        contains ``EXISTENCIA``: the first child containing ``pzas.`` is
        parsed as the available quantity. When there is no such heading, or
        no child mentions ``pzas.``, the stock is reported as 0.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list with one ``Product`` priced in MXN. Its condition is
            "refurbished" when the name contains ``reacondicionado``.
        """
        print(url)
        session = session_with_proxy(extra_args)

        page_source = session.get(url).text
        soup = BeautifulSoup(page_source, 'html.parser')

        # The product name is published in either an <h1> or a <p> tag.
        name = soup.find('h1', {'itemprop': 'name'})
        if not name:
            name = soup.find('p', {'itemprop': 'name'})

        name = name.text
        sku = soup.find('span', 'variant-sku').text

        stock_container = None

        # No "break" on purpose: as before, the last matching heading wins.
        for container in soup.findAll('h6'):
            if 'EXISTENCIA' in container.text:
                stock_container = container.parent.find('div').contents

        # Default to 0 up front. Previously "stock" was left unbound (and
        # the Product construction crashed with UnboundLocalError) when the
        # container existed but none of its children mentioned "pzas.".
        stock = 0
        if stock_container:
            for item in stock_container:
                if 'pzas.' in item:
                    stock = int(item.replace('pzas.', ''))
                    break

        price = soup.find('span', 'gf_product-price money').text
        price = Decimal(price.replace('$', '').replace(',', ''))

        images = soup.findAll('meta', {'property': 'og:image:secure_url'})
        picture_urls = [i["content"] for i in images]

        description = html_to_markdown(
            str(soup.find('div', 'product-description')))

        if 'reacondicionado' in name.lower():
            condition = 'https://schema.org/RefurbishedCondition'
        else:
            condition = 'https://schema.org/NewCondition'

        p = Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'MXN',
                    sku=sku,
                    picture_urls=picture_urls,
                    description=description,
                    condition=condition)

        return [p]
Example #6
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        The listed price is multiplied by 1.19 and rounded to an integer
        before being stored.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list with one ``Product`` priced in CLP, stock set to -1.
        """
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')

        name = soup.find('h1', 'entry-title').text.strip()
        sku = soup.find('input', {'name': 'product_id'})['value'].strip()

        description_tag = soup.find('div', 'product_description')
        description = html_to_markdown(str(description_tag))

        picture_urls = []
        for link in soup.findAll('a', 'thickbox'):
            picture_urls.append(link['href'])

        listed_price = Decimal(
            remove_words(soup.find('span', 'currentprice').text))
        # NOTE(review): the 1.19 factor looks like a 19% tax markup - confirm.
        price = (listed_price * Decimal('1.19')).quantize(0)

        return [
            Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    -1,
                    price,
                    price,
                    'CLP',
                    sku=sku,
                    description=description,
                    picture_urls=picture_urls)
        ]
Example #7
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a product page whose data is embedded as a JS array.

        The product fields come from the first element of the
        ``value_product = [...]`` literal found in the page source; the
        picture links come from the ``a.fancybox`` anchors.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list with one ``Product`` priced in CLP.
        """
        print(url)
        http = session_with_proxy(extra_args)
        page = http.get(url).text

        soup = BeautifulSoup(page, 'html.parser')

        # The regex consumes the closing "]" as part of its delimiter, so it
        # is appended again to obtain a parseable JSON array.
        array_source = re.search(r'value_product = ([\s\S]+?)\];',
                                 page).group(1) + ']'
        entry = json.loads(array_source)[0]

        name = entry['descripcion'].strip()
        sku = entry['idproducto'].strip()
        stock = round(float(entry['stock']))
        offer_price = Decimal(entry['precioweb1'])
        normal_price = Decimal(entry['precioweb2'])

        long_description = entry['long_descrip']
        description = (html_to_markdown(long_description)
                       if long_description else None)

        picture_urls = []
        for anchor in soup.findAll('a', 'fancybox'):
            picture_urls.append(anchor['href'])

        return [
            Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    normal_price,
                    offer_price,
                    'CLP',
                    sku=sku,
                    description=description,
                    picture_urls=picture_urls)
        ]
Example #8
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a product page and query its stock through an AJAX call.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: ``[]`` when the server answers 404 or the page lacks a
            ``<body>`` or the ``h1#nombre-producto`` heading; otherwise a
            one-element list with the parsed ``Product`` priced in CLP.
        """
        user_agent = 'python-requests/2.21.0'

        session = session_with_proxy(extra_args)
        session.headers['user-agent'] = user_agent
        response = session.get(url)

        if response.status_code == 404:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        title_tag = soup.find('h1', {'id': 'nombre-producto'})
        if not (soup.find('body') and title_tag):
            return []

        name = title_tag.text.strip()
        sku = soup.find('div', {'itemprop': 'sku'}).text.strip()

        # The stock endpoint is called from a separate session carrying
        # AJAX-style headers and a form-encoded body.
        ajax_session = session_with_proxy(extra_args)
        ajax_session.headers['user-agent'] = user_agent
        ajax_session.headers['x-requested-with'] = 'XMLHttpRequest'
        ajax_session.headers['content-type'] = \
            'application/x-www-form-urlencoded'

        stock_response = ajax_session.post(
            'https://catalogo.movistar.cl/fullprice/stockproducto/validar/',
            'sku=' + sku)
        stock = json.loads(stock_response.text)['respuesta']['cantidad']

        price_text = soup.find('span', 'special-price').find('p').text
        price = Decimal(remove_words(price_text))

        description = html_to_markdown(
            str(soup.find('div', 'detailed-desktop')))

        condition = (
            'https://schema.org/RefurbishedCondition'
            if 'seminuevo' in description
            else 'https://schema.org/NewCondition'
        )

        og_image = soup.find('meta', {'property': 'og:image'})
        picture_urls = [og_image['content']]

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          price,
                          price,
                          'CLP',
                          condition=condition,
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
Example #9
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        Stock comes from the first word of the ``p.stock`` text: ``Agotado``
        maps to 0, anything else is parsed as an integer; without that
        element the stock is -1. The price is taken from the ``<ins>`` tag
        inside ``p.price`` when present, else from the whole ``p.price``.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list with one ``Product`` priced in CLP.
        """
        print(url)
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')

        name = soup.find('h1', 'product_title').text
        wishlist_link = soup.find('div', 'wd-wishlist-btn').find('a')
        sku = wishlist_link['data-product-id']

        stock_tag = soup.find('p', 'stock')
        if not stock_tag:
            stock = -1
        else:
            first_word = stock_tag.text.split(' ')[0]
            stock = 0 if first_word == 'Agotado' else int(first_word)

        part_number_tag = soup.find('span', 'sku')
        part_number = (part_number_tag.text.strip()
                       if part_number_tag else None)

        price_tag = soup.find('p', 'price')
        discounted_tag = price_tag.find('ins')
        price_text = discounted_tag.text if discounted_tag else price_tag.text
        price = Decimal(price_text.replace('$', '').replace('.', ''))

        picture_urls = []
        for wrapper in soup.findAll('div', 'product-image-wrap'):
            picture_urls.append(wrapper.find('a')['href'])

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        return [
            Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'CLP',
                    sku=sku,
                    picture_urls=picture_urls,
                    description=description,
                    part_number=part_number)
        ]
Example #10
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        Name, SKU and price are read from ``<meta>`` / ``<input>`` values;
        pictures come from the ``data-zoom-image`` attribute of the images
        inside ``div.owl-carousel``.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list with one ``Product`` priced in ARS, stock set to -1.
        """
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')

        name = soup.find('meta', {'name': 'Title'})['content'].strip()
        sku = soup.find('input', {'name': 'id'})['value'].strip()
        price = Decimal(soup.find('input', {'id': 'product_price'})['value'])

        specs_tag = soup.find('div', {'id': 'especificaciones-container'})
        description = html_to_markdown(str(specs_tag))

        carousel = soup.find('div', 'owl-carousel')
        picture_urls = []
        for image in carousel.findAll('img'):
            picture_urls.append(image['data-zoom-image'])

        return [
            Product(
                name,
                cls.__name__,
                category,
                url,
                url,
                sku,
                -1,
                price,
                price,
                'ARS',
                sku=sku,
                description=description,
                picture_urls=picture_urls
            )
        ]
Example #11
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: ``[]`` when the page has no ``h6.sku`` element, otherwise a
            one-element list with the parsed ``Product`` priced in GTQ. The
            product name has the SKU appended in parentheses.
        """
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')

        sku_tag = soup.find('h6', 'sku')
        if not sku_tag:
            return []

        sku = sku_tag.text.strip()
        title = soup.find('div', 'product-name').find('h1').text.strip()
        name = "{} ({})".format(title, sku)

        availability = soup.find('p', 'availability').find('span').text
        stock = -1 if availability.strip() == 'En existencia' else 0

        price_text = soup.find('div', 'price-box').find('span', 'price').text
        price = Decimal(price_text.replace('Q', '').replace(',', ''))

        picture_urls = [soup.find('p', 'product-image').find('a')['href']]

        # Description and additional-info tabs, joined by a blank line.
        tab_ids = ('product_tabs_description_contents',
                   'product_tabs_additional_contents')
        description = '\n\n'.join([
            html_to_markdown(str(soup.find('div', {'id': tab_id})))
            for tab_id in tab_ids
        ])

        return [
            Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'GTQ',
                    sku=sku,
                    picture_urls=picture_urls,
                    description=description)
        ]
Example #12
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a product page and return one ``Product`` per SKU.

        Pricing information is read from the ``vtex.events.addData(...)``
        call embedded in the page and the SKU list from the ``skuJson_0``
        variable. SKUs whose ``sellerId`` is ``lojamultilaser`` get a 5%
        discount on the page price, rounded to two decimals.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list of ``Product`` objects priced in COP.
        """
        print(url)
        session = session_with_proxy(extra_args)
        page_source = session.get(url).text

        pricing_data = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                                 page_source).groups()[0]
        pricing_data = json.loads(pricing_data)

        skus_data = re.search(r'var skuJson_0 = ([\S\s]+?);CATALOG',
                              page_source).groups()[0]
        skus_data = json.loads(skus_data)
        name = '{} {}'.format(pricing_data['productBrandName'],
                              pricing_data['productName'])

        # str() first: Decimal(float) would capture the float's binary
        # expansion instead of the value written in the page.
        base_price = Decimal(str(pricing_data['productPriceTo']))
        discounted_price = (base_price * Decimal('0.95')).quantize(
            Decimal('0.01'))

        soup = BeautifulSoup(page_source, 'html.parser')

        picture_urls = [
            tag['rel'][0] for tag in soup.findAll('a', {'id': 'botaoZoom'})
        ]

        description = html_to_markdown(
            str(soup.find('section', 'product-specs')))

        # A missing *or empty* EAN list both mean "no EAN"; the previous
        # code raised IndexError on an empty list.
        eans = pricing_data.get('productEans')
        if eans:
            ean = eans[0]
            # 12-digit codes are left-padded with a zero before validation.
            if len(ean) == 12:
                ean = '0' + ean
            if not check_ean13(ean):
                ean = None
        else:
            ean = None

        products = []

        for sku_data in skus_data['skus']:
            sku = str(sku_data['sku'])
            stock = pricing_data['skuStocks'][sku]

            # Pick the price per SKU from the untouched base price. The
            # previous code overwrote the shared variable, so the discount
            # compounded (0.95, 0.95^2, ...) and leaked into later SKUs.
            if sku_data['sellerId'] == 'lojamultilaser':
                price = discounted_price
            else:
                price = base_price

            p = Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        price,
                        price,
                        'COP',
                        sku=sku,
                        ean=ean,
                        description=description,
                        picture_urls=picture_urls)
            products.append(p)

        return products
Example #13
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a product page and return one ``Product`` per entry.

        Product entries are decoded from the first element of the
        ``dataLayer = [...]`` literal embedded in the page. All entries
        share the pictures listed in the ``li.owl-item`` elements.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list of ``Product`` objects priced in BRL; stock is -1 for
            available entries and 0 otherwise.
        """
        http = session_with_proxy(extra_args)
        page_source = http.get(url).text
        soup = BeautifulSoup(page_source, 'html.parser')

        collected_urls = []
        for item in soup.findAll('li', 'owl-item'):
            # Prefer the zoom picture and fall back to the normal one; items
            # where both are blank are skipped.
            picture_path = (
                item.find('a')['data-zoom'].replace(' ', '%20').strip() or
                item.find('a')['data-normal'].replace(' ', '%20').strip()
            )
            if picture_path:
                collected_urls.append('https:' + picture_path)

        # Product receives None rather than an empty list.
        picture_urls = collected_urls if collected_urls else None

        data_layer_source = re.search(r'dataLayer = ([\S\s]+?);dataLayer',
                                      page_source).group(1)
        pricing_data = demjson.decode(data_layer_source)[0]

        products = []
        for entry in pricing_data['product']:
            name = entry['productName']
            sku = str(entry['productSku'])
            price = Decimal(entry['productDiscount'])
            stock = -1 if entry['productAvailable'] else 0

            description = html_to_markdown(
                html.unescape(entry['productDescription']))

            products.append(Product(name,
                                    cls.__name__,
                                    category,
                                    url,
                                    url,
                                    sku,
                                    stock,
                                    price,
                                    price,
                                    'BRL',
                                    sku=sku,
                                    description=description,
                                    picture_urls=picture_urls))

        return products
Example #14
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: ``[]`` on a 404 answer, otherwise a one-element list with
            the parsed ``Product`` priced in MXN. Stock is the number inside
            ``div.availability`` or 0 when that element is missing.
        """
        print(url)
        http = session_with_proxy(extra_args)
        response = http.get(url)

        if response.status_code == 404:
            return []

        soup = BeautifulSoup(response.text, 'html5lib')

        name = soup.find('h1', 'name').text
        sku = soup.find('div', {'itemprop': 'sku'}).text

        availability = soup.find('div', 'availability')
        stock = int(availability.find('strong').text) if availability else 0

        price_text = soup.find('span', 'price').text
        price = Decimal(price_text.replace('$', '').replace(',', ''))

        # Use every gallery image when there is a carousel, otherwise fall
        # back to the first "img-fluid" image of the page.
        gallery = soup.find('div', {'id': 'owl-carousel-gallery'})
        if gallery:
            picture_urls = [
                image['src'] for image in gallery.findAll('img', 'img-fluid')
            ]
        else:
            picture_urls = [soup.find('img', 'img-fluid')['src']]

        description = html_to_markdown(str(soup.find('div', 'description')))

        # The part number sits in the cell next to the "mpn" header; if
        # several headers match, the last one wins.
        part_number = None
        for header in soup.findAll('th'):
            if header.text == "mpn":
                part_number = header.parent.find('td').text

        return [
            Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'MXN',
                    sku=sku,
                    picture_urls=picture_urls,
                    description=description,
                    part_number=part_number)
        ]
Example #15
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        The three first ``div.textOtrosPrecios`` blocks are read, in order,
        as normal price, stock icon and SKU. The page is requested with a
        desktop Chrome user agent and a 30 second timeout.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list with one ``Product`` priced in CLP.
        """
        http = session_with_proxy(extra_args)
        http.headers['User-Agent'] = (
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
        )

        soup = BeautifulSoup(http.get(url, timeout=30).text, 'html.parser')

        info_blocks = soup.findAll('div', 'textOtrosPrecios')

        normal_price = Decimal(remove_words(info_blocks[0].text))

        # Only these two icons are treated as "in stock" (-1).
        in_stock_icons = (
            'images/imagenes/ico_normal.jpg',
            'images/imagenes/ico_bajo.jpg',
        )
        stock_icon = info_blocks[1].find('img')['src']
        stock = -1 if stock_icon in in_stock_icons else 0

        sku = info_blocks[2].text.strip()
        name = soup.find('div', 'textTituloProducto').text.strip()

        offer_price_text = soup.find('div', 'textPrecioContado').text
        offer_price = Decimal(remove_words(offer_price_text))

        description = html_to_markdown(str(soup.find('div', 'p7TPcontent')))

        # Main picture first, followed by every "Imagen" image of the page.
        second_table = soup.findAll('table', {'id': 'table20'})[1]
        picture_paths = [second_table.findAll('img')[2]['src']]
        for image in soup.findAll('img', 'Imagen'):
            picture_paths.append(image['src'])

        # Each path ends in "=<picture id>"; the id is plugged into the
        # image endpoint and spaces are percent-encoded.
        picture_base = ('http://www.ttchile.cl/images/imgproductos/'
                        'imgImagenMarco.php?imagen=')
        picture_urls = [
            (picture_base + path.split('=')[-1]).replace(' ', '%20')
            for path in picture_paths
        ]

        return [
            Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    normal_price,
                    offer_price,
                    'CLP',
                    sku=sku,
                    description=description,
                    picture_urls=picture_urls)
        ]
Example #16
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        Stock is the ``max`` attribute of the quantity input (-1 when that
        attribute is empty) and is forced to 0 when the product name does
        not contain the word ``LG``. The offer price is capped at the normal
        price.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: ``[]`` when the page has no ``#the-cantidad-selector``
            input, otherwise a one-element list with the ``Product`` priced
            in PYG.
        """
        print(url)
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')

        name = soup.find('h1', 'product_title').text.strip()
        sku_meta = soup.find('meta', {'property': 'product:retailer_item_id'})
        sku = sku_meta['content']

        quantity_input = soup.find('input', {'id': 'the-cantidad-selector'})
        if not quantity_input:
            return []

        max_quantity = quantity_input['max']
        stock = int(max_quantity) if max_quantity else -1

        if 'LG' not in name.upper().split(' '):
            stock = 0

        price_tag = soup.find('p', 'price')

        normal_text = price_tag.find('span', 'amount').text
        normal_price = Decimal(
            normal_text.replace('Gs.', '').replace('.', '').strip())

        offer_text = price_tag.find(
            'span', {'id': 'elpreciocentralPorta'}).text.split('Gs.')[-1]
        offer_price = Decimal(
            offer_text.replace('.', '').replace('!', '').strip())

        # Never report an offer price above the normal price.
        offer_price = min(offer_price, normal_price)

        description = html_to_markdown(
            str(soup.find('div', {'itemprop': 'description'})))

        picture_urls = [
            carousel.find('a')['href']
            for carousel in soup.findAll('div',
                                         'thumbnails-single owl-carousel')
        ]

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          normal_price,
                          offer_price,
                          'PYG',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
Example #17
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: ``[]`` when the final response URL differs from ``url`` or
            the page has no ``span.sku`` element; otherwise a one-element
            list with the parsed ``Product`` priced in MXN. Stock is 0 when
            a ``p.out-of-stock`` element exists and -1 otherwise.
        """
        print(url)
        http = session_with_proxy(extra_args)
        response = http.get(url)

        # A differing final URL is treated as "product not available".
        if response.url != url:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        name = soup.find('h1', 'product_title').text

        sku_tag = soup.find('span', 'sku')
        if not sku_tag:
            return []
        sku = sku_tag.text

        stock = 0 if soup.find('p', 'out-of-stock') else -1

        # Read the amount from the <ins> tag when there is one, otherwise
        # from the first amount inside the price paragraph.
        price_tag = soup.find('p', 'price')
        amount_scope = price_tag.find('ins') or price_tag
        price_text = amount_scope.find('span', 'amount').text
        price = Decimal(price_text.replace('$', '').replace(',', ''))

        gallery = soup.find('figure', 'woocommerce-product-gallery__wrapper')
        picture_urls = []
        for image in gallery.findAll('img'):
            picture_urls.append(image['src'])

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        return [
            Product(
                name,
                cls.__name__,
                category,
                url,
                url,
                sku,
                stock,
                price,
                price,
                'MXN',
                sku=sku,
                picture_urls=picture_urls,
                description=description,
            )
        ]
Example #18
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a product page and return one ``Product`` per variant.

        Variants are listed in the ``var meta = {...}`` object embedded in
        the page. Each variant page (``<url>?variant=<id>``) is downloaded
        separately, and variants whose price text is ``-`` are skipped.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list of ``Product`` objects priced in CLP.
        """
        print(url)
        http = session_with_proxy(extra_args)
        page_source = http.get(url).text

        meta_source = re.search(r'var meta = ([\S\s]+?);\n',
                                page_source).group(1)
        variants = json.loads(meta_source)['product']['variants']

        products = []

        for variant in variants:
            variant_id = variant['id']
            sku = variant['sku']
            color = variant['public_title']

            variant_url = '{}?variant={}'.format(url, variant_id)
            soup = BeautifulSoup(http.get(variant_url).text, 'html.parser')

            title = soup.find('h1', 'product_name').text
            name = title + " ({})".format(color)

            availability = soup.find(
                'link', {'itemprop': 'availability'})['href']
            stock = -1 if availability == 'http://schema.org/InStock' else 0

            price_text = soup.find('span', 'current_price').text.strip()
            price_text = price_text.replace('$', '').replace('.', '')

            # A lone dash means the variant has no price; leave it out.
            if price_text == '-':
                continue

            price = Decimal(price_text)

            picture_urls = []
            for container in soup.findAll('div', 'image__container'):
                picture_urls.append(
                    'http:' + container.find('img')['data-src'])

            description = html_to_markdown(
                str(soup.find('div', {'data-et-handle': 'tabs-descripcion'})))

            products.append(Product(name,
                                    cls.__name__,
                                    category,
                                    variant_url,
                                    url,
                                    sku,
                                    stock,
                                    price,
                                    price,
                                    'CLP',
                                    sku=sku,
                                    picture_urls=picture_urls,
                                    description=description))

        return products
Example #19
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a single product page and return it as a one-item list.

        Stock is -1 when the page's availability link points to
        ``http://schema.org/InStock`` and 0 otherwise. For out-of-stock
        products the offer price equals the normal price; otherwise it is
        read from ``span.t_boleto_price``.

        :param url: address of the product page to download.
        :param category: category value passed through to ``Product``.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list with one ``Product`` priced in BRL.
        """
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')

        name = soup.find('h1').text.strip()

        availability = soup.find('link', {'itemprop': 'availability'})['href']
        stock = -1 if availability == 'http://schema.org/InStock' else 0

        sku = soup.find('div', 'product-name').find('span').text.strip()

        # Description and additional-info panels, joined by a blank line.
        description_parts = []
        for panel_id in ('description', 'additional'):
            panel = soup.find('div', {'id': panel_id})
            description_parts.append(html_to_markdown(str(panel)))
        description = '\n\n'.join(description_parts)

        normal_text = soup.find('p', {'itemprop': 'price'}).text
        normal_price = Decimal(
            normal_text.replace('R$', '').replace('.', '').replace(',', '.'))

        if stock == 0:
            offer_price = normal_price
        else:
            offer_text = soup.find('span', 't_boleto_price').text
            offer_amount = offer_text.split('$')[1]
            offer_price = Decimal(
                offer_amount.replace('.', '').replace(',', '.'))

        # Use the slider links when present, otherwise the single zoom link.
        slider = soup.find('ul', 'bxslider')
        if slider:
            picture_urls = [anchor['href'] for anchor in slider.findAll('a')]
        else:
            picture_urls = [soup.find('a', 'cloud-zoom-gallery')['href']]

        return [
            Product(
                name,
                cls.__name__,
                category,
                url,
                url,
                sku,
                stock,
                normal_price,
                offer_price,
                'BRL',
                sku=sku,
                description=description,
                picture_urls=picture_urls
            )
        ]
Example #20
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-item product list.

        Returns an empty list when the page responds with HTTP 404.

        The offer price is read from the ``efectivo`` price element and
        the normal price is derived from it by multiplying by 1.034
        (NOTE(review): presumably a card-payment surcharge -- confirm).
        Picture URLs are taken from the gallery JSON embedded in
        ``text/x-magento-init`` script tags. The SKU is capped at 50
        characters and is also passed as the part number. Stock defaults
        to ``0`` when the page has no ``product-stock`` element.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 404:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        name = soup.find('span', {'itemprop': 'name'}).text
        # Keep at most the first 50 characters of the SKU.
        sku = soup.find('div', {'itemprop': 'sku'}).text[:50]

        stock = 0
        stock_container = soup.find('div', 'product-stock')

        if stock_container:
            # The quantity is the second space-separated token of the text.
            stock = int(stock_container.text.strip().split(' ')[1])

        offer_price = Decimal(
            soup.find('span', 'efectivo').find('span', 'price').text.replace(
                '$', '').replace('.', ''))
        # Build the factor from a string: Decimal(1.034) would inherit the
        # binary float's representation error and leak it into the price.
        normal_price = offer_price * Decimal('1.034')

        picture_urls = []

        for script in soup.findAll('script', {'type': 'text/x-magento-init'}):
            if 'mage/gallery/gallery' not in script.text:
                continue

            image_data = json.loads(
                script.text)['[data-gallery-role=gallery-placeholder]'][
                    'mage/gallery/gallery']['data']
            picture_urls.extend(data['img'] for data in image_data)

        description = html_to_markdown(str(soup.find('div', 'description')))

        p = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            part_number=sku,
            picture_urls=picture_urls,
            description=description,
        )

        return [p]
Example #21
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-item product list.

        Returns an empty list when the page responds with HTTP 404 or when
        it has no ``priceText`` element.

        Stock is ``0`` when there is no ``stockFlag`` element, otherwise the
        integer found in its nested ``span``. The price is parsed after
        stripping ``$`` and ``,`` and is reported in ``MXN``. Picture URLs
        are read from the ``emslider2_items`` list; when that list (or its
        container) is missing, ``picture_urls`` is ``None``.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 404:
            return []

        # Reuse the response already fetched instead of downloading the
        # same page a second time.
        soup = BeautifulSoup(response.text, 'html.parser')

        name = soup.find('h1', 'detailsInfo_right_title').text
        sku = soup.find('div', 'detailsInfo_right_artnum')\
            .text.replace('SKU:', '').strip()

        stock_flag = soup.find('span', 'stockFlag')
        stock = int(stock_flag.find('span').text) if stock_flag else 0

        price_tag = soup.find('span', 'priceText')

        if not price_tag:
            return []

        price = Decimal(price_tag.text.replace('$', '').replace(',', ''))

        # Look the gallery up once; a missing container is treated the same
        # as a missing slider (no pictures) rather than raising.
        gallery = soup.find('div', 'detailsInfo_left_picture_morepictures')
        slider = gallery.find('div', 'emslider2_items') if gallery else None

        if slider:
            picture_urls = [image.find('a')['data-src']
                            for image in slider.findAll('li')]
        else:
            picture_urls = None

        description = html_to_markdown(
            str(soup.find('div', 'cpattributes-box')))

        p = Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'MXN',
                    sku=sku,
                    picture_urls=picture_urls,
                    description=description,
                    part_number=sku)

        return [p]
Example #22
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return the products associated with one of the two known URLs.

        * ``cls.prepago_url`` yields a single fixed "GTD Prepago" product
          with zero prices.
        * ``cls.equipos_url`` yields one product per published entry of the
          ``var catalog = ...`` JSON embedded in the page; these are always
          created with the literal category ``'Cell'``.

        Raises:
            Exception: if ``url`` is neither of the two URLs above.
        """
        if url == cls.prepago_url:
            # Plan Prepago
            # NOTE(review): the key below says "Claro" although the product
            # is named "GTD Prepago" -- confirm whether that is intentional.
            return [Product(
                'GTD Prepago',
                cls.__name__,
                category,
                url,
                url,
                'Claro Prepago',
                -1,
                Decimal(0),
                Decimal(0),
                'CLP',
            )]

        if url != cls.equipos_url:
            raise Exception('Invalid URL: ' + url)

        session = session_with_proxy(extra_args)
        page_text = session.get(url).text
        # Drop the last character of the captured line before decoding it.
        raw_catalog = re.search(r'var catalog = (.+)', page_text).group(1)
        catalog = json.loads(raw_catalog[:-1])

        products = []

        for entry in catalog['products']:
            if entry['published']:
                entry_sku = entry['id']
                entry_price = Decimal(remove_words(entry['leasing_price']))
                entry_pictures = [
                    'https://nuevo.gtdmanquehue.com' + im['options']['url']
                    for im in entry['images']
                ]

                products.append(Product(
                    entry['name'],
                    cls.__name__,
                    'Cell',
                    url,
                    url,
                    entry_sku,
                    -1,
                    entry_price,
                    entry_price,
                    'CLP',
                    sku=entry_sku,
                    cell_plan_name='GTD Prepago',
                    description=html_to_markdown(entry['description']),
                    picture_urls=entry_pictures))

        return products
Example #23
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-item product list.

        Returns an empty list when the final response URL differs from the
        requested one. Product data comes from the ``digitalData`` object
        embedded in the page, decoded with ``demjson`` after removing a few
        entries by keyword. When the data has no ``cashPrice`` both prices
        are reported as zero. Prices are in ``BRL``.
        """
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.url != url:
            return []

        page_source = response.text

        raw_data = re.search(r'digitalData = ([\S\s]+?); </script',
                             page_source).group(1)

        # Blank out these entries (keyword through end of line) before
        # handing the text to the decoder.
        for keyword in ('domain', 'fullName', 'protocol', 'pathname',
                        'referrer'):
            raw_data = re.sub("'{}': .+".format(keyword), '', raw_data)

        product_data = demjson.decode(raw_data)['page']['product']

        name = product_data['title']
        sku = product_data['idSku']
        stock = -1 if product_data['stockAvailability'] else 0

        if 'cashPrice' not in product_data:
            normal_price = Decimal(0)
            offer_price = Decimal(0)
        else:
            normal_price = Decimal(product_data['salePrice'])
            offer_price = Decimal(product_data['cashPrice'])

        soup = BeautifulSoup(page_source, 'html.parser')

        description = html_to_markdown(str(soup.find('div', 'description')))

        carousel_images = soup.findAll('img', 'carousel-product__item-img')
        picture_urls = [image['data-src'] for image in carousel_images]

        return [Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'BRL',
            sku=sku,
            description=description,
            picture_urls=picture_urls
        )]
Example #24
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-item product list.

        Returns an empty list when the page responds with HTTP 404.

        The SKU is the right-hand cell of the first row of the ``listing``
        table and the price is parsed from the right-hand cell of its last
        row. Stock is accumulated from every row (except the last) whose
        left cell mentions ``Cantidad``: a ``+`` in the quantity makes the
        stock ``-1``, rows containing ``Agotado`` add nothing, and any other
        value is added as an integer. Prices are in ``USD``.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 404:
            return []

        soup = BeautifulSoup(response.text, 'html5lib')

        name = soup.find('h1', 'name').text.strip()
        rows = soup.find('div', 'listing').findAll('tr')

        sku = rows[0].find('td', 'td_right').text.strip()

        stock = 0

        # The last row is excluded here; it is the one the price is read from.
        for row in rows[:-1]:
            if 'Cantidad' not in row.find('td', 'td_left').text:
                continue

            quantity_text = row.find('td', 'td_right').text

            if '+' in quantity_text:
                stock = -1
                break

            if 'Agotado' not in quantity_text:
                stock += int(quantity_text)

        price_text = rows[-1].find('td', 'td_right').text
        price = Decimal(price_text.split('$')[-1].replace(',', ''))

        description = html_to_markdown(str(soup.find('div', 'description')))

        # Spaces in picture links are percent-encoded.
        picture_urls = [
            container.find('a')['href'].replace(' ', '%20')
            for container in soup.findAll('li', 'wrapper_pic_div')
        ]

        return [Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        price,
                        price,
                        'USD',
                        sku=sku,
                        description=description,
                        picture_urls=picture_urls)]
Example #25
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-item product list.

        Stock is the ``max`` attribute of the ``producto_cantidad`` input,
        or ``0`` when that input is absent. When the price box contains a
        struck-through (``<s>``) price, the reported price is 90% of that
        value rounded to an integer; otherwise it is the price box's own
        text. Prices are in ``CLP``.
        """
        print(url)
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('div', 'product-info__description').text.strip()

        title_span = soup.find('div', 'product-info__title').find('span')
        # The SKU is whatever follows the first colon in the title span.
        sku = title_span.text.split(':')[1].strip()

        quantity_input = soup.find('input', {'id': 'producto_cantidad'})
        stock = int(quantity_input['max']) if quantity_input else 0

        price_box = soup.find('span', 'price-box__new')
        struck_price_tag = price_box.find('s')

        if not struck_price_tag:
            price = Decimal(remove_words(price_box.text))
        else:
            struck_price = Decimal(remove_words(struck_price_tag.text))
            price = (struck_price * Decimal('0.9')).quantize(0)

        description = html_to_markdown(str(soup.find('div', 'tab-content')),
                                       'http://www.eglo.cl')

        slides = soup.findAll('a', 'swiper-slide')

        if slides:
            picture_urls = [slide.find('img')['src'] for slide in slides]
        else:
            # No slider: use the single main product image.
            main_image = soup.find('div', 'product-main-image__item')
            picture_urls = [main_image.img['src']]

        return [Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        price,
                        price,
                        'CLP',
                        sku=sku,
                        description=description,
                        picture_urls=picture_urls)]
Example #26
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-item product list.

        Returns an empty list when the page has no ``h1`` name element.

        The request is sent with a desktop Chrome ``User-Agent``. Stock is
        ``0`` when the ``text-not-product-avisme`` element is present and
        ``-1`` otherwise. The price comes from the ``lowPrice`` meta tag,
        falling back to ``price``; normal and offer price are the same and
        are reported in ``BRL``. ``picture_urls`` is ``None`` when no image
        carries a ``data-zoom-image`` attribute.
        """
        session = session_with_proxy(extra_args)
        session.headers['User-Agent'] = \
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
            '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'

        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name_tag = soup.find('h1', {'itemprop': 'name'})

        if not name_tag:
            return []

        name = name_tag.text.strip()

        sold_out_notice = soup.find('strong', 'text-not-product-avisme')
        stock = 0 if sold_out_notice else -1

        price_tag = soup.find('meta', {'itemprop': 'lowPrice'}) or \
            soup.find('meta', {'itemprop': 'price'})

        normal_price = Decimal(price_tag['content'])
        offer_price = normal_price

        # Keep only what follows the first dot of the SKU text.
        sku = soup.find('span', {'itemprop': 'sku'}).text.split('.', 1)[1]

        description_tag = soup.find(
            'div', 'yCmsContentSlot productDetailsPageShortDescription')
        description = html_to_markdown(str(description_tag))

        picture_urls = [
            image['data-zoom-image']
            for image in soup.find('div', 'gallery-image').findAll('img')
            if image.has_attr('data-zoom-image')
        ] or None

        return [Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'BRL',
            sku=sku,
            description=description,
            picture_urls=picture_urls
        )]
Example #27
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-item product list.

        Returns an empty list when the ``price`` paragraph has no text.

        The request is sent with a desktop Chrome ``User-Agent``. Stock is
        ``0`` when the stock text is exactly ``Agotado``; otherwise it is
        the integer at the start of that text. The offer price is read from
        the ``<ins>`` element and the normal price from the ``<del>``
        element of the price paragraph; both are in ``CLP``. Thumbnails
        whose image lacks a ``content`` attribute are skipped.
        """
        print(url)
        session = session_with_proxy(extra_args)
        session.headers['User-Agent'] = \
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
            '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h2', 'product_title').text.strip()
        sku = soup.find('span', 'sku').text.strip()

        stock_text = soup.find('span', 'stock').text.strip()
        if stock_text == 'Agotado':
            stock = 0
        else:
            stock = int(stock_text.split(' ')[0])

        price_tag = soup.find('p', 'price')

        if not price_tag.text.strip():
            return []

        offer_text = price_tag.find('ins').find('span').text
        offer_price = Decimal(remove_words(offer_text))
        normal_text = price_tag.find('del').find('span').text
        normal_price = Decimal(remove_words(normal_text))

        picture_urls = []

        for thumbnail in soup.findAll('div', 'img-thumbnail'):
            try:
                picture_url = thumbnail.find('img')['content']
            except KeyError:
                # Image without a "content" attribute: nothing to record.
                continue
            picture_urls.append(picture_url)

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        return [Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls
        )]
Example #28
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build a product from the query string of ``url`` plus site data.

        The ``price``, ``currency`` and ``fProdId`` query parameters of
        ``url`` supply the price, the currency and the article id. Article
        details are fetched through ``cls._retrieve_page`` and decoded as
        JSON; the name is capped at 255 characters. Stock is read from the
        element that follows the ``Total General`` label on a separate
        stock page, which is requested with ``cls.SESSION_COOKIES``.
        """
        query = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
        price = Decimal(query['price'][0])
        currency = query['currency'][0]
        product_id = query['fProdId'][0]

        product_url = 'https://www.grupocva.com/me_bpm/' \
                      'detalle_articulo/me_articulo.php?fProdId=' + product_id

        session = session_with_proxy(extra_args)
        session.headers['Content-Type'] = 'application/x-www-form-urlencoded'

        details_endpoint = 'https://www.grupocva.com/me_bpm/' \
                           'detalle_articulo/fcDetArticulo.php'
        response = cls._retrieve_page(
            session, details_endpoint,
            'accion=getArticulo&id=' + product_id, extra_args)

        article = json.loads(response.text)

        name = article['descripcion'][:255]
        sku = article['clave']
        key = article['idProd']
        part_number = article['fabricante']
        description = html_to_markdown(article['desT'])
        picture_urls = [
            'https://www.grupocva.com/me_bpm/'
            'detalle_articulo/imagen_art.php?fProd=' + key
        ]

        stock_url = 'https://www.grupocva.com/me_bpm/' \
                    'existencia/exs_general.php?fPID=' + key
        stock_page = requests.get(stock_url, cookies=cls.SESSION_COOKIES,
                                  timeout=30).text
        stock_soup = BeautifulSoup(stock_page, 'html.parser')

        total_label = stock_soup.find('strong', text='Total General')
        # The total sits three parse-order steps after the label element.
        stock = int(total_label.next.next.next.text)

        return [Product(name,
                        cls.__name__,
                        category,
                        product_url,
                        product_url,
                        key,
                        stock,
                        price,
                        price,
                        currency,
                        sku=sku,
                        part_number=part_number,
                        description=description,
                        picture_urls=picture_urls)]
Example #29
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-item product list.

        Returns an empty list when the page responds with HTTP 500.

        The request asks for ``deflate`` encoding only. The name is the
        page title and the SKU (also used as part number) is the value of
        the ``product_id`` input. Stock is ``-1`` when the ``button-cart``
        button exists and ``0`` otherwise. The normal price is the amount
        after ``Webpay:`` and the offer price the amount after
        ``Transferencia:`` in the last ``h2`` of the page; both in ``CLP``.
        """
        session = session_with_proxy(extra_args)
        session.headers['Accept-Encoding'] = 'deflate'
        response = session.get(url)

        if response.status_code == 500:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        name = soup.find('title').text.strip()
        sku = soup.find('input', {'name': 'product_id'})['value']

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        thumbnails = soup.find('ul', 'thumbnails')

        if not thumbnails:
            picture_urls = None
        else:
            # Skip thumbnails whose link is empty.
            picture_urls = [
                anchor['href']
                for anchor in thumbnails.findAll('a', 'thumbnail')
                if anchor['href']
            ]

        stock = -1 if soup.find('button', {'id': 'button-cart'}) else 0

        # Remove thousands separators so the amounts match \d+.
        price_text = soup.findAll('h2')[-1].text.replace('.', '')

        normal_match = re.search(r'Webpay: \$(\d+)', price_text)
        normal_price = Decimal(normal_match.group(1))

        offer_match = re.search(r'Transferencia: \$(\d+)', price_text)
        offer_price = Decimal(offer_match.group(1))

        return [Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            part_number=sku,
            description=description,
            picture_urls=picture_urls
        )]
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-item product list.

        Returns an empty list when the response status is not 200.

        Product data is taken from the ``var skuJson_0 = ...;`` assignment
        found in the page's scripts. Stock is ``-1`` when the data marks the
        product as available and ``0`` otherwise. The price is the first
        SKU's ``bestPrice`` divided by 100 and multiplied by the ``1.12``
        tax factor; it is reported in ``USD``.

        Raises:
            Exception: if no script on the page contains ``var skuJson``.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code != 200:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        sku_scripts = [script.text for script in soup.findAll('script')
                       if 'var skuJson' in script.text]

        if not sku_scripts:
            raise Exception('No Data')

        product_json = json.loads(
            re.search(r'var skuJson_0 = ([\S\s]+?);',
                      sku_scripts[0]).groups()[0])

        name = product_json['name']
        first_sku = product_json['skus'][0]
        sku = str(first_sku['sku'])
        stock = -1 if product_json['available'] else 0

        tax = Decimal('1.12')
        # Divide in Decimal space: Decimal(best_price / 100) would first do
        # a float division and carry its rounding error into the price.
        price = Decimal(str(first_sku['bestPrice'])) / Decimal(100) * tax

        picture_urls = [
            a['zoom'] for a in soup.findAll('a', {'id': 'botaoZoom'})
        ]

        description = html_to_markdown(
            str(soup.find('div', 'product-description')))

        p = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            price,
            price,
            'USD',
            sku=sku,
            picture_urls=picture_urls,
            description=description,
        )

        return [p]