예제 #1
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a VTEX product page and return one Product per SKU.

        Two JavaScript payloads embedded in the page are parsed: the
        argument of ``vtex.events.addData(...)`` (brand, name, price, EANs
        and per-SKU stock) and the ``skuJson_0`` variable (the SKU list).

        :param url: product page URL; passed to Product for both URL
            arguments.
        :param category: passed through unchanged to each Product.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list with one Product per entry of ``skuJson_0['skus']``.
        :raises AttributeError: if either payload is not found in the page.
        """
        print(url)
        session = session_with_proxy(extra_args)
        page_source = session.get(url).text

        pricing_data = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                                 page_source).groups()[0]
        pricing_data = json.loads(pricing_data)

        skus_data = re.search(r'var skuJson_0 = ([\S\s]+?);CATALOG',
                              page_source).groups()[0]
        skus_data = json.loads(skus_data)
        name = '{} {}'.format(pricing_data['productBrandName'],
                              pricing_data['productName'])
        base_price = Decimal(pricing_data['productPriceTo'])

        soup = BeautifulSoup(page_source, 'html.parser')

        picture_urls = [
            tag['rel'][0] for tag in soup.findAll('a', {'id': 'botaoZoom'})
        ]

        description = html_to_markdown(
            str(soup.find('section', 'product-specs')))
        products = []

        if 'productEans' in pricing_data:
            ean = pricing_data['productEans'][0]
            # 12-digit codes are left-padded with a zero before validation.
            if len(ean) == 12:
                ean = '0' + ean
            if not check_ean13(ean):
                ean = None
        else:
            ean = None

        for sku_data in skus_data['skus']:
            sku = str(sku_data['sku'])
            stock = pricing_data['skuStocks'][sku]

            # The 5% reduction is derived from the untouched base price on
            # every iteration. Reassigning a shared variable here would
            # compound the discount across SKUs and leak it to SKUs sold
            # by other sellers.
            if sku_data['sellerId'] == 'lojamultilaser':
                price = (base_price * Decimal('0.95')).quantize(
                    Decimal('0.01'))
            else:
                price = base_price

            p = Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        price,
                        price,
                        'COP',
                        sku=sku,
                        ean=ean,
                        description=description,
                        picture_urls=picture_urls)
            products.append(p)

        return products
예제 #2
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build the single Product described by the page at ``url``.

        SKU and EAN are read from the ``skuTienda`` divs; brand, name and
        price come from the first ``dataLayer`` entry; the part number is
        taken from the ``modelo`` row of the ``descTable`` list when there
        is one. Stock is always reported as -1 and the currency as MXN.

        :return: a one-element list holding the Product.
        """
        print(url)
        http = session_with_proxy(extra_args)

        html_text = http.get(url).text
        soup = BeautifulSoup(html_text, 'html.parser')

        code_tags = soup.findAll('div', 'skuTienda')
        sku = code_tags[0].text.replace('SKU#. ', '').strip()

        ean_candidate = code_tags[1].text.replace('EAN#. ', '').strip()
        # 12-digit codes are left-padded with a zero before validation.
        if len(ean_candidate) == 12:
            ean_candidate = '0' + ean_candidate
        ean = ean_candidate if check_ean13(ean_candidate) else None

        layer_match = re.search(r'dataLayer = ([\S\s]+?);', html_text)
        layer_entry = demjson.decode(layer_match.groups()[0])[0]
        product_info = layer_entry['ecommerce']['detail']['products'][0]

        name = '{} {}'.format(product_info['brand'], product_info['name'])
        price = Decimal(product_info['price'])

        picture_urls = [soup.find('img', 'tienda_Detalle')['src']]

        specs = soup.find('dl', 'descTable')
        description = html_to_markdown(str(specs))

        # The <dd> at the same index as the "modelo" <dt> holds the value.
        part_number = None
        for position, header in enumerate(specs.findAll('dt')):
            if header.text.lower().strip() != 'modelo':
                continue
            part_number = specs.findAll('dd')[position].text.strip()
            break

        return [
            Product(
                name,
                cls.__name__,
                category,
                url,
                url,
                sku,
                -1,
                price,
                price,
                'MXN',
                sku=sku,
                ean=ean,
                part_number=part_number,
                description=description,
                picture_urls=picture_urls
            )
        ]
예제 #3
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return one Product per SKU found on a VTEX product page.

        The listed price is multiplied by 1.19 and rounded to an integer
        before being used as both prices; each SKU contributes its own
        ``image`` as the only picture. Currency is COP.

        :raises AttributeError: if either embedded payload is missing.
        """
        http = session_with_proxy(extra_args)
        html_text = http.get(url).text

        events_match = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                                 html_text)
        pricing_data = json.loads(events_match.groups()[0])

        sku_json_match = re.search(r'var skuJson_0 = ([\S\s]+?);',
                                   html_text)
        skus_data = json.loads(sku_json_match.groups()[0])

        name = '{} {}'.format(pricing_data['productBrandName'],
                              pricing_data['productName'])

        listed_price = Decimal(pricing_data['productPriceTo'])
        price = (listed_price * Decimal('1.19')).quantize(0)

        soup = BeautifulSoup(html_text, 'html.parser')
        description = html_to_markdown(
            str(soup.find('div', 'boxProductDescription')))

        ean = None
        if 'productEans' in pricing_data:
            candidate = pricing_data['productEans'][0]
            # 12-digit codes are left-padded with a zero before validation.
            if len(candidate) == 12:
                candidate = '0' + candidate
            if check_ean13(candidate):
                ean = candidate

        products = []
        for entry in skus_data['skus']:
            sku = str(entry['sku'])
            stock = pricing_data['skuStocks'][sku]
            picture_urls = [entry['image']]

            products.append(
                Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        price,
                        price,
                        'COP',
                        sku=sku,
                        ean=ean,
                        description=description,
                        picture_urls=picture_urls))

        return products
예제 #4
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return one Product per ``skuStocks`` entry of a VTEX page.

        Pricing, EAN and stock come from the ``vtex.events.addData(...)``
        payload. The product name is ``"<productReferenceId> /
        <productName>"`` truncated to 255 characters. Currency is CLP.

        :param url: product page URL; passed to Product for both URL
            arguments.
        :param category: passed through unchanged to each Product.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list of Product instances.
        :raises AttributeError: if the payload is not found in the page.
        """
        print(url)
        session = session_with_proxy(extra_args)
        page_source = session.get(url).text

        pricing_data = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                                 page_source).groups()[0]
        pricing_data = json.loads(pricing_data)
        # The name is built once, from the reference id. An earlier
        # brand-based value was always overwritten before use, so it is
        # not computed any more (and 'productBrandName' is not required).
        name = '{} / {}'.format(pricing_data['productReferenceId'],
                                pricing_data['productName'])[:255]
        price = Decimal(pricing_data['productPriceTo'])

        soup = BeautifulSoup(page_source, 'html.parser')

        picture_urls = [
            tag['rel'][0] for tag in soup.findAll('a', {'id': 'botaoZoom'})
        ]

        description = html_to_markdown(
            str(soup.find('div', 'section-specifications')))

        products = []

        if 'productEans' in pricing_data:
            ean = pricing_data['productEans'][0]
            # 12-digit codes are left-padded with a zero before validation.
            if len(ean) == 12:
                ean = '0' + ean
            if not check_ean13(ean):
                ean = None
        else:
            ean = None

        for sku, stock in pricing_data['skuStocks'].items():
            p = Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        price,
                        price,
                        'CLP',
                        sku=sku,
                        ean=ean,
                        description=description,
                        picture_urls=picture_urls)
            products.append(p)

        return products
예제 #5
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return one Product per variant listed in the page's JS state.

        The ``product`` object is extracted from the ``current: ...``
        JavaScript literal in the page and decoded with ``demjson``.
        Variant prices are divided by 100 and reported in USD.

        :param url: product page URL; passed to Product for both URL
            arguments.
        :param category: passed through unchanged to each Product.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list of Product instances, one per variant.
        :raises AttributeError: if the JS literal is not found in the page.
        """
        session = session_with_proxy(extra_args)
        response = session.get(url)

        products = []
        json_data = demjson.decode(
            re.search(r'current: ([\s\S]*?),\n[ \t]+customerLoggedIn',
                      response.text).groups()[0])['product']

        description = html_to_markdown(json_data['description'])

        images = json_data['images']
        picture_urls = [
            'https:{}'.format(image.split('?')[0]) for image in images
        ]

        for variant in json_data['variants']:
            name = variant['name']
            sku = variant['sku']

            # A variant may carry no barcode at all (None or ''); treat
            # that as "no EAN" instead of failing on len(None).
            barcode = variant['barcode'] or None
            if barcode is not None:
                # 12-digit codes are left-padded with a zero before
                # validation.
                if len(barcode) == 12:
                    barcode = '0' + barcode

                if not check_ean13(barcode):
                    barcode = None

            # The stock may be listed as zero for available products, and no
            # active products at Multimax seem to be unavailable, so
            # assume available stock but unknown quantity
            stock = variant['inventory_quantity'] or -1
            price = Decimal(variant['price']) / Decimal(100)

            products.append(
                Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        price,
                        price,
                        'USD',
                        sku=sku,
                        ean=barcode,
                        description=description,
                        picture_urls=picture_urls))

        return products
예제 #6
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build the single Product described by the page's ``dataLayer``.

        Name, SKU, barcode, availability and price are read from the first
        ``dataLayer`` entry; pictures and description come from the HTML.
        Stock is -1 when ``in_stock`` is ``'Y'`` and 0 otherwise. Currency
        is BRL.

        :return: a one-element list holding the Product.
        """
        http = session_with_proxy(extra_args)
        html_text = http.get(url).text

        layer_source = re.search(r'dataLayer = ([\S\s]+?);',
                                 html_text).groups()[0]
        layer = demjson.decode(layer_source)[0]

        name = layer['prodName']
        sku = str(layer['prodid'][0])

        ean_candidate = layer['barcode'].strip()
        # 12-digit codes are left-padded with a zero before validation.
        if len(ean_candidate) == 12:
            ean_candidate = '0' + ean_candidate
        ean = ean_candidate if check_ean13(ean_candidate) else None

        stock = -1 if layer['in_stock'] == 'Y' else 0
        price = Decimal(layer['totalvalue'])

        soup = BeautifulSoup(html_text, 'html.parser')

        picture_urls = []
        for image_tag in soup.findAll('img', 'imgGallery'):
            picture_urls.append(image_tag['src'])

        description_tag = soup.find('div', {'id': 'descricaoPadrao'})
        description = html_to_markdown(str(description_tag))

        return [
            Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'BRL',
                    sku=sku,
                    ean=ean,
                    description=description,
                    picture_urls=picture_urls)
        ]
예제 #7
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build the single Product described by the page at ``url``.

        The SKU is taken from the ``data-flix-sku`` attribute of the
        flixfacts loader ``<script>`` tag; name, price, description and
        pictures are read from the HTML. Stock is always -1 and the
        currency MXN.

        :return: a one-element list holding the Product.
        """
        print(url)
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')

        loader_tag = soup.find(
            'script', {'src': 'https://media.flixfacts.com/js/loader.js'})
        sku = loader_tag['data-flix-sku']

        # NOTE(review): the EAN candidate is read from the same attribute
        # as the SKU; it is only kept when it validates as an EAN-13.
        # Confirm whether a dedicated EAN attribute was intended.
        ean_candidate = loader_tag['data-flix-sku']
        if len(ean_candidate) == 12:
            ean_candidate = '0' + ean_candidate
        ean = ean_candidate if check_ean13(ean_candidate) else None

        name = soup.find('h1').text.strip()

        # Keep what follows the first '$' and drop thousands separators.
        price_text = soup.find('div', 'precio').text
        amount = price_text.split('$')[1].replace(',', '')
        price = Decimal(amount)

        description = html_to_markdown(
            str(soup.find('div', 'descripcion_larga')))

        picture_urls = []
        for thumb in soup.findAll('div', 'fotito'):
            picture_urls.append(thumb.find('a')['href'])

        return [
            Product(
                name,
                cls.__name__,
                category,
                url,
                url,
                sku,
                -1,
                price,
                price,
                'MXN',
                sku=sku,
                ean=ean,
                description=description,
                picture_urls=picture_urls
            )
        ]
예제 #8
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return Products for every price-feed entry in ``category``.

        The whole feed is downloaded from a fixed endpoint using HTTP basic
        auth (``extra_args['username']`` / ``extra_args['password']``) and
        filtered to the subcategories listed in
        ``cls.categories_dict[category]``.

        :return: list of Product instances for the matching entries.
        """
        http = session_with_proxy(extra_args)

        credentials = HTTPBasicAuth(extra_args['username'],
                                    extra_args['password'])
        response = http.get('http://200.6.78.34/stock/v1/price',
                            auth=credentials)
        feed_entries = json.loads(response.text)['products']
        wanted_subcategories = cls.categories_dict[category]

        products = []
        for entry in feed_entries:
            if entry['subCategoria'] not in wanted_subcategories:
                continue

            name = entry['descripcion'][:255]
            sku = entry['codigoTg']
            stock = entry['stockDisp']
            # Go through str() so the Decimal is built from the textual
            # form of the value.
            price = Decimal(str(entry['precio']))
            currency = entry['tipoMoneda']

            raw_ean = entry['upcEan13']
            ean = raw_ean if check_ean13(raw_ean) else None

            part_number = entry['pnFabricante']

            product = Product(name,
                              cls.__name__,
                              category,
                              url,
                              url,
                              sku,
                              stock,
                              price,
                              price,
                              currency,
                              sku=sku,
                              ean=ean,
                              part_number=part_number)
            products.append(product)

        return products
예제 #9
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Fetch one product from the BuySmart JSON endpoint.

        The last path segment of ``url`` is used as the product id in the
        query. An HTTP 500 answer yields an empty list. The offer price
        falls back to the normal price when ``BasePriceTLMC`` is empty or
        zero. Currency is CLP.

        :return: a one-element list holding the Product, or ``[]``.
        """
        print(url)
        http = session_with_proxy(extra_args)
        sku_id = url.split('/')[-1]

        query_url = 'https://buysmart-bff-production.lider.cl/buysmart-bff/' \
                    'products/{}?appId=BuySmart'.format(sku_id)

        response = http.get(query_url)
        if response.status_code in [500]:
            return []

        entry = json.loads(response.text)

        name = '{} {}'.format(entry['brand'], entry['displayName'])

        raw_ean = entry['gtin13']
        ean = raw_ean if check_ean13(raw_ean) else None

        sku = str(entry['sku'])
        stock = -1 if entry['available'] else 0
        normal_price = Decimal(entry['price']['BasePriceSales'])

        # Start from the normal price and override it only with a
        # non-empty, non-zero offer value.
        offer_price = normal_price
        raw_offer = entry['price']['BasePriceTLMC']
        if raw_offer:
            parsed_offer = Decimal(raw_offer)
            if parsed_offer:
                offer_price = parsed_offer

        # Merge the individual filter mappings into one lookup table.
        specs = OrderedDict()
        for spec in entry.get('filters', []):
            specs.update(spec)

        part_number = specs.get('Modelo')
        if part_number:
            part_number = part_number[:49]

        description = entry.get('longDescription')
        if description:
            description = html_to_markdown(description)

        picture_urls = []
        for img in entry['imagesAvailables']:
            picture_urls.append(
                'https://images.lider.cl/wmtcl?source=url'
                '[file:/productos/{}{}]&sink'.format(sku, img))

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          normal_price,
                          offer_price,
                          'CLP',
                          sku=sku,
                          ean=ean,
                          part_number=part_number,
                          picture_urls=picture_urls,
                          description=description)
        return [product]
예제 #10
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build the single Product described by the page's JSON-LD data.

        When the page at ``url`` has no ``application/ld+json`` script, the
        last path segment of ``url`` is retried directly under
        ``https://www.pichau.com.br/`` and that address becomes the product
        URL. The offer price comes from the JSON-LD offer, the normal price
        from the ``regular-price`` element. Currency is BRL.

        :return: a one-element list holding the Product.
        """
        http = session_with_proxy(extra_args)
        soup = BeautifulSoup(http.get(url).text, 'html.parser')
        json_tags = soup.findAll('script', {'type': 'application/ld+json'})

        product_url = url
        if not json_tags:
            product_url = 'https://www.pichau.com.br/' + url.split('/')[-1]
            soup = BeautifulSoup(http.get(product_url).text, 'html.parser')
            json_tags = soup.findAll(
                'script', {'type': 'application/ld+json'})

        # The last JSON-LD script holds a list; its first item is used.
        pricing_data = json.loads(json_tags[-1].text)[0]

        name = pricing_data['name']
        sku = pricing_data['sku']
        description = pricing_data.get('description')

        ean = None
        if 'gtin13' in pricing_data:
            candidate = pricing_data['gtin13'].strip()
            # 12-digit codes are left-padded with a zero before validation.
            if len(candidate) == 12:
                candidate = '0' + candidate
            if check_ean13(candidate):
                ean = candidate

        offer = pricing_data['offers']
        offer_price = Decimal(offer['price'])
        in_stock = offer['availability'] == 'http://schema.org/InStock'
        stock = -1 if in_stock else 0

        # "R$1.234,56" -> "1234.56"
        price_text = soup.find('li', 'regular-price').text
        price_text = price_text.replace('R$', '')
        price_text = price_text.replace('.', '').replace(',', '.')
        normal_price = Decimal(price_text)

        pictures_container = soup.find('ul', 'slides')
        picture_urls = (
            [link['href'] for link in pictures_container.findAll('a')]
            if pictures_container else None
        )

        return [
            Product(
                name,
                cls.__name__,
                category,
                product_url,
                url,
                sku,
                stock,
                normal_price,
                offer_price,
                'BRL',
                sku=sku,
                ean=ean,
                description=description,
                picture_urls=picture_urls
            )
        ]
예제 #11
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return one Product per SKU found on a VTEX product page.

        A response whose final URL contains the store's 404 address yields
        an empty list. Pricing, EAN and stock come from the
        ``vtex.events.addData(...)`` payload and the SKU list from
        ``skuJson_0``. Currency is ARS.

        :raises AttributeError: if either embedded payload is missing.
        """
        print(url)
        http = session_with_proxy(extra_args)
        response = http.get(url)

        if 'https://www.fravega.com/Sistema/404' in response.url:
            return []

        html_text = response.text

        events_match = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                                 html_text)
        pricing_data = json.loads(events_match.groups()[0])

        sku_json_match = re.search(r'var skuJson_0 = ([\S\s]+?);',
                                   html_text)
        skus_data = json.loads(sku_json_match.groups()[0])

        name = '{} {}'.format(pricing_data['productBrandName'],
                              pricing_data['productName'])
        price = Decimal(pricing_data['productPriceTo'])

        soup = BeautifulSoup(html_text, 'html.parser')

        picture_urls = []
        for zoom_link in soup.findAll('a', {'id': 'botaoZoom'}):
            picture_urls.append(zoom_link['rel'][0])

        description = html_to_markdown(
            str(soup.find('article', 'fichaProducto__specs__descripcion')))

        ean = None
        if 'productEans' in pricing_data:
            candidate = pricing_data['productEans'][0]
            # 12-digit codes are left-padded with a zero before validation.
            if len(candidate) == 12:
                candidate = '0' + candidate
            if check_ean13(candidate):
                ean = candidate

        products = []
        for entry in skus_data['skus']:
            sku = str(entry['sku'])
            stock = pricing_data['skuStocks'][sku]

            products.append(
                Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        price,
                        price,
                        'ARS',
                        sku=sku,
                        ean=ean,
                        description=description,
                        picture_urls=picture_urls))

        return products
예제 #12
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return one Product per SKU found on a VTEX product page.

        A response that was redirected away from ``url`` yields an empty
        list. The normal price comes from the ``vtex.events.addData(...)``
        payload; the offer price applies the percentage found in the last
        ``<p class="flag">`` next to the price box, when there is one.
        Currency is BRL.

        :param url: product page URL; passed to Product for both URL
            arguments.
        :param category: passed through unchanged to each Product.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: list of Product instances, or ``[]`` on redirect.
        :raises AttributeError: if an embedded payload or the price box is
            missing from the page.
        """
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.url != url:
            return []

        page_source = response.text

        pricing_data = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                                 page_source).groups()[0]
        pricing_data = json.loads(pricing_data)

        skus_data = re.search(r'var skuJson_0 = ([\S\s]+?);',
                              page_source).groups()[0]
        skus_data = json.loads(skus_data)
        name = '{} {}'.format(pricing_data['productBrandName'],
                              pricing_data['productName'])
        normal_price = Decimal(pricing_data['productPriceTo'])

        soup = BeautifulSoup(page_source, 'html.parser')

        # ``.parent`` is the immediate parent, i.e. what the deprecated
        # BS3-style ``fetchParents()[0]`` returned.
        price_box_parent = soup.find('div', 'price_box-v1').parent
        discount_flags = price_box_parent.findAll('p', 'flag')
        if discount_flags:
            # The first run of digits in the last flag is the percentage.
            discount_value = re.search(r'(\d+)', discount_flags[-1].text)
            discount_value = Decimal(discount_value.groups()[0])
            discount_factor = (Decimal(100) - discount_value) / Decimal(100)

            offer_price = normal_price * discount_factor
            offer_price = offer_price.quantize(Decimal('0.01'))
        else:
            offer_price = normal_price

        picture_urls = [
            tag['rel'][0].split('?')[0]
            for tag in soup.findAll('a', {'id': 'botaoZoom'})
        ]

        # Each panel is followed by a blank line, including the last one.
        panel_classes = ['blc_1', 'blc_2']
        description = ''.join(
            html_to_markdown(str(soup.find('div', panel_class))) + '\n\n'
            for panel_class in panel_classes
        )

        products = []

        if 'productEans' in pricing_data:
            ean = pricing_data['productEans'][0]
            # 12-digit codes are left-padded with a zero before validation.
            if len(ean) == 12:
                ean = '0' + ean
            if not check_ean13(ean):
                ean = None
        else:
            ean = None

        for sku_data in skus_data['skus']:
            sku = str(sku_data['sku'])
            stock = pricing_data['skuStocks'][sku]

            p = Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        normal_price,
                        offer_price,
                        'BRL',
                        sku=sku,
                        ean=ean,
                        description=description,
                        picture_urls=picture_urls)
            products.append(p)

        return products
예제 #13
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build the single Product described by the page's ``siteMetadata``.

        The request is sent with fixed User-Agent and Accept-Language
        headers and a 30 second timeout. Pages whose metadata has no
        ``product`` entry yield an empty list. Currency is BRL.

        :return: a one-element list holding the Product, or ``[]``.
        """
        http = session_with_proxy(extra_args)
        http.headers['User-Agent'] = \
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, ' \
            'like Gecko) Chrome/66.0.3359.117 Safari/537.36'
        http.headers['Accept-Language'] = \
            'en-US,en;q=0.9,es;q=0.8,pt;q=0.7,pt-BR;q=0.6'

        html_text = http.get(url, timeout=30).text
        metadata_match = re.search(r'var siteMetadata = ([\S\s]+?);',
                                   html_text)
        page_data = json.loads(metadata_match.groups()[0])['page']

        if 'product' not in page_data:
            return []

        product_data = page_data['product']

        # The full name is percent-encoded in the metadata.
        name = urllib.parse.unquote(product_data['fullName'])
        sku = product_data['idSku']
        price = Decimal(product_data['salePrice'])
        stock = -1 if product_data['StockAvailability'] else 0

        soup = BeautifulSoup(html_text, 'html.parser')

        ean = None
        ean_container = soup.find('span', 'productEan')
        if ean_container:
            candidate = re.search(r'EAN (\d+)',
                                  ean_container.text).groups()[0]
            # 12-digit codes are left-padded with a zero before validation.
            if len(candidate) == 12:
                candidate = '0' + candidate
            if check_ean13(candidate):
                ean = candidate

        details_tag = soup.find('div', 'detalhesProduto')
        description = html_to_markdown(str(details_tag))

        picture_urls = []
        for zoom_link in soup.findAll('a', 'jqzoom'):
            image_src = zoom_link.find('img')['src']
            # Non-breaking spaces in the address are percent-encoded as
            # regular spaces.
            picture_urls.append(image_src.replace('\xa0', '%20'))

        return [
            Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'BRL',
                    sku=sku,
                    ean=ean,
                    description=description,
                    picture_urls=picture_urls)
        ]
예제 #14
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build the single Product described by the page at ``url``.

        Product data is taken from the string passed to
        ``JSON.parse("...", reviver)`` in the page. Pages whose data has no
        ``customerPrice`` yield an empty list. Stock is 0 when the
        add-to-cart box is missing or mentions "Agotado" or "Preventa",
        and -1 otherwise. Products whose title contains "reacondicionado"
        are flagged as refurbished. Currency is MXN.

        :param url: product page URL; passed to Product for both URL
            arguments.
        :param category: passed through unchanged to the Product.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: a one-element list holding the Product, or ``[]``.
        :raises AttributeError: if the ``JSON.parse`` call is not found.
        """
        print(url)
        session = session_with_proxy(extra_args)
        page_source = session.get(url).text

        match = re.search(r'JSON.parse\("(.+)", reviver\);', page_source)
        # The capture is the body of a JavaScript string literal, so its
        # backslash escapes must be resolved before parsing the JSON.
        # ``unicode_escape`` decodes raw bytes as latin-1; encoding with
        # latin-1 + ``backslashreplace`` keeps every character intact,
        # whereas a utf-8 round trip turns non-ASCII text into mojibake.
        raw_json = match.groups()[0].encode(
            'latin-1', 'backslashreplace').decode('unicode_escape')
        product_data = json.loads(raw_json)

        if 'customerPrice' not in product_data:
            return []

        name = product_data['title']
        sku = product_data['skuId']
        normal_price = Decimal(product_data['customerPrice'])
        offer_price = normal_price
        part_number = product_data.get('modelNumber')
        ean = product_data['upc']
        stock = -1
        condition = 'https://schema.org/NewCondition'

        if 'reacondicionado' in name.lower():
            condition = 'https://schema.org/RefurbishedCondition'

        # 12-digit codes are left-padded with a zero before validation.
        if len(ean) == 12:
            ean = '0' + ean

        if not check_ean13(ean):
            ean = None

        soup = BeautifulSoup(page_source, 'html.parser')

        # Look the add-to-cart box up once and reuse it for every check.
        cart_box = soup.find('div', 'shop-add-to-cart')
        if not cart_box:
            stock = 0
        elif 'Agotado' in cart_box.text or 'Preventa' in cart_box.text:
            stock = 0

        description = html_to_markdown(
            str(soup.find('div', 'bbmx-product-description')))
        picture_urls = [
            tag['src'] for tag in soup.findAll(
                'img', {'data-track': 'enlarge-image:image'})
        ]

        p = Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    normal_price,
                    offer_price,
                    'MXN',
                    sku=sku,
                    condition=condition,
                    part_number=part_number,
                    ean=ean,
                    description=description,
                    picture_urls=picture_urls)

        return [p]
예제 #15
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return one Product per SKU listed in the page's preloaded state.

        An empty list is returned when the request is redirected, when the
        page shows the ``not-found-image`` SVG, or when no
        ``window.__PRELOADED_STATE__`` assignment is present. Products
        without offers get zero prices and zero stock. Currency is BRL.

        :return: list of Product instances, possibly empty.
        """
        http = session_with_proxy(extra_args)
        http.headers['User-Agent'] = \
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, ' \
            'like Gecko) Chrome/66.0.3359.117 Safari/537.36'
        http.headers['Accept-Language'] = \
            'en-US,en;q=0.9,es;q=0.8,pt;q=0.7,pt-BR;q=0.6'

        response = http.get(url, timeout=30)
        if response.url != url:
            return []

        html_text = response.text

        soup = BeautifulSoup(html_text, 'html.parser')
        if soup.find('svg', 'not-found-image'):
            return []

        state_match = re.search(r'window.__PRELOADED_STATE__ = (.+);',
                                html_text)
        if not state_match:
            return []

        state = json.loads(state_match.groups()[0])
        entities = state['entities']

        product_json = entities['products']['entities']['products']
        eans_json = entities['skus']['entities']['skus']
        pricing_json = entities['offers']

        # Preferred image sizes, best first.
        sizes = ['extraLarge', 'large', 'big', 'medium']

        description = html_to_markdown(html.unescape(
            state['description']['content']))

        products = []
        for page_id, page_json in product_json.items():
            name = page_json['name']

            # Keep, for every image, the first available preferred size.
            picture_urls = []
            for image_json in page_json['images']:
                available = [size for size in sizes if size in image_json]
                if available:
                    picture_urls.append(image_json[available[0]])

            offers = pricing_json[page_id]
            if offers:
                normal_price = Decimal(str(offers[0]['salesPrice']))
                offer_price = normal_price
                stock = -1
            else:
                normal_price = Decimal(0)
                offer_price = Decimal(0)
                stock = 0

            for sku in page_json['skus']:
                ean = None
                if 'eans' in eans_json[sku]:
                    # Left-pad with zeros to 13 characters, then reject
                    # invalid codes and the all-zero placeholder.
                    candidate = eans_json[sku]['eans'][0].rjust(13, '0')
                    if check_ean13(candidate) and \
                            candidate != '0000000000000':
                        ean = candidate

                products.append(
                    Product(
                        name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        normal_price,
                        offer_price,
                        'BRL',
                        sku=sku,
                        ean=ean,
                        description=description,
                        picture_urls=picture_urls
                    )
                )

        return products
예제 #16
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build the single Product described by the page at ``url``.

        A 404 answer or a redirect yields an empty list. Name, SKU,
        reference code and normal price come from the first ``dataLayer``
        entry; the product key is the trailing alphanumeric segment of
        ``url``. When the reference code is not a valid EAN-13 it is
        appended to the name instead. The promotional price is only
        considered when a ``sprite-cmr`` span is present, and is capped at
        the normal price. Currency is ``cls.currency``.

        :param url: product page URL; passed to Product for both URL
            arguments.
        :param category: passed through unchanged to the Product.
        :param extra_args: forwarded to ``session_with_proxy``.
        :return: a one-element list holding the Product, or ``[]``.
        :raises AttributeError: if ``url`` has no trailing key or the
            ``dataLayer`` assignment is missing.
        :raises KeyError: if the condition badge text is not a known label.
        """
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 404 or response.url != url:
            return []

        page_source = response.text
        # The HTML is parsed once; the same tree serves every lookup below.
        soup = BeautifulSoup(page_source, 'html.parser')
        key = re.search(r'-([a-zA-Z0-9]+)$', url).groups()[0]
        pricing_str = re.search(r'dataLayer = ([\S\s]+?);\n',
                                page_source).groups()[0]
        pricing_data = json.loads(pricing_str)[0]

        name = pricing_data['product_name'][0:254]
        sku = pricing_data['sku_config']

        reference_code = pricing_data['ean_code'].strip()
        ean = None

        if check_ean13(reference_code):
            ean = reference_code
        else:
            name = '{} - {}'.format(name, reference_code)

        # NOTE(review): this keeps up to 256 characters; confirm against
        # the maximum name length Product accepts.
        name = name[0:256]

        normal_price = Decimal(pricing_data['special_price'])

        pricing_container = soup.find('div', 'product-price-lg')

        offer_price = normal_price
        # Guard against a missing price box so the page still yields a
        # product at its normal price.
        if soup.find('span', 'sprite-cmr') and pricing_container:
            offer_price_container = pricing_container.find(
                'span', 'price-promotional')

            if offer_price_container:
                offer_price = Decimal(remove_words(offer_price_container.text))
                if offer_price > normal_price:
                    offer_price = normal_price

        condition_dict = {
            'Nuevo': 'https://schema.org/NewCondition',
            'Reacondicionado': 'https://schema.org/RefurbishedCondition',
        }

        condition_label = soup.find('span', 'badge-condition-type')

        if condition_label:
            condition = condition_dict[condition_label.text.strip()]
        else:
            condition = 'https://schema.org/NewCondition'

        feature_info = soup.find('div', 'feature-information')
        description = html_to_markdown(str(feature_info))

        description += '\n\n' + html_to_markdown(
            str(soup.find('div', 'features-box-section')))

        picture_urls = [
            'https:' + tag.find('img')['data-lazy']
            for tag in soup.findAll('div', {'id': 'image-product'})
        ]

        availability_container = soup.find('link',
                                           {'itemprop': 'availability'})

        if not availability_container:
            stock = 0
        elif feature_info and feature_info.find(
                'span', 'badge-pill-international-shipping'):
            # Internationally shipped items are reported without stock and
            # tagged in the description.
            stock = 0
            description = 'ST-INTERNATIONAL-SHIPPING {}'.format(description)
        elif availability_container['href'] == 'http://schema.org/InStock':
            stock = -1
        else:
            stock = 0

        seller_container = soup.find('div', 'seller-name-rating-section')
        if seller_container:
            seller = seller_container.text.strip()
        else:
            seller = None

        p = Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    key,
                    stock,
                    normal_price,
                    offer_price,
                    cls.currency,
                    sku=sku,
                    ean=ean,
                    description=description,
                    picture_urls=picture_urls,
                    condition=condition,
                    seller=seller)

        return [p]
예제 #17
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the products of a paginated listing with a browser driver.

        Loads ``url`` in the driver returned by
        ``cls._session_driver(extra_args)``, walks every results page by
        clicking the element with id ``next`` and builds one ``Product`` (in
        USD) for each ``single-result`` container that has visible text.

        Args:
            url: Listing URL to open; it is also handed to every ``Product``
                next to the row's own link.
            category: Value forwarded unchanged to ``Product``.
            extra_args: Value forwarded unchanged to ``cls._session_driver``.

        Returns:
            A list with the ``Product`` objects collected from all pages.

        Raises:
            Exception: If a row's ``prod-number-container`` does not hold
                exactly two or three ``<span>`` elements.
        """
        print(url)
        driver = cls._session_driver(extra_args)

        # Everything that uses the driver runs inside ``try`` so the browser
        # is closed even when scraping fails midway (for instance on the
        # 'Invalid container' error below) instead of being leaked.
        try:
            driver.get(url)

            # Fixed pause before the first read of the page.
            time.sleep(5)

            first_url_of_last_page = None
            products = []

            while True:
                slept = False
                containers = driver.find_elements_by_class_name(
                    'single-result')

                for idx, container in enumerate(containers):
                    product_url = container.find_element_by_class_name(
                        'ellipsis-multiline').get_attribute('href')

                    # The first row still being the one remembered from the
                    # previously processed page means the listing has not
                    # changed yet: wait and re-read it without clicking
                    # "next" again.
                    # NOTE(review): there is no retry limit, so a page that
                    # never changes keeps this loop spinning - confirm that
                    # is acceptable.
                    if idx == 0 and product_url == first_url_of_last_page:
                        time.sleep(5)
                        slept = True
                        break

                    if idx == 0:
                        first_url_of_last_page = product_url

                    # Rows without any visible text are skipped.
                    if not container.text.strip():
                        continue

                    pricing_spans = container.find_element_by_class_name(
                        'prod-number-container').find_elements_by_tag_name(
                            'span')

                    part_number = pricing_spans[0].text

                    # Three spans: part number, EAN candidate, SKU.
                    # Two spans: part number, SKU.
                    if len(pricing_spans) == 3:
                        ean = pricing_spans[1].text

                        # 12-character codes get a leading zero before the
                        # EAN-13 check.
                        if len(ean) == 12:
                            ean = '0' + ean
                        if not check_ean13(ean):
                            ean = None

                        sku = pricing_spans[2].text
                    elif len(pricing_spans) == 2:
                        ean = None
                        sku = pricing_spans[1].text
                    else:
                        raise Exception('Invalid container')

                    name = container.find_element_by_class_name(
                        'ellipsis-multiline').text

                    # The second 'resprice' element carries the price text;
                    # without a '$' in it the row is stored with price 0 and
                    # stock 0.
                    price_parts = container.find_elements_by_class_name(
                        'resprice')[1].text.split('$')

                    if len(price_parts) > 1:
                        # '.' is dropped and ',' becomes the decimal point.
                        price = Decimal(
                            price_parts[1].replace('.', '').replace(',', '.'))
                        stock_tag = container.find_element_by_class_name(
                            'in-stock')
                        stock = int(
                            stock_tag.get_attribute('data-stock-qty-' + sku))
                    else:
                        price = Decimal(0)
                        stock = 0

                    if 'BAD BOX' in name:
                        condition = 'https://schema.org/DamagedCondition'
                    else:
                        condition = 'https://schema.org/NewCondition'

                    products.append(Product(name,
                                            cls.__name__,
                                            category,
                                            product_url,
                                            url,
                                            sku,
                                            stock,
                                            price,
                                            price,
                                            'USD',
                                            sku=sku,
                                            ean=ean,
                                            condition=condition,
                                            part_number=part_number))

                if slept:
                    continue

                next_button = driver.find_elements_by_id('next')
                if not next_button:
                    break
                next_button[0].click()

            return products
        finally:
            driver.close()
예제 #18
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build the single ``Product`` described by a product detail page.

        Downloads ``url`` through ``session_with_proxy(extra_args)``, parses
        it with BeautifulSoup and reads name, SKU/EAN, description, stock
        flag, pictures and price from the markup. The price is reported in
        COP and used as both normal and offer price.

        Args:
            url: Product page URL; passed to ``Product`` twice.
            category: Value forwarded unchanged to ``Product``.
            extra_args: Value forwarded unchanged to ``session_with_proxy``.

        Returns:
            A one-element list holding the scraped ``Product``.
        """
        http = session_with_proxy(extra_args)
        page = BeautifulSoup(http.get(url).text, 'html.parser')

        title = page.find('div', 'product-name').text.strip()

        # The text of the ``sku`` itemprop doubles as SKU/key and as the EAN
        # candidate; 12-character values get a leading zero before the
        # EAN-13 check, and anything failing the check yields no EAN.
        sku = page.find('span', {'itemprop': 'sku'}).text.strip()
        ean_candidate = '0' + sku if len(sku) == 12 else sku
        ean = ean_candidate if check_ean13(ean_candidate) else None

        # Description = short description followed by the specs table, each
        # converted to markdown and terminated by a blank line.
        sections = [
            page.find('div', 'short-description std'),
            page.find('table', {'id': 'product-attribute-specs-table'}),
        ]
        description = ''.join(
            html_to_markdown(str(section)) + '\n\n' for section in sections)

        # -1 when the page shows an ``in-stock`` paragraph, 0 otherwise.
        stock = -1 if page.find('p', 'in-stock') else 0

        gallery = page.find('li', 'image-extra')
        picture_urls = [img['src'] for img in gallery.findAll('img')]

        shop_box = page.find('div', 'product-shop')
        price_tag = shop_box.find('span', {'itemprop': 'price'})

        if not price_tag:
            # No ``price`` itemprop: fall back to the second ``span.price``
            # inside the product box and parse its text.
            price_tag = shop_box.findAll('span', 'price')[1]
            normal_price = Decimal(remove_words(price_tag.string))
        else:
            normal_price = Decimal(price_tag['content'])

        product = Product(
            title,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            normal_price,
            'COP',
            sku=sku,
            ean=ean,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
예제 #19
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build one ``Product`` per SKU found on a VTEX-style product page.

        Downloads ``url`` through ``session_with_proxy(extra_args)`` and
        decodes two JSON documents embedded in the page's scripts: the
        argument of ``vtex.events.addData(...)`` (brand, name, price, EANs,
        per-SKU stock) and the value assigned to ``var skuJson_0`` (SKU
        list). All SKUs share name, price (in BRL), EAN and description; each
        gets its own stock value and picture.

        Args:
            url: Product page URL; passed to ``Product`` twice.
            category: Value forwarded unchanged to ``Product``.
            extra_args: Value forwarded unchanged to ``session_with_proxy``.

        Returns:
            A list of ``Product`` objects, one for each entry of the page's
            ``skus`` array.

        Raises:
            AttributeError: If one of the two script markers is not present
                in the page.
            json.JSONDecodeError: If the text after a marker is not valid
                JSON.
        """
        def embedded_json(source, marker_pattern):
            # Decode the JSON document starting right after the marker.
            # ``raw_decode`` stops at the real end of the document, so a ';'
            # or ');' inside a string value (e.g. an HTML entity in a product
            # name) no longer truncates the payload the way a lazy
            # "up to the first ';'" regex capture does.
            marker = re.search(marker_pattern, source)
            payload, _ = json.JSONDecoder().raw_decode(source, marker.end())
            return payload

        session = session_with_proxy(extra_args)
        page_source = session.get(url).text

        # Trailing ``\s*`` skips whitespace, which raw_decode does not accept
        # in front of the document.
        pricing_data = embedded_json(page_source, r'vtex.events.addData\(\s*')
        skus_data = embedded_json(page_source, r'var skuJson_0 = \s*')

        name = '{} {}'.format(pricing_data['productBrandName'],
                              pricing_data['productName'])
        price = Decimal(pricing_data['productPriceTo'])

        soup = BeautifulSoup(page_source, 'html.parser')

        # Description = the two content panels converted to markdown, each
        # followed by a blank line.
        panel_classes = [
            'produto-contents--sinope', 'produto-contents--caracteristicas'
        ]
        description = ''.join(
            html_to_markdown(str(soup.find('li', panel_class))) + '\n\n'
            for panel_class in panel_classes)

        # Only the first listed EAN is considered; a missing or empty list
        # simply means "no EAN" instead of raising IndexError.
        eans = pricing_data.get('productEans')
        ean = eans[0] if eans else None
        if ean is not None:
            # 12-character codes get a leading zero before the EAN-13 check.
            if len(ean) == 12:
                ean = '0' + ean
            if not check_ean13(ean):
                ean = None

        products = []

        for sku_data in skus_data['skus']:
            sku = str(sku_data['sku'])
            stock = pricing_data['skuStocks'][sku]

            # Drop the query string and the '-300-300' marker from the SKU's
            # image URL.
            picture_urls = [
                sku_data['image'].split('?')[0].replace('-300-300', '')
            ]

            products.append(Product(name,
                                    cls.__name__,
                                    category,
                                    url,
                                    url,
                                    sku,
                                    stock,
                                    price,
                                    price,
                                    'BRL',
                                    sku=sku,
                                    ean=ean,
                                    description=description,
                                    picture_urls=picture_urls))

        return products