def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a VTEX product page (COP store); one Product per SKU variant.

    Bug fixed: the 5% first-party ('lojamultilaser') discount used to be
    applied to the shared `price` variable inside the SKU loop, so the
    discount compounded when several SKUs matched and also leaked into
    later non-matching SKUs. The per-SKU price is now derived from the
    unmodified base price on every iteration.
    """
    print(url)
    session = session_with_proxy(extra_args)
    page_source = session.get(url).text

    # VTEX embeds pricing/stock metadata in a vtex.events.addData(...) call
    pricing_data = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                             page_source).groups()[0]
    pricing_data = json.loads(pricing_data)

    # Per-variant data lives in the skuJson_0 JS variable
    skus_data = re.search(r'var skuJson_0 = ([\S\s]+?);CATALOG',
                          page_source).groups()[0]
    skus_data = json.loads(skus_data)

    name = '{} {}'.format(pricing_data['productBrandName'],
                          pricing_data['productName'])
    base_price = Decimal(pricing_data['productPriceTo'])

    soup = BeautifulSoup(page_source, 'html.parser')
    picture_urls = [
        tag['rel'][0]
        for tag in soup.findAll('a', {'id': 'botaoZoom'})
    ]
    description = html_to_markdown(
        str(soup.find('section', 'product-specs')))

    products = []

    if 'productEans' in pricing_data:
        ean = pricing_data['productEans'][0]
        if len(ean) == 12:
            ean = '0' + ean
        if not check_ean13(ean):
            ean = None
    else:
        ean = None

    for sku_data in skus_data['skus']:
        sku = str(sku_data['sku'])
        stock = pricing_data['skuStocks'][sku]

        # Per-SKU price: 5% off for the first-party seller only
        if sku_data['sellerId'] == 'lojamultilaser':
            price = (base_price * Decimal('0.95')).quantize(
                Decimal('0.01'))
        else:
            price = base_price

        p = Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'COP', sku=sku, ean=ean,
                    description=description, picture_urls=picture_urls)
        products.append(p)

    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a single product page (MXN store) into one Product entry."""
    print(url)
    session = session_with_proxy(extra_args)
    markup = session.get(url).text
    document = BeautifulSoup(markup, 'html.parser')

    # SKU and EAN are printed in two consecutive "skuTienda" divs
    identifier_tags = document.findAll('div', 'skuTienda')
    sku = identifier_tags[0].text.replace('SKU#. ', '').strip()
    ean = identifier_tags[1].text.replace('EAN#. ', '').strip()
    if len(ean) == 12:
        ean = '0' + ean
    if not check_ean13(ean):
        ean = None

    # Name and price come from the GTM dataLayer payload (JS, not strict
    # JSON, hence demjson)
    layer_source = re.search(r'dataLayer = ([\S\s]+?);',
                             markup).groups()[0]
    layer = demjson.decode(layer_source)[0]
    detail = layer['ecommerce']['detail']['products'][0]
    name = '{} {}'.format(detail['brand'], detail['name'])
    price = Decimal(detail['price'])

    picture_urls = [document.find('img', 'tienda_Detalle')['src']]

    spec_list = document.find('dl', 'descTable')
    description = html_to_markdown(str(spec_list))

    # The "Modelo" row of the spec table doubles as the part number
    part_number = None
    headers = spec_list.findAll('dt')
    values = spec_list.findAll('dd')
    for position, header in enumerate(headers):
        if header.text.lower().strip() == 'modelo':
            part_number = values[position].text.strip()
            break

    return [Product(
        name, cls.__name__, category, url, url, sku, -1, price, price,
        'MXN', sku=sku, ean=ean, part_number=part_number,
        description=description, picture_urls=picture_urls
    )]
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a VTEX product page (COP); one Product per SKU variant."""
    session = session_with_proxy(extra_args)
    body = session.get(url).text

    # Pricing/stock metadata lives in the vtex.events.addData(...) call
    event_json = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                           body).groups()[0]
    event_data = json.loads(event_json)

    # Per-variant data lives in the skuJson_0 JS variable
    variants_json = re.search(r'var skuJson_0 = ([\S\s]+?);',
                              body).groups()[0]
    variants = json.loads(variants_json)

    name = '{} {}'.format(event_data['productBrandName'],
                          event_data['productName'])
    # Listed price excludes 19% VAT; add it and round to an integer amount
    price = (Decimal(event_data['productPriceTo']) *
             Decimal('1.19')).quantize(0)

    soup = BeautifulSoup(body, 'html.parser')
    description = html_to_markdown(
        str(soup.find('div', 'boxProductDescription')))

    ean = None
    if 'productEans' in event_data:
        candidate = event_data['productEans'][0]
        if len(candidate) == 12:
            candidate = '0' + candidate
        if check_ean13(candidate):
            ean = candidate

    products = []
    for variant in variants['skus']:
        sku = str(variant['sku'])
        products.append(Product(
            name, cls.__name__, category, url, url, sku,
            event_data['skuStocks'][sku], price, price, 'COP',
            sku=sku, ean=ean, description=description,
            picture_urls=[variant['image']]))
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a VTEX product page (CLP); one Product per entry in skuStocks.

    Fix: the initial brand-based `name` assignment was dead code — it was
    unconditionally overwritten by the reference-id form before any use —
    so it has been removed.
    """
    print(url)
    session = session_with_proxy(extra_args)
    page_source = session.get(url).text
    pricing_data = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                             page_source).groups()[0]
    pricing_data = json.loads(pricing_data)
    price = Decimal(pricing_data['productPriceTo'])
    soup = BeautifulSoup(page_source, 'html.parser')
    picture_urls = [
        tag['rel'][0]
        for tag in soup.findAll('a', {'id': 'botaoZoom'})
    ]
    description = html_to_markdown(
        str(soup.find('div', 'section-specifications')))
    products = []
    if 'productEans' in pricing_data:
        ean = pricing_data['productEans'][0]
        if len(ean) == 12:
            ean = '0' + ean
        if not check_ean13(ean):
            ean = None
    else:
        ean = None
    # Identify the product by its reference id, truncated to the DB limit
    name = '{} / {}'.format(pricing_data['productReferenceId'],
                            pricing_data['productName'])[:255]
    for sku, stock in pricing_data['skuStocks'].items():
        p = Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'CLP', sku=sku, ean=ean,
                    description=description, picture_urls=picture_urls)
        products.append(p)
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a Shopify-style embedded product JSON; one Product per
    variant (USD, prices given in cents).

    Fix: removed the unused `soup` local — it triggered a full
    BeautifulSoup parse of the page whose result was never referenced.
    """
    session = session_with_proxy(extra_args)
    response = session.get(url)
    products = []
    # The product JSON is a JS object between "current:" and
    # "customerLoggedIn" (not strict JSON, hence demjson)
    json_data = demjson.decode(
        re.search(r'current: ([\s\S]*?),\n[ \t]+customerLoggedIn',
                  response.text).groups()[0])['product']
    description = html_to_markdown(json_data['description'])
    images = json_data['images']
    picture_urls = [
        'https:{}'.format(image.split('?')[0]) for image in images
    ]
    for variant in json_data['variants']:
        name = variant['name']
        sku = variant['sku']
        barcode = variant['barcode']
        if len(barcode) == 12:
            barcode = '0' + barcode
        if not check_ean13(barcode):
            barcode = None
        # The stock may be listed as zero for available products, and no
        # active products at Multimax seem to be unavailable, so assume
        # available stock but unknown quantity
        stock = variant['inventory_quantity'] or -1
        # Shopify prices are expressed in cents
        price = Decimal(variant['price']) / Decimal(100)
        products.append(
            Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'USD', sku=sku, ean=barcode,
                    description=description, picture_urls=picture_urls))
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Build a single Product from a dataLayer payload (BRL store)."""
    session = session_with_proxy(extra_args)
    markup = session.get(url).text

    # dataLayer is JS, not strict JSON, so decode it with demjson
    layer = demjson.decode(
        re.search(r'dataLayer = ([\S\s]+?);', markup).groups()[0])[0]

    name = layer['prodName']
    sku = str(layer['prodid'][0])

    ean = layer['barcode'].strip()
    if len(ean) == 12:
        ean = '0' + ean
    if not check_ean13(ean):
        ean = None

    # 'Y' means in stock with unknown quantity; anything else is sold out
    stock = -1 if layer['in_stock'] == 'Y' else 0
    price = Decimal(layer['totalvalue'])

    document = BeautifulSoup(markup, 'html.parser')
    picture_urls = [
        img['src'] for img in document.findAll('img', 'imgGallery')
    ]
    description = html_to_markdown(
        str(document.find('div', {'id': 'descricaoPadrao'})))

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'BRL', sku=sku, ean=ean,
                    description=description, picture_urls=picture_urls)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a product page that embeds the Flixmedia loader tag (MXN).

    Bug fixed: the EAN was read from the `data-flix-sku` attribute, so it
    always duplicated the SKU. The Flixmedia loader tag carries the
    barcode in `data-flix-ean`; read that (it may be absent, so fall back
    to no EAN).
    """
    print(url)
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')
    pricing_tag = soup.find(
        'script', {'src': 'https://media.flixfacts.com/js/loader.js'})
    sku = pricing_tag['data-flix-sku']
    # Barcode lives in its own attribute; tolerate it being missing/empty
    ean = pricing_tag.get('data-flix-ean') or None
    if ean:
        if len(ean) == 12:
            ean = '0' + ean
        if not check_ean13(ean):
            ean = None
    name = soup.find('h1').text.strip()
    price = soup.find('div', 'precio').text.split('$')[1].replace(',', '')
    price = Decimal(price)
    description = html_to_markdown(
        str(soup.find('div', 'descripcion_larga')))
    picture_urls = [tag.find('a')['href']
                    for tag in soup.findAll('div', 'fotito')]
    return [Product(
        name, cls.__name__, category, url, url, sku, -1, price, price,
        'MXN', sku=sku, ean=ean, description=description,
        picture_urls=picture_urls
    )]
def products_for_url(cls, url, category=None, extra_args=None):
    """Query the store's price API and return the entries whose
    subcategory maps to the requested category."""
    session = session_with_proxy(extra_args)
    response = session.get(
        'http://200.6.78.34/stock/v1/price',
        auth=HTTPBasicAuth(extra_args['username'],
                           extra_args['password']))
    entries = json.loads(response.text)['products']
    wanted_subcategories = cls.categories_dict[category]

    products = []
    for entry in entries:
        # Skip SKUs outside the subcategories mapped to this category
        if entry['subCategoria'] not in wanted_subcategories:
            continue
        ean = entry['upcEan13']
        if not check_ean13(ean):
            ean = None
        price = Decimal(str(entry['precio']))
        sku = entry['codigoTg']
        products.append(Product(
            entry['descripcion'][:255], cls.__name__, category, url,
            url, sku, entry['stockDisp'], price, price,
            entry['tipoMoneda'], sku=sku, ean=ean,
            part_number=entry['pnFabricante']))
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Fetch one product from Lider's BuySmart BFF API.

    The product id is the last path component of `url`. Returns an empty
    list when the API answers 500 (unknown/delisted product).
    """
    print(url)
    session = session_with_proxy(extra_args)
    sku_id = url.split('/')[-1]
    query_url = 'https://buysmart-bff-production.lider.cl/buysmart-bff/' \
                'products/{}?appId=BuySmart'.format(sku_id)
    response = session.get(query_url)
    # The BFF answers 500 instead of 404 for missing products
    if response.status_code in [500]:
        return []
    entry = json.loads(response.text)
    name = '{} {}'.format(entry['brand'], entry['displayName'])
    ean = entry['gtin13']
    if not check_ean13(ean):
        ean = None
    sku = str(entry['sku'])
    # -1 means "available, exact quantity unknown"
    stock = -1 if entry['available'] else 0
    normal_price = Decimal(entry['price']['BasePriceSales'])
    # BasePriceTLMC is presumably the store-card price; fall back to the
    # normal price when it is missing or zero — TODO confirm semantics
    offer_price_container = entry['price']['BasePriceTLMC']
    if offer_price_container:
        offer_price = Decimal(offer_price_container)
        if not offer_price:
            offer_price = normal_price
    else:
        offer_price = normal_price
    # Flatten the list of single-key filter dicts into one mapping so the
    # 'Modelo' entry (part number) can be looked up
    specs = OrderedDict()
    for spec in entry.get('filters', []):
        specs.update(spec)
    part_number = specs.get('Modelo')
    if part_number:
        # Truncate to the storage field limit
        part_number = part_number[:49]
    description = None
    if 'longDescription' in entry:
        description = entry['longDescription']
    if description:
        description = html_to_markdown(description)
    picture_urls = [
        'https://images.lider.cl/wmtcl?source=url'
        '[file:/productos/{}{}]&sink'.format(sku, img)
        for img in entry['imagesAvailables']
    ]
    return [
        Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'CLP', sku=sku, ean=ean,
                part_number=part_number, picture_urls=picture_urls,
                description=description)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a Pichau product page via its JSON-LD metadata (BRL).

    If the given URL has no JSON-LD tags, retries with the canonical
    https://www.pichau.com.br/<slug> form of the URL.
    """
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')
    json_tags = soup.findAll(
        'script', {'type': 'application/ld+json'})
    if json_tags:
        product_url = url
    else:
        # Retry with the canonical product URL built from the slug
        product_path = url.split('/')[-1]
        product_url = 'https://www.pichau.com.br/' + product_path
        soup = BeautifulSoup(session.get(product_url).text,
                             'html.parser')
        json_tags = soup.findAll(
            'script', {'type': 'application/ld+json'})
    # The last JSON-LD tag holds the product entry
    pricing_data = json.loads(json_tags[-1].text)[0]
    name = pricing_data['name']
    sku = pricing_data['sku']
    description = pricing_data.get('description')
    if 'gtin13' in pricing_data:
        ean = pricing_data['gtin13'].strip()
        if len(ean) == 12:
            # Pad UPC-A (12 digits) up to EAN-13
            ean = '0' + ean
        if not check_ean13(ean):
            ean = None
    else:
        ean = None
    offer_price = Decimal(pricing_data['offers']['price'])
    if pricing_data['offers']['availability'] == \
            'http://schema.org/InStock':
        stock = -1
    else:
        stock = 0
    # The non-discounted (list) price only appears in the visible markup
    normal_price = Decimal(
        soup.find('li', 'regular-price').text.replace('R$', '').replace(
            '.', '').replace(',', '.'))
    pictures_container = soup.find('ul', 'slides')
    if pictures_container:
        picture_urls = [tag['href']
                        for tag in pictures_container.findAll('a')]
    else:
        picture_urls = None
    p = Product(
        name, cls.__name__, category, product_url, url, sku, stock,
        normal_price, offer_price, 'BRL', sku=sku, ean=ean,
        description=description, picture_urls=picture_urls
    )
    return [p]
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a Fravega VTEX product page; one Product per SKU (ARS)."""
    print(url)
    session = session_with_proxy(extra_args)
    response = session.get(url)

    # Dead product pages redirect to the site's 404 handler
    if 'https://www.fravega.com/Sistema/404' in response.url:
        return []

    markup = response.text
    event_data = json.loads(re.search(
        r'vtex.events.addData\(([\S\s]+?)\);', markup).groups()[0])
    variants = json.loads(re.search(
        r'var skuJson_0 = ([\S\s]+?);', markup).groups()[0])

    name = '{} {}'.format(event_data['productBrandName'],
                          event_data['productName'])
    price = Decimal(event_data['productPriceTo'])

    document = BeautifulSoup(markup, 'html.parser')
    picture_urls = [
        anchor['rel'][0]
        for anchor in document.findAll('a', {'id': 'botaoZoom'})
    ]
    description = html_to_markdown(str(document.find(
        'article', 'fichaProducto__specs__descripcion')))

    ean = None
    if 'productEans' in event_data:
        candidate = event_data['productEans'][0]
        if len(candidate) == 12:
            candidate = '0' + candidate
        if check_ean13(candidate):
            ean = candidate

    products = []
    for variant in variants['skus']:
        sku = str(variant['sku'])
        products.append(Product(
            name, cls.__name__, category, url, url, sku,
            event_data['skuStocks'][sku], price, price, 'ARS',
            sku=sku, ean=ean, description=description,
            picture_urls=picture_urls))
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a VTEX product page (BRL) whose offer price is derived from
    a percentage-discount flag in the markup; one Product per SKU."""
    session = session_with_proxy(extra_args)
    response = session.get(url)
    # A redirect means the product page no longer exists as requested
    if response.url != url:
        return []
    page_source = response.text
    pricing_data = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                             page_source).groups()[0]
    pricing_data = json.loads(pricing_data)
    skus_data = re.search(r'var skuJson_0 = ([\S\s]+?);',
                          page_source).groups()[0]
    skus_data = json.loads(skus_data)
    name = '{} {}'.format(pricing_data['productBrandName'],
                          pricing_data['productName'])
    normal_price = Decimal(pricing_data['productPriceTo'])
    soup = BeautifulSoup(page_source, 'html.parser')
    # NOTE(review): fetchParents() is the legacy BeautifulSoup alias of
    # find_parents() — consider migrating when this block is next touched
    discount_container = soup.find('div', 'price_box-v1').fetchParents()[0]
    discount_container = discount_container.findAll('p', 'flag')
    if discount_container:
        # The last flag carries the discount percentage; apply it to the
        # normal price to obtain the offer price
        discount_container = discount_container[-1]
        discount_value = re.search(r'(\d+)', discount_container.text)
        discount_value = Decimal(discount_value.groups()[0])
        discount_factor = (Decimal(100) - discount_value) / Decimal(100)
        offer_price = normal_price * discount_factor
        offer_price = offer_price.quantize(Decimal('0.01'))
    else:
        offer_price = normal_price
    picture_urls = [
        tag['rel'][0].split('?')[0]
        for tag in soup.findAll('a', {'id': 'botaoZoom'})
    ]
    # Description is split across two content panels
    description = ''
    panel_classes = ['blc_1', 'blc_2']
    for panel_class in panel_classes:
        panel = soup.find('div', panel_class)
        description += html_to_markdown(str(panel)) + '\n\n'
    products = []
    if 'productEans' in pricing_data:
        ean = pricing_data['productEans'][0]
        if len(ean) == 12:
            # Pad UPC-A (12 digits) up to EAN-13
            ean = '0' + ean
        if not check_ean13(ean):
            ean = None
    else:
        ean = None
    for sku_data in skus_data['skus']:
        sku = str(sku_data['sku'])
        stock = pricing_data['skuStocks'][sku]
        p = Product(name, cls.__name__, category, url, url, sku, stock,
                    normal_price, offer_price, 'BRL', sku=sku, ean=ean,
                    description=description, picture_urls=picture_urls)
        products.append(p)
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a product page via its siteMetadata JS variable (BRL).

    Returns an empty list when the page metadata has no 'product' entry.
    """
    session = session_with_proxy(extra_args)
    # Spoof a desktop browser; the site serves different markup otherwise
    # — NOTE(review): assumption, confirm against the store's behavior
    session.headers['User-Agent'] = \
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, ' \
        'like Gecko) Chrome/66.0.3359.117 Safari/537.36'
    session.headers['Accept-Language'] = \
        'en-US,en;q=0.9,es;q=0.8,pt;q=0.7,pt-BR;q=0.6'
    page_source = session.get(url, timeout=30).text
    pricing_data = re.search(r'var siteMetadata = ([\S\s]+?);',
                             page_source).groups()[0]
    pricing_data = json.loads(pricing_data)['page']
    if 'product' not in pricing_data:
        return []
    pricing_data = pricing_data['product']
    # The full name is URL-encoded in the metadata
    name = urllib.parse.unquote(pricing_data['fullName'])
    sku = pricing_data['idSku']
    price = Decimal(pricing_data['salePrice'])
    if pricing_data['StockAvailability']:
        stock = -1
    else:
        stock = 0
    soup = BeautifulSoup(page_source, 'html.parser')
    ean_container = soup.find('span', 'productEan')
    if ean_container:
        ean = re.search(r'EAN (\d+)', ean_container.text).groups()[0]
        if len(ean) == 12:
            # Pad UPC-A (12 digits) up to EAN-13
            ean = '0' + ean
        if not check_ean13(ean):
            ean = None
    else:
        ean = None
    description = html_to_markdown(str(soup.find('div',
                                                 'detalhesProduto')))
    picture_urls = [
        # Image srcs contain non-breaking spaces; escape them for URLs
        tag.find('img')['src'].replace('\xa0', '%20')
        for tag in soup.findAll('a', 'jqzoom')
    ]
    p = Product(name, cls.__name__, category, url, url, sku, stock,
                price, price, 'BRL', sku=sku, ean=ean,
                description=description, picture_urls=picture_urls)
    return [p]
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a product page whose data is a JSON.parse'd string in the
    page scripts (MXN). Returns [] when no customer price is present."""
    print(url)
    session = session_with_proxy(extra_args)
    page_source = session.get(url).text
    match = re.search(r'JSON.parse\("(.+)", reviver\);', page_source)
    # The captured string is JS-escaped; undo the escaping before
    # parsing it as JSON
    product_data = json.loads(
        match.groups()[0].encode('utf-8').decode('unicode_escape'))
    if 'customerPrice' not in product_data:
        return []
    name = product_data['title']
    sku = product_data['skuId']
    normal_price = Decimal(product_data['customerPrice'])
    offer_price = normal_price
    part_number = product_data.get('modelNumber')
    ean = product_data['upc']
    stock = -1
    condition = 'https://schema.org/NewCondition'
    # Refurbished products are flagged in the title
    if 'reacondicionado' in name.lower():
        condition = 'https://schema.org/RefurbishedCondition'
    if len(ean) == 12:
        # Pad UPC-A (12 digits) up to EAN-13
        ean = '0' + ean
    if not check_ean13(ean):
        ean = None
    soup = BeautifulSoup(page_source, 'html.parser')
    # No add-to-cart box, "Agotado" (sold out) or "Preventa" (presale)
    # all count as unavailable
    if not soup.find('div', 'shop-add-to-cart'):
        stock = 0
    elif 'Agotado' in soup.find('div', 'shop-add-to-cart').text:
        stock = 0
    elif 'Preventa' in soup.find('div', 'shop-add-to-cart').text:
        stock = 0
    description = html_to_markdown(
        str(soup.find('div', 'bbmx-product-description')))
    picture_urls = [
        tag['src'] for tag in soup.findAll(
            'img', {'data-track': 'enlarge-image:image'})
    ]
    p = Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'MXN', sku=sku,
                condition=condition, part_number=part_number, ean=ean,
                description=description, picture_urls=picture_urls)
    return [p]
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a product page from its __PRELOADED_STATE__ payload (BRL).

    One Product is emitted per (product page, sku) combination found in
    the preloaded Redux-style state. Returns [] on redirects, not-found
    pages, or when the state payload is missing.
    """
    session = session_with_proxy(extra_args)
    # Spoof a desktop browser UA/language — NOTE(review): presumably
    # required by the site; confirm if ever removed
    session.headers['User-Agent'] = \
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, ' \
        'like Gecko) Chrome/66.0.3359.117 Safari/537.36'
    session.headers['Accept-Language'] = \
        'en-US,en;q=0.9,es;q=0.8,pt;q=0.7,pt-BR;q=0.6'
    response = session.get(url, timeout=30)
    if response.url != url:
        return []
    page_source = response.text
    soup = BeautifulSoup(page_source, 'html.parser')
    if soup.find('svg', 'not-found-image'):
        return []
    main_page_json = re.search(r'window.__PRELOADED_STATE__ = (.+);',
                               page_source)
    if not main_page_json:
        return []
    main_page_json = json.loads(main_page_json.groups()[0])
    product_json = \
        main_page_json['entities']['products']['entities']['products']
    eans_json = main_page_json['entities']['skus']['entities']['skus']
    pricing_json = main_page_json['entities']['offers']
    # Preferred image sizes, best first; take the first available per image
    sizes = ['extraLarge', 'large', 'big', 'medium']
    description = html_to_markdown(html.unescape(
        main_page_json['description']['content']))
    products = []
    for page_id, page_json in product_json.items():
        name = page_json['name']
        picture_urls = []
        for image_json in page_json['images']:
            for size in sizes:
                if size in image_json:
                    picture_url = image_json[size]
                    picture_urls.append(picture_url)
                    break
        # No offers for this page id means the product is unavailable
        if pricing_json[page_id]:
            normal_price = Decimal(
                str(pricing_json[page_id][0]['salesPrice']))
            offer_price = normal_price
            stock = -1
        else:
            normal_price = Decimal(0)
            offer_price = Decimal(0)
            stock = 0
        for sku in page_json['skus']:
            if 'eans' in eans_json[sku]:
                ean = eans_json[sku]['eans'][0]
                # Zero-pad short codes up to EAN-13 length
                while len(ean) < 13:
                    ean = '0' + ean
                if not check_ean13(ean):
                    ean = None
                # An all-zeros code is a placeholder, not a real EAN
                if ean and ean == '0000000000000':
                    ean = None
            else:
                ean = None
            p = Product(
                name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'BRL', sku=sku, ean=ean,
                description=description, picture_urls=picture_urls
            )
            products.append(p)
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a marketplace product page (currency from cls.currency).

    Combines the dataLayer payload (name / SKU / prices) with the visible
    markup (promotional price, condition, pictures, seller, stock).
    Returns [] on 404s or redirects.
    """
    session = session_with_proxy(extra_args)
    response = session.get(url)
    if response.status_code == 404 or response.url != url:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')
    # The trailing URL token is the product key used as the Product id
    key = re.search(r'-([a-zA-Z0-9]+)$', url).groups()[0]
    page_source = response.text
    pricing_str = re.search(r'dataLayer = ([\S\s]+?);\n',
                            page_source).groups()[0]
    pricing_data = json.loads(pricing_str)[0]
    name = pricing_data['product_name'][0:254]
    sku = pricing_data['sku_config']
    # The 'ean_code' field is not always a valid EAN; when it is not,
    # append it to the name as a reference code instead
    reference_code = pricing_data['ean_code'].strip()
    ean = None
    if check_ean13(reference_code):
        ean = reference_code
    else:
        name = '{} - {}'.format(name, reference_code)
        name = name[0:256]
    normal_price = Decimal(pricing_data['special_price'])
    pricing_container = soup.find('div', 'product-price-lg')
    # The store-card ('sprite-cmr') badge signals a promotional price
    if not soup.find('span', 'sprite-cmr'):
        offer_price = normal_price
    else:
        offer_price_container = pricing_container.find(
            'span', 'price-promotional')
        if offer_price_container:
            offer_price = Decimal(remove_words(offer_price_container.text))
            # Guard against malformed promos above the normal price
            if offer_price > normal_price:
                offer_price = normal_price
        else:
            offer_price = normal_price
    soup = BeautifulSoup(page_source, 'html.parser')
    condition_dict = {
        'Nuevo': 'https://schema.org/NewCondition',
        'Reacondicionado': 'https://schema.org/RefurbishedCondition',
    }
    condition_label = soup.find('span', 'badge-condition-type')
    if condition_label:
        condition = condition_dict[condition_label.text.strip()]
    else:
        condition = 'https://schema.org/NewCondition'
    description = html_to_markdown(
        str(soup.find('div', 'feature-information')))
    description += '\n\n' + html_to_markdown(
        str(soup.find('div', 'features-box-section')))
    picture_urls = [
        'https:' + tag.find('img')['data-lazy']
        for tag in soup.findAll('div', {'id': 'image-product'})
    ]
    availability_container = soup.find('link',
                                       {'itemprop': 'availability'})
    if not availability_container:
        stock = 0
    elif soup.find('div', 'feature-information').find(
            'span', 'badge-pill-international-shipping'):
        # International-shipping listings are treated as unavailable and
        # tagged in the description for downstream filtering
        stock = 0
        description = 'ST-INTERNATIONAL-SHIPPING {}'.format(description)
    elif availability_container['href'] == 'http://schema.org/InStock':
        stock = -1
    else:
        stock = 0
    seller_container = soup.find('div', 'seller-name-rating-section')
    if seller_container:
        seller = seller_container.text.strip()
    else:
        seller = None
    p = Product(name, cls.__name__, category, url, url, key, stock,
                normal_price, offer_price, cls.currency, sku=sku,
                ean=ean, description=description,
                picture_urls=picture_urls, condition=condition,
                seller=seller)
    return [p]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a paginated, JS-rendered listing with Selenium (USD).

    Walks the result pages by clicking the 'next' button. If the first
    result of a page matches the first result of the previous page the
    page has not re-rendered yet, so sleep and re-read it.
    """
    print(url)
    driver = cls._session_driver(extra_args)
    driver.get(url)
    # Give the JS listing time to render the first page
    time.sleep(5)
    first_url_of_last_page = None
    products = []
    while True:
        slept = False
        containers = driver.find_elements_by_class_name('single-result')
        for idx, container in enumerate(containers):
            product_url = container.find_element_by_class_name(
                'ellipsis-multiline').get_attribute('href')
            # Same first result as last page => page not refreshed yet;
            # wait and retry the whole page
            if idx == 0 and product_url == first_url_of_last_page:
                time.sleep(5)
                slept = True
                break
            if idx == 0:
                first_url_of_last_page = product_url
            # Empty containers are layout placeholders; skip them
            if container.text.strip():
                # Spans hold part number [, EAN], SKU in that order
                pricing_spans = container.find_element_by_class_name(
                    'prod-number-container').find_elements_by_tag_name(
                    'span')
                part_number = pricing_spans[0].text
                if len(pricing_spans) == 3:
                    ean = pricing_spans[1].text
                    if len(ean) == 12:
                        # Pad UPC-A (12 digits) up to EAN-13
                        ean = '0' + ean
                    if not check_ean13(ean):
                        ean = None
                    sku = pricing_spans[2].text
                elif len(pricing_spans) == 2:
                    ean = None
                    sku = pricing_spans[1].text
                else:
                    raise Exception('Invalid container')
                name = container.find_element_by_class_name(
                    'ellipsis-multiline').text
                price = container.find_elements_by_class_name(
                    'resprice')[1].text.split('$')
                # No '$' in the price label means no price / no stock
                if len(price) > 1:
                    price = Decimal(
                        price[1].replace('.', '').replace(',', '.'))
                    stock_tag = container.find_element_by_class_name(
                        'in-stock')
                    stock = int(
                        stock_tag.get_attribute('data-stock-qty-' + sku))
                else:
                    price = Decimal(0)
                    stock = 0
                # 'BAD BOX' items are sold as damaged
                if 'BAD BOX' in name:
                    condition = 'https://schema.org/DamagedCondition'
                else:
                    condition = 'https://schema.org/NewCondition'
                product = Product(name, cls.__name__, category,
                                  product_url, url, sku, stock, price,
                                  price, 'USD', sku=sku, ean=ean,
                                  condition=condition,
                                  part_number=part_number)
                products.append(product)
        if slept:
            continue
        next_button = driver.find_elements_by_id('next')
        if next_button:
            next_button[0].click()
        else:
            break
    driver.close()
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a Magento-style product page (COP) into a single Product."""
    session = session_with_proxy(extra_args)
    document = BeautifulSoup(session.get(url).text, 'html.parser')

    name = document.find('div', 'product-name').text.strip()

    # The displayed "SKU" doubles as the product barcode
    ean = document.find('span', {'itemprop': 'sku'}).text.strip()
    sku = ean
    if len(ean) == 12:
        ean = '0' + ean
    if not check_ean13(ean):
        ean = None

    panels = [
        document.find('div', 'short-description std'),
        document.find('table', {'id': 'product-attribute-specs-table'})
    ]
    description = ''.join(
        html_to_markdown(str(panel)) + '\n\n' for panel in panels)

    stock = -1 if document.find('p', 'in-stock') else 0

    picture_urls = [
        img['src']
        for img in document.find('li', 'image-extra').findAll('img')
    ]

    product_box = document.find('div', 'product-shop')
    price_tag = product_box.find('span', {'itemprop': 'price'})
    if price_tag:
        normal_price = Decimal(price_tag['content'])
    else:
        # Fall back to the second displayed price label
        price_tag = product_box.findAll('span', 'price')[1]
        normal_price = Decimal(remove_words(price_tag.string))
    offer_price = normal_price

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    normal_price, offer_price, 'COP', sku=sku, ean=ean,
                    description=description, picture_urls=picture_urls)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse a VTEX product page (BRL); one Product per SKU variant."""
    session = session_with_proxy(extra_args)
    markup = session.get(url).text

    event_data = json.loads(re.search(
        r'vtex.events.addData\(([\S\s]+?)\);', markup).groups()[0])
    variants = json.loads(re.search(
        r'var skuJson_0 = ([\S\s]+?);', markup).groups()[0])

    name = '{} {}'.format(event_data['productBrandName'],
                          event_data['productName'])
    price = Decimal(event_data['productPriceTo'])

    document = BeautifulSoup(markup, 'html.parser')

    # The description is split across two tabs of the product page
    description = ''
    for panel_class in ['produto-contents--sinope',
                        'produto-contents--caracteristicas']:
        panel = document.find('li', panel_class)
        description += html_to_markdown(str(panel)) + '\n\n'

    ean = None
    if 'productEans' in event_data:
        candidate = event_data['productEans'][0]
        if len(candidate) == 12:
            candidate = '0' + candidate
        if check_ean13(candidate):
            ean = candidate

    products = []
    for variant in variants['skus']:
        sku = str(variant['sku'])
        # Strip the query string and resize suffix to get the full image
        picture_urls = [
            variant['image'].split('?')[0].replace('-300-300', '')
        ]
        products.append(Product(
            name, cls.__name__, category, url, url, sku,
            event_data['skuStocks'][sku], price, price, 'BRL',
            sku=sku, ean=ean, description=description,
            picture_urls=picture_urls))
    return products