def products_for_url(cls, url, category=None, extra_args=None):
    """Return the products available at `url`.

    Handles two cases: the prepaid-plan landing page (a single
    placeholder product) and the phone catalog page, parsed from an
    embedded JavaScript `catalog` variable.
    """
    products = []
    if url == cls.prepago_url:
        # Plan Prepago: single placeholder product.
        # Fixed: the key used to be 'Claro Prepago' (copy/paste from the
        # Claro scraper) even though this is the GTD store.
        p = Product(
            'GTD Prepago',
            cls.__name__,
            category,
            url,
            url,
            'GTD Prepago',
            -1,
            Decimal(0),
            Decimal(0),
            'CLP',
        )
        products.append(p)
    elif url == cls.equipos_url:
        session = session_with_proxy(extra_args)
        body = session.get(url).text
        # Catalog is embedded as "var catalog = {...};" — drop the
        # trailing semicolon before parsing as JSON.
        json_body = re.search(r'var catalog = (.+)', body).groups()[0][:-1]
        json_body = json.loads(json_body)
        for json_product in json_body['products']:
            if not json_product['published']:
                continue
            name = json_product['name']
            sku = json_product['id']
            price = Decimal(remove_words(json_product['leasing_price']))
            description = html_to_markdown(json_product['description'])
            picture_urls = [
                'https://nuevo.gtdmanquehue.com' + im['options']['url']
                for im in json_product['images']
            ]
            product = Product(
                name,
                cls.__name__,
                'Cell',
                url,
                url,
                sku,
                -1,
                price,
                price,
                'CLP',
                sku=sku,
                cell_plan_name='GTD Prepago',
                description=description,
                picture_urls=picture_urls)
            products.append(product)
    else:
        raise Exception('Invalid URL: ' + url)
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Dispatch an Entel URL to the matching sub-scraper."""
    print(url)
    if url == cls.prepago_url:
        # Plan Prepago: single placeholder product.
        return [Product(
            'Entel Prepago',
            cls.__name__,
            category,
            url,
            url,
            'Entel Prepago',
            -1,
            Decimal(0),
            Decimal(0),
            'CLP',
        )]
    if 'entel.cl/planes/' in url:
        # Plan Postpago
        return list(cls._plans(url, extra_args))
    if 'miportal.entel.cl' in url:
        # Equipo postpago
        return list(cls._celular_postpago(url, extra_args))
    raise Exception('Invalid URL: ' + url)
def products_for_url(cls, url, category=None, extra_args=None):
    """Fetch a JSON variant listing and build one Product per variant."""
    session = session_with_proxy(extra_args)
    # The listing endpoint requires this hard-coded basic-auth token.
    session.headers['Authorization'] = 'Basic OGRiZDViZGY4M2Y5NzA3MTlkY' \
                                       'jE2NmRiODdhZDZhNWQ='
    data = json.loads(session.get(url).text)
    products = []
    for variant in data['product_listing']['variants']:
        # -1 marks "available, exact quantity unknown".
        stock = -1 if variant['available'] else 0
        sku = str(variant['id'])
        price = Decimal(variant['price'])
        products.append(Product(
            variant['title'].strip(),
            cls.__name__,
            category,
            cls.url,
            url,
            sku,
            stock,
            price,
            price,
            'CLP',
            sku=sku,
        ))
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Page through a VTEX search endpoint and return all its products."""
    print(url)
    # &_from=0&_to=49
    session = session_with_proxy(extra_args)
    page_size = 50
    products = []
    page = 0
    while True:
        start = page * page_size
        target_url = '{}&_from={}&_to={}'.format(
            url, start, start + page_size - 1)
        json_data = json.loads(session.get(target_url).text)
        if not json_data:
            # An empty very first page means the category itself is bad.
            if page == 0:
                raise Exception('Empty category: ' + target_url)
            break
        for entry in json_data:
            offer = entry['items'][0]['sellers'][0]['commertialOffer']
            price = Decimal(offer['Price'])
            picture_urls = [pic['imageUrl']
                            for pic in entry['items'][0]['images']]
            products.append(Product(
                entry['productName'],
                cls.__name__,
                category,
                entry['link'],
                url,
                entry['productReference'],
                offer['AvailableQuantity'],
                price,
                price,
                'CLP',
                sku=entry['productReference'],
                part_number=entry['productReference'],
                description=html_to_markdown(entry['description']),
                picture_urls=picture_urls
            ))
        page += 1
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Route a WOM URL to the matching scraping strategy."""
    if url == cls.prepago_url:
        # Plan Prepago: single placeholder product.
        return [Product(
            'WOM Prepago',
            cls.__name__,
            category,
            url,
            url,
            'WOM Prepago',
            -1,
            Decimal(0),
            Decimal(0),
            'CLP',
        )]
    if url == cls.planes_url:
        # Plan Postpago
        return list(cls._plans(url, extra_args))
    if '/equipos/' in url:
        # Equipo postpago
        return list(cls._celular_postpago(url, extra_args))
    raise Exception('Invalid URL: ' + url)
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a single Magento-style product page (prices in ARS)."""
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('div', 'product-name').text.strip()
    sku = soup.find('input', {'name': 'product'})['value'].strip()

    # Price renders as e.g. "$1.234,56" -> 1234.56
    raw_price = soup.find('span', 'price').text
    price = Decimal(
        raw_price.replace('.', '').replace('$', '').replace(',', '.'))

    description = html_to_markdown(
        str(soup.find('div', 'product-collateral')))
    picture_urls = [img['src']
                    for img in soup.findAll('img', {'id': 'image'})]

    return [Product(
        name,
        cls.__name__,
        category,
        url,
        url,
        sku,
        -1,
        price,
        price,
        'ARS',
        sku=sku,
        description=description,
        picture_urls=picture_urls
    )]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a single catalogo.movistar.cl product page (CLP).

    Returns an empty list on HTTP 404 or when the page lacks the
    expected product markup. Stock comes from a separate AJAX endpoint.
    """
    session = session_with_proxy(extra_args)
    session.headers['user-agent'] = 'python-requests/2.21.0'
    response = session.get(url)
    if response.status_code == 404:
        return []
    page_source = response.text
    soup = BeautifulSoup(page_source, 'html.parser')
    # Some URLs return a shell page with no product; bail out early.
    if not soup.find('body') or \
            not soup.find('h1', {'id': 'nombre-producto'}):
        return []
    name = soup.find('h1', {'id': 'nombre-producto'}).text.strip()
    sku = soup.find('div', {'itemprop': 'sku'}).text.strip()
    # Stock lives behind an XHR endpoint that expects form-encoded data,
    # so a fresh session with the right headers is used for it.
    ajax_session = session_with_proxy(extra_args)
    ajax_session.headers['user-agent'] = 'python-requests/2.21.0'
    ajax_session.headers['x-requested-with'] = 'XMLHttpRequest'
    ajax_session.headers['content-type'] = \
        'application/x-www-form-urlencoded'
    stock_data = json.loads(
        ajax_session.post(
            'https://catalogo.movistar.cl/fullprice/stockproducto/validar/',
            'sku=' + sku).text)
    stock = stock_data['respuesta']['cantidad']
    price_container = soup.find('span', 'special-price').find('p')
    price = Decimal(remove_words(price_container.text))
    description = html_to_markdown(
        str(soup.find('div', 'detailed-desktop')))
    # "seminuevo" (semi-new) in the description flags refurbished items.
    if 'seminuevo' in description:
        condition = 'https://schema.org/RefurbishedCondition'
    else:
        condition = 'https://schema.org/NewCondition'
    picture_urls = [soup.find('meta', {'property': 'og:image'})['content']]
    return [
        Product(name, cls.__name__, category, url, url, sku, stock,
                price, price, 'CLP', condition=condition, sku=sku,
                description=description, picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Parse an embedded `value_product` JS array into one Product."""
    print(url)
    session = session_with_proxy(extra_args)
    response = session.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Data is embedded as "value_product = [...];" — the non-greedy
    # regex stops before the closing bracket, so it is appended back.
    raw = re.search(r'value_product = ([\s\S]+?)\];',
                    response.text).groups()[0] + ']'
    entry = json.loads(raw)[0]

    name = entry['descripcion'].strip()
    sku = entry['idproducto'].strip()
    stock = round(float(entry['stock']))
    offer_price = Decimal(entry['precioweb1'])
    normal_price = Decimal(entry['precioweb2'])

    description = (html_to_markdown(entry['long_descrip'])
                   if entry['long_descrip'] else None)
    picture_urls = [anchor['href']
                    for anchor in soup.findAll('a', 'fancybox')]

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    normal_price, offer_price, 'CLP', sku=sku,
                    description=description, picture_urls=picture_urls)]
def _get_product(cls, container, category):
    """Build a Product from a listing tile on paris.cl."""
    path = container.find('a')['href'].split('?')[0]
    # Tile links may be relative; absolutize them against the site root.
    if 'https' in path:
        product_url = path
    else:
        product_url = 'https://www.paris.cl' + path

    data = json.loads(
        container.find('div', 'product-tile')['data-product'])

    normal_price = Decimal(data['price'])
    # dimension20 carries the discounted ("offer") price when present.
    if data['dimension20']:
        offer_price = Decimal(data['dimension20'])
    else:
        offer_price = normal_price

    return Product(
        data['name'],
        cls.__name__,
        category,
        product_url,
        product_url,
        data['variant'],
        -1,
        normal_price,
        offer_price,
        'CLP',
        sku=data['variant'],
    )
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a WooCommerce product page (PYG); only LG items count."""
    print(url)
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1', 'product_title').text.strip()
    sku = soup.find('span', 'sku').text.strip()

    # Items without "LG" in the name are reported as out of stock.
    stock = -1 if 'LG' in name.upper().split(' ') else 0

    amount = soup.find('p', 'price').find('span', 'amount').text
    price = Decimal(amount.replace('₲.', '').replace('.', ''))

    picture_urls = [soup.find('meta', {'name': 'og:image'})['content']]

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'PYG', sku=sku,
                    picture_urls=picture_urls)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a WoodMart/WooCommerce product page (CLP)."""
    print(url)
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1', 'product_title').text
    sku = soup.find('div', 'wd-wishlist-btn').find('a')['data-product-id']

    stock_tag = soup.find('p', 'stock')
    if stock_tag:
        # The stock line starts with either a count or "Agotado".
        first_word = stock_tag.text.split(' ')[0]
        stock = 0 if first_word == 'Agotado' else int(first_word)
    else:
        stock = -1

    pn_tag = soup.find('span', 'sku')
    part_number = pn_tag.text.strip() if pn_tag else None

    price_tag = soup.find('p', 'price')
    # Discounted pages wrap the active price in <ins>.
    discounted = price_tag.find('ins')
    raw_price = discounted.text if discounted else price_tag.text
    price = Decimal(raw_price.replace('$', '').replace('.', ''))

    picture_urls = [
        wrap.find('a')['href']
        for wrap in soup.findAll('div', 'product-image-wrap')
    ]
    description = html_to_markdown(
        str(soup.find('div', {'id': 'tab-description'})))

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'CLP', sku=sku,
                    picture_urls=picture_urls, description=description,
                    part_number=part_number)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a product page exposing its price in a hidden input (ARS)."""
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('meta', {'name': 'Title'})['content'].strip()
    sku = soup.find('input', {'name': 'id'})['value'].strip()
    price = Decimal(soup.find('input', {'id': 'product_price'})['value'])

    description = html_to_markdown(
        str(soup.find('div', {'id': 'especificaciones-container'})))
    picture_urls = [
        img['data-zoom-image']
        for img in soup.find('div', 'owl-carousel').findAll('img')
    ]

    return [Product(
        name,
        cls.__name__,
        category,
        url,
        url,
        sku,
        -1,
        price,
        price,
        'ARS',
        sku=sku,
        description=description,
        picture_urls=picture_urls
    )]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a Shopify product page (MXN).

    Stock is read from an "EXISTENCIA" panel listing available pieces
    ("N pzas."). "reacondicionado" in the name flags refurbished items.
    """
    print(url)
    session = session_with_proxy(extra_args)
    page_source = session.get(url).text
    soup = BeautifulSoup(page_source, 'html.parser')

    name = soup.find('h1', {'itemprop': 'name'})
    if not name:
        name = soup.find('p', {'itemprop': 'name'})
    name = name.text
    sku = soup.find('span', 'variant-sku').text

    stock_container = None
    for container in soup.findAll('h6'):
        if 'EXISTENCIA' in container.text:
            stock_container = container.parent.find('div').contents

    # Fixed: `stock` used to be unbound (NameError) when the EXISTENCIA
    # panel existed but no entry contained 'pzas.'. Default to 0.
    stock = 0
    if stock_container:
        for item in stock_container:
            if 'pzas.' in item:
                stock = int(item.replace('pzas.', ''))
                break

    price = soup.find('span', 'gf_product-price money').text
    price = Decimal(price.replace('$', '').replace(',', ''))

    images = soup.findAll('meta', {'property': 'og:image:secure_url'})
    picture_urls = [i["content"] for i in images]

    description = html_to_markdown(
        str(soup.find('div', 'product-description')))

    if 'reacondicionado' in name.lower():
        condition = 'https://schema.org/RefurbishedCondition'
    else:
        condition = 'https://schema.org/NewCondition'

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'MXN', sku=sku,
                    picture_urls=picture_urls, description=description,
                    condition=condition)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a product page whose listed price excludes 19% VAT (CLP)."""
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1', 'entry-title').text.strip()
    sku = soup.find('input', {'name': 'product_id'})['value'].strip()
    description = html_to_markdown(
        str(soup.find('div', 'product_description')))
    picture_urls = [anchor['href']
                    for anchor in soup.findAll('a', 'thickbox')]

    # The site lists net prices; add 19% VAT and round to whole pesos.
    net_price = Decimal(
        remove_words(soup.find('span', 'currentprice').text))
    price = (net_price * Decimal('1.19')).quantize(0)

    return [Product(name, cls.__name__, category, url, url, sku, -1,
                    price, price, 'CLP', sku=sku, description=description,
                    picture_urls=picture_urls)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a VTEX product page with multiple SKUs (COP).

    Fixed: the 5% 'lojamultilaser' discount was applied to the shared
    loop-carried `price` variable, so it compounded across SKUs (and
    leaked into non-discounted SKUs processed later). The discount is
    now computed per SKU from the untouched base price.
    """
    print(url)
    session = session_with_proxy(extra_args)
    page_source = session.get(url).text

    pricing_data = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                             page_source).groups()[0]
    pricing_data = json.loads(pricing_data)

    skus_data = re.search(r'var skuJson_0 = ([\S\s]+?);CATALOG',
                          page_source).groups()[0]
    skus_data = json.loads(skus_data)

    name = '{} {}'.format(pricing_data['productBrandName'],
                          pricing_data['productName'])
    base_price = Decimal(pricing_data['productPriceTo'])

    soup = BeautifulSoup(page_source, 'html.parser')
    picture_urls = [
        tag['rel'][0] for tag in soup.findAll('a', {'id': 'botaoZoom'})
    ]
    description = html_to_markdown(
        str(soup.find('section', 'product-specs')))

    # Pad a 12-digit UPC-A to EAN-13 and validate; invalid -> None.
    if 'productEans' in pricing_data:
        ean = pricing_data['productEans'][0]
        if len(ean) == 12:
            ean = '0' + ean
        if not check_ean13(ean):
            ean = None
    else:
        ean = None

    products = []
    for sku_data in skus_data['skus']:
        sku = str(sku_data['sku'])
        stock = pricing_data['skuStocks'][sku]
        # First-party seller gets a 5% cash discount, computed per SKU.
        if sku_data['sellerId'] == 'lojamultilaser':
            price = (base_price * Decimal('0.95')).quantize(
                Decimal('0.01'))
        else:
            price = base_price
        products.append(Product(
            name, cls.__name__, category, url, url, sku, stock, price,
            price, 'COP', sku=sku, ean=ean, description=description,
            picture_urls=picture_urls))
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a product page (MXN); returns [] on HTTP 404."""
    print(url)
    session = session_with_proxy(extra_args)
    response = session.get(url)
    if response.status_code in [404]:
        return []

    soup = BeautifulSoup(response.text, 'html5lib')

    name = soup.find('h1', 'name').text
    sku = soup.find('div', {'itemprop': 'sku'}).text

    availability = soup.find('div', 'availability')
    stock = int(availability.find('strong').text) if availability else 0

    price = Decimal(
        soup.find('span', 'price').text.replace('$', '').replace(',', ''))

    gallery = soup.find('div', {'id': 'owl-carousel-gallery'})
    if gallery:
        picture_urls = [img['src']
                        for img in gallery.findAll('img', 'img-fluid')]
    else:
        picture_urls = [soup.find('img', 'img-fluid')['src']]

    description = html_to_markdown(str(soup.find('div', 'description')))

    # The part number is published in a spec table under an "mpn" header.
    part_number = None
    for th in soup.findAll('th'):
        if th.text == "mpn":
            part_number = th.parent.find('td').text

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'MXN', sku=sku,
                    picture_urls=picture_urls, description=description,
                    part_number=part_number)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a product page that embeds a `dataLayer` JS object (BRL).

    The dataLayer may describe several product entries; one Product is
    emitted per entry. Pictures are shared across all entries.
    """
    session = session_with_proxy(extra_args)
    page_source = session.get(url).text
    soup = BeautifulSoup(page_source, 'html.parser')
    picture_urls = []
    for tag in soup.findAll('li', 'owl-item'):
        # Prefer the zoom-size image; fall back to the normal size, and
        # skip slides that provide neither.
        picture_path = tag.find('a')['data-zoom'].replace(
            ' ', '%20').strip()
        if not picture_path:
            picture_path = tag.find('a')['data-normal'].replace(
                ' ', '%20').strip()
            if not picture_path:
                continue
        picture_url = 'https:' + picture_path
        picture_urls.append(picture_url)
    if not picture_urls:
        picture_urls = None
    # dataLayer is not strict JSON, hence demjson instead of json.
    pricing_data = demjson.decode(
        re.search(r'dataLayer = ([\S\s]+?);dataLayer',
                  page_source).groups()[0])[0]
    products = []
    for product_entry in pricing_data['product']:
        name = product_entry['productName']
        sku = str(product_entry['productSku'])
        price = Decimal(product_entry['productDiscount'])
        if product_entry['productAvailable']:
            stock = -1
        else:
            stock = 0
        description = html_to_markdown(
            html.unescape(product_entry['productDescription']))
        p = Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'BRL', sku=sku,
                    description=description, picture_urls=picture_urls)
        products.append(p)
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a Brazilian store product page (BRL, boleto offer price)."""
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1').text.strip()

    in_stock = soup.find('link', {'itemprop': 'availability'})['href'] == \
        'http://schema.org/InStock'
    stock = -1 if in_stock else 0

    sku = soup.find('div', 'product-name').find('span').text.strip()

    # Description and "additional info" tabs are merged into one blob.
    panels = [
        soup.find('div', {'id': 'description'}),
        soup.find('div', {'id': 'additional'})
    ]
    description = '\n\n'.join(
        html_to_markdown(str(panel)) for panel in panels)

    raw_normal = soup.find('p', {'itemprop': 'price'}).text
    normal_price = Decimal(
        raw_normal.replace('R$', '').replace('.', '').replace(',', '.'))

    if stock == 0:
        offer_price = normal_price
    else:
        # The discounted "boleto" price only renders for in-stock items.
        raw_offer = soup.find('span', 't_boleto_price').text
        offer_price = Decimal(
            raw_offer.split('$')[1].replace('.', '').replace(',', '.'))

    gallery = soup.find('ul', 'bxslider')
    if gallery:
        picture_urls = [link['href'] for link in gallery.findAll('a')]
    else:
        picture_urls = [soup.find('a', 'cloud-zoom-gallery')['href']]

    return [Product(
        name,
        cls.__name__,
        category,
        url,
        url,
        sku,
        stock,
        normal_price,
        offer_price,
        'BRL',
        sku=sku,
        description=description,
        picture_urls=picture_urls
    )]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape every variant of a Shopify product as a separate Product."""
    print(url)
    session = session_with_proxy(extra_args)
    response_text = session.get(url).text

    # Variants are listed in the "var meta = {...};" script blob.
    raw_meta = re.search(r'var meta = ([\S\s]+?);\n',
                         response_text).groups()[0]
    variants = json.loads(raw_meta)['product']['variants']

    products = []
    for variant in variants:
        sku = variant['sku']
        color = variant['public_title']
        variant_url = '{}?variant={}'.format(url, variant['id'])

        soup = BeautifulSoup(session.get(variant_url).text, 'html.parser')
        name = soup.find('h1', 'product_name').text + " ({})".format(color)

        availability = soup.find(
            'link', {'itemprop': 'availability'})['href']
        stock = -1 if availability == 'http://schema.org/InStock' else 0

        price_text = soup.find('span', 'current_price').text.strip() \
            .replace('$', '').replace('.', '')
        if price_text == '-':
            # Variant without a posted price; skip it.
            continue
        price = Decimal(price_text)

        picture_urls = [
            'http:' + tag.find('img')['data-src']
            for tag in soup.findAll('div', 'image__container')
        ]
        description = html_to_markdown(
            str(soup.find('div', {'data-et-handle': 'tabs-descripcion'})))

        products.append(Product(
            name, cls.__name__, category, variant_url, url, sku, stock,
            price, price, 'CLP', sku=sku, picture_urls=picture_urls,
            description=description))
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a Magento 2 product page (CLP); returns [] on HTTP 404.

    The page only publishes the cash ("efectivo") price; the normal
    price is derived from it with a 3.4% surcharge.
    """
    print(url)
    session = session_with_proxy(extra_args)
    response = session.get(url)
    if response.status_code == 404:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.find('span', {'itemprop': 'name'}).text
    sku = soup.find('div', {'itemprop': 'sku'}).text

    stock = 0
    stock_container = soup.find('div', 'product-stock')
    if stock_container:
        stock = int(stock_container.text.strip().split(' ')[1])

    offer_price = Decimal(
        soup.find('span', 'efectivo').find('span', 'price').text.replace(
            '$', '').replace('.', ''))
    # Fixed: Decimal(1.034) built the factor from a binary float
    # (1.03400000000000003...); a string literal keeps it exact.
    normal_price = offer_price * Decimal('1.034')

    image_scripts = soup.findAll('script', {'type': 'text/x-magento-init'})
    picture_urls = []
    for script in image_scripts:
        if 'mage/gallery/gallery' in script.text:
            image_data = json.loads(
                script.text)['[data-gallery-role=gallery-placeholder]'][
                'mage/gallery/gallery']['data']
            for data in image_data:
                picture_urls.append(data['img'])

    description = html_to_markdown(str(soup.find('div', 'description')))

    # The backing store limits SKUs to 50 characters.
    if len(sku) > 50:
        sku = sku[0:50]

    return [Product(
        name,
        cls.__name__,
        category,
        url,
        url,
        sku,
        stock,
        normal_price,
        offer_price,
        'CLP',
        sku=sku,
        part_number=sku,
        picture_urls=picture_urls,
        description=description,
    )]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a WooCommerce page (MXN); [] on redirect or missing SKU."""
    print(url)
    session = session_with_proxy(extra_args)
    response = session.get(url)
    # A redirect means the product page no longer exists.
    if response.url != url:
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.find('h1', 'product_title').text

    sku_tag = soup.find('span', 'sku')
    if not sku_tag:
        return []
    sku = sku_tag.text

    stock = 0 if soup.find('p', 'out-of-stock') else -1

    # Discounted pages wrap the active price in <ins>.
    discounted = soup.find('p', 'price').find('ins')
    if discounted:
        raw_price = discounted.find('span', 'amount').text
    else:
        raw_price = soup.find('p', 'price').find('span', 'amount').text
    price = Decimal(raw_price.replace('$', '').replace(',', ''))

    gallery = soup.find('figure', 'woocommerce-product-gallery__wrapper')
    picture_urls = [img['src'] for img in gallery.findAll('img')]

    description = html_to_markdown(
        str(soup.find('div', {'id': 'tab-description'})))

    return [Product(
        name,
        cls.__name__,
        category,
        url,
        url,
        sku,
        stock,
        price,
        price,
        'MXN',
        sku=sku,
        picture_urls=picture_urls,
        description=description,
    )]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a Paraguayan WooCommerce page (PYG); only LG items count."""
    print(url)
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1', 'product_title').text.strip()
    sku = soup.find('meta',
                    {'property': 'product:retailer_item_id'})['content']

    qty_input = soup.find('input', {'id': 'the-cantidad-selector'})
    if not qty_input:
        return []

    # The quantity selector's "max" attribute doubles as a stock count.
    max_qty = qty_input['max']
    stock = int(max_qty) if max_qty else -1

    # Items without "LG" in the name are reported as out of stock.
    if 'LG' not in name.upper().split(' '):
        stock = 0

    normal_price = Decimal(
        soup.find('p', 'price').find('span', 'amount').text.replace(
            'Gs.', '').replace('.', '').strip())
    offer_price = Decimal(
        soup.find('p', 'price').find('span', {
            'id': 'elpreciocentralPorta'
        }).text.split('Gs.')[-1].replace('.', '').replace('!', '').strip())

    # Never report an offer price above the normal price.
    if normal_price < offer_price:
        offer_price = normal_price

    description = html_to_markdown(
        str(soup.find('div', {'itemprop': 'description'})))

    picture_urls = [
        carousel.find('a')['href']
        for carousel in soup.findAll(
            'div', 'thumbnails-single owl-carousel')
    ]

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    normal_price, offer_price, 'PYG', sku=sku,
                    description=description, picture_urls=picture_urls)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a ttchile.cl product page (CLP).

    The page has no semantic markup, so data is pulled from fixed
    positions: the 'textOtrosPrecios' divs hold (normal price, stock
    icon, SKU) in that order, and the second 'table20' table holds the
    main picture.
    """
    session = session_with_proxy(extra_args)
    session.headers['User-Agent'] = \
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
        '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
    soup = BeautifulSoup(session.get(url, timeout=30).text, 'html.parser')
    containers = soup.findAll('div', 'textOtrosPrecios')
    normal_price = Decimal(remove_words(containers[0].text))
    # Stock level is conveyed only by which icon image is shown.
    stock_image = containers[1].find('img')['src']
    if stock_image in [
            'images/imagenes/ico_normal.jpg',
            'images/imagenes/ico_bajo.jpg'
    ]:
        stock = -1
    else:
        stock = 0
    sku = containers[2].text.strip()
    name = soup.find('div', 'textTituloProducto').text.strip()
    offer_price = Decimal(
        remove_words(soup.find('div', 'textPrecioContado').text))
    description = html_to_markdown(str(soup.find('div', 'p7TPcontent')))
    # Main picture: third <img> inside the second 'table20' table.
    main_picture = soup.findAll(
        'table', {'id': 'table20'})[1].findAll('img')[2]['src']
    picture_paths = [main_picture]
    picture_paths.extend(
        [tag['src'] for tag in soup.findAll('img', 'Imagen')])
    picture_urls = []
    for path in picture_paths:
        # Rebuild full-size picture URLs from the image id query param.
        picture_id = path.split('=')[-1]
        picture_url = 'http://www.ttchile.cl/images/imgproductos/' \
                      'imgImagenMarco.php?imagen=' + picture_id
        picture_urls.append(picture_url.replace(' ', '%20'))
    p = Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'CLP', sku=sku,
                description=description, picture_urls=picture_urls)
    return [p]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a product page (MXN); [] on 404 or when no price shows.

    Fixed: the page was downloaded twice (one GET for the status check
    and a second one for the body); the first response is now reused.
    """
    print(url)
    session = session_with_proxy(extra_args)
    response = session.get(url)
    if response.status_code == 404:
        return []
    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.find('h1', 'detailsInfo_right_title').text
    sku = soup.find('div', 'detailsInfo_right_artnum') \
        .text.replace('SKU:', '').strip()

    stock_flag = soup.find('span', 'stockFlag')
    if not stock_flag:
        stock = 0
    else:
        stock = int(stock_flag.find('span').text)

    if not soup.find('span', 'priceText'):
        return []
    price = Decimal(
        soup.find('span',
                  'priceText').text.replace('$', '').replace(',', ''))

    slider = soup.find('div', 'detailsInfo_left_picture_morepictures') \
        .find('div', 'emslider2_items')
    if slider:
        picture_urls = [item.find('a')['data-src']
                        for item in slider.findAll('li')]
    else:
        picture_urls = None

    description = html_to_markdown(
        str(soup.find('div', 'cpattributes-box')))

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'MXN', sku=sku,
                    picture_urls=picture_urls, description=description,
                    part_number=sku)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a product page with a spec table (USD); [] on HTTP 404."""
    print(url)
    session = session_with_proxy(extra_args)
    response = session.get(url)
    if response.status_code == 404:
        return []

    soup = BeautifulSoup(response.text, 'html5lib')
    name = soup.find('h1', 'name').text.strip()

    rows = soup.find('div', 'listing').findAll('tr')
    sku = rows[0].find('td', 'td_right').text.strip()

    # Sum warehouse quantities from every "Cantidad" row; a "+" marks
    # unbounded stock (-1). The last row holds the price, so skip it.
    stock = 0
    for row in rows[:-1]:
        if 'Cantidad' not in row.find('td', 'td_left').text:
            continue
        quantity_text = row.find('td', 'td_right').text
        if '+' in quantity_text:
            stock = -1
            break
        if 'Agotado' not in quantity_text:
            stock += int(quantity_text)

    price = Decimal(
        rows[-1].find('td', 'td_right').text.split('$')[-1].replace(
            ',', ''))

    description = html_to_markdown(str(soup.find('div', 'description')))

    picture_urls = [
        item.find('a')['href'].replace(' ', '%20')
        for item in soup.findAll('li', 'wrapper_pic_div')
    ]

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'USD', sku=sku, description=description,
                    picture_urls=picture_urls)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape an eglo.cl product page (CLP)."""
    print(url)
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('div', 'product-info__description').text.strip()
    sku = soup.find(
        'div',
        'product-info__title').find('span').text.split(':')[1].strip()

    qty_input = soup.find('input', {'id': 'producto_cantidad'})
    stock = int(qty_input['max']) if qty_input else 0

    price_container = soup.find('span', 'price-box__new')
    struck_price = price_container.find('s')
    if struck_price:
        # A struck-through price means 10% off the old price.
        old_price = Decimal(remove_words(struck_price.text))
        price = (old_price * Decimal('0.9')).quantize(0)
    else:
        price = Decimal(remove_words(price_container.text))

    description = html_to_markdown(str(soup.find('div', 'tab-content')),
                                   'http://www.eglo.cl')

    slides = soup.findAll('a', 'swiper-slide')
    if slides:
        picture_urls = [slide.find('img')['src'] for slide in slides]
    else:
        picture_urls = [
            soup.find('div', 'product-main-image__item').img['src']
        ]

    return [Product(name, cls.__name__, category, url, url, sku, stock,
                    price, price, 'CLP', sku=sku, description=description,
                    picture_urls=picture_urls)]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a product page that embeds a `digitalData` JS object (BRL).

    Returns [] when the request redirected (page gone). Some keys in
    digitalData hold values the decoder cannot parse, so they are
    stripped with regexes before decoding with demjson.
    """
    session = session_with_proxy(extra_args)
    response = session.get(url)
    if response.url != url:
        return []
    page_source = response.text
    pricing_data = re.search(r'digitalData = ([\S\s]+?); </script',
                             page_source).groups()[0]
    # Drop browser-environment keys whose values break the decoder.
    for kw in ['domain', 'fullName', 'protocol', 'pathname', 'referrer']:
        for_replace = "'{}': .+".format(kw)
        pricing_data = re.sub(for_replace, '', pricing_data)
    pricing_data = demjson.decode(pricing_data)['page']['product']
    name = pricing_data['title']
    sku = pricing_data['idSku']
    if pricing_data['stockAvailability']:
        stock = -1
    else:
        stock = 0
    # A missing 'cashPrice' means the product has no posted price.
    if 'cashPrice' in pricing_data:
        normal_price = Decimal(pricing_data['salePrice'])
        offer_price = Decimal(pricing_data['cashPrice'])
    else:
        normal_price = Decimal(0)
        offer_price = Decimal(0)
    soup = BeautifulSoup(page_source, 'html.parser')
    description = html_to_markdown(str(soup.find('div', 'description')))
    picture_urls = [tag['data-src'] for tag in
                    soup.findAll('img', 'carousel-product__item-img')]
    p = Product(
        name,
        cls.__name__,
        category,
        url,
        url,
        sku,
        stock,
        normal_price,
        offer_price,
        'BRL',
        sku=sku,
        description=description,
        picture_urls=picture_urls
    )
    return [p]
def products_for_url(cls, url, category=None, extra_args=None):
    """Query jumbo.com.ar's category web service and list its products."""
    # The category id travels in the URL fragment (query-string style).
    category_id = urllib.parse.parse_qs(
        urllib.parse.urlparse(url).fragment)['_id'][0]

    session = session_with_proxy(extra_args)
    payload = {
        'IdMenu': category_id,
        'textoBusqueda': "",
        'producto': "",
        'marca': "",
        'pager': "",
        'ordenamiento': 0,
        'precioDesde': "",
        'precioHasta': ""
    }
    # Warm up session cookies before hitting the JSON endpoint.
    session.get('https://www.jumbo.com.ar/')
    session.headers.update(
        {'Content-Type': 'application/json; charset=UTF-8'})
    response = session.post(
        'https://www.jumbo.com.ar/Comprar/HomeService.aspx/'
        'ObtenerArticulosPorDescripcionMarcaFamiliaLevex',
        json.dumps(payload))

    # The service double-encodes its JSON payload ('d' is a JSON string).
    entries = json.loads(json.loads(
        response.text)['d'])['ResultadosBusquedaLevex']

    products = []
    for entry in entries:
        price = Decimal(entry['Precio'])
        picture_urls = [
            'https://images.jumbo.com.ar/JumboComprasArchivos/'
            'Archivos/' + entry['IdArchivoBig']
        ]
        products.append(Product(
            entry['DescripcionArticulo'].strip(),
            cls.__name__,
            category,
            url,
            url,
            entry['IdArticulo'],
            int(entry['Stock']),
            price,
            price,
            'ARS',
            sku=entry['IdArticulo'],
            picture_urls=picture_urls))
    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a WooCommerce product page (CLP).

    Fixed: non-discounted products have no <ins>/<del> pair inside the
    price paragraph, which used to raise AttributeError; the single
    listed price is now used for both normal and offer price.
    """
    print(url)
    session = session_with_proxy(extra_args)
    session.headers['User-Agent'] = \
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
        '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h2', 'product_title').text.strip()
    sku = soup.find('span', 'sku').text.strip()

    stock_text = soup.find('span', 'stock').text.strip()
    stock = 0
    if stock_text != 'Agotado':
        stock = int(stock_text.split(' ')[0])

    price_container = soup.find('p', 'price')
    if not price_container.text.strip():
        return []
    ins_tag = price_container.find('ins')
    del_tag = price_container.find('del')
    if ins_tag and del_tag:
        offer_price = Decimal(remove_words(ins_tag.find('span').text))
        normal_price = Decimal(remove_words(del_tag.find('span').text))
    else:
        # No discount markup: the single price covers both fields.
        price = Decimal(remove_words(price_container.find('span').text))
        offer_price = normal_price = price

    picture_urls = []
    for picture in soup.findAll('div', 'img-thumbnail'):
        try:
            picture_urls.append(picture.find('img')['content'])
        except KeyError:
            # Thumbnails without a 'content' attribute carry no image.
            continue

    description = html_to_markdown(
        str(soup.find('div', {'id': 'tab-description'})))

    return [Product(
        name,
        cls.__name__,
        category,
        url,
        url,
        sku,
        stock,
        normal_price,
        offer_price,
        'CLP',
        sku=sku,
        description=description,
        picture_urls=picture_urls
    )]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape a clie.cl product page (CLP).

    The page has little semantic markup, so several fields come from
    positional lookups ('texto-precio-ahorro' cells, raw td contents).
    Note that the part number is used as the product key while the SKU
    is stored separately.
    """
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')
    name = soup.findAll('td', 'texto-precio-ahorro')[1].text.strip()
    # An explicit "no stock" icon overrides the numeric stock cell.
    if soup.find('img', {'src': 'images/ficha/ico_sin_stock.gif'}):
        stock = 0
    else:
        stock = int(soup.find('td', 'stock-product').text.split()[0])
    sku = soup.find('td', 'sku').text.split()[-1]
    part_number = soup.findAll('td', 'texto-precio-ahorro')[2] \
        .find('td').text.split(':')[1].strip()
    # Prices render as "... $1.234 IVA ..." inside raw text nodes.
    container = soup.find('td', 'lowPrice')
    offer_price = container.contents[0].split('$')[1]
    offer_price = offer_price.split('IVA')[0]
    offer_price = Decimal(remove_words(offer_price))
    normal_price = container.parent.parent.find(
        'td', 'price-normal').contents[0].split('$')[1].split('IVA')[0]
    normal_price = Decimal(remove_words(normal_price))
    picture_links = soup.findAll('a', {'rel': 'lightbox[roadtrip]'})
    picture_urls = []
    for tag in picture_links:
        if not tag.find('img'):
            continue
        picture_url = tag.find('img')['src'].replace(' ', '%20')
        # Skip the placeholder pointing at the bare photos directory.
        if picture_url == 'http://www.clie.cl/photos/':
            continue
        picture_urls.append(picture_url)
    if not picture_urls:
        picture_urls = None
    p = Product(
        name,
        cls.__name__,
        category,
        url,
        url,
        part_number,
        stock,
        normal_price,
        offer_price,
        'CLP',
        sku=sku,
        part_number=part_number,
        picture_urls=picture_urls
    )
    return [p]