def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    Args:
        url: Product page URL; fetched with a 30 second timeout and a
            desktop Chrome User-Agent header.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    in_stock_icons = (
        'images/imagenes/ico_normal.jpg',
        'images/imagenes/ico_bajo.jpg',
    )
    picture_endpoint = ('http://www.ttchile.cl/images/imgproductos/'
                        'imgImagenMarco.php?imagen=')

    session = session_with_proxy(extra_args)
    session.headers['User-Agent'] = (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36')
    response = session.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # The 'textOtrosPrecios' cells are read positionally: normal price,
    # stock indicator icon, SKU.
    cells = soup.findAll('div', 'textOtrosPrecios')
    normal_price = Decimal(remove_words(cells[0].text))
    stock = -1 if cells[1].find('img')['src'] in in_stock_icons else 0
    sku = cells[2].text.strip()

    name = soup.find('div', 'textTituloProducto').text.strip()
    offer_text = soup.find('div', 'textPrecioContado').text
    offer_price = Decimal(remove_words(offer_text))
    description = html_to_markdown(str(soup.find('div', 'p7TPcontent')))

    # Main picture first, then every thumbnail flagged with class 'Imagen'.
    sources = [
        soup.findAll('table', {'id': 'table20'})[1].findAll('img')[2]['src']
    ]
    sources += [img['src'] for img in soup.findAll('img', 'Imagen')]

    # Only the value after the last '=' of each source is kept; it is
    # re-attached to the fixed image endpoint with spaces percent-encoded.
    picture_urls = [
        (picture_endpoint + source.split('=')[-1]).replace(' ', '%20')
        for source in sources
    ]

    return [
        Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'CLP', sku=sku,
                description=description, picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The same price is used as both normal and offer price. When a struck
    (``<s>``) price is present, the price is 90% of it, rounded with
    ``quantize(0)``.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    print(url)
    session = session_with_proxy(extra_args)
    page = session.get(url)
    soup = BeautifulSoup(page.text, 'html.parser')

    name = soup.find('div', 'product-info__description').text.strip()
    title_span = soup.find('div', 'product-info__title').find('span')
    sku = title_span.text.split(':')[1].strip()

    # The quantity input's 'max' attribute is read as the stock; without the
    # input the stock is 0.
    quantity_input = soup.find('input', {'id': 'producto_cantidad'})
    stock = int(quantity_input['max']) if quantity_input else 0

    price_box = soup.find('span', 'price-box__new')
    struck_price = price_box.find('s')
    if struck_price:
        reference = Decimal(remove_words(struck_price.text))
        price = (reference * Decimal('0.9')).quantize(0)
    else:
        price = Decimal(remove_words(price_box.text))

    description = html_to_markdown(
        str(soup.find('div', 'tab-content')), 'http://www.eglo.cl')

    slides = soup.findAll('a', 'swiper-slide')
    if slides:
        picture_urls = [slide.find('img')['src'] for slide in slides]
    else:
        # No gallery: fall back to the single main image.
        main_image = soup.find('div', 'product-main-image__item')
        picture_urls = [main_image.img['src']]

    return [
        Product(name, cls.__name__, category, url, url, sku, stock, price,
                price, 'CLP', sku=sku, description=description,
                picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The part number is used as the product key, and ``picture_urls`` is
    ``None`` when no usable picture is found.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    def _price_from_cell(cell):
        # Keep the text between the '$' sign and the 'IVA' marker.
        raw = cell.contents[0].split('$')[1].split('IVA')[0]
        return Decimal(remove_words(raw))

    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    info_cells = soup.findAll('td', 'texto-precio-ahorro')
    name = info_cells[1].text.strip()

    out_of_stock_icon = soup.find(
        'img', {'src': 'images/ficha/ico_sin_stock.gif'})
    if out_of_stock_icon:
        stock = 0
    else:
        stock = int(soup.find('td', 'stock-product').text.split()[0])

    sku = soup.find('td', 'sku').text.split()[-1]
    part_number = info_cells[2].find('td').text.split(':')[1].strip()

    low_price_cell = soup.find('td', 'lowPrice')
    offer_price = _price_from_cell(low_price_cell)
    normal_price = _price_from_cell(
        low_price_cell.parent.parent.find('td', 'price-normal'))

    collected = []
    for link in soup.findAll('a', {'rel': 'lightbox[roadtrip]'}):
        image = link.find('img')
        if not image:
            continue
        candidate = image['src'].replace(' ', '%20')
        # A bare photos directory URL carries no picture; skip it.
        if candidate != 'http://www.clie.cl/photos/':
            collected.append(candidate)
    picture_urls = collected or None

    return [
        Product(
            name, cls.__name__, category, url, url, part_number, stock,
            normal_price, offer_price, 'CLP', sku=sku,
            part_number=part_number, picture_urls=picture_urls
        )
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a list of at most one Product.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        An empty list when the price paragraph has no text; otherwise a list
        with a single ``Product`` priced in 'CLP'.
    """
    print(url)
    session = session_with_proxy(extra_args)
    session.headers['User-Agent'] = (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36')
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h2', 'product_title').text.strip()
    sku = soup.find('span', 'sku').text.strip()

    # Any label other than 'Agotado' is expected to start with the quantity.
    stock_label = soup.find('span', 'stock').text.strip()
    stock = 0 if stock_label == 'Agotado' else int(stock_label.split(' ')[0])

    price_tag = soup.find('p', 'price')
    if not price_tag.text.strip():
        return []

    # <ins> carries the offer price and <del> the normal one.
    offer_text = price_tag.find('ins').find('span').text
    offer_price = Decimal(remove_words(offer_text))
    normal_text = price_tag.find('del').find('span').text
    normal_price = Decimal(remove_words(normal_text))

    picture_urls = []
    for thumbnail in soup.findAll('div', 'img-thumbnail'):
        try:
            source = thumbnail.find('img')['content']
        except KeyError:
            # Images without a 'content' attribute are skipped.
            continue
        else:
            picture_urls.append(source)

    description = html_to_markdown(
        str(soup.find('div', {'id': 'tab-description'})))

    return [
        Product(
            name, cls.__name__, category, url, url, sku, stock,
            normal_price, offer_price, 'CLP', sku=sku,
            description=description, picture_urls=picture_urls
        )
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a list of at most one Product.

    The page is requested with a random ``_`` query parameter appended
    (presumably to bypass caching — confirm); that request URL is printed.

    Args:
        url: Product page URL; stored on the ``Product`` without the random
            query parameter.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        An empty list on a 404 or empty response body; otherwise a list with
        a single ``Product`` priced in 'CLP'.
    """
    session = session_with_proxy(extra_args)
    nonce = random.randint(1, 1000)
    request_url = '{}?_={}'.format(url, nonce)
    print(request_url)

    response = session.get(request_url)
    if response.status_code == 404 or not response.text:
        return []

    soup = BeautifulSoup(response.text, 'html5lib')

    name = soup.find('span', {'itemprop': 'name'}).text.strip()
    part_number = soup.find('div', {'itemprop': 'sku'}).text.strip()
    sku = soup.find('div', 'price-final_price')['data-product-id'].strip()

    add_to_cart = soup.find('button', {'id': 'product-addtocart-button'})
    stock = -1 if add_to_cart else 0

    # First price span is the normal price, last one the offer price (they
    # coincide when only one span exists).
    price_spans = soup.find('div', 'product-info-price').findAll(
        'span', 'price')
    normal_price = Decimal(remove_words(price_spans[0].string))
    offer_price = Decimal(remove_words(price_spans[-1].string))

    description_parts = []
    for panel_id in ('product.info.description', 'additional'):
        panel = soup.find('div', {'id': panel_id})
        if panel:
            description_parts.append(html_to_markdown(str(panel)) + '\n\n')
    description = ''.join(description_parts)

    picture_urls = []
    for thumb in soup.findAll('a', 'mt-thumb-switcher'):
        if thumb.get('data-image'):
            picture_urls.append(thumb['data-image'])

    return [
        Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'CLP', sku=sku,
                part_number=part_number, description=description,
                picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'. ``part_number``
        and ``picture_urls`` are ``None`` when their source elements are
        missing from the page.
    """
    print(url)
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1', 'rm-product-page__title').text
    sku = soup.find('div', 'rm-product__id').h3.text

    mpn_tag = soup.find('p', 'rm-product__mpn')
    part_number = mpn_tag.text.split(':')[-1].strip() if mpn_tag else None

    stock_message = soup.find('div', 'rm-producto-stock-message')
    if not stock_message or \
            stock_message.text == 'Sin disponibilidad para venta web':
        stock = 0
    else:
        # Otherwise the message is expected to start with the unit count.
        stock = int(stock_message.text.split(' ')[0])

    cash_text = soup.find('div', 'rm-product__price--cash').h3.text
    offer_price = Decimal(remove_words(cash_text))
    list_text = soup.find('div', 'rm-product__price--normal').h3.text
    normal_price = Decimal(remove_words(list_text))

    description = html_to_markdown(
        str(soup.find('div', {'id': 'tab-description'})))

    # Only the first thumbnail link is used as the picture.
    thumbnails = soup.find('ul', 'thumbnails')
    picture_urls = [thumbnails.a['href']] if thumbnails else None

    return [
        Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'CLP', sku=sku,
                description=description, picture_urls=picture_urls,
                part_number=part_number)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The part number is taken from the ``description`` meta tag, and only
    when its stripped content is shorter than 50 characters.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1').text.strip()
    sku = soup.find('input', {'name': 'id_product'})['value'].strip()

    part_number = None
    description_meta = soup.find('meta', {'name': 'description'})
    if description_meta:
        candidate = description_meta['content'].strip()
        if len(candidate) < 50:
            part_number = candidate

    availability = soup.find('link', {'itemprop': 'availability'})
    in_stock = bool(availability) and \
        availability['href'] == 'http://schema.org/InStock'
    stock = -1 if in_stock else 0

    offer_text = soup.find('span', {'id': 'our_price_display'}).string
    offer_price = Decimal(remove_words(offer_text))

    old_price_tag = soup.find('p', {'id': 'old_price'})
    normal_text = old_price_tag.find('span', 'price').string
    normal_price = Decimal(remove_words(normal_text))

    description = html_to_markdown(
        str(soup.find('section', 'page-product-box')))

    picture_urls = []
    for link in soup.findAll('a', 'fancybox'):
        picture_urls.append(link['href'])

    return [
        Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'CLP', sku=sku,
                part_number=part_number, description=description,
                picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The request uses a fixed Host header, a curl User-Agent and a 10 second
    timeout. The same price is used as normal and offer price.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    session = session_with_proxy(extra_args)
    for header, value in (('Host', 'www.ledshop.cl'),
                          ('User-Agent', 'curl/7.52.1'),
                          ('Accept', '*/*')):
        session.headers[header] = value
    print(url)

    response = session.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.find('h2').text.strip()
    sku = soup.find('input', {'name': 'product_id'})['value'].strip()
    stock = -1 if soup.find('input', 'wpsc_buy_button') else 0

    try:
        first_span = soup.find('div', 'wpsc_product_price').span
        price = Decimal(remove_words(first_span.text))
    except InvalidOperation:
        # The first span did not hold a parseable number; use the second.
        spans = soup.find('div', 'wpsc_product_price').findAll('span')
        price = Decimal(remove_words(spans[1].text))
    price = price.quantize(0)

    description = html_to_markdown(
        str(soup.find('div', 'wpsc_description')))

    picture_urls = []
    for link in soup.findAll('a', 'thickbox'):
        picture_urls.append(link['href'].replace(' ', '%20'))

    return [
        Product(name, cls.__name__, category, url, url, sku, stock, price,
                price, 'CLP', sku=sku, description=description,
                picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The SKU comes from the ``sku`` query parameter of ``url``. If the page
    shows no price, the page linked from the 'Configurar y cotizar' image is
    fetched and searched instead. If that also has no price, the product is
    returned with stock 0 and price 0.

    Args:
        url: Product page URL carrying a ``sku`` query parameter.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    price_css_class = 'pricing_retail_nodiscount_price'

    def _to_price(tag):
        # Keep what follows the '$' sign and parse it as a Decimal.
        return Decimal(remove_words(tag.string.split('$')[1]))

    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1').text.strip()
    price_tag = soup.find('span', price_css_class)
    stock = -1

    parsed_url = urllib.parse.urlparse(url)
    sku = urllib.parse.parse_qs(parsed_url.query)['sku'][0]

    description = html_to_markdown(
        str(soup.find('div', {'id': 'cntTabsCnt'})))

    # The second <img> inside the main content container is the picture.
    main_content = soup.find('div', {'id': 'maincontentcnt'})
    picture_urls = [main_content.findAll('img')[1]['src']]

    if price_tag:
        price = _to_price(price_tag)
    else:
        configure_image = soup.find('img', {'alt': 'Configurar y cotizar'})
        configure_url = configure_image.parent['href']
        configure_soup = BeautifulSoup(
            session.get(configure_url).text, 'html.parser')
        configure_price_tag = configure_soup.find('span', price_css_class)
        if configure_price_tag:
            price = _to_price(configure_price_tag)
        else:
            stock = 0
            price = Decimal(0)

    return [
        Product(
            name, cls.__name__, category, url, url, sku, stock, price,
            price, 'CLP', sku=sku, description=description,
            picture_urls=picture_urls
        )
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    Args:
        url: Product page URL; requested with a desktop Chrome User-Agent.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'. Stock is -1 when
        the add-to-cart button is present and 0 otherwise.
    """
    def _price_in(css_class):
        # Parse the 'price' span nested in the paragraph of the given class.
        span = soup.find('p', css_class).find('span', 'price')
        return Decimal(remove_words(span.string))

    session = session_with_proxy(extra_args)
    session.headers.update({
        'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/'
            '537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 '
            'Safari/537.36'
    })
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1').text.strip()
    sku = soup.find('input', {'name': 'product'})['value'].strip()

    cart_button = soup.find('button', {'id': 'product-addtocart-button'})
    stock = -1 if cart_button else 0

    normal_price = _price_in('old-price')
    offer_price = _price_in('special-price')

    description = html_to_markdown(
        str(soup.find('div', 'short-description')))

    picture_urls = []
    for link in soup.findAll('a', 'ig_lightbox2'):
        picture_urls.append(link['href'])

    return [
        Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'CLP', sku=sku,
                description=description, picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a list of at most one Product.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        An empty list when the server answers with status 500; otherwise a
        list with a single ``Product`` priced in 'CLP'.
    """
    session = session_with_proxy(extra_args)
    response = session.get(url)
    if response.status_code == 500:
        return []

    soup = BeautifulSoup(response.text, 'html5lib')

    name = soup.find('h2', 'title-product').text.strip()
    sku = soup.find('span', 'text-stock').text.strip()

    # A check-circle icon inside the branch stock box marks availability.
    branch_stock = soup.find('div', 'sucursales-stock')
    available = branch_stock and branch_stock.find('i', 'fa-check-circle')
    stock = -1 if available else 0

    price_tags = soup.findAll('p', 'precio')
    offer_price = Decimal(remove_words(price_tags[0].text.strip()))
    normal_price = Decimal(remove_words(price_tags[2].text.strip()))
    # The normal price is never allowed to fall below the offer price.
    if offer_price > normal_price:
        normal_price = offer_price

    description = html_to_markdown(
        str(soup.find('div', {'id': 'description'})))

    picture_urls = []
    for image in soup.findAll('img', 'primary-img'):
        picture_urls.append(image['src'])

    return [
        Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'CLP', sku=sku,
                description=description, picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a list of at most one Product.

    Stock is read from a separate AJAX endpoint, posted to with the SKU on
    a fresh session. The condition is refurbished when the description
    contains 'seminuevo', and new otherwise.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy`` (for both sessions).

    Returns:
        An empty list on a 404, or when the page has no ``<body>`` or no
        product title; otherwise a list with a single ``Product`` priced in
        'CLP'.
    """
    user_agent = 'python-requests/2.21.0'
    stock_endpoint = \
        'https://catalogo.movistar.cl/fullprice/stockproducto/validar/'

    session = session_with_proxy(extra_args)
    session.headers['user-agent'] = user_agent
    response = session.get(url)
    if response.status_code == 404:
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    title_tag = soup.find('h1', {'id': 'nombre-producto'})
    if not (soup.find('body') and title_tag):
        return []

    name = title_tag.text.strip()
    sku = soup.find('div', {'itemprop': 'sku'}).text.strip()

    ajax_session = session_with_proxy(extra_args)
    ajax_session.headers['user-agent'] = user_agent
    ajax_session.headers['x-requested-with'] = 'XMLHttpRequest'
    ajax_session.headers['content-type'] = \
        'application/x-www-form-urlencoded'
    stock_response = ajax_session.post(stock_endpoint, 'sku=' + sku)
    stock = json.loads(stock_response.text)['respuesta']['cantidad']

    price_tag = soup.find('span', 'special-price').find('p')
    price = Decimal(remove_words(price_tag.text))

    description = html_to_markdown(
        str(soup.find('div', 'detailed-desktop')))

    condition = 'https://schema.org/NewCondition'
    if 'seminuevo' in description:
        condition = 'https://schema.org/RefurbishedCondition'

    og_image = soup.find('meta', {'property': 'og:image'})
    picture_urls = [og_image['content']]

    product = Product(name, cls.__name__, category, url, url, sku, stock,
                      price, price, 'CLP', condition=condition, sku=sku,
                      description=description, picture_urls=picture_urls)
    return [product]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The listed price is multiplied by 1.19 (presumably to add 19% tax —
    confirm) and rounded with ``quantize(0)``. Stock is always -1.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    session = session_with_proxy(extra_args)
    page = session.get(url)
    soup = BeautifulSoup(page.text, 'html.parser')

    name = soup.find('h1', 'entry-title').text.strip()
    sku = soup.find('input', {'name': 'product_id'})['value'].strip()
    description = html_to_markdown(
        str(soup.find('div', 'product_description')))

    picture_urls = []
    for link in soup.findAll('a', 'thickbox'):
        picture_urls.append(link['href'])

    listed_price = Decimal(remove_words(soup.find('span', 'currentprice').text))
    price = (listed_price * Decimal('1.19')).quantize(0)

    return [
        Product(name, cls.__name__, category, url, url, sku, -1, price,
                price, 'CLP', sku=sku, description=description,
                picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    Args:
        url: Product page URL (also printed to stdout); requested with a
            curl User-Agent.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'. The offer price
        never exceeds the normal price.
    """
    print(url)
    session = session_with_proxy(extra_args)
    session.headers['User-Agent'] = 'curl/7.54.0'
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h5', 'ttvproduct-title').text.strip()
    sku = soup.find('input', {'name': 'product_id'})['value']
    stock_text = soup.find('span', 'ttvproduct-stock-status').text
    stock = int(stock_text.strip())

    headings = soup.find('ul', 'product-price-and-shipping').findAll('h3')
    normal_price = Decimal(remove_words(headings[0].text))
    # A second heading, when present, holds the offer price.
    offer_price = normal_price
    if len(headings) > 1:
        offer_price = Decimal(remove_words(headings[1].text))
    if offer_price > normal_price:
        offer_price = normal_price

    description = html_to_markdown(
        str(soup.find('div', {'id': 'tab-description'})))

    zoom_image = soup.find('img', {'id': 'img_zoom'})
    picture_urls = [zoom_image['data-zoom-image']] if zoom_image else []

    return [
        Product(name, cls.__name__, category, url, url, sku, stock,
                normal_price, offer_price, 'CLP', sku=sku,
                description=description, picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a list of at most one Product.

    The SKU is extracted from the ``ecomm_prodid: <digits>`` marker in the
    raw page source. The price is read from the 'text_web' span when
    present, otherwise from the 'oferta' span. Stock is always -1 and the
    same price is used as normal and offer price.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        An empty list when the page says 'Producto no disponible' or shows
        no price; otherwise a list with a single ``Product`` priced in
        'CLP'.
    """
    print(url)
    session = session_with_proxy(extra_args)
    page_source = session.get(url).text

    # Bail out before paying for the HTML parse of an unavailable product.
    if 'Producto no disponible' in page_source:
        return []

    soup = BeautifulSoup(page_source, 'html.parser')

    name = soup.find('h1').text.strip()
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    sku = re.search(r'ecomm_prodid: (\d+)', page_source).groups()[0]

    price_container = soup.find('span', 'text_web')
    if price_container:
        # The amount is the text node right after the <p> inside <strong>.
        price = remove_words(
            price_container.find('strong').find('p').nextSibling)
    else:
        price_container = soup.find('span', 'oferta')
        if not price_container:
            return []
        price = remove_words(price_container.find('b').text)
    price = Decimal(price)

    description = html_to_markdown(
        str(soup.find('div', {'id': 'box-descripcion'})))

    picture_urls = [
        tag['href']
        for tag in soup.find('div', 'owl-carousel').findAll('a', 'fancybox')
    ]

    p = Product(name, cls.__name__, category, url, url, sku, -1, price,
                price, 'CLP', sku=sku, description=description,
                picture_urls=picture_urls)
    return [p]
def products_for_url(cls, url, category=None, extra_args=None):
    """Build the products for one of the two URLs this scraper supports.

    ``cls.prepago_url`` yields a single zero-priced prepaid plan without any
    network access. ``cls.equipos_url`` is downloaded and its embedded
    ``var catalog = ...`` JSON is turned into one 'Cell' product per
    published entry.

    Args:
        url: Either ``cls.prepago_url`` or ``cls.equipos_url``.
        category: Used only for the prepaid plan product.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list of ``Product`` objects priced in 'CLP'.

    Raises:
        Exception: If ``url`` is neither of the two supported URLs.
    """
    if url == cls.prepago_url:
        # Prepaid plan.
        # NOTE(review): the key is 'Claro Prepago' while the name is
        # 'GTD Prepago' — looks like a copy-paste leftover; confirm before
        # changing, since it identifies the stored product.
        prepaid_plan = Product(
            'GTD Prepago',
            cls.__name__,
            category,
            url,
            url,
            'Claro Prepago',
            -1,
            Decimal(0),
            Decimal(0),
            'CLP',
        )
        return [prepaid_plan]

    if url != cls.equipos_url:
        raise Exception('Invalid URL: ' + url)

    session = session_with_proxy(extra_args)
    body = session.get(url).text
    # Drop the last captured character (presumably a trailing ';' — confirm)
    # so that the remainder parses as JSON.
    raw_catalog = re.search(r'var catalog = (.+)', body).groups()[0][:-1]
    catalog = json.loads(raw_catalog)

    products = []
    for entry in catalog['products']:
        if not entry['published']:
            continue

        name = entry['name']
        sku = entry['id']
        price = Decimal(remove_words(entry['leasing_price']))
        description = html_to_markdown(entry['description'])

        picture_urls = []
        for image in entry['images']:
            picture_urls.append(
                'https://nuevo.gtdmanquehue.com' + image['options']['url'])

        products.append(
            Product(name, cls.__name__, 'Cell', url, url, sku, -1, price,
                    price, 'CLP', sku=sku, cell_plan_name='GTD Prepago',
                    description=description, picture_urls=picture_urls))

    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'. The offer price
        never exceeds the normal price.
    """
    print(url)
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    pricing_box = soup.find('div', {'id': 'product'}).parent
    name = pricing_box.find('h1').text.strip()
    sku = soup.find('input', {'name': 'product_id'})['value']

    # The stock number is the second node after the 'STOCK WEB:' label.
    stock_label = soup.find('b', text='STOCK WEB:')
    stock = int(stock_label.next.next)

    price_anchor = pricing_box.find('img', {'align': 'absmiddle'})
    headings = price_anchor.parent.findAll('h2')
    normal_price = Decimal(remove_words(headings[1].text))
    offer_price = Decimal(remove_words(headings[2].text))
    if normal_price < offer_price:
        offer_price = normal_price

    description = html_to_markdown(
        str(soup.find('div', {'id': 'tab-description'})))

    picture_urls = []
    for link in soup.findAll('a', 'thumbnail'):
        if link['href']:
            picture_urls.append(link['href'].replace(' ', '%20'))

    return [
        Product(
            name, cls.__name__, category, url, url, sku, stock,
            normal_price, offer_price, 'CLP', sku=sku,
            description=description, picture_urls=picture_urls
        )
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Turn the JSON feed at ``url`` into one 'Cell' Product per phone/plan.

    Every entry of the feed is a phone; each of its ``planes`` produces a
    product whose key is '<phone name> <plan name>'. The price is the
    plan's ``pieEquipo`` and the monthly payment its ``cuotaMensualEquipo``.

    Args:
        url: URL of the JSON feed; used as the discovery URL of each product.
        category: Unused; products are always created as 'Cell'.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list of ``Product`` objects priced in 'CLP'.

    Raises:
        KeyError: If a plan's ``tipoPlan`` is not one of the known plans.
    """
    landing_url = 'https://ww2.movistar.cl/movistarone/'
    plan_names = {
        'Plan Libre': 'Plus Libre Cod_OAM_Porta cuotas',
        'Plan XL': 'Plus XL Cod_OAN_Porta cuotas',
        'Plan L': 'Plus L Cod_OAO_Porta cuotas',
        'Plan M': 'Plus M Cod_OAP_Porta cuotas',
    }

    session = session_with_proxy(extra_args)
    phones = json.loads(session.get(url).text)

    products = []
    for phone in phones:
        name = phone['telefono']
        picture_path = phone['imagenUrl'].replace(' ', '%20')
        picture_urls = ['https://ww2.movistar.cl/movistarone/' + picture_path]

        for plan in phone['planes']:
            cell_plan_name = plan_names[plan['tipoPlan']]
            price = Decimal(remove_words(plan['pieEquipo']))
            monthly_payment = Decimal(
                remove_words(plan['cuotaMensualEquipo']))
            key = '{} {}'.format(name, cell_plan_name)

            product = Product(name, cls.__name__, 'Cell', landing_url, url,
                              key, -1, price, price, 'CLP',
                              picture_urls=picture_urls,
                              cell_plan_name=cell_plan_name,
                              cell_monthly_payment=monthly_payment)
            products.append(product)

    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a list of at most one Product.

    The page is requested once, with TLS verification disabled, and the same
    response serves both the 404 check and the parsing. The same price is
    used as normal and offer price.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        An empty list when the server answers 404; otherwise a list with a
        single ``Product`` priced in 'CLP'.
    """
    print(url)
    session = session_with_proxy(extra_args)
    response = session.get(url, verify=False)

    if response.status_code == 404:
        return []

    # Parse the response already in hand rather than downloading the page a
    # second time.
    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.find('h1', {'itemprop': 'name'}).text.strip()
    sku = soup.find('input', {'name': 'id_product'})['value'].strip()
    part_number = soup.find('span', {'itemprop': 'sku'}).text.strip()

    # Any text inside the availability span marks the product as unavailable.
    unavailable_text = soup.find(
        'span', {'id': 'availability_value'}).string
    stock = 0 if unavailable_text else -1

    price_container = soup.find('span', {'id': 'our_price_display'})
    price = Decimal(remove_words(price_container.string.split('$')[1]))

    condition = soup.find('link', {'itemprop': 'itemCondition'})['href']

    description = html_to_markdown(
        str(soup.find('div', 'page-product-box')))

    picture_urls = [tag['href'] for tag in soup.findAll('a', 'fancybox')]

    p = Product(name, cls.__name__, category, url, url, sku, stock, price,
                price, 'CLP', sku=sku, part_number=part_number,
                description=description, picture_urls=picture_urls,
                condition=condition)
    return [p]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a list of at most one Product.

    The SKU is the first run of digits in the ``id`` of the 'product-price'
    div. The picture URL is rebuilt from the thumbnail file name with its
    '_<width>x<height>' size suffix removed. Stock is always -1.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        An empty list when the page has no 'product-price' div; otherwise a
        list with a single ``Product`` priced in 'CLP'.
    """
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find("h1", "title").text

    price_box = soup.find("div", "product-price")
    if price_box is None:
        return []

    sku = re.search(r'(\d+)', price_box['id']).groups()[0]
    stock = -1

    price_text = price_box.find('div', 'PricesalesPrice').span.text
    price = Decimal(remove_words(price_text))

    # Short description comes from the element's text, long one from its HTML.
    short_description = html_to_markdown(
        str(soup.find('div', 's_desc').text))
    long_description = html_to_markdown(str(soup.find('div', 'desc')))
    description = '\n\n'.join([short_description, long_description])

    thumbnail_url = soup.find('ul', 'pagination2').img['src']
    thumbnail_name = thumbnail_url.split('/')[-1]
    size_suffix = re.search(r'(_\d+x\d+)', thumbnail_name).groups()[0]
    full_size_name = thumbnail_name.replace(size_suffix, '')
    picture_urls = [
        'http://www.airecenter.cl/images/stories/virtuemart/product/'
        + full_size_name
    ]

    return [
        Product(
            name, cls.__name__, category, url, url, sku, stock, price,
            price, 'CLP', sku=sku, description=description,
            picture_urls=picture_urls
        )
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a list of at most one Product.

    The SKU is the value after '=' in the page's shortlink. The price is
    taken from the ``<ins>`` element of the price paragraph when one exists,
    otherwise from the paragraph itself, and is used as both normal and
    offer price.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        An empty list when the page has no stock paragraph; otherwise a list
        with a single ``Product`` priced in 'CLP'.
    """
    print(url)
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('h1', 'product_title').text.strip()
    sku = soup.find('link', {'rel': 'shortlink'})['href'].split('=')[1]

    stock_container = soup.find('p', 'stock')
    if not stock_container:
        return []
    stock = -1 if stock_container.text == 'Hay existencias' else 0

    price_container = soup.find('p', 'price')
    sale_price_container = price_container.find('ins')
    if sale_price_container:
        price_container = sale_price_container
    price = Decimal(remove_words(price_container.text))

    picture_containers = soup.findAll(
        'div', 'woocommerce-product-gallery__image')
    # .get() so that a gallery entry lacking 'data-thumb' is skipped rather
    # than aborting the whole scrape with a KeyError.
    picture_urls = [
        ic.find('img')['src']
        for ic in picture_containers
        if ic.get('data-thumb')
    ]

    description = html_to_markdown(
        str(soup.find('div', 'woocommerce-Tabs-panel--description')))

    p = Product(name, cls.__name__, category, url, url, sku, stock, price,
                price, 'CLP', sku=sku, description=description,
                picture_urls=picture_urls)
    return [p]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The listed special price is multiplied by 1.19 (presumably to add 19%
    tax — confirm), rounded with ``quantize(0)`` and used as both normal
    and offer price. Stock is always -1. The page is fetched with TLS
    verification disabled.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    session = session_with_proxy(extra_args)
    response = session.get(url, verify=False)
    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.find('h1').text.strip()
    shop_box = soup.find('div', 'wrap-product-shop')
    sku = shop_box.find('p').text.split(':')[1].strip()

    price_span = shop_box.find('p', 'special-price').find('span', 'price')
    listed_price = Decimal(remove_words(price_span.contents[0]))
    normal_price = (listed_price * Decimal('1.19')).quantize(0)
    offer_price = normal_price

    # Join whichever of the description tabs exist, in this fixed order.
    tabs = (
        soup.find('div', {'id': tab_id})
        for tab_id in ('tab-descripcion', 'tab-adicional',
                       'tab-ficha_tecnica')
    )
    description = '\n\n'.join(
        [html_to_markdown(str(tab)) for tab in tabs if tab])

    picture_urls = []
    for link in soup.findAll('a', 'colorbox'):
        picture_urls.append(link['href'])

    return [
        Product(name, cls.__name__, category, url, url, sku, -1,
                normal_price, offer_price, 'CLP', sku=sku,
                description=description, picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The part number is extracted from a ``ccs_cc_args.push(['pn', '...'])``
    call in the raw page source and is ``None`` when that call is absent.
    The price comes from the 'regular-price' span, falling back to the
    'special-price' paragraph. The page is fetched with TLS verification
    disabled.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    print(url)
    session = session_with_proxy(extra_args)
    page_source = session.get(url, verify=False).text
    soup = BeautifulSoup(page_source, 'html.parser')

    name = soup.find('h1', {'itemprop': 'name'}).text.strip()
    sku = soup.find('p', 'titulo-atributo-ficha').find('span').text.strip()

    shop_box = soup.find('div', 'product-shop')
    price_tag = shop_box.find('span', 'regular-price')

    part_number = None
    pn_match = re.search(r'ccs_cc_args.push\(\[\'pn\', \'(.+)\'\]\);',
                         page_source)
    if pn_match:
        part_number = pn_match.groups()[0].strip()

    if not price_tag:
        price_tag = shop_box.find('p', 'special-price')
    price = Decimal(remove_words(price_tag.find('span', 'price').text))

    description = html_to_markdown(
        str(soup.find('div', 'product-description')))

    picture_urls = []
    for link in soup.findAll('a', 'lightbox'):
        picture_urls.append(link['href'])

    # Available when it can be added to the cart or any store lists it.
    available = soup.find('button', 'btn-cart') or \
        soup.findAll('p', 'tienda-disponible')
    stock = -1 if available else 0

    return [
        Product(name, cls.__name__, category, url, url, sku, stock, price,
                price, 'CLP', sku=sku, part_number=part_number,
                description=description, picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The SKU is the numeric segment following '/p/' in ``url``. The product
    name is '<model> (<part number>)'. The same price is used as normal and
    offer price.

    Args:
        url: Product page URL containing '/p/<digits>/' (also printed).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    print(url)
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    sku = re.search(r'/p/(\d+)/', url).groups()[0]

    model = soup.find('h1', 'producto-nombre').text.strip()
    part_number = soup.find('div', 'producto-subtitulo').text.strip()
    name = '{} ({})'.format(model, part_number)

    stock = 0
    stock_box = soup.find('div', 'producto-stock')
    if stock_box:
        stock_match = re.search(r'STOCK: (\d+)', stock_box.text)
        stock = int(stock_match.groups()[0])

    # Anything from the first '(' onwards in the price text is discarded.
    price_text = soup.find('div', 'producto-precio').text.split('(')[0]
    price = Decimal(remove_words(price_text))

    description = html_to_markdown(
        str(soup.find('table', 'producto-ficha-tabla')))

    gallery_links = (
        item.find('a')
        for item in soup.findAll('div', 'producto-galeria-imagenes-item')
    )
    picture_urls = [link['href'] for link in gallery_links if link]

    return [
        Product(name, cls.__name__, category, url, url, sku, stock, price,
                price, 'CLP', sku=sku, part_number=part_number,
                description=description, picture_urls=picture_urls)
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a list of at most one Product.

    The SKU is the numeric path segment after '/ultimate-gamer-store/' in
    the page's ``og:image`` URL. Stock is -1 when the
    ``product:availability`` meta tag says 'instock', and 0 otherwise.

    Args:
        url: Product page URL (also printed to stdout).
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        An empty list when the server answers 404; otherwise a list with a
        single ``Product`` priced in 'CLP'.
    """
    print(url)
    session = session_with_proxy(extra_args)
    response = session.get(url)
    if response.status_code == 404:
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    name = soup.find('h1', 'product-info__name').text

    og_image_url = soup.find('meta', {'property': 'og:image'})['content']
    sku_match = re.search(r'/ultimate-gamer-store/(\d+)/', og_image_url)
    sku = sku_match.groups()[0]

    availability = soup.find(
        'meta', {'property': 'product:availability'})['content']
    stock = -1 if availability == 'instock' else 0

    price_text = soup.find('span', 'product-info__price-current').text
    price = Decimal(remove_words(price_text).strip())

    description = html_to_markdown(
        str(soup.find('section', {'id': 'product-description'})))

    picture_urls = []
    for image in soup.findAll('img', 'product-slider__block-image'):
        picture_urls.append(image['src'])

    return [
        Product(name, cls.__name__, category, url, url, sku, stock, price,
                price, 'CLP', sku=sku, picture_urls=picture_urls,
                description=description)
    ]
def _plans(cls, url, extra_args):
    """Scrape the plan grid at ``url`` into 'CellPlan' products.

    Every plan container yields four products that share one price: the
    base plan name, with and without the ' Portabilidad' suffix, each with
    and without the lease-installment suffix. Each container is printed to
    stdout as it is processed.

    Args:
        url: Plans page URL; fetched with a 30 second timeout.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list of ``Product`` objects priced in 'CLP'.
    """
    portability_suffixes = ('', ' Portabilidad')
    cuotas_suffixes = (
        ' (sin cuota de arriendo)',
        ' (con cuota de arriendo)',
    )

    session = session_with_proxy(extra_args)
    session.headers['user-agent'] = 'python-requests/2.21.0'
    response = session.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'html5lib')

    products = []
    for container in soup.findAll('div', 'mb-parrilla_col'):
        print(container)

        link = container.find('a')
        plan_url = link['href']
        heading = link.find('h3').text.strip()
        base_plan_name = ('Plan ' + heading).replace(' ', '')

        price_box = container.find('div', 'mb-parrilla_price')
        price_text = price_box.find('p', 'price').text
        # Only the first whitespace-separated token holds the amount.
        price = Decimal(remove_words(price_text.split()[0]))

        plan_names = [
            '{}{}{}'.format(base_plan_name, portability, cuota)
            for portability in portability_suffixes
            for cuota in cuotas_suffixes
        ]
        products.extend(
            Product(
                plan_name, cls.__name__, 'CellPlan', plan_url, url,
                plan_name, -1, price, price, 'CLP'
            )
            for plan_name in plan_names
        )

    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The name is '<brand> <model>' when a 'Marca:' label exists, and just
    the model otherwise. Stock is always -1 and the same price is used as
    normal and offer price. The page is fetched with TLS verification
    disabled.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    session = session_with_proxy(extra_args)
    response = session.get(url, verify=False)
    soup = BeautifulSoup(response.text, 'html.parser')

    model = soup.find('h1').text.strip()

    name = model
    brand_label = soup.find('span', text='Marca:')
    if brand_label:
        # The brand element sits three nodes after the 'Marca:' label.
        brand = brand_label.next.next.next.text.strip()
        name = '{} {}'.format(brand, model)

    # The SKU text is two nodes after the 'SKU:' label.
    sku = soup.find('span', text='SKU:').next.next.strip()

    description = html_to_markdown(
        str(soup.find('div', {'id': 'tab-description'})))

    picture_urls = []
    for link in soup.findAll('a', 'colorbox'):
        picture_urls.append(link['href'].replace(' ', '%20'))

    price_text = soup.find('div', 'price').text.split(':')[1]
    price = Decimal(remove_words(price_text)).quantize(0)

    return [
        Product(name, cls.__name__, category, url, url, sku, -1, price,
                price, 'CLP', sku=sku, description=description,
                picture_urls=picture_urls)
    ]
def _plans(cls, url, extra_args):
    """Scrape the plan boxes at ``url`` into 'CellPlan' products.

    Each box yields four products. The non-ported and ported variants take
    their price from the ``data-not-ported`` and ``data-ported`` attributes
    of the box's price div. Each variant is emitted with and without the
    lease-installment suffix.

    Args:
        url: Plans page URL.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list of ``Product`` objects priced in 'CLP'.
    """
    portability_variants = (
        ('', 'data-not-ported'),
        (' Portabilidad', 'data-ported'),
    )
    cuotas_suffixes = (
        ' (sin cuota de arriendo)',
        ' (con cuota de arriendo)',
    )

    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    products = []
    for box in soup.findAll('div', 'box-vertical-fijo'):
        # Collapse every run of whitespace in the title to a single space.
        title_words = box.find('li', 'gbs_text').text.split()
        base_plan_name = ' '.join(
            [word.replace('\n', '').strip() for word in title_words])

        price_div = box.find('div', 'price')

        for portability_suffix, price_attribute in portability_variants:
            price = Decimal(remove_words(price_div[price_attribute]))

            for cuota_suffix in cuotas_suffixes:
                plan_name = '{}{}{}'.format(
                    base_plan_name, portability_suffix, cuota_suffix)
                products.append(Product(
                    plan_name,
                    cls.__name__,
                    'CellPlan',
                    url,
                    url,
                    plan_name,
                    -1,
                    price,
                    price,
                    'CLP',
                ))

    return products
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    The listed price is multiplied by 1.19 (presumably to add 19% tax —
    confirm), rounded with ``quantize(0)`` and used as both normal and
    offer price.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'. Stock is -1 when
        the add-to-basket link is present and 0 otherwise.
    """
    base_url = 'https://www.dled.cl'

    session = session_with_proxy(extra_args)
    # The raw bytes are handed to the parser (not the decoded text).
    soup = BeautifulSoup(session.get(url).content, 'html.parser')

    name = soup.find('h1').text.strip()
    sku = soup.find('div', 'codigo-producto').text.split(':')[1].strip()
    stock = -1 if soup.find('a', 'btnAddBasketHome') else 0

    price_label = soup.find('div', 'precio').find('label')
    listed_price = Decimal(remove_words(price_label.string))
    price = (listed_price * Decimal('1.19')).quantize(0)

    panel_texts = []
    for panel in soup.findAll('section', 'page_product_box'):
        panel_texts.append(html_to_markdown(str(panel), base_url))
    description = '\n\n'.join(panel_texts)

    picture_urls = []
    for link in soup.findAll('a', 'cloud-zoom-gallery'):
        picture_urls.append('https://www.dled.cl' + link['href'])

    return [
        Product(
            name, cls.__name__, category, url, url, sku, stock, price,
            price, 'CLP', sku=sku, description=description,
            picture_urls=picture_urls
        )
    ]
def products_for_url(cls, url, category=None, extra_args=None):
    """Scrape the product page at ``url`` into a one-element Product list.

    SKU and part number are parsed out of the identifier line, which has
    the form 'Código Rhona: <digits> | Código Fabricante: <text>'. When the
    page shows no price, the product is returned with stock 0 and price 0.

    Args:
        url: Product page URL.
        category: Forwarded to ``Product`` unchanged.
        extra_args: Forwarded to ``session_with_proxy``.

    Returns:
        A list with a single ``Product`` priced in 'CLP'.
    """
    session = session_with_proxy(extra_args)
    soup = BeautifulSoup(session.get(url).text, 'html.parser')

    name = soup.find('div', 'titulo').text.strip()
    identifier = soup.find('div', 'identif').text.strip()
    # Raw string: '\d' and '\|' in a plain literal are invalid escape
    # sequences.
    sku, part_number = re.search(
        r'Código Rhona: (\d+) \| Código Fabricante: (.+)',
        identifier).groups()

    price_tag = soup.find('span', 'verde')
    if not price_tag:
        stock = 0
        price = Decimal(0)
    else:
        stock = -1
        price = Decimal(remove_words(price_tag.string))

    description = html_to_markdown(str(soup.find('ul', {'id': 'tab1'})))

    picture_urls = [
        tag['href'] for tag in soup.find('div', 'masFotos').findAll('a')
    ]

    p = Product(name, cls.__name__, category, url, url, sku, stock, price,
                price, 'CLP', sku=sku, part_number=part_number,
                description=description, picture_urls=picture_urls)
    return [p]