def _retrieve_product(cls, url):
    """Scrape the product name and its prices from the page at ``url``.

    The name comes from the ``h1`` inside the first ``td.pageHeading``
    cell. The second cell prices cash payments and the third cell prices
    the remaining payment methods.

    Returns a two-item list ``[name, prices]`` where ``prices`` maps each
    payment-method key to a ``Decimal``.
    """
    page = mechanize.Browser().open(url).get_data()
    page_soup = BeautifulSoup(page)
    heading_cells = page_soup.findAll('td', 'pageHeading')
    title = heading_cells[0].find('h1').contents[0]

    methods_by_cell = [
        (heading_cells[1], ['cash']),
        (heading_cells[2],
         ['deposit', 'debit_card', 'credit_card', 'presto_card']),
    ]

    prices = {}
    for price_cell, methods in methods_by_cell:
        try:
            # Use the special (discounted) price when the cell carries one.
            special = price_cell.find('span', 'productSpecialPrice')
            amount = Decimal(clean_price_string(special.string))
        except Exception:
            # Any failure above falls back to the text after the '$' sign.
            plain_text = price_cell.string.split('$')[1]
            amount = Decimal(clean_price_string(plain_text))
        prices.update(dict.fromkeys(methods, amount))

    return [title, prices]
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    When the page has a ``ficha-producto-precio-mas`` block, its first
    child is used as the ``cencosud_card`` price and the
    ``ficha-producto-precio-normal`` block prices the other methods.
    Otherwise the single ``ficha-producto-precio`` block prices every
    method, the card included.

    Returns a ``(name, prices)`` tuple; ``prices`` maps payment-method
    keys to ``Decimal`` values.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    title_div = soup.find("div", {"id": "ficha-producto-nombre"})
    name = title_div.string.encode("ascii", "ignore").strip()

    prices = {}
    card_price_div = soup.find("div", {"id": "ficha-producto-precio-mas"})
    if card_price_div:
        card_text = clean_price_string(card_price_div.contents[0])
        prices["cencosud_card"] = Decimal(card_text)
        regular_div_id = "ficha-producto-precio-normal"
        regular_methods = ["cash", "debit_card", "credit_card"]
    else:
        regular_div_id = "ficha-producto-precio"
        regular_methods = ["cencosud_card", "cash", "debit_card",
                           "credit_card"]

    # Both layouts print the regular price as text following a '$' sign.
    regular_div = soup.find("div", {"id": regular_div_id})
    regular_text = regular_div.string.split("$")[1]
    regular_price = Decimal(clean_price_string(regular_text))
    for method in regular_methods:
        prices[method] = regular_price

    return name, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Returns ``(name, {})`` when the red bold notice on the page contains
    'PRODUCTO AGOTADO'. Otherwise returns ``(name, prices)`` with a cash
    price (``span.cy3``), a credit-card price (``span.txtOldPrice``) and
    a debit-card price (the second ``span.cy1``), all as ``Decimal``.
    """
    soup = BeautifulSoup(mechanize.Browser().open(url).get_data())

    title_tag = soup.find('td', 'cy2').find('strong')
    name = title_tag.contents[0].encode('ascii', 'ignore')

    notice = soup.find('b', {'style': 'color:red;'})
    if notice and 'PRODUCTO AGOTADO' in notice.string:
        return name, {}

    cash = Decimal(clean_price_string(soup.find('span', 'cy3').string))
    prices = dict.fromkeys(
        ['cash', 'deposit', 'wire_transfer', 'check'], cash)

    credit_text = soup.find('span', 'txtOldPrice').string
    prices['credit_card'] = Decimal(clean_price_string(credit_text))

    debit_text = soup.findAll('span', 'cy1')[1].string
    prices['debit_card'] = Decimal(clean_price_string(debit_text))

    return name, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    If fetching or parsing the page raises, returns ``(url, {})``.
    Otherwise returns ``(name, prices)``: the second ``h2`` (with the
    word 'cash' removed) is the cash price and the link inside the first
    ``h2`` is the price for the remaining methods, both as ``Decimal``.
    """
    browser = mechanize.Browser()
    try:
        soup = BeautifulSoup(browser.open(url).get_data())
    except Exception:
        # Best effort: an unreachable or unparseable page yields no prices.
        return url, {}

    heading_text = soup.find('h1').contents[0]
    name = heading_text.encode('ascii', 'ignore').strip()

    cash_text = soup.findAll('h2')[1].string.replace('cash', '')
    cash = Decimal(clean_price_string(cash_text))
    prices = dict.fromkeys(['cash', 'deposit', 'wire_transfer'], cash)

    regular_text = soup.find('h2').find('a').string
    regular = Decimal(clean_price_string(regular_text))
    regular_methods = ['check', 'debit_card', 'credit_card', 'ripley_card',
                       'presto_card']
    prices.update(dict.fromkeys(regular_methods, regular))

    return name, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    The cash price is read from ``span.main_precio_efectivo`` and the
    list price from ``span.main_precio_normal``. Check and debit-card
    payments are priced at 97% of the list price, rounded to a whole
    number.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    soup = BeautifulSoup(mechanize.Browser().open(url).get_data())

    # Splitting and re-joining collapses whitespace runs in the title.
    raw_title = soup.find('span', 'main_titulo_ficha_bold').string
    name = ' '.join(raw_title.encode('ascii', 'ignore').split())

    cash_text = soup.find('span', 'main_precio_efectivo').find(
        'strong').string
    cash = Decimal(clean_price_string(cash_text.string))
    prices = dict.fromkeys(['cash', 'deposit', 'wire_transfer'], cash)

    # The final character of the list-price text is discarded before
    # cleaning.
    list_text = soup.find('span', 'main_precio_normal').find(
        'strong').string[:-1]
    list_price = int(clean_price_string(list_text))

    discounted = Decimal(int(round(list_price * 0.97)))
    prices.update(dict.fromkeys(['check', 'debit_card'], discounted))

    full = Decimal(list_price)
    prices.update(dict.fromkeys(
        ['credit_card', 'ripley_card', 'presto_card'], full))

    return name, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    The name is the part of the page title after 'WEI CHILE S. A. - '.
    Returns ``(name, {})`` as soon as any cell of the
    ``table.pdisponibilidad`` availability table reports
    'Producto agotado'. Otherwise returns ``(name, prices)``; debit-card
    payments are priced at 97% of the list price, rounded.
    """
    soup = BeautifulSoup(mechanize.Browser().open(url).get_data())

    stock_cells = soup.find('table', 'pdisponibilidad').findAll('td')

    page_title = soup.find('title').string
    name = page_title.split('WEI CHILE S. A. - ')[1]
    name = name.encode('ascii', 'ignore')

    if any('Producto agotado' in cell.contents[1] for cell in stock_cells):
        return name, {}

    web_text = soup.find('table', 'pprecio').find('h1').string
    web_price = Decimal(clean_price_string(web_text))
    web_methods = ['credit_card', 'check', 'cash', 'deposit',
                   'wire_transfer']
    prices = dict.fromkeys(web_methods, web_price)

    list_price = int(clean_price_string(soup.findAll('h5')[1].string))
    prices['debit_card'] = Decimal(int(round(list_price * 0.97)))
    prices.update(dict.fromkeys(
        ['dated_check', 'presto_card', 'ripley_card'],
        Decimal(list_price)))

    return name, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Returns ``(url, {})`` when the page has no ``th`` with
    ``colspan="2"``. Otherwise the name is the text between the first
    pair of double quotes in that header, and the prices are read from
    the fourth (cash) and sixth (card) cells of the table nested inside
    the ``cellspacing="1"`` table.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    try:
        header_text = soup.find('th', {'colspan': '2'}).string
    except AttributeError:
        # No header cell was found, so there is nothing to scrape.
        return url, {}
    name = header_text.encode('ascii', 'ignore').split('"')[1]

    price_table = soup.find('table', {'cellspacing': '1'}).find('table')
    price_cells = price_table.findAll('td')

    cash_text = price_cells[3].find('strong').string
    cash = Decimal(clean_price_string(cash_text))
    prices = dict.fromkeys(['cash', 'deposit', 'wire_transfer'], cash)

    card_text = price_cells[5].find('strong').string
    card = Decimal(clean_price_string(card_text))
    prices.update(dict.fromkeys(['debit_card', 'credit_card'], card))

    return name, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    ``span.precio_producto_efectivo`` gives the cash price. When a
    ``span.otro_precio`` exists, the text after its '$' sign prices card
    and wire-transfer payments; otherwise those use the cash price.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    title = soup.find('span', 'titulo_producto').string
    title = title.encode('ascii', 'ignore')

    cash_span = soup.find('span', 'precio_producto_efectivo')
    cash = Decimal(clean_price_string(cash_span.string))
    prices = {'cash': cash}

    other_span = soup.find('span', 'otro_precio')
    if other_span:
        other_text = other_span.string.split('$')[1]
        other = Decimal(clean_price_string(other_text))
    else:
        other = cash

    prices.update(dict.fromkeys(
        ['credit_card', 'debit_card', 'wire_transfer'], other))
    return title, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    The name is the text of ``td.tit-nar-bold`` before the first bullet,
    with the ' » ' separator removed. Cash and normal prices are read
    from the links inside the cells identified by their background
    images.

    Returns a two-item list ``[name, prices]`` with ``Decimal`` values.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    title_cell = soup.find('td', 'tit-nar-bold')
    title = title_cell.contents[0].split('•')[0]
    title = title.replace(' » ', '').strip()

    def price_in_cell(background):
        # The price is the text of the link inside the cell that uses
        # the given background image.
        cell = soup.find('td', {'background': background})
        return Decimal(clean_price_string(cell.find('a').string))

    cash = price_in_cell('images/ficha/bg_efectivo_d.gif')
    prices = dict.fromkeys(['cash', 'deposit', 'wire_transfer'], cash)

    normal = price_in_cell('images/ficha/bg_precio_normal_d.gif')
    prices.update(dict.fromkeys(['debit_card', 'credit_card'], normal))

    return [title, prices]
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    The image inside the third ``div.textOtrosPrecios`` signals
    availability: when its ``src`` contains 'agotado' or 'proximo' the
    result is the tuple ``(title, {})``. Otherwise the result is the
    list ``[title, prices]``, where check payments are priced at 98% of
    the integer part of the normal price, rounded.
    """
    soup = BeautifulSoup(mechanize.urlopen(url))

    title_div = soup.find('div', 'textTituloProducto')
    title = title_div.string.strip().encode('ascii', 'ignore')

    status_div = soup.findAll('div', 'textOtrosPrecios')[2]
    status_image = status_div.find('img')['src']
    if any(marker in status_image for marker in ('agotado', 'proximo')):
        return title, {}

    cash_div = soup.find('div', 'textPrecioContado')
    cash = Decimal(clean_price_string(cash_div.string))
    prices = dict.fromkeys(['cash', 'deposit', 'wire_transfer'], cash)

    normal_div = soup.find('div', 'textOtrosPrecios')
    normal = Decimal(clean_price_string(normal_div.string))
    card_methods = ['debit_card', 'credit_card', 'presto_card',
                    'ripley_card']
    prices.update(dict.fromkeys(card_methods, normal))

    prices['check'] = Decimal(int(round(int(normal) * 0.98)))
    return [title, prices]
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Returns the list ``[title, {}]`` when the stock text mentions
    neither 'pedido' nor 'stock'. Otherwise returns the tuple
    ``(title, prices)``: the cash price comes from ``span.red`` and the
    check, debit and normal prices from consecutive ``li`` rows of
    ``div#product-info``, shifted by one when the first row has class
    ``li_ahorro``.
    """
    soup = BeautifulSoup(mechanize.Browser().open(url).get_data())

    stock_item = soup.find('li', 'stocks')
    stock_text = stock_item.findAll('span')[1].contents[0]

    title = str(soup.find('h1').string).strip()

    if not ('pedido' in stock_text or 'stock' in stock_text):
        return [title, {}]

    cash_text = soup.find('span', 'red').find('strong').string
    cash = Decimal(clean_price_string(cash_text))
    prices = dict.fromkeys(['cash', 'deposit', 'wire_transfer'], cash)

    rows = soup.find('div', {'id': 'product-info'}).findAll('li')
    try:
        shift = 1 if rows[0]['class'] == 'li_ahorro' else 0
    except KeyError:
        # The first row has no class attribute: no extra leading row.
        shift = 0

    def row_price(position):
        # Each price row keeps its amount in its second child node.
        return Decimal(clean_price_string(rows[shift + position].contents[1]))

    prices['check'] = row_price(1)
    prices['debit_card'] = row_price(2)
    normal = row_price(3)
    normal_methods = ['dated_check', 'credit_card', 'presto_card',
                      'ripley_card']
    prices.update(dict.fromkeys(normal_methods, normal))

    return title, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Only the cash price is read from the page. A normal price is derived
    as ``cash / 0.94`` (rounded), and the other payment methods are
    priced as rounded fractions of it: 97.5% for cards, 98% for checks
    and 100% for dated checks.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    soup = BeautifulSoup(mechanize.Browser().open(url).get_data())

    name = soup.find('h4', 'Estilo5').string.encode('ascii', 'ignore')

    # The red label locates the price row; the amount is the span in the
    # second cell under the label's third ancestor.
    label = soup.find('span', {'style': 'color: #F00; font-size:12px;'})
    price_span = label.parent.parent.parent.findAll('td')[1].find('span')
    cash = int(clean_price_string(price_span.string))

    prices = dict.fromkeys(['cash', 'wire_transfer', 'deposit'],
                           Decimal(cash))

    normal = int(round(cash / 0.94))
    card_price = Decimal(int(round(normal * 0.975)))
    prices.update(dict.fromkeys(
        ['debit_card', 'credit_card', 'presto_card'], card_price))
    prices['check'] = Decimal(int(round(normal * 0.98)))
    prices['dated_check'] = Decimal(normal)

    return name, prices
def _retrieve_product(cls, url):
    """Scrape the product name and price from the page at ``url``.

    Return shapes, by situation:

    * ``[url, {}]`` when opening the URL raises ``HTTPError``;
    * ``(url, {})`` when the ``h1#catalog_link`` heading is missing;
    * ``(name, {})`` when the page has a ``span.button_bottom``;
    * ``[name, prices]`` otherwise, with one ``Decimal`` price shared by
      every payment method.
    """
    try:
        response = mechanize.urlopen(url)
    except HTTPError:
        return [url, {}]

    soup = BeautifulSoup(response.read())

    try:
        title = soup.find('h1', {'id': 'catalog_link'}).string
    except AttributeError:
        return url, {}
    title = title.strip().encode('ascii', 'ignore')

    # Product not available check
    if soup.find('span', 'button_bottom'):
        return title, {}

    offer_text = soup.find('span', {'id': 'offerPrice'}).string
    offer = Decimal(clean_price_string(offer_text))
    methods = ['cash', 'debit_card', 'credit_card', 'abcdin_card']
    prices = dict.fromkeys(methods, offer)

    return [title, prices]
def _retrieve_product(cls, url):
    """Scrape the product name and its lowest price from ``url``.

    Every ``h3`` whose text is 'Pack Prepago' or 'Compra equipo' is a
    pricing option; the price is the ``span.valorPrecio`` found under
    that heading's parent. The cheapest option prices all payment
    methods.

    Returns a two-item list ``[title, prices]``. ``prices`` always holds
    ``Decimal`` values; when no pricing option is found the price is
    ``Decimal(0)``.
    """
    browser = mechanize.Browser()
    product_data = browser.open(url).get_data()
    soup = BeautifulSoup(product_data)

    product_title = soup.find('h1').contents[0].strip()
    product_title = product_title.encode('ascii', 'ignore')

    product_prices = []
    for title in soup.findAll('h3'):
        if title.string not in ['Pack Prepago', 'Compra equipo']:
            continue
        price_container = title.parent.find('span', 'valorPrecio')
        if price_container:
            price = clean_price_string(price_container.string)
            product_prices.append(Decimal(price))

    # Fall back to Decimal(0), not int 0, so the value type is the same
    # on every path.
    if product_prices:
        product_price = min(product_prices)
    else:
        product_price = Decimal(0)

    prices = {}
    for p in ['cash', 'debit_card', 'credit_card']:
        prices[p] = product_price

    return [product_title, prices]
def _retrieve_product(cls, url):
    """Scrape the product name and price from the page at ``url``.

    Side effect: on every call, builds a cookie-aware opener with custom
    'User-agent' and 'From' headers and installs it through
    ``mechanize.install_opener``.

    The single page price applies to cash, deposit and wire transfer.
    Debit and credit cards are added when ``td.descr`` has no ``h3``
    headings, or when the first heading has text that does not contain
    'Contado'.

    Returns a two-item list ``[name, prices]`` with ``Decimal`` values.
    """
    jar = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(jar))
    opener.addheaders = [
        ('User-agent', 'Mozilla/5.0 (MyProgram/0.1)'),
        ('From', '*****@*****.**'),
    ]
    mechanize.install_opener(opener)

    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    name = soup.find('h1').string.encode('ascii', 'ignore')

    price_span = soup.find('span', {'id': 'product_price'})
    price = Decimal(clean_price_string(price_span.string))

    methods = ['cash', 'deposit', 'wire_transfer']
    headings = soup.find('td', 'descr').findAll('h3')
    if not headings or (headings[0].string and
                        'Contado' not in headings[0].string):
        methods.extend(['debit_card', 'credit_card'])

    prices = dict.fromkeys(methods, price)
    return [name, prices]
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Returns the tuple ``(url, {})`` when the link inside
    ``div#destacadoRuta`` has no text. Otherwise returns the list
    ``[product_name, prices]`` with ``Decimal`` values:

    * when ``div#skuPrice`` contains an ``opUnica`` block, ``precio1`` is
      the ``cmr_card`` price and ``precio2`` (the text after ':') prices
      the other methods;
    * otherwise ``precio1`` prices every method, the CMR card included.
    """
    browser = mechanize.Browser()
    product_data = browser.open(url).get_data()
    product_soup = BeautifulSoup(product_data)

    pn = product_soup.find('div', {'id': 'destacadoRuta'}).find('a').string
    if not pn:
        return url, {}

    # Normalise whitespace in the name.
    # NOTE(review): as rendered here the first replace() swaps a space
    # for a space; the first argument may have been a non-breaking
    # space originally -- confirm against the upstream file.
    pn = pn.replace(' ', ' ').replace('\r', ' ').replace('\n', ' ')
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    pn = ' '.join(re.split(r'\s+', pn.replace('\t', ' ')))
    product_name = pn.encode('ascii', 'ignore')

    prices = {}
    price_container = product_soup.find('div', {'id': 'skuPrice'})
    op_unica_cmr = bool(price_container.findAll('div', 'opUnica'))

    if op_unica_cmr:
        # CMR Price
        cmr_price = price_container.find('div', 'precio1')
        cmr_price = Decimal(clean_price_string(cmr_price.contents[2]))
        prices['cmr_card'] = cmr_price

        # Sale price
        sale_price = price_container.find('div', 'precio2')
        sale_price = sale_price.string.split(':')[1]
        sale_price = Decimal(clean_price_string(sale_price))
        for p in ['cash', 'debit_card', 'credit_card']:
            prices[p] = sale_price
    else:
        # Internet Price
        inet_price = price_container.find('div', 'precio1')
        inet_price = Decimal(clean_price_string(inet_price.contents[2]))
        for p in ['cmr_card', 'cash', 'debit_card', 'credit_card']:
            prices[p] = inet_price

    return [product_name, prices]
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    ``span#our_price_display`` is the cash price and
    ``span#old_price_display`` prices debit and credit cards.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values; the
    name is returned exactly as the first ``h2`` provides it.
    """
    soup = BeautifulSoup(mechanize.Browser().open(url).get_data())
    title = soup.find('h2').string

    cash_text = soup.find('span', {'id': 'our_price_display'}).string
    prices = {'cash': Decimal(clean_price_string(cash_text))}

    old_text = soup.find('span', {'id': 'old_price_display'}).string
    old_price = Decimal(clean_price_string(old_text))
    prices.update(dict.fromkeys(['debit_card', 'credit_card'], old_price))

    return title, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    The internet price is read from ``div.textodetallesrojo`` (or from
    its inner ``div`` when the outer one has no direct text); an
    optional normal price comes from ``span.normalHOME``.

    * When the card-exclusive image is present, the internet price
      applies to ``ripley_card`` only, and the normal price (if any)
      applies to cash, debit and credit.
    * Otherwise the internet price applies to debit, credit and
      ``ripley_card``, and the normal price (if any) applies to cash.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    soup = BeautifulSoup(mechanize.Browser().open(url).get_data())

    title_span = soup.find('span', 'textogrisbold')
    name = title_span.string.encode('ascii', 'ignore')

    web_div = soup.find('div', 'textodetallesrojo')
    web_text = web_div.string
    if not web_text:
        web_text = web_div.find('div').string
    web_price = Decimal(clean_price_string(web_text.split('$')[1]))

    regular = soup.find('span', 'normalHOME')
    if regular:
        regular = Decimal(clean_price_string(regular.string.split('$')[1]))

    card_only = soup.find(
        'img', {'src': '/wcsstore/Ripley/en_US/images/tarjeta.gif'})

    prices = {}
    if card_only:
        prices['ripley_card'] = web_price
        if regular:
            prices.update(dict.fromkeys(
                ['cash', 'debit_card', 'credit_card'], regular))
    else:
        prices.update(dict.fromkeys(
            ['debit_card', 'credit_card', 'ripley_card'], web_price))
        if regular:
            prices['cash'] = regular

    return name, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Return shapes, by situation:

    * ``[url, {}]`` when parsing the page raises ``UnicodeEncodeError``;
    * ``(name, {})`` when the ``div.stock`` block is missing or its
      fifth child contains 'Agotado';
    * ``(name, prices)`` otherwise: the second ``div.precioDetalle`` is
      the cash price and the first one prices cards and checks.
    """
    html = mechanize.Browser().open(url).get_data()
    try:
        soup = BeautifulSoup(html)
    except UnicodeEncodeError:
        return [url, {}]

    name = soup.find('div', 'titleContent').string.encode('ascii', 'ignore')

    try:
        stock_text = soup.find('div', 'stock').contents[4]
    except AttributeError:
        # No stock block at all is treated like an unavailable product.
        return name, {}
    if 'Agotado' in stock_text:
        return name, {}

    cash_text = soup.findAll('div', 'precioDetalle')[1].string
    cash = Decimal(clean_price_string(cash_text.split('$')[1]))
    prices = dict.fromkeys(['cash', 'deposit', 'wire_transfer'], cash)

    normal_text = soup.find('div', 'precioDetalle').string
    normal = Decimal(clean_price_string(normal_text.split('$')[1]))
    prices.update(dict.fromkeys(
        ['debit_card', 'credit_card', 'check'], normal))

    return name, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    The bold text inside ``span`` 'precio oferta' is the up-front price
    (cash, check, deposit, wire transfer); the bold text inside the
    first ``span`` matching 'precio' prices the remaining methods.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    title = soup.find('h1').string.encode('ascii', 'ignore')

    offer_tag = soup.find('span', 'precio oferta').find('b')
    offer = Decimal(clean_price_string(offer_tag.string))
    prices = dict.fromkeys(
        ['cash', 'check', 'deposit', 'wire_transfer'], offer)

    regular_tag = soup.find('span', 'precio').find('b')
    regular = Decimal(clean_price_string(regular_tag.string))
    regular_methods = ['dated_check', 'credit_card', 'debit_card',
                       'presto_card', 'ripley_card']
    prices.update(dict.fromkeys(regular_methods, regular))

    return title, prices
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Returns ``[name, {}]`` when the page shows a
    ``span.warning_inline``. Otherwise returns ``[name, prices]``:
    ``span#our_price_display`` is the cash price and ``span.value``
    prices debit and credit cards, both as ``Decimal``.
    """
    soup = BeautifulSoup(mechanize.urlopen(url))
    title = soup.find('h1').string.strip().encode('ascii', 'ignore')

    if soup.find('span', 'warning_inline'):
        return [title, {}]

    cash_text = soup.find('span', {'id': 'our_price_display'}).string
    cash = Decimal(clean_price_string(cash_text))
    prices = dict.fromkeys(['cash', 'deposit', 'wire_transfer'], cash)

    card_text = soup.find('span', 'value').string
    card = Decimal(clean_price_string(card_text))
    prices.update(dict.fromkeys(['debit_card', 'credit_card'], card))

    return [title, prices]
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Returns ``[name, {}]`` when the ``td.disp`` stock cell is missing or
    its contents mention 'Agotado'. Otherwise returns ``[name, prices]``:
    ``td.prcm`` is the wire-transfer price, the first ``td.prc8`` prices
    cash and checks, and the second ``td.prc8`` prices dated checks and
    cards. All prices are ``Decimal``.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    stock_cell = soup.find('td', 'disp')
    title_cell = soup.find('td', 'menuprodg')
    name = title_cell.contents[0].strip()
    name = name.encode('ascii', 'ignore').strip()

    if not stock_cell:
        return [name, {}]
    if 'Agotado' in ''.join(map(str, stock_cell.contents)):
        return [name, {}]

    web_cell = soup.find('td', 'prcm')
    prices = {'wire_transfer': Decimal(clean_price_string(web_cell.string))}

    cash_cell = soup.find('td', 'prc8')
    cash = Decimal(clean_price_string(cash_cell.string))
    prices.update(dict.fromkeys(['cash', 'check'], cash))

    normal_cell = soup.findAll('td', 'prc8')[1]
    normal = Decimal(clean_price_string(normal_cell.string))
    prices.update(dict.fromkeys(
        ['dated_check', 'debit_card', 'credit_card'], normal))

    return [name, prices]
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Returns ``(url, {})`` when fetching the page raises
    ``BadStatusLine``. Otherwise the name is the second ``h1`` and the
    prices come from the ``span.price`` elements: the second one is the
    up-front price and the fourth one is the card price.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    browser = mechanize.Browser()
    try:
        html = browser.open(url).get_data()
    except BadStatusLine:
        return url, {}

    soup = BeautifulSoup(html)
    title = soup.findAll('h1')[1].string.strip().encode('ascii', 'ignore')

    price_spans = soup.findAll('span', 'price')
    upfront = Decimal(clean_price_string(price_spans[1].string))
    card = Decimal(clean_price_string(price_spans[3].string))

    prices = dict.fromkeys(['wire_transfer', 'deposit', 'cash'], upfront)
    prices.update(dict.fromkeys(
        ['debit_card', 'credit_card', 'presto_card'], card))

    return title, prices
def _retrieve_product(cls, url):
    """Scrape the product name and price from the page at ``url``.

    The name is the second ``h2`` and the price is the first child of
    the ``span`` marked ``itemprop="price"``; that one price applies to
    every payment method.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    title = soup.findAll('h2')[1].string.strip().encode('ascii', 'ignore')

    amount_text = soup.find('span', {'itemprop': 'price'}).contents[0]
    amount = Decimal(clean_price_string(amount_text))

    methods = ['cash', 'credit_card', 'debit_card', 'wire_transfer']
    return title, dict.fromkeys(methods, amount)
def _retrieve_product(cls, url):
    """Scrape the product name and price from the page at ``url``.

    The name is the ``alt`` text of the image inside
    ``div#scpcc_title``. The price is the text after the '$' sign in the
    last ``span`` of the first ``tr`` or ``td`` with class
    ``pricing_dotdotdot``.

    Returns a two-item list ``[title, prices]`` with ``Decimal`` values.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    title_image = soup.find('div', {'id': 'scpcc_title'}).find('img')
    title = title_image['alt'].encode('ascii', 'ignore')

    pricing_row = soup.find(['tr', 'td'], 'pricing_dotdotdot')
    amount_text = pricing_row.findAll('span')[-1].string.split('$')[1]
    amount = Decimal(clean_price_string(amount_text))

    methods = ['credit_card', 'deposit', 'wire_transfer']
    return [title, dict.fromkeys(methods, amount)]
def _retrieve_product(cls, url):
    """Scrape the product name and price from the page at ``url``.

    The name is the sixth ``h2`` on the page. The price is the second
    ``dd`` inside ``div.detallesCompra``; if reading or converting it
    raises, the result is ``(name, {})``.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    soup = BeautifulSoup(mechanize.Browser().open(url).get_data())
    title = soup.findAll('h2')[5].string.encode('ascii', 'ignore')

    try:
        purchase_box = soup.find('div', 'detallesCompra')
        amount_text = purchase_box.findAll('dd')[1].string
        amount = Decimal(clean_price_string(amount_text))
    except Exception:
        # Best effort: a page without a readable price yields no prices.
        return title, {}

    return title, dict.fromkeys(['cash', 'debit_card', 'credit_card'],
                                amount)
def _retrieve_product(cls, url):
    """Scrape the product name and price from the page at ``url``.

    Returns ``(name, {})`` when the number in ``span.unidades`` is zero.
    Otherwise the second child of ``div.precio_det`` is the price for
    every payment method.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    title = soup.find('div', 'tit_prod_det').string
    title = title.encode('ascii', 'ignore').strip()

    units_left = int(soup.find('span', 'unidades').string)
    if not units_left:
        return title, {}

    amount_text = soup.find('div', 'precio_det').contents[1]
    amount = Decimal(clean_price_string(amount_text))

    return title, dict.fromkeys(['cash', 'debit_card', 'credit_card'],
                                amount)
def _retrieve_product(cls, url):
    """Scrape the product name and price from the page at ``url``.

    The URL is fetched twice with the same browser; only the second
    response is scraped. The name is the last ``h2`` and the price is
    the text after the '$' sign in ``h1.bigtitle``.

    Returns a two-item list ``[name, prices]`` with ``Decimal`` values
    for debit and credit cards.
    """
    browser = mechanize.Browser()

    # Double open URL because the first redirects to home and sets a cookie
    BeautifulSoup(browser.open(url).get_data())
    soup = BeautifulSoup(browser.open(url).get_data())

    title = soup.findAll('h2')[-1].string.encode('ascii', 'ignore')

    price_heading = soup.find('h1', 'bigtitle')
    amount_text = price_heading.contents[0].split('$')[1]
    amount = Decimal(clean_price_string(amount_text))

    return [title, dict.fromkeys(['debit_card', 'credit_card'], amount)]
def _retrieve_product(cls, url):
    """Scrape the product name and price from the page at ``url``.

    The name is the third child of the first ``h1``. The price is the
    ``span`` inside ``li.pnormalForeverAlone`` when that element exists,
    and ``Decimal(0)`` otherwise.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    title = soup.find('h1').contents[2].encode('ascii', 'ignore').strip()

    price_item = soup.find('li', 'pnormalForeverAlone')
    if not price_item:
        amount = Decimal(0)
    else:
        amount = Decimal(clean_price_string(price_item.find('span').string))

    return title, dict.fromkeys(['cash', 'debit_card', 'credit_card'],
                                amount)
def _retrieve_product(cls, url):
    """Scrape the product name and prices from the page at ``url``.

    Only the up-front price (``span#our_price_display``) is read from
    the page. A full price is derived as ``up-front / 0.95`` (rounded);
    checks are priced at 97% of that full price (rounded), and dated
    checks and cards at the full price.

    Returns a ``(name, prices)`` tuple with ``Decimal`` price values; the
    name is returned exactly as the first ``h1`` provides it.
    """
    html = mechanize.Browser().open(url).get_data()
    soup = BeautifulSoup(html)

    title = soup.find('h1').string

    upfront_text = soup.find('span', {'id': 'our_price_display'}).string
    upfront = int(clean_price_string(upfront_text))
    prices = dict.fromkeys(['cash', 'deposit', 'wire_transfer'],
                           Decimal(upfront))

    full = int(round(upfront / 0.95))
    prices['check'] = Decimal(int(round(full * 0.97)))
    prices.update(dict.fromkeys(
        ['dated_check', 'credit_card', 'debit_card'], Decimal(full)))

    return title, prices