def assertData(self, url, name, price, image_url, categories):
    """Check the data extracted by the parser selected for ``url``.

    The parsed ``name``, ``price`` and ``image_url`` must equal the expected
    values, and the parsed categories must contain exactly the same members
    as ``categories`` (order is not compared).
    """
    data = get_parser(url).get_data()
    assert data['name'] == name
    assert data['price'] == price
    assert data['image_url'] == image_url

    found = data['categories']
    # Every expected category was parsed ...
    assert all(expected in found for expected in categories)
    # ... and nothing unexpected was parsed.
    assert all(actual in categories for actual in found)
def new_product():
    """Render the new-product form and handle its submission.

    A request that is not a POST, or a POST with an empty ``url`` field, just
    renders ``new_product.html``. Otherwise the submitted URL is handed to
    ``get_parser``: with no parser the request is aborted with 400; if a
    product with the minified URL already exists the form is rendered again;
    otherwise the product is stored and the client is redirected to
    ``show_product``. A ``ValueError`` while creating the product aborts
    with 400.
    """
    is_submission = request.method == 'POST' and request.form['url']
    if not is_submission:
        return render_template('new_product.html')

    parser = get_parser(request.form['url'])
    if parser is None:
        return abort(400)

    minified_url = parser.minify_url()
    existing = Product.query.filter_by(url=minified_url).first()
    if existing:
        # TODO: add flash message.
        return render_template('new_product.html')

    try:
        created = Product(minified_url, parser=parser)
        db.session.add(created)
        db.session.commit()
        return redirect(url_for('show_product', product_id=created.id))
    except ValueError:
        abort(400)
    # Only reached if abort() returns instead of raising.
    return render_template('new_product.html')
def update_price(product_id, product_currency='UYP', save_html_route='prices_html', recursion_limit=0, e=None):
    """Fetch a product's page and store its current price.

    The product id is recorded in the current job's ``meta``. The product URL
    is requested without following redirects; a 302, any other status >= 300,
    or a parsed name that differs from the stored one is recorded as a
    ``PriceLogError``. Otherwise a ``PriceLog`` is stored and, best effort,
    the raw response is saved gzipped under ``save_html_route``.

    Args:
        product_id: Primary key passed to ``Product.query.get``.
        product_currency: Currency stored on the new ``PriceLog``.
        save_html_route: Directory under which a per-day folder with the
            gzipped pages is kept.
        recursion_limit: Number of retries already performed after timeouts.
        e: Timeout exception from the previous attempt; re-raised once
            ``recursion_limit`` exceeds 5.

    Returns:
        ``False`` when a ``PriceLogError`` was recorded, ``None`` when a
        ``PriceLog`` was stored. After a timeout, the result of the retry.

    Raises:
        The exception given as ``e`` when ``recursion_limit`` is above 5.
    """
    if recursion_limit > 5:
        raise e

    job = get_current_job()
    job.meta['product_id'] = product_id
    job.save()

    # Imported here to avoid problems with circular imports.
    import gzip
    from web.trackerapp import db
    from web.models import Product, PriceLog, PriceLogError

    def record_error(product, message):
        """Store a PriceLogError for this job and report failure."""
        price_log_error = PriceLogError(product, job.id)
        price_log_error.message = message
        db.session.add(price_log_error)
        db.session.commit()
        return False

    try:
        product = Product.query.get(product_id)
        result = requests.get(product.url, timeout=30, allow_redirects=False)

        if result.status_code == 302:
            return record_error(
                product,
                "Product URL is redirected with %d." % (result.status_code))

        if result.status_code >= 300:
            # Page error other than the "temporarily moved" case above.
            return record_error(
                product,
                "Product URL returns wrong status code: %d." % (result.status_code))

        parser = get_parser(product.url)
        product_data = parser.get_data(result.text)

        if product_data['name'] != product.name:
            return record_error(
                product,
                "Product Name differs from original. Original: %s, New: %s" % (product.name, product_data['name']))

        if product_data['image_url'] != product.original_img:
            # The image URL changed, so update the stored one.
            product.original_img = product_data['image_url']
            db.session.add(product)

        new_price = product_data['price']
        price_log = PriceLog(new_price, product_currency, product)

        # Best effort: save the raw page as a .gz file and keep the file name
        # on the price log. A failure here must not prevent storing the price.
        try:
            now = datetime.utcnow()
            folder_path = "%s/%s" % (os.path.abspath(save_html_route), now.strftime("%Y-%m-%d"))
            if not os.path.isdir(folder_path):
                try:
                    os.mkdir(folder_path)
                except OSError:
                    # Another job may have created the folder in the meantime.
                    if not os.path.isdir(folder_path):
                        raise
            file_name = "product_%d-%s.html.gz" % (product.id, now.strftime("%H_%M_%S"))
            # The context manager closes the file even if the write fails.
            with gzip.open("%s/%s" % (folder_path, file_name), 'wb') as f:
                f.write(result.content)
            price_log.html_file_name = file_name
        except Exception:
            job.meta['error'] = 'Error while saving gz.'
            job.save()

        db.session.add(price_log)
        db.session.commit()
    except requests.exceptions.Timeout as ex:
        # Server down or overloaded: wait and try again, keeping the caller's
        # currency and save path and propagating the retry's result.
        time.sleep(60)
        return update_price(product_id, product_currency, save_html_route,
                            recursion_limit + 1, ex)
def create_products_with_prices():
    """Seed the database with sources, test products and a synthetic price history.

    Creates one ``Source`` per supported domain, then one ``Product`` per URL
    below (its name suffixed with ``' - TESTING'``). For each product, 700
    daily ``PriceLog`` rows are generated, starting 700 days ago at the
    product's current price; after each day the price is changed by
    ``choice(increments)`` and kept at 1 or above.
    """
    history_days = 700

    print("Creating sources")
    for domain in ("tinglesa.com.uy", "devoto.com.uy", "multiahorro.com.uy"):
        db.session.add(Source(domain=domain))
    db.session.commit()

    print("Creating products with prices.")

    # Tienda Inglesa
    tienda_inglesa_urls = [
        'http://www.tinglesa.com.uy/producto.php?idarticulo=9974',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=9578',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=219091',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=5478',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=8764',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=1849',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=1788',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=5994',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=1151',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=6008',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=6185',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=6004',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=6280',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=23125',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=212',
        'http://www.tinglesa.com.uy/producto.php?idarticulo=1026',
    ]

    # Devoto
    devoto_urls = [
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,4253,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,4254,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,4249,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,4250,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,5132,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,48552,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,4909,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,4910,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,126854,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,4256,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,126918,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,126862,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,126917,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,126861,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,132213,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,4245,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,4244,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,141235,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,14026,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,6925,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,6926,0,274,1',
        'http://www.devoto.com.uy/mvdcommerce/servlet/hdetalleproductop?2,1,5744,0,274,1',
    ]

    # Multiahorro
    multiahorro = [
        'http://www.multiahorro.com.uy/Product.aspx?p=168548',
        'http://www.multiahorro.com.uy/Product.aspx?p=118216',
        'http://www.multiahorro.com.uy/Product.aspx?p=118223',
        'http://www.multiahorro.com.uy/Product.aspx?p=118230',
        'http://www.multiahorro.com.uy/Product.aspx?p=163355',
        'http://www.multiahorro.com.uy/Product.aspx?p=163359',
        'http://www.multiahorro.com.uy/Product.aspx?p=163366',
        'http://www.multiahorro.com.uy/Product.aspx?p=163367',
        'http://www.multiahorro.com.uy/Product.aspx?p=134130',
        'http://www.multiahorro.com.uy/Product.aspx?p=118224',
        'http://www.multiahorro.com.uy/Product.aspx?p=176558',
        'http://www.multiahorro.com.uy/Product.aspx?p=176554',
        'http://www.multiahorro.com.uy/Product.aspx?p=118225',
        'http://www.multiahorro.com.uy/Product.aspx?p=100826',
        'http://www.multiahorro.com.uy/Product.aspx?p=178870',
        'http://www.multiahorro.com.uy/Product.aspx?p=193131',
        'http://www.multiahorro.com.uy/Product.aspx?p=193132',
        'http://www.multiahorro.com.uy/Product.aspx?p=100774',
        'http://www.multiahorro.com.uy/Product.aspx?p=100773',
        'http://www.multiahorro.com.uy/Product.aspx?p=100771',
        'http://www.multiahorro.com.uy/Product.aspx?p=100768',
        'http://www.multiahorro.com.uy/Product.aspx?p=100770',
        'http://www.multiahorro.com.uy/Product.aspx?p=193197',
        'http://www.multiahorro.com.uy/Product.aspx?p=176686',
        'http://www.multiahorro.com.uy/Product.aspx?p=167302',
        'http://www.multiahorro.com.uy/Product.aspx?p=167299',
        'http://www.multiahorro.com.uy/Product.aspx?p=167307',
        'http://www.multiahorro.com.uy/Product.aspx?p=118218',
        'http://www.multiahorro.com.uy/Product.aspx?p=118220',
        'http://www.multiahorro.com.uy/Product.aspx?p=143160',
        'http://www.multiahorro.com.uy/Product.aspx?p=193124',
        'http://www.multiahorro.com.uy/Product.aspx?p=193123',
        'http://www.multiahorro.com.uy/Product.aspx?p=193126',
        'http://www.multiahorro.com.uy/Product.aspx?p=193125',
        'http://www.multiahorro.com.uy/Product.aspx?p=176606',
        'http://www.multiahorro.com.uy/Product.aspx?p=176586',
        'http://www.multiahorro.com.uy/Product.aspx?p=176610',
        'http://www.multiahorro.com.uy/Product.aspx?p=176663',
        'http://www.multiahorro.com.uy/Product.aspx?p=176664',
        'http://www.multiahorro.com.uy/Product.aspx?p=176684',
        'http://www.multiahorro.com.uy/Product.aspx?p=176685',
        'http://www.multiahorro.com.uy/Product.aspx?p=176687',
        'http://www.multiahorro.com.uy/Product.aspx?p=176609',
        'http://www.multiahorro.com.uy/Product.aspx?p=176682',
        'http://www.multiahorro.com.uy/Product.aspx?p=176683',
        'http://www.multiahorro.com.uy/Product.aspx?p=100720',
        'http://www.multiahorro.com.uy/Product.aspx?p=100719',
        'http://www.multiahorro.com.uy/Product.aspx?p=118229',
        'http://www.multiahorro.com.uy/Product.aspx?p=100711',
        'http://www.multiahorro.com.uy/Product.aspx?p=143614',
        'http://www.multiahorro.com.uy/Product.aspx?p=143636',
        'http://www.multiahorro.com.uy/Product.aspx?p=171686',
        'http://www.multiahorro.com.uy/Product.aspx?p=143292',
        'http://www.multiahorro.com.uy/Product.aspx?p=143598',
        'http://www.multiahorro.com.uy/Product.aspx?p=167306',
        'http://www.multiahorro.com.uy/Product.aspx?p=167300',
        'http://www.multiahorro.com.uy/Product.aspx?p=118222',
        'http://www.multiahorro.com.uy/Product.aspx?p=118227',
        'http://www.multiahorro.com.uy/Product.aspx?p=176582',
        'http://www.multiahorro.com.uy/Product.aspx?p=176587',
    ]

    list_of_lists = [multiahorro, devoto_urls, tienda_inglesa_urls]
    for url_list in list_of_lists:
        for url in url_list:
            parser = get_parser(url)
            p = Product(url, parser=parser)
            p.name += ' - TESTING'
            # Single-argument form prints the same text on Python 2 and 3.
            print("Adding: %s" % url)
            db.session.add(p)
            db.session.commit()

            price = p.get_price()
            fetched_date = date.today() - timedelta(days=history_days)
            for _ in range(history_days):
                p_log = PriceLog(price=price, currency="UYP", product=p, fetched_date=fetched_date)
                db.session.add(p_log)
                price += choice(increments)
                # Never let the synthetic price drop to zero or below.
                price = price if price > 0 else 1
                fetched_date = fetched_date + timedelta(days=1)
            # One commit per product instead of one per generated row.
            db.session.commit()
def assertMinification(self, url, minified):
    """Assert that the parser selected for ``url`` minifies it to ``minified``."""
    assert get_parser(url).minify_url() == minified