def test_get_unavailable_product_should_return_empty_price_list(self):
    """Check that an unavailable product page has a notice and no price.

    Fetches the product page through ``Challenge.do_request`` (presumably a
    live HTTP request -- confirm against the Challenge module) and inspects
    the parsed ``'response'`` tree with XPath.
    """
    challenge = Challenge()
    url = '''http://www.americanas.com.br/produto/6991162/utilidadesdomesticas/aparelhodejantar/20pecas/para4pessoas/conjunto-de-jantar-ipanema-c/-20-pecas-porto-fino'''
    data = challenge.do_request(url)['response']
    results = data.xpath("//span[@class='amount']/text()")
    unavailable = data.xpath("//div[@class='unavailProd']/text()")
    # Two specific assertions instead of one compound assertTrue: a failure
    # now names the condition that broke and reports the actual count.
    self.assertGreater(len(unavailable), 0)
    self.assertEqual(len(results), 0)
def test_get_price_sending_headers_should_be_ok(self):
    """Check that the level1 page yields ``<div>`` text when a user-agent is sent.

    The header dict is passed as the second positional argument of
    ``Challenge.do_request``; the parsed ``'response'`` tree is then queried
    with XPath.
    """
    challenge = Challenge()
    url = '''http://hughes.sieve.com.br:9090/level1/'''
    header = {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.77 Safari/537.1',
    }
    data = challenge.do_request(url, header)['response']
    results = data.xpath("//div/text()")
    # assertGreater reports the actual count on failure, unlike assertTrue.
    self.assertGreater(len(results), 0)
def test_get_price_with_cookie_should_be_ok(self):
    """Check that the level2 page yields ``<div>`` text when the cookie is sent.

    The cookie dict is passed as the third positional argument of
    ``Challenge.do_request`` (no headers are supplied); the parsed
    ``'response'`` tree is then queried with XPath.
    """
    challenge = Challenge()
    url = '''http://hughes.sieve.com.br:9090/level2/'''
    cookie = {'d53db4de415c4e858dc761595623a898': '+'}
    data = challenge.do_request(url, None, cookie)['response']
    results = data.xpath("//div/text()")
    # assertGreater reports the actual count on failure, unlike assertTrue.
    self.assertGreater(len(results), 0)
def test_get_available_product_should_return_price(self):
    """Check that an available product page has a price and no unavailability notice.

    Fetches the product page through ``Challenge.do_request`` (presumably a
    live HTTP request -- confirm against the Challenge module) and inspects
    the parsed ``'response'`` tree with XPath.
    """
    challenge = Challenge()
    url = '''http://www.americanas.com.br/produto/110595674/informatica/notebooks/notebooks/notebook-gateway-nv55c12b-com-intel-core-i3-2gb-320gb-led-156-windows-7-home-basic'''
    data = challenge.do_request(url)['response']
    results = data.xpath("//span[@class='amount']/text()")
    unavailable = data.xpath("//div[@class='unavailProd']/text()")
    # Two specific assertions instead of one compound assertTrue: a failure
    # now names the condition that broke and reports the actual count.
    self.assertEqual(len(unavailable), 0)
    self.assertGreater(len(results), 0)
def load_data(self):
    """Populate ``self.challenges`` from ``data.json`` and start the EEG thread.

    ``data.json`` is read from the current working directory. Its top level
    maps a key to a list of challenge records; each record supplies the
    ``name``, ``stages_duration``, ``stages_data`` and ``text`` entries that
    are handed, after the key, to ``Challenge.Challenge``. Each built object
    is appended to ``self.challenges[key]``, and ``self.eeg_thread`` is
    started once everything has been loaded.
    """
    record_fields = ("name", "stages_duration", "stages_data", "text")

    with open("data.json") as handle:
        catalog = json.load(handle)

    for key, records in catalog.items():
        for record in records:
            values = [record[field] for field in record_fields]
            built = Challenge.Challenge(key, *values)
            self.challenges[key].append(built)

    self.eeg_thread.start()
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Fetch the level4 challenge page, sending the cookie it expects, and print
# the price found in the first <p> element.
#
# The shebang must be the very first line to have any effect; the encoding
# declaration is still honoured on line 2.
import sys

sys.path.append('modules')
from Challenge import Challenge

challenge = Challenge()
url = "http://hughes.sieve.com.br:9090/level4/"
cookie = {'cade-meu-cookie': 'esta aqui'}
data = challenge.do_request(url, None, cookie)['response']
results = data.xpath("//p/text()")
if not results:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("No <p> text found at " + url + "; cannot extract a price.")
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("Preço: " + str(challenge.get_float_price(results[0])))
def scrape():
    """Run the scraper and store its result in the ``mars`` collection.

    The document returned by ``Challenge.scrape_all()`` replaces whatever
    document the empty filter matches, or is inserted when the collection is
    empty (``upsert=True``).

    Returns:
        The fixed confirmation string ``"Scraping Successful!"``.
    """
    mars = mongo.db.mars
    mars_data = Challenge.scrape_all()
    # Collection.update() was deprecated in PyMongo 3 and removed in
    # PyMongo 4. replace_one() is the equivalent for a full-document
    # replacement; this assumes scrape_all() returns a plain document
    # without $-operators -- TODO confirm.
    mars.replace_one({}, mars_data, upsert=True)
    return "Scraping Successful!"
# Tkinter lives under different module names on Python 2 and Python 3;
# try the Python 2 spelling first and fall back to the Python 3 one.
try:
    import Tkinter as tk
except ImportError:
    import tkinter as tk

try:
    import ttk
except ImportError:
    import tkinter.ttk as ttk
    py3 = True
else:
    py3 = False


def init(top, gui, *args, **kwargs):
    """Record the toplevel window and GUI object in module-level globals.

    ``gui`` is stored as ``w``; ``top`` is stored as both ``top_level`` and
    ``root``. Extra positional and keyword arguments are accepted and
    ignored.
    """
    global w, top_level, root
    root = top_level = top
    w = gui


def destroy_window():
    """Destroy the window stored in ``top_level`` and reset it to ``None``."""
    global top_level
    top_level.destroy()
    top_level = None


if __name__ == '__main__':
    # Run as a script: delegate to the GUI module's entry point.
    import Challenge
    Challenge.vp_start_gui()
import json
import pandas as pd
import numpy as np
import re
from sqlalchemy import create_engine
from config import db_password
import sys
# Notebook-only command to install the PostgreSQL driver:
# !{sys.executable} -m pip install psycopg2-binary
import time
import Challenge as ch

# Directory holding the three input files. It ends with a "/" so the file
# names below are appended directly.
file_dir = "C:/Users/LocNguyen/OneDrive - stok LLC/Documents/Loc Nguyen/Online Data Analytics Bootcamp/Module 8/Movies-ETL/Resources/"

# File paths supplied by the user. file_dir already ends with a separator, so
# none is added here; the wiki path previously produced a doubled "//".
wiki_path = f'{file_dir}wikipedia.movies.json'
kaggle_path = f'{file_dir}movies_metadata.csv'
ratings_path = f'{file_dir}ratings.csv'

# Run the ETL pipeline on the three inputs.
ch.master_ETL(wiki_path, kaggle_path, ratings_path)
def test_get_price_without_cookie_should_return_error(self):
    """Check that the level2 page yields no ``<div>`` text when no cookie is sent.

    Only the URL is passed to ``Challenge.do_request``; the parsed
    ``'response'`` tree is then queried with XPath.
    """
    challenge = Challenge()
    url = '''http://hughes.sieve.com.br:9090/level2/'''
    data = challenge.do_request(url)['response']
    results = data.xpath("//div/text()")
    # assertEqual reports the actual count on failure, unlike assertTrue.
    self.assertEqual(len(results), 0)
def test_get_inexistent_product_should_return_301_in_status_code(self):
    """Check that a nonexistent product URL reports status code 301.

    Reads the ``'status_code'`` entry of the mapping returned by
    ``Challenge.do_request``.
    """
    challenge = Challenge()
    url = '''http://www.americanas.com.br/produto/7262040/chromalin-(120-caps)-+-brinde-ripped-extreme-yellow-caps-(20-caps'''
    status_code = challenge.do_request(url)['status_code']
    # assertEqual shows the status actually received when the test fails.
    self.assertEqual(status_code, 301)
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Fetch the level5 challenge page, take the "location" header of the first
# entry in the request history, re-request that quoted location, and print
# the price found in the first <p> element.
#
# The shebang must be the very first line to have any effect; the encoding
# declaration is still honoured on line 2.
import sys

sys.path.append("modules")
from Challenge import Challenge

challenge = Challenge()
url = "http://hughes.sieve.com.br:9090/level5/"

history = challenge.do_request(url, None, None)["history"]
if not history:
    # Without a history entry there is no "location" header to follow.
    sys.exit("No redirect recorded for " + url + "; cannot find the price page.")

location = challenge.quote_url(history[0].headers["location"])
data = challenge.do_request(location, None, None)["response"]
results = data.xpath("//p/text()")
if not results:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("No <p> text found at " + location + "; cannot extract a price.")

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("Preço: " + str(challenge.get_float_price(results[0])))
#!/usr/bin/python
# -*- coding: utf-8 -*-
# A simple script to extract product prices from Americanas.com
# Usage: desafio.py URL
# It connects to the URL using the requests lib and parses the page with
# XPath to find the price.
#
# The shebang must be the very first line to have any effect; the encoding
# declaration is still honoured on line 2.
import sys

sys.path.append('modules')
from Challenge import Challenge

challenge = Challenge()
url = challenge.get_url_from_command_line()

# Issue the request once and read both fields from the same result; calling
# do_request twice doubled the traffic and could pair the body of one reply
# with the status of another.
reply = challenge.do_request(url)
data = reply['response']
status_code = reply['status_code']

results = data.xpath("//span[@class='amount']/text()")
unavailable = data.xpath("//div[@class='unavailProd']/text()")

# Parenthesised single-argument print behaves the same on Python 2 and 3.
if status_code == 301:
    print(u"\nInexistent product! Impossible to get price!\n")
elif len(unavailable) > 0:
    print(u'\nUnavailable product\n')
elif len(results) > 0:
    print("\nOriginal: '" + results[0] + "'\n")
    price = challenge.get_float_price(results[0])
    print("\nFloat: " + str(price) + "\n")