import os
import sys

import bs4


def process_html(player_file):
    """
    Processes the html file(s) of the users
    :param player_file: path to the player's downloaded games page
    :return: lists of death dates, scores, roles, and the full records of ascended games
    """
    dates, scores, roles, ascension_games = [], [], [], []
    with open(player_file, 'r') as file:
        soup = bs4.BeautifulSoup(file, "html.parser")
    try:
        games = soup.findAll('pre')[0].string.split('\n')
    except IndexError:
        print("No games found for user %s. Maybe check spelling?"
              % os.path.basename(player_file).split('.')[0])
        os.remove(player_file)
        sys.exit(99)
    for i in range(0, len(games) - 1):
        # Each game record is a ':'-separated list of 'key=value' fields.
        keys = [t.split('=')[0] for t in games[i].split(':')]
        values = [t.split('=')[1] for t in games[i].split(':')]
        all_games = dict(zip(keys, values))
        dates.append(str(all_games['deathdate']))
        scores.append(int(all_games['points']))
        roles.append(str(all_games['role']))
        if str(all_games['death']) == 'ascended':
            ascension_games.append(all_games)
    return dates, scores, roles, ascension_games
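# Illustrative only: process_html() assumes each line inside the page's <pre>
# block is a single xlogfile-style record of ':'-separated 'key=value' fields.
# The sample record below is hypothetical; only the 'deathdate', 'points',
# 'role' and 'death' keys are actually required by the parser above.
# role=Valkyrie:points=1234567:deathdate=20190101:death=ascended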
def _get(endpoint: str):
    response = session.get(endpoint)
    # The server rate-limits with HTTP 429 (Too Many Requests); back off for a
    # short random interval and retry until a non-429 response comes back.
    while response.status_code == 429:
        time.sleep(random() / 4)
        response = session.get(endpoint)
    return _soup(response.text, 'lxml')
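# Minimal sketch of the module-level setup _get() relies on. The 'session'
# and '_soup' bindings are assumptions inferred from how they are used above,
# not taken from the original module.
import time
from random import random

import requests
from bs4 import BeautifulSoup as _soup

session = requests.Session()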
import json
from time import strftime

import bs4
import requests


def register(user_email):
    print(strftime('[%H:%M:%S]:'), "Creating Account...")
    home_url = "https://www.footlocker.co.uk/en/homepage"
    r = requests.Session()
    r.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.66 Safari/537.36'
    })
    # Pull the synchronizer (CSRF) token out of the homepage's inline scripts.
    x = r.get(home_url).text
    xs = bs4.BeautifulSoup(x, 'html.parser')
    token = None
    for i in xs.find_all("script", {"type": "text/javascript"}):
        if "SYNCHRONIZER_TOKEN_VALUE =" in str(i):
            t = str(i).split("'")
            token = t[3]
    reg_url = 'https://www.footlocker.co.uk/INTERSHOP/web/WFS/Footlocker-Footlocker_GB-Site/en_GB/-/GBP/ViewUserAccount-Dispatch'
    with open("config.json", "r") as config_file:
        config = json.load(config_file)
    user = {
        'SynchronizerToken': token,
        'Ajax': '1',
        'RegisterUserFullEmail_Login': user_email,
        'RegisterUserFullEmail_Password': config['password'],
        'AddressForm_Address3': '',
        'isshippingaddress': '',
        'AddressForm_LocaleID': 'en_GB',
        'AddressForm_Title': 'common.account.salutation.mr.text',
        'AddressForm_FirstName': config['firstName'],
        'AddressForm_LastName': config['lastName'],
        'AddressForm_Address1': config['street'],
        'AddressForm_Address2': config['houseNo'],
        'AddressForm_City': config['city'],
        'AddressForm_PostalCode': config['postCode'],
        'AddressForm_CountryCode': 'GB',
        'AddressForm_PhoneHome': config['phoneNo'],
        'AddressForm_Birthday_Day': config['birth-day'],
        'AddressForm_Birthday_Month': config['birth-month'],
        'AddressForm_Birthday_Year': config['birth-year'],
        'AddressForm_PreferredShippingAddress': 'true',
        'AddressForm_PreferredBillingAddress': 'true',
        'RegisterUserFullEmail_Newsletter': 'true',
        'CreateAccount': ''
    }
    r.post(reg_url, data=user)
    print(strftime('[%H:%M:%S]:'), "Account Created using", user_email)
    export = user_email + ":" + config['password']
    return export
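# register() reads its personal details from a local config.json. Every key
# below is one the function actually looks up; the values are placeholders
# and the overall shape is an assumption rather than a documented schema.
# {
#     "password": "changeme",
#     "firstName": "Jane",
#     "lastName": "Doe",
#     "street": "Example Street",
#     "houseNo": "1",
#     "city": "London",
#     "postCode": "SW1A 1AA",
#     "phoneNo": "07000000000",
#     "birth-day": "01",
#     "birth-month": "01",
#     "birth-year": "1990"
# }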
def __init__(self):
    # Assumes module-level helpers along the lines of
    # 'from urllib.request import urlopen as _request' and
    # 'from bs4 import BeautifulSoup as _soup'.
    self.urls = []
    self.client = _request("http://donegal.com.pl/gb/")
    self.pageHtml = self.client.read()
    self.client.close()
    self.pageSoup = _soup(self.pageHtml, "html.parser")
    self.menuLinks = self.pageSoup.find_all("li", {"class": "category glowna"})
    self.saleMenuLink = self.pageSoup.find_all("li", {"class": "link glowna"})
    print(len(self.menuLinks))
    for item in self.menuLinks:
        if not item.div:
            # Menu entries without a dropdown: fall back to the sale-menu
            # links and stop walking the main menu.
            print(item.a["href"])
            for saleHrefs in self.saleMenuLink:
                self.urls.append(saleHrefs.a["href"])
            break
        # Otherwise collect every link from the entry's dropdown submenu.
        hrefs = item.div.ul.find_all("a")
        for href in hrefs:
            self.urls.append(href["href"])
def __init__(self, url):
    self.productList = []
    self.client = _request(url)
    self.pageHtml = self.client.read()
    self.client.close()
    self.pageSoup = _soup(self.pageHtml, "html.parser")
    self.products = self.pageSoup.find_all(
        "article", {"class": "product-miniature js-product-miniature"})
    self.category = (self.pageSoup.h6.text + "/"
                     + self.pageSoup.find("div", {"class": "block-category"}).h1.text)
    for item in self.products:
        # Map each product tile on the category page onto a Products record
        # (Products is a plain data holder defined elsewhere in the project).
        product = Products()
        product.productId = item["data-id-product"]
        product.productImage = item.div.img["data-full-size-image-url"]
        product.productTitle = item.div.div.div.a.text.encode("utf-8", "ignore")
        product.productReference = (
            item.div.find("div", {"class": "product-reference"}).label.text
            + item.div.find("div", {"class": "product-reference"}).span.text)
        product.productPrice = item.div.find("div", {"class": "product-prices"}).div.span.text
        self.productList.append(product)
def _get(endpoint: str):
    return _soup(session.get(endpoint).text, 'lxml')