def __open(self, url):
    """Fetch *url* and record the body, any error, and the elapsed time.

    On success the response body is stored in ``self.html_src``. If the
    request or the read raises, the exception is stored in ``self.req_err``
    and ``self.html_src`` is left as it was. In both cases the wall-clock
    duration of the attempt is stored in ``self.req_time``.

    Args:
        url: Address to fetch.

    Returns:
        The current value of ``self.html_src`` (the fresh body on success,
        the previous value if the fetch failed).
    """
    request = Request(
        url,
        headers={
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive',
        },
    )
    start = dt.now()
    response = None
    try:
        # Open the prepared Request (not the bare URL) so the headers
        # above are actually sent with the request.
        response = urlopen(request)
        self.html_src = response.read()
    except Exception as err:
        # Best-effort fetch: remember the failure instead of propagating it.
        self.req_err = err
    finally:
        # Always release the connection, whether or not the read succeeded.
        if response is not None:
            response.close()
    self.req_time = dt.now() - start
    return self.html_src
# Single-argument parenthesised print emits the same text under Python 2
# and also parses under Python 3.
print('Logged in to FitDay. Saved cookie to '+COOKIE_FILE+'.')

# Download weight data
nexturl = 'https://www.fitday.com/fitness/WeightHistory.html'
while nexturl != '':
    # Load the cookie and visit the URL of interest.
    cookieJar.load(COOKIE_FILE)
    # Keep retrying until the page opens. Only ordinary errors are caught,
    # so Ctrl-C / SystemExit can still break out of the retry loop.
    while True:
        try:
            req = urlopen(nexturl)
            break
        except Exception:
            print("Error reading the URL. Trying again...")
    try:
        htmlSource = req.read()
    finally:
        # Release the connection even if the read fails.
        req.close()

    # Filter the result a bit.
    # NOTE(review): the first 11 characters are dropped unconditionally --
    # presumably a fixed wrapper prefix in the response; confirm.
    htmlSource = htmlSource[11:]
    # Turn the literal escape sequences \" \n \t back into the characters
    # they stand for.
    htmlSource = htmlSource.replace('\\"','"')
    htmlSource = htmlSource.replace('\\n','\n')
    htmlSource = htmlSource.replace('\\t','\t')
    soup = BeautifulSoup(htmlSource)

    # removing JS
    to_extract = soup.findAll('script')
    for item in to_extract:
        item.extract()
    weight_table = soup.find('div', {'class' : 'ListView'}).table