def getStoryNumber(self, source):
    """
    Extract the rank number of a story from its HTML.

    The first ``span`` element whose class is ``rank`` is looked up in
    ``source``; every '.' is removed from its string and the remainder
    is converted with ``int``.

    No handling is done for a missing element or non-numeric text; any
    resulting error propagates to the caller.
    """
    rank_text = BeautifulSoup(source).find(
        'span', attrs={'class': 'rank'}
    ).string
    return int(rank_text.replace('.', ''))
def getStoryTitle(self, source):
    """
    Extract the title of a story from its HTML.

    Returns the text of the first ``td`` element whose class is
    ``title``, with leading and trailing whitespace stripped.

    No handling is done for a missing element; any resulting error
    propagates to the caller.
    """
    soup = BeautifulSoup(source)
    title_cell = soup.find('td', attrs={'class': 'title'})
    return title_cell.text.strip()
def getStoryDomain(self, source):
    """
    Extract the link of a story from its HTML.

    Despite the method name, the value returned is the whole link taken
    from the ``href`` of the first ``a`` element, not only its host part.
    When that link carries no network location, it is resolved against
    ``https://news.ycombinator.com``; otherwise it is returned unchanged.

    No handling is done for a missing ``a`` element; any resulting error
    propagates to the caller.
    """
    first_link = BeautifulSoup(source).find('a')
    href = first_link.get('href')
    # A link without a network location is joined onto the site root.
    if not urlparse(href).netloc:
        return urljoin('https://news.ycombinator.com', href)
    return href