def __get_item_content(article: ResultSet):
    """Return the stripped text content of an article, or None if not found.

    The ``div`` with class ``text-container`` is tried first. When it is
    missing, the lookup falls back to ``find('p', class_=None)``
    (presumably a paragraph without a class attribute -- confirm against
    the parser library's filter semantics).
    """
    body = article.find('div', class_="text-container")
    if body is None:
        body = article.find('p', class_=None, recursive=True)
    return None if body is None else body.getText().strip()
def __get_item_author(article: ResultSet):
    """Return the stripped author text of an article, or None if not found.

    Candidate class names are tried in order; the first element found wins.
    """
    for css_class in ("author-block__info", "story-block__byline"):
        author_elem = article.find(class_=css_class, recursive=True)
        if author_elem is not None:
            return author_elem.getText().strip()
    return None
def __get_item_content(article: ResultSet):
    """Return the stripped standfirst text of an article, or None if not found.

    Paragraphs with the known standfirst classes are tried in order. If
    neither exists, the lookup falls back to ``find('p', class_=None)``
    (presumably a paragraph without a class attribute -- confirm against
    the parser library's filter semantics).
    """
    for css_class in ("story-block__standfirst", "standfirst-content"):
        standfirst = article.find('p', class_=css_class, recursive=True)
        if standfirst is not None:
            return standfirst.getText().strip()

    fallback = article.find('p', class_=None)
    if fallback is None:
        return None
    return fallback.getText().strip()
def __get_item_url(article: ResultSet):
    """Return the ``href`` of the article's link, or None if there is none.

    The link inside the dedicated headline element is preferred. When the
    article has no headline element, or the headline contains no anchor,
    the first anchor found anywhere in the article is used instead.

    :param article: parsed element representing one article
    :return: value of the anchor's ``href`` attribute, or None when no
        anchor exists or the anchor has no ``href``
    """
    headline_elem = AbcNewsItemParser.__get_headline_elem(article)
    # Not all articles use a common headline element, so get first link if no
    # dedicated headline encountered
    link = headline_elem.find('a') if headline_elem is not None else None
    if link is None:
        # Also reached when a headline element exists but holds no anchor.
        link = article.find('a')
    if link is None:
        # find() yields None when nothing matches; subscripting it would
        # raise an opaque TypeError, so report "no URL" explicitly.
        return None
    # .get() avoids a KeyError for an anchor that lacks an href attribute.
    return link.get('href')
def get_price(self, listing: element.ResultSet) -> str:
    """Extract the price text of one apartment listing.

    Looks up the first "div" element with class "price" inside the listing
    and returns its text.

    :param listing: "li" element for one apartment
    :type listing: element.ResultSet
    :return: price of an apartment
    :rtype: str
    """
    price_div = listing.find("div", class_="price")
    return price_div.text
def get_listing_link(self, listing: element.ResultSet) -> str:
    """Extract the link of one apartment listing.

    Takes the first "a" element inside the listing and returns the value
    of its "href" attribute.

    :param listing: "li" element for one apartment
    :type listing: element.ResultSet
    :return: a link to a listing
    :rtype: str
    """
    anchor = listing.find("a")
    return anchor["href"]
def get_title(self, listing: element.ResultSet) -> str:
    """Extract the title text of one apartment listing.

    Looks up the first "h2" element with class "title-list" inside the
    listing and returns its text.

    :param listing: "li" element for one apartment
    :type listing: element.ResultSet
    :return: title of an apartment's listing
    :rtype: str
    """
    heading = listing.find("h2", class_="title-list")
    return heading.text
def get_attribute(self, listing: element.ResultSet, attribute_name: str) -> str:
    """Look up the value of a named attribute of one apartment listing.

    Finds the first "span" element whose "title" attribute matches
    ``attribute_name`` and returns its text.

    :param listing: "li" element for one apartment
    :type listing: element.ResultSet
    :param attribute_name: a keyword to search for in a li element
    :type attribute_name: str
    :return: value that corresponds to a keyword
    :rtype: str
    """
    value_span = listing.find("span", title=attribute_name)
    return value_span.text
def __get_topic_text(article: ResultSet):
    """Return the stripped text of the article's kicker element, or None.

    The kicker is the first element with class ``story-block__kicker``.
    """
    kicker = article.find(class_="story-block__kicker")
    if kicker is None:
        return None
    return kicker.getText().strip()
def __get_item_url(article: ResultSet):
    """Return the ``href`` of the article's link, or None if there is none.

    The link inside the headline element is preferred. When the article
    has no headline element, or the headline contains no anchor, the first
    anchor found anywhere in the article is used instead.

    :param article: parsed element representing one article
    :return: value of the anchor's ``href`` attribute, or None when no
        anchor exists or the anchor has no ``href``
    """
    headline = TheAustralianNewsItemParser.__get_headline_elem(article)
    link = headline.find('a') if headline is not None else None
    if link is None:
        # Reached when there is no headline or the headline holds no anchor.
        link = article.find('a')
    if link is None:
        # find() yields None when nothing matches; subscripting it would
        # raise an opaque TypeError, so report "no URL" explicitly.
        return None
    # .get() avoids a KeyError for an anchor that lacks an href attribute.
    return link.get('href')
def __get_headline_elem(article: ResultSet):
    """Return the first element with class ``story-block__heading``.

    The result of ``find`` is passed through unchanged, so callers must
    handle the case where no such element exists.
    """
    # Article can sometimes have no headline, e.g. in Daily Cartoon
    heading = article.find(class_="story-block__heading")
    return heading
def _find_advert_anhor(self, div: element.ResultSet) -> element.Tag:
    """Search ``div`` for the advert's anchor element.

    The first ``<a>`` matching the attribute filter is returned. The filter
    uses True for ``href`` and ``class`` and False for ``title``
    (presumably: both must be present and ``title`` absent -- confirm
    against the parser library's attribute-filter semantics).
    """
    anchor_filter = {'href': True, 'class': True, 'title': False}
    return div.find('a', anchor_filter)
def __get_headline_elem(article: ResultSet):
    """Return the first element with class ``headline`` in the article.

    The result of ``find`` is passed through unchanged.
    """
    return article.find(class_='headline', recursive=True)
def __get_headline_text(article: ResultSet):
    """Return the stripped headline text of an article, or None if absent.

    The dedicated headline element is preferred; when the article has
    none, the text of its first anchor is used instead.

    :param article: parsed element representing one article
    :return: stripped headline text, or None when the article has neither
        a headline element nor an anchor
    """
    headline_elem = AbcNewsItemParser.__get_headline_elem(article)
    # Not all articles use a common headline element, if not found get the
    # link text
    if headline_elem is None:
        headline_elem = article.find('a')
    if headline_elem is None:
        # The fallback lookup can also come back empty; calling getText()
        # on None would raise AttributeError.
        return None
    return headline_elem.getText().strip()
def __get_headline_elem(article: ResultSet):
    """Return the first ``h3`` element of the article.

    The result of ``find`` is passed through unchanged.
    """
    return article.find('h3')
def __get_item_url(article: ResultSet):
    """Return the ``href`` of the article's story link.

    The story link is the first ``<a>`` with class ``story__link``.
    """
    story_link = article.find('a', class_="story__link")
    return story_link['href']
def __get_headline_text(article: ResultSet):
    """Return the stripped headline text of the article.

    The headline is the first element with class ``story__headline__text``.
    """
    headline = article.find(class_="story__headline__text")
    raw_text = headline.getText()
    return raw_text.strip()
def __get_topic_text(article: ResultSet):
    """Return the stripped topic text of the article, or None if absent.

    The topic is the first element with class ``topic__string``.
    """
    topic_elem = article.find(class_="topic__string")
    return None if topic_elem is None else topic_elem.getText().strip()
def __get_item_author(article: ResultSet):
    """Return the stripped author name from the article's byline, or None.

    The author is read from the first anchor inside the first element
    with class ``byline``.

    :param article: parsed element representing one article
    :return: stripped text of the byline's anchor, or None when the
        article has no byline or the byline contains no anchor
    """
    byline = article.find(class_='byline')
    if byline is None:
        return None
    author_link = byline.find('a')
    if author_link is None:
        # A byline without a link would otherwise raise AttributeError on
        # None; treat it the same as a missing byline.
        return None
    return author_link.getText().strip()