def __get_item_content(article: ResultSet):
     """Return the stripped text of the article's content element.

     The ``div`` with class ``text-container`` is tried first.  When that
     lookup finds nothing, a ``p`` element searched with ``class_=None`` is
     used instead.  Returns ``None`` when neither lookup finds an element.
     """
     node = article.find('div', class_="text-container")
     if node is None:
         # No dedicated container: fall back to a paragraph lookup.
         node = article.find('p', class_=None, recursive=True)
     return None if node is None else node.getText().strip()
 def __get_item_author(article: ResultSet):
     """Return the stripped author text of the article, or ``None``.

     The element with class ``author-block__info`` is preferred; the element
     with class ``story-block__byline`` is the fallback.  ``None`` is returned
     when neither is found.
     """
     for css_class in ("author-block__info", "story-block__byline"):
         found = article.find(class_=css_class, recursive=True)
         if found is not None:
             return found.getText().strip()
     return None
 def __get_item_content(article: ResultSet):
     """Return the stripped text of the article's summary paragraph.

     Three ``p`` lookups are attempted in order and the first hit wins:
     class ``story-block__standfirst``, then class ``standfirst-content``,
     then a lookup with ``class_=None``.  Returns ``None`` if all of them
     come back empty.
     """
     # Keyword arguments for each successive article.find('p', ...) attempt.
     attempts = (
         {"class_": "story-block__standfirst", "recursive": True},
         {"class_": "standfirst-content", "recursive": True},
         {"class_": None},
     )
     for find_kwargs in attempts:
         paragraph = article.find('p', **find_kwargs)
         if paragraph is not None:
             return paragraph.getText().strip()
     return None
 def __get_item_url(article: ResultSet):
     """Return the ``href`` of the article's link.

     The link is looked up inside the headline element when there is one.
     Not all articles use a common headline element, so otherwise the first
     link found in the article itself is used.
     """
     link_scope = AbcNewsItemParser.__get_headline_elem(article)
     if link_scope is None:
         # No dedicated headline encountered: search the whole article.
         link_scope = article
     return link_scope.find('a')['href']
    def get_price(self, listing: element.ResultSet) -> str:
        """Return the price text of one apartment listing.

        The price is read from the first "div" element with class "price"
        found inside the listing.

        :param listing: "li" element for one apartment
        :type listing: element.ResultSet
        :return: price of an apartment
        :rtype: str
        """
        price_div = listing.find("div", class_="price")
        return price_div.text
    def get_listing_link(self, listing: element.ResultSet) -> str:
        """Return the link of one apartment listing.

        The link is the "href" attribute of the first "a" element found
        inside the listing.

        :param listing: "li" element for one apartment
        :type listing: element.ResultSet
        :return: a link to a listing
        :rtype: str
        """
        anchor = listing.find("a")
        return anchor["href"]
    def get_title(self, listing: element.ResultSet) -> str:
        """Return the title text of one apartment listing.

        The title is read from the first "h2" element with class
        "title-list" found inside the listing.

        :param listing: "li" element for one apartment
        :type listing: element.ResultSet
        :return: title of an apartment's listing
        :rtype: str
        """
        heading = listing.find("h2", class_="title-list")
        return heading.text
    def get_attribute(self, listing: element.ResultSet, attribute_name: str) -> str:
        """Return the text of the listing attribute named by attribute_name.

        The value is read from the first "span" element whose "title"
        attribute matches attribute_name.

        :param listing: "li" element for one apartment
        :type listing: element.ResultSet
        :param attribute_name: a keyword to search for in a li element
        :type attribute_name: str
        :return: value that corresponds to a keyword
        :rtype: str
        """
        value_span = listing.find("span", title=attribute_name)
        return value_span.text
 def __get_topic_text(article: ResultSet):
     """Return the stripped text of the ``story-block__kicker`` element.

     Returns ``None`` when the article has no such element.
     """
     kicker = article.find(class_="story-block__kicker")
     return kicker.getText().strip() if kicker is not None else None
 def __get_item_url(article: ResultSet):
     """Return the ``href`` of the article's link.

     The first link inside the headline element is used when a headline
     exists; otherwise the first link of the article itself is used.
     """
     heading = TheAustralianNewsItemParser.__get_headline_elem(article)
     link_scope = article if heading is None else heading
     return link_scope.find('a')['href']
 def __get_headline_elem(article: ResultSet):
     """Return the element with class ``story-block__heading``.

     An article can sometimes have no headline (e.g. in Daily Cartoon), in
     which case the lookup result is returned as-is.
     """
     heading = article.find(class_="story-block__heading")
     return heading
Beispiel #12
0
 def _find_advert_anhor(self, div: element.ResultSet) -> element.Tag:
     """Search the given div for an anchor element.

     The lookup asks for an ``a`` tag using the attribute filter
     ``href=True``, ``class=True`` and ``title=False``.
     NOTE(review): presumably this selects links that carry href and class
     but no title attribute -- confirm against the BeautifulSoup docs.
     """
     anchor_filter = {'href': True, 'class': True, 'title': False}
     return div.find('a', anchor_filter)
 def __get_headline_elem(article: ResultSet):
     """Return the result of looking up class ``headline`` in the article."""
     return article.find(class_='headline', recursive=True)
 def __get_headline_text(article: ResultSet):
     """Return the stripped headline text of the article.

     Not all articles use a common headline element; when none is found,
     the text of the article's first link is used instead.
     """
     text_source = AbcNewsItemParser.__get_headline_elem(article)
     if text_source is None:
         # No headline element: fall back to the link text.
         text_source = article.find('a')
     raw_text = text_source.getText()
     return raw_text.strip()
 def __get_headline_elem(article: ResultSet):
     """Return the result of looking up an ``h3`` element in the article."""
     return article.find('h3')
Beispiel #16
0
 def __get_item_url(article: ResultSet):
     """Return the ``href`` of the article's ``a`` element with class ``story__link``."""
     story_link = article.find('a', class_="story__link")
     return story_link['href']
Beispiel #17
0
 def __get_headline_text(article: ResultSet):
     """Return the stripped text of the ``story__headline__text`` element."""
     headline = article.find(class_="story__headline__text")
     headline_text = headline.getText()
     return headline_text.strip()
 def __get_topic_text(article: ResultSet):
     """Return the stripped text of the ``topic__string`` element.

     Returns ``None`` when the article has no such element.
     """
     topic_elem = article.find(class_="topic__string")
     if topic_elem is None:
         return None
     return topic_elem.getText().strip()
 def __get_item_author(article: ResultSet):
     """Return the author's name from the article byline, or ``None``.

     The name is the stripped text of the first ``a`` element inside the
     element with class ``byline``.  ``None`` is returned when the article
     has no byline, or when the byline contains no link.
     """
     byline = article.find(class_='byline')
     if byline is None:
         return None
     author_link = byline.find('a')
     if author_link is None:
         # find() yields None when nothing matches; without this guard a
         # byline lacking a link raised AttributeError instead of reporting
         # "no author" like the missing-byline case does.
         return None
     return author_link.getText().strip()