Python find_tag Examples

Programming Language: Python

Namespace/Package Name: lib.soup_utils

Method/Function: find_tag

Examples at hotexamples.com: 15

Python find_tag - 15 examples found. These are the top-rated real-world Python examples of lib.soup_utils.find_tag extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_author(self):
     """Return the stripped text of the author tag, or None on failure.

     The tag is looked up in ``self.item_soup`` with
     ``ReviewItem.REVIEW_AUTHOR_SELECTOR`` via ``soup_utils.find_tag``.
     """
     try:
         author_tag = soup_utils.find_tag(
             self.item_soup, ReviewItem.REVIEW_AUTHOR_SELECTOR)
         return author_tag.text.strip()
     except Exception:
         # Best-effort extraction: any lookup or parsing error yields None.
         # ``Exception`` (rather than a bare ``except``) lets
         # KeyboardInterrupt and SystemExit propagate.
         return None

Example #2

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_helpful(self):
     """Return the votes text without its trailing phrase, or 0 on failure.

     Reads the text of the tag selected by
     ``ReviewItem.REVIEW_VOTES_SELECTOR`` in ``self.item_soup``, removes the
     literal "people found this helpful." and strips surrounding whitespace.

     Note the asymmetric result: a string on success, the integer ``0``
     when anything goes wrong.
     """
     try:
         vote_text = soup_utils.find_tag(
             self.item_soup, ReviewItem.REVIEW_VOTES_SELECTOR).text
         return vote_text.replace("people found this helpful.", "").strip()
     except Exception:
         # Best-effort extraction; ``Exception`` keeps KeyboardInterrupt and
         # SystemExit from being swallowed as a bare ``except`` would.
         return 0

Example #3

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_description(self):
     """Return the description tag's inner markup, or None on failure.

     The tag is looked up in ``self.soup`` with
     ``self.DESCRIPTION_SELECTOR``; its contents are rendered with
     ``decode_contents(formatter="html")`` and stripped of surrounding
     whitespace.
     """
     try:
         description_tag = soup_utils.find_tag(
             self.soup, self.DESCRIPTION_SELECTOR)
         return description_tag.decode_contents(formatter="html").strip()
     except Exception:
         # Best-effort extraction; ``Exception`` keeps KeyboardInterrupt and
         # SystemExit from being swallowed as a bare ``except`` would.
         return None

Example #4

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_feature_list(self):
     """Return the feature-bullets tag's inner markup, or None on failure.

     The tag is looked up in ``self.soup`` with
     ``self.FEATURE_BULLETS_SELECTOR``; its contents are rendered with
     ``decode_contents(formatter="html")`` and stripped of surrounding
     whitespace.
     """
     try:
         bullets_tag = soup_utils.find_tag(
             self.soup, self.FEATURE_BULLETS_SELECTOR)
         return bullets_tag.decode_contents(formatter="html").strip()
     except Exception:
         # Best-effort extraction; ``Exception`` keeps KeyboardInterrupt and
         # SystemExit from being swallowed as a bare ``except`` would.
         return None

Example #5

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_content(self):
     """Return the review-content tag's inner markup, or None on failure.

     The tag is looked up in ``self.item_soup`` with
     ``ReviewItem.REVIEW_CONTENT_SELECTOR``; its contents are rendered with
     ``decode_contents(formatter="html")`` and stripped of surrounding
     whitespace.
     """
     try:
         content_tag = soup_utils.find_tag(
             self.item_soup, ReviewItem.REVIEW_CONTENT_SELECTOR)
         return content_tag.decode_contents(formatter="html").strip()
     except Exception:
         # Best-effort extraction; ``Exception`` keeps KeyboardInterrupt and
         # SystemExit from being swallowed as a bare ``except`` would.
         return None

Example #6

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_date(self):
     """Return the review date as a ``datetime``, or None on failure.

     Reads the text of the tag selected by
     ``ReviewItem.REVIEW_DATE_SELECTOR`` in ``self.item_soup``, keeps only
     what follows the last occurrence of "on", and parses it with the
     format ``'%B %d, %Y'`` (month name, day, four-digit year).
     """
     try:
         date_text = soup_utils.find_tag(
             self.item_soup, ReviewItem.REVIEW_DATE_SELECTOR).text
         # Drop any prefix up to and including the last "on"; if "on" is
         # absent the whole text is kept.
         date_text = date_text.split('on')[-1].strip()
         return datetime.datetime.strptime(date_text, '%B %d, %Y')
     except Exception:
         # Covers a missing tag as well as text that does not match the
         # expected format; KeyboardInterrupt/SystemExit still propagate.
         return None

Example #7

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_breadcrumbs(self):
     """Return the breadcrumb trail joined with " > ", or None on failure.

     Reads the stripped text of the tag selected by
     ``self.BREADCRUMBS_SELECTOR`` in ``self.soup``, splits it on the
     "›" character, strips each piece and joins the pieces with " > ".
     """
     try:
         breadcrumb_text = soup_utils.find_tag(
             self.soup, self.BREADCRUMBS_SELECTOR).text.strip()
         categories = [c.strip() for c in breadcrumb_text.split('›')]
         return " > ".join(categories)
     except Exception:
         # Best-effort extraction; ``Exception`` keeps KeyboardInterrupt and
         # SystemExit from being swallowed as a bare ``except`` would.
         return None

Example #8

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_price(self):
     """Return the price as a float rounded to 2 decimals, or None on failure.

     Reads the stripped text of the tag selected by ``self.PRICE_SELECTOR``
     in ``self.soup``, removes every character that is not a digit, ".",
     "," or "-", and converts the remainder with ``float``.
     """
     try:
         price_text = soup_utils.find_tag(self.soup,
                                          self.PRICE_SELECTOR).text.strip()
         price_text = re.sub(r'[^0-9.,\-]', '', price_text)
         # NOTE(review): commas survive the filter above, so text such as
         # "1,299.99" makes float() raise and this method return None.
         # Correct handling depends on the locale of the page -- confirm the
         # intended behavior before changing it.
         return round(float(price_text), 2)
     except Exception:
         # Covers a missing tag as well as unparsable text;
         # KeyboardInterrupt/SystemExit still propagate.
         return None

Example #9

Show file

    def parse(self, response):
        """Save the ASIN of every item link in ``response``, then paginate.

        ``response`` is parsed with BeautifulSoup using ``HTML_PARSER``.  For
        each element matched by ``ITEM_LINK_SELECTOR`` the first ``a`` tag is
        looked up, its ``href`` is passed to ``extract_asin_from_url`` and
        the result is handed to ``self.save_asin``.  Finally
        ``self.process_next_page`` is called with the parsed soup.
        """
        soup = BeautifulSoup(response, HTML_PARSER)
        for item in soup_utils.find_tags(soup, ITEM_LINK_SELECTOR):
            item_link = soup_utils.find_tag(item, 'a')
            if item_link is None:
                # Presumably find_tag returns None when nothing matches
                # (verify against soup_utils); skip the item instead of
                # failing on ``None['href']`` and aborting the whole page.
                continue
            asin = extract_asin_from_url(item_link['href'])
            self.save_asin(asin)

        self.process_next_page(soup)

Example #10

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_rating(self):
     """Return the star rating as an int, or 0 on failure.

     Reads the text of the tag selected by
     ``ReviewItem.REVIEW_RATING_SELECTOR`` in ``self.item_soup``, removes
     the literal "out of 5 stars", strips whitespace and converts the
     remainder through ``float`` to ``int`` (truncating any fraction).
     """
     try:
         rating_text = soup_utils.find_tag(
             self.item_soup, ReviewItem.REVIEW_RATING_SELECTOR).text
         stars_text = rating_text.replace("out of 5 stars", "").strip()
         return int(float(stars_text))
     except Exception:
         # Covers a missing tag as well as non-numeric text;
         # KeyboardInterrupt/SystemExit still propagate.
         return 0

Example #11

Show file

    def process_next_page(self, soup):
        """Follow the next-page link in ``soup`` while under the page limit.

        Increments ``self.page_processed`` on every call.  Once the counter
        exceeds ``self.max_pages`` nothing else happens.  Otherwise the tag
        matched by ``NEXT_PAGE_SELECTOR`` is looked up and, if present, its
        ``href`` is resolved with ``soup_utils.format_url`` against the URL
        from ``get_review_url(self.asin, self.country)`` and passed to
        ``self.process``.
        """
        self.page_processed += 1
        if self.page_processed <= self.max_pages:
            link = soup_utils.find_tag(soup, NEXT_PAGE_SELECTOR)
            if link is None:
                # No further page to visit.
                return
            href = link['href']
            base_url = get_review_url(self.asin, self.country)
            self.process(soup_utils.format_url(href, base_url))

Example #12

Show file

 def process_next_page(self, soup):
     """Process the page behind the next-page link in ``soup``, if any.

     The tag matched by ``NEXT_PAGE_SELECTOR`` is looked up; when one is
     found its ``href`` is resolved with ``soup_utils.format_url`` against
     ``self.url`` and the result is passed to ``self.process``.
     """
     link = soup_utils.find_tag(soup, NEXT_PAGE_SELECTOR)
     if link is None:
         # No further page to visit.
         return
     target_url = soup_utils.format_url(link['href'], self.url)
     self.process(target_url)

Example #13

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_title(self):
     """Return the stripped text of the review-title tag, or None on failure.

     The tag is looked up in ``self.item_soup`` with
     ``ReviewItem.REVIEW_TITLE_SELECTOR`` via ``soup_utils.find_tag``.
     """
     try:
         title_tag = soup_utils.find_tag(
             self.item_soup, ReviewItem.REVIEW_TITLE_SELECTOR)
         return title_tag.text.strip()
     except Exception:
         # Best-effort extraction; ``Exception`` keeps KeyboardInterrupt and
         # SystemExit from being swallowed as a bare ``except`` would.
         return None

Example #14

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_brand(self):
     """Return the stripped text of the tag matched by ``self.BRAND_SELECTOR``.

     The lookup runs against ``self.soup``.  No error handling is done
     here, so any failure in the lookup or attribute access propagates to
     the caller.
     """
     brand_tag = soup_utils.find_tag(self.soup, self.BRAND_SELECTOR)
     brand_text = brand_tag.text
     return brand_text.strip()

Example #15

Show file

File: items.py Project: johnnyxiang2015/amz_reviews

 def parse_name(self):
     """Return the stripped text of the tag matched by ``self.TITLE_SELECTOR``.

     The lookup runs against ``self.soup``.  No error handling is done
     here, so any failure in the lookup or attribute access propagates to
     the caller.
     """
     title_tag = soup_utils.find_tag(self.soup, self.TITLE_SELECTOR)
     title_text = title_tag.text
     return title_text.strip()