Python strip_html_tags Examples

Programming Language: Python

Namespace/Package Name: external.amazon_scraper

Method/Function: strip_html_tags

Examples at hotexamples.com: 6

Python strip_html_tags - 6 examples found. These are the top-rated real-world Python examples of external.amazon_scraper.strip_html_tags, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: product.py Project: MachineLearningStudyGroup/Smart_Review_Summarization

 def author_bio(self):
     """Return the tag-stripped text of the ``mainContent`` div, if any.

     Returns None when ``self.soup`` has no such div or when nothing is
     left after the tags are stripped.
     """
     content_div = self.soup.find('div', class_='mainContent')
     if not content_div:
         return None
     # An empty result is reported as None, same as a missing div.
     return strip_html_tags(unicode(content_div)) or None

Example #2

Show file

File: review.py Project: MachineLearningStudyGroup/Smart_Review_Summarization

 def user(self):
     """Return the tag-stripped markup of the ``author`` link, or None.

     None is returned when ``self.soup`` contains no ``a`` element with
     class ``author``.
     """
     author_link = self.soup.find('a', class_='author')
     if not author_link:
         return None
     return strip_html_tags(unicode(author_link))

Example #3

Show file

File: review.py Project: MachineLearningStudyGroup/Smart_Review_Summarization

 def date(self):
     """Return the review date taken from the ``review-date`` span, or None.

     The span's markup is stripped of tags and the remaining text is handed
     to ``get_review_date``; None is returned when the span is absent.
     """
     date_span = self.soup.find('span', class_='review-date')
     if not date_span:
         return None
     date_text = strip_html_tags(unicode(date_span))
     return get_review_date(date_text)

Example #4

Show file

File: review.py Project: MachineLearningStudyGroup/Smart_Review_Summarization

 def text(self):
     """Return the tag-stripped markup of the ``review-text`` span, or None.

     Returns:
         The result of ``strip_html_tags`` applied to the span's markup, or
         None when ``self.soup`` has no ``span`` with class ``review-text``.
     """
     tag = self.soup.find('span', class_='review-text')
     if tag:
         return strip_html_tags(unicode(tag))
     # Without this guard unicode(None) would hand the literal text u'None'
     # to strip_html_tags instead of signalling that the span is missing.
     return None

Example #5

Show file

File: review.py Project: tailintalent/Smart_Review_Summarization

 def text(self):
     """Return the tag-stripped markup of the ``description`` span, or None.

     Returns:
         The result of ``strip_html_tags`` applied to the span's markup, or
         None when ``self.soup`` has no ``span`` with class ``description``.
     """
     tag = self.soup.find('span', class_='description')
     if tag:
         return strip_html_tags(unicode(tag))
     # Without this guard unicode(None) would hand the literal text u'None'
     # to strip_html_tags instead of signalling that the span is missing.
     return None

Example #6

Show file

File: product.py Project: MachineLearningStudyGroup/Smart_Review_Summarization

    def supplemental_text(self):
        """Collect the descriptive text blobs found on the parsed page.

        Each known page layout is probed in turn through ``self.soup``.
        Every match is passed through ``strip_html_tags`` and kept only when
        the stripped text is non-empty.

        Returns:
            A list of text blobs, in the order the layouts are probed below;
            empty when nothing matched.
        """
        # get all the known text blobs
        # NOTE(review): the original comment also said "remove any found in
        # editorial reviews", but no such filtering happens in this method --
        # confirm whether a caller is expected to do it.
        result = []

        def collect(markup):
            # Strip the markup and keep the text only when something is left.
            text = strip_html_tags(markup)
            if text:
                result.append(text)

        # kindle
        # http://www.amazon.com/dp/1593080050
        tag = self.soup.find('div', id='postBodyPS')
        if tag:
            collect(unicode(tag))

        # paperbacks
        # http://www.amazon.com/dp/1568822812
        tag = self.soup.find('div', id='bookDescription_feature_div')
        if tag:
            # The text is taken from a class-less inner div.  When that div
            # is missing, skip this section: unicode(None) would otherwise
            # add the literal string u'None' to the result.
            tag = tag.find('div', class_=None)
            if tag is not None:
                collect(unicode(tag))

        # extract from the javascript code that updates the iframe
        # http://www.amazon.com/dp/1491268727
        tag = self.soup.find('script', text=re.compile(r'bookDescEncodedData', flags=re.I))
        if tag:
            match = re.search(r'bookDescEncodedData\s=\s"(?P<description>[^",]+)', tag.text)
            if match:
                # The captured value is percent-decoded before tag stripping.
                collect(urllib.unquote(match.group('description')))

        # http://www.amazon.com/dp/1616611359
        for tag in self.soup.find_all('div', class_='productDescriptionWrapper'):
            collect(unicode(tag))

        # android apps
        # http://www.amazon.com/dp/B008A1I0SU
        tag = self.soup.find('div', class_='mas-product-description-wrapper')
        if tag:
            # Prefer the inner "content" div when the wrapper has one.
            sub_tag = tag.find('div', class_='content')
            if sub_tag:
                tag = sub_tag
            collect(unicode(tag))

        # amazon instant video
        # http://www.amazon.com/dp/B004C0YS5C
        # older method first, then the newer one
        for synopsis_class in ('prod-synopsis', 'dv-simple-synopsis'):
            tag = self.soup.find('div', class_=synopsis_class)
            if tag:
                collect(unicode(tag))

        # http://www.amazon.com/dp/B0006FUAD6
        tag = self.soup.find('div', id=re.compile('feature-bullets', flags=re.I))
        if tag:
            # All spans under the bullets div are concatenated into one blob.
            collect(u''.join(map(unicode, tag.find_all('span'))))

        # http://www.amazon.com/dp/B00DHF39KS
        tag = self.soup.find('div', class_='aplus')
        if tag:
            collect(unicode(tag))

        return result