def author_bio(self):
    """Return the author biography text, or None when unavailable.

    The biography is taken from the first ``div`` with class
    ``mainContent``.  None is returned when that div is absent or when
    ``strip_html_tags`` yields an empty result for it.
    """
    container = self.soup.find('div', class_='mainContent')
    if not container:
        return None
    bio = strip_html_tags(unicode(container))
    # Normalise any empty/falsy result to None.
    return bio if bio else None
def user(self):
    """Return the tag-stripped markup of the ``a.author`` link, or None.

    None is returned when no ``a`` tag with class ``author`` is found.
    """
    author_link = self.soup.find('a', class_='author')
    if not author_link:
        return None
    return strip_html_tags(unicode(author_link))
def date(self):
    """Return the review date, or None when the date span is missing.

    The tag-stripped markup of the first ``span`` with class
    ``review-date`` is handed to ``get_review_date`` and its result is
    returned unchanged.
    """
    date_span = self.soup.find('span', class_='review-date')
    if not date_span:
        return None
    raw_date = strip_html_tags(unicode(date_span))
    return get_review_date(raw_date)
def text(self):
    """Return the tag-stripped review text, or None when it is missing.

    The text is taken from the first ``span`` with class ``review-text``.

    Returns:
        The result of ``strip_html_tags`` for that span, or None when the
        span is not present (matching the sibling accessors ``user`` and
        ``date``).
    """
    tag = self.soup.find('span', class_='review-text')
    if tag is None:
        # unicode(None) is the literal string u'None'; without this guard
        # a missing span would be reported as review text reading "None".
        return None
    return strip_html_tags(unicode(tag))
def text(self):
    """Return the tag-stripped description text, or None when missing.

    The text is taken from the first ``span`` with class ``description``.

    Returns:
        The result of ``strip_html_tags`` for that span, or None when the
        span is not present.
    """
    tag = self.soup.find('span', class_='description')
    if tag is None:
        # unicode(None) is the literal string u'None'; without this guard
        # a missing span would be reported as text reading "None".
        return None
    return strip_html_tags(unicode(tag))
def supplemental_text(self):
    """Collect the description text blobs found on the product page.

    Several page layouts are probed in turn; the example URL for each is
    noted inline.  Each probe that produces a non-empty result from
    ``strip_html_tags`` contributes one entry.

    Returns:
        A list of text blobs in probing order; empty when nothing matched.
        Blobs are not de-duplicated, and nothing is filtered against the
        editorial reviews inside this method.
    """
    result = []

    def add_markup(markup):
        # Append the tag-stripped form of *markup*, skipping empty results.
        text = strip_html_tags(markup)
        if text:
            result.append(text)

    def add_tag(tag):
        # A failed lookup yields None, and unicode(None) is the literal
        # string u'None' -- it must never be recorded as description text.
        if tag:
            add_markup(unicode(tag))

    # kindle
    # http://www.amazon.com/dp/1593080050
    add_tag(self.soup.find('div', id='postBodyPS'))

    # paperbacks
    # http://www.amazon.com/dp/1568822812
    tag = self.soup.find('div', id='bookDescription_feature_div')
    if tag:
        # The inner lookup can fail even though the outer div exists.
        add_tag(tag.find('div', class_=None))

    # extract from the javascript code that updates the iframe
    # http://www.amazon.com/dp/1491268727
    tag = self.soup.find(
        'script', text=re.compile(r'bookDescEncodedData', flags=re.I))
    if tag:
        match = re.search(
            r'bookDescEncodedData\s=\s"(?P<description>[^",]+)', tag.text)
        if match:
            # The captured value is URL-quoted; decode before stripping.
            add_markup(urllib.unquote(match.group('description')))

    # http://www.amazon.com/dp/1616611359
    for tag in self.soup.find_all('div', class_='productDescriptionWrapper'):
        add_markup(unicode(tag))

    # android apps
    # http://www.amazon.com/dp/B008A1I0SU
    tag = self.soup.find('div', class_='mas-product-description-wrapper')
    if tag:
        # Prefer the inner content div; fall back to the whole wrapper.
        sub_tag = tag.find('div', class_='content')
        add_markup(unicode(sub_tag if sub_tag else tag))

    # amazon instant video
    # http://www.amazon.com/dp/B004C0YS5C
    # older method
    add_tag(self.soup.find('div', class_='prod-synopsis'))
    # newer method
    add_tag(self.soup.find('div', class_='dv-simple-synopsis'))

    # http://www.amazon.com/dp/B0006FUAD6
    tag = self.soup.find('div', id=re.compile('feature-bullets', flags=re.I))
    if tag:
        # All bullet spans are joined and recorded as a single blob.
        spans = map(unicode, tag.find_all('span'))
        add_markup(u''.join(spans))

    # http://www.amazon.com/dp/B00DHF39KS
    add_tag(self.soup.find('div', class_='aplus'))

    return result