def parse_sales_rank(cls, b, **kwargs):
    """Extract the top-level sales rank and its category from a product page.

    Args:
        cls: Not used by the body.
        b: Either an already-parsed ``bs3_BeautifulSoup`` object or raw
            markup, which is handed to ``bs3_BeautifulSoup`` for parsing.
        **kwargs: Accepted and ignored.

    Returns:
        A dict with two keys: ``'sales_rank'`` (int, every non-digit such as
        a thousands separator removed) and ``'sales_rank_category'`` (str,
        leading/trailing spaces stripped).

    Raises:
        AttributeError: If no ``<li>`` whose id matches ``SalesRank`` exists
            (``find`` yields ``None`` in that case).
        IndexError: If the element text contains no "<number> in <category>"
            fragment.
    """
    if not isinstance(b, bs3_BeautifulSoup):
        b = bs3_BeautifulSoup(b)
    rank_item = b.find('li', attrs={'id': re.compile('SalesRank')})
    # `[\d,]*` rather than `[\d,]+`: the rank may be a single digit
    # ("5 in Books"), which the stricter form failed to match.
    # The category part stops at the first character that is neither a word
    # character nor a space.
    sales_rank_re = re.compile(r'(\d[\d,]*) in ([\w\ ]+)')
    matches = sales_rank_re.findall(rank_item.text)
    # Only the first "<number> in <category>" occurrence is reported.
    rank_text, category = matches[0]
    return {
        'sales_rank': int(re.sub(r'\D', '', rank_text)),
        'sales_rank_category': category.strip(' '),
    }
def parse_sales_sub_rank(cls, b, **kwargs):
    """Extract the per-category sub-ranks listed inside the sales-rank entry.

    Args:
        cls: Not used by the body.
        b: Either an already-parsed ``bs3_BeautifulSoup`` object or raw
            markup, which is handed to ``bs3_BeautifulSoup`` for parsing.
        **kwargs: Accepted and ignored.

    Returns:
        A list with one dict per ``<li class="zg_hrsr_item">`` found under
        the ``SalesRank`` element. Each dict has ``'sales_rank'`` (int, all
        non-digits removed from the ``zg_hrsr_rank`` span text) and
        ``'sales_rank_category'`` (list of str, the category path split
        on ``'>'``). The list is empty when there are no such items.

    Raises:
        AttributeError: If the ``SalesRank`` element, or a rank/ladder span
            inside an item, is missing (``find`` yields ``None``).
        ValueError: If a rank span contains no digits.
        IndexError: If a ladder text contains no space character.
    """
    if not isinstance(b, bs3_BeautifulSoup):
        b = bs3_BeautifulSoup(b)
    rank_item = b.find('li', attrs={'id': re.compile('SalesRank')})
    # Compiled once up front instead of once per list item.
    non_digit_re = re.compile(r'\D')
    sales_sub_rank = []
    for entry in rank_item.findAll('li', 'zg_hrsr_item'):
        rank_text = entry.find('span', 'zg_hrsr_rank').text
        rank = int(non_digit_re.sub('', rank_text))
        ladder_text = entry.find('span', 'zg_hrsr_ladder').text
        # Only the second space-separated token of the ladder text is kept.
        # NOTE(review): presumably this drops a leading "in " prefix; a path
        # that itself contains spaces would be cut at the next space --
        # confirm against the real markup before changing.
        category_path = ladder_text.split(' ')[1]
        sales_sub_rank.append({
            'sales_rank': rank,
            'sales_rank_category': category_path.split('>'),
        })
    return sales_sub_rank
def parse_product_title(cls, b, **kwargs):
    """Return the text of the ``<span id="productTitle">`` element.

    Args:
        cls: Not used by the body.
        b: Either an already-parsed ``bs3_BeautifulSoup`` object or raw
            markup, which is handed to ``bs3_BeautifulSoup`` for parsing.
        **kwargs: Accepted and ignored.

    Returns:
        The ``.text`` of the first matching span.

    Raises:
        AttributeError: If no such span exists (``find`` yields ``None``).
    """
    soup = b if isinstance(b, bs3_BeautifulSoup) else bs3_BeautifulSoup(b)
    title_span = soup.find('span', attrs={'id': 'productTitle'})
    return title_span.text
def parse_avg_rating(cls, b, **kwargs):
    """Return the average rating parsed from the review histogram link.

    The value is the first decimal number found in the ``title`` attribute
    of the ``<span class="reviewCountTextLinkedHistogram">`` element.

    Args:
        cls: Not used by the body.
        b: Either an already-parsed ``bs3_BeautifulSoup`` object or raw
            markup, which is handed to ``bs3_BeautifulSoup`` for parsing.
        **kwargs: Accepted and ignored.

    Returns:
        The rating as a float.

    Raises:
        TypeError: If the span is missing (``find`` yields ``None``, which
            cannot be subscripted).
        KeyError: If the span has no ``title`` attribute.
        IndexError: If the title contains no number.
    """
    if not isinstance(b, bs3_BeautifulSoup):
        b = bs3_BeautifulSoup(b)
    rating_span = b.find('span', 'reviewCountTextLinkedHistogram')
    # Match a well-formed decimal ("4", "4.5", ".5") rather than any run of
    # digits and dots, so a stray "." earlier in the title (e.g. after an
    # abbreviation) is skipped instead of being passed to float().
    number_re = re.compile(r'\d+(?:\.\d+)?|\.\d+')
    return float(number_re.findall(rating_span['title'])[0])