def get_detail_info(html):
    """Extract the price range and examine text from an item detail page.

    The ``<div id="item_additional" ...>`` section is cut out of *html*
    with a regular expression and parsed with BeautifulSoup.

    Args:
        html: Markup of the page, as a string.

    Returns:
        A tuple ``(min_price, price, max_price, examine)``.  The three
        prices are ``int(rs_str_to_int(...))`` of the text found in the
        first three ``<span>`` elements of the section; ``examine`` is a
        slice of the section's third child node.

    Raises:
        AttributeError: if *html* has no ``item_additional`` div
            (``re.search`` returns ``None``).
        IndexError: if the section has fewer children or ``<span>``
            elements than expected.
    """
    # Greedy match with DOTALL: the captured section runs from the opening
    # div through the *last* '</div>' in the document.
    section = re.search(
        r'<div id="item_additional"(.+)</div>', html, re.DOTALL).group(0)
    soup = BeautifulSoup(section)

    # Third child of the first parsed node, minus its first and last
    # character (presumably surrounding whitespace -- confirm against a
    # live page).
    examine = soup.contents[0].contents[2][1:-1]

    def span_price(span):
        # Same extraction for each price span: third child, with the first
        # and last character dropped, converted by rs_str_to_int.
        return int(rs_str_to_int(span.contents[2][1:-1]))

    spans = soup.findAll('span')
    min_price = span_price(spans[0])
    price = span_price(spans[1])
    max_price = span_price(spans[2])
    return min_price, price, max_price, examine
def get_volume_info():
    """Yield ``(id, volume)`` pairs scraped from the page at VOLUME_URL.

    This is a generator: the page is fetched with ``read_url`` only when
    iteration starts.  The ``<table id="top100_table" ...>`` element is
    cut out with a regular expression and parsed with BeautifulSoup.

    Yields:
        Tuples ``(item_id, volume)`` where ``item_id`` is the integer
        following ``id=`` in the markup of the row's first cell, and
        ``volume`` is ``rs_str_to_int`` applied to the first child of the
        row's sixth cell.

    Raises:
        AttributeError: if the table, or an ``id=<digits>`` token in a
            row's first cell, is not found.
        IndexError: if a row has fewer than six cells.
    """
    page = read_url(VOLUME_URL)
    table_match = re.search(
        r'<table id="top100_table"(.+)</table>', page, re.DOTALL)
    table_soup = BeautifulSoup(table_match.group(0))
    id_pattern = re.compile(r'id=(\d+)')

    rows = table_soup.findAll('tr')
    # The first row is skipped (presumably the table header).
    for row in rows[1:]:
        # Each row is re-parsed from its string form, as the cells are
        # looked up on that fresh parse.
        cells = BeautifulSoup(str(row)).findAll('td')
        item_id = int(id_pattern.search(str(cells[0])).group(1))
        yield item_id, rs_str_to_int(cells[5].contents[0])
def get_front_volume_info():
    """Yield ``(id, volume)`` pairs scraped from the page at FRONT_VOLUME_URL.

    This is a generator: the page is fetched with ``read_url`` only when
    iteration starts.  Everything from ``<div class="top5_left`` to the
    end of the page is parsed with BeautifulSoup, and the first
    ``<table>`` found in it is used.

    Yields:
        Tuples ``(item_id, volume)`` where ``item_id`` is the integer
        following ``id=`` in the markup of the row's first cell, and
        ``volume`` is ``rs_str_to_int`` applied to the first child of the
        row's fourth cell with its last character removed.

    Raises:
        AttributeError: if the ``top5_left`` div, a table inside it, or
            an ``id=<digits>`` token in a row's first cell is not found.
        IndexError: if a row has fewer than four cells.
    """
    page = read_url(FRONT_VOLUME_URL)
    # No closing tag in the pattern: with DOTALL the match extends to the
    # end of the document.
    tail_match = re.search(r'<div class="top5_left(.+)', page, re.DOTALL)
    first_table = BeautifulSoup(tail_match.group(0)).find('table')
    id_pattern = re.compile(r'id=(\d+)')

    rows = first_table.findAll('tr')
    # The first row is skipped (presumably the table header).
    for row in rows[1:]:
        cells = BeautifulSoup(str(row)).findAll('td')
        item_id = int(id_pattern.search(str(cells[0])).group(1))
        # The trailing character of the cell text is dropped before
        # conversion (presumably a unit suffix -- confirm against the page).
        volume_text = cells[3].contents[0][:-1]
        yield item_id, rs_str_to_int(volume_text)
def get_index_info(html):
    """Yield one record per row of the search results table in *html*.

    The ``<table id="search_results_table" ...>`` element is cut out with
    a regular expression and parsed with BeautifulSoup.  The first and
    last rows of the table are skipped.

    Args:
        html: Markup of the page, as a string.

    Yields:
        Tuples ``(item_id, name, members, price)``:
        ``item_id`` is the integer following ``id=`` in the markup of the
        first cell; ``name`` is the ``alt`` attribute of the first
        ``<img>`` in the row; ``members`` is True when the fifth cell's
        markup contains ``'star_members.png'``; ``price`` is
        ``rs_str_to_int`` applied to the first child of the third cell.

    Raises:
        AttributeError: if the table, or an ``id=<digits>`` token in a
            row's first cell, is not found.
        IndexError: if a row has fewer than five cells.
    """
    table_match = re.search(
        r'<table id="search_results_table"(.+)</table>', html, re.DOTALL)
    table_soup = BeautifulSoup(table_match.group(0))
    id_pattern = re.compile(r'id=(\d+)')

    rows = table_soup.findAll('tr')
    # Both the first and the last row are excluded (presumably header and
    # footer/pagination rows).
    for row in rows[1:-1]:
        row_soup = BeautifulSoup(str(row))
        cells = row_soup.findAll('td')
        item_id = int(id_pattern.search(str(cells[0])).group(1))
        name = row_soup.find('img')['alt']
        price = rs_str_to_int(cells[2].contents[0])
        members = 'star_members.png' in str(cells[4])
        yield (item_id, name, members, price)