Example #1
0
def get_volume_info():
    """Yield ``(id, volume)`` pairs scraped from the page at ``VOLUME_URL``.

    The markup from ``<table id="top100_table"`` up to the last ``</table>``
    in the page is parsed.  The first ``<tr>`` is skipped; every later row
    contributes one pair: the integer following ``id=`` in the row's first
    cell, and the first child of the row's sixth cell converted with
    ``rs_str_to_int``.
    """
    page = read_url(VOLUME_URL)
    table_match = re.search(r'<table id="top100_table"(.+)</table>', page, re.DOTALL)
    # group(0) is the whole match, so the opening <table ...> tag is kept.
    rows = BeautifulSoup(table_match.group(0)).findAll('tr')
    for row in rows[1:]:
        # Each row is re-parsed from its serialised form before cell lookup.
        cells = BeautifulSoup(str(row)).findAll('td')
        id_match = re.search(r'id=(\d+)', str(cells[0]))
        item_id = int(id_match.group(1))
        yield item_id, rs_str_to_int(cells[5].contents[0])
Example #2
0
def get_front_volume_info():
    """Yield ``(id, volume)`` pairs scraped from the page at ``FRONT_VOLUME_URL``.

    Everything from ``<div class="top5_left`` to the end of the page is
    parsed and the first ``<table>`` found in it is used.  The first ``<tr>``
    is skipped; every later row contributes one pair: the integer following
    ``id=`` in the row's first cell, and the first child of the row's fourth
    cell, minus its final character, converted with ``rs_str_to_int``.
    """
    page = read_url(FRONT_VOLUME_URL)
    tail_match = re.search(r'<div class="top5_left(.+)', page, re.DOTALL)
    first_table = BeautifulSoup(tail_match.group(0)).find('table')
    rows = first_table.findAll('tr')
    for row in rows[1:]:
        # Each row is re-parsed from its serialised form before cell lookup.
        cells = BeautifulSoup(str(row)).findAll('td')
        item_id = int(re.search(r'id=(\d+)', str(cells[0])).group(1))
        # The last character is dropped before conversion
        # (presumably a unit suffix -- verify against the live page).
        volume_text = cells[3].contents[0][:-1]
        yield item_id, rs_str_to_int(volume_text)
Example #3
0
def get_volume_info():
    """Generate ``(id, volume)`` tuples from the table in the ``VOLUME_URL`` page.

    The span starting at ``<table id="top100_table"`` and ending at the last
    ``</table>`` of the page is handed to BeautifulSoup.  Rows after the
    first ``<tr>`` each produce one tuple: the digits after ``id=`` in the
    first cell as an ``int``, and ``rs_str_to_int`` of the first child of
    the sixth cell.
    """
    markup = read_url(VOLUME_URL)
    found = re.search(
        r'<table id="top100_table"(.+)</table>',
        markup,
        re.DOTALL,
    )
    all_rows = BeautifulSoup(found.group(0)).findAll('tr')
    # Index 0 is skipped; only the rows after it are read.
    for entry in all_rows[1:]:
        tds = BeautifulSoup(str(entry)).findAll('td')
        entry_id = int(re.search(r'id=(\d+)', str(tds[0])).group(1))
        entry_volume = rs_str_to_int(tds[5].contents[0])
        yield entry_id, entry_volume
Example #4
0
def loop_and_parse_indexes():
    """Fetch the index pages for every letter and pass each to ``parse_index``.

    The keys ``a`` .. ``z`` are visited in order, followed by ``'Other'``.
    For each key, pages are requested starting at page 1.  Paging for a key
    stops when:

    * the fetched content contains ``'did not return'`` -- that page is not
      parsed; or
    * the fetched content contains ``'Next &gt;<br>'`` -- that page is
      parsed first and is then treated as the last page for the key.
    """
    logging.info('Looping through index pages.')
    letters = list('abcdefghijklmnopqrstuvwxyz')
    letters.append('Other')
    for letter in letters:
        page = 1
        while True:
            content = read_url(get_index_url(letter, page))
            if 'did not return' in content:
                break
            # Lazy %-args: the message is only formatted when DEBUG is enabled.
            logging.debug("Parsing page %i of letter '%s'", page, letter)
            parse_index(content)
            # NOTE(review): presumably a "Next >" immediately followed by <br>
            # is the non-link form shown on the final page -- confirm.
            if 'Next &gt;<br>' in content:
                break
            page += 1
Example #5
0
def get_front_volume_info():
    """Generate ``(id, volume)`` tuples from the ``FRONT_VOLUME_URL`` page.

    The page text from ``<div class="top5_left`` onwards is handed to
    BeautifulSoup and its first ``<table>`` is taken.  Rows after the first
    ``<tr>`` each produce one tuple: the digits after ``id=`` in the first
    cell as an ``int``, and ``rs_str_to_int`` of the fourth cell's first
    child with its last character removed.
    """
    markup = read_url(FRONT_VOLUME_URL)
    found = re.search(
        r'<div class="top5_left(.+)',
        markup,
        re.DOTALL,
    )
    front_table = BeautifulSoup(found.group(0)).find('table')
    # Index 0 is skipped; only the rows after it are read.
    for entry in front_table.findAll('tr')[1:]:
        tds = BeautifulSoup(str(entry)).findAll('td')
        entry_id = int(re.search(r'id=(\d+)', str(tds[0])).group(1))
        # Trailing character is stripped before conversion
        # (presumably a suffix on the displayed figure -- verify).
        trimmed = tds[3].contents[0][:-1]
        entry_volume = rs_str_to_int(trimmed)
        yield entry_id, entry_volume
Example #6
0
def get_detail_info_from_id(rs_id):
    """Return ``get_detail_info`` applied to the page fetched for ``rs_id``.

    The URL comes from ``get_detail_url(rs_id)`` and is fetched with
    ``read_url``; the fetched content is passed on unchanged.
    """
    detail_url = get_detail_url(rs_id)
    return get_detail_info(read_url(detail_url))