Esempio n. 1
0
def _get_pages(start_page_url):
    count = 1
    soup = utils.get_parsed(site + start_page_url)
    img = soup.find("img")['src']
    next_page = soup.find('span', {'class': 'next'}).findChild()['href']
    yield count, img
    while next_page.startswith(start_page_url):
        print count,
        count += 1
        soup = utils.get_parsed(site + next_page)
        img = soup.find("img")['src']
        next_page = soup.find('span', {'class': 'next'}).findChild()['href']
        yield count, img
Esempio n. 2
0
def _get_pages(start_page_url):
    chapter_url = start_page_url.replace('/p1', '')
    count = 1
    soup = utils.get_parsed(start_page_url)
    img = _find_img(soup)
    next_page = soup.find('a', {'class': 'nextLink'})['href']
    yield count, img
    while next_page.startswith(chapter_url):
        print count,
        count += 1
        soup = utils.get_parsed(next_page)
        img = _find_img(soup)
        next_page = soup.find('a', {'class': 'nextLink'})['href']
        yield count, img
Esempio n. 3
0
def get_index():
    """Build the site's series index.

    Fetches the index page and maps each "popupLink" anchor's text to its
    href (which may be ``None`` if the anchor has no ``href`` attribute).

    :returns: dict mapping series title -> URL.
    """
    page = utils.get_parsed(site_index)
    return {
        anchor.string: anchor.get('href')
        for anchor in page.find_all("a", {"class": "popupLink"})
    }
Esempio n. 4
0
def get_index():
    """Build the site's series index.

    Scans every anchor inside the "series_alpha" divs of the index page and
    keeps only site-relative links (href starting with ``/`` and longer than
    one character).

    :returns: dict mapping series title -> site-relative URL.
    """
    page = utils.get_parsed(site_index)
    anchors = (
        anchor
        for section in page.find_all("div", {"class": "series_alpha"})
        for anchor in section.find_all('a')
    )
    index = {}
    for anchor in anchors:
        href = anchor.get('href')
        if not href or not href.startswith('/') or len(href) <= 1:
            continue  # skip anchors without a usable relative link
        index[anchor.string] = href
    return index
Esempio n. 5
0
def get_chapters(url, name):
    """Return the chapter map for a series, using the on-disk cache if present.

    :param url: site-relative URL of the series' chapter-list page.
    :param name: series title; used to derive the cache file name and to
        strip the title from chapter link text before parsing the number.
    :returns: dict mapping chapter number (int) -> site-relative chapter URL.
    """
    # NOTE(review): the char class ` -/` is an ASCII *range* (space through
    # '/', so also !"#$%&'()*+,-.) — confirm that is intended and not `[ \-/]`.
    index_name = utils.index_location_format % re.sub(
        r'[ -/]', '_', name.lower())
    cached = utils.get_index_from_store(site_folder, index_name)
    if cached:
        # Cache keys come back as strings; coerce to int for callers.
        return dict((int(num), link) for num, link in cached.items())

    soup = utils.get_parsed(site + url)
    chapters = {}
    for anchor in soup.find(id='chapterlist').find_all('a'):
        href = anchor.get('href')
        if not href or not href.startswith('/') or len(href) <= 1:
            continue  # ignore anchors without a usable relative link
        # Chapter number = link text minus the series title.
        number = int(anchor.string.replace(name, '').strip())
        chapters[number] = href

    utils.store_index(chapters, site_folder, index_name)
    return chapters