Example #1
0
def get_searchList(search_text, page):
    """Search the manga site for *search_text* and return one page of results.

    Parameters:
        search_text: query string inserted into the site's ``/search`` URL
            (assumed to be URL-safe; not encoded here -- TODO confirm callers
            pre-encode it).
        page: result-page number passed through to the site.

    Returns:
        list of dicts, one per result item:
            'url'  -- local route: '/info' + the item's href.
            'html' -- serialized children of the result anchor, restructured
                      so the thumbnail <img> is moved to the end, followed by
                      a new <div class="manga-right"> wrapping the item's
                      former <p> children (renamed to <div>).
    """
    headers = {'User-Agent': UserAgents.get_user_agent_phone()}
    url = '{0}/search?title={1}&page={2}'.format(server, search_text, page)
    # timeout added for consistency with get_chapterImages -- a hung request
    # would otherwise block this call indefinitely.
    html = requests.get(url, headers=headers, timeout=10).text

    soup = BeautifulSoup(html, 'html.parser')
    results = []
    for book in soup.find_all('a', class_='manga-item'):
        temp = {'url': '/info' + book['href']}

        # Re-layout the item in place: collect the <p> children (renamed to
        # <div>) inside a new wrapper, then re-append the <img> and the
        # wrapper so they become the last two children.
        img = book.find('img')
        wrapper = soup.new_tag('div')
        wrapper['class'] = 'manga-right'
        for p in book.find_all('p'):
            p.name = 'div'
            wrapper.append(p)
        book.append(img)
        book.append(wrapper)

        temp['html'] = ''.join(str(child) for child in book)
        results.append(temp)

    return results
Example #2
0
def get_chapterImages(bookID, chapterID):
    """Fetch a chapter page and build the ordered list of page-image URLs.

    Parameters:
        bookID: site identifier of the book (used in the image path).
        chapterID: site identifier of the chapter ('/m{chapterID}' page).

    Returns:
        dict with key 'imgs' -> list of full image URLs, sorted by the
        numeric prefix of each image name.
    """
    results = {}
    images = []

    headers = {'User-Agent': UserAgents.get_user_agent_phone()}
    chapterUrl = server + '/m{0}'.format(chapterID)
    html = requests.get(chapterUrl, headers=headers, timeout=10).text

    # The image-host "directory number" is embedded in obfuscated JS.
    # Primary source: a |-separated token list ending in ...jpg; fallback:
    # a version-like "x.y.z/NNN/" path.  The original code called .group()
    # on a possibly-None match object and raised AttributeError whenever
    # the page layout changed (its `spare is not None` check was dead code
    # -- the crash happened before it); both lookups are now guarded, and
    # the initial '1' remains the last-resort default.
    urlNum = '1'
    match = re.search(r",'[0-9].*?jpg", html)
    spare = match.group().split('|') if match else None
    if (spare is not None and len(spare) > 4
            and spare[-3].isdigit() and int(spare[-3]) < 50):
        urlNum = spare[-3]
    else:
        match = re.search(r'[0-9]\.[0-9]\.[0-9]/[0-9]*/', html)
        if match:
            urlNum = match.group().split('/')[1]

    imgsList = re.findall(r'[0-9]+?_[0-9]+', html)
    # Sort pages numerically by the prefix before '_' (lexicographic order
    # would put '10_...' before '2_...').
    imgsList.sort(key=lambda name: int(name.split('_')[0]))

    for img in imgsList:
        img_url = imgServer + '{0}/{1}/{2}/'.format(urlNum, bookID,
                                                    chapterID) + img + '.jpg'
        images.append(img_url)

    results['imgs'] = images
    return results
Example #3
0
def get_bookInfo(bookID):
    """Scrape a book's detail page and return its metadata.

    Parameters:
        bookID: path segment identifying the book on the server.

    Returns:
        dict with:
            'title'   -- book title text.
            'author'  -- author name text.
            'content' -- description text: all 'detail-main-content'
                         paragraphs concatenated.
            'imgUrl'  -- cover-image URL ('src' of the detail-bar image).
            'list'    -- chapter entries ({'url', 'name'}), in reverse of
                         the page's order.

    Raises:
        AttributeError / TypeError if the expected elements are missing
        (no guards -- the page layout is assumed stable).
    """
    results = {}
    headers = {'User-Agent': UserAgents.get_user_agent_phone()}
    # timeout added for consistency with get_chapterImages.
    html = requests.get('{0}/{1}'.format(server, bookID), headers=headers,
                        timeout=10).text
    soup = BeautifulSoup(html, 'html.parser')

    results['title'] = soup.find('p', class_='detail-main-title').text
    results['author'] = soup.find(
        'p', class_='detail-main-subtitle').find('a').find('span').text
    # BUG FIX: the original called find('p', class_='detail-main-content')
    # twice and concatenated the results, but find() returns the SAME first
    # element both times, so the description text was simply duplicated.
    # find_all() concatenates every content paragraph instead.
    results['content'] = ''.join(
        p.text for p in soup.find_all('p', class_='detail-main-content'))
    results['imgUrl'] = soup.find('img', class_='detail-bar-bg')['src']

    detailList = []
    for item in soup.find('div', class_='detail-list').find_all('a'):
        detailList.append({'url': item['href'], 'name': item.text.strip()})
    # Reverse the chapter order (presumably the site lists newest-first --
    # TODO confirm against the live page).
    detailList.reverse()
    results['list'] = detailList

    return results