Example #1
    def get_books(self, search_info: str) -> List[basesite.Book]:
        url = self.search_url % urllib.parse.quote(search_info)
        r = self.try_get_url(self.session, url, try_timeout=5)
        if r is None or r.text.find("没有找到有关") >= 0:  # "没有找到有关" is the site's "no results found" marker
            return []

        soup = BeautifulSoup(r.content, 'html.parser')
        book_soup_list = soup.select('div.content > article')
        search_book_results = []
        for book_soup in book_soup_list:
            tmp_soup = book_soup.select_one('header > h2 > a')
            book_url = tmp_soup.attrs['href']
            m = re.search(r'《(.*?)》.* (\w+) 著', tmp_soup.text)
            if not m:  # skip rows whose title text does not match 《书名》... 作者 著
                continue
            book_name = m.group(1)
            book_author = m.group(2)
            tmp_text = book_soup.select_one('p.note').text
            if m2 := re.search(r"简介(:)?(.*)", tmp_text):
                book_brief = m2.group(2).strip()
            else:
                book_brief = tmp_text.strip()
            book = basesite.Book(site=self,
                                 url=book_url,
                                 name=book_name,
                                 author=book_author,
                                 brief=book_brief)
            search_book_results.append(book)
        return search_book_results
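
Side note: the header regex above assumes anchor text shaped like 《书名》... 作者 著. A quick offline check against a made-up title line (the book data here is hypothetical):

import re

line = '《三国演义》全文在线阅读 罗贯中 著'
m = re.search(r'《(.*?)》.* (\w+) 著', line)
assert m is not None
print(m.group(1))  # 三国演义
print(m.group(2))  # 罗贯中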
Example #2
    def get_books(self, search_info: str) -> List[basesite.Book]:
        url = self.search_url % urllib.parse.quote(search_info)
        r = self.try_get_url(self.session, url, try_timeout=5)
        if r is None:
            return []

        soup = BeautifulSoup(r.content, 'html.parser')
        book_tag_list = soup.select('tr')
        book_num = len(book_tag_list) - 1  # the first <tr> is the table header
        if book_num <= 0:
            return []

        search_book_results = []
        book_soup_list = book_tag_list[1:]
        for book_soup in book_soup_list:
            td_list = book_soup.findAll('td')
            book_url = self.site_url + td_list[1].find('a').attrs['href']
            book_name = td_list[1].find('a').text
            book_author = td_list[2].text
            book_brief = f"最新章节:{td_list[3].find('a').text} 更新时间:{td_list[4].text.strip()}"
            book = basesite.Book(site=self,
                                 url=book_url,
                                 name=book_name,
                                 author=book_author,
                                 brief=book_brief)
            search_book_results.append(book)
        return search_book_results
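
Side note: this parser reads fixed <td> positions out of each result row. The actual markup is not shown in the source, so here is a made-up row in the shape the indices imply, checked offline:

from bs4 import BeautifulSoup

# Hypothetical row: index 1 holds the title link, 2 the author,
# 3 the latest chapter, 4 the update time.
html = ('<tr><td>1</td><td><a href="/book/123/">某书</a></td>'
        '<td>某作者</td><td><a href="/book/123/456.html">第一章</a></td>'
        '<td> 2020-05-02 </td></tr>')
td_list = BeautifulSoup(html, 'html.parser').find('tr').findAll('td')
assert td_list[1].find('a').attrs['href'] == '/book/123/'
assert td_list[2].text == '某作者'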
Example #3
    def get_books(self, search_info: str) -> List[basesite.Book]:
        r = self.try_post_url(
            self.session,
            url=self.search_url,
            try_timeout=5,
            params=f'searchtype=all&searchkey={urllib.parse.quote(search_info)}'
        )
        if r is None:
            return []

        soup = BeautifulSoup(r.content, 'html.parser')
        book_tag_list = soup.select('div.novelslist2 > ul > li')
        book_num = len(book_tag_list) - 1  # the first <li> is the list header
        if book_num <= 0:
            return []

        search_book_results = []
        book_soup_list = book_tag_list[1:]
        for book_soup in book_soup_list:
            span_list = book_soup.findAll('span')
            book_url = self.base_url + span_list[1].find('a').attrs['href']
            book_name = span_list[1].find('a').text
            book_author = span_list[3].text
            book_brief = f"最新章节:{span_list[2].find('a').text} 更新时间:{span_list[4].text.strip()}"
            book = basesite.Book(site=self,
                                 url=book_url,
                                 name=book_name,
                                 author=book_author,
                                 brief=book_brief)
            search_book_results.append(book)
        return search_book_results
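
Side note: passing a plain string as params to a POST is valid in requests; the string is appended verbatim as the query string. A standalone check with a placeholder URL (no network needed):

import requests

req = requests.Request('POST', 'http://example.com/search.php',
                       params='searchtype=all&searchkey=abc').prepare()
print(req.url)  # http://example.com/search.php?searchtype=all&searchkey=abc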
Example #4
class Fox2018Site(basesite.BaseSite):
    def __init__(self):
        self.site_info = basesite.SiteInfo(
            type='文学',
            statue='上线版本',
            url='http://www.fox2018.com',
            name='青少年读书网',
            brief_name='青少年',
            version='1.1',
            max_threading_number=50,
        )
        super().__init__(self.site_info)
        self.base_url = 'http://www.fox2018.com'
        self.encoding = 'GB2312'
        self.search_url = 'http://www.fox2018.com/e/search/index.php'
        self.session = requests.session()

    @basesite.print_in_out
    def get_books(self, search_info: str) -> List[basesite.Book]:
        headers = {
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Content-Type':
            'application/x-www-form-urlencoded',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0'
        }
        # data = {'keyboard': urllib.parse.quote(search_info.encode(self.encoding)), ...}  # wrong: the dict form does not work; this cost half a day on May 2 (see the note below)
        data = f'show=title&classid=1&tempid=4&keyboard={urllib.parse.quote(search_info.encode(self.encoding))}'
        r = self.try_post_url(self.session,
                              url=self.search_url,
                              try_timeout=5,
                              headers=headers,
                              data=data)
        if r is None:
            return []
        soup = BeautifulSoup(r.content.decode(self.encoding, 'ignore'),
                             'html.parser')
        # soup = BeautifulSoup(r.content, 'html.parser')  # alternative that relies on BeautifulSoup's own encoding detection
        if not (book_soup_list := soup.select('div.classify_list a')):
            return []

        search_book_results = []
        for book_soup in book_soup_list:
            book_url = self.base_url + book_soup.attrs['href']
            book_name = book_soup.select_one('h3').text
            book_author = book_soup.select_one('p.author i').text
            book_brief = book_soup.select_one('p.brief').text
            book = basesite.Book(site=self,
                                 url=book_url,
                                 name=book_name,
                                 author=book_author,
                                 brief=book_brief)
            search_book_results.append(book)
        return search_book_results
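
Side note: the commented warning above is the shared gotcha in these GB2312 sites: the search key must be encoded and percent-quoted before requests sees it. If the already-quoted value went into a data dict, requests would urlencode it again and double-escape the % signs, which is presumably why the body string is built by hand. A standalone sketch (the search term is hypothetical):

import urllib.parse

search_info = '三国'
quoted = urllib.parse.quote(search_info.encode('GB2312'))
print(quoted)                      # %C8%FD%B9%FA, what the site expects
print(urllib.parse.quote(quoted))  # %25C8%25FD%25B9%25FA, the double-encoded
                                   # form a data dict would end up sending

data = f'show=title&classid=1&tempid=4&keyboard={quoted}'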
Example #5
    def get_books(self, search_info: str) -> List[basesite.Book]:
        url = self.search_url % urllib.parse.quote(search_info)
        r = self.try_get_url(self.session, url, try_timeout=5)
        if r is None:
            return []

        soup = BeautifulSoup(r.content, 'html.parser')
        book_soup_list = soup.select('tbody > tr')
        search_book_results = []
        for book_soup in book_soup_list:
            td_soup_list = book_soup.select('td')
            book_url = self.base_url + td_soup_list[0].select_one(
                'a').attrs['href']
            if book_url.find('search.html') != -1:
                continue
            book_name = td_soup_list[0].text
            book_author = td_soup_list[1].text
            book_brief = "无"
            book = basesite.Book(site=self,
                                 url=book_url,
                                 name=book_name,
                                 author=book_author,
                                 brief=book_brief)
            search_book_results.append(book)
        return search_book_results
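
Side note: every example funnels its HTTP calls through self.try_get_url / self.try_post_url and treats None as failure. The real implementations live in basesite and are not shown here; a minimal sketch of what such a wrapper might look like under that contract (hypothetical, not the actual basesite code):

from typing import Optional
import requests

def try_get_url(session: requests.Session, url: str,
                try_timeout: float = 5, tries: int = 3,
                **kwargs) -> Optional[requests.Response]:
    # Retry a few times; return None once every attempt has failed,
    # which is the condition all the callers above check for.
    for _ in range(tries):
        try:
            return session.get(url, timeout=try_timeout, **kwargs)
        except requests.RequestException:
            continue
    return None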
Example #6
class Shuku87Site(basesite.BaseSite):
    def __init__(self):
        self.site_info = basesite.SiteInfo(
            type='网络小说',
            statue='上线版本',
            url='http://www.87xiaoshuo.net',
            name='霸气书库',
            brief_name='霸气网',
            version='1.1',
            max_threading_number=3,
        )
        super().__init__(self.site_info)
        self.base_url = 'http://www.87xiaoshuo.net'
        self.encoding = 'GB2312'
        self.search_url = 'http://www.87xiaoshuo.net/modules/article/search.php'
        self.session = requests.session()

    @basesite.print_in_out
    def get_books(self, search_info: str) -> List[basesite.Book]:
        headers = {
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Content-Type':
            'application/x-www-form-urlencoded',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0'
        }
        # data = {'t': '1', 'searchkey': urllib.parse.quote(search_info.encode('GB2312'))}  # wrong: the dict form gets urlencoded again (see the note after Example #4)
        data = f'searchkey={urllib.parse.quote(search_info.encode("GB2312"))}&t=1'
        r = self.try_post_url(self.session,
                              url=self.search_url,
                              try_timeout=5,
                              headers=headers,
                              data=data,
                              allow_redirects=False)
        if r is None:
            return []
        if r.status_code == 302:  # exactly one match found; the site redirects straight to the book page
            return [
                basesite.Book(site=self,
                              url=r.headers['Location'],
                              name=search_info,
                              author="",
                              brief="")
            ]

        soup = BeautifulSoup(r.content.decode(self.encoding, 'ignore'),
                             'html.parser')
        if not (book_soup_list := soup.select('div.ml212 dt')):
            return []

        search_book_results = []
        for book_soup in book_soup_list:
            book_url = book_soup.select_one('a').attrs['href']
            m = re.search(r'(\w+).*作者:(\w+).*?(\w+.*)$',
                          book_soup.text,
                          flags=re.DOTALL)
            if not m:
                print(
                    f'error in {self.site_info.brief_name} {book_url=} {book_soup.text=}'
                )
                return []
            book_name = m.group(1)
            book_author = m.group(2)
            book_brief = m.group(3).replace("\n", "").replace("\r", "").strip()
            book = basesite.Book(site=self,
                                 url=book_url,
                                 name=book_name,
                                 author=book_author,
                                 brief=book_brief)
            search_book_results.append(book)
        return search_book_results
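
Side note: the re.DOTALL search above assumes each <dt> flattens to roughly 书名 ... 作者:名字 ... 简介文字. A quick offline check against made-up <dt> text (the book data is hypothetical):

import re

text = '斗破苍穹\n作者:天蚕土豆\n简介 这是一个属于斗气的世界'
m = re.search(r'(\w+).*作者:(\w+).*?(\w+.*)$', text, flags=re.DOTALL)
assert m is not None
print(m.group(1))  # 斗破苍穹
print(m.group(2))  # 天蚕土豆
print(m.group(3))  # 简介 这是一个属于斗气的世界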