def parse_info(self, data):
    """Parse a book detail page (OpenGraph meta tags) into an info dict.

    ``data`` may be an HTTP response object exposing a ``content`` bytes
    attribute, or an already-decoded HTML string.

    Returns a dict with keys: name, latest_chapter, desc, author_name,
    markeup, cover (always a list).
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    book_name = doc('meta[property="og:title"]').attr("content")
    # .attr() returns None when the meta tag is missing; guard with
    # ``or ""`` so the .replace() call cannot raise AttributeError.
    book_desc = (
        doc('meta[property="og:description"]').attr("content") or "").replace(
            "\xa0", "")
    latest_chapter_str = doc(
        'meta[property="og:novel:latest_chapter_name"]').attr("content")
    author_name = doc('meta[property="og:novel:author"]').attr("content")
    markeup = doc('meta[property="og:novel:category"]').attr("content")
    cover = doc('meta[property="og:image"]').attr("content")
    # Callers expect a list of cover URLs.
    if not isinstance(cover, list):
        cover = [cover]
    info = {
        "name": validateFilename(book_name),
        "latest_chapter": validateFilename(latest_chapter_str),
        "desc": book_desc,
        "author_name": author_name,
        "markeup": markeup,
        "cover": cover,
    }
    return info
def parse_info(self, data):
    """Parse a book detail page (OpenGraph meta tags) into an info dict.

    ``data`` may be an HTTP response object exposing a ``content`` bytes
    attribute, or an already-decoded HTML string.

    Returns a dict with keys: name, latest_chapter, desc, author_name,
    markeup, cover (always a list).
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    book_name = doc('meta[property="og:title"]').attr('content')
    # .attr() yields None for an absent meta tag; default to "" so the
    # chained .replace() cannot raise AttributeError.
    book_desc = (doc('meta[property="og:description"]').attr(
        'content') or '').replace("\xa0", '')
    latest_chapter_str = doc(
        'meta[property="og:novel:latest_chapter_name"]').attr('content')
    author_name = doc('meta[property="og:novel:author"]').attr('content')
    markeup = doc('meta[property="og:novel:category"]').attr('content')
    cover = doc('meta[property="og:image"]').attr('content')
    # Normalize to a list; downstream code iterates cover URLs.
    if not isinstance(cover, list):
        cover = [cover]
    info = {
        'name': validateFilename(book_name),
        'latest_chapter': validateFilename(latest_chapter_str),
        'desc': book_desc,
        'author_name': author_name,
        'markeup': markeup,
        'cover': cover
    }
    return info
def get_content_info(self, data):
    """Extract (sanitized_title, body_text) from a chapter page.

    Accepts either a response object with a ``content`` bytes attribute
    or a decoded HTML string.
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    document = pq(data)
    chapter_title = document("#main > h1").text()
    chapter_body = document("#content").text()
    return validateFilename(chapter_title), chapter_body
def get_content_info(self, data):
    """Extract (sanitized_title, body_text) from a chapter page.

    The body is rebuilt from the entry-content children, skipping the
    first two elements (presumably navigation/ads — TODO confirm against
    the live page), each paragraph terminated with "。\\r\\n".
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    title = doc(".entry-title").text()
    eles = doc(".entry-content").children()[2:]
    # Single-pass join instead of repeated str += (which can be
    # quadratic on long chapters).
    content = "".join(f"{ele.text_content()}。\r\n" for ele in eles)
    return validateFilename(title), content
def parse_chapter(self, data):
    """Parse the chapter index into a list of one-entry dicts.

    Shape: [{chapter_title: chapter_url}, ...] — e.g.
    [{"Chapter 1": "http://www.a.cc/1"}, ...].
    """
    # NOTE: removed leftover debug print(data) — it dumped the entire
    # raw response on every call.
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    dl_dd = doc("#main article")
    chapter_list = []
    for u in dl_dd:
        # Each <article> carries its chapter link at header > h1 > a.
        u = u.find("header").find("h1").find("a")
        link = u.get("href")
        chapter_list.append({validateFilename(u.text_content()): link})
    return chapter_list
def parse_info(self, data):
    """Parse a book detail page into an info dict.

    ``data`` may be an HTTP response object exposing a ``content`` bytes
    attribute, or an already-decoded HTML string.

    Returns a dict with keys: name, latest_chapter, desc, author_name,
    markeup, cover (always a list).
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    book_name = doc('.btitle>h1').text()
    book_desc = doc('p.intro').text()
    latest_chapter_str = doc(
        '#container > div.bookinfo > p.stats > span.fl > a').text()
    author_name = doc('#container > div.bookinfo > div > em > a').text()
    markeup = doc('#wrapper > div.crumbs > div.fl > a:nth-child(3)').text()
    # This site exposes no cover image; keep the list shape callers
    # expect. (The original isinstance() check on the "" constant could
    # never be true, so it is folded into the literal.)
    cover = ['']
    info = {
        'name': validateFilename(book_name),
        'latest_chapter': validateFilename(latest_chapter_str),
        'desc': book_desc,
        'author_name': author_name,
        'markeup': markeup,
        'cover': cover
    }
    return info
def parse_info(self, data):
    """Parse a book detail page into an info dict.

    ``data`` may be an HTTP response object exposing a ``content`` bytes
    attribute, or an already-decoded HTML string.

    Returns a dict with keys: name, latest_chapter, desc, author_name,
    markeup, cover (always a list).
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    book_name = doc(".btitle>h1").text()
    book_desc = doc("p.intro").text()
    latest_chapter_str = doc(
        "#container > div.bookinfo > p.stats > span.fl > a").text()
    author_name = doc("#container > div.bookinfo > div > em > a").text()
    markeup = doc("#wrapper > div.crumbs > div.fl > a:nth-child(3)").text()
    # No cover is available on this site; preserve the list-of-covers
    # shape. (The original isinstance() check against the "" constant
    # was always false, so it is removed.)
    cover = [""]
    info = {
        "name": validateFilename(book_name),
        "latest_chapter": validateFilename(latest_chapter_str),
        "desc": book_desc,
        "author_name": author_name,
        "markeup": markeup,
        "cover": cover,
    }
    return info
def parse_chapter(self, data):
    """Parse the chapter list into [{title: absolute_url}, ...].

    Links are made absolute by prefixing ``self.page_base_url``.
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    # Skip the first 9 entries — presumably site boilerplate /
    # "latest chapters" duplicates; TODO confirm against the live page.
    dl_dd = doc('.chapterlist>dd')[9:]
    chapter_list = []
    for dd in dl_dd:
        # The selector already targets <dd>, but keep the tag guard in
        # case stray siblings slip through. (Replaces the original's
        # dead ``flag = False`` init and ``flag = ...; if flag:``
        # indirection with a direct condition.)
        if dd.tag == 'dd':
            link = pq(pq(dd)('a')).attr('href')
            chapter_list.append({
                validateFilename(dd.text_content()): self.page_base_url + link
            })
    return chapter_list
def parse_all_book(self, data):
    """Collect {'title', 'url', 'label', 'author'} dicts from the catalogue.

    Entries whose anchor has no href are dropped.
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    books = []
    for item in doc('#tlist > ul > li'):
        anchor = pq(pq(item)('.zp>a'))
        href = anchor.attr('href')
        # Skip rows without a link before building the record.
        if not href:
            continue
        books.append({
            'title': validateFilename(anchor.text()),
            'url': href,
            'label': "",
            "author": pq(pq(item)('.author')).text(),
        })
    return books
def parse_all_book(self, data):
    """Return a list of book records parsed from the catalogue page.

    Each record is {"title", "url", "label", "author"}; rows with no
    href are skipped.
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    page = pq(data)
    records = []
    for row in page("#tlist > ul > li"):
        row_q = pq(row)
        link_node = pq(row_q(".zp>a"))
        link = link_node.attr("href")
        if not link:
            continue
        records.append({
            "title": validateFilename(link_node.text()),
            "url": link,
            "label": "",
            "author": pq(row_q(".author")).text(),
        })
    return records
def parse_chapter(self, data):
    """Parse the chapter index into [{title: absolute_url}, ...].

    Children of #list dl are scanned (after dropping the first one);
    everything up to and including the first <dt> is skipped, and all
    subsequent entries are collected as chapters.
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    nodes = doc("#list dl").children()[1:]
    chapter_list = []
    seen_dt = False
    for node in nodes:
        if not seen_dt:
            # Still before (or at) the first <dt>; nothing collected yet.
            seen_dt = node.tag == "dt"
            continue
        link = node.find("a").get("href")
        chapter_list.append({
            validateFilename(node.text_content()): self.page_base_url + link
        })
    return chapter_list
def parse_all_book(self, data):
    """Collect book records from the details listing.

    Each record is {'title', 'author', 'url', 'label'}; the url is made
    absolute with self.page_base_url and the label's surrounding square
    brackets are stripped. Rows without an href are dropped.
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    results = []
    for row in doc('#content > div > div.details.list-type > ul > li'):
        row_q = pq(row)
        href = pq(row_q('.s2 > a')).attr('href')
        if not href:
            continue
        raw_label = pq(row_q('.s1')).text()
        results.append({
            'title': validateFilename(pq(row_q('.s2 > a')).text()),
            'author': pq(row_q('.s3')).text(),
            'url': self.page_base_url + href,
            'label': raw_label.replace('[', '').replace(']', ''),
        })
    return results
def parse_all_book(self, data):
    """Return book records from the details listing page.

    Record shape: {"title", "author", "url", "label"}. URLs are
    prefixed with self.page_base_url; label brackets are stripped;
    rows with no href are skipped.
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    page = pq(data)
    collected = []
    for entry in page("#content > div > div.details.list-type > ul > li"):
        entry_q = pq(entry)
        anchor = pq(entry_q(".s2 > a"))
        relative = anchor.attr("href")
        if not relative:
            continue
        label_text = pq(entry_q(".s1")).text().replace("[", "").replace("]", "")
        collected.append({
            "title": validateFilename(anchor.text()),
            "author": pq(entry_q(".s3")).text(),
            "url": self.page_base_url + relative,
            "label": label_text,
        })
    return collected
def parse_all_book(self, data):
    """Walk each .novellist category block and collect its books.

    The block's <h2> text becomes each record's label; records are
    {"title", "url", "label"}. Entries without an href are skipped.
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    collected = []
    for block in doc(".novellist"):
        inner = pq(block).html()
        category = pq(inner)("h2").text()
        for entry in pq(pq(inner)("ul"))("li"):
            link = pq(pq(entry)("a")).attr("href")
            if not link:
                continue
            collected.append({
                "title": validateFilename(pq(entry).text()),
                "url": link,
                "label": category,
            })
    return collected
def parse_all_book(self, data):
    """Collect books grouped under .novellist category blocks.

    Each block's <h2> supplies the 'label'; each <li> anchor supplies
    'title' and 'url'. Entries lacking an href are dropped.
    """
    if data and hasattr(data, "content"):
        data = data.content.decode(self.encoding)
    doc = pq(data)
    novel_list = []
    for section in doc('.novellist'):
        section_html = pq(section).html()
        heading = pq(section_html)('h2').text()
        items = pq(pq(section_html)('ul'))('li')
        for item in items:
            href = pq(pq(item)('a')).attr('href')
            if not href:
                continue
            novel_list.append({
                'title': validateFilename(pq(item).text()),
                'url': href,
                'label': heading
            })
    return novel_list