async def delete_bookshelf(request):
    """
    Delete one book from the current user's bookshelf.

    The request body carries either ``book_url`` directly, or the pair
    ``chapter_url``/``novels_name`` from which the book url is rebuilt.

    :param request: HTTP request with a session and a urlencoded body.
    :return: JSON status:
        -1  user's session expired, need to login again
         0  not deleted (missing params or db error)
         1  deleted successfully
    """
    user = request['session'].get('user', None)
    data = parse_qs(str(request.body, encoding='utf-8'))
    if user:
        if data.get('book_url', None):
            book_url = data.get('book_url', None)[0]
        else:
            novels_name = data.get('novels_name', '')
            chapter_url = data.get('chapter_url', '')
            # Bug fix: the original indexed [0] on the '' defaults below,
            # raising an uncaught IndexError (HTTP 500) when either
            # parameter was missing from the request body.
            if not (novels_name and chapter_url):
                return json({'status': 0})
            book_url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
                chapter_url=chapter_url[0], novels_name=novels_name[0])
        try:
            motor_db = motor_base.get_db()
            # $pull removes the matching entry from the books_url array.
            await motor_db.user_message.update_one(
                {'user': user},
                {'$pull': {'books_url': {"book_url": unquote(book_url)}}})
            LOGGER.info('You have deleted bookshelf')
            return json({'status': 1})
        except Exception as e:
            LOGGER.exception(e)
            return json({'status': 0})
    else:
        return json({'status': -1})
async def delete_user(request):
    """
    Delete a user account (administrator only).

    :param request: HTTP request; body carries ``user_name`` of the
        account to remove.
    :return: JSON status:
        -1  administrator's session expired, need to login again
         0  not deleted (missing user_name or db error)
         1  deleted successfully
    """
    user = request['session'].get('user', None)
    role = request['session'].get('role', None)
    data = parse_qs(str(request.body, encoding='utf-8'))
    motor_db = motor_base.get_db()
    # Check whether the caller is a logged-in administrator.
    if user and role == "Admin":
        user_name_values = data.get('user_name', None)
        # Bug fix: the original fell back to deleting {'user': ''} when no
        # user_name was posted, then reported status 1 ("deleted") even
        # though no real account was targeted. Refuse instead.
        if not user_name_values:
            return json({'status': 0})
        user_name_delete = user_name_values[0]
        try:
            await motor_db.user.delete_one({'user': user_name_delete})
            LOGGER.info('You have deleted the user')
            return json({'status': 1})
        except Exception as e:
            LOGGER.exception(e)
            return json({'status': 0})
    else:
        return json({'status': -1})
async def add_bookshelf(request):
    """
    Add a book to the current user's bookshelf (any logged-in user).

    :param request: HTTP request; body carries ``novels_name``,
        ``chapter_url`` and ``last_read_url``.
    :return: JSON status:
        -1  user's session expired, need to login again
         0  not added (missing params or db error)
         1  added successfully
    """
    user = request['session'].get('user', None)
    data = parse_qs(str(request.body, encoding='utf-8'))
    novels_name = data.get('novels_name', '')
    chapter_url = data.get('chapter_url', '')
    last_read_url = data.get('last_read_url', '')
    if user and novels_name and chapter_url:
        # Bug fix: the original did last_read_url[0] unguarded; with the ''
        # default that raised IndexError, silently swallowed by the broad
        # except below. Validate explicitly and keep the observable
        # status-0 response.
        if not last_read_url:
            return json({'status': 0})
        url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
            chapter_url=chapter_url[0], novels_name=novels_name[0])
        # Renamed from `time` to avoid shadowing the time module used
        # elsewhere in this file.
        now = get_time()
        try:
            motor_db = motor_base.get_db()
            res = await motor_db.user_message.update_one(
                {'user': user},
                {'$set': {'last_update_time': now}},
                upsert=True)
            if res:
                # The $ne guard keeps the push idempotent: only add the
                # book when it is not already on the shelf.
                await motor_db.user_message.update_one(
                    {'user': user, 'books_url.book_url': {'$ne': url}},
                    {'$push': {
                        'books_url': {
                            'book_url': url,
                            'add_time': now,
                            'last_read_url': unquote(last_read_url[0])}}})
            LOGGER.info('You have added this page successfully in your bookshelf!')
            return json({'status': 1})
        except Exception as e:
            LOGGER.exception(e)
            return json({'status': 0})
    else:
        return json({'status': -1})
async def delete_bookmark(request):
    """
    Remove one bookmark from the current user's saved bookmarks.

    :param request: HTTP request; body carries ``bookmarkurl``.
    :return: JSON status:
        -1  user's session expired, need to login again
         0  not deleted (db error)
         1  deleted successfully
    """
    session = request['session']
    user = session.get('user', None)
    form = parse_qs(str(request.body, encoding='utf-8'))
    bookmark_url = form.get('bookmarkurl', '')
    # Guard clause: require both a live session and the bookmark param.
    if not (user and bookmark_url):
        return json({'status': -1})
    target = unquote(bookmark_url[0])
    try:
        db = motor_base.get_db()
        # $pull drops the matching entry from the bookmarks array.
        await db.user_message.update_one(
            {'user': user},
            {'$pull': {'bookmarks': {"bookmark": target}}})
        LOGGER.info('You have already delete one bookmark')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
def extract_pre_next_chapter(chapter_url, html):
    """
    Extract candidate previous/next chapter links from a chapter page.

    :param chapter_url: url of the current chapter (base for relative hrefs).
    :param html: raw page html.
    :return: OrderedDict mapping the first 5 chars of the link text to the
        absolute url; possibly partial if an error occurs mid-scan.
    """
    chapters = OrderedDict()
    try:
        # reference https://greasyfork.org/zh-CN/scripts/292-my-novel-reader
        anchor_pattern = r'(<a\s+.*?>.*[第上前下后][一]?[0-9]{0,6}?[页张个篇章节步].*?</a>)'
        keyword_pattern = r'[第上前下后][一]?[0-9]{0,6}?[页张个篇章节步]'
        # Strip << / >> arrows before matching, then re-parse only the
        # anchors that look like prev/next navigation.
        cleaned_html = html.replace('<<', '').replace('>>', '')
        candidates = re.findall(anchor_pattern, cleaned_html, re.I)
        soup = BeautifulSoup('\n'.join(candidates), 'html5lib')
        for anchor in soup.find_all('a'):
            label = (anchor.text or '').replace(' ', '')
            if not novels_list(label):
                continue
            if not re.search(keyword_pattern, label):
                continue
            target = urljoin(chapter_url, anchor.get('href')) or ''
            chapters[label[:5]] = target
    except Exception as e:
        LOGGER.exception(e)
    return chapters
async def add_bookmark(request):
    """
    Save a bookmark for the current user (any logged-in user).

    :param request: HTTP request; body carries ``bookmark_url``.
    :return: JSON status:
        -1  user's session expired, need to login again
         0  not added (db error)
         1  added successfully
    """
    user = request['session'].get('user', None)
    form = parse_qs(str(request.body, encoding='utf-8'))
    # The url of the page being bookmarked.
    bookmark_url = form.get('bookmark_url', '')
    # Guard clause: require both a live session and the bookmark param.
    if not (user and bookmark_url):
        return json({'status': -1})
    url = unquote(bookmark_url[0])
    now = get_time()
    try:
        db = motor_base.get_db()
        touched = await db.user_message.update_one(
            {'user': user},
            {'$set': {'last_update_time': now}},
            upsert=True)
        if touched:
            # The $ne guard makes the push idempotent: only store the
            # bookmark when it is not already present.
            await db.user_message.update_one(
                {'user': user, 'bookmarks.bookmark': {'$ne': url}},
                {'$push': {'bookmarks': {'bookmark': url, 'add_time': now}}})
        LOGGER.info('bookmark has been added')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def update_all_books(loop, timeout=15):
    """
    Refresh the latest-chapter cache for every book on every user's shelf.

    :param loop: event loop handle (kept for interface compatibility;
        unused in this body).
    :param timeout: per-request timeout in seconds forwarded to the fetcher.
    :return: False when the outer scan fails; otherwise None.
    """
    try:
        motor_db = MotorBase().get_db()
        # Project only the book urls; drop _id.
        books_url_cursor = motor_db.user_message.find(
            {}, {'books_url.book_url': 1, '_id': 0})
        already_urls = set()
        async for document in books_url_cursor:
            if not document:
                continue
            # Robustness fix: .get avoids a KeyError aborting the whole
            # scan when a document lacks 'books_url'. (Unused book_urls
            # local from the original removed.)
            for book_url in document.get('books_url', []):
                chapter_url = book_url['book_url']
                if chapter_url in already_urls:
                    continue
                try:
                    # Per-book failures are logged and skipped so one bad
                    # url does not stop the remaining updates.
                    await get_the_latest_chapter(chapter_url, timeout)
                except Exception as e:
                    LOGGER.exception(e)
                already_urls.add(chapter_url)
    except Exception as e:
        LOGGER.exception(e)
        return False
def get_html_by_requests(url, headers, timeout=15):
    """
    Fetch a page synchronously and decode it with the detected charset.

    :param url: target url.
    :param headers: HTTP headers to send.
    :param timeout: request timeout in seconds.
    :return: decoded page text, or None on any failure.
    """
    try:
        # verify=False: presumably many scraped novel sites have broken
        # TLS chains — TODO confirm before tightening.
        response = requests.get(url=url, headers=headers, verify=False,
                                timeout=timeout)
        response.raise_for_status()
        content = response.content
        charset = cchardet.detect(content)
        # Bug fix: cchardet may report {'encoding': None}; the original
        # then called content.decode(None), raising TypeError and losing
        # the page. Fall back to utf-8 in that case.
        encoding = charset.get('encoding') or 'utf-8'
        return content.decode(encoding)
    except Exception as e:
        LOGGER.exception(e)
        return None
async def fetch_url(self, url, params, headers):
    """
    Fetch a search-engine result page asynchronously.

    :param url: search-engine endpoint.
    :param params: query-string parameters.
    :param headers: HTTP headers (user-agent etc.).
    :return: page text (raw bytes when decoding fails), or None on error.
    """
    with async_timeout.timeout(15):
        try:
            # Asynchronous HTTP client.
            # reference: https://github.com/aio-libs/aiohttp
            async with aiohttp.ClientSession() as client:
                async with client.get(url, params=params,
                                      headers=headers) as response:
                    assert response.status == 200
                    LOGGER.info('Task url: {}'.format(response.url))
                    try:
                        # Source code of the search result page.
                        text = await response.text()
                    except (UnicodeDecodeError, LookupError):
                        # Bug fix: the original bare `except:` also
                        # swallowed CancelledError; only charset/decode
                        # failures should fall back to raw bytes.
                        text = await response.read()
                    return text
        except Exception as e:
            LOGGER.exception(e)
            return None
async def target_fetch(url, headers, timeout=15):
    """
    Fetch the target novel page asynchronously.

    :param url: target url.
    :param headers: HTTP headers to send.
    :param timeout: overall timeout in seconds.
    :return: page text (raw bytes when decoding fails), or None on error.
    """
    with async_timeout.timeout(timeout):
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(url, headers=headers) as response:
                    assert response.status == 200
                    LOGGER.info('Task url: {}'.format(response.url))
                    try:
                        text = await response.text()
                    except (UnicodeDecodeError, LookupError):
                        # Bug fix: replaced the bare `except:` which also
                        # caught CancelledError; only charset/decode
                        # failures fall back to raw bytes.
                        try:
                            text = await response.read()
                        except aiohttp.ServerDisconnectedError as e:
                            LOGGER.exception(e)
                            text = None
                    return text
        except Exception as e:
            LOGGER.exception(str(e))
            return None
async def quickreading_content(request):
    """
    Render the content of a selected chapter.

    Query args:
        url: resource content url
        chapter_url: url of the chapter index page
        novels_name: novel title
        name: chapter title (optional)
        is_ajax: set to "quickReading_cache" for JSON cache responses
    :return: rendered content page, JSON cache dict, or a redirect when the
        content url is not covered by the parse rules / equals the index url.
    """
    url = request.args.get('url', None)
    chapter_url = request.args.get('chapter_url', None)
    novels_name = request.args.get('novels_name', None)
    name = request.args.get('name', '')
    is_ajax = request.args.get('is_ajax', '')
    # When the content url is not covered by the parse rules,
    # redirect to the original url.
    netloc = get_netloc(url)
    if netloc not in RULES.keys():
        return redirect(url)
    user = request['session'].get('user', None)
    motor_db = motor_base.get_db()
    # Build the novel's chapter-index url.
    book_url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
        chapter_url=chapter_url, novels_name=novels_name)
    # NOTE(review): duplicate get_db() call — the assignment above already
    # fetched the handle.
    motor_db = motor_base.get_db()
    if url == chapter_url:
        # Reached the last chapter: persist the latest read position.
        if user and is_ajax == "quickReading_cache":
            # NOTE(review): split(...)[1] raises an uncaught IndexError when
            # the Referer header does not contain 'quickreading_content' —
            # confirm callers always send it.
            quickReading_referer = request.headers.get(
                'Referer', '').split('quickreading_content')[1]
            if quickReading_referer:
                latest_read = "/quickreading_content" + quickReading_referer
                await motor_db.user_message.update_one(
                    {
                        'user': user,
                        'books_url.book_url': book_url
                    },
                    {'$set': {
                        'books_url.$.last_read_url': latest_read
                    }})
        return redirect(book_url)
    content_url = RULES[netloc].content_url
    content_data = await cache_novels_content(url=url, netloc=netloc)
    if content_data:
        try:
            content = content_data.get('content', 'Failure to get')
            next_chapter = content_data.get('next_chapter', [])
            title = content_data.get('title', '').replace(novels_name, '')
            name = title if title else name
            # Build the bookmark url for this chapter.
            bookmark_url = "{path}?url={url}&name={name}&chapter_url={chapter_url}&novels_name={novels_name}".format(
                path=request.path,
                url=url,
                name=name,
                chapter_url=chapter_url,
                novels_name=novels_name)
            # Delete advertisement: strip wrapper chars and neutralize
            # embedded http links.
            content = str(content).strip('[]Jjs,').replace('http', 'hs')
            if user:
                # NOTE(review): if the session user no longer exists in the
                # `user` collection, data is None and .get raises — caught
                # by the except below, which redirects to book_url.
                data = await motor_db.user.find_one({'user': user})
                user_role = data.get('role', None)
                bookmark = await motor_db.user_message.find_one({
                    'user': user,
                    'bookmarks.bookmark': bookmark_url
                })
                book = await motor_db.user_message.find_one({
                    'user': user,
                    'books_url.book_url': book_url
                })
                bookmark = 1 if bookmark else 0
                if book:
                    # The book exists on the shelf.
                    book = 1
                    # Save latest reading history.
                    if is_ajax == "quickReading_cache":
                        quickReading_referer = \
                            request.headers.get('Referer', bookmark_url).split('quickreading_content')[1]
                        latest_read = "/quickreading_content" + quickReading_referer
                        await motor_db.user_message.update_one(
                            {
                                'user': user,
                                'books_url.book_url': book_url
                            }, {
                                '$set': {
                                    'books_url.$.last_read_url': latest_read
                                }
                            })
                else:
                    book = 0
                if is_ajax == "quickReading_cache":
                    quickReading_cache_dict = dict(
                        is_login=1,
                        user=user,
                        name=name,
                        url=url,
                        bookmark=bookmark,
                        book=book,
                        content_url=content_url,
                        chapter_url=chapter_url,
                        novels_name=novels_name,
                        next_chapter=next_chapter,
                        soup=content)
                    return json(quickReading_cache_dict)
                return template(
                    'content.html',
                    is_login=1,
                    user=user,
                    user_role=user_role,
                    name=name,
                    url=url,
                    bookmark=bookmark,
                    book=book,
                    content_url=content_url,
                    chapter_url=chapter_url,
                    novels_name=novels_name,
                    next_chapter=next_chapter,
                    soup=content)
            else:
                # Anonymous visitor: no bookmark/bookshelf state.
                if is_ajax == "quickReading_cache":
                    quickReading_cache_dict = dict(
                        is_login=0,
                        name=name,
                        url=url,
                        bookmark=0,
                        book=0,
                        content_url=content_url,
                        chapter_url=chapter_url,
                        novels_name=novels_name,
                        next_chapter=next_chapter,
                        soup=content)
                    return json(quickReading_cache_dict)
                return template(
                    'content.html',
                    is_login=0,
                    name=name,
                    url=url,
                    bookmark=0,
                    book=0,
                    content_url=content_url,
                    chapter_url=chapter_url,
                    novels_name=novels_name,
                    next_chapter=next_chapter,
                    soup=content)
        except Exception as e:
            LOGGER.exception(e)
            return redirect(book_url)
    else:
        # Content could not be parsed/cached: show the parse-error page.
        if user:
            is_login = 1
            user = user
            return template('parse_error.html', url=url, is_login=is_login, user=user)
        else:
            is_login = 0
            return template('parse_error.html', url=url, is_login=is_login)
async def quickreading_search(request):
    """
    Handle the search button.

    :param request: query arg ``wd`` is the novel's name; a ``!baidu`` /
        ``!360`` / ``!bing`` / ``!google`` prefix forces one engine,
        otherwise engines are tried in ENGINE_PRIORITY order.
    :return: result.html with the search results (original website link,
        chapter page link, whether parsed or recommended), or
        html("No Result!") when nothing was found.
    """
    start = time.time()
    # Strip whitespace around the input name.
    name = str(request.args.get('wd', '')).strip()
    novels_keyword = name.split(' ')[0]
    # Use motor to manage mongodb.
    motor_db = motor_base.get_db()
    if not name:
        return redirect('/')
    else:
        # Store the search record (global keyword counter).
        try:
            await motor_db.search_records.update_one(
                {'keyword': name}, {'$inc': {
                    'count': 1
                }}, upsert=True)
        except Exception as e:
            LOGGER.exception(e)
    # Retrieve the search results through a search engine.
    parse_result = None
    # Choose a specific search engine via the ! prefix.
    if name.startswith('!baidu'):
        novels_keyword = name.split('baidu')[1].strip()
        novels_name = 'intitle:{name} 小说 阅读'.format(name=novels_keyword)
        parse_result = await get_novels_info(class_name='baidu', novels_name=novels_name)
    elif name.startswith('!360'):
        novels_keyword = name.split('360')[1].strip()
        novels_name = "{name} 小说 最新章节".format(name=novels_keyword)
        parse_result = await get_novels_info(class_name='so', novels_name=novels_name)
    elif name.startswith('!bing'):
        novels_keyword = name.split('bing')[1].strip()
        novels_name = "{name} 小说 阅读 最新章节".format(name=novels_keyword)
        parse_result = await get_novels_info(class_name='bing', novels_name=novels_name)
    elif name.startswith('!google'):
        novels_keyword = name.split('google')[1].strip()
        novels_name = "{name} 小说 阅读 最新章节".format(name=novels_keyword)
        parse_result = await get_novels_info(class_name='google', novels_name=novels_name)
    else:
        # No prefix: walk the configured engines until one yields results.
        for each_engine in ENGINE_PRIORITY:
            # for bing
            if each_engine == "bing":
                novels_name = "{name} 小说 阅读 最新章节".format(name=name)
                parse_result = await get_novels_info(class_name='bing', novels_name=novels_name)
                if parse_result:
                    break
            # for 360 so
            if each_engine == "360":
                novels_name = "{name} 小说 最新章节".format(name=name)
                parse_result = await get_novels_info(class_name='so', novels_name=novels_name)
                if parse_result:
                    break
            # for baidu
            if each_engine == "baidu":
                novels_name = 'intitle:{name} 小说 阅读'.format(name=name)
                parse_result = await get_novels_info(class_name='baidu', novels_name=novels_name)
                if parse_result:
                    break
            if each_engine == "google":
                novels_name = '{name} 小说 阅读'.format(name=name)
                parse_result = await get_novels_info(class_name='google', novels_name=novels_name)
                if parse_result:
                    break
    if parse_result:
        # Sort the search results (is_recommend, is_parse put first place).
        result_sorted = sorted(
            parse_result,
            reverse=True,
            key=itemgetter('is_recommend', 'is_parse', 'timestamp'))
        user = request['session'].get('user', None)
        if user:
            # NOTE(review): data is None if the session user is missing from
            # the `user` collection — .get would then raise uncaught here;
            # confirm sessions are always backed by a user document.
            data = await motor_db.user.find_one({'user': user})
            user_role = data.get('role', None)
            try:
                time_now = get_time()
                # Store search date.
                res = await motor_db.user_message.update_one(
                    {'user': user}, {'$set': {
                        'last_update_time': time_now
                    }},
                    upsert=True)
                if res:
                    # Store the keyword; $ne makes the push a no-op when it
                    # was searched before.
                    store_search_information = await motor_db.user_message.update_one(
                        {
                            'user': user,
                            'search_records.keyword': {
                                '$ne': novels_keyword
                            }
                        },
                        {
                            '$push': {
                                'search_records': {
                                    'keyword': novels_keyword,
                                    'counts': 1
                                }
                            }
                        },
                    )
                    # NOTE(review): update_one always returns a truthy
                    # UpdateResult, so this $inc also runs right after a
                    # fresh push — a first search ends at counts=2. Verify
                    # whether that is intended.
                    if store_search_information:
                        await motor_db.user_message.update_one(
                            {
                                'user': user,
                                'search_records.keyword': novels_keyword
                            }, {'$inc': {
                                'search_records.$.counts': 1
                            }})
            except Exception as e:
                LOGGER.exception(e)
            return template(
                'result.html',
                is_login=1,
                user=user,
                user_role=user_role,
                name=novels_keyword,
                time='%.2f' % (time.time() - start),
                result=result_sorted,
                count=len(parse_result))
        else:
            return template(
                'result.html',
                is_login=0,
                name=novels_keyword,
                time='%.2f' % (time.time() - start),
                result=result_sorted,
                count=len(parse_result))
    else:
        return html("No Result!")
async def get_the_latest_chapter(chapter_url, timeout=15):
    """
    Scrape and cache the latest chapter for one bookshelf entry.

    :param chapter_url: internal /chapter?... url whose query string carries
        the real source ``url`` and ``novels_name``.
    :param timeout: per-request timeout in seconds.
    :return: dict with latest chapter name/urls (also upserted into the
        latest_chapter collection), or None when nothing could be resolved.
    """
    try:
        with async_timeout.timeout(timeout):
            # The real source url and novel name are embedded in the
            # chapter_url query string.
            url = parse_qs(urlparse(chapter_url).query).get('url', '')
            novels_name = parse_qs(urlparse(chapter_url).query).get(
                'novels_name', '')
            data = None
            if url and novels_name:
                url = url[0]
                novels_name = novels_name[0]
                netloc = urlparse(url).netloc
                # Only sites with a configured latest-chapter rule are handled.
                if netloc in LATEST_RULES.keys():
                    headers = {'user-agent': await get_random_user_agent()}
                    # Async fetch first; fall back to the synchronous
                    # requests-based fetch when it returns None or raises
                    # TypeError.
                    try:
                        html = await target_fetch(url=url,
                                                  headers=headers,
                                                  timeout=timeout)
                        if html is None:
                            html = get_html_by_requests(url=url,
                                                        headers=headers,
                                                        timeout=timeout)
                    except TypeError:
                        html = get_html_by_requests(url=url,
                                                    headers=headers,
                                                    timeout=timeout)
                    except Exception as e:
                        LOGGER.exception(e)
                        return None
                    try:
                        soup = BeautifulSoup(html, 'html5lib')
                    except Exception as e:
                        LOGGER.exception(e)
                        return None
                    latest_chapter_name, latest_chapter_url = None, None
                    if LATEST_RULES[netloc].plan:
                        # Plan A: read the chapter name/url from meta tags
                        # (property= first, then name=).
                        meta_value = LATEST_RULES[netloc].meta_value
                        latest_chapter_name = soup.select(
                            'meta[property="{0}"]'.format(
                                meta_value["latest_chapter_name"])
                        ) or soup.select('meta[name="{0}"]'.format(
                            meta_value["latest_chapter_name"]))
                        latest_chapter_name = latest_chapter_name[0].get(
                            'content', None) if latest_chapter_name else None
                        latest_chapter_url = soup.select(
                            'meta[property="{0}"]'.format(
                                meta_value["latest_chapter_url"])
                        ) or soup.select('meta[name="{0}"]'.format(
                            meta_value["latest_chapter_url"]))
                        latest_chapter_url = urljoin(
                            chapter_url, latest_chapter_url[0].get(
                                'content', None)) if latest_chapter_url else None
                    else:
                        # Plan B: locate the anchor via the rule's CSS
                        # selector (id, then class, then tag).
                        selector = LATEST_RULES[netloc].selector
                        content_url = selector.get('content_url')
                        if selector.get('id', None):
                            latest_chapter_soup = soup.find_all(
                                id=selector['id'])
                        elif selector.get('class', None):
                            latest_chapter_soup = soup.find_all(
                                class_=selector['class'])
                        else:
                            latest_chapter_soup = soup.select(
                                selector.get('tag'))
                        if latest_chapter_soup:
                            # content_url '1'/'0' are sentinel values with no
                            # url-building behavior here; any other value is
                            # used as the href prefix.
                            # NOTE(review): if the anchor has no href this
                            # concatenates str + None (TypeError), swallowed
                            # by the outer except — confirm rules guarantee
                            # an href.
                            if content_url == '1':
                                pass
                            elif content_url == '0':
                                pass
                            else:
                                latest_chapter_url = content_url + latest_chapter_soup[
                                    0].get('href', None)
                            latest_chapter_name = latest_chapter_soup[0].get(
                                'title', None)
                    if latest_chapter_name and latest_chapter_url:
                        time_current = get_time()
                        data = {
                            "latest_chapter_name": latest_chapter_name,
                            "latest_chapter_url": latest_chapter_url,
                            "quickreading_chapter_url": chapter_url,
                            "quickreading_content_url": "/quickreading_content?url={latest_chapter_url}&name={name}&chapter_url={chapter_url}&novels_name={novels_name}"
                            .format(
                                latest_chapter_url=latest_chapter_url,
                                name=latest_chapter_name,
                                chapter_url=url,
                                novels_name=novels_name,
                            ),
                        }
                        # Persist the latest chapter for this novel/url pair.
                        motor_db = MotorBase().get_db()
                        await motor_db.latest_chapter.update_one(
                            {
                                "novels_name": novels_name,
                                'quickreading_chapter_url': chapter_url
                            }, {
                                '$set': {
                                    'data': data,
                                    "finished_at": time_current
                                }
                            },
                            upsert=True)
            return data
    except Exception as e:
        LOGGER.exception(e)
        return None