async def owllook_delete_bookmark(request):
    """
    Remove a bookmark from the logged-in user's bookmark list.

    The bookmark key is rebuilt from the ``bookmarkurl``, ``name``,
    ``chapter_url`` and ``novels_name`` query arguments and pulled from the
    ``bookmarks`` array of the user's ``user_message`` document.

    :param request: incoming request; the user is read from ``request['session']``
    :return: JSON response whose ``status`` is
        -1 when the session holds no user (login required),
        0 when deleting the bookmark raised an exception,
        1 when the bookmark delete completed
    """
    user = request['session'].get('user', None)
    if not user:
        return json({'status': -1})

    bookmarkurl = request.args.get('bookmarkurl', '')
    name = request.args.get('name', '')
    chapter_url = request.args.get('chapter_url', '')
    novels_name = request.args.get('novels_name', '')
    url = bookmarkurl + "&name=" + name + "&chapter_url=" + chapter_url + "&novels_name=" + novels_name
    try:
        motor_db = MotorBase().db
        # The update must be awaited: otherwise a database failure never
        # reaches this try/except and status 1 is reported unconditionally.
        await motor_db.user_message.update_one(
            {'user': user},
            {'$pull': {'bookmarks': {"bookmark": url}}})
        LOGGER.info('删除书签成功')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
def extract_pre_next_chapter(chapter_url, html):
    """
    Collect the previous/next navigation links of a single chapter page.

    :param chapter_url: URL of the chapter page, used as the base when resolving hrefs
    :param html: HTML text of the chapter page
    :return: list of one-entry dicts ``{link text: resolved url}`` ordered by url;
        if an exception is raised, the dict of links gathered so far is
        returned instead
    """
    next_chapter = {}
    try:
        # Pattern adapted from https://greasyfork.org/zh-CN/scripts/292-my-novel-reader
        next_reg = r'(<a\s+.*?>.*[上前下后][一]?[页张个篇章节步].*?</a>)'
        # The matched anchors are joined and parsed once more with BeautifulSoup
        matched_anchors = re.findall(next_reg, html)
        anchors_soup = BeautifulSoup('\n'.join(matched_anchors), 'html5lib')
        for anchor in anchors_soup.find_all('a'):
            label = (anchor.text or '').strip().replace(' ', '')
            if is_chapter(label):
                next_chapter[label] = urljoin(chapter_url, anchor.get('href')) or ''
        ordered = sorted(next_chapter.items(), key=lambda pair: pair[1])
        return [{label: target} for label, target in ordered]
    except Exception as e:
        LOGGER.exception(e)
        return next_chapter
async def data_extraction_for_web_baidu(client, html):
    """
    Build a search-result record from one Baidu web result node.

    :param client: HTTP client passed on to ``get_real_url`` to resolve the result link
    :param html: parsed result node; its ``h3.t a`` anchor supplies link and title
    :return: dict with ``title``, ``url``, ``time``, ``is_parse``, ``timestamp``
        and ``netloc``; ``None`` when the link cannot be resolved, is filtered
        out, or an exception is raised
    """
    with async_timeout.timeout(20):
        try:
            url = html.select('h3.t a')[0].get('href', None)
            real_url = None
            if url:
                real_url = await get_real_url(client=client, url=url)
            if not real_url:
                return None

            netloc = urlparse(real_url).netloc
            # Drop Baidu's own pages and black-listed domains
            if 'baidu' in real_url or netloc in BLACK_DOMAIN:
                return None

            is_parse = 1 if netloc in RULES.keys() else 0
            title = html.select('h3.t a')[0].get_text()
            # No update time is extracted here; empty placeholders are returned
            return {
                'title': title,
                'url': real_url.replace('index.html', ''),
                'time': "",
                'is_parse': is_parse,
                'timestamp': 0,
                'netloc': netloc,
            }
        except Exception as e:
            LOGGER.exception(e)
            return None
async def similar_user(request):
    """
    Render the "similar readers" page for the logged-in user.

    Looks up the user's document in ``user_recommend`` and, when present,
    passes its ``similar_user``, ``user_tag`` and ``updated_at`` values to
    the template.

    :param request: incoming request; the user is read from ``request['session']``
    :return: rendered ``similar_user.html``, or a redirect to ``/`` when no
        user is logged in or an exception is raised
    """
    user = request['session'].get('user', None)
    if not user:
        return redirect('/')

    motor_db = MotorBase().db
    try:
        similar_info = await motor_db.user_recommend.find_one({'user': user})
        # NOTE(review): is_similar is 1 even when no recommendation document
        # exists - confirm against the template whether that is intended.
        page_args = dict(
            title='与' + user + '相似的书友',
            is_login=1,
            is_similar=1,
            user=user,
        )
        if similar_info:
            page_args.update(
                similar_user=similar_info['similar_user'],
                user_tag=similar_info['user_tag'],
                updated_at=similar_info['updated_at'],
            )
        return template('similar_user.html', **page_args)
    except Exception as e:
        LOGGER.error(e)
        return redirect('/')
async def owllook_delete_bookmark(request):
    """
    Remove a bookmark, identified by the posted ``bookmarkurl``, from the
    logged-in user's ``bookmarks`` array.

    :param request: incoming request; the body is parsed as a url-encoded form
    :return: JSON response whose ``status`` is
        -1 when the session holds no user or no ``bookmarkurl`` was posted,
        0 when deleting the bookmark raised an exception,
        1 when the bookmark was deleted
    """
    user = request['session'].get('user', None)
    form = parse_qs(str(request.body, encoding='utf-8'))
    bookmarkurl = form.get('bookmarkurl', '')
    if not (user and bookmarkurl):
        return json({'status': -1})

    bookmark = unquote(bookmarkurl[0])
    try:
        motor_db = MotorBase().db
        pull_bookmark = {'$pull': {'bookmarks': {"bookmark": bookmark}}}
        await motor_db.user_message.update_one({'user': user}, pull_bookmark)
        LOGGER.info('删除书签成功')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def books(request):
    """
    Render the bookshelf page of the logged-in user.

    Each entry of the user's ``books_url`` list is turned into a dict with
    ``novels_name`` (taken from the query string of ``book_url``),
    ``book_url`` and ``add_time``; the entries are shown in reverse order
    of the stored list.

    :param request: incoming request; the user is read from ``request['session']``
    :return: rendered ``admin_books.html`` (``is_bookmark`` is 1 when the
        shelf has entries, 0 otherwise), or a redirect to ``/`` when no user
        is logged in or an exception is raised
    """
    user = request['session'].get('user', None)
    if not user:
        return redirect('/')

    motor_db = MotorBase().db
    try:
        data = await motor_db.user_message.find_one({'user': user})
        if data:
            books_url = data.get('books_url', None)
            if books_url:
                result = []
                for i in books_url:
                    item_result = {}
                    book_url = i.get('book_url', None)
                    query = parse_qs(urlparse(book_url).query)
                    # parse_qs yields lists; take the first value if present
                    item_result['novels_name'] = query.get('novels_name', '')[0] if query.get('novels_name',
                                                                                            '') else ''
                    item_result['book_url'] = book_url
                    item_result['add_time'] = i.get('add_time', '')
                    result.append(item_result)
                return template('admin_books.html', title='{user}的书架 - owllook'.format(user=user),
                                is_login=1,
                                user=user,
                                is_bookmark=1,
                                result=result[::-1])
        return template('admin_books.html', title='{user}的书架 - owllook'.format(user=user),
                        is_login=1,
                        user=user,
                        is_bookmark=0)
    except Exception as e:
        LOGGER.error(e)
        # Previously fell through and returned None, which is not a valid
        # handler response; redirect like the sibling handlers do.
        return redirect('/')
async def owllook_delete_book(request):
    """
    Remove a book from the logged-in user's bookshelf.

    The book is identified either by a posted ``book_url`` or, failing that,
    by ``chapter_url`` and ``novels_name`` from which the shelf url is rebuilt.

    :param request: incoming request; the body is parsed as a url-encoded form
    :return: JSON response whose ``status`` is
        -1 when the session holds no user (login required),
        0 when the book cannot be identified or deleting it raised an exception,
        1 when the book was deleted
    """
    user = request['session'].get('user', None)
    data = parse_qs(str(request.body, encoding='utf-8'))
    if not user:
        return json({'status': -1})

    book_url_values = data.get('book_url', None)
    if book_url_values:
        book_url = book_url_values[0]
    else:
        novels_name = data.get('novels_name', '')
        chapter_url = data.get('chapter_url', '')
        if not (novels_name and chapter_url):
            # Nothing identifies the book. Indexing the empty defaults used to
            # raise IndexError outside the try block; report failure instead.
            return json({'status': 0})
        book_url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
            chapter_url=chapter_url[0], novels_name=novels_name[0])
    try:
        motor_db = MotorBase().db
        await motor_db.user_message.update_one(
            {'user': user},
            {'$pull': {'books_url': {"book_url": unquote(book_url)}}})
        LOGGER.info('删除书架成功')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def data_extraction_for_web_so(client, html):
    """
    Build a search-result record from one so.com result node.

    The anchor is looked up under ``h3.res-title a`` first and under
    ``h3.title a`` as a fallback; the target address is taken from the
    ``url`` parameter of the anchor's href query string.

    :param client: unused by this function
    :param html: parsed result node
    :return: dict with ``title``, ``url``, ``time``, ``is_parse``, ``timestamp``
        and ``netloc``; ``None`` when no target url is found, it is filtered
        out, or an exception is raised
    """
    with async_timeout.timeout(15):
        try:
            try:
                url = html.select('h3.res-title a')[0].get('href', None)
                title = html.select('h3.res-title a')[0].get_text()
            except IndexError:
                # Fallback selector; an IndexError here reaches the outer handler
                url = html.select('h3.title a')[0].get('href', None)
                title = html.select('h3.title a')[0].get_text()
            except Exception as e:
                LOGGER.exception(e)
                url, title = None, None

            target_values = parse_qs(urlparse(url).query).get('url', None)
            url = target_values[0] if target_values else None
            netloc = urlparse(url).netloc
            rejected = (not url
                        or 'baidu' in url
                        or 'baike.so.com' in url
                        or netloc in BLACK_DOMAIN)
            if rejected:
                return None

            is_parse = 1 if netloc in RULES.keys() else 0
            # No update time is extracted here; empty placeholders are returned
            return {
                'title': title,
                'url': url.replace('index.html', ''),
                'time': '',
                'is_parse': is_parse,
                'timestamp': 0,
                'netloc': netloc,
            }
        except Exception as e:
            LOGGER.exception(e)
            return None
async def owl_novels_chapters(request):
    """
    Return the chapter list of a novel as JSON.

    Reads the ``chapters_url`` (source table-of-contents url) and
    ``novels_name`` query arguments.

    :param request: incoming request
    :return: JSON response with ``status`` 200 and the extracted chapters,
        204 when the cached fetch returned nothing, or 500 with the error
        text in ``msg``; ``finished_at`` is always set
    """
    chapters_url = request.args.get('chapters_url', None)
    novels_name = request.args.get('novels_name', None)
    netloc = get_netloc(chapters_url)
    try:
        res = await cache_owllook_novels_chapter(url=chapters_url, netloc=netloc)
        chapters_sorted = []
        if res:
            chapters_sorted = extract_chapters(chapters_url, res)
            result = {'status': 200}
        else:
            result = {'status': 204}
        result.update({
            'data': {
                'novels_name': novels_name,
                'chapter_url': chapters_url,
                'all_chapters': chapters_sorted
            },
            'msg': "ok"
        })
    except Exception as e:
        LOGGER.exception(e)
        # An exception object is not JSON serializable; send its text
        result = {'status': 500, 'msg': str(e)}
    result.update({'finished_at': get_time()})
    return response.json(result)
async def fetch(client, url, name, is_web):
    """
    Request a search page and return its body.

    :param client: HTTP client whose ``get`` is used as an async context manager
    :param url: search endpoint
    :param name: search keyword
    :param is_web: truthy to send the web-search parameters (``wd``, ``ie``,
        ``rn``, ``vf_bl``); falsy to send only ``word``
    :return: ``response.text()``, or ``response.read()`` if decoding raised;
        ``None`` when the status is not 200 or any other error occurs
    """
    with async_timeout.timeout(15):
        try:
            headers = {'user-agent': get_random_user_agent()}
            if is_web:
                params = {
                    'wd': name,
                    'ie': 'utf-8',
                    'rn': BAIDU_RN,
                    'vf_bl': 1
                }
            else:
                params = {'word': name}
            async with client.get(url, params=params, headers=headers) as response:
                assert response.status == 200
                LOGGER.info('Task url: {}'.format(response.url))
                try:
                    text = await response.text()
                except Exception:
                    # Fall back to the undecoded body. A bare ``except:`` would
                    # also swallow task cancellation and KeyboardInterrupt.
                    text = await response.read()
                return text
        except Exception as e:
            LOGGER.exception(e)
            return None
async def owllook_delete_book(request):
    """
    Remove a book from the logged-in user's bookshelf.

    The shelf url is rebuilt from the ``chapter_url`` and ``novels_name``
    query arguments and pulled from the ``books_url`` array of the user's
    ``user_message`` document.

    :param request: incoming request; the user is read from ``request['session']``
    :return: JSON response whose ``status`` is
        -1 when the session holds no user (login required),
        0 when deleting the book raised an exception,
        1 when the book delete completed
    """
    user = request['session'].get('user', None)
    if not user:
        return json({'status': -1})

    novels_name = request.args.get('novels_name', '')
    chapter_url = request.args.get('chapter_url', '')
    url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
        chapter_url=chapter_url, novels_name=novels_name)
    try:
        motor_db = MotorBase().db
        # The update must be awaited: otherwise a database failure never
        # reaches this try/except and status 1 is reported unconditionally.
        await motor_db.user_message.update_one(
            {'user': user},
            {'$pull': {'books_url': {"book_url": url}}})
        LOGGER.info('删除书架成功')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def data_extraction_for_web(html):
    """
    Build a search-result record from one result node.

    The link comes from the node's first anchor, the title from
    ``font[size="3"]`` and the date (``\\d+-\\d+-\\d+``) from the text of
    ``font[color="#008000"]``.

    :param html: parsed result node
    :return: dict with ``title``, ``url``, ``time``, ``is_parse``, ``timestamp``
        and ``netloc``; ``None`` when the link is missing or filtered out, or
        an exception is raised
    """
    with async_timeout.timeout(10):
        try:
            url = html.find('a').get('href', None)
            if not url or 'baidu' in url or urlparse(url).netloc in BLACK_DOMAIN:
                return None

            netloc = urlparse(url).netloc
            is_parse = 1 if netloc in RULES.keys() else 0
            title = html.select('font[size="3"]')[0].get_text()
            source = html.select('font[color="#008000"]')[0].get_text()

            found_dates = re.findall(r'\d+-\d+-\d+', source)
            date_text = found_dates[0] if found_dates else None
            timestamp = 0
            if date_text:
                try:
                    year, month, day = [int(part) for part in date_text.split('-')]
                    timestamp = arrow.get(year, month, day).timestamp
                except Exception as e:
                    # An unusable date keeps the default timestamp
                    LOGGER.exception(e)
                    timestamp = 0

            cleaned_url = url.replace('index.html', '').replace('Index.html', '')
            return {
                'title': title,
                'url': cleaned_url,
                'time': date_text,
                'is_parse': is_parse,
                'timestamp': timestamp,
                'netloc': netloc,
            }
        except Exception as e:
            LOGGER.exception(e)
            return None
async def fetch(client, url):
    """
    Request ``url`` with a random user agent and return the response text.

    :param client: HTTP client whose ``get`` is used as an async context manager
    :param url: address to request
    :return: ``response.text()``, or ``None`` when the status is not 200 or
        an exception is raised
    """
    with async_timeout.timeout(10):
        try:
            request_headers = {'user-agent': get_random_user_agent()}
            async with client.get(url, headers=request_headers) as response:
                assert response.status == 200
                LOGGER.info('Task url: {}'.format(response.url))
                return await response.text()
        except Exception as e:
            LOGGER.exception(e)
            return None
async def owllook_add_book(request):
    """
    Add a book to the logged-in user's bookshelf.

    Reads ``novels_name``, ``chapter_url`` and the optional ``last_read_url``
    from the url-encoded request body. The user's ``last_update_time`` is
    refreshed (creating the document if needed), then the book is pushed to
    ``books_url`` unless an entry with the same ``book_url`` already exists.

    :param request: incoming request
    :return: JSON response whose ``status`` is
        -1 when the session holds no user or a required field is missing,
        0 when adding the book raised an exception,
        1 when the book was added
    """
    user = request['session'].get('user', None)
    data = parse_qs(str(request.body, encoding='utf-8'))
    novels_name = data.get('novels_name', '')
    chapter_url = data.get('chapter_url', '')
    last_read_url = data.get('last_read_url', '')
    if not (user and novels_name and chapter_url):
        return json({'status': -1})

    url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
        chapter_url=chapter_url[0], novels_name=novels_name[0])
    # last_read_url is not part of the required-field check above; indexing
    # the empty default used to raise IndexError and fail the whole request.
    last_read = unquote(last_read_url[0]) if last_read_url else ''
    time = get_time()
    try:
        motor_db = MotorBase().db
        res = await motor_db.user_message.update_one(
            {'user': user},
            {'$set': {'last_update_time': time}},
            upsert=True)
        if res:
            # The $ne filter stops the same book_url being pushed twice
            await motor_db.user_message.update_one(
                {'user': user, 'books_url.book_url': {'$ne': url}},
                {'$push': {
                    'books_url': {
                        'book_url': url,
                        'add_time': time,
                        'last_read_url': last_read
                    }
                }})
            LOGGER.info('书架添加成功')
            return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
def init_cache(sanic, loop):
    """
    Configure aiocache defaults and attach the redis-backed session
    interface to the module-level ``app``.

    :param sanic: unused by this function
    :param loop: event loop passed to the aiocache defaults
    """
    LOGGER.info("Starting aiocache")
    cache_defaults = {
        'class_': "aiocache.RedisCache",
        'endpoint': REDIS_DICT.get('REDIS_ENDPOINT', None),
        'port': REDIS_DICT.get('REDIS_PORT', None),
        'db': REDIS_DICT.get('CACHE_DB', None),
        'password': REDIS_DICT.get('PASSWORD', None),
        'loop': loop,
    }
    aiocache.settings.set_defaults(**cache_defaults)

    LOGGER.info("Starting redis pool")
    redis_session = RedisSession()
    # Expose the pool getter on the app ...
    app.get_redis_pool = redis_session.get_redis_pool
    # ... and hand that getter to the session interface
    app.session_interface = RedisSessionInterface(app.get_redis_pool, expiry=86400)
async def fetch(client, url, novels_name):
    """
    Request a search page for ``novels_name`` and return its body.

    :param client: HTTP client whose ``get`` is used as an async context manager
    :param url: search endpoint
    :param novels_name: search keyword, sent as the ``q`` parameter
    :return: ``response.text()``, or ``response.read()`` if decoding raised;
        ``None`` when the status is not 200 or any other error occurs
    """
    with async_timeout.timeout(15):
        try:
            headers = {'user-agent': get_random_user_agent()}
            params = {'q': novels_name, 'ie': 'utf-8'}
            async with client.get(url, params=params, headers=headers) as response:
                assert response.status == 200
                LOGGER.info('Task url: {}'.format(response.url))
                try:
                    text = await response.text()
                except Exception:
                    # Fall back to the undecoded body. A bare ``except:`` would
                    # also swallow task cancellation and KeyboardInterrupt.
                    text = await response.read()
                return text
        except Exception as e:
            LOGGER.exception(e)
            return None
async def search_user(request):
    """
    Render the bookshelf of another user, named by the ``ss`` query argument.

    For every shelf entry the latest chapter is read from the
    ``latest_chapter`` collection, or fetched through
    ``get_the_latest_chapter`` when no stored record exists.

    :param request: incoming request; the viewer is read from ``request['session']``
    :return: rendered ``search_user.html``, or a redirect to ``/`` when the
        viewer is not logged in, ``ss`` is missing, or an exception is raised
    """
    user = request['session'].get('user', None)
    name = request.args.get('ss', None)
    if not (user and name):
        return redirect('/')

    motor_db = MotorBase().db
    page_title = '{name}的书架 - owllook'.format(name=name)
    try:
        data = await motor_db.user_message.find_one({'user': name})
        books_url = data.get('books_url', None) if data else None
        if not books_url:
            return template('search_user.html', title=page_title,
                            is_login=1,
                            user=user,
                            is_bookmark=0)

        result = []
        for shelf_entry in books_url:
            book_url = shelf_entry.get('book_url', None)
            last_read_url = shelf_entry.get("last_read_url", "")
            book_query = parse_qs(urlparse(book_url).query)
            last_read_chapter_name = parse_qs(last_read_url).get('name', ['暂无'])[0]
            # parse_qs yields lists; take the first value if present
            name_values = book_query.get('novels_name', '')
            shelf_novel_name = name_values[0] if name_values else ''

            latest_data = await motor_db.latest_chapter.find_one({'owllook_chapter_url': book_url})
            if latest_data:
                latest_chapter_name = latest_data['data']['latest_chapter_name']
                owllook_content_url = latest_data['data']['owllook_content_url']
            else:
                # No stored record: fetch the latest chapter now
                fetched = await get_the_latest_chapter(book_url) or {}
                latest_chapter_name = fetched.get('latest_chapter_name', '暂未获取,请反馈')
                owllook_content_url = fetched.get('owllook_content_url', '')

            result.append({
                'novels_name': shelf_novel_name,
                'book_url': book_url,
                'latest_chapter_name': latest_chapter_name,
                'owllook_content_url': owllook_content_url,
                'add_time': shelf_entry.get('add_time', ''),
                'last_read_url': last_read_url if last_read_url else book_url,
                'last_read_chapter_name': last_read_chapter_name,
            })
        return template('search_user.html', title=page_title,
                        is_login=1,
                        user=user,
                        name=name,
                        is_bookmark=1,
                        result=result[::-1])
    except Exception as e:
        LOGGER.error(e)
        return redirect('/')
async def owllook_add_bookmark(request):
    """
    Add a bookmark to the logged-in user's bookmark list.

    The bookmark key is built from the ``bookmarkurl``, ``name``,
    ``chapter_url`` and ``novels_name`` query arguments. The user's
    ``last_update_time`` is refreshed (creating the document if needed),
    then the bookmark is pushed unless the same key is already stored.

    :param request: incoming request; the user is read from ``request['session']``
    :return: JSON response whose ``status`` is
        -1 when the session holds no user (login required),
        0 when adding the bookmark raised an exception,
        1 when the bookmark was added
    """
    user = request['session'].get('user', None)
    if not user:
        return json({'status': -1})

    bookmarkurl = request.args.get('bookmarkurl', '')
    name = request.args.get('name', '')
    chapter_url = request.args.get('chapter_url', '')
    novels_name = request.args.get('novels_name', '')
    url = bookmarkurl + "&name=" + name + "&chapter_url=" + chapter_url + "&novels_name=" + novels_name
    time = get_time()
    try:
        motor_db = MotorBase().db
        # Both updates must be awaited: un-awaited, `res` is only a pending
        # awaitable (always truthy) and database errors bypass this try/except.
        res = await motor_db.user_message.update_one(
            {'user': user},
            {'$set': {'last_update_time': time}},
            upsert=True)
        if res:
            # The $ne filter stops the same bookmark being pushed twice
            await motor_db.user_message.update_one(
                {'user': user, 'bookmarks.bookmark': {'$ne': url}},
                {'$push': {'bookmarks': {'bookmark': url, 'add_time': time}}})
            LOGGER.info('书签添加成功')
            return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def data_extraction_for_phone(html):
    """
    Build a search-result record from one mobile result node.

    :param html: parsed result node carrying a ``data-log`` attribute
    :return: dict with ``title``, ``url`` and ``basic_mess`` (texts of the
        ``c-gap-right-large`` elements, or ``None`` when there are none);
        ``None`` when no url is found or an exception is raised
    """
    with async_timeout.timeout(10):
        try:
            # Get url from the data-log attribute
            # NOTE(review): eval() runs on text taken from fetched markup;
            # consider a literal-only parser once the attribute format is confirmed.
            data_log = eval(html['data-log'])
            url = data_log.get('mu', None)
            if not url:
                return None
            # Get title
            title = html.find('h3').get_text()
            # Get author and update_time (optional)
            novel_mess = html.findAll(class_='c-gap-right-large')
            if novel_mess:
                basic_mess = [node.get_text() for node in novel_mess]
            else:
                basic_mess = None
            return {'title': title, 'url': url, 'basic_mess': basic_mess}
        except Exception as e:
            LOGGER.exception(e)
            return None
async def owllook_add_book(request):
    """
    Add a book to the logged-in user's bookshelf.

    The shelf url is built from the ``chapter_url`` and ``novels_name`` query
    arguments. The user's ``last_update_time`` is refreshed (creating the
    document if needed), then the book is pushed to ``books_url`` unless an
    entry with the same ``book_url`` already exists.

    :param request: incoming request; the user is read from ``request['session']``
    :return: JSON response whose ``status`` is
        -1 when the session holds no user (login required),
        0 when adding the book raised an exception,
        1 when the book was added
    """
    user = request['session'].get('user', None)
    if not user:
        return json({'status': -1})

    novels_name = request.args.get('novels_name', '')
    chapter_url = request.args.get('chapter_url', '')
    url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
        chapter_url=chapter_url, novels_name=novels_name)
    time = get_time()
    try:
        motor_db = MotorBase().db
        # Both updates must be awaited: un-awaited, `res` is only a pending
        # awaitable (always truthy) and database errors bypass this try/except.
        res = await motor_db.user_message.update_one(
            {'user': user},
            {'$set': {'last_update_time': time}},
            upsert=True)
        if res:
            # The $ne filter stops the same book_url being pushed twice
            await motor_db.user_message.update_one(
                {'user': user, 'books_url.book_url': {'$ne': url}},
                {'$push': {'books_url': {'book_url': url, 'add_time': time}}})
            LOGGER.info('书架添加成功')
            return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def owllook_add_bookmark(request):
    """
    Add the posted ``bookmarkurl`` to the logged-in user's bookmark list.

    The user's ``last_update_time`` is refreshed (creating the document if
    needed), then the bookmark is pushed unless the same value is already
    stored.

    :param request: incoming request; the body is parsed as a url-encoded form
    :return: JSON response whose ``status`` is
        -1 when the session holds no user or no ``bookmarkurl`` was posted,
        0 when adding the bookmark raised an exception,
        1 when the bookmark was added
    """
    user = request['session'].get('user', None)
    form = parse_qs(str(request.body, encoding='utf-8'))
    bookmarkurl = form.get('bookmarkurl', '')
    if not (user and bookmarkurl):
        return json({'status': -1})

    url = unquote(bookmarkurl[0])
    time = get_time()
    try:
        motor_db = MotorBase().db
        touch_user = {'$set': {'last_update_time': time}}
        res = await motor_db.user_message.update_one({'user': user}, touch_user, upsert=True)
        if res:
            # The $ne filter stops the same bookmark being pushed twice
            not_yet_stored = {'user': user, 'bookmarks.bookmark': {'$ne': url}}
            push_bookmark = {'$push': {'bookmarks': {'bookmark': url, 'add_time': time}}}
            await motor_db.user_message.update_one(not_yet_stored, push_bookmark)
            LOGGER.info('书签添加成功')
            return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def owllook_search(request):
    """
    Search for a novel and render the result page.

    Reads the keyword from the ``wd`` query argument and the optional
    ``is_web`` flag (default 1). The keyword's counter in ``search_records``
    is incremented; a failure there is logged and does not stop the search.
    Results are sorted by ``timestamp`` (newest first) unless the keyword
    contains ``:baidu``.

    :param request: incoming request
    :return: redirect to ``/`` when no keyword is given, rendered
        ``result.html`` when there are results, otherwise a plain
        "No Result!" page
    """
    start = time.time()
    name = request.args.get('wd', None)
    if not name:
        return redirect('/')

    novels_name = 'intitle:{name} 小说 阅读'.format(
        name=name) if ':baidu' not in name else name.split('baidu')[1]
    try:
        motor_db = MotorBase().db
        # Awaited so that a database failure is raised and logged here
        # instead of being lost with the un-awaited call.
        await motor_db.search_records.update_one(
            {'keyword': name},
            {'$inc': {'count': 1}},
            upsert=True)
    except Exception as e:
        LOGGER.exception(e)

    is_web = int(request.args.get('is_web', 1))
    result = await search(novels_name, is_web)
    if result:
        # Drop empty entries
        parse_result = [i for i in result if i]
        result_sorted = sorted(
            parse_result, reverse=True,
            key=lambda res: res['timestamp']) if ':baidu' not in name else parse_result
        user = request['session'].get('user', None)
        if user:
            return template(
                'result.html',
                is_login=1,
                user=user,
                name=name,
                time='%.2f' % (time.time() - start),
                result=result_sorted,
                count=len(parse_result))
        else:
            return template(
                'result.html',
                is_login=0,
                name=name,
                time='%.2f' % (time.time() - start),
                result=result_sorted,
                count=len(parse_result))
    else:
        return html("No Result!")
async def update_all_books():
    """
    Refresh the latest chapter of every book found on any user's shelf.

    Each distinct ``book_url`` is passed to ``get_the_latest_chapter`` once.

    :return: ``True`` when the whole pass completed, ``False`` when an
        exception was raised
    """
    try:
        motor_db = MotorBase().db
        # Cursor over the shelf links of all users
        shelf_cursor = motor_db.user_message.find({}, {'books_url.book_url': 1, '_id': 0})
        # Links that have already been refreshed in this pass
        refreshed = set()
        async for document in shelf_cursor:
            if not document:
                continue
            for shelf_entry in document['books_url']:
                chapter_url = shelf_entry['book_url']
                if chapter_url in refreshed:
                    continue
                await get_the_latest_chapter(chapter_url)
                refreshed.add(chapter_url)
        return True
    except Exception as e:
        LOGGER.exception(e)
        return False
async def get_real_url(client, url):
    """
    Follow redirects from ``url`` and return the address finally reached.

    :param client: HTTP client whose ``get`` is used as an async context manager
    :param url: address to resolve
    :return: ``response.url`` (``None`` if it is falsy), or ``None`` when the
        status is not 200 or an exception is raised
    """
    with async_timeout.timeout(10):
        try:
            request_headers = {'user-agent': get_random_user_agent()}
            async with client.get(url, headers=request_headers, allow_redirects=True) as response:
                assert response.status == 200
                LOGGER.info('Parse url: {}'.format(response.url))
                return response.url or None
        except Exception as e:
            LOGGER.exception(e)
            return None
async def target_fetch(client, url):
    """
    Request a target page and return its body.

    :param client: aiohttp client
    :param url: target url
    :return: ``response.text()``, or ``response.read()`` if decoding raised;
        ``None`` when the status is not 200 or any other error occurs
    """
    with async_timeout.timeout(20):
        try:
            headers = {'user-agent': get_random_user_agent()}
            async with client.get(url, headers=headers) as response:
                assert response.status == 200
                LOGGER.info('Task url: {}'.format(response.url))
                try:
                    text = await response.text()
                except Exception:
                    # Fall back to the undecoded body. A bare ``except:`` would
                    # also swallow task cancellation and KeyboardInterrupt.
                    text = await response.read()
                return text
        except Exception as e:
            LOGGER.exception(e)
            return None
async def so_novels(request, name):
    """
    360 (so.com) novel search API.

    :param request: incoming request (not otherwise used)
    :param name: url-quoted novel name
    :return: JSON response with ``status`` 200 and the non-empty results in
        ``data``, 204 with ``data`` set to ``None`` when the search returned
        nothing, or 500 with the error text in ``msg``; ``finished_at`` is
        always set
    """
    name = unquote(name)
    novels_name = '{name} 小说 免费阅读'.format(name=name)
    try:
        res = await cache_owllook_so_novels_result(novels_name)
        parse_result = None
        # Test the search result, not the request object: `request` is always
        # truthy, so an empty result used to crash in the comprehension below
        # and the 204 branch was unreachable.
        if res:
            parse_result = [i for i in res if i]
            result = {'status': 200}
        else:
            result = {'status': 204}
        result.update({'data': parse_result, 'msg': "ok"})
    except Exception as e:
        LOGGER.exception(e)
        # An exception object is not JSON serializable; send its text
        result = {'status': 500, 'msg': str(e)}
    result.update({'finished_at': get_time()})
    return response.json(result)
async def owllook_search(request):
    """
    Search for a novel through the configured engines and render the results.

    Reads the keyword from the ``wd`` query argument. The keyword's counter
    in ``search_records`` is incremented, then the engines listed in
    ``ENGINE_PRIORITY`` are tried in order until one returns a truthy
    result. For a logged-in user the keyword is also recorded in the user's
    own ``search_records`` list.

    :param request: incoming request
    :return: redirect to ``/`` when no keyword is given, rendered
        ``result.html`` when there are results, otherwise a plain
        "No Result" page
    """
    start = time.time()
    name = request.args.get('wd', None)
    motor_db = MotorBase().db
    if not name:
        return redirect('/')

    # Record the searched novel name
    try:
        await motor_db.search_records.update_one(
            {'keyword': name},
            {'$inc': {'count': 1}},
            upsert=True)
    except Exception as e:
        LOGGER.exception(e)

    # Query the search engines in priority order
    parse_result = [None]
    for each_engine in ENGINE_PRIORITY:
        # for 360 so
        if each_engine == "360":
            novels_name = "{name} 小说 免费阅读".format(name=name)
            parse_result = await cache_owllook_so_novels_result(novels_name)
            if parse_result:
                break
        # for baidu
        if each_engine == "baidu":
            novels_name = 'intitle:{name} 小说 阅读'.format(
                name=name) if ':baidu' not in name else name.split('baidu')[1]
            parse_result = await cache_owllook_baidu_novels_result(novels_name)
            if parse_result:
                break

    # The last engine may have returned None or an empty list; check that
    # before indexing so that case reaches the "No Result" page instead of
    # raising TypeError/IndexError.
    if parse_result and parse_result[0]:
        # Sort by whether the site can be parsed first, then by update time
        result_sorted = sorted(
            parse_result, reverse=True,
            key=itemgetter('is_parse', 'timestamp')) if ':baidu' not in name else parse_result
        user = request['session'].get('user', None)
        if user:
            try:
                time_current = get_time()
                res = await motor_db.user_message.update_one(
                    {'user': user},
                    {'$set': {'last_update_time': time_current}},
                    upsert=True)
                # Original author's note: too many Mongo round trips here,
                # to be revisited.
                if res:
                    # Add the keyword if the user has not searched it before ...
                    is_ok = await motor_db.user_message.update_one(
                        {'user': user, 'search_records.keyword': {'$ne': name}},
                        {'$push': {'search_records': {'keyword': name, 'counts': 1}}},
                    )
                    if is_ok:
                        # ... then increment the counter of the matching entry
                        await motor_db.user_message.update_one(
                            {'user': user, 'search_records.keyword': name},
                            {'$inc': {'search_records.$.counts': 1}})
            except Exception as e:
                LOGGER.exception(e)
            return template(
                'result.html',
                is_login=1,
                user=user,
                name=name,
                time='%.2f' % (time.time() - start),
                result=result_sorted,
                count=len(parse_result))
        else:
            return template(
                'result.html',
                is_login=0,
                name=name,
                time='%.2f' % (time.time() - start),
                result=result_sorted,
                count=len(parse_result))
    else:
        return html("No Result!请将小说名反馈给本站,谢谢!")
async def owllook_content(request):
    """
    Render the content page of one novel chapter.

    Query arguments:
      - ``url``: source url of the chapter content page
      - ``chapter_url``: source url of the novel's table of contents
      - ``novels_name``: name of the novel
      - ``name``: chapter name (falls back to the fetched title)

    :param request: incoming request
    :return: rendered ``content.html``; a redirect to the source ``url`` when
        its netloc has no parsing rule; a redirect to the book's chapter list
        when ``url`` equals ``chapter_url`` or rendering raised; a plain-text
        failure message when no content could be fetched
    """
    url = request.args.get('url', None)
    chapter_url = request.args.get('chapter_url', None)
    novels_name = request.args.get('novels_name', None)
    name = request.args.get('name', '')

    # A url outside the parsing rules is handed back to its source site
    netloc = get_netloc(url)
    if netloc not in RULES.keys():
        return redirect(url)

    # Url of the novel's chapter list on this site
    book_url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
        chapter_url=chapter_url,
        novels_name=novels_name)
    if url == chapter_url:
        return redirect(book_url)

    content_url = RULES[netloc].content_url
    content_data = await cache_owllook_novels_content(url=url, netloc=netloc)
    if not content_data:
        return text('解析失败或者是没有下一页了,请将失败页面反馈给本站,请重新刷新一次,或者访问源网页:{url}'.format(url=url))

    user = request['session'].get('user', None)
    try:
        content = content_data.get('content', '获取失败')
        next_chapter = content_data.get('next_chapter', '获取失败')
        title = content_data.get('title', '获取失败')
        name = name or title
        # Bookmark url for this chapter
        bookmark_url = "{path}?url={url}&name={name}&chapter_url={chapter_url}&novels_name={novels_name}".format(
            path=request.path,
            url=url,
            name=name,
            chapter_url=chapter_url,
            novels_name=novels_name)
        # Break advertisement links
        content = str(content).strip('[]Jjs,').replace('http', 'hs')

        # Template arguments shared by the logged-in and anonymous pages
        shared_args = dict(
            name=name,
            url=url,
            content_url=content_url,
            chapter_url=chapter_url,
            novels_name=novels_name,
            next_chapter=next_chapter,
            soup=content)

        if not user:
            return template('content.html', is_login=0, bookmark=0, book=0, **shared_args)

        motor_db = MotorBase().db
        bookmark_doc = await motor_db.user_message.find_one(
            {'user': user, 'bookmarks.bookmark': bookmark_url})
        book_doc = await motor_db.user_message.find_one(
            {'user': user, 'books_url.book_url': book_url})
        bookmark = 1 if bookmark_doc else 0
        book = 0
        if book_doc:
            # The book is on the shelf: remember the last chapter read
            book = 1
            await motor_db.user_message.update_one(
                {'user': user, 'books_url.book_url': book_url},
                {'$set': {'books_url.$.last_read_url': bookmark_url}})
        return template('content.html', is_login=1, user=user,
                        bookmark=bookmark, book=book, **shared_args)
    except Exception as e:
        LOGGER.exception(e)
        return redirect(book_url)