async def owllook_add_book(request):
    """Add a novel to the logged-in user's bookshelf.

    JSON status values:
        -1  session expired, user must log in again
         0  failed to add the book
         1  book added successfully
    """
    user = request['session'].get('user', None)
    form = parse_qs(str(request.body, encoding='utf-8'))
    novels_name = form.get('novels_name', '')
    chapter_url = form.get('chapter_url', '')
    last_read_url = form.get('last_read_url', '')
    # Guard clause: missing session or required form fields.
    if not (user and novels_name and chapter_url):
        return json({'status': -1})
    url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
        chapter_url=chapter_url[0], novels_name=novels_name[0])
    now = get_time()
    try:
        motor_db = MotorBase().db
        res = await motor_db.user_message.update_one(
            {'user': user}, {'$set': {'last_update_time': now}}, upsert=True)
        if res:
            # $ne in the filter makes the $push a no-op when the book is
            # already on the shelf.
            await motor_db.user_message.update_one(
                {'user': user, 'books_url.book_url': {'$ne': url}},
                {'$push': {'books_url': {
                    'book_url': url,
                    'add_time': now,
                    'last_read_url': unquote(last_read_url[0])}}})
            LOGGER.info('书架添加成功')
            return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def owl_novels_chapters(request):
    """Chapter-directory API, generic enough for most sources.

    Query params:
        chapters_url: url of the source chapter index page
        novels_name: novel name

    :return: JSON payload with the novel's (sorted) chapter list
    """
    chapters_url = request.args.get('chapters_url', None)
    novels_name = request.args.get('novels_name', None)
    netloc = get_netloc(chapters_url)
    try:
        res = await cache_owllook_novels_chapter(url=chapters_url, netloc=netloc)
        chapters_sorted = []
        if res:
            chapters_sorted = extract_chapters(chapters_url, res)
            result = {'status': 200}
        else:
            # 204: upstream returned nothing for this chapter page
            result = {'status': 204}
        result.update({
            'data': {
                'novels_name': novels_name,
                'chapter_url': chapters_url,
                'all_chapters': chapters_sorted
            },
            'msg': "ok"
        })
    except Exception as e:
        LOGGER.exception(e)
        # str(e): a bare Exception instance is not JSON serializable, which
        # would make the error response itself blow up inside response.json()
        result = {'status': 500, 'msg': str(e)}
    result.update({'finished_at': get_time()})
    return response.json(result)
async def owllook_add_bookmark(request):
    """Store a reading bookmark for the logged-in user.

    JSON status values:
        -1  session expired, user must log in again
         0  failed to add the bookmark
         1  bookmark added successfully
    """
    user = request['session'].get('user', None)
    form = parse_qs(str(request.body, encoding='utf-8'))
    bookmarkurl = form.get('bookmarkurl', '')
    # Guard clause: need a live session and a bookmark url.
    if not (user and bookmarkurl):
        return json({'status': -1})
    url = unquote(bookmarkurl[0])
    now = get_time()
    try:
        motor_db = MotorBase().db
        res = await motor_db.user_message.update_one(
            {'user': user}, {'$set': {'last_update_time': now}}, upsert=True)
        if res:
            # $ne filter keeps the bookmark list duplicate-free.
            await motor_db.user_message.update_one(
                {'user': user, 'bookmarks.bookmark': {'$ne': url}},
                {'$push': {'bookmarks': {'bookmark': url, 'add_time': now}}})
            LOGGER.info('书签添加成功')
            return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def get_tag():
    """Build per-user aggregates (novel names, tags, authors) from each
    user's bookshelf urls and upsert them into the ``user_tag`` collection."""
    motor_db = MotorBase().get_db()
    novels = Novels()
    # Cursor over every user's bookshelf urls
    books_url_cursor = motor_db.user_message.find(
        {}, {'books_url.book_url': 1, 'user': 1, '_id': 0})
    async for document in books_url_cursor:
        if not document:
            continue
        books_url = document.get('books_url', None)
        if not books_url:
            continue
        all_user = {}
        user = document['user']
        all_user[user + '_novels'] = []
        all_user[user + '_tag'] = []
        all_user[user + '_author'] = []
        for book_url in books_url:
            chapter_url = book_url['book_url']
            # Default must be [''] (not ''): ''[0] raises IndexError when a
            # stored url lacks the novels_name query param.
            novels_name = parse_qs(urlparse(chapter_url).query).get(
                'novels_name', [''])[0]
            if not novels_name:
                continue
            all_user[user + '_novels'].append(novels_name)
            novels_info = novels.search_name(novels_name)
            if novels_info:
                # tags are stored as one '#'-separated string
                novels_type = novels_info['novels_type'].split('#')
                author = novels_info['author']
                all_user[user + '_author'].append(author)
                all_user[user + '_tag'].extend(novels_type)
        data = {
            'user_novels': all_user[user + '_novels'],
            'user_tag': all_user[user + '_tag'],
            'user_author': all_user[user + '_author'],
        }
        await motor_db.user_tag.update_one(
            {"user": user},
            {'$set': {'data': data, "updated_at": get_time()}},
            upsert=True)
        pprint(data)
async def owl_novels_chapters(request, **kwargs):
    """Chapter-directory API, generic enough for most sources.

    Expects kwargs['request_params'] carrying:
        chapters_url: url of the source chapter index page
        novels_name: novel name

    :return: response with the novel's chapter list
    """
    request_params = kwargs["request_params"]
    chapters_url = request_params.get('chapters_url', None)
    novels_name = request_params.get('novels_name', None)
    netloc = get_netloc(chapters_url)
    try:
        res = await cache_owllook_novels_chapter(url=chapters_url, netloc=netloc)
        chapters_sorted = []
        if res:
            chapters_sorted = extract_chapters(chapters_url, res)
        # Copy the shared template instead of mutating it in place:
        # UniResponse.SUCCESS is module-level state, and update()-ing it
        # leaks one request's data into concurrent/subsequent responses.
        ok_response = dict(UniResponse.SUCCESS)
        ok_response.update({
            ResponseField.DATA: {
                'novels_name': novels_name,
                'chapter_url': chapters_url,
                'all_chapters': chapters_sorted
            },
            ResponseField.FINISH_AT: get_time()
        })
        return response_handle(request, ok_response, 200)
    except Exception as e:
        LOGGER.exception(e)
        return response_handle(request, UniResponse.SERVER_UNKNOWN_ERR, 500)
async def get_user_tag():
    """For every user, compute tag-based cosine similarity against all other
    users and upsert the result into the ``user_recommend`` collection."""
    motor_db = MotorBase().get_db()
    user_tag_cursor = motor_db.user_tag.find({}, {
        'data.user_tag': 1,
        'user': 1,
        '_id': 0
    })
    result = {}
    async for document in user_tag_cursor:
        # .get() chain: a document without tag data must not abort the
        # whole scan with a KeyError (the projection does not guarantee
        # the field exists on every document).
        user_tag = document.get('data', {}).get('user_tag')
        if user_tag:
            # '.' replaced because it is not allowed in mongo field keys
            # downstream — presumably; confirm against CosineSimilarity usage
            result[document['user'].replace('.', '·')] = user_tag
    for key, value in result.items():
        # Compare this user's tags against everyone else's.
        result_copy = deepcopy(result)
        del result_copy[key]
        cos = CosineSimilarity(value, result_copy)
        vector = cos.create_vector()
        result_dic = cos.calculate(vector)
        pprint(result_dic)
        await motor_db.user_recommend.update_one(
            {"user": key},
            {'$set': {
                'similar_user': result_dic,
                'user_tag': result[key],
                "updated_at": get_time()
            }},
            upsert=True)
async def get_user_tag():
    """Compute per-user similar users AND book recommendations (weighted by
    similarity over the similar users' bookshelves), persisting both into the
    ``user_recommend`` collection."""
    motor_db = MotorBase().get_db()
    user_tag_cursor = motor_db.user_tag.find({}, {
        'data': 1,
        'user': 1,
        '_id': 0
    })
    result = {}
    user_book_dict = {}
    async for document in user_tag_cursor:
        if document.get('data'):
            # '.' replaced because it is not allowed in mongo field keys
            # downstream — presumably; confirm against CosineSimilarity usage
            user_key = document['user'].replace('.', '·')
            user_book_dict[user_key] = document['data']
            result[user_key] = document['data'].get('user_tag', [])
    for key, value in result.items():
        if not value:
            continue
        # Restored from corrupted source: the two statements below had been
        # fused into an invalid `print("\nUser:"******"User tags:", ...)`.
        print("\nUser:", key)
        print("User tags:", set(value))
        print("User books:", user_book_dict[key]["user_novels"])
        result_copy = deepcopy(result)
        del result_copy[key]
        cos = CosineSimilarity(value, result_copy)
        vector = cos.create_vector()
        result_dic = cos.calculate(vector)
        print("相似用户:")
        pprint(result_dic[:10])
        # Accumulate a similarity-weighted score for every book owned by
        # similar users.
        books_dic = collections.defaultdict(float)
        for user_books in result_dic:
            for sim_user, sim_rate in user_books.items():
                for book in user_book_dict[sim_user]["user_novels"]:
                    books_dic[book] += sim_rate
        # Recommend 20 books; NOTE: the user's own collection is NOT
        # filtered out of the recommendations here.
        recommend = sorted(
            books_dic.items(), key=itemgetter(1), reverse=True)[0:20]
        print("书籍推荐:")
        pprint(recommend)
        recommend_novels = [book for book, sim_rate in recommend]
        await motor_db.user_recommend.update_one(
            {"user": key},
            {'$set': {
                'similar_user': result_dic,
                'user_tag': result[key],
                'recommend_novels': recommend_novels,
                "updated_at": get_time()
            }},
            upsert=True)
async def owllook_register(request):
    """Register a new user; duplicate user names are rejected.

    JSON status values:
        -2  wrong answer to the security question
        -1  user name already exists
         0  user name or password missing / too short
         1  registered successfully
    """
    register_data = parse_qs(str(request.body, encoding='utf-8'))
    user = register_data.get('user', [None])[0]
    pwd = register_data.get('pwd', [None])[0]
    email = register_data.get('email', [None])[0]
    answer = register_data.get('answer', [None])[0]
    reg_index = request.cookies.get('reg_index')
    # Guard clause: all fields present, user name > 2 chars, password > 5.
    fields_ok = (user and pwd and email and answer and reg_index
                 and len(user) > 2 and len(pwd) > 5)
    if not fields_ok:
        return json({'status': 0})
    motor_db = motor_base.get_db()
    is_exist = await motor_db.user.find_one({'user': user})
    if is_exist:
        return json({'status': -1})
    # Verify the security-question answer for this registration session.
    real_answer = get_real_answer(str(reg_index))
    if not (real_answer and real_answer == answer):
        return json({'status': -2})
    # Password stored as md5(md5(site_token + pwd)).
    pass_first = hashlib.md5(
        (CONFIG.WEBSITE["TOKEN"] + pwd).encode("utf-8")).hexdigest()
    password = hashlib.md5(pass_first.encode("utf-8")).hexdigest()
    now = get_time()
    await motor_db.user.insert_one({
        "user": user,
        "password": password,
        "email": email,
        "register_time": now,
    })
    return json({'status': 1})
async def owl_so_novels(request, name):
    """360 (so.com) novel-search API.

    :param request:
    :param name: novel name (url-quoted)
    :return: JSON payload with the search results for the novel
    """
    name = unquote(name)
    novels_name = '{name} 小说 免费阅读'.format(name=name)
    try:
        res = await cache_owllook_so_novels_result(novels_name)
        parse_result = None
        if res:
            # drop falsy entries from the engine results
            parse_result = [i for i in res if i]
            result = {'status': 200}
        else:
            result = {'status': 204}
        result.update({'data': parse_result, 'msg': "ok"})
    except Exception as e:
        LOGGER.exception(e)
        # str(e): a bare Exception instance is not JSON serializable and
        # would break response.json() while building the error payload
        result = {'status': 500, 'msg': str(e)}
    result.update({'finished_at': get_time()})
    return response.json(result)
async def owl_so_novels(request, name):
    """360 novel-search API.

    NOTE(review): despite the name/docstring referring to 360, the fetch
    uses class_name='baidu' — confirm which engine is actually intended.

    :param request:
    :param name: novel name (url-quoted)
    :return: response with the novel search results
    """
    name = unquote(name)
    novels_name = '{name} 小说 免费阅读'.format(name=name)
    try:
        res = await get_novels_info(class_name='baidu', novels_name=novels_name)
        parse_result = []
        if res:
            # drop falsy entries from the engine results
            parse_result = [i for i in res if i]
        # Copy the shared template instead of mutating it in place:
        # UniResponse.SUCCESS is module-level state, and update()-ing it
        # leaks one request's data into concurrent/subsequent responses.
        ok_response = dict(UniResponse.SUCCESS)
        ok_response.update({
            ResponseField.DATA: parse_result,
            ResponseField.FINISH_AT: get_time()
        })
        return response_handle(request, ok_response, 200)
    except Exception as e:
        LOGGER.exception(e)
        return response_handle(request, UniResponse.SERVER_UNKNOWN_ERR, 500)
async def get_the_latest_chapter(chapter_url, loop=None, timeout=15):
    # Resolve the latest chapter (name + url) of a novel from its owllook
    # chapter url, persist it into the `latest_chapter` collection, and
    # return the data dict. Returns None on timeout or any failure.
    #
    # :param chapter_url: owllook chapter url carrying `url` and
    #     `novels_name` query params
    # :param loop: event loop passed through to aiohttp.ClientSession
    # :param timeout: overall timeout in seconds for the whole lookup
    try:
        with async_timeout.timeout(timeout):
            url = parse_qs(urlparse(chapter_url).query).get('url', '')
            novels_name = parse_qs(urlparse(chapter_url).query).get(
                'novels_name', '')
            data = None
            if url and novels_name:
                # parse_qs returns lists; unwrap the first value
                url = url[0]
                novels_name = novels_name[0]
                netloc = urlparse(url).netloc
                # only sources with a configured latest-chapter rule are handled
                if netloc in LATEST_RULES.keys():
                    async with aiohttp.ClientSession(loop=loop) as client:
                        headers = {'user-agent': await get_random_user_agent()}
                        try:
                            html = await target_fetch(client=client, url=url, headers=headers, timeout=timeout)
                            if html is None:
                                # async fetch failed: fall back to blocking requests
                                html = get_html_by_requests(url=url, headers=headers, timeout=timeout)
                        except TypeError:
                            html = get_html_by_requests(url=url, headers=headers, timeout=timeout)
                        except Exception as e:
                            LOGGER.exception(e)
                            return None
                        try:
                            soup = BeautifulSoup(html, 'html5lib')
                        except Exception as e:
                            LOGGER.exception(e)
                            return None
                        latest_chapter_name, latest_chapter_url = None, None
                        if LATEST_RULES[netloc].plan:
                            # plan rule: site exposes the latest chapter via
                            # <meta property=...> tags
                            meta_value = LATEST_RULES[netloc].meta_value
                            latest_chapter_name = soup.select(
                                'meta[property="{0}"]'.format(
                                    meta_value["latest_chapter_name"]))
                            latest_chapter_name = latest_chapter_name[0].get(
                                'content', None) if latest_chapter_name else None
                            latest_chapter_url = soup.select(
                                'meta[property="{0}"]'.format(
                                    meta_value["latest_chapter_url"]))
                            latest_chapter_url = urljoin(
                                url, latest_chapter_url[0].get(
                                    'content', None)) if latest_chapter_url else None
                        else:
                            # otherwise use a per-site id/class/tag selector rule
                            selector = LATEST_RULES[netloc].selector
                            content_url = selector.get('content_url')
                            if selector.get('id', None):
                                latest_chapter_soup = soup.find_all(
                                    id=selector['id'])
                            elif selector.get('class', None):
                                latest_chapter_soup = soup.find_all(
                                    class_=selector['class'])
                            else:
                                latest_chapter_soup = soup.select(
                                    selector.get('tag'))
                            if latest_chapter_soup:
                                if content_url == '1':
                                    # TODO
                                    pass
                                elif content_url == '0':
                                    # TODO
                                    pass
                                else:
                                    # relative href: prefix with the rule's base url
                                    latest_chapter_url = content_url + latest_chapter_soup[
                                        0].get('href', None)
                                latest_chapter_name = latest_chapter_soup[
                                    0].get('title', None)
                        if latest_chapter_name and latest_chapter_url:
                            time_current = get_time()
                            # print(latest_chapter_url)
                            data = {
                                "latest_chapter_name": latest_chapter_name,
                                "latest_chapter_url": latest_chapter_url,
                                "owllook_chapter_url": chapter_url,
                                "owllook_content_url": "/owllook_content?url={latest_chapter_url}&name={name}&chapter_url={chapter_url}&novels_name={novels_name}"
                                .format(
                                    latest_chapter_url=latest_chapter_url,
                                    name=latest_chapter_name,
                                    chapter_url=url,
                                    novels_name=novels_name,
                                ),
                            }
                            # persist the latest chapter
                            motor_db = MotorBase().get_db()
                            await motor_db.latest_chapter.update_one(
                                {
                                    "novels_name": novels_name,
                                    'owllook_chapter_url': chapter_url
                                }, {
                                    '$set': {
                                        'data': data,
                                        "finished_at": time_current
                                    }
                                }, upsert=True)
            return data
    except Exception as e:
        LOGGER.exception(e)
        return None
async def author_notification(request):
    """Subscribe the current user to an author's new-book notifications.

    JSON status values:
        -1  session expired, user must log in again
         2  no information for that author
         3  author already subscribed
         4  subscription limit reached
         0  operation failed
         1  operation succeeded
    """
    user = request['session'].get('user', None)
    user_data = parse_qs(str(request.body, encoding='utf-8'))
    if not user:
        return json({'status': -1})
    try:
        motor_db = motor_base.get_db()
        all_authors = await motor_db.user_message.find_one(
            {'user': user}, {'author_latest': 1, '_id': 0})
        # find_one returns None for a brand-new user: treat as 0 subscriptions
        # instead of crashing on None.get(...)
        count = len(all_authors.get('author_latest', [])) if all_authors else 0
        # >= (was ==): a lowered config limit must still block over-quota users
        if count >= CONFIG.WEBSITE.get("AUTHOR_LATEST_COUNT", 5):
            return json({'status': 4})
        # default [None] (was None): .get(...)[0] raised TypeError when the
        # form field was missing
        author_name = user_data.get('author_name', [None])[0]
        if not author_name:
            return json({'status': 0})
        data = []
        author_cursor = motor_db.all_books.find(
            {'author': author_name}, {'name': 1, 'url': 1, '_id': 0})
        async for document in author_cursor:
            data.append(document)
        if not data:
            return json({'status': 2})
        time = get_time()
        res = await motor_db.user_message.update_one(
            {'user': user}, {'$set': {'last_update_time': time}}, upsert=True)
        is_exist = await motor_db.user_message.find_one(
            {'user': user, 'author_latest.author_name': author_name})
        if is_exist:
            return json({'status': 3})
        if res:
            # $ne filter makes the $push a no-op if the author raced in
            await motor_db.user_message.update_one(
                {'user': user, 'author_latest.author_name': {'$ne': author_name}},
                {'$push': {'author_latest': {'author_name': author_name,
                                             'add_time': time}}})
            is_author_exist = await motor_db.author_message.find_one(
                {'name': author_name})
            if not is_author_exist:
                author_data = {
                    "author_name": author_name,
                    "nums": len(data),
                    "updated_time": get_time(),
                }
                # insert_one replaces the deprecated Collection.save()
                await motor_db.author_message.insert_one(author_data)
            LOGGER.info('作者添加成功')
            return json({'status': 1})
        return json({'status': 2})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
async def owllook_search(request):
    """Novel search endpoint.

    Resolves the 'wd' query param through one or more search engines
    (optionally forced with a '!baidu' / '!360' / '!bing' prefix), records
    search statistics globally and per logged-in user, and renders the
    result template.
    """
    start = time.time()
    name = str(request.args.get('wd', '')).strip()
    # only the first whitespace-separated token is treated as the novel name
    novels_keyword = name.split(' ')[0]
    motor_db = motor_base.get_db()
    if not name:
        return redirect('/')
    else:
        # record the searched novel name (global counter)
        try:
            await motor_db.search_records.update_one(
                {'keyword': name}, {'$inc': {'count': 1}}, upsert=True)
        except Exception as e:
            LOGGER.exception(e)
        # retrieve results through a search engine
        parse_result = None
        if name.startswith('!baidu'):
            novels_keyword = name.split('baidu')[1].strip()
            novels_name = 'intitle:{name} 小说 阅读'.format(name=novels_keyword)
            parse_result = await cache_owllook_baidu_novels_result(novels_name)
        elif name.startswith('!360'):
            novels_keyword = name.split('360')[1].strip()
            novels_name = "{name} 小说 最新章节".format(name=novels_keyword)
            parse_result = await cache_owllook_so_novels_result(novels_name)
        elif name.startswith('!bing'):
            novels_keyword = name.split('bing')[1].strip()
            novels_name = "{name} 小说 阅读 最新章节".format(name=novels_keyword)
            parse_result = await cache_owllook_bing_novels_result(novels_name)
        # elif name.startswith('!duck_go'):
        #     novels_keyword = name.split('duck_go')[1].strip()
        #     novels_name = '{name} 小说 阅读 最新章节'.format(name=novels_keyword)
        #     parse_result = await cache_owllook_duck_novels_result(novels_name)
        else:
            # no explicit engine prefix: try engines in priority order until
            # one returns results
            for each_engine in ENGINE_PRIORITY:
                # for bing
                if each_engine == "bing":
                    novels_name = "{name} 小说 阅读 最新章节".format(name=name)
                    parse_result = await cache_owllook_bing_novels_result(novels_name)
                    if parse_result:
                        break
                # for 360 so
                if each_engine == "360":
                    novels_name = "{name} 小说 最新章节".format(name=name)
                    parse_result = await cache_owllook_so_novels_result(novels_name)
                    if parse_result:
                        break
                # for baidu
                if each_engine == "baidu":
                    novels_name = 'intitle:{name} 小说 阅读'.format(name=name)
                    parse_result = await cache_owllook_baidu_novels_result(novels_name)
                    if parse_result:
                        break
                # for duckduckgo
                if each_engine == "duck_go":
                    novels_name = '{name} 小说 阅读 最新章节'.format(name=name)
                    parse_result = await cache_owllook_duck_novels_result(novels_name)
                    if parse_result:
                        break
        if parse_result:
            # result_sorted = sorted(
            #     parse_result, reverse=True, key=lambda res: res['timestamp']) if ':baidu' not in name else parse_result
            # sort primarily by recommended/parseable flags, then by update time
            result_sorted = sorted(
                parse_result,
                reverse=True,
                key=itemgetter('is_recommend', 'is_parse', 'timestamp'))
            user = request['session'].get('user', None)
            if user:
                try:
                    time_current = get_time()
                    res = await motor_db.user_message.update_one(
                        {'user': user},
                        {'$set': {'last_update_time': time_current}},
                        upsert=True)
                    # NOTE(review): too many separate mongo operations here;
                    # revisit after another look at mongo update syntax
                    if res:
                        is_ok = await motor_db.user_message.update_one(
                            {'user': user, 'search_records.keyword': {'$ne': novels_keyword}},
                            {'$push': {'search_records': {'keyword': novels_keyword, 'counts': 1}}},
                        )
                        if is_ok:
                            await motor_db.user_message.update_one(
                                {'user': user, 'search_records.keyword': novels_keyword},
                                {'$inc': {'search_records.$.counts': 1}}
                            )
                except Exception as e:
                    LOGGER.exception(e)
                return template(
                    'result.html',
                    is_login=1,
                    user=user,
                    name=novels_keyword,
                    time='%.2f' % (time.time() - start),
                    result=result_sorted,
                    count=len(parse_result))
            else:
                return template(
                    'result.html',
                    is_login=0,
                    name=novels_keyword,
                    time='%.2f' % (time.time() - start),
                    result=result_sorted,
                    count=len(parse_result))
        else:
            return html("No Result!请将小说名反馈给本站,谢谢!")
async def owl_bd_novels(request, name):
    """Baidu novel-search API.

    :param request:
    :param name: novel name (url-quoted)
    :return: response with the novel search results
    """
    name = unquote(name)
    novels_name = 'intitle:{name} 小说 阅读'.format(name=name)
    try:
        res = await cache_owllook_baidu_novels_result(novels_name)
        parse_result = []
        if res:
            # drop falsy entries from the engine results
            parse_result = [i for i in res if i]
        # Copy the shared template instead of mutating it in place:
        # UniResponse.SUCCESS is module-level state, and update()-ing it
        # leaks one request's data into concurrent/subsequent responses.
        ok_response = dict(UniResponse.SUCCESS)
        ok_response.update({
            ResponseField.DATA: parse_result,
            ResponseField.FINISH_AT: get_time()
        })
        return response_handle(request, ok_response, 200)
    except Exception as e:
        LOGGER.exception(e)
        return response_handle(request, UniResponse.SERVER_UNKNOWN_ERR, 500)
async def get_the_latest_chapter(chapter_url):
    # Resolve the latest chapter (name + url) of a novel from its owllook
    # chapter url, persist it into the `latest_chapter` collection, and
    # return the data dict (None when nothing could be resolved).
    #
    # :param chapter_url: owllook chapter url carrying `url` and
    #     `novels_name` query params
    url = parse_qs(urlparse(chapter_url).query).get('url', '')
    novels_name = parse_qs(urlparse(chapter_url).query).get('novels_name', '')
    data = None
    if url and novels_name:
        # parse_qs returns lists; unwrap the first value
        url = url[0]
        novels_name = novels_name[0]
        netloc = urlparse(url).netloc
        # only sources with a configured latest-chapter rule are handled
        if netloc in LATEST_RULES.keys():
            async with aiohttp.ClientSession() as client:
                html = await target_fetch(client=client, url=url)
                soup = BeautifulSoup(html, 'html5lib')
                latest_chapter_name, latest_chapter_url = None, None
                if LATEST_RULES[netloc].plan:
                    # plan rule: site exposes the latest chapter via
                    # <meta property=...> tags
                    meta_value = LATEST_RULES[netloc].meta_value
                    latest_chapter_name = soup.select(
                        'meta[property="{0}"]'.format(
                            meta_value["latest_chapter_name"]))
                    latest_chapter_name = latest_chapter_name[0].get(
                        'content', None) if latest_chapter_name else None
                    latest_chapter_url = soup.select(
                        'meta[property="{0}"]'.format(
                            meta_value["latest_chapter_url"]))
                    latest_chapter_url = latest_chapter_url[0].get(
                        'content', None) if latest_chapter_url else None
                else:
                    # otherwise use a per-site id/class/tag selector rule
                    selector = LATEST_RULES[netloc].selector
                    content_url = selector.get('content_url')
                    if selector.get('id', None):
                        latest_chapter_soup = soup.find_all(id=selector['id'])
                    elif selector.get('class', None):
                        latest_chapter_soup = soup.find_all(
                            class_=selector['class'])
                    else:
                        latest_chapter_soup = soup.select(selector.get('tag'))
                    if latest_chapter_soup:
                        if content_url == '1':
                            # TODO
                            pass
                        elif content_url == '0':
                            # TODO
                            pass
                        else:
                            # relative href: prefix with the rule's base url
                            latest_chapter_url = content_url + latest_chapter_soup[
                                0].get('href', None)
                        latest_chapter_name = latest_chapter_soup[0].get(
                            'title', None)
                if latest_chapter_name and latest_chapter_url:
                    time_current = get_time()
                    data = {
                        "latest_chapter_name": latest_chapter_name,
                        "latest_chapter_url": latest_chapter_url,
                        "owllook_chapter_url": chapter_url,
                        "owllook_content_url": "/owllook_content?url={latest_chapter_url}&name={name}&chapter_url={chapter_url}&novels_name={novels_name}"
                        .format(
                            latest_chapter_url=latest_chapter_url,
                            name=latest_chapter_name,
                            chapter_url=url,
                            novels_name=novels_name,
                        ),
                    }
                    # persist the latest chapter
                    motor_db = MotorBase().db
                    await motor_db.latest_chapter.update_one(
                        {
                            "novels_name": novels_name,
                            'owllook_chapter_url': chapter_url
                        },
                        {'$set': {
                            'data': data,
                            "finished_at": time_current
                        }},
                        upsert=True)
    return data