async def nh_search(q: Optional[str] = "a", page: Optional[int] = 1):
    """
    **nh_search** :

    - **name**: keyword search for comic info
    - **description**: parameters are passed in the URL; keyword search via a GET request
    - **rely**: depends on handlers.dbFormat.reglux, handlers.getWeb.base_load_web, etc.
    - **param**[q]: str, search keywords; separate multiple terms with "+" and replace spaces inside a compound term with "_". Defaults to "a"
    - **param**[page]: int, page number. Defaults to 1
    - **return**: json, the response data
    """
    # todo: handle the data returned when the search terms match nothing
    from handlers.getWeb import base_load_web
    from handlers.dbFormat import urlencode

    # Keyword handling: spaces and their URL-escaped form are both turned into "+"
    keyword = urlencode(q)
    # keyword = text.replace(" ", "+").replace("%20", "+")
    tempDict = {
        "kw": q,
        "page": page,
        "pages": None,
        "results": 0,
        "bookList": [],
    }
    params = {"page": page}
    callbackJson = constructResponse()
    url = "https://nhentai.net/search/?q=%s" % keyword
    req = base_load_web(url, params=params)
    if req is not None:
        from handlers.dbFormat import reglux
        from handlers.dbFormat import str_extract_num

        callbackJson.statusCode = req.status_code
        bids = reglux(req.text, r'<a href="/g/(\d*?)/" class="cover" style="padding', True)
        names = reglux(req.text, r'<div class="caption">(.*?)</div>', False)
        thumbs = reglux(req.text, r'<noscript><img src="([\s\S]*?)"', False)
        for b, n, t in zip(bids, names, thumbs):
            tempItem = {
                "id": b,
                "hash": str_extract_num(t),
                "bname": n,
                "cover": "/ero/nh/t/{tid}/thumb.{tname}".format(
                    tid=str_extract_num(t),
                    tname="png" if "png" in t else "jpg"),
                "url": "/ero/nh/id/%s/" % b,
            }
            tempDict["bookList"].append(tempItem)
        # Total number of results
        results = "".join(
            reglux(req.text, r'<i class="fa fa-search color-icon"></i> ([\s\S]*?) results</h1>', False)) or "0"
        tempDict["results"] = int(results.replace(",", ""))
        # Number of result pages; falls back to 1 when there are no results
        tempDict["pages"] = "".join(
            reglux(req.text, r'<a href="/search/\?q=.*?\&\;page=(\d*?)" class="last">', False)) or 1
        tempDict["pages"] = int(tempDict["pages"])
        if tempDict["results"] == 0:
            tempDict["pages"] = 1
    return callbackJson.callBacker(tempDict)
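# The handlers in this module lean on handlers.dbFormat.reglux for all HTML
# scraping. Its real implementation is not shown here; the sketch below is an
# assumed re.findall wrapper (_reglux_sketch and the ignored third flag are
# hypothetical, illustration only) that matches how its results are consumed
# above: one capture group yields a list of strings, several groups a list of
# tuples.
import re


def _reglux_sketch(text: str, pattern: str, _flag: bool = False):
    # re.findall semantics line up with the call sites above; the real helper
    # may differ (e.g. in what the third argument means), hence the hedging.
    return re.findall(pattern, text or "")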
async def exh():
    from handlers.getWeb import base_load_web

    tempStr = "{}"
    start_time = time.time()
    req = base_load_web("https://nhentai.net/g/249664/")
    if req is not None:
        from handlers.dbFormat import reglux

        tempStr = "".join(
            reglux(req.text, r'window._gallery = JSON.parse\("([\s\S]*?)"\);', False)
        ).encode("utf-8").decode('unicode-escape')
    end_time = time.time()
    return {"costTime": end_time - start_time, "content": json.loads(tempStr)}
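# nh embeds the gallery metadata as window._gallery = JSON.parse("...") with
# \u-escaped quotes, which is why exh() and nh_item() run the captured string
# through encode("utf-8").decode("unicode-escape") before json.loads. A tiny
# self-check with a made-up fragment (the id value is illustrative):
import json

_escaped = '{\\u0022id\\u0022: 177013}'  # as it would appear inside the page source
assert json.loads(_escaped.encode("utf-8").decode("unicode-escape")) == {"id": 177013}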
async def nh_galleries(enc: str, raw: Optional[bool] = False):
    """
    **nh_galleries** :

    - **name**: comic gallery interface
    - **description**: resolve an encrypted token into the comic's page list via a GET request
    - **rely**: depends on handlers.dbFormat.reglux, handlers.getWeb.base_load_web, etc.
    - **param**[enc]: str, encrypted "id|hash|pages" token produced by nh_item
    - **param**[raw]: bool, whether to include the raw page source
    - **return**: json, the response data
    """
    from handlers.getWeb import base_load_web
    from handlers.dbFormat import encrypt_2_str

    callbackJson = constructResponse()
    decStr = encrypt_2_str(enc).split("|")
    id = decStr[0] or None
    hash = decStr[1] or None
    pages = decStr[2] or None
    tempDict = {
        "from": None,
        "pages": None,
        "thumbs": [],
        "images": [],
        "raw": None,
    }
    req = base_load_web("https://nhentai.net/g/%s/" % id)
    # Bail out when the request failed
    if req is None:
        return callbackJson.callBacker(tempDict)
    from handlers.dbFormat import reglux

    callbackJson.statusCode = req.status_code
    tempT = reglux(req.text, r'data-src="https://t.nhentai.net/galleries/\d*/(\d*)t.(jpg|png)"', False)
    if id and hash and pages:
        tempDict["from"] = f'/ero/nh/id/{id}'
        tempDict["pages"] = int(pages)
        tempDict["thumbs"] = [f"/ero/nh/t/{hash}/{i[0]}t.{i[1]}" for i in tempT]
        tempDict["images"] = [f"/ero/nh/i/{hash}/{i[0]}.{i[1]}" for i in tempT]
        # Alternative CDN mirrors, kept for reference:
        # tempDict["thumbs"] = [f"https://ero.raxianch.moe/cdn/sacy/nt/galleries/{hash}/{i[0]}t.{i[1]}" for i in tempT]
        # tempDict["images"] = [f"https://ero.raxianch.moe/cdn/sacy/ni/galleries/{hash}/{i[0]}.{i[1]}" for i in tempT]
        # tempDict["thumbs"] = [f"https://cdn.statically.io/img/t.nhentai.net/f=auto,w=720,q=80/galleries/{hash}/{i[0]}t.{i[1]}" for i in tempT]
        # tempDict["images"] = [f"https://cdn.statically.io/img/i.nhentai.net/f=auto,w=720,q=80/galleries/{hash}/{i[0]}.{i[1]}" for i in tempT]
    # Include the raw page source when requested
    if raw:
        tempDict["raw"] = req.text
    else:
        del tempDict["raw"]
    return callbackJson.callBacker(tempDict)
async def exh_images(
        book_hash: str,
        picture_id: str,
):
    from handlers.getWeb import base_load_web
    from handlers.dbFormat import reglux

    headers = {"Cookie": random.choice(EXH_COOKIE)}
    url = f"https://exhentai.org/s/{book_hash}/{picture_id}"
    req = base_load_web(url, headers=headers)
    # Bail out when the request failed
    if req is None:
        return Response(status_code=404)
    # Pull the real image address out of the single-page viewer
    imgUrl = "".join(reglux(req.text, '<img id="img" src="(.*?)"', False))
    r = base_load_web(imgUrl, headers=headers)
    # Guard against the image request itself failing
    if r is None:
        return Response(status_code=404)
    return Response(content=r.content)
async def nh_item(item_id: int, raw: Optional[bool] = False):
    """
    **nh_item** :

    - **name**: comic info interface
    - **description**: fetch the comic info for a given id via a GET request
    - **rely**: depends on handlers.dbFormat.reglux, handlers.getWeb.base_load_web, etc.
    - **param**[item_id]: int, the comic id
    - **param**[raw]: bool, whether to include the raw gallery data
    - **return**: json, the response data
    """
    from handlers.getWeb import base_load_web
    from handlers.dbFormat import str_2_encrypt

    tempStr = "{}"
    tempDict = {
        "id": None,
        "hash": None,
        "origin": app_name,
        "title": None,
        "pages": None,
        "favorites": None,
        "upload_date": None,
        "cover": None,
        "galleries": None,
        "tags": [],
        "raw": None,  # raw data
    }
    callbackJson = constructResponse()
    req = base_load_web("https://nhentai.net/g/%s/" % item_id)
    # Bail out when the request failed
    if req is None:
        return callbackJson.callBacker(tempDict)
    from handlers.dbFormat import reglux

    callbackJson.statusCode = req.status_code
    tempStr = "".join(
        reglux(req.text, r'window._gallery = JSON.parse\("([\s\S]*?)"\);', False)
    ).encode("utf-8").decode('unicode-escape')
    rawData = json.loads(tempStr)
    tempDict["id"] = rawData["id"]
    tempDict["hash"] = rawData["media_id"]
    tempDict["title"] = {
        "full_name": rawData["title"]["english"],
        "translated": rawData["title"]["japanese"],
        "abbre": rawData["title"]["pretty"],
    }
    tempDict["favorites"] = rawData["num_favorites"]
    tempDict["pages"] = rawData["num_pages"]
    tempDict["tags"] = rawData["tags"]
    tempDict["upload_date"] = rawData["upload_date"]
    # Image format suffix of the book cover
    bookImgSuffix = "".join(
        reglux(req.text, r'data-src="https://t.nhentai.net/galleries/\d*/cover.(jpg|png)"', False))
    # Build the cover URL
    tempDict["cover"] = "/ero/nh/t/{cid}/cover.{suffix}".format(cid=rawData["media_id"], suffix=bookImgSuffix)
    # Build the gallery URL
    tempDict["galleries"] = '/ero/nh/galleries/%s' % str_2_encrypt(
        f'{rawData["id"]}|{rawData["media_id"]}|{rawData["num_pages"]}'
    )
    # Include the raw gallery data when requested
    if raw:
        tempDict["raw"] = rawData
    else:
        del tempDict["raw"]
    return callbackJson.callBacker(tempDict)
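# nh_item packs "id|hash|pages" with handlers.dbFormat.str_2_encrypt, and
# nh_galleries/exh_galleries later split the decrypted token on "|". The real
# codec is project-internal; the urlsafe-base64 pair below is only an assumed
# stand-in (names are hypothetical) showing the round trip the routes rely on.
import base64


def _str_2_encrypt_sketch(plain: str) -> str:
    # Hypothetical: fold the "id|hash|pages" triple into a URL-safe token.
    return base64.urlsafe_b64encode(plain.encode("utf-8")).decode("ascii")


def _encrypt_2_str_sketch(token: str) -> str:
    # Hypothetical inverse: recover "id|hash|pages" for .split("|").
    return base64.urlsafe_b64decode(token.encode("ascii")).decode("utf-8")


assert _encrypt_2_str_sketch(_str_2_encrypt_sketch("1|abc|225")).split("|") == ["1", "abc", "225"]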
async def exh_search(q: Optional[str] = "", page: Optional[int] = 1):
    """
    **exh_search** :

    - **name**: keyword search for comic info
    - **description**: parameters are passed in the URL; keyword search via a GET request
    - **rely**: depends on handlers.dbFormat.reglux, handlers.getWeb.base_load_web, etc.
    - **param**[q]: str, search keywords; separate multiple terms with "+" and replace spaces inside a compound term with "_". Defaults to an empty string
    - **param**[page]: int, page number. Defaults to 1
    - **return**: json, the response data
    """
    # todo: handle the data returned when the search terms match nothing
    from handlers.getWeb import base_load_web

    # Keyword handling: "+" separators become the URL-escaped space exh expects
    keyword = q.replace("+", "%20")
    tempDict = {
        "kw": q,
        "page": page,
        "pages": None,
        "results": None,
        "bookList": [],
    }
    callbackJson = constructResponse()
    headers = {"Cookie": random.choice(EXH_COOKIE)}
    param = {
        "page": page - 1 if page - 1 >= 0 else 0,  # exh pages are 0-based
        "f_search": keyword,
    }
    # req = base_load_web(f"https://exhentai.org/?page={page - 1}&f_search={keyword}", headers=headers)
    req = base_load_web("https://exhentai.org/", params=param, headers=headers)
    if req is not None:
        import math
        from handlers.dbFormat import reglux
        from handlers.dbFormat import str_extract_num

        callbackJson.statusCode = req.status_code
        callbackJson.url = req.url
        # Total number of search results
        tempDict["results"] = int(
            str_extract_num("".join(
                reglux(req.text, r'Showing (.*?) results', False)))) or 0
        # Derive the page count from the total (25 results per page)
        tempDict["pages"] = int(math.ceil(tempDict["results"] / 25)) or 0
        bids = reglux(
            req.text,
            r'<a href="https://exhentai.org/g/([\s\S]*?)/([\s\S]*?)/">', False)
        names = reglux(req.text, r'<div class=".*?glink">([\s\S]*?)</div>', False)
        thumbs = reglux(
            req.text,
            r'src="https://exhentai.org/t/.*?/.*?/([\s\S]*?).(jpg|png|jpeg|gif)"',
            False)
        # todo: add new fields such as star rating
        for b, n, t in zip(bids, names, thumbs):
            tempItem = {
                "id": b[0],
                "hash": b[-1],
                "bname": n,
                "cover": "/ero/exh/t/{tname}.{bookImgSuffix}".format(
                    tname=t[0], bookImgSuffix=t[1]),
                "url": "/ero/exh/id/%s/%s/" % (b[0], b[1]),
            }
            tempDict["bookList"].append(tempItem)
        if not tempDict["bookList"]:
            logger.warning(f"{app_name} {sys._getframe().f_code.co_name}")
    return callbackJson.callBacker(tempDict)
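# Worked example of the page math in exh_search: exh lists 25 hits per result
# page, so 51 results span 3 pages, exactly 25 span 1, and the trailing
# "or 0" keeps an empty result set at page count 0.
import math

assert math.ceil(51 / 25) == 3
assert math.ceil(25 / 25) == 1
assert (int(math.ceil(0 / 25)) or 0) == 0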
async def exh_galleries(enc: str, raw: Optional[bool] = False):
    from handlers.getWeb import base_load_web
    from handlers.getWeb import thread_load_web
    from handlers.dbFormat import encrypt_2_str

    callbackJson = constructResponse()
    decStr = encrypt_2_str(enc).split("|")
    id = decStr[0] or None
    hash = decStr[1] or None
    pages = int(decStr[2]) or None
    baseDict = {
        "from": None,
        "pages": None,
        "thumbs": [],
        "images": [],
        "raw": None,
    }
    tempDict = baseDict.copy()
    # exh requires a cookie
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Host": "exhentai.org",
        "Cookie": random.choice(EXH_COOKIE),
    }
    # req = base_load_web(f"https://exhentai.org/g/{id}/{hash}/?inline_set=ts_l", headers=headers)
    req = base_load_web(f"https://exhentai.org/g/{id}/{hash}/", headers=headers)
    # Bail out when the request failed
    if req is None:
        return callbackJson.callBacker(tempDict)
    import math
    from handlers.dbFormat import reglux

    callbackJson.statusCode = req.status_code
    callbackJson.url = req.url
    # Collect every thumbnail address on the first gallery page.
    # The exh HTML changes from time to time, so two regex variants are tried.
    thumbTemp = reglux(
        req.text,
        r'src="https://exhentai.org/t/[\s\S]{2}/[\s\S]{2}/([\s\S]*?)"',
        False) or []
    # Collect every single-page address on the first gallery page
    imagesTemp = []
    imagesTemp += reglux(req.text, '<div class="gdtl" style="height:.*?px"><a href="(.*?)">', False) or \
        reglux(req.text, 'no-repeat"><a href="(.*?)"><img alt', False)
    # Number of exh-side gallery pages (20 thumbnails per page)
    gPages = math.ceil(pages / 20)
    # Only fan out when there is more than one gallery page
    if gPages > 1:
        # Build the list of remaining gallery page URLs
        gUrlPagesUrl = []
        for i in range(1, gPages):
            gUrlPagesUrl.append(f"https://exhentai.org/g/{id}/{hash}/?p={i}")
        # Request every gallery page and merge its page addresses into the result
        subPages = thread_load_web(
            gUrlPagesUrl,
            headers=headers,
            inspectStr="IP address has been temporarily banned")
        for k, v in subPages.items():
            if v is None:
                continue
            itemp = reglux(v.text, '<div class="gdtl" style="height:.*?px"><a href="(.*?)">', False) or \
                reglux(v.text, 'no-repeat"><a href="(.*?)"><img alt', False) or []
            tTemp = reglux(
                v.text,
                r'src="https://exhentai.org/t/[\s\S]{2}/[\s\S]{2}/([\s\S]*?)"',
                False) or []
            if len(tTemp) == 0:
                logger.warning(f"{app_name} {sys._getframe().f_code.co_name} no thumbnails parsed from {v.url}")
            thumbTemp += tTemp
            imagesTemp += itemp
    # Normalize the collected data
    tempDict["thumbs"] = list(set([f"/ero/exh/t/{i}" for i in thumbTemp]))
    tempDict["images"] = list(
        map(lambda x: f"/ero/exh/i/{x.split('/s/')[-1]}/", imagesTemp))
    return callbackJson.callBacker(tempDict)
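# Worked example of the fan-out in exh_galleries: exh shows 20 thumbnails per
# gallery page and the first page is already fetched, so a book with 45 pages
# needs ceil(45 / 20) = 3 gallery pages, i.e. two extra requests (?p=1, ?p=2).
import math

_demo_pages = 45  # illustrative page count
assert math.ceil(_demo_pages / 20) == 3
assert [f"?p={i}" for i in range(1, math.ceil(_demo_pages / 20))] == ["?p=1", "?p=2"]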
async def exh_item(item_id: int, hash_id: str, raw: Optional[bool] = False):
    """
    **exh_item** :

    - **name**: comic info interface
    - **description**: fetch the comic info for a given id via a GET request
    - **rely**: depends on handlers.dbFormat.reglux, handlers.getWeb.base_load_web, etc.
    - **param**[item_id]: int, the comic id
    - **param**[hash_id]: str, the comic hash
    - **return**: json, the response data
    """
    # todo: some books may be missing individual pages
    from handlers.getWeb import base_load_web
    from handlers.dbFormat import str_2_encrypt

    baseDict = {
        "id": None,
        "hash": None,
        "origin": app_name,
        "title": None,
        "pages": None,
        "favorites": 0,
        "upload_date": None,
        "cover": None,
        "galleries": None,
        "tags": [],
        "raw": None,  # raw data
    }
    tempDict = baseDict.copy()
    callbackJson = constructResponse()
    # exh requires a cookie
    headers = {"Cookie": random.choice(EXH_COOKIE)}
    req = base_load_web(f"https://exhentai.org/g/{item_id}/{hash_id}/", headers=headers)
    # Bail out when the request failed
    if req is None:
        return callbackJson.callBacker(tempDict)
    from handlers.dbFormat import reglux

    callbackJson.statusCode = req.status_code
    callbackJson.url = req.url
    # Total number of pages in the book
    pages = "".join(
        reglux(req.text, 'Length:</td><td class="gdt2">(.*?) pages</td></tr>', False))
    if pages and req.text:
        tempDict["pages"] = int(pages)
    else:
        tempDict["pages"] = 0
        logger.warning(
            f"[failed to get the book's page count!] {app_name} {sys._getframe().f_code.co_name} {req.url} {req.status_code}"
        )
    # Image format suffix of the book cover
    bookImgSuffix = "".join(
        reglux(req.text, r'background:transparent url\(https://exhentai.org/t/(.*?)\)', False))
    # Build the cover URL
    tempDict["cover"] = "/ero/exh/t/{suffix}".format(suffix=bookImgSuffix)
    # Build the gallery URL
    tempDict["galleries"] = '/ero/exh/galleries/%s' % str_2_encrypt(
        f'{item_id}|{hash_id}|{tempDict["pages"]}')
    # Include the raw page source when requested
    if raw:
        tempDict["raw"] = [req.text]
    else:
        del tempDict["raw"]
    # Assemble the remaining fields
    tempDict["id"] = item_id
    tempDict["hash"] = hash_id
    tempDict["title"] = {
        "full_name": "".join(reglux(req.text, "<title>(.*?) - ExHentai.org</title>", False)),
        "translated": "".join(reglux(req.text, '<h1 id="gj">(.*?)</h1>', False)),
        "abbre": "",
    }
    tempDict["favorites"] = int(
        "".join(reglux(req.text, r'id="favcount">(\d*?) times</td>', False)) or
        "".join(reglux(req.text, 'id="favcount">(.*?)</td>', False)).replace("Once", "1") or 0)
    tempDict["tags"] = []
    tempDict["upload_date"] = "".join(
        reglux(req.text, 'Posted:</td><td class="gdt2">(.*?)</td>', False))
    return callbackJson.callBacker(tempDict)
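# exh renders the favourite count either as "<N> times" or as the word "Once",
# so exh_item tries the numeric capture first, then maps "Once" to "1", then
# falls back to 0. The fallback chain behaves like this:
assert int("12" or "Once".replace("Once", "1") or 0) == 12  # numeric branch
assert int("" or "Once".replace("Once", "1") or 0) == 1     # "Once" branch
assert int("" or "".replace("Once", "1") or 0) == 0         # nothing matched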