def main_uncensored(number):
    """Scrape javbus metadata for ``number`` and return it as JSON text.

    The javbus page is requested for ``number`` as given; if that page yields
    an empty title, the request is repeated with ``-`` replaced by ``_``.
    When the page yields no year (``''`` or ``'null'``) the lookup is handed
    to ``javdb.main(number)`` and its result is returned unchanged.

    Args:
        number: Release identifier appended to the javbus / DMM URLs.

    Returns:
        A JSON string (sorted keys, 4-space indent) with the scraped fields,
        or the return value of ``javdb.main(number)`` on the fallback path.
    """
    htmlcode = get_html('https://www.javbus.com/' + number)
    if getTitle(htmlcode) == '':
        # No title for the dashed form: retry with underscores.
        htmlcode = get_html('https://www.javbus.com/' + number.replace('-', '_'))

    # Decide on the javdb fallback before doing any further requests or
    # parsing; the result of that work would be thrown away anyway.
    year = getYear(htmlcode)
    if year == '' or year == 'null':
        return javdb.main(number)

    # The DMM URL is identical whichever javbus URL answered, so a single
    # request is enough (the page is only used for the outline).
    dww_htmlcode = get_html(
        "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))

    num = getNum(htmlcode)
    dic = {
        # Drop a leading "<word>-<digits>-" prefix and the number itself from the title.
        'title': str(re.sub(r'\w+-\d+-', '', getTitle(htmlcode))).replace(num + '-', ''),
        'studio': getStudio(htmlcode),
        'year': year,
        'outline': getOutline(dww_htmlcode),
        'runtime': getRuntime(htmlcode),
        'director': getDirector(htmlcode),
        'actor': getActor(htmlcode),
        'release': getRelease(htmlcode),
        'number': num,
        'cover': getCover(htmlcode),
        'tag': getTag(htmlcode),
        'label': getSerise(htmlcode),
        'imagecut': 0,
        'actor_photo': '',
        'website': 'https://www.javbus.com/' + number,
    }
    return json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )
def getDataFromJSON(file_number):  # Load scraped metadata from JSON into module globals
    """Scrape metadata for ``file_number`` and publish it through module globals.

    A scraper chain is chosen from the shape of ``file_number``; each scraper
    in the chain is tried in order until ``getDataState`` stops reporting 0
    for the decoded result.  The fields of the chosen result are then copied
    into the module-level names declared ``global`` below, the title is
    stripped of characters that are illegal in file names, and the naming /
    location rules from ``config`` are evaluated.

    Args:
        file_number: Release identifier extracted from the file name.

    Returns:
        None.  Results are delivered through the module globals.
    """
    global title, studio, year, outline, runtime, director
    global actor_list, actor, release, number, cover, imagecut, tag
    global image_main, cn_sub, website, actor_photo, cover_small
    global json_data, naming_rule, location_rule

    # ===================== site rules: start =====================
    if re.match(r'^\d{5,}', file_number):
        scrapers = (avsox, javdb)
    elif re.match(r'\d+\D+', file_number):
        scrapers = (siro, javbus, javdb)
    elif 'fc2' in file_number or 'FC2' in file_number:
        scrapers = (fc2fans_club,)
    # The identifier being looked up is ``file_number``; the global ``number``
    # is only assigned further down and must not be consulted here.
    elif 'HEYZO' in file_number or 'heyzo' in file_number or 'Heyzo' in file_number:
        scrapers = (avsox,)
    elif 'siro' in file_number or 'SIRO' in file_number or 'Siro' in file_number:
        scrapers = (siro,)
    else:
        scrapers = (javbus, avsox, javdb)

    json_data = json.loads(scrapers[0].main(file_number))
    for fallback in scrapers[1:]:
        # If fetching metadata failed, ask the next site for this number.
        if getDataState(json_data) != 0:
            break
        json_data = json.loads(fallback.main(file_number))
    # ===================== site rules: end =======================

    title = str(json_data['title']).replace(' ', '')
    studio = json_data['studio']
    year = json_data['year']
    outline = json_data['outline']
    runtime = json_data['runtime']
    director = json_data['director']
    # String -> list
    actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',')
    release = json_data['release']
    number = json_data['number']
    cover = json_data['cover']
    cover_small = json_data.get('cover_small', '')  # optional field
    imagecut = json_data['imagecut']
    # String -> list
    tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')
    actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
    actor_photo = json_data['actor_photo']
    website = json_data['website']
    # Local only (not declared global).  Nothing below reads it directly, but
    # the lookup is kept so the name stays visible to the eval'd rules.
    source = json_data['source']

    if title == '' or number == '':
        print('[-]Movie Data not found!')
        moveFailedFolder()
    if imagecut == '3':
        DownloadFileWithFilename()

    # ============== strip illegal characters  \/:*?"<>| ==============
    title = title.translate(str.maketrans('', '', '\\/:*?"<>|'))
    # Keep only the first comma-separated entry, without surrounding quotes.
    tmpArr = cover_small.split(',')
    cover_small = tmpArr[0].strip('"').strip("'")
    # ============== strip illegal characters: end ====================

    # NOTE(security): eval() executes whatever expression the config holds,
    # with this function's globals and locals in scope.  Only use a trusted
    # config file.
    naming_rule = eval(config['Name_Rule']['naming_rule'])
    location_rule = eval(config['Name_Rule']['location_rule'])
def getDataFromJSON(file_number, filepath, failed_folder):  # Return metadata from JSON
    """Scrape metadata for ``file_number`` and return the cleaned-up dict.

    A scraper chain is chosen from the shape of ``file_number``; each scraper
    is tried in order until ``getDataState`` stops reporting 0 for the decoded
    result.  The title, actor, release, cover_small and tag fields are then
    normalised and the naming / location rules from ``config`` are evaluated
    and stored in the result.

    Args:
        file_number: Release identifier extracted from the file name.
        filepath: Passed to ``moveFailedFolder`` when no title/number is found.
        failed_folder: Passed to ``moveFailedFolder`` together with ``filepath``.

    Returns:
        The decoded scraper result with ``title``, ``actor``, ``release``,
        ``cover_small``, ``tag``, ``naming_rule`` and ``location_rule`` set.
    """
    # ===================== site rules: start =====================
    lookup_number = file_number
    if re.match(r'^\d{5,}', file_number):
        scrapers = (avsox, javdb)
    elif re.match(r'\d+\D+', file_number):
        scrapers = (siro, javbus, javdb)
    elif 'fc2' in file_number or 'FC2' in file_number:
        scrapers = (fc2fans_club,)
        # fc2fans_club is queried without the "fc2"/"FC2" prefix, for both
        # separators and both spellings.
        for prefix in ('fc2-', 'fc2_', 'FC2-', 'FC2_'):
            lookup_number = lookup_number.replace(prefix, '')
    elif 'HEYZO' in file_number or 'heyzo' in file_number or 'Heyzo' in file_number:
        scrapers = (avsox,)
    elif 'siro' in file_number or 'SIRO' in file_number or 'Siro' in file_number:
        scrapers = (siro,)
    # NOTE(review): this is true when there is no '-' OR when there is a '_'.
    # It may have been meant as "neither '-' nor '_'" - confirm before changing.
    elif '-' not in file_number or '_' in file_number:
        scrapers = (fanza, javbus, avsox, javdb)
    else:
        scrapers = (javbus, avsox, javdb)

    json_data = json.loads(scrapers[0].main(lookup_number))
    for fallback in scrapers[1:]:
        # If fetching metadata failed, ask the next site for this number.
        if getDataState(json_data) != 0:
            break
        json_data = json.loads(fallback.main(lookup_number))
    # ===================== site rules: end =======================

    # The local names below are deliberately kept as-is: the rule expressions
    # evaluated further down can reference any local of this function.
    title = json_data['title']
    # String -> list
    actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',')
    release = json_data['release']
    number = json_data['number']
    cover_small = json_data.get('cover_small', '')  # optional field
    imagecut = json_data['imagecut']
    # String -> list
    tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')
    actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')

    if title == '' or number == '':
        print('[-]Movie Data not found!')
        moveFailedFolder(filepath, failed_folder)

    # ============== strip illegal characters  \/:*?"<>| ==============
    title = title.translate(str.maketrans('', '', '\\/:*?"<>|'))
    release = release.replace('/', '-')
    # Keep only the first comma-separated entry, without surrounding quotes.
    tmpArr = cover_small.split(',')
    cover_small = tmpArr[0].strip('"').strip("'")
    # ============== strip illegal characters: end ====================

    # NOTE(security): eval() executes whatever expression the config holds,
    # with this function's locals in scope.  Only use a trusted config file.
    naming_rule = eval(config['Name_Rule']['naming_rule'])
    location_rule = eval(config['Name_Rule']['location_rule'])

    # Return the processed json_data.
    json_data['title'] = title
    json_data['actor'] = actor
    json_data['release'] = release
    json_data['cover_small'] = cover_small
    json_data['tag'] = tag
    json_data['naming_rule'] = naming_rule
    json_data['location_rule'] = location_rule
    return json_data
def getDataFromJSON(file_number, config, mode):  # Return metadata from JSON
    """Scrape metadata for ``file_number`` and return the cleaned-up dict.

    Args:
        file_number: Release identifier extracted from the file name.
        config: Mapping providing ``config['Name_Rule']['naming_rule']`` and
            ``config['Name_Rule']['location_rule']``.
        mode: 1 scrapes across all sites (the chain depends on the shape of
            ``file_number``); 2-7 query a single site (javdb, javbus, avsox,
            fc2club, fanza, siro).  For any mode other than 1 and 6, a number
            shaped like ``ssni00321`` yields an empty placeholder instead of
            being scraped.

    Returns:
        The scraper result.  It is returned untouched when its ``website`` is
        ``'timeout'`` or its ``title`` is empty; otherwise ``title``,
        ``number``, ``actor``, ``release``, ``cover_small`` and ``tag`` are
        normalised and ``naming_rule`` / ``location_rule`` are added.
    """

    def scrape(number, *sites):
        """Query ``sites`` in order until getDataState stops reporting 0."""
        data = json.loads(sites[0](number))
        for site in sites[1:]:
            # If fetching metadata failed, ask the next site for this number.
            if getDataState(data) != 0:
                break
            data = json.loads(site(number))
        return data

    # ===================== site rules: start =====================
    json_data = {}
    if mode == 1:  # scrape from all sites
        if re.match(r'^\d{5,}', file_number):  # 111111-111
            json_data = scrape(file_number, avsox.main, javdb.main, javbus.main)
        elif re.match(r'\d+\D+', file_number):  # 259LUXU-1111
            json_data = scrape(file_number, siro.main, javbus.main, javdb.main)
        elif 'fc2' in file_number or 'FC2' in file_number:  # FC2-111111
            # fc2fans_club is queried without the "fc2"/"FC2" prefix, for both
            # separators and both spellings; javdb gets the full number.
            fc2_number = file_number
            for prefix in ('fc2-', 'fc2_', 'FC2-', 'FC2_'):
                fc2_number = fc2_number.replace(prefix, '')
            json_data = json.loads(fc2fans_club.main(fc2_number))
            if getDataState(json_data) == 0:
                json_data = json.loads(javdb.main(file_number))
        elif 'HEYZO' in file_number or 'heyzo' in file_number or 'Heyzo' in file_number:  # HEYZO-1111
            json_data = json.loads(avsox.main(file_number))
        elif 'siro' in file_number or 'SIRO' in file_number or 'Siro' in file_number:  # SIRO-111
            json_data = json.loads(siro.main(file_number))
        elif re.match(r'\D{2,}00\d{3,}', file_number):  # ssni00321
            json_data = json.loads(fanza.main(file_number))
        else:  # MIDE-139, n1111
            json_data = scrape(file_number, javbus.main, avsox.main, javdb.main)
    elif mode != 6 and re.match(r'\D{2,}00\d{3,}', file_number):
        # Outside fanza-only mode these numbers are not scraped; the empty
        # title makes the early return below hand this placeholder back.
        json_data = {
            'title': '',
            'actor': '',
            'website': '',
        }
    elif mode == 2:  # javdb only
        json_data = json.loads(javdb.main(file_number))
    elif mode == 3:  # javbus only
        json_data = json.loads(javbus.main(file_number))
    elif mode == 4:  # avsox only
        json_data = json.loads(avsox.main(file_number))
    elif mode == 5:  # fc2club only
        json_data = json.loads(fc2fans_club.main(file_number))
    elif mode == 6:  # fanza only
        json_data = json.loads(fanza.main(file_number))
    elif mode == 7:  # siro only
        json_data = json.loads(siro.main(file_number))
    # ===================== site rules: end =======================

    # Timed out or nothing found: hand the raw result back.
    if json_data['website'] == 'timeout' or json_data['title'] == '':
        return json_data

    raw_title = json_data['title']
    clean_number = json_data['number'].replace('_', '-')
    # String -> list
    actor_names = str(json_data['actor']).strip("[ ]").replace("'", '').split(',')
    release_date = json_data['release']
    small_cover = json_data.get('cover_small', '')  # optional field
    # String -> list
    tags = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')
    actors = str(actor_names).strip("[ ]").replace("'", '').replace(" ", '')

    # ============== strip illegal characters  \/:*?"<>| and spaces ==============
    clean_title = raw_title.translate(str.maketrans('', '', '\\/:*?"<>| '))
    release_date = release_date.replace('/', '-')
    # Keep only the first comma-separated entry, without surrounding quotes.
    small_cover = small_cover.split(',')[0].strip('"').strip("'")
    # ============== strip illegal characters: end ===============================

    naming_rule = config['Name_Rule']['naming_rule']
    location_rule = config['Name_Rule']['location_rule']

    # Return the processed json_data.
    json_data['title'] = clean_title
    json_data['number'] = clean_number
    json_data['actor'] = actors
    json_data['release'] = release_date
    json_data['cover_small'] = small_cover
    json_data['tag'] = tags
    json_data['naming_rule'] = naming_rule
    json_data['location_rule'] = location_rule
    return json_data
def main(number):
    """Scrape javbus metadata for ``number`` and return it as JSON text.

    Numbers that contain a digits-then-non-digits run, or one of the markers
    ``siro`` / ``SIRO`` / ``Siro``, are first offered to ``siro.main``.  If
    that lookup raises, the javbus page is scraped instead.  If scraping
    javbus raises as well, the result of ``javdb.main(number)`` is returned.

    For HEYZO numbers the outline is left empty and the year is taken as
    scraped; for everything else the outline is read from the DMM detail page
    and the year is reduced to its first four-digit group.

    Args:
        number: Release identifier appended to the javbus / DMM URLs.

    Returns:
        A JSON string (sorted keys, 4-space indent), or whatever
        ``siro.main`` / ``javdb.main`` returned on those paths.
    """
    try:
        # A regex match object is truthy, so there is no need to call
        # .group(); doing so on a failed search would raise and prevent the
        # marker checks from ever being evaluated.
        if (re.search(r'\d+\D+', number)
                or 'siro' in number or 'SIRO' in number or 'Siro' in number):
            return siro.main(number)
    except Exception:
        # Deliberate best effort: any failure here falls through to javbus.
        pass

    try:
        is_heyzo = 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number
        htmlcode = get_html('https://www.javbus.com/' + number)
        if is_heyzo:
            # HEYZO entries take no outline from DMM and keep the raw year,
            # so the DMM request is skipped altogether.
            outline = ''
            year = getYear(htmlcode)
        else:
            dww_htmlcode = get_html(
                "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
            outline = getOutline(dww_htmlcode)
            # Raises (and triggers the javdb fallback) when no 4-digit year is present.
            year = str(re.search(r'\d{4}', getYear(htmlcode)).group())
        dic = {
            # Drop a leading "<word>-<digits>-" prefix from the title.
            'title': str(re.sub(r'\w+-\d+-', '', getTitle(htmlcode))),
            'studio': getStudio(htmlcode),
            'year': year,
            'outline': outline,
            'runtime': getRuntime(htmlcode),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': getRelease(htmlcode),
            'number': getNum(htmlcode),
            'cover': getCover(htmlcode),
            'imagecut': 1,
            'tag': getTag(htmlcode),
            'label': getSerise(htmlcode),
            'actor_photo': getActorPhoto(htmlcode),
            'website': 'https://www.javbus.com/' + number,
        }
        return json.dumps(
            dic,
            ensure_ascii=False,
            sort_keys=True,
            indent=4,
            separators=(',', ':'),
        )
    except Exception:
        # Anything going wrong while scraping javbus hands the lookup to javdb.
        return javdb.main(number)
def getDataFromJSON(file_number, config, mode):  # Return metadata from JSON
    """Scrape metadata for ``file_number`` and return the cleaned-up dict.

    Args:
        file_number: Release identifier extracted from the file name.
        config: Mapping providing the ``Name_Rule`` entries ``naming_media``,
            ``naming_file`` and ``folder_name`` and, for javlibrary lookups,
            ``config['javlibrary_url']['url']``.
        mode: 1 scrapes across all sites (the chain depends on the shape of
            ``file_number``); 2-8 query a single site (javdb, javbus, avsox,
            fc2club, fanza, siro, javlibrary).  For any mode other than 1 and
            6, a number shaped like ``ssni00321`` yields an empty placeholder
            instead of being scraped.

    Returns:
        The scraper result.  It is returned untouched when its ``website`` is
        ``'timeout'`` or its ``title`` is empty; otherwise the text fields
        are normalised and ``naming_media`` / ``naming_file`` /
        ``folder_name`` are added.
    """

    def scrape(number, *sites):
        """Query ``sites`` in order until getDataState stops reporting 0."""
        data = json.loads(sites[0](number))
        for site in sites[1:]:
            if getDataState(data) != 0:
                break
            data = json.loads(site(number))
        return data

    def from_javlibrary(number):
        """Query javlibrary using the base URL from the config."""
        return javlibrary.main(number, config['javlibrary_url']['url'])

    def looks_uncensored(min_digits_pattern):
        """Match uncensored-style numbers: 111111-111, n1111, HEYZO-1111."""
        return (re.match(min_digits_pattern, file_number)
                or re.match(r'n\d{4}', file_number)
                or 'HEYZO' in file_number.upper())

    # ===================== site rules: start =====================
    json_data = {}
    if mode == 1:  # scrape from all sites
        if looks_uncensored(r'^\d{4,}'):
            json_data = scrape(file_number, javbus.main_uncensored, javdb.main, avsox.main)
        elif re.match(r'\d+\D+', file_number):  # 259LUXU-1111
            json_data = scrape(file_number, siro.main, javbus.main, javdb.main)
        elif 'FC2' in file_number.upper():  # FC2-111111
            # fc2fans_club is queried without the "fc2"/"FC2" prefix, for both
            # separators and both spellings; javdb gets the full number.
            fc2_number = file_number
            for prefix in ('fc2-', 'fc2_', 'FC2-', 'FC2_'):
                fc2_number = fc2_number.replace(prefix, '')
            json_data = json.loads(fc2fans_club.main(fc2_number))
            if getDataState(json_data) == 0:
                json_data = json.loads(javdb.main(file_number))
        elif 'SIRO' in file_number.upper():  # SIRO-111
            json_data = scrape(file_number, siro.main, javdb.main)
        elif (re.match(r'\D{2,}00\d{3,}', file_number)
              and '-' not in file_number and '_' not in file_number):  # ssni00321
            json_data = json.loads(fanza.main(file_number))
        else:  # MIDE-139
            json_data = scrape(file_number, javbus.main, from_javlibrary,
                               javdb.main, avsox.main, mgstage.main)
    elif mode != 6 and re.match(r'\D{2,}00\d{3,}', file_number):
        # Outside fanza-only mode these numbers are not scraped; the empty
        # title makes the early return below hand this placeholder back.
        json_data = {
            'title': '',
            'actor': '',
            'website': '',
        }
    elif mode == 2:  # javdb only
        json_data = json.loads(javdb.main(file_number))
    elif mode == 3:  # javbus only
        # NOTE(review): mode 1 treats 4+ leading digits as uncensored while
        # this branch requires 5+ - confirm which threshold is intended.
        if looks_uncensored(r'^\d{5,}'):
            json_data = json.loads(javbus.main_uncensored(file_number))
        else:
            json_data = json.loads(javbus.main(file_number))
    elif mode == 4:  # avsox only
        json_data = json.loads(avsox.main(file_number))
    elif mode == 5:  # fc2club only
        json_data = json.loads(fc2fans_club.main(file_number))
    elif mode == 6:  # fanza only
        json_data = json.loads(fanza.main(file_number))
    elif mode == 7:  # siro only
        json_data = json.loads(siro.main(file_number))
    elif mode == 8:  # javlibrary only
        json_data = json.loads(from_javlibrary(file_number))
    # ===================== site rules: end =======================

    # Timed out or nothing found: hand the raw result back.
    if json_data['website'] == 'timeout' or json_data['title'] == '':
        return json_data

    # ===================== process the scraped fields =====================
    raw_title = json_data['title']
    number = json_data['number']
    # String -> list
    actor_names = str(json_data['actor']).strip("[ ]").replace("'", '').split(',')
    release_date = json_data['release']
    small_cover = json_data.get('cover_small', '')  # optional field
    # String -> list
    tags = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')
    actors = str(actor_names).strip("[ ]").replace("'", '').replace(" ", '')
    if actors == '':
        actors = 'Unknown'

    # ====== strip illegal characters  \/:*?"<>| plus spaces and the two brackets ======
    clean_title = raw_title.translate(str.maketrans('', '', '\\/:*?"<>| 【】'))
    release_date = release_date.replace('/', '-')
    # Keep only the first comma-separated entry, without surrounding quotes.
    small_cover = small_cover.split(',')[0].strip('"').strip("'")
    # Remove '/' from these fields when present.  'title' is not handled here
    # because it is replaced by the cleaned title just below.
    for key in ('studio', 'director', 'series', 'publisher'):
        if key in json_data:
            json_data[key] = str(json_data[key]).replace('/', '')
    # ====== strip illegal characters: end =============================================

    naming_media = config['Name_Rule']['naming_media']
    naming_file = config['Name_Rule']['naming_file']
    folder_name = config['Name_Rule']['folder_name']

    # Return the processed json_data.
    json_data['title'] = clean_title
    json_data['number'] = number
    json_data['actor'] = actors
    json_data['release'] = release_date
    json_data['cover_small'] = small_cover
    json_data['tag'] = tags
    json_data['naming_media'] = naming_media
    json_data['naming_file'] = naming_file
    json_data['folder_name'] = folder_name
    return json_data