def get(title, configfile, dbfile, bl_only=False, sj_only=False):
    """Search the configured movie sites (BY/DW/FX/NK) and the series site (SJ)
    for the given title and return a tuple (bl_final, sj_final) of result dicts."""
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    sj = hostnames.get('sj')

    # A trailing ",sXX" or ",sXXeYY" in the title requests a specific season or episode.
    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper()

    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query

        unrated = []

        config = CrawlerConfig('ContentAll', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')

        if "480p" not in quality:
            search_quality = "+" + quality
        else:
            search_quality = ""

        if by:
            by_search = 'https://' + by + '/?q=' + bl_query + search_quality
        else:
            by_search = None
        if dw:
            dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
        else:
            dw_search = None
        if fx:
            fx_search = 'https://' + fx + '/?s=' + bl_query
        else:
            fx_search = None

        async_results = get_urls_async([by_search, dw_search, fx_search], configfile, dbfile, scraper)
        scraper = async_results[1]
        async_results = async_results[0]

        by_results = []
        dw_results = []
        fx_results = []

        for res in async_results:
            if check_is_site(res, configfile) == 'BY':
                by_results = by_search_results(res, by)
            elif check_is_site(res, configfile) == 'DW':
                dw_results = dw_search_results(res, dw)
            elif check_is_site(res, configfile) == 'FX':
                fx_results = fx_search_results(fx_content_to_soup(res), configfile, dbfile, scraper)

        if nk:
            nk_search = post_url(
                'https://' + nk + "/search",
                configfile,
                dbfile,
                data={'search': bl_query.replace("+", " ") + " " + quality})
            nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
        else:
            nk_results = []

        password = by
        for result in by_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower()
                        or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            if "xxx" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (BY)"
                ])

        password = dw
        for result in dw_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower()
                        or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (DW)"
            ])

        password = fx.split('.')[0]
        for result in fx_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower()
                        or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            if "-low" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (FX)"
                ])

        password = nk.split('.')[0].capitalize()
        for result in nk_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower()
                        or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (NK)"
            ])

        rated = sorted(unrated, reverse=True)

        results = {}
        i = 0
        for result in rated:
            res = {"payload": result[1], "title": result[2]}
            results["result" + str(i + 1000)] = res
            i += 1
        bl_final = results

    if not bl_only:
        if sj:
            sj_query = sanitize(title).replace(" ", "+")
            sj_search = get_url(
                'https://' + sj + '/serie/search?q=' + sj_query,
                configfile, dbfile, scraper)
            try:
                sj_results = BeautifulSoup(sj_search, 'lxml').findAll(
                    "a", href=re.compile("/serie"))
            except Exception:
                sj_results = []
        else:
            sj_results = []

        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {
                    "payload": encode_base64(result['href'] + "|" + r_title + "|" + str(special)),
                    "title": r_title + append
                }
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results

    return bl_final, sj_final

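# A minimal sketch of the result shape produced by get() above: both returned
# dicts map synthetic keys ("result1000", "result1001", ...) to payload/title
# pairs. The values below are illustrative placeholders, not real results.
_example_bl_final = {
    "result1000": {
        "payload": "<base64 of 'download_page_link|password'>",
        "title": "Some.Movie.2019.German.1080p.BluRay (BY)",
    },
}
_example_sj_final = {
    "result1000": {
        "payload": "<base64 of 'series_href|series_title|special_or_None'>",
        "title": "Some Series (S01)",
    },
}
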
def download(payload, device, configfile, dbfile):
    """Resolve a search payload (base64 of 'link|password' or 'link|key|password')
    into download links and hand them to the configured download method."""
    config = CrawlerConfig('ContentAll', configfile)
    db = FeedDb(dbfile, 'FeedCrawler')
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    nk = hostnames.get('nk')

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]

    site = check_is_site(link, configfile)
    if not site:
        return False
    elif "DW" in site:
        download_method = add_decrypt_instead_of_download
        download_links = [link]
        key = payload[1]
        password = payload[2]
    else:
        url = get_url(link, configfile, dbfile)
        if not url or "NinjaFirewall 429" in url:
            return False
        download_method = myjd_download
        soup = BeautifulSoup(url, 'lxml')

        if "BY" in site:
            key = soup.find("small").text
            links = soup.find_all("iframe")
            async_link_results = []
            for link in links:
                link = link["src"]
                if 'https://' + by in link:
                    async_link_results.append(link)
            async_link_results = get_urls_async(async_link_results, configfile, dbfile)
            links = async_link_results[0]
            url_hosters = []
            for link in links:
                if link:
                    link = BeautifulSoup(link, 'lxml').find(
                        "a", href=re.compile(r"/go\.php\?"))
                    if link:
                        url_hosters.append(
                            [link["href"], link.text.replace(" ", "")])
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

        links = {}
        if "FX" in site:
            # fx_get_download_links expects an object carrying the configfile.
            class FX:
                configfile = ""

            FX.configfile = configfile
            download_links = fx_get_download_links(FX, url, key)
        else:
            for url_hoster in reversed(url_hosters):
                try:
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    if check_hoster(link_hoster, configfile):
                        link = url_hoster[0]
                        if by in link:
                            demasked_link = get_redirected_url(
                                link, configfile, dbfile, False)
                            if demasked_link:
                                link = demasked_link
                        links[link_hoster] = link
                except Exception:
                    pass
            # If no allowed hoster matched, optionally fall back to any hoster found.
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    link = url_hoster[0]
                    if by in link:
                        demasked_link = get_redirected_url(
                            link, configfile, dbfile, False)
                        if demasked_link:
                            link = demasked_link
                    links[link_hoster] = link
            download_links = list(links.values())

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if download_links:
        if staffel:
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Staffel] - ' + key.replace(
                    ".COMPLETE", "").replace(".Complete", "") + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            # Complete releases are excluded from the retail cutoff check.
            if config.get('cutoff') and '.complete.' not in key.lower():
                if is_retail(key, dbfile):
                    retail = True
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key,
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + (
                    '/Englisch' if englisch and not retail else '') + (
                    '/Englisch/Retail' if englisch and retail else '') + (
                    '/Retail' if not englisch and retail else '') + '] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return [key]
    else:
        return False

def get_redirected_url(url, configfile, dbfile, scraper=False):
    config = CrawlerConfig('FeedCrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()
    db = FeedDb(dbfile, 'proxystatus')
    db_normal = FeedDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)
    if proxy:
        try:
            if site and "SJ" in site:
                if db.retrieve("SJ"):
                    if config.get("fallback") and not db_normal.retrieve("SJ"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "DJ" in site:
                if db.retrieve("DJ"):
                    if config.get("fallback") and not db_normal.retrieve("DJ"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "SF" in site:
                if db.retrieve("SF"):
                    if config.get("fallback") and not db_normal.retrieve("SF"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "BY" in site:
                if db.retrieve("BY"):
                    if config.get("fallback") and not db_normal.retrieve("BY"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "DW" in site:
                if db.retrieve("DW"):
                    if config.get("fallback") and not db_normal.retrieve("DW"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "FX" in site:
                if db.retrieve("FX"):
                    if config.get("fallback") and not db_normal.retrieve("FX"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "NK" in site:
                if db.retrieve("NK"):
                    if config.get("fallback") and not db_normal.retrieve("NK"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "WW" in site:
                return url
            elif site and "DD" in site:
                if db.retrieve("DD"):
                    if config.get("fallback") and not db_normal.retrieve("DD"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.get(url, allow_redirects=False, proxies=proxies,
                                   timeout=30).headers._store["location"][1]
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return url
    else:
        try:
            if site and "SJ" in site and db_normal.retrieve("SJ"):
                return url
            elif site and "DJ" in site and db_normal.retrieve("DJ"):
                return url
            elif site and "SF" in site and db_normal.retrieve("SF"):
                return url
            elif site and "BY" in site and db_normal.retrieve("BY"):
                return url
            elif site and "DW" in site and db_normal.retrieve("DW"):
                return url
            elif site and "FX" in site and db_normal.retrieve("FX"):
                return url
            elif site and "NK" in site and db_normal.retrieve("NK"):
                return url
            elif site and "WW" in site:
                return url
            elif site and "DD" in site and db_normal.retrieve("DD"):
                return url
            response = scraper.get(url, allow_redirects=False,
                                   timeout=30).headers._store["location"][1]
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return url

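# get_redirected_url() above reads the redirect target through requests'
# private header storage (headers._store["location"][1]). As a minimal sketch,
# the public, case-insensitive headers API yields the same value, assuming
# `response` is a requests/cloudscraper Response of a non-followed redirect:
def redirect_target(response):
    """Return the Location header of a redirect response, or None if absent."""
    return response.headers.get("location")
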
def post_url_headers(url, configfile, dbfile, headers, data, scraper=False):
    config = CrawlerConfig('FeedCrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()
    db = FeedDb(dbfile, 'proxystatus')
    db_normal = FeedDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)
    if proxy:
        try:
            if site and "SJ" in site:
                if db.retrieve("SJ"):
                    if config.get("fallback") and not db_normal.retrieve("SJ"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "DJ" in site:
                if db.retrieve("DJ"):
                    if config.get("fallback") and not db_normal.retrieve("DJ"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "SF" in site:
                if db.retrieve("SF"):
                    if config.get("fallback") and not db_normal.retrieve("SF"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "BY" in site:
                if db.retrieve("BY"):
                    if config.get("fallback") and not db_normal.retrieve("BY"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "DW" in site:
                if db.retrieve("DW"):
                    if config.get("fallback") and not db_normal.retrieve("DW"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "FX" in site:
                if db.retrieve("FX"):
                    if config.get("fallback") and not db_normal.retrieve("FX"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "NK" in site:
                if db.retrieve("NK"):
                    if config.get("fallback") and not db_normal.retrieve("NK"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "WW" in site:
                if db.retrieve("WW"):
                    if config.get("fallback") and not db_normal.retrieve("WW"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "DD" in site:
                if db.retrieve("DD"):
                    if config.get("fallback") and not db_normal.retrieve("DD"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.post(url, data, headers=headers, proxies=proxies, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
    else:
        try:
            if site and "SJ" in site and db_normal.retrieve("SJ"):
                return ["", scraper]
            elif site and "DJ" in site and db_normal.retrieve("DJ"):
                return ["", scraper]
            elif site and "SF" in site and db_normal.retrieve("SF"):
                return ["", scraper]
            elif site and "BY" in site and db_normal.retrieve("BY"):
                return ["", scraper]
            elif site and "DW" in site and db_normal.retrieve("DW"):
                return ["", scraper]
            elif site and "FX" in site and db_normal.retrieve("FX"):
                return ["", scraper]
            elif site and "NK" in site and db_normal.retrieve("NK"):
                return ["", scraper]
            elif site and "WW" in site and db_normal.retrieve("WW"):
                return ["", scraper]
            elif site and "DD" in site and db_normal.retrieve("DD"):
                return ["", scraper]
            response = scraper.post(url, data, headers=headers, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]

def get_search_results(self, bl_query):
    hostnames = CrawlerConfig('Hostnames', self.configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')

    search_results = []

    config = CrawlerConfig('ContentAll', self.configfile)
    quality = config.get('quality')

    if "480p" not in quality:
        search_quality = "+" + quality
    else:
        search_quality = ""

    if by:
        by_search = 'https://' + by + '/?q=' + bl_query + search_quality
    else:
        by_search = None
    if dw:
        dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
    else:
        dw_search = None
    if fx:
        fx_search = 'https://' + fx + '/?s=' + bl_query
    else:
        fx_search = None

    async_results = get_urls_async([by_search, dw_search, fx_search],
                                   self.configfile, self.dbfile, self.scraper)
    scraper = async_results[1]
    async_results = async_results[0]

    by_results = []
    dw_results = []
    fx_results = []

    for res in async_results:
        if check_is_site(res, self.configfile) == 'BY':
            by_results = by_search_results(res, by)
        elif check_is_site(res, self.configfile) == 'DW':
            dw_results = dw_search_results(res, dw)
        elif check_is_site(res, self.configfile) == 'FX':
            fx_results = fx_search_results(fx_content_to_soup(res),
                                           self.configfile, self.dbfile, scraper)

    if nk:
        nk_search = post_url(
            'https://' + nk + "/search",
            self.configfile,
            self.dbfile,
            data={'search': bl_query.replace("+", " ") + " " + quality})
        nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
    else:
        nk_results = []

    password = by
    for result in by_results:
        if "480p" in quality:
            if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                    or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                    or "complete.bluray" in result[0].lower()
                    or "complete.mbluray" in result[0].lower()
                    or "complete.uhd.bluray" in result[0].lower()):
                continue
        if "xxx" not in result[0].lower():
            search_results.append([result[0], result[1] + "|" + password])

    password = dw
    for result in dw_results:
        if "480p" in quality:
            if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                    or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                    or "complete.bluray" in result[0].lower()
                    or "complete.mbluray" in result[0].lower()
                    or "complete.uhd.bluray" in result[0].lower()):
                continue
        search_results.append([result[0], result[1] + "|" + password])

    for result in fx_results:
        if "480p" in quality:
            if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                    or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                    or "complete.bluray" in result[0].lower()
                    or "complete.mbluray" in result[0].lower()
                    or "complete.uhd.bluray" in result[0].lower()):
                continue
        if "-low" not in result[0].lower():
            search_results.append([result[0], result[1]])

    password = nk.split('.')[0].capitalize()
    for result in nk_results:
        if "480p" in quality:
            if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                    or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                    or "complete.bluray" in result[0].lower()
                    or "complete.mbluray" in result[0].lower()
                    or "complete.uhd.bluray" in result[0].lower()):
                continue
        search_results.append([result[0], result[1] + "|" + password])

    return search_results

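# The same resolution filter is repeated in every result loop above. A possible
# helper (hypothetical, not part of the original module) that expresses the
# check once; the markers mirror exactly those skipped when quality is 480p:
def _is_above_480p(release_title):
    """True if the title names >480p material or a complete (UHD/M)Blu-ray set."""
    lowered = release_title.lower()
    markers = ("720p", "1080p", "1080i", "2160p",
               "complete.bluray", "complete.mbluray", "complete.uhd.bluray")
    return any(marker in lowered for marker in markers)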