def by_feed_enricher(self, content):
    base_url = "https://" + CrawlerConfig('Hostnames', self.configfile).get('by')
    content = BeautifulSoup(content, 'lxml')

    # Collect the detail-page URL of every "Download" post and fetch all pages concurrently.
    posts = content.findAll("a", href=re.compile("/category/"), text=re.compile("Download"))
    async_results = []
    for post in posts:
        try:
            async_results.append(base_url + post['href'])
        except:
            pass
    async_results = get_urls_async(async_results, self.configfile, self.dbfile, self.scraper)
    results = async_results[0]

    # Build a feedparser-like entry from each fetched detail page.
    entries = []
    if results:
        for result in results:
            try:
                content = []
                details = BeautifulSoup(result, 'lxml').findAll(
                    "td", {
                        "valign": "TOP",
                        "align": "CENTER"
                    })[1]
                title = details.find("small").text
                published = details.find("th", {"align": "RIGHT"}).text
                try:
                    imdb = details.find("a", href=re.compile("imdb.com"))
                    imdb_link = imdb["href"].replace("https://anonym.to/?", "")
                    imdb_score = imdb.text.replace(" ", "").replace("/10", "")
                    # Treat an unrated (0.0) IMDb score as 9.9.
                    if "0.0" in imdb_score:
                        imdb_score = "9.9"
                    content.append('<a href="' + imdb_link + '"' + imdb_score + '</a>')
                except:
                    pass
                links = details.find_all("iframe")
                for link in links:
                    content.append('href="' + link["src"] + '"')
                content = "".join(content)
                entries.append(
                    FakeFeedParserDict({
                        "title": title,
                        "published": published,
                        "content": [FakeFeedParserDict({"value": content + " mkv"})]
                    }))
            except:
                pass

    feed = {"entries": entries}
    feed = FakeFeedParserDict(feed)
    return feed
def nk_feed_enricher(self, content):
    base_url = "https://" + CrawlerConfig('Hostnames', self.configfile).get('nk')
    content = BeautifulSoup(content, 'lxml')

    # Collect the release detail pages and fetch them concurrently.
    posts = content.findAll("a", {"class": "btn"}, href=re.compile("/release/"))
    async_results = []
    for post in posts:
        try:
            async_results.append(base_url + post['href'])
        except:
            pass
    async_results = get_urls_async(async_results, self.configfile, self.dbfile, self.scraper)[0]

    # Build a feedparser-like entry from each fetched detail page.
    entries = []
    if async_results:
        for result in async_results:
            try:
                content = []
                details = BeautifulSoup(result, 'lxml').find("div", {"class": "article"})
                title = details.find("span", {"class": "subtitle"}).text
                published = details.find("p", {"class": "meta"}).text
                content.append("mkv ")
                try:
                    imdb = details.find("a", href=re.compile("imdb.com"))["href"]
                    content.append('<a href="' + imdb + '" 9,9</a>')
                except:
                    pass
                links = details.find_all("a", href=re.compile("/go/"))
                for link in links:
                    content.append('href="' + base_url + link["href"] + '">' + link.text + '<')
                content = "".join(content)
                entries.append(
                    FakeFeedParserDict({
                        "title": title,
                        "published": published,
                        "content": [FakeFeedParserDict({"value": content})]
                    }))
            except:
                pass

    feed = {"entries": entries}
    feed = FakeFeedParserDict(feed)
    return feed
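# Hedged usage sketch (illustration only, not part of the crawler): both
# enrichers above return a FakeFeedParserDict shaped like feedparser output, so
# downstream code can walk it like a real feed. "raw_html" and the enricher
# instance "site" are assumed placeholder names.
#
#     feed = site.nk_feed_enricher(raw_html)
#     for entry in feed["entries"]:
#         print(entry["title"], entry["published"])
#         print(entry["content"][0]["value"])  # concatenated pseudo-HTML with hoster links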
def by_get_download_links(self, content, title):
    async_link_results = re.findall(r'href="([^"\'>]*)"', content)
    async_link_results = get_urls_async(async_link_results, self.configfile, self.dbfile, self.scraper)
    content = []
    links = async_link_results[0]
    for link in links:
        link = BeautifulSoup(link, 'lxml').find("a", href=re.compile(r"/go\.php\?"))
        try:
            content.append('href="' + link["href"] + '">' + link.text.replace(" ", "") + '<')
        except:
            pass
    content = "".join(content)
    download_links = get_download_links(self, content, title)
    return download_links
def by_page_download_link(self, download_link, key):
    unused_get_feed_parameter(key)
    by = self.hostnames.get('by')
    download_link = get_url(download_link, self.configfile, self.dbfile)
    soup = BeautifulSoup(download_link, 'lxml')
    links = soup.find_all("iframe")
    async_link_results = []
    for link in links:
        link = link["src"]
        if 'https://' + by in link:
            async_link_results.append(link)
    async_link_results = get_urls_async(async_link_results, self.configfile, self.dbfile)
    links = async_link_results[0]
    url_hosters = []
    for link in links:
        if link:
            link = BeautifulSoup(link, 'lxml').find("a", href=re.compile(r"/go\.php\?"))
            if link:
                url_hosters.append([link["href"], link.text.replace(" ", "")])
    return check_download_links(self, url_hosters)
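# Hedged sketch (illustration only): the list handed to check_download_links()
# pairs each resolved "/go.php?" redirect with the hoster label scraped from the
# link text; the hoster names below are placeholders.
#
#     url_hosters = [
#         ["https://" + by + "/go.php?...", "rapidgator"],
#         ["https://" + by + "/go.php?...", "ddownload"],
#     ]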
def get(title, configfile, dbfile, bl_only=False, sj_only=False):
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    sj = hostnames.get('sj')

    # A trailing ",sXX" or ",sXXeYY" marks a season- or episode-specific search.
    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper()

    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query

        unrated = []

        config = CrawlerConfig('ContentAll', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')

        if "480p" not in quality:
            search_quality = "+" + quality
        else:
            search_quality = ""

        if by:
            by_search = 'https://' + by + '/?q=' + bl_query + search_quality
        else:
            by_search = None
        if dw:
            dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
        else:
            dw_search = None
        if fx:
            fx_search = 'https://' + fx + '/?s=' + bl_query
        else:
            fx_search = None

        # Query BY, DW and FX concurrently; NK only offers a POST search.
        async_results = get_urls_async([by_search, dw_search, fx_search], configfile, dbfile, scraper)
        scraper = async_results[1]
        async_results = async_results[0]

        by_results = []
        dw_results = []
        fx_results = []

        for res in async_results:
            if check_is_site(res, configfile) == 'BY':
                by_results = by_search_results(res, by)
            elif check_is_site(res, configfile) == 'DW':
                dw_results = dw_search_results(res, dw)
            elif check_is_site(res, configfile) == 'FX':
                fx_results = fx_search_results(fx_content_to_soup(res), configfile, dbfile, scraper)

        if nk:
            nk_search = post_url(
                'https://' + nk + "/search",
                configfile,
                dbfile,
                data={'search': bl_query.replace("+", " ") + " " + quality})
            nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
        else:
            nk_results = []

        # Each result payload is the base64-encoded "link|password" string that download() expects.
        password = by
        for result in by_results:
            # Skip HD and complete releases when only 480p is wanted.
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() \
                        or "2160p" in result[0].lower() or "complete.bluray" in result[0].lower() \
                        or "complete.mbluray" in result[0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            if "xxx" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (BY)"
                ])

        password = dw
        for result in dw_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() \
                        or "2160p" in result[0].lower() or "complete.bluray" in result[0].lower() \
                        or "complete.mbluray" in result[0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (DW)"
            ])

        password = fx.split('.')[0]
        for result in fx_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() \
                        or "2160p" in result[0].lower() or "complete.bluray" in result[0].lower() \
                        or "complete.mbluray" in result[0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            if "-low" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (FX)"
                ])

        password = nk.split('.')[0].capitalize()
        for result in nk_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() \
                        or "2160p" in result[0].lower() or "complete.bluray" in result[0].lower() \
                        or "complete.mbluray" in result[0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (NK)"
            ])

        # Best-rated results first.
        rated = sorted(unrated, reverse=True)

        results = {}
        i = 0
        for result in rated:
            res = {"payload": result[1], "title": result[2]}
            results["result" + str(i + 1000)] = res
            i += 1
        bl_final = results

    if not bl_only:
        if sj:
            sj_query = sanitize(title).replace(" ", "+")
            sj_search = get_url(
                'https://' + sj + '/serie/search?q=' + sj_query,
                configfile, dbfile, scraper)
            try:
                sj_results = BeautifulSoup(sj_search, 'lxml').findAll(
                    "a", href=re.compile("/serie"))
            except:
                sj_results = []
        else:
            sj_results = []

        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            # Only keep series whose title is a reasonable fuzzy match.
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {
                    "payload": encode_base64(result['href'] + "|" + r_title + "|" + str(special)),
                    "title": r_title + append
                }
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results

    return bl_final, sj_final
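# Hedged usage sketch (illustration only): get() returns two dicts keyed
# "result1000", "result1001", ... - one for the movie sites, one for SJ. Each
# "payload" is the base64-encoded string that download() below expects.
# "configfile" and "dbfile" stand for the usual FeedCrawler config/db paths.
#
#     bl_final, sj_final = get("Some.Title", configfile, dbfile)
#     for res in bl_final.values():
#         print(res["title"])                    # e.g. "Some.Title... (BY)"
#         print(decode_base64(res["payload"]))   # "detail-link|password"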
def download(payload, device, configfile, dbfile):
    config = CrawlerConfig('ContentAll', configfile)
    db = FeedDb(dbfile, 'FeedCrawler')
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    nk = hostnames.get('nk')

    # Payload format: "link|password" (BY/NK) or "link|key|password" (DW/FX).
    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]

    site = check_is_site(link, configfile)
    if not site:
        return False
    elif "DW" in site:
        download_method = add_decrypt_instead_of_download
        download_links = [link]
        key = payload[1]
        password = payload[2]
    else:
        url = get_url(link, configfile, dbfile)
        if not url or "NinjaFirewall 429" in url:
            return False
        download_method = myjd_download
        soup = BeautifulSoup(url, 'lxml')

        # Scrape the release name ("key") and the hoster links from the detail page.
        if "BY" in site:
            key = soup.find("small").text
            links = soup.find_all("iframe")
            async_link_results = []
            for link in links:
                link = link["src"]
                if 'https://' + by in link:
                    async_link_results.append(link)
            async_link_results = get_urls_async(async_link_results, configfile, dbfile)
            links = async_link_results[0]
            url_hosters = []
            for link in links:
                if link:
                    link = BeautifulSoup(link, 'lxml').find(
                        "a", href=re.compile(r"/go\.php\?"))
                    if link:
                        url_hosters.append(
                            [link["href"], link.text.replace(" ", "")])
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

        links = {}
        if "FX" in site:
            class FX:
                configfile = ""
            FX.configfile = configfile
            download_links = fx_get_download_links(FX, url, key)
        else:
            # Keep only whitelisted hosters; optionally fall back to any hoster.
            for url_hoster in reversed(url_hosters):
                try:
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    if check_hoster(link_hoster, configfile):
                        link = url_hoster[0]
                        if by in link:
                            demasked_link = get_redirected_url(
                                link, configfile, dbfile, False)
                            if demasked_link:
                                link = demasked_link
                        links[link_hoster] = link
                except:
                    pass
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    link = url_hoster[0]
                    if by in link:
                        demasked_link = get_redirected_url(
                            link, configfile, dbfile, False)
                        if demasked_link:
                            link = demasked_link
                    links[link_hoster] = link
            download_links = list(links.values())

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if download_links:
        # Season packs and single movies are stored and logged differently.
        if staffel:
            if download_method(configfile, dbfile, device, key, "FeedCrawler", download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Staffel] - ' + key.replace(
                    ".COMPLETE", "").replace(".Complete", "") + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                if is_retail(key, dbfile):
                    retail = True
            if download_method(configfile, dbfile, device, key, "FeedCrawler", download_links, password):
                db.store(
                    key,
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + (
                    '/Englisch' if englisch and not retail else '') + (
                    '/Englisch/Retail' if englisch and retail else '') + (
                    '/Retail' if not englisch and retail else '') + '] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return [key]
    else:
        return False
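# Hedged sketch of the payload contract (illustration only): download() expects
# the base64 payload produced by get()/get_search_results(), i.e. "link|password"
# for BY/NK results and "link|key|password" for DW/FX results. "device" is the
# JDownloader device handle passed on to the download method; the values below
# are placeholders.
#
#     payload = encode_base64("https://" + by + "/some-release.html|" + by)
#     result = download(payload, device, configfile, dbfile)
#     # -> [key] for a single movie, True for a season pack,
#     #    False if nothing could be resolved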
def get_search_results(self, bl_query):
    hostnames = CrawlerConfig('Hostnames', self.configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')

    search_results = []

    config = CrawlerConfig('ContentAll', self.configfile)
    quality = config.get('quality')

    if "480p" not in quality:
        search_quality = "+" + quality
    else:
        search_quality = ""

    if by:
        by_search = 'https://' + by + '/?q=' + bl_query + search_quality
    else:
        by_search = None
    if dw:
        dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
    else:
        dw_search = None
    if fx:
        fx_search = 'https://' + fx + '/?s=' + bl_query
    else:
        fx_search = None

    # Query BY, DW and FX concurrently; NK only offers a POST search.
    async_results = get_urls_async([by_search, dw_search, fx_search], self.configfile, self.dbfile, self.scraper)
    scraper = async_results[1]
    async_results = async_results[0]

    by_results = []
    dw_results = []
    fx_results = []

    for res in async_results:
        if check_is_site(res, self.configfile) == 'BY':
            by_results = by_search_results(res, by)
        elif check_is_site(res, self.configfile) == 'DW':
            dw_results = dw_search_results(res, dw)
        elif check_is_site(res, self.configfile) == 'FX':
            fx_results = fx_search_results(fx_content_to_soup(res), self.configfile, self.dbfile, scraper)

    if nk:
        nk_search = post_url(
            'https://' + nk + "/search",
            self.configfile,
            self.dbfile,
            data={'search': bl_query.replace("+", " ") + " " + quality})
        nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
    else:
        nk_results = []

    password = by
    for result in by_results:
        # Skip HD and complete releases when only 480p is wanted.
        if "480p" in quality:
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() \
                    or "2160p" in result[0].lower() or "complete.bluray" in result[0].lower() \
                    or "complete.mbluray" in result[0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        if "xxx" not in result[0].lower():
            search_results.append([result[0], result[1] + "|" + password])

    password = dw
    for result in dw_results:
        if "480p" in quality:
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() \
                    or "2160p" in result[0].lower() or "complete.bluray" in result[0].lower() \
                    or "complete.mbluray" in result[0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        search_results.append([result[0], result[1] + "|" + password])

    for result in fx_results:
        if "480p" in quality:
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() \
                    or "2160p" in result[0].lower() or "complete.bluray" in result[0].lower() \
                    or "complete.mbluray" in result[0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        if "-low" not in result[0].lower():
            search_results.append([result[0], result[1]])

    password = nk.split('.')[0].capitalize()
    for result in nk_results:
        if "480p" in quality:
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() \
                    or "2160p" in result[0].lower() or "complete.bluray" in result[0].lower() \
                    or "complete.mbluray" in result[0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        search_results.append([result[0], result[1] + "|" + password])

    return search_results
def dw_feed_enricher(self, content):
    base_url = "https://" + CrawlerConfig('Hostnames', self.configfile).get('dw')
    content = BeautifulSoup(content, 'lxml')
    posts = content.findAll("a", href=re.compile("download/"))

    # Remember the detail link and hoster list per post id so they can be
    # attached to the matching entry after the detail pages have been fetched.
    href_by_id = {}
    async_results = []
    for post in posts:
        try:
            post_id = post['href'].replace("download/", "").split("/")[0]
            post_link = base_url + "/" + post['href']
            post_hosters = post.parent.findAll(
                "img", src=re.compile(r"images/icon_hoster"))
            hosters = []
            for hoster in post_hosters:
                hosters.append(hoster["title"].replace("Download bei ", ""))
            hosters = "|".join(hosters)
            href_by_id[post_id] = {"hosters": hosters, "link": post_link}
            async_results.append(post_link)
        except:
            pass
    async_results = get_urls_async(async_results, self.configfile, self.dbfile, self.scraper)
    results = async_results[0]

    entries = []
    if results:
        for result in results:
            try:
                content = []
                details = BeautifulSoup(result, 'lxml')
                title = details.title.text.split(' //')[0].replace("*mirror*", "").strip()
                post_id = details.find(
                    "a", {"data-warezkorb": re.compile(r"\d*")})["data-warezkorb"]
                details = details.findAll("div", {"class": "row"})[3]
                published = details.findAll("td")[1].text.replace("Datum", "")
                try:
                    imdb = details.findAll("td")[6].find("a")
                    imdb_link = imdb["href"]
                    imdb_score = imdb.find("b").text.replace(" ", "").replace("/10", "")
                    # Treat an unrated (0.0) IMDb score as 9.9.
                    if "0.0" in imdb_score:
                        imdb_score = "9.9"
                    content.append('<a href="' + imdb_link + '"' + imdb_score + '</a>')
                except:
                    pass
                # Embed the download link and the hoster list as markers in the entry body.
                content.append('DOWNLOADLINK="' + href_by_id[post_id]["link"] + '"')
                content.append('HOSTERS="' + href_by_id[post_id]["hosters"] + '"')
                content = "".join(content)
                entries.append(
                    FakeFeedParserDict({
                        "title": title,
                        "published": published,
                        "content": [FakeFeedParserDict({"value": content + " mkv"})]
                    }))
            except:
                pass

    feed = {"entries": entries}
    feed = FakeFeedParserDict(feed)
    return feed
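# Hedged sketch (illustration only): dw_feed_enricher() stores the detail-page
# URL and the hoster list inside the entry body as DOWNLOADLINK="..." and
# HOSTERS="..." markers, so a downstream parser can recover them with a simple
# regex. "value" stands for entry["content"][0]["value"].
#
#     download_link = re.search(r'DOWNLOADLINK="([^"]+)"', value).group(1)
#     hosters = re.search(r'HOSTERS="([^"]+)"', value).group(1).split("|")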