def get_device(configfile):
    conf = RssConfig('RSScrawler', configfile)
    myjd_user = str(conf.get('myjd_user'))
    myjd_pass = str(conf.get('myjd_pass'))
    myjd_device = str(conf.get('myjd_device'))

    jd = rsscrawler.myjdapi.Myjdapi()
    jd.set_app_key('RSScrawler')

    if not (myjd_user and myjd_pass):
        return False
    if not myjd_device:
        # No device name configured - try to determine it automatically
        # (works if exactly one device is registered to the account).
        myjd_device = get_if_one_device(myjd_user, myjd_pass)

    try:
        jd.connect(myjd_user, myjd_pass)
        jd.update_devices()
        device = jd.get_device(myjd_device)
    except rsscrawler.myjdapi.MYJDException as e:
        print(u"Fehler bei der Verbindung mit MyJDownloader: " + str(e))
        return False

    if not device or not is_device(device):
        return False
    return device
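
# Usage sketch (illustrative only; the config path is an assumption): resolve
# the My JDownloader device once at startup and reuse it for later calls.
#
#     device = get_device("/config/RSScrawler.ini")
#     if device:
#         print(u"Verbunden mit: " + device.name)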
def check_is_site(string, configfile):
    hostnames = RssConfig('Hostnames', configfile)
    # Map each site shorthand to its configured hostname; the first hostname
    # whose leading label occurs in the given string wins.
    sites = {
        'SJ': hostnames.get('sj'),
        'DJ': hostnames.get('dj'),
        'SF': hostnames.get('sf'),
        'MB': hostnames.get('mb'),
        'HW': hostnames.get('hw'),
        'FX': hostnames.get('fx'),
        'HS': hostnames.get('hs'),
        'NK': hostnames.get('nk'),
        'DD': hostnames.get('dd'),
        'FC': hostnames.get('fc')
    }
    for shorthand, hostname in sites.items():
        if hostname and hostname.split('.')[0] in string:
            return shorthand
    return False
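
# Example (hostnames are placeholders for whatever is set in RSScrawler.ini):
# with sj = "serienjunkies.example" the call
#
#     check_is_site("https://serienjunkies.example/serie/foo", configfile)
#
# returns "SJ"; a URL matching no configured hostname returns False.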
def cryptor_url_first(configfile, failed_package):
    hostnames = RssConfig('Hostnames', configfile)
    fc = hostnames.get('fc').replace('www.', '').split('.')[0]
    resorted_failed_package = []
    for p in failed_package:
        pk = {
            'name': p['name'],
            'path': p['path'],
            'urls': p['urls'],
            'linkids': p['linkids'],
            'uuid': p['uuid']
        }
        cryptor_found = False
        links = split_urls(pk['urls'])
        for u in links:
            if not cryptor_found and fc in u:
                # Prefer the FC link so decryption is attempted there first
                pk['url'] = u
                cryptor_found = True
        if not cryptor_found:
            pk['url'] = p['url']
        resorted_failed_package.append(pk)
    return resorted_failed_package
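
# Sketch of the expected package structure (field names as used above, the
# values themselves are made up):
#
#     failed_package = [{
#         'name': 'Some.Release', 'path': '/dl', 'urls': 'http://a\nhttp://b',
#         'linkids': [1, 2], 'uuid': 3, 'url': 'http://a'
#     }]
#     resorted = cryptor_url_first(configfile, failed_package)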
def notify(items, configfile):
    notifications = RssConfig('Notifications', configfile)
    homeassistant_settings = notifications.get("homeassistant").split(',')
    pushbullet_token = notifications.get("pushbullet")
    telegram_settings = notifications.get("telegram").split(',')
    pushover_settings = notifications.get("pushover").split(',')

    if len(items) > 0:
        # Split the messages into chunks of five to keep each API call small
        cut_items = list(api_request_cutter(items, 5))
        if len(notifications.get("homeassistant")) > 0:
            for cut_item in cut_items:
                homeassistant_url = homeassistant_settings[0]
                homeassistant_password = homeassistant_settings[1]
                home_assistant(cut_item, homeassistant_url,
                               homeassistant_password)
        if len(notifications.get("pushbullet")) > 0:
            pushbullet(items, pushbullet_token)
        if len(notifications.get("telegram")) > 0:
            for cut_item in cut_items:
                telegram_token = telegram_settings[0]
                telegram_chatid = telegram_settings[1]
                telegram(cut_item, telegram_token, telegram_chatid)
        if len(notifications.get('pushover')) > 0:
            for cut_item in cut_items:
                pushover_user = pushover_settings[0]
                pushover_token = pushover_settings[1]
                pushover(cut_item, pushover_user, pushover_token)
def post_url(url, configfile, dbfile, data, scraper=False):
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    # Temporary fix for FX: use a plain requests session, but keep the headers
    # and cookies the Cloudflare scraper has already negotiated.
    if site and "FX" in site:
        headers = scraper.headers
        cookies = scraper.cookies
        scraper = requests.session()
        scraper.headers = headers
        scraper.cookies = cookies
        scraper.verify = False

    sites = ["SJ", "DJ", "SF", "MB", "HW", "FX", "HS", "NK", "DD", "FC"]

    if proxy:
        try:
            for s in sites:
                if site and s in site and db.retrieve(s):
                    # Proxy is blocked for this site - only use the direct
                    # connection if fallback is allowed and not blocked too.
                    if config.get("fallback") and not db_normal.retrieve(s):
                        return scraper.post(url, data, timeout=30).content
                    return ""
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.post(url, data, proxies=proxies,
                                    timeout=30).content
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
    else:
        try:
            for s in sites:
                if site and s in site and db_normal.retrieve(s):
                    return ""
            response = scraper.post(url, data, timeout=30).content
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
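
# Usage sketch (URL and form data are placeholders): POST a search query and
# keep reusing the returned/passed scraper so Cloudflare cookies survive.
#
#     scraper = cloudscraper.create_scraper()
#     html = post_url('https://nk.example/search', configfile, dbfile,
#                     data={'search': 'Some Title 1080p'}, scraper=scraper)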
def check_url(configfile, dbfile, scraper=False):
    hostnames = RssConfig('Hostnames', configfile)
    if not scraper:
        scraper = cloudscraper.create_scraper()

    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    fallback = config.get('fallback')

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')

    # Per site: hostname and the URL used for the reachability check
    sites = {}
    for key in ['sj', 'dj', 'sf', 'mb', 'hw', 'fx', 'hs', 'nk', 'dd', 'fc']:
        host = hostnames.get(key)
        url = 'https://' + host if host else None
        if key == 'hs' and url:
            url += '/collection/neuerscheinungen/'
        sites[key.upper()] = (host, url)

    for site in sites:
        db.delete(site)
        db_normal.delete(site)

    def is_blocked(site, url, use_proxy):
        # True if the site is unreachable with the current IP (or proxy IP)
        kwargs = {'timeout': 30}
        if use_proxy:
            kwargs['proxies'] = {'http': proxy, 'https': proxy}
        if site != "FC":
            kwargs['allow_redirects'] = False
        try:
            if site in ("SJ", "DJ", "SF"):
                # These sites redirect blocked IPs to a "block." subdomain
                return "block." in str(
                    scraper.get(url, **kwargs).headers.get("location"))
            elif site in ("MB", "HW", "FX"):
                blocked = scraper.get(url, **kwargs).status_code == 403
                if blocked and site == "FX":
                    # FX sometimes only rejects cloudscraper - retry with a
                    # plain session carrying the same headers and cookies
                    session = requests.session()
                    session.headers = scraper.headers
                    session.cookies = scraper.cookies
                    session.verify = False
                    if session.get(url, **kwargs).status_code == 200:
                        blocked = False
                return blocked
            else:
                # HS, NK, DD and FC simply have to answer with HTTP 200
                return scraper.get(url, **kwargs).status_code != 200
        except Exception:
            return True

    blocked_proxy = {}
    if proxy:
        for site, (host, url) in sites.items():
            if not host:
                db.store(site, "Blocked")
                continue
            if is_blocked(site, url, True):
                blocked_proxy[site] = True
                print(u"Der Zugriff auf " + site +
                      u" ist mit der aktuellen Proxy-IP nicht möglich!")
                db.store(site, "Blocked")
                scraper = cloudscraper.create_scraper()

    for site, (host, url) in sites.items():
        if not proxy or (blocked_proxy.get(site) and fallback):
            if not host:
                db.store(site, "Blocked")
            elif is_blocked(site, url, False):
                db_normal.store(site, "Blocked")
                print(u"Der Zugriff auf " + site +
                      u" ist mit der aktuellen IP nicht möglich!")

    return scraper
class SF:
    def __init__(self, configfile, dbfile, device, logging, scraper, filename,
                 internal_name):
        self._INTERNAL_NAME = internal_name
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device
        self.hostnames = RssConfig('Hostnames', self.configfile)
        self.sf = self.hostnames.get('sf')
        self.config = RssConfig(self._INTERNAL_NAME, self.configfile)
        self.rsscrawler = RssConfig("RSScrawler", self.configfile)
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.hosters = RssConfig("Hosters", configfile).get_section()
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.filename = filename
        self.db = RssDb(self.dbfile, 'rsscrawler')
        self.quality = self.config.get("quality")
        self.cdc = RssDb(self.dbfile, 'cdc')
        self.last_set_sf = self.cdc.retrieve("SFSet-" + self.filename)
        self.last_sha_sf = self.cdc.retrieve("SF-" + self.filename)
        self.headers = {
            'If-Modified-Since':
                str(self.cdc.retrieve("SFHeaders-" + self.filename))
        }
        settings = [
            "quality", "rejectlist", "regex", "hevc_retail", "retail_only",
            "hoster_fallback"
        ]
        self.settings = []
        self.settings.append(self.rsscrawler.get("english"))
        self.settings.append(self.rsscrawler.get("surround"))
        self.settings.append(self.hosters)
        for s in settings:
            self.settings.append(self.config.get(s))
        self.empty_list = False
        if self.filename == 'SJ_Staffeln_Regex':
            self.level = 3
        elif self.filename == 'MB_Staffeln':
            self.seasonssource = self.config.get('seasonssource').lower()
            self.level = 2
        elif self.filename == 'SJ_Serien_Regex':
            self.level = 1
        else:
            self.level = 0
        self.pattern = r'^(' + "|".join(
            self.get_series_list(self.filename, self.level)).lower() + ')'
        self.listtype = ""
        self.day = 0

    def settings_hash(self, refresh):
        if refresh:
            settings = [
                "quality", "rejectlist", "regex", "hevc_retail", "retail_only",
                "hoster_fallback"
            ]
            self.settings = []
            self.settings.append(self.rsscrawler.get("english"))
            self.settings.append(self.rsscrawler.get("surround"))
            self.settings.append(self.hosters)
            for s in settings:
                self.settings.append(self.config.get(s))
            self.pattern = r'^(' + "|".join(
                self.get_series_list(self.filename, self.level)).lower() + ')'
        set_sf = str(self.settings) + str(self.pattern)
        return hashlib.sha256(set_sf.encode('ascii', 'ignore')).hexdigest()

    def get_series_list(self, liste, series_type):
        if series_type == 1:
            self.listtype = " (RegEx)"
        elif series_type == 2:
            self.listtype = " (Staffeln)"
        elif series_type == 3:
            self.listtype = " (Staffeln/RegEx)"
        cont = ListDb(self.dbfile, liste).retrieve()
        titles = []
        if cont:
            for title in cont:
                if title:
                    title = title.replace(" ", ".")
                    titles.append(title)
        if not titles:
            self.empty_list = True
        return titles

    def parse_download(self, series_url, title, language_id):
        if not check_valid_release(title, self.retail_only, self.hevc_retail,
                                   self.dbfile):
            self.log_debug(
                title +
                u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
            )
            return
        if self.filename == 'MB_Staffeln':
            if not self.config.get("seasonpacks"):
                staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
                if staffelpack:
                    self.log_debug(
                        "%s - Release ignoriert (Staffelpaket)" % title)
                    return
            if not re.search(self.seasonssource, title.lower()):
                self.log_debug(title + " - Release hat falsche Quelle")
                return
        try:
            if language_id == 2:
                lang = 'EN'
            else:
                lang = 'DE'
            epoch = str(datetime.datetime.now().timestamp()).replace(
                '.', '')[:-3]
            api_url = series_url + '?lang=' + lang + '&_=' + epoch
            response = get_url(api_url, self.configfile, self.dbfile,
                               self.scraper)
            info = json.loads(response)

            is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title,
                                    re.IGNORECASE)
            if is_episode:
                episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*',
                                            is_episode[0])[0].lower()
                season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*',
                                           is_episode[0])[0].lower()
                season_title = rreplace(
                    title.lower().replace(episode_string, ''), "-", ".*",
                    1).lower()
                season_title = season_title.replace(
                    ".untouched", ".*").replace(".dd+51", ".dd.51")
                episode = str(int(episode_string.replace("e", "")))
                season = str(int(season_string.replace("s", "")))
                episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german',
                                          season_title, re.IGNORECASE)
                if episode_name:
                    season_title = season_title.replace(episode_name[0], '')
                codec_tags = [".h264", ".x264"]
                for tag in codec_tags:
                    season_title = season_title.replace(tag, ".*264")
                web_tags = [".web-rip", ".webrip", ".webdl", ".web-dl"]
                for tag in web_tags:
                    season_title = season_title.replace(tag, ".web.*")
            else:
                season = False
                episode = False
                season_title = title
                multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)',
                                               season_title, re.IGNORECASE)
                if multiple_episodes:
                    season_title = season_title.replace(
                        multiple_episodes[0], '.*')

            content = BeautifulSoup(info['html'], 'lxml')
            releases = content.find(
                "small",
                text=re.compile(season_title,
                                re.IGNORECASE)).parent.parent.parent
            links = releases.findAll("div", {'class': 'row'})[1].findAll('a')
            valid = False
            for link in links:
                download_link = link['href']
                if check_hoster(link.text.replace('\n', ''), self.configfile):
                    valid = True
                    break
            if not valid and not self.hoster_fallback:
                storage = self.db.retrieve_all(title)
                if 'added' not in storage and 'notdl' not in storage:
                    wrong_hoster = '[SF/Hoster fehlt] - ' + title
                    if 'wrong_hoster' not in storage:
                        self.log_info(wrong_hoster)
                        self.db.store(title, 'wrong_hoster')
                        notify([wrong_hoster], self.configfile)
                    else:
                        self.log_debug(wrong_hoster)
            else:
                return self.send_package(title, download_link, language_id,
                                         season, episode)
        except:
            print(
                u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!"
            )

    def send_package(self, title, download_link, language_id, season, episode):
        englisch = ""
        if language_id == 2:
            englisch = "/Englisch"
        if self.filename == 'SJ_Serien_Regex':
            link_placeholder = '[Episode/RegEx' + englisch + '] - '
        elif self.filename == 'SJ_Serien':
            link_placeholder = '[Episode' + englisch + '] - '
        elif self.filename == 'SJ_Staffeln_Regex':
            link_placeholder = '[Staffel/RegEx' + englisch + '] - '
        else:
            link_placeholder = '[Staffel' + englisch + '] - '
        try:
            storage = self.db.retrieve_all(title)
        except Exception as e:
            self.log_debug("Fehler bei Datenbankzugriff: %s, Grund: %s" %
                           (e, title))
            return
        if 'added' in storage or 'notdl' in storage:
            self.log_debug(title + " - Release ignoriert (bereits gefunden)")
        else:
            download_link = 'https://' + self.sf + download_link
            if season and episode:
                download_link = download_link.replace(
                    '&_=', '&season=' + str(season) + '&episode=' +
                    str(episode) + '&_=')
            download = add_decrypt(title, download_link, self.sf, self.dbfile)
            if download:
                self.db.store(title, 'added')
                log_entry = link_placeholder + title + ' - [SF]'
                self.log_info(log_entry)
                notify(["[Click'n'Load notwendig] - " + log_entry],
                       self.configfile)
                return log_entry

    def periodical_task(self):
        if not self.sf:
            return self.device
        if self.filename == 'SJ_Serien_Regex':
            if not self.config.get('regex'):
                self.log_debug("Suche für SF-Regex deaktiviert!")
                return self.device
        elif self.filename == 'SJ_Staffeln_Regex':
            if not self.config.get('regex'):
                self.log_debug("Suche für SF-Regex deaktiviert!")
                return self.device
        elif self.filename == 'MB_Staffeln':
            if not self.config.get('crawlseasons'):
                self.log_debug("Suche für SF-Staffeln deaktiviert!")
                return self.device
        if self.empty_list:
            self.log_debug("Liste ist leer. Stoppe Suche für Serien!" +
                           self.listtype)
            return self.device
        try:
            reject = self.config.get("rejectlist").replace(
                ",", "|").lower() if len(
                    self.config.get("rejectlist")) > 0 else r"^unmatchable$"
        except TypeError:
            reject = r"^unmatchable$"

        set_sf = self.settings_hash(False)

        header = False
        response = False

        while self.day < 8:
            if self.last_set_sf == set_sf:
                try:
                    delta = (datetime.datetime.now() - datetime.timedelta(
                        days=self.day)).strftime("%Y-%m-%d")
                    response = get_url_headers(
                        'https://' + self.sf + '/updates/' + delta,
                        self.configfile, self.dbfile, self.headers,
                        self.scraper)
                    self.scraper = response[1]
                    response = response[0]
                    if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                        feed = sf_releases_to_feedparser_dict(
                            response.text, "seasons", 'https://' + self.sf,
                            True)
                    else:
                        feed = sf_releases_to_feedparser_dict(
                            response.text, "episodes", 'https://' + self.sf,
                            True)
                except:
                    print(u"SF hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False
                if response:
                    if response.status_code == 304:
                        self.log_debug(
                            "SF-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!"
                        )
                        return self.device
                    header = True
            else:
                try:
                    delta = (datetime.datetime.now() - datetime.timedelta(
                        days=self.day)).strftime("%Y-%m-%d")
                    response = get_url(
                        'https://' + self.sf + '/updates/' + delta,
                        self.configfile, self.dbfile, self.scraper)
                    if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                        feed = sf_releases_to_feedparser_dict(
                            response, "seasons", 'https://' + self.sf, True)
                    else:
                        feed = sf_releases_to_feedparser_dict(
                            response, "episodes", 'https://' + self.sf, True)
                except:
                    print(u"SF hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False

            self.day += 1

            if feed and feed.entries:
                first_post_sf = feed.entries[0]
                concat_sf = first_post_sf.title + first_post_sf.published + \
                    str(self.settings) + str(self.pattern)
                sha_sf = hashlib.sha256(
                    concat_sf.encode('ascii', 'ignore')).hexdigest()
            else:
                self.log_debug("Feed ist leer - breche Suche ab!")
                return False

            for post in feed.entries:
                concat = post.title + post.published + \
                    str(self.settings) + str(self.pattern)
                sha = hashlib.sha256(concat.encode('ascii',
                                                   'ignore')).hexdigest()
                if sha == self.last_sha_sf:
                    self.log_debug("Feed ab hier bereits gecrawlt (" +
                                   post.title + ") - breche Suche ab!")
                    break

                series_url = post.series_url
                # normalize dashes in the release title
                title = post.title.replace("–", "-")

                if self.filename == 'SJ_Serien_Regex' or self.filename == 'SJ_Staffeln_Regex':
                    # Both RegEx lists share identical handling
                    if self.config.get("regex"):
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.rsscrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            m = re.search(self.pattern, title.lower())
                            if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                                m = re.search(
                                    self.pattern.replace("480p", "."),
                                    title.lower())
                                self.quality = "480p"
                            if m:
                                if "720p" in title.lower():
                                    self.quality = "720p"
                                if "1080p" in title.lower():
                                    self.quality = "1080p"
                                if "2160p" in title.lower():
                                    self.quality = "2160p"
                                m = re.search(reject, title.lower())
                                if m:
                                    self.log_debug(
                                        title +
                                        " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                    )
                                title = re.sub(r'\[.*\] ', '', post.title)
                                self.parse_download(series_url, title,
                                                    language_id)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)
                    else:
                        continue
                else:
                    if self.config.get("quality") != '480p':
                        m = re.search(self.pattern, title.lower())
                        if m:
                            if '.german.' in title.lower():
                                language_id = 1
                            elif self.rsscrawler.get('english'):
                                language_id = 2
                            else:
                                language_id = 0
                            if language_id:
                                mm = re.search(self.quality, title.lower())
                                if mm:
                                    mmm = re.search(reject, title.lower())
                                    if mmm:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (basierend auf rejectlist-Einstellung)"
                                        )
                                        continue
                                    if self.rsscrawler.get("surround"):
                                        if not re.match(
                                                r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                                title):
                                            self.log_debug(
                                                title +
                                                " - Release ignoriert (kein Mehrkanalton)"
                                            )
                                            continue
                                    try:
                                        storage = self.db.retrieve_all(title)
                                    except Exception as e:
                                        self.log_debug(
                                            "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                            % (e, title))
                                        return self.device
                                    if 'added' in storage:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (bereits gefunden)"
                                        )
                                        continue
                                    self.parse_download(
                                        series_url, title, language_id)
                            else:
                                self.log_debug(
                                    "%s - Englische Releases deaktiviert" %
                                    title)
                    else:
                        m = re.search(self.pattern, title.lower())
                        if m:
                            if '.german.' in title.lower():
                                language_id = 1
                            elif self.rsscrawler.get('english'):
                                language_id = 2
                            else:
                                language_id = 0
                            if language_id:
                                if "720p" in title.lower() or "1080p" in title.lower() or "2160p" in title.lower():
                                    continue
                                mm = re.search(reject, title.lower())
                                if mm:
                                    self.log_debug(
                                        title +
                                        " - Release ignoriert (basierend auf rejectlist-Einstellung)"
                                    )
                                    continue
                                if self.rsscrawler.get("surround"):
                                    if not re.match(
                                            r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                            title):
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (kein Mehrkanalton)"
                                        )
                                        continue
                                title = re.sub(r'\[.*\] ', '', post.title)
                                try:
                                    storage = self.db.retrieve_all(title)
                                except Exception as e:
                                    self.log_debug(
                                        "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                        % (e, title))
                                    return self.device
                                if 'added' in storage:
                                    self.log_debug(
                                        title +
                                        " - Release ignoriert (bereits gefunden)"
                                    )
                                    continue
                                self.parse_download(series_url, title,
                                                    language_id)
                            else:
                                self.log_debug(
                                    "%s - Englische Releases deaktiviert" %
                                    title)

        if set_sf:
            new_set_sf = self.settings_hash(True)
            if set_sf == new_set_sf:
                self.cdc.delete("SFSet-" + self.filename)
                self.cdc.store("SFSet-" + self.filename, set_sf)
                self.cdc.delete("SF-" + self.filename)
                self.cdc.store("SF-" + self.filename, sha_sf)
                if header and response:
                    self.cdc.delete("SFHeaders-" + self.filename)
                    self.cdc.store("SFHeaders-" + self.filename,
                                   response.headers['date'])
        return self.device
def main():
    arguments = docopt(__doc__, version='RSScrawler')

    print(u"┌──────────────────────────────────────────────┐")
    print(u" RSScrawler " + version + " von RiX")
    print(u" https://github.com/rix1337/RSScrawler")
    print(u"└──────────────────────────────────────────────┘")

    if arguments['--docker']:
        configpath = "/config"
    else:
        configpath = files.config(arguments['--config'])
    configfile = os.path.join(configpath, "RSScrawler.ini")
    dbfile = os.path.join(configpath, "RSScrawler.db")

    print(u"Nutze das Verzeichnis " + configpath + u" für Einstellungen/Logs")

    log_level = logging.__dict__[
        arguments['--log-level']] if arguments[
            '--log-level'] in logging.__dict__ else logging.INFO
    log_file = os.path.join(configpath, 'RSScrawler.log')
    log_format = '%(asctime)s - %(message)s'

    hostnames = RssConfig('Hostnames', configfile)

    def clean_up_hostname(host, string):
        if '/' in string:
            string = string.replace('https://', '').replace('http://', '')
            string = re.findall(r'([a-z-.]*\.[a-z]*)', string)[0]
            hostnames.save(host, string)
        if re.match(r'.*[A-Z].*', string):
            hostnames.save(host, string.lower())
        if not string:
            print(u'Kein Hostname gesetzt: ' + host.upper() +
                  ' (Seite wird ignoriert!)')
        return string

    set_hostnames = {}
    list_names = ['sj', 'dj', 'sf', 'mb', 'hw', 'hs', 'fx', 'nk', 'fc']
    for name in list_names:
        hostname = clean_up_hostname(name, hostnames.get(name))
        if hostname:
            set_hostnames[name] = hostname

    if not arguments['--testlauf'] and not set_hostnames:
        print(
            u'Keine Hostnamen in der RSScrawler.ini gefunden! Beende RSScrawler!'
        )
        time.sleep(10)
        sys.exit(1)

    disable_request_warnings(InsecureRequestWarning)

    if arguments['--testlauf']:
        device = False
    else:
        if not os.path.exists(configfile):
            if arguments['--docker']:
                if arguments['--jd-user'] and arguments['--jd-pass']:
                    device = files.myjd_input(configfile, arguments['--port'],
                                              arguments['--jd-user'],
                                              arguments['--jd-pass'],
                                              arguments['--jd-device'])
                else:
                    device = False
            else:
                device = files.myjd_input(configfile, arguments['--port'],
                                          arguments['--jd-user'],
                                          arguments['--jd-pass'],
                                          arguments['--jd-device'])
        else:
            rsscrawler = RssConfig('RSScrawler', configfile)
            user = rsscrawler.get('myjd_user')
            password = rsscrawler.get('myjd_pass')
            if user and password:
                device = get_device(configfile)
                if not device:
                    device = get_if_one_device(user, password)
                    if device:
                        print(u"Gerätename " + device +
                              " automatisch ermittelt.")
                        rsscrawler.save('myjd_device', device)
                        device = get_device(configfile)
            else:
                device = files.myjd_input(configfile, arguments['--port'],
                                          arguments['--jd-user'],
                                          arguments['--jd-pass'],
                                          arguments['--jd-device'])

        if not device and not arguments['--testlauf']:
            print(u'My JDownloader Zugangsdaten fehlerhaft! Beende RSScrawler!')
            time.sleep(10)
            sys.exit(1)
        else:
            print(u"Erfolgreich mit My JDownloader verbunden. Gerätename: " +
                  device.name)

    rsscrawler = RssConfig('RSScrawler', configfile)

    port = int(rsscrawler.get("port"))
    docker = False
    if arguments['--docker']:
        port = int('9090')
        docker = True
    elif arguments['--port']:
        port = int(arguments['--port'])

    if rsscrawler.get("prefix"):
        prefix = '/' + rsscrawler.get("prefix")
    else:
        prefix = ''
    local_address = 'http://' + common.check_ip() + ':' + str(port) + prefix
    if not arguments['--docker']:
        print(u'Der Webserver ist erreichbar unter ' + local_address)

    if arguments['--keep-cdc']:
        print(u"CDC-Tabelle nicht geleert!")
    else:
        RssDb(dbfile, 'cdc').reset()

    p = multiprocessing.Process(target=web_server,
                                args=(port, docker, configfile, dbfile,
                                      log_level, log_file, log_format, device))
    p.start()

    if not arguments['--testlauf']:
        c = multiprocessing.Process(target=crawler,
                                    args=(configfile, dbfile, device,
                                          rsscrawler, log_level, log_file,
                                          log_format))
        c.start()

        w = multiprocessing.Process(target=crawldog, args=(configfile, dbfile))
        w.start()

        print(u'Drücke [Strg] + [C] zum Beenden')

        def signal_handler(sig, frame):
            print(u'Beende RSScrawler...')
            p.terminate()
            c.terminate()
            w.terminate()
            sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)

        try:
            while True:
                signal.pause()
        except AttributeError:
            while True:
                time.sleep(1)
    else:
        crawler(configfile, dbfile, device, rsscrawler, log_level, log_file,
                log_format)
        p.terminate()
        sys.exit(0)
def crawldog(configfile, dbfile): disable_request_warnings(InsecureRequestWarning) crawljobs = RssConfig('Crawljobs', configfile) autostart = crawljobs.get("autostart") db = RssDb(dbfile, 'crawldog') grabber_was_collecting = False device = False while True: try: if not device or not is_device(device): device = get_device(configfile) myjd_packages = get_info(configfile, device) grabber_collecting = myjd_packages[2] if grabber_was_collecting or grabber_collecting: grabber_was_collecting = grabber_collecting time.sleep(5) else: packages_in_downloader_decrypted = myjd_packages[4][0] packages_in_linkgrabber_decrypted = myjd_packages[4][1] offline_packages = myjd_packages[4][2] encrypted_packages = myjd_packages[4][3] try: watched_titles = db.retrieve_all_titles() except: watched_titles = False notify_list = [] if packages_in_downloader_decrypted or packages_in_linkgrabber_decrypted or offline_packages or encrypted_packages: if watched_titles: for title in watched_titles: if packages_in_downloader_decrypted: for package in packages_in_downloader_decrypted: if title[0] in package[ 'name'] or title[0].replace( ".", " ") in package['name']: check = hoster_check( configfile, device, [package], title[0], [0]) device = check[0] if device: db.delete(title[0]) if packages_in_linkgrabber_decrypted: for package in packages_in_linkgrabber_decrypted: if title[0] in package[ 'name'] or title[0].replace( ".", " ") in package['name']: check = hoster_check( configfile, device, [package], title[0], [0]) device = check[0] episode = RssDb( dbfile, 'episode_remover').retrieve( title[0]) if episode: filenames = package['filenames'] if len(filenames) > 1: fname_episodes = [] for fname in filenames: try: if re.match( r'.*S\d{1,3}E\d{1,3}.*', fname, flags=re. IGNORECASE): fname = re.findall( r'S\d{1,3}E(\d{1,3})', fname, flags=re. 
IGNORECASE ).pop() else: fname = fname.replace( "hddl8", "").replace( "dd51", "" ).replace( "264", "" ).replace( "265", "") except: fname = fname.replace( "hddl8", "").replace( "dd51", "").replace( "264", "" ).replace( "265", "") fname_episode = "".join( re.findall( r'\d+', fname.split( ".part")[0])) try: fname_episodes.append( str( int(fname_episode ))) except: pass replacer = longest_substr( fname_episodes) new_fname_episodes = [] for new_ep_fname in fname_episodes: try: new_fname_episodes.append( str( int( new_ep_fname .replace( replacer, "")))) except: pass replacer = longest_substr( new_fname_episodes) newer_fname_episodes = [] for new_ep_fname in new_fname_episodes: try: newer_fname_episodes.append( str( int( re.sub( replacer, "", new_ep_fname, 1)))) except: pass replacer = longest_substr( newer_fname_episodes) even_newer_fname_episodes = [] for newer_ep_fname in newer_fname_episodes: try: even_newer_fname_episodes.append( str( int( re.sub( replacer, "", newer_ep_fname, 1)))) except: pass if even_newer_fname_episodes: fname_episodes = even_newer_fname_episodes elif newer_fname_episodes: fname_episodes = newer_fname_episodes elif new_fname_episodes: fname_episodes = new_fname_episodes delete_linkids = [] pos = 0 for delete_id in package[ 'linkids']: if str(episode) != str( fname_episodes[pos] ): delete_linkids.append( delete_id) pos += 1 if delete_linkids: delete_uuids = [ package['uuid'] ] RssDb( dbfile, 'episode_remover' ).delete(title[0]) device = remove_from_linkgrabber( configfile, device, delete_linkids, delete_uuids) if autostart: device = move_to_downloads( configfile, device, package['linkids'], [package['uuid']]) if device: db.delete(title[0]) if offline_packages: for package in offline_packages: if title[0] in package[ 'name'] or title[0].replace( ".", " ") in package['name']: notify_list.append("[Offline] - " + title[0]) print((u"[Offline] - " + title[0])) db.delete(title[0]) if encrypted_packages: for package in encrypted_packages: if title[0] in package[ 'name'] or title[0].replace( ".", " ") in package['name']: if title[1] == 'added': if retry_decrypt( configfile, dbfile, device, package['linkids'], [package['uuid']], package['urls']): db.delete(title[0]) db.store(title[0], 'retried') else: add_decrypt( package['name'], package['url'], "", dbfile) device = remove_from_linkgrabber( configfile, device, package['linkids'], [package['uuid']]) notify_list.append( "[Click'n'Load notwendig] - " + title[0]) print( u"[Click'n'Load notwendig] - " + title[0]) db.delete(title[0]) else: if not grabber_collecting: db.reset() if notify_list: notify(notify_list, configfile) time.sleep(30) except Exception: traceback.print_exc() time.sleep(30)
def ombi(configfile, dbfile, device, log_debug):
    db = RssDb(dbfile, 'Ombi')
    config = RssConfig('Ombi', configfile)
    url = config.get('url')
    api = config.get('api')

    if not url or not api:
        return device

    english = RssConfig('RSScrawler', configfile).get('english')

    try:
        requested_movies = requests.get(url + '/api/v1/Request/movie',
                                        headers={'ApiKey': api})
        requested_movies = json.loads(requested_movies.text)
        requested_shows = requests.get(url + '/api/v1/Request/tv',
                                       headers={'ApiKey': api})
        requested_shows = json.loads(requested_shows.text)
    except:
        log_debug("Ombi ist nicht erreichbar!")
        return False

    scraper = False

    for r in requested_movies:
        if bool(r.get("approved")):
            if not bool(r.get("available")):
                imdb_id = r.get("imdbId")
                if not db.retrieve('movie_' + str(imdb_id)) == 'added':
                    response = imdb_movie(imdb_id, configfile, dbfile, scraper)
                    title = response[0]
                    if title:
                        scraper = response[1]
                        best_result = search.best_result_bl(
                            title, configfile, dbfile)
                        print(u"Film: " + title + u" durch Ombi hinzugefügt.")
                        if best_result:
                            search.download_bl(best_result, device, configfile,
                                               dbfile)
                        if english:
                            title = r.get('title')
                            best_result = search.best_result_bl(
                                title, configfile, dbfile)
                            print(u"Film: " + title +
                                  u" durch Ombi hinzugefügt.")
                            if best_result:
                                search.download_bl(best_result, device,
                                                   configfile, dbfile)
                        db.store('movie_' + str(imdb_id), 'added')
                    else:
                        log_debug("Titel für IMDB-ID nicht abrufbar: " +
                                  imdb_id)

    for r in requested_shows:
        imdb_id = r.get("imdbId")
        infos = None
        child_requests = r.get("childRequests")
        for cr in child_requests:
            if bool(cr.get("approved")):
                if not bool(cr.get("available")):
                    details = cr.get("seasonRequests")
                    for season in details:
                        sn = season.get("seasonNumber")
                        eps = []
                        episodes = season.get("episodes")
                        for episode in episodes:
                            if not bool(episode.get("available")):
                                enr = episode.get("episodeNumber")
                                s = str(sn)
                                if len(s) == 1:
                                    s = "0" + s
                                s = "S" + s
                                e = str(enr)
                                if len(e) == 1:
                                    e = "0" + e
                                se = s + "E" + e
                                if not db.retrieve('show_' + str(imdb_id) +
                                                   '_' + se) == 'added':
                                    eps.append(enr)
                        if eps:
                            if not infos:
                                infos = imdb_show(imdb_id, configfile, dbfile,
                                                  scraper)
                            if infos:
                                title = infos[0]
                                all_eps = infos[1]
                                scraper = infos[2]
                                check_sn = False
                                if all_eps:
                                    check_sn = all_eps.get(sn)
                                if check_sn:
                                    sn_length = len(eps)
                                    check_sn_length = len(check_sn)
                                    if check_sn_length > sn_length:
                                        # Only some episodes of the season are
                                        # missing - try them one by one
                                        for ep in eps:
                                            e = str(ep)
                                            if len(e) == 1:
                                                e = "0" + e
                                            se = s + "E" + e
                                            payload = search.best_result_sj(
                                                title, configfile, dbfile)
                                            if payload:
                                                payload = decode_base64(
                                                    payload).split("|")
                                                payload = encode_base64(
                                                    payload[0] + "|" +
                                                    payload[1] + "|" + se)
                                                added_episode = search.download_sj(
                                                    payload, configfile,
                                                    dbfile)
                                                if not added_episode:
                                                    # Fall back to the whole
                                                    # season if the single
                                                    # episode is not found
                                                    payload = decode_base64(
                                                        payload).split("|")
                                                    payload = encode_base64(
                                                        payload[0] + "|" +
                                                        payload[1] + "|" + s)
                                                    add_season = search.download_sj(
                                                        payload, configfile,
                                                        dbfile)
                                                    for e in eps:
                                                        e = str(e)
                                                        if len(e) == 1:
                                                            e = "0" + e
                                                        se = s + "E" + e
                                                        db.store(
                                                            'show_' +
                                                            str(imdb_id) +
                                                            '_' + se, 'added')
                                                    if not add_season:
                                                        log_debug(
                                                            u"Konnte kein Release für "
                                                            + title + " " +
                                                            se + " finden.")
                                                    break
                                            db.store(
                                                'show_' + str(imdb_id) + '_' +
                                                se, 'added')
                                    else:
                                        # The whole season is missing
                                        payload = search.best_result_sj(
                                            title, configfile, dbfile)
                                        if payload:
                                            payload = decode_base64(
                                                payload).split("|")
                                            payload = encode_base64(
                                                payload[0] + "|" + payload[1] +
                                                "|" + s)
                                            search.download_sj(
                                                payload, configfile, dbfile)
                                        for ep in eps:
                                            e = str(ep)
                                            if len(e) == 1:
                                                e = "0" + e
                                            se = s + "E" + e
                                            db.store(
                                                'show_' + str(imdb_id) + '_' +
                                                se, 'added')
                                    print(u"Serie/Staffel/Episode: " + title +
                                          u" durch Ombi hinzugefügt.")
    return device
def download(configfile, dbfile, device, title, subdir, old_links, password,
             full_path=None, autostart=False):
    try:
        if not device or not is_device(device):
            device = get_device(configfile)

        if isinstance(old_links, list):
            links = []
            for link in old_links:
                if link not in links:
                    links.append(link)
        else:
            links = [old_links]

        links = str(links).replace(" ", "")

        crawljobs = RssConfig('Crawljobs', configfile)
        usesubdir = crawljobs.get("subdir")
        priority = "DEFAULT"

        if full_path:
            path = full_path
        else:
            if usesubdir:
                path = subdir + "/<jd:packagename>"
            else:
                path = "<jd:packagename>"
        if "Remux" in path:
            priority = "LOWER"

        try:
            device.linkgrabber.add_links(params=[{
                "autostart": autostart,
                "links": links,
                "packageName": title,
                "extractPassword": password,
                "priority": priority,
                "downloadPassword": password,
                "destinationFolder": path,
                "comment": "RSScrawler by rix1337",
                "overwritePackagizerRules": False
            }])
        except rsscrawler.myjdapi.TokenExpiredException:
            device = get_device(configfile)
            if not device or not is_device(device):
                return False
            device.linkgrabber.add_links(params=[{
                "autostart": autostart,
                "links": links,
                "packageName": title,
                "extractPassword": password,
                "priority": priority,
                "downloadPassword": password,
                "destinationFolder": path,
                "comment": "RSScrawler by rix1337",
                "overwritePackagizerRules": False
            }])
        db = RssDb(dbfile, 'crawldog')
        if db.retrieve(title):
            db.delete(title)
            db.store(title, 'retried')
        else:
            db.store(title, 'added')
        return device
    except rsscrawler.myjdapi.MYJDException as e:
        print(u"Fehler bei der Verbindung mit MyJDownloader: " + str(e))
        return False
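
# Usage sketch (title, subdir and links are placeholders): push a package to
# My JDownloader's linkgrabber without starting the download automatically.
#
#     device = download(configfile, dbfile, device, "Some.Release.1080p",
#                       "RSScrawler/Remux", ["https://filecrypt.example/abc"],
#                       password="mb", autostart=False)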
def get(title, configfile, dbfile, bl_only=False, sj_only=False): hostnames = RssConfig('Hostnames', configfile) mb = hostnames.get('mb') hw = hostnames.get('hw') hs = hostnames.get('hs') fx = hostnames.get('fx') nk = hostnames.get('nk') sj = hostnames.get('sj') specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower()) specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower()) if specific_season: split = title.split(",") title = split[0] special = split[1].upper() elif specific_episode: split = title.split(",") title = split[0] special = split[1].upper() else: special = None bl_final = {} sj_final = {} scraper = cloudscraper.create_scraper() if not sj_only: mb_query = sanitize(title).replace(" ", "+") if special: bl_query = mb_query + "+" + special else: bl_query = mb_query unrated = [] config = RssConfig('MB', configfile) quality = config.get('quality') ignore = config.get('ignore') if "480p" not in quality: search_quality = "+" + quality else: search_quality = "" if mb: mb_search = 'https://' + mb + '/search/' + bl_query + search_quality + '/feed/rss2/' else: mb_search = None if hw: hw_search = 'https://' + hw + '/search/' + bl_query + search_quality + '/feed/rss2/' else: hw_search = None if hs: hs_search = 'https://' + hs + '/search/' + bl_query + search_quality + '/feed' else: hs_search = None if fx: fx_search = 'https://' + fx + '/?s=' + bl_query else: fx_search = None async_results = get_urls_async( [mb_search, hw_search, hs_search, fx_search], configfile, dbfile, scraper) scraper = async_results[1] async_results = async_results[0] mb_results = [] hw_results = [] hs_results = [] fx_results = [] for res in async_results: if check_is_site(res, configfile) == 'MB': mb_results = re.findall( r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res) elif check_is_site(res, configfile) == 'HW': hw_results = re.findall( r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res) elif check_is_site(res, configfile) == 'HS': hs_results = hs_search_results(res) elif check_is_site(res, configfile) == 'FX': fx_results = fx_search_results(fx_content_to_soup(res), configfile, dbfile, scraper) if nk: nk_search = post_url( 'https://' + nk + "/search", configfile, dbfile, data={'search': bl_query.replace("+", " ") + " " + quality}) nk_results = nk_search_results(nk_search, 'https://' + nk + '/') else: nk_results = [] password = mb for result in mb_results: if "480p" in quality: if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[ 0].lower() or "2160p" in \ result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[ 0].lower() or "complete.uhd.bluray" in result[0].lower(): continue if not result[0].endswith("-MB") and not result[0].endswith(".MB"): unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (MB)" ]) password = hw for result in hw_results: if "480p" in quality: if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[ 0].lower() or "2160p" in \ result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[ 0].lower() or "complete.uhd.bluray" in result[0].lower(): continue unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (HW)" ]) password = hs for result in hs_results: if "480p" in quality: if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[ 0].lower() or "2160p" in \ result[0].lower() or "complete.bluray" in result[0].lower() or 
"complete.mbluray" in result[ 0].lower() or "complete.uhd.bluray" in result[0].lower(): continue unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (HS)" ]) password = fx.split('.')[0] for result in fx_results: if "480p" in quality: if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[ 0].lower() or "2160p" in \ result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[ 0].lower() or "complete.uhd.bluray" in result[0].lower(): continue unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (FX)" ]) password = nk.split('.')[0].capitalize() for result in nk_results: if "480p" in quality: if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[ 0].lower() or "2160p" in \ result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[ 0].lower() or "complete.uhd.bluray" in result[0].lower(): continue unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (NK)" ]) if config.get("crawl3d"): if mb: mb_search = 'https://' + mb + '/search/' + bl_query + search_quality + "+3D/feed/rss2/" else: mb_search = None if hw: hw_search = 'https://' + hw + '/search/' + bl_query + search_quality + "+3D/feed/rss2/" else: hw_search = None if hs: hs_search = 'https://' + hs + '/search/' + bl_query + search_quality + '+3D/feed' else: hs_search = None if fx: fx_search = 'https://' + fx + '/?s=' + bl_query + "+3D" else: fx_search = None async_results = get_urls_async( [mb_search, hw_search, hs_search, fx_search], configfile, dbfile, scraper) async_results = async_results[0] mb_results = [] hw_results = [] hs_results = [] fx_results = [] for res in async_results: if check_is_site(res, configfile) == 'MB': mb_results = re.findall( r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res) elif check_is_site(res, configfile) == 'HW': hw_results = re.findall( r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res) elif check_is_site(res, configfile) == 'HS': hs_results = hs_search_results(res) elif check_is_site(res, configfile) == 'FX': fx_results = re.findall( r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res) if nk: nk_search = post_url('https://' + nk + "/search", configfile, dbfile, data={ 'search': bl_query.replace("+", " ") + " " + quality + "3D" }) nk_results = nk_search_results(nk_search, 'https://' + nk + '/') else: nk_results = [] password = mb for result in mb_results: if not result[1].endswith("-MB") and not result[1].endswith( ".MB"): unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (3D-MB)" ]) password = hw for result in hw_results: unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (3D-HW)" ]) password = hs for result in hs_results: unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (3D-HS)" ]) password = fx.split('.')[0] for result in fx_results: unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (3D-FX)" ]) password = nk.split('.')[0].capitalize() for result in nk_results: unrated.append([ rate(result[0], ignore), encode_base64(result[1] + "|" + password), result[0] + " (3D-NK)" ]) rated = sorted(unrated, reverse=True) results = {} i = 0 for result in rated: res = {"payload": result[1], "title": result[2]} results["result" + str(i + 1000)] = res i += 1 
bl_final = results if not bl_only: if sj: sj_query = sanitize(title).replace(" ", "+") sj_search = get_url( 'https://' + sj + '/serie/search?q=' + sj_query, configfile, dbfile, scraper) try: sj_results = BeautifulSoup(sj_search, 'lxml').findAll( "a", href=re.compile("/serie")) except: sj_results = [] else: sj_results = [] if special: append = " (" + special + ")" else: append = "" i = 0 results = {} for result in sj_results: r_title = result.text r_rating = fuzz.ratio(title.lower(), r_title) if r_rating > 40: res = { "payload": encode_base64(result['href'] + "|" + r_title + "|" + str(special)), "title": r_title + append } results["result" + str(i + 1000)] = res i += 1 sj_final = results return bl_final, sj_final
class SJ: def __init__(self, configfile, dbfile, device, logging, scraper, filename, internal_name): self._INTERNAL_NAME = internal_name self.configfile = configfile self.dbfile = dbfile self.device = device self.hostnames = RssConfig('Hostnames', self.configfile) self.sj = self.hostnames.get('sj') self.config = RssConfig(self._INTERNAL_NAME, self.configfile) self.rsscrawler = RssConfig("RSScrawler", self.configfile) self.hevc_retail = self.config.get("hevc_retail") self.retail_only = self.config.get("retail_only") self.hoster_fallback = self.config.get("hoster_fallback") self.hosters = RssConfig("Hosters", configfile).get_section() self.log_info = logging.info self.log_error = logging.error self.log_debug = logging.debug self.scraper = scraper self.filename = filename self.db = RssDb(self.dbfile, 'rsscrawler') self.quality = self.config.get("quality") self.cdc = RssDb(self.dbfile, 'cdc') self.last_set_sj = self.cdc.retrieve("SJSet-" + self.filename) self.last_sha_sj = self.cdc.retrieve("SJ-" + self.filename) self.headers = { 'If-Modified-Since': str(self.cdc.retrieve("SJHeaders-" + self.filename)) } settings = [ "quality", "rejectlist", "regex", "hevc_retail", "retail_only", "hoster_fallback" ] self.settings = [] self.settings.append(self.rsscrawler.get("english")) self.settings.append(self.rsscrawler.get("surround")) self.settings.append(self.hosters) for s in settings: self.settings.append(self.config.get(s)) self.empty_list = False if self.filename == 'SJ_Staffeln_Regex': self.level = 3 elif self.filename == 'MB_Staffeln': self.seasonssource = self.config.get('seasonssource').lower() self.level = 2 elif self.filename == 'SJ_Serien_Regex': self.level = 1 else: self.level = 0 self.pattern = r'^(' + "|".join( self.get_series_list(self.filename, self.level)).lower() + ')' self.listtype = "" self.day = 0 def settings_hash(self, refresh): if refresh: settings = [ "quality", "rejectlist", "regex", "hevc_retail", "retail_only", "hoster_fallback" ] self.settings = [] self.settings.append(self.rsscrawler.get("english")) self.settings.append(self.rsscrawler.get("surround")) self.settings.append(self.hosters) for s in settings: self.settings.append(self.config.get(s)) self.pattern = r'^(' + "|".join( self.get_series_list(self.filename, self.level)).lower() + ')' set_sj = str(self.settings) + str(self.pattern) return hashlib.sha256(set_sj.encode('ascii', 'ignore')).hexdigest() def get_series_list(self, liste, series_type): if series_type == 1: self.listtype = " (RegEx)" elif series_type == 2: self.listtype = " (Staffeln)" elif series_type == 3: self.listtype = " (Staffeln/RegEx)" cont = ListDb(self.dbfile, liste).retrieve() titles = [] if cont: for title in cont: if title: title = title.replace(" ", ".") titles.append(title) if not titles: self.empty_list = True return titles def parse_download(self, series_url, title, language_id): if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile): self.log_debug( title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)" ) return if self.filename == 'MB_Staffeln': if not self.config.get("seasonpacks"): staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower()) if staffelpack: self.log_debug("%s - Release ignoriert (Staffelpaket)" % title) return if not re.search(self.seasonssource, title.lower()): self.log_debug(title + " - Release hat falsche Quelle") return try: series_info = get_url(series_url, self.configfile, self.dbfile) series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0] api_url = 
'https://' + self.sj + '/api/media/' + series_id + '/releases' response = get_url(api_url, self.configfile, self.dbfile, self.scraper) seasons = json.loads(response) for season in seasons: season = seasons[season] for item in season['items']: if item['name'] == title: valid = False for hoster in item['hoster']: if hoster: if check_hoster(hoster, self.configfile): valid = True if not valid and not self.hoster_fallback: storage = self.db.retrieve_all(title) if 'added' not in storage and 'notdl' not in storage: wrong_hoster = '[SJ/Hoster fehlt] - ' + title if 'wrong_hoster' not in storage: print(wrong_hoster) self.db.store(title, 'wrong_hoster') notify([wrong_hoster], self.configfile) else: self.log_debug(wrong_hoster) else: return self.send_package(title, series_url, language_id) except: print( u"SJ hat die Serien-API angepasst. Breche Download-Prüfung ab!" ) def send_package(self, title, series_url, language_id): englisch = "" if language_id == 2: englisch = "/Englisch" if self.filename == 'SJ_Serien_Regex': link_placeholder = '[Episode/RegEx' + englisch + '] - ' elif self.filename == 'SJ_Serien': link_placeholder = '[Episode' + englisch + '] - ' elif self.filename == 'SJ_Staffeln_Regex]': link_placeholder = '[Staffel/RegEx' + englisch + '] - ' else: link_placeholder = '[Staffel' + englisch + '] - ' try: storage = self.db.retrieve_all(title) except Exception as e: self.log_debug("Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title)) return if 'added' in storage or 'notdl' in storage: self.log_debug(title + " - Release ignoriert (bereits gefunden)") else: download = add_decrypt(title, series_url, self.sj, self.dbfile) if download: self.db.store(title, 'added') log_entry = link_placeholder + title + ' - [SJ]' self.log_info(log_entry) notify(["[Click'n'Load notwendig] - " + log_entry], self.configfile) return log_entry def periodical_task(self): if not self.sj: return self.device if self.filename == 'SJ_Serien_Regex': if not self.config.get('regex'): self.log_debug("Suche für SJ-Regex deaktiviert!") return self.device elif self.filename == 'SJ_Staffeln_Regex': if not self.config.get('regex'): self.log_debug("Suche für SJ-Regex deaktiviert!") return self.device elif self.filename == 'MB_Staffeln': if not self.config.get('crawlseasons'): self.log_debug("Suche für SJ-Staffeln deaktiviert!") return self.device if self.empty_list: self.log_debug("Liste ist leer. Stoppe Suche für Serien!" + self.listtype) return self.device try: reject = self.config.get("rejectlist").replace( ",", "|").lower() if len( self.config.get("rejectlist")) > 0 else r"^unmatchable$" except TypeError: reject = r"^unmatchable$" set_sj = self.settings_hash(False) header = False response = False while self.day < 8: if self.last_set_sj == set_sj: try: response = get_url_headers( 'https://' + self.sj + '/api/releases/latest/' + str(self.day), self.configfile, self.dbfile, self.headers, self.scraper) self.scraper = response[1] response = response[0] if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex": feed = j_releases_to_feedparser_dict( response.text, "seasons", 'https://' + self.sj, True) else: feed = j_releases_to_feedparser_dict( response.text, "episodes", 'https://' + self.sj, True) except: print(u"SJ hat die Feed-API angepasst. Breche Suche ab!") feed = False if response: if response.status_code == 304: self.log_debug( "SJ-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!" 
) return self.device header = True else: try: response = get_url( 'https://' + self.sj + '/api/releases/latest/' + str(self.day), self.configfile, self.dbfile, self.scraper) if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex": feed = j_releases_to_feedparser_dict( response, "seasons", 'https://' + self.sj, True) else: feed = j_releases_to_feedparser_dict( response, "episodes", 'https://' + self.sj, True) except: print(u"SJ hat die Feed-API angepasst. Breche Suche ab!") feed = False self.day += 1 if feed and feed.entries: first_post_sj = feed.entries[0] concat_sj = first_post_sj.title + first_post_sj.published + str( self.settings) + str(self.pattern) sha_sj = hashlib.sha256(concat_sj.encode( 'ascii', 'ignore')).hexdigest() else: self.log_debug("Feed ist leer - breche Suche ab!") return False for post in feed.entries: concat = post.title + post.published + \ str(self.settings) + str(self.pattern) sha = hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest() if sha == self.last_sha_sj: self.log_debug("Feed ab hier bereits gecrawlt (" + post.title + ") - breche Suche ab!") break series_url = post.series_url title = post.title.replace("-", "-") if self.filename == 'SJ_Serien_Regex': if self.config.get("regex"): if '.german.' in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: m = re.search(self.pattern, title.lower()) if not m and "720p" not in title and "1080p" not in title and "2160p" not in title: m = re.search( self.pattern.replace("480p", "."), title.lower()) self.quality = "480p" if m: if "720p" in title.lower(): self.quality = "720p" if "1080p" in title.lower(): self.quality = "1080p" if "2160p" in title.lower(): self.quality = "2160p" m = re.search(reject, title.lower()) if m: self.log_debug( title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)" ) title = re.sub(r'\[.*\] ', '', post.title) self.parse_download(series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) else: continue elif self.filename == 'SJ_Staffeln_Regex': if self.config.get("regex"): if '.german.' in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: m = re.search(self.pattern, title.lower()) if not m and "720p" not in title and "1080p" not in title and "2160p" not in title: m = re.search( self.pattern.replace("480p", "."), title.lower()) self.quality = "480p" if m: if "720p" in title.lower(): self.quality = "720p" if "1080p" in title.lower(): self.quality = "1080p" if "2160p" in title.lower(): self.quality = "2160p" m = re.search(reject, title.lower()) if m: self.log_debug( title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)" ) title = re.sub(r'\[.*\] ', '', post.title) self.parse_download(series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) else: continue else: if self.config.get("quality") != '480p': m = re.search(self.pattern, title.lower()) if m: if '.german.' 
in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: mm = re.search(self.quality, title.lower()) if mm: mmm = re.search(reject, title.lower()) if mmm: self.log_debug( title + " - Release ignoriert (basierend auf rejectlist-Einstellung)" ) continue if self.rsscrawler.get("surround"): if not re.match( r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title): self.log_debug( title + " - Release ignoriert (kein Mehrkanalton)" ) continue try: storage = self.db.retrieve_all(title) except Exception as e: self.log_debug( "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title)) return self.device if 'added' in storage: self.log_debug( title + " - Release ignoriert (bereits gefunden)" ) continue self.parse_download( series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) else: m = re.search(self.pattern, title.lower()) if m: if '.german.' in title.lower(): language_id = 1 elif self.rsscrawler.get('english'): language_id = 2 else: language_id = 0 if language_id: if "720p" in title.lower( ) or "1080p" in title.lower( ) or "2160p" in title.lower(): continue mm = re.search(reject, title.lower()) if mm: self.log_debug( title + " Release ignoriert (basierend auf rejectlist-Einstellung)" ) continue if self.rsscrawler.get("surround"): if not re.match( r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title): self.log_debug( title + " - Release ignoriert (kein Mehrkanalton)" ) continue title = re.sub(r'\[.*\] ', '', post.title) try: storage = self.db.retrieve_all(title) except Exception as e: self.log_debug( "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title)) return self.device if 'added' in storage: self.log_debug( title + " - Release ignoriert (bereits gefunden)" ) continue self.parse_download( series_url, title, language_id) else: self.log_debug( "%s - Englische Releases deaktiviert" % title) if set_sj: new_set_sj = self.settings_hash(True) if set_sj == new_set_sj: self.cdc.delete("SJSet-" + self.filename) self.cdc.store("SJSet-" + self.filename, set_sj) self.cdc.delete("SJ-" + self.filename) self.cdc.store("SJ-" + self.filename, sha_sj) if header and response: self.cdc.delete("SJHeaders-" + self.filename) self.cdc.store("SJHeaders-" + self.filename, response.headers['date']) return self.device
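# Minimal sketch of the change-detection idea used by SJ.periodical_task():
# a SHA-256 over the current settings and search pattern decides whether the
# cached feed state is still valid, and the stored "date" header is replayed
# as If-Modified-Since so an unchanged feed answers with HTTP 304. This is a
# hedged illustration only: "cache" stands in for the RssDb-backed cdc table,
# and plain requests replaces the cloudscraper session purely for brevity.
import hashlib

import requests


def settings_hash(settings, pattern):
    # Hash settings + pattern the same way SJ.settings_hash() does.
    concat = str(settings) + str(pattern)
    return hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()


def fetch_if_changed(url, cache, settings, pattern):
    # Return the feed body, or None if neither settings nor feed changed.
    current = settings_hash(settings, pattern)
    headers = {}
    if cache.get('set') == current and cache.get('last_modified'):
        # Same settings as last run: ask the server to skip unchanged content.
        headers['If-Modified-Since'] = cache['last_modified']
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code == 304:
        return None  # feed unchanged since the stored date
    cache['set'] = current
    cache['last_modified'] = response.headers.get('date', '')
    return response.text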
def get_url_headers(url, configfile, dbfile, headers, scraper=False): config = RssConfig('RSScrawler', configfile) proxy = config.get('proxy') if not scraper: scraper = cloudscraper.create_scraper() db = RssDb(dbfile, 'proxystatus') db_normal = RssDb(dbfile, 'normalstatus') site = check_is_site(url, configfile) if proxy: try: if site and "SJ" in site: if db.retrieve("SJ"): if config.get("fallback") and not db_normal.retrieve("SJ"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] elif site and "DJ" in site: if db.retrieve("DJ"): if config.get("fallback") and not db_normal.retrieve("DJ"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] elif site and "SF" in site: if db.retrieve("SF"): if config.get("fallback") and not db_normal.retrieve("SF"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] elif site and "MB" in site: if db.retrieve("MB"): if config.get("fallback") and not db_normal.retrieve("MB"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] elif site and "HW" in site: if db.retrieve("HW"): if config.get("fallback") and not db_normal.retrieve("HW"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] elif site and "FX" in site: if db.retrieve("FX"): if config.get("fallback") and not db_normal.retrieve("FX"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] elif site and "HS" in site: if db.retrieve("HS"): if config.get("fallback") and not db_normal.retrieve("HS"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] elif site and "NK" in site: if db.retrieve("NK"): if config.get("fallback") and not db_normal.retrieve("NK"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] elif site and "DD" in site: if db.retrieve("DD"): if config.get("fallback") and not db_normal.retrieve("DD"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] elif site and "FC" in site: if db.retrieve("FC"): if config.get("fallback") and not db_normal.retrieve("FC"): return [ scraper.get(url, headers=headers, timeout=30), scraper ] else: return ["", scraper] proxies = {'http': proxy, 'https': proxy} response = scraper.get(url, headers=headers, proxies=proxies, timeout=30) return [response, scraper] except Exception as e: print(u"Fehler beim Abruf von: " + url + " " + str(e)) return ["", scraper] else: try: if site and "SJ" in site and db_normal.retrieve("SJ"): return ["", scraper] elif site and "DJ" in site and db_normal.retrieve("DJ"): return ["", scraper] elif site and "SF" in site and db_normal.retrieve("SF"): return ["", scraper] elif site and "MB" in site and db_normal.retrieve("MB"): return ["", scraper] elif site and "HW" in site and db_normal.retrieve("HW"): return ["", scraper] elif site and "FX" in site and db_normal.retrieve("FX"): return ["", scraper] elif site and "HS" in site and db_normal.retrieve("HS"): return ["", scraper] elif site and "NK" in site and db_normal.retrieve("NK"): return ["", scraper] elif site and "DD" in site and db_normal.retrieve("DD"): return ["", scraper] elif site and "FC" in site and db_normal.retrieve("FC"): return ["", scraper] response = scraper.get(url, headers=headers, timeout=30) return [response, scraper] except Exception as e: print(u"Fehler beim Abruf von: " + url + " " + str(e)) 
return ["", scraper]
class DD: _INTERNAL_NAME = 'DD' def __init__(self, configfile, dbfile, device, logging, scraper): self.configfile = configfile self.dbfile = dbfile self.device = device self.config = RssConfig(self._INTERNAL_NAME, self.configfile) self.log_info = logging.info self.log_error = logging.error self.log_debug = logging.debug self.scraper = scraper self.db = RssDb(self.dbfile, 'rsscrawler') def periodical_task(self): feeds = self.config.get("feeds") if feeds: added_items = [] feeds = feeds.replace(" ", "").split(',') for feed in feeds: feed = feedparser.parse( get_url(feed, self.configfile, self.dbfile, self.scraper)) for post in feed.entries: key = post.title.replace(" ", ".") epoch = datetime(1970, 1, 1) current_epoch = int(time()) published_format = "%Y-%m-%d %H:%M:%S+00:00" published_timestamp = str(parser.parse(post.published)) published_epoch = int((datetime.strptime( published_timestamp, published_format) - epoch).total_seconds()) if (current_epoch - 1800) > published_epoch: link_pool = post.summary unicode_links = re.findall(r'(http.*)', link_pool) links = [] for link in unicode_links: if check_hoster(link, self.configfile): links.append(str(link)) if self.config.get("hoster_fallback") and not links: for link in unicode_links: links.append(str(link)) storage = self.db.retrieve_all(key) if not links: if 'added' not in storage and 'notdl' not in storage: wrong_hoster = '[DD/Hoster fehlt] - ' + key if 'wrong_hoster' not in storage: self.log_info(wrong_hoster) self.db.store(key, 'wrong_hoster') notify([wrong_hoster], self.configfile) else: self.log_debug(wrong_hoster) elif 'added' in storage: self.log_debug( "%s - Release ignoriert (bereits gefunden)" % key) else: self.device = myjd_download( self.configfile, self.dbfile, self.device, key, "RSScrawler", links, "") if self.device: self.db.store(key, 'added') log_entry = '[Englisch] - ' + key + ' - [DD]' self.log_info(log_entry) notify([log_entry], self.configfile) added_items.append(log_entry) else: self.log_debug( "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert." % key) else: self.log_debug("Liste ist leer. Stoppe Suche für DD!") return self.device
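# Sketch of the 30-minute age gate used in DD.periodical_task(): an entry is
# only processed once its publish timestamp is at least 1800 seconds old. The
# arithmetic mirrors the code above; note that the fixed format string assumes
# a "+00:00" offset in the parsed timestamp, as the original does.
from datetime import datetime
from time import time

from dateutil import parser


def old_enough(published, min_age_seconds=1800):
    # True if the feed entry's publish time is at least min_age_seconds old.
    epoch = datetime(1970, 1, 1)
    published_format = "%Y-%m-%d %H:%M:%S+00:00"
    published_timestamp = str(parser.parse(published))
    published_epoch = int(
        (datetime.strptime(published_timestamp, published_format) - epoch).total_seconds())
    return (int(time()) - min_age_seconds) > published_epoch


print(old_enough("2020-01-01T12:00:00+00:00"))  # True for timestamps this far in the past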
def download_sj(payload, configfile, dbfile): hostnames = RssConfig('Hostnames', configfile) sj = hostnames.get('sj') payload = decode_base64(payload).split("|") href = payload[0] title = payload[1] special = payload[2].strip().replace("None", "") series_url = 'https://' + sj + href series_info = get_url(series_url, configfile, dbfile) series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0] api_url = 'https://' + sj + '/api/media/' + series_id + '/releases' releases = get_url(api_url, configfile, dbfile) seasons = json.loads(releases) listen = ["SJ_Serien", "MB_Staffeln"] for liste in listen: cont = ListDb(dbfile, liste).retrieve() list_title = sanitize(title) if not cont: cont = "" if list_title not in cont: ListDb(dbfile, liste).store(list_title) config = RssConfig('SJ', configfile) english_ok = RssConfig('RSScrawler', configfile).get("english") quality = config.get('quality') ignore = config.get('rejectlist') result_seasons = {} result_episodes = {} for season in seasons: releases = seasons[season] for release in releases['items']: name = release['name'].encode('ascii', errors='ignore').decode('utf-8') hosters = release['hoster'] try: valid = bool(release['resolution'] == quality) except: valid = re.match(re.compile(r'.*' + quality + r'.*'), name) if valid and special: valid = bool("." + special.lower() + "." in name.lower()) if valid and not english_ok: valid = bool(".german." in name.lower()) if valid: valid = False for hoster in hosters: if hoster and check_hoster( hoster, configfile) or config.get("hoster_fallback"): valid = True if valid: try: ep = release['episode'] if ep: existing = result_episodes.get(season) if existing: for e in existing: if e == ep: if rate(name, ignore) > rate( existing[e], ignore): existing.update({ep: name}) else: existing = {ep: name} result_episodes.update({season: existing}) continue except: pass existing = result_seasons.get(season) dont = False if existing: if rate(name, ignore) < rate(existing, ignore): dont = True if not dont: result_seasons.update({season: name}) try: if result_seasons[season] and result_episodes[season]: del result_episodes[season] except: pass success = False try: if result_seasons[season]: success = True except: try: if result_episodes[season]: success = True except: pass if success: logger.debug(u"Websuche erfolgreich für " + title + " - " + season) else: for release in releases['items']: name = release['name'].encode('ascii', errors='ignore').decode('utf-8') hosters = release['hoster'] valid = True if valid and special: valid = bool("." + special.lower() + "." in name.lower()) if valid and not english_ok: valid = bool(".german." 
in name.lower()) if valid: valid = False for hoster in hosters: if hoster and check_hoster( hoster, configfile) or config.get("hoster_fallback"): valid = True if valid: try: ep = release['episode'] if ep: existing = result_episodes.get(season) if existing: for e in existing: if e == ep: if rate(name, ignore) > rate( existing[e], ignore): existing.update({ep: name}) else: existing = {ep: name} result_episodes.update({season: existing}) continue except: pass existing = result_seasons.get(season) dont = False if existing: if rate(name, ignore) < rate(existing, ignore): dont = True if not dont: result_seasons.update({season: name}) try: if result_seasons[season] and result_episodes[season]: del result_episodes[season] except: pass logger.debug(u"Websuche erfolgreich für " + title + " - " + season) matches = [] for season in result_seasons: matches.append(result_seasons[season]) for season in result_episodes: for episode in result_episodes[season]: matches.append(result_episodes[season][episode]) notify_array = [] for title in matches: db = RssDb(dbfile, 'rsscrawler') if add_decrypt(title, series_url, sj, dbfile): db.store(title, 'added') log_entry = u'[Suche/Serie] - ' + title + ' - [SJ]' logger.info(log_entry) notify_array.append(log_entry) notify(notify_array, configfile) if not matches: return False return matches
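# Hedged sketch of the "keep the best release per episode" bookkeeping from
# download_sj(). score() is only a stand-in for the project's rate(name,
# ignore); the dict layout (season -> {episode: release name}) matches
# result_episodes above.


def score(name, ignore):
    # Hypothetical scoring: prefer 1080p, penalise rejected terms.
    value = 0
    if "1080p" in name.lower():
        value += 2
    if ignore and any(term in name.lower() for term in ignore.split("|")):
        value -= 5
    return value


def keep_best(result_episodes, season, episode, name, ignore):
    # Store name unless a better-scoring release for this episode exists.
    existing = result_episodes.get(season)
    if existing and episode in existing:
        if score(name, ignore) > score(existing[episode], ignore):
            existing[episode] = name
    else:
        existing = dict(existing or {})
        existing[episode] = name
    result_episodes[season] = existing


results = {}
keep_best(results, "1", 3, "Show.S01E03.720p.WEB.x264-GRP", "")
keep_best(results, "1", 3, "Show.S01E03.1080p.WEB.x264-GRP", "")
print(results)  # {'1': {3: 'Show.S01E03.1080p.WEB.x264-GRP'}}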
def download_bl(payload, device, configfile, dbfile): hostnames = RssConfig('Hostnames', configfile) mb = hostnames.get('mb') nk = hostnames.get('nk') fc = hostnames.get('fc').replace('www.', '').split('.')[0] payload = decode_base64(payload).split("|") link = payload[0] password = payload[1] url = get_url(link, configfile, dbfile) if not url or "NinjaFirewall 429" in url: return False config = RssConfig('MB', configfile) db = RssDb(dbfile, 'rsscrawler') soup = BeautifulSoup(url, 'lxml') site = check_is_site(link, configfile) if not site: return False else: if "MB" in site: if not fc: print( u"FC Hostname nicht gesetzt. MB kann keine Links finden!") return False key = soup.find("span", {"class": "fn"}).text hosters = soup.find_all("a", href=re.compile(fc)) url_hosters = [] for hoster in hosters: dl = hoster["href"] hoster = hoster.text url_hosters.append([dl, hoster]) elif "HW" in site: if not fc: print( u"FC Hostname nicht gesetzt. MB kann keine Links finden!") return False key = re.findall(r'Permanent Link: (.*?)"', str(soup)).pop() hosters = soup.find_all("a", href=re.compile(fc)) url_hosters = [] for hoster in hosters: dl = hoster["href"] hoster = hoster.text url_hosters.append([dl, hoster]) elif "HS" in site: download = soup.find("div", {"class": "entry-content"}) key = soup.find("h2", {"class": "entry-title"}).text url_hosters = re.findall(r'href="([^"\'>]*)".+?(.+?)<', str(download)) elif "NK" in site: key = soup.find("span", {"class": "subtitle"}).text url_hosters = [] hosters = soup.find_all("a", href=re.compile("/go/")) for hoster in hosters: url_hosters.append( ['https://' + nk + hoster["href"], hoster.text]) elif "FX" in site: key = payload[1] password = payload[2] else: return False links = {} if "MB" in site or "HW" in site or "HS" in site or "NK" in site: for url_hoster in reversed(url_hosters): try: if mb.split('.')[0] not in url_hoster[ 0] and "https://goo.gl/" not in url_hoster[0]: link_hoster = url_hoster[1].lower().replace( 'target="_blank">', '').replace(" ", "-") if check_hoster(link_hoster, configfile): links[link_hoster] = url_hoster[0] except: pass if config.get("hoster_fallback") and not links: for url_hoster in reversed(url_hosters): if mb.split('.')[0] not in url_hoster[ 0] and "https://goo.gl/" not in url_hoster[0]: link_hoster = url_hoster[1].lower().replace( 'target="_blank">', '').replace(" ", "-") links[link_hoster] = url_hoster[0] download_links = list(links.values()) elif "FX" in site: download_links = fx_download_links(url, key, configfile) englisch = False if "*englisch" in key.lower() or "*english" in key.lower(): key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace( "*ENGLISH", "").replace("*English", "").replace("*", "") englisch = True staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower()) if config.get('enforcedl') and '.dl.' 
not in key.lower(): fail = False get_imdb_url = url key_regex = r'<title>' + \ re.escape( key) + r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>' imdb_id = re.findall(key_regex, get_imdb_url) if len(imdb_id) > 0: if not imdb_id[0]: fail = True else: imdb_id = imdb_id[0] else: fail = True if fail: try: search_title = re.findall( r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)", key)[0].replace(".", "+") search_url = "http://www.imdb.com/find?q=" + search_title search_page = get_url(search_url, configfile, dbfile) search_results = re.findall( r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})', search_page) total_results = len(search_results) except: return False if staffel: try: imdb_id = search_results[0][0] except: imdb_id = False else: no_series = False while total_results > 0: attempt = 0 for result in search_results: if result[3] == "TV Series": no_series = False total_results -= 1 attempt += 1 else: no_series = True imdb_id = search_results[attempt][0] total_results = 0 break if no_series is False: logger.debug( "%s - Keine passende Film-IMDB-Seite gefunden" % key) if staffel: filename = 'MB_Staffeln' else: filename = 'MB_Filme' scraper = cloudscraper.create_scraper() blog = BL(configfile, dbfile, device, logging, scraper, filename=filename) if not imdb_id: if not blog.dual_download(key, password): logger.debug("%s - Kein zweisprachiges Release gefunden." % key) else: if isinstance(imdb_id, list): imdb_id = imdb_id.pop() imdb_url = "http://www.imdb.com/title/" + imdb_id details = get_url(imdb_url, configfile, dbfile) if not details: logger.debug("%s - Originalsprache nicht ermittelbar" % key) original_language = re.findall( r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details) if original_language: original_language = original_language[0] if original_language == "German": logger.debug( "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!" % key) else: if not blog.dual_download(key, password) and not englisch: logger.debug( "%s - Kein zweisprachiges Release gefunden!" % key) if download_links: if staffel: if myjd_download(configfile, dbfile, device, key, "RSScrawler", download_links, password): db.store( key.replace(".COMPLETE", "").replace(".Complete", ""), 'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added') log_entry = '[Suche/Staffel] - ' + key.replace( ".COMPLETE", "").replace(".Complete", "") + ' - [' + site + ']' logger.info(log_entry) notify([log_entry], configfile) return True elif '.3d.' in key.lower(): retail = False if config.get('cutoff') and '.COMPLETE.' not in key.lower(): if config.get('enforcedl'): if is_retail(key, '2', dbfile): retail = True if myjd_download(configfile, dbfile, device, key, "RSScrawler/3Dcrawler", download_links, password): db.store( key, 'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added') log_entry = '[Suche/Film' + ( '/Retail' if retail else "") + '/3D] - ' + key + ' - [' + site + ']' logger.info(log_entry) notify([log_entry], configfile) return True else: retail = False if config.get('cutoff') and '.COMPLETE.' 
not in key.lower(): if config.get('enforcedl'): if is_retail(key, '1', dbfile): retail = True else: if is_retail(key, '0', dbfile): retail = True if myjd_download(configfile, dbfile, device, key, "RSScrawler", download_links, password): db.store( key, 'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added') log_entry = '[Suche/Film' + ( '/Englisch' if englisch and not retail else '') + ('/Englisch/Retail' if englisch and retail else '') + ('/Retail' if not englisch and retail else '') + '] - ' + key + ' - [' + site + ']' logger.info(log_entry) notify([log_entry], configfile) return [key] else: return False
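# Hedged sketch of the dual-language check in download_bl(): derive a search
# title from the release name, resolve an IMDb id, then read the "Language"
# field of the title page. The regular expressions are the ones used above and
# target IMDb's legacy HTML, so they may no longer match the current site;
# requests stands in for the project's get_url() helper, and the example key
# assumes the usual release-name layout.
import re

import requests


def search_title_from_release(key):
    # Strip year/quality/season tokens: 'Some.Movie.2019.1080p...' -> 'Some+Movie'.
    return re.findall(
        r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
        key)[0].replace(".", "+")


def original_language(imdb_id):
    # Return the original-language string, or None if it cannot be parsed.
    details = requests.get("http://www.imdb.com/title/" + imdb_id, timeout=30).text
    match = re.findall(r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details)
    return match[0] if match else None


# A German-language original skips the search for a dual-language release;
# anything else triggers BL.dual_download() in the code above.
key = "Some.Movie.2019.German.DL.1080p.BluRay.x264-GROUP"
print(search_title_from_release(key))  # Some+Movie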