예제 #1
0
def post_url(url, configfile, dbfile, data, scraper=False):
    """POST ``data`` to ``url`` and return the raw response body.

    The request is skipped (or sent without the proxy) depending on status
    flags stored in ``dbfile``:

    * With a proxy configured: if the ``proxystatus`` table holds a truthy
      entry for the site the URL belongs to, the request is only sent
      (directly, without the proxy) when the ``fallback`` option is enabled
      and the ``normalstatus`` table holds no truthy entry for that site;
      otherwise ``""`` is returned. In every other case the request goes
      through the proxy.
    * Without a proxy: if ``normalstatus`` holds a truthy entry for the
      site, ``""`` is returned; otherwise the request is sent directly.

    NOTE(review): the status entries presumably mean "site is blocked via
    this route" - confirm against the code that writes them.

    :param url: target URL (must be a ``str``; it is concatenated into the
        error message).
    :param configfile: path handed to ``RssConfig`` / ``check_is_site``.
    :param dbfile: path handed to ``RssDb``.
    :param data: payload passed as the second positional argument of
        ``scraper.post``.
    :param scraper: an existing session-like object exposing ``post``; a
        new cloudscraper session is created when falsy.
    :return: ``response.content`` on success, or the empty string ``""``
        when the request was skipped or any exception occurred.
    """
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    # Temporary fix for FX: use a plain requests session with certificate
    # verification disabled instead of the (cloud)scraper passed in.
    # NOTE(review): the original followed this with
    # ``scraper.headers = scraper.headers`` / ``scraper.cookies = scraper.cookies``,
    # which are no-ops. If the intent was to carry headers/cookies over from
    # the previous scraper, that has never worked - confirm before adding it.
    if site and "FX" in site:
        scraper = requests.session()
        scraper.verify = False

    # Checked in this order; only the first key contained in ``site`` counts
    # on the proxy path (mirrors the former if/elif chain).
    site_keys = ("SJ", "DJ", "SF", "MB", "HW", "FX", "HS", "NK", "DD", "FC")

    try:
        if proxy:
            matched = None
            if site:
                matched = next((key for key in site_keys if key in site),
                               None)
            if matched and db.retrieve(matched):
                if config.get("fallback") and not db_normal.retrieve(matched):
                    # Fallback: bypass the proxy for this site.
                    return scraper.post(url, data, timeout=30).content
                return ""
            proxies = {'http': proxy, 'https': proxy}
            return scraper.post(url, data, proxies=proxies,
                                timeout=30).content

        if site and any(key in site and db_normal.retrieve(key)
                        for key in site_keys):
            return ""
        return scraper.post(url, data, timeout=30).content
    except Exception as e:
        # Deliberate best-effort: callers get "" instead of an exception.
        print(u"Fehler beim Abruf von: " + url + " " + str(e))
        return ""
예제 #2
0
파일: sf.py 프로젝트: evilmon/RSScrawler
class SF:
    """Feed crawler task for the site configured as hostname ``sf``.

    One instance handles one list file (``filename``). ``periodical_task``
    walks the site's daily ``/updates/<date>`` pages, matches each entry
    against the titles of that list and hands matching releases to
    ``parse_download`` / ``send_package``.
    """

    # List files whose entries are matched as user-supplied regexes.
    _REGEX_LISTS = ('SJ_Serien_Regex', 'SJ_Staffeln_Regex')
    # List files for which the feed is parsed in "seasons" mode.
    _SEASON_LISTS = ('MB_Staffeln', 'SJ_Staffeln_Regex')
    # Per-task config keys that take part in the settings hash.
    _SETTING_KEYS = ("quality", "rejectlist", "regex", "hevc_retail",
                     "retail_only", "hoster_fallback")
    _HIGH_RESOLUTIONS = ("720p", "1080p", "2160p")

    def __init__(self, configfile, dbfile, device, logging, scraper, filename,
                 internal_name):
        """Load configuration, databases and the title pattern.

        :param configfile: path handed to every ``RssConfig``.
        :param dbfile: path handed to ``RssDb`` / ``ListDb``.
        :param device: opaque object returned unchanged by
            ``periodical_task``.
        :param logging: object providing ``info``, ``error`` and ``debug``.
        :param scraper: HTTP session object passed on to the URL helpers.
        :param filename: name of the list this task crawls for.
        :param internal_name: config section name of this task.
        """
        self._INTERNAL_NAME = internal_name
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = RssConfig('Hostnames', self.configfile)
        self.sf = self.hostnames.get('sf')

        self.config = RssConfig(self._INTERNAL_NAME, self.configfile)
        self.rsscrawler = RssConfig("RSScrawler", self.configfile)
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.hosters = RssConfig("Hosters", configfile).get_section()
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.filename = filename
        self.db = RssDb(self.dbfile, 'rsscrawler')
        self.quality = self.config.get("quality")
        self.cdc = RssDb(self.dbfile, 'cdc')
        self.last_set_sf = self.cdc.retrieve("SFSet-" + self.filename)
        self.last_sha_sf = self.cdc.retrieve("SF-" + self.filename)
        self.headers = {
            'If-Modified-Since':
            str(self.cdc.retrieve("SFHeaders-" + self.filename))
        }
        self.settings = self._collect_settings()

        self.empty_list = False
        if self.filename == 'SJ_Staffeln_Regex':
            self.level = 3
        elif self.filename == 'MB_Staffeln':
            self.seasonssource = self.config.get('seasonssource').lower()
            self.level = 2
        elif self.filename == 'SJ_Serien_Regex':
            self.level = 1
        else:
            self.level = 0

        # Must be initialised BEFORE the pattern is built: get_series_list()
        # sets the suffix, and resetting it afterwards (as the code used to)
        # wiped it again.
        self.listtype = ""
        self.pattern = self._build_pattern()

        # Number of days already walked back by periodical_task().
        self.day = 0

    def _collect_settings(self):
        """Return the current values of all settings that affect matching."""
        settings = [
            self.rsscrawler.get("english"),
            self.rsscrawler.get("surround"),
            self.hosters,
        ]
        settings.extend(self.config.get(key) for key in self._SETTING_KEYS)
        return settings

    def _build_pattern(self):
        """Return the anchored alternation regex of all titles on the list."""
        titles = self.get_series_list(self.filename, self.level)
        return r'^(' + "|".join(titles).lower() + ')'

    def _language_id(self, title):
        """Return 1 for German titles, 2 if English is enabled, else 0."""
        if '.german.' in title.lower():
            return 1
        if self.rsscrawler.get('english'):
            return 2
        return 0

    def _link_placeholder(self, language_id):
        """Return the log prefix (e.g. ``'[Episode/RegEx] - '``) for this list."""
        englisch = "/Englisch" if language_id == 2 else ""
        if self.filename == 'SJ_Serien_Regex':
            kind = 'Episode/RegEx'
        elif self.filename == 'SJ_Serien':
            kind = 'Episode'
        elif self.filename == 'SJ_Staffeln_Regex':
            # The comparison used to read 'SJ_Staffeln_Regex]' and therefore
            # never matched.
            kind = 'Staffel/RegEx'
        else:
            kind = 'Staffel'
        return '[' + kind + englisch + '] - '

    def settings_hash(self, refresh):
        """Return the SHA-256 hex digest of settings plus title pattern.

        :param refresh: when truthy, settings and pattern are re-read from
            config and list database before hashing.
        """
        if refresh:
            self.settings = self._collect_settings()
            self.pattern = self._build_pattern()
        set_sf = str(self.settings) + str(self.pattern)
        return hashlib.sha256(set_sf.encode('ascii', 'ignore')).hexdigest()

    def get_series_list(self, liste, series_type):
        """Return the non-empty titles of list ``liste``, spaces as dots.

        Side effects: sets ``self.listtype`` for ``series_type`` 1-3 and
        sets ``self.empty_list`` to True when no title was found.
        """
        if series_type == 1:
            self.listtype = " (RegEx)"
        elif series_type == 2:
            self.listtype = " (Staffeln)"
        elif series_type == 3:
            self.listtype = " (Staffeln/RegEx)"
        cont = ListDb(self.dbfile, liste).retrieve()
        titles = []
        if cont:
            titles = [title.replace(" ", ".") for title in cont if title]
        if not titles:
            self.empty_list = True
        return titles

    def parse_download(self, series_url, title, language_id):
        """Look up ``title`` on its series page and forward a usable link.

        :param series_url: URL of the series API endpoint of the feed entry.
        :param title: release title.
        :param language_id: 2 requests the English listing, anything else
            the German one.
        :return: the result of ``send_package`` when a link is forwarded,
            otherwise ``None``. Any failure while querying or parsing the
            series page is reported and swallowed.
        """
        if not check_valid_release(title, self.retail_only, self.hevc_retail,
                                   self.dbfile):
            self.log_debug(
                title +
                u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
            )
            return
        if self.filename == 'MB_Staffeln':
            if not self.config.get("seasonpacks"):
                # Two season markers in one title, e.g. "S01-S03".
                staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
                if staffelpack:
                    self.log_debug("%s - Release ignoriert (Staffelpaket)" %
                                   title)
                    return
            if not re.search(self.seasonssource, title.lower()):
                self.log_debug(title + " - Release hat falsche Quelle")
                return
        try:
            lang = 'EN' if language_id == 2 else 'DE'
            # Timestamp with the dot removed and the last three digits cut,
            # appended as "_" query parameter.
            epoch = str(datetime.datetime.now().timestamp()).replace('.',
                                                                     '')[:-3]
            api_url = series_url + '?lang=' + lang + '&_=' + epoch
            response = get_url(api_url, self.configfile, self.dbfile,
                               self.scraper)
            info = json.loads(response)

            is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title,
                                    re.IGNORECASE)
            if is_episode:
                # Episodes are listed under their season release on the
                # page: turn the episode title into a regex for that
                # season's title.
                episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*',
                                            is_episode[0])[0].lower()
                season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*',
                                           is_episode[0])[0].lower()
                season_title = rreplace(
                    title.lower().replace(episode_string, ''), "-", ".*",
                    1).lower()
                season_title = season_title.replace(".untouched",
                                                    ".*").replace(
                                                        ".dd+51", ".dd.51")
                episode = str(int(episode_string.replace("e", "")))
                season = str(int(season_string.replace("s", "")))
                episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german',
                                          season_title, re.IGNORECASE)
                if episode_name:
                    season_title = season_title.replace(episode_name[0], '')
                for tag in (".h264", ".x264"):
                    season_title = season_title.replace(tag, ".*264")
                for tag in (".web-rip", ".webrip", ".webdl", ".web-dl"):
                    season_title = season_title.replace(tag, ".web.*")
            else:
                season = False
                episode = False
                season_title = title
                multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)',
                                               season_title, re.IGNORECASE)
                if multiple_episodes:
                    season_title = season_title.replace(
                        multiple_episodes[0], '.*')

            content = BeautifulSoup(info['html'], 'lxml')
            releases = content.find(
                "small", text=re.compile(season_title,
                                         re.IGNORECASE)).parent.parent.parent
            links = releases.findAll("div", {'class': 'row'})[1].findAll('a')
            valid = False
            # After the loop ``download_link`` is the first link with an
            # accepted hoster, or else the last link seen. With no links at
            # all it stays unbound and the fallback path below ends up in
            # the except handler.
            for link in links:
                download_link = link['href']
                if check_hoster(link.text.replace('\n', ''), self.configfile):
                    valid = True
                    break
            if not valid and not self.hoster_fallback:
                storage = self.db.retrieve_all(title)
                if 'added' not in storage and 'notdl' not in storage:
                    wrong_hoster = '[SF/Hoster fehlt] - ' + title
                    if 'wrong_hoster' not in storage:
                        self.log_info(wrong_hoster)
                        self.db.store(title, 'wrong_hoster')
                        notify([wrong_hoster], self.configfile)
                    else:
                        self.log_debug(wrong_hoster)
            else:
                return self.send_package(title, download_link, language_id,
                                         season, episode)
        except Exception as e:
            # Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt/SystemExit.
            print(
                u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!"
            )
            self.log_debug(title + " - Fehler bei Download-Prüfung: " +
                           str(e))

    def send_package(self, title, download_link, language_id, season, episode):
        """Hand ``download_link`` to ``add_decrypt`` unless already known.

        :param download_link: path relative to the ``sf`` host.
        :param season: season number, or a falsy value for season packs.
        :param episode: episode number, or a falsy value for season packs.
        :return: the log entry string when the release was added, else
            ``None``.
        """
        link_placeholder = self._link_placeholder(language_id)
        try:
            storage = self.db.retrieve_all(title)
        except Exception as e:
            self.log_debug("Fehler bei Datenbankzugriff: %s, Grund: %s" %
                           (e, title))
            return

        if 'added' in storage or 'notdl' in storage:
            self.log_debug(title + " - Release ignoriert (bereits gefunden)")
            return

        download_link = 'https://' + self.sf + download_link
        if season and episode:
            # Insert season/episode in front of the "_" query parameter.
            download_link = download_link.replace(
                '&_=', '&season=' + str(season) + '&episode=' +
                str(episode) + '&_=')

        download = add_decrypt(title, download_link, self.sf, self.dbfile)
        if download:
            self.db.store(title, 'added')
            log_entry = link_placeholder + title + ' - [SF]'
            self.log_info(log_entry)
            notify(["[Click'n'Load notwendig] - " + log_entry],
                   self.configfile)
            return log_entry

    def periodical_task(self):
        """Crawl the update pages of up to eight days and queue matches.

        :return: ``self.device`` normally; ``False`` when a day's feed is
            empty or could not be parsed.
            NOTE(review): the ``False`` return is inconsistent with every
            other exit and skips persisting the crawl state - kept because
            callers are not visible here.
        """
        if not self.sf:
            return self.device

        if self.filename in self._REGEX_LISTS:
            if not self.config.get('regex'):
                self.log_debug("Suche für SF-Regex deaktiviert!")
                return self.device
        elif self.filename == 'MB_Staffeln':
            if not self.config.get('crawlseasons'):
                self.log_debug("Suche für SF-Staffeln deaktiviert!")
                return self.device
        if self.empty_list:
            self.log_debug("Liste ist leer. Stoppe Suche für Serien!" +
                           self.listtype)
            return self.device

        rejectlist = self.config.get("rejectlist")
        try:
            if len(rejectlist) > 0:
                reject = rejectlist.replace(",", "|").lower()
            else:
                reject = r"^unmatchable$"
        except TypeError:
            # rejectlist is not a sized value (e.g. None).
            reject = r"^unmatchable$"

        set_sf = self.settings_hash(False)
        # Conditional requests (If-Modified-Since) are only used while the
        # settings are unchanged since the last stored crawl.
        use_headers = self.last_set_sf == set_sf
        if self.filename in self._SEASON_LISTS:
            feed_type = "seasons"
        else:
            feed_type = "episodes"

        header = False
        response = False
        # Stays None when the loop below does not run (self.day is never
        # reset); previously that led to a NameError further down.
        sha_sf = None

        while self.day < 8:
            try:
                delta = (
                    datetime.datetime.now() -
                    datetime.timedelta(days=self.day)).strftime("%Y-%m-%d")
                updates_url = 'https://' + self.sf + '/updates/' + delta
                if use_headers:
                    result = get_url_headers(updates_url, self.configfile,
                                             self.dbfile, self.headers,
                                             self.scraper)
                    self.scraper = result[1]
                    response = result[0]
                    body = response.text
                else:
                    response = get_url(updates_url, self.configfile,
                                       self.dbfile, self.scraper)
                    body = response
                feed = sf_releases_to_feedparser_dict(
                    body, feed_type, 'https://' + self.sf, True)
            except Exception:
                print(u"SF hat die Feed-API angepasst. Breche Suche ab!")
                feed = False

            if use_headers and response:
                if response.status_code == 304:
                    self.log_debug(
                        "SF-Feed seit letztem Aufruf nicht aktualisiert - breche  Suche ab!"
                    )
                    return self.device
                header = True

            self.day += 1

            if feed and feed.entries:
                first_post_sf = feed.entries[0]
                concat_sf = first_post_sf.title + first_post_sf.published + str(
                    self.settings) + str(self.pattern)
                sha_sf = hashlib.sha256(concat_sf.encode(
                    'ascii', 'ignore')).hexdigest()
            else:
                self.log_debug("Feed ist leer - breche  Suche ab!")
                return False

            for post in feed.entries:
                concat = post.title + post.published + \
                         str(self.settings) + str(self.pattern)
                sha = hashlib.sha256(concat.encode('ascii',
                                                   'ignore')).hexdigest()
                if sha == self.last_sha_sf:
                    self.log_debug("Feed ab hier bereits gecrawlt (" +
                                   post.title + ") - breche  Suche ab!")
                    break

                series_url = post.series_url
                title = post.title.replace("-", "-")
                lowered = title.lower()

                if self.filename in self._REGEX_LISTS:
                    if not self.config.get("regex"):
                        continue
                    language_id = self._language_id(title)
                    if not language_id:
                        self.log_debug(
                            "%s - Englische Releases deaktiviert" % title)
                        continue
                    m = re.search(self.pattern, lowered)
                    if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                        # No resolution tag: retry with "480p" in the list
                        # regex replaced by a wildcard.
                        m = re.search(self.pattern.replace("480p", "."),
                                      lowered)
                        self.quality = "480p"
                    if m:
                        for resolution in self._HIGH_RESOLUTIONS:
                            if resolution in lowered:
                                self.quality = resolution
                        if re.search(reject, lowered):
                            # Regex lists only log a reject hit; the release
                            # is still processed.
                            self.log_debug(
                                title +
                                " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                            )
                        title = re.sub(r'\[.*\] ', '', post.title)
                        self.parse_download(series_url, title, language_id)
                    continue

                # Plain lists. The 480p handling used to hang off the
                # ``if m:`` of the other branch and was unreachable, so
                # nothing was found with quality set to 480p.
                wants_480p = self.config.get("quality") == '480p'
                if not re.search(self.pattern, lowered):
                    continue
                language_id = self._language_id(title)
                if not language_id:
                    self.log_debug(
                        "%s - Englische Releases deaktiviert" % title)
                    continue

                if wants_480p:
                    # 480p releases carry no resolution tag: skip anything
                    # tagged with a higher resolution.
                    if any(resolution in lowered
                           for resolution in self._HIGH_RESOLUTIONS):
                        continue
                    reject_message = " Release ignoriert (basierend auf rejectlist-Einstellung)"
                else:
                    if not re.search(self.quality, lowered):
                        continue
                    reject_message = " - Release ignoriert (basierend auf rejectlist-Einstellung)"

                if re.search(reject, lowered):
                    self.log_debug(title + reject_message)
                    continue
                if self.rsscrawler.get("surround"):
                    if not re.match(
                            r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                            title):
                        self.log_debug(
                            title +
                            " - Release ignoriert (kein Mehrkanalton)")
                        continue
                if wants_480p:
                    title = re.sub(r'\[.*\] ', '', post.title)
                try:
                    storage = self.db.retrieve_all(title)
                except Exception as e:
                    self.log_debug(
                        "Fehler bei Datenbankzugriff: %s, Grund: %s" %
                        (e, title))
                    return self.device
                if 'added' in storage:
                    self.log_debug(
                        title + " - Release ignoriert (bereits gefunden)")
                    continue
                self.parse_download(series_url, title, language_id)

        if set_sf:
            # Persist the crawl state only if the settings did not change
            # while crawling.
            new_set_sf = self.settings_hash(True)
            if set_sf == new_set_sf:
                self.cdc.delete("SFSet-" + self.filename)
                self.cdc.store("SFSet-" + self.filename, set_sf)
                if sha_sf is not None:
                    self.cdc.delete("SF-" + self.filename)
                    self.cdc.store("SF-" + self.filename, sha_sf)

        if header and response:
            self.cdc.delete("SFHeaders-" + self.filename)
            self.cdc.store("SFHeaders-" + self.filename,
                           response.headers['date'])

        return self.device
예제 #3
0
파일: ombi.py 프로젝트: evilmon/RSScrawler
def ombi(configfile, dbfile, device, log_debug):
    """Process approved, not yet available Ombi requests.

    Reads the Ombi URL and API key from the 'Ombi' config section, fetches
    the movie and TV requests and hands every approved, unavailable item
    that is not yet marked 'added' in the 'Ombi' database table to the
    search/download helpers.

    :param configfile: path/handle passed through to RssConfig and helpers
    :param dbfile: path/handle passed through to RssDb and helpers
    :param device: device object; returned unchanged on normal completion
    :param log_debug: callable taking one message string
    :return: ``device`` when Ombi is not configured or processing finished,
             ``False`` when the Ombi API could not be queried
    """
    db = RssDb(dbfile, 'Ombi')
    config = RssConfig('Ombi', configfile)
    url = config.get('url')
    api = config.get('api')

    if not url or not api:
        return device

    english = RssConfig('RSScrawler', configfile).get('english')

    try:
        requested_movies = requests.get(url + '/api/v1/Request/movie',
                                        headers={'ApiKey': api})
        requested_movies = json.loads(requested_movies.text)
        requested_shows = requests.get(url + '/api/v1/Request/tv',
                                       headers={'ApiKey': api})
        requested_shows = json.loads(requested_shows.text)
    except Exception:
        # Best effort: any network or JSON failure counts as "unreachable".
        # NOTE(review): this path returns False while every other path
        # returns the device - kept because callers may rely on it.
        log_debug("Ombi ist nicht erreichbar!")
        return False

    # Starts as False; replaced by whatever the imdb helpers hand back so
    # that later lookups can reuse it.
    scraper = False

    for r in requested_movies:
        if not r.get("approved") or r.get("available"):
            continue
        imdb_id = r.get("imdbId")
        if db.retrieve('movie_' + str(imdb_id)) == 'added':
            continue

        response = imdb_movie(imdb_id, configfile, dbfile, scraper)
        title = response[0]
        if not title:
            # str() so that a request without an IMDb id cannot raise here.
            log_debug("Titel für IMDB-ID nicht abrufbar: " + str(imdb_id))
            continue

        scraper = response[1]
        best_result = search.best_result_bl(title, configfile, dbfile)
        print(u"Film: " + title + u" durch Ombi hinzugefügt.")
        if best_result:
            search.download_bl(best_result, device, configfile, dbfile)
        if english:
            # Second search using the title as stored in the Ombi request.
            title = r.get('title')
            best_result = search.best_result_bl(title, configfile, dbfile)
            print(u"Film: " + title + u" durch Ombi hinzugefügt.")
            if best_result:
                search.download_bl(best_result, device, configfile, dbfile)
        db.store('movie_' + str(imdb_id), 'added')

    for r in requested_shows:
        imdb_id = r.get("imdbId")
        show_key = 'show_' + str(imdb_id) + '_'
        infos = None  # looked up lazily, at most once per show
        for cr in r.get("childRequests") or []:
            if not cr.get("approved") or cr.get("available"):
                continue
            for season in cr.get("seasonRequests") or []:
                sn = season.get("seasonNumber")
                s = "S" + _ombi_two_digits(sn)

                # Episodes that are neither available nor already handled.
                eps = []
                for episode in season.get("episodes") or []:
                    if episode.get("available"):
                        continue
                    enr = episode.get("episodeNumber")
                    se = s + "E" + _ombi_two_digits(enr)
                    if not db.retrieve(show_key + se) == 'added':
                        eps.append(enr)
                if not eps:
                    continue

                if not infos:
                    infos = imdb_show(imdb_id, configfile, dbfile, scraper)
                if not infos:
                    continue
                title = infos[0]
                all_eps = infos[1]
                scraper = infos[2]

                check_sn = all_eps.get(sn) if all_eps else False
                if not check_sn:
                    continue

                if len(check_sn) > len(eps):
                    # Only part of the season is missing: try episode by
                    # episode, fall back to the whole season on failure.
                    for ep in eps:
                        se = s + "E" + _ombi_two_digits(ep)
                        payload = search.best_result_sj(
                            title, configfile, dbfile)
                        if payload:
                            payload = decode_base64(payload).split("|")
                            payload = encode_base64(
                                payload[0] + "|" + payload[1] + "|" + se)
                            added_episode = search.download_sj(
                                payload, configfile, dbfile)
                            if not added_episode:
                                payload = decode_base64(payload).split("|")
                                payload = encode_base64(
                                    payload[0] + "|" + payload[1] + "|" + s)
                                add_season = search.download_sj(
                                    payload, configfile, dbfile)
                                # Every pending episode is marked, whether
                                # or not the season request succeeded.
                                for pending in eps:
                                    db.store(
                                        show_key + s + "E" +
                                        _ombi_two_digits(pending), 'added')
                                if not add_season:
                                    log_debug(u"Konnte kein Release für " +
                                              title + " " + se + " finden.")
                                break
                        db.store(show_key + se, 'added')
                else:
                    # All known episodes are missing: request the season.
                    payload = search.best_result_sj(title, configfile, dbfile)
                    if payload:
                        payload = decode_base64(payload).split("|")
                        payload = encode_base64(
                            payload[0] + "|" + payload[1] + "|" + s)
                        search.download_sj(payload, configfile, dbfile)
                    for ep in eps:
                        db.store(
                            show_key + s + "E" + _ombi_two_digits(ep),
                            'added')
                print(u"Serie/Staffel/Episode: " + title +
                      u" durch Ombi hinzugefügt.")

    return device


def _ombi_two_digits(number):
    """Return str(number), left-padded with one '0' if it is one character."""
    text = str(number)
    if len(text) == 1:
        text = "0" + text
    return text
예제 #4
0
def download(configfile,
             dbfile,
             device,
             title,
             subdir,
             old_links,
             password,
             full_path=None,
             autostart=False):
    """Add a package to the device's linkgrabber and record it in 'crawldog'.

    :param configfile: passed to get_device and RssConfig
    :param dbfile: passed to RssDb
    :param device: device to use; replaced via get_device() when it is
                   falsy or is_device() rejects it
    :param title: package name, also the key in the 'crawldog' table
    :param subdir: folder prefix, used when the 'Crawljobs' option 'subdir'
                   is set and no full_path is given
    :param old_links: a single link or a list of links (duplicates dropped,
                      order kept)
    :param password: used as both extract and download password
    :param full_path: destination folder overriding the subdir logic
    :param autostart: forwarded as the "autostart" parameter
    :return: the device that was used, or False when no device could be
             obtained after a token expiry or a MYJDException occurred
    """
    try:
        if not device or not is_device(device):
            device = get_device(configfile)

        # De-duplicate while keeping the first occurrence of every link.
        if isinstance(old_links, list):
            unique_links = []
            for candidate in old_links:
                if candidate not in unique_links:
                    unique_links.append(candidate)
        else:
            unique_links = [old_links]
        # The links are submitted as the list's text form without blanks.
        links = str(unique_links).replace(" ", "")

        use_subdir = RssConfig('Crawljobs', configfile).get("subdir")

        if full_path:
            path = full_path
        elif use_subdir:
            path = subdir + "/<jd:packagename>"
        else:
            path = "<jd:packagename>"
        priority = "LOWER" if "Remux" in path else "DEFAULT"

        def package_params():
            # Fresh parameter list per call; shared by first try and retry.
            return [{
                "autostart": autostart,
                "links": links,
                "packageName": title,
                "extractPassword": password,
                "priority": priority,
                "downloadPassword": password,
                "destinationFolder": path,
                "comment": "RSScrawler by rix1337",
                "overwritePackagizerRules": False
            }]

        try:
            device.linkgrabber.add_links(params=package_params())
        except rsscrawler.myjdapi.TokenExpiredException:
            # Token expired: fetch a new device once and retry.
            device = get_device(configfile)
            if not device or not is_device(device):
                return False
            device.linkgrabber.add_links(params=package_params())

        crawldog = RssDb(dbfile, 'crawldog')
        if crawldog.retrieve(title):
            # Already known: replace the entry with the 'retried' marker.
            crawldog.delete(title)
            crawldog.store(title, 'retried')
        else:
            crawldog.store(title, 'added')
        return device
    except rsscrawler.myjdapi.MYJDException as e:
        print(u"Fehler bei der Verbindung mit MyJDownloader: " + str(e))
        return False
예제 #5
0
class SJ:
    """Feed crawler for the site configured as hostname 'sj'.

    One instance handles one list (``filename``): it fetches the "latest
    releases" API day by day, matches every post against the list pattern
    and the configured filters, and hands matching releases to
    ``parse_download`` / ``send_package``.
    """

    # Keys of the crawler's own config section that go into the settings
    # hash (in this order).
    _HASHED_SETTINGS = ("quality", "rejectlist", "regex", "hevc_retail",
                        "retail_only", "hoster_fallback")
    _REGEX_LISTS = ('SJ_Serien_Regex', 'SJ_Staffeln_Regex')
    _SEASON_LISTS = ("MB_Staffeln", "SJ_Staffeln_Regex")
    _HIGHER_QUALITIES = ("720p", "1080p", "2160p")

    def __init__(self, configfile, dbfile, device, logging, scraper, filename,
                 internal_name):
        """Load configuration, databases and the list pattern.

        :param configfile: passed to every RssConfig and helper call
        :param dbfile: passed to every RssDb / ListDb and helper call
        :param device: returned by ``periodical_task``
        :param logging: object providing ``info``, ``error`` and ``debug``
        :param scraper: passed to the URL helpers, replaced by what
                        ``get_url_headers`` returns
        :param filename: name of the list to crawl; also selects the mode
        :param internal_name: name of this crawler's config section
        """
        self._INTERNAL_NAME = internal_name
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = RssConfig('Hostnames', self.configfile)
        self.sj = self.hostnames.get('sj')

        self.config = RssConfig(self._INTERNAL_NAME, self.configfile)
        self.rsscrawler = RssConfig("RSScrawler", self.configfile)
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.hosters = RssConfig("Hosters", configfile).get_section()
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.filename = filename
        self.db = RssDb(self.dbfile, 'rsscrawler')
        self.quality = self.config.get("quality")
        self.cdc = RssDb(self.dbfile, 'cdc')
        self.last_set_sj = self.cdc.retrieve("SJSet-" + self.filename)
        self.last_sha_sj = self.cdc.retrieve("SJ-" + self.filename)
        self.headers = {
            'If-Modified-Since':
            str(self.cdc.retrieve("SJHeaders-" + self.filename))
        }
        self.settings = self._collect_settings()

        self.empty_list = False
        if self.filename == 'SJ_Staffeln_Regex':
            self.level = 3
        elif self.filename == 'MB_Staffeln':
            self.seasonssource = self.config.get('seasonssource').lower()
            self.level = 2
        elif self.filename == 'SJ_Serien_Regex':
            self.level = 1
        else:
            self.level = 0

        # Must be initialised BEFORE the pattern is built: get_series_list
        # sets the real value, and resetting it afterwards would lose it.
        self.listtype = ""
        self.pattern = self._build_pattern()

        self.day = 0

    def _collect_settings(self):
        """Return the list of setting values that feed the settings hash."""
        values = [
            self.rsscrawler.get("english"),
            self.rsscrawler.get("surround"),
            self.hosters,
        ]
        values.extend(self.config.get(key) for key in self._HASHED_SETTINGS)
        return values

    def _build_pattern(self):
        """Return the anchored alternation regex built from the list."""
        return r'^(' + "|".join(
            self.get_series_list(self.filename, self.level)).lower() + ')'

    def settings_hash(self, refresh):
        """Return the SHA-256 hex digest of the settings and list pattern.

        :param refresh: when truthy, re-read settings and list first
        """
        if refresh:
            self.settings = self._collect_settings()
            self.pattern = self._build_pattern()
        set_sj = str(self.settings) + str(self.pattern)
        return hashlib.sha256(set_sj.encode('ascii', 'ignore')).hexdigest()

    def get_series_list(self, liste, series_type):
        """Return the titles of list ``liste`` with blanks replaced by dots.

        Sets ``self.listtype`` for series types 1-3 and sets
        ``self.empty_list`` when no usable title was found.
        """
        if series_type == 1:
            self.listtype = " (RegEx)"
        elif series_type == 2:
            self.listtype = " (Staffeln)"
        elif series_type == 3:
            self.listtype = " (Staffeln/RegEx)"
        cont = ListDb(self.dbfile, liste).retrieve()
        titles = []
        if cont:
            titles = [title.replace(" ", ".") for title in cont if title]
        if not titles:
            self.empty_list = True
        return titles

    def parse_download(self, series_url, title, language_id):
        """Validate a matched release and forward it to ``send_package``.

        Looks the release up in the series' release API, checks that at
        least one of its hosters is accepted (unless hoster fallback is
        enabled) and then calls ``send_package``.

        :return: the log entry returned by ``send_package``, otherwise None
        """
        if not check_valid_release(title, self.retail_only, self.hevc_retail,
                                   self.dbfile):
            self.log_debug(
                title +
                u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
            )
            return
        if self.filename == 'MB_Staffeln':
            if not self.config.get("seasonpacks"):
                staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
                if staffelpack:
                    self.log_debug("%s - Release ignoriert (Staffelpaket)" %
                                   title)
                    return
            if not re.search(self.seasonssource, title.lower()):
                self.log_debug(title + " - Release hat falsche Quelle")
                return
        try:
            series_info = get_url(series_url, self.configfile, self.dbfile)
            series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
            api_url = 'https://' + self.sj + '/api/media/' + series_id + '/releases'

            response = get_url(api_url, self.configfile, self.dbfile,
                               self.scraper)
            seasons = json.loads(response)
            for season in seasons:
                season = seasons[season]
                for item in season['items']:
                    if item['name'] != title:
                        continue
                    valid = False
                    for hoster in item['hoster']:
                        if hoster:
                            if check_hoster(hoster, self.configfile):
                                valid = True
                    if valid or self.hoster_fallback:
                        return self.send_package(title, series_url,
                                                 language_id)
                    storage = self.db.retrieve_all(title)
                    if 'added' not in storage and 'notdl' not in storage:
                        wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                        if 'wrong_hoster' not in storage:
                            # Notify only once per release.
                            print(wrong_hoster)
                            self.db.store(title, 'wrong_hoster')
                            notify([wrong_hoster], self.configfile)
                        else:
                            self.log_debug(wrong_hoster)
        except Exception as e:
            # Best effort: any failure while talking to or parsing the API
            # aborts the check for this release only.
            print(
                u"SJ hat die Serien-API angepasst. Breche Download-Prüfung ab!"
            )
            self.log_debug("Fehler bei Download-Prüfung: %s, Grund: %s" %
                           (e, title))

    def _link_placeholder(self, language_id):
        """Return the log prefix for this list and the given language id."""
        englisch = "/Englisch" if language_id == 2 else ""
        if self.filename == 'SJ_Serien_Regex':
            return '[Episode/RegEx' + englisch + '] - '
        if self.filename == 'SJ_Serien':
            return '[Episode' + englisch + '] - '
        if self.filename == 'SJ_Staffeln_Regex':
            return '[Staffel/RegEx' + englisch + '] - '
        return '[Staffel' + englisch + '] - '

    def send_package(self, title, series_url, language_id):
        """Hand a release to ``add_decrypt`` unless it is already known.

        :return: the log entry when the release was added, otherwise None
        """
        link_placeholder = self._link_placeholder(language_id)
        try:
            storage = self.db.retrieve_all(title)
        except Exception as e:
            self.log_debug("Fehler bei Datenbankzugriff: %s, Grund: %s" %
                           (e, title))
            return

        if 'added' in storage or 'notdl' in storage:
            self.log_debug(title + " - Release ignoriert (bereits gefunden)")
        else:
            download = add_decrypt(title, series_url, self.sj, self.dbfile)
            if download:
                self.db.store(title, 'added')
                log_entry = link_placeholder + title + ' - [SJ]'
                self.log_info(log_entry)
                notify(["[Click'n'Load notwendig] - " + log_entry],
                       self.configfile)
                return log_entry

    def _language_id(self, title):
        """Return 1 for German titles, 2 if English is enabled, else 0."""
        if '.german.' in title.lower():
            return 1
        if self.rsscrawler.get('english'):
            return 2
        return 0

    def _crawl_regex_post(self, post, title, series_url, reject):
        """Handle one feed post for the two RegEx lists."""
        language_id = self._language_id(title)
        if not language_id:
            self.log_debug("%s - Englische Releases deaktiviert" % title)
            return

        title_lower = title.lower()
        m = re.search(self.pattern, title_lower)
        if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
            # No resolution tag in the title: retry with "480p" in the
            # pattern replaced by a wildcard.
            m = re.search(self.pattern.replace("480p", "."), title_lower)
            self.quality = "480p"
        if not m:
            return

        for quality in self._HIGHER_QUALITIES:
            if quality in title_lower:
                self.quality = quality
        if re.search(reject, title_lower):
            # RegEx matches are kept even if the rejectlist matches.
            self.log_debug(
                title +
                " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
            )
        title = re.sub(r'\[.*\] ', '', post.title)
        self.parse_download(series_url, title, language_id)

    def _crawl_list_post(self, post, title, series_url, reject):
        """Handle one feed post for the plain (non-RegEx) lists.

        :return: False when the database could not be read and crawling
                 must stop, True otherwise
        """
        title_lower = title.lower()
        if not re.search(self.pattern, title_lower):
            return True

        language_id = self._language_id(title)
        if not language_id:
            self.log_debug("%s - Englische Releases deaktiviert" % title)
            return True

        low_quality = self.config.get("quality") == '480p'
        if low_quality:
            # 480p releases are recognised by the absence of a higher tag.
            if any(q in title_lower for q in self._HIGHER_QUALITIES):
                return True
        elif not re.search(self.quality, title_lower):
            return True

        if re.search(reject, title_lower):
            self.log_debug(
                title +
                " - Release ignoriert (basierend auf rejectlist-Einstellung)")
            return True
        if self.rsscrawler.get("surround"):
            if not re.match(r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                            title):
                self.log_debug(title +
                               " - Release ignoriert (kein Mehrkanalton)")
                return True

        if low_quality:
            title = re.sub(r'\[.*\] ', '', post.title)
        try:
            storage = self.db.retrieve_all(title)
        except Exception as e:
            self.log_debug("Fehler bei Datenbankzugriff: %s, Grund: %s" %
                           (e, title))
            return False
        if 'added' in storage:
            self.log_debug(title + " - Release ignoriert (bereits gefunden)")
            return True
        self.parse_download(series_url, title, language_id)
        return True

    def periodical_task(self):
        """Crawl the latest-releases feed for days ``self.day`` .. 7.

        :return: ``self.device``; ``False`` when a day's feed is empty or
                 could not be parsed
        """
        if not self.sj:
            return self.device

        if self.filename in self._REGEX_LISTS:
            if not self.config.get('regex'):
                self.log_debug("Suche für SJ-Regex deaktiviert!")
                return self.device
        elif self.filename == 'MB_Staffeln':
            if not self.config.get('crawlseasons'):
                self.log_debug("Suche für SJ-Staffeln deaktiviert!")
                return self.device
        if self.empty_list:
            self.log_debug("Liste ist leer. Stoppe Suche für Serien!" +
                           self.listtype)
            return self.device

        rejectlist = self.config.get("rejectlist")
        try:
            reject = rejectlist.replace(",", "|").lower() if len(
                rejectlist) > 0 else r"^unmatchable$"
        except TypeError:
            # Option not set (len() of a non-sized value).
            reject = r"^unmatchable$"

        set_sj = self.settings_hash(False)

        header = False
        response = False
        # Stays None when the loop below does not run (self.day is never
        # reset, so a second call on the same instance skips it).
        sha_sj = None

        base_url = 'https://' + self.sj
        if self.filename in self._SEASON_LISTS:
            feed_kind = "seasons"
        else:
            feed_kind = "episodes"

        while self.day < 8:
            feed_url = base_url + '/api/releases/latest/' + str(self.day)
            if self.last_set_sj == set_sj:
                # Settings unchanged: conditional request with the stored
                # If-Modified-Since header.
                try:
                    response = get_url_headers(feed_url, self.configfile,
                                               self.dbfile, self.headers,
                                               self.scraper)
                    self.scraper = response[1]
                    response = response[0]
                    feed = j_releases_to_feedparser_dict(
                        response.text, feed_kind, base_url, True)
                except Exception:
                    print(u"SJ hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False

                if response:
                    if response.status_code == 304:
                        self.log_debug(
                            "SJ-Feed seit letztem Aufruf nicht aktualisiert - breche  Suche ab!"
                        )
                        return self.device
                    header = True
            else:
                try:
                    response = get_url(feed_url, self.configfile, self.dbfile,
                                       self.scraper)
                    feed = j_releases_to_feedparser_dict(
                        response, feed_kind, base_url, True)
                except Exception:
                    print(u"SJ hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False

            self.day += 1

            if feed and feed.entries:
                first_post_sj = feed.entries[0]
                concat_sj = first_post_sj.title + first_post_sj.published + str(
                    self.settings) + str(self.pattern)
                sha_sj = hashlib.sha256(concat_sj.encode(
                    'ascii', 'ignore')).hexdigest()
            else:
                self.log_debug("Feed ist leer - breche  Suche ab!")
                # NOTE(review): every other exit returns self.device; kept
                # as False because callers may rely on it - confirm.
                return False

            for post in feed.entries:
                concat = post.title + post.published + \
                         str(self.settings) + str(self.pattern)
                sha = hashlib.sha256(concat.encode('ascii',
                                                   'ignore')).hexdigest()
                if sha == self.last_sha_sj:
                    self.log_debug("Feed ab hier bereits gecrawlt (" +
                                   post.title + ") - breche  Suche ab!")
                    break

                series_url = post.series_url
                # NOTE(review): no-op as written; presumably one of the two
                # arguments was a look-alike dash originally - confirm.
                title = post.title.replace("-", "-")

                if self.filename in self._REGEX_LISTS:
                    if not self.config.get("regex"):
                        continue
                    self._crawl_regex_post(post, title, series_url, reject)
                elif not self._crawl_list_post(post, title, series_url,
                                               reject):
                    return self.device

        if set_sj:
            new_set_sj = self.settings_hash(True)
            if set_sj == new_set_sj:
                # Settings did not change while crawling: persist state.
                self.cdc.delete("SJSet-" + self.filename)
                self.cdc.store("SJSet-" + self.filename, set_sj)
                if sha_sj is not None:
                    self.cdc.delete("SJ-" + self.filename)
                    self.cdc.store("SJ-" + self.filename, sha_sj)

        if header and response:
            self.cdc.delete("SJHeaders-" + self.filename)
            self.cdc.store("SJHeaders-" + self.filename,
                           response.headers['date'])

        return self.device
예제 #6
0
파일: url.py 프로젝트: 9Mad-Max5/RSScrawler
def get_url_headers(url, configfile, dbfile, headers, scraper=False):
    """Issue a GET request for ``url`` with custom headers.

    The request goes through the configured proxy when the ``proxy``
    setting is truthy, otherwise it is sent directly. Before sending, the
    site code returned by ``check_is_site`` is looked up in the
    ``proxystatus`` / ``normalstatus`` tables of ``dbfile``; a truthy entry
    suppresses the corresponding request (presumably these entries flag
    the site as blocked for that access path - confirm against the code
    that writes them).

    :param url: address to fetch
    :param configfile: configuration file handed to ``RssConfig`` and
        ``check_is_site``
    :param dbfile: database file handed to ``RssDb``
    :param headers: headers forwarded to ``scraper.get``
    :param scraper: session-like object to reuse; when falsy a new one is
        created via ``cloudscraper.create_scraper()``
    :return: two-element list ``[response, scraper]``; ``response`` is the
        empty string when the request was suppressed or raised
    """
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    # Checked in this exact order; the first code contained in ``site`` wins.
    site_codes = ("SJ", "DJ", "SF", "MB", "HW", "FX", "HS", "NK", "DD", "FC")

    if not proxy:
        # Direct access: skip the request as soon as any matching site code
        # has a truthy entry in the 'normalstatus' table.
        try:
            if site and any(code in site and db_normal.retrieve(code)
                            for code in site_codes):
                return ["", scraper]
            return [scraper.get(url, headers=headers, timeout=30), scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]

    try:
        # Only the first matching code is considered, mirroring an
        # if/elif chain over the site codes.
        matched = None
        if site:
            matched = next(
                (code for code in site_codes if code in site), None)

        if matched is not None and db.retrieve(matched):
            # Truthy 'proxystatus' entry: either fall back to a direct
            # request (if enabled and not suppressed there too) or give up.
            direct_allowed = (config.get("fallback")
                              and not db_normal.retrieve(matched))
            if direct_allowed:
                return [
                    scraper.get(url, headers=headers, timeout=30),
                    scraper
                ]
            return ["", scraper]

        via_proxy = {'http': proxy, 'https': proxy}
        return [
            scraper.get(url,
                        headers=headers,
                        proxies=via_proxy,
                        timeout=30),
            scraper
        ]
    except Exception as e:
        print(u"Fehler beim Abruf von: " + url + " " + str(e))
        return ["", scraper]