Example 1
    def __init__(self, configfile, dbfile, device, logging, scraper, filename,
                 internal_name):
        self._INTERNAL_NAME = internal_name
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = RssConfig('Hostnames', self.configfile)
        self.sf = self.hostnames.get('sf')

        self.config = RssConfig(self._INTERNAL_NAME, self.configfile)
        self.rsscrawler = RssConfig("RSScrawler", self.configfile)
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.hosters = RssConfig("Hosters", configfile).get_section()
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.filename = filename
        self.db = RssDb(self.dbfile, 'rsscrawler')
        self.quality = self.config.get("quality")
        self.cdc = RssDb(self.dbfile, 'cdc')
        self.last_set_sf = self.cdc.retrieve("SFSet-" + self.filename)
        self.last_sha_sf = self.cdc.retrieve("SF-" + self.filename)
        self.headers = {
            'If-Modified-Since':
            str(self.cdc.retrieve("SFHeaders-" + self.filename))
        }
        settings = [
            "quality", "rejectlist", "regex", "hevc_retail", "retail_only",
            "hoster_fallback"
        ]
        self.settings = []
        self.settings.append(self.rsscrawler.get("english"))
        self.settings.append(self.rsscrawler.get("surround"))
        self.settings.append(self.hosters)
        for s in settings:
            self.settings.append(self.config.get(s))

        self.empty_list = False
        if self.filename == 'SJ_Staffeln_Regex':
            self.level = 3
        elif self.filename == 'MB_Staffeln':
            self.seasonssource = self.config.get('seasonssource').lower()
            self.level = 2
        elif self.filename == 'SJ_Serien_Regex':
            self.level = 1
        else:
            self.level = 0

        self.pattern = r'^(' + "|".join(
            self.get_series_list(self.filename, self.level)).lower() + ')'
        self.listtype = ""

        self.day = 0
Example 2
    def __init__(self, configfile, dbfile, device, logging, scraper):
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device
        self.config = RssConfig(self._INTERNAL_NAME, self.configfile)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.db = RssDb(self.dbfile, 'rsscrawler')
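This base-style __init__ reads self._INTERNAL_NAME without ever assigning it, so the concrete crawler classes presumably define it as a class attribute (Example 1 instead receives it as a constructor argument). A hypothetical sketch of such a subclass; the class name and value are invented for illustration:

class BL:
    # Hypothetical: defining _INTERNAL_NAME as a class attribute lets
    # RssConfig(self._INTERNAL_NAME, self.configfile) above resolve a config section.
    _INTERNAL_NAME = 'MB'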
Example 3
def myjd_download(configfile, dbfile, device, title, subdir, links, password):
    if device:
        is_episode = re.findall(r'[\w.\s]*S\d{1,2}(E\d{1,2})[\w.\s]*', title)
        if is_episode:
            exists = check_failed_link_exists(links, configfile, device)
            if exists:
                broken_title = False
                device = exists[0]
                old_title = exists[3]
                old_path = exists[4]
                try:
                    new_episode = is_episode.pop()
                except:
                    broken_title = True
                try:
                    old_episode = re.findall(
                        r'[\w.\s]*(?!S\d{1,2})((?:E\d{1,2}-E\d{1,2})|(?:E\d{1,2}E\d{1,2})|(?:E\d{1,2}-\d{1,2})|(?:E\d{1,2}))[\w.\s]*',
                        old_title).pop()
                    combined_episodes = new_episode + '-' + old_episode
                except:
                    broken_title = True

                if not broken_title:
                    linkids = exists[1]
                    package_id = [exists[2]]
                    new_title = title.replace(new_episode, combined_episodes)
                    new_path = old_path.replace(old_title, new_title)

                    device = move_to_new_package(configfile, device, linkids,
                                                 package_id, new_title,
                                                 new_path)
                    RssDb(dbfile, 'crawldog').store(new_title, 'added')
                    RssDb(dbfile, 'crawldog').delete(old_title)
                    return device

        device = download(configfile, dbfile, device, title, subdir, links,
                          password)
        if device:
            return device
    return False
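A standalone sketch of the episode-merging step in myjd_download(), run against invented titles; the regular expressions are the ones used above:

import re

title = "Some.Show.S01E02.German.720p.WEB.x264-GROUP"      # new download (invented title)
old_title = "Some.Show.S01E01.German.720p.WEB.x264-GROUP"  # failed package already in JDownloader (invented title)

new_episode = re.findall(r'[\w.\s]*S\d{1,2}(E\d{1,2})[\w.\s]*', title).pop()
old_episode = re.findall(
    r'[\w.\s]*(?!S\d{1,2})((?:E\d{1,2}-E\d{1,2})|(?:E\d{1,2}E\d{1,2})|(?:E\d{1,2}-\d{1,2})|(?:E\d{1,2}))[\w.\s]*',
    old_title).pop()

combined_episodes = new_episode + '-' + old_episode         # "E02-E01"
new_title = title.replace(new_episode, combined_episodes)
print(new_title)  # Some.Show.S01E02-E01.German.720p.WEB.x264-GROUP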
Example 4
def get_to_decrypt(dbfile):
    try:
        to_decrypt = RssDb(dbfile, 'to_decrypt').retrieve_all_titles()
        if to_decrypt:
            packages = []
            for package in to_decrypt:
                title = package[0]
                details = package[1].split('|')
                url = details[0]
                password = details[1]
                packages.append({
                    'name': title,
                    'url': url,
                    'password': password
                })
            return packages
        else:
            return False
    except:
        return False
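get_to_decrypt() only reads the 'to_decrypt' table; the writing side (add_decrypt(), called in Example 7) is not shown on this page. A plausible sketch of it, assuming RssDb.store(key, value) as used throughout these examples and the same 'url|password' value format that get_to_decrypt() splits on:

def add_decrypt(title, url, password, dbfile):
    # Hypothetical counterpart: persist the package so get_to_decrypt()
    # can later rebuild {'name': ..., 'url': ..., 'password': ...} from it.
    try:
        RssDb(dbfile, 'to_decrypt').store(title, url + '|' + password)
        return True
    except Exception:
        return False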
Example 5
def crawler(configfile, dbfile, device, rsscrawler, log_level, log_file,
            log_format):
    sys.stdout = Unbuffered(sys.stdout)

    logger = logging.getLogger('rsscrawler')
    logger.setLevel(log_level)

    console = logging.StreamHandler(stream=sys.stdout)
    formatter = logging.Formatter(log_format)
    console.setLevel(log_level)

    logfile = logging.handlers.RotatingFileHandler(log_file)
    logfile.setFormatter(formatter)
    logfile.setLevel(logging.INFO)

    logger.addHandler(logfile)
    logger.addHandler(console)

    if log_level == 10:
        logfile_debug = logging.handlers.RotatingFileHandler(
            log_file.replace("RSScrawler.log", "RSScrawler_DEBUG.log"))
        logfile_debug.setFormatter(formatter)
        logfile_debug.setLevel(10)
        logger.addHandler(logfile_debug)

    disable_request_warnings(InsecureRequestWarning)

    log_debug = logger.debug

    crawltimes = RssDb(dbfile, "crawltimes")

    arguments = docopt(__doc__, version='RSScrawler')
    if not arguments['--testlauf']:
        while True:
            try:
                if not device or not is_device(device):
                    device = get_device(configfile)
                scraper = check_url(configfile, dbfile)
                start_time = time.time()
                crawltimes.update_store("active", "True")
                crawltimes.update_store("start_time", start_time * 1000)
                log_debug("--------Alle Suchfunktion gestartet.--------")
                if device:
                    device = ombi(configfile, dbfile, device, log_debug)
                for task in search_pool(configfile, dbfile, device, logger,
                                        scraper):
                    name = task._INTERNAL_NAME
                    try:
                        file = " - Liste: " + task.filename
                    except AttributeError:
                        file = ""
                    log_debug("-----------Suchfunktion (" + name + file +
                              ") gestartet!-----------")
                    device = task.periodical_task()
                    log_debug("-----------Suchfunktion (" + name + file +
                              ") ausgeführt!-----------")
                end_time = time.time()
                total_time = end_time - start_time
                interval = int(rsscrawler.get('interval')) * 60
                random_range = random.randrange(0, interval // 4)
                wait = interval + random_range
                next_start = end_time + wait
                log_debug("-----Alle Suchfunktion ausgeführt (Dauer: " +
                          readable_time(total_time) +
                          ")! Wartezeit bis zum nächsten Suchlauf: " +
                          readable_time(wait))
                print(
                    time.strftime("%Y-%m-%d %H:%M:%S") +
                    u" - Alle Suchfunktion ausgeführt (Dauer: " +
                    readable_time(total_time) +
                    u")! Wartezeit bis zum nächsten Suchlauf: " +
                    readable_time(wait))
                crawltimes.update_store("end_time", end_time * 1000)
                crawltimes.update_store("total_time",
                                        readable_time(total_time))
                crawltimes.update_store("next_start", next_start * 1000)
                crawltimes.update_store("active", "False")
                time.sleep(wait)
                log_debug("-------------Wartezeit verstrichen-------------")
            except Exception:
                traceback.print_exc()
                time.sleep(10)
    else:
        try:
            if not device or not is_device(device):
                device = get_device(configfile)
            scraper = check_url(configfile, dbfile)
            start_time = time.time()
            log_debug("--------Testlauf gestartet.--------")
            if device:
                device = ombi(configfile, dbfile, device, log_debug)
            for task in search_pool(configfile, dbfile, device, logger,
                                    scraper):
                name = task._INTERNAL_NAME
                try:
                    file = " - Liste: " + task.filename
                except AttributeError:
                    file = ""
                log_debug("-----------Suchfunktion (" + name + file +
                          ") gestartet!-----------")
                task.periodical_task()
                log_debug("-----------Suchfunktion (" + name + file +
                          ") ausgeführt!-----------")
            end_time = time.time()
            total_time = end_time - start_time
            log_debug("---Testlauf ausgeführt (Dauer: " +
                      readable_time(total_time) + ")!---")
            print(
                time.strftime("%Y-%m-%d %H:%M:%S") +
                u" - Testlauf ausgeführt (Dauer: " +
                readable_time(total_time) + ")!")
        except Exception:
            traceback.print_exc()
            time.sleep(10)
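A standalone sketch of the wait-time computation between crawl runs above: the configured interval (in minutes) is converted to seconds and a random jitter of up to a quarter of the interval is added; the interval value here is an assumed example:

import random
import time

interval = int("30") * 60                            # e.g. rsscrawler.get('interval') == "30" (assumed)
wait = interval + random.randrange(0, interval // 4)
next_start = time.time() + wait                      # stored (in milliseconds) in the crawltimes table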
Example 6
def main():
    arguments = docopt(__doc__, version='RSScrawler')

    print(u"┌──────────────────────────────────────────────┐")
    print(u"  RSScrawler " + version + " von RiX")
    print(u"  https://github.com/rix1337/RSScrawler")
    print(u"└──────────────────────────────────────────────┘")

    if arguments['--docker']:
        configpath = "/config"
    else:
        configpath = files.config(arguments['--config'])
    configfile = os.path.join(configpath, "RSScrawler.ini")
    dbfile = os.path.join(configpath, "RSScrawler.db")

    print(u"Nutze das Verzeichnis " + configpath + u" für Einstellungen/Logs")

    log_level = logging.__dict__[arguments['--log-level']] if arguments[
        '--log-level'] in logging.__dict__ else logging.INFO
    log_file = os.path.join(configpath, 'RSScrawler.log')
    log_format = '%(asctime)s - %(message)s'

    hostnames = RssConfig('Hostnames', configfile)

    def clean_up_hostname(host, string):
        if '/' in string:
            string = string.replace('https://', '').replace('http://', '')
            string = re.findall(r'([a-z-.]*\.[a-z]*)', string)[0]
            hostnames.save(host, string)
        if re.match(r'.*[A-Z].*', string):
            hostnames.save(host, string.lower())
        if not string:
            print(u'Kein Hostname gesetzt: ' + host.upper() +
                  ' (Seite wird ignoriert!)')
        return string

    set_hostnames = {}
    list_names = ['sj', 'dj', 'sf', 'mb', 'hw', 'hs', 'fx', 'nk', 'fc']
    for name in list_names:
        hostname = clean_up_hostname(name, hostnames.get(name))
        if hostname:
            set_hostnames[name] = hostname

    if not arguments['--testlauf'] and not set_hostnames:
        print(
            u'Keine Hostnamen in der RSScrawler.ini gefunden! Beende RSScrawler!'
        )
        time.sleep(10)
        sys.exit(1)

    disable_request_warnings(InsecureRequestWarning)

    if arguments['--testlauf']:
        device = False
    else:
        if not os.path.exists(configfile):
            if arguments['--docker']:
                if arguments['--jd-user'] and arguments['--jd-pass']:
                    device = files.myjd_input(configfile, arguments['--port'],
                                              arguments['--jd-user'],
                                              arguments['--jd-pass'],
                                              arguments['--jd-device'])
                else:
                    device = False
            else:
                device = files.myjd_input(configfile, arguments['--port'],
                                          arguments['--jd-user'],
                                          arguments['--jd-pass'],
                                          arguments['--jd-device'])
        else:
            rsscrawler = RssConfig('RSScrawler', configfile)
            user = rsscrawler.get('myjd_user')
            password = rsscrawler.get('myjd_pass')
            if user and password:
                device = get_device(configfile)
                if not device:
                    device = get_if_one_device(user, password)
                    if device:
                        print(u"Gerätename " + device +
                              " automatisch ermittelt.")
                        rsscrawler.save('myjd_device', device)
                        device = get_device(configfile)
            else:
                device = files.myjd_input(configfile, arguments['--port'],
                                          arguments['--jd-user'],
                                          arguments['--jd-pass'],
                                          arguments['--jd-device'])

        if not device and not arguments['--testlauf']:
            print(
                u'My JDownloader Zugangsdaten fehlerhaft! Beende RSScrawler!')
            time.sleep(10)
            sys.exit(1)
        else:
            print(u"Erfolgreich mit My JDownloader verbunden. Gerätename: " +
                  device.name)

    rsscrawler = RssConfig('RSScrawler', configfile)

    port = int(rsscrawler.get("port"))
    docker = False
    if arguments['--docker']:
        port = 9090
        docker = True
    elif arguments['--port']:
        port = int(arguments['--port'])

    if rsscrawler.get("prefix"):
        prefix = '/' + rsscrawler.get("prefix")
    else:
        prefix = ''
    local_address = 'http://' + common.check_ip() + ':' + str(port) + prefix
    if not arguments['--docker']:
        print(u'Der Webserver ist erreichbar unter ' + local_address)

    if arguments['--keep-cdc']:
        print(u"CDC-Tabelle nicht geleert!")
    else:
        RssDb(dbfile, 'cdc').reset()

    p = multiprocessing.Process(target=web_server,
                                args=(port, docker, configfile, dbfile,
                                      log_level, log_file, log_format, device))
    p.start()

    if not arguments['--testlauf']:
        c = multiprocessing.Process(target=crawler,
                                    args=(configfile, dbfile, device,
                                          rsscrawler, log_level, log_file,
                                          log_format))
        c.start()

        w = multiprocessing.Process(target=crawldog, args=(configfile, dbfile))
        w.start()

        print(u'Drücke [Strg] + [C] zum Beenden')

        def signal_handler(sig, frame):
            print(u'Beende RSScrawler...')
            p.terminate()
            c.terminate()
            w.terminate()
            sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)

        try:
            while True:
                signal.pause()
        except AttributeError:
            while True:
                time.sleep(1)
    else:
        crawler(configfile, dbfile, device, rsscrawler, log_level, log_file,
                log_format)
        p.terminate()
        sys.exit(0)
Example 7
def crawldog(configfile, dbfile):
    disable_request_warnings(InsecureRequestWarning)
    crawljobs = RssConfig('Crawljobs', configfile)
    autostart = crawljobs.get("autostart")
    db = RssDb(dbfile, 'crawldog')

    grabber_was_collecting = False
    device = False

    while True:
        try:
            if not device or not is_device(device):
                device = get_device(configfile)

            myjd_packages = get_info(configfile, device)
            grabber_collecting = myjd_packages[2]

            if grabber_was_collecting or grabber_collecting:
                grabber_was_collecting = grabber_collecting
                time.sleep(5)
            else:
                packages_in_downloader_decrypted = myjd_packages[4][0]
                packages_in_linkgrabber_decrypted = myjd_packages[4][1]
                offline_packages = myjd_packages[4][2]
                encrypted_packages = myjd_packages[4][3]

                try:
                    watched_titles = db.retrieve_all_titles()
                except:
                    watched_titles = False

                notify_list = []

                if packages_in_downloader_decrypted or packages_in_linkgrabber_decrypted or offline_packages or encrypted_packages:

                    if watched_titles:
                        for title in watched_titles:
                            if packages_in_downloader_decrypted:
                                for package in packages_in_downloader_decrypted:
                                    if title[0] in package[
                                            'name'] or title[0].replace(
                                                ".", " ") in package['name']:
                                        check = hoster_check(
                                            configfile, device, [package],
                                            title[0], [0])
                                        device = check[0]
                                        if device:
                                            db.delete(title[0])

                            if packages_in_linkgrabber_decrypted:
                                for package in packages_in_linkgrabber_decrypted:
                                    if title[0] in package[
                                            'name'] or title[0].replace(
                                                ".", " ") in package['name']:
                                        check = hoster_check(
                                            configfile, device, [package],
                                            title[0], [0])
                                        device = check[0]
                                        episode = RssDb(
                                            dbfile,
                                            'episode_remover').retrieve(
                                                title[0])
                                        if episode:
                                            filenames = package['filenames']
                                            if len(filenames) > 1:
                                                fname_episodes = []
                                                for fname in filenames:
                                                    try:
                                                        if re.match(
                                                                r'.*S\d{1,3}E\d{1,3}.*',
                                                                fname,
                                                                flags=re.
                                                                IGNORECASE):
                                                            fname = re.findall(
                                                                r'S\d{1,3}E(\d{1,3})',
                                                                fname,
                                                                flags=re.
                                                                IGNORECASE
                                                            ).pop()
                                                        else:
                                                            fname = fname.replace(
                                                                "hddl8",
                                                                "").replace(
                                                                    "dd51", ""
                                                                ).replace(
                                                                    "264", ""
                                                                ).replace(
                                                                    "265", "")
                                                    except:
                                                        fname = fname.replace(
                                                            "hddl8",
                                                            "").replace(
                                                                "dd51",
                                                                "").replace(
                                                                    "264", ""
                                                                ).replace(
                                                                    "265", "")
                                                    fname_episode = "".join(
                                                        re.findall(
                                                            r'\d+',
                                                            fname.split(
                                                                ".part")[0]))
                                                    try:
                                                        fname_episodes.append(
                                                            str(
                                                                int(fname_episode
                                                                    )))
                                                    except:
                                                        pass
                                                replacer = longest_substr(
                                                    fname_episodes)

                                                new_fname_episodes = []
                                                for new_ep_fname in fname_episodes:
                                                    try:
                                                        new_fname_episodes.append(
                                                            str(
                                                                int(
                                                                    new_ep_fname
                                                                    .replace(
                                                                        replacer,
                                                                        ""))))
                                                    except:
                                                        pass
                                                replacer = longest_substr(
                                                    new_fname_episodes)

                                                newer_fname_episodes = []
                                                for new_ep_fname in new_fname_episodes:
                                                    try:
                                                        newer_fname_episodes.append(
                                                            str(
                                                                int(
                                                                    re.sub(
                                                                        replacer,
                                                                        "",
                                                                        new_ep_fname,
                                                                        1))))
                                                    except:
                                                        pass

                                                replacer = longest_substr(
                                                    newer_fname_episodes)

                                                even_newer_fname_episodes = []
                                                for newer_ep_fname in newer_fname_episodes:
                                                    try:
                                                        even_newer_fname_episodes.append(
                                                            str(
                                                                int(
                                                                    re.sub(
                                                                        replacer,
                                                                        "",
                                                                        newer_ep_fname,
                                                                        1))))
                                                    except:
                                                        pass

                                                if even_newer_fname_episodes:
                                                    fname_episodes = even_newer_fname_episodes
                                                elif newer_fname_episodes:
                                                    fname_episodes = newer_fname_episodes
                                                elif new_fname_episodes:
                                                    fname_episodes = new_fname_episodes

                                                delete_linkids = []
                                                pos = 0
                                                for delete_id in package[
                                                        'linkids']:
                                                    if str(episode) != str(
                                                            fname_episodes[pos]
                                                    ):
                                                        delete_linkids.append(
                                                            delete_id)
                                                    pos += 1
                                                if delete_linkids:
                                                    delete_uuids = [
                                                        package['uuid']
                                                    ]
                                                    RssDb(
                                                        dbfile,
                                                        'episode_remover'
                                                    ).delete(title[0])
                                                    device = remove_from_linkgrabber(
                                                        configfile, device,
                                                        delete_linkids,
                                                        delete_uuids)
                                        if autostart:
                                            device = move_to_downloads(
                                                configfile, device,
                                                package['linkids'],
                                                [package['uuid']])
                                        if device:
                                            db.delete(title[0])

                            if offline_packages:
                                for package in offline_packages:
                                    if title[0] in package[
                                            'name'] or title[0].replace(
                                                ".", " ") in package['name']:
                                        notify_list.append("[Offline] - " +
                                                           title[0])
                                        print((u"[Offline] - " + title[0]))
                                        db.delete(title[0])

                            if encrypted_packages:
                                for package in encrypted_packages:
                                    if title[0] in package[
                                            'name'] or title[0].replace(
                                                ".", " ") in package['name']:
                                        if title[1] == 'added':
                                            if retry_decrypt(
                                                    configfile, dbfile, device,
                                                    package['linkids'],
                                                [package['uuid']],
                                                    package['urls']):
                                                db.delete(title[0])
                                                db.store(title[0], 'retried')
                                        else:
                                            add_decrypt(
                                                package['name'],
                                                package['url'], "", dbfile)
                                            device = remove_from_linkgrabber(
                                                configfile, device,
                                                package['linkids'],
                                                [package['uuid']])
                                            notify_list.append(
                                                "[Click'n'Load notwendig] - " +
                                                title[0])
                                            print(
                                                u"[Click'n'Load notwendig] - "
                                                + title[0])
                                            db.delete(title[0])
                else:
                    if not grabber_collecting:
                        db.reset()

                if notify_list:
                    notify(notify_list, configfile)

                time.sleep(30)
        except Exception:
            traceback.print_exc()
            time.sleep(30)
Example 8
def ombi(configfile, dbfile, device, log_debug):
    db = RssDb(dbfile, 'Ombi')
    config = RssConfig('Ombi', configfile)
    url = config.get('url')
    api = config.get('api')

    if not url or not api:
        return device

    english = RssConfig('RSScrawler', configfile).get('english')

    try:
        requested_movies = requests.get(url + '/api/v1/Request/movie',
                                        headers={'ApiKey': api})
        requested_movies = json.loads(requested_movies.text)
        requested_shows = requests.get(url + '/api/v1/Request/tv',
                                       headers={'ApiKey': api})
        requested_shows = json.loads(requested_shows.text)
    except:
        log_debug("Ombi ist nicht erreichbar!")
        return False

    scraper = False

    for r in requested_movies:
        if bool(r.get("approved")):
            if not bool(r.get("available")):
                imdb_id = r.get("imdbId")
                if not db.retrieve('movie_' + str(imdb_id)) == 'added':
                    response = imdb_movie(imdb_id, configfile, dbfile, scraper)
                    title = response[0]
                    if title:
                        scraper = response[1]
                        best_result = search.best_result_bl(
                            title, configfile, dbfile)
                        print(u"Film: " + title + u" durch Ombi hinzugefügt.")
                        if best_result:
                            search.download_bl(best_result, device, configfile,
                                               dbfile)
                        if english:
                            title = r.get('title')
                            best_result = search.best_result_bl(
                                title, configfile, dbfile)
                            print(u"Film: " + title +
                                  u"durch Ombi hinzugefügt.")
                            if best_result:
                                search.download_bl(best_result, device,
                                                   configfile, dbfile)
                        db.store('movie_' + str(imdb_id), 'added')
                    else:
                        log_debug("Titel für IMDB-ID nicht abrufbar: " +
                                  imdb_id)

    for r in requested_shows:
        imdb_id = r.get("imdbId")
        infos = None
        child_requests = r.get("childRequests")
        for cr in child_requests:
            if bool(cr.get("approved")):
                if not bool(cr.get("available")):
                    details = cr.get("seasonRequests")
                    for season in details:
                        sn = season.get("seasonNumber")
                        eps = []
                        episodes = season.get("episodes")
                        for episode in episodes:
                            if not bool(episode.get("available")):
                                enr = episode.get("episodeNumber")
                                s = str(sn)
                                if len(s) == 1:
                                    s = "0" + s
                                s = "S" + s
                                e = str(enr)
                                if len(e) == 1:
                                    e = "0" + e
                                se = s + "E" + e
                                if not db.retrieve('show_' + str(imdb_id) +
                                                   '_' + se) == 'added':
                                    eps.append(enr)
                        if eps:
                            if not infos:
                                infos = imdb_show(imdb_id, configfile, dbfile,
                                                  scraper)
                            if infos:
                                title = infos[0]
                                all_eps = infos[1]
                                scraper = infos[2]
                                check_sn = False
                                if all_eps:
                                    check_sn = all_eps.get(sn)
                                if check_sn:
                                    sn_length = len(eps)
                                    check_sn_length = len(check_sn)
                                    if check_sn_length > sn_length:
                                        for ep in eps:
                                            e = str(ep)
                                            if len(e) == 1:
                                                e = "0" + e
                                            se = s + "E" + e
                                            payload = search.best_result_sj(
                                                title, configfile, dbfile)
                                            if payload:
                                                payload = decode_base64(
                                                    payload).split("|")
                                                payload = encode_base64(
                                                    payload[0] + "|" +
                                                    payload[1] + "|" + se)
                                                added_episode = search.download_sj(
                                                    payload, configfile,
                                                    dbfile)
                                                if not added_episode:
                                                    payload = decode_base64(
                                                        payload).split("|")
                                                    payload = encode_base64(
                                                        payload[0] + "|" +
                                                        payload[1] + "|" + s)
                                                    add_season = search.download_sj(
                                                        payload, configfile,
                                                        dbfile)
                                                    for e in eps:
                                                        e = str(e)
                                                        if len(e) == 1:
                                                            e = "0" + e
                                                        se = s + "E" + e
                                                        db.store(
                                                            'show_' +
                                                            str(imdb_id) +
                                                            '_' + se, 'added')
                                                    if not add_season:
                                                        log_debug(
                                                            u"Konnte kein Release für "
                                                            + title + " " +
                                                            se + "finden.")
                                                    break
                                            db.store(
                                                'show_' + str(imdb_id) + '_' +
                                                se, 'added')
                                    else:
                                        payload = search.best_result_sj(
                                            title, configfile, dbfile)
                                        if payload:
                                            payload = decode_base64(
                                                payload).split("|")
                                            payload = encode_base64(
                                                payload[0] + "|" + payload[1] +
                                                "|" + s)
                                            search.download_sj(
                                                payload, configfile, dbfile)
                                        for ep in eps:
                                            e = str(ep)
                                            if len(e) == 1:
                                                e = "0" + e
                                            se = s + "E" + e
                                            db.store(
                                                'show_' + str(imdb_id) + '_' +
                                                se, 'added')
                                    print(u"Serie/Staffel/Episode: " + title +
                                          u" durch Ombi hinzugefügt.")

    return device
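A standalone sketch of the season/episode tag built in ombi(): single-digit numbers are zero-padded before being combined into an 'SxxEyy' key; str.zfill() is an equivalent shorthand for the manual padding above:

sn, enr = 1, 7                                    # invented season/episode numbers
se = "S" + str(sn).zfill(2) + "E" + str(enr).zfill(2)
print(se)  # S01E07
db_key = 'show_' + 'tt0000000' + '_' + se         # hypothetical IMDb id; mirrors the db.store() keys above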
Example 9
def check_url(configfile, dbfile, scraper=False):
    hostnames = RssConfig('Hostnames', configfile)
    sj = hostnames.get('sj')
    dj = hostnames.get('dj')
    sf = hostnames.get('sf')
    mb = hostnames.get('mb')
    hw = hostnames.get('hw')
    hs = hostnames.get('hs')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    dd = hostnames.get('dd')
    fc = hostnames.get('fc')

    if not scraper:
        scraper = cloudscraper.create_scraper()

    sj_url = 'https://' + sj
    dj_url = 'https://' + dj
    sf_url = 'https://' + sf
    mb_url = 'https://' + mb
    hw_url = 'https://' + hw
    fx_url = 'https://' + fx
    hs_url = 'https://' + hs + '/collection/neuerscheinungen/'
    nk_url = 'https://' + nk
    dd_url = 'https://' + dd
    fc_url = 'https://' + fc

    sj_blocked_proxy = False
    dj_blocked_proxy = False
    sf_blocked_proxy = False
    mb_blocked_proxy = False
    hw_blocked_proxy = False
    fx_blocked_proxy = False
    hs_blocked_proxy = False
    nk_blocked_proxy = False
    dd_blocked_proxy = False
    fc_blocked_proxy = False
    sj_blocked = False
    dj_blocked = False
    sf_blocked = False
    mb_blocked = False
    hw_blocked = False
    fx_blocked = False
    hs_blocked = False
    nk_blocked = False
    dd_blocked = False
    fc_blocked = False

    db = RssDb(dbfile, 'proxystatus')
    db.delete("SJ")
    db.delete("DJ")
    db.delete("SF")
    db.delete("MB")
    db.delete("HW")
    db.delete("FX")
    db.delete("HS")
    db.delete("NK")
    db.delete("DD")
    db.delete("FC")
    db_normal = RssDb(dbfile, 'normalstatus')
    db_normal.delete("SJ")
    db_normal.delete("DJ")
    db_normal.delete("SF")
    db_normal.delete("MB")
    db_normal.delete("HW")
    db_normal.delete("FX")
    db_normal.delete("HS")
    db_normal.delete("NK")
    db_normal.delete("DD")
    db_normal.delete("FC")

    proxy = RssConfig('RSScrawler', configfile).get('proxy')
    fallback = RssConfig('RSScrawler', configfile).get('fallback')

    if proxy:
        proxies = {'http': proxy, 'https': proxy}

        if not sj:
            db.store("SJ", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            sj_url,
                            proxies=proxies,
                            timeout=30,
                            allow_redirects=False).headers.get("location")):
                    sj_blocked_proxy = True
                else:
                    db.delete("SJ")
            except:
                sj_blocked_proxy = True
            if sj_blocked_proxy:
                print(
                    u"Der Zugriff auf SJ ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("SJ", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not dj:
            db.store("DJ", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            dj_url,
                            proxies=proxies,
                            timeout=30,
                            allow_redirects=False).headers.get("location")):
                    dj_blocked_proxy = True
                else:
                    db.delete("DJ")
            except:
                dj_blocked_proxy = True
            if dj_blocked_proxy:
                print(
                    u"Der Zugriff auf DJ ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("DJ", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not sf:
            db.store("SF", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            sf_url,
                            proxies=proxies,
                            timeout=30,
                            allow_redirects=False).headers.get("location")):
                    sf_blocked_proxy = True
                else:
                    db.delete("SF")
            except:
                sf_blocked_proxy = True
            if sf_blocked_proxy:
                print(
                    u"Der Zugriff auf SF ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("SF", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not mb:
            db.store("MB", "Blocked")
        else:
            try:
                if "<Response [403]>" in str(
                        scraper.get(mb_url,
                                    proxies=proxies,
                                    timeout=30,
                                    allow_redirects=False)):
                    mb_blocked_proxy = True
                else:
                    db.delete("MB")
            except:
                mb_blocked_proxy = True
            if mb_blocked_proxy:
                print(
                    u"Der Zugriff auf MB ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("MB", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not hw:
            db.store("HW", "Blocked")
        else:
            try:
                if "<Response [403]>" in str(
                        scraper.get(hw_url,
                                    proxies=proxies,
                                    timeout=30,
                                    allow_redirects=False)):
                    hw_blocked_proxy = True
                else:
                    db.delete("HW")
            except:
                hw_blocked_proxy = True
            if hw_blocked_proxy:
                print(
                    u"Der Zugriff auf HW ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("HW", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not fx:
            db.store("FX", "Blocked")
        else:
            try:
                if "<Response [403]>" in str(
                        scraper.get(fx_url,
                                    proxies=proxies,
                                    timeout=30,
                                    allow_redirects=False)):
                    fx_blocked_proxy = True
                else:
                    db.delete("FX")
            except:
                fx_blocked_proxy = True
                session = requests.session()
                session.headers = scraper.headers
                session.cookies = scraper.cookies
                session.verify = False
                if "<Response [200]>" in str(
                        session.get(fx_url,
                                    proxies=proxies,
                                    timeout=30,
                                    allow_redirects=False)):
                    fx_blocked_proxy = False
            if fx_blocked_proxy:
                print(
                    u"Der Zugriff auf FX ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("FX", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not hs:
            db.store("HS", "Blocked")
        else:
            try:
                if "200" not in str(
                        scraper.get(hs_url, timeout=30,
                                    allow_redirects=False).status_code):
                    hs_blocked_proxy = True
                else:
                    db.delete("HS")
            except:
                hs_blocked_proxy = True
            if hs_blocked_proxy:
                print(
                    u"Der Zugriff auf HS ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("HS", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not nk:
            db.store("NK", "Blocked")
        else:
            try:
                if "200" not in str(
                        scraper.get(nk_url, timeout=30,
                                    allow_redirects=False).status_code):
                    nk_blocked_proxy = True
                else:
                    db.delete("NK")
            except:
                nk_blocked_proxy = True
            if nk_blocked_proxy:
                print(
                    u"Der Zugriff auf NK ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("NK", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not dd:
            db.store("DD", "Blocked")
        else:
            try:
                if "200" not in str(
                        scraper.get(dd_url, timeout=30,
                                    allow_redirects=False).status_code):
                    dd_blocked_proxy = True
                else:
                    db.delete("DD")
            except:
                dd_blocked_proxy = True
            if dd_blocked_proxy:
                print(
                    u"Der Zugriff auf DD ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("DD", "Blocked")
                scraper = cloudscraper.create_scraper()

        if not fc:
            db.store("FC", "Blocked")
        else:
            try:
                if "200" not in str(
                        scraper.get(fc_url, timeout=30).status_code):
                    fc_blocked_proxy = True
                else:
                    db.delete("FC")
            except:
                fc_blocked_proxy = True
            if fc_blocked_proxy:
                print(
                    u"Der Zugriff auf FC ist mit der aktuellen Proxy-IP nicht möglich!"
                )
                db.store("FC", "Blocked")
                scraper = cloudscraper.create_scraper()

    if not proxy or (proxy and sj_blocked_proxy and fallback):
        if not sj:
            db.store("SJ", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            sj_url, timeout=30,
                            allow_redirects=False).headers.get("location")):
                    sj_blocked = True
            except:
                sj_blocked = True
            if sj_blocked:
                db_normal.store("SJ", "Blocked")
                print(
                    u"Der Zugriff auf SJ ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and dj_blocked_proxy and fallback):
        if not dj:
            db.store("DJ", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            dj_url, timeout=30,
                            allow_redirects=False).headers.get("location")):
                    dj_blocked = True
            except:
                dj_blocked = True
            if dj_blocked:
                db_normal.store("DJ", "Blocked")
                print(
                    u"Der Zugriff auf DJ ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and sf_blocked_proxy and fallback):
        if not sf:
            db.store("SF", "Blocked")
        else:
            try:
                if "block." in str(
                        scraper.get(
                            sf_url, timeout=30,
                            allow_redirects=False).headers.get("location")):
                    sf_blocked = True
            except:
                sf_blocked = True
            if sf_blocked:
                db_normal.store("SF", "Blocked")
                print(
                    u"Der Zugriff auf SF ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and mb_blocked_proxy and fallback):
        if not mb:
            db.store("MB", "Blocked")
        else:
            try:
                if "<Response [403]>" in str(
                        scraper.get(mb_url, timeout=30,
                                    allow_redirects=False)):
                    mb_blocked = True
            except:
                mb_blocked = True
            if mb_blocked:
                db_normal.store("MB", "Blocked")
                print(
                    u"Der Zugriff auf MB ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and hw_blocked_proxy and fallback):
        if not hw:
            db.store("HW", "Blocked")
        else:
            try:
                if "<Response [403]>" in str(
                        scraper.get(hw_url, timeout=30,
                                    allow_redirects=False)):
                    hw_blocked = True
            except:
                hw_blocked = True
            if hw_blocked:
                db_normal.store("HW", "Blocked")
                print(
                    u"Der Zugriff auf HW ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and fx_blocked_proxy and fallback):
        if not fx:
            db.store("FX", "Blocked")
        else:
            try:
                if "<Response [403]>" in str(
                        scraper.get(fx_url, timeout=30,
                                    allow_redirects=False)):
                    fx_blocked = True
            except:
                fx_blocked = True
                session = requests.session()
                session.headers = scraper.headers
                session.cookies = scraper.cookies
                session.verify = False
                if "<Response [200]>" in str(
                        session.get(fx_url, timeout=30,
                                    allow_redirects=False)):
                    fx_blocked = False
            if fx_blocked:
                db_normal.store("FX", "Blocked")
                print(
                    u"Der Zugriff auf FX ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and hs_blocked_proxy and fallback):
        if not hs:
            db.store("HS", "Blocked")
        else:
            try:
                if "200" not in str(
                        scraper.get(hs_url, timeout=30,
                                    allow_redirects=False).status_code):
                    hs_blocked = True
            except:
                hs_blocked = True
            if hs_blocked:
                db_normal.store("HS", "Blocked")
                print(
                    u"Der Zugriff auf HS ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and nk_blocked_proxy and fallback):
        if not nk:
            db.store("NK", "Blocked")
        else:
            try:
                if "200" not in str(
                        scraper.get(nk_url, timeout=30,
                                    allow_redirects=False).status_code):
                    nk_blocked = True
            except:
                nk_blocked = True
            if nk_blocked:
                db_normal.store("NK", "Blocked")
                print(
                    u"Der Zugriff auf NK ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and dd_blocked_proxy and fallback):
        if not dd:
            db.store("DD", "Blocked")
        else:
            try:
                if "200" not in str(
                        scraper.get(dd_url, timeout=30,
                                    allow_redirects=False).status_code):
                    dd_blocked = True
            except:
                dd_blocked = True
            if dd_blocked:
                db_normal.store("DD", "Blocked")
                print(
                    u"Der Zugriff auf DD ist mit der aktuellen IP nicht möglich!"
                )

    if not proxy or (proxy and fc_blocked_proxy and fallback):
        if not fc:
            db.store("FC", "Blocked")
        else:
            try:
                if "200" not in str(
                        scraper.get(fc_url, timeout=30).status_code):
                    fc_blocked = True
            except:
                fc_blocked = True
            if fc_blocked:
                db_normal.store("FC", "Blocked")
                print(
                    u"Der Zugriff auf FC ist mit der aktuellen IP nicht möglich!"
                )

    return scraper
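The per-site checks above repeat one pattern many times; a condensed sketch of that pattern, assuming the cloudscraper session created in check_url() and the "block." redirect convention used for SJ/DJ/SF:

def is_blocked_via_proxy(scraper, url, proxies):
    # Returns True when the site redirects to a "block." host or the request fails.
    try:
        location = scraper.get(url, proxies=proxies, timeout=30,
                               allow_redirects=False).headers.get("location")
        return "block." in str(location)
    except Exception:
        return True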
Example 10
def get_url_headers(url, configfile, dbfile, headers, scraper=False):
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    if proxy:
        try:
            if site and "SJ" in site:
                if db.retrieve("SJ"):
                    if config.get("fallback") and not db_normal.retrieve("SJ"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            elif site and "DJ" in site:
                if db.retrieve("DJ"):
                    if config.get("fallback") and not db_normal.retrieve("DJ"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            elif site and "SF" in site:
                if db.retrieve("SF"):
                    if config.get("fallback") and not db_normal.retrieve("SF"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            elif site and "MB" in site:
                if db.retrieve("MB"):
                    if config.get("fallback") and not db_normal.retrieve("MB"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            elif site and "HW" in site:
                if db.retrieve("HW"):
                    if config.get("fallback") and not db_normal.retrieve("HW"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            elif site and "FX" in site:
                if db.retrieve("FX"):
                    if config.get("fallback") and not db_normal.retrieve("FX"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            elif site and "HS" in site:
                if db.retrieve("HS"):
                    if config.get("fallback") and not db_normal.retrieve("HS"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            elif site and "NK" in site:
                if db.retrieve("NK"):
                    if config.get("fallback") and not db_normal.retrieve("NK"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            elif site and "DD" in site:
                if db.retrieve("DD"):
                    if config.get("fallback") and not db_normal.retrieve("DD"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            elif site and "FC" in site:
                if db.retrieve("FC"):
                    if config.get("fallback") and not db_normal.retrieve("FC"):
                        return [
                            scraper.get(url, headers=headers, timeout=30),
                            scraper
                        ]
                    else:
                        return ["", scraper]
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.get(url,
                                   headers=headers,
                                   proxies=proxies,
                                   timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
    else:
        try:
            if site and "SJ" in site and db_normal.retrieve("SJ"):
                return ["", scraper]
            elif site and "DJ" in site and db_normal.retrieve("DJ"):
                return ["", scraper]
            elif site and "SF" in site and db_normal.retrieve("SF"):
                return ["", scraper]
            elif site and "MB" in site and db_normal.retrieve("MB"):
                return ["", scraper]
            elif site and "HW" in site and db_normal.retrieve("HW"):
                return ["", scraper]
            elif site and "FX" in site and db_normal.retrieve("FX"):
                return ["", scraper]
            elif site and "HS" in site and db_normal.retrieve("HS"):
                return ["", scraper]
            elif site and "NK" in site and db_normal.retrieve("NK"):
                return ["", scraper]
            elif site and "DD" in site and db_normal.retrieve("DD"):
                return ["", scraper]
            elif site and "FC" in site and db_normal.retrieve("FC"):
                return ["", scraper]
            response = scraper.get(url, headers=headers, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
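
A minimal usage sketch for get_url_headers, assuming the function above and RSScrawler's config/database files are in scope; the paths, URL and header value are placeholders, not values from the snippet.

# Hypothetical call; reusing the returned scraper avoids repeated Cloudflare challenges.
import cloudscraper

configfile = "/path/to/RSScrawler.ini"  # placeholder path
dbfile = "/path/to/RSScrawler.db"       # placeholder path
headers = {'If-Modified-Since': 'Thu, 01 Jan 1970 00:00:00 GMT'}

scraper = cloudscraper.create_scraper()
response, scraper = get_url_headers("https://example.org/updates/2020-01-01",
                                    configfile, dbfile, headers, scraper)
if response and response.status_code == 304:
    print("Feed not modified since the last crawl")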
Esempio n. 11
0
class DD:
    _INTERNAL_NAME = 'DD'

    def __init__(self, configfile, dbfile, device, logging, scraper):
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device
        self.config = RssConfig(self._INTERNAL_NAME, self.configfile)
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.db = RssDb(self.dbfile, 'rsscrawler')

    def periodical_task(self):
        feeds = self.config.get("feeds")
        if feeds:
            added_items = []
            feeds = feeds.replace(" ", "").split(',')
            for feed in feeds:
                feed = feedparser.parse(
                    get_url(feed, self.configfile, self.dbfile, self.scraper))
                for post in feed.entries:
                    key = post.title.replace(" ", ".")

                    epoch = datetime(1970, 1, 1)
                    current_epoch = int(time())
                    published_format = "%Y-%m-%d %H:%M:%S+00:00"
                    published_timestamp = str(parser.parse(post.published))
                    published_epoch = int((datetime.strptime(
                        published_timestamp, published_format) -
                                           epoch).total_seconds())
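                    # Entries published less than 30 minutes ago are skipped in the else branch below.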
                    if (current_epoch - 1800) > published_epoch:
                        link_pool = post.summary
                        unicode_links = re.findall(r'(http.*)', link_pool)
                        links = []
                        for link in unicode_links:
                            if check_hoster(link, self.configfile):
                                links.append(str(link))
                        if self.config.get("hoster_fallback") and not links:
                            for link in unicode_links:
                                links.append(str(link))
                        storage = self.db.retrieve_all(key)
                        if not links:
                            if 'added' not in storage and 'notdl' not in storage:
                                wrong_hoster = '[DD/Hoster fehlt] - ' + key
                                if 'wrong_hoster' not in storage:
                                    self.log_info(wrong_hoster)
                                    self.db.store(key, 'wrong_hoster')
                                    notify([wrong_hoster], self.configfile)
                                else:
                                    self.log_debug(wrong_hoster)
                        elif 'added' in storage:
                            self.log_debug(
                                "%s - Release ignoriert (bereits gefunden)" %
                                key)
                        else:
                            self.device = myjd_download(
                                self.configfile, self.dbfile, self.device, key,
                                "RSScrawler", links, "")
                            if self.device:
                                self.db.store(key, 'added')
                                log_entry = '[Englisch] - ' + key + ' - [DD]'
                                self.log_info(log_entry)
                                notify([log_entry], self.configfile)
                                added_items.append(log_entry)
                    else:
                        self.log_debug(
                            "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert."
                            % key)
        else:
            self.log_debug("Liste ist leer. Stoppe Suche für DD!")
        return self.device
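
A minimal wiring sketch for the DD crawler above; the paths are placeholders, and the device argument, normally a My JDownloader handle obtained elsewhere in RSScrawler, is left as None here.

# Hypothetical setup; logging is the standard library module expected by __init__.
import logging
import cloudscraper

scraper = cloudscraper.create_scraper()
dd = DD("/path/to/RSScrawler.ini", "/path/to/RSScrawler.db", None, logging, scraper)
device = dd.periodical_task()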
Esempio n. 12
0
def download_sj(payload, configfile, dbfile):
    hostnames = RssConfig('Hostnames', configfile)
    sj = hostnames.get('sj')

    payload = decode_base64(payload).split("|")
    href = payload[0]
    title = payload[1]
    special = payload[2].strip().replace("None", "")

    series_url = 'https://' + sj + href
    series_info = get_url(series_url, configfile, dbfile)
    series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]

    api_url = 'https://' + sj + '/api/media/' + series_id + '/releases'
    releases = get_url(api_url, configfile, dbfile)

    seasons = json.loads(releases)

    listen = ["SJ_Serien", "MB_Staffeln"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    config = RssConfig('SJ', configfile)
    english_ok = RssConfig('RSScrawler', configfile).get("english")
    quality = config.get('quality')
    ignore = config.get('rejectlist')

    result_seasons = {}
    result_episodes = {}

    for season in seasons:
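        # First pass: keep only releases that match the quality, language and hoster
        # settings; a relaxed second pass below covers seasons that stayed empty.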
        releases = seasons[season]
        for release in releases['items']:
            name = release['name'].encode('ascii',
                                          errors='ignore').decode('utf-8')
            hosters = release['hoster']
            try:
                valid = bool(release['resolution'] == quality)
            except:
                valid = re.match(re.compile(r'.*' + quality + r'.*'), name)
            if valid and special:
                valid = bool("." + special.lower() + "." in name.lower())
            if valid and not english_ok:
                valid = bool(".german." in name.lower())
            if valid:
                valid = False
                for hoster in hosters:
                    if hoster and check_hoster(
                            hoster,
                            configfile) or config.get("hoster_fallback"):
                        valid = True
            if valid:
                try:
                    ep = release['episode']
                    if ep:
                        existing = result_episodes.get(season)
                        if existing:
                            for e in existing:
                                if e == ep:
                                    if rate(name, ignore) > rate(
                                            existing[e], ignore):
                                        existing.update({ep: name})
                        else:
                            existing = {ep: name}
                        result_episodes.update({season: existing})
                        continue
                except:
                    pass

                existing = result_seasons.get(season)
                dont = False
                if existing:
                    if rate(name, ignore) < rate(existing, ignore):
                        dont = True
                if not dont:
                    result_seasons.update({season: name})

        try:
            if result_seasons[season] and result_episodes[season]:
                del result_episodes[season]
        except:
            pass

        success = False
        try:
            if result_seasons[season]:
                success = True
        except:
            try:
                if result_episodes[season]:
                    success = True
            except:
                pass

        if success:
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)
        else:
            for release in releases['items']:
                name = release['name'].encode('ascii',
                                              errors='ignore').decode('utf-8')
                hosters = release['hoster']
                valid = True
                if valid and special:
                    valid = bool("." + special.lower() + "." in name.lower())
                if valid and not english_ok:
                    valid = bool(".german." in name.lower())
                if valid:
                    valid = False
                    for hoster in hosters:
                        if hoster and check_hoster(
                                hoster,
                                configfile) or config.get("hoster_fallback"):
                            valid = True
                if valid:
                    try:
                        ep = release['episode']
                        if ep:
                            existing = result_episodes.get(season)
                            if existing:
                                for e in existing:
                                    if e == ep:
                                        if rate(name, ignore) > rate(
                                                existing[e], ignore):
                                            existing.update({ep: name})
                            else:
                                existing = {ep: name}
                            result_episodes.update({season: existing})
                            continue
                    except:
                        pass

                    existing = result_seasons.get(season)
                    dont = False
                    if existing:
                        if rate(name, ignore) < rate(existing, ignore):
                            dont = True
                    if not dont:
                        result_seasons.update({season: name})

            try:
                if result_seasons[season] and result_episodes[season]:
                    del result_episodes[season]
            except:
                pass
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)

    matches = []

    for season in result_seasons:
        matches.append(result_seasons[season])
    for season in result_episodes:
        for episode in result_episodes[season]:
            matches.append(result_episodes[season][episode])

    notify_array = []
    for title in matches:
        db = RssDb(dbfile, 'rsscrawler')
        if add_decrypt(title, series_url, sj, dbfile):
            db.store(title, 'added')
            log_entry = u'[Suche/Serie] - ' + title + ' - [SJ]'
            logger.info(log_entry)
            notify_array.append(log_entry)

    notify(notify_array, configfile)

    if not matches:
        return False
    return matches
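
A minimal sketch of invoking download_sj, assuming an encode_base64 helper as the counterpart of the decode_base64 call above; the href, title and paths are placeholders. The payload carries "href|title|special" separated by pipes.

# Hypothetical call; encode_base64 and the paths are assumptions for illustration.
payload = encode_base64("/serie/some-show|Some.Show|")
matches = download_sj(payload, "/path/to/RSScrawler.ini", "/path/to/RSScrawler.db")
if matches:
    for release in matches:
        print("Queued for decryption:", release)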
Esempio n. 13
0
def download_bl(payload, device, configfile, dbfile):
    hostnames = RssConfig('Hostnames', configfile)
    mb = hostnames.get('mb')
    nk = hostnames.get('nk')
    fc = hostnames.get('fc').replace('www.', '').split('.')[0]

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]
    url = get_url(link, configfile, dbfile)
    if not url or "NinjaFirewall 429" in url:
        return False

    config = RssConfig('MB', configfile)
    db = RssDb(dbfile, 'rsscrawler')
    soup = BeautifulSoup(url, 'lxml')

    site = check_is_site(link, configfile)
    if not site:
        return False
    else:
        if "MB" in site:
            if not fc:
                print(
                    u"FC Hostname nicht gesetzt. MB kann keine Links finden!")
                return False
            key = soup.find("span", {"class": "fn"}).text
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HW" in site:
            if not fc:
                print(
                    u"FC Hostname nicht gesetzt. HW kann keine Links finden!")
                return False
            key = re.findall(r'Permanent Link: (.*?)"', str(soup)).pop()
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HS" in site:
            download = soup.find("div", {"class": "entry-content"})
            key = soup.find("h2", {"class": "entry-title"}).text
            url_hosters = re.findall(r'href="([^"\'>]*)".+?(.+?)<',
                                     str(download))
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

        links = {}
        if "MB" in site or "HW" in site or "HS" in site or "NK" in site:
            for url_hoster in reversed(url_hosters):
                try:
                    if mb.split('.')[0] not in url_hoster[
                            0] and "https://goo.gl/" not in url_hoster[0]:
                        link_hoster = url_hoster[1].lower().replace(
                            'target="_blank">', '').replace(" ", "-")
                        if check_hoster(link_hoster, configfile):
                            links[link_hoster] = url_hoster[0]
                except:
                    pass
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    if mb.split('.')[0] not in url_hoster[
                            0] and "https://goo.gl/" not in url_hoster[0]:
                        link_hoster = url_hoster[1].lower().replace(
                            'target="_blank">', '').replace(" ", "-")
                        links[link_hoster] = url_hoster[0]
            download_links = list(links.values())
        elif "FX" in site:
            download_links = fx_download_links(url, key, configfile)

        englisch = False
        if "*englisch" in key.lower() or "*english" in key.lower():
            key = key.replace('*ENGLISCH',
                              '').replace("*Englisch", "").replace(
                                  "*ENGLISH", "").replace("*English",
                                                          "").replace("*", "")
            englisch = True

        staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

        if config.get('enforcedl') and '.dl.' not in key.lower():
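            # enforcedl: resolve the IMDb id (from the page, then via IMDb search) and
            # try a dual-language release via BL.dual_download before queueing the links.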
            fail = False
            get_imdb_url = url
            key_regex = r'<title>' + \
                        re.escape(
                            key) + r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>'
            imdb_id = re.findall(key_regex, get_imdb_url)
            if len(imdb_id) > 0:
                if not imdb_id[0]:
                    fail = True
                else:
                    imdb_id = imdb_id[0]
            else:
                fail = True
            if fail:
                try:
                    search_title = re.findall(
                        r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
                        key)[0].replace(".", "+")
                    search_url = "http://www.imdb.com/find?q=" + search_title
                    search_page = get_url(search_url, configfile, dbfile)
                    search_results = re.findall(
                        r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                        search_page)
                    total_results = len(search_results)
                except:
                    return False
                if staffel:
                    try:
                        imdb_id = search_results[0][0]
                    except:
                        imdb_id = False
                else:
                    no_series = False
                    while total_results > 0:
                        attempt = 0
                        for result in search_results:
                            if result[3] == "TV Series":
                                no_series = False
                                total_results -= 1
                                attempt += 1
                            else:
                                no_series = True
                                imdb_id = search_results[attempt][0]
                                total_results = 0
                                break
                    if no_series is False:
                        logger.debug(
                            "%s - Keine passende Film-IMDB-Seite gefunden" %
                            key)

            if staffel:
                filename = 'MB_Staffeln'
            else:
                filename = 'MB_Filme'

            scraper = cloudscraper.create_scraper()
            blog = BL(configfile,
                      dbfile,
                      device,
                      logging,
                      scraper,
                      filename=filename)

            if not imdb_id:
                if not blog.dual_download(key, password):
                    logger.debug("%s - Kein zweisprachiges Release gefunden." %
                                 key)
            else:
                if isinstance(imdb_id, list):
                    imdb_id = imdb_id.pop()
                imdb_url = "http://www.imdb.com/title/" + imdb_id
                details = get_url(imdb_url, configfile, dbfile)
                if not details:
                    logger.debug("%s - Originalsprache nicht ermittelbar" %
                                 key)
                original_language = re.findall(
                    r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details)
                if original_language:
                    original_language = original_language[0]
                if original_language == "German":
                    logger.debug(
                        "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!"
                        % key)
                else:
                    if not blog.dual_download(key, password) and not englisch:
                        logger.debug(
                            "%s - Kein zweisprachiges Release gefunden!" % key)

        if download_links:
            if staffel:
                if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                                 download_links, password):
                    db.store(
                        key.replace(".COMPLETE", "").replace(".Complete", ""),
                        'notdl' if config.get('enforcedl')
                        and '.dl.' not in key.lower() else 'added')
                    log_entry = '[Suche/Staffel] - ' + key.replace(
                        ".COMPLETE", "").replace(".Complete",
                                                 "") + ' - [' + site + ']'
                    logger.info(log_entry)
                    notify([log_entry], configfile)
                    return True
            elif '.3d.' in key.lower():
                retail = False
                if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                    if config.get('enforcedl'):
                        if is_retail(key, '2', dbfile):
                            retail = True
                if myjd_download(configfile, dbfile, device, key,
                                 "RSScrawler/3Dcrawler", download_links,
                                 password):
                    db.store(
                        key, 'notdl' if config.get('enforcedl')
                        and '.dl.' not in key.lower() else 'added')
                    log_entry = '[Suche/Film' + (
                        '/Retail' if retail else
                        "") + '/3D] - ' + key + ' - [' + site + ']'
                    logger.info(log_entry)
                    notify([log_entry], configfile)
                    return True
            else:
                retail = False
                if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                    if config.get('enforcedl'):
                        if is_retail(key, '1', dbfile):
                            retail = True
                    else:
                        if is_retail(key, '0', dbfile):
                            retail = True
                if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                                 download_links, password):
                    db.store(
                        key, 'notdl' if config.get('enforcedl')
                        and '.dl.' not in key.lower() else 'added')
                    log_entry = '[Suche/Film' + (
                        '/Englisch' if englisch and not retail else
                        '') + ('/Englisch/Retail' if englisch and retail else
                               '') + ('/Retail' if not englisch and retail else
                                      '') + '] - ' + key + ' - [' + site + ']'
                    logger.info(log_entry)
                    notify([log_entry], configfile)
                    return [key]
        else:
            return False
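
A minimal sketch of invoking download_bl, again assuming an encode_base64 helper mirroring decode_base64; link, password, device and paths are placeholders. The payload carries "link|password" (FX links use "link|title|password" instead).

# Hypothetical call; device would normally be a My JDownloader handle.
device = None
payload = encode_base64("https://example.org/some-release/|examplepassword")
result = download_bl(payload, device, "/path/to/RSScrawler.ini", "/path/to/RSScrawler.db")
if not result:
    print("No matching download could be queued")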
Esempio n. 14
0
def add_decrypt(title, link, password, dbfile):
    try:
        RssDb(dbfile, 'to_decrypt').store(title, link + '|' + password)
        return True
    except:
        return False
Esempio n. 15
0
def remove_decrypt(title, dbfile):
    try:
        RssDb(dbfile, 'to_decrypt').delete(title)
        return True
    except:
        return False
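
A minimal round trip through the to_decrypt table used by the two helpers above; title, URL and path are placeholders.

# Hypothetical usage; the stored value is later picked up for Click'n'Load decryption.
dbfile = "/path/to/RSScrawler.db"
title = "Some.Show.S01E01.German.1080p.WEB.x264-GROUP"
if add_decrypt(title, "https://example.org/release", "example.org", dbfile):
    print("Stored for manual decryption")
remove_decrypt(title, dbfile)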
Esempio n. 16
0
class SF:
    def __init__(self, configfile, dbfile, device, logging, scraper, filename,
                 internal_name):
        self._INTERNAL_NAME = internal_name
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = RssConfig('Hostnames', self.configfile)
        self.sf = self.hostnames.get('sf')

        self.config = RssConfig(self._INTERNAL_NAME, self.configfile)
        self.rsscrawler = RssConfig("RSScrawler", self.configfile)
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.hosters = RssConfig("Hosters", configfile).get_section()
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.filename = filename
        self.db = RssDb(self.dbfile, 'rsscrawler')
        self.quality = self.config.get("quality")
        self.cdc = RssDb(self.dbfile, 'cdc')
        self.last_set_sf = self.cdc.retrieve("SFSet-" + self.filename)
        self.last_sha_sf = self.cdc.retrieve("SF-" + self.filename)
        self.headers = {
            'If-Modified-Since':
            str(self.cdc.retrieve("SFHeaders-" + self.filename))
        }
        settings = [
            "quality", "rejectlist", "regex", "hevc_retail", "retail_only",
            "hoster_fallback"
        ]
        self.settings = []
        self.settings.append(self.rsscrawler.get("english"))
        self.settings.append(self.rsscrawler.get("surround"))
        self.settings.append(self.hosters)
        for s in settings:
            self.settings.append(self.config.get(s))

        self.empty_list = False
        if self.filename == 'SJ_Staffeln_Regex':
            self.level = 3
        elif self.filename == 'MB_Staffeln':
            self.seasonssource = self.config.get('seasonssource').lower()
            self.level = 2
        elif self.filename == 'SJ_Serien_Regex':
            self.level = 1
        else:
            self.level = 0

        self.pattern = r'^(' + "|".join(
            self.get_series_list(self.filename, self.level)).lower() + ')'
        self.listtype = ""

        self.day = 0

    def settings_hash(self, refresh):
        if refresh:
            settings = [
                "quality", "rejectlist", "regex", "hevc_retail", "retail_only",
                "hoster_fallback"
            ]
            self.settings = []
            self.settings.append(self.rsscrawler.get("english"))
            self.settings.append(self.rsscrawler.get("surround"))
            self.settings.append(self.hosters)
            for s in settings:
                self.settings.append(self.config.get(s))
            self.pattern = r'^(' + "|".join(
                self.get_series_list(self.filename, self.level)).lower() + ')'
        set_sf = str(self.settings) + str(self.pattern)
        return hashlib.sha256(set_sf.encode('ascii', 'ignore')).hexdigest()

    def get_series_list(self, liste, series_type):
        if series_type == 1:
            self.listtype = " (RegEx)"
        elif series_type == 2:
            self.listtype = " (Staffeln)"
        elif series_type == 3:
            self.listtype = " (Staffeln/RegEx)"
        cont = ListDb(self.dbfile, liste).retrieve()
        titles = []
        if cont:
            for title in cont:
                if title:
                    title = title.replace(" ", ".")
                    titles.append(title)
        if not titles:
            self.empty_list = True
        return titles

    def parse_download(self, series_url, title, language_id):
        if not check_valid_release(title, self.retail_only, self.hevc_retail,
                                   self.dbfile):
            self.log_debug(
                title +
                u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
            )
            return
        if self.filename == 'MB_Staffeln':
            if not self.config.get("seasonpacks"):
                staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
                if staffelpack:
                    self.log_debug("%s - Release ignoriert (Staffelpaket)" %
                                   title)
                    return
            if not re.search(self.seasonssource, title.lower()):
                self.log_debug(title + " - Release hat falsche Quelle")
                return
        try:
            if language_id == 2:
                lang = 'EN'
            else:
                lang = 'DE'
            epoch = str(datetime.datetime.now().timestamp()).replace('.',
                                                                     '')[:-3]
            api_url = series_url + '?lang=' + lang + '&_=' + epoch
            response = get_url(api_url, self.configfile, self.dbfile,
                               self.scraper)
            info = json.loads(response)

            is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title,
                                    re.IGNORECASE)
            if is_episode:
                episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*',
                                            is_episode[0])[0].lower()
                season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*',
                                           is_episode[0])[0].lower()
                season_title = rreplace(
                    title.lower().replace(episode_string, ''), "-", ".*",
                    1).lower()
                season_title = season_title.replace(".untouched",
                                                    ".*").replace(
                                                        ".dd+51", ".dd.51")
                episode = str(int(episode_string.replace("e", "")))
                season = str(int(season_string.replace("s", "")))
                episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german',
                                          season_title, re.IGNORECASE)
                if episode_name:
                    season_title = season_title.replace(episode_name[0], '')
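                # Widen codec and web-source tags so naming variants of the same
                # release still match the list returned by the SF API.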
                codec_tags = [".h264", ".x264"]
                for tag in codec_tags:
                    season_title = season_title.replace(tag, ".*264")
                web_tags = [".web-rip", ".webrip", ".webdl", ".web-dl"]
                for tag in web_tags:
                    season_title = season_title.replace(tag, ".web.*")
            else:
                season = False
                episode = False
                season_title = title
                multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)',
                                               season_title, re.IGNORECASE)
                if multiple_episodes:
                    season_title = season_title.replace(
                        multiple_episodes[0], '.*')

            content = BeautifulSoup(info['html'], 'lxml')
            releases = content.find(
                "small", text=re.compile(season_title,
                                         re.IGNORECASE)).parent.parent.parent
            links = releases.findAll("div", {'class': 'row'})[1].findAll('a')
            valid = False
            for link in links:
                download_link = link['href']
                if check_hoster(link.text.replace('\n', ''), self.configfile):
                    valid = True
                    break
            if not valid and not self.hoster_fallback:
                storage = self.db.retrieve_all(title)
                if 'added' not in storage and 'notdl' not in storage:
                    wrong_hoster = '[SF/Hoster fehlt] - ' + title
                    if 'wrong_hoster' not in storage:
                        self.log_info(wrong_hoster)
                        self.db.store(title, 'wrong_hoster')
                        notify([wrong_hoster], self.configfile)
                    else:
                        self.log_debug(wrong_hoster)
            else:
                return self.send_package(title, download_link, language_id,
                                         season, episode)
        except:
            print(
                u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!"
            )

    def send_package(self, title, download_link, language_id, season, episode):
        englisch = ""
        if language_id == 2:
            englisch = "/Englisch"
        if self.filename == 'SJ_Serien_Regex':
            link_placeholder = '[Episode/RegEx' + englisch + '] - '
        elif self.filename == 'SJ_Serien':
            link_placeholder = '[Episode' + englisch + '] - '
        elif self.filename == 'SJ_Staffeln_Regex':
            link_placeholder = '[Staffel/RegEx' + englisch + '] - '
        else:
            link_placeholder = '[Staffel' + englisch + '] - '
        try:
            storage = self.db.retrieve_all(title)
        except Exception as e:
            self.log_debug("Fehler bei Datenbankzugriff: %s, Grund: %s" %
                           (e, title))
            return

        if 'added' in storage or 'notdl' in storage:
            self.log_debug(title + " - Release ignoriert (bereits gefunden)")
        else:
            download_link = 'https://' + self.sf + download_link
            if season and episode:
                download_link = download_link.replace(
                    '&_=', '&season=' + str(season) + '&episode=' +
                    str(episode) + '&_=')

            download = add_decrypt(title, download_link, self.sf, self.dbfile)
            if download:
                self.db.store(title, 'added')
                log_entry = link_placeholder + title + ' - [SF]'
                self.log_info(log_entry)
                notify(["[Click'n'Load notwendig] - " + log_entry],
                       self.configfile)
                return log_entry

    def periodical_task(self):
        if not self.sf:
            return self.device

        if self.filename == 'SJ_Serien_Regex':
            if not self.config.get('regex'):
                self.log_debug("Suche für SF-Regex deaktiviert!")
                return self.device
        elif self.filename == 'SJ_Staffeln_Regex':
            if not self.config.get('regex'):
                self.log_debug("Suche für SF-Regex deaktiviert!")
                return self.device
        elif self.filename == 'MB_Staffeln':
            if not self.config.get('crawlseasons'):
                self.log_debug("Suche für SF-Staffeln deaktiviert!")
                return self.device
        if self.empty_list:
            self.log_debug("Liste ist leer. Stoppe Suche für Serien!" +
                           self.listtype)
            return self.device
        try:
            reject = self.config.get("rejectlist").replace(
                ",", "|").lower() if len(
                    self.config.get("rejectlist")) > 0 else r"^unmatchable$"
        except TypeError:
            reject = r"^unmatchable$"

        set_sf = self.settings_hash(False)

        header = False
        response = False

        while self.day < 8:
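            # Walk the SF update feed for today and up to seven days back.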
            if self.last_set_sf == set_sf:
                try:
                    delta = (
                        datetime.datetime.now() -
                        datetime.timedelta(days=self.day)).strftime("%Y-%m-%d")
                    response = get_url_headers(
                        'https://' + self.sf + '/updates/' + delta,
                        self.configfile, self.dbfile, self.headers,
                        self.scraper)
                    self.scraper = response[1]
                    response = response[0]
                    if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                        feed = sf_releases_to_feedparser_dict(
                            response.text, "seasons", 'https://' + self.sf,
                            True)
                    else:
                        feed = sf_releases_to_feedparser_dict(
                            response.text, "episodes", 'https://' + self.sf,
                            True)
                except:
                    print(u"SF hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False

                if response:
                    if response.status_code == 304:
                        self.log_debug(
                            "SF-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!"
                        )
                        return self.device
                    header = True
            else:
                try:
                    delta = (
                        datetime.datetime.now() -
                        datetime.timedelta(days=self.day)).strftime("%Y-%m-%d")
                    response = get_url(
                        'https://' + self.sf + '/updates/' + delta,
                        self.configfile, self.dbfile, self.scraper)
                    if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                        feed = sf_releases_to_feedparser_dict(
                            response, "seasons", 'https://' + self.sf, True)
                    else:
                        feed = sf_releases_to_feedparser_dict(
                            response, "episodes", 'https://' + self.sf, True)
                except:
                    print(u"SF hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False

            self.day += 1

            if feed and feed.entries:
                first_post_sf = feed.entries[0]
                concat_sf = first_post_sf.title + first_post_sf.published + str(
                    self.settings) + str(self.pattern)
                sha_sf = hashlib.sha256(concat_sf.encode(
                    'ascii', 'ignore')).hexdigest()
            else:
                self.log_debug("Feed ist leer - breche Suche ab!")
                return False

            for post in feed.entries:
                concat = post.title + post.published + \
                         str(self.settings) + str(self.pattern)
                sha = hashlib.sha256(concat.encode('ascii',
                                                   'ignore')).hexdigest()
                if sha == self.last_sha_sf:
                    self.log_debug("Feed ab hier bereits gecrawlt (" +
                                   post.title + ") - breche Suche ab!")
                    break

                series_url = post.series_url
                title = post.title.replace("-", "-")  # as written a no-op; presumably meant to normalize a dash variant in the title

                if self.filename == 'SJ_Serien_Regex':
                    if self.config.get("regex"):
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.rsscrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            m = re.search(self.pattern, title.lower())
                            if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                                m = re.search(
                                    self.pattern.replace("480p", "."),
                                    title.lower())
                                self.quality = "480p"
                            if m:
                                if "720p" in title.lower():
                                    self.quality = "720p"
                                if "1080p" in title.lower():
                                    self.quality = "1080p"
                                if "2160p" in title.lower():
                                    self.quality = "2160p"
                                m = re.search(reject, title.lower())
                                if m:
                                    self.log_debug(
                                        title +
                                        " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                    )
                                title = re.sub(r'\[.*\] ', '', post.title)
                                self.parse_download(series_url, title,
                                                    language_id)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)

                    else:
                        continue
                elif self.filename == 'SJ_Staffeln_Regex':
                    if self.config.get("regex"):
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.rsscrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            m = re.search(self.pattern, title.lower())
                            if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                                m = re.search(
                                    self.pattern.replace("480p", "."),
                                    title.lower())
                                self.quality = "480p"
                            if m:
                                if "720p" in title.lower():
                                    self.quality = "720p"
                                if "1080p" in title.lower():
                                    self.quality = "1080p"
                                if "2160p" in title.lower():
                                    self.quality = "2160p"
                                m = re.search(reject, title.lower())
                                if m:
                                    self.log_debug(
                                        title +
                                        " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                    )
                                title = re.sub(r'\[.*\] ', '', post.title)
                                self.parse_download(series_url, title,
                                                    language_id)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)

                    else:
                        continue
                else:
                    if self.config.get("quality") != '480p':
                        m = re.search(self.pattern, title.lower())
                        if m:
                            if '.german.' in title.lower():
                                language_id = 1
                            elif self.rsscrawler.get('english'):
                                language_id = 2
                            else:
                                language_id = 0
                            if language_id:
                                mm = re.search(self.quality, title.lower())
                                if mm:
                                    mmm = re.search(reject, title.lower())
                                    if mmm:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (basierend auf rejectlist-Einstellung)"
                                        )
                                        continue
                                    if self.rsscrawler.get("surround"):
                                        if not re.match(
                                                r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                                title):
                                            self.log_debug(
                                                title +
                                                " - Release ignoriert (kein Mehrkanalton)"
                                            )
                                            continue
                                    try:
                                        storage = self.db.retrieve_all(title)
                                    except Exception as e:
                                        self.log_debug(
                                            "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                            % (e, title))
                                        return self.device
                                    if 'added' in storage:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (bereits gefunden)"
                                        )
                                        continue
                                    self.parse_download(
                                        series_url, title, language_id)
                            else:
                                self.log_debug(
                                    "%s - Englische Releases deaktiviert" %
                                    title)

                        else:
                            m = re.search(self.pattern, title.lower())
                            if m:
                                if '.german.' in title.lower():
                                    language_id = 1
                                elif self.rsscrawler.get('english'):
                                    language_id = 2
                                else:
                                    language_id = 0
                                if language_id:
                                    if "720p" in title.lower(
                                    ) or "1080p" in title.lower(
                                    ) or "2160p" in title.lower():
                                        continue
                                    mm = re.search(reject, title.lower())
                                    if mm:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (basierend auf rejectlist-Einstellung)"
                                        )
                                        continue
                                    if self.rsscrawler.get("surround"):
                                        if not re.match(
                                                r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                                title):
                                            self.log_debug(
                                                title +
                                                " - Release ignoriert (kein Mehrkanalton)"
                                            )
                                            continue
                                    title = re.sub(r'\[.*\] ', '', post.title)
                                    try:
                                        storage = self.db.retrieve_all(title)
                                    except Exception as e:
                                        self.log_debug(
                                            "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                            % (e, title))
                                        return self.device
                                    if 'added' in storage:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (bereits gefunden)"
                                        )
                                        continue
                                    self.parse_download(
                                        series_url, title, language_id)
                                else:
                                    self.log_debug(
                                        "%s - Englische Releases deaktiviert" %
                                        title)

        if set_sf:
            new_set_sf = self.settings_hash(True)
            if set_sf == new_set_sf:
                self.cdc.delete("SFSet-" + self.filename)
                self.cdc.store("SFSet-" + self.filename, set_sf)
                self.cdc.delete("SF-" + self.filename)
                self.cdc.store("SF-" + self.filename, sha_sf)

        if header and response:
            self.cdc.delete("SFHeaders-" + self.filename)
            self.cdc.store("SFHeaders-" + self.filename,
                           response.headers['date'])

        return self.device
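
A minimal sketch of running the SF crawler above against the MB_Staffeln list; the paths are placeholders, device is left as None, and internal_name is assumed to match the 'SF' config section.

# Hypothetical periodic run; scraper and logging match what __init__ expects.
import logging
import cloudscraper

scraper = cloudscraper.create_scraper()
sf = SF("/path/to/RSScrawler.ini", "/path/to/RSScrawler.db", None, logging,
        scraper, filename='MB_Staffeln', internal_name='SF')
device = sf.periodical_task()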
Esempio n. 17
0
class SJ:
    def __init__(self, configfile, dbfile, device, logging, scraper, filename,
                 internal_name):
        self._INTERNAL_NAME = internal_name
        self.configfile = configfile
        self.dbfile = dbfile
        self.device = device

        self.hostnames = RssConfig('Hostnames', self.configfile)
        self.sj = self.hostnames.get('sj')

        self.config = RssConfig(self._INTERNAL_NAME, self.configfile)
        self.rsscrawler = RssConfig("RSScrawler", self.configfile)
        self.hevc_retail = self.config.get("hevc_retail")
        self.retail_only = self.config.get("retail_only")
        self.hoster_fallback = self.config.get("hoster_fallback")
        self.hosters = RssConfig("Hosters", configfile).get_section()
        self.log_info = logging.info
        self.log_error = logging.error
        self.log_debug = logging.debug
        self.scraper = scraper
        self.filename = filename
        self.db = RssDb(self.dbfile, 'rsscrawler')
        self.quality = self.config.get("quality")
        self.cdc = RssDb(self.dbfile, 'cdc')
        self.last_set_sj = self.cdc.retrieve("SJSet-" + self.filename)
        self.last_sha_sj = self.cdc.retrieve("SJ-" + self.filename)
        self.headers = {
            'If-Modified-Since':
            str(self.cdc.retrieve("SJHeaders-" + self.filename))
        }
        settings = [
            "quality", "rejectlist", "regex", "hevc_retail", "retail_only",
            "hoster_fallback"
        ]
        self.settings = []
        self.settings.append(self.rsscrawler.get("english"))
        self.settings.append(self.rsscrawler.get("surround"))
        self.settings.append(self.hosters)
        for s in settings:
            self.settings.append(self.config.get(s))

        self.empty_list = False
        if self.filename == 'SJ_Staffeln_Regex':
            self.level = 3
        elif self.filename == 'MB_Staffeln':
            self.seasonssource = self.config.get('seasonssource').lower()
            self.level = 2
        elif self.filename == 'SJ_Serien_Regex':
            self.level = 1
        else:
            self.level = 0

        self.pattern = r'^(' + "|".join(
            self.get_series_list(self.filename, self.level)).lower() + ')'
        self.listtype = ""

        self.day = 0

    def settings_hash(self, refresh):
        if refresh:
            settings = [
                "quality", "rejectlist", "regex", "hevc_retail", "retail_only",
                "hoster_fallback"
            ]
            self.settings = []
            self.settings.append(self.rsscrawler.get("english"))
            self.settings.append(self.rsscrawler.get("surround"))
            self.settings.append(self.hosters)
            for s in settings:
                self.settings.append(self.config.get(s))
            self.pattern = r'^(' + "|".join(
                self.get_series_list(self.filename, self.level)).lower() + ')'
        set_sj = str(self.settings) + str(self.pattern)
        return hashlib.sha256(set_sj.encode('ascii', 'ignore')).hexdigest()

    def get_series_list(self, liste, series_type):
        if series_type == 1:
            self.listtype = " (RegEx)"
        elif series_type == 2:
            self.listtype = " (Staffeln)"
        elif series_type == 3:
            self.listtype = " (Staffeln/RegEx)"
        cont = ListDb(self.dbfile, liste).retrieve()
        titles = []
        if cont:
            for title in cont:
                if title:
                    title = title.replace(" ", ".")
                    titles.append(title)
        if not titles:
            self.empty_list = True
        return titles

    def parse_download(self, series_url, title, language_id):
        if not check_valid_release(title, self.retail_only, self.hevc_retail,
                                   self.dbfile):
            self.log_debug(
                title +
                u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
            )
            return
        if self.filename == 'MB_Staffeln':
            if not self.config.get("seasonpacks"):
                staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
                if staffelpack:
                    self.log_debug("%s - Release ignoriert (Staffelpaket)" %
                                   title)
                    return
            if not re.search(self.seasonssource, title.lower()):
                self.log_debug(title + " - Release hat falsche Quelle")
                return
        try:
            series_info = get_url(series_url, self.configfile, self.dbfile)
            series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
            api_url = 'https://' + self.sj + '/api/media/' + series_id + '/releases'

            response = get_url(api_url, self.configfile, self.dbfile,
                               self.scraper)
            seasons = json.loads(response)
            for season in seasons:
                season = seasons[season]
                for item in season['items']:
                    if item['name'] == title:
                        valid = False
                        for hoster in item['hoster']:
                            if hoster:
                                if check_hoster(hoster, self.configfile):
                                    valid = True
                        if not valid and not self.hoster_fallback:
                            storage = self.db.retrieve_all(title)
                            if 'added' not in storage and 'notdl' not in storage:
                                wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                                if 'wrong_hoster' not in storage:
                                    print(wrong_hoster)
                                    self.db.store(title, 'wrong_hoster')
                                    notify([wrong_hoster], self.configfile)
                                else:
                                    self.log_debug(wrong_hoster)
                        else:
                            return self.send_package(title, series_url,
                                                     language_id)
        except:
            print(
                u"SJ hat die Serien-API angepasst. Breche Download-Prüfung ab!"
            )

    def send_package(self, title, series_url, language_id):
        englisch = ""
        if language_id == 2:
            englisch = "/Englisch"
        if self.filename == 'SJ_Serien_Regex':
            link_placeholder = '[Episode/RegEx' + englisch + '] - '
        elif self.filename == 'SJ_Serien':
            link_placeholder = '[Episode' + englisch + '] - '
        elif self.filename == 'SJ_Staffeln_Regex':
            link_placeholder = '[Staffel/RegEx' + englisch + '] - '
        else:
            link_placeholder = '[Staffel' + englisch + '] - '
        try:
            storage = self.db.retrieve_all(title)
        except Exception as e:
            self.log_debug("Fehler bei Datenbankzugriff: %s, Grund: %s" %
                           (e, title))
            return

        if 'added' in storage or 'notdl' in storage:
            self.log_debug(title + " - Release ignoriert (bereits gefunden)")
        else:
            download = add_decrypt(title, series_url, self.sj, self.dbfile)
            if download:
                self.db.store(title, 'added')
                log_entry = link_placeholder + title + ' - [SJ]'
                self.log_info(log_entry)
                notify(["[Click'n'Load notwendig] - " + log_entry],
                       self.configfile)
                return log_entry

    def periodical_task(self):
        if not self.sj:
            return self.device

        if self.filename == 'SJ_Serien_Regex':
            if not self.config.get('regex'):
                self.log_debug("Suche für SJ-Regex deaktiviert!")
                return self.device
        elif self.filename == 'SJ_Staffeln_Regex':
            if not self.config.get('regex'):
                self.log_debug("Suche für SJ-Regex deaktiviert!")
                return self.device
        elif self.filename == 'MB_Staffeln':
            if not self.config.get('crawlseasons'):
                self.log_debug("Suche für SJ-Staffeln deaktiviert!")
                return self.device
        if self.empty_list:
            self.log_debug("Liste ist leer. Stoppe Suche für Serien!" +
                           self.listtype)
            return self.device
        try:
            reject = self.config.get("rejectlist").replace(
                ",", "|").lower() if len(
                    self.config.get("rejectlist")) > 0 else r"^unmatchable$"
        except TypeError:
            reject = r"^unmatchable$"

        set_sj = self.settings_hash(False)

        header = False
        response = False

        while self.day < 8:
            if self.last_set_sj == set_sj:
                try:
                    response = get_url_headers(
                        'https://' + self.sj + '/api/releases/latest/' +
                        str(self.day), self.configfile, self.dbfile,
                        self.headers, self.scraper)
                    self.scraper = response[1]
                    response = response[0]
                    if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                        feed = j_releases_to_feedparser_dict(
                            response.text, "seasons", 'https://' + self.sj,
                            True)
                    else:
                        feed = j_releases_to_feedparser_dict(
                            response.text, "episodes", 'https://' + self.sj,
                            True)
                except:
                    print(u"SJ hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False

                if response:
                    if response.status_code == 304:
                        self.log_debug(
                            "SJ-Feed seit letztem Aufruf nicht aktualisiert - breche  Suche ab!"
                        )
                        return self.device
                    header = True
            else:
                try:
                    response = get_url(
                        'https://' + self.sj + '/api/releases/latest/' +
                        str(self.day), self.configfile, self.dbfile,
                        self.scraper)
                    if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                        feed = j_releases_to_feedparser_dict(
                            response, "seasons", 'https://' + self.sj, True)
                    else:
                        feed = j_releases_to_feedparser_dict(
                            response, "episodes", 'https://' + self.sj, True)
                except:
                    print(u"SJ hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False

            self.day += 1

            if feed and feed.entries:
                first_post_sj = feed.entries[0]
                concat_sj = first_post_sj.title + first_post_sj.published + str(
                    self.settings) + str(self.pattern)
                sha_sj = hashlib.sha256(concat_sj.encode(
                    'ascii', 'ignore')).hexdigest()
            else:
                self.log_debug("Feed ist leer - breche  Suche ab!")
                return False

            for post in feed.entries:
                concat = post.title + post.published + \
                         str(self.settings) + str(self.pattern)
                sha = hashlib.sha256(concat.encode('ascii',
                                                   'ignore')).hexdigest()
                if sha == self.last_sha_sj:
                    self.log_debug("Feed ab hier bereits gecrawlt (" +
                                   post.title + ") - breche  Suche ab!")
                    break

                series_url = post.series_url
                title = post.title.replace("-", "-")

                if self.filename == 'SJ_Serien_Regex':
                    if self.config.get("regex"):
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.rsscrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            m = re.search(self.pattern, title.lower())
                            if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                                m = re.search(
                                    self.pattern.replace("480p", "."),
                                    title.lower())
                                self.quality = "480p"
                            if m:
                                if "720p" in title.lower():
                                    self.quality = "720p"
                                if "1080p" in title.lower():
                                    self.quality = "1080p"
                                if "2160p" in title.lower():
                                    self.quality = "2160p"
                                m = re.search(reject, title.lower())
                                if m:
                                    self.log_debug(
                                        title +
                                        " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                    )
                                title = re.sub(r'\[.*\] ', '', post.title)
                                self.parse_download(series_url, title,
                                                    language_id)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)

                    else:
                        continue
                elif self.filename == 'SJ_Staffeln_Regex':
                    if self.config.get("regex"):
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.rsscrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            m = re.search(self.pattern, title.lower())
                            if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                                m = re.search(
                                    self.pattern.replace("480p", "."),
                                    title.lower())
                                self.quality = "480p"
                            if m:
                                if "720p" in title.lower():
                                    self.quality = "720p"
                                if "1080p" in title.lower():
                                    self.quality = "1080p"
                                if "2160p" in title.lower():
                                    self.quality = "2160p"
                                m = re.search(reject, title.lower())
                                if m:
                                    self.log_debug(
                                        title +
                                        " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                    )
                                title = re.sub(r'\[.*\] ', '', post.title)
                                self.parse_download(series_url, title,
                                                    language_id)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)

                    else:
                        continue
                else:
                    if self.config.get("quality") != '480p':
                        m = re.search(self.pattern, title.lower())
                        if m:
                            if '.german.' in title.lower():
                                language_id = 1
                            elif self.rsscrawler.get('english'):
                                language_id = 2
                            else:
                                language_id = 0
                            if language_id:
                                mm = re.search(self.quality, title.lower())
                                if mm:
                                    mmm = re.search(reject, title.lower())
                                    if mmm:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (basierend auf rejectlist-Einstellung)"
                                        )
                                        continue
                                    if self.rsscrawler.get("surround"):
                                        if not re.match(
                                                r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                                title):
                                            self.log_debug(
                                                title +
                                                " - Release ignoriert (kein Mehrkanalton)"
                                            )
                                            continue
                                    try:
                                        storage = self.db.retrieve_all(title)
                                    except Exception as e:
                                        self.log_debug(
                                            "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                            % (e, title))
                                        return self.device
                                    if 'added' in storage:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (bereits gefunden)"
                                        )
                                        continue
                                    self.parse_download(
                                        series_url, title, language_id)
                            else:
                                self.log_debug(
                                    "%s - Englische Releases deaktiviert" %
                                    title)

                        else:
                            m = re.search(self.pattern, title.lower())
                            if m:
                                if '.german.' in title.lower():
                                    language_id = 1
                                elif self.rsscrawler.get('english'):
                                    language_id = 2
                                else:
                                    language_id = 0
                                if language_id:
                                    if "720p" in title.lower(
                                    ) or "1080p" in title.lower(
                                    ) or "2160p" in title.lower():
                                        continue
                                    mm = re.search(reject, title.lower())
                                    if mm:
                                        self.log_debug(
                                            title +
                                            " Release ignoriert (basierend auf rejectlist-Einstellung)"
                                        )
                                        continue
                                    if self.rsscrawler.get("surround"):
                                        if not re.match(
                                                r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                                title):
                                            self.log_debug(
                                                title +
                                                " - Release ignoriert (kein Mehrkanalton)"
                                            )
                                            continue
                                    title = re.sub(r'\[.*\] ', '', post.title)
                                    try:
                                        storage = self.db.retrieve_all(title)
                                    except Exception as e:
                                        self.log_debug(
                                            "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                            % (e, title))
                                        return self.device
                                    if 'added' in storage:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (bereits gefunden)"
                                        )
                                        continue
                                    self.parse_download(
                                        series_url, title, language_id)
                                else:
                                    self.log_debug(
                                        "%s - Englische Releases deaktiviert" %
                                        title)

        if set_sj:
            new_set_sj = self.settings_hash(True)
            if set_sj == new_set_sj:
                self.cdc.delete("SJSet-" + self.filename)
                self.cdc.store("SJSet-" + self.filename, set_sj)
                self.cdc.delete("SJ-" + self.filename)
                self.cdc.store("SJ-" + self.filename, sha_sj)

        if header and response:
            self.cdc.delete("SJHeaders-" + self.filename)
            self.cdc.store("SJHeaders-" + self.filename,
                           response.headers['date'])

        return self.device
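
# --- Hedged sketch (not part of the original class) ---
# The settings_hash/CDC pattern above boils down to: hash the relevant settings
# plus the title pattern, compare against the value cached after the last run,
# and only re-crawl when that fingerprint changed. A minimal, self-contained
# illustration with a plain dict standing in for the RssDb-backed cdc table
# (all names below are illustrative, not part of RSScrawler):
import hashlib


def settings_fingerprint(settings, pattern):
    # One SHA-256 over the stringified settings and the compiled title pattern.
    payload = str(settings) + str(pattern)
    return hashlib.sha256(payload.encode('ascii', 'ignore')).hexdigest()


def needs_full_crawl(cdc_cache, key, settings, pattern):
    # True when the fingerprint changed since the last stored run (or never ran).
    return cdc_cache.get(key) != settings_fingerprint(settings, pattern)

# Usage (illustrative):
#   cdc_cache = {}
#   needs_full_crawl(cdc_cache, "SJSet-SJ_Serien", ["720p"], r"^(show)")   # True
#   cdc_cache["SJSet-SJ_Serien"] = settings_fingerprint(["720p"], r"^(show)")
#   needs_full_crawl(cdc_cache, "SJSet-SJ_Serien", ["720p"], r"^(show)")   # False
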
Esempio n. 18
0
def download(configfile,
             dbfile,
             device,
             title,
             subdir,
             old_links,
             password,
             full_path=None,
             autostart=False):
    try:
        if not device or not is_device(device):
            device = get_device(configfile)

        if isinstance(old_links, list):
            links = []
            for link in old_links:
                if link not in links:
                    links.append(link)
        else:
            links = [old_links]

        links = str(links).replace(" ", "")
        crawljobs = RssConfig('Crawljobs', configfile)
        usesubdir = crawljobs.get("subdir")
        priority = "DEFAULT"

        if full_path:
            path = full_path
        else:
            if usesubdir:
                path = subdir + "/<jd:packagename>"
            else:
                path = "<jd:packagename>"
        if "Remux" in path:
            priority = "LOWER"

        try:
            device.linkgrabber.add_links(
                params=[{
                    "autostart": autostart,
                    "links": links,
                    "packageName": title,
                    "extractPassword": password,
                    "priority": priority,
                    "downloadPassword": password,
                    "destinationFolder": path,
                    "comment": "RSScrawler by rix1337",
                    "overwritePackagizerRules": False
                }])
        except rsscrawler.myjdapi.TokenExpiredException:
            device = get_device(configfile)
            if not device or not is_device(device):
                return False
            device.linkgrabber.add_links(
                params=[{
                    "autostart": autostart,
                    "links": links,
                    "packageName": title,
                    "extractPassword": password,
                    "priority": priority,
                    "downloadPassword": password,
                    "destinationFolder": path,
                    "comment": "RSScrawler by rix1337",
                    "overwritePackagizerRules": False
                }])
        db = RssDb(dbfile, 'crawldog')
        if db.retrieve(title):
            db.delete(title)
            db.store(title, 'retried')
        else:
            db.store(title, 'added')
        return device
    except rsscrawler.myjdapi.MYJDException as e:
        print(u"Fehler bei der Verbindung mit MyJDownloader: " + str(e))
        return False
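
# --- Hedged sketch (not part of the original function) ---
# The download helper above relies on two small patterns: order-preserving
# de-duplication of the link list and exactly one retry of add_links after the
# session token expires. A reduced, self-contained illustration; SessionExpired
# merely stands in for rsscrawler.myjdapi.TokenExpiredException and is not the
# real API:


class SessionExpired(Exception):
    """Placeholder for the token-expired exception raised by the real client."""


def unique_links(old_links):
    # Accept a single link or a list and drop duplicates while keeping order.
    links = old_links if isinstance(old_links, list) else [old_links]
    deduped = []
    for link in links:
        if link not in deduped:
            deduped.append(link)
    return deduped


def add_links_with_retry(add_links, refresh_device, params):
    # Try once; on an expired session, refresh the device and try once more.
    try:
        return add_links(params=[params])
    except SessionExpired:
        if not refresh_device():
            return False
        return add_links(params=[params])
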
Esempio n. 19
0
def post_url(url, configfile, dbfile, data, scraper=False):
    config = RssConfig('RSScrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()

    db = RssDb(dbfile, 'proxystatus')
    db_normal = RssDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)

    # Temporary fix for FX
    if site and "FX" in site:
        scraper = requests.session()
        scraper.headers = scraper.headers
        scraper.cookies = scraper.cookies
        scraper.verify = False

    if proxy:
        try:
            if site and "SJ" in site:
                if db.retrieve("SJ"):
                    if config.get("fallback") and not db_normal.retrieve("SJ"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            elif site and "DJ" in site:
                if db.retrieve("DJ"):
                    if config.get("fallback") and not db_normal.retrieve("DJ"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            elif site and "SF" in site:
                if db.retrieve("SF"):
                    if config.get("fallback") and not db_normal.retrieve("SF"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            elif site and "MB" in site:
                if db.retrieve("MB"):
                    if config.get("fallback") and not db_normal.retrieve("MB"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            elif site and "HW" in site:
                if db.retrieve("HW"):
                    if config.get("fallback") and not db_normal.retrieve("HW"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            elif site and "FX" in site:
                if db.retrieve("FX"):
                    if config.get("fallback") and not db_normal.retrieve("FX"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            elif site and "HS" in site:
                if db.retrieve("HS"):
                    if config.get("fallback") and not db_normal.retrieve("HS"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            elif site and "NK" in site:
                if db.retrieve("NK"):
                    if config.get("fallback") and not db_normal.retrieve("NK"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            elif site and "DD" in site:
                if db.retrieve("DD"):
                    if config.get("fallback") and not db_normal.retrieve("DD"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            elif site and "FC" in site:
                if db.retrieve("FC"):
                    if config.get("fallback") and not db_normal.retrieve("FC"):
                        return scraper.post(url, data, timeout=30).content
                    else:
                        return ""
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.post(url, data, proxies=proxies,
                                    timeout=30).content
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
    else:
        try:
            if site and "SJ" in site and db_normal.retrieve("SJ"):
                return ""
            elif site and "DJ" in site and db_normal.retrieve("DJ"):
                return ""
            elif site and "SF" in site and db_normal.retrieve("SF"):
                return ""
            elif site and "MB" in site and db_normal.retrieve("MB"):
                return ""
            elif site and "HW" in site and db_normal.retrieve("HW"):
                return ""
            elif site and "FX" in site and db_normal.retrieve("FX"):
                return ""
            elif site and "HS" in site and db_normal.retrieve("HS"):
                return ""
            elif site and "NK" in site and db_normal.retrieve("NK"):
                return ""
            elif site and "DD" in site and db_normal.retrieve("DD"):
                return ""
            elif site and "FC" in site and db_normal.retrieve("FC"):
                return ""
            response = scraper.post(url, data, timeout=30).content
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ""
Esempio n. 20
0
def check_valid_release(title, retail_only, hevc_retail, dbfile):
    if retail_only:
        if not is_retail(title, False, False):
            return False

    if ".German" in title:
        search_title = title.split(".German")[0]
    elif ".GERMAN" in title:
        search_title = title.split(".GERMAN")[0]
    else:
        try:
            quality = re.findall(r"\d{3,4}p", title)[0]
            search_title = title.split(quality)[0]
        except:
            return True

    db = RssDb(dbfile, 'rsscrawler')
    is_episode = re.findall(r'.*\.s\d{1,3}(e\d{1,3}|e\d{1,3}-.*\d{1,3})\..*',
                            title, re.IGNORECASE)
    if is_episode:
        episode_name = re.findall(r'.*\.s\d{1,3}e\d{1,3}(\..*)', search_title,
                                  re.IGNORECASE)
        if episode_name:
            search_title = search_title.replace(episode_name[0], "")
        season_search_title = search_title.replace(is_episode[0], "") + "."
        season_results = db.retrieve_all_beginning_with(season_search_title)
        results = db.retrieve_all_beginning_with(search_title) + season_results
    else:
        db = RssDb(dbfile, 'rsscrawler')
        results = db.retrieve_all_beginning_with(search_title)

    if not results:
        return True

    bluray_tags = [
        ".bd-rip.", ".br-rip.", ".bluray-rip.", ".bluray.", ".bd-disk.",
        ".bd.", ".bd5.", ".bd9.", ".bd25.", ".bd50."
    ]
    web_tags = [
        ".web.", ".web-rip.", ".webrip.", ".vod-rip.", ".webdl.", ".web-dl.",
        ".ddc."
    ]
    trash_tags = [
        ".cam.", ".cam-rip.", ".ts.", ".telesync.", ".wp.", ".workprint.",
        ".tc.", ".telecine.", ".vhs-rip.", ".tv-rip.", ".hdtv-rip.", ".hdtv.",
        ".tvrip.", ".hdtvrip.", ".sat-rip.", ".dvb-rip.", ".ds-rip.", ".scr.",
        ".screener.", ".dvdscr.", ".dvdscreener.", ".bdscr.", ".r5.",
        ".dvdrip.", ".dvd."
    ]

    unknown = []
    trash = []
    web = []
    bluray = []
    retail = []

    # Get all previously found Releases and categorize them by their tags
    for r in results:
        if any(s in r.lower() for s in bluray_tags):
            if is_retail(r, False, False):
                retail.append(r)
            else:
                bluray.append(r)
        elif any(s in r.lower() for s in web_tags):
            web.append(r)
        elif any(s in r.lower() for s in trash_tags):
            trash.append(r)
        else:
            unknown.append(r)

    # Categorize the current Release by its tag to check if a release of the same or better category was already found
    # If no release is in the higher category, propers are allowed anytime
    # If no HEVC is available in the current category or higher and the current release is HEVC, it will be allowed
    if any(s in title.lower() for s in bluray_tags):
        if is_retail(title, False, False):
            if len(retail) > 0:
                if hevc_retail:
                    if is_hevc(title):
                        no_hevc = True
                        for r in retail:
                            if is_hevc(r):
                                no_hevc = False
                        if no_hevc:
                            return True
                if ".proper" in title.lower():
                    return True
                return False
        else:
            if len(retail) == 0 and len(bluray) > 0:
                if ".proper" in title.lower():
                    return True
            if len(retail) > 0 or len(bluray) > 0:
                if hevc_retail:
                    if is_hevc(title):
                        no_hevc = True
                        for r in retail + bluray:
                            if is_hevc(r):
                                no_hevc = False
                        if no_hevc:
                            return True
                return False
    elif any(s in title.lower() for s in web_tags):
        if len(retail) == 0 and len(bluray) == 0 and len(web) > 0:
            if ".proper" in title.lower():
                return True
        if len(retail) > 0 or len(bluray) > 0 or len(web) > 0:
            if hevc_retail:
                if is_hevc(title):
                    no_hevc = True
                    for r in retail + bluray + web:
                        if is_hevc(r):
                            no_hevc = False
                    if no_hevc:
                        return True
            return False
    elif any(s in title.lower() for s in trash_tags):
        if len(retail) == 0 and len(bluray) == 0 and len(
                web) == 0 and len(trash) > 0:
            if ".proper" in title.lower():
                return True
        if len(retail) > 0 or len(bluray) > 0 or len(web) > 0 or len(
                trash) > 0:
            return False
    else:
        if len(retail) == 0 and len(bluray) == 0 and len(web) == 0 and len(
                trash) == 0 and len(unknown) > 0:
            if ".proper" in title.lower():
                return True
        if len(retail) > 0 or len(bluray) > 0 or len(web) > 0 or len(
                trash) > 0 or len(unknown) > 0:
            return False

    return True
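
# --- Hedged sketch (not part of the original function) ---
# The category logic in check_valid_release amounts to ranking releases by
# source quality and rejecting a new title when a stored release already
# reaches the same or a higher tier (the retail, proper and HEVC exceptions are
# handled separately above). A stripped-down illustration with shortened tag
# lists; all names are made up:
SOURCE_TIERS = [
    ("trash", [".cam.", ".ts.", ".dvdrip."]),
    ("web", [".web.", ".web-dl.", ".webrip."]),
    ("bluray", [".bluray.", ".bd-rip.", ".br-rip."]),
]


def source_tier(release):
    # 0 = unknown source, 1 = trash, 2 = web, 3 = bluray.
    name = release.lower()
    for rank, (_, tags) in enumerate(SOURCE_TIERS, start=1):
        if any(tag in name for tag in tags):
            return rank
    return 0


def beats_existing(title, stored_releases):
    # Worth downloading only if every previously found release sits in a lower tier.
    return all(source_tier(old) < source_tier(title) for old in stored_releases)

# Usage (illustrative):
#   beats_existing("Show.S01E01.German.1080p.BluRay.x264",
#                  ["Show.S01E01.German.1080p.WEB.h264"])     # True
#   beats_existing("Show.S01E01.German.1080p.WEB.h264",
#                  ["Show.S01E01.German.1080p.BluRay.x264"])  # False
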