Example #1
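This parse_download variant (apparently from the DJ/Doku site module) skips releases for which an equal or better source is already stored, resolves the release list through the site's media API, and only sends the package if at least one hoster passes check_hoster; otherwise a missing-hoster entry is stored and notified.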
    def parse_download(self, series_url, title, language_id):
        if not check_valid_release(title, False, False, self.dbfile):
            self.log_debug(title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)")
            return
        try:
            series_info = get_url(series_url, self.configfile, self.dbfile)
            series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
            api_url = 'https://' + self.dj + '/api/media/' + series_id + '/releases'

            response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
            seasons = json.loads(response)
            for season in seasons:
                season = seasons[season]
                for item in season['items']:
                    if item['name'] == title:
                        valid = False
                        for hoster in item['hoster']:
                            if check_hoster(hoster, self.configfile):
                                valid = True
                        if not valid and not self.hoster_fallback:
                            storage = self.db.retrieve_all(title)
                            if 'added' not in storage and 'notdl' not in storage:
                                wrong_hoster = '[DJ/Hoster fehlt] - ' + title
                                if 'wrong_hoster' not in storage:
                                    print(wrong_hoster)
                                    self.db.store(title, 'wrong_hoster')
                                    notify([wrong_hoster], self.configfile)
                                else:
                                    self.log_debug(wrong_hoster)
                        else:
                            return self.send_package(title, series_url, language_id)
        except Exception:
            print(u"DJ hat die Doku-API angepasst. Breche Download-Prüfung ab!")
Example #2
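The SJ variant of parse_download: besides the source check it honors the retail_only/hevc_retail settings and, for the MB_Staffeln list, filters out season packs and releases from the wrong source before querying the media API.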
    def parse_download(self, series_url, title, language_id):
        if not check_valid_release(title, self.retail_only, self.hevc_retail,
                                   self.dbfile):
            self.log_debug(
                title +
                u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
            )
            return
        if self.filename == 'MB_Staffeln':
            if not self.config.get("seasonpacks"):
                staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
                if staffelpack:
                    self.log_debug("%s - Release ignoriert (Staffelpaket)" %
                                   title)
                    return
            if not re.search(self.seasonssource, title.lower()):
                self.log_debug(title + " - Release hat falsche Quelle")
                return
        try:
            series_info = get_url(series_url, self.configfile, self.dbfile)
            series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
            api_url = 'https://' + self.sj + '/api/media/' + series_id + '/releases'

            response = get_url(api_url, self.configfile, self.dbfile,
                               self.scraper)
            seasons = json.loads(response)
            for season in seasons:
                season = seasons[season]
                for item in season['items']:
                    if item['name'] == title:
                        valid = False
                        for hoster in item['hoster']:
                            if hoster:
                                if check_hoster(hoster, self.configfile):
                                    valid = True
                        if not valid and not self.hoster_fallback:
                            storage = self.db.retrieve_all(title)
                            if 'added' not in storage and 'notdl' not in storage:
                                wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                                if 'wrong_hoster' not in storage:
                                    print(wrong_hoster)
                                    self.db.store(title, 'wrong_hoster')
                                    notify([wrong_hoster], self.configfile)
                                else:
                                    self.log_debug(wrong_hoster)
                        else:
                            return self.send_package(title, series_url,
                                                     language_id)
        except Exception:
            print(
                u"SJ hat die Serien-API angepasst. Breche Download-Prüfung ab!"
            )
Example #3
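A BeautifulSoup-based parse_download: it locates the release title on the series page (falling back from a single episode to the season pack), extracts the hoster links from the surrounding markup, and forwards those matching self.hoster to send_package.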
    def parse_download(self, series_url, search_title, englisch, genre):
        req_page = get_url(series_url, self.configfile, self.dbfile)

        soup = BeautifulSoup(req_page, 'lxml')
        escape_brackets = search_title.replace(
            "(", ".*").replace(")", ".*").replace("+", ".*")
        title = soup.find(text=re.compile(escape_brackets))
        if not title:
            try:
                episode = re.findall(r'\.S\d{1,3}(E\d{1,3}.*)\.German', escape_brackets).pop()
                escape_brackets_pack = escape_brackets.replace(episode, "")
                title = soup.find(text=re.compile(escape_brackets_pack))
            except Exception:
                title = False
                self.log_debug(search_title + " - Kein Link gefunden")
        if title:
            url_hosters = re.findall(
                r'<a href="([^"\'>]*)".+?\| (.+?)<', str(title.parent.parent))
            links = []
            for url_hoster in url_hosters:
                if re.match(self.hoster, url_hoster[1]):
                    links.append(url_hoster[0])
            if not links:
                self.log_debug(
                    "%s - Release ignoriert (kein passender Link gefunden)" % search_title)
            else:
                return self.send_package(search_title, links, englisch, genre)
Example #4
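The periodical DD feed task: it parses all configured RSS feeds, only considers posts published more than 30 minutes ago, filters their links through check_hoster (with an optional fallback to all links), deduplicates against the database, and pushes new releases to JDownloader via myjd_download.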
    def periodical_task(self):
        feeds = self.config.get("feeds")
        if feeds:
            added_items = []
            feeds = feeds.replace(" ", "").split(',')
            for feed in feeds:
                feed = feedparser.parse(
                    get_url(feed, self.configfile, self.dbfile, self.scraper))
                for post in feed.entries:
                    key = post.title.replace(" ", ".")

                    epoch = datetime(1970, 1, 1)
                    current_epoch = int(time())
                    published_format = "%Y-%m-%d %H:%M:%S+00:00"
                    published_timestamp = str(parser.parse(post.published))
                    published_epoch = int((datetime.strptime(
                        published_timestamp, published_format) -
                                           epoch).total_seconds())
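                    # only handle posts that were published more than 30 minutes ago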
                    if (current_epoch - 1800) > published_epoch:
                        link_pool = post.summary
                        unicode_links = re.findall(r'(http.*)', link_pool)
                        links = []
                        for link in unicode_links:
                            if check_hoster(link, self.configfile):
                                links.append(str(link))
                        if self.config.get("hoster_fallback") and not links:
                            for link in unicode_links:
                                links.append(str(link))
                        storage = self.db.retrieve_all(key)
                        if not links:
                            if 'added' not in storage and 'notdl' not in storage:
                                wrong_hoster = '[DD/Hoster fehlt] - ' + key
                                if 'wrong_hoster' not in storage:
                                    self.log_info(wrong_hoster)
                                    self.db.store(key, 'wrong_hoster')
                                    notify([wrong_hoster], self.configfile)
                                else:
                                    self.log_debug(wrong_hoster)
                        elif 'added' in storage:
                            self.log_debug(
                                "%s - Release ignoriert (bereits gefunden)" %
                                key)
                        else:
                            self.device = myjd_download(
                                self.configfile, self.dbfile, self.device, key,
                                "RSScrawler", links, "")
                            if self.device:
                                self.db.store(key, 'added')
                                log_entry = '[Englisch] - ' + key + ' - [DD]'
                                self.log_info(log_entry)
                                notify([log_entry], self.configfile)
                                added_items.append(log_entry)
                    else:
                        self.log_debug(
                            "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert."
                            % key)
        else:
            self.log_debug("Liste ist leer. Stoppe Suche für DD!")
        return self.device
Example #5
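ha_search_to_soup fetches a search page, follows each result anchor to its detail page, and passes the title/detail pairs on to ha_search_to_feedparser_dict; anchors without a title attribute are treated as pagination links and skipped.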
def ha_search_to_soup(url, configfile, dbfile):
    content = []
    search = BeautifulSoup(get_url(url, configfile, dbfile), 'lxml')
    if search:
        results = search.find("div", {"id": "content"})
        if results:
            results = results.find_all("a")
            for r in results:
                try:
                    title = r["title"]
                    details = BeautifulSoup(get_url(r["href"], configfile, dbfile), 'lxml')
                    content.append({
                        "key": title,
                        "value": details
                    })
                except Exception:
                    pagination = r["href"]

    return ha_search_to_feedparser_dict(content)
Example #6
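ha_search_results is the lightweight counterpart to the previous helper: it only collects (title, href) tuples from the search page, without fetching the detail pages.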
def ha_search_results(url, configfile, dbfile):
    content = []
    search = BeautifulSoup(get_url(url, configfile, dbfile), 'lxml')
    if search:
        results = search.find("div", {"id": "content"})
        if results:
            results = results.find_all("a")
            for r in results:
                try:
                    content.append((r["title"], r["href"]))
                except Exception:
                    break
    return content
Example #7
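range_parse collects every text node on the series page that matches search_title (or fallback_title) and dispatches each title of the configured quality to parse_download; the except re.error handler only catches broken search patterns.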
    def range_parse(self, series_url, search_title, englisch, fallback_title, genre):
        req_page = get_url(series_url, self.configfile, self.dbfile)
        soup = BeautifulSoup(req_page, 'lxml')
        try:
            titles = soup.findAll(text=re.compile(search_title))
            added_items = []
            if not titles:
                titles = soup.findAll(text=re.compile(fallback_title))
            for title in titles:
                if self.quality != '480p' and self.quality in title:
                    added_items.append(self.parse_download(series_url, title, englisch, genre))
                if self.quality == '480p' and not (('.720p.' in title) or ('.1080p.' in title) or ('.2160p.' in title)):
                    added_items.append(self.parse_download(series_url, title, englisch, genre))
            return added_items
        except re.error as e:
            self.log_error('Konstantenfehler: %s' % e)
Example #8
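A DD periodical_task variant that compiles the hoster regex from the configuration and matches links against it directly, instead of using check_hoster and the hoster fallback as in Example #4.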
    def periodical_task(self):
        feeds = self.config.get("feeds")
        if feeds:
            added_items = []
            feeds = feeds.replace(" ", "").split(',')
            hoster = re.compile(self.config.get("hoster"))
            for feed in feeds:
                feed = feedparser.parse(get_url(feed, self.configfile, self.dbfile))
                for post in feed.entries:
                    key = post.title.replace(" ", ".")

                    epoch = datetime(1970, 1, 1)
                    current_epoch = int(time())
                    published_format = "%Y-%m-%d %H:%M:%S+00:00"
                    published_timestamp = str(parser.parse(post.published))
                    published_epoch = int((datetime.strptime(
                        published_timestamp, published_format) - epoch).total_seconds())
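                    # only handle posts that were published more than 30 minutes ago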
                    if (current_epoch - 1800) > published_epoch:
                        link_pool = post.summary
                        unicode_links = re.findall(r'(http.*)', link_pool)
                        links = []
                        for link in unicode_links:
                            if re.match(hoster, link):
                                links.append(str(link))
                        if not links:
                            self.log_debug(
                                "%s - Release ignoriert (kein passender Link gefunden)" % key)
                        elif self.db.retrieve(key) == 'added':
                            self.log_debug(
                                "%s - Release ignoriert (bereits gefunden)" % key)
                        else:
                            self.device = myjd_download(self.configfile, self.device, key, "RSScrawler", links, "")
                            if self.device:
                                self.db.store(
                                    key,
                                    'added'
                                )
                                log_entry = '[DD] - Englisch - ' + key
                                self.log_info(log_entry)
                                notify([log_entry], self.configfile)
                                added_items.append(log_entry)
                    else:
                        self.log_debug(
                            "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert." % key)
        else:
            self.log_debug("Liste ist leer. Stoppe Suche für DD!")
        return self.device
Example #9
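get_imdb_id first tries to scrape a tt ID from the supplied page content; failing that, it falls back to an IMDb title search, in which the MB_Staffeln list simply takes the first hit while all other lists skip "TV Series" rows to find a movie page.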
def get_imdb_id(key, content, filename, configfile, dbfile, scraper, log_debug):
    try:
        imdb_id = re.findall(
            r'.*?(?:href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?).*?(\d(?:\.|\,)\d)(?:.|.*?)<\/a>.*?',
            content)
    except Exception:
        imdb_id = False

    if imdb_id:
        imdb_id = imdb_id[0][0]
    else:
        try:
            search_title = re.findall(
                r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)", key)[0].replace(".", "+")
            search_url = "http://www.imdb.com/find?q=" + search_title
            search_page = get_url(search_url, configfile, dbfile, scraper)
            search_results = re.findall(
                r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                search_page)
        except Exception:
            return False
        total_results = len(search_results)
        if filename == 'MB_Staffeln':
            imdb_id = search_results[0][0]
        else:
            no_series = False
            while total_results > 0:
                attempt = 0
                for result in search_results:
                    if result[3] == "TV Series":
                        no_series = False
                        total_results -= 1
                        attempt += 1
                    else:
                        no_series = True
                        imdb_id = search_results[attempt][0]
                        total_results = 0
                        break
            if no_series is False:
                log_debug(
                    "%s - Keine passende Film-IMDB-Seite gefunden" % key)
        if not imdb_id:
            return False

    return imdb_id
Example #10
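get_original_language reads the "Language:" entry from IMDb detail HTML (fetching the page first if only the URL is given) and returns False for German originals, signalling the caller to stop looking for a dual-language release.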
def get_original_language(key, imdb_details, imdb_url, configfile, dbfile, scraper, log_debug):
    original_language = False
    if imdb_details and len(imdb_details) > 0:
        soup = BeautifulSoup(imdb_details, 'lxml')
        language_header = soup.find('h4', text=re.compile(r'Language:'))
        if language_header:
            original_language = language_header.parent.find("a").text
    elif imdb_url and len(imdb_url) > 0:
        imdb_details = get_url(imdb_url, configfile, dbfile, scraper)
        if imdb_details:
            soup = BeautifulSoup(imdb_details, 'lxml')
            language_header = soup.find('h4', text=re.compile(r'Language:'))
            if language_header:
                original_language = language_header.parent.find("a").text

    if not original_language:
        log_debug("%s - Originalsprache nicht ermittelbar" % key)

    if original_language and original_language == "German":
        log_debug(
            "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!" % key)
        return False
    else:
        return original_language
Example #11
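The SF periodical_task walks the daily update feeds of up to the last eight days, uses cached headers and SHA-256 hashes to stop as soon as nothing has changed, applies the regex, quality, rejectlist, and surround filters appropriate to the list type, and hands matching titles to parse_download before refreshing the cached hashes.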
    def periodical_task(self):
        if not self.sf:
            return self.device

        if self.filename == 'SJ_Serien_Regex' or self.filename == 'SJ_Staffeln_Regex':
            if not self.config.get('regex'):
                self.log_debug("Suche für SF-Regex deaktiviert!")
                return self.device
        elif self.filename == 'MB_Staffeln':
            if not self.config.get('crawlseasons'):
                self.log_debug("Suche für SF-Staffeln deaktiviert!")
                return self.device
        if self.empty_list:
            self.log_debug("Liste ist leer. Stoppe Suche für Serien!" +
                           self.listtype)
            return self.device
        try:
            reject = self.config.get("rejectlist").replace(
                ",", "|").lower() if len(
                    self.config.get("rejectlist")) > 0 else r"^unmatchable$"
        except TypeError:
            reject = r"^unmatchable$"

        set_sf = self.settings_hash(False)

        header = False
        response = False

        while self.day < 8:
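            # walk backwards through the daily update feeds (up to eight days)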
            if self.last_set_sf == set_sf:
                try:
                    delta = (
                        datetime.datetime.now() -
                        datetime.timedelta(days=self.day)).strftime("%Y-%m-%d")
                    response = get_url_headers(
                        'https://' + self.sf + '/updates/' + delta,
                        self.configfile, self.dbfile, self.headers,
                        self.scraper)
                    self.scraper = response[1]
                    response = response[0]
                    if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                        feed = sf_releases_to_feedparser_dict(
                            response.text, "seasons", 'https://' + self.sf,
                            True)
                    else:
                        feed = sf_releases_to_feedparser_dict(
                            response.text, "episodes", 'https://' + self.sf,
                            True)
                except Exception:
                    print(u"SF hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False

                if response:
                    if response.status_code == 304:
                        self.log_debug(
                            "SF-Feed seit letztem Aufruf nicht aktualisiert - breche  Suche ab!"
                        )
                        return self.device
                    header = True
            else:
                try:
                    delta = (
                        datetime.datetime.now() -
                        datetime.timedelta(days=self.day)).strftime("%Y-%m-%d")
                    response = get_url(
                        'https://' + self.sf + '/updates/' + delta,
                        self.configfile, self.dbfile, self.scraper)
                    if self.filename == "MB_Staffeln" or self.filename == "SJ_Staffeln_Regex":
                        feed = sf_releases_to_feedparser_dict(
                            response, "seasons", 'https://' + self.sf, True)
                    else:
                        feed = sf_releases_to_feedparser_dict(
                            response, "episodes", 'https://' + self.sf, True)
                except Exception:
                    print(u"SF hat die Feed-API angepasst. Breche Suche ab!")
                    feed = False

            self.day += 1

            if feed and feed.entries:
                first_post_sf = feed.entries[0]
                concat_sf = first_post_sf.title + first_post_sf.published + str(
                    self.settings) + str(self.pattern)
                sha_sf = hashlib.sha256(concat_sf.encode(
                    'ascii', 'ignore')).hexdigest()
            else:
                self.log_debug("Feed ist leer - breche Suche ab!")
                return False

            for post in feed.entries:
                concat = post.title + post.published + \
                         str(self.settings) + str(self.pattern)
                sha = hashlib.sha256(concat.encode('ascii',
                                                   'ignore')).hexdigest()
                if sha == self.last_sha_sf:
                    self.log_debug("Feed ab hier bereits gecrawlt (" +
                               post.title + ") - breche Suche ab!")
                    break

                series_url = post.series_url
                title = post.title.replace("-", "-")  # no-op as transcribed; presumably a Unicode dash variant was the first argument originally

                if self.filename == 'SJ_Serien_Regex' or self.filename == 'SJ_Staffeln_Regex':
                    if self.config.get("regex"):
                        if '.german.' in title.lower():
                            language_id = 1
                        elif self.rsscrawler.get('english'):
                            language_id = 2
                        else:
                            language_id = 0
                        if language_id:
                            m = re.search(self.pattern, title.lower())
                            if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                                m = re.search(
                                    self.pattern.replace("480p", "."),
                                    title.lower())
                                self.quality = "480p"
                            if m:
                                if "720p" in title.lower():
                                    self.quality = "720p"
                                if "1080p" in title.lower():
                                    self.quality = "1080p"
                                if "2160p" in title.lower():
                                    self.quality = "2160p"
                                m = re.search(reject, title.lower())
                                if m:
                                    self.log_debug(
                                        title +
                                        " - Release durch Regex gefunden (trotz rejectlist-Einstellung)"
                                    )
                                title = re.sub(r'\[.*\] ', '', post.title)
                                self.parse_download(series_url, title,
                                                    language_id)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)

                    else:
                        continue
                else:
                    if self.config.get("quality") != '480p':
                        m = re.search(self.pattern, title.lower())
                        if m:
                            if '.german.' in title.lower():
                                language_id = 1
                            elif self.rsscrawler.get('english'):
                                language_id = 2
                            else:
                                language_id = 0
                            if language_id:
                                mm = re.search(self.quality, title.lower())
                                if mm:
                                    mmm = re.search(reject, title.lower())
                                    if mmm:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (basierend auf rejectlist-Einstellung)"
                                        )
                                        continue
                                    if self.rsscrawler.get("surround"):
                                        if not re.match(
                                                r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                                title):
                                            self.log_debug(
                                                title +
                                                " - Release ignoriert (kein Mehrkanalton)"
                                            )
                                            continue
                                    try:
                                        storage = self.db.retrieve_all(title)
                                    except Exception as e:
                                        self.log_debug(
                                            "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                            % (e, title))
                                        return self.device
                                    if 'added' in storage:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (bereits gefunden)"
                                        )
                                        continue
                                    self.parse_download(
                                        series_url, title, language_id)
                            else:
                                self.log_debug(
                                    "%s - Englische Releases deaktiviert" %
                                    title)

                        else:
                            m = re.search(self.pattern, title.lower())
                            if m:
                                if '.german.' in title.lower():
                                    language_id = 1
                                elif self.rsscrawler.get('english'):
                                    language_id = 2
                                else:
                                    language_id = 0
                                if language_id:
                                    if "720p" in title.lower(
                                    ) or "1080p" in title.lower(
                                    ) or "2160p" in title.lower():
                                        continue
                                    mm = re.search(reject, title.lower())
                                    if mm:
                                        self.log_debug(
                                            title +
                                            " Release ignoriert (basierend auf rejectlist-Einstellung)"
                                        )
                                        continue
                                    if self.rsscrawler.get("surround"):
                                        if not re.match(
                                                r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*',
                                                title):
                                            self.log_debug(
                                                title +
                                                " - Release ignoriert (kein Mehrkanalton)"
                                            )
                                            continue
                                    title = re.sub(r'\[.*\] ', '', post.title)
                                    try:
                                        storage = self.db.retrieve_all(title)
                                    except Exception as e:
                                        self.log_debug(
                                            "Fehler bei Datenbankzugriff: %s, Grund: %s"
                                            % (e, title))
                                        return self.device
                                    if 'added' in storage:
                                        self.log_debug(
                                            title +
                                            " - Release ignoriert (bereits gefunden)"
                                        )
                                        continue
                                    self.parse_download(
                                        series_url, title, language_id)
                                else:
                                    self.log_debug(
                                        "%s - Englische Releases deaktiviert" %
                                        title)

        if set_sf:
            new_set_sf = self.settings_hash(True)
            if set_sf == new_set_sf:
                self.cdc.delete("SFSet-" + self.filename)
                self.cdc.store("SFSet-" + self.filename, set_sf)
                self.cdc.delete("SF-" + self.filename)
                self.cdc.store("SF-" + self.filename, sha_sf)

        if header and response:
            self.cdc.delete("SFHeaders-" + self.filename)
            self.cdc.store("SFHeaders-" + self.filename,
                           response.headers['date'])

        return self.device
Example #12
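The SF parse_download queries the season API with a language flag and a millisecond cache-buster, rewrites the title into a tolerant season regex (codec and WEB tags generalized), picks the first link whose hoster passes check_hoster, and sends the package together with season and episode numbers.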
    def parse_download(self, series_url, title, language_id):
        if not check_valid_release(title, self.retail_only, self.hevc_retail,
                                   self.dbfile):
            self.log_debug(
                title +
                u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
            )
            return
        if self.filename == 'MB_Staffeln':
            if not self.config.get("seasonpacks"):
                staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
                if staffelpack:
                    self.log_debug("%s - Release ignoriert (Staffelpaket)" %
                                   title)
                    return
            if not re.search(self.seasonssource, title.lower()):
                self.log_debug(title + " - Release hat falsche Quelle")
                return
        try:
            if language_id == 2:
                lang = 'EN'
            else:
                lang = 'DE'
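            # 13-digit millisecond timestamp used as a cache-busting query parameter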
            epoch = str(datetime.datetime.now().timestamp()).replace('.',
                                                                     '')[:-3]
            api_url = series_url + '?lang=' + lang + '&_=' + epoch
            response = get_url(api_url, self.configfile, self.dbfile,
                               self.scraper)
            info = json.loads(response)

            is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title,
                                    re.IGNORECASE)
            if is_episode:
                episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*',
                                            is_episode[0])[0].lower()
                season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*',
                                           is_episode[0])[0].lower()
                season_title = rreplace(
                    title.lower().replace(episode_string, ''), "-", ".*",
                    1).lower()
                season_title = season_title.replace(".untouched",
                                                    ".*").replace(
                                                        ".dd+51", ".dd.51")
                episode = str(int(episode_string.replace("e", "")))
                season = str(int(season_string.replace("s", "")))
                episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german',
                                          season_title, re.IGNORECASE)
                if episode_name:
                    season_title = season_title.replace(episode_name[0], '')
                codec_tags = [".h264", ".x264"]
                for tag in codec_tags:
                    season_title = season_title.replace(tag, ".*264")
                web_tags = [".web-rip", ".webrip", ".webdl", ".web-dl"]
                for tag in web_tags:
                    season_title = season_title.replace(tag, ".web.*")
            else:
                season = False
                episode = False
                season_title = title
                multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)',
                                               season_title, re.IGNORECASE)
                if multiple_episodes:
                    season_title = season_title.replace(
                        multiple_episodes[0], '.*')

            content = BeautifulSoup(info['html'], 'lxml')
            releases = content.find(
                "small", text=re.compile(season_title,
                                         re.IGNORECASE)).parent.parent.parent
            links = releases.findAll("div", {'class': 'row'})[1].findAll('a')
            valid = False
            for link in links:
                download_link = link['href']
                if check_hoster(link.text.replace('\n', ''), self.configfile):
                    valid = True
                    break
            if not valid and not self.hoster_fallback:
                storage = self.db.retrieve_all(title)
                if 'added' not in storage and 'notdl' not in storage:
                    wrong_hoster = '[SF/Hoster fehlt] - ' + title
                    if 'wrong_hoster' not in storage:
                        self.log_info(wrong_hoster)
                        self.db.store(title, 'wrong_hoster')
                        notify([wrong_hoster], self.configfile)
                    else:
                        self.log_debug(wrong_hoster)
            else:
                return self.send_package(title, download_link, language_id,
                                         season, episode)
        except Exception:
            print(
                u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!"
            )
Example #13
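download_sj drives the manual series search: it resolves a serienjunkies.org category page, collects the season links (including English "Season" entries when enabled), scrapes package and episode rows per season at the configured quality (falling back to unpadded season numbers and to lower-quality rows), rates all candidates, and pushes the best-scoring links to JDownloader.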
def download_sj(sj_id, special, device, configfile, dbfile):
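    # the base64 literal decodes to the serienjunkies.org category URL ("http://serienjunkies.org/?cat=")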
    url = get_url(decode_base64("aHR0cDovL3Nlcmllbmp1bmtpZXMub3JnLz9jYXQ9") + str(sj_id), configfile, dbfile)
    season_pool = re.findall(r'<h2>Staffeln:(.*?)<h2>Feeds', url).pop()
    season_links = re.findall(
        r'href="(.{1,125})">.{1,90}(Staffel|Season).*?(\d{1,2}-?\d{1,2}|\d{1,2})', season_pool)
    title = html_to_str(re.findall(r'>(.{1,85}?) &#', season_pool).pop())

    rsscrawler = RssConfig('RSScrawler', configfile)

    listen = ["SJ_Serien", "MB_Staffeln"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    staffeln = []
    staffel_nr = []
    seasons = []

    for s in season_links:
        if "staffel" in s[1].lower():
            staffeln.append([s[2], s[0]])
            if "-" in s[2]:
                split = s[2].split("-")
                split = range(int(split[0]), int(split[1]) + 1)
                for nr in split:
                    staffel_nr.append(str(nr))
            else:
                staffel_nr.append(s[2])
        else:
            seasons.append([s[2], s[0]])

    if rsscrawler.get("english"):
        for se in seasons:
            if not se[0] in staffel_nr:
                staffeln.append(se)

    to_dl = []
    for s in staffeln:
        if "-" in s[0]:
            split = s[0].split("-")
            split = range(int(split[0]), int(split[1]) + 1)
            for i in split:
                to_dl.append([str(i), s[1]])
        else:
            to_dl.append([s[0], s[1]])

    found_seasons = {}
    for dl in to_dl:
        if len(dl[0]) == 1:
            sxx = "S0" + str(dl[0])
        else:
            sxx = "S" + str(dl[0])
        link = dl[1]
        if sxx not in found_seasons:
            found_seasons[sxx] = link

    something_found = False

    for sxx, link in found_seasons.items():
        config = RssConfig('SJ', configfile)
        quality = config.get('quality')
        url = get_url(link, configfile, dbfile)
        pakete = re.findall(re.compile(r'<p><strong>(.*?\.' + sxx + r'\..*?' + quality +
                                       r'.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'),
                            url)
        folgen = re.findall(re.compile(r'<p><strong>(.*?\.' + sxx +
                                       r'E\d{1,3}.*?' + quality + r'.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'),
                            url)
        lq_pakete = re.findall(re.compile(
            r'<p><strong>(.*?\.' + sxx + r'\..*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'),
            url)
        lq_folgen = re.findall(re.compile(
            r'<p><strong>(.*?\.' + sxx + r'E\d{1,3}.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'),
            url)

        if not pakete and not folgen and not lq_pakete and not lq_folgen:
            sxx = sxx.replace("S0", "S")
            pakete = re.findall(re.compile(r'<p><strong>(.*?\.' + sxx + r'\..*?' + quality +
                                           r'.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'),
                                url)
            folgen = re.findall(re.compile(
                r'<p><strong>(.*?\.' + sxx + r'E\d{1,3}.*?' + quality + r'.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'),
                url)
            lq_pakete = re.findall(re.compile(
                r'<p><strong>(.*?\.' + sxx + r'\..*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'),
                url)
            lq_folgen = re.findall(re.compile(
                r'<p><strong>(.*?\.' + sxx + r'E\d{1,3}.*?)<.*?\n.*?href="(.*?)".*? \| (.*)<(?:.*?\n.*?href="(.*?)".*? \| (.*)<|)'),
                url)

        if special and "e" in special.lower():
            pakete = []
            lq_pakete = []

        best_matching_links = []

        # only the first non-empty result set is processed, mirroring the original if/elif chain
        for candidates in (pakete, folgen, lq_pakete, lq_folgen):
            if not candidates:
                continue
            links = []
            for x in candidates:
                title = x[0]
                score = rate(title, configfile)
                hoster = [[x[2], x[1]], [x[4], x[3]]]
                if special:
                    if special.lower() in title.lower():
                        links.append([score, title, hoster])
                else:
                    links.append([score, title, hoster])
            if links:
                highest_score = sorted(links, reverse=True)[0][0]
                for l in links:
                    if l[0] == highest_score:
                        for hoster in l[2]:
                            best_matching_links.append(
                                [l[1], hoster[0], hoster[1]])
            break

        notify_array = []
        for best_link in best_matching_links:
            dl_title = best_link[0].replace(
                "Staffelpack ", "").replace("Staffelpack.", "")
            dl_hoster = best_link[1]
            dl_link = best_link[2]
            config = RssConfig('SJ', configfile)
            hoster = re.compile(config.get('hoster'))
            db = RssDb(dbfile, 'rsscrawler')

            if re.match(hoster, dl_hoster.lower()):
                if myjd_download(configfile, device, dl_title, "RSScrawler", dl_link,
                                 decode_base64("c2VyaWVuanVua2llcy5vcmc=")):
                    db.store(dl_title, 'added')
                    log_entry = '[Suche/Serie] - ' + dl_title
                    logging.info(log_entry)
                    notify_array.append(log_entry)
                else:
                    return False
        if len(best_matching_links) > 0:
            something_found = True
        notify(notify_array, configfile)
    if not something_found:
        return False
    return True
Example #14
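download_bl handles a manual movie download: it decodes the link;password payload, scrapes the post for the release key and its hoster links (with a fallback parser for the alternate page layout), optionally enforces a dual-language release via an IMDb lookup, and dispatches the links with season-, 3D-, and retail-specific logging.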
def download_bl(payload, device, configfile, dbfile):
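    # payload is the base64 encoding of "<link>;<password>"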
    payload = decode_base64(payload).split(";")
    link = payload[0]
    password = payload[1]
    url = get_url(link, configfile, dbfile)
    config = RssConfig('MB', configfile)
    hoster = re.compile(config.get('hoster'))
    db = RssDb(dbfile, 'rsscrawler')

    soup = BeautifulSoup(url, 'lxml')
    download = soup.find("div", {"id": "content"})
    try:
        key = re.findall(r'Permanent Link: (.*?)"', str(download)).pop()
        url_hosters = re.findall(r'href="([^"\'>]*)".+?(.+?)<', str(download))
    except Exception:
        items_head = soup.find("div", {"class": "topbox"})
        key = items_head.contents[1].a["title"]
        items_download = soup.find("div", {"class": "download"})
        url_hosters = []
        download = items_download.find_all("span", {"style": "display:inline;"}, text=True)
        for link in download:
            link = link.a
            text = link.text.strip()
            if text:
                url_hosters.append([str(link["href"]), str(text)])

    links = {}
    for url_hoster in reversed(url_hosters):
        if not decode_base64("bW92aWUtYmxvZy50by8=") in url_hoster[0] and "https://goo.gl/" not in url_hoster[0]:
            link_hoster = url_hoster[1].lower().replace('target="_blank">', '').replace(" ", "-")
            if re.match(hoster, link_hoster):
                links[link_hoster] = url_hoster[0]
    download_links = links.values() if six.PY2 else list(links.values())

    englisch = False
    if "*englisch*" in key.lower():
        key = key.replace('*ENGLISCH*', '').replace("*Englisch*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if config.get('enforcedl') and '.dl.' not in key.lower():
        fail = False
        get_imdb_url = url
        key_regex = r'<title>' + \
                    re.escape(
                        key) + r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>'
        imdb_id = re.findall(key_regex, get_imdb_url)
        if len(imdb_id) > 0:
            if not imdb_id[0]:
                fail = True
            else:
                imdb_id = imdb_id[0]
        else:
            fail = True
        if fail:
            search_title = re.findall(
                r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)", key)[0].replace(".", "+")
            search_url = "http://www.imdb.com/find?q=" + search_title
            search_page = get_url(search_url, configfile, dbfile)
            search_results = re.findall(
                r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                search_page)
            total_results = len(search_results)
            if staffel:
                imdb_id = search_results[0][0]
            else:
                no_series = False
                while total_results > 0:
                    attempt = 0
                    for result in search_results:
                        if result[3] == "TV Series":
                            no_series = False
                            total_results -= 1
                            attempt += 1
                        else:
                            no_series = True
                            imdb_id = search_results[attempt][0]
                            total_results = 0
                            break
                if no_series is False:
                    logging.debug(
                        "%s - Keine passende Film-IMDB-Seite gefunden" % key)

        if staffel:
            filename = 'MB_Staffeln'
        else:
            filename = 'MB_Filme'

        bl = BL(configfile, dbfile, device, logging, filename=filename)

        if not imdb_id:
            if not bl.dual_download(key, password):
                logging.debug(
                    "%s - Kein zweisprachiges Release gefunden." % key)
        else:
            if isinstance(imdb_id, list):
                imdb_id = imdb_id.pop()
            imdb_url = "http://www.imdb.com/title/" + imdb_id
            details = get_url(imdb_url, configfile, dbfile)
            original_language = False
            if not details:
                logging.debug("%s - Originalsprache nicht ermittelbar" % key)
            else:
                original_language = re.findall(
                    r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details)
                if original_language:
                    original_language = original_language[0]
            if original_language == "German":
                logging.debug(
                    "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!" % key)
            else:
                if not bl.dual_download(key, password) and not englisch:
                    logging.debug(
                        "%s - Kein zweisprachiges Release gefunden! Breche ab." % key)

    if download_links:
        if staffel:
            if myjd_download(configfile, device, key, "RSScrawler", download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get(
                        'enforcedl') and '.dl.' not in key.lower() else 'added'
                )
                log_entry = '[Staffel] - ' + key.replace(".COMPLETE", "").replace(".Complete", "")
                logging.info(log_entry)
                notify([log_entry], configfile)
                return True
        elif '.3d.' in key.lower():
            retail = False
            if config.get('cutoff') and '.complete.' not in key.lower():
                if config.get('enforcedl'):
                    if cutoff(key, '2', dbfile):
                        retail = True
            if myjd_download(configfile, device, key, "RSScrawler/3Dcrawler", download_links, password):
                db.store(
                    key,
                    'notdl' if config.get(
                        'enforcedl') and '.dl.' not in key.lower() else 'added'
                )
                log_entry = '[Suche/Film] - ' + (
                    'Retail/' if retail else "") + '3D - ' + key
                logging.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            if config.get('cutoff') and '.complete.' not in key.lower():
                if config.get('enforcedl'):
                    if cutoff(key, '1', dbfile):
                        retail = True
                else:
                    if cutoff(key, '0', dbfile):
                        retail = True
            if myjd_download(configfile, device, key, "RSScrawler", download_links, password):
                db.store(
                    key,
                    'notdl' if config.get(
                        'enforcedl') and '.dl.' not in key.lower() else 'added'
                )
                log_entry = '[Suche/Film] - ' + ('Englisch - ' if englisch and not retail else "") + (
                    'Englisch/Retail - ' if englisch and retail else "") + (
                                'Retail - ' if not englisch and retail else "") + key
                logging.info(log_entry)
                notify([log_entry], configfile)
                return True
    else:
        return False
Example #15
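get implements the combined manual search: it queries the three blog sites (plus their 3D variants when crawl3d is set) as well as the SJ ajax search, rates and sorts the blog results, and returns the two result dictionaries mb_final and sj_final.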
def get(title, configfile, dbfile):
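    # an optional ";SXX" or ";SXXEYY" suffix in the title selects a specific season or episode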
    specific_season = re.match(r'^(.*);(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*);(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(";")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(";")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    query = title.replace(".", " ").replace(" ", "+")
    if special:
        bl_query = query + "+" + special
    else:
        bl_query = query

    unrated = []

    config = RssConfig('MB', configfile)

    quality = config.get('quality')
    if "480p" not in quality:
        search_quality = "+" + quality
    else:
        search_quality = ""

    mb_search = get_url(
        decode_base64('aHR0cDovL21vdmllLWJsb2cudG8=') + '/search/' + bl_query + "+" + search_quality + '/feed/rss2/',
        configfile, dbfile)
    mb_results = re.findall(r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', mb_search)
    password = decode_base64("bW92aWUtYmxvZy5vcmc=")
    for result in mb_results:
        if "480p" in quality:
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() or "2160p" in \
                    result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        if not result[0].endswith("-MB") and not result[0].endswith(".MB"):
            unrated.append(
                [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (MB)"])

    hw_search = get_url(
        decode_base64('aHR0cDovL2hkLXdvcmxkLm9yZw==') + '/search/' + bl_query + "+" + search_quality + '/feed/rss2/',
        configfile, dbfile)
    hw_results = re.findall(r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', hw_search)
    password = decode_base64("aGQtd29ybGQub3Jn")
    for result in hw_results:
        if "480p" in quality:
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() or "2160p" in \
                    result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        unrated.append(
            [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (HW)"])

    ha_search = decode_base64('aHR0cDovL3d3dy5oZC1hcmVhLm9yZy8/cz1zZWFyY2gmcT0=') + bl_query + "&c=" + search_quality
    ha_results = ha_search_results(ha_search, configfile, dbfile)
    password = decode_base64("aGQtYXJlYS5vcmc=")
    for result in ha_results:
        if "480p" in quality:
            if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[0].lower() or "2160p" in \
                    result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                0].lower() or "complete.uhd.bluray" in result[0].lower():
                continue
        unrated.append(
            [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (HA)"])

    if config.get("crawl3d"):
        mb_search = get_url(
            decode_base64('aHR0cDovL21vdmllLWJsb2cudG8=') + '/search/' + bl_query + "+3D+1080p" + '/feed/rss2/',
            configfile, dbfile)
        mb_results = re.findall(r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', mb_search)
        # assumption: reset to the MB password here, as in the non-3D search above
        password = decode_base64("bW92aWUtYmxvZy5vcmc=")
        for result in mb_results:
            if not result[0].endswith("-MB") and not result[0].endswith(".MB"):  # assumption: the title (result[0]) is meant, as in the non-3D branch
                unrated.append(
                    [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (3D-MB)"])

        hw_search = get_url(
            decode_base64('aHR0cDovL2hkLXdvcmxkLm9yZw==') + '/search/' + bl_query + "+3D+1080p" + '/feed/rss2/',
            configfile, dbfile)
        hw_results = re.findall(r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', hw_search)
        password = decode_base64("aGQtd29ybGQub3Jn")
        for result in hw_results:
            unrated.append(
                [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (3D-HW)"])

        ha_search = decode_base64('aHR0cDovL3d3dy5oZC1hcmVhLm9yZy8/cz1zZWFyY2gmcT0=') + bl_query + "&c=1080p"
        ha_results = ha_search_results(ha_search, configfile, dbfile)
        password = decode_base64("aGQtYXJlYS5vcmc=")
        for result in ha_results:
            if "3d" in result[0].lower():
                unrated.append(
                    [rate(result[0], configfile), encode_base64(result[1] + ";" + password), result[0] + " (3D-HA)"])

    rated = sorted(unrated, reverse=True)

    results = {}
    i = 0
    for result in rated:
        res = {"link": result[1], "title": result[2]}
        results["result" + str(i)] = res
        i += 1
    mb_final = results

    sj_search = post_url(decode_base64("aHR0cDovL3Nlcmllbmp1bmtpZXMub3JnL21lZGlhL2FqYXgvc2VhcmNoL3NlYXJjaC5waHA="),
                         configfile,
                         dbfile,
                         data={'string': "'" + query + "'"})
    try:
        sj_results = json.loads(sj_search)
    except:
        sj_results = []

    if special:
        append = " (" + special + ")"
    else:
        append = ""
    i = 0
    results = {}
    for result in sj_results:
        r_title = html_to_str(result[1])
        r_rating = fuzz.ratio(title.lower(), r_title)
        if r_rating > 65:
            res = {"id": result[0], "title": r_title + append, "special": special}
            results["result" + str(i)] = res
            i += 1
    sj_final = results
    return mb_final, sj_final
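A note on the tail of this example: rated entries sort in descending order and are then re-keyed as result0, result1, and so on. A minimal, self-contained sketch of just that step (ratings, payloads and titles are made up for illustration):

unrated = [
    [3, "cGF5bG9hZDE=", "Some.Movie.2019.German.1080p.WEB (MB)"],
    [7, "cGF5bG9hZDI=", "Some.Movie.2019.German.1080p.BluRay (HW)"],
]
rated = sorted(unrated, reverse=True)  # highest rating first

results = {}
i = 0
for result in rated:
    results["result" + str(i)] = {"link": result[1], "title": result[2]}
    i += 1
print(results)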
Example #16
0
    def periodical_task(self):
        if not self.config.get('youtube'):
            self.log_debug("Suche für YouTube deaktiviert!")
            return self.device
        added_items = []
        channels = []
        videos = []

        self.liste = self.read_input(self.youtube)

        if not self.liste:
            self.log_debug("Liste ist leer. Stoppe Suche für YouTube!")
            return self.device

        for item in self.liste:
            if len(item) > 0:
                if self.config.get("youtube") is False:
                    self.log_debug(
                        "Liste ist leer. Stoppe Suche für YouTube!")
                    return self.device
                channels.append(item)

        for channel in channels:
            if 'list=' in channel:
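                # keep only the playlist id that follows 'list=' in the URL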
                id_cutter = channel.rfind('list=') + 5
                channel = channel[id_cutter:]
                url = 'https://www.youtube.com/playlist?list=' + channel
                response = get_url(url, self.configfile, self.dbfile)
            else:
                url = 'https://www.youtube.com/user/' + channel + '/videos'
                urlc = 'https://www.youtube.com/channel/' + channel + '/videos'
                cnotfound = False
                try:
                    response = get_url(url, self.configfile, self.dbfile)
                except HTTPError:
                    try:
                        response = get_url(urlc, self.configfile, self.dbfile)
                    except HTTPError:
                        cnotfound = True
                    if cnotfound:
                        self.log_debug("YouTube-Kanal: " +
                                       channel + " nicht gefunden!")
                        # skip this channel instead of aborting all remaining ones
                        continue

            links = re.findall(
                r'VideoRenderer":{"videoId":"(.*?)",".*?[Tt]ext":"(.*?)"}', response)

            maxvideos = int(self.config.get("maxvideos"))
            if maxvideos < 1:
                self.log_debug("Anzahl zu suchender YouTube-Videos (" +
                               str(maxvideos) + ") zu gering. Suche stattdessen 1 Video!")
                maxvideos = 1
            elif maxvideos > 50:
                self.log_debug("Anzahl zu suchender YouTube-Videos (" +
                               str(maxvideos) + ") zu hoch. Suche stattdessen maximal 50 Videos!")
                maxvideos = 50

            for link in links[:maxvideos]:
                if len(link[0]) > 10:
                    videos.append(
                        [link[0], link[1], channel])

        for video in videos:
            channel = video[2]
            title = video[1]
            if "[private" in title.lower() and "video]" in title.lower():
                self.log_debug(
                    "[%s] - YouTube-Video ignoriert (Privates Video)" % video[0])
                continue
            video_title = title.replace("&amp;", "&").replace("&gt;", ">").replace(
                "&lt;", "<").replace('&quot;', '"').replace("&#39;", "'").replace("\u0026", "&")
            video = video[0]
            download_link = 'https://www.youtube.com/watch?v=' + video
            if download_link:
                if self.db.retrieve(video) == 'added':
                    self.log_debug(
                        "[%s] - YouTube-Video ignoriert (bereits gefunden)" % video)
                else:
                    ignore = "|".join(["%s" % p for p in self.config.get("ignore").lower().split(
                        ',')]) if self.config.get("ignore") else r"^unmatchable$"
                    ignorevideo = re.search(ignore, video_title.lower())
                    if ignorevideo:
                        self.log_debug(video_title + " (" + channel + ") " +
                                       "[" + video + "] - YouTube-Video ignoriert (basierend auf ignore-Einstellung)")
                        continue
                    self.device = myjd_download(self.configfile, self.device, "YouTube/" + channel, "RSScrawler",
                                                download_link, "")
                    if self.device:
                        self.db.store(
                            video,
                            'added'
                        )
                        log_entry = '[YouTube] - ' + video_title + ' (' + channel + ')'
                        self.log_info(log_entry)
                        notify([log_entry], self.configfile)
                        added_items.append(log_entry)
        return self.device
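The crawl above hinges on one regular expression over the JSON that YouTube embeds in its channel pages. A self-contained sketch of just that extraction, run against a shortened, hypothetical response snippet:

import re

# shortened, hypothetical excerpt of the embedded player JSON
response = '"gridVideoRenderer":{"videoId":"dQw4w9WgXcQ","title":{"runs":[{"text":"Demo"}]}}'
links = re.findall(r'VideoRenderer":{"videoId":"(.*?)",".*?[Tt]ext":"(.*?)"}', response)
for video_id, video_title in links:
    print(video_id, video_title, 'https://www.youtube.com/watch?v=' + video_id)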
Example #17
0
    def periodical_task(self):
        if self.filename == 'DJ_Dokus_Regex':
            if not self.config.get('regex'):
                self.log_debug("Suche für DJ-Regex deaktiviert!")
                return self.device
        if self.empty_list:
            self.log_debug(
                "Liste (" + self.listtype + ") ist leer. Stoppe Suche für Dokus!")
            return self.device
        try:
            reject = self.config.get("rejectlist").replace(",", "|").lower() if len(
                self.config.get("rejectlist")) > 0 else r"^unmatchable$"
        except TypeError:
            reject = r"^unmatchable$"

        set_dj = self.settings_hash(False)

        header = False
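        # settings unchanged since the last run: send the stored caching headers so
        # the site can answer 304 Not Modified and the crawl can stop early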
        if self.last_set_dj == set_dj:
            try:
                response = get_url_headers(
                    decode_base64('aHR0cDovL2Rva3VqdW5raWVzLm9yZy8='),
                    self.configfile,
                    self.dbfile,
                    self.headers)
                feed = dj_content_to_soup(response.content)
            except:
                response = False
                feed = False
            if response:
                if response.status_code == 304:
                    self.log_debug(
                        "DJ-Feed seit letztem Aufruf nicht aktualisiert - breche Suche ab!")
                    return self.device
                header = True
        else:
            feed = dj_content_to_soup(
                get_url(decode_base64('aHR0cDovL2Rva3VqdW5raWVzLm9yZy8='), self.configfile, self.dbfile))
            response = False

        if feed and feed.entries:
            first_post_dj = feed.entries[0]
            concat_dj = first_post_dj.title + first_post_dj.published + str(self.settings) + str(self.pattern)
            sha_dj = hashlib.sha256(concat_dj.encode(
                'ascii', 'ignore')).hexdigest()
        else:
            self.log_debug(
                "Feed ist leer - breche Suche ab!")
            return False

        for post in feed.entries:
            if not post.link:
                continue

            concat = post.title + post.published + str(self.settings) + str(self.pattern)
            sha = hashlib.sha256(concat.encode(
                'ascii', 'ignore')).hexdigest()
            if sha == self.last_sha_dj:
                self.log_debug(
                    "Feed ab hier bereits gecrawlt (" + post.title + ") - breche Suche ab!")
                break

            link = post.link
            title = post.title
            genre = post.genre

            if self.filename == 'DJ_Dokus_Regex':
                if self.config.get("regex"):
                    if '[DEUTSCH]' in title or '[TV-FILM]' in title:
                        language_ok = 1
                    elif self.rsscrawler.get('english'):
                        language_ok = 2
                    else:
                        language_ok = 0
                    if language_ok:
                        m = re.search(self.pattern, title.lower())
                        if not m and "720p" not in title and "1080p" not in title and "2160p" not in title:
                            m = re.search(self.pattern.replace(
                                "480p", "."), title.lower())
                            self.quality = "480p"
                        if m:
                            if not re.match(self.genres, genre.lower()):
                                self.log_debug(
                                    title + " - Release aufgrund unerwünschten Genres ignoriert (" + genre + ")")
                                continue
                            if "720p" in title.lower():
                                self.quality = "720p"
                            if "1080p" in title.lower():
                                self.quality = "1080p"
                            if "2160p" in title.lower():
                                self.quality = "2160p"
                            m = re.search(reject, title.lower())
                            if m:
                                self.log_debug(
                                    title + " - Release durch Regex gefunden (trotz rejectlist-Einstellung)")
                            title = re.sub(r'\[.*\] ', '', post.title)
                            self.range_checkr(link, title, language_ok, genre)
                    else:
                        self.log_debug(
                            "%s - Englische Releases deaktiviert" % title)

                else:
                    continue
            else:
                if self.config.get("quality") != '480p':
                    m = re.search(self.pattern, title.lower())
                    if m:
                        if not re.match(self.genres, genre.lower()):
                            self.log_debug(title + " - Release aufgrund unerwünschten Genres ignoriert (" + genre + ")")
                            continue
                        if 'german' in title.lower():
                            language_ok = 1
                        elif self.rsscrawler.get('english'):
                            language_ok = 2
                        else:
                            language_ok = 0
                        if language_ok:
                            mm = re.search(self.quality, title.lower())
                            if mm:
                                mmm = re.search(reject, title.lower())
                                if mmm:
                                    self.log_debug(
                                        title + " - Release ignoriert (basierend auf rejectlist-Einstellung)")
                                    continue
                                if self.rsscrawler.get("surround"):
                                    if not re.match(r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title):
                                        self.log_debug(
                                            title + " - Release ignoriert (kein Mehrkanalton)")
                                        continue
                                title = re.sub(r'\[.*\] ', '', post.title)
                                try:
                                    storage = self.db.retrieve(title)
                                except Exception as e:
                                    self.log_debug(
                                        "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                                    return self.device
                                if storage == 'added':
                                    self.log_debug(
                                        title + " - Release ignoriert (bereits gefunden)")
                                    continue
                                self.range_checkr(link, title, language_ok, genre)
                        else:
                            self.log_debug(
                                "%s - Englische Releases deaktiviert" % title)

                    else:
                        m = re.search(self.pattern, title.lower())
                        if m:
                            if '[DEUTSCH]' in title:
                                language_ok = 1
                            elif self.rsscrawler.get('english'):
                                language_ok = 2
                            else:
                                language_ok = 0
                            if language_ok:
                                if "720p" in title.lower() or "1080p" in title.lower() or "2160p" in title.lower():
                                    continue
                                mm = re.search(reject, title.lower())
                                if mm:
                                    self.log_debug(
                                        title + " - Release ignoriert (basierend auf rejectlist-Einstellung)")
                                    continue
                                if self.rsscrawler.get("surround"):
                                    if not re.match(r'.*\.(DTS|DD\+*51|DD\+*71|AC3\.5\.*1)\..*', title):
                                        self.log_debug(
                                            title + " - Release ignoriert (kein Mehrkanalton)")
                                        continue
                                title = re.sub(r'\[.*\] ', '', post.title)
                                try:
                                    storage = self.db.retrieve(title)
                                except Exception as e:
                                    self.log_debug(
                                        "Fehler bei Datenbankzugriff: %s, Grund: %s" % (e, title))
                                    return self.device
                                if storage == 'added':
                                    self.log_debug(
                                        title + " - Release ignoriert (bereits gefunden)")
                                    continue
                                self.range_checkr(link, title, language_ok, genre)
                            else:
                                self.log_debug(
                                    "%s - Englische Releases deaktiviert" % title)

        if set_dj:
            new_set_dj = self.settings_hash(True)
            if set_dj == new_set_dj:
                self.cdc.delete("DJSet-" + self.filename)
                self.cdc.store("DJSet-" + self.filename, set_dj)
                self.cdc.delete("DJ-" + self.filename)
                self.cdc.store("DJ-" + self.filename, sha_dj)
        if header and response:
            self.cdc.delete("DJHeaders-" + self.filename)
            self.cdc.store("DJHeaders-" + self.filename, response.headers['Last-Modified'])

        return self.device
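The cutoff in this example fingerprints a feed entry by hashing its title, publication date and the active settings; once a run reaches the fingerprint stored by the previous run, everything beyond it is already known. A reduced, self-contained sketch of that mechanism with invented entries:

import hashlib

def entry_hash(title, published, settings, pattern):
    concat = title + published + str(settings) + str(pattern)
    return hashlib.sha256(concat.encode('ascii', 'ignore')).hexdigest()

last_sha = entry_hash("Doku.Title.GERMAN.720p", "2020-01-01", ["720p"], "doku")

# next run: stop as soon as the stored fingerprint reappears
for title, published in [("Newer.Doku.GERMAN.720p", "2020-01-02"),
                         ("Doku.Title.GERMAN.720p", "2020-01-01")]:
    if entry_hash(title, published, ["720p"], "doku") == last_sha:
        print("already crawled from here on:", title)
        break
    print("new entry:", title)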
Example #18
0
def ha_url_to_soup(url, configfile, dbfile):
    content = BeautifulSoup(get_url(url, configfile, dbfile), 'lxml')
    return ha_to_feedparser_dict(content)
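ha_url_to_soup just feeds the fetched markup into an lxml-backed BeautifulSoup. A rough stand-in using requests directly (example.org is a placeholder; the project's get_url additionally threads the crawler's configfile/dbfile through the request):

import requests
from bs4 import BeautifulSoup

def url_to_soup(url):
    # fetch the page and parse it with the lxml backend
    return BeautifulSoup(requests.get(url, timeout=30).text, 'lxml')

print(url_to_soup('https://example.org/').title)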
Example #19
0
def get(title, configfile, dbfile, bl_only=False, sj_only=False):
    hostnames = RssConfig('Hostnames', configfile)
    mb = hostnames.get('mb')
    hw = hostnames.get('hw')
    hs = hostnames.get('hs')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    sj = hostnames.get('sj')

    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper()

    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query

        unrated = []

        config = RssConfig('MB', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')

        if "480p" not in quality:
            search_quality = "+" + quality
        else:
            search_quality = ""

        if mb:
            mb_search = 'https://' + mb + '/search/' + bl_query + search_quality + '/feed/rss2/'
        else:
            mb_search = None
        if hw:
            hw_search = 'https://' + hw + '/search/' + bl_query + search_quality + '/feed/rss2/'
        else:
            hw_search = None
        if hs:
            hs_search = 'https://' + hs + '/search/' + bl_query + search_quality + '/feed'
        else:
            hs_search = None
        if fx:
            fx_search = 'https://' + fx + '/?s=' + bl_query
        else:
            fx_search = None

        async_results = get_urls_async(
            [mb_search, hw_search, hs_search, fx_search], configfile, dbfile,
            scraper)
        scraper = async_results[1]
        async_results = async_results[0]

        mb_results = []
        hw_results = []
        hs_results = []
        fx_results = []

        for res in async_results:
            site = check_is_site(res, configfile)
            if site == 'MB':
                mb_results = re.findall(
                    r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
            elif site == 'HW':
                hw_results = re.findall(
                    r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
            elif site == 'HS':
                hs_results = hs_search_results(res)
            elif site == 'FX':
                fx_results = fx_search_results(fx_content_to_soup(res),
                                               configfile, dbfile, scraper)

        if nk:
            nk_search = post_url(
                'https://' + nk + "/search",
                configfile,
                dbfile,
                data={'search': bl_query.replace("+", " ") + " " + quality})
            nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
        else:
            nk_results = []

        password = mb
        for result in mb_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            if not result[0].endswith("-MB") and not result[0].endswith(".MB"):
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (MB)"
                ])

        password = hw
        for result in hw_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password), result[0] + " (HW)"
            ])

        password = hs
        for result in hs_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password), result[0] + " (HS)"
            ])

        password = fx.split('.')[0]
        for result in fx_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password), result[0] + " (FX)"
            ])

        password = nk.split('.')[0].capitalize()
        for result in nk_results:
            if "480p" in quality:
                if "720p" in result[0].lower() or "1080p" in result[0].lower() or "1080i" in result[
                    0].lower() or "2160p" in \
                        result[0].lower() or "complete.bluray" in result[0].lower() or "complete.mbluray" in result[
                    0].lower() or "complete.uhd.bluray" in result[0].lower():
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password), result[0] + " (NK)"
            ])

        if config.get("crawl3d"):
            if mb:
                mb_search = 'https://' + mb + '/search/' + bl_query + search_quality + "+3D/feed/rss2/"
            else:
                mb_search = None
            if hw:
                hw_search = 'https://' + hw + '/search/' + bl_query + search_quality + "+3D/feed/rss2/"
            else:
                hw_search = None
            if hs:
                hs_search = 'https://' + hs + '/search/' + bl_query + search_quality + '+3D/feed'
            else:
                hs_search = None
            if fx:
                fx_search = 'https://' + fx + '/?s=' + bl_query + "+3D"
            else:
                fx_search = None

            async_results = get_urls_async(
                [mb_search, hw_search, hs_search, fx_search], configfile,
                dbfile, scraper)
            async_results = async_results[0]

            mb_results = []
            hw_results = []
            hs_results = []
            fx_results = []

            for res in async_results:
                site = check_is_site(res, configfile)
                if site == 'MB':
                    mb_results = re.findall(
                        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
                elif site == 'HW':
                    hw_results = re.findall(
                        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)
                elif site == 'HS':
                    hs_results = hs_search_results(res)
                elif site == 'FX':
                    fx_results = re.findall(
                        r'<title>(.*?)<\/title>\n.*?<link>(.*?)<\/link>', res)

            if nk:
                nk_search = post_url('https://' + nk + "/search",
                                     configfile,
                                     dbfile,
                                     data={
                                         'search':
                                         bl_query.replace("+", " ") + " " +
                                         quality + "3D"
                                     })
                nk_results = nk_search_results(nk_search,
                                               'https://' + nk + '/')
            else:
                nk_results = []

            password = mb
            for result in mb_results:
                if not result[1].endswith("-MB") and not result[1].endswith(
                        ".MB"):
                    unrated.append([
                        rate(result[0], ignore),
                        encode_base64(result[1] + "|" + password),
                        result[0] + " (3D-MB)"
                    ])

            password = hw
            for result in hw_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-HW)"
                ])

            password = hs
            for result in hs_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-HS)"
                ])

            password = fx.split('.')[0]
            for result in fx_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-FX)"
                ])

            password = nk.split('.')[0].capitalize()
            for result in nk_results:
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (3D-NK)"
                ])

        rated = sorted(unrated, reverse=True)

        results = {}
        i = 0

        for result in rated:
            res = {"payload": result[1], "title": result[2]}
            results["result" + str(i + 1000)] = res
            i += 1
        bl_final = results

    if not bl_only:
        if sj:
            sj_query = sanitize(title).replace(" ", "+")
            sj_search = get_url(
                'https://' + sj + '/serie/search?q=' + sj_query, configfile,
                dbfile, scraper)
            try:
                sj_results = BeautifulSoup(sj_search, 'lxml').findAll(
                    "a", href=re.compile("/serie"))
            except:
                sj_results = []
        else:
            sj_results = []

        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {
                    "payload":
                    encode_base64(result['href'] + "|" + r_title + "|" +
                                  str(special)),
                    "title":
                    r_title + append
                }
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results

    return bl_final, sj_final
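The download functions in the next examples expect exactly the payload built here: base64 over a |-separated string. A self-contained round trip with stdlib stand-ins for the project's encode_base64/decode_base64 helpers:

import base64

def encode_base64(text):
    return base64.b64encode(text.encode('utf-8')).decode('utf-8')

def decode_base64(text):
    return base64.b64decode(text.encode('utf-8')).decode('utf-8')

payload = encode_base64('https://example.org/some-release|example.org')
link, password = decode_base64(payload).split('|')
print(link, password)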
Example #20
0
def download_bl(payload, device, configfile, dbfile):
    hostnames = RssConfig('Hostnames', configfile)
    mb = hostnames.get('mb')
    nk = hostnames.get('nk')
    fc = hostnames.get('fc').replace('www.', '').split('.')[0]

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]
    url = get_url(link, configfile, dbfile)
    if not url or "NinjaFirewall 429" in url:
        return False

    config = RssConfig('MB', configfile)
    db = RssDb(dbfile, 'rsscrawler')
    soup = BeautifulSoup(url, 'lxml')

    site = check_is_site(link, configfile)
    if not site:
        return False
    else:
        if "MB" in site:
            if not fc:
                print(
                    u"FC Hostname nicht gesetzt. MB kann keine Links finden!")
                return False
            key = soup.find("span", {"class": "fn"}).text
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HW" in site:
            if not fc:
                print(
                    u"FC Hostname nicht gesetzt. HW kann keine Links finden!")
                return False
            key = re.findall(r'Permanent Link: (.*?)"', str(soup)).pop()
            hosters = soup.find_all("a", href=re.compile(fc))
            url_hosters = []
            for hoster in hosters:
                dl = hoster["href"]
                hoster = hoster.text
                url_hosters.append([dl, hoster])
        elif "HS" in site:
            download = soup.find("div", {"class": "entry-content"})
            key = soup.find("h2", {"class": "entry-title"}).text
            url_hosters = re.findall(r'href="([^"\'>]*)".+?(.+?)<',
                                     str(download))
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

        links = {}
        if "MB" in site or "HW" in site or "HS" in site or "NK" in site:
            for url_hoster in reversed(url_hosters):
                try:
                    if mb.split('.')[0] not in url_hoster[
                            0] and "https://goo.gl/" not in url_hoster[0]:
                        link_hoster = url_hoster[1].lower().replace(
                            'target="_blank">', '').replace(" ", "-")
                        if check_hoster(link_hoster, configfile):
                            links[link_hoster] = url_hoster[0]
                except:
                    pass
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    if mb.split('.')[0] not in url_hoster[
                            0] and "https://goo.gl/" not in url_hoster[0]:
                        link_hoster = url_hoster[1].lower().replace(
                            'target="_blank">', '').replace(" ", "-")
                        links[link_hoster] = url_hoster[0]
            download_links = list(links.values())
        elif "FX" in site:
            download_links = fx_download_links(url, key, configfile)

        englisch = False
        if "*englisch" in key.lower() or "*english" in key.lower():
            key = key.replace('*ENGLISCH',
                              '').replace("*Englisch", "").replace(
                                  "*ENGLISH", "").replace("*English",
                                                          "").replace("*", "")
            englisch = True

        staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

        if config.get('enforcedl') and '.dl.' not in key.lower():
            fail = False
            get_imdb_url = url
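            # try to pull a linked IMDb id (tt + 7-9 digits) out of the fetched release page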
            key_regex = r'<title>' + \
                        re.escape(
                            key) + r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>'
            imdb_id = re.findall(key_regex, get_imdb_url)
            if len(imdb_id) > 0:
                if not imdb_id[0]:
                    fail = True
                else:
                    imdb_id = imdb_id[0]
            else:
                fail = True
            if fail:
                try:
                    search_title = re.findall(
                        r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
                        key)[0].replace(".", "+")
                    search_url = "http://www.imdb.com/find?q=" + search_title
                    search_page = get_url(search_url, configfile, dbfile)
                    search_results = re.findall(
                        r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                        search_page)
                    total_results = len(search_results)
                except:
                    return False
                if staffel:
                    try:
                        imdb_id = search_results[0][0]
                    except:
                        imdb_id = False
                else:
                    no_series = False
                    while total_results > 0:
                        attempt = 0
                        for result in search_results:
                            if result[3] == "TV Series":
                                no_series = False
                                total_results -= 1
                                attempt += 1
                            else:
                                no_series = True
                                imdb_id = search_results[attempt][0]
                                total_results = 0
                                break
                    if no_series is False:
                        logger.debug(
                            "%s - Keine passende Film-IMDB-Seite gefunden" %
                            key)

            if staffel:
                filename = 'MB_Staffeln'
            else:
                filename = 'MB_Filme'

            scraper = cloudscraper.create_scraper()
            blog = BL(configfile,
                      dbfile,
                      device,
                      logging,
                      scraper,
                      filename=filename)

            if not imdb_id:
                if not blog.dual_download(key, password):
                    logger.debug("%s - Kein zweisprachiges Release gefunden." %
                                 key)
            else:
                if isinstance(imdb_id, list):
                    imdb_id = imdb_id.pop()
                imdb_url = "http://www.imdb.com/title/" + imdb_id
                details = get_url(imdb_url, configfile, dbfile)
                if not details:
                    logger.debug("%s - Originalsprache nicht ermittelbar" %
                                 key)
                original_language = re.findall(
                    r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details)
                if original_language:
                    original_language = original_language[0]
                if original_language == "German":
                    logger.debug(
                        "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!"
                        % key)
                else:
                    if not blog.dual_download(key, password) and not englisch:
                        logger.debug(
                            "%s - Kein zweisprachiges Release gefunden!" % key)

        if download_links:
            if staffel:
                if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                                 download_links, password):
                    db.store(
                        key.replace(".COMPLETE", "").replace(".Complete", ""),
                        'notdl' if config.get('enforcedl')
                        and '.dl.' not in key.lower() else 'added')
                    log_entry = '[Suche/Staffel] - ' + key.replace(
                        ".COMPLETE", "").replace(".Complete",
                                                 "") + ' - [' + site + ']'
                    logger.info(log_entry)
                    notify([log_entry], configfile)
                    return True
            elif '.3d.' in key.lower():
                retail = False
                if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                    if config.get('enforcedl'):
                        if is_retail(key, '2', dbfile):
                            retail = True
                if myjd_download(configfile, dbfile, device, key,
                                 "RSScrawler/3Dcrawler", download_links,
                                 password):
                    db.store(
                        key, 'notdl' if config.get('enforcedl')
                        and '.dl.' not in key.lower() else 'added')
                    log_entry = '[Suche/Film' + (
                        '/Retail' if retail else
                        "") + '/3D] - ' + key + ' - [' + site + ']'
                    logger.info(log_entry)
                    notify([log_entry], configfile)
                    return True
            else:
                retail = False
                if config.get('cutoff') and '.COMPLETE.' not in key.lower():
                    if config.get('enforcedl'):
                        if is_retail(key, '1', dbfile):
                            retail = True
                    else:
                        if is_retail(key, '0', dbfile):
                            retail = True
                if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                                 download_links, password):
                    db.store(
                        key, 'notdl' if config.get('enforcedl')
                        and '.dl.' not in key.lower() else 'added')
                    log_entry = '[Suche/Film' + (
                        '/Englisch' if englisch and not retail else
                        '') + ('/Englisch/Retail' if englisch and retail else
                               '') + ('/Retail' if not englisch and retail else
                                      '') + '] - ' + key + ' - [' + site + ']'
                    logger.info(log_entry)
                    notify([log_entry], configfile)
                    return [key]
        else:
            return False
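download_bl branches on a single season-pack regex over the lowercased release name. A quick standalone check of what it classifies as a Staffel (the release names are invented):

import re

for key in ["Some.Show.S01.German.1080p.BluRay-GRP",
            "Some.Show.S01-S03.German.720p-GRP",
            "Some.Movie.2019.German.1080p.BluRay-GRP"]:
    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())
    print(key, "->", "Staffel" if staffel else "Film")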
Example #21
0
def download_sj(payload, configfile, dbfile):
    hostnames = RssConfig('Hostnames', configfile)
    sj = hostnames.get('sj')

    payload = decode_base64(payload).split("|")
    href = payload[0]
    title = payload[1]
    special = payload[2].strip().replace("None", "")

    series_url = 'https://' + sj + href
    series_info = get_url(series_url, configfile, dbfile)
    series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]

    api_url = 'https://' + sj + '/api/media/' + series_id + '/releases'
    releases = get_url(api_url, configfile, dbfile)

    seasons = json.loads(releases)

    listen = ["SJ_Serien", "MB_Staffeln"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    config = RssConfig('SJ', configfile)
    english_ok = RssConfig('RSScrawler', configfile).get("english")
    quality = config.get('quality')
    ignore = config.get('rejectlist')

    result_seasons = {}
    result_episodes = {}

    for season in seasons:
        releases = seasons[season]
        for release in releases['items']:
            name = release['name'].encode('ascii',
                                          errors='ignore').decode('utf-8')
            hosters = release['hoster']
            try:
                valid = bool(release['resolution'] == quality)
            except:
                valid = re.match(re.compile(r'.*' + quality + r'.*'), name)
            if valid and special:
                valid = bool("." + special.lower() + "." in name.lower())
            if valid and not english_ok:
                valid = bool(".german." in name.lower())
            if valid:
                valid = False
                for hoster in hosters:
                    if hoster and check_hoster(
                            hoster,
                            configfile) or config.get("hoster_fallback"):
                        valid = True
            if valid:
                try:
                    ep = release['episode']
                    if ep:
                        existing = result_episodes.get(season)
                        if existing:
                            for e in existing:
                                if e == ep:
                                    if rate(name, ignore) > rate(
                                            existing[e], ignore):
                                        existing.update({ep: name})
                        else:
                            existing = {ep: name}
                        result_episodes.update({season: existing})
                        continue
                except:
                    pass

                existing = result_seasons.get(season)
                dont = False
                if existing:
                    if rate(name, ignore) < rate(existing, ignore):
                        dont = True
                if not dont:
                    result_seasons.update({season: name})

        try:
            if result_seasons[season] and result_episodes[season]:
                del result_episodes[season]
        except:
            pass

        success = False
        try:
            if result_seasons[season]:
                success = True
        except:
            try:
                if result_episodes[season]:
                    success = True
            except:
                pass

        if success:
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)
        else:
            for release in releases['items']:
                name = release['name'].encode('ascii',
                                              errors='ignore').decode('utf-8')
                hosters = release['hoster']
                valid = True
                if valid and special:
                    valid = bool("." + special.lower() + "." in name.lower())
                if valid and not english_ok:
                    valid = bool(".german." in name.lower())
                if valid:
                    valid = False
                    for hoster in hosters:
                        if hoster and check_hoster(
                                hoster,
                                configfile) or config.get("hoster_fallback"):
                            valid = True
                if valid:
                    try:
                        ep = release['episode']
                        if ep:
                            existing = result_episodes.get(season)
                            if existing:
                                for e in existing:
                                    if e == ep:
                                        if rate(name, ignore) > rate(
                                                existing[e], ignore):
                                            existing.update({ep: name})
                            else:
                                existing = {ep: name}
                            result_episodes.update({season: existing})
                            continue
                    except:
                        pass

                    existing = result_seasons.get(season)
                    dont = False
                    if existing:
                        if rate(name, ignore) < rate(existing, ignore):
                            dont = True
                    if not dont:
                        result_seasons.update({season: name})

            try:
                if result_seasons[season] and result_episodes[season]:
                    del result_episodes[season]
            except:
                pass
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)

    matches = []

    for season in result_seasons:
        matches.append(result_seasons[season])
    for season in result_episodes:
        for episode in result_episodes[season]:
            matches.append(result_episodes[season][episode])

    notify_array = []
    for title in matches:
        db = RssDb(dbfile, 'rsscrawler')
        if add_decrypt(title, series_url, sj, dbfile):
            db.store(title, 'added')
            log_entry = u'[Suche/Serie] - ' + title + ' - [SJ]'
            logger.info(log_entry)
            notify_array.append(log_entry)

    notify(notify_array, configfile)

    if not matches:
        return False
    return matches
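The season/episode bookkeeping above always keeps the best-rated release name per slot. A reduced sketch of that selection with a stand-in rate() (the project's rate() scores releases against the configured preferences):

def rate(name, ignore=None):
    # stand-in scorer: count a few quality markers in the name
    return sum(tag in name.lower() for tag in (".dl.", ".1080p.", "bluray"))

result_episodes = {}
releases = [("1", 1, "Show.S01E01.German.720p.WEB-GRP"),
            ("1", 1, "Show.S01E01.German.DL.1080p.BluRay-GRP")]

for season, ep, name in releases:
    existing = result_episodes.get(season)
    if existing and ep in existing:
        if rate(name) > rate(existing[ep]):
            existing[ep] = name  # better-rated release replaces the old one
    else:
        existing = existing or {}
        existing[ep] = name
    result_episodes[season] = existing

print(result_episodes)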