예제 #1
0
    def parse_download(self, series_url, title, language_id):
        """Look up ``title`` in the DJ releases API and hand it to ``send_package``.

        The page at ``series_url`` is fetched to read its ``data-mediaid``
        attribute; the releases listing for that id is then searched for an
        item whose ``name`` equals ``title``.

        :param series_url: URL of the page that carries the media id.
        :param title: Exact release name to look for.
        :param language_id: Passed through unchanged to ``send_package``.
        :return: The result of ``self.send_package(...)`` for the first
            matching item that has an accepted hoster (or for any matching
            item when ``self.hoster_fallback`` is set); ``None`` otherwise.
        """
        if not check_valid_release(title, False, False, self.dbfile):
            self.log_debug(title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)")
            return
        try:
            series_info = get_url(series_url, self.configfile, self.dbfile)
            series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
            api_url = 'https://' + self.dj + '/api/media/' + series_id + '/releases'

            response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
            seasons = json.loads(response)
            for season in seasons.values():
                for item in season['items']:
                    if item['name'] != title:
                        continue
                    # Empty hoster entries are skipped instead of being
                    # handed to check_hoster.
                    valid = any(
                        hoster and check_hoster(hoster, self.configfile)
                        for hoster in item['hoster'])
                    if valid or self.hoster_fallback:
                        return self.send_package(title, series_url, language_id)
                    storage = self.db.retrieve_all(title)
                    if 'added' in storage or 'notdl' in storage:
                        continue
                    wrong_hoster = '[DJ/Hoster fehlt] - ' + title
                    if 'wrong_hoster' not in storage:
                        # First time this release is seen without a usable
                        # hoster: remember it and notify once.
                        print(wrong_hoster)
                        self.db.store(title, 'wrong_hoster')
                        notify([wrong_hoster], self.configfile)
                    else:
                        self.log_debug(wrong_hoster)
        except Exception:
            # Any failure while fetching or walking the API response is
            # reported as a changed API. KeyboardInterrupt and SystemExit are
            # deliberately not intercepted.
            print(u"DJ hat die Doku-API angepasst. Breche Download-Prüfung ab!")
예제 #2
0
파일: dd.py 프로젝트: evilmon/RSScrawler
    def periodical_task(self):
        """Scan the configured feeds and send eligible posts to ``myjd_download``.

        The ``feeds`` setting is a comma separated list of feed URLs. Only
        posts published more than 1800 seconds ago are processed. Links are
        taken from the post summary and filtered through ``check_hoster``;
        when the ``hoster_fallback`` setting is truthy and nothing passed the
        filter, all links are used.

        :return: ``self.device`` (reassigned by every ``myjd_download`` call).
        """
        feed_setting = self.config.get("feeds")
        if not feed_setting:
            self.log_debug("Liste ist leer. Stoppe Suche für DD!")
            return self.device

        added_items = []
        for feed_url in feed_setting.replace(" ", "").split(','):
            parsed_feed = feedparser.parse(
                get_url(feed_url, self.configfile, self.dbfile, self.scraper))
            for post in parsed_feed.entries:
                key = post.title.replace(" ", ".")

                unix_start = datetime(1970, 1, 1)
                now_seconds = int(time())
                published_text = str(parser.parse(post.published))
                published_at = datetime.strptime(published_text,
                                                 "%Y-%m-%d %H:%M:%S+00:00")
                published_seconds = int(
                    (published_at - unix_start).total_seconds())

                if not (now_seconds - 1800) > published_seconds:
                    self.log_debug(
                        "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert."
                        % key)
                    continue

                found_links = re.findall(r'(http.*)', post.summary)
                links = [
                    str(candidate) for candidate in found_links
                    if check_hoster(candidate, self.configfile)
                ]
                if self.config.get("hoster_fallback") and not links:
                    links = [str(candidate) for candidate in found_links]

                storage = self.db.retrieve_all(key)
                if not links:
                    if 'added' in storage or 'notdl' in storage:
                        continue
                    wrong_hoster = '[DD/Hoster fehlt] - ' + key
                    if 'wrong_hoster' in storage:
                        self.log_debug(wrong_hoster)
                    else:
                        self.log_info(wrong_hoster)
                        self.db.store(key, 'wrong_hoster')
                        notify([wrong_hoster], self.configfile)
                    continue

                if 'added' in storage:
                    self.log_debug(
                        "%s - Release ignoriert (bereits gefunden)" % key)
                    continue

                self.device = myjd_download(self.configfile, self.dbfile,
                                            self.device, key, "RSScrawler",
                                            links, "")
                if not self.device:
                    continue
                self.db.store(key, 'added')
                log_entry = '[Englisch] - ' + key + ' - [DD]'
                self.log_info(log_entry)
                notify([log_entry], self.configfile)
                added_items.append(log_entry)
        return self.device
예제 #3
0
    def parse_download(self, series_url, title, language_id):
        """Look up ``title`` in the SJ releases API and hand it to ``send_package``.

        Releases rejected by ``check_valid_release`` are skipped. When
        ``self.filename`` is ``'MB_Staffeln'``, titles spanning several
        seasons are skipped unless the ``seasonpacks`` setting is truthy, and
        titles not matching ``self.seasonssource`` are skipped as well.

        :param series_url: URL of the page that carries the media id.
        :param title: Exact release name to look for.
        :param language_id: Passed through unchanged to ``send_package``.
        :return: The result of ``self.send_package(...)`` for the first
            matching item that has an accepted hoster (or for any matching
            item when ``self.hoster_fallback`` is set); ``None`` otherwise.
        """
        if not check_valid_release(title, self.retail_only, self.hevc_retail,
                                   self.dbfile):
            self.log_debug(
                title +
                u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
            )
            return
        if self.filename == 'MB_Staffeln':
            if not self.config.get("seasonpacks"):
                staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
                if staffelpack:
                    self.log_debug("%s - Release ignoriert (Staffelpaket)" %
                                   title)
                    return
            if not re.search(self.seasonssource, title.lower()):
                self.log_debug(title + " - Release hat falsche Quelle")
                return
        try:
            series_info = get_url(series_url, self.configfile, self.dbfile)
            series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
            api_url = 'https://' + self.sj + '/api/media/' + series_id + '/releases'

            response = get_url(api_url, self.configfile, self.dbfile,
                               self.scraper)
            seasons = json.loads(response)
            for season in seasons.values():
                for item in season['items']:
                    if item['name'] != title:
                        continue
                    # Empty hoster entries never count as a usable hoster.
                    valid = any(
                        hoster and check_hoster(hoster, self.configfile)
                        for hoster in item['hoster'])
                    if valid or self.hoster_fallback:
                        return self.send_package(title, series_url,
                                                 language_id)
                    storage = self.db.retrieve_all(title)
                    if 'added' in storage or 'notdl' in storage:
                        continue
                    wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                    if 'wrong_hoster' not in storage:
                        # First time this release is seen without a usable
                        # hoster: remember it and notify once.
                        print(wrong_hoster)
                        self.db.store(title, 'wrong_hoster')
                        notify([wrong_hoster], self.configfile)
                    else:
                        self.log_debug(wrong_hoster)
        except Exception:
            # Any failure while fetching or walking the API response is
            # reported as a changed API. KeyboardInterrupt and SystemExit are
            # deliberately not intercepted.
            print(
                u"SJ hat die Serien-API angepasst. Breche Download-Prüfung ab!"
            )
예제 #4
0
def hoster_check(configfile, device, decrypted_packages, title,
                 known_packages):
    """Separate links on accepted hosters from the rest of the decrypted packages.

    Packages are first passed through ``package_merge_check``; when that
    returns a truthy result, its first two elements replace ``device`` and
    ``decrypted_packages``. Every link id and uuid is collected as a deletion
    candidate. For packages whose uuid is not in ``known_packages``, each URL
    accepted by ``check_hoster`` marks the link id at the same position as one
    to keep.

    :param configfile: Passed to ``check_hoster`` and the helper calls.
    :param device: Device handle passed through to the helper calls.
    :param decrypted_packages: Packages, each providing ``linkids``, ``uuid``
        and ``urls``.
    :param title: Passed through to ``remove_unfit_links``.
    :param known_packages: Container of uuids that must not be examined.
    :return: ``[False, False]`` when ``decrypted_packages`` is empty,
        ``[device, False]`` when no link is kept, otherwise the first two
        elements of the ``remove_unfit_links`` result.
    """
    if not decrypted_packages:
        return [False, False]

    delete_packages = []
    delete_linkids = []
    delete_uuids = []
    keep_linkids = []
    keep_uuids = []

    merge_first = package_merge_check(device, configfile, decrypted_packages,
                                      known_packages)
    if merge_first:
        device = merge_first[0]
        decrypted_packages = merge_first[1]

    valid_links = False
    # The merge step may have replaced the package list with an empty value.
    for dp in decrypted_packages or []:
        linkids = dp['linkids']
        delete_linkids.extend(linkids)
        uuid = dp['uuid']
        delete_uuids.append(uuid)
        if uuid in known_packages:
            continue

        delete = True
        # The position is counted per package: link ids belong to the package
        # they come from, so a counter shared across packages would point at
        # the wrong id (or past the end) from the second package on.
        for position, link in enumerate(split_urls(dp['urls'])):
            if not check_hoster(link, configfile):
                continue
            try:
                keep_linkids.append(linkids[position])
                valid_links = True
            except IndexError:
                # More URLs than link ids: nothing to keep at this position.
                pass
            if uuid not in keep_uuids:
                keep_uuids.append(uuid)
            delete = False
        if delete:
            delete_packages.append(dp)

    if valid_links:
        removed = remove_unfit_links(configfile, device, decrypted_packages,
                                     known_packages, keep_linkids, keep_uuids,
                                     delete_linkids, delete_uuids,
                                     delete_packages, title)
        return [removed[0], removed[1]]
    return [device, False]
예제 #5
0
파일: sf.py 프로젝트: evilmon/RSScrawler
    def parse_download(self, series_url, title, language_id):
        """Find the SF release matching ``title`` and hand it to ``send_package``.

        Releases rejected by ``check_valid_release`` are skipped. When
        ``self.filename`` is ``'MB_Staffeln'``, titles spanning several
        seasons are skipped unless the ``seasonpacks`` setting is truthy, and
        titles not matching ``self.seasonssource`` are skipped as well.

        The API at ``series_url`` is queried with a language (``EN`` for
        ``language_id == 2``, otherwise ``DE``) and a millisecond timestamp.
        The ``html`` field of the JSON response is searched for a ``<small>``
        element whose text matches a pattern derived from ``title``.

        :param series_url: API URL that returns the release listing.
        :param title: Release name to look for.
        :param language_id: Selects the API language and is passed on to
            ``send_package``.
        :return: The result of ``self.send_package(...)`` when a link on an
            accepted hoster exists (or ``self.hoster_fallback`` is set);
            ``None`` otherwise.
        """
        if not check_valid_release(title, self.retail_only, self.hevc_retail,
                                   self.dbfile):
            self.log_debug(
                title +
                u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)"
            )
            return
        if self.filename == 'MB_Staffeln':
            if not self.config.get("seasonpacks"):
                staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
                if staffelpack:
                    self.log_debug("%s - Release ignoriert (Staffelpaket)" %
                                   title)
                    return
            if not re.search(self.seasonssource, title.lower()):
                self.log_debug(title + " - Release hat falsche Quelle")
                return
        try:
            lang = 'EN' if language_id == 2 else 'DE'
            # Milliseconds since the epoch, computed arithmetically so the
            # value does not depend on how many decimals the float prints.
            epoch = str(int(datetime.datetime.now().timestamp() * 1000))
            api_url = series_url + '?lang=' + lang + '&_=' + epoch
            response = get_url(api_url, self.configfile, self.dbfile,
                               self.scraper)
            info = json.loads(response)

            is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title,
                                    re.IGNORECASE)
            if is_episode:
                # The tag was matched case-insensitively, so it has to be
                # taken apart case-insensitively as well.
                episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*',
                                            is_episode[0],
                                            re.IGNORECASE)[0].lower()
                season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*',
                                           is_episode[0],
                                           re.IGNORECASE)[0].lower()
                # Build a pattern for the season release that would contain
                # this episode: drop the episode tag and loosen the parts
                # that are replaced with wildcards below.
                season_title = rreplace(
                    title.lower().replace(episode_string, ''), "-", ".*",
                    1).lower()
                season_title = season_title.replace(".untouched",
                                                    ".*").replace(
                                                        ".dd+51", ".dd.51")
                episode = str(int(episode_string.replace("e", "")))
                season = str(int(season_string.replace("s", "")))
                episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german',
                                          season_title, re.IGNORECASE)
                if episode_name:
                    season_title = season_title.replace(episode_name[0], '')
                for tag in (".h264", ".x264"):
                    season_title = season_title.replace(tag, ".*264")
                for tag in (".web-rip", ".webrip", ".webdl", ".web-dl"):
                    season_title = season_title.replace(tag, ".web.*")
            else:
                season = False
                episode = False
                season_title = title
                multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)',
                                               season_title, re.IGNORECASE)
                if multiple_episodes:
                    season_title = season_title.replace(
                        multiple_episodes[0], '.*')

            content = BeautifulSoup(info['html'], 'lxml')
            releases = content.find(
                "small", text=re.compile(season_title,
                                         re.IGNORECASE)).parent.parent.parent
            links = releases.findAll("div", {'class': 'row'})[1].findAll('a')
            valid = False
            # download_link ends up as the first link on an accepted hoster,
            # or as the last link when none is accepted.
            for link in links:
                download_link = link['href']
                if check_hoster(link.text.replace('\n', ''), self.configfile):
                    valid = True
                    break
            if valid or self.hoster_fallback:
                return self.send_package(title, download_link, language_id,
                                         season, episode)
            storage = self.db.retrieve_all(title)
            if 'added' not in storage and 'notdl' not in storage:
                wrong_hoster = '[SF/Hoster fehlt] - ' + title
                if 'wrong_hoster' not in storage:
                    self.log_info(wrong_hoster)
                    self.db.store(title, 'wrong_hoster')
                    notify([wrong_hoster], self.configfile)
                else:
                    self.log_debug(wrong_hoster)
        except Exception:
            # Any failure while fetching or parsing the response is reported
            # as a changed API. KeyboardInterrupt and SystemExit are
            # deliberately not intercepted.
            print(
                u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!"
            )
예제 #6
0
def _sj_release_wanted(release, name, configfile, config, quality, special,
                       english_ok, check_quality):
    """Return a truthy value when ``release`` passes every search filter."""
    hosters = release['hoster']
    if check_quality:
        try:
            valid = release['resolution'] == quality
        except KeyError:
            # No resolution field: look for the quality in the name instead.
            valid = re.match(r'.*' + quality + r'.*', name)
        if not valid:
            return False
    lowered = name.lower()
    if special and "." + special.lower() + "." not in lowered:
        return False
    if not english_ok and ".german." not in lowered:
        return False
    return any((hoster and check_hoster(hoster, configfile))
               or config.get("hoster_fallback") for hoster in hosters)


def _sj_collect_releases(items, season, result_seasons, result_episodes,
                         configfile, config, quality, special, english_ok,
                         ignore, check_quality):
    """Record the best rated season pack and episodes of ``season`` in place."""
    for release in items:
        name = release['name'].encode('ascii',
                                      errors='ignore').decode('utf-8')
        if not _sj_release_wanted(release, name, configfile, config, quality,
                                  special, english_ok, check_quality):
            continue

        ep = release.get('episode')
        if ep:
            episodes = result_episodes.setdefault(season, {})
            # Keep one release per episode: add episodes not seen yet and
            # replace a stored one only when the new release rates higher.
            if ep not in episodes or rate(name, ignore) > rate(
                    episodes[ep], ignore):
                episodes[ep] = name
            continue

        best = result_seasons.get(season)
        if not best or not rate(name, ignore) < rate(best, ignore):
            result_seasons[season] = name


def _sj_prefer_season_pack(season, result_seasons, result_episodes):
    """Drop the single episodes of ``season`` when a season pack was found."""
    if result_seasons.get(season) and result_episodes.get(season):
        del result_episodes[season]


def download_sj(payload, configfile, dbfile):
    """Add the best matching releases of one series found via the web search.

    ``payload`` is a base64 string that decodes to ``href|title|special``.
    The series title is stored in the ``SJ_Serien`` and ``MB_Staffeln`` lists
    if missing. For every season of the releases API response, releases are
    filtered by quality, the optional ``special`` tag, language and hoster.
    When nothing passes, the season is filtered again without the quality
    requirement. A season pack replaces the single episodes of its season.

    :param payload: Base64 encoded ``href|title|special`` string.
    :param configfile: Configuration file handed to the config helpers.
    :param dbfile: Database file handed to the database helpers.
    :return: List of selected release names, or ``False`` when none was found.
    """
    hostnames = RssConfig('Hostnames', configfile)
    sj = hostnames.get('sj')

    payload = decode_base64(payload).split("|")
    href = payload[0]
    title = payload[1]
    special = payload[2].strip().replace("None", "")

    series_url = 'https://' + sj + href
    series_info = get_url(series_url, configfile, dbfile)
    series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]

    api_url = 'https://' + sj + '/api/media/' + series_id + '/releases'
    releases = get_url(api_url, configfile, dbfile)

    seasons = json.loads(releases)

    listen = ["SJ_Serien", "MB_Staffeln"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    config = RssConfig('SJ', configfile)
    english_ok = RssConfig('RSScrawler', configfile).get("english")
    quality = config.get('quality')
    ignore = config.get('rejectlist')

    result_seasons = {}
    result_episodes = {}

    for season in seasons:
        items = seasons[season]['items']

        _sj_collect_releases(items, season, result_seasons, result_episodes,
                             configfile, config, quality, special, english_ok,
                             ignore, True)
        _sj_prefer_season_pack(season, result_seasons, result_episodes)

        if season in result_seasons:
            success = bool(result_seasons[season])
        else:
            success = bool(result_episodes.get(season))

        if not success:
            # Nothing in the configured quality: accept any quality instead.
            _sj_collect_releases(items, season, result_seasons,
                                 result_episodes, configfile, config, quality,
                                 special, english_ok, ignore, False)
            _sj_prefer_season_pack(season, result_seasons, result_episodes)

        logger.debug(u"Websuche erfolgreich für " + title + " - " + season)

    matches = []

    for season in result_seasons:
        matches.append(result_seasons[season])
    for season in result_episodes:
        for episode in result_episodes[season]:
            matches.append(result_episodes[season][episode])

    notify_array = []
    for release_title in matches:
        db = RssDb(dbfile, 'rsscrawler')
        if add_decrypt(release_title, series_url, sj, dbfile):
            db.store(release_title, 'added')
            log_entry = u'[Suche/Serie] - ' + release_title + ' - [SJ]'
            logger.info(log_entry)
            notify_array.append(log_entry)

    notify(notify_array, configfile)

    if not matches:
        return False
    return matches
예제 #7
0
def download_bl(payload, device, configfile, dbfile):
    """Download one blog release selected through the web search.

    ``payload`` is a base64 string that decodes to ``link|password`` (for FX
    pages ``link|key|password``). The page behind ``link`` is fetched and,
    depending on the site reported by ``check_is_site``, the release name and
    the hoster links are extracted. Links are filtered with ``check_hoster``;
    when the ``hoster_fallback`` setting is truthy and nothing passed, all
    links are used. With ``enforcedl`` set and no ``.dl.`` tag in the name, a
    dual language release is additionally searched via ``BL.dual_download``
    unless IMDb reports German as the original language.

    :param payload: Base64 encoded, ``|`` separated request.
    :param device: Device handle passed to ``BL`` and ``myjd_download``.
    :param configfile: Configuration file handed to the helpers.
    :param dbfile: Database file handed to the helpers.
    :return: ``False`` when the page cannot be loaded or recognised, the FC
        hostname is missing, the IMDb search fails or no download links were
        found; ``True`` after a season or 3D release was handed over;
        ``[key]`` after a regular film was handed over; ``None`` when
        ``myjd_download`` returned a falsy value.
    """
    hostnames = RssConfig('Hostnames', configfile)
    mb = hostnames.get('mb')
    nk = hostnames.get('nk')
    fc = hostnames.get('fc').replace('www.', '').split('.')[0]

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]
    url = get_url(link, configfile, dbfile)
    if not url or "NinjaFirewall 429" in url:
        return False

    config = RssConfig('MB', configfile)
    db = RssDb(dbfile, 'rsscrawler')
    soup = BeautifulSoup(url, 'lxml')

    site = check_is_site(link, configfile)
    if not site:
        return False

    if "MB" in site:
        if not fc:
            print(u"FC Hostname nicht gesetzt. MB kann keine Links finden!")
            return False
        key = soup.find("span", {"class": "fn"}).text
        url_hosters = [[anchor["href"], anchor.text]
                       for anchor in soup.find_all("a", href=re.compile(fc))]
    elif "HW" in site:
        if not fc:
            print(u"FC Hostname nicht gesetzt. MB kann keine Links finden!")
            return False
        key = re.findall(r'Permanent Link: (.*?)"', str(soup)).pop()
        url_hosters = [[anchor["href"], anchor.text]
                       for anchor in soup.find_all("a", href=re.compile(fc))]
    elif "HS" in site:
        download = soup.find("div", {"class": "entry-content"})
        key = soup.find("h2", {"class": "entry-title"}).text
        url_hosters = re.findall(r'href="([^"\'>]*)".+?(.+?)<',
                                 str(download))
    elif "NK" in site:
        key = soup.find("span", {"class": "subtitle"}).text
        url_hosters = [['https://' + nk + anchor["href"], anchor.text]
                       for anchor in soup.find_all("a",
                                                   href=re.compile("/go/"))]
    elif "FX" in site:
        key = payload[1]
        password = payload[2]
    else:
        return False

    links = {}
    if "MB" in site or "HW" in site or "HS" in site or "NK" in site:
        # Iterating in reverse lets the first listed link of each hoster win,
        # because later assignments overwrite earlier ones.
        for url_hoster in reversed(url_hosters):
            try:
                if mb.split('.')[0] not in url_hoster[
                        0] and "https://goo.gl/" not in url_hoster[0]:
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-")
                    if check_hoster(link_hoster, configfile):
                        links[link_hoster] = url_hoster[0]
            except Exception:
                # Best effort: an entry that cannot be evaluated is skipped.
                pass
        if config.get("hoster_fallback") and not links:
            for url_hoster in reversed(url_hosters):
                if mb.split('.')[0] not in url_hoster[
                        0] and "https://goo.gl/" not in url_hoster[0]:
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-")
                    links[link_hoster] = url_hoster[0]
        download_links = list(links.values())
    elif "FX" in site:
        download_links = fx_download_links(url, key, configfile)

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    # True when a dual language release is required but this one is not one.
    dual_missing = config.get('enforcedl') and '.dl.' not in key.lower()

    if dual_missing:
        key_regex = (
            r'<title>' + re.escape(key) +
            r'.*?<\/title>\n.*?<link>(?:(?:.*?\n){1,25}).*?[mM][kK][vV].*?(?:|href=.?http(?:|s):\/\/(?:|www\.)imdb\.com\/title\/(tt[0-9]{7,9}).*?)[iI][mM][dD][bB].*?(?!\d(?:\.|\,)\d)(?:.|.*?)<\/a>'
        )
        imdb_id = re.findall(key_regex, url)
        if imdb_id and imdb_id[0]:
            imdb_id = imdb_id[0]
        else:
            # No id on the page itself: fall back to the IMDb title search.
            try:
                search_title = re.findall(
                    r"(.*?)(?:\.(?:(?:19|20)\d{2})|\.German|\.\d{3,4}p|\.S(?:\d{1,3})\.)",
                    key)[0].replace(".", "+")
                search_url = "http://www.imdb.com/find?q=" + search_title
                search_page = get_url(search_url, configfile, dbfile)
                search_results = re.findall(
                    r'<td class="result_text"> <a href="\/title\/(tt[0-9]{7,9})\/\?ref_=fn_al_tt_\d" >(.*?)<\/a>.*? \((\d{4})\)..(.{9})',
                    search_page)
            except Exception:
                return False
            if staffel:
                imdb_id = search_results[0][0] if search_results else False
            else:
                # For films take the first search result that is not a TV
                # series; imdb_id is left untouched when there is none.
                film = next((result for result in search_results
                             if result[3] != "TV Series"), None)
                if film is not None:
                    imdb_id = film[0]
                else:
                    logger.debug(
                        "%s - Keine passende Film-IMDB-Seite gefunden" % key)

        filename = 'MB_Staffeln' if staffel else 'MB_Filme'

        scraper = cloudscraper.create_scraper()
        blog = BL(configfile,
                  dbfile,
                  device,
                  logging,
                  scraper,
                  filename=filename)

        if not imdb_id:
            if not blog.dual_download(key, password):
                logger.debug("%s - Kein zweisprachiges Release gefunden." %
                             key)
        else:
            if isinstance(imdb_id, list):
                imdb_id = imdb_id.pop()
            imdb_url = "http://www.imdb.com/title/" + imdb_id
            details = get_url(imdb_url, configfile, dbfile)
            if not details:
                logger.debug("%s - Originalsprache nicht ermittelbar" % key)
            # A page that could not be loaded is treated like an empty one,
            # so the language lookup finds nothing instead of raising.
            original_language = re.findall(
                r"Language:<\/h4>\n.*?\n.*?url'>(.*?)<\/a>", details or "")
            if original_language:
                original_language = original_language[0]
            if original_language == "German":
                logger.debug(
                    "%s - Originalsprache ist Deutsch. Breche Suche nach zweisprachigem Release ab!"
                    % key)
            else:
                if not blog.dual_download(key, password) and not englisch:
                    logger.debug(
                        "%s - Kein zweisprachiges Release gefunden!" % key)

    if not download_links:
        return False

    stored_state = 'notdl' if dual_missing else 'added'
    # Compared against the lowercased key, so the tag must be lowercase too.
    is_complete = '.complete.' in key.lower()

    if staffel:
        if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                         download_links, password):
            clean_key = key.replace(".COMPLETE", "").replace(".Complete", "")
            db.store(clean_key, stored_state)
            log_entry = '[Suche/Staffel] - ' + clean_key + ' - [' + site + ']'
            logger.info(log_entry)
            notify([log_entry], configfile)
            return True
    elif '.3d.' in key.lower():
        retail = False
        if config.get('cutoff') and not is_complete:
            if config.get('enforcedl') and is_retail(key, '2', dbfile):
                retail = True
        if myjd_download(configfile, dbfile, device, key,
                         "RSScrawler/3Dcrawler", download_links, password):
            db.store(key, stored_state)
            log_entry = '[Suche/Film' + ('/Retail' if retail else
                                         "") + '/3D] - ' + key + ' - [' + site + ']'
            logger.info(log_entry)
            notify([log_entry], configfile)
            return True
    else:
        retail = False
        if config.get('cutoff') and not is_complete:
            retail_mode = '1' if config.get('enforcedl') else '0'
            if is_retail(key, retail_mode, dbfile):
                retail = True
        if myjd_download(configfile, dbfile, device, key, "RSScrawler",
                         download_links, password):
            db.store(key, stored_state)
            tags = ('/Englisch' if englisch else '') + ('/Retail'
                                                        if retail else '')
            log_entry = '[Suche/Film' + tags + '] - ' + key + ' - [' + site + ']'
            logger.info(log_entry)
            notify([log_entry], configfile)
            return [key]