def check_download_links(self, url_hosters):
    links = {}
    for url_hoster in reversed(url_hosters):
        hoster = url_hoster[1].lower().replace('target="_blank">', '').replace(
            " ", "-").replace("ddownload", "ddl")
        if check_hoster(hoster, self.configfile):
            link = url_hoster[0]
            if self.url in link:
                demasked_link = get_redirected_url(link, self.configfile, self.dbfile, self.scraper)
                if demasked_link:
                    link = demasked_link
            links[hoster] = link
    if self.hoster_fallback and not links:
        for url_hoster in reversed(url_hosters):
            hoster = url_hoster[1].lower().replace(
                'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
            link = url_hoster[0]
            if self.url in link:
                demasked_link = get_redirected_url(link, self.configfile, self.dbfile, self.scraper)
                if demasked_link:
                    link = demasked_link
            links[hoster] = link
    return list(links.values())
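# Illustrative sketch (hypothetical values): check_download_links() expects
# url_hosters as a list of [url, hoster_label] pairs, where the label may still
# carry HTML residue such as 'target="_blank">'. The normalization below mirrors
# the replace() chain used above.
example_url_hosters = [
    ["https://example.invalid/go/abc", 'target="_blank">DDownload'],
    ["https://example.invalid/go/def", "Rapidgator"],
]
for example_url, example_label in example_url_hosters:
    normalized = example_label.lower().replace('target="_blank">', '').replace(
        " ", "-").replace("ddownload", "ddl")
    print(normalized, "->", example_url)  # ddl -> ..., rapidgator -> ...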
def periodical_task(self):
    feeds = self.config.get("feeds")
    if feeds:
        added_items = []
        feeds = feeds.replace(" ", "").split(',')
        for feed in feeds:
            feed = feedparser.parse(
                get_url(feed, self.configfile, self.dbfile, self.scraper))
            for post in feed.entries:
                key = post.title.replace(" ", ".")

                # Convert the feed's publication date to a Unix timestamp.
                epoch = datetime(1970, 1, 1)
                current_epoch = int(time())
                published_format = "%Y-%m-%d %H:%M:%S+00:00"
                published_timestamp = str(parser.parse(post.published))
                published_epoch = int((datetime.strptime(
                    published_timestamp, published_format) - epoch).total_seconds())
                if (current_epoch - 1800) > published_epoch:
                    # Only consider posts published more than 30 minutes ago.
                    link_pool = post.summary
                    unicode_links = re.findall(r'(http.*)', link_pool)
                    links = []
                    for link in unicode_links:
                        if check_hoster(link, self.configfile):
                            links.append(str(link))
                    if self.config.get("hoster_fallback") and not links:
                        for link in unicode_links:
                            links.append(str(link))
                    storage = self.db.retrieve_all(key)
                    if not links:
                        if 'added' not in storage and 'notdl' not in storage:
                            wrong_hoster = '[' + self._SITE + '/Hoster fehlt] - ' + key
                            if 'wrong_hoster' not in storage:
                                print(wrong_hoster)
                                self.db.store(key, 'wrong_hoster')
                                notify([wrong_hoster], self.configfile)
                            else:
                                self.log_debug(wrong_hoster)
                    elif 'added' in storage:
                        self.log_debug(
                            "%s - Release ignoriert (bereits gefunden)" % key)
                    else:
                        self.device = myjd_download(
                            self.configfile, self.dbfile, self.device, key,
                            "FeedCrawler", links, "")
                        if self.device:
                            self.db.store(key, 'added')
                            log_entry = '[Englisch] - ' + key + ' - [' + self._SITE + ']'
                            self.log_info(log_entry)
                            notify([log_entry], self.configfile)
                            added_items.append(log_entry)
                else:
                    self.log_debug(
                        "%s - Releasezeitpunkt weniger als 30 Minuten in der Vergangenheit - wird ignoriert." % key)
    else:
        self.log_debug("Liste ist leer. Stoppe Suche für " + self._SITE + "!")
    return self.device
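# Standalone sketch of the freshness check above: a post is only processed once
# its parsed publication time lies more than 1800 seconds (30 minutes) in the
# past. The timestamp string is a made-up example in the format that
# str(parser.parse(...)) yields for UTC feeds.
from datetime import datetime
from time import time

published_timestamp = "2024-01-01 12:00:00+00:00"
epoch = datetime(1970, 1, 1)
published_epoch = int((datetime.strptime(
    published_timestamp, "%Y-%m-%d %H:%M:%S+00:00") - epoch).total_seconds())
old_enough = (int(time()) - 1800) > published_epoch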
def dw_parse_download(self, release_url, title, language_id):
    if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile):
        self.log_debug(
            title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)")
        return False
    if self.filename == 'List_ContentAll_Seasons':
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return False
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return False
    try:
        release_info = get_url(release_url, self.configfile, self.dbfile)
        post_hosters = BeautifulSoup(release_info, 'lxml').find(
            "div", {"id": "download"}).findAll("img", src=re.compile(r"images/hosterimg"))
        hosters = []
        valid = False
        for hoster in post_hosters:
            hoster = hoster["title"].replace("Premium-Account bei ", "").replace("ddownload", "ddl")
            if hoster not in hosters:
                hosters.append(hoster)
        for hoster in hosters:
            if hoster:
                if check_hoster(hoster, self.configfile):
                    valid = True
        if not valid and not self.hoster_fallback:
            storage = self.db.retrieve_all(title)
            if 'added' not in storage and 'notdl' not in storage:
                wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                if 'wrong_hoster' not in storage:
                    print(wrong_hoster)
                    self.db.store(title, 'wrong_hoster')
                    notify([wrong_hoster], self.configfile)
                else:
                    self.log_debug(wrong_hoster)
            return False
        else:
            return [title, release_url, language_id, False, False]
    except:
        print(self._INTERNAL_NAME + u" hat die Serien-API angepasst. Breche Download-Prüfung ab!")
        return False
def j_parse_download(self, series_url, title, language_id):
    if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile):
        self.log_debug(
            title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)")
        return False
    if self.filename == 'List_ContentAll_Seasons':
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return False
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return False
    try:
        series_info = get_url(series_url, self.configfile, self.dbfile)
        series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
        api_url = 'https://' + self.url + '/api/media/' + series_id + '/releases'
        response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
        seasons = json.loads(response)
        for season in seasons:
            season = seasons[season]
            for item in season['items']:
                if item['name'] == title:
                    valid = False
                    for hoster in item['hoster']:
                        if hoster:
                            if check_hoster(hoster, self.configfile):
                                valid = True
                    if not valid and not self.hoster_fallback:
                        storage = self.db.retrieve_all(title)
                        if 'added' not in storage and 'notdl' not in storage:
                            wrong_hoster = '[SJ/Hoster fehlt] - ' + title
                            if 'wrong_hoster' not in storage:
                                print(wrong_hoster)
                                self.db.store(title, 'wrong_hoster')
                                notify([wrong_hoster], self.configfile)
                            else:
                                self.log_debug(wrong_hoster)
                        return False
                    else:
                        return [title, series_url, language_id, False, False]
    except:
        print(self._INTERNAL_NAME + u" hat die Serien-API angepasst. Breche Download-Prüfung ab!")
        return False
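# Illustrative sketch (made-up HTML): both parse_download variants above scrape
# the numeric series id from a data-mediaid attribute in the series page before
# querying the /api/media/<id>/releases endpoint.
import re

sample_html = '<div class="series" data-mediaid="9999">'
sample_series_id = re.findall(r'data-mediaid="(.*?)"', sample_html)[0]  # '9999'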
def dw_get_download_links(self, content, title):
    unused_get_feed_parameter(title)
    try:
        download_link = False
        hosters = re.findall(r'HOSTERS="(.*)"', content)[0].split("|")
        for hoster in hosters:
            hoster = hoster.lower().replace("ddownload", "ddl")
            if check_hoster(hoster, self.configfile):
                download_link = re.findall(r'DOWNLOADLINK="(.*)"HOSTERS="', content)[0]
        if self.hoster_fallback and not download_link:
            download_link = re.findall(r'DOWNLOADLINK="(.*)"HOSTERS="', content)[0]
    except:
        return False
    return [download_link]
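# Illustrative sketch (made-up blob): the content string parsed by
# dw_get_download_links() is expected to contain a DOWNLOADLINK="..." value
# directly followed by a pipe-separated HOSTERS="..." list.
import re

sample = 'DOWNLOADLINK="https://filecrypt.example/container/abc"HOSTERS="rapidgator|ddownload"'
sample_hosters = re.findall(r'HOSTERS="(.*)"', sample)[0].split("|")   # ['rapidgator', 'ddownload']
sample_link = re.findall(r'DOWNLOADLINK="(.*)"HOSTERS="', sample)[0]   # the container URL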
def hoster_check(configfile, device, decrypted_packages, title, known_packages):
    if not decrypted_packages:
        return [False, False]

    delete_packages = []
    delete_linkids = []
    delete_uuids = []
    keep_linkids = []
    keep_uuids = []

    merge_first = package_merge_check(device, configfile, decrypted_packages, known_packages)
    if merge_first:
        device = merge_first[0]
        decrypted_packages = merge_first[1]

    valid_links = False
    if decrypted_packages:
        # i counts decrypted links across all packages; linkids[i] can therefore
        # overrun a single package's own id list, which the try/except swallows.
        i = 0
        for dp in decrypted_packages:
            linkids = dp['linkids']
            for link in linkids:
                delete_linkids.append(link)
            uuid = dp['uuid']
            delete_uuids.append(uuid)
            if uuid not in known_packages:
                delete = True
                links = split_urls(dp['urls'])
                for link in links:
                    if check_hoster(link, configfile):
                        try:
                            keep_linkids.append(linkids[i])
                            valid_links = True
                        except:
                            pass
                        if uuid not in keep_uuids:
                            keep_uuids.append(uuid)
                        delete = False
                    i += 1
                if delete:
                    # No allowed hoster in this unknown package: drop it entirely.
                    delete_packages.append(dp)

    if valid_links:
        removed = remove_unfit_links(configfile, device, decrypted_packages,
                                     known_packages, keep_linkids, keep_uuids,
                                     delete_linkids, delete_uuids,
                                     delete_packages, title)
        return [removed[0], removed[1]]
    return [device, False]
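# Illustrative sketch (hypothetical values): hoster_check() expects packages in
# roughly the shape returned by JDownloader's linkgrabber query. The exact
# 'urls' separator is whatever split_urls() handles; a newline is assumed here.
example_package = {
    'uuid': 1234567890,                  # package id, compared against known_packages
    'linkids': [111, 222],               # one id per decrypted link
    'urls': 'https://host-a.example/f1\nhttps://host-b.example/f2',
}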
def dw_mirror(self, title):
    hostnames = CrawlerConfig('Hostnames', self.configfile)
    dw = hostnames.get('dw')
    if dw:
        dw_search = 'https://' + dw + '/?search=' + title
        dw_results = get_url(dw_search, self.configfile, self.dbfile, self.scraper)
        dw_results = dw_search_results(dw_results, dw)
        for result in dw_results:
            release_url = result[1].split("|")[0]
            release_info = get_url(release_url, self.configfile, self.dbfile)
            post_hosters = BeautifulSoup(release_info, 'lxml').find(
                "div", {"id": "download"}).findAll("img", src=re.compile(r"images/hosterimg"))
            hosters = []
            valid = False
            for hoster in post_hosters:
                hoster = hoster["title"].replace("Premium-Account bei ", "").replace(
                    "ddownload", "ddl")
                if hoster not in hosters:
                    hosters.append(hoster)
            for hoster in hosters:
                if hoster:
                    if check_hoster(hoster, self.configfile):
                        valid = True
            if not valid and not self.hoster_fallback:
                return False
            else:
                return [release_url]
    return False
def download(payload, configfile, dbfile):
    hostnames = CrawlerConfig('Hostnames', configfile)
    sj = hostnames.get('sj')

    payload = decode_base64(payload).split("|")
    href = payload[0]
    title = payload[1]
    special = payload[2].strip().replace("None", "")

    series_url = 'https://' + sj + href
    series_info = get_url(series_url, configfile, dbfile)
    series_id = re.findall(r'data-mediaid="(.*?)"', series_info)[0]
    api_url = 'https://' + sj + '/api/media/' + series_id + '/releases'
    releases = get_url(api_url, configfile, dbfile)
    unsorted_seasons = json.loads(releases)

    # Make sure the show is present on both content lists.
    listen = ["List_ContentShows_Shows", "List_ContentAll_Seasons"]
    for liste in listen:
        cont = ListDb(dbfile, liste).retrieve()
        list_title = sanitize(title)
        if not cont:
            cont = ""
        if list_title not in cont:
            ListDb(dbfile, liste).store(list_title)

    config = CrawlerConfig('ContentShows', configfile)
    english_ok = CrawlerConfig('FeedCrawler', configfile).get("english")
    quality = config.get('quality')
    ignore = config.get('rejectlist')

    result_seasons = {}
    result_episodes = {}

    # Process special seasons ("sp") first, then the regular ones.
    seasons = {}
    for season in unsorted_seasons:
        if "sp" in season.lower():
            seasons[season] = unsorted_seasons[season]
    for season in unsorted_seasons:
        if "sp" not in season.lower():
            seasons[season] = unsorted_seasons[season]

    for season in seasons:
        releases = seasons[season]
        for release in releases['items']:
            name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
            try:
                # Re-derive the season key (e.g. "s01") from the release name.
                season = re.findall(r'.*\.(s\d{1,3}).*', name, re.IGNORECASE)[0]
            except:
                pass
            hosters = release['hoster']
            try:
                valid = bool(release['resolution'] == quality)
            except:
                valid = re.match(re.compile(r'.*' + quality + r'.*'), name)
            if valid and special:
                valid = bool("." + special.lower() + "." in name.lower())
            if valid and not english_ok:
                valid = bool(".german." in name.lower())
            if valid:
                valid = False
                for hoster in hosters:
                    if hoster and check_hoster(hoster, configfile) or config.get("hoster_fallback"):
                        valid = True
            if valid:
                try:
                    ep = release['episode']
                    if ep:
                        existing = result_episodes.get(season)
                        if existing:
                            valid = False
                            for e in existing:
                                if e == ep:
                                    # Same episode: keep the better-rated name.
                                    if rate(name, ignore) > rate(existing[e], ignore):
                                        valid = True
                                else:
                                    valid = True
                            if valid:
                                existing.update({ep: name})
                        else:
                            existing = {ep: name}
                        result_episodes.update({season: existing})
                        continue
                except:
                    pass
                existing = result_seasons.get(season)
                dont = False
                if existing:
                    if rate(name, ignore) < rate(existing, ignore):
                        dont = True
                if not dont:
                    result_seasons.update({season: name})

        # A full season pack supersedes single episodes of the same season.
        try:
            if result_seasons[season] and result_episodes[season]:
                del result_episodes[season]
        except:
            pass

        success = False
        try:
            if result_seasons[season]:
                success = True
        except:
            try:
                if result_episodes[season]:
                    success = True
            except:
                pass
        if success:
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)
        else:
            # Nothing matched the quality filter: retry without it.
            for release in releases['items']:
                name = release['name'].encode('ascii', errors='ignore').decode('utf-8')
                hosters = release['hoster']
                valid = True
                if valid and special:
                    valid = bool("." + special.lower() + "." in name.lower())
                if valid and not english_ok:
                    valid = bool(".german." in name.lower())
                if valid:
                    valid = False
                    for hoster in hosters:
                        if hoster and check_hoster(hoster, configfile) or config.get("hoster_fallback"):
                            valid = True
                if valid:
                    try:
                        ep = release['episode']
                        if ep:
                            existing = result_episodes.get(season)
                            if existing:
                                for e in existing:
                                    if e == ep:
                                        if rate(name, ignore) > rate(existing[e], ignore):
                                            existing.update({ep: name})
                            else:
                                existing = {ep: name}
                            result_episodes.update({season: existing})
                            continue
                    except:
                        pass
                    existing = result_seasons.get(season)
                    dont = False
                    if existing:
                        if rate(name, ignore) < rate(existing, ignore):
                            dont = True
                    if not dont:
                        result_seasons.update({season: name})
            try:
                if result_seasons[season] and result_episodes[season]:
                    del result_episodes[season]
            except:
                pass
            logger.debug(u"Websuche erfolgreich für " + title + " - " + season)

    matches = []
    for season in result_seasons:
        matches.append(result_seasons[season])
    for season in result_episodes:
        for episode in result_episodes[season]:
            matches.append(result_episodes[season][episode])

    notify_array = []
    for title in matches:
        db = FeedDb(dbfile, 'FeedCrawler')
        if add_decrypt(title, series_url, sj, dbfile):
            db.store(title, 'added')
            log_entry = u'[Suche/Serie] - ' + title + ' - [SJ]'
            logger.info(log_entry)
            notify_array.append(log_entry)
    notify(notify_array, configfile)

    if not matches:
        return False
    return matches
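# Illustrative sketch: the payload consumed by download() above is assumed to be
# a base64-encoded "href|title|special" triple (decode_base64() is taken to be a
# plain base64 wrapper). All values below are made up.
import base64

example_payload = base64.urlsafe_b64encode("|".join([
    "/serie/some-show",
    "Some.Show.S01.German.1080p.WEB.x264-GROUP",
    "None",
]).encode()).decode()
href, title, special = base64.urlsafe_b64decode(example_payload).decode().split("|")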
def download(payload, device, configfile, dbfile):
    config = CrawlerConfig('ContentAll', configfile)
    db = FeedDb(dbfile, 'FeedCrawler')
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    nk = hostnames.get('nk')

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]

    site = check_is_site(link, configfile)
    if not site:
        return False
    elif "DW" in site:
        download_method = add_decrypt_instead_of_download
        download_links = [link]
        key = payload[1]
        password = payload[2]
    else:
        url = get_url(link, configfile, dbfile)
        if not url or "NinjaFirewall 429" in url:
            return False
        download_method = myjd_download
        soup = BeautifulSoup(url, 'lxml')

        if "BY" in site:
            key = soup.find("small").text
            links = soup.find_all("iframe")
            async_link_results = []
            for link in links:
                link = link["src"]
                if 'https://' + by in link:
                    async_link_results.append(link)
            async_link_results = get_urls_async(async_link_results, configfile, dbfile)
            links = async_link_results[0]
            url_hosters = []
            for link in links:
                if link:
                    link = BeautifulSoup(link, 'lxml').find(
                        "a", href=re.compile(r"/go\.php\?"))
                    if link:
                        url_hosters.append([link["href"], link.text.replace(" ", "")])
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

        links = {}
        if "FX" in site:
            # A throwaway container supplying just .configfile stands in for the
            # crawler instance that fx_get_download_links() normally receives.
            class FX:
                configfile = ""
            FX.configfile = configfile
            download_links = fx_get_download_links(FX, url, key)
        else:
            for url_hoster in reversed(url_hosters):
                try:
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    if check_hoster(link_hoster, configfile):
                        link = url_hoster[0]
                        if by in link:
                            demasked_link = get_redirected_url(link, configfile, dbfile, False)
                            if demasked_link:
                                link = demasked_link
                        links[link_hoster] = link
                except:
                    pass
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    link = url_hoster[0]
                    if by in link:
                        demasked_link = get_redirected_url(link, configfile, dbfile, False)
                        if demasked_link:
                            link = demasked_link
                    links[link_hoster] = link
            download_links = list(links.values())

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if download_links:
        if staffel:
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Staffel] - ' + key.replace(
                    ".COMPLETE", "").replace(".Complete", "") + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            if config.get('cutoff') and '.complete.' not in key.lower():
                if is_retail(key, dbfile):
                    retail = True
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key,
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + (
                    '/Englisch' if englisch and not retail else '') + (
                    '/Englisch/Retail' if englisch and retail else '') + (
                    '/Retail' if not englisch and retail else '') + '] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return [key]
    else:
        return False
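# Illustrative sketch: this download() variant is assumed to receive base64 of
# "link|password" for BY/NK results and "link|key|password" for DW/FX results,
# which is why those two branches re-read payload[1] as key and payload[2] as
# password. All values below are made up.
import base64

example_dw_payload = base64.urlsafe_b64encode(
    "https://dw.example/release/123|Some.Movie.2020.German.1080p.BluRay.x264-GROUP|secret".encode()).decode()
example_link, example_key, example_password = base64.urlsafe_b64decode(
    example_dw_payload).decode().split("|")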
def sf_parse_download(self, series_url, title, language_id):
    if not check_valid_release(title, self.retail_only, self.hevc_retail, self.dbfile):
        self.log_debug(
            title + u" - Release ignoriert (Gleiche oder bessere Quelle bereits vorhanden)")
        return False
    if self.filename == 'List_ContentAll_Seasons':
        if not self.config.get("seasonpacks"):
            staffelpack = re.search(r"s\d.*(-|\.).*s\d", title.lower())
            if staffelpack:
                self.log_debug("%s - Release ignoriert (Staffelpaket)" % title)
                return False
        if not re.search(self.seasonssource, title.lower()):
            self.log_debug(title + " - Release hat falsche Quelle")
            return False
    try:
        if language_id == 2:
            lang = 'EN'
        else:
            lang = 'DE'
        # Millisecond timestamp used as a cache-buster for the API call.
        epoch = str(datetime.datetime.now().timestamp()).replace('.', '')[:-3]
        api_url = series_url + '?lang=' + lang + '&_=' + epoch
        response = get_url(api_url, self.configfile, self.dbfile, self.scraper)
        info = json.loads(response)

        is_episode = re.findall(r'.*\.(s\d{1,3}e\d{1,3})\..*', title, re.IGNORECASE)
        if is_episode:
            episode_string = re.findall(r'.*S\d{1,3}(E\d{1,3}).*', is_episode[0])[0].lower()
            season_string = re.findall(r'.*(S\d{1,3})E\d{1,3}.*', is_episode[0])[0].lower()
            # Turn the episode title into a loose regex that matches the
            # corresponding season entry in the API response.
            season_title = rreplace(
                title.lower().replace(episode_string, ''), "-", ".*", 1).lower().replace(".repack", "")
            season_title = season_title.replace(".untouched", ".*").replace(".dd+51", ".dd.51")
            episode = str(int(episode_string.replace("e", "")))
            season = str(int(season_string.replace("s", "")))
            episode_name = re.findall(r'.*\.s\d{1,3}(\..*).german', season_title, re.IGNORECASE)
            if episode_name:
                season_title = season_title.replace(episode_name[0], '')
            codec_tags = [".h264", ".x264"]
            for tag in codec_tags:
                season_title = season_title.replace(tag, ".*264")
            web_tags = [".web-rip", ".webrip", ".webdl", ".web-dl"]
            for tag in web_tags:
                season_title = season_title.replace(tag, ".web.*")
        else:
            season = False
            episode = False
            season_title = title
            multiple_episodes = re.findall(r'(e\d{1,3}-e*\d{1,3}\.)', season_title, re.IGNORECASE)
            if multiple_episodes:
                season_title = season_title.replace(multiple_episodes[0], '.*')

        content = BeautifulSoup(info['html'], 'lxml')
        releases = content.find(
            "small", text=re.compile(season_title, re.IGNORECASE)).parent.parent.parent
        links = releases.findAll("div", {'class': 'row'})[1].findAll('a')
        download_link = False
        for link in links:
            if check_hoster(link.text.replace('\n', ''), self.configfile):
                download_link = get_redirected_url(
                    "https://" + self.url + link['href'], self.configfile, self.dbfile, self.scraper)
                break
        if not download_link and not self.hoster_fallback:
            storage = self.db.retrieve_all(title)
            if 'added' not in storage and 'notdl' not in storage:
                wrong_hoster = '[SF/Hoster fehlt] - ' + title
                if 'wrong_hoster' not in storage:
                    print(wrong_hoster)
                    self.db.store(title, 'wrong_hoster')
                    notify([wrong_hoster], self.configfile)
                else:
                    self.log_debug(wrong_hoster)
            return False
        else:
            return [title, download_link, language_id, season, episode]
    except:
        print(u"SF hat die Serien-API angepasst. Breche Download-Prüfung ab!")
        return False
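# sf_parse_download() relies on an rreplace() helper that is not shown here.
# A minimal sketch of the assumed behaviour (replace the last `occurrences`
# matches of `old`, counting from the right):
def rreplace(s, old, new, occurrences):
    parts = s.rsplit(old, occurrences)
    return new.join(parts)

assert rreplace("a-b-c", "-", ".*", 1) == "a-b.*c"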