def get(title, configfile, dbfile, bl_only=False, sj_only=False):
    """Search the configured movie sites (BY/DW/FX/NK) and the series site (SJ)
    for the given title and return a tuple (bl_final, sj_final) of result dicts."""
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')
    sj = hostnames.get('sj')

    # A trailing ",sXX" or ",sXXeYY" in the title requests a specific season or episode.
    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper()

    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query

        unrated = []

        config = CrawlerConfig('ContentAll', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')

        if "480p" not in quality:
            search_quality = "+" + quality
        else:
            search_quality = ""

        if by:
            by_search = 'https://' + by + '/?q=' + bl_query + search_quality
        else:
            by_search = None
        if dw:
            dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
        else:
            dw_search = None
        if fx:
            fx_search = 'https://' + fx + '/?s=' + bl_query
        else:
            fx_search = None

        async_results = get_urls_async([by_search, dw_search, fx_search], configfile, dbfile, scraper)
        scraper = async_results[1]
        async_results = async_results[0]

        by_results = []
        dw_results = []
        fx_results = []

        for res in async_results:
            if check_is_site(res, configfile) == 'BY':
                by_results = by_search_results(res, by)
            elif check_is_site(res, configfile) == 'DW':
                dw_results = dw_search_results(res, dw)
            elif check_is_site(res, configfile) == 'FX':
                fx_results = fx_search_results(fx_content_to_soup(res), configfile, dbfile, scraper)

        if nk:
            nk_search = post_url(
                'https://' + nk + "/search",
                configfile,
                dbfile,
                data={'search': bl_query.replace("+", " ") + " " + quality})
            nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
        else:
            nk_results = []

        password = by
        for result in by_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower()
                        or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            if "xxx" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (BY)"
                ])

        password = dw
        for result in dw_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower()
                        or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (DW)"
            ])

        password = fx.split('.')[0]
        for result in fx_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower()
                        or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            if "-low" not in result[0].lower():
                unrated.append([
                    rate(result[0], ignore),
                    encode_base64(result[1] + "|" + password),
                    result[0] + " (FX)"
                ])

        password = nk.split('.')[0].capitalize()
        for result in nk_results:
            if "480p" in quality:
                if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                        or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                        or "complete.bluray" in result[0].lower()
                        or "complete.mbluray" in result[0].lower()
                        or "complete.uhd.bluray" in result[0].lower()):
                    continue
            unrated.append([
                rate(result[0], ignore),
                encode_base64(result[1] + "|" + password),
                result[0] + " (NK)"
            ])

        rated = sorted(unrated, reverse=True)

        results = {}
        i = 0
        for result in rated:
            res = {"payload": result[1], "title": result[2]}
            results["result" + str(i + 1000)] = res
            i += 1
        bl_final = results

    if not bl_only:
        if sj:
            sj_query = sanitize(title).replace(" ", "+")
            sj_search = get_url(
                'https://' + sj + '/serie/search?q=' + sj_query,
                configfile, dbfile, scraper)
            try:
                sj_results = BeautifulSoup(sj_search, 'lxml').findAll(
                    "a", href=re.compile("/serie"))
            except Exception:
                sj_results = []
        else:
            sj_results = []

        if special:
            append = " (" + special + ")"
        else:
            append = ""
        i = 0
        results = {}
        for result in sj_results:
            r_title = result.text
            r_rating = fuzz.ratio(title.lower(), r_title)
            if r_rating > 40:
                res = {
                    "payload": encode_base64(result['href'] + "|" + r_title + "|" + str(special)),
                    "title": r_title + append
                }
                results["result" + str(i + 1000)] = res
                i += 1
        sj_final = results

    return bl_final, sj_final

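# A minimal sketch of the result shape produced by get() above: both returned
# dicts map synthetic keys ("result1000", "result1001", ...) to payload/title
# pairs. The values below are illustrative placeholders, not real results.
_example_bl_final = {
    "result1000": {
        "payload": "<base64 of 'download_page_link|password'>",
        "title": "Some.Movie.2019.German.1080p.BluRay (BY)",
    },
}
_example_sj_final = {
    "result1000": {
        "payload": "<base64 of 'series_href|series_title|special_or_None'>",
        "title": "Some Series (S01)",
    },
}
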
def download(payload, device, configfile, dbfile):
    """Resolve a search payload (base64 of 'link|password' or 'link|key|password')
    into download links and hand them to the configured download method."""
    config = CrawlerConfig('ContentAll', configfile)
    db = FeedDb(dbfile, 'FeedCrawler')
    hostnames = CrawlerConfig('Hostnames', configfile)
    by = hostnames.get('by')
    nk = hostnames.get('nk')

    payload = decode_base64(payload).split("|")
    link = payload[0]
    password = payload[1]

    site = check_is_site(link, configfile)
    if not site:
        return False
    elif "DW" in site:
        download_method = add_decrypt_instead_of_download
        download_links = [link]
        key = payload[1]
        password = payload[2]
    else:
        url = get_url(link, configfile, dbfile)
        if not url or "NinjaFirewall 429" in url:
            return False
        download_method = myjd_download
        soup = BeautifulSoup(url, 'lxml')

        if "BY" in site:
            key = soup.find("small").text
            links = soup.find_all("iframe")
            async_link_results = []
            for link in links:
                link = link["src"]
                if 'https://' + by in link:
                    async_link_results.append(link)
            async_link_results = get_urls_async(async_link_results, configfile, dbfile)
            links = async_link_results[0]
            url_hosters = []
            for link in links:
                if link:
                    link = BeautifulSoup(link, 'lxml').find(
                        "a", href=re.compile(r"/go\.php\?"))
                    if link:
                        url_hosters.append(
                            [link["href"], link.text.replace(" ", "")])
        elif "NK" in site:
            key = soup.find("span", {"class": "subtitle"}).text
            url_hosters = []
            hosters = soup.find_all("a", href=re.compile("/go/"))
            for hoster in hosters:
                url_hosters.append(
                    ['https://' + nk + hoster["href"], hoster.text])
        elif "FX" in site:
            key = payload[1]
            password = payload[2]
        else:
            return False

        links = {}
        if "FX" in site:
            # fx_get_download_links expects an object carrying the configfile.
            class FX:
                configfile = ""

            FX.configfile = configfile
            download_links = fx_get_download_links(FX, url, key)
        else:
            for url_hoster in reversed(url_hosters):
                try:
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    if check_hoster(link_hoster, configfile):
                        link = url_hoster[0]
                        if by in link:
                            demasked_link = get_redirected_url(
                                link, configfile, dbfile, False)
                            if demasked_link:
                                link = demasked_link
                        links[link_hoster] = link
                except Exception:
                    pass
            # If no allowed hoster matched, optionally fall back to any hoster found.
            if config.get("hoster_fallback") and not links:
                for url_hoster in reversed(url_hosters):
                    link_hoster = url_hoster[1].lower().replace(
                        'target="_blank">', '').replace(" ", "-").replace("ddownload", "ddl")
                    link = url_hoster[0]
                    if by in link:
                        demasked_link = get_redirected_url(
                            link, configfile, dbfile, False)
                        if demasked_link:
                            link = demasked_link
                    links[link_hoster] = link
            download_links = list(links.values())

    englisch = False
    if "*englisch" in key.lower() or "*english" in key.lower():
        key = key.replace('*ENGLISCH', '').replace("*Englisch", "").replace(
            "*ENGLISH", "").replace("*English", "").replace("*", "")
        englisch = True

    staffel = re.search(r"s\d{1,2}(-s\d{1,2}|-\d{1,2}|\.)", key.lower())

    if download_links:
        if staffel:
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key.replace(".COMPLETE", "").replace(".Complete", ""),
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Staffel] - ' + key.replace(
                    ".COMPLETE", "").replace(".Complete", "") + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return True
        else:
            retail = False
            # Complete releases are excluded from the retail cutoff check.
            if config.get('cutoff') and '.complete.' not in key.lower():
                if is_retail(key, dbfile):
                    retail = True
            if download_method(configfile, dbfile, device, key, "FeedCrawler",
                               download_links, password):
                db.store(
                    key,
                    'notdl' if config.get('enforcedl') and '.dl.' not in key.lower() else 'added')
                log_entry = '[Suche/Film' + (
                    '/Englisch' if englisch and not retail else '') + (
                    '/Englisch/Retail' if englisch and retail else '') + (
                    '/Retail' if not englisch and retail else '') + '] - ' + key + ' - [' + site + ']'
                logger.info(log_entry)
                notify([log_entry], configfile)
                return [key]
    else:
        return False

def get_redirected_url(url, configfile, dbfile, scraper=False):
    config = CrawlerConfig('FeedCrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()
    db = FeedDb(dbfile, 'proxystatus')
    db_normal = FeedDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)
    if proxy:
        try:
            if site and "SJ" in site:
                if db.retrieve("SJ"):
                    if config.get("fallback") and not db_normal.retrieve("SJ"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "DJ" in site:
                if db.retrieve("DJ"):
                    if config.get("fallback") and not db_normal.retrieve("DJ"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "SF" in site:
                if db.retrieve("SF"):
                    if config.get("fallback") and not db_normal.retrieve("SF"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "BY" in site:
                if db.retrieve("BY"):
                    if config.get("fallback") and not db_normal.retrieve("BY"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "DW" in site:
                if db.retrieve("DW"):
                    if config.get("fallback") and not db_normal.retrieve("DW"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "FX" in site:
                if db.retrieve("FX"):
                    if config.get("fallback") and not db_normal.retrieve("FX"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "NK" in site:
                if db.retrieve("NK"):
                    if config.get("fallback") and not db_normal.retrieve("NK"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            elif site and "WW" in site:
                return url
            elif site and "DD" in site:
                if db.retrieve("DD"):
                    if config.get("fallback") and not db_normal.retrieve("DD"):
                        return scraper.get(url, allow_redirects=False,
                                           timeout=30).headers._store["location"][1]
                    else:
                        return url
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.get(url, allow_redirects=False, proxies=proxies,
                                   timeout=30).headers._store["location"][1]
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return url
    else:
        try:
            if site and "SJ" in site and db_normal.retrieve("SJ"):
                return url
            elif site and "DJ" in site and db_normal.retrieve("DJ"):
                return url
            elif site and "SF" in site and db_normal.retrieve("SF"):
                return url
            elif site and "BY" in site and db_normal.retrieve("BY"):
                return url
            elif site and "DW" in site and db_normal.retrieve("DW"):
                return url
            elif site and "FX" in site and db_normal.retrieve("FX"):
                return url
            elif site and "NK" in site and db_normal.retrieve("NK"):
                return url
            elif site and "WW" in site:
                return url
            elif site and "DD" in site and db_normal.retrieve("DD"):
                return url
            response = scraper.get(url, allow_redirects=False,
                                   timeout=30).headers._store["location"][1]
            return response
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return url

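# get_redirected_url() above reads the redirect target through requests'
# private header storage (headers._store["location"][1]). As a minimal sketch,
# the public, case-insensitive headers API yields the same value, assuming
# `response` is a requests/cloudscraper Response of a non-followed redirect:
def redirect_target(response):
    """Return the Location header of a redirect response, or None if absent."""
    return response.headers.get("location")
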
def post_url_headers(url, configfile, dbfile, headers, data, scraper=False):
    config = CrawlerConfig('FeedCrawler', configfile)
    proxy = config.get('proxy')
    if not scraper:
        scraper = cloudscraper.create_scraper()
    db = FeedDb(dbfile, 'proxystatus')
    db_normal = FeedDb(dbfile, 'normalstatus')
    site = check_is_site(url, configfile)
    if proxy:
        try:
            if site and "SJ" in site:
                if db.retrieve("SJ"):
                    if config.get("fallback") and not db_normal.retrieve("SJ"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "DJ" in site:
                if db.retrieve("DJ"):
                    if config.get("fallback") and not db_normal.retrieve("DJ"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "SF" in site:
                if db.retrieve("SF"):
                    if config.get("fallback") and not db_normal.retrieve("SF"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "BY" in site:
                if db.retrieve("BY"):
                    if config.get("fallback") and not db_normal.retrieve("BY"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "DW" in site:
                if db.retrieve("DW"):
                    if config.get("fallback") and not db_normal.retrieve("DW"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "FX" in site:
                if db.retrieve("FX"):
                    if config.get("fallback") and not db_normal.retrieve("FX"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "NK" in site:
                if db.retrieve("NK"):
                    if config.get("fallback") and not db_normal.retrieve("NK"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "WW" in site:
                if db.retrieve("WW"):
                    if config.get("fallback") and not db_normal.retrieve("WW"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            elif site and "DD" in site:
                if db.retrieve("DD"):
                    if config.get("fallback") and not db_normal.retrieve("DD"):
                        return [scraper.post(url, data, headers=headers, timeout=30), scraper]
                    else:
                        return ["", scraper]
            proxies = {'http': proxy, 'https': proxy}
            response = scraper.post(url, data, headers=headers, proxies=proxies, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]
    else:
        try:
            if site and "SJ" in site and db_normal.retrieve("SJ"):
                return ["", scraper]
            elif site and "DJ" in site and db_normal.retrieve("DJ"):
                return ["", scraper]
            elif site and "SF" in site and db_normal.retrieve("SF"):
                return ["", scraper]
            elif site and "BY" in site and db_normal.retrieve("BY"):
                return ["", scraper]
            elif site and "DW" in site and db_normal.retrieve("DW"):
                return ["", scraper]
            elif site and "FX" in site and db_normal.retrieve("FX"):
                return ["", scraper]
            elif site and "NK" in site and db_normal.retrieve("NK"):
                return ["", scraper]
            elif site and "WW" in site and db_normal.retrieve("WW"):
                return ["", scraper]
            elif site and "DD" in site and db_normal.retrieve("DD"):
                return ["", scraper]
            response = scraper.post(url, data, headers=headers, timeout=30)
            return [response, scraper]
        except Exception as e:
            print(u"Fehler beim Abruf von: " + url + " " + str(e))
            return ["", scraper]

def get_search_results(self, bl_query):
    hostnames = CrawlerConfig('Hostnames', self.configfile)
    by = hostnames.get('by')
    dw = hostnames.get('dw')
    fx = hostnames.get('fx')
    nk = hostnames.get('nk')

    search_results = []

    config = CrawlerConfig('ContentAll', self.configfile)
    quality = config.get('quality')

    if "480p" not in quality:
        search_quality = "+" + quality
    else:
        search_quality = ""

    if by:
        by_search = 'https://' + by + '/?q=' + bl_query + search_quality
    else:
        by_search = None
    if dw:
        dw_search = 'https://' + dw + '/?kategorie=Movies&search=' + bl_query + search_quality
    else:
        dw_search = None
    if fx:
        fx_search = 'https://' + fx + '/?s=' + bl_query
    else:
        fx_search = None

    async_results = get_urls_async([by_search, dw_search, fx_search],
                                   self.configfile, self.dbfile, self.scraper)
    scraper = async_results[1]
    async_results = async_results[0]

    by_results = []
    dw_results = []
    fx_results = []

    for res in async_results:
        if check_is_site(res, self.configfile) == 'BY':
            by_results = by_search_results(res, by)
        elif check_is_site(res, self.configfile) == 'DW':
            dw_results = dw_search_results(res, dw)
        elif check_is_site(res, self.configfile) == 'FX':
            fx_results = fx_search_results(fx_content_to_soup(res),
                                           self.configfile, self.dbfile, scraper)

    if nk:
        nk_search = post_url(
            'https://' + nk + "/search",
            self.configfile,
            self.dbfile,
            data={'search': bl_query.replace("+", " ") + " " + quality})
        nk_results = nk_search_results(nk_search, 'https://' + nk + '/')
    else:
        nk_results = []

    password = by
    for result in by_results:
        if "480p" in quality:
            if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                    or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                    or "complete.bluray" in result[0].lower()
                    or "complete.mbluray" in result[0].lower()
                    or "complete.uhd.bluray" in result[0].lower()):
                continue
        if "xxx" not in result[0].lower():
            search_results.append([result[0], result[1] + "|" + password])

    password = dw
    for result in dw_results:
        if "480p" in quality:
            if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                    or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                    or "complete.bluray" in result[0].lower()
                    or "complete.mbluray" in result[0].lower()
                    or "complete.uhd.bluray" in result[0].lower()):
                continue
        search_results.append([result[0], result[1] + "|" + password])

    for result in fx_results:
        if "480p" in quality:
            if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                    or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                    or "complete.bluray" in result[0].lower()
                    or "complete.mbluray" in result[0].lower()
                    or "complete.uhd.bluray" in result[0].lower()):
                continue
        if "-low" not in result[0].lower():
            search_results.append([result[0], result[1]])

    password = nk.split('.')[0].capitalize()
    for result in nk_results:
        if "480p" in quality:
            if ("720p" in result[0].lower() or "1080p" in result[0].lower()
                    or "1080i" in result[0].lower() or "2160p" in result[0].lower()
                    or "complete.bluray" in result[0].lower()
                    or "complete.mbluray" in result[0].lower()
                    or "complete.uhd.bluray" in result[0].lower()):
                continue
        search_results.append([result[0], result[1] + "|" + password])

    return search_results

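# The same resolution filter is repeated in every result loop above. A possible
# helper (hypothetical, not part of the original module) that expresses the
# check once; the markers mirror exactly those skipped when quality is 480p:
def _is_above_480p(release_title):
    """True if the title names >480p material or a complete (UHD/M)Blu-ray set."""
    lowered = release_title.lower()
    markers = ("720p", "1080p", "1080i", "2160p",
               "complete.bluray", "complete.mbluray", "complete.uhd.bluray")
    return any(marker in lowered for marker in markers)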