Example No. 1
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n")

    # Attempt initial connection
    html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError("403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
    elif html.status_code != 200:
        raise ConnectionError(f"Initial connection failed: Status {html.status_code}")
    print()

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    page = Page_Xvideos(sUrl)
    nPageStatus = page.content.status_code
    if nPageStatus != 200:
        if nPageStatus == 403:
            raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}"

    print()
    for nIdx, sVideoUrl in enumerate(page.videos):
        if page.sUrlType == 'playlist':
            print(f"Processing playlist video {nIdx + 1} of {len(page.videos)} :: {sVideoUrl}")
            print()

        dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\%(title).125s.%(ext)s'

        with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
            ydl.download([sVideoUrl])

        if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
            print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...")
            break
        print()
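
Every extractor above calls dl_common.parseCookieFile and then checks dl_common.dCookiesParsed, but that helper is not shown. A minimal sketch of what it might look like, assuming a Netscape-format cookies.txt export and a module-level dCookiesParsed variable (both assumptions, not the project's actual code):

# Hypothetical sketch of the cookie helper the examples rely on (assumption).
# Expects a Netscape/Mozilla "cookies.txt" export: tab-separated fields of
# domain, include-subdomains flag, path, secure, expiry, name, value.
dCookiesParsed = None

def parseCookieFile(sCookieSource):
    global dCookiesParsed
    dCookies = {}
    with open(sCookieSource, encoding='utf-8') as fCookies:
        for sLine in fCookies:
            sLine = sLine.strip()
            if not sLine or sLine.startswith('#'):
                continue  # Skip comments and blank lines
            lFields = sLine.split('\t')
            if len(lFields) >= 7:
                dCookies[lFields[5]] = lFields[6]  # name -> value
    dCookiesParsed = dCookies or None
    return dCookiesParsed
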
Example No. 2
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print(
            "WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n"
        )

    if 'porntrex.com/video' in sUrl:
        sUrlType = 'video'
    elif 'porntrex.com/my' in sUrl:
        sUrlType = 'playlist'
    elif 'porntrex.com/search' in sUrl:
        sUrlType = 'playlist'  # Search results can be treated as a playlist
    else:
        raise ValueError(
            f"Unable to determine {sExtractor} URL type for {sUrl}! Please submit a bug report!"
        )

    # Attempt initial connection
    html = dl_common.session.get(sUrl,
                                 headers=dl_common.dHeaders,
                                 cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError(
            "403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!"
        )
    elif html.status_code != 200:
        raise ConnectionError(
            f"Initial connection failed: Status {html.status_code}")
    print()

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    lUrlVideos = []
    if sUrlType == 'playlist':
        print("Playlist detected. Getting videos...")
        sUrlBaseFormat = urlBaseFormatGet(sUrl)
        nPage = 0

        while True:
            nPage += 1
            print(f"Attempting page {nPage:02}")
            if 'search' in sUrl:
                if nPage == 1:
                    sUrlPage = sUrlBaseFormat.format('')
                else:
                    sUrlPage = sUrlBaseFormat.format(f'{nPage}/')
            else:
                sUrlPage = sUrlBaseFormat.format(f'{nPage:02}')
            page = dl_common.Page(sUrlPage)
            nPageStatus = page.content.status_code
            if nPageStatus != 200:
                if nPageStatus == 403:
                    raise ConnectionError(
                        "403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!"
                    )
                elif nPageStatus == 404:
                    print(f"Page {nPage} returned 404!")
                    print(
                        f"Assuming page {nPage - 1} was the last page of the playlist"
                    )
                    break
            page._extract_video_urls()
            if page.videos:
                lUrlVideos += page.videos
            else:
                break

        # Remove non-video URLs that may have been picked up
        lTemp = []
        for sUrlVideo in lUrlVideos:
            if sUrlVideo == 'https://www.porntrex.com/my/favourites/videos/':
                continue
            if 'video' in sUrlVideo:
                lTemp += [sUrlVideo]
        lUrlVideos = lTemp

        nNumVideos = len(lUrlVideos)
        print(f"Found {nNumVideos} video URLs in the playlist")
        if bDebug:
            for sUrlVideo in lUrlVideos:
                print(sUrlVideo)

    elif sUrlType == 'video':
        lUrlVideos = [sUrl]

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}"

    for nIdx, sVideoUrl in enumerate(lUrlVideos):
        if sUrlType == 'playlist':
            print(f"Processing video {nIdx + 1} of {nNumVideos}...")
            print()

        if bDebug:
            print(f"Processing {sVideoUrl}")
        video = Video(sVideoUrl)
        dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{video.sFullName}'

        with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
            ydl.download([video.downloadUrl])

        if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
            print(
                f"Hit the specified maximum limit of {nVideoLimit}. Stopping..."
            )
            break
        print()
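
The paging loop above relies on urlBaseFormatGet, which is not shown. A rough sketch under the assumption that it simply returns the favourites/search URL with a '{}' placeholder where the page number (or an empty string for page 1 of a search) is substituted; the exact URL layout is an assumption:

# Hypothetical sketch of urlBaseFormatGet (assumption; the real helper is not shown).
def urlBaseFormatGet(sUrl):
    # e.g. .../my/favourites/videos/  ->  .../my/favourites/videos/{}/
    #      .../search/<query>/        ->  .../search/<query>/{}
    sUrl = sUrl.rstrip('/')
    if 'search' in sUrl:
        return sUrl + '/{}'
    return sUrl + '/{}/'
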
Example No. 3
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print(
            "WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n"
        )

    # Attempt initial connection
    dl_common.randomizeHeader()
    html = dl_common.session.get(sUrl,
                                 headers=dl_common.dHeaders,
                                 cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError(
            "403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!"
        )
    elif html.status_code != 200:
        raise ConnectionError(
            f"Initial connection failed: Status {html.status_code}")
    print()
    sleepRandom(1, 3)

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    page = Page_Pornve(sUrl)
    sleepRandom(3, 5)

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = None

    for nIdx, sVideoUrl in enumerate(page.videos):
        if page.sUrlType == 'playlist':
            print(
                f"Processing playlist video {nIdx + 1} of {page._nVideos} :: {sVideoUrl}"
            )
            print()

        # Get the actual video stream info for a video link from a playlist
        if page.sUrlType == 'playlist':
            pageVideo = Page_Pornve(sVideoUrl)
            sVideoName = pageVideo._sVideoName
            sVideoStreamUrl = pageVideo.videos[0]
            sPageUrl = pageVideo.url
        else:
            sVideoName = page._sVideoName
            sVideoStreamUrl = page.videos[0]
            sPageUrl = page.url

        bRun = True
        try:
            with open(sArchive) as archive:
                if sPageUrl in archive.read():
                    print(f"Archive already has an entry for {sPageUrl}")
                    print("Skipping...")
                    bRun = False
        except FileNotFoundError:
            # Archive does not exist yet (first run for this extractor), so nothing to skip
            pass

        if bRun:
            dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{sVideoName}.%(ext)s'

            with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
                ydl.cache.remove()
                ret = ydl.download([sVideoStreamUrl])

            # Do our own archiving: because of how the stream is extracted in
            # _extract_video_stream, YTDL would record every video under the same
            # "index-v1-a1" name, so its built-in archive cannot tell entries apart.
            # ydl.download() returns 0 on success, 1 on failure.
            if not ret:
                with open(sArchive, 'a') as archive:
                    archive.write(sPageUrl + "\r\n")

        if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
            print(
                f"Hit the specified maximum limit of {nVideoLimit}. Stopping..."
            )
            break
        print()
        sleepRandom(3, 5)
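
sleepRandom is used between requests in this and the following example but is never defined here. A minimal sketch, assuming it simply sleeps for a random number of seconds between two bounds (the default bounds are a guess based on the no-argument call in the spankbang example):

import random
from time import sleep

# Hypothetical sketch of sleepRandom (assumption; the real helper is not shown).
def sleepRandom(nMin=3, nMax=5):
    sleep(random.uniform(nMin, nMax))
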
Example No. 4
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print(
            "WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n"
        )

    # 20210619 :: Workaround for https://github.com/ppldl/p_pl_dl/issues/1
    dl_common.addCipher("https://spankbang.com")

    # Attempt initial connection
    dl_common.randomizeHeader()
    html = dl_common.session.get(sUrl,
                                 headers=dl_common.dHeaders,
                                 cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError(
            "403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!"
        )
    elif html.status_code != 200:
        raise ConnectionError(
            f"Initial connection failed: Status {html.status_code}")
    print()
    sleepRandom(1, 3)

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    page = Page_Spankbang(sUrl)
    sleepRandom(3, 5)

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}"
    # dYdlOptions['referer']          = 'https://spankbang.com'
    # dYdlOptions['user_agent']       = dl_common.dHeaders['User-Agent']        # Not needed - YTDL already has a UA randomizer

    for nIdx, sVideoUrl in enumerate(page.videos):
        if page.sUrlType == 'playlist':
            print(
                f"Processing playlist video {nIdx + 1} of {page._nVideos} :: {sVideoUrl}"
            )
            print()

        dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\%(title).125s.%(ext)s'

        with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
            ydl.cache.remove()
            ydl.download([sVideoUrl])

        if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
            print(
                f"Hit the specified maximum limit of {nVideoLimit}. Stopping..."
            )
            break
        print()
        sleepRandom()
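
dl_common.addCipher is called as a workaround for the linked GitHub issue but is not shown. One way such a helper could be written, assuming it mounts a requests adapter with a restricted cipher string for that host; the adapter class, cipher string, and signature are all assumptions:

# Hypothetical sketch of an addCipher helper (assumption; not the project's actual code).
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.ssl_ import create_urllib3_context

class CipherAdapter(HTTPAdapter):
    # Transport adapter that applies a custom OpenSSL cipher string
    def __init__(self, sCiphers, **kwargs):
        self._sCiphers = sCiphers
        super().__init__(**kwargs)

    def init_poolmanager(self, *args, **kwargs):
        kwargs['ssl_context'] = create_urllib3_context(ciphers=self._sCiphers)
        return super().init_poolmanager(*args, **kwargs)

session = requests.Session()  # stands in for dl_common.session

def addCipher(sBaseUrl, sCiphers='DEFAULT:!DH'):
    # Excluding DH ciphers is a common fix for handshake errors on some hosts
    session.mount(sBaseUrl, CipherAdapter(sCiphers))
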
Example No. 5
def main(argv):
    print()

    if argv.dest is not None:
        os.chdir(argv.dest)
    print(f"Working download directory: {os.getcwd()}")
    sleep(2)

    print()
    sSourceCookies = argv.cookies
    if sSourceCookies is not None:
        print(f"Cookies source: {sSourceCookies}")
        if ".txt'" in sSourceCookies:
            dl_common.parseCookieFile(sSourceCookies)
        else:
            dl_common.parseCookies(sSourceCookies)
    else:
        print(f"No cookies provided!")
    sleep(0.5)

    print()
    sSourceUrls = argv.input
    print(f"Using the following input source: {sSourceUrls}")
    print()
    sleep(0.5)

    dSites = {
        'lewdthots': False,
        'pornhub': False,
        'porntrex': False,
        'pornve': False,
        'spankbang': False,
        'xhamster': False,
        'xvideos': False,
        'youporn': False,
    }

    dExtractors = {
        'lewdthots': dl_lt,
        'pornhub': dl_ph,
        'porntrex': dl_pt,
        'pornve': dl_pornve,
        'spankbang': dl_sb,
        'xhamster': dl_xh,
        'xvideos': dl_xv,
    }

    # Get each URL into a dict
    dUrlDefs = {}
    with open(sSourceUrls) as fSourceUrls:
        sLines = fSourceUrls.readlines()
        for sLine in sLines:
            sUrl = sLine.strip()
            print(f"URL: {sUrl}")
            for sSite in dSites.keys():
                if sSite in sLine:
                    dSites[sSite] = True
                    dUrlDefs[sUrl] = sSite
    print()
    print("Detected websites:")
    print(json.dumps(dSites, indent=4))
    print()
    sleep(2)

    if argv.only is not None:
        argv.only = argv.only.lower()
        if argv.only in dSites.keys():
            for key, value in dSites.items():
                if argv.only == key:
                    dSites[key] = True
                else:
                    dSites[key] = False

    for sUrl, sSite in dUrlDefs.items():
        if sSite in dExtractors.keys() and dSites[sSite]:
            try:
                dExtractors[sSite].run(
                    sUrl, sCookieSource=None
                )  # Cookies should already be parsed and available when going through main
            except Exception:
                print("\r\n\r\n")
                traceback.print_exc()
                print("\r\n\r\n")
                continue
        else:
            print(f"No extractor available for {sSite} - {sUrl}")
            sleep(0.5)
        print()
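
main() expects argv.dest, argv.cookies, argv.input and argv.only, but the argument parsing is not shown. A sketch of a matching argparse setup; the flag names and help text are assumptions:

# Hypothetical CLI wiring for main() (assumption; the project's real parser is not shown).
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Bulk video downloader front-end")
    parser.add_argument('-i', '--input',   required=True, help="Text file with one URL per line")
    parser.add_argument('-c', '--cookies', default=None,  help="cookies.txt file or raw cookie string")
    parser.add_argument('-d', '--dest',    default=None,  help="Working/download directory")
    parser.add_argument('-o', '--only',    default=None,  help="Restrict the run to a single site, e.g. 'xvideos'")
    main(parser.parse_args())
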
Example No. 6
def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False):
    print(f"Running {sExtractor} extractor for {sUrl}\r\n")

    if sCookieSource is not None:
        dl_common.parseCookieFile(sCookieSource)

    if dl_common.dCookiesParsed is None:
        print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n")

    # Attempt initial connection
    html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed)
    print(f"Initial connection status: {html.status_code}")
    if html.status_code == 403:
        raise ConnectionError("403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!")
    elif html.status_code != 200:
        raise ConnectionError(f"Initial connection failed: Status {html.status_code}")
    print()

    if bDebug:
        # Save HTML content to a text file for debug
        with open("html_content.txt", "w", encoding='utf-8') as text_file:
            text_file.write(html.text)

    page = Page_Pornhub(sUrl)

    dYdlOptions = dict(dl_common.dYdlOptions)
    dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}"

    # Set options helpful for pornhub
    # dYdlOptions['retries']                      = 10
    # dYdlOptions['fragment_retries']             = 10
    # dYdlOptions['keep_fragments']               = True
    # dYdlOptions['skip_unavailable_fragments']   = False
    # dYdlOptions['external_downloader_args']     = ["-m3u8_hold_counters", "3", "-max_reload", "3"]

    lFailedUrls = []

    def ytdlLoop(lUrls, bLogFailures):
        nonlocal lFailedUrls

        for nIdx, sVideoUrl in enumerate(lUrls):
            print(f"Processing video {nIdx + 1} of {len(lUrls)} :: {sVideoUrl}")
            print()

            sVideoId = sVideoUrl.split('view_video.php?viewkey=')[-1]
            dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{sVideoId}_%(title).125s.mp4'

            nStart = time()
            try:
                with youtube_dl.YoutubeDL(dYdlOptions) as ydl:
                    ydl.download([sVideoUrl])
            except Exception:
                if bLogFailures:
                    print(f"\r\nEncountered some error for URL = {sVideoUrl}")
                    print("Adding it to the retry list...")
                    lFailedUrls += [sVideoUrl]
                continue
            nStop = time()
            print(f"\r\nElapsed time for URL = {sVideoUrl}: {round((nStop - nStart) / 60, 2)} minutes\r\n")

            if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit:
                print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...")
                break
        print()

    ytdlLoop(page.videos, bLogFailures=True)

    if lFailedUrls:
        print("Retrying URLs that failed...")
        for sFailedUrl in lFailedUrls:
            print(sFailedUrl)
        ytdlLoop(lFailedUrls, bLogFailures=False)
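
The loop above iterates over page.videos and later splits each URL on 'view_video.php?viewkey='. A rough sketch of how a Page_* helper could populate that list; the class layout, the plain requests.get call, and the regex are assumptions, not the project's actual Page_Pornhub:

# Hypothetical sketch of a Page_* helper (assumption; the real class is not shown).
import re
import requests

class Page_Pornhub:
    def __init__(self, sUrl):
        self.url = sUrl
        # The real code presumably goes through dl_common.session with headers/cookies
        self.content = requests.get(sUrl)
        self.sUrlType = 'video' if 'viewkey=' in sUrl else 'playlist'
        if self.sUrlType == 'video':
            self.videos = [sUrl]
        else:
            # Collect unique viewkeys from the playlist HTML, preserving order
            lKeys = re.findall(r'view_video\.php\?viewkey=(\w+)', self.content.text)
            self.videos = [f'https://www.pornhub.com/view_video.php?viewkey={sKey}'
                           for sKey in dict.fromkeys(lKeys)]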