コード例 #1
0
    def __init__(self, directory, POST):
        """Resolve the post's media URL and download the file into *directory*.

        Raises NotADownloadableLinkError when the page source cannot be
        parsed for a direct media link.
        """
        try:
            POST['mediaURL'] = self.getLink(POST['postURL'])
        except Exception as exc:
            # The original caught IndexError and Exception separately but
            # raised the identical error for both; merge them and keep the
            # original exception chained for debugging.
            raise NotADownloadableLinkError(
                "Could not read the page source") from exc

        POST['postExt'] = getExtension(POST['mediaURL'])

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(directory, exist_ok=True)

        title = nameCorrector(POST['postTitle'])
        baseName = POST["postSubmitter"] + "_" + title + "_" + POST['postId']
        print(baseName + POST['postExt'])

        fileDir = directory / (baseName + POST['postExt'])
        tempDir = directory / (baseName + ".tmp")

        try:
            getFile(fileDir, tempDir, POST['mediaURL'])
        except FileNameTooLong:
            # Fall back to a short, id-only filename.
            fileDir = directory / (POST['postId'] + POST['postExt'])
            tempDir = directory / (POST['postId'] + ".tmp")
            getFile(fileDir, tempDir, POST['mediaURL'])
コード例 #2
0
    def getData(link):
        """Fetch *link* and extract the embedded ``_r = {...}`` JSON object.

        Raises ImageNotFound on a non-200 response and
        NotADownloadableLinkError when the JSON markers are not present.
        """

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        }
        res = requests.get(link, headers=headers)
        if res.status_code != 200:
            raise ImageNotFound(
                f"Server responded with {res.status_code} to {link}")
        pageSource = res.text

        STARTING_STRING = "_r = {"
        ENDING_STRING = "</script>"

        try:
            # startIndex points just past the "{" of the marker.
            startIndex = pageSource.index(
                STARTING_STRING) + len(STARTING_STRING)
            endIndex = pageSource.index(ENDING_STRING, startIndex)
        except ValueError as exc:
            # Chain the lookup failure instead of discarding it.
            raise NotADownloadableLinkError(
                f"Could not read the page source on {link}") from exc

        # startIndex - 1 re-includes the opening "{"; the slice runs up to and
        # including the "<" of "</script>", which strip()/[:-1] then removes.
        data = json.loads(pageSource[startIndex - 1:endIndex + 1].strip()[:-1])
        return data
コード例 #3
0
    def getLink(url):
        """Return the direct video URL extracted from the watch page's
        embedded JSON-LD metadata.
        """

        # Direct media links need no scraping at all.
        if any(marker in url for marker in ('.webm', '.mp4', '.gif')):
            return url

        # Drop a single trailing slash so split('/')[-1] yields the id.
        if url.endswith('/'):
            url = url[:-1]

        request = urllib.request.Request("https://redgifs.com/watch/" +
                                         url.split('/')[-1])
        request.add_header(
            'User-Agent',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64'
        )

        pageSource = urllib.request.urlopen(request).read().decode()

        markup = BeautifulSoup(pageSource, "html.parser")
        scriptTag = markup.find(
            "script",
            attrs={
                "data-react-helmet": "true",
                "type": "application/ld+json",
            })

        if scriptTag is None:
            raise NotADownloadableLinkError("Could not read the page source")

        return json.loads(scriptTag.contents[0])["video"]["contentUrl"]
コード例 #4
0
    def getLink(self,
                url,
                query='<source id="mp4Source" src=',
                lineNumber=105):
        """Extract direct link to the video from page's source
        and return it.

        *query* is the HTML fragment expected to precede the quoted link and
        *lineNumber* is the line of the page source that should contain it.

        Raises NotADownloadableLinkError when the link cannot be located,
        and IndexError when the page has fewer than *lineNumber* lines.
        """

        if '.webm' in url or '.mp4' in url or '.gif' in url:
            return url

        if url[-1:] == '/':
            url = url[:-1]

        url = "https://gfycat.com/" + url.split('/')[-1]

        pageSource = urllib.request.urlopen(url).read().decode().split('\n')

        # IndexError here (page shorter than expected) is propagated so the
        # caller can report an unreadable page, matching the old behaviour.
        theLine = pageSource[lineNumber]

        # Use str.find instead of the original O(n*m) per-character scan.
        hit = theLine.find(query)
        if hit == -1:
            raise NotADownloadableLinkError("Could not read the page source")

        start = hit + len(query) + 1      # skip the opening quote
        end = theLine.find('"', start)    # position of the closing quote
        if end == -1:
            # The original crashed with IndexError when the closing quote was
            # missing; report it as an unreadable page instead.
            raise NotADownloadableLinkError("Could not read the page source")

        link = theLine[start:end]
        if link == "":
            raise NotADownloadableLinkError("Could not read the page source")

        return link
コード例 #5
0
    def getLink(url):
        """Return the direct .mp4 link scraped from the page's source."""

        # Only the final path segment is checked for a media extension.
        lastSegment = url.split('/')[-1]
        if any(marker in lastSegment for marker in ('.webm', '.mp4', '.gif')):
            return url

        # Drop a single trailing slash so split('/')[-1] yields the id.
        if url.endswith('/'):
            url = url[:-1]

        url = "https://www.gifdeliverynetwork.com/" + url.split('/')[-1]

        pageSource = urllib.request.urlopen(url).read().decode()

        soup = BeautifulSoup(pageSource, "html.parser")
        videoSource = soup.find(
            "source", attrs={"id": "mp4Source", "type": "video/mp4"})

        if videoSource is None:
            raise NotADownloadableLinkError("Could not read the page source")

        return videoSource["src"]
コード例 #6
0
    def getLink(url):
        """Return the direct .mp4 link scraped from the page's source."""

        # Only the final path segment is checked for a media extension.
        lastSegment = url.split('/')[-1]
        if any(marker in lastSegment for marker in ('.webm', '.mp4', '.gif')):
            return url

        # Drop a single trailing slash so split('/')[-1] yields the id.
        if url.endswith('/'):
            url = url[:-1]

        url = "https://www.gifdeliverynetwork.com/" + url.split('/')[-1]
        request = urllib.request.Request(url)
        request.add_header(
            'User-Agent',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64')

        response = urllib.request.urlopen(request)
        httpResponseCodeCheck(response.getcode(), url)
        pageSource = response.read().decode()

        soup = BeautifulSoup(pageSource, "html.parser")
        videoSource = soup.find(
            "source", attrs={"id": "mp4Source", "type": "video/mp4"})

        if videoSource is None:
            raise NotADownloadableLinkError("Could not read the page source")

        return videoSource["src"]
コード例 #7
0
    def getLink(self,
                url,
                query='<source id="mp4Source" src=',
                lineNumber=105):
        """Return the direct video URL from the page's JSON-LD metadata.

        *query* and *lineNumber* are not used by this implementation; they
        are kept so the signature matches the older line-scanning version.
        """

        # Direct media links need no scraping at all.
        if any(marker in url for marker in ('.webm', '.mp4', '.gif')):
            return url

        # Drop a single trailing slash so split('/')[-1] yields the id.
        if url.endswith('/'):
            url = url[:-1]

        url = "https://gfycat.com/" + url.split('/')[-1]

        pageSource = urllib.request.urlopen(url).read().decode()

        soup = BeautifulSoup(pageSource, "html.parser")
        metadataTag = soup.find(
            "script",
            attrs={
                "data-react-helmet": "true",
                "type": "application/ld+json",
            })

        if metadataTag is None:
            raise NotADownloadableLinkError("Could not read the page source")

        return json.loads(metadataTag.text)["video"]["contentUrl"]
コード例 #8
0
    def getLink(self,
                url,
                query='<source id="mp4Source" src=',
                lineNumber=105):
        """Extract direct link to the video from page's source
        and return it.

        *query* and *lineNumber* are unused here; they are kept only so the
        signature stays compatible with older implementations.

        Raises NotADownloadableLinkError when the og:video tag is missing.
        """

        if '.webm' in url or '.mp4' in url or '.gif' in url:
            return url

        if url[-1:] == '/':
            url = url[:-1]

        url = "https://gfycat.com/" + url.split('/')[-1]

        pageSource = (urllib.request.urlopen(url).read().decode())

        soup = BeautifulSoup(pageSource, "html.parser")
        # Removed the dead ld+json "attributes" dict the original built but
        # never used; the direct media URL comes from the og:video property.
        content = soup.find("meta", attrs={"property": "og:video"})

        if content is None:
            print("Error parsing: {}".format(pageSource))
            raise NotADownloadableLinkError(
                "Could not read the page source (parse) {}".format(url))

        return content["content"]
コード例 #9
0
    def getData(link):
        """Fetch *link* and return the embedded image JSON object.

        Raises ImageNotFound on a non-200 response and
        NotADownloadableLinkError when the JSON markers are not present.
        """

        cookies = {"over18": "1", "postpagebeta": "0"}
        res = requests.get(link, cookies=cookies)
        if res.status_code != 200:
            raise ImageNotFound(
                f"Server responded with {res.status_code} to {link}")
        # BUGFIX: reuse the response body instead of fetching the same page a
        # second time with another requests.get().
        pageSource = res.text

        STARTING_STRING = "image               : "
        ENDING_STRING = "group               :"

        try:
            startIndex = pageSource.index(
                STARTING_STRING) + len(STARTING_STRING)
            endIndex = pageSource.index(ENDING_STRING, startIndex)
        except ValueError as exc:
            # Chain the lookup failure instead of discarding it.
            raise NotADownloadableLinkError(
                f"Could not read the page source on {link}") from exc

        # Walk back from the ENDING_STRING marker to the closing brace of the
        # image object.
        while pageSource[endIndex] != "}":
            endIndex = endIndex - 1

        # Slicing cannot raise, so the original bare-except fallback (which
        # attempted an impossible in-place string assignment) was dead code
        # and has been removed.
        data = pageSource[startIndex:endIndex + 2].strip()[:-1]

        return json.loads(data)
コード例 #10
0
    def __init__(self, directory, POST):
        """Resolve the post's media URL and download it into *directory*.

        Raises NotADownloadableLinkError when the page source cannot be
        parsed for a direct media link.
        """
        try:
            POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
        except IndexError as exc:
            # Keep the original failure chained for debugging.
            raise NotADownloadableLinkError(
                "Could not read the page source") from exc

        POST['EXTENSION'] = getExtension(POST['MEDIAURL'])

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(directory, exist_ok=True)

        filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
        shortFilename = POST['POSTID'] + POST['EXTENSION']

        getFile(filename, shortFilename, directory, POST['MEDIAURL'])
コード例 #11
0
    def __init__(self, directory, post):
        """Download an album (or its single image) into *directory*.

        Raises NotADownloadableLinkError when the links cannot be fetched,
        FileAlreadyExistsError when every album image already exists, and
        AlbumNotDownloadedCompletely when some images failed to download.
        """
        try:
            IMAGES = self.getLinks(post['postURL'])
        except urllib.error.HTTPError as exc:
            raise NotADownloadableLinkError("Not a downloadable link") from exc

        imageCount = len(IMAGES)
        howManyDownloaded = imageCount
        duplicates = 0

        if imageCount == 1:

            extension = getExtension(IMAGES[0])

            title = nameCorrector(post['postTitle'])
            baseName = (post["postSubmitter"] + "_" + title + "_" +
                        post['postId'])
            print(baseName + extension)

            fileDir = directory / (baseName + extension)
            tempDir = directory / (baseName + ".tmp")

            # Scraped links are protocol-relative ("//host/...").
            imageURL = "https:" + IMAGES[0]

            try:
                getFile(fileDir, tempDir, imageURL)
            except FileNameTooLong:
                # Fall back to a short, id-only filename.
                fileDir = directory / (post['postId'] + extension)
                tempDir = directory / (post['postId'] + '.tmp')
                getFile(fileDir, tempDir, imageURL)

        else:
            title = nameCorrector(post['postTitle'])
            baseName = (post["postSubmitter"] + "_" + title + "_" +
                        post['postId'])
            print(baseName, end="\n\n")

            folderDir = directory / baseName

            try:
                # exist_ok avoids the check-then-create race.
                os.makedirs(folderDir, exist_ok=True)
            except FileNotFoundError:
                # Path too long for the OS: retry with an id-only folder.
                folderDir = directory / post['postId']
                os.makedirs(folderDir)

            for position, image in enumerate(IMAGES, start=1):

                extension = getExtension(image)

                fileName = str(position)
                imageURL = "https:" + image

                fileDir = folderDir / (fileName + extension)
                tempDir = folderDir / (fileName + ".tmp")

                print("  ({}/{})".format(position, imageCount))
                print("  {}".format(fileName + extension))

                try:
                    getFile(fileDir, tempDir, imageURL, indent=2)
                    print()
                except FileAlreadyExistsError:
                    print("  The file already exists" + " " * 10, end="\n\n")
                    duplicates += 1
                    howManyDownloaded -= 1

                except Exception as exception:
                    # Removed the unused "exceptionType" assignment; keep
                    # downloading the rest of the album and report the error.
                    print("\n  Could not get the file")
                    print("  " + "{class_name}: {info}".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)) + "\n")
                    howManyDownloaded -= 1

            if duplicates == imageCount:
                raise FileAlreadyExistsError
            elif howManyDownloaded + duplicates < imageCount:
                raise AlbumNotDownloadedCompletely(
                    "Album Not Downloaded Completely")
0
    def __init__(self, directory, post):
        """Download an album (or its single image) into *directory*.

        Raises NotADownloadableLinkError when the links cannot be fetched,
        FileAlreadyExistsError when every album image already exists, and
        AlbumNotDownloadedCompletely when some images failed to download.
        """
        try:
            IMAGES = self.getLinks(post['CONTENTURL'])
        except urllib.error.HTTPError as exc:
            raise NotADownloadableLinkError("Not a downloadable link") from exc

        imageCount = len(IMAGES)
        howManyDownloaded = imageCount
        duplicates = 0

        if imageCount == 1:

            extension = getExtension(IMAGES[0])

            # Filenames are declared here.
            # NOTE(review): the long filename appends post["EXTENSION"] while
            # the short one appends the freshly computed extension — confirm
            # both are meant to be the same value.
            filename = GLOBAL.config['filename'].format(
                **post) + post["EXTENSION"]
            shortFilename = post['POSTID'] + extension

            imageURL = IMAGES[0]
            # BUGFIX: was "or", which is true for every URL (none contains
            # both schemes), so "https://" got prefixed onto links that
            # already had one. "and" matches the album branch below.
            if 'https://' not in imageURL and 'http://' not in imageURL:
                imageURL = "https://" + imageURL

            getFile(filename, shortFilename, directory, imageURL)

        else:
            filename = GLOBAL.config['filename'].format(**post)

            print(filename)

            folderDir = directory / filename

            try:
                # exist_ok avoids the check-then-create race.
                os.makedirs(folderDir, exist_ok=True)
            except FileNotFoundError:
                # Path too long for the OS: retry with an id-only folder.
                folderDir = directory / post['POSTID']
                os.makedirs(folderDir)

            for position, image in enumerate(IMAGES, start=1):

                extension = getExtension(image)

                filename = str(position) + extension
                imageURL = image
                if 'https://' not in imageURL and 'http://' not in imageURL:
                    imageURL = "https://" + imageURL

                print("  ({}/{})".format(position, imageCount))
                print("  {}".format(filename))

                try:
                    getFile(filename, filename, folderDir, imageURL, indent=2)
                    print()
                except FileAlreadyExistsError:
                    print("  The file already exists" + " " * 10, end="\n\n")
                    duplicates += 1
                    howManyDownloaded -= 1

                except Exception as exception:
                    # Keep downloading the rest of the album; report the
                    # error and count the miss.
                    print("\n  Could not get the file")
                    print("  " + "{class_name}: {info}".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)) + "\n")
                    howManyDownloaded -= 1

            if duplicates == imageCount:
                raise FileAlreadyExistsError
            elif howManyDownloaded + duplicates < imageCount:
                raise AlbumNotDownloadedCompletely(
                    "Album Not Downloaded Completely")