Python nameCorrector Examples, src.utils.nameCorrector Python Examples

Example #1

0

Show file

File: gifDeliveryNetwork.py Project: wnyffenegger/bulk-downloader-for-reddit

    def __init__(self, directory, POST):
        """Resolve the post's media link and download it into ``directory``.

        The link returned by ``self.getLink`` is stored in
        ``POST['mediaURL']`` and its extension in ``POST['postExt']``.
        ``directory`` is created when it does not exist yet.  If ``getFile``
        raises ``FileNameTooLong`` the download is retried once under a name
        built from the post id alone.

        Raises:
            NotADownloadableLinkError: ``self.getLink`` raised ``IndexError``.
        """
        try:
            POST['mediaURL'] = self.getLink(POST['postURL'])
        except IndexError:
            raise NotADownloadableLinkError("Could not read the page source")

        POST['postExt'] = getExtension(POST['mediaURL'])

        if not os.path.exists(directory):
            os.makedirs(directory)

        title = nameCorrector(POST['postTitle'])

        # Filenames are declared here: <submitter>_<title>_<id> plus a suffix.
        baseName = "_".join([POST["postSubmitter"], title, POST['postId']])
        print(baseName + POST['postExt'])

        fileDir = directory / (baseName + POST['postExt'])
        tempDir = directory / (baseName + ".tmp")

        try:
            getFile(fileDir, tempDir, POST['mediaURL'])
        except FileNameTooLong:
            # Retry with the shortest name available: the post id only.
            shortName = POST['postId']
            fileDir = directory / (shortName + POST['postExt'])
            tempDir = directory / (shortName + ".tmp")

            getFile(fileDir, tempDir, POST['mediaURL'])

Example #2

0

Show file

File: Imgur.py Project: linxchaos/bulk-downloader-for-reddit

    def downloadAlbum(self, images):
        """Download the images of an album into a folder of their own.

        The folder name is ``GLOBAL.config['filename']`` formatted with
        ``self.post``.  If creating that folder raises ``FileNotFoundError``
        a folder named after ``self.post['POSTID']`` is created instead.

        Args:
            images: mapping read through its ``"count"`` and ``"images"``
                keys; each image entry is read through ``"ext"``, ``"hash"``
                and ``"title"``.

        Raises:
            FileAlreadyExistsError: the duplicate count equals the image count.
            AlbumNotDownloadedCompletely: downloaded/skipped plus duplicate
                images add up to fewer than the image count.
        """
        folderName = GLOBAL.config['filename'].format(**self.post)
        folderDir = self.directory / folderName

        total = images["count"]
        fetched = 0
        alreadyThere = 0

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            folderDir = self.directory / self.post['POSTID']
            os.makedirs(folderDir)

        print(folderName)

        for index in range(total):
            image = images["images"][index]
            position = index + 1

            extension = self.validateExtension(image["ext"])
            imageURL = self.IMGUR_IMAGE_DOMAIN + image["hash"] + extension

            titledParts = [
                str(position),
                nameCorrector(image['title']),
                image['hash'],
            ]
            filename = "_".join(titledParts) + extension
            shortFilename = str(position) + "_" + image['hash']

            print("\n  ({}/{})".format(position, total))

            try:
                getFile(filename, shortFilename, folderDir, imageURL, indent=2)
                fetched += 1
                print()

            except FileAlreadyExistsError:
                print("  The file already exists" + " " * 10, end="\n\n")
                alreadyThere += 1

            except TypeInSkip:
                # A skipped image still counts towards the handled total.
                print("  Skipping...")
                fetched += 1

            except Exception as exception:
                print("\n  Could not get the file")
                summary = (
                    "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information"
                    .format(class_name=type(exception).__name__,
                            info=str(exception))
                )
                print("  " + summary + "\n")
                # noPrint is not a builtin print keyword; presumably print is
                # a project-level wrapper here - confirm against the imports.
                print(GLOBAL.log_stream.getvalue(), noPrint=True)

        if alreadyThere == total:
            raise FileAlreadyExistsError
        if fetched + alreadyThere < total:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely")

Example #3

0

Show file

def isPostExists(POST):
    """Return True when a file for ``POST`` is already on disk.

    Three naming schemes are probed inside
    ``GLOBAL.directory / POST["postSubreddit"]`` for every known extension:
    ``<title>_<id>``, ``<submitter>_<title>_<id>`` and ``<id>``.
    """
    title = nameCorrector(POST['postTitle'])
    PATH = GLOBAL.directory / POST["postSubreddit"]

    # If you change the filenames, don't forget to add them here.
    # Please don't remove existing ones.
    possibleExtensions = (".jpg", ".png", ".mp4", ".gif", ".webm", ".md")

    postId = POST['postId']
    for extension in possibleExtensions:
        candidates = (
            PATH / (title + "_" + postId + extension),
            PATH / (POST["postSubmitter"] + "_" + title + "_" + postId
                    + extension),
            PATH / (postId + extension),
        )
        if any(candidate.exists() for candidate in candidates):
            return True

    return False

Example #4

0

Show file

File: config.py Project: linxchaos/bulk-downloader-for-reddit

    def setCustomFolderPath(self):
        print(
            """
Type a folder structure (generic folder path)

Use slash or DOUBLE backslash to separate folders

You can use SUBREDDIT, REDDITOR, POSTID, TITLE, UPVOTES, FLAIR, DATE in curly braces
The text in curly braces will be replaced with the corresponding property of an each post

For example: {REDDITOR}\{SUBREDDIT}\{FLAIR}

Existing folder structure""", None if "folderpath" not in self.file.read() else
            self.file.read()["folderpath"])

        folderpath = nameCorrector(input(">> ").strip("\\").strip("/").upper())

        self.file.add({"folderpath": folderpath})

Example #5

0

Show file

File: config.py Project: linxchaos/bulk-downloader-for-reddit

    def setCustomFileName(self):
        print(
            """
IMPORTANT: Do not change the filename structure frequently.
           If you did, the program could not find duplicates and
           would download the already downloaded files again.
           This would not create any duplicates in the directory but
           the program would not be as snappy as it should be.

Type a template file name for each post.

You can use SUBREDDIT, REDDITOR, POSTID, TITLE, UPVOTES, FLAIR, DATE in curly braces
The text in curly braces will be replaced with the corresponding property of an each post

For example: {FLAIR}_{SUBREDDIT}_{REDDITOR}

Existing filename template:""", None if "filename" not in self.file.read() else
            self.file.read()["filename"])

        filename = nameCorrector(input(">> ").upper())
        self.file.add({"filename": filename})

Example #6

0

Show file

    def __init__(self, directory, post):
        """Write the post to a ``.md`` file inside ``directory``.

        ``directory`` is created when missing.  The file is named
        ``<submitter>_<title>_<id>.md``; if writing it raises
        ``FileNotFoundError`` the write is retried as ``<id>.md``.

        Raises:
            FileAlreadyExistsError: the long-named file already exists.
        """
        if not os.path.exists(directory):
            os.makedirs(directory)

        title = nameCorrector(post['postTitle'])

        # Filenames are declared here
        longName = "_".join(
            [post["postSubmitter"], title, post['postId']]) + ".md"
        print(longName)

        fileDir = directory / longName
        if fileDir.is_file():
            raise FileAlreadyExistsError

        try:
            self.writeToFile(fileDir, post)
        except FileNotFoundError:
            # Fall back to a name made of the post id only.
            fileDir = directory / (post['postId'] + ".md")
            self.writeToFile(fileDir, post)

Example #7

0

Show file

    def __init__(self,directory,POST):
        """Download the file behind ``POST['postURL']`` into ``directory``.

        The extension reported by ``getExtension`` is stored in
        ``POST['postExt']`` and ``directory`` is created when missing.  If
        ``getFile`` raises ``FileNameTooLong`` the download is retried once
        under a name built from the post id alone.
        """
        POST['postExt'] = getExtension(POST['postURL'])

        if not os.path.exists(directory):
            os.makedirs(directory)

        title = nameCorrector(POST['postTitle'])

        # Filenames are declared here: <submitter>_<title>_<id> plus a suffix.
        baseName = "_".join([POST["postSubmitter"], title, POST['postId']])
        print(baseName + POST['postExt'])

        fileDir = directory / (baseName + POST['postExt'])
        tempDir = directory / (baseName + ".tmp")

        try:
            getFile(fileDir, tempDir, POST['postURL'])
        except FileNameTooLong:
            # Retry with the post id as the whole name.
            shortName = POST['postId']
            fileDir = directory / (shortName + POST['postExt'])
            tempDir = directory / (shortName + ".tmp")

            getFile(fileDir, tempDir, POST['postURL'])

Example #8

0

Show file

def download(submissions):
    """Download every submission, log failures and print a summary.

    For each submission the target directory and filename are derived from
    ``GLOBAL.config``.  Posts that already exist, or whose content URL
    contains a domain listed in ``GLOBAL.arguments.skip``, are skipped;
    everything else is handed to ``downloadPost``.  Errors are printed and,
    for most error types, recorded in the log file created by
    ``createLogFile("FAILED")``.  ``ImgurLoginError`` terminates the program
    through ``sys.exit()``.

    Args:
        submissions: sequence of mappings.  The keys read here are POSTID,
            SUBREDDIT, REDDITOR, FLAIR, TYPE, TITLE and CONTENTURL.
    """

    def describe(error):
        # "<ExceptionClass>: <message>", the form printed and logged below.
        return "{class_name}: {info}".format(
            class_name=error.__class__.__name__, info=str(error)
        )

    downloadedCount = 0
    duplicates = 0

    FAILED_FILE = createLogFile("FAILED")

    # The client is only needed (and only created) when posts get unsaved.
    if GLOBAL.arguments.unsave:
        reddit = Reddit(GLOBAL.config['credentials']['reddit']).begin()

    subsLenght = len(submissions)

    for i, submission in enumerate(submissions):
        print(f"\n({i+1}/{subsLenght})",end=" — ")
        print(submission['POSTID'],
              f"r/{submission['SUBREDDIT']}",
              f"u/{submission['REDDITOR']}",
              submission['FLAIR'] if submission['FLAIR'] else "",
              sep=" — ",
              end="")
        # noPrint is not a builtin print keyword; presumably print is a
        # project-level wrapper here - confirm against the file's imports.
        print(f" – {submission['TYPE'].upper()}",end="",noPrint=True)

        directory = GLOBAL.directory / GLOBAL.config["folderpath"].format(**submission)

        # Same submission, but with the title passed through nameCorrector;
        # the untouched full path is handed over as its reference.
        details = {
            **submission,
            "TITLE": nameCorrector(
                submission['TITLE'],
                reference = str(directory)
                            + GLOBAL.config['filename'].format(**submission)
                            + ".ext"
            ),
        }
        filename = GLOBAL.config['filename'].format(**details)

        if isPostExists(details,directory):
            print()
            print(directory)
            print(filename)
            print("It already exists")
            duplicates += 1
            continue

        if any(domain in submission['CONTENTURL'] for domain in GLOBAL.arguments.skip):
            print()
            print(submission['CONTENTURL'])
            print("Domain found in skip domains, skipping post...")
            continue

        try:
            downloadPost(details,directory)
            GLOBAL.downloadedPosts.add(details['POSTID'])
            try:
                if GLOBAL.arguments.unsave:
                    reddit.submission(id=details['POSTID']).unsave()
            except InsufficientScope:
                # Start a fresh session without stored credentials and retry.
                reddit = Reddit().begin()
                reddit.submission(id=details['POSTID']).unsave()

            downloadedCount += 1

        except FileAlreadyExistsError:
            print("It already exists")
            GLOBAL.downloadedPosts.add(details['POSTID'])
            duplicates += 1

        except ImgurLoginError:
            print(
                "Imgur login failed. \nQuitting the program "\
                "as unexpected errors might occur."
            )
            sys.exit()

        except ImgurLimitError as exception:
            FAILED_FILE.add({i + 1: [describe(exception), details]})

        except NotADownloadableLinkError as exception:
            print(describe(exception))
            FAILED_FILE.add({i + 1: [describe(exception), submission]})

        except TypeInSkip:
            print()
            print(submission['CONTENTURL'])
            print("Skipping post...")

        except DomainInSkip:
            print()
            print(submission['CONTENTURL'])
            print("Skipping post...")

        except NoSuitablePost:
            print("No match found, skipping...")

        except FailedToDownload:
            print("Failed to download the posts, skipping...")

        except AlbumNotDownloadedCompletely as exception:
            # The exception must be bound here: the handler previously used a
            # name that is only assigned by the generic handler below, which
            # raised UnboundLocalError instead of recording the failure.
            print("Album did not downloaded completely.")
            FAILED_FILE.add({i + 1: [describe(exception), submission]})

        except Exception as exc:
            print(
                "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                    class_name=exc.__class__.__name__,info=str(exc)
                )
            )

            logging.error(sys.exc_info()[0].__name__,
                          exc_info=full_exc_info(sys.exc_info()))
            print(GLOBAL.log_stream.getvalue(),noPrint=True)

            FAILED_FILE.add({i + 1: [describe(exc), submission]})

    if duplicates:
        print(f"\nThere {'were' if duplicates > 1 else 'was'} " \
              f"{duplicates} duplicate{'s' if duplicates > 1 else ''}")

    if downloadedCount == 0:
        print("Nothing is downloaded :(")

    else:
        print(f"Total of {downloadedCount} " \
              f"link{'s' if downloadedCount > 1 else ''} downloaded!")

Example #9

0

Show file

def getFile(filename,
            shortFilename,
            folderDir,
            imageURL,
            indent=0,
            silent=False):
    """Download ``imageURL`` into ``folderDir`` as ``filename``.

    The data is fetched into ``<filename>.tmp`` and renamed on success.  Up
    to three attempts are made: a ``ConnectionResetError`` simply retries,
    a ``FileNotFoundError`` switches to ``shortFilename`` for the next
    attempt.

    Args:
        filename: preferred file name; passed through ``nameCorrector``.
        shortFilename: name used after a ``FileNotFoundError``.
        folderDir: target folder.
        imageURL: address to download from.
        indent: number of spaces put in front of progress messages.
        silent: when true, no progress messages are printed.

    Raises:
        DomainInSkip: ``imageURL`` contains a domain from
            ``GLOBAL.arguments.skip``.
        FileAlreadyExistsError: the target file exists, or (with
            ``GLOBAL.arguments.no_dupes``) its hash is already known.
        FailedToDownload: all three attempts ended without a download.
    """
    if any(domain in imageURL for domain in GLOBAL.arguments.skip):
        raise DomainInSkip

    userAgent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
        "Safari/537.36 OPR/54.0.2952.64"
    )
    accept = (
        "text/html,application/xhtml+xml,application/xml;"
        "q=0.9,image/webp,image/apng,*/*;q=0.8"
    )
    headers = [
        ("User-Agent", userAgent),
        ("Accept", accept),
        ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
        ("Accept-Encoding", "none"),
        ("Accept-Language", "en-US,en;q=0.8"),
        ("Connection", "keep-alive")
    ]

    # The custom headers are applied to every host except imgur ones.
    opener = urllib.request.build_opener()
    if "imgur" not in imageURL:
        opener.addheaders = headers
    urllib.request.install_opener(opener)

    filename = nameCorrector(filename)
    padding = " " * indent

    if not silent:
        print(padding + str(folderDir),
              padding + str(filename),
              sep="\n")

    for _attempt in range(3):
        fileDir = Path(folderDir) / filename
        tempDir = Path(folderDir) / (filename + ".tmp")

        if os.path.isfile(fileDir):
            raise FileAlreadyExistsError

        try:
            urllib.request.urlretrieve(imageURL,
                                       tempDir,
                                       reporthook=dlProgress)

            if GLOBAL.arguments.no_dupes:
                fileHash = createHash(tempDir)
                if fileHash in GLOBAL.hashList:
                    # Same content was already downloaded: drop the copy.
                    os.remove(tempDir)
                    raise FileAlreadyExistsError
                GLOBAL.hashList.add(fileHash)

            os.rename(tempDir, fileDir)
            if not silent:
                print(padding + "Downloaded" + " " * 10)
            return None
        except ConnectionResetError as exception:
            if not silent:
                print(padding + str(exception))
                print(padding + "Trying again\n")
        except FileNotFoundError:
            # Retry under the short name on the next pass.
            filename = shortFilename

    raise FailedToDownload

Example #10

0

Show file

    def __init__(self, directory, post):
        """Download the image or album behind ``post['CONTENTURL']``.

        A single image is saved as ``GLOBAL.config['filename']`` formatted
        with ``post`` plus its extension.  An album gets a folder of that
        name (or, if creating it raises ``FileNotFoundError``, one named
        after ``post['POSTID']``) and each image is saved as
        ``<n>_<title>_<id><ext>``.

        Raises:
            FileAlreadyExistsError: every album image was a duplicate.
            AlbumNotDownloadedCompletely: at least one album image failed.
        """
        self.imgurClient = self.initImgur()

        imgurID = self.getId(post['CONTENTURL'])
        content = self.getLink(imgurID)

        if not os.path.exists(directory): os.makedirs(directory)

        if content['type'] == 'image':

            # Prefer the mp4 rendition when the object offers one.
            try:
                post['MEDIAURL'] = content['object'].mp4
            except AttributeError:
                post['MEDIAURL'] = content['object'].link

            post['EXTENSION'] = getExtension(post['MEDIAURL'])

            filename = GLOBAL.config['filename'].format(
                **post) + post["EXTENSION"]
            shortFilename = post['POSTID'] + post['EXTENSION']

            getFile(filename, shortFilename, directory, post['MEDIAURL'])

        elif content['type'] == 'album':
            images = content['object'].images
            imagesLenght = len(images)
            # Starts at the full count and is decremented for every image
            # that is a duplicate or fails.
            howManyDownloaded = imagesLenght
            duplicates = 0

            filename = GLOBAL.config['filename'].format(**post)

            print(filename)

            folderDir = directory / filename

            try:
                if not os.path.exists(folderDir):
                    os.makedirs(folderDir)
            except FileNotFoundError:
                folderDir = directory / post['POSTID']
                os.makedirs(folderDir)

            for i in range(imagesLenght):
                try:
                    imageURL = images[i]['mp4']
                except KeyError:
                    imageURL = images[i]['link']

                images[i]['Ext'] = getExtension(imageURL)

                # The extension is part of both names, as in the single-image
                # branch above; it was previously computed but never used, so
                # album files were written without an extension.
                filename = (str(i + 1) + "_" +
                            nameCorrector(str(images[i]['title'])) + "_" +
                            images[i]['id'] + images[i]['Ext'])

                shortFilename = (str(i + 1) + "_" + images[i]['id'] +
                                 images[i]['Ext'])

                print("\n  ({}/{})".format(i + 1, imagesLenght))

                try:
                    getFile(filename,
                            shortFilename,
                            folderDir,
                            imageURL,
                            indent=2)
                    print()
                except FileAlreadyExistsError:
                    print("  The file already exists" + " " * 10, end="\n\n")
                    duplicates += 1
                    howManyDownloaded -= 1

                except Exception as exception:
                    print("\n  Could not get the file")
                    print("  " + "{class_name}: {info}".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)) + "\n")
                    howManyDownloaded -= 1

            if duplicates == imagesLenght:
                raise FileAlreadyExistsError
            elif howManyDownloaded + duplicates < imagesLenght:
                raise AlbumNotDownloadedCompletely(
                    "Album Not Downloaded Completely")

Example #11

0

Show file

    def __init__(self, directory, post):
        """Download the image(s) linked from ``post['postURL']``.

        A single link is saved directly into ``directory`` as
        ``<submitter>_<title>_<id><ext>`` (retried as ``<id><ext>`` on
        ``FileNameTooLong``).  Several links go into a folder named
        ``<submitter>_<title>_<id>`` (or ``<id>`` if creating it raises
        ``FileNotFoundError``) as ``1<ext>``, ``2<ext>``, and so on.

        Raises:
            NotADownloadableLinkError: ``self.getLinks`` raised ``HTTPError``.
            FileAlreadyExistsError: the duplicate count equals the link count.
            AlbumNotDownloadedCompletely: at least one link failed.
        """
        try:
            IMAGES = self.getLinks(post['postURL'])
        except urllib.error.HTTPError:
            raise NotADownloadableLinkError("Not a downloadable link")

        def withScheme(link):
            # Links that carry no scheme get "https://" put in front.
            if 'https://' not in link and 'http://' not in link:
                return "https://" + link
            return link

        total = len(IMAGES)
        # Starts at the full count; lowered for each duplicate or failure.
        stillCounted = total
        duplicates = 0

        if total == 1:
            extension = getExtension(IMAGES[0])

            # Filenames are declared here
            title = nameCorrector(post['postTitle'])
            baseName = "_".join([post["postSubmitter"], title, post['postId']])
            print(baseName + extension)

            fileDir = directory / (baseName + extension)
            tempDir = directory / (baseName + ".tmp")

            imageURL = withScheme(IMAGES[0])

            try:
                getFile(fileDir, tempDir, imageURL)
            except FileNameTooLong:
                fileDir = directory / (post['postId'] + extension)
                tempDir = directory / (post['postId'] + '.tmp')
                getFile(fileDir, tempDir, imageURL)

        else:
            title = nameCorrector(post['postTitle'])
            baseName = "_".join([post["postSubmitter"], title, post['postId']])
            print(baseName, end="\n\n")

            folderDir = directory / baseName

            try:
                if not os.path.exists(folderDir):
                    os.makedirs(folderDir)
            except FileNotFoundError:
                folderDir = directory / post['postId']
                os.makedirs(folderDir)

            for number, link in enumerate(IMAGES, start=1):
                extension = getExtension(link)

                fileName = str(number)
                imageURL = withScheme(link)

                fileDir = folderDir / (fileName + extension)
                tempDir = folderDir / (fileName + ".tmp")

                print("  ({}/{})".format(number, total))
                print("  {}".format(fileName + extension))

                try:
                    getFile(fileDir, tempDir, imageURL, indent=2)
                    print()
                except FileAlreadyExistsError:
                    print("  The file already exists" + " " * 10, end="\n\n")
                    duplicates += 1
                    stillCounted -= 1

                except Exception as exception:
                    print("\n  Could not get the file")
                    detail = "{class_name}: {info}".format(
                        class_name=type(exception).__name__,
                        info=str(exception))
                    print("  " + detail + "\n")
                    stillCounted -= 1

            if duplicates == total:
                raise FileAlreadyExistsError
            if stillCounted + duplicates < total:
                raise AlbumNotDownloadedCompletely(
                    "Album Not Downloaded Completely")

Example #12

0

Show file

    def __init__(self,directory,post):
        """Download the image or album behind ``post['postURL']``.

        A single image is saved into ``directory`` as
        ``<submitter>_<title>_<id><ext>`` and retried as ``<id><ext>`` when
        ``getFile`` raises ``FileNameTooLong``.  An album gets a folder named
        ``<submitter>_<title>_<id>`` (or ``<id>`` if creating it raises
        ``FileNotFoundError``); its images are saved as
        ``<n>_<title>_<id><ext>``, falling back to ``<n>_<id><ext>`` and then
        ``<n><ext>`` on ``FileNameTooLong``.

        Raises:
            FileAlreadyExistsError: every album image was a duplicate.
            AlbumNotDownloadedCompletely: at least one album image failed.
        """
        self.imgurClient = self.initImgur()

        imgurID = self.getId(post['postURL'])
        content = self.getLink(imgurID)

        if not os.path.exists(directory): os.makedirs(directory)

        if content['type'] == 'image':

            # Prefer the mp4 rendition when the object offers one.
            try:
                post['mediaURL'] = content['object'].mp4
            except AttributeError:
                post['mediaURL'] = content['object'].link

            post['postExt'] = getExtension(post['mediaURL'])

            title = nameCorrector(post['postTitle'])

            # Filenames are declared here
            baseName = post["postSubmitter"] + "_" + title + "_" + post['postId']

            print(baseName + post['postExt'])

            fileDir = directory / (baseName + post['postExt'])
            tempDir = directory / (baseName + ".tmp")

            try:
                getFile(fileDir,tempDir,post['mediaURL'])
            except FileNameTooLong:
                # The name must be built before joining: "/" binds tighter
                # than "+", so without the parentheses this evaluated
                # Path + str and raised TypeError.
                fileDir = directory / (post['postId'] + post['postExt'])
                tempDir = directory / (post['postId'] + '.tmp')
                getFile(fileDir,tempDir,post['mediaURL'])

        elif content['type'] == 'album':
            images = content['object'].images
            imagesLenght = len(images)
            # Starts at the full count and is decremented for every image
            # that is a duplicate or fails.
            howManyDownloaded = imagesLenght
            duplicates = 0

            title = nameCorrector(post['postTitle'])
            print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")

            folderDir = directory / (
                post["postSubmitter"] + "_" + title + "_" + post['postId']
            )

            try:
                if not os.path.exists(folderDir):
                    os.makedirs(folderDir)
            except FileNotFoundError:
                folderDir = directory / post['postId']
                os.makedirs(folderDir)

            for i in range(imagesLenght):
                try:
                    imageURL = images[i]['mp4']
                except KeyError:
                    imageURL = images[i]['link']

                images[i]['Ext'] = getExtension(imageURL)

                fileName = (str(i+1)
                            + "_"
                            + nameCorrector(str(images[i]['title']))
                            + "_"
                            + images[i]['id'])

                # Filenames are declared here
                fileDir = folderDir / (fileName + images[i]['Ext'])
                tempDir = folderDir / (fileName + ".tmp")

                print("  ({}/{})".format(i+1,imagesLenght))
                print("  {}".format(fileName+images[i]['Ext']))

                try:
                    getFile(fileDir,tempDir,imageURL,indent=2)
                    print()
                except FileAlreadyExistsError:
                    print("  The file already exists" + " "*10,end="\n\n")
                    duplicates += 1
                    howManyDownloaded -= 1

                # IF FILE NAME IS TOO LONG, IT WONT REGISTER
                # NOTE: exceptions raised by the retries below are not seen
                # by the sibling except clauses; they propagate to the caller.
                except FileNameTooLong:
                    fileName = (str(i+1) + "_" + images[i]['id'])
                    fileDir = folderDir / (fileName + images[i]['Ext'])
                    tempDir = folderDir / (fileName + ".tmp")
                    try:
                        getFile(fileDir,tempDir,imageURL,indent=2)
                    # IF STILL TOO LONG
                    except FileNameTooLong:
                        fileName = str(i+1)
                        fileDir = folderDir / (fileName + images[i]['Ext'])
                        tempDir = folderDir / (fileName + ".tmp")
                        getFile(fileDir,tempDir,imageURL,indent=2)

                except Exception as exception:
                    print("\n  Could not get the file")
                    print(
                        "  "
                        + "{class_name}: {info}".format(
                            class_name=exception.__class__.__name__,
                            info=str(exception)
                        )
                        + "\n"
                    )
                    howManyDownloaded -= 1

            if duplicates == imagesLenght:
                raise FileAlreadyExistsError
            elif howManyDownloaded + duplicates < imagesLenght:
                raise AlbumNotDownloadedCompletely(
                    "Album Not Downloaded Completely"
                )