def __init__(self, directory, POST):
    """Resolve the post's media link and download it into ``directory``.

    ``POST`` is updated in place: ``POST['mediaURL']`` receives the result
    of ``self.getLink(POST['postURL'])`` and ``POST['postExt']`` the result
    of ``getExtension`` applied to that URL.

    The file is first requested as
    ``<postSubmitter>_<title>_<postId><postExt>``; if ``getFile`` raises
    ``FileNameTooLong`` it is retried once as ``<postId><postExt>``.

    Raises:
        NotADownloadableLinkError: when ``getLink`` raises ``IndexError``.
    """
    try:
        POST['mediaURL'] = self.getLink(POST['postURL'])
    except IndexError as error:
        # Keep the original failure attached as the cause for debugging.
        raise NotADownloadableLinkError(
            "Could not read the page source") from error

    POST['postExt'] = getExtension(POST['mediaURL'])

    # exist_ok avoids the race between checking for the folder and
    # creating it.
    os.makedirs(directory, exist_ok=True)

    title = nameCorrector(POST['postTitle'])

    # Filenames are declared here
    baseName = POST["postSubmitter"] + "_" + title + "_" + POST['postId']
    print(baseName + POST['postExt'])

    fileDir = directory / (baseName + POST['postExt'])
    tempDir = directory / (baseName + ".tmp")

    try:
        getFile(fileDir, tempDir, POST['mediaURL'])
    except FileNameTooLong:
        # Fall back to a name made of the post id only.
        fileDir = directory / (POST['postId'] + POST['postExt'])
        tempDir = directory / (POST['postId'] + ".tmp")
        getFile(fileDir, tempDir, POST['mediaURL'])
def downloadAlbum(self, images):
    """Download each image listed in ``images`` into a per-post folder.

    The folder is named from ``GLOBAL.config['filename']`` formatted with
    ``self.post``; if creating it raises ``FileNotFoundError`` the folder
    ``self.post['POSTID']`` is used instead.  ``images["count"]`` entries
    of ``images["images"]`` are processed, each read through its ``ext``,
    ``hash`` and ``title`` keys.

    Raises:
        FileAlreadyExistsError: when every image was reported as a
            duplicate (this includes an album with a count of zero).
        AlbumNotDownloadedCompletely: when at least one image neither
            downloaded, was skipped, nor was a duplicate.
    """
    albumName = GLOBAL.config['filename'].format(**self.post)
    albumDir = self.directory / albumName

    total = images["count"]
    fetched = 0
    alreadyPresent = 0

    try:
        if not os.path.exists(albumDir):
            os.makedirs(albumDir)
    except FileNotFoundError:
        albumDir = self.directory / self.post['POSTID']
        os.makedirs(albumDir)

    print(albumName)

    for index in range(total):
        image = images["images"][index]
        position = str(index + 1)

        extension = self.validateExtension(image["ext"])
        imageURL = self.IMGUR_IMAGE_DOMAIN + image["hash"] + extension

        filename = (position + "_" + nameCorrector(image['title']) + "_" +
                    image['hash'] + extension)
        shortFilename = position + "_" + image['hash']

        print("\n ({}/{})".format(index + 1, total))

        try:
            getFile(filename, shortFilename, albumDir, imageURL, indent=2)
            fetched += 1
            print()
        except FileAlreadyExistsError:
            print(" The file already exists" + " " * 10, end="\n\n")
            alreadyPresent += 1
        except TypeInSkip:
            # Skipped files count as handled, not as failures.
            print(" Skipping...")
            fetched += 1
        except Exception as exception:
            print("\n Could not get the file")
            print(
                " " +
                "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information"
                .format(class_name=exception.__class__.__name__,
                        info=str(exception)) + "\n")
            print(GLOBAL.log_stream.getvalue(), noPrint=True)

    if alreadyPresent == total:
        raise FileAlreadyExistsError
    if fetched + alreadyPresent < total:
        raise AlbumNotDownloadedCompletely(
            "Album Not Downloaded Completely")
def isPostExists(POST):
    """Figure out a file's name and check if the file already exists.

    Looks inside ``GLOBAL.directory / POST["postSubreddit"]`` for any of
    the three naming schemes (legacy ``<title>_<postId>``, current
    ``<postSubmitter>_<title>_<postId>`` and short ``<postId>``) combined
    with every known extension.

    Returns:
        True as soon as one candidate path exists, False when none does.
    """
    title = nameCorrector(POST['postTitle'])
    PATH = GLOBAL.directory / POST["postSubreddit"]

    # If you change the filenames, don't forget to add them here.
    # Please don't remove existing ones
    possibleExtensions = (".jpg", ".png", ".mp4", ".gif", ".webm", ".md")

    # The stems do not depend on the extension, so build them once.
    oldStem = title + "_" + POST['postId']
    candidateStems = (
        oldStem,
        POST["postSubmitter"] + "_" + oldStem,
        POST['postId'],
    )

    # Every extension must be tried before answering "no"; returning False
    # from inside the loop would only ever test the first extension.
    return any(
        (PATH / (stem + extension)).exists()
        for extension in possibleExtensions
        for stem in candidateStems
    )
def setCustomFolderPath(self):
    """Prompt for a folder-structure template and store it as "folderpath".

    Prints the instructions followed by the currently stored value (or
    ``None`` when no "folderpath" key is stored), reads one line from
    standard input, strips leading/trailing backslashes and slashes,
    upper-cases it, passes it through ``nameCorrector`` and saves it with
    ``self.file.add``.
    """
    # Read the stored settings once instead of once per lookup.
    settings = self.file.read()
    current = settings["folderpath"] if "folderpath" in settings else None

    # The backslashes in the example are written as "\\" because "\{" is
    # not a valid escape sequence; the printed text is unchanged.
    prompt = (
        " Type a folder structure (generic folder path) "
        "Use slash or DOUBLE backslash to separate folders "
        "You can use SUBREDDIT, REDDITOR, POSTID, TITLE, UPVOTES, FLAIR, "
        "DATE in curly braces "
        "The text in curly braces will be replaced with the corresponding "
        "property of an each post "
        "For example: {REDDITOR}\\{SUBREDDIT}\\{FLAIR} "
        "Existing folder structure"
    )
    print(prompt, current)

    folderpath = nameCorrector(input(">> ").strip("\\").strip("/").upper())

    self.file.add({"folderpath": folderpath})
def setCustomFileName(self):
    """Prompt for a filename template and store it as "filename".

    Prints the instructions followed by the currently stored template (or
    ``None`` when no "filename" key is stored), reads one line from
    standard input, upper-cases it, passes it through ``nameCorrector``
    and saves it with ``self.file.add``.
    """
    # Read the stored settings once instead of once per lookup.
    settings = self.file.read()
    current = settings["filename"] if "filename" in settings else None

    prompt = (
        " IMPORTANT: Do not change the filename structure frequently. "
        "If you did, the program could not find duplicates and would download "
        "the already downloaded files again. "
        "This would not create any duplicates in the directory but the program "
        "would not be as snappy as it should be. "
        "Type a template file name for each post. "
        "You can use SUBREDDIT, REDDITOR, POSTID, TITLE, UPVOTES, FLAIR, "
        "DATE in curly braces "
        "The text in curly braces will be replaced with the corresponding "
        "property of an each post "
        "For example: {FLAIR}_{SUBREDDIT}_{REDDITOR} "
        "Existing filename template:"
    )
    print(prompt, current)

    filename = nameCorrector(input(">> ").upper())

    self.file.add({"filename": filename})
def __init__(self, directory, post):
    """Write ``post`` to a ``.md`` file inside ``directory``.

    The target is ``<postSubmitter>_<title>_<postId>.md``.  If that file
    already exists ``FileAlreadyExistsError`` is raised.  If writing raises
    ``FileNotFoundError`` the post is written to ``<postId>.md`` instead.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)

    cleanTitle = nameCorrector(post['postTitle'])

    # Filenames are declared here
    markdownName = (post["postSubmitter"] + "_" + cleanTitle + "_" +
                    post['postId'] + ".md")
    print(markdownName)

    target = directory / markdownName
    if target.is_file():
        raise FileAlreadyExistsError

    try:
        self.writeToFile(target, post)
    except FileNotFoundError:
        # Retry under a name made of the post id only.
        target = directory / (post['postId'] + ".md")
        self.writeToFile(target, post)
def __init__(self, directory, POST):
    """Download the file at ``POST['postURL']`` into ``directory``.

    ``POST['postExt']`` is set from ``getExtension(POST['postURL'])``.  The
    download is attempted as ``<postSubmitter>_<title>_<postId><postExt>``
    and, if ``getFile`` raises ``FileNameTooLong``, once more as
    ``<postId><postExt>``.
    """
    sourceURL = POST['postURL']
    POST['postExt'] = getExtension(sourceURL)

    if not os.path.exists(directory):
        os.makedirs(directory)

    cleanTitle = nameCorrector(POST['postTitle'])

    # Filenames are declared here
    longStem = POST["postSubmitter"] + "_" + cleanTitle + "_" + POST['postId']
    print(longStem + POST['postExt'])

    try:
        getFile(directory / (longStem + POST['postExt']),
                directory / (longStem + ".tmp"),
                POST['postURL'])
    except FileNameTooLong:
        shortStem = POST['postId']
        getFile(directory / (shortStem + POST['postExt']),
                directory / (shortStem + ".tmp"),
                POST['postURL'])
def _describeError(error):
    """Return ``"<ExceptionClass>: <message>"`` for a caught exception."""
    return "{class_name}: {info}".format(
        class_name=error.__class__.__name__, info=str(error)
    )


def download(submissions):
    """Analyze list of submissions and call the right function
    to download each one, catch errors, update the log files.

    For every submission a header line is printed, the target directory
    and filename are derived from ``GLOBAL.config``, and the post is
    skipped when ``isPostExists`` reports it or when its ``CONTENTURL``
    contains one of the domains in ``GLOBAL.arguments.skip``.  Otherwise
    ``downloadPost`` is called; failures are printed and, for most error
    types, recorded in the "FAILED" log under the 1-based position of the
    submission.  A summary of duplicates and downloads is printed at the
    end.

    An ``ImgurLoginError`` terminates the program through ``sys.exit()``.
    """
    downloadedCount = 0
    duplicates = 0

    FAILED_FILE = createLogFile("FAILED")

    # ``reddit`` is only needed (and only bound) when posts are unsaved.
    if GLOBAL.arguments.unsave:
        reddit = Reddit(GLOBAL.config['credentials']['reddit']).begin()

    subsLenght = len(submissions)

    for index, submission in enumerate(submissions, start=1):
        print(f"\n({index}/{subsLenght})", end=" — ")
        print(submission['POSTID'],
              f"r/{submission['SUBREDDIT']}",
              f"u/{submission['REDDITOR']}",
              submission['FLAIR'] if submission['FLAIR'] else "",
              sep=" — ",
              end="")
        # NOTE(review): ``noPrint`` is not accepted by the builtin print;
        # presumably the file installs its own print wrapper - confirm.
        print(f" – {submission['TYPE'].upper()}", end="", noPrint=True)

        directory = GLOBAL.directory / \
            GLOBAL.config["folderpath"].format(**submission)

        # Same fields as the submission, with the title passed through
        # nameCorrector using the full would-be path as reference.
        details = {
            **submission,
            "TITLE": nameCorrector(
                submission['TITLE'],
                reference=str(directory)
                + GLOBAL.config['filename'].format(**submission)
                + ".ext"
            ),
        }
        filename = GLOBAL.config['filename'].format(**details)

        if isPostExists(details, directory):
            print()
            print(directory)
            print(filename)
            print("It already exists")
            duplicates += 1
            continue

        if any(domain in submission['CONTENTURL']
               for domain in GLOBAL.arguments.skip):
            print()
            print(submission['CONTENTURL'])
            print("Domain found in skip domains, skipping post...")
            continue

        try:
            downloadPost(details, directory)
            GLOBAL.downloadedPosts.add(details['POSTID'])
            try:
                if GLOBAL.arguments.unsave:
                    reddit.submission(id=details['POSTID']).unsave()
            except InsufficientScope:
                # Start a fresh session without the stored credentials
                # and retry the unsave once.
                reddit = Reddit().begin()
                reddit.submission(id=details['POSTID']).unsave()

            downloadedCount += 1

        except FileAlreadyExistsError:
            print("It already exists")
            GLOBAL.downloadedPosts.add(details['POSTID'])
            duplicates += 1

        except ImgurLoginError:
            print(
                "Imgur login failed. \nQuitting the program "
                "as unexpected errors might occur."
            )
            sys.exit()

        except ImgurLimitError as exception:
            FAILED_FILE.add({index: [_describeError(exception), details]})

        except NotADownloadableLinkError as exception:
            print(_describeError(exception))
            FAILED_FILE.add({index: [_describeError(exception), submission]})

        except (TypeInSkip, DomainInSkip):
            print()
            print(submission['CONTENTURL'])
            print("Skipping post...")

        except NoSuitablePost:
            print("No match found, skipping...")

        except FailedToDownload:
            print("Failed to download the posts, skipping...")

        except AlbumNotDownloadedCompletely as exc:
            # The exception must be bound here: the handler records it in
            # the failure log and would otherwise hit an unbound name.
            print("Album did not downloaded completely.")
            FAILED_FILE.add({index: [_describeError(exc), submission]})

        except Exception as exc:
            print(
                _describeError(exc)
                + "\nSee CONSOLE_LOG.txt for more information"
            )

            logging.error(sys.exc_info()[0].__name__,
                          exc_info=full_exc_info(sys.exc_info()))
            print(GLOBAL.log_stream.getvalue(), noPrint=True)

            FAILED_FILE.add({index: [_describeError(exc), submission]})

    if duplicates:
        print(f"\nThere {'were' if duplicates > 1 else 'was'} "
              f"{duplicates} duplicate{'s' if duplicates > 1 else ''}")

    if downloadedCount == 0:
        print("Nothing is downloaded :(")
    else:
        print(f"Total of {downloadedCount} "
              f"link{'s' if downloadedCount > 1 else ''} downloaded!")
def getFile(filename, shortFilename, folderDir, imageURL, indent=0, silent=False):
    """Download ``imageURL`` into ``folderDir`` as ``filename``.

    The data is retrieved into ``<filename>.tmp`` and renamed to the final
    name on success.  Up to three attempts are made: a
    ``ConnectionResetError`` simply retries, a ``FileNotFoundError``
    switches the name to ``shortFilename`` before retrying.  Progress
    messages are indented by ``indent`` spaces and suppressed when
    ``silent`` is true.

    Raises:
        DomainInSkip: ``imageURL`` contains a domain from
            ``GLOBAL.arguments.skip``.
        FileAlreadyExistsError: the target file already exists, or (with
            ``GLOBAL.arguments.no_dupes``) its hash is already in
            ``GLOBAL.hashList``.
        FailedToDownload: all three attempts ended without a download.
    """
    for skippedDomain in GLOBAL.arguments.skip:
        if skippedDomain in imageURL:
            raise DomainInSkip

    headers = [
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
         "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
         "Safari/537.36 OPR/54.0.2952.64"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;"
         "q=0.9,image/webp,image/apng,*/*;q=0.8"),
        ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
        ("Accept-Encoding", "none"),
        ("Accept-Language", "en-US,en;q=0.8"),
        ("Connection", "keep-alive")
    ]

    # The custom headers are applied to every host except imgur; the
    # opener is installed globally either way.
    opener = urllib.request.build_opener()
    if "imgur" not in imageURL:
        opener.addheaders = headers
    urllib.request.install_opener(opener)

    filename = nameCorrector(filename)
    padding = " " * indent

    if not silent:
        print(padding + str(folderDir), padding + str(filename), sep="\n")

    for _attempt in range(3):
        fileDir = Path(folderDir) / filename
        tempDir = Path(folderDir) / (filename + ".tmp")

        if os.path.isfile(fileDir):
            raise FileAlreadyExistsError

        try:
            urllib.request.urlretrieve(imageURL,
                                       tempDir,
                                       reporthook=dlProgress)

            if GLOBAL.arguments.no_dupes:
                fileHash = createHash(tempDir)
                if fileHash in GLOBAL.hashList:
                    os.remove(tempDir)
                    raise FileAlreadyExistsError
                GLOBAL.hashList.add(fileHash)

            os.rename(tempDir, fileDir)
            if not silent:
                print(padding + "Downloaded" + " " * 10)
            return None
        except ConnectionResetError as exception:
            if not silent:
                print(padding + str(exception))
                print(padding + "Trying again\n")
        except FileNotFoundError:
            # Retry under the shorter fallback name.
            filename = shortFilename

    raise FailedToDownload
def __init__(self, directory, post):
    """Download the imgur image or album behind ``post['CONTENTURL']``.

    ``self.getLink`` returns a mapping whose ``'type'`` is either
    ``'image'`` or ``'album'`` and whose ``'object'`` carries the data.

    * image: ``post['MEDIAURL']`` is set to the object's ``mp4`` attribute
      when it has one, otherwise to ``link``; ``post['EXTENSION']`` is
      derived from it and the file is downloaded into ``directory``.
    * album: a folder named from ``GLOBAL.config['filename']`` (or, if
      creating it raises ``FileNotFoundError``, from ``post['POSTID']``)
      is created and every image is downloaded into it.

    Raises:
        FileAlreadyExistsError: every album image already existed (this
            includes an empty album).
        AlbumNotDownloadedCompletely: at least one album image failed.
    """
    self.imgurClient = self.initImgur()

    imgurID = self.getId(post['CONTENTURL'])
    content = self.getLink(imgurID)

    if not os.path.exists(directory):
        os.makedirs(directory)

    if content['type'] == 'image':
        try:
            post['MEDIAURL'] = content['object'].mp4
        except AttributeError:
            post['MEDIAURL'] = content['object'].link

        post['EXTENSION'] = getExtension(post['MEDIAURL'])

        filename = GLOBAL.config['filename'].format(
            **post) + post["EXTENSION"]
        shortFilename = post['POSTID'] + post['EXTENSION']

        getFile(filename, shortFilename, directory, post['MEDIAURL'])

    elif content['type'] == 'album':
        images = content['object'].images
        imagesLenght = len(images)
        howManyDownloaded = imagesLenght
        duplicates = 0

        filename = GLOBAL.config['filename'].format(**post)
        print(filename)

        folderDir = directory / filename
        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            folderDir = directory / post['POSTID']
            os.makedirs(folderDir)

        for index, image in enumerate(images, start=1):
            try:
                imageURL = image['mp4']
            except KeyError:
                imageURL = image['link']

            image['Ext'] = getExtension(imageURL)

            # The extension is appended to both names, as in the
            # single-image branch; without it album files would be saved
            # with no extension at all.
            filename = (str(index) + "_" +
                        nameCorrector(str(image['title'])) + "_" +
                        image['id'] + image['Ext'])
            shortFilename = str(index) + "_" + image['id'] + image['Ext']

            print("\n ({}/{})".format(index, imagesLenght))

            try:
                getFile(filename,
                        shortFilename,
                        folderDir,
                        imageURL,
                        indent=2)
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " " * 10, end="\n\n")
                duplicates += 1
                howManyDownloaded -= 1
            except Exception as exception:
                # One failing image must not abort the rest of the album;
                # the shortfall is reported after the loop.
                print("\n Could not get the file")
                print(" " + "{class_name}: {info}".format(
                    class_name=exception.__class__.__name__,
                    info=str(exception)) + "\n")
                howManyDownloaded -= 1

        if duplicates == imagesLenght:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < imagesLenght:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely")
def __init__(self, directory, post):
    """Download every link returned by ``self.getLinks(post['postURL'])``.

    A single link is saved directly in ``directory`` as
    ``<postSubmitter>_<title>_<postId><ext>`` (or ``<postId><ext>`` when
    ``getFile`` raises ``FileNameTooLong``).  Otherwise a folder
    ``<postSubmitter>_<title>_<postId>`` (or ``<postId>`` if creating it
    raises ``FileNotFoundError``) is created and the links are saved in it
    as ``1<ext>``, ``2<ext>``, ...  Links containing neither ``https://``
    nor ``http://`` get ``https://`` prepended.

    Raises:
        NotADownloadableLinkError: ``getLinks`` raised an HTTP error.
        FileAlreadyExistsError: every file of a multi-link post already
            existed (this includes an empty link list).
        AlbumNotDownloadedCompletely: at least one file of a multi-link
            post failed.
    """

    def withScheme(url):
        # Prepend https:// when the link carries no http(s) scheme text.
        if 'https://' not in url and 'http://' not in url:
            return "https://" + url
        return url

    try:
        IMAGES = self.getLinks(post['postURL'])
    except urllib.error.HTTPError:
        raise NotADownloadableLinkError("Not a downloadable link")

    total = len(IMAGES)
    succeeded = total
    alreadyPresent = 0

    if total == 1:
        extension = getExtension(IMAGES[0])

        # Filenames are declared here
        cleanTitle = nameCorrector(post['postTitle'])
        stem = post["postSubmitter"] + "_" + cleanTitle + "_" + post['postId']
        print(stem + extension)

        fileDir = directory / (stem + extension)
        tempDir = directory / (stem + ".tmp")

        imageURL = withScheme(IMAGES[0])

        try:
            getFile(fileDir, tempDir, imageURL)
        except FileNameTooLong:
            fileDir = directory / (post['postId'] + extension)
            tempDir = directory / (post['postId'] + '.tmp')
            getFile(fileDir, tempDir, imageURL)
        return

    cleanTitle = nameCorrector(post['postTitle'])
    folderName = post["postSubmitter"] + "_" + cleanTitle + "_" + post['postId']
    print(folderName, end="\n\n")

    folderDir = directory / folderName
    try:
        if not os.path.exists(folderDir):
            os.makedirs(folderDir)
    except FileNotFoundError:
        folderDir = directory / post['postId']
        os.makedirs(folderDir)

    for position in range(total):
        extension = getExtension(IMAGES[position])
        fileName = str(position + 1)
        imageURL = withScheme(IMAGES[position])

        fileDir = folderDir / (fileName + extension)
        tempDir = folderDir / (fileName + ".tmp")

        print(" ({}/{})".format(position + 1, total))
        print(" {}".format(fileName + extension))

        try:
            getFile(fileDir, tempDir, imageURL, indent=2)
            print()
        except FileAlreadyExistsError:
            print(" The file already exists" + " " * 10, end="\n\n")
            alreadyPresent += 1
            succeeded -= 1
        except Exception as exception:
            print("\n Could not get the file")
            print(" " + "{class_name}: {info}".format(
                class_name=exception.__class__.__name__,
                info=str(exception)) + "\n")
            succeeded -= 1

    if alreadyPresent == total:
        raise FileAlreadyExistsError
    if succeeded + alreadyPresent < total:
        raise AlbumNotDownloadedCompletely(
            "Album Not Downloaded Completely")
def __init__(self, directory, post):
    """Download the imgur image or album behind ``post['postURL']``.

    ``self.getLink`` returns a mapping whose ``'type'`` is either
    ``'image'`` or ``'album'`` and whose ``'object'`` carries the data.

    * image: saved in ``directory`` as
      ``<postSubmitter>_<title>_<postId><postExt>``, or as
      ``<postId><postExt>`` when ``getFile`` raises ``FileNameTooLong``.
    * album: saved in a folder ``<postSubmitter>_<title>_<postId>`` (or
      ``<postId>`` if creating it raises ``FileNotFoundError``); each image
      is named ``<n>_<title>_<id><ext>``, shortened to ``<n>_<id><ext>``
      and then ``<n><ext>`` on ``FileNameTooLong``.

    Raises:
        FileAlreadyExistsError: every album image already existed (this
            includes an empty album).
        AlbumNotDownloadedCompletely: at least one album image failed.
    """
    self.imgurClient = self.initImgur()

    imgurID = self.getId(post['postURL'])
    content = self.getLink(imgurID)

    if not os.path.exists(directory):
        os.makedirs(directory)

    if content['type'] == 'image':
        try:
            post['mediaURL'] = content['object'].mp4
        except AttributeError:
            post['mediaURL'] = content['object'].link

        post['postExt'] = getExtension(post['mediaURL'])

        title = nameCorrector(post['postTitle'])

        # Filenames are declared here
        baseName = post["postSubmitter"] + "_" + title + "_" + post['postId']
        print(baseName + post['postExt'])

        fileDir = directory / (baseName + post['postExt'])
        tempDir = directory / (baseName + ".tmp")

        try:
            getFile(fileDir, tempDir, post['mediaURL'])
        except FileNameTooLong:
            # The name must be assembled before joining: "/" binds tighter
            # than "+", so an unparenthesised form adds a str to a path
            # object and raises TypeError.
            fileDir = directory / (post['postId'] + post['postExt'])
            tempDir = directory / (post['postId'] + '.tmp')
            getFile(fileDir, tempDir, post['mediaURL'])

    elif content['type'] == 'album':
        images = content['object'].images
        imagesLenght = len(images)
        howManyDownloaded = imagesLenght
        duplicates = 0

        title = nameCorrector(post['postTitle'])
        folderName = post["postSubmitter"] + "_" + title + "_" + post['postId']
        print(folderName, end="\n\n")

        folderDir = directory / folderName

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            folderDir = directory / post['postId']
            os.makedirs(folderDir)

        for i in range(imagesLenght):
            try:
                imageURL = images[i]['mp4']
            except KeyError:
                imageURL = images[i]['link']

            images[i]['Ext'] = getExtension(imageURL)

            fileName = (str(i + 1)
                        + "_"
                        + nameCorrector(str(images[i]['title']))
                        + "_"
                        + images[i]['id'])

            # Filenames are declared here
            fileDir = folderDir / (fileName + images[i]['Ext'])
            tempDir = folderDir / (fileName + ".tmp")

            print(" ({}/{})".format(i + 1, imagesLenght))
            print(" {}".format(fileName + images[i]['Ext']))

            try:
                getFile(fileDir, tempDir, imageURL, indent=2)
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " " * 10, end="\n\n")
                duplicates += 1
                howManyDownloaded -= 1

            # IF FILE NAME IS TOO LONG, IT WONT REGISTER
            except FileNameTooLong:
                # NOTE(review): exceptions raised by the retries below are
                # not caught by the sibling handlers and propagate to the
                # caller - confirm this is intended.
                fileName = (str(i + 1) + "_" + images[i]['id'])
                fileDir = folderDir / (fileName + images[i]['Ext'])
                tempDir = folderDir / (fileName + ".tmp")
                try:
                    getFile(fileDir, tempDir, imageURL, indent=2)
                # IF STILL TOO LONG
                except FileNameTooLong:
                    fileName = str(i + 1)
                    fileDir = folderDir / (fileName + images[i]['Ext'])
                    tempDir = folderDir / (fileName + ".tmp")
                    getFile(fileDir, tempDir, imageURL, indent=2)

            except Exception as exception:
                print("\n Could not get the file")
                print(
                    " "
                    + "{class_name}: {info}".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                howManyDownloaded -= 1

        if duplicates == imagesLenght:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < imagesLenght:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )