def __init__(self, directory, post):
    """Download the post's video and merge its audio track when ffmpeg exists.

    When the ``ffmpeg`` executable cannot be launched, only the file at
    ``post['CONTENTURL']`` is downloaded. Otherwise the video and the
    ``DASH_audio.mp4`` sibling URL are fetched separately and handed to
    ``self._mergeAudio``.

    Args:
        directory: Destination folder; created if missing. It is joined
            with ``/`` below, so a path object is expected.
        post: Mapping providing ``POSTID`` and ``CONTENTURL`` plus the
            fields consumed by ``GLOBAL.config['filename']``.
    """
    extension = ".mp4"
    if not os.path.exists(directory):
        os.makedirs(directory)

    filename = GLOBAL.config['filename'].format(**post) + extension
    shortFilename = post['POSTID'] + extension

    try:
        # Probe for the ffmpeg executable. subprocess.DEVNULL discards its
        # output without leaving an os.devnull handle open.
        subprocess.call("ffmpeg",
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.STDOUT)
    except OSError:
        # The executable could not be launched: keep the video-only file.
        # Catching OSError (not BaseException) lets Ctrl-C propagate.
        getFile(filename, shortFilename, directory, post['CONTENTURL'])
        print("FFMPEG library not found, skipping merging video and audio")
    else:
        videoName = post['POSTID'] + "_video"
        videoURL = post['CONTENTURL']
        audioName = post['POSTID'] + "_audio"
        # The audio track is addressed next to the video file.
        audioURL = videoURL[:videoURL.rfind('/')] + '/DASH_audio.mp4'

        print(directory, filename, sep="\n")

        getFile(videoName, videoName, directory, videoURL, silent=True)
        getFile(audioName, audioName, directory, audioURL, silent=True)
        try:
            self._mergeAudio(videoName, audioName, filename, shortFilename,
                             directory)
        except KeyboardInterrupt:
            # Interrupted merge: drop the partial output and the audio
            # track, and keep the plain video under the final name.
            os.remove(directory / filename)
            os.remove(directory / audioName)
            os.rename(directory / videoName, directory / filename)
def __init__(self, directory, post):
    """Ensure the target folder exists, then download the post's content.

    The file name is ``GLOBAL.config['filename']`` formatted with the
    fields of ``post``; it is printed before ``self.download`` is called
    with it, ``directory`` and ``post['CONTENTURL']``.
    """
    folderMissing = not os.path.exists(directory)
    if folderMissing:
        os.makedirs(directory)

    nameTemplate = GLOBAL.config['filename']
    filename = nameTemplate.format(**post)
    print(filename)

    self.download(filename, directory, post['CONTENTURL'])
def __init__(self, directory, POST):
    """Resolve the media link of ``POST`` and download it into ``directory``.

    ``POST`` is updated in place with ``mediaURL`` and ``postExt``. When
    ``getFile`` reports ``FileNameTooLong`` the download is retried with a
    name built from the post id only.

    Raises:
        NotADownloadableLinkError: ``self.getLink`` raised ``IndexError``.
    """
    try:
        POST['mediaURL'] = self.getLink(POST['postURL'])
    except IndexError:
        raise NotADownloadableLinkError("Could not read the page source")

    POST['postExt'] = getExtension(POST['mediaURL'])

    if not os.path.exists(directory):
        os.makedirs(directory)

    title = nameCorrector(POST['postTitle'])

    # Filenames are declared here
    baseName = "_".join([POST["postSubmitter"], title, POST['postId']])
    print(baseName + POST['postExt'])

    fileDir = directory / (baseName + POST['postExt'])
    tempDir = directory / (baseName + ".tmp")

    try:
        getFile(fileDir, tempDir, POST['mediaURL'])
    except FileNameTooLong:
        shortBase = POST['postId']
        fileDir = directory / (shortBase + POST['postExt'])
        tempDir = directory / (shortBase + ".tmp")
        getFile(fileDir, tempDir, POST['mediaURL'])
def set_method(self, method_str):
    """
    Bind the evaluation routine named by ``method_str`` to ``self.method``.

    :param method_str: method name, compared case-insensitively; only
        "cached_euclidean" is accepted
    :return: None
    :raises Exception: when the name is not recognised
    """
    requested = method_str.lower()
    if requested != "cached_euclidean":
        raise Exception("Incorrect method selected for evaluation")

    self.method = dist_from_cache
    print("Cached euclidean distance method selected for evaluation")
def writeToFile(directory, post):
    """Render a self post as Markdown and write it out.

    Despite its name, ``directory`` is the path that gets opened for
    writing (UTF-8). ``post`` must provide ``TITLE``, ``CONTENTURL``,
    ``CONTENT``, ``SUBREDDIT`` and ``REDDITOR``.
    """
    # Self posts are formatted here
    pieces = [
        "## [", post["TITLE"], "](", post["CONTENTURL"], ")\n",
        post["CONTENT"],
        "\n\n---\n\n",
        "submitted to [r/", post["SUBREDDIT"],
        "](https://www.reddit.com/r/", post["SUBREDDIT"],
        ") by [u/", post["REDDITOR"],
        "](https://www.reddit.com/user/", post["REDDITOR"],
        ")",
    ]
    content = "".join(pieces)

    with io.open(directory, "w", encoding="utf-8") as FILE:
        VanillaPrint(content, file=FILE)

    print("Downloaded")
def __init__(self, directory, POST):
    """Download every URL found in ``POST['CONTENTURL']`` into ``directory``.

    Each file is numbered from 1: the configured file name (and, as the
    short fallback name, the post id) gets the suffix ``" - <n><ext>"``.
    """
    for position, url in enumerate(POST['CONTENTURL'], start=1):
        extension = getExtension(url)

        if not os.path.exists(directory):
            os.makedirs(directory)

        numberedSuffix = ' - ' + str(position) + extension

        filename = GLOBAL.config['filename'].format(**POST) + numberedSuffix
        print(filename)
        shortFilename = POST['POSTID'] + numberedSuffix

        getFile(filename, shortFilename, directory, url)
def set_method(self, method_str):
    """
    Bind the termination check named by ``method_str`` to ``self.method``.

    :param method_str: "num_iterations" or "time_limit", matched
        case-insensitively
    :return: None
    :raises Exception: when the name is not recognised
    """
    # name -> (attribute holding the method, message to announce)
    choices = {
        "num_iterations": (
            "num_iterations",
            "Fixed number of iterations method selected for termination",
        ),
        "time_limit": (
            "time_limit",
            "Fixed time limit method selected for termination",
        ),
    }

    selected = choices.get(method_str.lower())
    if selected is None:
        raise Exception("Incorrect method selected for termination")

    attribute, message = selected
    self.method = getattr(self, attribute)
    print(message)
def set_method(self, method_str):
    """
    Selects the method used by the mutate function.

    :param method_str: "swap", "flip" or "scramble", matched
        case-insensitively
    :return: None
    :raises Exception: when the name is not recognised
    """
    requested = method_str.lower()
    if requested == "swap":
        self.method = self.swap
        print("Swap method selected for mutation")
    elif requested == "flip":
        self.method = self.flip
        # Announce the choice like every other branch does.
        print("Flip method selected for mutation")
    elif requested == "scramble":
        self.method = self.scramble
        print("Scramble method selected for mutation")
    else:
        raise Exception("Incorrect method selected for mutation")
def writeToFile(directory, post):
    """Render a self post as Markdown and write it out.

    Despite its name, ``directory`` is the path that gets opened for
    writing (UTF-8). ``post`` must provide ``postTitle``, ``postURL``,
    ``postContent``, ``postSubreddit`` and ``postSubmitter``.
    """
    # Self posts are formatted here
    pieces = [
        "## [", post["postTitle"], "](", post["postURL"], ")\n",
        post["postContent"],
        "\n\n---\n\n",
        "submitted to [r/", post["postSubreddit"],
        "](https://www.reddit.com/r/", post["postSubreddit"],
        ") by [u/", post["postSubmitter"],
        "](https://www.reddit.com/user/", post["postSubmitter"],
        ")",
    ]
    content = "".join(pieces)

    with io.open(directory, "w", encoding="utf-8") as FILE:
        VanillaPrint(content, file=FILE)

    print("Downloaded")
def set_method(self, method_str):
    """
    Selects the method used by the Initialize function.

    :param method_str: "random_permutations", "demo_random" or
        "greedy_neighbour", matched case-insensitively
    :return: None
    :raises Exception: when the name is not recognised
    """
    requested = method_str.lower()
    if requested == "random_permutations":
        self.method = self.random_permutations
        print("Random permutations method selected for initialization")
    elif requested == "demo_random":
        self.method = self.DEMO_random
        # Announce the choice like every other branch does.
        print("Demo random method selected for initialization")
    elif requested == "greedy_neighbour":
        self.method = self.greedy_neighbour
        print("Greedy Neighbour method selected for initialization")
    else:
        raise Exception("Incorrect method selected for initialization")
def __init__(self, directory, post):
    """Write the post to a ``.md`` file inside ``directory``.

    The file is named from ``GLOBAL.config['filename']``; if writing it
    raises ``FileNotFoundError`` the post id is used as the name instead.

    Raises:
        FileAlreadyExistsError: the target ``.md`` file already exists.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)

    filename = GLOBAL.config['filename'].format(**post)
    markdownName = filename + ".md"

    fileDir = directory / markdownName
    print(fileDir)
    print(markdownName)

    if Path.is_file(fileDir):
        raise FileAlreadyExistsError

    try:
        self.writeToFile(fileDir, post)
    except FileNotFoundError:
        fallbackName = post['POSTID'] + ".md"
        fileDir = directory / fallbackName
        self.writeToFile(fileDir, post)
def __init__(self, directory, post):
    """Write the post to a ``.md`` file inside ``directory``.

    The file is named ``<submitter>_<title>_<id>.md``; if writing it
    raises ``FileNotFoundError`` the post id alone is used as the name.

    Raises:
        FileAlreadyExistsError: the target ``.md`` file already exists.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)

    title = nameCorrector(post['postTitle'])

    # Filenames are declared here
    markdownName = "_".join(
        [post["postSubmitter"], title, post['postId']]) + ".md"
    print(markdownName)

    fileDir = directory / markdownName

    if Path.is_file(fileDir):
        raise FileAlreadyExistsError

    try:
        self.writeToFile(fileDir, post)
    except FileNotFoundError:
        fallbackName = post['postId'] + ".md"
        fileDir = directory / fallbackName
        self.writeToFile(fileDir, post)
def __init__(self, directory, POST):
    """Download the file at ``POST['postURL']`` into ``directory``.

    ``POST`` is updated in place with ``postExt``. When ``getFile``
    reports ``FileNameTooLong`` the download is retried with a name built
    from the post id only.
    """
    POST['postExt'] = getExtension(POST['postURL'])

    if not os.path.exists(directory):
        os.makedirs(directory)

    title = nameCorrector(POST['postTitle'])

    # Filenames are declared here
    baseName = "_".join([POST["postSubmitter"], title, POST['postId']])
    print(baseName + POST['postExt'])

    fileDir = directory / (baseName + POST['postExt'])
    tempDir = directory / (baseName + ".tmp")

    try:
        getFile(fileDir, tempDir, POST['postURL'])
    except FileNameTooLong:
        shortBase = POST['postId']
        fileDir = directory / (shortBase + POST['postExt'])
        tempDir = directory / (shortBase + ".tmp")
        getFile(fileDir, tempDir, POST['postURL'])
def set_method(self, method_str):
    """
    Bind the recombination operator named by ``method_str`` to ``self.method``.

    :param method_str: "cut_and_crossfill", "order_crossover", "scx" or
        "pmx", matched case-insensitively
    :return: None
    :raises Exception: when the name is not recognised
    """
    requested = method_str.lower()

    if requested == "cut_and_crossfill":
        operator = cut_and_crossfill
        message = "Cut and crossfill method selected for recombination"
    elif requested == "order_crossover":
        operator = order_crossover
        message = "Order crossover method selected for recombination"
    elif requested == "scx":
        operator = scx
        message = "Sequential constructive crossover operator selected for recombination"
    elif requested == "pmx":
        operator = pmx
        message = "PMX method selected for recombination"
    else:
        raise Exception("Incorrect method selected for recombination")

    self.method = operator
    print(message)
def set_method(self, method_str):
    """
    Bind the parent-selection routine named by ``method_str`` to ``self.method``.

    :param method_str: "random", "mu_plus_lambda" or "roulette_wheel",
        matched case-insensitively
    :return: None
    :raises Exception: when the name is not recognised
    """
    # name -> (attribute holding the method, message to announce)
    choices = {
        "random": (
            "random",
            "Random method selected for parent selection",
        ),
        "mu_plus_lambda": (
            "mu_plus_lambda",
            "mu+lambda method selected for parent selection",
        ),
        "roulette_wheel": (
            "roulette_wheel",
            "Roulette Wheel method selected for parent selection",
        ),
    }

    selected = choices.get(method_str.lower())
    if selected is None:
        raise Exception("Incorrect method selected for parent selection")

    attribute, message = selected
    self.method = getattr(self, attribute)
    print(message)
def set_method(self, method_str):
    """
    Bind the survivor-selection routine named by ``method_str`` to ``self.method``.

    :param method_str: "random", "mu_plus_lambda" or "mu_comma_lambda",
        matched case-insensitively
    :return: None
    :raises Exception: when the name is not recognised
    """
    # name -> (attribute holding the method, message to announce)
    choices = {
        "random": (
            "random",
            "Random method selected for survivor selection",
        ),
        "mu_plus_lambda": (
            "mu_plus_lambda",
            "mu+lambda method selected for survivor selection",
        ),
        "mu_comma_lambda": (
            "mu_comma_lambda",
            "mu,lambda method selected for survivor selection",
        ),
    }

    selected = choices.get(method_str.lower())
    if selected is None:
        raise Exception("Incorrect method selected for survivor selection")

    attribute, message = selected
    self.method = getattr(self, attribute)
    print(message)
def __init__(self, directory, post):
    """Download the post's video and merge its audio track when ffmpeg exists.

    When the ``ffmpeg`` executable cannot be launched, only the file at
    ``post['CONTENTURL']`` is downloaded. Otherwise the video is fetched,
    each candidate audio URL is tried in turn, and the first one that
    downloads is merged through ``self._mergeAudio``. If none downloads,
    the video alone is kept under the final name.

    Args:
        directory: Destination folder; created if missing. It is joined
            with ``/`` below, so a path object is expected.
        post: Mapping providing ``POSTID`` and ``CONTENTURL`` plus the
            fields consumed by ``GLOBAL.config['filename']``.
    """
    extension = ".mp4"
    if not os.path.exists(directory):
        os.makedirs(directory)

    filename = GLOBAL.config['filename'].format(**post) + extension
    shortFilename = post['POSTID'] + extension

    try:
        # Probe for the ffmpeg executable. subprocess.DEVNULL discards its
        # output without leaving an os.devnull handle open.
        subprocess.call("ffmpeg",
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.STDOUT)
    except OSError:
        # The executable could not be launched: keep the video-only file.
        # Catching OSError (not a bare except) lets Ctrl-C propagate.
        getFile(filename, shortFilename, directory, post['CONTENTURL'])
        print("FFMPEG library not found, skipping merging video and audio")
    else:
        videoName = post['POSTID'] + "_video"
        videoURL = post['CONTENTURL']
        audioName = post['POSTID'] + "_audio"
        baseURL = videoURL[:videoURL.rfind('/')]
        # Candidate audio locations, tried in this order.
        audioURL = [
            baseURL + '/DASH_audio.mp4',
            baseURL + '/audio?source=fallback.mp4',
        ]

        print(directory, filename, sep="\n")

        getFile(videoName, videoName, directory, videoURL, silent=True)

        for candidate in audioURL:
            try:
                getFile(audioName, audioName, directory, candidate,
                        silent=True)
            except Exception:
                # Best effort: this candidate failed, try the next one.
                continue
            else:
                break
        else:
            # Could not find a valid audio file, do not try to merge.
            print("No audio for this video.")
            os.rename(directory / videoName, directory / filename)
            return

        try:
            self._mergeAudio(videoName, audioName, filename, shortFilename,
                             directory)
        except KeyboardInterrupt:
            # Interrupted merge: drop the partial output and the audio
            # track, and keep the plain video under the final name.
            os.remove(directory / filename)
            os.remove(directory / audioName)
            os.rename(directory / videoName, directory / filename)
def downloadAlbum(self, images, count):
    """Download the first ``count`` entries of ``images`` into an album folder.

    The folder is named from ``GLOBAL.config['filename']`` (falling back
    to the post id when creating it raises ``FileNotFoundError``). Skipped
    types count as downloaded; already-present files count as duplicates.

    Raises:
        FileAlreadyExistsError: every entry was a duplicate.
        AlbumNotDownloadedCompletely: at least one entry failed.
    """
    folderName = GLOBAL.config['filename'].format(**self.post)
    folderDir = self.directory / folderName

    howManyDownloaded = 0
    duplicates = 0

    try:
        if not os.path.exists(folderDir):
            os.makedirs(folderDir)
    except FileNotFoundError:
        folderDir = self.directory / self.post['POSTID']
        os.makedirs(folderDir)

    print(folderName)

    for number in range(1, count + 1):
        image = images[number - 1]

        urlPath = urllib.parse.urlparse(image['url']).path
        extension = os.path.splitext(urlPath)[1]

        filename = "_".join([str(number), image['id']]) + extension
        shortFilename = str(number) + "_" + image['id']

        print("\n ({}/{})".format(number, count))

        try:
            getFile(filename, shortFilename, folderDir, image['url'],
                    indent=2)
            howManyDownloaded += 1
            print()
        except FileAlreadyExistsError:
            print(" The file already exists" + " " * 10, end="\n\n")
            duplicates += 1
        except TypeInSkip:
            print(" Skipping...")
            howManyDownloaded += 1
        except Exception as exception:
            print("\n Could not get the file")
            report = "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                class_name=exception.__class__.__name__,
                info=str(exception))
            print(" " + report + "\n")
            print(GLOBAL.log_stream.getvalue(), noPrint=True)

    if duplicates == count:
        raise FileAlreadyExistsError
    if howManyDownloaded + duplicates < count:
        raise AlbumNotDownloadedCompletely(
            "Album Not Downloaded Completely"
        )
def __init__(self, directory, post):
    """Download the image(s) linked from ``post['postURL']``.

    A single link is saved directly in ``directory``; several links are
    saved as numbered files in a per-post folder.

    Raises:
        NotADownloadableLinkError: ``self.getLinks`` raised an HTTP error.
        FileAlreadyExistsError: every album file already existed.
        AlbumNotDownloadedCompletely: at least one album file failed.
    """
    try:
        IMAGES = self.getLinks(post['postURL'])
    except urllib.error.HTTPError:
        raise NotADownloadableLinkError("Not a downloadable link")

    imagesLenght = len(IMAGES)
    howManyDownloaded = imagesLenght
    duplicates = 0

    if imagesLenght == 1:
        extension = getExtension(IMAGES[0])

        # Filenames are declared here
        title = nameCorrector(post['postTitle'])
        baseName = post["postSubmitter"] + "_" + title + "_" + post['postId']
        print(baseName + extension)

        fileDir = directory / (baseName + extension)
        tempDir = directory / (baseName + ".tmp")

        imageURL = IMAGES[0]
        if not ('https://' in imageURL or 'http://' in imageURL):
            imageURL = "https://" + imageURL

        try:
            getFile(fileDir, tempDir, imageURL)
        except FileNameTooLong:
            fileDir = directory / (post['postId'] + extension)
            tempDir = directory / (post['postId'] + '.tmp')
            getFile(fileDir, tempDir, imageURL)
        return

    # Album: one numbered file per link inside a dedicated folder.
    title = nameCorrector(post['postTitle'])
    albumName = post["postSubmitter"] + "_" + title + "_" + post['postId']
    print(albumName, end="\n\n")

    folderDir = directory / albumName

    try:
        if not os.path.exists(folderDir):
            os.makedirs(folderDir)
    except FileNotFoundError:
        folderDir = directory / post['postId']
        os.makedirs(folderDir)

    for number, link in enumerate(IMAGES, start=1):
        extension = getExtension(link)
        fileName = str(number)

        imageURL = link
        if not ('https://' in imageURL or 'http://' in imageURL):
            imageURL = "https://" + imageURL

        fileDir = folderDir / (fileName + extension)
        tempDir = folderDir / (fileName + ".tmp")

        print(" ({}/{})".format(number, imagesLenght))
        print(" {}".format(fileName + extension))

        try:
            getFile(fileDir, tempDir, imageURL, indent=2)
            print()
        except FileAlreadyExistsError:
            print(" The file already exists" + " " * 10, end="\n\n")
            duplicates += 1
            howManyDownloaded -= 1
        except Exception as exception:
            print("\n Could not get the file")
            report = "{class_name}: {info}".format(
                class_name=exception.__class__.__name__,
                info=str(exception))
            print(" " + report + "\n")
            howManyDownloaded -= 1

    if duplicates == imagesLenght:
        raise FileAlreadyExistsError
    elif howManyDownloaded + duplicates < imagesLenght:
        raise AlbumNotDownloadedCompletely(
            "Album Not Downloaded Completely")
def the_tsp_problem():
    """Run the genetic algorithm on a TSP data set and report the results.

    Builds the problem instance and its operator modules from the
    module-level settings, evolves the population until the terminator
    says stop, optionally starts the animation, then prints fitness and
    timing summaries and plots the recorded history.
    """
    # Initialize modules
    start_timer("setup")

    # Available data sets; `actual_data` below selects the one in use.
    x_large_data = "../data/TSP_Italy_16862.txt"
    big_data = "../data/TSP_Canada_4663.txt"
    middle_data = "../data/TSP_Uruguay_734.txt"
    small_data = "../data/TSP_WesternSahara_29.txt"

    actual_data = parse(middle_data)

    # Create Instance
    tsp = TSP(graph=actual_data,
              population_size=POP_SIZE,
              mating_pool_size=MATING_POOL_SIZE,
              mutation_rate=MUTATION_RATE,
              num_generations=NUM_GENERATIONS)

    # Initialize modules
    initializer = Initialization(tsp, INIT_METHOD)
    parent_selector = Parent_Selection(tsp, SELECT_METHOD)
    mutator = Mutation(tsp, MUTATION_METHOD)
    evaluator = Evaluation(tsp, EVALUATION_METHOD)
    recombinator = Recombination(tsp, CROSSOVER_METHOD, evaluator)
    survivor_selector = Survivor_Selection(tsp, SURVIVOR_METHOD)
    terminator = Termination(NUM_GENERATIONS, TIME_LIMIT, TERMINATOR_METHOD)

    # Initialize Population and fitness
    initializer.initialize()
    evaluator.evaluate()
    end_timer("setup")

    print("*" * 20)
    print("Initial Mean Fitness: {}\t Best Fitness:{}".format(
        tsp.fitness.mean(), tsp.fitness.max()))
    print("*" * 20)

    # NOTE(review): current_time is never advanced in this function, so the
    # terminator always receives 0 - confirm it tracks elapsed time itself.
    current_time = 0
    fitness = []
    generation = []

    while terminator.method(tsp.current_generation, current_time):
        # select parents and spawn children
        parent_selector.select()
        recombinator.recombine()

        # mutate population and children
        mutator.mutate_population()
        mutator.mutate_children()

        # re-evaluate children and population
        evaluator.evaluate(use_mask=True)
        evaluator.evaluate_children()

        # select from parents and children to form new population
        survivor_selector.select()

        # add history and print debugs every 10%
        tsp.add_history("mean_fitness", tsp.fitness.mean())
        tsp.add_history("best_fitness", tsp.fitness.max())
        std = tsp.fitness.std()
        tsp.add_history("std_dev", std)
        tsp.current_generation += 1

        # max(1, ...) keeps the modulus valid for runs shorter than ten
        # generations, where `num_generations // 10` would be zero.
        report_every = max(1, tsp.num_generations // 10)
        if not (tsp.current_generation % report_every):
            print(
                "Generation {:<4} Mean Fitness: {:5.2f}\t Best Fitness:{:5.2f}\t STD DEV: {:.2f}"
                .format(tsp.current_generation, tsp.fitness.mean(),
                        tsp.fitness.max(), std))
            # NOTE(review): the snapshot below is assumed to belong to the
            # reporting step (once per interval) - confirm against the
            # original layout.
            # argmax over the whole fitness array picks the best individual;
            # argmax of the scalar maximum would always yield index 0.
            tsp.add_history("best_individual",
                            tsp.population[np.argmax(tsp.fitness)])
            fitness.append(tsp.fitness.max())
            generation.append(tsp.current_generation)

    # If animation is set to true
    if ANIMATE:
        animator = Animation(actual_data, tsp.history["best_individual"],
                             fitness, generation)
        animator.start()

    # finished, print results
    print("*" * 20)
    print("Final Mean Fitness: {}\t Best Fitness:{}".format(
        tsp.fitness.mean(), tsp.fitness.max()))

    print("*" * 10 + "\nFunction Times (in ms):\n")
    time_sum = 0
    for k, v in get_times():
        # Per-function times are shown in milliseconds, the total in seconds.
        print("{:16}\t{:.2f}".format(k, v * 1000))
        time_sum += v
    print("-" * 20)
    print("Total Time:\t{:.2f} seconds".format(time_sum))

    # plot history
    tsp.plot_history("mean_fitness")
    tsp.plot_history("best_fitness")
    tsp.plot_history("std_dev")
def _hook(d): if d['status'] == 'finished': return print("Downloaded") downloadedMbs = int(d['downloaded_bytes'] * (10**(-6))) fileSize = int(d['total_bytes'] * (10**(-6))) sys.stdout.write("{}Mb/{}Mb\r".format(downloadedMbs, fileSize)) sys.stdout.flush()
def __init__(self, directory, post):
    """Download the image or album that ``post['CONTENTURL']`` points to.

    A single image is saved directly in ``directory``; an album is saved
    as numbered files in a per-post folder.

    Args:
        directory: Destination folder; created if missing. It is joined
            with ``/`` below, so a path object is expected.
        post: Mapping providing ``CONTENTURL`` and ``POSTID`` plus the
            fields consumed by ``GLOBAL.config['filename']``. It is
            updated in place with ``MEDIAURL`` and ``EXTENSION`` for
            single images.

    Raises:
        FileAlreadyExistsError: every album file already existed.
        AlbumNotDownloadedCompletely: at least one album file failed.
    """
    self.imgurClient = self.initImgur()

    imgurID = self.getId(post['CONTENTURL'])
    content = self.getLink(imgurID)

    if not os.path.exists(directory):
        os.makedirs(directory)

    if content['type'] == 'image':
        # Prefer the mp4 rendition when the object exposes one.
        try:
            post['MEDIAURL'] = content['object'].mp4
        except AttributeError:
            post['MEDIAURL'] = content['object'].link

        post['EXTENSION'] = getExtension(post['MEDIAURL'])

        filename = GLOBAL.config['filename'].format(
            **post) + post["EXTENSION"]
        shortFilename = post['POSTID'] + post['EXTENSION']

        getFile(filename, shortFilename, directory, post['MEDIAURL'])

    elif content['type'] == 'album':
        images = content['object'].images
        imagesLenght = len(images)
        howManyDownloaded = imagesLenght
        duplicates = 0

        filename = GLOBAL.config['filename'].format(**post)
        print(filename)

        folderDir = directory / filename

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            folderDir = directory / post['POSTID']
            os.makedirs(folderDir)

        for i in range(imagesLenght):
            # Prefer the mp4 rendition when the entry has one.
            try:
                imageURL = images[i]['mp4']
            except KeyError:
                imageURL = images[i]['link']

            images[i]['Ext'] = getExtension(imageURL)

            # The extension is appended so album files are named like the
            # single-image case; it was previously computed but unused.
            filename = (str(i + 1) + "_" +
                        nameCorrector(str(images[i]['title'])) + "_" +
                        images[i]['id'] + images[i]['Ext'])
            shortFilename = (str(i + 1) + "_" + images[i]['id'] +
                             images[i]['Ext'])

            print("\n ({}/{})".format(i + 1, imagesLenght))

            try:
                getFile(filename, shortFilename, folderDir, imageURL,
                        indent=2)
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " " * 10, end="\n\n")
                duplicates += 1
                howManyDownloaded -= 1
            except Exception as exception:
                # Best effort: report the failure and continue the album.
                print("\n Could not get the file")
                print(" " + "{class_name}: {info}".format(
                    class_name=exception.__class__.__name__,
                    info=str(exception)) + "\n")
                howManyDownloaded -= 1

        if duplicates == imagesLenght:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < imagesLenght:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely")
def getFile(filename,
            shortFilename,
            folderDir,
            imageURL,
            indent=0,
            silent=False):
    """Download ``imageURL`` into ``folderDir`` as ``filename``.

    The body is streamed to ``<filename>.tmp`` and renamed once complete.
    Up to three attempts are made; connection errors trigger a retry and a
    ``FileNotFoundError`` switches to ``shortFilename`` for the next one.

    Args:
        filename: Preferred file name inside ``folderDir``.
        shortFilename: Fallback name used after a ``FileNotFoundError``.
        folderDir: Destination folder; created if missing.
        imageURL: URL to fetch.
        indent: Number of spaces prefixed to progress messages.
        silent: Suppress progress messages when true.

    Raises:
        TypeInSkip: ``filename`` contains an extension of a skipped type.
        DomainInSkip: ``imageURL`` contains a skipped domain.
        FileAlreadyExistsError: the target exists, or (with ``no_dupes``)
            the content hash was already recorded.
        FailedToDownload: all attempts failed.
    """
    FORMATS = {
        "videos": [".mp4", ".webm"],
        "images": [".jpg", ".jpeg", ".png", ".bmp"],
        "gifs": [".gif"],
        "self": []
    }

    for skippedType in GLOBAL.arguments.skip:
        for extension in FORMATS[skippedType]:
            if extension in filename:
                raise TypeInSkip

    if any(domain in imageURL for domain in GLOBAL.arguments.skip_domain):
        raise DomainInSkip

    headerMozilla = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 '
                      'Safari/537.36 OPR/54.0.2952.64',
        'Accept': 'text/html,application/xhtml+xml,application/xml;'
                  'q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive'
    }

    if not os.path.exists(folderDir):
        os.makedirs(folderDir)

    # The browser-like headers are sent to every host except imgur.
    header_q = "imgur" not in imageURL

    if not silent:
        print(" " * indent + str(folderDir),
              " " * indent + str(filename),
              sep="\n")

    # Number of attempts
    for _ in range(3):
        fileDir = Path(folderDir) / filename
        tempDir = Path(folderDir) / (filename + ".tmp")

        if os.path.isfile(fileDir):
            raise FileAlreadyExistsError

        try:
            if not silent:
                print(imageURL)

            if header_q:
                r = requests.get(imageURL, headers=headerMozilla, stream=True)
            else:
                r = requests.get(imageURL, stream=True)

            try:
                with open(tempDir, 'wb') as out_file:
                    # Content-Length may be absent (e.g. chunked replies);
                    # fall back to 0 so the progress bar still works.
                    total_length = int(r.headers.get('content-length') or 0)
                    for chunk in progress.bar(
                            r.iter_content(chunk_size=2048),
                            expected_size=(total_length / 2048) + 1):
                        if chunk:
                            out_file.write(chunk)
            finally:
                # Release the streamed connection even when writing fails.
                r.close()

            fileHash = createHash(tempDir)
            if GLOBAL.arguments.no_dupes:
                if fileHash in GLOBAL.downloadedPosts():
                    os.remove(tempDir)
                    raise FileAlreadyExistsError
            GLOBAL.downloadedPosts.add(fileHash)

            os.rename(tempDir, fileDir)
            if not silent:
                print(" " * indent + "Downloaded" + " " * 10)
            return None
        except ConnectionResetError:
            if not silent:
                print(" " * indent + "Connection Reset Error\nTrying again\n")
        except ConnectionError as exception:
            if not silent:
                print(" " * indent + str(exception))
            if not silent:
                print(" " * indent + "Trying again\n")
        except ChunkedEncodingError as exception:
            if not silent:
                print(" " * indent + str(exception))
            if not silent:
                print(" " * indent + "Trying again\n")
        except FileNotFoundError:
            # The temp file could not be created; retry with the short name.
            filename = shortFilename

    raise FailedToDownload
def __init__(self, directory, post):
    """Download the image(s) linked from ``post['CONTENTURL']``.

    A single link is saved directly in ``directory``; several links are
    saved as numbered files in a per-post folder.

    Args:
        directory: Destination folder. It is joined with ``/`` below, so
            a path object is expected.
        post: Mapping providing ``CONTENTURL`` and ``POSTID`` plus the
            fields consumed by ``GLOBAL.config['filename']``.

    Raises:
        NotADownloadableLinkError: ``self.getLinks`` raised an HTTP error.
        FileAlreadyExistsError: every album file already existed.
        AlbumNotDownloadedCompletely: at least one album file failed.
    """
    try:
        IMAGES = self.getLinks(post['CONTENTURL'])
    except urllib.error.HTTPError:
        raise NotADownloadableLinkError("Not a downloadable link")

    imagesLenght = len(IMAGES)
    howManyDownloaded = imagesLenght
    duplicates = 0

    if imagesLenght == 1:
        extension = getExtension(IMAGES[0])

        # Filenames are declared here. Both names use the extension taken
        # from the link itself rather than a `post["EXTENSION"]` entry.
        filename = GLOBAL.config['filename'].format(**post) + extension
        shortFilename = post['POSTID'] + extension

        imageURL = IMAGES[0]
        # Add a scheme only when the link has none ("and", not "or":
        # a complete URL never contains both prefixes).
        if 'https://' not in imageURL and 'http://' not in imageURL:
            imageURL = "https://" + imageURL

        getFile(filename, shortFilename, directory, imageURL)

    else:
        filename = GLOBAL.config['filename'].format(**post)
        print(filename)

        folderDir = directory / filename

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            folderDir = directory / post['POSTID']
            os.makedirs(folderDir)

        for i in range(imagesLenght):
            extension = getExtension(IMAGES[i])
            filename = str(i + 1) + extension

            imageURL = IMAGES[i]
            if 'https://' not in imageURL and 'http://' not in imageURL:
                imageURL = "https://" + imageURL

            print(" ({}/{})".format(i + 1, imagesLenght))
            print(" {}".format(filename))

            try:
                getFile(filename, filename, folderDir, imageURL, indent=2)
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " " * 10, end="\n\n")
                duplicates += 1
                howManyDownloaded -= 1
            except Exception as exception:
                # Best effort: report the failure and continue the album.
                print("\n Could not get the file")
                print(" " + "{class_name}: {info}".format(
                    class_name=exception.__class__.__name__,
                    info=str(exception)) + "\n")
                howManyDownloaded -= 1

        if duplicates == imagesLenght:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < imagesLenght:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely")
def downloadAlbum(self, images):
    """Download every entry of ``images["images"]`` into an album folder.

    ``images["count"]`` gives the number of entries to process. The folder
    is named from ``GLOBAL.config['filename']`` (falling back to the post
    id when creating it raises ``FileNotFoundError``). Skipped types count
    as downloaded; already-present files count as duplicates.

    Raises:
        FileAlreadyExistsError: every entry was a duplicate.
        AlbumNotDownloadedCompletely: at least one entry failed.
    """
    folderName = GLOBAL.config['filename'].format(**self.post)
    folderDir = self.directory / folderName

    imagesLenght = images["count"]
    howManyDownloaded = 0
    duplicates = 0

    try:
        if not os.path.exists(folderDir):
            os.makedirs(folderDir)
    except FileNotFoundError:
        folderDir = self.directory / self.post['POSTID']
        os.makedirs(folderDir)

    print(folderName)

    for number in range(1, imagesLenght + 1):
        entry = images["images"][number - 1]

        extension = self.validateExtension(entry["ext"])
        imageURL = self.IMGUR_IMAGE_DOMAIN + entry["hash"] + extension

        nameParts = [
            str(number),
            nameCorrector(entry['title']),
            entry['hash'],
        ]
        filename = "_".join(nameParts) + extension
        shortFilename = str(number) + "_" + entry['hash']

        print("\n ({}/{})".format(number, imagesLenght))

        try:
            getFile(filename, shortFilename, folderDir, imageURL, indent=2)
            howManyDownloaded += 1
            print()
        except FileAlreadyExistsError:
            print(" The file already exists" + " " * 10, end="\n\n")
            duplicates += 1
        except TypeInSkip:
            print(" Skipping...")
            howManyDownloaded += 1
        except Exception as exception:
            print("\n Could not get the file")
            report = "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                class_name=exception.__class__.__name__,
                info=str(exception))
            print(" " + report + "\n")
            print(GLOBAL.log_stream.getvalue(), noPrint=True)

    if duplicates == imagesLenght:
        raise FileAlreadyExistsError
    elif howManyDownloaded + duplicates < imagesLenght:
        raise AlbumNotDownloadedCompletely(
            "Album Not Downloaded Completely")
def getFile(filename,
            shortFilename,
            folderDir,
            imageURL,
            indent=0,
            silent=False):
    """Download ``imageURL`` into ``folderDir`` as ``filename``.

    The file is retrieved to ``<filename>.tmp`` and renamed once complete.
    Up to three attempts are made; a connection reset triggers a retry and
    a ``FileNotFoundError`` switches to ``shortFilename`` for the next one.

    Raises:
        TypeInSkip: ``filename`` contains an extension of a skipped type.
        DomainInSkip: ``imageURL`` contains a skipped domain.
        FileAlreadyExistsError: the target exists, or (with ``no_dupes``)
            the content hash was already recorded.
        FailedToDownload: all attempts failed.
    """
    FORMATS = {
        "videos": [".mp4", ".webm"],
        "images": [".jpg", ".jpeg", ".png", ".bmp"],
        "gifs": [".gif"]
    }

    skipRequested = any(suffix in filename
                        for kind in GLOBAL.arguments.skip
                        for suffix in FORMATS[kind])
    if skipRequested:
        raise TypeInSkip

    for domain in GLOBAL.arguments.skip_domain:
        if domain in imageURL:
            raise DomainInSkip

    headers = [
        ("User-Agent", ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
                        "Safari/537.36 OPR/54.0.2952.64")),
        ("Accept", ("text/html,application/xhtml+xml,application/xml;"
                    "q=0.9,image/webp,image/apng,*/*;q=0.8")),
        ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
        ("Accept-Encoding", "none"),
        ("Accept-Language", "en-US,en;q=0.8"),
        ("Connection", "keep-alive")
    ]

    if not os.path.exists(folderDir):
        os.makedirs(folderDir)

    # The browser-like headers are attached for every host except imgur.
    opener = urllib.request.build_opener()
    if "imgur" not in imageURL:
        opener.addheaders = headers
    urllib.request.install_opener(opener)

    if not silent:
        padding = " " * indent
        print(padding + str(folderDir), padding + str(filename), sep="\n")

    attemptsLeft = 3
    while attemptsLeft > 0:
        attemptsLeft -= 1

        fileDir = Path(folderDir) / filename
        tempDir = Path(folderDir) / (filename + ".tmp")

        if os.path.isfile(fileDir):
            raise FileAlreadyExistsError

        try:
            urllib.request.urlretrieve(imageURL,
                                       tempDir,
                                       reporthook=dlProgress)

            fileHash = createHash(tempDir)
            if GLOBAL.arguments.no_dupes:
                if fileHash in GLOBAL.downloadedPosts():
                    os.remove(tempDir)
                    raise FileAlreadyExistsError
            GLOBAL.downloadedPosts.add(fileHash)

            os.rename(tempDir, fileDir)
            if not silent:
                print(" " * indent + "Downloaded" + " " * 10)
            return None
        except ConnectionResetError as exception:
            if not silent:
                print(" " * indent + str(exception))
                print(" " * indent + "Trying again\n")
        except FileNotFoundError:
            # Retry with the short name on the next attempt.
            filename = shortFilename

    raise FailedToDownload
def __init__(self, directory, post):
    """Download the image or album that ``post['postURL']`` points to.

    A single image is saved directly in ``directory``; an album is saved
    as numbered files in a per-post folder. Names that trigger
    ``FileNameTooLong`` are retried with progressively shorter names.

    Args:
        directory: Destination folder; created if missing. It is joined
            with ``/`` below, so a path object is expected.
        post: Mapping providing ``postURL``, ``postTitle``,
            ``postSubmitter`` and ``postId``. It is updated in place with
            ``mediaURL`` and ``postExt`` for single images.

    Raises:
        FileAlreadyExistsError: every album file already existed.
        AlbumNotDownloadedCompletely: at least one album file failed.
    """
    self.imgurClient = self.initImgur()

    imgurID = self.getId(post['postURL'])
    content = self.getLink(imgurID)

    if not os.path.exists(directory):
        os.makedirs(directory)

    if content['type'] == 'image':
        # Prefer the mp4 rendition when the object exposes one.
        try:
            post['mediaURL'] = content['object'].mp4
        except AttributeError:
            post['mediaURL'] = content['object'].link

        post['postExt'] = getExtension(post['mediaURL'])

        title = nameCorrector(post['postTitle'])

        # Filenames are declared here
        print(post["postSubmitter"] + "_" + title + "_" + post['postId'] +
              post['postExt'])

        fileDir = directory / (post["postSubmitter"] + "_" + title + "_" +
                               post['postId'] + post['postExt'])
        tempDir = directory / (post["postSubmitter"] + "_" + title + "_" +
                               post['postId'] + ".tmp")

        try:
            getFile(fileDir, tempDir, post['mediaURL'])
        except FileNameTooLong:
            # Parentheses are required: "/" binds tighter than "+", and a
            # path object cannot be concatenated with a string.
            fileDir = directory / (post['postId'] + post['postExt'])
            tempDir = directory / (post['postId'] + '.tmp')
            getFile(fileDir, tempDir, post['mediaURL'])

    elif content['type'] == 'album':
        images = content['object'].images
        imagesLenght = len(images)
        howManyDownloaded = imagesLenght
        duplicates = 0

        title = nameCorrector(post['postTitle'])
        print(post["postSubmitter"] + "_" + title + "_" + post['postId'],
              end="\n\n")

        folderDir = directory / (post["postSubmitter"] + "_" + title + "_" +
                                 post['postId'])

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            folderDir = directory / post['postId']
            os.makedirs(folderDir)

        for i in range(imagesLenght):
            # Prefer the mp4 rendition when the entry has one.
            try:
                imageURL = images[i]['mp4']
            except KeyError:
                imageURL = images[i]['link']

            images[i]['Ext'] = getExtension(imageURL)

            fileName = (str(i + 1) + "_" +
                        nameCorrector(str(images[i]['title'])) + "_" +
                        images[i]['id'])

            # Filenames are declared here
            fileDir = folderDir / (fileName + images[i]['Ext'])
            tempDir = folderDir / (fileName + ".tmp")

            print(" ({}/{})".format(i + 1, imagesLenght))
            print(" {}".format(fileName + images[i]['Ext']))

            try:
                getFile(fileDir, tempDir, imageURL, indent=2)
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " " * 10, end="\n\n")
                duplicates += 1
                howManyDownloaded -= 1

            # IF FILE NAME IS TOO LONG, IT WONT REGISTER
            except FileNameTooLong:
                fileName = (str(i + 1) + "_" + images[i]['id'])
                fileDir = folderDir / (fileName + images[i]['Ext'])
                tempDir = folderDir / (fileName + ".tmp")
                try:
                    getFile(fileDir, tempDir, imageURL, indent=2)
                # IF STILL TOO LONG
                except FileNameTooLong:
                    fileName = str(i + 1)
                    fileDir = folderDir / (fileName + images[i]['Ext'])
                    tempDir = folderDir / (fileName + ".tmp")
                    getFile(fileDir, tempDir, imageURL, indent=2)

            except Exception as exception:
                # Best effort: report the failure and continue the album.
                print("\n Could not get the file")
                print(" " + "{class_name}: {info}".format(
                    class_name=exception.__class__.__name__,
                    info=str(exception)) + "\n")
                howManyDownloaded -= 1

        if duplicates == imagesLenght:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < imagesLenght:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )