def __download() -> None:
    """
    Executed when the download button is triggered.
    :return: None
    """
    value = clicked.get()
    # proceed only when a real resolution/format has been selected
    if value not in ('0', 'off'):
        extension = 'mp3' if value == 'audio' else 'mp4'
        dl = Downloader(extension=extension,
                        resolution=value,
                        path=filedialog.askdirectory(),
                        video=video)
        dl.download()
    else:
        messagebox.showinfo('No resolution selected', 'Please select a resolution')
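# A minimal sketch (assumed, not taken from the original file) of the Tk widgets
# that __download relies on: `clicked` as a StringVar fed by an OptionMenu, and a
# button wired to the handler. The window layout and the option values listed
# below are assumptions; `video` and Downloader come from the surrounding module
# and are left out here.
import tkinter as tk
from tkinter import filedialog, messagebox

root = tk.Tk()
clicked = tk.StringVar(value='0')  # '0' means "nothing selected yet"
tk.OptionMenu(root, clicked, 'audio', '360p', '720p', '1080p').pack()
tk.Button(root, text='Download', command=__download).pack()
root.mainloop()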
def main(args=None):
    args = parse_parameters()

    if os.path.isabs(args.image_list_file):
        image_list_file = args.image_list_file
    else:
        image_list_file = os.path.join(os.getcwd(), args.image_list_file)

    if os.path.isabs(args.download_folder):
        download_folder = args.download_folder
    else:
        download_folder = os.path.join(os.getcwd(), args.download_folder)

    if not os.path.isfile(image_list_file):
        print("The given path " + image_list_file + " is not a file. Abort.")
        sys.exit(1)

    if args.command == "download":
        downloader = Downloader(download_folder, args.threads, args.ratelimit_downloads,
                                args.ratelimit_interval, args.verbose, args.tarfile,
                                args.progressbar)
        downloader.download_list(image_list_file)
    elif args.command == "status":
        downloader = Downloader(download_folder, args.threads, 1, 1, args.verbose,
                                args.tarfile, args.progressbar)
        downloader.check_status(image_list_file)
    else:
        print("Command not known: " + args.command)
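# A minimal sketch of what parse_parameters() might look like, inferred only from
# the attributes main() reads above. The flag names, help strings, and defaults
# (borrowed from the test's "default params") are assumptions, not the project's
# actual CLI definition.
import argparse

def parse_parameters():
    parser = argparse.ArgumentParser(description="Bulk image downloader")
    parser.add_argument("command", choices=["download", "status"])
    parser.add_argument("image_list_file", help="text file with one image URL per line")
    parser.add_argument("download_folder", help="directory (or tar target) for downloaded files")
    parser.add_argument("--threads", type=int, default=12)
    parser.add_argument("--ratelimit_downloads", type=int, default=60)
    parser.add_argument("--ratelimit_interval", type=float, default=1)
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument("--tarfile", action="store_true")
    parser.add_argument("--progressbar", action="store_true")
    return parser.parse_args()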
def main():
    url = ''
    output_file = 'dummy'
    nthread = 20
    part = 10000000

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hu:o:n:p:",
            ["url=", "outputfile=", "nthread=", "part="])
    except getopt.GetoptError:
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit(0)
        elif opt in ('-u', '--url'):
            url = arg
        elif opt in ('-o', '--outputfile'):
            output_file = arg
        elif opt in ('-n', '--nthread'):
            try:
                nthread = int(arg)
            except ValueError:
                usage()
                sys.exit(2)
        elif opt in ('-p', '--part'):
            try:
                part = int(arg)
            except ValueError:
                usage()
                sys.exit(2)

    if url == '':
        usage()
        sys.exit(2)

    # print "url = %s" % url
    # print "output file = %s" % output_file
    # print 'remove below line'
    # url = 'http://download.fedoraproject.org/pub/fedora/linux/releases/21/Workstation/x86_64/iso/Fedora-Live-Workstation-x86_64-21-5.iso'

    d = Downloader(url, output_file, nthread, part)
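# A hedged sketch of the usage() helper that main() expects. The script name and
# exact wording are assumptions; the options and defaults mirror the getopt spec
# and initial values above.
def usage():
    print("usage: downloader.py -u <url> [-o <outputfile>] [-n <nthread>] [-p <part>]")
    print("  -h, --help          show this help and exit")
    print("  -u, --url           URL to download (required)")
    print("  -o, --outputfile    output file name (default: dummy)")
    print("  -n, --nthread       number of download threads (default: 20)")
    print("  -p, --part          part size in bytes per request (default: 10000000)")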
class DownloadTestFileTree(unittest.TestCase):

    def setUp(self):
        # create temp output directory, returns an absolute path
        self.__downloads_temp_folder__ = tempfile.mkdtemp()
        print("created ", self.__downloads_temp_folder__, " to store downloads")

        # create a downloader with default params
        number_threads = 12
        ratelimit_downloads = 60
        ratelimit_interval = 1
        verbose = False
        store_into_tar = False
        progressbar = False
        self.__downloader__ = Downloader(self.__downloads_temp_folder__, number_threads,
                                         ratelimit_downloads, ratelimit_interval, verbose,
                                         store_into_tar, progressbar)

        # test image urls file
        self.__test_urls_file = "tests/test_image_urls_800.txt"

        # read list of urls, skipping empty lines
        with open(self.__test_urls_file) as f:
            self.__test_urls_list = [line.strip() for line in f if line.strip()]

        # create list of image context paths from the urls
        self.__test_urls_image_context_path_list = []
        for url in self.__test_urls_list:
            # extract the image context path (url path without the leading '/') and store it
            self.__test_urls_image_context_path_list.append(urlparse(url).path[1:])

    def tearDown(self):
        # delete temp output directory
        print("cleanup ", self.__downloads_temp_folder__)
        shutil.rmtree(self.__downloads_temp_folder__)

    def test_simple_download_list_into_filetree(self):
        # let the downloader read the urls file and download the files
        self.__downloader__.download_list(self.__test_urls_file)

        files_missing = 0
        files_corrupt = 0
        files_ok = 0
        number_urls = len(self.__test_urls_image_context_path_list)

        # now assert that each file has been downloaded and properly named
        for file in self.__test_urls_image_context_path_list:
            expected_outfile = os.path.join(self.__downloads_temp_folder__, file)

            # assert the file exists
            if not os.path.isfile(expected_outfile):
                print("Expected that file " + expected_outfile + " exists, but it is missing.")
                files_missing += 1
                continue

            # and that it is a real image file (imghdr returns "jpeg", "png" or "gif" for valid images)
            image_type = imghdr.what(expected_outfile)
            if image_type not in ("jpeg", "png", "gif"):
                print("Expected that file " + expected_outfile + " is an image, but it is of type " + str(image_type))
                files_corrupt += 1
                continue

            files_ok += 1

        assert files_missing == 0 and files_corrupt == 0 and files_ok == number_urls, (
            "Expected that " + str(number_urls) + " files have been downloaded. " +
            str(files_missing) + " files are missing, " + str(files_corrupt) +
            " images are corrupt. Only " + str(files_ok) + " have been successfully downloaded.")
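# A standard unittest entry point, added here as an assumption about how the test
# module is run; the class can equally be executed via "python -m unittest" or pytest.
if __name__ == "__main__":
    unittest.main()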
results = []
count = 0
for v in data_list:
    result = ResultModel()
    result.nid = v.nid
    result.title = v.title
    result.field_data_field_body = v.field_body_value
    result.link = v.link
    result.field_revision_field_resource_description_g = v.field_resource_description_g_value
    result.field_data_field_resource_url_g = v.field_resource_url_g_url
    result.taxonomy_term_data = v.name
    try:
        res = Downloader.download(v.field_resource_url_g_url)
        result.status = res.status_code
        if res.status_code == 200:
            # fall back through the extension helpers until one succeeds
            try:
                result.type = Downloader.getFileNameNExtension(res)
            except NotFileException, e:
                print e
                try:
                    result.type = Downloader.getFileExtension(res)
                except NoExceptionFound, e:
                    print e
                    result.type = Downloader.guessExtension(res)

            # save file
            url = v.field_resource_url_g_url
            file_name = "file/" + str(v.nid) + "_" + v.title + url[len(url)-4:]
            with open(file_name, "wb") as code:
# -*- coding: utf-8 -*-
import requests

from downloader.Downloader import Downloader

__author__ = 'johnnytsai'

url = "http://ws.ndc.gov.tw/001/administrator/10/relfile/0/1000/歷年數位機會(落差)調查彙整資料(csv檔案).csv"
response = requests.get(url, timeout=5)

print response.status_code
print response.headers
print url[len(url)-4:]  # last four characters of the url, i.e. the ".csv" suffix

Downloader.getFileName(response)
    # print model.title
    values.append(model)

size = 0
for v in values:
    if v.status == u'-1':
        size += 1

result = []
count = 0
for v in values:
    if v.status == u'-1':
        print v.title
        try:
            res = Downloader.download(v.field_data_field_resource_url_g)
            v.status = res.status_code
            if res.status_code == 200:
                # fall back through the extension helpers until one succeeds
                try:
                    v.type = Downloader.getFileNameNExtension(res)
                except NotFileException, e:
                    print e
                    try:
                        v.type = Downloader.getFileExtension(res)
                    except NoExceptionFound, e:
                        print e
                        v.type = Downloader.guessExtension(res)
        except HTTPError, e:
            v.status = -1
            v.message = str(e.message).decode('utf-8')
        except RequestException, e: