Beispiel #1
0
def main(args=None):
    """Command-line entry point: download an image list or report its status.

    The ``args`` parameter is accepted but not used; it is immediately
    replaced by the result of ``parse_parameters()``.

    Relative ``image_list_file`` / ``download_folder`` values are joined onto
    the current working directory. If the resulting image list path is not an
    existing file, a message is printed and the process exits with status 1.

    Supported commands (``args.command``):

    * ``"download"`` -- build a ``Downloader`` with the rate-limit values
      from the parsed arguments and call ``download_list``.
    * ``"status"`` -- build a ``Downloader`` with both rate-limit values set
      to ``1`` and call ``check_status``.

    Any other command only prints a "not known" message.
    """
    args = parse_parameters()

    def _anchored(path):
        # Absolute paths pass through untouched; relative ones are resolved
        # against the current working directory (no normalisation applied).
        return path if os.path.isabs(path) else os.path.join(os.getcwd(), path)

    image_list_file = _anchored(args.image_list_file)
    download_folder = _anchored(args.download_folder)

    # The list file is required by every command, so validate it up front.
    if not os.path.isfile(image_list_file):
        print("The given path "+image_list_file+" is not a file. Abort.")
        sys.exit(1)

    command = args.command
    if command not in ("download", "status"):
        print("Command not known: "+args.command)
        return

    if command == "download":
        downloader = Downloader(
            download_folder,
            args.threads,
            args.ratelimit_downloads,
            args.ratelimit_interval,
            args.verbose,
            args.tarfile,
            args.progressbar,
        )
        downloader.download_list(image_list_file)
        return

    # command == "status": the rate-limit arguments are fixed to 1 and 1.
    downloader = Downloader(
        download_folder,
        args.threads,
        1,
        1,
        args.verbose,
        args.tarfile,
        args.progressbar,
    )
    downloader.check_status(image_list_file)
class DownloadTestFileTree(unittest.TestCase):
    """Download a list of image URLs into a directory tree and verify it.

    Each test gets a fresh temporary download folder and a ``Downloader``
    pointed at it. The expected output location of every URL is the URL's
    path component (without the leading ``/``) below that folder.
    """

    def setUp(self):
        """Create the temp folder, the downloader and the expected paths."""
        # create temp output directory, return absolute path
        self.__downloads_temp_folder__ = tempfile.mkdtemp()
        # tearDown() is not run when setUp() raises (e.g. the URL list file
        # is missing), which would leak the directory. Cleanups registered
        # here still run in that case; ignore_errors makes this a no-op when
        # tearDown() has already removed the folder.
        self.addCleanup(shutil.rmtree, self.__downloads_temp_folder__,
                        ignore_errors=True)
        print("created ", self.__downloads_temp_folder__,
              " to store downloads")
        # create a downloader with default params
        number_threads = 12
        ratelimit_downloads = 60
        ratelimit_interval = 1
        verbose = False
        store_into_tar = False
        progressbar = False
        self.__downloader__ = Downloader(self.__downloads_temp_folder__,
                                         number_threads, ratelimit_downloads,
                                         ratelimit_interval, verbose,
                                         store_into_tar, progressbar)
        # test image urls file
        self.__test_urls_file = "tests/test_image_urls_800.txt"
        # read list of urls, skipping blank / whitespace-only lines
        # (str.strip() already removes the trailing newline)
        with open(self.__test_urls_file) as f:
            stripped_lines = (line.strip() for line in f)
            self.__test_urls_list = [url for url in stripped_lines if url]
        # image context path of each url: its path without the leading "/"
        self.__test_urls_image_context_path_list = [
            urlparse(url).path[1:] for url in self.__test_urls_list
        ]

    def tearDown(self):
        """Remove the temporary download folder created in setUp()."""
        # delete temp output directory
        print("cleanup ", self.__downloads_temp_folder__)
        shutil.rmtree(self.__downloads_temp_folder__)

    def test_simple_download_list_into_filetree(self):
        """Every listed URL must end up as an image file at its context path."""
        # let the downloader read the urls file and download the files
        self.__downloader__.download_list(self.__test_urls_file)
        files_missing = 0
        files_corrupt = 0
        files_ok = 0
        number_urls = len(self.__test_urls_image_context_path_list)
        accepted_image_types = ("jpeg", "jpg", "png", "gif")
        # now we assert that each file has been downloaded and properly named
        for context_path in self.__test_urls_image_context_path_list:
            expected_outfile = os.path.join(self.__downloads_temp_folder__,
                                            context_path)
            if not os.path.isfile(expected_outfile):
                # assert the file exists
                print("Expected that file " + expected_outfile +
                      " exists, but it is missing.")
                files_missing += 1
                continue
            # Sniff the file type once instead of re-reading the file for
            # every comparison.
            # NOTE: imghdr is deprecated since Python 3.11 and removed in
            # 3.13; this check needs a replacement before upgrading.
            image_type = imghdr.what(expected_outfile)
            if image_type not in accepted_image_types:
                # and it is a real image file
                print("Expected that file " + expected_outfile +
                      " is a image, but it is of type " + str(image_type))
                files_corrupt += 1
                continue
            files_ok += 1
        failure_message = (
            "Expected that " + str(number_urls) + " has been downloaded. " +
            str(files_missing) + " files are missing, " +
            str(files_corrupt) + " images are corrupt. Only " +
            str(files_ok) + " has been succesfully downloaded.")
        # Use a TestCase assertion rather than a bare ``assert`` so the check
        # is not stripped when Python runs with -O.
        self.assertEqual((files_missing, files_corrupt, files_ok),
                         (0, 0, number_urls), failure_message)