def setUp(self):
    """Build the fixture used by the download tests.

    Creates a temporary download folder, constructs a ``Downloader`` that
    writes into it, and loads the list of test URLs together with the
    path component of every URL.
    """
    # mkdtemp() creates the directory and hands back its absolute path
    target_dir = tempfile.mkdtemp()
    self.__downloads_temp_folder__ = target_dir
    print("created ", target_dir, " to store downloads")

    # positional Downloader settings, in the order the constructor is called
    downloader_settings = (
        12,     # number of threads
        60,     # ratelimit: downloads
        1,      # ratelimit: interval
        False,  # verbose
        False,  # store into tar
        False,  # progressbar
    )
    self.__downloader__ = Downloader(target_dir, *downloader_settings)

    # file that lists the image urls under test
    urls_file = "tests/test_image_urls_800.txt"
    self.__test_urls_file = urls_file

    # keep every non-blank line, stripped of surrounding whitespace
    with open(urls_file) as handle:
        stripped_lines = (raw.strip() for raw in handle)
        self.__test_urls_list = [entry for entry in stripped_lines if entry]

    # path of each url without its leading slash
    self.__test_urls_image_context_path_list = []
    self.__test_urls_image_context_path_list.extend(
        urlparse(address).path[1:] for address in self.__test_urls_list)
Esempio n. 2
0
 def __download() -> None:
     """
     Handle a click on the download button.

     Reads the current selection from ``clicked``. Unless the selection is
     one of the sentinels ``'0'`` or ``'off'``, a ``Downloader`` is built
     for it (``'audio'`` selects the ``mp3`` extension, anything else
     ``mp4``), the target directory is requested through
     ``filedialog.askdirectory()`` and the download is started. For a
     sentinel selection an info box asks the user to pick a resolution.

     :return: None
     """
     value = clicked.get()
     # The previous test `value != '0' or value != 'off'` was true for every
     # value, so the "no resolution selected" branch could never run.
     if value not in ('0', 'off'):
         extension = 'mp3' if value == 'audio' else 'mp4'
         dl = Downloader(extension=extension,
                         resolution=value,
                         path=filedialog.askdirectory(),
                         video=video)
         dl.download()
     else:
         messagebox.showinfo('No resolution selected',
                             'Please select resolution')
Esempio n. 3
0
def main(args=None):
    """Run the ``download`` or ``status`` command from the command line.

    The ``args`` parameter is accepted but immediately replaced by the
    result of ``parse_parameters()``. The image list file and the download
    folder are taken relative to the current working directory unless they
    are absolute. If the image list is not a file, a message is printed and
    the process exits with status 1. An unknown command only prints a
    message.
    """
    args = parse_parameters()

    def from_cwd(path):
        # absolute paths pass through; relative ones are joined onto the cwd
        return path if os.path.isabs(path) else os.path.join(os.getcwd(), path)

    image_list_file = from_cwd(args.image_list_file)
    download_folder = from_cwd(args.download_folder)

    if not os.path.isfile(image_list_file):
        print("The given path " + image_list_file + " is not a file. Abort.")
        sys.exit(1)

    command = args.command
    if command == "download":
        # honour the rate limit requested by the user
        limits = (args.ratelimit_downloads, args.ratelimit_interval)
    elif command == "status":
        # the status check is always constructed with a rate limit of 1 / 1
        limits = (1, 1)
    else:
        print("Command not known: " + command)
        return

    downloader = Downloader(download_folder, args.threads, limits[0],
                            limits[1], args.verbose, args.tarfile,
                            args.progressbar)
    if command == "download":
        downloader.download_list(image_list_file)
    else:
        downloader.check_status(image_list_file)
Esempio n. 4
0
def main():
    """Parse the command line and construct a ``Downloader``.

    Recognised options:
        -h                  print the usage text and exit with status 0
        -u, --url           url handed to ``Downloader`` (required)
        -o, --outputfile    output file name (default ``'dummy'``)
        -n, --nthread       integer handed to ``Downloader`` (default 20)
        -p, --part          integer handed to ``Downloader``
                            (default 10000000)

    An unknown option, a non-integer value for ``-n``/``-p`` or a missing
    url prints the usage text and exits with status 2.
    """

    def int_or_usage(text):
        # convert an option value to int; on failure show usage and exit 2
        try:
            return int(text)
        except ValueError:
            usage()
            sys.exit(2)

    url = ''
    output_file = 'dummy'
    nthread = 20
    part = 10000000
    try:
        opts, _ = getopt.getopt(
            sys.argv[1:], "hu:o:n:p:",
            ["url=", "outputfile=", "nthread=", "part="])
    except getopt.GetoptError:
        # this path used to exit without any output, unlike every other
        # error path below; show the usage text here as well
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            usage()
            sys.exit(0)
        elif opt in ('-u', '--url'):
            url = arg
        elif opt in ('-o', '--outputfile'):
            output_file = arg
        elif opt in ('-n', '--nthread'):
            nthread = int_or_usage(arg)
        elif opt in ('-p', '--part'):
            part = int_or_usage(arg)

    if url == '':
        usage()
        sys.exit(2)

    # NOTE(review): the instance is not used any further in the visible
    # code - presumably constructing it starts the work; confirm.
    d = Downloader(url, output_file, nthread, part)
class DownloadTestFileTree(unittest.TestCase):
    """Download a list of image urls into a folder tree and verify the files.

    Every test gets its own temporary download folder. Its removal is
    registered with ``addCleanup`` so it also happens when ``setUp`` itself
    fails (``tearDown`` is not run in that case).
    """

    # results of imghdr.what() that count as a valid downloaded image
    _ACCEPTED_IMAGE_TYPES = ("jpeg", "jpg", "png", "gif")

    def setUp(self):
        """Create the temp folder, the downloader and the expected paths."""
        # create temp output directory, return absolute path
        self.__downloads_temp_folder__ = tempfile.mkdtemp()
        # register the removal immediately: if anything below raises, the
        # folder would otherwise be left behind
        self.addCleanup(self._remove_downloads_folder)
        print("created ", self.__downloads_temp_folder__,
              " to store downloads")
        # create a downloader with default params
        number_threads = 12
        ratelimit_downloads = 60
        ratelimit_interval = 1
        verbose = False
        store_into_tar = False
        progressbar = False
        self.__downloader__ = Downloader(self.__downloads_temp_folder__,
                                         number_threads, ratelimit_downloads,
                                         ratelimit_interval, verbose,
                                         store_into_tar, progressbar)
        # test image urls file
        self.__test_urls_file = "tests/test_image_urls_800.txt"
        # read list of urls, skipping blank lines
        with open(self.__test_urls_file) as f:
            stripped_lines = (line.strip() for line in f)
            self.__test_urls_list = [line for line in stripped_lines if line]
        # image context path of every url: its path without the leading "/"
        self.__test_urls_image_context_path_list = [
            urlparse(url).path[1:] for url in self.__test_urls_list
        ]

    def _remove_downloads_folder(self):
        """Delete the temporary download folder created by ``setUp``."""
        print("cleanup ", self.__downloads_temp_folder__)
        shutil.rmtree(self.__downloads_temp_folder__)

    def test_simple_download_list_into_filetree(self):
        """Each url must end up as an image file at its context path."""
        # let the downloader read the urls file and download the files
        self.__downloader__.download_list(self.__test_urls_file)
        files_missing = 0
        files_corrupt = 0
        files_ok = 0
        number_urls = len(self.__test_urls_image_context_path_list)
        # now we check that each file has been downloaded and properly named
        for relative_path in self.__test_urls_image_context_path_list:
            expected_outfile = os.path.join(self.__downloads_temp_folder__,
                                            relative_path)
            if not os.path.isfile(expected_outfile):
                print("Expected that file " + expected_outfile +
                      " exists, but it is missing.")
                files_missing += 1
                continue
            # inspect the file once instead of once per accepted type
            image_type = imghdr.what(expected_outfile)
            if image_type not in self._ACCEPTED_IMAGE_TYPES:
                print("Expected that file " + expected_outfile +
                      " is a image, but it is of type " + str(image_type))
                files_corrupt += 1
                continue
            files_ok += 1
        # self.fail() instead of a bare assert statement, which would be
        # stripped when Python runs with -O
        if not (files_missing == 0 and files_corrupt == 0
                and files_ok == number_urls):
            self.fail("Expected that " + str(number_urls) +
                      " has been downloaded. " + str(files_missing) +
                      " files are missing, " + str(files_corrupt) +
                      " images are corrupt. Only " + str(files_ok) +
                      " has been succesfully downloaded.")
Esempio n. 6
0
# Python 2 script fragment: builds one ResultModel per entry of data_list
# and tries to download the resource each entry points to.
# NOTE(review): `results` and `count` are not updated anywhere in the
# visible part of this script.
results = []

count = 0
for v in data_list:
    # copy the fields of the source record onto a fresh ResultModel
    result = ResultModel()

    result.nid = v.nid
    result.title = v.title
    result.field_data_field_body = v.field_body_value
    result.link = v.link
    result.field_revision_field_resource_description_g = v.field_resource_description_g_value
    result.field_data_field_resource_url_g = v.field_resource_url_g_url
    result.taxonomy_term_data = v.name
    try:
        # fetch the resource and remember the HTTP status code
        res = Downloader.download(v.field_resource_url_g_url)
        result.status = res.status_code
        if res.status_code == 200:
            # determine the type with three strategies, falling back to the
            # next one whenever the previous one raises
            try:
                result.type = Downloader.getFileNameNExtension(res)
            except NotFileException, e:
                print e
                try:
                    result.type = Downloader.getFileExtension(res)
                except NoExceptionFound, e:
                    print e
                    result.type = Downloader.guessExtension(res)
            # save file
            url = v.field_resource_url_g_url
            # target name: "file/<nid>_<title>" followed by the last four
            # characters of the url
            file_name = "file/" + str(v.nid) + "_" + v.title + "" + url[len(url)-4:]
            with open(file_name, "wb") as code:
Esempio n. 7
0
# -*- coding: utf-8 -*-
import requests
from downloader.Downloader import Downloader

__author__ = 'johnnytsai'

# Manual probe: request a resource whose url contains non-ASCII characters
# and show what the server answers.
url = "http://ws.ndc.gov.tw/001/administrator/10/relfile/0/1000/歷年數位機會(落差)調查彙整資料(csv檔案).csv"

response = requests.get(url, timeout=5)

# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the former print statements were a syntax error on Python 3
print(response.status_code)

print(response.headers)

# last four characters of the url (".csv" for the url above)
print(url[-4:])

Downloader.getFileName(response)



Esempio n. 8
0
            #print model.title
            values.append(model)

# Python 2 script fragment working on the `values` list built earlier.

# count the entries whose status equals the string u'-1'
size = 0
for v in values:
    if v.status == u'-1':
        size+=1

# NOTE(review): `result` and `count` are not updated anywhere in the visible
# part of this script.
result = []

count = 0
# process only the entries whose status equals u'-1'
for v in values:
    if v.status == u'-1':
        print v.title
        try:
            # download the resource and store the HTTP status code
            res = Downloader.download(v.field_data_field_resource_url_g)
            v.status = res.status_code
            if res.status_code == 200:
                # determine the type with three strategies, falling back to
                # the next one whenever the previous one raises
                try:
                    v.type = Downloader.getFileNameNExtension(res)
                except NotFileException, e:
                    print e
                    try:
                        v.type = Downloader.getFileExtension(res)
                    except NoExceptionFound, e:
                        print e
                        v.type = Downloader.guessExtension(res)
        except HTTPError, e:
            # NOTE(review): the status is set to the integer -1 here, while
            # the filters above compare against the string u'-1' - confirm
            # which type is intended.
            v.status = -1
            v.message = str(e.message).decode('utf-8')
        except RequestException, e: