Beispiel #1
0
def download(url, dest, startByte=0, endByte=None, headers=None, timeout=4, shared_var=None, thread_shared_cmds=None, logger=None, retries=3):
    """Open the HTTP connection used by one download thread.

    Builds a request for `url`, optionally restricted to the byte range
    `startByte`-`endByte`, and opens it. When the server answers with
    HTTP 416 the call sleeps for 5 seconds and retries itself, up to
    `retries` times.

    Args:
        url: Address to request.
        dest: Destination path; in the code shown here it is only logged.
        startByte: First byte of the requested range.
        endByte: Last byte of the requested range. A falsy value (None or 0)
            means no Range header is sent at all.
        headers: Optional dict of extra request headers. It is copied, so
            the caller's dict is never modified.
        timeout: Timeout handed to `urllib2.urlopen`.
        shared_var: Not used in the code shown here; forwarded on retry.
        thread_shared_cmds: Not used in the code shown here; forwarded on retry.
        logger: Object exposing `info` and `warning`; falls back to
            `utils.DummyLogger()` when omitted.
        retries: How many more times an HTTP 416 may be retried.

    Raises:
        urllib2.HTTPError: For any HTTP error other than 416, or for 416
            once no retries are left.
    """
    logger = logger or utils.DummyLogger()
    # Work on a private copy so the Range header set below never leaks into
    # a dict the caller may be sharing between several threads.
    headers = dict(headers) if headers else {}
    if endByte:
        headers['Range'] = 'bytes=%d-%d' % (startByte, endByte)

    logger.info("Downloading '%s' to '%s'..." % (url, dest))
    req = urllib2.Request(url, headers=headers)
    try:
        urlObj = urllib2.urlopen(req, timeout=timeout)
    except urllib2.HTTPError as e:
        # HTTP 416: Requested Range Not Satisfiable. Per the original author
        # this shows up when the server answers with an HTML page along the
        # lines of "you opened too many connections" instead of the file, so
        # we wait for other threads to finish and then try again.
        if e.code != 416 or retries <= 0:
            raise
        logger.warning("Thread didn't got the file it was expecting. Retrying (%d times left)..." % (retries-1))
        time.sleep(5)
        return download(url, dest, startByte, endByte, headers, timeout, shared_var, thread_shared_cmds, logger, retries-1)
Beispiel #2
0
def download(url,
             dest,
             startByte=0,
             endByte=None,
             headers=None,
             timeout=4,
             shared_var=None,
             thread_shared_cmds=None,
             logger=None,
             retries=3):
    """Open the HTTP connection used by one download thread.

    Builds a request for `url`, optionally restricted to the byte range
    `startByte`-`endByte`, and opens it. When the server answers with
    HTTP 416 the call sleeps for 5 seconds and retries itself, up to
    `retries` times.

    Args:
        url: Address to request.
        dest: Destination path; in the code shown here it is only logged.
        startByte: First byte of the requested range.
        endByte: Last byte of the requested range. A falsy value (None or 0)
            means no Range header is sent at all.
        headers: Optional dict of extra request headers. It is copied, so
            the caller's dict is never modified.
        timeout: Timeout handed to `urllib2.urlopen`.
        shared_var: Not used in the code shown here; forwarded on retry.
        thread_shared_cmds: Not used in the code shown here; forwarded on retry.
        logger: Object exposing `debug` and `warning`; falls back to
            `utils.DummyLogger()` when omitted.
        retries: How many more times an HTTP 416 may be retried.

    Raises:
        urllib2.HTTPError: For any HTTP error other than 416, or for 416
            once no retries are left.
    """
    logger = logger or utils.DummyLogger()
    # Work on a private copy so the Range header set below never leaks into
    # a dict the caller may be sharing between several threads.
    headers = dict(headers) if headers else {}
    if endByte:
        headers['Range'] = 'bytes=%d-%d' % (startByte, endByte)

    logger.debug("Downloading '%s' to '%s'..." % (url, dest))
    req = urllib2.Request(url, headers=headers)
    try:
        urlObj = urllib2.urlopen(req, timeout=timeout)
    except urllib2.HTTPError as e:
        # Only "416 Requested Range Not Satisfiable" is retried; every other
        # HTTP error, and a 416 with no retries left, propagates unchanged.
        if e.code != 416 or retries <= 0:
            raise
        logger.warning(
            "Thread didn't got the file it was expecting. Retrying (%d times left)..."
            % (retries - 1))
        time.sleep(5)
        return download(url, dest, startByte, endByte, headers,
                        timeout, shared_var, thread_shared_cmds,
                        logger, retries - 1)
Beispiel #3
0
    def __init__(self, urls, dest=None, progress_bar=True, fix_urls=True, logger=None, connect_default_logger=False):
        """Prepare a download task: resolve URLs, destination, logger and pool.

        Args:
            urls: A single URL string, or a sequence of URLs. The first one
                becomes `self.url`; the remaining ones stay in `self.mirrors`.
            dest: Destination path. When omitted, a `pySmartDL` folder inside
                the system temp directory is used. A path ending with the OS
                separator, or naming an existing directory, gets the file
                name taken from the URL appended to it.
            progress_bar: Stored as `self.progress_bar`.
            fix_urls: When true, every URL is passed through `utils.url_fix`.
            logger: Logger object to use. Takes precedence over
                `connect_default_logger`.
            connect_default_logger: When true and no `logger` is given, use
                `utils.create_debugging_logger()`; otherwise a
                `utils.DummyLogger()` is used.

        Raises:
            IndexError: If `urls` is an empty sequence.
        """
        # Copy the sequence so popping the primary URL below never mutates
        # the list object owned by the caller.
        self.mirrors = [urls] if isinstance(urls, basestring) else list(urls)
        if fix_urls:
            self.mirrors = [utils.url_fix(x) for x in self.mirrors]
        self.url = self.mirrors.pop(0)

        fn = os.path.basename(urlparse(self.url).path)
        self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL', fn)
        if self.dest[-1] == os.sep:
            # A trailing separator means "this is a folder": a regular file
            # occupying that name is removed to make room for it.
            if os.path.exists(self.dest[:-1]) and os.path.isfile(self.dest[:-1]):
                os.unlink(self.dest[:-1])
            self.dest += fn
        if os.path.isdir(self.dest):
            self.dest = os.path.join(self.dest, fn)

        self.progress_bar = progress_bar

        if logger:
            self.logger = logger
        elif connect_default_logger:
            self.logger = utils.create_debugging_logger()
        else:
            self.logger = utils.DummyLogger()

        self.headers = {'User-Agent': utils.get_random_useragent()}
        self.threads_count = 3
        self.timeout = 4
        self.current_attemp = 1
        self.attemps_limit = 4
        self.minChunkFile = 1024**2*2 # 2MB
        self.filesize = 0
        self.shared_var = multiprocessing.Value(c_int, 0) # a ctypes var that counts the bytes already downloaded
        self.thread_shared_cmds = {}
        self.status = "ready"
        self.verify_hash = False
        self._killed = False
        self._failed = False
        self._start_func_blocking = True
        self.errors = []

        self.post_threadpool_thread = None
        self.control_thread = None

        dest_dir = os.path.dirname(self.dest)
        # dirname is '' when dest is a bare file name (current directory);
        # os.makedirs('') would raise, so only create a real, missing folder.
        if dest_dir and not os.path.exists(dest_dir):
            self.logger.debug('Folder "%s" does not exist. Creating...' % dest_dir)
            os.makedirs(dest_dir)
        if not utils.is_HTTPRange_supported(self.url):
            self.logger.warning("Server does not support HTTPRange. threads_count is set to 1.")
            self.threads_count = 1
        if os.path.exists(self.dest):
            self.logger.warning('Destination "%s" already exists. Existing file will be removed.' % self.dest)

        self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)
Beispiel #4
0
    def __init__(self,
                 urls,
                 dest=None,
                 progress_bar=True,
                 fix_urls=True,
                 threads=5,
                 logger=None,
                 connect_default_logger=False,
                 proxy=None):
        """Prepare a download task: resolve URLs, destination, logger and pool.

        Args:
            urls: A single URL string, or a sequence of URLs. The first one
                becomes `self.url`; the remaining ones stay in `self.mirrors`.
            dest: Destination path. When omitted, a `pySmartDL` folder inside
                the system temp directory is used. A path ending with the OS
                separator, or naming an existing directory, gets the
                (unquoted) file name taken from the URL appended to it.
            progress_bar: Stored as `self.progress_bar`.
            fix_urls: When true, every URL is passed through `utils.url_fix`.
            threads: Requested size of the thread pool. Forced to 1 when
                `utils.is_HTTPRange_supported` reports no range support.
            logger: Logger object to use. Takes precedence over
                `connect_default_logger`.
            connect_default_logger: When true and no `logger` is given, use
                the debugging logger (created once per process, then looked
                up by name); otherwise a `utils.DummyLogger()` is used.
            proxy: Optional proxy address. When given, a urllib2 opener that
                routes both http and https through it is installed globally.

        Raises:
            IndexError: If `urls` is an empty sequence.
        """
        global DEFAULT_LOGGER_CREATED

        # Copy the sequence so popping the primary URL below never mutates
        # the list object owned by the caller.
        self.mirrors = [urls] if isinstance(urls, basestring) else list(urls)
        if fix_urls:
            self.mirrors = [utils.url_fix(x) for x in self.mirrors]
        self.url = self.mirrors.pop(0)

        if proxy is not None:
            # NOTE: install_opener is process-wide, so this affects every
            # later urllib2.urlopen call, not just this instance.
            proxy = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

        fn = urllib2.unquote(os.path.basename(urlparse(self.url).path))
        if sys.version_info < (3, 0):
            fn = fn.decode('utf-8')  # required only on python 2
        self.dest = dest or os.path.join(tempfile.gettempdir(), 'pySmartDL',
                                         fn)
        if self.dest[-1] == os.sep:
            # A trailing separator means "this is a folder": a regular file
            # occupying that name is removed to make room for it.
            if os.path.exists(self.dest[:-1]) and os.path.isfile(
                    self.dest[:-1]):
                os.unlink(self.dest[:-1])
            self.dest += fn
        if os.path.isdir(self.dest):
            self.dest = os.path.join(self.dest, fn)

        self.progress_bar = progress_bar

        if logger:
            self.logger = logger
        elif connect_default_logger:
            if not DEFAULT_LOGGER_CREATED:
                self.logger = utils.create_debugging_logger()
                DEFAULT_LOGGER_CREATED = True
            else:
                self.logger = logging.getLogger('pySmartDL')
        else:
            self.logger = utils.DummyLogger()

        self.headers = {'User-Agent': utils.get_random_useragent()}
        self.threads_count = threads
        self.timeout = 4
        self.current_attemp = 1
        self.attemps_limit = 4
        self.minChunkFile = 1024**2 * 2  # 2MB
        self.filesize = 0
        self.shared_var = multiprocessing.Value(
            c_int, 0)  # a ctypes var that counts the bytes already downloaded
        self.thread_shared_cmds = {}
        self.status = "ready"
        self.verify_hash = False
        self._killed = False
        self._failed = False
        self._start_func_blocking = True
        self.errors = []

        self.post_threadpool_thread = None
        self.control_thread = None

        dest_dir = os.path.dirname(self.dest)
        # dirname is '' when dest is a bare file name (current directory);
        # os.makedirs('') would raise, so only create a real, missing folder.
        if dest_dir and not os.path.exists(dest_dir):
            self.logger.info('Folder "%s" does not exist. Creating...' %
                             dest_dir)
            os.makedirs(dest_dir)
        if not utils.is_HTTPRange_supported(self.url):
            self.logger.warning(
                "Server does not support HTTPRange. threads_count is set to 1."
            )
            self.threads_count = 1
        if os.path.exists(self.dest):
            self.logger.warning(
                'Destination "%s" already exists. Existing file will be removed.'
                % self.dest)

        self.logger.info("Creating a ThreadPool of %d thread(s).",
                         self.threads_count)
        self.pool = utils.ManagedThreadPoolExecutor(self.threads_count)