def _copyChunks(self):
    init = fs_encode(self.info.getChunkName(0))  # initial chunk name

    if self.info.getCount() > 1:
        fo = open(init, "rb+")  # first chunkfile
        for i in range(1, self.info.getCount()):  # input file
            # seek to beginning of chunk, to get rid of overlapping chunks
            fo.seek(self.info.getChunkRange(i - 1)[1] + 1)
            fname = fs_encode("%s.chunk%d" % (self.path, i))
            fi = open(fname, "rb")
            buf = 32 * 1024
            while True:  # copy in chunks, consumes less memory
                data = fi.read(buf)
                if not data:
                    break
                fo.write(data)
            fi.close()

            if fo.tell() < self.info.getChunkRange(i)[1]:
                fo.close()
                remove(init)
                self.info.remove()  # there are probably invalid chunks
                raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")

            remove(fname)  # remove chunk
        fo.close()

    if self.name:
        self.path = save_join(dirname(self.path), self.name)

    move(init, fs_encode(self.path))
    self.info.remove()  # remove info file
def load(name):
    fs_name = fs_encode("%s.chunks" % name)
    if not exists(fs_name):
        raise IOError()
    fh = codecs.open(fs_name, "r", "utf_8")
    name = fh.readline()[:-1]
    size = fh.readline()[:-1]
    if name.startswith("name:") and size.startswith("size:"):
        name = name[5:]
        size = size[5:]
    else:
        fh.close()
        raise WrongFormat()

    ci = ChunkInfo(name)
    ci.loaded = True
    ci.setSize(size)
    while True:
        if not fh.readline():  # skip line
            break
        name = fh.readline()[1:-1]
        range = fh.readline()[1:-1]
        if name.startswith("name:") and range.startswith("range:"):
            name = name[5:]
            range = range[6:].split("-")
        else:
            raise WrongFormat()

        ci.addChunk(name, (long(range[0]), long(range[1])))

    fh.close()
    return ci
def save(self):
    fs_name = fs_encode("%s.chunks" % self.name)
    fh = codecs.open(fs_name, "w", "utf_8")
    fh.write("name:%s\n" % self.name)
    fh.write("size:%s\n" % self.size)
    for i, c in enumerate(self.chunks):
        fh.write("#%d:\n" % i)
        fh.write("\tname:%s\n" % c[0])
        fh.write("\trange:%i-%i\n" % c[1])
    fh.close()
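For reference, `save` writes, and `load` parses, a small line-based text file describing the chunk layout. A `.chunks` file for a hypothetical two-chunk, 2048-byte download would look like this (the name/range lines are tab-indented, which is why `load` strips the first character of each):

name:example.bin
size:2048
#0:
	name:example.bin.chunk0
	range:0-1023
#1:
	name:example.bin.chunk1
	range:1024-2047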
def getHandle(self):
    """ returns a Curl handle ready to use for perform/multiperform """
    self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cookies)
    self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
    self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)

    # request all bytes, since some servers in Russia seem to have a defective arithmetic unit
    fs_name = fs_encode(self.p.info.getChunkName(self.id))
    if self.resume:
        self.fp = open(fs_name, "ab")
        self.arrived = self.fp.tell()
        if not self.arrived:
            self.arrived = stat(fs_name).st_size

        if self.range:
            # do nothing if chunk already finished
            if self.arrived + self.range[0] >= self.range[1]:
                return None

            if self.id == len(self.p.info.chunks) - 1:  # as last chunk, don't set an end range, so we get everything
                range = "%i-" % (self.arrived + self.range[0])
            else:
                range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1))

            self.log.debug("Chunked resume with range %s" % range)
            self.c.setopt(pycurl.RANGE, range)
        else:
            self.log.debug("Resume File from %i" % self.arrived)
            self.c.setopt(pycurl.RESUME_FROM, self.arrived)
    else:
        if self.range:
            if self.id == len(self.p.info.chunks) - 1:  # see above
                range = "%i-" % self.range[0]
            else:
                range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1))

            self.log.debug("Chunked with range %s" % range)
            self.c.setopt(pycurl.RANGE, range)

        self.fp = open(fs_name, "wb")

    return self.c
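The range logic above reduces to two pycurl options. A minimal standalone sketch, with URL and offsets invented for illustration, of fetching one byte slice into a chunk file:

import pycurl

c = pycurl.Curl()
c.setopt(pycurl.URL, "http://example.com/file.bin")  # hypothetical URL
fp = open("file.bin.chunk1", "wb")
c.setopt(pycurl.WRITEDATA, fp)
# request bytes 1000-1999 inclusive; an open-ended "1000-" fetches the rest
# of the file, which is what getHandle does for the last chunk
c.setopt(pycurl.RANGE, "1000-1999")
c.perform()
fp.close()
c.close()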
def checkDownload(self, rules, api_size=0, max_size=50000, delete=True, read_size=0):
    """ checks the content of the last downloaded file, re match is saved to `lastCheck`

    :param rules: dict with names and rules to match (compiled regexp or strings)
    :param api_size: expected file size
    :param max_size: if the file is larger than this it won't be checked
    :param delete: delete if matched
    :param read_size: amount of bytes to read from files larger than max_size
    :return: dictionary key of the first rule that matched
    """
    lastDownload = fs_encode(self.lastDownload)
    if not exists(lastDownload):
        return None

    size = stat(lastDownload)
    size = size.st_size

    if api_size and api_size <= size:
        return None
    elif size > max_size and not read_size:
        return None
    self.log.debug("Download Check triggered")
    f = open(lastDownload, "rb")
    content = f.read(read_size if read_size else -1)
    f.close()
    # produces encoding errors, better log to other file in the future?
    # self.log.debug("Content: %s" % content)
    for name, rule in rules.iteritems():
        if type(rule) in (str, unicode):
            if rule in content:
                if delete:
                    remove(lastDownload)
                return name
        elif hasattr(rule, "search"):
            m = rule.search(content)
            if m:
                if delete:
                    remove(lastDownload)
                self.lastCheck = m
                return name
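A usage sketch from inside a hoster plugin, right after a call to `download`; the rule names and patterns are invented for illustration:

import re

check = self.checkDownload({
    "offline": "file not found",                        # plain string -> substring test
    "error_page": re.compile(r"<title>Error</title>"),  # compiled regexp -> rule.search
})
if check == "offline":
    self.offline()
elif check == "error_page":
    self.retry(wait_time=60, reason="Got an error page instead of the file")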
def process(self, pyfile):
    if not self.account:
        self.logError(_("Please enter your %s account or deactivate this plugin") % "premium.to")
        self.fail("No premium.to account provided")

    self.logDebug("premium.to: Old URL: %s" % pyfile.url)

    tra = self.getTraffic()

    # raise timeout to 2 min
    self.req.setOption("timeout", 120)

    self.download("http://premium.to/api/getfile.php?authcode=%s&link=%s" % (self.account.authcode, quote(pyfile.url, "")),
                  disposition=True)

    check = self.checkDownload({"nopremium": "No premium account available"})
    if check == "nopremium":
        self.retry(60, 300, "No premium account available")

    err = ''
    if self.req.http.code == '420':
        # Custom error code sent - fail
        lastDownload = fs_encode(self.lastDownload)
        if exists(lastDownload):
            f = open(lastDownload, "rb")
            err = f.read(256).strip()
            f.close()
            remove(lastDownload)
        else:
            err = 'File does not exist'

    trb = self.getTraffic()
    self.logInfo("Filesize: %d, Traffic used %d, traffic left %d" % (pyfile.size, tra - trb, trb))

    if err:
        self.fail(err)
def getLocalContent(self, urls):
    """Load files from disk and separate to file content and url list

    :param urls:
    :return: list of (filename, content), remote urls
    """
    content = []
    # do nothing if no decryptFile method
    if hasattr(self.__class__, "decryptFile"):
        remote = []
        for url in urls:
            path = None
            if url.startswith("http"):  # skip urls directly
                pass
            elif url.startswith(self.CONTENT_PREFIX):
                path = url
            elif exists(url):
                path = url
            elif exists(self.core.path(url)):
                path = self.core.path(url)

            if path:
                try:
                    if path.startswith(self.CONTENT_PREFIX):
                        content.append(("", path[len(self.CONTENT_PREFIX):]))  # slice off the prefix, keep the raw content
                    else:
                        f = open(fs_encode(path), "rb")
                        content.append((f.name, f.read()))
                        f.close()
                except IOError, e:
                    self.logError("IOError", e)
            else:
                remote.append(url)

        # swap filtered url list
        urls = remote

    return content, urls  # file contents first, remaining remote urls second (see docstring)
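A sketch of the call shape, assuming a crypter plugin that defines `decryptFile`; the paths are invented and the exact value of `CONTENT_PREFIX` depends on the surrounding class:

urls = [
    "http://example.com/links.dlc",              # left untouched, returned as remote
    "/tmp/links.dlc",                            # local file, read into (filename, content)
    self.CONTENT_PREFIX + "raw container data",  # inline content, filename stays empty
]
content, urls = self.getLocalContent(urls)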
def extract(self, ids, thread=None):
    # reload from txt file
    self.reloadPasswords()

    # dl folder
    dl = self.config['general']['download_folder']

    extracted = []

    # iterate packages -> plugins -> targets
    for pid in ids:
        p = self.core.files.getPackage(pid)
        if not p:
            continue
        self.logInfo(_("Check package %s") % p.name)

        # determine output folder
        out = save_join(dl, p.folder, "")  # force trailing slash

        if self.getConfig("destination") and self.getConfig("destination").lower() != "none":
            # relative to package folder if destination is relative, otherwise the absolute path overrides it
            out = save_join(dl, p.folder, self.getConfig("destination"), "")

        if self.getConfig("subfolder"):
            out = join(out, fs_encode(p.folder))

        if not exists(out):
            makedirs(out)

        files_ids = [(save_join(dl, p.folder, f.name), f.fid) for f in p.getFiles().itervalues()]
        matched = False

        # check as long as there are unseen files
        while files_ids:
            new_files_ids = []

            for plugin in self.plugins:
                targets = plugin.getTargets(files_ids)
                if targets:
                    self.logDebug("Targets for %s: %s" % (plugin.__name__, targets))
                    matched = True

                    for target, fid in targets:
                        if target in extracted:
                            self.logDebug(basename(target), "skipped")
                            continue
                        extracted.append(target)  # prevent extracting the same file twice

                        klass = plugin(self, target, out, self.getConfig("fullpath"), self.getConfig("overwrite"),
                                       self.getConfig("excludefiles"), self.getConfig("renice"))
                        klass.init()

                        self.logInfo(basename(target), _("Extract to %s") % out)
                        new_files = self.startExtracting(klass, fid, p.password.strip().splitlines(), thread)
                        self.logDebug("Extracted: %s" % new_files)
                        self.setPermissions(new_files)

                        for file in new_files:
                            if not exists(file):
                                self.logDebug("new file %s does not exist" % file)
                                continue
                            if self.getConfig("recursive") and isfile(file):
                                new_files_ids.append((file, fid))  # append as new target

            files_ids = new_files_ids  # also check extracted files

        if not matched:
            self.logInfo(_("No files found to extract"))
class Hoster(Base):
    """
    Base plugin for hoster plugins. Overwrite getInfo for online status retrieval, process for downloading.
    """

    #: Class used to make requests with `self.load`
    REQUEST_CLASS = DefaultRequest

    #: Class used to make downloads
    DOWNLOAD_CLASS = DefaultDownload

    @staticmethod
    def getInfo(urls):
        """This method is used to retrieve the online status of files for hoster plugins.

        :param urls: List of urls
        :return: yield list of :class:`LinkStatus` as result
        """
        pass

    __type__ = "hoster"

    def __init__(self, pyfile):
        # TODO: pyfile.owner, but it's not correct yet
        Base.__init__(self, pyfile.m.core)

        self.wantReconnect = False
        #: enables simultaneous processing of multiple downloads
        self.limitDL = 0
        #: chunk limit
        self.chunkLimit = 1
        #: enables resume (will be ignored if server doesn't accept chunks)
        self.resumeDownload = False

        #: plugin is waiting
        self.waiting = False

        self.ocr = None  # captcha reader instance
        #: account handler instance, see :py:class:`Account`
        self.account = self.core.accountManager.selectAccount(self.__name__, self.owner)

        #: premium status
        self.premium = False

        if self.account:
            #: Request instance bound to account
            self.req = self.account.getAccountRequest()
            # Default: -1, True, True
            self.chunkLimit, self.limitDL, self.resumeDownload = self.account.getDownloadSettings()
            self.premium = self.account.isPremium()
        else:
            self.req = self.core.requestFactory.getRequest(klass=self.REQUEST_CLASS)

        #: Will hold the download class
        self.dl = None

        #: associated pyfile instance, see `PyFile`
        self.pyfile = pyfile
        self.thread = None  # holds thread in future

        #: location where the last call to download was saved
        self.lastDownload = ""
        #: re match of the last call to `checkDownload`
        self.lastCheck = None

        self.retries = 0  # amount of retries already made
        self.html = None  # some plugins store html code here

        self.init()

    @property
    def user(self):
        self.logDebug("Deprecated usage of self.user -> use self.account.loginname")
        if self.account:
            return self.account.loginname

    def getMultiDL(self):
        return self.limitDL <= 0

    def setMultiDL(self, val):
        self.limitDL = 0 if val else 1

    #: virtual attribute backed by self.limitDL
    multiDL = property(getMultiDL, setMultiDL)

    def getChunkCount(self):
        if self.chunkLimit <= 0:
            return self.config["download"]["chunks"]
        return min(self.config["download"]["chunks"], self.chunkLimit)

    def getDownloadLimit(self):
        if self.account:
            limit = self.account.options.get("limitDL", 0)
            if limit == "":
                limit = 0
            if self.limitDL > 0:  # a limit is already set, we use the minimum
                return min(int(limit), self.limitDL)
            else:
                return int(limit)
        else:
            return self.limitDL

    def __call__(self):
        return self.__name__

    def init(self):
        """initialize the plugin (in addition to `__init__`)"""
        pass

    def setup(self):
        """ setup for environment and other things, called before downloading (possibly more than one time) """
        pass

    def preprocessing(self, thread):
        """ handles important things to do before starting """
        self.thread = thread

        if self.account:
            # will force a re-login or reload of account info if necessary
            self.account.getAccountInfo()
        else:
            self.req.reset()

        self.setup()

        self.pyfile.setStatus("starting")

        return self.process(self.pyfile)

    def process(self, pyfile):
        """the 'main' method of every plugin, you **have to** overwrite it"""
        raise NotImplementedError

    def abort(self):
        return self.pyfile.abort

    def resetAccount(self):
        """ don't use account and retry download """
        self.account = None
        self.req = self.core.requestFactory.getRequest(self.__name__)
        self.retry()

    def checksum(self, local_file=None):
        """
        return codes:
        0  - checksum ok
        1  - checksum wrong
        5  - can't get checksum
        10 - not implemented
        20 - unknown error
        """
        #@TODO checksum check addon

        return True, 10

    def setWait(self, seconds, reconnect=None):
        """Set a specific wait time later used with `wait`

        :param seconds: wait time in seconds
        :param reconnect: True if a reconnect would avoid wait time
        """
        if reconnect is not None:
            self.wantReconnect = reconnect
        self.pyfile.waitUntil = time() + int(seconds)

    def wait(self, seconds=None, reconnect=None):
        """ Waits the time previously set or passed as arguments. See `setWait` """
        if seconds is not None:
            self.setWait(seconds, reconnect)
        self._wait()

    def _wait(self):
        self.waiting = True
        self.pyfile.setStatus("waiting")

        while self.pyfile.waitUntil > time():
            self.thread.m.reconnecting.wait(2)
            self.checkAbort()
            if self.thread.m.reconnecting.isSet():
                self.waiting = False
                self.wantReconnect = False
                raise Reconnect

        self.waiting = False
        self.pyfile.setStatus("starting")

    def offline(self):
        """ fail and indicate file is offline """
        raise Fail("offline")

    def tempOffline(self):
        """ fail and indicate file is temporarily offline, the core may take consequences """
        raise Fail("temp. offline")

    def retry(self, max_tries=3, wait_time=1, reason="", backoff=lambda x, y: x):
        """Retry and begin again from the beginning

        :param max_tries: number of maximum retries
        :param wait_time: time to wait in seconds
        :param reason: reason for retrying, will be passed to fail if max_tries is reached
        :param backoff: function to back off the wait time, takes the initial time and the
                        retry count as arguments. defaults to no backoff / fixed wait time
        """
        if 0 < max_tries <= self.retries:
            if not reason:
                reason = "Max retries reached"
            raise Fail(reason)

        self.wantReconnect = False
        self.retries += 1
        self.setWait(backoff(wait_time, self.retries))
        self.wait()

        raise Retry(reason)

    def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=False):
        """Downloads the content at url to download folder

        :param disposition: if True and server provides content-disposition header
                            the filename will be changed if needed
        :return: The location where the file was saved
        """
        self.checkForSameFiles()
        self.checkAbort()

        self.pyfile.setStatus("downloading")

        download_folder = self.config['general']['download_folder']

        location = save_join(download_folder, self.pyfile.package().folder)

        if not exists(location):
            makedirs(location, int(self.core.config["permission"]["folder"], 8))

            if self.core.config["permission"]["change_dl"] and os.name != "nt":
                try:
                    uid = getpwnam(self.config["permission"]["user"])[2]
                    gid = getgrnam(self.config["permission"]["group"])[2]
                    chown(location, uid, gid)
                except Exception, e:
                    self.log.warning(_("Setting User and Group failed: %s") % str(e))

        # convert back to unicode
        location = fs_decode(location)
        name = self.pyfile.name

        filename = join(location, name)

        self.core.addonManager.dispatchEvent("download:start", self.pyfile, url, filename)

        # Create the class used for downloading
        self.dl = self.core.requestFactory.getDownloadRequest(self.req, self.DOWNLOAD_CLASS)
        try:
            # TODO: hardcoded arguments
            newname = self.dl.download(url, filename, get=get, post=post, referer=ref,
                                       chunks=self.getChunkCount(), resume=self.resumeDownload,
                                       cookies=cookies, disposition=disposition)
        finally:
            self.dl.close()
            self.pyfile.size = self.dl.size

        if disposition and newname and newname != name:  # triple check, just to be sure
            self.log.info("%(name)s saved as %(newname)s" % {"name": name, "newname": newname})
            self.pyfile.name = newname
            filename = join(location, newname)

        fs_filename = fs_encode(filename)

        if self.core.config["permission"]["change_file"]:
            chmod(fs_filename, int(self.core.config["permission"]["file"], 8))

        if self.core.config["permission"]["change_dl"] and os.name != "nt":
            try:
                uid = getpwnam(self.config["permission"]["user"])[2]
                gid = getgrnam(self.config["permission"]["group"])[2]
                chown(fs_filename, uid, gid)
            except Exception, e:
                self.log.warning(_("Setting User and Group failed: %s") % str(e))

        self.lastDownload = filename
        return self.lastDownload
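The `backoff` parameter of `retry` makes growing wait times a one-liner. A hypothetical call from inside `process`:

# retry up to 5 times, waiting 30s, 60s, 120s, ... between attempts;
# backoff receives the initial wait time and the 1-based retry count
self.retry(max_tries=5, wait_time=30, reason="Server busy",
           backoff=lambda t, n: t * 2 ** (n - 1))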
def remove(self):
    fs_name = fs_encode("%s.chunks" % self.name)
    if exists(fs_name):
        remove(fs_name)
def _download(self, chunks, resume):
    if not resume:
        self.info.clear()
        self.info.addChunk("%s.chunk0" % self.path, (0, 0))  # create an initial entry

    self.chunks = []

    init = CurlChunk(0, self, None, resume)  # initial chunk that will load the complete file (if needed)

    self.chunks.append(init)
    self.m.add_handle(init.getHandle())

    lastFinishCheck = 0
    lastTimeCheck = 0
    chunksDone = set()  # set of curl handles that are finished
    chunksCreated = False
    done = False
    if self.info.getCount() > 1:  # this is a resume; if we were chunked originally, assume we still can be
        self.chunkSupport = True

    while 1:
        # need to create chunks
        if not chunksCreated and self.chunkSupport and self.size:  # self.size will be set later by the first chunk
            self.flags ^= Connection.Resumable
            if not resume:
                self.info.setSize(self.size)
                self.info.createChunks(chunks)
                self.info.save()

            chunks = self.info.getCount()

            init.setRange(self.info.getChunkRange(0))

            for i in range(1, chunks):
                c = CurlChunk(i, self, self.info.getChunkRange(i), resume)

                handle = c.getHandle()
                if handle:
                    self.chunks.append(c)
                    self.m.add_handle(handle)
                else:
                    # close immediately
                    self.log.debug("Invalid curl handle -> closed")
                    c.close()

            chunksCreated = True

        while 1:
            ret, num_handles = self.m.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break

        t = time()

        # reduce these calls
        # when num_q is 0, the loop is exited
        while lastFinishCheck + 0.5 < t:
            # list of failed curl handles
            failed = []
            ex = None  # save only the last exception, we can only raise one anyway

            num_q, ok_list, err_list = self.m.info_read()
            for c in ok_list:
                chunk = self.findChunk(c)
                try:  # check if the header implies success, else add it to failed list
                    chunk.verifyHeader()
                except ResponseException, e:
                    self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunksDone.add(c)

            for c in err_list:
                curl, errno, msg = c
                chunk = self.findChunk(curl)

                # test if the chunk was actually finished: errno 23 is CURLE_WRITE_ERROR,
                # which the write callback triggers deliberately once its range is complete
                # (hence the "0 !=" message); anything else is a real failure
                if errno != 23 or "0 !=" not in msg:
                    failed.append(chunk)
                    ex = pycurl.error(errno, msg)
                    self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
                    continue

                try:  # check if the header implies success, else add it to failed list
                    chunk.verifyHeader()
                except ResponseException, e:
                    self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunksDone.add(curl)

            if not num_q:  # no more info to get
                # check if init is not finished so we reset download connections
                # note that other chunks are closed and everything is downloaded with the initial connection
                if failed and init not in failed and init.c not in chunksDone:
                    self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))

                    # list of chunks to clean and remove
                    to_clean = filter(lambda x: x is not init, self.chunks)
                    for chunk in to_clean:
                        self.closeChunk(chunk)
                        self.chunks.remove(chunk)
                        remove(fs_encode(self.info.getChunkName(chunk.id)))

                    # let the first chunk load the rest and update the info file
                    init.resetRange()
                    self.info.clear()
                    self.info.addChunk("%s.chunk0" % self.path, (0, self.size))
                    self.info.save()
                elif failed:
                    raise ex

                lastFinishCheck = t

                if len(chunksDone) >= len(self.chunks):
                    if len(chunksDone) > len(self.chunks):
                        self.log.warning("Finished download chunks size incorrect, please report bug.")
                    done = True  # all chunks loaded

                break