def _copy_chunks(self): init = format.path(self.info.get_chunk_name(0)) #: initial chunk name if self.info.get_count() > 1: with io.open(init, "rb+") as fo: #: first chunkfile for i in range(1, self.info.get_count()): # input file fo.seek( self.info.get_chunk_range(i - 1)[1] + 1) #: seek to beginning of chunk, to get rid of overlapping chunks fname = format.path("{0}.chunk{1:d}".format(self.path, i)) buf = 32 << 10 with io.open(fname, mode='rb') as fi: while True: #: copy in chunks, consumes less memory data = fi.read(buf) if not data: break fo.write(data) if fo.tell() < self.info.get_chunk_range(i)[1]: remove(init) self.info.remove() #: there are probably invalid chunks raise Exception( "Downloaded content was smaller than expected. Try to reduce download connections") remove(fname) #: remove chunk if self.name: self.path = format.path(os.path.dirname(self.path), self.name) shutil.move(init, format.path(self.path)) self.info.remove() #: remove info file
def _copy_chunks(self): init = format.path(self.info.get_chunk_name(0)) #: initial chunk name if self.info.get_count() > 1: with io.open(init, "rb+") as fo: #: first chunkfile for i in range(1, self.info.get_count()): # input file fo.seek( self.info.get_chunk_range(i - 1)[1] + 1 ) #: seek to beginning of chunk, to get rid of overlapping chunks fname = format.path("{0}.chunk{1:d}".format(self.path, i)) buf = 32 << 10 with io.open(fname, mode='rb') as fi: while True: #: copy in chunks, consumes less memory data = fi.read(buf) if not data: break fo.write(data) if fo.tell() < self.info.get_chunk_range(i)[1]: remove(init) self.info.remove( ) #: there are probably invalid chunks raise Exception( "Downloaded content was smaller than expected. Try to reduce download connections" ) remove(fname) #: remove chunk if self.name: self.path = format.path(os.path.dirname(self.path), self.name) shutil.move(init, format.path(self.path)) self.info.remove() #: remove info file
def load(name):
    """
    Parse a "<name>.chunks" info file and rebuild the ChunkInfo it describes.

    :param name: base file name the chunk info belongs to
    :return: ChunkInfo with size and all chunk ranges restored
    :raises IOError: if the info file does not exist
    :raises TypeError: if the file does not follow the expected format
    """
    fs_name = format.path("{0}.chunks".format(name))
    if not os.path.exists(fs_name):
        # give the caller a useful message instead of a bare IOError
        raise IOError("chunk info file not found: {0}".format(fs_name))

    with io.open(fs_name) as fp:
        # header: "name:<file>" and "size:<bytes>"
        name = fp.readline()[:-1]
        size = fp.readline()[:-1]
        if name.startswith("name:") and size.startswith("size:"):
            name = name[5:]
            size = size[5:]
        else:
            raise TypeError("chunk.file has wrong format")

        ci = ChunkInfo(name)
        ci.loaded = True
        ci.set_size(size)

        # chunk records: "#<i>:", "\tname:<chunk>", "\trange:<start>-<end>"
        while True:
            if not fp.readline():  #: skip "#<i>:" line; EOF ends the loop
                break
            name = fp.readline()[1:-1]
            chunk_range = fp.readline()[1:-1]  # renamed: do not shadow builtin `range`
            if name.startswith("name:") and chunk_range.startswith("range:"):
                name = name[5:]
                bounds = chunk_range[6:].split("-")
            else:
                raise TypeError("chunk.file has wrong format")
            ci.add_chunk(name, (int(bounds[0]), int(bounds[1])))

    return ci
def save(self):
    """
    Persist this chunk info to "<name>.chunks" next to the download.

    Writes a header ("name:", "size:") followed by one record per
    chunk in the format read back by load().
    """
    info_path = format.path("{0}.chunks".format(self.name))
    lines = [
        "name:{0}\n".format(self.name),
        "size:{0}\n".format(self.size),
    ]
    for idx, chunk in enumerate(self.chunks):
        lines.append("#{0:d}:\n".format(idx))
        lines.append("\tname:{0}\n".format(chunk[0]))
        lines.append("\trange:{0:d}-{1:d}\n".format(*chunk[1]))
    with io.open(info_path, mode='w') as fp:
        fp.writelines(lines)
def get_handle(self):
    """
    Returns a Curl handle ready to use for perform/multiperform.

    Sets up the request context, the output file object and (when
    chunked) the HTTP Range option. Returns None when this chunk is
    already complete and needs no transfer.
    """
    self.set_request_context(self.p.url, self.p.get, self.p.post,
                             self.p.referer, self.p.cookies)

    self.c.setopt(pycurl.WRITEFUNCTION, self.write_body)
    self.c.setopt(pycurl.HEADERFUNCTION, self.write_header)

    # request all bytes, since some servers in russia seems to have a
    # defect arihmetic unit
    fs_name = format.path(self.p.info.get_chunk_name(self.id))
    if self.resume:
        self.fp = io.open(fs_name, mode='ab')
        self.arrived = self.fp.tell()
        if not self.arrived:
            self.arrived = os.stat(fs_name).st_size

        if self.range:
            # do nothing if chunk already finished
            if self.arrived + self.range[0] >= self.range[1]:
                return None

            # as last chunk dont set end range, so we get everything
            # FIX: was b"...".format(...) — bytes has no .format() in
            # Python 3; build a str like the non-resume branch does
            if self.id == len(self.p.info.chunks) - 1:
                byte_range = "{0:d}-".format(self.arrived + self.range[0])
            else:
                byte_range = "{0:d}-{1:d}".format(
                    self.arrived + self.range[0],
                    min(self.range[1] + 1, self.p.size - 1))

            self.log.debug("Chunked resume with range {0}".format(byte_range))
            self.c.setopt(pycurl.RANGE, byte_range)
        else:
            self.log.debug("Resume File from {0:d}".format(self.arrived))
            self.c.setopt(pycurl.RESUME_FROM, self.arrived)
    else:
        if self.range:
            if self.id == len(self.p.info.chunks) - 1:  #: see above
                byte_range = "{0:d}-".format(self.range[0])
            else:
                byte_range = "{0:d}-{1:d}".format(
                    self.range[0], min(self.range[1] + 1, self.p.size - 1))

            self.log.debug("Chunked with range {0}".format(byte_range))
            self.c.setopt(pycurl.RANGE, byte_range)

        self.fp = io.open(fs_name, mode='wb')

    return self.c
def get_handle(self):
    """
    Returns a Curl handle ready to use for perform/multiperform.

    Sets up the request context, the output file object and (when
    chunked) the HTTP Range option. Returns None when this chunk is
    already complete and needs no transfer.
    """
    self.set_request_context(self.p.url, self.p.get, self.p.post,
                             self.p.referer, self.p.cookies)

    self.c.setopt(pycurl.WRITEFUNCTION, self.write_body)
    self.c.setopt(pycurl.HEADERFUNCTION, self.write_header)

    # request all bytes, since some servers in russia seems to have a
    # defect arihmetic unit
    fs_name = format.path(self.p.info.get_chunk_name(self.id))
    if self.resume:
        self.fp = io.open(fs_name, mode='ab')
        self.arrived = self.fp.tell()
        if not self.arrived:
            self.arrived = os.stat(fs_name).st_size

        if self.range:
            # do nothing if chunk already finished
            if self.arrived + self.range[0] >= self.range[1]:
                return None

            # as last chunk dont set end range, so we get everything
            # FIX: was b"...".format(...) — bytes has no .format() in
            # Python 3; build a str like the non-resume branch does
            if self.id == len(self.p.info.chunks) - 1:
                byte_range = "{0:d}-".format(self.arrived + self.range[0])
            else:
                byte_range = "{0:d}-{1:d}".format(
                    self.arrived + self.range[0],
                    min(self.range[1] + 1, self.p.size - 1))

            self.log.debug("Chunked resume with range {0}".format(byte_range))
            self.c.setopt(pycurl.RANGE, byte_range)
        else:
            self.log.debug("Resume File from {0:d}".format(self.arrived))
            self.c.setopt(pycurl.RESUME_FROM, self.arrived)
    else:
        if self.range:
            if self.id == len(self.p.info.chunks) - 1:  #: see above
                byte_range = "{0:d}-".format(self.range[0])
            else:
                byte_range = "{0:d}-{1:d}".format(
                    self.range[0], min(self.range[1] + 1, self.p.size - 1))

            self.log.debug("Chunked with range {0}".format(byte_range))
            self.c.setopt(pycurl.RANGE, byte_range)

        self.fp = io.open(fs_name, mode='wb')

    return self.c
def get_local_content(self, urls):
    """
    Load files from disk and separate to file content and url list

    :param urls: iterable of urls / local paths / inline-content entries
    :return: list of (filename, content), remote urls
    """
    content = []
    # do nothing if no decrypt_file method
    if hasattr(self.__class__, "decrypt_file"):
        remote = []
        for url in urls:
            path = None
            if url.startswith("http"):  #: skip urls directly
                pass
            elif url.startswith(self.CONTENT_PREFIX):
                path = url
            elif os.path.exists(url):
                path = url
            elif os.path.exists(self.pyload.path(url)):
                path = self.pyload.path(url)

            if path:
                try:
                    if path.startswith(self.CONTENT_PREFIX):
                        # FIX: slice off the prefix to keep the whole inline
                        # content — path[len(prefix)] returned a single char
                        content.append(("", path[len(self.CONTENT_PREFIX):]))
                    else:
                        with io.open(format.path(path), mode='rb') as fp:
                            content.append((fp.name, fp.read()))
                except IOError as e:
                    # FIX: e.message no longer exists in Python 3
                    self.log_error(_("IOError"), e)
            else:
                remote.append(url)

        # swap filtered url list
        urls = remote

    return content, urls
def _download(self, chunks, resume):
    """
    Drive the (possibly chunked) curl download until all chunks finish.

    :param chunks: desired number of connections (actual count may differ)
    :param resume: whether to continue from existing chunk files
    :raises Abort: when self.do_abort is set
    :raises pycurl.error / ResponseException: last chunk failure when the
        single-connection fallback is not possible
    """
    if not resume:
        self.info.clear()
        self.info.add_chunk("{0}.chunk0".format(self.path), (0, 0))  #: create an initial entry

    self.chunks = []

    # initial chunk that will load complete file (if needed)
    init = CurlChunk(0, self, None, resume)
    self.chunks.append(init)
    self.manager.add_handle(init.get_handle())

    last_finish_check = 0
    last_time_check = 0
    chunks_done = set()  #: list of curl handles that are finished
    chunks_created = False
    done = False
    if self.info.get_count() > 1:  #: This is a resume, if we were chunked originally assume still can
        self.chunk_support = True

    while True:
        # need to create chunks
        if not chunks_created and self.chunk_support and self.size:  #: will be set later by first chunk
            # NOTE(review): ^= toggles (not clears) the Resumable bit; runs
            # at most once because chunks_created is set below — confirm
            # intent against the Connection flag semantics
            self.flags ^= Connection.Resumable
            if not resume:
                self.info.set_size(self.size)
                self.info.create_chunks(chunks)
                self.info.save()

            chunks = self.info.get_count()

            init.set_range(self.info.get_chunk_range(0))

            for i in range(1, chunks):
                c = CurlChunk(i, self, self.info.get_chunk_range(i), resume)
                handle = c.get_handle()
                if handle:
                    self.chunks.append(c)
                    self.manager.add_handle(handle)
                else:
                    # close immediately
                    self.pyload.log.debug("Invalid curl handle -> closed")
                    c.close()

            chunks_created = True

        # drain pending transfers
        while True:
            ret, num_handles = self.manager.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break

        t = time()

        # reduce these calls
        # when num_q is 0, the loop is exited
        while last_finish_check + 0.5 < t:
            # list of failed curl handles
            failed = []
            ex = None  #: save only last exception, we can only raise one anyway

            num_q, ok_list, err_list = self.manager.info_read()
            for c in ok_list:
                chunk = self.find_chunk(c)
                try:  #: check if the header implies success, else add it to failed list
                    chunk.verify_header()
                except ResponseException as e:
                    self.pyload.log.debug("Chunk {0:d} failed: {1}".format(chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunks_done.add(c)

            for c in err_list:
                curl, errno, msg = c
                chunk = self.find_chunk(curl)

                # test if chunk was finished
                # (curl errno 23 with "0 !=" means the write callback
                # aborted a transfer that was already complete)
                if errno != 23 or "0 !=" not in msg:
                    failed.append(chunk)
                    ex = pycurl.error(errno, msg)
                    self.pyload.log.debug("Chunk {0:d} failed: {1}".format(chunk.id + 1, ex))
                    continue

                try:  #: check if the header implies success, else add it to failed list
                    chunk.verify_header()
                except ResponseException as e:
                    self.pyload.log.debug("Chunk {0:d} failed: {1}".format(chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunks_done.add(curl)

            if not num_q:  #: no more info to get
                # check if init is not finished so we reset download connections
                # note that other chunks are closed and everything
                # downloaded with initial connection
                if failed and init not in failed and init.c not in chunks_done:
                    self.pyload.log.error(_("Download chunks failed, fallback to single connection | {0}".format(ex)))

                    # list of chunks to clean and remove
                    to_clean = [x for x in self.chunks if x is not init]
                    for chunk in to_clean:
                        self.close_chunk(chunk)
                        self.chunks.remove(chunk)
                        remove(format.path(self.info.get_chunk_name(chunk.id)))

                    # let first chunk load the rest and update the info
                    # file
                    init.reset_range()
                    self.info.clear()
                    self.info.add_chunk("{0}.chunk0".format(self.path), (0, self.size))
                    self.info.save()
                elif failed:
                    raise ex

                last_finish_check = t

                if len(chunks_done) >= len(self.chunks):
                    if len(chunks_done) > len(self.chunks):
                        self.pyload.log.warning(_("Finished download chunks size incorrect, please report bug"))
                    done = True  #: all chunks loaded

                break

        if done:
            break  #: all chunks loaded

        # calc speed once per second, averaging over 3 seconds
        if last_time_check + 1 < t:
            diff = [c.arrived - (self.last_arrived[i] if len(self.last_arrived) > i else 0) for i, c in enumerate(self.chunks)]

            self.last_speeds[1] = self.last_speeds[0]
            self.last_speeds[0] = self.speeds
            # NOTE(review): `//` floors the byte/s value — looks like a
            # py2->py3 conversion of `/`; confirm floor division is intended
            self.speeds = [float(a) // (t - last_time_check) for a in diff]
            self.last_arrived = [c.arrived for c in self.chunks]
            last_time_check = t

        if self.do_abort:
            raise Abort

        self.manager.select(1)

    for chunk in self.chunks:
        chunk.flush_file()  #: make sure downloads are written to disk

    self._copy_chunks()
def do_download(self, url):
    """
    Resolve an xdcc:// url via IRC and download the pack over DCC.

    Connects to the IRC server, requests the pack from the bot, idles
    until the DCC SEND offer arrives, then hands the transfer to
    self.req.download.

    :param url: xdcc://server[:port]/#channel/bot/#pack url
    :return: path of the downloaded file
    """
    # NOTE(review): this plugin still sends/receives str over the raw
    # socket; under Python 3 socket I/O needs bytes — verify which
    # interpreter this plugin targets before relying on it.
    self.pyfile.set_status("waiting")  #: real link

    download_folder = self.pyload.config.get('general', 'storage_folder')
    location = os.path.join(download_folder, self.pyfile.package().folder.decode(sys.getfilesystemencoding()))
    if not os.path.exists(location):
        makedirs(location)

    m = re.match(r'xdcc://(.*?)/#?(.*?)/(.*?)/#?(\d+)/?', url)
    server = m.group(1)
    chan = m.group(2)
    bot = m.group(3)
    pack = m.group(4)
    nick = self.get_config('nick')
    ident = self.get_config('ident')
    real = self.get_config('realname')

    temp = server.split(':')
    ln = len(temp)
    if ln == 2:
        host, port = temp
    elif ln == 1:
        host, port = temp[0], 6667
    else:
        self.fail(_("Invalid hostname for IRC Server ({0})").format(server))

    #######################
    # CONNECT TO IRC AND IDLE FOR REAL LINK
    dl_time = time.time()

    with closing(socket.socket()) as sock:
        sock.connect((host, int(port)))
        if nick == "pyload":
            # last 3 digits; FIX: {0:d} rejects the float time.time()
            # returns — convert to int first
            nick = "pyload-{0:d}".format(int(time.time()) % 1000)
        sock.send("NICK {0}\r\n".format(nick))
        sock.send("USER {0} {1} bla :{2}\r\n".format(ident, host, real))
        time.sleep(3)
        sock.send("JOIN #{0}\r\n".format(chan))
        sock.send("PRIVMSG {0} :xdcc send #{1}\r\n".format(bot, pack))

        # IRC recv loop
        readbuffer = ""
        done = False
        retry = None
        m = None
        while True:
            # done is set if we got our real link
            if done:
                break

            if retry:
                if time.time() > retry:
                    retry = None
                    dl_time = time.time()
                    sock.send("PRIVMSG {0} :xdcc send #{1}\r\n".format(bot, pack))
            else:
                if (dl_time + self.timeout) < time.time():
                    # TODO: add in config
                    sock.send("QUIT :byebye\r\n")
                    # sock.close()
                    self.fail(_("XDCC Bot did not answer"))

            fdset = select([sock], [], [], 0)
            if sock not in fdset[0]:
                continue

            readbuffer += sock.recv(1024)
            temp = readbuffer.split("\n")
            readbuffer = temp.pop()

            for line in temp:
                if self.debug == 2:
                    print("*> {0}".format(line, errors='ignore'))
                line = line.rstrip()
                first = line.split()
                if first[0] == "PING":
                    sock.send("PONG {0}\r\n".format(first[1]))
                if first[0] == "ERROR":
                    self.fail(_("IRC-Error: {0}").format(line))

                msg = line.split(None, 3)
                if len(msg) != 4:
                    continue

                msg = {
                    'origin': msg[0][1:],
                    'action': msg[1],
                    'target': msg[2],
                    'text': msg[3][1:]
                }

                # answer CTCP queries addressed to us
                if nick == msg['target'][0:len(nick)] and "PRIVMSG" == msg['action']:
                    if msg['text'] == "\x01VERSION\x01":
                        self.log_debug("XDCC: Sending CTCP VERSION")
                        sock.send("NOTICE {0} :{1}\r\n".format(msg['origin'], "pyLoad IRC Interface"))
                    elif msg['text'] == "\x01TIME\x01":
                        self.log_debug("Sending CTCP TIME")
                        # FIX: {1:d} rejects floats — convert to int
                        sock.send("NOTICE {0} :{1:d}\r\n".format(msg['origin'], int(time.time())))
                    elif msg['text'] == "\x01LAG\x01":
                        pass  #: do not know how to answer

                # only care about messages from the bot to us
                if not (bot == msg['origin'][0:len(bot)] and
                        nick == msg['target'][0:len(nick)] and
                        msg['action'] in ("PRIVMSG", "NOTICE")):
                    continue

                if self.debug == 1:
                    print("{0}: {1}".format(msg['origin'], msg['text']))

                if "You already requested that pack" in msg['text']:
                    retry = time.time() + 300

                if "you must be on a known channel to request a pack" in msg['text']:
                    self.fail(_("Wrong channel"))

                m = re.match('\x01DCC SEND (.*?) (\d+) (\d+)(?: (\d+))?\x01', msg['text'])
                if m:
                    done = True

        # get connection data
        # FIX: was struct.pack('L', socket.ntohl(...)) — native 'L' is
        # 8 bytes on 64-bit platforms, which inet_ntoa rejects; pack the
        # DCC integer directly as a big-endian 32-bit value
        ip = socket.inet_ntoa(struct.pack('!I', int(m.group(2))))
        port = int(m.group(3))
        packname = m.group(1)

        # FIX: m.groups() always has 4 entries (optional group is None),
        # so the old len() check crashed with int(None) when the bot
        # omitted the file size — test the group value instead
        if m.group(4):
            self.req.filesize = int(m.group(4))

        self.pyfile.name = packname
        filename = format.path(location, packname)
        self.log_info(_("XDCC: Downloading {0} from {1}:{2:d}").format(packname, ip, port))

        self.pyfile.set_status("downloading")
        newname = self.req.download(ip, port, filename, sock, self.pyfile.set_progress)
        if newname and newname != filename:
            self.log_info(_("{0} saved as {1}").format(self.pyfile.name, newname))
            filename = newname

    # kill IRC socket
    # sock.send("QUIT :byebye\r\n")
    self.last_download = filename
    return self.last_download
def remove(self):
    """Delete the "<name>.chunks" info file belonging to this download."""
    remove(format.path("{0}.chunks".format(self.name)))
def download(self, url, get=None, post=None, ref=True, cookies=True, disposition=False):
    """
    Downloads the content at url to download folder

    :param url: url to download
    :param get: GET parameters (dict); defaults to empty
    :param post: POST parameters (dict); defaults to empty
    :param ref: send the referer header
    :param cookies: use cookies for the request
    :param disposition: if True and server provides content-disposition
        header the filename will be changed if needed
    :return: The location where the file was saved
    """
    # FIX: mutable default arguments are shared between calls — use a
    # None sentinel instead (callers passing dicts are unaffected)
    if get is None:
        get = {}
    if post is None:
        post = {}

    self.check_for_same_files()
    self.check_abort()

    self.pyfile.set_status("downloading")

    download_folder = self.pyload.config.get('general', 'storage_folder')
    location = os.path.join(download_folder, self.pyfile.package().folder)

    if not os.path.exists(location):
        makedirs(location, int(self.pyload.config.get('permission', 'foldermode'), 8))

        if self.pyload.config.get('permission', 'change_fileowner') and os.name != 'nt':
            try:
                uid = pwd.getpwnam(self.pyload.config.get('permission', 'user'))[2]
                gid = grp.getgrnam(self.pyload.config.get('permission', 'group'))[2]
                os.chown(location, uid, gid)
            except Exception as e:
                # FIX: e.message does not exist in Python 3
                self.pyload.log.warning(_("Setting User and Group failed: {0}").format(e))

    name = self.pyfile.name
    filename = os.path.join(location, name)

    self.pyload.adm.fire("download:start", self.pyfile, url, filename)

    # Create the class used for downloading
    self.dl = self.pyload.req.get_download_request(self.req, self.DOWNLOAD_CLASS)
    try:
        # TODO: hardcoded arguments
        newname = self.dl.download(url, filename, get=get, post=post,
                                   referer=ref, chunks=self.get_chunk_count(),
                                   resume=self.resume_download,
                                   cookies=cookies, disposition=disposition)
    finally:
        self.dl.close()
        self.pyfile.size = self.dl.size

    if disposition and newname and newname != name:  #: triple check, just to be sure
        self.pyload.log.info(_("{0} saved as {1}").format(name, newname))
        self.pyfile.name = newname
        filename = os.path.join(location, newname)

    fs_filename = format.path(filename)

    if self.pyload.config.get('permission', 'change_filemode'):
        os.chmod(fs_filename, int(self.pyload.config.get('permission', 'filemode'), 8))

    if self.pyload.config.get('permission', 'change_fileowner') and os.name != 'nt':
        try:
            uid = pwd.getpwnam(self.pyload.config.get('permission', 'user'))[2]
            gid = grp.getgrnam(self.pyload.config.get('permission', 'group'))[2]
            os.chown(fs_filename, uid, gid)
        except Exception as e:
            # FIX: e.message does not exist in Python 3
            self.pyload.log.warning(_("Setting User and Group failed: {0}").format(e))

    self.last_download = fs_filename
    return self.last_download
def _download(self, chunks, resume):
    """
    Drive the (possibly chunked) curl download until all chunks finish.

    :param chunks: desired number of connections (actual count may differ)
    :param resume: whether to continue from existing chunk files
    :raises Abort: when self.do_abort is set
    :raises pycurl.error / ResponseException: last chunk failure when the
        single-connection fallback is not possible
    """
    if not resume:
        self.info.clear()
        self.info.add_chunk("{0}.chunk0".format(self.path), (0, 0))  #: create an initial entry

    self.chunks = []

    # initial chunk that will load complete file (if needed)
    init = CurlChunk(0, self, None, resume)
    self.chunks.append(init)
    self.manager.add_handle(init.get_handle())

    last_finish_check = 0
    last_time_check = 0
    chunks_done = set()  #: list of curl handles that are finished
    chunks_created = False
    done = False
    if self.info.get_count() > 1:  #: This is a resume, if we were chunked originally assume still can
        self.chunk_support = True

    while True:
        # need to create chunks
        if not chunks_created and self.chunk_support and self.size:  #: will be set later by first chunk
            self.flags ^= Connection.Resumable
            if not resume:
                self.info.set_size(self.size)
                self.info.create_chunks(chunks)
                self.info.save()

            chunks = self.info.get_count()

            init.set_range(self.info.get_chunk_range(0))

            for i in range(1, chunks):
                c = CurlChunk(i, self, self.info.get_chunk_range(i), resume)
                handle = c.get_handle()
                if handle:
                    self.chunks.append(c)
                    self.manager.add_handle(handle)
                else:
                    # close immediately
                    self.pyload.log.debug("Invalid curl handle -> closed")
                    c.close()

            chunks_created = True

        # drain pending transfers
        while True:
            ret, num_handles = self.manager.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break

        t = time()

        # reduce these calls
        # when num_q is 0, the loop is exited
        while last_finish_check + 0.5 < t:
            # list of failed curl handles
            failed = []
            ex = None  #: save only last exception, we can only raise one anyway

            num_q, ok_list, err_list = self.manager.info_read()
            for c in ok_list:
                chunk = self.find_chunk(c)
                try:  #: check if the header implies success, else add it to failed list
                    chunk.verify_header()
                except ResponseException as e:
                    # FIX: e.message does not exist in Python 3 — use str(e)
                    # (matches the sibling implementation of this method)
                    self.pyload.log.debug("Chunk {0:d} failed: {1}".format(chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunks_done.add(c)

            for c in err_list:
                curl, errno, msg = c
                chunk = self.find_chunk(curl)

                # test if chunk was finished
                # (curl errno 23 with "0 !=" means the write callback
                # aborted a transfer that was already complete)
                if errno != 23 or "0 !=" not in msg:
                    failed.append(chunk)
                    ex = pycurl.error(errno, msg)
                    self.pyload.log.debug("Chunk {0:d} failed: {1}".format(chunk.id + 1, ex))
                    continue

                try:  #: check if the header implies success, else add it to failed list
                    chunk.verify_header()
                except ResponseException as e:
                    # FIX: e.message does not exist in Python 3 — use str(e)
                    self.pyload.log.debug("Chunk {0:d} failed: {1}".format(chunk.id + 1, str(e)))
                    failed.append(chunk)
                    ex = e
                else:
                    chunks_done.add(curl)

            if not num_q:  #: no more info to get
                # check if init is not finished so we reset download connections
                # note that other chunks are closed and everything
                # downloaded with initial connection
                if failed and init not in failed and init.c not in chunks_done:
                    self.pyload.log.error(_("Download chunks failed, fallback to single connection | {0}".format(ex)))

                    # list of chunks to clean and remove
                    to_clean = [x for x in self.chunks if x is not init]
                    for chunk in to_clean:
                        self.close_chunk(chunk)
                        self.chunks.remove(chunk)
                        remove(format.path(self.info.get_chunk_name(chunk.id)))

                    # let first chunk load the rest and update the info
                    # file
                    init.reset_range()
                    self.info.clear()
                    self.info.add_chunk("{0}.chunk0".format(self.path), (0, self.size))
                    self.info.save()
                elif failed:
                    raise ex

                last_finish_check = t

                if len(chunks_done) >= len(self.chunks):
                    if len(chunks_done) > len(self.chunks):
                        self.pyload.log.warning(_("Finished download chunks size incorrect, please report bug"))
                    done = True  #: all chunks loaded

                break

        if done:
            break  #: all chunks loaded

        # calc speed once per second, averaging over 3 seconds
        if last_time_check + 1 < t:
            diff = [c.arrived - (self.last_arrived[i] if len(self.last_arrived) > i else 0) for i, c in enumerate(self.chunks)]

            self.last_speeds[1] = self.last_speeds[0]
            self.last_speeds[0] = self.speeds
            # NOTE(review): `//` floors the byte/s value — looks like a
            # py2->py3 conversion of `/`; confirm floor division is intended
            self.speeds = [float(a) // (t - last_time_check) for a in diff]
            self.last_arrived = [c.arrived for c in self.chunks]
            last_time_check = t

        if self.do_abort:
            raise Abort

        self.manager.select(1)

    for chunk in self.chunks:
        chunk.flush_file()  #: make sure downloads are written to disk

    self._copy_chunks()