def sendfile(outfile, infile, offset, size):
    try:
        out_fd = outfile.fileno()
        in_fd = infile.fileno()
    except:
        highlevel_sendfile(outfile, infile, offset, size)
    else:
        # size == 0 has special meaning for some sendfile implementations
        if size > 0:
            os.sendfile(out_fd, in_fd, offset, size)
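The highlevel_sendfile() fallback referenced above is not shown in this snippet. A minimal sketch of what such a userspace fallback could look like (an assumption for illustration, not the original implementation):

def highlevel_sendfile(outfile, infile, offset, size):
    # Plain read()/write() loop used when fileno() is unavailable
    # (e.g. the "files" are wrappers without a real descriptor).
    infile.seek(offset)
    while size > 0:
        chunk = infile.read(min(size, 64 * 1024))
        if not chunk:
            break
        outfile.write(chunk)
        size -= len(chunk)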
def do_GET(self):
    site, path = self.log_and_parse_request()
    if site in self.server.config['backup_sites']:
        try:
            if path[1] == "basebackups":  # TODO use something nicer to map URIs
                if len(path) == 2:
                    response, headers, status = self.list_basebackups(site)
                elif len(path) == 3:
                    response, headers, status = self.get_basebackup(site, path[2])
            elif len(path[1]) == 24 and len(path) == 2:
                response, headers, status = self.get_wal_or_timeline_file(site, path[1], "xlog")
            elif path[1].endswith(".history"):
                response, headers, status = self.get_wal_or_timeline_file(site, path[1], "timeline")
            else:
                self.send_response(404)
                return
        except:  # pylint: disable=bare-except
            self.server.log.exception("Exception occurred when processing: %r", path)
            self.send_response(404)
            return
        self.send_response(status)
        for header_key, header_value in headers.items():
            self.send_header(header_key, header_value)
        if status not in (206, 404):
            if 'Content-type' not in headers:
                if isinstance(response, dict):
                    mimetype = "application/json"
                    response = json.dumps(response, indent=4).encode("utf8")
                    size = len(response)
                elif hasattr(response, "read"):
                    mimetype = "application/x-xz"
                    size = os.fstat(response.fileno()).st_size  # pylint: disable=maybe-no-member
                self.send_header('Content-type', mimetype)
                self.send_header('Content-length', str(size))
            self.end_headers()
            if isinstance(response, bytes):
                self.wfile.write(response)
            elif hasattr(response, "read"):
                if hasattr(os, "sendfile"):
                    os.sendfile(self.wfile.fileno(), response.fileno(), 0, size)  # pylint: disable=maybe-no-member
                else:
                    shutil.copyfileobj(response, self.wfile)
    else:
        self.server.log.warning("Site: %r not found, path was: %r", site, path)
        self.send_response(404)
def handle_request(self):
    self.request = self.client_connection.recv(1024)
    if not self.request:
        return
    self.parse_request()
    http_response = 'HTTP/1.1 {status}\r\n'.format(status=self.status)
    http_response += 'Date: {date}\r\n'.format(date=strftime("%a, %d %b %Y %X GMT", gmtime()))
    http_response += 'Server: Technapache\r\n'
    http_response += 'Connection: keep-alive\r\n'
    if self.status == '200 OK':
        http_response += 'Content-Length: {length}\r\n'.format(length=self.file_size)
        http_response += 'Content-Type: {content_type}\r\n'.format(content_type=self.content_type)
    http_response += '\r\n'
    self.client_connection.sendall(http_response.encode())
    if (self.request_method != 'HEAD') and self.status == '200 OK':
        offset = 0
        blocksize = 4096
        while True:
            sent = os.sendfile(self.client_connection.fileno(), self.f.fileno(), offset, blocksize)
            if sent == 0:
                break
            offset += sent
        self.f.close()
def blob_client(self, csock):
    file_id = Pyro4.socketutil.receiveData(csock, 36).decode()
    print("{0} requesting file id {1}".format(csock.getpeername(), file_id))
    is_file, data = self.find_blob_data(file_id)
    if is_file:
        if hasattr(os, "sendfile"):
            print("...from file using sendfile()")
            out_fn = csock.fileno()
            in_fn = data.fileno()
            sent = 1
            offset = 0
            while sent:
                sent = os.sendfile(out_fn, in_fn, offset, 512000)
                offset += sent
        else:
            print("...from file using plain old read()")
            while True:
                chunk = data.read(512000)
                if not chunk:
                    break
                csock.sendall(chunk)
    else:
        print("...from memory")
        csock.sendall(data)
    csock.close()
def writer(self, loop, after_seqno):
    last_offset = 0
    log_seqno, log_offset = self.log.logfile.head
    while log_seqno <= after_seqno:
        last_offset = log_offset
        yield from self.log_updated.wait()
        log_seqno, log_offset = self.log.logfile.head

    # TODO FIXME: Handle starting from an offset, and then do that
    # here as well in case we somehow missed the correct offset
    # while waiting to start. This all requires, presumably, an
    # offset index to be implemented.

    self.install_write_watcher(loop)
    while True:
        yield from self.writeable.wait()
        # FIXME: What happens when the client disconnects? Will I
        # sleep forever, or will the socket become writable and
        # then raise an exception?
        log_seqno, log_offset = self.log.logfile.head
        to_send = log_offset - last_offset
        if to_send > 0:
            try:
                last_offset += os.sendfile(
                    self.socket.fileno(), self.log.logfile.fileno(),
                    last_offset, to_send)
            except BlockingIOError:
                pass
        else:
            loop.remove_writer(self.socket.fileno())
            yield from self.log_updated.wait()
            self.install_write_watcher(loop)
def _fastcopy_sendfile(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    high-performance sendfile(2) syscall.
    This should work on Linux >= 2.6.33 and Solaris only.
    """
    # Note: copyfileobj() is left alone in order to not introduce any
    # unexpected breakage. Possible risks by using zero-copy calls
    # in copyfileobj() are:
    # - fdst cannot be open in "a"(ppend) mode
    # - fsrc and fdst may be open in "t"(ext) mode
    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
    #   GzipFile (which decompresses data), HTTPResponse (which decodes
    #   chunks).
    # - possibly others (e.g. encrypted fs/partition?)
    global _HAS_SENDFILE
    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    # Hopefully the whole file will be copied in a single call.
    # sendfile() is called in a loop until EOF is reached (0 return),
    # so a bufsize smaller or bigger than the actual file size
    # should not make any difference, also in case the file content
    # changes while being copied.
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MB
    except Exception:
        blocksize = 2 ** 27  # 128MB

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOTSOCK:
                # sendfile() on this platform (probably Linux < 2.6.33)
                # does not support copies between regular files (only
                # sockets).
                _HAS_SENDFILE = False
                raise _GiveupOnFastCopy(err)

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)

            raise err
        else:
            if sent == 0:
                break  # EOF
            offset += sent
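The _GiveupOnFastCopy exception is the contract between this fast path and its caller: it means "nothing meaningful happened, use the generic path", while any other error propagates. A minimal caller sketch under that assumption (the names copyfile_sketch and the exception class body are illustrative, not the stdlib code):

import shutil

class _GiveupOnFastCopy(Exception):
    """Signals that sendfile() could not be used and the caller
    should fall back to a plain userspace copy."""

def copyfile_sketch(srcname, dstname):
    with open(srcname, "rb") as fsrc, open(dstname, "wb") as fdst:
        try:
            _fastcopy_sendfile(fsrc, fdst)
            return
        except _GiveupOnFastCopy:
            # the fast path only gives up before copying any data,
            # so restarting from the beginning is safe
            fsrc.seek(0)
        shutil.copyfileobj(fsrc, fdst)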
def sendfile_all(self, fileno, sockno, offset, nbytes):
    # Send file in at most 1GB blocks as some operating
    # systems can have problems with sending files in blocks
    # over 2GB.
    BLKSIZE = 0x3FFFFFFF

    if nbytes > BLKSIZE:
        for m in range(0, nbytes, BLKSIZE):
            self.sendfile_all(fileno, sockno, offset, min(nbytes, BLKSIZE))
            offset += BLKSIZE
            nbytes -= BLKSIZE
    else:
        sent = 0
        sent += sendfile(fileno, sockno, offset + sent, nbytes - sent)
        while sent != nbytes:
            sent += sendfile(fileno, sockno, offset + sent, nbytes - sent)
def handle_write(self):
    check_end = True
    while self.ws_buffer:
        is_sendfile, dat = self.ws_buffer[0]
        try:
            # :TODO: account for zero-copy of bytes, see IOStream._write_buffer_frozen
            if is_sendfile:
                if dat.f is None:
                    is_ok, f = try_open(dat.fpath)
                    if is_ok:
                        dat.f = f
                    else:
                        gen_log.warning("sendfile/handle_write: can't open %s", dat.fpath)
                        self.close()
                        check_end = False
                        break
                while dat.sz > 0:
                    num_bytes = os.sendfile(self.fileno(), dat.f.fileno(), dat.off, dat.sz)
                    dat.off += num_bytes
                    dat.sz -= num_bytes
                dat.f.close()
            else:
                while dat:
                    _merge_prefix(dat, 128 * 1024)
                    num_bytes = self.write_to_fd(dat[0])
                    _merge_prefix(dat, num_bytes)
                    dat.popleft()
            self.ws_buffer.popleft()
            self.on_queue_change(-1)
        except socket.error as e:
            # :COPY_N_PASTE:
            if e.args[0] in (errno.EWOULDBLOCK, errno.EAGAIN):
                # if isinstance(dat, argparse.Namespace):
                #     dat.f.close()
                #     self.ws_buffer.popleft()
                break
            else:
                if e.args[0] not in (errno.EPIPE, errno.ECONNRESET):
                    # Broken pipe errors are usually caused by connection
                    # reset, and it's better to not log EPIPE errors to
                    # minimize log spam
                    gen_log.warning("Write error on %d: %s", self.fileno(), e)
                self.close(exc_info=True)
                check_end = False
                break
    if check_end:
        if not self.ws_buffer and self._write_callback:
            # :COPY_N_PASTE:
            callback = self._write_callback
            self._write_callback = None
            self._run_callback(callback)
def _eventlet_sendfile(fdout, fdin, offset, nbytes):
    while True:
        try:
            return os.sendfile(fdout, fdin, offset, nbytes)
        except OSError as e:
            if e.args[0] == errno.EAGAIN:
                trampoline(fdout, write=True)
            else:
                raise
def _gevent_sendfile(fdout, fdin, offset, nbytes):
    while True:
        try:
            return os.sendfile(fdout, fdin, offset, nbytes)
        except OSError as e:
            if e.args[0] == errno.EAGAIN:
                wait_write(fdout)
            else:
                raise
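The eventlet and gevent wrappers above are identical except for the primitive used to wait for writability (trampoline vs. wait_write). The same retry-on-EAGAIN pattern works with plain select() on a non-blocking socket; a sketch for illustration (not taken from either library):

import errno
import os
import select

def _select_sendfile(fdout, fdin, offset, nbytes):
    # Retry sendfile() until it makes progress, blocking in select()
    # whenever the non-blocking output fd is not yet writable.
    while True:
        try:
            return os.sendfile(fdout, fdin, offset, nbytes)
        except OSError as e:
            if e.errno == errno.EAGAIN:
                select.select([], [fdout], [])  # wait until writable
            else:
                raise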
def _sock_sendfile_native_impl(self, fut, registered_fd, sock, fileno,
                               offset, count, blocksize, total_sent):
    fd = sock.fileno()
    if registered_fd is not None:
        # Remove the callback early. It should be rare that the
        # selector says the fd is ready but the call still returns
        # EAGAIN, and I am willing to take a hit in that case in
        # order to simplify the common case.
        self.remove_writer(registered_fd)
    if fut.cancelled():
        self._sock_sendfile_update_filepos(fileno, offset, total_sent)
        return
    if count:
        blocksize = count - total_sent
        if blocksize <= 0:
            self._sock_sendfile_update_filepos(fileno, offset, total_sent)
            fut.set_result(total_sent)
            return

    try:
        sent = os.sendfile(fd, fileno, offset, blocksize)
    except (BlockingIOError, InterruptedError):
        if registered_fd is None:
            self._sock_add_cancellation_callback(fut, sock)
        self.add_writer(fd, self._sock_sendfile_native_impl, fut,
                        fd, sock, fileno,
                        offset, count, blocksize, total_sent)
    except OSError as exc:
        if total_sent == 0:
            # We can get here for different reasons, the main
            # one being 'file' is not a regular mmap(2)-like
            # file, in which case we'll fall back on using
            # plain send().
            err = events.SendfileNotAvailableError(
                "os.sendfile call failed")
            self._sock_sendfile_update_filepos(fileno, offset, total_sent)
            fut.set_exception(err)
        else:
            self._sock_sendfile_update_filepos(fileno, offset, total_sent)
            fut.set_exception(exc)
    except Exception as exc:
        self._sock_sendfile_update_filepos(fileno, offset, total_sent)
        fut.set_exception(exc)
    else:
        if sent == 0:
            # EOF
            self._sock_sendfile_update_filepos(fileno, offset, total_sent)
            fut.set_result(total_sent)
        else:
            offset += sent
            total_sent += sent
            if registered_fd is None:
                self._sock_add_cancellation_callback(fut, sock)
            self.add_writer(fd, self._sock_sendfile_native_impl, fut,
                            fd, sock, fileno,
                            offset, count, blocksize, total_sent)
def async_sendfile(fdout, fdin, offset, nbytes):
    total_sent = 0
    while total_sent < nbytes:
        try:
            sent = sendfile(fdout, fdin, offset + total_sent,
                            nbytes - total_sent)
            total_sent += sent
        except OSError, e:  # Python 2 except syntax
            if e.args[0] == errno.EAGAIN:
                wait_write(fdout)
            else:
                raise
def _do_sendfile(self, out_fd: int) -> bool:
    try:
        n = os.sendfile(out_fd, self._in_fd, self._offset, self._count)
        if n == 0:  # in_fd EOF reached
            n = self._count
    except (BlockingIOError, InterruptedError):
        n = 0
    self.output_size += n
    self._offset += n
    self._count -= n
    assert self._count >= 0
    return self._count == 0
def sendfile(outputfileno, inputfileno, nbytes):
    '''
    Copy a file the zero-copy way via the os.sendfile system call.
    Use cases: file-to-file copy and file-to-socket copy (on Linux the
    input fd must support mmap, so it cannot itself be a socket).
    Note: os.sendfile is only available in Python 3.x and needs OS support.
    In Linux kernels before 2.6.33 the outputfileno must refer to a socket;
    since Linux 2.6.33 it can be any file.
    @return: total size of the sent content
    '''
    sent = offset = 0
    while True:
        sent = os.sendfile(outputfileno, inputfileno, offset, nbytes)
        offset += sent
        if sent == 0:
            break
    return offset
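A hypothetical driver for the helper above, streaming a local file over a freshly connected TCP socket; the file name "payload.bin" and the address are made up for the example, and nbytes here acts as the per-call chunk size since the helper loops until EOF:

import os
import socket

with socket.create_connection(("127.0.0.1", 9000)) as sock, \
        open("payload.bin", "rb") as f:
    total = sendfile(sock.fileno(), f.fileno(), 65536)
    print("sent", total, "bytes")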
def _sendfile_cb(self, fut, out_fd, in_fd, offset, count, loop, registered):
    if registered:
        loop.remove_writer(out_fd)
    try:
        n = os.sendfile(out_fd, in_fd, offset, count)
        if n == 0:  # EOF reached
            n = count
    except (BlockingIOError, InterruptedError):
        n = 0
    except Exception as exc:
        fut.set_exception(exc)
        return

    if n < count:
        loop.add_writer(out_fd, self._sendfile_cb, fut, out_fd, in_fd,
                        offset + n, count - n, loop, True)
    else:
        fut.set_result(None)
def sendfile(self, respiter):
    if self.cfg.is_ssl or not self.can_sendfile():
        return False

    try:
        fileno = respiter.filelike.fileno()
    except AttributeError:
        return False

    try:
        offset = os.lseek(fileno, 0, os.SEEK_CUR)
        if self.response_length is None:
            filesize = os.fstat(fileno).st_size

            # The file may be special and sendfile will fail.
            # It may also be zero-length, but that is okay.
            if filesize == 0:
                return False

            nbytes = filesize - offset
        else:
            nbytes = self.response_length
    except (OSError, io.UnsupportedOperation):
        return False

    self.send_headers()

    if self.is_chunked():
        chunk_size = "%X\r\n" % nbytes
        self.sock.sendall(chunk_size.encode('utf-8'))

    sockno = self.sock.fileno()
    sent = 0

    while sent != nbytes:
        count = min(nbytes - sent, BLKSIZE)
        sent += sendfile(sockno, fileno, offset + sent, count)

    if self.is_chunked():
        self.sock.sendall(b"\r\n")

    os.lseek(fileno, offset, os.SEEK_SET)

    return True
def zero_cp_send():
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server_address = ('127.0.0.1', 4455)
    sock.connect(server_address)
    start = time.time()
    try:
        # open in binary mode; the file content is passed by fd anyway
        with open('/Users/fangpeng/Downloads/test.rar', 'rb') as f:
            ret = 0
            offset = 0
            while True:
                # only python 3.x
                ret = os.sendfile(sock.fileno(), f.fileno(), offset, 65536)
                offset += ret
                if ret == 0:
                    break
    finally:
        sock.close()
    end = time.time()
    print('Total time: ', end - start)  # 2.803406000137329
def _sendfile_cb(self, fut, out_fd, in_fd, offset, count, loop, registered):
    if registered:
        loop.remove_writer(out_fd)
    if fut.cancelled():
        return
    try:
        n = os.sendfile(out_fd, in_fd, offset, count)
        if n == 0:  # EOF reached
            n = count
    except (BlockingIOError, InterruptedError):
        n = 0
    except Exception as exc:
        set_exception(fut, exc)
        return

    if n < count:
        loop.add_writer(out_fd, self._sendfile_cb, fut, out_fd, in_fd,
                        offset + n, count - n, loop, True)
    else:
        set_result(fut, None)
def _fastcopy_sendfile(self, fsrc, fdst, callback, total):
    global _USE_CP_SENDFILE
    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MiB
    except OSError:
        blocksize = 2 ** 27  # 128MiB
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            err.filename = fsrc.name
            err.filename2 = fdst.name
            if err.errno == errno.ENOTSOCK:
                _USE_CP_SENDFILE = False
                raise _GiveupOnFastCopy(err)
            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)
            raise err
        else:
            if sent == 0 or not self.statusWork:
                break  # EOF
            offset += sent
            callback(offset, sent, total=total)
def zerocopyclient():
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server_address = ('127.0.0.1', 10000)
    sock.connect(server_address)
    start = time.time()
    try:
        with open(path, 'rb') as f:
            # message = f.read()
            # sock.sendall(message)
            ret = 0
            offset = 0
            while True:
                ret = os.sendfile(sock.fileno(), f.fileno(), offset, 65536)
                offset += ret
                if ret == 0:
                    break
    finally:
        sock.close()
    end = time.time()
    print('Total time: ', end - start)
def pullData(self, ip, pathFile):
    '''
    Send data to a specific user
    :param ip:
    :param pathFile:
    :return:
    '''
    conn = self.listConnect.get(ip, None)
    if conn is not None:
        print("sending")
        sf = conn.fileno()
        lf = os.open(pathFile, os.O_RDONLY)
        while os.sendfile(sf, lf, None, 1024 * 1024 * 10) > 0:
            print('.', end='')
        os.close(lf)  # close the input fd once EOF is reached
        conn.close()
        del self.listConnect[ip]
        return 0
    else:
        return -1
def copy_raw(
    source_image: FirmwareImage,
    target_image: FirmwareImage,
):
    """Copy the contents of one image over another image."""
    with open(source_image.device, "rb") as source:
        source.seek(source_image.offset)
        fd = os.open(target_image.device, os.O_WRONLY)
        try:
            os.set_blocking(fd, True)
            os.lseek(fd, target_image.offset, os.SEEK_SET)
            # And now we rely on sendfile() aligning things properly
            write_size = os.sendfile(fd, source.fileno(), None, source_image.size)
            assert write_size == source_image.size
        except OSError:
            # reraise it immediately; this except-clause is to satisfy the
            # grammar so we can have an else-clause
            raise
        else:
            os.fsync(fd)
        finally:
            os.close(fd)
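FirmwareImage is not defined in this snippet; from the attribute accesses it needs a device path plus an offset and size. An assumed shape, for illustration only (the real class may carry more fields):

from dataclasses import dataclass

@dataclass
class FirmwareImage:
    device: str  # block device path, e.g. "/dev/mmcblk0" (hypothetical)
    offset: int  # byte offset of the image inside the device
    size: int    # image length in bytes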
def sendfile_cat(infiles: typing.List[str],
                 outfile: str,
                 remove: bool = False,
                 preremove: bool = True,
                 error_file: typing.TextIO = sys.stderr,
                 verbose: bool = False):
    """
    Use the superfast sendfile method to copy file data.

    This works on Linux. It won't work on MacOS because sendfile will
    send only to a socket on that operating system. Sendfile isn't
    implemented at all on Python for Windows.
    """
    start_time: float = 0
    if verbose:
        print('Using sendfile_cat.', file=error_file, flush=True)
        start_time = time.time()

    if preremove:
        try:
            # This can be faster than truncating an existing file.
            #
            # TODO: We might get even better performance if we
            # removed existing output files on a thread before the start of
            # whatever we are doing. We'd have to wait for the thread to
            # complete before we start to write the new files.
            os.remove(outfile)
        except FileNotFoundError:
            pass

    ofd: int = os.open(outfile, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
    totallen: int = 0
    filename: str
    for filename in infiles:
        if verbose:
            print('Copying {}.'.format(filename), file=error_file, flush=True)
        ifd: int = os.open(filename, os.O_RDONLY)

        # This chunk size is chosen to be less than the limit in
        # the 32-bit system call.
        count: int = 1024 * 1024 * 1024
        offset: int = 0
        while True:
            copycount: int = os.sendfile(ofd, ifd, offset, count)
            if copycount == 0:
                break
            offset += copycount
            totallen += copycount
        os.close(ifd)
    os.close(ofd)

    if remove:
        for filename in infiles:
            if verbose:
                print('Removing {}.'.format(filename), file=error_file, flush=True)
            os.remove(filename)

    if verbose:
        print('Done with sendfile_cat. len=%d' % totallen, file=error_file, flush=True)
        print('Time taken : {}s'.format(time.time() - start_time), file=error_file, flush=True)
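A hypothetical invocation of the function above, concatenating two part files into one output and deleting the parts afterwards (file names are made up):

sendfile_cat(["part1.bin", "part2.bin"], "combined.bin",
             remove=True, verbose=True)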
def sendfile(self, fd):
    while os.sendfile(self.socket.fileno(), fd, None, self.fileChunkSize) > 0:
        pass
import os

source_file = open(r'./480P_2000K_200329271.mp4', 'rb')
dist_file = open(r'./dist.mp4', 'w+')

ret = 0
offset = 0
while True:
    ret = os.sendfile(dist_file.fileno(), source_file.fileno(), offset, 65536)
    offset += ret
    if ret == 0:
        break

"""
% python3 copy_local_file.py
Traceback (most recent call last):
  File "copy_local_file.py", line 9, in <module>
    ret = os.sendfile(dist_file.fileno(), source_file.fileno(), offset, 65536)
OSError: [Errno 38] Socket operation on non-socket
%
"""
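The traceback above shows sendfile() failing for a file-to-file copy on macOS, where sendfile() only accepts a socket as the output. A portable sketch of the same copy that falls back to a userspace copy when the syscall is rejected (assumes the failure happens on the first call, as it does here):

import os
import shutil

with open('./480P_2000K_200329271.mp4', 'rb') as src, \
        open('./dist.mp4', 'wb') as dst:
    try:
        offset = 0
        while True:
            sent = os.sendfile(dst.fileno(), src.fileno(), offset, 65536)
            if sent == 0:
                break
            offset += sent
    except OSError:
        # e.g. [Errno 38] on macOS: rewind and copy through userspace instead
        src.seek(0)
        dst.seek(0)
        dst.truncate()
        shutil.copyfileobj(src, dst)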
def parse(self):
    # skip the SOI magic
    self.infile.seek(2)

    # then further process the frame according to B.2.1
    # After SOI there are optional tables/miscellaneous (B.2.4)
    # These are defined in B.2.4.*. Marker values are in B.1
    # JPEG is in big endian order (B.1.1.1)

    # keep track of whether or not a frame can be restarted
    restart = False
    eofseen = False

    seen_markers = set()
    while True:
        checkbytes = self.infile.read(2)
        check_condition(len(checkbytes) == 2, "not enough data for table/misc")

        if checkbytes in TABLES_MISC_MARKERS or checkbytes in APP_MARKERS:
            # store the marker
            marker = checkbytes
            seen_markers.add(marker)

            # extract the length of the table or app marker.
            # this includes the 2 bytes of the length field itself
            checkbytes = self.infile.read(2)
            check_condition(len(checkbytes) == 2,
                            "not enough data for table/misc length field")

            misctablelength = int.from_bytes(checkbytes, byteorder='big')
            check_condition(self.infile.tell() + misctablelength - 2 <= self.fileresult.filesize,
                            "table outside of file")

            if marker == b'\xff\xdd':
                # DRI
                oldoffset = self.infile.tell()
                checkbytes = self.infile.read(2)
                check_condition(len(checkbytes) == 2, "not enough data for DRI")
                restartinterval = int.from_bytes(checkbytes, byteorder='big')
                if restartinterval != 0:
                    restart = True
                self.infile.seek(oldoffset)
            elif marker == b'\xff\xdb':
                # DQT, not present for lossless JPEG by definition (B.2.4.1)
                oldoffset = self.infile.tell()

                # check Pq and Tq
                checkbytes = self.infile.read(1)
                check_condition(len(checkbytes) == 1, "not enough data for DQT")
                pqtq = ord(checkbytes)
                pq = pqtq >> 4
                check_condition(pq in [0, 1], "invalid DQT value")
                tq = pqtq & 15
                check_condition(tq < 4, "invalid DQT value")
                self.infile.seek(oldoffset)
            elif marker == b'\xff\xe0':
                # APP0, TODO
                oldoffset = self.infile.tell()
                checkbytes = self.infile.read(5)
                check_condition(len(checkbytes) == 5, "not enough data for APP0")
                self.infile.seek(oldoffset)
            elif marker == b'\xff\xe1':
                # APP1, EXIF and friends
                # EXIF could have a thumbnail, TODO
                oldoffset = self.infile.tell()
                checkbytes = self.infile.read(5)
                check_condition(len(checkbytes) == 5, "not enough data for APP1")
                self.infile.seek(oldoffset)

            # skip over the section
            self.infile.seek(misctablelength - 2, os.SEEK_CUR)
        else:
            break

    '''
    # the abbreviated syntax is not widely used, so do not allow it
    # but keep the code for now
    allowabbreviated = False
    if allowabbreviated:
        # There *could* be an EOI marker here and it would be
        # a valid JPEG according to section B.5, although not
        # all markers would be allowed.
        if checkbytes == b'\xff\xd9':
            check_condition(seen_markers != set(),
                            "no tables present, needed for abbreviated syntax")
            # according to B.5 DAC and DRI are not allowed in this syntax.
            check_condition(b'\xff\xcc' not in seen_markers and b'\xff\xdd' not in seen_markers,
                            "DAC and/or DRI not allowed in abbreviated syntax")
            self.unpacked_size = self.infile.tell()
            return
    '''

    ishierarchical = False

    # there could be a DHP segment here according to section B.3,
    # but only one in the entire image
    if checkbytes == b'\xff\xde':
        checkbytes = self.infile.read(2)
        check_condition(len(checkbytes) == 2,
                        "not enough data for table/misc length field")
        sectionlength = int.from_bytes(checkbytes, byteorder='big')
        check_condition(self.infile.tell() + sectionlength - 2 <= self.fileresult.filesize,
                        "table outside of file")
        ishierarchical = True

        # skip over the section
        self.infile.seek(sectionlength - 2, os.SEEK_CUR)

        # and make sure that a few bytes are already read
        checkbytes = self.infile.read(2)
        check_condition(len(checkbytes) == 2, "not enough data for table/misc")

    # now there could be multiple frames, starting with optional
    # misc/tables again.
    while True:
        framerestart = restart
        while True:
            if checkbytes in TABLES_MISC_MARKERS or checkbytes in APP_MARKERS:
                isdri = False
                if checkbytes == b'\xff\xdd':
                    isdri = True

                # extract the length of the table or app marker.
                # this includes the 2 bytes of the length field itself
                checkbytes = self.infile.read(2)
                check_condition(len(checkbytes) == 2,
                                "not enough data for table/misc length field")
                misctablelength = int.from_bytes(checkbytes, byteorder='big')
                check_condition(self.infile.tell() + misctablelength - 2 <= self.fileresult.filesize,
                                "table outside of file")

                if isdri:
                    oldoffset = self.infile.tell()
                    checkbytes = self.infile.read(2)
                    check_condition(len(checkbytes) == 2, "not enough data for table/misc")
                    restartinterval = int.from_bytes(checkbytes, byteorder='big')
                    if restartinterval != 0:
                        framerestart = True
                    self.infile.seek(oldoffset)

                # skip over the section
                self.infile.seek(misctablelength - 2, os.SEEK_CUR)

                # and read the next few bytes
                checkbytes = self.infile.read(2)
                check_condition(len(checkbytes) == 2, "not enough data for table/misc")
            else:
                break

        # check if this is EXP (only in hierarchical syntax)
        if checkbytes == b'\xff\xdf':
            check_condition(ishierarchical, "EXP only allowed in hierarchical syntax")

            checkbytes = self.infile.read(2)
            check_condition(len(checkbytes) == 2,
                            "not enough data for table/misc length field")
            misctablelength = int.from_bytes(checkbytes, byteorder='big')
            check_condition(self.infile.tell() + misctablelength - 2 <= self.fileresult.filesize,
                            "table outside of file")

            # skip over the section
            self.infile.seek(misctablelength - 2, os.SEEK_CUR)

            # and read the next two bytes
            checkbytes = self.infile.read(2)
            check_condition(len(checkbytes) == 2, "not enough data for table/misc")

        # after the tables/misc (and possibly EXP) there should be
        # a frame header (B.2.2) with a SOF (start of frame) marker
        check_condition(checkbytes in START_OF_FRAME_MARKERS,
                        "invalid value for start of frame")

        # extract the length of the frame
        # this includes the 2 bytes of the length field itself
        checkbytes = self.infile.read(2)
        check_condition(len(checkbytes) == 2,
                        "not enough data for table/misc length field")
        misctablelength = int.from_bytes(checkbytes, byteorder='big')
        check_condition(self.infile.tell() + misctablelength - 2 <= self.fileresult.filesize,
                        "table outside of file")

        # skip over the section
        self.infile.seek(misctablelength - 2, os.SEEK_CUR)

        # This is followed by at least one scan header,
        # optionally preceded by more tables/misc
        while True:
            if eofseen:
                break

            # optionally preceded by more tables/misc
            while True:
                checkbytes = self.infile.read(2)
                check_condition(len(checkbytes) == 2, "not enough data for table/misc")

                if checkbytes in TABLES_MISC_MARKERS or checkbytes in APP_MARKERS:
                    # Extract the length of the table or app marker.
                    # This includes the 2 bytes of the length field itself
                    checkbytes = self.infile.read(2)
                    check_condition(len(checkbytes) == 2,
                                    "not enough data for table/misc length field")
                    misctablelength = int.from_bytes(checkbytes, byteorder='big')
                    check_condition(self.infile.tell() + misctablelength - 2 <= self.fileresult.filesize,
                                    "table outside of file")

                    # skip over the section
                    self.infile.seek(misctablelength - 2, os.SEEK_CUR)
                else:
                    break

            # RST: no data, so simply ignore, but immediately
            # skip to more of the raw data.
            isrestart = False
            if checkbytes in RST_MARKERS:
                isrestart = True

            # DNL (section B.2.5)
            if checkbytes == b'\xff\xdc':
                # extract the length of the DNL
                # this includes the 2 bytes of the length field itself
                checkbytes = self.infile.read(2)
                check_condition(len(checkbytes) == 2,
                                "not enough data for table/misc length field")
                headerlength = int.from_bytes(checkbytes, byteorder='big')
                check_condition(self.infile.tell() + headerlength - 2 <= self.fileresult.filesize,
                                "start of scan outside of file")

                # skip over the section
                self.infile.seek(headerlength - 3, os.SEEK_CUR)

                # and read two bytes
                checkbytes = self.infile.read(2)
                check_condition(len(checkbytes) == 2, "not enough data for table/misc")

            # the SOS (start of scan) header
            if checkbytes == b'\xff\xda':
                # extract the length of the start of scan header
                # this includes the 2 bytes of the length field itself
                checkbytes = self.infile.read(2)
                check_condition(len(checkbytes) == 2,
                                "not enough data for table/misc length field")
                headerlength = int.from_bytes(checkbytes, byteorder='big')
                check_condition(self.infile.tell() + headerlength - 2 <= self.fileresult.filesize,
                                "start of scan outside of file")

                # the number of image components, can only be 1-4
                checkbytes = self.infile.read(1)
                check_condition(len(checkbytes) == 1,
                                "not enough data for number of image components")
                numberimagecomponents = ord(checkbytes)
                check_condition(numberimagecomponents in [1, 2, 3, 4],
                                "invalid value for number of image components")

                # the header length = 6 + 2 * number of image components
                check_condition(headerlength == 6 + 2 * numberimagecomponents,
                                "invalid value for number of image components or start of scan header length")

                # skip over the section
                self.infile.seek(headerlength - 3, os.SEEK_CUR)
            else:
                if not isrestart:
                    check_condition(checkbytes == b'\xff\xd9',
                                    "invalid value for start of scan")
                    eofseen = True
                    continue

            # now read the image data in chunks to search for
            # JPEG markers (section B.1.1.2)
            # This is not fully foolproof: data from the entropy coded
            # segment (ECS) could be missing, or data could have been
            # inserted or changed in the ECS. The only way to verify this
            # is to reimplement it, or to run it through an external tool
            # or library such as pillow.
            readsize = 100
            while True:
                oldpos = self.infile.tell()
                checkbytes = self.infile.read(readsize)
                if checkbytes == b'':
                    break

                # check if 0xff can be found in the data. If so, then it
                # is either part of the entropy coded data (and followed
                # by 0x00), or a valid JPEG marker, or bogus data.
                if b'\xff' in checkbytes:
                    startffpos = 0
                    fffound = False
                    while True:
                        ffpos = checkbytes.find(b'\xff', startffpos)
                        if ffpos == -1:
                            break
                        # if 0xff is the last byte, bail out
                        if oldpos + ffpos == self.fileresult.filesize - 1:
                            break
                        startffpos = ffpos + 1
                        if ffpos < readsize - 1:
                            if checkbytes[ffpos + 1] != 0:
                                if checkbytes[ffpos:ffpos + 2] in TABLES_MISC_MARKERS or checkbytes[ffpos:ffpos + 2] in APP_MARKERS:
                                    self.infile.seek(oldpos + ffpos)
                                    fffound = True
                                    break
                                if checkbytes[ffpos:ffpos + 2] in JPEG_EXT_MARKERS:
                                    self.infile.seek(oldpos + ffpos)
                                    fffound = True
                                    break
                                if checkbytes[ffpos:ffpos + 2] in RST_MARKERS:
                                    self.infile.seek(oldpos + ffpos)
                                    fffound = True
                                    break
                                # check for SOS
                                if checkbytes[ffpos:ffpos + 2] == b'\xff\xda':
                                    self.infile.seek(oldpos + ffpos)
                                    fffound = True
                                    break
                                # check for DNL
                                if checkbytes[ffpos:ffpos + 2] == b'\xff\xdc':
                                    self.infile.seek(oldpos + ffpos)
                                    fffound = True
                                    break
                                # check for EOI
                                if checkbytes[ffpos:ffpos + 2] == b'\xff\xd9':
                                    self.infile.seek(oldpos + ffpos + 2)
                                    eofseen = True
                                    fffound = True
                                    break

                    # a valid marker was found, so break out of the loop
                    if fffound:
                        break

                if self.infile.tell() == self.fileresult.filesize:
                    break
                self.infile.seek(-1, os.SEEK_CUR)

        # end of the image, so break out of the loop
        if eofseen:
            break

    self.unpacked_size = self.infile.tell()

    if self.unpacked_size == self.fileresult.filesize:
        # now load the file using PIL as an extra sanity check
        # although this doesn't seem to do a lot.
        try:
            testimg = PIL.Image.open(self.infile)
            testimg.load()
            testimg.close()
        except OSError as e:
            raise UnpackParserException(e.args)
        except PIL.Image.DecompressionBombError as e:
            raise UnpackParserException(e.args)
    else:
        temporary_file = tempfile.mkstemp(dir=self.scan_environment.temporarydirectory)
        os.sendfile(temporary_file[0], self.infile.fileno(), self.offset, self.unpacked_size)
        os.fdopen(temporary_file[0]).close()

        # reopen as read only
        jpeg_file = open(temporary_file[1], 'rb')
        try:
            testimg = PIL.Image.open(jpeg_file)
            testimg.load()
            testimg.close()
        except OSError as e:
            raise UnpackParserException(e.args)
        except PIL.Image.DecompressionBombError as e:
            raise UnpackParserException(e.args)
        finally:
            jpeg_file.close()
            os.unlink(temporary_file[1])
def _copy_to_fd(self, source: pathlib.Path, fd: int):
    # open in binary mode so seek() reports a byte offset
    with source.open('rb') as file:
        end = file.seek(0, io.SEEK_END)
        os.sendfile(fd, file.fileno(), 0, end)
def handle(self, sock, addr):
    p = HttpStream(SocketReader(sock))
    path = p.path()
    if not path or path == "/":
        path = "index.html"

    if path.startswith("/"):
        path = path[1:]

    real_path = os.path.join(CURDIR, "static", path)

    if os.path.isdir(real_path):
        lines = ["<ul>"]
        for d in os.listdir(real_path):
            fpath = os.path.join(real_path, d)
            lines.append("<li><a href=" + d + ">" + d + "</a>")
        data = "".join(lines)
        resp = "".join([
            "HTTP/1.1 200 OK\r\n",
            "Content-Type: text/html\r\n",
            "Content-Length:" + str(len(data)) + "\r\n",
            "Connection: close\r\n\r\n",
            data
        ])
        sock.sendall(resp)
    elif not os.path.exists(real_path):
        util.write_error(sock, 404, "Not found", real_path + " not found")
    else:
        ctype = mimetypes.guess_type(real_path)[0]
        if ctype.startswith('text') or 'html' in ctype:
            try:
                f = open(real_path, 'rb')
                data = f.read()
                resp = "".join([
                    "HTTP/1.1 200 OK\r\n",
                    "Content-Type: " + ctype + "\r\n",
                    "Content-Length:" + str(len(data)) + "\r\n",
                    "Connection: close\r\n\r\n",
                    data
                ])
                sock.sendall(resp)
            finally:
                f.close()
        else:
            try:
                f = open(real_path, 'r')
                clen = int(os.fstat(f.fileno())[6])

                # send headers
                sock.send("".join([
                    "HTTP/1.1 200 OK\r\n",
                    "Content-Type: " + ctype + "\r\n",
                    "Content-Length:" + str(clen) + "\r\n",
                    "Connection: close\r\n\r\n"
                ]))

                if not sendfile:
                    while True:
                        data = f.read(4096)
                        if not data:
                            break
                        sock.send(data)
                else:
                    fileno = f.fileno()
                    sockno = sock.fileno()
                    sent = 0
                    offset = 0
                    nbytes = clen
                    sent += sendfile(sockno, fileno, offset + sent, nbytes - sent)
                    while sent != nbytes:
                        sent += sendfile(sock.fileno(), fileno, offset + sent, nbytes - sent)
            finally:
                f.close()
#!/usr/bin/python3
import sys
import socket
import os

if len(sys.argv) < 4:
    print("Usage: <hostname> <port> <filename>")
    sys.exit(1)

hostname = sys.argv[1]
portnum = sys.argv[2]
filename = sys.argv[3]

file = open(filename, "rb")
blocksize = os.path.getsize(filename)

sock = socket.socket()
sock.connect((hostname, int(portnum)))

offset = 0
while True:
    sent = os.sendfile(sock.fileno(), file.fileno(), offset, blocksize)
    if sent == 0:
        break  # EOF
    offset += sent
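A minimal receiving peer for the sender above, shown for illustration (the port and output file name are assumptions): it accepts one connection and writes everything it receives to disk.

import socket

srv = socket.socket()
srv.bind(("", 12345))  # hypothetical port; must match the sender
srv.listen(1)
conn, _ = srv.accept()
with open("received.bin", "wb") as out:
    while True:
        chunk = conn.recv(65536)
        if not chunk:  # sender closed the connection
            break
        out.write(chunk)
conn.close()
srv.close()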
def parse(self):
    # read the first few bytes to see which kind of file it possibly is
    self.infile.seek(0)
    checkbytes = self.infile.read(2)
    if checkbytes == b'P6':
        self.pnmtype = 'ppm'
    elif checkbytes == b'P5':
        self.pnmtype = 'pgm'
    elif checkbytes == b'P4':
        self.pnmtype = 'pbm'

    # then there should be one or more whitespace characters
    seenwhitespace = False
    while True:
        checkbytes = self.infile.read(1)
        check_condition(len(checkbytes) == 1,
                        "not enough data for header whitespace")
        if chr(ord(checkbytes)) in string.whitespace:
            seenwhitespace = True
        else:
            if seenwhitespace:
                self.infile.seek(-1, os.SEEK_CUR)
                break
            raise UnpackParserException("no whitespace in header")

    # width, in ASCII digits, possibly preceded by a comment
    widthbytes = b''
    seenint = False
    while True:
        checkbytes = self.infile.read(1)
        check_condition(len(checkbytes) == 1, "not enough data for width")
        if checkbytes == b'#':
            # comment, read until newline is found
            while True:
                checkbytes = self.infile.read(1)
                check_condition(len(checkbytes) == 1,
                                "not enough data for width")
                if checkbytes == b'\n':
                    break
            continue
        try:
            int(checkbytes)
            widthbytes += checkbytes
            seenint = True
        except ValueError as e:
            if seenint:
                self.infile.seek(-1, os.SEEK_CUR)
                break
            raise UnpackParserException(e.args)
    width = int(widthbytes)

    # then there should be one or more whitespace characters
    seenwhitespace = False
    while True:
        checkbytes = self.infile.read(1)
        check_condition(len(checkbytes) == 1,
                        "not enough data for header whitespace")
        if chr(ord(checkbytes)) in string.whitespace:
            seenwhitespace = True
        else:
            if seenwhitespace:
                self.infile.seek(-1, os.SEEK_CUR)
                break
            raise UnpackParserException("no whitespace in header")

    # height, in ASCII digits
    heightbytes = b''
    seenint = False
    while True:
        checkbytes = self.infile.read(1)
        check_condition(len(checkbytes) == 1, "not enough data for height")
        try:
            int(checkbytes)
            heightbytes += checkbytes
            seenint = True
        except ValueError as e:
            if seenint:
                self.infile.seek(-1, os.SEEK_CUR)
                break
            raise UnpackParserException(e.args)
    height = int(heightbytes)

    if self.pnmtype != 'pbm':
        # then more whitespace
        seenwhitespace = False
        while True:
            checkbytes = self.infile.read(1)
            check_condition(len(checkbytes) == 1,
                            "not enough data for header whitespace")
            if chr(ord(checkbytes)) in string.whitespace:
                seenwhitespace = True
            else:
                if seenwhitespace:
                    self.infile.seek(-1, os.SEEK_CUR)
                    break
                raise UnpackParserException("no whitespace in header")

        # maximum color value, in ASCII digits
        maxbytes = b''
        seenint = False
        while True:
            checkbytes = self.infile.read(1)
            check_condition(len(checkbytes) == 1,
                            "not enough data for maximum color value")
            try:
                int(checkbytes)
                maxbytes += checkbytes
                seenint = True
            except ValueError as e:
                if seenint:
                    self.infile.seek(-1, os.SEEK_CUR)
                    break
                raise UnpackParserException(e.args)
        maxvalue = int(maxbytes)

    # single whitespace
    checkbytes = self.infile.read(1)
    check_condition(len(checkbytes) == 1,
                    "not enough data for header whitespace")
    check_condition(chr(ord(checkbytes)) in string.whitespace,
                    "invalid whitespace")

    if self.pnmtype == 'pbm':
        # each row is width bits
        rowlength = width // 8
        if width % 8 != 0:
            rowlength += 1
        len_data_bytes = rowlength * height
    else:
        if maxvalue < 256:
            len_data_bytes = width * height
            if self.pnmtype == 'ppm':
                len_data_bytes = len_data_bytes * 3
        else:
            len_data_bytes = width * height * 2
            if self.pnmtype == 'ppm':
                len_data_bytes = len_data_bytes * 3

    check_condition(self.infile.tell() + len_data_bytes <= self.fileresult.filesize,
                    "not enough data for raster")

    self.unpacked_size = self.infile.tell() + len_data_bytes

    # use PIL as an extra sanity check
    if self.unpacked_size == self.fileresult.filesize:
        # now load the file using PIL as an extra sanity check
        # although this doesn't seem to do a lot.
        try:
            testimg = PIL.Image.open(self.infile)
            testimg.load()
            testimg.close()
        except OSError as e:
            raise UnpackParserException(e.args)
        except ValueError as e:
            raise UnpackParserException(e.args)
    else:
        temporary_file = tempfile.mkstemp(
            dir=self.scan_environment.temporarydirectory)
        os.sendfile(temporary_file[0], self.infile.fileno(), self.offset, self.unpacked_size)
        os.fdopen(temporary_file[0]).close()

        # reopen as read only
        pnm_file = open(temporary_file[1], 'rb')
        try:
            testimg = PIL.Image.open(pnm_file)
            testimg.load()
            testimg.close()
        except OSError as e:
            raise UnpackParserException(e.args)
        except ValueError as e:
            raise UnpackParserException(e.args)
        finally:
            pnm_file.close()
            os.unlink(temporary_file[1])
#!/usr/bin/env python3
import sys, os
from pathlib import Path

output_fd = sys.stdout.buffer.fileno()

for fname in sys.argv[1:]:
    inputf = Path(fname)
    with inputf.open('rb') as f:
        while os.sendfile(output_fd, f.fileno(), None, 1 << 30) != 0:
            pass
fasta_part_paths: typing.List[pathlib.Path] = [
    pathlib.Path(f'{fasta_path.name}_{i}') for i in range(num_parts)
]

# split the FASTA file via mmap + copyfileobj ...
with fasta_path.open('rb') as fo:
    for s, fasta_part_path in zip(file_parts_offsets, fasta_part_paths):
        with pathlib.Path(fasta_part_path).open('wb') as f:
            mm = mmap.mmap(fo.fileno(), 0 if s.stop is None else s.stop,
                           access=mmap.ACCESS_READ)
            mm.seek(s.start)
            shutil.copyfileobj(mm, f)

# ... or equivalently via os.sendfile
with fasta_path.open('rb') as fo:
    for s, fasta_part_path in zip(file_parts_offsets, fasta_part_paths):
        with pathlib.Path(fasta_part_path).open('wb') as f:
            os.sendfile(f.fileno(), fo.fileno(), s.start, s.stop - s.start)

pti == pti2 == [fasta_part_path.read_bytes() for fasta_part_path in fasta_part_paths]

fasta_part_paths: typing.List[pathlib.Path] = [
    tempdir / str(i) / f'{fasta_path.name}' for i in range(num_parts)
]
param_part_paths: typing.List[pathlib.Path] = [
    tempdir / str(i) / param_path.name for i in range(num_parts)
]
infiles_name = [e.absolute() for e in infiles]
infiles_symlinks_target_pairs = [(ee / e.name, e) for e in infiles for ee in tempdir_parts]
# cmds = [msfragger_cmd + [param_part_path.name, *infiles_name, '--partial', f'{i}']
#         for i, param_part_path in zip(range(num_parts), param_part_paths)]
def unpack(self):
    unpacked_files = []

    # create a temporary directory and remove it again
    # fsck.cramfs cannot unpack to an existing directory
    # and move contents after unpacking.
    cramfs_unpack_directory = tempfile.mkdtemp(dir=self.scan_environment.temporarydirectory)
    shutil.rmtree(cramfs_unpack_directory)

    if not self.havetmpfile:
        p = subprocess.Popen(['fsck.cramfs', '--extract=%s' % cramfs_unpack_directory, self.fileresult.filename],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    else:
        temporaryfile = tempfile.mkstemp(dir=self.scan_environment.temporarydirectory)
        os.sendfile(temporaryfile[0], self.infile.fileno(), self.offset, self.cramfs_size)
        os.fdopen(temporaryfile[0]).close()
        p = subprocess.Popen(['fsck.cramfs', '--extract=%s' % cramfs_unpack_directory, temporaryfile[1]],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    (outputmsg, errormsg) = p.communicate()

    if self.havetmpfile:
        os.unlink(temporaryfile[1])

    # move the unpacked files
    # move contents of the unpacked file system
    for result in pathlib.Path(cramfs_unpack_directory).glob('**/*'):
        relative_result = result.relative_to(cramfs_unpack_directory)
        outfile_rel = self.rel_unpack_dir / relative_result
        outfile_full = self.scan_environment.unpack_path(outfile_rel)
        os.makedirs(outfile_full.parent, exist_ok=True)

        if result.is_symlink():
            self.local_copy2(result, outfile_full)
        elif result.is_dir():
            os.makedirs(outfile_full, exist_ok=True)
            outfile_full.chmod(stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
        elif result.is_file():
            self.local_copy2(result, outfile_full)
            outfile_full.chmod(stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
        else:
            continue

        # then add the file to the result set
        fr = FileResult(self.fileresult, outfile_rel, set())
        unpacked_files.append(fr)

    # clean up the temporary directory
    shutil.rmtree(cramfs_unpack_directory)
    return unpacked_files
def _handle_event(self, fd, evt):
    if evt & tornado.ioloop.IOLoop.ERROR:
        print(os.strerror(self._sock.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)))
        self.close()
        return

    if evt & tornado.ioloop.IOLoop.READ:
        while len(self._read_queue) > 0:
            iocb = self._read_queue[0]
            datatype = iocb[0]
            if datatype == self.DATA_BUF:
                size = iocb[1]
                try:
                    while True:
                        buf = self._sock.recv(size)
                        if len(buf) == 0:
                            self.close()
                            return
                        iocb[2].extend(buf)
                        size -= len(buf)
                        if size == 0:
                            if iocb[3] != None:
                                iocb[3](iocb[2])
                            self._read_queue.popleft()
                            break
                except BlockingIOError:
                    iocb[1] = size
                    break
                except Exception:
                    self.close()
                    return
            elif datatype == self.DATA_NOBUF:
                size = iocb[1]
                try:
                    while True:
                        buf = self._sock.recv(size)
                        if len(buf) == 0:
                            self.close()
                            return
                        iocb[2](buf)
                        size -= len(buf)
                        if size == 0:
                            self._read_queue.popleft()
                            break
                except BlockingIOError:
                    iocb[1] = size
                    break
                except Exception:
                    self.close()
                    return
            elif datatype == self.DATA_FILE:
                size = iocb[1]
                try:
                    while True:
                        buf = self._sock.recv(min(size, 65536))
                        os.write(iocb[2], buf)
                        size -= len(buf)
                        if size == 0:
                            if iocb[3] != None:
                                iocb[3]()
                            self._read_queue.popleft()
                            break
                        if len(buf) == 0:
                            self.close()
                            return
                except BlockingIOError:
                    iocb[1] = size
                    break
                except Exception:
                    self.close()
                    return

    if evt & tornado.ioloop.IOLoop.WRITE:
        if self._conning == True:
            self._conning = False
            if self._conn_callback != None:
                self._conn_callback()
        while len(self._write_queue) > 0:
            iocb = self._write_queue[0]
            datatype = iocb[0]
            if datatype == self.DATA_BUF:
                off = iocb[1]
                buf = iocb[2]
                try:
                    while True:
                        ret = self._sock.send(buf[off:])
                        off += ret
                        if off == len(buf):
                            if iocb[3] != None:
                                iocb[3]()
                            self._write_queue.popleft()
                            break
                        if ret == 0:
                            self.close()
                            return
                except BlockingIOError:
                    iocb[1] = off
                    break
                except Exception:
                    self.close()
                    return
            elif datatype == self.DATA_FILE:
                size = iocb[1]
                filefd = iocb[2]
                sockfd = self._sock.fileno()
                try:
                    while True:
                        ret = os.sendfile(sockfd, filefd, None, min(size, 65536))
                        size -= ret
                        if size == 0:
                            if iocb[3] != None:
                                iocb[3]()
                            self._write_queue.popleft()
                            break
                        if ret == 0:
                            self.close()
                            return
                except BlockingIOError:
                    iocb[1] = size
                    break
                except Exception:
                    self.close()
                    return

    if self._closed == True:
        return

    stat = tornado.ioloop.IOLoop.ERROR
    if len(self._read_queue) > 0:
        stat |= tornado.ioloop.IOLoop.READ
    if len(self._write_queue) > 0:
        stat |= tornado.ioloop.IOLoop.WRITE
    if stat != self._stat:
        self._stat = stat
        self._ioloop.update_handler(fd, stat)
import os
import sys

if len(sys.argv) != 3:
    print("Usage: cmd src dst")
    exit(1)

src = sys.argv[1]
dst = sys.argv[2]

# binary mode: the copy goes through the raw file descriptors
with open(src, 'rb') as s, open(dst, 'wb') as d:
    st = os.fstat(s.fileno())
    offset = 0
    count = 4096
    s_len = st.st_size
    sfd = s.fileno()
    dfd = d.fileno()
    while s_len > 0:
        ret = os.sendfile(dfd, sfd, offset, count)
        offset += ret
        s_len -= ret
def unpack(self):
    unpacked_files = []
    unpackdir_full = self.scan_environment.unpack_path(self.rel_unpack_dir)

    chunk_size = self.metadata['chunk size']
    spare_size = self.metadata['spare size']

    # seek to the start of the data
    cur_offset = 0
    self.infile.seek(cur_offset)

    # keep a mapping of object ids to latest chunk id
    object_id_to_latest_chunk = {}

    # keep a mapping of object ids to type
    object_id_to_type = {}

    # keep a mapping of object ids to name
    object_id_to_name = {}

    # keep a mapping of object ids to file size
    # for sanity checks
    object_id_to_size = {}

    # store the last open file for an object
    last_open = None
    last_open_name = None
    last_open_size = 0
    previous_object_id = 0

    # store if element with object id 1 has been seen. Most, but not all,
    # YAFFS2 images have this as a separate chunk.
    seen_root_element = False
    is_first_element = True

    # store if this is an inband image
    inband = False

    self.last_valid_offset = cur_offset

    while True:
        if self.infile.tell() == self.unpacked_size:
            break
        self.last_valid_offset = self.infile.tell()

        # read relevant spare data.
        self.infile.seek(chunk_size, os.SEEK_CUR)

        # read the sequence number
        spare_bytes = self.infile.read(4)
        sequence_number = int.from_bytes(spare_bytes, byteorder=self.byteorder)

        # skip padding chunks
        if sequence_number == 0xffffffff:
            self.infile.seek(self.last_valid_offset + chunk_size + spare_size)
            continue

        # read the object id
        spare_bytes = self.infile.read(4)
        object_id = int.from_bytes(spare_bytes, byteorder=self.byteorder)

        # read the chunk id
        spare_bytes = self.infile.read(4)
        chunk_id = int.from_bytes(spare_bytes, byteorder=self.byteorder)

        # first check if the relevant info is stored in an inband tag
        # or in a normal tag. Inband tags are described in the YAFFS2
        # code in the file yaffs_packedtags2.c
        #
        # For inband tags some data (object id, chunk id) are
        # mixed with the actual data, so extract them first.
        if chunk_id & EXTRA_HEADER_INFO_FLAG == EXTRA_HEADER_INFO_FLAG:
            # store the original chunk_id as it will be needed later
            orig_chunk_id = chunk_id

            # extract the object_id
            object_id = object_id & ~EXTRA_OBJECT_TYPE_MASK

            # the chunk_id will have been changed ONLY for
            # the chunk with id 0 and not for any chunks
            # with data (files), so it is safe to simply
            # set the chunk_id to 0 here (new chunk).
            chunk_id = 0
            inband = True

        # read the chunk byte count
        spare_bytes = self.infile.read(4)
        byte_count = int.from_bytes(spare_bytes, byteorder=self.byteorder)

        # if it is a continuation of an existing object, then the
        # chunk id cannot be 0, as that is the header.
        if chunk_id != 0:
            object_id_to_latest_chunk[object_id] = chunk_id

            # jump to the offset of the chunk and write data. This needs
            # absolute offsets again. Dirty hack!
            os.sendfile(last_open.fileno(), self.infile.fileno(),
                        self.last_valid_offset + self.offset, byte_count)
        else:
            # close open file, if any
            if last_open is not None:
                last_open.close()
                fr = FileResult(self.fileresult, last_open_name, set())
                unpacked_files.append(fr)
                last_open = None

            # store latest chunk id for this object
            object_id_to_latest_chunk[object_id] = chunk_id

            # jump to the offset of the chunk and analyze
            self.infile.seek(self.last_valid_offset)

            # object type
            object_bytes = self.infile.read(4)
            chunk_object_type = int.from_bytes(object_bytes, byteorder=self.byteorder)

            # read the parent object id
            parent_id_bytes = self.infile.read(4)
            parent_object_id = int.from_bytes(parent_id_bytes, byteorder=self.byteorder)
            if inband:
                parent_object_id = orig_chunk_id & ~ALL_EXTRA_FLAG

            # skip the name checksum (2 bytes)
            self.infile.seek(2, os.SEEK_CUR)

            # object name
            # For some reason 2 extra bytes need to be read that have
            # been initialized to 0xff
            checkbytes = self.infile.read(YAFFS_MAX_NAME_LENGTH + 1 + 2)
            try:
                object_name = os.path.normpath(checkbytes.split(b'\x00', 1)[0].decode())
                # sanity check, needs more TODO
                if os.path.isabs(object_name):
                    object_name = os.path.relpath(object_name, '/')
            except:
                break

            # yst_mode
            stat_bytes = self.infile.read(4)
            mode = int.from_bytes(stat_bytes, byteorder=self.byteorder)

            # stat information: uid, gid, atime, mtime, ctime
            stat_bytes = self.infile.read(4)
            uid = int.from_bytes(stat_bytes, byteorder=self.byteorder)
            stat_bytes = self.infile.read(4)
            gid = int.from_bytes(stat_bytes, byteorder=self.byteorder)
            stat_bytes = self.infile.read(4)
            atime = int.from_bytes(stat_bytes, byteorder=self.byteorder)
            stat_bytes = self.infile.read(4)
            mtime = int.from_bytes(stat_bytes, byteorder=self.byteorder)
            stat_bytes = self.infile.read(4)
            ctime = int.from_bytes(stat_bytes, byteorder=self.byteorder)

            # the object size. This only makes sense for files. The real
            # size depends on the "high" value as well.
            size_bytes = self.infile.read(4)
            object_size_low = int.from_bytes(size_bytes, byteorder=self.byteorder)

            # equiv_id, only makes sense for hard links
            equiv_bytes = self.infile.read(4)
            equiv_id = int.from_bytes(equiv_bytes, byteorder=self.byteorder)

            # alias, only makes sense for symlinks
            alias = self.infile.read(YAFFS_MAX_ALIAS_LENGTH + 1)

            # rdev, only for special files (block/char)
            rdev_bytes = self.infile.read(4)
            rdev = int.from_bytes(rdev_bytes, byteorder=self.byteorder)

            # skip some Windows specific structures
            self.infile.seek(24, os.SEEK_CUR)

            # skip some inband related structures
            self.infile.seek(8, os.SEEK_CUR)

            # object size high
            size_bytes = self.infile.read(4)
            object_size_high = int.from_bytes(size_bytes, byteorder=self.byteorder)

            # element 1 is special, but not every yaffs2 file system
            # seems to have element 1, so sometimes it needs to be
            # artificially added.
            if object_id != 1:
                if is_first_element:
                    # artificially add object 1
                    object_id_to_type[1] = YAFFS_OBJECT_TYPE_DIRECTORY
                    object_id_to_name[1] = ''
            else:
                # add the root element and skip to the next chunk
                object_id_to_type[1] = YAFFS_OBJECT_TYPE_DIRECTORY
                object_id_to_name[1] = ''
                self.infile.seek(self.last_valid_offset + chunk_size + spare_size)
                continue

            full_object_name = os.path.join(object_id_to_name[parent_object_id], object_name)
            outfile_rel = self.rel_unpack_dir / full_object_name
            outfile_full = unpackdir_full / full_object_name
            object_id_to_name[object_id] = full_object_name

            if chunk_object_type == YAFFS_OBJECT_TYPE_FILE:
                # first reconstruct the file size.
                if object_size_high != 0xffffffff:
                    object_size = (object_size_high << 32) + object_size_low
                else:
                    object_size = object_size_low

                last_open = open(outfile_full, 'wb')
                last_open_name = outfile_rel
                last_open_size = object_size
                previous_object_id = object_id
            elif chunk_object_type == YAFFS_OBJECT_TYPE_SYMLINK:
                alias = alias.split(b'\x00', 1)[0].decode()

                # create the symlink
                os.symlink(alias, outfile_full)
                fr = FileResult(self.fileresult, outfile_rel, set(['symbolic link']))
                unpacked_files.append(fr)
            elif chunk_object_type == YAFFS_OBJECT_TYPE_DIRECTORY:
                # create the directory
                os.makedirs(outfile_full, exist_ok=True)
                fr = FileResult(self.fileresult, outfile_rel, set(['directory']))
                unpacked_files.append(fr)
            elif chunk_object_type == YAFFS_OBJECT_TYPE_HARDLINK:
                linkname = unpackdir_full / object_id_to_name[equiv_id]
                os.link(linkname, outfile_full)
            elif chunk_object_type == YAFFS_OBJECT_TYPE_SPECIAL:
                # no permissions to create special files,
                # so don't create, but report instead. TODO
                pass

            object_id_to_type[object_id] = chunk_object_type
            is_first_element = False
            dataunpacked = True

        if self.infile.tell() == self.unpacked_size:
            break

        # skip to the next chunk/spare
        self.infile.seek(self.last_valid_offset + chunk_size + spare_size)

    # close any open files
    if last_open is not None:
        last_open.close()
        if object_id_to_latest_chunk[previous_object_id] == 0:
            if last_open_size != 0:
                os.unlink(last_open.name)
            else:
                fr = FileResult(self.fileresult, last_open_name, set())
                unpacked_files.append(fr)
        else:
            fr = FileResult(self.fileresult, last_open_name, set())
            unpacked_files.append(fr)

    return unpacked_files
import socket
import os
import time

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_address = ('127.0.0.1', 10000)
sock.connect(server_address)

start = time.time()
try:
    with open("file_for_test", "rb") as f:
        ret = 0
        offset = 0
        while True:
            ret = os.sendfile(sock.fileno(), f.fileno(), offset, 65536)
            offset += ret
            if ret == 0:
                break
finally:
    sock.close()
end = time.time()
print('total time', end - start)
def parse(self): check_condition( shutil.which('fsck.cramfs') is not None, 'fsck.cramfs program not found') # read the magic to see what the endianness is buf = self.infile.read(4) if buf == b'\x45\x3d\xcd\x28': byteorder = 'little' bigendian = False else: byteorder = 'big' bigendian = True # length in bytes buf = self.infile.read(4) self.cramfs_size = int.from_bytes(buf, byteorder=byteorder) check_condition( self.offset + self.cramfs_size <= self.fileresult.filesize, "declared size larger than file") # feature flags buf = self.infile.read(4) check_condition(len(buf) == 4, "not enough data for feature flags") featureflags = int.from_bytes(buf, byteorder=byteorder) if featureflags & 1 == 1: cramfs_version = 2 else: cramfs_version = 0 # currently only version 2 is supported check_condition(cramfs_version == 2, "unsupported cramfs version") # reserved for future use, skip self.infile.seek(4, os.SEEK_CUR) # signature buf = self.infile.read(16) check_condition(len(buf) == 16, "not enough data for signature field") if buf != b'Compressed ROMFS': self.infile.close() unpackingerror = { 'offset': offset, 'fatal': False, 'reason': 'invalid signature' } return {'status': False, 'error': unpackingerror} # cramfs_info struct (32 bytes) # crc32 buf = self.infile.read(4) check_condition(len(buf) == 4, "not enough data for crc32 field") cramfs_crc32 = int.from_bytes(buf, byteorder=byteorder) # edition buf = self.infile.read(4) check_condition( len(buf) == 4, "not enough data for cramfs edition field") cramfs_edition = int.from_bytes(buf, byteorder=byteorder) # blocks buf = self.infile.read(4) check_condition(len(buf) == 4, "not enough data for blocks field") cramfs_blocks = int.from_bytes(buf, byteorder=byteorder) # files buf = self.infile.read(4) check_condition(len(buf) == 4, "not enough data for files field") cramfs_files = int.from_bytes(buf, byteorder=byteorder) # user defined name buf = self.infile.read(16) check_condition( len(buf) == 16, "not enough data for user defined name field") try: volumename = buf.split(b'\x00', 1)[0].decode() except UnicodeDecodeError: raise UnpackParserException('invalid volume name') # then process the inodes. # keep a mapping of inode numbers to metadata # and a reverse mapping from offset to inode inodes = {} offsettoinode = {} # See defines in Linux kernel include/uapi/linux/cramfs_fs.h # for the width/length of modes, lengths, etc. 
for inode in range(0, cramfs_files): # store the current offset, as it is used by directories curoffset = self.infile.tell() # 2 bytes mode width, 2 bytes uid width buf = self.infile.read(2) check_condition(len(buf) == 2, "not enough data for inode") inode_mode = int.from_bytes(buf, byteorder=byteorder) # determine the kind of file if stat.S_ISDIR(inode_mode): mode = 'directory' elif stat.S_ISCHR(inode_mode): mode = 'chardev' elif stat.S_ISBLK(inode_mode): mode = 'blockdev' elif stat.S_ISREG(inode_mode): mode = 'file' elif stat.S_ISFIFO(inode_mode): mode = 'fifo' elif stat.S_ISLNK(inode_mode): mode = 'symlink' elif stat.S_ISSOCK(inode_mode): mode = 'socket' buf = self.infile.read(2) check_condition(len(buf) == 2, "not enough data for inode") inode_uid = int.from_bytes(buf, byteorder=byteorder) # 3 bytes size width, 1 bytes gid width buf = self.infile.read(3) check_condition(len(buf) == 3, "not enough data for inode") # size of the decompressed inode inode_size = int.from_bytes(buf, byteorder=byteorder) buf = self.infile.read(1) check_condition(len(buf) == 1, "not enough data for inode") inode_gid = int.from_bytes(buf, byteorder=byteorder) # length of the name and offset. The first 6 bits are for # the name length (divided by 4), the last 26 bits for the # offset of the data (divided by 4). This is regardless of # the endianness! # The name is padded to 4 bytes. Because the original name length # is restored by multiplying with 4 there is no need for a # check for padding. buf = self.infile.read(4) if len(buf) != 4: self.infile.close() unpackingerror = { 'offset': offset, 'fatal': False, 'reason': 'not enough data for inode' } return {'status': False, 'error': unpackingerror} namelenbytes = int.from_bytes(buf, byteorder=byteorder) if bigendian: # get the most significant bits and then shift 26 bits name_length = ((namelenbytes & 4227858432) >> 26) * 4 # 0b11111111111111111111111111 = 67108863 data_offset = (namelenbytes & 67108863) * 4 else: # 0b111111 = 63 name_length = (namelenbytes & 63) * 4 # get the bits, then shift 6 bits data_offset = ((namelenbytes & 67108863) >> 6) * 4 # the data cannot be outside of the file check_condition( self.offset + data_offset <= self.fileresult.filesize, "data cannot be outside of file") # if this is the root node there won't be any data # following, so continue with the next inode. if inode == 0: continue check_condition(name_length != 0, "cannot have zero length filename") buf = self.infile.read(name_length) try: inode_name = buf.split(b'\x00', 1)[0].decode() except UnicodeDecodeError: raise UnpackParserException('invalid filename') inodes[inode] = { 'name': inode_name, 'mode': mode, 'offset': curoffset, 'data_offset': data_offset, 'uid': inode_uid, 'gid': inode_gid, 'size': inode_size } offsettoinode[curoffset] = inode inodeoffsettodirectory = {} # for now unpack using fsck.cramfs from util-linux. In the future # this should be replaced by an own unpacker. # now verify the data for inode in inodes: # don't recreate device files if inodes[inode]['mode'] == 'blockdev': continue if inodes[inode]['mode'] == 'chardev': continue if inodes[inode]['mode'] == 'file': pass elif inodes[inode]['mode'] == 'directory': # the data offset points to the offset of # the first inode in the directory if inodes[inode]['data_offset'] != 0: # verify if there is a valid inode check_condition( inodes[inode]['data_offset'] in offsettoinode, "invalid directory entry") self.havetmpfile = False # unpack in a temporary directory to rule out things like CRC errors. 
    # fsck.cramfs expects to create the directory itself so only create
    # the name and then let fsck.cramfs create the directory.

    # first get a temporary name
    cramfs_unpack_directory = tempfile.mkdtemp(
        dir=self.scan_environment.temporarydirectory)

    # remove the directory. Possible race condition?
    shutil.rmtree(cramfs_unpack_directory)

    if self.offset == 0 and self.cramfs_size == self.fileresult.filesize:
        p = subprocess.Popen(
            ['fsck.cramfs', '--extract=%s' % cramfs_unpack_directory,
             self.fileresult.filename],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    else:
        temporaryfile = tempfile.mkstemp(
            dir=self.scan_environment.temporarydirectory)
        os.sendfile(temporaryfile[0], self.infile.fileno(),
                    self.offset, self.cramfs_size)
        os.fdopen(temporaryfile[0]).close()
        self.havetmpfile = True
        p = subprocess.Popen(
            ['fsck.cramfs', '--extract=%s' % cramfs_unpack_directory,
             temporaryfile[1]],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    (outputmsg, errormsg) = p.communicate()

    # clean up the temporary file and the unpack directory. It could be
    # that fsck.cramfs didn't actually create the directory due to other
    # errors, such as a CRC error.
    if self.havetmpfile:
        os.unlink(temporaryfile[1])

    if os.path.exists(cramfs_unpack_directory):
        shutil.rmtree(cramfs_unpack_directory)

    if p.returncode != 0:
        raise UnpackParserException("cannot unpack cramfs")
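# A standalone sketch (not part of the parser above) of the name-length /
# data-offset bit split used by cramfs inodes: the low 6 bits of the 32-bit
# word hold the name length and the remaining 26 bits the data offset, both
# in units of 4 bytes (little-endian layout shown; the sample value is made
# up for illustration).
word = 0x283                     # hypothetical raw inode word: 643
name_length = (word & 0x3f) * 4  # low 6 bits -> 3 * 4 = 12
data_offset = (word >> 6) * 4    # high 26 bits -> 10 * 4 = 40
assert (name_length, data_offset) == (12, 40)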
def _handle_event(self, fd, evt):
    if evt & tornado.ioloop.IOLoop.ERROR:
        print(os.strerror(self._sock.getsockopt(socket.SOL_SOCKET,
                                                socket.SO_ERROR)))
        self.close()
        return

    if evt & tornado.ioloop.IOLoop.READ:
        while len(self._read_queue) > 0:
            iocb = self._read_queue[0]
            datatype = iocb[0]

            if datatype == self.DATA_BUF:
                size = iocb[1]
                try:
                    while True:
                        buf = self._sock.recv(size)
                        if len(buf) == 0:
                            self.close()
                            return
                        iocb[2].extend(buf)
                        size -= len(buf)
                        if size == 0:
                            if iocb[3] is not None:
                                iocb[3](iocb[2])
                            self._read_queue.popleft()
                            break
                except BlockingIOError:
                    iocb[1] = size
                    break
                except Exception:
                    self.close()
                    return

            elif datatype == self.DATA_NOBUF:
                size = iocb[1]
                try:
                    while True:
                        buf = self._sock.recv(size)
                        if len(buf) == 0:
                            self.close()
                            return
                        iocb[2](buf)
                        size -= len(buf)
                        if size == 0:
                            self._read_queue.popleft()
                            break
                except BlockingIOError:
                    iocb[1] = size
                    break
                except Exception:
                    self.close()
                    return

            elif datatype == self.DATA_FILE:
                size = iocb[1]
                try:
                    while True:
                        buf = self._sock.recv(min(size, 65536))
                        os.write(iocb[2], buf)
                        size -= len(buf)
                        if size == 0:
                            if iocb[3] is not None:
                                iocb[3]()
                            self._read_queue.popleft()
                            break
                        if len(buf) == 0:
                            self.close()
                            return
                except BlockingIOError:
                    iocb[1] = size
                    break
                except Exception:
                    self.close()
                    return

    if evt & tornado.ioloop.IOLoop.WRITE:
        if self._conning:
            self._conning = False
            if self._conn_callback is not None:
                self._conn_callback()

        while len(self._write_queue) > 0:
            iocb = self._write_queue[0]
            datatype = iocb[0]

            if datatype == self.DATA_BUF:
                off = iocb[1]
                buf = iocb[2]
                try:
                    while True:
                        ret = self._sock.send(buf[off:])
                        off += ret
                        if off == len(buf):
                            if iocb[3] is not None:
                                iocb[3]()
                            self._write_queue.popleft()
                            break
                        if ret == 0:
                            self.close()
                            return
                except BlockingIOError:
                    iocb[1] = off
                    break
                except Exception:
                    self.close()
                    return

            elif datatype == self.DATA_FILE:
                size = iocb[1]
                filefd = iocb[2]
                sockfd = self._sock.fileno()
                try:
                    while True:
                        # offset=None: read from, and advance, the
                        # current file position of filefd
                        ret = os.sendfile(sockfd, filefd, None,
                                          min(size, 65536))
                        size -= ret
                        if size == 0:
                            if iocb[3] is not None:
                                iocb[3]()
                            self._write_queue.popleft()
                            break
                        if ret == 0:
                            self.close()
                            return
                except BlockingIOError:
                    iocb[1] = size
                    break
                except Exception:
                    self.close()
                    return

    if self._closed:
        return

    stat = tornado.ioloop.IOLoop.ERROR
    if len(self._read_queue) > 0:
        stat |= tornado.ioloop.IOLoop.READ
    if len(self._write_queue) > 0:
        stat |= tornado.ioloop.IOLoop.WRITE
    if stat != self._stat:
        self._stat = stat
        self._ioloop.update_handler(fd, stat)
def sendfile_all(fileno, sockno, offset, nbytes):
    # sendfile() may transfer fewer bytes than requested, so keep
    # calling it until the whole range has been sent
    sent = 0
    while sent < nbytes:
        sent += sendfile(sockno, fileno, offset + sent, nbytes - sent)
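# A minimal usage sketch for sendfile_all() above, assuming an already
# connected socket `conn`; the file name is purely illustrative. The helper
# loops because a single sendfile() call may transfer fewer bytes than
# requested.
import os

with open("payload.bin", "rb") as f:
    size = os.fstat(f.fileno()).st_size
    sendfile_all(f.fileno(), conn.fileno(), 0, size)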
import os
import sys

source = sys.argv[1]
dest = sys.argv[2]

src_fd = os.open(source, os.O_RDONLY)
dest_fd = os.open(dest, os.O_RDWR | os.O_CREAT)

offset = 0
statinfo = os.stat(source)
count = statinfo.st_size
print(count)

while offset < count:
    # os.sendfile() may copy fewer bytes than requested,
    # so advance the offset and retry until done
    bytes_sent = os.sendfile(dest_fd, src_fd, offset, count - offset)
    print("%d bytes sent / copied successfully." % bytes_sent)
    offset += bytes_sent

os.close(src_fd)
os.close(dest_fd)
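# File-to-file os.sendfile() only works on Linux >= 2.6.33; elsewhere the
# call fails with OSError (typically ENOTSOCK). A hedged sketch of the same
# copy with a userspace fallback (the function name is made up):
import os
import shutil

def copy_with_fallback(source, dest):
    try:
        with open(source, "rb") as src, open(dest, "wb") as dst:
            size = os.stat(source).st_size
            offset = 0
            while offset < size:
                offset += os.sendfile(dst.fileno(), src.fileno(),
                                      offset, size - offset)
    except OSError:
        # zero-copy path unavailable: fall back to a plain copy
        shutil.copyfile(source, dest)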
def _fastcopy_sendfile(fsrc, fdst):
    """Copy data from one regular mmap-like fd to another by using
    high-performance sendfile(2) syscall.
    This should work on Linux >= 2.6.33 only.
    """
    # Note: copyfileobj() is left alone in order to not introduce any
    # unexpected breakage. Possible risks by using zero-copy calls
    # in copyfileobj() are:
    # - fdst cannot be open in "a"(ppend) mode
    # - fsrc and fdst may be open in "t"(ext) mode
    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
    #   GzipFile (which decompresses data), HTTPResponse (which decodes
    #   chunks).
    # - possibly others (e.g. encrypted fs/partition?)
    global _USE_CP_SENDFILE
    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        raise _GiveupOnFastCopy(err)  # not a regular file

    # Hopefully the whole file will be copied in a single call.
    # sendfile() is called in a loop 'till EOF is reached (0 return)
    # so a bufsize smaller or bigger than the actual file size
    # should not make any difference, also in case the file content
    # changes while being copied.
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MiB
    except OSError:
        blocksize = 2 ** 27  # 128MiB

    # On 32-bit architectures truncate to 1GiB to avoid OverflowError,
    # see bpo-38319.
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            # ...in order to have a more informative exception.
            err.filename = fsrc.name
            err.filename2 = fdst.name

            if err.errno == errno.ENOTSOCK:
                # sendfile() on this platform (probably Linux < 2.6.33)
                # does not support copies between regular files (only
                # sockets).
                _USE_CP_SENDFILE = False
                raise _GiveupOnFastCopy(err)

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise err from None

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnFastCopy(err)

            raise err
        else:
            if sent == 0:
                break  # EOF
            offset += sent
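# _fastcopy_sendfile() is shutil's private fast path; ordinary code reaches
# it implicitly on Linux via the public API (file names illustrative):
import shutil

shutil.copyfile("source.bin", "dest.bin")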
def do_GET(self):
    file_basename = X100HTTPServerHelper.get_file_basename(self.path)
    file_ext = X100HTTPServerHelper.get_file_ext(file_basename)

    file_path = self.path.split('?')
    self.file_path_without_query_string = file_path[0]

    if self.file_path_without_query_string in self.__class__.routers_get:
        req = X100Request()
        req.remote_ip = self.address_string()
        if len(file_path) > 1:
            req.query_string = file_path[1]
            req.args = X100HTTPServerHelper.parse_query_string(
                req.query_string)
        try:
            response = self.__class__.routers_get[
                self.file_path_without_query_string](req)
            response = self.make_x100response(response)
        except Exception:
            self.__class__.logger.logger.warning(
                "get_handler() def exec error. ",
                exc_info=True, stack_info=False)
            self.send_error(500)
        else:
            self.send_x100response(response)
    else:
        for patten in self.__class__.routers_get_regex:
            result = patten.fullmatch(self.file_path_without_query_string)
            if result:
                req = X100Request()
                req.remote_ip = self.address_string()
                if len(file_path) > 1:
                    req.query_string = file_path[1]
                    req.args = X100HTTPServerHelper.parse_query_string(
                        req.query_string)
                req.args_in_url = result.groupdict()
                try:
                    response = self.__class__.routers_get_regex[patten](req)
                    response = self.make_x100response(response)
                except Exception:
                    self.__class__.logger.logger.warning(
                        "regex_get_handler() def exec error. ",
                        exc_info=True, stack_info=False)
                    self.send_error(500)
                else:
                    self.send_x100response(response)
                return

        if file_ext in self.__class__.static_file_ext and os.path.exists(file_basename):
            self.send_response(200)
            self.send_header(
                "Content-type",
                X100HTTPServerHelper.get_mime(
                    self.file_path_without_query_string))
            self.end_headers()
            f = open(file_basename, "rb", buffering=0)
            os.sendfile(self.wfile.fileno(), f.fileno(), 0,
                        os.path.getsize(file_basename))
            f.close()
        else:
            for patten in self.__class__.routers_static:
                result = patten.fullmatch(
                    self.file_path_without_query_string)
                if result:
                    static_file_path = self.__class__.routers_static[
                        patten][0] + result.group(1).split('?')[0]
                    cors = self.__class__.routers_static[patten][1]
                    if os.path.exists(static_file_path):
                        self.send_response(200)
                        self.send_header(
                            "Content-type",
                            X100HTTPServerHelper.get_mime(
                                self.file_path_without_query_string))
                        if cors:
                            self.send_header(
                                "Access-Control-Allow-Origin", cors)
                            self.send_header(
                                "Access-Control-Allow-Methods",
                                'GET, POST, OPTIONS')
                        self.end_headers()
                        f = open(static_file_path, "rb", buffering=0)
                        os.sendfile(self.wfile.fileno(), f.fileno(), 0,
                                    os.path.getsize(static_file_path))
                        f.close()
                    return
            self.send_error(404)
os.close(fd)

assert_raises(OSError, lambda: os.read(fd, 10))
assert_raises(FileNotFoundError, lambda: os.open('DOES_NOT_EXIST', os.O_RDONLY))
assert_raises(FileNotFoundError, lambda: os.open('DOES_NOT_EXIST', os.O_WRONLY))
assert_raises(FileNotFoundError, lambda: os.rename('DOES_NOT_EXIST', 'DOES_NOT_EXIST 2'))

# sendfile only supports in_fd as non-socket on linux and solaris
if hasattr(os, "sendfile") and sys.platform.startswith("linux"):
    src_fd = os.open('README.md', os.O_RDONLY)
    dest_fd = os.open('destination.md', os.O_RDWR | os.O_CREAT)
    src_len = os.stat('README.md').st_size

    bytes_sent = os.sendfile(dest_fd, src_fd, 0, src_len)
    assert src_len == bytes_sent

    os.lseek(dest_fd, 0, 0)
    assert os.read(src_fd, src_len) == os.read(dest_fd, bytes_sent)

    os.close(src_fd)
    os.close(dest_fd)

try:
    os.open('DOES_NOT_EXIST', 0)
except OSError as err:
    assert err.errno == 2

assert os.O_RDONLY == 0
assert os.O_WRONLY == 1
assert os.O_RDWR == 2
def copy_data(data_length, blocksize, infp, outfp):
    # type: (int, int, BinaryIO, BinaryIO) -> None
    '''
    A utility function to copy data from the input file object to the output
    file object. This function will use the most efficient copy method
    available, which is often sendfile.

    Parameters:
     data_length - The amount of data to copy.
     blocksize - How much data to copy per iteration.
     infp - The file object to copy data from.
     outfp - The file object to copy data to.
    Returns:
     Nothing.
    '''
    use_sendfile = False
    if have_sendfile:
        # Python 3 implements the fileno method for all file-like objects, so
        # we can't just use the existence of the method to tell whether it is
        # available. Instead, we try to assign it, and if we fail, then we
        # assume it is not available.
        try:
            x_unused = infp.fileno()  # NOQA
            y_unused = outfp.fileno()  # NOQA
            use_sendfile = True
        except (AttributeError, io.UnsupportedOperation):
            pass

    if use_sendfile:
        # This is one of those instances where using the file object and the
        # file descriptor causes problems. The sendfile() call actually
        # updates the underlying file descriptor, but the file object does
        # not know about it. To get around this, we instead get the offset,
        # allow sendfile() to update the offset, then manually seek the file
        # object to the right location. This ensures that the file object
        # gets updated properly.
        in_offset = infp.tell()
        out_offset = outfp.tell()
        # We also have to make sure to seek the output to the right location,
        # since it is possible the caller did not do so (particularly if a
        # write happened right before this, the fileno may not know the
        # current offset).
        outfp.seek(out_offset)
        sendfile(outfp.fileno(), infp.fileno(), in_offset, data_length)
        infp.seek(in_offset + data_length)
        outfp.seek(out_offset + data_length)
    else:
        left = data_length
        readsize = blocksize
        while left > 0:
            if left < readsize:
                readsize = left
            data = infp.read(readsize)
            # We have seen ISOs in the wild (Tribes Vengeance 1of4.iso) that
            # lie about the size of their files, causing reads to fail (since
            # we hit EOF before the supposed end of the file). If we are
            # using sendfile above, sendfile just silently returns as much
            # data as it can, with no additional checking. We should do the
            # same here, so if we got less data than we asked for, abort the
            # loop silently.
            data_len = len(data)
            if data_len != readsize:
                data_len = left
            outfp.write(data)
            left -= data_len
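# A hedged usage sketch for copy_data() above, copying a whole file in
# 64 KiB blocks; the file names are made up for illustration.
import os

with open("input.iso", "rb") as infp, open("output.iso", "wb") as outfp:
    length = os.fstat(infp.fileno()).st_size
    copy_data(length, 65536, infp, outfp)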
print("flags:", flags) return flags get_blocking() s.setblocking(0) flags = get_blocking() assert flags & os.O_NONBLOCK off = 0 sz = ln while True: if sz: try: res = os.sendfile(s_fd, f.fileno(), off, sz) except socket.error as es: print(es) raise except Exception as e: print(e) raise print(res) assert res > 0, res off += res sz -= res else: break print("end") else:
def unpack_wad(fileresult, scanenvironment, offset, unpackdir):
    '''Verify a Doom WAD file'''
    filesize = fileresult.filesize
    filename_full = scanenvironment.unpack_path(fileresult.filename)
    unpackedfilesandlabels = []
    labels = []
    unpackingerror = {}
    unpackedsize = 0

    if offset + 12 > filesize:
        unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                          'reason': 'not enough data for header'}
        return {'status': False, 'error': unpackingerror}

    # open the file and skip the magic
    checkfile = open(filename_full, 'rb')
    checkfile.seek(offset + 4)
    unpackedsize += 4

    # number of lumps in the file
    checkbytes = checkfile.read(4)
    nr_lumps = int.from_bytes(checkbytes, byteorder='little')
    unpackedsize += 4
    if nr_lumps == 0:
        checkfile.close()
        unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                          'reason': 'no lumps defined'}
        return {'status': False, 'error': unpackingerror}

    # offset to beginning of the lumps directory
    checkbytes = checkfile.read(4)
    lumps_dir_offset = int.from_bytes(checkbytes, byteorder='little')
    unpackedsize += 4
    if offset + lumps_dir_offset + nr_lumps * 16 > filesize:
        checkfile.close()
        unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                          'reason': 'not enough data for lumps directory'}
        return {'status': False, 'error': unpackingerror}

    maxoffset = lumps_dir_offset + nr_lumps * 16

    # now check the lumps directory
    checkfile.seek(offset + lumps_dir_offset)
    for lump in range(0, nr_lumps):
        # lump offset
        checkbytes = checkfile.read(4)
        lump_offset = int.from_bytes(checkbytes, byteorder='little')

        # lump size
        checkbytes = checkfile.read(4)
        lump_size = int.from_bytes(checkbytes, byteorder='little')

        # sanity check
        if offset + lump_offset + lump_size > filesize:
            checkfile.close()
            unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                              'reason': 'data cannot be outside of file'}
            return {'status': False, 'error': unpackingerror}
        maxoffset = max(maxoffset, lump_offset + lump_size)

        # lump name
        checkbytes = checkfile.read(8)
        try:
            lump_name = checkbytes.split(b'\x00', 1)[0].decode()
        except UnicodeDecodeError:
            checkfile.close()
            unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                              'reason': 'invalid lump name'}
            return {'status': False, 'error': unpackingerror}

    if offset == 0 and maxoffset == filesize:
        labels.append('doom')
        labels.append('wad')
        labels.append('resource')
    else:
        # else carve the file
        outfile_rel = os.path.join(unpackdir, "unpacked.wad")
        outfile_full = scanenvironment.unpack_path(outfile_rel)
        outfile = open(outfile_full, 'wb')
        os.sendfile(outfile.fileno(), checkfile.fileno(), offset, maxoffset)
        outfile.close()
        unpackedfilesandlabels.append(
            (outfile_rel, ['doom', 'wad', 'resource', 'unpacked']))

    checkfile.close()
    return {'status': True, 'length': maxoffset, 'labels': labels,
            'filesandlabels': unpackedfilesandlabels}
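# Note on the carve above: because os.sendfile() is given an explicit
# offset, the read position of checkfile is left untouched, so the caller
# can keep using the file object afterwards. A reduced sketch of that
# carving pattern (names are illustrative):
import os

def carve_range(src_path, dst_path, offset, length):
    with open(src_path, "rb") as fin, open(dst_path, "wb") as fout:
        os.sendfile(fout.fileno(), fin.fileno(), offset, length)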
def unpack_pak(fileresult, scanenvironment, offset, unpackdir):
    '''Unpack a Quake PAK file'''
    filesize = fileresult.filesize
    filename_full = scanenvironment.unpack_path(fileresult.filename)
    unpackedfilesandlabels = []
    labels = []
    unpackingerror = {}
    unpackedsize = 0

    if offset + 12 > filesize:
        unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                          'reason': 'not enough data for header'}
        return {'status': False, 'error': unpackingerror}

    # open the file and skip the magic
    checkfile = open(filename_full, 'rb')
    checkfile.seek(offset + 4)
    unpackedsize += 4

    # offset to beginning of the file table
    checkbytes = checkfile.read(4)
    file_table_offset = int.from_bytes(checkbytes, byteorder='little')
    unpackedsize += 4

    # file table cannot be in the header
    if file_table_offset < 12:
        checkfile.close()
        unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                          'reason': 'wrong value for file table offset'}
        return {'status': False, 'error': unpackingerror}

    # size of the file table
    checkbytes = checkfile.read(4)
    file_table_size = int.from_bytes(checkbytes, byteorder='little')
    unpackedsize += 4

    # there has to be at least one file
    if file_table_size == 0:
        checkfile.close()
        unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                          'reason': 'wrong value for file table size'}
        return {'status': False, 'error': unpackingerror}

    # file_table_size has to be a multiple of 64
    if file_table_size % 64 != 0:
        checkfile.close()
        unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                          'reason': 'wrong value for file table size'}
        return {'status': False, 'error': unpackingerror}

    # file table cannot be outside of file
    if offset + file_table_offset + file_table_size > filesize:
        checkfile.close()
        unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                          'reason': 'not enough data for file table'}
        return {'status': False, 'error': unpackingerror}

    # each file table entry is 64 bytes
    number_of_files = file_table_size // 64

    maxoffset = file_table_offset + file_table_size

    # seek to the file table offset
    checkfile.seek(offset + file_table_offset)
    for fn in range(0, number_of_files):
        # read the name
        checkbytes = checkfile.read(56)
        try:
            fn_name = checkbytes.split(b'\x00', 1)[0].decode()
            # force a relative path
            if os.path.isabs(fn_name):
                fn_name = os.path.relpath(fn_name, '/')
        except UnicodeDecodeError:
            checkfile.close()
            unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                              'reason': 'invalid file name'}
            return {'status': False, 'error': unpackingerror}

        # read the offset
        checkbytes = checkfile.read(4)
        fn_offset = int.from_bytes(checkbytes, byteorder='little')

        # read the size
        checkbytes = checkfile.read(4)
        fn_size = int.from_bytes(checkbytes, byteorder='little')

        # sanity check
        if offset + fn_offset + fn_size > filesize:
            checkfile.close()
            unpackingerror = {'offset': offset + unpackedsize, 'fatal': False,
                              'reason': 'data cannot be outside of file'}
            return {'status': False, 'error': unpackingerror}

        maxoffset = max(maxoffset, fn_offset + fn_size)

        outfile_rel = os.path.join(unpackdir, fn_name)
        outfile_full = scanenvironment.unpack_path(outfile_rel)

        # create subdirectories, if any are defined in the file name
        if '/' in fn_name:
            os.makedirs(os.path.dirname(outfile_full), exist_ok=True)

        # write the file
        outfile = open(outfile_full, 'wb')
        os.sendfile(outfile.fileno(), checkfile.fileno(),
                    offset + fn_offset, fn_size)
        outfile.close()
        unpackedfilesandlabels.append((outfile_rel, []))

    checkfile.close()

    if offset == 0 and maxoffset == filesize:
        labels.append('quake')

    return {'status': True, 'length': maxoffset, 'labels': labels,
            'filesandlabels': unpackedfilesandlabels}
def _sock_sendfile_native_impl(self, fut, registered_fd, sock, fileno,
                               offset, count, blocksize, total_sent):
    fd = sock.fileno()
    if registered_fd is not None:
        # Remove the callback early. It should be rare that the
        # selector says the fd is ready but the call still returns
        # EAGAIN, and I am willing to take a hit in that case in
        # order to simplify the common case.
        self.remove_writer(registered_fd)
    if fut.cancelled():
        self._sock_sendfile_update_filepos(fileno, offset, total_sent)
        return
    if count:
        blocksize = count - total_sent
        if blocksize <= 0:
            self._sock_sendfile_update_filepos(fileno, offset, total_sent)
            fut.set_result(total_sent)
            return

    try:
        sent = os.sendfile(fd, fileno, offset, blocksize)
    except (BlockingIOError, InterruptedError):
        if registered_fd is None:
            self._sock_add_cancellation_callback(fut, sock)
        self.add_writer(fd, self._sock_sendfile_native_impl, fut,
                        fd, sock, fileno,
                        offset, count, blocksize, total_sent)
    except OSError as exc:
        if (registered_fd is not None and
                exc.errno == errno.ENOTCONN and
                type(exc) is not ConnectionError):
            # If we have an ENOTCONN and this isn't a first call to
            # sendfile(), i.e. the connection was closed in the middle
            # of the operation, normalize the error to ConnectionError
            # to make it consistent across all Posix systems.
            new_exc = ConnectionError(
                "socket is not connected", errno.ENOTCONN)
            new_exc.__cause__ = exc
            exc = new_exc
        if total_sent == 0:
            # We can get here for different reasons, the main
            # one being 'file' is not a regular mmap(2)-like
            # file, in which case we'll fall back on using
            # plain send().
            err = exceptions.SendfileNotAvailableError(
                "os.sendfile call failed")
            self._sock_sendfile_update_filepos(fileno, offset, total_sent)
            fut.set_exception(err)
        else:
            self._sock_sendfile_update_filepos(fileno, offset, total_sent)
            fut.set_exception(exc)
    except Exception as exc:
        self._sock_sendfile_update_filepos(fileno, offset, total_sent)
        fut.set_exception(exc)
    else:
        if sent == 0:
            # EOF
            self._sock_sendfile_update_filepos(fileno, offset, total_sent)
            fut.set_result(total_sent)
        else:
            offset += sent
            total_sent += sent
            if registered_fd is None:
                self._sock_add_cancellation_callback(fut, sock)
            self.add_writer(fd, self._sock_sendfile_native_impl, fut,
                            fd, sock, fileno,
                            offset, count, blocksize, total_sent)
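# The private helper above backs asyncio's public API; a minimal sketch of
# the user-facing call, assuming `sock` is a connected non-blocking socket:
import asyncio

async def send_file(sock, path):
    loop = asyncio.get_running_loop()
    with open(path, "rb") as f:
        # falls back to a read()/send() loop where sendfile() is unavailable
        return await loop.sock_sendfile(sock, f)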
            elif event == select.EPOLLIN:
                requests[fileno] += connections[fileno].recv(1024)
                if EOL1 in requests[fileno] or EOL2 in requests[fileno]:
                    epoll.modify(fileno, select.EPOLLOUT)
                    print('-' * 40 + '\n' + requests[fileno].decode()[:-2])
            elif event == select.EPOLLOUT:
                header_p = responses[fileno]
                if header_p < len(response):
                    byteswritten = connections[fileno].send(
                        response[header_p:])
                    responses[fileno] += byteswritten
                else:
                    p = response_file[fileno]
                    byteswritten = os.sendfile(fileno, file_pointer.fileno(),
                                               p, 8192)
                    response_file[fileno] = p + byteswritten
                    if response_file[fileno] == file_size:
                        epoll.modify(fileno, 0)
                        connections[fileno].shutdown(socket.SHUT_RDWR)
            elif event & select.EPOLLHUP:
                epoll.unregister(fileno)
                connections[fileno].close()
                del connections[fileno]
                del response_file[fileno]
finally:
    epoll.unregister(serversocket.fileno())
    epoll.close()
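# In a non-blocking loop like the epoll fragment above, os.sendfile() raises
# BlockingIOError once the socket buffer is full; a small hedged wrapper that
# reports how much actually went out (the helper name is made up):
import os

def try_sendfile(sock_fd, file_fd, offset, count):
    try:
        return os.sendfile(sock_fd, file_fd, offset, count)
    except BlockingIOError:
        # socket not writable right now; retry on the next EPOLLOUT
        return 0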
def carve_file_data(self, unpacker):
    # Now carve any data that was not unpacked from the file and
    # put it back into the scanning queue to see if something
    # could be unpacked after all, or to more quickly recognize
    # padding data.
    #
    # This also makes it easier to do a "post mortem".
    unpacked_range = unpacker.unpacked_range()
    if unpacked_range != []:
        # first check if the first entry covers the entire file,
        # because if so there is nothing to do
        if unpacked_range[0] != (0, self.fileresult.filesize):
            synthesizedcounter = 1

            # Invariant: everything up to carve_index has been inspected
            carve_index = 0
            filename_full = self.scanenvironment.unpack_path(
                self.fileresult.filename)
            scanfile = open(filename_full, 'rb')
            scanfile.seek(carve_index)

            # then see whether any useful data can be carved.
            # Add an artificial entry for the end of the file.
            # TODO: why self.fileresult.filesize + 1 ?
            # unpack ranges are [u_low:u_high)
            for u_low, u_high in unpacked_range + [
                    (self.fileresult.filesize + 1,
                     self.fileresult.filesize + 1)]:
                if carve_index == self.fileresult.filesize:
                    break
                # get the bytes from range [carve_index:u_low)
                if u_low > carve_index:
                    #if u_low - carve_index < scanenvironment.get_synthesizedminimum():
                    #    carve_index = u_high
                    #    continue
                    synthesizedcounter = unpacker.make_data_unpack_directory(
                        self.fileresult.filename, "synthesized",
                        synthesizedcounter)
                    outfile_rel = os.path.join(
                        unpacker.get_data_unpack_directory(),
                        "unpacked-%s-%s" % (hex(carve_index),
                                            hex(u_low - 1)))
                    outfile_full = self.scanenvironment.unpack_path(
                        outfile_rel)
                    outfile = open(outfile_full, 'wb')
                    os.sendfile(outfile.fileno(), scanfile.fileno(),
                                carve_index, u_low - carve_index)
                    outfile.close()

                    unpackedlabel = ['synthesized']
                    if self.is_padding(outfile_full):
                        unpackedlabel.append('padding')
                        if self.scanenvironment.get_paddingname() is not None:
                            newoutfile_rel = os.path.join(
                                unpacker.get_data_unpack_directory(),
                                "%s-%s-%s" % (
                                    self.scanenvironment.get_paddingname(),
                                    hex(carve_index), hex(u_low - 1)))
                            newoutfile_full = self.scanenvironment.unpack_path(
                                newoutfile_rel)
                            shutil.move(outfile_full, newoutfile_full)
                            outfile_rel = newoutfile_rel

                    # add the data, plus labels, to the queue
                    fr = FileResult(pathlib.Path(outfile_rel),
                                    self.fileresult.filename,
                                    self.fileresult.labels,
                                    set(unpackedlabel))
                    j = ScanJob(fr)
                    self.scanenvironment.scanfilequeue.put(j)
                carve_index = u_high
            scanfile.close()
def _parse_csv(
    path: Path,
    *,
    encoding: Optional[str],
    delimiter: Optional[str],
    has_header: bool,
    autoconvert_text_to_numbers: bool,
) -> ParseCsvResult:
    """Parse CSV, TSV or other delimiter-separated text file.

    Raise LookupError for an `encoding` Python cannot handle.

    Raise UnicodeError when the file simply cannot be read as text.
    (e.g., a UTF-16 file that does not start with a byte-order marker.)

    The process:

    1. Truncate the file to our maximum size. (WARNING This is destructive!)
       (TODO if any caller minds the truncation, fix this logic.)
    2. Convert the file to UTF-8.
    3. Sniff delimiter, if the passed argument is `None`.
    4. Run `csv-to-arrow` to parse the CSV into unnamed columns.
    5. Postprocess each column: remove its header if needed and
       dictionary-encode if it's helpful. (This doesn't cost much RAM per
       column: either dictionary encoding makes it small, or it's a
       zero-copy slice of the csv-to-arrow output file.)
    6. Write the final Arrow file.
    """
    warnings = []

    with contextlib.ExitStack() as ctx:
        n_bytes = path.stat().st_size
        if n_bytes > settings.MAX_CSV_BYTES:
            # We can't simply os.truncate() the input file, because sandboxed
            # code can't modify input files.
            truncated_path = ctx.enter_context(
                tempfile_context(prefix="truncated-"))
            with path.open("rb") as src, truncated_path.open("wb") as dest:
                os.sendfile(dest.fileno(), src.fileno(), 0,
                            settings.MAX_CSV_BYTES)
            path = truncated_path
            warnings.append(
                ParseCsvWarning.TruncatedFile(
                    original_n_bytes=n_bytes,
                    max_n_bytes=settings.MAX_CSV_BYTES))

        utf8_path = ctx.enter_context(
            tempfile_context(prefix="utf8-", suffix=".txt"))
        # raises LookupError, UnicodeError
        transcode_warning = transcode_to_utf8_and_warn(path, utf8_path,
                                                       encoding)
        if transcode_warning is not None:
            warnings.append(
                ParseCsvWarning.RepairedEncoding(
                    encoding=transcode_warning.encoding,
                    first_invalid_byte=transcode_warning.first_invalid_byte,
                    first_invalid_byte_position=transcode_warning.
                    first_invalid_byte_position,
                ))

        # Sniff delimiter
        if not delimiter:
            delimiter = detect_delimiter(utf8_path)

        with tempfile_context(suffix=".arrow") as arrow_path:
            # raise subprocess.CalledProcessError on error ... but there is
            # no error csv-to-arrow will throw that we can recover from.
            child = subprocess.run(
                [
                    "/usr/bin/csv-to-arrow",
                    "--delimiter",
                    delimiter,
                    "--max-rows",
                    str(settings.MAX_ROWS_PER_TABLE),
                    "--max-columns",
                    str(settings.MAX_COLUMNS_PER_TABLE),
                    "--max-bytes-per-value",
                    str(settings.MAX_BYTES_PER_VALUE),
                    utf8_path.as_posix(),
                    arrow_path.as_posix(),
                ],
                capture_output=True,
                check=True,
            )
            if child.stdout:
                warnings.extend(
                    _parse_csv_to_arrow_warnings(
                        child.stdout.decode("utf-8")))

            reader = pyarrow.ipc.open_file(arrow_path.as_posix())
            raw_table = reader.read_all()  # efficient -- RAM is mmapped
            table, more_warnings = _postprocess_table(
                raw_table, has_header, autoconvert_text_to_numbers)
            return ParseCsvResult(table, warnings + more_warnings)
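# The truncation step above copies only the first MAX_CSV_BYTES because
# sandboxed code may not modify its input; a reduced sketch of the pattern
# (the limit and prefix are illustrative):
import os
import tempfile

def truncated_copy(path, max_bytes=1 << 20):
    dst = tempfile.NamedTemporaryFile(prefix="truncated-", delete=False)
    with open(path, "rb") as src, dst:
        # sendfile() stops at EOF, so short files are copied whole
        os.sendfile(dst.fileno(), src.fileno(), 0, max_bytes)
    return dst.name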