def parseFile(source: IOBase, logger=logging.getLogger()):
    try:
        return Parser(Lexer(source), logger).parseModule(False)
    except CompilerError as e:
        source.seek(0)
        e.format(source.read())
        raise e
def peek(stream: IOBase, chunk_size: int) -> str:
    if hasattr(stream, 'peek'):
        return stream.peek(chunk_size)
    else:
        current_pos = stream.tell()
        result = stream.read(chunk_size)
        stream.seek(current_pos)
        return result
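
# Illustrative usage sketch (not from the original source); io.BytesIO has no
# peek() method, so this exercises the tell/read/seek fallback path above.
from io import BytesIO

stream = BytesIO(b'abcdef')
peek(stream, 3)    # -> b'abc'
stream.read(3)     # -> b'abc' (the stream position was restored by peek)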
def __new__(cls, buffer: IOBase = None, file_header: XTFFileHeader = None, *args, **kwargs):
    obj = super().__new__(cls, buffer=buffer, file_header=file_header, *args, **kwargs)
    if buffer:
        # TODO: Make getters/setters that update StringSize when changed
        obj.RawAsciiData = buffer.read(ctypes.sizeof(ctypes.c_char) * obj.StringSize.value)
    else:
        obj.RawAsciiData = b''
    return obj
def unreadpack(fs: io.IOBase, f: str) -> Tuple:
    s = struct.calcsize(f)
    buf = fs.read(s)
    if buf == b'':
        raise EOFError
    if len(buf) != s:
        raise Exception('not enough data')
    return struct.unpack(f, buf)
def create_from_buffer(cls, buffer: IOBase, file_header: XTFFileHeader = None):
    obj = super().create_from_buffer(buffer)
    n_bytes = obj.NumBytesThisRecord - ctypes.sizeof(cls)
    obj.data = buffer.read(n_bytes)
    return obj
def create_from_buffer(cls, buffer: IOBase, file_header: XTFFileHeader = None):
    obj = super().create_from_buffer(buffer)

    # TODO: Make getters/setters that update StringSize when changed
    obj.RawAsciiData = buffer.read(ctypes.sizeof(ctypes.c_char) * obj.StringSize.value)
    return obj
def deserialize(
    stream: IOBase,
    content_len: int,
    remote_ip: str,
) -> Any:
    """
    Deserialize stream using information from server.

    Generic deserialization for the following formats:
    1. single utf-8 string without whitespaces
    2. utf-8 string and raw byte data separated by the first zero byte
    3. two utf-8 strings separated by the first space in the stream
    4. utf-8 string and a flag represented by a single '!' and separated by a space

    Parameters
    ----------
    stream : IOBase
        Stream of request body.
    content_len : int
        Length of request body.
    remote_ip : str
        IP address of client.

    Returns
    -------
    Any:
        One of the options described above
    """
    it = iter(stream.read(content_len))
    has_blob = False
    path = b''
    for b in it:
        b = bytes([b])
        if b == b' ':
            break
        if b == b'\0':
            has_blob = True
            break
        path += b
    else:
        return (path.decode('utf-8'), ) if path else ()

    path = path.decode('utf-8')
    if has_blob:
        return path, bytes(it)

    b = bytes([next(it)])
    if b == b'!':
        nb = bytes(it)
        if not nb:
            return path, True
        b += nb
    b += bytes(it)
    return path, b.decode('utf-8')
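
# Illustrative usage sketch (not from the original source); the IP value is a
# placeholder and io.BytesIO stands in for the request-body stream.
from io import BytesIO

body = b'some/path payload text'
deserialize(BytesIO(body), len(body), '127.0.0.1')
# -> ('some/path', 'payload text')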
def checksum_mzml_stream(stream: io.IOBase) -> Tuple[str, bytes]:
    """Calculate the SHA1 checksum of an indexed mzML file for the purposes
    of validating the checksum at the end of the file.

    Parameters
    ----------
    stream : file-like
        A file-like object supporting a `read` method.

    Returns
    -------
    calculated_checksum: str
        The hex digest of the checksum calculated from the file's contents up
        to and including the first occurrence of <fileChecksum>.
    observed_checksum: bytes or :const:`None`
        The checksum written in the file within the <fileChecksum> tag.
        Will be :const:`None` if the tag is not found and closed.
        Expected to match the calculated checksum.
    """
    hasher = hashlib.sha1()
    target = b"<fileChecksum>"
    target_pattern = re.compile(b"(" + target + b")")
    extract_checksum = re.compile(br"<fileChecksum>\s*(\S+)\s*</fileChecksum>")
    block_size = int(2 ** 12)
    chunk = stream.read(block_size)
    hit_target = False
    observed_checksum = None
    while chunk:
        tokens = target_pattern.split(chunk)
        for token in tokens:
            hasher.update(token)
            if token == target:
                hit_target = True
                chunk += stream.read(5000)
                observed_checksum = extract_checksum.findall(chunk)
                if observed_checksum:
                    observed_checksum = observed_checksum[0]
                else:
                    observed_checksum = None
                break
        if hit_target:
            break
        chunk = stream.read(block_size)
    return hasher.hexdigest(), observed_checksum
def load(source: IOBase) -> Sequence[str]:
    """Loads translatable strings from a Javascript file.

    This function finds all occurrences of the translation functions ``_``
    and ``_N``, extracts their first argument and then performs ``eval`` on
    that expression.
    """
    data = source.read()
    for m in TRANS_RE.finditer(data):
        yield eval(compile('({})'.format(m.group(1)), '__code__', 'eval'))
def get_sha1(obj: IOBase) -> str:
    """A function to get sha1 in a memory-efficient way.

    Returns the hexdigest of obj.
    obj = io object to digest
    """
    hashhold = hashlib.sha1()
    try:
        for chunk in iter(lambda: obj.read(4096), b""):
            hashhold.update(chunk)
        obj.seek(0)
    except AttributeError:
        hashhold.update(obj)
    return hashhold.hexdigest()
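
# Illustrative usage sketch (not from the original source): the AttributeError
# fallback means raw bytes and a binary stream produce the same digest.
import io

assert get_sha1(io.BytesIO(b'abc')) == get_sha1(b'abc')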
def verify(module: Module, builtin: Module, logger=logging.getLogger(), source: IOBase = None):
    # Set up the initial state before verifying
    State.init(builtin, logger.getChild("lekvar"))
    State.logger.info(module.context)

    try:
        module.verify()
    except CompilerError as e:
        if source is not None:
            source.seek(0)
            e.format(source.read())
        raise e
def calc_size_and_sha265(content: io.IOBase, chunk_size: int):
    """Calculates the size and the sha256 value of the content."""
    size = 0
    sha256 = hashlib.sha256()
    content.seek(0, io.SEEK_SET)
    while True:
        buf = content.read(chunk_size)
        length = len(buf)
        size += length
        sha256.update(buf)
        if length != chunk_size:
            break
    return size, sha256.hexdigest()
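
# Illustrative usage sketch (not from the original source); io.BytesIO stands
# in for any seekable binary stream.
import io

size, digest = calc_size_and_sha265(io.BytesIO(b'hello world'), chunk_size=4)
# size == 11, digest == hashlib.sha256(b'hello world').hexdigest()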
def calc_hash(src: io.IOBase, m: hashlib._hashlib.HASH) -> str:
    """Calculate the hash of the io-src using the specified hash object."""
    if src is None:
        raise Exception("Invalid src for hash calc")
    if m is None:
        raise Exception("Invalid hash m for hash calc")
    while True:
        b = src.read(4096)
        if not b:
            break
        m.update(b)
    res = m.hexdigest()
    return res
def deserialize(
    stream: IOBase,
    message: Union[Type[Message], Message],
    max_size: int = MB,
) -> Message:
    if isinstance(message, type):
        message: Message = message()
    contents = stream.read(max_size + 1)
    if len(contents) > max_size:
        raise BufferError('Message content length is greater than max_size')
    text_format.Parse(contents, message)
    return message
def __new__(cls, buffer: IOBase = None):
    if buffer:
        if type(buffer) in [bytes, bytearray]:
            buffer = BytesIO(buffer)

        header_bytes = buffer.read(ctypes.sizeof(cls))
        if not header_bytes:
            raise RuntimeError('XTF file shorter than expected (end hit while reading {})'.format(cls.__name__))

        obj = cls.from_buffer_copy(header_bytes)
    else:
        obj = super().__new__(cls)

    return obj
def create_from_buffer(cls, buffer: IOBase, file_header=None):
    if type(buffer) in [bytes, bytearray]:
        buffer = BytesIO(buffer)

    # Read bytes up until the variable-sized data
    base_bytes = buffer.read(cls.TX.offset)
    n_bytes = ctypes.c_uint32.from_buffer_copy(base_bytes, cls.NumberOfBytes.offset).value
    n_tx = ctypes.c_uint16.from_buffer_copy(base_bytes, cls.Ntx.offset).value
    n_rx = ctypes.c_uint16.from_buffer_copy(base_bytes, cls.Nrx.offset).value

    # Read remaining bytes
    remaining_bytes = buffer.read(n_bytes - cls.TX.offset + cls.NumberOfBytes.size)

    # Create new class dynamically with string array at the correct size
    new_name = cls.__name__ + '_ntx{}_nrx{}'.format(n_tx, n_rx)
    new_fields = cls._fields_.copy()
    tx_idx = [i for i, (name, fieldtype) in enumerate(cls._fields_) if name == 'TX'][0]
    rx_idx = [i for i, (name, fieldtype) in enumerate(cls._fields_) if name == 'RX'][0]
    new_fields[tx_idx] = ('TX', KMRawRangeAngle78_TX * n_tx)
    new_fields[rx_idx] = ('RX', KMRawRangeAngle78_RX * n_rx)
    new_cls = type(new_name, (ctypes.LittleEndianStructure,), {
        '__str__': cls.__str__,
        '_pack_': cls._pack_,
        '_fields_': new_fields
    })

    all_bytes = base_bytes + remaining_bytes
    obj = new_cls.from_buffer_copy(all_bytes)

    # Checksum (not crc16, but a straight sum of bytes with overflow)
    chk = (sum(all_bytes[new_cls.DatagramType.offset:new_cls.EndID.offset]) & 0xFFFF)
    if chk != obj.Checksum:
        warning_str = '{}: Checksum failed'.format(cls.__name__)
        warnings.warn(warning_str)

    return obj
def get_md5_from_stream(src: io.IOBase) -> str:
    """Calculate the md5 of the src stream.

    The stream may come from a file (mode='rb'), a network stream, a StringIO
    or any other readable object opened as a BINARY stream.
    This method will NOT close the stream!
    Returns the MD5 hex digest.
    """
    if not isinstance(src, io.IOBase) or not src.readable():
        raise Exception("src is not stream or unreadable")
    m: hashlib._hashlib.HASH = hashlib.md5()
    while True:
        b = src.read(4096)
        if not b:
            break
        m.update(b)
    res = m.hexdigest()
    return res
def read_bytes(n: int, reader: io.IOBase) -> bytes:
    """
    Reads the specified number of bytes from the reader. It raises an
    `EOFError` if the specified number of bytes is not available.

    Parameters:
    - `n`: The number of bytes to read;
    - `reader`: The reader;

    Returns the bytes read.
    """
    buff = reader.read(n)
    if not isinstance(buff, bytes):
        raise ValueError('The reader is expected to return bytes.')
    if len(buff) != n:
        raise EOFError(f'Unable to read {n} bytes from the stream.')
    return buff
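
# Illustrative usage sketch (not from the original source): reading fewer
# bytes than requested raises EOFError instead of returning a short buffer.
import io

read_bytes(4, io.BytesIO(b'\x89PNG\r\n'))   # -> b'\x89PNG'
# read_bytes(16, io.BytesIO(b'short'))      # would raise EOFError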
def create_from_buffer(cls, buffer: IOBase, file_header=None):
    """
    Initializes the XTF structure by copying from the target buffer.
    Note: not to be confused with .from_buffer and .from_buffer_copy, which are the direct ctypes functions.
    :param buffer: Input bytes
    :param file_header: XTFFileHeader, only necessary for XTFPingHeader
    :return:
    """
    if type(buffer) in [bytes, bytearray]:
        buffer = BytesIO(buffer)

    header_bytes = buffer.read(ctypes.sizeof(cls))
    if not header_bytes:
        raise RuntimeError(
            'XTF file shorter than expected (end hit while reading {})'.format(cls.__name__))

    return cls.from_buffer_copy(header_bytes)
def parse(self, code: IOBase, fname='<string>', name='_pysh_func') -> ast.AST:
    # Get an AST from the input code
    node = ast.parse(code.read())

    # Now wrap the script in a function so we can get a reference to it.
    wrapper = ast.parse('def {}(): pass'.format(name))
    wrapper.body[0].body = node.body

    for parser in self.parsers:
        wrapper = _apply_transform(parser, wrapper, fname=fname)

    # Ensure locations are ok before compiling
    ast.fix_missing_locations(wrapper)
    return wrapper
def _upload_chunks(cls, rfile: BootResourceFile, content: io.IOBase, chunk_size: int, progress_callback=None):
    """Upload the `content` to `rfile` in chunks using `chunk_size`."""
    content.seek(0, io.SEEK_SET)
    upload_uri = urlparse(cls._handler.uri)._replace(path=rfile._data['upload_uri']).geturl()
    uploaded_size = 0

    while True:
        buf = content.read(chunk_size)
        length = len(buf)
        if length > 0:
            uploaded_size += length
            cls._put_chunk(upload_uri, buf)
            if progress_callback is not None:
                progress_callback(uploaded_size / rfile.size)
        if length != chunk_size:
            break
def readfile(stream: IOBase, writer: csv.DictWriter, formver: List[int], maxcount: int = 20):
    count = 0
    last = 0
    offset = 8
    last_end = 8
    while True:
        stream.seek(offset)
        buf = stream.read(4)
        if buf == b'' or len(buf) < 4:
            logger.info('no more data.')
            break

        # read data as if offset
        data = struct.unpack('<I', buf)
        if data[0] == offset:
            # yes, we have an equal
            stream.seek(offset)  # go back a bit
            fr = Frame.read(stream, formver)
            told = stream.tell()

            dct = fr.to_dict(formver[0])
            dct['start'] = offset
            dct['end'] = told
            dct['offby'] = offset - last_end
            dct['size'] = offset - last
            dct['asdf'] = [fr.channel, f'({fr.flags}) {fr.flags:016b}']
            writer.writerow(dct)
            # print(
            #     'match at', offset, 'now', dct['now'], 'size', offset - last, 'asd', now-offset-fr.headersize,
            #     fr.to_dict(format=3, fields=['offset', 'index', 'latitude', 'packetsize', 'headersize'])
            # )
            last_end = told
            last = offset
            count += 1

        offset += 1
        if count >= maxcount:
            break
    return count
def attach_stream(self, stream: IOBase, file_name: str) -> Message:
    """Read a stream into an attachment and attach to this message.

    This method returns the object, so you can chain it like::

        msg.attach(file_handle, "test.txt").attach(byte_stream, "test.bin")

    Args:
        stream (IOBase): The stream to read from.
        file_name (str): The name of the file, used for MIME type identification.

    Returns:
        Message: this Message, for chaining.
    """
    mime_type = mimetypes.guess_type(file_name)[0]

    # it's possible we get a file that doesn't have a mime type, like a
    # Linux executable, or a mach-o file - in that case just set it
    # to octet-stream as a generic stream of bytes
    if mime_type is None:
        main_type, sub_type = ("application", "octet-stream")
    else:
        main_type, sub_type = mime_type.split("/")

    attachment = MIMEPart()

    # we need special handling for set_content with datatype of str, as
    # for some reason this method doesn't like 'maintype'
    # see: https://docs.python.org/3/library/
    # email.contentmanager.html#email.contentmanager.set_content
    content_args = {"subtype": sub_type}
    if main_type != "text":
        content_args["maintype"] = main_type

    file_name = path.basename(file_name)
    attachment.set_content(stream.read(), filename=file_name,
                           disposition="attachment", **content_args)
    self.attachments.append(attachment)
    return self
def streamcopy(from_: IOBase, to_: IOBase, size=-1, chunk_size=163840):
    '''Copies content from one buffer to the other, chunk by chunk.

    NOTE: Neither from_ nor to_ has to be an IOBase object; any object works
    as long as it has read() / write() calls.

    Args:
        from_ (IOBase): Stream to copy from
        to_ (IOBase): Stream to copy to
        size (int, optional): Length to be copied. Defaults to -1.
        chunk_size (int, optional): Size of chunk. Defaults to 163840.

    Returns:
        int : copied length
    '''
    if not size:
        return 0
    size, copied = int(size), 0
    if size < 0:
        # read until EOF
        def copychunk():
            chunk = from_.read(chunk_size)
            if not chunk:
                return 0
            to_.write(chunk)
            return len(chunk)
        while True:
            copied_ = copychunk()
            if not copied_:
                break
            copied += copied_
    else:
        # read `size` bytes
        for offset in range(0, size, chunk_size):
            remaining = size - offset
            chunk = from_.read(remaining if remaining < chunk_size else chunk_size)
            if not chunk:
                break
            copied += len(chunk)
            to_.write(chunk)
    return copied
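
# Illustrative usage sketch (not from the original source): copy only the
# first 5 bytes between two in-memory buffers.
from io import BytesIO

src, dst = BytesIO(b'hello world'), BytesIO()
copied = streamcopy(src, dst, size=5)
# copied == 5, dst.getvalue() == b'hello'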
def load(source: IOBase) -> Sequence[str]:
    """Loads translatable strings from an HTML 5 file.

    This function will find all elements with a ``data-trans`` attribute. If
    the attribute is empty, the text content, after normalisation by removing
    redundant whitespace, is used as the translatable string; otherwise the
    value of the attribute indicated by the value is used.

    :param source: The source file.
    :returns: a sequence of translatable strings
    """
    for el in document_fromstring(source.read()).findall('.//*[@{}]'.format(ATTR)):
        attrib = el.attrib[ATTR]
        if attrib:
            if attrib in el.attrib:
                yield el.attrib[attrib]
            else:
                raise ValueError('unknown attribute {}'.format(attrib))
        else:
            yield ' '.join(el.text_content().split())
async def _upload_chunks(cls, rfile: BootResourceFile, content: io.IOBase, chunk_size: int, progress_callback=None):
    """Upload the `content` to `rfile` in chunks using `chunk_size`."""
    content.seek(0, io.SEEK_SET)
    upload_uri = urlparse(cls._handler.uri)._replace(path=rfile._data['upload_uri']).geturl()
    uploaded_size = 0

    insecure = cls._handler.session.insecure
    connector = aiohttp.TCPConnector(verify_ssl=(not insecure))
    session = aiohttp.ClientSession(connector=connector)

    async with session:
        while True:
            buf = content.read(chunk_size)
            length = len(buf)
            if length > 0:
                uploaded_size += length
                await cls._put_chunk(session, upload_uri, buf)
                if progress_callback is not None:
                    progress_callback(uploaded_size / rfile.size)
            if length != chunk_size:
                break
def header_and_file(infile: IOBase, bytesio=False, only_header=False) -> tuple:
    """Opens filename and splits the header from the file.

    Returns a tuple of header (str) and file (bytes).

    file = file to split
    rootdir = the root of all media storage sent to the default_storage class
    bytesio = return file in bytesio mode instead of as bytes
    only_header = discard the file after reading and return a tuple of the header (str) and None
    """
    infile.seek(0)
    header = b""
    addnext = b""

    # iterate until the end of the header
    while addnext != b"---END HEADER---\n":
        addnext = infile.readline()
        header += addnext

    # add the rest of the file to retfile
    if not only_header:
        if bytesio:
            retfile = infile
        else:
            retfile = infile.read()
            infile.close()

    return (header.decode("ascii"), None if only_header else retfile)
def deserialize_list(
    stream: IOBase,
    content_len: int,
    remote_ip: str,
) -> List[str]:
    """
    Deserialize list.

    Parameters
    ----------
    stream : IOBase
        Stream of request body.
    content_len : int
        Length of request body.
    remote_ip : str
        IP address of client.

    Returns
    -------
    List[str]:
        Deserialized list.
    """
    tmp = stream.read(content_len).decode('utf-8')
    return tmp.split()
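
# Illustrative usage sketch (not from the original source); the IP value is a
# placeholder and io.BytesIO stands in for the request-body stream.
from io import BytesIO

body = b'alpha beta gamma'
deserialize_list(BytesIO(body), len(body), '127.0.0.1')
# -> ['alpha', 'beta', 'gamma']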
def _read_string(fp: IOBase, wide: bool = False) -> str:
    buf, end = b"", -1
    offset = fp.tell()

    # locate string end
    while end == -1:
        chunk = fp.read(64)
        if not chunk:
            raise VDFDecodeError(f"Unterminated cstring (offset: {offset})")
        buf += chunk
        end = buf.find(b"\x00\x00" if wide else b"\x00")

    if wide:
        end += end % 2

    # rewind fp
    fp.seek(end - len(buf) + (2 if wide else 1), 1)

    # decode string
    result = buf[:end]
    return result.decode("utf-16") if wide else result.decode("utf-8", "replace")
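
# Illustrative usage sketch (not from the original source): read a
# NUL-terminated UTF-8 string and leave the stream just past the terminator.
from io import BytesIO

fp = BytesIO(b'hello\x00rest')
_read_string(fp)   # -> 'hello'
fp.read()          # -> b'rest'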
def load(self, stream: IOBase) -> Any:
    return stream.read().decode('utf-8')
def load(self, stream: IOBase) -> Any:
    return stream.read()
def get_file_data_and_close_file(file_instance: io.IOBase) -> bytes:
    file_data = file_instance.read()
    file_instance.close()
    return file_data
def process_text(db: Database, source: Source, text: IOBase) -> Optional[Exception]:
    session = get_session(db)
    line_no = 1  # lol
    ultimate_text = ''
    futures = []

    source.content = ''
    session.add(source)
    session.commit()  # so we can attach phrases to it. need its id.

    line_queue = Queue()
    error_queue = Queue()
    db_proc = Process(target=line_handler,
                      args=(db, line_queue, error_queue, source.id))
    db_proc.start()

    chunk = text.read(CHUNK_SIZE)
    while len(chunk) > 0:
        line_buff = ""
        for c in chunk:
            if BAD_CHARS.get(c, False):
                if not line_buff.endswith(' '):
                    line_buff += ' '
                continue
            if CLAUSE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    ultimate_text += line_buff
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                else:
                    line_buff += c
                continue
            if SENTENCE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    ultimate_text += line_buff
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                continue
            if c == ' ' and line_buff.endswith(' '):
                continue
            if c == "'" and line_buff.endswith(' '):
                continue
            if c == "'" and peek(text, 1) == ' ':
                continue
            line_buff += c
        chunk = text.read(CHUNK_SIZE)

    line_queue.put(DONE_READING)
    db_proc.join()

    error = None
    if error_queue.empty():
        source.content = ultimate_text
        session.add(source)
    else:
        error = error_queue.get()
        session.delete(source)

    result = None
    if error is None:
        result = source.id
    else:
        result = error

    session.commit()
    session.close()
    return result
def create_from_buffer(cls, buffer: IOBase, file_header: XTFFileHeader = None):
    if not file_header:
        raise RuntimeError('Initialization of XTFPingHeader from buffer requires file_header to be passed.')

    obj = super().create_from_buffer(buffer=buffer)

    obj.ping_chan_headers = []  # type: List[XTFPingChanHeader]
    obj.data = None

    # Sonar and bathy has a different data structure following the header
    if obj.HeaderType == XTFHeaderType.sonar:
        obj.data = []  # type: List[np.ndarray]

        bytes_remaining = obj.NumBytesThisRecord - ctypes.sizeof(XTFPingHeader)

        for i in range(0, obj.NumChansToFollow):
            # Retrieve XTFPingChanHeader for this channel
            p_chan = XTFPingChanHeader.create_from_buffer(buffer=buffer)
            obj.ping_chan_headers.append(p_chan)
            bytes_remaining -= ctypes.sizeof(XTFPingChanHeader)

            # Backwards-compatibility: retrieve from NumSamples if possible, else use old field
            n_samples = p_chan.NumSamples if p_chan.NumSamples > 0 else file_header.sonar_info[i].Reserved

            # Calculate number of bytes to read
            n_bytes = n_samples * file_header.sonar_info[i].BytesPerSample
            if n_bytes > bytes_remaining:
                raise RuntimeError('Number of bytes to read exceeds the number of bytes remaining in packet.')

            # Read the data and output as a numpy array of the specified bytes-per-sample
            samples = buffer.read(n_bytes)
            if not samples:
                raise RuntimeError('File ended while reading data packets (file corrupt?)')
            bytes_remaining -= len(samples)

            # Favor getting the sample format from the dedicated field added in X41.
            # If the field is not populated, deduce the type from the bytes per sample field.
            if file_header.sonar_info[i].SampleFormat in sample_format_dtype:
                sample_format = sample_format_dtype[file_header.sonar_info[i].SampleFormat]
            else:
                sample_format = xtf_dtype[file_header.sonar_info[i].BytesPerSample]

            samples = np.frombuffer(samples, dtype=sample_format)
            obj.data.append(samples)

    elif obj.HeaderType == XTFHeaderType.bathy_xyza:
        # Bathymetry uses the same header as sonar, but without the XTFPingChanHeaders

        # TODO: Should the sub-channel number be used to index chan_info (?)
        # sub_chan = obj.SubChannelNumber

        # Read the data that follows
        n_bytes = obj.NumBytesThisRecord - ctypes.sizeof(XTFPingHeader)
        samples = buffer.read(n_bytes)
        if not samples:
            warn('XTFBathyHeader without any data encountered.')

        # Processed bathy data consists of repeated XTFBeamXYZA structures
        # Note: Using a ctypes array is a _lot_ faster than constructing a list of BeamXYZA
        num_xyza = n_bytes // ctypes.sizeof(XTFBeamXYZA)
        xyza_array_type = XTFBeamXYZA * num_xyza
        xyza_array_type._pack_ = 1
        obj.data = xyza_array_type.from_buffer_copy(samples)

    elif obj.HeaderType == XTFHeaderType.reson_7018_watercolumn:
        # 7018 water column consists of XTFPingHeader followed by (one?) XTFPingChanHeader, then vendor data

        # Retrieve XTFPingChanHeader
        p_chan = XTFPingChanHeader.create_from_buffer(buffer=buffer)
        obj.ping_chan_headers.append(p_chan)

        # Read the data that follows
        n_bytes = obj.NumBytesThisRecord - ctypes.sizeof(XTFPingHeader) - ctypes.sizeof(XTFPingChanHeader)
        samples = buffer.read(n_bytes)
        if not samples:
            warn('XTFPingHeader (Reson7018) without any data encountered.')

        obj.data = samples
    else:
        # Generic XTFPingHeader construction
        n_bytes = obj.NumBytesThisRecord - ctypes.sizeof(XTFPingHeader)
        samples = buffer.read(n_bytes)
        if not samples and n_bytes > 0:
            warn('XTFPingHeader without any data encountered.')

        # The data is the raw bytes following the header
        obj.data = samples

    return obj