def __serialize_morph_tree_node(res: io.IOBase, tn: 'MorphTreeNode') -> None:
    if (tn.rules is not None):
        for r in tn.rules:
            MorphSerializeHelper.__serialize_short(res, r._id0_)
    MorphSerializeHelper.__serialize_short(res, 0)
    if (tn.reverce_variants is not None):
        for v in tn.reverce_variants:
            MorphSerializeHelper.__serialize_string(res, Utils.ifNotNull(v.tail, ""))
            if (v.rule is not None):
                pass
            MorphSerializeHelper.__serialize_short(res, (0 if v.rule is None else v.rule._id0_))
            MorphSerializeHelper.__serialize_short(res, v.coef)
            MorphSerializeHelper.__serialize_morph_rule_variant(res, v)
    MorphSerializeHelper.__serialize_string(res, None)
    if (tn.nodes is not None):
        for n in tn.nodes.items():
            MorphSerializeHelper.__serialize_short(res, n[0])
            p0 = res.tell()
            MorphSerializeHelper.__serialize_int(res, 0)
            MorphSerializeHelper.__serialize_morph_tree_node(res, n[1])
            p1 = res.tell()
            res.seek(p0, io.SEEK_SET)
            MorphSerializeHelper.__serialize_int(res, p1)
            res.seek(p1, io.SEEK_SET)
    MorphSerializeHelper.__serialize_short(res, 0xFFFF)
def deserialize(self, stream: io.IOBase) -> bool:
    vers = 0
    b = Utils.readByteIO(stream)
    if (b == (0xAA)):
        b = (Utils.readByteIO(stream))
        vers = (b)
    else:
        stream.seek(stream.tell() - (1), io.SEEK_SET)
    self.__m_sofa = SourceOfAnalysis(None)
    self.__m_sofa.deserialize(stream)
    self.base_language = MorphLang._new5(SerializerHelper.deserializeInt(stream))
    self.__m_entities = list()
    cou = SerializerHelper.deserializeInt(stream)
    i = 0
    while i < cou:
        typ = SerializerHelper.deserializeString(stream)
        r = ProcessorService.createReferent(typ)
        if (r is None):
            r = Referent("UNDEFINED")
        self.__m_entities.append(r)
        i += 1
    i = 0
    while i < cou:
        self.__m_entities[i].deserialize(stream, self.__m_entities, self.__m_sofa)
        i += 1
    self.first_token = SerializerHelper.deserializeTokens(stream, self, vers)
    self.__createStatistics()
    return True
def get_stream_content(stream: IOBase, seek: Optional[int] = None) -> Tuple[Optional[bytes], str]:
    if seek is not None and stream.seekable():
        stream.seek(seek)
    content = stream.read(limits.STREAM_SIZE_LIMIT_BYTES + 1)
    default_ext = "txt" if isinstance(content, str) else "bin"
    if isinstance(content, str):
        content = content.encode("utf-8")
    if limits.stream_size_exceeds_limit(len(content)):
        return None, default_ext
    while True:
        chunk = stream.read(1024 * 1024)
        if chunk is None:
            continue
        elif not chunk:
            break
        else:
            if not isinstance(content, BytesIO):
                content = BytesIO(content)
            content.seek(0, 2)
            if isinstance(chunk, str):
                chunk = chunk.encode("utf-8")
            if limits.stream_size_exceeds_limit(content.tell() + len(chunk)):
                return None, default_ext
            content.write(chunk)
    if isinstance(content, BytesIO):
        content = content.getvalue()
    return content, default_ext
def _find_backwards(
    self,
    src: io.IOBase,
    tok: HighLevelTokenizer,
    text_to_find: str,
) -> int:
    # length of str to check
    str_len = 1024

    # go to end of file
    src.seek(0, io.SEEK_END)
    file_length = src.tell()

    pos = file_length - str_len
    if pos < 1:
        pos = 1

    while pos > 0:
        src.seek(pos)
        bytes_near_eof = "".join([tok._next_char() for _ in range(0, str_len)])
        idx = bytes_near_eof.find(text_to_find)
        if idx >= 0:
            return pos + idx
        pos = pos - str_len + len(text_to_find)

    # raise error
    return -1
def deserialize(self, stream: io.IOBase, all0_: typing.List['Referent'], sofa: 'SourceOfAnalysis') -> None:
    typ = SerializerHelper.deserializeString(stream)
    cou = SerializerHelper.deserializeInt(stream)
    i = 0
    while i < cou:
        typ = SerializerHelper.deserializeString(stream)
        c = SerializerHelper.deserializeInt(stream)
        id0_ = SerializerHelper.deserializeInt(stream)
        val = None
        if (id0_ < 0):
            val = (all0_[(-id0_) - 1])
        elif (id0_ > 0):
            stream.seek(stream.tell() - (4), io.SEEK_SET)
            val = (SerializerHelper.deserializeString(stream))
        self.addSlot(typ, val, False, c)
        i += 1
    cou = SerializerHelper.deserializeInt(stream)
    self.__m_occurrence = list()
    i = 0
    while i < cou:
        a = TextAnnotation._new2691(sofa, self)
        self.__m_occurrence.append(a)
        a.begin_char = SerializerHelper.deserializeInt(stream)
        a.end_char = SerializerHelper.deserializeInt(stream)
        attr = SerializerHelper.deserializeInt(stream)
        if (((attr & 1)) != 0):
            a.essential_for_occurence = True
        i += 1
def parseFile(source: IOBase, logger=logging.getLogger()):
    try:
        return Parser(Lexer(source), logger).parseModule(False)
    except CompilerError as e:
        source.seek(0)
        e.format(source.read())
        raise e
def _read_trailer(self, src: io.IOBase, tok: HighLevelTokenizer) -> Dictionary:
    # return an empty Dictionary if there is no trailer
    token = tok.next_non_comment_token()
    assert token is not None
    if token.text != "trailer":
        return Dictionary()

    # if there is a keyword "trailer" the next token should be TokenType.START_DICT
    token = tok.next_non_comment_token()
    assert token is not None
    if token.token_type != TokenType.START_DICT:
        raise PDFSyntaxError(
            byte_offset=tok.tell(),
            message="invalid XREF trailer",
        )

    # go back 2 chars "<<"
    src.seek(-2, io.SEEK_CUR)

    # read dictionary as trailer
    trailer_dict = tok.read_dictionary()

    # process startxref
    token = tok.next_non_comment_token()
    assert token is not None
    if token.token_type != TokenType.OTHER or token.text != "startxref":
        raise PDFSyntaxError(
            byte_offset=token.byte_offset,
            message="start of XREF not found",
        )

    # return
    return trailer_dict
def read(
    self,
    src: io.IOBase,
    tok: HighLevelTokenizer,
    initial_offset: Optional[int] = None,
) -> "XREF":
    if initial_offset is not None:
        src.seek(initial_offset)
    else:
        self._seek_to_xref_token(src, tok)

    # now we should be back to the start of XREF
    token = tok.next_non_comment_token()
    assert token is not None
    if token.text != "xref":
        raise XREFTokenNotFoundError()

    # read xref sections
    while True:
        xref_section = self._read_section(src, tok)
        if len(xref_section) == 0:
            break
        else:
            for r in xref_section:
                self.append(r)

    # process trailer
    self["Trailer"] = self._read_trailer(src, tok)

    # return self
    return self
async def _upload_chunks(cls, rfile: BootResourceFile, content: io.IOBase,
                         chunk_size: int, progress_callback=None):
    """Upload the `content` to `rfile` in chunks using `chunk_size`."""
    content.seek(0, io.SEEK_SET)
    upload_uri = urlparse(cls._handler.uri)._replace(
        path=rfile._data['upload_uri']).geturl()
    uploaded_size = 0
    insecure = cls._handler.session.insecure
    connector = aiohttp.TCPConnector(verify_ssl=(not insecure))
    session = aiohttp.ClientSession(connector=connector)
    async with session:
        while True:
            buf = content.read(chunk_size)
            length = len(buf)
            if length > 0:
                uploaded_size += length
                await cls._put_chunk(session, upload_uri, buf)
                if progress_callback is not None:
                    progress_callback(uploaded_size / rfile.size)
            if length != chunk_size:
                break
def peek(stream: IOBase, chunk_size: int) -> str:
    if hasattr(stream, 'peek'):
        return stream.peek(chunk_size)
    else:
        current_pos = stream.tell()
        result = stream.read(chunk_size)
        stream.seek(current_pos)
        return result
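# A minimal check for peek() above, standard library only: io.BytesIO has no
# peek() method, so this exercises the tell()/read()/seek() fallback branch.
import io

raw = io.BytesIO(b"hello world")
assert peek(raw, 5) == b"hello"
assert raw.tell() == 0  # the stream position is restored after peeking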
def read_file_checksum(stream: io.IOBase) -> bytes:
    '''Read the stored file checksum of an indexedmzML file.
    '''
    stream.seek(-5000, 2)
    chunk = stream.read(5001)
    target = re.compile(br"<fileChecksum>\s*(\S+)\s*</fileChecksum>")
    matches = target.findall(chunk)
    return matches
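# Hedged sketch exercising read_file_checksum() above with an in-memory tail;
# the real caller passes an indexedmzML file, this fake document is illustrative.
import io

tail = b"<fileChecksum>abc123def</fileChecksum></indexedmzML>"
fake = io.BytesIO(b"\x00" * 6000 + tail)
assert read_file_checksum(fake) == [b"abc123def"]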
# Note: the decorator is needed for this generator to act as a context
# manager; without it the try/finally rewind never runs on exit.
@contextlib.contextmanager
def seek_then_rewind(fd: io.IOBase, seek=0) -> typing.IO:
    pos = fd.tell()
    if seek is not None:
        fd.seek(seek)
    try:
        yield fd
    finally:
        fd.seek(pos)
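# Usage sketch for seek_then_rewind() above, assuming the contextlib.contextmanager
# decoration shown there; standard library only.
import io

buf = io.BytesIO(b"abcdef")
buf.read(3)                           # advance to position 3
with seek_then_rewind(buf, seek=0) as fd:
    assert fd.read(2) == b"ab"        # temporarily read from the start
assert buf.tell() == 3                # original position restored on exit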
def __call__(self, stream: io.IOBase):
    pos = stream.tell()
    try:
        return self.__parser(stream)
    except ParseError:
        pass
    stream.seek(pos)
    return None
def _seek(f: io.IOBase, param: str):
    try:
        # int() raises ValueError for a non-numeric param, so catch it alongside OSError
        offset = int(param)
        f.seek(offset, os.SEEK_SET)
    except (ValueError, OSError):
        print("could not seek file.")
        return os.EX_IOERR
    else:
        print(f"seek to {offset} bytes succeeded.")
# Note: assumes contextlib.contextmanager decoration so the generator behaves
# as the documented context manager.
@contextlib.contextmanager
def uncloseable(buffer: Buffer):
    """
    Context manager which turns the fd's close operation to no-op for the
    duration of the context.
    """
    close = buffer.close
    buffer.close = lambda: None
    yield buffer
    buffer.close = close
    buffer.seek(0)  # fake close
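# Usage sketch for uncloseable() above, assuming the contextlib.contextmanager
# decoration noted there; close() becomes a no-op inside the block.
import io

buf = io.BytesIO(b"payload")
with uncloseable(buf) as fd:
    fd.close()                   # swallowed: the buffer stays usable
assert not buf.closed
assert buf.read() == b"payload"  # rewound to position 0 when the context exited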
def _read_section(self, src: io.IOBase, tok: HighLevelTokenizer) -> List[Reference]:
    tokens = [tok.next_non_comment_token() for _ in range(0, 2)]
    assert tokens[0] is not None
    assert tokens[1] is not None
    if tokens[0].text in ["trailer", "startxref"]:
        src.seek(tokens[0].byte_offset)
        return []
    if tokens[0].token_type != TokenType.NUMBER:
        raise PDFValueError(
            byte_offset=tokens[0].byte_offset,
            expected_value_description="number",
            received_value_description=tokens[0].text,
        )
    if tokens[1].token_type != TokenType.NUMBER:
        raise PDFValueError(
            byte_offset=tokens[1].byte_offset,
            expected_value_description="number",
            received_value_description=tokens[1].text,
        )

    start_object_number = int(tokens[0].text)
    number_of_objects = int(tokens[1].text)
    indirect_references = []

    # read subsection
    for i in range(0, number_of_objects):
        tokens = [tok.next_non_comment_token() for _ in range(0, 3)]
        assert tokens[0] is not None
        assert tokens[1] is not None
        assert tokens[2] is not None
        if tokens[0].text in ["trailer", "startxref"]:
            raise PDFSyntaxError(
                byte_offset=tokens[0].byte_offset,
                message="unexpected EOF while processing XREF",
            )
        if (tokens[0].token_type != TokenType.NUMBER
                or tokens[1].token_type != TokenType.NUMBER
                or tokens[2].token_type != TokenType.OTHER
                or tokens[2].text not in ["f", "n"]):
            raise PDFSyntaxError(
                byte_offset=tokens[0].byte_offset,
                message="invalid XREF line",
            )
        indirect_references.append(
            Reference(
                object_number=start_object_number + i,
                byte_offset=int(tokens[0].text),
                generation_number=int(tokens[1].text),
                is_in_use=(tokens[2].text == "n"),
            ))

    # return
    return indirect_references
def _readSlx(stream: io.IOBase, blocksize: int, formver: List[int], strict: bool) -> Frame:
    format = formver[0]
    version = formver[1]
    f = FRAME_FORMATS[format]
    s = struct.calcsize(f)
    here = stream.tell()
    bad = 0
    while True:
        buf = stream.read(s)
        if buf == b'':  # EOF
            return None
        if len(buf) < s:
            print(f'This is bad. Only got {len(buf)}/{s} bytes=', buf)
            raise NotEnoughDataError("this is bad")
        data = struct.unpack(f, buf)
        if data[0] == here:  # offset is always first value
            if bad > 1:
                logger.warn('got back at offset: %s', here)
            break
        elif here > 0:
            bad += 1
            if bad == 1:
                logger.warn('unexpected offset %s at location: %s. will try to find next frame', data[0], here)
            if strict:
                raise OffsetError('offset mismatch')
            # jump forward and try to catch the next frame
            here += 1
            stream.seek(here)
            continue
        else:
            raise OffsetError('location does not match expected offset')

    kv = {'headersize': s}
    for i, d in enumerate(FRAME_DEFINITIONS[format]):
        name = d['name']
        if not name == "-":
            kv[name] = data[i]
        if name == 'flags' and FLAG_FORMATS[format]:
            if FLAG_AS_BINARY:
                kv[name] = f'({kv[name]}) {kv[name]:016b}'
            flagform = FLAG_FORMATS[format]
            flags = data[i]
            for k, v in flagform.items():
                kv[k] = flags & v == v

    b = Frame(**kv)
    packetsize = b.packetsize
    if version == 1 and not b.has_tbd1:
        packetsize = b.framesize - 168
    if version == 1 or (version == 2 and b.channel <= 5):
        extra = 168 - s
        stream.read(extra)
    b.packet = stream.read(packetsize)
    return b
def read(self, io_source: io.IOBase) -> "Canvas":
    """
    This method reads a byte stream of canvas operators, and processes them,
    returning this Canvas afterwards
    """
    io_source.seek(0, os.SEEK_END)
    length = io_source.tell()
    io_source.seek(0)

    canvas_tokenizer = HighLevelTokenizer(io_source)

    # process content
    operand_stk = []
    instruction_number: int = 0
    while canvas_tokenizer.tell() != length:
        # print("<canvas pos='%d' length='%d' percentage='%d'/>" % (canvas_tokenizer.tell(), length, int(canvas_tokenizer.tell() * 100 / length)))

        # attempt to read object
        obj = canvas_tokenizer.read_object()
        if obj is None:
            break

        # push argument onto stack
        if not isinstance(obj, CanvasOperatorName):
            operand_stk.append(obj)
            continue

        # process operator
        instruction_number += 1
        operator = self.canvas_operators.get(obj, None)
        if operator is None:
            logger.debug("Missing operator %s" % obj)
            continue

        if not self.in_compatibility_section:
            assert len(operand_stk) >= operator.get_number_of_operands()
        operands: typing.List["CanvasOperator"] = []  # type: ignore [name-defined]
        for _ in range(0, operator.get_number_of_operands()):
            operands.insert(0, operand_stk.pop(-1))

        # debug
        operand_str = str([str(x) for x in operands])
        if len(operands) == 1 and isinstance(operands[0], list):
            operand_str = str([str(x) for x in operands[0]])
        logger.debug("%d %s %s" % (instruction_number, operator.text, operand_str))

        # invoke
        try:
            operator.invoke(self, operands)
        except Exception as e:
            if not self.in_compatibility_section:
                raise e

    # return
    return self
def head(file_obj: IOBase, n: int) -> Iterator:
    if not hasattr(file_obj, "seek"):
        raise TypeError("Missing seek method")
    i = 0
    for v in file_obj:
        if i >= n:
            break
        yield v
        i += 1
    file_obj.seek(0)
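# Quick check for head() above, standard library only: take the first n lines,
# then the generator rewinds the stream once it is exhausted.
import io

f = io.StringIO("a\nb\nc\nd\n")
assert list(head(f, 2)) == ["a\n", "b\n"]
assert f.tell() == 0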
def get_sha1(obj: IOBase) -> str:
    """A function to get sha1 in a memory efficient way.
    returns hexdigest of obj
    obj = io object to digest"""
    hashhold = hashlib.sha1()
    try:
        for chunk in iter(lambda: obj.read(4096), b""):
            hashhold.update(chunk)
        obj.seek(0)
    except AttributeError:
        hashhold.update(obj)
    return hashhold.hexdigest()
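# Small check for get_sha1() above, standard library only: the same digest is
# produced for a seekable stream (chunked branch) and for raw bytes
# (AttributeError branch).
import hashlib
import io

data = b"some payload"
assert get_sha1(io.BytesIO(data)) == hashlib.sha1(data).hexdigest()
assert get_sha1(data) == hashlib.sha1(data).hexdigest()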
def calc_size_and_sha265(content: io.IOBase, chunk_size: int):
    """Calculates the size and the sha256 value of the content."""
    size = 0
    sha256 = hashlib.sha256()
    content.seek(0, io.SEEK_SET)
    while True:
        buf = content.read(chunk_size)
        length = len(buf)
        size += length
        sha256.update(buf)
        if length != chunk_size:
            break
    return size, sha256.hexdigest()
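# Usage sketch for calc_size_and_sha265() above, standard library only.
import hashlib
import io

payload = b"x" * 10_000
size, digest = calc_size_and_sha265(io.BytesIO(payload), chunk_size=4096)
assert size == len(payload)
assert digest == hashlib.sha256(payload).hexdigest()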
def verify(module: Module, builtin: Module, logger=logging.getLogger(), source: IOBase = None):
    # Set up the initial state before verifying
    State.init(builtin, logger.getChild("lekvar"))
    State.logger.info(module.context)

    try:
        module.verify()
    except CompilerError as e:
        if source is not None:
            source.seek(0)
            e.format(source.read())
        raise e
def __call__(self, stream: io.IOBase):
    values = []
    for _ in range(self.__min_repeats):
        values.append(self.__parser(stream))
    repeats = self.__min_repeats
    while self.__max_repeats is None or repeats < self.__max_repeats:
        pos = stream.tell()
        try:
            values.append(self.__parser(stream))
            repeats += 1  # count the optional repeat so max_repeats is honoured
        except ParseError:
            stream.seek(pos)
            break
    return values
def _decode_avro_alert(avro_alert: io.IOBase, schema: dict) -> Any:
    """Decodes a file-like stream of avro data

    Parameters
    ----------
    avro_alert: io.IOBase
        a file-like stream with avro encoded data
    schema: dict
        Dictionary of json format schema to decode avro data

    Returns
    -------
    record: Any
        Record obtained after decoding avro data (typically, dict)
    """
    avro_alert.seek(0)
    return fastavro.schemaless_reader(avro_alert, schema)
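# Hedged round-trip sketch for _decode_avro_alert() above; assumes fastavro is
# installed. The schema and record here are illustrative, not the real alert schema.
import io
import fastavro

schema = {"type": "record", "name": "Alert",
          "fields": [{"name": "objectId", "type": "string"}]}
buf = io.BytesIO()
fastavro.schemaless_writer(buf, schema, {"objectId": "ZTF21aaaaaaa"})
assert _decode_avro_alert(buf, schema) == {"objectId": "ZTF21aaaaaaa"}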
def _upload_chunks(cls, rfile: BootResourceFile, content: io.IOBase,
                   chunk_size: int, progress_callback=None):
    """Upload the `content` to `rfile` in chunks using `chunk_size`."""
    content.seek(0, io.SEEK_SET)
    upload_uri = urlparse(cls._handler.uri)._replace(
        path=rfile._data['upload_uri']).geturl()
    uploaded_size = 0
    while True:
        buf = content.read(chunk_size)
        length = len(buf)
        if length > 0:
            uploaded_size += length
            cls._put_chunk(upload_uri, buf)
            if progress_callback is not None:
                progress_callback(uploaded_size / rfile.size)
        if length != chunk_size:
            break
def read_private_key_file(file_: io.IOBase) -> PKey:
    """Read a private key file.  Similar to :meth:`PKey.from_private_key()
    <paramiko.pkey.PKey.from_private_key>` except it guesses the key type.

    :param file_: a stream of the private key to read
    :type file_: :class:`io.IOBase`
    :return: the read private key
    :rtype: :class:`paramiko.pkey.PKey`
    :raise paramiko.ssh_exception.SSHException: when something goes wrong

    """
    classes = PKey.__subclasses__()
    last = len(classes) - 1  # index of the final candidate; re-raise only then
    for i, cls in enumerate(classes):
        try:
            return cls.from_private_key(file_)
        except SSHException:
            if i == last:
                raise
            file_.seek(0)
            continue
def installPackages(self, logFile: io.IOBase = tempfile.TemporaryFile,
                    outputLogFile: io.IOBase = tempfile.TemporaryFile):
    self._unlock()
    logFile, outputLogFile = logFile(), outputLogFile()
    # redirect stderr/stdout into the output log while the package manager runs
    oldStdError, oldStdOut = os.dup(sys.stderr.fileno()), os.dup(sys.stdout.fileno())
    os.dup2(outputLogFile.fileno(), sys.stderr.fileno())
    os.dup2(outputLogFile.fileno(), sys.stdout.fileno())
    try:
        self.pkgManager.do_install(logFile.fileno())
    except Exception as e:
        self.installError = (e, traceback.format_exc())
    os.dup2(oldStdError, sys.stderr.fileno())
    os.dup2(oldStdOut, sys.stdout.fileno())
    self._lock()
    logFile.seek(0)
    outputLogFile.seek(0)
    self.updateCache()
    for package in self.pkgList.values():
        package.update(install=True)
    for line in (line.decode().replace("\n", "") for line in logFile.readlines()):
        matches = re.match(r'^pmerror:([^:]+):([^:]+):(.*)$', line)
        if matches:
            pkgName, _, error = matches[1], matches[2], matches[3]
            self.pkgList[pkgName].errors += [error]
    logFile.seek(0)
    self.logFile, self.outputLogFile = logFile, outputLogFile
def readfile(stream: IOBase, writer: csv.DictWriter, formver: List[int], maxcount: int = 20):
    count = 0
    last = 0
    offset = 8
    last_end = 8
    while True:
        stream.seek(offset)
        buf = stream.read(4)
        if buf == b'' or len(buf) < 4:
            logger.info('no more data.')
            break
        # read data as if offset
        data = struct.unpack('<I', buf)
        if data[0] == offset:
            # yes, we have an equal
            stream.seek(offset)  # go back a bit
            fr = Frame.read(stream, formver)
            told = stream.tell()
            dct = fr.to_dict(formver[0])
            dct['start'] = offset
            dct['end'] = told
            dct['offby'] = offset - last_end
            dct['size'] = offset - last
            dct['asdf'] = [fr.channel, f'({fr.flags}) {fr.flags:016b}']
            writer.writerow(dct)
            # print(
            #     'match at', offset, 'now', dct['now'], 'size', offset - last, 'asd', now-offset-fr.headersize,
            #     fr.to_dict(format=3, fields=['offset', 'index', 'latitude', 'packetsize', 'headersize'])
            # )
            last_end = told
            last = offset
            count += 1
        offset += 1
        if count >= maxcount:
            break
    return count
def _seek_to_xref_token(self, src: io.IOBase, tok: HighLevelTokenizer):
    # find "startxref" text
    start_of_xref_token_byte_offset = self._find_backwards(src, tok, "startxref")
    assert start_of_xref_token_byte_offset is not None
    if start_of_xref_token_byte_offset == -1:
        raise StartXREFTokenNotFoundError()

    # set tokenizer to "startxref"
    src.seek(start_of_xref_token_byte_offset)
    token = tok.next_non_comment_token()
    assert token is not None
    if token.text == "xref":
        src.seek(start_of_xref_token_byte_offset)
        return

    # if we are at startxref, we are reading the XREF table backwards
    # and we need to go back to the start of XREF
    if token.text == "startxref":
        token = tok.next_non_comment_token()
        assert token is not None
        if token.token_type != TokenType.NUMBER:
            raise PDFSyntaxError(byte_offset=token.byte_offset, message="invalid XREF")
        start_of_xref_offset = int(token.text)
        src.seek(start_of_xref_offset)
def __call__(self, stream: io.IOBase):
    values = []
    pos = stream.tell()
    try:
        value = self.__parser(stream)
    except ParseError:
        stream.seek(pos)
        return values
    values.append(value)
    while True:
        pos = stream.tell()
        try:
            self.__sep_parser(stream)
        except ParseError:
            stream.seek(pos)
            break
        value = self.__parser(stream)
        values.append(value)
    return values
def read_data(line: str, f: io.IOBase, num_peaks: int) -> Generator[Tuple[float, float], None, None]:
    mz = intensity = ''
    icol = False  # whether we are in intensity column or not
    peaks_read = 0

    while True:
        if line == '\n':
            return
        if line[:5].upper() == 'NAME:':
            try:
                f.seek(f.tell() - len(line) - 1, os.SEEK_SET)
            except io.UnsupportedOperation:
                pass
            return

        for char in line:
            if char in '()[]{}':  # Ignore brackets
                continue
            elif char in ' \t,;:\n':  # Delimiter
                if icol and mz and intensity:
                    yield float(mz), float(intensity)
                    peaks_read += 1
                    if peaks_read >= num_peaks:
                        return
                    mz = intensity = ''
                icol = not icol
            elif not icol:
                mz += char
            else:
                intensity += char

        line = f.readline()
        if not line:
            break

    if icol and mz and intensity:
        yield float(mz), float(intensity)
def __call__(self, stream: io.IOBase):
    pos = stream.tell()
    exceptions = []
    for choice in self._choices:
        try:
            return choice(stream)
        except ParseError as exception:
            exceptions.append(exception)
            stream.seek(pos)

    furthest_pos = max(e.pos for e in exceptions)
    exceptions = [exception for exception in exceptions if exception.pos == furthest_pos]
    if len(exceptions) == 1:
        raise exceptions[0]

    reasons = (exception.reason for exception in exceptions)
    joined_reasons = '\n'.join('Option %d: %s' % (index, reason)
                               for index, reason in enumerate(reasons))
    raise ParseError(furthest_pos, 'Tried these options:\n%s' % joined_reasons)