def load(file_handle: io.BinaryIO) -> TSerializable:
    """load(file) -> object

    Read one tnetstring from a file and parse it into a python object.

    The file must support the read() method, and this function promises
    not to read more data than necessary.

    Raises:
        ValueError: if the file is empty, the length prefix is missing,
            malformed or longer than nine digits, or the stream ends before
            the announced payload and its trailing type byte were read.
    """
    # Read the length prefix one char at a time so nothing past this
    # tnetstring is consumed from the stream.
    c = file_handle.read(1)
    if c == b"":  # we want to detect this special case.
        raise ValueError("not a tnetstring: empty file")
    data_length = b""
    while c.isdigit():
        data_length += c
        if len(data_length) > 9:
            raise ValueError("not a tnetstring: absurdly large length prefix")
        c = file_handle.read(1)
    if c != b":" or not data_length:
        raise ValueError("not a tnetstring: missing or invalid length prefix")

    expected = int(data_length)
    data = file_handle.read(expected)
    if len(data) != expected:
        # A short read means the stream ended inside the payload.
        raise ValueError("not a tnetstring: unexpected end of data")

    # Indexing an empty read would raise IndexError; report it as malformed
    # input like every other failure above.
    type_tag = file_handle.read(1)
    if not type_tag:
        raise ValueError("not a tnetstring: missing type tag")
    return parse(type_tag[0], data)
def bytes_to_datauri(fp: BinaryIO, name):
    """Convert the full contents of a binary file object into a data URI.

    Args:
        fp: Seekable binary file object; it is rewound to offset 0 and read
            to the end.
        name: File name (or path) used only to guess the MIME type from its
            extension.

    Returns:
        A ``data:<mime>;base64,<payload>`` string. When the MIME type cannot
        be guessed from ``name``, ``application/octet-stream`` is used.
    """
    mime, _ = mimetypes.guess_type(name)
    if mime is None:
        # guess_type returns None for unknown extensions; without a fallback
        # the URI would contain the literal text "None".
        mime = "application/octet-stream"
    fp.seek(0)
    payload = base64.b64encode(fp.read()).decode("ascii")
    return "data:%s;base64,%s" % (mime, payload)
def read(self, file_input: BinaryIO, file_size: int) -> Flags:
    """Populate the header and footer fields of one datagram.

    Reads a 16-byte little-endian header (``<IBBHII``) at the current
    position and stores its values on ``self`` as ``length``, ``stx``,
    ``id``, ``model``, ``date`` and ``time``. It then skips to the last
    three bytes of the datagram and stores them as ``etx`` and ``checksum``.

    Args:
        file_input: Binary stream positioned at the start of a datagram.
        file_size: Total size of the file in bytes, used to avoid seeking
            past the end.

    Returns:
        A member of ``self.Flags``: ``MISSING_FIRST_STX``,
        ``CORRUPTED_START_DATAGRAM``, ``UNEXPECTED_EOF``,
        ``CORRUPTED_END_DATAGRAM`` or ``VALID``.
    """
    first_dg = file_input.tell() == 0

    chunk = file_input.read(16)
    if len(chunk) < 16:
        # Fewer bytes than a full header: struct.unpack would raise, so
        # report it as a truncated file instead.
        if self.verbose:
            logger.warning("unexpected EOF while reading header > current pos: %s, file size: %s",
                           file_input.tell(), file_size)
        return self.Flags.UNEXPECTED_EOF

    (self.length, self.stx, self.id,
     self.model, self.date, self.time) = struct.unpack("<IBBHII", chunk)

    if first_dg and self.stx != 2:
        if self.verbose:
            logger.warning("invalid Kongberg file > STX: %s", self.stx)
        return self.Flags.MISSING_FIRST_STX

    if (self.stx != 2) or (self.id == 0):
        if self.verbose:
            logger.warning("corrupted datagram")
        return self.Flags.CORRUPTED_START_DATAGRAM

    # try to read ETX
    # Make sure we don't try to read beyond the EOF: with the 16 header bytes
    # consumed, this code treats (length - 12) bytes as remaining, hence the
    # ">=" against (length - 13).
    if (file_input.tell() + (self.length - 13)) >= file_size:
        if self.verbose:
            logger.warning("unexpected EOF > current pos: %s, datagram length: %s, file size: %s",
                           file_input.tell(), self.length, file_size)
        return self.Flags.UNEXPECTED_EOF

    # move file cursor to the 3-byte footer at the end of the datagram
    file_input.seek(self.length - 15, 1)
    chunk = file_input.read(3)
    if len(chunk) < 3:
        # file_size was larger than what the stream could actually deliver
        return self.Flags.UNEXPECTED_EOF

    self.etx, self.checksum = struct.unpack("<BH", chunk)
    if self.etx != 3:
        return self.Flags.CORRUPTED_END_DATAGRAM

    return self.Flags.VALID
def read_varint(s: BinaryIO) -> int:
    """Read a variable-length integer from a stream.

    The first byte is either the value itself or a marker announcing how
    many little-endian bytes follow: 0xfd -> 2, 0xfe -> 4, 0xff -> 8.
    """
    marker = s.read(1)[0]
    width = {0xfd: 2, 0xfe: 4, 0xff: 8}.get(marker)
    if width is None:
        # any other first byte is the integer itself
        return marker
    return little_endian_to_int(s.read(width))
def read(self, file_input: BinaryIO, file_size: int) -> Flags:
    """Populate the header fields of one datagram and validate its footer.

    Reads a 20-byte little-endian header (``<I4cBBHII``) and stores its
    values on ``self`` as ``length``, ``type`` (the four characters joined
    into bytes), ``version``, ``system_id``, ``sounder_id``, ``time_sec``
    and ``time_microsec``; ``datetime`` is derived from the two time fields.
    It then reads the trailing 4-byte length and compares it to the leading
    one.

    Args:
        file_input: Binary stream positioned at the start of a datagram.
        file_size: Total size of the file in bytes.

    Returns:
        ``self.Flags.UNEXPECTED_EOF`` when the header or the datagram does
        not fit in the file, ``self.Flags.CORRUPTED_END_DATAGRAM`` when the
        two length fields differ, otherwise ``self.Flags.VALID``.
    """
    chunk = file_input.read(20)
    if len(chunk) < 20:
        # Fewer bytes than a full header: struct.unpack would raise.
        if self.verbose:
            logging.warning("unexpected EOF while reading header > current pos: %s, file size: %s",
                            file_input.tell(), file_size)
        return self.Flags.UNEXPECTED_EOF

    hdr_data = struct.unpack("<I4cBBHII", chunk)
    self.length = hdr_data[0]
    self.type = b''.join(hdr_data[1:5])
    self.version = hdr_data[5]
    self.system_id = hdr_data[6]
    self.sounder_id = hdr_data[7]
    self.time_sec = hdr_data[8]
    # NOTE(review): despite the attribute name, the KMALL header carries this
    # remainder in nanoseconds (the sibling reader calls it time_nanosec).
    self.time_microsec = hdr_data[9]
    # nanoseconds -> microseconds is a division by 1000; the former factor
    # 10e-3 (== 0.01) made the sub-second part ten times too large.
    # Epoch arithmetic yields the same naive UTC value as utcfromtimestamp.
    self.datetime = datetime(1970, 1, 1) + timedelta(
        seconds=self.time_sec, microseconds=self.time_microsec / 1000)

    # Make sure we don't try to read beyond the EOF: the 20 header bytes are
    # already consumed, so (length - 20) bytes of this datagram remain.
    if (file_input.tell() + (self.length - 20)) > file_size:
        if self.verbose:
            logging.warning(
                "unexpected EOF > current pos: %s, datagram length: %s, file size: %s"
                % (file_input.tell(), self.length, file_size))
        return self.Flags.UNEXPECTED_EOF

    # move file cursor to almost the end of the datagram (just minus the length field)
    file_input.seek(self.length - 24, 1)  # 1 -> current file position
    chunk = file_input.read(4)
    if len(chunk) < 4:
        # file_size was larger than what the stream could actually deliver
        return self.Flags.UNEXPECTED_EOF

    footer_length = struct.unpack("<I", chunk)[0]
    if self.length != footer_length:
        logging.warning(
            "mismatch between initial and end datagram length: %s != %s" % (self.length, footer_length))
        return self.Flags.CORRUPTED_END_DATAGRAM

    return self.Flags.VALID
def read(self, file_input: BinaryIO, file_size: int) -> Flags:
    """Populate the header fields of one datagram and validate its footer.

    Reads a 20-byte little-endian header (``<I4cBBHII``) and stores its
    values on ``self`` as ``length``, ``id`` (the four characters joined
    into bytes), ``version``, ``system_id``, ``sounder_id``, ``time_sec``
    and ``time_nanosec``; ``dg_time`` is whatever ``self.kmall_datetime``
    returns for the two time fields. It then reads the trailing 4-byte
    length and compares it to the leading one.

    Args:
        file_input: Binary stream positioned at the start of a datagram.
        file_size: Total size of the file in bytes.

    Returns:
        ``self.Flags.UNEXPECTED_EOF`` when the header or the datagram does
        not fit in the file, ``self.Flags.CORRUPTED_END_DATAGRAM`` when the
        two length fields differ, otherwise ``self.Flags.VALID``.
    """
    chunk = file_input.read(20)
    if len(chunk) < 20:
        # Fewer bytes than a full header: struct.unpack would raise.
        if self.verbose:
            logger.warning("unexpected EOF while reading header > current pos: %s, file size: %s",
                           file_input.tell(), file_size)
        return self.Flags.UNEXPECTED_EOF

    hdr_data = struct.unpack("<I4cBBHII", chunk)
    self.length = hdr_data[0]
    self.id = b''.join(hdr_data[1:5])
    self.version = hdr_data[5]
    self.system_id = hdr_data[6]
    self.sounder_id = hdr_data[7]
    self.time_sec = hdr_data[8]
    self.time_nanosec = hdr_data[9]
    self.dg_time = self.kmall_datetime(self.time_sec, self.time_nanosec)

    # Make sure we don't try to read beyond the EOF: the 20 header bytes are
    # already consumed, so (length - 20) bytes of this datagram remain. A
    # datagram that ends exactly at EOF is complete, hence ">" and not ">=".
    if (file_input.tell() + (self.length - 20)) > file_size:
        if self.verbose:
            logger.warning("unexpected EOF > current pos: %s, datagram length: %s, file size: %s"
                           % (file_input.tell(), self.length, file_size))
        return self.Flags.UNEXPECTED_EOF

    # move file cursor to the trailing length field at the end of the datagram
    file_input.seek(self.length - 24, 1)
    chunk = file_input.read(4)
    if len(chunk) < 4:
        # file_size was larger than what the stream could actually deliver
        return self.Flags.UNEXPECTED_EOF

    footer_length = struct.unpack("<I", chunk)[0]
    if footer_length != self.length:
        logger.info("datagram length mismatch: %s vs. %s" % (self.length, footer_length))
        return self.Flags.CORRUPTED_END_DATAGRAM

    return self.Flags.VALID
def _read_metadata(proto_file: BinaryIO) -> bytes: """Reads metadata from a protobufs file. Notes ----- Internal use. For external API, use read_metadata. Parameters ---------- proto_file Binary file. Returns ------- bytes Metadata. """ metadata_length = proto_file.read(8) # Long long metadata_length, = struct.unpack('<Q', metadata_length) return proto_file.read(metadata_length)
def _read_protos( proto_file: BinaryIO, Proto: GeneratedProtocolMessageType ) -> 'GeneratedProtocolMessageType()': """Reads many protobufs from a file. Notes ----- Internal use. For external API, use read_protos. Parameters ---------- proto_file Binary file. Proto: Protocol message class (from the generated protobuf module). Yields ------- GeneratedProtocolMessageType A parsed protobuf. """ # This is essentially the inverse of the write_protos function. # Skip the metadata. metadata_length = proto_file.read(8) # Long long metadata_length, = struct.unpack('<Q', metadata_length) proto_file.read(metadata_length) length = proto_file.read(8) # long long while length: length, = struct.unpack('<Q', length) proto = Proto() proto.ParseFromString(proto_file.read(length)) yield proto length = proto_file.read(8)
def _tempnc(data: BinaryIO) -> Generator[str, None, None]: """Create a temporary netcdf file.""" from tempfile import NamedTemporaryFile tmp = None try: tmp = NamedTemporaryFile(suffix=".nc", prefix="erddapy_") tmp.write(data.read()) tmp.flush() yield tmp.name finally: if tmp is not None: tmp.close()
async def send_reader(self, title: str, description: str, file_reader: BinaryIO, close=False) -> Response:
    """Read all of ``file_reader`` and send it base64-encoded after the title/description payload.

    Args:
        title: Passed to ``Client._prepare_payload``.
        description: Passed to ``Client._prepare_payload``.
        file_reader: Binary stream that is read to its end.
        close: When true, ``file_reader`` is closed after the read attempt,
            whether or not the read succeeded.

    Returns:
        Whatever ``self._send_data`` returns for the assembled payload.
    """
    self._check_connection()
    try:
        data = file_reader.read()
    finally:
        # The caller handed over ownership with close=True; honour that even
        # when read() raises so the handle is not leaked.
        if close:
            file_reader.close()
    payload = Client._prepare_payload(title, description) + base64.b64encode(data)
    return await self._send_data(payload)
def _lwf_prelim(f: BinaryIO) -> Dict[str, Any]: dat = {} dat["bearng"] = struct.unpack("f", f.read(4))[0] dat["rhomx"] = struct.unpack("f", f.read(4))[0] dat["rlat"] = struct.unpack("f", f.read(4))[0] dat["rlon"] = struct.unpack("f", f.read(4))[0] dat["rrho"] = struct.unpack("f", f.read(4))[0] dat["nrps"] = struct.unpack("i", f.read(4))[0] dat["nrsgmnt"] = struct.unpack("i", f.read(4))[0] dat["nrprm"] = struct.unpack("i", f.read(4))[0] dat["nrpts"] = struct.unpack("i", f.read(4))[0] dat["nrcmp"] = struct.unpack("i", f.read(4))[0] dat["nrlwf"] = struct.unpack("i", f.read(4))[0] return dat
def _lwf_prelim(f: BinaryIO) -> Dict[str, Any]: dat = {} dat['bearng'] = struct.unpack('f', f.read(4))[0] dat['rhomx'] = struct.unpack('f', f.read(4))[0] dat['rlat'] = struct.unpack('f', f.read(4))[0] dat['rlon'] = struct.unpack('f', f.read(4))[0] dat['rrho'] = struct.unpack('f', f.read(4))[0] dat['nrps'] = struct.unpack('i', f.read(4))[0] dat['nrsgmnt'] = struct.unpack('i', f.read(4))[0] dat['nrprm'] = struct.unpack('i', f.read(4))[0] dat['nrpts'] = struct.unpack('i', f.read(4))[0] dat['nrcmp'] = struct.unpack('i', f.read(4))[0] dat['nrlwf'] = struct.unpack('i', f.read(4))[0] return dat
def from_bytestream(cls, bytestream: BinaryIO, num_length_bytes=8, block_size=None, num_padding_bytes=0):
    """Build an instance of ``cls`` from a byte stream.

    The stream is consumed in this order: ``num_length_bytes`` bytes decoded
    with ``decode_integer`` into a block count, then (only when
    ``num_padding_bytes`` is positive) one ``RecBlock`` of that length, then
    one ``cls.parse_block(bytestream, block_size)`` per counted block.

    Returns ``cls(header_blocks, content_blocks)``, where ``header_blocks``
    holds the optional padding block.
    """
    block_count = decode_integer(bytestream.read(num_length_bytes))

    if num_padding_bytes > 0:
        padding = [RecBlock.from_bytestream(bytestream, length=num_padding_bytes)]
    else:
        padding = []

    blocks = [cls.parse_block(bytestream, block_size) for _ in range(block_count)]
    return cls(padding, blocks)
def fromFile(cls, file: BinaryIO):
    """Build an instance of ``cls`` from the full contents of ``file``.

    ``file`` is always closed before returning, including when reading or
    constructing the instance raises.
    """
    try:
        return cls(file.read())
    finally:
        file.close()
def lwf_header(f: BinaryIO) -> Dict[str, Any]:
    """Read the header fields at the current position of ``f`` into a dict.

    ``f`` may also be a ``str`` or ``Path``; the suffix is then forced to
    ".lwf" and the file is opened with ``opener`` before parsing.

    Text fields are decoded as ASCII, numeric fields are unpacked as native
    4-byte floats ("f") or ints ("i"), and the per-path values are collected
    into four NumPy arrays whose length is the path count read from the
    stream.
    """
    if isinstance(f, (str, Path)):
        target = Path(f).expanduser().with_suffix(".lwf")
        with opener(target) as f:
            return lwf_header(f)

    def text(width):
        return f.read(width).decode("ascii")

    def real():
        return struct.unpack("f", f.read(4))[0]

    out = {"archive": f.read(8)}
    out["file_id"] = [text(120) for _ in range(3)]
    out["prgm_id"] = text(12)
    out["case_id"] = text(80)
    out["prfl_id"] = text(40)
    out["xmtr_id"] = text(20)
    out["freq"] = real()
    out["txlat"] = real()
    out["txlon"] = real()
    out["path_id"] = text(20)
    out["oplat1"] = real()
    out["oplon1"] = real()
    out["oplat2"] = real()
    out["oplon2"] = real()

    (nrpath,) = struct.unpack("i", f.read(4))
    per_path = ("bearing", "rhomax", "rxlat", "rxlon")
    for key in per_path:
        out[key] = np.empty(nrpath)
    # values are stored path by path: all four fields of path 0, then path 1, ...
    for idx in range(nrpath):
        for key in per_path:
            out[key][idx] = real()

    return out
def lwf_header(f: BinaryIO) -> Dict[str, Any]:
    """Read the header fields at the current position of ``f`` into a dict.

    ``f`` may also be a ``str`` or ``Path``; the suffix is then forced to
    '.lwf' and the file is opened with ``opener`` before parsing.

    Text fields are decoded as ASCII, numeric fields are unpacked as native
    4-byte floats ('f') or ints ('i'), and the per-path values are collected
    into four NumPy arrays whose length is the path count read from the
    stream.
    """
    if isinstance(f, (str, Path)):
        lwf_path = Path(f).expanduser().with_suffix('.lwf')
        with opener(lwf_path) as f:
            return lwf_header(f)

    id_fields = (('prgm_id', 12), ('case_id', 80), ('prfl_id', 40), ('xmtr_id', 20))
    tx_fields = ('freq', 'txlat', 'txlon')
    op_fields = ('oplat1', 'oplon1', 'oplat2', 'oplon2')
    path_fields = ('bearing', 'rhomax', 'rxlat', 'rxlon')

    out = {}
    out['archive'] = f.read(8)
    out['file_id'] = [f.read(120).decode('ascii') for _ in range(3)]
    for key, width in id_fields:
        out[key] = f.read(width).decode('ascii')
    for key in tx_fields:
        out[key] = struct.unpack('f', f.read(4))[0]
    out['path_id'] = f.read(20).decode('ascii')
    for key in op_fields:
        out[key] = struct.unpack('f', f.read(4))[0]

    nrpath = struct.unpack('i', f.read(4))[0]
    out.update((key, np.empty(nrpath)) for key in path_fields)
    # one group of four floats per path, in path_fields order
    for row in range(nrpath):
        for key in path_fields:
            out[key][row] = struct.unpack('f', f.read(4))[0]

    return out
def read_record(stream: BinaryIO, charset=ANSI) -> Optional[MarcRecord]:
    """
    Read one record from a file in ISO 2709 format.

    :param stream: File or file-like object
    :param charset: Encoding used to decode field and subfield text
    :return: The decoded record, or None when the stream ends, the record
        is truncated, or its last byte is not the record delimiter
    """

    # Read the record length (the first five bytes)
    marker = stream.read(5)
    if len(marker) != 5:
        return None

    # ...and then the remainder of the record
    record_length = parse_int(marker)
    need = record_length - 5
    tail = stream.read(need)
    if len(tail) != need:
        return None

    # Simple sanity check that this is a well-formed ISO record
    record = marker + tail
    if record[record_length - 1] != RECORD_DELIMITER:
        return None

    # Layout values stored at fixed offsets near the start of the record
    indicator_length = parse_int(record[10:11])
    base_address = parse_int(record[12:17])

    # Start the actual conversion
    result = MarcRecord()

    # Walk the fields using the directory (12 bytes per entry)
    directory = MARKER_LENGTH
    while record[directory] != FIELD_DELIMITER:
        # hitting the delimiter means the directory has ended
        tag = parse_int(record[directory:directory + 3])
        field_length = parse_int(record[directory + 3:directory + 7])
        field_offset = parse_int(
            record[directory + 7:directory + 12]) + base_address
        field = RecordField(tag)
        result.fields.append(field)

        if tag < 10:
            # fixed field:
            # cannot contain subfields or indicators
            field.value = record[field_offset:field_offset + field_length - 1].decode(charset)
        else:
            # variable-length field:
            # starts with the indicators and may contain subfields
            start = field_offset + indicator_length
            # NOTE(review): this bound only equals "last byte before the field
            # terminator" when indicator_length is 2 - confirm for other values.
            stop = field_offset + field_length - indicator_length + 1
            position = start

            # look for the field value that precedes the first delimiter;
            # the scan must test the moving position, not the fixed start,
            # otherwise text before the first subfield swallows every subfield
            while position < stop:
                if record[position] == SUBFIELD_DELIMITER:
                    break
                position += 1

            # if there is text before the first delimiter, remember it
            if position != start:
                field.value = record[start:position].decode(charset)

            # walk through the subfields
            start = position
            while start < stop:
                position = start + 1
                while position < stop:
                    if record[position] == SUBFIELD_DELIMITER:
                        break
                    position += 1
                # the byte after the delimiter is the subfield code,
                # everything up to the next delimiter is its value
                subfield = SubField(chr(record[start + 1]),
                                    record[start + 2:position].decode(charset))
                field.subfields.append(subfield)
                start = position

        # move on to the next directory entry
        directory += 12

    return result