def write_header(self, f: BinaryIO) -> None:
    """Writes the BSA header to f

    Args:
        f (BinaryIO): file-like output stream
    """
    if self.auto_file_flags:
        logging.debug("Determining file flags")
        self.file_flags = functools.reduce(
            operator.or_, (x.flags for x in self.walk_folders()), FileFlag.NONE)

    logging.info(f"Archive flags: {self.flags}")
    logging.info(f"File flags: {self.file_flags}")
    logging.info(f"Folders count: {self.non_empty_folders_count}")
    logging.info(f"Files count: {self.files_count}")
    logging.debug(f"Total folder name length: {self.total_folder_name_length}")
    logging.debug(f"Total file name length: {self.total_file_name_length}")
    logging.info(f"Compression level: {self.compression_level if self.compress else 'no compression'}")

    logging.debug("Writing header")
    f.write(b"BSA\x00")
    f.write(
        struct.pack(
            "<LLLLLLLL",
            self.game.value,
            0x24,  # offset of the folder records block
            self.flags.value,
            self.non_empty_folders_count,
            self.files_count,
            self.total_folder_name_length,
            self.total_file_name_length,
            self.file_flags.value
        )
    )
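# A quick check (not from the original source) that the header layout above is
# self-consistent: the 4 magic bytes plus eight 4-byte little-endian fields
# occupy 36 == 0x24 bytes, matching the folder-records offset written in the
# second field.
import struct

assert 4 + struct.calcsize("<LLLLLLLL") == 0x24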
def load(file_handle: typing.BinaryIO) -> TSerializable:
    """load(file) -> object

    This function reads a tnetstring from a file and parses it into a
    python object. The file must support the read() method, and this
    function promises not to read more data than necessary.
    """
    # Read the length prefix one char at a time.
    # Note that the netstring spec explicitly forbids padding zeros.
    c = file_handle.read(1)
    if c == b"":  # we want to detect this special case.
        raise ValueError("not a tnetstring: empty file")
    data_length = b""
    while c.isdigit():
        data_length += c
        if len(data_length) > 9:
            raise ValueError("not a tnetstring: absurdly large length prefix")
        c = file_handle.read(1)
    if c != b":":
        raise ValueError("not a tnetstring: missing or invalid length prefix")
    data = file_handle.read(int(data_length))
    data_type = file_handle.read(1)[0]
    return parse(data_type, data)
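# Minimal usage sketch for load() above, assuming the module's parse() and
# dumps() implement the standard tnetstring types (the trailing b"]" marks a
# list).
import io

buf = io.BytesIO(b"16:5:hello,5:world,]")
assert load(buf) == ["hello", "world"]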
def bytes_to_datauri(fp: BinaryIO, name):
    """Convert a binary stream into a data URI, guessing the MIME type from `name`."""
    mime, _ = mimetypes.guess_type(name)
    fp.seek(0)
    data = fp.read()
    data64 = b"".join(base64.encodebytes(data).splitlines())
    return "data:%s;base64,%s" % (mime, data64.decode("utf-8"))
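# Usage sketch: any seekable binary stream works; the MIME type is guessed
# from the name alone.
import io

uri = bytes_to_datauri(io.BytesIO(b"hello"), "hello.txt")
assert uri == "data:text/plain;base64,aGVsbG8="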
def dump(self, value: T, f: BinaryIO) -> None:
    try:
        pickle.dump(value, f, pickle_protocol)
    except pickle.PickleError:
        raise
    except Exception as e:
        msg = "Could not serialize broadcast: %s: %s" % (
            e.__class__.__name__, str(e))
        print_exec(sys.stderr)
        raise pickle.PicklingError(msg)
    f.close()
async def send_reader(self, title: str, description: str,
                      file_reader: BinaryIO, close=False) -> Response:
    self._check_connection()
    data = file_reader.read()
    if close:
        file_reader.close()
    payload = Client._prepare_payload(title, description) + base64.b64encode(data)
    return await self._send_data(payload)
def write_file_names(self, f: BinaryIO) -> None:
    """Writes the file names block to f.

    If the BSA flags do not have Flags.INCLUDE_FILE_NAMES set,
    this method does nothing.

    Args:
        f (BinaryIO): file-like output stream
    """
    if (self.flags & Flags.INCLUDE_FILE_NAMES) == 0:
        return
    logging.debug("Writing file names")
    for folder in self.sorted_folders:
        for file in folder.sorted_files:
            f.write(file.name.encode("ascii"))
            f.write(b"\x00")
def __send_file(self, file: BinaryIO, filename: str):
    """
    Sends a file over the socket.

    :param file: file to be sent
    :param filename: name of the file, sent ahead of its contents
    """
    self.__send_string(filename)
    # get file size
    file.seek(0, io.SEEK_END)
    file_len = file.tell()
    file.seek(0)
    self.__send_int(file_len)
    self.socket.sendfile(file)
def _save_index(self, file: BinaryIO):
    start_time = time.time()
    max_bytes = 2**31 - 1

    print('Serializing index')
    serialized = pickle.dumps(self.index)  # renamed from `bytes` to avoid shadowing the builtin
    elapsed_time = time.time() - start_time
    print('Index serialized; elapsed time: {}'.format(
        str(datetime.timedelta(seconds=elapsed_time))))

    start_time = time.time()
    print('Writing index')
    for i in tqdm(range(0, len(serialized), max_bytes)):
        file.write(serialized[i:i + max_bytes])
        file.flush()
    elapsed_time = time.time() - start_time
    print('Index saved; elapsed time: {}'.format(
        str(datetime.timedelta(seconds=elapsed_time))))
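# Hypothetical inverse of _save_index above (not part of the original source).
# The chunked writes work around CPython's historical failure on single
# write()/read() calls larger than 2**31 - 1 bytes, so the loader reads in the
# same-sized chunks before unpickling.
def _load_index(self, file: BinaryIO):
    max_bytes = 2**31 - 1
    data = bytearray()
    while True:
        chunk = file.read(max_bytes)
        if not chunk:
            break
        data.extend(chunk)
    self.index = pickle.loads(bytes(data))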
def read_varint(s: BinaryIO) -> int:
    """Reads a variable-length integer from a stream."""
    i = s.read(1)[0]
    if i == 0xfd:
        # 0xfd means the next 2 bytes hold the number
        return little_endian_to_int(s.read(2))
    elif i == 0xfe:
        # 0xfe means the next 4 bytes hold the number
        return little_endian_to_int(s.read(4))
    elif i == 0xff:
        # 0xff means the next 8 bytes hold the number
        return little_endian_to_int(s.read(8))
    else:
        # anything below 0xfd is the number itself
        return i
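# Usage sketch: values below 0xfd fit in the marker byte itself; larger values
# follow a 0xfd/0xfe/0xff marker as 2/4/8 little-endian bytes.
# little_endian_to_int is assumed to behave like int.from_bytes(b, "little").
import io

assert read_varint(io.BytesIO(b"\x64")) == 100
assert read_varint(io.BytesIO(b"\xfd\xe8\x03")) == 1000  # 0x03e8, little-endian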
def write_folder_records(self, f: BinaryIO) -> None:
    """Writes the folder records block to f

    Args:
        f (BinaryIO): file-like output stream
    """
    logging.debug("Writing folder records")
    logging.debug(f"Sorted folder hashes: {[x.tes_hash for x in self.sorted_folders]}")
    for folder in self.sorted_folders:
        folder.record_offset = f.tell()
        f.write(
            struct.pack(
                "<QLLQ",
                folder.tes_hash,
                len(folder.files),
                0,  # padding
                0   # file records offset, patched later in write_file_records
            )
        )
def _read_metadata(proto_file: BinaryIO) -> bytes:
    """Reads metadata from a protobufs file.

    Notes
    -----
    Internal use. For external API, use read_metadata.

    Parameters
    ----------
    proto_file
        Binary file.

    Returns
    -------
    bytes
        Metadata.
    """
    metadata_length = proto_file.read(8)  # long long
    metadata_length, = struct.unpack('<Q', metadata_length)
    return proto_file.read(metadata_length)
def _read_protos(
        proto_file: BinaryIO,
        Proto: GeneratedProtocolMessageType
) -> Iterator['GeneratedProtocolMessageType']:
    """Reads many protobufs from a file.

    Notes
    -----
    Internal use. For external API, use read_protos.

    Parameters
    ----------
    proto_file
        Binary file.
    Proto
        Protocol message class (from the generated protobuf module).

    Yields
    ------
    GeneratedProtocolMessageType
        A parsed protobuf.
    """
    # This is essentially the inverse of the write_protos function.
    # Skip the metadata.
    metadata_length = proto_file.read(8)  # long long
    metadata_length, = struct.unpack('<Q', metadata_length)
    proto_file.read(metadata_length)

    length = proto_file.read(8)  # long long
    while length:
        length, = struct.unpack('<Q', length)
        proto = Proto()
        proto.ParseFromString(proto_file.read(length))
        yield proto
        length = proto_file.read(8)
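# Hedged sketch of the write side implied by the comment above ("the inverse
# of the write_protos function"): a little-endian length-prefixed metadata
# blob followed by length-prefixed serialized messages. The name and signature
# are assumptions, not the original API.
def _write_protos(proto_file: BinaryIO, metadata: bytes, protos) -> None:
    proto_file.write(struct.pack('<Q', len(metadata)))
    proto_file.write(metadata)
    for proto in protos:
        encoded = proto.SerializeToString()
        proto_file.write(struct.pack('<Q', len(encoded)))
        proto_file.write(encoded)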
@contextmanager  # the Generator signature implies use as a context manager
def _tempnc(data: BinaryIO) -> Generator[str, None, None]:
    """Create a temporary netcdf file."""
    from tempfile import NamedTemporaryFile

    tmp = None
    try:
        tmp = NamedTemporaryFile(suffix=".nc", prefix="erddapy_")
        tmp.write(data.read())
        tmp.flush()
        yield tmp.name
    finally:
        if tmp is not None:
            tmp.close()
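# Usage sketch for _tempnc above; the payload stands in for, e.g., the body of
# a netCDF HTTP response.
import io

payload = io.BytesIO(b"\x89HDF...")  # illustrative bytes only
with _tempnc(payload) as nc_path:
    print(nc_path)  # path to a temporary *.nc file, closed (and removed) on exit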
def write_certificate_to_file(
        certificate: Certificate,
        dest_file: BinaryIO,
        encoding: serialization.Encoding,
) -> None:
    """Writes a certificate to a file.

    Args:
        certificate: Certificate object to be saved to file.
        dest_file: BinaryIO object representing the file to be written to.
        encoding: The serialization format to use to save the certificate.

    Raises:
        X509CertificateError: In case the certificate cannot be saved to file.
    """
    try:
        cert_bytes = serialize_certificate(certificate, encoding)
        dest_file.write(cert_bytes)
    except Exception as err:
        raise X509CertificateError(
            'Error writing certificate to file: {}'.format(str(err)))
def _lwf_prelim(f: BinaryIO) -> Dict[str, Any]:
    dat = {}
    dat["bearng"] = struct.unpack("f", f.read(4))[0]
    dat["rhomx"] = struct.unpack("f", f.read(4))[0]
    dat["rlat"] = struct.unpack("f", f.read(4))[0]
    dat["rlon"] = struct.unpack("f", f.read(4))[0]
    dat["rrho"] = struct.unpack("f", f.read(4))[0]
    dat["nrps"] = struct.unpack("i", f.read(4))[0]
    dat["nrsgmnt"] = struct.unpack("i", f.read(4))[0]
    dat["nrprm"] = struct.unpack("i", f.read(4))[0]
    dat["nrpts"] = struct.unpack("i", f.read(4))[0]
    dat["nrcmp"] = struct.unpack("i", f.read(4))[0]
    dat["nrlwf"] = struct.unpack("i", f.read(4))[0]
    return dat
@classmethod
def from_bytestream(cls, bytestream: BinaryIO, num_length_bytes=8,
                    block_size=None, num_padding_bytes=0):
    segment_length_bytes = bytestream.read(num_length_bytes)
    segment_length = decode_integer(segment_length_bytes)
    header_blocks = []
    if num_padding_bytes > 0:
        header_blocks.append(
            RecBlock.from_bytestream(bytestream, length=num_padding_bytes))
    content_blocks = []
    for _ in range(segment_length):
        content_blocks.append(cls.parse_block(bytestream, block_size))
    return cls(header_blocks, content_blocks)
def read(self, file_input: BinaryIO, file_size: int) -> Flags:
    """populate header data"""
    first_dg = (file_input.tell() == 0)

    chunk = file_input.read(16)
    hdr_data = struct.unpack("<IBBHII", chunk)
    self.length = hdr_data[0]
    self.stx = hdr_data[1]
    self.id = hdr_data[2]
    self.model = hdr_data[3]
    self.date = hdr_data[4]
    self.time = hdr_data[5]

    if first_dg and self.stx != 2:
        if self.verbose:
            logger.warning("invalid Kongsberg file > STX: %s" % self.stx)
        return self.Flags.MISSING_FIRST_STX

    if (self.stx != 2) or (self.id == 0):
        if self.verbose:
            logger.warning("corrupted datagram")
        return self.Flags.CORRUPTED_START_DATAGRAM

    # try to read ETX
    # Make sure we don't try to read beyond the EOF
    # (-13 since 16 bytes were read for the header and 3 remain for the ender)
    if (file_input.tell() + (self.length - 13)) >= file_size:
        if self.verbose:
            logger.warning("unexpected EOF > current pos: %s, datagram length: %s, file size: %s"
                           % (file_input.tell(), self.length, file_size))
        return self.Flags.UNEXPECTED_EOF

    # move file cursor to the end of the datagram
    file_input.seek(self.length - 15, 1)
    chunk = file_input.read(3)
    footer_data = struct.unpack("<BH", chunk)
    self.etx = footer_data[0]
    self.checksum = footer_data[1]

    if self.etx != 3:
        # ETX not found; the next datagram would start at file.tell() - (length + 3)
        return self.Flags.CORRUPTED_END_DATAGRAM

    return self.Flags.VALID
def read(self, file_input: BinaryIO, file_size: int) -> Flags:
    """populate header data"""
    chunk = file_input.read(20)
    hdr_data = struct.unpack("<I4cBBHII", chunk)
    self.length = hdr_data[0]
    # logger.debug('length: %s' % self.length)
    self.id = b''.join(hdr_data[1:5])
    # logger.debug('id: %s -> %s' % (self.id, self.kmall_datagrams[self.id]))
    self.version = hdr_data[5]
    # logger.debug('version: %s' % self.version)
    self.system_id = hdr_data[6]
    # logger.debug('system id: %s' % self.system_id)
    self.sounder_id = hdr_data[7]
    # logger.debug('sounder id: %s' % self.sounder_id)
    self.time_sec = hdr_data[8]
    # logger.debug('time sec: %s' % self.time_sec)
    self.time_nanosec = hdr_data[9]
    # logger.debug('time nanosec: %s' % self.time_nanosec)
    self.dg_time = self.kmall_datetime(self.time_sec, self.time_nanosec)
    # logger.debug('datetime: %s' % self.dg_time.strftime('%Y-%m-%d %H:%M:%S.%f'))

    # try to read the datagram ender
    # Make sure we don't try to read beyond the EOF (20 header bytes already read;
    # self.length covers the whole datagram including the trailing 4-byte length field)
    if (file_input.tell() + (self.length - 20)) >= file_size:
        if self.verbose:
            logger.warning("unexpected EOF > current pos: %s, datagram length: %s, file size: %s"
                           % (file_input.tell(), self.length, file_size))
        return self.Flags.UNEXPECTED_EOF

    # move file cursor to the end of the datagram, just before the repeated length field
    file_input.seek(self.length - 24, 1)
    chunk = file_input.read(4)
    footer_length = struct.unpack("<I", chunk)[0]

    if footer_length != self.length:
        logger.info("datagram length mismatch: %s vs. %s" % (self.length, footer_length))
        return self.Flags.CORRUPTED_END_DATAGRAM

    return self.Flags.VALID
def read(self, file_input: BinaryIO, file_size: int) -> Flags:
    """populate header data"""
    chunk = file_input.read(20)
    hdr_data = struct.unpack("<I4cBBHII", chunk)
    self.length = hdr_data[0]
    # logger.debug('length: %s' % self.length)
    self.type = b''.join(hdr_data[1:5])
    # logger.debug('type: %s -> %s' % (self.type, self.kmall_datagrams[self.type]))
    self.version = hdr_data[5]
    # logger.debug('version: %s' % self.version)
    self.system_id = hdr_data[6]
    # logger.debug('system id: %s' % self.system_id)
    self.sounder_id = hdr_data[7]
    # logger.debug('sounder id: %s' % self.sounder_id)
    self.time_sec = hdr_data[8]
    # logger.debug('time sec: %s' % self.time_sec)
    self.time_nanosec = hdr_data[9]
    # logger.debug('time nanosec: %s' % self.time_nanosec)
    # the last header field holds nanoseconds: 1 ns == 1e-3 us
    # (the original 10e-3 factor was off by a factor of ten)
    self.datetime = datetime.utcfromtimestamp(self.time_sec) + timedelta(
        microseconds=(self.time_nanosec * 1e-3))
    # logger.debug('datetime: %s' % self.datetime.strftime('%Y-%m-%d %H:%M:%S.%f'))

    # Make sure we don't try to read beyond the EOF (20 header bytes already read;
    # self.length covers the whole datagram including the trailing 4-byte length field)
    if (file_input.tell() + (self.length - 20)) > file_size:
        if self.verbose:
            logging.warning(
                "unexpected EOF > current pos: %s, datagram length: %s, file size: %s"
                % (file_input.tell(), self.length, file_size))
        return self.Flags.UNEXPECTED_EOF

    # move file cursor to almost the end of the datagram (just minus the length field)
    file_input.seek(self.length - 24, 1)  # 1 -> relative to current file position
    chunk = file_input.read(4)
    footer_length = struct.unpack("<I", chunk)[0]

    if self.length != footer_length:
        logging.warning(
            "mismatch between initial and end datagram length: %s != %s"
            % (self.length, footer_length))
        return self.Flags.CORRUPTED_END_DATAGRAM

    return self.Flags.VALID
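# Hedged sketch of how headers like the three read() methods above are
# typically driven: scan a file datagram by datagram until EOF or corruption.
# `DatagramHeader` is a hypothetical stand-in for one of those classes.
def scan_datagrams(path: str) -> None:
    file_size = os.path.getsize(path)
    header = DatagramHeader()
    with open(path, "rb") as f:
        while f.tell() < file_size:
            flag = header.read(f, file_size)
            if flag != header.Flags.VALID:
                logger.warning("stopping at %s: %s" % (f.tell(), flag))
                break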
def dump(value: TSerializable, file_handle: typing.BinaryIO) -> None:
    """
    This function dumps a python object as a tnetstring and
    writes it to the given file.
    """
    file_handle.write(dumps(value))
def write_record(stream: BinaryIO, record: MarcRecord, encoding: str) -> None:
    """
    Write a record to a file in ISO 2709 format.

    :param stream: Output stream
    :param record: Record to write
    :param encoding: Encoding
    :return: None
    """
    record_length = MARKER_LENGTH
    dictionary_length = 1  # including the directory terminator
    field_length: List[int] = []

    # First, compute the total record length
    for field in record.fields:
        if field.tag <= 0 or field.tag >= 1000:
            # the field tag cannot be encoded
            raise IrbisError('Bad tag: ' + str(field.tag))

        dictionary_length += 12  # one directory entry
        fldlen = 0
        if field.tag < 10:
            # fixed-length fields have no subfields or indicators
            fldlen += len(field.value.encode(encoding))
        else:
            fldlen += 2  # indicators
            if field.value:
                fldlen += len(field.value.encode(encoding))
            for subfield in field.subfields:
                code = subfield.code
                if code is None or ord(code) <= 32 or ord(code) >= 255:
                    raise IrbisError('Bad code: ' + safe_str(code))
                fldlen += 2  # subfield delimiter plus its code
                fldlen += len(subfield.value.encode(encoding))
        fldlen += 1  # field delimiter

        if fldlen >= 10_000:
            # the field is too long
            raise IrbisError('Field too long: ' + str(field.tag))

        field_length.append(fldlen)
        record_length += fldlen

    record_length += dictionary_length  # directory
    record_length += 1  # record delimiter

    if record_length >= 100_000:
        # the record is too long
        raise IrbisError('Record too long')

    # Now do the actual encoding
    dictionary_position = MARKER_LENGTH
    base_address = MARKER_LENGTH + dictionary_length
    current_address = base_address
    buffer = bytearray(record_length)

    for i in range(base_address):
        buffer[i] = 32  # fill with spaces

    encode_int(buffer, 0, 5, record_length)
    encode_int(buffer, 12, 5, base_address)
    buffer[5] = ord('n')   # record status
    buffer[6] = ord('a')   # record type
    buffer[7] = ord('m')   # bibliographic index
    buffer[8] = ord('2')
    buffer[10] = ord('2')
    buffer[11] = ord('2')
    buffer[17] = ord(' ')  # bibliographic level
    buffer[18] = ord('i')  # cataloging rules
    buffer[19] = ord(' ')  # related record
    buffer[20] = ord('4')  # field length
    buffer[21] = ord('5')  # field offset
    buffer[22] = ord('0')

    # Encode the directory terminator
    buffer[base_address - 1] = FIELD_DELIMITER

    # Walk over the fields
    for i, field in enumerate(record.fields):
        # encode the directory entry
        encode_int(buffer, dictionary_position + 0, 3, field.tag)
        encode_int(buffer, dictionary_position + 3, 4, field_length[i])
        encode_int(buffer, dictionary_position + 7, 5,
                   current_address - base_address)

        # encode the field itself
        if field.tag < 10:
            # fixed-length fields have no subfields or indicators;
            # advance past the value so the delimiter lands after it
            current_address = encode_str(buffer, current_address,
                                         field.value, encoding)
        else:
            # two indicators
            buffer[current_address + 0] = 32
            buffer[current_address + 1] = 32
            current_address += 2

            # the field value up to the first delimiter
            current_address = encode_str(buffer, current_address,
                                         field.value, encoding)

            # subfields
            for subfield in field.subfields:
                buffer[current_address + 0] = SUBFIELD_DELIMITER
                buffer[current_address + 1] = ord(subfield.code)
                current_address += 2
                current_address = encode_str(buffer, current_address,
                                             subfield.value, encoding)

        buffer[current_address] = FIELD_DELIMITER
        current_address += 1
        dictionary_position += 12

    # Record terminator
    buffer[record_length - 2] = FIELD_DELIMITER
    buffer[record_length - 1] = RECORD_DELIMITER

    # Finally, write it out
    stream.write(buffer)
def lwf_header(f: BinaryIO) -> Dict[str, Any]:
    # also accepts a path; reopen and recurse with the file object
    if isinstance(f, (str, Path)):
        fn = Path(f).expanduser().with_suffix(".lwf")
        with opener(fn) as f:
            return lwf_header(f)

    out = {}
    out["archive"] = f.read(8)
    out["file_id"] = [f.read(120).decode("ascii") for _ in range(3)]
    out["prgm_id"] = f.read(12).decode("ascii")
    out["case_id"] = f.read(80).decode("ascii")
    out["prfl_id"] = f.read(40).decode("ascii")
    out["xmtr_id"] = f.read(20).decode("ascii")
    out["freq"] = struct.unpack("f", f.read(4))[0]
    out["txlat"] = struct.unpack("f", f.read(4))[0]
    out["txlon"] = struct.unpack("f", f.read(4))[0]
    out["path_id"] = f.read(20).decode("ascii")
    out["oplat1"] = struct.unpack("f", f.read(4))[0]
    out["oplon1"] = struct.unpack("f", f.read(4))[0]
    out["oplat2"] = struct.unpack("f", f.read(4))[0]
    out["oplon2"] = struct.unpack("f", f.read(4))[0]

    nrpath = struct.unpack("i", f.read(4))[0]
    out["bearing"] = np.empty(nrpath)
    out["rhomax"] = np.empty(nrpath)
    out["rxlat"] = np.empty(nrpath)
    out["rxlon"] = np.empty(nrpath)
    for i in range(nrpath):
        out["bearing"][i] = struct.unpack("f", f.read(4))[0]
        out["rhomax"][i] = struct.unpack("f", f.read(4))[0]
        out["rxlat"][i] = struct.unpack("f", f.read(4))[0]
        out["rxlon"][i] = struct.unpack("f", f.read(4))[0]

    return out
@classmethod
def fromFile(cls, file: BinaryIO):
    instance = cls(file.read())
    file.close()
    return instance
def write(self, content, fp: BinaryIO):
    """Write `content` into a file-like object.

    Content should be a barcode rendered by this writer.
    """
    fp.write(content)
def write_files(self, f: BinaryIO) -> None:
    """Writes the file data blocks to f.

    Args:
        f (BinaryIO): file-like output stream
    """
    # TODO: name bstring "Full path and name of the file" is only present
    #       if bit 9 of archiveFlags is set.
    self.i = 0
    total = self.files_count
    for folder in self.sorted_folders:
        for file in folder.sorted_files:
            p = f"{file.folder.name}\\{file.name}"
            # logging.info(f"Writing {p:100s}[{(self.i * 100) / total:2.2f}%]")
            data_start = f.tell()
            with open(os.path.join(self.data_path, folder.name, file.name), "rb") as o:
                if not self.compress:
                    f.write(o.read())
                else:
                    uncompressed_data = o.read()
                    compressed_data = lz4.frame.compress(
                        uncompressed_data, compression_level=self.compression_level)
                    # compressed records are prefixed with the uncompressed size
                    f.write(struct.pack("<L", len(uncompressed_data)))
                    f.write(compressed_data)
            size = f.tell() - data_start
            # patch size and offset into the file record written earlier
            f.seek(file.record_offset + 8)
            f.write(struct.pack("<LL", size + (4 if self.compress else 0), data_start))
            f.seek(0, os.SEEK_END)
            self.i += 1
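# Hedged reader for the compressed layout written above: each compressed
# record starts with a 4-byte uncompressed size, followed by an LZ4 frame.
# `size` is the stored record size (4-byte prefix included); the function name
# is an assumption, not part of the original source.
def read_compressed_record(f: BinaryIO, size: int) -> bytes:
    uncompressed_size = struct.unpack("<L", f.read(4))[0]
    data = lz4.frame.decompress(f.read(size - 4))
    assert len(data) == uncompressed_size
    return data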
def read_record(stream: BinaryIO, charset=ANSI) -> Optional[MarcRecord]:
    """
    Read a record from a file in ISO 2709 format.

    :param stream: File or file-like object
    :param charset: Encoding
    :return: Decoded record, or None
    """
    # Read the record length
    marker = stream.read(5)
    if len(marker) != 5:
        return None

    # then the rest of the record
    record_length = parse_int(marker)
    need = record_length - 5
    tail = stream.read(need)
    if len(tail) != need:
        return None

    # Simple sanity check that this is a well-formed ISO record
    record = marker + tail
    if record[record_length - 1] != RECORD_DELIMITER:
        return None

    # Pull the layout parameters from the leader
    indicator_length = parse_int(record[10:11])
    base_address = parse_int(record[12:17])

    # Begin the actual conversion
    result = MarcRecord()

    # Walk over the fields using the directory;
    # hitting the delimiter means the directory has ended
    directory = MARKER_LENGTH
    while record[directory] != FIELD_DELIMITER:
        tag = parse_int(record[directory:directory + 3])
        field_length = parse_int(record[directory + 3:directory + 7])
        field_offset = parse_int(
            record[directory + 7:directory + 12]) + base_address
        field = RecordField(tag)
        result.fields.append(field)

        if tag < 10:
            # fixed-length field: cannot contain subfields or indicators
            field.value = record[field_offset:field_offset + field_length - 1] \
                .decode(charset)
        else:
            # variable-length field: holds two one-byte indicators
            # and may contain subfields
            start = field_offset + indicator_length
            stop = field_offset + field_length - indicator_length + 1
            position = start

            # scan the field value up to the first delimiter
            # (the original checked record[start] here, which never advances)
            while position < stop:
                if record[position] == SUBFIELD_DELIMITER:
                    break
                position += 1

            # if there is text before the first delimiter, keep it
            if position != start:
                field.value = record[start:position].decode(charset)

            # scan the subfields
            start = position
            while start < stop:
                position = start + 1
                while position < stop:
                    if record[position] == SUBFIELD_DELIMITER:
                        break
                    position += 1
                subfield = SubField(chr(record[start + 1]),
                                    record[start + 2:position].decode(charset))
                field.subfields.append(subfield)
                start = position

        # move to the next directory entry
        directory += 12

    return result
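# Roundtrip sketch tying write_record (above) to read_record: a record encoded
# to ISO 2709 should decode back field for field. The MarcRecord value and the
# cp1251 charset are illustrative assumptions.
import io

buf = io.BytesIO()
write_record(buf, record, 'cp1251')  # `record` is an existing MarcRecord
buf.seek(0)
decoded = read_record(buf, charset='cp1251')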
def write_file_records(self, f: BinaryIO) -> None:
    """Writes the file records block to f

    Args:
        f (BinaryIO): file-like output stream
    """
    logging.debug("Writing file records")
    for folder in self.sorted_folders:
        logging.debug(f"Processing file records for folder {folder.name}")
        # patch the file records offset into this folder's record
        offset = f.tell()
        f.seek(folder.record_offset + 8 + 4 + 4)
        f.write(struct.pack("<Q", offset + self.total_file_name_length))
        f.seek(0, os.SEEK_END)

        if (self.flags & Flags.INCLUDE_DIRECTORY_NAMES) > 0:
            f.write(pack_str(folder.name))

        logging.debug(f"Sorted files in {folder.name}: {[x.tes_hash for x in folder.sorted_files]}")
        for file in folder.sorted_files:
            file.record_offset = f.tell()
            f.write(
                struct.pack(
                    "<QLL",
                    file.tes_hash,
                    0,  # size, patched later in write_files
                    0   # data offset, patched later in write_files
                )
            )