def process(self, data):
    index: Sequence[int] = self.args.index
    image: Sequence[int] = self.args.image

    if not self.bytestream:
        if isbuffer(index):
            self.log_info(
                F'chunking index sequence into blocks of size {self.args.blocksize}')
            index = list(self.chunk(index))
            self.log_debug(F'index sequence: {index}')
        if isbuffer(image):
            self.log_info(
                F'chunking image sequence into blocks of size {self.args.blocksize}')
            image = list(self.chunk(image))
            self.log_debug(F'image sequence: {image}')
        if len(set(index)) != len(index):
            raise ValueError('The index sequence contains duplicates.')
        if len(index) > len(image):
            raise ValueError('The index sequence is longer than the image sequence.')

    if self.bytestream:
        mapping = dict(zip(index, image))
        mapping = bytes(mapping.get(c, c) for c in range(0x100))
        if not isinstance(data, bytearray):
            data = bytearray(data)
        data[:] = (mapping[b] for b in data)
        return data

    try:
        self._map = dict(zip(index, image))
        return super().process(data)
    finally:
        self._map = None
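
# A minimal, standalone sketch (not part of the unit above) of the bytestream branch:
# it builds the same full 256-byte lookup table from an index/image pair and applies
# it with bytes.translate. The function name is illustrative only.
def _translate_sketch(data: bytes, index: bytes, image: bytes) -> bytes:
    mapping = dict(zip(index, image))
    table = bytes(mapping.get(c, c) for c in range(0x100))
    return bytes(data).translate(table)

# _translate_sketch(b'abcabc', b'ab', b'xy') == b'xycxyc'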
def _peekmeta(self, linewidth, sep, _x_peek=None, **meta) -> Generator[str, None, None]:
    if not meta and not _x_peek:
        return
    width = max((len(name) for name in meta), default=0)
    separators = iter([sep])
    if _x_peek is not None:
        yield from separators
        yield _x_peek
    for name in sorted(meta):
        value = meta[name]
        if value is None:
            continue
        if isinstance(value, CustomStringRepresentation):
            value = repr(value).strip()
        elif isbuffer(value):
            value = repr(ByteStringWrapper(value))
        elif isinstance(value, int):
            value = F'0x{value:X}'
        elif isinstance(value, float):
            value = F'{value:.4f}'
        metavar = F'{name:>{width + 2}} = {value!s}'
        if len(metavar) > linewidth:
            metavar = metavar[:linewidth - 3] + '...'
        yield from separators
        yield metavar
def filter(self, chunks: Iterable[Chunk]):
    src = self.args.src
    dst = self.args.dst
    for chunk in chunks:
        if not chunk.visible:
            pass
        elif dst is None:
            try:
                value = chunk.meta[src]
            except KeyError:
                value = bytearray()
            if isinstance(value, str):
                value = value.encode(self.codec)
            elif not isbuffer(value):
                raise ValueError(
                    F'Unable to swap data with variable {src} because it has type {type(value).__name__}.')
            if not chunk:
                del chunk.meta[src]
            else:
                chunk.meta[src] = bytes(chunk)
            chunk[:] = value
        else:
            try:
                value = chunk.meta.pop(src)
            except KeyError:
                raise KeyError(F'The variable {src} does not exist.')
            try:
                swap = chunk.meta.pop(dst)
            except KeyError:
                chunk.meta[dst] = value
            else:
                chunk.meta[src], chunk.meta[dst] = swap, value
        yield chunk
def _get_parts_outlook(self, data):
    def ensure_bytes(data):
        return data if isinstance(data, bytes) else data.encode(self.codec)

    def make_message(name, msg):
        with NoLogging():
            try:
                htm = msg.htmlBody
            except Exception:
                htm = None
            try:
                txt = msg.body
            except Exception:
                txt = None
        if txt:
            yield UnpackResult(F'{name}.txt', ensure_bytes(txt))
        if htm:
            yield UnpackResult(F'{name}.htm', ensure_bytes(htm))

    msgcount = 0

    with NoLogging():
        msg = self._extract_msg.Message(bytes(data))

    yield from self._get_headparts(msg.header.items())
    yield from make_message('body', msg)

    def attachments(msg):
        for attachment in getattr(msg, 'attachments', ()):
            yield attachment
            if attachment.type == 'data':
                continue
            yield from attachments(attachment.data)

    for attachment in attachments(msg):
        self.log_debug(attachment)
        if attachment.type == 'msg':
            msgcount += 1
            yield from make_message(F'attachments/msg_{msgcount:d}', attachment.data)
            continue
        if not isbuffer(attachment.data):
            self.log_warn(F'unknown attachment of type {attachment.type}, please report this!')
            continue
        path = attachment.longFilename or attachment.shortFilename
        yield UnpackResult(F'attachments/{path}', attachment.data)
def process(self, data):
    value = self.args.value
    if value is None:
        value = data
    if not isinstance(value, (int, float)) and not isbuffer(value):
        try:
            len(value)
        except TypeError:
            if isinstance(value, itertools.repeat):
                value = next(value)
            if not isinstance(value, (int, float)):
                raise NotImplementedError(
                    F'put does not support {value.__class__.__name__} values.')
        else:
            if not isinstance(value, list):
                value = list(value)
    self.log_debug(F'storing {type(value).__name__}:', value)
    return self.labelled(data, **{self.args.name: value})
def unpack(self, data):
    try:
        managed = NetStructuredResources(data)
    except NoManagedResource:
        managed = None
    if not managed:
        raise RefineryPartialResult('no managed resources found', partial=data)
    for entry in managed:
        if entry.Error:
            self.log_warn(F'entry {entry.Name} carried error message: {entry.Error}')
        data = entry.Data
        if not self.args.raw:
            if isinstance(entry.Value, str):
                data = entry.Value.encode('utf-16le')
            elif isbuffer(entry.Value):
                data = entry.Value
        yield UnpackResult(entry.Name, data)
def _get_parts_regular(self, data):
    if not re.match(BR'^[\s!-~]+$', data):
        raise ValueError('This is not a plaintext email message.')

    msg = BytesParser().parsebytes(data)

    yield from self._get_headparts(msg.items())

    for k, part in enumerate(msg.walk()):
        path = part.get_filename()
        elog = None
        if path is None:
            extension = file_extension(part.get_content_type(), 'txt')
            path = F'body.{extension}'
        else:
            path = F'attachments/{path}'
        try:
            data = part.get_payload(decode=True)
        except Exception as E:
            try:
                data = part.get_payload(decode=False)
            except Exception as E:
                elog = str(E)
                data = None
            else:
                from refinery import carve
                self.log_warn(F'manually decoding part {k}, data might be corrupted: {path}')
                if isinstance(data, str):
                    data = data.encode('latin1')
                if isbuffer(data):
                    data = next(data | carve('b64', stripspace=True, single=True, decode=True))
                else:
                    elog = str(E)
                    data = None
        if not data:
            if elog is not None:
                self.log_warn(F'could not get content of message part {k}: {elog!s}')
            continue
        yield UnpackResult(path, data)
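
# A minimal, standalone sketch of the standard library primitives the parser above
# relies on: email.parser.BytesParser.parsebytes builds a Message tree and walk()
# iterates its MIME parts. The sample message is made up for illustration.
from email.parser import BytesParser as _BytesParser

_msg = _BytesParser().parsebytes(b'Subject: hello\r\n\r\nplain text body\r\n')
_types = [_part.get_content_type() for _part in _msg.walk()]  # ['text/plain']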
def __init__(self, string: Union[str, ByteString], codec: Optional[str] = None):
    if isinstance(string, str):
        self._string = string
        self._buffer = False
        codec = codec or 'utf8'
        string = string.encode(codec)
    elif isbuffer(string):
        self._string = None
        self._buffer = True
    else:
        raise TypeError(F'The argument {string!r} is not a buffer or string.')
    super().__init__(string)
    if codec is not None:
        nc = codecs.lookup(codec).name
        if nc not in self._CODECS:
            raise ValueError(F'The codec {nc} is not a supported codec.')
        codec = nc
    self.codec = codec
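
# A minimal, standalone sketch of the codec normalization used above: codecs.lookup
# canonicalizes aliases, so the membership test against _CODECS compares canonical
# names rather than whatever alias the caller passed in.
import codecs as _codecs

assert _codecs.lookup('UTF8').name == 'utf-8'
assert _codecs.lookup('latin_1').name == 'iso8859-1'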
def format(
    self,
    spec: str,
    codec: str,
    args: Union[list, tuple],
    symb: dict,
    binary: bool,
    fixup: bool = True,
    used: Optional[set] = None,
    escaped: bool = False,
) -> Union[str, ByteString]:
    """
    Formats a string using Python-like string formatting syntax. The formatter for
    `binary` mode is different; each of the two formatters is documented in one of
    the following proxy methods:

    - `refinery.lib.meta.LazyMetaOracle.format_str`
    - `refinery.lib.meta.LazyMetaOracle.format_bin`
    """
    # deferred import to prevent a circular import
    from refinery.lib.argformats import multibin, ParserError, PythonExpression

    symb = symb or {}

    if used is None:
        class dummy:
            def add(self, _):
                pass
        used = dummy()

    if args is None:
        args = ()
    elif not isinstance(args, (list, tuple)):
        args = list(args)

    if fixup:
        for (store, it) in (
            (args, enumerate(args)),
            (self, self.items()),
            (symb, symb.items()),
        ):
            for key, value in it:
                with contextlib.suppress(TypeError):
                    if isinstance(value, CustomStringRepresentation):
                        continue
                    store[key] = ByteStringWrapper(value, codec)

    formatter = string.Formatter()
    autoindex = 0

    if binary:
        stream = MemoryFile()

        def putstr(s: str):
            stream.write(s.encode(codec))
    else:
        stream = StringIO()
        putstr = stream.write

    with stream:
        for prefix, field, modifier, conversion in formatter.parse(spec):
            output = value = None
            if prefix:
                if binary:
                    prefix = prefix.encode(codec)
                elif escaped:
                    prefix = prefix.encode('raw-unicode-escape').decode('unicode-escape')
                stream.write(prefix)
            if field is None:
                continue
            if not field:
                if not args:
                    raise LookupError('no positional arguments given to formatter')
                value = args[autoindex]
                used.add(autoindex)
                if autoindex < len(args) - 1:
                    autoindex += 1
            if binary and conversion:
                conversion = conversion.lower()
                if conversion == 'h':
                    value = bytes.fromhex(field)
                elif conversion == 'q':
                    value = unquote_to_bytes(field)
                elif conversion == 's':
                    value = field.encode(codec)
                elif conversion == 'u':
                    value = field.encode('utf-16le')
                elif conversion == 'a':
                    value = field.encode('latin1')
                elif conversion == 'e':
                    value = field.encode(codec).decode('unicode-escape').encode('latin1')
            elif field in symb:
                value = symb[field]
                used.add(field)
            if value is None:
                with contextlib.suppress(ValueError, IndexError):
                    index = int(field, 0)
                    value = args[index]
                    used.add(index)
            if value is None:
                with contextlib.suppress(KeyError):
                    value = self[field]
                    used.add(field)
            if value is None:
                try:
                    expression = PythonExpression(field, *self, *symb)
                    value = expression(self, **symb)
                except ParserError:
                    if not self.ghost:
                        raise KeyError(field)
                    putstr(F'{{{field}')
                    if conversion:
                        putstr(F'!{conversion}')
                    if modifier:
                        putstr(F':{modifier}')
                    putstr('}')
                    continue
            if binary:
                modifier = modifier.strip()
                if modifier:
                    expression = self.format(modifier, codec, args, symb, True, False, used)
                    output = multibin(expression.decode(codec), reverse=True, seed=value)
                elif isbuffer(value):
                    output = value
                elif not isinstance(value, int):
                    with contextlib.suppress(TypeError):
                        output = bytes(value)
            if output is None:
                converter = {
                    'a': ascii,
                    's': str,
                    'r': repr,
                    'H': lambda b: b.hex().upper(),
                    'h': lambda b: b.hex(),
                    'u': lambda b: b.decode('utf-16le'),
                    'e': lambda b: repr(bytes(b)).lstrip('bBrR')[1:-1],
                    'q': lambda b: quote_from_bytes(bytes(b)),
                }.get(conversion)
                if converter:
                    output = converter(value)
                elif modifier:
                    output = value
                elif isinstance(value, CustomStringRepresentation):
                    output = str(value)
                elif isbuffer(value):
                    output = value.decode('utf8', errors='replace')
                else:
                    output = value
                output = output.__format__(modifier)
                if binary:
                    output = output.encode(codec)
            stream.write(output)
        return stream.getvalue()
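
# A minimal, standalone sketch of the tuples produced by string.Formatter.parse, which
# drive the main loop above; this is standard library behaviour, not refinery-specific.
import string as _string

_fields = list(_string.Formatter().parse('head {name!r:>10} tail'))
# _fields == [('head ', 'name', '>10', 'r'), (' tail', None, None, None)]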
def default(self, obj):
    if isbuffer(obj):
        return {'_bin': base64.b85encode(obj).decode('ascii')}
    return super().default(obj)
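
# A minimal, standalone sketch (illustrative, not refinery's actual serializer) of how a
# default hook like the one above plugs into json.dumps, and how the _bin wrapper can be
# reversed with an object_hook. The class name _BufferEncoder is hypothetical.
import base64 as _base64
import json as _json

class _BufferEncoder(_json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, (bytes, bytearray, memoryview)):
            return {'_bin': _base64.b85encode(obj).decode('ascii')}
        return super().default(obj)

_blob = _json.dumps({'data': b'\x00\x01\x02'}, cls=_BufferEncoder)
_back = _json.loads(_blob, object_hook=lambda d: _base64.b85decode(d['_bin']) if '_bin' in d else d)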