Beispiel #1
0
 def process(self, data):
     """
     Substitute every entry of the index sequence that occurs in `data` with the entry at
     the same position of the image sequence.

     In byte stream mode the substitution is applied to each byte of `data`; a `bytearray`
     input is modified in place and returned, any other input is copied into a new
     `bytearray` first. Otherwise, buffer-valued arguments are split into blocks with
     `self.chunk`, the lookup table is exposed as `self._map` for the duration of the call,
     and the work is delegated to the parent class.

     Raises `ValueError` when the index sequence contains duplicates or when it is longer
     than the image sequence.
     """
     index: Sequence[int] = self.args.index
     image: Sequence[int] = self.args.image
     if not self.bytestream:
         if isbuffer(index):
             self.log_info(
                 F'chunking index sequence into blocks of size {self.args.blocksize}'
             )
             index = list(self.chunk(index))
             self.log_debug(F'index sequence: {index}')
         if isbuffer(image):
             self.log_info(
                 F'chunking image sequence into blocks of size {self.args.blocksize}'
             )
             image = list(self.chunk(image))
             self.log_debug(F'image sequence: {image}')
     if len(set(index)) != len(index):
         raise ValueError('The index sequence contains duplicates.')
     if len(index) > len(image):
         raise ValueError(
             'The index sequence is longer than the image sequence.')
     if self.bytestream:
         substitution = dict(zip(index, image))
         # Complete 256-entry table: bytes without a substitution map to themselves.
         table = bytes(substitution.get(c, c) for c in range(0x100))
         if not isinstance(data, bytearray):
             data = bytearray(data)
         # translate performs the per-byte lookup in one pass instead of a Python-level loop;
         # the slice assignment keeps the update in place for callers holding the buffer.
         data[:] = data.translate(table)
         return data
     try:
         self._map = dict(zip(index, image))
         return super().process(data)
     finally:
         # Never leave a stale lookup table behind, even when the parent raises.
         self._map = None
Beispiel #2
0
 def _peekmeta(self,
               linewidth,
               sep,
               _x_peek=None,
               **meta) -> Generator[str, None, None]:
     if not meta and not _x_peek:
         return
     width = max((len(name) for name in meta), default=0)
     separators = iter([sep])
     if _x_peek is not None:
         yield from separators
         yield _x_peek
     for name in sorted(meta):
         value = meta[name]
         if value is None:
             continue
         if isinstance(value, CustomStringRepresentation):
             value = repr(value).strip()
         elif isbuffer(value):
             value = repr(ByteStringWrapper(value))
         elif isinstance(value, int):
             value = F'0x{value:X}'
         elif isinstance(value, float):
             value = F'{value:.4f}'
         metavar = F'{name:>{width+2}} = {value!s}'
         if len(metavar) > linewidth:
             metavar = metavar[:linewidth - 3] + '...'
         yield from separators
         yield metavar
Beispiel #3
0
 def filter(self, chunks: Iterable[Chunk]):
     """
     Swap the contents of two meta variables, or of a meta variable and the chunk body.

     For every visible chunk:

     - If no destination variable is given, the chunk body and the source variable trade
       places. A missing source variable counts as empty, and an empty chunk body removes
       the source variable instead of storing an empty value.
     - Otherwise, the source and the destination variable trade places. If the destination
       does not exist, the source variable is moved to the destination name.

     Chunks that are not visible are passed through unchanged.

     Raises `ValueError` if the source variable is neither a string nor a buffer, and
     `KeyError` if a destination is given but the source variable does not exist.
     """
     src = self.args.src
     dst = self.args.dst
     for chunk in chunks:
         if not chunk.visible:
             pass
         elif dst is None:
             try:
                 value = chunk.meta[src]
             except KeyError:
                 value = bytearray()
             if isinstance(value, str):
                 value = value.encode(self.codec)
             elif not isbuffer(value):
                 raise ValueError(
                     F'Unable to swap data with variable {src} because it has type {type(value).__name__}.'
                 )
             if chunk:
                 chunk.meta[src] = bytes(chunk)
             else:
                 # The variable may not exist at all (a missing variable was accepted by the
                 # lookup fallback), so there may be nothing to remove here.
                 try:
                     del chunk.meta[src]
                 except KeyError:
                     pass
             chunk[:] = value
         else:
             try:
                 value = chunk.meta.pop(src)
             except KeyError:
                 raise KeyError(F'The variable {src} does not exist.')
             try:
                 swap = chunk.meta.pop(dst)
             except KeyError:
                 chunk.meta[dst] = value
             else:
                 chunk.meta[src], chunk.meta[dst] = swap, value
         yield chunk
Beispiel #4
0
    def _get_parts_outlook(self, data):
        """
        Generate the parts of a message parsed through `self._extract_msg.Message`.

        Emitted in order: the header parts, the text and HTML body of the top-level message,
        and then one result per attachment. Attachments are visited recursively; an attachment
        of type `msg` contributes its own text and HTML bodies under a numbered path, and any
        other attachment is emitted under its long (or, failing that, short) file name if its
        data is a buffer. Attachments with non-buffer data are reported with a warning and
        skipped.
        """
        def ensure_bytes(data):
            if isinstance(data, bytes):
                return data
            return data.encode(self.codec)

        def make_message(name, msg):
            # Reading either body may fail; a failed read is treated like a missing body.
            with NoLogging():
                try:
                    htm = msg.htmlBody
                except Exception:
                    htm = None
                try:
                    txt = msg.body
                except Exception:
                    txt = None
            if txt:
                yield UnpackResult(F'{name}.txt', ensure_bytes(txt))
            if htm:
                yield UnpackResult(F'{name}.htm', ensure_bytes(htm))

        def attachments(msg):
            # Pre-order walk: each attachment first, then whatever is nested in its data.
            for item in getattr(msg, 'attachments', ()):
                yield item
                if item.type != 'data':
                    yield from attachments(item.data)

        with NoLogging():
            msg = self._extract_msg.Message(bytes(data))

        yield from self._get_headparts(msg.header.items())
        yield from make_message('body', msg)

        msgcount = 0

        for attachment in attachments(msg):
            self.log_debug(attachment)
            if attachment.type == 'msg':
                msgcount += 1
                yield from make_message(F'attachments/msg_{msgcount:d}', attachment.data)
            elif not isbuffer(attachment.data):
                self.log_warn(F'unknown attachment of type {attachment.type}, please report this!')
            else:
                path = attachment.longFilename or attachment.shortFilename
                yield UnpackResult(F'attachments/{path}', attachment.data)
Beispiel #5
0
 def process(self, data):
     """
     Attach a value to `data` as a label named `self.args.name`.

     The value is taken from `self.args.value` and defaults to `data` itself. Numbers and
     buffers are stored unchanged. Any other value that has a length is stored as a list. A
     value without a length is accepted only if it is an `itertools.repeat` whose next item
     is a number, in which case that number is stored; otherwise `NotImplementedError` is
     raised.
     """
     value = self.args.value
     if value is None:
         value = data
     numeric = (int, float)
     if not (isinstance(value, numeric) or isbuffer(value)):
         try:
             len(value)
         except TypeError:
             # No length: the only accepted shape is a repeat() wrapping a single number.
             if isinstance(value, itertools.repeat):
                 value = next(value)
             if not isinstance(value, numeric):
                 raise NotImplementedError(
                     F'put does not support {value.__class__.__name__} values.'
                 )
         else:
             if not isinstance(value, list):
                 value = list(value)
     self.log_debug(F'storing {type(value).__name__}:', value)
     labels = {self.args.name: value}
     return self.labelled(data, **labels)
Beispiel #6
0
 def unpack(self, data):
     """
     Yield one `UnpackResult` for each entry that `NetStructuredResources` finds in `data`.

     An entry's raw `Data` is emitted when `self.args.raw` is set. Otherwise a string `Value`
     is emitted encoded as UTF-16LE and a buffer `Value` is emitted as it is; if the value is
     neither, the raw data is used as a fallback. Entries that carry an error are still
     emitted, after a warning.

     Raises `RefineryPartialResult`, with the input attached, when no managed resources are
     found.
     """
     try:
         managed = NetStructuredResources(data)
     except NoManagedResource:
         managed = None
     if not managed:
         raise RefineryPartialResult('no managed resources found',
                                     partial=data)
     for entry in managed:
         if entry.Error:
             self.log_warn(
                 F'entry {entry.Name} carried error message: {entry.Error}')
         payload = entry.Data
         if self.args.raw:
             pass
         elif isinstance(entry.Value, str):
             payload = entry.Value.encode('utf-16le')
         elif isbuffer(entry.Value):
             payload = entry.Value
         yield UnpackResult(entry.Name, payload)
Beispiel #7
0
    def _get_parts_regular(self, data):
        """
        Generate the parts of a plaintext email message.

        The header parts are emitted first. Every part of the parsed message then becomes an
        `UnpackResult`: parts with a file name go to `attachments/<name>`, all others to
        `body.<ext>` with the extension derived from the content type. If the regular payload
        decoding fails, the undecoded payload is retrieved and base64 data is carved from it
        as a best effort. Parts without any content are skipped; a warning is logged if the
        reason was an error.

        Raises `ValueError` if `data` is empty or contains bytes other than whitespace and
        printable ASCII characters.
        """
        if not re.match(BR'^[\s!-~]+$', data):
            raise ValueError('This is not a plaintext email message.')

        msg = BytesParser().parsebytes(data)

        yield from self._get_headparts(msg.items())

        for k, part in enumerate(msg.walk()):
            path = part.get_filename()
            elog = None
            if path is None:
                extension = file_extension(part.get_content_type(), 'txt')
                path = F'body.{extension}'
            else:
                path = F'attachments/{path}'
            try:
                data = part.get_payload(decode=True)
            except Exception as decode_error:
                try:
                    data = part.get_payload(decode=False)
                except Exception as raw_error:
                    elog = str(raw_error)
                    data = None
                else:
                    from refinery import carve
                    self.log_warn(F'manually decoding part {k}, data might be corrupted: {path}')
                    if isinstance(data, str):
                        data = data.encode('latin1')
                    if isbuffer(data):
                        # A bare next() would raise StopIteration when nothing can be carved,
                        # which inside a generator surfaces as RuntimeError and aborts the
                        # whole extraction. Use a default and report the part instead.
                        data = next(data | carve('b64', stripspace=True, single=True, decode=True), None)
                        if data is None:
                            elog = str(decode_error)
                    else:
                        elog = str(decode_error)
                        data = None
            if not data:
                if elog is not None:
                    self.log_warn(F'could not get content of message part {k}: {elog!s}')
                continue
            yield UnpackResult(path, data)
Beispiel #8
0
    def __init__(self,
                 string: Union[str, ByteString],
                 codec: Optional[str] = None):
        """
        Initialize the wrapper from a string or a buffer.

        A string is remembered in `_string` and encoded before it is passed to the parent
        initializer, using `codec` or UTF-8 if none is given. A buffer is passed on unchanged.
        A given codec is normalized through `codecs.lookup` and stored in `codec`; for a
        buffer without codec, `codec` remains `None`.

        Raises `TypeError` if `string` is neither a string nor a buffer, and `ValueError` if
        the normalized codec name is not contained in `_CODECS`.
        """
        textual = isinstance(string, str)
        if not textual and not isbuffer(string):
            raise TypeError(
                F'The argument {string!r} is not a buffer or string.')

        self._string = string if textual else None
        self._buffer = not textual
        if textual:
            codec = codec or 'utf8'
            string = string.encode(codec)

        super().__init__(string)

        if codec is not None:
            # Compare by canonical codec name so that aliases of a supported codec pass.
            nc = codecs.lookup(codec).name
            if nc not in self._CODECS:
                raise ValueError(F'The codec {nc} is not a supported codec.')
            codec = nc

        self.codec = codec
Beispiel #9
0
    def format(self,
               spec: str,
               codec: str,
               args: Union[list, tuple],
               symb: dict,
               binary: bool,
               fixup: bool = True,
               used: Optional[set] = None,
               escaped: bool = False) -> Union[str, ByteString]:
        """
        Formats a string using Python-like string formatting syntax. The formatter for `binary`
        mode is different; each formatting is documented in one of the following two proxy methods:

        - `refinery.lib.meta.LazyMetaOracle.format_str`
        - `refinery.lib.meta.LazyMetaOracle.format_bin`

        Parameters:

        - `spec`: the format string; it is split into literal text and replacement fields by
          `string.Formatter.parse`.
        - `codec`: used to encode text in `binary` mode and passed to `ByteStringWrapper` when
          values are wrapped.
        - `args`: positional values; `None` is treated as empty and any other iterable that is
          not a list or tuple is converted to a list.
        - `symb`: additional named values; a falsy value is treated as an empty dictionary.
        - `binary`: when set, output is collected in a `MemoryFile` and text is encoded with
          `codec`; otherwise output is collected in a `StringIO`.
        - `fixup`: when set, the values in `args`, in this object and in `symb` are replaced
          in place by `ByteStringWrapper` instances where possible.
        - `used`: receives, through its `add` method, every positional index and every name
          that was used to resolve a field.
        - `escaped`: in text mode only, literal text is additionally passed through the
          `unicode-escape` codec.

        Each field is resolved by trying, in this order: the next positional argument for an
        empty field name, a literal conversion of the field name (`binary` mode with a
        conversion) or a lookup in `symb`, an integer index into `args`, a lookup in this
        object, and finally evaluation of the field as a `PythonExpression`.

        Returns the value of the output stream.

        Raises `LookupError` if an empty field name is used without positional arguments, and
        `KeyError` if a field cannot be resolved while `self.ghost` is not set.
        """
        from refinery.lib.argformats import multibin, ParserError, PythonExpression
        # prevents circular import

        symb = symb or {}

        if used is None:

            # Callers that do not track usage get a sink whose add() discards everything.
            class dummy:
                def add(self, _):
                    pass

            used = dummy()

        if args is None:
            args = ()
        elif not isinstance(args, (list, tuple)):
            args = list(args)

        if fixup:
            # Wrap each value and write it back into the container it came from. Anything that
            # raises TypeError along the way (the wrapper itself, or the item assignment, e.g.
            # on a tuple) leaves the original value in place.
            for (store, it) in (
                (args, enumerate(args)),
                (self, self.items()),
                (symb, symb.items()),
            ):
                for key, value in it:
                    with contextlib.suppress(TypeError):
                        if isinstance(value, CustomStringRepresentation):
                            continue
                        store[key] = ByteStringWrapper(value, codec)

        formatter = string.Formatter()
        # Position of the positional argument consumed by the next empty field name.
        autoindex = 0

        # putstr always accepts text and takes care of the encoding in binary mode.
        if binary:
            stream = MemoryFile()

            def putstr(s: str):
                stream.write(s.encode(codec))
        else:
            stream = StringIO()
            putstr = stream.write

        with stream:
            for prefix, field, modifier, conversion in formatter.parse(spec):
                output = value = None
                if prefix:
                    if binary:
                        prefix = prefix.encode(codec)
                    elif escaped:
                        prefix = prefix.encode('raw-unicode-escape').decode(
                            'unicode-escape')
                    stream.write(prefix)
                if field is None:
                    # Literal text that is not followed by a replacement field.
                    continue
                if not field:
                    if not args:
                        raise LookupError(
                            'no positional arguments given to formatter')
                    value = args[autoindex]
                    used.add(autoindex)
                    # The index stops advancing at the last argument, so any surplus empty
                    # fields reuse the last argument instead of failing.
                    if autoindex < len(args) - 1:
                        autoindex += 1
                if binary and conversion:
                    # In binary mode, a conversion turns the text of the field name itself into
                    # the value; symb is not consulted in this case. A conversion that is not
                    # listed here leaves the value untouched.
                    conversion = conversion.lower()
                    if conversion == 'h':
                        value = bytes.fromhex(field)
                    elif conversion == 'q':
                        value = unquote_to_bytes(field)
                    elif conversion == 's':
                        value = field.encode(codec)
                    elif conversion == 'u':
                        value = field.encode('utf-16le')
                    elif conversion == 'a':
                        value = field.encode('latin1')
                    elif conversion == 'e':
                        value = field.encode(codec).decode(
                            'unicode-escape').encode('latin1')
                elif field in symb:
                    value = symb[field]
                    used.add(field)
                # Each of the following fallbacks only runs while the value is unresolved.
                if value is None:
                    # Explicit positional reference; int(field, 0) also accepts prefixed
                    # literals such as hexadecimal ones.
                    with contextlib.suppress(ValueError, IndexError):
                        index = int(field, 0)
                        value = args[index]
                        used.add(index)
                if value is None:
                    with contextlib.suppress(KeyError):
                        value = self[field]
                        used.add(field)
                if value is None:
                    try:
                        # NOTE(review): presumably evaluates the field as an expression over
                        # the names in this object and in symb - confirm against
                        # PythonExpression.
                        expression = PythonExpression(field, *self, *symb)
                        value = expression(self, **symb)
                    except ParserError:
                        if not self.ghost:
                            raise KeyError(field)
                        # With self.ghost set, the unresolved field is written back to the
                        # output in its original notation.
                        putstr(F'{{{field}')
                        if conversion:
                            putstr(F'!{conversion}')
                        if modifier:
                            putstr(F':{modifier}')
                        putstr('}')
                        continue
                if binary:
                    modifier = modifier.strip()
                    if modifier:
                        # The format specification is itself formatted (in binary mode and
                        # without fixup) and the result is handed to multibin together with
                        # the value as seed.
                        expression = self.format(modifier, codec, args, symb,
                                                 True, False, used)
                        output = multibin(expression.decode(codec),
                                          reverse=True,
                                          seed=value)
                    elif isbuffer(value):
                        output = value
                    elif not isinstance(value, int):
                        # Integers are excluded here - presumably because bytes(n) would
                        # produce n zero bytes rather than a representation of n.
                        with contextlib.suppress(TypeError):
                            output = bytes(value)
                if output is None:
                    # Text rendering: an explicit conversion takes precedence; otherwise the
                    # value is prepared according to its type and then formatted with the
                    # format specification.
                    converter = {
                        'a': ascii,
                        's': str,
                        'r': repr,
                        'H': lambda b: b.hex().upper(),
                        'h': lambda b: b.hex(),
                        'u': lambda b: b.decode('utf-16le'),
                        'e': lambda b: repr(bytes(b)).lstrip('bBrR')[1:-1],
                        'q': lambda b: quote_from_bytes(bytes(b))
                    }.get(conversion)
                    if converter:
                        output = converter(value)
                    elif modifier:
                        output = value
                    elif isinstance(value, CustomStringRepresentation):
                        output = str(value)
                    elif isbuffer(value):
                        output = value.decode('utf8', errors='replace')
                    else:
                        output = value
                    output = output.__format__(modifier)
                    if binary:
                        output = output.encode(codec)
                stream.write(output)
            return stream.getvalue()
Beispiel #10
0
 def default(self, obj):
     """
     Serialize buffers as a dictionary with the single key `_bin`, holding the base85
     encoding of the buffer as an ASCII string. Everything else is left to the parent class.
     """
     if not isbuffer(obj):
         return super().default(obj)
     encoded = base64.b85encode(obj)
     return {'_bin': encoded.decode('ascii')}