def reverse(self, data):
    meta = metavars(data)
    path = meta.get('path', None)
    name = path and pathlib.Path(path).name
    with MemoryFile(data) as stream:
        with MemoryFile() as output:
            uu.encode(stream, output, name, backtick=True)
            return output.getvalue()

def _generate_bytes(self, data: ByteString):
    if not self.squeeze:
        yield from self.action(data)
        return
    buffer = MemoryFile(bytearray())
    for item in self.action(data):
        buffer.write(item)
    yield buffer.getbuffer()

def decompile_buffer(buffer: ByteString, file_name: str) -> ByteString:
    code_objects = {}
    sys_stderr = sys.stderr
    sys.stderr = open(os.devnull, 'w')
    try:
        version, timestamp, magic_int, codez, is_pypy, _, _ = \
            xtpyi._xdis.load.load_module_from_file_object(
                MemoryFile(buffer), file_name, code_objects)
    finally:
        sys.stderr.close()
        sys.stderr = sys_stderr
    if not isinstance(codez, list):
        codez = [codez]
    errors = ''
    python = ''
    for code in codez:
        for name, engine in {
            'decompyle3': xtpyi._decompyle3,
            'uncompyle6': xtpyi._uncompyle6,
        }.items():
            with io.StringIO(newline='') as output, NoLogging(NoLogging.Mode.ALL):
                try:
                    engine.main.decompile(
                        version,
                        code,
                        output,
                        timestamp=timestamp,
                        code_objects=code_objects,
                        is_pypy=is_pypy,
                        magic_int=magic_int,
                    )
                except Exception as E:
                    errors += '\n'.join(F'# {line}' for line in (
                        F'Error while decompiling with {name}:',
                        *str(E).splitlines(True),
                    ))
                    errors += '\n'
                else:
                    python = output.getvalue()
                    break
    if python:
        return python.encode(xtpyi.codec)
    embedded = bytes(buffer | carve('printable', single=True))
    if len(buffer) - len(embedded) < 0x20:
        return embedded
    disassembly = MemoryFile()
    with io.TextIOWrapper(disassembly, xtpyi.codec, newline='\n') as output:
        output.write(errors)
        output.write('# Generating Disassembly:\n\n')
        for code in codez:
            instructions = list(xtpyi._xdis.std.Bytecode(code))
            width_offset = max(len(str(i.offset)) for i in instructions)
            for i in instructions:
                opname = i.opname.replace('_', '.').lower()
                offset = F'{i.offset:0{width_offset}d}'
                output.write(F'# {offset:>5} {opname:<25} {i.argrepr}\n')
            output.write('\n')
    return disassembly.getbuffer()

def open(self, mode: str) -> MemoryFile:
    """
    Open the virtual file.
    """
    if self.data is None and 'w' in mode:
        self.data = bytearray()
    fd = MemoryFile(self.data, read_as_bytes=True, fileno=self.node)
    fd.name = self.path
    return fd

def _decompress_mszip(self, reader: StructReader, writer: MemoryFile, target: Optional[int] = None):
    header = bytes(reader.read(2))
    if header != B'CK':
        raise ValueError(
            F'chunk did not begin with CK header, got {header!r} instead')
    decompress = zlib.decompressobj(-zlib.MAX_WBITS, zdict=writer.getbuffer())
    writer.write(decompress.decompress(reader.read()))
    writer.write(decompress.flush())

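# A minimal standalone sketch (an illustration, not part of the unit above) of
# why the MSZIP decoder primes zlib with everything written so far: each 'CK'
# chunk is an independent raw DEFLATE stream, but its back-references may
# reach into the decompressed output of earlier chunks, which zlib models
# with a preset dictionary.
import zlib

blocks = [b'A' * 100, b'A' * 50 + b'B' * 50]
history = b''
chunks = []
for block in blocks:
    c = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS, zdict=history)
    chunks.append(b'CK' + c.compress(block) + c.flush())
    history += block

result = b''
for chunk in chunks:
    assert chunk[:2] == b'CK'
    d = zlib.decompressobj(-zlib.MAX_WBITS, zdict=result)
    result += d.decompress(chunk[2:]) + d.flush()
assert result == b''.join(blocks)
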
def process(self, data):
    password: bytes = self.args.password
    with MemoryFile(data) as stream:
        doc = self._msoffcrypto.OfficeFile(stream)
        if not doc.is_encrypted():
            self.log_warn('the document is not encrypted; returning input')
            return data
        if password:
            doc.load_key(password=password.decode(self.codec))
        with MemoryFile(bytearray()) as output:
            doc.decrypt(output)
            return output.getvalue()

def _unpack_hive(self, data: bytearray):
    try:
        with MemoryFile(data) as stream:
            root = self._registry.Registry.Registry(stream).root()
            yield from self._walk(root, root.name())
    except self._registry.RegistryParse.ParseException:
        raise ParseException

def unpack(self, data):
    if not self.handles(data):
        self.log_warn('The data does not look like an ISO file.')
    with MemoryFile(data, read_as_bytes=True) as stream:
        iso = self._pycdlib.PyCdlib()
        iso.open_fp(stream)
        fs = self.args.fs
        if fs != 'auto':
            mkfacade = {
                'iso': iso.get_iso9660_facade,
                'udf': iso.get_udf_facade,
                'joliet': iso.get_joliet_facade,
                'rr': iso.get_rock_ridge_facade,
            }
            facade = mkfacade[fs]()
        elif iso.has_udf():
            self.log_info('using format: udf')
            facade = iso.get_udf_facade()
        elif iso.has_joliet():
            self.log_info('using format: joliet')
            facade = iso.get_joliet_facade()
        elif iso.has_rock_ridge():
            self.log_info('using format: rr')
            facade = iso.get_rock_ridge_facade()
        else:
            self.log_info('using format: iso')
            facade = iso.get_iso9660_facade()
        for root, _, files in facade.walk('/'):
            root = root.rstrip('/')
            for name in files:
                name = name.lstrip('/')
                path = F'{root}/{name}'
                try:
                    info = facade.get_record(path)
                    date = info.date
                except Exception:
                    info = None
                    date = None
                else:
                    # ISO 9660 records the timezone as a GMT offset counted
                    # in 15-minute intervals
                    date = datetime.datetime(
                        date.years_since_1900 + 1900,
                        date.month,
                        date.day_of_month,
                        date.hour,
                        date.minute,
                        date.second,
                        tzinfo=datetime.timezone(
                            datetime.timedelta(minutes=15 * date.gmtoffset)))

                def extract(info=info, path=path):
                    # bind info and path as defaults so each closure keeps
                    # the values of the iteration that created it
                    if info:
                        buffer = MemoryFile(bytearray(info.data_length))
                    else:
                        buffer = MemoryFile(bytearray())
                    facade.get_file_from_iso_fp(buffer, path)
                    return buffer.getvalue()

                yield self._pack(self._strip_revision(path), date, extract)

def process(self, data):
    image = self._image.open(MemoryFile(data))
    width, height = image.size
    for y in range(height):
        yield bytearray(
            image.getpixel((x, y))[p]
            for x in range(width)
            for p in self.args.parts
        )

def process(self, data: bytearray):
    with MemoryFile() as output, StructReader(data) as reader:
        if reader.read(2) != B'JC':
            self.log_warn(
                'data does not begin with magic sequence, assuming that header is missing')
            reader.seek(0)
            size = checksum = None
        else:
            size = reader.u32()
            checksum = reader.u32()
        if self.args.ignore_header:
            size = None
        self._decompress(output, reader, size)
        if size is not None:
            if len(output) > size:
                self.log_info(F'truncating to size {size}')
                output.truncate(size)
            elif len(output) < size:
                self.log_warn(
                    F'header size was {size}, but only {len(output)} bytes were decompressed')
        data = output.getvalue()
        if checksum:
            c = self._checksum(data)
            if c != checksum:
                self.log_warn(
                    F'header checksum was {checksum:08X}, computed value is {c:08X}')
        return data

def process(self, data):
    parsed = self._LnkParse3.lnk_file(MemoryFile(data)).get_json()
    with JSONEncoderEx as encoder:
        pp = ppjson(tabular=self.args.tabular)
        yield from pp._pretty_output(
            parsed, indent=4, cls=encoder, ensure_ascii=False)

def mmap(self, length: int = 0, offset: int = 0) -> MemoryFile:
    """
    Emulate the result of an `mmap` call to the virtual file.
    """
    view = memoryview(self.data)
    if length:
        view = view[offset:offset + length]
    fd = MemoryFile(view, read_as_bytes=True, fileno=self.node)
    return fd

def test_memoryfile(self):
    buffer = bytearray()
    data = [
        B"Slumber, watcher, till the spheres" B"\n",
        B"Six and twenty thousand years" B"\n",
        B"Have revolv'd, and I return" B"\n",
        B"To the spot where now I burn." B"\n",
        B"Other stars anon shall rise" B"\n",
        B"To the axis of the skies;" B"\n",
        B"Stars that soothe and stars that bless" B"\n",
        B"With a sweet forgetfulness:" B"\n",
        B"Only when my round is o'er" B"\n",
        B"Shall the past disturb thy door." B"\n",
    ]
    with MemoryFile(buffer) as mem:
        self.assertTrue(mem.writable())
        self.assertTrue(mem.seekable())
        self.assertTrue(mem.readable())
        self.assertFalse(mem.isatty())
        mem.writelines(data)
        self.assertRaises(ValueError, lambda: mem.truncate(-7))
        self.assertRaises(OSError, mem.fileno)
        mem.seek(0)
        self.assertEqual(mem.tell(), 0)
        mem.seekrel(9)
        self.assertEqual(mem.tell(), 9)
        self.assertEqual(mem.read(7), B'watcher')
        self.assertTrue(mem.readline().endswith(B'spheres\n'))
        self.assertSequenceEqual(list(mem.readlines()), data[1:])
        mem.seek(0, io.SEEK_END)
        self.assertEqual(mem.tell(), len(mem.getbuffer()))
        mem.seekrel(-7)
        tmp = bytearray(10)
        self.assertLessEqual(mem.readinto(tmp), 10)
        self.assertIn(B'door', tmp)
        mem.seek(7)
        self.assertEqual(10, mem.readinto(tmp))
        self.assertEqual(tmp, data[0][7:17])
        mem.seek(0)
        self.assertSequenceEqual(list(mem), data)
        self.assertTrue(mem.eof)
        mem.close()
        self.assertFalse(mem.writable())
        self.assertFalse(mem.readable())
        self.assertFalse(mem.seekable())
        self.assertTrue(mem.closed)

def unpack(self, data):
    ace = acefile.open(MemoryFile(data, read_as_bytes=True))
    for member in ace.getmembers():
        member: acefile.AceMember
        comment = {} if not member.comment else {'comment': member.comment}
        yield self._pack(
            member.filename,
            member.datetime,
            lambda a=ace, m=member: a.read(m, pwd=self.args.pwd),
            **comment
        )

def test_memoryfile_bytes(self):
    buffers = [B'Binary Refinery']
    buffers.append(memoryview(buffers[0]))
    if hasattr(memoryview, 'toreadonly'):  # Python 3.8 addition
        buffers.append(memoryview(bytearray(buffers[0])).toreadonly())
    for b in buffers:
        with MemoryFile(b) as mem:
            self.assertFalse(mem.writable())
            self.assertTrue(mem.readable())
            with self.assertRaises(OSError):
                mem.write(B'Unicode')
            self.assertEqual(mem.read(6), B'Binary')

def _get_angry_blocks(self, data) -> Dict[int, _BasicBlock]:
    self.log_debug('loading angr project')

    class TheFastArch(self._angr_arch):
        def get_register_by_name(self, reg_name):
            try:
                rmap = self.rmap
            except AttributeError:
                self.rmap = rmap = {}
                for register in self.register_list:
                    rmap[register.name] = register
                    for alias in register.alias_names:
                        rmap[alias] = register
            return rmap[reg_name]

    with NoLogging():
        pr = self._angr.project.Project(
            MemoryFile(data),
            default_analysis_mode='static',
            auto_load_libs=False,
            main_opts=dict(
                backend='blob',
                arch=TheFastArch,
                entry_point=0,
                base_addr=0,
            ),
        )
    self.log_debug('computing control flow graph')
    with NoLogging():
        cfg: CFGFast = pr.analyses.CFGFast()
        cfg.normalize()
    functions: List[Function] = list(cfg.functions.values())
    blocks: Dict[int, _BasicBlock] = {}
    for function in functions:
        for block in function.blocks:
            try:
                bb = blocks[block.addr]
            except KeyError:
                blocks[block.addr] = bb = _BasicBlock(block.size, [
                    opc.insn for opc in block.disassembly.insns])
            if bb.size != block.size:
                self.log_warn(F'conflicting blocks at 0x{block.addr:08X}')
            bb.users.append(function)
    return blocks

def process(self, data):
    if self.bytestream:
        self.log_warn(
            'running this unit with a block size of 1 does not have any effect')
        return data
    with MemoryFile() as stream:
        for block in self.chunk(data, True):
            stream.write(block[::-1])
        rest = self.rest(data)
        if rest:
            padding = -len(rest) % self.args.blocksize
            stream.write(B'\0' * padding + rest[::-1])
        return stream.getvalue()

def _extract_ole(self, data: bytearray) -> str:
    stream = MemoryFile(data)
    with self._olefile.OleFileIO(stream) as ole:
        doc = ole.openstream('WordDocument').read()
        with StructReader(doc) as reader:
            # the fWhichTblStm bit of the FIB selects the 0Table or 1Table
            # stream; fcClx/lcbClx at offset 0x1A2 locate the piece table
            table_name = F'{(doc[11] >> 1) & 1}Table'
            reader.seek(0x1A2)
            offset = reader.u32()
            length = reader.u32()
        with StructReader(ole.openstream(table_name).read()) as reader:
            reader.seek(offset)
            table = reader.read(length)
        piece_table = self._load_piece_table(table)
        return self._get_text(doc, piece_table)

def process(self, data):
    try:
        lzo = LZO(data)
    except LZOError:
        self.log_info('Not an LZO archive, processing raw stream.')
        return self.decompress_stream(data)
    with MemoryFile() as output:
        for k, chunk in enumerate(lzo, 1):
            self.log_debug(F'decompressing chunk {k}')
            output.write(self.decompress_stream(chunk.data))
        return self.labelled(
            output.getbuffer(),
            path=lzo.name,
            date=datetime.utcfromtimestamp(lzo.mtime),
        )

def _process_openpyxl(self, data):
    workbook = self._openpyxl.load_workbook(MemoryFile(data), read_only=True)
    for ref in self.args.references:
        ref: SheetReference
        for k, name in enumerate(workbook.sheetnames):
            if not ref.match(k, name):
                continue
            sheet = workbook[name]
            cells = [row for row in sheet.iter_rows(values_only=True)]
            nrows = len(cells)
            ncols = max((len(row) for row in cells), default=0)
            for row, col in ref.cells(nrows, ncols):
                yield from self._get_value(
                    k, name, lambda r, c: cells[r][c], row, col)

def unpack(self, data: bytearray):
    with MemoryFile(data) as stream:
        try:
            archive = tarfile.open(fileobj=stream)
        except Exception:
            # the ustar magic sits at offset 257 of a tar header; if it is
            # found deeper in the buffer, retry from the start of that
            # header to skip any leading garbage
            ustar = data.find(B'ustar')
            if ustar < 257:
                raise
            stream.seek(ustar - 257)
            archive = tarfile.open(fileobj=stream)
        for info in archive.getmembers():
            if not info.isfile():
                continue
            extractor = archive.extractfile(info)
            if extractor is None:
                continue
            date = datetime.datetime.fromtimestamp(info.mtime)
            yield self._pack(info.name, date, lambda e=extractor: e.read())

def process(self, data):
    unpacker = mp.Unpacker(MemoryFile(data, read_as_bytes=True))
    while True:
        try:
            item = unpacker.unpack()
        except mp.exceptions.OutOfData:
            position = unpacker.tell()
            if position < len(data):
                self.log_warn(
                    F'{len(data) - position} bytes of trailing data could not be unpacked')
            break
        except Exception as E:
            position = unpacker.tell()
            if not position:
                raise
            view = memoryview(data)
            raise RefineryPartialResult(str(E), view[position:])
        else:
            yield json.dumps(item).encode(self.codec)

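# Self-contained round-trip sketch of the streaming msgpack API the unit
# above relies on, with io.BytesIO standing in for MemoryFile (assumes the
# msgpack package; `mp` above is taken to be this module):
import io
import json
import msgpack

blob = msgpack.packb({'key': 'value'}) + msgpack.packb([1, 2, 3])
unpacker = msgpack.Unpacker(io.BytesIO(blob), raw=False)
items = []
while True:
    try:
        items.append(json.dumps(unpacker.unpack()))
    except msgpack.exceptions.OutOfData:
        break
assert items == ['{"key": "value"}', '[1, 2, 3]']
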
def unpack(self, data):
    with MemoryFile(data) as stream:
        try:
            oledoc = self._olefile.OleFileIO(stream)
        except OSError as error:
            self.log_info(F'error, {error}, treating input as zip file')
            yield from xtzip().unpack(data)
            return
        for item in oledoc.listdir():
            if not item or not item[-1]:
                continue
            path = '/'.join(item)
            olestream = oledoc.openstream(path)
            c0 = ord(item[-1][:1])
            if c0 < 20:
                # OLE stream names may begin with a control character;
                # replace it with a printable marker such as [5]
                item[-1] = F'[{c0:d}]{item[-1][1:]}'
                path = '/'.join(item)
            self.log_debug('exploring:', path)
            yield UnpackResult(path, olestream.read())

def _process_pyxlsb2(self, data):
    with self._pyxlsb2.open_workbook(MemoryFile(data)) as wb:
        for ref in self.args.references:
            ref: SheetReference
            for k, rec in enumerate(wb.sheets):
                rec: SheetRecord
                self.log_info(rec)
                name = rec.name
                if not ref.match(k, name):
                    continue
                sheet = wb.get_sheet_by_name(name)
                rows = list(sheet.rows())
                nrows = len(rows)
                ncols = max((len(r) for r in rows), default=0)
                for row, col in ref.cells(nrows, ncols):
                    def get(row, col):
                        return rows[row][col].v
                    yield from self._get_value(k, name, get, row, col)

def exeroute(data, handler_elf, handler_macho, handler_pe, *args, **kwargs):
    if data[:2] == B'MZ':
        from pefile import PE as PEFile
        try:
            parsed = PEFile(data=data, fast_load=True)
        except Exception as E:
            raise ParsingFailure('PE') from E
        else:
            return handler_pe(parsed, *args, **kwargs)
    if data[:4] == B'\x7FELF':
        from refinery.lib.structures import MemoryFile
        from elftools.elf.elffile import ELFFile
        try:
            parsed = ELFFile(MemoryFile(data))
        except Exception as E:
            raise ParsingFailure('ELF') from E
        else:
            return handler_elf(parsed, *args, **kwargs)
    # covers the Mach-O magics FEEDFACE and FEEDFACF in either byte order
    if set(data[:4]) <= {0xFE, 0xED, 0xFA, 0xCE, 0xCF}:
        from refinery.lib.structures import MemoryFile
        import macholib
        import macholib.mach_o
        import macholib.MachO

        class InMemoryMachO(macholib.MachO.MachO):
            # macholib insists on loading from a file; hand os.devnull to
            # the parent constructor and substitute an in-memory stream
            def __init__(self):
                super().__init__(os.devnull)

            def load(self, _):
                return super().load(MemoryFile(data))

        try:
            parsed = InMemoryMachO()
            assert parsed.headers
        except Exception as E:
            raise ParsingFailure('MachO') from E
        else:
            return handler_macho(parsed, *args, **kwargs)
    raise ValueError('Unknown executable format')

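# Hedged usage sketch for exeroute; the handler lambdas are hypothetical and
# merely pull an architecture identifier out of whichever parser object the
# router constructed (pyelftools ELFFile, macholib MachO, or pefile PE):
machine = exeroute(
    data,
    handler_elf=lambda elf: elf.get_machine_arch(),
    handler_macho=lambda macho: [hdr.header.cputype for hdr in macho.headers],
    handler_pe=lambda pe: pe.FILE_HEADER.Machine,
)
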
def _read_lines(self, fd):
    count = self.args.size or 1
    if count == 1:
        while True:
            buffer = fd.readline()
            if not buffer:
                break
            yield buffer
        return
    with MemoryFile() as out:
        while True:
            for _ in range(count):
                buffer = fd.readline()
                if not buffer:
                    break
                out.write(buffer)
            if not out.tell():
                break
            yield out.getvalue()
            out.seek(0)
            out.truncate()

def process(self, data: bytearray):
    view = memoryview(data)
    with MemoryFile() as output, StructReader(view) as reader:
        for k in count(1):
            if reader.eof:
                break
            trailing_size = len(data) - reader.tell()
            try:
                ID, VN, DS = reader.read_struct('4sBB')
                if ID != B'LZIP':
                    if k > 1:
                        raise EOF
                    else:
                        self.log_warn(F'ignoring invalid LZIP signature: {ID.hex()}')
                if VN != 1:
                    self.log_warn(F'ignoring invalid LZIP version: {VN}')
                dict_size = 1 << (DS & 0x1F)
                dict_size -= (dict_size // 16) * ((DS >> 5) & 7)
                if dict_size not in range(_MIN_DICT_SIZE, _MAX_DICT_SIZE + 1):
                    raise ValueError(
                        F'The dictionary size {dict_size} is out of the valid range '
                        F'[{_MIN_DICT_SIZE}, {_MAX_DICT_SIZE}]; unable to proceed.'
                    )
                decoder = MemberDecoder(dict_size, reader, output)
                if not decoder():
                    raise ValueError(F'Data error in stream {k}.')
                # the member trailer is 20 bytes: a u32 CRC followed by two
                # u64 values for data size and member size
                crc32, data_size, member_size = reader.read_struct('<LQQ')
                if crc32 != decoder.crc32:
                    self.log_warn(
                        F'checksum in stream {k} was {decoder.crc32:08X}, should have been {crc32:08X}.')
                if member_size - 20 != decoder.member_position:
                    self.log_warn(
                        F'member size in stream {k} was {decoder.member_position}, should have been {member_size}.')
                if data_size != decoder.data_position:
                    self.log_warn(
                        F'data size in stream {k} was {decoder.data_position}, should have been {data_size}.')
            except EOF:
                if k <= 1:
                    raise
                self.log_info(F'silently ignoring {trailing_size} bytes of trailing data')
                break
        return output.getvalue()

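# Worked example for the coded dictionary size above, per the lzip format:
# the low 5 bits of DS select a power of two and the high 3 bits subtract
# that many sixteenths of it. For DS = 0xD3:
#   base = 1 << (0xD3 & 0x1F) = 1 << 19 = 524288
#   size = 524288 - 6 * (524288 // 16) = 327680
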
def _generate_chunks(self, parent: Chunk):
    if not self.squeeze:
        for item in self.action(parent):
            yield copy.copy(item).inherit(parent)
        return
    it = self.action(parent)
    try:
        header = next(it)
    except StopIteration:
        return
    else:
        header.inherit(parent)
        buffer = MemoryFile(header)
        buffer.seek(len(header))
    for item in it:
        header &= item
        buffer.write(item)
    yield header

def _decompress_xpress(self, reader: StructReader, writer: MemoryFile, target: Optional[int] = None) -> bytearray:
    if target is not None:
        target += writer.tell()
    flags = BitBufferedReader(reader)
    nibble_cache = None
    while not reader.eof:
        if target is not None and writer.tell() >= target:
            return
        if not flags.next():
            # a clear flag bit indicates a literal byte
            writer.write(reader.read(1))
            continue
        # a set flag bit indicates a match; the u16 token packs the offset
        # into its upper 13 bits and a length seed into its lower 3 bits
        offset, length = divmod(reader.u16(), 8)
        offset += 1
        if length == 7:
            # extended lengths are stored in nibbles; two consecutive
            # matches share one byte, so cache the unused upper nibble
            length = nibble_cache
            if length is None:
                length_pair = reader.u8()
                nibble_cache = length_pair >> 4
                length = length_pair & 0xF
            else:
                nibble_cache = None
            if length == 15:
                length = reader.u8()
                if length == 0xFF:
                    length = reader.u16() or reader.u32()
                    length -= 22
                    if length < 0:
                        raise RuntimeError(
                            F'Invalid match length of {length} for long delta sequence')
                    length += 15
                length += 7
        length += 3
        writer.replay(offset, length)

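# Summary of the tiered match-length coding that the decoder above reverses
# (derived from the constants in the code, not from external documentation).
# With L the true match length, the stored value is L - 3:
#   L - 3 < 7    -> encoded in the low 3 bits of the u16 copy token
#   L - 3 < 22   -> low bits are 7, a shared nibble holds L - 10
#   L - 3 < 277  -> the nibble is 15, an extra byte holds L - 25
#   otherwise    -> the byte is 0xFF, a u16 holds L - 3 (u32 if the u16 is 0)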