def get_security_key_info(self):
    """
    Parse the security (sk) record referenced by this key.

    :return: dict with the owner SID, group SID, and the parsed DACL/SACL
             ACE lists (``None`` for an ACL whose offset is not positive)
    """
    self._stream.seek(REGF_HEADER_SIZE + self.header.security_key_offset)

    # TODO: If parsing fails, parse with SECURITY_KEY_v1_2
    security_key = SECURITY_KEY_v1_1.parse_stream(self._stream)
    security_descriptor = SECURITY_DESCRIPTOR.parse(security_key.security_descriptor)

    with boomerang_stream(self._stream) as stream:
        # Offsets inside the descriptor are relative to its start, which lies
        # 24 bytes past the beginning of the security key record.
        descriptor_base = REGF_HEADER_SIZE + self.header.security_key_offset + 24

        stream.seek(descriptor_base + security_descriptor.owner)
        owner = convert_sid(SID.parse_stream(stream))

        stream.seek(descriptor_base + security_descriptor.group)
        group = convert_sid(SID.parse_stream(stream))

        sacl = None
        if security_descriptor.offset_sacl > 0:
            stream.seek(descriptor_base + security_descriptor.offset_sacl)
            sacl = get_acls(stream)

        dacl = None
        if security_descriptor.offset_dacl > 0:
            stream.seek(descriptor_base + security_descriptor.offset_dacl)
            dacl = get_acls(stream)

    return {
        'owner': owner,
        'group': group,
        'dacl': dacl,
        'sacl': sacl
    }
def __init__(self, hive_path):
    """
    Represents a registry hive
    :param hive_path: Path to the registry hive
    """
    # Load the entire hive into memory so later seeks are cheap.
    with open(hive_path, 'rb') as hive_file:
        self._stream = BytesIO(hive_file.read())

    with boomerang_stream(self._stream) as stream:
        self.header = REGF_HEADER.parse_stream(stream)

        # The first cell of the root HBin is the root NKRecord:
        first_hbin = self.get_hbin_at_offset()
        first_cell = next(first_hbin.iter_cells(stream))
        self.root = NKRecord(first_cell, stream)

    self.name = self.header.file_name
    self.hive_type = identify_hive_type(self.name)
def __init__(self, hive_path, hive_type=None, partial_hive_path=None):
    """
    Represents a registry hive
    :param hive_path: Path to the registry hive
    :param hive_type: The hive type can be specified if this is a partial hive,
                      or for some other reason regipy cannot identify the hive type
    :param partial_hive_path: The path from which the partial hive actually starts,
                              for example: hive_type=ntuser partial_hive_path="/Software"
                              would mean this is actually a HKCU hive,
                              starting from HKCU/Software
    """
    self.partial_hive_path = None
    self.hive_type = None

    # Load the entire hive into memory so later seeks are cheap.
    with open(hive_path, 'rb') as hive_file:
        self._stream = BytesIO(hive_file.read())

    with boomerang_stream(self._stream) as stream:
        self.header = REGF_HEADER.parse_stream(stream)

        # The first cell of the root HBin is the root NKRecord:
        first_hbin = self.get_hbin_at_offset()
        first_cell = next(first_hbin.iter_cells(stream))
        self.root = NKRecord(first_cell, stream)

    self.name = self.header.file_name

    if hive_type:
        # A caller-supplied type must still be one we know how to handle.
        if hive_type.lower() not in SUPPORTED_HIVE_TYPES:
            raise UnidentifiedHiveException(
                f'{hive_type} is not a supported hive type: '
                f'only the following are supported: {SUPPORTED_HIVE_TYPES}'
            )
        self.hive_type = hive_type
    else:
        try:
            self.hive_type = identify_hive_type(self.name)
        except UnidentifiedHiveException:
            logger.info(
                f'Hive type for {hive_path} was not identified: {self.name}'
            )

    if partial_hive_path:
        self.partial_hive_path = partial_hive_path
def _parse_indirect_block(stream, value):
    """
    Reassemble a "big data" (db) value whose content is split across segments.

    Values bigger than 16344 bytes are not stored inline: the VK entry points at
    a BIG_DATA_BLOCK, which holds an offset to a list of segment offsets. Each
    segment contains at most 0x3fd8 bytes of the actual data.

    :param stream: Stream over the hive; it is seeked to the segment list here
    :param value: Parsed value whose ``value`` field holds the raw
                  BIG_DATA_BLOCK bytes and whose ``size`` is the total data size
    :return: The reassembled value data as bytes (possibly truncated if the
             hive is corrupted)
    """
    big_data_block_header = BIG_DATA_BLOCK.parse(value.value)

    # Go to the start of the segment offset list
    stream.seek(REGF_HEADER_SIZE + big_data_block_header.offset_to_list_of_segments)

    # Read the segments sequentially until we got the full size of the VK
    buffer = BytesIO()
    value_size = value.size
    while value_size > 0:
        data_segment_offset = Int32ul.parse_stream(stream)
        with boomerang_stream(stream) as tmpstream:
            # Skip the 4-byte cell header before the segment data.
            tmpstream.seek(REGF_HEADER_SIZE + 4 + data_segment_offset)
            tmpbuffer = tmpstream.read(min(0x3fd8, value_size))
            if not tmpbuffer:
                # A truncated/corrupted hive yields a zero-length read; without
                # this guard value_size would never shrink and the loop would
                # spin forever. Return whatever was assembled so far.
                break
            value_size -= len(tmpbuffer)
            buffer.write(tmpbuffer)
    return buffer.getvalue()
def iter_values(self, as_json=False, max_len=MAX_LEN):
    """
    Get the values of a subkey. Will raise if no values exist
    :param as_json: Whether to normalize the data as JSON or not
    :param max_len: Max length of value to return
    :return: List of values for the subkey
    """
    if not self.values_count:
        return

    # Get the offset of the values key. We skip 4 because of Cell Header
    target_offset = REGF_HEADER_SIZE + 4 + self.header.values_list_offset
    self._stream.seek(target_offset)

    for _ in range(self.values_count):
        is_corrupted = False
        vk_offset = Int32ul.parse_stream(self._stream)
        with boomerang_stream(self._stream) as substream:
            actual_vk_offset = REGF_HEADER_SIZE + 4 + vk_offset
            substream.seek(actual_vk_offset)
            try:
                vk = VALUE_KEY.parse_stream(substream)
            except ConstError:
                logger.error(
                    f'Could not parse VK at {substream.tell()}, registry hive is probably corrupted.'
                )
                return
            value = self.read_value(vk, substream)

            # A zero-length name marks the key's default value.
            if vk.name_size == 0:
                value_name = '(default)'
            else:
                value_name = vk.name.decode(errors='replace')

            # If the value is bigger than this value, it means this is a DEVPROP structure
            # https://doxygen.reactos.org/d0/dba/devpropdef_8h_source.html
            # https://sourceforge.net/p/mingw-w64/mingw-w64/ci/668a1d3e85042c409e0c292e621b3dc0aa26177c/tree/
            # mingw-w64-headers/include/devpropdef.h?diff=dd86a3b7594dadeef9d6a37c4b6be3ca42ef7e94
            # We currently do not support these, but also wouldn't like to yield this as binary data
            # This int casting will always work because the data_type is construct's EnumIntegerString
            # TODO: Add actual parsing
            if int(vk.data_type) > 0xffff0000:
                # Skip this unknown data type, research pending :)
                # (removed dead `data_type = int(vk.data_type) & 0xffff`:
                # the value was discarded by this continue)
                continue

            # TODO: Add actual parsing
            if int(vk.data_type) == 0x200000:
                continue

            data_type = str(vk.data_type)
            if data_type in ['REG_SZ', 'REG_EXPAND', 'REG_EXPAND_SZ']:
                if vk.data_size >= 0x80000000:
                    # data is contained in the data_offset field
                    value.size -= 0x80000000
                    actual_value = vk.data_offset
                elif vk.data_size > 0x3fd8 and value.value[:2] == b'db':
                    # Big data record: the value is split across segments.
                    data = self._parse_indirect_block(substream, value)
                    actual_value = try_decode_binary(data, as_json=as_json)
                else:
                    actual_value = try_decode_binary(value.value, as_json=as_json)
            elif data_type in ['REG_BINARY', 'REG_NONE']:
                if vk.data_size >= 0x80000000:
                    # data is contained in the data_offset field
                    actual_value = vk.data_offset
                elif vk.data_size > 0x3fd8 and value.value[:2] == b'db':
                    try:
                        actual_value = self._parse_indirect_block(substream, value)
                        actual_value = try_decode_binary(
                            actual_value, as_json=True) if as_json else actual_value
                    except ConstError:
                        logger.error(f'Bad value at {actual_vk_offset}')
                        continue
                else:
                    # Return the actual data
                    actual_value = binascii.b2a_hex(value.value).decode(
                    )[:max_len] if as_json else value.value
            # NOTE: a redundant `elif data_type == 'REG_SZ'` branch was removed
            # here; it was unreachable because 'REG_SZ' is handled by the
            # first branch above.
            elif data_type == 'REG_DWORD':
                # If the data size is bigger than 0x80000000, data is actually stored in the VK data offset.
                actual_value = vk.data_offset if vk.data_size >= 0x80000000 else Int32ul.parse(
                    value.value)
            elif data_type == 'REG_QWORD':
                actual_value = vk.data_offset if vk.data_size >= 0x80000000 else Int64ul.parse(
                    value.value)
            elif data_type == 'REG_MULTI_SZ':
                parsed_value = GreedyRange(CString('utf-16-le')).parse(
                    value.value)
                # Because the ListContainer object returned by Construct cannot be turned into a list,
                # we do this trick
                actual_value = [x for x in parsed_value if x]
            # We currently dumps this as hex string or raw
            # TODO: Add actual parsing
            elif data_type in [
                'REG_RESOURCE_REQUIREMENTS_LIST', 'REG_RESOURCE_LIST'
            ]:
                actual_value = binascii.b2a_hex(value.value).decode(
                )[:max_len] if as_json else value.value
            else:
                actual_value = try_decode_binary(value.value, as_json=as_json)
            yield Value(name=value_name,
                        value_type=str(value.value_type),
                        value=actual_value,
                        is_corrupted=is_corrupted)
def iter_values(self, as_json=False, max_len=MAX_LEN):
    """
    Get the values of a subkey. Will raise if no values exist
    :param as_json: Whether to normalize the data as JSON or not
    :param max_len: Max length of value to return
    :return: List of values for the subkey
    """
    if not self.values_count:
        return None

    # Get the offset of the values key. We skip 4 because of Cell Header
    target_offset = REGF_HEADER_SIZE + 4 + self.header.values_list_offset
    self._stream.seek(target_offset)

    for _ in range(self.values_count):
        is_corrupted = False
        vk_offset = Int32ul.parse_stream(self._stream)
        with boomerang_stream(self._stream) as substream:
            actual_vk_offset = REGF_HEADER_SIZE + 4 + vk_offset
            substream.seek(actual_vk_offset)
            # Guard against corrupted hives, consistent with the other
            # iter_values variant in this file: stop iterating instead of
            # crashing when the VK magic does not match.
            try:
                vk = VALUE_KEY.parse_stream(substream)
            except ConstError:
                logger.error(
                    f'Could not parse VK at {substream.tell()}, registry hive is probably corrupted.'
                )
                return
            value = self.read_value(vk, substream)

            # A zero-length name marks the key's default value.
            if vk.name_size == 0:
                value_name = '(default)'
            else:
                value_name = vk.name.decode(errors='replace')

            data_type = str(vk.data_type)
            if data_type in ['REG_SZ', 'REG_EXPAND', 'REG_EXPAND_SZ']:
                if vk.data_size >= 0x80000000:
                    # data is contained in the data_offset field
                    value.size -= 0x80000000
                    actual_value = vk.data_offset
                elif vk.data_size > 0x3fd8 and value.value[:2] == b'db':
                    # Big data record: the value is split across segments.
                    data = self._parse_indirect_block(substream, value)
                    actual_value = try_decode_binary(data, as_json=as_json)
                else:
                    actual_value = try_decode_binary(value.value, as_json=as_json)
            elif data_type in ['REG_BINARY', 'REG_NONE']:
                if vk.data_size >= 0x80000000:
                    # data is contained in the data_offset field
                    actual_value = vk.data_offset
                elif vk.data_size > 0x3fd8 and value.value[:2] == b'db':
                    try:
                        actual_value = self._parse_indirect_block(substream, value)
                        actual_value = try_decode_binary(
                            actual_value, as_json=True) if as_json else actual_value
                    except ConstError:
                        logger.error(f'Bad value at {actual_vk_offset}')
                        continue
                else:
                    # Return the actual data
                    actual_value = binascii.b2a_hex(value.value).decode(
                    )[:max_len] if as_json else value.value
            # NOTE: a redundant `elif data_type == 'REG_SZ'` branch was removed
            # here; it was unreachable because 'REG_SZ' is handled by the
            # first branch above.
            elif data_type == 'REG_DWORD':
                # If the data size is bigger than 0x80000000, data is actually stored in the VK data offset.
                actual_value = vk.data_offset if vk.data_size >= 0x80000000 else Int32ul.parse(
                    value.value)
            elif data_type == 'REG_QWORD':
                actual_value = vk.data_offset if vk.data_size >= 0x80000000 else Int64ul.parse(
                    value.value)
            elif data_type == 'REG_MULTI_SZ':
                parsed_value = GreedyRange(CString('utf-16-le')).parse(
                    value.value)
                # Because the ListContainer object returned by Construct cannot be turned into a list,
                # we do this trick
                actual_value = str(parsed_value) if as_json else [
                    x for x in parsed_value if x
                ]
            else:
                actual_value = try_decode_binary(value.value, as_json=as_json)
            yield Value(name=value_name,
                        value_type=str(value.value_type),
                        value=actual_value,
                        is_corrupted=is_corrupted)