def __init__(self, partinfo: PartitionInfo, artifacts_registry: Registry, system: OperatingSystemBase = None): """ Initializes the class and loads artifacts :param dfvfs: [DFVFSHelper]: DFVFSHelper-object to access data :param partition: [PathSpec]: A DFVFS-PathSpec object identifying the root of the system partition :param partition_name: Name of the current partition :param artifacts_registry: [Registry]: Database of forensic artifacts definitions :param system: [OperatingSystemBase]: optional reference to a OperatingSystem-instance for further variable resolving """ # pylint: disable=invalid-name,too-many-instance-attributes,too-many-arguments self.dfvfs = partinfo.helper self.partition = partinfo.path_spec self.partition_name = partinfo.name if system: self.system = system else: self.system = UnknownOS() if system: self.os = system.get_os_name() else: self.os = None if not self.os: self.artifacts = artifacts_registry.artifacts else: self.artifacts = {name: artifact for name, artifact in artifacts_registry.artifacts.items() if not artifact.supported_os or self.os in artifact.supported_os} LOGGER.debug("Picked %d matching artifact definitions", len(self.artifacts)) self.knowledge_base = KnowledgeBase(self.artifacts) # hard code values that cannot be resolved with their provides-definition for implementation reasons self.knowledge_cache: Dict[str, Iterable[str]] = {'environ_systemdrive': '/'}
def extract_artifact(self, artifact_name): # pylint: disable=invalid-name """ Extract a particular artifact from all possible locations within this dfvfs-image """ real_partitions: List[PartitionInfo] = [] if self.zip_mode: # zip extract have exactly one partition per dfvfs instance (zip file) real_partitions = [ PartitionInfo(helper=d, path_spec=d.partitions()[0], name=chr(ord('c') + i)) for i, d in enumerate(self.dfvfs_list) ] else: # forensic images can have more than one partition, but we always only process one image at a time real_partitions = [ PartitionInfo(helper=self.dfvfs_list[0], path_spec=partition, name=chr(ord('c') + i)) for i, partition in enumerate(self.dfvfs_list[0].partitions()) if not dfvfs_helper.is_on_filesystem( partition, dfvfs_defs.TYPE_INDICATOR_VSHADOW) ] LOGGER.info("Found %d partitions", len(real_partitions)) for partinfo in real_partitions: current_os = self._guess_os(partinfo.helper, partinfo.path_spec) try: if current_os == definitions.OPERATING_SYSTEM_WINDOWS: system = WindowsSystem(partinfo.helper, partinfo.path_spec) elif current_os == definitions.OPERATING_SYSTEM_UNKNOWN: system = UnknownOS() LOGGER.warning( "Operating system not detected on partition %s. Only basic extraction possible.", dfvfs_helper.reconstruct_full_path(partinfo.path_spec)) else: LOGGER.warning( "Operating system %s is not yet supported on %s. Using basic extraction.", dfvfs_helper.reconstruct_full_path(partinfo.path_spec), current_os) system = UnknownOS() LOGGER.info("=== Starting processing of partition") resolver = ArtifactResolver(partinfo, self.artifact_registry, system) resolver.process_artifact(artifact_name, self.store) if current_os == definitions.OPERATING_SYSTEM_WINDOWS: system._reg_reader._cleanup_open_files("") # TODO except RuntimeError as err: LOGGER.exception( "Encountered exception during processing of %s: %s", dfvfs_helper.reconstruct_full_path(partinfo.path_spec), err) if 'pytest' in sys.modules: raise # we want to see what exactly is failing when tests are running
class ArtifactResolver: """ This class converts artifacts to actual bits of information """ def __init__(self, partinfo: PartitionInfo, artifacts_registry: Registry, system: OperatingSystemBase = None): """ Initializes the class and loads artifacts :param dfvfs: [DFVFSHelper]: DFVFSHelper-object to access data :param partition: [PathSpec]: A DFVFS-PathSpec object identifying the root of the system partition :param partition_name: Name of the current partition :param artifacts_registry: [Registry]: Database of forensic artifacts definitions :param system: [OperatingSystemBase]: optional reference to a OperatingSystem-instance for further variable resolving """ # pylint: disable=invalid-name,too-many-instance-attributes,too-many-arguments self.dfvfs = partinfo.helper self.partition = partinfo.path_spec self.partition_name = partinfo.name if system: self.system = system else: self.system = UnknownOS() if system: self.os = system.get_os_name() else: self.os = None if not self.os: self.artifacts = artifacts_registry.artifacts else: self.artifacts = {name: artifact for name, artifact in artifacts_registry.artifacts.items() if not artifact.supported_os or self.os in artifact.supported_os} LOGGER.debug("Picked %d matching artifact definitions", len(self.artifacts)) self.knowledge_base = KnowledgeBase(self.artifacts) # hard code values that cannot be resolved with their provides-definition for implementation reasons self.knowledge_cache: Dict[str, Iterable[str]] = {'environ_systemdrive': '/'} def _resolve_artifact(self, artifact: ArtifactDefinition) -> ResolvedArtifact: # pylint: disable=too-many-locals,too-many-branches,too-many-nested-blocks """ Resolve actual paths from artifact path definitions :param artifact[ArtifactDefinition]: An artifact as defined by the artifacts python module :return: A ResolvedArtifact instance where all variables are resolved and wildcards expanded """ if artifact.name != TEMPORARY_RESOLVE: LOGGER.info("Resolving artifact %s", artifact.name) else: LOGGER.debug("\tresolving variables..") files = [] dirs = [] paths = [] registry_keys = [] registry_vals = [] sub_artifacts = [] for source in artifact.sources: supported_os = getattr(source, "supported_os", None) if self.os and supported_os and self.os not in supported_os: continue # FILE: Single file(s): Might be wildcarded if source.type == artifact_defs.SOURCE_TYPE_FILE: files.extend(self.glob_file_paths(self._expand_paths(source.paths, source.separator))) # DIRECTORY: An (absolute) directory (might be wildcarded) # Used to obtain a directory listing elif source.type == artifact_defs.SOURCE_TYPE_DIRECTORY: dirs.extend(self.glob_file_paths(self._expand_paths(source.paths, source.separator))) # A path specification elif source.type == artifact_defs.SOURCE_TYPE_PATH: paths.extend(self.glob_file_paths(self._expand_paths(source.paths, source.separator))) # REGISTRY_KEY: Whole key with all values (may be wildcarded) elif source.type == artifact_defs.SOURCE_TYPE_REGISTRY_KEY: for k in source.keys: expanded_keys = self._expand_path(k) for kk in expanded_keys: registry_keys.extend(self.glob_registry_path(kk)) # REGISTRY_VALUE: Single key-value-pair(s) (may be wildcarded) elif source.type == artifact_defs.SOURCE_TYPE_REGISTRY_VALUE: for pair in source.key_value_pairs: try: reg_paths = self._expand_path(pair["key"]) except RuntimeError as err: LOGGER.info("Failed to resolve path [%s]: %s", pair["key"], err) continue for path in reg_paths: globbed_paths = self.glob_registry_path(path) # we need to glob key paths first for reg_key in globbed_paths: value_names = self.glob_registry_value(reg_key, pair['value']) # .. and values second registry_vals.append((reg_key, value_names)) # ARTIFACT_GROUP: A list of other artifacts elif source.type == artifact_defs.SOURCE_TYPE_ARTIFACT_GROUP: for s_artifact_name in source.names: s_artifact = self.get_resolved_artifact(s_artifact_name) if s_artifact: sub_artifacts.append(s_artifact) else: LOGGER.warning("Skipping unknown sub-artifact %s in %s", s_artifact_name, artifact.name) return ResolvedArtifact(artifact, files, dirs, paths, registry_keys, registry_vals, sub_artifacts) def _resolve_source(self, source: ArtifactSource) -> Iterable[str]: """ This is used as the callback function for KnowledgeBase to do variable resolving""" LOGGER.debug('Resolving source: %s:%s', source.type, source.__dict__) results = set() artifact = ArtifactDefinition(TEMPORARY_RESOLVE, [source]) resolved = self._resolve_artifact(artifact) # For keys, the key path is of interest for key_path in [key.path for key in resolved.registry_keys]: results.add(key_path) # For registry values, the content of the value is relevant for key, values in resolved.registry_vals: for value in values: value = key.GetValueByName(value) if value.DataIsInteger() or value.DataIsString(): data = str(value.GetDataAsObject()) results.add(data) else: LOGGER.warning("Not adding value from %s since it has unparseable type", key) # paths are added as strings # .. as are directories (should not be source-provides, but might happen) for path in resolved.paths + resolved.dirs: path_str = dfvfs_utils.get_relative_path(path) results.add(path_str) # for files, we want the content (line based) for filename in resolved.files: handle = dfvfs_utils.get_file_handle(filename) results.add(''.join(handle.readlines())) handle.close() LOGGER.debug('Resolved to: %s', results) return results def _expand_paths(self, paths: List[str], separator: str = None): """ Resolve a list of paths. See _expand_path :param paths[List[str]]: List of paths to resolve :param separator[str]: optional, separator char from the artifact :return: List of resolved path(s), flattened """ results = [] for path in paths: try: resolved_paths = self._expand_path(path) except RuntimeError as err: LOGGER.info("Failed to resolve path [%s]: %s", path, err) continue for resolved in resolved_paths: if separator: resolved = resolved.replace(separator, '/') if self.system.get_os_name() == 'Windows': resolved = resolved.replace('\\', '/') results.append(resolved) return results def _expand_path(self, path: str) -> List[str]: """ This method will resolve artifact source paths that have variables. Some variables might refer to lists (e.g. users), so multiple paths can be returned after resolving. :param path: The artifact definition path to resolve :return: A list of paths with resolved sources. They can still contain wildcards like '*'! """ # pylint: disable=too-many-nested-blocks,too-many-branches variable_regex = '(%?%([a-zA-Z0-9_.-]+)%?%)' variables = re.findall(variable_regex, path) # will return tuples: (with surrounding %s, without) results = [path] for var, stripped_var in variables: # replace variables one by one, updating all paths in results subst = self.get_var(stripped_var) if not subst: LOGGER.warning("Cannot resolve path \"%s\", \"%s\" is unknown", path, var) return [] new_results = [] for result in results: for value in subst: # some variables contain variables themselves, so recurse if necessary # these variables should resolve to exactly one value though if re.match(variable_regex, value): replacement_list = self._expand_path(value) if len(replacement_list) > 1: LOGGER.error("Nested variable replacement in \"%s\" found. Aborting...", value) return [] replacement = replacement_list[0] else: replacement = value new_results.append(result.replace(var, replacement)) results = new_results return results def get_var(self, key: str) -> Iterable[str]: """ Retrieves the value of a specified variable from the variable database. :param key: str: The name of the variable :return: The value of the variable or None if the variable does not exist """ if not key: return [] if key in self.knowledge_cache: return self.knowledge_cache[key] real_key = key.replace('%', '') try: results = self.knowledge_base.get_value(real_key, self._resolve_source) except ValueError as first_err: # maybe this was a windows variable, try some magic try: results = self.get_var('environ_' + real_key.lower()) except ValueError: LOGGER.warning("Could not resolve %s: %s", key, first_err) return [] variable_regex = '(%?%([a-zA-Z0-9_.-]+)%?%)' real_results = CasePreservingSet() # we do not want the same path in different case for result in results: more_vars = re.search(variable_regex, result) if result.startswith('C:\\'): LOGGER.debug("Fixing absolute path %s", result) result = result.replace('C:\\', '/').replace('\\', '/') if more_vars: real_results.update(self._expand_path(result)) else: real_results.add(result) self.knowledge_cache[key] = real_results return real_results def get_resolved_artifact(self, artifact_name: str) -> Optional[ResolvedArtifact]: """ Retrieves an artifact from the database, resolves variables and returns it. This method will also check if the artifact is supported on the OS and if the dependencies (from the artifacts "conditions" field) are met. :param artifact_name: [str]: Name of the artifact :return: ResolvedArtifact instance or None if the artifact was not found or is not supported """ artifact = self.artifacts.get(artifact_name, None) if not artifact: LOGGER.warning("Unknown or non-applicable artifact: %s", artifact_name) return None if self.os and artifact.supported_os and self.os not in artifact.supported_os: LOGGER.info("Artifact %s not supported for OS %s", artifact_name, self.os) return None if not self.os and artifact.supported_os: LOGGER.warning("Trying optimistic extract of %s, no OS known for current partition", artifact_name) return self._resolve_artifact(artifact) def get_all_valid(self) -> List[ResolvedArtifact]: """ :return: List[ResolvedArtifact] of all valid artifacts that can be extracted """ for artifact in self.artifacts: try: resolved = self.get_resolved_artifact(artifact) if resolved: yield resolved except RuntimeError as err: LOGGER.info("Caught error while trying to load artifact [%s]: %s", artifact, err) def extract(self, artifact: ResolvedArtifact, store: ForensicStore) -> bool: """ Extract an artifact to an export directory by creating a new ForensicStore there :return: True on success, false if nothing was extracted, raises RuntimeError otherwise """ if not artifact: return False artifact_name = artifact.artifact.name if artifact.empty(): LOGGER.debug("Nothing to extract found for %s", artifact_name) return False extracted = self._extract_files(artifact, store) extracted |= self._extract_registry(artifact, store) # handle sub-artifacts for ARTIFACT_GROUP and extract them to our output folder for sub in artifact.sub_artifacts: LOGGER.debug("Attemping extract of sub-artifact %s", sub.artifact.name) extracted |= self.extract(sub, store) if extracted: LOGGER.info("Extracted %s", artifact_name) else: LOGGER.debug("Nothing extracted for %s", artifact_name) return extracted def _extract_files(self, artifact: ResolvedArtifact, artifact_output: ForensicStore) -> bool: """ Extract an artifact's files and folders to the forensic store :param artifact: artifact to extract :param artifact_output: Output forensic store :type artifact_output: ForensicStore :type artifact: ResolvedArtifact :return: True on success, False if nothing was written """ artifact_name = artifact.artifact.name success = False for export_file in artifact.files: success = True file_infos = get_file_infos(export_file) if not file_infos: LOGGER.warning("Could not get file infos for \"%s\". Skipping", dfvfs_utils.reconstruct_full_path(export_file)) continue if file_infos['type'] != dfvfs_defs.FILE_ENTRY_TYPE_FILE: LOGGER.debug("Not exporting entry of wrong type: %s", dfvfs_utils.reconstruct_full_path(export_file)) continue store_obj_id = artifact_output.add_file_item(artifact_name, file_infos['name'], created=file_infos.get('created', None), modified=file_infos.get('modified', None), accessed=file_infos.get('accessed', None), origin={ 'path': file_infos['path'], 'partition': self.partition_name }, errors=None) output_name = f"{self.partition_name}_" \ f"{dfvfs_utils.get_relative_path(export_file).replace('/', '_').strip('_')}" file_contents = dfvfs_helper.get_file_handle(export_file) with artifact_output.add_file_item_export(store_obj_id, export_name=output_name) as file_export: chunk_size = 65536 data = file_contents.read(chunk_size) while data: file_export.write(data) data = file_contents.read(chunk_size) file_contents.close() return success def _extract_registry(self, artifact: ResolvedArtifact, store: ForensicStore): """ Extract an artifact's registry keys and values to an export directory. :type artifact: ResolvedArtifact :type store: ForensicStore :return: True on success, False if nothing was written """ if not artifact: return False if not artifact.registry_keys and not artifact.registry_vals: return False if not self.system.get_registry(): LOGGER.info("Cannot glob registry path since system does not support registry operations.") return False artifact_name = artifact.artifact.name exported = False for key in artifact.registry_keys: exported = True self._key_to_forensicstore(store, key, artifact=artifact_name) for key, values in artifact.registry_vals: exported = True self._key_to_forensicstore(store, key, values, artifact=artifact_name) return exported @staticmethod def _key_to_forensicstore(store: ForensicStore, key: WinRegistryKey, values: Optional[List[str]] = None, artifact: str = '') -> None: """ Export a registry key to the forensicstore, optionally only picking certain values from the key """ last_write_tuple = key.last_written_time.CopyToStatTimeTuple() if last_write_tuple[0]: last_write_date = datetime.utcfromtimestamp(last_write_tuple[0]) last_write_date = last_write_date.replace(microsecond=(int(last_write_tuple[1] / 10))) else: last_write_date = datetime.utcfromtimestamp(0) try: key_item_id = store.add_registry_key_item(artifact=artifact, modified=last_write_date, key=key.path, errors=None) except TypeError as err: LOGGER.exception("Error adding registry key: %s", err) return for value in key.GetValues(): name = value.name if values and name not in values: continue if name is None: name = '(Default)' type_str = value.data_type_string if type_str == 'REG_DWORD_LE': type_str = 'REG_DWORD' # if value.data and (value.DataIsBinaryData() or value.data_type_string == "REG_NONE"): # data = value.data # elif value.data: # data = '{}'.format(value.GetDataAsObject()) # else: # data = b"" try: store.add_registry_value_item(key_item_id, type_str, value.data, name) except sqlite3.OperationalError: LOGGER.exception("Error updating value") continue def resolve_superglobs(self, paths: List[str], separator: str = '/') -> List[str]: """ Convert the forensicartifacts "superglob" operator (**) to a list of normal wildcarded paths. """ paths_to_find = [] simple_globs = [f for f in paths if '**' not in f] super_globs = [f for f in paths if '**' in f] paths_to_find.extend(simple_globs) for file_path in super_globs: # Artifacts can have superglobs with a number behind them. # Since we need to glob a max-number for unbound depths to be fast, use 3 as default match = re.search(r'\*\*(\d+)', file_path) if match: max_depth = int(match.group(1)) else: max_depth = 3 match = re.search(r'\*\*', file_path) sub = '*' for _ in range(max_depth): paths_to_find.append(file_path.replace(match.group(0), sub)) sub = sub + separator + '*' return paths_to_find def glob_file_paths(self, paths: List[str]) -> List[PathSpec]: """ Resolves file level globbing patterns to actual dfvfs file references. This is mostly used to extract artifacts, but can also be called externally. :param paths: [List[str]]: List of file paths with globbing characters (*, **) :return: List[PathSpec] of results from globbing on this system's partition """ result = [] paths_to_find = self.resolve_superglobs(paths) if paths_to_find: result.extend(self.dfvfs.find_paths(paths_to_find, case_sensitive=False, partitions=[self.partition])) return result def glob_registry_path(self, registry_key: str, ignore_trailing_wildcard=False) -> List[WinRegistryKey]: """ This will glob a registry path with one ore more wildcards to all the registry keys that match. This is mostly used to extract artifacts, but can also be called externally. :param ignore_trailing_wildcard: Skip the last '*' if it is at the very end :param key """ paths_to_find = self.resolve_superglobs([registry_key], separator='\\') registry = self.system.get_registry() if not registry: return for key in paths_to_find: # we will work like in dfwinreg's regsearcher, but try to skip the first few steps until # we see a wildcard. This means we don't need to use a virtual root key keyparts = key.split('\\') if ignore_trailing_wildcard and keyparts[-1] == '*': keyparts = keyparts[:-1] # find first location of a wildcard tail = None tail_idx = -1 i = 0 for i, val in enumerate(keyparts): if '*' in val: tail = '\\'.join(keyparts[0:i]) tail_idx = i break if not tail: # no wildcard at all tail = '\\'.join(keyparts) tail_idx = i + 1 reg_key = registry.GetKeyByPath(tail) if not reg_key: return for result in self._glob_registry_path_rec(keyparts, tail_idx, reg_key): yield result def _glob_registry_path_rec(self, keyparts: List[str], level: int, reg_key: WinRegistryKey) -> List[WinRegistryKey]: """ Recursive part of glob_registry_path. Only used internally. :param keyparts[List[str]]: List of registry key parts :param level: current level in the matching (index into keyparts) :param reg_key[RegistryKey]: regf parent key object :return: Recursively calculates all matches and returns List[RegistryKey] """ # catch edge case where no globbing is needed at all if level >= len(keyparts): yield self.system.get_registry().GetKeyByPath('\\'.join(keyparts)) return matches = [] subkeys = reg_key.GetSubkeys() curr_part = keyparts[level] if '*' in curr_part: # this is a location where globbing is needed regex = re.compile(curr_part.replace('*', '.*').lower()) for subkey in subkeys: if regex.match(subkey.name.lower()): matches.append(subkey) else: # no globbing at this point, simple string match for subkey in subkeys: if subkey.name.lower() == curr_part.lower(): matches.append(subkey) # determine if we have checked all parts if level == (len(keyparts) - 1): for match in matches: yield match else: for match in matches: for submatch in self._glob_registry_path_rec(keyparts, level + 1, match): yield submatch @staticmethod def glob_registry_value(key: WinRegistryKey, search_value: str) -> List[str]: """ Globs wildcards in registry value names to a list of names :param key: regf-registry key :param search_value: str, can contain wildcard :return: list of str """ if not key or not search_value: return [] matches = [] value_regex = re.compile(search_value.replace('*', '.*').lower()) for value in key.GetValues(): if value is None or value.name is None: continue if value_regex.match(value.name.lower()): matches.append(value.name) return matches def process_artifact(self, artifact_name: str, store: ForensicStore) -> bool: """ Start the extraction process of a particular artifact :param artifact_name: Name of the artifact :param store: ForensicStorage output :return: True if success, False otherwise """ LOGGER.debug("Attempting extract of %s", artifact_name) artifact = self.get_resolved_artifact(artifact_name) if not artifact: return False return self.extract(artifact, store)