def _CheckStorageFile(self, storage_file_path, warn_about_existing=False):
  """Checks if the storage file path is valid.

  Args:
    storage_file_path (str): path of the storage file.
    warn_about_existing (bool): True if the user should be warned about
        the storage file already existing.

  Raises:
    BadConfigOption: if the storage file path is invalid.
  """
  if os.path.exists(storage_file_path):
    if not os.path.isfile(storage_file_path):
      raise errors.BadConfigOption(
          'Storage file: {0:s} already exists and is not a file.'.format(
              storage_file_path))

    if warn_about_existing:
      logger.warning('Appending to an already existing storage file.')

  dirname = os.path.dirname(storage_file_path)
  if not dirname:
    dirname = '.'

  # TODO: add a more thorough check to see if the storage file really is
  # a plaso storage file.

  if not os.access(dirname, os.W_OK):
    raise errors.BadConfigOption(
        'Unable to write to storage file: {0:s}'.format(storage_file_path))
def _CheckStorageFile(self, storage_file_path):  # pylint: disable=arguments-differ
  """Checks if the storage file path is valid.

  Args:
    storage_file_path (str): path of the storage file.

  Raises:
    BadConfigOption: if the storage file path is invalid.
  """
  if os.path.exists(storage_file_path):
    if not os.path.isfile(storage_file_path):
      raise errors.BadConfigOption(
          'Storage file: {0:s} already exists and is not a file.'.format(
              storage_file_path))

    logger.warning('Appending to an already existing storage file.')

  dirname = os.path.dirname(storage_file_path)
  if not dirname:
    dirname = '.'

  # TODO: add a more thorough check to see if the storage file really is
  # a plaso storage file.

  if not os.access(dirname, os.W_OK):
    raise errors.BadConfigOption(
        'Unable to write to storage file: {0:s}'.format(storage_file_path))
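# Hedged aside (not plaso API, name is illustrative): a self-contained sketch
# of the validation performed by _CheckStorageFile above, runnable without
# plaso. An existing path must be a regular file, and the target directory
# must be writable.
import os


def check_storage_path(storage_file_path):
  """Raises ValueError if the storage file path is not usable."""
  if os.path.exists(storage_file_path) and not os.path.isfile(
      storage_file_path):
    raise ValueError('{0:s} exists and is not a file.'.format(
        storage_file_path))

  # An empty dirname means the path is relative to the working directory.
  dirname = os.path.dirname(storage_file_path) or '.'
  if not os.access(dirname, os.W_OK):
    raise ValueError('Cannot write to directory: {0:s}.'.format(dirname))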
def _CreateOutputModule(self, options):
  """Creates the output module.

  Args:
    options (argparse.Namespace): command line arguments.

  Returns:
    OutputModule: output module.

  Raises:
    RuntimeError: if the output module cannot be created.
  """
  formatter_mediator = formatters_mediator.FormatterMediator(
      data_location=self._data_location)

  try:
    formatter_mediator.SetPreferredLanguageIdentifier(
        self._preferred_language)
  except (KeyError, TypeError) as exception:
    raise RuntimeError(exception)

  mediator = output_mediator.OutputMediator(
      self._knowledge_base, formatter_mediator,
      preferred_encoding=self.preferred_encoding)
  mediator.SetTimezone(self._preferred_time_zone)

  try:
    output_module = output_manager.OutputManager.NewOutputModule(
        self._output_format, mediator)
  except (KeyError, ValueError) as exception:
    raise RuntimeError(
        'Unable to create output module with error: {0!s}'.format(
            exception))

  if output_manager.OutputManager.IsLinearOutputModule(self._output_format):
    output_file_object = open(self._output_filename, 'wb')
    output_writer = tools.FileObjectOutputWriter(output_file_object)
    output_module.SetOutputWriter(output_writer)

  helpers_manager.ArgumentHelperManager.ParseOptions(options, output_module)

  # Check if there are parameters that have not been defined but are needed
  # for the output module to continue. Prompt the user to supply those that
  # are missing.
  missing_parameters = output_module.GetMissingArguments()
  while missing_parameters:
    for parameter in missing_parameters:
      value = self._PromptUserForInput(
          'Missing parameter {0:s} for output module'.format(parameter))
      if value is None:
        logger.warning(
            'Unable to set the missing parameter for: {0:s}'.format(
                parameter))
        continue

      setattr(options, parameter, value)

    helpers_manager.ArgumentHelperManager.ParseOptions(
        options, output_module)
    missing_parameters = output_module.GetMissingArguments()

  return output_module
def ParseOptions(cls, options, output_module):
  """Parses and validates options.

  Args:
    options (argparse.Namespace): parser options.
    output_module (OutputModule): output module to configure.

  Raises:
    BadConfigObject: when the output module object is of the wrong type.
    BadConfigOption: when a configuration parameter fails validation.
  """
  if not isinstance(output_module, elastic.ElasticsearchOutputModule):
    raise errors.BadConfigObject(
        'Output module is not an instance of ElasticsearchOutputModule')

  index_name = cls._ParseStringOption(
      options, 'index_name', default_value=cls._DEFAULT_INDEX_NAME)
  document_type = cls._ParseStringOption(
      options, 'document_type', default_value=cls._DEFAULT_DOCUMENT_TYPE)
  flush_interval = cls._ParseNumericOption(
      options, 'flush_interval', default_value=cls._DEFAULT_FLUSH_INTERVAL)
  raw_fields = getattr(options, 'raw_fields', cls._DEFAULT_RAW_FIELDS)
  elastic_user = cls._ParseStringOption(options, 'elastic_user')
  elastic_password = cls._ParseStringOption(options, 'elastic_password')
  use_ssl = getattr(options, 'use_ssl', False)

  ca_certificates_path = cls._ParseStringOption(
      options, 'ca_certificates_file_path')
  elastic_url_prefix = cls._ParseStringOption(options, 'elastic_url_prefix')

  if elastic_password is None:
    elastic_password = os.getenv('PLASO_ELASTIC_PASSWORD', None)

  if elastic_password is not None:
    logger.warning(
        'Note that specifying your Elasticsearch password via '
        '--elastic_password or the environment variable '
        'PLASO_ELASTIC_PASSWORD can expose the password to other users on '
        'the system.')

  if elastic_user is not None and elastic_password is None:
    elastic_password = getpass.getpass('Enter your Elasticsearch password: ')

  ElasticSearchServerArgumentsHelper.ParseOptions(options, output_module)

  output_module.SetIndexName(index_name)
  output_module.SetDocumentType(document_type)
  output_module.SetFlushInterval(flush_interval)
  output_module.SetRawFields(raw_fields)
  output_module.SetUsername(elastic_user)
  output_module.SetPassword(elastic_password)
  output_module.SetUseSSL(use_ssl)
  output_module.SetCACertificatesPath(ca_certificates_path)
  output_module.SetURLPrefix(elastic_url_prefix)
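# Hedged aside (helper name is hypothetical): the password resolution order
# used by ParseOptions above, isolated for clarity. An explicit option wins,
# then the PLASO_ELASTIC_PASSWORD environment variable, then an interactive
# prompt, which is only shown when a user name was supplied.
import getpass
import os


def resolve_elastic_password(option_value, elastic_user):
  """Returns the Elasticsearch password or None if not available."""
  elastic_password = option_value
  if elastic_password is None:
    elastic_password = os.getenv('PLASO_ELASTIC_PASSWORD', None)

  if elastic_user is not None and elastic_password is None:
    elastic_password = getpass.getpass('Enter your Elasticsearch password: ')

  return elastic_password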
def ParseOptions(self, options):
  """Parses the options and initializes the front-end.

  Args:
    options (argparse.Namespace): command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # The data location is required to list signatures.
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['data_location'])

  self.show_troubleshooting = getattr(options, 'show_troubleshooting', False)

  # Check the list options first, otherwise required options will raise.
  signature_identifiers = self.ParseStringOption(
      options, 'signature_identifiers')
  if signature_identifiers == 'list':
    self.list_signature_identifiers = True

  if self.list_signature_identifiers or self.show_troubleshooting:
    return

  self._ParseInformationalOptions(options)
  self._ParseLogFileOptions(options)
  self._ParseStorageMediaOptions(options)

  self._destination_path = self.ParseStringOption(
      options, 'path', default_value='export')

  if not self._data_location:
    logger.warning('Unable to automatically determine data location.')

  argument_helper_names = [
      'artifact_definitions', 'process_resources', 'vfs_backend']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  if self._vfs_back_end == 'tsk':
    dfvfs_definitions.PREFERRED_NTFS_BACK_END = (
        dfvfs_definitions.TYPE_INDICATOR_TSK)

  self._ParseFilterOptions(options)

  include_duplicates = getattr(options, 'include_duplicates', False)
  self._skip_duplicates = not include_duplicates

  self._EnforceProcessMemoryLimit(self._process_memory_limit)
def CheckOutDated(self):
  """Checks if the version of plaso is outdated and warns the user."""
  version_date_time = datetime.datetime(
      int(plaso.__version__[0:4], 10), int(plaso.__version__[4:6], 10),
      int(plaso.__version__[6:8], 10))
  date_time_delta = datetime.datetime.utcnow() - version_date_time

  if date_time_delta.days > 180:
    logger.warning('This version of plaso is more than 6 months old.')

    self._PrintUserWarning((
        'the version of plaso you are using is more than 6 months old. We '
        'strongly recommend updating it.'))
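# Hedged aside (assumes the plaso convention that version strings start with
# a YYYYMMDD date stamp, for example '20200101'): the slicing in CheckOutDated
# above relies on that layout, and this isolates the age calculation.
import datetime


def version_age_in_days(version, now=None):
  """Returns the age in days of a date-stamped version string."""
  release_date_time = datetime.datetime(
      int(version[0:4], 10), int(version[4:6], 10), int(version[6:8], 10))
  now = now or datetime.datetime.utcnow()
  return (now - release_date_time).days


# A release more than 180 days old triggers the warning above.
assert version_age_in_days('20200101', now=datetime.datetime(2021, 1, 1)) > 180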
def _ParseInformationalOptions(self, options):
  """Parses the informational options.

  Args:
    options (argparse.Namespace): command line arguments.
  """
  self._debug_mode = getattr(options, 'debug', False)
  self._quiet_mode = getattr(options, 'quiet', False)

  if self._debug_mode and self._quiet_mode:
    logger.warning(
        'Cannot use debug and quiet mode at the same time, defaulting to '
        'debug output.')
def _SetExtractionPreferredTimeZone(self, knowledge_base):
  """Sets the preferred time zone before extraction.

  Args:
    knowledge_base (KnowledgeBase): contains information from the source
        data needed for parsing.
  """
  # Note session.preferred_time_zone will default to UTC but
  # self._preferred_time_zone is None when not set.
  if self._preferred_time_zone:
    try:
      knowledge_base.SetTimeZone(self._preferred_time_zone)
    except ValueError:
      # pylint: disable=protected-access
      logger.warning(
          'Unsupported time zone: {0:s}, defaulting to {1:s}'.format(
              self._preferred_time_zone, knowledge_base._time_zone.zone))
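# Hedged aside (assumes pytz, which the knowledge base's _time_zone.zone
# attribute used above suggests): a sketch of how an unsupported time zone
# name can be detected before extraction starts.
import pytz


def is_known_time_zone(time_zone):
  """Returns True if pytz recognizes the time zone name."""
  try:
    pytz.timezone(time_zone)
    return True
  except pytz.UnknownTimeZoneError:
    return False


assert is_known_time_zone('UTC')
assert not is_known_time_zone('Mars/Olympus_Mons')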
def ParseOptions(self, options):
  """Parses the options and initializes the front-end.

  Args:
    options (argparse.Namespace): command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # The data location is required to list signatures.
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['data_location'])

  self.show_troubleshooting = getattr(options, 'show_troubleshooting', False)

  # Check the list options first, otherwise required options will raise.
  signature_identifiers = self.ParseStringOption(
      options, 'signature_identifiers')
  if signature_identifiers == 'list':
    self.list_signature_identifiers = True

  if self.list_signature_identifiers or self.show_troubleshooting:
    return

  self._ParseInformationalOptions(options)
  self._ParseLogFileOptions(options)
  self._ParseStorageMediaOptions(options)

  self._destination_path = self.ParseStringOption(
      options, 'path', default_value='export')

  if not self._data_location:
    logger.warning('Unable to automatically determine data location.')

  argument_helper_names = ['artifact_definitions', 'process_resources']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=argument_helper_names)

  self._ParseFilterOptions(options)

  if (getattr(options, 'no_vss', False) or
      getattr(options, 'include_duplicates', False)):
    self._skip_duplicates = False

  self._EnforceProcessMemoryLimit(self._process_memory_limit)
def _ExtractFileEntry(
    self, path_spec, destination_path, output_writer, skip_duplicates=True):
  """Extracts a file entry.

  Args:
    path_spec (dfvfs.PathSpec): path specification of the source file.
    destination_path (str): path where the extracted files should be
        stored.
    output_writer (CLIOutputWriter): output writer.
    skip_duplicates (Optional[bool]): True if files with duplicate content
        should be skipped.
  """
  file_entry = path_spec_resolver.Resolver.OpenFileEntry(path_spec)
  if not file_entry:
    logger.warning('Unable to open file entry for path spec: {0:s}'.format(
        path_spec.comparable))
    return

  if not self._filter_collection.Matches(file_entry):
    return

  file_entry_processed = False
  for data_stream in file_entry.data_streams:
    if self._abort:
      break

    self._ExtractDataStream(
        file_entry, data_stream.name, destination_path, output_writer,
        skip_duplicates=skip_duplicates)

    file_entry_processed = True

  if not file_entry_processed:
    self._ExtractDataStream(
        file_entry, '', destination_path, output_writer,
        skip_duplicates=skip_duplicates)
def ParseOptions(cls, options, output_module):  # pylint: disable=arguments-renamed
  """Parses and validates options.

  Args:
    options (argparse.Namespace): parser options.
    output_module (OutputModule): output module to configure.

  Raises:
    BadConfigObject: when the output module object is of the wrong type.
    BadConfigOption: when a configuration parameter fails validation.
  """
  if not isinstance(
      output_module, shared_elastic.SharedElasticsearchOutputModule):
    raise errors.BadConfigObject(
        'Output module is not an instance of ElasticsearchOutputModule')

  index_name = cls._ParseStringOption(
      options, 'index_name', default_value=cls._DEFAULT_INDEX_NAME)
  flush_interval = cls._ParseNumericOption(
      options, 'flush_interval', default_value=cls._DEFAULT_FLUSH_INTERVAL)

  fields = ','.join(cls._DEFAULT_FIELDS)
  additional_fields = cls._ParseStringOption(options, 'additional_fields')
  if additional_fields:
    fields = ','.join([fields, additional_fields])

  mappings_file_path = cls._ParseStringOption(options, 'elastic_mappings')
  elastic_user = cls._ParseStringOption(options, 'elastic_user')
  elastic_password = cls._ParseStringOption(options, 'elastic_password')
  use_ssl = getattr(options, 'use_ssl', False)

  ca_certificates_path = cls._ParseStringOption(
      options, 'ca_certificates_file_path')
  elastic_url_prefix = cls._ParseStringOption(options, 'elastic_url_prefix')

  if elastic_password is None:
    elastic_password = os.getenv('PLASO_ELASTIC_PASSWORD', None)

  if elastic_password is not None:
    logger.warning(
        'Note that specifying your Elasticsearch password via '
        '--elastic_password or the environment variable '
        'PLASO_ELASTIC_PASSWORD can expose the password to other users on '
        'the system.')

  if elastic_user is not None and elastic_password is None:
    elastic_password = getpass.getpass('Enter your Elasticsearch password: ')

  server = cls._ParseStringOption(
      options, 'server', default_value=cls._DEFAULT_SERVER)
  port = cls._ParseNumericOption(
      options, 'port', default_value=cls._DEFAULT_PORT)

  output_module.SetServerInformation(server, port)
  output_module.SetIndexName(index_name)
  output_module.SetFlushInterval(flush_interval)
  output_module.SetFields(
      [field_name.strip() for field_name in fields.split(',')])
  output_module.SetUsername(elastic_user)
  output_module.SetPassword(elastic_password)
  output_module.SetUseSSL(use_ssl)
  output_module.SetCACertificatesPath(ca_certificates_path)
  output_module.SetURLPrefix(elastic_url_prefix)

  if not mappings_file_path or not os.path.isfile(mappings_file_path):
    mappings_filename = output_module.MAPPINGS_FILENAME

    mappings_path = getattr(output_module, 'MAPPINGS_PATH', None)
    if mappings_path:
      mappings_file_path = os.path.join(mappings_path, mappings_filename)
    else:
      data_location = getattr(options, '_data_location', None) or 'data'
      mappings_file_path = os.path.join(data_location, mappings_filename)

  if not mappings_file_path or not os.path.isfile(mappings_file_path):
    raise errors.BadConfigOption(
        'No such Elasticsearch mappings file: {0!s}.'.format(
            mappings_file_path))

  with open(mappings_file_path, 'r') as file_object:
    mappings_json = json.load(file_object)

  output_module.SetMappings(mappings_json)
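# Hedged aside (values are illustrative): the field list handling in
# ParseOptions above joins the defaults and any additional fields into one
# comma-separated string, then normalizes it back into a stripped list for
# SetFields().
default_fields = ['datetime', 'message']
additional_fields = ' source , hostname'

fields = ','.join([','.join(default_fields), additional_fields])
field_names = [field_name.strip() for field_name in fields.split(',')]

assert field_names == ['datetime', 'message', 'source', 'hostname']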
def ParseOptions(self, options):
  """Parses the options.

  Args:
    options (argparse.Namespace): command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # The output module options depend on the preferred language
  # and preferred time zone options.
  self._ParseTimezoneOption(options)

  names = ['analysis_plugins', 'language', 'profiling']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=names)

  self.list_analysis_plugins = self._analysis_plugins == 'list'
  self.list_language_identifiers = self._preferred_language == 'list'
  self.list_profilers = self._profilers == 'list'

  if (self.list_analysis_plugins or self.list_language_identifiers or
      self.list_profilers or self.list_timezones):
    return

  # Check output modules after the other listable options, otherwise
  # it could raise with "requires an output file".
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['output_modules'])

  self.list_output_modules = self._output_format == 'list'
  if self.list_output_modules:
    return

  self._ParseInformationalOptions(options)

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['data_location'])

  self._ParseLogFileOptions(options)

  self._ParseProcessingOptions(options)

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['event_filters'])

  self._deduplicate_events = getattr(options, 'dedup', True)

  if self._data_location:
    # Update the data location with the calculated value.
    options.data_location = self._data_location
  else:
    logger.warning('Unable to automatically determine data location.')

  self._command_line_arguments = self.GetCommandLineArguments()

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['storage_file'])

  # TODO: move check into _CheckStorageFile.
  if not self._storage_file_path:
    raise errors.BadConfigOption('Missing storage file option.')

  if not os.path.isfile(self._storage_file_path):
    raise errors.BadConfigOption(
        'No such storage file: {0:s}.'.format(self._storage_file_path))

  self._EnforceProcessMemoryLimit(self._process_memory_limit)

  self._analysis_plugins = self._CreateAnalysisPlugins(options)
  self._output_module = self._CreateOutputModule(options)
def ParseArguments(self):
  """Parses the command line arguments.

  Returns:
    bool: True if the arguments were successfully parsed.
  """
  loggers.ConfigureLogging()

  argument_parser = argparse.ArgumentParser(
      description=self.DESCRIPTION, add_help=False,
      conflict_handler='resolve',
      formatter_class=argparse.RawDescriptionHelpFormatter)

  self.AddBasicOptions(argument_parser)

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      argument_parser, names=['storage_file'])

  analysis_group = argument_parser.add_argument_group('Analysis Arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      analysis_group, names=['analysis_plugins'])

  processing_group = argument_parser.add_argument_group('Processing')
  self.AddProcessingOptions(processing_group)

  info_group = argument_parser.add_argument_group('Informational Arguments')

  self.AddLogFileOptions(info_group)
  self.AddInformationalOptions(info_group)

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      info_group, names=['status_view'])

  filter_group = argument_parser.add_argument_group('Filter Arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      filter_group, names=['event_filters'])

  input_group = argument_parser.add_argument_group('Input Arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      input_group, names=['data_location'])

  output_group = argument_parser.add_argument_group('Output Arguments')

  output_group.add_argument(
      '-a', '--include_all', '--include-all', action='store_false',
      dest='dedup', default=True, help=(
          'By default psort removes duplicate entries from the output. '
          'This parameter changes that behavior so all events are '
          'included.'))

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      output_group, names=['language'])

  self.AddTimeZoneOption(output_group)

  output_format_group = argument_parser.add_argument_group(
      'Output Format Arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      output_format_group, names=['output_modules'])

  profiling_group = argument_parser.add_argument_group('Profiling Arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      profiling_group, names=['profiling'])

  try:
    # TODO: refactor how arguments is used in a more argparse way.
    options = argument_parser.parse_args()
  except UnicodeEncodeError:
    # If we get here we are attempting to print help in a non-Unicode
    # terminal.
    self._output_writer.Write('\n')
    self._output_writer.Write(argument_parser.format_help())
    return False

  # Properly prepare the attributes according to local encoding.
  if self.preferred_encoding == 'ascii':
    logger.warning(
        'The preferred encoding of your system is ASCII, which is not '
        'optimal for the typically non-ASCII characters that need to be '
        'parsed and processed. The tool will most likely crash and die, '
        'perhaps in a way that may not be recoverable. A five second delay '
        'is introduced to give you time to cancel the runtime and '
        'reconfigure your preferred encoding, otherwise continue at own '
        'risk.')
    time.sleep(5)

  try:
    self.ParseOptions(options)
  except errors.BadConfigOption as exception:
    self._output_writer.Write('ERROR: {0!s}\n'.format(exception))
    self._output_writer.Write('\n')
    self._output_writer.Write(argument_parser.format_usage())
    return False

  loggers.ConfigureLogging(
      debug_output=self._debug_mode, filename=self._log_file,
      quiet_mode=self._quiet_mode)

  return True
def _ProcessSources(self, session, storage_writer):
  """Processes the sources and extracts events.

  Args:
    session (Session): session in which the sources are processed.
    storage_writer (StorageWriter): storage writer for a session storage.

  Returns:
    ProcessingStatus: processing status.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  is_archive = False
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    is_archive = self._IsArchiveFile(self._source_path_specs[0])
    if is_archive:
      self._source_type = definitions.SOURCE_TYPE_ARCHIVE

  single_process_mode = self._single_process_mode
  if self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
    if not self._process_archives or not is_archive:
      single_process_mode = True

  if single_process_mode:
    extraction_engine = single_extraction_engine.SingleProcessEngine()
  else:
    extraction_engine = multi_extraction_engine.ExtractionMultiProcessEngine(
        number_of_worker_processes=self._number_of_extraction_workers,
        worker_memory_limit=self._worker_memory_limit,
        worker_timeout=self._worker_timeout)

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine, session, storage_writer)

  self._expanded_parser_filter_expression = (
      self._GetExpandedParserFilterExpression(
          extraction_engine.knowledge_base))

  enabled_parser_names = self._expanded_parser_filter_expression.split(',')
  number_of_enabled_parsers = len(enabled_parser_names)

  force_parser = False
  if (self._source_type == dfvfs_definitions.SOURCE_TYPE_FILE and
      not is_archive and number_of_enabled_parsers == 1):
    force_parser = True

    self._extract_winevt_resources = False

  elif ('winevt' not in enabled_parser_names and
        'winevtx' not in enabled_parser_names):
    self._extract_winevt_resources = False

  elif (self._extract_winevt_resources and
        'pe' not in enabled_parser_names):
    logger.warning(
        'A Windows EventLog parser is enabled in combination with '
        'extraction of Windows EventLog resources, but the Portable '
        'Executable (PE) parser is disabled. Therefore Windows EventLog '
        'resources cannot be extracted.')

    self._extract_winevt_resources = False

  configuration = self._CreateExtractionProcessingConfiguration()

  try:
    extraction_engine.BuildCollectionFilters(
        self._artifact_definitions_path, self._custom_artifacts_path,
        extraction_engine.knowledge_base, self._artifact_filters,
        self._filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  session_configuration = self._CreateExtractionSessionConfiguration(
      session, enabled_parser_names)
  storage_writer.AddAttributeContainer(session_configuration)

  source_configurations = []
  for path_spec in self._source_path_specs:
    source_configuration = artifacts.SourceConfigurationArtifact(
        path_spec=path_spec)
    source_configurations.append(source_configuration)

  # TODO: improve to detect more than one system configuration.
  # TODO: improve to add volumes to system configuration.
  system_configuration = (
      extraction_engine.knowledge_base.GetSystemConfigurationArtifact())
  storage_writer.AddAttributeContainer(system_configuration)

  status_update_callback = (
      self._status_view.GetExtractionStatusUpdateCallback())

  if single_process_mode:
    logger.debug('Starting extraction in single process mode.')

    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, self._resolver_context,
        configuration, force_parser=force_parser,
        status_update_callback=status_update_callback)

  else:
    logger.debug('Starting extraction in multi process mode.')

    # The following overrides are needed because pylint 2.6.0 gets confused
    # about which ProcessSources to check against.
    # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
    processing_status = extraction_engine.ProcessSources(
        source_configurations, storage_writer, session.identifier,
        configuration, enable_sigsegv_handler=self._enable_sigsegv_handler,
        status_update_callback=status_update_callback,
        storage_file_path=self._storage_file_path)

  return processing_status
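# Hedged aside (parameter names mirror the attributes above, function is
# illustrative): the processing-mode decision in _ProcessSources reduced to a
# pure function. A single-file source runs in one process unless it is an
# archive and archive processing is enabled.
def use_single_process_mode(
    source_is_file, is_archive, process_archives, single_process_requested):
  """Returns True if extraction should run in single process mode."""
  if single_process_requested:
    return True
  if source_is_file and (not process_archives or not is_archive):
    return True
  return False


# A plain file always runs in single process mode; an archive with archive
# processing enabled may run multi-process.
assert use_single_process_mode(True, False, True, False)
assert not use_single_process_mode(True, True, True, False)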
def ParseArguments(self):
  """Parses the command line arguments.

  Returns:
    bool: True if the arguments were successfully parsed.
  """
  loggers.ConfigureLogging()

  argument_parser = argparse.ArgumentParser(
      description=self.DESCRIPTION, epilog=self.EPILOG, add_help=False,
      formatter_class=argparse.RawDescriptionHelpFormatter)

  self.AddBasicOptions(argument_parser)

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      argument_parser, names=['storage_file'])

  data_location_group = argument_parser.add_argument_group(
      'data location arguments')

  argument_helper_names = ['artifact_definitions', 'data_location']
  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      data_location_group, names=argument_helper_names)

  extraction_group = argument_parser.add_argument_group(
      'extraction arguments')

  argument_helper_names = [
      'artifact_filters', 'extraction', 'filter_file', 'hashers',
      'parsers', 'yara_rules']
  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      extraction_group, names=argument_helper_names)

  self.AddStorageMediaImageOptions(extraction_group)
  self.AddTimeZoneOption(extraction_group)
  self.AddVSSProcessingOptions(extraction_group)
  self.AddCredentialOptions(extraction_group)

  info_group = argument_parser.add_argument_group('informational arguments')

  self.AddInformationalOptions(info_group)

  info_group.add_argument(
      '--info', dest='show_info', action='store_true', default=False,
      help='Print out information about supported plugins and parsers.')

  info_group.add_argument(
      '--use_markdown', '--use-markdown', dest='use_markdown',
      action='store_true', default=False, help=(
          'Output lists in Markdown format, for use in combination with '
          '"--hashers list", "--parsers list" or "--timezone list".'))

  info_group.add_argument(
      '--no_dependencies_check', '--no-dependencies-check',
      dest='dependencies_check', action='store_false', default=True,
      help='Disable the dependencies check.')

  self.AddLogFileOptions(info_group)

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      info_group, names=['status_view'])

  output_group = argument_parser.add_argument_group('output arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      output_group, names=['text_prepend'])

  processing_group = argument_parser.add_argument_group(
      'processing arguments')

  self.AddPerformanceOptions(processing_group)
  self.AddProcessingOptions(processing_group)

  processing_group.add_argument(
      '--sigsegv_handler', '--sigsegv-handler', dest='sigsegv_handler',
      action='store_true', default=False, help=(
          'Enables the SIGSEGV handler. WARNING this functionality is '
          'experimental and will deadlock a worker process if a real '
          'segfault is caught, but not signal SIGSEGV. This functionality '
          'is therefore primarily intended for debugging purposes.'))

  profiling_group = argument_parser.add_argument_group('profiling arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      profiling_group, names=['profiling'])

  storage_group = argument_parser.add_argument_group('storage arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      storage_group, names=['storage_format'])

  argument_parser.add_argument(
      self._SOURCE_OPTION, action='store', metavar='SOURCE', nargs='?',
      default=None, type=str, help=(
          'Path to a source device, file or directory. If the source is '
          'a supported storage media device or image file, archive file '
          'or a directory, the files within are processed recursively.'))

  try:
    options = argument_parser.parse_args()
  except UnicodeEncodeError:
    # If we get here we are attempting to print help in a non-Unicode
    # terminal.
    self._output_writer.Write('\n')
    self._output_writer.Write(argument_parser.format_help())
    return False

  # Properly prepare the attributes according to local encoding.
  if self.preferred_encoding == 'ascii':
    logger.warning(
        'The preferred encoding of your system is ASCII, which is not '
        'optimal for the typically non-ASCII characters that need to be '
        'parsed and processed. The tool will most likely crash and die, '
        'perhaps in a way that may not be recoverable. A five second delay '
        'is introduced to give you time to cancel the runtime and '
        'reconfigure your preferred encoding, otherwise continue at own '
        'risk.')
    time.sleep(5)

  if self._process_archives:
    logger.warning(
        'Scanning archive files currently can cause deadlock. Continue at '
        'your own risk.')
    time.sleep(5)

  try:
    self.ParseOptions(options)
  except errors.BadConfigOption as exception:
    self._output_writer.Write('ERROR: {0!s}\n'.format(exception))
    self._output_writer.Write('\n')
    self._output_writer.Write(argument_parser.format_usage())
    return False

  self._command_line_arguments = self.GetCommandLineArguments()

  loggers.ConfigureLogging(
      debug_output=self._debug_mode, filename=self._log_file,
      quiet_mode=self._quiet_mode)

  return True
def ParseOptions(cls, options, output_module):
  """Parses and validates options.

  Args:
    options (argparse.Namespace): parser options.
    output_module (OutputModule): output module to configure.

  Raises:
    BadConfigObject: when the output module object is of the wrong type.
    BadConfigOption: when a configuration parameter fails validation.
  """
  if not isinstance(
      output_module, shared_elastic.SharedElasticsearchOutputModule):
    raise errors.BadConfigObject(
        'Output module is not an instance of ElasticsearchOutputModule')

  index_name = cls._ParseStringOption(
      options, 'index_name', default_value=cls._DEFAULT_INDEX_NAME)
  flush_interval = cls._ParseNumericOption(
      options, 'flush_interval', default_value=cls._DEFAULT_FLUSH_INTERVAL)

  mappings_file_path = cls._ParseStringOption(options, 'elastic_mappings')
  elastic_user = cls._ParseStringOption(options, 'elastic_user')
  elastic_password = cls._ParseStringOption(options, 'elastic_password')
  use_ssl = getattr(options, 'use_ssl', False)

  ca_certificates_path = cls._ParseStringOption(
      options, 'ca_certificates_file_path')
  elastic_url_prefix = cls._ParseStringOption(options, 'elastic_url_prefix')

  if elastic_password is None:
    elastic_password = os.getenv('PLASO_ELASTIC_PASSWORD', None)

  if elastic_password is not None:
    logger.warning(
        'Note that specifying your Elasticsearch password via '
        '--elastic_password or the environment variable '
        'PLASO_ELASTIC_PASSWORD can expose the password to other users on '
        'the system.')

  if elastic_user is not None and elastic_password is None:
    elastic_password = getpass.getpass('Enter your Elasticsearch password: ')

  raw_fields = getattr(options, 'raw_fields', cls._DEFAULT_RAW_FIELDS)
  if raw_fields:
    logger.warning(
        'The --raw_fields option is deprecated, instead use: '
        '--elastic_mappings=raw_fields.mappings')

  if not mappings_file_path or not os.path.isfile(mappings_file_path):
    mappings_filename = output_module.MAPPINGS_FILENAME

    if raw_fields and isinstance(
        output_module, elastic.ElasticsearchOutputModule):
      mappings_filename = 'raw_fields.mappings'

    data_location = getattr(options, '_data_location', None) or 'data'
    mappings_file_path = os.path.join(data_location, mappings_filename)

  if not mappings_file_path or not os.path.isfile(mappings_file_path):
    raise errors.BadConfigOption(
        'No such Elasticsearch mappings file: {0!s}.'.format(
            mappings_file_path))

  with open(mappings_file_path, 'r') as file_object:
    mappings_json = json.load(file_object)

  output_module.SetMappings(mappings_json)
def _Extract(
    self, source_path_specs, destination_path, output_writer,
    artifact_filters, filter_file, artifact_definitions_path,
    custom_artifacts_path, skip_duplicates=True):
  """Extracts files.

  This method runs the file extraction process on the image and, when
  enabled, on every VSS.

  Args:
    source_path_specs (list[dfvfs.PathSpec]): path specifications to
        extract.
    destination_path (str): path where the extracted files should be
        stored.
    output_writer (CLIOutputWriter): output writer.
    artifact_filters (list[str]): names of artifact definitions that are
        used for filtering file system and Windows Registry key paths.
    filter_file (str): path of the file that contains the filter file path
        filters.
    artifact_definitions_path (str): path to artifact definitions file.
    custom_artifacts_path (str): path to custom artifact definitions file.
    skip_duplicates (Optional[bool]): True if files with duplicate content
        should be skipped.

  Raises:
    BadConfigOption: if an invalid collection filter was specified.
  """
  extraction_engine = engine.BaseEngine()

  # If the source is a directory or a storage media image
  # run pre-processing.
  if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS:
    self._PreprocessSources(extraction_engine)

  try:
    extraction_engine.BuildCollectionFilters(
        artifact_definitions_path, custom_artifacts_path,
        extraction_engine.knowledge_base, artifact_filters, filter_file)
  except errors.InvalidFilter as exception:
    raise errors.BadConfigOption(
        'Unable to build collection filters with error: {0!s}'.format(
            exception))

  filters_helper = extraction_engine.collection_filters_helper

  excluded_find_specs = None
  included_find_specs = None
  if filters_helper:
    excluded_find_specs = filters_helper.excluded_file_system_find_specs
    included_find_specs = filters_helper.included_file_system_find_specs

  output_writer.Write('Extracting file entries.\n')

  path_spec_generator = self._path_spec_extractor.ExtractPathSpecs(
      source_path_specs, find_specs=included_find_specs,
      resolver_context=self._resolver_context)

  for path_spec in path_spec_generator:
    file_entry = path_spec_resolver.Resolver.OpenFileEntry(
        path_spec, resolver_context=self._resolver_context)

    if not file_entry:
      logger.warning(
          'Unable to open file entry for path spec: {0:s}'.format(
              path_spec.comparable))
      continue

    skip_file_entry = False
    for find_spec in excluded_find_specs or []:
      skip_file_entry = find_spec.CompareLocation(file_entry)
      if skip_file_entry:
        break

    if skip_file_entry:
      logger.info('Skipped: {0:s} because of exclusion filter.'.format(
          file_entry.path_spec.location))
      continue

    self._ExtractFileEntry(
        file_entry, destination_path, skip_duplicates=skip_duplicates)
def _ExtractDataStream(
    self, file_entry, data_stream_name, destination_path,
    skip_duplicates=True):
  """Extracts a data stream.

  Args:
    file_entry (dfvfs.FileEntry): file entry containing the data stream.
    data_stream_name (str): name of the data stream.
    destination_path (str): path where the extracted files should be
        stored.
    skip_duplicates (Optional[bool]): True if files with duplicate content
        should be skipped.
  """
  if not data_stream_name and not file_entry.IsFile():
    return

  display_name = path_helper.PathHelper.GetDisplayNameForPathSpec(
      file_entry.path_spec)

  try:
    digest = self._CalculateDigestHash(file_entry, data_stream_name)
  except (IOError, dfvfs_errors.BackEndError) as exception:
    logger.error((
        '[skipping] unable to read content of file entry: {0:s} '
        'with error: {1!s}').format(display_name, exception))
    return

  if not digest:
    logger.error(
        '[skipping] unable to read content of file entry: {0:s}'.format(
            display_name))
    return

  target_directory, target_filename = self._CreateSanitizedDestination(
      file_entry, file_entry.path_spec, data_stream_name, destination_path)

  # Appending a path separator when missing makes the prefix comparison
  # below behave consistently.
  if not destination_path.endswith(os.path.sep):
    destination_path = destination_path + os.path.sep

  target_path = os.path.join(target_directory, target_filename)
  if target_path.startswith(destination_path):
    path = target_path[len(destination_path):]
    self._paths_by_hash[digest].append(path)

  if skip_duplicates:
    duplicate_display_name = self._digests.get(digest, None)
    if duplicate_display_name:
      logger.warning((
          '[skipping] file entry: {0:s} is a duplicate of: {1:s} with '
          'digest: {2:s}').format(
              display_name, duplicate_display_name, digest))
      return

    self._digests[digest] = display_name

  if not os.path.isdir(target_directory):
    os.makedirs(target_directory)

  if os.path.exists(target_path):
    logger.warning((
        '[skipping] unable to export contents of file entry: {0:s} '
        'because exported file: {1:s} already exists.').format(
            display_name, target_path))
    return

  try:
    self._WriteFileEntry(file_entry, data_stream_name, target_path)
  except (IOError, dfvfs_errors.BackEndError) as exception:
    logger.error((
        '[skipping] unable to export contents of file entry: {0:s} '
        'with error: {1!s}').format(display_name, exception))

    try:
      os.remove(target_path)
    except (IOError, OSError):
      pass
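# Hedged aside (hashing helper is illustrative, not plaso's
# _CalculateDigestHash): the digest bookkeeping used above, where the first
# path seen for a digest is exported and later ones are skipped as
# duplicates.
import hashlib


def iter_unique_paths(paths, chunk_size=4 * 1024 * 1024):
  """Yields only paths whose content has not been seen before."""
  digests = {}
  for path in paths:
    sha256 = hashlib.sha256()
    with open(path, 'rb') as file_object:
      for chunk in iter(lambda: file_object.read(chunk_size), b''):
        sha256.update(chunk)

    digest = sha256.hexdigest()
    if digest in digests:
      # Duplicate content: same digest was produced by an earlier path.
      continue

    digests[digest] = path
    yield path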
def ProcessStorage(self):
  """Processes a Plaso storage file.

  Raises:
    BadConfigOption: when a configuration parameter fails validation or the
        storage file cannot be opened with read access.
    RuntimeError: if a non-recoverable situation is encountered.
  """
  self._status_view.SetMode(self._status_view_mode)
  self._status_view.SetStorageFileInformation(self._storage_file_path)

  status_update_callback = (
      self._status_view.GetAnalysisStatusUpdateCallback())

  storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
      self._storage_file_path)
  if not storage_reader:
    raise RuntimeError('Unable to create storage reader.')

  text_prepend = None

  try:
    for session_index, session in enumerate(storage_reader.GetSessions()):
      self._knowledge_base.SetActiveSession(session.identifier)

      if session.source_configurations:
        # TODO: kept for backwards compatibility.
        for source_configuration in session.source_configurations:
          self._knowledge_base.ReadSystemConfigurationArtifact(
              source_configuration.system_configuration)
      else:
        system_configuration = storage_reader.GetAttributeContainerByIndex(
            'system_configuration', session_index)
        self._knowledge_base.ReadSystemConfigurationArtifact(
            system_configuration)

      text_prepend = session.text_prepend

    self._number_of_stored_analysis_reports = (
        storage_reader.GetNumberOfAttributeContainers(
            self._CONTAINER_TYPE_ANALYSIS_REPORT))

  finally:
    storage_reader.Close()

  session = engine.BaseEngine.CreateSession()

  configuration = configurations.ProcessingConfiguration()
  configuration.data_location = self._data_location
  configuration.debug_output = self._debug_mode
  configuration.log_filename = self._log_file
  configuration.text_prepend = text_prepend
  configuration.profiling.directory = self._profiling_directory
  configuration.profiling.sample_rate = self._profiling_sample_rate
  configuration.profiling.profilers = self._profilers

  if self._analysis_plugins:
    self._AnalyzeEvents(
        session, configuration,
        status_update_callback=status_update_callback)

  # TODO: abort if session.aborted is True

  if self._output_format != 'null':
    storage_reader = (
        storage_factory.StorageFactory.CreateStorageReaderForFile(
            self._storage_file_path))

    preferred_language = self._knowledge_base.language
    if self._preferred_language:
      preferred_language = self._preferred_language

    try:
      self._output_mediator.SetPreferredLanguageIdentifier(
          preferred_language)
    except (KeyError, TypeError):
      logger.warning('Unable to set preferred language: {0!s}.'.format(
          preferred_language))

    self._output_mediator.SetTextPrepend(text_prepend)

    self._output_module.SetStorageReader(storage_reader)

    # TODO: add single process output and formatting engine support.
    output_engine = (
        multi_output_engine.OutputAndFormattingMultiProcessEngine())

    output_engine.ExportEvents(
        self._knowledge_base, storage_reader, self._output_module,
        configuration, deduplicate_events=self._deduplicate_events,
        event_filter=self._event_filter,
        status_update_callback=status_update_callback,
        time_slice=self._time_slice, use_time_slicer=self._use_time_slicer)

    self._output_module.Close()
    self._output_module = None

  if self._quiet_mode:
    return

  self._output_writer.Write('Processing completed.\n')

  storage_reader = storage_factory.StorageFactory.CreateStorageReaderForFile(
      self._storage_file_path)
  self._PrintAnalysisReportsDetails(storage_reader)
def ParseArguments(self):
  """Parses the command line arguments.

  Returns:
    bool: True if the arguments were successfully parsed.
  """
  loggers.ConfigureLogging()

  argument_parser = argparse.ArgumentParser(
      description=self.DESCRIPTION, epilog=self.EPILOG, add_help=False,
      formatter_class=argparse.RawDescriptionHelpFormatter)

  self.AddBasicOptions(argument_parser)

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      argument_parser, names=['storage_file'])

  data_location_group = argument_parser.add_argument_group(
      'data location arguments')

  argument_helper_names = ['artifact_definitions', 'data_location']
  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      data_location_group, names=argument_helper_names)

  extraction_group = argument_parser.add_argument_group(
      'extraction arguments')

  argument_helper_names = [
      'extraction', 'filter_file', 'hashers', 'parsers', 'yara_rules']
  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      extraction_group, names=argument_helper_names)

  self.AddStorageMediaImageOptions(extraction_group)
  self.AddTimeZoneOption(extraction_group)
  self.AddVSSProcessingOptions(extraction_group)
  self.AddCredentialOptions(extraction_group)

  info_group = argument_parser.add_argument_group('informational arguments')

  self.AddInformationalOptions(info_group)

  info_group.add_argument(
      '--info', dest='show_info', action='store_true', default=False,
      help='Print out information about supported plugins and parsers.')

  info_group.add_argument(
      '--use_markdown', '--use-markdown', dest='use_markdown',
      action='store_true', default=False, help=(
          'Output lists in Markdown format, for use in combination with '
          '"--hashers list", "--parsers list" or "--timezone list".'))

  info_group.add_argument(
      '--no_dependencies_check', '--no-dependencies-check',
      dest='dependencies_check', action='store_false', default=True,
      help='Disable the dependencies check.')

  self.AddLogFileOptions(info_group)

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      info_group, names=['status_view'])

  output_group = argument_parser.add_argument_group('output arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      output_group, names=['text_prepend'])

  processing_group = argument_parser.add_argument_group(
      'processing arguments')

  self.AddPerformanceOptions(processing_group)
  self.AddProcessingOptions(processing_group)

  processing_group.add_argument(
      '--sigsegv_handler', '--sigsegv-handler', dest='sigsegv_handler',
      action='store_true', default=False, help=(
          'Enables the SIGSEGV handler. WARNING this functionality is '
          'experimental and will deadlock a worker process if a real '
          'segfault is caught, but not signal SIGSEGV. This functionality '
          'is therefore primarily intended for debugging purposes.'))

  profiling_group = argument_parser.add_argument_group('profiling arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      profiling_group, names=['profiling'])

  storage_group = argument_parser.add_argument_group('storage arguments')

  helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
      storage_group, names=['storage_format'])

  argument_parser.add_argument(
      self._SOURCE_OPTION, action='store', metavar='SOURCE', nargs='?',
      default=None, type=str, help=(
          'Path to a source device, file or directory. If the source is '
          'a supported storage media device or image file, archive file '
          'or a directory, the files within are processed recursively.'))

  try:
    options = argument_parser.parse_args()
  except UnicodeEncodeError:
    # If we get here we are attempting to print help in a non-Unicode
    # terminal.
    self._output_writer.Write('\n')
    self._output_writer.Write(argument_parser.format_help())
    return False

  # Properly prepare the attributes according to local encoding.
  if self.preferred_encoding == 'ascii':
    logger.warning(
        'The preferred encoding of your system is ASCII, which is not '
        'optimal for the typically non-ASCII characters that need to be '
        'parsed and processed. The tool will most likely crash and die, '
        'perhaps in a way that may not be recoverable. A five second delay '
        'is introduced to give you time to cancel the runtime and '
        'reconfigure your preferred encoding, otherwise continue at own '
        'risk.')
    time.sleep(5)

  if self._process_archives:
    logger.warning(
        'Scanning archive files currently can cause deadlock. Continue at '
        'your own risk.')
    time.sleep(5)

  try:
    self.ParseOptions(options)
  except errors.BadConfigOption as exception:
    self._output_writer.Write('ERROR: {0!s}\n'.format(exception))
    self._output_writer.Write('\n')
    self._output_writer.Write(argument_parser.format_usage())
    return False

  self._command_line_arguments = self.GetCommandLineArguments()

  loggers.ConfigureLogging(
      debug_output=self._debug_mode, filename=self._log_file,
      quiet_mode=self._quiet_mode)

  return True
def ParseOptions(self, options):
  """Parses the options.

  Args:
    options (argparse.Namespace): command line arguments.

  Raises:
    BadConfigOption: if the options are invalid.
  """
  # The output module options depend on the preferred_language
  # and output_time_zone options.
  self._ParseOutputOptions(options)

  names = ['analysis_plugins', 'language', 'profiling']
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=names)

  self.list_analysis_plugins = self._analysis_plugins == 'list'
  self.list_language_tags = self._preferred_language == 'list'
  self.list_profilers = self._profilers == 'list'

  self.show_troubleshooting = getattr(options, 'show_troubleshooting', False)

  if (self.list_analysis_plugins or self.list_language_tags or
      self.list_profilers or self.list_time_zones or
      self.show_troubleshooting):
    return

  # Check output modules after the other listable options, otherwise
  # it could raise with "requires an output file".
  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['output_modules'])

  self.list_output_modules = self._output_format == 'list'
  if self.list_output_modules:
    return

  self._ParseInformationalOptions(options)

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['data_location'])

  self._ParseLogFileOptions(options)

  self._ParseProcessingOptions(options)

  helpers_manager.ArgumentHelperManager.ParseOptions(
      options, self, names=['event_filters'])

  self._deduplicate_events = getattr(options, 'dedup', True)

  if self._data_location:
    # Update the data location with the calculated value.
    options.data_location = self._data_location
  else:
    logger.warning('Unable to automatically determine data location.')

  self._command_line_arguments = self.GetCommandLineArguments()

  self._storage_file_path = self.ParseStringOption(options, 'storage_file')

  self._EnforceProcessMemoryLimit(self._process_memory_limit)

  self._analysis_plugins = self._CreateAnalysisPlugins(options)
  self._output_module = self._CreateOutputModule(options)

  check_readable_only = not self._analysis_plugins
  self._CheckStorageFile(
      self._storage_file_path, check_readable_only=check_readable_only)