def Process(self): """Checks whether the paths exists and updates the state accordingly.""" for file_container in self.state.GetContainers(containers.File, pop=True): self.logger.info('{0:s} -> {1:s}'.format(file_container.path, self._target_directory)) if not self._compress: try: full_paths = self._CopyFileOrDirectory( file_container.path, self._target_directory) except OSError as exception: self.ModuleError( 'Could not copy files to {0:s}: {1!s}'.format( self._target_directory, exception), critical=True) for path_ in full_paths: file_name = os.path.basename(path_) self.state.StoreContainer( containers.File(name=file_name, path=path_)) else: try: tar_file = utils.Compress(file_container.path, self._target_directory) self.state.StoreContainer( containers.File(name=os.path.basename(tar_file), path=tar_file)) self.logger.info('{0:s} was compressed into {1:s}'.format( file_container.path, tar_file)) except RuntimeError as exception: self.ModuleError(exception, critical=True) return
def _ProcessThread(self, client): """Processes a single client. This function is used as a callback for the processing thread. Args: client (object): GRR client object to act on. """ file_list = self.files if not file_list: return self.logger.info('Filefinder to collect {0:d} items'.format( len(file_list))) flow_action = flows_pb2.FileFinderAction(action_type=self.action) flow_args = flows_pb2.FileFinderArgs( paths=file_list, action=flow_action, ) flow_id = self._LaunchFlow(client, 'FileFinder', flow_args) self._AwaitFlow(client, flow_id) collected_flow_data = self._DownloadFiles(client, flow_id) if collected_flow_data: self.logger.info('{0!s}: Downloaded: {1:s}'.format( flow_id, collected_flow_data)) container = containers.File(name=client.data.os_info.fqdn.lower(), path=collected_flow_data) self.state.StoreContainer(container)
def _ProcessLogContainer(self, logs_container):
  """Processes a GCP logs container.

  Args:
    logs_container (GCPLogs): logs container.
  """
  if not logs_container.path:
    return

  output_file = tempfile.NamedTemporaryFile(
      mode='w', encoding='utf-8', delete=False, suffix='.jsonl')
  output_path = output_file.name

  with open(logs_container.path, 'r') as input_file:
    for line in input_file:
      transformed_line = self._ProcessLogLine(
          line, logs_container.filter_expression,
          logs_container.project_name)
      if transformed_line:
        output_file.write(transformed_line)
        output_file.write('\n')
  output_file.close()

  timeline_name = 'GCP logs {0:s} "{1:s}"'.format(
      logs_container.project_name, logs_container.filter_expression)

  container = containers.File(name=timeline_name, path=output_path)
  self.state.StoreContainer(container)
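# A hypothetical sketch of the per-line transform performed by
# _ProcessLogLine above: parse one exported GCP log entry and tag it with
# the query context so the resulting timeline keeps its provenance. The
# real method is not shown here and may add different attributes.
import json

def _ProcessLogLineSketch(line, filter_expression, project_name):
  """Returns a transformed JSON line, or None if the line does not parse."""
  try:
    entry = json.loads(line)
  except ValueError:  # Also covers json.JSONDecodeError.
    return None
  entry['query'] = filter_expression
  entry['project_name'] = project_name
  return json.dumps(entry)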
def Process(self): """Copies the list of paths to or from the remote host.""" if not self._paths: if self._upload: # We're uploading local paths to the remote host. fspaths = self.state.GetContainers(containers.File) else: # We're downloading remote paths to the local host. fspaths = self.state.GetContainers(containers.RemoteFSPath) self._paths = [fspath.path for fspath in fspaths] if not self._paths: self.ModuleError('No paths specified to SCP module.', critical=True) self._CreateDestinationDirectory(remote=self._upload) cmd = ['scp'] # Set options for SSH multiplexing if self._multiplexing: cmd.extend([ '-o', 'ControlMaster=auto', '-o', 'ControlPath=~/.ssh/ctrl-%C', ]) if self._id_file: cmd.extend(['-i', self._id_file]) if self._upload: # scp /path1 /path2 user@host:/destination cmd.extend(self._paths) cmd.extend(self._PrefixRemotePaths([self._destination])) else: # We can use (faster) # scp user@host:"/path1 /path2" # or (slower) # scp user@host:/path1 user@host:/path2 /destination cmd.extend(self._PrefixRemotePaths(self._paths)) cmd.extend([self._destination]) self.logger.debug('Executing SCP command: {0:s}'.format(' '.join(cmd))) ret = subprocess.call(cmd) if ret != 0: self.ModuleError('Failed copying {0!s}'.format(self._paths), critical=True) for path_ in self._paths: file_name = os.path.basename(path_) full_path = os.path.join(self._destination, file_name) if self._upload: self.logger.info( 'Remote filesystem path {0:s}'.format(full_path)) fspath = containers.RemoteFSPath(path=full_path, hostname=self._hostname) else: self.logger.info( 'Local filesystem path {0:s}'.format(full_path)) fspath = containers.File(name=file_name, path=full_path) self.state.StoreContainer(fspath)
def testProcessCompress(self, mock_mkdtemp, mock_compress):
  """Tests that the module processes input and compresses correctly."""
  test_state = state.DFTimewolfState(config.Config)
  test_state.StoreContainer(containers.File(
      name='description', path='/fake/evidence_directory'))
  test_state.StoreContainer(containers.File(
      name='description2', path='/fake/evidence_file'))
  mock_mkdtemp.return_value = '/fake/random'
  mock_compress.return_value = '/fake/tarball.tgz'
  local_filesystem_copy = local_filesystem.LocalFilesystemCopy(test_state)
  local_filesystem_copy.SetUp(compress=True)
  local_filesystem_copy.Process()
  mock_compress.assert_has_calls([
      mock.call('/fake/evidence_directory', '/fake/random'),
      mock.call('/fake/evidence_file', '/fake/random'),
  ])
def Process(self): """Downloads the results of a GRR hunt. Raises: RuntimeError: if no items specified for collection. """ hunt = self.grr_api.Hunt(self.hunt_id).Get() for description, path in self._CollectHuntResults(hunt): container = containers.File(name=description, path=path) self.state.StoreContainer(container)
def _ProcessThread(self, client): """Processes a single GRR client. This function is used as a callback for the processing thread. Args: client (object): a GRR client object. """ system_type = client.data.os_info.system self.logger.info('System type: {0:s}'.format(system_type)) # If the list is supplied by the user via a flag, honor that. artifact_list = [] if self.artifacts: self.logger.info('Artifacts to be collected: {0!s}'.format( self.artifacts)) artifact_list = self.artifacts else: default_artifacts = self.artifact_registry.get(system_type, None) if default_artifacts: self.logger.info( 'Collecting default artifacts for {0:s}: {1:s}'.format( system_type, ', '.join(default_artifacts))) artifact_list.extend(default_artifacts) if self.extra_artifacts: self.logger.info('Throwing in an extra {0!s}'.format( self.extra_artifacts)) artifact_list.extend(self.extra_artifacts) artifact_list = list(set(artifact_list)) if not artifact_list: return flow_args = flows_pb2.ArtifactCollectorFlowArgs( artifact_list=artifact_list, use_tsk=self.use_tsk, ignore_interpolation_errors=True, apply_parsers=False) flow_id = self._LaunchFlow(client, 'ArtifactCollectorFlow', flow_args) if not flow_id: msg = 'Flow could not be launched on {0:s}.'.format( client.client_id) msg += '\nArtifactCollectorFlow args: {0!s}'.format(flow_args) self.ModuleError(msg, critical=True) self._AwaitFlow(client, flow_id) collected_flow_data = self._DownloadFiles(client, flow_id) if collected_flow_data: self.logger.info('{0!s}: Downloaded: {1:s}'.format( flow_id, collected_flow_data)) container = containers.File(name=client.data.os_info.fqdn.lower(), path=collected_flow_data) self.state.StoreContainer(container)
def testProcessCopy(self, mock_mkdtemp, unused_mocklistdir,
                    unused_mockisdir, mock_copy2, mock_copytree):
  """Tests that the module processes input and copies correctly."""
  test_state = state.DFTimewolfState(config.Config)
  test_state.StoreContainer(containers.File(
      name='description', path='/fake/evidence_directory'))
  test_state.StoreContainer(containers.File(
      name='description2', path='/fake/evidence_file'))
  mock_mkdtemp.return_value = '/fake/random'
  local_filesystem_copy = local_filesystem.LocalFilesystemCopy(test_state)
  local_filesystem_copy.SetUp()
  local_filesystem_copy.Process()
  mock_copytree.assert_has_calls([
      mock.call('/fake/evidence_directory', '/fake/random'),
  ])
  mock_copy2.assert_called_with('/fake/evidence_file', '/fake/random')
def testSetupError(self, mock_copytree, mock_isdir):
  """Tests that an error is generated if target_directory is unavailable."""
  mock_copytree.side_effect = OSError('FAKEERROR')
  mock_isdir.return_value = False
  test_state = state.DFTimewolfState(config.Config)
  test_state.StoreContainer(containers.File(name='blah', path='/sourcefile'))
  local_filesystem_copy = local_filesystem.LocalFilesystemCopy(test_state)
  local_filesystem_copy.SetUp(target_directory='/nonexistent')
  local_filesystem_copy.Process()
  self.assertEqual(len(test_state.errors), 1)
def Process(self): """Executes log2timeline.py on the module input.""" for file_container in self.state.GetContainers(containers.File, pop=True): description = file_container.name path = file_container.path log_file_path = os.path.join(self._output_path, 'plaso.log') print('Log file: {0:s}'.format(log_file_path)) # Build the plaso command line. cmd = ['log2timeline.py'] # Since we might be running alongside another Module, always disable # the status view. cmd.extend(['-q', '--status_view', 'none']) if self._timezone: cmd.extend(['-z', self._timezone]) # Analyze all available partitions. cmd.extend(['--partition', 'all']) # Setup logging. cmd.extend(['--logfile', log_file_path]) # And now, the crux of the command. # Generate a new storage file for each plaso run plaso_storage_file_path = os.path.join( self._output_path, '{0:s}.plaso'.format(uuid.uuid4().hex)) cmd.extend([plaso_storage_file_path, path]) # Run the l2t command full_cmd = ' '.join(cmd) print('Running external command: "{0:s}"'.format(full_cmd)) try: l2t_proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) _, error = l2t_proc.communicate() l2t_status = l2t_proc.wait() except OSError as exception: self.state.AddError(str(exception), critical=True) return # Catch all remaining errors since we want to gracefully report them except Exception as exception: # pylint: disable=broad-except self.state.AddError(str(exception), critical=True) return if l2t_status: message = ('The log2timeline command {0:s} failed: {1!s}.' ' Check log file for details.').format(full_cmd, error) self.state.AddError(message, critical=True) return container = containers.File(description, plaso_storage_file_path) self.state.StoreContainer(container)
def Process(self): """Collects paths from the local file system.""" file_containers = [] for path in self._paths: if os.path.exists(path): container = containers.File(os.path.basename(path), path) file_containers.append(container) else: self.logger.warning('Path {0:s} does not exist'.format(path)) if not file_containers: self.ModuleError(message='No valid paths collected, bailing', critical=True) for container in file_containers: self.state.StoreContainer(container)
def Process(self): """Downloads the results of a GRR collection flow. Raises: DFTimewolfError: if no files specified """ client = self._GetClientByHostname(self.host) self._AwaitFlow(client, self.flow_id) collected_flow_data = self._DownloadFiles(client, self.flow_id) if collected_flow_data: self.logger.info('{0:s}: Downloaded: {1:s}'.format( self.flow_id, collected_flow_data)) container = containers.File(name=client.data.os_info.fqdn.lower(), path=collected_flow_data) self.state.StoreContainer(container)
def Process(self): """Process files with Turbinia.""" log_file_path = os.path.join(self._output_path, 'turbinia.log') print('Turbinia log file: {0:s}'.format(log_file_path)) vm_containers = self.state.GetContainers(containers.ForensicsVM) if vm_containers and not self.disk_name: forensics_vm = vm_containers[0] self.disk_name = forensics_vm.evidence_disk.name self.logger.info( 'Using disk {0:s} from previous collector'.format(self.disk_name)) evidence_ = evidence.GoogleCloudDisk( disk_name=self.disk_name, project=self.project, zone=self.turbinia_zone) task_data = self.TurbiniaProcess(evidence_) local_paths, gs_paths = self._DeterminePaths(task_data) if not local_paths and not gs_paths: self.ModuleError( 'No interesting files found in Turbinia output.', critical=True) timeline_label = '{0:s}-{1:s}'.format(self.project, self.disk_name) # Any local files that exist we can add immediately to the output all_local_paths = [ (timeline_label, p) for p in local_paths if os.path.exists(p)] downloaded_gs_paths = self._DownloadFilesFromGCS(timeline_label, gs_paths) all_local_paths.extend(downloaded_gs_paths) self.logger.info('Collected {0:d} results'.format(len(all_local_paths))) if not all_local_paths: self.ModuleError('No interesting files could be found.', critical=True) for description, path in all_local_paths: if path.endswith('BinaryExtractorTask.tar.gz'): self.logger.info('Found BinaryExtractorTask result: {0:s}'.format(path)) container = containers.ThreatIntelligence( name='BinaryExtractorResults', indicator=None, path=path) if path.endswith('hashes.json'): self.logger.info('Found hashes.json: {0:s}'.format(path)) container = containers.ThreatIntelligence( name='ImageExportHashes', indicator=None, path=path) if path.endswith('.plaso'): self.logger.info('Found plaso result: {0:s}'.format(path)) container = containers.File(name=description, path=path) self.state.StoreContainer(container)
def Process(self): """Downloads the results of a GRR collection flow. Raises: DFTimewolfError: if no files specified """ # TODO (tomchop): Change the host attribute into something more appropriate # like 'selectors', and the corresponding recipes. client = self._GetClientBySelector(self.host) self._AwaitFlow(client, self.flow_id) self._CheckSkippedFlows() collected_flow_data = self._DownloadFiles(client, self.flow_id) if collected_flow_data: self.logger.info('{0:s}: Downloaded: {1:s}'.format( self.flow_id, collected_flow_data)) container = containers.File(name=client.data.os_info.fqdn.lower(), path=collected_flow_data) self.state.StoreContainer(container)
def testProcessing(self, mock_Popen):
  """Tests that the correct number of containers is added."""
  test_state = state.DFTimewolfState(config.Config)
  mock_popen_object = mock.Mock()
  mock_popen_object.communicate.return_value = (None, None)
  mock_popen_object.wait.return_value = False
  mock_Popen.return_value = mock_popen_object
  local_plaso_processor = localplaso.LocalPlasoProcessor(test_state)
  test_state.StoreContainer(
      containers.File(name='test', path='/notexist/test'))
  local_plaso_processor.SetUp()
  local_plaso_processor.Process()
  mock_Popen.assert_called_once()
  # Get the positional arguments of the first call.
  args = mock_Popen.call_args[0][0]
  self.assertEqual(args[9], '/notexist/test')
  plaso_path = args[8]  # Dynamically generated path to the plaso file.
  self.assertEqual(
      test_state.GetContainers(containers.File)[0].path, plaso_path)
def _ProcessThread(self, client): """Processes a single client. This function is used as a callback for the processing thread. Args: client (object): GRR client object to act on. """ root_path = self.root_path if not root_path: return self.logger.info('Timeline to start from "{0:s}" items'.format( root_path.decode())) timeline_args = timeline_pb2.TimelineArgs(root=root_path, ) flow_id = self._LaunchFlow(client, 'TimelineFlow', timeline_args) self._AwaitFlow(client, flow_id) collected_flow_data = self._DownloadTimeline(client, flow_id) if collected_flow_data: self.logger.info('{0!s}: Downloaded: {1:s}'.format( flow_id, collected_flow_data)) container = containers.File(name=client.data.os_info.fqdn.lower(), path=collected_flow_data) self.state.StoreContainer(container)
def testSingleGrep(self):
  """Tests a single-keyword grep search on text files."""
  test_state = state.DFTimewolfState(config.Config)
  base_grepper_search = grepper.GrepperSearch(test_state)
  base_grepper_search.SetUp(
      keywords='foo|lorem|meow|triage|bar|homebrew')
  # Point this at a test directory containing files to grep for the above
  # keywords. This simulates a path received as input from GRR.
  test_state.StoreContainer(containers.File(
      name='Test description',
      path='tests/lib/collectors/test_data/grepper_test_dir'))
  base_grepper_search.Process()
  # pylint: disable=protected-access
  self.assertEqual(base_grepper_search._keywords,
                   'foo|lorem|meow|triage|bar|homebrew')
  # pylint: disable=line-too-long
  self.assertEqual(
      base_grepper_search._final_output,
      'tests/lib/collectors/test_data/grepper_test_dir/1test.pdf:homebrew\n'
      'tests/lib/collectors/test_data/grepper_test_dir/grepper_test.txt:bar,foo,lorem,triage\n'
      'tests/lib/collectors/test_data/grepper_test_dir/grepper_test2.txt:foo')
def Process(self): """Process files with Turbinia.""" log_file_path = os.path.join(self._output_path, 'turbinia.log') print('Turbinia log file: {0:s}'.format(log_file_path)) vm_containers = self.state.GetContainers(containers.ForensicsVM) if vm_containers and not self.disk_name: forensics_vm = vm_containers[0] self.disk_name = forensics_vm.evidence_disk.name print('Using disk {0:s} from previous collector'.format( self.disk_name)) evidence_ = evidence.GoogleCloudDisk(disk_name=self.disk_name, project=self.project, zone=self.turbinia_zone) try: evidence_.validate() except TurbiniaException as exception: self.state.AddError(exception, critical=True) return request = TurbiniaRequest(requester=getpass.getuser()) request.evidence.append(evidence_) if self.sketch_id: request.recipe['sketch_id'] = self.sketch_id if not self.run_all_jobs: request.recipe['jobs_blacklist'] = ['StringsJob'] # Get threat intelligence data from any modules that have stored some. # In this case, observables is a list of containers.ThreatIntelligence # objects. threatintel = self.state.GetContainers(containers.ThreatIntelligence) if threatintel: print( 'Sending {0:d} threatintel to Turbinia GrepWorkers...'.format( len(threatintel))) indicators = [item.indicator for item in threatintel] request.recipe['filter_patterns'] = indicators request_dict = { 'instance': self.instance, 'project': self.project, 'region': self.turbinia_region, 'request_id': request.request_id } try: print('Creating Turbinia request {0:s} with Evidence {1!s}'.format( request.request_id, evidence_.name)) self.client.send_request(request) print('Waiting for Turbinia request {0:s} to complete'.format( request.request_id)) self.client.wait_for_request(**request_dict) task_data = self.client.get_task_data(**request_dict) except TurbiniaException as exception: # TODO: determine if exception should be converted into a string as # elsewhere in the codebase. self.state.AddError(exception, critical=True) return message = self.client.format_task_status(**request_dict, full_report=True) short_message = self.client.format_task_status(**request_dict) print(short_message) # Store the message for consumption by any reporting modules. report = containers.Report(module_name='TurbiniaProcessor', text=message, text_format='markdown') self.state.StoreContainer(report) local_paths, gs_paths = self._DeterminePaths(task_data) if not local_paths and not gs_paths: self.state.AddError( 'No interesting files found in Turbinia output.', critical=True) return timeline_label = '{0:s}-{1:s}'.format(self.project, self.disk_name) # Any local files that exist we can add immediately to the output all_local_paths = [(timeline_label, p) for p in local_paths if os.path.exists(p)] downloaded_gs_paths = self._DownloadFilesFromGCS( timeline_label, gs_paths) all_local_paths.extend(downloaded_gs_paths) if not all_local_paths: self.state.AddError('No interesting files could be found.', critical=True) for description, path in all_local_paths: if path.endswith('BinaryExtractorTask.tar.gz'): container = containers.ThreatIntelligence( name='BinaryExtractorResults', indicator=None, path=path) if path.endswith('hashes.json'): container = containers.ThreatIntelligence( name='ImageExportHashes', indicator=None, path=path) if path.endswith('.plaso'): container = containers.File(name=description, path=path) self.state.StoreContainer(container)