Example #1
0
    def _DownloadFilesFromGCS(self, timeline_label, gs_paths):
        """Downloads files stored in Google Cloud Storage to the local filesystem.

    Args:
      timeline_label (str): Label to use to construct the path list.
      gs_paths (str):  gs:// URI to files that need to be downloaded from GS.

    Returns:
      list(str): A list of local paths were GS files have been copied to.
    """
        # TODO: Externalize fetching files from GCS buckets to a different module.

        local_paths = []
        for path in gs_paths:
            local_path = None
            try:
                output_writer = output_manager.GCSOutputWriter(
                    path, local_output_dir=self._output_path)
                local_path = output_writer.copy_from(path)
            except TurbiniaException as exception:
                # Don't add a critical error for now, until we start raising errors
                # instead of returning manually each
                self.state.AddError(exception, critical=False)

        if local_path:
            local_paths.append((timeline_label, local_path))

        return local_paths
Example #2
0
    def process(self):
        """Process files with Turbinia.

        Sends the GoogleCloudDisk evidence to a Turbinia server, waits for the
        request to complete, then collects resulting .plaso files (local or in
        GCS) into self.state.output as (timeline_label, local_path) tuples.
        """
        log_file_path = os.path.join(self._output_path, 'turbinia.log')
        print('Turbinia log file: {0:s}'.format(log_file_path))

        evidence_ = evidence.GoogleCloudDisk(disk_name=self.disk_name,
                                             project=self.project,
                                             zone=self.turbinia_zone)
        request = TurbiniaRequest()
        request.evidence.append(evidence_)

        try:
            print('Creating Turbinia request {0:s} with Evidence {1:s}'.format(
                request.request_id, evidence_.name))
            self.client.send_request(request)
            print('Waiting for Turbinia request {0:s} to complete'.format(
                request.request_id))
            self.client.wait_for_request(instance=self.instance,
                                         project=self.project,
                                         region=self.turbinia_region,
                                         request_id=request.request_id)
            task_data = self.client.get_task_data(
                instance=self.instance,
                project=self.project,
                region=self.turbinia_region,
                request_id=request.request_id)
            print(
                self.client.format_task_status(instance=self.instance,
                                               project=self.project,
                                               region=self.turbinia_region,
                                               request_id=request.request_id,
                                               all_fields=True))
        except TurbiniaException as e:
            self.state.add_error(e, critical=True)
            return

        # This finds all .plaso files in the Turbinia output, and determines if
        # they are local or remote (it's possible this will be running against
        # a local instance of Turbinia).
        local_paths = []
        gs_paths = []
        timeline_label = '{0:s}-{1:s}'.format(self.project, self.disk_name)
        for task in task_data:
            # Bug fix: saved_paths may be present but set to None, in which
            # case dict.get's default is not used — fall back with `or []`.
            for path in task.get('saved_paths') or []:
                if path.startswith('/') and path.endswith('.plaso'):
                    local_paths.append(path)
                if path.startswith('gs://') and path.endswith('.plaso'):
                    gs_paths.append(path)

        if not local_paths and not gs_paths:
            self.state.add_error('No .plaso files found in Turbinia output.',
                                 critical=True)
            return

        # Any local .plaso files that exist we can add immediately to the output
        self.state.output = [(timeline_label, p) for p in local_paths
                             if os.path.exists(p)]

        # For files remote in GCS we copy each plaso file back from GCS and then
        # add to output paths
        # TODO: Externalize fetching files from GCS buckets to a different module.
        for path in gs_paths:
            local_path = None
            try:
                output_writer = output_manager.GCSOutputWriter(
                    path, local_output_dir=self._output_path)
                local_path = output_writer.copy_from(path)
            except TurbiniaException as e:
                self.state.add_error(e, critical=True)
                return

            if local_path:
                self.state.output.append((timeline_label, local_path))

        if not self.state.output:
            self.state.add_error('No .plaso files could be found.',
                                 critical=True)
Example #3
0
    def Process(self):
        """Process files with Turbinia.

        Sends the GoogleCloudDisk evidence to a Turbinia server, waits for the
        request to complete, stores a markdown report of the task results, and
        collects any resulting .plaso files (local or in GCS) into
        self.state.output as (timeline_label, local_path) tuples.
        """
        log_file_path = os.path.join(self._output_path, 'turbinia.log')
        print('Turbinia log file: {0:s}'.format(log_file_path))

        # If no disk name was given explicitly, fall back to the disk produced
        # by the previous collector in the pipeline.
        if self.state.input and not self.disk_name:
            _, disk = self.state.input[0]
            self.disk_name = disk.name
            print('Using disk {0:s} from previous collector'.format(
                self.disk_name))

        evidence_ = evidence.GoogleCloudDisk(disk_name=self.disk_name,
                                             project=self.project,
                                             zone=self.turbinia_zone)
        try:
            # Validate the evidence before sending; abort the module on failure.
            evidence_.validate()
        except TurbiniaException as exception:
            self.state.AddError(exception, critical=True)
            return

        request = TurbiniaRequest(requester=getpass.getuser())
        request.evidence.append(evidence_)
        if self.sketch_id:
            request.recipe['sketch_id'] = self.sketch_id
        if not self.run_all_jobs:
            # StringsJob is skipped by default; run_all_jobs opts back in.
            request.recipe['jobs_blacklist'] = ['StringsJob']

        # Get threat intelligence data from any modules that have stored some.
        # In this case, observables is a list of containers.ThreatIntelligence
        # objects. Indicators are forwarded to Turbinia as grep filter patterns.
        threatintel = self.state.GetContainers(containers.ThreatIntelligence)
        if threatintel:
            print(
                'Sending {0:d} threatintel to Turbinia GrepWorkers...'.format(
                    len(threatintel)))
            indicators = [item.indicator for item in threatintel]
            request.recipe['filter_patterns'] = indicators

        # Keyword arguments shared by all of the client calls below.
        request_dict = {
            'instance': self.instance,
            'project': self.project,
            'region': self.turbinia_region,
            'request_id': request.request_id
        }

        try:
            print('Creating Turbinia request {0:s} with Evidence {1!s}'.format(
                request.request_id, evidence_.name))
            self.client.send_request(request)
            print('Waiting for Turbinia request {0:s} to complete'.format(
                request.request_id))
            self.client.wait_for_request(**request_dict)
            task_data = self.client.get_task_data(**request_dict)
        except TurbiniaException as exception:
            # TODO: determine if exception should be converted into a string as
            # elsewhere in the codebase.
            self.state.AddError(exception, critical=True)
            return

        # Full report is stored for reporting modules; only the short status
        # is printed to the console.
        message = self.client.format_task_status(**request_dict,
                                                 full_report=True)
        short_message = self.client.format_task_status(**request_dict)
        print(short_message)

        # Store the message for consumption by any reporting modules.
        report = containers.Report(module_name='TurbiniaProcessor',
                                   text=message,
                                   text_format='markdown')
        self.state.StoreContainer(report)

        # This finds all .plaso files in the Turbinia output, and determines if they
        # are local or remote (it's possible this will be running against a local
        # instance of Turbinia).
        local_paths = []
        gs_paths = []
        timeline_label = '{0:s}-{1:s}'.format(self.project, self.disk_name)
        for task in task_data:
            # saved_paths may be set to None
            for path in task.get('saved_paths') or []:
                if path.startswith('/') and path.endswith('.plaso'):
                    local_paths.append(path)
                if path.startswith('gs://') and path.endswith('.plaso'):
                    gs_paths.append(path)

        if not local_paths and not gs_paths:
            self.state.AddError('No .plaso files found in Turbinia output.',
                                critical=True)
            return

        # Any local .plaso files that exist we can add immediately to the output
        self.state.output = [(timeline_label, p) for p in local_paths
                             if os.path.exists(p)]

        # For files remote in GCS we copy each plaso file back from GCS and then add
        # to output paths
        # TODO: Externalize fetching files from GCS buckets to a different module.
        for path in gs_paths:
            local_path = None
            try:
                output_writer = output_manager.GCSOutputWriter(
                    path, local_output_dir=self._output_path)
                local_path = output_writer.copy_from(path)
            except TurbiniaException as exception:
                # TODO: determine if exception should be converted into a string as
                # elsewhere in the codebase.
                self.state.AddError(exception, critical=True)
                return

            if local_path:
                self.state.output.append((timeline_label, local_path))

        if not self.state.output:
            self.state.AddError('No .plaso files could be found.',
                                critical=True)
Example #4
0
    def process(self):
        """Process files with Turbinia.

        Submits the cloud disk to Turbinia, waits for completion, stores a
        textual report of the run, and records resulting .plaso files in
        self.state.output as (timeline_label, local_path) tuples.
        """
        turbinia_log = os.path.join(self._output_path, 'turbinia.log')
        print('Turbinia log file: {0:s}'.format(turbinia_log))

        # Fall back to the disk produced by the previous collector when no
        # disk name was given explicitly.
        if self.state.input and not self.disk_name:
            _, disk = self.state.input[0]
            self.disk_name = disk.name
            print('Using disk {0:s} from previous collector'.format(
                self.disk_name))

        disk_evidence = evidence.GoogleCloudDisk(
            disk_name=self.disk_name, project=self.project,
            zone=self.turbinia_zone)
        turbinia_request = TurbiniaRequest()
        turbinia_request.evidence.append(disk_evidence)

        # Get threat intelligence data from any modules that have stored some.
        # In this case, observables is a list of containers.ThreatIntelligence
        # objects.
        intel_items = self.state.get_containers(containers.ThreatIntelligence)
        if intel_items:
            print(
                'Sending {0:d} threatintel to Turbinia GrepWorkers...'.format(
                    len(intel_items)))
            turbinia_request.recipe['filter_patterns'] = [
                item.indicator for item in intel_items]

        # Keyword arguments shared by the client calls below.
        common_args = {
            'instance': self.instance,
            'project': self.project,
            'region': self.turbinia_region,
            'request_id': turbinia_request.request_id
        }

        try:
            print('Creating Turbinia request {0:s} with Evidence {1!s}'.format(
                turbinia_request.request_id, disk_evidence.name))
            self.client.send_request(turbinia_request)
            print('Waiting for Turbinia request {0:s} to complete'.format(
                turbinia_request.request_id))
            self.client.wait_for_request(**common_args)
            task_data = self.client.get_task_data(**common_args)
        except TurbiniaException as exception:
            self.state.add_error(exception, critical=True)
            return

        # Turbinia run complete, build a human-readable message of results.
        summary = ['Completed {0:d} Turbinia tasks'.format(len(task_data))]
        for task in task_data:
            summary.append('{0!s} ({1!s}): {2!s}'.format(
                task.get('name'), task.get('id'),
                task.get('status', 'No task status')))
            # saved_paths may be set to None
            for saved_path in task.get('saved_paths') or []:
                # Skip worker logs, per-task logs and local paths.
                if saved_path.endswith('worker-log.txt'):
                    continue
                if saved_path.endswith('{0!s}.log'.format(task.get('id'))):
                    continue
                if saved_path.startswith('/'):
                    continue
                summary.append('  {0:s}'.format(saved_path))
        message = '\n'.join(summary) + '\n'
        print(message)

        # Store the message for consumption by any reporting modules.
        self.state.store_container(containers.Report(
            module_name='TurbiniaProcessor', text=message))

        # Find all .plaso files in the Turbinia output and classify each as
        # local or remote (it's possible this runs against a local instance
        # of Turbinia).
        plaso_local = []
        plaso_remote = []
        label = '{0:s}-{1:s}'.format(self.project, self.disk_name)
        for task in task_data:
            # saved_paths may be set to None
            for saved_path in task.get('saved_paths') or []:
                if not saved_path.endswith('.plaso'):
                    continue
                if saved_path.startswith('/'):
                    plaso_local.append(saved_path)
                if saved_path.startswith('gs://'):
                    plaso_remote.append(saved_path)

        if not plaso_local and not plaso_remote:
            self.state.add_error('No .plaso files found in Turbinia output.',
                                 critical=True)
            return

        # Existing local .plaso files can be added to the output directly.
        self.state.output = [(label, existing) for existing in plaso_local
                             if os.path.exists(existing)]

        # Remote .plaso files are first copied back from GCS.
        # TODO: Externalize fetching files from GCS buckets to a different module.
        for remote_path in plaso_remote:
            copied_path = None
            try:
                writer = output_manager.GCSOutputWriter(
                    remote_path, local_output_dir=self._output_path)
                copied_path = writer.copy_from(remote_path)
            except TurbiniaException as exception:
                self.state.add_error(exception, critical=True)
                return

            if copied_path:
                self.state.output.append((label, copied_path))

        if not self.state.output:
            self.state.add_error('No .plaso files could be found.',
                                 critical=True)