Example #1
  def copy_to(self, source_path):
    if os.path.getsize(source_path) == 0:
      message = (
          'Local source file {0:s} is empty.  Not uploading to GCS'.format(
              source_path))
      log.warning(message)
      return None

    bucket = self.client.get_bucket(self.bucket)
    destination_path = os.path.join(
        self.base_output_dir, self.unique_dir, os.path.basename(source_path))
    log.info(
        'Writing {0:s} to GCS path {1:s}'.format(source_path, destination_path))
    try:
      blob = storage.Blob(destination_path, bucket, chunk_size=self.CHUNK_SIZE)
      blob.upload_from_filename(source_path, client=self.client)
    except exceptions.GoogleCloudError as exception:
      message = 'File upload to GCS failed: {0!s}'.format(exception)
      log.error(message)
      raise TurbiniaException(message)
    return os.path.join('gs://', self.bucket, destination_path)
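
A hedged, standalone sketch of the same chunked GCS upload pattern, assuming the google-cloud-storage client library and application credentials are available; the bucket and path names are placeholders, not Turbinia's actual configuration.

# Minimal sketch (assumptions noted above), mirroring the empty-file check and
# chunked Blob upload used in copy_to().
import os
from google.cloud import storage

def upload_if_nonempty(bucket_name, source_path, destination_path):
  """Uploads source_path to gs://<bucket_name>/<destination_path>, skipping empty files."""
  if os.path.getsize(source_path) == 0:
    return None
  client = storage.Client()
  bucket = client.get_bucket(bucket_name)
  # 1 MiB chunks; the CHUNK_SIZE constant above plays the same role.
  blob = storage.Blob(destination_path, bucket, chunk_size=1024 * 1024)
  blob.upload_from_filename(source_path, client=client)
  return os.path.join('gs://', bucket_name, destination_path)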
Example #2
  def __init__(self, *_, **__):
    """Initialization for PSQ Worker."""
    config.LoadConfig()
    psq_publisher = pubsub.PublisherClient()
    psq_subscriber = pubsub.SubscriberClient()
    datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
    try:
      self.psq = psq.Queue(
          psq_publisher, psq_subscriber, config.TURBINIA_PROJECT,
          name=config.PSQ_TOPIC, storage=psq.DatastoreStorage(datastore_client))
    except exceptions.GoogleCloudError as e:
      msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
      log.error(msg)
      raise TurbiniaException(msg)

    check_directory(config.MOUNT_DIR_PREFIX)
    check_directory(config.OUTPUT_DIR)
    check_directory(config.TMP_DIR)

    log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
    self.worker = psq.Worker(queue=self.psq)
Example #3
  def GetInstance(self, instance_name, zone=None):
    """Get instance from project.

    Args:
      instance_name: The instance name.
      zone: The zone for the instance.

    Returns:
      A Google Compute Instance object (instance of GoogleComputeInstance).

    Raises:
      TurbiniaException: If instance does not exist.
    """
    instances = self.ListInstances()
    try:
      instance = instances[instance_name]
      if not zone:
        zone = instance['zone']
      return GoogleComputeInstance(project=self, zone=zone, name=instance_name)
    except KeyError:
      raise TurbiniaException('Unknown instance')
Example #4
  def _collect_windows_files(self, evidence):
    """Extract artifacts using image_export.

    Args:
        evidence (Evidence object):  The evidence to process
    Returns:
        location (str): The file path to the extracted evidence.
        number of artifacts (int): The number of files extracted.
    """
    try:
      collected_artifacts = extract_artifacts(
          artifact_names=['WindowsSystemRegistryFiles'],
          disk_path=evidence.local_path, output_dir=self.output_dir,
          credentials=evidence.credentials)
    except TurbiniaException as e:
      raise TurbiniaException('artifact extraction failed: {}'.format(str(e)))

    # Extract base dir from our list of collected artifacts
    location = os.path.dirname(collected_artifacts[0])

    return (location, len(collected_artifacts))
Example #5
  def _collect_wordpress_file(self, evidence):
    """Extract artifacts using image_export.

    Args:
        evidence (Evidence object):  The evidence to process
    Returns:
        location (str): The file path to the extracted evidence.
        number of artifacts (int): The number of files extracted.
    """
    try:
      collected_artifacts = extract_files(
          file_name=_WP_DB_NAME, disk_path=evidence.local_path,
          output_dir=os.path.join(self.output_dir, 'artifacts'))
    except TurbiniaException as e:
      raise TurbiniaException(
          'artifact extraction failed: {0:s}'.format(str(e)))

    # Extract base dir from our list of collected artifacts
    location = os.path.dirname(collected_artifacts[0])

    return (location, len(collected_artifacts))
Example #6
def GetFilesystem(path):
  """Uses lsblk to detect the filesystem of a partition block device.

  Args:
    path(str): the full path to the block device.
  Returns:
    str: the filesystem detected (for example: 'ext4')
  """
  cmd = ['lsblk', path, '-f', '-o', 'FSTYPE', '-n']
  log.info('Running {0!s}'.format(cmd))
  fstype = subprocess.check_output(cmd).split()
  if not fstype:
    # Let's wait a bit for any previous block device operation to settle
    time.sleep(2)
    fstype = subprocess.check_output(cmd).split()

  if len(fstype) != 1:
    raise TurbiniaException(
        '{0:s} should contain exactly one partition, found {1:d}'.format(
            path, len(fstype)))
  return fstype[0].decode('utf-8').strip()
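
A hedged usage note; the device path is a placeholder and the import location of GetFilesystem is an assumption.

# Hypothetical usage (device path and module path are placeholders):
# from turbinia.processors.mount_local import GetFilesystem
# fstype = GetFilesystem('/dev/loop0p1')
# print(fstype)  # e.g. 'ext4'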
Example #7
def ParseDependencies():
    """Parses the config file DEPENDENCIES variable.

  Raises:
    TurbiniaException: If the config file cannot be parsed.

  Returns:
   dependencies(dict): The parsed dependency values.
  """
    dependencies = {}
    try:
        for values in CONFIG.DEPENDENCIES:
            job = values['job'].lower()
            dependencies[job] = {}
            dependencies[job]['programs'] = values['programs']
            dependencies[job]['docker_image'] = values.get('docker_image')
            dependencies[job]['timeout'] = values.get('timeout')
    except (KeyError, TypeError) as exception:
        raise TurbiniaException('An issue has occurred while parsing the '
                                'dependency config: {0!s}'.format(exception))
    return dependencies
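
A hedged sketch of a DEPENDENCIES value that the parser above accepts; the job and program names are placeholders chosen for illustration.

# Illustrative config entry matching the keys read by ParseDependencies().
EXAMPLE_DEPENDENCIES = [
    {
        'job': 'PlasoJob',                  # lowercased into the result key
        'programs': ['log2timeline.py'],    # required
        'docker_image': None,               # optional; .get() yields None if absent
        'timeout': 86400,                   # optional
    },
]
# Parsing this would produce:
# {'plasojob': {'programs': ['log2timeline.py'], 'docker_image': None, 'timeout': 86400}}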
Example #8
    def setup(self, task):
        """Handles initializing task based attributes, after object creation.

    Args:
      task (TurbiniaTask): The calling Task object

    Raises:
      TurbiniaException: If the Output Manager is not setup.
    """

        self.task_id = task.id
        self.task_name = task.name
        self.requester = task.requester
        if not self.no_state_manager:
            self.state_manager = state_manager.get_state_manager()
        if not self.no_output_manager:
            if task.output_manager.is_setup:
                ldirs = task.output_manager.get_local_output_dirs()
                _, self.output_dir = ldirs
            else:
                raise TurbiniaException('Output Manager is not setup yet.')
Example #9
    def setup(self, jobs_denylist=None, jobs_allowlist=None, *args, **kwargs):
        """Does setup of Task manager and its dependencies.

    Args:
      jobs_denylist (list): Jobs that will be excluded from running
      jobs_allowlist (list): The only Jobs that will be included to run
    """
        self._backend_setup(*args, **kwargs)
        job_names = jobs_manager.JobsManager.GetJobNames()
        if jobs_denylist or jobs_allowlist:
            selected_jobs = jobs_denylist or jobs_allowlist
            for job in selected_jobs:
                if job.lower() not in job_names:
                    msg = (
                        'Error creating server. Job {0!s} is not found in registered '
                        'jobs {1!s}.'.format(job, job_names))
                    log.error(msg)
                    raise TurbiniaException(msg)
            log.info('Filtering Jobs with allowlist {0!s} and denylist {1!s}'.
                     format(jobs_allowlist, jobs_denylist))
            job_names = jobs_manager.JobsManager.FilterJobNames(
                job_names, jobs_denylist, jobs_allowlist)

        # Disable any jobs from the config that were not previously allowlisted.
        disabled_jobs = list(
            config.DISABLED_JOBS) if config.DISABLED_JOBS else []
        disabled_jobs = [j.lower() for j in disabled_jobs]
        if jobs_allowlist:
            disabled_jobs = list(set(disabled_jobs) - set(jobs_allowlist))
        if disabled_jobs:
            log.info(
                'Disabling non-allowlisted jobs configured to be disabled in the '
                'config file: {0:s}'.format(', '.join(disabled_jobs)))
            job_names = jobs_manager.JobsManager.FilterJobNames(
                job_names, disabled_jobs, [])

        self.jobs = [
            job for _, job in jobs_manager.JobsManager.GetJobs(job_names)
        ]
        log.debug('Registered job list: {0:s}'.format(str(job_names)))
Example #10
  def _preprocess(self, _, required_states):
    # Need to mount parent disk
    if not self.parent_evidence.partition_paths:
      self.parent_evidence.mount_path = mount_local.PreprocessMountPartition(
          self.parent_evidence.device_path)
    else:
      partition_paths = self.parent_evidence.partition_paths
      self.parent_evidence.mount_path = mount_local.PreprocessMountDisk(
          partition_paths, self.parent_evidence.mount_partition)
    self.parent_evidence.local_path = self.parent_evidence.mount_path
    self.parent_evidence.state[EvidenceState.MOUNTED] = True

    if EvidenceState.ATTACHED in required_states or self.has_child_evidence:
      rawdisk_path = os.path.join(
          self.parent_evidence.mount_path, self.embedded_path)
      if not os.path.exists(rawdisk_path):
        raise TurbiniaException(
            'Unable to find raw disk image {0:s} in GoogleCloudDisk'.format(
                rawdisk_path))
      self.device_path = mount_local.PreprocessLosetup(rawdisk_path)
      self.state[EvidenceState.ATTACHED] = True
      self.local_path = self.device_path
Example #11
    def _create_mount_points(self, mount_paths, mode='rw'):
        """Creates file and device mounting arguments.

    The arguments will be passed into the container with the appropriate
    mounting parameters. All device blocks will be mounted as read only,
    regardless of the specified mode.

    Args:
      mount_paths(list): The paths on the host system to be mounted.
      mode(str): The mode the path will be mounted in. The acceptable
                 parameters are rw for read write and ro for read only.

    Returns:
      tuple: containing:
        list: The device blocks that will be mounted.
        dict: The file paths that will be mounted.

    Raises:
      TurbiniaException: If an incorrect mode was passed.
    """
        accepted_vars = ['rw', 'ro']
        device_paths = []
        file_paths = {}

        if mode in accepted_vars:
            for mpath in mount_paths:
                device_mpath = '{0:s}:{0:s}:{1:s}'.format(str(mpath), 'r')
                if mpath not in file_paths.keys(
                ) and device_mpath not in device_paths:
                    if IsBlockDevice(mpath):
                        device_paths.append(device_mpath)
                    else:
                        file_paths[mpath] = {'bind': mpath, 'mode': mode}
        else:
            raise TurbiniaException(
                'An incorrect mode was passed: {0:s}. Unable to create the correct '
                'mount points for the Docker container.'.format(mode))

        return device_paths, file_paths
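
A hedged illustration of the return shapes, assuming one regular directory and one block device are passed in; the paths are placeholders.

# Illustrative outcome (paths are placeholders):
#   _create_mount_points(['/tmp/output', '/dev/loop0'], mode='ro')
# returns:
#   device_paths -> ['/dev/loop0:/dev/loop0:r']   # block devices are always mapped read-only
#   file_paths   -> {'/tmp/output': {'bind': '/tmp/output', 'mode': 'ro'}}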
Example #12
def setup_stackdriver_handler(project_id, origin):
    """Set up Google Cloud Stackdriver Logging

  The Google Cloud Logging library will attach itself as a
  handler to the default Python logging module.

  Args:
    project_id: The name of the Google Cloud project.
    origin: Where the log is originating from (i.e. server or worker).
  Raises:
    TurbiniaException: When an error occurs enabling GCP Stackdriver Logging.
  """

    # Patching cloud logging to allow custom fields
    def my_enqueue(self, record, message, **kwargs):
        queue_entry = {
            "info": {
                "message": message,
                "python_logger": record.name,
                "origin": origin
            },
            "severity": _helpers._normalize_severity(record.levelno),
            "timestamp": datetime.datetime.utcfromtimestamp(record.created),
        }

        queue_entry.update(kwargs)
        self._queue.put_nowait(queue_entry)

    _Worker.enqueue = my_enqueue

    try:
        client = cloud_logging.Client(project=project_id)
        cloud_handler = cloud_logging.handlers.CloudLoggingHandler(client)
        logger.addHandler(cloud_handler)

    except exceptions.GoogleCloudError as exception:
        msg = 'Error enabling Stackdriver Logging: {0:s}'.format(
            str(exception))
        raise TurbiniaException(msg)
Example #13
    def __init__(self,
                 name=None,
                 description=None,
                 source=None,
                 source_path=None,
                 tags=None,
                 request_id=None,
                 copyable=False):
        """Initialization for Evidence."""
        self.copyable = copyable
        self.config = {}
        self.context_dependent = False
        self.cloud_only = False
        self.description = description
        self.mount_path = None
        self.source = source
        self.source_path = source_path
        self.tags = tags if tags else {}
        self.request_id = request_id
        self.parent_evidence = None
        self.save_metadata = False

        self.local_path = source_path

        # List of jobs that have processed this evidence
        self.processed_by = []
        self.type = self.__class__.__name__
        self.name = name if name else self.type
        self.saved_path = None
        self.saved_path_type = None

        self.state = {}
        for state in EvidenceState:
            self.state[state] = False

        if self.copyable and not self.local_path:
            raise TurbiniaException(
                '{0:s} is a copyable evidence and needs a source_path'.format(
                    self.type))
Example #14
    def __init__(self,
                 task,
                 evidence=None,
                 input_evidence=None,
                 base_output_dir=None,
                 request_id=None):
        """Initialize the TurbiniaTaskResult object.

    Args:
      task (TurbiniaTask): The calling Task object

    Raises:
      TurbiniaException: If the Output Manager is not setup.
    """

        self.closed = False
        self.evidence = evidence if evidence else []
        self.input_evidence = input_evidence
        self.id = uuid.uuid4().hex
        self.task_id = task.id
        self.task_name = task.name
        self.base_output_dir = base_output_dir
        self.request_id = request_id
        self.user = task.user

        self.start_time = datetime.now()
        self.run_time = None
        self.saved_paths = []
        self.successful = None
        self.status = None
        self.error = {}
        self.worker_name = platform.node()
        # TODO(aarontp): Create mechanism to grab actual python logging data.
        self._log = []
        if task.output_manager.is_setup:
            _, self.output_dir = task.output_manager.get_local_output_dirs()
        else:
            raise TurbiniaException('Output Manager is not setup yet.')
Example #15
def task_deserialize(input_dict):
    """Converts an input dictionary back into a TurbiniaTask object.

  Args:
    input_dict (dict): TurbiniaTask object dictionary.

  Returns:
    TurbiniaTask: Deserialized object.
  """

    type_ = input_dict['name']
    task_loader = TaskLoader()
    task = task_loader.get_task(type_)
    if not task:
        raise TurbiniaException(
            'Could not load Task module {0:s}'.format(type_))
    # Remove serialized output manager because this gets reinstantiated when the
    # empty Task is instantiated and we don't want to overwrite it.
    input_dict.pop('output_manager')
    task.__dict__.update(input_dict)
    task.last_update = datetime.strptime(input_dict['last_update'],
                                         DATETIME_FORMAT)
    return task
Example #16
  def evidence_setup(self, evidence):
    """Validates and processes the evidence.

    Args:
      evidence(Evidence): The Evidence to setup.

    Raises:
      TurbiniaException: If the Evidence can't be validated or the current
          state does not meet the required state.
    """
    evidence.validate()
    evidence.preprocess(self.tmp_dir, required_states=self.REQUIRED_STATES)

    # Final check to make sure that the required evidence state has been met
    # for Evidence types that have those capabilities.
    for state in self.REQUIRED_STATES:
      if state in evidence.POSSIBLE_STATES and not evidence.state.get(state):
        raise TurbiniaException(
            'Evidence {0!s} being processed by Task {1:s} requires Evidence '
            'to be in state {2:s}, but earlier pre-processors may have '
            'failed.  Current state is {3:s}. See previous logs for more '
            'information.'.format(
                evidence, self.name, state.name, evidence.format_state()))
Example #17
  def __init__(self, jobs_blacklist=None, jobs_whitelist=None):
    """Initialization for PSQ Worker.

    Args:
      jobs_blacklist (Optional[list[str]]): Jobs we will exclude from running
      jobs_whitelist (Optional[list[str]]): The only Jobs we will include to run
    """
    config.LoadConfig()
    psq_publisher = pubsub.PublisherClient()
    psq_subscriber = pubsub.SubscriberClient()
    datastore_client = datastore.Client(project=config.TURBINIA_PROJECT)
    try:
      self.psq = psq.Queue(
          psq_publisher, psq_subscriber, config.TURBINIA_PROJECT,
          name=config.PSQ_TOPIC, storage=psq.DatastoreStorage(datastore_client))
    except exceptions.GoogleCloudError as e:
      msg = 'Error creating PSQ Queue: {0:s}'.format(str(e))
      log.error(msg)
      raise TurbiniaException(msg)

    # Deregister jobs from blacklist/whitelist.
    disabled_jobs = list(config.DISABLED_JOBS) if config.DISABLED_JOBS else []
    job_manager.JobsManager.DeregisterJobs(jobs_blacklist, jobs_whitelist)
    if disabled_jobs:
      log.info(
          'Disabling jobs that were configured to be disabled in the '
          'config file: {0:s}'.format(', '.join(disabled_jobs)))
      job_manager.JobsManager.DeregisterJobs(jobs_blacklist=disabled_jobs)

    # Check for valid dependencies/directories.
    check_dependencies(config.DEPENDENCIES)
    check_directory(config.MOUNT_DIR_PREFIX)
    check_directory(config.OUTPUT_DIR)
    check_directory(config.TMP_DIR)

    log.info('Starting PSQ listener on queue {0:s}'.format(self.psq.name))
    self.worker = psq.Worker(queue=self.psq)
Example #18
    def setup(self, evidence):
        """Perform common setup operations and runtime environment.

    Even though TurbiniaTasks are initially instantiated by the Jobs under the
    Task Manager, this setup method needs to be run from the task on the worker
    because it handles setting up the task runtime environment.

    Args:
      evidence: An Evidence object to process.

    Returns:
      A TurbiniaTaskResult object.

    Raises:
      TurbiniaException: If the evidence can not be found.
    """
        self.output_manager.setup(self)
        self.tmp_dir, self.output_dir = self.output_manager.get_local_output_dirs(
        )
        if not self.result:
            self.result = TurbiniaTaskResult(
                input_evidence=evidence,
                base_output_dir=self.base_output_dir,
                request_id=self.request_id,
                job_id=self.job_id)
            self.result.setup(self)

        if not self.run_local:
            if evidence.copyable and not config.SHARED_FILESYSTEM:
                self.output_manager.retrieve_evidence(evidence)

        if evidence.source_path and not os.path.exists(evidence.source_path):
            raise TurbiniaException(
                'Evidence source path {0:s} does not exist'.format(
                    evidence.source_path))
        evidence.preprocess(self.tmp_dir)
        return self.result
Example #19
    def FilterJobNames(cls,
                       job_names,
                       jobs_denylist=None,
                       jobs_allowlist=None):
        """Filters a list of job names against white/black lists.

    jobs_allowlist and jobs_denylist must not be specified at the same time.

    Args:
      job_names (list[str]): The names of the job_names to filter.
      jobs_denylist (Optional[list[str]]): Job names to exclude.
      jobs_allowlist (Optional[list[str]]): Job names to include.

    Returns:
     list[str]: Job names

    Raises:
      TurbiniaException: If both jobs_denylist and jobs_allowlist are specified.
    """
        jobs_denylist = jobs_denylist if jobs_denylist else []
        jobs_denylist = [job.lower() for job in jobs_denylist]
        jobs_allowlist = jobs_allowlist if jobs_allowlist else []
        jobs_allowlist = [job.lower() for job in jobs_allowlist]

        if jobs_allowlist and jobs_denylist:
            raise TurbiniaException(
                'jobs_allowlist and jobs_denylist cannot be specified at the same '
                'time.')
        elif jobs_denylist:
            return [
                job for job in job_names if job.lower() not in jobs_denylist
            ]
        elif jobs_allowlist:
            return [job for job in job_names if job.lower() in jobs_allowlist]
        else:
            return job_names
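
A hedged usage sketch of the filtering behaviour; the job names are placeholders and matching is case-insensitive as shown above.

# Illustrative calls (job names are placeholders):
# JobsManager.FilterJobNames(['PlasoJob', 'GrepJob'], jobs_denylist=['grepjob'])
#   -> ['PlasoJob']
# JobsManager.FilterJobNames(['PlasoJob', 'GrepJob'], jobs_allowlist=['plasojob'])
#   -> ['PlasoJob']
# JobsManager.FilterJobNames(['PlasoJob'], jobs_denylist=['a'], jobs_allowlist=['b'])
#   -> raises TurbiniaException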
Example #20
def PreprocessLosetup(source_path):
  """Runs Losetup on a target block device or image file.

  Args:
    source_path(str): the source path to run losetup on.

  Raises:
    TurbiniaException: if the losetup command failed to run.

  Returns:
    str: the path to the created loopdevice (ie: /dev/loopX)
  """
  losetup_device = None
  # TODO(aarontp): Remove hard-coded sudo in commands:
  # https://github.com/google/turbinia/issues/73
  losetup_command = ['sudo', 'losetup', '--show', '--find', '-P', source_path]
  log.info('Running command {0:s}'.format(' '.join(losetup_command)))
  try:
    losetup_device = subprocess.check_output(
        losetup_command, universal_newlines=True).strip()
  except subprocess.CalledProcessError as e:
    raise TurbiniaException('Could not set losetup devices {0!s}'.format(e))

  return losetup_device
Example #21
    def send_message(self, message):
        """Send a pubsub message.

    Args:
      message: The message to send.
    """
        base64_data = base64.b64encode(message.encode('utf-8'))
        request_body = {
            "messages": [{
                "data": base64_data.decode('utf-8')  # base64 encoded string
            }]
        }
        publish_client = self.pubsub_api_client.projects().topics()
        response = gcp_common.ExecuteRequest(publish_client, 'publish', {
            'topic': self.topic_path,
            'body': request_body
        })
        # Safe to unpack since response is unpaged.
        if not response[0]['messageIds']:
            raise TurbiniaException(
                'Message {0:s} was not published to topic {1:s}'.format(
                    message, self.topic_path))
        msg_id = response[0]['messageIds'][0]
        log.info('Published message {0!s} to topic {1!s}'.format(
            msg_id, self.topic_name))
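
A minimal standalone sketch of the base64 message body the publish call above builds; the payload is a placeholder and the REST publish request itself is omitted.

import base64
import json

message = json.dumps({'task': 'example'})  # placeholder payload
request_body = {
    'messages': [{
        # The Pub/Sub REST API expects message data as a base64-encoded string.
        'data': base64.b64encode(message.encode('utf-8')).decode('utf-8'),
    }]
}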
Example #22
  def execute(
      self, cmd, result, save_files=None, log_files=None, new_evidence=None,
      close=False, shell=False, stderr_file=None, stdout_file=None,
      success_codes=None):
    """Executes a given binary and saves output.

    Args:
      cmd (list|string): Command arguments to run
      result (TurbiniaTaskResult): The result object to put data into.
      save_files (list): A list of files to save (files referenced by Evidence
          objects are automatically saved, so no need to include them).
      log_files (list): A list of files to save even if execution fails.
      new_evidence (list): These are new evidence objects created by the task.
          If the task is successful, they will be added to the result.
      close (bool): Whether to close out the result.
      shell (bool): Whether the cmd is in the form of a string or a list.
      success_codes (list(int)): Which return codes are considered successful.
      stderr_file (str): Path to location to save stderr.
      stdout_file (str): Path to location to save stdout.

    Returns:
      Tuple of the return code, and the TurbiniaTaskResult object
    """
    # Avoid circular dependency.
    from turbinia.jobs import manager as job_manager

    save_files = save_files if save_files else []
    log_files = log_files if log_files else []
    new_evidence = new_evidence if new_evidence else []
    success_codes = success_codes if success_codes else [0]
    stdout = None
    stderr = None

    # Get timeout value.
    timeout_limit = job_manager.JobsManager.GetTimeoutValue(self.job_name)

    # Execute the job via docker.
    docker_image = job_manager.JobsManager.GetDockerImage(self.job_name)
    if docker_image:
      ro_paths = [
          result.input_evidence.local_path, result.input_evidence.source_path,
          result.input_evidence.device_path, result.input_evidence.mount_path
      ]
      rw_paths = [self.output_dir, self.tmp_dir]
      container_manager = docker_manager.ContainerManager(docker_image)
      stdout, stderr, ret = container_manager.execute_container(
          cmd, shell, ro_paths=ro_paths, rw_paths=rw_paths,
          timeout_limit=timeout_limit)

    # Execute the job on the host system.
    else:
      try:
        if shell:
          proc = subprocess.Popen(
              cmd, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
          proc.wait(timeout_limit)
        else:
          proc = subprocess.Popen(
              cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
          proc.wait(timeout_limit)
      except subprocess.TimeoutExpired as exception:
        # Log error and close result.
        message = (
            'Execution of [{0!s}] failed because the job timeout of '
            '{1:d} seconds was reached.'.format(cmd, timeout_limit))
        result.log(message)
        result.close(self, success=False, status=message)
        # Increase timeout metric and raise exception
        turbinia_worker_tasks_timeout_total.inc()
        raise TurbiniaException(message)

      stdout, stderr = proc.communicate()
      ret = proc.returncode

    result.error['stdout'] = str(stdout)
    result.error['stderr'] = str(stderr)

    if stderr_file and not stderr:
      result.log(
          'Attempting to save stderr to {0:s}, but no stderr found during '
          'execution'.format(stderr_file))
    elif stderr:
      if not stderr_file:
        _, stderr_file = tempfile.mkstemp(
            suffix='.txt', prefix='stderr-', dir=self.output_dir)
      result.log(
          'Writing stderr to {0:s}'.format(stderr_file), level=logging.DEBUG)
      with open(stderr_file, 'wb') as fh:
        fh.write(stderr)
      log_files.append(stderr_file)

    if stdout_file and not stdout:
      result.log(
          'Attempting to save stdout to {0:s}, but no stdout found during '
          'execution'.format(stdout_file))
    elif stdout:
      if not stdout_file:
        _, stdout_file = tempfile.mkstemp(
            suffix='.txt', prefix='stdout-', dir=self.output_dir)
      result.log(
          'Writing stdout to {0:s}'.format(stdout_file), level=logging.DEBUG)
      with open(stdout_file, 'wb') as fh:
        fh.write(stdout)
      log_files.append(stdout_file)

    log_files = list(set(log_files))
    for file_ in log_files:
      if not os.path.exists(file_):
        result.log(
            'Log file {0:s} does not exist to save'.format(file_),
            level=logging.DEBUG)
        continue
      if os.path.getsize(file_) == 0:
        result.log(
            'Log file {0:s} is empty. Not saving'.format(file_),
            level=logging.DEBUG)
        continue
      result.log('Output log file found at {0:s}'.format(file_))
      if not self.run_local:
        self.output_manager.save_local_file(file_, result)

    if ret not in success_codes:
      message = 'Execution of [{0!s}] failed with status {1:d}'.format(cmd, ret)
      result.log(message)
      if close:
        result.close(self, success=False, status=message)
    else:
      result.log('Execution of [{0!s}] succeeded'.format(cmd))
      for file_ in save_files:
        if os.path.getsize(file_) == 0:
          result.log(
              'Output file {0:s} is empty. Not saving'.format(file_),
              level=logging.DEBUG)
          continue
        result.log('Output save file at {0:s}'.format(file_))
        if not self.run_local:
          self.output_manager.save_local_file(file_, result)

      for evidence in new_evidence:
        # If the local path is set in the Evidence, we check to make sure that
        # the path exists and is not empty before adding it.
        if evidence.source_path and not os.path.exists(evidence.source_path):
          message = (
              'Evidence {0:s} source_path {1:s} does not exist. Not returning '
              'empty Evidence.'.format(evidence.name, evidence.source_path))
          result.log(message, level=logging.WARN)
        elif (evidence.source_path and os.path.exists(evidence.source_path) and
              os.path.getsize(evidence.source_path) == 0):
          message = (
              'Evidence {0:s} source_path {1:s} is empty. Not returning '
              'empty new Evidence.'.format(evidence.name, evidence.source_path))
          result.log(message, level=logging.WARN)
        else:
          result.add_evidence(evidence, self._evidence_config)

      if close:
        result.close(self, success=True)

    return ret, result
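
A hedged sketch of how a task might call execute(); the command, paths, and evidence names are placeholders rather than any specific Turbinia task.

# Hypothetical call from within a TurbiniaTask.run() method (names are placeholders):
# output_path = os.path.join(self.output_dir, 'strings.txt')
# cmd = ['strings', '-a', evidence.local_path]
# ret, result = self.execute(
#     cmd, result, stdout_file=output_path,
#     new_evidence=[output_evidence], close=True)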
Example #23
    def get_task_data(self,
                      instance,
                      project,
                      region,
                      days=0,
                      task_id=None,
                      request_id=None,
                      user=None,
                      function_name='gettasks'):
        """Gets task data from Google Cloud Functions.

    Args:
      instance (string): The Turbinia instance name (by default the same as the
          INSTANCE_ID in the config).
      project (string): The name of the project.
      region (string): The name of the region to execute in.
      days (int): The number of days we want history for.
      task_id (string): The Id of the task.
      request_id (string): The Id of the request we want tasks for.
      user (string): The user of the request we want tasks for.
      function_name (string): The GCF function we want to call

    Returns:
      List of Task dict objects.
    """
        cloud_function = gcp_function.GoogleCloudFunction(project)
        func_args = {'instance': instance, 'kind': 'TurbiniaTask'}

        if days:
            start_time = datetime.now() - timedelta(days=days)
            # Format this like '1990-01-01T00:00:00z' so we can cast it directly to a
            # javascript Date() object in the cloud function.
            start_string = start_time.strftime(DATETIME_FORMAT)
            func_args.update({'start_time': start_string})
        elif task_id:
            func_args.update({'task_id': task_id})
        elif request_id:
            func_args.update({'request_id': request_id})

        if user:
            func_args.update({'user': user})

        response = None
        retry_count = 0
        credential_error_count = 0
        while response is None and retry_count < MAX_RETRIES:
            try:
                response = cloud_function.ExecuteFunction(
                    function_name, region, func_args)
            except auth.exceptions.RefreshError as exception:
                if credential_error_count == 0:
                    log.info(
                        'GCP Credentials need to be refreshed, please refresh in another '
                        'terminal and this process will resume. Error: {0!s}'.
                        format(exception))
                else:
                    log.debug(
                        'GCP Credentials need to be refreshed, please refresh in another '
                        'terminal and this process will resume. Attempt {0:d}. Error: '
                        '{1!s}'.format(credential_error_count + 1, exception))
                # Note, we are intentionally not incrementing the retry_count here because
                # we will retry indefinitely while we wait for the user to reauth.
                credential_error_count += 1
            except httplib2.ServerNotFoundError as exception:
                log.info(
                    'Error connecting to server, will retry [{0:d} of {1:d} retries]: '
                    '{2!s}'.format(retry_count, MAX_RETRIES, exception))
                retry_count += 1

            if response is None:
                time.sleep(RETRY_SLEEP)

        if 'result' not in response:
            log.error('No results found')
            if response.get('error', '{}') != '{}':
                msg = 'Error executing Cloud Function: [{0!s}].'.format(
                    response.get('error'))
                log.error(msg)
            log.debug('GCF response: {0!s}'.format(response))
            raise TurbiniaException(
                'Cloud Function {0:s} returned no results.'.format(
                    function_name))

        try:
            results = json.loads(response['result'])
        except (TypeError, ValueError) as e:
            raise TurbiniaException(
                'Could not deserialize result [{0!s}] from GCF: [{1!s}]'.
                format(response.get('result'), e))

        # Convert run_time/last_update back into datetime objects
        task_data = results[0]
        for task in task_data:
            if task.get('run_time'):
                task['run_time'] = timedelta(seconds=task['run_time'])
            if task.get('last_update'):
                task['last_update'] = datetime.strptime(
                    task['last_update'], DATETIME_FORMAT)

        return task_data
Example #24
import logging

from turbinia import config
from turbinia import TurbiniaException
from turbinia.workers import TurbiniaTask
from turbinia.workers import TurbiniaTaskResult

config.LoadConfig()
if config.STATE_MANAGER.lower() == 'datastore':
    from google.cloud import datastore
    from google.cloud import exceptions
elif config.STATE_MANAGER.lower() == 'redis':
    import redis
else:
    msg = 'State Manager type "{0:s}" not implemented'.format(
        config.STATE_MANAGER)
    raise TurbiniaException(msg)

DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
MAX_DATASTORE_STRLEN = 1500
log = logging.getLogger('turbinia')


def get_state_manager():
    """Return state manager object based on config.

  Returns:
    Initialized StateManager object.
  """
    config.LoadConfig()
    # pylint: disable=no-else-return
    if config.STATE_MANAGER.lower() == 'datastore':
Example #25
    def get_task_data(self,
                      instance,
                      project,
                      region,
                      days=0,
                      task_id=None,
                      request_id=None,
                      user=None,
                      function_name='gettasks'):
        """Gets task data from Google Cloud Functions.

    Args:
      instance (string): The Turbinia instance name (by default the same as the
          INSTANCE_ID in the config).
      project (string): The name of the project.
      region (string): The name of the region to execute in.
      days (int): The number of days we want history for.
      task_id (string): The Id of the task.
      request_id (string): The Id of the request we want tasks for.
      user (string): The user of the request we want tasks for.
      function_name (string): The GCF function we want to call

    Returns:
      List of Task dict objects.
    """
        cloud_function = GoogleCloudFunction(project_id=project, region=region)
        func_args = {'instance': instance, 'kind': 'TurbiniaTask'}

        if days:
            start_time = datetime.now() - timedelta(days=days)
            # Format this like '1990-01-01T00:00:00z' so we can cast it directly to a
            # javascript Date() object in the cloud function.
            start_string = start_time.strftime('%Y-%m-%dT%H:%M:%S')
            func_args.update({'start_time': start_string})
        elif task_id:
            func_args.update({'task_id': task_id})
        elif request_id:
            func_args.update({'request_id': request_id})

        if user:
            func_args.update({'user': user})

        response = cloud_function.ExecuteFunction(function_name, func_args)
        if 'result' not in response:
            log.error('No results found')
            if response.get('error', '{}') != '{}':
                msg = 'Error executing Cloud Function: [{0!s}].'.format(
                    response.get('error'))
                log.error(msg)
            log.debug('GCF response: {0!s}'.format(response))
            raise TurbiniaException(
                'Cloud Function {0:s} returned no results.'.format(
                    function_name))

        try:
            results = json.loads(response['result'])
        except (TypeError, ValueError) as e:
            raise TurbiniaException(
                'Could not deserialize result from GCF: [{0!s}]'.format(e))

        return results[0]
Example #26
def PreprocessMountDisk(partition_paths, partition_number):
    """Locally mounts disk in an instance.

  Args:
    partition_paths(list(str)): A list of paths to partition block devices;
    partition_number(int): the number of the partition to mount. Remember these
      are 1-indexed (first partition is 1).

  Raises:
    TurbiniaException: if the mount command failed to run.

  Returns:
    str: the path to the mounted filesystem.
  """
    config.LoadConfig()
    mount_prefix = config.MOUNT_DIR_PREFIX

    if partition_number > len(partition_paths):
        raise TurbiniaException(
            'Can not mount partition {0:d}: found only {1:d} partitions in '
            'Evidence.'.format(partition_number, len(partition_paths)))

    # Partitions are 1-indexed for the user and the system
    if partition_number < 1:
        raise TurbiniaException(
            'Can not mount partition {0:d}: partition numbering starts at 1'.
            format(partition_number))

    partition_path = partition_paths[partition_number - 1]

    if not os.path.exists(partition_path):
        raise TurbiniaException(
            'Could not mount partition {0:s}, the path does not exist'.format(
                partition_path))

    if os.path.exists(mount_prefix) and not os.path.isdir(mount_prefix):
        raise TurbiniaException(
            'Mount dir {0:s} exists, but is not a directory'.format(
                mount_prefix))
    if not os.path.exists(mount_prefix):
        log.info(
            'Creating local mount parent directory {0:s}'.format(mount_prefix))
        try:
            os.makedirs(mount_prefix)
        except OSError as e:
            raise TurbiniaException(
                'Could not create mount directory {0:s}: {1!s}'.format(
                    mount_prefix, e))

    mount_path = tempfile.mkdtemp(prefix='turbinia', dir=mount_prefix)

    mount_cmd = ['sudo', 'mount', '-o', 'ro']
    fstype = GetFilesystem(partition_path)
    if fstype in ['ext3', 'ext4']:
        # This is in case the underlying filesystem is dirty, as we want to mount
        # everything read-only.
        mount_cmd.extend(['-o', 'noload'])
    mount_cmd.extend([partition_path, mount_path])

    log.info('Running: {0:s}'.format(' '.join(mount_cmd)))
    try:
        subprocess.check_call(mount_cmd)
    except subprocess.CalledProcessError as e:
        raise TurbiniaException('Could not mount directory {0!s}'.format(e))

    return mount_path
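
A hedged usage sketch; the partition device paths are placeholders, and partition numbers are 1-indexed as documented above.

# Hypothetical usage (paths are placeholders):
# partition_paths = ['/dev/loop0p1', '/dev/loop0p2']
# mount_path = PreprocessMountDisk(partition_paths, partition_number=2)
# # Mounts /dev/loop0p2 read-only under MOUNT_DIR_PREFIX and returns the mount point.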
Example #27
def PreprocessBitLocker(source_path, partition_offset=None, credentials=None):
    """Uses libbde on a target block device or image file.

  Creates a decrypted virtual device of the encrypted volume.

  Args:
    source_path(str): the source path to run bdemount on.
    partition_offset(int): offset of volume in bytes.
    credentials(list[{str: str}]): decryption credentials set in evidence setup

  Raises:
    TurbiniaException: if source_path doesn't exist or if the bdemount command
      failed to create a virtual device.

  Returns:
    str: the path to the decrypted virtual block device
  """
    config.LoadConfig()
    mount_prefix = config.MOUNT_DIR_PREFIX
    decrypted_device = None

    if not os.path.exists(source_path):
        raise TurbiniaException(
            ('Cannot create virtual device for non-existing source_path '
             '{0!s}').format(source_path))

    if os.path.exists(mount_prefix) and not os.path.isdir(mount_prefix):
        raise TurbiniaException(
            'Mount dir {0:s} exists, but is not a directory'.format(
                mount_prefix))
    if not os.path.exists(mount_prefix):
        log.info(
            'Creating local mount parent directory {0:s}'.format(mount_prefix))
        try:
            os.makedirs(mount_prefix)
        except OSError as e:
            raise TurbiniaException(
                'Could not create mount directory {0:s}: {1!s}'.format(
                    mount_prefix, e))

    mount_path = tempfile.mkdtemp(prefix='turbinia', dir=mount_prefix)

    for credential in credentials:
        libbde_command = ['sudo', 'bdemount', '-o', str(partition_offset)]
        credential_type = credential['credential_type']
        credential_data = credential['credential_data']
        if credential_type == 'password':
            libbde_command.extend(['-p', credential_data])
        elif credential_type == 'recovery_password':
            libbde_command.extend(['-r', credential_data])
        else:
            # Unsupported credential type, try the next
            log.warning(
                'Unsupported credential type: {0!s}'.format(credential_type))
            continue

        libbde_command.extend(['-X', 'allow_other', source_path, mount_path])

        # Not logging command since it will contain credentials
        try:
            subprocess.check_call(libbde_command)
        except subprocess.CalledProcessError as e:
            # Decryption failed with these credentials, try the next
            continue

        # Decrypted volume was mounted
        decrypted_device = os.path.join(mount_path, 'bde1')
        if not os.path.exists(decrypted_device):
            raise TurbiniaException(
                'Cannot attach decrypted device: {0!s}'.format(
                    decrypted_device))
        else:
            log.info(
                'Decrypted device attached: {0!s}'.format(decrypted_device))

        return decrypted_device
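
A hedged sketch of the credentials structure the decryption loop above iterates over; the values are placeholders.

# Illustrative credentials list matching the keys read above (values are placeholders):
credentials = [
    {'credential_type': 'password', 'credential_data': 'placeholder-password'},
    {'credential_type': 'recovery_password', 'credential_data': '000000-000000-000000'},
]
# decrypted_device = PreprocessBitLocker(
#     '/evidence/image.dd', partition_offset=1048576, credentials=credentials)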
Example #28
    def _ProcessPartition(self, evidence_path, path_spec):
        """Generate RawDiskPartition from a PathSpec.

    Args:
      evidence_path (str): Local path of the parent evidence
      path_spec (dfvfs.PathSpec): dfVFS path spec.

    Returns:
      A new RawDiskPartition evidence item and a list of strings containing
      partition information to add to the status report.
    """
        status_report = []

        fs_path_spec = path_spec
        fs_location = None
        partition_location = None
        volume_index = None
        partition_index = None
        partition_offset = None
        partition_size = None

        # File system location / identifier
        fs_location = getattr(path_spec, 'location', None)
        while path_spec.HasParent():
            type_indicator = path_spec.type_indicator
            if type_indicator == dfvfs_definitions.TYPE_INDICATOR_APFS_CONTAINER:
                # APFS volume index
                volume_index = getattr(path_spec, 'volume_index', None)

            if type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK_PARTITION:
                if fs_location in ('\\', '/'):
                    # Partition location / identifier
                    fs_location = getattr(path_spec, 'location', None)
                partition_location = getattr(path_spec, 'location', None)
                # Partition index
                partition_index = getattr(path_spec, 'part_index', None)

                volume_system = tsk_volume_system.TSKVolumeSystem()
                try:
                    volume_system.Open(path_spec)
                    volume_identifier = partition_location.replace('/', '')
                    volume = volume_system.GetVolumeByIdentifier(
                        volume_identifier)

                    partition_offset = volume.extents[0].offset
                    partition_size = volume.extents[0].size
                except dfvfs_errors.VolumeSystemError as e:
                    raise TurbiniaException(
                        'Could not process partition: {0!s}'.format(e))
                break

            path_spec = path_spec.parent

        status_report.append(fmt.heading5('{0!s}:'.format(fs_location)))
        if partition_index:
            if volume_index is not None:
                status_report.append(
                    fmt.bullet('Volume index: {0!s}'.format(volume_index)))
            status_report.append(
                fmt.bullet('Partition index: {0!s}'.format(partition_index)))
            status_report.append(
                fmt.bullet('Partition offset: {0!s}'.format(partition_offset)))
            status_report.append(
                fmt.bullet('Partition size: {0!s}'.format(partition_size)))
        else:
            status_report.append(
                fmt.bullet('Source evidence is a volume image'))

        partition_evidence = RawDiskPartition(
            source_path=evidence_path,
            path_spec=fs_path_spec,
            partition_offset=partition_offset,
            partition_size=partition_size)

        return partition_evidence, status_report
Example #29
  def execute_container(
      self, cmd, shell=False, ro_paths=None, rw_paths=None, **kwargs):
    """Executes a Docker container.

    A new Docker container will be created from the image id,
    executed, and then removed.

    Args:
      cmd(str|list): command to be executed.
      shell (bool): Whether the cmd is in the form of a string or a list.
      ro_paths(list): A list of paths to mount read-only in the container.
      rw_paths(list): A list of paths to mount read/write in the container.
      **kwargs: Any additional keywords to pass to the container.

    Returns:
      stdout(str): stdout of the container.
      stderr(str): stderr of the container.
      ret(int): the return code of the process run.

    Raises:
      TurbiniaException: If an error occurred with the Docker container.
    """
    container = None
    args = {}
    stdout = ''

    # Override the entrypoint to /bin/sh
    kwargs['entrypoint'] = '/bin/sh'
    if shell:
      cmd = '-c ' + '\"{0:s}\"'.format(cmd)
    else:
      cmd = ' '.join(cmd)
      cmd = '-c ' + '\"{0:s}\"'.format(cmd)

    # Create the device and file mount paths
    device_paths = []
    file_paths = {}
    if rw_paths:
      dwpath, fwpath = self._create_mount_points(rw_paths)
      device_paths.extend(dwpath)
      file_paths.update(fwpath)
    if ro_paths:
      drpath, frpath = self._create_mount_points(ro_paths, mode='ro')
      device_paths.extend(drpath)
      file_paths.update(frpath)

    args['devices'] = device_paths
    args['volumes'] = file_paths

    # Add any additional arguments
    for key, value in kwargs.items():
      args[key] = value

    try:
      container = self.client.containers.create(self.image, cmd, **args)
      container.start()
      # Stream program stdout from container
      stdstream = container.logs(stream=True)
      for stdo in stdstream:
        stdo = codecs.decode(stdo, 'utf-8').strip()
        log.debug(stdo)
        stdout += stdo
      results = container.wait()
    except docker.errors.APIError as exception:
      if container:
        container.remove(v=True)
      message = (
          'An error has occurred with the container: {0!s}'.format(exception))
      log.error(message)
      raise TurbiniaException(message)

    stderr, ret = results['Error'], results['StatusCode']
    if container:
      container.remove(v=True)

    return stdout, stderr, ret
Example #30
    def preprocess(self, tmp_dir=None, required_states=None):
        """Runs the possible parent's evidence preprocessing code, then ours.

    This is a wrapper function that will call the chain of pre-processors
    starting with the most distant ancestor.  After all of the ancestors have
    been processed, then we run our pre-processor.  These processors get run in
    the context of the local task execution on the worker nodes prior to the
    task itself running.  This can be used to prepare the evidence to be
    processed (e.g. attach a cloud disk, mount a local disk etc).

    Tasks export a list of the required_states they have for the state of the
    Evidence it can process in `TurbiniaTask.REQUIRED_STATES`[1].  Evidence also
    exports a list of the possible states it can have after pre/post-processing
    in `Evidence.POSSIBLE_STATES`.  The pre-processors should run selectively
    based on the these requirements that come from the Task, and the
    post-processors should run selectively based on the current state of the
    Evidence.

    If a Task requires a given state supported by the given Evidence class, but
    it is not met after the preprocessing of the Evidence is run, then the Task
    will abort early.  Note that for compound evidence types that have parent
    Evidence objects (e.g. where `context_dependent` is True), we only inspect
    the child Evidence type for its state as it is assumed that it would only be
    able to run the appropriate pre/post-processors when the parent Evidence
    processors have been successful.

    [1] Note that the evidence states required by the Task are only required if
    the Evidence also supports that state in `POSSIBLE_STATES`.  This is so
    that the Tasks are flexible enough to support multiple types of Evidence.
    For example, `PlasoTask` allows both `CompressedDirectory` and
    `GoogleCloudDisk` as Evidence input, and has states `ATTACHED` and
    `DECOMPRESSED` listed in `PlasoTask.REQUIRED_STATES`.  Since `ATTACHED`
    state is supported by `GoogleCloudDisk`, and `DECOMPRESSED` is supported by
    `CompressedDirectory`, only those respective pre-processors will be run and
    the state is confirmed after the preprocessing is complete.

    Args:
      tmp_dir(str): The path to the temporary directory that the
                       Task will write to.
      required_states(list[EvidenceState]): The list of evidence state
          requirements from the Task.

    Raises:
      TurbiniaException: If the required evidence state cannot be met by the
          possible states of the Evidence or if the parent evidence object does
          not exist when it is required by the Evidence type.
    """
        self.local_path = self.source_path
        if not required_states:
            required_states = []

        if self.context_dependent:
            if not self.parent_evidence:
                raise TurbiniaException(
                    'Evidence of type {0:s} needs parent_evidence to be set'.
                    format(self.type))
            self.parent_evidence.preprocess(tmp_dir, required_states)
        try:
            log.debug('Starting pre-processor for evidence {0:s}'.format(
                self.name))
            self._preprocess(tmp_dir, required_states)
        except TurbiniaException as exception:
            log.error('Error running preprocessor for {0:s}: {1!s}'.format(
                self.name, exception))

        log.debug(
            'Pre-processing evidence {0:s} is complete, and evidence is in state '
            '{1:s}'.format(self.name, self.format_state()))