Exemple #1
0
    def _do_update(self, source_repo, repo, vulnerability, yaml_path):
        """Process updates on a vulnerability.

        Walks every GIT-typed range in `vulnerability.affects.ranges`, clones
        the repository each range refers to, and feeds the result of
        `osv.get_affected` into `osv.update_vulnerability` to collect newly
        added ranges and versions. If anything was added, the additions are
        pushed via `_push_new_ranges_and_versions`; afterwards the matching
        `osv.Bug` entity is refreshed from the vulnerability and stored.

        Args:
            source_repo: Source repository record; only forwarded to
                `_push_new_ranges_and_versions`.
            repo: Checked-out source repository; only forwarded to
                `_push_new_ranges_and_versions`.
            vulnerability: Vulnerability message being analyzed. It is handed
                to `osv.update_vulnerability` (presumably updated in place --
                TODO confirm against that helper).
            yaml_path: Path of the vulnerability file; only forwarded to
                `_push_new_ranges_and_versions`.
        """
        logging.info('Processing update for vulnerability %s',
                     vulnerability.id)
        # The clone directory is created lazily, on the first GIT range, so no
        # temporary directory is allocated when there is nothing to clone.
        package_repo_dir = None
        package_repo_url = None
        package_repo = None

        added_ranges = set()
        added_versions = set()
        try:
            for affected_range in vulnerability.affects.ranges:
                # Go through existing provided ranges to find additional ranges (via
                # cherrypicks and branches).
                if affected_range.type != vulnerability_pb2.AffectedRangeNew.GIT:
                    continue

                current_repo_url = affected_range.repo
                if current_repo_url != package_repo_url:
                    # Different repo from previous one: drop the previous
                    # clone (if any) before cloning into a fresh directory.
                    if package_repo_dir is not None:
                        package_repo_dir.cleanup()
                    package_repo_dir = tempfile.TemporaryDirectory()
                    package_repo_url = current_repo_url
                    package_repo = osv.clone_with_retries(
                        package_repo_url, package_repo_dir.name)

                result = osv.get_affected(package_repo,
                                          affected_range.introduced,
                                          affected_range.fixed)
                new_ranges, new_versions = osv.update_vulnerability(
                    vulnerability, package_repo_url, result)

                # Collect newly added ranges and versions.
                added_ranges.update(new_ranges)
                added_versions.update(new_versions)
        finally:
            if package_repo_dir is not None:
                package_repo_dir.cleanup()

        if added_ranges or added_versions:
            if not self._push_new_ranges_and_versions(
                    source_repo, repo, vulnerability, yaml_path, added_ranges,
                    added_versions):
                logging.warning('Discarding changes for %s due to conflicts.',
                                vulnerability.id)
                return
        else:
            # Nothing to push; the datastore entry is still refreshed below.
            logging.info('No range/version changes for vulnerability %s.',
                         vulnerability.id)

        # Update datastore with new information.
        bug = osv.Bug.get_by_id(vulnerability.id)
        if not bug:
            # TODO(ochang): Create new entry if needed.
            logging.error('Failed to find bug with ID %s', vulnerability.id)
            return

        bug.update_from_vulnerability(vulnerability)
        bug.put()
Exemple #2
0
    def _source_update(self, message):
        """Handle a source update message for one vulnerability file.

        Reads `source`, `path` and `original_sha256` from
        `message.attributes`, clones the source repository, and checks that
        the file's current sha256 still equals the one recorded in the
        message. On a mismatch, or if the file fails to parse, the problem is
        logged and the update is skipped; otherwise the parsed vulnerability
        is passed to `_do_update`.
        """
        attrs = message.attributes
        source_name = attrs['source']
        relative_path = attrs['path']
        expected_digest = attrs['original_sha256']

        source_repo = osv.get_source_repository(source_name)
        checkout_dir = os.path.join(self._sources_dir, source_name)
        repo = osv.clone_with_retries(
            source_repo.repo_url,
            checkout_dir,
            callbacks=self._git_callbacks(source_repo))

        yaml_path = os.path.join(osv.repo_path(repo), relative_path)
        actual_digest = osv.sha256(yaml_path)
        if actual_digest != expected_digest:
            # The file changed since the message was queued; skip this update.
            logging.warning(
                'sha256sum of %s no longer matches (expected=%s vs current=%s).',
                relative_path, expected_digest, actual_digest)
            return

        try:
            parsed = osv.parse_vulnerability(yaml_path)
        except Exception as e:
            logging.error('Failed to parse vulnerability %s: %s', yaml_path, e)
        else:
            self._do_update(source_repo, repo, parsed, yaml_path,
                            expected_digest)
Exemple #3
0
    def process_oss_fuzz(self, oss_fuzz_source):
        """Export processed OSS-Fuzz bugs into the source repository.

        Clones `oss_fuzz_source.repo_url` into a temporary directory, writes
        one `<project>/<source_id>.yaml` file for every PROCESSED bug that
        belongs to this source and has no file yet, then commits and pushes
        the result to `origin`.

        Raises:
            RuntimeError: if the source repository could not be cloned.
        """
        # OSS-Fuzz data is first imported via a special Pub/Sub pipeline into
        # OSV. It is dumped here into a publicly accessible/editable place for
        # manual/human editing if required; that place then becomes the source
        # of truth from which any edits are imported back into OSV.
        with tempfile.TemporaryDirectory() as checkout_dir:
            git_callbacks = GitRemoteCallback(oss_fuzz_source.repo_username,
                                              self._ssh_key_public_path,
                                              self._ssh_key_private_path)

            repo = osv.clone_with_retries(oss_fuzz_source.repo_url,
                                          checkout_dir,
                                          callbacks=git_callbacks)
            if not repo:
                raise RuntimeError('Failed to clone source repo')

            export_root = os.path.join(checkout_dir,
                                       oss_fuzz_source.directory_path or '')

            # TODO(ochang): Make this more efficient by recording whether or not we
            # imported already in Datastore.
            processed_bugs = osv.Bug.query(
                osv.Bug.status == osv.BugStatus.PROCESSED)
            for bug in processed_bugs:
                source_name, source_id = osv.parse_source_id(bug.source_id)
                if source_name != oss_fuzz_source.name:
                    continue

                # The project directory is created even when the file already
                # exists and nothing ends up being written.
                project_dir = os.path.join(export_root, bug.project)
                os.makedirs(project_dir, exist_ok=True)
                target_path = os.path.join(project_dir, source_id + '.yaml')

                if not os.path.exists(target_path):
                    logging.info('Writing %s', bug.key.id())
                    with open(target_path, 'w') as yaml_file:
                        record = json_format.MessageToDict(
                            bug.to_vulnerability())
                        yaml.dump(record,
                                  yaml_file,
                                  sort_keys=False,
                                  Dumper=self.YamlDumper)

            # Commit Vulnerability changes back to the oss-fuzz source repository.
            logging.info('Commiting and pushing changes')
            index = repo.index
            index.add_all()
            index.write()
            tree = index.write_tree()
            author = _git_author()
            repo.create_commit(repo.head.name, author, author,
                               'Import from OSS-Fuzz', tree,
                               [repo.head.peel().oid])

            # TODO(ochang): Rebase and retry if necessary.
            origin = repo.remotes['origin']
            origin.push([repo.head.name], callbacks=git_callbacks)
Exemple #4
0
    def _source_update(self, message):
        """Handle a source update message for one vulnerability file.

        Reads `source` and `path` from `message.attributes`, clones the
        source repository, parses the vulnerability file at that path and
        passes it to `_do_update`.
        """
        attrs = message.attributes
        source_name = attrs['source']
        relative_path = attrs['path']

        source_repo = osv.get_source_repository(source_name)
        checkout_dir = os.path.join(self._sources_dir, source_name)
        repo = osv.clone_with_retries(
            source_repo.repo_url,
            checkout_dir,
            callbacks=self._git_callbacks(source_repo))

        yaml_path = os.path.join(osv.repo_path(repo), relative_path)
        parsed = osv.parse_vulnerability(yaml_path)
        self._do_update(source_repo, repo, parsed, yaml_path)
Exemple #5
0
    def checkout(self, source_repo):
        """Return a checkout of `source_repo` under the work directory.

        An existing checkout directory is re-used via
        `_use_existing_checkout`. If that fails for any reason, the directory
        is removed and the repository is cloned from scratch.
        """
        target_dir = os.path.join(self._work_dir, source_repo.name)

        if os.path.exists(target_dir):
            # Already exists, reset and checkout latest revision.
            try:
                existing = self._use_existing_checkout(source_repo, target_dir)
            except Exception as e:
                # Failed to re-use existing checkout. Delete it and start over.
                logging.error('Failed to load existing checkout: %s', e)
                shutil.rmtree(target_dir)
            else:
                return existing

        return osv.clone_with_retries(
            source_repo.repo_url,
            target_dir,
            callbacks=self._git_callbacks(source_repo))
Exemple #6
0
def process_impact_task(source_id, message):
  """Process an impact task.

  Loads the RegressResult/FixResult stored under `source_id`, clones the
  affected repository, computes the affected commits/tags with
  `osv.get_affected`, and writes the outcome onto the Bug entity whose
  `source_id` matches.

  Args:
    source_id: Key of the `osv.RegressResult` / `osv.FixResult` entities and
      the value matched against `osv.Bug.source_id`.
    message: Object whose `attributes['allocated_id']` holds the Bug ID
      allocated for this task.

  Raises:
    osv.ImpactError: If no Bug exists for `source_id`, or if neither result
      carries a `repo_url`.
  """
  logging.info('Processing impact task for %s', source_id)

  # A regression result is mandatory; without it the task is dropped.
  regress_result = ndb.Key(osv.RegressResult, source_id).get()
  if not regress_result:
    logging.error('Missing RegressResult for %s', source_id)
    return

  # A fix result is optional; an empty one stands in when it is missing.
  fix_result = ndb.Key(osv.FixResult, source_id).get()
  if not fix_result:
    logging.warning('Missing FixResult for %s', source_id)
    fix_result = osv.FixResult()

  # Check if there is an existing Bug for the same source, but with a different
  # allocated ID. This shouldn't happen.
  allocated_bug_id = message.attributes['allocated_id']

  existing_bug = osv.Bug.query(osv.Bug.source_id == source_id).get()
  if existing_bug and existing_bug.key.id() != allocated_bug_id:
    logging.error('Bug entry already exists for %s with a different ID %s',
                  source_id, existing_bug.key.id())
    return

  # Bugs already marked invalid are left untouched.
  if existing_bug and existing_bug.status == osv.BugStatus.INVALID:
    logging.warning('Bug %s already marked as invalid.', existing_bug.key.id())
    return

  # From here on a Bug entity is required.
  if existing_bug:
    public = existing_bug.public
  else:
    raise osv.ImpactError('Task requested without Bug allocated.')

  # TODO(ochang): Handle changing repo types? e.g. SVN -> Git.

  # The regression result's repo_url wins over the fix result's.
  repo_url = regress_result.repo_url or fix_result.repo_url
  if not repo_url:
    raise osv.ImpactError('No repo_url set')

  issue_id = fix_result.issue_id or regress_result.issue_id
  fix_commit = fix_result.commit

  with tempfile.TemporaryDirectory() as tmp_dir:
    repo = osv.clone_with_retries(repo_url, tmp_dir)

    # If not a precise fix commit, try to find the exact one by going through
    # commit messages (oss-fuzz only).
    # A ':' in the commit string denotes a "start:end" range.
    if source_id.startswith(SOURCE_PREFIX) and ':' in fix_commit:
      start_commit, end_commit = fix_commit.split(':')
      commit = find_oss_fuzz_fix_via_commit(repo, start_commit, end_commit,
                                            source_id, issue_id)
      if commit:
        logging.info('Found exact fix commit %s via commit message (oss-fuzz)',
                     commit)
        fix_commit = commit

    # Actually compute the affected commits/tags.
    result = osv.get_affected(repo, regress_result.commit, fix_commit)
    logging.info('Found affected %s', ', '.join(result.tags))

  # If the range resolved to a single commit, simplify it.
  if len(result.fix_commits) == 1:
    fix_commit = result.fix_commits[0]
  elif not result.fix_commits:
    # Not fixed.
    fix_commit = ''

  # Same simplification for the regressing commit; otherwise keep the value
  # stored on the regression result.
  if len(result.regress_commits) == 1:
    regress_commit = result.regress_commits[0]
  else:
    regress_commit = regress_result.commit

  # For every descriptive field the fix result wins over the regression
  # result whenever it is set.
  project = fix_result.project or regress_result.project
  ecosystem = fix_result.ecosystem or regress_result.ecosystem
  summary = fix_result.summary or regress_result.summary
  details = fix_result.details or regress_result.details
  severity = fix_result.severity or regress_result.severity
  reference_urls = fix_result.reference_urls or regress_result.reference_urls

  update_affected_commits(allocated_bug_id, result, project, ecosystem, public)

  # Copy the computed values onto the Bug and mark it as processed.
  existing_bug.repo_url = repo_url
  existing_bug.fixed = fix_commit
  existing_bug.regressed = regress_commit
  existing_bug.affected = result.tags
  existing_bug.affected_fuzzy = osv.normalize_tags(result.tags)
  existing_bug.confidence = result.confidence
  existing_bug.issue_id = issue_id
  existing_bug.project = project
  existing_bug.ecosystem = ecosystem
  existing_bug.summary = summary
  existing_bug.details = details
  existing_bug.status = osv.BugStatus.PROCESSED
  existing_bug.severity = severity
  existing_bug.reference_urls = reference_urls

  existing_bug.additional_commit_ranges = []
  # Don't display additional ranges for imprecise commits, as they can be
  # confusing.
  if ':' in existing_bug.fixed or ':' in existing_bug.regressed:
    existing_bug.put()
    return

  def _sort_key(value):
    # Allow sorting of None values.
    return (value[0] or '', value[1] or '')

  for introduced_in, fixed_in in sorted(result.affected_ranges, key=_sort_key):
    # `fixed_in` may be None for an unfixed range, while the Bug stores ''.
    if (introduced_in == existing_bug.regressed and
        (fixed_in or '') == existing_bug.fixed):
      # Don't include the main range.
      continue

    existing_bug.additional_commit_ranges.append(
        osv.CommitRange(introduced_in=introduced_in, fixed_in=fixed_in))

  existing_bug.put()
Exemple #7
0
    def _do_update(self, source_repo, repo, vulnerability, yaml_path,
                   original_sha256):
        """Process updates on a vulnerability.

        Adds fix information from any stored `osv.FixResult`, then walks the
        GIT-typed ranges of `vulnerability.affects.ranges`: each referenced
        repository is cloned and `osv.get_affected` is used to collect extra
        ranges, the tags with the bug / with the fix, and the affected
        commits. The outcome is pushed via `_push_new_ranges_and_versions`;
        on success the `osv.Bug` entity and its affected commits are updated.

        Args:
            source_repo: Source repository record; only forwarded to
                `_push_new_ranges_and_versions`.
            repo: Checked-out source repository; only forwarded likewise.
            vulnerability: Vulnerability message being analyzed.
            yaml_path: Path of the vulnerability file; only forwarded
                likewise.
            original_sha256: Digest of the vulnerability file; only forwarded
                likewise.
        """
        logging.info('Processing update for vulnerability %s',
                     vulnerability.id)

        bug = osv.Bug.get_by_id(vulnerability.id)
        if bug:
            fix_result = osv.FixResult.get_by_id(bug.source_id)
            if fix_result:
                add_fix_information(vulnerability, bug, fix_result)

        # Repo URL -> collector of (introduced, fixed) ranges for that repo.
        range_collectors = collections.defaultdict(osv.RangeCollector)
        versions_with_bug = set()
        versions_with_fix = set()
        commits = set()

        # The clone directory is created lazily inside the try block, so
        # that it is always covered by the `finally` cleanup and is not
        # allocated before the Datastore lookups above, which may raise.
        package_repo_dir = None
        package_repo_url = None
        package_repo = None

        try:
            # First pass: record the ranges that are already provided.
            for affected_range in vulnerability.affects.ranges:
                if affected_range.type != vulnerability_pb2.AffectedRange.GIT:
                    continue

                range_collectors[affected_range.repo].add(
                    affected_range.introduced, affected_range.fixed)

            for affected_range in vulnerability.affects.ranges:
                # Go through existing provided ranges to find additional ranges (via
                # cherrypicks and branches).
                if affected_range.type != vulnerability_pb2.AffectedRange.GIT:
                    continue

                current_repo_url = affected_range.repo
                if current_repo_url != package_repo_url:
                    # Different repo from previous one: drop the previous
                    # clone (if any) before cloning into a fresh directory.
                    if package_repo_dir is not None:
                        package_repo_dir.cleanup()
                    package_repo_dir = tempfile.TemporaryDirectory()
                    package_repo_url = current_repo_url
                    package_repo = osv.clone_with_retries(
                        package_repo_url, package_repo_dir.name)

                result = osv.get_affected(package_repo,
                                          affected_range.introduced,
                                          affected_range.fixed)
                for introduced, fixed in result.affected_ranges:
                    range_collectors[current_repo_url].add(introduced, fixed)

                versions_with_fix.update(result.tags_with_fix)
                versions_with_bug.update(result.tags_with_bug)
                commits.update(result.commits)
        finally:
            if package_repo_dir is not None:
                package_repo_dir.cleanup()

        # Only versions that have the bug but not the fix are reported.
        if self._push_new_ranges_and_versions(
                source_repo, repo, vulnerability, yaml_path, original_sha256,
                range_collectors, list(versions_with_bug - versions_with_fix)):
            logging.info('Updated range/versions for vulnerability %s.',
                         vulnerability.id)
        else:
            logging.warning('Discarding changes for %s due to conflicts.',
                            vulnerability.id)
            return

        # Update datastore with new information. The entity is fetched again
        # rather than re-using the one loaded at the top.
        bug = osv.Bug.get_by_id(vulnerability.id)
        if not bug:
            # TODO(ochang): Create new entry if needed.
            logging.error('Failed to find bug with ID %s', vulnerability.id)
            return

        bug.update_from_vulnerability(vulnerability)
        bug.put()

        osv.update_affected_commits(bug.key.id(), commits, bug.project,
                                    bug.ecosystem, bug.public)
Exemple #8
0
def process_impact_task(source_id, message):
    """Process an impact task.

    Loads the RegressResult/FixResult stored under `source_id`, populates the
    basic attributes of the matching Bug, clones the affected repository,
    computes the affected commits/tags with `osv.RepoAnalyzer`, and writes
    the resulting commits, tags and GIT ranges onto the Bug.

    Args:
        source_id: Key of the `osv.RegressResult` / `osv.FixResult` entities
            and the value matched against `osv.Bug.source_id`.
        message: Object whose `attributes['allocated_id']` holds the Bug ID
            allocated for this task.

    Raises:
        osv.ImpactError: If no Bug exists for `source_id`, or if neither
            result carries a `repo_url`.
    """
    logging.info('Processing impact task for %s', source_id)

    # A regression result is mandatory; without it the task is dropped.
    regress_result = ndb.Key(osv.RegressResult, source_id).get()
    if not regress_result:
        logging.error('Missing RegressResult for %s', source_id)
        return

    # A fix result is optional; an empty one stands in when it is missing.
    fix_result = ndb.Key(osv.FixResult, source_id).get()
    if not fix_result:
        logging.warning('Missing FixResult for %s', source_id)
        fix_result = osv.FixResult()

    # Check if there is an existing Bug for the same source, but with a different
    # allocated ID. This shouldn't happen.
    allocated_bug_id = message.attributes['allocated_id']

    existing_bug = osv.Bug.query(osv.Bug.source_id == source_id).get()
    if existing_bug and existing_bug.key.id() != allocated_bug_id:
        logging.error('Bug entry already exists for %s with a different ID %s',
                      source_id, existing_bug.key.id())
        return

    if existing_bug and existing_bug.status == osv.BugStatus.INVALID:
        logging.warning('Bug %s already marked as invalid.',
                        existing_bug.key.id())
        return

    # From here on a Bug entity is required.
    if not existing_bug:
        raise osv.ImpactError('Task requested without Bug allocated.')
    public = existing_bug.public

    # The regression result's repo_url wins over the fix result's.
    repo_url = regress_result.repo_url or fix_result.repo_url
    if not repo_url:
        raise osv.ImpactError('No repo_url set')

    # Always populate Bug attributes, even if the remainder of the analysis fails.
    # This does not mark the Bug as being valid.
    set_bug_attributes(existing_bug, regress_result, fix_result)
    existing_bug.put()

    issue_id = fix_result.issue_id or regress_result.issue_id
    fix_commit = fix_result.commit

    with tempfile.TemporaryDirectory() as tmp_dir:
        repo = osv.clone_with_retries(repo_url, tmp_dir)

        # If not a precise fix commit, try to find the exact one by going through
        # commit messages (oss-fuzz only).
        # A ':' in the commit string denotes a "start:end" range.
        if source_id.startswith(SOURCE_PREFIX) and ':' in fix_commit:
            start_commit, end_commit = fix_commit.split(':')
            commit = find_oss_fuzz_fix_via_commit(repo, start_commit,
                                                  end_commit, source_id,
                                                  issue_id)
            if commit:
                logging.info(
                    'Found exact fix commit %s via commit message (oss-fuzz)',
                    commit)
                fix_commit = commit

        # Actually compute the affected commits/tags.
        repo_analyzer = osv.RepoAnalyzer()
        result = repo_analyzer.get_affected(repo, regress_result.commit,
                                            fix_commit)
        # Sorted once here; the same list is stored on the Bug further down.
        affected_tags = sorted(result.tags)
        logging.info('Found affected %s', ', '.join(affected_tags))

    # If the range resolved to a single commit, simplify it.
    if len(result.fix_commits) == 1:
        fix_commit = result.fix_commits[0]
    elif not result.fix_commits:
        # Not fixed.
        fix_commit = ''

    # Same simplification for the regressing commit, unless the stored
    # regression commit contains the UNKNOWN_COMMIT marker.
    if (len(result.regress_commits) == 1
            and osv.UNKNOWN_COMMIT not in regress_result.commit):
        regress_commit = result.regress_commits[0]
    else:
        regress_commit = regress_result.commit

    project = fix_result.project or regress_result.project
    ecosystem = fix_result.ecosystem or regress_result.ecosystem
    osv.update_affected_commits(allocated_bug_id, result.commits, project,
                                ecosystem, public)

    existing_bug.fixed = fix_commit
    existing_bug.regressed = regress_commit
    existing_bug.affected = affected_tags
    existing_bug.affected_fuzzy = osv.normalize_tags(affected_tags)
    existing_bug.status = osv.BugStatus.PROCESSED

    # For the AffectedRange, use the first commit in the regress commit range, and
    # the last commit in the fix commit range.
    introduced = result.regress_commits[0] if result.regress_commits else ''
    fixed = result.fix_commits[-1] if result.fix_commits else ''
    existing_bug.affected_ranges = [
        osv.AffectedRange(type='GIT',
                          repo_url=repo_url,
                          introduced=introduced,
                          fixed=fixed),
    ]

    # Expose range data in `database_specific`.
    database_specific = {}
    if ':' in existing_bug.regressed:
        database_specific['introduced_range'] = existing_bug.regressed
    if ':' in existing_bug.fixed:
        database_specific['fixed_range'] = existing_bug.fixed

    if database_specific:
        existing_bug.database_specific = database_specific

    # Don't display additional ranges for imprecise commits, as they can be
    # confusing.
    if ':' in existing_bug.fixed or ':' in existing_bug.regressed:
        existing_bug.put()
        return

    def _sort_key(value):
        # Allow sorting of None values.
        return (value[0] or '', value[1] or '')

    for introduced_in, fixed_in in sorted(result.affected_ranges,
                                          key=_sort_key):
        # Normalize a missing fix to '' so it compares equal to the value
        # stored on the Bug.
        if not fixed_in:
            fixed_in = ''

        if (introduced_in == existing_bug.regressed
                and fixed_in == existing_bug.fixed):
            # Don't repeat the main range.
            continue

        existing_bug.affected_ranges.append(
            osv.AffectedRange(type='GIT',
                              repo_url=repo_url,
                              introduced=introduced_in,
                              fixed=fixed_in))

    existing_bug.put()
Exemple #9
0
    def _do_update(self, source_repo, repo, vulnerability, yaml_path,
                   relative_path, original_sha256):
        """Process updates on a vulnerability.

        Adds fix information from any stored `osv.FixResult`, then walks the
        GIT-typed ranges of `vulnerability.affects.ranges`: each referenced
        repository is cloned and `osv.get_affected` is used to collect extra
        ranges, the tags with the bug / with the fix, and the affected
        commits. Versions from `_enumerate_versions` are combined with the
        tag-derived ones and pushed via `_push_new_ranges_and_versions`; on
        success the `osv.Bug` entity (created if missing, except for the
        'oss-fuzz' source) and its affected commits are updated.

        Args:
            source_repo: Source repository record; its `name` is used for the
                OSS-Fuzz check and for the `source_id` of a new Bug.
            repo: Checked-out source repository; only forwarded to
                `_push_new_ranges_and_versions`.
            vulnerability: Vulnerability message being analyzed.
            yaml_path: Path of the vulnerability file; only forwarded
                likewise.
            relative_path: Path used to build the `source_id` of a newly
                created Bug (`<source name>:<relative_path>`).
            original_sha256: Digest of the vulnerability file; only forwarded
                to `_push_new_ranges_and_versions`.
        """
        logging.info('Processing update for vulnerability %s',
                     vulnerability.id)

        bug = osv.Bug.get_by_id(vulnerability.id)
        if bug:
            fix_result = osv.FixResult.get_by_id(bug.source_id)
            if fix_result:
                add_fix_information(vulnerability, bug, fix_result)

        # Repo -> Git range collectors
        range_collectors = collections.defaultdict(osv.RangeCollector)
        versions_with_bug = set()
        versions_with_fix = set()
        commits = set()

        # The clone directory is created lazily inside the try block, so
        # that it is always covered by the `finally` cleanup and is not
        # allocated before the Datastore lookups above, which may raise.
        package_repo_dir = None
        package_repo_url = None
        package_repo = None

        try:
            # First pass: record the ranges that are already provided.
            for affected_range in vulnerability.affects.ranges:
                if affected_range.type != vulnerability_pb2.AffectedRange.GIT:
                    continue

                # Convert empty values ('') to None.
                introduced = affected_range.introduced or None
                fixed = affected_range.fixed or None
                range_collectors[affected_range.repo].add(introduced, fixed)

            for affected_range in vulnerability.affects.ranges:
                # Go through existing provided ranges to find additional ranges (via
                # cherrypicks and branches).
                if affected_range.type != vulnerability_pb2.AffectedRange.GIT:
                    continue

                current_repo_url = affected_range.repo
                if current_repo_url != package_repo_url:
                    # Different repo from previous one: drop the previous
                    # clone (if any) before cloning into a fresh directory.
                    if package_repo_dir is not None:
                        package_repo_dir.cleanup()
                    package_repo_dir = tempfile.TemporaryDirectory()
                    package_repo_url = current_repo_url
                    package_repo = osv.clone_with_retries(
                        package_repo_url, package_repo_dir.name)

                result = osv.get_affected(package_repo,
                                          affected_range.introduced,
                                          affected_range.fixed)
                for introduced, fixed in result.affected_ranges:
                    range_collectors[current_repo_url].add(introduced, fixed)

                versions_with_fix.update(result.tags_with_fix)
                versions_with_bug.update(result.tags_with_bug)
                commits.update(result.commits)
        finally:
            if package_repo_dir is not None:
                package_repo_dir.cleanup()

        # Enumerate ECOSYSTEM and SEMVER ranges.
        versions = self._enumerate_versions(vulnerability.package.name,
                                            vulnerability.package.ecosystem,
                                            vulnerability.affects.ranges)
        # Add additional versions derived from tags (those that have the bug
        # but not the fix).
        versions.extend(versions_with_bug - versions_with_fix)

        if self._push_new_ranges_and_versions(source_repo, repo, vulnerability,
                                              yaml_path, original_sha256,
                                              range_collectors, versions):
            logging.info('Updated range/versions for vulnerability %s.',
                         vulnerability.id)
        else:
            logging.warning('Discarding changes for %s due to conflicts.',
                            vulnerability.id)
            return

        # Update datastore with new information. The entity is fetched again
        # rather than re-using the one loaded at the top.
        bug = osv.Bug.get_by_id(vulnerability.id)
        if not bug:
            # No entry is created here for the OSS-Fuzz source.
            if source_repo.name == 'oss-fuzz':
                logging.warning('%s not found for OSS-Fuzz source.',
                                vulnerability.id)
                return

            bug = osv.Bug(id=vulnerability.id,
                          source_id=f'{source_repo.name}:{relative_path}',
                          timestamp=osv.utcnow(),
                          status=osv.BugStatus.PROCESSED,
                          source_of_truth=osv.SourceOfTruth.SOURCE_REPO)

        bug.update_from_vulnerability(vulnerability)
        bug.put()

        osv.update_affected_commits(bug.key.id(), commits, bug.project,
                                    bug.ecosystem, bug.public)