Example n. 1
0
def main() -> None:
    """Run a FOSSA (open-source license compliance) analysis for this repository.

    Builds a `fossa analyze` command line from CLI arguments, verifies the
    installed fossa-cli version, expands the local FOSSA config with third-party
    C/C++ modules downloaded from the thirdparty archive (when the generated
    fossa_modules.yml file exists), and finally invokes fossa-cli.

    Raises:
        RuntimeError: if results would be uploaded but FOSSA_API_KEY is unset,
            or if the fossa-cli version cannot be parsed or is too old.
    """
    parser = argparse.ArgumentParser(
        description='Run FOSSA analysis (open source license compliance).')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose output')
    parser.add_argument(
        'fossa_cli_args',
        nargs='*',
        help='These arguments are passed directly to fossa-cli')
    args = parser.parse_args()
    init_env(args.verbose)

    # TODO: We may also want to try using the v2 option --unpack-archives
    #       Though that may be going to deeper level than we want.
    fossa_cmd_line = ['fossa', 'analyze']
    fossa_cmd_line.extend(args.fossa_cli_args)

    # Any of these flags makes fossa-cli print results locally instead of uploading them.
    should_upload = not any(
        arg in args.fossa_cli_args for arg in ('--show-output', '--output', '-o'))

    if should_upload and not os.getenv('FOSSA_API_KEY'):
        # --output is used for local analysis only, without uploading the results. In all other
        # cases we would like to upload the results, which requires an API key.
        raise RuntimeError('FOSSA_API_KEY must be specified in order to upload analysis results.')

    logging.info(
        f"FOSSA CLI command line: {shlex_join(fossa_cmd_line)}")

    # Ensure the installed fossa-cli is recent enough before doing any real work.
    fossa_version_str = subprocess.check_output(['fossa', '--version']).decode('utf-8')
    fossa_version_match = FOSSA_VERSION_RE.match(fossa_version_str)
    if not fossa_version_match:
        raise RuntimeError(f"Cannot parse fossa-cli version: {fossa_version_str}")
    fossa_version = fossa_version_match.group(1)
    if version.parse(fossa_version) < version.parse(MIN_FOSSA_CLI_VERSION):
        raise RuntimeError(
            f"fossa version too old: {fossa_version} "
            f"(expected {MIN_FOSSA_CLI_VERSION} or later)")

    download_cache_path = get_download_cache_dir()
    logging.info(f"Using the download cache directory {download_cache_path}")
    download_config = DownloadConfig(
        verbose=args.verbose,
        cache_dir_path=download_cache_path
    )
    downloader = Downloader(download_config)

    fossa_yml_path = os.path.join(YB_SRC_ROOT, '.fossa-local.yml')
    fossa_yml_data = load_yaml_file(fossa_yml_path)
    modules = fossa_yml_data['analyze']['modules']
    # fossa v2.6.1 does not pick up project name from config file version 2 format.
    # TODO: update to config file version 3
    fossa_cmd_line.extend(["--project", fossa_yml_data['cli']['project']])

    thirdparty_dir = get_thirdparty_dir()
    fossa_modules_path = os.path.join(thirdparty_dir, 'fossa_modules.yml')

    # Tracks archive URLs already added, to drop duplicate entries.
    seen_urls = set()

    start_time_sec = time.time()
    if os.path.exists(fossa_modules_path):
        thirdparty_fossa_modules_data = load_yaml_file(fossa_modules_path)
        for thirdparty_module_data in thirdparty_fossa_modules_data:
            fossa_module_data = thirdparty_module_data['fossa_module']
            module_name = fossa_module_data['name']
            if not should_include_fossa_module(module_name):
                continue
            fossa_module_yb_metadata = thirdparty_module_data['yb_metadata']
            expected_sha256 = fossa_module_yb_metadata['sha256sum']
            url = fossa_module_yb_metadata['url']
            if url in seen_urls:
                # Due to a bug in some versions of yugabyte-db-thirdparty scripts, as of 04/20/2021
                # we may include the same dependency twice in the fossa_modules.yml file. We just
                # skip the duplicates here.
                continue
            seen_urls.add(url)

            logging.info(f"Adding module from {url}")
            downloaded_path = downloader.download_url(
                url,
                download_parent_dir_path=None,  # Download to cache directly.
                verify_checksum=True,
                expected_sha256=expected_sha256
            )
            fossa_module_data['target'] = downloaded_path
            modules.append(fossa_module_data)

        # TODO: Once we move to v2 fossa, we may want to use fossa-dep.yml file instead of
        #       re-writing the main file.
        effective_fossa_yml_path = os.path.join(YB_SRC_ROOT, '.fossa.yml')
        write_yaml_file(fossa_yml_data, effective_fossa_yml_path)

        logging.info(f"Wrote the expanded FOSSA file to {effective_fossa_yml_path}")
    else:
        logging.warning(
            f"File {fossa_modules_path} does not exist. Some C/C++ dependencies will be missing "
            f"from FOSSA analysis.")

        effective_fossa_yml_path = fossa_yml_path

    elapsed_time_sec = time.time() - start_time_sec
    logging.info("Generated the effective FOSSA configuration file in %.1f sec", elapsed_time_sec)
    # Note: fixed a stray ')' that used to appear at the end of this log message.
    logging.info(f"Running command: {shlex_join(fossa_cmd_line)}")
    subprocess.check_call(fossa_cmd_line)
Example n. 2
0
    def update_archive_metadata_file(self) -> None:
        """Regenerate the third-party archive metadata file from GitHub releases.

        Iterates the releases of yugabyte/yugabyte-db-thirdparty, groups them by
        target commit, picks the latest release group (plus any commits listed in
        self.also_use_commits), validates archive URLs, de-duplicates releases that
        differ only by tag, and writes the resulting metadata YAML file.

        Raises:
            ValueError: if the latest release group is ambiguous, or if a
                user-specified extra commit has no releases.
        """
        yb_version = read_file(os.path.join(YB_SRC_ROOT, 'version.txt')).strip()

        archive_metadata_path = get_archive_metadata_file_path()
        logging.info(f"Updating third-party archive metadata file in {archive_metadata_path}")

        github_client = Github(get_github_token(self.github_token_file_path))
        repo = github_client.get_repo('yugabyte/yugabyte-db-thirdparty')

        releases_by_commit: Dict[str, ReleaseGroup] = {}
        num_skipped_old_tag_format = 0
        num_skipped_wrong_branch = 0
        num_releases_found = 0

        releases = []
        get_releases_start_time_sec = time.time()
        try:
            for release in repo.get_releases():
                releases.append(release)
        except GithubException as exc:
            # The GitHub API caps list results at 1000; that cap is expected and
            # non-fatal here -- we just work with the releases we got.
            if 'Only the first 1000 results are available.' in str(exc):
                logging.info("Ignoring exception: %s", exc)
            else:
                # Bare raise preserves the original traceback.
                raise
        logging.info("Time spent to iterate all releases: %.1f sec",
                     time.time() - get_releases_start_time_sec)

        for release in releases:
            sha: str = release.target_commitish
            assert isinstance(sha, str)

            # target_commitish may be a branch name; resolve it to a commit SHA.
            if SHA_HASH.match(sha) is None:
                sha = repo.get_commit(sha).sha

            tag_name = release.tag_name
            if len(tag_name.split('-')) <= 2:
                logging.debug(f"Skipping release tag: {tag_name} (old format, too few components)")
                num_skipped_old_tag_format += 1
                continue
            if self.tag_filter_pattern and not self.tag_filter_pattern.match(tag_name):
                logging.info(f'Skipping tag {tag_name}, does not match the filter')
                continue

            try:
                yb_dep_release = GitHubThirdPartyRelease(release, target_commitish=sha)
            except SkipThirdPartyReleaseException as ex:
                logging.warning("Skipping release: %s", ex)
                continue

            if not yb_dep_release.is_consistent_with_yb_version(yb_version):
                logging.debug(
                    f"Skipping release tag: {tag_name} (does not match version {yb_version})")
                num_skipped_wrong_branch += 1
                continue

            if sha not in releases_by_commit:
                releases_by_commit[sha] = ReleaseGroup(sha)

            num_releases_found += 1
            logging.debug(f"Found release: {yb_dep_release}")
            releases_by_commit[sha].add_release(yb_dep_release)

        if num_skipped_old_tag_format > 0:
            logging.info(f"Skipped {num_skipped_old_tag_format} releases due to old tag format")
        if num_skipped_wrong_branch > 0:
            logging.info(f"Skipped {num_skipped_wrong_branch} releases due to branch mismatch")
        logging.info(
            f"Found {num_releases_found} releases for {len(releases_by_commit)} different commits")

        # The latest group must be unambiguous: the group holding the newest release
        # must also be the group whose oldest release is the newest among groups.
        latest_group_by_max = max(
            releases_by_commit.values(), key=ReleaseGroup.get_max_creation_timestamp)
        latest_group_by_min = max(
            releases_by_commit.values(), key=ReleaseGroup.get_min_creation_timestamp)
        if latest_group_by_max is not latest_group_by_min:
            # Bug fix: this message was previously a plain string, so the {…}
            # placeholders were emitted literally instead of being interpolated.
            raise ValueError(
                "Overlapping releases for different commits. No good way to identify latest "
                f"release: e.g. {latest_group_by_max.sha} and {latest_group_by_min.sha}.")

        latest_group: ReleaseGroup = latest_group_by_max

        latest_release_sha = latest_group.sha
        logging.info(
            f"Latest released yugabyte-db-thirdparty commit: {latest_release_sha}. "
            f"Released at: {latest_group.get_max_creation_timestamp()}.")

        groups_to_use: List[ReleaseGroup] = [latest_group]

        if self.also_use_commits:
            for extra_commit in self.also_use_commits:
                logging.info(f"Additional manually specified commit to use: {extra_commit}")
                if extra_commit == latest_release_sha:
                    logging.info(
                        f"(already matches the latest commit {latest_release_sha}, skipping.)")
                    continue
                if extra_commit not in releases_by_commit:
                    raise ValueError(
                        f"No releases found for user-specified commit {extra_commit}. "
                        "Please check if there is an error.")
                groups_to_use.append(releases_by_commit[extra_commit])

        new_metadata: Dict[str, Any] = {
            SHA_FOR_LOCAL_CHECKOUT_KEY: latest_release_sha,
            'archives': []
        }
        releases_to_use: List[GitHubThirdPartyRelease] = [
            rel for release_group in groups_to_use
            for rel in release_group.releases
            if rel.tag not in BROKEN_TAGS
        ]

        # Group releases that are identical except for their tag, so we can pick one.
        releases_by_key_without_tag: DefaultDict[Tuple[str, ...], List[GitHubThirdPartyRelease]] = \
            defaultdict(list)

        num_valid_releases = 0
        num_invalid_releases = 0
        for yb_thirdparty_release in releases_to_use:
            if yb_thirdparty_release.validate_url():
                num_valid_releases += 1
                releases_by_key_without_tag[
                    yb_thirdparty_release.get_sort_key(include_tag=False)
                ].append(yb_thirdparty_release)
            else:
                num_invalid_releases += 1
        logging.info(
            f"Valid releases found: {num_valid_releases}, invalid releases: {num_invalid_releases}")

        filtered_releases_to_use = []
        for key_without_tag, releases_for_key in releases_by_key_without_tag.items():
            if len(releases_for_key) > 1:
                # Multiple tags map to the same archive key; keep the latest tag.
                picked_release = max(releases_for_key, key=lambda r: r.tag)
                logging.info(
                    "Multiple releases found for the same key (excluding the tag). "
                    "Using the latest one: %s\n"
                    "Key: %s.\nReleases:\n  %s" % (
                        picked_release,
                        key_without_tag,
                        '\n  '.join([str(r) for r in releases_for_key])))
                filtered_releases_to_use.append(picked_release)
            else:
                filtered_releases_to_use.append(releases_for_key[0])

        filtered_releases_to_use.sort(key=GitHubThirdPartyRelease.get_sort_key)

        for yb_thirdparty_release in filtered_releases_to_use:
            new_metadata['archives'].append(yb_thirdparty_release.as_dict())

        write_yaml_file(new_metadata, archive_metadata_path)
        logging.info(
            f"Wrote information for {len(filtered_releases_to_use)} pre-built "
            f"yugabyte-db-thirdparty archives to {archive_metadata_path}.")
Example n. 3
0
    def update_archive_metadata_file(self) -> None:
        """Regenerate the third-party archive metadata file from GitHub releases.

        Older variant: iterates the releases of yugabyte/yugabyte-db-thirdparty,
        groups them by target commit, picks the single latest release group,
        validates archive URLs, de-duplicates releases differing only by tag, and
        writes the resulting metadata YAML file.

        Raises:
            ValueError: if the latest release group is ambiguous.
        """
        yb_version = read_file(os.path.join(YB_SRC_ROOT,
                                            'version.txt')).strip()

        archive_metadata_path = get_archive_metadata_file_path()
        logging.info(
            f"Updating third-party archive metadata file in {archive_metadata_path}"
        )

        github_client = Github(get_github_token(self.github_token_file_path))
        repo = github_client.get_repo('yugabyte/yugabyte-db-thirdparty')

        releases_by_commit: Dict[str, ReleaseGroup] = {}
        num_skipped_old_tag_format = 0
        num_skipped_wrong_branch = 0
        num_releases_found = 0

        for release in repo.get_releases():
            sha: str = release.target_commitish
            assert isinstance(sha, str)
            tag_name = release.tag_name
            # Old-format tags have too few '-'-separated components to parse.
            if len(tag_name.split('-')) <= 2:
                logging.debug(
                    f"Skipping release tag: {tag_name} (old format, too few components)"
                )
                num_skipped_old_tag_format += 1
                continue
            if self.tag_filter_pattern and not self.tag_filter_pattern.match(
                    tag_name):
                logging.info(
                    f'Skipping tag {tag_name}, does not match the filter')
                continue

            yb_dep_release = GitHubThirdPartyRelease(release)
            if not yb_dep_release.is_consistent_with_yb_version(yb_version):
                logging.debug(
                    f"Skipping release tag: {tag_name} (does not match version {yb_version})"
                )
                num_skipped_wrong_branch += 1
                continue

            if sha not in releases_by_commit:
                releases_by_commit[sha] = ReleaseGroup(sha)

            num_releases_found += 1
            logging.info(f"Found release: {yb_dep_release}")
            releases_by_commit[sha].add_release(yb_dep_release)

        if num_skipped_old_tag_format > 0:
            logging.info(
                f"Skipped {num_skipped_old_tag_format} releases due to old tag format"
            )
        if num_skipped_wrong_branch > 0:
            logging.info(
                f"Skipped {num_skipped_wrong_branch} releases due to branch mismatch"
            )
        logging.info(
            f"Found {num_releases_found} releases for {len(releases_by_commit)} different commits"
        )
        # The latest group must be unambiguous: the group holding the newest
        # release must also be the group whose oldest release is the newest.
        latest_group_by_max = max(releases_by_commit.values(),
                                  key=ReleaseGroup.get_max_creation_timestamp)
        latest_group_by_min = max(releases_by_commit.values(),
                                  key=ReleaseGroup.get_min_creation_timestamp)
        if latest_group_by_max is not latest_group_by_min:
            # Bug fix: this message was previously a plain string, so the {…}
            # placeholders were emitted literally instead of being interpolated.
            raise ValueError(
                "Overlapping releases for different commits. No good way to identify latest "
                f"release: e.g. {latest_group_by_max.sha} and {latest_group_by_min.sha}."
            )

        latest_group = latest_group_by_max

        sha = latest_group.sha
        logging.info(
            # Bug fix: a stray literal 'f' used to precede the {sha} placeholder.
            f"Latest released yugabyte-db-thirdparty commit: {sha}. "
            f"Released at: {latest_group.get_max_creation_timestamp()}.")

        new_metadata: Dict[str, Any] = {
            SHA_FOR_LOCAL_CHECKOUT_KEY: sha,
            'archives': []
        }
        releases_for_one_commit = [
            rel for rel in latest_group.releases if rel.tag not in BROKEN_TAGS
        ]

        # Group releases that are identical except for their tag, so we can pick one.
        releases_by_key_without_tag: DefaultDict[Tuple[str, ...], List[GitHubThirdPartyRelease]] = \
            defaultdict(list)

        num_valid_releases = 0
        num_invalid_releases = 0
        for yb_thirdparty_release in releases_for_one_commit:
            if yb_thirdparty_release.validate_url():
                num_valid_releases += 1
                releases_by_key_without_tag[yb_thirdparty_release.get_sort_key(
                    include_tag=False)].append(yb_thirdparty_release)
            else:
                num_invalid_releases += 1
        logging.info(
            f"Valid releases found: {num_valid_releases}, invalid releases: {num_invalid_releases}"
        )

        filtered_releases_for_one_commit = []
        for key_without_tag, releases_for_key in releases_by_key_without_tag.items(
        ):
            if len(releases_for_key) > 1:
                # Multiple tags map to the same archive key; keep the latest tag.
                picked_release = max(releases_for_key, key=lambda r: r.tag)
                logging.info(
                    "Multiple releases found for the same key (excluding the tag). "
                    "Using the latest one: %s\n"
                    "Key: %s.\nReleases:\n  %s" %
                    (picked_release, key_without_tag, '\n  '.join(
                        [str(r) for r in releases_for_key])))
                filtered_releases_for_one_commit.append(picked_release)
            else:
                filtered_releases_for_one_commit.append(releases_for_key[0])

        filtered_releases_for_one_commit.sort(
            key=GitHubThirdPartyRelease.get_sort_key)

        for yb_thirdparty_release in filtered_releases_for_one_commit:
            new_metadata['archives'].append(yb_thirdparty_release.as_dict())

        write_yaml_file(new_metadata, archive_metadata_path)
        logging.info(
            f"Wrote information for {len(filtered_releases_for_one_commit)} pre-built "
            f"yugabyte-db-thirdparty archives to {archive_metadata_path}.")