def data_by_package_version_id(
    db_graph: PackageGraph,
) -> Dict[PackageVersionID, Any]:
    # variant returning advisories keyed by package version ID
    return db_graph.get_advisories_by_package_version_id()


def data_by_package_version_id(
    db_graph: PackageGraph,
) -> Dict[PackageVersionID, Any]:
    # variant returning npm registry data keyed by package version ID
    return db_graph.get_npm_registry_data_by_package_version_id()


def scan_npm_package_then_build_report_tree(
    package_name: str,
    package_version: Optional[str] = None,
    **kwargs,
) -> None:
    package_name_validation_error = validators.get_npm_package_name_validation_error(
        package_name)
    if package_name_validation_error is not None:
        raise package_name_validation_error

    if package_version:
        package_version_validation_error = validators.get_npm_package_version_validation_error(
            package_version)
        if package_version_validation_error is not None:
            raise package_version_validation_error

    # TODO: use asyncio.gather to run these concurrently
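    # prefetch and persist the npm registry entry and npms.io score for this
    # package; the scan loop below reads registry entries back from the DB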
    fetch_and_save_registry_entries([package_name])

    fetch_and_save_npmsio_scores([package_name])

    scanned_package_name_and_versions: List[Tuple[str, str]] = []

    log.info(f"scanning {package_name}")

    # fetch npm registry entries from DB
    for (
            package_version,
            source_url,
            git_head,
            tarball_url,
    ) in models.get_npm_registry_entries_to_scan(package_name,
                                                 package_version):
        if package_version is None:
            log.warning(
                f"skipping npm registry entry with null version {package_name}"
            )
            continue

        log.info(f"scanning {package_name}@{package_version}")

        # we need a source_url and git_head or a tarball url to install
        if tarball_url:
            log.info(
                f"scanning {package_name}@{package_version} with {tarball_url} with config {current_app.config['SCAN_NPM_TARBALL_ARGS']}"
            )
            # start an npm container, install the tarball, run list and audit
            # assert tarball_url == f"https://registry.npmjs.org/{package_name}/-/{package_name}-{package_version}.tgz"
            container_task_results: Dict[str, Any] = asyncio.run(
                scan_tarball_url(
                    current_app.config["SCAN_NPM_TARBALL_ARGS"],
                    tarball_url,
                    package_name,
                    package_version,
                ))
            log.info(
                f"got container task results for {package_name}@{package_version}"
            )
            log.debug(f"got container task results:\n{container_task_results}")
            for task_result in container_task_results["task_results"]:
                serialized_container_task_result: Optional[Dict[
                    str, Any]] = serializers.serialize_repo_task(
                        task_result, {"list_metadata", "audit"})
                if not serialized_container_task_result:
                    continue

                task_data = serialized_container_task_result
                task_name = task_data["name"]
                if task_name == "list_metadata":
                    insert_package_graph(task_data)
                elif task_name == "audit":
                    for (
                            advisory_fields,
                            impacted_versions,
                    ) in serializers.node_repo_task_audit_output_to_advisories_and_impacted_versions(
                            task_data):
                        advisory: models.Advisory = list(
                            serializers.serialize_advisories(
                                [advisory_fields]))[0]
                        models.insert_advisories([advisory])
                        models.update_advisory_vulnerable_package_versions(
                            advisory, set(impacted_versions))
                else:
                    log.warning(f"skipping unrecognized task {task_name}")

            scanned_package_name_and_versions.append(
                (package_name, package_version))
        elif source_url and git_head:
            # TODO: port scanner find_dep_files and run_repo_tasks pipelines as used in analyze_package.sh
            log.info(
                f"scanning {package_name}@{package_version} from {source_url}#{git_head} not implemented"
            )
            log.error(
                f"Installing from VCS source and ref not implemented to scan {package_name}@{package_version}"
            )

    # fetch missing registry entries and scores
    # TODO: use asyncio.gather to run these concurrently
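    # the scan may have inserted new dependency packages that do not yet have
    # registry entries or npms.io scores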
    log.info(f"fetching missing npms.io scores")
    fetch_and_save_npmsio_scores(
        row[0]
        for row in models.get_package_names_with_missing_npms_io_scores()
        if row is not None)
    log.info(f"fetching missing npm registry entries")
    fetch_and_save_registry_entries(
        row[0] for row in models.get_package_names_with_missing_npm_entries()
        if row is not None)

    log.info(f"scoring package versions")
    for package_name, package_version in scanned_package_name_and_versions:
        log.info(f"scoring package version {package_name}@{package_version}")

        # build_report_tree(package_name, package_version)
        package: Optional[
            PackageVersion] = get_most_recently_inserted_package_from_name_and_version(
                package_name, package_version)
        if package is None:
            log.error(
                f"PackageVersion not found for {package_name} {package_version}. Skipping scoring."
            )
            continue

        db_graph: Optional[
            PackageGraph] = get_latest_graph_including_package_as_parent(
                package)
        if db_graph is None:
            log.info(f"{package.name} {package.version} has no children")
            db_graph = PackageGraph(id=None, link_ids=[])
            db_graph.distinct_package_ids = set([package.id])

        store_package_reports(
            list(scoring.score_package_graph(db_graph).values()))
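
# Example invocation (hypothetical package name and version; the function reads
# current_app.config, so it must run inside a Flask application context):
#
#     scan_npm_package_then_build_report_tree("left-pad", "1.3.0")
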

def data_by_package_version_id(
    db_graph: PackageGraph,
) -> Dict[PackageVersionID, Any]:
    # variant returning npms.io scores keyed by package version ID
    return db_graph.get_npmsio_scores_by_package_version_id()
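

# The three data_by_package_version_id variants above share one signature, so a
# scorer can treat them as interchangeable per-package-version data sources.
# A minimal sketch of that pattern (collect_component_data and its accessors
# argument are hypothetical, not part of the source repo):
from collections import defaultdict
from typing import Callable, DefaultDict


def collect_component_data(
    db_graph: PackageGraph,
    accessors: Iterable[Callable[[PackageGraph], Dict[PackageVersionID, Any]]],
) -> Dict[PackageVersionID, List[Any]]:
    # merge each accessor's per-package-version data into one mapping, one
    # entry per accessor for each PackageVersionID
    merged: DefaultDict[PackageVersionID, List[Any]] = defaultdict(list)
    for accessor in accessors:
        for package_version_id, data in accessor(db_graph).items():
            merged[package_version_id].append(data)
    return dict(merged)

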
def deserialize_scan_job_results(
    messages: Iterable[JSONResult],
) -> Generator[Union[
        PackageVersion,
        Tuple[PackageGraph, Optional[PackageVersion],
              List[Tuple[PackageVersion, PackageVersion]]],
        Tuple[Advisory, AbstractSet[str]],
], None, None]:
    """Takes an iterable of JSONResults of pubsub messages for a
    completed npm scan (tarball or dep file), parses the messages, and
    yields models to save in the following order:

    * one or more PackageVersions
    * a PackageGraph with an optional root package version and a list of its links as pairs of PackageVersions
    * Advisory models with impacted versions (if any)

    The models will not have IDs and should be upserted to avoid
    violating index constraints and creating duplicate rows.
    """
    for json_result in messages:
        if json_result.data is None:
            log.warning(f"json result ID: {json_result.id} null data column")
            continue
        if not isinstance(json_result.data, dict):
            log.warn(f"json result ID: {json_result.id} non-dict data column")
            continue
        if (json_result.data.get("type", None) !=
                "google.cloud.pubsub_v1.types.PubsubMessage"):
            log.warning(
                f"json result ID: {json_result.id} invalid type (not PubsubMessage)"
            )
            continue

        for line in json_result.data["data"]:
            if not isinstance(line, dict):
                continue
            if line.get("type", None) != "task_result":
                continue

            task_data: Optional[Dict[str, Any]] = serialize_repo_task(
                line, {"list_metadata", "audit"})
            if not task_data:
                continue

            task_name = line["name"]
            if task_name == "list_metadata":
                links: List[Tuple[PackageVersion, PackageVersion]] = []
                for task_dep in task_data.get("dependencies", []):
                    parent: PackageVersion = deserialize_npm_package_version(
                        task_dep)
                    yield parent
                    for dep in task_dep.get("dependencies", []):
                        # is fully qualified semver for npm (or file: or github: url), semver for yarn
                        name, version = dep.rsplit("@", 1)
                        child: PackageVersion = deserialize_npm_package_version(
                            dict(
                                name=name,
                                version=version,
                            ))
                        yield child
                        links.append((parent, child))
                package_manager = "yarn" if "yarn" in task_data[
                    "command"] else "npm"
                root_package_version = (deserialize_npm_package_version(
                    task_data["root"]) if task_data["root"] else None)
                # NB: caller must convert links to link_ids, root_package_version to root_package_version_id
                yield PackageGraph(
                    root_package_version_id=None,
                    link_ids=[],
                    package_manager=package_manager,
                    package_manager_version=None,  # TODO: find and set
                ), root_package_version, links
            elif task_name == "audit":
                for (
                        advisory_fields,
                        impacted_versions,
                ) in node_repo_task_audit_output_to_advisories_and_impacted_versions(
                        task_data):
                    advisory: Advisory = list(
                        serialize_advisories([advisory_fields]))[0]
                    yield advisory, impacted_versions
            else:
                log.warning(f"skipping unrecognized task {task_name}")