def data_by_package_version_id(
    db_graph: PackageGraph,
) -> Dict[PackageVersionID, Any]:
    """Return the graph's advisories keyed by package version ID.

    Thin delegation to ``PackageGraph.get_advisories_by_package_version_id``.

    NOTE(review): several functions in this file share the name
    ``data_by_package_version_id`` (they delegate to different graph
    accessors); at module scope the later definitions shadow this one.
    Presumably decorators or distinct names were lost — confirm intent.
    """
    return db_graph.get_advisories_by_package_version_id()
def data_by_package_version_id(
    db_graph: PackageGraph,
) -> Dict[PackageVersionID, Any]:
    """Return the graph's npm registry data keyed by package version ID.

    Thin delegation to
    ``PackageGraph.get_npm_registry_data_by_package_version_id``.

    NOTE(review): several functions in this file share the name
    ``data_by_package_version_id``; at module scope the later definitions
    shadow the earlier ones. Presumably decorators or distinct names were
    lost — confirm intent.
    """
    return db_graph.get_npm_registry_data_by_package_version_id()
def scan_npm_package_then_build_report_tree(
    package_name: str, package_version: Optional[str] = None, **kwargs,
) -> None:
    """Scan an npm package (all versions, or one specific version) and score it.

    Pipeline: validate inputs, fetch registry entries and npms.io scores,
    install + list + audit each scannable registry entry in a container,
    persist the resulting package graphs and advisories, backfill missing
    registry/score data, then compute and store package reports.

    Args:
        package_name: npm package name; validated before any work starts.
        package_version: optional exact version; when None, every registry
            entry for the package is considered.
        **kwargs: accepted for caller compatibility; unused here.

    Raises:
        Whatever validation error ``validators`` returns for a bad package
        name or version.
    """
    package_name_validation_error = validators.get_npm_package_name_validation_error(
        package_name
    )
    if package_name_validation_error is not None:
        raise package_name_validation_error
    if package_version:
        package_version_validation_error = validators.get_npm_package_version_validation_error(
            package_version
        )
        if package_version_validation_error is not None:
            raise package_version_validation_error

    # TODO: use asyncio.gather to run these concurrently
    fetch_and_save_registry_entries([package_name])
    fetch_and_save_npmsio_scores([package_name])

    scanned_package_name_and_versions: List[Tuple[str, str]] = []

    log.info(f"scanning {package_name}")
    # fetch npm registry entries from DB
    # NOTE(review): the loop variable deliberately(?) shadows the
    # package_version parameter — after this loop the parameter value is lost.
    for (
        package_version,
        source_url,
        git_head,
        tarball_url,
    ) in models.get_npm_registry_entries_to_scan(package_name, package_version):
        if package_version is None:
            # fix: log.warn is a deprecated alias (removed in Python 3.13)
            log.warning(
                f"skipping npm registry entry with null version {package_name}"
            )
            continue
        log.info(f"scanning {package_name}@{package_version}")
        # we need a source_url and git_head or a tarball url to install;
        # entries with neither are silently skipped.
        if tarball_url:
            log.info(
                f"scanning {package_name}@{package_version} with {tarball_url} with config {current_app.config['SCAN_NPM_TARBALL_ARGS']}"
            )
            # start an npm container, install the tarball, run list and audit
            # assert tarball_url == f"https://registry.npmjs.org/{package_name}/-/{package_name}-{package_version}.tgz
            container_task_results: Dict[str, Any] = asyncio.run(
                scan_tarball_url(
                    current_app.config["SCAN_NPM_TARBALL_ARGS"],
                    tarball_url,
                    package_name,
                    package_version,
                )
            )
            log.info(
                f"got container task results for {package_name}@{package_version}"
            )
            log.debug(f"got container task results:\n{container_task_results}")
            for task_result in container_task_results["task_results"]:
                serialized_container_task_result: Optional[
                    Dict[str, Any]
                ] = serializers.serialize_repo_task(
                    task_result, {"list_metadata", "audit"}
                )
                if not serialized_container_task_result:
                    continue
                task_data = serialized_container_task_result
                task_name = task_data["name"]
                if task_name == "list_metadata":
                    insert_package_graph(task_data)
                elif task_name == "audit":
                    for (
                        advisory_fields,
                        impacted_versions,
                    ) in serializers.node_repo_task_audit_output_to_advisories_and_impacted_versions(
                        task_data
                    ):
                        advisory: models.Advisory = list(
                            serializers.serialize_advisories([advisory_fields])
                        )[0]
                        models.insert_advisories([advisory])
                        models.update_advisory_vulnerable_package_versions(
                            advisory, set(impacted_versions)
                        )
                else:
                    log.warning(f"skipping unrecognized task {task_name}")
            scanned_package_name_and_versions.append(
                (package_name, package_version)
            )
        elif source_url and git_head:
            # TODO: port scanner find_dep_files and run_repo_tasks pipelines as used in analyze_package.sh
            log.info(
                f"scanning {package_name}@{package_version} from {source_url}#{git_head} not implemented"
            )
            log.error(
                f"Installing from VCS source and ref not implemented to scan {package_name}@{package_version}"
            )

    # fetch missing registry entries and scores
    # TODO: use asyncio.gather to run these concurrently
    log.info("fetching missing npms.io scores")
    fetch_and_save_npmsio_scores(
        row[0]
        for row in models.get_package_names_with_missing_npms_io_scores()
        if row is not None
    )
    log.info("fetching missing npm registry entries")
    fetch_and_save_registry_entries(
        row[0]
        for row in models.get_package_names_with_missing_npm_entries()
        if row is not None
    )

    log.info("scoring package versions")
    for package_name, package_version in scanned_package_name_and_versions:
        log.info(f"scoring package version {package_name}@{package_version}")
        # build_report_tree(package_name, package_version)
        package: Optional[
            PackageVersion
        ] = get_most_recently_inserted_package_from_name_and_version(
            package_name, package_version
        )
        if package is None:
            log.error(
                f"PackageVersion not found for {package_name} {package_version}. Skipping scoring."
            )
            continue
        db_graph: Optional[PackageGraph] = get_latest_graph_including_package_as_parent(
            package
        )
        if db_graph is None:
            # no child deps recorded: score the package as a single-node graph
            log.info(f"{package.name} {package.version} has no children")
            db_graph = PackageGraph(id=None, link_ids=[])
            db_graph.distinct_package_ids = {package.id}
        store_package_reports(
            list(scoring.score_package_graph(db_graph).values())
        )
def data_by_package_version_id(
    db_graph: PackageGraph,
) -> Dict[PackageVersionID, Any]:
    """Return the graph's npms.io scores keyed by package version ID.

    Thin delegation to
    ``PackageGraph.get_npmsio_scores_by_package_version_id``.

    NOTE(review): several functions in this file share the name
    ``data_by_package_version_id``; at module scope the later definitions
    shadow the earlier ones. Presumably decorators or distinct names were
    lost — confirm intent.
    """
    return db_graph.get_npmsio_scores_by_package_version_id()
def deserialize_scan_job_results(
    messages: Iterable[JSONResult],
) -> Generator[
    Union[
        PackageVersion,
        Tuple[
            PackageGraph,
            Optional[PackageVersion],
            List[Tuple[PackageVersion, PackageVersion]],
        ],
        Tuple[Advisory, AbstractSet[str]],
    ],
    None,
    None,
]:
    """Takes an iterable of JSONResults of pubsub messages for a completed
    npm scan (tarball or dep file), parses the messages, and yields models to
    save in the following order:

    * one or more PackageVersions
    * a PackageGraph with an optional root package version and a list of its
      links in a pairs of PackageVersions
    * Advisory models with impacted versions (if any)

    The models will not have IDs and should be upserted to avoid violating
    index constraints and creating duplicate rows.

    Messages with null, non-dict, or non-PubsubMessage data are logged and
    skipped; unrecognized task names are logged and skipped.
    """
    for json_result in messages:
        if json_result.data is None:
            # fix: log.warn is a deprecated alias (removed in Python 3.13)
            log.warning(f"json result ID: {json_result.id} null data column")
            continue
        if not isinstance(json_result.data, dict):
            log.warning(f"json result ID: {json_result.id} non-dict data column")
            continue
        if (
            json_result.data.get("type", None)
            != "google.cloud.pubsub_v1.types.PubsubMessage"
        ):
            log.warning(
                f"json result ID: {json_result.id} invalid type (not PubsubMessage)"
            )
            continue

        for line in json_result.data["data"]:
            if not isinstance(line, dict):
                continue
            if line.get("type", None) != "task_result":
                continue
            task_data: Optional[Dict[str, Any]] = serialize_repo_task(
                line, {"list_metadata", "audit"}
            )
            if not task_data:
                continue

            task_name = line["name"]
            if task_name == "list_metadata":
                links: List[Tuple[PackageVersion, PackageVersion]] = []
                for task_dep in task_data.get("dependencies", []):
                    parent: PackageVersion = deserialize_npm_package_version(
                        task_dep
                    )
                    yield parent
                    for dep in task_dep.get("dependencies", []):
                        # is fully qualified semver for npm (or file: or
                        # github: url), semver for yarn; rsplit keeps scoped
                        # package names (@scope/name) intact
                        name, version = dep.rsplit("@", 1)
                        child: PackageVersion = deserialize_npm_package_version(
                            dict(name=name, version=version)
                        )
                        yield child
                        links.append((parent, child))

                package_manager = (
                    "yarn" if "yarn" in task_data["command"] else "npm"
                )
                root_package_version = (
                    deserialize_npm_package_version(task_data["root"])
                    if task_data["root"]
                    else None
                )
                # NB: caller must convert links to link_ids,
                # root_package_version to root_package_version_id
                yield PackageGraph(
                    root_package_version_id=None,
                    link_ids=[],
                    package_manager=package_manager,
                    package_manager_version=None,  # TODO: find and set
                ), root_package_version, links
            elif task_name == "audit":
                for (
                    advisory_fields,
                    impacted_versions,
                ) in node_repo_task_audit_output_to_advisories_and_impacted_versions(
                    task_data
                ):
                    advisory: Advisory = list(
                        serialize_advisories([advisory_fields])
                    )[0]
                    yield advisory, impacted_versions
            else:
                log.warning(f"skipping unrecognized task {task_name}")