def test_add_manifest_force():
    """Force-adding manifests overwrites existing versions and keeps links local."""
    registry_data = RegistryData.from_json(VALID_DATA)

    local_b = ANALYZER_B.copy()
    local_b["version"] = UNIQUE_VERSION

    # Force both manifests into the registry; force=True suppresses the
    # duplicate-version rejection.
    for manifest_json in (local_b, LINKED_ANALYZER_A):
        registry_data = registry_data.add_pending_manifest(
            AnalyzerManifest.from_json(manifest_json), force=True
        )

    assert registry_data._resolve("test-org-name/a", "0.0.1")

    def specified(name, version):
        # Local helper to cut down on constructor noise.
        return SpecifiedAnalyzer(
            VersionedAnalyzer(AnalyzerName(name), Version(version))
        )

    deps_of_a = registry_data.sorted_deps(specified("test-org-name/a", "0.0.1"))
    assert specified("test-org-name/b", UNIQUE_VERSION) in deps_of_a
    assert specified("test-org-name/c", "0.0.1") in deps_of_a

    # Linking is local: overriding the A->B edge must not override the C->B edge.
    deps_of_c = registry_data.sorted_deps(specified("test-org-name/c", "0.0.1"))
    assert specified("test-org-name/b", "0.0.2") in deps_of_c
def setup_locally_linked_analyzer(manifest: AnalyzerManifest,
                                  registry_data: RegistryData,
                                  analyzer_directory: str) -> RegistryData:
    """
    Build and tags analyzer in ANALYZER_DIRECTORY with a unique version
    and returns a modified registry so that local runs will resolve to
    said built analyzer.

    For every dependency of MANIFEST that has a local `path`, this:
      1. opens the linked analyzer's manifest (path resolved relative to
         ANALYZER_DIRECTORY),
      2. checks its name matches the dependency entry,
      3. builds its docker image under a freshly generated unique version,
      4. force-adds that manifest into a copy of REGISTRY_DATA, and
      5. rewrites the dependency to pin the unique version.

    Returns a new RegistryData; REGISTRY_DATA itself is not modified, but
    MANIFEST *is* mutated in place (dependencies and _original_json).

    Raises:
        LinkedAnalyzerNameMismatch: if a linked manifest's name differs
            from the dependency's declared name.
        Exception: re-raises whatever find_and_open_analyzer_manifest threw.
    """
    new_registry = registry_data.deepcopy()
    new_dependencies: List[AnalyzerDependency] = []
    for dep in manifest.dependencies:
        # Non-local dependencies pass through untouched.
        if not dep.path:
            new_dependencies.append(dep)
            continue
        try:
            local_manifest, local_dir = find_and_open_analyzer_manifest(
                os.path.normpath(os.path.join(analyzer_directory, dep.path)))
        except Exception as e:
            logger.debug(
                f"Exception while resolving local linked dependendies: {str(e)}"
            )
            raise e
        # validate name
        if local_manifest.analyzer_name != dep.name:
            raise LinkedAnalyzerNameMismatch(
                f"Linked analyzer name must match {local_manifest.analyzer_name} != {dep.name}"
            )
        # build docker with unique version
        local_version = get_unique_semver(local_manifest.version)
        build_docker(
            local_manifest.analyzer_name,
            local_version,
            os.path.relpath(local_dir, os.getcwd()),
            verbose=True,
        )
        # add linked dep to registry
        local_manifest.version = local_version
        new_registry = new_registry.add_pending_manifest(local_manifest,
                                                         force=True)
        new_dependencies.append(
            AnalyzerDependency(
                AnalyzerName(local_manifest.analyzer_name),
                wildcard_version=str(local_version),
                parameters=dep.parameters,
            ))
    # add analyzer to registry
    # NOTE(review): this rewrites the manifest's raw JSON as well, so that
    # later serialization reflects the pinned versions — confirm downstream
    # consumers expect _original_json to be mutated here.
    manifest.dependencies = new_dependencies
    manifest._original_json["dependencies"] = {
        dep.name: {
            "version": dep.wildcard_version,
            "path": dep.path
        }
        for dep in new_dependencies
    }
    new_registry = new_registry.add_pending_manifest(manifest, force=True)
    return new_registry
def from_json(cls, data_json: RegistryDataJson) -> "RegistryData":
    """Build a RegistryData from its JSON mapping of analyzer name -> data."""
    parsed = {}
    for raw_name, raw_analyzer_data in data_json.items():
        parsed[AnalyzerName(raw_name)] = AnalyzerData.from_json(raw_analyzer_data)
    return cls(parsed)
def test_equality():
    """SpecifiedAnalyzers built from equal components must compare equal."""
    from_fixtures = SpecifiedAnalyzer(VERSIONED_ANALYZER, PARAMETERS)
    # Construct a second instance from brand-new objects to verify equality
    # is structural, not identity-based.
    from_scratch = SpecifiedAnalyzer(
        VersionedAnalyzer(AnalyzerName("r2c/test-analyzer"), Version("1.2.3")),
        AnalyzerParameters({}),
    )
    assert from_fixtures == from_scratch
def get_direct_dependencies(self, va: VersionedAnalyzer) -> List[SpecifiedAnalyzer]:
    """Return the immediate (non-transitive) dependencies of VA.

    Raises:
        ManifestNotFoundException: if VA has no manifest in this registry.

    NOTE(review): dependencies whose wildcard version fails to resolve are
    silently skipped here, whereas _dependency_graph raises in the same
    situation — confirm this asymmetry is intentional.
    """
    manifest = self.manifest_for(va)
    if manifest is None:
        raise ManifestNotFoundException(f"manifest not found for {va}.")
    direct_deps = []
    for dependency in manifest.dependencies:
        dep_name = AnalyzerName(dependency.name)
        version = self._resolve(dep_name, dependency.wildcard_version)
        if version is None:
            continue
        direct_deps.append(
            SpecifiedAnalyzer(
                VersionedAnalyzer(dep_name, version), dependency.parameters
            )
        )
    return direct_deps
def prepull_analyzers(analyzer_name: str, version: Version) -> None:
    """Pull the docker images needed to run ANALYZER_NAME at VERSION.

    Walks the dependency closure from the registry and pulls every image
    that _should_pull_analyzer says is missing locally.
    """
    target = SpecifiedAnalyzer(
        VersionedAnalyzer(AnalyzerName(analyzer_name), version))
    registry = RegistryData.from_json(REGISTRY)
    client = get_docker_client()
    for dep in registry.sorted_deps(target):
        if not _should_pull_analyzer(dep):
            continue
        client.images.pull(dep.versioned_analyzer.image_id)
def _dependency_graph(
    self, subgraph_from_node: SpecifiedAnalyzer = None
) -> Graph[SpecifiedAnalyzer]:
    """Build the dependency graph over this registry's analyzers.

    Arguments:
        subgraph_from_node: if given, only the subgraph reachable from this
            analyzer is built; otherwise the graph covers every versioned
            analyzer in the registry.

    Returns:
        A Graph whose nodes are SpecifiedAnalyzers and whose edges point
        from an analyzer to each of its resolved dependencies.

    Raises:
        ManifestNotFoundException: if a reachable analyzer has no manifest.
        Exception: if a dependency's wildcard version cannot be resolved.
    """
    edges = set()
    nodes = set()
    if subgraph_from_node:
        to_explore = set([subgraph_from_node])
    else:
        to_explore = set(SpecifiedAnalyzer(va) for va in self.versioned_analyzers)

    # this loop terminates after at most sum(len(deps)) because we always pop values off
    # `to_explore` and add them to `nodes`, and only add values to `to_explore` if
    # they're not in `nodes`.
    while to_explore:
        sa = to_explore.pop()
        next_manifest = self.manifest_for(sa.versioned_analyzer)
        if not next_manifest:
            raise ManifestNotFoundException(
                f"manifest not found for analyzer {sa.versioned_analyzer.name} at version {sa.versioned_analyzer.version}. Registry data: {self.to_json()}"
            )
        deps = next_manifest.dependencies
        nodes.add(sa)
        for dep in deps:
            resolved_version = self._resolve(
                AnalyzerName(dep.name), dep.wildcard_version
            )
            if resolved_version is None:
                raise Exception(f"Can't resolve dependency {dep} of {sa}")
            # Wrap the raw name in AnalyzerName, consistent with how
            # VersionedAnalyzer is constructed everywhere else in this
            # module (e.g. get_direct_dependencies).
            resolved_dep = VersionedAnalyzer(
                AnalyzerName(dep.name), resolved_version
            )
            specified_dep = SpecifiedAnalyzer(resolved_dep, dep.parameters)
            edges.add((sa, specified_dep))
            if specified_dep not in nodes:
                to_explore.add(specified_dep)
    return Graph(list(nodes), list(edges))
def test_should_pull() -> None:
    """An analyzer image that cannot exist locally must be flagged for pull."""
    missing = VersionedAnalyzer(AnalyzerName("doesnt/exist"), Version("9.1.1"))
    assert _should_pull_analyzer(SpecifiedAnalyzer(missing))
def add_pending_manifest(
    self, manifest: AnalyzerManifest, force: bool = False
) -> "RegistryData":
    """
    Add this manifest into the current registry data as pending upload.

    This method first verifies that:
        1. Name conforms to org/name
        2. This is not a duplicate versioned analyzer
        3. It's dependencies can be resolved
        4. It doesn't cause circular dependencies

    Arguments:
        manifest: The manifest of the analyzer we want to add to the registry
        force: Force overwrite into registry if manifest already exists with
               matching name. This flag nullifies the InvalidManifestException
               thrown for manifest that already exists

    Returns:
        A new RegistryData object with manifest added in.

    Throws:
        An InvalidManifestException if the manifest can't be added
    """
    name = manifest.analyzer_name
    version = manifest.version
    va = VersionedAnalyzer(name, version)

    # check that name looks like org/name
    # don't do this check for now until we change analyzer naming everywhere
    # TODO: Actually get the current org's name current_org
    # if not is_analyzer_of_org(name, current_org):
    #     raise Exception(f"Analyzer name must be of the form {org_name}/name")

    # Build the candidate registry up front; all validation below runs
    # against it, and it is returned if everything passes.
    new_reg = self.UNSAFE_add_manifest(manifest)

    # check that we can resolve its dependencies
    for dep in manifest.dependencies:
        # Check that it doesn't depend on itself
        if dep.name == name:
            raise InvalidManifestException(
                f"Resolving this dependency: {dep} But analyzer can't depend on itself."
            )
        resolved_version = new_reg._resolve(
            AnalyzerName(dep.name), dep.wildcard_version
        )
        if dep.path:
            # Local (path-linked) dependency: only the directory needs to
            # exist. isdir() already implies existence, so one check suffices
            # (the original also called os.path.exists redundantly).
            if not os.path.isdir(dep.path):
                raise InvalidLocalPathException(
                    f"A dependency in this manifest cannot be resolved: {dep}"
                )
        elif resolved_version is None:
            raise InvalidManifestException(
                f"A dependency in this manifest cannot be resolved: {dep}"
            )

    # Check that we don't already have a manifest for it.
    # i.e. don't allow a new manifest without changing analyzer version.
    # TODO: check that it's increased
    analyzer_data = self._data.get(name)
    if analyzer_data and version in analyzer_data.versions.keys():
        if not force:
            raise InvalidManifestException(
                f"A manifest for this analyzer and version already exists: {va}"
            )

    # and see if it can be topologically sorted; the sorted order itself
    # is not needed here, only that no cycle exists.
    deps_graph = new_reg._dependency_graph()
    try:
        deps_graph.topo_sorted()
    except CircularDependencyError as e:
        # Chain the cause so the cycle details aren't lost; also fixes the
        # "dependendency" typo that was in the original message.
        raise InvalidManifestException(
            "This manifest would cause a cycle in the dependency graph"
        ) from e

    # all is well? return the new registry
    return new_reg
def run(
    ctx,
    analyzer_directory,
    analyzer_input,
    output_path,
    quiet,
    analyzer_quiet,
    no_login,
    wait,
    debug,
    parameters,
    env_args_string,
):
    """
    Run the analyzer in the current directory over a code directory.

    You may have to log in if your analyzer depends on privately published
    analyzers.
    """
    if debug == True:  # allow passing --debug to run as well as globally
        set_debug_flag(ctx, True)

    debug_mode = ctx.obj["DEBUG"]
    print_msg(f"🏃 Running analyzer...{'with debug mode' if debug_mode else ''}")

    env_args_dict = parse_remaining(env_args_string)

    # Parameters arrive as a JSON string on the command line.
    try:
        parameter_obj = json.loads(parameters)
    except ValueError as e:
        print_error_exit(
            f'Failed to parse parameter string:"{parameters}" as json. Parse Error: {e}'
        )

    manifest, analyzer_directory = find_and_open_analyzer_manifest(
        analyzer_directory, ctx
    )

    try:
        registry_data = RegistryData.from_json(fetch_registry_data())
    except Exception as e:
        message = getattr(e, "message", repr(e))
        print_error_exit(
            f"There was an error fetching data from the registry: {message}"
        )

    dependencies = manifest.dependencies
    logger.info(f"Parsing and resolving dependencies: {dependencies}")
    if dependencies:
        # Resolve every declared dependency; abort on the first one that
        # cannot be resolved against the registry.
        for analyzer_dep in dependencies:
            dep_name = analyzer_dep.name
            dep_semver_version = analyzer_dep.wildcard_version
            dep_version = registry_data._resolve(
                AnalyzerName(analyzer_dep.name), dep_semver_version
            )
            if not dep_version:
                print_error_exit(
                    f"Error resolving dependency {dep_name} at version {dep_semver_version}. Check that you're using the right version of this dependency and try again."
                )
            logger.info(f"Resolved dependency {dep_name}:{dep_semver_version}")

        if not no_login:
            # we need at least one dep and its version to get credentials
            # when the user isn't logged in
            dep_name = dependencies[0].name
            dep_semver_version = dependencies[0].wildcard_version
            dep_version = registry_data._resolve(
                AnalyzerName(dep_name), dep_semver_version
            )
            artifact_link = (
                f"{get_base_url()}/api/v1/artifacts/{dep_name}/{dep_version}"
            )
            logger.info(f"Getting credential from {artifact_link}")
            # TODO (ulzii) use proper auth credential once its done
            creds = get_docker_creds(artifact_link)
            if creds is None:
                print_error_exit(
                    "Error getting dependency credentials. Please contact us with the following information: failed to get Docker credentials."
                )
            # docker login
            successful_login = docker_login(creds)
            if not successful_login:
                print_error_exit(
                    "Error validating dependency credentials. Please contact us with the following information: failed to log in to Docker."
                )
    else:
        print_warning(
            "No dependencies found; are dependencies intentionally omitted in analyzer.json? Most analyzers are expected to have 1 or more dependencies (e.g. for taking source code as input)."
        )

    abort_on_build_failure(
        build_docker(
            manifest.analyzer_name,
            manifest.version,
            os.path.relpath(analyzer_directory, os.getcwd()),
            env_args_dict={**DEFAULT_ENV_ARGS_TO_DOCKER, **env_args_dict},
            verbose=debug_mode,
        )
    )

    try:
        run_analyzer_on_local_code(
            registry_data=registry_data,
            manifest=manifest,
            workdir=None,
            code_dir=analyzer_input.strip(
                '"'
            ),  # idk why this is happening for quoted paths
            output_path=output_path,
            wait=wait,
            show_output_on_stdout=not quiet,
            pass_analyzer_output=not analyzer_quiet,
            no_preserve_workdir=True,
            parameters=parameter_obj,
            env_args_dict={**DEFAULT_ENV_ARGS_TO_DOCKER, **env_args_dict},
        )
    except SymlinkNeedsElevationError as sym:
        print_error_exit(
            f"Error setting up local analysis. {sym}. Try again as an admin."
        )
def run(
    ctx,
    analyzer_directory,
    analyzer_input,
    output_path,
    quiet,
    analyzer_quiet,
    no_login,
    debug,
    interactive,
    interactive_name,
    reset_cache,
    verbose,
    parameters,
    env_args_string,
):
    """
    Run the analyzer in the current directory over a code directory.

    You may have to log in if your analyzer depends on privately published
    analyzers.
    """
    if verbose is True:
        # allow passing --verbose to run as well as globally
        set_verbose_flag(ctx, True)
    if debug is True:
        set_debug_flag(ctx, True)
    print_msg(f"🏃 Starting to run analyzer...")

    # -1 is a sentinel meaning "exec into the last analyzer in the execution".
    interactive_index = -1 if interactive else None
    env_args_dict = parse_remaining(env_args_string)
    parameter_obj = load_params(parameters)
    manifest, analyzer_directory = find_and_open_analyzer_manifest(
        analyzer_directory, ctx)
    registry_data = get_registry_data()
    dependencies = manifest.dependencies
    print_msg("Resolving dependencies")
    logger.debug(f"Parsing and resolving dependencies: {dependencies}")
    if dependencies:
        for analyzer_dep in dependencies:
            dep_name = analyzer_dep.name
            dep_semver_version = analyzer_dep.wildcard_version
            dep_version = registry_data._resolve(
                AnalyzerName(analyzer_dep.name), dep_semver_version)
            if not dep_version:
                # Path-linked dependencies are resolved locally, so an
                # unresolved registry version is only fatal without a path.
                if not analyzer_dep.path:
                    print_error_exit(
                        f"Error resolving dependency {dep_name} at version {dep_semver_version}. Check that you're using the right version of this dependency and try again."
                    )
            logger.debug(
                f"Resolved dependency {dep_name}:{dep_semver_version}")

        if not no_login:
            # we need at least one dep and its version to get credentials
            # when the user isn't logged in
            dep_name = dependencies[0].name
            dep_semver_version = dependencies[0].wildcard_version
            dep_version = registry_data._resolve(AnalyzerName(dep_name),
                                                 dep_semver_version)
            artifact_link = (
                f"{get_base_url()}/api/v1/artifacts/{dep_name}/{dep_version}")
            logger.debug(f"Getting credential from {artifact_link}")
            # TODO (ulzii) use proper auth credential once its done
            creds = get_docker_creds(artifact_link)
            if creds is None:
                print_error_exit(
                    "Error getting dependency credentials. Please contact us with the following information: failed to get Docker credentials."
                )
            # docker login
            successful_login = docker_login(creds)
            if not successful_login:
                print_error_exit(
                    "Error validating dependency credentials. Please contact us with the following information: failed to log in to Docker."
                )
    else:
        print_warning(
            "No dependencies found; are dependencies intentionally omitted in analyzer.json? Most analyzers are expected to have 1 or more dependencies (e.g. for taking source code as input)."
        )

    print_msg("🔨 Building docker container")
    abort_on_build_failure(
        build_docker(
            manifest.analyzer_name,
            manifest.version,
            os.path.relpath(analyzer_directory, os.getcwd()),
            env_args_dict=env_args_dict,
            no_cache=reset_cache,
        ))
    try:
        # NOTE(review): interactive_index is -1 when set, which is truthy,
        # so this branch fires for --interactive.
        if interactive_index:
            print_msg(
                f"🔎 Inspecting containers interactively by `docker exec` into last analyzer in execution."
            )
        elif interactive_name:
            print_msg(
                f"🔎 Inspecting containers interactively by `docker exec` into analyzer with name containing `{interactive_name}`."
            )
        else:
            print_msg(f"🔎 Running analysis on `{analyzer_input}`")
        logger.info(f"Reset cache: {reset_cache}")
        try:
            run_analyzer_on_local_code(
                registry_data=registry_data,
                manifest=manifest,
                workdir=None,
                analyzer_dir=analyzer_directory,
                code_dir=analyzer_input,
                output_path=output_path,
                show_output_on_stdout=not quiet,
                pass_analyzer_output=not analyzer_quiet,
                no_preserve_workdir=True,
                parameters=parameter_obj,
                env_args_dict=env_args_dict,
                interactive_index=interactive_index,
                interactive_name=interactive_name,
                reset_cache=reset_cache,
            )
        except AnalyzerOutputNotFound as fne:
            print_error_exit(str(fne), err=False)
        except AnalyzerNonZeroExitError as ae:
            print_exception_exit("Analyzer non-zero exit", ae, err=False)

        if output_path:
            path_msg = f"Analysis results in `{output_path}`."
        else:
            path_msg = f"Analysis results printed to `stdout`. unless suppressed explicitly with `-q`"

        print_success(f"Finished analyzing `{analyzer_input}`. {path_msg}")
    except SymlinkNeedsElevationError as sym:
        print_error_exit(
            f"Error setting up local analysis. {sym}. Try again as an admin.")
def from_json(cls, json_obj: Dict[str, Any]) -> "AnalyzerManifest":
    """Parse, validate, migrate and construct a manifest from raw JSON.

    Pipeline: check spec_version compatibility -> schema-validate the raw
    object -> migrate it to the current spec -> parse dependencies ->
    construct the AnalyzerManifest.

    Raises:
        MalformedManifestException: missing spec_version, no schema for it,
            or schema validation failure.
        IncompatibleManifestException: manifest spec_version is newer than
            the latest supported SPEC_VERSION.
    """
    # The type of the json_obj argument is a bit of a hack, since in
    # r2c.lib.registry we cast an arbitrary dict to an AnalyzerManifestJson
    # before calling this.
    spec_version = json_obj.get("spec_version")
    if spec_version is None:
        raise MalformedManifestException(
            json_obj, "Must specify a spec_version field"
        )
    # Only a newer *major* spec version is rejected; same-major is assumed
    # to be parseable (possibly via migration below).
    if Version(spec_version).major > SPEC_VERSION.major:
        logger.error(
            f"Trying to parse manifest for analyzer {json_obj['analyzer_name']}:{json_obj['version']}"
            f" with spec_version: {spec_version}, but that spec_version is"
            f" too new and not compatible with the latest supported: {SPEC_VERSION}."
        )
        raise IncompatibleManifestException(
            f"Can't parse manifest for analyzer {json_obj['analyzer_name']}:{json_obj['version']}"
            f" with spec_version: {spec_version}. The spec_version is"
            f" incompatible with the latest supported: {SPEC_VERSION}."
        )

    validator = schemas.manifest_validator(json_obj)
    if validator is None:
        raise MalformedManifestException(
            json_obj,
            "Could not find a schema for the given spec_version {}".format(
                spec_version
            ),
        )
    try:
        validator.validate(json_obj)
    except jsonschema.ValidationError as err:
        raise MalformedManifestException(json_obj, err.message) from err

    # Keep a handle on the pre-migration object; it is passed through to the
    # constructor as the manifest's original JSON.
    # NOTE(review): if manifest_migrations.migrate mutates its argument
    # rather than returning a copy, original_json_obj aliases the migrated
    # object — confirm migrate() is non-mutating.
    original_json_obj = json_obj
    json_obj = manifest_migrations.migrate(json_obj)

    original_version = (
        Version(json_obj["_original_spec_version"])
        if json_obj.get("_original_spec_version")
        else Version(json_obj["spec_version"])
    )

    dependencies = []
    for dependency_name, value in json_obj["dependencies"].items():
        if isinstance(value, str):
            # if value is string, assume its Semver version
            dependencies.append(
                AnalyzerDependency(
                    AnalyzerName(dependency_name), wildcard_version=value
                )
            )
        else:
            # If value is an object, parse it for params, version, path
            dependencies.append(
                AnalyzerDependency(
                    AnalyzerName(dependency_name),
                    value.get("version"),
                    value.get("parameters"),
                    value.get("path"),
                )
            )

    return cls(
        json_obj["analyzer_name"],
        json_obj.get("author_email"),
        json_obj.get("author_name"),
        Version(json_obj["version"]),
        json_obj["spec_version"],
        dependencies,
        AnalyzerType.from_name(json_obj["type"]),
        AnalyzerOutput.from_json(json_obj["output"]),
        json_obj["deterministic"],
        original_json_obj,
        original_version,
        json_obj.get("extra"),
    )
def run_analyzer_on_local_code(
    analyzer_name: str,
    version: Version,
    base: Path,
    ignore_files: Set[Path],
    target_files: Iterable[str],
) -> JsonR:
    """Run an analyzer (and its dependency chain) on a local folder.

    Arguments:
        analyzer_name: name of the analyzer to run
        version: analyzer version to run
        base: root of the local code to analyze
        ignore_files: paths to exclude from the copied input
        target_files: files to include in the copied input

    Returns:
        The "results" list from the analyzer's JSON output, or [] if the
        analyzer produced no output.
    """
    get_docker_client()  # Ensures that docker is running

    specified_analyzer = SpecifiedAnalyzer(
        VersionedAnalyzer(AnalyzerName(analyzer_name), version))
    registry = RegistryData.from_json(REGISTRY)

    json_output_store = LocalJsonOutputStore()
    filesystem_output_store = LocalFilesystemOutputStore()
    log_store = LocalLogStore()
    stats_store = LocalStatsStore()

    def _purge_stores() -> None:
        # All caching should be handled by bento, so local state is wiped
        # both before and after the run.
        json_output_store.delete_all()  # type: ignore
        filesystem_output_store.delete_all()  # type: ignore
        log_store.delete_all()  # type: ignore
        stats_store.delete_all()  # type: ignore

    _purge_stores()

    pathlib.Path(LOCAL_RUN_TMP_FOLDER).mkdir(parents=True, exist_ok=True)
    analyzer = Analyzer(
        registry,
        json_output_store,
        filesystem_output_store,
        log_store,
        stats_store,
        workdir=LOCAL_RUN_TMP_FOLDER,
        # Timeout relies on signaling, which is not valid in a
        # multithreaded world, so it is disabled with 0.
        timeout=0,
        memory_limit=CONTAINER_MEMORY_LIMIT,
    )

    # get all cloner versions from registry so we can copy the passed in code
    # directory in place of output for all versions of cloner
    fetchers = [
        sa for sa in registry.sorted_deps(specified_analyzer)
        if sa.versioned_analyzer.name in SPECIAL_ANALYZERS
    ]
    analyzer_input = LocalCode(str(base))
    for fetcher in fetchers:
        _copy_local_input(
            analyzer,
            fetcher.versioned_analyzer,
            analyzer_input,
            ignore_files,
            set(target_files),
        )

    analyzer.full_analyze_request(
        analyzer_input=analyzer_input,
        specified_analyzer=specified_analyzer,
        force=False,
    )

    # Get Final Output.
    # Bug fix: the original coerced a missing output to "" and then called
    # json.loads("") on it, which raises json.JSONDecodeError; treat missing
    # output as an empty result list instead.
    output = json_output_store.read(analyzer_input, specified_analyzer)
    if output is None:
        output_json = []
    else:
        output_json = json.loads(output).get("results", [])

    # Cleanup state
    _purge_stores()
    return output_json
import json

from r2c.lib.specified_analyzer import SpecifiedAnalyzer, AnalyzerParameters
from r2c.lib.versioned_analyzer import AnalyzerName, VersionedAnalyzer
from semantic_version import Version

# Shared fixtures for the SpecifiedAnalyzer tests below.
ANALYZER_NAME = AnalyzerName("r2c/test-analyzer")
VERSION = Version("1.2.3")
VERSIONED_ANALYZER = VersionedAnalyzer(ANALYZER_NAME, VERSION)
PARAMETERS = AnalyzerParameters({})


def test_constructor():
    """Constructor stores the versioned analyzer and parameters it was given."""
    sa = SpecifiedAnalyzer(VERSIONED_ANALYZER, PARAMETERS)
    assert ANALYZER_NAME == sa.versioned_analyzer.name
    assert VERSION == sa.versioned_analyzer.version
    assert sa.parameters is not None
    for parameter in sa.parameters:
        assert PARAMETERS[parameter] == sa.parameters[parameter]


def test_json_conversion():
    """Round-tripping through to_json/from_json_str preserves identity."""
    sa = SpecifiedAnalyzer(VERSIONED_ANALYZER, PARAMETERS)
    sa2 = SpecifiedAnalyzer.from_json_str(json.dumps(sa.to_json()))
    assert sa.versioned_analyzer.name == sa2.versioned_analyzer.name
    assert sa.versioned_analyzer.version == sa2.versioned_analyzer.version
    # Parameters Match
    # NOTE(review): the parameter-equality assertions appear to be truncated
    # in this chunk — confirm against the full file.