Esempio n. 1
0
def test_add_manifest_force():
    """force=True lets a manifest overwrite an existing registry entry,
    and overriding the A->B edge must not affect the independent C->B edge."""
    registry_data = RegistryData.from_json(VALID_DATA)
    local_b = ANALYZER_B.copy()
    local_b["version"] = UNIQUE_VERSION

    # Force-add a uniquely-versioned B, then the A manifest that links to it.
    for manifest_json in (local_b, LINKED_ANALYZER_A):
        registry_data = registry_data.add_pending_manifest(
            AnalyzerManifest.from_json(manifest_json), force=True)

    def specified(name, version):
        # Shorthand for building a SpecifiedAnalyzer from name/version strings.
        return SpecifiedAnalyzer(
            VersionedAnalyzer(AnalyzerName(name), Version(version)))

    assert registry_data._resolve("test-org-name/a", "0.0.1")

    deps_of_a = registry_data.sorted_deps(specified("test-org-name/a", "0.0.1"))
    assert specified("test-org-name/b", UNIQUE_VERSION) in deps_of_a
    assert specified("test-org-name/c", "0.0.1") in deps_of_a

    # Linking is local: overriding the A->B edge does not override the C->B edge.
    deps_of_c = registry_data.sorted_deps(specified("test-org-name/c", "0.0.1"))
    assert specified("test-org-name/b", "0.0.2") in deps_of_c
Esempio n. 2
0
def setup_locally_linked_analyzer(manifest: AnalyzerManifest,
                                  registry_data: RegistryData,
                                  analyzer_directory: str) -> RegistryData:
    """
        Builds and tags each path-linked dependency of MANIFEST (resolved
        relative to ANALYZER_DIRECTORY) with a unique version, then returns
        a modified copy of REGISTRY_DATA so that local runs resolve to the
        freshly built analyzers.

        Side effects: builds docker images for each linked dependency and
        mutates MANIFEST in place (its dependency list and its cached
        original JSON are rewritten to point at the unique versions).

        Raises:
            LinkedAnalyzerNameMismatch: if a linked manifest's name does not
                match the dependency entry that points at it.
            Exception: whatever find_and_open_analyzer_manifest raises is
                logged at debug level and re-raised.
    """

    new_registry = registry_data.deepcopy()
    new_dependencies: List[AnalyzerDependency] = []
    for dep in manifest.dependencies:
        # Registry-hosted dependencies (no local path) pass through unchanged.
        if not dep.path:
            new_dependencies.append(dep)
            continue
        try:
            # Linked paths are relative to the analyzer's own directory.
            local_manifest, local_dir = find_and_open_analyzer_manifest(
                os.path.normpath(os.path.join(analyzer_directory, dep.path)))
        except Exception as e:
            logger.debug(
                f"Exception while resolving local linked dependendies: {str(e)}"
            )
            raise e
        # validate name
        if local_manifest.analyzer_name != dep.name:
            raise LinkedAnalyzerNameMismatch(
                f"Linked analyzer name must match {local_manifest.analyzer_name} != {dep.name}"
            )

        # build docker with unique version
        local_version = get_unique_semver(local_manifest.version)
        build_docker(
            local_manifest.analyzer_name,
            local_version,
            os.path.relpath(local_dir, os.getcwd()),
            verbose=True,
        )

        # add linked dep to registry (mutating the loaded local manifest's
        # version so the registry entry matches the image we just built)
        local_manifest.version = local_version
        new_registry = new_registry.add_pending_manifest(local_manifest,
                                                         force=True)

        # Re-point the parent's dependency at the exact unique version.
        new_dependencies.append(
            AnalyzerDependency(
                AnalyzerName(local_manifest.analyzer_name),
                wildcard_version=str(local_version),
                parameters=dep.parameters,
            ))

    # add analyzer to registry; also rewrite the cached original JSON so any
    # later serialization of the manifest reflects the rewritten dependencies
    manifest.dependencies = new_dependencies
    manifest._original_json["dependencies"] = {
        dep.name: {
            "version": dep.wildcard_version,
            "path": dep.path
        }
        for dep in new_dependencies
    }
    new_registry = new_registry.add_pending_manifest(manifest, force=True)
    return new_registry
Esempio n. 3
0
 def from_json(cls, data_json: RegistryDataJson) -> "RegistryData":
     """Construct a RegistryData by parsing each per-analyzer JSON entry."""
     parsed = {}
     for name, analyzer_data_json in data_json.items():
         parsed[AnalyzerName(name)] = AnalyzerData.from_json(analyzer_data_json)
     return cls(parsed)
Esempio n. 4
0
def test_equality():
    """SpecifiedAnalyzers built from equal components compare equal."""
    sa_shared = SpecifiedAnalyzer(VERSIONED_ANALYZER, PARAMETERS)

    # Built from freshly constructed (but equal-valued) objects.
    sa_fresh = SpecifiedAnalyzer(
        VersionedAnalyzer(AnalyzerName("r2c/test-analyzer"), Version("1.2.3")),
        AnalyzerParameters({}),
    )

    assert sa_shared == sa_fresh
Esempio n. 5
0
    def get_direct_dependencies(self, va: VersionedAnalyzer) -> List[SpecifiedAnalyzer]:
        """
            Returns the direct (first-level) dependencies of an analyzer.
            Dependencies whose version cannot be resolved are skipped.
        """
        manifest = self.manifest_for(va)
        if manifest is None:
            raise ManifestNotFoundException(f"manifest not found for {va}.")

        direct_deps = []
        for dep in manifest.dependencies:
            version = self._resolve(
                AnalyzerName(dep.name), dep.wildcard_version
            )
            if version is None:
                # Unresolvable pin: silently omit from the result.
                continue
            direct_deps.append(
                SpecifiedAnalyzer(
                    VersionedAnalyzer(AnalyzerName(dep.name), version),
                    dep.parameters,
                )
            )
        return direct_deps
Esempio n. 6
0
def prepull_analyzers(analyzer_name: str, version: Version) -> None:
    """
        Pulls every docker image needed to run the given analyzer,
        i.e. the analyzer itself plus its resolved dependency closure.
    """
    target = SpecifiedAnalyzer(
        VersionedAnalyzer(AnalyzerName(analyzer_name), version))
    registry = RegistryData.from_json(REGISTRY)

    docker_client = get_docker_client()
    for dep in registry.sorted_deps(target):
        if not _should_pull_analyzer(dep):
            continue
        docker_client.images.pull(dep.versioned_analyzer.image_id)
Esempio n. 7
0
    def _dependency_graph(
        self, subgraph_from_node: "Optional[SpecifiedAnalyzer]" = None
    ) -> Graph[SpecifiedAnalyzer]:
        """
            Builds the dependency graph over every analyzer in the registry,
            or — if SUBGRAPH_FROM_NODE is given — only the subgraph reachable
            from that analyzer.

            Raises:
                ManifestNotFoundException: if an explored analyzer has no
                    manifest in the registry.
                Exception: if a dependency's version cannot be resolved.
        """
        edges = set()
        nodes = set()

        if subgraph_from_node:
            to_explore = set([subgraph_from_node])
        else:
            # No start node: seed the worklist with every known analyzer.
            to_explore = set(SpecifiedAnalyzer(va) for va in self.versioned_analyzers)

        # this loop terminates after at most sum(len(deps)) because we always pop values off
        # `to_explore` and add them to `nodes`, and only add values to `to_explore` if
        # they're not in `nodes`.
        while to_explore:
            sa = to_explore.pop()
            next_manifest = self.manifest_for(sa.versioned_analyzer)
            if not next_manifest:
                raise ManifestNotFoundException(
                    f"manifest not found for analyzer {sa.versioned_analyzer.name} at version {sa.versioned_analyzer.version}. Registry data: {self.to_json()}"
                )

            deps = next_manifest.dependencies

            nodes.add(sa)
            for dep in deps:
                resolved_version = self._resolve(
                    AnalyzerName(dep.name), dep.wildcard_version
                )
                if resolved_version is None:
                    raise Exception(f"Can't resolve dependency {dep} of {sa}")

                # NOTE(review): dep.name is passed bare here, while sibling code
                # wraps it in AnalyzerName(...) — presumably equivalent; confirm
                # AnalyzerName is a str NewType/subclass.
                resolved_dep = VersionedAnalyzer(dep.name, resolved_version)
                specified_dep = SpecifiedAnalyzer(resolved_dep, dep.parameters)

                edges.add((sa, specified_dep))
                if specified_dep not in nodes:
                    to_explore.add(specified_dep)

        return Graph(list(nodes), list(edges))
Esempio n. 8
0
def test_should_pull() -> None:
    """An analyzer image that cannot exist locally must be reported as pull-needed."""
    missing = VersionedAnalyzer(AnalyzerName("doesnt/exist"), Version("9.1.1"))
    assert _should_pull_analyzer(SpecifiedAnalyzer(missing))
Esempio n. 9
0
    def add_pending_manifest(
        self, manifest: AnalyzerManifest, force: bool = False
    ) -> "RegistryData":
        """
            Add this manifest into the current registry data as pending upload.
            This method first verifies that:
            1. Name conforms to org/name
            2. This is not a duplicate versioned analyzer
            3. Its dependencies can be resolved
            4. It doesn't cause circular dependencies

            Arguments:
                manifest: The manifest of the analyzer we want to add to the registry
                force: Force overwrite into registry if manifest already exists with matching name.
                    This flag nullifies the InvalidManifestException thrown for a manifest that already exists

            Returns:
                A new RegistryData object with manifest added in.

            Throws:
                An InvalidManifestException if the manifest can't be added
        """
        name = manifest.analyzer_name
        version = manifest.version
        va = VersionedAnalyzer(name, version)
        # check that name looks like org/name
        # don't do this check for now until we change analyzer naming everywhere else
        # TODO: Actually get the current org's name current_org
        # if not is_analyzer_of_org(name, current_org):
        #     raise Exception(f"Analyzer name must be of the form {org_name}/name")

        # create here and return at the end because it comes in handy
        new_reg = self.UNSAFE_add_manifest(manifest)
        # check that we can resolve its dependencies
        for dep in manifest.dependencies:
            # Check that it doesn't depend on itself
            if dep.name == name:
                raise InvalidManifestException(
                    f"Resolving this dependency: {dep} But analyzer can't depend on itself."
                )
            resolved_version = new_reg._resolve(
                AnalyzerName(dep.name), dep.wildcard_version
            )

            if dep.path:
                # Locally-linked dependency: the path must be an existing
                # directory.  (isdir already implies existence, so the former
                # redundant os.path.exists check was dropped.)
                if not os.path.isdir(dep.path):
                    raise InvalidLocalPathException(
                        f"A dependency in this manifest cannot be resolved: {dep}"
                    )
            elif resolved_version is None:
                raise InvalidManifestException(
                    f"A dependency in this manifest cannot be resolved: {dep}"
                )

        # Check that we don't already have a manifest for it.
        # i.e. don't allow a new manifest without changing analyzer version.
        # TODO: check that it's increased
        analyzer_data = self._data.get(name)
        if analyzer_data and version in analyzer_data.versions and not force:
            raise InvalidManifestException(
                f"A manifest for this analyzer and version already exists: {va}"
            )

        # and see if it can be topologically sorted; the sorted result itself
        # is unused — only cycle detection matters here
        deps_graph = new_reg._dependency_graph()
        try:
            deps_graph.topo_sorted()
        except CircularDependencyError as e:
            # chain the original error so the cycle details aren't lost
            raise InvalidManifestException(
                "This manifest would cause a cycle in the dependency graph"
            ) from e

        # all is well? return the new registry
        return new_reg
Esempio n. 10
0
def run(
    ctx,
    analyzer_directory,
    analyzer_input,
    output_path,
    quiet,
    analyzer_quiet,
    no_login,
    wait,
    debug,
    parameters,
    env_args_string,
):
    """
    Run the analyzer in the current directory over a code directory.

    You may have to log in if your analyzer depends on privately
    published analyzers.
    """
    # PEP 8: don't compare to True with `==`; a plain truth test is the
    # correct, equivalent form for a boolean flag.
    if debug:  # allow passing --debug to run as well as globally
        set_debug_flag(ctx, True)

    debug_mode = ctx.obj["DEBUG"]
    print_msg(f"🏃 Running analyzer...{'with debug mode' if debug_mode else ''}")
    env_args_dict = parse_remaining(env_args_string)

    try:
        parameter_obj = json.loads(parameters)
    except ValueError as e:
        # print_error_exit terminates the process, so parameter_obj is
        # always bound past this point.
        print_error_exit(
            f'Failed to parse parameter string:"{parameters}" as json. Parse Error: {e}'
        )

    manifest, analyzer_directory = find_and_open_analyzer_manifest(
        analyzer_directory, ctx
    )

    try:
        registry_data = RegistryData.from_json(fetch_registry_data())
    except Exception as e:
        message = getattr(e, "message", repr(e))
        print_error_exit(
            f"There was an error fetching data from the registry: {message}"
        )
    dependencies = manifest.dependencies
    logger.info(f"Parsing and resolving dependencies: {dependencies}")
    if dependencies:
        # Resolve every declared dependency up front so the user gets a
        # clear error before any docker work starts.
        for analyzer_dep in dependencies:
            dep_name = analyzer_dep.name
            dep_semver_version = analyzer_dep.wildcard_version
            dep_version = registry_data._resolve(
                AnalyzerName(analyzer_dep.name), dep_semver_version
            )
            if not dep_version:
                print_error_exit(
                    f"Error resolving dependency {dep_name} at version {dep_semver_version}. Check that you're using the right version of this dependency and try again."
                )
            logger.info(f"Resolved dependency {dep_name}:{dep_semver_version}")

        if not no_login:
            # we need at least one dep and its version to get credentials when the user isn't logged in
            dep_name = dependencies[0].name
            dep_semver_version = dependencies[0].wildcard_version
            dep_version = registry_data._resolve(
                AnalyzerName(dep_name), dep_semver_version
            )

            artifact_link = (
                f"{get_base_url()}/api/v1/artifacts/{dep_name}/{dep_version}"
            )
            logger.info(f"Getting credential from {artifact_link}")

            # TODO (ulzii) use proper auth credential once its done
            creds = get_docker_creds(artifact_link)
            if creds is None:
                print_error_exit(
                    "Error getting dependency credentials. Please contact us with the following information: failed to get Docker credentials."
                )
            # docker login
            successful_login = docker_login(creds)
            if not successful_login:
                print_error_exit(
                    "Error validating dependency credentials. Please contact us with the following information: failed to log in to Docker."
                )
    else:
        print_warning(
            "No dependencies found; are dependencies intentionally omitted in analyzer.json? Most analyzers are expected to have 1 or more dependencies (e.g. for taking source code as input)."
        )

    abort_on_build_failure(
        build_docker(
            manifest.analyzer_name,
            manifest.version,
            os.path.relpath(analyzer_directory, os.getcwd()),
            env_args_dict={**DEFAULT_ENV_ARGS_TO_DOCKER, **env_args_dict},
            verbose=debug_mode,
        )
    )

    try:
        run_analyzer_on_local_code(
            registry_data=registry_data,
            manifest=manifest,
            workdir=None,
            code_dir=analyzer_input.strip(
                '"'
            ),  # idk why this is happening for quoted paths
            output_path=output_path,
            wait=wait,
            show_output_on_stdout=not quiet,
            pass_analyzer_output=not analyzer_quiet,
            no_preserve_workdir=True,
            parameters=parameter_obj,
            env_args_dict={**DEFAULT_ENV_ARGS_TO_DOCKER, **env_args_dict},
        )
    except SymlinkNeedsElevationError as sym:
        print_error_exit(
            f"Error setting up local analysis. {sym}. Try again as an admin."
        )
Esempio n. 11
0
def run(
    ctx,
    analyzer_directory,
    analyzer_input,
    output_path,
    quiet,
    analyzer_quiet,
    no_login,
    debug,
    interactive,
    interactive_name,
    reset_cache,
    verbose,
    parameters,
    env_args_string,
):
    """
    Run the analyzer in the current directory over a code directory.

    You may have to log in if your analyzer depends on privately
    published analyzers.
    """

    if verbose is True:  # allow passing --verbose to run as well as globally
        set_verbose_flag(ctx, True)
    if debug is True:
        set_debug_flag(ctx, True)
    print_msg(f"🏃 Starting to run analyzer...")

    # -1 selects the last analyzer in the execution chain for `docker exec`;
    # None disables interactive inspection entirely.
    interactive_index = -1 if interactive else None
    env_args_dict = parse_remaining(env_args_string)

    parameter_obj = load_params(parameters)

    manifest, analyzer_directory = find_and_open_analyzer_manifest(
        analyzer_directory, ctx)

    registry_data = get_registry_data()

    dependencies = manifest.dependencies
    print_msg("Resolving dependencies")
    logger.debug(f"Parsing and resolving dependencies: {dependencies}")
    if dependencies:
        # Resolve everything up front so the user sees resolution errors
        # before any docker work starts.
        for analyzer_dep in dependencies:
            dep_name = analyzer_dep.name
            dep_semver_version = analyzer_dep.wildcard_version
            dep_version = registry_data._resolve(
                AnalyzerName(analyzer_dep.name), dep_semver_version)
            if not dep_version:
                # Path-linked dependencies are allowed to be unresolvable in
                # the registry; they resolve locally instead.
                if not analyzer_dep.path:
                    print_error_exit(
                        f"Error resolving dependency {dep_name} at version {dep_semver_version}. Check that you're using the right version of this dependency and try again."
                    )
            logger.debug(
                f"Resolved dependency {dep_name}:{dep_semver_version}")

        if not no_login:
            # we need at least one dep and its version to get credentials when the user isn't logged in
            dep_name = dependencies[0].name
            dep_semver_version = dependencies[0].wildcard_version
            dep_version = registry_data._resolve(AnalyzerName(dep_name),
                                                 dep_semver_version)

            artifact_link = (
                f"{get_base_url()}/api/v1/artifacts/{dep_name}/{dep_version}")
            logger.debug(f"Getting credential from {artifact_link}")

            # TODO (ulzii) use proper auth credential once its done
            creds = get_docker_creds(artifact_link)
            if creds is None:
                print_error_exit(
                    "Error getting dependency credentials. Please contact us with the following information: failed to get Docker credentials."
                )
            # docker login
            successful_login = docker_login(creds)
            if not successful_login:
                print_error_exit(
                    "Error validating dependency credentials. Please contact us with the following information: failed to log in to Docker."
                )
    else:
        print_warning(
            "No dependencies found; are dependencies intentionally omitted in analyzer.json? Most analyzers are expected to have 1 or more dependencies (e.g. for taking source code as input)."
        )
    print_msg("🔨 Building docker container")

    abort_on_build_failure(
        build_docker(
            manifest.analyzer_name,
            manifest.version,
            os.path.relpath(analyzer_directory, os.getcwd()),
            env_args_dict=env_args_dict,
            no_cache=reset_cache,
        ))
    try:
        # interactive_index is only ever -1 (truthy) or None here, so a plain
        # truth test is safe even though 0 would be falsy in general.
        if interactive_index:
            print_msg(
                f"🔎 Inspecting containers interactively by `docker exec` into last analyzer in execution."
            )
        elif interactive_name:
            print_msg(
                f"🔎 Inspecting containers interactively by `docker exec` into analyzer with name containing `{interactive_name}`."
            )
        else:
            print_msg(f"🔎 Running analysis on `{analyzer_input}`")

        logger.info(f"Reset cache: {reset_cache}")
        try:
            run_analyzer_on_local_code(
                registry_data=registry_data,
                manifest=manifest,
                workdir=None,
                analyzer_dir=analyzer_directory,
                code_dir=analyzer_input,
                output_path=output_path,
                show_output_on_stdout=not quiet,
                pass_analyzer_output=not analyzer_quiet,
                no_preserve_workdir=True,
                parameters=parameter_obj,
                env_args_dict=env_args_dict,
                interactive_index=interactive_index,
                interactive_name=interactive_name,
                reset_cache=reset_cache,
            )
        except AnalyzerOutputNotFound as fne:
            print_error_exit(str(fne), err=False)
        except AnalyzerNonZeroExitError as ae:
            print_exception_exit("Analyzer non-zero exit", ae, err=False)
        if output_path:
            path_msg = f"Analysis results in `{output_path}`."
        else:
            path_msg = f"Analysis results printed to `stdout`. unless suppressed explicitly with `-q`"
        print_success(f"Finished analyzing `{analyzer_input}`. {path_msg}")

    except SymlinkNeedsElevationError as sym:
        print_error_exit(
            f"Error setting up local analysis. {sym}. Try again as an admin.")
Esempio n. 12
0
def from_json(cls, json_obj: Dict[str, Any]) -> "AnalyzerManifest":
    """Parse, validate, and migrate a manifest JSON object.

    Raises:
        MalformedManifestException: missing spec_version, no schema for the
            spec_version, or schema validation failure.
        IncompatibleManifestException: spec_version major is newer than the
            latest supported SPEC_VERSION.
    """
    # The type of the json_obj argument is a bit of a hack, since in
    # r2c.lib.registry we cast an arbitrary dict to an AnalyzerManifestJson
    # before calling this.

    spec_version = json_obj.get("spec_version")
    if spec_version is None:
        raise MalformedManifestException(
            json_obj, "Must specify a spec_version field"
        )

    # Only a newer *major* spec version is rejected; newer minor/patch are
    # assumed backward compatible.
    if Version(spec_version).major > SPEC_VERSION.major:
        logger.error(
            f"Trying to parse manifest for analyzer {json_obj['analyzer_name']}:{json_obj['version']}"
            f" with spec_version: {spec_version}, but that spec_version is"
            f" too new and not compatible with the latest supported: {SPEC_VERSION}."
        )
        raise IncompatibleManifestException(
            f"Can't parse manifest for analyzer {json_obj['analyzer_name']}:{json_obj['version']}"
            f" with spec_version: {spec_version}. The spec_version is"
            f" incompatible with the latest supported: {SPEC_VERSION}."
        )

    validator = schemas.manifest_validator(json_obj)
    if validator is None:
        raise MalformedManifestException(
            json_obj,
            "Could not find a schema for the given spec_version {}".format(
                spec_version
            ),
        )
    try:
        validator.validate(json_obj)
    except jsonschema.ValidationError as err:
        raise MalformedManifestException(json_obj, err.message) from err

    # Keep the pre-migration JSON; it is stored on the manifest so the
    # original form can be reproduced later.
    original_json_obj = json_obj

    json_obj = manifest_migrations.migrate(json_obj)
    # Migration records the pre-migration spec version under
    # _original_spec_version; fall back to the (already current) spec_version.
    original_version = (
        Version(json_obj["_original_spec_version"])
        if json_obj.get("_original_spec_version")
        else Version(json_obj["spec_version"])
    )

    dependencies = []
    for dependency_name, value in json_obj["dependencies"].items():
        if isinstance(value, str):
            # if value is string, assume its Semver version
            dependencies.append(
                AnalyzerDependency(
                    AnalyzerName(dependency_name), wildcard_version=value
                )
            )
        else:
            # If value is an object, parse it for params, version, path
            dependencies.append(
                AnalyzerDependency(
                    AnalyzerName(dependency_name),
                    value.get("version"),
                    value.get("parameters"),
                    value.get("path"),
                )
            )
    return cls(
        json_obj["analyzer_name"],
        json_obj.get("author_email"),
        json_obj.get("author_name"),
        Version(json_obj["version"]),
        json_obj["spec_version"],
        dependencies,
        AnalyzerType.from_name(json_obj["type"]),
        AnalyzerOutput.from_json(json_obj["output"]),
        json_obj["deterministic"],
        original_json_obj,
        original_version,
        json_obj.get("extra"),
    )
Esempio n. 13
0
def run_analyzer_on_local_code(
    analyzer_name: str,
    version: Version,
    base: Path,
    ignore_files: Set[Path],
    target_files: Iterable[str],
) -> JsonR:
    """Run an analyzer (and its dependency chain) on a local folder.

    Args:
        analyzer_name: Name of the analyzer to run (org/name form).
        version: Exact version of the analyzer.
        base: Root directory of the local code to analyze.
        ignore_files: Paths excluded when copying the input in place of
            fetcher output.
        target_files: Files included when copying the input.

    Returns:
        The "results" list from the analyzer's JSON output; an empty list
        if the analyzer produced no output.
    """
    get_docker_client()  # Fails fast if the docker daemon isn't running.

    specified_analyzer = SpecifiedAnalyzer(
        VersionedAnalyzer(AnalyzerName(analyzer_name), version))
    registry = RegistryData.from_json(REGISTRY)

    json_output_store = LocalJsonOutputStore()
    filesystem_output_store = LocalFilesystemOutputStore()
    log_store = LocalLogStore()
    stats_store = LocalStatsStore()
    stores = (json_output_store, filesystem_output_store, log_store,
              stats_store)

    # All caching should be handled by bento, so start from a clean slate.
    _delete_all_store_state(stores)

    pathlib.Path(LOCAL_RUN_TMP_FOLDER).mkdir(parents=True, exist_ok=True)

    analyzer = Analyzer(
        registry,
        json_output_store,
        filesystem_output_store,
        log_store,
        stats_store,
        workdir=LOCAL_RUN_TMP_FOLDER,
        # Timeout relies on signaling, which is not valid in a multithreaded
        # world, so it is disabled.
        timeout=0,
        memory_limit=CONTAINER_MEMORY_LIMIT,
    )

    # get all cloner versions from registry so we can copy the passed in code
    # directory in place of output for all versions of cloner
    fetchers = [
        sa for sa in registry.sorted_deps(specified_analyzer)
        if sa.versioned_analyzer.name in SPECIAL_ANALYZERS
    ]

    analyzer_input = LocalCode(str(base))
    for fetcher in fetchers:
        _copy_local_input(
            analyzer,
            fetcher.versioned_analyzer,
            analyzer_input,
            ignore_files,
            set(target_files),
        )

    analyzer.full_analyze_request(
        analyzer_input=analyzer_input,
        specified_analyzer=specified_analyzer,
        force=False,
    )

    # Get final output.
    # BUG FIX: the previous code substituted "" for a missing output, but
    # json.loads("") raises ValueError; "{}" parses to an empty object so a
    # missing output yields an empty results list instead of crashing.
    output = json_output_store.read(analyzer_input, specified_analyzer)
    if output is None:
        output = "{}"
    output_json = json.loads(output).get("results", [])

    # Cleanup state
    _delete_all_store_state(stores)

    return output_json


def _delete_all_store_state(stores) -> None:
    """Delete all cached state from each of the given output/log/stats stores."""
    for store in stores:
        store.delete_all()  # type: ignore
Esempio n. 14
0
import json

from r2c.lib.specified_analyzer import SpecifiedAnalyzer, AnalyzerParameters
from r2c.lib.versioned_analyzer import AnalyzerName, VersionedAnalyzer

from semantic_version import Version


ANALYZER_NAME = AnalyzerName("r2c/test-analyzer")
VERSION = Version("1.2.3")
VERSIONED_ANALYZER = VersionedAnalyzer(ANALYZER_NAME, VERSION)
PARAMETERS = AnalyzerParameters({})


def test_constructor():
    """The constructor stores the versioned analyzer and parameters verbatim."""
    sa = SpecifiedAnalyzer(VERSIONED_ANALYZER, PARAMETERS)

    assert sa.versioned_analyzer.name == ANALYZER_NAME
    assert sa.versioned_analyzer.version == VERSION
    assert sa.parameters is not None
    for key in sa.parameters:
        assert sa.parameters[key] == PARAMETERS[key]


def test_json_conversion():
    sa = SpecifiedAnalyzer(VERSIONED_ANALYZER, PARAMETERS)
    sa2 = SpecifiedAnalyzer.from_json_str(json.dumps(sa.to_json()))

    assert sa.versioned_analyzer.name == sa2.versioned_analyzer.name
    assert sa.versioned_analyzer.version == sa2.versioned_analyzer.version

    # Parameters Match