def read_algorithm_config(config_file):
    """Read config file and return a list of values"""
    LOG.info("Reading config file ...")
    config_data = ()
    with open(config_file, "r") as f:
        data = f.read()
    out_re = data.replace("\r", "").replace(" ", "")
    out_ind = out_re.split('\n')
    config_data = (out_ind[0].split(':')[1], out_ind[1].split(':')[1],
                   out_ind[2].split(':')[1], out_ind[3].split(':')[1])
    # config_data (Population_Range, Termination, Adaptive_Mutation_Step,
    #              Survivor_Selection)
    LOG.info("{0}".format(config_data))
    return config_data
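
As a rough usage sketch (the file name, key names, and values below are illustrative, inferred from the inline comment above), the parser expects four colon-separated lines:

# algorithm.cfg -- hypothetical contents
#   Population_Range: 10-50
#   Termination: 200
#   Adaptive_Mutation_Step: 0.05
#   Survivor_Selection: tournament
config_data = read_algorithm_config("algorithm.cfg")
# config_data == ('10-50', '200', '0.05', 'tournament')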
Example #2
def nodejs_build(src, reports_dir, lang_tools):
    """
    Automatically build nodejs project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed successfully. False otherwise
    """
    cmd_args = lang_tools.get("npm")
    yarn_mode = False
    rush_mode = False
    rushjson_files = [p.as_posix() for p in Path(src).glob("rush.json")]
    pjson_files = [p.as_posix() for p in Path(src).glob("package.json")]
    ylock_files = [p.as_posix() for p in Path(src).glob("yarn.lock")]
    if ylock_files:
        cmd_args = lang_tools.get("yarn")
        yarn_mode = True
    elif rushjson_files:
        cmd_args = lang_tools.get("rush")
        rush_mode = True
    elif not pjson_files:
        LOG.debug(
            "Node.js auto build is supported only for npm, yarn, or rush based projects"
        )
        return False
    cp = exec_tool("auto-build", cmd_args, src)
    if cp:
        ret = cp.returncode == 0
    else:
        ret = False
    try:
        # Invoke the build script: npm and yarn fall through to "run build",
        # while rush uses its own "rebuild" command
        cmd_args = ["npm"]
        if yarn_mode:
            cmd_args = ["yarn"]
        if rush_mode:
            cmd_args = ["rush", "rebuild"]
        else:
            cmd_args += ["run", "build"]
        exec_tool("auto-build", cmd_args, src)
    except Exception:
        if rush_mode:
            LOG.warning(
                "Automatic build for rush.js has failed. Try installing the packages manually before invoking scan.\nIf this works then let us know the build steps by filing an issue."
            )
        else:
            LOG.debug("Automatic build has failed for the node.js project")
    return ret
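
The lang_tools mapping itself is not shown in this excerpt; a plausible shape, given purely as an assumption, would be:

# Hypothetical lang_tools mapping for node.js projects (not taken from the source)
lang_tools = {
    "npm": ["npm", "install"],
    "yarn": ["yarn", "install"],
    "rush": ["rush", "install"],
}
ok = nodejs_build("/path/to/project", "/tmp/reports", lang_tools)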
Example #3
def php_build(src, reports_dir, lang_tools):
    """
    Automatically build php project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed successfully. False otherwise
    """
    ret = False
    cmd_args = lang_tools.get("install")
    cjson_files = [p.as_posix() for p in Path(src).glob("composer.json")]
    # If there is no composer.json try to create one
    if not cjson_files:
        cp = exec_tool(
            "auto-build",
            lang_tools.get("init"),
            src,
            env=os.environ.copy(),
            stdout=subprocess.PIPE,
        )
        if cp:
            LOG.debug(cp.stdout)
    cp = exec_tool("auto-build",
                   cmd_args,
                   src,
                   env=os.environ.copy(),
                   stdout=subprocess.PIPE)
    if cp:
        LOG.debug(cp.stdout)
        ret = cp.returncode == 0
    # If composer install fails, try composer update
    if not ret:
        cmd_args = lang_tools.get("update")
        cp = exec_tool("auto-build",
                       cmd_args,
                       src,
                       env=os.environ.copy(),
                       stdout=subprocess.PIPE)
        if cp:
            LOG.debug(cp.stdout)
            ret = cp.returncode == 0
    return ret
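
Similarly, the PHP build expects init, install, and update entries; an illustrative (assumed) mapping:

# Hypothetical lang_tools mapping for composer-based projects (not taken from the source)
lang_tools = {
    "init": ["composer", "init", "--no-interaction"],
    "install": ["composer", "install", "--no-interaction"],
    "update": ["composer", "update", "--no-interaction"],
}
ok = php_build("/path/to/project", "/tmp/reports", lang_tools)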
Example #4
def exec_tool(  # scan:ignore
        tool_name,
        args,
        cwd=None,
        env=utils.get_env(),
        stdout=subprocess.DEVNULL):
    """
    Convenience method to invoke cli tools

    Args:
      tool_name Tool name
      args cli command and args
      cwd Current working directory
      env Environment variables
      stdout stdout configuration for run command

    Returns:
      CompletedProcess instance
    """
    with Progress(
            console=console,
            redirect_stderr=False,
            redirect_stdout=False,
            refresh_per_second=1,
    ) as progress:
        task = None
        try:
            env = use_java(env)
            LOG.debug('⚡︎ Executing {} "{}"'.format(tool_name, " ".join(args)))
            stderr = subprocess.DEVNULL
            if LOG.isEnabledFor(DEBUG):
                stderr = subprocess.STDOUT
            tool_verb = "Scanning with"
            if "init" in tool_name:
                tool_verb = "Initializing"
            elif "build" in tool_name:
                tool_verb = "Building with"
            task = progress.add_task("[green]" + tool_verb + " " + tool_name,
                                     total=100,
                                     start=False)
            cp = subprocess.run(
                args,
                stdout=stdout,
                stderr=stderr,
                cwd=cwd,
                env=env,
                check=False,
                shell=False,
                encoding="utf-8",
            )
            if cp and stdout == subprocess.PIPE:
                for line in cp.stdout:
                    progress.update(task, completed=5)
            if (cp and LOG.isEnabledFor(DEBUG) and cp.returncode
                    and cp.stdout is not None):
                LOG.debug(cp.stdout)
            progress.update(task, completed=100, total=100)
            return cp
        except Exception as e:
            if task:
                progress.update(task, completed=20, total=10, visible=False)
            if not LOG.isEnabledFor(DEBUG):
                LOG.info(
                    f"{tool_name} has reported a few errors. To view them, pass the environment variable SCAN_DEBUG_MODE=debug"
                )
            LOG.debug(e)
            return None
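
A minimal usage sketch (the command and directory are illustrative):

# Run "npm install" inside the project directory and check the exit code
cp = exec_tool("auto-build", ["npm", "install"], cwd="/path/to/project")
if cp and cp.returncode == 0:
    LOG.info("Build succeeded")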
Example #5
def execute_default_cmd(  # scan:ignore
    cmd_map_list,
    type_str,
    tool_name,
    src,
    reports_dir,
    convert,
    scan_mode,
    repo_context,
):
    """
    Method to execute default command for the given type

    Args:
      cmd_map_list Default commands in the form of a dict (multiple) or list
      type_str Project type
      tool_name Tool name
      src Project dir
      reports_dir Directory for output reports
      convert Boolean to enable normalisation of reports json
      scan_mode Scan mode string
      repo_context Repo context
    """
    # Check if there is a default command specified for the given type
    # Create the reports dir
    report_fname_prefix = os.path.join(reports_dir, tool_name + "-report")
    # Look for any additional direct arguments for the tool and inject them
    if config.get(tool_name + "_direct_args"):
        direct_args = config.get(tool_name + "_direct_args").split(" ")
        if direct_args:
            cmd_map_list += direct_args
    src_or_file = src
    if config.get("SHIFTLEFT_ANALYZE_FILE"):
        src_or_file = config.get("SHIFTLEFT_ANALYZE_FILE")
    default_cmd = " ".join(cmd_map_list) % dict(
        src=src,
        src_or_file=src_or_file,
        reports_dir=reports_dir,
        report_fname_prefix=report_fname_prefix,
        type=type_str,
        scan_mode=scan_mode,
    )
    # Try to detect if the output could be json
    outext = ".out"
    if "json" in default_cmd:
        outext = ".json"
    elif "csv" in default_cmd:
        outext = ".csv"
    elif "sarif" in default_cmd:
        outext = ".sarif"
    elif "xml" in default_cmd:
        outext = ".xml"
    report_fname = report_fname_prefix + outext

    # If the command doesn't support file output then redirect stdout automatically
    stdout = None
    if reports_dir and report_fname_prefix not in default_cmd:
        report_fname = report_fname_prefix + outext
        stdout = io.open(report_fname, "w")
        LOG.debug("Output will be written to {}".format(report_fname))

    # If the command is requesting list of files then construct the argument
    filelist_prefix = "(filelist="
    if default_cmd.find(filelist_prefix) > -1:
        si = default_cmd.find(filelist_prefix)
        ei = default_cmd.find(")", si + 10)
        ext = default_cmd[si + 10:ei]
        filelist = utils.find_files(src, ext)
        # Temporary fix for the yaml issue
        if ext == "yaml":
            yml_list = utils.find_files(src, "yml")
            if yml_list:
                filelist.extend(yml_list)
        delim = " "
        default_cmd = default_cmd.replace(filelist_prefix + ext + ")",
                                          delim.join(filelist))
    cmd_with_args = default_cmd.split(" ")
    # Suppress psalm output
    if should_suppress_output(type_str, cmd_with_args[0]):
        stdout = subprocess.DEVNULL
    exec_tool(tool_name, cmd_with_args, cwd=src, stdout=stdout)
    # Should we attempt to convert the report to sarif format
    if should_convert(convert, tool_name, cmd_with_args[0], report_fname):
        crep_fname = utils.get_report_file(tool_name,
                                           reports_dir,
                                           convert,
                                           ext_name="sarif")
        if (cmd_with_args[0] == "java" or "pmd-bin" in cmd_with_args[0]
                or "php" in tool_name):
            convertLib.convert_file(
                tool_name,
                cmd_with_args,
                src,
                report_fname,
                crep_fname,
            )
        else:
            convertLib.convert_file(
                cmd_with_args[0],
                cmd_with_args[1:],
                src,
                report_fname,
                crep_fname,
            )
        try:
            if not LOG.isEnabledFor(DEBUG):
                os.remove(report_fname)
        except Exception:
            LOG.debug("Unable to remove file {}".format(report_fname))
    elif type_str == "depscan":
        # Convert depscan and license scan files to html
        depscan_files = utils.find_files(reports_dir, "depscan", True)
        for df in depscan_files:
            if not df.endswith(".html"):
                depscan_data = grafeas.parse(df)
                if depscan_data and len(depscan_data):
                    html_fname = df.replace(".json", ".html")
                    grafeas.render_html(depscan_data, html_fname)
                    track({
                        "id": config.get("run_uuid"),
                        "depscan_summary": depscan_data
                    })
                    LOG.debug(
                        "Depscan and HTML report written to file: %s, %s :thumbsup:",
                        df,
                        html_fname,
                    )
        licence_files = utils.find_files(reports_dir, "license", True)
        for lf in licence_files:
            if not lf.endswith(".html"):
                licence_data = licence.parse(lf)
                if licence_data and len(licence_data):
                    html_fname = lf.replace(".json", ".html")
                    licence.render_html(licence_data, html_fname)
                    track({
                        "id": config.get("run_uuid"),
                        "license_summary": licence_data
                    })
                    LOG.debug(
                        "License check and HTML report written to file: %s, %s :thumbsup:",
                        lf,
                        html_fname,
                    )
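
The command templates in cmd_map_list use %-style placeholders that are resolved against the dict built above; an illustrative entry (not taken from the real tool configuration) might look like:

# Hypothetical command template; the %(...)s keys match the substitution dict above
cmd_map_list = [
    "bandit", "-r", "-f", "json",
    "-o", "%(report_fname_prefix)s.json",
    "%(src_or_file)s",
]
execute_default_cmd(cmd_map_list, "python", "bandit", "/path/to/project",
                    "/tmp/reports", convert=True, scan_mode="ide", repo_context={})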
Example #6
def find_repo_details(src_dir=None):
    """Method to find repo details such as url, sha etc
    This will be populated into versionControlProvenance attribute

    :param src_dir: Source directory
    """
    # See if repository uri is specified in the config
    repositoryName = None
    repositoryUri = ""
    revisionId = ""
    branch = ""
    invokedBy = ""
    pullRequest = False
    gitProvider = ""
    ciProvider = ""
    """
    Since CI servers typically check out the repo in detached mode, we need to rely on environment
    variables as a starting point to find the repo details. To make matters worse, since we run
    the tools inside a container, these variables have to be passed as part of the docker run
    command. With native integrations such as GitHub Actions and Cloud Build this is taken care
    of by our builders.

    Environment variable detection for popular CI servers is implemented here anyway, but it is
    effective only in a few cases.

    Azure pipelines - https://docs.microsoft.com/en-us/azure/devops/pipelines/build/variables?view=azure-devops&tabs=yaml
    BitBucket - https://confluence.atlassian.com/bitbucket/environment-variables-in-bitbucket-pipelines-794502608.html
    GitHub actions - https://help.github.com/en/actions/automating-your-workflow-with-github-actions/using-environment-variables
    Google CloudBuild - https://cloud.google.com/cloud-build/docs/configuring-builds/substitute-variable-values
    CircleCI - https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables
    Travis - https://docs.travis-ci.com/user/environment-variables/#default-environment-variables
    AWS CodeBuild - https://docs.aws.amazon.com/codebuild/latest/userguide/build-env-ref-env-vars.html
    GitLab - https://docs.gitlab.com/ee/ci/variables/predefined_variables.html
    Jenkins - https://jenkins.io/doc/book/pipeline/jenkinsfile/#using-environment-variables
    """
    for key, value in os.environ.items():
        # Check REPOSITORY_URL first, followed by CI-specific vars
        # Some CIs such as GitHub pass only the slug instead of the full url :(
        if not gitProvider or not ciProvider:
            if key.startswith("GITHUB_"):
                if key == "GITHUB_REPOSITORY":
                    gitProvider = "github"
                if key == "GITHUB_ACTION":
                    ciProvider = "github"
            elif key.startswith("GITLAB_"):
                gitProvider = "gitlab"
                if key == "GITLAB_CI":
                    ciProvider = "gitlab"
            elif key.startswith("BITBUCKET_"):
                gitProvider = "bitbucket"
                if key == "BITBUCKET_BUILD_NUMBER":
                    ciProvider = "bitbucket"
            elif key.startswith("CIRCLE_"):
                ciProvider = "circle"
            elif key.startswith("TRAVIS_"):
                ciProvider = "travis"
            elif key.startswith("CODEBUILD_"):
                ciProvider = "codebuild"
            elif key.startswith("BUILD_REQUESTEDFOREMAIL"):
                ciProvider = "azure"
            elif key.startswith("JENKINS_"):
                ciProvider = "jenkins"
        if not repositoryName:
            if key in [
                "BUILD_REPOSITORY_NAME",
                "GITHUB_REPOSITORY",
                "BITBUCKET_REPO_SLUG",
                "REPO_NAME",
                "CIRCLE_PROJECT_REPONAME",
                "TRAVIS_REPO_SLUG",
                "CI_PROJECT_NAME",
            ]:
                if "/" in value:
                    repositoryName = value.split("/")[-1]
                else:
                    repositoryName = value
        if not repositoryUri:
            if key in [
                "REPOSITORY_URL",
                "BUILD_REPOSITORY_URI",
                "GITHUB_REPOSITORY",
                "BITBUCKET_GIT_HTTP_ORIGIN",
                "REPO_NAME",
                "CIRCLE_REPOSITORY_URL",
                "TRAVIS_REPO_SLUG",
                "CODEBUILD_SOURCE_REPO_URL",
                "CI_REPOSITORY_URL",
            ]:
                repositoryUri = value
        if key in [
            "COMMIT_SHA",
            "BUILD_SOURCEVERSION",
            "BITBUCKET_COMMIT",
            "GITHUB_SHA",
            "CIRCLE_SHA1",
            "TRAVIS_COMMIT",
            "CODEBUILD_SOURCE_VERSION",
            "CI_COMMIT_SHA",
        ]:
            revisionId = value
        if key in [
            "BRANCH",
            "BUILD_SOURCEBRANCH",
            "BITBUCKET_BRANCH",
            "GITHUB_REF",
            "BRANCH_NAME",
            "CIRCLE_BRANCH",
            "TRAVIS_BRANCH",
            "CI_COMMIT_REF_NAME",
        ]:
            branch = value
        if key in [
            "BUILD_REQUESTEDFOREMAIL",
            "GITHUB_ACTOR",
            "PROJECT_ID",
            "CIRCLE_USERNAME",
            "GITLAB_USER_EMAIL",
        ]:
            invokedBy = value
        if key.startswith("CI_MERGE_REQUEST"):
            pullRequest = True
    if src_dir and os.path.isdir(os.path.join(src_dir, ".git")):
        # Try interacting with git
        try:
            repo = Repo(src_dir)
            head = repo.head
            if not branch and not head.is_detached:
                branch = repo.active_branch.name
            if not revisionId and head:
                revisionId = head.commit.hexsha
            if not repositoryUri:
                repositoryUri = next(iter(repo.remote().urls))
            if not invokedBy or "@" not in invokedBy:
                if head and head.commit.author and head.commit.author.email:
                    invokedBy = "{} <{}>".format(
                        head.commit.author.name, head.commit.author.email
                    )
        except Exception:
            LOG.debug("Unable to find repo details from the local repository")
    if branch.startswith("refs/pull"):
        pullRequest = True
        branch = branch.replace("refs/pull/", "")
    # Cleanup the variables
    branch = branch.replace("refs/heads/", "")
    if repositoryUri:
        repositoryUri = repositoryUri.replace(
            "git@github.com:", "https://github.com/"
        ).replace(".git", "")
        # Is it a repo slug?
        repo_slug = True
        repositoryUri = sanitize_url(repositoryUri)
        for pref in repo_url_prefixes:
            if repositoryUri.startswith(pref):
                repo_slug = False
                break
        if not repo_slug:
            if "vs-ssh" in repositoryUri:
                repo_slug = False
        # For repo slug just assume github for now
        if repo_slug:
            repositoryUri = "https://github.com/" + repositoryUri
    if not repositoryName and repositoryUri:
        repositoryName = os.path.basename(repositoryUri)
    if not gitProvider:
        if "github" in repositoryUri:
            gitProvider = "github"
        if "gitlab" in repositoryUri:
            gitProvider = "gitlab"
        if "atlassian" in repositoryUri or "bitbucket" in repositoryUri:
            gitProvider = "bitbucket"
        if "azure" in repositoryUri or "visualstudio" in repositoryUri:
            gitProvider = "azure"
            if not ciProvider:
                ciProvider = "azure"
        if not gitProvider and "tfs" in repositoryUri:
            gitProvider = "tfs"
            ciProvider = "tfs"
    return {
        "gitProvider": gitProvider,
        "ciProvider": ciProvider,
        "repositoryName": "" if not repositoryName else repositoryName,
        "repositoryUri": repositoryUri,
        "revisionId": revisionId,
        "branch": branch,
        "invokedBy": invokedBy,
        "pullRequest": pullRequest,
        "botUser": is_bot(invokedBy),
    }
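
For example, under GitHub Actions the environment alone is usually enough; a sketch of the resulting dict (values are illustrative):

# Assuming GITHUB_REPOSITORY=octocat/hello, GITHUB_ACTION=run1,
# GITHUB_SHA=abc123 and GITHUB_REF=refs/heads/main are set
details = find_repo_details("/path/to/checkout")
# details would resemble:
# {"gitProvider": "github", "ciProvider": "github",
#  "repositoryName": "hello",
#  "repositoryUri": "https://github.com/octocat/hello",
#  "revisionId": "abc123", "branch": "main", ...}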
Example #7
def convert_sarif(app_name, repo_context, sarif_files, findings_fname):
    """
    Method to convert sarif to findings json

    :param app_name: Application name
    :param repo_context: Repository context
    :param sarif_files: List of SARIF report files
    :param findings_fname: Output findings file name
    """
    finding_id = 1
    with open(findings_fname, mode="w") as out_file:
        for sf in sarif_files:
            with open(sf, mode="r") as report_file:
                report_data = json.loads(report_file.read())
                # skip this file if the data is empty
                if not report_data or not report_data.get("runs"):
                    continue
                # Iterate through all the runs
                for run in report_data["runs"]:
                    try:
                        rules = {
                            r["id"]: r
                            for r in run["tool"]["driver"]["rules"]
                        }
                        results = run["results"]
                        for result in results:
                            rule = rules.get(result["ruleId"])
                            for location in result["locations"]:
                                finding = {
                                    "app": app_name,
                                    "type": "vuln",
                                    "title": result["message"]["text"],
                                    "description": rule["fullDescription"]["text"],
                                    "internal_id": "{}/{}".format(
                                        result["ruleId"],
                                        utils.calculate_line_hash(
                                            location["physicalLocation"]["region"][
                                                "snippet"
                                            ]["text"]
                                        ),
                                    ),
                                    "severity": convert_severity(
                                        result["properties"]["issue_severity"]
                                    ),
                                    "owasp_category": "",
                                    "category": result["ruleId"],
                                    "details": {
                                        "repoContext": repo_context,
                                        "name": result["message"]["text"],
                                        "tags": ",".join(rule["properties"]["tags"]),
                                        "fileName": location["physicalLocation"][
                                            "artifactLocation"
                                        ]["uri"],
                                        "DATA_TYPE": "OSS_SCAN",
                                        "lineNumber": location["physicalLocation"][
                                            "region"
                                        ]["startLine"],
                                    },
                                }
                                out_file.write(json.dumps(finding))
                                finding_id = finding_id + 1
                    except Exception as e:
                        LOG.warning(
                            "Unable to convert the run to findings format")
Example #8
def convert_sarif(app_name, repo_context, sarif_files, findings_fname):
    """
    Method to convert sarif to findings json

    :param app_name: Application name
    :param repo_context: Repository context
    :param sarif_files: List of SARIF report files
    :param findings_fname: Output findings file name
    """
    finding_id = 1
    findings_list = []
    rule_id_owasp_cache = {}
    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = None
            try:
                report_data = json.loads(report_file.read())
                # skip this file if the data is empty
                if not report_data or not report_data.get("runs"):
                    continue
                # Iterate through all the runs
                for run in report_data["runs"]:
                    results = run.get("results")
                    if not results:
                        continue
                    tool_name = run.get("tool", {}).get("driver", {}).get("name")
                    rules = {
                        r["id"]: r
                        for r in run.get("tool", {}).get("driver", {}).get("rules")
                        if r and r.get("id")
                    }
                    for result in results:
                        rule_id = result.get("ruleId", "")
                        rule = rules.get(rule_id)
                        if not rule:
                            continue

                        owasp_category = rule_id_owasp_cache.get(rule_id, "")
                        if not owasp_category:
                            # Check the config for any available owasp category mapping
                            for rok, rov in config.get("rules_owasp_category").items():
                                if (
                                    rok.upper() == rule_id.upper()
                                    or rok.upper() in rule_id.upper()
                                ):
                                    rule_id_owasp_cache[rule_id] = rov
                                    owasp_category = rov
                        category = rule.get("name")
                        if not category:
                            category = rule_id
                        desc = get_help(
                            rule_id,
                            rule_obj=rule,
                            tool_name=tool_name,
                            owasp_category=owasp_category,
                        )
                        short_desc = rule.get("shortDescription", {}).get("text")
                        if not short_desc:
                            short_desc = result.get("message", {}).get("text")
                        ngsev = convert_severity(
                            result.get("properties", {})["issue_severity"]
                        )
                        # Populate tags
                        tags = []
                        if "CWE" in rule_id:
                            tags.append(
                                {
                                    "key": "cwe_category",
                                    "value": rule_id.replace("CWE-", ""),
                                    "shiftleft_managed": True,
                                }
                            )
                        if "CKV_" in rule_id or "CIS_" in rule_id or "AWS" in rule_id:
                            cis_rule = cis.get_rule(rule_id)
                            if cis_rule:
                                tags.append(
                                    {
                                        "key": "cis_category",
                                        "value": cis_rule.get("id", ""),
                                        "shiftleft_managed": False,
                                    }
                                )
                                if cis_rule.get("scored"):
                                    tags.append(
                                        {
                                            "key": "cis_status",
                                            "value": "SCORED",
                                            "shiftleft_managed": False,
                                        }
                                    )

                        for location in result.get("locations"):
                            filename = location["physicalLocation"]["artifactLocation"][
                                "uri"
                            ]
                            lineno = location.get("physicalLocation", {})["region"][
                                "startLine"
                            ]
                            end_lineno = location.get("physicalLocation", {})[
                                "contextRegion"
                            ]["endLine"]
                            finding = {
                                "app": app_name,
                                "type": "extscan",
                                "title": result.get("message", {})["text"],
                                "description": desc,
                                "internal_id": "{}/{}".format(
                                    rule_id,
                                    utils.calculate_line_hash(
                                        filename,
                                        lineno,
                                        end_lineno,
                                        location.get("physicalLocation", {})["region"][
                                            "snippet"
                                        ]["text"],
                                        short_desc,
                                    ),
                                ),
                                "severity": ngsev,
                                "owasp_category": owasp_category,
                                "category": category,
                                "details": {
                                    "repoContext": repo_context,
                                    "name": result.get("message", {})["text"],
                                    "tags": ",".join(rule["properties"]["tags"]),
                                    "fileName": filename,
                                    "DATA_TYPE": "OSS_SCAN",
                                    "lineNumber": lineno,
                                    "ruleId": rule_id,
                                    "ruleName": rule.get("name"),
                                    "contextText": location.get("physicalLocation", {})[
                                        "region"
                                    ]["snippet"]["text"],
                                    "snippetText": location.get("physicalLocation", {})[
                                        "contextRegion"
                                    ]["snippet"]["text"],
                                },
                                "tags": tags,
                            }
                            findings_list.append(finding)
                            finding_id = finding_id + 1
            except Exception as e:
                LOG.debug(e)
                continue

    with open(findings_fname, mode="w") as out_file:
        json.dump({"findings": findings_list}, out_file)
Example #9
def report(
    tool_name,
    tool_args,
    working_dir,
    metrics,
    skips,
    issues,
    crep_fname,
    file_path_list=None,
):
    """Prints issues in SARIF format

    :param tool_name: tool name
    :param tool_args: Args used for the tool
    :param working_dir: Working directory
    :param metrics: metrics data
    :param skips: skips data
    :param issues: issues data
    :param crep_fname: The output file name
    :param file_path_list: Full file path for any manipulation

    :return serialized_log: SARIF output data
    """
    if not tool_args:
        tool_args = []
    tool_args_str = tool_args
    if isinstance(tool_args, list):
        tool_args_str = " ".join(tool_args)
    repo_details = find_repo_details(working_dir)
    log_uuid = str(uuid.uuid4())
    run_uuid = config.get("run_uuid")

    # working directory to use in the log
    WORKSPACE_PREFIX = config.get("WORKSPACE", None)
    wd_dir_log = WORKSPACE_PREFIX if WORKSPACE_PREFIX is not None else working_dir
    driver_name = config.tool_purpose_message.get(tool_name, tool_name)
    # Construct SARIF log
    log = om.SarifLog(
        schema_uri="https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
        version="2.1.0",
        inline_external_properties=[
            om.ExternalProperties(guid=log_uuid, run_guid=run_uuid)
        ],
        runs=[
            om.Run(
                automation_details=om.RunAutomationDetails(
                    guid=log_uuid,
                    description=om.Message(
                        text="Static Analysis Security Test results using @ShiftLeft/sast-scan"
                    ),
                ),
                tool=om.Tool(
                    driver=om.ToolComponent(
                        name=driver_name, full_name=driver_name, version="1.0.0-scan"
                    )
                ),
                invocations=[
                    om.Invocation(
                        end_time_utc=datetime.datetime.utcnow().strftime(TS_FORMAT),
                        execution_successful=True,
                        working_directory=om.ArtifactLocation(uri=to_uri(wd_dir_log)),
                    )
                ],
                conversion={
                    "tool": om.Tool(
                        driver=om.ToolComponent(name="@ShiftLeft/sast-scan")
                    ),
                    "invocation": om.Invocation(
                        execution_successful=True,
                        command_line=tool_args_str,
                        arguments=tool_args,
                        working_directory=om.ArtifactLocation(uri=to_uri(wd_dir_log)),
                        end_time_utc=datetime.datetime.utcnow().strftime(TS_FORMAT),
                    ),
                },
                version_control_provenance=[
                    om.VersionControlDetails(
                        repository_uri=repo_details["repositoryUri"],
                        branch=repo_details["branch"],
                        revision_id=repo_details["revisionId"],
                    )
                ],
            )
        ],
    )

    run = log.runs[0]
    invocation = run.invocations[0]

    add_skipped_file_notifications(skips, invocation)
    add_results(tool_name, issues, run, file_path_list, working_dir)

    serialized_log = to_json(log)

    if crep_fname:
        html_file = crep_fname.replace(".sarif", ".html")
        with io.open(crep_fname, "w") as fileobj:
            fileobj.write(serialized_log)
        if tool_name != "empty-scan":
            render_html(json.loads(serialized_log), html_file)
            if fileobj.name != sys.stdout.name:
                LOG.debug(
                    "SARIF and HTML report written to file: %s, %s :thumbsup:",
                    fileobj.name,
                    html_file,
                )
    return serialized_log
Example #10
def report(
    tool_name,
    tool_args,
    working_dir,
    metrics,
    skips,
    issues,
    crep_fname,
    file_path_list=None,
):
    """Prints issues in SARIF format

    :param tool_name: tool name
    :param tool_args: Args used for the tool
    :param working_dir: Working directory
    :param metrics: metrics data
    :param skips: skips data
    :param issues: issues data
    :param crep_fname: The output file name
    :param file_path_list: Full file path for any manipulation

    :return serialized_log: SARIF output data
    """
    if not tool_args:
        tool_args = []
    tool_args_str = tool_args
    if isinstance(tool_args, list):
        tool_args_str = " ".join(tool_args)
    repo_details = find_repo_details(working_dir)
    log_uuid = str(uuid.uuid4())
    run_uuid = config.get("run_uuid")

    # Populate metrics
    metrics = {
        "total": 0,
        "critical": 0,
        "high": 0,
        "medium": 0,
        "low": 0,
    }

    total = 0
    for issue in issues:
        issue_dict = issue_from_dict(issue).as_dict()
        rule_id = issue_dict.get("test_id")
        # Is this rule ignored globally?
        if rule_id in config.ignored_rules:
            continue
        total += 1
        issue_severity = issue_dict["issue_severity"]
        # Fix up severity for certain tools
        issue_severity = tweak_severity(tool_name, issue_dict)
        key = issue_severity.lower()
        if not metrics.get(key):
            metrics[key] = 0
        metrics[key] += 1
    metrics["total"] = total
    # working directory to use in the log
    WORKSPACE_PREFIX = config.get("WORKSPACE", None)
    wd_dir_log = WORKSPACE_PREFIX if WORKSPACE_PREFIX is not None else working_dir
    driver_name = config.tool_purpose_message.get(tool_name, tool_name)
    if tool_name != "inspect" and config.get("CI") or config.get(
            "GITHUB_ACTIONS"):
        driver_name = "ShiftLeft " + driver_name
    # Construct SARIF log
    log = om.SarifLog(
        schema_uri=
        "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
        version="2.1.0",
        inline_external_properties=[
            om.ExternalProperties(guid=log_uuid, run_guid=run_uuid)
        ],
        runs=[
            om.Run(
                automation_details=om.RunAutomationDetails(
                    guid=log_uuid,
                    description=om.Message(
                        text=
                        "Static Analysis Security Test results using @ShiftLeft/sast-scan"
                    ),
                ),
                tool=om.Tool(driver=om.ToolComponent(name=driver_name)),
                invocations=[
                    om.Invocation(
                        end_time_utc=datetime.datetime.utcnow().strftime(
                            TS_FORMAT),
                        execution_successful=True,
                        working_directory=om.ArtifactLocation(
                            uri=to_uri(wd_dir_log)),
                    )
                ],
                conversion={
                    "tool":
                    om.Tool(driver=om.ToolComponent(
                        name="@ShiftLeft/sast-scan")),
                    "invocation":
                    om.Invocation(
                        execution_successful=True,
                        command_line=tool_args_str,
                        arguments=tool_args,
                        working_directory=om.ArtifactLocation(
                            uri=to_uri(wd_dir_log)),
                        end_time_utc=datetime.datetime.utcnow().strftime(
                            TS_FORMAT),
                    ),
                },
                properties={"metrics": metrics},
                version_control_provenance=[
                    om.VersionControlDetails(
                        repository_uri=repo_details["repositoryUri"],
                        branch=repo_details["branch"],
                        revision_id=repo_details["revisionId"],
                    )
                ],
            )
        ],
    )

    run = log.runs[0]
    invocation = run.invocations[0]

    add_skipped_file_notifications(skips, invocation)
    add_results(tool_name, issues, run, file_path_list, working_dir)

    serialized_log = to_json(log)

    if crep_fname:
        html_file = crep_fname.replace(".sarif", ".html")
        with io.open(crep_fname, "w") as fileobj:
            fileobj.write(serialized_log)
        render_html(json.loads(serialized_log), html_file)
        if fileobj.name != sys.stdout.name:
            LOG.debug(
                "SARIF and HTML report written to file: %s, %s 👍",
                fileobj.name,
                html_file,
            )
    return serialized_log
Example #11
    def annotate_pr(self, repo_context, findings_file, report_summary,
                    build_status):
        if not findings_file:
            return
        with open(findings_file, mode="r") as fp:
            try:
                gitlab_context = self.get_context(repo_context)
                findings_obj = json.load(fp)
                findings = findings_obj.get("findings")
                if not findings:
                    LOG.debug("No findings from scan available to report")
                    return
                if not gitlab_context.get(
                        "mergeRequestIID") or not gitlab_context.get(
                            "mergeRequestProjectId"):
                    LOG.debug(
                        "Scan is not running as part of a merge request. Check if the pipeline is using only: [merge_requests] or rules syntax"
                    )
                    return
                private_token = self.get_token()
                if not private_token:
                    LOG.info(
                        "To create a merge request note, create a personal access token with api scope and set it as GITLAB_TOKEN environment variable"
                    )
                    return
                summary = "| Tool | Critical | High | Medium | Low | Status |\n"
                summary = (
                    summary +
                    "| ---- | ------- | ------ | ----- | ---- | ---- |\n")
                for rk, rv in report_summary.items():
                    summary = f'{summary}| {rv.get("tool")} | {rv.get("critical")} | {rv.get("high")} | {rv.get("medium")} | {rv.get("low")} | {rv.get("status")} |\n'
                template = config.get("PR_COMMENT_TEMPLATE")
                recommendation = (
                    f"Please review the [scan reports]({gitlab_context.get('jobUrl')}/artifacts/browse/reports) before approving this merge request."
                    if build_status == "fail" else "Looks good")
                apiUrl = (f"{gitlab_context.get('apiUrl')}")
                mergeRequestIID = (f"{gitlab_context.get('mergeRequestIID')}")
                mergeRequestProjectId = (
                    f"{gitlab_context.get('mergeRequestProjectId')}")
                mergeRequestSourceBranch = (
                    f"{gitlab_context.get('mergeRequestSourceBranch')}")
                mergeRequestTargetBranch = (
                    f"{gitlab_context.get('mergeRequestTargetBranch')}")
                commitSHA = (f"{gitlab_context.get('commitSHA')}")
                projectId = (f"{gitlab_context.get('projectId')}")
                projectName = (f"{gitlab_context.get('projectName')}")
                projectUrl = (f"{gitlab_context.get('projectUrl')}")
                jobUrl = (f"{gitlab_context.get('jobUrl')}")
                jobId = (f"{gitlab_context.get('jobId')}")
                jobName = (f"{gitlab_context.get('jobName')}")
                jobToken = (f"{gitlab_context.get('jobToken')}")

                body = template % dict(
                    summary=summary,
                    recommendation=recommendation,
                    apiUrl=apiUrl,
                    mergeRequestIID=mergeRequestIID,
                    mergeRequestProjectId=mergeRequestProjectId,
                    mergeRequestSourceBranch=mergeRequestSourceBranch,
                    mergeRequestTargetBranch=mergeRequestTargetBranch,
                    commitSHA=commitSHA,
                    projectId=projectId,
                    projectName=projectName,
                    projectUrl=projectUrl,
                    jobUrl=jobUrl,
                    jobId=jobId,
                    jobName=jobName,
                    jobToken=jobToken)
                rr = requests.post(
                    self.get_mr_notes_url(repo_context),
                    headers={
                        "Content-Type": "application/json",
                        "PRIVATE-TOKEN": self.get_token(),
                    },
                    json={"body": body},
                )
                if not rr.ok:
                    LOG.debug(rr.json())
            except Exception as e:
                LOG.debug(e)
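
PR_COMMENT_TEMPLATE is a %-style template; a minimal hypothetical value using a subset of the keys passed above could be:

# Hypothetical merge request note template (any key from the dict above may be used)
PR_COMMENT_TEMPLATE = (
    "## Scan Summary\n\n"
    "%(summary)s\n"
    "%(recommendation)s\n\n"
    "[View job](%(jobUrl)s)"
)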
Example #12
def execute_default_cmd(
    cmd_map_list,
    type_str,
    tool_name,
    src,
    reports_dir,
    convert,
    scan_mode,
    repo_context,
):
    """
    Method to execute default command for the given type

    Args:
      cmd_map_list Default commands in the form of a dict (multiple) or list
      type_str Project type
      tool_name Tool name
      src Project dir
      reports_dir Directory for output reports
      convert Boolean to enable normalisation of reports json
      scan_mode Scan mode string
      repo_context Repo context
    """
    # Check if there is a default command specified for the given type
    # Create the reports dir
    os.makedirs(reports_dir, exist_ok=True)
    report_fname_prefix = os.path.join(reports_dir, tool_name + "-report")
    default_cmd = " ".join(cmd_map_list) % dict(
        src=src,
        reports_dir=reports_dir,
        report_fname_prefix=report_fname_prefix,
        type=type_str,
        scan_mode=scan_mode,
    )
    # Try to detect if the output could be json
    outext = ".out"
    if default_cmd.find("json") > -1:
        outext = ".json"
    if default_cmd.find("csv") > -1:
        outext = ".csv"
    if default_cmd.find("sarif") > -1:
        outext = ".sarif"
    report_fname = report_fname_prefix + outext

    # If the command doesn't support file output then redirect stdout automatically
    stdout = None
    if reports_dir and default_cmd.find(report_fname_prefix) == -1:
        report_fname = report_fname_prefix + outext
        stdout = io.open(report_fname, "w")
        LOG.debug("Output will be written to {}".format(report_fname))

    # If the command is requesting list of files then construct the argument
    filelist_prefix = "(filelist="
    if default_cmd.find(filelist_prefix) > -1:
        si = default_cmd.find(filelist_prefix)
        ei = default_cmd.find(")", si + 10)
        ext = default_cmd[si + 10 : ei]
        filelist = utils.find_files(src, ext)
        delim = " "
        default_cmd = default_cmd.replace(
            filelist_prefix + ext + ")", delim.join(filelist)
        )
    cmd_with_args = default_cmd.split(" ")
    exec_tool(cmd_with_args, cwd=src, stdout=stdout)
    # Should we attempt to convert the report to sarif format
    if (
        convert
        and config.tool_purpose_message.get(cmd_with_args[0])
        and os.path.isfile(report_fname)
    ):
        crep_fname = utils.get_report_file(
            tool_name, reports_dir, convert, ext_name="sarif"
        )
        convertLib.convert_file(
            cmd_with_args[0], cmd_with_args[1:], src, report_fname, crep_fname,
        )
        try:
            if not os.environ.get("SCAN_DEBUG_MODE") == "debug":
                os.remove(report_fname)
        except Exception:
            LOG.debug("Unable to remove file {}".format(report_fname))
    elif type_str == "depscan":
        # Convert depscan and license scan files to html
        depscan_files = utils.find_files(reports_dir, "depscan", True)
        for df in depscan_files:
            if not df.endswith(".html"):
                depscan_data = grafeas.parse(df)
                if depscan_data and len(depscan_data):
                    html_fname = df.replace(".json", ".html")
                    grafeas.render_html(depscan_data, html_fname)
                    track(
                        {"id": config.get("run_uuid"), "depscan_summary": depscan_data}
                    )
                    LOG.debug(
                        "Depscan and HTML report written to file: %s, %s 👍",
                        df,
                        html_fname,
                    )
        licence_files = utils.find_files(reports_dir, "license", True)
        for lf in licence_files:
            if not lf.endswith(".html"):
                licence_data = licence.parse(lf)
                if licence_data and len(licence_data):
                    html_fname = lf.replace(".json", ".html")
                    licence.render_html(licence_data, html_fname)
                    track(
                        {"id": config.get("run_uuid"), "license_summary": licence_data}
                    )
                    LOG.debug(
                        "License check and HTML report written to file: %s, %s 👍",
                        lf,
                        html_fname,
                    )
Example #13
def deep_analysis(src, files):
    has_unsanitised_vulnerabilities = False
    cfg_list = list()
    insights = []
    framework_route_criteria = is_taintable_function
    for path in sorted(files, key=os.path.dirname, reverse=True):
        directory = os.path.dirname(path)
        project_modules = get_modules(directory, prepend_module_root=False)
        local_modules = get_directory_modules(directory)

        LOG.debug(f"Generating AST and CFG for {path}")
        try:
            tree = generate_ast(path)
            if not tree:
                continue
        except Exception as e:
            LOG.debug(e)
            # Skip files whose AST could not be generated
            continue
        try:
            violations = find_insights(tree, path)
            if violations:
                insights += violations
            cfg = make_cfg(
                tree,
                project_modules,
                local_modules,
                path,
                allow_local_directory_imports=True,
            )
            cfg_list.append(cfg)
        except Exception as e:
            LOG.debug(e)

    try:
        # Taint all possible entry points
        LOG.debug("Determining taints")
        FrameworkAdaptor(cfg_list, project_modules, local_modules,
                         framework_route_criteria)
        LOG.debug("Building constraints table")
        initialize_constraint_table(cfg_list)
        LOG.debug("About to begin deep analysis")
        analyse(cfg_list)
    except Exception as e:
        LOG.debug(e)
    LOG.debug("Finding vulnerabilities from the graph")
    vulnerabilities = find_vulnerabilities(
        cfg_list,
        default_blackbox_mapping_file,
        default_trigger_word_file,
    )
    if vulnerabilities:
        has_unsanitised_vulnerabilities = any(
            not isinstance(v, SanitisedVulnerability) for v in vulnerabilities)
    return vulnerabilities, insights, has_unsanitised_vulnerabilities
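
A usage sketch, assuming the same utils.find_files helper seen elsewhere in this collection is available:

# Run taint analysis over all Python sources in a project
py_files = utils.find_files(src, "py")
vulnerabilities, insights, has_unsanitised = deep_analysis(src, py_files)
for v in vulnerabilities:
    LOG.debug(v)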
Example #14
def convert_sarif(app_name, repo_context, sarif_files, findings_fname):
    """
    Method to convert sarif to findings json

    :param app_name: Application name
    :param repo_context: Repository context
    :param sarif_files: List of SARIF report files
    :param findings_fname: Output findings file name
    """
    finding_id = 1
    findings_list = []
    with open(findings_fname, mode="w") as out_file:
        for sf in sarif_files:
            with open(sf, mode="r") as report_file:
                report_data = json.loads(report_file.read())
                # skip this file if the data is empty
                if not report_data or not report_data.get("runs"):
                    continue
                # Iterate through all the runs
                for run in report_data["runs"]:
                    results = run.get("results")
                    if not results:
                        continue
                    rules = {
                        r["id"]: r
                        for r in run.get("tool", {}).get("driver", {}).get(
                            "rules") if r and r.get("id")
                    }
                    for result in results:
                        rule = rules.get(result.get("ruleId"))
                        if not rule:
                            continue
                        for location in result.get("locations"):
                            filename = location["physicalLocation"][
                                "artifactLocation"]["uri"]
                            lineno = location.get("physicalLocation",
                                                  {})["region"]["startLine"]
                            finding = {
                                "app": app_name,
                                "type": "extscan",
                                "title": result.get("message", {}).get("text"),
                                "description": rule.get("fullDescription", {}).get("text"),
                                "internal_id": "{}/{}".format(
                                    result["ruleId"],
                                    utils.calculate_line_hash(
                                        filename,
                                        lineno,
                                        location.get("physicalLocation", {})["region"][
                                            "snippet"
                                        ]["text"],
                                    ),
                                ),
                                "severity": convert_severity(
                                    result.get("properties", {})["issue_severity"]
                                ),
                                "owasp_category": "",
                                "category": result["ruleId"],
                                "details": {
                                    "repoContext": repo_context,
                                    "name": result.get("message", {})["text"],
                                    "tags": ",".join(rule["properties"]["tags"]),
                                    "fileName": filename,
                                    "DATA_TYPE": "OSS_SCAN",
                                    "lineNumber": lineno,
                                    "ruleId": result["ruleId"],
                                    "ruleName": rule.get("name"),
                                    "snippetText": location.get("physicalLocation", {})[
                                        "region"
                                    ]["snippet"]["text"],
                                    "contextText": location.get("physicalLocation", {})[
                                        "contextRegion"
                                    ]["snippet"]["text"],
                                },
                            }
                            findings_list.append(finding)
                            finding_id = finding_id + 1
        try:
            json.dump({"findings": findings_list}, out_file)
        except Exception:
            LOG.debug("Unable to convert the run to findings format")
Example #15
def summary(sarif_files, aggregate_file=None, override_rules={}):
    """Generate overall scan summary based on the generated
    SARIF file

    :param sarif_files: List of generated sarif report files
    :param aggregate_file: Filename to store aggregate data
    :param override_rules: Build break rules to override for testing
    :returns: Tuple of report summary dict and build status string
    """
    report_summary = {}
    build_status = "pass"
    # This is the list of all runs which will get stored as an aggregate
    run_data_list = []
    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = json.loads(report_file.read())
            # skip this file if the data is empty
            if not report_data or not report_data.get("runs"):
                LOG.warning("Report file {} is invalid. Skipping ...".format(sf))
                continue
            # Iterate through all the runs
            for run in report_data["runs"]:
                # Add it to the run data list for aggregation
                run_data_list.append(run)
                tool_desc = run["tool"]["driver"]["name"]
                tool_name = tool_desc
                # Initialise
                report_summary[tool_name] = {
                    "tool": tool_desc,
                    "critical": 0,
                    "high": 0,
                    "medium": 0,
                    "low": 0,
                    "status": "✅",
                }
                results = run.get("results", [])
                metrics = run.get("properties", {}).get("metrics", None)
                # If the result includes metrics use it. If not compute it
                if metrics:
                    report_summary[tool_name].update(metrics)
                    report_summary[tool_name].pop("total", None)
                else:
                    for aresult in results:
                        sev = aresult["properties"]["issue_severity"].lower()
                        report_summary[tool_name][sev] += 1
                # Compare against the build break rule to determine status
                default_rules = config.get("build_break_rules").get("default")
                tool_rules = config.get("build_break_rules").get(tool_name, {})
                build_break_rules = {
                    **default_rules,
                    **tool_rules,
                    **override_rules
                }
                for rsev in ["critical", "high", "medium", "low"]:
                    if build_break_rules.get("max_" + rsev) is not None:
                        if (report_summary.get(tool_name).get(rsev) >
                                build_break_rules["max_" + rsev]):
                            report_summary[tool_name]["status"] = "❌"
                            build_status = "fail"
    # Should we store the aggregate data
    if aggregate_file:
        # agg_sarif_file = aggregate_file.replace(".json", ".sarif")
        # aggregate.sarif_aggregate(run_data_list, agg_sarif_file)
        aggregate.jsonl_aggregate(run_data_list, aggregate_file)
        LOG.debug("Aggregate report written to {}\n".format(aggregate_file))
    return report_summary, build_status
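
# Illustrative usage sketch (not part of the original example): how summary()
# above might be called after a scan. The report paths are invented, and the
# global `config` is assumed to already hold a "build_break_rules" mapping
# with at least a "default" entry.
report_summary, build_status = summary(
    ["reports/source-python-report.sarif"],
    aggregate_file="reports/scan-full-report.json",
)
for tool, counts in report_summary.items():
    LOG.info("%s: %s critical, %s high (%s)", tool, counts["critical"],
             counts["high"], counts["status"])
LOG.info("Overall build status: %s", build_status)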
Example #16
def extract_from_file(tool_name,
                      working_dir,
                      report_file,
                      file_path_list=None):
    """Extract properties from reports

    :param tool_name: tool name
    :param working_dir: Working directory
    :param report_file: Report file
    :param file_path_list: Full file path for any manipulation

    :return issues, metrics, skips information
    """
    issues = []
    metrics = None
    skips = []
    # If the tools did not produce any result do not crash
    if not os.path.isfile(report_file):
        return issues, metrics, skips
    extn = pathlib.PurePosixPath(report_file).suffix

    with io.open(report_file, "r") as rfile:
        # staticcheck emits results in the jsonlines format
        if tool_name == "staticcheck":
            contents = rfile.read()
            try:
                issues = [
                    json.loads(str(item))
                    for item in contents.strip().split("\n")
                ]
            except json.decoder.JSONDecodeError:
                LOG.warning(
                    "staticcheck produced no result since the project was not built before analysis!"
                )
            return issues, metrics, skips
        if extn == ".json":
            try:
                report_data = json.loads(rfile.read())
            except json.decoder.JSONDecodeError:
                return issues, metrics, skips
            # Inspect uses vulnerabilities
            if tool_name == "inspect":
                file_name_prefix = ""
                for v in report_data.get("vulnerabilities"):
                    if not v:
                        continue
                    vuln = v["vulnerability"]
                    location = {}
                    if vuln.get("dataFlow") and vuln.get("dataFlow").get(
                            "dataFlow"):
                        for l in vuln["dataFlow"]["dataFlow"]["list"]:
                            if not is_generic_package(
                                    l["location"].get("fileName")):
                                location = l["location"]
                                break
                    fileName = location.get("fileName")
                    if fileName == "N/A":
                        continue
                    if not file_name_prefix:
                        file_name_prefix = find_path_prefix(
                            working_dir, fileName)
                    issues.append({
                        "rule_id": vuln["category"],
                        "title": vuln["title"],
                        "description": vuln["description"],
                        "score": vuln["score"],
                        "severity": vuln["severity"],
                        "line_number": location.get("lineNumber"),
                        "filename": os.path.join(file_name_prefix, fileName),
                        "first_found": vuln["firstVersionDetected"],
                        "issue_confidence": "HIGH",
                    })
            elif isinstance(report_data, list):
                issues = report_data
            else:
                if tool_name == "checkov":
                    issues = report_data.get("results",
                                             {}).get("failed_checks")
                elif "sec_issues" in report_data:
                    # NodeJsScan uses sec_issues
                    sec_data = report_data["sec_issues"]
                    for key, value in sec_data.items():
                        if isinstance(value, list):
                            issues = issues + value
                        else:
                            issues.append(value)
                elif "Issues" in report_data:
                    tmpL = report_data.get("Issues", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
                elif "results" in report_data:
                    tmpL = report_data.get("results", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
        if extn == ".csv":
            headers, issues = csv_parser.get_report_data(rfile)
        if extn == ".xml":
            issues, metrics = xml_parser.get_report_data(rfile, file_path_list)
    return issues, metrics, skips
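
# Illustrative usage sketch (not part of the original example): reading back a
# JSON report with the extract_from_file() helper above. The tool name,
# working directory and report path below are assumptions for illustration.
issues, metrics, skips = extract_from_file(
    "bandit", "/app", "/app/reports/bandit-report.json"
)
LOG.info("Issues parsed: %d", len(issues))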
Example #17
def extract_from_file(
    tool_name, tool_args, working_dir, report_file, file_path_list=None
):
    """Extract properties from reports

    :param tool_name: tool name
    :param tool_args: tool args
    :param working_dir: Working directory
    :param report_file: Report file
    :param file_path_list: Full file path for any manipulation

    :return issues, metrics, skips information
    """
    issues = []
    metrics = None
    skips = []
    # If the tools did not produce any result do not crash
    if not os.path.isfile(report_file):
        return issues, metrics, skips
    extn = pathlib.PurePosixPath(report_file).suffix

    with io.open(report_file, "r") as rfile:
        # staticcheck emits results in the jsonlines format
        if tool_name == "staticcheck":
            contents = rfile.read()
            try:
                issues = [
                    json.loads(str(item)) for item in contents.strip().split("\n")
                ]
            except json.decoder.JSONDecodeError:
                LOG.warning(
                    "staticcheck produced no result since the project was not built before analysis!"
                )
            return issues, metrics, skips
        if extn == ".json":
            try:
                report_data = json.loads(rfile.read())
            except json.decoder.JSONDecodeError:
                return issues, metrics, skips
            # NG SAST (Formerly Inspect) uses vulnerabilities
            if tool_name == "ng-sast":
                for v in report_data.get("vulnerabilities"):
                    if not v:
                        continue
                    vuln = v["vulnerability"]
                    location_list = []
                    if vuln.get("dataFlow") and vuln.get("dataFlow", {}).get(
                        "dataFlow"
                    ):
                        location_list = convert_dataflow(
                            working_dir, tool_args, vuln["dataFlow"]["dataFlow"]["list"]
                        )
                    for location in location_list:
                        issues.append(
                            {
                                "rule_id": vuln["category"],
                                "title": vuln["title"],
                                "description": vuln["description"],
                                "score": vuln["score"],
                                "severity": vuln["severity"],
                                "line_number": location.get("line_number"),
                                "filename": location.get("filename"),
                                "first_found": vuln["firstVersionDetected"],
                                "issue_confidence": "HIGH",
                            }
                        )
            elif tool_name == "taint-php":
                for entry in report_data:
                    taint_trace = entry.get("taint_trace")
                    labels = []
                    if taint_trace:
                        source, sink, labels = get_from_taints(taint_trace)
                    else:
                        source, _, _ = get_from_taints([entry])
                    issues.append(
                        {
                            "rule_id": entry.get("shortcode"),
                            "test_name": entry.get("type"),
                            "description": "{}: {}".format(
                                entry.get("message"), "\\n".join(labels)
                            ),
                            "link": entry.get("link"),
                            "severity": entry.get("severity"),
                            "issue_confidence": "HIGH",
                            "line_number": source.get("line_number"),
                            "filename": source.get("filename"),
                        }
                    )
            elif tool_name == "taint-python":
                taint_list = report_data.get("vulnerabilities")
                for taint in taint_list:
                    source = taint.get("source")
                    sink = taint.get("sink")
                    tags = {}
                    for taint_props in [
                        "source_trigger_word",
                        "source_label",
                        "source_type",
                        "sink_trigger_word",
                        "sink_label",
                        "sink_type",
                    ]:
                        if taint.get(taint_props):
                            tags[taint_props] = taint.get(taint_props)
                    issues.append(
                        {
                            "rule_id": taint.get("rule_id"),
                            "test_name": taint.get("rule_name"),
                            "short_description": taint.get("short_description"),
                            "cwe_category": taint.get("cwe_category"),
                            "owasp_category": taint.get("owasp_category"),
                            "description": taint.get("description"),
                            "severity": taint.get("severity"),
                            "issue_confidence": "HIGH",
                            "line_from": source.get("line_number"),
                            "line_to": sink.get("line_number"),
                            "filename": source.get("path"),
                            "tags": tags,
                        }
                    )
            elif tool_name == "phpstan" or tool_name == "source-php":
                file_errors = report_data.get("files")
                for filename, messageobj in file_errors.items():
                    messages = messageobj.get("messages")
                    for msg in messages:
                        # Create a rule id for phpstan
                        rule_word = msg.get("message", "").split(" ")[0]
                        rule_word = "phpstan-" + rule_word.lower()
                        issues.append(
                            {
                                "rule_id": rule_word,
                                "title": msg.get("message"),
                                "line_number": msg.get("line"),
                                "filename": filename,
                                "severity": "LOW",
                                "issue_confidence": "MEDIUM",
                            }
                        )
            elif tool_name == "source-js":
                njs_findings = report_data.get("nodejs", {})
                njs_findings.update(report_data.get("templates", {}))
                for k, v in njs_findings.items():
                    # Password detection by njsscan is full of false positives
                    if k == "node_password":
                        continue
                    files = v.get("files", [])
                    metadata = v.get("metadata", {})
                    if not files or not metadata:
                        continue
                    for afile in files:
                        line_number = 0
                        if afile.get("match_lines"):
                            line_number = afile.get("match_lines")[0]
                        issues.append(
                            {
                                "rule_id": metadata.get("owasp")
                                .replace(":", "-")
                                .replace(" ", "")
                                .lower(),
                                "title": metadata.get("cwe"),
                                "description": metadata.get("description"),
                                "severity": metadata.get("severity"),
                                "line_number": line_number,
                                "filename": afile.get("file_path"),
                                "issue_confidence": "HIGH",
                            }
                        )
            elif tool_name == "checkov":
                if isinstance(report_data, list):
                    for rd in report_data:
                        issues += rd.get("results", {}).get("failed_checks")
                else:
                    issues = report_data.get("results", {}).get("failed_checks")
            elif tool_name == "source-ruby":
                issues = report_data.get("warnings", [])
                issues += report_data.get("errors", [])
            elif isinstance(report_data, list):
                issues = report_data
            else:
                if "sec_issues" in report_data:
                    # NodeJsScan uses sec_issues
                    sec_data = report_data["sec_issues"]
                    for key, value in sec_data.items():
                        if isinstance(value, list):
                            issues = issues + value
                        else:
                            issues.append(value)
                elif "Issues" in report_data:
                    tmpL = report_data.get("Issues", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
                elif "results" in report_data:
                    tmpL = report_data.get("results", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
        if extn == ".csv":
            headers, issues = csv_parser.get_report_data(rfile)
        if extn == ".xml":
            issues, metrics = xml_parser.get_report_data(
                rfile, file_path_list=file_path_list, working_dir=working_dir
            )
    return issues, metrics, skips
Example #18
def summary(sarif_files,
            depscan_files=None,
            aggregate_file=None,
            override_rules={}):
    """Generate overall scan summary based on the generated
    SARIF file

    :param sarif_files: List of generated sarif report files
    :param aggregate_file: Filename to store aggregate data
    :param override_rules Build break rules to override for testing
    :returns dict representing the summary
    """
    report_summary = {}
    build_status = "pass"
    # This is the list of all runs which will get stored as an aggregate
    run_data_list = []
    default_rules = config.get("build_break_rules").get("default")
    depscan_default_rules = config.get("build_break_rules").get("depscan")
    # Collect stats from depscan files if available
    if depscan_files:
        for df in depscan_files:
            with open(df, mode="r") as drep_file:
                dep_data = get_depscan_data(drep_file)
                if not dep_data:
                    continue
                # depscan-java or depscan-nodejs based on filename
                dep_type = (os.path.basename(df).replace(".json", "").replace(
                    "-report", ""))
                metrics, required_pkgs_found = calculate_depscan_metrics(
                    dep_data)
                report_summary[dep_type] = {
                    "tool":
                    f"""Dependency Scan ({dep_type.replace("depscan-", "")})""",
                    "critical": metrics["critical"],
                    "high": metrics["high"],
                    "medium": metrics["medium"],
                    "low": metrics["low"],
                    "status": ":white_heavy_check_mark:",
                }
                report_summary[dep_type].pop("total", None)
                # Compare against the build break rule to determine status
                dep_tool_rules = config.get("build_break_rules").get(
                    dep_type, {})
                build_break_rules = {**depscan_default_rules, **dep_tool_rules}
                if override_rules and override_rules.get("depscan"):
                    build_break_rules = {
                        **build_break_rules,
                        **override_rules.get("depscan"),
                    }
                # Default severity categories for build status
                build_status_categories = (
                    "critical",
                    "required_critical",
                    "optional_critical",
                    "high",
                    "required_high",
                    "optional_high",
                    "medium",
                    "required_medium",
                    "optional_medium",
                    "low",
                    "required_low",
                    "optional_low",
                )
                # Issue 233 - Consider only required packages if available
                if required_pkgs_found:
                    build_status_categories = (
                        "required_critical",
                        "required_high",
                        "required_medium",
                        "required_low",
                    )
                for rsev in build_status_categories:
                    if build_break_rules.get("max_" + rsev) is not None:
                        if metrics.get(rsev) > build_break_rules["max_" + rsev]:
                            report_summary[dep_type]["status"] = ":cross_mark:"
                            build_status = "fail"

    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = json.load(report_file)
            # skip this file if the data is empty
            if not report_data or not report_data.get("runs"):
                LOG.warning("Report file {} is invalid. Skipping ...".format(sf))
                continue
            # Iterate through all the runs
            for run in report_data["runs"]:
                # Add it to the run data list for aggregation
                run_data_list.append(run)
                tool_desc = run["tool"]["driver"]["name"]
                tool_name = tool_desc
                # Initialise
                report_summary[tool_name] = {
                    "tool": tool_desc,
                    "critical": 0,
                    "high": 0,
                    "medium": 0,
                    "low": 0,
                    "status": ":white_heavy_check_mark:",
                }
                results = run.get("results", [])
                metrics = run.get("properties", {}).get("metrics", None)
                # If the result includes metrics use it. If not compute it
                if metrics:
                    report_summary[tool_name].update(metrics)
                    report_summary[tool_name].pop("total", None)
                else:
                    for aresult in results:
                        sev = aresult["properties"]["issue_severity"].lower()
                        report_summary[tool_name][sev] += 1
                # Compare against the build break rule to determine status
                tool_rules = config.get("build_break_rules").get(tool_name, {})
                build_break_rules = {
                    **default_rules,
                    **tool_rules,
                    **override_rules
                }
                for rsev in ("critical", "high", "medium", "low"):
                    if build_break_rules.get("max_" + rsev) is not None:
                        if (report_summary.get(tool_name).get(rsev) >
                                build_break_rules["max_" + rsev]):
                            report_summary[tool_name]["status"] = ":cross_mark:"
                            build_status = "fail"

    # Should we store the aggregate data
    if aggregate_file:
        # agg_sarif_file = aggregate_file.replace(".json", ".sarif")
        # aggregate.sarif_aggregate(run_data_list, agg_sarif_file)
        aggregate.jsonl_aggregate(run_data_list, aggregate_file)
        LOG.debug("Aggregate report written to {}\n".format(aggregate_file))
    return report_summary, build_status
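
# Illustrative usage sketch (not part of the original example): combining
# SARIF and depscan reports in one summary. All report paths are hypothetical,
# and `config` is assumed to carry "build_break_rules" with "default" and
# "depscan" entries.
report_summary, build_status = summary(
    sarif_files=["reports/source-java-report.sarif"],
    depscan_files=["reports/depscan-java-report.json"],
    aggregate_file="reports/scan-full-report.json",
)
LOG.info("Overall build status: %s", build_status)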
Example #19
def inspect_scan(language, src, reports_dir, convert, repo_context):
    """
    Method to perform inspect cloud scan

    Args:
      language: Project language
      src: Project directory
      reports_dir: Directory for output reports
      convert: Boolean to enable normalisation of reports into json
      repo_context: Repo context
    """
    run_uuid = config.get("run_uuid")
    cpg_mode = config.get("SHIFTLEFT_CPG")
    env = os.environ.copy()
    env["SCAN_JAVA_HOME"] = os.environ.get("SCAN_JAVA_8_HOME")
    report_fname = utils.get_report_file(
        "ng-sast", reports_dir, convert, ext_name="json"
    )
    sl_cmd = config.get("SHIFTLEFT_NGSAST_CMD")
    # Check if sl cli is available
    if not utils.check_command(sl_cmd):
        LOG.warning(
            "sl cli is not available. Please check if your build uses shiftleft/scan-java as the image"
        )
        return
    analyze_files = config.get("SHIFTLEFT_ANALYZE_FILE")
    analyze_target_dir = config.get(
        "SHIFTLEFT_ANALYZE_DIR", os.path.join(src, "target")
    )
    extra_args = None
    if not analyze_files:
        if language == "java":
            analyze_files = utils.find_java_artifacts(analyze_target_dir)
        elif language == "csharp":
            if not utils.check_dotnet():
                LOG.warning(
                    "dotnet is not available. Please check if your build uses shiftleft/scan-csharp as the image"
                )
                return
            analyze_files = utils.find_csharp_artifacts(src)
            cpg_mode = True
        else:
            if language == "ts" or language == "nodejs":
                language = "js"
                extra_args = ["--", "--ts", "--babel"]
            analyze_files = [src]
            cpg_mode = True
    app_name = find_app_name(src, repo_context)
    branch = repo_context.get("revisionId")
    if not branch:
        branch = "master"
    if not analyze_files:
        LOG.warning(
            "Unable to find any build artifacts. Compile your project first before invoking scan or use the auto build feature."
        )
        return
    if isinstance(analyze_files, list) and len(analyze_files) > 1:
        LOG.warning(
            "Multiple files found in {}. Only {} will be analyzed".format(
                analyze_target_dir, analyze_files[0]
            )
        )
        analyze_files = analyze_files[0]
    sl_args = [
        sl_cmd,
        "analyze",
        "--no-auto-update" if language == "java" else None,
        "--wait",
        "--cpg" if cpg_mode else None,
        "--" + language,
        "--tag",
        "branch=" + branch,
        "--app",
        app_name,
    ]
    sl_args += [analyze_files]
    if extra_args:
        sl_args += extra_args
    sl_args = [arg for arg in sl_args if arg is not None]
    LOG.info(
        "About to perform ShiftLeft NG SAST cloud analysis. This might take a few minutes ..."
    )
    LOG.debug(" ".join(sl_args))
    LOG.debug(repo_context)
    cp = exec_tool("NG SAST", sl_args, src, env=env)
    if not cp or cp.returncode != 0:
        LOG.warning("NG SAST cloud analyze has failed with the below logs")
        LOG.debug(sl_args)
        if cp:
            LOG.info(cp.stderr)
        return
    findings_data = fetch_findings(app_name, branch, report_fname)
    if findings_data and convert:
        crep_fname = utils.get_report_file(
            "ng-sast", reports_dir, convert, ext_name="sarif"
        )
        convertLib.convert_file("ng-sast", sl_args[1:], src, report_fname, crep_fname)
    track({"id": run_uuid, "scan_mode": "ng-sast", "sl_args": sl_args})
Example #20
    def annotate_pr(self, repo_context, findings_file, report_summary,
                    build_status):
        if not findings_file:
            return
        with open(findings_file, mode="r") as fp:
            try:
                findings_obj = json.load(fp)
                findings = findings_obj.get("findings")
                if not findings:
                    LOG.debug("No findings from scan available to report")
                    return
                context = self.get_context(repo_context)
                # Leave a comment on the pull request
                if context.get("prID") and context.get("bitbucketToken"):
                    summary = "| Tool | Critical | High | Medium | Low | Status |\n"
                    summary += "| ---- | ------- | ------ | ----- | ---- | ---- |\n"
                    for rk, rv in report_summary.items():
                        status_emoji = self.to_emoji(rv.get("status"))
                        summary = f'{summary}| {rv.get("tool")} | {rv.get("critical")} | {rv.get("high")} | {rv.get("medium")} | {rv.get("low")} | {status_emoji} |\n'
                    template = config.get("PR_COMMENT_BASIC_TEMPLATE")
                    recommendation = (
                        f"Please review the scan reports before approving this pull request for {context.get('prTargetBranch')} branch"
                        if build_status == "fail" else "Looks good")
                    repoOwner = f"{context.get('BITBUCKET_REPO_OWNER')}"
                    repoFullname = f"{context.get('BITBUCKET_REPO_FULL_NAME')}"
                    repoWorkspace = f"{context.get('BITBUCKET_WORKSPACE')}"
                    repoUUID = f"{context.get('BITBUCKET_REPO_UUID')}"
                    prID = f"{context.get('BITBUCKET_PR_ID')}"
                    prTargetBranch = f"{context.get('BITBUCKET_PR_DESTINATION_BRANCH')}"
                    bitbucketToken = f"{context.get('BITBUCKET_TOKEN')}"
                    commitSHA = f"{context.get('BITBUCKET_COMMIT')}"
                    repoId = f"{context.get('BITBUCKET_REPO_UUID')}"
                    projectUrl = f"{context.get('BITBUCKET_REPO_SLUG')}"
                    jobId = f"{context.get('BITBUCKET_BUILD_NUMBER')}"

                    body = template % dict(
                        summary=summary,
                        recommendation=recommendation,
                        repoOwner=repoOwner,
                        repoFullname=repoFullname,
                        repoWorkspace=repoWorkspace,
                        repoUUID=repoUUID,
                        prID=prID,
                        prTargetBranch=prTargetBranch,
                        bitbucketToken=bitbucketToken,
                        commitSHA=commitSHA,
                        repoId=repoId,
                        projectUrl=projectUrl,
                        jobId=jobId,
                    )
                    rc = requests.post(
                        self.get_pr_comments_url(repo_context),
                        auth=(
                            context.get("repoWorkspace"),
                            context.get("bitbucketToken"),
                        ),
                        headers={"Content-Type": "application/json"},
                        json={"content": {
                            "raw": body
                        }},
                    )
                    if not rc.ok:
                        LOG.debug(rc.json())
                else:
                    LOG.debug(
                        "Either build is not part of a PR or variable BITBUCKET_TOKEN was not set with Pull Request write permission"
                    )
                total_count = len(findings)
                data_list = [
                    {
                        "title": "Safe to merge?",
                        "type": "BOOLEAN",
                        "value": build_status != "fail",
                    },
                ]
                for rk, rv in report_summary.items():
                    data_list.append({
                        "title": rv.get("tool"),
                        "type": "TEXT",
                        "value": rv.get("status"),
                    })
                scan_id = config.get("run_uuid", "001")
                # Create a PR report based on the total findings
                rr = requests.put(
                    f"{self.get_reports_url(repo_context)}-{scan_id}",
                    proxies=proxies,
                    headers={"Content-Type": "application/json"},
                    json={
                        "title": "Scan",
                        "details": f"This pull request contains {total_count} issues",
                        "report_type": "SECURITY",
                        "reporter": f"Scan report for {repo_context.get('repositoryName')}",
                        "link": "https://slscan.io",
                        "logo_url": "https://www.shiftleft.io/static/images/ShiftLeft_logo_white.svg",
                        "result": "FAILED" if build_status == "fail" else "PASSED",
                        "data": data_list,
                    },
                )
                if rr.ok:
                    for f in findings:
                        finternal = f.get("internal_id")
                        tmpA = finternal.split("/")
                        title = tmpA[0]
                        occurrenceHash = tmpA[-1]
                        annotation_url = f"{self.get_reports_url(repo_context)}-{scan_id}/annotations/scan-{occurrenceHash}"
                        fileName = ""
                        lineNumber = None
                        if f.get("details"):
                            fileName = f.get("details", {}).get("fileName")
                            lineNumber = f.get("details", {}).get("lineNumber")
                        workspace = utils.get_workspace(repo_context)
                        # Remove the workspace
                        if workspace:
                            workspace = workspace + "/"
                            fileName = fileName.replace(workspace, "")
                        # Cleanup title and description
                        title = f.get("title")
                        description = f.get("description")
                        if len(title) > len(description) and "\n" in title:
                            description = f.get("title")
                        if "\n" in title:
                            title = title.split("\n")[0]
                        annotation = {
                            "title": "Scan Report",
                            "annotation_type": "VULNERABILITY",
                            "summary": title,
                            "details": description,
                            "severity":
                            self.convert_severity(f.get("severity")),
                            "path": fileName,
                            "line": lineNumber,
                        }
                        ar = requests.put(
                            annotation_url,
                            proxies=proxies,
                            headers={"Content-Type": "application/json"},
                            json=annotation,
                        )
                        if not ar.ok:
                            break
                else:
                    LOG.debug(rr.json())
            except Exception as e:
                LOG.debug(e)
Example #21
def fetch_findings(app_name, version, report_fname):
    """
    Fetch findings from the NG SAST Cloud
    """
    sl_org = config.get("SHIFTLEFT_ORG_ID", config.get("SHIFTLEFT_ORGANIZATION_ID"))
    sl_org_token = config.get(
        "SHIFTLEFT_ORG_TOKEN", config.get("SHIFTLEFT_ORGANIZATION_TOKEN")
    )
    if not sl_org_token:
        sl_org_token = config.get("SHIFTLEFT_API_TOKEN")
    findings_api = config.get("SHIFTLEFT_VULN_API")
    findings_list = []
    if sl_org and sl_org_token:
        findings_api = findings_api % dict(
            sl_org=sl_org, app_name=app_name, version=version
        )
        query_obj = {
            "query": {
                "returnRuntimeData": False,
                "orderByDirection": "VULNERABILITY_ORDER_DIRECTION_DESC",
            }
        }
        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + sl_org_token,
        }
        try:
            r = requests.post(findings_api, headers=headers, json=query_obj)
            if r.status_code == 200:
                findings_data = r.json()
                if findings_data:
                    findings_list += findings_data.get("vulnerabilities", [])
                    nextPageBookmark = findings_data.get("nextPageBookmark")
                    # Recurse and fetch all pages
                    while nextPageBookmark:
                        LOG.debug("Retrieving findings from next page")
                        r = requests.post(
                            findings_api,
                            headers=headers,
                            json={"pageBookmark": nextPageBookmark},
                        )
                        if r.status_code == 200:
                            findings_data = r.json()
                            if findings_data:
                                findings_list += findings_data.get(
                                    "vulnerabilities", []
                                )
                                nextPageBookmark = findings_data.get("nextPageBookmark")
                            else:
                                nextPageBookmark = None
                        else:
                            # Stop paginating on an error response to avoid an infinite loop
                            nextPageBookmark = None
                    with open(report_fname, mode="w") as rp:
                        json.dump({"vulnerabilities": findings_list}, rp)
                        LOG.debug(
                            "Data written to {}, {}".format(
                                report_fname, len(findings_list)
                            )
                        )
                return findings_list
            else:
                if not findings_list:
                    LOG.warning(
                        "Unable to retrieve any findings from NG SAST Cloud. Status {}".format(
                            r.status_code
                        )
                    )
                else:
                    LOG.debug(
                        "Unable to retrieve some findings from NG SAST Cloud. Proceeding with partial list. Status {}".format(
                            r.status_code
                        )
                    )
                return findings_list
        except Exception as e:
            LOG.error(e)
    return findings_list
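
# Illustrative usage sketch (not part of the original example): fetching the
# findings for a branch and persisting them to a report file. The app name,
# branch and report path are invented; SHIFTLEFT_ORG_ID, an API token and
# SHIFTLEFT_VULN_API are assumed to be present in config.
findings = fetch_findings("demo-app", "master", "reports/ngsast-report.json")
LOG.info("Fetched %d findings", len(findings) if findings else 0)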
Example #22
    parser.add_option("-g", "--ga-conf", dest="ga_conf",
                      action="store", help="Configuration file of the" \
                      "genetic algorithm")

    parser.add_option("-e", "--es-conf", dest="es_conf", action="store",
                      help="Configuration file of the evolutionary strategy")

    opts = parser.parse_args()[0]

    try:
        if len(sys.argv) < 2:
            raise OptsError("Missing arguments")

        if opts.ga_conf:
            algorithm_name = 'GA behavior'
            LOG.info("Starting GA")
            pop_size, term, ad_mut_stp, mu_lambda = read_algorithm_config(opts.ga_conf)
            search_ga(int(term), int(pop_size), ast.literal_eval(ad_mut_stp),
                      ast.literal_eval(mu_lambda))
            plot_data(algorithm_name, LOG_NAME)
            LOG.info("Finish GA")

        if opts.es_conf:
            algorithm_name = 'ES behavior'
            LOG.info("Starting ES")
            pop_range, term, ad_mut_stp, mu_lambda = read_algorithm_config(opts.es_conf)
            search_es(int(term), int(pop_range), ast.literal_eval(ad_mut_stp),
                      ast.literal_eval(mu_lambda))
            plot_data(algorithm_name, LOG_NAME)
            LOG.info("Finish ES")