def java_build(src, reports_dir, lang_tools):
    """
    Automatically build java project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    cmd_args = []
    pom_files = [p.as_posix() for p in Path(src).glob("pom.xml")]
    env = os.environ.copy()
    # Allow opting in to Java 8; otherwise default to the Java 11 home
    if os.environ.get("USE_JAVA_8") or os.environ.get("WITH_JAVA_8"):
        env["SCAN_JAVA_HOME"] = os.environ.get("SCAN_JAVA_8_HOME")
    else:
        env["SCAN_JAVA_HOME"] = os.environ.get("SCAN_JAVA_11_HOME")
    if pom_files:
        cmd_args = lang_tools.get("maven")
    else:
        # Only look for gradle when there is no maven pom
        gradle_files = [p.as_posix() for p in Path(src).glob("build.gradle")]
        if gradle_files:
            cmd_args = lang_tools.get("gradle")
    if not cmd_args:
        LOG.info(
            "Java auto build is supported only for maven or gradle based projects"
        )
        return False
    cp = exec_tool(cmd_args, src, env=env, stdout=subprocess.PIPE)
    # exec_tool returns None when the command could not be executed at all;
    # previously this crashed with AttributeError on cp.stdout
    if not cp:
        return False
    LOG.debug(cp.stdout)
    return cp.returncode == 0
def kotlin_build(src, reports_dir, lang_tools): """ Automatically build kotlin project :param src: Source directory :param reports_dir: Reports directory to store any logs :param lang_tools: Language specific build tools :return: boolean status from the build. True if the command executed successfully. False otherwise """ # Check if this is a android kotlin project gradle_kts_files = [p.as_posix() for p in Path(src).rglob("build.gradle.kts")] if find_files(src, "proguard-rules.pro", False, True) or find_files( src, "AndroidManifest.xml", False, True ): return android_build(src, reports_dir, lang_tools) if gradle_kts_files: cmd_args = get_gradle_cmd(src, lang_tools.get("gradle")) cp = exec_tool( "auto-build", cmd_args, src, env=get_env(), stdout=subprocess.PIPE ) if cp: LOG.debug(cp.stdout) return cp.returncode == 0 else: return java_build(src, reports_dir, lang_tools)
def java_build(src, reports_dir, lang_tools):
    """
    Automatically build java project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    cmd_args = []
    env = get_env()
    # Use any() so the recursive globs stop at the first match instead of
    # walking the entire tree three times up front
    if any(Path(src).rglob("pom.xml")):
        cmd_args = lang_tools.get("maven")
    elif any(Path(src).rglob("build.gradle")):
        cmd_args = get_gradle_cmd(src, lang_tools.get("gradle"))
    elif any(Path(src).rglob("build.sbt")):
        cmd_args = lang_tools.get("sbt")
    if not cmd_args:
        # Message previously omitted sbt even though it is supported above
        LOG.info(
            "Java auto build is supported only for maven, gradle or sbt based projects"
        )
        return False
    cp = exec_tool("auto-build", cmd_args, src, env=env, stdout=subprocess.PIPE)
    if cp:
        LOG.debug(cp.stdout)
        return cp.returncode == 0
    return False
def exec_tool(args, cwd=None, env=None, stdout=subprocess.DEVNULL):
    """
    Convenience method to invoke cli tools

    Args:
      args cli command and args
      cwd Current working directory
      env Environment variables (defaults to a fresh copy of the current
          process environment on every call)
      stdout stdout configuration for run command

    Returns:
      CompletedProcess instance, or None when the command could not be run
    """
    # BUG FIX: the default was `env=os.environ.copy()`, a mutable default
    # evaluated once at import time, so environment changes made after import
    # were silently ignored. Copy per call instead.
    if env is None:
        env = os.environ.copy()
    try:
        env = use_java(env)
        LOG.info("=" * 80)
        LOG.debug('⚡︎ Executing "{}"'.format(" ".join(args)))
        # shell=False with a list of args avoids shell-injection issues
        cp = subprocess.run(
            args,
            stdout=stdout,
            stderr=subprocess.STDOUT,
            cwd=cwd,
            env=env,
            check=False,
            shell=False,
            encoding="utf-8",
        )
        return cp
    except Exception as e:
        # Best effort: callers are expected to handle a None return
        LOG.error(e)
        return None
def nodejs_build(src, reports_dir, lang_tools):
    """
    Automatically build nodejs project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    cmd_args = lang_tools.get("npm")
    yarn_mode = False
    pjson_files = [p.as_posix() for p in Path(src).glob("package.json")]
    ylock_files = [p.as_posix() for p in Path(src).glob("yarn.lock")]
    if ylock_files:
        # A yarn.lock means the project is yarn managed
        cmd_args = lang_tools.get("yarn")
        yarn_mode = True
    elif not pjson_files:
        LOG.info(
            "Nodejs auto build is supported only for npm or yarn based projects"
        )
        return False
    cp = exec_tool(cmd_args, src)
    # exec_tool returns None when the command could not be executed at all;
    # previously this crashed with AttributeError on cp.stdout
    if not cp:
        return False
    LOG.debug(cp.stdout)
    ret = cp.returncode == 0
    try:
        # Best-effort `run build` step; failure here does not change ret
        cmd_args = ["npm"]
        if yarn_mode:
            cmd_args = ["yarn"]
        cmd_args += ["run", "build"]
        exec_tool(cmd_args, src)
    except Exception:
        LOG.debug("Automatic build has failed for the node.js project")
    return ret
def android_build(src, reports_dir, lang_tools):
    """
    Automatically build android project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools (overridden below with
        the android entry from build_tools_map)

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    # An android SDK location is mandatory for gradle android builds
    if not os.getenv("ANDROID_SDK_ROOT") and not os.getenv("ANDROID_HOME"):
        LOG.info(
            "ANDROID_SDK_ROOT or ANDROID_HOME should be set for automatically building android projects"
        )
        return False
    # NOTE: deliberately shadows the lang_tools parameter — android projects
    # always use the android tool configuration regardless of the caller
    lang_tools = build_tools_map.get("android")
    env = get_env()
    gradle_files = [p.as_posix() for p in Path(src).rglob("build.gradle")]
    gradle_kts_files = [
        p.as_posix() for p in Path(src).rglob("build.gradle.kts")
    ]
    if gradle_files or gradle_kts_files:
        # Prefer the project's gradle wrapper when present
        cmd_args = get_gradle_cmd(src, lang_tools.get("gradle"))
        cp = exec_tool("auto-build", cmd_args, src, env=env, stdout=subprocess.PIPE)
        if cp:
            LOG.debug(cp.stdout)
            return cp.returncode == 0
    return False
def auto_build(type_list, src, reports_dir):
    """
    Automatically build project identified by type

    :param type_list: Project types
    :param src: Source directory
    :param reports_dir: Reports directory to store any logs

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    ret = True
    for ptype in type_list:
        lang_tools = build_tools_map.get(ptype)
        if not lang_tools:
            continue
        # A plain list means a single command that can be executed directly
        if isinstance(lang_tools, list):
            cp = exec_tool(
                lang_tools, src, env=os.environ.copy(), stdout=subprocess.PIPE
            )
            # exec_tool returns None when the command could not be executed;
            # previously this crashed with AttributeError on cp.stdout
            if cp:
                LOG.debug(cp.stdout)
                ret = ret & (cp.returncode == 0)
            else:
                ret = False
        # Look for any <type>_build function in this module for execution
        try:
            ret = ret & getattr(sys.modules[__name__], "%s_build" % ptype)(
                src, reports_dir, lang_tools
            )
        except Exception:
            LOG.debug("Unable to auto build project of type {}".format(ptype))
    return ret
def annotate_pr(self, repo_context, findings_file, report_summary, build_status):
    """Annotate the GitHub pull request associated with this scan run.

    Creates a commit status, locates the workflow run and, when findings
    exist, posts a review on the associated pull requests. All failures are
    swallowed and logged at debug level (best-effort annotation).

    :param repo_context: Repository context (provider, tokens, refs)
    :param findings_file: Path to the JSON findings file; no-op when empty
    :param report_summary: Per-tool summary dict used in the status/review
    :param build_status: Overall build status string ("pass"/"fail")
    """
    if not findings_file:
        return
    with open(findings_file, mode="r") as fp:
        try:
            github_context = self.get_context(repo_context)
            findings_obj = json.load(fp)
            findings = findings_obj.get("findings")
            if not findings:
                # No return here on purpose: a status is still created below
                LOG.debug("No findings from scan available to report")
            # `g` presumably is the authenticated GitHub client — without a
            # token or client there is nothing we can do
            if not github_context.get("githubToken") or not g:
                LOG.debug("Did not receive GITHUB_TOKEN")
                return
            self.create_status(
                findings, github_context, report_summary, build_status
            )
            workflow_run = self.get_workflow(github_context)
            if not workflow_run:
                LOG.debug("Unable to find the workflow run for this invocation")
                return
            pull_requests = workflow_run.pull_requests
            if not pull_requests:
                LOG.debug("No Pull Requests are associated with this workflow run")
                return
            if findings:
                self.create_review(
                    pull_requests,
                    findings,
                    github_context,
                    report_summary,
                    build_status,
                )
        except Exception as e:
            LOG.debug(e)
def report(vulnerabilities, insights, report_fname):
    """
    Prints issues in JSON format.

    Sanitised and unknown vulnerabilities are dropped, duplicates (same rule
    plus source/sink location) are emitted once, and insights are appended
    after the vulnerabilities.

    Args:
      vulnerabilities: list of vulnerabilities to report
      insights: list of insights
      report_fname: The output file name
    """
    TZ_AGNOSTIC_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
    time_string = datetime.utcnow().strftime(TZ_AGNOSTIC_FORMAT)
    filtered_vulns = []
    seen_keys = set()
    for vuln in vulnerabilities:
        if isinstance(vuln, (SanitisedVulnerability, UnknownVulnerability)):
            continue
        avuln = vuln.as_dict()
        # Dedup on rule + source/sink coordinates
        avuln_key = f"""{avuln["rule_id"]}|{avuln["source"]["line_number"]}|{avuln["source"]["path"]}|{avuln["sink"]["line_number"]}|{avuln["sink"]["path"]}"""
        if avuln_key in seen_keys:
            continue
        seen_keys.add(avuln_key)
        filtered_vulns.append(avuln)
    for ins in insights:
        filtered_vulns.append(
            {
                "rule_id": ins.code,
                "rule_name": ins.name,
                "short_description": ins.short_description,
                "description": ins.short_description,
                "recommendation": ins.recommendation,
                "cwe_category": ins.cwe_category,
                "owasp_category": ins.owasp_category,
                "severity": ins.severity,
                "source": {
                    "trigger_word": ins.source.trigger_word,
                    "line_number": ins.source.line_number,
                    "label": ins.source.label,
                    "path": ins.source.path,
                },
                "sink": {
                    "trigger_word": ins.sink.trigger_word,
                    "line_number": ins.sink.line_number,
                    "label": ins.sink.label,
                    "path": ins.sink.path,
                },
            }
        )
    machine_output = {
        "generated_at": time_string,
        "vulnerabilities": filtered_vulns,
    }
    try:
        with open(report_fname, mode="w") as fileobj:
            json.dump(machine_output, fileobj, indent=2)
    except Exception as e:
        LOG.debug(e)
def get_code(self, max_lines=config.get("CODE_SNIPPET_MAX_LINES"), tabbed=False):
    """Gets lines of code from a file the generated this issue.

    :param max_lines: Max lines of context to return
    :param tabbed: Use tabbing in the output
    :return: strings of code
    """
    if not self.fname:
        return ""
    lines = []
    max_lines = max(max_lines, 1)
    if not self.snippet_based:
        # Window of lines centred around self.lineno
        lmin = max(1, self.lineno - max_lines // 2)
        lmax = lmin + len(self.linerange) + max_lines - 1
        tmplt = "%i\t%s" if tabbed else "%i %s"
        for line in moves.xrange(lmin, lmax):
            text = self._get_code_line(self.fname, line)
            if isinstance(text, bytes):
                text = text.decode("utf-8", "ignore")
            # An empty string means end of file (or unreadable line): stop
            if not len(text):
                break
            lines.append(tmplt % (line, text))
        if lines:
            return "".join(lines)
        elif self.code:
            # Validate if the code snippet is in the right format: the
            # stored snippet is only usable when it already starts with a
            # line number (the "%i %s" shape produced above)
            orig_lines = self.code.split("\n")
            if orig_lines:
                orig_first_line = orig_lines[0]
                firstword = orig_first_line.split(" ", 1)[0]
                if firstword and str(firstword).isdigit():
                    return self.code
            return ""
        else:
            return ""
    else:
        # Snippet based issue: locate the snippet in the file to recover the
        # real line number, then render the stored snippet
        lineno = self.lineno
        try:
            tmplineno = 1
            with open(self.fname, mode="r") as fp:
                for aline in fp:
                    if aline.strip() == self.code.strip():
                        lineno = tmplineno
                        # Fix the line number (side effect on self)
                        self.lineno = lineno
                        break
                    tmplineno = tmplineno + 1
        except Exception as e:
            LOG.debug(e)
        tmplt = "%i\t%s" if tabbed else "%i %s"
        return tmplt % (lineno, self.code)
def find_insights(ast_tree, path):
    """Run every module-level ``_check*`` function against the tree.

    :param ast_tree: Parsed AST to inspect
    :param path: Path of the file being analysed
    :return: list of violations collected from all checkers
    """
    collected = []
    this_module = sys.modules[__name__]
    # Invoke all the _check methods defined in this module
    for attr_name in list(this_module.__dict__.keys()):
        if not attr_name.startswith("_check"):
            continue
        try:
            checker = getattr(this_module, attr_name, None)
            if checker:
                found = checker(ast_tree, path)
                if found:
                    collected += found
        except Exception as e:
            # A broken checker must not abort the whole analysis
            LOG.debug(e)
    return collected
def nodejs_build(src, reports_dir, lang_tools):
    """
    Automatically build nodejs project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    yarn_mode = False
    rush_mode = False
    rush_json = [p.as_posix() for p in Path(src).glob("rush.json")]
    package_json = [p.as_posix() for p in Path(src).glob("package.json")]
    yarn_lock = [p.as_posix() for p in Path(src).glob("yarn.lock")]
    # Pick the package manager: yarn.lock wins, then rush.json, then npm
    if yarn_lock:
        cmd_args = lang_tools.get("yarn")
        yarn_mode = True
    elif rush_json:
        cmd_args = lang_tools.get("rush")
        rush_mode = True
    elif package_json:
        cmd_args = lang_tools.get("npm")
    else:
        LOG.debug(
            "Nodejs auto build is supported only for npm or yarn or rush based projects"
        )
        return False
    cp = exec_tool("auto-build", cmd_args, src)
    ret = cp.returncode == 0 if cp else False
    # Best-effort build step; its outcome does not change ret
    try:
        cmd_args = ["yarn"] if yarn_mode else ["npm"]
        if rush_mode:
            cmd_args = ["rush", "rebuild"]
        else:
            cmd_args += ["run", "build"]
        exec_tool("auto-build", cmd_args, src)
    except Exception:
        if rush_mode:
            LOG.warning(
                "Automatic build for rush.js has failed. Try installing the packages manually before invoking scan.\nIf this works then let us know the build steps by filing an issue."
            )
        else:
            LOG.debug("Automatic build has failed for the node.js project")
    return ret
def track(track_obj):
    """
    Method to send a track message to the telemetry api

    :param track_obj: Payload dict posted to the telemetry endpoint
    :return: None
    """
    # Telemetry is opted out when DISABLE_TELEMETRY is "true" or "1"
    opted_out = config.get("DISABLE_TELEMETRY", False) in ("true", "1")
    if not track_obj or opted_out:
        return
    try:
        track_obj["tool"] = "@ShiftLeft/scan"
        requests.post(config.TELEMETRY_URL, json=track_obj)
    except Exception:
        # Telemetry is strictly best-effort
        LOG.debug("Unable to send telemetry")
def get_gradle_cmd(src, cmd_args):
    """Prefer a project-local gradle wrapper over the global gradle binary.

    When ``<src>/gradlew`` exists it is made executable (best effort) and
    substituted as the command; otherwise ``cmd_args`` is returned unchanged.

    :param src: Source directory
    :param cmd_args: gradle command list; element 0 is replaced in place
    :return: the (possibly modified) command list
    """
    wrapper = os.path.join(src, "gradlew")
    # Check for the presence of local gradle wrapper
    if not os.path.exists(wrapper):
        return cmd_args
    perms = (
        stat.S_IRUSR
        | stat.S_IWUSR
        | stat.S_IXUSR
        | stat.S_IRGRP
        | stat.S_IWGRP
        | stat.S_IROTH
    )
    try:
        os.chmod(wrapper, perms)
    except Exception:
        LOG.debug("Ensure {} has execute permissions".format(wrapper))
    cmd_args[0] = wrapper
    return cmd_args
def should_suppress_fingerprint(fingerprint, working_dir):
    """Method to check if a result has to be suppressed based on its fingerprint hash

    :param fingerprint: Fingerprint hash dict keyed by hash type
    :param working_dir: Working directory holding the suppress configuration
    :return: True when any carried hash is in the suppress list, else False
    """
    if not fingerprint:
        return False
    suppress_fps = config.get_suppress_fingerprints(working_dir)
    if not suppress_fps or not isinstance(suppress_fps, dict):
        return False
    # suppress_fps = {"scanPrimaryLocationHash": [], "scanTagsHash": [], "scanFileHash": []}
    for sk, svl in suppress_fps.items():
        if not svl:
            continue
        # Use .get: a result may not carry every hash type; direct indexing
        # previously raised KeyError on a missing key
        if fingerprint.get(sk) in svl:
            LOG.debug(f"Suppressing fingerprint {fingerprint.get(sk)} of type {sk}")
            return True
    return False
def _get_code_line(self, fname, line): """Return the given line from the file. Handles any utf8 error from tokenize :param fname: File name :param line: Line number :return: Exact line as string """ text = "" try: text = linecache.getline(fname, line) except UnicodeDecodeError: LOG.debug( f"Error parsing the file {fname} in utf-8. Falling to binary mode" ) with io.open(fname, "rb") as fp: all_lines = fp.readlines() if line < len(all_lines): text = all_lines[line] return text
def get_code(self, max_lines=3, tabbed=False):
    """Gets lines of code from a file the generated this issue.

    :param max_lines: Max lines of context to return
    :param tabbed: Use tabbing in the output
    :return: strings of code
    """
    if not self.fname:
        return ""
    lines = []
    max_lines = max(max_lines, 1)
    if not self.snippet_based:
        # Window of lines centred around self.lineno
        lmin = max(1, self.lineno - max_lines // 2)
        lmax = lmin + len(self.linerange) + max_lines - 1
        tmplt = "%i\t%s" if tabbed else "%i %s"
        for line in moves.xrange(lmin, lmax):
            text = linecache.getline(self.fname, line)
            if isinstance(text, bytes):
                text = text.decode("utf-8")
            # Empty string means end of file: stop collecting
            if not len(text):
                break
            lines.append(tmplt % (line, text))
        return "".join(lines)
    else:
        # Snippet based issue: scan the file for the snippet to recover the
        # real line number, then render the stored snippet
        lineno = self.lineno
        try:
            tmplineno = 1
            with open(self.fname, mode="r") as fp:
                for aline in fp:
                    if aline.strip() == self.code.strip():
                        lineno = tmplineno
                        # Fix the line number (side effect on self)
                        self.lineno = lineno
                        break
                    tmplineno = tmplineno + 1
        except Exception as e:
            LOG.debug(e)
        tmplt = "%i\t%s" if tabbed else "%i %s"
        return tmplt % (lineno, self.code)
def php_build(src, reports_dir, lang_tools):
    """
    Automatically build php project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed
        successfully. False otherwise
    """
    ret = False
    cmd_args = lang_tools.get("install")
    cjson_files = [p.as_posix() for p in Path(src).glob("composer.json")]
    # If there is no composer.json try to create one
    if not cjson_files:
        cp = exec_tool(
            "auto-build",
            lang_tools.get("init"),
            src,
            env=os.environ.copy(),
            stdout=subprocess.PIPE,
        )
        if cp:
            LOG.debug(cp.stdout)
    # composer install
    cp = exec_tool(
        "auto-build", cmd_args, src, env=os.environ.copy(), stdout=subprocess.PIPE
    )
    if cp:
        LOG.debug(cp.stdout)
        ret = cp.returncode == 0
    # If composer install fails, try composer update
    if not ret:
        cmd_args = lang_tools.get("update")
        cp = exec_tool(
            "auto-build", cmd_args, src, env=os.environ.copy(), stdout=subprocess.PIPE
        )
        if cp:
            LOG.debug(cp.stdout)
            ret = cp.returncode == 0
    return ret
def inspect_scan(language, src, reports_dir, convert, repo_context):
    """
    Method to perform inspect cloud scan

    Args:
      language Project language
      src Project dir
      reports_dir Directory for output reports
      convert Boolean to enable normalisation of reports json
      repo_context Repo context
    """
    run_uuid = config.get("run_uuid")
    cpg_mode = config.get("SHIFTLEFT_CPG")
    env = os.environ.copy()
    # The sl cli is run under Java 8
    env["SCAN_JAVA_HOME"] = os.environ.get("SCAN_JAVA_8_HOME")
    report_fname = utils.get_report_file(
        "ng-sast", reports_dir, convert, ext_name="json"
    )
    sl_cmd = config.get("SHIFTLEFT_NGSAST_CMD")
    # Check if sl cli is available
    if not utils.check_command(sl_cmd):
        LOG.warning(
            "sl cli is not available. Please check if your build uses shiftleft/scan-java as the image"
        )
        return
    analyze_files = config.get("SHIFTLEFT_ANALYZE_FILE")
    analyze_target_dir = config.get(
        "SHIFTLEFT_ANALYZE_DIR", os.path.join(src, "target")
    )
    extra_args = None
    if not analyze_files:
        # Locate the artifacts to upload based on the project language
        if language == "java":
            analyze_files = utils.find_java_artifacts(analyze_target_dir)
        elif language == "csharp":
            if not utils.check_dotnet():
                LOG.warning(
                    "dotnet is not available. Please check if your build uses shiftleft/scan-csharp as the image"
                )
                return
            analyze_files = utils.find_csharp_artifacts(src)
            cpg_mode = True
        else:
            # ts/nodejs are analyzed as js with ts/babel flags
            if language == "ts" or language == "nodejs":
                language = "js"
                extra_args = ["--", "--ts", "--babel"]
            analyze_files = [src]
            cpg_mode = True
    app_name = find_app_name(src, repo_context)
    branch = repo_context.get("revisionId")
    if not branch:
        branch = "master"
    if not analyze_files:
        LOG.warning(
            "Unable to find any build artifacts. Compile your project first before invoking scan or use the auto build feature."
        )
        return
    # Only a single artifact can be analyzed per invocation
    if isinstance(analyze_files, list) and len(analyze_files) > 1:
        LOG.warning(
            "Multiple files found in {}. Only {} will be analyzed".format(
                analyze_target_dir, analyze_files[0]
            )
        )
        analyze_files = analyze_files[0]
    # None entries are filtered out below, allowing conditional flags here
    sl_args = [
        sl_cmd,
        "analyze",
        "--no-auto-update" if language == "java" else None,
        "--wait",
        "--cpg" if cpg_mode else None,
        "--" + language,
        "--tag",
        "branch=" + branch,
        "--app",
        app_name,
    ]
    sl_args += [analyze_files]
    if extra_args:
        sl_args += extra_args
    sl_args = [arg for arg in sl_args if arg is not None]
    LOG.info(
        "About to perform ShiftLeft NG SAST cloud analysis. This might take a few minutes ..."
    )
    LOG.debug(" ".join(sl_args))
    LOG.debug(repo_context)
    cp = exec_tool("NG SAST", sl_args, src, env=env)
    # NOTE(review): exec_tool can return None on launch failure, in which
    # case cp.returncode below would raise — confirm against exec_tool
    if cp.returncode != 0:
        LOG.warning("NG SAST cloud analyze has failed with the below logs")
        LOG.debug(sl_args)
        LOG.info(cp.stderr)
        return
    findings_data = fetch_findings(app_name, branch, report_fname)
    if findings_data and convert:
        crep_fname = utils.get_report_file(
            "ng-sast", reports_dir, convert, ext_name="sarif"
        )
        convertLib.convert_file("ng-sast", sl_args[1:], src, report_fname, crep_fname)
    # Telemetry about this scan invocation
    track({"id": run_uuid, "scan_mode": "ng-sast", "sl_args": sl_args})
def report(
    tool_name,
    tool_args,
    working_dir,
    metrics,
    skips,
    issues,
    crep_fname,
    file_path_list=None,
):
    """Prints issues in SARIF format

    :param tool_name: tool name
    :param tool_args: Args used for the tool
    :param working_dir: Working directory
    :param metrics: metrics data
    :param skips: skips data
    :param issues: issues data
    :param crep_fname: The output file name
    :param file_path_list: Full file path for any manipulation

    :return serialized_log: SARIF output data
    """
    if not tool_args:
        tool_args = []
    tool_args_str = tool_args
    if isinstance(tool_args, list):
        tool_args_str = " ".join(tool_args)
    repo_details = find_repo_details(working_dir)
    log_uuid = str(uuid.uuid4())
    run_uuid = config.get("run_uuid")
    # working directory to use in the log
    WORKSPACE_PREFIX = config.get("WORKSPACE", None)
    wd_dir_log = WORKSPACE_PREFIX if WORKSPACE_PREFIX is not None else working_dir
    driver_name = config.tool_purpose_message.get(tool_name, tool_name)
    # Construct SARIF log (sarif-schema-2.1.0)
    log = om.SarifLog(
        schema_uri="https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json",
        version="2.1.0",
        inline_external_properties=[
            om.ExternalProperties(guid=log_uuid, run_guid=run_uuid)
        ],
        runs=[
            om.Run(
                automation_details=om.RunAutomationDetails(
                    guid=log_uuid,
                    description=om.Message(
                        text="Static Analysis Security Test results using @ShiftLeft/sast-scan"
                    ),
                ),
                tool=om.Tool(
                    driver=om.ToolComponent(
                        name=driver_name, full_name=driver_name, version="1.0.0-scan"
                    )
                ),
                invocations=[
                    om.Invocation(
                        end_time_utc=datetime.datetime.utcnow().strftime(TS_FORMAT),
                        execution_successful=True,
                        working_directory=om.ArtifactLocation(uri=to_uri(wd_dir_log)),
                    )
                ],
                # Record how the raw tool output was converted to SARIF
                conversion={
                    "tool": om.Tool(
                        driver=om.ToolComponent(name="@ShiftLeft/sast-scan")
                    ),
                    "invocation": om.Invocation(
                        execution_successful=True,
                        command_line=tool_args_str,
                        arguments=tool_args,
                        working_directory=om.ArtifactLocation(uri=to_uri(wd_dir_log)),
                        end_time_utc=datetime.datetime.utcnow().strftime(TS_FORMAT),
                    ),
                },
                version_control_provenance=[
                    om.VersionControlDetails(
                        repository_uri=repo_details["repositoryUri"],
                        branch=repo_details["branch"],
                        revision_id=repo_details["revisionId"],
                    )
                ],
            )
        ],
    )
    run = log.runs[0]
    invocation = run.invocations[0]
    # Populate notifications and results on the single run
    add_skipped_file_notifications(skips, invocation)
    add_results(tool_name, issues, run, file_path_list, working_dir)
    serialized_log = to_json(log)
    if crep_fname:
        html_file = crep_fname.replace(".sarif", ".html")
        with io.open(crep_fname, "w") as fileobj:
            fileobj.write(serialized_log)
            # empty-scan produces no meaningful HTML report
            if tool_name != "empty-scan":
                render_html(json.loads(serialized_log), html_file)
            if fileobj.name != sys.stdout.name:
                LOG.debug(
                    "SARIF and HTML report written to file: %s, %s :thumbsup:",
                    fileobj.name,
                    html_file,
                )
    return serialized_log
def extract_from_file(
    tool_name, tool_args, working_dir, report_file, file_path_list=None
):
    """Extract properties from reports

    :param tool_name: tool name
    :param tool_args: tool args
    :param working_dir: Working directory
    :param report_file: Report file
    :param file_path_list: Full file path for any manipulation

    :return issues, metrics, skips information
    """
    issues = []
    metrics = None
    skips = []
    # If the tools did not produce any result do not crash
    if not os.path.isfile(report_file):
        return issues, metrics, skips
    extn = pathlib.PurePosixPath(report_file).suffix
    with io.open(report_file, "r") as rfile:
        # Static check use jsonlines format, duh
        if tool_name == "staticcheck":
            contents = rfile.read()
            try:
                issues = [
                    json.loads(str(item)) for item in contents.strip().split("\n")
                ]
            except json.decoder.JSONDecodeError:
                LOG.warning(
                    "staticcheck produced no result since the project was not built before analysis!"
                )
            return issues, metrics, skips
        if extn == ".json":
            try:
                report_data = json.loads(rfile.read())
            except json.decoder.JSONDecodeError:
                # Unparseable report: return the empty defaults
                return issues, metrics, skips
            # NG SAST (Formerly Inspect) uses vulnerabilities
            if tool_name == "ng-sast":
                for v in report_data.get("vulnerabilities"):
                    if not v:
                        continue
                    vuln = v["vulnerability"]
                    location_list = []
                    if vuln.get("dataFlow") and vuln.get("dataFlow", {}).get(
                        "dataFlow"
                    ):
                        location_list = convert_dataflow(
                            working_dir, tool_args, vuln["dataFlow"]["dataFlow"]["list"]
                        )
                    # One issue per dataflow location
                    for location in location_list:
                        issues.append(
                            {
                                "rule_id": vuln["category"],
                                "title": vuln["title"],
                                "description": vuln["description"],
                                "score": vuln["score"],
                                "severity": vuln["severity"],
                                "line_number": location.get("line_number"),
                                "filename": location.get("filename"),
                                "first_found": vuln["firstVersionDetected"],
                                "issue_confidence": "HIGH",
                            }
                        )
            elif tool_name == "taint-php":
                for entry in report_data:
                    taint_trace = entry.get("taint_trace")
                    labels = []
                    if taint_trace:
                        source, sink, labels = get_from_taints(taint_trace)
                    else:
                        source, _, _ = get_from_taints([entry])
                    issues.append(
                        {
                            "rule_id": entry.get("shortcode"),
                            "test_name": entry.get("type"),
                            "description": "{}: {}".format(
                                entry.get("message"), "\\n".join(labels)
                            ),
                            "link": entry.get("link"),
                            "severity": entry.get("severity"),
                            "issue_confidence": "HIGH",
                            "line_number": source.get("line_number"),
                            "filename": source.get("filename"),
                        }
                    )
            elif tool_name == "taint-python":
                taint_list = report_data.get("vulnerabilities")
                for taint in taint_list:
                    source = taint.get("source")
                    sink = taint.get("sink")
                    # Collect any present source/sink metadata as tags
                    tags = {}
                    for taint_props in [
                        "source_trigger_word",
                        "source_label",
                        "source_type",
                        "sink_trigger_word",
                        "sink_label",
                        "sink_type",
                    ]:
                        if taint.get(taint_props):
                            tags[taint_props] = taint.get(taint_props)
                    issues.append(
                        {
                            "rule_id": taint.get("rule_id"),
                            "test_name": taint.get("rule_name"),
                            "short_description": taint.get("short_description"),
                            "cwe_category": taint.get("cwe_category"),
                            "owasp_category": taint.get("owasp_category"),
                            "description": taint.get("description"),
                            "severity": taint.get("severity"),
                            "issue_confidence": "HIGH",
                            "line_from": source.get("line_number"),
                            "line_to": sink.get("line_number"),
                            "filename": source.get("path"),
                            "tags": tags,
                        }
                    )
            elif tool_name == "phpstan" or tool_name == "source-php":
                file_errors = report_data.get("files")
                for filename, messageobj in file_errors.items():
                    messages = messageobj.get("messages")
                    for msg in messages:
                        # Create a rule id for phpstan from the first word
                        rule_word = msg.get("message", "").split(" ")[0]
                        rule_word = "phpstan-" + rule_word.lower()
                        issues.append(
                            {
                                "rule_id": rule_word,
                                "title": msg.get("message"),
                                "line_number": msg.get("line"),
                                "filename": filename,
                                "severity": "LOW",
                                "issue_confidence": "MEDIUM",
                            }
                        )
            elif tool_name == "source-js":
                njs_findings = report_data.get("nodejs", {})
                njs_findings.update(report_data.get("templates", {}))
                for k, v in njs_findings.items():
                    # Password detection by njsscan is full of false positives
                    if k == "node_password":
                        continue
                    files = v.get("files", [])
                    metadata = v.get("metadata", {})
                    if not files or not metadata:
                        continue
                    for afile in files:
                        line_number = 0
                        if afile.get("match_lines"):
                            line_number = afile.get("match_lines")[0]
                        issues.append(
                            {
                                "rule_id": metadata.get("owasp")
                                .replace(":", "-")
                                .replace(" ", "")
                                .lower(),
                                "title": metadata.get("cwe"),
                                "description": metadata.get("description"),
                                "severity": metadata.get("severity"),
                                "line_number": line_number,
                                "filename": afile.get("file_path"),
                                "issue_confidence": "HIGH",
                            }
                        )
            elif tool_name == "checkov":
                # checkov emits either a list of runs or a single run
                if isinstance(report_data, list):
                    for rd in report_data:
                        issues += rd.get("results", {}).get("failed_checks")
                else:
                    issues = report_data.get("results", {}).get("failed_checks")
            elif tool_name == "source-ruby":
                issues = report_data.get("warnings", [])
                issues += report_data.get("errors", [])
            elif isinstance(report_data, list):
                issues = report_data
            else:
                if "sec_issues" in report_data:
                    # NodeJsScan uses sec_issues
                    sec_data = report_data["sec_issues"]
                    for key, value in sec_data.items():
                        if isinstance(value, list):
                            issues = issues + value
                        else:
                            issues.append(value)
                elif "Issues" in report_data:
                    tmpL = report_data.get("Issues", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
                elif "results" in report_data:
                    tmpL = report_data.get("results", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
        if extn == ".csv":
            headers, issues = csv_parser.get_report_data(rfile)
        if extn == ".xml":
            issues, metrics = xml_parser.get_report_data(
                rfile, file_path_list=file_path_list, working_dir=working_dir
            )
    return issues, metrics, skips
def summary(sarif_files, aggregate_file=None, override_rules={}):
    """Generate overall scan summary based on the generated SARIF file

    :param sarif_files: List of generated sarif report files
    :param aggregate_file: Filename to store aggregate data
    :param override_rules Build break rules to override for testing
        (NOTE: mutable default, but it is only read/spread, never mutated)

    :returns dict representing the summary
    """
    report_summary = {}
    build_status = "pass"
    # This is the list of all runs which will get stored as an aggregate
    run_data_list = []
    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = json.loads(report_file.read())
            # skip this file if the data is empty
            if not report_data or not report_data.get("runs"):
                LOG.warn("Report file {} is invalid. Skipping ...".format(sf))
                continue
            # Iterate through all the runs
            for run in report_data["runs"]:
                # Add it to the run data list for aggregation
                run_data_list.append(run)
                tool_desc = run["tool"]["driver"]["name"]
                tool_name = tool_desc
                # Initialise severity counters for this tool
                report_summary[tool_name] = {
                    "tool": tool_desc,
                    "critical": 0,
                    "high": 0,
                    "medium": 0,
                    "low": 0,
                    "status": "✅",
                }
                results = run.get("results", [])
                metrics = run.get("properties", {}).get("metrics", None)
                # If the result includes metrics use it. If not compute it
                if metrics:
                    report_summary[tool_name].update(metrics)
                    report_summary[tool_name].pop("total", None)
                else:
                    for aresult in results:
                        sev = aresult["properties"]["issue_severity"].lower()
                        report_summary[tool_name][sev] += 1
                # Compare against the build break rule to determine status;
                # precedence: override_rules > tool_rules > default_rules
                default_rules = config.get("build_break_rules").get("default")
                tool_rules = config.get("build_break_rules").get(tool_name, {})
                build_break_rules = {
                    **default_rules,
                    **tool_rules,
                    **override_rules,
                }
                for rsev in ["critical", "high", "medium", "low"]:
                    if build_break_rules.get("max_" + rsev) is not None:
                        if (
                            report_summary.get(tool_name).get(rsev)
                            > build_break_rules["max_" + rsev]
                        ):
                            report_summary[tool_name]["status"] = "❌"
                            build_status = "fail"
    # Should we store the aggregate data
    if aggregate_file:
        # agg_sarif_file = aggregate_file.replace(".json", ".sarif")
        # aggregate.sarif_aggregate(run_data_list, agg_sarif_file)
        aggregate.jsonl_aggregate(run_data_list, aggregate_file)
        LOG.debug("Aggregate report written to {}\n".format(aggregate_file))
    return report_summary, build_status
def find_repo_details(src_dir=None):
    """Method to find repo details such as url, sha etc
    This will be populated into versionControlProvenance attribute

    :param src_dir: Source directory
    :return: dict with git/ci provider, repository name/uri, revision,
        branch, invoker and pull-request flags
    """
    # See if repository uri is specified in the config
    repositoryName = None
    repositoryUri = ""
    revisionId = ""
    branch = ""
    invokedBy = ""
    pullRequest = False
    gitProvider = ""
    ciProvider = ""
    """
    Since CI servers typically checkout repo in detached mode, we need to rely on environment
    variables as a starting point to find the repo details. To make matters worse, since we
    run the tools inside a container these variables should be passed as part of the docker run
    command. With native integrations such as GitHub action and cloudbuild this could be taken
    care by our builders.

    Env variables detection for popular CI server is implemented here anyways. But they are effective
    only in few cases.

    Azure pipelines - https://docs.microsoft.com/en-us/azure/devops/pipelines/build/variables?view=azure-devops&tabs=yaml
    BitBucket - https://confluence.atlassian.com/bitbucket/environment-variables-in-bitbucket-pipelines-794502608.html
    GitHub actions - https://help.github.com/en/actions/automating-your-workflow-with-github-actions/using-environment-variables
    Google CloudBuild - https://cloud.google.com/cloud-build/docs/configuring-builds/substitute-variable-values
    CircleCI - https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables
    Travis - https://docs.travis-ci.com/user/environment-variables/#default-environment-variables
    AWS CodeBuild - https://docs.aws.amazon.com/codebuild/latest/userguide/build-env-ref-env-vars.html
    GitLab - https://docs.gitlab.com/ee/ci/variables/predefined_variables.html
    Jenkins - https://jenkins.io/doc/book/pipeline/jenkinsfile/#using-environment-variables
    """
    for key, value in os.environ.items():
        # Check REPOSITORY_URL first followed CI specific vars
        # Some CI such as GitHub pass only the slug instead of the full url :(
        if not gitProvider or not ciProvider:
            if key.startswith("GITHUB_"):
                if key == "GITHUB_REPOSITORY":
                    gitProvider = "github"
                if key == "GITHUB_ACTION":
                    ciProvider = "github"
            elif key.startswith("GITLAB_"):
                gitProvider = "gitlab"
                if key == "GITLAB_CI":
                    ciProvider = "gitlab"
            elif key.startswith("BITBUCKET_"):
                gitProvider = "bitbucket"
                if key == "BITBUCKET_BUILD_NUMBER":
                    ciProvider = "bitbucket"
            elif key.startswith("CIRCLE_"):
                ciProvider = "circle"
            elif key.startswith("TRAVIS_"):
                ciProvider = "travis"
            elif key.startswith("CODEBUILD_"):
                ciProvider = "codebuild"
            elif key.startswith("BUILD_REQUESTEDFOREMAIL"):
                ciProvider = "azure"
            elif key.startswith("JENKINS_"):
                ciProvider = "jenkins"
        if not repositoryName:
            if key in [
                "BUILD_REPOSITORY_NAME",
                "GITHUB_REPOSITORY",
                "BITBUCKET_REPO_SLUG",
                "REPO_NAME",
                "CIRCLE_PROJECT_REPONAME",
                "TRAVIS_REPO_SLUG",
                "CI_PROJECT_NAME",
            ]:
                # Slug values look like owner/repo; keep only the repo part
                if "/" in value:
                    repositoryName = value.split("/")[-1]
                else:
                    repositoryName = value
        if not repositoryUri:
            if key in [
                "REPOSITORY_URL",
                "BUILD_REPOSITORY_URI",
                "GITHUB_REPOSITORY",
                "BITBUCKET_GIT_HTTP_ORIGIN",
                "REPO_NAME",
                "CIRCLE_REPOSITORY_URL",
                "TRAVIS_REPO_SLUG",
                "CODEBUILD_SOURCE_REPO_URL",
                "CI_REPOSITORY_URL",
            ]:
                repositoryUri = value
        if key in [
            "COMMIT_SHA",
            "BUILD_SOURCEVERSION",
            "BITBUCKET_COMMIT",
            "GITHUB_SHA",
            "CIRCLE_SHA1",
            "TRAVIS_COMMIT",
            "CODEBUILD_SOURCE_VERSION",
            "CI_COMMIT_SHA",
        ]:
            revisionId = value
        if key in [
            "BRANCH",
            "BUILD_SOURCEBRANCH",
            "BITBUCKET_BRANCH",
            "GITHUB_REF",
            "BRANCH_NAME",
            "CIRCLE_BRANCH",
            "TRAVIS_BRANCH",
            "CI_COMMIT_REF_NAME",
        ]:
            branch = value
        if key in [
            "BUILD_REQUESTEDFOREMAIL",
            "GITHUB_ACTOR",
            "PROJECT_ID",
            "CIRCLE_USERNAME",
            "GITLAB_USER_EMAIL",
        ]:
            invokedBy = value
        if key.startswith("CI_MERGE_REQUEST"):
            pullRequest = True
    if src_dir and os.path.isdir(os.path.join(src_dir, ".git")):
        # Try interacting with git to fill any gaps left by the env vars
        try:
            repo = Repo(src_dir)
            head = repo.head
            if not branch and not head.is_detached:
                branch = repo.active_branch.name
            if not revisionId and head:
                revisionId = head.commit.hexsha
            if not repositoryUri:
                repositoryUri = next(iter(repo.remote().urls))
            if not invokedBy or "@" not in invokedBy:
                if head and head.commit.author and head.commit.author.email:
                    invokedBy = "{} <{}>".format(
                        head.commit.author.name, head.commit.author.email
                    )
        except Exception:
            LOG.debug("Unable to find repo details from the local repository")
    if branch.startswith("refs/pull"):
        pullRequest = True
        branch = branch.replace("refs/pull/", "")
    # Cleanup the variables
    branch = branch.replace("refs/heads/", "")
    if repositoryUri:
        repositoryUri = repositoryUri.replace(
            "[email protected]:", "https://github.com/"
        ).replace(".git", "")
        # Is it a repo slug?
        repo_slug = True
        repositoryUri = sanitize_url(repositoryUri)
        for pref in repo_url_prefixes:
            if repositoryUri.startswith(pref):
                repo_slug = False
                break
        if not repo_slug:
            if "vs-ssh" in repositoryUri:
                repo_slug = False
        # For repo slug just assume github for now
        if repo_slug:
            repositoryUri = "https://github.com/" + repositoryUri
    if not repositoryName and repositoryUri:
        repositoryName = os.path.basename(repositoryUri)
    if not gitProvider:
        # Infer the provider from the uri as a last resort
        if "github" in repositoryUri:
            gitProvider = "github"
        if "gitlab" in repositoryUri:
            gitProvider = "gitlab"
        if "atlassian" in repositoryUri or "bitbucket" in repositoryUri:
            gitProvider = "bitbucket"
        if "azure" in repositoryUri or "visualstudio" in repositoryUri:
            gitProvider = "azure"
            if not ciProvider:
                ciProvider = "azure"
        if not gitProvider and "tfs" in repositoryUri:
            gitProvider = "tfs"
            ciProvider = "tfs"
    return {
        "gitProvider": gitProvider,
        "ciProvider": ciProvider,
        "repositoryName": "" if not repositoryName else repositoryName,
        "repositoryUri": repositoryUri,
        "revisionId": revisionId,
        "branch": branch,
        "invokedBy": invokedBy,
        "pullRequest": pullRequest,
        "botUser": is_bot(invokedBy),
    }
def annotate_pr(self, repo_context, findings_file, report_summary, build_status):
    """Annotate a Bitbucket pull request with the scan results.

    Reads the findings json, posts a markdown summary comment on the PR
    (when a PR id and token are available in the context), then creates a
    code-insights style security report plus one annotation per finding
    via the Bitbucket reports/annotations endpoints.

    :param repo_context: Repo context used to resolve Bitbucket URLs and workspace
    :param findings_file: Path to the findings json file produced by the scan
    :param report_summary: Dict of per-tool summary rows (tool, critical, high, ...)
    :param build_status: Overall build status string; "fail" marks the report FAILED
    """
    # Nothing to report without a findings file
    if not findings_file:
        return
    with open(findings_file, mode="r") as fp:
        # The whole flow is best-effort: any failure is logged at debug level
        try:
            findings_obj = json.load(fp)
            findings = findings_obj.get("findings")
            if not findings:
                LOG.debug("No findings from scan available to report")
                return
            context = self.get_context(repo_context)
            # Leave a comment on the pull request
            if context.get("prID") and context.get("bitbucketToken"):
                # Build a markdown table, one row per tool in the summary
                summary = "| Tool | Critical | High | Medium | Low | Status |\n"
                summary = (
                    summary + "| ---- | ------- | ------ | ----- | ---- | ---- |\n"
                )
                for rk, rv in report_summary.items():
                    status_emoji = self.to_emoji(rv.get("status"))
                    summary = f'{summary}| {rv.get("tool")} | {rv.get("critical")} | {rv.get("high")} | {rv.get("medium")} | {rv.get("low")} | {status_emoji} |\n'
                # %-style template filled with the Bitbucket pipeline variables below
                template = config.get("PR_COMMENT_BASIC_TEMPLATE")
                recommendation = (
                    f"Please review the scan reports before approving this pull request for {context.get('prTargetBranch')} branch"
                    if build_status == "fail"
                    else "Looks good"
                )
                repoOwner = f"{context.get('BITBUCKET_REPO_OWNER')}"
                repoFullname = f"{context.get('BITBUCKET_REPO_FULL_NAME')}"
                repoWorkspace = f"{context.get('BITBUCKET_WORKSPACE')}"
                repoUUID = f"{context.get('BITBUCKET_REPO_UUID')}"
                prID = f"{context.get('BITBUCKET_PR_ID')}"
                prTargetBranch = f"{context.get('BITBUCKET_PR_DESTINATION_BRANCH')}"
                bitbucketToken = f"{context.get('BITBUCKET_TOKEN')}"
                commitSHA = f"{context.get('BITBUCKET_COMMIT')}"
                repoId = f"{context.get('BITBUCKET_REPO_UUID')}"
                projectUrl = f"{context.get('BITBUCKET_REPO_SLUG')}"
                jobId = f"{context.get('BITBUCKET_BUILD_NUMBER')}"
                body = template % dict(
                    summary=summary,
                    recommendation=recommendation,
                    repoOwner=repoOwner,
                    repoFullname=repoFullname,
                    repoWorkspace=repoWorkspace,
                    repoUUID=repoUUID,
                    prID=prID,
                    prTargetBranch=prTargetBranch,
                    bitbucketToken=bitbucketToken,
                    commitSHA=commitSHA,
                    repoId=repoId,
                    projectUrl=projectUrl,
                    jobId=jobId,
                )
                # Post the comment using workspace + token as basic auth credentials
                rc = requests.post(
                    self.get_pr_comments_url(repo_context),
                    auth=(
                        context.get("repoWorkspace"),
                        context.get("bitbucketToken"),
                    ),
                    headers={"Content-Type": "application/json"},
                    json={"content": {"raw": body}},
                )
                if not rc.ok:
                    LOG.debug(rc.json())
            else:
                LOG.debug(
                    "Either build is not part of a PR or variable BITBUCKET_TOKEN was not set with Pull Request write permission"
                )
            total_count = len(findings)
            # Report data rows: overall merge verdict plus one status row per tool
            data_list = [
                {
                    "title": "Safe to merge?",
                    "type": "BOOLEAN",
                    "value": build_status != "fail",
                },
            ]
            for rk, rv in report_summary.items():
                data_list.append({
                    "title": rv.get("tool"),
                    "type": "TEXT",
                    "value": rv.get("status"),
                })
            scan_id = config.get("run_uuid", "001")
            # Create a PR report based on the total findings
            rr = requests.put(
                f"{self.get_reports_url(repo_context)}-{scan_id}",
                proxies=proxies,
                headers={"Content-Type": "application/json"},
                json={
                    "title": "Scan",
                    "details": f"This pull request contains {total_count} issues",
                    "report_type": "SECURITY",
                    "reporter": f"Scan report for {repo_context.get('repositoryName')}",
                    "link": "https://slscan.io",
                    "logo_url": "https://www.shiftleft.io/static/images/ShiftLeft_logo_white.svg",
                    "result": "FAILED" if build_status == "fail" else "PASSED",
                    "data": data_list,
                },
            )
            if rr.ok:
                # One annotation per finding; the occurrence hash (last segment of
                # internal_id) makes the annotation URL unique per occurrence
                for f in findings:
                    finternal = f.get("internal_id")
                    tmpA = finternal.split("/")
                    title = tmpA[0]
                    occurrenceHash = tmpA[-1]
                    annotation_url = f"{self.get_reports_url(repo_context)}-{scan_id}/annotations/scan-{occurrenceHash}"
                    fileName = ""
                    lineNumber = None
                    if f.get("details"):
                        fileName = f.get("details", {}).get("fileName")
                        lineNumber = f.get("details", {}).get("lineNumber")
                        workspace = utils.get_workspace(repo_context)
                        # Remove the workspace
                        if workspace:
                            workspace = workspace + "/"
                            fileName = fileName.replace(workspace, "")
                    # Cleanup title and description
                    title = f.get("title")
                    description = f.get("description")
                    # Prefer the longer multi-line title as the description
                    if len(title) > len(description) and "\n" in title:
                        description = f.get("title")
                    if "\n" in title:
                        title = title.split("\n")[0]
                    annotation = {
                        "title": "Scan Report",
                        "annotation_type": "VULNERABILITY",
                        "summary": title,
                        "details": description,
                        "severity": self.convert_severity(f.get("severity")),
                        "path": fileName,
                        "line": lineNumber,
                    }
                    ar = requests.put(
                        annotation_url,
                        proxies=proxies,
                        headers={"Content-Type": "application/json"},
                        json=annotation,
                    )
                    # Stop annotating on the first failed request
                    if not ar.ok:
                        break
            else:
                LOG.debug(rr.json())
        except Exception as e:
            LOG.debug(e)
def extract_from_file(tool_name, working_dir, report_file, file_path_list=None):
    """Extract properties from reports

    :param tool_name: tool name
    :param working_dir: Working directory
    :param report_file: Report file
    :param file_path_list: Full file path for any manipulation

    :return issues, metrics, skips information
    """
    issues = []
    metrics = None
    skips = []
    # If the tools did not produce any result do not crash
    if not os.path.isfile(report_file):
        return issues, metrics, skips
    extn = pathlib.PurePosixPath(report_file).suffix
    with io.open(report_file, "r") as rfile:
        # Static check use jsonlines format, duh
        if tool_name == "staticcheck":
            contents = rfile.read()
            try:
                issues = [
                    json.loads(str(item)) for item in contents.strip().split("\n")
                ]
            except json.decoder.JSONDecodeError:
                LOG.warning(
                    "staticcheck produced no result since the project was not built before analysis!"
                )
            return issues, metrics, skips
        if extn == ".json":
            try:
                report_data = json.loads(rfile.read())
            except json.decoder.JSONDecodeError:
                return issues, metrics, skips
            # Inspect uses vulnerabilities
            if tool_name == "inspect":
                file_name_prefix = ""
                # BUG FIX: a missing or null "vulnerabilities" key used to raise
                # TypeError when iterated; treat it as an empty result instead.
                for v in report_data.get("vulnerabilities") or []:
                    if not v:
                        continue
                    vuln = v["vulnerability"]
                    location = {}
                    # Pick the first data-flow location that is not inside a
                    # generic (third party) package
                    if vuln.get("dataFlow") and vuln.get("dataFlow").get("dataFlow"):
                        for flow_node in vuln["dataFlow"]["dataFlow"]["list"]:
                            if not is_generic_package(
                                flow_node["location"].get("fileName")
                            ):
                                location = flow_node["location"]
                                break
                    fileName = location.get("fileName")
                    if fileName == "N/A":
                        continue
                    if not file_name_prefix:
                        file_name_prefix = find_path_prefix(working_dir, fileName)
                    issues.append({
                        "rule_id": vuln["category"],
                        "title": vuln["title"],
                        "description": vuln["description"],
                        "score": vuln["score"],
                        "severity": vuln["severity"],
                        "line_number": location.get("lineNumber"),
                        "filename": os.path.join(file_name_prefix, fileName),
                        "first_found": vuln["firstVersionDetected"],
                        "issue_confidence": "HIGH",
                    })
            elif isinstance(report_data, list):
                issues = report_data
            else:
                if tool_name == "checkov":
                    # BUG FIX: default to [] so the function always returns a
                    # list even when "results"/"failed_checks" is absent or null
                    issues = (report_data.get("results") or {}).get(
                        "failed_checks"
                    ) or []
                elif "sec_issues" in report_data:
                    # NodeJsScan uses sec_issues
                    sec_data = report_data["sec_issues"]
                    for key, value in sec_data.items():
                        if isinstance(value, list):
                            issues = issues + value
                        else:
                            issues.append(value)
                elif "Issues" in report_data:
                    tmpL = report_data.get("Issues", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
                elif "results" in report_data:
                    tmpL = report_data.get("results", [])
                    if tmpL:
                        issues += tmpL
                    else:
                        LOG.debug("%s produced no result" % tool_name)
        if extn == ".csv":
            headers, issues = csv_parser.get_report_data(rfile)
        if extn == ".xml":
            issues, metrics = xml_parser.get_report_data(rfile, file_path_list)
    return issues, metrics, skips
def convert_sarif(app_name, repo_context, sarif_files, findings_fname):
    """
    Method to convert sarif to findings json

    :param app_name: Application name
    :param repo_context: Repo context embedded into each finding's details
    :param sarif_files: List of sarif report files to convert
    :param findings_fname: Output filename for the aggregated findings json
    :return:
    """
    # NOTE(review): finding_id is incremented per finding but is not embedded
    # in the output — presumably kept for future use; confirm before removing
    finding_id = 1
    findings_list = []
    # Cache of ruleId -> owasp category, to avoid rescanning the config mapping
    rule_id_owasp_cache = {}
    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = None
            # Best-effort per file: any malformed sarif is logged and skipped
            try:
                report_data = json.loads(report_file.read())
                # skip this file if the data is empty
                if not report_data or not report_data.get("runs"):
                    continue
                # Iterate through all the runs
                for run in report_data["runs"]:
                    results = run.get("results")
                    if not results:
                        continue
                    tool_name = run.get("tool", {}).get("driver", {}).get("name")
                    # Index the driver rules by id for quick lookup per result
                    rules = {
                        r["id"]: r
                        for r in run.get("tool", {}).get("driver", {}).get("rules")
                        if r and r.get("id")
                    }
                    for result in results:
                        rule_id = result.get("ruleId", "")
                        rule = rules.get(rule_id)
                        # Results without a matching rule cannot be enriched
                        if not rule:
                            continue
                        owasp_category = rule_id_owasp_cache.get(rule_id, "")
                        if not owasp_category:
                            # Check the config for any available owasp category mapping
                            # (exact match or substring match on the rule id)
                            for rok, rov in config.get("rules_owasp_category").items():
                                if (
                                    rok.upper() == rule_id.upper()
                                    or rok.upper() in rule_id.upper()
                                ):
                                    rule_id_owasp_cache[rule_id] = rov
                                    owasp_category = rov
                        # Fall back to the rule id when the rule has no name
                        category = rule.get("name")
                        if not category:
                            category = rule_id
                        desc = get_help(
                            rule_id,
                            rule_obj=rule,
                            tool_name=tool_name,
                            owasp_category=owasp_category,
                        )
                        short_desc = rule.get("shortDescription", {}).get("text")
                        if not short_desc:
                            short_desc = result.get("message", {}).get("text")
                        ngsev = convert_severity(
                            result.get("properties", {})["issue_severity"]
                        )
                        # Populate tags
                        tags = []
                        if "CWE" in rule_id:
                            tags.append(
                                {
                                    "key": "cwe_category",
                                    "value": rule_id.replace("CWE-", ""),
                                    "shiftleft_managed": True,
                                }
                            )
                        # CIS benchmark tagging for checkov/CIS/AWS style rule ids
                        if "CKV_" in rule_id or "CIS_" in rule_id or "AWS" in rule_id:
                            cis_rule = cis.get_rule(rule_id)
                            if cis_rule:
                                tags.append(
                                    {
                                        "key": "cis_category",
                                        "value": cis_rule.get("id", ""),
                                        "shiftleft_managed": False,
                                    }
                                )
                                if cis_rule.get("scored"):
                                    tags.append(
                                        {
                                            "key": "cis_status",
                                            "value": "SCORED",
                                            "shiftleft_managed": False,
                                        }
                                    )
                        # One finding per physical location of the result
                        for location in result.get("locations"):
                            filename = location["physicalLocation"]["artifactLocation"][
                                "uri"
                            ]
                            lineno = location.get("physicalLocation", {})["region"][
                                "startLine"
                            ]
                            end_lineno = location.get("physicalLocation", {})[
                                "contextRegion"
                            ]["endLine"]
                            finding = {
                                "app": app_name,
                                "type": "extscan",
                                "title": result.get("message", {})["text"],
                                "description": desc,
                                # internal_id is ruleId/line-hash, used to
                                # deduplicate occurrences of the same issue
                                "internal_id": "{}/{}".format(
                                    rule_id,
                                    utils.calculate_line_hash(
                                        filename,
                                        lineno,
                                        end_lineno,
                                        location.get("physicalLocation", {})["region"][
                                            "snippet"
                                        ]["text"],
                                        short_desc,
                                    ),
                                ),
                                "severity": ngsev,
                                "owasp_category": owasp_category,
                                "category": category,
                                "details": {
                                    "repoContext": repo_context,
                                    "name": result.get("message", {})["text"],
                                    "tags": ",".join(rule["properties"]["tags"]),
                                    "fileName": filename,
                                    "DATA_TYPE": "OSS_SCAN",
                                    "lineNumber": lineno,
                                    "ruleId": rule_id,
                                    "ruleName": rule.get("name"),
                                    "contextText": location.get("physicalLocation", {})[
                                        "region"
                                    ]["snippet"]["text"],
                                    "snippetText": location.get("physicalLocation", {})[
                                        "contextRegion"
                                    ]["snippet"]["text"],
                                },
                                "tags": tags,
                            }
                            findings_list.append(finding)
                            finding_id = finding_id + 1
            except Exception as e:
                LOG.debug(e)
                continue
    # Write the aggregated findings from all sarif files
    with open(findings_fname, mode="w") as out_file:
        json.dump({"findings": findings_list}, out_file)
def fetch_findings(app_name, version, report_fname): """ Fetch findings from the NG SAST Cloud """ sl_org = config.get("SHIFTLEFT_ORG_ID", config.get("SHIFTLEFT_ORGANIZATION_ID")) sl_org_token = config.get( "SHIFTLEFT_ORG_TOKEN", config.get("SHIFTLEFT_ORGANIZATION_TOKEN") ) if not sl_org_token: sl_org_token = config.get("SHIFTLEFT_API_TOKEN") findings_api = config.get("SHIFTLEFT_VULN_API") findings_list = [] if sl_org and sl_org_token: findings_api = findings_api % dict( sl_org=sl_org, app_name=app_name, version=version ) query_obj = { "query": { "returnRuntimeData": False, "orderByDirection": "VULNERABILITY_ORDER_DIRECTION_DESC", } } headers = { "Content-Type": "application/json", "Authorization": "Bearer " + sl_org_token, } try: r = requests.post(findings_api, headers=headers, json=query_obj) if r.status_code == 200: findings_data = r.json() if findings_data: findings_list += findings_data.get("vulnerabilities", []) nextPageBookmark = findings_data.get("nextPageBookmark") # Recurse and fetch all pages while nextPageBookmark: LOG.debug("Retrieving findings from next page") r = requests.post( findings_api, headers=headers, json={"pageBookmark": nextPageBookmark}, ) if r.status_code == 200: findings_data = r.json() if findings_data: findings_list += findings_data.get( "vulnerabilities", [] ) nextPageBookmark = findings_data.get("nextPageBookmark") else: nextPageBookmark = None with open(report_fname, mode="w") as rp: json.dump({"vulnerabilities": findings_list}, rp) LOG.debug( "Data written to {}, {}".format( report_fname, len(findings_list) ) ) return findings_list else: if not findings_list: LOG.warning( "Unable to retrieve any findings from NG SAST Cloud. Status {}".format( r.status_code ) ) else: LOG.debug( "Unable to retrieve some findings from NG SAST Cloud. Proceeding with partial list. Status {}".format( r.status_code ) ) return findings_list except Exception as e: LOG.error(e) else: return findings_list
def execute_default_cmd(  # scan:ignore
    cmd_map_list,
    type_str,
    tool_name,
    src,
    reports_dir,
    convert,
    scan_mode,
    repo_context,
):
    """
    Method to execute default command for the given type

    Args:
      cmd_map_list Default commands in the form of a dict (multiple) or list
      type_str Project type
      tool_name Tool name
      src Project dir
      reports_dir Directory for output reports
      convert Boolean to enable normalisation of reports json
      scan_mode Scan mode string
      repo_context Repo context
    """
    # Check if there is a default command specified for the given type
    # Create the reports dir
    report_fname_prefix = os.path.join(reports_dir, tool_name + "-report")
    # Look for any additional direct arguments for the tool and inject them
    if config.get(tool_name + "_direct_args"):
        direct_args = config.get(tool_name + "_direct_args").split(" ")
        if direct_args:
            cmd_map_list += direct_args
    # Allow a single-file analysis target to override the source directory
    src_or_file = src
    if config.get("SHIFTLEFT_ANALYZE_FILE"):
        src_or_file = config.get("SHIFTLEFT_ANALYZE_FILE")
    # The command templates use %-style placeholders filled in here
    default_cmd = " ".join(cmd_map_list) % dict(
        src=src,
        src_or_file=src_or_file,
        reports_dir=reports_dir,
        report_fname_prefix=report_fname_prefix,
        type=type_str,
        scan_mode=scan_mode,
    )
    # Try to detect if the output could be json
    outext = ".out"
    if "json" in default_cmd:
        outext = ".json"
    elif "csv" in default_cmd:
        outext = ".csv"
    elif "sarif" in default_cmd:
        outext = ".sarif"
    elif "xml" in default_cmd:
        outext = ".xml"
    report_fname = report_fname_prefix + outext
    # If the command doesn't support file output then redirect stdout automatically
    stdout = None
    if LOG.isEnabledFor(DEBUG):
        stdout = None
    if reports_dir and report_fname_prefix not in default_cmd:
        report_fname = report_fname_prefix + outext
        stdout = io.open(report_fname, "w")
        LOG.debug("Output will be written to {}".format(report_fname))
    # If the command is requesting list of files then construct the argument:
    # "(filelist=ext)" is replaced with a space-separated list of matching files
    filelist_prefix = "(filelist="
    if default_cmd.find(filelist_prefix) > -1:
        si = default_cmd.find(filelist_prefix)
        ei = default_cmd.find(")", si + 10)
        ext = default_cmd[si + 10:ei]
        filelist = utils.find_files(src, ext)
        # Temporary fix for the yaml issue
        if ext == "yaml":
            yml_list = utils.find_files(src, "yml")
            if yml_list:
                filelist.extend(yml_list)
        delim = " "
        default_cmd = default_cmd.replace(
            filelist_prefix + ext + ")", delim.join(filelist)
        )
    cmd_with_args = default_cmd.split(" ")
    # Suppress psalm output
    if should_suppress_output(type_str, cmd_with_args[0]):
        stdout = subprocess.DEVNULL
    exec_tool(tool_name, cmd_with_args, cwd=src, stdout=stdout)
    # Should we attempt to convert the report to sarif format
    if should_convert(convert, tool_name, cmd_with_args[0], report_fname):
        crep_fname = utils.get_report_file(
            tool_name, reports_dir, convert, ext_name="sarif"
        )
        # java/pmd/php tools are converted under the tool name; everything else
        # is converted under the executable name with its arguments
        if (
            cmd_with_args[0] == "java"
            or "pmd-bin" in cmd_with_args[0]
            or "php" in tool_name
        ):
            convertLib.convert_file(
                tool_name,
                cmd_with_args,
                src,
                report_fname,
                crep_fname,
            )
        else:
            convertLib.convert_file(
                cmd_with_args[0],
                cmd_with_args[1:],
                src,
                report_fname,
                crep_fname,
            )
        # Keep the raw report around only in debug mode
        try:
            if not LOG.isEnabledFor(DEBUG):
                os.remove(report_fname)
        except Exception:
            LOG.debug("Unable to remove file {}".format(report_fname))
    elif type_str == "depscan":
        # Convert depscan and license scan files to html
        depscan_files = utils.find_files(reports_dir, "depscan", True)
        for df in depscan_files:
            if not df.endswith(".html"):
                depscan_data = grafeas.parse(df)
                if depscan_data and len(depscan_data):
                    html_fname = df.replace(".json", ".html")
                    grafeas.render_html(depscan_data, html_fname)
                    track({
                        "id": config.get("run_uuid"),
                        "depscan_summary": depscan_data
                    })
                    LOG.debug(
                        "Depscan and HTML report written to file: %s, %s :thumbsup:",
                        df,
                        html_fname,
                    )
        licence_files = utils.find_files(reports_dir, "license", True)
        for lf in licence_files:
            if not lf.endswith(".html"):
                licence_data = licence.parse(lf)
                if licence_data and len(licence_data):
                    html_fname = lf.replace(".json", ".html")
                    licence.render_html(licence_data, html_fname)
                    track({
                        "id": config.get("run_uuid"),
                        "license_summary": licence_data
                    })
                    LOG.debug(
                        "License check and HTML report written to file: %s, %s :thumbsup:",
                        lf,
                        html_fname,
                    )
def summary(sarif_files, depscan_files=None, aggregate_file=None, override_rules=None):
    """Generate overall scan summary based on the generated SARIF file

    :param sarif_files: List of generated sarif report files
    :param depscan_files: List of depscan report files
    :param aggregate_file: Filename to store aggregate data
    :param override_rules: Build break rules to override for testing

    :returns dict representing the summary and the overall build status string
    """
    # BUG FIX: override_rules previously used a mutable default argument ({}),
    # which is shared across calls. None-sentinel avoids that pitfall while
    # remaining backward compatible.
    if override_rules is None:
        override_rules = {}
    report_summary = {}
    build_status = "pass"
    # This is the list of all runs which will get stored as an aggregate
    run_data_list = []
    default_rules = config.get("build_break_rules").get("default")
    depscan_default_rules = config.get("build_break_rules").get("depscan")
    # Collect stats from depscan files if available
    if depscan_files:
        for df in depscan_files:
            with open(df, mode="r") as drep_file:
                dep_data = get_depscan_data(drep_file)
                if not dep_data:
                    continue
                # depscan-java or depscan-nodejs based on filename
                dep_type = (
                    os.path.basename(df).replace(".json", "").replace("-report", "")
                )
                metrics, required_pkgs_found = calculate_depscan_metrics(dep_data)
                report_summary[dep_type] = {
                    "tool": f"""Dependency Scan ({dep_type.replace("depscan-", "")})""",
                    "critical": metrics["critical"],
                    "high": metrics["high"],
                    "medium": metrics["medium"],
                    "low": metrics["low"],
                    "status": ":white_heavy_check_mark:",
                }
                # Drop any aggregate count so only per-severity columns remain
                report_summary[dep_type].pop("total", None)
                # Compare against the build break rule to determine status
                dep_tool_rules = config.get("build_break_rules").get(dep_type, {})
                build_break_rules = {**depscan_default_rules, **dep_tool_rules}
                if override_rules and override_rules.get("depscan"):
                    build_break_rules = {
                        **build_break_rules,
                        **override_rules.get("depscan"),
                    }
                # Default severity categories for build status
                build_status_categories = (
                    "critical",
                    "required_critical",
                    "optional_critical",
                    "high",
                    "required_high",
                    "optional_high",
                    "medium",
                    "required_medium",
                    "optional_medium",
                    "low",
                    "required_low",
                    "optional_low",
                )
                # Issue 233 - Consider only required packages if available
                if required_pkgs_found:
                    build_status_categories = (
                        "required_critical",
                        "required_high",
                        "required_medium",
                        "required_low",
                    )
                for rsev in build_status_categories:
                    if build_break_rules.get("max_" + rsev) is not None:
                        if metrics.get(rsev) > build_break_rules["max_" + rsev]:
                            report_summary[dep_type]["status"] = ":cross_mark:"
                            build_status = "fail"
    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = json.load(report_file)
            # skip this file if the data is empty
            if not report_data or not report_data.get("runs"):
                # BUG FIX: Logger.warn is deprecated; use warning
                LOG.warning("Report file {} is invalid. Skipping ...".format(sf))
                continue
            # Iterate through all the runs
            for run in report_data["runs"]:
                # Add it to the run data list for aggregation
                run_data_list.append(run)
                tool_desc = run["tool"]["driver"]["name"]
                tool_name = tool_desc
                # Initialise
                report_summary[tool_name] = {
                    "tool": tool_desc,
                    "critical": 0,
                    "high": 0,
                    "medium": 0,
                    "low": 0,
                    "status": ":white_heavy_check_mark:",
                }
                results = run.get("results", [])
                metrics = run.get("properties", {}).get("metrics", None)
                # If the result includes metrics use it. If not compute it
                if metrics:
                    report_summary[tool_name].update(metrics)
                    report_summary[tool_name].pop("total", None)
                else:
                    for aresult in results:
                        sev = aresult["properties"]["issue_severity"].lower()
                        report_summary[tool_name][sev] += 1
                # Compare against the build break rule to determine status
                tool_rules = config.get("build_break_rules").get(tool_name, {})
                build_break_rules = {
                    **default_rules,
                    **tool_rules,
                    **override_rules,
                }
                for rsev in ("critical", "high", "medium", "low"):
                    if build_break_rules.get("max_" + rsev) is not None:
                        if (
                            report_summary.get(tool_name).get(rsev)
                            > build_break_rules["max_" + rsev]
                        ):
                            report_summary[tool_name]["status"] = ":cross_mark:"
                            build_status = "fail"
    # Should we store the aggregate data
    if aggregate_file:
        # agg_sarif_file = aggregate_file.replace(".json", ".sarif")
        # aggregate.sarif_aggregate(run_data_list, agg_sarif_file)
        aggregate.jsonl_aggregate(run_data_list, aggregate_file)
        LOG.debug("Aggregate report written to {}\n".format(aggregate_file))
    return report_summary, build_status
def exec_tool( # scan:ignore tool_name, args, cwd=None, env=utils.get_env(), stdout=subprocess.DEVNULL): """ Convenience method to invoke cli tools Args: tool_name Tool name args cli command and args cwd Current working directory env Environment variables stdout stdout configuration for run command Returns: CompletedProcess instance """ with Progress( console=console, redirect_stderr=False, redirect_stdout=False, refresh_per_second=1, ) as progress: task = None try: env = use_java(env) LOG.debug('⚡︎ Executing {} "{}"'.format(tool_name, " ".join(args))) stderr = subprocess.DEVNULL if LOG.isEnabledFor(DEBUG): stderr = subprocess.STDOUT tool_verb = "Scanning with" if "init" in tool_name: tool_verb = "Initializing" elif "build" in tool_name: tool_verb = "Building with" task = progress.add_task("[green]" + tool_verb + " " + tool_name, total=100, start=False) cp = subprocess.run( args, stdout=stdout, stderr=stderr, cwd=cwd, env=env, check=False, shell=False, encoding="utf-8", ) if cp and stdout == subprocess.PIPE: for line in cp.stdout: progress.update(task, completed=5) if (cp and LOG.isEnabledFor(DEBUG) and cp.returncode and cp.stdout is not None): LOG.debug(cp.stdout) progress.update(task, completed=100, total=100) return cp except Exception as e: if task: progress.update(task, completed=20, total=10, visible=False) if not LOG.isEnabledFor(DEBUG): LOG.info( f"{tool_name} has reported few errors. To view, pass the environment variable SCAN_DEBUG_MODE=debug" ) LOG.debug(e) return None