def rule_match_nosem(
    rule_match: RuleMatch, strict: bool
) -> Tuple[bool, List[SemgrepError]]:
    """Decide whether a 'nosem' comment suppresses ``rule_match``.

    Only the first line of the match is inspected for the comment.

    Args:
        rule_match: The finding to check; its ``lines``, ``id`` and
            ``start['line']`` are read.
        strict: When true, every id listed in the comment that does not equal
            ``rule_match.id`` is reported as a WARN-level ``SemgrepError`` in
            the returned list; otherwise it is only logged verbosely.

    Returns:
        A ``(suppressed, errors)`` pair. ``suppressed`` is true when the
        comment has no id list, or when one of its ids equals
        ``rule_match.id``. ``errors`` lists the strict-mode warnings in the
        order the ids appear in the comment.
    """
    if not rule_match.lines:
        return False, []

    # Only consider the first line of a match. This will keep consistent
    # behavior on where we expect a 'nosem' comment to exist. If we allow these
    # comments on any line of a match it will get confusing as to what finding
    # the 'nosem' is referring to.
    re_match = NOSEM_INLINE_RE.search(rule_match.lines[0])
    if re_match is None:
        return False, []

    ids_str = re_match.groupdict()["ids"]
    if ids_str is None:
        logger.verbose(
            f"found 'nosem' comment, skipping rule '{rule_match.id}' on line {rule_match.start['line']}"
        )
        return True, []

    # Strip quotes to allow for use of nosem as an HTML attribute inside tags.
    # HTML comments inside tags are not allowed by the spec.
    stripped_ids = (
        pattern_id.strip().strip("\"'")
        for pattern_id in COMMA_SEPARATED_LIST_RE.split(ids_str)
        if pattern_id.strip()
    )

    # Keep only non-empty ids made of alphanum+dashes+underscores+periods.
    # This removes trailing symbols from comments, such as HTML comments `-->`
    # or C-like multiline comments `*/`, as well as ids that were nothing but
    # quotes. dict.fromkeys de-duplicates while preserving the order in which
    # the ids were written, so the reported errors are stable across runs
    # (iterating a set of strings is subject to hash randomization).
    pattern_ids = list(
        dict.fromkeys(
            pattern_id
            for pattern_id in stripped_ids
            if pattern_id and not sub(r"[\w\-\.]+", "", pattern_id)
        )
    )

    errors: List[SemgrepError] = []
    result = False
    for pattern_id in pattern_ids:
        if rule_match.id == pattern_id:
            logger.verbose(
                f"found 'nosem' comment with id '{pattern_id}', skipping rule '{rule_match.id}' on line {rule_match.start['line']}"
            )
            result = True
        else:
            message = f"found 'nosem' comment with id '{pattern_id}', but no corresponding rule trying '{rule_match.id}'"
            if strict:
                errors.append(SemgrepError(message, level=Level.WARN))
            else:
                logger.verbose(message)

    return result, errors
def rule_match_nosem(rule_match: RuleMatch, strict: bool) -> bool:
    """Return whether a 'nosem' comment suppresses ``rule_match``.

    Only the first line of the match is inspected for the comment.

    Args:
        rule_match: The finding to check; its ``lines``, ``id`` and
            ``start['line']`` are read.
        strict: When true, an id listed in the comment that does not equal
            ``rule_match.id`` raises instead of being logged.

    Returns:
        True when the comment has no id list, or when one of its ids equals
        ``rule_match.id``; False otherwise.

    Raises:
        SemgrepError: In strict mode, for the first id (in the order written
            in the comment) that does not equal ``rule_match.id``.
    """
    if not rule_match.lines:
        return False

    # Only consider the first line of a match. This will keep consistent
    # behavior on where we expect a 'nosem' comment to exist. If we allow these
    # comments on any line of a match it will get confusing as to what finding
    # the 'nosem' is referring to.
    re_match = NOSEM_INLINE_RE.search(rule_match.lines[0])
    if re_match is None:
        return False

    ids_str = re_match.groupdict()["ids"]
    if ids_str is None:
        logger.debug(
            f"found 'nosem' comment, skipping rule '{rule_match.id}' on line {rule_match.start['line']}"
        )
        return True

    # De-duplicate while keeping the order the ids were written in, so that
    # the id named in a strict-mode error does not depend on set iteration
    # order (which varies with string hash randomization).
    pattern_ids = list(
        dict.fromkeys(
            pattern_id.strip()
            for pattern_id in COMMA_SEPARATED_LIST_RE.split(ids_str)
            if pattern_id.strip()
        )
    )

    result = False
    for pattern_id in pattern_ids:
        if rule_match.id == pattern_id:
            logger.debug(
                f"found 'nosem' comment with id '{pattern_id}', skipping rule '{rule_match.id}' on line {rule_match.start['line']}"
            )
            result = True
        else:
            message = f"found 'nosem' comment with id '{pattern_id}', but no corresponding rule trying '{rule_match.id}'"
            if strict:
                raise SemgrepError(message)
            else:
                logger.debug(message)

    return result