def _analyze_yaml_file(self, file, filename):
    """
    Walk the parsed YAML document and scan every tagged value for secrets.

    :returns: same format as super().analyze()
    :raises yaml.YAMLError: if the extension of ``filename`` is not listed
        in ``YAML_EXTENSIONS``.
    """
    extension = os.path.splitext(filename)[1]
    if extension not in YAML_EXTENSIONS:
        # The YAML parser is permissive: it will happily parse content
        # that is not YAML at all. Bailing out on the extension is a
        # cheap heuristic to avoid that.
        raise yaml.YAMLError

    parser = YamlFileParser(
        file,
        exclude_lines_regex=self.exclude_lines_regex,
    )
    data = parser.json()
    ignored_lines = parser.get_ignored_lines()
    potential_secrets = {}

    # Iterative depth-first walk; nodes still to be visited live here.
    pending = [data]
    with self.non_quoted_string_regex():
        while pending:
            node = pending.pop()

            # Best effort: a node that does not support the membership,
            # indexing or iteration used below raises TypeError and is
            # skipped.
            try:
                if '__line__' in node:
                    # Tagged value: scan it unless its line is ignored,
                    # and never descend into it.
                    if node['__line__'] not in ignored_lines:
                        found = self.analyze_string(
                            node['__value__'],
                            node['__line__'],
                            filename,
                        )
                        potential_secrets.update(found)
                    continue

                # Container: queue every dict child for a later visit.
                node_is_dict = isinstance(node, dict)
                for entry in node:
                    child = node[entry] if node_is_dict else entry
                    if isinstance(child, dict):
                        pending.append(child)
            except TypeError:
                pass

    return potential_secrets
def test_get_ignored_lines(self):
    """Only the line carrying the ``whitelist secret`` pragma is ignored."""
    # Each YAML entry must sit on its own physical line: the assertion
    # below expects the pragma to be found on the second line.
    content = '\n'.join([
        'keyA: value',
        'keyB: "another_value" # pragma: whitelist secret',
        'keyC: yet_another_value',
        '',
    ])
    f = mock_file_object(content)

    ignored_lines = YamlFileParser(f).get_ignored_lines()

    assert ignored_lines == {2}
def test_get_ignored_lines(self):
    """Lines with either the allowlist or the whitelist pragma are ignored."""
    # Each YAML entry must sit on its own physical line: the assertion
    # below expects the two pragmas on the second and third lines.
    content = '\n'.join([
        'keyA: value',
        'keyB: "another_value" # pragma: allowlist secret',
        (
            'keyC: "another_value" # pragma: whitelist secret '
            '(backwards compatibility test)'
        ),
        'keyD: yet_another_value',
        '',
    ])
    f = mock_file_object(content)

    ignored_lines = YamlFileParser(f).get_ignored_lines()

    assert ignored_lines == {2, 3}
def test_possible_secret_format(
    self,
    yaml_value,
    expected_value,
    expected_is_binary,
):
    """Check the tagged structure the parser produces for a single value.

    The value under ``key`` must carry the expected ``__value__`` and
    ``__is_binary__`` entries; any ``__line__`` is accepted.
    """
    document = mock_file_object(
        'key: {yaml_value}'.format(yaml_value=yaml_value),
    )

    parsed = YamlFileParser(document).json()

    expected_entry = {
        '__value__': expected_value,
        '__is_binary__': expected_is_binary,
        '__line__': mock.ANY,
    }
    assert parsed['key'] == expected_entry
def _analyze_yaml_file(self, file, filename):
    """
    Walk the parsed YAML document and scan every tagged value for secrets.

    :returns: same format as super().analyze()
    :raises yaml.YAMLError: if ``filename`` is not classified as YAML, or
        if parsing yields no data.
    """
    if determine_file_type(filename) != FileType.YAML:
        # The YAML parser is permissive: it will happily parse content
        # that is not YAML at all. Bailing out on the file type is a
        # cheap heuristic to avoid that.
        raise yaml.YAMLError

    parser = YamlFileParser(
        file,
        exclude_lines_regex=self.exclude_lines_regex,
    )
    data = parser.json()
    if not data:
        # Nothing was parsed, e.g. the file is all comments.
        raise yaml.YAMLError

    ignored_lines = parser.get_ignored_lines()
    potential_secrets = {}

    # Iterative depth-first walk; nodes still to be visited live here.
    pending = [data]
    with self.non_quoted_string_regex():
        while pending:
            node = pending.pop()

            if '__line__' not in node:
                # Container: queue every dict child for a later visit.
                node_is_dict = isinstance(node, dict)
                for entry in node:
                    child = node[entry] if node_is_dict else entry
                    if isinstance(child, dict):
                        pending.append(child)
                continue

            line_number = node['__line__']
            if line_number in ignored_lines:
                continue

            # An isinstance check doesn't work in py2, so binary values
            # are identified through the __is_binary__ field instead.
            is_binary = node['__is_binary__']
            raw_value = node['__value__']
            if is_binary:
                string_to_scan = self.decode_binary(raw_value)
            else:
                string_to_scan = raw_value

            found = self.analyze_string_content(
                string_to_scan,
                line_number,
                filename,
            )
            if is_binary:
                found = self._encode_yaml_binary_secrets(found)

            # Re-serialise the key/value pair on a single line; it is
            # handed to the false-positive filter together with the
            # findings.
            key_value_mapping = {node['__original_key__']: raw_value}
            dumped_key_value = yaml.dump(key_value_mapping).replace('\n', '')
            found = self._filter_false_positives_with_line_ctx(
                found,
                dumped_key_value,
            )

            potential_secrets.update(found)

    return potential_secrets