Example #1
0
    def _analyze_yaml_file(self, file, filename):
        """
        Scan the values of a parsed YAML file for potential secrets.

        Every mapping that carries a ``__line__`` key is passed to
        ``self.analyze_string`` unless its line number is in the parser's
        ignored lines; all other containers are searched for nested dicts.

        :returns: same format as super().analyze()
        :raises yaml.YAMLError: if the file extension is not listed in
            YAML_EXTENSIONS.
        """
        _, extension = os.path.splitext(filename)
        if extension not in YAML_EXTENSIONS:
            # The yaml parser is pretty powerful. It eagerly
            # parses things when it's not even a yaml file. Therefore,
            # we use this heuristic to quit early if appropriate.
            raise yaml.YAMLError

        parser = YamlFileParser(
            file,
            exclude_lines_regex=self.exclude_lines_regex,
        )
        document = parser.json()
        skipped_lines = parser.get_ignored_lines()
        found = {}

        # Depth-first walk driven by an explicit stack.
        pending = [document]
        with self.non_quoted_string_regex():
            while pending:
                node = pending.pop()

                try:
                    if '__line__' in node:
                        # Line-tagged node: scan it (unless ignored) and
                        # do not descend any further.
                        line_number = node['__line__']
                        if line_number not in skipped_lines:
                            found.update(
                                self.analyze_string(
                                    node['__value__'],
                                    line_number,
                                    filename,
                                ),
                            )
                        continue

                    for entry in node:
                        # Dicts yield keys, other iterables yield the
                        # elements themselves.
                        if isinstance(node, dict):
                            child = node[entry]
                        else:
                            child = entry

                        if isinstance(child, dict):
                            pending.append(child)
                except TypeError:
                    # Nodes that cannot be searched or indexed this way
                    # are skipped.
                    pass

        return found
    def test_get_ignored_lines(self):
        """Expect only the line with the whitelist pragma to be ignored."""
        yaml_text = """keyA: value
        keyB: \"another_value\"  # pragma: whitelist secret
        keyC: yet_another_value
        """

        parser = YamlFileParser(mock_file_object(yaml_text))

        # Line numbers are 1-based: the pragma sits on the second line.
        assert parser.get_ignored_lines() == {2}
    def test_get_ignored_lines(self):
        """Expect both allowlist and whitelist pragma lines to be ignored."""
        yaml_text = """keyA: value
        keyB: \"another_value\"  # pragma: allowlist secret
        keyC: \"another_value\"  # pragma: whitelist secret (backwards compatibility test)
        keyD: yet_another_value
        """

        parser = YamlFileParser(mock_file_object(yaml_text))

        # Line numbers are 1-based: the pragmas sit on lines two and three.
        assert parser.get_ignored_lines() == {2, 3}
Example #4
0
    def test_possible_secret_format(
        self,
        yaml_value,
        expected_value,
        expected_is_binary,
    ):
        """
        Check the dict that YamlFileParser.json() produces for one value.

        The entry under ``key`` must hold the expected ``__value__`` and
        ``__is_binary__``; any ``__line__`` is accepted.
        """
        yaml_text = 'key: {yaml_value}'.format(yaml_value=yaml_value)
        parsed = YamlFileParser(mock_file_object(yaml_text)).json()

        expected_entry = {
            '__value__': expected_value,
            '__is_binary__': expected_is_binary,
            '__line__': mock.ANY,
        }
        assert parsed['key'] == expected_entry
Example #5
0
    def _analyze_yaml_file(self, file, filename):
        """
        Scan the values of a parsed YAML file for potential secrets.

        Mappings without a ``__line__`` key are only searched for nested
        dicts. Mappings with one are scanned through
        ``self.analyze_string_content`` unless their line is ignored, and
        the results are filtered against the dumped ``key: value`` pair.

        :returns: same format as super().analyze()
        :raises yaml.YAMLError: if the file is not detected as YAML, or
            if the parsed document is empty.
        """
        if determine_file_type(filename) != FileType.YAML:
            # The yaml parser is pretty powerful. It eagerly
            # parses things when it's not even a yaml file. Therefore,
            # we use this heuristic to quit early if appropriate.
            raise yaml.YAMLError

        parser = YamlFileParser(
            file,
            exclude_lines_regex=self.exclude_lines_regex,
        )
        document = parser.json()
        # If the file is all comments
        if not document:
            raise yaml.YAMLError

        skipped_lines = parser.get_ignored_lines()
        found = {}

        # Depth-first walk driven by an explicit stack.
        pending = [document]
        with self.non_quoted_string_regex():
            while pending:
                node = pending.pop()

                if '__line__' not in node:
                    # Container node: queue its nested dicts. Dicts are
                    # walked by value, other iterables by element.
                    if isinstance(node, dict):
                        children = node.values()
                    else:
                        children = node
                    pending.extend(
                        child for child in children
                        if isinstance(child, dict)
                    )
                    continue

                line_number = node['__line__']
                if line_number in skipped_lines:
                    continue

                # An isinstance check doesn't work in py2
                # so we need the __is_binary__ field.
                is_binary = node['__is_binary__']
                raw_value = node['__value__']
                if is_binary:
                    scannable = self.decode_binary(raw_value)
                else:
                    scannable = raw_value

                secrets = self.analyze_string_content(
                    scannable,
                    line_number,
                    filename,
                )
                if is_binary:
                    secrets = self._encode_yaml_binary_secrets(secrets)

                # Rebuild the "key: value" text on a single line to use
                # as line context for false-positive filtering.
                key_value_line = yaml.dump({
                    node['__original_key__']: raw_value,
                }).replace('\n', '')

                found.update(
                    self._filter_false_positives_with_line_ctx(
                        secrets,
                        key_value_line,
                    ),
                )

        return found