コード例 #1
0
    def test_saves_to_baseline():
        """Scanning with ``--baseline`` rewrites results but keeps the plugin settings."""
        # Start from an empty baseline that carries customized plugin settings.
        # The scan is expected to honor those settings while replacing the
        # (empty) results with whatever the new scan finds.
        with transient_settings({
                'plugins_used': [
                    {'name': 'Base64HighEntropyString', 'limit': 4.5},
                ],
        }):
            secrets = SecretsCollection()
            old_secrets = baseline.format_for_output(secrets)

        with mock_printer(main_module) as printer, \
                tempfile.NamedTemporaryFile() as f:
            baseline.save_to_file(old_secrets, f.name)
            f.seek(0)

            # Passing 'test_data' also exercises setting the root directory.
            main_module.main(['scan', 'test_data', '--baseline', f.name])

            f.seek(0)
            new_secrets = json.loads(f.read())

            # The rewritten baseline must differ from the empty collection ...
            reloaded = baseline.load(new_secrets, f.name)
            assert not secrets.exactly_equals(reloaded)

            # ... while the plugin configuration is carried over unchanged.
            expected_plugins = [
                {'name': 'Base64HighEntropyString', 'limit': 4.5},
            ]
            assert new_secrets['plugins_used'] == expected_plugins
            assert not printer.message
コード例 #2
0
def get_secrets_from_baseline(baseline, filter_func=lambda secret: True):
    """Return the secrets of ``baseline`` that are accepted by ``filter_func``.

    :type baseline: SecretsCollection
    :param baseline: SecretsCollection of baseline results. It is only read
                     here; when ``filter_func`` is not callable this very
                     object is returned unchanged.

    :type filter_func: callable
    :param filter_func: predicate called with each secret; a secret is kept
                        when it returns a truthy value. If not supplied,
                        all secrets are kept.

    :rtype: SecretsCollection
    :returns: a new SecretsCollection holding only the accepted secrets.
              Files left without any accepted secret are omitted.
    """
    # Accept any callable (functools.partial, bound methods, builtins, ...),
    # not just plain functions; anything else cannot be used as a predicate,
    # so the baseline is handed back untouched.
    if not callable(filter_func):
        return baseline

    new_secrets = SecretsCollection()
    for filename, file_secrets in baseline.data.items():
        # The __hash__ method of PotentialSecret makes this work
        filtered_results = {
            secret: secret
            for secret in file_secrets if filter_func(secret)
        }

        if filtered_results:
            new_secrets.data[filename] = filtered_results

    return new_secrets
コード例 #3
0
def secrets_collection_factory(secrets=None,
                               plugins=(),
                               exclude_regex=''):  # pragma: no cover
    """Build a SecretsCollection and optionally pre-populate it.

    :type secrets: list(dict)
    :param secrets: list of keyword-argument dicts, each forwarded to
                    _add_secret. Eg. [ {'secret': 'blah'}, ]

    :type plugins: tuple
    :type exclude_regex: str

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(plugins, exclude_regex)

    if plugins:
        collection.plugins = plugins

    # Each entry describes one secret to add to the collection.
    if secrets is not None:
        for secret_kwargs in secrets:
            _add_secret(collection, **secret_kwargs)

    return collection
コード例 #4
0
def test_ensure_file_transformers_are_used(printer):
    """
    Scan a file whose secret is only discoverable through special file
    transformers (plain line-by-line reading would miss it). If the audit
    flow can then locate that secret, it can be inferred that audit also
    knows how to use file transformers.
    """
    config_path = 'test_data/config.env'

    with transient_settings({
            'plugins_used': [{'name': 'Base64HighEntropyString'}],
    }):
        secrets = SecretsCollection()
        secrets.scan_file(config_path)
        assert bool(secrets)

    with open(config_path) as f:
        lines = [line.rstrip() for line in f]

    with mock.patch('detect_secrets.audit.io.print_secret_not_found') as not_found:
        run_logic(secrets, 'y')
        assert not not_found.called

    # line_number is 1-based, hence the -1 when indexing into ``lines``.
    first_secret = list(secrets[config_path])[0]
    assert lines[first_secret.line_number - 1] in printer.message
コード例 #5
0
    def test_main_scan_repo_scan_success_secrets_found(self, mock_file,
                                                       mock_scan, mock_log):
        """A repo scan that finds secrets alerts the hook once and does not call update."""
        tracked_repo_config = {
            'sha': 'does_not_matter',
            'repo': 'repo_name',
            'plugins': {
                'base64_limit': 3,
            },
            'cron': '* * * * *',
            'baseline_file': '.secrets.baseline',
        }
        mock_file.return_value = tracked_repo_config

        # Any non-empty collection counts as "secrets found".
        found = SecretsCollection()
        found.data['junk'] = 'data'
        mock_scan.return_value = found

        argv = [
            '--scan-repo',
            'will-be-mocked',
            '--output-hook',
            'examples/standalone_hook.py',
        ]

        with mock.patch('detect_secrets_server.usage.ExternalHook') as hook, \
                mock.patch('detect_secrets_server.repos.base_tracked_repo.BaseTrackedRepo.update') as update, \
                mock.patch('detect_secrets.core.secrets_collection.SecretsCollection.json') as secrets_json:
            exit_code = main(argv)
            assert exit_code == 0

            assert update.call_count == 0
            assert hook().alert.call_count == 1
            assert secrets_json.call_count == 1
コード例 #6
0
def test_merge():
    """Merging old results into a fresh scan carries over the old labels."""
    old_secrets = SecretsCollection()
    old_secrets.scan_file('test_data/each_secret.py')
    assert len(list(old_secrets)) >= 3  # otherwise, this test won't work.

    # Label the first three secrets of the old scan, each in a different way.
    for position, (_, secret) in enumerate(old_secrets):
        if position == 0:
            secret.is_secret = False
        elif position == 1:
            secret.is_secret = True
        elif position == 2:
            secret.is_verified = True

    new_secrets = SecretsCollection()
    new_secrets.scan_file('test_data/each_secret.py')
    list(new_secrets)[-1][1].is_secret = True

    new_secrets.merge(old_secrets)

    # position -> (expected is_secret, expected is_verified) after the merge
    expected_labels = {
        0: (False, False),
        1: (True, False),
        2: (True, True),
    }
    for position, (_, secret) in enumerate(new_secrets):
        if position in expected_labels:
            want_is_secret, want_is_verified = expected_labels[position]
            assert secret.is_secret is want_is_secret
            assert secret.is_verified is want_is_verified
コード例 #7
0
def initialize(plugins, exclude_regex=None, rootdir='.'):
    """Scan the codebase under ``rootdir`` and return the findings as a
    SecretsCollection object.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_regex: str|None
    :param exclude_regex: case-insensitive pattern; filenames it matches
                          are not scanned.

    :type rootdir: str
    :param rootdir: directory to scan, or the path of a single file.

    :rtype: SecretsCollection
    """
    output = SecretsCollection(plugins, exclude_regex)

    # Accepting a plain file path allows for much easier adhoc usage.
    git_files = (
        [rootdir] if os.path.isfile(rootdir)
        else _get_git_tracked_files(rootdir)
    )
    if not git_files:
        return output

    if exclude_regex:
        pattern = re.compile(exclude_regex, re.IGNORECASE)
        git_files = [name for name in git_files if not pattern.search(name)]

    for name in git_files:
        output.scan_file(name)

    return output
コード例 #8
0
def secrets_collection_factory(secrets=None,
                               plugins=(),
                               exclude_files_regex=None):
    """Build a SecretsCollection for tests and optionally pre-populate it.

    :type secrets: list(dict)
    :param secrets: list of keyword-argument dicts, each forwarded to
                    _add_secret. E.g. [ {'secret': 'blah'}, ]

    :type plugins: tuple
    :param plugins: plugins to attach; each one gets ``should_verify``
                    switched off.

    :type exclude_files_regex: str|None

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(plugins, exclude_files=exclude_files_regex)

    if plugins:
        # We don't want to incur network calls during test cases
        for each_plugin in plugins:
            each_plugin.should_verify = False

        collection.plugins = plugins

    if secrets is not None:
        for secret_kwargs in secrets:
            _add_secret(collection, **secret_kwargs)

    return collection
コード例 #9
0
def test_baseline_filters_out_known_secrets():
    """A commit passes only while every secret in the file is in the baseline."""
    scanned_path = 'test_data/each_secret.py'

    def hook_args(baseline_path):
        # Arguments for checking the scanned file against the given baseline.
        return [scanned_path, '--baseline', baseline_path]

    secrets = SecretsCollection()
    secrets.scan_file(scanned_path)

    with tempfile.NamedTemporaryFile() as f:
        baseline.save_to_file(secrets, f.name)
        f.seek(0)

        # Every secret is known to the baseline, so the commit goes through.
        assert_commit_succeeds(hook_args(f.name))

    # Remove one arbitrary secret, so that it won't be the full set.
    secrets.data[scanned_path].pop()

    with tempfile.NamedTemporaryFile() as f:
        baseline.save_to_file(secrets, f.name)
        f.seek(0)

        # Merely providing a baseline must not make everything pass.
        assert_commit_blocked(hook_args(f.name))
コード例 #10
0
ファイル: baseline.py プロジェクト: soopsio/detect-secrets
def initialize(plugins, exclude_regex=None, rootdir='.'):
    """Scan ``rootdir`` and collect the findings in a SecretsCollection.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_regex: str|None
    :param exclude_regex: filenames matching this pattern (ignoring case)
                          are skipped.

    :type rootdir: str
    :param rootdir: either a directory or a single file to scan.

    :rtype: SecretsCollection
    """
    output = SecretsCollection(plugins, exclude_regex)

    if os.path.isfile(rootdir):
        # A single file may be given directly, which eases adhoc usage.
        candidates = [rootdir]
    else:
        candidates = _get_git_tracked_files(rootdir)

    if candidates:
        if exclude_regex:
            matcher = re.compile(exclude_regex, re.IGNORECASE)
            candidates = (
                path for path in candidates if not matcher.search(path)
            )

        for path in candidates:
            output.scan_file(path)

    return output
コード例 #11
0
    def modified_baseline(self):
        """Yield a scan of ``self.FILENAME`` with every line number shifted by one."""
        shifted = SecretsCollection()
        shifted.scan_file(self.FILENAME)

        for _filename, found in shifted:
            found.line_number = found.line_number + 1

        yield shifted
コード例 #12
0
 def test_load_baseline_without_any_valid_fields(self, mock_log):
     """A baseline with none of the expected keys raises IOError and logs the format error."""
     malformed = json.dumps({'junk': 'dictionary'})

     with pytest.raises(IOError):
         SecretsCollection.load_baseline_from_string(malformed)

     assert mock_log.error_messages == 'Incorrectly formatted baseline!\n'
コード例 #13
0
    def test_load_baseline_with_invalid_input(self, mock_log):
        """Loading a baseline that lacks the expected keys raises IOError and logs an error."""
        bad_baseline = json.dumps({'junk': 'dictionary'})

        with pytest.raises(IOError):
            SecretsCollection.load_baseline_from_string(bad_baseline)

        assert mock_log.getLogger().error.called
コード例 #14
0
    def test_no_modifications(base_state, scanned_results):
        """Trimming against the scanned results leaves the JSON equal to ``base_state``."""
        tracked = SecretsCollection.load_from_baseline({'results': base_state})
        rescanned = SecretsCollection.load_from_baseline(
            {'results': scanned_results},
        )

        tracked.trim(rescanned)

        assert tracked.json() == base_state
コード例 #15
0
 def test_load_baseline_without_exclude(self, mock_log):
     """A baseline holding only 'plugins_used' and 'results' is rejected with IOError."""
     incomplete = {
         'plugins_used': (),
         'results': {},
     }

     with pytest.raises(IOError):
         SecretsCollection.load_baseline_from_string(json.dumps(incomplete))

     assert mock_log.error_messages == 'Incorrectly formatted baseline!\n'
コード例 #16
0
def test_file_no_longer_exists(printer, mock_user_decision):
    """No user decision is requested when the two collections reference different files."""
    only_file_b = SecretsCollection()
    only_file_b['fileB'].add(potential_secret_factory('a'))

    only_file_a = SecretsCollection()
    only_file_a['fileA'].add(potential_secret_factory('a'))

    run_logic(only_file_b, only_file_a)

    assert not mock_user_decision.called
コード例 #17
0
def test_basic(file_content):
    """Scanning a temporary file with ``file_content`` yields exactly one secret."""
    secrets = SecretsCollection()

    with tempfile.NamedTemporaryFile() as f:
        f.write(file_content.encode())
        # Rewind after writing, before the file is scanned by name.
        f.seek(0)

        secrets.scan_file(f.name)

    found = list(secrets)
    assert len(found) == 1
コード例 #18
0
    def test_load_baseline_with_invalid_input(self, mock_log):
        """An unrecognized baseline dict raises IOError and logs the format error message."""
        payload = {'junk': 'dictionary'}

        with pytest.raises(IOError):
            SecretsCollection.load_baseline_from_string(json.dumps(payload))

        expected_log = 'Incorrectly formatted baseline!\n'
        assert mock_log.error_messages == expected_log
コード例 #19
0
def test_bool():
    """Truthiness of a SecretsCollection follows whether it holds any secrets."""
    scanned_path = 'test_data/each_secret.py'

    secrets = SecretsCollection()
    assert not secrets  # freshly created: empty

    secrets.scan_file(scanned_path)
    assert secrets  # populated by the scan

    # An entry for the file remains, but it no longer holds any secrets.
    secrets[scanned_path].clear()
    assert not secrets
コード例 #20
0
def initialize(
    path,
    plugins,
    exclude_files_regex=None,
    exclude_lines_regex=None,
    should_scan_all_files=False,
):
    """Scan the given paths for secrets and return them in a
    SecretsCollection object.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_files_regex: str|None
    :param exclude_files_regex: case-insensitive pattern; matching filenames
                                are not scanned.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: forwarded to the SecretsCollection as
                                ``exclude_lines``.

    :type path: list
    :param path: files and/or directories to scan. Entries that are neither
                 are reported through ``log.error`` and skipped.

    :type should_scan_all_files: bool
    :param should_scan_all_files: for directories, use
                                  _get_files_recursively instead of
                                  _get_git_tracked_files.

    :rtype: SecretsCollection
    """
    output = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    files_to_scan = []
    for element in path:
        if os.path.isdir(element):
            if should_scan_all_files:
                files_to_scan.extend(_get_files_recursively(element))
                continue

            tracked = _get_git_tracked_files(element)
            if tracked:
                files_to_scan.extend(tracked)
            continue

        if os.path.isfile(element):
            files_to_scan.append(element)
            continue

        log.error('detect-secrets: ' + element + ': No such file or directory')

    if not files_to_scan:
        return output

    if exclude_files_regex:
        exclude_files_regex = re.compile(exclude_files_regex, re.IGNORECASE)
        files_to_scan = (
            candidate
            for candidate in files_to_scan
            if not exclude_files_regex.search(candidate)
        )

    for candidate in files_to_scan:
        output.scan_file(candidate)

    return output
コード例 #21
0
    def test_deleted_secret_file():
        """Trimming against an empty scan drops a file's secrets only if the file is in ``filelist``."""
        scanned_path = 'test_data/each_secret.py'

        secrets = SecretsCollection()
        secrets.scan_file(scanned_path)

        # Without a filelist, the empty scan does not remove anything.
        empty_scan = SecretsCollection()
        secrets.trim(empty_scan)
        assert secrets

        # Once the file is listed, its secrets are trimmed away.
        another_empty_scan = SecretsCollection()
        secrets.trim(another_empty_scan, filelist=[scanned_path])
        assert not secrets
コード例 #22
0
    def test_strict_equality():
        """Collections differing only in line number are ``==`` but not ``exactly_equals``."""
        on_default_line = potential_secret_factory()
        secretsA = SecretsCollection()
        secretsA[on_default_line.filename].add(on_default_line)

        on_line_two = potential_secret_factory(line_number=2)
        secretsB = SecretsCollection()
        secretsB[on_line_two.filename].add(on_line_two)

        assert secretsA == secretsB
        assert not secretsA.exactly_equals(secretsB)
コード例 #23
0
def find_secrets_in_files(args, plugins):
    """Scan every file in ``args.filenames`` except the baseline file.

    :param args: object exposing ``filenames`` (iterable of paths) and
                 ``baseline`` (indexable; element 0 is the baseline filename).
    :param plugins: passed to the SecretsCollection constructor.

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(plugins)

    for filename in args.filenames:
        # The baseline file itself is never scanned.
        if filename != args.baseline[0]:
            collection.scan_file(filename)

    return collection
コード例 #24
0
    def scan(self, exclude_files_regex=None, exclude_lines_regex=None, scan_head=False):
        """Fetches latest changes, and scans the git diff between a starting
        commit and HEAD, one changed file at a time.

        If fetching the diff raises subprocess.CalledProcessError, the error
        is caught here: self.update() is called and the secrets collected so
        far are returned without being filtered against the baseline.

        :type exclude_files_regex: str|None
        :param exclude_files_regex: A regex matching filenames to skip over.

        :type exclude_lines_regex: str|None
        :param exclude_lines_regex: A regex matching lines to skip over.

        :type scan_head: bool
        :param scan_head: If True, diff against the hash returned by
            git.get_empty_tree_commit_hash() instead of self.last_commit_hash.

        :rtype: SecretsCollection
        :returns: secrets found. When a baseline file is configured and can
            be retrieved, secrets already present in it are filtered out.
        """
        self.storage.fetch_new_changes()

        default_plugins = initialize_plugins.from_parser_builder(
            self.plugin_config,
            exclude_lines_regex=exclude_lines_regex,
        )
        # TODO Issue 17: Ignoring self.exclude_regex, using the server scan CLI arg
        secrets = SecretsCollection(
            plugins=default_plugins,
            exclude_files=exclude_files_regex,
            exclude_lines=exclude_lines_regex,
        )

        # Starting point of the diff: the empty tree when scan_head is set,
        # otherwise the last commit recorded on this object.
        scan_from_this_commit = git.get_empty_tree_commit_hash() if scan_head else self.last_commit_hash
        try:
            diff_name_only = self.storage.get_diff_name_only(scan_from_this_commit)

            # Do a per-file diff + scan so we don't get an OOM if the commit-diff is too large
            for filename in diff_name_only:
                file_diff = self.storage.get_diff(scan_from_this_commit, filename)

                secrets.scan_diff(
                    file_diff,
                    baseline_filename=self.baseline_filename,
                    last_commit_hash=scan_from_this_commit,
                    repo_name=self.name,
                )
        except subprocess.CalledProcessError:
            # The diff could not be produced: call update() and hand back
            # whatever was collected before the failure.
            self.update()
            return secrets

        if self.baseline_filename:
            baseline = self.storage.get_baseline_file(self.baseline_filename)
            if baseline:
                # Keep only the secrets that the baseline does not already list.
                baseline_collection = SecretsCollection.load_baseline_from_string(baseline)
                secrets = get_secrets_not_in_baseline(secrets, baseline_collection)

        return secrets
コード例 #25
0
def find_secrets_in_files(args):
    """Scan ``args.filenames`` with the plugins described by ``args.plugins``.

    The file named by ``args.baseline[0]`` is skipped.

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(initialize.from_parser_builder(args.plugins))

    for candidate in args.filenames:
        # Obviously, don't detect the baseline file
        if candidate != args.baseline[0]:
            collection.scan_file(candidate)

    return collection
コード例 #26
0
    def test_load_baseline_from_file_fails_early_on_bad_filename(
            self, mock_log):
        """A decode error while reading the file propagates before the string loader runs."""
        patched_loader = mock.patch.object(
            SecretsCollection,
            'load_baseline_from_string',
        )

        with patched_loader as mock_load_baseline_from_string, \
                mock_open('will_throw_error') as mock_file:
            # Make reading the (mocked) file blow up.
            mock_file().read.side_effect = MockUnicodeDecodeError

            with pytest.raises(UnicodeDecodeError):
                SecretsCollection.load_baseline_from_file('does_not_matter')

            assert not mock_load_baseline_from_string.called
            assert mock_log.getLogger().error.called
コード例 #27
0
def find_secrets_in_files(args):
    """Return a SecretsCollection scanned from ``args.filenames``.

    Plugins are built from ``args.plugins``; the baseline file
    (``args.baseline[0]``) is excluded from the scan.

    :rtype: SecretsCollection
    """
    plugins = initialize.from_parser_builder(args.plugins)
    collection = SecretsCollection(plugins)

    # Obviously, don't detect the baseline file
    scannable = (
        name for name in args.filenames if name != args.baseline[0]
    )
    for name in scannable:
        collection.scan_file(name)

    return collection
コード例 #28
0
def main(argv: Optional[List[str]] = None) -> int:
    """Scan the given files and compare the findings against the baseline.

    :returns: 1 if argument parsing raises ValueError or if secrets not
        covered by the baseline are found; 3 if the baseline file was
        rewritten; 0 otherwise.
    """
    try:
        args = parse_args(argv)
    except ValueError:
        return 1

    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    # Find all secrets in files to be committed
    secrets = SecretsCollection()
    for filename in args.filenames:
        secrets.scan_file(filename)

    new_secrets = secrets - args.baseline if args.baseline else secrets
    if new_secrets:
        pretty_print_diagnostics(new_secrets)
        return 1

    if not args.baseline:
        return 0

    # Only attempt baseline modifications if we don't find any new secrets.
    is_modified = should_update_baseline(
        args.baseline,
        scanned_results=secrets,
        filelist=args.filenames,
        baseline_version=args.baseline_version,
    )
    if not is_modified:
        return 0

    if args.baseline_version != VERSION:
        with open(args.baseline_filename) as f:
            old_baseline = json.loads(f.read())

        # Override the results, because this has been updated in `should_update_baseline`.
        old_baseline['results'] = args.baseline.json()

        args.baseline = baseline.upgrade(old_baseline)

    baseline.save_to_file(args.baseline, filename=args.baseline_filename)

    notice = (
        'The baseline file was updated.\n'
        'Probably to keep line numbers of secrets up-to-date.\n'
        'Please `git add {}`, thank you.\n\n'
    )
    print(notice.format(args.baseline_filename))
    return 3
コード例 #29
0
    def get_baseline_file(self, formatter=baseline.format_for_output):
        """Yield a temporary file holding an old-style baseline for ``self.FILENAME``."""
        secrets = SecretsCollection()
        secrets.scan_file(self.FILENAME)

        with tempfile.NamedTemporaryFile() as f:
            # Format the output while VERSION is patched to '0.0.1'.
            with mock.patch('detect_secrets.core.baseline.VERSION', '0.0.1'):
                data = formatter(secrets)

            # Simulating old version: the first plugin's 'limit' key is
            # renamed to 'base64_limit'.
            first_plugin = data['plugins_used'][0]
            first_plugin['base64_limit'] = first_plugin.pop('limit')
            baseline.save_to_file(data, f.name)

            yield f
コード例 #30
0
    def test_maintains_labels():
        """Trimming against an identical fresh scan keeps an existing ``is_secret`` label."""
        scanned_path = 'test_data/each_secret.py'

        labelled_secrets = SecretsCollection()
        labelled_secrets.scan_file(scanned_path)

        # Label only the first secret encountered.
        for _, secret in labelled_secrets:
            secret.is_secret = True
            break

        fresh_scan = SecretsCollection()
        fresh_scan.scan_file(scanned_path)

        labelled_secrets.trim(scanned_results=fresh_scan)

        assert any(secret.is_secret for _, secret in labelled_secrets)
コード例 #31
0
    def test_basic(configure_plugins):
        """Subtracting a more heavily filtered baseline leaves only the extra secrets."""
        scanned_path = 'test_data/each_secret.py'

        with transient_settings({**configure_plugins, 'filters_used': []}):
            secrets = SecretsCollection()
            secrets.scan_file(scanned_path)

        # This baseline will have less secrets, since it filtered out some.
        exclude_example_lines = {
            'path': 'detect_secrets.filters.regex.should_exclude_line',
            'pattern': [
                'EXAMPLE',
            ],
        }
        with transient_settings({
                **configure_plugins,
                'filters_used': [exclude_example_lines],
        }):
            baseline = SecretsCollection()
            baseline.scan_file(scanned_path)

        # This also covers the != operator for the same file with a different
        # number of secrets; it lives here to avoid repeating the setup.
        assert secrets != baseline

        result = secrets - baseline
        assert len(result[scanned_path]) == 2
        assert len(secrets[scanned_path]) == 4
コード例 #32
0
    def test_file_based_success_yaml():
        """HexHighEntropyString (limit 3.0) reports secrets on lines 3 and 5 of the YAML file."""
        plugin_config = [
            {
                'name': 'HexHighEntropyString',
                'limit': 3.0,
            },
        ]
        get_settings().configure_plugins(plugin_config)

        secrets = SecretsCollection()
        secrets.scan_file('test_data/config.yaml')

        # The second line of a secret's string form is its location.
        locations = [str(secret).splitlines()[1] for _, secret in secrets]
        assert locations == [
            'Location:    test_data/config.yaml:3',
            'Location:    test_data/config.yaml:5',
        ]
コード例 #33
0
def test_disable_filter(parser):
    """Disabling the UUID filter makes a UUID-valued assignment show up as a secret."""
    uuid_filter = 'detect_secrets.filters.heuristic.is_potential_uuid'

    with tempfile.NamedTemporaryFile() as f:
        f.write(f'secret = "{uuid.uuid4()}"'.encode())

        # Reference run: with only KeywordDetector configured, nothing is
        # reported (presumably because the UUID filter is active by default).
        f.seek(0)
        with transient_settings({
                'plugins_used': [{'name': 'KeywordDetector'}],
        }):
            filtered_scan = SecretsCollection()
            filtered_scan.scan_file(f.name)

            assert not filtered_scan

        f.seek(0)
        with default_settings():
            parser.parse_args([
                'scan',
                '--disable-filter',
                uuid_filter,

                # invalid filter
                '--disable-filter',
                'blah',
            ])

            unfiltered_scan = SecretsCollection()
            unfiltered_scan.scan_file(f.name)

            assert unfiltered_scan
コード例 #34
0
def get_baseline(baseline_filename):
    """Load the baseline stored in ``baseline_filename``.

    Returns None when no filename is given. If the version check raises
    ValueError, an explanatory error is logged and the exception is re-raised.

    :raises: IOError
    :raises: ValueError
    """
    if not baseline_filename:
        return None

    raise_exception_if_baseline_file_is_not_up_to_date(baseline_filename)

    baseline_string = _get_baseline_string_from_file(baseline_filename)
    baseline_version = json.loads(baseline_string).get('version')

    try:
        raise_exception_if_baseline_version_is_outdated(baseline_version)
    except ValueError:
        # A baseline without a version is reported as '0.0.0'.
        log.error(
            'The supplied baseline may be incompatible with the current\n'
            'version of detect-secrets. Please recreate your baseline to\n'
            'avoid potential mis-configurations.\n\n'
            'Current Version: %s\n'
            'Baseline Version: %s',
            VERSION,
            baseline_version or '0.0.0',
        )
        raise

    return SecretsCollection.load_baseline_from_string(baseline_string)
コード例 #35
0
    def test_load_baseline_from_string(self, mock_gmtime):
        """
        load_baseline_from_string serves as a proxy for testing
        _load_baseline_from_dict, since it is (presumably) the most direct
        entry point into that private function.
        """
        original = self.get_baseline_dict(mock_gmtime)

        serialized = json.dumps(original)
        loaded = SecretsCollection.load_baseline_from_string(serialized)
        secrets = loaded.format_for_baseline_output()

        self.assert_loaded_collection_is_original_collection(original, secrets)
コード例 #36
0
ファイル: baseline.py プロジェクト: soopsio/detect-secrets
def get_secrets_not_in_baseline(results, baseline):
    """Return the secrets in ``results`` that the baseline does not contain.

    :type results: SecretsCollection
    :param results: SecretsCollection of current results

    :type baseline: SecretsCollection
    :param baseline: SecretsCollection of baseline results. Files matching
                     its ``exclude_regex`` (case-insensitive) are skipped.

    :rtype: SecretsCollection
    :returns: SecretsCollection of new results (filtering out baseline)
    """
    regex = (
        re.compile(baseline.exclude_regex, re.IGNORECASE)
        if baseline.exclude_regex
        else None
    )

    new_secrets = SecretsCollection()
    for filename, found in results.data.items():
        if regex and regex.search(filename):
            continue

        if filename not in baseline.data:
            # We don't have a previous record of this file, so obviously
            # everything is new. Note: the mapping is shared, not copied.
            new_secrets.data[filename] = found
            continue

        known = baseline.data[filename]

        # The __hash__ method of PotentialSecret makes this work
        unseen = {
            secret: secret
            for secret in found
            if secret not in known
        }

        if unseen:
            new_secrets.data[filename] = unseen

    return new_secrets