Example #1
    def test_scan_with_baseline(self, mock_subprocess_obj,
                                mock_load_from_string, mock_apply):
        repo = mock_tracked_repo()

        # Setup secrets
        secretA = PotentialSecret('type', 'filenameA', 1, 'blah')
        secretB = PotentialSecret('type', 'filenameA', 2, 'curry')
        original_secrets = SecretsCollection()
        original_secrets.data['filenameA'] = {
            secretA: secretA,
            secretB: secretB,
        }
        baseline_secrets = SecretsCollection()
        baseline_secrets.data['filenameA'] = {
            secretA: secretA,
        }

        # Easier than mocking load_from_diff.
        mock_apply.side_effect = lambda orig, base: \
            get_secrets_not_in_baseline(original_secrets, baseline_secrets)

        mock_subprocess_obj.side_effect = mock_subprocess((SubprocessMock(
            expected_input='git show',
            mocked_output=b'will be mocked',
        ), ))
        secrets = repo.scan()

        assert len(secrets.data) == 1
        assert secrets.data['filenameA'][secretB] == secretB
Example #2
    def test_basic(configure_plugins):
        with transient_settings({**configure_plugins, 'filters_used': []}):
            secrets = SecretsCollection()
            secrets.scan_file('test_data/each_secret.py')

        # This baseline will have fewer secrets, since the filter removed some.
        with transient_settings({
                **configure_plugins,
                'filters_used': [
                    {
                        'path':
                        'detect_secrets.filters.regex.should_exclude_line',
                        'pattern': [
                            'EXAMPLE',
                        ],
                    },
                ],
        }):
            baseline = SecretsCollection()
            baseline.scan_file('test_data/each_secret.py')

        # This also exercises the != operator for the same file with a different number
        # of secrets. It's tucked into this unrelated test to avoid setting up the
        # boilerplate again.
        assert secrets != baseline

        result = secrets - baseline
        assert len(result['test_data/each_secret.py']) == 2
        assert len(secrets['test_data/each_secret.py']) == 4
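A minimal sketch of the difference semantics exercised above, using the same collections:
subtracting one SecretsCollection from another yields a new collection containing only the
secrets that are absent from the right-hand side, keyed by file.

    only_new = secrets - baseline              # new SecretsCollection
    for filename, secret in only_new:          # iteration yields (filename, secret) pairs
        print(filename, secret.secret_hash)    # secret_hash is a hash, not the raw value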
Example #3
def test_merge():
    old_secrets = SecretsCollection()
    old_secrets.scan_file('test_data/each_secret.py')
    assert len(list(old_secrets)) >= 3  # otherwise, this test won't work.

    index = 0
    for _, secret in old_secrets:
        if index == 0:
            secret.is_secret = False
        elif index == 1:
            secret.is_secret = True
        elif index == 2:
            secret.is_verified = True

        index += 1

    new_secrets = SecretsCollection()
    new_secrets.scan_file('test_data/each_secret.py')
    list(new_secrets)[-1][1].is_secret = True

    new_secrets.merge(old_secrets)

    index = 0
    for _, secret in new_secrets:
        if index == 0:
            assert secret.is_secret is False
            assert secret.is_verified is False
        elif index == 1:
            assert secret.is_secret is True
            assert secret.is_verified is False
        elif index == 2:
            assert secret.is_secret is True
            assert secret.is_verified is True

        index += 1
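A minimal sketch of the merge behavior tested above, assuming the potential_secret_factory
helper used in the other examples is available: merge() copies audit labels from an older
collection onto matching secrets in a fresh scan.

    old = SecretsCollection()
    old['fileA'].add(potential_secret_factory('a'))
    next(iter(old['fileA'])).is_secret = True      # label the old copy as a real secret

    new = SecretsCollection()
    new['fileA'].add(potential_secret_factory('a'))

    new.merge(old)                                 # carries the label over to the new scan
    assert next(iter(new['fileA'])).is_secret is True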
Example #4
def test_disable_filter(parser):
    with tempfile.NamedTemporaryFile() as f:
        f.write(f'secret = "{uuid.uuid4()}"'.encode())

        # First, make sure the default filters suppress it (the UUID heuristic filters it out).
        f.seek(0)
        with transient_settings({
                'plugins_used': [{
                    'name': 'KeywordDetector',
                }],
        }):
            secrets = SecretsCollection()
            secrets.scan_file(f.name)

            assert not secrets

        f.seek(0)
        with default_settings():
            parser.parse_args([
                'scan',
                '--disable-filter',
                'detect_secrets.filters.heuristic.is_potential_uuid',

                # invalid filter
                '--disable-filter',
                'blah',
            ])

            secrets = SecretsCollection()
            secrets.scan_file(f.name)

            assert secrets
Example #5
    def test_mismatch_files():
        secretsA = SecretsCollection()
        secretsA.scan_file('test_data/each_secret.py')

        secretsB = SecretsCollection()
        secretsB.scan_file('test_data/files/file_with_secrets.py')

        assert secretsA != secretsB
Example #6
def test_file_no_longer_exists(printer, mock_user_decision):
    secretsA = SecretsCollection()
    secretsA['fileB'].add(potential_secret_factory('a'))

    secretsB = SecretsCollection()
    secretsB['fileA'].add(potential_secret_factory('a'))

    run_logic(secretsA, secretsB)
    assert not mock_user_decision.called
Example #7
    def test_no_overlapping_files(configure_plugins):
        secrets_a = SecretsCollection()
        secrets_b = SecretsCollection()
        with transient_settings({**configure_plugins, 'filters_used': []}):
            secrets_a.scan_file('test_data/each_secret.py')
            secrets_b.scan_file('test_data/config.env')

        assert (secrets_a - secrets_b).files == {'test_data/each_secret.py'}
        assert (secrets_b - secrets_a).files == {'test_data/config.env'}
Example #8
    def test_deleted_secret_file():
        secrets = SecretsCollection()
        secrets.scan_file('test_data/each_secret.py')

        secrets.trim(SecretsCollection())
        assert secrets

        secrets.trim(SecretsCollection(),
                     filelist=['test_data/each_secret.py'])
        assert not secrets
Example #9
    def test_strict_equality():
        secret = potential_secret_factory()
        secretsA = SecretsCollection()
        secretsA[secret.filename].add(secret)

        secret = potential_secret_factory(line_number=2)
        secretsB = SecretsCollection()
        secretsB[secret.filename].add(secret)

        assert secretsA == secretsB
        assert not secretsA.exactly_equals(secretsB)
Example #10
    def test_maintains_labels():
        labelled_secrets = SecretsCollection()
        labelled_secrets.scan_file('test_data/each_secret.py')
        for _, secret in labelled_secrets:
            secret.is_secret = True
            break

        secrets = SecretsCollection()
        secrets.scan_file('test_data/each_secret.py')

        labelled_secrets.trim(scanned_results=secrets)

        assert any([secret.is_secret for _, secret in labelled_secrets])
Example #11
def test_ensure_file_transformers_are_used(printer):
    """
    In this test, we construct a situation where `detect-secrets scan` leverages special
    file transformers in order to find a secret that wouldn't otherwise be found with
    normal line-by-line reading. If audit is then able to find this secret, we can infer
    that it too knows how to use file transformers.
    """
    with transient_settings({
            'plugins_used': [
                {
                    'name': 'Base64HighEntropyString'
                },
            ],
    }):
        secrets = SecretsCollection()
        secrets.scan_file('test_data/config.env')
        assert bool(secrets)

    with open('test_data/config.env') as f:
        lines = [line.rstrip() for line in f.readlines()]

    with mock.patch('detect_secrets.audit.io.print_secret_not_found') as m:
        run_logic(secrets, 'y')
        assert not m.called

    line_number = list(secrets['test_data/config.env'])[0].line_number
    assert lines[line_number - 1] in printer.message
Example #12
def secrets_collection_factory(secrets=None,
                               plugins=(),
                               exclude_files_regex=None):
    """
    :type secrets: list(dict)
    :param secrets: list of params to pass to add_secret.
                    E.g. [ {'secret': 'blah'}, ]

    :type plugins: tuple
    :type exclude_files_regex: str|None

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
    )

    if plugins:
        for plugin in plugins:
            # We don't want to incur network calls during test cases
            plugin.should_verify = False

        collection.plugins = plugins

    # Handle secrets
    if secrets is None:
        return collection

    for kwargs in secrets:
        _add_secret(collection, **kwargs)

    return collection
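A hypothetical call to the factory above, using only the kwargs documented in its docstring;
the exclude pattern is illustrative.

    collection = secrets_collection_factory(
        secrets=[
            {'secret': 'blah'},                  # kwargs forwarded to _add_secret
        ],
        exclude_files_regex=r'\.baseline$',
    )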
Example #13
def secrets_collection_factory(secrets=None,
                               plugins=(),
                               exclude_regex=''):  # pragma: no cover
    """
    :type secrets: list(dict)
    :param secrets: list of params to pass to add_secret.
                    E.g. [ {'secret': 'blah'}, ]

    :type plugins: tuple
    :type exclude_regex: str

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(plugins, exclude_regex)

    if plugins:
        collection.plugins = plugins

    # Handle secrets
    if secrets is None:
        return collection

    for kwargs in secrets:
        _add_secret(collection, **kwargs)

    return collection
Example #14
    def test_main_scan_repo_scan_success_secrets_found(self, mock_file,
                                                       mock_scan, mock_log):
        mock_file.return_value = {
            'sha': 'does_not_matter',
            'repo': 'repo_name',
            'plugins': {
                'base64_limit': 3,
            },
            'cron': '* * * * *',
            'baseline_file': '.secrets.baseline',
        }

        mock_secret_collection = SecretsCollection()
        mock_secret_collection.data['junk'] = 'data'
        mock_scan.return_value = mock_secret_collection

        with mock.patch('detect_secrets_server.usage.ExternalHook') as hook, \
                mock.patch('detect_secrets_server.repos.base_tracked_repo.BaseTrackedRepo.update') as update, \
                mock.patch('detect_secrets.core.secrets_collection.SecretsCollection.json') as secrets_json:
            assert main([
                '--scan-repo',
                'will-be-mocked',
                '--output-hook',
                'examples/standalone_hook.py',
            ]) == 0

            assert update.call_count == 0
            assert hook().alert.call_count == 1
            assert secrets_json.call_count == 1
Example #15
def get_secrets_from_baseline(baseline, filter_func=lambda secret: True):
    """
    :type baseline: SecretsCollection
    :param baseline: SecretsCollection of baseline results.
                     This will be updated accordingly (by reference)

    :type filter_func: function
    :param filter_func: the function used to filter secrets. If not supplied,
                        all secrets are returned

    :rtype: SecretsCollection
    :returns: SecretsCollection of non-audited results
    """
    if not isinstance(filter_func, types.FunctionType):
        return baseline

    new_secrets = SecretsCollection()
    for filename in baseline.data:
        # The __hash__ method of PotentialSecret makes this work
        filtered_results = {
            secret: secret
            for secret in baseline.data[filename] if filter_func(secret)
        }

        if filtered_results:
            new_secrets.data[filename] = filtered_results

    return new_secrets
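A sketch of how the filter_func parameter above might be used, e.g. to keep only secrets
that haven't been audited yet; `baseline` stands in for a SecretsCollection loaded elsewhere,
and the is_secret-is-None convention for "not yet audited" is an assumption.

    def is_unaudited(secret):
        # Assumption: un-audited secrets still carry is_secret=None.
        return secret.is_secret is None

    unaudited = get_secrets_from_baseline(baseline, is_unaudited)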
Example #16
def initialize(plugins, exclude_regex=None, rootdir='.'):
    """Scans the entire codebase for high entropy strings, and returns a
    SecretsCollection object.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_regex: str|None
    :type rootdir: str

    :rtype: SecretsCollection
    """
    output = SecretsCollection(plugins, exclude_regex)

    if os.path.isfile(rootdir):
        # This option allows for much easier ad hoc usage.
        git_files = [rootdir]
    else:
        git_files = _get_git_tracked_files(rootdir)

    if not git_files:
        return output

    if exclude_regex:
        regex = re.compile(exclude_regex, re.IGNORECASE)
        git_files = filter(
            lambda x: not regex.search(x),
            git_files,
        )

    for file in git_files:
        output.scan_file(file)

    return output
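A hypothetical usage sketch for this initialize() signature; `my_plugins` stands in for a
tuple of BasePlugin instances built elsewhere (e.g. via a plugins initializer, as in
Example #29), and the exclude pattern is illustrative.

    secrets = initialize(
        my_plugins,
        exclude_regex=r'^tests/',    # skip anything under tests/
        rootdir='.',
    )
    print(secrets.json())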
Example #17
    def test_saves_to_baseline():
        # We create an empty baseline, with customized settings.
        # This way, we expect the engine to use the settings configured by the baseline,
        # but have the results replaced by the new scan.
        with transient_settings({
                'plugins_used': [
                    {
                        'name': 'Base64HighEntropyString',
                        'limit': 4.5,
                    },
                ],
        }):
            secrets = SecretsCollection()
            old_secrets = baseline.format_for_output(secrets)

        with mock_printer(
                main_module) as printer, tempfile.NamedTemporaryFile() as f:
            baseline.save_to_file(old_secrets, f.name)
            f.seek(0)

            # We also test setting the root directory through this test.
            main_module.main(['scan', 'test_data', '--baseline', f.name])

            f.seek(0)
            new_secrets = json.loads(f.read())
            assert not secrets.exactly_equals(
                baseline.load(new_secrets, f.name))
            assert new_secrets['plugins_used'] == [
                {
                    'name': 'Base64HighEntropyString',
                    'limit': 4.5,
                },
            ]
            assert not printer.message
Example #18
    def modified_baseline(self):
        secrets = SecretsCollection()
        secrets.scan_file(self.FILENAME)
        for _, secret in secrets:
            secret.line_number += 1

        yield secrets
Example #19
def test_baseline_filters_out_known_secrets():
    secrets = SecretsCollection()
    secrets.scan_file('test_data/each_secret.py')

    with tempfile.NamedTemporaryFile() as f:
        baseline.save_to_file(secrets, f.name)
        f.seek(0)

        # This succeeds, because all the secrets are known.
        assert_commit_succeeds([
            'test_data/each_secret.py',
            '--baseline',
            f.name,
        ])

    # Remove one arbitrary secret, so that it won't be the full set.
    secrets.data['test_data/each_secret.py'].pop()

    with tempfile.NamedTemporaryFile() as f:
        baseline.save_to_file(secrets, f.name)
        f.seek(0)

        # Ensure that merely providing a baseline doesn't make the commit pass unconditionally.
        assert_commit_blocked([
            'test_data/each_secret.py',
            '--baseline',
            f.name,
        ])
Example #20
def test_no_divide_by_zero(secret):
    secrets = SecretsCollection()
    secrets['file'].add(secret)
    with tempfile.NamedTemporaryFile() as f:
        baseline.save_to_file(secrets, f.name)
        f.seek(0)

        main(['audit', f.name, '--stats', '--json'])
Example #21
    def test_filename_filters_are_invoked_first(mock_log):
        # This is a directory, which should be ignored via
        # detect_secrets.filters.common.is_invalid_file
        SecretsCollection().scan_file('test_data')

        assert (
            'Skipping "test_data" due to `detect_secrets.filters.common.is_invalid_file`'
            in mock_log.debug_messages)
Example #22
    def test_error_reading_file(mock_log_warning):
        with mock.patch(
                'detect_secrets.core.scan.open',
                side_effect=IOError,
        ):
            SecretsCollection().scan_file('test_data/config.env')

        assert 'Unable to open file: test_data/config.env' in mock_log_warning.warning_messages
Example #23
    def test_does_not_write_state_when_dry_run(self, mock_file_operations):
        with self.setup_env(
            SecretsCollection(),
            '--dry-run',
        ) as args:
            assert scan_repo(args) == 0

        assert not mock_file_operations.write.called
Example #24
def test_bool():
    secrets = SecretsCollection()
    assert not secrets

    secrets.scan_file('test_data/each_secret.py')
    assert secrets

    secrets['test_data/each_secret.py'].clear()
    assert not secrets
Example #25
def test_basic(file_content):
    with tempfile.NamedTemporaryFile() as f:
        f.write(file_content.encode())
        f.seek(0)

        secrets = SecretsCollection()
        secrets.scan_file(f.name)

    assert len(list(secrets)) == 1
Example #26
def initialize(
    path,
    plugins,
    exclude_files_regex=None,
    exclude_lines_regex=None,
    should_scan_all_files=False,
):
    """Scans the entire codebase for secrets, and returns a
    SecretsCollection object.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_files_regex: str|None
    :type exclude_lines_regex: str|None
    :type path: list
    :type should_scan_all_files: bool

    :rtype: SecretsCollection
    """
    output = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    files_to_scan = []
    for element in path:
        if os.path.isdir(element):
            if should_scan_all_files:
                files_to_scan.extend(_get_files_recursively(element))
            else:
                files = _get_git_tracked_files(element)
                if files:
                    files_to_scan.extend(files)
        elif os.path.isfile(element):
            files_to_scan.append(element)
        else:
            log.error('detect-secrets: ' + element + ': No such file or directory')

    if not files_to_scan:
        return output

    if exclude_files_regex:
        exclude_files_regex = re.compile(exclude_files_regex, re.IGNORECASE)
        files_to_scan = filter(
            lambda file: (
                not exclude_files_regex.search(file)
            ),
            files_to_scan,
        )

    for file in files_to_scan:
        output.scan_file(file)

    return output
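A hypothetical call to this newer initialize() signature; again `my_plugins` is an assumed
tuple of plugin instances, and the paths and exclude pattern are illustrative.

    secrets = initialize(
        path=['.', 'server/config.env'],
        plugins=my_plugins,
        exclude_files_regex=r'\.secrets\.baseline$',
        should_scan_all_files=False,
    )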
Example #27
    def test_remove_non_existent_files():
        secrets = SecretsCollection()
        secrets.scan_file('test_data/each_secret.py')
        assert bool(secrets)

        secrets.data['does-not-exist'] = secrets.data.pop(
            'test_data/each_secret.py')
        secrets.trim()

        assert not bool(secrets)
Example #28
def find_secrets_in_files(args, plugins):
    collection = SecretsCollection(plugins)

    for filename in args.filenames:
        # Don't scan the baseline file
        if filename == args.baseline[0]:
            continue

        collection.scan_file(filename)

    return collection
Example #29
def find_secrets_in_files(args):
    plugins = initialize.from_parser_builder(args.plugins)
    collection = SecretsCollection(plugins)

    for filename in args.filenames:
        if filename == args.baseline[0]:
            # Obviously, don't detect the baseline file
            continue

        collection.scan_file(filename)

    return collection
Example #30
    def test_deleted_secret():
        secrets = SecretsCollection()
        secrets.scan_file('test_data/each_secret.py')

        results = SecretsCollection.load_from_baseline(
            {'results': secrets.json()})
        results.data['test_data/each_secret.py'].pop()

        original_size = len(secrets['test_data/each_secret.py'])
        secrets.trim(results)

        assert len(secrets['test_data/each_secret.py']) < original_size