Exemple #1
0
def test_merge():
    """Check the labels on the first three secrets after merging in a labelled scan."""
    target = 'test_data/each_secret.py'

    old_secrets = SecretsCollection()
    old_secrets.scan_file(target)
    # The positional checks below are meaningless with fewer than three findings.
    assert len(list(old_secrets)) >= 3

    # Give the first three findings of the old scan distinct labels.
    for position, (_, labelled) in enumerate(old_secrets):
        if position == 0:
            labelled.is_secret = False
        elif position == 1:
            labelled.is_secret = True
        elif position == 2:
            labelled.is_verified = True

    new_secrets = SecretsCollection()
    new_secrets.scan_file(target)
    _, last_found = list(new_secrets)[-1]
    last_found.is_secret = True

    new_secrets.merge(old_secrets)

    # position -> (is_secret, is_verified) expected after the merge.
    expected_labels = {
        0: (False, False),
        1: (True, False),
        2: (True, True),
    }
    for position, (_, merged) in enumerate(new_secrets):
        if position not in expected_labels:
            continue

        want_secret, want_verified = expected_labels[position]
        assert merged.is_secret is want_secret
        assert merged.is_verified is want_verified
Exemple #2
0
    def modified_baseline(self):
        """Yield a scan of ``self.FILENAME`` with each secret's line number raised by one."""
        shifted = SecretsCollection()
        shifted.scan_file(self.FILENAME)

        for _filename, found in shifted:
            found.line_number += 1

        yield shifted
Exemple #3
0
def test_baseline_filters_out_known_secrets():
    """A baseline holding every secret lets the commit pass; one missing a secret blocks it."""
    target = 'test_data/each_secret.py'

    secrets = SecretsCollection()
    secrets.scan_file(target)

    with tempfile.NamedTemporaryFile() as complete_baseline:
        baseline.save_to_file(secrets, complete_baseline.name)
        complete_baseline.seek(0)

        # Every secret in the file is recorded in the baseline, so this passes.
        assert_commit_succeeds([target, '--baseline', complete_baseline.name])

    # Drop one arbitrary secret so the baseline no longer holds the full set.
    secrets.data[target].pop()

    with tempfile.NamedTemporaryFile() as partial_baseline:
        baseline.save_to_file(secrets, partial_baseline.name)
        partial_baseline.seek(0)

        # Merely supplying a baseline must not make everything pass.
        assert_commit_blocked([target, '--baseline', partial_baseline.name])
Exemple #4
0
def initialize(plugins, exclude_regex=None, rootdir='.'):
    """Scan ``rootdir`` and return the findings as a SecretsCollection.

    When ``rootdir`` is a file, only that file is considered; otherwise the
    files returned by ``_get_git_tracked_files(rootdir)`` are.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_regex: str|None
    :param exclude_regex: case-insensitive pattern; files whose name matches
        it are not scanned.

    :type rootdir: str
    :param rootdir: file or directory to scan.

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(plugins, exclude_regex)

    # Accepting a single file makes ad-hoc usage much easier.
    candidates = (
        [rootdir]
        if os.path.isfile(rootdir)
        else _get_git_tracked_files(rootdir)
    )
    if not candidates:
        return collection

    pattern = None
    if exclude_regex:
        pattern = re.compile(exclude_regex, re.IGNORECASE)

    for filename in candidates:
        if pattern is not None and pattern.search(filename):
            continue
        collection.scan_file(filename)

    return collection
Exemple #5
0
def initialize(plugins, exclude_regex=None, rootdir='.'):
    """Build a SecretsCollection by scanning a file or a directory's tracked files.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_regex: str|None
    :param exclude_regex: case-insensitive pattern; matching file names are
        skipped.

    :type rootdir: str
    :param rootdir: a single file, or a directory whose files are obtained
        from ``_get_git_tracked_files``.

    :rtype: SecretsCollection
    """
    secrets = SecretsCollection(plugins, exclude_regex)

    if os.path.isfile(rootdir):
        # A lone file is allowed, which keeps ad-hoc usage simple.
        tracked = [rootdir]
    else:
        tracked = _get_git_tracked_files(rootdir)

    if tracked:
        if exclude_regex:
            excluded = re.compile(exclude_regex, re.IGNORECASE)
            tracked = (name for name in tracked if not excluded.search(name))

        for name in tracked:
            secrets.scan_file(name)

    return secrets
Exemple #6
0
def test_disable_filter(parser):
    """A UUID-valued secret only shows up once the UUID filter is disabled via the CLI."""
    with tempfile.NamedTemporaryFile() as f:
        f.write(f'secret = "{uuid.uuid4()}"'.encode())

        # First, confirm that the scan reports nothing for this file
        # (presumably because the UUID heuristic filters the value out).
        f.seek(0)
        keyword_only = {
            'plugins_used': [{
                'name': 'KeywordDetector',
            }],
        }
        with transient_settings(keyword_only):
            filtered = SecretsCollection()
            filtered.scan_file(f.name)

            assert not filtered

        f.seek(0)
        cli_args = [
            'scan',
            '--disable-filter',
            'detect_secrets.filters.heuristic.is_potential_uuid',

            # invalid filter
            '--disable-filter',
            'blah',
        ]
        with default_settings():
            parser.parse_args(cli_args)

            unfiltered = SecretsCollection()
            unfiltered.scan_file(f.name)

            assert unfiltered
Exemple #7
0
    def test_basic(configure_plugins):
        """Subtracting a line-filtered scan from an unfiltered one leaves the difference."""
        target = 'test_data/each_secret.py'

        with transient_settings({**configure_plugins, 'filters_used': []}):
            secrets = SecretsCollection()
            secrets.scan_file(target)

        # The second scan ends up with fewer secrets, since some get filtered out.
        exclude_example_lines = {
            'path': 'detect_secrets.filters.regex.should_exclude_line',
            'pattern': [
                'EXAMPLE',
            ],
        }
        filtered_settings = {
            **configure_plugins,
            'filters_used': [exclude_example_lines],
        }
        with transient_settings(filtered_settings):
            filtered_scan = SecretsCollection()
            filtered_scan.scan_file(target)

        # Also covers the != operator for the same file with a different number
        # of secrets; it lives here to avoid repeating the boilerplate.
        assert secrets != filtered_scan

        difference = secrets - filtered_scan
        assert len(difference[target]) == 2
        assert len(secrets[target]) == 4
Exemple #8
0
def test_ensure_file_transformers_are_used(printer):
    """
    Set up a scan that relies on special file transformers to find a secret which
    plain line-by-line reading would miss. If audit can then locate that secret,
    it follows that audit knows how to use file transformers as well.
    """
    target = 'test_data/config.env'
    plugin_settings = {
        'plugins_used': [
            {
                'name': 'Base64HighEntropyString'
            },
        ],
    }

    with transient_settings(plugin_settings):
        secrets = SecretsCollection()
        secrets.scan_file(target)
        assert secrets

    with open(target) as source:
        lines = [raw_line.rstrip() for raw_line in source]

    with mock.patch('detect_secrets.audit.io.print_secret_not_found') as not_found:
        run_logic(secrets, 'y')
        assert not not_found.called

    # The line holding the first secret must appear in the audit output.
    first_secret = list(secrets[target])[0]
    line_number = first_secret.line_number
    assert lines[line_number - 1] in printer.message
Exemple #9
0
    def test_mismatch_files():
        """Collections produced from two different files compare as unequal."""
        scans = []
        for path in (
            'test_data/each_secret.py',
            'test_data/files/file_with_secrets.py',
        ):
            collection = SecretsCollection()
            collection.scan_file(path)
            scans.append(collection)

        first_scan, second_scan = scans
        assert first_scan != second_scan
Exemple #10
0
def test_basic(file_content):
    """Scanning a temporary file holding ``file_content`` yields exactly one secret."""
    with tempfile.NamedTemporaryFile() as scratch:
        scratch.write(file_content.encode())
        scratch.seek(0)

        found = SecretsCollection()
        found.scan_file(scratch.name)

    entries = list(found)
    assert len(entries) == 1
Exemple #11
0
    def test_no_overlapping_files(configure_plugins):
        """Subtracting scans of unrelated files leaves the left operand's file."""
        each_secret = 'test_data/each_secret.py'
        config_env = 'test_data/config.env'

        secrets_a = SecretsCollection()
        secrets_b = SecretsCollection()
        unfiltered = {**configure_plugins, 'filters_used': []}
        with transient_settings(unfiltered):
            secrets_a.scan_file(each_secret)
            secrets_b.scan_file(config_env)

        a_minus_b = secrets_a - secrets_b
        assert a_minus_b.files == {each_secret}

        b_minus_a = secrets_b - secrets_a
        assert b_minus_a.files == {config_env}
Exemple #12
0
def test_bool():
    """Truthiness of a collection: empty, after a scan, and after clearing the file's secrets."""
    target = 'test_data/each_secret.py'

    collection = SecretsCollection()
    assert not collection

    collection.scan_file(target)
    assert collection

    per_file_secrets = collection[target]
    per_file_secrets.clear()
    assert not collection
Exemple #13
0
def initialize(
    path,
    plugins,
    exclude_files_regex=None,
    exclude_lines_regex=None,
    should_scan_all_files=False,
):
    """Scan every given path for secrets and return a SecretsCollection.

    :type path: list
    :param path: files and/or directories to scan. Entries that are neither
        are reported through ``log.error`` and skipped.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_files_regex: str|None
    :param exclude_files_regex: case-insensitive pattern; matching file names
        are not scanned.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: passed to the SecretsCollection as
        ``exclude_lines``.

    :type should_scan_all_files: bool
    :param should_scan_all_files: for directories, use
        ``_get_files_recursively`` instead of ``_get_git_tracked_files``.

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    candidates = []
    for element in path:
        if os.path.isdir(element):
            if should_scan_all_files:
                found = _get_files_recursively(element)
            else:
                # An empty (or missing) result contributes nothing.
                found = _get_git_tracked_files(element) or []
            candidates.extend(found)
            continue

        if os.path.isfile(element):
            candidates.append(element)
            continue

        log.error('detect-secrets: ' + element + ': No such file or directory')

    if not candidates:
        return collection

    pattern = None
    if exclude_files_regex:
        pattern = re.compile(exclude_files_regex, re.IGNORECASE)

    for filename in candidates:
        if pattern is not None and pattern.search(filename):
            continue
        collection.scan_file(filename)

    return collection
Exemple #14
0
    def test_remove_non_existent_files():
        """``trim`` empties a collection whose only entry is filed under 'does-not-exist'."""
        collection = SecretsCollection()
        collection.scan_file('test_data/each_secret.py')
        assert collection

        # Re-file the scanned secrets under a different name.
        moved = collection.data.pop('test_data/each_secret.py')
        collection.data['does-not-exist'] = moved
        collection.trim()

        assert not collection
Exemple #15
0
    def test_deleted_secret_file():
        """Trimming against an empty scan only clears a file when it is in ``filelist``."""
        target = 'test_data/each_secret.py'

        collection = SecretsCollection()
        collection.scan_file(target)

        # No file list given: the collection keeps its secrets.
        empty_scan = SecretsCollection()
        collection.trim(empty_scan)
        assert collection

        # File explicitly listed: its secrets are dropped.
        another_empty_scan = SecretsCollection()
        collection.trim(another_empty_scan, filelist=[target])
        assert not collection
def find_secrets_in_files(args, plugins):
    """Scan ``args.filenames`` with ``plugins``, leaving out the baseline file.

    :param args: object providing ``filenames`` and ``baseline``; the first
        element of ``baseline`` is compared against each file name.
    :param plugins: passed to the SecretsCollection constructor.

    :rtype: SecretsCollection
    """
    found = SecretsCollection(plugins)

    for candidate in args.filenames:
        # The baseline file itself is never scanned.
        is_baseline = candidate == args.baseline[0]
        if not is_baseline:
            found.scan_file(candidate)

    return found
def find_secrets_in_files(args):
    """Scan ``args.filenames`` using plugins built from ``args.plugins``.

    :param args: object providing ``plugins``, ``filenames`` and ``baseline``;
        the first element of ``baseline`` is compared against each file name.

    :rtype: SecretsCollection
    """
    found = SecretsCollection(initialize.from_parser_builder(args.plugins))

    for candidate in args.filenames:
        # The baseline file itself must not be scanned.
        is_baseline = candidate == args.baseline[0]
        if not is_baseline:
            found.scan_file(candidate)

    return found
Exemple #18
0
    def test_deleted_secret():
        """Trimming against results that lack one secret shrinks the collection."""
        target = 'test_data/each_secret.py'

        collection = SecretsCollection()
        collection.scan_file(target)

        # Reload the same findings, then drop one of them.
        serialized = {'results': collection.json()}
        reduced = SecretsCollection.load_from_baseline(serialized)
        reduced.data[target].pop()

        size_before = len(collection[target])
        collection.trim(reduced)
        size_after = len(collection[target])

        assert size_after < size_before
Exemple #19
0
def find_secrets_in_files(args):
    """Return a SecretsCollection covering ``args.filenames`` minus the baseline file.

    :param args: object providing ``plugins`` (fed to
        ``initialize.from_parser_builder``), ``filenames`` and ``baseline``.

    :rtype: SecretsCollection
    """
    active_plugins = initialize.from_parser_builder(args.plugins)
    results = SecretsCollection(active_plugins)

    for name in args.filenames:
        # Skip the baseline file; it is not a scan target.
        matches_baseline = name == args.baseline[0]
        if not matches_baseline:
            results.scan_file(name)

    return results
Exemple #20
0
def main(argv: Optional[List[str]] = None) -> int:
    """Scan the given files and report secrets that the baseline does not contain.

    :type argv: list of str|None
    :param argv: command line arguments, handed to ``parse_args``.

    :rtype: int
    :returns: 1 when the arguments cannot be parsed or new secrets are found,
        3 when the baseline file was rewritten, 0 otherwise.
    """
    try:
        args = parse_args(argv)
    except ValueError:
        return 1

    if args.verbose:  # pragma: no cover
        log.set_debug_level(args.verbose)

    # Collect every secret in the files that are about to be committed.
    scanned = SecretsCollection()
    for path in args.filenames:
        scanned.scan_file(path)

    # With a baseline, only secrets it does not already hold count as new.
    unseen = scanned - args.baseline if args.baseline else scanned
    if unseen:
        pretty_print_diagnostics(unseen)
        return 1

    if not args.baseline:
        return 0

    # The baseline is only touched once the scan is free of new secrets.
    needs_rewrite = should_update_baseline(
        args.baseline,
        scanned_results=scanned,
        filelist=args.filenames,
        baseline_version=args.baseline_version,
    )
    if not needs_rewrite:
        return 0

    if args.baseline_version != VERSION:
        with open(args.baseline_filename) as handle:
            outdated = json.loads(handle.read())

        # `should_update_baseline` has already refreshed the results, so they
        # replace whatever the file on disk still holds.
        outdated['results'] = args.baseline.json()
        args.baseline = baseline.upgrade(outdated)

    baseline.save_to_file(args.baseline, filename=args.baseline_filename)
    notice = (
        'The baseline file was updated.\n'
        'Probably to keep line numbers of secrets up-to-date.\n'
        'Please `git add {}`, thank you.\n\n'
    )
    print(notice.format(args.baseline_filename))
    return 3
Exemple #21
0
    def get_baseline_file(self, formatter=baseline.format_for_output):
        """Yield a temporary file holding an old-style baseline for ``self.FILENAME``.

        :param formatter: callable turning the scanned collection into the
            data that gets saved; it runs while the baseline VERSION is
            patched to '0.0.1'.
        """
        scanned = SecretsCollection()
        scanned.scan_file(self.FILENAME)

        with tempfile.NamedTemporaryFile() as f:
            with mock.patch('detect_secrets.core.baseline.VERSION', '0.0.1'):
                data = formatter(scanned)

            # Simulate the old version by renaming the first plugin's limit key.
            first_plugin = data['plugins_used'][0]
            first_plugin['base64_limit'] = first_plugin.pop('limit')
            baseline.save_to_file(data, f.name)

            yield f
Exemple #22
0
    def test_maintains_labels():
        """A label set before ``trim`` is still present on some secret afterwards."""
        target = 'test_data/each_secret.py'

        labelled_secrets = SecretsCollection()
        labelled_secrets.scan_file(target)

        # Label only the first secret, if there is one.
        first_entry = next(iter(labelled_secrets), None)
        if first_entry is not None:
            _, first_secret = first_entry
            first_secret.is_secret = True

        fresh_scan = SecretsCollection()
        fresh_scan.scan_file(target)

        labelled_secrets.trim(scanned_results=fresh_scan)

        labels = [found.is_secret for _, found in labelled_secrets]
        assert any(labels)
Exemple #23
0
    def test_file_based_success_yaml():
        """Scanning the YAML fixture reports secrets at the expected locations."""
        hex_plugin = {
            'name': 'HexHighEntropyString',
            'limit': 3.0,
        }
        get_settings().configure_plugins([hex_plugin])

        found = SecretsCollection()
        found.scan_file('test_data/config.yaml')

        # The second line of each secret's string form carries its location.
        locations = []
        for _, secret in found:
            locations.append(str(secret).splitlines()[1])

        assert locations == [
            'Location:    test_data/config.yaml:3',
            'Location:    test_data/config.yaml:5',
        ]
Exemple #24
0
    def test_local_file_success(scheme, parser):
        """With the custom filter from a local file registered, the scan reports nothing."""
        found = SecretsCollection()
        plugin_settings = {
            'plugins_used': [{
                'name': 'Base64HighEntropyString',
            }],
        }
        custom_filter = scheme + 'testing/custom_filters.py::is_invalid_secret'

        with transient_settings(plugin_settings):
            parser.parse_args(['scan', '--filter', custom_filter])
            found.scan_file('test_data/config.env')

        assert not found
Exemple #25
0
def initialize(
    plugins,
    exclude_files_regex=None,
    exclude_lines_regex=None,
    path='.',
    scan_all_files=False,
):
    """Scan ``path`` for secrets and return a SecretsCollection.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_files_regex: str|None
    :param exclude_files_regex: case-insensitive pattern; matching file names
        are not scanned.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: passed to the SecretsCollection as
        ``exclude_lines``.

    :type path: str
    :param path: a single file, or a location whose files are looked up.

    :type scan_all_files: bool
    :param scan_all_files: when ``path`` is not a file, use
        ``_get_files_recursively`` instead of ``_get_git_tracked_files``.

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
    )

    if os.path.isfile(path):
        # Accepting a single file makes ad-hoc usage much easier.
        candidates = [path]
    else:
        lookup = _get_files_recursively if scan_all_files else _get_git_tracked_files
        candidates = lookup(path)

    if not candidates:
        return collection

    pattern = None
    if exclude_files_regex:
        pattern = re.compile(exclude_files_regex, re.IGNORECASE)

    for filename in candidates:
        if pattern is not None and pattern.search(filename):
            continue
        collection.scan_file(filename)

    return collection
Exemple #26
0
    def test_file_based_success_config():
        """Scanning the INI fixture reports secrets at the expected locations."""
        base64_plugin = {
            'name': 'Base64HighEntropyString',
            'limit': 3.0,
        }
        get_settings().configure_plugins([base64_plugin])

        found = SecretsCollection()
        found.scan_file('test_data/config.ini')

        # The second line of each secret's string form carries its location.
        locations = []
        for _, secret in found:
            locations.append(str(secret).splitlines()[1])

        assert locations == [
            'Location:    test_data/config.ini:2',
            'Location:    test_data/config.ini:10',
            'Location:    test_data/config.ini:21',
            'Location:    test_data/config.ini:22',
            'Location:    test_data/config.ini:32',
        ]
Exemple #27
0
def test_nothing_to_audit(printer):
    """When every secret already has a label, audit reports there is nothing to do."""
    plugin_settings = {
        'plugins_used': [
            {
                'name': 'BasicAuthDetector'
            },
        ],
    }

    with transient_settings(plugin_settings):
        scanned = SecretsCollection()
        scanned.scan_file('test_data/each_secret.py')

        # Label every secret; the value itself is picked at random.
        for _, found in scanned:
            found.is_secret = random.choice([True, False])

    run_logic(scanned)

    assert 'Nothing to audit' in printer.message
    assert 'Saving progress' not in printer.message
Exemple #28
0
def test_fails_if_no_line_numbers_found(printer):
    """Auditing a slim-mode baseline prints an error and never clears the screen."""
    plugin_settings = {
        'plugins_used': [
            {
                'name': 'Base64HighEntropyString'
            },
        ],
    }

    with transient_settings(plugin_settings):
        scanned = SecretsCollection()
        scanned.scan_file('test_data/config.env')

    # Round-trip through the slim output format to remove line numbers.
    slim_output = baseline.format_for_output(scanned, is_slim_mode=True)
    without_line_numbers = baseline.load(slim_output)

    with mock.patch('detect_secrets.audit.io.clear_screen') as clear_screen:
        run_logic(without_line_numbers)
        assert not clear_screen.called

    assert 'No line numbers found in baseline' in printer.message
Exemple #29
0
    def execute(self, finder):
        """Scan the files supplied by ``finder`` and return the resulting issues.

        Plugins whose code is listed in ``self.config['disabled']`` are left
        out. An exception raised while scanning a file becomes an issue for
        that file instead of aborting the run.
        """
        issues = []

        enabled = []
        for candidate in PLUGINS:
            if plugin_code(candidate) not in self.config['disabled']:
                enabled.append(candidate)

        detector = SecretsCollection(enabled)

        for filepath in finder.files(self.config['filters']):
            try:
                detector.scan_file(filepath)
            except Exception as exc:  # pylint: disable=broad-except
                issues.append(self.make_issue(exc, filepath))

        # Every detected problem is reported as its own issue.
        issues.extend(
            self.make_issue(problem, filepath)
            for filepath, problems in iteritems(detector.data)
            for problem in problems
        )

        return issues
def baseline_file():
    """Yield the name of a temporary baseline file built from two labelled scans.

    Two files are created from templated content, scanned with the
    BasicAuthDetector and JwtTokenDetector plugins, and every resulting secret
    gets an ``is_secret`` label before the collection is saved.

    NOTE(review): this is a generator, presumably used as a pytest fixture;
    the decorator is not visible here -- confirm.
    """
    # Create our own SecretsCollection manually, so that we have fine-tuned control.
    # The `[1:]` drops the newline that follows the opening triple quote.
    first_content = textwrap.dedent(f"""
        url = {url_format.format(first_secret)}
        example = {url_format.format(random_secret)}
        link = {url_format.format(first_secret)}
    """)[1:]
    second_content = textwrap.dedent(f"""
        url = {url_format.format(second_secret)}
        example = {url_format.format(random_secret)}
    """)[1:]

    # Both source files, the baseline file and the plugin settings stay alive
    # until the consumer of the yielded name is done.
    with create_file_with_content(first_content) as first_file, \
            create_file_with_content(second_content) as second_file, \
            tempfile.NamedTemporaryFile() as baseline_file, \
            transient_settings({
                'plugins_used': [
                    {'name': 'BasicAuthDetector'},
                    {'name': 'JwtTokenDetector'},

                ],
            }):
        secrets = SecretsCollection()
        secrets.scan_file(first_file)
        secrets.scan_file(second_file)
        # Maps (filename, secret type, line number) to the label given to that
        # secret; a secret without an entry here raises KeyError below.
        labels = {
            (first_file, BasicAuthDetector.secret_type, 1): True,
            (first_file, BasicAuthDetector.secret_type, 2): None,
            (first_file, BasicAuthDetector.secret_type, 3): True,
            (second_file, JwtTokenDetector.secret_type, 1): True,
            (second_file, BasicAuthDetector.secret_type, 1): False,
            (second_file, BasicAuthDetector.secret_type, 2): False,
        }
        for item in secrets:
            _, secret = item
            secret.is_secret = labels[(secret.filename, secret.type,
                                       secret.line_number)]
        baseline.save_to_file(secrets, baseline_file.name)
        # Rewind the temporary file handle before handing its name out.
        baseline_file.seek(0)
        yield baseline_file.name
Exemple #31
0
    def test_line_based_success():
        """With two explicit filters configured, exactly one secret is left in the fixture."""
        # This one removes the `id` string.
        id_string_filter = {
            'path': 'detect_secrets.filters.heuristic.is_likely_id_string'
        }
        # This one gets rid of the aws keys with `EXAMPLE` in them.
        example_line_filter = {
            'path': 'detect_secrets.filters.regex.should_exclude_line',
            'pattern': [
                'EXAMPLE',
            ],
        }

        # Filters are configured explicitly so that newly added filters
        # cannot affect this test.
        get_settings().configure_filters([id_string_filter, example_line_filter])

        target = 'test_data/each_secret.py'
        found = SecretsCollection()
        found.scan_file(target)

        first_secret = next(iter(found[target]))
        assert first_secret.secret_value.startswith('c2VjcmV0IG1lc')
        assert len(found[target]) == 1
def initialize(
    path,
    plugins,
    exclude_files_regex=None,
    exclude_lines_regex=None,
    word_list_file=None,
    word_list_hash=None,
    should_scan_all_files=False,
    output_raw=False,
    output_verified_false=False,
):
    """Scan every given path for secrets, in sorted file order.

    :type path: list
    :param path: files and/or directories to scan. Entries that are neither
        are reported through ``log.error`` and skipped.

    :type plugins: tuple of detect_secrets.plugins.base.BasePlugin
    :param plugins: rules to initialize the SecretsCollection with.

    :type exclude_files_regex: str|None
    :param exclude_files_regex: case-insensitive pattern; matching file names
        are not scanned.

    :type exclude_lines_regex: str|None
    :param exclude_lines_regex: passed to the SecretsCollection as
        ``exclude_lines``.

    :type word_list_file: str|None
    :param word_list_file: optional word list file for ignoring certain words.

    :type word_list_hash: str|None
    :param word_list_hash: optional iterated sha1 hash of the words in the word list.

    :type should_scan_all_files: bool
    :param should_scan_all_files: for directories, use
        ``_get_files_recursively`` instead of ``_get_git_tracked_files``.

    :type output_raw: bool
    :type output_verified_false: bool

    :rtype: SecretsCollection
    """
    collection = SecretsCollection(
        plugins,
        exclude_files=exclude_files_regex,
        exclude_lines=exclude_lines_regex,
        word_list_file=word_list_file,
        word_list_hash=word_list_hash,
        output_raw=output_raw,
        output_verified_false=output_verified_false,
    )

    candidates = []
    for element in path:
        if os.path.isdir(element):
            lookup = (
                _get_files_recursively
                if should_scan_all_files
                else _get_git_tracked_files
            )
            candidates.extend(lookup(element))
            continue

        if os.path.isfile(element):
            candidates.append(element)
            continue

        log.error('detect-secrets: %s: No such file or directory', element)

    if not candidates:
        return collection

    if exclude_files_regex:
        pattern = re.compile(exclude_files_regex, re.IGNORECASE)
        candidates = [name for name in candidates if not pattern.search(name)]

    for filename in sorted(candidates):
        collection.scan_file(filename)

    return collection