Esempio n. 1
0
    def test_scan_file_name(self, ):
        """Scan with a single file-path rule and check the one reported match.

        The rule pattern is applied through ``add_file_path_rule``; the expected
        match has ``match_text`` equal to the matching file's path.
        """
        scanner = pyrepscan.GitRepositoryScanner()
        scanner.add_file_path_rule(name='First Rule', pattern=r'(prod|dev|stage).+key')

        matches = scanner.scan(repository_path=self.tmpdir.name, branch_glob_pattern='*')

        # The commit id is removed from every match before comparing, so it is
        # the only field this test does not pin.
        for match in matches:
            match.pop('commit_id')

        expected_matches = [
            {
                'author_email': '*****@*****.**',
                'author_name': 'Author Name',
                'commit_message': 'initial commit',
                'commit_time': '2000-01-01T00:00:00',
                'file_oid': 'ec3741ea9c00bc5cd88564e49fd81d2340a5582f',
                'file_path': 'prod_env_with_content.key',
                'match_text': 'prod_env_with_content.key',
                'rule_name': 'First Rule',
            },
        ]
        self.assertCountEqual(first=matches, second=expected_matches)
Esempio n. 2
0
    def test_add_file_path_rule_exceptions(self, ):
        """Expect RuntimeError when a file-path rule is added with an empty name."""
        scanner = pyrepscan.GitRepositoryScanner()

        # Only the name is empty; the pattern is an ordinary literal regex.
        rule_with_empty_name = {
            'name': '',
            'pattern': r'regex',
        }

        with self.assertRaises(expected_exception=RuntimeError):
            scanner.add_file_path_rule(**rule_with_empty_name)
Esempio n. 3
0
    def test_add_content_rule_exceptions(self, ):
        """Expect RuntimeError when a content rule is added with an empty name."""
        scanner = pyrepscan.GitRepositoryScanner()

        # Only the name is empty; the pattern is a literal regex and both
        # pattern lists are empty.
        rule_with_empty_name = {
            'name': '',
            'pattern': r'regex',
            'whitelist_patterns': [],
            'blacklist_patterns': [],
        }

        with self.assertRaises(expected_exception=RuntimeError):
            scanner.add_content_rule(**rule_with_empty_name)
Esempio n. 4
0
    def test_get_file_content(self, ):
        """Fetch a file's content by object id and compare it to the known bytes."""
        scanner = pyrepscan.GitRepositoryScanner()
        known_oid = '0407a18f7c6802c7e7ddc5c9e8af4a34584383ff'

        content = scanner.get_file_content(
            repository_path=self.tmpdir.name,
            file_oid=known_oid,
        )

        # The content is compared as bytes, not as text.
        self.assertEqual(first=content, second=b'new content from new branch')
Esempio n. 5
0
    def test_get_file_content_exceptions(self, ):
        """Expect RuntimeError from ``get_file_content`` for each rejected object id."""
        scanner = pyrepscan.GitRepositoryScanner()

        # Tried in this order: an empty id, a 9-character id, and a 40-character
        # id whose last character differs from an id used by the other tests
        # (presumably absent from the fixture repository).
        rejected_oids = (
            '',
            'aaaaaaaaa',
            '0407a18f7c6802c7e7ddc5c9e8af4a34584383fa',
        )

        for rejected_oid in rejected_oids:
            with self.assertRaises(expected_exception=RuntimeError):
                scanner.get_file_content(
                    repository_path=self.tmpdir.name,
                    file_oid=rejected_oid,
                )
Esempio n. 6
0
    def test_add_file_path_to_skip_exceptions(self, ):
        """Expect RuntimeError when an empty file path is registered for skipping."""
        scanner = pyrepscan.GitRepositoryScanner()
        empty_path = ''

        with self.assertRaises(expected_exception=RuntimeError):
            scanner.add_file_path_to_skip(file_path=empty_path)
Esempio n. 7
0
    def test_scan_exceptions(self, ):
        """Expect RuntimeError when scanning the path '/non/existent/path'."""
        scanner = pyrepscan.GitRepositoryScanner()
        missing_repository = '/non/existent/path'

        with self.assertRaises(expected_exception=RuntimeError):
            scanner.scan(repository_path=missing_repository)
Esempio n. 8
0
    def test_scan_from_timestamp(self, ):
        """Scan twice with ``from_timestamp`` one second apart and compare results.

        With the timestamp at 2004-01-01T00:00:00 UTC the scan is expected to
        return the single match whose ``commit_time`` is that same instant;
        with the timestamp one second later the scan is expected to return
        nothing.
        """
        scanner = pyrepscan.GitRepositoryScanner()
        scanner.add_content_rule(
            name='First Rule',
            pattern=r'''(content)''',
            whitelist_patterns=[],
            blacklist_patterns=[],
        )
        scanner.add_file_extension_to_skip('py')
        scanner.add_file_path_to_skip('test_')

        # Both cut-offs are timezone-aware UTC instants converted to whole seconds.
        boundary = datetime.datetime(2004, 1, 1, tzinfo=datetime.timezone.utc)
        at_boundary = int(boundary.timestamp())
        after_boundary = int((boundary + datetime.timedelta(seconds=1)).timestamp())

        matches = scanner.scan(
            repository_path=self.tmpdir.name,
            branch_glob_pattern='*',
            from_timestamp=at_boundary,
        )
        # Commit ids are removed before comparing; all other fields are pinned.
        for match in matches:
            match.pop('commit_id')

        expected_matches = [
            {
                'author_email': '*****@*****.**',
                'author_name': 'Author Name',
                'commit_message': 'edited file in non_merged_branch',
                'commit_time': '2004-01-01T00:00:00',
                'file_oid': '057032a2108721ad1de6a9240fd1a8f45bc3f2ef',
                'file_path': 'file.txt',
                'match_text': 'content',
                'rule_name': 'First Rule',
            },
        ]
        self.assertCountEqual(first=matches, second=expected_matches)

        matches = scanner.scan(
            repository_path=self.tmpdir.name,
            branch_glob_pattern='*',
            from_timestamp=after_boundary,
        )
        for match in matches:
            match.pop('commit_id')

        self.assertListEqual(list1=matches, list2=[])
Esempio n. 9
0
    def test_scan_regular(self, ):
        """Scan with a content rule on two branch globs, then read back file contents.

        The first scan uses the glob ``'*master'`` and expects three matches;
        the second uses ``'*'`` and expects those three plus one more. Finally
        the content of three of the reported file object ids is fetched and
        compared against known bytes.
        """

        def expected_match(commit_message, commit_time, file_oid):
            # Every expected match differs only in these three fields.
            return {
                'author_email': '*****@*****.**',
                'author_name': 'Author Name',
                'commit_message': commit_message,
                'commit_time': commit_time,
                'file_oid': file_oid,
                'file_path': 'file.txt',
                'match_text': 'content',
                'rule_name': 'First Rule',
            }

        scanner = pyrepscan.GitRepositoryScanner()
        scanner.add_content_rule(
            name='First Rule',
            pattern=r'''(content)''',
            whitelist_patterns=[],
            blacklist_patterns=[],
        )
        scanner.add_file_extension_to_skip('py')
        scanner.add_file_path_to_skip('test_')

        master_matches = [
            expected_match(
                commit_message='edited file',
                commit_time='2001-01-01T00:00:00',
                file_oid='47d2739ba2c34690248c8f91b84bb54e8936899a',
            ),
            expected_match(
                commit_message='edited file in new branch',
                commit_time='2002-01-01T00:00:00',
                file_oid='0407a18f7c6802c7e7ddc5c9e8af4a34584383ff',
            ),
            expected_match(
                commit_message='initial commit',
                commit_time='2000-01-01T00:00:00',
                file_oid='6b584e8ece562ebffc15d38808cd6b98fc3d97ea',
            ),
        ]
        all_branch_matches = master_matches + [
            expected_match(
                commit_message='edited file in non_merged_branch',
                commit_time='2004-01-01T00:00:00',
                file_oid='057032a2108721ad1de6a9240fd1a8f45bc3f2ef',
            ),
        ]

        # First pass: only branches matching '*master'.
        matches = scanner.scan(
            repository_path=self.tmpdir.name,
            branch_glob_pattern='*master',
            from_timestamp=0,
        )
        for match in matches:
            match.pop('commit_id')
        self.assertCountEqual(first=matches, second=master_matches)

        # Second pass: every branch.
        matches = scanner.scan(
            repository_path=self.tmpdir.name,
            branch_glob_pattern='*',
            from_timestamp=0,
        )
        for match in matches:
            match.pop('commit_id')
        self.assertCountEqual(first=matches, second=all_branch_matches)

        # The object ids reported above must resolve to these exact bytes.
        known_contents = (
            (b'new content', '47d2739ba2c34690248c8f91b84bb54e8936899a'),
            (b'new content from new branch', '0407a18f7c6802c7e7ddc5c9e8af4a34584383ff'),
            (b'content', '6b584e8ece562ebffc15d38808cd6b98fc3d97ea'),
        )
        for expected_bytes, file_oid in known_contents:
            self.assertEqual(
                first=expected_bytes,
                second=scanner.get_file_content(
                    repository_path=self.tmpdir.name,
                    file_oid=file_oid,
                ),
            )
Esempio n. 10
0
import pyrepscan


# Usage example: register one content rule, then scan every branch of a repository.
grs = pyrepscan.GitRepositoryScanner()

# The pattern matches one of the listed prefixes followed by exactly 16
# uppercase letters or digits.
_aws_manager_id_rule = {
    'name': 'AWS Manager ID',
    'pattern': r'(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}',
    'whitelist_patterns': [],
    'blacklist_patterns': [],
}
grs.add_content_rule(**_aws_manager_id_rule)

# '*' is the branch glob pattern, as in the tests' all-branch scans.
results = grs.scan(repository_path='/path/to/repository', branch_glob_pattern='*')