def test_detects_hardcoded_value_json_single_quotes(self):
    """A single-quoted key/value pair inside braces is reported verbatim."""
    payload = '''{ \'password\': \'super-secret-password\' \n\n\t }'''
    expected = ['\'password\': \'super-secret-password\'']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_detects_hardcoded_value_json(self):
    """A double-quoted key/value pair with no blank after the colon is reported."""
    payload = '''{ "password":"******" \n\n\t }'''
    expected = ['"password":"******"']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_detects_hardcoded_value_json_multiple_keys(self):
    """Only the password-like pair is reported; the unrelated "key" pair is not."""
    payload = '''{ "pass": "******", "key": "1234" }'''
    expected = ['"pass": "******"']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_ignores_json_without_passwords(self):
    """Brace-delimited text without password-like keys produces no match."""
    payload = '''{ "some_key": "this is not a password", "another_key": 100-12301-123, }'''

    found, hits = matchers.password_matcher(payload)

    # The matcher is expected to hand back None (not an empty list) here
    self.assertFalse(found)
    self.assertEqual(hits, None)
def test_detects_url_in_json_file(self):
    """A quoted URL carrying user:password credentials is reported, quotes included."""
    payload = '''{ "engine": "db-schema://user:strong-pwd@localhost:5432/mydb", "key": "1234", }'''
    expected = ['"db-schema://user:strong-pwd@localhost:5432/mydb"']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_detects_hardcoded_value_json_multiple_passwords(self):
    """Both the double-quoted and the single-quoted password pairs are reported."""
    payload = """{ "pass": "******", "key": "1234", \'pwd\' : \'qwerty\', }"""
    expected = ['"pass": "******"', "'pwd' : 'qwerty'"]

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_detects_hardcoded_with_a_double_quote(self):
    """Only the ``password:`` entry of a key/value listing is reported.

    NOTE(review): the method name mentions a double quote, but the fixture
    below only contains single-quoted values -- confirm the fixture.
    """
    payload = ''' db: host: 'host' user: '******' password: '******' database: 'a_db' '''
    expected = ["password: '******'"]

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_detects_hardcoded_double_quotes(self):
    """A double-quoted ``password:`` entry is the only reported match."""
    payload = ''' database: drivername: "dbdriver" host: "dbhost" port: "port" username: "******" password: "******" database: "database" '''
    expected = ['password: "******"']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_detects_hardcoded_with_a_single_quote(self):
    """A double-quoted ``password:`` entry is the only reported match.

    NOTE(review): the method name mentions a single quote, but the fixture
    below only contains double-quoted values -- confirm the fixture.
    """
    payload = ''' database: drivername: "dbdriver" host: "dbhost" port: "port" username: "******" password: "******" database: "database" '''
    expected = ['password: "******"']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_detects_hardcoded_value_json(self):
    """A double-quoted ``password:`` entry is the only reported match.

    NOTE(review): the method name says "json", but the fixture below is a
    plain ``key: "value"`` listing with no braces -- confirm the name.
    """
    payload = """ database: drivername: "dbdriver" host: "dbhost" port: "port" username: "******" password: "******" database: "database" """
    expected = ['password: "******"']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def check(self):
    """Apply the matcher rules to ``self.content`` and return a ``Result``.

    Outcomes, in the order they are evaluated:
      * ``self.error`` is truthy -> ``Result(self.filename, self.error)``
      * the file extension is not in ``self.allowed_extensions`` ->
        ``Result(label, FILETYPE_NOT_ALLOWED)``
      * otherwise ``MATCH`` (with the matches) or ``NOT_MATCH``

    Side effect: ``self.content`` is replaced by the content returned from
    the base64 matcher (called with ``remove=True``).
    """
    # Label combines the filename with the two commit hashes being compared
    label = '%s from commit %s to commit %s' % (
        self.filename, self.commit_hashes[0], self.commit_hashes[1])

    # Notes collected while checking; for now this only records
    # that base64 code was removed
    notes = []

    # Git detects binary files when diffing and shows no differences for
    # them, only a message such as:
    #   Binary files /dev/null and b/img.JPG differ
    # An error recorded on the instance short-circuits the whole check.
    if self.error:
        return Result(self.filename, self.error)

    # Skip files whose extension is not explicitly allowed
    extension = filetype.get_extension(self.filename)
    if extension not in self.allowed_extensions:
        return Result(label, FILETYPE_NOT_ALLOWED)

    # Rule 1: strip base64 from the additions and remember that we did
    base64_result = matchers.base64_matcher(self.content, remove=True)
    has_base64, self.content = base64_result
    if has_base64:
        notes.append('BASE64_REMOVED')

    # Rule 2: look for passwords in what is left
    has_pwd, matches = matchers.password_matcher(self.content)
    if not has_pwd:
        return Result(label, NOT_MATCH, comments=notes)
    return Result(label, MATCH, matches=matches, comments=notes)
def check(self):
    """Scan the file at ``self.path`` for passwords and return a ``Result``.

    Outcomes, in the order they are evaluated:
      * file larger than 1MB -> ``Result(self.path, BIG_FILE)``; the file
        is never opened
      * ``self.mimetype`` does not start with ``'text/'`` ->
        ``Result(self.path, NOT_PLAIN_TEXT)``
      * otherwise the file is read, base64 content is removed, and the
        result is ``MATCH`` (with the matches) or ``NOT_MATCH``
    """
    # Files over 1MB only get a warning and are not opened, since pattern
    # matching on them is going to be really slow.
    # (Plain int literal: the Python 2-only ``L`` suffix is a syntax error
    # on Python 3 and is not needed for the comparison.)
    f_size = os.stat(self.path).st_size
    if f_size > 1048576:
        return Result(self.path, BIG_FILE)

    # Non-plain-text files under 1MB are probably xlsx, pdfs, pngs, zips,
    # ppt, pptx; they only get a warning too.
    # TODO: add checks for certain files? (word, excel, powerpoint...)
    # TODO: files whose mimetype could not be determined are not filtered yet
    if not self.mimetype.startswith('text/'):
        return Result(self.path, NOT_PLAIN_TEXT)

    # Only plain text files smaller than 1MB get here: read and apply rules
    with open(self.path, 'r') as f:
        content = f.read()

    # Remove potential base64 strings before matching and say so.
    # print(...) with a single string prints the same text on Python 2 and 3.
    has_base64, content = matchers.base64_matcher(content, remove=True)
    if has_base64:
        print('Removing base64 code...')

    # TODO: maybe send warnings for data files (even if they are under 1MB)?
    # First matcher: passwords
    password_matcher, matches = matchers.password_matcher(content)
    if password_matcher:
        return Result(self.path, MATCH, matches)
    return Result(self.path, NOT_MATCH)
def test_detects_easy_password_single_quotes(self):
    """A single-quoted ``password=`` assignment is reported in full."""
    payload = 'password=\'123456\''

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    # The whole input is expected back as the single match
    self.assertEqual(hits, [payload])
def test_detects_hardcoded_value_json_blanks(self):
    """A password pair with blanks inside the key and around the colon is detected."""
    str_to_check = '''{ " pass" : "dont-hack-me-please" \n\n\t }'''
    password_matcher, matches = matchers.password_matcher(str_to_check)
    # Without an assertion this test could never fail; pin the detection
    # flag so a matcher regression is actually caught.
    # NOTE(review): the exact text returned in ``matches`` for this input is
    # not pinned here -- add an assertEqual once the expected value is confirmed.
    self.assertTrue(password_matcher)
def test_detects_easy_password_linebreaks(self):
    """The trailing newline is not part of the reported match."""
    payload = 'password ="******"\n'
    expected = ['password ="******"']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_detects_sqlalchemy_engine_different_settings(self):
    """An unquoted URL with user:password credentials is reported in full."""
    payload = 'another-schema://user2:1234@localhost:0000/awesome-db'

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    # The whole input is expected back as the single match
    self.assertEqual(hits, [payload])
def test_detects_easy_password(self):
    """A double-quoted ``password=`` assignment is reported in full."""
    payload = 'password="******"'

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    # The whole input is expected back as the single match
    self.assertEqual(hits, [payload])
def test_detects_multiple_passwords(self):
    """Two assignments on separate lines are both reported; ``var=5`` is not."""
    payload = 'PASSWORD_MYSQL=\'iYiLKi7879\' \n \n password ="******"\n var=5'
    # Expected order as asserted: the lowercase assignment comes first
    expected = ['password ="******"', 'PASSWORD_MYSQL=\'iYiLKi7879\'']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_ignores_pwd_from_another_variable(self):
    """Assigning an unquoted name to ``pwd`` is not reported as a password."""
    payload = 'pwd=variable'

    found, hits = matchers.password_matcher(payload)

    # The matcher is expected to hand back None (not an empty list) here
    self.assertFalse(found)
    self.assertEqual(hits, None)
def test_ignores_password_from_another_variable_with_blanks(self):
    """An unquoted value after ``pwd =`` (blank before ``=``) is not reported."""
    payload = 'pwd =variable\n'

    found, hits = matchers.password_matcher(payload)

    # The matcher is expected to hand back None (not an empty list) here
    self.assertFalse(found)
    self.assertEqual(hits, None)
def test_detects_multiple_passwords(self):
    """Two assignments on separate lines are both reported; ``var=5`` is not."""
    str_to_check = "PASSWORD_MYSQL='iYiLKi7879' \n \n password =\"123456\"\n var=5"
    password_matcher, matches = matchers.password_matcher(str_to_check)
    self.assertTrue(password_matcher)
    # The expected second match must be text that actually occurs in the
    # input: the input holds "123456", so a masked '******' value could
    # never compare equal to what the matcher returns.
    self.assertEqual(matches, ["PASSWORD_MYSQL='iYiLKi7879'",
                               'password ="123456"'])
def test_detects_password_with_prefix(self):
    """A variable whose name ends in ``PASSWORD`` is reported in full."""
    payload = "POSTGRES_PASSWORD='******'"

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    # The whole input is expected back as the single match
    self.assertEqual(hits, [payload])
def test_detects_easy_password_linebreaks(self):
    """The trailing newline is not part of the reported match."""
    payload = 'password ="******"\n'
    expected = ['password ="******"']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_detects_password_with_suffix(self):
    """A variable whose name starts with ``PASSWORD`` is reported in full."""
    payload = 'PASSWORD_MYSQL=\'iYiLKi7879\''

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    # The whole input is expected back as the single match
    self.assertEqual(hits, [payload])
def test_detects_multiple_passwords(self):
    """Two assignments on separate lines are both reported; ``var=5`` is not."""
    payload = 'PASSWORD_MYSQL=\'iYiLKi7879\' \n \n password ="******"\n var=5'
    # Expected order as asserted: the lowercase assignment comes first
    expected = ['password ="******"', 'PASSWORD_MYSQL=\'iYiLKi7879\'']

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    self.assertEqual(hits, expected)
def test_ignores_password_from_another_variable_with_blanks(self):
    """An unquoted value after ``pwd =`` (blank before ``=``) is not reported."""
    payload = 'pwd =variable\n'

    found, hits = matchers.password_matcher(payload)

    # The matcher is expected to hand back None (not an empty list) here
    self.assertFalse(found)
    self.assertEqual(hits, None)
def test_detects_sqlalchemy_double_quoted(self):
    """A double-quoted credentials URL is reported with its quotes."""
    payload = '"db-schema://user:strong-pwd@localhost:5432/mydb"'

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    # The whole input, quotes included, is expected back as the single match
    self.assertEqual(hits, [payload])
def test_detects_sqlalchemy_quoted(self):
    """A single-quoted credentials URL is reported with its quotes."""
    payload = '\'db-schema://user:strong-pwd@localhost:5432/mydb\''

    found, hits = matchers.password_matcher(payload)

    self.assertTrue(found)
    # The whole input, quotes included, is expected back as the single match
    self.assertEqual(hits, [payload])