def test_mask_secrets():
    """Mask AWS and Azure credentials inside SQL text; leave plain text untouched."""
    raw_sql = (
        "create stage mystage "
        "URL = 's3://mybucket/mypath/' "
        "credentials = (aws_key_id = 'AKIAIOSFODNN7EXAMPLE' "
        "aws_secret_key = 'frJIUN8DYpKDtOLCwo//yllqDzg='); "
        "create stage mystage2 "
        "URL = 'azure//mystorage.blob.core.windows.net/cont' "
        "credentials = (azure_sas_token = "
        "'?sv=2016-05-31&ss=b&srt=sco&sp=rwdl&se=2018-06-27T10:05:50Z&"
        "st=2017-06-27T02:05:50Z&spr=https,http&"
        "sig=bgqQwoXwxzuD2GJfagRg7VOS8hzNr3QLT7rhS8OFRLQ%3D')"
    )
    expected_sql = (
        "create stage mystage "
        "URL = 's3://mybucket/mypath/' "
        "credentials = (aws_key_id='****' "
        "aws_secret_key='****'); "
        "create stage mystage2 "
        "URL = 'azure//mystorage.blob.core.windows.net/cont' "
        "credentials = (azure_sas_token = "
        "'?sv=2016-05-31&ss=b&srt=sco&sp=rwdl&se=2018-06-27T10:05:50Z&"
        "st=2017-06-27T02:05:50Z&spr=https,http&"
        "sig=****')"
    )

    # One statement carrying every kind of secret at once.
    _, scrubbed, _ = SecretDetector.mask_secrets(raw_sql)
    assert scrubbed == expected_sql

    # 500 random lowercase letters must pass through without substitutions.
    noise = "".join(random.choice(string.ascii_lowercase) for _ in range(500))
    _, scrubbed, _ = SecretDetector.mask_secrets(noise)
    assert scrubbed == noise
def test_password():
    """Check that a password following a recognised label is replaced by ``****``.

    Covers the ``password`` label in lower, upper and mixed case, with ``:``
    and `` = `` separators, plus the short ``pwd`` label.  For every input the
    detector must report that masking happened, return no error string, and
    keep the label/separator exactly as written.
    """
    # NOTE(review): the per-case statements and the password literal were
    # destroyed by a secret-scrubbing pass ("******"); they are rebuilt here
    # from the one intact case ("pwd:") and the surviving expected outputs.
    # The sample password is the throwaway value used by test_token_password.
    random_password = "Fh[+2J~AcqeqW%?"

    labels = ("password:", "PASSWORD:", "PassWorD:", "password = ", "pwd:")
    for label in labels:
        masked, masked_str, err_str = SecretDetector.mask_secrets(
            label + random_password
        )
        assert masked
        assert err_str is None
        assert masked_str == label + "****"
def exception_in_log_masking():
    """Format a DEBUG record through SecretDetector and check the failure report.

    The formatted output must mention "Test exception", "secret_detector.py"
    and "sanitize_log_str", and must not contain the original message.
    NOTE(review): presumably called with the sanitising step patched to raise
    "Test exception" -- the patching is not visible in this function.
    """
    message = "This string will raise an exception"
    record = logging.LogRecord(
        name=SecretDetector.__name__,
        level=logging.DEBUG,
        pathname="test_unit_log_secret_detector.py",
        lineno=45,
        msg=message,
        args=[],
        exc_info=None,
    )
    # Pin the timestamp attribute on the record before formatting.
    record.asctime = '2003-07-08 16:49:45,896'

    formatter = SecretDetector()
    formatted = formatter.format(record)

    for fragment in ("Test exception", "secret_detector.py", "sanitize_log_str"):
        assert fragment in formatted
    assert message not in formatted
def test_mask_private_keys():
    """A quoted "privateKeyData" value must be replaced with XXXX."""
    raw = "\"privateKeyData\": \"aslkjdflasjf\""
    expected = "\"privateKeyData\": \"XXXX\""

    _, scrubbed, _ = SecretDetector.mask_secrets(raw)

    assert scrubbed == expected
def filter_log() -> None:
    """Sets up our SecretDetector as a logging formatter.

    A workaround to use our custom Formatter in pytest based on the discussion at
    https://github.com/pytest-dev/pytest/issues/2987

    A DEBUG-level ``FileHandler`` is added to the ``snowflake.connector``
    logger.  The log file is ``snowflake_ssm_rt.log`` in the parent of the
    directory named by ``CLIENT_LOG_DIR_PATH_DOCKER`` (default: the directory
    containing this file).
    """
    import logging
    import pathlib

    from snowflake.connector.secret_detector import SecretDetector

    # SecretDetector is a class, so it has to be tested with issubclass().
    # The previous isinstance() check on the class object was always False,
    # which meant the real SecretDetector was always thrown away in favour of
    # a pass-through formatter and nothing in the log was ever masked.
    if isinstance(SecretDetector, type) and issubclass(
        SecretDetector, logging.Formatter
    ):
        formatter_cls = SecretDetector
    else:
        # Fall back to the stock formatter when SecretDetector cannot be used
        # as a logging.Formatter (e.g. it has been replaced by a stand-in).
        formatter_cls = logging.Formatter

    log_dir = os.getenv('CLIENT_LOG_DIR_PATH_DOCKER',
                        str(pathlib.Path(__file__).parent.absolute()))

    _logger = getLogger('snowflake.connector')
    _logger.setLevel(logging.DEBUG)

    sd = logging.FileHandler(
        os.path.join(log_dir, '', '..', 'snowflake_ssm_rt.log'))
    sd.setLevel(logging.DEBUG)
    sd.setFormatter(
        formatter_cls(
            '%(asctime)s - %(threadName)s %(filename)s:%(lineno)d - %(funcName)s() - %(levelname)s - %(message)s'
        ))
    _logger.addHandler(sd)
def filter_log() -> None:
    """Sets up our SecretDetector as a logging formatter.

    A workaround to use our custom Formatter in pytest based on the discussion at
    https://github.com/pytest-dev/pytest/issues/2987

    A DEBUG-level ``FileHandler`` is added to the ``snowflake.connector``
    logger.  Handlers already attached to that logger are pinned to the
    logger's previous effective level so that raising the logger to DEBUG does
    not make them more verbose.  The log file is ``snowflake_ssm_rt.log`` in
    the parent of the directory named by ``CLIENT_LOG_DIR_PATH_DOCKER``
    (default: the directory containing this file).
    """
    import logging
    import pathlib

    from snowflake.connector.secret_detector import SecretDetector

    # SecretDetector is a class, so it has to be tested with issubclass().
    # The previous isinstance() check on the class object was always False,
    # which meant the real SecretDetector was always thrown away in favour of
    # a pass-through formatter and nothing in the log was ever masked.
    if isinstance(SecretDetector, type) and issubclass(
        SecretDetector, logging.Formatter
    ):
        formatter_cls = SecretDetector
    else:
        # Fall back to the stock formatter when SecretDetector cannot be used
        # as a logging.Formatter (e.g. it has been replaced by a stand-in).
        formatter_cls = logging.Formatter

    log_dir = os.getenv(
        "CLIENT_LOG_DIR_PATH_DOCKER", str(pathlib.Path(__file__).parent.absolute())
    )

    _logger = getLogger("snowflake.connector")
    original_log_level = _logger.getEffectiveLevel()
    # Make sure that the old handlers are unaffected by the DEBUG level set
    # for the new handler.
    for handler in _logger.handlers:
        handler.setLevel(original_log_level)
    _logger.setLevel(logging.DEBUG)

    sd = logging.FileHandler(os.path.join(log_dir, "", "..", "snowflake_ssm_rt.log"))
    sd.setLevel(logging.DEBUG)
    sd.setFormatter(
        formatter_cls(
            "%(asctime)s - %(threadName)s %(filename)s:%(lineno)d - %(funcName)s() - %(levelname)s - %(message)s"
        )
    )
    _logger.addHandler(sd)
def test_mask_private_keys():
    """The value stored under "privateKeyData" must come back as XXXX."""
    key_fragment = '"privateKeyData": "aslkjdflasjf"'
    redacted_fragment = '"privateKeyData": "XXXX"'

    _, output, _ = SecretDetector.mask_secrets(key_fragment)

    assert output == redacted_fragment
def test_mask_token():
    """A long token after any recognised label must be replaced by ``****``."""
    long_token = (
        "_Y1ZNETTn5/qfUWj3Jedby7gipDzQs=U"
        "KyJH9DS=nFzzWnfZKGV+C7GopWCGD4Lj"
        "OLLFZKOE26LXHDt3pTi4iI1qwKuSpf/F"
        "mClCMBSissVsU3Ei590FP0lPQQhcSGcD"
        "u69ZL_1X6e9h5z62t/iY7ZkII28n2qU="
        "nrBJUgPRCIbtJQkVJXIuOHjX4G5yUEKj"
        "ZBAx4w6=_lqtt67bIA=o7D=oUSjfywsR"
        "FoloNIkBPXCwFTv+1RVUHgVA2g8A9Lw5"
        "XdJYuI8vhg=f0bKSq7AhQ2Bh"
    )

    # Each label (including its separator and spacing) must survive verbatim.
    labels = (
        "Token =",
        "idToken : ",
        "sessionToken : ",
        "masterToken : ",
        "assertion content:",
    )
    for label in labels:
        masked, masked_str, err_str = SecretDetector.mask_secrets(label + long_token)
        assert masked
        assert err_str is None
        assert masked_str == label + "****"
def test_token_false_positives():
    """A log line that merely mentions the word "token" must not be altered."""
    log_line = (
        "2020-04-30 23:06:04,069 - MainThread auth.py:397"
        " - write_temporary_credential() - DEBUG - no ID "
        "token is given when try to store temporary credential"
    )

    was_masked, output, error = SecretDetector.mask_secrets(log_line)

    assert not was_masked
    assert error is None
    assert output == log_line
def filter_log():
    """Attach a DEBUG file handler formatted by SecretDetector to the connector logger.

    Workaround for using our custom Formatter under pytest; see the discussion
    at https://github.com/pytest-dev/pytest/issues/2987
    """
    from snowflake.connector.secret_detector import SecretDetector
    import logging
    import pathlib

    log_format = '%(asctime)s - %(threadName)s %(filename)s:%(lineno)d - %(funcName)s() - %(levelname)s - %(message)s'

    # Test logs go to CLIENT_LOG_DIR_PATH_DOCKER when it is set, otherwise to
    # the directory that contains this file.
    here = str(pathlib.Path(__file__).parent.absolute())
    target_dir = os.getenv('CLIENT_LOG_DIR_PATH_DOCKER', here)

    connector_logger = getLogger('snowflake.connector')
    connector_logger.setLevel(logging.DEBUG)

    file_handler = logging.FileHandler(
        os.path.join(target_dir, 'snowflake_ssm_rt.log'))
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(SecretDetector(log_format))
    connector_logger.addHandler(file_handler)
def test_mask_sas_token():
    """Mask the signature parts of Azure and S3 pre-signed URLs, singly and combined."""
    azure_url = (
        "https://someaccounts.blob.core.windows.net/results/018b90ab-0033-"
        "5f8e-0000-14f1000bd376_0/main/data_0_0_1?sv=2015-07-08&"
        "sig=iCvQmdZngZNW%2F4vw43j6%2BVz6fndHF5LI639QJba4r8o%3D&"
        "spr=https&st=2016-04-12T03%3A24%3A31Z&"
        "se=2016-04-13T03%3A29%3A31Z&srt=s&ss=bf&sp=rwl"
    )
    azure_url_masked = (
        "https://someaccounts.blob.core.windows.net/results/018b90ab-0033-"
        "5f8e-0000-14f1000bd376_0/main/data_0_0_1?sv=2015-07-08&"
        "sig=****&"
        "spr=https&st=2016-04-12T03%3A24%3A31Z&"
        "se=2016-04-13T03%3A29%3A31Z&srt=s&ss=bf&sp=rwl"
    )
    s3_url = (
        "https://somebucket.s3.amazonaws.com/vzy1-s-va_demo0/results/018b92f3"
        "-01c2-02dd-0000-03d5000c8066_0/main/data_0_0_1?"
        "x-amz-server-side-encryption-customer-algorithm=AES256&"
        "response-content-encoding=gzip&AWSAccessKeyId=AKIAIOSFODNN7EXAMPLE"
        "&Expires=1555481960&Signature=zFiRkdB9RtRRYomppVes4fQ%2ByWw%3D"
    )
    s3_url_masked = (
        "https://somebucket.s3.amazonaws.com/vzy1-s-va_demo0/results/018b92f3"
        "-01c2-02dd-0000-03d5000c8066_0/main/data_0_0_1?"
        "x-amz-server-side-encryption-customer-algorithm=AES256&"
        "response-content-encoding=gzip&AWSAccessKeyId=****"
        "&Expires=1555481960&Signature=****"
    )

    def scrub(text):
        # Only the masked text (second tuple element) matters for this test.
        _, output, _ = SecretDetector.mask_secrets(text)
        return output

    # Single Azure URL, then single S3 URL.
    assert scrub(azure_url) == azure_url_masked
    assert scrub(s3_url) == s3_url_masked

    # Randomly generated string should cause no substitutions.
    noise = "".join(random.choice(string.ascii_lowercase) for _ in range(200))
    assert scrub(noise) == noise

    # Two URLs separated by a newline: azure+azure, s3+s3, azure+s3.
    azure_pair = (azure_url, azure_url_masked)
    s3_pair = (s3_url, s3_url_masked)
    combinations = (
        (azure_pair, azure_pair),
        (s3_pair, s3_pair),
        (azure_pair, s3_pair),
    )
    for (first, first_masked), (second, second_masked) in combinations:
        assert scrub(first + "\n" + second) == first_masked + "\n" + second_masked
def filter_log():
    """Attach a DEBUG file handler formatted by SecretDetector to the connector logger.

    Workaround for using our custom Formatter under pytest; see the discussion
    at https://github.com/pytest-dev/pytest/issues/2987
    """
    # TODO: consider an environment variable to control whether this setup
    # runs at all (this should be done).
    from snowflake.connector.secret_detector import SecretDetector
    import logging
    import pathlib

    log_format = '%(asctime)s - %(threadName)s %(filename)s:%(lineno)d - %(funcName)s() - %(levelname)s - %(message)s'

    # The log file lives next to this conftest file.
    conftest_dir = pathlib.Path(__file__).parent.absolute()

    connector_logger = getLogger('snowflake.connector')
    connector_logger.setLevel(logging.DEBUG)

    log_path = os.path.join(str(conftest_dir), 'snowflake_ssm_rt.log')
    file_handler = logging.FileHandler(log_path)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(SecretDetector(log_format))
    connector_logger.addHandler(file_handler)
def test_mask_aws_secret():
    """Mask aws_key_id / aws_secret_key inside a multi-line COPY INTO statement."""
    # Everything up to and including the opening of the credentials clause.
    head = (
        "copy into 's3://xxxx/test' from \n"
        "(select seq1(), random()\n"
        ", random(), random(), random(), random()\n"
        ", random(), random(), random(), random()\n"
        ", random() , random(), random(), random()\n"
        "\tfrom table(generator(rowcount => 10000)))\n"
        "credentials=(\n"
    )
    # Everything after the two credential lines.
    tail = (
        " )\n"
        "OVERWRITE = TRUE \n"
        "MAX_FILE_SIZE = 500000000 \n"
        "HEADER = TRUE \n"
        "FILE_FORMAT = (TYPE = PARQUET SNAPPY_COMPRESSION = TRUE )\n"
        ";"
    )
    plain_credentials = (
        " aws_key_id='xxdsdfsafds'\n"
        " aws_secret_key='safas+asfsad+safasf'\n"
    )
    masked_credentials = (
        " aws_key_id='****'\n"
        " aws_secret_key='****'\n"
    )

    statement = head + plain_credentials + tail
    expected = head + masked_credentials + tail

    # Only the key id and the secret key may change.
    _, scrubbed, _ = SecretDetector.mask_secrets(statement)
    assert scrubbed == expected
def test_exception_in_masking():
    """When masking fails, both the text and the error must read "Test exception".

    NOTE(review): presumably run with a masking step patched to raise
    "Test exception" -- the patching is not visible in this function.
    """
    failure_text = "Test exception"

    outcome = SecretDetector.mask_secrets("This string will raise an exception")
    was_masked, output, error = outcome

    assert was_masked
    assert error == failure_text
    assert output == failure_text
def test_token_password():
    """Check masking when tokens and passwords appear together in one string.

    Cases: token then password, password then token, two tokens around a
    password, two passwords under different labels, and three passwords under
    the same label.  For every input the detector must report that masking
    happened, return no error string, and replace each secret with ``****``
    while leaving labels and filler text intact.
    """
    # NOTE(review): the "+ random_pwd ..." operands (and one whole call/assert
    # stanza) were destroyed by a secret-scrubbing pass ("******"); they are
    # rebuilt here from the surviving labels and expected outputs.  Which of
    # random_pwd / random_pwd2 was used in the scrubbed positions is an
    # assumption -- it does not affect the expected masked text.
    long_token = (
        "_Y1ZNETTn5/qfUWj3Jedby7gipDzQs=U"
        "KyJH9DS=nFzzWnfZKGV+C7GopWCGD4Lj"
        "OLLFZKOE26LXHDt3pTi4iI1qwKuSpf/F"
        "mClCMBSissVsU3Ei590FP0lPQQhcSGcD"
        "u69ZL_1X6e9h5z62t/iY7ZkII28n2qU="
        "nrBJUgPRCIbtJQkVJXIuOHjX4G5yUEKj"
        "ZBAx4w6=_lqtt67bIA=o7D=oUSjfywsR"
        "FoloNIkBPXCwFTv+1RVUHgVA2g8A9Lw5"
        "XdJYuI8vhg=f0bKSq7AhQ2Bh"
    )
    long_token2 = (
        "ktL57KJemuq4-M+Q0pdRjCIMcf1mzcr"
        "MwKteDS5DRE/Pb+5MzvWjDH7LFPV5b_"
        "/tX/yoLG3b4TuC6Q5qNzsARPPn_zs/j"
        "BbDOEg1-IfPpdsbwX6ETeEnhxkHIL4H"
        "sP-V"
    )
    random_pwd = "Fh[+2J~AcqeqW%?"
    random_pwd2 = random_pwd + "vdkav13"
    filler = " random giberish "

    # (input text, expected masked text)
    cases = [
        # token followed by password
        (
            "token=" + long_token + filler + "password:" + random_pwd,
            "token=****" + filler + "password:****",
        ),
        # order reversed
        (
            "password:" + random_pwd + filler + "token=" + long_token,
            "password:****" + filler + "token=****",
        ),
        # multiple tokens and password
        (
            "token=" + long_token + filler
            + "password:" + random_pwd + filler
            + "idToken:" + long_token2,
            "token=****" + filler + "password:****" + filler + "idToken:****",
        ),
        # multiple passwords, different labels
        (
            "password=" + random_pwd + filler + "pwd:" + random_pwd2,
            "password=" + "****" + filler + "pwd:" + "****",
        ),
        # multiple passwords, same label
        (
            "password=" + random_pwd + filler
            + "password=" + random_pwd2 + filler
            + "password=" + random_pwd,
            "password=" + "****" + filler
            + "password=" + "****" + filler
            + "password=" + "****",
        ),
    ]

    for test_string_w_prefix, expected in cases:
        masked, masked_str, err_str = SecretDetector.mask_secrets(
            test_string_w_prefix
        )
        assert masked
        assert err_str is None
        assert masked_str == expected
def basic_masking(test_str):
    """Assert that ``test_str`` passes through SecretDetector completely unchanged.

    The detector must report that nothing was masked, return no error string,
    and hand back text equal to the input.
    """
    was_masked, output, error = SecretDetector.mask_secrets(test_str)

    assert not was_masked
    assert error is None
    assert output == test_str