def test_date_shift():
    """Round-trip one date through the date-shift transformer four ways.

    Two different secrets are each exercised once without a tweak and once
    with a tweak. For every configuration the test checks that

    * restoring the transformed value yields the original date string,
    * the transformed value differs from the original date string, and
    * no two configurations produce the same transformed output.
    """
    test_date = str(datetime.date(2006, 6, 15))
    secret_a = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    secret_b = "1628AED2A6A809CBF7158F7F036D6F059D8D54FC6A942B7E15F4F3CEF4380AA4"

    def _config(secret, **extra):
        # Every config shares the same day range and label; only the secret
        # and the optional tweak differ. A fresh labels list is built per call.
        return DateShiftConfig(
            secret=secret,
            lower_range_days=-10,
            upper_range_days=45,
            labels=["date"],
            **extra,
        )

    def _round_trip(config, with_tweak=False):
        # Build the transformer, optionally override its tweak reference,
        # then transform the date and restore it again.
        xf = factory(config)
        if with_tweak:
            xf.field_ref_dict["tweak"] = FieldRef("user_id", 10, 17)
        encoded = xf.transform_field("date", test_date, None)
        decoded = xf.restore_field("date", encoded["date"], None)
        return encoded, decoded

    date_config = _config(secret_a)
    date_config_tweak = _config(secret_a, tweak=FieldRef("test_id"))
    date2_config = _config(secret_b)
    # Both *_tweak configs declare a tweak field, so each differs from its
    # untweaked counterpart by configuration and not only through the
    # field_ref_dict override applied in _round_trip.
    date2_config_tweak = _config(secret_b, tweak=FieldRef("test_id"))

    encode, decode = _round_trip(date_config)
    encode2, decode2 = _round_trip(date2_config)
    encode_t, decode_t = _round_trip(date_config_tweak, with_tweak=True)
    encode2_t, decode2_t = _round_trip(date2_config_tweak, with_tweak=True)

    # Restoring must give back the original date for every configuration.
    for decoded in (decode_t, decode2_t, decode, decode2):
        assert decoded["date"] == test_date

    # The transformed value must never equal the input date.
    encodings = [encode_t, encode2_t, encode, encode2]
    for encoded in encodings:
        assert encoded["date"] != test_date

    # Every pair of configurations must yield a distinct transformed result.
    for index, left in enumerate(encodings):
        for right in encodings[index + 1:]:
            assert left != right
def test_combine(record_and_meta_2):
    """Combine the 'dni' field with latitude, city and state into one output field.

    The pipeline has a single data path, so the transformed record is
    expected to contain only the 'everything' key.
    """
    combine_config = CombineConfig(
        combine=FieldRef(['latitude', 'city', 'state']),
        separator=", ",
    )
    pipeline = DataTransformPipeline(
        [DataPath(input='dni', xforms=combine_config, output='everything')]
    )

    transformed = pipeline.transform_record(record_and_meta_2)

    assert transformed['record'] == {
        'everything': 'He loves 8.8.8.8 for DNS, 112.221, San Diego, California'
    }
def test_pipe_combine(records_date_tweak):
    """Combine last name with first name, city and state for two records.

    Each record is passed through the same pipeline and compared with the
    full expected output dict, which holds only the 'name_location' key.
    """
    combine_config = CombineConfig(
        combine=FieldRef(['first_name', 'city', 'state']),
        separator=", ",
    )
    pipeline = DataTransformPipeline(
        [DataPath(input='last_name', xforms=combine_config, output='name_location')]
    )

    # (index into the fixture, expected transformed record), checked in order.
    expectations = (
        (0, {'name_location': 'Watson, Alex, San Diego, California'}),
        (1, {'name_location': 'Ehrath, Alex, San Marcos, California'}),
    )
    for position, expected in expectations:
        assert pipeline.transform_record(records_date_tweak[position]) == expected
def test_date_shift_format():
    """Check date shifting and restoring when a custom date_format is configured.

    The same three birthdays are fed in twice: once already in the
    configured '%m/%d/%Y' format and once in ISO 'YYYY-MM-DD' form. In both
    cases the shifted and the restored values are expected in '%m/%d/%Y'.
    """
    date_shift = DateShiftConfig(
        secret="2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94",
        lower_range_days=-10,
        upper_range_days=25,
        date_format='%m/%d/%Y',
        tweak=FieldRef("user_id"),
    )
    paths = [DataPath(input="birthday", xforms=date_shift), DataPath(input="*")]
    shift_pipeline = DataTransformPipeline(paths)
    unshift_pipeline = DataRestorePipeline(paths)

    def _rows(*birthdays):
        # Fresh record dicts on every call so inputs and expectations never
        # share objects.
        return [
            {"user_id": "*****@*****.**", "birthday": birthday}
            for birthday in birthdays
        ]

    formatted_birthdays = ("02/17/1963", "06/09/1961", "08/29/1958")
    shifted_birthdays = ("02/13/1963", "06/05/1961", "08/25/1958")
    iso_birthdays = ("1963-02-17", "1961-06-09", "1958-08-29")

    # Input already in the configured format.
    shifted = [shift_pipeline.transform_record(row) for row in _rows(*formatted_birthdays)]
    assert shifted == _rows(*shifted_birthdays)
    recovered = [unshift_pipeline.transform_record(row) for row in shifted]
    assert recovered == _rows(*formatted_birthdays)

    # ISO-formatted input: the shifted output uses the configured format.
    shifted = [shift_pipeline.transform_record(row) for row in _rows(*iso_birthdays)]
    assert shifted == _rows(*shifted_birthdays)
    recovered = [unshift_pipeline.transform_record(row) for row in shifted]
    # Please note the format! The restored values come back in '%m/%d/%Y',
    # not in the ISO form that was originally supplied.
    assert recovered == _rows(*formatted_birthdays)
def test_pipe_date_shift_cbc_fast(records_date_tweak):
    """Shift and restore the 'created' date, with and without a user_id tweak.

    The first phase also transforms 'user_id' with an FPE string config in
    CBC_FAST mode and uses 'user_id' as the date-shift tweak. The second
    phase shifts 'created' alone with no tweak. Both phases must restore
    to the same original date.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"

    # Phase 1: the de-identified date is tweaked by user_id.
    user_id_config = FpeStringConfig(
        secret=secret,
        radix=10,
        aes_mode=crypto_aes.Mode.CBC_FAST,
    )
    tweaked_shift = DateShiftConfig(
        secret=secret,
        lower_range_days=-10,
        upper_range_days=25,
        tweak=FieldRef('user_id'),
    )
    paths = [
        DataPath(input='user_id', xforms=user_id_config),
        DataPath(input='created', xforms=tweaked_shift),
        DataPath(input='*'),
    ]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)

    shifted_aw = forward.transform_record(records_date_tweak[0])
    shifted_ae = forward.transform_record(records_date_tweak[1])
    assert shifted_aw['created'] == '2016-06-18'
    assert shifted_ae['created'] == '2016-06-18'

    restored_ae = backward.transform_record(shifted_ae)
    restored_aw = backward.transform_record(shifted_aw)
    assert restored_aw['created'] == '2016-06-17'
    assert restored_ae['created'] == '2016-06-17'

    # Phase 2: the de-identified date is not tweaked.
    plain_shift = DateShiftConfig(
        secret=secret,
        lower_range_days=-10,
        upper_range_days=25,
    )
    paths = [DataPath(input='created', xforms=plain_shift)]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)

    shifted_aw = forward.transform_record(records_date_tweak[0])
    shifted_ae = forward.transform_record(records_date_tweak[1])
    assert shifted_aw['created'] == '2016-06-13'
    assert shifted_ae['created'] == '2016-06-13'

    restored_aw = backward.transform_record(shifted_aw)
    restored_ae = backward.transform_record(shifted_ae)
    assert restored_aw['created'] == '2016-06-17'
    assert restored_ae['created'] == '2016-06-17'
def test_conditional_transformer(records_conditional):
    """Restore FPE-encrypted coordinates conditionally on 'user_consent'.

    Three restore configurations are exercised in sequence:

    1. true_xform=FPE, false_xform=redact-with-label
    2. true_xform=FPE only
    3. false_xform=FPE only, applied to the output of scenario 2

    Going by the expected values, record 0 ("aw") takes the true branch and
    record 1 ("ae") takes the false branch.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"

    def _coordinate_paths(xform):
        # The same transform is applied to 'lon' then 'lat'; everything else
        # passes through the wildcard path.
        return [
            DataPath(input='lon', xforms=xform),
            DataPath(input='lat', xforms=xform),
            DataPath(input='*'),
        ]

    def _expect_coords(result, lat, lon):
        assert result['record']['lat'] == lat
        assert result['record']['lon'] == lon

    def _expect_encrypted(result_aw, result_ae):
        _expect_coords(result_ae, 50.65564864394322, 191.8142181740291)
        _expect_coords(result_aw, 77.00217823076872, 254.0404040486477)

    # Scenario 1: restore when the condition matches, redact otherwise.
    fpe = FpeFloatConfig(secret=secret, radix=10)
    consent = ConditionalConfig(
        conditional_value=FieldRef('user_consent'),
        regex=r"['1']",
        true_xform=fpe,
        false_xform=RedactWithLabelConfig(),
    )
    encrypt_paths = _coordinate_paths(fpe)
    decrypt_paths = _coordinate_paths(consent)
    encryptor = DataTransformPipeline(encrypt_paths)
    decryptor = DataRestorePipeline(decrypt_paths)

    result_aw = encryptor.transform_record(records_conditional[0])
    result_ae = encryptor.transform_record(records_conditional[1])
    _expect_encrypted(result_aw, result_ae)

    result_aw = decryptor.transform_record(result_aw)
    result_ae = decryptor.transform_record(result_ae)
    _expect_coords(result_ae, 'LATITUDE', 'LONGITUDE')
    _expect_coords(result_aw, 112.22134, 135.76433)

    # Scenario 2: only a true branch, so the non-matching record is
    # expected to stay encrypted after the restore pass.
    fpe = FpeFloatConfig(secret=secret, radix=10)
    consent = ConditionalConfig(
        conditional_value=FieldRef('user_consent'),
        regex=r"['1']",
        true_xform=fpe,
    )
    encrypt_paths = _coordinate_paths(fpe)
    decrypt_paths = _coordinate_paths(consent)
    encryptor = DataTransformPipeline(encrypt_paths)
    decryptor = DataRestorePipeline(decrypt_paths)

    result_aw = encryptor.transform_record(records_conditional[0])
    result_ae = encryptor.transform_record(records_conditional[1])
    _expect_encrypted(result_aw, result_ae)

    result_aw = decryptor.transform_record(result_aw)
    result_ae = decryptor.transform_record(result_ae)
    _expect_coords(result_ae, 50.65564864394322, 191.8142181740291)
    _expect_coords(result_aw, 112.22134, 135.76433)

    # Scenario 3: only a false branch, run on scenario 2's output. The
    # still-encrypted record is now restored; the other is left unchanged.
    fpe = FpeFloatConfig(secret=secret, radix=10)
    consent = ConditionalConfig(
        conditional_value=FieldRef('user_consent'),
        regex=r"['1']",
        false_xform=fpe,
    )
    decrypt_paths = _coordinate_paths(consent)
    decryptor = DataRestorePipeline(decrypt_paths)

    result_aw = decryptor.transform_record(result_aw)
    result_ae = decryptor.transform_record(result_ae)
    _expect_coords(result_ae, 35.659491, 139.72785)
    _expect_coords(result_aw, 112.22134, 135.76433)