def test_record_fpe_mask():
    """Round-trip a masked credit card number through FPE, with and without whitespace stripping.

    Both scenarios use ``StringMask(start_pos=1)``; the asserted ciphertexts
    keep the leading ``4`` of the card number unchanged.
    """
    fpe_secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    record = {
        'latitude': -70.783,
        'longitude': -112.221,
        'credit_card': '4123 5678 9123 4567',
        'the_dude': 100000000,
        'the_hotness': "convertme",
        "the_sci_notation": 1.23E-7,
    }
    card_mask = StringMask(start_pos=1)

    # Scenario 1: a FormatConfig removes whitespace before the FPE step, so
    # both the encrypted and the restored value are expected without spaces.
    strip_then_encrypt = [
        FormatConfig(pattern=r'\s+', replacement=''),
        FpeStringConfig(secret=fpe_secret, radix=10, mask=[card_mask]),
    ]
    paths = [DataPath(input='credit_card', xforms=strip_then_encrypt)]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)
    encrypted = forward.transform_record(record)
    assert encrypted.get('credit_card') == '4599631908097107'
    decrypted = backward.transform_record(encrypted)
    assert decrypted.get('credit_card') == '4123567891234567'

    # Scenario 2: FPE only; the expected values keep the original spacing.
    encrypt_only = [FpeStringConfig(secret=fpe_secret, radix=10, mask=[card_mask])]
    paths = [DataPath(input='credit_card', xforms=encrypt_only)]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)
    encrypted = forward.transform_record(record)
    assert encrypted.get('credit_card') == '4599 6319 0809 7107'
    decrypted = backward.transform_record(encrypted)
    assert decrypted.get('credit_card') == '4123 5678 9123 4567'
def test_fpe_string():
    """Encrypt and restore ``"John Doe"`` under three mask combinations.

    Each case builds a transformer through ``factory`` from a radix-62
    ``FpeStringConfig``, checks the encrypted field, then checks that
    ``_restore_field`` yields the original name again.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    last_name_mask = StringMask(mask_after=' ')
    first_name_mask = StringMask(mask_until=' ')

    # (masks handed to the config, expected encrypted name)
    scenarios = (
        ([last_name_mask], 'John BDy'),
        ([first_name_mask], 'Uugx Doe'),
        ([first_name_mask, last_name_mask], 'Uugx BDy'),
    )
    for masks, expected_cipher in scenarios:
        config = FpeStringConfig(secret=secret, radix=62, mask=masks)
        transformer = factory(config)
        encrypted = transformer.transform_field("person_name", "John Doe", None)
        assert encrypted == {'person_name': expected_cipher}
        restored = transformer._restore_field('person_name', encrypted['person_name'], None)
        assert restored == {'person_name': 'John Doe'}
def test_record_fpe_base62():
    """Transform every field of a mixed record with FPE and verify the full round trip.

    Numeric strings and integers use radix 10, floats use ``FpeFloatConfig``
    with ``float_precision=3`` and the free-text field uses radix 62.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    original = {
        'latitude': -70.783,
        'longitude': -112.221,
        'credit_card': '4123567891234567',
        'the_dude': 100000000,
        'the_hotness': "This is some awesome text with UPPER and lower case characters.",
        "the_sci_notation": 1.23E-7,
    }

    integer_xforms = [FpeStringConfig(secret=secret, radix=10)]
    float_xforms = [FpeFloatConfig(secret=secret, radix=10, float_precision=3)]
    card_xforms = [
        FormatConfig(pattern=r'\s+', replacement=''),
        FpeStringConfig(secret=secret, radix=10),
    ]
    text_xforms = [FpeStringConfig(secret=secret, radix=62)]

    paths = [
        DataPath(input='credit_card', xforms=card_xforms),
        DataPath(input='longitude', xforms=float_xforms),
        DataPath(input='latitude', xforms=float_xforms),
        DataPath(input='the_dude', xforms=integer_xforms),
        DataPath(input='the_sci_notation', xforms=float_xforms),
        DataPath(input='the_hotness', xforms=text_xforms),
    ]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)

    encrypted = forward.transform_record(original)
    assert encrypted.get('credit_card') == '5931468769662449'

    # Restoring must give back the exact input record.
    assert backward.transform_record(encrypted) == original
def test_pipe_record_filter(record_meta_data_check):
    """Apply label-driven transforms through three field patterns and check record and metadata.

    The data paths use a literal name (``Country``) and two patterns
    (``?ddress`` and ``Cr*``); the transformed output is asserted to hold
    three record entries and two metadata field entries.
    """
    label_xforms = [
        RedactWithLabelConfig(labels=['date']),
        SecureHashConfig(secret='rockybalboa', labels=['location']),
        FpeStringConfig(
            labels=['credit_card_number'],
            secret="2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94",
            radix=10,
        ),
    ]
    paths = [
        DataPath(input=pattern, xforms=label_xforms)
        for pattern in ('Country', '?ddress', 'Cr*')
    ]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)

    result = forward.transform_record(record_meta_data_check)
    meta_fields = result['metadata']['fields']
    card_label = meta_fields['Credit Card']['ner']['labels'][0]
    assert result['record']['Credit Card'] == '4471585942734458'
    assert card_label['text'] == '4471585942734458'
    assert meta_fields['Country']['ner']['labels'][0]['start'] == 0
    assert meta_fields['Country']['ner']['labels'][0]['end'] == 64
    # Metadata has one entry fewer than the record because Address carries
    # no metadata in this test.
    assert len(result['metadata']['fields']) == 2
    assert len(result['record']) == 3

    round_tripped = backward.transform_record(result)
    assert round_tripped['record']['Credit Card'] == record_meta_data_check['record']['Credit Card']
def test_gretel_meta(record_and_meta_2):
    """Check that ``gretel_id`` in the metadata is unchanged by transform and restore.

    The record content itself must also come back identical after the
    restore pipeline has run.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    expected_id = '2732c7ed44a8402f899a01e52a931985'

    field_fpe = FpeStringConfig(secret=secret, radix=10)
    ip_label_fpe = FpeStringConfig(labels=['ip_address'], secret=secret, radix=10)
    paths = [
        DataPath(input='latitude', xforms=field_fpe),
        DataPath(input='*', xforms=ip_label_fpe),
    ]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)

    transformed = forward.transform_record(record_and_meta_2)
    assert transformed['metadata']['gretel_id'] == expected_id

    restored = backward.transform_record(transformed)
    assert restored['record'] == record_and_meta_2['record']
    assert restored['metadata']['gretel_id'] == expected_id
def test_record_fpe_precision():
    """Pin the FPE outputs for floats at precision 0 and 1, integers and radix-36 text.

    After checking each transformed field against its expected value, the
    restore pipeline must reproduce the input record exactly.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    original = {
        'latitude': -70.783,
        'longitude': -112.221,
        'credit_card': '4123567891234567',
        'the_dude': 100000000,
        'the_hotness': "convertme",
        "the_sci_notation": 1.23E-7,
    }

    digits_fpe = FpeStringConfig(secret=secret, radix=10)
    latitude_fpe = FpeFloatConfig(secret=secret, radix=10, float_precision=1)
    longitude_fpe = FpeFloatConfig(secret=secret, radix=10, float_precision=0)
    sci_fpe = FpeFloatConfig(secret=secret, radix=10, float_precision=1)
    text_fpe = FpeStringConfig(secret=secret, radix=36)

    paths = [
        DataPath(input='credit_card', xforms=digits_fpe),
        DataPath(input='latitude', xforms=latitude_fpe),
        DataPath(input='the_dude', xforms=digits_fpe),
        DataPath(input='longitude', xforms=longitude_fpe),
        DataPath(input='the_sci_notation', xforms=sci_fpe),
        DataPath(input='the_hotness', xforms=text_fpe),
        DataPath(input='*'),
    ]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)
    encrypted = forward.transform_record(original)

    # Expected ciphertext per field, checked in this order.
    expected_by_field = (
        ('credit_card', '5931468769662449'),
        ('longitude', -112.2929577756414),
        ('latitude', -70.78143312456855),
        ('the_hotness', '2qjuxg7ju'),
        ('the_dude', 128994144),
        ('the_sci_notation', 1.2342967235924508e-07),
    )
    for field_name, expected_value in expected_by_field:
        assert encrypted.get(field_name) == expected_value

    assert backward.transform_record(encrypted) == original
Ejemplo n.º 7
0
    def build_anonymizing_transforms(self):
        """Append one randomly chosen transform path per tagged field to ``self.data_paths``.

        For every entity in ``self.id_entities`` the project is asked for the
        fields tagged with that entity. Each field gets exactly one of six
        transform lists, selected by ``random.randint(1, 6)``, and a
        ``DataPath`` for it is appended to ``self.data_paths``.
        """
        for entity in self.id_entities:
            # Collect every project field tagged with this entity type.
            tagged_fields = [
                detail["field"]
                for detail in self.project.get_field_details(entity=entity)
            ]
            for field in tagged_fields:
                roll = random.randint(1, 6)
                transforms = []
                if roll == 1:
                    print(f"Dropping field {field}")
                    transforms = [DropConfig()]
                elif roll == 2:
                    print(f"Faking field {field}")
                    faker_method = FAKER_MAP.get(entity, "name")
                    transforms = [
                        FakeConstantConfig(seed=SEED, fake_method=faker_method)
                    ]
                elif roll == 3:
                    print(f"Encrypting field {field}")
                    # radix 62 will encrypt alphanumeric but no special characters
                    transforms = [FpeStringConfig(secret=SECRET, radix=62)]
                elif roll == 4:
                    print(f"Character redacting field {field}")
                    if entity != "email_address":
                        transforms = [
                            RedactWithCharConfig(
                                "#", mask=[StringMask(start_pos=3)])
                        ]
                    else:
                        # Emails get a fancier two-part mask around the "@".
                        email_masks = [
                            StringMask(start_pos=3, mask_until="@"),
                            StringMask(mask_after="@",
                                       mask_until=".",
                                       greedy=True),
                        ]
                        transforms = [
                            RedactWithCharConfig(char="X", mask=email_masks)
                        ]
                elif roll == 5:
                    print(f"Label redacting field {field}")
                    transforms = [RedactWithLabelConfig(labels=[entity])]
                elif roll == 6:
                    print(f"String redacting field {field}")
                    transforms = [RedactWithStringConfig(string="CLASSIFIED")]

                self.data_paths.append(DataPath(input=field, xforms=transforms))
def test_record_zero_fpe():
    """Pin the FPE outputs when the float inputs are ``0.0`` and ``-0.0``.

    The remaining fields are transformed as well, and the restore pipeline
    must return a record equal to the input.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    original = {
        'latitude': 0.0,
        'longitude': -0.0,
        'credit_card': '4123567891234567',
        'the_dude': 100000000,
        'the_hotness': "convertme",
        "the_sci_notation": 1.23E-7,
    }

    digit_xforms = [FpeStringConfig(secret=secret, radix=10)]
    float_xforms = [FpeFloatConfig(secret=secret, radix=10, float_precision=3)]
    text_xforms = [FpeStringConfig(secret=secret, radix=36)]

    paths = [
        DataPath(input='credit_card', xforms=digit_xforms),
        DataPath(input='latitude', xforms=float_xforms),
        DataPath(input='longitude', xforms=float_xforms),
        DataPath(input='the_dude', xforms=digit_xforms),
        DataPath(input='the_sci_notation', xforms=float_xforms),
        DataPath(input='the_hotness', xforms=text_xforms),
    ]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)
    encrypted = forward.transform_record(original)

    # Expected ciphertext per field, checked in this order.
    expected_by_field = (
        ('credit_card', '5931468769662449'),
        ('longitude', -1.32547939979e-312),
        ('latitude', 1.32547939979e-312),
        ('the_hotness', '2qjuxg7ju'),
        ('the_dude', 128994144),
        ('the_sci_notation', 1.229570610794763e-07),
    )
    for field_name, expected_value in expected_by_field:
        assert encrypted.get(field_name) == expected_value

    assert backward.transform_record(encrypted) == original
def test_record_output_map_and_schemas():
    """Check ``DataPath`` output renaming, alone and combined with FPE transforms.

    The scenario is repeated for every key in ``RECORD_KEYS`` plus ``None``.
    The second half verifies that two different input names mapped to the
    same output field produce identical transformed records.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"
    rec = {'a': 1.23, 'b': 2.34, 'c': 3.45, 'd': 4.56, 'e': 5.67}
    rec2 = {'f': 1.23, 'b': 2.34, 'c': 3.45, 'd': 4.56, 'e': 5.67}

    test_payloads = []
    for key in RECORD_KEYS:
        test_payloads.append((rec, key))
    test_payloads.append((rec, None))

    # Output field -> expected value after the first pipeline.
    expected_outputs = (
        ('x', 1.23),
        ('y', 2.34),
        ('z', 3.590038584114511),
        ('d', 7.002521213914073),
        ('e', 4.9570355284951875),
    )

    for payload, record_key in test_payloads:
        float_fpe = FpeFloatConfig(secret=secret, radix=10)
        paths = [
            DataPath(input='a', output='x'),
            DataPath(input='b', output='y'),
            DataPath(input='c', xforms=float_fpe, output='z'),
            DataPath(input='d', xforms=float_fpe),
            DataPath(input='e', xforms=float_fpe),
            DataPath(input='*'),
        ]
        forward = DataTransformPipeline(paths)
        backward = DataRestorePipeline(paths)
        transformed = forward.transform_record(payload)
        # Use the nested record when present, otherwise the payload itself.
        transformed_record = transformed.get(record_key) or transformed
        for out_field, expected_value in expected_outputs:
            assert transformed_record.get(out_field) == expected_value

        restored = backward.transform_record(transformed)
        restored = restored.get(record_key) or restored
        assert restored == rec

        # test multiple names mapping to the same output field
        string_fpe = FpeStringConfig(secret=secret, radix=10)
        paths = [
            DataPath(input='a', xforms=string_fpe, output='x'),
            DataPath(input='f', xforms=string_fpe, output='x'),
            DataPath(input='b', xforms=string_fpe, output='y'),
            DataPath(input='c', xforms=string_fpe, output='z'),
            DataPath(input='*'),
        ]
        forward = DataTransformPipeline(paths)
        from_rec = forward.transform_record(rec)
        from_rec2 = forward.transform_record(rec2)
        record_one = from_rec.get(record_key) or from_rec
        record_two = from_rec2.get(record_key) or from_rec2

        assert record_one == record_two
def test_fpe_dirty_transform(record_dirty_fpe_check):
    """Encrypt values that contain separators and confirm the round trip.

    The expected ciphertexts contain the same spaces, slash, asterisk and
    dash as asserted below; restoring must return the original record.
    """
    digits_fpe = FpeStringConfig(
        secret="2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94",
        radix=10,
    )
    paths = [
        DataPath(input='Credit Card', xforms=digits_fpe),
        DataPath(input='Customer ID', xforms=digits_fpe),
        DataPath(input='*'),
    ]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)

    encrypted = forward.transform_record(record_dirty_fpe_check)
    assert encrypted['Credit Card'] == '447158 5942734 458'
    assert encrypted['Customer ID'] == '747/52*232 83-19'

    assert backward.transform_record(encrypted) == record_dirty_fpe_check
def test_meta_data_transform(record_meta_data_check):
    """Run label-driven transforms over every field and check record and metadata stay in step.

    The credit card value must match in both the record and its NER label
    text, and restoring must bring back the original card number.
    """
    label_xforms = [
        RedactWithLabelConfig(labels=['date']),
        SecureHashConfig(secret='rockybalboa', labels=['location']),
        FpeStringConfig(
            labels=['credit_card_number'],
            secret="2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94",
            radix=10,
        ),
    ]
    paths = [DataPath(input='*', xforms=label_xforms)]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)

    result = forward.transform_record(record_meta_data_check)
    meta_fields = result['metadata']['fields']
    assert result['record']['Credit Card'] == '4471585942734458'
    assert meta_fields['Credit Card']['ner']['labels'][0]['text'] == '4471585942734458'
    assert meta_fields['Country']['ner']['labels'][0]['start'] == 0
    assert meta_fields['Country']['ner']['labels'][0]['end'] == 64

    round_tripped = backward.transform_record(result)
    assert round_tripped['record']['Credit Card'] == record_meta_data_check['record']['Credit Card']
def test_pipe_date_shift_cbc_fast(records_date_tweak):
    """Check date shifting with and without a ``user_id`` tweak, using CBC_FAST for the id.

    In both halves the two fixture records must shift to the same date and
    restore to ``2016-06-17``.
    """
    secret = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"

    # Part 1: the shifted date is tweaked by the user_id field.
    user_id_fpe = FpeStringConfig(secret=secret, radix=10, aes_mode=crypto_aes.Mode.CBC_FAST)
    tweaked_shift = DateShiftConfig(
        secret=secret,
        lower_range_days=-10,
        upper_range_days=25,
        tweak=FieldRef('user_id'),
    )
    paths = [
        DataPath(input='user_id', xforms=user_id_fpe),
        DataPath(input='created', xforms=tweaked_shift),
        DataPath(input='*'),
    ]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)

    shifted_first = forward.transform_record(records_date_tweak[0])
    shifted_second = forward.transform_record(records_date_tweak[1])
    assert shifted_first['created'] == '2016-06-18'
    assert shifted_second['created'] == '2016-06-18'

    # The second record is restored before the first one.
    restored_second = backward.transform_record(shifted_second)
    restored_first = backward.transform_record(shifted_first)
    assert restored_first['created'] == '2016-06-17'
    assert restored_second['created'] == '2016-06-17'

    # Part 2: the same shift range without any tweak.
    plain_shift = DateShiftConfig(secret=secret, lower_range_days=-10, upper_range_days=25)
    paths = [DataPath(input='created', xforms=plain_shift)]
    forward = DataTransformPipeline(paths)
    backward = DataRestorePipeline(paths)

    shifted_first = forward.transform_record(records_date_tweak[0])
    shifted_second = forward.transform_record(records_date_tweak[1])
    assert shifted_first['created'] == '2016-06-13'
    assert shifted_second['created'] == '2016-06-13'

    restored_first = backward.transform_record(shifted_first)
    restored_second = backward.transform_record(shifted_second)
    assert restored_first['created'] == '2016-06-17'
    assert restored_second['created'] == '2016-06-17'
Ejemplo n.º 13
0
"""
Basic Format Preserving Encryption
"""
from gretel_client.transformers import FpeStringConfig
from gretel_client.transformers import DataPath, DataTransformPipeline
from gretel_client.transformers.string_mask import StringMask

# Both FPE configurations below use the same secret.
FPE_SECRET = "2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94"

# Mask handed to the credit card transform; in the expected output below the
# leading "4" of the card number is unchanged.
mask = StringMask(start_pos=1)

# Radix 10 for the card digits, radix 62 for the name.
xf = FpeStringConfig(secret=FPE_SECRET, radix=10, mask=[mask])
xf2 = FpeStringConfig(secret=FPE_SECRET, radix=62)

paths = [
    DataPath(input="credit_card", xforms=xf),
    DataPath(input="name", xforms=xf2),
    DataPath(input="*"),
]
pipe = DataTransformPipeline(paths)

rec = {"name": "John Doe", "credit_card": "4123 5678 9012 3456"}
out = pipe.transform_record(rec)

# Spaces stay in place in both expected values.
assert out == {
    "name": "2DZv ZmN",
    "credit_card": "4521 1021 2994 9272",
}
Ejemplo n.º 14
0
    DataPath,
    DataTransformPipeline,
    DataRestorePipeline,
)

# Sample record; two of its values mix digits with separators.
rec = {
    "Address": "317 Massa. Av.",
    "City": "Didim",
    "Country": "Eritrea",
    "Credit Card": "601128 2195205 818",
    "Customer ID": "169/61*009 38-34",
    "Date": "2019-10-08",
    "Name": "Grimes, Bo H.",
    "Zipcode": "745558",
}

# One radix-10 FPE config shared by both transformed fields.
field_xf = FpeStringConfig(
    secret="2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94",
    radix=10,
)
data_paths = [
    DataPath(input="Credit Card", xforms=field_xf),
    DataPath(input="Customer ID", xforms=field_xf),
    DataPath(input="*"),
]

# The same data paths drive both the transform and the restore pipeline.
xf = DataTransformPipeline(data_paths)
rf = DataRestorePipeline(data_paths)

transformed = xf.transform_record(rec)
# The expected ciphertexts keep every separator in its original position.
assert transformed["Credit Card"] == "447158 5942734 458"
assert transformed["Customer ID"] == "747/52*232 83-19"

restored = rf.transform_record(transformed)
assert restored == rec