Exemple #1
0
    def test_sanity_check(self):
        # Verifies two things about sanity_check: the order date is turned
        # into a 'YYYY-MM-DD' string, and an oversized integer is brought
        # back to at most data_gen.max_int.
        date_field = u'lo_orderdate'
        line_field = u'lo_linenumber'

        # Produce exactly one fake record from the generator's faker schema.
        faker_schema = self.data_gen.get_faker_schema()
        record = FakerSchema().generate_fake(faker_schema, 1)

        # Straight out of the faker the order date is a datetime.date object,
        # whereas BigQuery expects a string.
        self.assertIsInstance(record[date_field], datetime.date)

        record = self.fakerowgen.sanity_check(record=record,
                                              fieldname=date_field)

        # After the check the date must be a (unicode) string ...
        self.assertIsInstance(record[date_field], unicode)

        # ... in the expected format; strptime raises ValueError if it is
        # not, which fails the test.
        datetime.datetime.strptime(record[date_field], '%Y-%m-%d')

        # Plant a value above the limit (the original test notes that
        # max_int is 10**11) and make sure the check reins it in.
        record[line_field] = 10**12

        record = self.fakerowgen.sanity_check(record=record,
                                              fieldname=line_field)

        self.assertLessEqual(record[line_field], self.data_gen.max_int)
Exemple #2
0
    def generate_fake(self, fschema=None, key_dict=None):
        """
        Create a single fake record and return it serialized as JSON.

        The record is generated by FakerSchema from ``fschema`` and every
        generated column is then passed through ``self.sanity_check``.

        Arguments:
                fschema (dict): Contains a faker_schema (this should be
                    generated by DataGenerator.get_faker_schema() ). The
                    dictionary passed in is left unmodified.
                key_dict (dict): Optional key columns. When truthy, these
                    columns are not randomly generated; instead the result
                    of ``self.convert_key_types(key_dict)`` is merged into
                    the record and the 'frequency' entry is removed.

        Returns:
                str: The record encoded with ``json.dumps``.

        Raises:
                KeyError: If ``key_dict`` is truthy but the merged record
                    has no 'frequency' entry.
        """
        schema_faker = FakerSchema()

        # Drop the key columns because we do not need to randomly generate
        # them. Work on a shallow copy so the caller's schema is not
        # stripped of its key columns as a side effect of this call.
        if key_dict:
            fschema = fschema.copy()
            for key in key_dict:
                fschema.pop(key, None)

        # Generate a fake record.
        data = schema_faker.generate_fake(fschema, 1)  # Generate one record.

        # Perform a sanity check on datatypes and parameterized constraints.
        # Iterate over a snapshot of the column names: sanity_check receives
        # the record itself, and changing a dict's key set while iterating
        # it directly raises RuntimeError.
        for col_name in list(data):
            data = self.sanity_check(data, col_name)

        if key_dict:
            # Join the keys and the rest of the generated data.
            data.update(self.convert_key_types(key_dict))
            # NOTE(review): 'frequency' presumably arrives via key_dict and
            # is not meant to be part of the output record - confirm.
            data.pop('frequency')
        return json.dumps(data)
Exemple #3
0
class TestFakerSchema(unittest.TestCase):
    """Checks the container types FakerSchema.generate_fake returns."""

    def setUp(self):
        # Every test gets its own FakerSchema wired to the mock faker.
        self.faker_schema = FakerSchema(faker=MockFaker())

    def test_generate_fake_flat_schema(self):
        # A flat schema with the default iteration count gives one dict.
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        generated = self.faker_schema.generate_fake(flat_schema)

        self.assertIsInstance(generated, dict)

    def test_generate_fake_flat_schema_4_iterations(self):
        # Asking for four iterations gives a list holding four results.
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        generated = self.faker_schema.generate_fake(flat_schema, iterations=4)

        self.assertIsInstance(generated, list)
        self.assertEqual(len(generated), 4)

    def test_generate_fake_nested_schema(self):
        # A dict nested in the schema stays a dict in the output.
        location_schema = {
            'Address': 'street_address',
            'City': 'city',
            'Country': 'country',
            'Postal Code': 'postalcode',
        }
        nested_schema = {'Full Name': 'name', 'Location': location_schema}
        generated = self.faker_schema.generate_fake(nested_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Location'], dict)

    def test_generate_fake_schema_with_list(self):
        # A list nested in the schema stays a list in the output.
        employee_schemas = [{'Employee1': 'name'}, {'Employee2': 'name'}]
        list_schema = {'Employer': 'name', 'EmployeeList': employee_schemas}
        generated = self.faker_schema.generate_fake(list_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['EmployeeList'], list)
    def generate_fake(self, fschema):
        """
        Build one fake record and pass each of its columns through
        sanity_check.

        Args:
                fschema: A dictionary containing a faker_schema (this should be generated by
                         DataGenerator.get_faker_schema() )

        Returns:
                The record as returned by the last sanity_check call, or the freshly
                generated record when it has no columns.
        """
        # Ask a new FakerSchema for exactly one record.
        record = FakerSchema().generate_fake(fschema, 1)

        # Check datatypes and parameterized constraints column by column.
        # The loop walks the columns of the freshly generated record, while
        # each pass hands the latest sanity_check result to the next one.
        for column in record:
            record = self.sanity_check(record, column)

        return record
Exemple #5
0
def fake_data(json_file, seed=0):
    """Return faked json data for every (year, month) combination.

    Args:
        json_file: example json file with data to simulate
        seed (int): starting seed value; it is incremented once per
            generated month and the incremented value is handed to
            _update_data
    Returns:
        dict: maps each (year, month) tuple to the value returned by
            _update_data for that month
    """

    # Pool of places, sized by the largest entry in NPLACES.
    all_places = _create_places(total=max(NPLACES.values()))

    # Derive the json schema from the example file.
    with open(json_file) as handle:
        example = json.load(handle)
    json_schema = get_json_schema(example)

    # Dutch-locale Faker with the geo provider, wrapped in a FakerSchema.
    dutch_faker = Faker('nl_NL')
    dutch_faker.add_provider(geo)
    schema_faker = FakerSchema(faker=dutch_faker, locale='nl_NL')

    result = {}
    for year in YEARS:
        for month in MONTHS:
            month_schema = get_faker_schema(
                json_schema["properties"],
                custom=SCHEMA_TYPES,
                iterations={"timelineObjects": NACTIVITIES[year]},
            )
            generated = schema_faker.generate_fake(month_schema)

            # The month number is parsed from the first three letters of
            # the month name.
            month_number = datetime.strptime(month[:3], '%b').month

            # Every month is updated with its own seed value.
            seed += 1

            month_start = datetime(year, month_number, 1)
            # Only the first NPLACES[year] places are offered for this year.
            year_places = dict(itertools.islice(all_places.items(), NPLACES[year]))

            result[(year, month)] = _update_data(
                generated,
                month_start,
                year_places,
                seed=seed,
            )

    return result
Exemple #6
0
def generate(schema):
    """Generate fake data for the schema loaded from *schema*.

    Args:
        schema: handed to load_json_from_file (presumably a path to a JSON
            schema file - confirm against that helper).

    Returns:
        Whatever FakerSchema.generate_fake produces for the loaded schema.
    """
    # Faker instance extended with the BornProvider.
    provider_faker = Faker()
    provider_faker.add_provider(BornProvider)

    schema_faker = FakerSchema(faker=provider_faker)
    loaded_schema = load_json_from_file(schema)
    return schema_faker.generate_fake(loaded_schema)
class TestFakerSchema(unittest.TestCase):
    """Checks the types FakerSchema.generate_fake returns for several schema shapes."""

    def setUp(self):
        # Every test gets its own FakerSchema wired to the mock faker.
        self.faker_schema = FakerSchema(faker=MockFaker())

    def test_generate_fake_flat_schema(self):
        # A flat schema with the default iteration count gives one dict.
        spec = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        generated = self.faker_schema.generate_fake(spec)

        self.assertIsInstance(generated, dict)

    def test_generate_fake_flat_schema_4_iterations(self):
        # Asking for four iterations gives a list holding four results.
        spec = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        generated = self.faker_schema.generate_fake(spec, iterations=4)

        self.assertIsInstance(generated, list)
        self.assertEqual(len(generated), 4)

    def test_generate_fake_nested_schema(self):
        # A dict nested in the schema stays a dict in the output.
        location_spec = {
            'Address': 'street_address',
            'City': 'city',
            'Country': 'country',
            'Postal Code': 'postalcode',
        }
        spec = {'Full Name': 'name', 'Location': location_spec}
        generated = self.faker_schema.generate_fake(spec)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Location'], dict)

    def test_generate_fake_schema_with_list(self):
        # A list nested in the schema stays a list in the output.
        employee_specs = [{'Employee1': 'name'}, {'Employee2': 'name'}]
        spec = {'Employer': 'name', 'EmployeeList': employee_specs}
        generated = self.faker_schema.generate_fake(spec)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['EmployeeList'], list)

    def test_generate_fake_schema_with_choices(self):
        # A parenthesised, comma-separated value lists choices
        # (similar to fuzzy choices); the result is a string.
        spec = {'Person': 'name', 'Gender': '(Male,Female)'}
        generated = self.faker_schema.generate_fake(spec)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Gender'], str)

    def test_generate_fake_schema_with_date(self):
        # 'date_of_birth' is expected to come back as a string
        # (the date in isoformat).
        spec = {
            'Person': 'name',
            'Gender': '(Male,Female)',
            'BirthDay': 'date_of_birth',
        }
        generated = self.faker_schema.generate_fake(spec)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['BirthDay'], str)

    def test_generate_fake_schema_with_fake_args(self):
        # Positional arguments passed to the provider
        # (an age between 18 and 80).
        spec = {
            'Person': 'name',
            'Gender': '(Male,Female)',
            'BirthDay': 'date_of_birth',
            'Age': 'random_int(18,80)',
        }
        generated = self.faker_schema.generate_fake(spec)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Age'], int)

    def test_generate_fake_schema_with_fake_kargs(self):
        # Named arguments passed to the provider
        # (an age between min=18 and max=80).
        spec = {
            'Person': 'name',
            'Gender': '(Male,Female)',
            'BirthDay': 'date_of_birth',
            'Age': 'random_int(min=18,max=80)',
        }
        generated = self.faker_schema.generate_fake(spec)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Age'], int)