Ejemplo n.º 1
0
    def test_sanity_check(self):
        """Verify sanity_check repairs a date column and an oversized integer.

        One fake record is generated from the schema returned by
        ``self.data_gen.get_faker_schema()``. ``sanity_check`` is then applied
        to ``lo_orderdate`` (expected to become a ``%Y-%m-%d`` string) and to
        ``lo_linenumber`` (expected to end up no larger than
        ``self.data_gen.max_int``).
        """
        date_field = u'lo_orderdate'
        line_field = u'lo_linenumber'

        faker_schema = self.data_gen.get_faker_schema()
        record = FakerSchema().generate_fake(faker_schema, 1)  # a single record

        # Straight out of the generator the order date is a datetime.date
        # object, whereas BigQuery expects a string.
        self.assertIsInstance(record[date_field], datetime.date)

        record = self.fakerowgen.sanity_check(record=record,
                                              fieldname=date_field)

        # The check must have turned the date into a string ...
        self.assertIsInstance(record[date_field], unicode)
        # ... formatted as YYYY-MM-DD; strptime raises ValueError otherwise.
        datetime.datetime.strptime(record[date_field], '%Y-%m-%d')

        # Plant an integer that is meant to exceed the configured limit
        # (max_int is expected to be 10**11) and let sanity_check handle it.
        record[line_field] = 10 ** 12
        record = self.fakerowgen.sanity_check(record=record,
                                              fieldname=line_field)

        self.assertLessEqual(record[line_field], self.data_gen.max_int)
Ejemplo n.º 2
0
    def generate_fake(self, fschema=None, key_dict=None):
        """
        Create a single fake record and return it serialized as JSON.

        The record is generated from ``fschema``, every column is passed
        through ``self.sanity_check``, and, when ``key_dict`` is given, the
        converted key values are merged into the record.

        Arguments:
                fschema (dict): Contains a faker_schema (this should be
                    generated by DataGenerator.get_faker_schema() )
                key_dict (dict): Optional key columns supplied by the caller.
                    Columns named here are not randomly generated; their
                    values (after ``self.convert_key_types``) are merged into
                    the generated record.

        Returns:
                str: The record encoded with ``json.dumps``.

        Raises:
                KeyError: If ``key_dict`` is given but the merged record has
                    no ``'frequency'`` entry to discard.
        """
        schema_faker = FakerSchema()

        # Leave out the key columns because we do not need to randomly
        # generate them. A filtered copy is built instead of popping from
        # ``fschema`` so the caller's schema dict is not modified.
        if key_dict:
            fschema = {
                name: spec
                for name, spec in fschema.items()
                if name not in key_dict
            }

        # Generate a fake record.
        data = schema_faker.generate_fake(fschema, 1)  # Generate one record.

        # This performs a sanity check on datatypes and parameterized
        # constraints, one column at a time.
        for col_name in data:
            data = self.sanity_check(data, col_name)

        if key_dict:
            keys = self.convert_key_types(key_dict)
            # Join the keys and the rest of the generated data.
            data.update(keys)
            # NOTE(review): 'frequency' presumably arrives via key_dict and is
            # not meant to be emitted -- confirm against the caller.
            data.pop('frequency')
        return json.dumps(data)
Ejemplo n.º 3
0
 def generate_data(self, file_name):
     """Generate fake records and write them to ``file_name``, one JSON document per line.

     The records come from ``self.generate_fake(self.schema,
     iterations=self.total_req)``; each item of the returned iterable is
     serialized with ``json.dump`` and followed by a newline.

     Args:
         file_name: Path of the output file. It is opened in ``'w'`` mode, so
             an existing file is overwritten.
     """
     # The FakerSchema instance that used to be created here was never used,
     # so it is no longer constructed on every call.
     records = self.generate_fake(self.schema, iterations=self.total_req)
     with open(file_name, 'w') as out_file:
         for record in records:
             json.dump(record, out_file)
             out_file.write('\n')
Ejemplo n.º 4
0
class TestFakerSchema(unittest.TestCase):
    """Checks the container types returned by ``FakerSchema.generate_fake``."""

    def setUp(self):
        # Every test works on a FakerSchema that wraps a MockFaker instance.
        mock_faker = MockFaker()
        self.faker_schema = FakerSchema(faker=mock_faker)

    def test_generate_fake_flat_schema(self):
        """A flat schema yields a dict."""
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        generated = self.faker_schema.generate_fake(flat_schema)

        self.assertIsInstance(generated, dict)

    def test_generate_fake_flat_schema_4_iterations(self):
        """Asking for four iterations yields a list with four entries."""
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        generated = self.faker_schema.generate_fake(flat_schema, iterations=4)

        self.assertIsInstance(generated, list)
        self.assertEqual(len(generated), 4)

    def test_generate_fake_nested_schema(self):
        """A nested mapping in the schema stays a dict in the output."""
        location_schema = {
            'Address': 'street_address',
            'City': 'city',
            'Country': 'country',
            'Postal Code': 'postalcode',
        }
        nested_schema = {'Full Name': 'name', 'Location': location_schema}
        generated = self.faker_schema.generate_fake(nested_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Location'], dict)

    def test_generate_fake_schema_with_list(self):
        """A list in the schema stays a list in the output."""
        employees = [{'Employee1': 'name'}, {'Employee2': 'name'}]
        list_schema = {'Employer': 'name', 'EmployeeList': employees}
        generated = self.faker_schema.generate_fake(list_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['EmployeeList'], list)
    def generate_fake(self, fschema):
        """
        Build one fake record from a faker schema and sanity-check every column.

        Args:
                fschema: A dictionary containing a faker_schema (this should be generated by
                         DataGenerator.get_faker_schema() )

        Returns:
                The record as returned by the last ``self.sanity_check`` call (or the
                freshly generated record if it has no columns).
        """
        # A fresh FakerSchema produces exactly one record (second argument is 1).
        generated = FakerSchema().generate_fake(fschema, 1)

        # Walk the column names of the generated record. Each sanity_check call
        # receives the record returned by the previous call, checking datatypes
        # and parameterized constraints for one column.
        checked = generated
        for column in generated:
            checked = self.sanity_check(checked, column)

        return checked
Ejemplo n.º 6
0
def fake_data(json_file, seed=0):
    """Return faked json data for every (year, month) combination.

    Args:
        json_file: example json file with data to simulate
        seed (int): starting seed value; it is incremented by one for each
            month and the incremented value is passed to ``_update_data``
    Returns:
        dict: maps each ``(year, month)`` pair taken from ``YEARS`` and
        ``MONTHS`` to the value ``_update_data`` returned for that month
    """
    # Pool of visited places, sized for the largest per-year place count.
    visited_places = _create_places(total=max(NPLACES.values()))

    # Derive the json schema from the example file.
    with open(json_file) as handle:
        json_schema = get_json_schema(json.load(handle))

    dutch_fake = Faker('nl_NL')
    dutch_fake.add_provider(geo)
    schema_faker = FakerSchema(faker=dutch_fake, locale='nl_NL')

    result = {}
    for year in YEARS:
        for month in MONTHS:
            # The number of "timelineObjects" entries depends on the year.
            month_schema = get_faker_schema(
                json_schema["properties"],
                custom=SCHEMA_TYPES,
                iterations={"timelineObjects": NACTIVITIES[year]})
            generated = schema_faker.generate_fake(month_schema)

            # Month number parsed from the first three letters of the month
            # name via the '%b' directive.
            month_number = datetime.strptime(month[:3], '%b').month
            seed = seed + 1

            first_of_month = datetime(year, month_number, 1)
            # Only the first NPLACES[year] places are used for this year.
            places_for_year = dict(
                itertools.islice(visited_places.items(), NPLACES[year]))

            result[(year, month)] = _update_data(
                generated,
                first_of_month,
                places_for_year,
                seed=seed)

    return result
Ejemplo n.º 7
0
 def setUp(self):
     """Create the FakerSchema under test, wrapping a MockFaker instance."""
     mock_faker = MockFaker()
     self.faker_schema = FakerSchema(faker=mock_faker)
from elasticsearch import Elasticsearch
from faker_schema.faker_schema import FakerSchema

# Address of the Elasticsearch node used by this script.
API_URI = "http://127.0.0.1:9200"

es = Elasticsearch([API_URI])

# Create the three project indices; ignore=400 is passed on every call, as in
# the original explicit calls.
for _index_name in ('project_1', 'project_2', 'project_3'):
    es.indices.create(index=_index_name, ignore=400)

# Shared generator used for data ingestion.
faker = FakerSchema()


def ingest_data(esclient, index, schema, count=10):
    """Generate ``count`` fake documents and index them into ``index``.

    Each document is produced by the module-level ``faker`` from ``schema``
    and stored under the id ``"<index>_<i>"`` for ``i`` in ``range(count)``.

    Args:
        esclient: client object whose ``index(index=..., id=..., body=...)``
            method is called once per document.
        index (str): target index name; also used as the document id prefix.
        schema: schema handed to ``faker.generate_fake`` for every document.
        count (int): number of documents to ingest. Defaults to 10, the value
            that used to be hard-coded.
    """
    for i in range(count):
        doc = faker.generate_fake(schema)
        # The client's response was never used, so it is no longer stored.
        esclient.index(index=index, id=index + "_" + str(i), body=doc)


# Document field name -> provider name handed to the schema faker.
schema = dict([
    ('employee_id', 'uuid4'),
    ('employee_name', 'name'),
    ('employee address', 'address'),
    ('email_address', 'email'),
])
ingest_data(esclient=es, index="project_1", schema=schema)
Ejemplo n.º 9
0
# Module-level token holders; both start out empty.
ACCESS_TOKEN = ""
REFRESH_TOKEN = ""

# URL template, filled in with a project name and one of the PATHS entries.
HOST = "https://{project}.machinable.io{path}"
PATHS = dict(
  people="/api/detailPeople",
  count="/api/count",
  countLookup="/api/count?peopleId={id}",
  metrics="/collections/metrics",
  login="******",
  refresh="/sessions/refresh/",
)

# NOTE(review): presumably a delay in seconds -- confirm where it is used.
SLEEP_TIME = 2

# Shared fake-data generators and the people schema loaded from disk.
fakerSchema = FakerSchema()
fake = Faker()
humanSchema = load_json_from_file('./scripts/schemas/people.details.json')

class TokenExpired(Exception):
  """Exception type for an expired token.

  NOTE(review): presumably raised when the access token is rejected; nothing
  in the visible code raises it -- confirm against the callers.
  """

def login(project, username, password):
  """POST HTTP Basic credentials to the project's login endpoint.

  Args:
    project: value substituted for ``{project}`` in ``HOST``.
    username: user name placed before the colon in the Basic credentials.
    password: password placed after the colon in the Basic credentials.

  NOTE(review): the visible code declares ACCESS_TOKEN / REFRESH_TOKEN as
  globals but never assigns them, and the response is not used; this
  function may be truncated -- confirm against the full source.
  """
  global ACCESS_TOKEN, REFRESH_TOKEN
  logger.info("logging in")

  # "user:pass" -> UTF-8 bytes -> base64 text for the Authorization header.
  credentials = "{username}:{password}".format(username=username, password=password)
  basic_token = base64.b64encode(credentials.encode('utf8')).decode()
  headers = {"Authorization": "Basic " + basic_token}

  url = HOST.format(project=project, path=PATHS["login"])
  response = requests.post(url, headers=headers)
Ejemplo n.º 10
0
def generate(schema):
    """Return fake data generated from the schema loaded via ``load_json_from_file``.

    Args:
        schema: argument passed straight to ``load_json_from_file``
            (presumably the path of a JSON schema file -- confirm).

    Returns:
        Whatever ``FakerSchema.generate_fake`` produces for the loaded schema,
        using a Faker instance that has ``BornProvider`` registered.
    """
    provider_faker = Faker()
    provider_faker.add_provider(BornProvider)
    schema_faker = FakerSchema(faker=provider_faker)

    loaded_schema = load_json_from_file(schema)
    return schema_faker.generate_fake(loaded_schema)
Ejemplo n.º 11
0
class TestFakerSchema(unittest.TestCase):
    """Checks the result types of ``FakerSchema.generate_fake`` for several schema shapes."""

    def setUp(self):
        # Every test works on a FakerSchema that wraps a MockFaker instance.
        mock_faker = MockFaker()
        self.faker_schema = FakerSchema(faker=mock_faker)

    def test_generate_fake_flat_schema(self):
        """A flat schema yields a dict."""
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        generated = self.faker_schema.generate_fake(flat_schema)

        self.assertIsInstance(generated, dict)

    def test_generate_fake_flat_schema_4_iterations(self):
        """Asking for four iterations yields a list with four entries."""
        flat_schema = {
            'Full Name': 'name',
            'Address': 'address',
            'Email': 'email',
        }
        generated = self.faker_schema.generate_fake(flat_schema, iterations=4)

        self.assertIsInstance(generated, list)
        self.assertEqual(len(generated), 4)

    def test_generate_fake_nested_schema(self):
        """A nested mapping in the schema stays a dict in the output."""
        location_schema = {
            'Address': 'street_address',
            'City': 'city',
            'Country': 'country',
            'Postal Code': 'postalcode',
        }
        nested_schema = {'Full Name': 'name', 'Location': location_schema}
        generated = self.faker_schema.generate_fake(nested_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Location'], dict)

    def test_generate_fake_schema_with_list(self):
        """A list in the schema stays a list in the output."""
        employees = [{'Employee1': 'name'}, {'Employee2': 'name'}]
        list_schema = {'Employer': 'name', 'EmployeeList': employees}
        generated = self.faker_schema.generate_fake(list_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['EmployeeList'], list)

    def test_generate_fake_schema_with_choices(self):
        """A parenthesised choice list yields a str value."""
        choice_schema = {
            'Person': 'name',
            # similar to fuzzy choices
            'Gender': '(Male,Female)',
        }
        generated = self.faker_schema.generate_fake(choice_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Gender'], str)

    def test_generate_fake_schema_with_date(self):
        """The date_of_birth entry is expected to come back as a str."""
        date_schema = {
            'Person': 'name',
            'Gender': '(Male,Female)',
            # returns date with isoformat
            'BirthDay': 'date_of_birth',
        }
        generated = self.faker_schema.generate_fake(date_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['BirthDay'], str)

    def test_generate_fake_schema_with_fake_args(self):
        """Positional arguments written inside the schema string yield an int."""
        args_schema = {
            'Person': 'name',
            'Gender': '(Male,Female)',
            'BirthDay': 'date_of_birth',
            # passing arguments (age between 18 and 80)
            'Age': 'random_int(18,80)',
        }
        generated = self.faker_schema.generate_fake(args_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Age'], int)

    def test_generate_fake_schema_with_fake_kargs(self):
        """Named arguments written inside the schema string yield an int."""
        kwargs_schema = {
            'Person': 'name',
            'Gender': '(Male,Female)',
            'BirthDay': 'date_of_birth',
            # passing named arguments (age between min=18 and max=80)
            'Age': 'random_int(min=18,max=80)',
        }
        generated = self.faker_schema.generate_fake(kwargs_schema)

        self.assertIsInstance(generated, dict)
        self.assertIsInstance(generated['Age'], int)