Example #1
0
def generate_column_value_pairs(columns, n):
    """
    Lazily produce ``n`` rows of test data for a table.

    columns maps each column name to a type marker from ``dt``. The markers
    handled here are ``dt.str``, ``dt.gaussian_int`` and ``dt.uuid``.

    Every yielded row is a dict with exactly the keys of ``columns`` (used
    as given, no case conversion), for example (values are illustrative):
    {"NAME": "Donald", "AGE": 54}

    @return a generator yielding one dict per row, ``n`` rows in total

    Raises Exception("Type not found") when a column's marker matches none
    of the handled ones. Since this is a generator function, that error
    (like all other work here) only happens once the result is iterated.
    """
    # One value source per kind, shared by every row and every column:
    # two columns of the same kind draw consecutive values from one source.
    name_source = get_names()
    gaussian_source = gaussian_generator(5, 5)
    uuid_source = generate_uuid()

    def draw(kind):
        # Pick the next value from the source matching this column's marker.
        if kind == dt.str:
            return next(name_source)
        if kind == dt.gaussian_int:
            # The gaussian source yields non-integers; round to get an int.
            return round(next(gaussian_source))
        if kind == dt.uuid:
            return next(uuid_source)
        raise Exception("Type not found")

    for _ in range(n):
        yield {column: draw(kind) for column, kind in columns.items()}
    def test_gaussian_generator(self):
        """Check the sample mean and standard deviation of gaussian_generator.

        Draws 10 batches of 10000 values from one generator created with
        mean 10 and sigma 15, and requires each batch's mean to lie strictly
        inside (9.6, 10.4) and its standard deviation strictly inside
        (14.5, 15.5).
        """
        target_mean = 10
        target_sigma = 15
        batches = 10
        batch_size = 10000

        # A single generator feeds all batches, so every batch is a fresh
        # run of consecutive draws from the same stream.
        source = gaussian_generator(target_mean, target_sigma)

        # NOTE(review): assuming independent Gaussian draws, the standard
        # error of a 10000-sample mean at sigma 15 is 0.15, so the +/-0.4
        # window below is only about 2.7 standard errors wide and this test
        # may fail occasionally by chance. Confirm whether the generator is
        # seeded before relying on it in CI.
        for _batch in range(batches):
            samples = [next(source) for _draw in range(batch_size)]
            sample_mean = self.mean(samples)
            sample_sigma = self.standard_deviation(samples)

            # The mean must land close to 10.
            self.assertGreater(sample_mean, 9.6)
            self.assertLess(sample_mean, 10.4)

            # The standard deviation must land close to 15.
            self.assertGreater(sample_sigma, 14.5)
            self.assertLess(sample_sigma, 15.5)