Beispiel #1
0
    def test_choice_distribution(self):
        """Check that ``choice_distribution`` follows the given weights.

        A single draw must be one of the candidates. Then the RNG state
        stored in ``random_state.json`` (under ``TEST_DIR``) is restored,
        100 samples are drawn, and every candidate's count must lie
        strictly within ``tolerance`` of ``100 * probability``.
        """
        from faker.utils.distribution import choice_distribution

        a = ('a', 'b', 'c', 'd')
        p = (0.5, 0.2, 0.2, 0.1)

        self.assertTrue(choice_distribution(a, p) in a)

        state_path = os.path.join(TEST_DIR, 'random_state.json')
        with open(state_path, 'r') as fh:
            random_state = json.load(fh)
        # JSON decodes tuples as lists; turn the inner state back into a
        # tuple before handing it to random.setstate().
        random_state[1] = tuple(random_state[1])
        random.setstate(random_state)

        samples = [choice_distribution(a, p) for _ in range(100)]

        tolerance = 5
        for element, probability in zip(a, p):
            upper = 100 * probability + tolerance
            lower = 100 * probability - tolerance
            self.assertTrue(upper > samples.count(element) > lower)
Beispiel #2
0
    def horse_dob(self):
        '''Return a date of birth for a live horse as a ``YYYY-MM-DD`` string.

        Horses are assumed to live up to 40 years, with the distribution
        favouring younger horses. The result is never later than today:
        when the current year is drawn, only months and days that have
        already occurred are eligible.

        :return: ``str`` of the chosen ``datetime.date``
        '''
        today = date.today()
        this_year = today.year
        years = list(range(this_year, this_year - 40, -1))

        # Relative weights, newest year first. Note that these do not add
        # up to 1 and they are pure guesswork.
        year_weights = [
            0.06, 0.06, 0.06, 0.06, 0.05, 0.05, 0.05, 0.04, 0.04, 0.04, 0.03,
            0.03, 0.03, 0.03, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.01, 0.01,
            0.01, 0.01, 0.01, 0.01, 0.01, 0.005, 0.005, 0.005, 0.001, 0.001,
            0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001
        ]

        # get a year - distributed around average horse age
        year = choice_distribution(years, year_weights)

        # get a month - in northern hemisphere most horses are born in spring
        # TODO: handle date of birth for southern hemisphere
        months = list(range(1, 13))
        month_weights = [
            0.2, 0.2, 0.2, 0.2, 0.1, 0.5, 0.2, 0.1, 0.01, 0.01, 0.01, 0.01
        ]
        if year == this_year:
            # A live horse cannot have been born in a month still to come.
            # The weights are only relative (they never summed to 1), so
            # truncating both lists together keeps them usable.
            months = months[:today.month]
            month_weights = month_weights[:today.month]
        month = choice_distribution(months, month_weights)

        # itermonthdates() also yields padding days from the neighbouring
        # months; keep only days of the chosen month that are not in the
        # future. The loop variable is deliberately not called ``date`` so
        # it cannot shadow the ``date`` class used above.
        candidates = [
            day for day in calendar.Calendar().itermonthdates(year, month)
            if day.month == month and day <= today
        ]
        dob = random.choice(candidates)

        # return in format YYYY-MM-DD
        return str(dob)
Beispiel #3
0
    def test_choice_distribution(self):
        """Verify weighted sampling against a reproducible RNG state.

        Asserts that one draw is a member of the candidates, then loads
        the saved RNG state from ``random_state.json`` in ``TEST_DIR``,
        draws 100 samples and requires each candidate's frequency to be
        strictly inside ``100 * probability +/- tolerance``.
        """
        a = ('a', 'b', 'c', 'd')
        p = (0.5, 0.2, 0.2, 0.1)

        first_draw = choice_distribution(a, p)
        self.assertTrue(first_draw in a)

        with open(os.path.join(TEST_DIR, 'random_state.json'), 'r') as fh:
            random_state = json.load(fh)
        # The inner state comes back from JSON as a list; setstate() is
        # given it as a tuple.
        random_state[1] = tuple(random_state[1])
        random.setstate(random_state)

        samples = [choice_distribution(a, p) for _ in range(100)]
        observed = [samples.count(element) for element in a]

        tolerance = 5
        for population, probability in zip(observed, p):
            expected = 100 * probability
            self.assertTrue(
                expected + tolerance > population > expected - tolerance)
Beispiel #4
0
    def age(cls, minor=False):
        """Return a random age.

        :param minor: when true, return ``cls.random_int(0, 20)``;
            otherwise pick a range from ``cls.age_ranges_US`` weighted by
            ``cls.age_freq_US`` and return ``random.randint`` over it.
        """
        if not minor:
            bounds = choice_distribution(cls.age_ranges_US, cls.age_freq_US)
            return random.randint(*bounds)

        # kids' ages are pretty evenly distributed..
        return cls.random_int(0, 20)
Beispiel #5
0
    def random_element(self, elements=('a', 'b', 'c')):
        """
        Pick one random element from ``elements``.

        When ``elements`` is a dictionary, its keys are the candidates and
        its values are used as their weights. For example::

            random_element({"{{variable_1}}": 0.5, "{{variable_2}}": 0.2, "{{variable_3}}": 0.2, "{{variable_4}}": 0.1})

        will have the following distribution:
            * `variable_1`: 50% probability
            * `variable_2`: 20% probability
            * `variable_3`: 20% probability
            * `variable_4`: 10% probability

        Any other iterable is sampled with ``self.generator.random.choice``.
        """
        if not isinstance(elements, dict):
            return self.generator.random.choice(list(elements))

        candidates = list(elements.keys())
        weights = list(elements.values())
        return choice_distribution(candidates, weights, self.generator.random)
Beispiel #6
0
    def ueln(self, country):
        """Build an identifier of the form ``<country>-<pio>-<number>``.

        :param country: key into ``self.pios`` and
            ``self.pios_distribution``; also used as the first component
        :return: the formatted identifier string
        """
        # choose breed society (PIO), weighted per country
        breed_society = choice_distribution(
            self.pios[country], self.pios_distribution[country])

        # random numeric part, drawn with randint(100000, 999999999)
        serial = randint(100000, 999999999)

        return "%s-%s-%s" % (country, breed_society, serial)
Beispiel #7
0
    def horse_sex(self):
        '''
        Pick a sex code for a horse.

        Many male horses are gelded/neutered, so the code for sex includes
        this option. Rarely a female horse can also be neutered. The full
        list is:

            00 - Not Known
            10 - Male - entire/neutered not known
            11 - Stallion - entire
            12 - Gelding
            20 - Female - neutered not known
            21 - Mare
            22 - Neutered female
            30 - Hermaphrodite

        :return: one key of ``self.SEX``, chosen by ``choice_distribution``
            with ``self.SEX_PROPORTIONS`` as weights
        '''
        # NOTE(review): keys and weights are paired by position - confirm
        # that the key order of SEX matches SEX_PROPORTIONS.
        codes = list(self.SEX.keys())
        weights = self.SEX_PROPORTIONS
        return choice_distribution(codes, weights)
Beispiel #8
0
    def random_element(cls, elements=('a', 'b', 'b')):
        """
        Pick one random element from ``elements``.

        When ``elements`` is a dictionary, its keys are the candidates and
        its values are used as their weights. For example::

            random_element({"{{variable_1}}": 0.5, "{{variable_2}}": 0.2, "{{variable_3}}": 0.2, "{{variable_4}}": 0.1})

        will have the following distribution:
            * `variable_1`: 50% probability
            * `variable_2`: 20% probability
            * `variable_3`: 20% probability
            * `variable_4`: 10% probability

        Any other iterable is sampled with ``random.choice``.
        """
        if not isinstance(elements, dict):
            return random.choice(list(elements))

        candidates = list(elements.keys())
        weights = list(elements.values())
        return choice_distribution(candidates, weights)
Beispiel #9
0
def GetFakerDataAsPercent(arr, arrp):
    '''Pick one element of ``arr`` using the weights in ``arrp``.

    Example arguments::

        arr = ('a', 'b', 'c', 'd')
        arrp = (0.5, 0.2, 0.2, 0.1)

    :param arr: candidate values
    :param arrp: weight for each candidate, in the same order as ``arr``
    :return: the element selected by ``choice_distribution``
    '''
    # The sampled value used to be computed and then dropped by a bare
    # ``return``; hand it back to the caller.
    return choice_distribution(arr, arrp)
Beispiel #10
0
 def first_name_male(cls):
     """Return an entry of ``cls.first_names_male_US`` chosen with the
     weights in ``cls.first_names_male_freq_US``."""
     names = cls.first_names_male_US
     weights = cls.first_names_male_freq_US
     return choice_distribution(names, weights)
Beispiel #11
0
 def last_name(cls):
     """Return an entry of ``cls.last_names_US`` chosen with the weights
     in ``cls.last_name_freq_US``."""
     names = cls.last_names_US
     weights = cls.last_name_freq_US
     return choice_distribution(names, weights)
Beispiel #12
0
    def country_of_birth(self):
        """Return an entry of ``self.population`` chosen with the weights
        in ``self.population_distribution``."""
        candidates = self.population
        weights = self.population_distribution
        return choice_distribution(candidates, weights)
Beispiel #13
0
    def horse_color(self):
        """Return an entry of ``self.COLORS`` chosen with the weights in
        ``self.COLOURS_PROPORTIONS``."""
        # References:
        # https://en.wikipedia.org/wiki/Equine_coat_color
        # http://www.animalgenetics.us/Equine/CCalculator1.asp
        coat_colours = self.COLORS
        proportions = self.COLOURS_PROPORTIONS
        return choice_distribution(coat_colours, proportions)
Beispiel #14
0
 def city(self):
     """
     Return an entry of ``self.population`` chosen with the weights in
     ``self.population_distribution``.

     :example 'Cork'
     """
     candidates = self.population
     weights = self.population_distribution
     return choice_distribution(candidates, weights)