def test_weighted(self):
     # Sample a grammar whose 'q' options carry '*N' weight suffixes and check
     # that every outcome appears in proportion to its weight (7 in total).
     sampsz = 9000
     grammar = {
         '*': '{q}',
         'q': [
             '{r*3}',  # weight 3: counts as three 'r' options
             '{s}',
             'qq{t*100}',  # the extra text keeps this from being parsed as a weight
             '{u*0}',  # weight 0: never picked
             '{v*2}',
         ],
         'r': 'a',
         's': 'b{v*10}',  # a weight inside a string has no effect
         't*100': 'c',
         'u': 'd',
         'v': 'e',
         'v*10': 'f',
     }
     generator = Grammarizer(grammar, None)
     # (outcome, weight) pairs, listed in the order they are verified.
     weights = [
         ('a', 3.0),
         ('bf', 1.0),
         ('qqc', 1.0),
         ('d', 0.0),
         ('e', 2.0),
     ]
     tally = dict((outcome, 0) for outcome, _ in weights)
     for _ in xrange(sampsz):
         tally[generator.generate()] += 1
     for outcome, weight in weights:
         self._check_distribution(tally[outcome], sampsz, weight / 7.0)
 def test_replacement(self):
     # Tokens are expanded recursively: the root uses {foo}, which in turn
     # uses {bar}.
     rules = {
         '*': 'a{foo}f',
         'foo': 'b{bar}e',
         'bar': 'cd',
     }
     expected = 'abcdef'
     result = Grammarizer(rules, None).generate()
     self.assertEqual(result, expected)
 def test_badtoken(self):
     # Generating from a template that references an undefined token must
     # raise KeyError.
     grammar = {'*': '{nope}'}
     generator = Grammarizer(grammar, None)
     try:
         generator.generate()
     except KeyError:
         pass  # expected: there is no rule for '{nope}'
     else:
         # Only reached when generate() returned without raising; any other
         # exception type propagates on its own and errors the test.
         self.fail('grammarizer should choke on bad token')
 def test_caseify(self):
     # Rule keys are matched even though their case differs from the tokens
     # in the template.  The expected output shows {nochange} leaving the
     # text untouched, {UPPER} upper-casing it and {TItle} title-casing it.
     rules = {
         '*': 'lorem {nochange} ipsum {UPPER} dolor {TItle} amet',
         'nochaNGE': 'AbCd EfGh',
         'upPER': 'iJkL mNoP',
         'tiTLE': 'qrsT uvwX',
     }
     expected = 'lorem AbCd EfGh ipsum IJKL MNOP dolor Qrst Uvwx amet'
     generator = Grammarizer(rules, None)
     self.assertEqual(generator.generate(), expected)
 def test_aan(self):
     # {aan} has no rule of its own here; the expected output shows it
     # becoming 'a' or 'an' depending on the text that follows it.
     rules = {
         # One '{aan} {token}' pair per case, joined by single spaces.
         '*': ('{aan} {vowel} '
               '{aan} {consonant} '
               '{aan} {accvowel} '
               '{aan} {accconsonant} '
               '{aan} {nonlatin} '
               '{aan} {digitstart} '
               '{aan} {ystart} '
               '{aan} {punctuated}'),
         'vowel': 'Avowel',
         'consonant': 'consonant',
         'accvowel': u'âvowel',
         'accconsonant': u'çonsonant',
         # \u12A5 ETHIOPIC SYLLABLE GLOTTAL E is pronounced as a vowel, but
         # it is not recognized as one, so 'a' is expected.
         'nonlatin': u'እvowel',
         'digitstart': u'1owel',
         'ystart': u'Ypsilanti',  # gets 'a'; oh well
         'punctuated': u'!?«avowel»',
     }
     expected = (u'an Avowel '
                 u'a consonant '
                 u'an âvowel '
                 u'a çonsonant '
                 u'a እvowel '
                 u'a 1owel '
                 u'a Ypsilanti '
                 u'an !?«avowel»')
     generator = Grammarizer(rules, None)
     self.assertEqual(generator.generate(), expected)
 def test_empty(self):
     # An empty root template generates the empty string.
     generator = Grammarizer({'*': ''}, None)
     self.assertEqual(generator.generate(), '')

     # An empty dict gets an empty root added to it, so it behaves the same.
     generator = Grammarizer({}, None)
     self.assertEqual(generator.generate(), '')
    def test_optional(self):
        # Check the sampling distribution of a three-element list when the
        # token is used plain, with '?', and with '??'.
        sampsz = 9000
        grammar = {'*': '{q}', 'q': ['a', 'b', 'c']}
        scenarios = [
            # A list should give a roughly uniform distribution of its
            # elements.
            ('{q}', [
                ('a', 1.0 / 3.0),
                ('b', 1.0 / 3.0),
                ('c', 1.0 / 3.0),
            ]),
            # A '?' in the token adds an empty string to its options,
            # distributed evenly with the rest of the elements.
            ('{q?}', [
                ('a', 0.25),
                ('b', 0.25),
                ('c', 0.25),
                ('', 0.25),
            ]),
            # A '??' in the token makes the token as a whole optional, so
            # the result should come up 50% empty, while the other values
            # evenly split the other 50%.
            ('{q??}', [
                ('a', 1.0 / 6.0),
                ('b', 1.0 / 6.0),
                ('c', 1.0 / 6.0),
                ('', 0.5),
            ]),
        ]
        for root, expected in scenarios:
            # The same dict is reused for every scenario; only the root
            # template changes.
            grammar['*'] = root
            generator = Grammarizer(grammar, None)
            tally = dict((outcome, 0) for outcome, _ in expected)
            for _ in xrange(sampsz):
                tally[generator.generate()] += 1
            for outcome, share in expected:
                self._check_distribution(tally[outcome], sampsz, share)
    def test_fixed(self):
        # Without '*fix' the two {pick} expansions yield 9 evenly distributed
        # combinations; once 'pick' is listed under '*fix' only the three
        # matching pairs are expected.
        sampsz = 9000
        grammar = {
            '*': '{pick},{foo}',
            'foo': '{pick}',
            'pick': ['a', 'b', 'c'],
        }

        def sample():
            # Build a fresh generator from the current grammar and count how
            # often each generated string occurs.
            generator = Grammarizer(grammar, None)
            tally = collections.defaultdict(int)
            for _ in xrange(sampsz):
                tally[generator.generate()] += 1
            return tally

        independent = sample()
        self.assertEqual(len(independent), 9)
        for count in independent.itervalues():
            self._check_distribution(count, sampsz, 1.0 / 9.0)

        grammar['*fix'] = ['pick']
        fixed = sample()
        # Check the size first: indexing a defaultdict below would insert
        # any missing key.
        self.assertEqual(len(fixed), 3)
        for outcome in ('a,a', 'b,b', 'c,c'):
            self._check_distribution(fixed[outcome], sampsz, 1.0 / 3.0)
    def test_squeeze_blank(self):
        # When a token expands to the empty string, the generated text is
        # expected to read 'lorem ipsum' with no doubled, leading or trailing
        # blanks, wherever the empty token sits in the template.
        grammar = {
            '*': 'lorem {empty} ipsum',
            'empty': '',
        }

        def check(root):
            # Every root is assigned as a plain string.  Three of the original
            # assignments ended in a stray comma, which stored a 1-tuple
            # instead of the intended string template.
            grammar['*'] = root
            generator = Grammarizer(grammar, None)
            self.assertEqual(generator.generate(), 'lorem ipsum')

        check('lorem {empty} ipsum')  # empty token between words
        check('{empty} lorem ipsum')  # at the start
        check('lorem{empty} ipsum')  # attached to the preceding word
        check('lorem {empty}ipsum')  # attached to the following word
        check('lorem ipsum {empty}')  # at the end
        check('lorem {empty} {empty} ipsum')  # two empty tokens in a row
    