Example #1
0
    def __init__(self, grammar, length=1):
        """Load a grammar, put it in Chomsky Normal Form and preprocess it.

        `grammar` is accepted in one of three forms:
            (1) an nltk.grammar.ContextFreeGrammar instance, used as-is,
            (2) a str ending in '.cfg', loaded as a file path through
                nltk.data.load, or
            (3) any other str, handed to parse_cfg as grammar text.

        `length` is the maximum string length to preprocess; it is passed
        on to self._preprocess().

        Raises ValueError if `length` is below 1 or if `grammar` is neither
        a ContextFreeGrammar nor a str.
        """
        if length < 1:
            raise ValueError('length must be greater than 0.')

        # Resolve the argument into a grammar object.
        if isinstance(grammar, ContextFreeGrammar):
            loaded = grammar
        elif not isinstance(grammar, str):
            raise ValueError('Arg grammar must be nltk.grammar.Grammar or str.')
        elif grammar.endswith('.cfg'):
            loaded = nltk.data.load('file:' + grammar)
        else:
            loaded = parse_cfg(grammar)
        self.grammar = loaded

        # Convert only when needed, then sanity-check the conversion result.
        if not loaded.is_chomsky_normal_form():
            self.grammar = convert_to_cnf(loaded)
            assert self.grammar.is_chomsky_normal_form()

        self.productions = self.grammar.productions()

        # TODO: this assumes every nonterminal shows up on some left-hand
        # side. That should hold in theory, but confirm that nltk's CNF check
        # actually guarantees it.
        self.nonterminals = set(rule.lhs() for rule in self.productions)

        # Terminals are all right-hand-side symbols that are not Nonterminals.
        terminal_symbols = set()
        for rule in self.productions:
            for symbol in rule.rhs():
                if not isinstance(symbol, Nonterminal):
                    terminal_symbols.add(symbol)
        self.terminals = terminal_symbols

        # _counts starts empty and is filled in by _preprocess();
        # self.length records the string length preprocessed so far.
        self._counts = {}
        self.length = 0
        self._preprocess(length)
Example #2
0
VOWEL -> MONOPHTHONG | DIPHTHONG 
MONOPHTHONG -> 'ɔ' | 'ɑ' | 'i' | 'u' | 'ɛ' | 'ɪ' | 'ʊ' | 'ʌ' | 'ə' | 'æ'
DIPHTHONG -> 'eɪ' | 'aɪ' | 'oʊ' | 'aʊ' | 'ɔɪ'

GLIDE -> 'j' | 'w'
SYLLABIC_CONSONANT -> 'm̩' | 'n̩' | 'ɹ̩' | 'ɫ̩'
NASAL  -> 'm' | 'n' | 'ŋ'
LIQUID -> 'ɹ' | 'l'
FRICATIVE -> 'f' | 'v' | 'θ' | 'ð' | 's' | 'z' | 'ʃ' | 'ʒ' 
STOP -> 'p' | 'b' | 't' | 'd' | 'k' | 'g'
AFFRICATE -> 'tʃ' | 'dʒ'

""")

# Convert `non_cnf_grammar` to Chomsky Normal Form; the assert is a sanity
# check that the conversion really produced a CNF grammar.
grammar = cnf.convert_to_cnf(non_cnf_grammar)
assert grammar.is_chomsky_normal_form()

# Module-level generator, created once when the module is loaded; main() uses it.
generator = cfl.CFLGenerator(grammar)

def main(args):
    """Print the joined output of `generator.generate` for a given count.

    `args` is a sequence of command-line arguments (without the program
    name). If it is non-empty, its first element is converted with int()
    and used as the count passed to `generator.generate`; otherwise the
    count defaults to 50. The items produced are concatenated with no
    separator and printed on one line.

    Raises ValueError if args[0] cannot be converted to an int.
    """
    num_phones = int(args[0]) if args else 50

    # Parenthesised single-argument print behaves the same on Python 2 and
    # Python 3; the bare print statement is a SyntaxError on Python 3.
    print(''.join(generator.generate(num_phones)))

# Script entry point: pass the command-line arguments, minus the program name.
if __name__ == '__main__':
    main(sys.argv[1:])