class Parser:
    """Base class for a lexer/parser that has the rules defined as methods.

    Keyword arguments read by the constructor:
        debug: forwarded to ``lex.lex`` and ``yacc.yacc`` (default 0).
        words: stored as ``self.words`` and later handed to
            ``print_sentence`` as its ``word_filter`` (default None).

    The instance starts with an empty ``sentences`` list, a fresh ``Markov``
    object, an empty ``clause_starter`` dict and an empty ``para_starter``
    list.  Nothing in this class fills them in; presumably the grammar rule
    methods of a subclass do so while parsing -- confirm against subclasses.
    """

    tokens = ()
    precedence = ()

    def __init__(self, **kw):
        """Initialise the collection state and build the lexer and parser."""
        self.debug = kw.get('debug', 0)
        self.sentences = []
        self.markov = Markov()
        self.clause_starter = {}
        self.para_starter = []
        self.words = kw.get('words', None)

        # Name the debug file and table module after this source file and
        # the concrete class.  __file__ can be undefined (e.g. when the code
        # is exec'd), so fall back to a fixed prefix in that case only.
        try:
            prefix = os.path.split(os.path.splitext(__file__)[0])[1]
        except NameError:
            prefix = "parser"
        modname = prefix + "_" + self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"

        # Build the lexer and parser
        lex.lex(module=self, debug=self.debug)
        yacc.yacc(module=self,
                  debug=self.debug,
                  debugfile=self.debugfile,
                  tabmodule=self.tabmodule)

    @staticmethod
    def _normalize_text(text):
        """Flatten raw input text into the form passed to ``yacc.parse``.

        Each pair of consecutive newlines becomes one NUL character, runs of
        NULs are collapsed to a single NUL, every remaining newline becomes
        a space, and runs of spaces are collapsed to a single space.
        """
        text = text.replace('\n\n', '\x00')
        # A single replace pass leaves duplicate NULs behind when the input
        # has six or more consecutive newlines, so repeat until none remain.
        while '\x00\x00' in text:
            text = text.replace('\x00\x00', '\x00')
        text = text.replace('\n', ' ')
        # NOTE(review): the code as received replaced a single space with a
        # single space (a no-op), presumably because whitespace inside the
        # literal was mangled.  Collapsing repeated spaces is the assumed
        # intent -- confirm against the upstream file.
        while '  ' in text:
            text = text.replace('  ', ' ')
        return text

    def _dump_state(self):
        """Print the collected sentences, Markov data and starter tables."""
        # Single-argument print(...) calls produce the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(self.sentences)
        self.markov.printout()
        print("")
        print("clause starters")
        for key in sorted(self.clause_starter):
            # The Python 2 print statement writes no separator after an item
            # ending in a tab, hence no space between "\t" and the key.
            print("\t%s %s" % (repr(key), self.clause_starter[key]))
        print("")
        print("para starters %s" % (self.para_starter,))
        print("")

    def run(self):
        """Parse all of standard input and print a generated sentence.

        Reads stdin to EOF, normalises it, feeds it to the module-level
        ``yacc.parse``, prints the collected state, calls
        ``self.markov.prepare()`` and finally prints the result of
        ``random_sentence`` through ``print_sentence``.  Returns None.
        """
        yacc.parse(self._normalize_text(sys.stdin.read()))
        self._dump_state()

        self.markov.prepare()
        # 800 is the second positional argument of random_sentence;
        # presumably a length limit -- confirm against its definition.
        sentence = random_sentence(self.markov, 800,
                                   starters=self.clause_starter,
                                   para_starters=self.para_starter)
        print_sentence(sentence, word_filter=self.words)
class Parser:
    """Base class for a lexer/parser that has the rules defined as methods.

    Keyword arguments read by the constructor:
        debug: forwarded to ``lex.lex`` and ``yacc.yacc`` (default 0).
        word_filter: stored as ``self.word_filter``; ``run`` either passes
            it to ``make_sentence`` or, together with ``letter_priority``,
            uses it to re-weight Markov transitions (default None).
        letter_priority: multiplier applied to transitions whose target
            word (lower-cased) is a key of ``word_filter`` (default None).

    The instance starts with an empty ``sentences`` list, a fresh ``Markov``
    object, an empty ``clause_starter`` dict and an empty ``para_starter``
    list.  Nothing in this class fills them in; presumably the grammar rule
    methods of a subclass do so while parsing -- confirm against subclasses.
    """

    tokens = ()
    precedence = ()

    def __init__(self, **kw):
        """Initialise the collection state and build the lexer and parser."""
        self.debug = kw.get('debug', 0)
        self.sentences = []
        self.markov = Markov()
        self.clause_starter = {}
        self.para_starter = []
        self.word_filter = kw.get('word_filter', None)
        self.letter_priority = kw.get('letter_priority', None)

        # Name the debug file and table module after this source file and
        # the concrete class.  __file__ can be undefined (e.g. when the code
        # is exec'd), so fall back to a fixed prefix in that case only.
        try:
            prefix = os.path.split(os.path.splitext(__file__)[0])[1]
        except NameError:
            prefix = "parser"
        modname = prefix + "_" + self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"

        # Build the lexer and parser
        lex.lex(module=self, debug=self.debug)
        yacc.yacc(module=self,
                  debug=self.debug,
                  debugfile=self.debugfile,
                  tabmodule=self.tabmodule)

    @staticmethod
    def _normalize_text(text):
        """Flatten raw input text into the form passed to ``yacc.parse``.

        Each pair of consecutive newlines becomes one NUL character, runs of
        NULs are collapsed to a single NUL, every remaining newline becomes
        a space, and runs of spaces are collapsed to a single space.
        """
        text = text.replace('\n\n', '\x00')
        # A single replace pass leaves duplicate NULs behind when the input
        # has six or more consecutive newlines, so repeat until none remain.
        while '\x00\x00' in text:
            text = text.replace('\x00\x00', '\x00')
        text = text.replace('\n', ' ')
        # NOTE(review): the code as received replaced a single space with a
        # single space (a no-op), presumably because whitespace inside the
        # literal was mangled.  Collapsing repeated spaces is the assumed
        # intent -- confirm against the upstream file.
        while '  ' in text:
            text = text.replace('  ', ' ')
        return text

    def _dump_state(self):
        """Print the collected sentences, Markov data and starter tables."""
        # Single-argument print(...) calls produce the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(self.sentences)
        self.markov.printout()
        print("")
        print("clause starters")
        for key in sorted(self.clause_starter):
            # The Python 2 print statement writes no separator after an item
            # ending in a tab, hence no space between "\t" and the key.
            print("\t%s %s" % (repr(key), self.clause_starter[key]))
        print("")
        print("para starters %s" % (self.para_starter,))
        print("")

    def _apply_letter_priority(self, word_filter):
        """Multiply the weight of transitions into prioritised words.

        For every mapping in ``self.markov.states``, each entry whose key
        (lower-cased) is contained in ``word_filter`` is multiplied by
        ``self.letter_priority``.  The mappings are modified in place, so
        calling ``run`` again on the same instance multiplies them again.
        """
        for followers in self.markov.states.values():
            # Only existing keys are reassigned, so iterating the mapping's
            # key list while updating values is safe.
            for to_word in list(followers):
                if to_word.lower() in word_filter:
                    followers[to_word] *= self.letter_priority

    def run(self, txt=None, para_starter=False):
        """Parse the input text and return a generated sentence.

        Args:
            txt: text to parse; when None, standard input is read to EOF.
            para_starter: when true, ``random_sentence`` receives
                ``para_starters=None`` instead of ``self.para_starter``.
                NOTE(review): the flag name reads as if it should enable
                paragraph starters, not disable them -- confirm intent.

        Returns:
            Whatever ``make_sentence`` returns for the generated sentence.
        """
        raw = sys.stdin.read() if txt is None else txt
        yacc.parse(self._normalize_text(raw))
        self._dump_state()

        word_filter = self.word_filter
        if self.letter_priority and word_filter:
            # certain words are given a higher priority (multiplier)
            # than others; once used as weights, the filter is no longer
            # passed on to make_sentence.
            # NOTE(review): indentation was lost in the received source;
            # clearing the filter inside this branch is the assumed
            # structure -- confirm against the upstream file.
            self._apply_letter_priority(word_filter)
            word_filter = None

        self.markov.prepare()
        para_starters = None if para_starter else self.para_starter
        # 800 is the second positional argument of random_sentence;
        # presumably a length limit -- confirm against its definition.
        sentence = random_sentence(self.markov, 800,
                                   starters=self.clause_starter,
                                   para_starters=para_starters)
        return make_sentence(sentence, word_filter=word_filter)