def __init__(self): """ Create and return the object """ self.machine = PushDownMachine() self.tokenizer = None # set up the parenthesis self.parens = {'(': ')', '[': ']', '{': '}'} self.lparens = self.parens.keys() self.rparens = self.parens.values() self._build_machine() self.machine.stack = [[]]
def __init__(self): """ Create and return the object """ self.machine = PushDownMachine() self.tokenizer = None # set up the parenthesis self.parens = {'(':')', '[':']', '{':'}'} self.lparens = self.parens.keys() self.rparens = self.parens.values() self._build_machine() self.machine.stack = [[]]
class SexpListParser(object): """ Parse the text and return a a C{SexpList} """ def __init__(self): """ Create and return the object """ self.machine = PushDownMachine() self.tokenizer = None # set up the parenthesis self.parens = {'(':')', '[':']', '{':'}'} self.lparens = self.parens.keys() self.rparens = self.parens.values() self._build_machine() self.machine.stack = [[]] def _build_machine(self): """ Build the state machine with the defined states """ self.machine.addstate(self._lparen) self.machine.addstate(self._rparen) self.machine.addstate(self._word) self.machine.addstate(self._end, end_state=True) self.machine.setstart(self._lparen) def _tokenizer(self, to_tokenize): """ Return a tokenizer """ lparen_res = ''.join([re.escape(lparen) for lparen in self.parens.keys()]) rparen_res = ''.join([re.escape(rparen) for rparen in self.parens.values()]) tok_re = re.compile('[%s]|[%s]|[^%s%s\s]+' % (lparen_res, rparen_res, lparen_res, rparen_res)) return tok_re.finditer(to_tokenize) def parse(self, to_parse): """ Parse the text and return the C{SexpList} @param to_parse: The string to parse @type to_parse: string @return: list of lists """ self.tokenizer = self._tokenizer(SexpListParser.remove_comments(to_parse)) start = self.tokenizer.next().group() if (start not in self.lparens): raise ValueError("Expression must start with an open paren") stack_w_list = self.machine.run(tokens=start) return stack_w_list[0][0] def _transition(self): """ Move to the next state based on the following token """ try: tok = self.tokenizer.next().group() # collect strings if tok.startswith('"') and tok.endswith('"'): tok = tok[1:-1] elif tok.startswith('"') and not tok.endswith('"'): while True: tok = "%s %s" % (tok, self.tokenizer.next().group()) if tok.endswith('"'): tok = tok.replace('"', '') break except Exception: return self._end, "" if tok in self.lparens: return self._lparen, tok elif tok in self.rparens: return self._rparen, tok return self._word, tok def _lparen(self, current): """ Actions to take given the left parenthesis """ self.machine.push(SexpList(current, self.parens[current])) return self._transition() def _rparen(self, current): """ Actions to take given the right parenthesis """ if len(self.machine.stack) == 1: raise ValueError("Unexpected close paren") if current != self.machine.stack[-1].rparen: raise ValueError("Mismatched paren") # close the and make it an item of the previous list closed_sexp_list = self.machine.stack.pop() # this makes sure that we dont add any of the tracing stuff if not any(i in closed_sexp_list for i in ("control-demo", "control", ":demo", "trace")): self.machine.stack[-1].append(closed_sexp_list) return self._transition() def _word(self, current): """ Actions to take given that the current token is a word """ # add the word to the last list if len(self.machine.stack) == 0: raise ValueError("Expected open paren") if not ('%' in current): self.machine.stack[-1].append(current) return self._transition() def _end(self, token): """ Actions to take when the string is finished processing """ if len(self.machine.stack) > 1: raise ValueError("Expected close paren") assert len(self.machine.stack) == 1 if len(self.machine.stack[0]) == 0: raise ValueError("Excepted open paren") if len(self.machine.stack[0]) > 1: raise ValueError("Expected a single sexp list") return self.machine.stack[0][0] @staticmethod def _error(s, match): pos = match.start() for lineno, line in enumerate(s.splitlines(True)): if pos < len(line): break pos -= len(line) return 'line %s, char %s' % (lineno+1, pos) @staticmethod def remove_comments(text): """ Remove the comments from a given expression """ remover = re.compile("\;.*") result = [] for line in text.splitlines(True): temp = remover.sub('', line) if not temp.isspace(): result.append(temp) return "".join(result)
class SexpListParser(object): """ Parse the text and return a a C{SexpList} """ def __init__(self): """ Create and return the object """ self.machine = PushDownMachine() self.tokenizer = None # set up the parenthesis self.parens = {'(': ')', '[': ']', '{': '}'} self.lparens = self.parens.keys() self.rparens = self.parens.values() self._build_machine() self.machine.stack = [[]] def _build_machine(self): """ Build the state machine with the defined states """ self.machine.addstate(self._lparen) self.machine.addstate(self._rparen) self.machine.addstate(self._word) self.machine.addstate(self._end, end_state=True) self.machine.setstart(self._lparen) def _tokenizer(self, to_tokenize): """ Return a tokenizer """ lparen_res = ''.join( [re.escape(lparen) for lparen in self.parens.keys()]) rparen_res = ''.join( [re.escape(rparen) for rparen in self.parens.values()]) tok_re = re.compile('[%s]|[%s]|[^%s%s\s]+' % (lparen_res, rparen_res, lparen_res, rparen_res)) return tok_re.finditer(to_tokenize) def parse(self, to_parse): """ Parse the text and return the C{SexpList} @param to_parse: The string to parse @type to_parse: string @return: list of lists """ self.tokenizer = self._tokenizer( SexpListParser.remove_comments(to_parse)) start = self.tokenizer.next().group() if (start not in self.lparens): raise ValueError("Expression must start with an open paren") stack_w_list = self.machine.run(tokens=start) return stack_w_list[0][0] def _transition(self): """ Move to the next state based on the following token """ try: tok = self.tokenizer.next().group() # collect strings if tok.startswith('"') and tok.endswith('"'): tok = tok[1:-1] elif tok.startswith('"') and not tok.endswith('"'): while True: tok = "%s %s" % (tok, self.tokenizer.next().group()) if tok.endswith('"'): tok = tok.replace('"', '') break except Exception: return self._end, "" if tok in self.lparens: return self._lparen, tok elif tok in self.rparens: return self._rparen, tok return self._word, tok def _lparen(self, current): """ Actions to take given the left parenthesis """ self.machine.push(SexpList(current, self.parens[current])) return self._transition() def _rparen(self, current): """ Actions to take given the right parenthesis """ if len(self.machine.stack) == 1: raise ValueError("Unexpected close paren") if current != self.machine.stack[-1].rparen: raise ValueError("Mismatched paren") # close the and make it an item of the previous list closed_sexp_list = self.machine.stack.pop() # this makes sure that we dont add any of the tracing stuff if not any(i in closed_sexp_list for i in ("control-demo", "control", ":demo", "trace")): self.machine.stack[-1].append(closed_sexp_list) return self._transition() def _word(self, current): """ Actions to take given that the current token is a word """ # add the word to the last list if len(self.machine.stack) == 0: raise ValueError("Expected open paren") if not ('%' in current): self.machine.stack[-1].append(current) return self._transition() def _end(self, token): """ Actions to take when the string is finished processing """ if len(self.machine.stack) > 1: raise ValueError("Expected close paren") assert len(self.machine.stack) == 1 if len(self.machine.stack[0]) == 0: raise ValueError("Excepted open paren") if len(self.machine.stack[0]) > 1: raise ValueError("Expected a single sexp list") return self.machine.stack[0][0] @staticmethod def _error(s, match): pos = match.start() for lineno, line in enumerate(s.splitlines(True)): if pos < len(line): break pos -= len(line) return 'line %s, char %s' % (lineno + 1, pos) @staticmethod def remove_comments(text): """ Remove the comments from a given expression """ remover = re.compile("\;.*") result = [] for line in text.splitlines(True): temp = remover.sub('', line) if not temp.isspace(): result.append(temp) return "".join(result)