def grep(expr, filename):
    # Print every line of the file `filename` that matches the regular
    # expression `expr`.  Each printed line is prefixed with its 1-based
    # line number, right-justified in a 3-character field, and ': '.
    #
    #   expr     -- pattern source text, handed to regexp.compile()
    #   filename -- name of the file to scan; opened for reading
    #
    # Returns nothing; matching lines are written to standard output.
    # Errors from regexp.compile() or open() propagate to the caller.
    #
    # Only spellings that later Python versions still accept are used:
    # '==' and '!=' for comparisons, repr() instead of backquotes, a
    # parenthesised print, and the match() method name that the other
    # regexp callers use ('exec' later became a reserved word).
    prog = regexp.compile(expr)
    fp = open(filename, 'r')
    try:
        lineno = 0
        while 1:
            line = fp.readline()
            if not line:
                break  # readline() returns an empty string at end of file
            lineno = lineno + 1
            res = prog.match(line)
            if res:
                # res[0] is the (start, end) pair of the whole match.
                start, end = res[0]
                if line[-1:] == '\n':
                    line = line[:-1]
                prefix = string.rjust(repr(lineno), 3) + ': '
                print(prefix + line)
                if 0:
                    # Disabled on purpose: prints a second line marking
                    # the matched span with carets (a backslash for an
                    # empty match).  Tabs before the match are copied so
                    # the marker lines up under the text.
                    line = line[:start]
                    if '\t' not in line:
                        prefix = ' ' * (len(prefix) + start)
                    else:
                        prefix = ' ' * len(prefix)
                        for c in line:
                            if c != '\t':
                                c = ' '
                            prefix = prefix + c
                    if start == end:
                        prefix = prefix + '\\'
                    else:
                        prefix = prefix + '^'*(end-start)
                    print(prefix)
    finally:
        # Release the file handle even if matching or printing fails.
        fp.close()
def get_tags(f):
    # Build the node lookup table from the tag table stored at the end of
    # the open file f.
    #
    # Returns a dictionary mapping each lower-cased node name to a tuple
    # (file, offset, line): file and offset are the result of
    # map_offset(offset, indirlist), and line is the matched tag-table
    # text in front of the \177 separator.  Returns {} when the last node
    # is not an 'end tag table' marker or the tag table node is missing.
    #
    # NOTE(review): ifile.backup_node(), ifile.read_node(), labelmatch()
    # and map_offset() are defined elsewhere; the comments here describe
    # only how this function uses them -- confirm against their sources.
    #
    # First see if the last "node" is the end of tag table marker.
    #
    f.seek(0, 2) # Seek to EOF
    end = f.tell()
    buf = ifile.backup_node(f, end)
    if not labelmatch(buf, 0, 'end tag table\n'):
        return {} # No success
    #
    # Next backup to the previous "node" -- the tag table itself.
    # The code treats f.tell() - len(buf) as the offset where the text
    # held in buf begins (presumably backup_node leaves f positioned just
    # past what it read -- TODO confirm).
    #
    ###print 'Getting prebuilt tag table...'
    end = f.tell() - len(buf)
    buf = ifile.backup_node(f, end)
    label = 'tag table:\n'
    if not labelmatch(buf, 0, label):
        print 'Weird: end tag table marker but no tag table?'
        print 'Node begins:', `buf[:50]`
        return {}
    #
    # Now read the whole tag table.
    # The start offset must be computed before read_node() is called,
    # because it is derived from the current file position and the
    # length of the partial buffer.
    #
    end = f.tell() - len(buf) # Do this first!
    buf = ifile.read_node(f, buf)
    #
    # First check for an indirection table.
    # It is announced by '(indirect)\n' right after the tag table label
    # and is looked for in the node that precedes offset `end`.
    #
    indirlist = []
    if labelmatch(buf, len(label), '(indirect)\n'):
        indirbuf = ifile.backup_node(f, end)
        if not labelmatch(indirbuf, 0, 'indirect:\n'):
            print 'Weird: promised indirection table not found'
            print 'Node begins:', `indirbuf[:50]`
            # Carry on.  Things probably won't work though.
        else:
            indirbuf = ifile.read_node(f, indirbuf)
            indirlist = parse_indirlist(indirbuf)
    #
    # Now parse the tag table.
    # Each entry is "<text>Node: <name>" + \177 (octal; ASCII DEL) +
    # "<decimal offset>".  Group 1 is the text before \177, group 2 the
    # node name, group 3 the offset digits.
    #
    findtag = regexp.compile('^(.*[nN]ode:[ \t]*(.*))\177([0-9]+)$').match
    i = 0
    tags = {}
    while 1:
        match = findtag(buf, i)
        if not match: break
        (a,b), (a1,b1), (a2,b2), (a3,b3) = match
        i = b # continue scanning after this entry
        line = buf[a1:b1]
        node = string.lower(buf[a2:b2])
        # NOTE(review): eval() is applied to text read from the file.  The
        # pattern limits it to digits, but a leading zero would be read as
        # an octal literal (or rejected) -- consider an explicit decimal
        # conversion.
        offset = eval(buf[a3:b3]) # XXX What if it overflows?
        if tags.has_key(node):
            # Only a warning: the later entry replaces the earlier one.
            print 'Duplicate key in tag table:', `node`
        file, offset = map_offset(offset, indirlist)
        tags[node] = file, offset, line
    #
    return tags
def get_tags(f):
    # Build the node lookup table from the tag table stored at the end of
    # the open file f.
    #
    # Returns a dictionary mapping each lower-cased node name to a tuple
    # (file, offset, line): file and offset are the result of
    # map_offset(offset, indirlist), and line is the matched tag-table
    # text in front of the \177 separator.  Returns {} when the last node
    # is not an 'end tag table' marker or the tag table node is missing.
    #
    # NOTE(review): ifile.backup_node(), ifile.read_node(), labelmatch()
    # and map_offset() are defined elsewhere; the comments here describe
    # only how this function uses them -- confirm against their sources.
    #
    # First see if the last "node" is the end of tag table marker.
    #
    f.seek(0, 2)  # Seek to EOF
    end = f.tell()
    buf = ifile.backup_node(f, end)
    if not labelmatch(buf, 0, 'end tag table\n'):
        return {}  # No success
    #
    # Next backup to the previous "node" -- the tag table itself.
    # The code treats f.tell() - len(buf) as the offset where the text
    # held in buf begins (presumably backup_node leaves f positioned just
    # past what it read -- TODO confirm).
    #
    ###print 'Getting prebuilt tag table...'
    end = f.tell() - len(buf)
    buf = ifile.backup_node(f, end)
    label = 'tag table:\n'
    if not labelmatch(buf, 0, label):
        print 'Weird: end tag table marker but no tag table?'
        print 'Node begins:', ` buf[:50] `
        return {}
    #
    # Now read the whole tag table.
    # The start offset must be computed before read_node() is called,
    # because it is derived from the current file position and the
    # length of the partial buffer.
    #
    end = f.tell() - len(buf)  # Do this first!
    buf = ifile.read_node(f, buf)
    #
    # First check for an indirection table.
    # It is announced by '(indirect)\n' right after the tag table label
    # and is looked for in the node that precedes offset `end`.
    #
    indirlist = []
    if labelmatch(buf, len(label), '(indirect)\n'):
        indirbuf = ifile.backup_node(f, end)
        if not labelmatch(indirbuf, 0, 'indirect:\n'):
            print 'Weird: promised indirection table not found'
            print 'Node begins:', ` indirbuf[:50] `
            # Carry on.  Things probably won't work though.
        else:
            indirbuf = ifile.read_node(f, indirbuf)
            indirlist = parse_indirlist(indirbuf)
    #
    # Now parse the tag table.
    # Each entry is "<text>Node: <name>" + \177 (octal; ASCII DEL) +
    # "<decimal offset>".  Group 1 is the text before \177, group 2 the
    # node name, group 3 the offset digits.
    #
    findtag = regexp.compile('^(.*[nN]ode:[ \t]*(.*))\177([0-9]+)$').match
    i = 0
    tags = {}
    while 1:
        match = findtag(buf, i)
        if not match: break
        (a, b), (a1, b1), (a2, b2), (a3, b3) = match
        i = b  # continue scanning after this entry
        line = buf[a1:b1]
        node = string.lower(buf[a2:b2])
        # NOTE(review): eval() is applied to text read from the file.  The
        # pattern limits it to digits, but a leading zero would be read as
        # an octal literal (or rejected) -- consider an explicit decimal
        # conversion.
        offset = eval(buf[a3:b3])  # XXX What if it overflows?
        if tags.has_key(node):
            # Only a warning: the later entry replaces the earlier one.
            print 'Duplicate key in tag table:', ` node `
        file, offset = map_offset(offset, indirlist)
        tags[node] = file, offset, line
    #
    return tags
def GrepCmd(interp, argv):
    # Handler for a "grep" command: checks the argument count and
    # compiles the pattern found in argv[1].
    #
    # Raises UsageError when argv has fewer than three entries, and
    # TclRuntimeError (with a tuple describing the pattern and the
    # compiler's message) when regexp.compile() rejects the pattern.
    #
    # NOTE(review): the visible text ends right after the pattern is
    # compiled; presumably the files named in argv[2:] are searched in
    # code that follows -- confirm.  `interp` is not used in this part.
    if len(argv) < 3:
        raise UsageError, 'usage: grep regexp file ...'
    # Imported locally, so the module is only needed when grep is run.
    import regexp
    try:
        prog = regexp.compile(argv[1])
    except regexp.error, msg:
        raise TclRuntimeError, \
            ('grep', argv[1], ': bad regexp :', msg)
def do_grep(args):
    # "grep" command: checks the argument count and compiles the pattern
    # found in args[0].
    #
    # Problems are reported on standard output rather than raised: a
    # usage line when fewer than two arguments are given, or the
    # compiler's message when regexp.compile() rejects the pattern.  In
    # both cases the function simply returns.
    #
    # NOTE(review): the visible text ends right after the pattern is
    # compiled; presumably the files named in args[1:] are searched in
    # code that follows -- confirm.
    if len(args) < 2:
        print 'usage: grep regexp file ...'
        return
    # Imported locally, so the module is only needed when grep is run.
    import regexp
    try:
        prog = regexp.compile(args[0])
    except regexp.error, msg:
        print 'regexp.compile error for', args[0], ':', msg
        return
def test_print_transitions(self):
    """Check what ``print_transitions`` prints for the pattern ``ab*c``.

    Parenthesised numbers in the output are masked as ``(x)`` and runs of
    spaces are squeezed to one, so only the multiset of printed lines is
    compared.
    """
    automaton = compile("ab*c")
    expected = Counter(["(x) a (x)", "(x) Σ (x)"])

    with closing(StringIO()) as sink:
        with redirect_stdout(sink):
            automaton.initial_node.print_transitions()
        # Read the text before closing() releases the buffer.
        printed = sink.getvalue()

    masked = re.sub(r"\([0-9]+\)", "(x)", printed)
    squeezed = re.sub(r" +", " ", masked)
    self.assertEqual(Counter(squeezed.splitlines()), expected)
def parse_indirlist(buf):
    # Turn the text of an indirection table into a list of
    # (file, offset) tuples, in the order the entries appear in buf.
    #
    # Every entry is a line of the form "<file>: <decimal offset>".
    # Scanning stops at the first position where no further entry is
    # found, so an empty list is returned when nothing matches.
    scan = regexp.compile('^(.+):[ \t]*([0-9]+)$').match
    entries = []
    pos = 0
    hit = scan(buf, pos)
    while hit:
        # Pair 0 spans the whole entry, pair 1 the file name and
        # pair 2 the offset digits.
        (whole_start, whole_end), (name_start, name_end), \
            (num_start, num_end) = hit
        name = buf[name_start:name_end]
        # NOTE(review): eval() runs on text taken from buf.  The pattern
        # limits it to digits, but a leading zero would be read as an
        # octal literal (or rejected).  XXX What if this gets overflow?
        value = eval(buf[num_start:num_end])
        entries.append((name, value))
        pos = whole_end
        hit = scan(buf, pos)
    return entries
def parse_indirlist(buf):
    # Turn the text of an indirection table into a list of
    # (file, offset) tuples, in the order the entries appear in buf.
    #
    # Every entry is a line of the form "<file>: <decimal offset>".
    # Scanning stops at the first position where no further entry is
    # found, so an empty list is returned when nothing matches.
    scan = regexp.compile('^(.+):[ \t]*([0-9]+)$').match
    entries = []
    pos = 0
    hit = scan(buf, pos)
    while hit:
        # Pair 0 spans the whole entry, pair 1 the file name and
        # pair 2 the offset digits.
        (whole_start, whole_end), (name_start, name_end), \
            (num_start, num_end) = hit
        name = buf[name_start:name_end]
        # NOTE(review): eval() runs on text taken from buf.  The pattern
        # limits it to digits, but a leading zero would be read as an
        # octal literal (or rejected).  XXX What if this gets overflow?
        value = eval(buf[num_start:num_end])
        entries.append((name, value))
        pos = whole_end
        hit = scan(buf, pos)
    return entries
def isearch(win):
    # Ask the user for a search pattern and compile it.
    #
    # The dialog is pre-filled with win.pat.  The function returns
    # silently when the dialog is interrupted, when neither the answer
    # nor win.pat holds a pattern (after showing 'No previous pattern'),
    # or when the pattern does not compile (after showing the error).
    #
    # NOTE(review): the visible text ends right after the pattern is
    # compiled; presumably the search itself follows -- confirm.
    try:
        pat = stdwin.askstr('Search pattern:', win.pat)
    except KeyboardInterrupt:
        return
    if not pat:
        # Empty answer: fall back to the pattern stored on the window.
        pat = win.pat
        if not pat:
            stdwin.message('No previous pattern')
            return
    try:
        cpat = regexp.compile(pat)
    except regexp.error, msg:
        stdwin.message('Bad pattern: ' + msg)
        return
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX # Tunable constants. # BLOCKSIZE = 512 # Qty to align reads to, if possible FUZZ = 2*BLOCKSIZE # Qty to back-up before searching for a node CHUNKSIZE = 4*BLOCKSIZE # Qty to read at once when reading lots of data # Regular expressions used. # Note that it is essential that Python leaves unrecognized backslash # escapes in a string so they can be seen by regexp.compile! # findheader = regexp.compile('\037\014?\n(.*\n)').match findescape = regexp.compile('\037').match parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match findfirstline = regexp.compile('^.*\n').match findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match findmenu = regexp.compile('^\* [mM]enu:').match findmenuitem = regexp.compile( \ '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match findfootnote = regexp.compile( \ '\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match parsenoderef = regexp.compile('^\((.*)\)(.*)$').match
def test_single_match(self):
    """``read_lazy("a")`` on the automaton built from ``"a"`` returns 1."""
    consumed = compile("a").read_lazy("a")
    self.assertEqual(consumed, 1)
else: win.nodemenu.additem(topic) digit = digit + 1 # win.footnotes = footnotes if footnotes: win.footmenu = win.menucreate('Footnotes') for topic, ref in footnotes: win.footmenu.additem(topic) # win.settitle('(' + win.file + ')' + win.node) # Find menu item at focus # findmenu = regexp.compile('^\* [mM]enu:').match findmenuitem = regexp.compile( \ '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match # def whichmenuitem(win): if not win.menu: return '' match = findmenu(win.text) if not match: return '' a, b = match[0] i = b f1, f2 = win.textobj.getfocus() lastmatch = '' while i < len(win.text): match = findmenuitem(win.text, i)
def test_multiple(self):
    """``read_greedy`` on the automaton for ``"abcdef"`` returns 6."""
    word = "abcdef"
    consumed = compile(word).read_greedy(word)
    self.assertEqual(consumed, 6)
def test_kleene(self):
    """``read_greedy("abbbbbb")`` on the automaton for ``"ab*"`` returns 7."""
    consumed = compile("ab*").read_greedy("abbbbbb")
    self.assertEqual(consumed, 7)
# Parser for CWI Multimedia Interchange Files (CMIF, extension .cmif) from MMExc import * # Exceptions import regexp # Globals used by class MMParser expr = '0[xX][0-9a-fA-F]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?' matchnumber = regexp.compile(expr).exec expr = '[a-zA-Z_][a-zA-Z0-9_]*' matchname = regexp.compile(expr).exec letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' digits = '0123456789' # Parser for CMIF files. # Conceivably subclassing from this class might make sense. # After initializing the parser once, it is possible to # use it to get multiple objects from an input source # by calling the reset() method in between calls to getnode() # or other get*() methods. (This resets the scanner except for the # current line number.) class MMParser(): #
def test_single_no_match(self):
    """``read_greedy("b")`` on the automaton for ``"a"`` returns 0."""
    consumed = compile("a").read_greedy("b")
    self.assertEqual(consumed, 0)
#XXX Multiline colour support (this does happen with B (biological) but to be fair the general audience does not use geni as colour names) #The Python<=1.4 one, not to be confused with the high-level-low-usabilty "regex" or its successer "re" #This is an anachronism, I got this from the 1.4 source tree and compiled 2.4 regex as a backend import regexp main_re=regexp.compile('^<tr><td>([][ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 (){}]+)</td>(.*)</tr>') #If colour number ends with a minus it probably means it is almost certaily a typo #If colour number ends with + it means a typo or omission fix(?) #For example quince yellow: the author of the table has wisely decided the Prussian Blue/Slate Grey #187 is a typo (marked with -) and made own contribution 87 (a yellow-brown) marked with + col_re=regexp.compile('<td( width="5%")? title="[0123456789]+" style="background-color:#?([0123456789ABCDEF]+)(; color:#FFF)?">[0123456789]*[^-]</[tT][dD]>') d={} def extract(colours): rgbs=set() while 1: match=col_re.match(colours) if not match: break rgb=colours[match[2][0]:match[2][1]] rgbs.add(rgb) colours=colours[match[0][1]:] return frozenset(rgbs) def namify(name): if name.startswith("{"): #IE notice only, no colour return () name=name.split("(")[0] #Regexes here would be complete overkill
argv[0] + '"' # XXX No defaults or variable length 'args' yet frame = _Frame().Create() for i in range(len(proc.args)): frame.locals[proc.args[i]] = argv[i+1] proc.interp.stack.append(frame) try: value = proc.interp.Eval(proc.body) except TclReturn, value: pass del proc.interp.stack[-1:] return value import regexp _expand_prog = regexp.compile('([^[$\\]+|\n)*') del regexp class Interpreter(): # def Create(interp): interp.globals = {} interp.commands = {} interp.stack = [] interp.commands['break'] = interp.BreakCmd interp.commands['concat'] = interp.ConcatCmd interp.commands['continue'] = interp.ContinueCmd interp.commands['echo'] = interp.EchoCmd interp.commands['eval'] = interp.EvalCmd interp.commands['expr'] = interp.ExprCmd interp.commands['for'] = interp.ForCmd
# so they can simply be concatenated to a relative pathname. # #INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX # Tunable constants. # BLOCKSIZE = 512 # Qty to align reads to, if possible FUZZ = 2 * BLOCKSIZE # Qty to back-up before searching for a node CHUNKSIZE = 4 * BLOCKSIZE # Qty to read at once when reading lots of data # Regular expressions used. # Note that it is essential that Python leaves unrecognized backslash # escapes in a string so they can be seen by regexp.compile! # findheader = regexp.compile('\037\014?\n(.*\n)').match findescape = regexp.compile('\037').match parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match findfirstline = regexp.compile('^.*\n').match findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match findmenu = regexp.compile('^\* [mM]enu:').match findmenuitem = regexp.compile( \ '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match findfootnote = regexp.compile( \ '\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match parsenoderef = regexp.compile('^\((.*)\)(.*)$').match
-##### Be Built From These Functions #####- - import needed libraries - create a list of all journal files - read each journal-file into a separate string - parse all strings into a single list of entries - sort all entries - spit return result to stdout ''' # import needed libraries import datetime import sys import regexp as re # create a list of all journal files arguments = sys.argv journalfiles = sys.argv[1:] print('journalfiles: ' + journalfiles) # read each journal-file into a separate string journalstring.append(file(f).read() for f in journalfiles) print(journalstring) # parse all strings into a single list of entries # an entry is text in /^\\section/ prog = re.compile(pattern) positions = [] while true: postions.append(prog.match(journalstring)) # sort entries chronologically # timeofentry = datetime.datetime.strpdatetime # spit return result to stdout
import regexp # Match maximal blocks of a's example_1 = regexp.compile('^(.*[^a])?(?P<block_a>a+)([^a].*)?$') # Match all email address delimited with spaces example_2 = regexp.compile('\\w+@\\w+') # Enumerate all subwords example_3 = regexp.compile('.*') # Enumerate all pairs of a non-empty block of a's followed by a non-empty block # of b's example_4 = regexp.compile( '^(.*[^a])?(?P<block_a>a+)([^a].*[^b]|[^ab])?(?P<block_b>b+)([^b].*)?$') # Match email addresses in the form [a.]*@a*.a* example_5 = regexp.compile('(?P<login>\\w+(\\.\\w+)*)@(?P<server>\\w+\\.\\w+)') INSTANCES = [ { 'name': 'block_a', 'automata': example_1, 'documents': ['a', 'aaaaaaaaaaaaa', 'bbbabb', 'aaaabbaaababbbb'] }, { 'name': 'sep_email', 'automata': example_2, 'documents': ['a bba a@b b@a aaa@bab abbababaa@@@babbabb'] }, {
else: win.nodemenu.additem(topic) digit = digit + 1 # win.footnotes = footnotes if footnotes: win.footmenu = win.menucreate('Footnotes') for topic, ref in footnotes: win.footmenu.additem(topic) # win.settitle('(' + win.file + ')' + win.node) # Find menu item at focus # findmenu = regexp.compile('^\* [mM]enu:').match findmenuitem = regexp.compile( \ '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match # def whichmenuitem(win): if not win.menu: return '' match = findmenu(win.text) if not match: return '' a, b = match[0] i = b f1, f2 = win.textobj.getfocus() lastmatch = ''
# Exceptions raised for various error conditions. TclAssertError = 'Tcl assert error' TclSyntaxError = 'Tcl syntax error' TclRuntimeError = 'Tcl runtime error' TclMatchingError = 'Tcl matching error' # Find a variable name. # A variable name is either a (possiblly empty) sequence of letters, # digits and underscores, or anything enclosed in matching braces. # Return the index past the end of the name. _varname_prog = regexp.compile('[a-zA-Z0-9_]*') def FindVarName(str, i, end): if i < end and str[i] = '{': return BalanceBraces(str, i, end) i = _varname_prog.exec(str, i)[0][1] return min(i, end) # Split a list into its elements. # Return a list of elements (strings). def SplitList(str): i, end = 0, len(str) list = [] while 1: i = SkipSpaces(str, i, end)
parser.add_argument('--show-automata', dest='show_automata',
                    action='store_true',
                    help='Display the automata built out of the input regexp.')
parser.add_argument('--show-dag', '--show-graph', dest='show_graph',
                    action='store_true',
                    help='Display the dag built out of the input regexp.')

args = parser.parse_args()


# ----- Read inputs -----

pattern = regexp.compile(args.regexp)
document = args.file.read()

# Drop one trailing newline.  The slice document[-1:] is used instead of
# the index document[-1] so that an empty input file no longer raises
# IndexError; for non-empty input the comparison is unchanged.
if document[-1:] == '\n':
    document = document[:-1]


# ----- Special Actions -----

if args.show_automata:
    # NOTE(review): the pattern is compiled a second time here although
    # `pattern` already holds it; kept as is in case render() needs a
    # fresh object -- confirm and drop if not.
    pattern = regexp.compile(args.regexp)
    pattern.render('automata', display=True)

if args.show_graph:
    raise NotImplementedError  # TODO


# ----- Match The Expression -----