Esempio n. 1
0
def grep(expr, filename):
       prog = regexp.compile(expr)
       fp = open(filename, 'r')
       lineno = 0
       while 1:
               line = fp.readline()
               if not line: break
               lineno = lineno + 1
               res = prog.exec(line)
               if res:
                       #print res
                       start, end = res[0]
                       if line[-1:] = '\n': line = line[:-1]
                       prefix = string.rjust(`lineno`, 3) + ': '
                       print prefix + line
                       if 0:
                               line = line[:start]
                               if '\t' not in line:
                                       prefix = ' ' * (len(prefix) + start)
                               else:
                                       prefix = ' ' * len(prefix)
                                       for c in line:
                                               if c <> '\t': c = ' '
                                               prefix = prefix + c
                               if start = end: prefix = prefix + '\\'
                               else: prefix = prefix + '^'*(end-start)
                               print prefix
Esempio n. 2
0
def get_tags(f):
	#
	# First see if the last "node" is the end of tag table marker.
	#
	f.seek(0, 2) # Seek to EOF
	end = f.tell()
	buf = ifile.backup_node(f, end)
	if not labelmatch(buf, 0, 'end tag table\n'):
		return {} # No succes
	#
	# Next backup to the previous "node" -- the tag table itself.
	#
	###print 'Getting prebuilt tag table...'
	end = f.tell() - len(buf)
	buf = ifile.backup_node(f, end)
	label = 'tag table:\n'
	if not labelmatch(buf, 0, label):
		print 'Weird: end tag table marker but no tag table?'
		print 'Node begins:', `buf[:50]`
		return {}
	#
	# Now read the whole tag table.
	#
	end = f.tell() - len(buf) # Do this first!
	buf = ifile.read_node(f, buf)
	#
	# First check for an indirection table.
	#
	indirlist = []
	if labelmatch(buf, len(label), '(indirect)\n'):
		indirbuf = ifile.backup_node(f, end)
		if not labelmatch(indirbuf, 0, 'indirect:\n'):
			print 'Weird: promised indirection table not found'
			print 'Node begins:', `indirbuf[:50]`
			# Carry on.  Things probably won't work though.
		else:
			indirbuf = ifile.read_node(f, indirbuf)
			indirlist = parse_indirlist(indirbuf)
	#
	# Now parse the tag table.
	#
	findtag = regexp.compile('^(.*[nN]ode:[ \t]*(.*))\177([0-9]+)$').match
	i = 0
	tags = {}
	while 1:
		match = findtag(buf, i)
		if not match:
			break
		(a,b), (a1,b1), (a2,b2), (a3,b3) = match
		i = b
		line = buf[a1:b1]
		node = string.lower(buf[a2:b2])
		offset = eval(buf[a3:b3]) # XXX What if it overflows?
		if tags.has_key(node):
			print 'Duplicate key in tag table:', `node`
		file, offset = map_offset(offset, indirlist)
		tags[node] = file, offset, line
	#
	return tags
Esempio n. 3
0
def get_tags(f):
    #
    # First see if the last "node" is the end of tag table marker.
    #
    f.seek(0, 2)  # Seek to EOF
    end = f.tell()
    buf = ifile.backup_node(f, end)
    if not labelmatch(buf, 0, 'end tag table\n'):
        return {}  # No succes
    #
    # Next backup to the previous "node" -- the tag table itself.
    #
    ###print 'Getting prebuilt tag table...'
    end = f.tell() - len(buf)
    buf = ifile.backup_node(f, end)
    label = 'tag table:\n'
    if not labelmatch(buf, 0, label):
        print 'Weird: end tag table marker but no tag table?'
        print 'Node begins:', ` buf[:50] `
        return {}
    #
    # Now read the whole tag table.
    #
    end = f.tell() - len(buf)  # Do this first!
    buf = ifile.read_node(f, buf)
    #
    # First check for an indirection table.
    #
    indirlist = []
    if labelmatch(buf, len(label), '(indirect)\n'):
        indirbuf = ifile.backup_node(f, end)
        if not labelmatch(indirbuf, 0, 'indirect:\n'):
            print 'Weird: promised indirection table not found'
            print 'Node begins:', ` indirbuf[:50] `
            # Carry on.  Things probably won't work though.
        else:
            indirbuf = ifile.read_node(f, indirbuf)
            indirlist = parse_indirlist(indirbuf)
    #
    # Now parse the tag table.
    #
    findtag = regexp.compile('^(.*[nN]ode:[ \t]*(.*))\177([0-9]+)$').match
    i = 0
    tags = {}
    while 1:
        match = findtag(buf, i)
        if not match:
            break
        (a, b), (a1, b1), (a2, b2), (a3, b3) = match
        i = b
        line = buf[a1:b1]
        node = string.lower(buf[a2:b2])
        offset = eval(buf[a3:b3])  # XXX What if it overflows?
        if tags.has_key(node):
            print 'Duplicate key in tag table:', ` node `
        file, offset = map_offset(offset, indirlist)
        tags[node] = file, offset, line
    #
    return tags
Esempio n. 4
0
 def GrepCmd(interp, argv):
         # Start of the `grep' command.  argv[1] is compiled as the regular
         # expression; an argv with fewer than three elements raises
         # UsageError with the usage message.
         # NOTE(review): prog is not used in the lines shown here.
         if len(argv) < 3:
                 raise UsageError, 'usage: grep regexp file ...'
         import regexp
         try:
                 prog = regexp.compile(argv[1])
         except regexp.error, msg:
                 # A pattern that does not compile is re-raised as a Tcl
                 # runtime error carrying the pattern and regexp's message.
                 raise TclRuntimeError, \
                   ('grep', argv[1], ': bad regexp :', msg)
Esempio n. 5
0
	def GrepCmd(interp, argv):
		# Start of the `grep' command.  argv[1] is compiled as the regular
		# expression; an argv with fewer than three elements raises
		# UsageError with the usage message.
		# NOTE(review): prog is not used in the lines shown here.
		if len(argv) < 3:
			raise UsageError, 'usage: grep regexp file ...'
		import regexp
		try:
			prog = regexp.compile(argv[1])
		except regexp.error, msg:
			# A pattern that does not compile is re-raised as a Tcl
			# runtime error carrying the pattern and regexp's message.
			raise TclRuntimeError, \
			  ('grep', argv[1], ': bad regexp :', msg)
Esempio n. 6
0
def do_grep(args):
       # Start of the `grep' command.  args[0] is compiled as the regular
       # expression.  Problems are reported with print and the function
       # returns without a value: a usage message when args has fewer than
       # two elements, or the regexp error when the pattern does not compile.
       # NOTE(review): prog is not used in the lines shown here.
       if len(args) < 2:
               print 'usage: grep regexp file ...'
               return
       import regexp
       try:
               prog = regexp.compile(args[0])
       except regexp.error, msg:
               print 'regexp.compile error for', args[0], ':', msg
               return
Esempio n. 7
0
    def test_print_transitions(self):
        """Compare the printed transitions of the ``ab*c`` automaton.

        Parenthesised numbers are replaced by ``(x)`` and runs of spaces are
        collapsed; the printed lines are then compared as a multiset, so
        neither the numbers nor the line order affect the result.
        """
        automaton = compile("ab*c")

        with closing(StringIO()) as captured:
            with redirect_stdout(captured):
                automaton.initial_node.print_transitions()
            printed = captured.getvalue()

        without_numbers = re.sub(r"\([0-9]+\)", "(x)", printed)
        normalised = re.sub(r" +", " ", without_numbers)
        actual = Counter(normalised.splitlines())
        expected = Counter(["(x) a (x)", "(x) Σ (x)"])
        self.assertEqual(actual, expected)
Esempio n. 8
0
def parse_indirlist(buf):
	# Parse the text of an indirection table into a list of
	# (file, offset) tuples, in the order the entries appear in buf.
	# Each entry is a line of the form "name: digits".
	import string
	result = []
	findindir = regexp.compile('^(.+):[ \t]*([0-9]+)$').match
	i = 0
	while 1:
		match = findindir(buf, i)
		if not match:
			break
		# Spans of the whole match, the name and the offset digits.
		(a,b), (a1,b1), (a2,b2) = match
		fname = buf[a1:b1]
		# string.atoi instead of eval: text read from the file is never
		# evaluated as an expression, and a leading zero no longer
		# switches the number to octal.
		offset = string.atoi(buf[a2:b2]) # XXX What if this gets overflow?
		result.append((fname, offset))
		i = b	# continue after this entry
	return result
Esempio n. 9
0
def parse_indirlist(buf):
    # Parse the text of an indirection table into a list of
    # (file, offset) tuples, in the order the entries appear in buf.
    # Each entry is a line of the form "name: digits".
    import string
    result = []
    findindir = regexp.compile('^(.+):[ \t]*([0-9]+)$').match
    i = 0
    while 1:
        match = findindir(buf, i)
        if not match:
            break
        # Spans of the whole match, the name and the offset digits.
        (a, b), (a1, b1), (a2, b2) = match
        fname = buf[a1:b1]
        # string.atoi instead of eval: text read from the file is never
        # evaluated as an expression, and a leading zero no longer
        # switches the number to octal.
        offset = string.atoi(buf[a2:b2])  # XXX What if this gets overflow?
        result.append((fname, offset))
        i = b  # continue after this entry
    return result
Esempio n. 10
0
def isearch(win):
    # Start of an interactive search: ask for a pattern and compile it.
    # win.pat is passed as the second argument of stdwin.askstr
    # (presumably the default text -- confirm) and is used again when the
    # answer is empty.  The function returns without a value when the
    # prompt is interrupted, when no pattern is available at all, or when
    # the pattern does not compile.
    # NOTE(review): cpat is not used in the lines shown here.
    try:
        pat = stdwin.askstr('Search pattern:', win.pat)
    except KeyboardInterrupt:
        return
    if not pat:
        # Empty answer: fall back on the pattern stored on the window.
        pat = win.pat
        if not pat:
            stdwin.message('No previous pattern')
            return
    try:
        cpat = regexp.compile(pat)
    except regexp.error, msg:
        stdwin.message('Bad pattern: ' + msg)
        return
Esempio n. 11
0
def isearch(win):
	# Start of an interactive search: ask for a pattern and compile it.
	# win.pat is passed as the second argument of stdwin.askstr
	# (presumably the default text -- confirm) and is used again when the
	# answer is empty.  The function returns without a value when the
	# prompt is interrupted, when no pattern is available at all, or when
	# the pattern does not compile.
	# NOTE(review): cpat is not used in the lines shown here.
	try:
		pat = stdwin.askstr('Search pattern:', win.pat)
	except KeyboardInterrupt:
		return
	if not pat:
		# Empty answer: fall back on the pattern stored on the window.
		pat = win.pat
		if not pat:
			stdwin.message('No previous pattern')
			return
	try:
		cpat = regexp.compile(pat)
	except regexp.error, msg:
		stdwin.message('Bad pattern: ' + msg)
		return
Esempio n. 12
0
# List of path prefixes; the empty string comes first.
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:']	# Mac
INFOPATH = ['', '/usr/local/emacs/info/']	# X11 on UNIX


# Tunable constants.
# FUZZ and CHUNKSIZE are derived from BLOCKSIZE (1024 and 2048 here).
#
BLOCKSIZE = 512			# Qty to align reads to, if possible
FUZZ = 2*BLOCKSIZE		# Qty to back-up before searching for a node
CHUNKSIZE = 4*BLOCKSIZE		# Qty to read at once when reading lots of data


# Regular expressions used.
# Note that it is essential that Python leaves unrecognized backslash
# escapes in a string so they can be seen by regexp.compile!
# Every name below is bound to the match method of a compiled expression.
#
# A ^_ character (octal 037), an optional form feed (octal 014) and a
# newline; group 1 is the line that follows.
findheader = regexp.compile('\037\014?\n(.*\n)').match
# A single ^_ character.
findescape = regexp.compile('\037').match
# "Node:" or "node:", optional blanks, then group 1 up to the next tab,
# comma or newline.  parseheader and findnode use the same expression.
parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
# One line including its terminating newline.
findfirstline = regexp.compile('^.*\n').match
findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
# NOTE(review): [ious]* is a character class, so besides "Prev:" and
# "Previous:" this also accepts e.g. "Prevs:" -- confirm that is intended.
findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
# Same shape as findnode, for the "Next:" and "Up:" fields.
findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
# A line starting with "* Menu:" or "* menu:".
findmenu = regexp.compile('^\* [mM]enu:').match
# A line starting with "* name:" (group 1 is the name).  Group 2 is a
# second ':', or text starting with a parenthesised part, or other text
# running up to a tab, comma, newline or period.
findmenuitem = regexp.compile( \
	'^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
# "*Note name:" or "*note name:" (group 1 is the name); group 2 is a
# second ':' or text running up to a tab, comma, newline or period.
findfootnote = regexp.compile( \
	'\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
# Text of the form "(X)Y"; group 1 is X and group 2 is Y.
parsenoderef = regexp.compile('^\((.*)\)(.*)$').match

Esempio n. 13
0
 def test_single_match(self):
     """A lazy read of "a" by the automaton for "a" must give 1."""
     automaton = compile("a")
     result = automaton.read_lazy("a")
     self.assertEqual(result, 1)
Esempio n. 14
0
			else:
				win.nodemenu.additem(topic)
			digit = digit + 1
	#
	win.footnotes = footnotes
	if footnotes:
		win.footmenu = win.menucreate('Footnotes')
		for topic, ref in footnotes:
			win.footmenu.additem(topic)
	#
	win.settitle('(' + win.file + ')' + win.node)


# Find menu item at focus
#
# findmenu matches a line starting with "* Menu:" or "* menu:".
findmenu = regexp.compile('^\* [mM]enu:').match
# findmenuitem matches a line starting with "* name:" (group 1 is the
# name).  Group 2 is a second ':', or text starting with a parenthesised
# part, or other text running up to a tab, comma, newline or period.
findmenuitem = regexp.compile( \
	'^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
#
def whichmenuitem(win):
	if not win.menu:
		return ''
	match = findmenu(win.text)
	if not match:
		return ''
	a, b = match[0]
	i = b
	f1, f2 = win.textobj.getfocus()
	lastmatch = ''
	while i < len(win.text):
		match = findmenuitem(win.text, i)
Esempio n. 15
0
 def test_multiple(self):
     """A greedy read of "abcdef" by its own automaton must give 6."""
     automaton = compile("abcdef")
     result = automaton.read_greedy("abcdef")
     self.assertEqual(result, 6)
Esempio n. 16
0
 def test_kleene(self):
     """A greedy read of "abbbbbb" by the automaton for "ab*" must give 7."""
     automaton = compile("ab*")
     result = automaton.read_greedy("abbbbbb")
     self.assertEqual(result, 7)
Esempio n. 17
0
# Parser for CWI Multimedia Interchange Files (CMIF, extension .cmif)


from MMExc import *		# Exceptions


import regexp


# Globals used by class MMParser
# matchnumber and matchname are the exec methods of two compiled
# expressions.  expr is only a scratch name: it is rebound below and keeps
# the last pattern.

# A number: hexadecimal with a 0x/0X prefix, or decimal digits with an
# optional fraction and an optional exponent.
expr = '0[xX][0-9a-fA-F]+|[0-9]+(\.[0-9]*)?([eE][-+]?[0-9]+)?'
matchnumber = regexp.compile(expr).exec

# A name: a letter or underscore followed by letters, digits, underscores.
expr = '[a-zA-Z_][a-zA-Z0-9_]*'
matchname = regexp.compile(expr).exec

# Character sets; note that letters also contains the underscore.
letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
digits = '0123456789'


# Parser for CMIF files.
# Conceivably subclassing from this class might make sense.
# After initializing the parser once, it is possible to
# use it to get multiple objects from an input source
# by calling the reset() method in between calls to getnode()
# or other get*() methods.  (This resets the scanner except for the
# current line number.)

class MMParser():
	#
Esempio n. 18
0
 def test_single_no_match(self):
     """A greedy read of "b" by the automaton for "a" must give 0."""
     automaton = compile("a")
     result = automaton.read_greedy("b")
     self.assertEqual(result, 0)
Esempio n. 19
0
#XXX Multiline colour support (this does happen with B (biological) but to be fair the general audience does not use geni as colour names)

#The Python<=1.4 one, not to be confused with the high-level, low-usability "regex" or its successor "re"
#This is an anachronism: I got this from the 1.4 source tree and compiled the 2.4 regex as a backend
import regexp

#One table row: group 1 is the text of the first cell (letters, digits, spaces and the brackets ][(){} only),
#group 2 is everything between that cell and the closing </tr>
main_re=regexp.compile('^<tr><td>([][ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 (){}]+)</td>(.*)</tr>')

#If a colour number ends with a minus it is almost certainly a typo
#If a colour number ends with + it means a typo or omission fix(?)
#For example quince yellow: the author of the table has wisely decided the Prussian Blue/Slate Grey
#187 is a typo (marked with -) and made own contribution 87 (a yellow-brown) marked with +
#One colour cell: group 2 is the hex digits of the background colour.
#The [^-] before the closing tag rejects cells whose text ends with a minus.
col_re=regexp.compile('<td( width="5%")? title="[0123456789]+" style="background-color:#?([0123456789ABCDEF]+)(; color:#FFF)?">[0123456789]*[^-]</[tT][dD]>')

#NOTE(review): d is not used in the lines shown here
d={}

def extract(colours):
	"""Return a frozenset of the colour strings found in `colours`.

	col_re is applied repeatedly; after each hit the text up to the end
	of that hit is discarded, until col_re no longer matches.
	"""
	found = set()
	remaining = colours
	hit = col_re.match(remaining)
	while hit:
		# hit[2] is the span of the colour digits, hit[0] the whole hit.
		begin, stop = hit[2][0], hit[2][1]
		found.add(remaining[begin:stop])
		remaining = remaining[hit[0][1]:]
		hit = col_re.match(remaining)
	return frozenset(found)

def namify(name):
	if name.startswith("{"): #IE notice only, no colour
		return ()
	name=name.split("(")[0] #Regexes here would be complete overkill
Esempio n. 20
0
				argv[0] + '"'
		# XXX No defaults or variable length 'args' yet
		frame = _Frame().Create()
		for i in range(len(proc.args)):
			frame.locals[proc.args[i]] = argv[i+1]
		proc.interp.stack.append(frame)
		try:
			value = proc.interp.Eval(proc.body)
		except TclReturn, value:
			pass
		del proc.interp.stack[-1:]
		return value


# regexp is imported only to build _expand_prog and is removed from this
# namespace again right afterwards.
# The expression matches a (possibly empty) run of characters; presumably
# everything except '[', '$' and backslash -- confirm against the regexp
# module's bracket syntax.
import regexp
_expand_prog = regexp.compile('([^[$\\]+|\n)*')
del regexp

class Interpreter():
	#
	def Create(interp):
		interp.globals = {}
		interp.commands = {}
		interp.stack = []
		interp.commands['break'] = interp.BreakCmd
		interp.commands['concat'] = interp.ConcatCmd
		interp.commands['continue'] = interp.ContinueCmd
		interp.commands['echo'] = interp.EchoCmd
		interp.commands['eval'] = interp.EvalCmd
		interp.commands['expr'] = interp.ExprCmd
		interp.commands['for'] = interp.ForCmd
Esempio n. 21
0
# so they can simply be concatenated to a relative pathname.
#
# List of path prefixes; the empty string comes first.
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:']	# Mac
INFOPATH = ['', '/usr/local/emacs/info/']  # X11 on UNIX

# Tunable constants.
# FUZZ and CHUNKSIZE are derived from BLOCKSIZE (1024 and 2048 here).
#
BLOCKSIZE = 512  # Qty to align reads to, if possible
FUZZ = 2 * BLOCKSIZE  # Qty to back-up before searching for a node
CHUNKSIZE = 4 * BLOCKSIZE  # Qty to read at once when reading lots of data

# Regular expressions used.
# Note that it is essential that Python leaves unrecognized backslash
# escapes in a string so they can be seen by regexp.compile!
# Every name below is bound to the match method of a compiled expression.
#
# A ^_ character (octal 037), an optional form feed (octal 014) and a
# newline; group 1 is the line that follows.
findheader = regexp.compile('\037\014?\n(.*\n)').match
# A single ^_ character.
findescape = regexp.compile('\037').match
# "Node:" or "node:", optional blanks, then group 1 up to the next tab,
# comma or newline.  parseheader and findnode use the same expression.
parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
# One line including its terminating newline.
findfirstline = regexp.compile('^.*\n').match
findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
# NOTE(review): [ious]* is a character class, so besides "Prev:" and
# "Previous:" this also accepts e.g. "Prevs:" -- confirm that is intended.
findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
# Same shape as findnode, for the "Next:" and "Up:" fields.
findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
# A line starting with "* Menu:" or "* menu:".
findmenu = regexp.compile('^\* [mM]enu:').match
# A line starting with "* name:" (group 1 is the name).  Group 2 is a
# second ':', or text starting with a parenthesised part, or other text
# running up to a tab, comma, newline or period.
findmenuitem = regexp.compile( \
 '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
# "*Note name:" or "*note name:" (group 1 is the name); group 2 is a
# second ':' or text running up to a tab, comma, newline or period.
findfootnote = regexp.compile( \
 '\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
# Text of the form "(X)Y"; group 1 is X and group 2 is Y.
parsenoderef = regexp.compile('^\((.*)\)(.*)$').match

Esempio n. 22
0
-#####  Be Built From These Functions  #####-
- import needed libraries
- create a list of all journal files
- read each journal-file into a separate string
- parse all strings into a single list of entries
- sort all entries
- spit return result to stdout
'''


# import needed libraries
import datetime
import sys
import regexp as re

# create a list of all journal files
arguments = sys.argv
journalfiles = sys.argv[1:]
# journalfiles is a list, so it is converted before concatenation
# (the original `str + list` raised TypeError).
print('journalfiles: ' + str(journalfiles))

# read each journal-file into a separate string
# journalstring holds one string per journal file, in argument order.  The
# original appended to a name that was never created and used the Python 2
# only file() built-in.
journalstring = []
for f in journalfiles:
    # `with` closes each file as soon as it has been read.
    with open(f) as journal:
        journalstring.append(journal.read())
print(journalstring)

# parse all strings into a single list of entries
#   an entry is text in /^\\section/
# NOTE(review): the original used `pattern` without defining it; this value
# is taken from the comment above -- confirm it is the intended expression.
pattern = r'^\\section'
prog = re.compile(pattern)
# One match result per journal file.  The original looped forever
# (`while true`), matched against the whole list and appended to the
# misspelled name `postions`.
positions = [prog.match(text) for text in journalstring]

# sort entries chronologically
#   timeofentry = datetime.datetime.strpdatetime
# spit return result to stdout
Esempio n. 23
0
import regexp

# Match maximal blocks of a's: the named group block_a is a run of a's that
# is not preceded or followed by another a.
example_1 = regexp.compile('^(.*[^a])?(?P<block_a>a+)([^a].*)?$')

# Match all email addresses delimited by spaces.
# NOTE(review): the expression itself is only word@word; nothing in it
# refers to spaces.
example_2 = regexp.compile('\\w+@\\w+')

# Enumerate all subwords
example_3 = regexp.compile('.*')

# Enumerate all pairs of a non-empty block of a's (group block_a) followed by
# a non-empty block of b's (group block_b)
example_4 = regexp.compile(
    '^(.*[^a])?(?P<block_a>a+)([^a].*[^b]|[^ab])?(?P<block_b>b+)([^b].*)?$')

# Match email addresses in the form [a.]*@a*.a*: group login is one or more
# dot-separated words, group server is word.word
example_5 = regexp.compile('(?P<login>\\w+(\\.\\w+)*)@(?P<server>\\w+\\.\\w+)')

INSTANCES = [
    {
        'name': 'block_a',
        'automata': example_1,
        'documents': ['a', 'aaaaaaaaaaaaa', 'bbbabb', 'aaaabbaaababbbb']
    },
    {
        'name': 'sep_email',
        'automata': example_2,
        'documents': ['a bba a@b b@a aaa@bab abbababaa@@@babbabb']
    },
    {
Esempio n. 24
0
                               argv[0] + '"'
               # XXX No defaults or variable length 'args' yet
               frame = _Frame().Create()
               for i in range(len(proc.args)):
                       frame.locals[proc.args[i]] = argv[i+1]
               proc.interp.stack.append(frame)
               try:
                       value = proc.interp.Eval(proc.body)
               except TclReturn, value:
                       pass
               del proc.interp.stack[-1:]
               return value


# regexp is imported only to build _expand_prog and is removed from this
# namespace again right afterwards.
# The expression matches a (possibly empty) run of characters; presumably
# everything except '[', '$' and backslash -- confirm against the regexp
# module's bracket syntax.
import regexp
_expand_prog = regexp.compile('([^[$\\]+|\n)*')
del regexp

class Interpreter():
       #
       def Create(interp):
               interp.globals = {}
               interp.commands = {}
               interp.stack = []
               interp.commands['break'] = interp.BreakCmd
               interp.commands['concat'] = interp.ConcatCmd
               interp.commands['continue'] = interp.ContinueCmd
               interp.commands['echo'] = interp.EchoCmd
               interp.commands['eval'] = interp.EvalCmd
               interp.commands['expr'] = interp.ExprCmd
               interp.commands['for'] = interp.ForCmd
Esempio n. 25
0
            else:
                win.nodemenu.additem(topic)
            digit = digit + 1
    #
    win.footnotes = footnotes
    if footnotes:
        win.footmenu = win.menucreate('Footnotes')
        for topic, ref in footnotes:
            win.footmenu.additem(topic)
    #
    win.settitle('(' + win.file + ')' + win.node)


# Find menu item at focus
#
# findmenu matches a line starting with "* Menu:" or "* menu:".
findmenu = regexp.compile('^\* [mM]enu:').match
# findmenuitem matches a line starting with "* name:" (group 1 is the
# name).  Group 2 is a second ':', or text starting with a parenthesised
# part, or other text running up to a tab, comma, newline or period.
findmenuitem = regexp.compile( \
 '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match


#
def whichmenuitem(win):
    if not win.menu:
        return ''
    match = findmenu(win.text)
    if not match:
        return ''
    a, b = match[0]
    i = b
    f1, f2 = win.textobj.getfocus()
    lastmatch = ''
Esempio n. 26
0

# Exceptions raised for various error conditions.
# Each exception is a plain string object.

TclAssertError = 'Tcl assert error'
TclSyntaxError = 'Tcl syntax error'
TclRuntimeError = 'Tcl runtime error'
TclMatchingError = 'Tcl matching error'


# Find a variable name.
# A variable name is either a (possibly empty) sequence of letters,
# digits and underscores, or anything enclosed in matching braces.
# Return the index past the end of the name.

# Compiled once at import time; because of the '*' it also matches the
# empty string.
_varname_prog = regexp.compile('[a-zA-Z0-9_]*')

def FindVarName(str, i, end):
       if i < end and str[i] = '{': return BalanceBraces(str, i, end)
       i = _varname_prog.exec(str, i)[0][1]
       return min(i, end)


# Split a list into its elements.
# Return a list of elements (strings).

def SplitList(str):
       i, end = 0, len(str)
       list = []
       while 1:
               i = SkipSpaces(str, i, end)
Esempio n. 27
0
parser.add_argument('--show-automata',
                    dest='show_automata',
                    action='store_true',
                    help='Display the automata built out of the input regexp.')

# --show-dag and --show-graph are two spellings of the same flag.
parser.add_argument('--show-dag',
                    '--show-graph',
                    dest='show_graph',
                    action='store_true',
                    help='Display the dag built out of the input regexp.')

args = parser.parse_args()

# ----- Read inputs -----

pattern = regexp.compile(args.regexp)
document = args.file.read()

# Drop one trailing newline.  The slice document[-1:] is used instead of
# document[-1], which raised IndexError when the input was empty.
if document[-1:] == '\n':
    document = document[:-1]

# ----- Special Actions -----

if args.show_automata:
    # `pattern` already holds the compiled expression, so it is rendered
    # directly instead of compiling args.regexp a second time.
    pattern.render('automata', display=True)

if args.show_graph:
    raise NotImplementedError  # TODO

# ----- Match The Expression -----