def GetAttributeContent(TagName, Attribute, String):
    '''Return the value assigned to an attribute inside a particular tag.

    Searches String for the first tag whose text starts with "<" + TagName
    and that carries Attribute="..." (double quotes only), and returns the
    text between the quotes.

    TagName   -- tag name; interpolated into the regex unescaped.
    Attribute -- attribute name; interpolated into the regex unescaped.
    String    -- the markup to search.

    Returns the attribute value, or '' when no such tag/attribute is found.
    '''
    # [^>]*? (instead of .*?) keeps the search inside a single tag, so an
    # attribute belonging to a later tag is never returned by mistake.
    pattern = r'<%s[^>]*?%s\s?=\s?"([^>]*?)"[^>]*?>' % (TagName, Attribute)
    found = re.search(pattern, String)
    if found is None:
        # Nothing matched: return an empty string rather than failing with
        # AttributeError on None.
        return ''
    return found.group(1)
def regexr(searchlist, searchstring):
    r"""Run a list of regex search/replace pairs over a string.

    searchlist   -- iterable of (search, replace) tuples, applied in order.
    searchstring -- the text to transform.

    A "(?s)" marker in a search expression is turned into the re.DOTALL
    flag.  A "\L(spam)\=" sequence in a replace expression (the case
    conversion style seen in span_inq) is rewritten to a
    <convert_case case="lowercase">...</convert_case> wrapper, and the
    result of every substitution is passed through convert_case().  Only
    lowercase conversion is supported.

    Returns the transformed string.
    """
    # Compiled once: this pattern is the same for every pair.
    find_case_conv = re.compile(r'\\L(.*)\\=')
    lowercase_wrapper = r'<convert_case case="lowercase">\1</convert_case>'

    for (search, replace) in searchlist:
        # Handle regex flags in search expressions.  The marker is removed
        # from the pattern text and passed to re.compile() as a real flag;
        # appending ", re.DOTALL" to the pattern string would only make
        # those characters part of the regex.
        flags = 0
        if '(?s)' in search:
            search = search.replace('(?s)', '')
            flags |= re.DOTALL

        # Handle case conversion flags in replace expressions
        replace = find_case_conv.sub(lowercase_wrapper, replace)
        regex = re.compile(search, flags)

        # Now do the actual substitutions and case conversion
        searchstring = regex.sub(replace, searchstring)
        # convert_case is defined elsewhere; presumably it resolves the
        # <convert_case> wrappers inserted above -- confirm.
        searchstring = convert_case(searchstring)
    return searchstring
def GetMultTag(TagName, String):
    '''Collect every occurrence of an element in a string.

    Each "<TagName ...> ... </TagName>" span found in String is returned,
    opening and closing tags included, in document order.  Matching is
    non-greedy and spans newlines.  TagName is interpolated into the regex
    unescaped.

    Returns a list of strings (empty when nothing matches).
    '''
    element = re.compile(r"(<%s.*?>.*?</%s>)" % (TagName, TagName), re.DOTALL)
    # With exactly one group, findall() yields that group's text for each
    # successive non-overlapping match.
    return element.findall(String)
# Character-class demo: "[Python]+" matches runs made up only of the letters
# P, y, t, h, o, n -- not the word "Python".
# The obsolete `pre` module is replaced by `re`.
import re

p = re.compile("[Python]+")
# Single-argument print(...) gives the same output on Python 2 and Python 3.
print(p.findall("Python is not that bad"))
## ['Python', 'not', 'th', 't']
# NOTE(review): __init__ and __repr__ take `self`, and the code below
# instantiates SearchAndReplace, so these are presumably that class's
# methods; the class header is not visible in this chunk -- confirm.
def __init__(self, regex, replacement=""):
    """Store a search pattern and the text that replaces each match.

    regex       -- compiled pattern object (its .pattern is read by __repr__).
    replacement -- replacement text; defaults to "".
    """
    self.regex = regex
    self.replacement = replacement

def __repr__(self):
    """Return a readable "Replace <pattern> with '<replacement>'" summary."""
    # A space before "with" keeps the pattern's repr from running into it.
    return "Replace " + repr(self.regex.pattern) + " with '" + self.replacement + "'"

# Some people, when confronted with a problem, think ``I know, I'll use
# regular expressions.'' Now they have two problems.
#                                             -- jwz

# Rules are kept in the order they are appended below.
searchAndReplacements = []

# Collapse an <html xmlns...> opening tag to a bare <html>.
xmlns = re.compile(r"<html\s*?xmlns.*?>", re.DOTALL)
searchAndReplacements.append(SearchAndReplace(xmlns, "<html>"))

# <meta ...> tags: replaced with the default empty string.
meta_http = re.compile(r"<meta.*?>", re.DOTALL)
searchAndReplacements.append(SearchAndReplace(meta_http))

# can break the recursion limit when using sre (the default implementation)
comment = re.compile(r"<!--.*?-->", re.DOTALL)
searchAndReplacements.append(SearchAndReplace(comment))

# Collapse a <body ...> opening tag to a bare <body>.
body = re.compile(r"<body.*?>", re.DOTALL)
searchAndReplacements.append(SearchAndReplace(body, "<body>"))

# <link rel=File-List ...> tags: replaced with the default empty string.
file_list = re.compile(r"<link rel=File-List.*?>", re.DOTALL)
searchAndReplacements.append(SearchAndReplace(file_list))
import os
# The obsolete `pre` module is replaced by `re`, bound to the same name.
import re
from sys import argv
# environ is provided by os, not sys.
from os import environ
# NOTE: this star import also binds a name `digits` (string.digits); it is
# rebound to a compiled pattern a few lines below.
from string import *

# Runs the shell's "dir" command; its output goes straight to the terminal.
os.system("dir")

# Raw string so that \d reaches the regex engine as written.
digits = re.compile(r"\d+")

# Formatted as one string so the output is "<argv[0]> <environ>" on both
# Python 2 and Python 3.
print("%s %s" % (argv[0], environ))
# Character-class demo: "[Python]+" matches runs made up only of the letters
# P, y, t, h, o, n -- not the word "Python".
# `pre` does not exist on Python 3 (which this print() call targets); use `re`.
import re

p = re.compile("[Python]+")
print(p.findall("Python is not that bad"))
## ['Python', 'not', 'th', 't']