def remove_compiled_regex(txt: str, compiled_regex: re.Pattern, substitute: str = "") -> tuple:
    """Collect every match of a compiled pattern in ``txt`` and replace them.

    Args:
        txt: The text to search.
        compiled_regex: A pattern object as returned by ``re.compile``.
        substitute: Replacement for each match. The default empty string
            removes the matches altogether.

    Returns:
        A ``(new_txt, entities)`` tuple. ``new_txt`` is ``txt`` with every
        match replaced by ``substitute``; ``entities`` is the result of
        ``compiled_regex.findall(txt)`` on the original text.

    Note:
        ``findall`` returns the captured groups rather than the whole match
        when the pattern contains capturing groups, so the shape of
        ``entities`` depends on the pattern.
    """
    # Collect the matches first: after substitution they are gone from the text.
    entities = compiled_regex.findall(txt)
    txt = compiled_regex.sub(substitute, txt)
    return txt, entities
def load(string, required=None, callbacks=None):
    """Parse the given string into a mapping of field -> value.

    Leading blank lines are skipped. The indentation of the first non-blank
    line (as reported by ``get_indent``) is stripped from every line, and
    unescaped ``#`` comments are removed. A line that is empty or starts with
    whitespace after that stripping is appended to the entry being
    accumulated; any other line starts a new entry. Each accumulated entry is
    split into ``(field, value)`` by ``pairify``.

    Args:
        string: The text to parse.
        required: Optional iterable of field names that must be present.
        callbacks: Optional dictionary of functions, keyed by field, with the
            signature ``(mapping, line_number)``. Every callback is invoked
            after parsing; ``line_number`` is ``None`` when the field did not
            occur in the input.

    Returns:
        The mapping of parsed fields to their values.

    Raises:
        ParseError: If a line is indented less than the first line, or if a
            field listed in ``required`` is missing.
    """
    if callbacks is None:
        callbacks = {}
    lines = string.splitlines()
    mapping = {}
    # Skip blank lines at the beginning of the file
    offset = 0
    for offset, line in enumerate(lines):
        if line and not line.isspace():
            break
    # NOTE(review): an empty ``string`` yields no lines, so ``lines[offset]``
    # raises IndexError here rather than ParseError - confirm whether callers
    # rely on that.
    current_line, lines = lines[offset], lines[offset:]
    current_line_number = offset
    indent_size = get_indent(current_line)
    # A '#' starts a comment unless it is escaped with a backslash.
    comment = Regex(r"(?<!\\)#.*$")
    line_numbers = {}
    # The first non-blank line is also the first item of ``lines``: it is
    # handed to pairify once in raw form and again after indentation and
    # comment stripping, the later result overwriting the earlier one.
    # NOTE(review): presumably enumerate_with_offset yields
    # (offset + 1 + i, line) pairs - verify against its definition.
    for line_number, indented_line in enumerate_with_offset(lines, offset + 1):
        indent, line = indented_line[:indent_size], indented_line[indent_size:]
        line = comment.sub("", line)
        if indent and not indent.isspace():
            raise ParseError("Unexpected unindent.", line_number, indented_line)
        if not line or line[0].isspace():
            # Blank or further-indented line: continuation of the current entry.
            current_line += line
        else:
            # A new entry begins, so the accumulated one is complete.
            field, value = pairify(current_line, current_line_number)
            mapping[field] = value
            line_numbers[field] = current_line_number
            current_line = line
            current_line_number = line_number
    # Flush the last accumulated entry.
    field, value = pairify(current_line, current_line_number)
    mapping[field] = value
    line_numbers[field] = current_line_number
    if required is not None:
        for field in required:
            if field not in mapping:
                raise ParseError("Required field \"%s\" missing." % field)
    # Iterate the dict directly: dict.iterkeys() does not exist on Python 3,
    # while plain iteration yields the keys on both Python 2 and 3.
    for field in callbacks:
        callbacks[field](mapping, line_numbers.get(field))
    return mapping
#! /usr/bin/env python
# Demonstrates removing double quotes that are not escaped by a backslash.
#
# $ ./foo.py
# BEFORE: A \\" \\\" Z
# AFTER : A \\ \\\" Z
#
# BEFORE: A \\\" \\" Z
# AFTER : A \\\" \\ Z
from re import compile as Regex


def remove_first_group(m):
    """Return the text of match ``m`` with the span of group 1 cut out.

    Intended as the replacement callable for ``pattern.sub``: everything the
    pattern matched is kept except the characters captured by group 1.
    """
    whole_match = m.group(0)
    # Group positions are offsets into the searched string; rebase them so
    # they index into the matched text instead.
    start = m.start(1) - m.start(0)
    end = m.end(1) - m.start(0)
    return whole_match[:start] + whole_match[end:]


# A double quote preceded by an even number (possibly zero) of backslashes.
# The lookbehind keeps the match from starting in the middle of a backslash
# run, so the backslashes are always consumed in complete pairs and the quote
# itself (group 1) is unescaped.
unescaped_doublequote = Regex(r'(?<!\\)(?:\\\\)*(")')

for test in (
    r'A \\" \\\" Z',
    r'A \\\" \\" Z',
):
    # Single-argument print calls give the same output on Python 2 and 3.
    print('BEFORE: %s' % test)
    print('AFTER : %s' % unescaped_doublequote.sub(remove_first_group, test))
    print('')
if oSwap4Blanks( sTest ) != ' ' * 82: # lProblems.append( 'getReplaceManyOldWithBlanksSwapper()' ) # if ReplaceManyOldWithBlanks( sTest, ( digits, uppercase, lowercase ) ) != ' ' * 82: # lProblems.append( 'ReplaceManyOldWithBlanks()' ) # if get_obsoleteGlobalReplaceWithSwapper( sTest, oSwap4Blanks ) != ' ' * 82: # lProblems.append( 'get_obsoleteGlobalReplaceWithSwapper()' ) # # oMatch = REcompile( 'l.+e' ) # if oMatch.sub( getBlanksForReMatchObj, 'Mary had a little lamb.' ) != \ 'Mary had a lamb.': # lProblems.append( 'getBlanksForReMatchObj()' ) # # sWhiteChars = '\n\t\rabc\r\t\n' # if getSpaceForWhiteAlsoStrip( sWhiteChars ) != 'abc' or \ getSpaceForWhiteAlsoStrip( sWhiteChars+sWhiteChars ) != 'abc abc': # lProblems.append( 'getSpaceForWhiteAlsoStrip()' ) # # if replaceLast( 'abcde0fghij0klmno0pqrst0uvwxy0z', '0', '8' ) != \ 'abcde0fghij0klmno0pqrst0uvwxy8z':
def remove_comments(lines):
    """Drop comment-only lines and strip trailing comments from the rest.

    A line whose first non-whitespace character is ``#`` is omitted from the
    result entirely. On every other line, the text from the first ``#`` that
    is not preceded by a backslash up to the end of the line is removed;
    whitespace in front of that ``#`` is left in place.

    Args:
        lines: Iterable of strings to filter.

    Returns:
        A new list with the remaining, comment-stripped lines.
    """
    # Raw strings keep ``\s`` and ``\\`` as regex escapes instead of (invalid)
    # string-literal escapes.
    comment_line = Regex(r"^\s*#.*$")
    eol_comment = Regex(r"(?<!\\)#.*$")
    return [
        eol_comment.sub("", line)
        for line in lines
        if not comment_line.match(line)
    ]