def match_begin_end_env(env='equation', get_content=True):
    r"""Build a compiled ``Regex`` for a LaTeX ``\begin{env} ... \end{env}`` block.

    Optional whitespace is accepted around the environment name inside the
    braces, and the name may be followed by an optional ``*`` in both the
    opening and the closing tag, so ``\begin{equation*} ... \end{equation}``
    is covered.

    Args:
        env: Pattern fragment used for the environment name. The special
            value ``'anything'`` replaces it with a non-greedy wildcard so
            that every ``\begin{...} ... \end{...}`` pair is matched.
        get_content: When true, the text between the two tags is wrapped in
            a group named ``content``.

    Returns:
        The ``Regex`` builder after ``compile()`` has been called on it.
    """
    builder = Regex()

    def lazy_anything():
        # Non-greedy "any characters" fragment.
        return builder.non_greedy(builder.zero_or_more(builder.anything()))

    def add_optional_space():
        builder.add(builder.zero_or_more(builder.whitespace()))

    def add_name_and_close(name):
        # name, optional '*', closing brace -- whitespace allowed in between.
        builder.add(name)
        add_optional_space()
        builder.add(builder.zero_or_one(r'\*'))
        add_optional_space()
        builder.add(r"}")

    # Opening tag.
    builder.add(r"\\begin{")
    add_optional_space()
    if env == 'anything':
        env = lazy_anything()
    add_name_and_close(env)

    # Body, optionally captured.
    if get_content:
        builder.add(builder.group_begin(name="content"))
        builder.add(lazy_anything())
        builder.add(builder.group_end())
    else:
        builder.add(lazy_anything())

    # Closing tag reuses the same name fragment as the opening tag.
    builder.add(r"\\end{")
    add_optional_space()
    add_name_and_close(env)

    builder.compile()
    return builder
def main():
    """Run the development smoke tests for ``Regex`` (leftover test snippets).

    Each pattern is compiled with ``Regex.compile``, shown with ``display()``
    and then checked against a few inputs with ``match()``. A separator line
    is printed between consecutive patterns.

    Returns:
        0 once every check has passed.

    Raises:
        AssertionError: if a match result differs from the expected one.
    """
    # (pattern, [(text, should_match), ...]) -- checks run in the listed order.
    cases = [
        ('(.)\\1', [
            ('AA', True),
            ('AB', False),
        ]),
        ('AA', [
            ('A', False),
            ('AA', True),
            ('AAAA', False),
        ]),
        ('(O|RHH|MM)*', [
            ('', True),
            ('OOOO', True),
            ('MMORHHO', True),
            ('MMORHHH', False),
            ('ORHH', True),
        ]),
        ('((A)\\2)\\1', [
            ('AAAA', True),
        ]),
    ]
    separator = "===================================="

    for index, (pattern, checks) in enumerate(cases):
        if index:
            # Single-argument call form prints the same text on Python 2 and 3.
            print(separator)
        # Named `regex`, not `re`, so the stdlib module name is not shadowed.
        regex = Regex.compile(pattern)
        regex.display()
        for text, should_match in checks:
            assert bool(regex.match(text)) == should_match, (
                "pattern %r on %r: expected match=%r"
                % (pattern, text, should_match)
            )
    return 0
def main():
    """Solve the hard-coded regex crossword and display the solution.

    Three lists of patterns (x, y and z) are compiled with ``Regex.compile``
    and passed, together with ``n = 7``, to ``RegexCrossword.solve``. The
    value it returns is handed to ``display_hexagon``.

    Returns:
        0 after the solution has been displayed.
    """
    size = 7

    x_patterns = (
        '.*H.*H.*',
        '(DI|NS|TH|OM)*',
        'F.*[AO].*[AO].*',
        '(O|RHH|MM)*',
        '.*',
        'C*MC(CCC|MM)*',
        '[^C]*[^R]*III.*',
        '(...?)\\1*',
        '([^X]|XCC)*',
        '(RR|HHH)*.?',
        'N.*X.X.X.*E',
        'R*D*M*',
        '.(C|HH)*',
    )
    y_patterns = (
        '(ND|ET|IN)[^X]*',
        '[CHMNOR]*I[CHMNOR]*',
        'P+(..)\\1.*',
        '(E|CR|MN)*',
        '([^MC]|MM|CC)*',
        '[AM]*CM(RC)*R?',
        '.*',
        '.*PRR.*DDC.*',
        '(HHX|[^HX])*',
        '([^EMC]|EM)*',
        '.*OXR.*',
        '.*LR.*RL.*',
        '.*SE.*UE.*',
    )
    # start with x = 0, y = max
    z_patterns = (
        '.*G.*V.*H.*',
        '[CR]*',
        '.*XEXM*',
        '.*DD.*CCM.*',
        '.*XHCR.*X.*',
        '.*(.)(.)(.)(.)\\4\\3\\2\\1.*',
        '.*(IN|SE|HI)',
        '[^C]*MMM[^C]*',
        '.*(.)C\\1X\\1.*',
        '[CEIMU]*OH[AEMOR]*',
        '(RX|[^R])*',
        '[^M]*M[^M]*',
        '(S|MM|HHH)*',
    )

    def compile_all(patterns):
        # One compiled Regex per pattern, in the given order.
        return [Regex.compile(pattern) for pattern in patterns]

    x_compiled = compile_all(x_patterns)
    y_compiled = compile_all(y_patterns)
    z_compiled = compile_all(z_patterns)

    solution = RegexCrossword.solve(size, x_compiled, y_compiled, z_compiled)
    display_hexagon(solution)
    return 0
def match_env(env='section', get_content=True):
    r"""Build a compiled ``Regex`` for a one-argument command such as ``\section{...}``.

    Intended for titles and captions, e.g. ``\section{Chapter one}``. The
    text inside the braces is matched non-greedily up to a closing ``}``.

    Args:
        env: Command name placed after the backslash; it is inserted into
            the pattern as given.
        get_content: When true, the text inside the braces is wrapped in a
            group named ``content``.

    Returns:
        The ``Regex`` builder after ``compile()`` has been called on it.
    """
    builder = Regex()

    def lazy_anything():
        # Non-greedy "any characters" fragment.
        return builder.non_greedy(builder.zero_or_more(builder.anything()))

    opening = r"\\%s{" % env
    builder.add(opening)

    if get_content:
        builder.add(builder.group_begin(name="content"))
        builder.add(lazy_anything())
        builder.add(builder.group_end())
    else:
        builder.add(lazy_anything())

    builder.add(r"}")
    builder.compile()
    return builder