def untagged_par():
    """Build the parser for a paragraph that has no opening tag.

    The regex skips leading whitespace and captures, as ``paragraph_text``,
    a non-empty run of characters containing no ``<`` other than
    ``<i>``/``</i>`` markup, then allows trailing whitespace and an optional
    closing ``</ul>`` or ``</p>``.  The negative lookahead rejects the match
    when ``[Adjourned`` occurs before the next ``<``.

    Returns:
        The ``SEQ`` of parser steps.  Judging by the step names, the captured
        text is written out inside a ``paragraph`` element carrying
        ``type="untagged"`` (NOTE(review): confirm against fd_parse).
    """
    # Raw string: \s and \[ are regex escapes, not Python string escapes.
    untagged_re = (
        r'\s*(?P<paragraph_text>(?![^<]*\[Adjourned)([^<]|<i>|</i>)+)'
        r'\s*(</ul>|</p>)?'
    )
    return SEQ(
        DEBUG('checking for untagged paragraph'),
        fd_parse.TRACE(False),
        pattern(untagged_re),
        START('paragraph'),
        ATTRIBUTES(type='"untagged"'),
        fd_parse.TRACE(False, envlength=512),
        OUT('$paragraph_text'),
        END('paragraph'),
    )
def untagged_par():
    """Return the parser sequence for an untagged paragraph.

    Matching is done by a single regex: optional leading whitespace, then a
    ``paragraph_text`` group made of one or more characters that are either
    not ``<`` or are ``<i>``/``</i>`` tags, then optional whitespace and an
    optional ``</ul>`` or ``</p>``.  A negative lookahead at the start of
    the group refuses text in which ``[Adjourned`` appears before the next
    ``<``.

    Returns:
        A ``SEQ`` that runs the regex and then the START / ATTRIBUTES / OUT /
        END steps for a ``paragraph`` with ``type='"untagged"'``
        (presumably emitting that element; verify against fd_parse).
    """
    return SEQ(
        DEBUG('checking for untagged paragraph'),
        fd_parse.TRACE(False),
        # Raw literal so that \s and \[ reach the regex engine without
        # relying on Python passing unknown string escapes through.
        pattern(r'\s*(?P<paragraph_text>(?![^<]*\[Adjourned)([^<]|<i>|</i>)+)\s*(</ul>|</p>)?'),
        START('paragraph'),
        ATTRIBUTES(type='"untagged"'),
        fd_parse.TRACE(False, envlength=512),
        OUT('$paragraph_text'),
        END('paragraph'),
    )
def paragraph_text(partype='plain', no=None):
    """Build the parser for the text of a paragraph.

    Captures, as ``paragraph_text``, a non-empty run of characters that
    contains no ``<`` apart from ``<i>``/``</i>`` markup, and hands it to a
    ``paragraph`` ELEMENT as its body.

    Args:
        partype: Paragraph kind; its ``repr()`` is passed as the element's
            ``type`` argument.
        no: Accepted for interface compatibility but not used; the code
            that mapped it to an attribute is disabled.

    Returns:
        The ``SEQ`` combining the regex step and the element step.
    """
    text_matcher = pattern('(?P<paragraph_text>([^<]|<i>|</i>)+)')
    paragraph_element = ELEMENT(
        'paragraph',
        body='$paragraph_text',
        type=repr(partype),
    )
    return SEQ(text_matcher, paragraph_element)
def paragraph_text(partype='plain', no=None):
    """Return a parser that turns a run of paragraph text into an element.

    The regex accepts one or more characters, each of which is either not
    ``<`` or is an ``<i>``/``</i>`` tag, and names the match
    ``paragraph_text``.  That text becomes the body of a ``paragraph``
    ELEMENT whose ``type`` argument is ``repr(partype)``.

    Args:
        partype: Label for the kind of paragraph (default ``'plain'``).
        no: Currently ignored; the attribute-map code that consumed it is
            disabled.

    Returns:
        A ``SEQ`` of the pattern followed by the ELEMENT step.
    """
    quoted_type = repr(partype)
    return SEQ(
        pattern('(?P<paragraph_text>([^<]|<i>|</i>)+)'),
        ELEMENT('paragraph', body='$paragraph_text', type=quoted_type),
    )
# Scratch statements exercising Match, fd_parse and fd_dates.
# Statement order is deliberately preserved: names such as SEQ and datetime
# are not imported explicitly here and may come from the star import below.
# NOTE(review): Match is not imported in the visible code -- confirm where
# it is defined.
#from fd_core import nextdayinstance

# Two matchers that both declare a named group called "no".
m2 = Match(r'(?P<no>\d+)')  # raw string: \d is a regex escape
m3 = Match('(?P<no>one|two)')

# Group-name tables of each matcher and the names they share.
g2 = m2.prog.groupindex
g3 = m3.prog.groupindex
overlap = set(g2) & set(g3)

# Join the matchers, first with the default combiner and then with an
# explicit one that joins two values with "|".
b = Match.join(m2, m3)
c = Match.join(m2, m3, lambda x, y: x + "|" + y)

import fd_parse

p1 = fd_parse.pattern('(?P<no>one)')
p2 = fd_parse.pattern('(?P<no>two)')
d = fd_parse.OR(p1, p2)

from fd_dates import *

f = futureday
p = plaindate()
s1 = SEQ(p1, p2)

# Sample environment and reference date for trying the parsers out.
tenv = {
    'today': datetime.date.today(),
    'test': '"testvalue"',
    'no': 4,
}
today = datetime.date.today()
# English number words: one..twelve, and one..fifty-nine.
# "six" carries a lookahead so that it does not match the start of "sixty".
engnumber12s = 'one|two|three|four|five|six(?!ty)|seven|eight|nine|ten|eleven|twelve'
engnumber12p = '(%s)' % engnumber12s
engnumber60p = (
    '(' + engnumber12s
    + '|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen'
    + '|((twenty|thirty|forty|fifty)'
    + '(-(one|two|three|four|five|six|seven|eight|nine))?))'
)

# Durations, for example:
# half an hour
# an hour and a half
# three and a half hours
# three hours and 27 minutes
timequantump = (
    '(an hour and a (half|quarter)'
    '|((a quarter|three quarters) of|half) an hour'
    '|%(engnumber60p)s minutes'
    '|%(engnumber12p)s (and a half hours|hours( and %(engnumber60p)s minutes)?))'
) % {
    'engnumber12p': engnumber12p,
    'engnumber60p': engnumber60p,
}

DEFINE('timequantum', pattern(timequantump))

# English times, eg "three minutes to four o'clock"
# Raw strings keep \s as a regex escape; the last piece is double-quoted so
# the apostrophe needs no backslash.
# NOTE(review): the trailing "?" applies to the whole outer group, so this
# pattern can also match the empty string -- confirm that is intended.
archtimep = (
    r'(twelve (noon|midnight)|(a quarter past|half-past|a quarter to|'
    + engnumber60p
    + r' minutes (to|past)|)\s*'
    + engnumber12p
    + r"(\s*o'\s*clock))?"
)

# The case-insensitive flag is placed at the START of the expression:
# a global inline flag anywhere else is deprecated by the re module and
# rejected outright by Python 3.11+.
archtime = DEFINE(
    'archtime',
    SEQ(
        pattern(r'(?i)\s*(?P<archtime>' + archtimep + ')'),
        ELEMENT('time', archtime='$archtime'),
    ),
)

# Date handling
monthnamep = '(January|February|March|April|May|June|July|August|September|October|November|December)'
daynamep = '(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)'
ordinalp = '(st|nd|rd|th)'
import os.path
import re
# sys is used below for sys.path; import it explicitly rather than relying
# on it leaking in through a star import.
import sys

import fd_parse
from fd_dates import *
from fd_parse import (
    SEQ, OR, ANY, POSSIBLY, IF, START, END, OBJECT, NULL, OUT, DEBUG, STOP,
    FORCE, CALL, ATTRIBUTES, ELEMENT, DEFINE, pattern, tagged, plaintextpar,
    plaintext,
)

# The parent directory must be on the path before the imports that follow.
sys.path.append("../")

from xmlfilewrite import WriteXMLHeader
from contextexception import ContextException
from miscfuncs import toppath

# Repeated step: an opening <p ...> tag (optionally followed by <ul>), the
# paragraph text (no "<" other than <i>/</i>), then optional </ul> and </p>.
# Each match is passed to a 'paragraph' ELEMENT with the text as its body.
splitparagraphs = ANY(
    SEQ(
        pattern(r'\s*(<p([^>]*?)>(<ul>)?)(?P<partext>([^<]|<i>|</i>)*)(</ul>)?(</p>)?'),
        ELEMENT('paragraph', body='$partext'),
    )
)


def namepattern(label='name'):
    """Return a regex fragment capturing a personal name.

    Args:
        label: Name given to the capturing group (default ``'name'``).

    Returns:
        A regex string whose named group matches one or more letters,
        spaces, hyphens, full stops or apostrophes.
    """
    return "(?P<" + label + ">[-A-Za-z .']+)"


# Patterns specific to Votes and Proceedings that are used frequently
# (raw strings are used wherever the regex contains a backslash escape)
DEFINE('mp', pattern('[-A-Za-z .]+'))
DEFINE('act', pattern(r'[-a-z.,A-Z0-9()\s]*?'), fragment=True)
DEFINE('speaker', pattern('(Deputy )?Speaker'))
DEFINE('number', pattern(r'\d+'))
DEFINE('ordinal', pattern(r'1st|2nd|3rd|\d+th'))
DEFINE('text', pattern('[a-z .?;,()]*?'), fragment=True)
import os.path
import re
# Explicit import: sys.path is modified below, and no other visible import
# is guaranteed to provide the name.
import sys

import fd_parse
from fd_dates import *
from fd_parse import (
    SEQ, OR, ANY, POSSIBLY, IF, START, END, OBJECT, NULL, OUT, DEBUG, STOP,
    FORCE, CALL, ATTRIBUTES, ELEMENT, DEFINE, pattern, tagged, plaintextpar,
    plaintext,
)

# Extend the search path before importing the modules listed next.
sys.path.append("../")

from xmlfilewrite import WriteXMLHeader
from contextexception import ContextException
from miscfuncs import toppath

# Regex for one <p>-introduced paragraph: the opening tag (with an optional
# <ul> after it), text free of "<" except for <i>/</i>, and optional closing
# </ul> and </p>.  The text is captured as "partext".
_split_re = r'\s*(<p([^>]*?)>(<ul>)?)(?P<partext>([^<]|<i>|</i>)*)(</ul>)?(</p>)?'

# ANY(...) of that pattern followed by a 'paragraph' ELEMENT whose body is
# the captured text.
splitparagraphs = ANY(
    SEQ(
        pattern(_split_re),
        ELEMENT('paragraph', body='$partext'),
    )
)


def namepattern(label='name'):
    """Build a named-group regex fragment for a person's name.

    Args:
        label: Group name to use in ``(?P<label>...)``; defaults to
            ``'name'``.

    Returns:
        The regex source string; the group accepts one or more characters
        from letters, space, hyphen, full stop and apostrophe.
    """
    return "(?P<" + label + ">[-A-Za-z .']+)"


# Patterns specific to Votes and Proceedings that are used frequently.
# Regexes containing backslash escapes are written as raw strings.
DEFINE('mp', pattern('[-A-Za-z .]+'))
DEFINE('act', pattern(r'[-a-z.,A-Z0-9()\s]*?'), fragment=True)
DEFINE('speaker', pattern('(Deputy )?Speaker'))
DEFINE('number', pattern(r'\d+'))
DEFINE('ordinal', pattern(r'1st|2nd|3rd|\d+th'))
# English numbers up to 12, and up to 60
# ("six" is guarded by a lookahead so it cannot match the start of "sixty")
engnumber12s = 'one|two|three|four|five|six(?!ty)|seven|eight|nine|ten|eleven|twelve'
engnumber12p = '(%s)' % engnumber12s
engnumber60p = (
    '(' + engnumber12s
    + '|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen'
    + '|((twenty|thirty|forty|fifty)(-(one|two|three|four|five|six|seven|eight|nine))?))'
)

# half an hour
# an hour and a half
# three and a half hours
# three hours and 27 minutes
timequantump = (
    '(an hour and a (half|quarter)'
    '|((a quarter|three quarters) of|half) an hour'
    '|%(engnumber60p)s minutes'
    '|%(engnumber12p)s (and a half hours|hours( and %(engnumber60p)s minutes)?))'
) % {'engnumber12p': engnumber12p, 'engnumber60p': engnumber60p}

DEFINE('timequantum', pattern(timequantump))

# English times, eg "three minutes to four o'clock"
# Written with raw strings so \s stays a regex escape; the final piece is
# double-quoted so the apostrophe can appear unescaped.
# NOTE(review): the closing "?" makes the entire outer group optional, so
# an empty match is possible -- check whether only "o'clock" was meant to
# be optional.
archtimep = (
    r'(twelve (noon|midnight)|(a quarter past|half-past|a quarter to|'
    + engnumber60p
    + r' minutes (to|past)|)\s*'
    + engnumber12p
    + r"(\s*o'\s*clock))?"
)

# (?i) leads the expression: the re module deprecates global inline flags
# that are not at the start and raises an error for them from Python 3.11.
archtime = DEFINE(
    'archtime',
    SEQ(
        pattern(r'(?i)\s*(?P<archtime>' + archtimep + ')'),
        ELEMENT('time', archtime='$archtime'),
    ),
)

# Date handling
monthnamep = '(January|February|March|April|May|June|July|August|September|October|November|December)'