'\\2\\1\\3', 1, '2003-02-24'), ( '(<B> Mr\. Spellar)\)', '\\1', 1, '2003-03-31'), # wrong constituency in debates ( 'Sir Archy Kirkwood \(Brecon and Radnorshire\)', 'Sir Archy Kirkwood (Roxburgh and Berwickshire)', 1, '2003-06-26'), ] # 2. <B> Mr. Colin Breed (South-East Cornwall)</B> (LD): # <B> Mr. Hutton: </B> # 2. <stamp aname="40205-06_para4"/><B> Mr. Colin Breed</B>: # Q4. [161707]<a name="40317-03_wqn5"><B> Mr. Andy Reed (Loughborough)</B> parties = "|".join(map(string.lower, memberList.partylist())) + "|uup|ld|dup|in the chair" # Splitting condition # this must be a generalization of the one below. so changes need to be reflected in both. recomb = re.compile('''(?ix)((?:[QT]?\d+\.\s*)?(?:\[\d+\]\s*)? (?:<stamp\saname="[^"]*"/>\s*)? <b> (?:<stamp\saname="[^"]*"/>)* [^<]* </b>(?!</h[34]>) \s*\)? (?:\s*\([^)]*\))? (?:\s*\((?:%s)\))? \s*:?)''' % parties)
'\\2\\1\\3', 1, '2003-02-24'),

    ( '(<B> Mr\. Spellar)\)', '\\1', 1, '2003-03-31'),

    # wrong constituency in debates
    ( 'Sir Archy Kirkwood \(Brecon and Radnorshire\)', 'Sir Archy Kirkwood (Roxburgh and Berwickshire)', 1, '2003-06-26'),
]

# Examples of the speaker markup the patterns below are written against:
# 2. <B> Mr. Colin Breed (South-East Cornwall)</B> (LD):
# <B> Mr. Hutton: </B>
# 2. <stamp aname="40205-06_para4"/><B> Mr. Colin Breed</B>:
# Q4. [161707]<a name="40317-03_wqn5"><B> Mr. Andy Reed (Loughborough)</B>

# Regex alternation of party labels: every name from memberList.partylist(),
# lower-cased, followed by a fixed list of extra labels.  Literal spaces are
# written as "[ ]" -- presumably because the alternation is embedded in a
# verbose-mode pattern where bare whitespace is ignored (confirm where
# respeaker is compiled).
parties = "|".join(map(lambda x: x.lower().replace(' ', '[ ]'), memberList.partylist())) + "|uup|ld|dup|in[ ]the[ ]chair|ind|sdlp|snp|con|lab|pc|lab/[ ]?co-op"

# Pattern text for a <p class="tabletext"> paragraph whose entire content is a
# single <b>...</b> run containing no "<" characters.
retabletext = '<p[ ]class="tabletext"><b>[^<]*</b></p>'

# Splitting condition
# this must be a generalization of the one below. so changes need to be reflected in both.
# NOTE(review): the literal below is not closed within this view; it has one
# %s placeholder in the party-string position, so it is presumably filled with
# `parties` further on -- confirm.
respeaker = ''' (?:[QT]?\d+\.\s*)?(?:\[\d+\]\s*)? # Question number before speaker name (?:<stamp\saname="[^"]*"/>\s*)? # Stamps before <b> <b> (?:<stamp\saname="[^"]*"/>)* # Stamps after <b> [^<]* # Any non-HTML </b>(?!</h[34]>) # End bold as long as not followed by end heading \s*\)? # Possible random closing bracket (?:\s*\([^)]*\))? # Possible string in brackets (e.g. constituency) (?:\s*\((?:%s)\))? # Possible party string in brackets \s*:? # Possible colon at the end