def FilterWMSColnum(fout, text, sdate):
    """Copy ``text`` to ``fout``, turning column headings and anchors into stamps.

    ``text`` is cut into pieces with ``recomb.split`` and each piece is
    handled by the first rule that applies:

    * column heading (``recolumnumvals``): its date must equal ``sdate`` and,
      once a column has been seen, its number must not be lower than the
      current one; a space followed by a
      ``<stamp coldate="..." colnum="<n>WS"/>`` tag is written.
    * continuation heading (``recolnumcontvals``): its date must equal
      ``sdate`` and its number must equal the current column; nothing is
      written for it.
    * anchor (``reanamevals``): a ``<stamp aname="..."/>`` tag is written.
    * any other piece is written through unchanged, unless ``recomb`` itself
      matches it, which means the specific patterns missed a case.

    NOTE(review): a function with this same name is defined again later in
    this file; at import time that later definition replaces this one.

    Args:
        fout: object whose ``write`` method receives the filtered output.
        text: the text to filter.
        sdate: the date every heading must carry; compared with the ``.date``
            attribute of the parsed heading date.

    Raises:
        ContextException: on a date mismatch, a column number that goes
            backwards, a continuation heading for a different column, or a
            piece that ``recomb`` matches but no specific pattern handles.
    """
    stamp = StampUrl(sdate)  # passed along with every ContextException
    current_col = -1  # -1 until the first column heading has been accepted

    for piece in recomb.split(text):
        heading = recolumnumvals.match(piece)
        if heading:
            heading_date = mx.DateTime.DateTimeFrom(heading.group(1)).date
            if sdate != heading_date:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, piece),
                    fragment=piece, stamp=stamp)

            new_col = int(heading.group(2))
            # The very first heading is always accepted; afterwards repeats
            # and forward jumps are fine, only a lower number is an error.
            if current_col != -1 and new_col < current_col:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (new_col, piece),
                    fragment=piece, stamp=stamp)

            current_col = new_col
            stamp.stamp = '<stamp coldate="%s" colnum="%sWS"/>' % (sdate, new_col)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        continuation = recolnumcontvals.match(piece)
        if continuation:
            cont_date = mx.DateTime.DateTimeFrom(continuation.group(1)).date
            if sdate != cont_date:
                raise ContextException(
                    "Cont column date disagrees %s -- %s" % (sdate, piece),
                    fragment=piece, stamp=stamp)

            cont_col = int(continuation.group(2))
            if current_col != cont_col:
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (current_col, piece),
                    fragment=piece, stamp=stamp)
            # A valid continuation heading produces no output at all.
            continue

        # anchor names from HTML <a name="xxx">
        anchor = reanamevals.match(piece)
        if anchor:
            stamp.aname = '<stamp aname="%s"/>' % anchor.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched.  If the splitting pattern still matches,
        # one of the specific patterns above is too narrow.
        if recomb.match(piece):
            raise ContextException('regexpvals not general enough',
                                   fragment=piece, stamp=stamp)

        # (A further "marginal colnum" check via remarginal is disabled here.)
        fout.write(piece)
def FilterLordsColtime(fout, text, sdate):
    """Copy ``text`` to ``fout``, replacing column, time and anchor markup with stamps.

    ``text`` is cut into pieces with ``recomb.split`` and each piece is
    handled by the first rule that applies:

    * column heading (``recolumnumvals``): its date must equal ``sdate``.
      The number (group 3) is accepted if it is the first one seen or is 1-5
      above the current column, in which case a ``<stamp coldate=... colnum=.../>``
      tag is written.  Repeats, one-step decrements and all other jumps are
      silently ignored (columns get skipped during division listings).
    * time (``retimevals``): unless the captured text starts with ``</h5>``
      or ``</st>``, it is run through ``TimeProcessing`` and written as a
      ``<stamp time=.../>`` tag.
    * time spelled out in a heading (``regtime3vals``): the heading is
      written back unchanged, followed by a ``<stamp time=.../>`` tag built
      from the hour word.
    * anchor (``reanamevals``): a ``<stamp aname=.../>`` tag is written.
    * any other piece is written through unchanged, unless ``recomb`` or
      ``remarginal`` matches it, which is reported as an error.

    Args:
        fout: object whose ``write`` method receives the filtered output.
        text: the text to filter.
        sdate: the date every column heading must carry.

    Raises:
        ContextException: on a heading date mismatch, or when a piece matches
            ``recomb``/``remarginal`` without being handled above.
        AssertionError: if a spelled-out heading time appears after other
            times were recorded, or its hour word is not one of the known
            ones.
    """
    # Known spelled-out hours and the clock format each one maps to.  The
    # table is deliberately limited to observed examples because the am/pm
    # choice cannot be derived from the word alone.
    hour_formats = {
        "two": "2:%s pm",
        "three": "3:%s pm",
        "six": "6:%s pm",
        "nine": "9:%s am",
        "ten": "10:%s am",
        "eleven": "11:%s am",
    }

    colnum = -1  # -1 until the first column heading has been accepted
    stampurl = StampUrl(sdate)  # passed along with every ContextException
    previoustime = []

    for piece in recomb.split(text):
        # --- column heading ------------------------------------------------
        columng = recolumnumvals.match(piece)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, piece),
                    stamp=stampurl, fragment=piece)

            # group(2) is not used here; the number is group(3).
            lcolnum = int(columng.group(3))
            if lcolnum in (colnum - 1, colnum):
                pass  # spurious decrement or repeat of the column stamp
            elif colnum == -1 or colnum + 1 <= lcolnum <= colnum + 5:
                # Small forward jumps are accepted because columns get
                # skipped in divisions (the window used to be 2).
                colnum = lcolnum
                fout.write('<stamp coldate="%s" colnum="%s%s"/>' % (sdate, colnum, ""))
            # Any other number is ignored without a stamp.
            continue

        # --- time ----------------------------------------------------------
        timeg = retimevals.match(piece)
        if timeg:
            rawtime = timeg.group(1)
            # NOTE(review): the source this was reconstructed from had lost
            # its indentation; the write/append are assumed to sit inside
            # this guard (so closing-tag text is never stamped) -- confirm.
            if not re.match('(?:</h5>|</st>)', rawtime, re.I):
                stamptime = TimeProcessing(rawtime, previoustime, False, stampurl)
                fout.write('<stamp time="%s"/>' % stamptime)
                if stamptime:
                    previoustime.append(stamptime)
            continue

        # --- time spelled out inside a heading -------------------------------
        regtime3 = regtime3vals.match(piece)
        if regtime3:
            fout.write(piece)  # put this heading back into the flow of text
            assert not previoustime

            timewords = regtime3.group(1)
            lntimematch = re.match(r"(half[\- ]past )?(\w+)(-thirty)?$", timewords)
            lnhour = lntimematch and lntimematch.group(2)
            lntimep = hour_formats.get(lnhour)
            if lntimep is None:
                # Raised explicitly (rather than `assert False`) so that
                # running with -O cannot fall through with no format chosen.
                print("-------------'%s'" % timewords)
                raise AssertionError("unrecognised heading time '%s'" % timewords)

            # "half-past X" and "X-thirty" must not both be present.
            assert not lntimematch.group(1) or not lntimematch.group(3)
            if lntimematch.group(1) or lntimematch.group(3):
                minutes = "30"
            else:
                minutes = "00"

            stamptime = TimeProcessing(lntimep % minutes, previoustime, False, stampurl)
            fout.write('<stamp time="%s"/>' % stamptime)
            continue

        # --- anchor names from HTML <a name="xxx"> ---------------------------
        anameg = reanamevals.match(piece)
        if anameg:
            aname = anameg.group(1)
            fout.write('<stamp aname="%s"/>' % aname)
            stampurl.aname = aname
            continue

        # --- nothing detected: check for anything obvious that was missed ----
        if recomb.match(piece):
            # a programming error between splitting and matching
            print("$$$ %s $$-$" % piece)
            raise ContextException(' regexpvals not general enough ',
                                   stamp=stampurl, fragment=piece)

        marginal = remarginal.search(piece)
        if marginal:
            print(marginal.group(0))
            lregcolumnum6 = (r'<p>\s*</ul>\s*<a name="column_\d+">(?:</a>)?\s*'
                             r'<b>[^:<]*:\s*column\s*\d+\s*</b></p>\s*<ul><font size=3>')
            print(re.findall(lregcolumnum6, piece, re.I))
            raise ContextException(' marginal coltime detection case ',
                                   stamp=stampurl, fragment=piece)

        fout.write(piece)
def FilterDebateColTime(fout, text, sdate, typ):
    """Copy ``text`` to ``fout``, replacing column, time and anchor markup with stamps.

    When ``typ`` is ``"debate"`` the text is first passed through
    ``ApplyFixSubstitutions``.  If the text starts with a ``<pagex ...>`` tag
    of ``type="today"``, a ``<stamp colnum="000"/>`` line is written up front
    and no column headings are expected.

    ``text`` is then cut into pieces with ``recomb.split`` and each piece is
    handled by the first rule that applies:

    * column heading (``recolumnumvals``): its date must equal ``sdate``.  A
      number exactly one below the current column is tolerated; any lower
      number is an error.  A ``<stamp coldate=... colnum=.../>`` tag carrying
      the highest column seen so far is written.
    * continuation heading (``recolnumcontvals``): its date must equal
      ``sdate``; for ``sdate`` before ``'2006-05-08'`` its number must equal
      the current column.  Nothing is written.
    * time (``retimevals``): run through ``TimeProcessing`` and written as a
      ``<stamp time=.../>`` tag.
    * anchor (``reanamevals``): a ``<stamp aname=.../>`` tag is written.
    * any other piece is written through unchanged, unless ``recomb`` or
      ``remarginal`` matches it, which is reported as an error.

    NOTE(review): a function with this same name is defined again later in
    this file; at import time that later definition replaces this one.

    Args:
        fout: object whose ``write`` method receives the filtered output.
        text: the text to filter.
        sdate: the date every heading must carry.
        typ: kind of text; only the value ``"debate"`` changes behaviour.

    Raises:
        ContextException: on a date or column mismatch, an unparseable time,
            or a piece matching ``recomb``/``remarginal`` that was not handled.
        AssertionError: if a column heading turns up in a "today" page.
    """
    # old style fixing (before patches existed)
    if typ == "debate":
        text = ApplyFixSubstitutions(text, sdate, fixsubs)

    stamp = StampUrl(sdate)  # passed along with every ContextException
    btodaytype = re.match('<pagex [^>]*type="today"', text)
    if btodaytype:
        fout.write('<stamp colnum="000"/>\n')

    colnum = -1  # -1 until the first column heading has been seen
    previoustime = []

    for piece in recomb.split(text):
        # --- column heading ------------------------------------------------
        columng = recolumnumvals.match(piece)
        if columng:
            assert not btodaytype  # no columns in today

            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, piece),
                    stamp=stamp, fragment=piece)

            lcolnum = int(columng.group(2))
            # A one-step decrement is a known spurious stamp and forward
            # jumps happen in division listings; only a drop of two or more
            # below an already-seen column is rejected.
            if colnum != -1 and lcolnum < colnum - 1:
                raise ContextException(
                    "Colnum not incrementing %d smaller than %d -- %s"
                    % (lcolnum, colnum, piece),
                    stamp=stamp, fragment=piece)

            # The written column number has to increase no matter what, so
            # the running maximum is emitted while the error stamp keeps the
            # number actually found in the heading.
            if lcolnum > colnum:
                colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write('<stamp coldate="%s" colnum="%s"/>' % (sdate, colnum))
            continue

        # --- continuation heading --------------------------------------------
        columncg = recolnumcontvals.match(piece)
        if columncg:
            ldate = mx.DateTime.DateTimeFrom(columncg.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, piece),
                    stamp=stamp, fragment=piece)

            lcolnum = int(columncg.group(2))
            if colnum != lcolnum and sdate < '2006-05-08':
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, piece),
                    stamp=stamp, fragment=piece)
            continue

        # --- time ----------------------------------------------------------
        timeg = retimevals.match(piece)
        if timeg:
            # The third argument tells TimeProcessing whether the matched
            # text opened with '['.
            stamptime = TimeProcessing(timeg.group(1), previoustime,
                                       (timeg.group(0)[0] == '['), stamp)
            if not stamptime:
                raise ContextException("Time not matched: " + timeg.group(1),
                                       stamp=stamp, fragment=piece)
            fout.write('<stamp time="%s"/>' % stamptime)
            previoustime.append(stamptime)
            continue

        # --- anchor names from HTML <a name="xxx"> ---------------------------
        anameg = reanamevals.match(piece)
        if anameg:
            anametag = '<stamp aname="%s"/>' % anameg.group(1)
            stamp.aname = anametag
            fout.write(anametag)
            continue

        # --- nothing detected: check for anything obvious that was missed ----
        if recomb.match(piece):
            print("$$$ %s $$$" % piece)
            print(regcolnumcont)
            # Case-insensitivity is passed as a flag; a trailing "(?i)" in
            # the pattern is rejected by newer versions of the re module.
            print(re.match(regcolnumcont, piece, re.I))
            raise ContextException('regexpvals not general enough',
                                   stamp=stamp, fragment=piece)

        marginal = remarginal.search(piece)  # searched once, reused below
        if marginal:
            print(piece)
            print('--------------------------------\n')
            print("marginal found:  %s" % (marginal.groups(),))
            print("zeroth:  %s" % marginal.group(0))
            print('--------------------------------\n')
            raise ContextException('marginal coltime/a detection case',
                                   stamp=stamp, fragment=piece)

        fout.write(piece)
def FilterWransColnum(fout, text, sdate):
    """Copy ``text`` to ``fout``, turning column headings and anchors into stamps.

    The text is first passed through ``ApplyFixSubstitutions`` and every
    ``{**con**}{**/con**}`` marker pair is removed.  It is then cut into
    pieces with ``recomb.split`` and each piece is handled by the first rule
    that applies:

    * column heading (``recolumnumvals``): its date must equal ``sdate`` and,
      once a column has been seen, its number must not be lower than the
      current one; a space and a ``<stamp coldate=... colnum="<n>W"/>`` tag
      are written.
    * continuation heading (``recolnumcontvals``), in one of three shapes:
      - a date in group 1 or 3 with a number in group 2 or 4: the date must
        equal ``sdate`` and, for ``sdate`` before ``'2006-05-08'``, the
        number must equal the current column; a single space is written.
      - a number in group 5: it must equal the current column or the one
        before it; a single space is written.
      - a number in group 6: it must be the current column plus one; the
        column advances and a space plus a column stamp are written.
      If none of these groups is filled the piece falls through to the
      rules below.
    * anchor (``reanamevals``): a ``<stamp aname=.../>`` tag is written.
    * any other piece is written through unchanged, unless ``recomb`` itself
      matches it, which is reported as an error.

    NOTE(review): a function with this same name is defined again later in
    this file; at import time that later definition replaces this one.

    Args:
        fout: object whose ``write`` method receives the filtered output.
        text: the text to filter.
        sdate: the date every heading must carry.

    Raises:
        ContextException: on any of the mismatches described above.
    """
    # Legacy individual substitution rules, then junk removal.
    text = ApplyFixSubstitutions(text, sdate, fixsubs)
    text = text.replace("{**con**}{**/con**}", "")

    stamp = StampUrl(sdate)  # passed along with every ContextException
    current_col = -1  # -1 until the first column heading has been accepted

    for piece in recomb.split(text):
        heading = recolumnumvals.match(piece)
        if heading:
            heading_date = mx.DateTime.DateTimeFrom(heading.group(1)).date
            if sdate != heading_date:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, piece),
                    fragment=piece, stamp=stamp)

            new_col = int(heading.group(2))
            # Forward jumps are allowed (column numbers get skipped during
            # division listings); only going backwards is an error.
            if current_col != -1 and new_col < current_col:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (new_col, piece),
                    fragment=piece, stamp=stamp)

            current_col = new_col
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, new_col)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        continuation = recolnumcontvals.match(piece)
        if continuation:
            date_text = continuation.group(1) or continuation.group(3) or None
            num_text = continuation.group(2) or continuation.group(4) or None

            if date_text:
                cont_date = mx.DateTime.DateTimeFrom(date_text).date
                if sdate != cont_date:
                    raise ContextException(
                        "Cont column date disagrees %s -- %s" % (sdate, piece),
                        fragment=piece, stamp=stamp)

                cont_col = int(num_text)
                if current_col != cont_col and sdate < '2006-05-08':
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (current_col, piece),
                        fragment=piece, stamp=stamp)

                # no need to output anything beyond a separator
                fout.write(' ')
                continue

            if continuation.group(5):
                cont_col = int(continuation.group(5))
                # Accept the current column or the one just before it.
                if current_col not in (cont_col, cont_col + 1):
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (current_col, piece),
                        fragment=piece, stamp=stamp)
                fout.write(' ')
                continue

            if continuation.group(6):
                next_col = int(continuation.group(6))
                if current_col + 1 != next_col:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (current_col, piece),
                        fragment=piece, stamp=stamp)
                current_col = next_col
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, next_col)
                fout.write(' ')
                fout.write(stamp.stamp)
                continue

        # anchor names from HTML <a name="xxx">
        anchor = reanamevals.match(piece)
        if anchor:
            stamp.aname = '<stamp aname="%s"/>' % anchor.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched.  If the splitting pattern still matches,
        # one of the specific patterns above is too narrow.
        if recomb.match(piece):
            raise ContextException('regexpvals not general enough',
                                   fragment=piece, stamp=stamp)

        # (The "marginal colnum" check via remarginal was removed on
        # 2007-05-25 and stays disabled.)
        fout.write(piece)
def FilterDebateColTime(fout, text, sdate, typ):
    """Copy ``text`` to ``fout``, replacing column, time and anchor markup with stamps.

    When ``typ`` is ``"debate"`` the text is first passed through
    ``ApplyFixSubstitutions``.  If the text starts with a ``<pagex ...>`` tag
    of ``type="today"``, a ``<stamp colnum="000"/>`` line is written up front
    and no column headings are expected.

    ``text`` is then cut into pieces with ``recomb.split`` and each piece is
    handled by the first rule that applies:

    * column heading (``recolumnumvals``): its date must equal ``sdate``.  A
      number exactly one below the current column is tolerated; any lower
      number is an error.  A ``<stamp coldate=... colnum=.../>`` tag carrying
      the highest column seen so far is written.
    * continuation heading (``recolnumcontvals``): its date must equal
      ``sdate``; for ``sdate`` before ``'2006-05-08'`` its number must equal
      the current column.  Nothing is written.
    * time (``retimevals``): run through ``TimeProcessing`` and written as a
      ``<stamp time=.../>`` tag.
    * anchor (``reanamevals``): a ``<stamp aname=.../>`` tag is written.
    * any other piece is written through unchanged, unless ``recomb`` or
      ``remarginal`` matches it, which is reported as an error.

    NOTE(review): a function with this same name is also defined earlier in
    this file; this later definition is the one that takes effect.

    Args:
        fout: object whose ``write`` method receives the filtered output.
        text: the text to filter.
        sdate: the date every heading must carry.
        typ: kind of text; only the value ``"debate"`` changes behaviour.

    Raises:
        ContextException: on a date or column mismatch, an unparseable time,
            or a piece matching ``recomb``/``remarginal`` that was not handled.
        AssertionError: if a column heading turns up in a "today" page.
    """
    # old style fixing (before patches existed)
    if typ == "debate":
        text = ApplyFixSubstitutions(text, sdate, fixsubs)

    stamp = StampUrl(sdate)  # passed along with every ContextException
    btodaytype = re.match('<pagex [^>]*type="today"', text)
    if btodaytype:
        fout.write('<stamp colnum="000"/>\n')

    colnum = -1  # -1 until the first column heading has been seen
    previoustime = []

    for piece in recomb.split(text):
        # --- column heading ------------------------------------------------
        columng = recolumnumvals.match(piece)
        if columng:
            assert not btodaytype  # no columns in today

            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, piece),
                    stamp=stamp, fragment=piece)

            lcolnum = int(columng.group(2))
            # A one-step decrement is a known spurious stamp and forward
            # jumps happen in division listings; only a drop of two or more
            # below an already-seen column is rejected.
            if colnum != -1 and lcolnum < colnum - 1:
                raise ContextException(
                    "Colnum not incrementing %d smaller than %d -- %s"
                    % (lcolnum, colnum, piece),
                    stamp=stamp, fragment=piece)

            # The written column number has to increase no matter what, so
            # the running maximum is emitted while the error stamp keeps the
            # number actually found in the heading.
            if lcolnum > colnum:
                colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write('<stamp coldate="%s" colnum="%s"/>' % (sdate, colnum))
            continue

        # --- continuation heading --------------------------------------------
        columncg = recolnumcontvals.match(piece)
        if columncg:
            ldate = mx.DateTime.DateTimeFrom(columncg.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, piece),
                    stamp=stamp, fragment=piece)

            lcolnum = int(columncg.group(2))
            if colnum != lcolnum and sdate < '2006-05-08':
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, piece),
                    stamp=stamp, fragment=piece)
            continue

        # --- time ----------------------------------------------------------
        timeg = retimevals.match(piece)
        if timeg:
            # The third argument tells TimeProcessing whether the matched
            # text opened with '['.
            stamptime = TimeProcessing(timeg.group(1), previoustime,
                                       (timeg.group(0)[0] == '['), stamp)
            if not stamptime:
                raise ContextException("Time not matched: " + timeg.group(1),
                                       stamp=stamp, fragment=piece)
            fout.write('<stamp time="%s"/>' % stamptime)
            previoustime.append(stamptime)
            continue

        # --- anchor names from HTML <a name="xxx"> ---------------------------
        anameg = reanamevals.match(piece)
        if anameg:
            anametag = '<stamp aname="%s"/>' % anameg.group(1)
            stamp.aname = anametag
            fout.write(anametag)
            continue

        # --- nothing detected: check for anything obvious that was missed ----
        if recomb.match(piece):
            print("$$$ %s $$$" % piece)
            print(regcolnumcont)
            # Case-insensitivity is passed as a flag; a trailing "(?i)" in
            # the pattern is rejected by newer versions of the re module.
            print(re.match(regcolnumcont, piece, re.I))
            raise ContextException('regexpvals not general enough',
                                   stamp=stamp, fragment=piece)

        marginal = remarginal.search(piece)  # searched once, reused below
        if marginal:
            print(piece)
            print('--------------------------------\n')
            print("marginal found:  %s" % (marginal.groups(),))
            print("zeroth:  %s" % marginal.group(0))
            print('--------------------------------\n')
            raise ContextException('marginal coltime/a detection case',
                                   stamp=stamp, fragment=piece)

        fout.write(piece)
def FilterWransColnum(fout, text, sdate):
    """Copy ``text`` to ``fout``, turning column headings and anchors into stamps.

    The text is first passed through ``ApplyFixSubstitutions`` and every
    ``{**con**}{**/con**}`` marker pair is removed.  It is then cut into
    pieces with ``recomb.split`` and each piece is handled by the first rule
    that applies:

    * column heading (``recolumnumvals``): its date must equal ``sdate`` and,
      once a column has been seen, its number must not be lower than the
      current one; a space and a ``<stamp coldate=... colnum="<n>W"/>`` tag
      are written.
    * continuation heading (``recolnumcontvals``), in one of three shapes:
      - a date in group 1 or 3 with a number in group 2 or 4: the date must
        equal ``sdate`` and, for ``sdate`` before ``'2006-05-08'``, the
        number must equal the current column; a single space is written.
      - a number in group 5: it must equal the current column or the one
        before it; a single space is written.
      - a number in group 6: it must be the current column plus one; the
        column advances and a space plus a column stamp are written.
      If none of these groups is filled the piece falls through to the
      rules below.
    * anchor (``reanamevals``): a ``<stamp aname=.../>`` tag is written.
    * any other piece is written through unchanged, unless ``recomb`` itself
      matches it, which is reported as an error.

    NOTE(review): a function with this same name is also defined earlier in
    this file; this later definition is the one that takes effect.

    Args:
        fout: object whose ``write`` method receives the filtered output.
        text: the text to filter.
        sdate: the date every heading must carry.

    Raises:
        ContextException: on any of the mismatches described above.
    """
    # Legacy individual substitution rules, then junk removal.
    text = ApplyFixSubstitutions(text, sdate, fixsubs)
    text = text.replace("{**con**}{**/con**}", "")

    stamp = StampUrl(sdate)  # passed along with every ContextException
    current_col = -1  # -1 until the first column heading has been accepted

    for piece in recomb.split(text):
        heading = recolumnumvals.match(piece)
        if heading:
            heading_date = mx.DateTime.DateTimeFrom(heading.group(1)).date
            if sdate != heading_date:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, piece),
                    fragment=piece, stamp=stamp)

            new_col = int(heading.group(2))
            # Forward jumps are allowed (column numbers get skipped during
            # division listings); only going backwards is an error.
            if current_col != -1 and new_col < current_col:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (new_col, piece),
                    fragment=piece, stamp=stamp)

            current_col = new_col
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, new_col)
            fout.write(" ")
            fout.write(stamp.stamp)
            continue

        continuation = recolnumcontvals.match(piece)
        if continuation:
            date_text = continuation.group(1) or continuation.group(3) or None
            num_text = continuation.group(2) or continuation.group(4) or None

            if date_text:
                cont_date = mx.DateTime.DateTimeFrom(date_text).date
                if sdate != cont_date:
                    raise ContextException(
                        "Cont column date disagrees %s -- %s" % (sdate, piece),
                        fragment=piece, stamp=stamp)

                cont_col = int(num_text)
                if current_col != cont_col and sdate < "2006-05-08":
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (current_col, piece),
                        fragment=piece, stamp=stamp)

                # no need to output anything beyond a separator
                fout.write(" ")
                continue

            if continuation.group(5):
                cont_col = int(continuation.group(5))
                # Accept the current column or the one just before it.
                if current_col not in (cont_col, cont_col + 1):
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (current_col, piece),
                        fragment=piece, stamp=stamp)
                fout.write(" ")
                continue

            if continuation.group(6):
                next_col = int(continuation.group(6))
                if current_col + 1 != next_col:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (current_col, piece),
                        fragment=piece, stamp=stamp)
                current_col = next_col
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, next_col)
                fout.write(" ")
                fout.write(stamp.stamp)
                continue

        # anchor names from HTML <a name="xxx">
        anchor = reanamevals.match(piece)
        if anchor:
            stamp.aname = '<stamp aname="%s"/>' % anchor.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched.  If the splitting pattern still matches,
        # one of the specific patterns above is too narrow.
        if recomb.match(piece):
            raise ContextException("regexpvals not general enough",
                                   fragment=piece, stamp=stamp)

        # (The "marginal colnum" check via remarginal was removed on
        # 2007-05-25 and stays disabled.)
        fout.write(piece)
def FilterWMSColnum(fout, text, sdate):
    """Copy ``text`` to ``fout``, turning column headings and anchors into stamps.

    ``text`` is cut into pieces with ``recomb.split`` and each piece is
    handled by the first rule that applies:

    * column heading (``recolumnumvals``): its date must equal ``sdate`` and,
      once a column has been seen, its number must not be lower than the
      current one; a space followed by a
      ``<stamp coldate="..." colnum="<n>WS"/>`` tag is written.
    * continuation heading (``recolnumcontvals``): its date must equal
      ``sdate`` and its number must equal the current column; nothing is
      written for it.
    * anchor (``reanamevals``): a ``<stamp aname="..."/>`` tag is written.
    * any other piece is written through unchanged, unless ``recomb`` itself
      matches it, which means the specific patterns missed a case.

    NOTE(review): a function with this same name is also defined earlier in
    this file; this later definition is the one that takes effect.

    Args:
        fout: object whose ``write`` method receives the filtered output.
        text: the text to filter.
        sdate: the date every heading must carry; compared with the ``.date``
            attribute of the parsed heading date.

    Raises:
        ContextException: on a date mismatch, a column number that goes
            backwards, a continuation heading for a different column, or a
            piece that ``recomb`` matches but no specific pattern handles.
    """
    stamp = StampUrl(sdate)  # passed along with every ContextException
    current_col = -1  # -1 until the first column heading has been accepted

    for piece in recomb.split(text):
        heading = recolumnumvals.match(piece)
        if heading:
            heading_date = mx.DateTime.DateTimeFrom(heading.group(1)).date
            if sdate != heading_date:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, piece),
                    fragment=piece, stamp=stamp)

            new_col = int(heading.group(2))
            # The very first heading is always accepted; afterwards repeats
            # and forward jumps are fine, only a lower number is an error.
            if current_col != -1 and new_col < current_col:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (new_col, piece),
                    fragment=piece, stamp=stamp)

            current_col = new_col
            stamp.stamp = '<stamp coldate="%s" colnum="%sWS"/>' % (sdate, new_col)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        continuation = recolnumcontvals.match(piece)
        if continuation:
            cont_date = mx.DateTime.DateTimeFrom(continuation.group(1)).date
            if sdate != cont_date:
                raise ContextException(
                    "Cont column date disagrees %s -- %s" % (sdate, piece),
                    fragment=piece, stamp=stamp)

            cont_col = int(continuation.group(2))
            if current_col != cont_col:
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (current_col, piece),
                    fragment=piece, stamp=stamp)
            # A valid continuation heading produces no output at all.
            continue

        # anchor names from HTML <a name="xxx">
        anchor = reanamevals.match(piece)
        if anchor:
            stamp.aname = '<stamp aname="%s"/>' % anchor.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched.  If the splitting pattern still matches,
        # one of the specific patterns above is too narrow.
        if recomb.match(piece):
            raise ContextException('regexpvals not general enough',
                                   fragment=piece, stamp=stamp)

        # (A further "marginal colnum" check via remarginal is disabled here.)
        fout.write(piece)