('0\]-9\]', r'') ] files = glob.glob('*toc') for file in files: f = read_file(file) s_id = shortID(f) l_id = longID(f) toc = '<?xml version="1.0" encoding="iso-8859-1"?>\n' \ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" ' \ '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n' \ '<html xmlns="http://www.w3.org/1999/xhtml">\n<head>\n<title>' \ 'BSSA, Volume ' + s_id + '</title>\n</head>\n' \ '<body bgcolor="#FFFFFF" link="#0000FF" vlink="#FF0000" alink="#E6E6E6">' \ '\n<table cellpadding="20" cellspacing="0" width="500">\n<tr><td>\n' \ '<h2>Bulletin of the<br />Seismological Society of America</h2>\n' \ '<p><b><span style="font-size: larger">' + l_id + '</span></b></p>\n' \ '<h2>Contents</h2>\n' regex = re.compile('<pag>.*?\]<1>', re.DOTALL) f = regex.sub('<1>', f) f = regexr(cleanupL, f) toc = toc + f + '<hr />\n<table border="0" cellpadding="5" cellspacing="0">\n' \ '<tr>\n<td bgcolor="0000FF"><p>[ <a href="javascript:history.go(-1)">Back</a> ]</p>' \ '\n</td></tr></table>' \ '\n</td></tr></table>\n</body></html>' regex = re.compile('^.*ssa_e_logo.*$', re.MULTILINE) toc = regex.sub('', toc) namestr = 'ssa_toc' + file[3:5] + '-' + file[5] + '.html' write_file(namestr, toc)
def number_roots(m):
    """Return a numbered ``<TEI>`` start tag; used as a ``re.sub`` callback.

    Advances the module-level ``TEIcount`` by 3 and embeds the new value in
    the tag's ``xml:id`` attribute as ``ch<count>``.  ``m`` is the match
    object handed over by ``re.sub``; it is not inspected.
    """
    global TEIcount
    TEIcount += 3
    return '<TEI xml:id="ch%s" xmlns="http://www.tei-c.org/ns/1.0">' % TEIcount


## Main
files = handle_args(sys.argv[1:])
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

for filename in files:
    # set counters to 0 (or a value) to make them reset to that value for each file
    # comment counters out to increment across files
    # (maybe add a command-line switch for this)
    # bibcount = 70
    # divcount = 110
    # pcount = 480
    text = read_file(filename)
    text = re.sub(r"<bibl>", number_bibs, text)  ## <-- edit search regex! ## previous
    text = re.sub(r"<div>", number_divs, text)
    text = re.sub(r"<p>", number_paras, text)
    text = re.sub(r"<TEI>", number_roots, text)
    # The output keeps the input's name, joined onto output_dir.
    write_file(os.path.join(output_dir, filename), text)

# Single-argument print(...) produces the same output on Python 2 and 3.
print("\n TEIcount ended at %s." % TEIcount)
print("\n Bibcount ended at %s." % bibcount)
print("\n Divcount ended at %s." % divcount)
print("\n Pcount ended at %s." % pcount)
# Fragment: the statements below are collapsed onto one line in this excerpt.
# The row-building part presumably runs once per `glyph` inside a loop whose
# header lies above this excerpt (TODO confirm); the table-closing append and
# the write_file() call presumably follow that loop.
#
# Per glyph: items 0-4 of `glyph` are unpacked into n, hx, gn, w and h, and
# w2/h2 are twice items 3 and 4.  The row template is filled with `% vars()`,
# so these variable names double as the template keys -- renaming any of them
# breaks the row.  Each row shows the glyph image twice, at w x h and at
# w2 x h2; the image src is a hard-coded S: drive path.
#
# Finally the closing </table></body></html> markup is appended to html_str
# and the result is written to 'glyphlist.htm'.
n = glyph[0] hx = glyph[1] gn = glyph[2] w = glyph[3] h = glyph[4] w2 = glyph[3]*2 h2 = glyph[4]*2 glyph_str = ''' <tr> <td>%(n)s</td> <td>%(hx)s</td> <td>%(gn)s.png</td> <td><img src="S:\\projects\\lf_temp\\various_resources\\special_characters\\latest_glyphs\\%(gn)s.png" alt="%(gn)s" width="%(w)s" height="%(h)s"> <img src="S:\\projects\\lf_temp\\various_resources\\special_characters\\latest_glyphs\\%(gn)s.png" alt="%(gn)s" width="%(w2)s" height="%(h2)s"></td> <td>%(w)sx%(h)s</td> </tr>''' % vars() html_str = html_str + glyph_str html_str = html_str + '\n</table>\n</body>\n</html>' write_file('glyphlist.htm', html_str)
import re
import sys

# Matches every line of the input without its newline, blank lines included.
LINE_RE = re.compile(r'^.*?$', re.MULTILINE)


def dedupe_lines(lines):
    """Return ``lines`` with duplicates removed, keeping each last occurrence.

    The survivors stay in the order of their last occurrences.  The list is
    walked once from the end with a set of already-seen lines, rather than
    removing items from the list while iterating over it, which could skip
    entries and leave duplicates behind.
    """
    seen = set()
    kept = []
    for line in reversed(lines):
        if line not in seen:
            seen.add(line)
            kept.append(line)
    kept.reverse()
    return kept


def dedupe_text(text):
    """Return ``text`` with duplicate lines removed, one ``\\n`` after each line.

    Blank lines are de-duplicated like any other line.  A trailing newline in
    ``text`` yields a final empty entry, which is treated the same way.
    """
    return ''.join(line + '\n' for line in dedupe_lines(LINE_RE.findall(text)))


if __name__ == '__main__':
    # Usage: <script> INPUT OUTPUT
    # The project I/O helpers are only needed when run as a script.
    from implib.io import read_file, write_file

    write_file(sys.argv[2], dedupe_text(read_file(sys.argv[1])))
return '''<TEI xml:id="ch%s" xmlns="http://www.tei-c.org/ns/1.0">''' % TEIcount ## Main files = handle_args(sys.argv[1:]) if not os.path.isdir(output_dir): os.mkdir(output_dir) for file in files: # set counters to 0 (or a value) to make them reset to that value for each file # comment counters out to increment across files # (maybe add a command-line switch for this) #bibcount = 70 #divcount = 110 #pcount = 480 s = read_file(file) s = re.sub( r'''<bibl>''', ## <-- edit search regex! ## previous number_bibs, s) s = re.sub(r'''<div>''', number_divs, s) s = re.sub(r'''<p>''', number_paras, s) s = re.sub(r'''<TEI>''', number_roots, s) write_file(os.path.join(output_dir, file), s) print '\n TEIcount ended at %s.' % TEIcount print '\n Bibcount ended at %s.' % bibcount print '\n Divcount ended at %s.' % divcount print '\n Pcount ended at %s.' % pcount