Ejemplo n.º 1
0
    ('0\]-9\]', r'')
]

files = glob.glob('*toc')

# Both patterns are the same for every file, so compile them once up front
# rather than on each pass through the loop.  Raw strings keep the backslash
# in `\]` from being read as an (invalid) string escape.
lead_in_re = re.compile(r'<pag>.*?\]<1>', re.DOTALL)
logo_line_re = re.compile(r'^.*ssa_e_logo.*$', re.MULTILINE)

# Closing markup appended after the converted contents: a "Back" link in its
# own small table, then the end of the outer table and of the document.
page_footer = (
    '<hr />\n<table border="0" cellpadding="5" cellspacing="0">\n'
    '<tr>\n<td bgcolor="0000FF"><p>[ <a href="javascript:history.go(-1)">Back</a> ]</p>'
    '\n</td></tr></table>'
    '\n</td></tr></table>\n</body></html>'
)

# Turn every file whose name ends in "toc" into a standalone XHTML
# table-of-contents page.
for file in files:
    f = read_file(file)
    s_id = shortID(f)
    l_id = longID(f)

    # Page prologue: s_id goes into the <title>, l_id into the visible heading.
    toc = (
        '<?xml version="1.0" encoding="iso-8859-1"?>\n'
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
        '<html xmlns="http://www.w3.org/1999/xhtml">\n<head>\n<title>'
        'BSSA, Volume ' + s_id + '</title>\n</head>\n'
        '<body bgcolor="#FFFFFF" link="#0000FF" vlink="#FF0000" alink="#E6E6E6">'
        '\n<table cellpadding="20" cellspacing="0" width="500">\n<tr><td>\n'
        '<h2>Bulletin of the<br />Seismological Society of America</h2>\n'
        '<p><b><span style="font-size: larger">' + l_id + '</span></b></p>\n'
        '<h2>Contents</h2>\n'
    )

    # Collapse each run from "<pag>" through the "]" directly before "<1>"
    # down to just "<1>" (DOTALL lets the run span several lines).
    f = lead_in_re.sub('<1>', f)
    # NOTE(review): presumably applies the cleanupL substitution list to the
    # text -- regexr and cleanupL are defined elsewhere; confirm there.
    f = regexr(cleanupL, f)

    toc = toc + f + page_footer
    # Blank out every line that mentions the ssa_e_logo image.
    toc = logo_line_re.sub('', toc)

    # Output name is built from fixed character positions of the input name.
    # NOTE(review): assumes the name has at least six characters; a shorter
    # match of '*toc' would raise IndexError on file[5] -- confirm naming.
    namestr = 'ssa_toc' + file[3:5] + '-' + file[5] + '.html'
    write_file(namestr, toc)
Ejemplo n.º 2
0
def number_roots(m):
    """Return a numbered, namespaced replacement for a bare ``<TEI>`` tag.

    Meant to be passed to ``re.sub`` as the replacement callback.  Every call
    advances the module-level ``TEIcount`` by 3 and uses the new value in the
    ``xml:id`` (``ch<count>``).

    m -- the match object supplied by ``re.sub``; it is not inspected.
    """
    global TEIcount
    TEIcount += 3
    opening_tag = '<TEI xml:id="ch%s" xmlns="http://www.tei-c.org/ns/1.0">'
    return opening_tag % TEIcount


## Main
files = handle_args(sys.argv[1:])

# The output directory must exist before any file is written into it.
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

# Substitution passes, applied to every file in this order: each bare opening
# tag is replaced by whatever its callback returns.
# The <bibl> entry is the search regex to edit when the target tag changes.
renumber_passes = (
    (r'<bibl>', number_bibs),
    (r'<div>', number_divs),
    (r'<p>', number_paras),
    (r'<TEI>', number_roots),
)

for file in files:
    # By default the counters keep running from one file to the next.  To
    # restart them for each file, assign their starting values here, e.g.:
    #     bibcount = 70
    #     divcount = 110
    #     pcount = 480
    # (maybe add a command-line switch for this)
    s = read_file(file)
    for tag_pattern, renumber in renumber_passes:
        s = re.sub(tag_pattern, renumber, s)
    write_file(os.path.join(output_dir, file), s)

# Report where each counter finished.
print("\n  TEIcount ended at %s." % TEIcount)
print("\n  Bibcount ended at %s." % bibcount)
print("\n  Divcount ended at %s." % divcount)
print("\n  Pcount ended at %s." % pcount)
Ejemplo n.º 3
0
	# Build one HTML table row for the current glyph and append it to the page.
	#
	# The row template below is filled in with `% vars()`, i.e. by variable
	# NAME, so the locals assigned here must keep exactly these names
	# (n, hx, gn, w, h, w2, h2) to match the %(...)s placeholders.
	# NOTE(review): n and hx are presumably the glyph's number/name and hex
	# code -- confirm against whatever builds `glyph`.
	n = glyph[0]
	hx = glyph[1]
	# gn is the base name of the glyph's PNG file (used as "<gn>.png" below).
	gn = glyph[2]
	# w and h go into the first <img>'s width/height attributes.
	w = glyph[3]
	h = glyph[4]
	# Doubled dimensions for the second, enlarged <img> of the same file.
	w2 = glyph[3]*2
	h2 = glyph[4]*2

	# Columns: n, hx, file name, the image at normal and double size, "WxH".
	glyph_str = '''
<tr>
	<td>%(n)s</td>
	<td>%(hx)s</td>
	<td>%(gn)s.png</td>
	<td><img src="S:\\projects\\lf_temp\\various_resources\\special_characters\\latest_glyphs\\%(gn)s.png" alt="%(gn)s" width="%(w)s" height="%(h)s">
	&nbsp;<img src="S:\\projects\\lf_temp\\various_resources\\special_characters\\latest_glyphs\\%(gn)s.png" alt="%(gn)s" width="%(w2)s" height="%(h2)s"></td>
	<td>%(w)sx%(h)s</td>
</tr>''' % vars()

	html_str = html_str + glyph_str
    

# All rows are in place: close the table and the document, then write the page.
html_str = html_str + '\n</table>\n</body>\n</html>'
write_file('glyphlist.htm', html_str)


    
    



Ejemplo n.º 4
0
from implib.io import read_file, write_file
import re, sys
# Copy the file named by argv[1] to the file named by argv[2] with duplicate
# lines removed.
f = read_file(sys.argv[1])
split = re.compile(r'^.*?$', re.MULTILINE)

# Keep only the LAST occurrence of each line, in file order.
#
# The previous version removed items from the list while iterating over it.
# Every removal shifted the remaining items under the iterator, so some were
# never visited and their duplicates survived (c,X,c,b,a,b,a,X came out as
# X,c,b,a,X).  Scanning backwards with a "seen" set visits every line exactly
# once and keeps the same occurrence the old code kept when it did work.
seen = set()
l = []
for item in reversed(split.findall(f)):
	if item not in seen:
		seen.add(item)
		l.append(item)
l.reverse()

# Every kept line is written back followed by a newline.
s = ''.join([item + '\n' for item in l])

write_file(sys.argv[2], s)
Ejemplo n.º 5
0
from implib.io import read_file, write_file
import re, sys
# Copy the file named by argv[1] to the file named by argv[2] with duplicate
# lines removed.
f = read_file(sys.argv[1])
split = re.compile(r'^.*?$', re.MULTILINE)

# Keep only the LAST occurrence of each line, in file order.
#
# The previous version removed items from the list while iterating over it.
# Every removal shifted the remaining items under the iterator, so some were
# never visited and their duplicates survived (c,X,c,b,a,b,a,X came out as
# X,c,b,a,X).  Scanning backwards with a "seen" set visits every line exactly
# once and keeps the same occurrence the old code kept when it did work.
seen = set()
l = []
for item in reversed(split.findall(f)):
    if item not in seen:
        seen.add(item)
        l.append(item)
l.reverse()

# Every kept line is written back followed by a newline.
s = ''.join([item + '\n' for item in l])

write_file(sys.argv[2], s)
Ejemplo n.º 6
0
('0\]-9\]', r'')
]

files = glob.glob('*toc')

# Both patterns are the same for every file, so compile them once up front
# rather than on each pass through the loop.  Raw strings keep the backslash
# in `\]` from being read as an (invalid) string escape.
lead_in_re = re.compile(r'<pag>.*?\]<1>', re.DOTALL)
logo_line_re = re.compile(r'^.*ssa_e_logo.*$', re.MULTILINE)

# Closing markup appended after the converted contents: a "Back" link in its
# own small table, then the end of the outer table and of the document.
page_footer = (
	'<hr />\n<table border="0" cellpadding="5" cellspacing="0">\n'
	'<tr>\n<td bgcolor="0000FF"><p>[ <a href="javascript:history.go(-1)">Back</a> ]</p>'
	'\n</td></tr></table>'
	'\n</td></tr></table>\n</body></html>'
)

# Turn every file whose name ends in "toc" into a standalone XHTML
# table-of-contents page.
for file in files:
	f = read_file(file)
	s_id = shortID(f)
	l_id = longID(f)

	# Page prologue: s_id goes into the <title>, l_id into the visible heading.
	toc = (
		'<?xml version="1.0" encoding="iso-8859-1"?>\n'
		'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
		'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
		'<html xmlns="http://www.w3.org/1999/xhtml">\n<head>\n<title>'
		'BSSA, Volume ' + s_id + '</title>\n</head>\n'
		'<body bgcolor="#FFFFFF" link="#0000FF" vlink="#FF0000" alink="#E6E6E6">'
		'\n<table cellpadding="20" cellspacing="0" width="500">\n<tr><td>\n'
		'<h2>Bulletin of the<br />Seismological Society of America</h2>\n'
		'<p><b><span style="font-size: larger">' + l_id + '</span></b></p>\n'
		'<h2>Contents</h2>\n'
	)

	# Collapse each run from "<pag>" through the "]" directly before "<1>"
	# down to just "<1>" (DOTALL lets the run span several lines).
	f = lead_in_re.sub('<1>', f)
	# NOTE(review): presumably applies the cleanupL substitution list to the
	# text -- regexr and cleanupL are defined elsewhere; confirm there.
	f = regexr(cleanupL, f)

	toc = toc + f + page_footer
	# Blank out every line that mentions the ssa_e_logo image.
	toc = logo_line_re.sub('', toc)

	# Output name is built from fixed character positions of the input name.
	# NOTE(review): assumes the name has at least six characters; a shorter
	# match of '*toc' would raise IndexError on file[5] -- confirm naming.
	namestr = 'ssa_toc' + file[3:5] + '-' + file[5] + '.html'
	write_file(namestr, toc)
Ejemplo n.º 7
0
    return '''<TEI xml:id="ch%s" xmlns="http://www.tei-c.org/ns/1.0">''' % TEIcount


## Main
files = handle_args(sys.argv[1:])

# Create the output directory on first use.
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

for file in files:
    # The counters carry over from file to file unless they are reassigned
    # here.  Uncomment (and adjust) to restart them for every file:
    #     bibcount = 70
    #     divcount = 110
    #     pcount = 480
    # (maybe add a command-line switch for this)
    text = read_file(file)

    # Replace each bare opening tag with the output of its callback, one tag
    # type at a time.  The first pattern is the search regex to edit when a
    # different tag needs numbering.
    for tag_pattern, renumber in (
        (r'<bibl>', number_bibs),
        (r'<div>', number_divs),
        (r'<p>', number_paras),
        (r'<TEI>', number_roots),
    ):
        text = re.sub(tag_pattern, renumber, text)

    destination = os.path.join(output_dir, file)
    write_file(destination, text)

# Final counter values, one per line with a blank line before each.
print('\n  TEIcount ended at %s.' % TEIcount)
print('\n  Bibcount ended at %s.' % bibcount)
print('\n  Divcount ended at %s.' % divcount)
print('\n  Pcount ended at %s.' % pcount)