/
fix_latin_comp_sup_adv.py
171 lines (155 loc) · 6.61 KB
/
fix_latin_comp_sup_adv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pywikibot, re, sys, argparse
import blib
from blib import getparam, rmparam, tname, msg, site
import lalib
def find_head_comp_sup(pagetitle, pagemsg):
    """Read the positive, comparative and superlative forms from a {{la-adv}} page.

    Loads the page named `pagetitle` from `site`, parses its text and inspects
    the first template whose name is "la-adv".

    * The headword is parameter 1.
    * The comparative is `comp=`, falling back to parameter 2.
    * The superlative is `sup=`, falling back to parameter 3.

    When either degree is missing, a default is derived from the headword by
    stripping the first matching ending from a fixed list (tried in order) and
    appending "ius" (comparative) or "issimē" (superlative). For the ending
    "nter" the "nt" is put back onto the stem before the new ending is added.

    Args:
        pagetitle: Title of the page to load.
        pagemsg: Callable taking one string; used to log a warning when the
            headword ends in none of the known endings.

    Returns:
        A `(head, comp, sup)` tuple. If the headword ending is not recognized,
        `comp` and/or `sup` are returned as found (possibly empty). If the
        page has no "la-adv" template, `(None, None, None)` is returned.
    """
    lemma_page = pywikibot.Page(site, pagetitle)
    wikicode = blib.parse_text(str(lemma_page.text))
    for template in wikicode.filter_templates():
        if tname(template) != "la-adv":
            continue

        head = getparam(template, "1")
        comp = getparam(template, "comp") or getparam(template, "2")
        sup = getparam(template, "sup") or getparam(template, "3")
        if comp and sup:
            # Both degrees are given explicitly; no defaults are needed.
            return head, comp, sup

        # Order matters: longer endings are listed before the shorter endings
        # they contain, so e.g. "iter" is tried before "ter" and "er".
        for ending in ["iter", "nter", "ter", "er", "iē", "ē", "im", "ō"]:
            matched = re.search("^(.*?)%s$" % ending, head)
            if not matched:
                continue
            stem = matched.group(1)
            if ending == "nter":
                stem += "nt"
            break
        else:
            pagemsg("WARNING: Didn't recognize ending of adverb headword %s" % head)
            return head, comp, sup

        # Explicit values win; defaults only fill in whichever degree is missing.
        return head, comp or stem + "ius", sup or stem + "issimē"

    return None, None, None
# NOTE(review): every line of this copy has lost its leading whitespace, so the
# nesting of the statements below cannot be read off the text. The comments
# describe the statements in order and deliberately avoid asserting which
# block a statement belongs to; restore the indentation from the upstream file
# before running or refactoring this function.
#
# process_page is the per-page callback handed to blib.do_pagefile_cats_refs at
# the end of this script. Within the section located by
# lalib.find_latin_section it visits "Adverb" subsections that contain
# {{comparative of}} or {{superlative of}} and rewrites a {{head|la|...}}
# headword there as {{la-adv-comp}} or {{la-adv-sup}}.
#
# Parameters:
#   page   -- page object; only page.title() and page.text are read here.
#   index  -- value interpolated into every log line next to the page title.
#   parsed -- appears not to be read: the name is rebound to the parse of a
#             subsection before its first use below.
#
# Returns (None, None) when lalib.find_latin_section returns None. The final
# return statement yields the re-joined sections and the list of change notes.
def process_page(page, index, parsed):
pagetitle = str(page.title())
# Logging helper: prefixes each message with the page index and title.
def pagemsg(txt):
msg("Page %s %s: %s" % (index, pagetitle, txt))
pagemsg("Processing")
text = str(page.text)
# origtext is not referenced again anywhere in this function.
origtext = text
retval = lalib.find_latin_section(text, pagemsg)
if retval is None:
return None, None
# has_non_latin is unpacked but not used below. sections[j] is overwritten with
# secbody + sectail at the end, so j presumably indexes the located section
# within sections -- confirm against lalib.find_latin_section.
sections, j, secbody, sectail, has_non_latin = retval
# Accumulates one change-summary string per converted headword template.
notes = []
# The pattern matches whole "===...===" header lines (exactly three '=' on each
# side). Because the pattern is a capturing group, re.split keeps the headers:
# even indexes hold body text and odd indexes hold the header that precedes
# the following body.
# NOTE(review): maxsplit (0) and flags (re.M) are passed positionally; Python
# 3.13 deprecates that form in favour of maxsplit=/flags= keywords.
subsections = re.split("(^===[^=\n]*===\n)", secbody, 0, re.M)
# k walks the body entries that follow a header; subsections[k - 1] is that header.
for k in range(2, len(subsections), 2):
if "==Adverb==" in subsections[k - 1]:
parsed = blib.parse_text(subsections[k])
# State gathered by the first template scan: the positive-degree form named by
# the inflection template, and the {{comparative of}} / {{superlative of}}
# template objects themselves.
posdeg = None
compt = None
supt = None
for t in parsed.filter_templates():
if tname(t) == "comparative of":
# A second {{comparative of}} is only reported; the first one seen is kept.
if compt:
pagemsg("WARNING: Saw multiple {{comparative of}}: %s and %s" % (
str(compt), str(t)))
else:
compt = t
# Parameter 1 with wiki links stripped is taken as the positive degree.
posdeg = blib.remove_links(getparam(t, "1"))
if not posdeg:
pagemsg("WARNING: Didn't see positive degree in {{comparative of}}: %s" % str(t))
elif tname(t) == "superlative of":
# Same handling as above, for {{superlative of}}.
if supt:
pagemsg("WARNING: Saw multiple {{superlative of}}: %s and %s" % (
str(supt), str(t)))
else:
supt = t
posdeg = blib.remove_links(getparam(t, "1"))
if not posdeg:
pagemsg("WARNING: Didn't see positive degree in {{superlative of}}: %s" % str(t))
# Having both kinds of template, or neither, is reported and followed by
# `continue`.
if compt and supt:
pagemsg("WARNING: Saw both comparative and superlative, skipping: %s and %s" % (
str(compt), str(supt)))
continue
if not compt and not supt:
pagemsg("WARNING: Didn't see {{comparative of}} or {{superlative of}} in section %s" %
k)
continue
# Second template scan: find the headword template to convert.
for t in parsed.filter_templates():
tn = tname(t)
# A {{la-adv-comp}} / {{la-adv-sup}} template means the headword was already
# converted.
if tn in ["la-adv-comp", "la-adv-sup"]:
pagemsg("Already saw fixed headword: %s" % str(t))
break
if tn == "head":
# Only {{head|la|...}} is eligible for conversion.
if not getparam(t, "1") == "la":
pagemsg("WARNING: Saw wrong language in {{head}}: %s" % str(t))
else:
pos = getparam(t, "2")
# Current headword: head= with links stripped, else the page title.
head = blib.remove_links(getparam(t, "head")) or pagetitle
if pos not in ["adverb", "adverbs",
"adverb form", "adverb forms",
"adverb comparative form", "adverb comparative forms",
"adverb superlative form", "adverb superlative forms",
]:
pagemsg("WARNING: Unrecognized part of speech '%s': %s" % (
pos, str(t)))
else:
# Look up the positive-degree page (title = posdeg without macrons) to get
# its headword and its comparative/superlative forms.
# NOTE(review): posdeg may still be empty here when the "Didn't see positive
# degree" warning above fired -- confirm how lalib.remove_macrons and
# find_head_comp_sup behave in that case.
real_head, real_comp, real_sup = find_head_comp_sup(lalib.remove_macrons(posdeg), pagemsg)
if real_head:
# Substitute the looked-up form only when it differs from the current one in
# macrons alone.
if lalib.remove_macrons(real_head) != lalib.remove_macrons(posdeg):
pagemsg("WARNING: Can't replace positive degree %s with %s because they differ when macrons are removed" % (
posdeg, real_head))
else:
pagemsg("Using real positive degree %s instead of %s" % (
real_head, posdeg))
# Exactly one of compt/supt is set at this point (see the checks above);
# its parameter 1 is overwritten with the looked-up positive degree.
inflt = compt or supt
origt = str(inflt)
inflt.add("1", real_head)
pagemsg("Replaced %s with %s" % (origt, str(inflt)))
# Choose the replacement headword template and, where available, the headword
# form taken from the positive-degree page.
if compt:
newname = "la-adv-comp"
infldeg = "comparative"
# A value of "-" is treated the same as a missing comparative (presumably it
# marks "no comparative" in {{la-adv}} -- confirm).
if real_comp and real_comp != "-":
if lalib.remove_macrons(real_comp) != lalib.remove_macrons(head):
pagemsg("WARNING: Can't replace comparative degree %s with %s because they differ when macrons are removed" % (
head, real_comp))
else:
pagemsg("Using real comparative degree %s instead of %s" % (
real_comp, head))
head = real_comp
else:
pagemsg("WARNING: Couldn't retrieve real comparative for positive degree %s" % real_head)
else:
newname = "la-adv-sup"
infldeg = "superlative"
# Mirror of the comparative branch, for the superlative.
if real_sup and real_sup != "-":
if lalib.remove_macrons(real_sup) != lalib.remove_macrons(head):
pagemsg("WARNING: Can't replace superlative degree %s with %s because they differ when macrons are removed" % (
head, real_sup))
else:
pagemsg("Using real superlative degree %s instead of %s" % (
real_sup, head))
head = real_sup
else:
pagemsg("WARNING: Couldn't retrieve real superlative for positive degree %s" % real_head)
# Rewrite the template in place: drop head=, 2= and 1=, rename it to newname,
# then set parameter 1 to the chosen headword.
origt = str(t)
rmparam(t, "head")
rmparam(t, "2")
rmparam(t, "1")
blib.set_template_name(t, newname)
t.add("1", head)
pagemsg("Replaced %s with %s" % (origt, str(t)))
notes.append("replace {{head|la|...}} with {{%s}} and fix up positive/%s" %
(newname, infldeg))
# Serialize the parsed wikitext back into its slot in subsections.
subsections[k] = str(parsed)
# Reassemble the section body, put it back into sections, and return the joined
# text together with the collected notes.
secbody = "".join(subsections)
sections[j] = secbody + sectail
return "".join(sections), notes
# Script driver: build the argument parser, resolve the start/end bounds from
# the parsed arguments, and run process_page over the selected pages through
# blib.do_pagefile_cats_refs. default_cats is presumably the set of categories
# used when the command line names no page source -- confirm against blib.
parser = blib.create_argparser(
    "Fix headword of Latin comparative and superlative adverbs",
    include_pagefile=True,
)
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)
blib.do_pagefile_cats_refs(
    args,
    start,
    end,
    process_page,
    default_cats=["Latin comparative adverbs", "Latin superlative adverbs"],
    edit=True,
)