-
Notifications
You must be signed in to change notification settings - Fork 0
/
fix_latin_impers_pass_part.py
119 lines (97 loc) · 4.04 KB
/
fix_latin_impers_pass_part.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pywikibot, re, sys, argparse
import blib
from blib import getparam, rmparam, tname, msg, site
import lalib
def correct_nom_sg_n_participle(page, index, participle, lemma):
    """Make the participle entry on `page` an indeclinable (impersonal) one.

    The section body returned by ``lalib.find_latin_section`` is split on
    level-3 headers (``===...===``).  If it has exactly one
    ``===Participle===`` subsection, that subsection's body is replaced with
    a ``{{head}}`` / ``{{inflection of}}`` pair built from `participle` and
    `lemma`.  If it has none, a new ``===Participle===`` subsection is
    inserted before the first subsection that is either ``===References===``
    or contains a ``{{inflection of ...|sup`` line; when no such subsection
    exists it is appended to the end of the section body.

    Args:
        page: Page object; only ``title()`` and ``text`` are used here.
        index: Page index, used only in log messages.
        participle: Form placed in the ``head=`` parameter.
        lemma: Form placed in the ``{{inflection of}}`` call.

    Returns:
        ``(new_page_text, notes)`` where `notes` is a list of change
        descriptions, or ``(None, None)`` when ``find_latin_section`` returns
        None, when the section contains ``===Etymology 1===``, or when more
        than one ``===Participle===`` subsection is present.
    """
    pagetitle = str(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    pagemsg("Processing")

    retval = lalib.find_latin_section(str(page.text), pagemsg)
    if retval is None:
        return None, None
    # The last element of the 5-tuple is not needed by this function.
    sections, j, secbody, sectail, _has_non_latin = retval

    if "===Etymology 1===" in secbody:
        pagemsg("WARNING: Multiple etymologies, don't know what to do")
        return None, None

    notes = []

    # The capturing group keeps the headers in the result, so headers sit at
    # odd indices and their bodies at the following even indices.  `flags` is
    # passed by keyword: positional maxsplit/flags are deprecated in 3.13.
    subsections = re.split("(^===[^=\n]*===\n)", secbody, flags=re.M)

    # Built from explicit "\n"-terminated pieces so that the indentation of
    # this function can never end up inside the generated wikitext.
    participle_text = (
        "{{head|la|participle|[[indeclinable]]|head=%s}}\n"
        "# {{inflection of|la|%s||perf|pasv|part}}\n\n"
    ) % (participle, lemma)

    saw_participle = False
    for k in range(2, len(subsections), 2):
        if subsections[k - 1] == "===Participle===\n":
            if saw_participle:
                pagemsg("WARNING: Saw multiple participles, skipping")
                return None, None
            saw_participle = True
            subsections[k] = participle_text
            notes.append("correct participle %s of %s to be impersonal" %
                         (participle, lemma))
    secbody = "".join(subsections)

    if not saw_participle:
        for k in range(2, len(subsections), 2):
            insert_before = False
            if subsections[k - 1] == "===References===\n":
                pagemsg("Inserting new participle subsection before references subsection")
                insert_before = True
            elif re.search(r"\{\{inflection of.*\|sup", subsections[k]):
                pagemsg("Inserting new participle subsection before supine subsection")
                insert_before = True
            if insert_before:
                # Header and body go in as one element; this breaks the
                # header/body alternation, which is fine because the list is
                # joined immediately and not indexed again.
                subsections[k - 1:k - 1] = ["===Participle===\n" + participle_text]
                secbody = "".join(subsections)
                break
        else:
            # No insertion point found: append at the end, making sure a
            # blank line separates the new subsection from what precedes it.
            if not secbody.endswith("\n\n"):
                secbody += "\n\n"
            secbody += "===Participle===\n" + participle_text
        notes.append("add impersonal participle %s of %s" % (participle, lemma))

    sections[j] = secbody + sectail
    return "".join(sections), notes
def process_page(index, page, save, verbose, diff):
    """Fix the supine-derived participle pages for each ``la-conj`` on `page`.

    For every ``la-conj`` template on the page, the verb forms are generated
    with ``lalib.generate_verb_forms``.  Each comma-separated form under the
    ``sup_acc`` key is then handled in two steps: a "Line to delete" message
    is logged for the corresponding ``-us`` form, and the page named after
    the macron-stripped form is edited through ``blib.do_edit`` using
    ``correct_nom_sg_n_participle``.

    Args:
        index: Page index, used in log messages and passed to ``blib.do_edit``.
        page: Page object to scan for ``la-conj`` templates.
        save: Passed through to ``blib.do_edit``.
        verbose: Passed through to ``blib.expand_text`` and ``blib.do_edit``.
        diff: Passed through to ``blib.do_edit``.

    Returns:
        None.  All edits happen as side effects of ``blib.do_edit``.
    """
    pagetitle = str(page.title())

    def pagemsg(txt):
        msg("Page %s %s: %s" % (index, pagetitle, txt))

    def errandpagemsg(txt):
        # Qualified with `blib.`: the bare name `errandmsg` is not among the
        # names this file imports from blib, so calling it unqualified would
        # raise NameError the first time an error is reported.
        # NOTE(review): assumes blib defines errandmsg, as the original call
        # implies -- confirm.
        blib.errandmsg("Page %s %s: %s" % (index, pagetitle, txt))

    def expand_text(tempcall):
        return blib.expand_text(tempcall, pagetitle, pagemsg, verbose)

    pagemsg("Processing")

    parsed = blib.parse(page)
    for t in parsed.filter_templates():
        if tname(t) != "la-conj":
            continue
        args = lalib.generate_verb_forms(str(t), errandpagemsg, expand_text)
        # NOTE(review): presumably generate_verb_forms reports failures via
        # errandpagemsg and returns None -- confirm against lalib.  Skipping
        # here avoids an AttributeError on `.get` in that case.
        if args is None:
            continue
        supforms = args.get("sup_acc", "")
        if not supforms:
            continue
        for supform in supforms.split(","):
            non_impers_part = re.sub("um$", "us", supform)
            pagemsg("Line to delete: part %s allbutnomsgn {{la-adecl|%s}}" % (
                non_impers_part, non_impers_part))

            def do_correct_nom_sg_n_participle(page, index, parsed):
                # Reads `supform` and `args` from the enclosing iteration.
                return correct_nom_sg_n_participle(page, index, supform,
                                                   args["1s_pres_actv_indc"])

            blib.do_edit(pywikibot.Page(site,
                                        lalib.remove_macrons(supform)), index,
                         do_correct_nom_sg_n_participle, save=save, verbose=verbose,
                         diff=diff)
# Command-line driver: parse the arguments, work out which pages to skip and
# hand every remaining page to process_page().
parser = blib.create_argparser(
    "Fix Latin impersonal passive participles and output deletion lines for non-impersonal variants",
    include_pagefile=True)
parser.add_argument("--ignore", help="Comma-separated pages to ignore.")
args = parser.parse_args()
start, end = blib.parse_start_end(args.start, args.end)

# Titles given via --ignore; empty when the option is absent or blank.
ignore_pages = args.ignore.split(",") if args.ignore else []


def do_process_page(page, index, parsed):
    """Run process_page() on `page` unless its title is in `ignore_pages`.

    Ignored pages yield ``(None, None)``; otherwise the return value of
    process_page() is passed straight back.  `parsed` is accepted but unused.
    """
    if str(page.title()) in ignore_pages:
        return None, None
    return process_page(index, page, args.save, args.verbose, args.diff)


blib.do_pagefile_cats_refs(
    args, start, end, do_process_page, edit=True,
    default_cats=["Latin verbs with impersonal passive"])