forked from matejsuchanek/pywikibot-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
list_typos.py
96 lines (76 loc) · 3.02 KB
/
list_typos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import pywikibot
from pywikibot import pagegenerators, textlib
from pywikibot.bot import SingleSiteBot, SkipPageError
from .typoloader import TypoRule, TyposLoader
class TypoReportBot(SingleSiteBot):

    """Bot that lists pages whose text matches one of the loaded typo rules.

    For every searchable rule the bot queries matching pages, re-checks the
    rule against the page text with the disabled parts removed, and records
    one report line per hit. During teardown the collected lines may be
    saved to the page named by the 'outputpage' option.
    """

    # Template of a single report line: page link first, matched text second.
    pattern = '# {} – {}'

    def __init__(self, **kwargs):
        """Register this bot's options, then delegate to the base class."""
        extra_options = {
            'always': True,
            'anything': False,
            'outputpage': None,
            'typospage': None,
            'whitelistpage': None,
        }
        self.availableOptions.update(extra_options)
        super(TypoReportBot, self).__init__(**kwargs)

    def setup(self):
        """Load the typo rules and the whitelist; reset the report buffer."""
        site = self.site
        typos_title = self.getOption('typospage')
        whitelist_title = self.getOption('whitelistpage')
        loader = TyposLoader(
            site,
            allrules=True,
            typospage=typos_title,
            whitelistpage=whitelist_title)
        self.typoRules = loader.loadTypos()
        self.fp_page = loader.getWhitelistPage()
        self.whitelist = loader.loadWhitelist()
        # Report lines accumulated by treat() and written out by teardown().
        self.data = []

    @property
    def generator(self):
        """Yield preloaded pages found by each searchable rule in turn.

        The rule being processed is stored in ``self.current_rule`` so that
        skip_page() and treat() know which rule produced the page.
        """
        for typo_rule in self.typoRules:
            if rule_is_searchable(typo_rule):
                pywikibot.output('Query: "%s"' % typo_rule.query)
                self.current_rule = typo_rule
                found = pagegenerators.PreloadingGenerator(
                    typo_rule.querySearch())
                for candidate in found:
                    yield candidate

    def skip_page(self, page):
        """Return True for whitelisted pages and for title matches.

        A warning is printed in both of these cases. Any other page is
        handed over to the base class's skip_page().
        """
        title = page.title()
        if title in self.whitelist:
            reason = 'Skipped {page} because it is whitelisted'
        elif self.current_rule.find.search(title):
            reason = 'Skipped {page} because the rule matches its title'
        else:
            return super(TypoReportBot, self).skip_page(page)

        pywikibot.warning(reason.format(page=page))
        return True

    def treat(self, page):
        """Record a report line when the current rule matches the page."""
        finder = self.current_rule.find
        raw_text = page.text
        # Cheap pre-check on the unmodified text before stripping anything.
        if not finder.search(raw_text):
            return

        cleaned = textlib.removeDisabledParts(
            raw_text, TypoRule.exceptions, site=self.site)
        hit = finder.search(cleaned)
        if not hit:
            # The only matches were inside the removed parts.
            return

        line = self.pattern.format(page.title(as_link=True), hit.group(0))
        pywikibot.stdout(line)
        self.data.append(line)

    def teardown(self):
        """Save the report to the output page, then run the base teardown.

        The page is written only when an 'outputpage' is configured and
        either ``self._generator_completed`` is true or the 'anything'
        option is set.
        """
        target_title = self.getOption('outputpage')
        may_publish = (self._generator_completed
                       or self.getOption('anything'))
        if may_publish and target_title:
            report_page = pywikibot.Page(self.site, target_title)
            report_page.put(
                '\n'.join(self.data),
                summary='aktualizace seznamu překlepů',
                apply_cosmetic_changes=False,
                botflag=False,
                minor=False)
        super(TypoReportBot, self).teardown()


def rule_is_searchable(rule):
    """Return the result of the rule's own canSearch() check."""
    return rule.canSearch()
def main(*args):
    """Build the bot options from command-line arguments and run the bot.

    Arguments left over after ``pywikibot.handle_args`` are interpreted as
    follows:

    * ``-name:value`` sets option ``name`` to ``value``; the value is
      converted to ``int`` when it consists only of decimal digits.
    * ``-name`` (or ``-name:`` with an empty value) sets ``name`` to True.
    * Arguments that do not start with ``-`` are ignored.

    @param args: command-line arguments passed on to pywikibot.handle_args
    """
    options = {}
    for arg in pywikibot.handle_args(args):
        if not arg.startswith('-'):
            continue

        head, _, value = arg.partition(':')
        name = head[1:]
        if not value:
            options[name] = True
        elif value.isdecimal():
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscript digits, which int() rejects
            # with ValueError.
            options[name] = int(value)
        else:
            options[name] = value

    bot = TypoReportBot(**options)
    bot.run()


if __name__ == '__main__':
    main()