Ejemplo n.º 1
0
 def __init__(self):
     """Build a ``WikiCache`` from ``self.base_url`` and keep it as ``self.cache``."""
     self.cache = WikiCache(self.base_url)
Ejemplo n.º 2
0
class WikiChecker(object):
    """Run a set of wiki checkers and write a report of their mismatches.

    Besides what is defined here, the methods read ``self.path`` (the
    directory holding the ``expected`` and ``mismatches`` files) and call
    ``self.checkers()`` (an iterable of checker callables).  Neither is
    defined in this class -- presumably a subclass provides them.
    """

    base_url = 'http://bulbapedia.bulbagarden.net/w/api.php?'

    def __init__(self):
        """Create the page cache and the message list used by ``error()``."""
        self.cache = WikiCache(self.base_url)
        # error() appends to this list, so it has to exist on every instance.
        self.errors = []

    def check(self):
        """Run every checker, print the mismatches and write the report.

        Each checker may expose a ``needed_articles`` attribute; those pages
        are registered with the cache before a single ``fetch_pages()`` call.
        Every checker is then called and the error objects it yields are
        tagged with the checker's position (``checker_number``) and printed.

        The report goes to ``<self.path>/mismatches``.  Errors whose
        formatted text appears in ``<self.path>/expected`` (one entry per
        line, UTF-8; a missing or unreadable file means "nothing expected")
        are counted as ignored instead of being listed.
        """
        # First pass: register the pages of *all* checkers, so that one
        # fetch_pages() call covers them before any checker runs.
        checkers = []
        for number, checker in enumerate(self.checkers()):
            self.cache.mark_needed_pages(
                    getattr(checker, 'needed_articles', []))
            checkers.append((checker, number))
        self.cache.fetch_pages()

        # Second pass: run the checkers and collect what they report.
        errors = []
        for checker, number in checkers:
            new_errors = list(checker())
            for error in new_errors:
                error.checker_number = number
                print(error.str_format())
            errors.extend(new_errors)
        print('%s mismatches found' % len(errors))

        try:
            expected_file = open(os.path.join(self.path, 'expected'))
        except IOError:
            expected = set()
        else:
            # Close the handle once the entries have been read.
            with expected_file:
                expected = set(s.decode('utf-8').strip() for s in
                        expected_file.readlines())

        with open(os.path.join(self.path, 'mismatches'), 'w') as error_file:
            # Drop a trailing 'api.php?' so the report shows the wiki's base
            # path; if anything follows that marker (or it is absent, in
            # which case rpartition puts the whole URL in ``b``), report the
            # URL unchanged.
            base_url, sep, b = self.base_url.rpartition('api.php?')
            if b:
                base_url = self.base_url
            error_file.write(textwrap.dedent('''
            {{User:En-Cu-Kou/T|head|||

                | site = %s
                | wiki revision = %s
                |

            This report shows:
            * Errors and ommissions in the checking script
            * Errors in the database
            * Errors on the wiki
            It's up to humans to decide which is which.
            }}

            ''' % (base_url, self.cache.wiki.sync_timestamp)))
            ignored = []
            for error in sorted(errors, key=lambda e: (e.sort_key, e.checker_number, e.args)):
                str_formatted = error.str_format()
                # The expected file is read line by line, so a multi-line
                # message is compared with its newlines escaped.
                if str_formatted.replace('\n', r'\n') in expected:
                    ignored.append(str_formatted)
                else:
                    error_file.write('* ')
                    error_file.write(str_formatted.encode('utf-8'))
                    error_file.write('\n')
            written_count = len(errors) - len(ignored)
            error_file.write('\n')
            error_file.write('{{User:En-Cu-Kou/T|total||| num = %s }}\n' %
                    written_count)
            if ignored:
                error_file.write('{{User:En-Cu-Kou/T|ignored||| num = %s }}\n' %
                        len(ignored))

        print('%s mismatches written to file' % written_count)
        print('%s expected mismatches ignored' % len(ignored))

    def error(self, message):
        """Append ``message`` to ``self.errors`` and print it."""
        self.errors.append(message)
        print(message)