Example #1
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Makes sure that interwiki links and categories are put
        into the correct position and into the right order. This
        combines the old instances of standardizeInterwiki
        and standardizeCategories.

        The page footer consists of the following parts
        in that sequence:
        1. categories
        2. additional information depending on the local site policy
        3. interwiki
        """
        categories = []
        interwiki_links = []

        # get categories
        if not self.template:
            categories = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                try:
                    tmpl, loc = moved_links[self.site.code]
                    del tmpl
                except KeyError:
                    loc = None
                if loc is not None and loc in self.title:
                    subpage = True

            # get interwiki
            interwiki_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # remove interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)

        # add categories, main to top
        if categories:
            # TODO: Sort categories in alphabetic order, e.g. using
            # categories.sort()? (T100265)
            # TODO: Get main categories from Wikidata?
            main = pywikibot.Category(self.site,
                                      'Category:' + self.title,
                                      sort_key=' ')
            if main in categories:
                categories.pop(categories.index(main))
                categories.insert(0, main)
            text = textlib.replaceCategoryLinks(text,
                                                categories,
                                                site=self.site)

        # add interwiki
        if interwiki_links:
            text = textlib.replaceLanguageLinks(text,
                                                interwiki_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)

        return text
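
The footer helpers used above come from pywikibot.textlib. A minimal standalone round trip in the same order (interwiki stripped first, categories re-inserted, interwiki appended last), assuming an enwiki connection; the sample wikitext is made up:

import pywikibot
from pywikibot import textlib

site = pywikibot.Site('en', 'wikipedia')
text = 'Some article text.\n[[fr:Exemple]]\n[[Category:Examples]]'

cats = textlib.getCategoryLinks(text, site=site)       # extract Category objects
links = textlib.getLanguageLinks(text, insite=site)    # extract interwiki links
text = textlib.removeLanguageLinks(text, site=site)    # strip interwiki first
text = textlib.replaceCategoryLinks(text, cats, site=site)   # categories next
text = textlib.replaceLanguageLinks(text, links, site=site)  # interwiki last
print(text)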
Example #2
def CAT(site, name, hide):
    name = site.namespace(14) + ':' + name
    cat = pywikibot.Category(site, name)
    yield from cat.articles(endsort=hide)
    if hide:
        yield from cat.articles(startFrom=chr(ord(hide) + 1))
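
CAT() above iterates a category's articles in two slices, skipping the sortkey range between hide and the next character. A hedged usage sketch (wiki and category name are illustrative):

import pywikibot

site = pywikibot.Site('en', 'wikipedia')
# Skip entries whose sortkey falls between 'W' and 'X'.
for article in CAT(site, 'Featured articles', 'W'):
    print(article.title())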
Example #3
import mwparserfromhell as mwp
import pywikibot as pwb

pwb.config.put_throttle = 0

site = pwb.Site()

try:
    for page in pwb.Category(site, "Formes de verbes en français").articles():
        page_text = page.text
        if "pron-rimes" not in page_text:
            parsed_text = mwp.parse(page_text)
            templates = parsed_text.filter_templates()
            # Guard on len(t.params) so templates with fewer than two
            # parameters cannot raise an IndexError inside the filter.
            fr_pron_templates = [
                t for t in templates
                if t.name == "pron" and len(t.params) > 1
                and t.params[1] == "fr"
            ]
            if not fr_pron_templates or not fr_pron_templates[0].params:
                continue
            fr_pron = fr_pron_templates[0].params[0]

            lang_sections = parsed_text.get_sections(levels=[2])
            if page_text.count("{{langue|") != len(lang_sections):
                print("Mauvais nombre de sections : " + page.title())
                continue
            fr_section = [
                s for s in lang_sections
                if s.filter_headings()[0].title.filter_templates()[0]
                == "{{langue|fr}}"
            ][0]
            if "{{S|prononciation}}" in fr_section:
                level_3_sections = fr_section.get_sections(levels=[3])
Example #4
def encatlist(encat):
    count = 0
    listenpageTitle = []
    encat = encat.replace(u'[[', u'').replace(u']]', u'').replace(
        u'Category:', u'').replace(u'category:', u'').strip()
    language = 'en'
    encat = pywikibot.Category(pywikibot.Site(language), encat)
    listacategory = [encat]
    for enpageTitle in listacategory:
        try:
            fapages = pagefafinder(enpageTitle)
            if fapages is not False:
                # Map namespace numbers to title prefixes; the other
                # namespaces listed below are skipped entirely.
                prefixes = {
                    '14': u'Category:', '12': u'Help:', '10': u'Template:',
                    '6': u'File:', '4': u'Wikipedia:', '100': u'Portal:',
                }
                for pages, profix_fa in fapages:
                    if profix_fa in prefixes:
                        pages = prefixes[profix_fa] + unicode(pages, 'UTF-8')
                    elif profix_fa in [
                            '1', '2', '3', '5', '7', '8', '9', '11', '13',
                            '15', '101', '103', '118', '119', '446', '447',
                            '828', '829'
                    ]:
                        continue
                    else:
                        pages = unicode(pages, 'UTF-8')
                    pywikibot.output(u'\03{lightgreen}Adding ' + pages +
                                     u' to fapage lists\03{default}')
                    listenpageTitle.append(pages)

        except Exception:

            try:
                enpageTitle = unicode(
                    str(enpageTitle),
                    'UTF-8').split(u'|')[0].split(u']]')[0].replace(
                        u'[[', u'').strip()
            except Exception:
                enpageTitle = enpageTitle.split(u'|')[0].split(
                    u']]')[0].replace(u'[[', u'').strip()
            cat = pywikibot.Category(pywikibot.Site(language), enpageTitle)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            for pagework in gent:
                count += 1
                try:
                    link = str(pagework).split(u'|')[0].split(
                        u']]')[0].replace(u'[[', u'').strip()
                except Exception:
                    pagework = unicode(str(pagework), 'UTF-8')
                    link = pagework.split(u'|')[0].split(u']]')[0].replace(
                        u'[[', u'').strip()
                pywikibot.output(link)
                fapagetitle = englishdictionry(link, en_site, fa_site)
                if fapagetitle is False:
                    continue
                else:
                    pywikibot.output(u'\03{lightgreen}Adding ' + fapagetitle +
                                     u' to fapage lists\03{default}')
                    listenpageTitle.append(fapagetitle)

    if not listenpageTitle:
        return False, False
    return listenpageTitle, listacategory
Example #5
import pywikibot
from pywikibot import pagegenerators
import regex as re  # use this rather than "re" to avoid the "look-behind requires fixed-width pattern" error

site = pywikibot.Site('zh', 'wikipedia')
cat = pywikibot.Category(site, 'Category:連結格式不正確的條目')
gen = pagegenerators.CategorizedPageGenerator(cat, recurse=True)

ilh = '(?<!\{\{(Advtranslation|Plant\-translation|Translate|Translating|Translation[ _]+WIP|Translation|Trans|Tran|Voltranslation|Wptranslation|正在翻(譯|译)|(翻)?(譯|译)(中)?)[^\}]*)\[\[\:(w|aa|ab|ace|ady|af|ak|als|am|an|ang|ar|arc|arz|as|ast|av|ay|az|azb|ba|bar|bat-smg|bcl|be|be-tarask|be-x-old|bg|bh|bi|bjn|bm|bn|bo|bpy|br|bs|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|cho|chr|chy|ckb|co|cr|crh|cs|csb|cu|cv|cy|da|de|diq|dsb|dv|dz|ee|egl|eml|el|en|eo|es|et|eu|ext|fa|ff|fi|fiu-vro|fj|fo|fr|frp|frr|fur|fy|ga|gag|gan|gd|gl|glk|gn|gom|got|gsw|als|gu|gv|ha|hak|haw|he|hi|hif|ho|hr|hsb|ht|hu|hy|hz|ia|id|ie|ig|ii|ik|ilo|io|is|it|iu|ja|jp|jam|jbo|jv|ka|kaa|kab|kbd|kg|ki|kj|kk|kl|km|kn|ko|koi|kr|krc|ks|ksh|ku|kv|kw|ky|la|lad|lb|lbe|lez|lg|li|lij|lmo|ln|lo|lrc|lt|ltg|lv|lzh|zh-classical|mai|map-bms|mdf|mg|mh|mhr|mi|min|mk|ml|mn|mo|mr|mrj|ms|mt|mus|mwl|my|myv|mzn|na|nah|nan|zh-min-nan|nap|nb|no|nds|nds-nl|ne|ne|new|ng|nl|nn|no|nov|nrm|nso|nv|ny|oc|olo|om|or|os|pa|pag|pam|pap|pcd|pdc|pfl|pi|pih|pl|pms|pnb|pnt|ps|pt|qu|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rup|rw|sa|sah|sc|scn|sco|sd|se|sg|sgs|sh|si|simple|sk|sl|sm|sn|so|sq|sr|srn|ss|st|stq|su|sv|sw|szl|ta|tcy|te|tet|tg|th|ti|tk|tl|tn|to|tpi|tr|ts|tt|tum|tw|ty|tyv|udm|ug|uk|ur|uz|ve|vec|vep|vi|vls|vo|vro|wa|war|wo|wuu|xal|xh|xmf|yi|yo|yue|zh-yue|za|zea|zu)\:(?!(wiktionary|wikt|wikinews|n|wikibooks|b|wikiquote|q|wikisource|s|oldwikisource|species|wikispecies|wikiversity|v|betawikiversity|wikimedia|foundation|wmf|wikivoyage|voy|commons|c|meta|metawikipedia|m|strategy|incubator|mediawikiwiki|mw|mediawiki|quality|otrswiki|otrs|ticket|phabricator|bugzilla|mediazilla|phab|nost|testwiki|wikidata|d|outreach|outreachwiki|toollabs|wikitech|dbdump|download|gerrit|mail|mailarchive|rev|spcom|sulutil|svn|tools|tswiki|wm2016|wm2017|wmania|User|Wikipedia|MediaWiki|File|Image|WP|Project|Template|Help|Special|U|利用者)\:)|(?<=\r|\n)(\=){2,}.*\[\[.*\]\].*(\=){2,}|\[\[(JP|JA|EN)\:\:'

ls_t = r'\{\{Link style\|time=\d{4}\-\d{2}\-\d{2}T\d{2}\:\d{2}\:\d{2}\+00:00\}\}'

count = 0

tot_num = len(list(cat.articles(namespaces=0, recurse=True)))
print(tot_num)

for page in gen:
    count += 1

    art_txt = page.text
    ilh_num = len(re.findall(ilh, art_txt, re.I))
    if ilh_num > 0:
        continue

    new_art_txt = re.sub(ls_t + '\n', '', art_txt, flags=re.I)
    new_art_txt = re.sub(ls_t, '', new_art_txt, flags=re.I)

    page.text = new_art_txt
    percentage = 100 * count / tot_num

    print(format(percentage, '0.3f'), '%:', page.title(), 'has', ilh_num,
Example #6
        match = CSD_TEMPLATE.search(self.text)
        return match.group(1) if match else None

    def get_csd_rev(self):
        csd_revs = (rev for rev in self.page_object.revisions()
                    if CSD_SUMMARY.search(rev.comment))
        try:
            return next(csd_revs)
        except StopIteration:
            return None


site = pywikibot.Site()
rules = AlexNewArtBotResult(rulesName)

cat = pywikibot.Category(site, catName)

# Find scores for each article in the category
articles = [Article(page) for page in cat.articles(namespaces=0)]
articles.sort(key=lambda a: a.score, reverse=True)
articles = articles[:NUM_ARTICLES]

# Upload to the wiki
content = ""
content += "== CSD alerts =="
now = datetime.datetime.utcnow()
for each_article in articles:
    csd_rev = each_article.get_csd_rev()
    if csd_rev:
        deletion_delta = now - csd_rev.timestamp
        age_in_hours = float(deletion_delta.total_seconds()) / 3600
Example #7
config_page = pywikibot.Page(site, config_page_name)
cfg = config_page.text
cfg = json.loads(cfg)
print(json.dumps(cfg, indent=4, ensure_ascii=False))

if not cfg['enable']:
    exit('disabled\n')

reported_pages = set()
for page in pywikibot.Page(site, cfg['np_page']).linkedPages(namespaces=[0]):
    reported_pages.add(page.title())
# print(reported_pages)

afd_pages = set()
for page in pywikibot.Category(site,
                               cfg['afd_category']).members(namespaces=[0]):
    afd_pages.add(page.title())
# print(afd_pages)

othertext = ''
text_dict = {}
for cate in pywikibot.Category(site, cfg['np_category']).members():
    if cate.namespace().id != 14:
        othertext += '# [[:{}]]\n'.format(cate.title())
        continue
    m = re.search(r'^Category:自(\d+)年(\d+)月主題關注度不足的條目$', cate.title())
    key = None
    if m:
        key = int(m.group(1)) * 100 + int(m.group(2))
        text_dict[key] = '=== [[:{}]] ===\n'.format(cate.title())
    for page in cate.members():
Example #8
def importPadsCheckbox():
    category = pywikibot.Category(site, catnameimport)
    gen = pagegenerators.CategorizedPageGenerator(category)
    print('Reading [[Category:%s]]' % (catnameimport))
    #pages = [pywikibot.Page(site, 'Usuari:TeixidoraBot/Sandbox3')]
    #for page in pages:
    for page in gen:
        print('\n== %s ==\n' % (page.title()))
        #get pads to import
        padnotes = page.text.split('{{Esdeveniment pr/padnotes')[1:]
        padnumber = 0
        pads = []
        for padnote in padnotes:
            padnote = padnote.split('}}')[0]
            try:
                padnumber += 1
                padurl = re.findall(r'(?im)notes pad url\s*=\s*(https?://[^\s\|]+)', padnote)[0]
                importornot = bool(re.findall(r'(?im)bot import\s*=\s*Si', padnote))
                pads.append([padnumber, padurl, importornot])
            except IndexError:
                # this padnote has no "notes pad url" parameter
                pass
        print('Pads found:\n%s' % (pads))
        contentall = ''
        padurls = []
        for padnumber, padurl, importornot in pads:
            print('Padurl: %s' % (padurl))
            if importornot:
                #get pad content
                padurls.append(padurl)
                content = getPadContent(url=padurl)
                content = removeNoImport(content=content)
                contentall += '\n' + content
                #create redirect from apunts to apunts/01
                """if padnumber == 1:
                    redapuntstitle = '%s/apunts' % (page.title())
                    redapuntspage = pywikibot.Page(site, redapuntstitle)
                    if not redapuntspage.exists():
                        redapuntspage.text = '#REDIRECT [[%s/apunts/01]]' % (page.title())
                        redapuntspage.save('BOT - Redirect')"""
                #save pad and params
                apuntstitle = '%s/apunts/%02d' % (page.title(), padnumber)
                apuntspage = pywikibot.Page(site, apuntstitle)
                if not apuntspage.exists() or \
                    (apuntspage.exists() and len(apuntspage.text) < 5) or \
                    '/Sandbox' in apuntstitle or \
                    getPageHistoryAuthors(page=apuntspage) == [botname]:
                    #import labels into template parameters and remove them from content
                    content = importLabels(padurl=padurl, page=page, content=content)
                    #import pad into apunts
                    importPad(padurl=padurl, content=content, apuntspage=apuntspage)
                    #remove checkbox
                    switchCheckbox(oldvalue='Si', newvalue='Fet', page=page, apuntspage=apuntspage)
                    print('Imported correctly')
                else:
                    log(log='[[%s|Apunts page]] exists, skipping. [%s Pad] not imported.' % (apuntstitle, padurl))
                    switchCheckbox(oldvalue='Si', newvalue='Fet', page=page, apuntspage=apuntspage)
            else:
                print('Not set to import, skipping...')
        #import keywords
        if padurls:
            importKeywords(padurls=padurls, page=page, content=contentall)
Example #9
    def addCategory(self, catName):
        cat = pywikibot.Category(self.site, "Catégorie:" + catName)
        for page in cat.articles(recurse=True):
            if page.userName() != 'CoalémosBot':
                self.pages.append(page)
Example #10
def main():
    # page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # No verbose output
    verbose = True

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        elif arg == '-quiet':
            verbose = False
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.Site(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        site = pywikibot.Site()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            pass
        else:
            if not namespaces:
                namespaces = [0]
            cat = pywikibot.Category(
                site, "%s:%s" % (site.category_namespace(), cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always, verbose)
        bot.run()
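
For comparison, the generator pipeline that main() assembles can also be wired up directly; a sketch under the same assumptions (the category title is made up, and NoReferencesBot is the class the script defines elsewhere):

import pywikibot
from pywikibot import pagegenerators

site = pywikibot.Site()
cat = pywikibot.Category(site, 'Category:Articles lacking sources')
gen = pagegenerators.CategorizedPageGenerator(cat)
gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
preloading = pagegenerators.PreloadingGenerator(gen)
# bot = NoReferencesBot(preloading, True, False)  # (gen, always, verbose)
# bot.run()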
Example #11
    def __init__(self, **kwargs):
        """Initializer."""
        super(InformationBot, self).__init__(**kwargs)
        lang_tmp_cat = pywikibot.Category(self.site, self.lang_tmp_cat)
        self.lang_tmps = lang_tmp_cat.articles(namespaces=[10])
Example #12
def add_dates(site, category_name, template_names):
    print('Розчищаємо', category_name)
    cat = pywikibot.Category(site, 'Категорія:' + category_name)

    for page in pagegenerators.PreloadingGenerator(cat.articles(), 10):
        fix_page(site, page)
Example #13
    def newCategoryGenerator(bot, category):
        cat = pywikibot.Category(bot.site, category)
        return pagegenerators.CategorizedPageGenerator(cat)
Example #14
    def test_init(self):
        """Test the category's __init__ for one condition that can't be dry."""
        site = self.get_site()
        with self.assertRaisesRegex(ValueError, self.NOCATEGORYNAMESPACE_RE):
            pywikibot.Category(site, 'Wikipedia:Test')
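
The assertion depends on pywikibot.Category raising ValueError for a title outside the category namespace. A standalone sketch of the same behavior (site choice arbitrary):

import pywikibot

site = pywikibot.Site('en', 'wikipedia')
try:
    pywikibot.Category(site, 'Wikipedia:Test')  # not a Category: title
except ValueError as error:
    print('rejected:', error)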
Example #15
def do_cat(cat):
    cat = pywikibot.Category(site, cat)
    gen = cat.articles(namespaces=[0], content=True)
    for page in gen:
        do_page(page, cat)
Example #16
def main():
    config.use_mwparserfromhell = False
    locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8')

    db = False

    global test
    global dry

    dry = False  # Enable only for tests
    test = False  # Enable only when testing the script on a single page
    ns_test = False
    recurse_test = False
    for arg in pywikibot.handleArgs():
        if arg == "-dry":
            dry = True
            pywikibot.output(u'(dry is ON)')

        elif arg[0:6] == "-test:":
            test = True
            titre_page_test = arg[6:]

        elif arg[0:4] == "-ns:":
            ns_test = True
            namespaces_test_value = [int(i) for i in arg[4:].split(',')]

        elif arg[0:9] == "-recurse:":
            recurse_test = True
            recurse_test_value = bool(arg[9:])

    comment_modele = u"%(nombre_articles)i articles) (Bot: Mise à jour de la liste des articles récents (%(precision_pages)s)"
    site = pywikibot.Site()
    titre_modele = u"Articles récents"
    modele = pywikibot.Page(site, titre_modele, ns=10)
    gen = pagegenerators.ReferringPageGenerator(modele,
                                                onlyTemplateInclusion=True)

    matchDebut1 = u"<!-- Ce tableau est créé automatiquement par un robot. Articles Récents DEBUT -->"
    matchFin1 = u"\n<!-- Ce tableau est créé automatiquement par un robot. Articles Récents FIN -->"

    matchDebut2 = u"<!-- Ce tableau est créé automatiquement par un robot. Articles Récents Liste DEBUT -->"
    matchFin2 = u"\n<!-- Ce tableau est créé automatiquement par un robot. Articles Récents Liste FIN -->"

    if test:
        pywikibot.output(u'(test is ON)')
        gen = [pywikibot.Page(site, titre_page_test)]

    if ns_test:
        pywikibot.output(u'(ns_test is ON)')

    for main_page in gen:
        try:
            comment = comment_modele
            pywikibot.output(
                u"\n========================\nTraitement de %s\n========================"
                % main_page.title())
            text = main_page.get()

            #####################
            ### Retrieve information about the page
            #####################
            templates = textlib.extract_templates_and_params_regex(text)
            template_in_use = None
            for tpl in templates:
                if tpl[0] != u'Articles récents':
                    continue
                template_in_use = tpl[1]
                break

            if not template_in_use:
                pywikibot.output(
                    u"Aucun modèle {{Articles récents}} détecté sur la page %s"
                    % main_page.title())
                continue

            titre_categorie = check_and_return_parameter(
                template_in_use, u'catégorie')
            if not titre_categorie:
                continue
            cat = pywikibot.Category(site, titre_categorie)

            nbMax = check_and_return_parameter(template_in_use, 'nbMax', 10)
            try:
                nbMax = int(nbMax)
            except ValueError:
                pywikibot.output(u'Erreur : nbMax incorrect')
                continue

            namespaces = check_and_return_parameter(template_in_use,
                                                    'namespaces', '0')
            namespaces = namespaces.split(',')
            try:
                namespaces = [int(k) for k in namespaces]
            except ValueError:
                pywikibot.output(
                    u'Erreur : des namespaces spécifiés ne sont pas des entiers'
                )
                continue

            recurse = check_and_return_parameter(template_in_use, 'recurse',
                                                 '0')
            if recurse.lower().strip() in ('oui', '1'):
                recurse = True
            else:
                recurse = False

            delai_creation = check_and_return_parameter(
                template_in_use, 'delai', '0')
            try:
                delai_creation = int(delai_creation)
            except ValueError:
                pywikibot.output(u'Erreur : delai incorrect')
                continue

            format_date = check_and_return_parameter(template_in_use,
                                                     u'date') or None
            if format_date:
                try:
                    test_date = datetime.datetime.now()
                    test_date.strftime(format_date)
                except Exception:
                    format_date = None
                    pywikibot.output(u'Erreur : format de date incorrect')

            puce = check_and_return_parameter(template_in_use, 'puces', '#')

            listeRecents = text[(text.index(matchDebut1) +
                                 len(matchDebut1)):text.index(matchFin1)]

            # Drop the first (empty) element of the list
            listeRecents = listeRecents.split('\n%s ' % puce)[1:]

            listeRecents_old = list(listeRecents)
            listeRecents = list()
            dico_dates_presentes = {}

            for recent in listeRecents_old:
                r = re.search(u"(\[\[.*\]\]) ?(\(.+\))?", recent)
                if r:
                    listeRecents.append(r.group(1))
                    if r.group(2):
                        dico_dates_presentes[r.group(1)] = r.group(2)[1:-1]

            text = re.sub(
                re.compile(u"%s.*%s" % (matchDebut2, matchFin2), re.S),
                u"%s%s" % (matchDebut2, matchFin2), text)
            #####################

            # In case there are no new pages but one or more
            # pages have been deleted
            exception_maj = False

            # To refine the edit summary
            precisions_comment = u""

            pywikibot.output('stade 0')
            #####################
            ### Check the current recent pages (in case of deletions)
            #####################
            for titre_article in listeRecents:
                try:
                    page = pywikibot.Page(
                        site, re.sub(u"\[\[(.*)\]\]", "\\1", titre_article
                                     ))  # To strip the brackets: [[…]].
                    # If the page still exists and is not a redirect,
                    # keep it in the list…
                    page.get()

                    if (format_date
                            and titre_article not in dico_dates_presentes
                            and find_date(page, cat)):
                        # A date was found where none was recorded before.
                        exception_maj = True
                        dico_dates_presentes[titre_article] = find_date(
                            page, cat).strftime(format_date)

                except pywikibot.NoPage:
                    pywikibot.output(u"La page %s n'existe plus." %
                                     page.title(asLink=True))

                    pywikibot.output(
                        u"Suppression de la page %s de la liste listeRecents" %
                        page.title(asLink=True))
                    precisions_comment += (u"; - %s" % titre_article)
                    listeRecents.remove(titre_article)

                    # Force an update of the page, even if no new
                    # recent article is found.
                    exception_maj = True
                except pywikibot.IsRedirectPage:
                    pywikibot.output(
                        u"La page %s n'est plus qu'une redirection." %
                        page.title(asLink=True))

                    try:
                        nouvelle_page = page.getRedirectTarget()
                        pywikibot.output(
                            u"Modification du titre la page %s (renommée en %s)"
                            % (page.title(asLink=True),
                               nouvelle_page.title(asLink=True,
                                                   withSection=False)))
                        precisions_comment += (
                            u"; - %s ; + %s" %
                            (titre_article,
                             nouvelle_page.title(asLink=True,
                                                 withSection=False)))

                        if not nouvelle_page.title(
                                asLink=True,
                                withSection=False) in listeRecents:
                            listeRecents[listeRecents.index(
                                titre_article)] = nouvelle_page.title(
                                    asLink=True, withSection=False)
                        else:
                            pywikibot.output(
                                u"La page destination était déjà présente dans la liste"
                            )
                            listeRecents.pop(listeRecents.index(titre_article))

                        # Force an update of the page, even if no new
                        # recent article is found.
                        exception_maj = True

                    except Exception:
                        pywikibot.output(
                            u"an error occured (CircularRedirect?)")
                #except KeyboardInterrupt:
                #	pywikibot.stopme()
                except Exception:
                    try:
                        pywikibot.output(
                            u"Erreur inconnue lors du traitement de la page %s"
                            % page.title(asLink=True))
                    except Exception:
                        pywikibot.output(
                            u"Erreur inconnue lors du traitement d'une page")
                else:
                    # No error: move on to the next page
                    continue

            if precisions_comment:
                precisions_comment = precisions_comment[
                    2:]  # Strip the leading '; '
            #####################

            #####################
            ### Search for new articles
            #####################
            precisions_comment2 = u""

            # Fetch the bot's last update of the page
            db = _mysql.connect(
                host='tools-db',
                db='s51245__totoazero',
                read_default_file="/data/project/totoazero/replica.my.cnf")
            results = db.query(
                'SELECT last FROM maj_articles_recents WHERE page="%s"' %
                main_page.title().replace('"', '\\"').encode('utf-8'))
            results = db.store_result()
            result = results.fetch_row(maxrows=0)
            pywikibot.output(("last check was " + str(result)))
            if result:
                first_passage = False
                t = result[0][0]
                timestamp = pywikibot.Timestamp.strptime(
                    t, "%Y-%m-%d %H:%M:%S")

                # Build only the list of articles added to the category
                # after the last edit of the page embedding the
                # {{Articles récents}} template.
                #list_new.extend([page for page in site.categorymembers(cat, starttime=timestamp, sortby='timestamp', namespaces=[0])])
                list_new = [
                    page for page in cat.articles(starttime=timestamp,
                                                  sortby='timestamp',
                                                  namespaces=namespaces,
                                                  recurse=recurse)
                ]
                list_new.reverse()

            else:  # new page, first run of the bot
                first_passage = True

                timestamp = main_page.editTime()
                if delai_creation > 0:
                    timestamp -= datetime.timedelta(hours=delai_creation)

                # Build the initial list, to avoid leaving the page
                # empty if possible.
                list_new = [page for page in cat.newest_pages(total=nbMax)]

                # TODO: better?
                #list_new = [page for page in cat.articles(sortby='timestamp', namespaces=namespaces, recurse=recurse)]

            pywikibot.output('stade 2')
            now = datetime.datetime.now()

            # NB: exception_maj may have been set to True if an article
            # was removed from the category.
            if len(list_new) == 0 and not exception_maj:
                # No point going further if there is no new article.
                end_page(main_page, now, first_passage)
                continue

            # List of pages for the SQL query against the frwiki_p database
            list_new_str = '("'
            list_new_str += '", "'.join([
                page.title(asLink=False, underscore=True).replace('"', '\\"')
                for page in list_new
            ])
            list_new_str += '")'
            pywikibot.output(list_new_str)

            # Only works with namespace-0 pages for now
            frwiki_p = _mysql.connect(
                host='frwiki.analytics.db.svc.eqiad.wmflabs',
                db='frwiki_p',
                read_default_file="/data/project/totoazero/replica.my.cnf")
            pywikibot.output(
                'SELECT page_title, page_id FROM page where page_title IN %s AND page_namespace=0'
                % list_new_str.encode('utf-8'))
            results = frwiki_p.query(
                'SELECT page_title, page_id FROM page where page_title IN %s AND page_namespace=0'
                % list_new_str.encode('utf-8'))
            results = frwiki_p.store_result()
            result = results.fetch_row(maxrows=0)
            pywikibot.output(result)

            dico_result = {}
            for tuple in result:
                title = tuple[0]
                id = tuple[1]
                dico_result[title] = id
            pywikibot.output(dico_result)

            dico_timestamp = {}

            pywikibot.output('stade 3')
            frwiki_p = _mysql.connect(
                host='frwiki.analytics.db.svc.eqiad.wmflabs',
                db='frwiki_p',
                read_default_file="/data/project/totoazero/replica.my.cnf")
            for key in dico_result:
                id = dico_result[key]

                pywikibot.output(
                    'SELECT cl_from, cl_timestamp FROM categorylinks WHERE cl_from = %s AND cl_to = "%s"'
                    % (id.encode('utf-8'),
                       cat.title(asLink=False,
                                 underscore=True,
                                 withNamespace=False).encode('utf-8')))
                results = frwiki_p.query(
                    'SELECT cl_from, cl_timestamp FROM categorylinks WHERE cl_from = %s AND cl_to = "%s"'
                    % (id.encode('utf-8'),
                       cat.title(asLink=False,
                                 underscore=True,
                                 withNamespace=False).encode('utf-8')))
                results = frwiki_p.store_result()
                result = results.fetch_row(maxrows=0)
                if result:
                    dico_timestamp[key.decode(
                        'utf-8')] = pywikibot.Timestamp.strptime(
                            result[0][1], "%Y-%m-%d %H:%M:%S")
                else:
                    pywikibot.output(u"pas de date trouvée pour %s" %
                                     key.decode('utf-8'))

            pywikibot.output(dico_timestamp)

            # Turn the new pages into titles:
            # needed further down!
            list_new = [page.title(asLink=True) for page in list_new]

            # Fetch information about the category.
            # NB: if this slows the script down, the cat_info['pages']
            #     item (the number of pages in the category) should be
            #     replaceable by len(listeCategorie) + len(list_new).
            cat_info = site.categoryinfo(cat)
            pywikibot.output(cat_info)

            pywikibot.output('stade 4')
            list_new_old = list()
            list_new_old.extend(list_new)

            pywikibot.output('delai_creation is %s' % delai_creation)
            #for titre_page in list_new_old:
            #	print titre_page

            for titre_page in list_new_old:
                # NB: titre_page has the form [[Page title]]
                pywikibot.output("----------")
                pywikibot.output(u"Page récemment ajoutée : %s" % titre_page)
                if titre_page not in listeRecents:
                    if delai_creation:
                        # Required delay (in hours) since the creation of the
                        # article, beyond which an article recently added to
                        # the category must not appear in the list.
                        # Example: delai_creation = 24
                        #    => the bot only lists articles created less
                        #       than 24 hours ago.
                        page = pywikibot.Page(site, titre_page[2:-2])

                        # NB: date_creation and date_plus_petite_requise
                        #     are pywikibot.Timestamp objects
                        date_creation = page.getVersionHistory()[-1][1]
                        pywikibot.output(date_creation)

                        if delai_creation > 0:
                            date_plus_petite_requise = pywikibot.Timestamp.now(
                            ) - datetime.timedelta(hours=delai_creation)
                        elif delai_creation == -1:
                            # 'timestamp' was defined above as the date of
                            # the bot's last edit on the page.
                            date_plus_petite_requise = timestamp

                        pywikibot.output(date_plus_petite_requise)

                        if date_plus_petite_requise > date_creation:
                            pywikibot.output(u"Vérification du délai : Non")
                            pywikibot.output(
                                u"La page ne satisfait pas le délai depuis la création imposé."
                            )
                            list_new.remove(titre_page)
                            continue
                        else:
                            pywikibot.output(u"Vérification du délai : OK")

                    precisions_comment2 += (u"; + %s" % titre_page)
                else:
                    # If the article is already in the listeRecents list,
                    # there is no need to add it again.
                    list_new.remove(titre_page)

                    pywikibot.output(
                        u"L'article était déjà présent sur la page.")

                    # Check again whether list_new still contains
                    # at least one page.
                    if len(list_new) == 0 and not exception_maj:
                        # No point going further if there is no new article.
                        pywikibot.output('Nothing left.')
                        continue

            # Check again whether list_new still contains
            # at least one page.
            if len(list_new) == 0 and not exception_maj:
                # No point going further if there is no new article.
                end_page(main_page, now, first_passage)
                continue

            if precisions_comment:  # If precisions_comment already holds info (page deletions)
                precisions_comment += precisions_comment2
            else:
                precisions_comment = precisions_comment2[
                    2:]  # Strip the leading '; '

            pywikibot.output('stade 5')

            # To complete the edit summary
            comment = comment % {
                'nombre_articles': cat_info['pages'],
                'precision_pages': precisions_comment
            }

            #####################
            ### Build the list of recent articles
            #####################
            liste_nouveaux_recents = list()
            liste_nouveaux_recents.extend(list_new)
            # If the number of new articles is strictly below the maximum
            # number of recent articles that must be shown.
            if len(liste_nouveaux_recents) < nbMax:
                i = 0
                while len(liste_nouveaux_recents) != nbMax:
                    if len(listeRecents) < i + 1:
                        # In case the listeRecents list does not contain
                        # enough elements.
                        break
                    liste_nouveaux_recents.append(listeRecents[i])
                    i += 1
                    if i == len(listeRecents):  # could overrun the list
                        break
            elif len(liste_nouveaux_recents) > nbMax:
                # Keep exactly nbMax entries; the original slice
                # [0:nbMax - 1] kept one too few.
                liste_nouveaux_recents = liste_nouveaux_recents[:nbMax]

            # The liste_nouveaux_recents list now contains exactly
            # nbMax recent articles

            pywikibot.output('stade 6')
            liste_nouveaux_recents_string = u"<!-- Ce tableau est créé automatiquement par un robot. Articles Récents DEBUT -->"
            for titre_article in liste_nouveaux_recents:
                liste_nouveaux_recents_string += u'\n%s %s' % (puce,
                                                               titre_article)
                if format_date and titre_article[2:-2].replace(
                        ' ', '_') in dico_timestamp:
                    pywikibot.output('stade 6-1')
                    pywikibot.output(
                        dico_timestamp[titre_article[2:-2].replace(
                            ' ', '_')].strftime(format_date))
                    try:
                        liste_nouveaux_recents_string += (
                            ' (' + dico_timestamp[titre_article[2:-2].replace(
                                ' ',
                                '_')].strftime(format_date).decode('utf-8') +
                            ')')
                    except Exception:
                        try:
                            liste_nouveaux_recents_string += (
                                ' (' +
                                dico_timestamp[titre_article[2:-2].replace(
                                    ' ', '_')].strftime(format_date) + ')')
                        except Exception:
                            # raising a plain string is invalid Python
                            raise Exception("erreur au stade 6-1")

                elif titre_article in dico_dates_presentes:
                    pywikibot.output('stade 6-2')
                    pywikibot.output(dico_dates_presentes[titre_article])
                    try:
                        liste_nouveaux_recents_string += (
                            ' (' + dico_dates_presentes[titre_article] + ')')
                    except Exception:  # UnicodeEncodeError:
                        try:
                            liste_nouveaux_recents_string += (
                                ' (' +
                                dico_dates_presentes[titre_article].decode(
                                    'utf-8') + ')')
                        except Exception:
                            # raising a plain string is invalid Python
                            raise Exception("erreur au stade 6-2")

            liste_nouveaux_recents_string += u"\n<!-- Ce tableau est créé automatiquement par un robot. Articles Récents FIN -->"
            #####################

            #####################
            ### Update the page content
            #####################
            new_text = text

            pywikibot.output('stade 7')
            # Update the list of recent articles (listeRecents)
            new_text = re.sub(
                re.compile(u'%s.*%s' % (matchDebut1, matchFin1), re.S),
                liste_nouveaux_recents_string, new_text)

            pywikibot.output(new_text)

            pywikibot.output(u'Commentaire: %s' % comment)

            if not dry:
                main_page.put(new_text, comment=comment)
                end_page(main_page, now, first_passage)
            else:
                pywikibot.showDiff(main_page.get(), new_text)
            #####################
        except Exception as myexception:
            pywikibot.output("Erreur lors du traitement de la page %s" %
                             main_page.title(asLink=True))
            _errorhandler.handle(
                myexception,
                level='warning',
                addtags={'page': main_page.title(asLink=True)})
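
The core of Example #16 is an incremental query: list the pages added to a category since a given timestamp. A much-reduced sketch of just that step (site, category, and cut-off are hypothetical):

import datetime

import pywikibot

site = pywikibot.Site('fr', 'wikipedia')
cat = pywikibot.Category(site, 'Catégorie:Exemple')
since = pywikibot.Timestamp.now() - datetime.timedelta(days=7)

# cat.articles() can filter on the categorization timestamp directly.
new_pages = list(cat.articles(starttime=since, sortby='timestamp',
                              namespaces=[0]))
new_pages.reverse()  # newest first, as in the script above
for page in new_pages:
    print(page.title(as_link=True))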
Example #17
    def run(self):
        """Run the bot."""
        # validate L10N
        self.template_list = self.site.category_redirects()
        if not self.template_list:
            pywikibot.warning('No redirect templates defined for {}'.format(
                self.site))
            return
        if not self.get_cat():
            pywikibot.warning('No redirect category found for {}'.format(
                self.site))
            return

        self.user = self.site.user()  # invokes login()
        self.newredirs = []

        localtime = time.localtime()
        today = '{:04d}-{:02d}-{:02d}'.format(*localtime[:3])
        self.datafile = pywikibot.config.datafilepath(
            '{}-catmovebot-data'.format(self.site.dbName()))
        try:
            with open(self.datafile, 'rb') as inp:
                self.record = pickle.load(inp)
        except IOError:
            self.record = {}
        if self.record:
            with open(self.datafile + '.bak', 'wb') as f:
                pickle.dump(self.record, f, protocol=config.pickle_protocol)
        # regex to match soft category redirects
        # TODO: enhance and use textlib.MultiTemplateMatchBuilder
        # note that any templates containing optional "category:" are
        # incorrect and will be fixed by the bot
        template_regex = re.compile(
            r"""{{\s*(?:%(prefix)s\s*:\s*)?  # optional "template:"
                     (?:%(template)s)\s*\|   # catredir template name
                     (\s*%(catns)s\s*:\s*)?  # optional "category:"
                     ([^|}]+)                # redirect target cat
                     (?:\|[^|}]*)*}}         # optional arguments 2+, ignored
             """ % {
                'prefix':
                self.site.namespace(10).lower(),
                'template':
                '|'.join(
                    item.replace(' ', '[ _]+') for item in self.template_list),
                'catns':
                self.site.namespace(14)
            }, re.I | re.X)

        self.check_hard_redirect()

        comment = i18n.twtranslate(self.site, self.move_comment)
        counts = {}
        nonemptypages = []
        redircat = self.cat

        pywikibot.output('\nChecking {} category redirect pages'.format(
            redircat.categoryinfo['subcats']))
        catpages = set()
        for cat in redircat.subcategories():
            catpages.add(cat)
            cat_title = cat.title(with_ns=False)
            if 'category redirect' in cat_title:
                message = i18n.twtranslate(
                    self.site, 'category_redirect-log-ignoring',
                    {'oldcat': cat.title(as_link=True, textlink=True)})
                self.log_text.append(message)
                continue
            if hasattr(cat, '_catinfo'):
                # skip empty categories that don't return a "categoryinfo" key
                catdata = cat.categoryinfo
                if 'size' in catdata and int(catdata['size']):
                    # save those categories that have contents
                    nonemptypages.append(cat)
            if cat_title not in self.record:
                # make sure every redirect has a self.record entry
                self.record[cat_title] = {today: None}
                with suppress(Error):
                    self.newredirs.append('*# {} → {}'.format(
                        cat.title(as_link=True, textlink=True),
                        cat.getCategoryRedirectTarget().title(as_link=True,
                                                              textlink=True)))
                # do a null edit on cat
                with suppress(Exception):
                    cat.save()

        # delete self.record entries for non-existent categories
        for cat_name in list(self.record):
            if pywikibot.Category(self.site,
                                  self.catprefix + cat_name) not in catpages:
                del self.record[cat_name]

        pywikibot.output(
            '\nMoving pages out of {} redirected categories.'.format(
                len(nonemptypages)))

        for cat in pagegenerators.PreloadingGenerator(nonemptypages):
            try:
                if not cat.isCategoryRedirect():
                    message = i18n.twtranslate(
                        self.site, 'category_redirect-log-false-positive',
                        {'oldcat': cat.title(as_link=True, textlink=True)})
                    self.log_text.append(message)
                    continue
            except Error:
                message = i18n.twtranslate(
                    self.site, 'category_redirect-log-not-loaded',
                    {'oldcat': cat.title(as_link=True, textlink=True)})
                self.log_text.append(message)
                continue
            cat_title = cat.title(with_ns=False)
            if not self.readyToEdit(cat):
                counts[cat_title] = None
                message = i18n.twtranslate(
                    self.site, 'category_redirect-log-skipping',
                    {'oldcat': cat.title(as_link=True, textlink=True)})
                self.log_text.append(message)
                continue
            dest = cat.getCategoryRedirectTarget()
            if not dest.exists():
                message = i18n.twtranslate(
                    self.site, 'category_redirect-problem-redirects', {
                        'oldcat': cat.title(as_link=True, textlink=True),
                        'redpage': dest.title(as_link=True, textlink=True)
                    })
                self.problems.append(message)
                # do a null edit on cat to update any special redirect
                # categories this wiki might maintain
                with suppress(Exception):
                    cat.save()
                continue
            if dest.isCategoryRedirect():
                double = dest.getCategoryRedirectTarget()
                if double in (dest, cat):
                    message = i18n.twtranslate(
                        self.site, 'category_redirect-log-loop',
                        {'oldcat': dest.title(as_link=True, textlink=True)})
                    self.log_text.append(message)
                    # do a null edit on cat
                    with suppress(Exception):
                        cat.save()
                else:
                    message = i18n.twtranslate(
                        self.site, 'category_redirect-log-double', {
                            'oldcat': cat.title(as_link=True, textlink=True),
                            'newcat': dest.title(as_link=True, textlink=True),
                            'targetcat': double.title(as_link=True,
                                                      textlink=True)
                        })
                    self.log_text.append(message)
                    oldtext = cat.text
                    # remove the old redirect from the old text,
                    # leaving behind any non-redirect text
                    oldtext = template_regex.sub('', oldtext)
                    newtext = ('{{%(redirtemp)s|%(ncat)s}}' % {
                        'redirtemp': self.template_list[0],
                        'ncat': double.title(with_ns=False)
                    })
                    newtext = newtext + oldtext.strip()
                    try:
                        cat.text = newtext
                        cat.save(
                            i18n.twtranslate(self.site,
                                             self.dbl_redir_comment))
                    except Error as e:
                        message = i18n.twtranslate(
                            self.site, 'category_redirect-log-failed',
                            {'error': e})
                        self.log_text.append(message)
                continue

            found, moved = self.move_contents(cat_title,
                                              dest.title(with_ns=False),
                                              editSummary=comment)
            if found is None:
                message = i18n.twtranslate(
                    self.site, 'category_redirect-log-move-error',
                    {'oldcat': cat.title(as_link=True, textlink=True)})
                self.log_text.append(message)
            elif found:
                self.record[cat_title][today] = found
                message = i18n.twtranslate(
                    self.site, 'category_redirect-log-moved', {
                        'oldcat': cat.title(as_link=True, textlink=True),
                        'found': found,
                        'moved': moved
                    })
                self.log_text.append(message)
            counts[cat_title] = found
            # do a null edit on cat
            with suppress(Exception):
                cat.save()

        self.teardown()
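
For one concrete template name, the soft-redirect pattern that run() builds reduces to the standalone sketch below; the English name 'Category redirect' is an assumption (the bot reads the real list from site.category_redirects()):

import re

template_regex = re.compile(
    r"""{{\s*(?:template\s*:\s*)?      # optional "template:"
         Category[ _]+redirect\s*\|    # catredir template name
         (\s*category\s*:\s*)?         # optional "category:"
         ([^|}]+)                      # redirect target cat
         (?:\|[^|}]*)*}}               # optional arguments 2+, ignored
     """, re.I | re.X)

match = template_regex.search('{{Category redirect|Category:New name}}')
print(match.group(2))  # -> 'New name'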
Example #18
import pywikibot
from pywikibot import pagegenerators
siteC = pywikibot.Site(u'commons', u'commons')
siteC.login()

category = pywikibot.Category(siteC, u'Images from Wiki Loves Africa 2021')
gen = pagegenerators.CategorizedPageGenerator(category)
cmp_sound = 0
cmp_video = 0

for file in gen:

    title = file.title()
    text = file.text
    print('Analyze: ' + file.title())

    # str.endswith with a tuple handles '.webm' correctly ('.webm' is five
    # characters, so the original title[-4:] comparison never matched it),
    # and the video/sound counters were swapped between the two branches.
    if title.endswith(('.webm', '.ogv')):
        if '[[Category:Videos from Wiki Loves Africa 2021]]' not in text:
            newtext = text + '\r\n' + '[[Category:Videos from Wiki Loves Africa 2021]]'
            file.text = newtext
            file.save(u"Add category video WLA")
            print(file.title() + ' - WLA ***************************')
            cmp_video += 1
    elif title.endswith(('.ogg', '.wav')):
        if '[[Category:Audio from Wiki Loves Africa 2021]]' not in text:
            newtext = text + '\r\n' + '[[Category:Audio from Wiki Loves Africa 2021]]'
            file.text = newtext
            file.save(u"Add category sound WLA")
            print(file.title() + ' - WLA ***************************')
            cmp_sound += 1
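
A table-driven variant of the same classification removes the per-extension branches; a sketch with the category names from the snippet:

MEDIA_CATEGORIES = {
    ('.webm', '.ogv'): '[[Category:Videos from Wiki Loves Africa 2021]]',
    ('.ogg', '.wav'): '[[Category:Audio from Wiki Loves Africa 2021]]',
}


def category_for(title):
    """Return the category link for the file's extension, or None."""
    for suffixes, category in MEDIA_CATEGORIES.items():
        if title.lower().endswith(suffixes):
            return category
    return None

# category_for('Example.ogv') -> '[[Category:Videos from Wiki Loves Africa 2021]]'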
Example #19
            checkbroken = False
        elif arg.startswith('-keepparent'):
            removeparent = False
        elif arg.startswith('-all'):
            main = False
        elif not workingcatname:
            workingcatname = arg

    if not workingcatname:
        pywikibot.bot.suggest_help(missing_parameters=['working category'])
        sys.exit(0)

    mysite = pywikibot.Site()
    summary = i18n.twtranslate(mysite, 'makecat-create',
                               {'cat': workingcatname})
    workingcat = pywikibot.Category(
        mysite, u'%s:%s' % (mysite.namespaces.CATEGORY, workingcatname))
    filename = pywikibot.config.datafilepath(
        'category',
        # decode back to str so the concatenation works on Python 3
        workingcatname.encode('ascii', 'xmlcharrefreplace').decode('ascii')
        + '_exclude.txt')
    try:
        f = codecs.open(filename, 'r', encoding=mysite.encoding())
        for line in f.readlines():
            # remove trailing newlines and carriage returns
            line = line.rstrip('\r\n')
            pl = pywikibot.Page(mysite, line)
            checked[pl] = pl
        f.close()
Example #20
Author: Fae, http://j.mp/faewm
Permissions: CC-BY-SA-4.0
'''

import pywikibot, sys, re, string, time
from sys import argv
from time import sleep
from colorama import Fore, Back, Style, init
init()

site = pywikibot.getSite('commons', 'commons')

print Fore.GREEN + NOTICE, Fore.WHITE

catname = u"Category:Images from the Canadian Copyright Collection at the British Library"
category = pywikibot.Category(site, catname)
pairs = []
count = 0
for image in category.members():
    if image.namespace() != ":File:":
        continue
    if not re.search("\(HS85-10-.{4,}\).jpg", image.title()):
        continue
    pair = []
    for ext in ['tif', 'tiff']:
        tiff = re.sub(r"\.jpg$", " original." + ext, image.title())
        tim = pywikibot.ImagePage(site, tiff)
        if tim.exists():
            pair = [image, tim]
            break
    if pair == []:
Example #21
import re
import urllib.request

import pywikibot
from pywikibot import pagegenerators


def main():
    site = pywikibot.Site('wikiapiary', 'wikiapiary')
    catname = 'Category:Website'
    cat = pywikibot.Category(site, catname)
    gen = pagegenerators.CategorizedPageGenerator(cat, start='Spyropedia')
    pre = pagegenerators.PreloadingGenerator(gen)

    for page in pre:
        if page.isRedirectPage():
            continue

        wtitle = page.title()
        wtext = page.text

        #if not wtitle.startswith('5'):
        #    continue

        if re.search('Internet Archive', wtext):
            #print('It has IA parameter')
            pass
        else:
            print('\n', '#' * 50, '\n', wtitle, '\n', '#' * 50)
            print('https://wikiapiary.com/wiki/%s' %
                  (re.sub(' ', '_', wtitle)))
            print('Missing IA parameter')

            if re.search(r'(?i)API URL=http', wtext):
                apiurl = re.findall(r'(?i)API URL=(http[^\n]+?)\n', wtext)[0]
                print('API:', apiurl)
            else:
                print('No API found in WikiApiary, skipping')
                continue

            indexurl = 'index.php'.join(apiurl.rsplit('api.php', 1))
            urliasearch = 'https://archive.org/search.php?query=originalurl:"%s" OR originalurl:"%s"' % (
                apiurl, indexurl)
            f = urllib.request.urlopen(urliasearch)
            raw = f.read().decode('utf-8')
            if re.search(r'(?i)Your search did not match any items', raw):
                print('No dumps found at Internet Archive')
            else:
                itemidentifier = re.findall(
                    r'<a href="/details/([^ ]+?)" title=', raw)[0]
                itemurl = 'https://archive.org/details/%s' % (itemidentifier)
                print('Item found:', itemurl)

                metaurl = 'https://archive.org/download/%s/%s_files.xml' % (
                    itemidentifier, itemidentifier)
                g = urllib.request.urlopen(metaurl)
                raw2 = g.read().decode('utf-8')
                raw2 = raw2.split('</file>')
                itemfiles = []
                for raw2_ in raw2:
                    try:
                        x = re.findall(
                            r'(?im)<file name="[^ ]+-(\d{8})-[^ ]+" source="original">',
                            raw2_)[0]
                        y = re.findall(r'(?im)<size>(\d+)</size>', raw2_)[0]
                        itemfiles.append([int(x), int(y)])
                    except (IndexError, ValueError):
                        # entry without a dated original dump file; skip it
                        pass

                itemfiles.sort(reverse=True)
                print(itemfiles)
                itemdate = str(itemfiles[0][0])[0:4] + '/' + str(
                    itemfiles[0][0])[4:6] + '/' + str(itemfiles[0][0])[6:8]
                itemsize = itemfiles[0][1]

                iaparams = """|Internet Archive identifier=%s
|Internet Archive URL=%s
|Internet Archive added date=%s 00:00:00
|Internet Archive file size=%s""" % (itemidentifier, itemurl, itemdate,
                                     itemsize)
                newtext = page.text
                newtext = re.sub(r'(?im)\n\}\}', '\n%s\n}}' % (iaparams),
                                 newtext)

                if page.text != newtext:
                    pywikibot.showDiff(page.text, newtext)
                    page.text = newtext
                    page.save('BOT - Adding dump details: %s, %s, %s bytes' %
                              (itemidentifier, itemdate, itemsize),
                              botflag=True)
Example No. 22
def main(*args):
    """
    Process command line arguments and perform task.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Loading the comments
    global categoryToCheck, project_inserted
    # always, define a generator to understand if the user sets one,
    # defining what's genFactory
    always = False
    generator = False
    show = False
    moveBlockCheck = False
    protectedpages = False
    protectType = 'edit'
    namespace = 0

    # To prevent Infinite loops
    errorCount = 0

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    # Process local args
    for arg in local_args:
        if arg == '-always':
            always = True
        elif arg == '-move':
            moveBlockCheck = True
        elif arg == '-show':
            show = True
        elif arg.startswith('-protectedpages'):
            protectedpages = True
            if len(arg) > 15:
                namespace = int(arg[16:])
        elif arg.startswith('-moveprotected'):
            protectedpages = True
            protectType = 'move'
            if len(arg) > 14:
                namespace = int(arg[15:])
        else:
            genFactory.handleArg(arg)

    if config.mylang not in project_inserted:
        pywikibot.output(u"Your project is not supported by this script.\n"
                         u"You have to edit the script and add it!")
        return

    site = pywikibot.Site()

    if protectedpages:
        generator = site.protectedpages(namespace=namespace, type=protectType)
    # Take the right templates to use, the category and the comment
    TSP = i18n.translate(site, templateSemiProtection)
    TTP = i18n.translate(site, templateTotalProtection)
    TSMP = i18n.translate(site, templateSemiMoveProtection)
    TTMP = i18n.translate(site, templateTotalMoveProtection)
    TNR = i18n.translate(site, templateNoRegex)
    TU = i18n.translate(site, templateUnique)

    categories = i18n.translate(site, categoryToCheck)
    commentUsed = i18n.twtranslate(site, 'blockpageschecker-summary')
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = list()
        pywikibot.output(u'Loading categories...')
        # Define the category if no other generator has been set
        for CAT in categories:
            cat = pywikibot.Category(site, CAT)
            # Define the generator
            gen = pagegenerators.CategorizedPageGenerator(cat)
            for pageCat in gen:
                generator.append(pageCat)
        pywikibot.output(u'Categories loaded, start!')
    # Main Loop
    preloadingGen = pagegenerators.PreloadingGenerator(generator, step=60)
    for page in preloadingGen:
        pagename = page.title(asLink=True)
        pywikibot.output('Loading %s...' % pagename)
        try:
            text = page.text
        except pywikibot.NoPage:
            pywikibot.output("%s doesn't exist! Skipping..." % pagename)
            continue
        except pywikibot.IsRedirectPage:
            pywikibot.output("%s is a redirect! Skipping..." % pagename)
            if show:
                showQuest(page)
            continue
        # FIXME: This check does not work :
        # PreloadingGenerator cannot set correctly page.editRestriction
        # (see bug 55322)
        # if not page.canBeEdited():
        #    pywikibot.output("%s is sysop-protected : this account can't edit "
        #                     "it! Skipping..." % pagename)
        #    continue
        restrictions = page.protection()
        try:
            editRestr = restrictions['edit']
        except KeyError:
            editRestr = None
        if not page.canBeEdited():
            pywikibot.output(u"%s is protected: "
                             u"this account can't edit it! Skipping..." %
                             pagename)
            continue

        # Work out, from the template on the page, what the protection
        # should be and compare it with what is actually there.
        TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU)
        # Only to see if the text is the same or not...
        oldtext = text
        # keep track of the changes for each step (edit then move)
        changes = -1

        if not editRestr:
            # page is not edit-protected
            # Deleting the template because the page doesn't need it.
            if TU:
                replaceToPerform = u'|'.join(TTP + TSP + TU)
            else:
                replaceToPerform = u'|'.join(TTP + TSP)
            text, changes = re.subn(
                '<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
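            # For illustration, with hypothetical template names this turns
            # "<noinclude>{{Protected}}</noinclude>" into "", leaving the
            # rest of the page text untouched.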
            if changes == 0:
                text, changes = re.subn('(%s)' % replaceToPerform, '', text)
            msg = u'The page is editable for all'
            if not moveBlockCheck:
                msg += u', deleting the template..'
            pywikibot.output(u'%s.' % msg)

        elif editRestr[0] == 'sysop':
            # total edit protection
            if (TemplateInThePage[0] == 'sysop-total' and TTP) or \
               (TemplateInThePage[0] == 'unique' and TU):
                msg = 'The page is sysop-protected'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                pywikibot.output(
                    u'The page is sysop-protected, but the '
                    u'template does not seem correct. Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[1], text)

        elif TSP or TU:
            # implicitly editRestr[0] = 'autoconfirmed', edit semi-protection
            if TemplateInThePage[0] == 'autoconfirmed-total' or \
               TemplateInThePage[0] == 'unique':
                msg = 'The page is editable only by autoconfirmed users'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                pywikibot.output(u'The page is editable only by '
                                 u'autoconfirmed users, but the template '
                                 u'does not seem correct. Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[0], text)

        if changes == 0:
            # We tried to fix edit-protection templates, but it did not work.
            pywikibot.warning('No edit-protection template could be found')

        if moveBlockCheck and changes > -1:
            # checking move protection now
            try:
                moveRestr = restrictions['move']
            except KeyError:
                moveRestr = False
            changes = -1

            if not moveRestr:
                pywikibot.output(u'The page is movable for all, deleting the '
                                 u'template...')
                # Deleting the template because the page doesn't need it.
                if TU:
                    replaceToPerform = u'|'.join(TSMP + TTMP + TU)
                else:
                    replaceToPerform = u'|'.join(TSMP + TTMP)
                text, changes = re.subn(
                    '<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
                if changes == 0:
                    text, changes = re.subn('(%s)' % replaceToPerform, '',
                                            text)
            elif moveRestr[0] == 'sysop':
                # move-total-protection
                if (TemplateInThePage[0] == 'sysop-move' and TTMP) or \
                   (TemplateInThePage[0] == 'unique' and TU):
                    pywikibot.output(u'The page is move-protected at sysop '
                                     u'level, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(u'The page is move-protected at sysop '
                                     u'level, but the template does not seem '
                                     u'correct. Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[3],
                                                text)

            elif TSMP or TU:
                # implicitly moveRestr[0] = 'autoconfirmed',
                # move-semi-protection
                if TemplateInThePage[0] == 'autoconfirmed-move' or \
                   TemplateInThePage[0] == 'unique':
                    pywikibot.output(u'The page is movable only by '
                                     u'autoconfirmed users, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(u'The page is movable only by '
                                     u'autoconfirmed users, but the template '
                                     u'does not seem correct. Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[2],
                                                text)

            if changes == 0:
                # We tried to fix move-protection templates, but it did not work
                pywikibot.warning('No move-protection template could be found')

        if oldtext != text:
            # Ok, asking if the change has to be performed and do it if yes.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                             page.title())
            pywikibot.showDiff(oldtext, text)
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these '
                    u'changes?', [('Yes', 'y'), ('No', 'n'), ('All', 'a')],
                    'n')
                if choice == 'a':
                    always = True
            if always or choice == 'y':
                while True:
                    try:
                        page.put(text, commentUsed, force=True)
                    except pywikibot.EditConflict:
                        pywikibot.output(u'Edit conflict! skip!')
                        break
                    except pywikibot.ServerError:
                        # Sometimes this error occurs; it is quite annoying
                        # because it can block the whole process for nothing.
                        errorCount += 1
                        if errorCount < 5:
                            pywikibot.output(u'Server Error! Wait..')
                            time.sleep(3)
                            continue
                        else:
                            # Prevent Infinite Loops
                            raise pywikibot.ServerError(u'Fifth Server Error!')
                    except pywikibot.SpamfilterError as e:
                        pywikibot.output(u'Cannot change %s because of '
                                         u'blacklist entry %s' %
                                         (page.title(), e.url))
                        break
                    except pywikibot.LockedPage:
                        pywikibot.output(u'The page is still protected. '
                                         u'Skipping...')
                        break
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Error putting page: %s' %
                                         (error.args, ))
                        break
                    else:
                        # Break only if the errors are one after the other
                        errorCount = 0
                        break
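For reference, the argument handling above implies invocations along these lines (via pywikibot's standard pwb.py wrapper; the namespace number is an example):

python pwb.py blockpageschecker -always
python pwb.py blockpageschecker -moveprotected:0 -move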
Example No. 23
def main():
    summary_commandline, gen, template = None, None, None
    namespaces, PageTitles, exceptions = [], [], []
    encat, newcatfile = '', ''
    autoText, autoTitle = False, False
    recentcat, newcat = False, False
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    pywikibot.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
            break
        elif arg.startswith('-except:'):
            exceptions.append(arg[8:])
        elif arg.startswith('-template:'):
            template = arg[10:]
        elif arg.startswith('-facat:'):
            facat = arg.replace(u'Category:',
                                u'').replace(u'category:',
                                             u'').replace(u'زمرو:', u'')
            encat = englishdictionry(u'زمرو:' + facat[7:], fa_site,
                                     en_site).replace(u'Category:',
                                                      u'').replace(
                                                          u'category:', u'')
            break
        elif arg.startswith('-encat:'):
            encat = arg[7:].replace(u'Category:',
                                    u'').replace(u'category:',
                                                 u'').replace(u'زمرو:', u'')
            break
        elif arg.startswith('-newcatfile:'):
            newcatfile = arg[12:]
            break
        elif arg.startswith('-recentcat'):
            arg = arg.replace(':', '')
            if len(arg) == 10:
                genfa = pagegenerators.RecentchangesPageGenerator()
            else:
                genfa = pagegenerators.RecentchangesPageGenerator(
                    number=int(arg[10:]))
            genfa = pagegenerators.DuplicateFilterPageGenerator(genfa)
            genfa = pagegenerators.NamespaceFilterPageGenerator(genfa, [14])
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            recentcat = True
            break
        elif arg.startswith('-newcat'):
            arg = arg.replace(':', '')
            if len(arg) == 7:
                genfa = pagegenerators.NewpagesPageGenerator(step=100,
                                                             namespaces=14)
            else:
                genfa = pagegenerators.NewpagesPageGenerator(step=int(arg[7:]),
                                                             namespaces=14)
            preloadingGen = pagegenerators.PreloadingGenerator(genfa, 60)
            newcat = True
            break
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            pywikibot.setAction(arg[9:])
            summary_commandline = True
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = genFactory.getCombinedGenerator(gen)
    if encat != '':
        encatfalist, encatlists = encatlist(encat)
        if encatlists:
            for encat in encatlists:
                encat = englishdictionry(encat, en_site, fa_site)
                if encat:
                    run([encat])
        if encatfalist is not False:
            run(encatfalist)
    if PageTitles:
        pages = [
            pywikibot.Page(fa_site, PageTitle) for PageTitle in PageTitles
        ]
        gen = iter(pages)
    if recentcat:
        for workpage in preloadingGen:
            workpage = workpage.title()
            cat = pywikibot.Category(fa_site, workpage)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            run(gent)
        pywikibot.stopme()
        sys.exit()
    if newcat:
        for workpage in preloadingGen:
            workpage = workpage.title()
            workpage = englishdictionry(workpage, fa_site, en_site)
            if workpage is not False:
                encatfalist, encatlists = encatlist(workpage)
                if encatlists:
                    for encat in encatlists:
                        encat = englishdictionry(encat, en_site, fa_site)
                        if encat:
                            run([encat])
                if encatfalist is not False:
                    run(encatfalist)
        pywikibot.stopme()
        sys.exit()
    if newcatfile:
        with codecs.open(newcatfile, 'r', 'utf8') as f:
            text = f.read()
        linken = re.findall(r'\[\[.*?\]\]', text, re.S)
        if linken:
            for workpage in linken:
                pywikibot.output(u'\03{lightblue}Working on --- Link ' +
                                 workpage + u' at the newcatfile\03{default}')
                workpage = workpage.split(u'|')[0].replace(u'[[', u'').replace(
                    u']]', u'').strip()
                workpage = englishdictionry(workpage, fa_site, en_site)
                if workpage is not False:
                    encatfalist, encatlists = encatlist(workpage)
                    workpage = englishdictionry(workpage, fa_site, en_site)
                    if encatlists:
                        run(encatlists)
                    if encatfalist is not False:
                        run(encatfalist)
        pywikibot.stopme()
        sys.exit()
    if not gen:
        pywikibot.stopme()
        sys.exit()
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
    run(preloadingGen)
Example No. 24
    def run(self):
        """Run the bot."""
        # validate L10N
        self.template_list = self.site.category_redirects()
        if not self.template_list:
            pywikibot.warning(u"No redirect templates defined for %s" %
                              self.site)
            return
        if not self.get_cat():
            pywikibot.warning(u"No redirect category found for %s" % self.site)
            return

        user = self.site.user()  # invokes login()
        newredirs = []

        localtime = time.localtime()
        today = '%04d-%02d-%02d' % localtime[:3]
        edit_request_page = pywikibot.Page(
            self.site, u"User:%s/category edit requests" % user)
        datafile = pywikibot.config.datafilepath("%s-catmovebot-data" %
                                                 self.site.dbName())
        try:
            with open(datafile, "rb") as inp:
                record = cPickle.load(inp)
        except IOError:
            record = {}
        if record:
            with open(datafile + ".bak", "wb") as f:
                cPickle.dump(record, f, protocol=config.pickle_protocol)
        # regex to match soft category redirects
        # TODO: enhance and use textlib._MultiTemplateMatchBuilder
        #  note that any templates containing optional "category:" are
        #  incorrect and will be fixed by the bot
        template_regex = re.compile(
            r"""{{\s*(?:%(prefix)s\s*:\s*)?  # optional "template:"
                     (?:%(template)s)\s*\|   # catredir template name
                     (\s*%(catns)s\s*:\s*)?  # optional "category:"
                     ([^|}]+)                # redirect target cat
                     (?:\|[^|}]*)*}}         # optional arguments 2+, ignored
             """ % {
                'prefix':
                self.site.namespace(10).lower(),
                'template':
                "|".join(
                    item.replace(" ", "[ _]+") for item in self.template_list),
                'catns':
                self.site.namespace(14)
            }, re.I | re.X)
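        # For illustration (template names are wiki-specific): this pattern
        # matches e.g. "{{Category redirect|Category:New name}}" and captures
        # "New name" as the redirect target in its second group.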

        self.check_hard_redirect()

        comment = i18n.twtranslate(self.site, self.move_comment)
        counts = {}
        nonemptypages = []
        redircat = self.cat

        pywikibot.output(u"\nChecking %d category redirect pages" %
                         redircat.categoryinfo['subcats'])
        catpages = set()
        for cat in redircat.subcategories():
            catpages.add(cat)
            cat_title = cat.title(withNamespace=False)
            if "category redirect" in cat_title:
                self.log_text.append(u"* Ignoring %s" %
                                     cat.title(asLink=True, textlink=True))
                continue
            if hasattr(cat, "_catinfo"):
                # skip empty categories that don't return a "categoryinfo" key
                catdata = cat.categoryinfo
                if "size" in catdata and int(catdata['size']):
                    # save those categories that have contents
                    nonemptypages.append(cat)
            if cat_title not in record:
                # make sure every redirect has a record entry
                record[cat_title] = {today: None}
                try:
                    newredirs.append("*# %s -> %s" %
                                     (cat.title(asLink=True, textlink=True),
                                      cat.getCategoryRedirectTarget().title(
                                          asLink=True, textlink=True)))
                except pywikibot.Error:
                    pass
                # do a null edit on cat
                try:
                    cat.save()
                except Exception:
                    pass

        # delete record entries for non-existent categories
        # iterate over a copy, since entries may be deleted below
        for cat_name in list(record.keys()):
            if pywikibot.Category(self.site,
                                  self.catprefix + cat_name) not in catpages:
                del record[cat_name]

        pywikibot.output(u"\nMoving pages out of %s redirected categories." %
                         len(nonemptypages))

        for cat in pagegenerators.PreloadingGenerator(nonemptypages):
            try:
                if not cat.isCategoryRedirect():
                    self.log_text.append(u"* False positive: %s" %
                                         cat.title(asLink=True, textlink=True))
                    continue
            except pywikibot.Error:
                self.log_text.append(u"* Could not load %s; ignoring" %
                                     cat.title(asLink=True, textlink=True))
                continue
            cat_title = cat.title(withNamespace=False)
            if not self.readyToEdit(cat):
                counts[cat_title] = None
                self.log_text.append(u"* Skipping %s; in cooldown period." %
                                     cat.title(asLink=True, textlink=True))
                continue
            dest = cat.getCategoryRedirectTarget()
            if not dest.exists():
                self.problems.append("# %s redirects to %s" %
                                     (cat.title(asLink=True, textlink=True),
                                      dest.title(asLink=True, textlink=True)))
                # do a null edit on cat to update any special redirect
                # categories this wiki might maintain
                try:
                    cat.save()
                except Exception:
                    pass
                continue
            if dest.isCategoryRedirect():
                double = dest.getCategoryRedirectTarget()
                if double == dest or double == cat:
                    self.log_text.append(
                        u"* Redirect loop from %s" %
                        dest.title(asLink=True, textlink=True))
                    # do a null edit on cat
                    try:
                        cat.save()
                    except Exception:
                        pass
                else:
                    self.log_text.append(
                        u"* Fixed double-redirect: %s -> %s -> %s" %
                        (cat.title(asLink=True, textlink=True),
                         dest.title(asLink=True, textlink=True),
                         double.title(asLink=True, textlink=True)))
                    oldtext = cat.text
                    # remove the old redirect from the old text,
                    # leaving behind any non-redirect text
                    oldtext = template_regex.sub("", oldtext)
                    newtext = (u"{{%(redirtemp)s|%(ncat)s}}" % {
                        'redirtemp': self.template_list[0],
                        'ncat': double.title(withNamespace=False)
                    })
                    newtext = newtext + oldtext.strip()
                    try:
                        cat.text = newtext
                        cat.save(
                            i18n.twtranslate(self.site,
                                             self.dbl_redir_comment))
                    except pywikibot.Error as e:
                        self.log_text.append("** Failed: %s" % e)
                continue

            found, moved = self.move_contents(cat_title,
                                              dest.title(withNamespace=False),
                                              editSummary=comment)
            if found is None:
                self.log_text.append(u"* [[:%s%s]]: error in move_contents" %
                                     (self.catprefix, cat_title))
            elif found:
                record[cat_title][today] = found
                self.log_text.append(u"* [[:%s%s]]: %d found, %d moved" %
                                     (self.catprefix, cat_title, found, moved))
            counts[cat_title] = found
            # do a null edit on cat
            try:
                cat.save()
            except Exception:
                pass

        with open(datafile, "wb") as f:
            cPickle.dump(record, f, protocol=config.pickle_protocol)

        self.log_text.sort()
        self.problems.sort()
        newredirs.sort()
        comment = i18n.twtranslate(self.site, self.maint_comment)
        self.log_page.text = (
            u"\n== %i-%02i-%02iT%02i:%02i:%02iZ ==\n" % time.gmtime()[:6] +
            u'\n'.join(self.log_text) +
            u'\n* New redirects since last report:\n' + u'\n'.join(newredirs) +
            u'\n' + u'\n'.join(self.problems) + u'\n' + self.get_log_text())
        self.log_page.save(comment)
        if self.edit_requests:
            edit_request_page.text = (self.edit_request_text % {
                'itemlist':
                u"\n" + u"\n".join((self.edit_request_item % item)
                                   for item in self.edit_requests)
            })
            edit_request_page.save(comment)
Example No. 25
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    global workingcat, parentcats
    global checked, tocheck
    global excludefile

    checked = {}
    tocheck = DequeGenerator()

    workingcatname = ''

    options = {}
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        option = arg[1:]
        if not arg.startswith('-'):
            if not workingcatname:
                workingcatname = arg
            else:
                pywikibot.warning('Working category "{}" is already given.'
                                  .format(workingcatname))
        else:
            options[option] = True

    if not workingcatname:
        pywikibot.bot.suggest_help(missing_parameters=['working category'])
        return

    mysite = pywikibot.Site()
    summary = i18n.twtranslate(mysite, 'makecat-create',
                               {'cat': workingcatname})

    bot = MakeCatBot(site=mysite, **options)

    workingcat = pywikibot.Category(mysite, '{0}{1}'
                                            .format(mysite.namespaces.CATEGORY,
                                                    workingcatname))
    filename = pywikibot.config.datafilepath(
        'category',
        workingcatname.encode('ascii', 'xmlcharrefreplace').decode('ascii')
        + '_exclude.txt')
    try:
        with codecs.open(filename, 'r', encoding=mysite.encoding()) as f:
            for line in f.readlines():
                # remove leading and trailing spaces, LF and CR
                line = line.strip()
                if not line:
                    continue
                pl = pywikibot.Page(mysite, line)
                checked[pl] = pl

        excludefile = codecs.open(filename, 'a', encoding=mysite.encoding())
    except IOError:
        # File does not exist
        excludefile = codecs.open(filename, 'w', encoding=mysite.encoding())

    # Get parent categories in order to `removeparent`
    try:
        parentcats = workingcat.categories()
    except pywikibot.Error:
        parentcats = []

    # Do not include articles already in subcats; only checking direct subcats
    subcatlist = list(workingcat.subcategories())
    if subcatlist:
        subcatlist = pagegenerators.PreloadingGenerator(subcatlist)
        for cat in subcatlist:
            artlist = list(cat.articles())
            for page in artlist:
                checked[page] = page

    # Fetch articles in category, and mark as already checked (seen)
    # If category is empty, ask user if they want to look for pages
    # in a different category.
    articles = list(workingcat.articles(content=True))
    if not articles:
        pywikibot.output('Category {} does not exist or is empty. '
                         'Which page to start with?'
                         .format(workingcatname))
        answer = pywikibot.input('(Default is [[{}]]):'.format(workingcatname))
        if not answer:
            answer = workingcatname
        pywikibot.output(answer)
        pl = pywikibot.Page(mysite, answer)
        articles = [pl]

    for pl in articles:
        checked[pl] = pl
        bot.include(pl, summary=summary)

    gen = pagegenerators.DequePreloadingGenerator(tocheck)

    for page in gen:
        if bot.checkbroken or page.exists():
            bot.asktoadd(page, summary)
Example No. 26
sys.path.append(".")

if os.path.exists('pywikibot.lwp'):
    os.remove('pywikibot.lwp')  # remove stale login cookie file

import pywikibot

import azurlane.load_src
import azurlane.weapon
import azurlane.wiki

equip_stats_srcs = azurlane.load_src.load_sharecfg('equip_data_statistics')

site = pywikibot.Site('azurlane')  # The site we want to run our bot on
site.login()
category = pywikibot.Category(site, 'Submarine Torpedo')

for page in category.articles():
    equip_wikis = page.text.split('|-|')
    edit_message = 'Bot: set armor modifiers:'
    edited = False

    for equip_wiki_index, equip_wiki in enumerate(equip_wikis):
        equip_id = int(azurlane.wiki.get_template_value(equip_wiki, 'BaseID'))
        equip = equip_stats_srcs[equip_id]
        weapon_id = equip['weapon_id'][1]
        weapon = azurlane.weapon.WeaponStats(weapon_id)
        prev_armor_type = ['CoefMax', 'Coef', 'PatternSpread', 'Spread']
        for armor_type_index, armor_type in enumerate(['ArmorModL', 'ArmorModM', 'ArmorModH']):
            old_value = azurlane.wiki.get_template_value(equip_wiki, armor_type)
            new_value = str(int(round(weapon.armor_modifiers[armor_type_index] * 100)))
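            # e.g. an armor modifier of 0.85 becomes the wiki value '85'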
Example No. 27
import pywikibot

import azurlane.load_src

from equipment_page_map import equipment_page_map

equip_stats_srcs = azurlane.load_src.load_sharecfg('equip_data_statistics')
upgrade_srcs = azurlane.load_src.load_sharecfg('equip_upgrade_data')

upgrades = []

for upgrade_data in upgrade_srcs.values():
    output_id = upgrade_data['target_id']
    source_id = upgrade_data['upgrade_from']
    upgrades.append((source_id, output_id))

site = pywikibot.Site('azurlane')  # The site we want to run our bot on
site.login()
category = pywikibot.Category(site, 'Equipment')

def equipment_box(equip_id):
    equip = equip_stats_srcs[equip_id]
    link = equipment_page_map[equip['id']]
    name = link.split('#')[0]
    result = '{{EquipmentBox|%d|%s|%s|%s}} ' % (equip['rarity'], name, link, equip['icon'])
    if link == name:
        result += '[[%s]]' % name
    else:
        result += '[[%s|%s]]' % (link, name)
    return result
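# For illustration: a hypothetical equip entry with rarity 5, icon 'Foo_icon'
# and page link 'Foo#Kai' (so name 'Foo') would yield
# '{{EquipmentBox|5|Foo|Foo#Kai|Foo_icon}} [[Foo#Kai|Foo]]'.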

for page in category.articles():
    equip_wikis = page.text.split('|-|')
    edit_message = 'Bot: remove extraneous template arguments'
Example No. 28
    parser.add_argument('-v', '--version', action="version", version="%(prog)s " + version)
    parser.add_argument('-p', '--start', type=str, metavar="START",
                        help='format: YYYYMMDD - start date for counting page views', default='20190101')
    parser.add_argument('-q', '--end', type=str, metavar="END",
                        help='format: YYYYMMDD - end date for counting page views', default='20190701')
    args = parser.parse_args()

    # The wiki language and family are taken from the command-line
    # arguments (the script was written with the German Wikipedia in mind).
    site = pywikibot.Site(args.lang, args.site)

    # decide what to do
    if args.category:
        # Given the (German) name of a category, extract statistics
        # for all articles belonging to that category.
        page = pywikibot.Category(site, args.category)

        # check, whether this really is a category page
        if not page.is_categorypage():
            sys.exit(args.category + " is not a category page")

        for i, a in enumerate(page.articles(namespaces=[0])):
            stats = get_page_stats(args.start, args.end, end_date, a)
            print_stats(i, a.title(), stats, args.sep)

    if args.languages:
        # Given the (German) name of an article, extract statistics
        # for all available language editions.
        #
        page = pywikibot.Page(site, args.languages)