def load_pages(book_name, opt, cache):
    # It's more efficient to do two passes: in the first we don't preload
    # contents, we only check whether the cache entry is still valid.
    remaining_pages = []
    pages = []
    page_ns_name = page_prefixes['wikisource'][opt.lang]
    page_name = page_ns_name + u':' + book_name + u'/'
    gen = pagegen.PrefixingPageGenerator(page_name, site=opt.site)
    for p in gen:
        page_nr = int(re.match(u'.*/(\d+)$', p.title()).group(1))
        if page_nr not in cache or cache[page_nr][0] != p.latestRevision():
            remaining_pages.append(p)
        else:
            pages.append((None, page_nr, p.latestRevision()))
    # In the second pass we preload contents for the cache misses;
    # imported pages are never cached, but that's no big deal.
    for p in pagegen.PreloadingGenerator(remaining_pages):
        text = p.get()
        if not is_imported_page(text):
            page_nr = int(re.match(u'.*/(\d+)$', p.title()).group(1))
            pages.append((text, page_nr, p.latestRevision()))
    return pages
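
# A minimal sketch of how load_pages might be driven. The cache layout
# {page_nr: (revid, ...)} is inferred from the cache[page_nr][0] !=
# p.latestRevision() comparison above; page_prefixes, is_imported_page and
# the Opt container are stubs here because the excerpt doesn't show them.
import re
import pywikibot
from pywikibot import pagegenerators as pagegen

page_prefixes = {'wikisource': {'en': u'Page'}}   # assumed mapping

def is_imported_page(text):
    return False                                  # stub for the sketch

class Opt(object):
    def __init__(self, lang, site):
        self.lang = lang
        self.site = site

opt = Opt('en', pywikibot.Site('en', 'wikisource'))
cache = {}   # empty cache: every page is a miss and gets preloaded
pages = load_pages(u'Example Book.djvu', opt, cache)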
def handlePotd(self, potd):
    """
    Handle one picture of the day.

    :param potd: The POTD template of that day
    """
    # The pipe between the month and day groups must be escaped (\|),
    # otherwise the pattern becomes an alternation and never captures the day.
    regex = u'\{\{Potd filename\|([^\|]+)\|(\d\d\d\d)\|(\d{1,2})\|(\d{1,2})\}\}'
    match = re.search(regex, potd.text)
    captions = {}
    filename = None
    if match:
        filename = match.group(1)
    for potdlang in pagegenerators.PrefixingPageGenerator(
            potd.title() + u'_(', site=self.site, includeredirects=False):
        potdinfo = self.handlePotdLang(potdlang)
        if potdinfo:
            (lang, caption) = potdinfo
            captions[lang] = caption
    # Reshuffle so I don't end up getting the captions all the time
    if captions and filename:
        filepage = pywikibot.FilePage(self.site, title=filename)
        if filepage.exists():
            # Might run into redirects
            mediaid = u'M%s' % (filepage.pageid, )
            if not self.mediaInfoExists(mediaid):
                self.addCaptions(mediaid, captions)
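
# Sanity check for the corrected regex above: with '\|' restored it matches
# the {{Potd filename|<file>|<year>|<month>|<day>}} shape the pattern clearly
# intends (the sample template text here is made up for illustration).
import re

regex = u'\{\{Potd filename\|([^\|]+)\|(\d\d\d\d)\|(\d{1,2})\|(\d{1,2})\}\}'
m = re.search(regex, u'{{Potd filename|Example.jpg|2015|3|7}}')
assert m.groups() == (u'Example.jpg', u'2015', u'3', u'7')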
currentDateTime = datetime.datetime.utcnow()

mailFrom = 'WikiBG Admin Notifier <*****@*****.**>'
mailRcpt = '*****@*****.**'
# Subject line (Bulgarian): 'Changes in "Requests to the administrators"'
mailSubj = 'Промени в "Заявки към администраторите"'
mailBody = ''

mail = MIMEMultipart('alternative')
mail.set_charset('utf-8')
mail['From'] = mailFrom
mail['To'] = mailRcpt
mail['Subject'] = Header(mailSubj.encode('utf-8'), 'utf-8')

# 'Заявки към администраторите' is bg.wikipedia's 'Requests to the
# administrators' page; 'Уикипедия' is the project ('Wikipedia') namespace.
# Collect the per-year request subpages for last year and this year.
adminReqPagesPrevYear = pagegenerators.PrefixingPageGenerator(
    'Заявки към администраторите/' + str(currentDateTime.year - 1),
    namespace='Уикипедия', includeredirects=False)
adminReqPagesCurrYear = pagegenerators.PrefixingPageGenerator(
    'Заявки към администраторите/' + str(currentDateTime.year),
    namespace='Уикипедия', includeredirects=False)
adminReqPages = pagegenerators.CombinedPageGenerator(
    [adminReqPagesPrevYear, adminReqPagesCurrYear])
# Keep only pages edited since the last run.
adminReqPagesRecent = pagegenerators.EdittimeFilterPageGenerator(
    adminReqPages, last_edit_start=lastDateTime)

revisionCount = 0
for reqPage in adminReqPagesRecent:
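    # NOTE: the excerpt is truncated at the loop header above. The loop body
    # and send step below are a hypothetical reconstruction for illustration
    # only, built from the variables already prepared (assumes the usual
    # 'from email.mime.text import MIMEText' and 'import smtplib').
    revisionCount += 1
    mailBody += u'* %s\n' % reqPage.title()

if revisionCount > 0:
    # Hypothetical send step, again an assumption: attach the body and
    # deliver through a local SMTP relay.
    mail.attach(MIMEText(mailBody.encode('utf-8'), 'plain', 'utf-8'))
    smtp = smtplib.SMTP('localhost')
    smtp.sendmail(mailFrom, [mailRcpt], mail.as_string())
    smtp.quit()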
def main():
    lang = 'en'
    wikisite = pywikibot.Site(lang, 'wikipedia')
    site = pywikibot.Site('wikidata', 'wikidata')
    repo = site.data_repository()
    # https://en.wikipedia.org/wiki/Category:YearParamUsageCheck_tracking_categories
    # https://en.wikipedia.org/wiki/Category:Year_by_category_%E2%80%94_used_with_year_parameter(s)_equals_year_in_page_title
    method = 'all'
    if len(sys.argv) > 1:
        method = sys.argv[1]

    if method == 'all' or method == 'method1':
        groups = [
            #['Category:%s births' % (i) for i in range(100, 2050)],
            #['Category:%s deaths' % (i) for i in range(100, 2050)],
            #['Category:%s establishments' % (i) for i in range(100, 2050)],
            #['Category:%s disestablishments' % (i) for i in range(100, 2050)],
            #['Category:%s books' % (i) for i in range(100, 2050)],
            #['Category:%s comic debuts' % (i) for i in range(100, 2050)],
            #['Category:%s compositions' % (i) for i in range(100, 2050)],
            #['Category:%s documents' % (i) for i in range(100, 2050)],
            #['Category:%s films' % (i) for i in range(1850, 2050)],
            #['Category:%s musicals' % (i) for i in range(100, 2050)],
            #['Category:%s operas' % (i) for i in range(100, 2050)],
            #['Category:%s paintings' % (i) for i in range(100, 2050)],
            #['Category:%s plays' % (i) for i in range(100, 2050)],
            #['Category:%s poems' % (i) for i in range(100, 2050)],
            #['Category:%s sculptures' % (i) for i in range(100, 2050)],
            #['Category:%s short stories' % (i) for i in range(100, 2050)],
            #['Category:%s songs' % (i) for i in range(100, 2050)],
            #['Category:%s treaties' % (i) for i in range(100, 2050)],
            #['Category:%s works' % (i) for i in range(100, 2050)],
            #['%s in film' % (i) for i in range(1850, 2050)],
        ]
        for titles in groups:
            for c in range(0, len(titles)):
                title = titles[c]
                titleprev = c > 0 and titles[c - 1] or ''
                titlenext = c < len(titles) - 1 and titles[c + 1] or ''
                print('\n==', title.encode('utf-8'), '==')
                page = pywikibot.Page(wikisite, title)
                if not page.exists() or page.isRedirectPage():
                    print("Page doesn't exist or is a redirect: %s"
                          % (page.title().encode('utf-8')))
                    continue
                item = pywikibot.ItemPage.fromPage(page)
                if item:
                    core(repo=repo, item=item, page=page, lang=lang,
                         wikisite=wikisite, titleprev=titleprev,
                         titlenext=titlenext)
                else:
                    print("Page doesn't have an item")

    if method == 'all' or method == 'method2':
        #cat = pywikibot.Category(wikisite, 'Category:Year by category — used with year parameter(s) equals year in page title')
        #cat = pywikibot.Category(wikisite, 'Category:YearParamUsageCheck tracking categories')
        cat = pywikibot.Category(wikisite, 'Category:Categories by year')
        #gen = pagegenerators.SubCategoriesPageGenerator(cat)
        gen = pagegenerators.SubCategoriesPageGenerator(cat, recurse=5)
        for page in gen:
            print('\n==', page.title().encode('utf-8'), '==')
            year = ''
            titleprev = ''
            titlenext = ''
            if re.findall(r'(?m)^Category:(\d{4}) [^\d]+$', page.title()):
                year = int(re.findall(r'(?m)^Category:(\d{4}) [^\d]+$',
                                      page.title())[0])
                titleprev = re.sub(r'(?m)^(Category):%s ([^\d]+)$' % (year),
                                   r'\1:%s \2' % (year - 1), page.title())
                titlenext = re.sub(r'(?m)^(Category):%s ([^\d]+)$' % (year),
                                   r'\1:%s \2' % (year + 1), page.title())
            elif re.findall(r'(?m)^Category:[^\d]+ in (\d{4})$', page.title()):
                year = int(re.findall(r'(?m)^Category:[^\d]+ in (\d{4})$',
                                      page.title())[0])
                titleprev = re.sub(r'(?m)^(Category):([^\d]+ in) %s$' % (year),
                                   r'\1:\2 %s' % (year - 1), page.title())
                titlenext = re.sub(r'(?m)^(Category):([^\d]+ in) %s$' % (year),
                                   r'\1:\2 %s' % (year + 1), page.title())
            else:
                print("Not a yearly category")
                continue
            if not year or len(str(year)) != 4:
                print("Couldn't parse a correct year from the page name")
                continue
            print(year)
            item = ''
            try:
                item = pywikibot.ItemPage.fromPage(page)
            except:
                print("No Wikidata item for this page")
                continue
            if item:
                if titleprev and titlenext:
                    core(repo=repo, item=item, page=page, lang=lang,
                         wikisite=wikisite, titleprev=titleprev,
                         titlenext=titlenext)
                else:
                    print("No titleprev or titlenext")
            else:
                print("Page doesn't have an item")

    if method == 'all' or method == 'method3':
        for year in range(1000, 2050):
            prefix = '%s in ' % (year)
            prefixprev = '%s in ' % (year - 1)
            prefixnext = '%s in ' % (year + 1)
            gen = pagegenerators.PrefixingPageGenerator(
                prefix, namespace=0, includeredirects=False, site=wikisite,
                total=None, content=False)
            for page in gen:
                if not page.title().startswith(prefix):
                    break
                if ' in science' in page.title():
                    continue
                print('\n==', page.title().encode('utf-8'), '==')
                titleprev = ''
                titlenext = ''
                if re.findall(r'(?m)^%s([^\d]+)$' % (prefix), page.title()):
                    titleprev = re.sub(r'(?m)^%s([^\d]+)$' % (prefix),
                                       r'%s\1' % (prefixprev), page.title())
                    titlenext = re.sub(r'(?m)^%s([^\d]+)$' % (prefix),
                                       r'%s\1' % (prefixnext), page.title())
                else:
                    print("Not a yearly page")
                    continue
                item = ''
                try:
                    item = pywikibot.ItemPage.fromPage(page)
                except:
                    print("No Wikidata item for this page")
                    continue
                if item:
                    if titleprev and titlenext:
                        core(repo=repo, item=item, page=page, lang=lang,
                             wikisite=wikisite, titleprev=titleprev,
                             titlenext=titlenext)
                    else:
                        print("No titleprev or titlenext")
                else:
                    print("Page doesn't have an item")
    '1.21.0': 'Q21683643',
    '1.22.0': 'Q21683645',
    '1.23.0': 'Q21683646',
    '1.24.0': 'Q21683648',
    '1.25.0': 'Q21683649',
    '1.26.0': 'Q21683659',
    '1.27.0': 'Q21683650',
}


def normalize_version(ver):
    # Drop anything after the first space and pad two-part versions
    # ('1.25' -> '1.25.0') so they match the keys of the table above.
    ver = ver.split(' ')[0]
    if len(ver.split('.')) == 2:
        ver += '.0'
    return ver


gen = pagegenerators.PrefixingPageGenerator('Manual:Hooks/', site=site,
                                            content=True,
                                            includeredirects=False)
for page in gen:
    if len(page.title().split('/')) > 2:
        #print('Skipping %s because it has 2+ subpage parts' % page.title())
        continue
    code = mwparserfromhell.parse(page.text)
    dep = None
    for temp in code.filter_templates():
        if temp.name == 'TNT' and temp.get(1).value.strip() == 'MediaWikiHook':
            if temp.has('deprecated'):
                dep = str(temp.get('deprecated').value).strip()
            break
        elif temp.name == 'TNT' and temp.get(1).value.strip() == 'Deprecated':
            dep = str(temp.get('2').value).strip()
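
# normalize_version in action (illustrative inputs; the parenthesised suffix
# is an assumed shape for values like 'x.y (note)'):
assert normalize_version('1.25') == '1.25.0'
assert normalize_version('1.26.0') == '1.26.0'
assert normalize_version('1.24 (see notes)') == '1.24.0'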