def test_NamespaceFilterPageGenerator(self):
    """Check how many fixture titles pass each namespace filter."""
    self.assertFunction("NamespaceFilterPageGenerator")
    site = self.site
    # Each entry pairs a namespace selector (a single id, a tuple of ids,
    # or a tuple of namespace names) with the number of pages expected
    # to remain after filtering.
    expectations = (
        (0, 3),
        (1, 4),
        (10, 6),
        ((1, 10), 10),
        (('Talk', 'Template'), 10),
    )
    for namespaces, expected_count in expectations:
        # A fresh source generator is needed for every case because the
        # previous one has been consumed by tuple().
        source = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
        filtered = pagegenerators.NamespaceFilterPageGenerator(
            source, namespaces, site)
        self.assertEqual(len(tuple(filtered)), expected_count)
def user_talk_pages():
    """Yield (talk page, username) pairs for usable user talk pages.

    A talk page is usable when it exists and is not a redirect. The
    usernames come from the keys of ``people_to_notify``, which is read
    from the enclosing scope.
    """
    talk_titles = ["User talk:" + name for name in people_to_notify.keys()]
    candidates = pagegenerators.PagesFromTitlesGenerator(talk_titles)
    # The whole filtered list is built before the first pair is yielded.
    usable_pages = [
        candidate for candidate in candidates
        if candidate.exists() and not candidate.isRedirectPage()
    ]
    for talk_page in usable_pages:
        name = talk_page.title(withNamespace=False)
        # Sanity check: only report names that are (still) keys of the
        # mapping; the title returned by the page may differ from the
        # name the title was built from.
        if name in people_to_notify:
            yield (talk_page, name)
def test_RegexBodyFilterPageGenerator(self):
    """Test RegexBodyFilterPageGenerator against synthetic page text."""
    self.assertFunction("RegexBodyFilterPageGenerator")
    gen = pagegenerators.PagesFromTitlesGenerator(self.titles,
                                                  site=self.site)
    pages = []
    for p in gen:
        # Assign text that embeds the page title, so the body filter has
        # known content to match against.
        p.text = u"This is the content of %s as a sample" % p.title()
        pages.append(p)

    # assertEqual (rather than assertTrue(a == b)) reports both the actual
    # and the expected count when a check fails.
    gen = pagegenerators.RegexBodyFilterPageGenerator(iter(pages), '/doc')
    self.assertEqual(len(tuple(gen)), 2)

    gen = pagegenerators.RegexBodyFilterPageGenerator(iter(pages), 'This')
    self.assertEqual(len(tuple(gen)), 13)

    gen = pagegenerators.RegexBodyFilterPageGenerator(iter(pages), 'talk',
                                                      quantifier='none')
    self.assertEqual(len(tuple(gen)), 9)
def main(argv):
    """Parse command-line options and run PhotoCatBot once or repeatedly.

    Args:
        argv: full argument vector; ``argv[0]`` is skipped and the rest is
            handed to argparse.

    Pages to process come from the positional ``pages`` arguments when any
    are given, otherwise from the category named by ``--category``. With
    ``--repeat N`` the run is repeated every N minutes until interrupted;
    without it the bot runs once.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', '-d',
                        help='enable debugging output',
                        action='store_true')
    parser.add_argument('--category', '--cat', '-c',
                        help='specify starting category (default "{}")'.format(
                            defaultCategory),
                        default=defaultCategory)
    parser.add_argument('--repeat', '-r',
                        help='number of minutes in which to repeat',
                        type=int)
    parser.add_argument('--always',
                        help='always save changes without prompting',
                        action='store_true')
    parser.add_argument('pages',
                        help='List of page titles to process',
                        nargs=argparse.REMAINDER)
    args = parser.parse_args(argv[1:])

    site = pywikibot.Site()
    while True:
        # Select an appropriate page generator based on the --category
        # argument and/or positional 'page' arguments. The generator is
        # rebuilt on every pass because the previous one is exhausted.
        if args.pages:
            pagegen = pagegenerators.PagesFromTitlesGenerator(args.pages)
        else:
            cat = pywikibot.Category(site, 'Category:' + args.category)
            pagegen = pagegenerators.CategorizedPageGenerator(cat)

        bot = PhotoCatBot(generator=pagegen,
                          debug=args.debug,
                          always=args.always)
        bot.run()

        if not args.repeat:
            break

        nextrun = args.repeat * 60
        # Call form with a single argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("{}: Next run in {} seconds...".format(
            time.asctime(), nextrun))
        time.sleep(nextrun)
def test_RegexBodyFilterPageGenerator(self):
    """Exercise RegexBodyFilterPageGenerator on pages with assigned text."""
    self.assertFunction("RegexBodyFilterPageGenerator")
    pages = list(
        pagegenerators.PagesFromTitlesGenerator(self.titles, site=self.site))
    for page in pages:
        # Every page gets text that embeds its own title.
        page.text = u"This is the content of %s as a sample" % page.title()

    # Pattern that only the documentation subpages' text contains.
    doc_matches = pagegenerators.RegexBodyFilterPageGenerator(
        iter(pages), '/doc')
    self.assertPagelistTitles(
        doc_matches, ('Template:!/Doc', 'Template:Template/Doc'))

    # Pattern present in the text assigned to every page.
    all_matches = pagegenerators.RegexBodyFilterPageGenerator(
        iter(pages), 'This')
    self.assertPagelistTitles(all_matches, self.titles)

    # quantifier='none' keeps the pages whose text does not match.
    non_talk = pagegenerators.RegexBodyFilterPageGenerator(
        iter(pages), 'talk', quantifier='none')
    self.assertEqual(len(tuple(non_talk)), 9)
def test_PagesFromTitlesGenerator(self):
    """Test that PagesFromTitlesGenerator yields one page per title."""
    self.assertFunction("PagesFromTitlesGenerator")
    gen = pagegenerators.PagesFromTitlesGenerator(self.titles)
    # assertEqual reports both counts on failure, unlike assertTrue(a == b).
    self.assertEqual(len(self.titles), len(tuple(gen)))
return year property_map = { 'year': ('P571', lambda x: "+" + get_year(x) + "-01-01T00:00:00Z/9"), 'width_metric': ('P2049', lambda x: x.replace(",", ".") + "U174728"), 'height_metric': ('P2048', lambda x: x.replace(",", ".") + "U174728"), 'title': ('P1476', lambda x: 'en:"' + x.lstrip('«').rstrip('»') + '"'), 'artist': ('P170', find_by_label), 'image_file': ('P18', lambda x: '"' + x + '"'), 'image': ('P18', lambda x: '"' + x + '"'), 'museum': ('P195', find_by_label), } site = pywikibot.Site("en", "wikipedia") for page in pagegenerators.PagesFromTitlesGenerator([find_by_sitelink(QID)], site): print(QID + "\tP31\tQ3305213") # print(QID + "\tDen\t\"painting\"") itemData = {} for (template, args) in page.templatesWithParams(): # print(template.title()) if template.title() == 'Template:Infobox Artwork' \ or template.title() == 'Template:Infobox Painting' \ or template.title() == 'Template:Infobox artwork' \ or template.title() == 'Template:Infobox painting': # print(args) argmap = dict( arg.split('=', maxsplit=1) for arg in args if '=' in arg) for name in property_map: if name in argmap: value = argmap[name]
def test_PagesFromTitlesGenerator(self):
    """Check that the generated pages carry exactly the fixture titles."""
    self.assertFunction("PagesFromTitlesGenerator")
    self.assertPagelistTitles(
        pagegenerators.PagesFromTitlesGenerator(self.titles, self.site),
        self.titles)
def test_CategoryFilterPageGenerator(self):
    """Check the number of fixture pages passing the category filter."""
    titled_pages = pagegenerators.PagesFromTitlesGenerator(
        self.titles, self.site)
    in_categories = pagegenerators.CategoryFilterPageGenerator(
        titled_pages, self.catfilter_list, self.site)
    surviving = tuple(in_categories)
    self.assertEqual(len(surviving), 7)
def prune_list_of_people(people_to_notify):
    """Remove people who shouldn't be notified from the list.

    The mapping passed in is not modified; all pruning happens on copies.

    Pruning steps, in order:
      1. entries whose username is empty;
      2. users whose user talk page does not exist;
      3. nominations already recorded for the user in the file named by
         the ``ALREADY_NOTIFIED_FILE`` setting;
      4. nominations whose name already occurs in the text of the user's
         talk page.
    Users left with no nominations are dropped after steps 3 and 4.

    Args:
        people_to_notify: dict mapping a username to a list of nomination
            names.

    Returns:
        A new dict of the same shape containing only the users and
        nominations that survived every pruning step.

    Raises:
        ValueError: if the already-notified file holds data that cannot
            be decoded as JSON (a blank file is treated as empty).
    """

    # Define a couple of helper functions...

    # ... one purely for logging purposes,
    def print_people_left(what_was_removed):
        "Print the number of people left after removing something."
        # This has been made a no-op for performance.
        pass
        #print("%d people for %d noms left after removing %s." %
        #      (len(people_to_notify),
        #       len(functools.reduce(operator.add,
        #                            people_to_notify.values(),
        #                            [])),
        #       what_was_removed))

    # ... and another simply to save keystrokes.
    def user_talk_pages():
        "A generator for valid user talk pages."
        titles = ["User talk:" + name for name in people_to_notify]
        for user_talk_page in [
                page
                for page in pagegenerators.PagesFromTitlesGenerator(titles)
                if page.exists() and not page.isRedirectPage()
        ]:
            # First, a sanity check
            username = user_talk_page.title(withNamespace=False)
            if username not in people_to_notify:
                continue

            # Then yield the page and username
            yield (user_talk_page, username)

    # Prune empty entries
    people_to_notify = {k: v for k, v in people_to_notify.items() if k}
    print_people_left("empty entries")

    # Prune people whose talk pages don't exist
    user_talk_titles = ["User talk:" + name for name in people_to_notify]
    titles_gen = pagegenerators.PagesFromTitlesGenerator(user_talk_titles)
    usernames_without_talk_pages = [
        page.title(withNamespace=False)
        for page in titles_gen
        # Without this filter every user would be listed here and then
        # removed from the mapping.
        if not page.exists()
    ]
    for each_username in usernames_without_talk_pages:
        # pop() instead of del: the title reported by the page may not be
        # spelled exactly like the key it was built from.
        people_to_notify.pop(each_username, None)

    # Prune people I've already notified
    with open(CONFIG.get("dyknotifier",
                         "ALREADY_NOTIFIED_FILE")) as already_notified_file:
        raw_data = already_notified_file.read()

    if raw_data.strip():
        try:
            already_notified_data = json.loads(raw_data)
        except ValueError as error:
            # str(error) works on every Python version; the .message
            # attribute only exists on Python 2 exceptions.
            if str(error) != "No JSON object could be decoded":
                raise
            already_notified_data = {}
    else:
        # A blank file means nobody has been notified yet.
        already_notified_data = {}

    # Since the outer dict in the file is keyed on month string,
    # smush all the values together to get a dict keyed on username
    already_notified = {}
    for month_dict in already_notified_data.values():
        for month_username, month_items in month_dict.items():
            already_notified[month_username] = (
                already_notified.get(month_username, []) + month_items)

    # Now that we've built a dict, filter the list for each username
    for username, prior_nominations in already_notified.items():
        if username not in people_to_notify:
            continue

        # Stored items lack the nomination prefix, so add it back before
        # comparing with the proposed nominations.
        prior_nominations = [
            CONFIG.get("dyknotifier", "NOMINATION_TEMPLATE") + x
            for x in prior_nominations
        ]
        proposed = set(people_to_notify[username])
        people_to_notify[username] = list(proposed - set(prior_nominations))

    people_to_notify = {k: v for k, v in people_to_notify.items() if v}
    print_people_left("already-notified people")

    # Prune user talk pages that link to this nom.
    for user_talk_page, username in user_talk_pages():
        # Fetch the page text once instead of once per nomination.
        talk_text = user_talk_page.get()
        people_to_notify[username] = [
            nom for nom in people_to_notify[username]
            if nom not in talk_text
        ]

    people_to_notify = {k: v for k, v in people_to_notify.items() if v}
    print_people_left("linked people")

    return people_to_notify