def test_intersect_newpages_and_recentchanges(self):
    """Test intersection between newpages and recentchanges."""
    site = self.get_site()
    generators = [
        pagegenerators.NewpagesPageGenerator(site=site, total=50),
        pagegenerators.RecentChangesPageGenerator(site=site, total=200),
    ]
    self.assertEqualItertools(generators)
def GetWiktionaryData():
    """Poll French Wiktionary recent changes and tweet new page creations.

    Authenticates against Twitter with the module-level credentials, then
    loops forever: each pass inspects the last 300 seconds of recent changes
    and, for every main-namespace page whose first revision falls inside that
    window, posts a status containing the page title and URL.

    Never returns; errors converting timestamps are reported and skipped.
    """
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    print(api.me().name)
    siteWiktionnaire = pywikibot.Site('fr', u'wiktionary')
    elapse = True
    while True:
        try:
            if elapse:
                # MediaWiki compact timestamps (yyyymmddhhmmss) compare
                # correctly as integers, which the window logic relies on.
                str_end = siteWiktionnaire.getcurrenttimestamp()
                end = int(str_end)
                print(end)
                start = end - 300
                print(start)
                for page in pagegenerators.RecentChangesPageGenerator(
                        end, start):
                    print(page.title())
                    # Main namespace only: titles containing ":" belong to
                    # other namespaces.
                    if str(page.title()).find(":") == -1:
                        # Page creations
                        try:
                            if page.exists():
                                ts = page.oldest_revision.timestamp
                                # Zero-padded compact form of the first
                                # revision's timestamp (replaces the manual
                                # "prepend 0 when < 10" padding).
                                rev = (f"{ts.year}{ts.month:02d}{ts.day:02d}"
                                       f"{ts.hour:02d}{ts.minute:02d}"
                                       f"{ts.second:02d}")
                                print(str(rev))
                                print(str(start))
                                # Created inside the current window -> tweet.
                                if int(rev) > start:
                                    titre = str(page.title())
                                    url = str(page.full_url())
                                    msg = "[[ " + titre + " ]] " + url + " "
                                    print(msg)
                                    api.update_status(msg)
                        except ValueError:
                            print("Oops!")
            # Only advance the window once the wiki clock has moved past the
            # previous window's end.
            # NOTE(review): while elapse is False this loop busy-spins with
            # repeated API calls until the clock catches up — consider adding
            # a time.sleep() here.
            str_mnt = siteWiktionnaire.getcurrenttimestamp()
            mnt = int(str_mnt) - 300
            print(mnt)
            elapse = mnt > end
        except ValueError:
            print("Oops!")
import re

import pywikibot
from pywikibot import pagegenerators

site = pywikibot.Site()
gen = pagegenerators.RecentChangesPageGenerator(total=500, topOnly=True,
                                                namespaces=0,
                                                showRedirects=False)

# Compiled once at import time (the original recompiled it for every matching
# line): two double-quoted fields separated by whitespace,
# e.g. "DOTA_Item_Blink"    "Blink Dagger".
_TOKEN_PAIR_RE = re.compile(r'"(.[^"]+)"[\t\s]+"(.[^"]+)"')


def getEnglishTokenV2(string):
    """Return the localisation token whose English value is *string*.

    Scans ``items_english.txt`` (UTF-16 key/value pairs, one per line) for a
    line whose second quoted field equals *string* exactly and whose first
    field looks like an item token (contains ``DOTA_Item_`` or
    ``DOTA_Tooltip_Ability_item_``). Returns that token, or ``None`` when no
    such line exists.
    """
    with open("items_english.txt", encoding="utf16") as english_items:
        for line in english_items:
            line = line.rstrip()
            # Cheap substring pre-filter before running the regex.
            if string in line:
                for match in _TOKEN_PAIR_RE.finditer(line):
                    if match and match.group(2) == string:
                        if ("DOTA_Item_" in match.group(1)
                                or "DOTA_Tooltip_Ability_item_"
                                in match.group(1)):
                            return match.group(1)
                        # Value matched but key is not an item token; give up
                        # on this line and move to the next one.
                        break
    # The with-statement closes the file; the original's explicit close()
    # after the block was redundant and has been dropped.
def main(*args):
    """
    Handle arguments using standard pywikibot args handling and then runs the
    bot main functionality.
    """
    global ignore_sites, DEBUG_MODE
    report_page = None
    generator = None
    talk_template = None
    page_of_pages = None
    days = None
    namespace = 0
    live_check = False
    genFactory = pagegenerators.GeneratorFactory()
    report_log = report_logger.ReportLogger()
    # Create the site once, before the loop: the original rebuilt it on every
    # argument and left it undefined when no arguments were given, breaking
    # the db_changes_generator fallback below.
    site = pywikibot.Site()
    for arg in pywikibot.handle_args(args):
        if arg.startswith('-talkTemplate:'):
            talk_template = arg[len("-talkTemplate:"):]
        elif arg.startswith('-pagesLinkedFrom:'):
            page_of_pages = arg[len("-pagesLinkedFrom:"):]
        elif arg.startswith('-WikiEd'):
            fill_wikiEd_pages(site)  # init wikiEd pages collection
        elif arg.startswith('-live:'):
            live_check = True
        elif arg.startswith('-recentchanges:'):
            days = float(arg[len("-recentchanges:"):])
        elif arg.startswith('-api_recentchanges:'):
            # Latest N main-namespace human edits, each paired with its
            # latest and previous revision for diffing.
            source = pagegenerators.RecentChangesPageGenerator(
                namespaces=[0], showBot=False,
                total=int(arg[len("-api_recentchanges:"):]),
                changetype=['edit'], showRedirects=False)
            generator = [(p, p.latestRevision(), p.previousRevision())
                         for p in source]
        elif arg.startswith('-report:'):
            report_page = arg[len("-report:"):]
        elif arg.startswith('-debug_mode'):
            DEBUG_MODE = True
            print('DEBUG MODE!')
        elif arg.startswith('-reportlogger'):
            report_log = report_logger.DbReportLogger(pywikibot.Site())
            print('using report logger')
        elif arg.startswith('-blacklist:'):
            ignore_sites = parse_blacklist(arg[len("-blacklist:"):])
        elif genFactory.handleArg(arg):
            # general page generators for checking the latest revision
            gen = genFactory.getCombinedGenerator()
            gen = pagegenerators.PreloadingGenerator(gen)
            generator = [(p, p.latestRevision(), 0)
                         for p in gen if p.exists()]
    # Fall back to the database-driven generator when only filter options
    # were supplied.
    if (not generator) and (talk_template or page_of_pages or days):
        if not days:
            days = MAX_AGE
        generator = db_changes_generator(site, talk_template, page_of_pages,
                                         days, namespace)
    if generator is None and not live_check:
        pywikibot.showHelp()
    else:
        if live_check:
            log('running live')
            bot = PlagiaBotLive(pywikibot.Site(), report_page,
                                report_log=report_log)
        else:
            log('running non live')
            bot = PlagiaBot(pywikibot.Site(), generator, report_page,
                            report_log=report_log)
        bot.run()
def main(*args):
    """Parse command-line options, build a page generator and run MyBot."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-simulate", help="option for test purposes",
                        default=False, action="store_true")
    parser.add_argument("-site", help="Site on which the bot is working",
                        default='fr', type=str)
    parser.add_argument("-summary", help="Summary", default="Mise en forme",
                        type=str)
    # NOTE(review): store_true combined with default=24 means opts.r is
    # truthy even when -r is absent, so the recent-changes branch below is
    # taken by default — confirm this is intended.
    parser.add_argument(
        "-r",
        help="goes through the pages modified by humans in the last 24 hours",
        default=24, action="store_true")
    parser.add_argument(
        "--recent",
        help="goes through the pages modified by humans in the last X hours",
        default=0, type=float)
    parser.add_argument("-users", default=None)
    parser.add_argument("-cat", help="Category to crawl", type=str)
    parser.add_argument("-pages", help="List of pages, separated by '/'",
                        type=str)
    parser.add_argument("-total", help="Maximum number of pages", type=int,
                        default=None)
    parser.add_argument('-start', type=str, default=None)
    opts = parser.parse_args()

    pywikibot.config.simulate = opts.simulate
    summary = opts.summary
    site = pywikibot.Site(opts.site, fam='wiktionary')

    # Later sources win: recent changes < category < explicit page list.
    if opts.r or opts.recent:
        users = opts.users.split(',') if opts.users else None
        hours_back = opts.recent or 24
        cutoff = datetime.now() - timedelta(hours=hours_back)
        gen = pagegenerators.RecentChangesPageGenerator(
            site=site, namespaces=0, showBot=False, end=cutoff,
            topOnly=True, user=users)
    if opts.cat:
        category = pywikibot.Category(site, f'Catégorie:{opts.cat}')
        gen = pagegenerators.CategorizedPageGenerator(
            category, namespaces=0, total=opts.total, start=opts.start)
    if opts.pages:
        gen = [pywikibot.Page(site, title) for title in opts.pages.split('/')]

    bot = MyBot(site, gen, summary,
                langs=('sv', 'no', 'da', 'nn', 'nb', 'fi'))
    bot.run()
import shelve

from pywikibot import pagegenerators

# Talk-page ping watcher: polls recent changes and tracks the last completed
# run so restarts do not re-process edits that were already handled.
# NOTE(review): this chunk is truncated — the body of the final template loop
# is outside the visible source.
site = pywikibot.Site()
# Page presumably listing the bot's ping configuration — verify against the
# missing tail of this script.
mypage = pywikibot.Page(site, u"User:Bellezzasolo Bot/Pings")
count = 0

# Persistent run marker; writeback=True so nested updates are flushed.
runtracker = shelve.open("RunTrack", writeback=True)
if 'lastrun' not in runtracker:
    # First ever run: only look 30 minutes into the past.
    runtracker['lastrun'] = datetime.datetime.utcnow() - datetime.timedelta(
        minutes=30)
# Cap the look-back at 24 hours, even after a long outage.
if runtracker['lastrun'] < datetime.datetime.utcnow() - datetime.timedelta(
        hours=24):
    runtracker['lastrun'] = datetime.datetime.utcnow() - datetime.timedelta(
        hours=24)
while True:
    # Odd numbers are the talk namespaces; 101/109/119/829 are
    # project-specific discussion namespaces.
    gen = pagegenerators.RecentChangesPageGenerator(namespaces=[1,3,5,7,9,11,13,15,101,109,\
        119,829])
    currun = datetime.datetime.utcnow()
    print("last run: {0}".format(runtracker['lastrun']))
    for page in gen:
        try:
            # Recent changes arrive newest-first, so the first edit older
            # than the marker means everything newer has been seen.
            if page.editTime() < runtracker['lastrun']:
                #OK, we've checked this before or it is too old
                print("Run completed, sleeping")
                time.sleep(60)
                runtracker['lastrun'] = currun
                break
        except:
            #Problem with page, may be deleted
            continue
        # Loop body continues beyond the visible source.
        for template in page.templatesWithParams():
            #print(template[0]._link.canonical_title())