예제 #1
0
 def test_intersect_newpages_and_recentchanges(self):
     """Test intersection between newpages and recentchanges.

     Builds a new-pages generator (50 entries) and a recent-changes
     generator (200 entries) for the test site and passes both, as one
     list, to ``assertEqualItertools``.
     """
     wiki = self.get_site()
     newpages = pagegenerators.NewpagesPageGenerator(site=wiki, total=50)
     changes = pagegenerators.RecentChangesPageGenerator(site=wiki, total=200)
     self.assertEqualItertools([newpages, changes])
예제 #2
0
def _shift_wiki_timestamp(stamp, seconds):
    """Return the ``yyyymmddhhmmss`` value *stamp* moved back by *seconds*.

    The arithmetic is done on a real datetime so the result is always a
    valid timestamp; plain decimal subtraction on the integer is not
    (e.g. ``...120200 - 300`` gives ``...119900``, i.e. minute 99).

    Raises:
        ValueError: if *stamp* is not a 14-digit ``yyyymmddhhmmss`` value.
    """
    from datetime import datetime, timedelta

    layout = "%Y%m%d%H%M%S"
    moment = datetime.strptime(str(stamp), layout) - timedelta(seconds=seconds)
    return int(moment.strftime(layout))


def _tweet_if_created_since(api, page, start):
    """Tweet *page* when its oldest revision is newer than *start*.

    *start* is an integer in ``yyyymmddhhmmss`` form. Pages that do not
    exist are ignored.
    """
    if not page.exists():
        return

    # Same zero-padded yyyymmddhhmmss string the comparison below expects.
    rev = page.oldest_revision.timestamp.strftime("%Y%m%d%H%M%S")
    print(rev)
    print(str(start))
    if int(rev) > start:
        titre = str(page.title())
        url = str(page.full_url())
        msg = "[[ " + titre + " ]] " + url + " "
        print(msg)
        api.update_status(msg)


def GetWiktionaryData():
    """Tweet pages recently created on the French Wiktionary, forever.

    After authenticating against Twitter, the function loops endlessly:
    it reads the wiki's current timestamp, walks the recent changes of the
    preceding 300 seconds, and tweets title and URL of every changed page
    whose title contains no colon and whose oldest revision falls inside
    that window. A new pass starts once more than 300 seconds have gone by
    since the previous one. ``ValueError`` is reported with "Oops!" and the
    loop carries on.
    """
    window = 300  # seconds covered by one pass

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    print(api.me().name)
    siteWiktionnaire = pywikibot.Site('fr', u'wiktionary')

    elapse = True
    # NOTE(review): there is no sleep between polls, so while waiting for the
    # window to elapse the loop asks the site for its timestamp back to back.
    while True:
        try:
            if elapse:
                end = int(siteWiktionnaire.getcurrenttimestamp())
                print(end)

                start = _shift_wiki_timestamp(end, window)
                print(start)

                # NOTE(review): no site is passed here, so the generator
                # works on pywikibot's default site - confirm it is the
                # French Wiktionary.
                for page in pagegenerators.RecentChangesPageGenerator(
                        end, start):
                    print(page.title())
                    # Titles with a colon are skipped.
                    if ":" in str(page.title()):
                        continue
                    try:
                        _tweet_if_created_since(api, page, start)
                    except ValueError:
                        print("Oops!")

            mnt = _shift_wiki_timestamp(
                siteWiktionnaire.getcurrenttimestamp(), window)
            print(mnt)

            # Start a new pass only when the previous window is fully over.
            elapse = mnt > end
        except ValueError:
            print("Oops!")
예제 #3
0
import re
import pywikibot
from pywikibot import pagegenerators

# Site object built with pywikibot's defaults.
site = pywikibot.Site()

# Recent-changes page generator: at most 500 entries, namespace 0,
# with topOnly enabled and showRedirects disabled.
gen = pagegenerators.RecentChangesPageGenerator(
    namespaces=0,
    total=500,
    topOnly=True,
    showRedirects=False,
)


def getEnglishTokenV2(string):
    """Return the item token whose English text is exactly *string*.

    ``items_english.txt`` (UTF-16, in the current directory) is scanned
    line by line for ``"token"  "text"`` pairs. The first pair whose text
    equals *string* and whose token contains ``DOTA_Item_`` or
    ``DOTA_Tooltip_Ability_item_`` wins.

    Args:
        string: the English text to look up.

    Returns:
        The matching token, or ``None`` when no line qualifies.

    Raises:
        OSError: if ``items_english.txt`` cannot be opened.
    """
    # Compiled once per call instead of once per candidate line; the raw
    # string avoids the invalid "\s" escape of a normal string literal.
    pair_pattern = re.compile(r'"(.[^"]+)"\s+"(.[^"]+)"')
    wanted_markers = ("DOTA_Item_", "DOTA_Tooltip_Ability_item_")

    with open("items_english.txt", encoding="utf16") as english_items:
        for line in english_items:
            line = line.rstrip()
            if string not in line:
                continue

            for match in pair_pattern.finditer(line):
                if match.group(2) != string:
                    continue
                token = match.group(1)
                if any(marker in token for marker in wanted_markers):
                    return token
                # Right text but wrong kind of token: give up on this line.
                break
    return None
예제 #4
0
def main(*args):
    """
    Parse the command line the pywikibot way, then run the matching bot.

    Options handled here (every other argument is offered to the pywikibot
    generator factory):

    -talkTemplate:X, -pagesLinkedFrom:X, -recentchanges:DAYS
        inputs for ``db_changes_generator``, which is used when no other
        option produced a generator; DAYS defaults to ``MAX_AGE``
    -api_recentchanges:N
        check N recent edits taken from ``RecentChangesPageGenerator``
    -WikiEd
        initialise the wikiEd pages collection
    -live:
        run ``PlagiaBotLive`` instead of ``PlagiaBot``
    -report:X
        report page handed to the bot
    -reportlogger
        use ``DbReportLogger`` instead of the default ``ReportLogger``
    -blacklist:X
        replace the global ``ignore_sites`` with the parsed blacklist
    -debug_mode
        switch the global ``DEBUG_MODE`` on

    Help is shown when no generator was produced and live mode is off.
    """
    global ignore_sites, DEBUG_MODE
    report_page = None
    generator = None
    talk_template = None
    page_of_pages = None
    days = None
    namespace = 0
    live_check = False
    genFactory = pagegenerators.GeneratorFactory()
    report_log = report_logger.ReportLogger()

    for option in pywikibot.handle_args(args):
        site = pywikibot.Site()
        # For "-name:value" options the value is everything after the
        # first colon, which is where each prefix tested below ends.
        value = option.partition(':')[2]

        if option.startswith('-talkTemplate:'):
            talk_template = value
        elif option.startswith('-pagesLinkedFrom:'):
            page_of_pages = value
        elif option.startswith('-WikiEd'):
            fill_wikiEd_pages(site)  # init wikiEd pages collection
        elif option.startswith('-live:'):
            live_check = True
        elif option.startswith('-recentchanges:'):
            days = float(value)
        elif option.startswith('-api_recentchanges:'):
            changes = pagegenerators.RecentChangesPageGenerator(
                total=int(value),
                namespaces=[0],
                changetype=['edit'],
                showBot=False,
                showRedirects=False)
            generator = [
                (page, page.latestRevision(), page.previousRevision())
                for page in changes
            ]
        elif option.startswith('-report:'):
            report_page = value
        elif option.startswith('-debug_mode'):
            DEBUG_MODE = True
            print('DEBUG MODE!')
        elif option.startswith('-reportlogger'):
            report_log = report_logger.DbReportLogger(pywikibot.Site())
            print('using report logger')
        elif option.startswith('-blacklist:'):
            ignore_sites = parse_blacklist(value)
        elif genFactory.handleArg(option):
            # general page generators for checking the latest revision
            preloaded = pagegenerators.PreloadingGenerator(
                genFactory.getCombinedGenerator())
            generator = [(page, page.latestRevision(), 0)
                         for page in preloaded if page.exists()]

    # Nothing to iterate yet, but database-backed selection was requested.
    if not generator and (talk_template or page_of_pages or days):
        generator = db_changes_generator(site, talk_template, page_of_pages,
                                         days or MAX_AGE, namespace)

    if generator is None and not live_check:
        pywikibot.showHelp()
        return

    if live_check:
        log('running live')
        bot = PlagiaBotLive(pywikibot.Site(),
                            report_page,
                            report_log=report_log)
    else:
        log('running non live')
        bot = PlagiaBot(pywikibot.Site(),
                        generator,
                        report_page,
                        report_log=report_log)
    bot.run()
예제 #5
0
파일: surbot.py 프로젝트: Surkal/Surbot
def main(*args):
    """Parse the command line, choose a page source and run ``MyBot``.

    Page sources, from lowest to highest priority:

    1. recent changes of the last ``--recent`` hours (24 when not given),
       restricted to the comma-separated ``-users`` if supplied;
    2. the category named by ``-cat`` (honouring ``-total`` and ``-start``);
    3. the '/'-separated titles given with ``-pages``.

    Recent changes are used whenever neither ``-cat`` nor ``-pages`` is
    given. ``-r`` used to be declared with ``default=24`` together with
    ``action="store_true"``, which made it always truthy; it is now a real
    flag, and the fallback above keeps the selected source identical to
    what the old declaration produced for every flag combination.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-simulate",
        help="option for test purposes",
        default=False,
        action="store_true"
    )
    parser.add_argument(
        "-site",
        help="Site on which the bot is working",
        default='fr',
        type=str
    )
    parser.add_argument(
        "-summary",
        help="Summary",
        default="Mise en forme",
        type=str
    )
    parser.add_argument(
        "-r",
        help="goes through the pages modified by humans in the last 24 hours",
        default=False,
        action="store_true"
    )
    parser.add_argument(
        "--recent",
        help="goes through the pages modified by humans in the last X hours",
        default=0,
        type=float
    )
    parser.add_argument(
        "-users",
        default=None
    )
    parser.add_argument(
        "-cat",
        help="Category to crawl",
        type=str
    )
    parser.add_argument(
        "-pages",
        help="List of pages, separated by '/'",
        type=str
    )
    parser.add_argument(
        "-total",
        help="Maximum number of pages",
        type=int,
        default=None
    )
    parser.add_argument(
        '-start',
        type=str,
        default=None
    )
    # NOTE(review): the ``*args`` handed to main() are not forwarded;
    # argparse reads sys.argv here, exactly as before.
    options = parser.parse_args()

    pywikibot.config.simulate = options.simulate

    summary = options.summary
    site = pywikibot.Site(options.site, fam='wiktionary')

    gen = None

    if options.cat:
        cat = pywikibot.Category(site, f'Catégorie:{options.cat}')
        gen = pagegenerators.CategorizedPageGenerator(cat, namespaces=0,
                                                      total=options.total,
                                                      start=options.start)

    if options.pages:
        gen = [pywikibot.Page(site, x) for x in options.pages.split('/')]

    if gen is None:
        # Covers -r, --recent and the case where no source was named.
        users = options.users.split(',') if options.users else None
        hours = options.recent or 24
        # NOTE(review): datetime.now() is naive local time - confirm that
        # this is what the recent-changes query expects.
        end = datetime.now() - timedelta(hours=hours)
        gen = pagegenerators.RecentChangesPageGenerator(
            site=site, namespaces=0, showBot=False, end=end,
            topOnly=True, user=users)

    bot = MyBot(site, gen, summary, langs=('sv', 'no', 'da', 'nn', 'nb', 'fi'))
    bot.run()
예제 #6
0
import shelve
from pywikibot import pagegenerators

count = 0
site = pywikibot.Site()
mypage = pywikibot.Page(site, u"User:Bellezzasolo Bot/Pings")

# Persistent "RunTrack" shelf holding the time of the previous run.
runtracker = shelve.open("RunTrack", writeback=True)

# No run recorded yet: start from 30 minutes ago.
if 'lastrun' not in runtracker:
    runtracker['lastrun'] = (
        datetime.datetime.utcnow() - datetime.timedelta(minutes=30))

# A recorded run older than 24 hours is clamped to 24 hours ago.
if (datetime.datetime.utcnow() - datetime.timedelta(hours=24)
        > runtracker['lastrun']):
    runtracker['lastrun'] = (
        datetime.datetime.utcnow() - datetime.timedelta(hours=24))
while True:
    gen = pagegenerators.RecentChangesPageGenerator(namespaces=[1,3,5,7,9,11,13,15,101,109,\
                                                                119,829])
    currun = datetime.datetime.utcnow()
    print("last run: {0}".format(runtracker['lastrun']))
    for page in gen:
        try:
            if page.editTime() < runtracker['lastrun']:
                #OK, we've checked this before or it is too old
                print("Run completed, sleeping")
                time.sleep(60)
                runtracker['lastrun'] = currun
                break
        except:
            #Problem with page, may be deleted
            continue
        for template in page.templatesWithParams():
            #print(template[0]._link.canonical_title())