Python MySQLPageGenerator 예제들, pagegenerators.MySQLPageGenerator Python 예제들

예제 #1

0

파일 보기

def main():
    #page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    always = False

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpSelflinkPageGenerator(xmlFilename)
        elif arg == '-sql':
            # NOT WORKING YET
            query = """
SELECT page_namespace, page_title
FROM page JOIN pagelinks JOIN text ON (page_id = pl_from AND page_id = old_id)
WHERE pl_title = page_title
AND pl_namespace = page_namespace
AND page_namespace = 0
AND (old_text LIKE concat('%[[', page_title, ']]%')
    OR old_text LIKE concat('%[[', page_title, '|%'))
LIMIT 100"""
            gen = pagegenerators.MySQLPageGenerator(query)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)

    if pageTitle:
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp('selflink')
    else:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = SelflinkBot(preloadingGen, always)
        bot.run()

예제 #2

0

파일 보기

파일: object_location_rijksmonumenten.py 프로젝트: xqt/toollabs

def getRijksmonumentWithoutLocation():
    query = """SELECT page_namespace, page_title FROM page
JOIN templatelinks ON page_id=tl_from
WHERE page_namespace=6 AND page_is_redirect=0
AND tl_namespace=10 AND tl_title='Rijksmonument'
AND NOT EXISTS(
SELECT * FROM categorylinks
WHERE page_id=cl_from
AND cl_to='Media_with_locations')"""

    result = pagegenerators.MySQLPageGenerator(query)
    return result

예제 #3

0

파일 보기

def main():
    summary = None
    generator = None
    always = False
    i = 0
    amount = 50
    query = "SELECT page_namespace, page_title FROM page JOIN templatelinks ON page_id=tl_from WHERE page_namespace=0 AND page_is_redirect=0 AND tl_title LIKE 'Co%rdinaten'"
    while True:
        thisquery = query + " LIMIT " + str(i) + " , " + str(amount)
        generator = pagegenerators.MySQLPageGenerator(thisquery)
        i = i + amount

        for page in generator:
            workOnPage(page)

예제 #4

0

파일 보기

파일: intersect_categories.py 프로젝트: xqt/toollabs

def getImages(categoryA, categoryB):
    '''
    Get images both in categoryA and in categoryB
    '''
    result = None

    query = u"""SELECT DISTINCT file.page_namespace, file.page_title FROM page AS file
		JOIN categorylinks AS cat0A ON file.page_id=cat0A.cl_from
		JOIN categorylinks AS cat0B ON file.page_id=cat0B.cl_from

		JOIN page AS pcat0B ON cat0B.cl_to=pcat0B.page_title
		JOIN categorylinks AS cat1B ON pcat0B.page_id=cat1B.cl_from

		JOIN page AS pcat1B ON cat1B.cl_to=pcat1B.page_title
		JOIN categorylinks AS cat2B ON pcat1B.page_id=cat2B.cl_from

		JOIN page AS pcat2B ON cat2B.cl_to=pcat2B.page_title
		JOIN categorylinks AS cat3B ON pcat2B.page_id=cat3B.cl_from

                JOIN page AS pcat3B ON cat3B.cl_to=pcat3B.page_title
                JOIN categorylinks AS cat4B ON pcat3B.page_id=cat4B.cl_from

		WHERE file.page_namespace=6 AND file.page_is_redirect=0 AND
		pcat0B.page_namespace=14 AND pcat0B.page_is_redirect=0 AND
		pcat1B.page_namespace=14 AND pcat1B.page_is_redirect=0 AND
		pcat2B.page_namespace=14 AND pcat2B.page_is_redirect=0 AND
		pcat3B.page_namespace=14 AND pcat3B.page_is_redirect=0 AND
		cat0A.cl_to='%s' AND (
		cat0B.cl_to='%s' OR
		cat1B.cl_to='%s' OR
		cat2B.cl_to='%s' OR
		cat3B.cl_to='%s' OR
		cat4B.cl_to='%s')"""

    catA = categoryA.replace(u' ', u'_').replace(u'\'', u'\\\'')
    catB = categoryB.replace(u' ', u'_').replace(u'\'', u'\\\'')

    print query % (catA, catB, catB, catB, catB, catB)

    result = pagegenerators.MySQLPageGenerator(
        query % (catA, catB, catB, catB, catB, catB))
    return result

예제 #5

0

파일 보기

def main():
    quietMode = False  # use -quiet to get less output
    # if the -file argument is used, page titles are stored in this array.
    # otherwise it will only contain one page.
    articles = []
    # if -file is not used, this temporary array is used to read the page title.
    page_title = []

    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []

    xmlfilename = None
    gen = None

    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlfilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlfilename = arg[5:]
            gen = TableXmlDumpPageGenerator(xmlfilename)
        elif arg == '-sql':
            query = u"""
SELECT page_namespace, page_title
FROM page JOIN text ON (page_id = old_id)
WHERE old_text LIKE '%<table%'
LIMIT 200"""
            gen = pagegenerators.MySQLPageGenerator(query)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg.startswith('-skip:'):
            articles = articles[articles.index(arg[6:]):]
        elif arg.startswith('-auto'):
            config.table2wikiAskOnlyWarnings = True
            config.table2wikiSkipWarnings = True
            print "Automatic mode!\n"
        elif arg.startswith('-quiet'):
            quietMode = True
        else:
            if not genFactory.handleArg(arg):
                page_title.append(arg)

    # if the page is given as a command line argument,
    # connect the title's parts with spaces
    if page_title != []:
        page_title = ' '.join(page_title)
        page = pywikibot.Page(pywikibot.getSite(), page_title)
        gen = iter([page])

    if not gen:
        gen = genFactory.getCombinedGenerator()

    if gen:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = Table2WikiRobot(preloadingGen, quietMode)
        bot.run()
    else:
        pywikibot.showHelp('table2wiki')

예제 #6

0

파일 보기

파일: replace.py 프로젝트: h4ck3rm1k3/rootstriker-fec-bot

def main(*args):
    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a seperate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This is the maximum number of pages to load per query
    maxquerysize = 60
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    editSummary = pywikibot.translate(pywikibot.getSite(), msg)
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Do not save the page titles, rather work on wiki
    titlefile = None
    filename = None
    # If we save, primary behaviour is append rather then new file
    append = True

    # Read commandline parameters.
    for arg in pywikibot.handleArgs(*args):
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = pywikibot.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    pywikibot.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-savenew'):
            append = False
            if len(arg) == 8:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles \n(will be deleted if exists):'
                )
            else:
                filename = arg[9:]
        elif arg.startswith('-save'):
            if len(arg) == 5:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles:')
            else:
                filename = arg[6:]
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            editSummary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-query:'):
            maxquerysize = int(arg[7:])
        else:
            if not genFactory.handleArg(arg):
                commandline_replacements.append(arg)

    if (len(commandline_replacements) % 2):
        raise pywikibot.Error, 'require even number of replacements.'
    elif (len(commandline_replacements) == 2 and fix is None):
        replacements.append(
            (commandline_replacements[0], commandline_replacements[1]))
        if not summary_commandline:
            editSummary = pywikibot.translate(pywikibot.getSite(), msg) % (
                ' (-%s +%s)' %
                (commandline_replacements[0], commandline_replacements[1]))
    elif (len(commandline_replacements) > 1):
        if (fix is None):
            for i in xrange(0, len(commandline_replacements), 2):
                replacements.append((commandline_replacements[i],
                                     commandline_replacements[i + 1]))
            if not summary_commandline:
                pairs = [(commandline_replacements[i],
                          commandline_replacements[i + 1])
                         for i in range(0, len(commandline_replacements), 2)]
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                editSummary = pywikibot.translate(pywikibot.getSite(), msg ) \
                              % replacementsDescription
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix is None:
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                u'Please enter another text that should be replaced, or press Enter to start:'
            )
            if old == '':
                change = change + ')'
                break
            new = pywikibot.input(u'Please enter the new text:')
            change = change + ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = pywikibot.translate(
                pywikibot.getSite(), msg) % change
            pywikibot.output(u'The summary message will default to: %s' %
                             default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a description of the\nchanges your bot will make:'
            )
            if summary_message == '':
                summary_message = default_summary_message
            editSummary = summary_message

    else:
        # Perform one of the predefined actions.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s' %
                             fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            editSummary = pywikibot.translate(pywikibot.getSite(), fix['msg'])
        if "exceptions" in fix:
            exceptions = fix['exceptions']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        replacements = fix['replacements']

    #Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new

    for exceptionCategory in [
            'title', 'require-title', 'text-contains', 'inside'
    ]:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
            for (old, new) in replacements
        ])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions
            ])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [
            pywikibot.Page(pywikibot.getSite(), PageTitle)
            for PageTitle in PageTitles
        ]
        gen = iter(pages)

    gen = genFactory.getCombinedGenerator(gen)
    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return
    if xmlFilename:
        # XML parsing can be quite slow, so use smaller batches and
        # longer lookahead.
        preloadingGen = pagegenerators.PreloadingGenerator(gen,
                                                           pageNumber=20,
                                                           lookahead=100)
    else:
        preloadingGen = pagegenerators.PreloadingGenerator(
            gen, pageNumber=maxquerysize)

    #Finally we open the file for page titles or set article to None
    if filename:
        try:
            #This opens in strict error mode, that means bot will stop
            #on encoding errors with ValueError.
            #See http://docs.python.org/library/codecs.html#codecs.open
            titlefile = codecs.open(filename,
                                    encoding='utf-8',
                                    mode=(lambda x: x and 'a' or 'w')(append))
        except IOError:
            pywikibot.output("%s cannot be opened for writing." % filename)
            return
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, editSummary,
                       titlefile)
    try:
        bot.run()
    finally:
        if titlefile:
            #Just for the spirit of programming (it was flushed)
            titlefile.close()