import pywikibot
from pywikibot import pagegenerators


def main():
    """Process enwiki mainspace pages linking to www.artnet.com/artists/."""
    site = pywikibot.Site('en', 'wikipedia')
    # Find every mainspace (ns 0) page with an external link to
    # www.artnet.com/artists/, preloading page texts in batches.
    generator = pagegenerators.PreloadingGenerator(
        pagegenerators.LinksearchPageGenerator(
            u'www.artnet.com/artists/', namespaces=[0], site=site))
    bot = ArtnetRobot(generator)
    bot.run()
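ArtnetRobot itself is defined elsewhere in that script. As a rough sketch of the shape such a bot usually takes, assuming pywikibot's ExistingPageBot conventions, the class might look like the following; the body is hypothetical (it only reports each page), not the real bot's logic:

import pywikibot
from pywikibot.bot import ExistingPageBot


class ArtnetRobot(ExistingPageBot):
    """Hypothetical stand-in for the real ArtnetRobot.

    The actual class presumably rewrites or flags the artnet links;
    this sketch only reports each page the generator yields.
    """

    def __init__(self, generator):
        super().__init__()
        # BaseBot.run() iterates over self.generator.
        self.generator = generator

    def treat_page(self):
        # self.current_page is set by the bot framework for each page.
        pywikibot.output('artnet link found on: {}'
                         .format(self.current_page.title()))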
def file_from_external_link(self, uri):
    """Identify files from a Nationalmuseum uri.

    Hits are any files containing a link to the eMuseumPlus uri.

    @param uri: reference url on nationalmuseum.se
    @type uri: str
    @return: matching images
    @rtype: list
    """
    images = []
    # Drop the protocol prefix; the linksearch query takes the bare url.
    uri = uri.split('://')[1]
    # Namespace 6 is the File: namespace on Commons.
    objgen = pagegenerators.LinksearchPageGenerator(
        uri, namespaces=[6], site=self.commons)
    for page in objgen:
        images.append(pywikibot.FilePage(self.commons, page.title()))

    # The generator can yield the same page more than once, so deduplicate;
    # pywikibot pages hash by site and title, which makes set() safe here.
    images = list(set(images))
    return images
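One side effect of list(set(images)) is that it discards the order in which the files were found. If first-seen order matters, an order-preserving variant is possible; this is a minimal sketch (the helper name is illustrative, not from the source), relying on dict preserving insertion order in Python 3.7+:

def dedupe_preserving_order(pages):
    """Remove duplicate pages while keeping first-seen order.

    dict.fromkeys keeps insertion order (Python 3.7+), and pywikibot
    pages hash by site and title, so equal pages collapse to one entry.
    """
    return list(dict.fromkeys(pages))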
import pywikibot
from pywikibot import pagegenerators


def main(*args):
    """
    Process command line arguments and perform task.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    spam_external_url = None
    protocol = 'http'
    options = {}
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg == '-always':
            options['always'] = True
        elif arg.startswith('-protocol:'):
            protocol = arg.partition(':')[2]
        elif arg.startswith('-summary:'):
            options['summary'] = arg.partition(':')[2]
        elif gen_factory.handleArg(arg):
            # Standard generator options (e.g. -namespace) are consumed here.
            continue
        else:
            spam_external_url = arg

    if not spam_external_url:
        pywikibot.bot.suggest_help(missing_parameters=['spam site'])
        return

    link_search = pagegenerators.LinksearchPageGenerator(
        spam_external_url, protocol=protocol)
    generator = gen_factory.getCombinedGenerator(gen=link_search)
    generator = pagegenerators.PreloadingGenerator(generator)
    bot = SpamRemoveBot(generator, spam_external_url, **options)
    bot.run()
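This main() is the command-line entry point of a spam-removal script in the style of pywikibot's spamremove. Assuming it is run through the pwb.py wrapper, an invocation could look like this (the domain is a placeholder, not a real target):

python pwb.py spamremove badsite.example.com -protocol:https -summary:"Removing spam links" -always

Because unrecognized arguments fall through to gen_factory.handleArg, standard page-generator options can be mixed in on the same command line and are later combined with the link search via getCombinedGenerator.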