def processFile(row, imageDir):
    """Prepare and upload one dezoomified image to Wikimedia Commons.

    Builds the title and description from the row's metadata, skips the
    file when that title already exists on Commons or when a duplicate
    image is found, downloads the image with dezoomify, and hands it to
    the upload bot.

    @param row: source record, passed to getMetadata()
    @param imageDir: local directory prefix where the temporary jpg is written
    @return: False when the file was skipped; otherwise the upload bot runs
    """
    metadata = getMetadata(row)
    title = getTitle(metadata)
    description = getDescription(metadata)
    # Check if the title already exists
    site = wikipedia.getSite('commons', 'commons')
    page = wikipedia.ImagePage(site, title)
    if page.exists():
        wikipedia.output(
            u'The file %s already exists. Probably already uploaded by me. Skipping'
            % title)
        return False
    wikipedia.output(u'Preparing upload for %s.' % title)
    wikipedia.output(description)
    # Download and dezoomify the image
    tempfile = imageDir + metadata.get('id') + u'.jpg'
    try:
        dezoomify.Dezoomify(url=metadata.get('link'), debug=True,
                            out=tempfile)
    except IOError as e:
        #wikipedia.output(e)
        wikipedia.output(u'Dezoomify failed')
        return False
    # Check for dupe. This probably doesn't work, but it doesn't hurt either.
    duplicates = findDuplicateImages(tempfile)
    if duplicates:
        wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
        return False
    bot = upload.UploadRobot(url=tempfile, description=description,
                             useFilename=title, keepFilename=True,
                             verifyDescription=False)
    bot.run()
def main():
    """Command-line entry point for replacing one image by another.

    Recognized options: -always, -loose and -summary[:text]; the first
    positional argument is the old image name, the second (optional)
    the replacement image name.
    """
    summary = ''
    always = False
    loose = False
    oldImage = None
    newImage = None
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg == '-loose':
            loose = True
        elif arg.startswith('-summary'):
            # '-summary' alone prompts; '-summary:text' supplies the text
            if len(arg) == len('-summary'):
                summary = pywikibot.input(u'Choose an edit summary: ')
            else:
                summary = arg[len('-summary:'):]
        elif oldImage:
            newImage = arg
        else:
            oldImage = arg
    if not oldImage:
        pywikibot.showHelp('image')
        return
    mysite = pywikibot.getSite()
    ns = mysite.image_namespace()
    oldImagePage = pywikibot.ImagePage(mysite, ns + ':' + oldImage)
    gen = pagegenerators.FileLinksGenerator(oldImagePage)
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ImageRobot(preloadingGen, oldImage, newImage, summary, always,
                     loose)
    bot.run()
def run(self):
    """Interactively transfer images linked from the generator's pages.

    For each page, collect candidate images — from its interwiki
    versions when self.interwiki is set, from the page itself when it
    is an image page, or from its image links — then repeatedly ask the
    user which one to transfer until the list is empty or the user
    presses enter.
    """
    for page in self.generator:
        if self.interwiki:
            imagelist = []
            for linkedPage in page.interwiki():
                imagelist += linkedPage.imagelinks(followRedirects = True)
        elif page.isImage():
            imagePage = pywikibot.ImagePage(page.site(), page.title())
            imagelist = [imagePage]
        else:
            imagelist = page.imagelinks(followRedirects = True)
        while len(imagelist)>0:
            self.showImageList(imagelist)
            if len(imagelist) == 1:
                # no need to query the user, only one possibility
                todo = 0
            else:
                pywikibot.output(u"Give the number of the image to transfer.")
                todo = pywikibot.input(u"To end uploading, press enter:")
                if not todo:
                    break
                todo = int(todo)
            if todo in range(len(imagelist)):
                if imagelist[todo].fileIsOnCommons():
                    pywikibot.output(u'The image is already on Wikimedia Commons.')
                else:
                    self.transferImage(imagelist[todo], debug = False)
                # remove the selected image from the list
                imagelist = imagelist[:todo] + imagelist[todo + 1:]
            else:
                pywikibot.output(u'No such image number.')
def UnusedFilesGenerator(number=100, repeat=False, site=None, extension=None):
    """Yield the wiki's unused files as ImagePage objects.

    Falls back to the default site when *site* is not given; all other
    arguments are passed straight through to Site.unusedfiles().
    """
    if site is None:
        site = wikipedia.getSite()
    unused = site.unusedfiles(number=number, repeat=repeat,
                              extension=extension)
    for entry in unused:
        yield wikipedia.ImagePage(entry.site(), entry.title())
def _uploadedImagesOld(self, number=10): """Yield ImagePages from Special:Log&type=upload""" regexp = re.compile( '<li[^>]*>(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a> ' '.* uploaded "<a href=".*?"(?P<new> class="new")? ' 'title="(Image|File):(?P<image>.+?)"\s*>' '(?:.*?<span class="comment">(?P<comment>.*?)</span>)?', re.UNICODE) path = self.site().log_address(number, mode='upload', user=self.name()) html = self.site().getUrl(path) redlink_key = self.site().mediawiki_message('red-link-title') redlink_tail_len = None if redlink_key.startswith('$1 '): redlink_tail_len = len(redlink_key[3:]) for m in regexp.finditer(html): image = m.group('image') deleted = False if m.group('new'): deleted = True if redlink_tail_len: image = image[0:0 - redlink_tail_len] date = m.group('date') comment = m.group('comment') or '' yield pywikibot.ImagePage(self.site(), image), date, comment, deleted
def tagNowCommons(wImage, cImage, timestamp):
    """Prepend {{NowCommons}} to local file *wImage*, pointing at *cImage*.

    Does nothing when the local page is missing or a redirect, or when
    it already carries a template from the per-family/per-language skip
    list; silently gives up when the page is locked.

    @param wImage: title of the local wiki file page
    @param cImage: file name on Wikimedia Commons (underscores allowed)
    @param timestamp: date recorded in the NowCommons template
    """
    site = wikipedia.getSite()
    language = site.language()
    family = site.family.name
    imagepage = wikipedia.ImagePage(wikipedia.getSite(), wImage)
    if not imagepage.exists() or imagepage.isRedirectPage():
        return
    # Pick the family/language-specific skip list, else the default one.
    if skips.get(family) and skips.get(family).get(language):
        localskips = skips.get(family).get(language)
    else:
        localskips = skips.get('_default')
    for template in imagepage.templates():
        title = template.replace(u'_', u' ').strip()
        if title in localskips:
            return
    text = imagepage.get()
    oldtext = text
    text = u'{{NowCommons|File:%s|date=%s|bot=~~~}}\n' % (cImage.replace(
        u'_', u' '), timestamp) + text
    comment = u'File is available on Wikimedia Commons.'
    wikipedia.showDiff(oldtext, text)
    try:
        imagepage.put(text, comment)
        #print u'put'
    except wikipedia.LockedPage:
        return
def processImage(self, page):
    """ Work on a single image.

    Skips pages that are missing, outside the File namespace, redirects,
    on the autoskip list, or without a recognized license template;
    otherwise queues the page for field prefetching.

    @return: False when the page was skipped
    """
    if page.exists() and (page.namespace() == 6) and \
       (not page.isRedirectPage()):
        imagepage = pywikibot.ImagePage(page.site(), page.title())
        #First do autoskip.
        if self.doiskip(imagepage):
            pywikibot.output(
                u'Skipping %s : Got a template on the skip list.'
                % page.title())
            return False
        text = imagepage.get()
        foundMatch = False
        # Look for any known license template for this wiki's language.
        for (regex, replacement) in licenseTemplates[page.site().language()]:
            match = re.search(regex, text, flags=re.IGNORECASE)
            if match:
                foundMatch = True
        if not foundMatch:
            pywikibot.output(
                u'Skipping %s : No suitable license template was found.'
                % page.title())
            return False
        self.prefetchQueue.put(self.getNewFields(imagepage))
def categorizeImages(generator, onlyFilter, onlyUncat):
    ''' Loop over all images in generator and try to categorize them.
    Get category suggestions from CommonSense.

    @param onlyFilter: when true, skip the CommonSense lookup and only
        filter the image's current categories
    @param onlyUncat: when true, only work on images carrying the
        Uncategorized template
    '''
    for page in generator:
        # Only existing, non-redirect pages in the File namespace (6).
        if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()):
            imagepage = wikipedia.ImagePage(page.site(), page.title())
            wikipedia.output(u'Working on ' + imagepage.title())
            if (onlyUncat and not (u'Uncategorized' in imagepage.templates())):
                wikipedia.output(u'No Uncategorized template found')
            else:
                currentCats = getCurrentCats(imagepage)
                if (onlyFilter):
                    commonshelperCats = []
                    usage = []
                    galleries = []
                else:
                    (commonshelperCats, usage,
                     galleries) = getCommonshelperCats(imagepage)
                newcats = applyAllFilters(commonshelperCats + currentCats)
                # Only save when the filtered category set actually changed.
                if (len(newcats) > 0 and not (set(currentCats) == set(newcats))):
                    for cat in newcats:
                        wikipedia.output(u' Found new cat: ' + cat)
                    saveImagePage(imagepage, newcats, usage, galleries,
                                  onlyFilter)
def ImageGenerator(generator):
    """
    Wraps around another generator. Yields the same pages, but as
    ImagePage objects instead of Page objects. Makes sense only if it is
    ascertained that only images are being retrieved.
    """
    for page in generator:
        yield pywikibot.ImagePage(page.site(), page.title())
def listfiles_generator():
    """Yield ImagePage objects scraped from the wiki's file list page."""
    site = wikipedia.getSite()
    path = filelist_address(site.family, site.lang)
    ns = site.image_namespace()
    html = site.getUrl(path)
    # One anchor per listed file; the title group carries the namespace.
    pattern = re.compile('<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns)
    for match in pattern.finditer(html):
        yield wikipedia.ImagePage(site, match.group('title'))
def moveimage(name):
    """Transfer one image from the local wiki (wikien) to Commons.

    Tags the local copy with ncd() when a same-titled page already
    exists on Commons, otherwise uploads and then tags it.

    @return: False when the upload fails
    """
    #HACK
    name = str(name)
    # NOTE(review): str.title() capitalizes every word — presumably an
    # intended title normalization; confirm against callers.
    name = name.title()
    name = wikipedia.ImagePage(wikien, name)
    if wikipedia.Page(commons, name.title()).exists():
        print '%s is already on the commons.' % (name.title())
        ncd(name)
        return
    uploadres = upload(name)
    if uploadres == False:
        return False
    ncd(name)
def uploadedImages(self, number=10):
    """Yield (ImagePage, timestamp, comment, exists) for this user's uploads.

    Falls back to HTML scraping (_uploadedImagesOld) on wikis without
    the API or older than MediaWiki 1.11; otherwise reads the upload
    log through the API. The trailing bool is True when the file page
    still exists (pageid > 0).

    @param number: maximum number of log entries to fetch
    """
    if not self.site().has_api() or self.site().versionnumber() < 11:
        for c in self._uploadedImagesOld(number):
            yield c
        return
    for s in self.site().logpages(number, mode='upload', user=self.name(),
                                  dump=True):
        yield wikipedia.ImagePage(
            self.site(),
            s['title']), s['timestamp'], s['comment'], s['pageid'] > 0
    return
def main(args):
    """Tag local files that are also available on Commons.

    Builds a page generator from the command-line arguments; for every
    local file not yet tagged, looks the file up on Commons by its hash
    and, when a duplicate is found, appends the NowCommons template.
    """
    generator = None
    # NOTE(review): 'always' is never read below in this function.
    always = False
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            'You have to specify the generator you want to use for the script!'
        )
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    for page in pregenerator:
        # Only existing, non-redirect pages in the File namespace (6).
        if page.exists() and (page.namespace() == 6) and \
           (not page.isRedirectPage()):
            imagepage = pywikibot.ImagePage(page.site(), page.title())
            foundNowCommons = False
            for template in imagepage.templates():
                #FIXME: Move the templates list to a lib.
                if template in pywikibot.translate(imagepage.site(),
                                                   nowCommons):
                    foundNowCommons = True
            if foundNowCommons:
                pywikibot.output(
                    u'The file %s is already tagged with NowCommons'
                    % imagepage.title())
            else:
                imagehash = imagepage.getHash()
                commons = pywikibot.getSite(u'commons', u'commons')
                duplicates = commons.getFilesFromAnHash(imagehash)
                if duplicates:
                    duplicate = duplicates.pop()
                    pywikibot.output(u'Found duplicate image at %s'
                                     % duplicate)
                    comment = i18n.twtranslate(
                        imagepage.site(), 'commons-file-now-available',
                        {'localfile': imagepage.titleWithoutNamespace(),
                         'commonsfile': duplicate})
                    template = pywikibot.translate(imagepage.site(),
                                                   nowCommonsTemplate)
                    newtext = imagepage.get() + template % (duplicate, )
                    pywikibot.showDiff(imagepage.get(), newtext)
                    imagepage.put(newtext, comment)
def main(args):
    ''' Main loop.

    Categorize geograph images on Commons, either from a command-line
    page generator or, when none is given, by walking every topic from
    the database. All connections are Toolserver database replicas.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    conn = None
    cursor = None
    (conn, cursor) = geograph_lib.connectDatabase()
    conn2 = None
    cursor2 = None
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    conn3 = None
    cursor3 = None
    (conn3, cursor3) = geograph_lib.connectDatabase2(
        'commonswiki-p.db.toolserver.org', u'commonswiki_p')
    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if generator:
        # Work on the images selected on the command line.
        for page in generator:
            if page.exists() and page.namespace()==6 and not page.isRedirectPage():
                wikipedia.output(page.title())
                id = getGeographId(page)
                if id:
                    geograph_lib.categorizeImage(page, id, cursor, cursor2)
    else:
        # No generator: walk every topic and all of its images instead.
        topics = getTopics(cursor)
        for (topic,) in topics:
            images = getImagesWithTopic(cursor3, topic)
            for (imageName, id) in images:
                try:
                    page = wikipedia.ImagePage(wikipedia.getSite(),
                                               u'File:' + imageName)
                    if page.exists() and page.namespace()==6 and not page.isRedirectPage():
                        wikipedia.output(page.title())
                        geograph_lib.categorizeImage(page, id, cursor,
                                                     cursor2)
                except UnicodeDecodeError:
                    # Python 2 byte-string title that cannot be decoded.
                    print "UnicodeDecodeError, can't find the source. yah! :-("
                    pass
def getImagelinks(self, page, min=0, step=50, sort=""):
    """Yield ImagePage objects for all images used on *page*.

    Queries the wiki's SQL replica directly: resolves the page id from
    title/namespace, then reads the imagelinks table. *min* and *step*
    are passed through to the paging helper; *sort* is appended
    verbatim to the query.
    """
    q = """ SELECT il_to FROM %s.imagelinks WHERE il_from=( SELECT page_id FROM %s.page WHERE page_title=%%s AND page_namespace=%%s) """ % (
        (page.site().dbName(), ) * 2)
    q += sort
    for row in self._generate(
            q, min, step,
            (page.titleWithoutNamespace(True).encode('utf-8'),
             page.namespace())):
        yield wikipedia.ImagePage(
            page.site(),
            page.site().image_namespace() + ":" + row['il_to'].decode('utf-8'),
            page.site())
def tagNowCommons(page):
    """Tag *page*'s file with NowCommons when an identical file exists on Commons.

    Looks the local file up on Commons by its hash; does nothing when
    the file is already on Commons, carries a skip-listed template, no
    duplicate is found, or the local page is locked.
    """
    imagepage = pywikibot.ImagePage(page.site(), page.title())
    site = page.site()
    language = site.language()
    family = site.family.name
    if not imagepage.fileIsOnCommons():
        # Pick the family/language-specific skip list, else the default.
        if family in skips and language in skips[family]:
            localskips = skips[family][language]
        else:
            localskips = skips['_default']
        for template in imagepage.templates():
            #FIXME: Move the templates list to a lib.
            if template in localskips:
                pywikibot.output(
                    u'The file %s is already tagged with NowCommons'
                    % imagepage.title())
                return
        imagehash = imagepage.getHash()
        commons = pywikibot.getSite(u'commons', u'commons')
        duplicates = commons.getFilesFromAnHash(imagehash)
        if duplicates:
            duplicate = duplicates.pop()
            pywikibot.output(u'Found duplicate image at %s' % duplicate)
            comment = i18n.twtranslate(
                imagepage.site(), 'commons-file-now-available',
                {'localfile': imagepage.title(withNamespace=False),
                 'commonsfile': duplicate})
            template = pywikibot.translate(imagepage.site(),
                                           nowCommonsTemplate)
            newtext = imagepage.get() + template % (duplicate, )
            pywikibot.showDiff(imagepage.get(), newtext)
            try:
                imagepage.put(newtext, comment)
            except pywikibot.LockedPage:
                return
def uploadedImages(self, number=10):
    """
    Yield tuples describing files uploaded by this user.

    Each tuple is composed of a pywikibot.Page, the timestamp (unicode),
    the upload comment (unicode) and a bool which is True when the file
    page still exists (pageid > 0).

    Pages returned are not guaranteed to be unique.

    @param number: limit result to this number of pages
    @type number: int
    """
    if self.isAnonymous():
        # PEP 479: 'raise StopIteration' inside a generator becomes a
        # RuntimeError on Python 3.7+; a plain return ends the
        # generator cleanly on every version.
        return
    # Pre-API wikis (or MediaWiki < 1.11) fall back to HTML scraping.
    if not self.site().has_api() or self.site().versionnumber() < 11:
        for c in self._uploadedImagesOld(number):
            yield c
        return
    for item in self.site().logpages(number, mode='upload',
                                     user=self.username, dump=True):
        yield pywikibot.ImagePage(self.site(), item['title']), \
            item['timestamp'], item['comment'], item['pageid'] > 0
def main():
    """Download the files selected on the command line from Commons.

    Understands -target:<dir> for the local download directory; every
    other argument is handed to the standard generator factory.
    """
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    target = u'/Users/hay/tmp/wlm/'
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-target:'):
            target = arg[len('-target:'):]
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        return
    # Get a preloading generator with only images (namespace 6).
    filtered = pagegenerators.NamespaceFilterPageGenerator(generator, [6])
    pgenerator = pagegenerators.PreloadingGenerator(filtered)
    for page in pgenerator:
        imagepage = wikipedia.ImagePage(page.site(), page.title())
        downloadFile(imagepage, target)
def handleArg(self, arg):
    """Parse one argument at a time.

    If it is recognized as an argument that specifies a generator, a
    generator is created and added to the accumulation list, and the
    function returns true.  Otherwise, it returns false, so that caller
    can try parsing the argument. Call getCombinedGenerator() after all
    arguments have been parsed to get the final output generator.
    """
    site = wikipedia.getSite()
    gen = None
    if arg.startswith('-filelinks'):
        fileLinksPageTitle = arg[11:]
        if not fileLinksPageTitle:
            fileLinksPageTitle = wikipedia.input(
                u'Links to which image page should be processed?')
        if fileLinksPageTitle.startswith(site.namespace(6) + ":"):
            fileLinksPage = wikipedia.ImagePage(site, fileLinksPageTitle)
        else:
            fileLinksPage = wikipedia.ImagePage(
                site, 'Image:' + fileLinksPageTitle)
        gen = FileLinksGenerator(fileLinksPage)
    elif arg.startswith('-unusedfiles'):
        if len(arg) == 12:
            gen = UnusedFilesGenerator()
        else:
            gen = UnusedFilesGenerator(number=int(arg[13:]))
    elif arg.startswith('-unwatched'):
        if len(arg) == 10:
            gen = UnwatchedPagesPageGenerator()
        else:
            gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
    elif arg.startswith('-usercontribs'):
        gen = UserContributionsGenerator(arg[14:])
    elif arg.startswith('-withoutinterwiki'):
        if len(arg) == 17:
            gen = WithoutInterwikiPageGenerator()
        else:
            gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
    elif arg.startswith('-interwiki'):
        title = arg[11:]
        if not title:
            title = wikipedia.input(u'Which page should be processed?')
        page = wikipedia.Page(site, title)
        gen = InterwikiPageGenerator(page)
    elif arg.startswith('-randomredirect'):
        if len(arg) == 15:
            gen = RandomRedirectPageGenerator()
        else:
            gen = RandomRedirectPageGenerator(number=int(arg[16:]))
    elif arg.startswith('-random'):
        if len(arg) == 7:
            gen = RandomPageGenerator()
        else:
            gen = RandomPageGenerator(number=int(arg[8:]))
    elif arg.startswith('-recentchanges'):
        if len(arg) == 14:
            gen = RecentchangesPageGenerator()
        else:
            gen = RecentchangesPageGenerator(number=int(arg[15:]))
    elif arg.startswith('-file'):
        textfilename = arg[6:]
        if not textfilename:
            textfilename = wikipedia.input(
                u'Please enter the local file name:')
        gen = TextfilePageGenerator(textfilename)
    elif arg.startswith('-namespace'):
        if len(arg) == len('-namespace'):
            self.namespaces.append(
                wikipedia.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-namespace:'):].split(","))
        return True
    elif arg.startswith('-catr'):
        gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
    elif arg.startswith('-category'):
        gen = self.getCategoryGen(arg, len('-category'))
    elif arg.startswith('-cat'):
        gen = self.getCategoryGen(arg, len('-cat'))
    elif arg.startswith('-subcatsr'):
        gen = self.setSubCategoriesGen(arg, 9, recurse=True)
    elif arg.startswith('-subcats'):
        gen = self.setSubCategoriesGen(arg, 8)
    # This parameter is deprecated, catr should be used instead.
    elif arg.startswith('-subcat'):
        gen = self.getCategoryGen(arg, 7, recurse=True)
    elif arg.startswith('-page'):
        if len(arg) == len('-page'):
            gen = [
                wikipedia.Page(
                    site,
                    wikipedia.input(u'What page do you want to use?'))
            ]
        else:
            gen = [wikipedia.Page(site, arg[len('-page:'):])]
    elif arg.startswith('-uncatfiles'):
        gen = UnCategorizedImageGenerator()
    elif arg.startswith('-uncatcat'):
        gen = UnCategorizedCategoryGenerator()
    elif arg.startswith('-uncat'):
        gen = UnCategorizedPageGenerator()
    elif arg.startswith('-ref'):
        referredPageTitle = arg[5:]
        if not referredPageTitle:
            referredPageTitle = wikipedia.input(
                u'Links to which page should be processed?')
        referredPage = wikipedia.Page(site, referredPageTitle)
        gen = ReferringPageGenerator(referredPage)
    elif arg.startswith('-links'):
        linkingPageTitle = arg[7:]
        if not linkingPageTitle:
            linkingPageTitle = wikipedia.input(
                u'Links from which page should be processed?')
        linkingPage = wikipedia.Page(site, linkingPageTitle)
        gen = LinkedPageGenerator(linkingPage)
    elif arg.startswith('-weblink'):
        url = arg[9:]
        if not url:
            url = wikipedia.input(
                u'Pages with which weblink should be processed?')
        gen = LinksearchPageGenerator(url)
    elif arg.startswith('-transcludes'):
        transclusionPageTitle = arg[len('-transcludes:'):]
        if not transclusionPageTitle:
            transclusionPageTitle = wikipedia.input(
                u'Pages that transclude which page should be processed?')
        transclusionPage = wikipedia.Page(
            site, "%s:%s" % (site.namespace(10), transclusionPageTitle))
        gen = ReferringPageGenerator(transclusionPage,
                                     onlyTemplateInclusion=True)
    elif arg.startswith('-gorandom'):
        for firstPage in RandomPageGenerator(number=1):
            firstPageTitle = firstPage.title()
        namespace = wikipedia.Page(site, firstPageTitle).namespace()
        firstPageTitle = wikipedia.Page(
            site, firstPageTitle).titleWithoutNamespace()
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-start'):
        if arg.startswith('-startxml'):
            wikipedia.output(u'-startxml : wrong parameter')
            sys.exit()
        firstPageTitle = arg[7:]
        if not firstPageTitle:
            firstPageTitle = wikipedia.input(
                u'At which page do you want to start?')
        namespace = wikipedia.Page(site, firstPageTitle).namespace()
        firstPageTitle = wikipedia.Page(
            site, firstPageTitle).titleWithoutNamespace()
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-prefixindex'):
        prefix = arg[13:]
        namespace = None
        if not prefix:
            prefix = wikipedia.input(
                u'What page names are you looking for?')
        gen = PrefixingPageGenerator(prefix=prefix)
    elif arg.startswith('-newimages'):
        limit = arg[11:] or wikipedia.input(
            u'How many images do you want to load?')
        gen = NewimagesPageGenerator(number=int(limit))
    elif arg.startswith('-new'):
        if len(arg) >= 5:
            gen = NewpagesPageGenerator(number=int(arg[5:]))
        else:
            gen = NewpagesPageGenerator(number=60)
    elif arg.startswith('-imagelinks'):
        imagelinkstitle = arg[len('-imagelinks:'):]
        if not imagelinkstitle:
            imagelinkstitle = wikipedia.input(
                u'Images on which page should be processed?')
        imagelinksPage = wikipedia.Page(site, imagelinkstitle)
        gen = ImagesPageGenerator(imagelinksPage)
    elif arg.startswith('-search'):
        mediawikiQuery = arg[8:]
        if not mediawikiQuery:
            mediawikiQuery = wikipedia.input(
                u'What do you want to search for?')
        # In order to be useful, all namespaces are required
        gen = SearchPageGenerator(mediawikiQuery, namespaces=[])
    elif arg.startswith('-google'):
        gen = GoogleSearchPageGenerator(arg[8:])
    elif arg.startswith('-titleregex'):
        # BUGFIX: the length check and slice previously used 6 and 7
        # (copied from a shorter option), so '-titleregex:foo' produced
        # the garbage pattern 'egex:foo'.
        if len(arg) == len('-titleregex'):
            regex = wikipedia.input(
                u'What page names are you looking for?')
        else:
            regex = arg[len('-titleregex:'):]
        gen = RegexFilterPageGenerator(site.allpages(), regex)
    elif arg.startswith('-yahoo'):
        gen = YahooSearchPageGenerator(arg[7:])
    else:
        pass
    if gen:
        self.gens.append(gen)
        return self.getCombinedGenerator()
    else:
        return False
def _parseCategory(self, purge=False, startFrom=None):
    """
    Yields all articles and subcategories that are in this category by API.

    Set startFrom to a string which is the title of the page to start from.

    Yielded results are tuples in the form (tag, page) where tag is one
    of the constants ARTICLE and SUBCATEGORY, and title is the Page or
    Category object.

    Note that results of this method need not be unique.

    This should not be used outside of this module.
    """
    # Pre-API wikis (or MediaWiki < 1.11) fall back to HTML scraping.
    if not self.site().has_api() or self.site().versionnumber() < 11:
        for tag, page in self._oldParseCategory(purge, startFrom):
            yield tag, page
        return
    currentPageOffset = None
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': self.title(),
        'cmprop': ['title', 'ids', 'sortkey', 'timestamp'],
        #'': '',
    }
    while True:
        # Cap each request at 500 members.
        if wikipedia.config.special_page_limit > 500:
            params['cmlimit'] = 500
        else:
            params['cmlimit'] = wikipedia.config.special_page_limit
        if currentPageOffset:
            params['cmcontinue'] = currentPageOffset
            wikipedia.output(
                'Getting [[%s]] list from %s...'
                % (self.title(),
                   currentPageOffset[:-1]))  # cmcontinue last key is '|'
        elif startFrom:
            params['cmstartsortkey'] = startFrom
            wikipedia.output('Getting [[%s]] list starting at %s...'
                             % (self.title(), startFrom))
        else:
            wikipedia.output('Getting [[%s]]...' % self.title())
        wikipedia.get_throttle()
        data = query.GetData(params, self.site())
        if 'error' in data:
            raise RuntimeError("%s" % data['error'])
        count = 0
        for memb in data['query']['categorymembers']:
            count += 1
            # For MediaWiki versions where subcats look like articles
            if memb['ns'] == 14:
                yield SUBCATEGORY, Category(self.site(), memb['title'],
                                            sortKey=memb['sortkey'])
            elif memb['ns'] == 6:
                yield ARTICLE, wikipedia.ImagePage(self.site(),
                                                   memb['title'])
            else:
                yield ARTICLE, wikipedia.Page(self.site(), memb['title'],
                                              defaultNamespace=memb['ns'])
            if count >= params['cmlimit']:
                break
        # try to find a link to the next list page
        if 'query-continue' in data and count < params['cmlimit']:
            currentPageOffset = data['query-continue']['categorymembers'][
                'cmcontinue']
        else:
            break
def main(args):
    """Interactively transfer local files to Wikimedia Commons.

    Builds a page generator from the command line (-always skips the
    dialog and keeps the old name, -cc:<cat> adds an extra category);
    for every suitable local file the user can pick a new name via a Tk
    dialog, after which an imageTransfer worker thread is started.
    Waits for all worker threads before returning.
    """
    generator = None
    #newname = "";
    imagepage = None
    always = False
    category = u''
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg.startswith('-cc:'):
            category = arg[len('-cc:'):]
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            'You have to specify the generator you want to use for the script!'
        )
    pregenerator = pagegenerators.PreloadingGenerator(generator)
    for page in pregenerator:
        skip = False
        # Only existing, non-redirect pages in the File namespace (6).
        if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()):
            imagepage = pywikibot.ImagePage(page.site(), page.title())
            #First do autoskip.
            if doiskip(imagepage.get()):
                pywikibot.output("Skipping " + page.title())
                skip = True
            else:
                # The first upload is last in the list.
                try:
                    username = imagepage.getLatestUploader()[0]
                except NotImplementedError:
                    #No API, using the page file instead
                    (datetime, username, resolution, size,
                     comment) = imagepage.getFileVersionHistory().pop()
                if always:
                    newname = imagepage.titleWithoutNamespace()
                    CommonsPage = pywikibot.Page(
                        pywikibot.getSite('commons', 'commons'),
                        u'File:' + newname)
                    if CommonsPage.exists():
                        skip = True
                else:
                    while True:
                        # Do the Tkdialog to accept/reject and change the name
                        (newname, skip) = Tkdialog(
                            imagepage.titleWithoutNamespace(),
                            imagepage.get(), username,
                            imagepage.permalink(),
                            imagepage.templates()).getnewname()
                        if skip:
                            pywikibot.output('Skipping this image')
                            break
                        # Did we enter a new name?
                        if len(newname) == 0:
                            #Take the old name
                            newname = imagepage.titleWithoutNamespace()
                        else:
                            newname = newname.decode('utf-8')
                        # Check if the image already exists
                        CommonsPage = pywikibot.Page(
                            pywikibot.getSite('commons', 'commons'),
                            u'File:' + newname)
                        if not CommonsPage.exists():
                            break
                        else:
                            pywikibot.output(
                                'Image already exists, pick another name or skip this image'
                            )
                        # We dont overwrite images, pick another name, go to the start of the loop
                if not skip:
                    imageTransfer(imagepage, newname, category).start()
    pywikibot.output(u'Still ' + str(threading.activeCount()) +
                     u' active threads, lets wait')
    for openthread in threading.enumerate():
        if openthread != threading.currentThread():
            openthread.join()
    pywikibot.output(u'All threads are done')
def _parseCategory(self, purge=False, startFrom=None, sortby=None,
                   sortdir=None, endsort=None):
    """
    Yields all articles and subcategories that are in this category by API.

    Set startFrom to a string which is the title of the page to start from.

    Yielded results are tuples in the form (tag, page) where tag is one
    of the constants ARTICLE and SUBCATEGORY, and title is the Page or
    Category object.

    Note that results of this method need not be unique.

    This should not be used outside of this module.
    """
    # Pre-API wikis (or MediaWiki < 1.11) fall back to HTML scraping.
    if not self.site().has_api() or self.site().versionnumber() < 11:
        for tag, page in self._oldParseCategory(purge, startFrom):
            yield tag, page
        return
    currentPageOffset = None
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': self.title(),
        'cmprop': ['title', 'ids', 'sortkey', 'timestamp'],
        #'': '',
    }
    # sortkeyprefix is only available from MediaWiki 1.17 on.
    if self.site().versionnumber() > 16:
        params['cmprop'].append('sortkeyprefix')
    if sortby:
        params['cmsort'] = sortby
    if sortdir:
        params['cmdir'] = sortdir
    while True:
        # Cap each request at 500 members.
        if pywikibot.config.special_page_limit > 500:
            params['cmlimit'] = 500
        else:
            params['cmlimit'] = pywikibot.config.special_page_limit
        if currentPageOffset:
            params.update(currentPageOffset)
            pywikibot.output('Getting [[%s]] list from %s...'
                             % (self.title(),
                                "%s=%s" % currentPageOffset.popitem()))
        else:
            msg = 'Getting [[%s]] list' % self.title()
            # category sort keys are uppercase
            if startFrom:
                startFrom = startFrom.upper()
                params['cmstartsortkey'] = startFrom
                msg += ' starting at %s' % startFrom
            if endsort:
                endsort = endsort.upper()
                params['cmendsortkey'] = endsort
                msg += ' ending at %s' % endsort
            pywikibot.output(msg + u'...')
        pywikibot.get_throttle()
        data = query.GetData(params, self.site())
        if 'error' in data:
            raise RuntimeError("%s" % data['error'])
        count = 0
        for memb in data['query']['categorymembers']:
            count += 1
            # For MediaWiki versions where subcats look like articles
            if memb['ns'] == 14:
                if 'sortkeyprefix' in memb:
                    sortKeyPrefix = memb['sortkeyprefix']
                else:
                    sortKeyPrefix = None
                yield SUBCATEGORY, Category(self.site(), memb['title'],
                                            sortKey=memb['sortkey'],
                                            sortKeyPrefix=sortKeyPrefix)
            elif memb['ns'] == 6:
                yield ARTICLE, pywikibot.ImagePage(self.site(),
                                                   memb['title'])
            else:
                page = pywikibot.Page(self.site(), memb['title'],
                                      defaultNamespace=memb['ns'])
                if 'sortkeyprefix' in memb:
                    page.sortkeyprefix = memb['sortkeyprefix']
                else:
                    page.sortkeyprefix = None
                yield ARTICLE, page
            if count >= params['cmlimit']:
                break
        # try to find a link to the next list page
        if 'query-continue' in data and count < params['cmlimit']:
            currentPageOffset = data['query-continue']['categorymembers']
        else:
            break
def _oldParseCategory(self, purge=False, startFrom=None):
    """Yields all articles and subcategories that are in this category.

    Set purge to True to instruct MediaWiki not to serve a cached version.

    Set startFrom to a string which is the title of the page to start from.

    Yielded results are tuples in the form (tag, page) where tag is one
    of the constants ARTICLE and SUBCATEGORY, and title is the Page or
    Category object.

    Note that results of this method need not be unique.

    This should not be used outside of this module.
    """
    # Pre-API HTML scraping; the regexes depend on the skin markup of
    # each MediaWiki version range.
    if self.site().versionnumber() < 4:
        Rtitle = re.compile('title\s?=\s?\"([^\"]*)\"')
    elif self.site().versionnumber() < 8:
        # FIXME seems to parse all links
        Rtitle = re.compile('/\S*(?: title\s?=\s?)?\"([^\"]*)\"')
    else:
        Rtitle = re.compile(
            '<li>(?:<span.*?>)?<a href=\".*?\"\s?title\s?=\s?\"'
            '([^\"]*)\"\>\+?[^\<\+]')
    if self.site().versionnumber() < 8:
        Rsubcat = None
        Rimage = None
    else:
        Rsubcat = re.compile(
            'CategoryTreeLabelCategory\"\s?href=\".+?\">(.+?)</a>')
        Rimage = re.compile(
            '<div class\s?=\s?\"thumb\"\sstyle=\"[^\"]*\">'
            '(?:<div style=\"[^\"]*\">)?<a href=\".*?\"'
            '(?:\sclass="image")?\stitle\s?=\s?\"([^\"]*)\"')
    # regular expression matching the "(next 200)" link
    RLinkToNextPage = re.compile('&from=(.*?)" title="')
    if startFrom:
        currentPageOffset = urllib.quote(
            startFrom.encode(self.site().encoding()))
    else:
        currentPageOffset = None
    while True:
        path = self.site().get_address(self.urlname())
        if purge:
            path += '&action=purge'
        if currentPageOffset:
            path += '&from=' + currentPageOffset
            pywikibot.output(
                'Getting [[%s]] starting at %s...'
                % (self.title(),
                   pywikibot.url2link(currentPageOffset, self.site(),
                                      self.site())))
        else:
            pywikibot.output('Getting [[%s]]...' % self.title())
        pywikibot.get_throttle()
        txt = self.site().getUrl(path)
        # index where subcategory listing begins
        if self.site().versionnumber() >= 9:
            # These IDs were introduced in 1.9
            if '<div id="mw-subcategories">' in txt:
                ibegin = txt.index('<div id="mw-subcategories">')
            elif '<div id="mw-pages">' in txt:
                ibegin = txt.index('<div id="mw-pages">')
            elif '<div id="mw-category-media">' in txt:
                ibegin = txt.index('<div id="mw-category-media">')
            else:
                # No pages
                return
        else:
            # does not work for cats without text
            ibegin = txt.index('<!-- start content -->')
            # TODO: This parses category text and may think they are
            # pages in category! Check for versions before 1.9
        # index where article listing ends
        if '<div class="printfooter">' in txt:
            iend = txt.index('<div class="printfooter">')
        elif '<div class="catlinks">' in txt:
            iend = txt.index('<div class="catlinks">')
        else:
            iend = txt.index('<!-- end content -->')
        txt = txt[ibegin:iend]
        for title in Rtitle.findall(txt):
            if title == self.title():
                # This is only a link to "previous 200" or "next 200".
                # Ignore it.
                pass
            # For MediaWiki versions where subcats look like articles
            elif isCatTitle(title, self.site()):
                yield SUBCATEGORY, Category(self.site(), title)
            else:
                yield ARTICLE, pywikibot.Page(self.site(), title)
        if Rsubcat:
            # For MediaWiki versions where subcats look differently
            for titleWithoutNamespace in Rsubcat.findall(txt):
                title = 'Category:%s' % titleWithoutNamespace
                yield SUBCATEGORY, Category(self.site(), title)
        if Rimage:
            # For MediaWiki versions where images work through galleries
            for title in Rimage.findall(txt):
                # In some MediaWiki versions, the titles contain the
                # namespace, but they don't in other (newer) versions. Use
                # the ImagePage's defaultNamespace feature to get everything
                # correctly.
                yield ARTICLE, pywikibot.ImagePage(self.site(), title)
        # try to find a link to the next list page
        matchObj = RLinkToNextPage.search(txt)
        if matchObj:
            currentPageOffset = matchObj.group(1)
        else:
            break
lcontent = pywikibot.translate(site, content) category = pywikibot.translate(site, cat) putmsg = pywikibot.translate(site, msg) #from non-free copyright tag category get all EDPtemplate templatecat = catlib.Category(site, category) templatelist = templatecat.articlesList() #from References of EDP template get all non-free images for tempalte in templatelist: images = [page for page in tempalte.getReferences() if page.isImage()] for image in images: imagetitle = image.title() imagepage = pywikibot.ImagePage(site, imagetitle) #from imagepage get all usingPages of non-articles pimages = [ puseimage for puseimage in imagepage.usingPages() if puseimage.namespace() <> 0 ] for pimage in pimages: ns = pimage.namespace() pimagetitle = pimage.title() c = u'\nfond an used the image [[%s]] in [[%s]]: ' \ % (imagetitle, pimagetitle) text = pimage.get() try: re.search('<!--(.*?)' + imagetitle + '(.*?)-->', text, re.I).group(0)
def main():
    """Compare two image pages, possibly on different wikis.

    Expects exactly two positional image titles; -familyA/-familyB and
    -langA/-langB select each image's wiki (defaulting to Commons when
    no language is given, and to Wikipedia when only a language is
    given). Hands both ImagePages to matchImagePages().
    """
    site = wikipedia.getSite(u'commons', u'commons')
    #Array of images to work on
    images = []
    imageTitleA = u''
    imageTitleB = u''
    familyA = u''
    familyB = u''
    langA = u''
    langB = u''
    imagePageA = None
    imagePageB = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-familyA:'):
            if len(arg) == len('-familyA:'):
                familyA = wikipedia.input(u'What family do you want to use?')
            else:
                familyA = arg[len('-familyA:'):]
        elif arg.startswith('-familyB:'):
            if len(arg) == len('-familyB:'):
                familyB = wikipedia.input(u'What family do you want to use?')
            else:
                familyB = arg[len('-familyB:'):]
        elif arg.startswith('-langA:'):
            if len(arg) == len('-langA:'):
                langA = wikipedia.input(u'What language do you want to use?')
            else:
                langA = arg[len('-langA:'):]
        elif arg.startswith('-langB:'):
            if len(arg) == len('-langB:'):
                langB = wikipedia.input(u'What language do you want to use?')
            else:
                # BUGFIX: was arg[len('langB:'):] (missing the leading
                # dash), which left a stray ':' on the language code.
                langB = arg[len('-langB:'):]
        else:
            images.append(arg)
    if not (len(images) == 2):
        # Call form works on both Python 2 and 3 (was 'raise E, msg').
        raise wikipedia.Error('require two images to work on.')
    else:
        imageTitleA = images[0]
        imageTitleB = images[1]
    if not (imageTitleA == u''):
        if not (langA == u''):
            if not (familyA == u''):
                imagePageA = wikipedia.ImagePage(
                    wikipedia.getSite(langA, familyA), imageTitleA)
            else:
                imagePageA = wikipedia.ImagePage(
                    wikipedia.getSite(langA, u'wikipedia'), imageTitleA)
        else:
            imagePageA = wikipedia.ImagePage(
                wikipedia.getSite(u'commons', u'commons'), imageTitleA)
    if not (imageTitleB == u''):
        if not (langB == u''):
            if not (familyB == u''):
                imagePageB = wikipedia.ImagePage(
                    wikipedia.getSite(langB, familyB), imageTitleB)
            else:
                imagePageB = wikipedia.ImagePage(
                    wikipedia.getSite(langB, u'wikipedia'), imageTitleB)
        else:
            imagePageB = wikipedia.ImagePage(
                wikipedia.getSite(u'commons', u'commons'), imageTitleB)
    if imagePageA and imagePageB:
        matchImagePages(imagePageA, imagePageB)
def run(self):
    """Process each candidate image and delete local copies found on Commons.

    For every page yielded by getPageGenerator(): find the corresponding
    file name on Commons (either from the NowCommons template or, when the
    module-level flag use_hash is set, from a precomputed
    (local, commons) pair), optionally replace remaining local usages via
    image.ImageRobot, and delete the local file when its MD5 matches the
    Commons copy (after an interactive license check unless the
    module-level 'autonomous' flag is set).

    Relies on module-level flags set from the command line: use_hash,
    replace, replacealways, replaceloose, replaceonly, autonomous, and on
    the translated deletion summary nowCommonsMessage.
    """
    commons = pywikibot.getSite('commons', 'commons')
    # Localized deletion summary for the working wiki.
    comment = pywikibot.translate(self.site, nowCommonsMessage)
    for page in self.getPageGenerator():
        if use_hash:
            # Page -> Has the namespace | commons image -> Not
            images_list = page
            # 0 -> local image, 1 -> commons image
            page = pywikibot.Page(self.site, images_list[0])
        else:
            # If use_hash is true, we have already print this before, no need
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
        try:
            localImagePage = pywikibot.ImagePage(self.site, page.title())
            if localImagePage.fileIsOnCommons():
                pywikibot.output(u'File is already on Commons.')
                continue
            md5 = localImagePage.getFileMd5Sum()
            if use_hash:
                filenameOnCommons = images_list[1]
            else:
                filenameOnCommons = self.findFilenameOnCommons(
                    localImagePage)
            if not filenameOnCommons and not use_hash:
                pywikibot.output(u'NowCommons template not found.')
                continue
            commonsImagePage = pywikibot.ImagePage(commons, 'Image:%s'
                                                   % filenameOnCommons)
            if localImagePage.title(withNamespace=False) == \
               commonsImagePage.title(withNamespace=False) and use_hash:
                pywikibot.output(
                    u'The local and the commons images have the same name')
            if localImagePage.title(withNamespace=False) != \
               commonsImagePage.title(withNamespace=False):
                # Names differ: local usages must be retargeted before the
                # local file can be deleted.
                usingPages = list(localImagePage.usingPages())
                if usingPages and usingPages != [localImagePage]:
                    pywikibot.output(
                        u'\"\03{lightred}%s\03{default}\" is still used in %i pages.'
                        % (localImagePage.title(withNamespace=False),
                           len(usingPages)))
                    if replace == True:
                        pywikibot.output(
                            u'Replacing \"\03{lightred}%s\03{default}\" by \"\03{lightgreen}%s\03{default}\".'
                            % (localImagePage.title(withNamespace=False),
                               commonsImagePage.title(withNamespace=False)))
                        oImageRobot = image.ImageRobot(
                            pg.FileLinksGenerator(localImagePage),
                            localImagePage.title(withNamespace=False),
                            commonsImagePage.title(withNamespace=False),
                            '', replacealways, replaceloose)
                        oImageRobot.run()
                        # If the image is used with the urlname the
                        # previous function won't work
                        if len(list(pywikibot.ImagePage(self.site,
                                    page.title()).usingPages())) > 0 and \
                           replaceloose:
                            oImageRobot = image.ImageRobot(
                                pg.FileLinksGenerator(
                                    localImagePage),
                                self.urlname(
                                    localImagePage.title(
                                        withNamespace=False)),
                                commonsImagePage.title(
                                    withNamespace=False),
                                '', replacealways, replaceloose)
                            oImageRobot.run()
                        # refresh because we want the updated list
                        usingPages = len(list(pywikibot.ImagePage(
                            self.site, page.title()).usingPages()))
                        if usingPages > 0 and use_hash:
                            # just an enter
                            pywikibot.input(
                                u'There are still %s pages with this image, confirm the manual removal from them please.'
                                % usingPages)
                    else:
                        pywikibot.output(u'Please change them manually.')
                    continue
                else:
                    pywikibot.output(
                        u'No page is using \"\03{lightgreen}%s\03{default}\" anymore.'
                        % localImagePage.title(withNamespace=False))
            commonsText = commonsImagePage.get()
            if replaceonly == False:
                if md5 == commonsImagePage.getFileMd5Sum():
                    pywikibot.output(
                        u'The image is identical to the one on Commons.')
                    if len(localImagePage.getFileVersionHistory()) > 1 and not use_hash:
                        # NOTE(review): the trailing "" after the literal is
                        # an empty-string concatenation -- looks like a
                        # leftover; harmless, kept as in the original.
                        pywikibot.output(
                            u"This image has a version history. Please delete it manually after making sure that the old versions are not worth keeping.""")
                        continue
                    if autonomous == False:
                        # Show both descriptions and ask the operator to
                        # confirm the Commons page has source/license info.
                        pywikibot.output(
                            u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n'
                            % page.title())
                        pywikibot.output(localImagePage.get())
                        pywikibot.output(
                            u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n'
                            % commonsImagePage.title())
                        pywikibot.output(commonsText)
                        choice = pywikibot.inputChoice(
                            u'Does the description on Commons contain all required source and license\n'
                            u'information?', ['yes', 'no'], ['y', 'N'], 'N')
                        if choice.lower() in ['y', 'yes']:
                            localImagePage.delete(
                                comment + ' [[:commons:Image:%s]]'
                                % filenameOnCommons, prompt = False)
                    else:
                        # Autonomous mode: delete without asking.
                        localImagePage.delete(
                            comment + ' [[:commons:Image:%s]]'
                            % filenameOnCommons, prompt = False)
                else:
                    pywikibot.output(
                        u'The image is not identical to the one on Commons.')
        # Python 2 exception syntax; e[0] is the exception's first argument.
        except (pywikibot.NoPage, pywikibot.IsRedirectPage), e:
            pywikibot.output(u'%s' % e[0])
            continue
def handleArg(self, arg):
    """Parse one argument at a time.

    If it is recognized as an argument that specifies a generator, a
    generator is created and added to the accumulation list, and the
    function returns true. Otherwise, it returns false, so that caller
    can try parsing the argument. Call getCombinedGenerator() after all
    arguments have been parsed to get the final output generator.

    Note: branch order matters below -- longer prefixes must be tested
    before their shorter substrings (e.g. '-catr' before '-cat',
    '-uncattemplates' before '-uncat', '-randomredirect' before '-random').
    """
    site = pywikibot.getSite()
    gen = None
    if arg.startswith('-filelinks'):
        fileLinksPageTitle = arg[11:]
        if not fileLinksPageTitle:
            fileLinksPageTitle = i18n.input(
                'pywikibot-enter-file-links-processing')
        # Accept the title with or without the file-namespace prefix
        # (namespace 6).
        if fileLinksPageTitle.startswith(site.namespace(6) + ":"):
            fileLinksPage = pywikibot.ImagePage(site, fileLinksPageTitle)
        else:
            fileLinksPage = pywikibot.ImagePage(
                site, 'Image:' + fileLinksPageTitle)
        gen = FileLinksGenerator(fileLinksPage)
    elif arg.startswith('-unusedfiles'):
        # Bare "-unusedfiles" is 12 chars; longer forms carry ":<count>".
        if len(arg) == 12:
            gen = UnusedFilesGenerator()
        else:
            gen = UnusedFilesGenerator(number=int(arg[13:]))
    elif arg.startswith('-unwatched'):
        if len(arg) == 10:
            gen = UnwatchedPagesPageGenerator()
        else:
            gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
    elif arg.startswith('-usercontribs'):
        # Format: -usercontribs:username[;count]
        args = arg[14:].split(';')
        number = None
        try:
            number = int(args[1])
        except:
            # No (or unparsable) count given; default to 250 contributions.
            number = 250
        gen = UserContributionsGenerator(args[0], number,
                                         namespaces=self.getNamespaces)
    elif arg.startswith('-withoutinterwiki'):
        if len(arg) == 17:
            gen = WithoutInterwikiPageGenerator()
        else:
            gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
    elif arg.startswith('-interwiki'):
        title = arg[11:]
        if not title:
            title = i18n.input('pywikibot-enter-page-processing')
        page = pywikibot.Page(site, title)
        gen = InterwikiPageGenerator(page)
    elif arg.startswith('-randomredirect'):
        if len(arg) == 15:
            gen = RandomRedirectPageGenerator()
        else:
            gen = RandomRedirectPageGenerator(number=int(arg[16:]))
    elif arg.startswith('-random'):
        if len(arg) == 7:
            gen = RandomPageGenerator()
        else:
            gen = RandomPageGenerator(number=int(arg[8:]))
    elif arg.startswith('-recentchanges'):
        # NOTE(review): a bare "-recentchanges:" (len 15) reaches int('')
        # and raises ValueError -- confirm whether that is acceptable.
        if len(arg) >= 15:
            gen = RecentchangesPageGenerator(number=int(arg[15:]),
                                             nobots=False)
        else:
            gen = RecentchangesPageGenerator(nobots=False)
        # Recent changes can list the same page repeatedly; deduplicate.
        gen = DuplicateFilterPageGenerator(gen)
    elif arg.startswith('-rc-nobots'):
        if len(arg) >= 11:
            gen = RecentchangesPageGenerator(number=int(arg[11:]),
                                             nobots=True)
        else:
            gen = RecentchangesPageGenerator(nobots=True)
        gen = DuplicateFilterPageGenerator(gen)
    elif arg.startswith('-file'):
        textfilename = arg[6:]
        if not textfilename:
            textfilename = pywikibot.input(
                u'Please enter the local file name:')
        gen = TextfilePageGenerator(textfilename)
    elif arg.startswith('-namespace'):
        if len(arg) == len('-namespace'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-namespace:'):].split(","))
        # Namespace filters are not generators: record and report handled.
        return True
    elif arg.startswith('-ns'):
        if len(arg) == len('-ns'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-ns:'):].split(","))
        return True
    elif arg.startswith('-limit'):
        if len(arg) == len('-limit'):
            self.limit = int(pywikibot.input("What is the limit value?"))
        else:
            self.limit = int(arg[len('-limit:'):])
        return True
    elif arg.startswith('-catr'):
        gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
    elif arg.startswith('-category'):
        gen = self.getCategoryGen(arg, len('-category'))
    elif arg.startswith('-cat'):
        gen = self.getCategoryGen(arg, len('-cat'))
    elif arg.startswith('-subcatsr'):
        gen = self.setSubCategoriesGen(arg, 9, recurse=True)
    elif arg.startswith('-subcats'):
        gen = self.setSubCategoriesGen(arg, 8)
    elif arg.startswith('-page'):
        # A single explicit page; a one-element list acts as the generator.
        if len(arg) == len('-page'):
            gen = [
                pywikibot.Page(
                    site,
                    pywikibot.input(u'What page do you want to use?'))
            ]
        else:
            gen = [pywikibot.Page(site, arg[len('-page:'):])]
    elif arg.startswith('-uncatfiles'):
        gen = UnCategorizedImageGenerator()
    elif arg.startswith('-uncatcat'):
        gen = UnCategorizedCategoryGenerator()
    elif arg.startswith('-uncattemplates'):
        gen = UnCategorizedTemplatesGenerator()
    elif arg.startswith('-uncat'):
        gen = UnCategorizedPageGenerator()
    elif arg.startswith('-ref'):
        referredPageTitle = arg[5:]
        if not referredPageTitle:
            referredPageTitle = pywikibot.input(
                u'Links to which page should be processed?')
        referredPage = pywikibot.Page(site, referredPageTitle)
        gen = ReferringPageGenerator(referredPage)
    elif arg.startswith('-links'):
        linkingPageTitle = arg[7:]
        if not linkingPageTitle:
            linkingPageTitle = pywikibot.input(
                u'Links from which page should be processed?')
        linkingPage = pywikibot.Page(site, linkingPageTitle)
        gen = LinkedPageGenerator(linkingPage)
    elif arg.startswith('-weblink'):
        url = arg[9:]
        if not url:
            url = pywikibot.input(
                u'Pages with which weblink should be processed?')
        gen = LinksearchPageGenerator(url)
    elif arg.startswith('-transcludes'):
        transclusionPageTitle = arg[len('-transcludes:'):]
        if not transclusionPageTitle:
            transclusionPageTitle = pywikibot.input(
                u'Pages that transclude which page should be processed?')
        # Namespace 10 is the template namespace.
        transclusionPage = pywikibot.Page(
            site, "%s:%s" % (site.namespace(10), transclusionPageTitle))
        gen = ReferringPageGenerator(transclusionPage,
                                     onlyTemplateInclusion=True)
    elif arg.startswith('-gorandom'):
        # Pick one random page, then start an all-pages walk from it.
        for firstPage in RandomPageGenerator(number=1):
            firstPageTitle = firstPage.title()
        namespace = pywikibot.Page(site, firstPageTitle).namespace()
        firstPageTitle = pywikibot.Page(
            site, firstPageTitle).title(withNamespace=False)
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-start'):
        firstPageTitle = arg[7:]
        if not firstPageTitle:
            firstPageTitle = pywikibot.input(
                u'At which page do you want to start?')
        # An earlier -namespace filter takes precedence over the title's
        # own namespace.
        if self.namespaces != []:
            namespace = self.namespaces[0]
        else:
            namespace = pywikibot.Page(site, firstPageTitle).namespace()
        firstPageTitle = pywikibot.Page(
            site, firstPageTitle).title(withNamespace=False)
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-redirectonly'):
        firstPageTitle = arg[14:]
        if not firstPageTitle:
            firstPageTitle = pywikibot.input(
                u'At which page do you want to start?')
        namespace = pywikibot.Page(site, firstPageTitle).namespace()
        firstPageTitle = pywikibot.Page(
            site, firstPageTitle).title(withNamespace=False)
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects='only')
    elif arg.startswith('-prefixindex'):
        prefix = arg[13:]
        namespace = None
        if not prefix:
            prefix = pywikibot.input(
                u'What page names are you looking for?')
        gen = PrefixingPageGenerator(prefix=prefix)
    elif arg.startswith('-newimages'):
        limit = arg[11:] or pywikibot.input(
            u'How many images do you want to load?')
        gen = NewimagesPageGenerator(number=int(limit))
    elif arg == ('-new') or arg.startswith('-new:'):
        # NOTE(review): a bare "-new:" (len 5) reaches int('') and raises
        # ValueError -- confirm whether that is acceptable.
        if len(arg) >= 5:
            gen = NewpagesPageGenerator(number=int(arg[5:]))
        else:
            gen = NewpagesPageGenerator(number=60)
    elif arg.startswith('-imagelinks'):
        imagelinkstitle = arg[len('-imagelinks:'):]
        if not imagelinkstitle:
            imagelinkstitle = pywikibot.input(
                u'Images on which page should be processed?')
        imagelinksPage = pywikibot.Page(site, imagelinkstitle)
        gen = ImagesPageGenerator(imagelinksPage)
    elif arg.startswith('-search'):
        mediawikiQuery = arg[8:]
        if not mediawikiQuery:
            mediawikiQuery = pywikibot.input(
                u'What do you want to search for?')
        gen = SearchPageGenerator(mediawikiQuery, number=None,
                                  namespaces=self.getNamespaces)
    elif arg.startswith('-titleregex'):
        if len(arg) == 11:
            regex = pywikibot.input(
                u'What page names are you looking for?')
        else:
            regex = arg[12:]
        gen = RegexFilterPageGenerator(site.allpages(), [regex])
    elif arg.startswith('-yahoo'):
        gen = YahooSearchPageGenerator(arg[7:])
    elif arg.startswith('-'):
        # Generic "-<mode>log[:user-or-count][;count]" handling.
        mode, log, user = arg.partition('log')
        # exclude -log, -nolog
        if log == 'log' and mode not in ['-', '-no']:
            number = 500
            if not user:
                user = None
            else:
                try:
                    # "-<mode>log:123" supplies a count, not a user name.
                    number = int(user[1:])
                    user = None
                except ValueError:
                    user = user[1:]
            if user:
                # "user[;count]" -- an explicit count may follow the name.
                result = user.split(';')
                user = result[0]
                try:
                    number = int(result[1])
                except:
                    pass
            gen = LogpagesPageGenerator(number, mode[1:], user)
    if gen:
        self.gens.append(gen)
        # NOTE(review): the docstring says this "returns true", but the
        # code returns the combined generator (truthy) -- confirm callers
        # only test the result for truth.
        return self.getCombinedGenerator()
    else:
        return False
def doImage(self, image):
    """Mirror one image, described by a '|'-separated spec, to other wikis.

    Fields of the spec (later fields optional):
        0: source image name
        1: new image name on the targets (blank keeps the source name)
        2: source wiki family (defaults to u'anime')
        3: exclusion mode: u'normal', u'include' or u'exclude'
        4: comma-separated family list used by include/exclude modes

    For every selected output site whose family or file name differs from
    the source, the file is (re)uploaded when missing or different, and
    the description page is rewritten with a {{networkMirror}} banner.
    Returns False when the image has to be skipped.
    """
    data = re.split(re.compile(u'\|', re.UNICODE | re.DOTALL), image)
    imageName = data[0]
    newImageName = data[0]
    # Field 1 overrides the target name unless it is blank/whitespace.
    blank = re.compile(u'^\s*$', re.UNICODE | re.DOTALL)
    if len(data) >= 2 and not re.match(blank, data[1]):
        newImageName = data[1]
    sourceWiki = u'anime'
    if len(data) >= 3:
        sourceWiki = data[2]
    exclusionMode = u'normal'
    if len(data) >= 4:
        exclusionMode = data[3]
    exclusionInfo = u''
    if len(data) >= 5:
        exclusionInfo = data[4]
    sourceSite = None
    outputSites = []
    wikipedia.output(u'Doing Image %s' % imageName)
    # Hoisted: the separator pattern is loop-invariant.
    comma = re.compile(u',', re.UNICODE | re.DOTALL)
    for site in self.siteList:
        if site.family.name == sourceWiki:
            sourceSite = site
        if exclusionMode == u'normal':
            outputSites.append(site)
        elif exclusionMode == u'include':
            includes = re.split(comma, exclusionInfo)
            if site.family.name in includes:
                outputSites.append(site)
        elif exclusionMode == u'exclude':
            excludes = re.split(comma, exclusionInfo)
            # BUG FIX: the original tested membership in 'includes', which
            # is unbound in exclude mode (NameError) and inverts the logic.
            if site.family.name not in excludes:
                outputSites.append(site)
        else:
            wikipedia.output(u'Unknown exclusion mode. Skiping %s.'
                             % imageName)
            return False
    if sourceSite is None:
        wikipedia.output(u'No source site found. Skiping %s.' % imageName)
        return False
    try:
        # 6 = Image namespace
        sourceDescriptionPage = wikipedia.Page(sourceSite, imageName,
                                               None, 6)
        sourceImagePage = wikipedia.ImagePage(
            sourceSite, sourceDescriptionPage.title())
    except wikipedia.NoPage:
        wikipedia.output(u'No source page found. Skiping %s.' % imageName)
        return False
    sourceURL = sourceImagePage.fileUrl()
    if '://' not in sourceURL:
        # fileUrl() may be server-relative; make it absolute.
        sourceURL = u'http://%s%s' % (sourceSite.hostname(), sourceURL)
    # Get file contents
    uo = wikipedia.MyURLopener()
    sourceFile = uo.open(sourceURL, "rb")
    wikipedia.output(u'Reading file %s' % sourceURL)
    sourceContents = sourceFile.read()
    if sourceContents.find(
            "The requested URL was not found on this server.") != -1:
        wikipedia.output("Couldn't download the image. Skiping.")
        return False
    sourceFile.close()
    # Set up the description page: drop any existing summary heading,
    # insert the networkMirror banner (after {{commons}} when present),
    # then put a fresh summary heading on top.
    pageDescription = sourceDescriptionPage.get()
    summaryHeading = re.compile(u'== Summary ==\n?')
    if re.search(summaryHeading, pageDescription):
        pageDescription = re.sub(summaryHeading, u'', pageDescription)
    mirrorText = u'{{networkMirror|%s|%s}}' % (imageName,
                                               sourceSite.family.name)
    comm = re.compile(u'({{commons(\|[^{}]*)?}})', re.IGNORECASE)
    if re.search(comm, pageDescription):
        pageDescription = re.sub(comm, u'\\1\n%s' % mirrorText,
                                 pageDescription)
    else:
        pageDescription = u'%s%s' % (mirrorText, pageDescription)
    pageDescription = u'== Summary ==\n%s' % pageDescription
    for site in outputSites:
        # Same family and same name means source == target: nothing to do.
        if sourceSite.family.name != site.family.name or \
           imageName != newImageName:
            doUpload = False
            doDescription = False
            try:
                siteDescriptionPage = wikipedia.Page(site, newImageName,
                                                     None, 6)  # 6 = Image ns
                siteImagePage = wikipedia.ImagePage(
                    site, siteDescriptionPage.title())
                siteURL = siteImagePage.fileUrl()
                if '://' not in siteURL:
                    siteURL = u'http://%s%s' % (site.hostname(), siteURL)
                uo2 = wikipedia.MyURLopener()
                siteFile = uo2.open(siteURL, "rb")
                wikipedia.output(u'Reading file %s' % siteURL)
                siteContents = siteFile.read()
                # BUG FIX: the original tested sourceContents here, so a
                # failed download at the new location was never detected.
                if siteContents.find(
                        "The requested URL was not found on this server."
                        ) != -1:
                    wikipedia.output(
                        "Couldn't download the image at new location.")
                    doUpload = True
                    # NOTE(review): this break aborts ALL remaining output
                    # sites and skips the upload below -- looks suspicious,
                    # kept as in the original; confirm intent.
                    break
                siteFile.close()
                if siteContents != sourceContents:
                    doUpload = True
                if siteDescriptionPage.get() != pageDescription:
                    doDescription = True
            except wikipedia.NoPage:
                # Target file/description does not exist yet: push both.
                doUpload = True
                doDescription = True
            if doUpload:
                bot = upload.UploadRobot(url=sourceURL,
                                         useFilename=newImageName,
                                         keepFilename=True,
                                         verifyDescription=False,
                                         description=msg['en'],
                                         targetSite=site,
                                         urlEncoding=sourceSite.encoding())
                bot.run()
            if doDescription:
                siteDescriptionPage.put(pageDescription)