def main(args):
    ''' Main loop. Count Commons images per topic and output the statistics. '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()
    conn2 = None
    cursor2 = None
    # Second connection: the bot's own category database on the toolserver.
    (conn2, cursor2) = connectDatabase2('sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    conn3 = None
    cursor3 = None
    # Third connection: the replicated Commons wiki database.
    (conn3, cursor3) = connectDatabase2('commonswiki-p.db.toolserver.org', u'commonswiki_p')
    topics = getTopics(cursor)
    images = {}
    # One count per topic; topics come back as 1-tuples from the DB cursor.
    for (topic, ) in topics:
        images[topic] = getImagesWithTopicCount(cursor3, topic)
        print images[topic]
    outputStats(topics, images)
# NOTE(review): stray triple-quote below — probably pairs with a commented-out
# block elsewhere in the file; kept as-is to avoid breaking that pairing.
'''
def main(): ''' The main loop ''' wikipedia.setSite(wikipedia.getSite(u'nl', u'wikipedia')) conn = None cursor = None (conn, cursor) = connectDatabase() items = getNumberOfItems(cursor) images = getNumberOfImages(cursor) addresses = {} names = {} pages = list(set(items.keys() + images.keys())) pages.sort() for key in pages: print key page = wikipedia.Page(wikipedia.getSite(), key) text = page.get() addresses[key] = getNumberOfAddresses(text) names[key] = getNumberOfNames(text) #print key + u' - ' + str(addresses[key]) + u' - ' + str(names[key]) updateStats(pages, items, addresses, names, images)
def main():
    """Add location templates to Rijksmonument images that are missing one."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    # The coordinate lookups need a database connection.
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    factory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        factory.handleArg(arg)
    generator = factory.getCombinedGenerator()
    if not generator:
        # Default: all Rijksmonument images without a location.
        generator = getRijksmonumentWithoutLocation()
    # Preload pages, restricted to files (namespace 6).
    preloaded = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for page in preloaded:
        locationTemplate = locateImage(page, conn, cursor)
        if locationTemplate:
            addLocation(page, locationTemplate)
def main():
    """Categorize media from the daily 'Media needing categories' queues on Commons."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    imagerecat.initLists()
    factory = pagegenerators.GeneratorFactory()
    generator = None
    mark = True
    for arg in wikipedia.handleArgs():
        if arg.startswith('-dontmark'):
            mark = False
        elif arg.startswith('-page'):
            if len(arg) == 5:
                # Bare '-page': ask interactively for the title.
                title = wikipedia.input(u'What page do you want to use?')
                generator = [wikipedia.Page(wikipedia.getSite(), title)]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-yesterday'):
            generator = [wikipedia.Page(wikipedia.getSite(),
                                        u'Category:Media_needing_categories_as_of_' + getYesterday())]
        else:
            generator = factory.handleArg(arg)
    if generator:
        for page in generator:
            # Only daily queue categories are processed.
            if page.namespace() == 14 and \
               page.title().startswith(u'Category:Media needing categories as of'):
                wikipedia.output(u'Working on ' + page.title())
                for (image, gals, cats) in getImagesToCategorize(cursor, page.titleWithoutNamespace()):
                    categorizeImage(image, gals, imagerecat.applyAllFilters(cats))
                if mark:
                    categoriesChecked(page.title())
def main(args):
    """Main loop: parse options, build a generator, and categorize all its images."""
    global search_wikis
    global hint_wiki
    onlyFilter = False
    onlyUncat = False
    generator = None
    factory = pagegenerators.GeneratorFactory()
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for arg in pywikibot.handleArgs():
        if arg == '-onlyfilter':
            onlyFilter = True
        elif arg == '-onlyuncat':
            onlyUncat = True
        elif arg.startswith('-hint:'):
            hint_wiki = arg[len('-hint:'):]
        elif arg.startswith('-onlyhint'):
            search_wikis = arg[len('-onlyhint:'):]
        else:
            factory.handleArg(arg)
    generator = factory.getCombinedGenerator()
    if not generator:
        # Default: everything in 'Media needing categories', recursively.
        generator = pagegenerators.CategorizedPageGenerator(
            catlib.Category(site, u'Category:Media needing categories'), recurse=True)
    initLists()
    categorizeImages(generator, onlyFilter, onlyUncat)
    pywikibot.output(u'All done')
def main(args):
    ''' Main loop. Convert OS OpenData 1:250000 raster squares to jpg and tif output. '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    # Fixed toolserver paths for the Ordnance Survey 1:250000 raster data set.
    sourcedir = u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/data/'
    destinationdirjpg = u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/outputjpg/'
    destinationdirtif = u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/outputtif/'
    basefilename = u'Ordnance_Survey_1-250000_-_'
    sourcename = u'1:250 000 Scale Colour Raster'
    scale = u'250.000'
    squares = []
    # Grid square name = source file name minus the directory and .tif extension.
    for sourcefilename in glob.glob(sourcedir + u"*.tif"):
        square = sourcefilename.replace(sourcedir, u'').replace(u'.tif', u'')
        squares.append(square)
    # Process each square twice: once as jpg, once as tif.
    for square in squares:
        print square
        OSlib.processSquare(square, squares, scale, sourcedir, sourcename, basefilename, u'jpg', destinationdirjpg)
        OSlib.processSquare(square, squares, scale, sourcedir, sourcename, basefilename, u'tif', destinationdirtif)
# NOTE(review): stray triple-quote below — probably pairs with a commented-out
# block elsewhere in the file; kept as-is to avoid breaking that pairing.
'''
def main():
    """Notify uploaders about their uncategorized media for a given day."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    uncat = u''
    conn, cursor = connectDatabase()
    for arg in wikipedia.handleArgs():
        if arg.startswith('-date'):
            if len(arg) == 5:
                # Bare '-date': ask interactively.
                uncat = u'Media_needing_categories_as_of_' + wikipedia.input(u'What page do you want to use?')
            else:
                uncat = u'Media_needing_categories_as_of_' + arg[6:]
        elif arg.startswith('-yesterday'):
            uncat = u'Media_needing_categories_as_of_' + getYesterday()
    if uncat:
        # Category names use underscores, never spaces.
        uncat = uncat.replace(' ', '_')
        for (user, images) in getUsersToNotify(cursor, uncat):
            notifyUser(user, images, uncat)
    else:
        wikipedia.output(u'Please specify date to work with "-date:' + getYesterday() + u'" or "-yesterday"')
def main():
    """Measure uncategorized/to-review media counts and update the stats page."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    # Timestamp identifying this measurement run.
    date = datetime.utcnow().strftime('%Y%m%d%H%M')
    # Distinct uncategorized files (daily queue categories).
    uncatQuery = u"SELECT COUNT(DISTINCT(page_title)) FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to LIKE 'Media\_needing\_categories\_as\_of\_%'"
    uncatCount = getCount(cursor, uncatQuery)
    # Distinct files waiting for category review.
    checkQuery = u"SELECT COUNT(DISTINCT(page_title)) FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to LIKE 'Media\_needing\_category\_review\_as\_of\_%'"
    checkCount = getCount(cursor, checkQuery)
    totalCount = int(uncatCount) + int(checkCount)
    updateStats(date, uncatCount, checkCount, totalCount)
def main(args):
    ''' Main loop. Collect categories that should be split, per topic, and report them. '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()
    #conn2 = None
    #cursor2 = None
    #(conn2, cursor2) = connectDatabase2('sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    conn3 = None
    cursor3 = None
    # Replicated Commons wiki database.
    (conn3, cursor3) = connectDatabase2('commonswiki-p.db.toolserver.org', u'commonswiki_p')
    topics = getTopics(cursor)
    images = {}
    cats = []
    # Topics come back as 1-tuples from the DB cursor.
    for (topic,) in topics:
        cats.extend(getCategoriesToSplit(cursor3, topic))
    outputCategoriesToSplit(cats)
# NOTE(review): stray triple-quote below — probably pairs with a commented-out
# block elsewhere in the file; kept as-is to avoid breaking that pairing.
'''
def main(args):
    """Grab a bunch of images and tag them if they are not categorized."""
    factory = pagegenerators.GeneratorFactory()
    generator = None
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for arg in pywikibot.handleArgs():
        if arg.startswith('-yesterday'):
            generator = uploadedYesterday(site)
        elif arg.startswith('-recentchanges'):
            generator = recentChanges(site=site, delay=120)
        else:
            factory.handleArg(arg)
    if not generator:
        generator = factory.getCombinedGenerator()
    if not generator:
        pywikibot.output(u'You have to specify the generator you want to use for the program!')
    else:
        preloaded = pagegenerators.PreloadingGenerator(generator)
        for page in preloaded:
            # Only existing, non-redirect files are candidates.
            if page.exists() and page.namespace() == 6 and not page.isRedirectPage():
                if isUncat(page):
                    addUncat(page)
def main(args):
    ''' Main loop. Count Commons images per topic and output the statistics. '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()
    conn2 = None
    cursor2 = None
    # Second connection: the bot's own category database on the toolserver.
    (conn2, cursor2) = connectDatabase2('sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    conn3 = None
    cursor3 = None
    # Third connection: the replicated Commons wiki database.
    (conn3, cursor3) = connectDatabase2('commonswiki-p.db.toolserver.org', u'commonswiki_p')
    topics = getTopics(cursor)
    images = {}
    # One count per topic; topics come back as 1-tuples from the DB cursor.
    for (topic,) in topics:
        images[topic] = getImagesWithTopicCount(cursor3, topic)
        print images[topic]
    outputStats(topics, images)
# NOTE(review): stray triple-quote below — probably pairs with a commented-out
# block elsewhere in the file; kept as-is to avoid breaking that pairing.
'''
def main(args):
    ''' Main loop. Convert OS OpenData 1:250000 raster squares to jpg and tif output. '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    # Fixed toolserver paths for the Ordnance Survey 1:250000 raster data set.
    sourcedir=u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/data/'
    destinationdirjpg=u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/outputjpg/'
    destinationdirtif=u'/mnt/user-store/OS_OpenData/1_250_000_Scale_Raster/outputtif/'
    basefilename=u'Ordnance_Survey_1-250000_-_'
    sourcename=u'1:250 000 Scale Colour Raster'
    scale=u'250.000'
    squares = []
    # Grid square name = source file name minus the directory and .tif extension.
    for sourcefilename in glob.glob(sourcedir + u"*.tif"):
        square = sourcefilename.replace(sourcedir, u'').replace(u'.tif', u'')
        squares.append(square)
    # Process each square twice: once as jpg, once as tif.
    for square in squares:
        print square
        OSlib.processSquare(square, squares, scale, sourcedir, sourcename, basefilename, u'jpg', destinationdirjpg)
        OSlib.processSquare(square, squares, scale, sourcedir, sourcename, basefilename, u'tif', destinationdirtif)
# NOTE(review): stray triple-quote below — probably pairs with a commented-out
# block elsewhere in the file; kept as-is to avoid breaking that pairing.
'''
def main():
    """Split out a big category, or intersect the categories tagged with the template."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    bigcategory = u''
    target = u''
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                title = wikipedia.input(u'What page do you want to use?')
                generator = [wikipedia.Page(wikipedia.getSite(), title)]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-bigcat'):
            if len(arg) == 7:
                bigcategory = wikipedia.input(u'What category do you want to split out?')
            else:
                bigcategory = arg[8:]
        elif arg.startswith('-target'):
            if len(arg) == 7:
                target = wikipedia.input(u'What category is the target category?')
            else:
                target = arg[8:]
    if bigcategory != u'':
        splitOutCategory(bigcategory, target)
    else:
        if not generator:
            # Default: every category transcluding {{Intersect categories}}.
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    wikipedia.Page(wikipedia.getSite(), u'Template:Intersect categories'),
                    onlyTemplateInclusion=True), [14])
        for cat in generator:
            intersectCategories(cat)
def main(args):
    ''' Main loop. Collect categories that should be split, per topic, and report them. '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()
    #conn2 = None
    #cursor2 = None
    #(conn2, cursor2) = connectDatabase2('sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    conn3 = None
    cursor3 = None
    # Replicated Commons wiki database.
    (conn3, cursor3) = connectDatabase2('commonswiki-p.db.toolserver.org', u'commonswiki_p')
    topics = getTopics(cursor)
    images = {}
    cats = []
    # Topics come back as 1-tuples from the DB cursor.
    for (topic, ) in topics:
        cats.extend(getCategoriesToSplit(cursor3, topic))
    outputCategoriesToSplit(cats)
# NOTE(review): stray triple-quote below — probably pairs with a commented-out
# block elsewhere in the file; kept as-is to avoid breaking that pairing.
'''
def main(): countrycode = u'' # Connect database, we need that (conn, cursor) = connectDatabase() (conn2, cursor2) = connectDatabase2() generator = None genFactory = pagegenerators.GeneratorFactory() for arg in wikipedia.handleArgs(): if arg.startswith('-countrycode:'): countrycode = arg [len('-countrycode:'):] lang = wikipedia.getSite().language() wikipedia.setSite(wikipedia.getSite(u'commons', u'commons')) if countrycode: if not mconfig.countries.get((countrycode, lang)): wikipedia.output(u'I have no config for countrycode "%s" in language "%s"' % (countrycode, lang)) return False wikipedia.output(u'Working on countrycode "%s" in language "%s"' % (countrycode, lang)) locateCountry(countrycode, lang, mconfig.countries.get((countrycode, lang)), conn, cursor, conn2, cursor2) else: for (countrycode, lang), countryconfig in mconfig.countries.iteritems(): if not countryconfig.get('autoGeocode'): wikipedia.output(u'"%s" in language "%s" is not supported in auto geocode mode (yet).' % (countrycode, lang)) else: wikipedia.output(u'Working on countrycode "%s" in language "%s"' % (countrycode, lang)) locateCountry(countrycode, lang, countryconfig, conn, cursor, conn2, cursor2)
def main():
    """Refresh the Commons uncategorized-media statistics page."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    # One timestamp per run.
    date = datetime.utcnow().strftime('%Y%m%d%H%M')
    # How many files are still uncategorized?
    uncatQuery = u"SELECT COUNT(DISTINCT(page_title)) FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to LIKE 'Media\_needing\_categories\_as\_of\_%'"
    uncatCount = getCount(cursor, uncatQuery)
    # How many files still need a category review?
    checkQuery = u"SELECT COUNT(DISTINCT(page_title)) FROM page JOIN categorylinks ON page_id=cl_from WHERE page_namespace=6 AND page_is_redirect=0 AND cl_to LIKE 'Media\_needing\_category\_review\_as\_of\_%'"
    checkCount = getCount(cursor, checkQuery)
    # Combined backlog.
    totalCount = int(uncatCount) + int(checkCount)
    updateStats(date, uncatCount, checkCount, totalCount)
def main(args):
    """ Main loop. Upload NARA TIFF scans listed in a records text file. """
    workdir = u""
    textfile = u""
    records = {}
    site = wikipedia.getSite(u"commons", u"commons")
    wikipedia.setSite(site)
    # Exactly two positional arguments are required: <directory> <textfile>.
    if not (len(args) == 2):
        wikipedia.output(u"Too few arguments. Usage: NARA_uploader.py <directory> <textfile>")
        sys.exit()
    if os.path.isdir(args[0]):
        workdir = args[0]
    else:
        wikipedia.output(u"%s doesn't appear to be a directory. Exiting" % (args[0],))
        sys.exit()
    textfile = args[1]
    # filename -> record id mapping.
    records = getRecords(textfile)
    # print records
    sourcefilenames = glob.glob(workdir + u"/*.TIF")
    for sourcefilename in sourcefilenames:
        filename = os.path.basename(sourcefilename)
        # This will give an ugly error if the id is unknown
        if not records.get(filename):
            wikipedia.output(u"Can't find %s in %s. Skipping this file." % (filename, textfile))
        elif os.path.getsize(sourcefilename) >= 1024 * 1024 * 100:
            # Skip files of 100 MB or more.
            wikipedia.output(u"%s too big. Skipping this file." % (sourcefilename,))
        else:
            fileId = records.get(filename)
            duplicates = findDuplicateImages(sourcefilename)
            if duplicates:
                wikipedia.output(u"Found duplicate image at %s" % duplicates.pop())
            else:
                # No metadata handling. We use a webtool
                description = getDescription(fileId)
                # Mark for later categorization (hard-coded tag date).
                categories = u"{{Uncategorized-NARA|year=2011|month=September|day=21}}\n"
                description = description + categories
                print fileId
                title = getTitle(fileId, description)
                wikipedia.output(title)
                wikipedia.output(description)
                bot = upload.UploadRobot(
                    url=sourcefilename.decode(sys.getfilesystemencoding()),
                    description=description,
                    useFilename=title,
                    keepFilename=True,
                    verifyDescription=False,
                )
                bot.run()
def main():
    """Sort Tree-of-Life categories supplied via -page or generator-factory arguments."""
    wikipedia.output(u'Testing 1 2 3')
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    factory = pagegenerators.GeneratorFactory()
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                # Bare '-page': ask interactively for the title.
                title = wikipedia.input(u'What page do you want to use?')
                generator = [wikipedia.Page(site, title)]
            else:
                generator = [wikipedia.Page(site, arg[6:])]
        else:
            generator = factory.handleArg(arg)
    if generator:
        for page in generator:
            if page.namespace() == 14:
                sort_TOL_Category(catlib.Category(site, page.title()))
    else:
        wikipedia.output(u'No categories to work on!')
def main():
    """Locate and tag Rijksmonument images without a location template."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    # Location lookups are database-backed.
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    factory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        factory.handleArg(arg)
    generator = factory.getCombinedGenerator()
    if not generator:
        generator = getRijksmonumentWithoutLocation()
    # Only files (namespace 6), preloaded in batches.
    preloaded = pagegenerators.PreloadingGenerator(
        pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
    for page in preloaded:
        locationTemplate = locateImage(page, conn, cursor)
        if locationTemplate:
            addLocation(page, locationTemplate)
def main(args):
    ''' Main loop. Process Air Force photos by category, by update run, or by id range. '''
    genFactory = pagegenerators.GeneratorFactory()
    start_id = 0
    end_id = 0
    updaterun = False
    site = wikipedia.getSite('commons', 'commons')
    wikipedia.setSite(site)
    # Wiki page that stores the last processed photo id between runs.
    updatePage = wikipedia.Page(site, u'User:BotMultichillT/Air_Force_latest')
    interval = 100
    for arg in wikipedia.handleArgs():
        if arg.startswith('-start_id'):
            if len(arg) == 9:
                start_id = wikipedia.input(
                    u'What is the id of the photo you want to start at?')
            else:
                start_id = arg[10:]
        elif arg.startswith('-end_id'):
            if len(arg) == 7:
                end_id = wikipedia.input(
                    u'What is the id of the photo you want to end at?')
            else:
                end_id = arg[8:]
        elif arg == u'-updaterun':
            updaterun = True
        elif arg.startswith('-interval'):
            if len(arg) == 9:
                interval = wikipedia.input(
                    u'What interval do you want to use?')
            else:
                interval = arg[10:]
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    # Do we have a pagenerator?
    if generator:
        for page in generator:
            if page.namespace() == 14:
                processCategory(page)
    # Is updaterun set? Resume from the stored id and persist the new position.
    elif updaterun:
        start_id = int(updatePage.get())
        end_id = start_id + int(interval)
        last_id = processPhotos(int(start_id), int(end_id))
        comment = u'Worked from ' + str(start_id) + u' to ' + str(last_id)
        updatePage.put(str(last_id), comment)
    # Do we have a start_id and a end_id
    elif int(start_id) > 0 and int(end_id) > 0:
        last_id = processPhotos(int(start_id), int(end_id))
    # Use the default generator
    else:
        print "Screw this, will implement later"
def main(args):
    ''' Main loop. Process Air Force photos by category, by update run, or by id range. '''
    genFactory = pagegenerators.GeneratorFactory()
    start_id = 0
    end_id = 0
    updaterun = False
    site = wikipedia.getSite('commons', 'commons')
    wikipedia.setSite(site)
    # Wiki page that stores the last processed photo id between runs.
    updatePage = wikipedia.Page(site, u'User:BotMultichillT/Air_Force_latest')
    interval=100
    for arg in wikipedia.handleArgs():
        if arg.startswith('-start_id'):
            if len(arg) == 9:
                start_id = wikipedia.input(u'What is the id of the photo you want to start at?')
            else:
                start_id = arg[10:]
        elif arg.startswith('-end_id'):
            if len(arg) == 7:
                end_id = wikipedia.input(u'What is the id of the photo you want to end at?')
            else:
                end_id = arg[8:]
        elif arg==u'-updaterun':
            updaterun = True
        elif arg.startswith('-interval'):
            if len(arg) == 9:
                interval = wikipedia.input(u'What interval do you want to use?')
            else:
                interval = arg[10:]
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    # Do we have a pagenerator?
    if generator:
        for page in generator:
            if page.namespace()==14:
                processCategory(page)
    # Is updaterun set? Resume from the stored id and persist the new position.
    elif updaterun:
        start_id = int(updatePage.get())
        end_id = start_id + int(interval)
        last_id = processPhotos(int(start_id), int(end_id))
        comment = u'Worked from ' + str(start_id) + u' to ' + str(last_id)
        updatePage.put(str(last_id), comment)
    # Do we have a start_id and a end_id
    elif int(start_id) > 0 and int(end_id) > 0:
        last_id = processPhotos(int(start_id), int(end_id))
    # Use the default generator
    else:
        print "Screw this, will implement later"
def main():
    """Tag every template the database reports as uncategorized."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    for templateTitle in getUncategorizedTemplates(cursor):
        tagUncategorized(templateTitle)
def main():
    """Fetch (image, category) pairs from the database and categorize each image."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    #images = getImagesToCategorize(cursor)
    for (image, category) in getImagesToCategorize(cursor):
        categorizeImage(image, category)
def main(args):
    ''' Main loop. Upload NARA TIFF scans listed in a records text file. '''
    workdir = u''
    textfile = u''
    records = {}
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    # Exactly two positional arguments are required: <directory> <textfile>.
    if not (len(args)==2):
        wikipedia.output(u'Too few arguments. Usage: NARA_uploader.py <directory> <textfile>')
        sys.exit()
    if os.path.isdir(args[0]):
        workdir = args[0]
    else:
        wikipedia.output(u'%s doesn\'t appear to be a directory. Exiting' % (args[0],))
        sys.exit()
    textfile = args[1]
    # filename -> record id mapping.
    records = getRecords(textfile)
    #print records
    sourcefilenames = glob.glob(workdir + u"/*.TIF")
    for sourcefilename in sourcefilenames:
        filename = os.path.basename(sourcefilename)
        # This will give an ugly error if the id is unknown
        if not records.get(filename):
            wikipedia.output(u'Can\'t find %s in %s. Skipping this file.' % (filename, textfile))
        else:
            fileId = records.get(filename)
            duplicates = findDuplicateImages(sourcefilename)
            if duplicates:
                wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
            else:
                # No metadata handling. We use a webtool
                description = getDescription(fileId)
                # Mark for later categorization, dated via subst: at save time.
                categories = u'{{Uncategorized-NARA|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n'
                description = description + categories
                title = getTitle(fileId, description)
                wikipedia.output(title)
                wikipedia.output(description)
                bot = upload.UploadRobot(url=sourcefilename.decode(sys.getfilesystemencoding()),
                                         description=description, useFilename=title, keepFilename=True,
                                         verifyDescription=False)
                bot.run()
def main():
    """Sort every subject's images into by-country categories."""
    site = wikipedia.getSite(u"commons", u"commons")
    wikipedia.setSite(site)
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    subjects = getSubjects(cursor)
    # subjects = [u'Engineers']
    for subject in subjects:
        sort_by_country_category(cursor, subject)
def main():
    """Run the by-country sorter over every subject found in the database."""
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    subjects = getSubjects(cursor)
    #subjects = [u'Engineers']
    for subject in subjects:
        sort_by_country_category(cursor, subject)
def main():
    """Populate categories given via -page, or all transcluding {{Populate category}}."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                title = wikipedia.input(u'What page do you want to use?')
                generator = [wikipedia.Page(wikipedia.getSite(), title)]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
    if not generator:
        # Default: every category (namespace 14) transcluding the template.
        generator = pagegenerators.NamespaceFilterPageGenerator(
            pagegenerators.ReferringPageGenerator(
                wikipedia.Page(wikipedia.getSite(), u'Template:Populate category'),
                onlyTemplateInclusion=True), [14])
    for cat in generator:
        populateCategory(cat)
def main():
    """Check every base template against the known language versions."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    conn, cursor = connectDatabase()
    langs = getLangs(cursor)
    for baseTemplate in getBaseTemplates(cursor):
        #print baseTemplate
        checkTemplate(cursor, baseTemplate, langs)
def main():
    ''' The main loop. Categorize media from the daily 'Media needing categories' queues. '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    conn = None
    cursor = None
    (conn, cursor) = connectDatabase()
    imagerecat.initLists()
    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    # Unless -dontmark is given, queue categories are marked as checked afterwards.
    mark = True
    for arg in wikipedia.handleArgs():
        if arg.startswith('-dontmark'):
            mark = False
        elif arg.startswith('-page'):
            if len(arg) == 5:
                # Bare '-page': ask interactively for the title.
                generator = [
                    wikipedia.Page(
                        wikipedia.getSite(),
                        wikipedia.input(u'What page do you want to use?'))
                ]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-yesterday'):
            generator = [
                wikipedia.Page(
                    wikipedia.getSite(),
                    u'Category:Media_needing_categories_as_of_' + getYesterday())
            ]
        else:
            generator = genFactory.handleArg(arg)
    if generator:
        for page in generator:
            # Only daily queue categories (namespace 14) are processed.
            if ((page.namespace() == 14) and (page.title().startswith(
                    u'Category:Media needing categories as of'))):
                wikipedia.output(u'Working on ' + page.title())
                for (image, gals, cats) in getImagesToCategorize(
                        cursor, page.titleWithoutNamespace()):
                    categorizeImage(image, gals, imagerecat.applyAllFilters(cats))
                if (mark):
                    categoriesChecked(page.title())
def main():
    """Upload allowed-licensed photos from a Flickr group to Commons."""
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    imagerecat.initLists()
    flickr = flickrapi.FlickrAPI(api_key)
    groupId = '1044478@N20'
    #photos = flickr.flickr.groups_search(text='73509078@N00', per_page='10') = 1044478@N20
    for photoId in getPhotosInGroup(flickr=flickr, group_id=groupId):
        (photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photoId)
        if isAllowedLicense(photoInfo=photoInfo):
            tags = getTags(photoInfo=photoInfo)
            if photoCanUpload(tags=tags):
                # Get the url of the largest photo
                photoUrl = getPhotoUrl(photoSizes=photoSizes)
                # Download this photo
                photo = downloadPhoto(photoUrl=photoUrl)
                # Check if it exists at Commons
                duplicates = findDuplicateImages(photo=photo)
                if duplicates:
                    wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
                else:
                    flinfoDescription = getFlinfoDescription(photoId=photoId)
                    tagDescription = getTagDescription(tags=tags)
                    tagCategories = getTagCategories(tags)
                    filename = getFilename(photoInfo=photoInfo)
                    #print filename
                    photoDescription = buildDescription(
                        flinfoDescription, tagDescription, tagCategories)
                    if (wikipedia.Page(title=u'File:' + filename, site=wikipedia.getSite()).exists()):
                        # I should probably check if the hash is the same and if not upload it under a different name
                        wikipedia.output(u'File:' + filename + u' already exists!')
                    else:
                        #Do the actual upload
                        #Would be nice to check before I upload if the file is already at Commons
                        #Not that important for this program, but maybe for derived programs
                        bot = upload.UploadRobot(url=photoUrl, description=photoDescription,
                                                 useFilename=filename, keepFilename=True,
                                                 verifyDescription=False)
                        bot.run()
    wikipedia.output('All done')
def main(): wikipedia.setSite(wikipedia.getSite(u'commons', u'commons')) #Get the api key if config.flickr['api_key']: flickr = flickrapi.FlickrAPI(config.flickr['api_key']) else: wikipedia.output('Flickr api key not found! Get yourself an api key') wikipedia.output( 'Any flickr user can get a key at http://www.flickr.com/services/api/keys/apply/' ) return group_id = u'1710854@N24' addCategory = u'Rijksmonumenten' removeCategories = True autonomous = True totalPhotos = 0 uploadedPhotos = 0 # Do we mark the images as reviewed right away? if config.flickr['review']: flickrreview = config.flickr['review'] else: flickrreview = False # Set the Flickr reviewer if config.flickr['reviewer']: reviewer = config.flickr['reviewer'] elif 'commons' in config.sysopnames['commons']: print config.sysopnames['commons'] reviewer = config.sysopnames['commons']['commons'] elif 'commons' in config.usernames['commons']: reviewer = config.usernames['commons']['commons'] else: reviewer = u'' for photo_id in flickrripper.getPhotos(flickr=flickr, group_id=group_id): uploadedPhotos += processPhoto(flickr, photo_id, flickrreview, reviewer, addCategory, removeCategories, autonomous) totalPhotos += 1 wikipedia.output(u'Finished running') wikipedia.output(u'Total photos: ' + str(totalPhotos)) wikipedia.output(u'Uploaded photos: ' + str(uploadedPhotos))
def main(args):
    ''' Main loop. Categorize geograph images, from a generator or from the database. '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    conn = None
    cursor = None
    (conn, cursor) = geograph_lib.connectDatabase()
    conn2 = None
    cursor2 = None
    # The bot's own category database on the toolserver.
    (conn2, cursor2) = geograph_lib.connectDatabase2('sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    conn3 = None
    cursor3 = None
    # Replicated Commons wiki database.
    (conn3, cursor3) = geograph_lib.connectDatabase2('commonswiki-p.db.toolserver.org', u'commonswiki_p')
    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if generator:
        # Work on the images supplied on the command line.
        for page in generator:
            if page.exists() and page.namespace()==6 and not page.isRedirectPage():
                wikipedia.output(page.title())
                id = getGeographId(page)
                if id:
                    geograph_lib.categorizeImage(page, id, cursor, cursor2)
    else:
        # No generator given: process all images per topic from the database.
        topics = getTopics(cursor)
        for (topic,) in topics:
            images = getImagesWithTopic(cursor3, topic)
            for (imageName, id) in images:
                try:
                    page = wikipedia.ImagePage(wikipedia.getSite(), u'File:' + imageName)
                    if page.exists() and page.namespace()==6 and not page.isRedirectPage():
                        wikipedia.output(page.title())
                        geograph_lib.categorizeImage(page, id, cursor, cursor2)
                except UnicodeDecodeError:
                    # Some image names from the DB are not valid unicode; skip them.
                    print "UnicodeDecodeError, can't find the source. yah! :-("
                    pass
def main(): ''' The main loop ''' wikipedia.setSite(wikipedia.getSite(u'commons', u'commons')) conn = None cursor = None (conn, cursor) = connectDatabase() # Get datetime to start with date = datetime(2008, 12, 01) # Print the header print 'Date, deleted, ok, uncategorized, to be checked' while(date < datetime.utcnow()): (deletedImages, okImages, uncategorizedImages, toBeCheckedImages) = getDayStats(cursor, date.strftime('%Y%m%d')) print date.strftime('%Y%m%d') + u', ' + str(deletedImages) + u', ' + str(okImages) + u', ' + str(uncategorizedImages) + u', ' + str(toBeCheckedImages) date = date + timedelta(days=1)
def upload(self, logfilename, donedir): print self.template # p = wikipedia.Page(wikipedia.getSite(u'commons', u'commons'), 'User:Aude/sandbox2') # p.put(self.template, 'AAA-image template, tests') do_upload = wikipedia.input( u'\nContinue uploading %s ?' % (self.uploadname) ) if (do_upload == 'y'): print 'Uploading %s' % (self.uploadname) site = wikipedia.getSite(u'commons', u'commons') wikipedia.setSite(site) bot = upload.UploadRobot(url=self.filelocation, description=self.template, useFilename = self.uploadname, keepFilename = True, verifyDescription=False) bot.run() logfile = open(logfilename, 'a') logfile.write('%s,uploaded,%s\n' % (self.filename, datetime.today())) logfile.close() dst = '%s%s' % (donedir, self.filename) shutil.move(self.filelocation, dst) else: print u'\nUpload of image %s cancelled.\n\n' % (self.filename)
def main(): wikipedia.setSite(wikipedia.getSite(u'commons', u'commons')) #Get the api key if config.flickr['api_key']: flickr = flickrapi.FlickrAPI(config.flickr['api_key']) else: wikipedia.output('Flickr api key not found! Get yourself an api key') wikipedia.output('Any flickr user can get a key at http://www.flickr.com/services/api/keys/apply/') return group_id = u'1516413@N22' addCategory = u'Rijksmonumenten' removeCategories = True autonomous = True totalPhotos = 0 uploadedPhotos = 0 # Do we mark the images as reviewed right away? if config.flickr['review']: flickrreview = config.flickr['review'] else: flickrreview = False # Set the Flickr reviewer if config.flickr['reviewer']: reviewer = config.flickr['reviewer'] elif 'commons' in config.sysopnames['commons']: print config.sysopnames['commons'] reviewer = config.sysopnames['commons']['commons'] elif 'commons' in config.usernames['commons']: reviewer = config.usernames['commons']['commons'] else: reviewer = u'' for photo_id in flickrripper.getPhotos(flickr=flickr, group_id=group_id): uploadedPhotos += processPhoto(flickr, photo_id, flickrreview, reviewer, addCategory, removeCategories, autonomous) totalPhotos += 1 wikipedia.output(u'Finished running') wikipedia.output(u'Total photos: ' + str(totalPhotos)) wikipedia.output(u'Uploaded photos: ' + str(uploadedPhotos))
def main():
    """Split out a big category, or intersect the categories tagged with the template."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    bigcategory = u''
    target = u''
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            if len(arg) == 5:
                # Bare '-page': ask interactively for the title.
                generator = [
                    wikipedia.Page(
                        wikipedia.getSite(),
                        wikipedia.input(u'What page do you want to use?'))
                ]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-bigcat'):
            if len(arg) == 7:
                bigcategory = wikipedia.input(
                    u'What category do you want to split out?')
            else:
                bigcategory = arg[8:]
        elif arg.startswith('-target'):
            if len(arg) == 7:
                target = wikipedia.input(
                    u'What category is the target category?')
            else:
                target = arg[8:]
    if not bigcategory == u'':
        splitOutCategory(bigcategory, target)
    else:
        if not generator:
            # Default: every category transcluding {{Intersect categories}}.
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    wikipedia.Page(wikipedia.getSite(),
                                   u'Template:Intersect categories'),
                    onlyTemplateInclusion=True), [14])
        for cat in generator:
            intersectCategories(cat)
def main():
    """Download all files produced by the command-line generator into a target directory."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    factory = pagegenerators.GeneratorFactory()
    generator = None
    target = u'/Users/hay/tmp/wlm/'
    for arg in wikipedia.handleArgs():
        if arg.startswith('-target:'):
            target = arg[len('-target:'):]
        else:
            factory.handleArg(arg)
    generator = factory.getCombinedGenerator()
    if generator:
        # Preload pages, keeping only files (namespace 6).
        preloaded = pagegenerators.PreloadingGenerator(
            pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
        for page in preloaded:
            imagepage = wikipedia.ImagePage(page.site(), page.title())
            downloadFile(imagepage, target)
def main():
    """Add topic categories to the images coming from the command-line generators."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    # The topic lookup needs a live database connection.
    (conn, cursor) = connectDatabase()
    factory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        factory.handleArg(arg)
    combined = factory.getCombinedGenerator()
    if not combined:
        return
    # Only files (namespace 6), preloaded in batches.
    for page in pagegenerators.PreloadingGenerator(
            pagegenerators.NamespaceFilterPageGenerator(combined, [6])):
        addTopicCategory(page, conn, cursor)
def main():
    """Sort tree-of-life categories selected on the command line."""
    wikipedia.output(u'Testing 1 2 3')
    genFactory = pagegenerators.GeneratorFactory()
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    generator = None
    for arg in wikipedia.handleArgs():
        if arg.startswith('-page'):
            # Bare '-page' prompts; '-page:Title' carries the title inline.
            pageTitle = (wikipedia.input(u'What page do you want to use?')
                         if len(arg) == 5 else arg[6:])
            generator = [wikipedia.Page(site, pageTitle)]
        else:
            generator = genFactory.handleArg(arg)
    if not generator:
        wikipedia.output(u'No categories to work on!')
        return
    for page in generator:
        if page.namespace() == 14:
            sort_TOL_Category(catlib.Category(site, page.title()))
def main():
    """Run categorizeImage over all files from the command-line generators."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    # Categorization needs a live database connection.
    (conn, cursor) = connectDatabase()
    factory = pagegenerators.GeneratorFactory()
    for arg in wikipedia.handleArgs():
        factory.handleArg(arg)
    combined = factory.getCombinedGenerator()
    if not combined:
        return
    # Preload page texts and keep only files (namespace 6).
    for page in pagegenerators.PreloadingGenerator(
            pagegenerators.NamespaceFilterPageGenerator(combined, [6])):
        categorizeImage(page, conn, cursor)
def main():
    ''' Upload suitably licensed photos from one Flickr group to Commons.

    For every photo in the hard-coded group: check its license and tags,
    download it, skip it if Commons already has a duplicate or a file with
    the same name, and otherwise upload it with a generated description.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    # Prime the category filter lists used by imagerecat.
    imagerecat.initLists()
    # NOTE(review): api_key is a module-level name; assumed set elsewhere in the file.
    flickr = flickrapi.FlickrAPI(api_key)
    groupId = '1044478@N20'
    #photos = flickr.flickr.groups_search(text='73509078@N00', per_page='10') = 1044478@N20
    for photoId in getPhotosInGroup(flickr=flickr, group_id=groupId):
        (photoInfo, photoSizes) = getPhoto(flickr=flickr, photo_id=photoId)
        if isAllowedLicense(photoInfo=photoInfo):
            tags = getTags(photoInfo=photoInfo)
            if photoCanUpload(tags=tags):
                # Get the url of the largest photo
                photoUrl = getPhotoUrl(photoSizes=photoSizes)
                # Download this photo
                photo = downloadPhoto(photoUrl=photoUrl)
                # Check if it exists at Commons (by content hash)
                duplicates = findDuplicateImages(photo=photo)
                if duplicates:
                    wikipedia.output(u'Found duplicate image at %s'
                                     % duplicates.pop())
                else:
                    # Build the description from Flinfo output plus tag data.
                    flinfoDescription = getFlinfoDescription(photoId=photoId)
                    tagDescription = getTagDescription(tags=tags)
                    tagCategories = getTagCategories(tags)
                    filename = getFilename(photoInfo=photoInfo)
                    #print filename
                    photoDescription = buildDescription(flinfoDescription,
                                                        tagDescription,
                                                        tagCategories)
                    # Name collision on Commons: skip rather than overwrite.
                    if (wikipedia.Page(title=u'File:' + filename,
                                       site=wikipedia.getSite()).exists()):
                        # I should probably check if the hash is the same and if not upload it under a different name
                        wikipedia.output(u'File:' + filename
                                         + u' already exists!')
                    else:
                        #Do the actual upload
                        #Would be nice to check before I upload if the file is already at Commons
                        #Not that important for this program, but maybe for derived programs
                        bot = upload.UploadRobot(url=photoUrl,
                                                 description=photoDescription,
                                                 useFilename=filename,
                                                 keepFilename=True,
                                                 verifyDescription=False)
                        bot.run()
    wikipedia.output('All done')
def main(args):
    """Fix descriptions of Geograph images that carry a broken template."""
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    # One connection for the metadata, one for the commonswiki replica.
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2('commonswiki-p.db.toolserver.org',
                                        u'commonswiki_p')
    for (pageName, fileId) in getImagesToCorrect(cursor2):
        wikipedia.output(pageName)
        if pageName == u'' or fileId == u'':
            continue
        page = wikipedia.Page(site, pageName)
        if not page.exists():
            continue
        categories = page.categories()
        metadata = getMetadata(fileId, cursor)
        if not metadata:
            continue
        # Rebuild the description, then re-attach the page's categories.
        description = getDescription(metadata)
        description = wikipedia.replaceCategoryLinks(description, categories,
                                                     site)
        comment = u'Fixing description of Geograph image with broken template'
        wikipedia.output(description)
        page.put(description, comment)
def main():
    """Fetch images selected on the command line and store them locally."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    factory = pagegenerators.GeneratorFactory()
    target = u'/Users/hay/tmp/wlm/'  # default download directory
    for arg in wikipedia.handleArgs():
        if arg.startswith('-target:'):
            target = arg[len('-target:'):]
        else:
            factory.handleArg(arg)
    generator = factory.getCombinedGenerator()
    if generator:
        # Work on files only (namespace 6), preloaded in batches.
        preloaded = pagegenerators.PreloadingGenerator(
            pagegenerators.NamespaceFilterPageGenerator(generator, [6]))
        for page in preloaded:
            imagepage = wikipedia.ImagePage(page.site(), page.title())
            downloadFile(imagepage, target)
def main(args):
    """Repair Geograph image descriptions containing a broken template."""
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    (conn, cursor) = connectDatabase()
    (conn2, cursor2) = connectDatabase2('commonswiki-p.db.toolserver.org',
                                        u'commonswiki_p')
    imageSet = getImagesToCorrect(cursor2)
    for (pageName, fileId) in imageSet:
        wikipedia.output(pageName)
        # Skip rows with an empty page name or file id.
        if pageName == u'' or fileId == u'':
            continue
        page = wikipedia.Page(site, pageName)
        if page.exists():
            categories = page.categories()
            metadata = getMetadata(fileId, cursor)
            if metadata:
                # Fresh description with the page's categories re-attached.
                newText = wikipedia.replaceCategoryLinks(
                    getDescription(metadata), categories, site)
                wikipedia.output(newText)
                page.put(newText,
                         u'Fixing description of Geograph image with broken template')
def main():
    """Notify uploaders about their images that still need categories."""
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))
    uncat = u''
    (conn, cursor) = connectDatabase()
    prefix = u'Media_needing_categories_as_of_'
    for arg in wikipedia.handleArgs():
        if arg.startswith('-date'):
            # Bare '-date' prompts; '-date:...' carries the date inline.
            if len(arg) == 5:
                uncat = prefix + wikipedia.input(u'What page do you want to use?')
            else:
                uncat = prefix + arg[6:]
        elif arg.startswith('-yesterday'):
            uncat = prefix + getYesterday()
    if not uncat:
        wikipedia.output(u'Please specify date to work with "-date:'
                         + getYesterday() + u'" or "-yesterday"')
        return
    uncat = uncat.replace(' ', '_')
    for (user, images) in getUsersToNotify(cursor, uncat):
        notifyUser(user, images, uncat)
def main(args):
    ''' Main loop.

    NARA batch uploader.  args: [original dir, textfile, derivative dir,
    optional start filename].  For every .TIF in the original dir: look up
    its record id, create derivative files, detect duplicates already on
    Commons, and upload the original plus derivatives with a shared
    "other versions" gallery.
    '''
    workdir = u''
    textfile = u''
    records = {}
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    # --- argument validation -------------------------------------------
    if (len(args) < 3):
        wikipedia.output(u'Too few arguments. Usage: NARA_uploader.py <original dir> <textfile> <derivative dir> [start filename]')
        sys.exit()
    if os.path.isdir(args[0]):
        workdir = args[0]
    else:
        wikipedia.output(u'%s doesn\'t appear to be a directory. Exiting.'
                         % (args[0], ))
        sys.exit()
    derivativeDirectory = args[2]
    # The derivative dir is created on demand, but must not be a plain file.
    if os.path.exists(derivativeDirectory) and not os.path.isdir(derivativeDirectory):
        wikipedia.output(u"%s exists, but isn't a directory. Exiting."
                         % derivativeDirectory)
        sys.exit()
    elif not os.path.exists(derivativeDirectory):
        wikipedia.output(u'%s doesn\'t appear to exist. Creating.'
                         % derivativeDirectory)
        os.mkdir(derivativeDirectory)
    # Optional 4th argument: filename to resume from (EAFP on IndexError).
    try:
        startFile = args[3]
        startFileFound = False
        startPath = os.path.join(workdir, startFile)
        if not os.path.exists(startPath) or os.path.isdir(startPath):
            wikipedia.output(u"%s doesn't exist, or it is directory. Exiting."
                             % startPath)
            sys.exit()
    except IndexError:
        startFile = None
    textfile = args[1]
    # records maps TIF basename -> NARA file id (per getRecords).
    records = getRecords(textfile)
    #print records
    sourcefilenames = glob.glob(workdir + u"/*.TIF")
    sourcefilenames.sort()
    for sourcefilename in sourcefilenames:
        wikipedia.output(u'\nProcessing %s' % sourcefilename)
        if startFile:
            #if we want to skip to a file
            fileHead, fileTail = os.path.split(sourcefilename)
            if not startFileFound:
                if fileTail != startFile:
                    wikipedia.output('Skipping %s' % sourcefilename)
                    continue
                else:
                    #we have fond the start point
                    startFileFound = True
        filename = os.path.basename(sourcefilename)
        # This will give an ugly error if the id is unknown
        if not records.get(filename):
            wikipedia.output(u'Can\'t find %s in %s. Skipping this file.'
                             % (filename, textfile))
        elif os.path.getsize(sourcefilename) >= 1024 * 1024 * 100:
            # Hard size cap: files of 100 MiB or more are skipped.
            wikipedia.output(u'%s too big. Skipping this file.'
                             % (sourcefilename, ))
        else:
            fileId = records.get(filename)
            wikipedia.output(u'Found file ID: %d' % fileId)
            #generate all the files we might need to upload
            filesToUpload = createDerivatives(sourcefilename,
                                              derivativeDirectory)
            # duplicateFiletypes maps extension -> existing Commons filename.
            duplicateFiletypes = {}
            #check for duplicates of the original on wiki (hash match, .tif only)
            for fileInfo in filesToUpload:
                if fileInfo['ext'] != '.tif':
                    continue
                foundDuplicates = findDuplicateImagesByHash(fileInfo['name'])
                duplicateFiletypes = addDuplicatesToList(
                    fileInfo, foundDuplicates, duplicateFiletypes)
            # follow the naming + description from the tif if it exists, or make it up from the description
            if '.tif' in duplicateFiletypes:
                title = duplicateFiletypes['.tif']
                wikipedia.output(
                    u'Fetching description from TIF file page: %s' % title)
                description = wikipedia.Page(site, 'File:' + title).get()
            else:
                description = fetchDescriptionFromWebtool(fileId)
                if not description:
                    wikipedia.output(u'No description! Skipping this file.')
                    continue
                else:
                    # New uploads land in a dated "uncategorized" tracking category.
                    categories = u'{{Uncategorized-NARA|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n'
                    description = description + categories
                title = getTitle(fileId, description)
                if not title:
                    continue
            #check for duplicates of the derivatives (using the filename we just made)
            for fileInfo in filesToUpload:
                if fileInfo['ext'] == '.tif':
                    continue
                titleRoot, ext = os.path.splitext(title)
                fileTitle = titleRoot + fileInfo['ext']
                foundDuplicates = findDuplicateImagesByName(fileTitle)
                duplicateFiletypes = addDuplicatesToList(
                    fileInfo, foundDuplicates, duplicateFiletypes)
            #construct the gallery
            filesToUpload = setDestinations(filesToUpload, title)
            gallery = createDerivativeGallery(filesToUpload, title)
            #for every file, including original and derivatives
            for fileInfo in filesToUpload:
                titleRoot, ext = os.path.splitext(title)
                fileTitle = titleRoot + fileInfo['ext']
                if fileInfo['ext'] in duplicateFiletypes:
                    #we have a duplicate: add derivs if needed
                    currentFilename = duplicateFiletypes[fileInfo['ext']]
                    currentFilePage = wikipedia.Page(site,
                                                     'File:' + currentFilename)
                    currentDescription = currentFilePage.get()
                    # Returns a falsy value when the gallery is already present.
                    currentDescription = addDerivativesToDescription(
                        currentDescription, gallery, title)
                    if currentDescription:
                        wikipedia.output(
                            'Updating the description for %s:\n\n%s'
                            % (currentFilename, currentDescription))
                        currentFilePage.put(
                            currentDescription,
                            comment="Adding other versions to the description.")
                    else:
                        wikipedia.output('Gallery exists on page %s'
                                         % currentFilename)
                else:
                    #upload the file with generated info
                    wikipedia.output(fileInfo['name'] + ' --> '
                                     + fileInfo['dest'])
                    newDescription = addDerivativesToDescription(
                        description, gallery, title)
                    if newDescription:
                        #if the gallery add failed due to existing gallery, just carry on with the original
                        description = newDescription
                    fileDescription = removeTIFFParameter(description,
                                                          fileInfo['ext'])
                    wikipedia.output(fileDescription)
                    bot = upload.UploadRobot(
                        url=fileInfo['name'].decode(
                            sys.getfilesystemencoding()),
                        description=fileDescription,
                        useFilename=fileInfo['dest'],
                        keepFilename=True,
                        verifyDescription=False)
                    bot.run()
def main():
    ''' Upload photos from a Panoramio set to Commons.

    Parses command-line options, then feeds every photo of the selected set
    through getLicense/processPhoto, counting uploads.
    '''
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    #imagerecat.initLists()
    photoset = u''  #public (popular photos), full (all photos), user ID number
    # NOTE(review): size, minx/miny/maxx/maxy are initialised but never used
    # in this function — presumably leftovers or used by an earlier version.
    size = u'original'
    minx = u''
    miny = u''
    maxx = u''
    maxy = u''
    start_id = u''
    end_id = u''
    addCategory = u''
    autonomous = False
    totalPhotos = 0
    uploadedPhotos = 0
    # Do we mark the images as reviewed right away?
    if config.panoramio['review']:
        panoramioreview = config.panoramio['review']
    else:
        panoramioreview = False
    # Set the Panoramio reviewer
    if config.panoramio['reviewer']:
        reviewer = config.panoramio['reviewer']
    elif 'commons' in config.sysopnames['commons']:
        print config.sysopnames['commons']
        reviewer = config.sysopnames['commons']['commons']
    elif 'commons' in config.usernames['commons']:
        reviewer = config.usernames['commons']['commons']
    else:
        reviewer = u''
    # Should be renamed to overrideLicense or something like that
    override = u''
    # Option parsing: bare switch prompts interactively, '-opt:value' is inline.
    for arg in pywikibot.handleArgs():
        if arg.startswith('-set'):
            if len(arg) == 4:
                photoset = pywikibot.input(u'What is the set?')
            else:
                photoset = arg[5:]
        elif arg.startswith('-start_id'):
            if len(arg) == 9:
                start_id = pywikibot.input(
                    u'What is the id of the photo you want to start at?')
            else:
                start_id = arg[10:]
        elif arg.startswith('-end_id'):
            if len(arg) == 7:
                end_id = pywikibot.input(
                    u'What is the id of the photo you want to end at?')
            else:
                end_id = arg[8:]
        elif arg.startswith('-tags'):
            # NOTE(review): tags is parsed but never passed on below — verify.
            if len(arg) == 5:
                tags = pywikibot.input(
                    u'What is the tag you want to filter out (currently only one supported)?')
            else:
                tags = arg[6:]
        elif arg == '-panoramioreview':
            panoramioreview = True
        elif arg.startswith('-reviewer'):
            if len(arg) == 9:
                reviewer = pywikibot.input(u'Who is the reviewer?')
            else:
                reviewer = arg[10:]
        elif arg.startswith('-override'):
            if len(arg) == 9:
                override = pywikibot.input(u'What is the override text?')
            else:
                override = arg[10:]
        elif arg.startswith('-addcategory'):
            if len(arg) == 12:
                addCategory = pywikibot.input(
                    u'What category do you want to add?')
            else:
                addCategory = arg[13:]
        elif arg == '-autonomous':
            autonomous = True
    if photoset:
        for photoInfo in getPhotos(photoset, start_id, end_id):
            photoInfo = getLicense(photoInfo)
            #time.sleep(10)
            uploadedPhotos += processPhoto(photoInfo, panoramioreview,
                                           reviewer, override, addCategory,
                                           autonomous)
            totalPhotos += 1
    else:
        usage()
    pywikibot.output(u'Finished running')
    pywikibot.output(u'Total photos: ' + str(totalPhotos))
    pywikibot.output(u'Uploaded photos: ' + str(uploadedPhotos))
def main():
    ''' Upload photos from a Panoramio set to Commons.

    Duplicate of the variant above with slightly different formatting;
    behavior is kept byte-for-byte identical here.
    '''
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    ## imagerecat.initLists()
    photoset = u''  # public (popular photos), full (all photos), user ID number
    # NOTE(review): size, minx/miny/maxx/maxy are initialised but never used
    # in this function — presumably leftovers from an earlier version.
    size = u'original'
    minx = u''
    miny = u''
    maxx = u''
    maxy = u''
    start_id = u''
    end_id = u''
    addCategory = u''
    autonomous = False
    totalPhotos = 0
    uploadedPhotos = 0
    # Do we mark the images as reviewed right away?
    if config.panoramio['review']:
        panoramioreview = config.panoramio['review']
    else:
        panoramioreview = False
    # Set the Panoramio reviewer
    if config.panoramio['reviewer']:
        reviewer = config.panoramio['reviewer']
    elif 'commons' in config.sysopnames['commons']:
        print config.sysopnames['commons']
        reviewer = config.sysopnames['commons']['commons']
    elif 'commons' in config.usernames['commons']:
        reviewer = config.usernames['commons']['commons']
    else:
        reviewer = u''
    # Should be renamed to overrideLicense or something like that
    override = u''
    # Option parsing: bare switch prompts interactively, '-opt:value' is inline.
    for arg in pywikibot.handleArgs():
        if arg.startswith('-set'):
            if len(arg) == 4:
                photoset = pywikibot.input(u'What is the set?')
            else:
                photoset = arg[5:]
        elif arg.startswith('-start_id'):
            if len(arg) == 9:
                start_id = pywikibot.input(
                    u'What is the id of the photo you want to start at?')
            else:
                start_id = arg[10:]
        elif arg.startswith('-end_id'):
            if len(arg) == 7:
                end_id = pywikibot.input(
                    u'What is the id of the photo you want to end at?')
            else:
                end_id = arg[8:]
        elif arg.startswith('-tags'):
            # NOTE(review): tags is parsed but never passed on below — verify.
            if len(arg) == 5:
                tags = pywikibot.input(
                    u'What is the tag you want to filter out (currently only '
                    u'one supported)?')
            else:
                tags = arg[6:]
        elif arg == '-panoramioreview':
            panoramioreview = True
        elif arg.startswith('-reviewer'):
            if len(arg) == 9:
                reviewer = pywikibot.input(u'Who is the reviewer?')
            else:
                reviewer = arg[10:]
        elif arg.startswith('-override'):
            if len(arg) == 9:
                override = pywikibot.input(u'What is the override text?')
            else:
                override = arg[10:]
        elif arg.startswith('-addcategory'):
            if len(arg) == 12:
                addCategory = pywikibot.input(
                    u'What category do you want to add?')
            else:
                addCategory = arg[13:]
        elif arg == '-autonomous':
            autonomous = True
    if photoset:
        for photoInfo in getPhotos(photoset, start_id, end_id):
            photoInfo = getLicense(photoInfo)
            #time.sleep(10)
            uploadedPhotos += processPhoto(photoInfo, panoramioreview,
                                           reviewer, override, addCategory,
                                           autonomous)
            totalPhotos += 1
    else:
        usage()
    pywikibot.output(u'Finished running')
    pywikibot.output(u'Total photos: ' + str(totalPhotos))
    pywikibot.output(u'Uploaded photos: ' + str(uploadedPhotos))
def main():
    ''' Upload photos from a Flickr user / group / photoset to Commons.

    Requires a Flickr api key in the configuration; parses command-line
    options, then feeds the selected photos through processPhoto.
    '''
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    #imagerecat.initLists()
    #Get the api key
    if config.flickr['api_key']:
        flickr = flickrapi.FlickrAPI(config.flickr['api_key'])
    else:
        pywikibot.output('Flickr api key not found! Get yourself an api key')
        pywikibot.output(
            'Any flickr user can get a key at http://www.flickr.com/services/api/keys/apply/')
        return
    group_id = u''
    photoset_id = u''
    user_id = u''
    start_id = u''
    end_id = u''
    tags = u''
    addCategory = u''
    removeCategories = False
    autonomous = False
    totalPhotos = 0
    uploadedPhotos = 0
    # Do we mark the images as reviewed right away?
    if config.flickr['review']:
        flickrreview = config.flickr['review']
    else:
        flickrreview = False
    # Set the Flickr reviewer
    if config.flickr['reviewer']:
        reviewer = config.flickr['reviewer']
    elif 'commons' in config.sysopnames['commons']:
        print config.sysopnames['commons']
        reviewer = config.sysopnames['commons']['commons']
    elif 'commons' in config.usernames['commons']:
        reviewer = config.usernames['commons']['commons']
    else:
        reviewer = u''
    # Should be renamed to overrideLicense or something like that
    override = u''
    # Option parsing: bare switch prompts interactively, '-opt:value' is inline.
    for arg in pywikibot.handleArgs():
        if arg.startswith('-group_id'):
            if len(arg) == 9:
                group_id = pywikibot.input(
                    u'What is the group_id of the pool?')
            else:
                group_id = arg[10:]
        elif arg.startswith('-photoset_id'):
            if len(arg) == 12:
                photoset_id = pywikibot.input(u'What is the photoset_id?')
            else:
                photoset_id = arg[13:]
        elif arg.startswith('-user_id'):
            if len(arg) == 8:
                user_id = pywikibot.input(
                    u'What is the user_id of the flickr user?')
            else:
                user_id = arg[9:]
        elif arg.startswith('-start_id'):
            if len(arg) == 9:
                start_id = pywikibot.input(
                    u'What is the id of the photo you want to start at?')
            else:
                start_id = arg[10:]
        elif arg.startswith('-end_id'):
            if len(arg) == 7:
                end_id = pywikibot.input(
                    u'What is the id of the photo you want to end at?')
            else:
                end_id = arg[8:]
        elif arg.startswith('-tags'):
            if len(arg) == 5:
                tags = pywikibot.input(
                    u'What is the tag you want to filter out (currently only one supported)?')
            else:
                tags = arg[6:]
        elif arg == '-flickrreview':
            flickrreview = True
        elif arg.startswith('-reviewer'):
            if len(arg) == 9:
                reviewer = pywikibot.input(u'Who is the reviewer?')
            else:
                reviewer = arg[10:]
        elif arg.startswith('-override'):
            if len(arg) == 9:
                override = pywikibot.input(u'What is the override text?')
            else:
                override = arg[10:]
        elif arg.startswith('-addcategory'):
            if len(arg) == 12:
                addCategory = pywikibot.input(
                    u'What category do you want to add?')
            else:
                addCategory = arg[13:]
        elif arg == '-removecategories':
            removeCategories = True
        elif arg == '-autonomous':
            autonomous = True
    # At least one photo source must be specified.
    if user_id or group_id or photoset_id:
        for photo_id in getPhotos(flickr, user_id, group_id, photoset_id,
                                  start_id, end_id, tags):
            uploadedPhotos += processPhoto(flickr, photo_id, flickrreview,
                                           reviewer, override, addCategory,
                                           removeCategories, autonomous)
            totalPhotos += 1
    else:
        usage()
    pywikibot.output(u'Finished running')
    pywikibot.output(u'Total photos: ' + str(totalPhotos))
    pywikibot.output(u'Uploaded photos: ' + str(uploadedPhotos))
def main(args):
    ''' Main loop.

    Geograph batch preparation.  args: [source dir, destination dir,
    optional numeric start id].  Walks the per-subdir jpgs in the source
    dir, skips files already on Commons, and for the rest copies the image
    to the destination dir and writes a matching .txt description file.
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    start_id = 0
    conn = None
    cursor = None
    (conn, cursor) = geograph_lib.connectDatabase()
    conn2 = None
    cursor2 = None
    # Second connection: category database on the toolserver (unused below
    # since the getCategories call is commented out).
    (conn2, cursor2) = geograph_lib.connectDatabase2(
        'sql-s2.toolserver.org', u'u_multichill_commons_categories_p')
    if (len(args) > 1):
        if len(args) > 2:
            # Optional resume point: skip files with an id below start_id.
            start_id = int(args[2])
        sourcedir = args[0]
        destinationdir = args[1]
        # NOTE(review): paths are concatenated directly, so both dirs are
        # assumed to end with a path separator — confirm against callers.
        if os.path.isdir(sourcedir) and os.path.isdir(destinationdir):
            #print sourcedir
            for subdir in os.listdir(sourcedir):
                #print subdir
                if os.path.isdir(sourcedir + subdir):
                    #print subdir
                    sourcefilenames = glob.glob(sourcedir + subdir + u"/*.jpg")
                    sourcefilenames = filterSourceFilenames(sourcefilenames)
                    for sourcefilename in sourcefilenames:
                        # First get the file id
                        fileId = getFileId(sourcefilename)
                        if fileId >= start_id:
                            wikipedia.output(str(fileId))
                            duplicates = findDuplicateImages(sourcefilename)
                            if duplicates:
                                wikipedia.output(
                                    u'Found duplicate image at %s'
                                    % duplicates.pop())
                            else:
                                #Get metadata
                                metadata = geograph_lib.getMetadata(fileId,
                                                                    cursor)
                                #Check if we got metadata
                                if metadata:
                                    #Get description
                                    description = geograph_lib.getDescription(
                                        metadata)
                                    # The hard part, find suitable categories
                                    # categories = geograph_lib.getCategories(metadata, cursor, cursor2)
                                    categories = '{{Uncategorized-Geograph|gridref=%s|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n' % (
                                        metadata.get('grid_reference'), )
                                    #print categories
                                    description = description + categories
                                    wikipedia.output(description)
                                    #Get destinationfilename
                                    destinationFilename = geograph_lib.getTitle(
                                        metadata)
                                    #Copy file to destination dir
                                    shutil.copy(
                                        unicode(sourcefilename),
                                        unicode(destinationdir
                                                + destinationFilename
                                                + u'.jpg'))
                                    #And save the description as well
                                    outputDescriptionFile(
                                        destinationdir + destinationFilename
                                        + u'.txt', description)