def main():
    '''
    Entry point: categorize the images listed in the selected
    "Media needing categories as of ..." categories on Commons.

    Recognised command line arguments:
      -dontmark   do not call categoriesChecked() for a processed category
      -page       work on one page; asks for the title when none is given
                  after the option
      -yesterday  work on the category whose name ends in getYesterday()
    Every other argument is passed to the pagegenerators factory, and its
    result replaces any generator chosen so far.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # Only the cursor is used below; conn is not referenced again here.
    conn, cursor = connectDatabase()
    imagerecat.initLists()

    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    mark = True

    for arg in wikipedia.handleArgs():
        if arg.startswith('-dontmark'):
            mark = False
        elif arg.startswith('-page'):
            site = wikipedia.getSite()
            if len(arg) == 5:
                # Bare "-page" without a title: ask the operator.
                title = wikipedia.input(u'What page do you want to use?')
            else:
                # Skip the "-page" prefix and the separator character.
                title = arg[6:]
            generator = [wikipedia.Page(site, title)]
        elif arg.startswith('-yesterday'):
            site = wikipedia.getSite()
            title = u'Category:Media_needing_categories_as_of_' + getYesterday()
            generator = [wikipedia.Page(site, title)]
        else:
            generator = genFactory.handleArg(arg)

    if not generator:
        return

    for page in generator:
        # Only category pages (namespace 14) with the expected title prefix
        # are processed; everything else is silently skipped.
        if page.namespace() != 14 or not page.title().startswith(
                u'Category:Media needing categories as of'):
            continue

        wikipedia.output(u'Working on ' + page.title())
        todo = getImagesToCategorize(cursor, page.titleWithoutNamespace())
        for image, galleries, categories in todo:
            categorizeImage(image, galleries,
                            imagerecat.applyAllFilters(categories))

        if mark:
            categoriesChecked(page.title())
def main():
    '''
    Entry point: categorize the images listed in the selected
    "Media needing categories as of ..." categories on Commons.

    Recognised command line arguments:
      -dontmark   do not call categoriesChecked() for a processed category
      -page       work on one page; asks for the title when none is given
                  after the option
      -yesterday  work on the category whose name ends in getYesterday()
    Every other argument is passed to the pagegenerators factory, and its
    result replaces any generator chosen so far.
    '''
    wikipedia.setSite(wikipedia.getSite(u'commons', u'commons'))

    # Only the cursor is used below; conn is not referenced again here.
    conn, cursor = connectDatabase()
    imagerecat.initLists()

    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    mark = True

    for arg in wikipedia.handleArgs():
        if arg.startswith('-dontmark'):
            mark = False
        elif arg.startswith('-page'):
            site = wikipedia.getSite()
            if len(arg) == 5:
                # Bare "-page" without a title: ask the operator.
                title = wikipedia.input(u'What page do you want to use?')
            else:
                # Skip the "-page" prefix and the separator character.
                title = arg[6:]
            generator = [wikipedia.Page(site, title)]
        elif arg.startswith('-yesterday'):
            site = wikipedia.getSite()
            title = u'Category:Media_needing_categories_as_of_' + getYesterday()
            generator = [wikipedia.Page(site, title)]
        else:
            generator = genFactory.handleArg(arg)

    if not generator:
        return

    for page in generator:
        # Only category pages (namespace 14) with the expected title prefix
        # are processed; everything else is silently skipped.
        if page.namespace() != 14 or not page.title().startswith(
                u'Category:Media needing categories as of'):
            continue

        wikipedia.output(u'Working on ' + page.title())
        todo = getImagesToCategorize(cursor, page.titleWithoutNamespace())
        for image, galleries, categories in todo:
            categorizeImage(image, galleries,
                            imagerecat.applyAllFilters(categories))

        if mark:
            categoriesChecked(page.title())
def main():
    '''
    Walk through the photos of Flickr group 1044478@N20 and upload the
    eligible ones to Commons.

    A photo is skipped when isAllowedLicense() or photoCanUpload() rejects
    it, when findDuplicateImages() reports a duplicate, or when a page
    named File:<filename> already exists. Everything else is uploaded with
    upload.UploadRobot using a description assembled by buildDescription().
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    imagerecat.initLists()

    flickr = flickrapi.FlickrAPI(api_key)
    groupId = '1044478@N20'
    #photos = flickr.flickr.groups_search(text='73509078@N00', per_page='10') = 1044478@N20

    for photoId in getPhotosInGroup(flickr=flickr, group_id=groupId):
        info, sizes = getPhoto(flickr=flickr, photo_id=photoId)
        if not isAllowedLicense(photoInfo=info):
            continue

        tags = getTags(photoInfo=info)
        if not photoCanUpload(tags=tags):
            continue

        # Get the url of the largest photo and download it
        url = getPhotoUrl(photoSizes=sizes)
        photo = downloadPhoto(photoUrl=url)

        # Check if it exists at Commons
        duplicates = findDuplicateImages(photo=photo)
        if duplicates:
            wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
            continue

        # Collect the pieces of the description page and the target name.
        flinfoText = getFlinfoDescription(photoId=photoId)
        tagText = getTagDescription(tags=tags)
        tagCats = getTagCategories(tags)
        filename = getFilename(photoInfo=info)
        description = buildDescription(flinfoText, tagText, tagCats)

        target = wikipedia.Page(title=u'File:' + filename,
                                site=wikipedia.getSite())
        if target.exists():
            # I should probably check if the hash is the same and if not
            # upload it under a different name
            wikipedia.output(u'File:' + filename + u' already exists!')
            continue

        # Do the actual upload.
        # Would be nice to check before uploading if the file is already at
        # Commons. Not that important for this program, but maybe for
        # derived programs.
        bot = upload.UploadRobot(url=url,
                                 description=description,
                                 useFilename=filename,
                                 keepFilename=True,
                                 verifyDescription=False)
        bot.run()

    wikipedia.output('All done')
def main():
    '''
    Walk through the photos of Flickr group 1044478@N20 and upload the
    eligible ones to Commons.

    A photo is skipped when isAllowedLicense() or photoCanUpload() rejects
    it, when findDuplicateImages() reports a duplicate, or when a page
    named File:<filename> already exists. Everything else is uploaded with
    upload.UploadRobot using a description assembled by buildDescription().
    '''
    site = wikipedia.getSite(u'commons', u'commons')
    wikipedia.setSite(site)
    imagerecat.initLists()

    flickr = flickrapi.FlickrAPI(api_key)
    groupId = '1044478@N20'
    #photos = flickr.flickr.groups_search(text='73509078@N00', per_page='10') = 1044478@N20

    for photoId in getPhotosInGroup(flickr=flickr, group_id=groupId):
        info, sizes = getPhoto(flickr=flickr, photo_id=photoId)
        if not isAllowedLicense(photoInfo=info):
            continue

        tags = getTags(photoInfo=info)
        if not photoCanUpload(tags=tags):
            continue

        # Get the url of the largest photo and download it
        url = getPhotoUrl(photoSizes=sizes)
        photo = downloadPhoto(photoUrl=url)

        # Check if it exists at Commons
        duplicates = findDuplicateImages(photo=photo)
        if duplicates:
            wikipedia.output(u'Found duplicate image at %s' % duplicates.pop())
            continue

        # Collect the pieces of the description page and the target name.
        flinfoText = getFlinfoDescription(photoId=photoId)
        tagText = getTagDescription(tags=tags)
        tagCats = getTagCategories(tags)
        filename = getFilename(photoInfo=info)
        description = buildDescription(flinfoText, tagText, tagCats)

        target = wikipedia.Page(title=u'File:' + filename,
                                site=wikipedia.getSite())
        if target.exists():
            # I should probably check if the hash is the same and if not
            # upload it under a different name
            wikipedia.output(u'File:' + filename + u' already exists!')
            continue

        # Do the actual upload.
        # Would be nice to check before uploading if the file is already at
        # Commons. Not that important for this program, but maybe for
        # derived programs.
        bot = upload.UploadRobot(url=url,
                                 description=description,
                                 useFilename=filename,
                                 keepFilename=True,
                                 verifyDescription=False)
        bot.run()

    wikipedia.output('All done')
def __init__(self, pagegenerator, prefetchQueue):
    '''
    Set up the thread object.

    pagegenerator and prefetchQueue are stored unchanged on the instance
    under the same names. imagerecat.initLists() is called once per
    constructed instance, before the threading.Thread base initialiser.
    '''
    imagerecat.initLists()
    threading.Thread.__init__(self)

    # Plain attribute stores; nothing is read from either object here.
    self.prefetchQueue = prefetchQueue
    self.pagegenerator = pagegenerator
def __init__(self, pagegenerator, prefetchQueue):
    '''
    Set up the thread object.

    pagegenerator and prefetchQueue are stored unchanged on the instance
    under the same names. imagerecat.initLists() is called once per
    constructed instance, before the threading.Thread base initialiser.
    '''
    imagerecat.initLists()
    threading.Thread.__init__(self)

    # Plain attribute stores; nothing is read from either object here.
    self.prefetchQueue = prefetchQueue
    self.pagegenerator = pagegenerator