def __init__(self, config):
    """
    Set up the middleware: database connection, project settings, SQL string
    builder and annotation parser.

    Args:
        config: AIDE configuration object (queried by the helpers below).
    """
    self.config = config
    self.dbConnector = Database(config)
    # fetch project settings before constructing the helpers below;
    # presumably requires self.dbConnector — keep this ordering (TODO confirm)
    self._fetchProjectSettings()
    self.sqlBuilder = SQLStringBuilder(config)
    self.annoParser = AnnotationParser(config)
def __init__(self, config):
    """
    Set up the middleware: database connection, immutable project settings
    cache, SQL string builder and annotation parser.

    Args:
        config: AIDE configuration object.
    """
    self.config = config
    self.dbConnector = Database(config)
    # project settings that cannot be changed (project shorthand -> {settings})
    self.project_immutables = {}
    # populate caches; presumably uses self.dbConnector — keep ordering
    self._fetchProjectSettings()
    # NOTE(review): constructed without the config object here, unlike sibling
    # modules — confirm this is intended for these classes' signatures
    self.sqlBuilder = SQLStringBuilder()
    self.annoParser = AnnotationParser()
def __init__(self, config, passiveMode=False):
    """
    Set up the worker: database connection, trailing-count filename pattern,
    and temporary-files directory.

    Args:
        config: AIDE configuration object.
        passiveMode: if True, the instance is expected to skip active setup
            work elsewhere (flag is only stored here).
    """
    self.config = config
    self.dbConnector = Database(config)
    # matches a trailing "_<number>" suffix in file names (used for renaming
    # duplicates). Raw string fixes the invalid '\_' escape sequence of the
    # original ('\_' already meant a literal '_', but raises a warning on
    # modern Python); behavior is unchanged.
    self.countPattern = re.compile(r'_[0-9]+$')
    self.passiveMode = passiveMode
    # directory for temporary files; falls back to the system temp dir
    self.tempDir = self.config.getProperty('FileServer',
                                           'tempfiles_dir',
                                           type=str,
                                           fallback=tempfile.gettempdir())
def __init__(self, config):
    """
    Set up the middleware and load the default UI settings, preferring a
    custom file in "config/" and falling back to the built-in defaults.

    Args:
        config: AIDE configuration object.
    """
    self.config = config
    self.dbConnector = Database(config)
    # load default UI settings
    try:
        # check if custom default styles are provided
        # ("with" fixes the original's leaked file handle; "except Exception"
        # replaces the original bare "except" so KeyboardInterrupt/SystemExit
        # are no longer swallowed)
        with open('config/default_ui_settings.json', 'r') as f:
            self.defaultUIsettings = json.load(f)
    except Exception:
        # resort to built-in styles
        with open(
                'modules/ProjectAdministration/static/json/default_ui_settings.json',
                'r') as f:
            self.defaultUIsettings = json.load(f)
def __init__(self, config):
    """
    Set up the AIController middleware: database connection, SQL builder,
    Celery app registration, and the message processor thread.

    Args:
        config: AIDE configuration object.
    """
    self.config = config
    self.dbConn = Database(config)
    self.sqlBuilder = SQLStringBuilder(config)
    # will be set to True once start_training is called (and False as soon as
    # everything about the training process has finished)
    self.training = False
    # register this process' Celery app as the global/default one
    self.celery_app = current_app
    self.celery_app.set_current()
    self.celery_app.set_default()
    # note: watchdog only created if users poll status (i.e., if there's activity) #TODO
    self.watchdog = None
    # background consumer of Celery task messages; started immediately
    self.messageProcessor = MessageProcessor(self.celery_app)
    self.messageProcessor.start()
def __init__(self, config, passiveMode=False, verbose_start=False):
    """
    Set up the AIWorker: database connection and file server access.

    Args:
        config: AIDE configuration object.
        passiveMode: if True, the instance is expected to skip active setup
            work elsewhere (flag is only stored here).
        verbose_start: if True, prints a status line ("ok"/"fail") for the
            launch to the console.

    Raises:
        Exception: if the database connection or file server initialization
            fails; the original error is attached as the cause.
    """
    self.config = config

    if verbose_start:
        print('AIWorker'.ljust(22), end='')

    try:
        self.dbConnector = Database(config)
        self.passiveMode = passiveMode
        self._init_fileserver()
    except Exception as e:
        if verbose_start:
            LogDecorator.print_status('fail')
        # "from e" preserves the original traceback for debugging
        # (the original re-raise discarded the exception chain)
        raise Exception(
            f'Could not launch AIWorker (message: "{str(e)}").') from e

    if verbose_start:
        LogDecorator.print_status('ok')
def __init__(self, config, app):
    """
    Set up the worker: refuses to start in demo mode, then initializes the
    database connection, file server access, model and active-learning
    instances.

    Args:
        config: AIDE configuration object.
        app: NOTE(review) — not used anywhere in this constructor; confirm
            whether it is required by the caller's signature.

    Raises:
        Exception: if the project is configured in demo mode.
    """
    if config.getProperty('Project', 'demoMode', type=bool, fallback=False):
        raise Exception('AIWorker cannot be launched in demo mode.')
    self.config = config
    self.dbConnector = Database(config)
    # keep this ordering: model/AL initialization presumably depends on the
    # file server being set up first — TODO confirm
    self._init_fileserver()
    self._init_model_instance()
    self._init_al_instance()
def __init__(self, config):
    """
    Set up the middleware: database connection, Celery app registration,
    data worker and per-project job registry.

    Args:
        config: AIDE configuration object.
    """
    self.config = config
    self.dbConnector = Database(config)
    # register this process' Celery app as the global/default one
    self.celery_app = current_app
    self.celery_app.set_current()
    self.celery_app.set_default()
    self.dataWorker = DataWorker(config)
    # dict per project of jobs
    self.jobs = {}
class AdminMiddleware:
    '''
        Administrative middleware: queries the attached AIDE services
        (AIController, FileServer, database, Celery workers) as well as
        project and user metadata for the administration panel.
    '''

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)

    def getServiceDetails(self, warn_error=False):
        '''
            Queries the indicated AIController and FileServer modules for
            availability and their version. Returns metadata about the
            setup of AIDE accordingly. Raises an Exception if not running
            on the main host. If "warn_error" is True, a warning statement
            is printed to the command line if the version of AIDE on the
            attached AIController and/or FileServer is not the same as on
            the host, or if the servers cannot be contacted.
        '''
        if warn_error:
            print('Contacting AIController...', end='')

        # check if running on the main host
        modules = os.environ['AIDE_MODULES'].strip().split(',')
        modules = set([m.strip() for m in modules])
        if not 'LabelUI' in modules:
            # not running on main host
            raise Exception('Not a main host; cannot query service details.')

        aic_uri = self.config.getProperty('Server',
                                          'aiController_uri',
                                          type=str,
                                          fallback=None)
        fs_uri = self.config.getProperty('Server',
                                         'dataServer_uri',
                                         type=str,
                                         fallback=None)

        if not is_localhost(aic_uri):
            # AIController runs on a different machine; poll for version of AIDE
            try:
                aic_response = requests.get(os.path.join(aic_uri, 'version'))
                aic_version = aic_response.text
                if warn_error and aic_version != AIDE_VERSION:
                    # consistency fix: the FileServer branch below prints a
                    # 'warn' status marker in this situation; do the same here
                    LogDecorator.print_status('warn')
                    print(
                        'WARNING: AIDE version of connected AIController differs from main host.'
                    )
                    print(f'\tAIController URI: {aic_uri}')
                    print(f'\tAIController AIDE version: {aic_version}')
                    print(f'\tAIDE version on this machine: {AIDE_VERSION}')
                elif warn_error:
                    LogDecorator.print_status('ok')
            except Exception as e:
                if warn_error:
                    LogDecorator.print_status('fail')
                    print(
                        f'WARNING: error connecting to AIController (message: "{str(e)}").'
                    )
                aic_version = None
        else:
            aic_version = AIDE_VERSION
            if warn_error:
                LogDecorator.print_status('ok')

        if not is_localhost(fs_uri):
            # same for the file server
            if warn_error:
                print('Contacting FileServer...', end='')
            try:
                fs_response = requests.get(os.path.join(fs_uri, 'version'))
                fs_version = fs_response.text
                if warn_error and fs_version != AIDE_VERSION:
                    LogDecorator.print_status('warn')
                    print(
                        'WARNING: AIDE version of connected FileServer differs from main host.'
                    )
                    print(f'\tFileServer URI: {fs_uri}')
                    print(f'\tFileServer AIDE version: {fs_version}')
                    print(f'\tAIDE version on this machine: {AIDE_VERSION}')
                elif warn_error:
                    LogDecorator.print_status('ok')
            except Exception as e:
                if warn_error:
                    LogDecorator.print_status('fail')
                    print(
                        f'WARNING: error connecting to FileServer (message: "{str(e)}").'
                    )
                fs_version = None
        else:
            fs_version = AIDE_VERSION
            if warn_error:
                LogDecorator.print_status('ok')

        # query database
        dbVersion = self.dbConnector.execute('SHOW server_version;', None,
                                             1)[0]['server_version']
        try:
            # strip build suffix ("13.2 (Debian ...)" -> "13.2"); best-effort,
            # keep the raw value if the format is unexpected
            dbVersion = dbVersion.split(' ')[0].strip()
        except Exception:
            pass
        dbInfo = self.dbConnector.execute('SELECT version() AS version;',
                                          None, 1)[0]['version']

        return {
            'aide_version': AIDE_VERSION,
            'AIController': {
                'uri': aic_uri,
                'aide_version': aic_version
            },
            'FileServer': {
                'uri': fs_uri,
                'aide_version': fs_version
            },
            'Database': {
                'version': dbVersion,
                'details': dbInfo
            }
        }

    def getCeleryWorkerDetails(self):
        '''
            Queries all Celery workers for their details (name, URL,
            capabilities, AIDE version, etc.)
        '''
        result = {}

        i = current_app.control.inspect()
        workers = i.stats()

        if workers is None or not len(workers):
            return result

        for w in workers:
            aiwV = celeryWorkerCommons.get_worker_details.s()
            try:
                res = aiwV.apply_async(queue=w)
                res = res.get(timeout=20)  #TODO: timeout (in seconds)
                result[w] = res
                result[w]['online'] = True
            except Exception as e:
                # worker did not respond in time; report it as offline
                result[w] = {'online': False, 'message': str(e)}

        return result

    def getProjectDetails(self):
        '''
            Returns projects and statistics about them
            (number of images, disk usage, etc.).
        '''
        # get all projects
        projects = {}
        response = self.dbConnector.execute(
            '''
                SELECT shortname, name, owner, annotationtype, predictiontype,
                    ispublic, demomode, ai_model_enabled, interface_enabled,
                    archived, COUNT(username) AS num_users
                FROM aide_admin.project AS p
                JOIN aide_admin.authentication AS auth
                ON p.shortname = auth.project
                GROUP BY shortname
            ''', None, 'all')
        for r in response:
            projDef = {}
            for key in r.keys():
                if key == 'interface_enabled':
                    # interface only counts as enabled if the project is not
                    # archived. BUG FIX: this was a second independent "if"
                    # before, so the raw column value immediately overwrote
                    # this derived one; "elif" keeps the derived value.
                    projDef[key] = r['interface_enabled'] and not r['archived']
                elif key != 'shortname':
                    projDef[key] = r[key]
            projects[r['shortname']] = projDef

        # get statistics (number of annotations, predictions, prediction models, etc.)
        for project in projects.keys():
            stats = self.dbConnector.execute(
                sql.SQL('''
                    SELECT COUNT(*) AS count FROM {id_img}
                    UNION ALL
                    SELECT COUNT(*) FROM {id_anno}
                    UNION ALL
                    SELECT COUNT(*) FROM {id_pred}
                    UNION ALL
                    SELECT SUM(viewcount) FROM {id_iu}
                    UNION ALL
                    SELECT COUNT(*) FROM {id_cnnstate}
                ''').format(id_img=sql.Identifier(project, 'image'),
                            id_anno=sql.Identifier(project, 'annotation'),
                            id_pred=sql.Identifier(project, 'prediction'),
                            id_iu=sql.Identifier(project, 'image_user'),
                            id_cnnstate=sql.Identifier(project, 'cnnstate')),
                None, 'all')
            # the five UNION ALL terms arrive in declaration order
            projects[project]['num_img'] = stats[0]['count']
            projects[project]['num_anno'] = stats[1]['count']
            projects[project]['num_pred'] = stats[2]['count']
            projects[project]['total_viewcount'] = stats[3]['count']
            projects[project]['num_cnnstates'] = stats[4]['count']

            # time statistics (last viewed)
            stats = self.dbConnector.execute(
                sql.SQL('''
                    SELECT MIN(first_checked) AS first_checked,
                        MAX(last_checked) AS last_checked
                    FROM {id_iu};
                ''').format(id_iu=sql.Identifier(project, 'image_user')),
                None, 1)
            # values may be NULL (no views yet); fall back to None
            try:
                projects[project]['first_checked'] = stats[0][
                    'first_checked'].timestamp()
            except Exception:
                projects[project]['first_checked'] = None
            try:
                projects[project]['last_checked'] = stats[0][
                    'last_checked'].timestamp()
            except Exception:
                projects[project]['last_checked'] = None

        return projects

    def getUserDetails(self):
        '''
            Returns details about the user (name, number of enrolled
            projects, last activity, etc.).
        '''
        users = {}
        userData = self.dbConnector.execute(
            '''
                SELECT name, email, isSuperUser, canCreateProjects,
                    last_login, project, isAdmin, admitted_until, blocked_until
                FROM aide_admin.user AS u
                LEFT OUTER JOIN aide_admin.authentication AS auth
                ON u.name = auth.username
            ''', None, 'all')
        for ud in userData:
            if not ud['name'] in users:
                users[ud['name']] = {
                    'email':
                    ud['email'],
                    'canCreateProjects':
                    ud['cancreateprojects'],
                    'isSuperUser':
                    ud['issuperuser'],
                    'last_login': (None if ud['last_login'] is None else
                                   ud['last_login'].timestamp()),
                    'enrolled_projects': {}
                }
            if ud['project'] is not None:
                admitted_until = ud['admitted_until']
                if isinstance(admitted_until, datetime.datetime):
                    admitted_until = admitted_until.timestamp()
                # BUG FIX: originally read ud['admitted_until'] here, so
                # "blocked_until" always mirrored the admission date
                blocked_until = ud['blocked_until']
                if isinstance(blocked_until, datetime.datetime):
                    blocked_until = blocked_until.timestamp()
                users[ud['name']]['enrolled_projects'][ud['project']] = {
                    'admitted_until': admitted_until,
                    'blocked_until': blocked_until
                }
        return users

    def setCanCreateProjects(self, username, allowCreateProjects):
        '''
            Sets or unsets the flag on whether a user can create new
            projects or not.
        '''
        # check if user exists
        userExists = self.dbConnector.execute(
            '''
                SELECT * FROM aide_admin.user
                WHERE name = %s;
            ''', (username, ), 1)
        if not len(userExists):
            return {
                'success': False,
                'message': f'User with name "{username}" does not exist.'
            }

        result = self.dbConnector.execute(
            '''
                UPDATE aide_admin.user
                SET cancreateprojects = %s
                WHERE name = %s;
                SELECT cancreateprojects
                FROM aide_admin.user
                WHERE name = %s;
            ''', (allowCreateProjects, username, username), 1)
        result = result[0]['cancreateprojects']
        return {'success': (result == allowCreateProjects)}
'--modelPath', type=str, help= 'Directory (absolute path) on this machine of the model state file to be considered.' ) args = parser.parse_args() # setup print('Setup...') if not 'AIDE_CONFIG_PATH' in os.environ: os.environ['AIDE_CONFIG_PATH'] = str(args.settings_filepath) from util.configDef import Config from modules.Database.app import Database config = Config() dbConn = Database(config) # load model class function print('Load and verify state dict...') from util.helpers import get_class_executable, current_time modelClass = getattr( get_class_executable( config.getProperty('AIController', 'model_lib_path')), 'model_class') # load state dict stateDict = torch.load(open(args.modelPath, 'rb')) # verify model state model = modelClass.loadFromStateDict(stateDict)
class AIControllerWorker:
    '''
        Worker-side counterpart of the AIController: assembles the sets of
        image IDs to be used for model training and inference, split into
        chunks for distribution across Celery workers.
    '''

    def __init__(self, config, celery_app):
        self.config = config
        self.dbConn = Database(config)
        self.sqlBuilder = SQLStringBuilder(config)
        self.celery_app = celery_app

    def _get_num_available_workers(self):
        '''
            Returns the number of currently registered Celery workers,
            or 1 if none can be determined.
        '''
        #TODO: filter for right tasks and queues
        #TODO: limit to n tasks per worker
        i = self.celery_app.control.inspect()
        if i is not None:
            stats = i.stats()
            if stats is not None:
                # fix: reuse the stats already fetched instead of issuing a
                # second broker round-trip via i.stats()
                return len(stats)
        return 1  #TODO

    def get_training_images(self,
                            project,
                            epoch=None,
                            numEpochs=None,
                            minTimestamp='lastState',
                            includeGoldenQuestions=True,
                            minNumAnnoPerImage=0,
                            maxNumImages=None,
                            numChunks=1):
        '''
            Queries the database for the latest images to be used for model
            training. Returns a list with image UUIDs accordingly, split
            into the number of available workers.
            #TODO: includeGoldenQuestions
        '''
        # sanity checks
        if not (isinstance(minTimestamp, datetime)
                or minTimestamp == 'lastState' or minTimestamp == -1
                or minTimestamp is None):
            raise ValueError(
                '{} is not a recognized property for variable "minTimestamp"'.
                format(str(minTimestamp)))

        # query image IDs
        queryVals = []

        # time filter: None = no filter; 'lastState' = newer than latest
        # model state; datetime / numeric = explicit lower bound
        if minTimestamp is None:
            timestampStr = sql.SQL('')
        elif minTimestamp == 'lastState':
            timestampStr = sql.SQL('''
            WHERE iu.last_checked > COALESCE(to_timestamp(0),
            (SELECT MAX(timecreated) FROM {id_cnnstate}))''').format(
                id_cnnstate=sql.Identifier(project, 'cnnstate'))
        elif isinstance(minTimestamp, datetime):
            timestampStr = sql.SQL(
                'WHERE iu.last_checked > COALESCE(to_timestamp(0), %s)')
            queryVals.append(minTimestamp)
        elif isinstance(minTimestamp, int) or isinstance(minTimestamp, float):
            timestampStr = sql.SQL(
                'WHERE iu.last_checked > COALESCE(to_timestamp(0), to_timestamp(%s))'
            )
            queryVals.append(minTimestamp)

        # parameter order must match placeholder order in the queries below:
        # [timestamp?], [minNumAnnoPerImage?], [limit?]
        if minNumAnnoPerImage > 0:
            queryVals.append(minNumAnnoPerImage)

        if maxNumImages is None or maxNumImages <= 0:
            limitStr = sql.SQL('')
        else:
            limitStr = sql.SQL('LIMIT %s')
            queryVals.append(maxNumImages)

        if minNumAnnoPerImage <= 0:
            queryStr = sql.SQL('''
                SELECT newestAnno.image FROM (
                    SELECT image, last_checked FROM {id_iu} AS iu
                    JOIN (
                        SELECT id AS iid
                        FROM {id_img}
                        WHERE corrupt IS NULL OR corrupt = FALSE
                    ) AS imgQ
                    ON iu.image = imgQ.iid
                    {timestampStr}
                    ORDER BY iu.last_checked ASC
                    {limitStr}
                ) AS newestAnno;
            ''').format(id_iu=sql.Identifier(project, 'image_user'),
                        id_img=sql.Identifier(project, 'image'),
                        timestampStr=timestampStr,
                        limitStr=limitStr)
        else:
            queryStr = sql.SQL('''
                SELECT newestAnno.image FROM (
                    SELECT image, last_checked FROM {id_iu} AS iu
                    JOIN (
                        SELECT id AS iid
                        FROM {id_img}
                        WHERE corrupt IS NULL OR corrupt = FALSE
                    ) AS imgQ
                    ON iu.image = imgQ.iid
                    {timestampStr}
                    {conjunction} image IN (
                        SELECT image FROM (
                            SELECT image, COUNT(*) AS cnt
                            FROM {id_anno}
                            GROUP BY image
                        ) AS annoCount
                        WHERE annoCount.cnt >= %s
                    )
                    ORDER BY iu.last_checked ASC
                    {limitStr}
                ) AS newestAnno;
            ''').format(id_iu=sql.Identifier(project, 'image_user'),
                        id_img=sql.Identifier(project, 'image'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        timestampStr=timestampStr,
                        conjunction=(sql.SQL('WHERE') if minTimestamp is None
                                     else sql.SQL('AND')),
                        limitStr=limitStr)

        imageIDs = self.dbConn.execute(queryStr, tuple(queryVals), 'all')
        imageIDs = [i['image'] for i in imageIDs]

        if numChunks > 1:
            # split for distribution across workers
            # (TODO: also specify subset size for multiple jobs; randomly draw if needed)
            imageIDs = array_split(imageIDs, max(1, len(imageIDs) // numChunks))
        else:
            imageIDs = [imageIDs]

        print("Assembled training images into {} chunks (length of first: {})".
              format(len(imageIDs), len(imageIDs[0])))
        return imageIDs

    def get_inference_images(self,
                             project,
                             epoch=None,
                             numEpochs=None,
                             goldenQuestionsOnly=False,
                             forceUnlabeled=False,
                             maxNumImages=None,
                             numChunks=1):
        '''
            Queries the database for the latest images to be used for
            inference after model training. Returns a list with image UUIDs
            accordingly, split into the number of available workers.
            #TODO: goldenQuestionsOnly
        '''
        if maxNumImages is None or maxNumImages <= 0:
            # fall back to the project-wide inference cap
            queryResult = self.dbConn.execute(
                '''
                SELECT maxNumImages_inference
                FROM aide_admin.project
                WHERE shortname = %s;''', (project, ), 1)
            maxNumImages = queryResult[0]['maxnumimages_inference']

        queryVals = (maxNumImages, )

        # load the IDs of the images that are being subjected to inference.
        # fix: renamed the local from "sql" to avoid shadowing the psycopg2
        # "sql" module used elsewhere in this class
        queryStr = self.sqlBuilder.getInferenceQueryString(
            project, forceUnlabeled, maxNumImages)
        imageIDs = self.dbConn.execute(queryStr, queryVals, 'all')
        imageIDs = [i['image'] for i in imageIDs]

        if numChunks > 1:
            imageIDs = array_split(imageIDs, max(1, len(imageIDs) // numChunks))
        else:
            imageIDs = [imageIDs]
        return imageIDs
def __init__(self, config):
    """
    Set up the middleware: database connection and in-memory login registry.

    Args:
        config: AIDE configuration object.
    """
    self.config = config
    self.dbConnector = Database(config)
    # username -> {timestamp, sessionToken}
    self.usersLoggedIn = {}
Wrapper for the Celery message broker concerning the Model Marketplace module. 2020-21 Benjamin Kellenberger ''' import os from celery import current_app from .marketplaceWorker import ModelMarketplaceWorker from modules.Database.app import Database from util.configDef import Config # initialize Model Marketplace worker modules = os.environ['AIDE_MODULES'] config = Config() worker = ModelMarketplaceWorker(config, Database(config)) @current_app.task(name='ModelMarketplace.shareModel') def share_model(project, username, modelID, modelName, modelDescription, tags, public, anonymous): return worker.shareModel(project, username, modelID, modelName, modelDescription, tags, public, anonymous) @current_app.task(name='ModelMarketplace.importModelDatabase') def import_model_database(modelID, project, username): return worker.importModelDatabase(project, username, modelID) @current_app.task(name='ModelMarketplace.importModelURI')
def __init__(self, config, passiveMode=False):
    """
    Set up the worker: database connection and file server access.

    Args:
        config: AIDE configuration object.
        passiveMode: stored flag; presumably skips active work elsewhere in
            the class — TODO confirm (note: _init_fileserver() is still
            called unconditionally here).
    """
    self.config = config
    self.dbConnector = Database(config)
    self.passiveMode = passiveMode
    self._init_fileserver()
def __init__(self, config):
    """
    Set up the middleware: database connection, LabelUI middleware, and the
    registry of available AI models.

    Args:
        config: AIDE configuration object.
    """
    self.config = config
    self.dbConnector = Database(config)
    self.labelUImiddleware = DBMiddleware(config)
    # populate the AI model registry; keep after the attributes above,
    # which it presumably reads — TODO confirm
    self._init_available_ai_models()
class DataWorker: FILENAMES_PROHIBITED_CHARS = ( '<', '<', '>', '>', '..', '/', '\\', '|', '?', '*', ':' # for macOS ) NUM_IMAGES_LIMIT = 4096 # maximum number of images that can be queried at once (to avoid bottlenecks) def __init__(self, config, passiveMode=False): self.config = config self.dbConnector = Database(config) self.countPattern = re.compile('\_[0-9]+$') self.passiveMode = passiveMode self.tempDir = self.config.getProperty('FileServer', 'tempfiles_dir', type=str, fallback=tempfile.gettempdir()) def aide_internal_notify(self, message): ''' Used for AIDE administrative communication, e.g. for setting up queues. ''' if self.passiveMode: return if 'task' in message: if message['task'] == 'create_project_folders': # set up folders for a newly created project if 'projectName' in message: destPath = os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), message['projectName']) os.makedirs(destPath, exist_ok=True) ''' Image administration functionalities ''' def listImages(self, project, folder=None, imageAddedRange=None, lastViewedRange=None, viewcountRange=None, numAnnoRange=None, numPredRange=None, orderBy=None, order='desc', startFrom=None, limit=None): ''' Returns a list of images, with ID, filename, date image was added, viewcount, number of annotations, number of predictions, and last time viewed, for a given project. The list can be filtered by all those properties (e.g. date and time image was added, last checked; number of annotations, etc.), as well as limited in length (images are sorted by date_added). 
''' queryArgs = [] filterStr = '' if folder is not None and isinstance(folder, str): filterStr += ' filename LIKE %s ' queryArgs.append(folder + '%') if imageAddedRange is not None: #TODO filterStr += 'AND date_added >= to_timestamp(%s) AND date_added <= to_timestamp(%s) ' queryArgs.append(imageAddedRange[0]) queryArgs.append(imageAddedRange[1]) if lastViewedRange is not None: #TODO filterStr += 'AND last_viewed >= to_timestamp(%s) AND last_viewed <= to_timestamp(%s) ' queryArgs.append(lastViewedRange[0]) queryArgs.append(lastViewedRange[1]) if viewcountRange is not None: filterStr += 'AND viewcount >= %s AND viewcount <= %s ' queryArgs.append(viewcountRange[0]) queryArgs.append(viewcountRange[1]) if numAnnoRange is not None: filterStr += 'AND num_anno >= %s AND numAnno <= %s ' queryArgs.append(numAnnoRange[0]) queryArgs.append(numAnnoRange[1]) if numPredRange is not None: filterStr += 'AND num_pred >= %s AND num_pred <= %s ' queryArgs.append(numPredRange[0]) queryArgs.append(numPredRange[1]) if startFrom is not None: if not isinstance(startFrom, UUID): try: startFrom = UUID(startFrom) except: startFrom = None if startFrom is not None: filterStr += ' AND img.id > %s ' queryArgs.append(startFrom) filterStr = filterStr.strip() if filterStr.startswith('AND'): filterStr = filterStr[3:] if len(filterStr.strip()): filterStr = 'WHERE ' + filterStr filterStr = sql.SQL(filterStr) orderStr = sql.SQL('ORDER BY img.id ASC') if orderBy is not None: orderStr = sql.SQL('ORDER BY {} {}, img.id ASC').format( sql.SQL(orderBy), sql.SQL(order)) limitStr = sql.SQL('') if isinstance(limit, float): if not math.isnan(limit): limit = int(limit) else: limit = self.NUM_IMAGES_LIMIT elif isinstance(limit, str): try: limit = int(limit) except: limit = self.NUM_IMAGES_LIMIT elif not isinstance(limit, int): limit = self.NUM_IMAGES_LIMIT limit = max(min(limit, self.NUM_IMAGES_LIMIT), 1) limitStr = sql.SQL('LIMIT %s') queryArgs.append(limit) queryStr = sql.SQL(''' SELECT img.id, filename, 
EXTRACT(epoch FROM date_added) AS date_added, COALESCE(viewcount, 0) AS viewcount, EXTRACT(epoch FROM last_viewed) AS last_viewed, COALESCE(num_anno, 0) AS num_anno, COALESCE(num_pred, 0) AS num_pred, img.isGoldenQuestion FROM {id_img} AS img FULL OUTER JOIN ( SELECT image, COUNT(*) AS viewcount, MAX(last_checked) AS last_viewed FROM {id_iu} GROUP BY image ) AS iu ON img.id = iu.image FULL OUTER JOIN ( SELECT image, COUNT(*) AS num_anno FROM {id_anno} GROUP BY image ) AS anno ON img.id = anno.image FULL OUTER JOIN ( SELECT image, COUNT(*) AS num_pred FROM {id_pred} GROUP BY image ) AS pred ON img.id = pred.image {filter} {order} {limit} ''').format(id_img=sql.Identifier(project, 'image'), id_iu=sql.Identifier(project, 'image_user'), id_anno=sql.Identifier(project, 'annotation'), id_pred=sql.Identifier(project, 'prediction'), filter=filterStr, order=orderStr, limit=limitStr) result = self.dbConnector.execute(queryStr, tuple(queryArgs), 'all') for idx in range(len(result)): result[idx]['id'] = str(result[idx]['id']) return result def uploadImages(self, project, images, existingFiles='keepExisting', splitImages=False, splitProperties=None): ''' Receives a dict of files (bottle.py file format), verifies their file extension and checks if they are loadable by PIL. If they are, they are saved to disk in the project's image folder, and registered in the database. Parameter "existingFiles" can be set as follows: - "keepExisting" (default): if an image already exists on disk with the same path/file name, the new image will be renamed with an underscore and trailing number. - "skipExisting": do not save images that already exist on disk under the same path/file name. - "replaceExisting": overwrite images that exist with the same path/file name. Note: in this case all existing anno- tations, predictions, and other metadata about those images, will be removed from the database. 
If "splitImages" is True, the uploaded images will be automati- cally divided into patches on a regular grid according to what is defined in "splitProperties". For example, the following definition: splitProperties = { 'patchSize': (800, 600), 'stride': (400, 300), 'tight': True } would divide the images into patches of size 800x600, with over- lap of 50% (denoted by the "stride" being half the "patchSize"), and with all patches completely inside the original image (para- meter "tight" makes the last patches to the far left and bottom of the image being fully inside the original image; they are shif- ted if needed). Instead of the full images, the patches are stored on disk and re- ferenced through the database. The name format for patches is "imageName_x_y.jpg", with "imageName" denoting the name of the ori- ginal image, and "x" and "y" the left and top position of the patch inside the original image. Returns image keys for images that were successfully saved, and keys and error messages for those that were not. 
''' imgPaths_valid = [] imgs_valid = [] imgs_warn = {} imgs_error = {} for key in images.keys(): try: nextUpload = images[key] nextFileName = nextUpload.raw_filename #TODO: check if raw_filename is compatible with uploads made from Windows # check if correct file suffix _, ext = os.path.splitext(nextFileName) if not ext.lower() in valid_image_extensions: raise Exception(f'Invalid file type (*{ext})') # check if loadable as image cache = io.BytesIO() nextUpload.save(cache) try: image = Image.open(cache) except Exception: raise Exception('File is not a valid image.') # prepare image(s) to save to disk parent, filename = os.path.split(nextFileName) destFolder = os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), project, parent) os.makedirs(destFolder, exist_ok=True) images = [] filenames = [] if not splitImages: # upload the single image directly images.append(image) filenames.append(filename) else: # split image into patches instead images, coords = split_image(image, splitProperties['patchSize'], splitProperties['stride'], splitProperties['tight']) bareFileName, ext = os.path.splitext(filename) filenames = [ f'{bareFileName}_{c[0]}_{c[1]}{ext}' for c in coords ] # register and save all the images for i in range(len(images)): subImage = images[i] subFilename = filenames[i] absFilePath = os.path.join(destFolder, subFilename) # check if an image with the same name does not already exist newFileName = subFilename fileExists = os.path.exists(absFilePath) if fileExists: if existingFiles == 'keepExisting': # rename new file while (os.path.exists(absFilePath)): # rename file fn, ext = os.path.splitext(newFileName) match = self.countPattern.search(fn) if match is None: newFileName = fn + '_1' + ext else: # parse number number = int(fn[match.span()[0] + 1:match.span()[1]]) newFileName = fn[:match.span( )[0]] + '_' + str(number + 1) + ext absFilePath = os.path.join( destFolder, newFileName) if not os.path.exists(absFilePath): imgs_warn[ key] = 'An image 
with name "{}" already exists under given path on disk. Image has been renamed to "{}".'.format( subFilename, newFileName) elif existingFiles == 'skipExisting': # ignore new file imgs_warn[ key] = f'Image "{newFileName}" already exists on disk and has been skipped.' imgs_valid.append(key) imgPaths_valid.append( os.path.join(parent, newFileName)) continue elif existingFiles == 'replaceExisting': # overwrite new file; first remove metadata queryStr = sql.SQL(''' DELETE FROM {id_iu} WHERE image = ( SELECT id FROM {id_img} WHERE filename = %s ); DELETE FROM {id_anno} WHERE image = ( SELECT id FROM {id_img} WHERE filename = %s ); DELETE FROM {id_pred} WHERE image = ( SELECT id FROM {id_img} WHERE filename = %s ); DELETE FROM {id_img} WHERE filename = %s; ''').format( id_iu=sql.Identifier(project, 'image_user'), id_anno=sql.Identifier(project, 'annotation'), id_pred=sql.Identifier(project, 'prediction'), id_img=sql.Identifier(project, 'image')) self.dbConnector.execute(queryStr, tuple([nextFileName] * 4), None) # remove file try: os.remove(absFilePath) imgs_warn[key] = 'Image "{}" already existed on disk and has been deleted.\n'.format(newFileName) + \ 'All metadata (views, annotations, predictions) have been removed from the database.' except: imgs_warn[key] = 'Image "{}" already existed on disk but could not be deleted.\n'.format(newFileName) + \ 'All metadata (views, annotations, predictions) have been removed from the database.' 
# write to disk fileParent, _ = os.path.split(absFilePath) if len(fileParent): os.makedirs(fileParent, exist_ok=True) subImage.save(absFilePath) imgs_valid.append(key) imgPaths_valid.append(os.path.join(parent, newFileName)) except Exception as e: imgs_error[key] = str(e) # register valid images in database if len(imgPaths_valid): queryStr = sql.SQL(''' INSERT INTO {id_img} (filename) VALUES %s ON CONFLICT (filename) DO NOTHING; ''').format(id_img=sql.Identifier(project, 'image')) self.dbConnector.insert(queryStr, [(i, ) for i in imgPaths_valid]) result = { 'imgs_valid': imgs_valid, 'imgPaths_valid': imgPaths_valid, 'imgs_warn': imgs_warn, 'imgs_error': imgs_error } return result def scanForImages(self, project): ''' Searches the project image folder on disk for files that are valid, but have not (yet) been added to the database. Returns a list of paths with files. ''' # scan disk for files projectFolder = os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), project) if (not os.path.isdir(projectFolder)) and ( not os.path.islink(projectFolder)): # no folder exists for the project (should not happen due to broadcast at project creation) return [] imgs_disk = listDirectory(projectFolder, recursive=True) # get all existing file paths from database imgs_database = set() queryStr = sql.SQL(''' SELECT filename FROM {id_img}; ''').format(id_img=sql.Identifier(project, 'image')) result = self.dbConnector.execute(queryStr, None, 'all') for r in range(len(result)): imgs_database.add(result[r]['filename']) # filter imgs_candidates = imgs_disk.difference(imgs_database) return list(imgs_candidates) def addExistingImages(self, project, imageList=None): ''' Scans the project folder on the file system for images that are physically saved, but not (yet) added to the database. Adds them to the project's database schema. If an imageList iterable is provided, only the intersection between identified images on disk and in the iterable are added. 
If 'imageList' is a string with contents 'all', all untracked images will be added. Returns a list of image IDs and file names that were eventually added to the project database schema. ''' # get all images on disk that are not in database imgs_candidates = self.scanForImages(project) if imageList is None or (isinstance(imageList, str) and imageList.lower() == 'all'): imgs_add = imgs_candidates else: if isinstance(imageList, dict): imageList = list(imageList.keys()) imgs_add = list(set(imgs_candidates).intersection(set(imageList))) if not len(imgs_add): return 0, [] # add to database queryStr = sql.SQL(''' INSERT INTO {id_img} (filename) VALUES %s; ''').format(id_img=sql.Identifier(project, 'image')) self.dbConnector.insert(queryStr, tuple([(i, ) for i in imgs_add])) # get IDs of newly added images queryStr = sql.SQL(''' SELECT id, filename FROM {id_img} WHERE filename IN %s; ''').format(id_img=sql.Identifier(project, 'image')) result = self.dbConnector.execute(queryStr, (tuple(imgs_add), ), 'all') status = (0 if result is not None and len(result) else 1) #TODO return status, result def removeImages(self, project, imageList, forceRemove=False, deleteFromDisk=False): ''' Receives an iterable of image IDs and removes them from the project database schema, including associated user views, annotations, and predictions made. Only removes entries if no user views, annotations, and predictions exist, or else if "forceRemove" is True. If "deleteFromDisk" is True, the image files are also deleted from the project directory on the file system. Returns a list of images that were deleted. 
'''
        # convert incoming string UUIDs into 1-tuples so psycopg2 can expand
        # them for the "IN %s" clauses below
        imageList = tuple([(UUID(i), ) for i in imageList])

        queryArgs = []
        deleteArgs = []
        if forceRemove:
            # remove the images regardless of whether they still carry
            # annotations, predictions or view (image_user) records
            queryStr = sql.SQL('''
                SELECT id, filename
                FROM {id_img}
                WHERE id IN %s;
            ''').format(id_img=sql.Identifier(project, 'image'))
            queryArgs = tuple([imageList])

            deleteStr = sql.SQL('''
                DELETE FROM {id_iu} WHERE image IN %s;
                DELETE FROM {id_anno} WHERE image IN %s;
                DELETE FROM {id_pred} WHERE image IN %s;
                DELETE FROM {id_img} WHERE id IN %s;
            ''').format(id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'),
                        id_img=sql.Identifier(project, 'image'))
            deleteArgs = tuple([imageList] * 4)

        else:
            # only touch images that have no annotations, predictions or
            # view records attached to them
            queryStr = sql.SQL('''
                SELECT id, filename
                FROM {id_img}
                WHERE id IN %s
                AND id NOT IN (
                    SELECT image FROM {id_iu} WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_anno} WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_pred} WHERE image IN %s
                );
            ''').format(id_img=sql.Identifier(project, 'image'),
                        id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'))
            queryArgs = tuple([imageList] * 4)

            deleteStr = sql.SQL('''
                DELETE FROM {id_img}
                WHERE id IN %s
                AND id NOT IN (
                    SELECT image FROM {id_iu} WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_anno} WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_pred} WHERE image IN %s
                );
            ''').format(id_img=sql.Identifier(project, 'image'),
                        id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'))
            deleteArgs = tuple([imageList] * 4)

        # retrieve images to be deleted
        imgs_del = self.dbConnector.execute(queryStr, queryArgs, 'all')
        if imgs_del is None:
            imgs_del = []

        if len(imgs_del):
            # delete images
            self.dbConnector.execute(deleteStr, deleteArgs, None)

            if deleteFromDisk:
                # also remove the image files themselves (best-effort;
                # missing files are silently skipped)
                projectFolder = os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project)
                if os.path.isdir(projectFolder) or os.path.islink(
                        projectFolder):
                    for i in imgs_del:
                        filePath = os.path.join(projectFolder, i['filename'])
                        if os.path.isfile(filePath):
                            os.remove(filePath)

            # convert UUID to str for JSON serializability of the response
            for idx in range(len(imgs_del)):
                imgs_del[idx]['id'] = str(imgs_del[idx]['id'])

        return imgs_del

    def removeOrphanedImages(self, project):
        '''
            Queries the project's image entries in the database and retrieves
            entries for which no image can be found on disk anymore. Removes
            and returns those entries and all associated (meta-) data from
            the database.
        '''
        imgs_DB = self.dbConnector.execute(
            sql.SQL('''
                SELECT id, filename FROM {id_img};
            ''').format(id_img=sql.Identifier(project, 'image')), None, 'all')

        projectFolder = os.path.join(
            self.config.getProperty('FileServer', 'staticfiles_dir'), project)
        if (not os.path.isdir(projectFolder)) and (
                not os.path.islink(projectFolder)):
            # project folder does not exist at all; nothing to compare against
            return []
        imgs_disk = listDirectory(projectFolder, recursive=True)
        imgs_disk = set(imgs_disk)

        # get orphaned images (in DB, but not on disk)
        imgs_orphaned = []
        for i in imgs_DB:
            if i['filename'] not in imgs_disk:
                imgs_orphaned.append(i['id'])
        # imgs_orphaned = list(set(imgs_DB).difference(imgs_disk))
        if not len(imgs_orphaned):
            return []

        # remove; same argument tuple feeds all four "IN %s" clauses
        self.dbConnector.execute(
            sql.SQL('''
                DELETE FROM {id_iu} WHERE image IN %s;
                DELETE FROM {id_anno} WHERE image IN %s;
                DELETE FROM {id_pred} WHERE image IN %s;
                DELETE FROM {id_img} WHERE id IN %s;
            ''').format(id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'),
                        id_img=sql.Identifier(project, 'image')),
            tuple([tuple(imgs_orphaned)] * 4), None)

        return imgs_orphaned

    def prepareDataDownload(self, project, dataType='annotation',
                            userList=None, dateRange=None, extraFields=None,
                            segmaskFilenameOptions=None,
                            segmaskEncoding='rgb'):
        '''
            Polls the database for project data according to the specified
            restrictions:
            - dataType: "annotation" or "prediction"
            - userList: for type "annotation": None (all users) or an
              iterable of user names
            - dateRange: None (all dates) or two values for a minimum and
              maximum timestamp
            - extraFields: None (no field) or dict of keywords and bools for
              additional fields (e.g. browser meta) to be queried.
            - segmaskFilenameOptions: customization parameters for
              segmentation mask images' file names.
            - segmaskEncoding: encoding of the segmentation mask pixel
              values ("rgb" or "indexed")

            Creates a file in this machine's temporary directory and returns
            the file name to it. Note that in some cases (esp. for semantic
            segmentation), the number of queryable entries may be limited due
            to file size and free disk space restrictions. An upper ceiling
            is specified in the configuration *.ini file ('TODO')
        '''
        now = datetime.now(tz=pytz.utc)

        # argument check
        if userList is None:
            userList = []
        elif isinstance(userList, str):
            userList = [userList]
        if dateRange is None:
            dateRange = []
        elif len(dateRange) == 1:
            # NOTE(review): this wraps the whole list rather than its single
            # element; "dateRange[0]" was likely intended — TODO confirm
            dateRange = [dateRange, now]

        if extraFields is None or not isinstance(extraFields, dict):
            extraFields = {'meta': False}
        else:
            if not 'meta' in extraFields or not isinstance(
                    extraFields['meta'], bool):
                extraFields['meta'] = False

        if segmaskFilenameOptions is None:
            segmaskFilenameOptions = {
                'baseName': 'filename',
                'prefix': '',
                'suffix': ''
            }
        else:
            if not 'baseName' in segmaskFilenameOptions or \
                segmaskFilenameOptions['baseName'] not in ('filename', 'id'):
                segmaskFilenameOptions['baseName'] = 'filename'
            try:
                segmaskFilenameOptions['prefix'] = str(
                    segmaskFilenameOptions['prefix'])
            except:
                # NOTE(review): bare except; best-effort fallback to ''
                segmaskFilenameOptions['prefix'] = ''
            try:
                segmaskFilenameOptions['suffix'] = str(
                    segmaskFilenameOptions['suffix'])
            except:
                segmaskFilenameOptions['suffix'] = ''

            # replace characters that are not allowed in file names
            for char in self.FILENAMES_PROHIBITED_CHARS:
                segmaskFilenameOptions['prefix'] = segmaskFilenameOptions[
                    'prefix'].replace(char, '_')
                segmaskFilenameOptions['suffix'] = segmaskFilenameOptions[
                    'suffix'].replace(char, '_')

        # check metadata type: need to deal with segmentation masks separately
        if dataType == 'annotation':
            metaField = 'annotationtype'
        elif dataType == 'prediction':
            metaField = 'predictiontype'
        else:
            raise Exception('Invalid dataType specified ({})'.format(dataType))
        # metaField stems from the fixed mapping above, so the str.format
        # into the SQL string is safe here
        metaType = self.dbConnector.execute(
            '''
                SELECT {} FROM aide_admin.project
                WHERE shortname = %s;
            '''.format(metaField), (project, ), 1)[0][metaField]

        if metaType.lower() == 'segmentationmasks':
            is_segmentation = True
            fileExtension = '.zip'

            # create indexed color palette for segmentation masks
            if segmaskEncoding == 'indexed':
                try:
                    indexedColors = []
                    labelClasses = self.dbConnector.execute(
                        sql.SQL('''
                            SELECT idx, color FROM {id_lc}
                            ORDER BY idx ASC;
                        ''').format(
                            id_lc=sql.Identifier(project, 'labelclass')),
                        None, 'all')
                    currentIndex = 1
                    for lc in labelClasses:
                        if lc['idx'] == 0:
                            # background class
                            continue
                        while currentIndex < lc['idx']:
                            # gaps in label classes; fill with zeros
                            indexedColors.extend([0, 0, 0])
                            currentIndex += 1
                        color = lc['color']
                        if color is None:
                            # no color specified; add from defaults #TODO
                            indexedColors.extend([0, 0, 0])
                        else:
                            # convert to RGB format
                            indexedColors.extend(helpers.hexToRGB(color))
                except:
                    # an error occurred; don't convert segmentation mask to
                    # indexed colors
                    indexedColors = None
            else:
                indexedColors = None

        else:
            is_segmentation = False
            fileExtension = '.txt'  #TODO: support JSON?

        # prepare output file
        filename = 'aide_query_{}'.format(
            now.strftime('%Y-%m-%d_%H-%M-%S')) + fileExtension
        destPath = os.path.join(self.tempDir, 'aide/downloadRequests',
                                project)
        os.makedirs(destPath, exist_ok=True)
        destPath = os.path.join(destPath, filename)

        # generate query
        queryArgs = []
        tableID = sql.Identifier(project, dataType)
        userStr = sql.SQL('')
        iuStr = sql.SQL('')
        dateStr = sql.SQL('')
        queryFields = [
            'filename', 'isGoldenQuestion', 'date_image_added',
            'last_requested_image', 'image_corrupt'  # default image fields
        ]
        if dataType == 'annotation':
            iuStr = sql.SQL('''
                JOIN (SELECT image AS iu_image, username AS iu_username,
                viewcount, last_checked, last_time_required FROM {id_iu}) AS iu
                ON t.image = iu.iu_image AND t.username = iu.iu_username
            ''').format(id_iu=sql.Identifier(project, 'image_user'))
            if len(userList):
                userStr = sql.SQL('WHERE username IN %s')
                queryArgs.append(tuple(userList))

            queryFields.extend(
                getattr(QueryStrings_annotation, metaType).value)
            queryFields.extend([
                'username', 'viewcount', 'last_checked', 'last_time_required'
            ])  #TODO: make customizable

        else:
            queryFields.extend(
                getattr(QueryStrings_prediction, metaType).value)

        if len(dateRange):
            # prepend with AND if a WHERE clause (user filter) already exists
            if len(userStr.string):
                dateStr = sql.SQL(
                    ' AND timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)'
                )
            else:
                dateStr = sql.SQL(
                    'WHERE timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)'
                )
            queryArgs.extend(dateRange)

        if not is_segmentation:
            # join label classes
            lcStr = sql.SQL('''
                JOIN (SELECT id AS lcID, name AS labelclass_name,
                    idx AS labelclass_index
                    FROM {id_lc}
                ) AS lc
                ON label = lc.lcID
            ''').format(id_lc=sql.Identifier(project, 'labelclass'))
            queryFields.extend(['labelclass_name', 'labelclass_index'])
        else:
            lcStr = sql.SQL('')

        # remove redundant query fields
        queryFields = set(queryFields)
        for key in extraFields.keys():
            if not extraFields[key]:
                # NOTE(review): set.remove raises KeyError if the key is not
                # a member; assumes every extraFields key is a query field —
                # TODO confirm
                queryFields.remove(key)
        queryFields = list(queryFields)

        queryStr = sql.SQL('''
            SELECT * FROM {tableID} AS t
            JOIN (
                SELECT id AS imgID, filename, isGoldenQuestion,
                date_added AS date_image_added,
                last_requested AS last_requested_image,
                corrupt AS image_corrupt
                FROM {id_img}
            ) AS img ON t.image = img.imgID
            {lcStr}
            {iuStr}
            {userStr}
            {dateStr}
        ''').format(tableID=tableID,
                    id_img=sql.Identifier(project, 'image'),
                    lcStr=lcStr,
                    iuStr=iuStr,
                    userStr=userStr,
                    dateStr=dateStr)

        # query and process data; segmentation exports become a ZIP archive,
        # everything else a plain semicolon-separated text file
        if is_segmentation:
            mainFile = zipfile.ZipFile(destPath, 'w', zipfile.ZIP_DEFLATED)
        else:
            mainFile = open(destPath, 'w')
        metaStr = '; '.join(queryFields) + '\n'

        with self.dbConnector.execute_cursor(queryStr,
                                             tuple(queryArgs)) as cursor:
            while True:
                b = cursor.fetchone()
                if b is None:
                    break

                if is_segmentation:
                    # convert and store segmentation mask separately
                    segmask_filename = 'segmentation_masks/'

                    if segmaskFilenameOptions['baseName'] == 'id':
                        innerFilename = b['image']
                        parent = ''
                    else:
                        innerFilename = b['filename']
                        parent, innerFilename = os.path.split(innerFilename)

                    finalFilename = os.path.join(
                        parent, segmaskFilenameOptions['prefix'] +
                        innerFilename + segmaskFilenameOptions['suffix'] +
                        '.tif')
                    segmask_filename += finalFilename

                    segmask = base64ToImage(b['segmentationmask'],
                                            b['width'], b['height'])

                    if indexedColors is not None and len(indexedColors) > 0:
                        # convert to indexed color and add color palette from
                        # label classes
                        # NOTE(review): "colors=3" limits the adaptive
                        # palette to 3 entries — verify against number of
                        # label classes
                        segmask = segmask.convert('RGB').convert(
                            'P', palette=Image.ADAPTIVE, colors=3)
                        segmask.putpalette(indexedColors)

                    # save mask into the ZIP archive via an in-memory buffer
                    bio = io.BytesIO()
                    segmask.save(bio, 'TIFF')
                    mainFile.writestr(segmask_filename, bio.getvalue())

                # store metadata
                metaLine = ''
                for field in queryFields:
                    if field.lower() == 'segmentationmask':
                        continue
                    metaLine += '{}; '.format(b[field.lower()])
                metaStr += metaLine + '\n'

        if is_segmentation:
            mainFile.writestr('query.txt', metaStr)
        else:
            mainFile.write(metaStr)

        if is_segmentation:
            # append separate text file for label classes
            labelclassQuery = sql.SQL('''
                SELECT id, name, color, labelclassgroup,
                    idx AS labelclass_index
                FROM {id_lc};
            ''').format(id_lc=sql.Identifier(project, 'labelclass'))
            result = self.dbConnector.execute(labelclassQuery, None, 'all')
            lcStr = 'id,name,color,labelclassgroup,labelclass_index\n'
            for r in result:
                lcStr += '{},{},{},{},{}\n'.format(r['id'], r['name'],
                                                   r['color'],
                                                   r['labelclassgroup'],
                                                   r['labelclass_index'])
            mainFile.writestr('labelclasses.csv', lcStr)

        mainFile.close()

        return filename

    def watchImageFolders(self):
        '''
            Queries all projects that have the image folder watch
            functionality enabled and updates the projects, one by one, with
            the latest image changes.
        '''
        projects = self.dbConnector.execute(
            '''
                SELECT shortname, watch_folder_remove_missing_enabled
                FROM aide_admin.project
                WHERE watch_folder_enabled IS TRUE;
            ''', None, 'all')

        for p in projects:
            pName = p['shortname']

            # add new images
            _, imgs_added = self.addExistingImages(pName, None)

            # remove orphaned images (if enabled)
            if p['watch_folder_remove_missing_enabled']:
                imgs_orphaned = self.removeOrphanedImages(pName)
                if len(imgs_added) or len(imgs_orphaned):
                    print(
                        f'[Project {pName}] {len(imgs_added)} new images found and added, {len(imgs_orphaned)} orphaned images removed from database.'
                    )
            elif len(imgs_added):
                print(
                    f'[Project {pName}] {len(imgs_added)} new images found and added.'
                )

    def deleteProject(self, project, deleteFiles=False):
        '''
            Irreproducibly deletes a project, including all data and
            metadata, from the database. If "deleteFiles" is True, then any
            data on disk (images, etc.) are also deleted. This cannot be
            undone.
        '''
        print(f'Deleting project with shortname "{project}"...')

        # remove database entries
        print('\tRemoving database entries...')
        self.dbConnector.execute(
            '''
                DELETE FROM aide_admin.authentication
                WHERE project = %s;
                DELETE FROM aide_admin.project
                WHERE shortname = %s;
            ''', (
                project,
                project,
            ), None
        )  # already done by DataAdministration.middleware, but we do it again to be sure
        # NOTE(review): schema name is interpolated via str.format, not as a
        # sql.Identifier — relies on prior shortname validation upstream
        self.dbConnector.execute('''
                DROP SCHEMA IF EXISTS {} CASCADE;
            '''.format(project), None, None)  #TODO: Identifier?

        if deleteFiles:
            print('\tRemoving files...')

            messages = []

            def _onError(function, path, excinfo):
                #TODO
                # NOTE(review): interactive Celery debugger left in place —
                # remove before production use
                from celery.contrib import rdb
                rdb.set_trace()
                messages.append({
                    'function': function,
                    'path': path,
                    'excinfo': excinfo
                })

            try:
                shutil.rmtree(os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project),
                              onerror=_onError)
            except Exception as e:
                messages.append(str(e))

            return messages

        return 0
class ProjectConfigMiddleware:
    '''
    Middleware for reading and manipulating project configuration (names,
    UI settings, label class definitions, etc.) in the "aide_admin" schema
    and the per-project database schemata.
    '''

    # prohibited project shortnames (reserved device file names on MS Windows)
    PROHIBITED_SHORTNAMES = [
        'con', 'prn', 'aux', 'nul',
        'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8',
        'com9', 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7',
        'lpt8', 'lpt9'
    ]

    # prohibited project names (both as a whole and for shortnames); these
    # would clash with reserved AIDE URL routes
    PROHIBITED_NAMES = [
        '', 'project', 'getavailableaimodels', 'getbackdrops',
        'verifyprojectname', 'verifyprojectshort', 'newproject',
        'createproject', 'statistics', 'static',
        # bugfix: a missing comma previously fused the next two entries into
        # a single string ('getcreateaccountunrestrictedgetprojects'),
        # leaving both individual route names unprotected
        'getcreateaccountunrestricted', 'getprojects',
        'about', 'favicon.ico', 'logincheck', 'logout', 'login', 'dologin',
        'createaccount', 'loginscreen', 'accountexists', 'getauthentication',
        'getusernames', 'docreateaccount', 'admin', 'getservicedetails',
        'getceleryworkerdetails', 'getprojectdetails', 'getuserdetails',
        'setpassword'
    ]

    # prohibited name prefixes
    PROHIBITED_NAME_PREFIXES = ['/', '?', '&']

    # patterns that are prohibited anywhere for shortnames (replaced with
    # underscores)
    SHORTNAME_PATTERNS_REPLACE = [
        '|', '?', '*',
        ':'  # for macOS
    ]

    # patterns that are prohibited anywhere for both short and long names
    # (no replacement). Includes the HTML-escaped angle bracket entities;
    # the previous list contained literal duplicates of '<' and '>'
    # (presumably mangled '&lt;'/'&gt;').
    PROHIBITED_STRICT = ['&lt;', '<', '&gt;', '>', '..', '/', '\\']

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)

        # load default UI settings
        try:
            # check if custom default styles are provided
            with open('config/default_ui_settings.json', 'r') as f:
                self.defaultUIsettings = json.load(f)
        except Exception:
            # resort to built-in styles
            with open(
                    'modules/ProjectAdministration/static/json/default_ui_settings.json',
                    'r') as f:
                self.defaultUIsettings = json.load(f)

    @staticmethod
    def _recursive_update(dictObject, target):
        '''
            Recursively iterates over all keys and sub-keys of "dictObject"
            and its sub-dicts and copies over values from dict "target", if
            they are available. Modifies "dictObject" in place.
        '''
        for key in dictObject.keys():
            if key in target:
                if isinstance(dictObject[key], dict):
                    ProjectConfigMiddleware._recursive_update(
                        dictObject[key], target[key])
                else:
                    dictObject[key] = target[key]

    def getPlatformInfo(self, project, parameters=None):
        '''
            AIDE setup-specific platform metadata. "parameters" may be None
            or '*' (all known fields), a single field name, or an iterable
            of field names; unknown names are discarded.
        '''
        # parse parameters (if provided) and compare with known entries
        allParams = set(['server_uri', 'server_dir', 'watch_folder_interval'])
        if parameters is not None and parameters != '*':
            if isinstance(parameters, str):
                parameters = [parameters.lower()]
            else:
                parameters = [p.lower() for p in parameters]
            # bugfix: the old code called intersection_update on a temporary
            # set and discarded the result, so the whitelist never applied
            parameters = list(set(parameters).intersection(allParams))
        else:
            parameters = list(allParams)

        response = {}
        for param in parameters:
            if param.lower() == 'server_uri':
                uri = os.path.join(
                    self.config.getProperty('Server', 'dataServer_uri'),
                    project, 'files')
                response[param] = uri
            elif param.lower() == 'server_dir':
                sdir = os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project)
                response[param] = sdir
            elif param.lower() == 'watch_folder_interval':
                interval = self.config.getProperty('FileServer',
                                                   'watch_folder_interval',
                                                   type=float,
                                                   fallback=60)
                response[param] = interval
        return response

    def getProjectImmutables(self, project):
        '''
            Returns the project's immutable settings (annotation and
            prediction type), or None if the project cannot be found.
        '''
        # note: only the two returned columns are queried ("demoMode" was
        # previously selected but never used)
        queryStr = 'SELECT annotationType, predictionType FROM aide_admin.project WHERE shortname = %s;'
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        if result and len(result):
            return {
                'annotationType': result[0]['annotationtype'],
                'predictionType': result[0]['predictiontype']
            }
        return None

    def getProjectInfo(self, project, parameters=None):
        '''
            Returns mutable project settings. "parameters" may be None or
            '*' (all known fields), a single field name, or an iterable of
            field names; unknown names are discarded.
        '''
        # parse parameters (if provided) and compare with mutable entries
        allParams = set([
            'name', 'description', 'ispublic', 'secret_token', 'demomode',
            'interface_enabled', 'ui_settings',
            'segmentation_ignore_unlabeled', 'ai_model_enabled',
            'ai_model_library', 'ai_model_settings', 'ai_alcriterion_library',
            'ai_alcriterion_settings', 'numimages_autotrain',
            'minnumannoperimage', 'maxnumimages_train',
            'watch_folder_enabled', 'watch_folder_remove_missing_enabled'
        ])
        if parameters is not None and parameters != '*':
            if isinstance(parameters, str):
                parameters = [parameters.lower()]
            else:
                parameters = [p.lower() for p in parameters]
            # bugfix (security): intersection_update was called on a
            # temporary set and its result discarded, so unfiltered caller
            # input previously ended up in the SQL string below
            parameters = list(set(parameters).intersection(allParams))
        else:
            parameters = list(allParams)
        if not len(parameters):
            # nothing valid requested
            return {}

        # column names stem from the whitelist above, so joining them into
        # the query is safe
        sqlParameters = ','.join(parameters)
        queryStr = sql.SQL('''
            SELECT {} FROM aide_admin.project
            WHERE shortname = %s;
        ''').format(sql.SQL(sqlParameters))
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        result = result[0]

        # assemble response
        response = {}
        for param in parameters:
            value = result[param]
            if param == 'ui_settings':
                value = json.loads(value)
                # auto-complete with defaults where missing
                value = check_args(value, self.defaultUIsettings)
            response[param] = value
        return response

    def renewSecretToken(self, project):
        '''
            Creates a new secret token, invalidating the old one. Returns the
            new token, or None if the project could not be updated.
        '''
        try:
            newToken = secrets.token_urlsafe(32)
            result = self.dbConnector.execute(
                '''UPDATE aide_admin.project
                    SET secret_token = %s
                    WHERE shortname = %s;
                    SELECT secret_token FROM aide_admin.project
                    WHERE shortname = %s;
                ''', (
                    newToken,
                    project,
                    project,
                ), 1)
            return result[0]['secret_token']
        except Exception:
            # this normally should not happen, since we checked for the
            # validity of the project
            return None

    def getProjectUsers(self, project):
        '''
            Returns a list of users that are enrolled in the project, as
            well as their roles within the project.
        '''
        queryStr = sql.SQL(
            'SELECT * FROM aide_admin.authentication WHERE project = %s;')
        return self.dbConnector.execute(queryStr, (project, ), 'all')

    def createProject(self, username, properties):
        '''
            Receives the most basic, mostly non-changeable settings for a new
            project ("properties") with the following entries:
            - shortname
            - owner (the current username)
            - name
            - description
            - annotationType
            - predictionType

            More advanced settings (UI config, AI model, etc.) will be
            configured after the initial project creation stage.

            Verifies whether these settings are available for a new project.
            If they are, it creates a new database schema for the project,
            adds an entry for it to the admin schema table and makes the
            current user admin. Returns True in this case. Otherwise raises
            an exception.
        '''
        shortname = properties['shortname']

        # verify availability of the project name and shortname
        if not self.getProjectNameAvailable(properties['name']):
            raise Exception('Project name "{}" unavailable.'.format(
                properties['name']))
        if not self.getProjectShortNameAvailable(shortname):
            raise Exception(
                'Project shortname "{}" unavailable.'.format(shortname))

        # load base SQL
        with open('modules/ProjectAdministration/static/sql/create_schema.sql',
                  'r') as f:
            queryStr = sql.SQL(f.read())

        # determine annotation and prediction types and add fields accordingly
        annotationFields = list(
            getattr(Fields_annotation, properties['annotationType']).value)
        predictionFields = list(
            getattr(Fields_prediction, properties['predictionType']).value)

        # create project schema
        self.dbConnector.execute(
            queryStr.format(
                id_schema=sql.Identifier(shortname),
                id_auth=sql.Identifier(
                    self.config.getProperty('Database', 'user')),
                id_image=sql.Identifier(shortname, 'image'),
                id_iu=sql.Identifier(shortname, 'image_user'),
                id_labelclassGroup=sql.Identifier(shortname,
                                                  'labelclassgroup'),
                id_labelclass=sql.Identifier(shortname, 'labelclass'),
                id_annotation=sql.Identifier(shortname, 'annotation'),
                id_cnnstate=sql.Identifier(shortname, 'cnnstate'),
                id_prediction=sql.Identifier(shortname, 'prediction'),
                id_workflow=sql.Identifier(shortname, 'workflow'),
                id_workflowHistory=sql.Identifier(shortname,
                                                  'workflowhistory'),
                annotation_fields=sql.SQL(', ').join(
                    [sql.SQL(field) for field in annotationFields]),
                prediction_fields=sql.SQL(', ').join(
                    [sql.SQL(field) for field in predictionFields])),
            None, None)

        # register project in the admin schema
        self.dbConnector.execute(
            '''
                INSERT INTO aide_admin.project (shortname, name, description,
                    owner, secret_token, interface_enabled, annotationType,
                    predictionType, isPublic, demoMode, ui_settings)
                VALUES (
                    %s, %s, %s,
                    %s, %s, %s, %s,
                    %s, %s, %s, %s
                );
            ''', (shortname, properties['name'],
                  properties.get('description', ''), username,
                  secrets.token_urlsafe(32), False,
                  properties['annotationType'], properties['predictionType'],
                  False, False, json.dumps(self.defaultUIsettings)), None)

        # register user in project as admin
        self.dbConnector.execute(
            '''
                INSERT INTO aide_admin.authentication (username, project, isAdmin)
                VALUES (%s, %s, true);
            ''', (
                username,
                shortname,
            ), None)

        # notify FileServer instance(s) to set up project folders
        process = fileServer_interface.aide_internal_notify.si({
            'task': 'create_project_folders',
            'projectName': shortname
        })
        process.apply_async(queue='aide_broadcast', ignore_result=True)

        return True

    def updateProjectSettings(self, project, projectSettings):
        '''
            Parses and sanitizes a dict of mutable project settings and
            commits accepted values to "aide_admin.project". UI settings are
            merged recursively into the currently stored ones and
            auto-completed with defaults. Returns True upon completion.
        '''
        # check UI settings first
        if 'ui_settings' in projectSettings:
            if isinstance(projectSettings['ui_settings'], str):
                projectSettings['ui_settings'] = json.loads(
                    projectSettings['ui_settings'])
            fieldNames = [('welcomeMessage', str), ('numImagesPerBatch', int),
                          ('minImageWidth', int), ('numImageColumns_max', int),
                          ('defaultImage_w', int), ('defaultImage_h', int),
                          ('styles', dict), ('enableEmptyClass', bool),
                          ('showPredictions', bool),
                          ('showPredictions_minConf', float),
                          ('carryOverPredictions', bool),
                          ('carryOverRule', str),
                          ('carryOverPredictions_minConf', float),
                          ('defaultBoxSize_w', int), ('defaultBoxSize_h', int),
                          ('minBoxSize_w', int), ('minBoxSize_h', int)]
            uiSettings_new, uiSettingsKeys_new = parse_parameters(
                projectSettings['ui_settings'],
                fieldNames,
                absent_ok=True,
                escape=True)  #TODO: escape

            # adopt current settings and replace values accordingly
            uiSettings = self.dbConnector.execute(
                '''SELECT ui_settings
                    FROM aide_admin.project
                    WHERE shortname = %s;
                ''', (project, ), 1)
            uiSettings = json.loads(uiSettings[0]['ui_settings'])
            for kIdx in range(len(uiSettingsKeys_new)):
                if isinstance(uiSettings[uiSettingsKeys_new[kIdx]], dict):
                    ProjectConfigMiddleware._recursive_update(
                        uiSettings[uiSettingsKeys_new[kIdx]],
                        uiSettings_new[kIdx])
                else:
                    uiSettings[uiSettingsKeys_new[kIdx]] = uiSettings_new[kIdx]

            # auto-complete with defaults where missing
            uiSettings = check_args(uiSettings, self.defaultUIsettings)
            projectSettings['ui_settings'] = json.dumps(uiSettings)

        # parse remaining parameters
        fieldNames = [('description', str), ('isPublic', bool),
                      ('secret_token', str), ('demoMode', bool),
                      ('ui_settings', str), ('interface_enabled', bool),
                      ('watch_folder_enabled', bool),
                      ('watch_folder_remove_missing_enabled', bool)]
        vals, params = parse_parameters(projectSettings,
                                        fieldNames,
                                        absent_ok=True,
                                        escape=False)
        vals.append(project)

        # commit to DB; column names stem from the fixed whitelist above
        queryStr = sql.SQL('''UPDATE aide_admin.project
            SET {}
            WHERE shortname = %s;
            ''').format(
            sql.SQL(',').join(
                [sql.SQL('{} = %s'.format(item)) for item in params]))
        self.dbConnector.execute(queryStr, tuple(vals), None)

        return True

    def updateClassDefinitions(self, project, classdef, removeMissing=False):
        '''
            Updates the project's class definitions. If "removeMissing" is
            set to True, label classes that are present in the database, but
            not in "classdef," will be removed. Label class groups will only
            be removed if they do not reference any label class present in
            "classdef." This functionality is disallowed in the case of
            segmentation masks.
        '''
        # check if project contains segmentation masks
        metaType = self.dbConnector.execute(
            '''
                SELECT annotationType, predictionType
                FROM aide_admin.project
                WHERE shortname = %s;
            ''', (project, ), 1)[0]
        is_segmentation = any(
            ['segmentationmasks' in m.lower() for m in metaType.values()])
        if is_segmentation:
            # masks reference classes by index; never delete them
            removeMissing = False

        # get current classes from database
        db_classes = {}
        db_groups = {}
        if removeMissing:
            queryStr = sql.SQL('''
                SELECT * FROM {id_lc} AS lc
                FULL OUTER JOIN (
                    SELECT id AS lcgid, name AS lcgname, parent, color
                    FROM {id_lcg}
                ) AS lcg
                ON lc.labelclassgroup = lcg.lcgid
            ''').format(id_lc=sql.Identifier(project, 'labelclass'),
                        id_lcg=sql.Identifier(project, 'labelclassgroup'))
            result = self.dbConnector.execute(queryStr, None, 'all')
            for r in result:
                if r['id'] is not None:
                    db_classes[r['id']] = r
                if r['lcgid'] is not None:
                    if not r['lcgid'] in db_groups:
                        db_groups[r['lcgid']] = {**r, **{'num_children': 0}}
                    elif not 'lcgid' in db_groups[r['lcgid']]:
                        db_groups[r['lcgid']] = {**db_groups[r['lcgid']], **r}
                if r['labelclassgroup'] is not None:
                    if not r['labelclassgroup'] in db_groups:
                        db_groups[r['labelclassgroup']] = {'num_children': 1}
                    else:
                        db_groups[r['labelclassgroup']]['num_children'] += 1

        # parse provided class definitions list
        unique_keystrokes = set()
        classes_update = []
        classgroups_update = []
        taken_ids = set()

        def _parse_item(item, parent=None):
            # get or create ID for item
            try:
                itemID = uuid.UUID(item['id'])
            except Exception:
                itemID = uuid.uuid1()
            # bugfix: the collision check used to compare the UUID against
            # lists of dicts (which can never match); check actual taken IDs
            while itemID in taken_ids:
                itemID = uuid.uuid1()
            taken_ids.add(itemID)

            entry = {
                'id': itemID,
                'name': item['name'],
                'color': (None if not 'color' in item else item['color']),
                'keystroke': None,
                'labelclassgroup': parent
            }
            if 'children' in item:
                # label class group
                classgroups_update.append(entry)
                for child in item['children']:
                    _parse_item(child, itemID)
            else:
                # label class; only assign keystroke if not yet taken
                if 'keystroke' in item and not item[
                        'keystroke'] in unique_keystrokes:
                    entry['keystroke'] = item['keystroke']
                    unique_keystrokes.add(item['keystroke'])
                classes_update.append(entry)

        for item in classdef:
            _parse_item(item, None)

        # apply changes
        if removeMissing:
            queryArgs = []
            if len(classes_update):
                # remove all missing label classes
                lcSpec = sql.SQL('WHERE id NOT IN %s')
                queryArgs.append(tuple([(l['id'], ) for l in classes_update]))
            else:
                # remove all label classes
                # bugfix: this branch used to assign "lcgSpec" instead of
                # "lcSpec", causing a NameError below
                lcSpec = sql.SQL('')
            if len(classgroups_update):
                # remove all missing labelclass groups
                lcgSpec = sql.SQL('WHERE id NOT IN %s')
                queryArgs.append(
                    tuple([(l['id'], ) for l in classgroups_update]))
            else:
                # remove all labelclass groups
                lcgSpec = sql.SQL('')
            queryStr = sql.SQL('''
                DELETE FROM {id_lc} {lcSpec};
                DELETE FROM {id_lcg} {lcgSpec};
            ''').format(id_lc=sql.Identifier(project, 'labelclass'),
                        id_lcg=sql.Identifier(project, 'labelclassgroup'),
                        lcSpec=lcSpec,
                        lcgSpec=lcgSpec)
            self.dbConnector.execute(queryStr, tuple(queryArgs), None)

        # add/update in order (groups, set their parents, label classes)
        groups_new = [(
            g['id'],
            g['name'],
            g['color'],
        ) for g in classgroups_update]
        queryStr = sql.SQL('''
            INSERT INTO {id_lcg} (id, name, color)
            VALUES %s
            ON CONFLICT (id) DO UPDATE
            SET name = EXCLUDED.name, color = EXCLUDED.color;
        ''').format(  #TODO: on conflict(name)
            id_lcg=sql.Identifier(project, 'labelclassgroup'))
        self.dbConnector.insert(queryStr, groups_new)

        # set parents
        groups_parents = [
            (
                g['id'],
                g['labelclassgroup'],
            ) for g in classgroups_update
            if ('labelclassgroup' in g and g['labelclassgroup'] is not None)
        ]
        queryStr = sql.SQL('''
            UPDATE {id_lcg} AS lcg
            SET parent = q.parent
            FROM (VALUES %s) AS q(id, parent)
            WHERE lcg.id = q.id;
        ''').format(id_lcg=sql.Identifier(project, 'labelclassgroup'))
        self.dbConnector.insert(queryStr, groups_parents)

        # insert/update label classes
        lcdata = [(
            l['id'],
            l['name'],
            l['color'],
            l['keystroke'],
            l['labelclassgroup'],
        ) for l in classes_update]
        queryStr = sql.SQL('''
            INSERT INTO {id_lc} (id, name, color, keystroke, labelclassgroup)
            VALUES %s
            ON CONFLICT (id) DO UPDATE
            SET name = EXCLUDED.name, color = EXCLUDED.color,
                keystroke = EXCLUDED.keystroke,
                labelclassgroup = EXCLUDED.labelclassgroup;
        ''').format(  #TODO: on conflict(name)
            id_lc=sql.Identifier(project, 'labelclass'))
        self.dbConnector.insert(queryStr, lcdata)

        return True

    def getProjectNameAvailable(self, projectName):
        '''
            Returns True if the provided project (long) name is available.
        '''
        if not isinstance(projectName, str):
            return False
        projectName = projectName.strip().lower()
        if not len(projectName):
            return False

        # check if name contains prohibited AIDE keywords (we do not replace
        # long names); substring containment subsumes exact membership
        if any([p in projectName for p in self.PROHIBITED_STRICT]):
            return False
        if projectName in self.PROHIBITED_NAMES:
            return False
        if any([
                projectName.startswith(p)
                for p in self.PROHIBITED_NAME_PREFIXES
        ]):
            return False

        # check if name is already taken
        result = self.dbConnector.execute(
            '''SELECT 1 AS result
            FROM aide_admin.project
            WHERE name = %s;
            ''', (projectName, ), 1)
        if result is None or not len(result):
            return True
        return result[0]['result'] != 1

    def getProjectShortNameAvailable(self, projectName):
        '''
            Returns True if the provided project shortname is available. In
            essence, "available" means that a database schema with the given
            name can be created (this includes Postgres schema name
            conventions). Returns False otherwise.
        '''
        if not isinstance(projectName, str):
            return False
        projectName = projectName.strip().lower()
        if not len(projectName):
            return False

        # check if name matches prohibited AIDE keywords; replace where
        # possible
        if any([p in projectName for p in self.PROHIBITED_STRICT]):
            return False
        if projectName in self.PROHIBITED_NAMES or \
                projectName in self.PROHIBITED_SHORTNAMES:
            return False
        if any([
                projectName.startswith(p)
                for p in self.PROHIBITED_NAME_PREFIXES
        ]):
            return False
        for p in self.SHORTNAME_PATTERNS_REPLACE:
            projectName = projectName.replace(p, '_')

        # check if provided name is valid as per Postgres conventions
        # (may not start with "pg_" or a digit, nor contain '$' or spaces)
        matches = re.findall(r'(^(pg_|[0-9]).*|.*(\$|\s)+.*)', projectName)
        if len(matches):
            return False

        # check if project shorthand already exists in database
        result = self.dbConnector.execute(
            '''SELECT 1 AS result
            FROM information_schema.schemata
            WHERE schema_name ilike %s
            UNION ALL
            SELECT 1 FROM aide_admin.project
            WHERE shortname ilike %s;
            ''', (
                projectName,
                projectName,
            ), 2)
        if result is None or not len(result):
            return True
        if len(result) == 2:
            return result[0]['result'] != 1 and result[1]['result'] != 1
        elif len(result) == 1:
            return result[0]['result'] != 1
        else:
            return True
def __init__(self, config):
    '''
    Keeps a reference to the application configuration and initializes
    database access as well as the labeling UI middleware.
    '''
    self.config = config
    self.dbConnector = Database(config)
    self.labelUImiddleware = DBMiddleware(config)
class ModelMarketplaceMiddleware:
    '''
        Handles listing, importing, and sharing of AI model states through
        the AIDE Model Marketplace (relation "aide_admin.modelMarketplace").
    '''

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)
        self.labelUImiddleware = DBMiddleware(config)


    def getModelsMarketplace(self, project):
        '''
            Returns a dict of model state meta data, filtered by the project
            settings (model library; annotation type, prediction type).

            Keys are model state UUIDs (as strings); author and origin fields
            are blanked out for states shared anonymously.
        '''
        # get project meta data (immutables, model library)
        projectMeta = self.dbConnector.execute(
            '''
                SELECT annotationType, predictionType, ai_model_library
                FROM aide_admin.project
                WHERE shortname = %s;
            ''',
            (project,),
            1
        )
        if projectMeta is None or not len(projectMeta):
            raise Exception(f'Project {project} could not be found in database.')
        projectMeta = projectMeta[0]
        annotationType = projectMeta['annotationtype']
        predictionType = projectMeta['predictiontype']

        # get matching model states (public ones, plus those shared from this project)
        result = self.dbConnector.execute(
            '''
                SELECT * FROM (
                    SELECT id, name, description, labelclasses, model_library,
                        annotationType, predictionType, EXTRACT(epoch FROM timeCreated) AS time_created,
                        alcriterion_library, public, anonymous, selectCount,
                        CASE WHEN anonymous THEN NULL ELSE author END AS author,
                        CASE WHEN anonymous THEN NULL ELSE origin_project END AS origin_project,
                        CASE WHEN anonymous THEN NULL ELSE origin_uuid END AS origin_uuid
                    FROM aide_admin.modelMarketplace
                    WHERE annotationType = %s AND
                    predictionType = %s AND (
                        public = TRUE OR
                        origin_project = %s
                    )
                ) AS mm
                LEFT OUTER JOIN (
                    SELECT name AS projectName, shortname
                    FROM aide_admin.project
                ) AS pn
                ON mm.origin_project = pn.shortname;
            ''',
            (annotationType, predictionType, project),
            'all'
        )
        if result is not None and len(result):
            matchingStates = {}
            for r in result:
                stateID = str(r['id'])
                # UUID values are not JSON-serializable; stringify them
                values = {}
                for key in r.keys():
                    if isinstance(r[key], UUID):
                        values[key] = str(r[key])
                    else:
                        values[key] = r[key]
                matchingStates[stateID] = values
        else:
            matchingStates = {}
        return matchingStates


    def importModel(self, project, modelID):
        '''
            Retrieves meta data for the given Marketplace model state.
            Import into the project is not yet implemented (returns 0).
        '''
        # get model meta data
        meta = self.dbConnector.execute('''
            SELECT id, name, description, labelclasses, model_library,
            annotationType, predictionType, timeCreated, alcriterion_library,
            public, anonymous, selectCount,
            CASE WHEN anonymous THEN NULL ELSE author END AS author,
            CASE WHEN anonymous THEN NULL ELSE origin_project END AS origin_project,
            CASE WHEN anonymous THEN NULL ELSE origin_uuid END AS origin_uuid
            FROM aide_admin.modelMarketplace
            WHERE id = %s;
        ''', (UUID(modelID),), 1)

        # check if model is suitable for current project
        #TODO

        return 0


    def shareModel(self, project, username, modelID, modelName, modelDescription,
                    public, anonymous):
        '''
            Publishes the given project model state to the Model Marketplace,
            unless it has already been shared or its name is taken.
            Returns a status dict; status 0 indicates success.
        '''
        #TODO: export as Celery task
        modelID = UUID(modelID)

        # check if model hasn't already been shared
        isShared = self.dbConnector.execute('''
            SELECT author
            FROM aide_admin.modelMarketplace
            WHERE origin_project = %s AND origin_uuid = %s;
        ''', (project, modelID), 'all')
        if isShared is not None and len(isShared):
            author = isShared[0]['author']
            return {
                'status': 2,
                'message': f'Model state has already been shared by {author}.'
            }

        # get project immutables
        immutables = self.labelUImiddleware.get_project_immutables(project)

        # get label class info
        labelclasses = json.dumps(self.labelUImiddleware.getClassDefinitions(project, False))

        # check if name is unique (TODO: required?)
        nameTaken = self.dbConnector.execute('''
            SELECT COUNT(*) AS cnt
            FROM aide_admin.modelMarketplace
            WHERE name = %s;
        ''', (modelName,), 'all')
        if nameTaken is not None and len(nameTaken) and nameTaken[0]['cnt']:
            return {
                'status': 3,
                'message': f'A model state with name "{modelName}" already exists in the Model Marketplace.'
            }

        # bug fix: modelID has already been cast to UUID above; wrapping it in
        # UUID(...) again would raise a TypeError and make sharing always fail.
        sharedModelID = self.dbConnector.execute(sql.SQL('''
            INSERT INTO aide_admin.modelMarketplace
            (name, description, labelclasses, author,
            statedict, model_library, alCriterion_library,
            annotationType, predictionType,
            origin_project, origin_uuid, public, anonymous)

            SELECT %s, %s, %s, %s,
            statedict, model_library, alCriterion_library,
            %s, %s,
            %s, id, %s, %s
            FROM {id_cnnstate} AS cnnS
            WHERE id = %s
            RETURNING id;
        ''').format(
            id_cnnstate=sql.Identifier(project, 'cnnstate')
        ), (modelName, modelDescription, labelclasses, username,
            immutables['annotationType'], immutables['predictionType'],
            project, public, anonymous, modelID))

        return {
            'status': 0,
            'shared_model_id': sharedModelID
        }
2019-21 Benjamin Kellenberger ''' import os from celery import current_app from modules.AIWorker.app import AIWorker from modules.Database.app import Database from util.configDef import Config # init AIWorker modules = os.environ['AIDE_MODULES'] passiveMode = (os.environ['PASSIVE_MODE'] == '1' if 'PASSIVE_MODE' in os.environ else False) or not ('aiworker' in modules.lower()) config = Config() worker = AIWorker(config, Database(config), passiveMode) @current_app.task(name='AIWorker.aide_internal_notify') def aide_internal_notify(message): return worker.aide_internal_notify(message) @current_app.task(name='AIWorker.call_update_model', rate_limit=1) def call_update_model(blank, numEpochs, project): return worker.call_update_model(numEpochs, project) @current_app.task(name='AIWorker.call_train', rate_limit=1) def call_train(data, index, epoch, numEpochs, project, aiModelSettings): if len(data) == 2 and data[1] is None:
class DBMiddleware():
    '''
        Database middleware for the LabelUI front-end: serves project
        settings, label class definitions, image batches with annotations
        and predictions, and receives user-submitted annotations.
    '''

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)
        self.project_immutables = {}        # project settings that cannot be changed (project shorthand -> {settings})
        self._fetchProjectSettings()
        self.sqlBuilder = SQLStringBuilder()
        self.annoParser = AnnotationParser()


    def _fetchProjectSettings(self):
        '''
            Loads global (project-independent) settings from the app
            configuration, plus default UI styles from disk.
        '''
        # AI controller URI
        aiControllerURI = self.config.getProperty('Server', 'aiController_uri')
        if aiControllerURI is None or aiControllerURI.strip() == '':
            # no AI backend configured
            aiControllerURI = None

        # global, project-independent settings
        self.globalSettings = {
            'indexURI': self.config.getProperty('Server', 'index_uri', type=str, fallback='/'),
            'dataServerURI': self.config.getProperty('Server', 'dataServer_uri'),
            'aiControllerURI': aiControllerURI
        }

        # default styles
        try:
            # check if custom default styles are provided
            self.defaultStyles = json.load(
                open('config/default_ui_settings.json', 'r'))
        except:
            # resort to built-in styles
            self.defaultStyles = json.load(
                open(
                    'modules/ProjectAdministration/static/json/default_ui_settings.json',
                    'r'))


    def _assemble_annotations(self, project, cursor, hideGoldenQuestionInfo):
        '''
            Consumes the rows of an open database cursor and assembles them
            into a dict (image UUID str -> {fileName, annotations,
            predictions, ...}). Each row carries a 'ctype' column marking it
            as an annotation or a prediction (or neither).
        '''
        response = {}
        while True:
            b = cursor.fetchone()
            if b is None:
                break

            imgID = str(b['image'])
            if not imgID in response:
                response[imgID] = {
                    'fileName': b['filename'],
                    'predictions': {},
                    'annotations': {},
                    'last_checked': None
                }
            viewcount = b['viewcount']
            if viewcount is not None:
                response[imgID]['viewcount'] = viewcount
            last_checked = b['last_checked']
            if last_checked is not None:
                if response[imgID]['last_checked'] is None:
                    response[imgID]['last_checked'] = last_checked
                else:
                    # keep the most recent timestamp across rows of the same image
                    response[imgID]['last_checked'] = max(
                        response[imgID]['last_checked'], last_checked)

            if not hideGoldenQuestionInfo:
                response[imgID]['isGoldenQuestion'] = b['isgoldenquestion']

            # parse annotations and predictions
            entryID = str(b['id'])
            if b['ctype'] is not None:
                colnames = self.sqlBuilder.getColnames(
                    self.project_immutables[project]['annotationType'],
                    self.project_immutables[project]['predictionType'],
                    b['ctype'])
                entry = {}
                for c in colnames:
                    value = b[c]
                    # convert non-JSON-serializable values
                    if isinstance(value, datetime):
                        value = value.timestamp()
                    elif isinstance(value, UUID):
                        value = str(value)
                    entry[c] = value

                if b['ctype'] == 'annotation':
                    response[imgID]['annotations'][entryID] = entry
                elif b['ctype'] == 'prediction':
                    response[imgID]['predictions'][entryID] = entry

        return response


    def _set_images_requested(self, project, imageIDs):
        '''
            Sets column "last_requested" of relation "image"
            to the current date. This is done during image
            querying to signal that an image has been requested,
            but not (yet) viewed.
        '''
        # prepare insertion values
        now = datetime.now(tz=pytz.utc)
        vals = []
        for key in imageIDs:
            vals.append(key)
        if len(vals):
            queryStr = sql.SQL('''
                UPDATE {id_img}
                SET last_requested = %s
                WHERE id IN %s;
            ''').format(id_img=sql.Identifier(project, 'image'))
            self.dbConnector.execute(queryStr, (
                now,
                tuple(vals),
            ), None)


    def _get_sample_metadata(self, metaType):
        '''
            Returns a dummy annotation or prediction for the sample image in
            the "exampleData" folder, depending on the "metaType" specified
            (i.e., labels, points, boundingBoxes, or segmentationMasks).
        '''
        if metaType == 'labels':
            return {
                'id': '00000000-0000-0000-0000-000000000000',
                'label': '00000000-0000-0000-0000-000000000000',
                'confidence': 1.0,
                'priority': 1.0,
                'viewcount': None
            }
        elif metaType == 'points' or metaType == 'boundingBoxes':
            return {
                'id': '00000000-0000-0000-0000-000000000000',
                'label': '00000000-0000-0000-0000-000000000000',
                'x': 0.542959427207637,
                'y': 0.5322069489713102,
                'width': 0.6133651551312653,
                'height': 0.7407598263401316,
                'confidence': 1.0,
                'priority': 1.0,
                'viewcount': None
            }
        elif metaType == 'segmentationMasks':
            # read segmentation mask from disk
            segmask = Image.open(
                'modules/LabelUI/static/exampleData/sample_segmentationMask.tif'
            )
            segmask, width, height = helpers.imageToBase64(segmask)
            return {
                'id': '00000000-0000-0000-0000-000000000000',
                'width': width,
                'height': height,
                'segmentationmask': segmask,
                'confidence': 1.0,
                'priority': 1.0,
                'viewcount': None
            }
        else:
            return {}


    def get_project_immutables(self, project):
        '''
            Returns (and lazily caches) the project's immutable settings:
            annotation type, prediction type, and demo mode flag.
            Returns None if the project cannot be found.
        '''
        if project not in self.project_immutables:
            queryStr = 'SELECT annotationType, predictionType, demoMode FROM aide_admin.project WHERE shortname = %s;'
            result = self.dbConnector.execute(queryStr, (project, ), 1)
            if result and len(result):
                self.project_immutables[project] = {
                    'annotationType': result[0]['annotationtype'],
                    'predictionType': result[0]['predictiontype'],
                    'demoMode': helpers.checkDemoMode(project, self.dbConnector)
                }
            else:
                return None
        return self.project_immutables[project]


    def get_dynamic_project_settings(self, project):
        '''
            Returns the project's (mutable) UI settings, completed with
            defaults where missing.
        '''
        queryStr = 'SELECT ui_settings FROM aide_admin.project WHERE shortname = %s;'
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        result = json.loads(result[0]['ui_settings'])

        # complete styles with defaults where necessary (may be required for project that got upgraded from v1)
        result = helpers.check_args(result, self.defaultStyles)

        return result


    def getProjectSettings(self, project):
        '''
            Queries the database for general project-specific metadata, such
            as:
            - Classes: names, indices, default colors
            - Annotation type: one of {class labels, positions, bboxes}
        '''
        # publicly available info from DB
        projSettings = self.getProjectInfo(project)

        # label classes
        projSettings['classes'] = self.getClassDefinitions(project)

        # static and dynamic project settings and properties from configuration file
        projSettings = {
            **projSettings,
            **self.get_project_immutables(project),
            **self.get_dynamic_project_settings(project),
            **self.globalSettings
        }

        # append project shorthand to AIController URI
        if 'aiControllerURI' in projSettings and projSettings[
                'aiControllerURI'] is not None and len(
                    projSettings['aiControllerURI']):
            projSettings['aiControllerURI'] = os.path.join(
                projSettings['aiControllerURI'], project) + '/'

        return projSettings


    def getProjectInfo(self, project):
        '''
            Returns safe, shareable information about the project
            (i.e., users don't need to be part of the project to see
            these data).
        '''
        queryStr = '''
            SELECT shortname, name, description, demoMode,
            interface_enabled, archived, ai_model_enabled,
            ai_model_library, ai_alcriterion_library,
            segmentation_ignore_unlabeled
            FROM aide_admin.project
            WHERE shortname = %s
        '''
        result = self.dbConnector.execute(queryStr, (project, ), 1)[0]

        # provide flag if AI model is available
        aiModelAvailable = all([
            result['ai_model_enabled'],
            result['ai_model_library'] is not None
            and len(result['ai_model_library']),
            result['ai_alcriterion_library'] is not None
            and len(result['ai_alcriterion_library'])
        ])

        return {
            'projectShortname': result['shortname'],
            'projectName': result['name'],
            'projectDescription': result['description'],
            'demoMode': result['demomode'],
            # an archived project has its interface disabled regardless of the flag
            'interface_enabled':
            result['interface_enabled'] and not result['archived'],
            'ai_model_available': aiModelAvailable,
            'segmentation_ignore_unlabeled':
            result['segmentation_ignore_unlabeled']
        }


    def getClassDefinitions(self, project, showHidden=False):
        '''
            Returns a dictionary with entries for all classes in the project,
            arranged as a tree of label class groups and their classes.
        '''
        # query data
        if showHidden:
            hiddenSpec = ''
        else:
            hiddenSpec = 'WHERE hidden IS false'
        queryStr = sql.SQL('''
            SELECT 'group' AS type, id, NULL as idx, name, color, parent, NULL AS keystroke, NULL AS hidden FROM {}
            UNION ALL
            SELECT 'class' AS type, id, idx, name, color, labelclassgroup, keystroke, hidden FROM {}
            {};
            ''').format(sql.Identifier(project, 'labelclassgroup'),
                        sql.Identifier(project, 'labelclass'),
                        sql.SQL(hiddenSpec))
        classData = self.dbConnector.execute(queryStr, None, 'all')

        # assemble entries first
        allEntries = {}
        numClasses = 0
        for cl in classData:
            id = str(cl['id'])
            entry = {
                'id': id,
                'name': cl['name'],
                'color': cl['color'],
                'parent':
                str(cl['parent']) if cl['parent'] is not None else None,
                'hidden': cl['hidden']
            }
            if cl['type'] == 'group':
                entry['entries'] = {}
            else:
                entry['index'] = cl['idx']
                entry['keystroke'] = cl['keystroke']
                numClasses += 1
            allEntries[id] = entry

        # transform into tree
        def _find_parent(tree, parentID):
            # depth-first search for the node with the given id
            if parentID is None:
                return None
            elif 'id' in tree and tree['id'] == parentID:
                return tree
            elif 'entries' in tree:
                for ek in tree['entries'].keys():
                    rv = _find_parent(tree['entries'][ek], parentID)
                    if rv is not None:
                        return rv
                return None
            else:
                return None

        allEntries = {'entries': allEntries}
        for key in list(allEntries['entries'].keys()):
            entry = allEntries['entries'][key]
            parentID = entry['parent']
            del entry['parent']

            if parentID is None:
                # entry or group with no parent: append to root directly
                allEntries['entries'][key] = entry

            else:
                # move item
                parent = _find_parent(allEntries, parentID)
                parent['entries'][key] = entry
                del allEntries['entries'][key]

        allEntries['numClasses'] = numClasses
        return allEntries


    def getBatch_fixed(self,
                       project,
                       username,
                       data,
                       hideGoldenQuestionInfo=True):
        '''
            Returns entries from the database based on the list of
            data entry identifiers specified.
        '''
        if not len(data):
            return {'entries': {}}

        # query
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getFixedImagesQueryString(
            project, projImmutables['annotationType'],
            projImmutables['predictionType'], projImmutables['demoMode'])

        # parse results
        queryVals = (
            tuple(UUID(d) for d in data),
            username,
            username,
        )
        if projImmutables['demoMode']:
            queryVals = (tuple(UUID(d) for d in data), )

        with self.dbConnector.execute_cursor(queryStr, queryVals) as cursor:
            try:
                response = self._assemble_annotations(project, cursor,
                                                      hideGoldenQuestionInfo)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()

        # mark images as requested
        self._set_images_requested(project, response)

        return {'entries': response}


    def getBatch_auto(self,
                      project,
                      username,
                      order='unlabeled',
                      subset='default',
                      limit=None,
                      hideGoldenQuestionInfo=True):
        '''
            TODO: description
        '''
        # query
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getNextBatchQueryString(
            project, projImmutables['annotationType'],
            projImmutables['predictionType'], order, subset,
            projImmutables['demoMode'])

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)

        # parse results
        queryVals = (
            username,
            username,
            limit,
            username,
        )
        if projImmutables[
                'demoMode']:        #TODO: demoMode can now change dynamically
            queryVals = (limit, )

        with self.dbConnector.execute_cursor(queryStr, queryVals) as cursor:
            response = self._assemble_annotations(project, cursor,
                                                  hideGoldenQuestionInfo)

        # mark images as requested
        self._set_images_requested(project, response)

        return {'entries': response}


    def getBatch_timeRange(self,
                           project,
                           minTimestamp,
                           maxTimestamp,
                           userList,
                           skipEmptyImages=False,
                           limit=None,
                           goldenQuestionsOnly=False,
                           hideGoldenQuestionInfo=True):
        '''
            Returns images that have been annotated within the given
            time range and/or by the given user(s). All arguments are
            optional. Useful for reviewing existing annotations.
        '''
        # query string
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getDateQueryString(
            project, projImmutables['annotationType'], minTimestamp,
            maxTimestamp, userList, skipEmptyImages, goldenQuestionsOnly)

        # check validity and provide arguments
        # (argument order must mirror the placeholders emitted by getDateQueryString)
        queryVals = []
        if userList is not None:
            queryVals.append(tuple(userList))
        if minTimestamp is not None:
            queryVals.append(minTimestamp)
        if maxTimestamp is not None:
            queryVals.append(maxTimestamp)
        if skipEmptyImages and userList is not None:
            queryVals.append(tuple(userList))

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)
        queryVals.append(limit)

        if userList is not None:
            queryVals.append(tuple(userList))

        # query and parse results
        with self.dbConnector.execute_cursor(queryStr,
                                             tuple(queryVals)) as cursor:
            try:
                response = self._assemble_annotations(project, cursor,
                                                      hideGoldenQuestionInfo)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()

        # # mark images as requested
        # self._set_images_requested(project, response)

        return {'entries': response}


    def get_timeRange(self,
                      project,
                      userList,
                      skipEmptyImages=False,
                      goldenQuestionsOnly=False):
        '''
            Returns two timestamps denoting the temporal limits within which
            images have been viewed by the users provided in the userList.
            Arguments:
            - userList: string (single user name) or list of strings
              (multiple). Can also be None; in this case all annotations
              will be checked.
            - skipEmptyImages: if True, only images that contain at least one
              annotation will be considered.
            - goldenQuestionsOnly: if True, only images flagged as golden
              questions will be shown.
        '''
        # query string
        queryStr = self.sqlBuilder.getTimeRangeQueryString(
            project, userList, skipEmptyImages, goldenQuestionsOnly)

        arguments = (None if userList is None else tuple(userList))
        result = self.dbConnector.execute(queryStr, (arguments, ),
                                          numReturn=1)

        if result is not None and len(result):
            return {
                'minTimestamp': result[0]['mintimestamp'],
                'maxTimestamp': result[0]['maxtimestamp'],
            }
        else:
            return {'error': 'no annotations made'}


    def get_sampleData(self, project):
        '''
            Returns a sample image from the project, with annotations (from
            one of the admins) and predictions. If no image, no annotations,
            and/or no predictions are available, a built-in default is
            returned instead.
        '''
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getSampleDataQueryString(
            project, projImmutables['annotationType'],
            projImmutables['predictionType'])

        # query and parse results
        response = None
        with self.dbConnector.execute_cursor(queryStr, None) as cursor:
            try:
                response = self._assemble_annotations(project, cursor, True)
            except:
                pass

        if response is None or not len(response):
            # no valid data found for project; fall back to sample data
            response = {
                '00000000-0000-0000-0000-000000000000': {
                    'fileName':
                    '/static/interface/exampleData/sample_image.jpg',
                    'viewcount': 1,
                    'annotations': {
                        '00000000-0000-0000-0000-000000000000':
                        self._get_sample_metadata(
                            projImmutables['annotationType'])
                    },
                    'predictions': {
                        '00000000-0000-0000-0000-000000000000':
                        self._get_sample_metadata(
                            projImmutables['predictionType'])
                    },
                    'last_checked': None,
                    'isGoldenQuestion': True
                }
            }
        return response


    def submitAnnotations(self, project, username, submissions):
        '''
            Sends user-provided annotations to the database.
            Returns 0 on success; 1 if the project is in demo mode
            (in which case nothing is written).
        '''
        projImmutables = self.get_project_immutables(project)
        if projImmutables['demoMode']:
            return 1

        # assemble values
        colnames = getattr(QueryStrings_annotation,
                           projImmutables['annotationType']).value
        values_insert = []
        values_update = []

        meta = (None if not 'meta' in submissions else json.dumps(
            submissions['meta']))

        # for deletion: remove all annotations whose image ID matches but whose annotation ID is not among the submitted ones
        ids = []

        viewcountValues = []
        for imageKey in submissions['entries']:
            entry = submissions['entries'][imageKey]

            try:
                lastChecked = entry['timeCreated']
                lastTimeRequired = entry['timeRequired']
                if lastTimeRequired is None:
                    lastTimeRequired = 0
            except:
                # missing or malformed timing info; fall back to "now"
                lastChecked = datetime.now(tz=pytz.utc)
                lastTimeRequired = 0

            try:
                numInteractions = int(entry['numInteractions'])
            except:
                numInteractions = 0

            if 'annotations' in entry and len(entry['annotations']):
                for annotation in entry['annotations']:
                    # assemble annotation values
                    annotationTokens = self.annoParser.parseAnnotation(
                        annotation)
                    annoValues = []
                    for cname in colnames:
                        if cname == 'id':
                            if cname in annotationTokens:
                                # cast and only append id if the annotation is an existing one
                                annoValues.append(UUID(
                                    annotationTokens[cname]))
                                ids.append(UUID(annotationTokens[cname]))
                        elif cname == 'image':
                            annoValues.append(UUID(imageKey))
                        elif cname == 'label' and annotationTokens[
                                cname] is not None:
                            annoValues.append(UUID(annotationTokens[cname]))
                        elif cname == 'timeCreated':
                            try:
                                annoValues.append(
                                    dateutil.parser.parse(
                                        annotationTokens[cname]))
                            except:
                                annoValues.append(datetime.now(tz=pytz.utc))
                        elif cname == 'timeRequired':
                            timeReq = annotationTokens[cname]
                            if timeReq is None:
                                timeReq = 0
                            annoValues.append(timeReq)
                        elif cname == 'username':
                            annoValues.append(username)
                        elif cname in annotationTokens:
                            annoValues.append(annotationTokens[cname])
                        elif cname == 'unsure':
                            if 'unsure' in annotationTokens and annotationTokens[
                                    'unsure'] is not None:
                                annoValues.append(annotationTokens[cname])
                            else:
                                annoValues.append(False)
                        elif cname == 'meta':
                            annoValues.append(meta)
                        else:
                            annoValues.append(None)
                    if 'id' in annotationTokens:
                        # existing annotation; update
                        values_update.append(tuple(annoValues))
                    else:
                        # new annotation
                        values_insert.append(tuple(annoValues))

            viewcountValues.append(
                (username, imageKey, 1, lastChecked, lastChecked,
                 lastTimeRequired, lastTimeRequired, numInteractions, meta))

        # delete all annotations that are not in submitted batch
        imageKeys = list(UUID(k) for k in submissions['entries'])
        if len(imageKeys):
            if len(ids):
                queryStr = sql.SQL('''
                    DELETE FROM {id_anno} WHERE username = %s AND id IN (
                        SELECT idQuery.id FROM (
                            SELECT * FROM {id_anno} WHERE id NOT IN %s
                        ) AS idQuery
                        JOIN (
                            SELECT * FROM {id_anno} WHERE image IN %s
                        ) AS imageQuery ON idQuery.id = imageQuery.id);
                ''').format(id_anno=sql.Identifier(project, 'annotation'))
                self.dbConnector.execute(queryStr, (
                    username,
                    tuple(ids),
                    tuple(imageKeys),
                ))
            else:
                # no annotations submitted; delete all annotations submitted before
                queryStr = sql.SQL('''
                    DELETE FROM {id_anno} WHERE username = %s AND image IN %s;
                ''').format(id_anno=sql.Identifier(project, 'annotation'))
                self.dbConnector.execute(queryStr, (
                    username,
                    tuple(imageKeys),
                ))

        # insert new annotations
        if len(values_insert):
            queryStr = sql.SQL('''
                INSERT INTO {id_anno} ({cols})
                VALUES %s ;
            ''').format(
                id_anno=sql.Identifier(project, 'annotation'),
                cols=sql.SQL(', ').join([sql.SQL(c) for c in colnames[1:]
                                         ])        # skip 'id' column
            )
            self.dbConnector.insert(queryStr, values_insert)

        # update existing annotations
        if len(values_update):
            updateCols = []
            for col in colnames:
                if col == 'label':
                    updateCols.append(sql.SQL('label = UUID(e.label)'))
                elif col == 'timeRequired':
                    # we sum the required times together
                    updateCols.append(
                        sql.SQL(
                            'timeRequired = COALESCE(a.timeRequired,0) + COALESCE(e.timeRequired,0)'
                        ))
                else:
                    updateCols.append(
                        sql.SQL('{col} = e.{col}').format(col=sql.SQL(col)))

            queryStr = sql.SQL('''
                UPDATE {id_anno} AS a
                SET {updateCols}
                FROM (VALUES %s) AS e({colnames})
                WHERE e.id = a.id
            ''').format(id_anno=sql.Identifier(project, 'annotation'),
                        updateCols=sql.SQL(', ').join(updateCols),
                        colnames=sql.SQL(', ').join(
                            [sql.SQL(c) for c in colnames]))

            self.dbConnector.insert(queryStr, values_update)

        # viewcount table
        queryStr = sql.SQL('''
            INSERT INTO {id_iu} (username, image, viewcount, first_checked, last_checked, last_time_required, total_time_required, num_interactions, meta)
            VALUES %s 
            ON CONFLICT (username, image) DO UPDATE SET viewcount = image_user.viewcount + 1, last_checked = EXCLUDED.last_checked, last_time_required = EXCLUDED.last_time_required, total_time_required = EXCLUDED.total_time_required + image_user.total_time_required, num_interactions = EXCLUDED.num_interactions + image_user.num_interactions, meta = EXCLUDED.meta;
        ''').format(id_iu=sql.Identifier(project, 'image_user'))
        self.dbConnector.insert(queryStr, viewcountValues)

        return 0


    def setGoldenQuestions(self, project, submissions):
        '''
            Receives an iterable of tuples (uuid, bool) and updates the
            property "isGoldenQuestion" of the images accordingly.
        '''
        projImmutables = self.get_project_immutables(project)
        if projImmutables['demoMode']:
            return 1

        queryStr = sql.SQL('''
            UPDATE {id_img} AS img SET isGoldenQuestion = c.isGoldenQuestion
            FROM (VALUES %s)
            AS c (id, isGoldenQuestion)
            WHERE c.id = img.id;
        ''').format(id_img=sql.Identifier(project, 'image'))
        self.dbConnector.insert(queryStr, submissions)

        return 0
def __init__(self, config): self.config = config self.dbConnector = Database(config)
class ModelMarketplaceMiddleware:
    '''
        Handles listing, importing, sharing, re-sharing, and un-sharing of AI
        model states through the AIDE Model Marketplace (relation
        "aide_admin.modelMarketplace").
    '''

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)
        self.labelUImiddleware = DBMiddleware(config)
        self._init_available_ai_models()


    def _init_available_ai_models(self):
        '''
            Checks the locally installed model implementations
            and retains a list of those that support sharing
            (i.e., the addition of new label classes).
        '''
        self.availableModels = set()
        for key in PREDICTION_MODELS:
            if 'canAddLabelclasses' in PREDICTION_MODELS[key] and \
                PREDICTION_MODELS[key]['canAddLabelclasses'] is True:
                self.availableModels.add(key)


    def getModelsMarketplace(self, project, username):
        '''
            Returns a dict of model state meta data, filtered by the project
            settings (model library; annotation type, prediction type).

            Keys are model state UUIDs (as strings); author and origin fields
            are blanked out for anonymously shared states not owned by the
            requesting user/project.
        '''
        # get project meta data (immutables, model library)
        projectMeta = self.dbConnector.execute(
            '''
                SELECT annotationType, predictionType, ai_model_library
                FROM aide_admin.project
                WHERE shortname = %s;
            ''', (project, ), 1)
        if projectMeta is None or not len(projectMeta):
            raise Exception(
                f'Project {project} could not be found in database.')
        projectMeta = projectMeta[0]
        annotationType = projectMeta['annotationtype']
        predictionType = projectMeta['predictiontype']

        # get matching model states
        result = self.dbConnector.execute(
            '''
                SELECT id, name, description, labelclasses, model_library,
                    annotationType, predictionType, EXTRACT(epoch FROM timeCreated) AS time_created,
                    alcriterion_library, public, anonymous, selectCount,
                    is_owner, shared, tags,
                    CASE WHEN NOT is_owner AND anonymous THEN NULL ELSE author END AS author,
                    CASE WHEN NOT is_owner AND anonymous THEN NULL ELSE origin_project END AS origin_project,
                    CASE WHEN NOT is_owner AND anonymous THEN NULL ELSE origin_uuid END AS origin_uuid
                FROM (
                    SELECT *,
                    CASE WHEN author = %s AND origin_project = %s THEN TRUE ELSE FALSE END AS is_owner
                    FROM aide_admin.modelMarketplace
                    WHERE annotationType = %s AND
                    predictionType = %s AND (
                        (public = TRUE AND shared = TRUE) OR
                        origin_project = %s
                    )
                ) AS mm
                LEFT OUTER JOIN (
                    SELECT name AS projectName, shortname
                    FROM aide_admin.project
                ) AS pn
                ON mm.origin_project = pn.shortname;
            ''', (username, project, annotationType, predictionType, project),
            'all')
        if result is not None and len(result):
            matchingStates = {}
            for r in result:
                stateID = str(r['id'])
                # UUID values are not JSON-serializable; stringify them
                values = {}
                for key in r.keys():
                    if isinstance(r[key], UUID):
                        values[key] = str(r[key])
                    else:
                        values[key] = r[key]
                matchingStates[stateID] = values
        else:
            matchingStates = {}
        return matchingStates


    def importModel(self, project, username, modelID):
        '''
            Imports the given Marketplace model state into the project's
            "cnnstate" relation, after verifying that the model exists, its
            type supports sharing, and its annotation/prediction types match
            the project. Returns a status dict; status 0 indicates success.
        '''
        if not isinstance(modelID, UUID):
            modelID = UUID(modelID)

        # get model meta data
        meta = self.dbConnector.execute(
            '''
            SELECT id, name, description, labelclasses, model_library,
            annotationType, predictionType, timeCreated, alcriterion_library,
            public, anonymous, selectCount,
            CASE WHEN anonymous THEN NULL ELSE author END AS author,
            CASE WHEN anonymous THEN NULL ELSE origin_project END AS origin_project,
            CASE WHEN anonymous THEN NULL ELSE origin_uuid END AS origin_uuid
            FROM aide_admin.modelMarketplace
            WHERE id = %s;
        ''', (modelID, ), 1)

        if meta is None or not len(meta):
            return {
                'status': 2,
                'message':
                f'Model state with id "{str(modelID)}" could not be found in the model marketplace.'
            }
        meta = meta[0]

        # check if model type is registered with AIDE
        modelLib = meta['model_library']
        if modelLib not in self.availableModels:
            return {
                'status': 3,
                'message':
                f'Model type with identifier "{modelLib}" does not support sharing across project, or else is not registered with this installation of AIDE.'
            }

        # check if model hasn't already been imported to current project
        # bug fix: query parameters must be a sequence; "(modelID)" is just a
        # bare UUID, which psycopg2 rejects -> use a one-element tuple.
        modelExists = self.dbConnector.execute(
            sql.SQL('''
            SELECT id
            FROM {id_cnnstate}
            WHERE marketplace_origin_id = %s;
        ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
            (modelID,), 1)
        if modelExists is not None and len(modelExists):
            # model already exists in project; update timestamp to make it current (TODO: find a better solution)
            self.dbConnector.execute(
                sql.SQL('''
                UPDATE {id_cnnstate}
                SET timeCreated = NOW()
                WHERE marketplace_origin_id = %s;
            ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
                (modelID,))
            return {
                'status': 0,
                'message':
                'Model had already been imported to project and was elevated to be the most current.'
            }

        # check if model is suitable for current project
        immutables = self.labelUImiddleware.get_project_immutables(project)
        errors = ''
        if immutables['annotationType'] != meta['annotationtype']:
            meta_at = meta['annotationtype']
            proj_at = immutables['annotationType']
            errors = f'Annotation type of model ({meta_at}) is not compatible with project\'s annotation type ({proj_at}).'
        if immutables['predictionType'] != meta['predictiontype']:
            meta_pt = meta['predictiontype']
            proj_pt = immutables['predictionType']
            errors += f'\nPrediction type of model ({meta_pt}) is not compatible with project\'s prediction type ({proj_pt}).'
        if len(errors):
            return {'status': 4, 'message': errors}

        # set project's selected model
        self.dbConnector.execute(
            '''
            UPDATE aide_admin.project
            SET ai_model_library = %s
            WHERE shortname = %s;
        ''', (meta['model_library'], project))

        # finally, increase selection counter and import model state
        #TODO: - retain existing alCriterion library
        insertedModelID = self.dbConnector.execute(
            sql.SQL('''
            UPDATE aide_admin.modelMarketplace
            SET selectCount = selectCount + 1
            WHERE id = %s;
            INSERT INTO {id_cnnstate} (marketplace_origin_id, stateDict, timeCreated, partial, model_library, alCriterion_library)
            SELECT id, stateDict, timeCreated, FALSE, model_library, alCriterion_library
            FROM aide_admin.modelMarketplace
            WHERE id = %s
            RETURNING id;
        ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
            (modelID, modelID), 1)

        return {'status': 0, 'id': str(insertedModelID)}


    def shareModel(self, project, username, modelID, modelName,
                   modelDescription, tags, public, anonymous):
        '''
            Publishes the given project model state to the Model Marketplace.
            Re-enables (and updates the meta data of) a previously hidden
            share of the same state instead of creating a duplicate.
            Returns a status dict; status 0 indicates success.
        '''
        #TODO: export as Celery task
        if not isinstance(modelID, UUID):
            modelID = UUID(modelID)

        if tags is None:
            tags = ''
        elif isinstance(tags, list) or isinstance(tags, tuple):
            tags = ';;'.join(tags)

        # check if model class supports sharing
        isShareable = self.dbConnector.execute(
            '''
            SELECT ai_model_library
            FROM aide_admin.project
            WHERE shortname = %s;
        ''', (project, ), 1)
        if isShareable is None or not len(isShareable):
            return {
                'status': 1,
                'message': f'Project {project} could not be found in database.'
            }
        modelLib = isShareable[0]['ai_model_library']
        if modelLib not in self.availableModels:
            return {
                'status': 2,
                'message':
                f'The model with id "{modelLib}" does not support sharing, or else is not installed in this instance of AIDE.'
            }

        # check if model hasn't already been shared
        isShared = self.dbConnector.execute(
            '''
            SELECT id, author, shared
            FROM aide_admin.modelMarketplace
            WHERE origin_project = %s AND origin_uuid = %s;
        ''', (project, modelID), 'all')
        if isShared is not None and len(isShared):
            if not isShared[0]['shared']:
                # model had been shared in the past but then hidden; unhide
                self.dbConnector.execute(
                    '''
                    UPDATE aide_admin.modelMarketplace
                    SET shared = True
                    WHERE origin_project = %s AND origin_uuid = %s;
                ''', (project, modelID))

            # update shared model meta data
            self.dbConnector.execute(
                '''
                UPDATE aide_admin.modelMarketplace
                SET name = %s,
                description = %s,
                public = %s,
                anonymous = %s,
                tags = %s
                WHERE id = %s AND author = %s;
            ''', (modelName, modelDescription, public, anonymous, tags,
                  isShared[0]['id'], username), None)
            return {'status': 0}

        # check if model hasn't been imported from the marketplace
        isImported = self.dbConnector.execute(
            sql.SQL('''
            SELECT marketplace_origin_id
            FROM {id_cnnstate}
            WHERE id = %s;
        ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
            (modelID, ), 1)
        if isImported is not None and len(isImported):
            marketplaceID = isImported[0]['marketplace_origin_id']
            if marketplaceID is not None:
                return {
                    'status': 4,
                    'message':
                    f'The selected model is already shared through the marketplace (id "{str(marketplaceID)}").'
                }

        # get project immutables
        immutables = self.labelUImiddleware.get_project_immutables(project)

        # get label class info
        labelclasses = json.dumps(
            self.labelUImiddleware.getClassDefinitions(project, False))

        # check if name is unique (TODO: required?)
        nameTaken = self.dbConnector.execute(
            '''
            SELECT COUNT(*) AS cnt
            FROM aide_admin.modelMarketplace
            WHERE name = %s;
        ''', (modelName, ), 'all')
        if nameTaken is not None and len(nameTaken) and nameTaken[0]['cnt']:
            return {
                'status': 5,
                'message':
                f'A model state with name "{modelName}" already exists in the Model Marketplace.'
            }

        # share model state
        sharedModelID = self.dbConnector.execute(
            sql.SQL('''
            INSERT INTO aide_admin.modelMarketplace
            (name, description, tags, labelclasses, author,
            statedict, model_library, alCriterion_library,
            annotationType, predictionType,
            origin_project, origin_uuid, public, anonymous)

            SELECT %s, %s, %s, %s, %s,
            statedict, model_library, alCriterion_library,
            %s, %s,
            %s, id, %s, %s
            FROM {id_cnnstate} AS cnnS
            WHERE id = %s
            RETURNING id;
        ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
            (modelName, modelDescription, tags, labelclasses, username,
             immutables['annotationType'], immutables['predictionType'],
             project, public, anonymous, modelID))

        return {'status': 0, 'shared_model_id': str(sharedModelID)}


    def reshareModel(self, project, username, modelID):
        '''
            Unlike "shareModel", this checks for a model that had already been
            shared in the past, but then hidden from the marketplace, and
            simply turns the "shared" attribute back on, if the model could
            be found.
        '''
        # get origin UUID of model and delegate to "shareModel" function
        # bug fix: keep the requested ID for the error message; the original
        # code overwrote "modelID" with the (empty) query result before
        # formatting the message, printing "None" instead of the ID.
        result = self.dbConnector.execute(
            '''
            SELECT origin_uuid FROM aide_admin.modelMarketplace
            WHERE origin_project = %s
            AND author = %s
            AND id = %s;
        ''', (project, username, UUID(modelID)), 1)
        if result is None or not len(result):
            return {
                'status': 2,
                'message':
                f'Model with ID "{str(modelID)}" could not be found on the Model Marketplace.'
            }

        modelID = result[0]['origin_uuid']
        return self.shareModel(project, username, modelID, None, None, None,
                               None, None)


    def unshareModel(self, project, username, modelID):
        '''
            Sets the "shared" flag to False for a given model state,
            if the provided username is the owner of it.
        '''
        if not isinstance(modelID, UUID):
            modelID = UUID(modelID)

        # check if user is authorized
        isAuthorized = self.dbConnector.execute(
            '''
            SELECT shared FROM aide_admin.modelMarketplace
            WHERE author = %s AND id = %s;
        ''', (username, modelID), 1)
        if isAuthorized is None or not len(isAuthorized):
            # model does not exist or else user has no modification rights
            # (typo fix: message previously contained a doubled quote)
            return {
                'status': 2,
                'message':
                f'Model with id "{str(modelID)}" does not exist or else user {username} does not have modification rights to it.'
            }

        # unshare
        self.dbConnector.execute(
            '''
            UPDATE aide_admin.modelMarketplace
            SET shared = FALSE
            WHERE author = %s AND id = %s;
        ''', (username, modelID))

        return {'status': 0}
class ProjectStatisticsMiddleware:
    '''
        Middleware that computes project-level statistics (image/annotation
        counts, per-user and per-label-class figures, inter-entity performance
        comparisons, annotation speeds and activity histograms) directly from
        the project's database tables.
    '''

    def __init__(self, config):
        # config: global AIDE configuration object; used to create the DB connection
        self.config = config
        self.dbConnector = Database(config)

    def getProjectStatistics(self, project):
        '''
            Returns statistics, such as number of images (seen), number of
            annotations, etc., on a global and per-user, but class-agnostic
            basis.
        '''
        # six UNIONed sub-queries; the first five yield the global counts in a
        # fixed row order, the sixth yields one row per user
        queryStr = sql.SQL('''
            SELECT NULL AS username, COUNT(*) AS num_img, NULL::bigint AS num_anno FROM {id_img}
            UNION ALL
            SELECT NULL AS username, COUNT(DISTINCT(image)) AS num_img, NULL AS num_anno FROM {id_iu}
            UNION ALL
            SELECT NULL AS username, COUNT(DISTINCT(gq.id)) AS num_img, NULL AS num_anno FROM (
                SELECT id FROM {id_img} WHERE isGoldenQuestion = TRUE
            ) AS gq
            UNION ALL
            SELECT NULL AS username, NULL AS num_img, COUNT(DISTINCT(image)) AS num_anno FROM {id_anno}
            UNION ALL
            SELECT NULL AS username, NULL AS num_img, COUNT(*) AS num_anno FROM {id_anno}
            UNION ALL
            SELECT username, iu_cnt AS num_img, anno_cnt AS num_anno FROM (
                SELECT u.username, iu_cnt, anno_cnt
                FROM (
                    SELECT DISTINCT(username) FROM (
                        SELECT username FROM {id_auth} WHERE project = %s
                        UNION ALL
                        SELECT username FROM {id_iu}
                        UNION ALL
                        SELECT username FROM {id_anno}
                    ) AS uQuery
                ) AS u
                LEFT OUTER JOIN (
                    SELECT username, COUNT(*) AS iu_cnt
                    FROM {id_iu}
                    GROUP BY username
                ) AS iu
                ON u.username = iu.username
                LEFT OUTER JOIN (
                    SELECT username, COUNT(*) AS anno_cnt
                    FROM {id_anno}
                    GROUP BY username
                ) AS anno
                ON u.username = anno.username
                ORDER BY u.username
            ) AS q;
        ''').format(id_img=sql.Identifier(project, 'image'),
                    id_iu=sql.Identifier(project, 'image_user'),
                    id_anno=sql.Identifier(project, 'annotation'),
                    id_auth=sql.Identifier('aide_admin', 'authentication'))
        result = self.dbConnector.execute(queryStr, (project, ), 'all')

        # rows 0-4 are the global counts (order fixed by the query above)
        response = {
            'num_images': result[0]['num_img'],
            'num_viewed': result[1]['num_img'],
            'num_goldenQuestions': result[2]['num_img'],
            'num_annotated': result[3]['num_anno'],
            'num_annotations': result[4]['num_anno']
        }
        # any further rows are per-user statistics
        if len(result) > 5:
            response['user_stats'] = {}
            for i in range(5, len(result)):
                uStats = {
                    'num_viewed': result[i]['num_img'],
                    'num_annotations': result[i]['num_anno']
                }
                response['user_stats'][result[i]['username']] = uStats
        return response

    def getLabelclassStatistics(self, project):
        '''
            Returns annotation statistics on a per-label class basis.
            TODO: does not work for segmentationMasks (no label fields)
        '''
        queryStr = sql.SQL('''
            SELECT lc.name, COALESCE(num_anno, 0) AS num_anno, COALESCE(num_pred, 0) AS num_pred
            FROM {id_lc} AS lc
            FULL OUTER JOIN (
                SELECT label, COUNT(*) AS num_anno
                FROM {id_anno} AS anno
                GROUP BY label
            ) AS annoCnt
            ON lc.id = annoCnt.label
            FULL OUTER JOIN (
                SELECT label, COUNT(*) AS num_pred
                FROM {id_pred} AS pred
                GROUP BY label
            ) AS predCnt
            ON lc.id = predCnt.label
        ''').format(id_lc=sql.Identifier(project, 'labelclass'),
                    id_anno=sql.Identifier(project, 'annotation'),
                    id_pred=sql.Identifier(project, 'prediction'))
        result = self.dbConnector.execute(queryStr, None, 'all')
        response = {}
        if result is not None and len(result):
            for i in range(len(result)):
                nextResult = result[i]
                response[nextResult['name']] = {
                    'num_anno': nextResult['num_anno'],
                    'num_pred': nextResult['num_pred']
                }
        return response

    @staticmethod
    def _calc_geometric_stats(tp, fp, fn):
        # Computes (precision, recall, F1) from true/false positive and false
        # negative counts; each value falls back to 0.0 when its denominator
        # is zero (the bare excepts swallow the ZeroDivisionError).
        tp, fp, fn = float(tp), float(fp), float(fn)
        try:
            precision = tp / (tp + fp)
        except:
            precision = 0.0
        try:
            recall = tp / (tp + fn)
        except:
            recall = 0.0
        try:
            f1 = 2 * precision * recall / (precision + recall)
        except:
            f1 = 0.0
        return precision, recall, f1

    def getPerformanceStatistics(self, project, entities_eval, entity_target,
                                 entityType='user', threshold=0.5,
                                 goldenQuestionsOnly=True):
        '''
            Compares the accuracy of a list of users or model states with a
            target user. The following measures of accuracy are reported,
            depending on the annotation type:
            - image labels: overall accuracy
            - points:
                RMSE (distance to closest point with the same label; in pixels)
                overall accuracy (labels)
            - bounding boxes:
                IoU (max. with any target bounding box, regardless of label)
                overall accuracy (labels)
            - segmentation masks:
                TODO

            Value 'threshold' determines the geometric requirement for an
            annotation to be counted as correct (or incorrect) as follows:
            - points: maximum euclidean distance in pixels to closest target
            - bounding boxes: minimum IoU with best matching target

            If 'goldenQuestionsOnly' is True, only images with flag
            'isGoldenQuestion' = True will be considered for evaluation.
        '''
        entityType = entityType.lower()

        # get annotation type for project
        annoType = self.dbConnector.execute(
            '''SELECT annotationType
            FROM aide_admin.project
            WHERE shortname = %s;''', (project, ), 1)
        annoType = annoType[0]['annotationtype']

        # for segmentation masks: get label classes and their ordinals
        #TODO: implement per-class statistics for all types
        labelClasses = {}
        lcDef = self.dbConnector.execute(
            sql.SQL('''
                SELECT id, idx, color FROM {id_lc};
            ''').format(id_lc=sql.Identifier(project, 'labelclass')),
            None, 'all')
        if lcDef is not None:
            for l in lcDef:
                labelClasses[str(l['id'])] = (l['idx'], l['color'])
        else:
            # no label classes defined
            return {}

        # compose args list and complete query
        # NOTE(review): 'points' queries receive the threshold twice — verify
        # against the corresponding StatisticalFormulas template.
        queryArgs = [entity_target, tuple(entities_eval)]
        if annoType == 'points' or annoType == 'boundingBoxes':
            queryArgs.append(threshold)
            if annoType == 'points':
                queryArgs.append(threshold)

        if goldenQuestionsOnly:
            sql_goldenQuestion = sql.SQL('''JOIN (
                SELECT id
                FROM {id_img}
                WHERE isGoldenQuestion = true
            ) AS qi
            ON qi.id = q2.image''').format(
                id_img=sql.Identifier(project, 'image'))
        else:
            sql_goldenQuestion = sql.SQL('')

        # result tokens: accumulator template per entity, depending on type
        tokens = {}
        tokens_normalize = []
        if annoType == 'labels':
            tokens = {
                'num_matches': 0,
                'correct': 0,
                'incorrect': 0,
                'overall_accuracy': 0.0
            }
            tokens_normalize = ['overall_accuracy']
        elif annoType == 'points':
            tokens = {
                'num_pred': 0,
                'num_target': 0,
                'tp': 0,
                'fp': 0,
                'fn': 0,
                'avg_dist': 0.0
            }
            tokens_normalize = ['avg_dist']
        elif annoType == 'boundingBoxes':
            tokens = {
                'num_pred': 0,
                'num_target': 0,
                'tp': 0,
                'fp': 0,
                'fn': 0,
                'avg_iou': 0.0
            }
            tokens_normalize = ['avg_iou']
        elif annoType == 'segmentationMasks':
            tokens = {
                'num_matches': 0,
                'overall_accuracy': 0.0,
                'per_class': {}
            }
            for clID in labelClasses.keys():
                tokens['per_class'][clID] = {
                    'num_matches': 0,
                    'prec': 0.0,
                    'rec': 0.0,
                    'f1': 0.0
                }
            tokens_normalize = []

        if entityType == 'user':
            queryStr = getattr(StatisticalFormulas_user, annoType).value
            queryStr = sql.SQL(queryStr).format(
                id_anno=sql.Identifier(project, 'annotation'),
                id_iu=sql.Identifier(project, 'image_user'),
                sql_goldenQuestion=sql_goldenQuestion)
        else:
            queryStr = getattr(StatisticalFormulas_model, annoType).value
            queryStr = sql.SQL(queryStr).format(
                id_anno=sql.Identifier(project, 'annotation'),
                id_iu=sql.Identifier(project, 'image_user'),
                id_pred=sql.Identifier(project, 'prediction'),
                sql_goldenQuestion=sql_goldenQuestion)

        #TODO: update points query (according to bboxes); re-write stats parsing below

        # get stats
        response = {}
        with self.dbConnector.execute_cursor(queryStr,
                                             tuple(queryArgs)) as cursor:
            while True:
                b = cursor.fetchone()
                if b is None:
                    break
                if entityType == 'user':
                    entity = b['username']
                else:
                    entity = str(b['cnnstate'])

                if not entity in response:
                    response[entity] = tokens.copy()
                    if annoType in ('points', 'boundingBoxes'):
                        response[entity]['num_matches'] = 1
                        if b['num_target'] > 0:
                            response[entity]['num_matches'] += 1

                if annoType == 'segmentationMasks':
                    # decode segmentation masks
                    try:
                        mask_target = np.array(
                            base64ToImage(b['q1segmask'], b['q1width'],
                                          b['q1height']))
                        mask_source = np.array(
                            base64ToImage(b['q2segmask'], b['q2width'],
                                          b['q2height']))
                        if mask_target.shape == mask_source.shape and np.any(
                                mask_target) and np.any(mask_source):
                            # calculate OA
                            intersection = (mask_target > 0) * (mask_source > 0)
                            if np.any(intersection):
                                oa = np.mean(mask_target[intersection] ==
                                             mask_source[intersection])
                                response[entity]['overall_accuracy'] += oa
                                response[entity]['num_matches'] += 1

                            # calculate per-class precision and recall values
                            for clID in labelClasses.keys():
                                idx = labelClasses[clID][0]
                                tp = np.sum((mask_target == idx) *
                                            (mask_source == idx))
                                fp = np.sum((mask_target != idx) *
                                            (mask_source == idx))
                                fn = np.sum((mask_target == idx) *
                                            (mask_source != idx))
                                if (tp + fp + fn) > 0:
                                    prec, rec, f1 = self._calc_geometric_stats(
                                        tp, fp, fn)
                                    response[entity]['per_class'][clID][
                                        'num_matches'] += 1
                                    response[entity]['per_class'][clID][
                                        'prec'] += prec
                                    response[entity]['per_class'][clID][
                                        'rec'] += rec
                                    response[entity]['per_class'][clID][
                                        'f1'] += f1
                    except Exception as e:
                        print(
                            f'TODO: error in segmentation mask statistics calculation ("{str(e)}").'
                        )
                else:
                    # labels / points / bounding boxes: accumulate raw counters
                    for key in tokens.keys():
                        if key == 'correct' or key == 'incorrect':
                            # classification
                            correct = b['label_correct']
                            # ignore None
                            if correct is True:
                                response[entity]['correct'] += 1
                                response[entity]['num_matches'] += 1
                            elif correct is False:
                                response[entity]['incorrect'] += 1
                                response[entity]['num_matches'] += 1
                        elif key in b and b[key] is not None:
                            response[entity][key] += b[key]

        # normalize accumulated sums into averages / derived metrics
        for entity in response.keys():
            for t in tokens_normalize:
                if t in response[entity]:
                    if t == 'overall_accuracy':
                        response[entity][t] = float(response[entity]['correct']) / \
                            float(response[entity]['correct'] + response[entity]['incorrect'])
                    elif annoType in ('points', 'boundingBoxes'):
                        response[entity][t] /= response[entity]['num_matches']

            if annoType == 'points' or annoType == 'boundingBoxes':
                prec, rec, f1 = self._calc_geometric_stats(
                    response[entity]['tp'], response[entity]['fp'],
                    response[entity]['fn'])
                response[entity]['prec'] = prec
                response[entity]['rec'] = rec
                response[entity]['f1'] = f1

            elif annoType == 'segmentationMasks':
                # normalize OA
                # NOTE(review): divides by num_matches without a zero guard —
                # raises if no mask pair ever matched; confirm upstream.
                response[entity]['overall_accuracy'] /= response[entity][
                    'num_matches']

                # normalize all label class values as well
                for lcID in labelClasses.keys():
                    numMatches = response[entity]['per_class'][lcID][
                        'num_matches']
                    if numMatches > 0:
                        response[entity]['per_class'][lcID][
                            'prec'] /= numMatches
                        response[entity]['per_class'][lcID][
                            'rec'] /= numMatches
                        response[entity]['per_class'][lcID]['f1'] /= numMatches

        return {'label_classes': labelClasses, 'per_entity': response}

    def getUserAnnotationSpeeds(self, project, users, goldenQuestionsOnly=False):
        '''
            Returns, for each username in "users" list, the mean, median and
            lower and upper quartile (25% and 75%) of the time required in a
            given project.
        '''
        # prepare output (NaN defaults for users without any annotations)
        response = {}
        for u in users:
            response[u] = {
                'avg': float('nan'),
                'median': float('nan'),
                'perc_25': float('nan'),
                'perc_75': float('nan')
            }

        if goldenQuestionsOnly:
            gqStr = sql.SQL('''
                JOIN {id_img} AS img
                ON anno.image = img.id
                WHERE img.isGoldenQuestion = true
            ''').format(id_img=sql.Identifier(project, 'image'))
        else:
            gqStr = sql.SQL('')

        queryStr = sql.SQL('''
            SELECT username, avg(timeRequired) AS avg,
            percentile_cont(0.50) WITHIN GROUP (ORDER BY timeRequired ASC) AS median,
            percentile_cont(0.25) WITHIN GROUP (ORDER BY timeRequired ASC) AS perc_25,
            percentile_cont(0.75) WITHIN GROUP (ORDER BY timeRequired ASC) AS perc_75
            FROM (
                SELECT username, timeRequired
                FROM {id_anno} AS anno
                {gqStr}
            ) AS q
            WHERE username IN %s
            GROUP BY username
        ''').format(id_anno=sql.Identifier(project, 'annotation'), gqStr=gqStr)
        result = self.dbConnector.execute(queryStr, (tuple(users), ), 'all')
        if result is not None:
            for r in result:
                user = r['username']
                response[user] = {
                    'avg': float(r['avg']),
                    'median': float(r['median']),
                    'perc_25': float(r['perc_25']),
                    'perc_75': float(r['perc_75']),
                }
        return response

    def getUserFinished(self, project, username):
        '''
            Returns True if the user has viewed all images in the project, and
            False otherwise. We deliberately do not reveal more information to
            the general user, in order to e.g. sustain the golden question
            limitation system.
        '''
        queryStr = sql.SQL('''
            SELECT COUNT(*) AS cnt FROM {id_iu}
            WHERE viewcount > 0 AND username = %s
            UNION ALL
            SELECT COUNT(*) AS cnt FROM {id_img};
        ''').format(id_img=sql.Identifier(project, 'image'),
                    id_iu=sql.Identifier(project, 'image_user'))
        result = self.dbConnector.execute(queryStr, (username, ), 2)
        # row 0: images viewed by user; row 1: total images in project
        return result[0]['cnt'] >= result[1]['cnt']

    def getTimeActivity(self, project, type='images', numDaysMax=31,
                        perUser=False):
        '''
            Returns a histogram of the number of images viewed (if type =
            'images') or annotations made (if type = 'annotations') over the
            last numDaysMax. If perUser is True, statistics are returned on a
            user basis.
        '''
        # NOTE(review): parameter "type" shadows the builtin; kept for
        # interface compatibility.
        if type == 'images':
            id_table = sql.Identifier(project, 'image_user')
            time_field = sql.SQL('last_checked')
        else:
            id_table = sql.Identifier(project, 'annotation')
            time_field = sql.SQL('timeCreated')

        if perUser:
            userSpec = sql.SQL(', username')
        else:
            userSpec = sql.SQL('')

        queryStr = sql.SQL('''
            SELECT to_char({time_field}, 'YYYY-Mon-dd') AS month_day, MIN({time_field}) AS date_of_day, COUNT(*) AS cnt {user_spec}
            FROM {id_table}
            WHERE {time_field} IS NOT NULL
            GROUP BY month_day {user_spec}
            ORDER BY date_of_day ASC
            LIMIT %s
        ''').format(time_field=time_field, id_table=id_table,
                    user_spec=userSpec)
        result = self.dbConnector.execute(queryStr, (numDaysMax, ), 'all')

        #TODO: homogenize series and add missing days
        if perUser:
            response = {}
        else:
            response = {'counts': [], 'timestamps': [], 'labels': []}
        for row in result:
            if perUser:
                if row['username'] not in response:
                    response[row['username']] = {
                        'counts': [],
                        'timestamps': [],
                        'labels': []
                    }
                response[row['username']]['counts'].append(row['cnt'])
                response[row['username']]['timestamps'].append(
                    row['date_of_day'].timestamp())
                response[row['username']]['labels'].append(row['month_day'])
            else:
                response['counts'].append(row['cnt'])
                response['timestamps'].append(row['date_of_day'].timestamp())
                response['labels'].append(row['month_day'])
        return response
class AIWorker():
    '''
        Worker that instantiates project-specific AI models and Active Learning
        (AL) criteria and executes training, model state averaging and
        inference jobs on their behalf.
    '''

    def __init__(self, config, passiveMode=False):
        # config: global AIDE configuration object
        # passiveMode: if True, the worker does not act on administrative
        #              notifications (see aide_internal_notify)
        self.config = config
        self.dbConnector = Database(config)
        self.passiveMode = passiveMode
        self._init_fileserver()

    def _init_fileserver(self):
        '''
            The AIWorker has a special routine to detect whether the instance
            it is running on also hosts the file server. If it does, data are
            loaded directly from disk to avoid going through the loopback
            network.
        '''
        self.fileServer = fileserver.FileServer(self.config)

    @staticmethod
    def _parse_model_settings(settings, description):
        '''
            Parses a JSON options string ("settings") into a dict. Returns None
            if the string is empty, None, or unparseable (in which case a
            warning is printed); "description" names the option type in the
            warning ('model' or 'AL criterion').
        '''
        if settings is not None and len(settings):
            try:
                return json.loads(settings)
            except Exception as err:
                print(
                    'WARNING: could not read {description} options. Error message: {message}.'
                    .format(description=description, message=str(err)))
        return None

    @staticmethod
    def _verify_interface(modelClass, library, requiredFunctions):
        '''
            Verifies bidirectionally that "modelClass" provides every function
            listed in "requiredFunctions" (dict: function name -> list of
            required argument names), that each function accepts all required
            arguments, and that it declares no unsupported positional
            arguments besides "self". Raises an Exception on any mismatch;
            "library" is the class identifier used in error messages.
        '''
        functionNames = [
            func for func in dir(modelClass)
            if callable(getattr(modelClass, func))
        ]
        for key in requiredFunctions:
            if not key in functionNames:
                raise Exception(
                    'Class {} is missing required function {}.'.format(
                        library, key))

            # check function arguments bidirectionally
            # (getfullargspec instead of the removed-in-3.11 getargspec)
            funArgs = inspect.getfullargspec(getattr(modelClass, key))
            for arg in requiredFunctions[key]:
                if not arg in funArgs.args:
                    raise Exception(
                        'Method {} of class {} is missing required argument {}.'
                        .format(library, key, arg))
            for arg in funArgs.args:
                if arg != 'self' and not arg in requiredFunctions[key]:
                    raise Exception(
                        'Unsupported argument {} of method {} in class {}.'.
                        format(arg, key, library))

    def _init_model_instance(self, project, modelLibrary, modelSettings):
        '''
            Imports the AI model class given by "modelLibrary", verifies its
            interface and returns an instance of it, configured for "project"
            with the (JSON string) "modelSettings".
        '''
        modelSettings = self._parse_model_settings(modelSettings, 'model')

        # import class object
        modelClass = get_class_executable(modelLibrary)

        # verify functions and arguments
        self._verify_interface(
            modelClass, modelLibrary, {
                '__init__':
                ['project', 'config', 'dbConnector', 'fileServer', 'options'],
                'train': ['stateDict', 'data', 'updateStateFun'],
                'average_model_states': ['stateDicts', 'updateStateFun'],
                'inference': ['stateDict', 'data', 'updateStateFun']
            })

        # create AI model instance
        return modelClass(
            project=project,
            config=self.config,
            dbConnector=self.dbConnector,
            fileServer=self.fileServer.get_secure_instance(project),
            options=modelSettings)

    def _init_alCriterion_instance(self, project, alLibrary, alSettings):
        '''
            Creates the Active Learning (AL) criterion provider instance.
        '''
        alSettings = self._parse_model_settings(alSettings, 'AL criterion')

        # import class object
        modelClass = get_class_executable(alLibrary)

        # verify functions and arguments
        self._verify_interface(
            modelClass, alLibrary, {
                '__init__':
                ['project', 'config', 'dbConnector', 'fileServer', 'options'],
                'rank': ['data', 'updateStateFun']
            })

        # create AI model instance
        return modelClass(
            project=project,
            config=self.config,
            dbConnector=self.dbConnector,
            fileServer=self.fileServer.get_secure_instance(project),
            options=alSettings)

    def _get_model_instance(self, project):
        '''
            Returns the class instance of the model specified in the given
            project.
            TODO: cache models?
        '''
        # get model settings for project
        queryStr = '''
            SELECT ai_model_library, ai_model_settings FROM aide_admin.project
            WHERE shortname = %s;
        '''
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        modelLibrary = result[0]['ai_model_library']
        modelSettings = result[0]['ai_model_settings']

        # create new model instance
        return self._init_model_instance(project, modelLibrary, modelSettings)

    def _get_alCriterion_instance(self, project):
        '''
            Returns the class instance of the Active Learning model specified
            in the project.
            TODO: cache models?
        '''
        # get model settings for project
        queryStr = '''
            SELECT ai_alCriterion_library, ai_alCriterion_settings FROM aide_admin.project
            WHERE shortname = %s;
        '''
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        modelLibrary = result[0]['ai_alcriterion_library']
        modelSettings = result[0]['ai_alcriterion_settings']

        # create new model instance
        return self._init_alCriterion_instance(project, modelLibrary,
                                               modelSettings)

    def aide_internal_notify(self, message):
        '''
            Used for AIDE administrative communication between AIController
            and AIWorker(s), e.g. for setting up queues.
        '''
        if self.passiveMode:
            return
        # not required (yet)

    def call_train(self, data, epoch, numEpochs, project, subset):
        '''
            Runs one training iteration of the project's model on "data".
        '''
        # get project-specific model
        modelInstance = self._get_model_instance(project)
        return functional._call_train(project, data, epoch, numEpochs, subset,
                                      getattr(modelInstance, 'train'),
                                      self.dbConnector, self.fileServer)

    def call_average_model_states(self, epoch, numEpochs, project):
        '''
            Averages the model states produced by distributed training.
        '''
        # get project-specific model
        modelInstance = self._get_model_instance(project)
        return functional._call_average_model_states(
            project, epoch, numEpochs,
            getattr(modelInstance, 'average_model_states'), self.dbConnector,
            self.fileServer)

    def call_inference(self, imageIDs, epoch, numEpochs, project):
        '''
            Performs inference on the given images and ranks the results with
            the project's AL criterion.
        '''
        # get project-specific model and AL criterion
        modelInstance = self._get_model_instance(project)
        alCriterionInstance = self._get_alCriterion_instance(project)
        return functional._call_inference(
            project, imageIDs, epoch, numEpochs,
            getattr(modelInstance, 'inference'),
            getattr(alCriterionInstance, 'rank'), self.dbConnector,
            self.fileServer,
            self.config.getProperty('AIWorker',
                                    'inference_batch_size_limit',
                                    type=int,
                                    fallback=-1))

    def verify_model_state(self, project, modelLibrary, stateDict, modelOptions):
        '''
            Launches a dummy training-averaging-inference chain on a received
            model state and returns True if the chain could be executed without
            any errors (else False). Does not store anything in the database.
            Inputs:
                - project: str, project shortname (used to retrieve sample data)
                - modelLibrary: str, identifier of the AI model
                - stateDict: bytes object, AI model state
                - modelOptions: str, model settings to be tested (optional)

            Returns a dict with the following entries:
                - valid: bool, True if the provided state dict and (optionally)
                         model options are valid (i.e., can be used to perform
                         training, averaging, and inference), or False
                         otherwise.
                - messages: str, text describing the error(s) encounte-
                            red if there are any.
        '''
        #TODO
        raise NotImplementedError('Not yet implemented.')
class DBMiddleware():
    '''
        Middleware between the LabelUI web frontend and the project database:
        fetches project settings, class definitions and image batches, and
        assembles annotation/prediction entries for the client.
    '''

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)
        self._fetchProjectSettings()
        self.sqlBuilder = SQLStringBuilder(config)
        self.annoParser = AnnotationParser(config)

    def _fetchProjectSettings(self):
        # Loads project-wide settings from the configuration into
        # self.projectSettings (done once at construction time).

        # AI controller URI
        aiControllerURI = self.config.getProperty('Server', 'aiController_uri')
        if aiControllerURI is None or aiControllerURI.strip() == '':
            # no AI backend configured
            aiControllerURI = None

        # LabelUI drawing styles
        with open(
                self.config.getProperty(
                    'LabelUI',
                    'styles_file',
                    type=str,
                    fallback='modules/LabelUI/static/json/styles.json'),
                'r') as f:
            styles = json.load(f)

        # Image backdrops for index screen
        with open(
                self.config.getProperty(
                    'Project',
                    'backdrops_file',
                    type=str,
                    fallback='modules/LabelUI/static/json/backdrops.json'),
                'r') as f:
            backdrops = json.load(f)

        # Welcome message for UI tutorial
        with open(
                self.config.getProperty(
                    'Project',
                    'welcome_message_file',
                    type=str,
                    fallback=
                    'modules/LabelUI/static/templates/welcome_message.html'),
                'r') as f:
            welcomeMessage = f.readlines()

        self.projectSettings = {
            'projectName':
            self.config.getProperty('Project', 'projectName'),
            'projectDescription':
            self.config.getProperty('Project', 'projectDescription'),
            'indexURI':
            self.config.getProperty('Server',
                                    'index_uri',
                                    type=str,
                                    fallback='/'),
            'dataServerURI':
            self.config.getProperty('Server', 'dataServer_uri'),
            'aiControllerURI':
            aiControllerURI,
            'dataType':
            self.config.getProperty('Project', 'dataType', fallback='images'),
            'classes':
            self.getClassDefinitions(),
            'enableEmptyClass':
            self.config.getProperty('Project', 'enableEmptyClass',
                                    fallback='no'),
            'annotationType':
            self.config.getProperty('Project', 'annotationType'),
            'predictionType':
            self.config.getProperty('Project', 'predictionType'),
            'showPredictions':
            self.config.getProperty('LabelUI', 'showPredictions',
                                    fallback='yes'),
            'showPredictions_minConf':
            self.config.getProperty('LabelUI',
                                    'showPredictions_minConf',
                                    type=float,
                                    fallback=0.5),
            'carryOverPredictions':
            self.config.getProperty('LabelUI', 'carryOverPredictions',
                                    fallback='no'),
            'carryOverRule':
            self.config.getProperty('LabelUI', 'carryOverRule',
                                    fallback='maxConfidence'),
            'carryOverPredictions_minConf':
            self.config.getProperty('LabelUI',
                                    'carryOverPredictions_minConf',
                                    type=float,
                                    fallback=0.75),
            'defaultBoxSize_w':
            self.config.getProperty('LabelUI',
                                    'defaultBoxSize_w',
                                    type=int,
                                    fallback=10),
            'defaultBoxSize_h':
            self.config.getProperty('LabelUI',
                                    'defaultBoxSize_h',
                                    type=int,
                                    fallback=10),
            'minBoxSize_w':
            self.config.getProperty('Project',
                                    'box_minWidth',
                                    type=int,
                                    fallback=1),
            'minBoxSize_h':
            self.config.getProperty('Project',
                                    'box_minHeight',
                                    type=int,
                                    fallback=1),
            'numImagesPerBatch':
            self.config.getProperty('LabelUI',
                                    'numImagesPerBatch',
                                    type=int,
                                    fallback=1),
            'minImageWidth':
            self.config.getProperty('LabelUI',
                                    'minImageWidth',
                                    type=int,
                                    fallback=300),
            'numImageColumns_max':
            self.config.getProperty('LabelUI',
                                    'numImageColumns_max',
                                    type=int,
                                    fallback=1),
            'defaultImage_w':
            self.config.getProperty('LabelUI',
                                    'defaultImage_w',
                                    type=int,
                                    fallback=800),
            'defaultImage_h':
            self.config.getProperty('LabelUI',
                                    'defaultImage_h',
                                    type=int,
                                    fallback=600),
            'styles':
            styles['styles'],
            'backdrops':
            backdrops,
            'welcomeMessage':
            welcomeMessage,
            'demoMode':
            self.config.getProperty('Project',
                                    'demoMode',
                                    type=bool,
                                    fallback=False)
        }

    def _assemble_annotations(self, cursor):
        # Groups the rows yielded by "cursor" by image and splits each row's
        # entry into the image's 'annotations' or 'predictions' dict,
        # depending on its 'ctype' column. Returns {imageID: {...}}.
        response = {}
        while True:
            b = cursor.fetchone()
            if b is None:
                break

            imgID = str(b['image'])
            if not imgID in response:
                response[imgID] = {
                    'fileName': b['filename'],
                    'predictions': {},
                    'annotations': {},
                    'last_checked': None
                }
            viewcount = b['viewcount']
            if viewcount is not None:
                response[imgID]['viewcount'] = viewcount
            last_checked = b['last_checked']
            if last_checked is not None:
                if response[imgID]['last_checked'] is None:
                    response[imgID]['last_checked'] = last_checked
                else:
                    # keep the most recent timestamp across rows
                    response[imgID]['last_checked'] = max(
                        response[imgID]['last_checked'], last_checked)

            # parse annotations and predictions
            entryID = str(b['id'])
            if b['ctype'] is not None:
                colnames = self.sqlBuilder.getColnames(b['ctype'])
                entry = {}
                for c in colnames:
                    value = b[c]
                    # convert non-JSON-serializable values
                    if isinstance(value, datetime):
                        value = value.timestamp()
                    elif isinstance(value, UUID):
                        value = str(value)
                    entry[c] = value

                if b['ctype'] == 'annotation':
                    response[imgID]['annotations'][entryID] = entry
                elif b['ctype'] == 'prediction':
                    response[imgID]['predictions'][entryID] = entry
        return response

    def getProjectSettings(self):
        '''
            Queries the database for general project-specific metadata, such as:
            - Classes: names, indices, default colors
            - Annotation type: one of {class labels, positions, bboxes}
        '''
        return self.projectSettings

    def getProjectInfo(self):
        '''
            Returns safe, shareable information about the project.
        '''
        return {
            'projectName': self.projectSettings['projectName'],
            'projectDescription': self.projectSettings['projectDescription'],
            'demoMode': self.config.getProperty('Project',
                                                'demoMode',
                                                type=bool,
                                                fallback=False),
            'backdrops': self.projectSettings['backdrops']['images']
        }

    def getClassDefinitions(self):
        '''
            Returns a dictionary with entries for all classes in the project.
        '''
        classdef = {
            'entries': {
                'default': {}  # default group for ungrouped label classes
            }
        }
        schema = self.config.getProperty('Database', 'schema')

        # query data
        # NOTE(review): local name "sql" shadows the psycopg2 "sql" module
        # within this method; kept as-is.
        sql = '''
            SELECT 'group' AS type, id, NULL as idx, name, color, parent, NULL AS keystroke FROM {schema}.labelclassgroup
            UNION ALL
            SELECT 'class' AS type, id, idx, name, color, labelclassgroup, keystroke FROM {schema}.labelclass;
        '''.format(schema=schema)
        classData = self.dbConnector.execute(sql, None, 'all')

        # assemble entries first
        allEntries = {}
        numClasses = 0
        for cl in classData:
            id = str(cl['id'])
            entry = {
                'id': id,
                'name': cl['name'],
                'color': cl['color'],
                'parent':
                str(cl['parent']) if cl['parent'] is not None else None,
            }
            if cl['type'] == 'group':
                entry['entries'] = {}
            else:
                entry['index'] = cl['idx']
                entry['keystroke'] = cl['keystroke']
                numClasses += 1
            allEntries[id] = entry

        # transform into tree
        def _find_parent(tree, parentID):
            # depth-first search for the node with the given ID; None parents
            # resolve to the 'default' group
            if parentID is None:
                return tree['entries']['default']
            elif 'id' in tree and tree['id'] == parentID:
                return tree
            elif 'entries' in tree:
                for ek in tree['entries'].keys():
                    rv = _find_parent(tree['entries'][ek], parentID)
                    if rv is not None:
                        return rv
                return None
            else:
                return None

        allEntries['default'] = {'name': '(other)', 'entries': {}}
        allEntries = {'entries': allEntries}
        for key in list(allEntries['entries'].keys()):
            if key == 'default':
                continue
            if key in allEntries['entries']:
                entry = allEntries['entries'][key]
                parentID = entry['parent']
                del entry['parent']
                if 'entries' in entry and parentID is None:
                    # group, but no parent: append to root directly
                    allEntries['entries'][key] = entry
                else:
                    # move item
                    parent = _find_parent(allEntries, parentID)
                    parent['entries'][key] = entry
                    del allEntries['entries'][key]

        classdef = allEntries
        classdef['numClasses'] = numClasses
        return classdef

    def getBatch_fixed(self, username, data):
        '''
            Returns entries from the database based on the list of data entry
            identifiers specified.
        '''
        # query
        sql = self.sqlBuilder.getFixedImagesQueryString(
            self.projectSettings['demoMode'])

        # parse results
        queryVals = (
            tuple(UUID(d) for d in data),
            username,
            username,
        )
        if self.projectSettings['demoMode']:
            # demo mode: query is user-agnostic
            queryVals = (tuple(UUID(d) for d in data), )

        with self.dbConnector.execute_cursor(sql, queryVals) as cursor:
            try:
                response = self._assemble_annotations(cursor)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()
        return {'entries': response}

    def getBatch_auto(self, username, order='unlabeled', subset='default',
                      limit=None):
        '''
            TODO: description
        '''
        # query
        sql = self.sqlBuilder.getNextBatchQueryString(
            order, subset, self.projectSettings['demoMode'])

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)

        # parse results
        queryVals = (
            username,
            limit,
            username,
        )
        if self.projectSettings['demoMode']:
            queryVals = (limit, )

        with self.dbConnector.execute_cursor(sql, queryVals) as cursor:
            response = self._assemble_annotations(cursor)
        return {'entries': response}

    def getBatch_timeRange(self, minTimestamp, maxTimestamp, userList,
                           skipEmptyImages=False, limit=None):
        '''
            Returns images that have been annotated within the given time range
            and/or by the given user(s). All arguments are optional. Useful for
            reviewing existing annotations.
        '''
        # query string
        sql = self.sqlBuilder.getDateQueryString(minTimestamp, maxTimestamp,
                                                 userList, skipEmptyImages)

        # check validity and provide arguments
        # (argument order must match the placeholders emitted by the builder)
        queryVals = []
        if userList is not None:
            queryVals.append(tuple(userList))
        if minTimestamp is not None:
            queryVals.append(minTimestamp)
        if maxTimestamp is not None:
            queryVals.append(maxTimestamp)
        if skipEmptyImages and userList is not None:
            queryVals.append(tuple(userList))

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)
        queryVals.append(limit)

        if userList is not None:
            queryVals.append(tuple(userList))

        # query and parse results
        with self.dbConnector.execute_cursor(sql, tuple(queryVals)) as cursor:
            try:
                response = self._assemble_annotations(cursor)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()

        return {'entries': response}

    def get_timeRange(self, userList, skipEmptyImages=False):
        '''
            Returns two timestamps denoting the temporal limits within which
            images have been viewed by the users provided in the userList.
            Arguments:
            - userList: string (single user name) or list of strings (multiple).
                        Can also be None; in this case all annotations will be
                        checked.
            - skipEmptyImages: if True, only images that contain at least one
                               annotation will be considered.
        '''
        # query string
        sql = self.sqlBuilder.getTimeRangeQueryString(userList,
                                                      skipEmptyImages)
        arguments = (None if userList is None else tuple(userList))
        result = self.dbConnector.execute(sql, (arguments, ), numReturn=1)
        if result is not None and len(result):
            return {
                'minTimestamp': result[0]['mintimestamp'],
                'maxTimestamp': result[0]['maxtimestamp'],
            }
        else:
            return {'error': 'no annotations made'}

    def submitAnnotations(self, username, submissions): ''' Sends user-provided annotations to the database.
''' if self.projectSettings['demoMode']: return 0 # assemble values colnames = getattr(QueryStrings_annotation, self.projectSettings['annotationType']).value values_insert = [] values_update = [] meta = (None if not 'meta' in submissions else json.dumps( submissions['meta'])) # for deletion: remove all annotations whose image ID matches but whose annotation ID is not among the submitted ones ids = [] viewcountValues = [] for imageKey in submissions['entries']: entry = submissions['entries'][imageKey] try: lastChecked = entry['timeCreated'] lastTimeRequired = entry['timeRequired'] if lastTimeRequired is None: lastTimeRequired = 0 except: lastChecked = datetime.now(tz=pytz.utc) lastTimeRequired = 0 if 'annotations' in entry and len(entry['annotations']): for annotation in entry['annotations']: # assemble annotation values annotationTokens = self.annoParser.parseAnnotation( annotation) annoValues = [] for cname in colnames: if cname == 'id': if cname in annotationTokens: # cast and only append id if the annotation is an existing one annoValues.append(UUID( annotationTokens[cname])) ids.append(UUID(annotationTokens[cname])) elif cname == 'image': annoValues.append(UUID(imageKey)) elif cname == 'label' and annotationTokens[ cname] is not None: annoValues.append(UUID(annotationTokens[cname])) elif cname == 'timeCreated': try: annoValues.append( dateutil.parser.parse( annotationTokens[cname])) except: annoValues.append(datetime.now(tz=pytz.utc)) elif cname == 'timeRequired': timeReq = annotationTokens[cname] if timeReq is None: timeReq = 0 annoValues.append(timeReq) elif cname == 'username': annoValues.append(username) elif cname in annotationTokens: annoValues.append(annotationTokens[cname]) elif cname == 'unsure': if 'unsure' in annotationTokens and annotationTokens[ 'unsure'] is not None: annoValues.append(annotationTokens[cname]) else: annoValues.append(False) elif cname == 'meta': annoValues.append(meta) else: annoValues.append(None) if 'id' in annotationTokens: # 
existing annotation; update values_update.append(tuple(annoValues)) else: # new annotation values_insert.append(tuple(annoValues)) viewcountValues.append( (username, imageKey, 1, lastChecked, lastTimeRequired, meta)) schema = self.config.getProperty('Database', 'schema') # delete all annotations that are not in submitted batch imageKeys = list(UUID(k) for k in submissions['entries']) if len(imageKeys): if len(ids): sql = ''' DELETE FROM {schema}.annotation WHERE username = %s AND id IN ( SELECT idQuery.id FROM ( SELECT * FROM {schema}.annotation WHERE id NOT IN %s ) AS idQuery JOIN ( SELECT * FROM {schema}.annotation WHERE image IN %s ) AS imageQuery ON idQuery.id = imageQuery.id); '''.format(schema=schema) self.dbConnector.execute(sql, ( username, tuple(ids), tuple(imageKeys), )) else: # no annotations submitted; delete all annotations submitted before sql = ''' DELETE FROM {schema}.annotation WHERE username = %s AND image IN %s; '''.format(schema=schema) self.dbConnector.execute(sql, ( username, tuple(imageKeys), )) # insert new annotations if len(values_insert): sql = ''' INSERT INTO {}.annotation ({}) VALUES %s ; '''.format( schema, ', '.join(colnames[1:]) # skip 'id' column ) self.dbConnector.insert(sql, values_insert) # update existing annotations if len(values_update): updateCols = '' for col in colnames: if col == 'label': updateCols += '{col} = UUID(e.{col}),'.format(col=col) elif col == 'timeRequired': # we sum the required times together updateCols += '{col} = COALESCE(a.{col},0) + COALESCE(e.{col},0),'.format( col=col) else: updateCols += '{col} = e.{col},'.format(col=col) sql = ''' UPDATE {schema}.annotation AS a SET {updateCols} FROM (VALUES %s) AS e({colnames}) WHERE e.id = a.id; '''.format(schema=schema, updateCols=updateCols.strip(','), colnames=', '.join(colnames)) self.dbConnector.insert(sql, values_update) # viewcount table sql = ''' INSERT INTO {}.image_user (username, image, viewcount, last_checked, last_time_required, meta) VALUES %s ON 
CONFLICT (username, image) DO UPDATE SET viewcount = image_user.viewcount + 1, last_checked = EXCLUDED.last_checked, last_time_required = EXCLUDED.last_time_required, meta = EXCLUDED.meta; '''.format(schema) self.dbConnector.insert(sql, viewcountValues) return 0
def __init__(self, config, celery_app): self.config = config self.dbConn = Database(config) self.sqlBuilder = SQLStringBuilder(config) self.celery_app = celery_app
class AIMiddleware():
    '''
        Orchestrates AI model training and inference through Celery workers:
        assembles job signatures, submits them, and tracks their status via a
        MessageProcessor thread and an optional database-polling Watchdog.
    '''

    def __init__(self, config):
        self.config = config
        self.dbConn = Database(config)
        self.sqlBuilder = SQLStringBuilder(config)
        # will be set to True once start_training is called (and False as soon
        # as everything about the training process has finished)
        self.training = False
        self.celery_app = current_app
        self.celery_app.set_current()
        self.celery_app.set_default()

        # note: watchdog only created if users poll status (i.e., if there's activity) #TODO
        self.watchdog = None

        self.messageProcessor = MessageProcessor(self.celery_app)
        self.messageProcessor.start()

    def _init_watchdog(self):
        '''
            Launches a thread that periodically polls the database for new
            annotations. Once the required number of new annotations is
            reached, this thread will initiate the training process through
            the middleware. The thread will be terminated and destroyed; a new
            thread will only be re-created once the training process has
            finished.
        '''
        if self.training:
            return
        numImages_autoTrain = self.config.getProperty('AIController',
                                                      'numImages_autoTrain',
                                                      type=int,
                                                      fallback=-1)
        if numImages_autoTrain == -1:
            # auto-training disabled; no watchdog needed
            return

        self.watchdog = Watchdog(self.config, self.dbConn, self)
        self.watchdog.start()

    def _get_num_available_workers(self):
        '''
            Returns the number of currently registered Celery workers,
            or 1 if none can be determined (pessimistic fallback).
        '''
        #TODO: message + queue if no worker available
        #TODO: limit to n tasks per worker
        i = self.celery_app.control.inspect()
        if i is not None:
            stats = i.stats()
            if stats is not None:
                # fix: reuse the cached stats instead of calling i.stats() a
                # second time, which would ping every worker again
                return len(stats)
        return 1  #TODO

    def _training_completed(self, trainingJob):
        '''
            To be called after a training process has been completed. If there
            is more than one worker, the 'average_model_states' instruction of
            the AI model is called and again awaited for. After successful
            training, the 'training' flag will be set to False to allow
            another round of model training.
        '''
        # re-enable training if no other training job is ongoing
        self.training = self.messageProcessor.task_ongoing('train')

    def _get_training_job_signature(self, minTimestamp='lastState', minNumAnnoPerImage=0, maxNumImages=None, maxNumWorkers=-1):
        '''
            Assembles (but does not submit) a training job based on the
            provided parameters. Returns a tuple (process, num_workers) on
            success, or None if job assembly failed.
        '''
        # check if training is still in progress
        if self.messageProcessor.task_ongoing('train'):
            raise Exception('Training process already running.')

        self.training = True

        try:
            # sanity checks
            if not (isinstance(minTimestamp, datetime)
                    or minTimestamp == 'lastState' or minTimestamp == -1
                    or minTimestamp is None):
                raise ValueError(
                    '{} is not a recognized property for variable "minTimestamp"'
                    .format(str(minTimestamp)))

            if maxNumWorkers != 1:
                # only query the number of available workers if more than one
                # is specified to save time
                num_workers = min(maxNumWorkers,
                                  self._get_num_available_workers())
            else:
                num_workers = maxNumWorkers

            # query image IDs
            sql = self.sqlBuilder.getLatestQueryString(
                minNumAnnoPerImage=minNumAnnoPerImage, limit=maxNumImages)

            if isinstance(minTimestamp, datetime):
                imageIDs = self.dbConn.execute(sql, (minTimestamp, ), 'all')
            else:
                imageIDs = self.dbConn.execute(sql, None, 'all')
            imageIDs = [i['image'] for i in imageIDs]

            if maxNumWorkers > 1:
                # distribute across workers (TODO: also specify subset size
                # for multiple jobs; randomly draw if needed)
                images_subset = array_split(
                    imageIDs, max(1, len(imageIDs) // num_workers))
                processes = []
                for subset in images_subset:
                    processes.append(
                        celery_interface.call_train.si(subset, True))
                process = group(processes)
            else:
                # call one worker directly
                # process = celery_interface.call_train.delay(data) #TODO: route to specific worker? http://docs.celeryproject.org/en/latest/userguide/routing.html#manual-routing
                process = celery_interface.call_train.si(imageIDs, False)

            return process, num_workers

        # fix: narrowed from a bare 'except:' so that e.g. KeyboardInterrupt
        # is not swallowed; any ordinary failure still resets the flag
        except Exception:
            # NOTE(review): returning None here will break callers that unpack
            # 'process, numWorkers' -- confirm intended contract
            self.training = self.messageProcessor.task_ongoing('train')
            return None

    def _get_inference_job_signature(self, imageIDs, maxNumWorkers=-1):
        '''
            Assembles (but does not submit) an inference job based on the
            provided parameters. Returns a Celery group over per-worker
            subsets of the given image IDs.
        '''
        # setup
        if maxNumWorkers != 1:
            # only query the number of available workers if more than one is
            # specified to save time
            num_available = self._get_num_available_workers()
            if maxNumWorkers == -1:
                maxNumWorkers = num_available  #TODO: more than one process per worker?
            else:
                maxNumWorkers = min(maxNumWorkers, num_available)

        # distribute across workers
        images_subset = array_split(imageIDs,
                                    max(1, len(imageIDs) // maxNumWorkers))
        jobs = []
        for subset in images_subset:
            job = celery_interface.call_inference.si(imageIDs=subset)
            jobs.append(job)

        jobGroup = group(jobs)
        return jobGroup

    def start_training(self, minTimestamp='lastState', minNumAnnoPerImage=0, maxNumImages=None, maxNumWorkers=-1):
        '''
            Initiates a training round for the model, based on the set of data
            (images, annotations) as specified in the parameters. Distributes
            data to the set of registered AIWorker instances, which then call
            the 'train' function of the AI model given in the configuration.
            Upon training completion, the model states as returned by the
            function, and eventually the AIWorker instances are collected, and
            the AIController calls the 'average_states' function of the AI
            model to get one single, most recent state. This is finally
            inserted to the database.
            Note that this function only loads the annotation data from the
            database, but not the images. Retrieving images is part of the AI
            model's 'train' function. TODO: feature vectors?

            Input parameters:
            - minTimestamp: earliest point in time of the annotations to be
              considered. One of:
                - 'lastState' (default): annotations made after the timestamp
                  of the latest model state (all annotations if no state).
                - None, -1, or 'all': all annotations.
                - (a datetime object): annotations made after a custom
                  timestamp.
            - minNumAnnoPerImage: minimum number of annotations per image to
              be considered for training (useful e.g. for detection tasks with
              many false alarms, to limit the model's "forgetting factor").
            - maxNumImages: maximum number of images to train on at a time.
            - maxNumWorkers: maximum number of workers to distribute training
              to. 1 = single worker (no state averaging); N = up to N workers
              with final state averaging; -1 = all connected workers.
              //TODO: load balancing?

            Returns:
            - A status string ('ok').
              TODO: status ok, fail, no annotations, etc. Make threaded so
              that it immediately returns something.
        '''
        process, numWorkers = self._get_training_job_signature(
            minTimestamp=minTimestamp,
            minNumAnnoPerImage=minNumAnnoPerImage,
            maxNumImages=maxNumImages,
            maxNumWorkers=maxNumWorkers)

        # submit job
        task_id = self.messageProcessor.task_id()
        if numWorkers > 1:
            # also append average model states job
            job = process.apply_async(
                task_id=task_id,
                ignore_result=False,
                result_extended=True,
                headers={
                    'type': 'train',
                    'submitted': str(current_time())
                },
                link=celery_interface.call_average_model_states.s())
        else:
            job = process.apply_async(task_id=task_id,
                                      ignore_result=False,
                                      result_extended=True,
                                      headers={
                                          'type': 'train',
                                          'submitted': str(current_time())
                                      })

        # start listener
        self.messageProcessor.register_job(job, 'train',
                                           self._training_completed)

        return 'ok'

    def _do_inference(self, process):
        '''
            Submits an assembled inference job and registers a listener for
            its completion.
        '''
        # send job
        task_id = self.messageProcessor.task_id()
        result = process.apply_async(task_id=task_id,
                                     ignore_result=False,
                                     result_extended=True,
                                     headers={
                                         'type': 'inference',
                                         'submitted': str(current_time())
                                     })

        # start listener
        self.messageProcessor.register_job(result, 'inference', None)

        return

    def start_inference(self, forceUnlabeled=True, maxNumImages=-1, maxNumWorkers=-1):
        '''
            Performs inference (prediction) on a set of data (images) as
            specified in the parameters. Distributes data to the set of
            registered AIWorker instances, which then call the 'inference'
            function of the AI model given in the configuration. Upon
            completion, each AIWorker automatically inserts the latest
            predictions into the database and reports back to the
            AIController (this instance) that its job has finished.
            Only annotation data is loaded from the database, not images;
            retrieving images is up to the AI model, which may instead work
            with feature vectors from the database directly (useful e.g. for
            models with a frozen feature extractor that only fine-tune the
            last prediction branch at inference time).

            Input parameters:
            - forceUnlabeled: if True, only images with a viewcount of 0 will
              be predicted on (default).
            - maxNumImages: override of the project settings' maximum number
              of images to infer on (-1 = use project settings).
            - maxNumWorkers: maximum number of AIWorker instances to use
              (-1 = divide across all registered workers).
        '''
        # setup
        if maxNumImages is None or maxNumImages == -1:
            maxNumImages = self.config.getProperty('AIController',
                                                   'maxNumImages_inference',
                                                   type=int)

        # load the IDs of the images that are being subjected to inference
        sql = self.sqlBuilder.getInferenceQueryString(forceUnlabeled,
                                                      maxNumImages)
        imageIDs = self.dbConn.execute(sql, None, 'all')
        imageIDs = [i['image'] for i in imageIDs]

        process = self._get_inference_job_signature(imageIDs, maxNumWorkers)
        self._do_inference(process)
        return 'ok'

    def inference_fixed(self, imageIDs, maxNumWorkers=-1):
        '''
            Performs inference (prediction) on a fixed set of data (images),
            as provided by the parameter 'imageIDs'. See start_inference for
            details on worker distribution and image retrieval.

            Input parameters:
            - imageIDs: array of UUIDs (or equivalent strings) of the images
              to infer on.
            - maxNumWorkers: maximum number of AIWorker instances to use
              (-1 = divide across all registered workers).
        '''
        process = self._get_inference_job_signature(imageIDs, maxNumWorkers)
        self._do_inference(process)
        return 'ok'

    def start_train_and_inference(self, minTimestamp='lastState', minNumAnnoPerImage=0, maxNumImages_train=None, maxNumWorkers_train=1, forceUnlabeled_inference=True, maxNumImages_inference=None, maxNumWorkers_inference=1):
        '''
            Submits a model training job, followed by inference. This is the
            default behavior for the automated model update, since the newly
            trained model should directly be used to infer new, potentially
            useful labels.
        '''
        # get training job signature
        process, numWorkers_train = self._get_training_job_signature(
            minTimestamp=minTimestamp,
            minNumAnnoPerImage=minNumAnnoPerImage,
            maxNumImages=maxNumImages_train,
            maxNumWorkers=maxNumWorkers_train)

        # submit job
        task_id = self.messageProcessor.task_id()
        if numWorkers_train > 1:
            # also append average model states job
            job = process.apply_async(
                task_id=task_id,
                ignore_result=False,
                result_extended=True,
                headers={
                    'type': 'train',
                    'submitted': str(current_time())
                },
                link=celery_interface.call_average_model_states.s())
        else:
            job = process.apply_async(task_id=task_id,
                                      ignore_result=False,
                                      result_extended=True,
                                      headers={
                                          'type': 'train',
                                          'submitted': str(current_time())
                                      })

        # start listener thread
        def chain_inference(*args):
            self.training = self.messageProcessor.task_ongoing('train')
            return self.start_inference(forceUnlabeled_inference,
                                        maxNumImages_inference,
                                        maxNumWorkers_inference)

        #TODO: chaining doesn't work properly this way...
        self.messageProcessor.register_job(job, 'train', chain_inference)

        return 'ok'

    def check_status(self, project, tasks, workers):
        '''
            Queries the Celery worker results depending on the parameters
            specified. Returns their status accordingly if they exist.
        '''
        status = {}

        # project status
        if project:
            if self.training:
                status['project'] = {}
            else:
                # notify watchdog that users are active
                if self.watchdog is None or self.watchdog.stopped():
                    self._init_watchdog()
                if self.watchdog is not None:
                    self.watchdog.nudge()
                    status['project'] = {
                        'num_annotated': self.watchdog.lastCount,
                        'num_next_training': self.watchdog.annoThreshold
                    }
                else:
                    status['project'] = {}

        # running tasks status
        if tasks:
            status['tasks'] = self.messageProcessor.poll_status()

        # get worker status (this is very expensive, as each worker needs to be pinged)
        if workers:
            status['workers'] = self.messageProcessor.poll_worker_status()

        return status
def main():
    '''
        Command-line entry point of the AIDE local model tester: parses
        arguments, connects to the project database and file server, resolves
        the AI model library and settings (with optional overrides), loads a
        sample of training images, and runs either the model's 'train' or
        'inference' function once.
    '''
    # parse arguments
    parser = argparse.ArgumentParser(description='AIDE local model tester')
    parser.add_argument('--project',
                        type=str,
                        required=True,
                        help='Project shortname to draw sample data from.')
    parser.add_argument(
        '--mode',
        type=str,
        required=True,
        help=
        'Evaluation mode (function to call). One of {"train", "inference"}.')
    parser.add_argument(
        '--modelLibrary',
        type=str,
        required=False,
        help=
        'Optional AI model library override. Provide a dot-separated Python import path here.'
    )
    parser.add_argument(
        '--modelSettings',
        type=str,
        required=False,
        help=
        'Optional AI model settings override (absolute or relative path to settings file, or else "none" to not use any predefined settings).'
    )
    args = parser.parse_args()
    #TODO: continue

    assert args.mode.lower() in (
        'train', 'inference'), f'"{args.mode}" is not a known evaluation mode.'
    mode = args.mode.lower()

    # initialize required modules
    config = Config()
    dbConnector = Database(config)
    fileServer = FileServer(config).get_secure_instance(args.project)
    aiw = AIWorker(config, dbConnector, True)
    aicw = AIControllerWorker(config, None)

    # check if AIDE file server is reachable
    admin = AdminMiddleware(config, dbConnector)
    connDetails = admin.getServiceDetails(True, False)
    fsVersion = connDetails['FileServer']['aide_version']
    if not isinstance(fsVersion, str):
        # no file server running
        raise Exception(
            'ERROR: AIDE file server is not running, but required for running models. Make sure to launch it prior to running this script.'
        )
    elif fsVersion != AIDE_VERSION:
        print(
            f'WARNING: the AIDE version of File Server instance ({fsVersion}) differs from this one ({AIDE_VERSION}).'
        )

    # get model trainer instance and settings
    queryStr = '''
        SELECT ai_model_library, ai_model_settings FROM aide_admin.project
        WHERE shortname = %s;
    '''
    result = dbConnector.execute(queryStr, (args.project, ), 1)
    if result is None or not len(result):
        raise Exception(
            f'Project "{args.project}" could not be found in this installation of AIDE.'
        )

    modelLibrary = result[0]['ai_model_library']
    modelSettings = result[0]['ai_model_settings']

    customSettingsSpecified = False
    # NOTE(review): hasattr(args, 'modelSettings') is always True with
    # argparse (value is None when omitted); the isinstance check does the
    # real work here
    if hasattr(args, 'modelSettings') and isinstance(
            args.modelSettings, str) and len(args.modelSettings):
        # settings override specified
        if args.modelSettings.lower() == 'none':
            modelSettings = None
            customSettingsSpecified = True
        elif not os.path.isfile(args.modelSettings):
            print(
                f'WARNING: model settings override provided, but file cannot be found ("{args.modelSettings}"). Falling back to project default ("{modelSettings}").'
            )
        else:
            modelSettings = args.modelSettings
            customSettingsSpecified = True

    if hasattr(args, 'modelLibrary') and isinstance(
            args.modelLibrary, str) and len(args.modelLibrary):
        # library override specified; try to import it
        try:
            modelClass = helpers.get_class_executable(args.modelLibrary)
            # bare 'raise' (no active exception) deliberately jumps to the
            # except-branch below when the class could not be resolved
            if modelClass is None:
                raise
            modelLibrary = args.modelLibrary

            # re-check if current model settings are compatible; warn and set
            # to None if not
            if modelLibrary != result[0][
                    'ai_model_library'] and not customSettingsSpecified:
                # project model settings are not compatible with provided model
                print(
                    'WARNING: custom model library specified differs from the one currently set in project. Model settings will be set to None.'
                )
                modelSettings = None
        except Exception as e:
            print(
                f'WARNING: model library override provided ("{args.modelLibrary}"), but could not be imported. Falling back to project default ("{modelLibrary}").'
            )

    # initialize instance
    print(f'Using model library "{modelLibrary}".')
    modelTrainer = aiw._init_model_instance(args.project, modelLibrary,
                                            modelSettings)

    stateDict = None  #TODO: load latest unless override is specified?

    # get data
    data = aicw.get_training_images(project=args.project, maxNumImages=512)
    data = __load_metadata(args.project, dbConnector, data[0],
                           (mode == 'train'))

    # helper functions
    def updateStateFun(state, message, done=None, total=None):
        # progress callback handed to the model: prints "<message>: done/total"
        # or just the message if no counts are given
        print(message, end='')
        if done is not None and total is not None:
            print(f': {done}/{total}')
        else:
            print('')

    # launch task
    if mode == 'train':
        result = modelTrainer.train(stateDict, data, updateStateFun)
        if result is None:
            raise Exception(
                'Training function must return an object (i.e., trained model state) to be stored in the database.'
            )
    elif mode == 'inference':
        result = modelTrainer.inference(stateDict, data, updateStateFun)
class UserMiddleware():
    '''
        Handles user authentication state: session tokens, password hashes,
        and an in-memory cache of logged-in users backed by the
        aide_admin.user database table.
    '''

    # number of random bytes in a session token
    TOKEN_NUM_BYTES = 64
    # bcrypt cost factor for password hashing
    SALT_NUM_ROUNDS = 12

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)

        self.usersLoggedIn = {}  # username -> {timestamp, sessionToken}

    def _current_time(self):
        return current_time()

    def _create_token(self):
        # cryptographically secure, URL-safe session token
        return secrets.token_urlsafe(self.TOKEN_NUM_BYTES)

    def _compare_tokens(self, tokenA, tokenB):
        # constant-time comparison; None never matches
        if tokenA is None or tokenB is None:
            return False
        return secrets.compare_digest(tokenA, tokenB)

    def _check_password(self, providedPass, hashedTargetPass):
        return bcrypt.checkpw(providedPass, hashedTargetPass)

    def _create_hash(self, password):
        hash = bcrypt.hashpw(password, bcrypt.gensalt(self.SALT_NUM_ROUNDS))
        return hash

    def _get_user_data(self, username):
        '''
            Returns the user's last_login, session_token and secret_token row
            from the database, or None if the account does not exist.
        '''
        result = self.dbConnector.execute(
            'SELECT last_login, session_token, secret_token FROM aide_admin.user WHERE name = %s;',
            (username, ),
            numReturn=1)
        if not len(result):
            return None
        result = result[0]
        return result

    def _extend_session_database(self, username, sessionToken):
        '''
            Updates the last login timestamp of the user to the current time
            and commits the changes to the database.
            Runs in a thread to be non-blocking.
        '''
        def _extend_session():
            now = self._current_time()

            self.dbConnector.execute(
                '''UPDATE aide_admin.user SET last_login = %s,
                    session_token = %s
                    WHERE name = %s
                ''', (
                    now,
                    sessionToken,
                    username,
                ),
                numReturn=None)

            # also update local cache
            self.usersLoggedIn[username]['timestamp'] = now

        eT = Thread(target=_extend_session)
        eT.start()

    def _init_or_extend_session(self, username, sessionToken=None):
        '''
            Establishes a "session" for the user (i.e., sets 'time_login'
            to now).
            Also creates a new sessionToken if None provided.
            Returns (sessionToken, now, expires).
        '''
        now = self._current_time()

        if sessionToken is None:
            sessionToken = self._create_token()

            # new session created; add to database
            self.dbConnector.execute(
                '''UPDATE aide_admin.user SET last_login = %s,
                    session_token = %s
                    WHERE name = %s
                ''', (
                    now,
                    sessionToken,
                    username,
                ),
                numReturn=None)

            # store locally
            self.usersLoggedIn[username] = {
                'timestamp': now,
                'sessionToken': sessionToken
            }

        # update local cache as well
        # NOTE(review): for a freshly created session this repeats the cache
        # write above and triggers a second DB update via
        # _extend_session_database -- looks redundant; confirm intended
        if not username in self.usersLoggedIn:
            self.usersLoggedIn[username] = {
                'timestamp': now,
                'sessionToken': sessionToken
            }
        else:
            self.usersLoggedIn[username]['timestamp'] = now
            self.usersLoggedIn[username]['sessionToken'] = sessionToken

        # also tell DB about updated tokens
        self._extend_session_database(username, sessionToken)

        expires = now + timedelta(
            0, self.config.getProperty('UserHandler', 'time_login', type=int))

        return sessionToken, now, expires

    def _invalidate_session(self, username):
        # drop from local cache and clear the token in the database
        if username in self.usersLoggedIn:
            del self.usersLoggedIn[username]
        self.dbConnector.execute(
            'UPDATE aide_admin.user SET session_token = NULL WHERE name = %s',
            (username, ),
            numReturn=None)
        #TODO: feedback that everything is ok?

    def _check_account_exists(self, username, email):
        '''
            Returns {'username': bool, 'email': bool} indicating whether an
            account with the given name and/or e-mail address already exists.
        '''
        response = {'username': True, 'email': True}
        if username is None or not len(username): username = ''
        if email is None or not len(email): email = ''
        result = self.dbConnector.execute(
            'SELECT COUNT(name) AS c FROM aide_admin.user WHERE name = %s UNION ALL SELECT COUNT(name) AS c FROM aide_admin.user WHERE email = %s',
            (
                username,
                email,
            ),
            numReturn=2)

        response['username'] = (result[0]['c'] > 0)
        response['email'] = (result[1]['c'] > 0)

        return response

    def _check_logged_in(self, username, sessionToken):
        '''
            Returns True iff the user currently holds a valid, unexpired
            session with the given token; checks the local cache first, then
            the database.
        '''
        now = self._current_time()
        time_login = self.config.getProperty('UserHandler',
                                             'time_login',
                                             type=int)
        if not username in self.usersLoggedIn:
            # check database
            result = self._get_user_data(username)
            if result is None:
                # account does not exist
                return False

            # check for session token
            if not self._compare_tokens(result['session_token'],
                                        sessionToken):
                # invalid session token provided
                return False

            # check for timestamp
            time_diff = (now - result['last_login']).total_seconds()
            if time_diff <= time_login:
                # user still logged in
                if not username in self.usersLoggedIn:
                    self.usersLoggedIn[username] = {
                        'timestamp': now,
                        'sessionToken': sessionToken
                    }
                else:
                    self.usersLoggedIn[username]['timestamp'] = now

                # extend user session (commit to DB) if needed
                if time_diff >= 0.75 * time_login:
                    self._extend_session_database(username, sessionToken)

                return True

            else:
                # session time-out
                return False

            # generic error
            return False

        else:
            # check locally
            if not self._compare_tokens(
                    self.usersLoggedIn[username]['sessionToken'],
                    sessionToken):
                # invalid session token provided; check database if token has updated
                # (can happen if user logs in again from another machine)
                result = self._get_user_data(username)
                if not self._compare_tokens(result['session_token'],
                                            sessionToken):
                    return False
                else:
                    # update local cache
                    self.usersLoggedIn[username]['sessionToken'] = result[
                        'session_token']
                    self.usersLoggedIn[username]['timestamp'] = now

            if (now - self.usersLoggedIn[username]['timestamp']
                ).total_seconds() <= time_login:
                # user still logged in
                return True
            else:
                # local cache session time-out; check if database holds more recent timestamp
                result = self._get_user_data(username)
                if (now - result['last_login']).total_seconds() <= time_login:
                    # user still logged in; update
                    # NOTE(review): this success path falls through to the
                    # 'return False' below instead of returning True --
                    # confirm whether this is intended
                    self._init_or_extend_session(username, sessionToken)
                else:
                    # session time-out
                    return False

            # generic error
            return False

        # generic error
        return False

    def _check_authorized(self, project, username, admin, return_all=False):
        '''
            Verifies whether a user has access rights to a project.
            If "return_all" is set to True, a dict with the following bools
            is returned:
            - enrolled: if the user is member of the project
            - isAdmin: if the user is a project administrator
            - isPublic: if the project is publicly visible (*)
            - demoMode: if the project runs in demo mode (*)

            (* note that these are here for convenience, but do not count
            as authorization tokens)

            If "return_all" is False, only a single bool is returned, with
            criteria as follows:
            - if "admin" is set to True, the user must be a project admini-
              strator
            - else, the user must be enrolled, admitted, and not blocked for
              the current date and time

            In this case, options like the demo mode and public flag are not
            relevant for the decision.
''' now = current_time() response = {'enrolled': False, 'isAdmin': False, 'isPublic': False} queryStr = sql.SQL(''' SELECT * FROM aide_admin.authentication AS auth JOIN (SELECT shortname, demoMode, isPublic FROM aide_admin.project) AS proj ON auth.project = proj.shortname WHERE project = %s AND username = %s; ''') try: result = self.dbConnector.execute(queryStr, ( project, username, ), 1) if len(result): response['isAdmin'] = result[0]['isadmin'] response['isPublic'] = result[0]['ispublic'] admitted_until = True blocked_until = False if result[0]['admitted_until'] is not None: admitted_until = (result[0]['admitted_until'] >= now) if result[0]['blocked_until'] is not None: blocked_until = (result[0]['blocked_until'] >= now) response['enrolled'] = (admitted_until and not blocked_until) except: # no results to fetch: user is not authenticated pass # check if super user superUser = self._check_user_privileges(username, superuser=True) if superUser: response['enrolled'] = True response['isAdmin'] = True if return_all: return response else: if admin: return response['isAdmin'] else: return response['enrolled'] # if admin: # queryStr = sql.SQL('''SELECT COUNT(*) AS cnt FROM aide_admin.authentication # WHERE project = %s AND username = %s AND isAdmin = %s''') # queryVals = (project,username,admin,) # else: # queryStr = sql.SQL('''SELECT COUNT(*) AS cnt FROM aide_admin.authentication # WHERE project = %s AND username = %s # AND ( # (admitted_until IS NULL OR admitted_until >= now()) # AND # (blocked_until IS NULL OR blocked_until < now()) # )''') # queryVals = (project,username,) # result = self.dbConnector.execute(queryStr, queryVals, 1) # return result[0]['cnt'] == 1 def checkDemoMode(self, project): return checkDemoMode(project, self.dbConnector) def decryptSessionToken(self, username, request): try: userdata = self._get_user_data(username) return request.get_cookie('session_token', secret=userdata['secret_token']) except: return None def encryptSessionToken(self, 
username, response): userdata = self._get_user_data(username) response.set_cookie('session_token', userdata['session_token'], httponly=True, path='/', secret=userdata['secret_token']) def _check_user_privileges(self, username, superuser=False, canCreateProjects=False, return_all=False): response = {'superuser': False, 'can_create_projects': False} result = self.dbConnector.execute( '''SELECT isSuperUser, canCreateProjects FROM aide_admin.user WHERE name = %s;''', (username, ), 1) if len(result): response['superuser'] = result[0]['issuperuser'] response['can_create_projects'] = result[0]['cancreateprojects'] if return_all: return response else: if superuser and not result[0]['issuperuser']: return False if canCreateProjects and not (result[0]['cancreateprojects'] or result[0]['issuperuser']): return False return True def isAuthenticated(self, username, sessionToken, project=None, admin=False, superuser=False, canCreateProjects=False, extend_session=False, return_all=False): ''' Checks if the user is authenticated to access a service. Returns False if one or more of the following conditions holds: - user is not logged in - 'project' (shortname) is provided, project is configured to be private and user is not in the authenticated users list - 'admin' is True, 'project' (shortname) is provided and user is not an admin of the project - 'superuser' is True and user is not a super user - 'canCreateProjects' is True and user is not authenticated to create (or remove) projects If 'extend_session' is True, the user's session will automatically be prolonged by the max login time specified in the configuration file. If 'return_all' is True, all individual flags (instead of just a single bool) is returned. 
''' demoMode = checkDemoMode(project, self.dbConnector) if return_all: returnVals = {} returnVals['logged_in'] = self._check_logged_in( username, sessionToken) if not returnVals['logged_in']: username = None if project is not None: returnVals['project'] = self._check_authorized(project, username, admin, return_all=True) returnVals['project']['demoMode'] = demoMode returnVals['privileges'] = self._check_user_privileges( username, superuser, canCreateProjects, return_all=True) if returnVals['logged_in'] and extend_session: self._init_or_extend_session(username, sessionToken) return returnVals else: # return True if project is in demo mode if demoMode is not None and demoMode: return True if not self._check_logged_in(username, sessionToken): return False if project is not None and not self._check_authorized( project, username, admin): return False if not self._check_user_privileges(username, superuser, canCreateProjects): return False if extend_session: self._init_or_extend_session(username, sessionToken) return True def getAuthentication(self, username, project=None): ''' Returns general authentication properties of the user, regardless of whether they are logged in or not. If a project shortname is specified, this will also return the user access properties for the given project. 
''' response = {} if project is None: result = self.dbConnector.execute( '''SELECT * FROM aide_admin.user AS u WHERE name = %s; ''', (username, ), 1) response['canCreateProjects'] = result[0]['cancreateprojects'] response['isSuperUser'] = result[0]['issuperuser'] else: result = self.dbConnector.execute( '''SELECT * FROM aide_admin.user AS u JOIN aide_admin.authentication AS a ON u.name = a.username WHERE name = %s AND project = %s; ''', ( username, project, ), 1) response['canCreateProjects'] = result[0]['cancreateprojects'] response['isSuperUser'] = result[0]['issuperuser'] response['isAdmin'] = result[0]['isadmin'] response['admittedUntil'] = result[0]['admitted_until'] response['blockedUntil'] = result[0]['blocked_until'] return response def getLoginData(self, username, sessionToken): ''' Performs a lookup on the login timestamp dict. If the username cannot be found (also not in the database), they are not logged in (False returned). If the difference between the current time and the recorded login timestamp exceeds a pre-defined threshold, the user is removed from the dict and False is returned. Otherwise returns True if and only if 'sessionToken' matches the entry in the database. ''' if self._check_logged_in(username, sessionToken): # still logged in; extend session sessionToken, now, expires = self._init_or_extend_session( username, sessionToken) return sessionToken, now, expires else: # not logged in or error raise Exception('Not logged in.') def getUserPermissions(self, project, username): ''' Returns the user-to-project relation (e.g., if user is admin). 
''' response = { 'demoMode': False, 'isAdmin': False, 'admittedUntil': None, 'blockedUntil': None } try: # demo mode response['demoMode'] = checkDemoMode(project, self.dbConnector) # rest queryStr = sql.SQL( 'SELECT * FROM {id_auth} WHERE project = %s AND username = %s' ).format(id_auth=sql.Identifier('aide_admin', 'authentication')) result = self.dbConnector.execute(queryStr, ( project, username, ), 1) if len(result): response['isAdmin'] = result[0]['isadmin'] response['admittedUntil'] = result[0]['admitted_until'] response['blockedUntil'] = result[0]['blocked_until'] finally: return response def login(self, username, password, sessionToken): # check if logged in if self._check_logged_in(username, sessionToken): # still logged in; extend session sessionToken, now, expires = self._init_or_extend_session( username, sessionToken) return sessionToken, now, expires # get user info userData = self.dbConnector.execute( 'SELECT hash FROM aide_admin.user WHERE name = %s;', (username, ), numReturn=1) if len(userData) == 0: # account does not exist raise InvalidRequestException() userData = userData[0] # verify provided password if self._check_password(password.encode('utf8'), bytes(userData['hash'])): # correct sessionToken, timestamp, expires = self._init_or_extend_session( username, None) return sessionToken, timestamp, expires else: # incorrect self._invalidate_session(username) raise InvalidPasswordException() def logout(self, username, sessionToken): # check if logged in first if self._check_logged_in(username, sessionToken): self._invalidate_session(username) def accountExists(self, username, email): return self._check_account_exists(username, email) def createAccount(self, username, password, email): accExstChck = self._check_account_exists(username, email) if accExstChck['username'] or accExstChck['email']: raise AccountExistsException(username) else: hash = self._create_hash(password.encode('utf8')) queryStr = ''' INSERT INTO aide_admin.user (name, email, hash) 
VALUES (%s, %s, %s); ''' self.dbConnector.execute(queryStr, ( username, email, hash, ), numReturn=None) sessionToken, timestamp, expires = self._init_or_extend_session( username) return sessionToken, timestamp, expires def getUserNames(self, project=None): if not project: queryStr = 'SELECT name FROM aide_admin.user' queryVals = None else: queryStr = 'SELECT username AS name FROM aide_admin.authentication WHERE project = %s' queryVals = (project, ) result = self.dbConnector.execute(queryStr, queryVals, 'all') response = [r['name'] for r in result] return response def setPassword(self, username, password): hashVal = self._create_hash(password.encode('utf8')) queryStr = ''' UPDATE aide_admin.user SET hash = %s WHERE name = %s; SELECT hash FROM aide_admin.user WHERE name = %s; ''' result = self.dbConnector.execute(queryStr, (hashVal, username, username), 1) if len(result): return {'success': True} else: return { 'success': False, 'message': f'User with name "{username}" does not exist.' }