class DBMiddleware(): def __init__(self, config): self.config = config self.dbConnector = Database(config) self._fetchProjectSettings() self.sqlBuilder = SQLStringBuilder(config) self.annoParser = AnnotationParser(config) def _fetchProjectSettings(self): # AI controller URI aiControllerURI = self.config.getProperty('Server', 'aiController_uri') if aiControllerURI is None or aiControllerURI.strip() == '': # no AI backend configured aiControllerURI = None # LabelUI drawing styles with open( self.config.getProperty( 'LabelUI', 'styles_file', type=str, fallback='modules/LabelUI/static/json/styles.json'), 'r') as f: styles = json.load(f) # Image backdrops for index screen with open( self.config.getProperty( 'Project', 'backdrops_file', type=str, fallback='modules/LabelUI/static/json/backdrops.json'), 'r') as f: backdrops = json.load(f) # Welcome message for UI tutorial with open( self.config.getProperty( 'Project', 'welcome_message_file', type=str, fallback= 'modules/LabelUI/static/templates/welcome_message.html'), 'r') as f: welcomeMessage = f.readlines() self.projectSettings = { 'projectName': self.config.getProperty('Project', 'projectName'), 'projectDescription': self.config.getProperty('Project', 'projectDescription'), 'indexURI': self.config.getProperty('Server', 'index_uri', type=str, fallback='/'), 'dataServerURI': self.config.getProperty('Server', 'dataServer_uri'), 'aiControllerURI': aiControllerURI, 'dataType': self.config.getProperty('Project', 'dataType', fallback='images'), 'classes': self.getClassDefinitions(), 'enableEmptyClass': self.config.getProperty('Project', 'enableEmptyClass', fallback='no'), 'annotationType': self.config.getProperty('Project', 'annotationType'), 'predictionType': self.config.getProperty('Project', 'predictionType'), 'showPredictions': self.config.getProperty('LabelUI', 'showPredictions', fallback='yes'), 'showPredictions_minConf': self.config.getProperty('LabelUI', 'showPredictions_minConf', type=float, fallback=0.5), 'carryOverPredictions': self.config.getProperty('LabelUI', 'carryOverPredictions', fallback='no'), 'carryOverRule': self.config.getProperty('LabelUI', 'carryOverRule', fallback='maxConfidence'), 'carryOverPredictions_minConf': self.config.getProperty('LabelUI', 'carryOverPredictions_minConf', type=float, fallback=0.75), 'defaultBoxSize_w': self.config.getProperty('LabelUI', 'defaultBoxSize_w', type=int, fallback=10), 'defaultBoxSize_h': self.config.getProperty('LabelUI', 'defaultBoxSize_h', type=int, fallback=10), 'minBoxSize_w': self.config.getProperty('Project', 'box_minWidth', type=int, fallback=1), 'minBoxSize_h': self.config.getProperty('Project', 'box_minHeight', type=int, fallback=1), 'numImagesPerBatch': self.config.getProperty('LabelUI', 'numImagesPerBatch', type=int, fallback=1), 'minImageWidth': self.config.getProperty('LabelUI', 'minImageWidth', type=int, fallback=300), 'numImageColumns_max': self.config.getProperty('LabelUI', 'numImageColumns_max', type=int, fallback=1), 'defaultImage_w': self.config.getProperty('LabelUI', 'defaultImage_w', type=int, fallback=800), 'defaultImage_h': self.config.getProperty('LabelUI', 'defaultImage_h', type=int, fallback=600), 'styles': styles['styles'], 'backdrops': backdrops, 'welcomeMessage': welcomeMessage, 'demoMode': self.config.getProperty('Project', 'demoMode', type=bool, fallback=False) } def _assemble_annotations(self, cursor): response = {} while True: b = cursor.fetchone() if b is None: break imgID = str(b['image']) if not imgID in response: response[imgID] = { 'fileName': b['filename'], 'predictions': {}, 'annotations': {}, 'last_checked': None } viewcount = b['viewcount'] if viewcount is not None: response[imgID]['viewcount'] = viewcount last_checked = b['last_checked'] if last_checked is not None: if response[imgID]['last_checked'] is None: response[imgID]['last_checked'] = last_checked else: response[imgID]['last_checked'] = max( response[imgID]['last_checked'], last_checked) # parse annotations and predictions entryID = str(b['id']) if b['ctype'] is not None: colnames = self.sqlBuilder.getColnames(b['ctype']) entry = {} for c in colnames: value = b[c] if isinstance(value, datetime): value = value.timestamp() elif isinstance(value, UUID): value = str(value) entry[c] = value if b['ctype'] == 'annotation': response[imgID]['annotations'][entryID] = entry elif b['ctype'] == 'prediction': response[imgID]['predictions'][entryID] = entry return response def getProjectSettings(self): ''' Queries the database for general project-specific metadata, such as: - Classes: names, indices, default colors - Annotation type: one of {class labels, positions, bboxes} ''' return self.projectSettings def getProjectInfo(self): ''' Returns safe, shareable information about the project. ''' return { 'projectName': self.projectSettings['projectName'], 'projectDescription': self.projectSettings['projectDescription'], 'demoMode': self.config.getProperty('Project', 'demoMode', type=bool, fallback=False), 'backdrops': self.projectSettings['backdrops']['images'] } def getClassDefinitions(self): ''' Returns a dictionary with entries for all classes in the project. ''' classdef = { 'entries': { 'default': {} # default group for ungrouped label classes } } schema = self.config.getProperty('Database', 'schema') # query data sql = ''' SELECT 'group' AS type, id, NULL as idx, name, color, parent, NULL AS keystroke FROM {schema}.labelclassgroup UNION ALL SELECT 'class' AS type, id, idx, name, color, labelclassgroup, keystroke FROM {schema}.labelclass; '''.format(schema=schema) classData = self.dbConnector.execute(sql, None, 'all') # assemble entries first allEntries = {} numClasses = 0 for cl in classData: id = str(cl['id']) entry = { 'id': id, 'name': cl['name'], 'color': cl['color'], 'parent': str(cl['parent']) if cl['parent'] is not None else None, } if cl['type'] == 'group': entry['entries'] = {} else: entry['index'] = cl['idx'] entry['keystroke'] = cl['keystroke'] numClasses += 1 allEntries[id] = entry # transform into tree def _find_parent(tree, parentID): if parentID is None: return tree['entries']['default'] elif 'id' in tree and tree['id'] == parentID: return tree elif 'entries' in tree: for ek in tree['entries'].keys(): rv = _find_parent(tree['entries'][ek], parentID) if rv is not None: return rv return None else: return None allEntries['default'] = {'name': '(other)', 'entries': {}} allEntries = {'entries': allEntries} for key in list(allEntries['entries'].keys()): if key == 'default': continue if key in allEntries['entries']: entry = allEntries['entries'][key] parentID = entry['parent'] del entry['parent'] if 'entries' in entry and parentID is None: # group, but no parent: append to root directly allEntries['entries'][key] = entry else: # move item parent = _find_parent(allEntries, parentID) parent['entries'][key] = entry del allEntries['entries'][key] classdef = allEntries classdef['numClasses'] = numClasses return classdef def getBatch_fixed(self, username, data): ''' Returns entries from the database based on the list of data entry identifiers specified. ''' # query sql = self.sqlBuilder.getFixedImagesQueryString( self.projectSettings['demoMode']) # parse results queryVals = ( tuple(UUID(d) for d in data), username, username, ) if self.projectSettings['demoMode']: queryVals = (tuple(UUID(d) for d in data), ) with self.dbConnector.execute_cursor(sql, queryVals) as cursor: try: response = self._assemble_annotations(cursor) # self.dbConnector.conn.commit() except Exception as e: print(e) # self.dbConnector.conn.rollback() finally: pass # cursor.close() return {'entries': response} def getBatch_auto(self, username, order='unlabeled', subset='default', limit=None): ''' TODO: description ''' # query sql = self.sqlBuilder.getNextBatchQueryString( order, subset, self.projectSettings['demoMode']) # limit (TODO: make 128 a hyperparameter) if limit is None: limit = 128 else: limit = min(int(limit), 128) # parse results queryVals = ( username, limit, username, ) if self.projectSettings['demoMode']: queryVals = (limit, ) with self.dbConnector.execute_cursor(sql, queryVals) as cursor: response = self._assemble_annotations(cursor) return {'entries': response} def getBatch_timeRange(self, minTimestamp, maxTimestamp, userList, skipEmptyImages=False, limit=None): ''' Returns images that have been annotated within the given time range and/or by the given user(s). All arguments are optional. Useful for reviewing existing annotations. ''' # query string sql = self.sqlBuilder.getDateQueryString(minTimestamp, maxTimestamp, userList, skipEmptyImages) # check validity and provide arguments queryVals = [] if userList is not None: queryVals.append(tuple(userList)) if minTimestamp is not None: queryVals.append(minTimestamp) if maxTimestamp is not None: queryVals.append(maxTimestamp) if skipEmptyImages and userList is not None: queryVals.append(tuple(userList)) # limit (TODO: make 128 a hyperparameter) if limit is None: limit = 128 else: limit = min(int(limit), 128) queryVals.append(limit) if userList is not None: queryVals.append(tuple(userList)) # query and parse results with self.dbConnector.execute_cursor(sql, tuple(queryVals)) as cursor: try: response = self._assemble_annotations(cursor) # self.dbConnector.conn.commit() except Exception as e: print(e) # self.dbConnector.conn.rollback() finally: pass # cursor.close() return {'entries': response} def get_timeRange(self, userList, skipEmptyImages=False): ''' Returns two timestamps denoting the temporal limits within which images have been viewed by the users provided in the userList. Arguments: - userList: string (single user name) or list of strings (multiple). Can also be None; in this case all annotations will be checked. - skipEmptyImages: if True, only images that contain at least one annotation will be considered. ''' # query string sql = self.sqlBuilder.getTimeRangeQueryString(userList, skipEmptyImages) arguments = (None if userList is None else tuple(userList)) result = self.dbConnector.execute(sql, (arguments, ), numReturn=1) if result is not None and len(result): return { 'minTimestamp': result[0]['mintimestamp'], 'maxTimestamp': result[0]['maxtimestamp'], } else: return {'error': 'no annotations made'} def submitAnnotations(self, username, submissions): ''' Sends user-provided annotations to the database. ''' if self.projectSettings['demoMode']: return 0 # assemble values colnames = getattr(QueryStrings_annotation, self.projectSettings['annotationType']).value values_insert = [] values_update = [] meta = (None if not 'meta' in submissions else json.dumps( submissions['meta'])) # for deletion: remove all annotations whose image ID matches but whose annotation ID is not among the submitted ones ids = [] viewcountValues = [] for imageKey in submissions['entries']: entry = submissions['entries'][imageKey] try: lastChecked = entry['timeCreated'] lastTimeRequired = entry['timeRequired'] if lastTimeRequired is None: lastTimeRequired = 0 except: lastChecked = datetime.now(tz=pytz.utc) lastTimeRequired = 0 if 'annotations' in entry and len(entry['annotations']): for annotation in entry['annotations']: # assemble annotation values annotationTokens = self.annoParser.parseAnnotation( annotation) annoValues = [] for cname in colnames: if cname == 'id': if cname in annotationTokens: # cast and only append id if the annotation is an existing one annoValues.append(UUID( annotationTokens[cname])) ids.append(UUID(annotationTokens[cname])) elif cname == 'image': annoValues.append(UUID(imageKey)) elif cname == 'label' and annotationTokens[ cname] is not None: annoValues.append(UUID(annotationTokens[cname])) elif cname == 'timeCreated': try: annoValues.append( dateutil.parser.parse( annotationTokens[cname])) except: annoValues.append(datetime.now(tz=pytz.utc)) elif cname == 'timeRequired': timeReq = annotationTokens[cname] if timeReq is None: timeReq = 0 annoValues.append(timeReq) elif cname == 'username': annoValues.append(username) elif cname in annotationTokens: annoValues.append(annotationTokens[cname]) elif cname == 'unsure': if 'unsure' in annotationTokens and annotationTokens[ 'unsure'] is not None: annoValues.append(annotationTokens[cname]) else: annoValues.append(False) elif cname == 'meta': annoValues.append(meta) else: annoValues.append(None) if 'id' in annotationTokens: # existing annotation; update values_update.append(tuple(annoValues)) else: # new annotation values_insert.append(tuple(annoValues)) viewcountValues.append( (username, imageKey, 1, lastChecked, lastTimeRequired, meta)) schema = self.config.getProperty('Database', 'schema') # delete all annotations that are not in submitted batch imageKeys = list(UUID(k) for k in submissions['entries']) if len(imageKeys): if len(ids): sql = ''' DELETE FROM {schema}.annotation WHERE username = %s AND id IN ( SELECT idQuery.id FROM ( SELECT * FROM {schema}.annotation WHERE id NOT IN %s ) AS idQuery JOIN ( SELECT * FROM {schema}.annotation WHERE image IN %s ) AS imageQuery ON idQuery.id = imageQuery.id); '''.format(schema=schema) self.dbConnector.execute(sql, ( username, tuple(ids), tuple(imageKeys), )) else: # no annotations submitted; delete all annotations submitted before sql = ''' DELETE FROM {schema}.annotation WHERE username = %s AND image IN %s; '''.format(schema=schema) self.dbConnector.execute(sql, ( username, tuple(imageKeys), )) # insert new annotations if len(values_insert): sql = ''' INSERT INTO {}.annotation ({}) VALUES %s ; '''.format( schema, ', '.join(colnames[1:]) # skip 'id' column ) self.dbConnector.insert(sql, values_insert) # update existing annotations if len(values_update): updateCols = '' for col in colnames: if col == 'label': updateCols += '{col} = UUID(e.{col}),'.format(col=col) elif col == 'timeRequired': # we sum the required times together updateCols += '{col} = COALESCE(a.{col},0) + COALESCE(e.{col},0),'.format( col=col) else: updateCols += '{col} = e.{col},'.format(col=col) sql = ''' UPDATE {schema}.annotation AS a SET {updateCols} FROM (VALUES %s) AS e({colnames}) WHERE e.id = a.id; '''.format(schema=schema, updateCols=updateCols.strip(','), colnames=', '.join(colnames)) self.dbConnector.insert(sql, values_update) # viewcount table sql = ''' INSERT INTO {}.image_user (username, image, viewcount, last_checked, last_time_required, meta) VALUES %s ON CONFLICT (username, image) DO UPDATE SET viewcount = image_user.viewcount + 1, last_checked = EXCLUDED.last_checked, last_time_required = EXCLUDED.last_time_required, meta = EXCLUDED.meta; '''.format(schema) self.dbConnector.insert(sql, viewcountValues) return 0
class ProjectConfigMiddleware: # prohibited project shortnames PROHIBITED_SHORTNAMES = [ 'con', # for MS Windows 'prn', 'aux', 'nul', 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9', 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9' ] # prohibited project names (both as a whole and for shortnames) PROHIBITED_NAMES = [ '', 'project', 'getavailableaimodels', 'getbackdrops', 'verifyprojectname', 'verifyprojectshort', 'newproject', 'createproject', 'statistics', 'static', 'getcreateaccountunrestricted' 'getprojects', 'about', 'favicon.ico', 'logincheck', 'logout', 'login', 'dologin', 'createaccount', 'loginscreen', 'accountexists', 'getauthentication', 'getusernames', 'docreateaccount', 'admin', 'getservicedetails', 'getceleryworkerdetails', 'getprojectdetails', 'getuserdetails', 'setpassword' ] # prohibited name prefixes PROHIBITED_NAME_PREFIXES = ['/', '?', '&'] # patterns that are prohibited anywhere for shortnames (replaced with underscores) SHORTNAME_PATTERNS_REPLACE = [ '|', '?', '*', ':' # for macOS ] # patterns that are prohibited anywhere for both short and long names (no replacement) PROHIBITED_STRICT = ['<', '<', '>', '>', '..', '/', '\\'] def __init__(self, config): self.config = config self.dbConnector = Database(config) # load default UI settings try: # check if custom default styles are provided self.defaultUIsettings = json.load( open('config/default_ui_settings.json', 'r')) except: # resort to built-in styles self.defaultUIsettings = json.load( open( 'modules/ProjectAdministration/static/json/default_ui_settings.json', 'r')) @staticmethod def _recursive_update(dictObject, target): ''' Recursively iterates over all keys and sub-keys of "dictObject" and its sub-dicts and copies over values from dict "target", if they are available. ''' for key in dictObject.keys(): if key in target: if isinstance(dictObject[key], dict): ProjectConfigMiddleware._recursive_update( dictObject[key], target[key]) else: dictObject[key] = target[key] def getPlatformInfo(self, project, parameters=None): ''' AIDE setup-specific platform metadata. ''' # parse parameters (if provided) and compare with mutable entries allParams = set(['server_uri', 'server_dir', 'watch_folder_interval']) if parameters is not None and parameters != '*': if isinstance(parameters, str): parameters = [parameters.lower()] else: parameters = [p.lower() for p in parameters] set(parameters).intersection_update(allParams) else: parameters = allParams parameters = list(parameters) response = {} for param in parameters: if param.lower() == 'server_uri': uri = os.path.join( self.config.getProperty('Server', 'dataServer_uri'), project, 'files') response[param] = uri elif param.lower() == 'server_dir': sdir = os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), project) response[param] = sdir elif param.lower() == 'watch_folder_interval': interval = self.config.getProperty('FileServer', 'watch_folder_interval', type=float, fallback=60) response[param] = interval return response def getProjectImmutables(self, project): queryStr = 'SELECT annotationType, predictionType, demoMode FROM aide_admin.project WHERE shortname = %s;' result = self.dbConnector.execute(queryStr, (project, ), 1) if result and len(result): return { 'annotationType': result[0]['annotationtype'], 'predictionType': result[0]['predictiontype'] } else: return None def getProjectInfo(self, project, parameters=None): # parse parameters (if provided) and compare with mutable entries allParams = set([ 'name', 'description', 'ispublic', 'secret_token', 'demomode', 'interface_enabled', 'ui_settings', 'segmentation_ignore_unlabeled', 'ai_model_enabled', 'ai_model_library', 'ai_model_settings', 'ai_alcriterion_library', 'ai_alcriterion_settings', 'numimages_autotrain', 'minnumannoperimage', 'maxnumimages_train', 'watch_folder_enabled', 'watch_folder_remove_missing_enabled' ]) if parameters is not None and parameters != '*': if isinstance(parameters, str): parameters = [parameters.lower()] else: parameters = [p.lower() for p in parameters] set(parameters).intersection_update(allParams) else: parameters = allParams parameters = list(parameters) sqlParameters = ','.join(parameters) queryStr = sql.SQL(''' SELECT {} FROM aide_admin.project WHERE shortname = %s; ''').format(sql.SQL(sqlParameters)) result = self.dbConnector.execute(queryStr, (project, ), 1) result = result[0] # assemble response response = {} for param in parameters: value = result[param] if param == 'ui_settings': value = json.loads(value) # auto-complete with defaults where missing value = check_args(value, self.defaultUIsettings) response[param] = value return response def renewSecretToken(self, project): ''' Creates a new secret token, invalidating the old one. ''' try: newToken = secrets.token_urlsafe(32) result = self.dbConnector.execute( '''UPDATE aide_admin.project SET secret_token = %s WHERE shortname = %s; SELECT secret_token FROM aide_admin.project WHERE shortname = %s; ''', ( newToken, project, project, ), 1) return result[0]['secret_token'] except: # this normally should not happen, since we checked for the validity of the project return None def getProjectUsers(self, project): ''' Returns a list of users that are enrolled in the project, as well as their roles within the project. ''' queryStr = sql.SQL( 'SELECT * FROM aide_admin.authentication WHERE project = %s;') result = self.dbConnector.execute(queryStr, (project, ), 'all') return result def createProject(self, username, properties): ''' Receives the most basic, mostly non-changeable settings for a new project ("properties") with the following entries: - shortname - owner (the current username) - name - description - annotationType - predictionType More advanced settings (UI config, AI model, etc.) will be configured after the initial project creation stage. Verifies whether these settings are available for a new project. If they are, it creates a new database schema for the project, adds an entry for it to the admin schema table and makes the current user admin. Returns True in this case. Otherwise raises an exception. ''' shortname = properties['shortname'] # verify availability of the project name and shortname if not self.getProjectNameAvailable(properties['name']): raise Exception('Project name "{}" unavailable.'.format( properties['name'])) if not self.getProjectShortNameAvailable(shortname): raise Exception( 'Project shortname "{}" unavailable.'.format(shortname)) # load base SQL with open('modules/ProjectAdministration/static/sql/create_schema.sql', 'r') as f: queryStr = sql.SQL(f.read()) # determine annotation and prediction types and add fields accordingly annotationFields = list( getattr(Fields_annotation, properties['annotationType']).value) predictionFields = list( getattr(Fields_prediction, properties['predictionType']).value) # create project schema self.dbConnector.execute( queryStr.format( id_schema=sql.Identifier(shortname), id_auth=sql.Identifier( self.config.getProperty('Database', 'user')), id_image=sql.Identifier(shortname, 'image'), id_iu=sql.Identifier(shortname, 'image_user'), id_labelclassGroup=sql.Identifier(shortname, 'labelclassgroup'), id_labelclass=sql.Identifier(shortname, 'labelclass'), id_annotation=sql.Identifier(shortname, 'annotation'), id_cnnstate=sql.Identifier(shortname, 'cnnstate'), id_prediction=sql.Identifier(shortname, 'prediction'), id_workflow=sql.Identifier(shortname, 'workflow'), id_workflowHistory=sql.Identifier(shortname, 'workflowhistory'), annotation_fields=sql.SQL(', ').join( [sql.SQL(field) for field in annotationFields]), prediction_fields=sql.SQL(', ').join( [sql.SQL(field) for field in predictionFields])), None, None) # register project self.dbConnector.execute( ''' INSERT INTO aide_admin.project (shortname, name, description, owner, secret_token, interface_enabled, annotationType, predictionType, isPublic, demoMode, ui_settings) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s ); ''', (shortname, properties['name'], (properties['description'] if 'description' in properties else ''), username, secrets.token_urlsafe(32), False, properties['annotationType'], properties['predictionType'], False, False, json.dumps(self.defaultUIsettings)), None) # register user in project self.dbConnector.execute( ''' INSERT INTO aide_admin.authentication (username, project, isAdmin) VALUES (%s, %s, true); ''', ( username, shortname, ), None) # notify FileServer instance(s) to set up project folders process = fileServer_interface.aide_internal_notify.si({ 'task': 'create_project_folders', 'projectName': shortname }) process.apply_async(queue='aide_broadcast', ignore_result=True) return True def updateProjectSettings(self, project, projectSettings): ''' TODO ''' # check UI settings first if 'ui_settings' in projectSettings: if isinstance(projectSettings['ui_settings'], str): projectSettings['ui_settings'] = json.loads( projectSettings['ui_settings']) fieldNames = [('welcomeMessage', str), ('numImagesPerBatch', int), ('minImageWidth', int), ('numImageColumns_max', int), ('defaultImage_w', int), ('defaultImage_h', int), ('styles', dict), ('enableEmptyClass', bool), ('showPredictions', bool), ('showPredictions_minConf', float), ('carryOverPredictions', bool), ('carryOverRule', str), ('carryOverPredictions_minConf', float), ('defaultBoxSize_w', int), ('defaultBoxSize_h', int), ('minBoxSize_w', int), ('minBoxSize_h', int)] uiSettings_new, uiSettingsKeys_new = parse_parameters( projectSettings['ui_settings'], fieldNames, absent_ok=True, escape=True) #TODO: escape # adopt current settings and replace values accordingly uiSettings = self.dbConnector.execute( '''SELECT ui_settings FROM aide_admin.project WHERE shortname = %s; ''', (project, ), 1) uiSettings = json.loads(uiSettings[0]['ui_settings']) for kIdx in range(len(uiSettingsKeys_new)): if isinstance(uiSettings[uiSettingsKeys_new[kIdx]], dict): ProjectConfigMiddleware._recursive_update( uiSettings[uiSettingsKeys_new[kIdx]], uiSettings_new[kIdx]) else: uiSettings[uiSettingsKeys_new[kIdx]] = uiSettings_new[kIdx] # auto-complete with defaults where missing uiSettings = check_args(uiSettings, self.defaultUIsettings) projectSettings['ui_settings'] = json.dumps(uiSettings) # parse remaining parameters fieldNames = [('description', str), ('isPublic', bool), ('secret_token', str), ('demoMode', bool), ('ui_settings', str), ('interface_enabled', bool), ('watch_folder_enabled', bool), ('watch_folder_remove_missing_enabled', bool)] vals, params = parse_parameters(projectSettings, fieldNames, absent_ok=True, escape=False) vals.append(project) # commit to DB queryStr = sql.SQL('''UPDATE aide_admin.project SET {} WHERE shortname = %s; ''').format( sql.SQL(',').join( [sql.SQL('{} = %s'.format(item)) for item in params])) self.dbConnector.execute(queryStr, tuple(vals), None) return True def updateClassDefinitions(self, project, classdef, removeMissing=False): ''' Updates the project's class definitions. if "removeMissing" is set to True, label classes that are present in the database, but not in "classdef," will be removed. Label class groups will only be removed if they do not reference any label class present in "classdef." This functionality is disallowed in the case of segmentation masks. ''' # check if project contains segmentation masks metaType = self.dbConnector.execute( ''' SELECT annotationType, predictionType FROM aide_admin.project WHERE shortname = %s; ''', (project, ), 1)[0] is_segmentation = any( ['segmentationmasks' in m.lower() for m in metaType.values()]) if is_segmentation: removeMissing = False # get current classes from database db_classes = {} db_groups = {} if removeMissing: queryStr = sql.SQL(''' SELECT * FROM {id_lc} AS lc FULL OUTER JOIN ( SELECT id AS lcgid, name AS lcgname, parent, color FROM {id_lcg} ) AS lcg ON lc.labelclassgroup = lcg.lcgid ''').format(id_lc=sql.Identifier(project, 'labelclass'), id_lcg=sql.Identifier(project, 'labelclassgroup')) result = self.dbConnector.execute(queryStr, None, 'all') for r in result: if r['id'] is not None: db_classes[r['id']] = r if r['lcgid'] is not None: if not r['lcgid'] in db_groups: db_groups[r['lcgid']] = {**r, **{'num_children': 0}} elif not 'lcgid' in db_groups[r['lcgid']]: db_groups[r['lcgid']] = {**db_groups[r['lcgid']], **r} if r['labelclassgroup'] is not None: if not r['labelclassgroup'] in db_groups: db_groups[r['labelclassgroup']] = {'num_children': 1} else: db_groups[r['labelclassgroup']]['num_children'] += 1 # parse provided class definitions list unique_keystrokes = set() classes_update = [] classgroups_update = [] def _parse_item(item, parent=None): # get or create ID for item try: itemID = uuid.UUID(item['id']) except: itemID = uuid.uuid1() while itemID in classes_update or itemID in classgroups_update: itemID = uuid.uuid1() entry = { 'id': itemID, 'name': item['name'], 'color': (None if not 'color' in item else item['color']), 'keystroke': None, 'labelclassgroup': parent } if 'children' in item: # label class group classgroups_update.append(entry) for child in item['children']: _parse_item(child, itemID) else: # label class if 'keystroke' in item and not item[ 'keystroke'] in unique_keystrokes: entry['keystroke'] = item['keystroke'] unique_keystrokes.add(item['keystroke']) classes_update.append(entry) for item in classdef: _parse_item(item, None) # apply changes if removeMissing: queryArgs = [] if len(classes_update): # remove all missing label classes lcSpec = sql.SQL('WHERE id NOT IN %s') queryArgs.append(tuple([(l['id'], ) for l in classes_update])) else: # remove all label classes lcgSpec = sql.SQL('') if len(classgroups_update): # remove all missing labelclass groups lcgSpec = sql.SQL('WHERE id NOT IN %s') queryArgs.append( tuple([(l['id'], ) for l in classgroups_update])) else: # remove all labelclass groups lcgSpec = sql.SQL('') queryStr = sql.SQL(''' DELETE FROM {id_lc} {lcSpec}; DELETE FROM {id_lcg} {lcgSpec}; ''').format(id_lc=sql.Identifier(project, 'labelclass'), id_lcg=sql.Identifier(project, 'labelclassgroup'), lcSpec=lcSpec, lcgSpec=lcgSpec) self.dbConnector.execute(queryStr, tuple(queryArgs), None) # add/update in order (groups, set their parents, label classes) groups_new = [( g['id'], g['name'], g['color'], ) for g in classgroups_update] queryStr = sql.SQL(''' INSERT INTO {id_lcg} (id, name, color) VALUES %s ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name, color = EXCLUDED.color; ''').format( #TODO: on conflict(name) id_lcg=sql.Identifier(project, 'labelclassgroup')) self.dbConnector.insert(queryStr, groups_new) # set parents groups_parents = [ ( g['id'], g['labelclassgroup'], ) for g in classgroups_update if ('labelclassgroup' in g and g['labelclassgroup'] is not None) ] queryStr = sql.SQL(''' UPDATE {id_lcg} AS lcg SET parent = q.parent FROM (VALUES %s) AS q(id, parent) WHERE lcg.id = q.id; ''').format(id_lcg=sql.Identifier(project, 'labelclassgroup')) self.dbConnector.insert(queryStr, groups_parents) # insert/update label classes lcdata = [( l['id'], l['name'], l['color'], l['keystroke'], l['labelclassgroup'], ) for l in classes_update] queryStr = sql.SQL(''' INSERT INTO {id_lc} (id, name, color, keystroke, labelclassgroup) VALUES %s ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name, color = EXCLUDED.color, keystroke = EXCLUDED.keystroke, labelclassgroup = EXCLUDED.labelclassgroup; ''').format( #TODO: on conflict(name) id_lc=sql.Identifier(project, 'labelclass')) self.dbConnector.insert(queryStr, lcdata) return True def getProjectNameAvailable(self, projectName): ''' Returns True if the provided project (long) name is available. ''' if not isinstance(projectName, str): return False projectName = projectName.strip().lower() if not len(projectName): return False # check if name matches prohibited AIDE keywords (we do not replace long names) if projectName in self.PROHIBITED_STRICT or any( [p in projectName for p in self.PROHIBITED_STRICT]): return False if projectName in self.PROHIBITED_NAMES: return False if any( [projectName.startswith(p) for p in self.PROHIBITED_NAME_PREFIXES]): return False # check if name is already taken result = self.dbConnector.execute( '''SELECT 1 AS result FROM aide_admin.project WHERE name = %s; ''', (projectName, ), 1) if result is None or not len(result): return True else: return result[0]['result'] != 1 def getProjectShortNameAvailable(self, projectName): ''' Returns True if the provided project shortname is available. In essence, "available" means that a database schema with the given name can be created (this includes Postgres schema name conventions). Returns False otherwise. ''' if not isinstance(projectName, str): return False projectName = projectName.strip().lower() if not len(projectName): return False # check if name matches prohibited AIDE keywords; replace where possible if projectName in self.PROHIBITED_STRICT or any( [p in projectName for p in self.PROHIBITED_STRICT]): return False if projectName in self.PROHIBITED_NAMES or projectName in self.PROHIBITED_SHORTNAMES: return False if any( [projectName.startswith(p) for p in self.PROHIBITED_NAME_PREFIXES]): return False for p in self.SHORTNAME_PATTERNS_REPLACE: projectName = projectName.replace(p, '_') # check if provided name is valid as per Postgres conventions matches = re.findall('(^(pg_|[0-9]).*|.*(\$|\s)+.*)', projectName) if len(matches): return False # check if project shorthand already exists in database result = self.dbConnector.execute( '''SELECT 1 AS result FROM information_schema.schemata WHERE schema_name ilike %s UNION ALL SELECT 1 FROM aide_admin.project WHERE shortname ilike %s; ''', ( projectName, projectName, ), 2) if result is None or not len(result): return True if len(result) == 2: return result[0]['result'] != 1 and result[1]['result'] != 1 elif len(result) == 1: return result[0]['result'] != 1 else: return True
class DataWorker: FILENAMES_PROHIBITED_CHARS = ( '<', '<', '>', '>', '..', '/', '\\', '|', '?', '*', ':' # for macOS ) NUM_IMAGES_LIMIT = 4096 # maximum number of images that can be queried at once (to avoid bottlenecks) def __init__(self, config, passiveMode=False): self.config = config self.dbConnector = Database(config) self.countPattern = re.compile('\_[0-9]+$') self.passiveMode = passiveMode self.tempDir = self.config.getProperty('FileServer', 'tempfiles_dir', type=str, fallback=tempfile.gettempdir()) def aide_internal_notify(self, message): ''' Used for AIDE administrative communication, e.g. for setting up queues. ''' if self.passiveMode: return if 'task' in message: if message['task'] == 'create_project_folders': # set up folders for a newly created project if 'projectName' in message: destPath = os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), message['projectName']) os.makedirs(destPath, exist_ok=True) ''' Image administration functionalities ''' def listImages(self, project, folder=None, imageAddedRange=None, lastViewedRange=None, viewcountRange=None, numAnnoRange=None, numPredRange=None, orderBy=None, order='desc', startFrom=None, limit=None): ''' Returns a list of images, with ID, filename, date image was added, viewcount, number of annotations, number of predictions, and last time viewed, for a given project. The list can be filtered by all those properties (e.g. date and time image was added, last checked; number of annotations, etc.), as well as limited in length (images are sorted by date_added). ''' queryArgs = [] filterStr = '' if folder is not None and isinstance(folder, str): filterStr += ' filename LIKE %s ' queryArgs.append(folder + '%') if imageAddedRange is not None: #TODO filterStr += 'AND date_added >= to_timestamp(%s) AND date_added <= to_timestamp(%s) ' queryArgs.append(imageAddedRange[0]) queryArgs.append(imageAddedRange[1]) if lastViewedRange is not None: #TODO filterStr += 'AND last_viewed >= to_timestamp(%s) AND last_viewed <= to_timestamp(%s) ' queryArgs.append(lastViewedRange[0]) queryArgs.append(lastViewedRange[1]) if viewcountRange is not None: filterStr += 'AND viewcount >= %s AND viewcount <= %s ' queryArgs.append(viewcountRange[0]) queryArgs.append(viewcountRange[1]) if numAnnoRange is not None: filterStr += 'AND num_anno >= %s AND numAnno <= %s ' queryArgs.append(numAnnoRange[0]) queryArgs.append(numAnnoRange[1]) if numPredRange is not None: filterStr += 'AND num_pred >= %s AND num_pred <= %s ' queryArgs.append(numPredRange[0]) queryArgs.append(numPredRange[1]) if startFrom is not None: if not isinstance(startFrom, UUID): try: startFrom = UUID(startFrom) except: startFrom = None if startFrom is not None: filterStr += ' AND img.id > %s ' queryArgs.append(startFrom) filterStr = filterStr.strip() if filterStr.startswith('AND'): filterStr = filterStr[3:] if len(filterStr.strip()): filterStr = 'WHERE ' + filterStr filterStr = sql.SQL(filterStr) orderStr = sql.SQL('ORDER BY img.id ASC') if orderBy is not None: orderStr = sql.SQL('ORDER BY {} {}, img.id ASC').format( sql.SQL(orderBy), sql.SQL(order)) limitStr = sql.SQL('') if isinstance(limit, float): if not math.isnan(limit): limit = int(limit) else: limit = self.NUM_IMAGES_LIMIT elif isinstance(limit, str): try: limit = int(limit) except: limit = self.NUM_IMAGES_LIMIT elif not isinstance(limit, int): limit = self.NUM_IMAGES_LIMIT limit = max(min(limit, self.NUM_IMAGES_LIMIT), 1) limitStr = sql.SQL('LIMIT %s') queryArgs.append(limit) queryStr = sql.SQL(''' SELECT img.id, filename, EXTRACT(epoch FROM date_added) AS date_added, COALESCE(viewcount, 0) AS viewcount, EXTRACT(epoch FROM last_viewed) AS last_viewed, COALESCE(num_anno, 0) AS num_anno, COALESCE(num_pred, 0) AS num_pred, img.isGoldenQuestion FROM {id_img} AS img FULL OUTER JOIN ( SELECT image, COUNT(*) AS viewcount, MAX(last_checked) AS last_viewed FROM {id_iu} GROUP BY image ) AS iu ON img.id = iu.image FULL OUTER JOIN ( SELECT image, COUNT(*) AS num_anno FROM {id_anno} GROUP BY image ) AS anno ON img.id = anno.image FULL OUTER JOIN ( SELECT image, COUNT(*) AS num_pred FROM {id_pred} GROUP BY image ) AS pred ON img.id = pred.image {filter} {order} {limit} ''').format(id_img=sql.Identifier(project, 'image'), id_iu=sql.Identifier(project, 'image_user'), id_anno=sql.Identifier(project, 'annotation'), id_pred=sql.Identifier(project, 'prediction'), filter=filterStr, order=orderStr, limit=limitStr) result = self.dbConnector.execute(queryStr, tuple(queryArgs), 'all') for idx in range(len(result)): result[idx]['id'] = str(result[idx]['id']) return result def uploadImages(self, project, images, existingFiles='keepExisting', splitImages=False, splitProperties=None): ''' Receives a dict of files (bottle.py file format), verifies their file extension and checks if they are loadable by PIL. If they are, they are saved to disk in the project's image folder, and registered in the database. Parameter "existingFiles" can be set as follows: - "keepExisting" (default): if an image already exists on disk with the same path/file name, the new image will be renamed with an underscore and trailing number. - "skipExisting": do not save images that already exist on disk under the same path/file name. - "replaceExisting": overwrite images that exist with the same path/file name. Note: in this case all existing anno- tations, predictions, and other metadata about those images, will be removed from the database. If "splitImages" is True, the uploaded images will be automati- cally divided into patches on a regular grid according to what is defined in "splitProperties". For example, the following definition: splitProperties = { 'patchSize': (800, 600), 'stride': (400, 300), 'tight': True } would divide the images into patches of size 800x600, with over- lap of 50% (denoted by the "stride" being half the "patchSize"), and with all patches completely inside the original image (para- meter "tight" makes the last patches to the far left and bottom of the image being fully inside the original image; they are shif- ted if needed). Instead of the full images, the patches are stored on disk and re- ferenced through the database. The name format for patches is "imageName_x_y.jpg", with "imageName" denoting the name of the ori- ginal image, and "x" and "y" the left and top position of the patch inside the original image. Returns image keys for images that were successfully saved, and keys and error messages for those that were not. ''' imgPaths_valid = [] imgs_valid = [] imgs_warn = {} imgs_error = {} for key in images.keys(): try: nextUpload = images[key] nextFileName = nextUpload.raw_filename #TODO: check if raw_filename is compatible with uploads made from Windows # check if correct file suffix _, ext = os.path.splitext(nextFileName) if not ext.lower() in valid_image_extensions: raise Exception(f'Invalid file type (*{ext})') # check if loadable as image cache = io.BytesIO() nextUpload.save(cache) try: image = Image.open(cache) except Exception: raise Exception('File is not a valid image.') # prepare image(s) to save to disk parent, filename = os.path.split(nextFileName) destFolder = os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), project, parent) os.makedirs(destFolder, exist_ok=True) images = [] filenames = [] if not splitImages: # upload the single image directly images.append(image) filenames.append(filename) else: # split image into patches instead images, coords = split_image(image, splitProperties['patchSize'], splitProperties['stride'], splitProperties['tight']) bareFileName, ext = os.path.splitext(filename) filenames = [ f'{bareFileName}_{c[0]}_{c[1]}{ext}' for c in coords ] # register and save all the images for i in range(len(images)): subImage = images[i] subFilename = filenames[i] absFilePath = os.path.join(destFolder, subFilename) # check if an image with the same name does not already exist newFileName = subFilename fileExists = os.path.exists(absFilePath) if fileExists: if existingFiles == 'keepExisting': # rename new file while (os.path.exists(absFilePath)): # rename file fn, ext = os.path.splitext(newFileName) match = self.countPattern.search(fn) if match is None: newFileName = fn + '_1' + ext else: # parse number number = int(fn[match.span()[0] + 1:match.span()[1]]) newFileName = fn[:match.span( )[0]] + '_' + str(number + 1) + ext absFilePath = os.path.join( destFolder, newFileName) if not os.path.exists(absFilePath): imgs_warn[ key] = 'An image with name "{}" already exists under given path on disk. Image has been renamed to "{}".'.format( subFilename, newFileName) elif existingFiles == 'skipExisting': # ignore new file imgs_warn[ key] = f'Image "{newFileName}" already exists on disk and has been skipped.' imgs_valid.append(key) imgPaths_valid.append( os.path.join(parent, newFileName)) continue elif existingFiles == 'replaceExisting': # overwrite new file; first remove metadata queryStr = sql.SQL(''' DELETE FROM {id_iu} WHERE image = ( SELECT id FROM {id_img} WHERE filename = %s ); DELETE FROM {id_anno} WHERE image = ( SELECT id FROM {id_img} WHERE filename = %s ); DELETE FROM {id_pred} WHERE image = ( SELECT id FROM {id_img} WHERE filename = %s ); DELETE FROM {id_img} WHERE filename = %s; ''').format( id_iu=sql.Identifier(project, 'image_user'), id_anno=sql.Identifier(project, 'annotation'), id_pred=sql.Identifier(project, 'prediction'), id_img=sql.Identifier(project, 'image')) self.dbConnector.execute(queryStr, tuple([nextFileName] * 4), None) # remove file try: os.remove(absFilePath) imgs_warn[key] = 'Image "{}" already existed on disk and has been deleted.\n'.format(newFileName) + \ 'All metadata (views, annotations, predictions) have been removed from the database.' except: imgs_warn[key] = 'Image "{}" already existed on disk but could not be deleted.\n'.format(newFileName) + \ 'All metadata (views, annotations, predictions) have been removed from the database.' # write to disk fileParent, _ = os.path.split(absFilePath) if len(fileParent): os.makedirs(fileParent, exist_ok=True) subImage.save(absFilePath) imgs_valid.append(key) imgPaths_valid.append(os.path.join(parent, newFileName)) except Exception as e: imgs_error[key] = str(e) # register valid images in database if len(imgPaths_valid): queryStr = sql.SQL(''' INSERT INTO {id_img} (filename) VALUES %s ON CONFLICT (filename) DO NOTHING; ''').format(id_img=sql.Identifier(project, 'image')) self.dbConnector.insert(queryStr, [(i, ) for i in imgPaths_valid]) result = { 'imgs_valid': imgs_valid, 'imgPaths_valid': imgPaths_valid, 'imgs_warn': imgs_warn, 'imgs_error': imgs_error } return result def scanForImages(self, project): ''' Searches the project image folder on disk for files that are valid, but have not (yet) been added to the database. Returns a list of paths with files. ''' # scan disk for files projectFolder = os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), project) if (not os.path.isdir(projectFolder)) and ( not os.path.islink(projectFolder)): # no folder exists for the project (should not happen due to broadcast at project creation) return [] imgs_disk = listDirectory(projectFolder, recursive=True) # get all existing file paths from database imgs_database = set() queryStr = sql.SQL(''' SELECT filename FROM {id_img}; ''').format(id_img=sql.Identifier(project, 'image')) result = self.dbConnector.execute(queryStr, None, 'all') for r in range(len(result)): imgs_database.add(result[r]['filename']) # filter imgs_candidates = imgs_disk.difference(imgs_database) return list(imgs_candidates) def addExistingImages(self, project, imageList=None): ''' Scans the project folder on the file system for images that are physically saved, but not (yet) added to the database. Adds them to the project's database schema. If an imageList iterable is provided, only the intersection between identified images on disk and in the iterable are added. If 'imageList' is a string with contents 'all', all untracked images will be added. Returns a list of image IDs and file names that were eventually added to the project database schema. ''' # get all images on disk that are not in database imgs_candidates = self.scanForImages(project) if imageList is None or (isinstance(imageList, str) and imageList.lower() == 'all'): imgs_add = imgs_candidates else: if isinstance(imageList, dict): imageList = list(imageList.keys()) imgs_add = list(set(imgs_candidates).intersection(set(imageList))) if not len(imgs_add): return 0, [] # add to database queryStr = sql.SQL(''' INSERT INTO {id_img} (filename) VALUES %s; ''').format(id_img=sql.Identifier(project, 'image')) self.dbConnector.insert(queryStr, tuple([(i, ) for i in imgs_add])) # get IDs of newly added images queryStr = sql.SQL(''' SELECT id, filename FROM {id_img} WHERE filename IN %s; ''').format(id_img=sql.Identifier(project, 'image')) result = self.dbConnector.execute(queryStr, (tuple(imgs_add), ), 'all') status = (0 if result is not None and len(result) else 1) #TODO return status, result def removeImages(self, project, imageList, forceRemove=False, deleteFromDisk=False): ''' Receives an iterable of image IDs and removes them from the project database schema, including associated user views, annotations, and predictions made. Only removes entries if no user views, annotations, and predictions exist, or else if "forceRemove" is True. If "deleteFromDisk" is True, the image files are also deleted from the project directory on the file system. Returns a list of images that were deleted. ''' imageList = tuple([(UUID(i), ) for i in imageList]) queryArgs = [] deleteArgs = [] if forceRemove: queryStr = sql.SQL(''' SELECT id, filename FROM {id_img} WHERE id IN %s; ''').format(id_img=sql.Identifier(project, 'image')) queryArgs = tuple([imageList]) deleteStr = sql.SQL(''' DELETE FROM {id_iu} WHERE image IN %s; DELETE FROM {id_anno} WHERE image IN %s; DELETE FROM {id_pred} WHERE image IN %s; DELETE FROM {id_img} WHERE id IN %s; ''').format(id_iu=sql.Identifier(project, 'image_user'), id_anno=sql.Identifier(project, 'annotation'), id_pred=sql.Identifier(project, 'prediction'), id_img=sql.Identifier(project, 'image')) deleteArgs = tuple([imageList] * 4) else: queryStr = sql.SQL(''' SELECT id, filename FROM {id_img} WHERE id IN %s AND id NOT IN ( SELECT image FROM {id_iu} WHERE image IN %s UNION ALL SELECT image FROM {id_anno} WHERE image IN %s UNION ALL SELECT image FROM {id_pred} WHERE image IN %s ); ''').format(id_img=sql.Identifier(project, 'image'), id_iu=sql.Identifier(project, 'image_user'), id_anno=sql.Identifier(project, 'annotation'), id_pred=sql.Identifier(project, 'prediction')) queryArgs = tuple([imageList] * 4) deleteStr = sql.SQL(''' DELETE FROM {id_img} WHERE id IN %s AND id NOT IN ( SELECT image FROM {id_iu} WHERE image IN %s UNION ALL SELECT image FROM {id_anno} WHERE image IN %s UNION ALL SELECT image FROM {id_pred} WHERE image IN %s ); ''').format(id_img=sql.Identifier(project, 'image'), id_iu=sql.Identifier(project, 'image_user'), id_anno=sql.Identifier(project, 'annotation'), id_pred=sql.Identifier(project, 'prediction')) deleteArgs = tuple([imageList] * 4) # retrieve images to be deleted imgs_del = self.dbConnector.execute(queryStr, queryArgs, 'all') if imgs_del is None: imgs_del = [] if len(imgs_del): # delete images self.dbConnector.execute(deleteStr, deleteArgs, None) if deleteFromDisk: projectFolder = os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), project) if os.path.isdir(projectFolder) or os.path.islink( projectFolder): for i in imgs_del: filePath = os.path.join(projectFolder, i['filename']) if os.path.isfile(filePath): os.remove(filePath) # convert UUID for idx in range(len(imgs_del)): imgs_del[idx]['id'] = str(imgs_del[idx]['id']) return imgs_del def removeOrphanedImages(self, project): ''' Queries the project's image entries in the database and retrieves entries for which no image can be found on disk anymore. Removes and returns those entries and all associated (meta-) data from the database. ''' imgs_DB = self.dbConnector.execute( sql.SQL(''' SELECT id, filename FROM {id_img}; ''').format(id_img=sql.Identifier(project, 'image')), None, 'all') projectFolder = os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), project) if (not os.path.isdir(projectFolder)) and ( not os.path.islink(projectFolder)): return [] imgs_disk = listDirectory(projectFolder, recursive=True) imgs_disk = set(imgs_disk) # get orphaned images imgs_orphaned = [] for i in imgs_DB: if i['filename'] not in imgs_disk: imgs_orphaned.append(i['id']) # imgs_orphaned = list(set(imgs_DB).difference(imgs_disk)) if not len(imgs_orphaned): return [] # remove self.dbConnector.execute( sql.SQL(''' DELETE FROM {id_iu} WHERE image IN %s; DELETE FROM {id_anno} WHERE image IN %s; DELETE FROM {id_pred} WHERE image IN %s; DELETE FROM {id_img} WHERE id IN %s; ''').format(id_iu=sql.Identifier(project, 'image_user'), id_anno=sql.Identifier(project, 'annotation'), id_pred=sql.Identifier(project, 'prediction'), id_img=sql.Identifier(project, 'image')), tuple([tuple(imgs_orphaned)] * 4), None) return imgs_orphaned def prepareDataDownload(self, project, dataType='annotation', userList=None, dateRange=None, extraFields=None, segmaskFilenameOptions=None, segmaskEncoding='rgb'): ''' Polls the database for project data according to the specified restrictions: - dataType: "annotation" or "prediction" - userList: for type "annotation": None (all users) or an iterable of user names - dateRange: None (all dates) or two values for a mini- mum and maximum timestamp - extraFields: None (no field) or dict of keywords and bools for additional fields (e.g. browser meta) to be queried. - segmaskFilenameOptions: customization parameters for segmentation mask images' file names. - segmaskEncoding: encoding of the segmentation mask pixel values ("rgb" or "indexed") Creates a file in this machine's temporary directory and returns the file name to it. Note that in some cases (esp. for semantic segmentation), the number of queryable entries may be limited due to file size and free disk space restrictions. An upper cei- ling is specified in the configuration *.ini file ('TODO') ''' now = datetime.now(tz=pytz.utc) # argument check if userList is None: userList = [] elif isinstance(userList, str): userList = [userList] if dateRange is None: dateRange = [] elif len(dateRange) == 1: dateRange = [dateRange, now] if extraFields is None or not isinstance(extraFields, dict): extraFields = {'meta': False} else: if not 'meta' in extraFields or not isinstance( extraFields['meta'], bool): extraFields['meta'] = False if segmaskFilenameOptions is None: segmaskFilenameOptions = { 'baseName': 'filename', 'prefix': '', 'suffix': '' } else: if not 'baseName' in segmaskFilenameOptions or \ segmaskFilenameOptions['baseName'] not in ('filename', 'id'): segmaskFilenameOptions['baseName'] = 'filename' try: segmaskFilenameOptions['prefix'] = str( segmaskFilenameOptions['prefix']) except: segmaskFilenameOptions['prefix'] = '' try: segmaskFilenameOptions['suffix'] = str( segmaskFilenameOptions['suffix']) except: segmaskFilenameOptions['suffix'] = '' for char in self.FILENAMES_PROHIBITED_CHARS: segmaskFilenameOptions['prefix'] = segmaskFilenameOptions[ 'prefix'].replace(char, '_') segmaskFilenameOptions['suffix'] = segmaskFilenameOptions[ 'suffix'].replace(char, '_') # check metadata type: need to deal with segmentation masks separately if dataType == 'annotation': metaField = 'annotationtype' elif dataType == 'prediction': metaField = 'predictiontype' else: raise Exception('Invalid dataType specified ({})'.format(dataType)) metaType = self.dbConnector.execute( ''' SELECT {} FROM aide_admin.project WHERE shortname = %s; '''.format(metaField), (project, ), 1)[0][metaField] if metaType.lower() == 'segmentationmasks': is_segmentation = True fileExtension = '.zip' # create indexed color palette for segmentation masks if segmaskEncoding == 'indexed': try: indexedColors = [] labelClasses = self.dbConnector.execute( sql.SQL(''' SELECT idx, color FROM {id_lc} ORDER BY idx ASC; ''').format( id_lc=sql.Identifier(project, 'labelclass')), None, 'all') currentIndex = 1 for lc in labelClasses: if lc['idx'] == 0: # background class continue while currentIndex < lc['idx']: # gaps in label classes; fill with zeros indexedColors.extend([0, 0, 0]) currentIndex += 1 color = lc['color'] if color is None: # no color specified; add from defaults #TODO indexedColors.extend([0, 0, 0]) else: # convert to RGB format indexedColors.extend(helpers.hexToRGB(color)) except: # an error occurred; don't convert segmentation mask to indexed colors indexedColors = None else: indexedColors = None else: is_segmentation = False fileExtension = '.txt' #TODO: support JSON? # prepare output file filename = 'aide_query_{}'.format( now.strftime('%Y-%m-%d_%H-%M-%S')) + fileExtension destPath = os.path.join(self.tempDir, 'aide/downloadRequests', project) os.makedirs(destPath, exist_ok=True) destPath = os.path.join(destPath, filename) # generate query queryArgs = [] tableID = sql.Identifier(project, dataType) userStr = sql.SQL('') iuStr = sql.SQL('') dateStr = sql.SQL('') queryFields = [ 'filename', 'isGoldenQuestion', 'date_image_added', 'last_requested_image', 'image_corrupt' # default image fields ] if dataType == 'annotation': iuStr = sql.SQL(''' JOIN (SELECT image AS iu_image, username AS iu_username, viewcount, last_checked, last_time_required FROM {id_iu}) AS iu ON t.image = iu.iu_image AND t.username = iu.iu_username ''').format(id_iu=sql.Identifier(project, 'image_user')) if len(userList): userStr = sql.SQL('WHERE username IN %s') queryArgs.append(tuple(userList)) queryFields.extend( getattr(QueryStrings_annotation, metaType).value) queryFields.extend([ 'username', 'viewcount', 'last_checked', 'last_time_required' ]) #TODO: make customizable else: queryFields.extend( getattr(QueryStrings_prediction, metaType).value) if len(dateRange): if len(userStr.string): dateStr = sql.SQL( ' AND timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)' ) else: dateStr = sql.SQL( 'WHERE timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)' ) queryArgs.extend(dateRange) if not is_segmentation: # join label classes lcStr = sql.SQL(''' JOIN (SELECT id AS lcID, name AS labelclass_name, idx AS labelclass_index FROM {id_lc} ) AS lc ON label = lc.lcID ''').format(id_lc=sql.Identifier(project, 'labelclass')) queryFields.extend(['labelclass_name', 'labelclass_index']) else: lcStr = sql.SQL('') # remove redundant query fields queryFields = set(queryFields) for key in extraFields.keys(): if not extraFields[key]: queryFields.remove(key) queryFields = list(queryFields) queryStr = sql.SQL(''' SELECT * FROM {tableID} AS t JOIN ( SELECT id AS imgID, filename, isGoldenQuestion, date_added AS date_image_added, last_requested AS last_requested_image, corrupt AS image_corrupt FROM {id_img} ) AS img ON t.image = img.imgID {lcStr} {iuStr} {userStr} {dateStr} ''').format(tableID=tableID, id_img=sql.Identifier(project, 'image'), lcStr=lcStr, iuStr=iuStr, userStr=userStr, dateStr=dateStr) # query and process data if is_segmentation: mainFile = zipfile.ZipFile(destPath, 'w', zipfile.ZIP_DEFLATED) else: mainFile = open(destPath, 'w') metaStr = '; '.join(queryFields) + '\n' with self.dbConnector.execute_cursor(queryStr, tuple(queryArgs)) as cursor: while True: b = cursor.fetchone() if b is None: break if is_segmentation: # convert and store segmentation mask separately segmask_filename = 'segmentation_masks/' if segmaskFilenameOptions['baseName'] == 'id': innerFilename = b['image'] parent = '' else: innerFilename = b['filename'] parent, innerFilename = os.path.split(innerFilename) finalFilename = os.path.join( parent, segmaskFilenameOptions['prefix'] + innerFilename + segmaskFilenameOptions['suffix'] + '.tif') segmask_filename += finalFilename segmask = base64ToImage(b['segmentationmask'], b['width'], b['height']) if indexedColors is not None and len(indexedColors) > 0: # convert to indexed color and add color palette from label classes segmask = segmask.convert('RGB').convert( 'P', palette=Image.ADAPTIVE, colors=3) segmask.putpalette(indexedColors) # save bio = io.BytesIO() segmask.save(bio, 'TIFF') mainFile.writestr(segmask_filename, bio.getvalue()) # store metadata metaLine = '' for field in queryFields: if field.lower() == 'segmentationmask': continue metaLine += '{}; '.format(b[field.lower()]) metaStr += metaLine + '\n' if is_segmentation: mainFile.writestr('query.txt', metaStr) else: mainFile.write(metaStr) if is_segmentation: # append separate text file for label classes labelclassQuery = sql.SQL(''' SELECT id, name, color, labelclassgroup, idx AS labelclass_index FROM {id_lc}; ''').format(id_lc=sql.Identifier(project, 'labelclass')) result = self.dbConnector.execute(labelclassQuery, None, 'all') lcStr = 'id,name,color,labelclassgroup,labelclass_index\n' for r in result: lcStr += '{},{},{},{},{}\n'.format(r['id'], r['name'], r['color'], r['labelclassgroup'], r['labelclass_index']) mainFile.writestr('labelclasses.csv', lcStr) mainFile.close() return filename def watchImageFolders(self): ''' Queries all projects that have the image folder watch functionality enabled and updates the projects, one by one, with the latest image changes. ''' projects = self.dbConnector.execute( ''' SELECT shortname, watch_folder_remove_missing_enabled FROM aide_admin.project WHERE watch_folder_enabled IS TRUE; ''', None, 'all') for p in projects: pName = p['shortname'] # add new images _, imgs_added = self.addExistingImages(pName, None) # remove orphaned images (if enabled) if p['watch_folder_remove_missing_enabled']: imgs_orphaned = self.removeOrphanedImages(pName) if len(imgs_added) or len(imgs_orphaned): print( f'[Project {pName}] {len(imgs_added)} new images found and added, {len(imgs_orphaned)} orphaned images removed from database.' ) elif len(imgs_added): print( f'[Project {pName}] {len(imgs_added)} new images found and added.' ) def deleteProject(self, project, deleteFiles=False): ''' Irreproducibly deletes a project, including all data and metadata, from the database. If "deleteFiles" is True, then any data on disk (images, etc.) are also deleted. This cannot be undone. ''' print(f'Deleting project with shortname "{project}"...') # remove database entries print('\tRemoving database entries...') self.dbConnector.execute( ''' DELETE FROM aide_admin.authentication WHERE project = %s; DELETE FROM aide_admin.project WHERE shortname = %s; ''', ( project, project, ), None ) # already done by DataAdministration.middleware, but we do it again to be sure self.dbConnector.execute(''' DROP SCHEMA IF EXISTS {} CASCADE; '''.format(project), None, None) #TODO: Identifier? if deleteFiles: print('\tRemoving files...') messages = [] def _onError(function, path, excinfo): #TODO from celery.contrib import rdb rdb.set_trace() messages.append({ 'function': function, 'path': path, 'excinfo': excinfo }) try: shutil.rmtree(os.path.join( self.config.getProperty('FileServer', 'staticfiles_dir'), project), onerror=_onError) except Exception as e: messages.append(str(e)) return messages return 0
class DBMiddleware(): def __init__(self, config): self.config = config self.dbConnector = Database(config) self.project_immutables = { } # project settings that cannot be changed (project shorthand -> {settings}) self._fetchProjectSettings() self.sqlBuilder = SQLStringBuilder() self.annoParser = AnnotationParser() def _fetchProjectSettings(self): # AI controller URI aiControllerURI = self.config.getProperty('Server', 'aiController_uri') if aiControllerURI is None or aiControllerURI.strip() == '': # no AI backend configured aiControllerURI = None # global, project-independent settings self.globalSettings = { 'indexURI': self.config.getProperty('Server', 'index_uri', type=str, fallback='/'), 'dataServerURI': self.config.getProperty('Server', 'dataServer_uri'), 'aiControllerURI': aiControllerURI } # default styles try: # check if custom default styles are provided self.defaultStyles = json.load( open('config/default_ui_settings.json', 'r')) except: # resort to built-in styles self.defaultStyles = json.load( open( 'modules/ProjectAdministration/static/json/default_ui_settings.json', 'r')) def _assemble_annotations(self, project, cursor, hideGoldenQuestionInfo): response = {} while True: b = cursor.fetchone() if b is None: break imgID = str(b['image']) if not imgID in response: response[imgID] = { 'fileName': b['filename'], 'predictions': {}, 'annotations': {}, 'last_checked': None } viewcount = b['viewcount'] if viewcount is not None: response[imgID]['viewcount'] = viewcount last_checked = b['last_checked'] if last_checked is not None: if response[imgID]['last_checked'] is None: response[imgID]['last_checked'] = last_checked else: response[imgID]['last_checked'] = max( response[imgID]['last_checked'], last_checked) if not hideGoldenQuestionInfo: response[imgID]['isGoldenQuestion'] = b['isgoldenquestion'] # parse annotations and predictions entryID = str(b['id']) if b['ctype'] is not None: colnames = self.sqlBuilder.getColnames( self.project_immutables[project]['annotationType'], self.project_immutables[project]['predictionType'], b['ctype']) entry = {} for c in colnames: value = b[c] if isinstance(value, datetime): value = value.timestamp() elif isinstance(value, UUID): value = str(value) entry[c] = value if b['ctype'] == 'annotation': response[imgID]['annotations'][entryID] = entry elif b['ctype'] == 'prediction': response[imgID]['predictions'][entryID] = entry return response def _set_images_requested(self, project, imageIDs): ''' Sets column "last_requested" of relation "image" to the current date. This is done during image querying to signal that an image has been requested, but not (yet) viewed. ''' # prepare insertion values now = datetime.now(tz=pytz.utc) vals = [] for key in imageIDs: vals.append(key) if len(vals): queryStr = sql.SQL(''' UPDATE {id_img} SET last_requested = %s WHERE id IN %s; ''').format(id_img=sql.Identifier(project, 'image')) self.dbConnector.execute(queryStr, ( now, tuple(vals), ), None) def _get_sample_metadata(self, metaType): ''' Returns a dummy annotation or prediction for the sample image in the "exampleData" folder, depending on the "metaType" specified (i.e., labels, points, boundingBoxes, or segmentationMasks). ''' if metaType == 'labels': return { 'id': '00000000-0000-0000-0000-000000000000', 'label': '00000000-0000-0000-0000-000000000000', 'confidence': 1.0, 'priority': 1.0, 'viewcount': None } elif metaType == 'points' or metaType == 'boundingBoxes': return { 'id': '00000000-0000-0000-0000-000000000000', 'label': '00000000-0000-0000-0000-000000000000', 'x': 0.542959427207637, 'y': 0.5322069489713102, 'width': 0.6133651551312653, 'height': 0.7407598263401316, 'confidence': 1.0, 'priority': 1.0, 'viewcount': None } elif metaType == 'segmentationMasks': # read segmentation mask from disk segmask = Image.open( 'modules/LabelUI/static/exampleData/sample_segmentationMask.tif' ) segmask, width, height = helpers.imageToBase64(segmask) return { 'id': '00000000-0000-0000-0000-000000000000', 'width': width, 'height': height, 'segmentationmask': segmask, 'confidence': 1.0, 'priority': 1.0, 'viewcount': None } else: return {} def get_project_immutables(self, project): if project not in self.project_immutables: queryStr = 'SELECT annotationType, predictionType, demoMode FROM aide_admin.project WHERE shortname = %s;' result = self.dbConnector.execute(queryStr, (project, ), 1) if result and len(result): self.project_immutables[project] = { 'annotationType': result[0]['annotationtype'], 'predictionType': result[0]['predictiontype'], 'demoMode': helpers.checkDemoMode(project, self.dbConnector) } else: return None return self.project_immutables[project] def get_dynamic_project_settings(self, project): queryStr = 'SELECT ui_settings FROM aide_admin.project WHERE shortname = %s;' result = self.dbConnector.execute(queryStr, (project, ), 1) result = json.loads(result[0]['ui_settings']) # complete styles with defaults where necessary (may be required for project that got upgraded from v1) result = helpers.check_args(result, self.defaultStyles) return result def getProjectSettings(self, project): ''' Queries the database for general project-specific metadata, such as: - Classes: names, indices, default colors - Annotation type: one of {class labels, positions, bboxes} ''' # publicly available info from DB projSettings = self.getProjectInfo(project) # label classes projSettings['classes'] = self.getClassDefinitions(project) # static and dynamic project settings and properties from configuration file projSettings = { **projSettings, **self.get_project_immutables(project), **self.get_dynamic_project_settings(project), **self.globalSettings } # append project shorthand to AIController URI if 'aiControllerURI' in projSettings and projSettings[ 'aiControllerURI'] is not None and len( projSettings['aiControllerURI']): projSettings['aiControllerURI'] = os.path.join( projSettings['aiControllerURI'], project) + '/' return projSettings def getProjectInfo(self, project): ''' Returns safe, shareable information about the project (i.e., users don't need to be part of the project to see these data). ''' queryStr = ''' SELECT shortname, name, description, demoMode, interface_enabled, archived, ai_model_enabled, ai_model_library, ai_alcriterion_library, segmentation_ignore_unlabeled FROM aide_admin.project WHERE shortname = %s ''' result = self.dbConnector.execute(queryStr, (project, ), 1)[0] # provide flag if AI model is available aiModelAvailable = all([ result['ai_model_enabled'], result['ai_model_library'] is not None and len(result['ai_model_library']), result['ai_alcriterion_library'] is not None and len(result['ai_alcriterion_library']) ]) return { 'projectShortname': result['shortname'], 'projectName': result['name'], 'projectDescription': result['description'], 'demoMode': result['demomode'], 'interface_enabled': result['interface_enabled'] and not result['archived'], 'ai_model_available': aiModelAvailable, 'segmentation_ignore_unlabeled': result['segmentation_ignore_unlabeled'] } def getClassDefinitions(self, project, showHidden=False): ''' Returns a dictionary with entries for all classes in the project. ''' # query data if showHidden: hiddenSpec = '' else: hiddenSpec = 'WHERE hidden IS false' queryStr = sql.SQL(''' SELECT 'group' AS type, id, NULL as idx, name, color, parent, NULL AS keystroke, NULL AS hidden FROM {} UNION ALL SELECT 'class' AS type, id, idx, name, color, labelclassgroup, keystroke, hidden FROM {} {}; ''').format(sql.Identifier(project, 'labelclassgroup'), sql.Identifier(project, 'labelclass'), sql.SQL(hiddenSpec)) classData = self.dbConnector.execute(queryStr, None, 'all') # assemble entries first allEntries = {} numClasses = 0 for cl in classData: id = str(cl['id']) entry = { 'id': id, 'name': cl['name'], 'color': cl['color'], 'parent': str(cl['parent']) if cl['parent'] is not None else None, 'hidden': cl['hidden'] } if cl['type'] == 'group': entry['entries'] = {} else: entry['index'] = cl['idx'] entry['keystroke'] = cl['keystroke'] numClasses += 1 allEntries[id] = entry # transform into tree def _find_parent(tree, parentID): if parentID is None: return None elif 'id' in tree and tree['id'] == parentID: return tree elif 'entries' in tree: for ek in tree['entries'].keys(): rv = _find_parent(tree['entries'][ek], parentID) if rv is not None: return rv return None else: return None allEntries = {'entries': allEntries} for key in list(allEntries['entries'].keys()): entry = allEntries['entries'][key] parentID = entry['parent'] del entry['parent'] if parentID is None: # entry or group with no parent: append to root directly allEntries['entries'][key] = entry else: # move item parent = _find_parent(allEntries, parentID) parent['entries'][key] = entry del allEntries['entries'][key] allEntries['numClasses'] = numClasses return allEntries def getBatch_fixed(self, project, username, data, hideGoldenQuestionInfo=True): ''' Returns entries from the database based on the list of data entry identifiers specified. ''' if not len(data): return {'entries': {}} # query projImmutables = self.get_project_immutables(project) queryStr = self.sqlBuilder.getFixedImagesQueryString( project, projImmutables['annotationType'], projImmutables['predictionType'], projImmutables['demoMode']) # parse results queryVals = ( tuple(UUID(d) for d in data), username, username, ) if projImmutables['demoMode']: queryVals = (tuple(UUID(d) for d in data), ) with self.dbConnector.execute_cursor(queryStr, queryVals) as cursor: try: response = self._assemble_annotations(project, cursor, hideGoldenQuestionInfo) # self.dbConnector.conn.commit() except Exception as e: print(e) # self.dbConnector.conn.rollback() finally: pass # cursor.close() # mark images as requested self._set_images_requested(project, response) return {'entries': response} def getBatch_auto(self, project, username, order='unlabeled', subset='default', limit=None, hideGoldenQuestionInfo=True): ''' TODO: description ''' # query projImmutables = self.get_project_immutables(project) queryStr = self.sqlBuilder.getNextBatchQueryString( project, projImmutables['annotationType'], projImmutables['predictionType'], order, subset, projImmutables['demoMode']) # limit (TODO: make 128 a hyperparameter) if limit is None: limit = 128 else: limit = min(int(limit), 128) # parse results queryVals = ( username, username, limit, username, ) if projImmutables[ 'demoMode']: #TODO: demoMode can now change dynamically queryVals = (limit, ) with self.dbConnector.execute_cursor(queryStr, queryVals) as cursor: response = self._assemble_annotations(project, cursor, hideGoldenQuestionInfo) # mark images as requested self._set_images_requested(project, response) return {'entries': response} def getBatch_timeRange(self, project, minTimestamp, maxTimestamp, userList, skipEmptyImages=False, limit=None, goldenQuestionsOnly=False, hideGoldenQuestionInfo=True): ''' Returns images that have been annotated within the given time range and/or by the given user(s). All arguments are optional. Useful for reviewing existing annotations. ''' # query string projImmutables = self.get_project_immutables(project) queryStr = self.sqlBuilder.getDateQueryString( project, projImmutables['annotationType'], minTimestamp, maxTimestamp, userList, skipEmptyImages, goldenQuestionsOnly) # check validity and provide arguments queryVals = [] if userList is not None: queryVals.append(tuple(userList)) if minTimestamp is not None: queryVals.append(minTimestamp) if maxTimestamp is not None: queryVals.append(maxTimestamp) if skipEmptyImages and userList is not None: queryVals.append(tuple(userList)) # limit (TODO: make 128 a hyperparameter) if limit is None: limit = 128 else: limit = min(int(limit), 128) queryVals.append(limit) if userList is not None: queryVals.append(tuple(userList)) # query and parse results with self.dbConnector.execute_cursor(queryStr, tuple(queryVals)) as cursor: try: response = self._assemble_annotations(project, cursor, hideGoldenQuestionInfo) # self.dbConnector.conn.commit() except Exception as e: print(e) # self.dbConnector.conn.rollback() finally: pass # cursor.close() # # mark images as requested # self._set_images_requested(project, response) return {'entries': response} def get_timeRange(self, project, userList, skipEmptyImages=False, goldenQuestionsOnly=False): ''' Returns two timestamps denoting the temporal limits within which images have been viewed by the users provided in the userList. Arguments: - userList: string (single user name) or list of strings (multiple). Can also be None; in this case all annotations will be checked. - skipEmptyImages: if True, only images that contain at least one annotation will be considered. - goldenQuestionsOnly: if True, only images flagged as golden questions will be shown. ''' # query string queryStr = self.sqlBuilder.getTimeRangeQueryString( project, userList, skipEmptyImages, goldenQuestionsOnly) arguments = (None if userList is None else tuple(userList)) result = self.dbConnector.execute(queryStr, (arguments, ), numReturn=1) if result is not None and len(result): return { 'minTimestamp': result[0]['mintimestamp'], 'maxTimestamp': result[0]['maxtimestamp'], } else: return {'error': 'no annotations made'} def get_sampleData(self, project): ''' Returns a sample image from the project, with annotations (from one of the admins) and predictions. If no image, no annotations, and/or no predictions are available, a built-in default is returned instead. ''' projImmutables = self.get_project_immutables(project) queryStr = self.sqlBuilder.getSampleDataQueryString( project, projImmutables['annotationType'], projImmutables['predictionType']) # query and parse results response = None with self.dbConnector.execute_cursor(queryStr, None) as cursor: try: response = self._assemble_annotations(project, cursor, True) except: pass if response is None or not len(response): # no valid data found for project; fall back to sample data response = { '00000000-0000-0000-0000-000000000000': { 'fileName': '/static/interface/exampleData/sample_image.jpg', 'viewcount': 1, 'annotations': { '00000000-0000-0000-0000-000000000000': self._get_sample_metadata( projImmutables['annotationType']) }, 'predictions': { '00000000-0000-0000-0000-000000000000': self._get_sample_metadata( projImmutables['predictionType']) }, 'last_checked': None, 'isGoldenQuestion': True } } return response def submitAnnotations(self, project, username, submissions): ''' Sends user-provided annotations to the database. ''' projImmutables = self.get_project_immutables(project) if projImmutables['demoMode']: return 1 # assemble values colnames = getattr(QueryStrings_annotation, projImmutables['annotationType']).value values_insert = [] values_update = [] meta = (None if not 'meta' in submissions else json.dumps( submissions['meta'])) # for deletion: remove all annotations whose image ID matches but whose annotation ID is not among the submitted ones ids = [] viewcountValues = [] for imageKey in submissions['entries']: entry = submissions['entries'][imageKey] try: lastChecked = entry['timeCreated'] lastTimeRequired = entry['timeRequired'] if lastTimeRequired is None: lastTimeRequired = 0 except: lastChecked = datetime.now(tz=pytz.utc) lastTimeRequired = 0 try: numInteractions = int(entry['numInteractions']) except: numInteractions = 0 if 'annotations' in entry and len(entry['annotations']): for annotation in entry['annotations']: # assemble annotation values annotationTokens = self.annoParser.parseAnnotation( annotation) annoValues = [] for cname in colnames: if cname == 'id': if cname in annotationTokens: # cast and only append id if the annotation is an existing one annoValues.append(UUID( annotationTokens[cname])) ids.append(UUID(annotationTokens[cname])) elif cname == 'image': annoValues.append(UUID(imageKey)) elif cname == 'label' and annotationTokens[ cname] is not None: annoValues.append(UUID(annotationTokens[cname])) elif cname == 'timeCreated': try: annoValues.append( dateutil.parser.parse( annotationTokens[cname])) except: annoValues.append(datetime.now(tz=pytz.utc)) elif cname == 'timeRequired': timeReq = annotationTokens[cname] if timeReq is None: timeReq = 0 annoValues.append(timeReq) elif cname == 'username': annoValues.append(username) elif cname in annotationTokens: annoValues.append(annotationTokens[cname]) elif cname == 'unsure': if 'unsure' in annotationTokens and annotationTokens[ 'unsure'] is not None: annoValues.append(annotationTokens[cname]) else: annoValues.append(False) elif cname == 'meta': annoValues.append(meta) else: annoValues.append(None) if 'id' in annotationTokens: # existing annotation; update values_update.append(tuple(annoValues)) else: # new annotation values_insert.append(tuple(annoValues)) viewcountValues.append( (username, imageKey, 1, lastChecked, lastChecked, lastTimeRequired, lastTimeRequired, numInteractions, meta)) # delete all annotations that are not in submitted batch imageKeys = list(UUID(k) for k in submissions['entries']) if len(imageKeys): if len(ids): queryStr = sql.SQL(''' DELETE FROM {id_anno} WHERE username = %s AND id IN ( SELECT idQuery.id FROM ( SELECT * FROM {id_anno} WHERE id NOT IN %s ) AS idQuery JOIN ( SELECT * FROM {id_anno} WHERE image IN %s ) AS imageQuery ON idQuery.id = imageQuery.id); ''').format(id_anno=sql.Identifier(project, 'annotation')) self.dbConnector.execute(queryStr, ( username, tuple(ids), tuple(imageKeys), )) else: # no annotations submitted; delete all annotations submitted before queryStr = sql.SQL(''' DELETE FROM {id_anno} WHERE username = %s AND image IN %s; ''').format(id_anno=sql.Identifier(project, 'annotation')) self.dbConnector.execute(queryStr, ( username, tuple(imageKeys), )) # insert new annotations if len(values_insert): queryStr = sql.SQL(''' INSERT INTO {id_anno} ({cols}) VALUES %s ; ''').format( id_anno=sql.Identifier(project, 'annotation'), cols=sql.SQL(', ').join([sql.SQL(c) for c in colnames[1:] ]) # skip 'id' column ) self.dbConnector.insert(queryStr, values_insert) # update existing annotations if len(values_update): updateCols = [] for col in colnames: if col == 'label': updateCols.append(sql.SQL('label = UUID(e.label)')) elif col == 'timeRequired': # we sum the required times together updateCols.append( sql.SQL( 'timeRequired = COALESCE(a.timeRequired,0) + COALESCE(e.timeRequired,0)' )) else: updateCols.append( sql.SQL('{col} = e.{col}').format(col=sql.SQL(col))) queryStr = sql.SQL(''' UPDATE {id_anno} AS a SET {updateCols} FROM (VALUES %s) AS e({colnames}) WHERE e.id = a.id ''').format(id_anno=sql.Identifier(project, 'annotation'), updateCols=sql.SQL(', ').join(updateCols), colnames=sql.SQL(', ').join( [sql.SQL(c) for c in colnames])) self.dbConnector.insert(queryStr, values_update) # viewcount table queryStr = sql.SQL(''' INSERT INTO {id_iu} (username, image, viewcount, first_checked, last_checked, last_time_required, total_time_required, num_interactions, meta) VALUES %s ON CONFLICT (username, image) DO UPDATE SET viewcount = image_user.viewcount + 1, last_checked = EXCLUDED.last_checked, last_time_required = EXCLUDED.last_time_required, total_time_required = EXCLUDED.total_time_required + image_user.total_time_required, num_interactions = EXCLUDED.num_interactions + image_user.num_interactions, meta = EXCLUDED.meta; ''').format(id_iu=sql.Identifier(project, 'image_user')) self.dbConnector.insert(queryStr, viewcountValues) return 0 def setGoldenQuestions(self, project, submissions): ''' Receives an iterable of tuples (uuid, bool) and updates the property "isGoldenQuestion" of the images accordingly. ''' projImmutables = self.get_project_immutables(project) if projImmutables['demoMode']: return 1 queryStr = sql.SQL(''' UPDATE {id_img} AS img SET isGoldenQuestion = c.isGoldenQuestion FROM (VALUES %s) AS c (id, isGoldenQuestion) WHERE c.id = img.id; ''').format(id_img=sql.Identifier(project, 'image')) self.dbConnector.insert(queryStr, submissions) return 0