コード例 #1
0
class DBMiddleware():
    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)

        self._fetchProjectSettings()
        self.sqlBuilder = SQLStringBuilder(config)
        self.annoParser = AnnotationParser(config)

    def _fetchProjectSettings(self):
        # AI controller URI
        aiControllerURI = self.config.getProperty('Server', 'aiController_uri')
        if aiControllerURI is None or aiControllerURI.strip() == '':
            # no AI backend configured
            aiControllerURI = None

        # LabelUI drawing styles
        with open(
                self.config.getProperty(
                    'LabelUI',
                    'styles_file',
                    type=str,
                    fallback='modules/LabelUI/static/json/styles.json'),
                'r') as f:
            styles = json.load(f)

        # Image backdrops for index screen
        with open(
                self.config.getProperty(
                    'Project',
                    'backdrops_file',
                    type=str,
                    fallback='modules/LabelUI/static/json/backdrops.json'),
                'r') as f:
            backdrops = json.load(f)

        # Welcome message for UI tutorial
        with open(
                self.config.getProperty(
                    'Project',
                    'welcome_message_file',
                    type=str,
                    fallback=
                    'modules/LabelUI/static/templates/welcome_message.html'),
                'r') as f:
            welcomeMessage = f.readlines()

        self.projectSettings = {
            'projectName':
            self.config.getProperty('Project', 'projectName'),
            'projectDescription':
            self.config.getProperty('Project', 'projectDescription'),
            'indexURI':
            self.config.getProperty('Server',
                                    'index_uri',
                                    type=str,
                                    fallback='/'),
            'dataServerURI':
            self.config.getProperty('Server', 'dataServer_uri'),
            'aiControllerURI':
            aiControllerURI,
            'dataType':
            self.config.getProperty('Project', 'dataType', fallback='images'),
            'classes':
            self.getClassDefinitions(),
            'enableEmptyClass':
            self.config.getProperty('Project',
                                    'enableEmptyClass',
                                    fallback='no'),
            'annotationType':
            self.config.getProperty('Project', 'annotationType'),
            'predictionType':
            self.config.getProperty('Project', 'predictionType'),
            'showPredictions':
            self.config.getProperty('LabelUI',
                                    'showPredictions',
                                    fallback='yes'),
            'showPredictions_minConf':
            self.config.getProperty('LabelUI',
                                    'showPredictions_minConf',
                                    type=float,
                                    fallback=0.5),
            'carryOverPredictions':
            self.config.getProperty('LabelUI',
                                    'carryOverPredictions',
                                    fallback='no'),
            'carryOverRule':
            self.config.getProperty('LabelUI',
                                    'carryOverRule',
                                    fallback='maxConfidence'),
            'carryOverPredictions_minConf':
            self.config.getProperty('LabelUI',
                                    'carryOverPredictions_minConf',
                                    type=float,
                                    fallback=0.75),
            'defaultBoxSize_w':
            self.config.getProperty('LabelUI',
                                    'defaultBoxSize_w',
                                    type=int,
                                    fallback=10),
            'defaultBoxSize_h':
            self.config.getProperty('LabelUI',
                                    'defaultBoxSize_h',
                                    type=int,
                                    fallback=10),
            'minBoxSize_w':
            self.config.getProperty('Project',
                                    'box_minWidth',
                                    type=int,
                                    fallback=1),
            'minBoxSize_h':
            self.config.getProperty('Project',
                                    'box_minHeight',
                                    type=int,
                                    fallback=1),
            'numImagesPerBatch':
            self.config.getProperty('LabelUI',
                                    'numImagesPerBatch',
                                    type=int,
                                    fallback=1),
            'minImageWidth':
            self.config.getProperty('LabelUI',
                                    'minImageWidth',
                                    type=int,
                                    fallback=300),
            'numImageColumns_max':
            self.config.getProperty('LabelUI',
                                    'numImageColumns_max',
                                    type=int,
                                    fallback=1),
            'defaultImage_w':
            self.config.getProperty('LabelUI',
                                    'defaultImage_w',
                                    type=int,
                                    fallback=800),
            'defaultImage_h':
            self.config.getProperty('LabelUI',
                                    'defaultImage_h',
                                    type=int,
                                    fallback=600),
            'styles':
            styles['styles'],
            'backdrops':
            backdrops,
            'welcomeMessage':
            welcomeMessage,
            'demoMode':
            self.config.getProperty('Project',
                                    'demoMode',
                                    type=bool,
                                    fallback=False)
        }

    def _assemble_annotations(self, cursor):
        response = {}
        while True:
            b = cursor.fetchone()
            if b is None:
                break

            imgID = str(b['image'])
            if not imgID in response:
                response[imgID] = {
                    'fileName': b['filename'],
                    'predictions': {},
                    'annotations': {},
                    'last_checked': None
                }
            viewcount = b['viewcount']
            if viewcount is not None:
                response[imgID]['viewcount'] = viewcount
            last_checked = b['last_checked']
            if last_checked is not None:
                if response[imgID]['last_checked'] is None:
                    response[imgID]['last_checked'] = last_checked
                else:
                    response[imgID]['last_checked'] = max(
                        response[imgID]['last_checked'], last_checked)

            # parse annotations and predictions
            entryID = str(b['id'])
            if b['ctype'] is not None:
                colnames = self.sqlBuilder.getColnames(b['ctype'])
                entry = {}
                for c in colnames:
                    value = b[c]
                    if isinstance(value, datetime):
                        value = value.timestamp()
                    elif isinstance(value, UUID):
                        value = str(value)
                    entry[c] = value

                if b['ctype'] == 'annotation':
                    response[imgID]['annotations'][entryID] = entry
                elif b['ctype'] == 'prediction':
                    response[imgID]['predictions'][entryID] = entry

        return response

    def getProjectSettings(self):
        '''
            Queries the database for general project-specific metadata, such as:
            - Classes: names, indices, default colors
            - Annotation type: one of {class labels, positions, bboxes}
        '''
        return self.projectSettings

    def getProjectInfo(self):
        '''
            Returns safe, shareable information about the project.
        '''
        return {
            'projectName':
            self.projectSettings['projectName'],
            'projectDescription':
            self.projectSettings['projectDescription'],
            'demoMode':
            self.config.getProperty('Project',
                                    'demoMode',
                                    type=bool,
                                    fallback=False),
            'backdrops':
            self.projectSettings['backdrops']['images']
        }

    def getClassDefinitions(self):
        '''
            Returns a dictionary with entries for all classes in the project.
        '''
        classdef = {
            'entries': {
                'default': {}  # default group for ungrouped label classes
            }
        }
        schema = self.config.getProperty('Database', 'schema')

        # query data
        sql = '''
            SELECT 'group' AS type, id, NULL as idx, name, color, parent, NULL AS keystroke FROM {schema}.labelclassgroup
            UNION ALL
            SELECT 'class' AS type, id, idx, name, color, labelclassgroup, keystroke FROM {schema}.labelclass;
        '''.format(schema=schema)
        classData = self.dbConnector.execute(sql, None, 'all')

        # assemble entries first
        allEntries = {}
        numClasses = 0
        for cl in classData:
            id = str(cl['id'])
            entry = {
                'id': id,
                'name': cl['name'],
                'color': cl['color'],
                'parent':
                str(cl['parent']) if cl['parent'] is not None else None,
            }
            if cl['type'] == 'group':
                entry['entries'] = {}
            else:
                entry['index'] = cl['idx']
                entry['keystroke'] = cl['keystroke']
                numClasses += 1
            allEntries[id] = entry

        # transform into tree
        def _find_parent(tree, parentID):
            if parentID is None:
                return tree['entries']['default']
            elif 'id' in tree and tree['id'] == parentID:
                return tree
            elif 'entries' in tree:
                for ek in tree['entries'].keys():
                    rv = _find_parent(tree['entries'][ek], parentID)
                    if rv is not None:
                        return rv
                return None
            else:
                return None

        allEntries['default'] = {'name': '(other)', 'entries': {}}
        allEntries = {'entries': allEntries}
        for key in list(allEntries['entries'].keys()):
            if key == 'default':
                continue
            if key in allEntries['entries']:
                entry = allEntries['entries'][key]
                parentID = entry['parent']
                del entry['parent']

                if 'entries' in entry and parentID is None:
                    # group, but no parent: append to root directly
                    allEntries['entries'][key] = entry

                else:
                    # move item
                    parent = _find_parent(allEntries, parentID)
                    parent['entries'][key] = entry
                    del allEntries['entries'][key]

        classdef = allEntries
        classdef['numClasses'] = numClasses
        return classdef

    def getBatch_fixed(self, username, data):
        '''
            Returns entries from the database based on the list of data entry identifiers specified.
        '''
        # query
        sql = self.sqlBuilder.getFixedImagesQueryString(
            self.projectSettings['demoMode'])

        # parse results
        queryVals = (
            tuple(UUID(d) for d in data),
            username,
            username,
        )
        if self.projectSettings['demoMode']:
            queryVals = (tuple(UUID(d) for d in data), )

        with self.dbConnector.execute_cursor(sql, queryVals) as cursor:
            try:
                response = self._assemble_annotations(cursor)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()
        return {'entries': response}

    def getBatch_auto(self,
                      username,
                      order='unlabeled',
                      subset='default',
                      limit=None):
        '''
            TODO: description
        '''
        # query
        sql = self.sqlBuilder.getNextBatchQueryString(
            order, subset, self.projectSettings['demoMode'])

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)

        # parse results
        queryVals = (
            username,
            limit,
            username,
        )
        if self.projectSettings['demoMode']:
            queryVals = (limit, )

        with self.dbConnector.execute_cursor(sql, queryVals) as cursor:
            response = self._assemble_annotations(cursor)

        return {'entries': response}

    def getBatch_timeRange(self,
                           minTimestamp,
                           maxTimestamp,
                           userList,
                           skipEmptyImages=False,
                           limit=None):
        '''
            Returns images that have been annotated within the given time range and/or
            by the given user(s). All arguments are optional.
            Useful for reviewing existing annotations.
        '''
        # query string
        sql = self.sqlBuilder.getDateQueryString(minTimestamp, maxTimestamp,
                                                 userList, skipEmptyImages)

        # check validity and provide arguments
        queryVals = []
        if userList is not None:
            queryVals.append(tuple(userList))
        if minTimestamp is not None:
            queryVals.append(minTimestamp)
        if maxTimestamp is not None:
            queryVals.append(maxTimestamp)
        if skipEmptyImages and userList is not None:
            queryVals.append(tuple(userList))

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)
        queryVals.append(limit)

        if userList is not None:
            queryVals.append(tuple(userList))

        # query and parse results
        with self.dbConnector.execute_cursor(sql, tuple(queryVals)) as cursor:
            try:
                response = self._assemble_annotations(cursor)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()
        return {'entries': response}

    def get_timeRange(self, userList, skipEmptyImages=False):
        '''
            Returns two timestamps denoting the temporal limits within which
            images have been viewed by the users provided in the userList.
            Arguments:
            - userList: string (single user name) or list of strings (multiple).
                        Can also be None; in this case all annotations will be
                        checked.
            - skipEmptyImages: if True, only images that contain at least one
                               annotation will be considered.
        '''
        # query string
        sql = self.sqlBuilder.getTimeRangeQueryString(userList,
                                                      skipEmptyImages)

        arguments = (None if userList is None else tuple(userList))
        result = self.dbConnector.execute(sql, (arguments, ), numReturn=1)

        if result is not None and len(result):
            return {
                'minTimestamp': result[0]['mintimestamp'],
                'maxTimestamp': result[0]['maxtimestamp'],
            }
        else:
            return {'error': 'no annotations made'}

    def submitAnnotations(self, username, submissions):
        '''
            Sends user-provided annotations to the database.
        '''
        if self.projectSettings['demoMode']:
            return 0

        # assemble values
        colnames = getattr(QueryStrings_annotation,
                           self.projectSettings['annotationType']).value
        values_insert = []
        values_update = []

        meta = (None if not 'meta' in submissions else json.dumps(
            submissions['meta']))

        # for deletion: remove all annotations whose image ID matches but whose annotation ID is not among the submitted ones
        ids = []

        viewcountValues = []
        for imageKey in submissions['entries']:
            entry = submissions['entries'][imageKey]

            try:
                lastChecked = entry['timeCreated']
                lastTimeRequired = entry['timeRequired']
                if lastTimeRequired is None: lastTimeRequired = 0
            except:
                lastChecked = datetime.now(tz=pytz.utc)
                lastTimeRequired = 0

            if 'annotations' in entry and len(entry['annotations']):
                for annotation in entry['annotations']:
                    # assemble annotation values
                    annotationTokens = self.annoParser.parseAnnotation(
                        annotation)
                    annoValues = []
                    for cname in colnames:
                        if cname == 'id':
                            if cname in annotationTokens:
                                # cast and only append id if the annotation is an existing one
                                annoValues.append(UUID(
                                    annotationTokens[cname]))
                                ids.append(UUID(annotationTokens[cname]))
                        elif cname == 'image':
                            annoValues.append(UUID(imageKey))
                        elif cname == 'label' and annotationTokens[
                                cname] is not None:
                            annoValues.append(UUID(annotationTokens[cname]))
                        elif cname == 'timeCreated':
                            try:
                                annoValues.append(
                                    dateutil.parser.parse(
                                        annotationTokens[cname]))
                            except:
                                annoValues.append(datetime.now(tz=pytz.utc))
                        elif cname == 'timeRequired':
                            timeReq = annotationTokens[cname]
                            if timeReq is None: timeReq = 0
                            annoValues.append(timeReq)
                        elif cname == 'username':
                            annoValues.append(username)
                        elif cname in annotationTokens:
                            annoValues.append(annotationTokens[cname])
                        elif cname == 'unsure':
                            if 'unsure' in annotationTokens and annotationTokens[
                                    'unsure'] is not None:
                                annoValues.append(annotationTokens[cname])
                            else:
                                annoValues.append(False)
                        elif cname == 'meta':
                            annoValues.append(meta)
                        else:
                            annoValues.append(None)
                    if 'id' in annotationTokens:
                        # existing annotation; update
                        values_update.append(tuple(annoValues))
                    else:
                        # new annotation
                        values_insert.append(tuple(annoValues))

            viewcountValues.append(
                (username, imageKey, 1, lastChecked, lastTimeRequired, meta))

        schema = self.config.getProperty('Database', 'schema')

        # delete all annotations that are not in submitted batch
        imageKeys = list(UUID(k) for k in submissions['entries'])
        if len(imageKeys):
            if len(ids):
                sql = '''
                    DELETE FROM {schema}.annotation WHERE username = %s AND id IN (
                        SELECT idQuery.id FROM (
                            SELECT * FROM {schema}.annotation WHERE id NOT IN %s
                        ) AS idQuery
                        JOIN (
                            SELECT * FROM {schema}.annotation WHERE image IN %s
                        ) AS imageQuery ON idQuery.id = imageQuery.id);
                '''.format(schema=schema)
                self.dbConnector.execute(sql, (
                    username,
                    tuple(ids),
                    tuple(imageKeys),
                ))
            else:
                # no annotations submitted; delete all annotations submitted before
                sql = '''
                    DELETE FROM {schema}.annotation WHERE username = %s AND image IN %s;
                '''.format(schema=schema)
                self.dbConnector.execute(sql, (
                    username,
                    tuple(imageKeys),
                ))

        # insert new annotations
        if len(values_insert):
            sql = '''
                INSERT INTO {}.annotation ({})
                VALUES %s ;
            '''.format(
                schema,
                ', '.join(colnames[1:])  # skip 'id' column
            )
            self.dbConnector.insert(sql, values_insert)

        # update existing annotations
        if len(values_update):
            updateCols = ''
            for col in colnames:
                if col == 'label':
                    updateCols += '{col} = UUID(e.{col}),'.format(col=col)
                elif col == 'timeRequired':
                    # we sum the required times together
                    updateCols += '{col} = COALESCE(a.{col},0) + COALESCE(e.{col},0),'.format(
                        col=col)
                else:
                    updateCols += '{col} = e.{col},'.format(col=col)

            sql = '''
                UPDATE {schema}.annotation AS a
                SET {updateCols}
                FROM (VALUES %s) AS e({colnames})
                WHERE e.id = a.id;
            '''.format(schema=schema,
                       updateCols=updateCols.strip(','),
                       colnames=', '.join(colnames))
            self.dbConnector.insert(sql, values_update)

        # viewcount table
        sql = '''
            INSERT INTO {}.image_user (username, image, viewcount, last_checked, last_time_required, meta)
            VALUES %s 
            ON CONFLICT (username, image) DO UPDATE SET viewcount = image_user.viewcount + 1, last_checked = EXCLUDED.last_checked, last_time_required = EXCLUDED.last_time_required, meta = EXCLUDED.meta;
        '''.format(schema)

        self.dbConnector.insert(sql, viewcountValues)

        return 0
コード例 #2
0
class ProjectConfigMiddleware:

    # prohibited project shortnames
    PROHIBITED_SHORTNAMES = [
        'con',  # for MS Windows
        'prn',
        'aux',
        'nul',
        'com1',
        'com2',
        'com3',
        'com4',
        'com5',
        'com6',
        'com7',
        'com8',
        'com9',
        'lpt1',
        'lpt2',
        'lpt3',
        'lpt4',
        'lpt5',
        'lpt6',
        'lpt7',
        'lpt8',
        'lpt9'
    ]

    # prohibited project names (both as a whole and for shortnames)
    PROHIBITED_NAMES = [
        '', 'project', 'getavailableaimodels', 'getbackdrops',
        'verifyprojectname', 'verifyprojectshort', 'newproject',
        'createproject', 'statistics', 'static', 'getcreateaccountunrestricted'
        'getprojects', 'about', 'favicon.ico', 'logincheck', 'logout', 'login',
        'dologin', 'createaccount', 'loginscreen', 'accountexists',
        'getauthentication', 'getusernames', 'docreateaccount', 'admin',
        'getservicedetails', 'getceleryworkerdetails', 'getprojectdetails',
        'getuserdetails', 'setpassword'
    ]

    # prohibited name prefixes
    PROHIBITED_NAME_PREFIXES = ['/', '?', '&']

    # patterns that are prohibited anywhere for shortnames (replaced with underscores)
    SHORTNAME_PATTERNS_REPLACE = [
        '|',
        '?',
        '*',
        ':'  # for macOS
    ]

    # patterns that are prohibited anywhere for both short and long names (no replacement)
    PROHIBITED_STRICT = ['&lt;', '<', '>', '&gt;', '..', '/', '\\']

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)

        # load default UI settings
        try:
            # check if custom default styles are provided
            self.defaultUIsettings = json.load(
                open('config/default_ui_settings.json', 'r'))
        except:
            # resort to built-in styles
            self.defaultUIsettings = json.load(
                open(
                    'modules/ProjectAdministration/static/json/default_ui_settings.json',
                    'r'))

    @staticmethod
    def _recursive_update(dictObject, target):
        '''
            Recursively iterates over all keys and sub-keys of "dictObject"
            and its sub-dicts and copies over values from dict "target", if
            they are available.
        '''
        for key in dictObject.keys():
            if key in target:
                if isinstance(dictObject[key], dict):
                    ProjectConfigMiddleware._recursive_update(
                        dictObject[key], target[key])
                else:
                    dictObject[key] = target[key]

    def getPlatformInfo(self, project, parameters=None):
        '''
            AIDE setup-specific platform metadata.
        '''
        # parse parameters (if provided) and compare with mutable entries
        allParams = set(['server_uri', 'server_dir', 'watch_folder_interval'])
        if parameters is not None and parameters != '*':
            if isinstance(parameters, str):
                parameters = [parameters.lower()]
            else:
                parameters = [p.lower() for p in parameters]
            set(parameters).intersection_update(allParams)
        else:
            parameters = allParams
        parameters = list(parameters)
        response = {}
        for param in parameters:
            if param.lower() == 'server_uri':
                uri = os.path.join(
                    self.config.getProperty('Server', 'dataServer_uri'),
                    project, 'files')
                response[param] = uri
            elif param.lower() == 'server_dir':
                sdir = os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project)
                response[param] = sdir
            elif param.lower() == 'watch_folder_interval':
                interval = self.config.getProperty('FileServer',
                                                   'watch_folder_interval',
                                                   type=float,
                                                   fallback=60)
                response[param] = interval

        return response

    def getProjectImmutables(self, project):
        queryStr = 'SELECT annotationType, predictionType, demoMode FROM aide_admin.project WHERE shortname = %s;'
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        if result and len(result):
            return {
                'annotationType': result[0]['annotationtype'],
                'predictionType': result[0]['predictiontype']
            }
        else:
            return None

    def getProjectInfo(self, project, parameters=None):

        # parse parameters (if provided) and compare with mutable entries
        allParams = set([
            'name', 'description', 'ispublic', 'secret_token', 'demomode',
            'interface_enabled', 'ui_settings',
            'segmentation_ignore_unlabeled', 'ai_model_enabled',
            'ai_model_library', 'ai_model_settings', 'ai_alcriterion_library',
            'ai_alcriterion_settings', 'numimages_autotrain',
            'minnumannoperimage', 'maxnumimages_train', 'watch_folder_enabled',
            'watch_folder_remove_missing_enabled'
        ])
        if parameters is not None and parameters != '*':
            if isinstance(parameters, str):
                parameters = [parameters.lower()]
            else:
                parameters = [p.lower() for p in parameters]
            set(parameters).intersection_update(allParams)
        else:
            parameters = allParams
        parameters = list(parameters)
        sqlParameters = ','.join(parameters)

        queryStr = sql.SQL('''
        SELECT {} FROM aide_admin.project
        WHERE shortname = %s;
        ''').format(sql.SQL(sqlParameters))
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        result = result[0]

        # assemble response
        response = {}
        for param in parameters:
            value = result[param]
            if param == 'ui_settings':
                value = json.loads(value)

                # auto-complete with defaults where missing
                value = check_args(value, self.defaultUIsettings)
            response[param] = value

        return response

    def renewSecretToken(self, project):
        '''
            Creates a new secret token, invalidating the old one.
        '''
        try:
            newToken = secrets.token_urlsafe(32)
            result = self.dbConnector.execute(
                '''UPDATE aide_admin.project
                SET secret_token = %s
                WHERE shortname = %s;
                SELECT secret_token FROM aide_admin.project
                WHERE shortname = %s;
            ''', (
                    newToken,
                    project,
                    project,
                ), 1)
            return result[0]['secret_token']
        except:
            # this normally should not happen, since we checked for the validity of the project
            return None

    def getProjectUsers(self, project):
        '''
            Returns a list of users that are enrolled in the project,
            as well as their roles within the project.
        '''

        queryStr = sql.SQL(
            'SELECT * FROM aide_admin.authentication WHERE project = %s;')
        result = self.dbConnector.execute(queryStr, (project, ), 'all')
        return result

    def createProject(self, username, properties):
        '''
            Receives the most basic, mostly non-changeable settings for a new project
            ("properties") with the following entries:
            - shortname
            - owner (the current username)
            - name
            - description
            - annotationType
            - predictionType

            More advanced settings (UI config, AI model, etc.) will be configured after
            the initial project creation stage.

            Verifies whether these settings are available for a new project. If they are,
            it creates a new database schema for the project, adds an entry for it to the
            admin schema table and makes the current user admin. Returns True in this case.
            Otherwise raises an exception.
        '''

        shortname = properties['shortname']

        # verify availability of the project name and shortname
        if not self.getProjectNameAvailable(properties['name']):
            raise Exception('Project name "{}" unavailable.'.format(
                properties['name']))
        if not self.getProjectShortNameAvailable(shortname):
            raise Exception(
                'Project shortname "{}" unavailable.'.format(shortname))

        # load base SQL
        with open('modules/ProjectAdministration/static/sql/create_schema.sql',
                  'r') as f:
            queryStr = sql.SQL(f.read())

        # determine annotation and prediction types and add fields accordingly
        annotationFields = list(
            getattr(Fields_annotation, properties['annotationType']).value)
        predictionFields = list(
            getattr(Fields_prediction, properties['predictionType']).value)

        # create project schema
        self.dbConnector.execute(
            queryStr.format(
                id_schema=sql.Identifier(shortname),
                id_auth=sql.Identifier(
                    self.config.getProperty('Database', 'user')),
                id_image=sql.Identifier(shortname, 'image'),
                id_iu=sql.Identifier(shortname, 'image_user'),
                id_labelclassGroup=sql.Identifier(shortname,
                                                  'labelclassgroup'),
                id_labelclass=sql.Identifier(shortname, 'labelclass'),
                id_annotation=sql.Identifier(shortname, 'annotation'),
                id_cnnstate=sql.Identifier(shortname, 'cnnstate'),
                id_prediction=sql.Identifier(shortname, 'prediction'),
                id_workflow=sql.Identifier(shortname, 'workflow'),
                id_workflowHistory=sql.Identifier(shortname,
                                                  'workflowhistory'),
                annotation_fields=sql.SQL(', ').join(
                    [sql.SQL(field) for field in annotationFields]),
                prediction_fields=sql.SQL(', ').join(
                    [sql.SQL(field) for field in predictionFields])), None,
            None)

        # register project
        self.dbConnector.execute(
            '''
            INSERT INTO aide_admin.project (shortname, name, description,
                owner,
                secret_token,
                interface_enabled,
                annotationType, predictionType,
                isPublic, demoMode,
                ui_settings)
            VALUES (
                %s, %s, %s,
                %s,
                %s,
                %s,
                %s, %s,
                %s, %s,
                %s
            );
            ''', (shortname, properties['name'],
                  (properties['description'] if 'description' in properties
                   else ''), username, secrets.token_urlsafe(32), False,
                  properties['annotationType'], properties['predictionType'],
                  False, False, json.dumps(self.defaultUIsettings)), None)

        # register user in project
        self.dbConnector.execute(
            '''
                INSERT INTO aide_admin.authentication (username, project, isAdmin)
                VALUES (%s, %s, true);
            ''', (
                username,
                shortname,
            ), None)

        # notify FileServer instance(s) to set up project folders
        process = fileServer_interface.aide_internal_notify.si({
            'task':
            'create_project_folders',
            'projectName':
            shortname
        })
        process.apply_async(queue='aide_broadcast', ignore_result=True)

        return True

    def updateProjectSettings(self, project, projectSettings):
        '''
            TODO
        '''

        # check UI settings first
        if 'ui_settings' in projectSettings:
            if isinstance(projectSettings['ui_settings'], str):
                projectSettings['ui_settings'] = json.loads(
                    projectSettings['ui_settings'])
            fieldNames = [('welcomeMessage', str), ('numImagesPerBatch', int),
                          ('minImageWidth', int), ('numImageColumns_max', int),
                          ('defaultImage_w', int), ('defaultImage_h', int),
                          ('styles', dict), ('enableEmptyClass', bool),
                          ('showPredictions', bool),
                          ('showPredictions_minConf', float),
                          ('carryOverPredictions', bool),
                          ('carryOverRule', str),
                          ('carryOverPredictions_minConf', float),
                          ('defaultBoxSize_w', int), ('defaultBoxSize_h', int),
                          ('minBoxSize_w', int), ('minBoxSize_h', int)]
            uiSettings_new, uiSettingsKeys_new = parse_parameters(
                projectSettings['ui_settings'],
                fieldNames,
                absent_ok=True,
                escape=True)  #TODO: escape

            # adopt current settings and replace values accordingly
            uiSettings = self.dbConnector.execute(
                '''SELECT ui_settings
                    FROM aide_admin.project
                    WHERE shortname = %s;            
                ''', (project, ), 1)
            uiSettings = json.loads(uiSettings[0]['ui_settings'])
            for kIdx in range(len(uiSettingsKeys_new)):
                if isinstance(uiSettings[uiSettingsKeys_new[kIdx]], dict):
                    ProjectConfigMiddleware._recursive_update(
                        uiSettings[uiSettingsKeys_new[kIdx]],
                        uiSettings_new[kIdx])
                else:
                    uiSettings[uiSettingsKeys_new[kIdx]] = uiSettings_new[kIdx]

            # auto-complete with defaults where missing
            uiSettings = check_args(uiSettings, self.defaultUIsettings)

            projectSettings['ui_settings'] = json.dumps(uiSettings)

        # parse remaining parameters
        fieldNames = [('description', str), ('isPublic', bool),
                      ('secret_token', str), ('demoMode', bool),
                      ('ui_settings', str), ('interface_enabled', bool),
                      ('watch_folder_enabled', bool),
                      ('watch_folder_remove_missing_enabled', bool)]

        vals, params = parse_parameters(projectSettings,
                                        fieldNames,
                                        absent_ok=True,
                                        escape=False)
        vals.append(project)

        # commit to DB
        queryStr = sql.SQL('''UPDATE aide_admin.project
            SET
            {}
            WHERE shortname = %s;
            ''').format(
            sql.SQL(',').join(
                [sql.SQL('{} = %s'.format(item)) for item in params]))

        self.dbConnector.execute(queryStr, tuple(vals), None)

        return True

    def updateClassDefinitions(self, project, classdef, removeMissing=False):
        '''
            Updates the project's class definitions.
            if "removeMissing" is set to True, label classes that are present
            in the database, but not in "classdef," will be removed. Label
            class groups will only be removed if they do not reference any
            label class present in "classdef." This functionality is disallowed
            in the case of segmentation masks.
        '''

        # check if project contains segmentation masks
        metaType = self.dbConnector.execute(
            '''
                SELECT annotationType, predictionType FROM aide_admin.project
                WHERE shortname = %s;
            ''', (project, ), 1)[0]
        is_segmentation = any(
            ['segmentationmasks' in m.lower() for m in metaType.values()])
        if is_segmentation:
            removeMissing = False

        # get current classes from database
        db_classes = {}
        db_groups = {}
        if removeMissing:
            queryStr = sql.SQL('''
                SELECT * FROM {id_lc} AS lc
                FULL OUTER JOIN (
                    SELECT id AS lcgid, name AS lcgname, parent, color
                    FROM {id_lcg}
                ) AS lcg
                ON lc.labelclassgroup = lcg.lcgid
            ''').format(id_lc=sql.Identifier(project, 'labelclass'),
                        id_lcg=sql.Identifier(project, 'labelclassgroup'))
            result = self.dbConnector.execute(queryStr, None, 'all')
            for r in result:
                if r['id'] is not None:
                    db_classes[r['id']] = r
                if r['lcgid'] is not None:
                    if not r['lcgid'] in db_groups:
                        db_groups[r['lcgid']] = {**r, **{'num_children': 0}}
                    elif not 'lcgid' in db_groups[r['lcgid']]:
                        db_groups[r['lcgid']] = {**db_groups[r['lcgid']], **r}
                if r['labelclassgroup'] is not None:
                    if not r['labelclassgroup'] in db_groups:
                        db_groups[r['labelclassgroup']] = {'num_children': 1}
                    else:
                        db_groups[r['labelclassgroup']]['num_children'] += 1

        # parse provided class definitions list
        unique_keystrokes = set()
        classes_update = []
        classgroups_update = []

        def _parse_item(item, parent=None):
            # get or create ID for item
            try:
                itemID = uuid.UUID(item['id'])
            except:
                itemID = uuid.uuid1()
                while itemID in classes_update or itemID in classgroups_update:
                    itemID = uuid.uuid1()

            entry = {
                'id': itemID,
                'name': item['name'],
                'color': (None if not 'color' in item else item['color']),
                'keystroke': None,
                'labelclassgroup': parent
            }
            if 'children' in item:
                # label class group
                classgroups_update.append(entry)
                for child in item['children']:
                    _parse_item(child, itemID)
            else:
                # label class
                if 'keystroke' in item and not item[
                        'keystroke'] in unique_keystrokes:
                    entry['keystroke'] = item['keystroke']
                    unique_keystrokes.add(item['keystroke'])
                classes_update.append(entry)

        for item in classdef:
            _parse_item(item, None)

        # apply changes
        if removeMissing:
            queryArgs = []
            if len(classes_update):
                # remove all missing label classes
                lcSpec = sql.SQL('WHERE id NOT IN %s')
                queryArgs.append(tuple([(l['id'], ) for l in classes_update]))
            else:
                # remove all label classes
                lcgSpec = sql.SQL('')
            if len(classgroups_update):
                # remove all missing labelclass groups
                lcgSpec = sql.SQL('WHERE id NOT IN %s')
                queryArgs.append(
                    tuple([(l['id'], ) for l in classgroups_update]))
            else:
                # remove all labelclass groups
                lcgSpec = sql.SQL('')
            queryStr = sql.SQL('''
                DELETE FROM {id_lc}
                {lcSpec};
                DELETE FROM {id_lcg}
                {lcgSpec};
            ''').format(id_lc=sql.Identifier(project, 'labelclass'),
                        id_lcg=sql.Identifier(project, 'labelclassgroup'),
                        lcSpec=lcSpec,
                        lcgSpec=lcgSpec)
            self.dbConnector.execute(queryStr, tuple(queryArgs), None)

        # add/update in order (groups, set their parents, label classes)
        groups_new = [(
            g['id'],
            g['name'],
            g['color'],
        ) for g in classgroups_update]
        queryStr = sql.SQL('''
            INSERT INTO {id_lcg} (id, name, color)
            VALUES %s
            ON CONFLICT (id) DO UPDATE SET
                name = EXCLUDED.name,
                color = EXCLUDED.color;
        ''').format(  #TODO: on conflict(name)
            id_lcg=sql.Identifier(project, 'labelclassgroup'))
        self.dbConnector.insert(queryStr, groups_new)

        # set parents
        groups_parents = [
            (
                g['id'],
                g['labelclassgroup'],
            ) for g in classgroups_update
            if ('labelclassgroup' in g and g['labelclassgroup'] is not None)
        ]
        queryStr = sql.SQL('''
            UPDATE {id_lcg} AS lcg
            SET parent = q.parent
            FROM (VALUES %s) AS q(id, parent)
            WHERE lcg.id = q.id;
        ''').format(id_lcg=sql.Identifier(project, 'labelclassgroup'))
        self.dbConnector.insert(queryStr, groups_parents)

        # insert/update label classes
        lcdata = [(
            l['id'],
            l['name'],
            l['color'],
            l['keystroke'],
            l['labelclassgroup'],
        ) for l in classes_update]
        queryStr = sql.SQL('''
            INSERT INTO {id_lc} (id, name, color, keystroke, labelclassgroup)
            VALUES %s
            ON CONFLICT (id) DO UPDATE
            SET name = EXCLUDED.name,
            color = EXCLUDED.color,
            keystroke = EXCLUDED.keystroke,
            labelclassgroup = EXCLUDED.labelclassgroup;
        ''').format(  #TODO: on conflict(name)
            id_lc=sql.Identifier(project, 'labelclass'))
        self.dbConnector.insert(queryStr, lcdata)

        return True

    def getProjectNameAvailable(self, projectName):
        '''
            Returns True if the provided project (long) name is available.
        '''
        if not isinstance(projectName, str):
            return False
        projectName = projectName.strip().lower()
        if not len(projectName):
            return False

        # check if name matches prohibited AIDE keywords (we do not replace long names)
        if projectName in self.PROHIBITED_STRICT or any(
            [p in projectName for p in self.PROHIBITED_STRICT]):
            return False
        if projectName in self.PROHIBITED_NAMES:
            return False
        if any(
            [projectName.startswith(p)
             for p in self.PROHIBITED_NAME_PREFIXES]):
            return False

        # check if name is already taken
        result = self.dbConnector.execute(
            '''SELECT 1 AS result
            FROM aide_admin.project
            WHERE name = %s;
            ''', (projectName, ), 1)

        if result is None or not len(result):
            return True
        else:
            return result[0]['result'] != 1

    def getProjectShortNameAvailable(self, projectName):
        '''
            Returns True if the provided project shortname is available.
            In essence, "available" means that a database schema with the given
            name can be created (this includes Postgres schema name conventions).
            Returns False otherwise.
        '''
        if not isinstance(projectName, str):
            return False
        projectName = projectName.strip().lower()
        if not len(projectName):
            return False

        # check if name matches prohibited AIDE keywords; replace where possible
        if projectName in self.PROHIBITED_STRICT or any(
            [p in projectName for p in self.PROHIBITED_STRICT]):
            return False
        if projectName in self.PROHIBITED_NAMES or projectName in self.PROHIBITED_SHORTNAMES:
            return False
        if any(
            [projectName.startswith(p)
             for p in self.PROHIBITED_NAME_PREFIXES]):
            return False
        for p in self.SHORTNAME_PATTERNS_REPLACE:
            projectName = projectName.replace(p, '_')

        # check if provided name is valid as per Postgres conventions
        matches = re.findall('(^(pg_|[0-9]).*|.*(\$|\s)+.*)', projectName)
        if len(matches):
            return False

        # check if project shorthand already exists in database
        result = self.dbConnector.execute(
            '''SELECT 1 AS result
            FROM information_schema.schemata
            WHERE schema_name ilike %s
            UNION ALL
            SELECT 1 FROM aide_admin.project
            WHERE shortname ilike %s;
            ''', (
                projectName,
                projectName,
            ), 2)

        if result is None or not len(result):
            return True

        if len(result) == 2:
            return result[0]['result'] != 1 and result[1]['result'] != 1
        elif len(result) == 1:
            return result[0]['result'] != 1
        else:
            return True
コード例 #3
0
class DataWorker:

    FILENAMES_PROHIBITED_CHARS = (
        '&lt;',
        '<',
        '>',
        '&gt;',
        '..',
        '/',
        '\\',
        '|',
        '?',
        '*',
        ':'  # for macOS
    )

    NUM_IMAGES_LIMIT = 4096  # maximum number of images that can be queried at once (to avoid bottlenecks)

    def __init__(self, config, passiveMode=False):
        self.config = config
        self.dbConnector = Database(config)
        self.countPattern = re.compile('\_[0-9]+$')
        self.passiveMode = passiveMode

        self.tempDir = self.config.getProperty('FileServer',
                                               'tempfiles_dir',
                                               type=str,
                                               fallback=tempfile.gettempdir())

    def aide_internal_notify(self, message):
        '''
            Used for AIDE administrative communication,
            e.g. for setting up queues.
        '''
        if self.passiveMode:
            return
        if 'task' in message:
            if message['task'] == 'create_project_folders':
                # set up folders for a newly created project
                if 'projectName' in message:
                    destPath = os.path.join(
                        self.config.getProperty('FileServer',
                                                'staticfiles_dir'),
                        message['projectName'])
                    os.makedirs(destPath, exist_ok=True)

    ''' Image administration functionalities '''

    def listImages(self,
                   project,
                   folder=None,
                   imageAddedRange=None,
                   lastViewedRange=None,
                   viewcountRange=None,
                   numAnnoRange=None,
                   numPredRange=None,
                   orderBy=None,
                   order='desc',
                   startFrom=None,
                   limit=None):
        '''
            Returns a list of images, with ID, filename,
            date image was added, viewcount, number of annotations,
            number of predictions, and last time viewed, for a given
            project.
            The list can be filtered by all those properties (e.g. 
            date and time image was added, last checked; number of
            annotations, etc.), as well as limited in length (images
            are sorted by date_added).
        '''
        queryArgs = []

        filterStr = ''
        if folder is not None and isinstance(folder, str):
            filterStr += ' filename LIKE %s '
            queryArgs.append(folder + '%')
        if imageAddedRange is not None:  #TODO
            filterStr += 'AND date_added >= to_timestamp(%s) AND date_added <= to_timestamp(%s) '
            queryArgs.append(imageAddedRange[0])
            queryArgs.append(imageAddedRange[1])
        if lastViewedRange is not None:  #TODO
            filterStr += 'AND last_viewed >= to_timestamp(%s) AND last_viewed <= to_timestamp(%s) '
            queryArgs.append(lastViewedRange[0])
            queryArgs.append(lastViewedRange[1])
        if viewcountRange is not None:
            filterStr += 'AND viewcount >= %s AND viewcount <= %s '
            queryArgs.append(viewcountRange[0])
            queryArgs.append(viewcountRange[1])
        if numAnnoRange is not None:
            filterStr += 'AND num_anno >= %s AND numAnno <= %s '
            queryArgs.append(numAnnoRange[0])
            queryArgs.append(numAnnoRange[1])
        if numPredRange is not None:
            filterStr += 'AND num_pred >= %s AND num_pred <= %s '
            queryArgs.append(numPredRange[0])
            queryArgs.append(numPredRange[1])
        if startFrom is not None:
            if not isinstance(startFrom, UUID):
                try:
                    startFrom = UUID(startFrom)
                except:
                    startFrom = None
            if startFrom is not None:
                filterStr += ' AND img.id > %s '
                queryArgs.append(startFrom)
        filterStr = filterStr.strip()
        if filterStr.startswith('AND'):
            filterStr = filterStr[3:]
        if len(filterStr.strip()):
            filterStr = 'WHERE ' + filterStr
        filterStr = sql.SQL(filterStr)

        orderStr = sql.SQL('ORDER BY img.id ASC')
        if orderBy is not None:
            orderStr = sql.SQL('ORDER BY {} {}, img.id ASC').format(
                sql.SQL(orderBy), sql.SQL(order))

        limitStr = sql.SQL('')
        if isinstance(limit, float):
            if not math.isnan(limit):
                limit = int(limit)
            else:
                limit = self.NUM_IMAGES_LIMIT
        elif isinstance(limit, str):
            try:
                limit = int(limit)
            except:
                limit = self.NUM_IMAGES_LIMIT
        elif not isinstance(limit, int):
            limit = self.NUM_IMAGES_LIMIT
        limit = max(min(limit, self.NUM_IMAGES_LIMIT), 1)
        limitStr = sql.SQL('LIMIT %s')
        queryArgs.append(limit)

        queryStr = sql.SQL('''
            SELECT img.id, filename, EXTRACT(epoch FROM date_added) AS date_added,
                COALESCE(viewcount, 0) AS viewcount,
                EXTRACT(epoch FROM last_viewed) AS last_viewed,
                COALESCE(num_anno, 0) AS num_anno,
                COALESCE(num_pred, 0) AS num_pred,
                img.isGoldenQuestion
            FROM {id_img} AS img
            FULL OUTER JOIN (
                SELECT image, COUNT(*) AS viewcount, MAX(last_checked) AS last_viewed
                FROM {id_iu}
                GROUP BY image
            ) AS iu
            ON img.id = iu.image
            FULL OUTER JOIN (
                SELECT image, COUNT(*) AS num_anno
                FROM {id_anno}
                GROUP BY image
            ) AS anno
            ON img.id = anno.image
            FULL OUTER JOIN (
                SELECT image, COUNT(*) AS num_pred
                FROM {id_pred}
                GROUP BY image
            ) AS pred
            ON img.id = pred.image
            {filter}
            {order}
            {limit}
        ''').format(id_img=sql.Identifier(project, 'image'),
                    id_iu=sql.Identifier(project, 'image_user'),
                    id_anno=sql.Identifier(project, 'annotation'),
                    id_pred=sql.Identifier(project, 'prediction'),
                    filter=filterStr,
                    order=orderStr,
                    limit=limitStr)

        result = self.dbConnector.execute(queryStr, tuple(queryArgs), 'all')
        for idx in range(len(result)):
            result[idx]['id'] = str(result[idx]['id'])
        return result

    def uploadImages(self,
                     project,
                     images,
                     existingFiles='keepExisting',
                     splitImages=False,
                     splitProperties=None):
        '''
            Receives a dict of files (bottle.py file format),
            verifies their file extension and checks if they
            are loadable by PIL.
            If they are, they are saved to disk in the project's
            image folder, and registered in the database.
            Parameter "existingFiles" can be set as follows:
            - "keepExisting" (default): if an image already exists on
              disk with the same path/file name, the new image will be
              renamed with an underscore and trailing number.
            - "skipExisting": do not save images that already exist on
              disk under the same path/file name.
            - "replaceExisting": overwrite images that exist with the
              same path/file name. Note: in this case all existing anno-
              tations, predictions, and other metadata about those images,
              will be removed from the database.
            
            If "splitImages" is True, the uploaded images will be automati-
            cally divided into patches on a regular grid according to what
            is defined in "splitProperties". For example, the following
            definition:

                splitProperties = {
                    'patchSize': (800, 600),
                    'stride': (400, 300),
                    'tight': True
                }

            would divide the images into patches of size 800x600, with over-
            lap of 50% (denoted by the "stride" being half the "patchSize"),
            and with all patches completely inside the original image (para-
            meter "tight" makes the last patches to the far left and bottom
            of the image being fully inside the original image; they are shif-
            ted if needed).
            Instead of the full images, the patches are stored on disk and re-
            ferenced through the database. The name format for patches is
            "imageName_x_y.jpg", with "imageName" denoting the name of the ori-
            ginal image, and "x" and "y" the left and top position of the patch
            inside the original image.

            Returns image keys for images that were successfully
            saved, and keys and error messages for those that
            were not.
        '''
        imgPaths_valid = []
        imgs_valid = []
        imgs_warn = {}
        imgs_error = {}
        for key in images.keys():
            try:
                nextUpload = images[key]
                nextFileName = nextUpload.raw_filename
                #TODO: check if raw_filename is compatible with uploads made from Windows

                # check if correct file suffix
                _, ext = os.path.splitext(nextFileName)
                if not ext.lower() in valid_image_extensions:
                    raise Exception(f'Invalid file type (*{ext})')

                # check if loadable as image
                cache = io.BytesIO()
                nextUpload.save(cache)
                try:
                    image = Image.open(cache)
                except Exception:
                    raise Exception('File is not a valid image.')

                # prepare image(s) to save to disk
                parent, filename = os.path.split(nextFileName)
                destFolder = os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project, parent)
                os.makedirs(destFolder, exist_ok=True)

                images = []
                filenames = []

                if not splitImages:
                    # upload the single image directly
                    images.append(image)
                    filenames.append(filename)

                else:
                    # split image into patches instead
                    images, coords = split_image(image,
                                                 splitProperties['patchSize'],
                                                 splitProperties['stride'],
                                                 splitProperties['tight'])
                    bareFileName, ext = os.path.splitext(filename)
                    filenames = [
                        f'{bareFileName}_{c[0]}_{c[1]}{ext}' for c in coords
                    ]

                # register and save all the images
                for i in range(len(images)):
                    subImage = images[i]
                    subFilename = filenames[i]

                    absFilePath = os.path.join(destFolder, subFilename)

                    # check if an image with the same name does not already exist
                    newFileName = subFilename
                    fileExists = os.path.exists(absFilePath)
                    if fileExists:
                        if existingFiles == 'keepExisting':
                            # rename new file
                            while (os.path.exists(absFilePath)):
                                # rename file
                                fn, ext = os.path.splitext(newFileName)
                                match = self.countPattern.search(fn)
                                if match is None:
                                    newFileName = fn + '_1' + ext
                                else:
                                    # parse number
                                    number = int(fn[match.span()[0] +
                                                    1:match.span()[1]])
                                    newFileName = fn[:match.span(
                                    )[0]] + '_' + str(number + 1) + ext

                                absFilePath = os.path.join(
                                    destFolder, newFileName)
                                if not os.path.exists(absFilePath):
                                    imgs_warn[
                                        key] = 'An image with name "{}" already exists under given path on disk. Image has been renamed to "{}".'.format(
                                            subFilename, newFileName)

                        elif existingFiles == 'skipExisting':
                            # ignore new file
                            imgs_warn[
                                key] = f'Image "{newFileName}" already exists on disk and has been skipped.'
                            imgs_valid.append(key)
                            imgPaths_valid.append(
                                os.path.join(parent, newFileName))
                            continue

                        elif existingFiles == 'replaceExisting':
                            # overwrite new file; first remove metadata
                            queryStr = sql.SQL('''
                                DELETE FROM {id_iu}
                                WHERE image = (
                                    SELECT id FROM {id_img}
                                    WHERE filename = %s
                                );
                                DELETE FROM {id_anno}
                                WHERE image = (
                                    SELECT id FROM {id_img}
                                    WHERE filename = %s
                                );
                                DELETE FROM {id_pred}
                                WHERE image = (
                                    SELECT id FROM {id_img}
                                    WHERE filename = %s
                                );
                                DELETE FROM {id_img}
                                WHERE filename = %s;
                            ''').format(
                                id_iu=sql.Identifier(project, 'image_user'),
                                id_anno=sql.Identifier(project, 'annotation'),
                                id_pred=sql.Identifier(project, 'prediction'),
                                id_img=sql.Identifier(project, 'image'))
                            self.dbConnector.execute(queryStr,
                                                     tuple([nextFileName] * 4),
                                                     None)

                            # remove file
                            try:
                                os.remove(absFilePath)
                                imgs_warn[key] = 'Image "{}" already existed on disk and has been deleted.\n'.format(newFileName) + \
                                                    'All metadata (views, annotations, predictions) have been removed from the database.'
                            except:
                                imgs_warn[key] = 'Image "{}" already existed on disk but could not be deleted.\n'.format(newFileName) + \
                                                    'All metadata (views, annotations, predictions) have been removed from the database.'

                    # write to disk
                    fileParent, _ = os.path.split(absFilePath)
                    if len(fileParent):
                        os.makedirs(fileParent, exist_ok=True)
                    subImage.save(absFilePath)

                    imgs_valid.append(key)
                    imgPaths_valid.append(os.path.join(parent, newFileName))

            except Exception as e:
                imgs_error[key] = str(e)

        # register valid images in database
        if len(imgPaths_valid):
            queryStr = sql.SQL('''
                INSERT INTO {id_img} (filename)
                VALUES %s
                ON CONFLICT (filename) DO NOTHING;
            ''').format(id_img=sql.Identifier(project, 'image'))
            self.dbConnector.insert(queryStr, [(i, ) for i in imgPaths_valid])

        result = {
            'imgs_valid': imgs_valid,
            'imgPaths_valid': imgPaths_valid,
            'imgs_warn': imgs_warn,
            'imgs_error': imgs_error
        }

        return result

    def scanForImages(self, project):
        '''
            Searches the project image folder on disk for
            files that are valid, but have not (yet) been added
            to the database.
            Returns a list of paths with files.
        '''

        # scan disk for files
        projectFolder = os.path.join(
            self.config.getProperty('FileServer', 'staticfiles_dir'), project)
        if (not os.path.isdir(projectFolder)) and (
                not os.path.islink(projectFolder)):
            # no folder exists for the project (should not happen due to broadcast at project creation)
            return []
        imgs_disk = listDirectory(projectFolder, recursive=True)

        # get all existing file paths from database
        imgs_database = set()
        queryStr = sql.SQL('''
            SELECT filename FROM {id_img};
        ''').format(id_img=sql.Identifier(project, 'image'))
        result = self.dbConnector.execute(queryStr, None, 'all')
        for r in range(len(result)):
            imgs_database.add(result[r]['filename'])

        # filter
        imgs_candidates = imgs_disk.difference(imgs_database)
        return list(imgs_candidates)

    def addExistingImages(self, project, imageList=None):
        '''
            Scans the project folder on the file system
            for images that are physically saved, but not
            (yet) added to the database.
            Adds them to the project's database schema.
            If an imageList iterable is provided, only
            the intersection between identified images on
            disk and in the iterable are added.

            If 'imageList' is a string with contents 'all',
            all untracked images will be added.

            Returns a list of image IDs and file names that
            were eventually added to the project database schema.
        '''
        # get all images on disk that are not in database
        imgs_candidates = self.scanForImages(project)

        if imageList is None or (isinstance(imageList, str)
                                 and imageList.lower() == 'all'):
            imgs_add = imgs_candidates
        else:
            if isinstance(imageList, dict):
                imageList = list(imageList.keys())
            imgs_add = list(set(imgs_candidates).intersection(set(imageList)))

        if not len(imgs_add):
            return 0, []

        # add to database
        queryStr = sql.SQL('''
            INSERT INTO {id_img} (filename)
            VALUES %s;
        ''').format(id_img=sql.Identifier(project, 'image'))
        self.dbConnector.insert(queryStr, tuple([(i, ) for i in imgs_add]))

        # get IDs of newly added images
        queryStr = sql.SQL('''
            SELECT id, filename FROM {id_img}
            WHERE filename IN %s;
        ''').format(id_img=sql.Identifier(project, 'image'))
        result = self.dbConnector.execute(queryStr, (tuple(imgs_add), ), 'all')

        status = (0 if result is not None and len(result) else 1)  #TODO
        return status, result

    def removeImages(self,
                     project,
                     imageList,
                     forceRemove=False,
                     deleteFromDisk=False):
        '''
            Receives an iterable of image IDs and removes them
            from the project database schema, including associated
            user views, annotations, and predictions made.
            Only removes entries if no user views, annotations, and
            predictions exist, or else if "forceRemove" is True.
            If "deleteFromDisk" is True, the image files are also
            deleted from the project directory on the file system.

            Returns a list of images that were deleted.
        '''

        imageList = tuple([(UUID(i), ) for i in imageList])

        queryArgs = []
        deleteArgs = []
        if forceRemove:
            queryStr = sql.SQL('''
                SELECT id, filename
                FROM {id_img}
                WHERE id IN %s;
            ''').format(id_img=sql.Identifier(project, 'image'))
            queryArgs = tuple([imageList])

            deleteStr = sql.SQL('''
                DELETE FROM {id_iu} WHERE image IN %s;
                DELETE FROM {id_anno} WHERE image IN %s;
                DELETE FROM {id_pred} WHERE image IN %s;
                DELETE FROM {id_img} WHERE id IN %s;
            ''').format(id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'),
                        id_img=sql.Identifier(project, 'image'))
            deleteArgs = tuple([imageList] * 4)

        else:
            queryStr = sql.SQL('''
                SELECT id, filename
                FROM {id_img}
                WHERE id IN %s
                AND id NOT IN (
                    SELECT image FROM {id_iu}
                    WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_anno}
                    WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_pred}
                    WHERE image IN %s
                );
            ''').format(id_img=sql.Identifier(project, 'image'),
                        id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'))
            queryArgs = tuple([imageList] * 4)

            deleteStr = sql.SQL('''
                DELETE FROM {id_img}
                WHERE id IN %s
                AND id NOT IN (
                    SELECT image FROM {id_iu}
                    WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_anno}
                    WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_pred}
                    WHERE image IN %s
                );
            ''').format(id_img=sql.Identifier(project, 'image'),
                        id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'))
            deleteArgs = tuple([imageList] * 4)

        # retrieve images to be deleted
        imgs_del = self.dbConnector.execute(queryStr, queryArgs, 'all')

        if imgs_del is None:
            imgs_del = []

        if len(imgs_del):
            # delete images
            self.dbConnector.execute(deleteStr, deleteArgs, None)

            if deleteFromDisk:
                projectFolder = os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project)
                if os.path.isdir(projectFolder) or os.path.islink(
                        projectFolder):
                    for i in imgs_del:
                        filePath = os.path.join(projectFolder, i['filename'])
                        if os.path.isfile(filePath):
                            os.remove(filePath)

            # convert UUID
            for idx in range(len(imgs_del)):
                imgs_del[idx]['id'] = str(imgs_del[idx]['id'])

        return imgs_del

    def removeOrphanedImages(self, project):
        '''
            Queries the project's image entries in the database and retrieves
            entries for which no image can be found on disk anymore. Removes
            and returns those entries and all associated (meta-) data from the
            database.
        '''
        imgs_DB = self.dbConnector.execute(
            sql.SQL('''
            SELECT id, filename FROM {id_img};
        ''').format(id_img=sql.Identifier(project, 'image')), None, 'all')

        projectFolder = os.path.join(
            self.config.getProperty('FileServer', 'staticfiles_dir'), project)
        if (not os.path.isdir(projectFolder)) and (
                not os.path.islink(projectFolder)):
            return []
        imgs_disk = listDirectory(projectFolder, recursive=True)
        imgs_disk = set(imgs_disk)

        # get orphaned images
        imgs_orphaned = []
        for i in imgs_DB:
            if i['filename'] not in imgs_disk:
                imgs_orphaned.append(i['id'])
        # imgs_orphaned = list(set(imgs_DB).difference(imgs_disk))
        if not len(imgs_orphaned):
            return []

        # remove
        self.dbConnector.execute(
            sql.SQL('''
            DELETE FROM {id_iu} WHERE image IN %s;
            DELETE FROM {id_anno} WHERE image IN %s;
            DELETE FROM {id_pred} WHERE image IN %s;
            DELETE FROM {id_img} WHERE id IN %s;
        ''').format(id_iu=sql.Identifier(project, 'image_user'),
                    id_anno=sql.Identifier(project, 'annotation'),
                    id_pred=sql.Identifier(project, 'prediction'),
                    id_img=sql.Identifier(project, 'image')),
            tuple([tuple(imgs_orphaned)] * 4), None)

        return imgs_orphaned

    def prepareDataDownload(self,
                            project,
                            dataType='annotation',
                            userList=None,
                            dateRange=None,
                            extraFields=None,
                            segmaskFilenameOptions=None,
                            segmaskEncoding='rgb'):
        '''
            Polls the database for project data according to the
            specified restrictions:
            - dataType: "annotation" or "prediction"
            - userList: for type "annotation": None (all users) or
                        an iterable of user names
            - dateRange: None (all dates) or two values for a mini-
                         mum and maximum timestamp
            - extraFields: None (no field) or dict of keywords and bools for
                           additional fields (e.g. browser meta) to be queried.
            - segmaskFilenameOptions: customization parameters for segmentation
                                      mask images' file names.
            - segmaskEncoding: encoding of the segmentation mask pixel
                               values ("rgb" or "indexed")
            
            Creates a file in this machine's temporary directory
            and returns the file name to it.
            Note that in some cases (esp. for semantic segmentation),
            the number of queryable entries may be limited due to
            file size and free disk space restrictions. An upper cei-
            ling is specified in the configuration *.ini file ('TODO')
        '''

        now = datetime.now(tz=pytz.utc)

        # argument check
        if userList is None:
            userList = []
        elif isinstance(userList, str):
            userList = [userList]
        if dateRange is None:
            dateRange = []
        elif len(dateRange) == 1:
            dateRange = [dateRange, now]

        if extraFields is None or not isinstance(extraFields, dict):
            extraFields = {'meta': False}
        else:
            if not 'meta' in extraFields or not isinstance(
                    extraFields['meta'], bool):
                extraFields['meta'] = False

        if segmaskFilenameOptions is None:
            segmaskFilenameOptions = {
                'baseName': 'filename',
                'prefix': '',
                'suffix': ''
            }
        else:
            if not 'baseName' in segmaskFilenameOptions or \
                segmaskFilenameOptions['baseName'] not in ('filename', 'id'):
                segmaskFilenameOptions['baseName'] = 'filename'
            try:
                segmaskFilenameOptions['prefix'] = str(
                    segmaskFilenameOptions['prefix'])
            except:
                segmaskFilenameOptions['prefix'] = ''
            try:
                segmaskFilenameOptions['suffix'] = str(
                    segmaskFilenameOptions['suffix'])
            except:
                segmaskFilenameOptions['suffix'] = ''

            for char in self.FILENAMES_PROHIBITED_CHARS:
                segmaskFilenameOptions['prefix'] = segmaskFilenameOptions[
                    'prefix'].replace(char, '_')
                segmaskFilenameOptions['suffix'] = segmaskFilenameOptions[
                    'suffix'].replace(char, '_')

        # check metadata type: need to deal with segmentation masks separately
        if dataType == 'annotation':
            metaField = 'annotationtype'
        elif dataType == 'prediction':
            metaField = 'predictiontype'
        else:
            raise Exception('Invalid dataType specified ({})'.format(dataType))
        metaType = self.dbConnector.execute(
            '''
                SELECT {} FROM aide_admin.project
                WHERE shortname = %s;
            '''.format(metaField), (project, ), 1)[0][metaField]

        if metaType.lower() == 'segmentationmasks':
            is_segmentation = True
            fileExtension = '.zip'

            # create indexed color palette for segmentation masks
            if segmaskEncoding == 'indexed':
                try:
                    indexedColors = []
                    labelClasses = self.dbConnector.execute(
                        sql.SQL('''
                            SELECT idx, color FROM {id_lc} ORDER BY idx ASC;
                        ''').format(
                            id_lc=sql.Identifier(project, 'labelclass')), None,
                        'all')
                    currentIndex = 1
                    for lc in labelClasses:
                        if lc['idx'] == 0:
                            # background class
                            continue
                        while currentIndex < lc['idx']:
                            # gaps in label classes; fill with zeros
                            indexedColors.extend([0, 0, 0])
                            currentIndex += 1
                        color = lc['color']
                        if color is None:
                            # no color specified; add from defaults
                            #TODO
                            indexedColors.extend([0, 0, 0])
                        else:
                            # convert to RGB format
                            indexedColors.extend(helpers.hexToRGB(color))

                except:
                    # an error occurred; don't convert segmentation mask to indexed colors
                    indexedColors = None
            else:
                indexedColors = None

        else:
            is_segmentation = False
            fileExtension = '.txt'  #TODO: support JSON?

        # prepare output file
        filename = 'aide_query_{}'.format(
            now.strftime('%Y-%m-%d_%H-%M-%S')) + fileExtension
        destPath = os.path.join(self.tempDir, 'aide/downloadRequests', project)
        os.makedirs(destPath, exist_ok=True)
        destPath = os.path.join(destPath, filename)

        # generate query
        queryArgs = []
        tableID = sql.Identifier(project, dataType)
        userStr = sql.SQL('')
        iuStr = sql.SQL('')
        dateStr = sql.SQL('')
        queryFields = [
            'filename',
            'isGoldenQuestion',
            'date_image_added',
            'last_requested_image',
            'image_corrupt'  # default image fields
        ]
        if dataType == 'annotation':
            iuStr = sql.SQL('''
                JOIN (SELECT image AS iu_image, username AS iu_username, viewcount, last_checked, last_time_required FROM {id_iu}) AS iu
                ON t.image = iu.iu_image
                AND t.username = iu.iu_username
            ''').format(id_iu=sql.Identifier(project, 'image_user'))
            if len(userList):
                userStr = sql.SQL('WHERE username IN %s')
                queryArgs.append(tuple(userList))

            queryFields.extend(
                getattr(QueryStrings_annotation, metaType).value)
            queryFields.extend([
                'username', 'viewcount', 'last_checked', 'last_time_required'
            ])  #TODO: make customizable

        else:
            queryFields.extend(
                getattr(QueryStrings_prediction, metaType).value)

        if len(dateRange):
            if len(userStr.string):
                dateStr = sql.SQL(
                    ' AND timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)'
                )
            else:
                dateStr = sql.SQL(
                    'WHERE timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)'
                )
            queryArgs.extend(dateRange)

        if not is_segmentation:
            # join label classes
            lcStr = sql.SQL('''
                JOIN (SELECT id AS lcID, name AS labelclass_name, idx AS labelclass_index
                    FROM {id_lc}
                ) AS lc
                ON label = lc.lcID
            ''').format(id_lc=sql.Identifier(project, 'labelclass'))
            queryFields.extend(['labelclass_name', 'labelclass_index'])
        else:
            lcStr = sql.SQL('')

        # remove redundant query fields
        queryFields = set(queryFields)
        for key in extraFields.keys():
            if not extraFields[key]:
                queryFields.remove(key)
        queryFields = list(queryFields)

        queryStr = sql.SQL('''
            SELECT * FROM {tableID} AS t
            JOIN (
                SELECT id AS imgID, filename, isGoldenQuestion, date_added AS date_image_added, last_requested AS last_requested_image, corrupt AS image_corrupt
                FROM {id_img}
            ) AS img ON t.image = img.imgID
            {lcStr}
            {iuStr}
            {userStr}
            {dateStr}
        ''').format(tableID=tableID,
                    id_img=sql.Identifier(project, 'image'),
                    lcStr=lcStr,
                    iuStr=iuStr,
                    userStr=userStr,
                    dateStr=dateStr)

        # query and process data
        if is_segmentation:
            mainFile = zipfile.ZipFile(destPath, 'w', zipfile.ZIP_DEFLATED)
        else:
            mainFile = open(destPath, 'w')
        metaStr = '; '.join(queryFields) + '\n'

        with self.dbConnector.execute_cursor(queryStr,
                                             tuple(queryArgs)) as cursor:
            while True:
                b = cursor.fetchone()
                if b is None:
                    break

                if is_segmentation:
                    # convert and store segmentation mask separately
                    segmask_filename = 'segmentation_masks/'

                    if segmaskFilenameOptions['baseName'] == 'id':
                        innerFilename = b['image']
                        parent = ''
                    else:
                        innerFilename = b['filename']
                        parent, innerFilename = os.path.split(innerFilename)
                    finalFilename = os.path.join(
                        parent,
                        segmaskFilenameOptions['prefix'] + innerFilename +
                        segmaskFilenameOptions['suffix'] + '.tif')
                    segmask_filename += finalFilename

                    segmask = base64ToImage(b['segmentationmask'], b['width'],
                                            b['height'])

                    if indexedColors is not None and len(indexedColors) > 0:
                        # convert to indexed color and add color palette from label classes
                        segmask = segmask.convert('RGB').convert(
                            'P', palette=Image.ADAPTIVE, colors=3)
                        segmask.putpalette(indexedColors)

                    # save
                    bio = io.BytesIO()
                    segmask.save(bio, 'TIFF')
                    mainFile.writestr(segmask_filename, bio.getvalue())

                # store metadata
                metaLine = ''
                for field in queryFields:
                    if field.lower() == 'segmentationmask':
                        continue
                    metaLine += '{}; '.format(b[field.lower()])
                metaStr += metaLine + '\n'

        if is_segmentation:
            mainFile.writestr('query.txt', metaStr)
        else:
            mainFile.write(metaStr)

        if is_segmentation:
            # append separate text file for label classes
            labelclassQuery = sql.SQL('''
                SELECT id, name, color, labelclassgroup, idx AS labelclass_index
                FROM {id_lc};
            ''').format(id_lc=sql.Identifier(project, 'labelclass'))
            result = self.dbConnector.execute(labelclassQuery, None, 'all')
            lcStr = 'id,name,color,labelclassgroup,labelclass_index\n'
            for r in result:
                lcStr += '{},{},{},{},{}\n'.format(r['id'], r['name'],
                                                   r['color'],
                                                   r['labelclassgroup'],
                                                   r['labelclass_index'])
            mainFile.writestr('labelclasses.csv', lcStr)

        mainFile.close()

        return filename

    def watchImageFolders(self):
        '''
            Queries all projects that have the image folder watch functionality
            enabled and updates the projects, one by one, with the latest image
            changes.
        '''
        projects = self.dbConnector.execute(
            '''
                SELECT shortname, watch_folder_remove_missing_enabled
                FROM aide_admin.project
                WHERE watch_folder_enabled IS TRUE;
            ''', None, 'all')

        for p in projects:
            pName = p['shortname']

            # add new images
            _, imgs_added = self.addExistingImages(pName, None)

            # remove orphaned images (if enabled)
            if p['watch_folder_remove_missing_enabled']:
                imgs_orphaned = self.removeOrphanedImages(pName)
                if len(imgs_added) or len(imgs_orphaned):
                    print(
                        f'[Project {pName}] {len(imgs_added)} new images found and added, {len(imgs_orphaned)} orphaned images removed from database.'
                    )

            elif len(imgs_added):
                print(
                    f'[Project {pName}] {len(imgs_added)} new images found and added.'
                )

    def deleteProject(self, project, deleteFiles=False):
        '''
            Irreproducibly deletes a project, including all data and metadata, from the database.
            If "deleteFiles" is True, then any data on disk (images, etc.) are also deleted.

            This cannot be undone.
        '''
        print(f'Deleting project with shortname "{project}"...')

        # remove database entries
        print('\tRemoving database entries...')
        self.dbConnector.execute(
            '''
            DELETE FROM aide_admin.authentication
            WHERE project = %s;
            DELETE FROM aide_admin.project
            WHERE shortname = %s;
        ''', (
                project,
                project,
            ), None
        )  # already done by DataAdministration.middleware, but we do it again to be sure

        self.dbConnector.execute('''
            DROP SCHEMA IF EXISTS {} CASCADE;
        '''.format(project), None, None)  #TODO: Identifier?

        if deleteFiles:
            print('\tRemoving files...')

            messages = []

            def _onError(function, path, excinfo):
                #TODO
                from celery.contrib import rdb
                rdb.set_trace()
                messages.append({
                    'function': function,
                    'path': path,
                    'excinfo': excinfo
                })

            try:
                shutil.rmtree(os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project),
                              onerror=_onError)
            except Exception as e:
                messages.append(str(e))

            return messages

        return 0
コード例 #4
0
class DBMiddleware():
    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)

        self.project_immutables = {
        }  # project settings that cannot be changed (project shorthand -> {settings})

        self._fetchProjectSettings()
        self.sqlBuilder = SQLStringBuilder()
        self.annoParser = AnnotationParser()

    def _fetchProjectSettings(self):
        # AI controller URI
        aiControllerURI = self.config.getProperty('Server', 'aiController_uri')
        if aiControllerURI is None or aiControllerURI.strip() == '':
            # no AI backend configured
            aiControllerURI = None

        # global, project-independent settings
        self.globalSettings = {
            'indexURI':
            self.config.getProperty('Server',
                                    'index_uri',
                                    type=str,
                                    fallback='/'),
            'dataServerURI':
            self.config.getProperty('Server', 'dataServer_uri'),
            'aiControllerURI':
            aiControllerURI
        }

        # default styles
        try:
            # check if custom default styles are provided
            self.defaultStyles = json.load(
                open('config/default_ui_settings.json', 'r'))
        except:
            # resort to built-in styles
            self.defaultStyles = json.load(
                open(
                    'modules/ProjectAdministration/static/json/default_ui_settings.json',
                    'r'))

    def _assemble_annotations(self, project, cursor, hideGoldenQuestionInfo):
        response = {}
        while True:
            b = cursor.fetchone()
            if b is None:
                break

            imgID = str(b['image'])
            if not imgID in response:
                response[imgID] = {
                    'fileName': b['filename'],
                    'predictions': {},
                    'annotations': {},
                    'last_checked': None
                }
            viewcount = b['viewcount']
            if viewcount is not None:
                response[imgID]['viewcount'] = viewcount
            last_checked = b['last_checked']
            if last_checked is not None:
                if response[imgID]['last_checked'] is None:
                    response[imgID]['last_checked'] = last_checked
                else:
                    response[imgID]['last_checked'] = max(
                        response[imgID]['last_checked'], last_checked)

            if not hideGoldenQuestionInfo:
                response[imgID]['isGoldenQuestion'] = b['isgoldenquestion']

            # parse annotations and predictions
            entryID = str(b['id'])
            if b['ctype'] is not None:
                colnames = self.sqlBuilder.getColnames(
                    self.project_immutables[project]['annotationType'],
                    self.project_immutables[project]['predictionType'],
                    b['ctype'])
                entry = {}
                for c in colnames:
                    value = b[c]
                    if isinstance(value, datetime):
                        value = value.timestamp()
                    elif isinstance(value, UUID):
                        value = str(value)
                    entry[c] = value

                if b['ctype'] == 'annotation':
                    response[imgID]['annotations'][entryID] = entry
                elif b['ctype'] == 'prediction':
                    response[imgID]['predictions'][entryID] = entry

        return response

    def _set_images_requested(self, project, imageIDs):
        '''
            Sets column "last_requested" of relation "image"
            to the current date. This is done during image
            querying to signal that an image has been requested,
            but not (yet) viewed.
        '''
        # prepare insertion values
        now = datetime.now(tz=pytz.utc)
        vals = []
        for key in imageIDs:
            vals.append(key)
        if len(vals):
            queryStr = sql.SQL('''
                UPDATE {id_img}
                SET last_requested = %s
                WHERE id IN %s;
            ''').format(id_img=sql.Identifier(project, 'image'))
            self.dbConnector.execute(queryStr, (
                now,
                tuple(vals),
            ), None)

    def _get_sample_metadata(self, metaType):
        '''
            Returns a dummy annotation or prediction for the sample
            image in the "exampleData" folder, depending on the "metaType"
            specified (i.e., labels, points, boundingBoxes, or segmentationMasks).
        '''
        if metaType == 'labels':
            return {
                'id': '00000000-0000-0000-0000-000000000000',
                'label': '00000000-0000-0000-0000-000000000000',
                'confidence': 1.0,
                'priority': 1.0,
                'viewcount': None
            }
        elif metaType == 'points' or metaType == 'boundingBoxes':
            return {
                'id': '00000000-0000-0000-0000-000000000000',
                'label': '00000000-0000-0000-0000-000000000000',
                'x': 0.542959427207637,
                'y': 0.5322069489713102,
                'width': 0.6133651551312653,
                'height': 0.7407598263401316,
                'confidence': 1.0,
                'priority': 1.0,
                'viewcount': None
            }
        elif metaType == 'segmentationMasks':
            # read segmentation mask from disk
            segmask = Image.open(
                'modules/LabelUI/static/exampleData/sample_segmentationMask.tif'
            )
            segmask, width, height = helpers.imageToBase64(segmask)
            return {
                'id': '00000000-0000-0000-0000-000000000000',
                'width': width,
                'height': height,
                'segmentationmask': segmask,
                'confidence': 1.0,
                'priority': 1.0,
                'viewcount': None
            }
        else:
            return {}

    def get_project_immutables(self, project):
        if project not in self.project_immutables:
            queryStr = 'SELECT annotationType, predictionType, demoMode FROM aide_admin.project WHERE shortname = %s;'
            result = self.dbConnector.execute(queryStr, (project, ), 1)
            if result and len(result):
                self.project_immutables[project] = {
                    'annotationType': result[0]['annotationtype'],
                    'predictionType': result[0]['predictiontype'],
                    'demoMode': helpers.checkDemoMode(project,
                                                      self.dbConnector)
                }
            else:
                return None
        return self.project_immutables[project]

    def get_dynamic_project_settings(self, project):
        queryStr = 'SELECT ui_settings FROM aide_admin.project WHERE shortname = %s;'
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        result = json.loads(result[0]['ui_settings'])

        # complete styles with defaults where necessary (may be required for project that got upgraded from v1)
        result = helpers.check_args(result, self.defaultStyles)

        return result

    def getProjectSettings(self, project):
        '''
            Queries the database for general project-specific metadata, such as:
            - Classes: names, indices, default colors
            - Annotation type: one of {class labels, positions, bboxes}
        '''
        # publicly available info from DB
        projSettings = self.getProjectInfo(project)

        # label classes
        projSettings['classes'] = self.getClassDefinitions(project)

        # static and dynamic project settings and properties from configuration file
        projSettings = {
            **projSettings,
            **self.get_project_immutables(project),
            **self.get_dynamic_project_settings(project),
            **self.globalSettings
        }

        # append project shorthand to AIController URI
        if 'aiControllerURI' in projSettings and projSettings[
                'aiControllerURI'] is not None and len(
                    projSettings['aiControllerURI']):
            projSettings['aiControllerURI'] = os.path.join(
                projSettings['aiControllerURI'], project) + '/'

        return projSettings

    def getProjectInfo(self, project):
        '''
            Returns safe, shareable information about the project
            (i.e., users don't need to be part of the project to see these data).
        '''
        queryStr = '''
            SELECT shortname, name, description, demoMode,
            interface_enabled, archived, ai_model_enabled,
            ai_model_library, ai_alcriterion_library,
            segmentation_ignore_unlabeled
            FROM aide_admin.project
            WHERE shortname = %s
        '''
        result = self.dbConnector.execute(queryStr, (project, ), 1)[0]

        # provide flag if AI model is available
        aiModelAvailable = all([
            result['ai_model_enabled'], result['ai_model_library'] is not None
            and len(result['ai_model_library']),
            result['ai_alcriterion_library'] is not None
            and len(result['ai_alcriterion_library'])
        ])

        return {
            'projectShortname':
            result['shortname'],
            'projectName':
            result['name'],
            'projectDescription':
            result['description'],
            'demoMode':
            result['demomode'],
            'interface_enabled':
            result['interface_enabled'] and not result['archived'],
            'ai_model_available':
            aiModelAvailable,
            'segmentation_ignore_unlabeled':
            result['segmentation_ignore_unlabeled']
        }

    def getClassDefinitions(self, project, showHidden=False):
        '''
            Returns a dictionary with entries for all classes in the project.
        '''

        # query data
        if showHidden:
            hiddenSpec = ''
        else:
            hiddenSpec = 'WHERE hidden IS false'
        queryStr = sql.SQL('''
            SELECT 'group' AS type, id, NULL as idx, name, color, parent, NULL AS keystroke, NULL AS hidden FROM {}
            UNION ALL
            SELECT 'class' AS type, id, idx, name, color, labelclassgroup, keystroke, hidden FROM {}
            {};
            ''').format(sql.Identifier(project, 'labelclassgroup'),
                        sql.Identifier(project, 'labelclass'),
                        sql.SQL(hiddenSpec))

        classData = self.dbConnector.execute(queryStr, None, 'all')

        # assemble entries first
        allEntries = {}
        numClasses = 0
        for cl in classData:
            id = str(cl['id'])
            entry = {
                'id': id,
                'name': cl['name'],
                'color': cl['color'],
                'parent':
                str(cl['parent']) if cl['parent'] is not None else None,
                'hidden': cl['hidden']
            }
            if cl['type'] == 'group':
                entry['entries'] = {}
            else:
                entry['index'] = cl['idx']
                entry['keystroke'] = cl['keystroke']
                numClasses += 1
            allEntries[id] = entry

        # transform into tree
        def _find_parent(tree, parentID):
            if parentID is None:
                return None
            elif 'id' in tree and tree['id'] == parentID:
                return tree
            elif 'entries' in tree:
                for ek in tree['entries'].keys():
                    rv = _find_parent(tree['entries'][ek], parentID)
                    if rv is not None:
                        return rv
                return None
            else:
                return None

        allEntries = {'entries': allEntries}
        for key in list(allEntries['entries'].keys()):
            entry = allEntries['entries'][key]
            parentID = entry['parent']
            del entry['parent']

            if parentID is None:
                # entry or group with no parent: append to root directly
                allEntries['entries'][key] = entry

            else:
                # move item
                parent = _find_parent(allEntries, parentID)
                parent['entries'][key] = entry
                del allEntries['entries'][key]

        allEntries['numClasses'] = numClasses
        return allEntries

    def getBatch_fixed(self,
                       project,
                       username,
                       data,
                       hideGoldenQuestionInfo=True):
        '''
            Returns entries from the database based on the list of data entry identifiers specified.
        '''

        if not len(data):
            return {'entries': {}}

        # query
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getFixedImagesQueryString(
            project, projImmutables['annotationType'],
            projImmutables['predictionType'], projImmutables['demoMode'])

        # parse results
        queryVals = (
            tuple(UUID(d) for d in data),
            username,
            username,
        )
        if projImmutables['demoMode']:
            queryVals = (tuple(UUID(d) for d in data), )

        with self.dbConnector.execute_cursor(queryStr, queryVals) as cursor:
            try:
                response = self._assemble_annotations(project, cursor,
                                                      hideGoldenQuestionInfo)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()

        # mark images as requested
        self._set_images_requested(project, response)

        return {'entries': response}

    def getBatch_auto(self,
                      project,
                      username,
                      order='unlabeled',
                      subset='default',
                      limit=None,
                      hideGoldenQuestionInfo=True):
        '''
            TODO: description
        '''
        # query
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getNextBatchQueryString(
            project, projImmutables['annotationType'],
            projImmutables['predictionType'], order, subset,
            projImmutables['demoMode'])

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)

        # parse results
        queryVals = (
            username,
            username,
            limit,
            username,
        )
        if projImmutables[
                'demoMode']:  #TODO: demoMode can now change dynamically
            queryVals = (limit, )

        with self.dbConnector.execute_cursor(queryStr, queryVals) as cursor:
            response = self._assemble_annotations(project, cursor,
                                                  hideGoldenQuestionInfo)

        # mark images as requested
        self._set_images_requested(project, response)

        return {'entries': response}

    def getBatch_timeRange(self,
                           project,
                           minTimestamp,
                           maxTimestamp,
                           userList,
                           skipEmptyImages=False,
                           limit=None,
                           goldenQuestionsOnly=False,
                           hideGoldenQuestionInfo=True):
        '''
            Returns images that have been annotated within the given time range and/or
            by the given user(s). All arguments are optional.
            Useful for reviewing existing annotations.
        '''
        # query string
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getDateQueryString(
            project, projImmutables['annotationType'], minTimestamp,
            maxTimestamp, userList, skipEmptyImages, goldenQuestionsOnly)

        # check validity and provide arguments
        queryVals = []
        if userList is not None:
            queryVals.append(tuple(userList))
        if minTimestamp is not None:
            queryVals.append(minTimestamp)
        if maxTimestamp is not None:
            queryVals.append(maxTimestamp)
        if skipEmptyImages and userList is not None:
            queryVals.append(tuple(userList))

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)
        queryVals.append(limit)

        if userList is not None:
            queryVals.append(tuple(userList))

        # query and parse results
        with self.dbConnector.execute_cursor(queryStr,
                                             tuple(queryVals)) as cursor:
            try:
                response = self._assemble_annotations(project, cursor,
                                                      hideGoldenQuestionInfo)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()

        # # mark images as requested
        # self._set_images_requested(project, response)

        return {'entries': response}

    def get_timeRange(self,
                      project,
                      userList,
                      skipEmptyImages=False,
                      goldenQuestionsOnly=False):
        '''
            Returns two timestamps denoting the temporal limits within which
            images have been viewed by the users provided in the userList.
            Arguments:
            - userList: string (single user name) or list of strings (multiple).
                        Can also be None; in this case all annotations will be
                        checked.
            - skipEmptyImages: if True, only images that contain at least one
                               annotation will be considered.
            - goldenQuestionsOnly: if True, only images flagged as golden questions
                                   will be shown.
        '''
        # query string
        queryStr = self.sqlBuilder.getTimeRangeQueryString(
            project, userList, skipEmptyImages, goldenQuestionsOnly)

        arguments = (None if userList is None else tuple(userList))
        result = self.dbConnector.execute(queryStr, (arguments, ), numReturn=1)

        if result is not None and len(result):
            return {
                'minTimestamp': result[0]['mintimestamp'],
                'maxTimestamp': result[0]['maxtimestamp'],
            }
        else:
            return {'error': 'no annotations made'}

    def get_sampleData(self, project):
        '''
            Returns a sample image from the project, with annotations
            (from one of the admins) and predictions.
            If no image, no annotations, and/or no predictions are
            available, a built-in default is returned instead.
        '''
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getSampleDataQueryString(
            project, projImmutables['annotationType'],
            projImmutables['predictionType'])

        # query and parse results
        response = None
        with self.dbConnector.execute_cursor(queryStr, None) as cursor:
            try:
                response = self._assemble_annotations(project, cursor, True)
            except:
                pass

        if response is None or not len(response):
            # no valid data found for project; fall back to sample data
            response = {
                '00000000-0000-0000-0000-000000000000': {
                    'fileName':
                    '/static/interface/exampleData/sample_image.jpg',
                    'viewcount': 1,
                    'annotations': {
                        '00000000-0000-0000-0000-000000000000':
                        self._get_sample_metadata(
                            projImmutables['annotationType'])
                    },
                    'predictions': {
                        '00000000-0000-0000-0000-000000000000':
                        self._get_sample_metadata(
                            projImmutables['predictionType'])
                    },
                    'last_checked': None,
                    'isGoldenQuestion': True
                }
            }
        return response

    def submitAnnotations(self, project, username, submissions):
        '''
            Sends user-provided annotations to the database.
        '''
        projImmutables = self.get_project_immutables(project)
        if projImmutables['demoMode']:
            return 1

        # assemble values
        colnames = getattr(QueryStrings_annotation,
                           projImmutables['annotationType']).value
        values_insert = []
        values_update = []

        meta = (None if not 'meta' in submissions else json.dumps(
            submissions['meta']))

        # for deletion: remove all annotations whose image ID matches but whose annotation ID is not among the submitted ones
        ids = []

        viewcountValues = []
        for imageKey in submissions['entries']:
            entry = submissions['entries'][imageKey]

            try:
                lastChecked = entry['timeCreated']
                lastTimeRequired = entry['timeRequired']
                if lastTimeRequired is None: lastTimeRequired = 0
            except:
                lastChecked = datetime.now(tz=pytz.utc)
                lastTimeRequired = 0

            try:
                numInteractions = int(entry['numInteractions'])
            except:
                numInteractions = 0

            if 'annotations' in entry and len(entry['annotations']):
                for annotation in entry['annotations']:
                    # assemble annotation values
                    annotationTokens = self.annoParser.parseAnnotation(
                        annotation)
                    annoValues = []
                    for cname in colnames:
                        if cname == 'id':
                            if cname in annotationTokens:
                                # cast and only append id if the annotation is an existing one
                                annoValues.append(UUID(
                                    annotationTokens[cname]))
                                ids.append(UUID(annotationTokens[cname]))
                        elif cname == 'image':
                            annoValues.append(UUID(imageKey))
                        elif cname == 'label' and annotationTokens[
                                cname] is not None:
                            annoValues.append(UUID(annotationTokens[cname]))
                        elif cname == 'timeCreated':
                            try:
                                annoValues.append(
                                    dateutil.parser.parse(
                                        annotationTokens[cname]))
                            except:
                                annoValues.append(datetime.now(tz=pytz.utc))
                        elif cname == 'timeRequired':
                            timeReq = annotationTokens[cname]
                            if timeReq is None: timeReq = 0
                            annoValues.append(timeReq)
                        elif cname == 'username':
                            annoValues.append(username)
                        elif cname in annotationTokens:
                            annoValues.append(annotationTokens[cname])
                        elif cname == 'unsure':
                            if 'unsure' in annotationTokens and annotationTokens[
                                    'unsure'] is not None:
                                annoValues.append(annotationTokens[cname])
                            else:
                                annoValues.append(False)
                        elif cname == 'meta':
                            annoValues.append(meta)
                        else:
                            annoValues.append(None)
                    if 'id' in annotationTokens:
                        # existing annotation; update
                        values_update.append(tuple(annoValues))
                    else:
                        # new annotation
                        values_insert.append(tuple(annoValues))

            viewcountValues.append(
                (username, imageKey, 1, lastChecked, lastChecked,
                 lastTimeRequired, lastTimeRequired, numInteractions, meta))

        # delete all annotations that are not in submitted batch
        imageKeys = list(UUID(k) for k in submissions['entries'])
        if len(imageKeys):
            if len(ids):
                queryStr = sql.SQL('''
                    DELETE FROM {id_anno} WHERE username = %s AND id IN (
                        SELECT idQuery.id FROM (
                            SELECT * FROM {id_anno} WHERE id NOT IN %s
                        ) AS idQuery
                        JOIN (
                            SELECT * FROM {id_anno} WHERE image IN %s
                        ) AS imageQuery ON idQuery.id = imageQuery.id);
                ''').format(id_anno=sql.Identifier(project, 'annotation'))
                self.dbConnector.execute(queryStr, (
                    username,
                    tuple(ids),
                    tuple(imageKeys),
                ))
            else:
                # no annotations submitted; delete all annotations submitted before
                queryStr = sql.SQL('''
                    DELETE FROM {id_anno} WHERE username = %s AND image IN %s;
                ''').format(id_anno=sql.Identifier(project, 'annotation'))
                self.dbConnector.execute(queryStr, (
                    username,
                    tuple(imageKeys),
                ))

        # insert new annotations
        if len(values_insert):
            queryStr = sql.SQL('''
                INSERT INTO {id_anno} ({cols})
                VALUES %s ;
            ''').format(
                id_anno=sql.Identifier(project, 'annotation'),
                cols=sql.SQL(', ').join([sql.SQL(c) for c in colnames[1:]
                                         ])  # skip 'id' column
            )
            self.dbConnector.insert(queryStr, values_insert)

        # update existing annotations
        if len(values_update):

            updateCols = []
            for col in colnames:
                if col == 'label':
                    updateCols.append(sql.SQL('label = UUID(e.label)'))
                elif col == 'timeRequired':
                    # we sum the required times together
                    updateCols.append(
                        sql.SQL(
                            'timeRequired = COALESCE(a.timeRequired,0) + COALESCE(e.timeRequired,0)'
                        ))
                else:
                    updateCols.append(
                        sql.SQL('{col} = e.{col}').format(col=sql.SQL(col)))

            queryStr = sql.SQL('''
                UPDATE {id_anno} AS a
                SET {updateCols}
                FROM (VALUES %s) AS e({colnames})
                WHERE e.id = a.id
            ''').format(id_anno=sql.Identifier(project, 'annotation'),
                        updateCols=sql.SQL(', ').join(updateCols),
                        colnames=sql.SQL(', ').join(
                            [sql.SQL(c) for c in colnames]))

            self.dbConnector.insert(queryStr, values_update)

        # viewcount table
        queryStr = sql.SQL('''
            INSERT INTO {id_iu} (username, image, viewcount, first_checked, last_checked, last_time_required, total_time_required, num_interactions, meta)
            VALUES %s 
            ON CONFLICT (username, image) DO UPDATE SET viewcount = image_user.viewcount + 1,
                last_checked = EXCLUDED.last_checked,
                last_time_required = EXCLUDED.last_time_required,
                total_time_required = EXCLUDED.total_time_required + image_user.total_time_required,
                num_interactions = EXCLUDED.num_interactions + image_user.num_interactions,
                meta = EXCLUDED.meta;
        ''').format(id_iu=sql.Identifier(project, 'image_user'))
        self.dbConnector.insert(queryStr, viewcountValues)

        return 0

    def setGoldenQuestions(self, project, submissions):
        '''
            Receives an iterable of tuples (uuid, bool) and updates the
            property "isGoldenQuestion" of the images accordingly.
        '''
        projImmutables = self.get_project_immutables(project)
        if projImmutables['demoMode']:
            return 1

        queryStr = sql.SQL('''
            UPDATE {id_img} AS img SET isGoldenQuestion = c.isGoldenQuestion
            FROM (VALUES %s)
            AS c (id, isGoldenQuestion)
            WHERE c.id = img.id;
        ''').format(id_img=sql.Identifier(project, 'image'))
        self.dbConnector.insert(queryStr, submissions)

        return 0