def __init__(self, config):
        '''
            Sets up database access and the project settings, SQL and
            annotation parsing helpers.
        '''
        self.config = config
        self.dbConnector = Database(config)

        # project settings are fetched before the helpers are constructed
        # (NOTE(review): assumed order dependency — verify against helpers)
        self._fetchProjectSettings()
        self.sqlBuilder = SQLStringBuilder(config)
        self.annoParser = AnnotationParser(config)
Esempio n. 2
0
    def __init__(self, config):
        '''
            Sets up database access, the immutable project settings
            cache, and the SQL/annotation helpers.
        '''
        self.config = config
        self.dbConnector = Database(config)

        self.project_immutables = {}       # project settings that cannot be changed (project shorthand -> {settings})

        # project settings are fetched before the helpers are constructed
        self._fetchProjectSettings()
        self.sqlBuilder = SQLStringBuilder()
        self.annoParser = AnnotationParser()
Esempio n. 3
0
    def __init__(self, config, passiveMode=False):
        '''
            Sets up database access and the temporary file directory.

            Args:
                config: global AIDE configuration object
                passiveMode: if True, the instance is expected to skip
                             active administrative work (flag stored only)
        '''
        self.config = config
        self.dbConnector = Database(config)
        # fix: raw string avoids the invalid escape sequence '\_' in a
        # normal string literal; matches a trailing "_<number>" file suffix
        self.countPattern = re.compile(r'\_[0-9]+$')
        self.passiveMode = passiveMode

        # directory for temporary files; falls back to the system default
        self.tempDir = self.config.getProperty('FileServer',
                                               'tempfiles_dir',
                                               type=str,
                                               fallback=tempfile.gettempdir())
    def __init__(self, config):
        '''
            Sets up database access and loads the default UI style
            definitions, preferring a user-provided custom file over
            the built-in one.
        '''
        self.config = config
        self.dbConnector = Database(config)

        # load default UI settings
        try:
            # check if custom default styles are provided
            # fix: use context managers so the file handles are closed
            with open('config/default_ui_settings.json', 'r') as f:
                self.defaultUIsettings = json.load(f)
        except Exception:
            # resort to built-in styles (missing or malformed custom file)
            with open('modules/ProjectAdministration/static/json/default_ui_settings.json', 'r') as f:
                self.defaultUIsettings = json.load(f)
    def __init__(self, config):
        '''
            Sets up database access, binds the global Celery application
            to this process, and starts the message processor thread.
        '''
        self.config = config
        self.dbConn = Database(config)
        self.sqlBuilder = SQLStringBuilder(config)

        self.training = False  # will be set to True once start_training is called (and False as soon as everything about the training process has finished)

        # bind this process to the global Celery application
        self.celery_app = current_app
        self.celery_app.set_current()
        self.celery_app.set_default()

        self.watchdog = None  # note: watchdog only created if users poll status (i.e., if there's activity)

        #TODO
        # start background processing of Celery result messages
        self.messageProcessor = MessageProcessor(self.celery_app)
        self.messageProcessor.start()
Esempio n. 6
0
    def __init__(self, config, passiveMode=False, verbose_start=False):
        '''
            Sets up the AIWorker: database access and file server.

            Args:
                config: global AIDE configuration object
                passiveMode: if True, the worker is expected to skip
                             active work (flag stored only)
                verbose_start: if True, prints a status line to the console

            Raises:
                Exception: if database or file server setup fails.
        '''
        self.config = config

        if verbose_start:
            print('AIWorker'.ljust(22), end='')

        try:
            self.dbConnector = Database(config)
            self.passiveMode = passiveMode
            self._init_fileserver()
        except Exception as e:
            if verbose_start:
                LogDecorator.print_status('fail')
            # fix: chain the original exception explicitly so the full
            # traceback is preserved for debugging
            raise Exception(
                f'Could not launch AIWorker (message: "{str(e)}").') from e

        if verbose_start:
            LogDecorator.print_status('ok')
Esempio n. 7
0
    def __init__(self, config, app):
        '''
            Sets up the AIWorker; refuses to start in demo mode.

            Raises:
                Exception: if AIDE is configured in demo mode.
        '''
        if config.getProperty('Project', 'demoMode', type=bool,
                              fallback=False):
            raise Exception('AIWorker cannot be launched in demo mode.')

        self.config = config
        self.dbConnector = Database(config)
        # initialize file server access, model and active learning instances
        self._init_fileserver()
        self._init_model_instance()
        self._init_al_instance()
    def __init__(self, config):
        '''
            Binds the global Celery application to this process and
            creates the DataWorker carrying out the actual jobs.
        '''
        self.config = config
        self.dbConnector = Database(config)
        self.celery_app = current_app
        self.celery_app.set_current()
        self.celery_app.set_default()

        self.dataWorker = DataWorker(config)

        self.jobs = {}      # dict per project of jobs
Esempio n. 9
0
class AdminMiddleware:
    '''
        Administrative middleware: provides details about the AIDE
        service setup, the attached Celery workers, all projects and
        user accounts, and allows modifying user privileges.
    '''

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)

    def getServiceDetails(self, warn_error=False):
        '''
            Queries the indicated AIController and FileServer
            modules for availability and their version. Returns
            metadata about the setup of AIDE accordingly.
            Raises an Exception if not running on the main host.
            If "warn_error" is True, a warning statement is printed
            to the command line if the version of AIDE on the attached
            AIController and/or FileServer is not the same as on the
            host, or if the servers cannot be contacted.
        '''
        if warn_error:
            print('Contacting AIController...', end='')

        # check if running on the main host
        modules = os.environ['AIDE_MODULES'].strip().split(',')
        modules = set([m.strip() for m in modules])
        if not 'LabelUI' in modules:
            # not running on main host
            raise Exception('Not a main host; cannot query service details.')

        aic_uri = self.config.getProperty('Server',
                                          'aiController_uri',
                                          type=str,
                                          fallback=None)
        fs_uri = self.config.getProperty('Server',
                                         'dataServer_uri',
                                         type=str,
                                         fallback=None)

        if not is_localhost(aic_uri):
            # AIController runs on a different machine; poll for version of AIDE
            try:
                aic_response = requests.get(os.path.join(aic_uri, 'version'))
                aic_version = aic_response.text
                if warn_error and aic_version != AIDE_VERSION:
                    print(
                        'WARNING: AIDE version of connected AIController differs from main host.'
                    )
                    print(f'\tAIController URI: {aic_uri}')
                    print(f'\tAIController AIDE version:    {aic_version}')
                    print(f'\tAIDE version on this machine: {AIDE_VERSION}')

                elif warn_error:
                    LogDecorator.print_status('ok')
            except Exception as e:
                if warn_error:
                    LogDecorator.print_status('fail')
                    print(
                        f'WARNING: error connecting to AIController (message: "{str(e)}").'
                    )
                aic_version = None
        else:
            aic_version = AIDE_VERSION
            if warn_error:
                LogDecorator.print_status('ok')

        if not is_localhost(fs_uri):
            # same for the file server
            if warn_error:
                print('Contacting FileServer...', end='')
            try:
                fs_response = requests.get(os.path.join(fs_uri, 'version'))
                fs_version = fs_response.text
                if warn_error and fs_version != AIDE_VERSION:
                    LogDecorator.print_status('warn')
                    print(
                        'WARNING: AIDE version of connected FileServer differs from main host.'
                    )
                    print(f'\tFileServer URI: {fs_uri}')
                    print(f'\tFileServer AIDE version:       {fs_version}')
                    print(f'\tAIDE version on this machine: {AIDE_VERSION}')
                elif warn_error:
                    LogDecorator.print_status('ok')
            except Exception as e:
                if warn_error:
                    LogDecorator.print_status('fail')
                    print(
                        f'WARNING: error connecting to FileServer (message: "{str(e)}").'
                    )
                fs_version = None
        else:
            fs_version = AIDE_VERSION
            if warn_error:
                LogDecorator.print_status('ok')

        # query database
        dbVersion = self.dbConnector.execute('SHOW server_version;', None,
                                             1)[0]['server_version']
        try:
            # strip build suffix (e.g. "12.3 (Ubuntu ...)") down to the number
            dbVersion = dbVersion.split(' ')[0].strip()
        except Exception:
            # keep the raw version string if it cannot be parsed
            pass
        dbInfo = self.dbConnector.execute('SELECT version() AS version;', None,
                                          1)[0]['version']
        return {
            'aide_version': AIDE_VERSION,
            'AIController': {
                'uri': aic_uri,
                'aide_version': aic_version
            },
            'FileServer': {
                'uri': fs_uri,
                'aide_version': fs_version
            },
            'Database': {
                'version': dbVersion,
                'details': dbInfo
            }
        }

    def getCeleryWorkerDetails(self):
        '''
            Queries all Celery workers for their details (name,
            URL, capabilities, AIDE version, etc.)
        '''
        result = {}

        i = current_app.control.inspect()
        workers = i.stats()

        if workers is None or not len(workers):
            return result

        for w in workers:
            aiwV = celeryWorkerCommons.get_worker_details.s()
            try:
                res = aiwV.apply_async(queue=w)
                res = res.get(timeout=20)  #TODO: timeout (in seconds)
                result[w] = res
                result[w]['online'] = True
            except Exception as e:
                # worker did not respond in time; report it as offline
                result[w] = {'online': False, 'message': str(e)}
        return result

    def getProjectDetails(self):
        '''
            Returns projects and statistics about them
            (number of images, disk usage, etc.).
        '''

        # get all projects
        projects = {}
        response = self.dbConnector.execute(
            '''
                SELECT shortname, name, owner, annotationtype, predictiontype,
                    ispublic, demomode, ai_model_enabled, interface_enabled, archived,
                    COUNT(username) AS num_users
                FROM aide_admin.project AS p
                JOIN aide_admin.authentication AS auth
                ON p.shortname = auth.project
                GROUP BY shortname
            ''', None, 'all')

        for r in response:
            projDef = {}
            for key in r.keys():
                if key == 'interface_enabled':
                    # archived projects report their interface as disabled
                    projDef[key] = r['interface_enabled'] and not r['archived']
                # fix: must be "elif" — the plain copy below used to overwrite
                # the combined interface_enabled/archived value just computed
                elif key != 'shortname':
                    projDef[key] = r[key]
            projects[r['shortname']] = projDef

        # get statistics (number of annotations, predictions, prediction models, etc.)
        for project in projects.keys():
            stats = self.dbConnector.execute(
                sql.SQL('''
                    SELECT COUNT(*) AS count
                    FROM {id_img}
                    UNION ALL
                    SELECT COUNT(*)
                    FROM {id_anno}
                    UNION ALL
                    SELECT COUNT(*)
                    FROM {id_pred}
                    UNION ALL
                    SELECT SUM(viewcount)
                    FROM {id_iu}
                    UNION ALL
                    SELECT COUNT(*)
                    FROM {id_cnnstate}
                ''').format(id_img=sql.Identifier(project, 'image'),
                            id_anno=sql.Identifier(project, 'annotation'),
                            id_pred=sql.Identifier(project, 'prediction'),
                            id_iu=sql.Identifier(project, 'image_user'),
                            id_cnnstate=sql.Identifier(project, 'cnnstate')),
                None, 'all')
            # rows come back in the order of the UNION ALL above
            projects[project]['num_img'] = stats[0]['count']
            projects[project]['num_anno'] = stats[1]['count']
            projects[project]['num_pred'] = stats[2]['count']
            projects[project]['total_viewcount'] = stats[3]['count']
            projects[project]['num_cnnstates'] = stats[4]['count']

            # time statistics (last viewed)
            stats = self.dbConnector.execute(
                sql.SQL('''
                SELECT MIN(first_checked) AS first_checked,
                    MAX(last_checked) AS last_checked
                FROM {id_iu};
            ''').format(id_iu=sql.Identifier(project, 'image_user')), None, 1)
            try:
                projects[project]['first_checked'] = stats[0][
                    'first_checked'].timestamp()
            except Exception:
                # no image has been viewed yet (NULL timestamp)
                projects[project]['first_checked'] = None
            try:
                projects[project]['last_checked'] = stats[0][
                    'last_checked'].timestamp()
            except Exception:
                projects[project]['last_checked'] = None

        return projects

    def getUserDetails(self):
        '''
            Returns details about the user (name, number of
            enrolled projects, last activity, etc.).
        '''
        users = {}
        userData = self.dbConnector.execute(
            '''
                SELECT name, email, isSuperUser, canCreateProjects,
                    last_login, project, isAdmin, admitted_until, blocked_until
                FROM aide_admin.user AS u
                LEFT OUTER JOIN aide_admin.authentication AS auth
                ON u.name = auth.username
            ''', None, 'all')
        for ud in userData:
            if not ud['name'] in users:
                users[ud['name']] = {
                    'email':
                    ud['email'],
                    'canCreateProjects':
                    ud['cancreateprojects'],
                    'isSuperUser':
                    ud['issuperuser'],
                    'last_login': (None if ud['last_login'] is None else
                                   ud['last_login'].timestamp()),
                    'enrolled_projects': {}
                }
                if ud['project'] is not None:
                    admitted_until = ud['admitted_until']
                    if isinstance(admitted_until, datetime.datetime):
                        admitted_until = admitted_until.timestamp()
                    # fix: used to read ud['admitted_until'] (copy-paste bug)
                    blocked_until = ud['blocked_until']
                    if isinstance(blocked_until, datetime.datetime):
                        blocked_until = blocked_until.timestamp()
                    users[ud['name']]['enrolled_projects'][ud['project']] = {
                        'admitted_until': admitted_until,
                        'blocked_until': blocked_until
                    }
        return users

    def setCanCreateProjects(self, username, allowCreateProjects):
        '''
            Sets or unsets the flag on whether a user
            can create new projects or not.
        '''
        # check if user exists
        userExists = self.dbConnector.execute(
            '''
            SELECT * FROM aide_admin.user
            WHERE name = %s;
        ''', (username, ), 1)
        if not len(userExists):
            return {
                'success': False,
                'message': f'User with name "{username}" does not exist.'
            }

        result = self.dbConnector.execute(
            '''
            UPDATE aide_admin.user
            SET cancreateprojects = %s
            WHERE name = %s;
            SELECT cancreateprojects
            FROM aide_admin.user
            WHERE name = %s;
        ''', (allowCreateProjects, username, username), 1)
        # success iff the read-back value matches the requested one
        result = result[0]['cancreateprojects']
        return {'success': (result == allowCreateProjects)}
        '--modelPath',
        type=str,
        help=
        'Directory (absolute path) on this machine of the model state file to be considered.'
    )
    args = parser.parse_args()  # NOTE(review): 'parser' is defined above this excerpt (argparse setup)

    # setup
    print('Setup...')
    # point AIDE at the settings file unless the env var is already set
    if not 'AIDE_CONFIG_PATH' in os.environ:
        os.environ['AIDE_CONFIG_PATH'] = str(args.settings_filepath)

    # imports deferred until AIDE_CONFIG_PATH is set (config is read on import)
    from util.configDef import Config
    from modules.Database.app import Database
    config = Config()
    dbConn = Database(config)

    # load model class function
    print('Load and verify state dict...')
    from util.helpers import get_class_executable, current_time
    # resolve the configured model library and pull out its model class
    modelClass = getattr(
        get_class_executable(
            config.getProperty('AIController', 'model_lib_path')),
        'model_class')

    # load state dict
    stateDict = torch.load(open(args.modelPath, 'rb'))

    # verify model state
    # NOTE(review): loadFromStateDict is expected to raise on a bad state — verify
    model = modelClass.loadFromStateDict(stateDict)
Esempio n. 11
0
class AIControllerWorker:
    '''
        Helper for the AIController: assembles sets of image UUIDs for
        model training and inference jobs and splits them into chunks
        for distribution across the available Celery workers.
    '''

    def __init__(self, config, celery_app):
        self.config = config
        self.dbConn = Database(config)
        self.sqlBuilder = SQLStringBuilder(config)
        self.celery_app = celery_app

    def _get_num_available_workers(self):
        '''
            Returns the number of Celery workers currently registered
            with the broker, or 1 if the broker cannot be inspected.
        '''
        #TODO: filter for right tasks and queues
        #TODO: limit to n tasks per worker
        i = self.celery_app.control.inspect()
        if i is not None:
            stats = i.stats()
            if stats is not None:
                # fix: reuse the stats already fetched instead of
                # polling the broker a second time
                return len(stats)
        return 1  #TODO

    def get_training_images(self,
                            project,
                            epoch=None,
                            numEpochs=None,
                            minTimestamp='lastState',
                            includeGoldenQuestions=True,
                            minNumAnnoPerImage=0,
                            maxNumImages=None,
                            numChunks=1):
        '''
            Queries the database for the latest images to be used for model training.
            Returns a list with image UUIDs accordingly, split into the number of
            available workers.
            #TODO: includeGoldenQuestions
        '''
        # sanity checks
        if not (isinstance(minTimestamp, datetime) or minTimestamp
                == 'lastState' or minTimestamp == -1 or minTimestamp is None):
            raise ValueError(
                '{} is not a recognized property for variable "minTimestamp"'.
                format(str(minTimestamp)))

        # query image IDs
        queryVals = []

        if minTimestamp is None:
            timestampStr = sql.SQL('')
        elif minTimestamp == 'lastState':
            # NOTE(review): COALESCE with a non-NULL first argument always
            # yields to_timestamp(0); verify whether the argument order is intended
            timestampStr = sql.SQL('''
            WHERE iu.last_checked > COALESCE(to_timestamp(0),
            (SELECT MAX(timecreated) FROM {id_cnnstate}))''').format(
                id_cnnstate=sql.Identifier(project, 'cnnstate'))
        elif isinstance(minTimestamp, datetime):
            timestampStr = sql.SQL(
                'WHERE iu.last_checked > COALESCE(to_timestamp(0), %s)')
            queryVals.append(minTimestamp)
        elif isinstance(minTimestamp, int) or isinstance(minTimestamp, float):
            timestampStr = sql.SQL(
                'WHERE iu.last_checked > COALESCE(to_timestamp(0), to_timestamp(%s))'
            )
            queryVals.append(minTimestamp)

        # placeholder order below: timestamp, annotation count, limit
        if minNumAnnoPerImage > 0:
            queryVals.append(minNumAnnoPerImage)

        if maxNumImages is None or maxNumImages <= 0:
            limitStr = sql.SQL('')
        else:
            limitStr = sql.SQL('LIMIT %s')
            queryVals.append(maxNumImages)

        if minNumAnnoPerImage <= 0:
            queryStr = sql.SQL('''
                SELECT newestAnno.image FROM (
                    SELECT image, last_checked FROM {id_iu} AS iu
                    JOIN (
                        SELECT id AS iid
                        FROM {id_img}
                        WHERE corrupt IS NULL OR corrupt = FALSE
                    ) AS imgQ
                    ON iu.image = imgQ.iid
                    {timestampStr}
                    ORDER BY iu.last_checked ASC
                    {limitStr}
                ) AS newestAnno;
            ''').format(id_iu=sql.Identifier(project, 'image_user'),
                        id_img=sql.Identifier(project, 'image'),
                        timestampStr=timestampStr,
                        limitStr=limitStr)

        else:
            queryStr = sql.SQL('''
                SELECT newestAnno.image FROM (
                    SELECT image, last_checked FROM {id_iu} AS iu
                    JOIN (
                        SELECT id AS iid
                        FROM {id_img}
                        WHERE corrupt IS NULL OR corrupt = FALSE
                    ) AS imgQ
                    ON iu.image = imgQ.iid
                    {timestampStr}
                    {conjunction} image IN (
                        SELECT image FROM (
                            SELECT image, COUNT(*) AS cnt
                            FROM {id_anno}
                            GROUP BY image
                            ) AS annoCount
                        WHERE annoCount.cnt >= %s
                    )
                    ORDER BY iu.last_checked ASC
                    {limitStr}
                ) AS newestAnno;
            ''').format(id_iu=sql.Identifier(project, 'image_user'),
                        id_img=sql.Identifier(project, 'image'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        timestampStr=timestampStr,
                        conjunction=(sql.SQL('WHERE') if minTimestamp is None
                                     else sql.SQL('AND')),
                        limitStr=limitStr)

        imageIDs = self.dbConn.execute(queryStr, tuple(queryVals), 'all')
        imageIDs = [i['image'] for i in imageIDs]

        if numChunks > 1:
            # split for distribution across workers (TODO: also specify subset size for multiple jobs; randomly draw if needed)
            imageIDs = array_split(imageIDs, max(1,
                                                 len(imageIDs) // numChunks))
        else:
            imageIDs = [imageIDs]

        print("Assembled training images into {} chunks (length of first: {})".
              format(len(imageIDs), len(imageIDs[0])))
        return imageIDs

    def get_inference_images(self,
                             project,
                             epoch=None,
                             numEpochs=None,
                             goldenQuestionsOnly=False,
                             forceUnlabeled=False,
                             maxNumImages=None,
                             numChunks=1):
        '''
            Queries the database for the latest images to be used for
            inference after model training.
            Returns a list with image UUIDs accordingly, split into the
            number of available workers.
            #TODO: goldenQuestionsOnly
        '''
        if maxNumImages is None or maxNumImages <= 0:
            # fall back to the project-wide inference limit
            queryResult = self.dbConn.execute(
                '''
                    SELECT maxNumImages_inference
                    FROM aide_admin.project
                    WHERE shortname = %s;''', (project, ), 1)
            maxNumImages = queryResult[0]['maxnumimages_inference']

        queryVals = (maxNumImages, )

        # load the IDs of the images that are being subjected to inference
        # fix: local variable renamed from "sql" to avoid shadowing the
        # psycopg2 "sql" module used elsewhere in this class
        queryStr = self.sqlBuilder.getInferenceQueryString(
            project, forceUnlabeled, maxNumImages)
        imageIDs = self.dbConn.execute(queryStr, queryVals, 'all')
        imageIDs = [i['image'] for i in imageIDs]

        # # split for distribution across workers
        # if maxNumWorkers != 1:
        #     # only query the number of available workers if more than one is specified to save time
        #     num_available = self._get_num_available_workers()
        #     if maxNumWorkers == -1:
        #         maxNumWorkers = num_available   #TODO: more than one process per worker?
        #     else:
        #         maxNumWorkers = min(maxNumWorkers, num_available)

        if numChunks > 1:
            imageIDs = array_split(imageIDs, max(1,
                                                 len(imageIDs) // numChunks))
        else:
            imageIDs = [imageIDs]
        return imageIDs
Esempio n. 12
0
    def __init__(self, config):
        '''
            Sets up database access and the in-memory session registry.
        '''
        self.config = config
        self.dbConnector = Database(config)

        self.usersLoggedIn = {}  # username -> {timestamp, sessionToken}
Esempio n. 13
0
    Wrapper for the Celery message broker concerning
    the Model Marketplace module.

    2020-21 Benjamin Kellenberger
'''

import os
from celery import current_app
from .marketplaceWorker import ModelMarketplaceWorker
from modules.Database.app import Database
from util.configDef import Config

# initialize Model Marketplace worker
# NOTE: module-level side effects — a database connection is opened as soon
# as this module is imported, so that the Celery tasks below can share a
# single worker instance
modules = os.environ['AIDE_MODULES']  # NOTE(review): read but unused in this excerpt — verify
config = Config()
worker = ModelMarketplaceWorker(config, Database(config))


@current_app.task(name='ModelMarketplace.shareModel')
def share_model(project, username, modelID, modelName, modelDescription, tags,
                public, anonymous):
    '''
        Celery task wrapper: delegates sharing of a model state to the
        module-level Model Marketplace worker and returns its result.
    '''
    result = worker.shareModel(project, username, modelID, modelName,
                               modelDescription, tags, public, anonymous)
    return result


@current_app.task(name='ModelMarketplace.importModelDatabase')
def import_model_database(modelID, project, username):
    '''
        Celery task wrapper: delegates importing a model state from the
        database to the Model Marketplace worker.
        Note: the worker call takes its arguments in a different order
        than this task's signature.
    '''
    result = worker.importModelDatabase(project, username, modelID)
    return result


@current_app.task(name='ModelMarketplace.importModelURI')
 def __init__(self, config, passiveMode=False):
     '''
         Sets up database and file server access.
         (NOTE(review): indentation of this fragment looks mangled by
         extraction — verify against the original file.)
     '''
     self.config = config
     self.dbConnector = Database(config)
     self.passiveMode = passiveMode
     self._init_fileserver()
Esempio n. 15
0
 def __init__(self, config):
     '''
         Sets up database access, the LabelUI middleware, and the list
         of available AI models.
         (NOTE(review): indentation of this fragment looks mangled by
         extraction — verify against the original file.)
     '''
     self.config = config
     self.dbConnector = Database(config)
     self.labelUImiddleware = DBMiddleware(config)
     self._init_available_ai_models()
Esempio n. 16
0
class DataWorker:
    '''
        File-server-side data administration functionality
        (image listing, upload, etc.).
    '''

    # characters that may not appear in uploaded file names
    # (path traversal tokens, wildcards, and platform-reserved characters;
    # includes HTML-escaped variants of angle brackets)
    FILENAMES_PROHIBITED_CHARS = (
        '&lt;',
        '<',
        '>',
        '&gt;',
        '..',
        '/',
        '\\',
        '|',
        '?',
        '*',
        ':'  # for macOS
    )

    NUM_IMAGES_LIMIT = 4096  # maximum number of images that can be queried at once (to avoid bottlenecks)

    def __init__(self, config, passiveMode=False):
        '''
            Args:
                config: global AIDE configuration object
                passiveMode: if True, administrative notifications
                             (e.g. project folder creation) are ignored
        '''
        self.config = config
        self.dbConnector = Database(config)
        # fix: raw string avoids the invalid escape sequence '\_' in a
        # normal string literal; matches a trailing "_<number>" file suffix
        self.countPattern = re.compile(r'\_[0-9]+$')
        self.passiveMode = passiveMode

        # directory for temporary files; falls back to the system default
        self.tempDir = self.config.getProperty('FileServer',
                                               'tempfiles_dir',
                                               type=str,
                                               fallback=tempfile.gettempdir())

    def aide_internal_notify(self, message):
        '''
            Used for AIDE administrative communication,
            e.g. for setting up queues.
        '''
        if self.passiveMode:
            return
        if 'task' in message:
            if message['task'] == 'create_project_folders':
                # set up folders for a newly created project
                if 'projectName' in message:
                    destPath = os.path.join(
                        self.config.getProperty('FileServer',
                                                'staticfiles_dir'),
                        message['projectName'])
                    os.makedirs(destPath, exist_ok=True)

    ''' Image administration functionalities '''

    def listImages(self,
                   project,
                   folder=None,
                   imageAddedRange=None,
                   lastViewedRange=None,
                   viewcountRange=None,
                   numAnnoRange=None,
                   numPredRange=None,
                   orderBy=None,
                   order='desc',
                   startFrom=None,
                   limit=None):
        '''
            Returns a list of images, with ID, filename,
            date image was added, viewcount, number of annotations,
            number of predictions, and last time viewed, for a given
            project.
            The list can be filtered by all those properties (e.g.
            date and time image was added, last checked; number of
            annotations, etc.), as well as limited in length (images
            are sorted by date_added).
        '''
        queryArgs = []

        # assemble WHERE clauses; every clause but the first carries a
        # leading "AND", which is stripped again below if necessary
        filterStr = ''
        if folder is not None and isinstance(folder, str):
            filterStr += ' filename LIKE %s '
            queryArgs.append(folder + '%')
        if imageAddedRange is not None:  #TODO
            filterStr += 'AND date_added >= to_timestamp(%s) AND date_added <= to_timestamp(%s) '
            queryArgs.append(imageAddedRange[0])
            queryArgs.append(imageAddedRange[1])
        if lastViewedRange is not None:  #TODO
            filterStr += 'AND last_viewed >= to_timestamp(%s) AND last_viewed <= to_timestamp(%s) '
            queryArgs.append(lastViewedRange[0])
            queryArgs.append(lastViewedRange[1])
        if viewcountRange is not None:
            filterStr += 'AND viewcount >= %s AND viewcount <= %s '
            queryArgs.append(viewcountRange[0])
            queryArgs.append(viewcountRange[1])
        if numAnnoRange is not None:
            # fix: upper bound used to reference non-existent column "numAnno"
            filterStr += 'AND num_anno >= %s AND num_anno <= %s '
            queryArgs.append(numAnnoRange[0])
            queryArgs.append(numAnnoRange[1])
        if numPredRange is not None:
            filterStr += 'AND num_pred >= %s AND num_pred <= %s '
            queryArgs.append(numPredRange[0])
            queryArgs.append(numPredRange[1])
        if startFrom is not None:
            # pagination cursor: only return images with a larger UUID
            if not isinstance(startFrom, UUID):
                try:
                    startFrom = UUID(startFrom)
                except Exception:
                    startFrom = None
            if startFrom is not None:
                filterStr += ' AND img.id > %s '
                queryArgs.append(startFrom)
        filterStr = filterStr.strip()
        if filterStr.startswith('AND'):
            filterStr = filterStr[3:]
        if len(filterStr.strip()):
            filterStr = 'WHERE ' + filterStr
        filterStr = sql.SQL(filterStr)

        orderStr = sql.SQL('ORDER BY img.id ASC')
        if orderBy is not None:
            # NOTE(review): orderBy/order are interpolated as raw SQL;
            # callers must supply trusted/whitelisted values only
            orderStr = sql.SQL('ORDER BY {} {}, img.id ASC').format(
                sql.SQL(orderBy), sql.SQL(order))

        # clamp the limit to [1, NUM_IMAGES_LIMIT], tolerating floats,
        # numeric strings, and invalid values
        limitStr = sql.SQL('')
        if isinstance(limit, float):
            if not math.isnan(limit):
                limit = int(limit)
            else:
                limit = self.NUM_IMAGES_LIMIT
        elif isinstance(limit, str):
            try:
                limit = int(limit)
            except Exception:
                limit = self.NUM_IMAGES_LIMIT
        elif not isinstance(limit, int):
            limit = self.NUM_IMAGES_LIMIT
        limit = max(min(limit, self.NUM_IMAGES_LIMIT), 1)
        limitStr = sql.SQL('LIMIT %s')
        queryArgs.append(limit)

        queryStr = sql.SQL('''
            SELECT img.id, filename, EXTRACT(epoch FROM date_added) AS date_added,
                COALESCE(viewcount, 0) AS viewcount,
                EXTRACT(epoch FROM last_viewed) AS last_viewed,
                COALESCE(num_anno, 0) AS num_anno,
                COALESCE(num_pred, 0) AS num_pred,
                img.isGoldenQuestion
            FROM {id_img} AS img
            FULL OUTER JOIN (
                SELECT image, COUNT(*) AS viewcount, MAX(last_checked) AS last_viewed
                FROM {id_iu}
                GROUP BY image
            ) AS iu
            ON img.id = iu.image
            FULL OUTER JOIN (
                SELECT image, COUNT(*) AS num_anno
                FROM {id_anno}
                GROUP BY image
            ) AS anno
            ON img.id = anno.image
            FULL OUTER JOIN (
                SELECT image, COUNT(*) AS num_pred
                FROM {id_pred}
                GROUP BY image
            ) AS pred
            ON img.id = pred.image
            {filter}
            {order}
            {limit}
        ''').format(id_img=sql.Identifier(project, 'image'),
                    id_iu=sql.Identifier(project, 'image_user'),
                    id_anno=sql.Identifier(project, 'annotation'),
                    id_pred=sql.Identifier(project, 'prediction'),
                    filter=filterStr,
                    order=orderStr,
                    limit=limitStr)

        result = self.dbConnector.execute(queryStr, tuple(queryArgs), 'all')
        # stringify UUIDs for JSON serializability
        for idx in range(len(result)):
            result[idx]['id'] = str(result[idx]['id'])
        return result

    def uploadImages(self,
                     project,
                     images,
                     existingFiles='keepExisting',
                     splitImages=False,
                     splitProperties=None):
        '''
            Receives a dict of files (bottle.py file format),
            verifies their file extension and checks if they
            are loadable by PIL.
            If they are, they are saved to disk in the project's
            image folder, and registered in the database.
            Parameter "existingFiles" can be set as follows:
            - "keepExisting" (default): if an image already exists on
              disk with the same path/file name, the new image will be
              renamed with an underscore and trailing number.
            - "skipExisting": do not save images that already exist on
              disk under the same path/file name.
            - "replaceExisting": overwrite images that exist with the
              same path/file name. Note: in this case all existing anno-
              tations, predictions, and other metadata about those images,
              will be removed from the database.

            If "splitImages" is True, the uploaded images will be automati-
            cally divided into patches on a regular grid according to what
            is defined in "splitProperties". For example, the following
            definition:

                splitProperties = {
                    'patchSize': (800, 600),
                    'stride': (400, 300),
                    'tight': True
                }

            would divide the images into patches of size 800x600, with over-
            lap of 50% (denoted by the "stride" being half the "patchSize"),
            and with all patches completely inside the original image (para-
            meter "tight" makes the last patches to the far left and bottom
            of the image being fully inside the original image; they are shif-
            ted if needed).
            Instead of the full images, the patches are stored on disk and re-
            ferenced through the database. The name format for patches is
            "imageName_x_y.jpg", with "imageName" denoting the name of the ori-
            ginal image, and "x" and "y" the left and top position of the patch
            inside the original image.

            Returns image keys for images that were successfully
            saved, and keys and error messages for those that
            were not.
        '''
        # bookkeeping per input key: saved paths, valid keys, warnings, errors
        imgPaths_valid = []
        imgs_valid = []
        imgs_warn = {}
        imgs_error = {}
        for key in images.keys():
            try:
                nextUpload = images[key]
                nextFileName = nextUpload.raw_filename
                #TODO: check if raw_filename is compatible with uploads made from Windows

                # check if correct file suffix
                _, ext = os.path.splitext(nextFileName)
                if not ext.lower() in valid_image_extensions:
                    raise Exception(f'Invalid file type (*{ext})')

                # check if loadable as image
                cache = io.BytesIO()
                nextUpload.save(cache)
                try:
                    image = Image.open(cache)
                except Exception:
                    raise Exception('File is not a valid image.')

                # prepare image(s) to save to disk
                parent, filename = os.path.split(nextFileName)
                destFolder = os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project, parent)
                os.makedirs(destFolder, exist_ok=True)

                # NOTE(review): this rebinds the "images" parameter. The loop
                # above iterates over the original dict's keys() view, which is
                # unaffected by the rebinding, but a distinct local name would
                # be clearer.
                images = []
                filenames = []

                if not splitImages:
                    # upload the single image directly
                    images.append(image)
                    filenames.append(filename)

                else:
                    # split image into patches instead
                    images, coords = split_image(image,
                                                 splitProperties['patchSize'],
                                                 splitProperties['stride'],
                                                 splitProperties['tight'])
                    bareFileName, ext = os.path.splitext(filename)
                    # patch file names carry the left/top offset of each patch
                    filenames = [
                        f'{bareFileName}_{c[0]}_{c[1]}{ext}' for c in coords
                    ]

                # register and save all the images
                for i in range(len(images)):
                    subImage = images[i]
                    subFilename = filenames[i]

                    absFilePath = os.path.join(destFolder, subFilename)

                    # check if an image with the same name does not already exist
                    newFileName = subFilename
                    fileExists = os.path.exists(absFilePath)
                    if fileExists:
                        if existingFiles == 'keepExisting':
                            # rename new file: keep incrementing a "_<number>"
                            # suffix until a free file name is found
                            while (os.path.exists(absFilePath)):
                                # rename file
                                fn, ext = os.path.splitext(newFileName)
                                match = self.countPattern.search(fn)
                                if match is None:
                                    # no trailing number yet; start at 1
                                    newFileName = fn + '_1' + ext
                                else:
                                    # parse number
                                    number = int(fn[match.span()[0] +
                                                    1:match.span()[1]])
                                    newFileName = fn[:match.span(
                                    )[0]] + '_' + str(number + 1) + ext

                                absFilePath = os.path.join(
                                    destFolder, newFileName)
                                if not os.path.exists(absFilePath):
                                    imgs_warn[
                                        key] = 'An image with name "{}" already exists under given path on disk. Image has been renamed to "{}".'.format(
                                            subFilename, newFileName)

                        elif existingFiles == 'skipExisting':
                            # ignore new file; still report the existing path
                            # as valid so it gets (re-)registered below
                            imgs_warn[
                                key] = f'Image "{newFileName}" already exists on disk and has been skipped.'
                            imgs_valid.append(key)
                            imgPaths_valid.append(
                                os.path.join(parent, newFileName))
                            continue

                        elif existingFiles == 'replaceExisting':
                            # overwrite new file; first remove metadata
                            # (views, annotations, predictions, image row),
                            # keyed by the original relative file name
                            queryStr = sql.SQL('''
                                DELETE FROM {id_iu}
                                WHERE image = (
                                    SELECT id FROM {id_img}
                                    WHERE filename = %s
                                );
                                DELETE FROM {id_anno}
                                WHERE image = (
                                    SELECT id FROM {id_img}
                                    WHERE filename = %s
                                );
                                DELETE FROM {id_pred}
                                WHERE image = (
                                    SELECT id FROM {id_img}
                                    WHERE filename = %s
                                );
                                DELETE FROM {id_img}
                                WHERE filename = %s;
                            ''').format(
                                id_iu=sql.Identifier(project, 'image_user'),
                                id_anno=sql.Identifier(project, 'annotation'),
                                id_pred=sql.Identifier(project, 'prediction'),
                                id_img=sql.Identifier(project, 'image'))
                            self.dbConnector.execute(queryStr,
                                                     tuple([nextFileName] * 4),
                                                     None)

                            # remove file
                            try:
                                os.remove(absFilePath)
                                imgs_warn[key] = 'Image "{}" already existed on disk and has been deleted.\n'.format(newFileName) + \
                                                    'All metadata (views, annotations, predictions) have been removed from the database.'
                            except:
                                imgs_warn[key] = 'Image "{}" already existed on disk but could not be deleted.\n'.format(newFileName) + \
                                                    'All metadata (views, annotations, predictions) have been removed from the database.'

                    # write to disk
                    fileParent, _ = os.path.split(absFilePath)
                    if len(fileParent):
                        os.makedirs(fileParent, exist_ok=True)
                    subImage.save(absFilePath)

                    imgs_valid.append(key)
                    imgPaths_valid.append(os.path.join(parent, newFileName))

            except Exception as e:
                # any failure for this key is reported back to the caller
                imgs_error[key] = str(e)

        # register valid images in database
        if len(imgPaths_valid):
            queryStr = sql.SQL('''
                INSERT INTO {id_img} (filename)
                VALUES %s
                ON CONFLICT (filename) DO NOTHING;
            ''').format(id_img=sql.Identifier(project, 'image'))
            self.dbConnector.insert(queryStr, [(i, ) for i in imgPaths_valid])

        result = {
            'imgs_valid': imgs_valid,
            'imgPaths_valid': imgPaths_valid,
            'imgs_warn': imgs_warn,
            'imgs_error': imgs_error
        }

        return result

    def scanForImages(self, project):
        '''
            Searches the project image folder on disk for
            files that are valid, but have not (yet) been added
            to the database.
            Returns a list of paths with files.
        '''
        # locate the project's static files folder on disk
        projectFolder = os.path.join(
            self.config.getProperty('FileServer', 'staticfiles_dir'), project)
        if not (os.path.isdir(projectFolder)
                or os.path.islink(projectFolder)):
            # no folder exists for the project (should not happen due to broadcast at project creation)
            return []
        imgs_disk = listDirectory(projectFolder, recursive=True)

        # collect file paths already registered in the database
        queryStr = sql.SQL('''
            SELECT filename FROM {id_img};
        ''').format(id_img=sql.Identifier(project, 'image'))
        rows = self.dbConnector.execute(queryStr, None, 'all')
        imgs_database = {row['filename'] for row in rows}

        # anything on disk that the database does not know about yet
        return list(imgs_disk.difference(imgs_database))

    def addExistingImages(self, project, imageList=None):
        '''
            Scans the project folder on the file system
            for images that are physically saved, but not
            (yet) added to the database.
            Adds them to the project's database schema.
            If an imageList iterable is provided, only
            the intersection between identified images on
            disk and in the iterable are added.

            If 'imageList' is a string with contents 'all',
            all untracked images will be added.

            Returns a list of image IDs and file names that
            were eventually added to the project database schema.
        '''
        # images present on disk but missing from the database
        imgs_candidates = self.scanForImages(project)

        addAll = imageList is None or (isinstance(imageList, str)
                                       and imageList.lower() == 'all')
        if addAll:
            imgs_add = imgs_candidates
        else:
            if isinstance(imageList, dict):
                imageList = list(imageList.keys())
            # restrict to the requested subset
            imgs_add = list(set(imgs_candidates).intersection(set(imageList)))

        if not len(imgs_add):
            return 0, []

        # register the new images in the database
        queryStr = sql.SQL('''
            INSERT INTO {id_img} (filename)
            VALUES %s;
        ''').format(id_img=sql.Identifier(project, 'image'))
        self.dbConnector.insert(queryStr, tuple((fname, ) for fname in imgs_add))

        # retrieve the IDs assigned to the newly added images
        queryStr = sql.SQL('''
            SELECT id, filename FROM {id_img}
            WHERE filename IN %s;
        ''').format(id_img=sql.Identifier(project, 'image'))
        result = self.dbConnector.execute(queryStr, (tuple(imgs_add), ), 'all')

        status = (0 if result is not None and len(result) else 1)  #TODO
        return status, result

    def removeImages(self,
                     project,
                     imageList,
                     forceRemove=False,
                     deleteFromDisk=False):
        '''
            Receives an iterable of image IDs and removes them
            from the project database schema, including associated
            user views, annotations, and predictions made.
            Only removes entries if no user views, annotations, and
            predictions exist, or else if "forceRemove" is True.
            If "deleteFromDisk" is True, the image files are also
            deleted from the project directory on the file system.

            Returns a list of images that were deleted.
        '''

        # convert to a tuple of 1-tuples of UUIDs, as expected by psycopg2
        # when expanding the "IN %s" placeholders below
        imageList = tuple([(UUID(i), ) for i in imageList])

        queryArgs = []
        deleteArgs = []
        if forceRemove:
            # unconditional removal: select all requested images ...
            queryStr = sql.SQL('''
                SELECT id, filename
                FROM {id_img}
                WHERE id IN %s;
            ''').format(id_img=sql.Identifier(project, 'image'))
            queryArgs = tuple([imageList])

            # ... and drop their views, annotations, predictions, and rows
            deleteStr = sql.SQL('''
                DELETE FROM {id_iu} WHERE image IN %s;
                DELETE FROM {id_anno} WHERE image IN %s;
                DELETE FROM {id_pred} WHERE image IN %s;
                DELETE FROM {id_img} WHERE id IN %s;
            ''').format(id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'),
                        id_img=sql.Identifier(project, 'image'))
            # same image list for all four DELETE statements
            deleteArgs = tuple([imageList] * 4)

        else:
            # conservative removal: only images with no views, annotations,
            # or predictions attached
            queryStr = sql.SQL('''
                SELECT id, filename
                FROM {id_img}
                WHERE id IN %s
                AND id NOT IN (
                    SELECT image FROM {id_iu}
                    WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_anno}
                    WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_pred}
                    WHERE image IN %s
                );
            ''').format(id_img=sql.Identifier(project, 'image'),
                        id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'))
            queryArgs = tuple([imageList] * 4)

            deleteStr = sql.SQL('''
                DELETE FROM {id_img}
                WHERE id IN %s
                AND id NOT IN (
                    SELECT image FROM {id_iu}
                    WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_anno}
                    WHERE image IN %s
                    UNION ALL
                    SELECT image FROM {id_pred}
                    WHERE image IN %s
                );
            ''').format(id_img=sql.Identifier(project, 'image'),
                        id_iu=sql.Identifier(project, 'image_user'),
                        id_anno=sql.Identifier(project, 'annotation'),
                        id_pred=sql.Identifier(project, 'prediction'))
            deleteArgs = tuple([imageList] * 4)

        # retrieve images to be deleted
        imgs_del = self.dbConnector.execute(queryStr, queryArgs, 'all')

        if imgs_del is None:
            imgs_del = []

        if len(imgs_del):
            # delete images
            self.dbConnector.execute(deleteStr, deleteArgs, None)

            if deleteFromDisk:
                # also remove the image files from the project folder
                projectFolder = os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project)
                if os.path.isdir(projectFolder) or os.path.islink(
                        projectFolder):
                    for i in imgs_del:
                        filePath = os.path.join(projectFolder, i['filename'])
                        if os.path.isfile(filePath):
                            os.remove(filePath)

            # convert UUID to string for the response
            for idx in range(len(imgs_del)):
                imgs_del[idx]['id'] = str(imgs_del[idx]['id'])

        return imgs_del

    def removeOrphanedImages(self, project):
        '''
            Queries the project's image entries in the database and retrieves
            entries for which no image can be found on disk anymore. Removes
            and returns those entries and all associated (meta-) data from the
            database.
        '''
        # all images currently registered in the database
        imgs_DB = self.dbConnector.execute(
            sql.SQL('''
            SELECT id, filename FROM {id_img};
        ''').format(id_img=sql.Identifier(project, 'image')), None, 'all')

        # all files actually present in the project folder
        projectFolder = os.path.join(
            self.config.getProperty('FileServer', 'staticfiles_dir'), project)
        if not (os.path.isdir(projectFolder)
                or os.path.islink(projectFolder)):
            return []
        imgs_disk = set(listDirectory(projectFolder, recursive=True))

        # database entries without a matching file on disk
        imgs_orphaned = [
            entry['id'] for entry in imgs_DB
            if entry['filename'] not in imgs_disk
        ]
        if not len(imgs_orphaned):
            return []

        # purge views, annotations, predictions, and the image rows themselves
        self.dbConnector.execute(
            sql.SQL('''
            DELETE FROM {id_iu} WHERE image IN %s;
            DELETE FROM {id_anno} WHERE image IN %s;
            DELETE FROM {id_pred} WHERE image IN %s;
            DELETE FROM {id_img} WHERE id IN %s;
        ''').format(id_iu=sql.Identifier(project, 'image_user'),
                    id_anno=sql.Identifier(project, 'annotation'),
                    id_pred=sql.Identifier(project, 'prediction'),
                    id_img=sql.Identifier(project, 'image')),
            tuple([tuple(imgs_orphaned)] * 4), None)

        return imgs_orphaned

    def prepareDataDownload(self,
                            project,
                            dataType='annotation',
                            userList=None,
                            dateRange=None,
                            extraFields=None,
                            segmaskFilenameOptions=None,
                            segmaskEncoding='rgb'):
        '''
            Polls the database for project data according to the
            specified restrictions:
            - dataType: "annotation" or "prediction"
            - userList: for type "annotation": None (all users) or
                        an iterable of user names
            - dateRange: None (all dates) or two values for a mini-
                         mum and maximum timestamp
            - extraFields: None (no field) or dict of keywords and bools for
                           additional fields (e.g. browser meta) to be queried.
            - segmaskFilenameOptions: customization parameters for segmentation
                                      mask images' file names.
            - segmaskEncoding: encoding of the segmentation mask pixel
                               values ("rgb" or "indexed")

            Creates a file in this machine's temporary directory
            and returns the file name to it.
            Note that in some cases (esp. for semantic segmentation),
            the number of queryable entries may be limited due to
            file size and free disk space restrictions. An upper cei-
            ling is specified in the configuration *.ini file ('TODO')

            Raises an Exception for an invalid "dataType".
        '''

        now = datetime.now(tz=pytz.utc)

        # argument check
        if userList is None:
            userList = []
        elif isinstance(userList, str):
            userList = [userList]
        if dateRange is None:
            dateRange = []
        elif len(dateRange) == 1:
            # only a minimum timestamp given; use "now" as the maximum.
            # Bug fix: the previous code nested the whole iterable into the
            # new list ("[dateRange, now]"), yielding an invalid argument
            # for to_timestamp() below.
            dateRange = [dateRange[0], now]

        # ensure 'meta' key is present and boolean
        if extraFields is None or not isinstance(extraFields, dict):
            extraFields = {'meta': False}
        else:
            if not 'meta' in extraFields or not isinstance(
                    extraFields['meta'], bool):
                extraFields['meta'] = False

        # sanitize segmentation mask file name options
        if segmaskFilenameOptions is None:
            segmaskFilenameOptions = {
                'baseName': 'filename',
                'prefix': '',
                'suffix': ''
            }
        else:
            if not 'baseName' in segmaskFilenameOptions or \
                segmaskFilenameOptions['baseName'] not in ('filename', 'id'):
                segmaskFilenameOptions['baseName'] = 'filename'
            try:
                segmaskFilenameOptions['prefix'] = str(
                    segmaskFilenameOptions['prefix'])
            except Exception:
                segmaskFilenameOptions['prefix'] = ''
            try:
                segmaskFilenameOptions['suffix'] = str(
                    segmaskFilenameOptions['suffix'])
            except Exception:
                segmaskFilenameOptions['suffix'] = ''

            # replace characters that are not allowed in file names
            for char in self.FILENAMES_PROHIBITED_CHARS:
                segmaskFilenameOptions['prefix'] = segmaskFilenameOptions[
                    'prefix'].replace(char, '_')
                segmaskFilenameOptions['suffix'] = segmaskFilenameOptions[
                    'suffix'].replace(char, '_')

        # check metadata type: need to deal with segmentation masks separately
        if dataType == 'annotation':
            metaField = 'annotationtype'
        elif dataType == 'prediction':
            metaField = 'predictiontype'
        else:
            raise Exception('Invalid dataType specified ({})'.format(dataType))
        metaType = self.dbConnector.execute(
            '''
                SELECT {} FROM aide_admin.project
                WHERE shortname = %s;
            '''.format(metaField), (project, ), 1)[0][metaField]

        if metaType.lower() == 'segmentationmasks':
            is_segmentation = True
            fileExtension = '.zip'

            # create indexed color palette for segmentation masks
            if segmaskEncoding == 'indexed':
                try:
                    indexedColors = []
                    labelClasses = self.dbConnector.execute(
                        sql.SQL('''
                            SELECT idx, color FROM {id_lc} ORDER BY idx ASC;
                        ''').format(
                            id_lc=sql.Identifier(project, 'labelclass')), None,
                        'all')
                    currentIndex = 1
                    for lc in labelClasses:
                        if lc['idx'] == 0:
                            # background class
                            continue
                        while currentIndex < lc['idx']:
                            # gaps in label classes; fill with zeros
                            indexedColors.extend([0, 0, 0])
                            currentIndex += 1
                        color = lc['color']
                        if color is None:
                            # no color specified; add from defaults
                            #TODO
                            indexedColors.extend([0, 0, 0])
                        else:
                            # convert to RGB format
                            indexedColors.extend(helpers.hexToRGB(color))

                except Exception:
                    # an error occurred; don't convert segmentation mask to indexed colors
                    indexedColors = None
            else:
                indexedColors = None

        else:
            is_segmentation = False
            fileExtension = '.txt'  #TODO: support JSON?

        # prepare output file
        filename = 'aide_query_{}'.format(
            now.strftime('%Y-%m-%d_%H-%M-%S')) + fileExtension
        destPath = os.path.join(self.tempDir, 'aide/downloadRequests', project)
        os.makedirs(destPath, exist_ok=True)
        destPath = os.path.join(destPath, filename)

        # generate query
        queryArgs = []
        tableID = sql.Identifier(project, dataType)
        userStr = sql.SQL('')
        iuStr = sql.SQL('')
        dateStr = sql.SQL('')
        queryFields = [
            'filename',
            'isGoldenQuestion',
            'date_image_added',
            'last_requested_image',
            'image_corrupt'  # default image fields
        ]
        if dataType == 'annotation':
            # join per-user image view statistics
            iuStr = sql.SQL('''
                JOIN (SELECT image AS iu_image, username AS iu_username, viewcount, last_checked, last_time_required FROM {id_iu}) AS iu
                ON t.image = iu.iu_image
                AND t.username = iu.iu_username
            ''').format(id_iu=sql.Identifier(project, 'image_user'))
            if len(userList):
                userStr = sql.SQL('WHERE username IN %s')
                queryArgs.append(tuple(userList))

            queryFields.extend(
                getattr(QueryStrings_annotation, metaType).value)
            queryFields.extend([
                'username', 'viewcount', 'last_checked', 'last_time_required'
            ])  #TODO: make customizable

        else:
            queryFields.extend(
                getattr(QueryStrings_prediction, metaType).value)

        if len(dateRange):
            # prepend "AND" if a user filter ("WHERE") is already in place
            if len(userStr.string):
                dateStr = sql.SQL(
                    ' AND timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)'
                )
            else:
                dateStr = sql.SQL(
                    'WHERE timecreated >= to_timestamp(%s) AND timecreated <= to_timestamp(%s)'
                )
            queryArgs.extend(dateRange)

        if not is_segmentation:
            # join label classes
            lcStr = sql.SQL('''
                JOIN (SELECT id AS lcID, name AS labelclass_name, idx AS labelclass_index
                    FROM {id_lc}
                ) AS lc
                ON label = lc.lcID
            ''').format(id_lc=sql.Identifier(project, 'labelclass'))
            queryFields.extend(['labelclass_name', 'labelclass_index'])
        else:
            lcStr = sql.SQL('')

        # remove redundant query fields
        # NOTE(review): queryFields.remove(key) raises KeyError if a disabled
        # extra field is not among the queried fields; presumably the meta-
        # type field lists always contain the extraFields keys — verify.
        queryFields = set(queryFields)
        for key in extraFields.keys():
            if not extraFields[key]:
                queryFields.remove(key)
        queryFields = list(queryFields)

        queryStr = sql.SQL('''
            SELECT * FROM {tableID} AS t
            JOIN (
                SELECT id AS imgID, filename, isGoldenQuestion, date_added AS date_image_added, last_requested AS last_requested_image, corrupt AS image_corrupt
                FROM {id_img}
            ) AS img ON t.image = img.imgID
            {lcStr}
            {iuStr}
            {userStr}
            {dateStr}
        ''').format(tableID=tableID,
                    id_img=sql.Identifier(project, 'image'),
                    lcStr=lcStr,
                    iuStr=iuStr,
                    userStr=userStr,
                    dateStr=dateStr)

        # query and process data: ZIP archive for segmentation masks,
        # plain text file otherwise
        if is_segmentation:
            mainFile = zipfile.ZipFile(destPath, 'w', zipfile.ZIP_DEFLATED)
        else:
            mainFile = open(destPath, 'w')
        metaStr = '; '.join(queryFields) + '\n'

        with self.dbConnector.execute_cursor(queryStr,
                                             tuple(queryArgs)) as cursor:
            while True:
                b = cursor.fetchone()
                if b is None:
                    break

                if is_segmentation:
                    # convert and store segmentation mask separately
                    segmask_filename = 'segmentation_masks/'

                    if segmaskFilenameOptions['baseName'] == 'id':
                        innerFilename = b['image']
                        parent = ''
                    else:
                        innerFilename = b['filename']
                        parent, innerFilename = os.path.split(innerFilename)
                    finalFilename = os.path.join(
                        parent,
                        segmaskFilenameOptions['prefix'] + innerFilename +
                        segmaskFilenameOptions['suffix'] + '.tif')
                    segmask_filename += finalFilename

                    segmask = base64ToImage(b['segmentationmask'], b['width'],
                                            b['height'])

                    if indexedColors is not None and len(indexedColors) > 0:
                        # convert to indexed color and add color palette from label classes
                        # NOTE(review): "colors=3" limits the adaptive palette
                        # to three entries; presumably this should match the
                        # number of label classes — verify.
                        segmask = segmask.convert('RGB').convert(
                            'P', palette=Image.ADAPTIVE, colors=3)
                        segmask.putpalette(indexedColors)

                    # save
                    bio = io.BytesIO()
                    segmask.save(bio, 'TIFF')
                    mainFile.writestr(segmask_filename, bio.getvalue())

                # store metadata
                metaLine = ''
                for field in queryFields:
                    if field.lower() == 'segmentationmask':
                        # masks are stored as separate files above
                        continue
                    metaLine += '{}; '.format(b[field.lower()])
                metaStr += metaLine + '\n'

        if is_segmentation:
            mainFile.writestr('query.txt', metaStr)
        else:
            mainFile.write(metaStr)

        if is_segmentation:
            # append separate text file for label classes
            labelclassQuery = sql.SQL('''
                SELECT id, name, color, labelclassgroup, idx AS labelclass_index
                FROM {id_lc};
            ''').format(id_lc=sql.Identifier(project, 'labelclass'))
            result = self.dbConnector.execute(labelclassQuery, None, 'all')
            lcStr = 'id,name,color,labelclassgroup,labelclass_index\n'
            for r in result:
                lcStr += '{},{},{},{},{}\n'.format(r['id'], r['name'],
                                                   r['color'],
                                                   r['labelclassgroup'],
                                                   r['labelclass_index'])
            mainFile.writestr('labelclasses.csv', lcStr)

        mainFile.close()

        return filename

    def watchImageFolders(self):
        '''
            Queries all projects that have the image folder watch functionality
            enabled and updates the projects, one by one, with the latest image
            changes.
        '''
        watchedProjects = self.dbConnector.execute(
            '''
                SELECT shortname, watch_folder_remove_missing_enabled
                FROM aide_admin.project
                WHERE watch_folder_enabled IS TRUE;
            ''', None, 'all')

        for proj in watchedProjects:
            shortname = proj['shortname']

            # register any new images found on disk
            _, imgs_added = self.addExistingImages(shortname, None)

            if proj['watch_folder_remove_missing_enabled']:
                # also drop database entries whose files have disappeared
                imgs_orphaned = self.removeOrphanedImages(shortname)
                if len(imgs_added) or len(imgs_orphaned):
                    print(
                        f'[Project {shortname}] {len(imgs_added)} new images found and added, {len(imgs_orphaned)} orphaned images removed from database.'
                    )
            elif len(imgs_added):
                print(
                    f'[Project {shortname}] {len(imgs_added)} new images found and added.'
                )

    def deleteProject(self, project, deleteFiles=False):
        '''
            Irreproducibly deletes a project, including all data and metadata, from the database.
            If "deleteFiles" is True, then any data on disk (images, etc.) are also deleted.

            This cannot be undone.

            Returns a list of error messages encountered while removing files
            if "deleteFiles" is True, else 0.
        '''
        print(f'Deleting project with shortname "{project}"...')

        # remove database entries
        print('\tRemoving database entries...')
        self.dbConnector.execute(
            '''
            DELETE FROM aide_admin.authentication
            WHERE project = %s;
            DELETE FROM aide_admin.project
            WHERE shortname = %s;
        ''', (
                project,
                project,
            ), None
        )  # already done by DataAdministration.middleware, but we do it again to be sure

        self.dbConnector.execute('''
            DROP SCHEMA IF EXISTS {} CASCADE;
        '''.format(project), None, None)  #TODO: Identifier?

        if deleteFiles:
            print('\tRemoving files...')

            messages = []

            def _onError(function, path, excinfo):
                # collect the failure and let rmtree continue with the
                # remaining files.
                # Bug fix: removed leftover debugger trap
                # ("from celery.contrib import rdb; rdb.set_trace()"), which
                # would hang the worker on any file removal error.
                messages.append({
                    'function': function,
                    'path': path,
                    'excinfo': excinfo
                })

            try:
                shutil.rmtree(os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project),
                              onerror=_onError)
            except Exception as e:
                messages.append(str(e))

            return messages

        return 0
class ProjectConfigMiddleware:
    '''
        Middleware for project creation and configuration management.
    '''

    # prohibited project shortnames (reserved device names on MS Windows)
    PROHIBITED_SHORTNAMES = [
        'con',
        'prn',
        'aux',
        'nul',
        'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9'
    ]

    # prohibited project names (both as a whole and for shortnames);
    # these would collide with AIDE's own URL routes
    PROHIBITED_NAMES = [
        '', 'project', 'getavailableaimodels', 'getbackdrops',
        'verifyprojectname', 'verifyprojectshort', 'newproject',
        'createproject', 'statistics', 'static',
        # bugfix: a missing comma caused implicit string concatenation with
        # 'getprojects', so neither entry was actually prohibited
        'getcreateaccountunrestricted',
        'getprojects', 'about', 'favicon.ico', 'logincheck', 'logout', 'login',
        'dologin', 'createaccount', 'loginscreen', 'accountexists',
        'getauthentication', 'getusernames', 'docreateaccount', 'admin',
        'getservicedetails', 'getceleryworkerdetails', 'getprojectdetails',
        'getuserdetails', 'setpassword'
    ]

    # prohibited name prefixes
    PROHIBITED_NAME_PREFIXES = ['/', '?', '&']

    # patterns that are prohibited anywhere for shortnames (replaced with underscores)
    SHORTNAME_PATTERNS_REPLACE = [
        '|',
        '?',
        '*',
        ':'  # for macOS
    ]

    # patterns that are prohibited anywhere for both short and long names (no replacement)
    PROHIBITED_STRICT = ['&lt;', '<', '>', '&gt;', '..', '/', '\\']

    def __init__(self, config):
        '''
            "config": AIDE configuration object, also handed to the database
            connector.
        '''
        self.config = config
        self.dbConnector = Database(config)

        # load default UI settings; file handles are now closed properly
        # and the exception is narrowed from a bare "except:"
        try:
            # check if custom default styles are provided
            with open('config/default_ui_settings.json', 'r') as f:
                self.defaultUIsettings = json.load(f)
        except Exception:
            # resort to built-in styles
            with open('modules/ProjectAdministration/static/json/default_ui_settings.json',
                      'r') as f:
                self.defaultUIsettings = json.load(f)

    @staticmethod
    def _recursive_update(dictObject, target):
        '''
            Recursively iterates over all keys and sub-keys of "dictObject"
            and its sub-dicts and copies over values from dict "target", if
            they are available.
        '''
        for key in dictObject.keys():
            if key in target:
                if isinstance(dictObject[key], dict):
                    ProjectConfigMiddleware._recursive_update(
                        dictObject[key], target[key])
                else:
                    dictObject[key] = target[key]

    def getPlatformInfo(self, project, parameters=None):
        '''
            AIDE setup-specific platform metadata.
        '''
        # parse parameters (if provided) and compare with mutable entries
        allParams = set(['server_uri', 'server_dir', 'watch_folder_interval'])
        if parameters is not None and parameters != '*':
            if isinstance(parameters, str):
                parameters = [parameters.lower()]
            else:
                parameters = [p.lower() for p in parameters]
            set(parameters).intersection_update(allParams)
        else:
            parameters = allParams
        parameters = list(parameters)
        response = {}
        for param in parameters:
            if param.lower() == 'server_uri':
                uri = os.path.join(
                    self.config.getProperty('Server', 'dataServer_uri'),
                    project, 'files')
                response[param] = uri
            elif param.lower() == 'server_dir':
                sdir = os.path.join(
                    self.config.getProperty('FileServer', 'staticfiles_dir'),
                    project)
                response[param] = sdir
            elif param.lower() == 'watch_folder_interval':
                interval = self.config.getProperty('FileServer',
                                                   'watch_folder_interval',
                                                   type=float,
                                                   fallback=60)
                response[param] = interval

        return response

    def getProjectImmutables(self, project):
        queryStr = 'SELECT annotationType, predictionType, demoMode FROM aide_admin.project WHERE shortname = %s;'
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        if result and len(result):
            return {
                'annotationType': result[0]['annotationtype'],
                'predictionType': result[0]['predictiontype']
            }
        else:
            return None

    def getProjectInfo(self, project, parameters=None):
        '''
            Returns the mutable project properties requested in "parameters"
            (None or '*' for all; a single name; or an iterable of names) for
            the given project shortname, as a dict. Unknown parameter names
            are discarded. "ui_settings" is parsed from JSON and
            auto-completed with defaults where missing.
        '''
        # parse parameters (if provided) and restrict to the known mutable entries
        allParams = set([
            'name', 'description', 'ispublic', 'secret_token', 'demomode',
            'interface_enabled', 'ui_settings',
            'segmentation_ignore_unlabeled', 'ai_model_enabled',
            'ai_model_library', 'ai_model_settings', 'ai_alcriterion_library',
            'ai_alcriterion_settings', 'numimages_autotrain',
            'minnumannoperimage', 'maxnumimages_train', 'watch_folder_enabled',
            'watch_folder_remove_missing_enabled'
        ])
        if parameters is not None and parameters != '*':
            if isinstance(parameters, str):
                parameters = [parameters.lower()]
            else:
                parameters = [p.lower() for p in parameters]
            # bugfix: "set(parameters).intersection_update(allParams)" mutated
            # a throwaway set and discarded the result, so arbitrary column
            # names could slip into the SQL string built below; actually apply
            # the whitelist here
            parameters = set(parameters).intersection(allParams)
        else:
            parameters = allParams
        parameters = list(parameters)
        # safe to join directly: every entry has been validated against allParams
        sqlParameters = ','.join(parameters)

        queryStr = sql.SQL('''
        SELECT {} FROM aide_admin.project
        WHERE shortname = %s;
        ''').format(sql.SQL(sqlParameters))
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        result = result[0]

        # assemble response
        response = {}
        for param in parameters:
            value = result[param]
            if param == 'ui_settings':
                value = json.loads(value)

                # auto-complete with defaults where missing
                value = check_args(value, self.defaultUIsettings)
            response[param] = value

        return response

    def renewSecretToken(self, project):
        '''
            Creates a new secret token, invalidating the old one.
        '''
        try:
            newToken = secrets.token_urlsafe(32)
            result = self.dbConnector.execute(
                '''UPDATE aide_admin.project
                SET secret_token = %s
                WHERE shortname = %s;
                SELECT secret_token FROM aide_admin.project
                WHERE shortname = %s;
            ''', (
                    newToken,
                    project,
                    project,
                ), 1)
            return result[0]['secret_token']
        except:
            # this normally should not happen, since we checked for the validity of the project
            return None

    def getProjectUsers(self, project):
        '''
            Returns a list of users that are enrolled in the project,
            as well as their roles within the project.
        '''
        queryStr = sql.SQL(
            'SELECT * FROM aide_admin.authentication WHERE project = %s;')
        return self.dbConnector.execute(queryStr, (project, ), 'all')

    def createProject(self, username, properties):
        '''
            Receives the most basic, mostly non-changeable settings for a new project
            ("properties") with the following entries:
            - shortname
            - owner (the current username)
            - name
            - description
            - annotationType
            - predictionType

            More advanced settings (UI config, AI model, etc.) will be configured after
            the initial project creation stage.

            Verifies whether these settings are available for a new project. If they are,
            it creates a new database schema for the project, adds an entry for it to the
            admin schema table and makes the current user admin. Returns True in this case.
            Otherwise raises an exception.
        '''

        shortname = properties['shortname']

        # verify availability of the project name and shortname
        if not self.getProjectNameAvailable(properties['name']):
            raise Exception('Project name "{}" unavailable.'.format(
                properties['name']))
        if not self.getProjectShortNameAvailable(shortname):
            raise Exception(
                'Project shortname "{}" unavailable.'.format(shortname))

        # load base SQL
        # (template with placeholders for schema/table identifiers, filled below)
        with open('modules/ProjectAdministration/static/sql/create_schema.sql',
                  'r') as f:
            queryStr = sql.SQL(f.read())

        # determine annotation and prediction types and add fields accordingly
        annotationFields = list(
            getattr(Fields_annotation, properties['annotationType']).value)
        predictionFields = list(
            getattr(Fields_prediction, properties['predictionType']).value)

        # create project schema
        # (identifiers are escaped via psycopg2's sql.Identifier)
        self.dbConnector.execute(
            queryStr.format(
                id_schema=sql.Identifier(shortname),
                id_auth=sql.Identifier(
                    self.config.getProperty('Database', 'user')),
                id_image=sql.Identifier(shortname, 'image'),
                id_iu=sql.Identifier(shortname, 'image_user'),
                id_labelclassGroup=sql.Identifier(shortname,
                                                  'labelclassgroup'),
                id_labelclass=sql.Identifier(shortname, 'labelclass'),
                id_annotation=sql.Identifier(shortname, 'annotation'),
                id_cnnstate=sql.Identifier(shortname, 'cnnstate'),
                id_prediction=sql.Identifier(shortname, 'prediction'),
                id_workflow=sql.Identifier(shortname, 'workflow'),
                id_workflowHistory=sql.Identifier(shortname,
                                                  'workflowhistory'),
                annotation_fields=sql.SQL(', ').join(
                    [sql.SQL(field) for field in annotationFields]),
                prediction_fields=sql.SQL(', ').join(
                    [sql.SQL(field) for field in predictionFields])), None,
            None)

        # register project
        # (interface starts disabled; a fresh secret token is generated;
        # UI settings start from the loaded defaults)
        self.dbConnector.execute(
            '''
            INSERT INTO aide_admin.project (shortname, name, description,
                owner,
                secret_token,
                interface_enabled,
                annotationType, predictionType,
                isPublic, demoMode,
                ui_settings)
            VALUES (
                %s, %s, %s,
                %s,
                %s,
                %s,
                %s, %s,
                %s, %s,
                %s
            );
            ''', (shortname, properties['name'],
                  (properties['description'] if 'description' in properties
                   else ''), username, secrets.token_urlsafe(32), False,
                  properties['annotationType'], properties['predictionType'],
                  False, False, json.dumps(self.defaultUIsettings)), None)

        # register user in project
        # (the creating user becomes project admin)
        self.dbConnector.execute(
            '''
                INSERT INTO aide_admin.authentication (username, project, isAdmin)
                VALUES (%s, %s, true);
            ''', (
                username,
                shortname,
            ), None)

        # notify FileServer instance(s) to set up project folders
        process = fileServer_interface.aide_internal_notify.si({
            'task':
            'create_project_folders',
            'projectName':
            shortname
        })
        process.apply_async(queue='aide_broadcast', ignore_result=True)

        return True

    def updateProjectSettings(self, project, projectSettings):
        '''
            Updates mutable project settings ("projectSettings" dict) for the
            given project shortname. "ui_settings" may be provided as a JSON
            string or a dict; it is merged recursively into the currently
            stored UI settings and auto-completed with defaults where missing.

            Returns True upon completion.
        '''

        # check UI settings first
        if 'ui_settings' in projectSettings:
            if isinstance(projectSettings['ui_settings'], str):
                projectSettings['ui_settings'] = json.loads(
                    projectSettings['ui_settings'])
            fieldNames = [('welcomeMessage', str), ('numImagesPerBatch', int),
                          ('minImageWidth', int), ('numImageColumns_max', int),
                          ('defaultImage_w', int), ('defaultImage_h', int),
                          ('styles', dict), ('enableEmptyClass', bool),
                          ('showPredictions', bool),
                          ('showPredictions_minConf', float),
                          ('carryOverPredictions', bool),
                          ('carryOverRule', str),
                          ('carryOverPredictions_minConf', float),
                          ('defaultBoxSize_w', int), ('defaultBoxSize_h', int),
                          ('minBoxSize_w', int), ('minBoxSize_h', int)]
            uiSettings_new, uiSettingsKeys_new = parse_parameters(
                projectSettings['ui_settings'],
                fieldNames,
                absent_ok=True,
                escape=True)  #TODO: escape

            # adopt current settings and replace values accordingly
            uiSettings = self.dbConnector.execute(
                '''SELECT ui_settings
                    FROM aide_admin.project
                    WHERE shortname = %s;            
                ''', (project, ), 1)
            uiSettings = json.loads(uiSettings[0]['ui_settings'])
            for newKey, newValue in zip(uiSettingsKeys_new, uiSettings_new):
                # bugfix: guard against keys that are not yet present in the
                # stored settings (previously raised a KeyError)
                if newKey in uiSettings and isinstance(uiSettings[newKey], dict):
                    ProjectConfigMiddleware._recursive_update(
                        uiSettings[newKey], newValue)
                else:
                    uiSettings[newKey] = newValue

            # auto-complete with defaults where missing
            uiSettings = check_args(uiSettings, self.defaultUIsettings)

            projectSettings['ui_settings'] = json.dumps(uiSettings)

        # parse remaining parameters
        fieldNames = [('description', str), ('isPublic', bool),
                      ('secret_token', str), ('demoMode', bool),
                      ('ui_settings', str), ('interface_enabled', bool),
                      ('watch_folder_enabled', bool),
                      ('watch_folder_remove_missing_enabled', bool)]

        vals, params = parse_parameters(projectSettings,
                                        fieldNames,
                                        absent_ok=True,
                                        escape=False)
        vals.append(project)

        # commit to DB (column names come from the fixed fieldNames whitelist)
        queryStr = sql.SQL('''UPDATE aide_admin.project
            SET
            {}
            WHERE shortname = %s;
            ''').format(
            sql.SQL(',').join(
                [sql.SQL('{} = %s'.format(item)) for item in params]))

        self.dbConnector.execute(queryStr, tuple(vals), None)

        return True

    def updateClassDefinitions(self, project, classdef, removeMissing=False):
        '''
            Updates the project's class definitions.
            If "removeMissing" is set to True, label classes that are present
            in the database, but not in "classdef," will be removed. Label
            class groups will only be removed if they do not reference any
            label class present in "classdef." This functionality is disallowed
            in the case of segmentation masks.

            Returns True upon completion.
        '''

        # check if project contains segmentation masks
        metaType = self.dbConnector.execute(
            '''
                SELECT annotationType, predictionType FROM aide_admin.project
                WHERE shortname = %s;
            ''', (project, ), 1)[0]
        is_segmentation = any(
            ['segmentationmasks' in m.lower() for m in metaType.values()])
        if is_segmentation:
            # removing classes would invalidate pixels in existing masks
            removeMissing = False

        # get current classes from database
        db_classes = {}     # NOTE(review): collected but currently unused below
        db_groups = {}
        if removeMissing:
            queryStr = sql.SQL('''
                SELECT * FROM {id_lc} AS lc
                FULL OUTER JOIN (
                    SELECT id AS lcgid, name AS lcgname, parent, color
                    FROM {id_lcg}
                ) AS lcg
                ON lc.labelclassgroup = lcg.lcgid
            ''').format(id_lc=sql.Identifier(project, 'labelclass'),
                        id_lcg=sql.Identifier(project, 'labelclassgroup'))
            result = self.dbConnector.execute(queryStr, None, 'all')
            for r in result:
                if r['id'] is not None:
                    db_classes[r['id']] = r
                if r['lcgid'] is not None:
                    if not r['lcgid'] in db_groups:
                        db_groups[r['lcgid']] = {**r, **{'num_children': 0}}
                    elif not 'lcgid' in db_groups[r['lcgid']]:
                        db_groups[r['lcgid']] = {**db_groups[r['lcgid']], **r}
                if r['labelclassgroup'] is not None:
                    # count label classes per group
                    if not r['labelclassgroup'] in db_groups:
                        db_groups[r['labelclassgroup']] = {'num_children': 1}
                    else:
                        db_groups[r['labelclassgroup']]['num_children'] += 1

        # parse provided class definitions list
        unique_keystrokes = set()
        classes_update = []
        classgroups_update = []
        assigned_ids = set()    # all IDs assigned so far (classes and groups)

        def _parse_item(item, parent=None):
            # get or create ID for item
            try:
                itemID = uuid.UUID(item['id'])
            except Exception:
                itemID = uuid.uuid1()
                # bugfix: the previous check compared the UUID against lists
                # of entry dicts, so a collision could never be detected
                while itemID in assigned_ids:
                    itemID = uuid.uuid1()
            assigned_ids.add(itemID)

            entry = {
                'id': itemID,
                'name': item['name'],
                'color': (None if not 'color' in item else item['color']),
                'keystroke': None,
                'labelclassgroup': parent
            }
            if 'children' in item:
                # label class group
                classgroups_update.append(entry)
                for child in item['children']:
                    _parse_item(child, itemID)
            else:
                # label class; keystrokes must be unique (first come, first served)
                if 'keystroke' in item and not item[
                        'keystroke'] in unique_keystrokes:
                    entry['keystroke'] = item['keystroke']
                    unique_keystrokes.add(item['keystroke'])
                classes_update.append(entry)

        for item in classdef:
            _parse_item(item, None)

        # apply changes
        if removeMissing:
            queryArgs = []
            if len(classes_update):
                # remove all missing label classes
                lcSpec = sql.SQL('WHERE id NOT IN %s')
                queryArgs.append(tuple([(l['id'], ) for l in classes_update]))
            else:
                # remove all label classes
                # bugfix: this branch previously assigned "lcgSpec", leaving
                # "lcSpec" undefined and raising a NameError below
                lcSpec = sql.SQL('')
            if len(classgroups_update):
                # remove all missing labelclass groups
                lcgSpec = sql.SQL('WHERE id NOT IN %s')
                queryArgs.append(
                    tuple([(l['id'], ) for l in classgroups_update]))
            else:
                # remove all labelclass groups
                lcgSpec = sql.SQL('')
            queryStr = sql.SQL('''
                DELETE FROM {id_lc}
                {lcSpec};
                DELETE FROM {id_lcg}
                {lcgSpec};
            ''').format(id_lc=sql.Identifier(project, 'labelclass'),
                        id_lcg=sql.Identifier(project, 'labelclassgroup'),
                        lcSpec=lcSpec,
                        lcgSpec=lcgSpec)
            self.dbConnector.execute(queryStr, tuple(queryArgs), None)

        # add/update in order (groups, set their parents, label classes)
        groups_new = [(
            g['id'],
            g['name'],
            g['color'],
        ) for g in classgroups_update]
        queryStr = sql.SQL('''
            INSERT INTO {id_lcg} (id, name, color)
            VALUES %s
            ON CONFLICT (id) DO UPDATE SET
                name = EXCLUDED.name,
                color = EXCLUDED.color;
        ''').format(  #TODO: on conflict(name)
            id_lcg=sql.Identifier(project, 'labelclassgroup'))
        self.dbConnector.insert(queryStr, groups_new)

        # set parents
        groups_parents = [
            (
                g['id'],
                g['labelclassgroup'],
            ) for g in classgroups_update
            if ('labelclassgroup' in g and g['labelclassgroup'] is not None)
        ]
        queryStr = sql.SQL('''
            UPDATE {id_lcg} AS lcg
            SET parent = q.parent
            FROM (VALUES %s) AS q(id, parent)
            WHERE lcg.id = q.id;
        ''').format(id_lcg=sql.Identifier(project, 'labelclassgroup'))
        self.dbConnector.insert(queryStr, groups_parents)

        # insert/update label classes
        lcdata = [(
            l['id'],
            l['name'],
            l['color'],
            l['keystroke'],
            l['labelclassgroup'],
        ) for l in classes_update]
        queryStr = sql.SQL('''
            INSERT INTO {id_lc} (id, name, color, keystroke, labelclassgroup)
            VALUES %s
            ON CONFLICT (id) DO UPDATE
            SET name = EXCLUDED.name,
            color = EXCLUDED.color,
            keystroke = EXCLUDED.keystroke,
            labelclassgroup = EXCLUDED.labelclassgroup;
        ''').format(  #TODO: on conflict(name)
            id_lc=sql.Identifier(project, 'labelclass'))
        self.dbConnector.insert(queryStr, lcdata)

        return True

    def getProjectNameAvailable(self, projectName):
        '''
            Returns True if the provided project (long) name is available.
        '''
        if not isinstance(projectName, str):
            return False
        projectName = projectName.strip().lower()
        if not len(projectName):
            return False

        # check if name matches prohibited AIDE keywords (we do not replace long names)
        if projectName in self.PROHIBITED_STRICT or any(
            [p in projectName for p in self.PROHIBITED_STRICT]):
            return False
        if projectName in self.PROHIBITED_NAMES:
            return False
        if any(
            [projectName.startswith(p)
             for p in self.PROHIBITED_NAME_PREFIXES]):
            return False

        # check if name is already taken
        result = self.dbConnector.execute(
            '''SELECT 1 AS result
            FROM aide_admin.project
            WHERE name = %s;
            ''', (projectName, ), 1)

        if result is None or not len(result):
            return True
        else:
            return result[0]['result'] != 1

    def getProjectShortNameAvailable(self, projectName):
        '''
            Returns True if the provided project shortname is available.
            In essence, "available" means that a database schema with the given
            name can be created (this includes Postgres schema name conventions).
            Returns False otherwise.
        '''
        if not isinstance(projectName, str):
            return False
        projectName = projectName.strip().lower()
        if not len(projectName):
            return False

        # check if name matches prohibited AIDE keywords; replace where possible
        if projectName in self.PROHIBITED_STRICT or any(
            [p in projectName for p in self.PROHIBITED_STRICT]):
            return False
        if projectName in self.PROHIBITED_NAMES or projectName in self.PROHIBITED_SHORTNAMES:
            return False
        if any(
            [projectName.startswith(p)
             for p in self.PROHIBITED_NAME_PREFIXES]):
            return False
        for p in self.SHORTNAME_PATTERNS_REPLACE:
            projectName = projectName.replace(p, '_')

        # check if provided name is valid as per Postgres conventions
        matches = re.findall('(^(pg_|[0-9]).*|.*(\$|\s)+.*)', projectName)
        if len(matches):
            return False

        # check if project shorthand already exists in database
        result = self.dbConnector.execute(
            '''SELECT 1 AS result
            FROM information_schema.schemata
            WHERE schema_name ilike %s
            UNION ALL
            SELECT 1 FROM aide_admin.project
            WHERE shortname ilike %s;
            ''', (
                projectName,
                projectName,
            ), 2)

        if result is None or not len(result):
            return True

        if len(result) == 2:
            return result[0]['result'] != 1 and result[1]['result'] != 1
        elif len(result) == 1:
            return result[0]['result'] != 1
        else:
            return True
    # NOTE(review): this __init__ appears to be a stray duplicate of
    # ModelMarketplaceMiddleware.__init__ below (likely a paste artifact);
    # defined at this position it silently overrides the
    # ProjectConfigMiddleware.__init__ defined earlier in the class —
    # confirm and remove.
    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)

        self.labelUImiddleware = DBMiddleware(config)
class ModelMarketplaceMiddleware:
    '''
        Middleware for browsing, sharing and importing AI model states
        through the Model Marketplace.
    '''

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)

        # LabelUI middleware; used to query project immutables and class definitions
        self.labelUImiddleware = DBMiddleware(config)
    

    def getModelsMarketplace(self, project):
        '''
            Returns a dict of model state meta data,
            filtered by the project settings (model library;
            annotation type, prediction type).
        '''

        # get project meta data (immutables, model library)
        projectMeta = self.dbConnector.execute(
            '''
                SELECT annotationType, predictionType, ai_model_library
                FROM aide_admin.project
                WHERE shortname = %s;
            ''',
            (project,),
            1
        )
        if projectMeta is None or not len(projectMeta):
            raise Exception(f'Project {project} could not be found in database.')
        projectMeta = projectMeta[0]
        annotationType = projectMeta['annotationtype']
        predictionType = projectMeta['predictiontype']

        # get matching model states
        result = self.dbConnector.execute(
            '''
                SELECT * FROM (
                    SELECT id, name, description, labelclasses, model_library,
                    annotationType, predictionType, EXTRACT(epoch FROM timeCreated) AS time_created, alcriterion_library,
                    public, anonymous, selectCount,
                    CASE WHEN anonymous THEN NULL ELSE author END AS author,
                    CASE WHEN anonymous THEN NULL ELSE origin_project END AS origin_project,
                    CASE WHEN anonymous THEN NULL ELSE origin_uuid END AS origin_uuid
                    FROM aide_admin.modelMarketplace
                    WHERE annotationType = %s AND
                    predictionType = %s
                    AND (
                        public = TRUE OR
                        origin_project = %s
                    )
                ) AS mm
                LEFT OUTER JOIN (
                    SELECT name AS projectName, shortname
                    FROM aide_admin.project
                ) AS pn
                ON mm.origin_project = pn.shortname;
            ''',
            (annotationType, predictionType, project),
            'all'
        )
        if result is not None and len(result):
            matchingStates = {}
            for r in result:
                stateID = str(r['id'])
                values = {}
                for key in r.keys():
                    if isinstance(r[key], UUID):
                        values[key] = str(r[key])
                    else:
                        values[key] = r[key]
                matchingStates[stateID] = values
        else:
            matchingStates = {}

        return matchingStates



    def importModel(self, project, modelID):
        '''
            Imports a shared model state from the Model Marketplace into a
            project.
            NOTE(review): implementation is incomplete — it currently only
            fetches the model's meta data and returns 0; the suitability
            check and the actual import are still TODO.
        '''
        # get model meta data
        meta = self.dbConnector.execute('''
            SELECT id, name, description, labelclasses, model_library,
            annotationType, predictionType, timeCreated, alcriterion_library,
            public, anonymous, selectCount,
            CASE WHEN anonymous THEN NULL ELSE author END AS author,
            CASE WHEN anonymous THEN NULL ELSE origin_project END AS origin_project,
            CASE WHEN anonymous THEN NULL ELSE origin_uuid END AS origin_uuid
            FROM aide_admin.modelMarketplace
            WHERE id = %s;
        ''', (UUID(modelID),), 1)

        # check if model is suitable for current project
        #TODO
        return 0


    
    def shareModel(self, project, username, modelID, modelName, modelDescription,
                    public, anonymous):
        '''
            Publishes the model state "modelID" of a project to the Model
            Marketplace under the given name and description. "public"
            determines whether other projects can see the entry; "anonymous"
            hides the author and origin project.

            Returns a dict with "status" 0 and "shared_model_id" on success;
            status 2 if the state has already been shared, status 3 if the
            chosen name is already taken.
        '''
        #TODO: export as Celery task

        modelID = UUID(modelID)

        # check if model hasn't already been shared
        isShared = self.dbConnector.execute('''
            SELECT author
            FROM aide_admin.modelMarketplace
            WHERE origin_project = %s AND origin_uuid = %s;
        ''', (project, modelID), 'all')
        if isShared is not None and len(isShared):
            author = isShared[0]['author']
            return {
                'status': 2,
                'message': f'Model state has already been shared by {author}.'
            }

        # get project immutables
        immutables = self.labelUImiddleware.get_project_immutables(project)

        # get label class info
        labelclasses = json.dumps(self.labelUImiddleware.getClassDefinitions(project, False))

        # check if name is unique (TODO: required?)
        nameTaken = self.dbConnector.execute('''
            SELECT COUNT(*) AS cnt
            FROM aide_admin.modelMarketplace
            WHERE name = %s;
        ''', (modelName,), 'all')
        if nameTaken is not None and len(nameTaken) and nameTaken[0]['cnt']:
            return {
                'status': 3,
                'message': f'A model state with name "{modelName}" already exists in the Model Marketplace.'
            }

        sharedModelID = self.dbConnector.execute(sql.SQL('''
            INSERT INTO aide_admin.modelMarketplace
            (name, description, labelclasses, author, statedict,
            model_library, alCriterion_library,
            annotationType, predictionType,
            origin_project, origin_uuid, public, anonymous)

            SELECT %s, %s, %s, %s, statedict,
            model_library, alCriterion_library,
            %s, %s,
            %s, id, %s, %s
            FROM {id_cnnstate} AS cnnS
            WHERE id = %s
            RETURNING id;
        ''').format(
            id_cnnstate=sql.Identifier(project, 'cnnstate')
        ), (modelName, modelDescription, labelclasses, username,
            immutables['annotationType'], immutables['predictionType'],
            project, public, anonymous,
            # bugfix: "modelID" was already converted to a UUID above;
            # wrapping it in UUID(...) again raised an error here
            modelID))

        return {
            'status': 0,
            'shared_model_id': sharedModelID
        }
    2019-21 Benjamin Kellenberger
'''

import os
from celery import current_app
from modules.AIWorker.app import AIWorker
from modules.Database.app import Database
from util.configDef import Config

# init AIWorker
modules = os.environ['AIDE_MODULES']
# run in passive mode if requested explicitly via the environment, or if
# this process does not serve as an AIWorker at all
passiveMode = (os.environ.get('PASSIVE_MODE') == '1'
               or 'aiworker' not in modules.lower())
config = Config()
worker = AIWorker(config, Database(config), passiveMode)


@current_app.task(name='AIWorker.aide_internal_notify')
def aide_internal_notify(message):
    # Celery task wrapper: forwards internal AIDE broadcast messages to this worker.
    return worker.aide_internal_notify(message)


@current_app.task(name='AIWorker.call_update_model', rate_limit=1)
def call_update_model(blank, numEpochs, project):
    # Celery task wrapper. "blank" receives the (ignored) result of the
    # preceding task when invoked in a Celery chain.
    return worker.call_update_model(numEpochs, project)


@current_app.task(name='AIWorker.call_train', rate_limit=1)
def call_train(data, index, epoch, numEpochs, project, aiModelSettings):
    if len(data) == 2 and data[1] is None:
Esempio n. 21
0
class DBMiddleware():
    def __init__(self, config):
        '''
            Sets up database access, loads project-independent settings
            and initializes SQL/annotation parsing helpers.
        '''
        self.config = config
        self.dbConnector = Database(config)

        self.project_immutables = {
        }  # cache of immutable project settings (project shorthand -> {settings}); filled lazily by get_project_immutables()

        self._fetchProjectSettings()
        self.sqlBuilder = SQLStringBuilder()
        self.annoParser = AnnotationParser()

    def _fetchProjectSettings(self):
        '''
            Loads global, project-independent settings (server URIs) from the
            configuration and the default UI styles from disk into
            "self.globalSettings" and "self.defaultStyles".
        '''
        # AI controller URI
        aiControllerURI = self.config.getProperty('Server', 'aiController_uri')
        if aiControllerURI is None or aiControllerURI.strip() == '':
            # no AI backend configured
            aiControllerURI = None

        # global, project-independent settings
        self.globalSettings = {
            'indexURI':
            self.config.getProperty('Server',
                                    'index_uri',
                                    type=str,
                                    fallback='/'),
            'dataServerURI':
            self.config.getProperty('Server', 'dataServer_uri'),
            'aiControllerURI':
            aiControllerURI
        }

        # default styles
        # BUGFIX: use context managers so the file handles are closed
        # deterministically (json.load(open(...)) leaked them), and catch
        # Exception instead of a bare "except"
        try:
            # check if custom default styles are provided
            with open('config/default_ui_settings.json', 'r') as f:
                self.defaultStyles = json.load(f)
        except Exception:
            # resort to built-in styles
            with open(
                    'modules/ProjectAdministration/static/json/default_ui_settings.json',
                    'r') as f:
                self.defaultStyles = json.load(f)

    def _assemble_annotations(self, project, cursor, hideGoldenQuestionInfo):
        response = {}
        while True:
            b = cursor.fetchone()
            if b is None:
                break

            imgID = str(b['image'])
            if not imgID in response:
                response[imgID] = {
                    'fileName': b['filename'],
                    'predictions': {},
                    'annotations': {},
                    'last_checked': None
                }
            viewcount = b['viewcount']
            if viewcount is not None:
                response[imgID]['viewcount'] = viewcount
            last_checked = b['last_checked']
            if last_checked is not None:
                if response[imgID]['last_checked'] is None:
                    response[imgID]['last_checked'] = last_checked
                else:
                    response[imgID]['last_checked'] = max(
                        response[imgID]['last_checked'], last_checked)

            if not hideGoldenQuestionInfo:
                response[imgID]['isGoldenQuestion'] = b['isgoldenquestion']

            # parse annotations and predictions
            entryID = str(b['id'])
            if b['ctype'] is not None:
                colnames = self.sqlBuilder.getColnames(
                    self.project_immutables[project]['annotationType'],
                    self.project_immutables[project]['predictionType'],
                    b['ctype'])
                entry = {}
                for c in colnames:
                    value = b[c]
                    if isinstance(value, datetime):
                        value = value.timestamp()
                    elif isinstance(value, UUID):
                        value = str(value)
                    entry[c] = value

                if b['ctype'] == 'annotation':
                    response[imgID]['annotations'][entryID] = entry
                elif b['ctype'] == 'prediction':
                    response[imgID]['predictions'][entryID] = entry

        return response

    def _set_images_requested(self, project, imageIDs):
        '''
            Sets column "last_requested" of relation "image"
            to the current date. This is done during image
            querying to signal that an image has been requested,
            but not (yet) viewed.
        '''
        now = datetime.now(tz=pytz.utc)
        # collect the IDs to be updated
        imageList = [key for key in imageIDs]
        if len(imageList):
            queryStr = sql.SQL('''
                UPDATE {id_img}
                SET last_requested = %s
                WHERE id IN %s;
            ''').format(id_img=sql.Identifier(project, 'image'))
            self.dbConnector.execute(queryStr, (
                now,
                tuple(imageList),
            ), None)

    def _get_sample_metadata(self, metaType):
        '''
            Returns a dummy annotation or prediction for the sample
            image in the "exampleData" folder, depending on the "metaType"
            specified (i.e., labels, points, boundingBoxes, or segmentationMasks).
        '''
        if metaType == 'labels':
            return {
                'id': '00000000-0000-0000-0000-000000000000',
                'label': '00000000-0000-0000-0000-000000000000',
                'confidence': 1.0,
                'priority': 1.0,
                'viewcount': None
            }
        elif metaType == 'points' or metaType == 'boundingBoxes':
            return {
                'id': '00000000-0000-0000-0000-000000000000',
                'label': '00000000-0000-0000-0000-000000000000',
                'x': 0.542959427207637,
                'y': 0.5322069489713102,
                'width': 0.6133651551312653,
                'height': 0.7407598263401316,
                'confidence': 1.0,
                'priority': 1.0,
                'viewcount': None
            }
        elif metaType == 'segmentationMasks':
            # read segmentation mask from disk
            segmask = Image.open(
                'modules/LabelUI/static/exampleData/sample_segmentationMask.tif'
            )
            segmask, width, height = helpers.imageToBase64(segmask)
            return {
                'id': '00000000-0000-0000-0000-000000000000',
                'width': width,
                'height': height,
                'segmentationmask': segmask,
                'confidence': 1.0,
                'priority': 1.0,
                'viewcount': None
            }
        else:
            return {}

    def get_project_immutables(self, project):
        if project not in self.project_immutables:
            queryStr = 'SELECT annotationType, predictionType, demoMode FROM aide_admin.project WHERE shortname = %s;'
            result = self.dbConnector.execute(queryStr, (project, ), 1)
            if result and len(result):
                self.project_immutables[project] = {
                    'annotationType': result[0]['annotationtype'],
                    'predictionType': result[0]['predictiontype'],
                    'demoMode': helpers.checkDemoMode(project,
                                                      self.dbConnector)
                }
            else:
                return None
        return self.project_immutables[project]

    def get_dynamic_project_settings(self, project):
        '''
            Retrieves the project's mutable UI settings from the database
            and completes them with defaults where keys are missing (this
            may be required for a project that got upgraded from v1).
        '''
        queryStr = 'SELECT ui_settings FROM aide_admin.project WHERE shortname = %s;'
        rows = self.dbConnector.execute(queryStr, (project, ), 1)
        uiSettings = json.loads(rows[0]['ui_settings'])
        return helpers.check_args(uiSettings, self.defaultStyles)

    def getProjectSettings(self, project):
        '''
            Queries the database for general project-specific metadata, such as:
            - Classes: names, indices, default colors
            - Annotation type: one of {class labels, positions, bboxes}
        '''
        # publicly available info from DB
        projSettings = self.getProjectInfo(project)

        # label classes
        projSettings['classes'] = self.getClassDefinitions(project)

        # static and dynamic project settings and properties from configuration file
        projSettings = {
            **projSettings,
            **self.get_project_immutables(project),
            **self.get_dynamic_project_settings(project),
            **self.globalSettings
        }

        # append project shorthand to AIController URI
        if 'aiControllerURI' in projSettings and projSettings[
                'aiControllerURI'] is not None and len(
                    projSettings['aiControllerURI']):
            projSettings['aiControllerURI'] = os.path.join(
                projSettings['aiControllerURI'], project) + '/'

        return projSettings

    def getProjectInfo(self, project):
        '''
            Returns safe, shareable information about the project
            (i.e., users don't need to be part of the project to see these data).
        '''
        queryStr = '''
            SELECT shortname, name, description, demoMode,
            interface_enabled, archived, ai_model_enabled,
            ai_model_library, ai_alcriterion_library,
            segmentation_ignore_unlabeled
            FROM aide_admin.project
            WHERE shortname = %s
        '''
        result = self.dbConnector.execute(queryStr, (project, ), 1)[0]

        # provide flag if AI model is available
        aiModelAvailable = all([
            result['ai_model_enabled'], result['ai_model_library'] is not None
            and len(result['ai_model_library']),
            result['ai_alcriterion_library'] is not None
            and len(result['ai_alcriterion_library'])
        ])

        return {
            'projectShortname':
            result['shortname'],
            'projectName':
            result['name'],
            'projectDescription':
            result['description'],
            'demoMode':
            result['demomode'],
            'interface_enabled':
            result['interface_enabled'] and not result['archived'],
            'ai_model_available':
            aiModelAvailable,
            'segmentation_ignore_unlabeled':
            result['segmentation_ignore_unlabeled']
        }

    def getClassDefinitions(self, project, showHidden=False):
        '''
            Returns a dictionary with entries for all classes in the project.

            Args:
                project: project shorthand
                showHidden: if True, label classes flagged as hidden are
                            included as well

            Returns:
                dict with an 'entries' tree (label class groups containing
                their classes and sub-groups) and 'numClasses', the total
                number of label classes found.
        '''

        # query data
        if showHidden:
            hiddenSpec = ''
        else:
            hiddenSpec = 'WHERE hidden IS false'
        # groups and classes come back from one UNION query; the 'type'
        # column tells them apart. For classes, the group reference
        # (labelclassgroup) is aliased into the 'parent' column.
        queryStr = sql.SQL('''
            SELECT 'group' AS type, id, NULL as idx, name, color, parent, NULL AS keystroke, NULL AS hidden FROM {}
            UNION ALL
            SELECT 'class' AS type, id, idx, name, color, labelclassgroup, keystroke, hidden FROM {}
            {};
            ''').format(sql.Identifier(project, 'labelclassgroup'),
                        sql.Identifier(project, 'labelclass'),
                        sql.SQL(hiddenSpec))

        classData = self.dbConnector.execute(queryStr, None, 'all')

        # assemble entries first (flat dict: entry ID -> entry)
        allEntries = {}
        numClasses = 0
        for cl in classData:
            id = str(cl['id'])  # NOTE: shadows the built-in "id" (kept as-is)
            entry = {
                'id': id,
                'name': cl['name'],
                'color': cl['color'],
                'parent':
                str(cl['parent']) if cl['parent'] is not None else None,
                'hidden': cl['hidden']
            }
            if cl['type'] == 'group':
                # groups receive a container for their child entries
                entry['entries'] = {}
            else:
                entry['index'] = cl['idx']
                entry['keystroke'] = cl['keystroke']
                numClasses += 1
            allEntries[id] = entry

        # transform into tree
        def _find_parent(tree, parentID):
            # recursively locates the entry with the given ID in the
            # (partially assembled) tree; returns None if not found
            if parentID is None:
                return None
            elif 'id' in tree and tree['id'] == parentID:
                return tree
            elif 'entries' in tree:
                for ek in tree['entries'].keys():
                    rv = _find_parent(tree['entries'][ek], parentID)
                    if rv is not None:
                        return rv
                return None
            else:
                return None

        allEntries = {'entries': allEntries}
        # iterate over a snapshot of the keys: entries are moved (deleted
        # from the root) while iterating
        for key in list(allEntries['entries'].keys()):
            entry = allEntries['entries'][key]
            parentID = entry['parent']
            del entry['parent']

            if parentID is None:
                # entry or group with no parent: append to root directly
                allEntries['entries'][key] = entry

            else:
                # move item
                parent = _find_parent(allEntries, parentID)
                parent['entries'][key] = entry
                del allEntries['entries'][key]

        allEntries['numClasses'] = numClasses
        return allEntries

    def getBatch_fixed(self,
                       project,
                       username,
                       data,
                       hideGoldenQuestionInfo=True):
        '''
            Returns entries from the database based on the list of data entry identifiers specified.
        '''

        if not len(data):
            return {'entries': {}}

        # query
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getFixedImagesQueryString(
            project, projImmutables['annotationType'],
            projImmutables['predictionType'], projImmutables['demoMode'])

        # parse results
        queryVals = (
            tuple(UUID(d) for d in data),
            username,
            username,
        )
        if projImmutables['demoMode']:
            queryVals = (tuple(UUID(d) for d in data), )

        with self.dbConnector.execute_cursor(queryStr, queryVals) as cursor:
            try:
                response = self._assemble_annotations(project, cursor,
                                                      hideGoldenQuestionInfo)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()

        # mark images as requested
        self._set_images_requested(project, response)

        return {'entries': response}

    def getBatch_auto(self,
                      project,
                      username,
                      order='unlabeled',
                      subset='default',
                      limit=None,
                      hideGoldenQuestionInfo=True):
        '''
            TODO: description
        '''
        # query
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getNextBatchQueryString(
            project, projImmutables['annotationType'],
            projImmutables['predictionType'], order, subset,
            projImmutables['demoMode'])

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)

        # parse results
        queryVals = (
            username,
            username,
            limit,
            username,
        )
        if projImmutables[
                'demoMode']:  #TODO: demoMode can now change dynamically
            queryVals = (limit, )

        with self.dbConnector.execute_cursor(queryStr, queryVals) as cursor:
            response = self._assemble_annotations(project, cursor,
                                                  hideGoldenQuestionInfo)

        # mark images as requested
        self._set_images_requested(project, response)

        return {'entries': response}

    def getBatch_timeRange(self,
                           project,
                           minTimestamp,
                           maxTimestamp,
                           userList,
                           skipEmptyImages=False,
                           limit=None,
                           goldenQuestionsOnly=False,
                           hideGoldenQuestionInfo=True):
        '''
            Returns images that have been annotated within the given time range and/or
            by the given user(s). All arguments are optional.
            Useful for reviewing existing annotations.
        '''
        # query string
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getDateQueryString(
            project, projImmutables['annotationType'], minTimestamp,
            maxTimestamp, userList, skipEmptyImages, goldenQuestionsOnly)

        # check validity and provide arguments
        queryVals = []
        if userList is not None:
            queryVals.append(tuple(userList))
        if minTimestamp is not None:
            queryVals.append(minTimestamp)
        if maxTimestamp is not None:
            queryVals.append(maxTimestamp)
        if skipEmptyImages and userList is not None:
            queryVals.append(tuple(userList))

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)
        queryVals.append(limit)

        if userList is not None:
            queryVals.append(tuple(userList))

        # query and parse results
        with self.dbConnector.execute_cursor(queryStr,
                                             tuple(queryVals)) as cursor:
            try:
                response = self._assemble_annotations(project, cursor,
                                                      hideGoldenQuestionInfo)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()

        # # mark images as requested
        # self._set_images_requested(project, response)

        return {'entries': response}

    def get_timeRange(self,
                      project,
                      userList,
                      skipEmptyImages=False,
                      goldenQuestionsOnly=False):
        '''
            Returns two timestamps denoting the temporal limits within which
            images have been viewed by the users provided in the userList.
            Arguments:
            - userList: string (single user name) or list of strings (multiple).
                        Can also be None; in this case all annotations will be
                        checked.
            - skipEmptyImages: if True, only images that contain at least one
                               annotation will be considered.
            - goldenQuestionsOnly: if True, only images flagged as golden questions
                                   will be shown.
        '''
        # query string
        queryStr = self.sqlBuilder.getTimeRangeQueryString(
            project, userList, skipEmptyImages, goldenQuestionsOnly)

        arguments = (None if userList is None else tuple(userList))
        result = self.dbConnector.execute(queryStr, (arguments, ), numReturn=1)

        if result is not None and len(result):
            return {
                'minTimestamp': result[0]['mintimestamp'],
                'maxTimestamp': result[0]['maxtimestamp'],
            }
        else:
            return {'error': 'no annotations made'}

    def get_sampleData(self, project):
        '''
            Returns a sample image from the project, with annotations
            (from one of the admins) and predictions.
            If no image, no annotations, and/or no predictions are
            available, a built-in default is returned instead.
        '''
        projImmutables = self.get_project_immutables(project)
        queryStr = self.sqlBuilder.getSampleDataQueryString(
            project, projImmutables['annotationType'],
            projImmutables['predictionType'])

        # query and parse results
        response = None
        with self.dbConnector.execute_cursor(queryStr, None) as cursor:
            try:
                response = self._assemble_annotations(project, cursor, True)
            except:
                pass

        if response is None or not len(response):
            # no valid data found for project; fall back to sample data
            response = {
                '00000000-0000-0000-0000-000000000000': {
                    'fileName':
                    '/static/interface/exampleData/sample_image.jpg',
                    'viewcount': 1,
                    'annotations': {
                        '00000000-0000-0000-0000-000000000000':
                        self._get_sample_metadata(
                            projImmutables['annotationType'])
                    },
                    'predictions': {
                        '00000000-0000-0000-0000-000000000000':
                        self._get_sample_metadata(
                            projImmutables['predictionType'])
                    },
                    'last_checked': None,
                    'isGoldenQuestion': True
                }
            }
        return response

    def submitAnnotations(self, project, username, submissions):
        '''
            Sends user-provided annotations to the database.

            Args:
                project: project shorthand
                username: name of the submitting user
                submissions: dict with an 'entries' key (image ID ->
                             per-image annotation data) and optionally
                             a 'meta' entry

            Returns:
                1 if the project is in demo mode (nothing is written),
                0 upon success.
        '''
        projImmutables = self.get_project_immutables(project)
        if projImmutables['demoMode']:
            # demo projects are read-only
            return 1

        # assemble values
        colnames = getattr(QueryStrings_annotation,
                           projImmutables['annotationType']).value
        values_insert = []
        values_update = []

        meta = (None if not 'meta' in submissions else json.dumps(
            submissions['meta']))

        # for deletion: remove all annotations whose image ID matches but whose annotation ID is not among the submitted ones
        ids = []

        viewcountValues = []
        for imageKey in submissions['entries']:
            entry = submissions['entries'][imageKey]

            try:
                lastChecked = entry['timeCreated']
                lastTimeRequired = entry['timeRequired']
                if lastTimeRequired is None: lastTimeRequired = 0
            except:
                # missing or malformed timing info: fall back to "now"
                lastChecked = datetime.now(tz=pytz.utc)
                lastTimeRequired = 0

            try:
                numInteractions = int(entry['numInteractions'])
            except:
                numInteractions = 0

            if 'annotations' in entry and len(entry['annotations']):
                for annotation in entry['annotations']:
                    # assemble annotation values in the order given by "colnames"
                    annotationTokens = self.annoParser.parseAnnotation(
                        annotation)
                    annoValues = []
                    for cname in colnames:
                        if cname == 'id':
                            if cname in annotationTokens:
                                # cast and only append id if the annotation is an existing one
                                annoValues.append(UUID(
                                    annotationTokens[cname]))
                                ids.append(UUID(annotationTokens[cname]))
                        elif cname == 'image':
                            annoValues.append(UUID(imageKey))
                        elif cname == 'label' and annotationTokens[
                                cname] is not None:
                            annoValues.append(UUID(annotationTokens[cname]))
                        elif cname == 'timeCreated':
                            try:
                                annoValues.append(
                                    dateutil.parser.parse(
                                        annotationTokens[cname]))
                            except:
                                annoValues.append(datetime.now(tz=pytz.utc))
                        elif cname == 'timeRequired':
                            timeReq = annotationTokens[cname]
                            if timeReq is None: timeReq = 0
                            annoValues.append(timeReq)
                        elif cname == 'username':
                            annoValues.append(username)
                        elif cname in annotationTokens:
                            annoValues.append(annotationTokens[cname])
                        elif cname == 'unsure':
                            if 'unsure' in annotationTokens and annotationTokens[
                                    'unsure'] is not None:
                                annoValues.append(annotationTokens[cname])
                            else:
                                annoValues.append(False)
                        elif cname == 'meta':
                            annoValues.append(meta)
                        else:
                            annoValues.append(None)
                    if 'id' in annotationTokens:
                        # existing annotation; update
                        values_update.append(tuple(annoValues))
                    else:
                        # new annotation
                        values_insert.append(tuple(annoValues))

            viewcountValues.append(
                (username, imageKey, 1, lastChecked, lastChecked,
                 lastTimeRequired, lastTimeRequired, numInteractions, meta))

        # delete all annotations that are not in submitted batch
        imageKeys = list(UUID(k) for k in submissions['entries'])
        if len(imageKeys):
            if len(ids):
                queryStr = sql.SQL('''
                    DELETE FROM {id_anno} WHERE username = %s AND id IN (
                        SELECT idQuery.id FROM (
                            SELECT * FROM {id_anno} WHERE id NOT IN %s
                        ) AS idQuery
                        JOIN (
                            SELECT * FROM {id_anno} WHERE image IN %s
                        ) AS imageQuery ON idQuery.id = imageQuery.id);
                ''').format(id_anno=sql.Identifier(project, 'annotation'))
                self.dbConnector.execute(queryStr, (
                    username,
                    tuple(ids),
                    tuple(imageKeys),
                ))
            else:
                # no annotations submitted; delete all annotations submitted before
                queryStr = sql.SQL('''
                    DELETE FROM {id_anno} WHERE username = %s AND image IN %s;
                ''').format(id_anno=sql.Identifier(project, 'annotation'))
                self.dbConnector.execute(queryStr, (
                    username,
                    tuple(imageKeys),
                ))

        # insert new annotations
        if len(values_insert):
            queryStr = sql.SQL('''
                INSERT INTO {id_anno} ({cols})
                VALUES %s ;
            ''').format(
                id_anno=sql.Identifier(project, 'annotation'),
                cols=sql.SQL(', ').join([sql.SQL(c) for c in colnames[1:]
                                         ])  # skip 'id' column
            )
            self.dbConnector.insert(queryStr, values_insert)

        # update existing annotations
        if len(values_update):

            updateCols = []
            for col in colnames:
                if col == 'label':
                    updateCols.append(sql.SQL('label = UUID(e.label)'))
                elif col == 'timeRequired':
                    # we sum the required times together
                    updateCols.append(
                        sql.SQL(
                            'timeRequired = COALESCE(a.timeRequired,0) + COALESCE(e.timeRequired,0)'
                        ))
                else:
                    updateCols.append(
                        sql.SQL('{col} = e.{col}').format(col=sql.SQL(col)))

            queryStr = sql.SQL('''
                UPDATE {id_anno} AS a
                SET {updateCols}
                FROM (VALUES %s) AS e({colnames})
                WHERE e.id = a.id
            ''').format(id_anno=sql.Identifier(project, 'annotation'),
                        updateCols=sql.SQL(', ').join(updateCols),
                        colnames=sql.SQL(', ').join(
                            [sql.SQL(c) for c in colnames]))

            self.dbConnector.insert(queryStr, values_update)

        # viewcount table: upsert per-user view statistics for each image
        queryStr = sql.SQL('''
            INSERT INTO {id_iu} (username, image, viewcount, first_checked, last_checked, last_time_required, total_time_required, num_interactions, meta)
            VALUES %s 
            ON CONFLICT (username, image) DO UPDATE SET viewcount = image_user.viewcount + 1,
                last_checked = EXCLUDED.last_checked,
                last_time_required = EXCLUDED.last_time_required,
                total_time_required = EXCLUDED.total_time_required + image_user.total_time_required,
                num_interactions = EXCLUDED.num_interactions + image_user.num_interactions,
                meta = EXCLUDED.meta;
        ''').format(id_iu=sql.Identifier(project, 'image_user'))
        self.dbConnector.insert(queryStr, viewcountValues)

        return 0

    def setGoldenQuestions(self, project, submissions):
        '''
            Receives an iterable of tuples (uuid, bool) and updates the
            property "isGoldenQuestion" of the images accordingly.
        '''
        projImmutables = self.get_project_immutables(project)
        if projImmutables['demoMode']:
            return 1

        queryStr = sql.SQL('''
            UPDATE {id_img} AS img SET isGoldenQuestion = c.isGoldenQuestion
            FROM (VALUES %s)
            AS c (id, isGoldenQuestion)
            WHERE c.id = img.id;
        ''').format(id_img=sql.Identifier(project, 'image'))
        self.dbConnector.insert(queryStr, submissions)

        return 0
 def __init__(self, config):
     self.config = config
     self.dbConnector = Database(config)
Esempio n. 23
0
class ModelMarketplaceMiddleware:
    '''
        Provides access to AIDE's Model Marketplace: listing model states
        that may be shared across projects, as well as importing, sharing,
        re-sharing and un-sharing them.
    '''

    def __init__(self, config):
        self.config = config
        self.dbConnector = Database(config)
        self.labelUImiddleware = DBMiddleware(config)
        self._init_available_ai_models()

    def _init_available_ai_models(self):
        '''
            Checks the locally installed model implementations
            and retains a list of those that support sharing
            (i.e., the addition of new label classes).
        '''
        self.availableModels = set()
        for key in PREDICTION_MODELS:
            # a model may only be shared across projects if it can absorb
            # label classes it has not been trained on
            if 'canAddLabelclasses' in PREDICTION_MODELS[key] and \
                PREDICTION_MODELS[key]['canAddLabelclasses'] is True:
                self.availableModels.add(key)

    def getModelsMarketplace(self, project, username):
        '''
            Returns a dict of model state meta data,
            filtered by the project settings (model library;
            annotation type, prediction type).
        '''

        # get project meta data (immutables, model library)
        projectMeta = self.dbConnector.execute(
            '''
                SELECT annotationType, predictionType, ai_model_library
                FROM aide_admin.project
                WHERE shortname = %s;
            ''', (project, ), 1)
        if projectMeta is None or not len(projectMeta):
            raise Exception(
                f'Project {project} could not be found in database.')
        projectMeta = projectMeta[0]
        annotationType = projectMeta['annotationtype']
        predictionType = projectMeta['predictiontype']

        # get matching model states; hide author/origin for anonymously
        # shared models unless the requesting user is the owner
        result = self.dbConnector.execute(
            '''
                SELECT id, name, description, labelclasses, model_library,
                    annotationType, predictionType, EXTRACT(epoch FROM timeCreated) AS time_created, alcriterion_library,
                    public, anonymous, selectCount,
                    is_owner, shared, tags,
                    CASE WHEN NOT is_owner AND anonymous THEN NULL ELSE author END AS author,
                    CASE WHEN NOT is_owner AND anonymous THEN NULL ELSE origin_project END AS origin_project,
                    CASE WHEN NOT is_owner AND anonymous THEN NULL ELSE origin_uuid END AS origin_uuid
                FROM (
                    SELECT *,
                    CASE WHEN author = %s AND origin_project = %s THEN TRUE ELSE FALSE END AS is_owner
                    FROM aide_admin.modelMarketplace
                    WHERE annotationType = %s AND
                    predictionType = %s
                    AND (
                        (public = TRUE AND shared = TRUE) OR
                        origin_project = %s
                    )
                ) AS mm
                LEFT OUTER JOIN (
                    SELECT name AS projectName, shortname
                    FROM aide_admin.project
                ) AS pn
                ON mm.origin_project = pn.shortname;
            ''', (username, project, annotationType, predictionType, project),
            'all')
        matchingStates = {}
        if result is not None and len(result):
            for r in result:
                # convert UUIDs to str so the result is JSON-serializable
                values = {}
                for key in r.keys():
                    if isinstance(r[key], UUID):
                        values[key] = str(r[key])
                    else:
                        values[key] = r[key]
                matchingStates[str(r['id'])] = values

        return matchingStates

    def importModel(self, project, username, modelID):
        '''
            Imports the marketplace model state with the given ID into the
            project's "cnnstate" table, after verifying that the model
            exists, supports sharing and matches the project's annotation
            and prediction types. Also sets the project's selected AI model
            library accordingly and increments the model's selection
            counter. Returns a dict with a "status" flag (0 = success) and,
            on error, a "message".
        '''
        if not isinstance(modelID, UUID):
            modelID = UUID(modelID)

        # get model meta data
        meta = self.dbConnector.execute(
            '''
            SELECT id, name, description, labelclasses, model_library,
            annotationType, predictionType, timeCreated, alcriterion_library,
            public, anonymous, selectCount,
            CASE WHEN anonymous THEN NULL ELSE author END AS author,
            CASE WHEN anonymous THEN NULL ELSE origin_project END AS origin_project,
            CASE WHEN anonymous THEN NULL ELSE origin_uuid END AS origin_uuid
            FROM aide_admin.modelMarketplace
            WHERE id = %s;
        ''', (modelID, ), 1)

        if meta is None or not len(meta):
            return {
                'status': 2,
                'message': f'Model state with id "{str(modelID)}" could not be found in the model marketplace.'
            }
        meta = meta[0]

        # check if model type is registered with AIDE
        modelLib = meta['model_library']
        if modelLib not in self.availableModels:
            return {
                'status': 3,
                'message': f'Model type with identifier "{modelLib}" does not support sharing across project, or else is not registered with this installation of AIDE.'
            }

        # check if model hasn't already been imported to current project
        modelExists = self.dbConnector.execute(
            sql.SQL('''
            SELECT id
            FROM {id_cnnstate}
            WHERE marketplace_origin_id = %s;
        ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
            (modelID, ), 1)         # bugfix: query arguments must be a tuple, not a bare UUID
        if modelExists is not None and len(modelExists):
            # model already exists in project; update timestamp to make it current (TODO: find a better solution)
            self.dbConnector.execute(
                sql.SQL('''
                UPDATE {id_cnnstate}
                SET timeCreated = NOW()
                WHERE marketplace_origin_id = %s;
            ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
                (modelID, ))        # bugfix: query arguments must be a tuple
            return {
                'status': 0,
                'message': 'Model had already been imported to project and was elevated to be the most current.'
            }

        # check if model is suitable for current project
        immutables = self.labelUImiddleware.get_project_immutables(project)
        errors = ''
        if immutables['annotationType'] != meta['annotationtype']:
            meta_at = meta['annotationtype']
            proj_at = immutables['annotationType']
            errors = f'Annotation type of model ({meta_at}) is not compatible with project\'s annotation type ({proj_at}).'
        if immutables['predictionType'] != meta['predictiontype']:
            meta_pt = meta['predictiontype']
            proj_pt = immutables['predictionType']
            errors += f'\nPrediction type of model ({meta_pt}) is not compatible with project\'s prediction type ({proj_pt}).'
        if len(errors):
            return {'status': 4, 'message': errors}

        # set project's selected model
        self.dbConnector.execute(
            '''
            UPDATE aide_admin.project
            SET ai_model_library = %s
            WHERE shortname = %s;
        ''', (meta['model_library'], project))

        # finally, increase selection counter and import model state
        #TODO: - retain existing alCriterion library
        insertedModelID = self.dbConnector.execute(
            sql.SQL('''
            UPDATE aide_admin.modelMarketplace
            SET selectCount = selectCount + 1
            WHERE id = %s;
            INSERT INTO {id_cnnstate} (marketplace_origin_id, stateDict, timeCreated, partial, model_library, alCriterion_library)
            SELECT id, stateDict, timeCreated, FALSE, model_library, alCriterion_library
            FROM aide_admin.modelMarketplace
            WHERE id = %s
            RETURNING id;
        ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
            (modelID, modelID), 1)
        # bugfix: "execute" returns a list of rows; extract the inserted UUID
        # instead of stringifying the whole result
        return {'status': 0, 'id': str(insertedModelID[0]['id'])}

    def shareModel(self, project, username, modelID, modelName,
                   modelDescription, tags, public, anonymous):
        '''
            Publishes the project's model state with the given ID to the
            Model Marketplace under the given name/description/tags and
            visibility flags. If the model had been shared before (possibly
            hidden in the meantime), its marketplace entry is un-hidden and
            its meta data updated instead. Returns a dict with a "status"
            flag (0 = success) and details.
        '''
        #TODO: export as Celery task

        if not isinstance(modelID, UUID):
            modelID = UUID(modelID)

        if tags is None:
            tags = ''
        elif isinstance(tags, list) or isinstance(tags, tuple):
            tags = ';;'.join(tags)

        # check if model class supports sharing
        isShareable = self.dbConnector.execute(
            '''
            SELECT ai_model_library
            FROM aide_admin.project
            WHERE shortname = %s;
        ''', (project, ), 1)
        if isShareable is None or not len(isShareable):
            return {
                'status': 1,
                'message': f'Project {project} could not be found in database.'
            }
        modelLib = isShareable[0]['ai_model_library']
        if modelLib not in self.availableModels:
            return {
                'status': 2,
                'message': f'The model with id "{modelLib}" does not support sharing, or else is not installed in this instance of AIDE.'
            }

        # check if model hasn't already been shared
        isShared = self.dbConnector.execute(
            '''
            SELECT id, author, shared
            FROM aide_admin.modelMarketplace
            WHERE origin_project = %s AND origin_uuid = %s;
        ''', (project, modelID), 'all')
        if isShared is not None and len(isShared):
            if not isShared[0]['shared']:
                # model had been shared in the past but then hidden; unhide
                self.dbConnector.execute(
                    '''
                    UPDATE aide_admin.modelMarketplace
                    SET shared = True
                    WHERE origin_project = %s AND origin_uuid = %s;
                ''', (project, modelID))

            # update shared model meta data
            # NOTE(review): when called via "reshareModel", all meta fields
            # are None and would be written as NULL — confirm this is intended
            self.dbConnector.execute(
                '''
                UPDATE aide_admin.modelMarketplace
                SET name = %s,
                description = %s,
                public = %s,
                anonymous = %s,
                tags = %s
                WHERE id = %s AND author = %s;
            ''', (modelName, modelDescription, public, anonymous, tags,
                  isShared[0]['id'], username), None)
            return {'status': 0}

        # check if model hasn't been imported from the marketplace
        isImported = self.dbConnector.execute(
            sql.SQL('''
            SELECT marketplace_origin_id
            FROM {id_cnnstate}
            WHERE id = %s;
        ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
            (modelID, ), 1)
        if isImported is not None and len(isImported):
            marketplaceID = isImported[0]['marketplace_origin_id']
            if marketplaceID is not None:
                return {
                    'status': 4,
                    'message': f'The selected model is already shared through the marketplace (id "{str(marketplaceID)}").'
                }

        # get project immutables
        immutables = self.labelUImiddleware.get_project_immutables(project)

        # get label class info
        labelclasses = json.dumps(
            self.labelUImiddleware.getClassDefinitions(project, False))

        # check if name is unique (TODO: required?)
        nameTaken = self.dbConnector.execute(
            '''
            SELECT COUNT(*) AS cnt
            FROM aide_admin.modelMarketplace
            WHERE name = %s;
        ''', (modelName, ), 'all')
        if nameTaken is not None and len(nameTaken) and nameTaken[0]['cnt']:
            return {
                'status': 5,
                'message': f'A model state with name "{modelName}" already exists in the Model Marketplace.'
            }

        # share model state
        sharedModelID = self.dbConnector.execute(
            sql.SQL('''
            INSERT INTO aide_admin.modelMarketplace
            (name, description, tags, labelclasses, author, statedict,
            model_library, alCriterion_library,
            annotationType, predictionType,
            origin_project, origin_uuid, public, anonymous)

            SELECT %s, %s, %s, %s, %s, statedict,
            model_library, alCriterion_library,
            %s, %s,
            %s, id, %s, %s
            FROM {id_cnnstate} AS cnnS
            WHERE id = %s
            RETURNING id;
        ''').format(id_cnnstate=sql.Identifier(project, 'cnnstate')),
            (modelName, modelDescription, tags, labelclasses, username,
             immutables['annotationType'], immutables['predictionType'],
             project, public, anonymous, modelID), 1)
        # bugfix: request the RETURNING row (numReturns=1) and extract the
        # actual UUID; previously the raw execute() result was stringified
        return {'status': 0, 'shared_model_id': str(sharedModelID[0]['id'])}

    def reshareModel(self, project, username, modelID):
        '''
            Unlike "shareModel", this checks for a model that had already been
            shared in the past, but then hidden from the marketplace, and simply
            turns the "shared" attribute back on, if the model could be found.
        '''
        # bugfix: accept both str and UUID inputs (UUID(UUID(...)) raises)
        if not isinstance(modelID, UUID):
            modelID = UUID(modelID)

        # get origin UUID of model and delegate to "shareModel" function
        origin = self.dbConnector.execute(
            '''
            SELECT origin_uuid FROM aide_admin.modelMarketplace
            WHERE origin_project = %s
            AND author = %s
            AND id = %s;
        ''', (project, username, modelID), 1)
        if origin is None or not len(origin):
            # bugfix: report the requested ID, not the (empty) query result
            return {
                'status': 2,
                'message': f'Model with ID "{str(modelID)}" could not be found on the Model Marketplace.'
            }

        return self.shareModel(project, username, origin[0]['origin_uuid'],
                               None, None, None, None, None)

    def unshareModel(self, project, username, modelID):
        '''
            Sets the "shared" flag to False for a given model state,
            if the provided username is the owner of it.
        '''
        if not isinstance(modelID, UUID):
            modelID = UUID(modelID)

        # check if user is authorized
        isAuthorized = self.dbConnector.execute(
            '''
            SELECT shared FROM aide_admin.modelMarketplace
            WHERE author = %s AND id = %s;
        ''', (username, modelID), 1)

        if isAuthorized is None or not len(isAuthorized):
            # model does not exist or else user has no modification rights
            return {
                'status': 2,
                'message': f'Model with id "{str(modelID)}"" does not exist or else user {username} does not have modification rights to it.'
            }

        # unshare
        self.dbConnector.execute(
            '''
            UPDATE aide_admin.modelMarketplace
            SET shared = FALSE
            WHERE author = %s AND id = %s;
        ''', (username, modelID))

        return {'status': 0}
Esempio n. 24
0
class ProjectStatisticsMiddleware:
    def __init__(self, config):
        # keep a handle on the app configuration and open a DB connection
        self.dbConnector = Database(config)
        self.config = config

    def getProjectStatistics(self, project):
        '''
            Compiles class-agnostic statistics for the given project:
            total image count, images viewed, golden questions, images
            annotated and total annotations, plus a per-user breakdown
            of view and annotation counts.
        '''
        queryStr = sql.SQL('''
            SELECT NULL AS username, COUNT(*) AS num_img, NULL::bigint AS num_anno FROM {id_img}
            UNION ALL
            SELECT NULL AS username, COUNT(DISTINCT(image)) AS num_img, NULL AS num_anno FROM {id_iu}
            UNION ALL
            SELECT NULL AS username, COUNT(DISTINCT(gq.id)) AS num_img, NULL AS num_anno FROM (
                SELECT id FROM {id_img} WHERE isGoldenQuestion = TRUE
            ) AS gq
            UNION ALL
            SELECT NULL AS username, NULL AS num_img, COUNT(DISTINCT(image)) AS num_anno FROM {id_anno}
            UNION ALL
            SELECT NULL AS username, NULL AS num_img, COUNT(*) AS num_anno FROM {id_anno}
            UNION ALL
            SELECT username, iu_cnt AS num_img, anno_cnt AS num_anno FROM (
            SELECT u.username, iu_cnt, anno_cnt
            FROM (
                SELECT DISTINCT(username) FROM (
                    SELECT username FROM {id_auth}
                    WHERE project = %s
                    UNION ALL
                    SELECT username FROM {id_iu}
                    UNION ALL
                    SELECT username FROM {id_anno}
                ) AS uQuery
            ) AS u
            LEFT OUTER JOIN (
                SELECT username, COUNT(*) AS iu_cnt
                FROM {id_iu}
                GROUP BY username
            ) AS iu
            ON u.username = iu.username
            LEFT OUTER JOIN (
                SELECT username, COUNT(*) AS anno_cnt
                FROM {id_anno}
                GROUP BY username
            ) AS anno
            ON u.username = anno.username
            ORDER BY u.username
        ) AS q;
        ''').format(id_img=sql.Identifier(project, 'image'),
                    id_iu=sql.Identifier(project, 'image_user'),
                    id_anno=sql.Identifier(project, 'annotation'),
                    id_auth=sql.Identifier('aide_admin', 'authentication'))
        rows = self.dbConnector.execute(queryStr, (project, ), 'all')

        # rows 0-4 carry the global counts in the fixed order of the
        # UNION ALL clauses above
        stats = {
            'num_images': rows[0]['num_img'],
            'num_viewed': rows[1]['num_img'],
            'num_goldenQuestions': rows[2]['num_img'],
            'num_annotated': rows[3]['num_anno'],
            'num_annotations': rows[4]['num_anno']
        }
        # any further rows are per-user view/annotation counts
        if len(rows) > 5:
            stats['user_stats'] = {
                row['username']: {
                    'num_viewed': row['num_img'],
                    'num_annotations': row['num_anno']
                }
                for row in rows[5:]
            }
        return stats

    def getLabelclassStatistics(self, project):
        '''
            Compiles annotation and prediction counts on a per-label
            class basis for the given project.
            TODO: does not work for segmentationMasks (no label fields)
        '''
        queryStr = sql.SQL('''
            SELECT lc.name, COALESCE(num_anno, 0) AS num_anno, COALESCE(num_pred, 0) AS num_pred
            FROM {id_lc} AS lc
            FULL OUTER JOIN (
                SELECT label, COUNT(*) AS num_anno
                FROM {id_anno} AS anno
                GROUP BY label
            ) AS annoCnt
            ON lc.id = annoCnt.label
            FULL OUTER JOIN (
                SELECT label, COUNT(*) AS num_pred
                FROM {id_pred} AS pred
                GROUP BY label
            ) AS predCnt
            ON lc.id = predCnt.label
        ''').format(id_lc=sql.Identifier(project, 'labelclass'),
                    id_anno=sql.Identifier(project, 'annotation'),
                    id_pred=sql.Identifier(project, 'prediction'))
        rows = self.dbConnector.execute(queryStr, None, 'all')

        # map label class name -> counts; empty dict if nothing was found
        return {
            row['name']: {
                'num_anno': row['num_anno'],
                'num_pred': row['num_pred']
            }
            for row in (rows or [])
        }

    @staticmethod
    def _calc_geometric_stats(tp, fp, fn):
        tp, fp, fn = float(tp), float(fp), float(fn)
        try:
            precision = tp / (tp + fp)
        except:
            precision = 0.0
        try:
            recall = tp / (tp + fn)
        except:
            recall = 0.0
        try:
            f1 = 2 * precision * recall / (precision + recall)
        except:
            f1 = 0.0
        return precision, recall, f1

    def getPerformanceStatistics(self,
                                 project,
                                 entities_eval,
                                 entity_target,
                                 entityType='user',
                                 threshold=0.5,
                                 goldenQuestionsOnly=True):
        '''
            Compares the accuracy of a list of users or model states with a target
            user.
            The following measures of accuracy are reported, depending on the
            annotation type:
            - image labels: overall accuracy
            - points:
                    RMSE (distance to closest point with the same label; in pixels)
                    overall accuracy (labels)
            - bounding boxes:
                    IoU (max. with any target bounding box, regardless of label)
                    overall accuracy (labels)
            - segmentation masks:
                    TODO

            Value 'threshold' determines the geometric requirement for an annotation to be
            counted as correct (or incorrect) as follows:
                - points: maximum euclidean distance in pixels to closest target
                - bounding boxes: minimum IoU with best matching target

            If 'goldenQuestionsOnly' is True, only images with flag 'isGoldenQuestion' = True
            will be considered for evaluation.
        '''
        entityType = entityType.lower()

        # get annotation type for project
        annoType = self.dbConnector.execute(
            '''SELECT annotationType
            FROM aide_admin.project WHERE shortname = %s;''', (project, ), 1)
        annoType = annoType[0]['annotationtype']

        # for segmentation masks: get label classes and their ordinals      #TODO: implement per-class statistics for all types
        labelClasses = {}
        lcDef = self.dbConnector.execute(
            sql.SQL('''
            SELECT id, idx, color FROM {id_lc};
        ''').format(id_lc=sql.Identifier(project, 'labelclass')), None, 'all')
        if lcDef is not None:
            # map label class UUID (str) -> (ordinal index, color)
            for l in lcDef:
                labelClasses[str(l['id'])] = (l['idx'], l['color'])

        else:
            # no label classes defined
            return {}

        # compose args list and complete query
        # (points need the threshold twice: once per coordinate comparison)
        queryArgs = [entity_target, tuple(entities_eval)]
        if annoType == 'points' or annoType == 'boundingBoxes':
            queryArgs.append(threshold)
            if annoType == 'points':
                queryArgs.append(threshold)

        # optional filter restricting evaluation to golden question images
        if goldenQuestionsOnly:
            sql_goldenQuestion = sql.SQL('''JOIN (
                    SELECT id
                    FROM {id_img}
                    WHERE isGoldenQuestion = true
                ) AS qi
                ON qi.id = q2.image''').format(
                id_img=sql.Identifier(project, 'image'))
        else:
            sql_goldenQuestion = sql.SQL('')

        # result tokens: per-entity accumulators, chosen by annotation type;
        # "tokens_normalize" lists the keys to be divided by the number of
        # matches after accumulation
        tokens = {}
        tokens_normalize = []
        if annoType == 'labels':
            tokens = {
                'num_matches': 0,
                'correct': 0,
                'incorrect': 0,
                'overall_accuracy': 0.0
            }
            tokens_normalize = ['overall_accuracy']
        elif annoType == 'points':
            tokens = {
                'num_pred': 0,
                'num_target': 0,
                'tp': 0,
                'fp': 0,
                'fn': 0,
                'avg_dist': 0.0
            }
            tokens_normalize = ['avg_dist']
        elif annoType == 'boundingBoxes':
            tokens = {
                'num_pred': 0,
                'num_target': 0,
                'tp': 0,
                'fp': 0,
                'fn': 0,
                'avg_iou': 0.0
            }
            tokens_normalize = ['avg_iou']
        elif annoType == 'segmentationMasks':
            tokens = {
                'num_matches': 0,
                'overall_accuracy': 0.0,
                'per_class': {}
            }
            for clID in labelClasses.keys():
                tokens['per_class'][clID] = {
                    'num_matches': 0,
                    'prec': 0.0,
                    'rec': 0.0,
                    'f1': 0.0
                }
            tokens_normalize = []

        # pick the pre-defined statistics query for the entity kind
        if entityType == 'user':
            queryStr = getattr(StatisticalFormulas_user, annoType).value
            queryStr = sql.SQL(queryStr).format(
                id_anno=sql.Identifier(project, 'annotation'),
                id_iu=sql.Identifier(project, 'image_user'),
                sql_goldenQuestion=sql_goldenQuestion)

        else:
            queryStr = getattr(StatisticalFormulas_model, annoType).value
            queryStr = sql.SQL(queryStr).format(
                id_anno=sql.Identifier(project, 'annotation'),
                id_iu=sql.Identifier(project, 'image_user'),
                id_pred=sql.Identifier(project, 'prediction'),
                sql_goldenQuestion=sql_goldenQuestion)

        #TODO: update points query (according to bboxes); re-write stats parsing below

        # get stats; stream rows via a server-side cursor and accumulate
        # per-entity token values
        response = {}
        with self.dbConnector.execute_cursor(queryStr,
                                             tuple(queryArgs)) as cursor:
            while True:
                b = cursor.fetchone()
                if b is None:
                    break

                if entityType == 'user':
                    entity = b['username']
                else:
                    entity = str(b['cnnstate'])

                if not entity in response:
                    response[entity] = tokens.copy()
                if annoType in ('points', 'boundingBoxes'):
                    # NOTE(review): num_matches is reset to 1 on every row for
                    # this entity (not incremented) — confirm this is intended
                    response[entity]['num_matches'] = 1
                    if b['num_target'] > 0:
                        response[entity]['num_matches'] += 1

                if annoType == 'segmentationMasks':
                    # decode segmentation masks
                    try:
                        mask_target = np.array(
                            base64ToImage(b['q1segmask'], b['q1width'],
                                          b['q1height']))
                        mask_source = np.array(
                            base64ToImage(b['q2segmask'], b['q2width'],
                                          b['q2height']))

                        if mask_target.shape == mask_source.shape and np.any(
                                mask_target) and np.any(mask_source):

                            # calculate OA
                            # (only over pixels labeled in both masks)
                            intersection = (mask_target > 0) * (mask_source >
                                                                0)
                            if np.any(intersection):
                                oa = np.mean(mask_target[intersection] ==
                                             mask_source[intersection])
                                response[entity]['overall_accuracy'] += oa
                                response[entity]['num_matches'] += 1

                            # calculate per-class precision and recall values
                            for clID in labelClasses.keys():
                                idx = labelClasses[clID][0]
                                tp = np.sum((mask_target == idx) *
                                            (mask_source == idx))
                                fp = np.sum((mask_target != idx) *
                                            (mask_source == idx))
                                fn = np.sum((mask_target == idx) *
                                            (mask_source != idx))
                                if (tp + fp + fn) > 0:
                                    prec, rec, f1 = self._calc_geometric_stats(
                                        tp, fp, fn)
                                    response[entity]['per_class'][clID][
                                        'num_matches'] += 1
                                    response[entity]['per_class'][clID][
                                        'prec'] += prec
                                    response[entity]['per_class'][clID][
                                        'rec'] += rec
                                    response[entity]['per_class'][clID][
                                        'f1'] += f1

                    except Exception as e:
                        print(
                            f'TODO: error in segmentation mask statistics calculation ("{str(e)}").'
                        )

                else:
                    # labels / points / bounding boxes: sum up the raw
                    # per-row token values
                    for key in tokens.keys():
                        if key == 'correct' or key == 'incorrect':
                            # classification
                            correct = b['label_correct']
                            # ignore None
                            if correct is True:
                                response[entity]['correct'] += 1
                                response[entity]['num_matches'] += 1
                            elif correct is False:
                                response[entity]['incorrect'] += 1
                                response[entity]['num_matches'] += 1
                        elif key in b and b[key] is not None:
                            response[entity][key] += b[key]

        # post-processing: turn accumulated sums into averages/derived stats
        for entity in response.keys():
            for t in tokens_normalize:
                if t in response[entity]:
                    if t == 'overall_accuracy':
                        response[entity][t] = float(response[entity]['correct']) / \
                            float(response[entity]['correct'] + response[entity]['incorrect'])
                    elif annoType in ('points', 'boundingBoxes'):
                        response[entity][t] /= response[entity]['num_matches']

            if annoType == 'points' or annoType == 'boundingBoxes':
                # derive precision/recall/F1 from accumulated tp/fp/fn
                prec, rec, f1 = self._calc_geometric_stats(
                    response[entity]['tp'], response[entity]['fp'],
                    response[entity]['fn'])
                response[entity]['prec'] = prec
                response[entity]['rec'] = rec
                response[entity]['f1'] = f1

            elif annoType == 'segmentationMasks':
                # normalize OA
                response[entity]['overall_accuracy'] /= response[entity][
                    'num_matches']

                # normalize all label class values as well
                for lcID in labelClasses.keys():
                    numMatches = response[entity]['per_class'][lcID][
                        'num_matches']
                    if numMatches > 0:
                        response[entity]['per_class'][lcID][
                            'prec'] /= numMatches
                        response[entity]['per_class'][lcID][
                            'rec'] /= numMatches
                        response[entity]['per_class'][lcID]['f1'] /= numMatches

        return {'label_classes': labelClasses, 'per_entity': response}

    def getUserAnnotationSpeeds(self,
                                project,
                                users,
                                goldenQuestionsOnly=False):
        '''
            Computes annotation timing statistics for every username in
            "users" within the given project: mean, median and the 25%/75%
            quartiles of the time required per annotation. Users without
            any matching annotations keep NaN placeholders. If
            "goldenQuestionsOnly" is True, only annotations on golden
            question images are considered.
        '''
        # start out with NaN placeholders for every requested user
        nan = float('nan')
        response = {
            u: {'avg': nan, 'median': nan, 'perc_25': nan, 'perc_75': nan}
            for u in users
        }

        if goldenQuestionsOnly:
            gqStr = sql.SQL('''
                JOIN {id_img} AS img
                ON anno.image = img.id
                WHERE img.isGoldenQuestion = true
            ''').format(id_img=sql.Identifier(project, 'image'))
        else:
            gqStr = sql.SQL('')

        queryStr = sql.SQL('''
            SELECT username, avg(timeRequired) AS avg,
            percentile_cont(0.50) WITHIN GROUP (ORDER BY timeRequired ASC) AS median,
            percentile_cont(0.25) WITHIN GROUP (ORDER BY timeRequired ASC) AS perc_25,
            percentile_cont(0.75) WITHIN GROUP (ORDER BY timeRequired ASC) AS perc_75
            FROM (
                SELECT username, timeRequired
                FROM {id_anno} AS anno
                {gqStr}
            ) AS q
            WHERE username IN %s
            GROUP BY username
        ''').format(id_anno=sql.Identifier(project, 'annotation'), gqStr=gqStr)
        rows = self.dbConnector.execute(queryStr, (tuple(users), ), 'all')
        if rows is not None:
            # overwrite placeholders for users that do have annotations
            for row in rows:
                response[row['username']] = {
                    'avg': float(row['avg']),
                    'median': float(row['median']),
                    'perc_25': float(row['perc_25']),
                    'perc_75': float(row['perc_75']),
                }
        return response

    def getUserFinished(self, project, username):
        '''
            Returns True if the user has viewed all images in the project,
            and False otherwise.
            We deliberately do not reveal more information to the general
            user, in order to e.g. sustain the golden question limitation
            system.
        '''
        # first row: images viewed by the user; second row: total images
        queryStr = sql.SQL('''
            SELECT COUNT(*) AS cnt FROM {id_iu}
            WHERE viewcount > 0 AND username = %s
            UNION ALL
            SELECT COUNT(*) AS cnt FROM {id_img};
        ''').format(id_img=sql.Identifier(project, 'image'),
                    id_iu=sql.Identifier(project, 'image_user'))
        counts = self.dbConnector.execute(queryStr, (username, ), 2)
        numViewed = counts[0]['cnt']
        numImages = counts[1]['cnt']
        return numViewed >= numImages

    def getTimeActivity(self,
                        project,
                        type='images',
                        numDaysMax=31,
                        perUser=False):
        '''
            Returns a histogram of the number of images viewed (if type = 'images')
            or annotations made (if type = 'annotations') over the last numDaysMax.
            If perUser is True, statistics are returned on a user basis.
            NOTE: parameter "type" shadows the builtin but is kept for
            interface compatibility.
        '''
        # choose table and timestamp column based on requested statistic
        if type == 'images':
            id_table = sql.Identifier(project, 'image_user')
            time_field = sql.SQL('last_checked')
        else:
            id_table = sql.Identifier(project, 'annotation')
            time_field = sql.SQL('timeCreated')

        if perUser:
            userSpec = sql.SQL(', username')
        else:
            userSpec = sql.SQL('')
        queryStr = sql.SQL('''
            SELECT to_char({time_field}, 'YYYY-Mon-dd') AS month_day, MIN({time_field}) AS date_of_day, COUNT(*) AS cnt {user_spec}
            FROM {id_table}
            WHERE {time_field} IS NOT NULL
            GROUP BY month_day {user_spec}
            ORDER BY date_of_day ASC
            LIMIT %s
        ''').format(time_field=time_field,
                    id_table=id_table,
                    user_spec=userSpec)
        result = self.dbConnector.execute(queryStr, (numDaysMax, ), 'all')

        #TODO: homogenize series and add missing days

        if perUser:
            response = {}
        else:
            response = {'counts': [], 'timestamps': [], 'labels': []}

        if result is None:
            # database error: return the empty skeleton instead of crashing
            # (fix: previously raised TypeError when iterating None)
            return response

        for row in result:
            if perUser:
                # lazily create the per-user series container
                userEntry = response.setdefault(row['username'], {
                    'counts': [],
                    'timestamps': [],
                    'labels': []
                })
                userEntry['counts'].append(row['cnt'])
                userEntry['timestamps'].append(row['date_of_day'].timestamp())
                userEntry['labels'].append(row['month_day'])
            else:
                response['counts'].append(row['cnt'])
                response['timestamps'].append(row['date_of_day'].timestamp())
                response['labels'].append(row['month_day'])
        return response
class AIWorker():
    '''
        Worker that executes AI model tasks (training, model state averaging,
        inference) for individual projects. The project-specific model and
        Active Learning (AL) criterion implementations are imported
        dynamically from the libraries registered in the project settings
        and verified against the required interface before instantiation.
    '''

    def __init__(self, config, passiveMode=False):
        '''
            Arguments:
            - config:       global configuration object
            - passiveMode:  if True, administrative notifications
                            (aide_internal_notify) are ignored
        '''
        self.config = config
        self.dbConnector = Database(config)
        self.passiveMode = passiveMode
        self._init_fileserver()

    def _init_fileserver(self):
        '''
            The AIWorker has a special routine to detect whether the instance it is running on
            also hosts the file server. If it does, data are loaded directly from disk to avoid
            going through the loopback network.
        '''
        self.fileServer = fileserver.FileServer(self.config)

    @staticmethod
    def _parse_settings(settings, description):
        '''
            Parses a JSON settings string.
            Returns the parsed object, or None if "settings" is None, empty,
            or not valid JSON (in which case a warning is printed).
            "description" only appears in the warning message (e.g.
            'model options').
        '''
        if settings is not None and len(settings):
            try:
                return json.loads(settings)
            except Exception as err:
                print(
                    'WARNING: could not read {description}. Error message: {message}.'
                    .format(description=description, message=str(err)))
        return None

    @staticmethod
    def _verify_required_functions(modelClass, library, requiredFunctions):
        '''
            Verifies bidirectionally that "modelClass" provides every
            function listed in "requiredFunctions" (dict: function name ->
            list of required argument names) with exactly those arguments
            (besides "self"). Raises an Exception on the first mismatch.
            "library" is the class import path, used in error messages only.
        '''
        functionNames = [
            func for func in dir(modelClass)
            if callable(getattr(modelClass, func))
        ]

        for key in requiredFunctions:
            if key not in functionNames:
                raise Exception(
                    'Class {} is missing required function {}.'.format(
                        library, key))

            # check function arguments bidirectionally
            # (fix: inspect.getargspec was removed in Python 3.11;
            # getfullargspec is the drop-in replacement)
            funArgs = inspect.getfullargspec(getattr(modelClass, key))
            for arg in requiredFunctions[key]:
                if arg not in funArgs.args:
                    # fix: method name and library were swapped in the
                    # original message's format arguments
                    raise Exception(
                        'Method {} of class {} is missing required argument {}.'
                        .format(key, library, arg))
            for arg in funArgs.args:
                if arg != 'self' and arg not in requiredFunctions[key]:
                    raise Exception(
                        'Unsupported argument {} of method {} in class {}.'.
                        format(arg, key, library))

    def _init_model_instance(self, project, modelLibrary, modelSettings):
        '''
            Imports the AI model class from "modelLibrary", verifies its
            interface and returns an instance configured for "project".
        '''
        # try to parse model settings
        modelSettings = self._parse_settings(modelSettings, 'model options')

        # import class object
        modelClass = get_class_executable(modelLibrary)

        # verify functions and arguments
        requiredFunctions = {
            '__init__':
            ['project', 'config', 'dbConnector', 'fileServer', 'options'],
            'train': ['stateDict', 'data', 'updateStateFun'],
            'average_model_states': ['stateDicts', 'updateStateFun'],
            'inference': ['stateDict', 'data', 'updateStateFun']
        }
        self._verify_required_functions(modelClass, modelLibrary,
                                        requiredFunctions)

        # create AI model instance
        return modelClass(
            project=project,
            config=self.config,
            dbConnector=self.dbConnector,
            fileServer=self.fileServer.get_secure_instance(project),
            options=modelSettings)

    def _init_alCriterion_instance(self, project, alLibrary, alSettings):
        '''
            Creates the Active Learning (AL) criterion provider instance.
        '''
        # try to parse settings
        alSettings = self._parse_settings(alSettings, 'AL criterion options')

        # import class object
        modelClass = get_class_executable(alLibrary)

        # verify functions and arguments
        requiredFunctions = {
            '__init__':
            ['project', 'config', 'dbConnector', 'fileServer', 'options'],
            'rank': ['data', 'updateStateFun']
        }
        self._verify_required_functions(modelClass, alLibrary,
                                        requiredFunctions)

        # create AL criterion instance
        return modelClass(
            project=project,
            config=self.config,
            dbConnector=self.dbConnector,
            fileServer=self.fileServer.get_secure_instance(project),
            options=alSettings)

    def _get_model_instance(self, project):
        '''
            Returns the class instance of the model specified in the given
            project.
            TODO: cache models?
        '''
        # get model settings for project
        queryStr = '''
            SELECT ai_model_library, ai_model_settings FROM aide_admin.project
            WHERE shortname = %s;
        '''
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        modelLibrary = result[0]['ai_model_library']
        modelSettings = result[0]['ai_model_settings']

        # create new model instance
        return self._init_model_instance(project, modelLibrary, modelSettings)

    def _get_alCriterion_instance(self, project):
        '''
            Returns the class instance of the Active Learning model
            specified in the project.
            TODO: cache models?
        '''
        # get AL criterion settings for project
        # (NOTE: result keys are lower-cased by the database driver)
        queryStr = '''
            SELECT ai_alCriterion_library, ai_alCriterion_settings FROM aide_admin.project
            WHERE shortname = %s;
        '''
        result = self.dbConnector.execute(queryStr, (project, ), 1)
        modelLibrary = result[0]['ai_alcriterion_library']
        modelSettings = result[0]['ai_alcriterion_settings']

        # create new AL criterion instance
        return self._init_alCriterion_instance(project, modelLibrary,
                                               modelSettings)

    def aide_internal_notify(self, message):
        '''
            Used for AIDE administrative communication between AIController
            and AIWorker(s), e.g. for setting up queues.
        '''
        if self.passiveMode:
            return
        # not required (yet)

    def call_train(self, data, epoch, numEpochs, project, subset):
        '''
            Runs a training pass of the project's model on the given data.
        '''
        # get project-specific model
        modelInstance = self._get_model_instance(project)

        return functional._call_train(project, data, epoch, numEpochs, subset,
                                      getattr(modelInstance, 'train'),
                                      self.dbConnector, self.fileServer)

    def call_average_model_states(self, epoch, numEpochs, project):
        '''
            Averages model states from (distributed) training runs.
        '''
        # get project-specific model
        modelInstance = self._get_model_instance(project)

        return functional._call_average_model_states(
            project, epoch, numEpochs,
            getattr(modelInstance, 'average_model_states'), self.dbConnector,
            self.fileServer)

    def call_inference(self, imageIDs, epoch, numEpochs, project):
        '''
            Performs inference on the given image IDs and ranks the results
            with the project's AL criterion.
        '''
        # get project-specific model and AL criterion
        modelInstance = self._get_model_instance(project)
        alCriterionInstance = self._get_alCriterion_instance(project)

        return functional._call_inference(
            project, imageIDs, epoch, numEpochs,
            getattr(modelInstance, 'inference'),
            getattr(alCriterionInstance, 'rank'), self.dbConnector,
            self.fileServer,
            self.config.getProperty('AIWorker',
                                    'inference_batch_size_limit',
                                    type=int,
                                    fallback=-1))

    def verify_model_state(self, project, modelLibrary, stateDict,
                           modelOptions):
        '''
            Launches a dummy training-averaging-inference chain
            on a received model state and returns True if the chain
            could be executed without any errors (else False). Does
            not store anything in the database.
            Inputs:
                - project:      str, project shortname (used to retrieve
                                sample data)
                - modelLibrary: str, identifier of the AI model
                - stateDict:    bytes object, AI model state
                - modelOptions: str, model settings to be tested
                                (optional)

            Returns a dict with the following entries:
                - valid:    bool, True if the provided state dict and
                            (optionally) model options are valid (i.e.,
                            can be used to perform training, averaging,
                            and inference), or False otherwise.
                - messages: str, text describing the error(s) encounte-
                            red if there are any.
        '''
        #TODO
        raise NotImplementedError('Not yet implemented.')
class DBMiddleware():
    def __init__(self, config):
        '''
            Middleware between the labeling UI frontend and the project
            database. Caches static project settings at startup and serves
            them from memory afterwards.
            Arguments:
            - config: global configuration object (provides getProperty())
        '''
        self.config = config
        self.dbConnector = Database(config)

        # cache project settings once; subsequent requests are served from
        # memory (see getProjectSettings)
        self._fetchProjectSettings()
        self.sqlBuilder = SQLStringBuilder(config)
        self.annoParser = AnnotationParser(config)

    def _fetchProjectSettings(self):
        '''
            Assembles the static project settings dict (self.projectSettings)
            from the configuration file and auxiliary JSON/HTML resources.
            Called once at startup; values fall back to built-in defaults
            where the configuration does not provide them.
            NOTE(review): the open() calls below raise if a configured file
            path does not exist — presumably intentional fail-fast behavior;
            confirm.
        '''
        # AI controller URI
        aiControllerURI = self.config.getProperty('Server', 'aiController_uri')
        if aiControllerURI is None or aiControllerURI.strip() == '':
            # no AI backend configured
            aiControllerURI = None

        # LabelUI drawing styles
        with open(
                self.config.getProperty(
                    'LabelUI',
                    'styles_file',
                    type=str,
                    fallback='modules/LabelUI/static/json/styles.json'),
                'r') as f:
            styles = json.load(f)

        # Image backdrops for index screen
        with open(
                self.config.getProperty(
                    'Project',
                    'backdrops_file',
                    type=str,
                    fallback='modules/LabelUI/static/json/backdrops.json'),
                'r') as f:
            backdrops = json.load(f)

        # Welcome message for UI tutorial
        with open(
                self.config.getProperty(
                    'Project',
                    'welcome_message_file',
                    type=str,
                    fallback=
                    'modules/LabelUI/static/templates/welcome_message.html'),
                'r') as f:
            welcomeMessage = f.readlines()

        # assemble the settings dict served to the frontend
        self.projectSettings = {
            'projectName':
            self.config.getProperty('Project', 'projectName'),
            'projectDescription':
            self.config.getProperty('Project', 'projectDescription'),
            'indexURI':
            self.config.getProperty('Server',
                                    'index_uri',
                                    type=str,
                                    fallback='/'),
            'dataServerURI':
            self.config.getProperty('Server', 'dataServer_uri'),
            'aiControllerURI':
            aiControllerURI,
            'dataType':
            self.config.getProperty('Project', 'dataType', fallback='images'),
            'classes':
            self.getClassDefinitions(),
            'enableEmptyClass':
            self.config.getProperty('Project',
                                    'enableEmptyClass',
                                    fallback='no'),
            'annotationType':
            self.config.getProperty('Project', 'annotationType'),
            'predictionType':
            self.config.getProperty('Project', 'predictionType'),
            'showPredictions':
            self.config.getProperty('LabelUI',
                                    'showPredictions',
                                    fallback='yes'),
            'showPredictions_minConf':
            self.config.getProperty('LabelUI',
                                    'showPredictions_minConf',
                                    type=float,
                                    fallback=0.5),
            'carryOverPredictions':
            self.config.getProperty('LabelUI',
                                    'carryOverPredictions',
                                    fallback='no'),
            'carryOverRule':
            self.config.getProperty('LabelUI',
                                    'carryOverRule',
                                    fallback='maxConfidence'),
            'carryOverPredictions_minConf':
            self.config.getProperty('LabelUI',
                                    'carryOverPredictions_minConf',
                                    type=float,
                                    fallback=0.75),
            'defaultBoxSize_w':
            self.config.getProperty('LabelUI',
                                    'defaultBoxSize_w',
                                    type=int,
                                    fallback=10),
            'defaultBoxSize_h':
            self.config.getProperty('LabelUI',
                                    'defaultBoxSize_h',
                                    type=int,
                                    fallback=10),
            'minBoxSize_w':
            self.config.getProperty('Project',
                                    'box_minWidth',
                                    type=int,
                                    fallback=1),
            'minBoxSize_h':
            self.config.getProperty('Project',
                                    'box_minHeight',
                                    type=int,
                                    fallback=1),
            'numImagesPerBatch':
            self.config.getProperty('LabelUI',
                                    'numImagesPerBatch',
                                    type=int,
                                    fallback=1),
            'minImageWidth':
            self.config.getProperty('LabelUI',
                                    'minImageWidth',
                                    type=int,
                                    fallback=300),
            'numImageColumns_max':
            self.config.getProperty('LabelUI',
                                    'numImageColumns_max',
                                    type=int,
                                    fallback=1),
            'defaultImage_w':
            self.config.getProperty('LabelUI',
                                    'defaultImage_w',
                                    type=int,
                                    fallback=800),
            'defaultImage_h':
            self.config.getProperty('LabelUI',
                                    'defaultImage_h',
                                    type=int,
                                    fallback=600),
            'styles':
            styles['styles'],
            'backdrops':
            backdrops,
            'welcomeMessage':
            welcomeMessage,
            'demoMode':
            self.config.getProperty('Project',
                                    'demoMode',
                                    type=bool,
                                    fallback=False)
        }

    def _assemble_annotations(self, cursor):
        response = {}
        while True:
            b = cursor.fetchone()
            if b is None:
                break

            imgID = str(b['image'])
            if not imgID in response:
                response[imgID] = {
                    'fileName': b['filename'],
                    'predictions': {},
                    'annotations': {},
                    'last_checked': None
                }
            viewcount = b['viewcount']
            if viewcount is not None:
                response[imgID]['viewcount'] = viewcount
            last_checked = b['last_checked']
            if last_checked is not None:
                if response[imgID]['last_checked'] is None:
                    response[imgID]['last_checked'] = last_checked
                else:
                    response[imgID]['last_checked'] = max(
                        response[imgID]['last_checked'], last_checked)

            # parse annotations and predictions
            entryID = str(b['id'])
            if b['ctype'] is not None:
                colnames = self.sqlBuilder.getColnames(b['ctype'])
                entry = {}
                for c in colnames:
                    value = b[c]
                    if isinstance(value, datetime):
                        value = value.timestamp()
                    elif isinstance(value, UUID):
                        value = str(value)
                    entry[c] = value

                if b['ctype'] == 'annotation':
                    response[imgID]['annotations'][entryID] = entry
                elif b['ctype'] == 'prediction':
                    response[imgID]['predictions'][entryID] = entry

        return response

    def getProjectSettings(self):
        '''
            Queries the database for general project-specific metadata, such as:
            - Classes: names, indices, default colors
            - Annotation type: one of {class labels, positions, bboxes}
        '''
        return self.projectSettings

    def getProjectInfo(self):
        '''
            Returns safe, shareable information about the project.
        '''
        return {
            'projectName':
            self.projectSettings['projectName'],
            'projectDescription':
            self.projectSettings['projectDescription'],
            'demoMode':
            self.config.getProperty('Project',
                                    'demoMode',
                                    type=bool,
                                    fallback=False),
            'backdrops':
            self.projectSettings['backdrops']['images']
        }

    def getClassDefinitions(self):
        '''
            Returns a dictionary with entries for all classes in the project.
        '''
        classdef = {
            'entries': {
                'default': {}  # default group for ungrouped label classes
            }
        }
        schema = self.config.getProperty('Database', 'schema')

        # query data
        sql = '''
            SELECT 'group' AS type, id, NULL as idx, name, color, parent, NULL AS keystroke FROM {schema}.labelclassgroup
            UNION ALL
            SELECT 'class' AS type, id, idx, name, color, labelclassgroup, keystroke FROM {schema}.labelclass;
        '''.format(schema=schema)
        classData = self.dbConnector.execute(sql, None, 'all')

        # assemble entries first
        allEntries = {}
        numClasses = 0
        for cl in classData:
            id = str(cl['id'])
            entry = {
                'id': id,
                'name': cl['name'],
                'color': cl['color'],
                'parent':
                str(cl['parent']) if cl['parent'] is not None else None,
            }
            if cl['type'] == 'group':
                entry['entries'] = {}
            else:
                entry['index'] = cl['idx']
                entry['keystroke'] = cl['keystroke']
                numClasses += 1
            allEntries[id] = entry

        # transform into tree
        def _find_parent(tree, parentID):
            if parentID is None:
                return tree['entries']['default']
            elif 'id' in tree and tree['id'] == parentID:
                return tree
            elif 'entries' in tree:
                for ek in tree['entries'].keys():
                    rv = _find_parent(tree['entries'][ek], parentID)
                    if rv is not None:
                        return rv
                return None
            else:
                return None

        allEntries['default'] = {'name': '(other)', 'entries': {}}
        allEntries = {'entries': allEntries}
        for key in list(allEntries['entries'].keys()):
            if key == 'default':
                continue
            if key in allEntries['entries']:
                entry = allEntries['entries'][key]
                parentID = entry['parent']
                del entry['parent']

                if 'entries' in entry and parentID is None:
                    # group, but no parent: append to root directly
                    allEntries['entries'][key] = entry

                else:
                    # move item
                    parent = _find_parent(allEntries, parentID)
                    parent['entries'][key] = entry
                    del allEntries['entries'][key]

        classdef = allEntries
        classdef['numClasses'] = numClasses
        return classdef

    def getBatch_fixed(self, username, data):
        '''
            Returns entries from the database based on the list of data entry identifiers specified.
        '''
        # query
        sql = self.sqlBuilder.getFixedImagesQueryString(
            self.projectSettings['demoMode'])

        # parse results
        queryVals = (
            tuple(UUID(d) for d in data),
            username,
            username,
        )
        if self.projectSettings['demoMode']:
            queryVals = (tuple(UUID(d) for d in data), )

        with self.dbConnector.execute_cursor(sql, queryVals) as cursor:
            try:
                response = self._assemble_annotations(cursor)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()
        return {'entries': response}

    def getBatch_auto(self,
                      username,
                      order='unlabeled',
                      subset='default',
                      limit=None):
        '''
            TODO: description
        '''
        # query
        sql = self.sqlBuilder.getNextBatchQueryString(
            order, subset, self.projectSettings['demoMode'])

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)

        # parse results
        queryVals = (
            username,
            limit,
            username,
        )
        if self.projectSettings['demoMode']:
            queryVals = (limit, )

        with self.dbConnector.execute_cursor(sql, queryVals) as cursor:
            response = self._assemble_annotations(cursor)

        return {'entries': response}

    def getBatch_timeRange(self,
                           minTimestamp,
                           maxTimestamp,
                           userList,
                           skipEmptyImages=False,
                           limit=None):
        '''
            Returns images that have been annotated within the given time range and/or
            by the given user(s). All arguments are optional.
            Useful for reviewing existing annotations.
        '''
        # query string
        sql = self.sqlBuilder.getDateQueryString(minTimestamp, maxTimestamp,
                                                 userList, skipEmptyImages)

        # check validity and provide arguments
        queryVals = []
        if userList is not None:
            queryVals.append(tuple(userList))
        if minTimestamp is not None:
            queryVals.append(minTimestamp)
        if maxTimestamp is not None:
            queryVals.append(maxTimestamp)
        if skipEmptyImages and userList is not None:
            queryVals.append(tuple(userList))

        # limit (TODO: make 128 a hyperparameter)
        if limit is None:
            limit = 128
        else:
            limit = min(int(limit), 128)
        queryVals.append(limit)

        if userList is not None:
            queryVals.append(tuple(userList))

        # query and parse results
        with self.dbConnector.execute_cursor(sql, tuple(queryVals)) as cursor:
            try:
                response = self._assemble_annotations(cursor)
                # self.dbConnector.conn.commit()
            except Exception as e:
                print(e)
                # self.dbConnector.conn.rollback()
            finally:
                pass
                # cursor.close()
        return {'entries': response}

    def get_timeRange(self, userList, skipEmptyImages=False):
        '''
            Returns two timestamps denoting the temporal limits within which
            images have been viewed by the users provided in the userList.
            Arguments:
            - userList: string (single user name) or list of strings (multiple).
                        Can also be None; in this case all annotations will be
                        checked.
            - skipEmptyImages: if True, only images that contain at least one
                               annotation will be considered.
        '''
        # query string
        sql = self.sqlBuilder.getTimeRangeQueryString(userList,
                                                      skipEmptyImages)

        arguments = (None if userList is None else tuple(userList))
        result = self.dbConnector.execute(sql, (arguments, ), numReturn=1)

        if result is not None and len(result):
            return {
                'minTimestamp': result[0]['mintimestamp'],
                'maxTimestamp': result[0]['maxtimestamp'],
            }
        else:
            return {'error': 'no annotations made'}

    def submitAnnotations(self, username, submissions):
        '''
            Sends user-provided annotations to the database.

            Args:
                username:    name of the user the annotations are submitted for.
                submissions: dict with a mandatory 'entries' member mapping
                             image UUID strings to per-image entries (each with
                             optional 'annotations', 'timeCreated' and
                             'timeRequired' fields), plus an optional 'meta'
                             member that is JSON-serialized once and attached
                             to every annotation and viewcount row.

            Returns:
                0 in all cases; in demo mode nothing is written at all.
        '''
        # demo mode is read-only: silently discard the submission
        if self.projectSettings['demoMode']:
            return 0

        # assemble values; the column set depends on the project's annotation type
        colnames = getattr(QueryStrings_annotation,
                           self.projectSettings['annotationType']).value
        values_insert = []
        values_update = []

        # serialize submission-wide metadata once; shared by all rows below
        meta = (None if not 'meta' in submissions else json.dumps(
            submissions['meta']))

        # for deletion: remove all annotations whose image ID matches but whose annotation ID is not among the submitted ones
        ids = []

        viewcountValues = []
        for imageKey in submissions['entries']:
            entry = submissions['entries'][imageKey]

            try:
                lastChecked = entry['timeCreated']
                lastTimeRequired = entry['timeRequired']
                if lastTimeRequired is None: lastTimeRequired = 0
            except:
                # NOTE(review): bare except; any missing/malformed time field
                # silently falls back to "now" and zero required time
                lastChecked = datetime.now(tz=pytz.utc)
                lastTimeRequired = 0

            if 'annotations' in entry and len(entry['annotations']):
                for annotation in entry['annotations']:
                    # assemble annotation values in the order given by colnames
                    annotationTokens = self.annoParser.parseAnnotation(
                        annotation)
                    annoValues = []
                    for cname in colnames:
                        if cname == 'id':
                            if cname in annotationTokens:
                                # cast and only append id if the annotation is an existing one
                                annoValues.append(UUID(
                                    annotationTokens[cname]))
                                ids.append(UUID(annotationTokens[cname]))
                        elif cname == 'image':
                            annoValues.append(UUID(imageKey))
                        elif cname == 'label' and annotationTokens[
                                cname] is not None:
                            # NOTE(review): assumes the parser always emits a
                            # 'label' key; a missing key would raise KeyError
                            # while evaluating the condition above — confirm
                            annoValues.append(UUID(annotationTokens[cname]))
                        elif cname == 'timeCreated':
                            try:
                                annoValues.append(
                                    dateutil.parser.parse(
                                        annotationTokens[cname]))
                            except:
                                # unparseable or missing timestamp: use "now"
                                annoValues.append(datetime.now(tz=pytz.utc))
                        elif cname == 'timeRequired':
                            timeReq = annotationTokens[cname]
                            if timeReq is None: timeReq = 0
                            annoValues.append(timeReq)
                        elif cname == 'username':
                            annoValues.append(username)
                        elif cname in annotationTokens:
                            annoValues.append(annotationTokens[cname])
                        elif cname == 'unsure':
                            # NOTE(review): this branch is only reached when
                            # 'unsure' is NOT in annotationTokens (the generic
                            # branch above matches otherwise), so the inner
                            # condition is always False and False is appended
                            if 'unsure' in annotationTokens and annotationTokens[
                                    'unsure'] is not None:
                                annoValues.append(annotationTokens[cname])
                            else:
                                annoValues.append(False)
                        elif cname == 'meta':
                            annoValues.append(meta)
                        else:
                            annoValues.append(None)
                    if 'id' in annotationTokens:
                        # existing annotation; update
                        values_update.append(tuple(annoValues))
                    else:
                        # new annotation
                        values_insert.append(tuple(annoValues))

            # one viewcount row per image, whether or not annotations were made
            viewcountValues.append(
                (username, imageKey, 1, lastChecked, lastTimeRequired, meta))

        schema = self.config.getProperty('Database', 'schema')

        # delete all annotations that are not in submitted batch
        imageKeys = list(UUID(k) for k in submissions['entries'])
        if len(imageKeys):
            if len(ids):
                sql = '''
                    DELETE FROM {schema}.annotation WHERE username = %s AND id IN (
                        SELECT idQuery.id FROM (
                            SELECT * FROM {schema}.annotation WHERE id NOT IN %s
                        ) AS idQuery
                        JOIN (
                            SELECT * FROM {schema}.annotation WHERE image IN %s
                        ) AS imageQuery ON idQuery.id = imageQuery.id);
                '''.format(schema=schema)
                self.dbConnector.execute(sql, (
                    username,
                    tuple(ids),
                    tuple(imageKeys),
                ))
            else:
                # no annotations submitted; delete all annotations submitted before
                sql = '''
                    DELETE FROM {schema}.annotation WHERE username = %s AND image IN %s;
                '''.format(schema=schema)
                self.dbConnector.execute(sql, (
                    username,
                    tuple(imageKeys),
                ))

        # insert new annotations
        if len(values_insert):
            sql = '''
                INSERT INTO {}.annotation ({})
                VALUES %s ;
            '''.format(
                schema,
                ', '.join(colnames[1:])  # skip 'id' column
            )
            self.dbConnector.insert(sql, values_insert)

        # update existing annotations
        if len(values_update):
            updateCols = ''
            for col in colnames:
                if col == 'label':
                    # ensure the label value is treated as a UUID on the SQL side
                    updateCols += '{col} = UUID(e.{col}),'.format(col=col)
                elif col == 'timeRequired':
                    # we sum the required times together
                    updateCols += '{col} = COALESCE(a.{col},0) + COALESCE(e.{col},0),'.format(
                        col=col)
                else:
                    updateCols += '{col} = e.{col},'.format(col=col)

            sql = '''
                UPDATE {schema}.annotation AS a
                SET {updateCols}
                FROM (VALUES %s) AS e({colnames})
                WHERE e.id = a.id;
            '''.format(schema=schema,
                       updateCols=updateCols.strip(','),
                       colnames=', '.join(colnames))
            self.dbConnector.insert(sql, values_update)

        # viewcount table: upsert one row per image seen in this submission
        sql = '''
            INSERT INTO {}.image_user (username, image, viewcount, last_checked, last_time_required, meta)
            VALUES %s 
            ON CONFLICT (username, image) DO UPDATE SET viewcount = image_user.viewcount + 1, last_checked = EXCLUDED.last_checked, last_time_required = EXCLUDED.last_time_required, meta = EXCLUDED.meta;
        '''.format(schema)

        self.dbConnector.insert(sql, viewcountValues)

        return 0
# Esempio n. 27
# 0
# NOTE(review): the indented fragment below is an extraction artifact — a
# duplicated constructor from another example with broken indentation. It is
# commented out (content preserved) so the file stays syntactically valid.
#  def __init__(self, config, celery_app):
#      self.config = config
#      self.dbConn = Database(config)
#      self.sqlBuilder = SQLStringBuilder(config)
#      self.celery_app = celery_app
class AIMiddleware():
    '''
        Middleware coordinating AI model training and inference jobs:
        assembles Celery task signatures, submits them to the registered
        AIWorker instances and tracks their progress via a MessageProcessor.
    '''

    def __init__(self, config):
        self.config = config
        self.dbConn = Database(config)
        self.sqlBuilder = SQLStringBuilder(config)

        self.training = False  # will be set to True once start_training is called (and False as soon as everything about the training process has finished)

        # bind to the current Celery application and make it this process' default
        self.celery_app = current_app
        self.celery_app.set_current()
        self.celery_app.set_default()

        self.watchdog = None  # note: watchdog only created if users poll status (i.e., if there's activity)

        #TODO
        self.messageProcessor = MessageProcessor(self.celery_app)
        self.messageProcessor.start()

    def _init_watchdog(self):
        '''
            Launches a thread that periodically polls the database for new
            annotations. Once the required number of new annotations is reached,
            this thread will initiate the training process through the middleware.
            The thread will be terminated and destroyed; a new thread will only be
            re-created once the training process has finished.
        '''
        # no watchdog while a training round is running
        if self.training:
            return

        numImages_autoTrain = self.config.getProperty('AIController',
                                                      'numImages_autoTrain',
                                                      type=int,
                                                      fallback=-1)
        # -1 (or an absent property) disables automatic training entirely
        if numImages_autoTrain == -1:
            return

        self.watchdog = Watchdog(self.config, self.dbConn, self)
        self.watchdog.start()

    def _get_num_available_workers(self):
        '''
            Returns the number of registered Celery workers as reported by
            broker inspection, or 1 as a fallback if inspection fails.
        '''
        #TODO: message + queue if no worker available
        #TODO: limit to n tasks per worker
        i = self.celery_app.control.inspect()
        if i is not None:
            stats = i.stats()
            if stats is not None:
                # NOTE(review): stats() is queried a second time here — each
                # call pings all workers; 'len(stats)' would avoid the extra
                # round-trip
                return len(i.stats())
        return 1  #TODO

    def _training_completed(self, trainingJob):
        '''
            To be called after a training process has been completed.
            If there is more than one worker, the 'average_model_states'
            instruction of the AI model is called and again awaited for.
            After successful training, the 'training' flag will be set
            to False to allow another round of model training.

            (Note: model state averaging is chained onto the Celery job at
            submission time; this callback itself only re-checks the flag.)
        '''

        # re-enable training if no other training job is ongoing
        self.training = self.messageProcessor.task_ongoing('train')

    def _get_training_job_signature(self,
                                    minTimestamp='lastState',
                                    minNumAnnoPerImage=0,
                                    maxNumImages=None,
                                    maxNumWorkers=-1):
        '''
            Assembles (but does not submit) a training job based on the provided parameters.

            Returns:
                (process, num_workers): the Celery signature (a group of
                'call_train' tasks when distributed across workers) and the
                worker count — or None if assembly failed (see note below).
        '''
        # check if training is still in progress
        if self.messageProcessor.task_ongoing('train'):
            raise Exception('Training process already running.')

        self.training = True

        try:

            # sanity checks
            if not (isinstance(minTimestamp, datetime)
                    or minTimestamp == 'lastState' or minTimestamp == -1
                    or minTimestamp is None):
                raise ValueError(
                    '{} is not a recognized property for variable "minTimestamp"'
                    .format(str(minTimestamp)))

            # NOTE(review): with the default maxNumWorkers=-1 this min() also
            # yields -1, so the single-signature path below is taken; confirm
            # whether -1 was meant to mean "all available workers" here
            if maxNumWorkers != 1:
                # only query the number of available workers if more than one is specified to save time
                num_workers = min(maxNumWorkers,
                                  self._get_num_available_workers())
            else:
                num_workers = maxNumWorkers

            # query image IDs
            sql = self.sqlBuilder.getLatestQueryString(
                minNumAnnoPerImage=minNumAnnoPerImage, limit=maxNumImages)

            if isinstance(minTimestamp, datetime):
                imageIDs = self.dbConn.execute(sql, (minTimestamp, ), 'all')
            else:
                imageIDs = self.dbConn.execute(sql, None, 'all')

            imageIDs = [i['image'] for i in imageIDs]

            if maxNumWorkers > 1:

                # distribute across workers (TODO: also specify subset size for multiple jobs; randomly draw if needed)
                images_subset = array_split(
                    imageIDs, max(1,
                                  len(imageIDs) // num_workers))

                processes = []
                for subset in images_subset:
                    processes.append(
                        celery_interface.call_train.si(subset, True))
                process = group(processes)

            else:
                # call one worker directly
                # process = celery_interface.call_train.delay(data) #TODO: route to specific worker? http://docs.celeryproject.org/en/latest/userguide/routing.html#manual-routing
                process = celery_interface.call_train.si(imageIDs, False)

            return process, num_workers

        except:
            # NOTE(review): bare except that swallows the error and returns
            # None; callers unpack the result into a tuple and would then
            # raise a TypeError — consider re-raising instead
            self.training = self.messageProcessor.task_ongoing('train')
            return None

    def _get_inference_job_signature(self, imageIDs, maxNumWorkers=-1):
        '''
            Assembles (but does not submit) an inference job based on the provided parameters.

            Returns a Celery group of 'call_inference' tasks, one per image
            subset.
        '''
        # setup
        if maxNumWorkers != 1:
            # only query the number of available workers if more than one is specified to save time
            num_available = self._get_num_available_workers()
            if maxNumWorkers == -1:
                maxNumWorkers = num_available  #TODO: more than one process per worker?
            else:
                maxNumWorkers = min(maxNumWorkers, num_available)

        # distribute across workers
        # NOTE(review): if _get_num_available_workers() ever reported 0, the
        # division below would raise ZeroDivisionError — confirm it cannot
        images_subset = array_split(imageIDs,
                                    max(1,
                                        len(imageIDs) // maxNumWorkers))
        jobs = []
        for subset in images_subset:
            job = celery_interface.call_inference.si(imageIDs=subset)
            jobs.append(job)

        # run all chunks in parallel as one Celery group
        jobGroup = group(jobs)
        return jobGroup

    def start_training(self,
                       minTimestamp='lastState',
                       minNumAnnoPerImage=0,
                       maxNumImages=None,
                       maxNumWorkers=-1):
        '''
            Initiates a training round for the model, based on the set of data (images, annotations)
            as specified in the parameters. Distributes data to the set of registered AIWorker instan-
            ces, which then call the 'train' function of the AI model given in the configuration. Upon
            training completion, the model states as returned by the function, and eventually the 
            AIWorker instances are collected, and the AIController calls the 'average_states' function
            of the AI model to get one single, most recent state. This is finally inserted to the data-
            base.
            Note that this function only loads the annotation data from the database, but not the images.
            Retrieving images is part of the AI model's 'train' function. TODO: feature vectors?

            Input parameters:
            - minTimestamp: Defines the earliest point in time of the annotations to be considered for
                            model training. May take one of the following values:
                            - 'lastState' (default): Limits the annotations to those made after the time-
                                                     stamp of the latest model state. If no model state is
                                                     found, all annotations are considered.
                            - None, -1, or 'all': Includes all annotations.
                            - (a datetime object): Includes annotations made after a custom timestamp.
            - minNumAnnoPerImage: Minimum number of annotations per image to be considered for training.
                                  This may be useful for e.g. detection tasks with a lot of false alarms
                                  in order to limit the "forgetting factor" of the model subject to training.
            - maxNumImages: Maximum number of images to train on at a time.
            - maxNumWorkers: Specify the maximum number of workers to distribute training to. If set to 1,
                             the model is trained on just one worker (no model state averaging appended).
                             If set to a number, that number of workers (up to the maximum number of connected)
                             is consulted for training the model. Upon completion, all model state dictionaries
                             are averaged by one random worker.
                             If set to -1, all connected workers are considered. //TODO: load balancing?

            Returns:
            - A dict with a status message. May take one of the following:
                - TODO: status ok, fail, no annotations, etc. Make threaded so that it immediately returns something.
        '''

        # NOTE(review): _get_training_job_signature returns None on failure,
        # which would make this tuple unpacking raise a TypeError
        process, numWorkers = self._get_training_job_signature(
            minTimestamp=minTimestamp,
            minNumAnnoPerImage=minNumAnnoPerImage,
            maxNumImages=maxNumImages,
            maxNumWorkers=maxNumWorkers)

        # submit job
        task_id = self.messageProcessor.task_id()
        if numWorkers > 1:
            # also append average model states job
            job = process.apply_async(
                task_id=task_id,
                ignore_result=False,
                result_extended=True,
                headers={
                    'type': 'train',
                    'submitted': str(current_time())
                },
                link=celery_interface.call_average_model_states.s())
        else:
            job = process.apply_async(task_id=task_id,
                                      ignore_result=False,
                                      result_extended=True,
                                      headers={
                                          'type': 'train',
                                          'submitted': str(current_time())
                                      })

        # start listener
        self.messageProcessor.register_job(job, 'train',
                                           self._training_completed)

        return 'ok'

    def _do_inference(self, process):
        '''
            Submits a prepared inference job (Celery signature/group) and
            registers it with the message processor for status tracking.
        '''

        # send job
        task_id = self.messageProcessor.task_id()
        result = process.apply_async(task_id=task_id,
                                     ignore_result=False,
                                     result_extended=True,
                                     headers={
                                         'type': 'inference',
                                         'submitted': str(current_time())
                                     })

        # start listener
        self.messageProcessor.register_job(result, 'inference', None)

        return

    def start_inference(self,
                        forceUnlabeled=True,
                        maxNumImages=-1,
                        maxNumWorkers=-1):
        '''
            Performs inference (prediction) on a set of data (images) as specified in the parameters. Distributes
            data to the set of registered AIWorker instances, which then call the 'inference' function of the AI
            model given in the configuration. Upon completion, each AIWorker then automatically inserts the latest
            predictions into the database and reports back to the AIController (this instance) that its job has
            finished.
            Note that this function only loads the annotation data from the database, but not the images.
            Retrieving images is part of the AI model's 'train' function.
            The AI model, depending on its configuration, may or may not choose to load the images, but just work
            with the feature vectors as provided through the database directly. This is particularly useful for mo-
            dels that are supposed to have e.g. a frozen feature extractor, but fine-tune the last prediction branch
            at each inference time to accelerate the process.

            Input parameters:
            - forceUnlabeled: If True, only images that have not been labeled (i.e., with a viewcount of 0) will be
                              predicted on (default).
            - maxNumImages: Manually override the project settings' maximum number of images to do inference on.
                            If set to -1 (default), the value from the project settings will be chosen.
            - maxNumWorkers: Manually set the maximum number of AIWorker instances to perform inference at the same
                             time. If set to -1 (default), the data will be divided across all registered workers.
        '''

        # setup: fall back to the project-configured image cap
        if maxNumImages is None or maxNumImages == -1:
            maxNumImages = self.config.getProperty('AIController',
                                                   'maxNumImages_inference',
                                                   type=int)

        # load the IDs of the images that are being subjected to inference
        sql = self.sqlBuilder.getInferenceQueryString(forceUnlabeled,
                                                      maxNumImages)
        imageIDs = self.dbConn.execute(sql, None, 'all')
        imageIDs = [i['image'] for i in imageIDs]

        process = self._get_inference_job_signature(imageIDs, maxNumWorkers)
        self._do_inference(process)
        return 'ok'

    def inference_fixed(self, imageIDs, maxNumWorkers=-1):
        '''
            Performs inference (prediction) on a fixed set of data (images), as provided by the parameter 'imageIDs'.
            Distributes data to the set of registered AIWorker instances, which then call the 'inference' function of
            the AI model given in the configuration. Upon completion, each AIWorker then automatically inserts the
            latest predictions into the database and reports back to the AIController (this instance) that its job has
            finished.
            Note that this function only loads the annotation data from the database, but not the images.
            Retrieving images is part of the AI model's 'train' function.
            The AI model, depending on its configuration, may or may not choose to load the images, but just work
            with the feature vectors as provided through the database directly. This is particularly useful for mo-
            dels that are supposed to have e.g. a frozen feature extractor, but fine-tune the last prediction branch
            at each inference time to accelerate the process.

            Input parameters:
            - imageIDs: An array containing the UUIDs (or equivalent strings) of the images that need to be inferred on.
            - maxNumWorkers: Manually set the maximum number of AIWorker instances to perform inference at the same
                             time. If set to -1 (default), the data will be divided across all registered workers.
        '''

        process = self._get_inference_job_signature(imageIDs, maxNumWorkers)
        self._do_inference(process)
        return 'ok'

    def start_train_and_inference(self,
                                  minTimestamp='lastState',
                                  minNumAnnoPerImage=0,
                                  maxNumImages_train=None,
                                  maxNumWorkers_train=1,
                                  forceUnlabeled_inference=True,
                                  maxNumImages_inference=None,
                                  maxNumWorkers_inference=1):
        '''
            Submits a model training job, followed by inference.
            This is the default behavior for the automated model update, since the newly trained model should directly
            be used to infer new, potentially useful labels.
        '''

        # get training job signature
        # NOTE(review): as in start_training, a None return from the call
        # below would make this unpacking raise a TypeError
        process, numWorkers_train = self._get_training_job_signature(
            minTimestamp=minTimestamp,
            minNumAnnoPerImage=minNumAnnoPerImage,
            maxNumImages=maxNumImages_train,
            maxNumWorkers=maxNumWorkers_train)

        # submit job
        task_id = self.messageProcessor.task_id()
        if numWorkers_train > 1:
            # also append average model states job
            job = process.apply_async(
                task_id=task_id,
                ignore_result=False,
                result_extended=True,
                headers={
                    'type': 'train',
                    'submitted': str(current_time())
                },
                link=celery_interface.call_average_model_states.s())
        else:
            job = process.apply_async(task_id=task_id,
                                      ignore_result=False,
                                      result_extended=True,
                                      headers={
                                          'type': 'train',
                                          'submitted': str(current_time())
                                      })

        # start listener thread: kick off inference once training reports done
        def chain_inference(*args):
            self.training = self.messageProcessor.task_ongoing('train')
            return self.start_inference(forceUnlabeled_inference,
                                        maxNumImages_inference,
                                        maxNumWorkers_inference)

        #TODO: chaining doesn't work properly this way...
        self.messageProcessor.register_job(job, 'train', chain_inference)

        return 'ok'

    def check_status(self, project, tasks, workers):
        '''
            Queries the Celery worker results depending on the parameters specified.
            Returns their status accordingly if they exist.

            Args:
                project: if truthy, include project-level progress (annotation
                         count towards the next auto-training round).
                tasks:   if truthy, include the status of running tasks.
                workers: if truthy, ping and include per-worker status
                         (expensive).
        '''
        status = {}

        # project status
        if project:
            if self.training:
                status['project'] = {}
            else:
                # notify watchdog that users are active
                if self.watchdog is None or self.watchdog.stopped():
                    self._init_watchdog()
                if self.watchdog is not None:
                    self.watchdog.nudge()
                    status['project'] = {
                        'num_annotated': self.watchdog.lastCount,
                        'num_next_training': self.watchdog.annoThreshold
                    }
                else:
                    status['project'] = {}

        # running tasks status
        if tasks:
            status['tasks'] = self.messageProcessor.poll_status()

        # get worker status (this is very expensive, as each worker needs to be pinged)
        if workers:
            status['workers'] = self.messageProcessor.poll_worker_status()

        return status
# Esempio n. 29 (extraction artifact separating scraped examples; commented
# out so the file stays syntactically valid)
# 0
def main():
    '''
        Command-line tester for AI models registered in an AIDE project.

        Parses arguments, verifies that the AIDE file server is reachable,
        resolves the project's AI model library and settings (with optional
        command-line overrides), loads a sample batch of training images and
        runs the model's 'train' or 'inference' function once, printing
        progress to stdout.
    '''

    # parse arguments
    parser = argparse.ArgumentParser(description='AIDE local model tester')
    parser.add_argument('--project',
                        type=str,
                        required=True,
                        help='Project shortname to draw sample data from.')
    parser.add_argument(
        '--mode',
        type=str,
        required=True,
        help=
        'Evaluation mode (function to call). One of {"train", "inference"}.')
    parser.add_argument(
        '--modelLibrary',
        type=str,
        required=False,
        help=
        'Optional AI model library override. Provide a dot-separated Python import path here.'
    )
    parser.add_argument(
        '--modelSettings',
        type=str,
        required=False,
        help=
        'Optional AI model settings override (absolute or relative path to settings file, or else "none" to not use any predefined settings).'
    )
    args = parser.parse_args()
    #TODO: continue

    # explicit validation instead of 'assert' (assertions are stripped under -O)
    mode = args.mode.lower()
    if mode not in ('train', 'inference'):
        raise ValueError(f'"{args.mode}" is not a known evaluation mode.')

    # initialize required modules
    config = Config()
    dbConnector = Database(config)
    fileServer = FileServer(config).get_secure_instance(args.project)
    aiw = AIWorker(config, dbConnector, True)
    aicw = AIControllerWorker(config, None)

    # check if AIDE file server is reachable
    admin = AdminMiddleware(config, dbConnector)
    connDetails = admin.getServiceDetails(True, False)
    fsVersion = connDetails['FileServer']['aide_version']
    if not isinstance(fsVersion, str):
        # no file server running
        raise Exception(
            'ERROR: AIDE file server is not running, but required for running models. Make sure to launch it prior to running this script.'
        )
    elif fsVersion != AIDE_VERSION:
        print(
            f'WARNING: the AIDE version of File Server instance ({fsVersion}) differs from this one ({AIDE_VERSION}).'
        )

    # get model trainer instance and settings
    queryStr = '''
        SELECT ai_model_library, ai_model_settings FROM aide_admin.project
        WHERE shortname = %s;
    '''
    result = dbConnector.execute(queryStr, (args.project, ), 1)
    if result is None or not len(result):
        raise Exception(
            f'Project "{args.project}" could not be found in this installation of AIDE.'
        )

    modelLibrary = result[0]['ai_model_library']
    modelSettings = result[0]['ai_model_settings']

    # apply optional overrides; argparse guarantees the attributes exist
    # (None when not provided), so no hasattr check is needed
    customSettingsSpecified = False
    if isinstance(args.modelSettings, str) and len(args.modelSettings):
        # settings override specified
        if args.modelSettings.lower() == 'none':
            modelSettings = None
            customSettingsSpecified = True
        elif not os.path.isfile(args.modelSettings):
            print(
                f'WARNING: model settings override provided, but file cannot be found ("{args.modelSettings}"). Falling back to project default ("{modelSettings}").'
            )
        else:
            modelSettings = args.modelSettings
            customSettingsSpecified = True

    if isinstance(args.modelLibrary, str) and len(args.modelLibrary):
        # library override specified; try to import it
        try:
            modelClass = helpers.get_class_executable(args.modelLibrary)
            if modelClass is None:
                # raise a concrete error instead of a bare 'raise' (which,
                # with no active exception, surfaces as a RuntimeError)
                raise ImportError(
                    f'Could not import model library "{args.modelLibrary}".')
            modelLibrary = args.modelLibrary

            # re-check if current model settings are compatible; warn and set to None if not
            if modelLibrary != result[0][
                    'ai_model_library'] and not customSettingsSpecified:
                # project model settings are not compatible with provided model
                print(
                    'WARNING: custom model library specified differs from the one currently set in project. Model settings will be set to None.'
                )
                modelSettings = None

        except Exception:
            print(
                f'WARNING: model library override provided ("{args.modelLibrary}"), but could not be imported. Falling back to project default ("{modelLibrary}").'
            )

    # initialize instance
    print(f'Using model library "{modelLibrary}".')
    modelTrainer = aiw._init_model_instance(args.project, modelLibrary,
                                            modelSettings)

    stateDict = None  #TODO: load latest unless override is specified?

    # get data
    data = aicw.get_training_images(project=args.project, maxNumImages=512)
    data = __load_metadata(args.project, dbConnector, data[0],
                           (mode == 'train'))

    # helper functions
    def updateStateFun(state, message, done=None, total=None):
        # progress callback handed to the model: prints the message and, if
        # known, a done/total counter
        print(message, end='')
        if done is not None and total is not None:
            print(f': {done}/{total}')
        else:
            print('')

    # launch task
    if mode == 'train':
        result = modelTrainer.train(stateDict, data, updateStateFun)
        if result is None:
            raise Exception(
                'Training function must return an object (i.e., trained model state) to be stored in the database.'
            )

    elif mode == 'inference':
        result = modelTrainer.inference(stateDict, data, updateStateFun)
# Esempio n. 30 (extraction artifact separating scraped examples; commented
# out so the file stays syntactically valid)
# 0
class UserMiddleware():

    TOKEN_NUM_BYTES = 64
    SALT_NUM_ROUNDS = 12

    def __init__(self, config):
        # Keep a handle on the application configuration and open a
        # database connection for all user/session queries.
        self.config = config
        self.dbConnector = Database(config)

        # Local session cache:
        # username -> {'timestamp': last activity, 'sessionToken': token}
        self.usersLoggedIn = dict()

    def _current_time(self):
        # Return the current timestamp via the shared current_time() helper.
        return current_time()

    def _create_token(self):
        # Generate a fresh, URL-safe session token from
        # TOKEN_NUM_BYTES bytes of cryptographic randomness.
        return secrets.token_urlsafe(self.TOKEN_NUM_BYTES)

    def _compare_tokens(self, tokenA, tokenB):
        if tokenA is None or tokenB is None:
            return False
        return secrets.compare_digest(tokenA, tokenB)

    def _check_password(self, providedPass, hashedTargetPass):
        # Verify a plaintext password (bytes) against the stored bcrypt hash.
        return bcrypt.checkpw(providedPass, hashedTargetPass)

    def _create_hash(self, password):
        '''
            Bcrypt-hash the given password (bytes) with SALT_NUM_ROUNDS rounds.
        '''
        # local renamed from "hash" to avoid shadowing the builtin
        hashed = bcrypt.hashpw(password, bcrypt.gensalt(self.SALT_NUM_ROUNDS))
        return hashed

    def _get_user_data(self, username):
        result = self.dbConnector.execute(
            'SELECT last_login, session_token, secret_token FROM aide_admin.user WHERE name = %s;',
            (username, ),
            numReturn=1)
        if not len(result):
            return None
        result = result[0]
        return result

    def _extend_session_database(self, username, sessionToken):
        '''
            Writes the current time as the user's last_login (together with
            the session token) to the database and mirrors the timestamp in
            the local cache. Runs on a background thread so the caller is
            not blocked by the DB round-trip.
        '''
        def _task():
            timestamp = self._current_time()

            self.dbConnector.execute(
                '''UPDATE aide_admin.user SET last_login = %s,
                    session_token = %s
                    WHERE name = %s
                ''', (
                    timestamp,
                    sessionToken,
                    username,
                ),
                numReturn=None)

            # keep the local cache in sync with the DB
            self.usersLoggedIn[username]['timestamp'] = timestamp

        Thread(target=_task).start()

    def _init_or_extend_session(self, username, sessionToken=None):
        '''
            Establishes a "session" for the user (i.e., sets 'time_login'
            to now).
            Also creates a new sessionToken if None provided.
        '''
        now = self._current_time()

        if sessionToken is None:
            sessionToken = self._create_token()

            # new session created; add to database
            self.dbConnector.execute(
                '''UPDATE aide_admin.user SET last_login = %s, session_token = %s
                WHERE name = %s
            ''', (
                    now,
                    sessionToken,
                    username,
                ),
                numReturn=None)

            # store locally
            self.usersLoggedIn[username] = {
                'timestamp': now,
                'sessionToken': sessionToken
            }

        # update local cache as well
        if not username in self.usersLoggedIn:
            self.usersLoggedIn[username] = {
                'timestamp': now,
                'sessionToken': sessionToken
            }
        else:
            self.usersLoggedIn[username]['timestamp'] = now
            self.usersLoggedIn[username]['sessionToken'] = sessionToken

            # also tell DB about updated tokens
            self._extend_session_database(username, sessionToken)

        expires = now + timedelta(
            0, self.config.getProperty('UserHandler', 'time_login', type=int))

        return sessionToken, now, expires

    def _invalidate_session(self, username):
        if username in self.usersLoggedIn:
            del self.usersLoggedIn[username]
        self.dbConnector.execute(
            'UPDATE aide_admin.user SET session_token = NULL WHERE name = %s',
            (username, ),
            numReturn=None)
        #TODO: feedback that everything is ok?

    def _check_account_exists(self, username, email):
        response = {'username': True, 'email': True}
        if username is None or not len(username): username = ''
        if email is None or not len(email): email = ''
        result = self.dbConnector.execute(
            'SELECT COUNT(name) AS c FROM aide_admin.user WHERE name = %s UNION ALL SELECT COUNT(name) AS c FROM aide_admin.user WHERE email = %s',
            (
                username,
                email,
            ),
            numReturn=2)

        response['username'] = (result[0]['c'] > 0)
        response['email'] = (result[1]['c'] > 0)

        return response

    def _check_logged_in(self, username, sessionToken):
        now = self._current_time()
        time_login = self.config.getProperty('UserHandler',
                                             'time_login',
                                             type=int)
        if not username in self.usersLoggedIn:
            # check database
            result = self._get_user_data(username)
            if result is None:
                # account does not exist
                return False

            # check for session token
            if not self._compare_tokens(result['session_token'], sessionToken):
                # invalid session token provided
                return False

            # check for timestamp
            time_diff = (now - result['last_login']).total_seconds()
            if time_diff <= time_login:
                # user still logged in
                if not username in self.usersLoggedIn:
                    self.usersLoggedIn[username] = {
                        'timestamp': now,
                        'sessionToken': sessionToken
                    }
                else:
                    self.usersLoggedIn[username]['timestamp'] = now

                # extend user session (commit to DB) if needed
                if time_diff >= 0.75 * time_login:
                    self._extend_session_database(username, sessionToken)

                return True

            else:
                # session time-out
                return False

            # generic error
            return False

        else:
            # check locally
            if not self._compare_tokens(
                    self.usersLoggedIn[username]['sessionToken'],
                    sessionToken):
                # invalid session token provided; check database if token has updated
                # (can happen if user logs in again from another machine)
                result = self._get_user_data(username)
                if not self._compare_tokens(result['session_token'],
                                            sessionToken):
                    return False

                else:
                    # update local cache
                    self.usersLoggedIn[username]['sessionToken'] = result[
                        'session_token']
                    self.usersLoggedIn[username]['timestamp'] = now

            if (now - self.usersLoggedIn[username]['timestamp']
                ).total_seconds() <= time_login:
                # user still logged in
                return True

            else:
                # local cache session time-out; check if database holds more recent timestamp
                result = self._get_user_data(username)
                if (now - result['last_login']).total_seconds() <= time_login:
                    # user still logged in; update
                    self._init_or_extend_session(username, sessionToken)

                else:
                    # session time-out
                    return False

            # generic error
            return False

        # generic error
        return False

    def _check_authorized(self, project, username, admin, return_all=False):
        '''
            Verifies whether a user has access rights to a project.
            If "return_all" is set to True, a dict with the following bools
            is returned:
            - enrolled: if the user is member of the project
            - isAdmin: if the user is a project administrator
            - isPublic: if the project is publicly visible (*)

            (* note that this is here for convenience, but does not count
            as an authorization token)

            If "return_all" is False, only a single bool is returned, with
            criteria as follows:
            - if "admin" is set to True, the user must be a project admini-
              strator
            - else, the user must be enrolled, admitted, and not blocked for
              the current date and time

            In this case, options like the public flag are not relevant for
            the decision.

            Super users are always enrolled and admins.
        '''
        now = current_time()
        response = {'enrolled': False, 'isAdmin': False, 'isPublic': False}

        queryStr = sql.SQL('''
            SELECT * FROM aide_admin.authentication AS auth
            JOIN (SELECT shortname, demoMode, isPublic FROM aide_admin.project) AS proj
            ON auth.project = proj.shortname
            WHERE project = %s AND username = %s;
        ''')
        try:
            result = self.dbConnector.execute(queryStr, (
                project,
                username,
            ), 1)
            if len(result):
                response['isAdmin'] = result[0]['isadmin']
                response['isPublic'] = result[0]['ispublic']
                admitted_until = True
                blocked_until = False
                if result[0]['admitted_until'] is not None:
                    admitted_until = (result[0]['admitted_until'] >= now)
                if result[0]['blocked_until'] is not None:
                    blocked_until = (result[0]['blocked_until'] >= now)
                response['enrolled'] = (admitted_until and not blocked_until)
        except Exception:
            # no results to fetch: user is not authenticated
            # (narrowed from a bare "except:" so SystemExit etc. propagate)
            pass

        # check if super user
        superUser = self._check_user_privileges(username, superuser=True)
        if superUser:
            response['enrolled'] = True
            response['isAdmin'] = True

        if return_all:
            return response
        if admin:
            return response['isAdmin']
        return response['enrolled']

    def checkDemoMode(self, project):
        # Delegate to the module-level checkDemoMode helper, supplying our
        # database connection.
        return checkDemoMode(project, self.dbConnector)

    def decryptSessionToken(self, username, request):
        try:
            userdata = self._get_user_data(username)
            return request.get_cookie('session_token',
                                      secret=userdata['secret_token'])
        except:
            return None

    def encryptSessionToken(self, username, response):
        userdata = self._get_user_data(username)
        response.set_cookie('session_token',
                            userdata['session_token'],
                            httponly=True,
                            path='/',
                            secret=userdata['secret_token'])

    def _check_user_privileges(self,
                               username,
                               superuser=False,
                               canCreateProjects=False,
                               return_all=False):
        response = {'superuser': False, 'can_create_projects': False}
        result = self.dbConnector.execute(
            '''SELECT isSuperUser, canCreateProjects
            FROM aide_admin.user WHERE name = %s;''', (username, ), 1)

        if len(result):
            response['superuser'] = result[0]['issuperuser']
            response['can_create_projects'] = result[0]['cancreateprojects']

        if return_all:
            return response

        else:
            if superuser and not result[0]['issuperuser']:
                return False
            if canCreateProjects and not (result[0]['cancreateprojects']
                                          or result[0]['issuperuser']):
                return False
            return True

    def isAuthenticated(self,
                        username,
                        sessionToken,
                        project=None,
                        admin=False,
                        superuser=False,
                        canCreateProjects=False,
                        extend_session=False,
                        return_all=False):
        '''
            Checks if the user is authenticated to access a service.
            Returns False if one or more of the following conditions holds:
            - user is not logged in
            - 'project' (shortname) is provided, project is configured to be private and user is not in the
                authenticated users list
            - 'admin' is True, 'project' (shortname) is provided and user is not an admin of the project
            - 'superuser' is True and user is not a super user
            - 'canCreateProjects' is True and user is not authenticated to create (or remove) projects

            If 'extend_session' is True, the user's session will automatically be prolonged by the max login time
            specified in the configuration file.
            If 'return_all' is True, all individual flags (instead of just a single bool) is returned.
        '''

        demoMode = checkDemoMode(project, self.dbConnector)

        if return_all:
            # collect every individual flag instead of collapsing to one bool
            returnVals = {}
            returnVals['logged_in'] = self._check_logged_in(
                username, sessionToken)
            if not returnVals['logged_in']:
                # run remaining checks as anonymous
                username = None
            if project is not None:
                returnVals['project'] = self._check_authorized(project,
                                                               username,
                                                               admin,
                                                               return_all=True)
                returnVals['project']['demoMode'] = demoMode
            returnVals['privileges'] = self._check_user_privileges(
                username, superuser, canCreateProjects, return_all=True)
            if returnVals['logged_in'] and extend_session:
                self._init_or_extend_session(username, sessionToken)
            return returnVals

        else:
            # return True if project is in demo mode
            # NOTE(review): demo mode short-circuits ALL checks below,
            # including superuser/canCreateProjects — presumably intentional
            # for open demo projects; confirm before changing.
            if demoMode is not None and demoMode:
                return True
            if not self._check_logged_in(username, sessionToken):
                return False
            if project is not None and not self._check_authorized(
                    project, username, admin):
                return False
            if not self._check_user_privileges(username, superuser,
                                               canCreateProjects):
                return False
            if extend_session:
                self._init_or_extend_session(username, sessionToken)
            return True

    def getAuthentication(self, username, project=None):
        '''
            Returns general authentication properties of the user, regardless of whether
            they are logged in or not.
            If a project shortname is specified, this will also return the user access
            properties for the given project.
        '''

        response = {}
        if project is None:
            result = self.dbConnector.execute(
                '''SELECT * FROM aide_admin.user AS u
                    WHERE name = %s;
                ''', (username, ), 1)
            response['canCreateProjects'] = result[0]['cancreateprojects']
            response['isSuperUser'] = result[0]['issuperuser']
        else:
            result = self.dbConnector.execute(
                '''SELECT * FROM aide_admin.user AS u
                    JOIN aide_admin.authentication AS a
                    ON u.name = a.username
                    WHERE name = %s
                    AND project = %s;
                ''', (
                    username,
                    project,
                ), 1)
            response['canCreateProjects'] = result[0]['cancreateprojects']
            response['isSuperUser'] = result[0]['issuperuser']
            response['isAdmin'] = result[0]['isadmin']
            response['admittedUntil'] = result[0]['admitted_until']
            response['blockedUntil'] = result[0]['blocked_until']

        return response

    def getLoginData(self, username, sessionToken):
        '''
            Performs a lookup on the login timestamp dict.
            If the username cannot be found (also not in the database),
            they are not logged in (False returned).
            If the difference between the current time and the recorded
            login timestamp exceeds a pre-defined threshold, the user is
            removed from the dict and False is returned.
            Otherwise returns True if and only if 'sessionToken' matches
            the entry in the database.
        '''
        if self._check_logged_in(username, sessionToken):
            # still logged in; extend session
            sessionToken, now, expires = self._init_or_extend_session(
                username, sessionToken)
            return sessionToken, now, expires

        else:
            # not logged in or error
            raise Exception('Not logged in.')

    def getUserPermissions(self, project, username):
        '''
            Returns the user-to-project relation (e.g., if user is admin).
            Best-effort: on any lookup error the default response
            (demoMode/isAdmin False, no admission/blocking dates) is returned.
        '''
        response = {
            'demoMode': False,
            'isAdmin': False,
            'admittedUntil': None,
            'blockedUntil': None
        }

        try:
            # demo mode
            response['demoMode'] = checkDemoMode(project, self.dbConnector)

            # rest
            queryStr = sql.SQL(
                'SELECT * FROM {id_auth} WHERE project = %s AND username = %s'
            ).format(id_auth=sql.Identifier('aide_admin', 'authentication'))
            result = self.dbConnector.execute(queryStr, (
                project,
                username,
            ), 1)
            if len(result):
                response['isAdmin'] = result[0]['isadmin']
                response['admittedUntil'] = result[0]['admitted_until']
                response['blockedUntil'] = result[0]['blocked_until']

        finally:
            # NOTE(review): returning from 'finally' deliberately suppresses
            # any exception raised in the 'try' block above, so callers
            # always receive a response dict — confirm this best-effort
            # behavior is intended before changing.
            return response

    def login(self, username, password, sessionToken):
        '''
            Authenticates the user. If a valid session already exists it is
            simply extended; otherwise the password is verified against the
            stored bcrypt hash and a new session is created.
            Returns (sessionToken, timestamp, expires).
            Raises InvalidRequestException for unknown accounts and
            InvalidPasswordException on a wrong password.
        '''
        # fast path: session still valid; just extend it
        if self._check_logged_in(username, sessionToken):
            return self._init_or_extend_session(username, sessionToken)

        # get user info
        rows = self.dbConnector.execute(
            'SELECT hash FROM aide_admin.user WHERE name = %s;', (username, ),
            numReturn=1)
        if not len(rows):
            # account does not exist
            raise InvalidRequestException()

        # verify provided password
        if not self._check_password(password.encode('utf8'),
                                    bytes(rows[0]['hash'])):
            # incorrect password: drop any existing session
            self._invalidate_session(username)
            raise InvalidPasswordException()

        # correct password: open a fresh session
        return self._init_or_extend_session(username, None)

    def logout(self, username, sessionToken):
        '''
            Invalidates the user's session (locally and in the database),
            but only if the provided session token is currently valid.
        '''
        # check if logged in first
        if self._check_logged_in(username, sessionToken):
            self._invalidate_session(username)

    def accountExists(self, username, email):
        '''
            Returns {'username': bool, 'email': bool} flags indicating
            whether an account with the given name and/or e-mail address
            already exists.
        '''
        return self._check_account_exists(username, email)

    def createAccount(self, username, password, email):
        '''
            Registers a new user account and immediately opens a session for
            it. Raises AccountExistsException if the user name or e-mail
            address is already taken.
            Returns (sessionToken, timestamp, expires).
        '''
        existing = self._check_account_exists(username, email)
        if existing['username'] or existing['email']:
            raise AccountExistsException(username)

        # renamed from "hash" to avoid shadowing the builtin
        pwHash = self._create_hash(password.encode('utf8'))

        queryStr = '''
                INSERT INTO aide_admin.user (name, email, hash)
                VALUES (%s, %s, %s);
            '''
        self.dbConnector.execute(queryStr, (
            username,
            email,
            pwHash,
        ),
                                 numReturn=None)
        return self._init_or_extend_session(username)

    def getUserNames(self, project=None):
        if not project:
            queryStr = 'SELECT name FROM aide_admin.user'
            queryVals = None
        else:
            queryStr = 'SELECT username AS name FROM aide_admin.authentication WHERE project = %s'
            queryVals = (project, )
        result = self.dbConnector.execute(queryStr, queryVals, 'all')
        response = [r['name'] for r in result]
        return response

    def setPassword(self, username, password):
        '''
            Replaces the user's password hash in the database.
            Returns {'success': True} on success, or a dict with
            'success': False and a message if the user does not exist.
        '''
        newHash = self._create_hash(password.encode('utf8'))
        queryStr = '''
            UPDATE aide_admin.user
            SET hash = %s
            WHERE name = %s;
            SELECT hash
            FROM aide_admin.user
            WHERE name = %s;
        '''
        rows = self.dbConnector.execute(queryStr,
                                        (newHash, username, username), 1)
        if not len(rows):
            return {
                'success': False,
                'message': f'User with name "{username}" does not exist.'
            }
        return {'success': True}