Ejemplo n.º 1
0
    def scanForImages(self, project):
        '''
            Searches the project image folder on disk for files that
            have not (yet) been added to the database.

            Args:
                project: name of the project. Used as the sub-folder
                         name below the file server's "staticfiles_dir"
                         and to qualify the "image" table in the query.

            Returns:
                A list (in no particular order) of the paths reported
                by "listDirectory" for the project folder that have no
                matching "filename" entry in the project's image table.
                An empty list if the project folder does not exist.
                NOTE(review): any filtering for valid image files
                presumably happens inside "listDirectory"; confirm.
        '''

        # scan disk for files
        projectFolder = os.path.join(
            self.config.getProperty('FileServer', 'staticfiles_dir'), project)
        if not (os.path.isdir(projectFolder)
                or os.path.islink(projectFolder)):
            # no folder exists for the project (should not happen due to
            # broadcast at project creation)
            return []

        # Coerce to a set so that the set difference below works no matter
        # which container type listDirectory hands back.
        imgs_disk = set(listDirectory(projectFolder, recursive=True))

        # get all existing file paths from database
        queryStr = sql.SQL('''
            SELECT filename FROM {id_img};
        ''').format(id_img=sql.Identifier(project, 'image'))
        result = self.dbConnector.execute(queryStr, None, 'all')
        imgs_database = {row['filename'] for row in result}

        # keep only files that are on disk but unknown to the database
        return list(imgs_disk - imgs_database)
Ejemplo n.º 2
0
    def removeOrphanedImages(self, project):
        '''
            Deletes the database records of images whose files can no
            longer be found in the project folder on disk.

            All "id" / "filename" pairs of the project's image table
            are compared against the files listed for the project
            folder. For every image without a file on disk, the rows
            referencing it in the "image_user", "annotation" and
            "prediction" tables are deleted, followed by the image row
            itself.

            Returns the list of ids of the removed images. Returns an
            empty list if the project folder does not exist or if no
            image is orphaned.
        '''
        # fetch every image known to the database
        selectQuery = sql.SQL('''
            SELECT id, filename FROM {id_img};
        ''').format(id_img=sql.Identifier(project, 'image'))
        dbEntries = self.dbConnector.execute(selectQuery, None, 'all')

        # without a project folder there is nothing to compare against
        baseDir = self.config.getProperty('FileServer', 'staticfiles_dir')
        projectFolder = os.path.join(baseDir, project)
        if not (os.path.isdir(projectFolder)
                or os.path.islink(projectFolder)):
            return []
        filesOnDisk = set(listDirectory(projectFolder, recursive=True))

        # ids of all database entries whose file is missing on disk
        orphanedIds = [
            entry['id'] for entry in dbEntries
            if entry['filename'] not in filesOnDisk
        ]
        if not orphanedIds:
            return []

        # delete dependent rows first, then the image rows themselves;
        # the same tuple of ids is supplied once per statement
        deleteQuery = sql.SQL('''
            DELETE FROM {id_iu} WHERE image IN %s;
            DELETE FROM {id_anno} WHERE image IN %s;
            DELETE FROM {id_pred} WHERE image IN %s;
            DELETE FROM {id_img} WHERE id IN %s;
        ''').format(id_iu=sql.Identifier(project, 'image_user'),
                    id_anno=sql.Identifier(project, 'annotation'),
                    id_pred=sql.Identifier(project, 'prediction'),
                    id_img=sql.Identifier(project, 'image'))
        idTuple = tuple(orphanedIds)
        self.dbConnector.execute(deleteQuery, (idTuple,) * 4, None)

        return orphanedIds
Ejemplo n.º 3
0
    # check if running on file server: the configured static files
    # directory must exist as a directory on this machine
    imgBaseDir = config.getProperty('FileServer', 'staticfiles_dir')
    if not os.path.isdir(imgBaseDir):
        raise Exception(
            f'"{imgBaseDir}" is not a valid directory on this machine. Are you running the script from the file server?'
        )

    # make sure the base directory ends with a path separator, so that
    # removing it from a file path below also removes the separator
    if not imgBaseDir.endswith(os.sep):
        imgBaseDir += os.sep

    # locate all images and their base names
    print('Locating image paths...')
    imgs = set()
    imgFiles = listDirectory(
        imgBaseDir, recursive=True
    )  # previously: glob.glob(os.path.join(imgBaseDir, '**'), recursive=True)  TODO: check if correct
    imgFiles = list(imgFiles)
    for i in tqdm(imgFiles):
        # directories are skipped
        if os.path.isdir(i):
            continue

        # skip entries whose lower-cased extension is not listed in
        # VALID_IMAGE_EXTENSIONS
        _, ext = os.path.splitext(i)
        if ext.lower() not in VALID_IMAGE_EXTENSIONS:
            continue

        # path with the base directory removed
        # NOTE(review): str.replace removes every occurrence of imgBaseDir
        # in the path, not only a leading prefix; confirm this is intended
        baseName = i.replace(imgBaseDir, '')
        imgs.add(baseName)

    # ignore images that are already in database
    print('Filter images already in database...')