def scanForImages(self, project):
    '''
        Searches the project image folder on disk for files that have not
        (yet) been added to the database.

        Args:
            project: name of the project; used as the sub-folder below the
                     file server's "staticfiles_dir" and to qualify the
                     "image" table in the database query.

        Returns:
            A list with the entries reported by "listDirectory" for the
            project folder that have no matching "filename" value in the
            project's image table. An empty list is returned if the project
            folder does not exist. The order of the list is unspecified.
    '''
    projectFolder = os.path.join(
        self.config.getProperty('FileServer', 'staticfiles_dir'), project)
    if not (os.path.isdir(projectFolder) or os.path.islink(projectFolder)):
        # no folder exists for the project (should not happen due to
        # broadcast at project creation)
        return []

    # scan disk for files; coerce to a set so that the set difference below
    # works regardless of the container type "listDirectory" returns
    imgs_disk = set(listDirectory(projectFolder, recursive=True))

    # get all existing file paths from database
    queryStr = sql.SQL(''' SELECT filename FROM {id_img}; ''').format(
        id_img=sql.Identifier(project, 'image'))
    result = self.dbConnector.execute(queryStr, None, 'all')
    imgs_database = {row['filename'] for row in result}

    # keep only files on disk that the database does not know about
    return list(imgs_disk - imgs_database)
def removeOrphanedImages(self, project):
    '''
        Removes database entries of images whose file cannot be found in
        the project folder on disk anymore.

        Every row of the project's "image" table whose "filename" is not
        among the entries reported by "listDirectory" for the project
        folder is considered orphaned. For those images, the associated
        rows in the "image_user", "annotation" and "prediction" tables are
        deleted, followed by the "image" rows themselves.

        Args:
            project: name of the project; used as the sub-folder below the
                     file server's "staticfiles_dir" and to qualify the
                     table names in the database queries.

        Returns:
            A list with the ids of the images that were removed. An empty
            list is returned if the project folder does not exist or if no
            orphaned image was found.
    '''
    # check the folder first: without a project folder nothing is removed,
    # so the database does not need to be queried at all in that case
    projectFolder = os.path.join(
        self.config.getProperty('FileServer', 'staticfiles_dir'), project)
    if not (os.path.isdir(projectFolder) or os.path.islink(projectFolder)):
        return []

    imgs_DB = self.dbConnector.execute(
        sql.SQL(''' SELECT id, filename FROM {id_img}; ''').format(
            id_img=sql.Identifier(project, 'image')),
        None, 'all')

    # set for constant-time membership tests in the filter below
    imgs_disk = set(listDirectory(projectFolder, recursive=True))

    # get orphaned images
    imgs_orphaned = [
        row['id'] for row in imgs_DB if row['filename'] not in imgs_disk
    ]
    if not imgs_orphaned:
        return []

    # remove; dependent tables first, the image table last. The same tuple
    # of ids is bound to each of the four "IN %s" placeholders.
    orphaned_ids = tuple(imgs_orphaned)
    self.dbConnector.execute(
        sql.SQL(
            ' DELETE FROM {id_iu} WHERE image IN %s;'
            ' DELETE FROM {id_anno} WHERE image IN %s;'
            ' DELETE FROM {id_pred} WHERE image IN %s;'
            ' DELETE FROM {id_img} WHERE id IN %s; '
        ).format(
            id_iu=sql.Identifier(project, 'image_user'),
            id_anno=sql.Identifier(project, 'annotation'),
            id_pred=sql.Identifier(project, 'prediction'),
            id_img=sql.Identifier(project, 'image')),
        (orphaned_ids,) * 4, None)

    return imgs_orphaned
# check if running on file server: the configured static files directory
# must exist on this machine
imgBaseDir = config.getProperty('FileServer', 'staticfiles_dir')
if not os.path.isdir(imgBaseDir):
    raise Exception(
        f'"{imgBaseDir}" is not a valid directory on this machine. Are you running the script from the file server?'
    )
# make sure the base directory ends with a separator, so that stripping it
# from a file path below leaves no leading separator behind
if not imgBaseDir.endswith(os.sep):
    imgBaseDir += os.sep

# locate all images and their base names
print('Locating image paths...')
imgs = set()
imgFiles = listDirectory(imgBaseDir, recursive=True)    #TODO: check if correct
imgFiles = list(imgFiles)
for i in tqdm(imgFiles):
    if os.path.isdir(i):
        continue
    _, ext = os.path.splitext(i)
    if ext.lower() not in VALID_IMAGE_EXTENSIONS:
        continue
    # strip only a leading base directory; a plain str.replace would also
    # remove later occurrences of the same string inside the path
    if i.startswith(imgBaseDir):
        baseName = i[len(imgBaseDir):]
    else:
        baseName = i
    imgs.add(baseName)

# ignore images that are already in database
print('Filter images already in database...')