# Preamble of the ODAEL export script for a reduced number of whole-slide
# images: validates the command-line argument (3, 6 or 12), creates the
# train/test output directory tree and defines the slide-listing helper.
import numpy as np
import SlideRunner.general.dependencies
from SlideRunner.dataAccess.database import Database
import os
import openslide
import sqlite3
import cv2
import sys

# The only supported subset sizes are 3, 6 and 12 slides.
if (len(sys.argv) < 2) or (sys.argv[1] not in ['3', '6', '12']):
    print('syntax: exportDataset_ODAEL_xWSI.py x')
    print(' where x is the number of WSI to export (3,6,12)')
    sys.exit()

DB = Database()

basepath = '../../WSI/'
patchSize = 128

# One sub-directory per class, for both the train and the test split.
dirs = ['Mitosis', 'Mitosislike', 'Tumorcells', 'Granulocytes']
export_root = 'DataODAEL_%sWSI' % sys.argv[1]

# os.makedirs is used instead of shelling out to `mkdir -p`, so the script
# does not depend on a POSIX shell and real failures raise an exception.
os.makedirs(export_root, exist_ok=True)
for k in dirs:
    os.makedirs(os.path.join(export_root, 'train', k), exist_ok=True)
    os.makedirs(os.path.join(export_root, 'test', k), exist_ok=True)


def listOfSlides(DB):
    """Return all (uid, filename) rows of the Slides table of ``DB``."""
    DB.execute('SELECT uid,filename from Slides')
    return DB.fetchall()
from SlideRunner.dataAccess.database import Database DB = Database() slidelist_test = ['27', '30', '31', '6', '18', '20', '1', '2', '3', '9', '11'] clause = ','.join(slidelist_test) files = [ 'MITOS_WSI_CCMCT_ODAEL_50HPF.sqlite', 'MITOS_WSI_CCMCT_ODAEL_10HPF.sqlite', 'MITOS_WSI_CCMCT_ODAEL_5HPF.sqlite', 'MITOS_WSI_CCMCT_ODAEL_12WSI.sqlite', 'MITOS_WSI_CCMCT_ODAEL_6WSI.sqlite', 'MITOS_WSI_CCMCT_ODAEL_3WSI.sqlite', 'MITOS_WSI_CCMCT_ODAEL.sqlite' ] for f in files: DB.open('databases/' + f) cnt = DB.execute( 'SELECT COUNT(*) FROM Annotations where agreedClass==2 and slide not in (%s)' % clause).fetchone()[0] print('%40s: %d mitotic figures in training set' % (f, cnt)) DB.open(files[-1]) slidelist_train = [ s[0] for s in DB.execute( 'SELECT Slides.filename, count(*) as cnt FROM Slides left join Annotations on Annotations.slide == Slides.uid where Slides.uid not in (%s) and Annotations.agreedClass==2 group by slide order by cnt asc' % clause).fetchall() ] slidelist_cnt = {
# Export Dataset (patch-wise) to learn classifier easier import numpy as np import SlideRunner.general.dependencies from SlideRunner.dataAccess.database import Database from SlideRunner.dataAccess.annotations import ViewingProfile import os import openslide import sqlite3 import cv2 import sys DB = Database() basepath='../WSI/' patchSize=128 os.system('mkdir -p DataODAEL') dirs = ['Mitosis', 'Mitosislike', 'Tumorcells', 'Granulocytes'] for k in dirs: os.system('mkdir -p DataODAEL/train/%s' % (k)) os.system('mkdir -p DataODAEL/test/%s' % (k)) def listOfSlides(DB): DB.execute('SELECT uid,filename from Slides') return DB.fetchall() test_slide_filenames = ['be10fa37ad6e88e1f406.svs', 'f3741e764d39ccc4d114.svs', 'c86cd41f96331adf3856.svs',
if (len(sys.argv) < 5): print( 'Syntax: Inference-RetinaNet-var.py ModelSavepath.pkl Database.sqlite DatasetName val/test [SlideDir]' ) exit() fname = 'RetinaNet-CMC-CODAEL-512sh-repetition1.pkl' if len(sys.argv) > 1: fname = sys.argv[1] size = 512 path = Path('./') database = Database() database.open(str( sys.argv[2])) #Slides_Mitosis_final_checked_cleaned.sqlite')) slidedir = 'WSI' if len(sys.argv) < 5 else sys.argv[5] datasetname = sys.argv[3] size = 512 level = 0 files = [] # In[3]: test_slide_filenames = [ 'be10fa37ad6e88e1f406.svs', 'f3741e764d39ccc4d114.svs', 'c86cd41f96331adf3856.svs', '552c51bfb88fd3e65ffe.svs',
def test_pushannos():
    """Round-trip test of annotation synchronisation with an EXACT server.

    Uploads a random dummy image, creates three polygon annotations in an
    in-memory database (two by annotator 1, one by annotator 2), and checks
    that:

    * only the annotations of the selected annotator are pushed,
    * a second sync does not change anything,
    * a local label change is propagated to the server,
    * a remotely created annotation shows up locally, both in memory and
      after reloading the slide from the database.

    The remote annotations, the remote image and the local dummy file are
    removed at the end.
    """
    imageset = 1
    product_id = 1

    exm = ExactManager('sliderunner_unittest', 'unittestpw',
                       EXACT_UNITTEST_URL)

    # Random suffix for the uploaded dummy image's file name.
    randstr = ''.join(
        ['{:02x}'.format(x) for x in np.random.randint(0, 255, 6)])
    imagename = f'dummy{randstr}.tiff'

    # generate dummy image
    dummy = np.random.randint(0, 255, (200, 200, 3))
    cv2.imwrite(imagename, dummy)

    exm.upload_image_to_imageset(imageset_id=imageset, filename=imagename)

    # Look up the server-side id of the image that was just uploaded.
    imageset_details = exm.retrieve_imageset(imageset)
    imageid = None
    for imset in imageset_details['images']:
        if imset['name'] == imagename:
            imageid = imset['id']
    # Fail with a clear message instead of a NameError further down.
    assert imageid is not None, 'uploaded image not found in image set'

    DB = Database().create(':memory:')

    # Add slide to database
    slideuid = DB.insertNewSlide(imagename, '')
    DB.insertClass('BB')
    DB.insertAnnotator('sliderunner_unittest')
    # we will only send annotations of the marked expert
    DB.insertAnnotator('otherexpert')
    DB.insertClass('POLY')

    coords = np.array([[100, 200], [150, 220], [180, 250]])
    DB.insertNewPolygonAnnotation(annoList=coords, slideUID=1, classID=2,
                                  annotator=1)

    coords = np.array([[150, 250], [350, 220], [0, 250]])
    DB.insertNewPolygonAnnotation(annoList=coords, slideUID=1, classID=2,
                                  annotator=1)

    # Third annotation belongs to the other annotator.
    coords = np.array([[150, 255], [350, 210], [50, 250]])
    DB.insertNewPolygonAnnotation(annoList=coords, slideUID=1, classID=2,
                                  annotator=2)

    DB.setExactPerson(1)

    # empty image
    annos = exm.retrieve_annotations(imageid)
    for anno in annos:
        exm.delete_annotation(anno['id'], keep_deleted_element=False)

    # All annotations have been removed
    assert len(exm.retrieve_annotations(imageid)) == 0

    exm.sync(imageid, imageset_id=imageset, product_id=product_id,
             slideuid=slideuid, database=DB)

    # Only 2 annotations have been inserted
    assert len(exm.retrieve_annotations(imageid)) == 2

    uuids = [x['unique_identifier']
             for x in exm.retrieve_annotations(imageid)]
    # All were created with correct guid (the last annotation is the one of
    # the other annotator and is therefore skipped)
    for dbanno in list(DB.annotations.keys())[:-1]:
        assert DB.annotations[dbanno].guid in uuids

    print('--- resync ---')
    # Sync again
    exm.sync(imageid, imageset_id=imageset, product_id=product_id,
             slideuid=slideuid, database=DB)

    # No change
    assert len(exm.retrieve_annotations(imageid)) == 2

    # All were created with correct guid
    uuids = [x['unique_identifier']
             for x in exm.retrieve_annotations(imageid)]
    for dbanno in list(DB.annotations.keys())[:-1]:
        assert DB.annotations[dbanno].guid in uuids

    print('--- local update created ---')
    # Now let's create a local update - keep same exact_id (crucial!)
    DB.loadIntoMemory(1)
    DB.setAnnotationLabel(classId=1, person=1, annoIdx=1,
                          entryId=DB.annotations[1].labels[0].uid,
                          exact_id=DB.annotations[1].labels[0].exact_id)

    # Sync again
    exm.sync(imageid, imageset_id=imageset, product_id=product_id,
             slideuid=slideuid, database=DB)

    # check if remote has been updated
    annotype_id = None
    annos = exm.retrieve_annotations(imageid)
    for anno in annos:
        if anno['id'] == DB.annotations[1].labels[0].exact_id:
            assert anno['annotation_type']['name'] == 'BB'
            annotype_id = anno['annotation_type']['id']
    # The updated annotation must exist remotely; its type id is reused below.
    assert annotype_id is not None, 'updated annotation not found remotely'

    # Now update remotely and see if changes are reflected
    newguid = str(uuid.uuid4())
    created = exm.create_annotation(image_id=imageid,
                                    annotationtype_id=annotype_id,
                                    vector=[[90, 80], [20, 30]],
                                    last_modified=time.time(),
                                    guid=newguid,
                                    description='abcdef')

    exm.sync(imageid, imageset_id=imageset, product_id=product_id,
             slideuid=slideuid, database=DB)

    found = False
    for annoI in DB.annotations:
        anno = DB.annotations[annoI]
        if anno.guid == newguid:
            found = True
            assert anno.annotationType == AnnotationType.POLYGON
            assert anno.text == 'abcdef'
            assert anno.labels[0].exact_id == created['annotations']['id']
    assert found

    # also check in stored database; the flag is reset so that this check
    # cannot pass merely because the in-memory check above succeeded
    DB.loadIntoMemory(1)
    found = False
    for annoI in DB.annotations:
        anno = DB.annotations[annoI]
        if anno.guid == newguid:
            found = True
            assert anno.annotationType == AnnotationType.POLYGON
            assert anno.labels[0].exact_id == created['annotations']['id']
    assert found

    # Clean up --> remove all annotations
    annos = exm.retrieve_annotations(imageid)
    for anno in annos:
        exm.delete_annotation(anno['id'], keep_deleted_element=False)

    # All gone
    assert len(exm.retrieve_annotations(imageid)) == 0

    # Now delete image
    exm.delete_image(imageid)
    os.remove(imagename)
    exm.terminate()
def calculate_F1_fromCSV(databasefile, csv_directory, hotclass=1, verbose=False):
    """Compute the F1 score of detections stored as CSV files.

    ``csv_directory`` is walked recursively. Every directory without
    sub-directories is expected to be named with a slide number and to
    contain a CSV file whose rows are ``y,x`` detection coordinates. The
    first file listed for the directory is read. Detections of slide number
    ``n`` are matched against the annotations of the slide named
    ``'%02d_test.tif' % n`` in the database.

    Args:
        databasefile: Path of the annotation database to open.
        csv_directory: Root directory holding the per-slide CSV folders.
        hotclass: Value of ``agreedClass`` that counts as a positive.
        verbose: If true, print the per-slide F1/TP/FP/FN values.

    Returns:
        Tuple ``(overall_F1, F1dict)`` where ``F1dict`` maps the slide file
        name to its F1 score. Ratios with a zero denominator are reported
        as 0.0.
    """
    import os
    import csv

    def _ratio(numerator, denominator):
        # Avoid ZeroDivisionError when there are no detections/annotations.
        return numerator / denominator if denominator > 0 else 0.0

    DB = Database()
    DB = DB.open(databasefile)

    result_boxes = {}
    for root, dirs, files in os.walk(csv_directory):
        if len(dirs) > 0:
            continue
        # normpath/basename tolerate trailing and platform-specific separators.
        slidenr = int(os.path.basename(os.path.normpath(root)))
        slidefile = '%02d_test.tif' % slidenr
        result_boxes[slidefile] = []
        with open(os.path.join(root, files[0]), 'r', newline='') as f:
            for y, x in csv.reader(f, delimiter=','):
                # CSV detections carry no score: every one gets score 1.
                result_boxes[slidefile].append([int(x), int(y), 1])

    sTP, sFN, sFP = 0, 0, 0
    F1dict = dict()

    print('Calculating F1 for test set of %d files' % len(result_boxes), ':',
          result_boxes.keys())

    mitcount = DB.execute(
        f'SELECT COUNT(*) FROM Annotations where agreedClass={hotclass}'
    ).fetchall()
    print('Official count of mitotic figures in DB: ', mitcount)

    for slidefile in result_boxes:
        boxes = np.array(result_boxes[slidefile])

        TP, FP, FN, F1 = 0, 0, 0, 0
        slide_id = DB.findSlideWithFilename(slidefile, '')
        DB.loadIntoMemory(slide_id)

        # Collect the coordinates of all annotations of the positive class.
        annoList = []
        for annoI in DB.annotations:
            anno = DB.annotations[annoI]
            if anno.agreedClass == hotclass:
                annoList.append([anno.x1, anno.y1])

        centers_DB = np.array(annoList)

        if boxes.shape[0] > 0:
            score = boxes[:, -1]
            # Threshold 0 keeps every detection (all scores are 1).
            F1, TP, FP, FN = _F1_core(centers_DB, boxes, score, 0)
            if centers_DB.shape[0] != TP + FN:
                print(slidefile, centers_DB.shape[0], TP + FN)
        else:
            # no detections --> missed all
            FN = centers_DB.shape[0]

        if verbose:
            print(f'{slidefile}: F1:{F1}, TP:{TP}, FP:{FP}, FN:{FN}')

        sTP += TP
        sFP += FP
        sFN += FN
        F1dict[slidefile] = F1

    print('Overall: ')
    sF1 = _ratio(2 * sTP, 2 * sTP + sFP + sFN)
    print('F1: ', sF1)
    print('Precision: %.3f ' % _ratio(sTP, sTP + sFP))
    print('Recall: %.3f' % _ratio(sTP, sTP + sFN))

    return sF1, F1dict
def optimize_threshold(databasefile, result_boxes=None, resfile=None, hotclass=2, minthres=0.5):
    """Find the detection threshold that maximises the overall F1 score.

    Detections are passed through ``nms`` with ``minthres``, then evaluated
    against the database annotations of class ``hotclass`` for every
    threshold in ``np.arange(minthres, 0.99, 0.01)``. TP/FP/FN are summed
    over all slides before F1 is computed.

    Args:
        databasefile: Path of the annotation database to open.
        result_boxes: Mapping from slide file name to a list of detections
            whose last column is the score. Loaded from ``resfile`` if None.
        resfile: Pickle file (optionally ``bz2`` compressed) holding
            ``result_boxes``; only used when ``result_boxes`` is None.
        hotclass: Value of ``agreedClass`` that counts as a positive.
        minthres: Lowest threshold to evaluate.

    Returns:
        Tuple ``(best_threshold, allF1, thresholds)``.

    Raises:
        ValueError: If neither ``result_boxes`` nor ``resfile`` is given.
    """
    DB = Database()
    DB = DB.open(databasefile)

    if result_boxes is None:
        if resfile is None:
            raise ValueError(
                'At least one of resfile/result_boxes must be given')
        # NOTE: pickle.load can execute arbitrary code - only use result
        # files from a trusted source.
        opener = bz2.BZ2File if resfile[-3:] == 'bz2' else open
        with opener(resfile, 'rb') as f:
            result_boxes = pickle.load(f)

    MIN_THR = minthres

    result_boxes = nms(result_boxes, MIN_THR)
    TPd, FPd, FNd = dict(), dict(), dict()
    thresholds = np.arange(MIN_THR, 0.99, 0.01)

    print(
        'Optimizing threshold for validation set of %d files: ' %
        len(result_boxes.keys()), ','.join(list(result_boxes.keys())))

    for slidefile in result_boxes:
        boxes = np.array(result_boxes[slidefile])

        TPd[slidefile] = list()
        FPd[slidefile] = list()
        FNd[slidefile] = list()

        # Annotations are loaded for every slide, also for those without
        # detections, because their annotated objects are all misses.
        DB.loadIntoMemory(DB.findSlideWithFilename(slidefile, ''))
        annoList = []
        for annoI in DB.annotations:
            anno = DB.annotations[annoI]
            if anno.agreedClass == hotclass:
                annoList.append([anno.x1, anno.y1])
        centers_DB = np.array(annoList)

        if boxes.shape[0] > 0:
            score = boxes[:, -1]
            for det_thres in thresholds:
                F1, TP, FP, FN = _F1_core(centers_DB, boxes, score, det_thres)
                TPd[slidefile] += [TP]
                FPd[slidefile] += [FP]
                FNd[slidefile] += [FN]
        else:
            # no detections --> missed all, independent of the threshold
            missed = centers_DB.shape[0]
            for det_thres in thresholds:
                TPd[slidefile] += [0]
                FPd[slidefile] += [0]
                FNd[slidefile] += [missed]

    # Sum the per-slide counts for each threshold.
    allTP = np.zeros(len(thresholds))
    allFP = np.zeros(len(thresholds))
    allFN = np.zeros(len(thresholds))
    for slidefile in result_boxes:
        allTP += np.asarray(TPd[slidefile], dtype=float)
        allFP += np.asarray(FPd[slidefile], dtype=float)
        allFN += np.asarray(FNd[slidefile], dtype=float)

    # F1 is defined as 0 where the denominator is 0, so that NaN values
    # cannot disturb np.max/np.argmax below.
    denominator = 2 * allTP + allFP + allFN
    allF1 = np.divide(2 * allTP, denominator,
                      out=np.zeros_like(denominator),
                      where=denominator > 0)

    best = np.argmax(allF1)
    print('Best threshold: F1=', np.max(allF1), 'Threshold=',
          thresholds[best])

    return thresholds[best], allF1, thresholds
def calculate_F1(databasefile, result_boxes=None, resfile=None, det_thres=0.5, hotclass=2, verbose=False):
    """Compute the F1 score of detections at a fixed detection threshold.

    Detections are passed through ``nms`` with ``det_thres`` and matched
    per slide against the database annotations of class ``hotclass``.
    TP/FP/FN are summed over all slides before the overall F1, precision
    and recall are computed and printed.

    Args:
        databasefile: Path of the annotation database to open.
        result_boxes: Mapping from slide file name to a list of detections
            whose last column is the score. Loaded from ``resfile`` if None.
        resfile: Pickle file (optionally ``bz2`` compressed) holding
            ``result_boxes``; only used when ``result_boxes`` is None.
        det_thres: Detection threshold handed to ``nms`` and ``_F1_core``.
        hotclass: Value of ``agreedClass`` that counts as a positive.
        verbose: If true, print the per-slide F1/TP/FP/FN values.

    Returns:
        Tuple ``(overall_F1, F1dict)`` where ``F1dict`` maps the slide file
        name to its F1 score. Ratios with a zero denominator are reported
        as 0.0.

    Raises:
        ValueError: If neither ``result_boxes`` nor ``resfile`` is given.
    """

    def _ratio(numerator, denominator):
        # Avoid ZeroDivisionError when there are no detections/annotations.
        return numerator / denominator if denominator > 0 else 0.0

    DB = Database()
    DB = DB.open(databasefile)

    if result_boxes is None:
        if resfile is None:
            raise ValueError(
                'At least one of resfile/result_boxes must be given')
        # NOTE: pickle.load can execute arbitrary code - only use result
        # files from a trusted source.
        opener = bz2.BZ2File if resfile[-3:] == 'bz2' else open
        with opener(resfile, 'rb') as f:
            result_boxes = pickle.load(f)

    sTP, sFN, sFP = 0, 0, 0
    F1dict = dict()

    result_boxes = nms(result_boxes, det_thres)

    print('Calculating F1 for test set of %d files' % len(result_boxes), ':',
          result_boxes.keys())

    mitcount = DB.execute(
        f'SELECT COUNT(*) FROM Annotations where agreedClass={hotclass}'
    ).fetchall()
    print('Official count of mitotic figures in DB: ', mitcount)

    for slidefile in result_boxes:
        boxes = np.array(result_boxes[slidefile])

        TP, FP, FN, F1 = 0, 0, 0, 0
        slide_id = DB.findSlideWithFilename(slidefile, '')
        DB.loadIntoMemory(slide_id)

        # Collect the coordinates of all annotations of the positive class.
        annoList = []
        for annoI in DB.annotations:
            anno = DB.annotations[annoI]
            if anno.agreedClass == hotclass:
                annoList.append([anno.x1, anno.y1])

        centers_DB = np.array(annoList)

        if boxes.shape[0] > 0:
            score = boxes[:, -1]
            F1, TP, FP, FN = _F1_core(centers_DB, boxes, score, det_thres)
            if centers_DB.shape[0] != TP + FN:
                print(slidefile, centers_DB.shape[0], TP + FN)
        else:
            # no detections --> missed all
            FN = centers_DB.shape[0]

        if verbose:
            print(f'{slidefile}: F1:{F1}, TP:{TP}, FP:{FP}, FN:{FN}')

        sTP += TP
        sFP += FP
        sFN += FN
        F1dict[slidefile] = F1

    print('Overall: ')
    sF1 = _ratio(2 * sTP, 2 * sTP + sFP + sFN)
    print('F1: ', sF1)
    print('Precision: %.3f ' % _ratio(sTP, sTP + sFP))
    print('Recall: %.3f' % _ratio(sTP, sTP + sFN))

    return sF1, F1dict
from SlideRunner.dataAccess.database import Database import SlideRunner.dataAccess.annotations as annotations import openslide import os import numpy as np import sys if len(sys.argv) < 2: print('syntax:', sys.argv[0], '<area in WSI>') else: hpf = int(sys.argv[1]) os.system('mkdir -p %dHPF' % hpf) DB = Database() DB.open('../databases/MITOS_WSI_CCMCT_ODAEL.sqlite') DBRK = Database() DBRK.open('databases/HighMCAreas.sqlite') os.system( 'cp ../databases/MITOS_WSI_CCMCT_ODAEL.sqlite MITOS_WSI_CCMCT_ODAEL_%dHPF.sqlite' % hpf) DBnew = Database() DBnew.open('MITOS_WSI_CCMCT_ODAEL_%dHPF.sqlite' % hpf) DBnew.execute('ATTACH `../databases/MITOS_WSI_CCMCT_ODAEL.sqlite` as orig') for uid, filename in DB.listOfSlides(): print(uid, filename)