Beispiel #1
0
import numpy as np
import SlideRunner.general.dependencies
from SlideRunner.dataAccess.database import Database
import os
import openslide
import sqlite3
import cv2

import sys

# Command-line check: exactly one of the supported WSI counts must be given.
if (len(sys.argv) < 2) or (sys.argv[1] not in ['3', '6', '12']):
    print('syntax: exportDataset_ODAEL_xWSI.py x')
    print('   where x is the number of WSI to export (3,6,12)')
    # Exit with a non-zero status so calling shells can detect the usage error.
    sys.exit(1)

DB = Database()

basepath = '../../WSI/'
patchSize = 128

# Create the output tree DataODAEL_<x>WSI/{train,test}/<class> directly from
# Python instead of shelling out to `mkdir -p` (no shell needed, and a failure
# surfaces as an exception instead of being silently ignored).
os.makedirs('DataODAEL_%sWSI' % sys.argv[1], exist_ok=True)

dirs = ['Mitosis', 'Mitosislike', 'Tumorcells', 'Granulocytes']
for k in dirs:
    os.makedirs('DataODAEL_%sWSI/train/%s' % (sys.argv[1], k), exist_ok=True)
    os.makedirs('DataODAEL_%sWSI/test/%s' % (sys.argv[1], k), exist_ok=True)


def listOfSlides(DB):
    """Query uid and filename of every row in the Slides table.

    The statement is issued through ``DB.execute``; the return value is
    whatever ``DB.fetchall()`` yields afterwards.
    """
    query = 'SELECT uid,filename from Slides'
    DB.execute(query)
    rows = DB.fetchall()
    return rows
Beispiel #2
0
from SlideRunner.dataAccess.database import Database

DB = Database()

# Slide uids that every query below excludes (named as the test slides).
slidelist_test = ['27', '30', '31', '6', '18', '20', '1', '2', '3', '9', '11']
clause = ','.join(slidelist_test)

# Database variants to report on; the last entry is reopened further down.
files = [
    'MITOS_WSI_CCMCT_ODAEL_50HPF.sqlite',
    'MITOS_WSI_CCMCT_ODAEL_10HPF.sqlite',
    'MITOS_WSI_CCMCT_ODAEL_5HPF.sqlite',
    'MITOS_WSI_CCMCT_ODAEL_12WSI.sqlite',
    'MITOS_WSI_CCMCT_ODAEL_6WSI.sqlite',
    'MITOS_WSI_CCMCT_ODAEL_3WSI.sqlite',
    'MITOS_WSI_CCMCT_ODAEL.sqlite',
]

# Per database: number of annotations with agreedClass 2 on non-excluded slides.
count_query = (
    'SELECT COUNT(*) FROM Annotations where agreedClass==2 and slide not in (%s)'
    % clause)

for f in files:
    DB.open('databases/' + f)
    cnt = DB.execute(count_query).fetchone()[0]
    print('%40s: %d mitotic figures in training set' % (f, cnt))

# NOTE(review): unlike the loop above, this open() has no 'databases/' prefix
# -- confirm that this is intended.
DB.open(files[-1])

# Filenames of the remaining slides, ordered by ascending annotation count.
train_query = (
    'SELECT Slides.filename, count(*) as cnt FROM Slides left join Annotations on Annotations.slide == Slides.uid where Slides.uid not in (%s) and Annotations.agreedClass==2 group by slide order by cnt asc'
    % clause)
slidelist_train = [row[0] for row in DB.execute(train_query).fetchall()]

slidelist_cnt = {
Beispiel #3
0
# Export Dataset (patch-wise) to learn classifier easier


import numpy as np 
import SlideRunner.general.dependencies
from SlideRunner.dataAccess.database import Database
from SlideRunner.dataAccess.annotations import ViewingProfile
import os
import openslide
import sqlite3
import cv2
import sys

DB = Database()

basepath='../WSI/'
patchSize=128

# Create the output tree DataODAEL/{train,test}/<class> directly from Python
# instead of shelling out to `mkdir -p` (no shell needed, and a failure
# surfaces as an exception instead of being silently ignored).
os.makedirs('DataODAEL', exist_ok=True)

dirs = ['Mitosis', 'Mitosislike', 'Tumorcells', 'Granulocytes']
for k in dirs:
    os.makedirs('DataODAEL/train/%s' % (k), exist_ok=True)
    os.makedirs('DataODAEL/test/%s' % (k), exist_ok=True)

def listOfSlides(DB):
    """Fetch uid and filename for all rows of the Slides table.

    Executes the SELECT via ``DB.execute`` and returns the result of the
    subsequent ``DB.fetchall()`` call unchanged.
    """
    statement = 'SELECT uid,filename from Slides'
    DB.execute(statement)
    slides = DB.fetchall()
    return slides
test_slide_filenames = ['be10fa37ad6e88e1f406.svs',
                        'f3741e764d39ccc4d114.svs',
                        'c86cd41f96331adf3856.svs',
# Four positional arguments are mandatory; the slide directory is optional.
if (len(sys.argv) < 5):
    print(
        'Syntax: Inference-RetinaNet-var.py ModelSavepath.pkl Database.sqlite DatasetName val/test [SlideDir]'
    )
    # Exit with a non-zero status so calling shells can detect the usage error.
    sys.exit(1)

# Default model file name; replaced by the first command-line argument.
fname = 'RetinaNet-CMC-CODAEL-512sh-repetition1.pkl'

if len(sys.argv) > 1:
    fname = sys.argv[1]

size = 512
path = Path('./')

database = Database()
database.open(str(
    sys.argv[2]))  #Slides_Mitosis_final_checked_cleaned.sqlite'))
# sys.argv[5] only exists when at least six entries are present; with exactly
# the four mandatory arguments fall back to the default directory 'WSI'.
slidedir = 'WSI' if len(sys.argv) < 6 else sys.argv[5]
datasetname = sys.argv[3]

level = 0

files = []

# In[3]:

test_slide_filenames = [
    'be10fa37ad6e88e1f406.svs', 'f3741e764d39ccc4d114.svs',
    'c86cd41f96331adf3856.svs', '552c51bfb88fd3e65ffe.svs',
Beispiel #5
0
def test_pushannos():
    """Round-trip test of annotation syncing between a local database and EXACT.

    Runs against the server at ``EXACT_UNITTEST_URL`` and performs, in order:

    1. upload a random dummy image and look up its remote image id,
    2. create a ``:memory:`` database holding three polygon annotations
       (two by annotator 1, one by annotator 2),
    3. sync and expect exactly two remote annotations,
    4. sync again and expect no change,
    5. change a label locally, sync, and expect the remote annotation type
       name 'BB',
    6. create an annotation remotely, sync, and expect it both in memory and
       after reloading slide 1 from the database,
    7. delete the remote annotations, the remote image and the local file.
    """
    imageset = 1
    product_id = 1

    exm = ExactManager('sliderunner_unittest', 'unittestpw',
                       EXACT_UNITTEST_URL)

    # Random suffix so repeated/parallel runs do not collide on the image name.
    randstr = ''.join(
        ['{:02x}'.format(x) for x in np.random.randint(0, 255, 6)])
    imagename = f'dummy{randstr}.tiff'

    # generate dummy image
    dummy = np.random.randint(0, 255, (200, 200, 3))
    cv2.imwrite(imagename, dummy)

    exm.upload_image_to_imageset(imageset_id=imageset, filename=imagename)

    # Look up the remote id of the image that was just uploaded.
    imageset_details = exm.retrieve_imageset(imageset)
    imageid = None
    for imset in imageset_details['images']:
        if (imset['name'] == imagename):
            imageid = imset['id']
    assert imageid is not None, 'uploaded image not found in image set'

    DB = Database().create(':memory:')
    #    DB = Database().create('test.sqlite')
    # Add slide to database
    slideuid = DB.insertNewSlide(imagename, '')
    DB.insertClass('BB')
    DB.insertAnnotator('sliderunner_unittest')
    DB.insertAnnotator(
        'otherexpert')  # we will only send annotations of the marked expert
    DB.insertClass('POLY')

    coords = np.array([[100, 200], [150, 220], [180, 250]])

    DB.insertNewPolygonAnnotation(annoList=coords,
                                  slideUID=1,
                                  classID=2,
                                  annotator=1)

    coords = np.array([[150, 250], [350, 220], [0, 250]])
    DB.insertNewPolygonAnnotation(annoList=coords,
                                  slideUID=1,
                                  classID=2,
                                  annotator=1)

    coords = np.array([[150, 255], [350, 210], [50, 250]])
    DB.insertNewPolygonAnnotation(annoList=coords,
                                  slideUID=1,
                                  classID=2,
                                  annotator=2)

    DB.setExactPerson(1)
    # empty image
    annos = exm.retrieve_annotations(imageid)
    for anno in annos:
        exm.delete_annotation(anno['id'], keep_deleted_element=False)

    # All annotations have been removed
    assert (len(exm.retrieve_annotations(imageid)) == 0)

    exm.sync(imageid,
             imageset_id=imageset,
             product_id=product_id,
             slideuid=slideuid,
             database=DB)

    # Only 2 annotations have been inserted
    assert (len(exm.retrieve_annotations(imageid)) == 2)

    uuids = [x['unique_identifier'] for x in exm.retrieve_annotations(imageid)]
    # All were created with correct guid (the last local annotation is skipped)
    for dbanno in list(DB.annotations.keys())[:-1]:
        assert (DB.annotations[dbanno].guid in uuids)

    print('--- resync ---')

    # Sync again
    exm.sync(imageid,
             imageset_id=imageset,
             product_id=product_id,
             slideuid=slideuid,
             database=DB)

    # No change
    assert (len(exm.retrieve_annotations(imageid)) == 2)

    # All were created with correct guid
    uuids = [x['unique_identifier'] for x in exm.retrieve_annotations(imageid)]
    for dbanno in list(DB.annotations.keys())[:-1]:
        assert (DB.annotations[dbanno].guid in uuids)

    print('--- local update created ---')

    # Now let's create a local update - keep same exact_id (crucial!)
    DB.loadIntoMemory(1)
    DB.setAnnotationLabel(classId=1,
                          person=1,
                          annoIdx=1,
                          entryId=DB.annotations[1].labels[0].uid,
                          exact_id=DB.annotations[1].labels[0].exact_id)

    # Sync again
    exm.sync(imageid,
             imageset_id=imageset,
             product_id=product_id,
             slideuid=slideuid,
             database=DB)

    # check if remote has been updated
    annotype_id = None
    annos = exm.retrieve_annotations(imageid)
    for anno in annos:
        if (anno['id'] == DB.annotations[1].labels[0].exact_id):
            assert (anno['annotation_type']['name'] == 'BB')
            annotype_id = anno['annotation_type']['id']
    assert annotype_id is not None, 'updated annotation not found remotely'

    # Now update remotely and see if changes are reflected
    newguid = str(uuid.uuid4())
    created = exm.create_annotation(image_id=imageid,
                                    annotationtype_id=annotype_id,
                                    vector=[[90, 80], [20, 30]],
                                    last_modified=time.time(),
                                    guid=newguid,
                                    description='abcdef')

    exm.sync(imageid,
             imageset_id=imageset,
             product_id=product_id,
             slideuid=slideuid,
             database=DB)
    found = False
    for annoI in DB.annotations:
        anno = DB.annotations[annoI]
        if (anno.guid == newguid):
            found = True
            assert (anno.annotationType == AnnotationType.POLYGON)
            assert (anno.text == 'abcdef')
            assert (anno.labels[0].exact_id == created['annotations']['id'])

    assert (found)

    # also check in stored database; reset the flag so this second lookup is
    # actually verified instead of inheriting the result of the first one
    DB.loadIntoMemory(1)
    found = False
    for annoI in DB.annotations:
        anno = DB.annotations[annoI]
        if (anno.guid == newguid):
            found = True
            assert (anno.annotationType == AnnotationType.POLYGON)
            assert (anno.labels[0].exact_id == created['annotations']['id'])

    assert (found)

    # Clean up --> remove all annotations
    annos = exm.retrieve_annotations(imageid)
    for anno in annos:
        exm.delete_annotation(anno['id'], keep_deleted_element=False)

    # All gone
    assert (len(exm.retrieve_annotations(imageid)) == 0)

    # Now delete image
    exm.delete_image(imageid)

    os.remove(imagename)
    exm.terminate()
def calculate_F1_fromCSV(databasefile,
                         csv_directory,
                         hotclass=1,
                         verbose=False):
    """Compute the overall F1 score of CSV-stored detections against a database.

    ``csv_directory`` is walked with ``os.walk``; every directory without
    sub-directories must be named with the integer slide number and its first
    listed file is read as a CSV of ``y,x`` rows. The detections are evaluated
    against the slide ``'%02d_test.tif' % slidenr`` with score 1 and
    threshold 0.

    Args:
        databasefile: path handed to ``Database.open``.
        csv_directory: root directory of the per-slide CSV folders.
        hotclass: ``agreedClass`` value of the annotations counted as positives.
        verbose: if true, print F1/TP/FP/FN for each slide.

    Returns:
        Tuple ``(sF1, F1dict)``: the F1 score over the summed TP/FP/FN counts
        and a dict of per-slide F1 values keyed by slide filename. Scores whose
        denominator is zero are reported as 0.0.
    """
    import os
    import csv

    DB = Database()
    DB = DB.open(databasefile)

    result_boxes = {}
    for root, dirs, files in os.walk(csv_directory):
        if len(dirs) > 0:
            continue
        slidenr = int(os.path.basename(root))
        slide_key = '%02d_test.tif' % slidenr
        result_boxes[slide_key] = []
        # Context manager closes the handle; newline='' as required by csv.
        with open(os.path.join(root, files[0]), 'r', newline='') as f:
            for y, x in csv.reader(f, delimiter=','):
                result_boxes[slide_key].append([int(x), int(y), 1])

    sTP, sFN, sFP = 0, 0, 0
    F1dict = dict()

    print('Calculating F1 for test set of %d files' % len(result_boxes), ':',
          result_boxes.keys())
    mitcount = DB.execute(
        f'SELECT COUNT(*) FROM Annotations where agreedClass={hotclass}'
    ).fetchall()
    print('Official count of mitotic figures in DB: ', mitcount)

    for resfile in result_boxes:
        boxes = np.array(result_boxes[resfile])

        TP, FP, FN, F1 = 0, 0, 0, 0
        slide_id = DB.findSlideWithFilename(resfile, '')
        DB.loadIntoMemory(slide_id)

        # Collect the positions of all annotations of the class of interest.
        annoList = []
        for annoI in DB.annotations:
            anno = DB.annotations[annoI]
            if anno.agreedClass == hotclass:
                annoList.append([anno.x1, anno.y1])

        centers_DB = np.array(annoList)

        if boxes.shape[0] > 0:
            score = boxes[:, -1]

            F1, TP, FP, FN = _F1_core(centers_DB, boxes, score, 0)
            # Sanity output: every annotation should be either TP or FN.
            if (centers_DB.shape[0] != TP + FN):
                print(resfile, centers_DB.shape[0], TP + FN)
        else:  # no detections --> missed all
            FN = centers_DB.shape[0]

        if (verbose):
            print(f'{resfile}: F1:{F1}, TP:{TP}, FP:{FP}, FN:{FN}')

        sTP += TP
        sFP += FP
        sFN += FN
        F1dict[resfile] = F1

    # Guard the ratios: with no detections and/or no annotations the
    # denominators are zero and the plain division would raise.
    f1_den = 2 * sTP + sFP + sFN
    prec_den = sTP + sFP
    rec_den = sTP + sFN
    sF1 = 2 * sTP / f1_den if f1_den > 0 else 0.0
    precision = sTP / prec_den if prec_den > 0 else 0.0
    recall = sTP / rec_den if rec_den > 0 else 0.0

    print('Overall: ')
    print('F1: ', sF1)
    print('Precision: %.3f ' % precision)
    print('Recall: %.3f' % recall)

    return sF1, F1dict
def optimize_threshold(databasefile,
                       result_boxes=None,
                       resfile=None,
                       hotclass=2,
                       minthres=0.5):
    """Find the detection threshold that maximizes the aggregate F1 score.

    Thresholds from ``minthres`` up to (excluding) 0.99 in steps of 0.01 are
    evaluated per slide with ``_F1_core``; TP/FP/FN are summed over all slides
    for each threshold and the F1 score is computed from the sums.

    Args:
        databasefile: path handed to ``Database.open``.
        result_boxes: mapping of slide filename to detections whose last
            column is the score. Used when ``resfile`` is not given.
        resfile: pickle file (opened via ``bz2`` if the name ends in 'bz2')
            containing such a mapping. Takes precedence over ``result_boxes``.
        hotclass: ``agreedClass`` value of the annotations counted as positives.
        minthres: lowest threshold considered; also passed to ``nms``.

    Returns:
        Tuple ``(best_threshold, allF1, thresholds)``.

    Raises:
        ValueError: if neither ``result_boxes`` nor ``resfile`` is given.
    """
    # Resolve the detections before touching the database so misuse fails
    # fast. The file is only read when one was actually passed; previously
    # ``resfile[-3:]`` was evaluated even when only result_boxes was given.
    if resfile is not None:
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # result files from a trusted source.
        if (resfile[-3:] == 'bz2'):
            with bz2.BZ2File(resfile, 'rb') as f:
                result_boxes = pickle.load(f)
        else:
            with open(resfile, 'rb') as f:
                result_boxes = pickle.load(f)
    elif result_boxes is None:
        raise ValueError(
            'At least one of resfile/result_boxes must be given')

    DB = Database()
    DB = DB.open(databasefile)

    MIN_THR = minthres

    result_boxes = nms(result_boxes, MIN_THR)
    TPd, FPd, FNd = dict(), dict(), dict()
    thresholds = np.arange(MIN_THR, 0.99, 0.01)

    print(
        'Optimizing threshold for validation set of %d files: ' %
        len(result_boxes.keys()), ','.join(list(result_boxes.keys())))

    for slide_name in result_boxes:
        boxes = np.array(result_boxes[slide_name])

        TPd[slide_name] = list()
        FPd[slide_name] = list()
        FNd[slide_name] = list()

        if (boxes.shape[0] > 0):
            score = boxes[:, -1]

            DB.loadIntoMemory(DB.findSlideWithFilename(slide_name, ''))

            # Collect the positions of all annotations of the class of interest.
            annoList = []
            for annoI in DB.annotations:
                anno = DB.annotations[annoI]
                if anno.agreedClass == hotclass:
                    annoList.append([anno.x1, anno.y1])

            centers_DB = np.array(annoList)

            for det_thres in thresholds:
                _, TP, FP, FN = _F1_core(centers_DB, boxes, score, det_thres)
                TPd[slide_name].append(TP)
                FPd[slide_name].append(FP)
                FNd[slide_name].append(FN)
        else:
            # NOTE(review): a slide without detections contributes FN=0 here,
            # whereas calculate_F1 counts all its annotations as missed --
            # confirm which behavior is intended.
            TPd[slide_name] = [0] * len(thresholds)
            FPd[slide_name] = [0] * len(thresholds)
            FNd[slide_name] = [0] * len(thresholds)

    allTP = np.zeros(len(thresholds))
    allFP = np.zeros(len(thresholds))
    allFN = np.zeros(len(thresholds))
    allF1 = np.zeros(len(thresholds))

    for k in range(len(thresholds)):
        allTP[k] = np.sum([TPd[x][k] for x in result_boxes])
        allFP[k] = np.sum([FPd[x][k] for x in result_boxes])
        allFN[k] = np.sum([FNd[x][k] for x in result_boxes])
        # A zero denominator would give NaN, which np.argmax then reports as
        # the maximum; score such thresholds as 0 instead.
        denom = 2 * allTP[k] + allFP[k] + allFN[k]
        allF1[k] = 2 * allTP[k] / denom if denom > 0 else 0.0

    best_idx = np.argmax(allF1)
    print('Best threshold: F1=', np.max(allF1), 'Threshold=',
          thresholds[best_idx])

    return thresholds[best_idx], allF1, thresholds
def calculate_F1(databasefile,
                 result_boxes=None,
                 resfile=None,
                 det_thres=0.5,
                 hotclass=2,
                 verbose=False):
    """Compute the overall F1 score of detections against database annotations.

    Args:
        databasefile: path handed to ``Database.open``.
        result_boxes: mapping of slide filename to detections whose last
            column is the score. Used when ``resfile`` is not given.
        resfile: pickle file (opened via ``bz2`` if the name ends in 'bz2')
            containing such a mapping. Takes precedence over ``result_boxes``.
        det_thres: threshold passed to ``nms`` and ``_F1_core``.
        hotclass: ``agreedClass`` value of the annotations counted as positives.
        verbose: if true, print F1/TP/FP/FN for each slide.

    Returns:
        Tuple ``(sF1, F1dict)``: the F1 score over the summed TP/FP/FN counts
        and a dict of per-slide F1 values keyed by slide filename. Scores whose
        denominator is zero are reported as 0.0.

    Raises:
        ValueError: if neither ``result_boxes`` nor ``resfile`` is given.
    """
    # Resolve the detections before touching the database so misuse fails
    # fast. The file is only read when one was actually passed; previously
    # ``resfile[-3:]`` was evaluated even when only result_boxes was given.
    if resfile is not None:
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # result files from a trusted source.
        if (resfile[-3:] == 'bz2'):
            with bz2.BZ2File(resfile, 'rb') as f:
                result_boxes = pickle.load(f)
        else:
            with open(resfile, 'rb') as f:
                result_boxes = pickle.load(f)
    elif result_boxes is None:
        raise ValueError(
            'At least one of resfile/result_boxes must be given')

    DB = Database()
    DB = DB.open(databasefile)

    sTP, sFN, sFP = 0, 0, 0
    F1dict = dict()

    result_boxes = nms(result_boxes, det_thres)

    print('Calculating F1 for test set of %d files' % len(result_boxes), ':',
          result_boxes.keys())
    mitcount = DB.execute(
        f'SELECT COUNT(*) FROM Annotations where agreedClass={hotclass}'
    ).fetchall()
    print('Official count of mitotic figures in DB: ', mitcount)

    for slide_name in result_boxes:
        boxes = np.array(result_boxes[slide_name])

        TP, FP, FN, F1 = 0, 0, 0, 0
        slide_id = DB.findSlideWithFilename(slide_name, '')
        DB.loadIntoMemory(slide_id)

        # Collect the positions of all annotations of the class of interest.
        annoList = []
        for annoI in DB.annotations:
            anno = DB.annotations[annoI]
            if anno.agreedClass == hotclass:
                annoList.append([anno.x1, anno.y1])

        centers_DB = np.array(annoList)

        if boxes.shape[0] > 0:
            score = boxes[:, -1]

            F1, TP, FP, FN = _F1_core(centers_DB, boxes, score, det_thres)
            # Sanity output: every annotation should be either TP or FN.
            if (centers_DB.shape[0] != TP + FN):
                print(slide_name, centers_DB.shape[0], TP + FN)
        else:  # no detections --> missed all
            FN = centers_DB.shape[0]

        if (verbose):
            print(f'{slide_name}: F1:{F1}, TP:{TP}, FP:{FP}, FN:{FN}')

        sTP += TP
        sFP += FP
        sFN += FN
        F1dict[slide_name] = F1

    # Guard the ratios: with no detections and/or no annotations the
    # denominators are zero and the plain division would raise.
    f1_den = 2 * sTP + sFP + sFN
    prec_den = sTP + sFP
    rec_den = sTP + sFN
    sF1 = 2 * sTP / f1_den if f1_den > 0 else 0.0
    precision = sTP / prec_den if prec_den > 0 else 0.0
    recall = sTP / rec_den if rec_den > 0 else 0.0

    print('Overall: ')
    print('F1: ', sF1)
    print('Precision: %.3f ' % precision)
    print('Recall: %.3f' % recall)

    return sF1, F1dict
Beispiel #9
0
from SlideRunner.dataAccess.database import Database
import SlideRunner.dataAccess.annotations as annotations
import openslide
import os
import numpy as np
import sys

# The area argument is mandatory: stop here when it is missing, otherwise the
# script would continue and fail with a NameError on `hpf` below.
if len(sys.argv) < 2:
    print('syntax:', sys.argv[0], '<area in WSI>')
    sys.exit(1)

hpf = int(sys.argv[1])

# Output directory named after the requested area, e.g. '10HPF'.
os.makedirs('%dHPF' % hpf, exist_ok=True)
DB = Database()
DB.open('../databases/MITOS_WSI_CCMCT_ODAEL.sqlite')

DBRK = Database()
DBRK.open('databases/HighMCAreas.sqlite')

# Work on a copy of the original database and attach the original as `orig`.
os.system(
    'cp ../databases/MITOS_WSI_CCMCT_ODAEL.sqlite MITOS_WSI_CCMCT_ODAEL_%dHPF.sqlite'
    % hpf)
DBnew = Database()
DBnew.open('MITOS_WSI_CCMCT_ODAEL_%dHPF.sqlite' % hpf)

DBnew.execute('ATTACH `../databases/MITOS_WSI_CCMCT_ODAEL.sqlite` as orig')

for uid, filename in DB.listOfSlides():

    print(uid, filename)