def main(args):
    """Overlay each prediction on its base image and write the blend to disk.

    ``args.s`` and ``args.p`` name text files holding one path per line (base
    images and prediction arrays respectively); the two lists are paired by
    position. The destination of each pair is ``repext(pr, args.suffix)``,
    and pairs whose destination already exists are skipped.
    """
    # Resolve the palette. A name seaborn rejects falls back to "RdBu_r".
    if args.color != 'cubehelix':
        try:
            colors = sns.color_palette(args.color, n_colors=args.n_colors)
        except Exception as err:
            traceback.print_tb(err.__traceback__)
            print('{} not a valid seaborn colormap name'.format(args.color))
            print('Defaulting to "RdBu_r"')
            colors = sns.color_palette("RdBu_r", n_colors=args.n_colors)
    else:
        colors = sns.cubehelix_palette(args.n_colors)

    # Blend weights handed straight to overlay_img.
    mixture = [0.3, 0.7]

    def read_paths(list_file):
        # One whitespace-stripped entry per line, in file order.
        with open(list_file, 'r') as handle:
            return [line.strip() for line in handle]

    base_paths = read_paths(args.s)
    pred_paths = read_paths(args.p)

    for base_path, pred_path in zip(base_paths, pred_paths):
        dst = repext(pred_path, args.suffix)
        if not os.path.exists(dst):
            combo = overlay_img(base_path, pred_path, mixture, colors, dst)
            print('{} --> {}'.format(combo.shape, dst))
            cv2.imwrite(dst, combo)
        else:
            print('Exists {}'.format(dst))
def main(args):
    """Write a ``.ovr.jpg`` overlay for every (slide, probability) pair.

    ``args.slides`` and ``args.probs`` are text files with one path per line.
    They are read in file order and paired positionally. Pairs whose
    destination already exists are reported and skipped.
    """

    def load_list(path):
        # One whitespace-stripped entry per line.
        with open(path, 'r') as fh:
            return [entry.strip() for entry in fh]

    slidelist = load_list(args.slides)
    problist = load_list(args.probs)

    colors = define_colors(args.colorname, args.n_colors,
                           add_white=True, shuffle=False)
    print(colors)

    n_written = 0
    for slide, prob in zip(slidelist, problist):
        dst = repext(prob, '.ovr.jpg')
        if os.path.exists(dst):
            print('{} Exists.'.format(dst))
        else:
            print(slide, '-->', dst)
            overlay = overlay_img(slide, prob, colors, mixture=[0.3, 0.7])
            cv2.imwrite(dst, overlay)
            n_written += 1
            # Progress tick after every tenth overlay actually written.
            if n_written % 10 == 0:
                print(n_written)
def main(args):
    """Compute the 'wsi' output for every listed slide and save it as an image.

    ``args.lst`` is a text file with one slide path per line. For each slide
    the destination is ``repext(src, args.suffix)``; slides whose destination
    already exists are skipped. Each slide is first copied with
    ``cpramdisk(src, args.ramdisk)`` and that copy is removed again whether or
    not processing succeeded.

    Errors raised while processing one slide are printed and do not stop the
    loop.
    """
    with open(args.lst, 'r') as f:
        # Blank lines would otherwise be treated as an (empty) slide path.
        srclist = [x.strip() for x in f if x.strip()]

    for src in srclist:
        dst = repext(src, args.suffix)
        if os.path.exists(dst):
            print('Exists', src, '-->', dst)
            continue

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('File:', rdsrc)
        try:
            # Open the ramdisk copy, not the original path; otherwise the
            # copy made above is never read.
            slide = Slide(rdsrc, args)
            # NOTE(review): compute_fn is not defined in this function; it is
            # expected to exist at module level.
            slide.initialize_output('wsi', 3, mode='full',
                                    compute_fn=compute_fn)
            ret = slide.compute('wsi', args)
            print('Saving {} --> {}'.format(ret.shape, dst))
            cv2.imwrite(dst, ret)
        except Exception as e:
            # Best effort: report the failure and move on to the next slide.
            print(e)
            traceback.print_tb(e.__traceback__)
        finally:
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)
def overlay_img(base, pred, mixture, colors, dst):
    """Blend a colour-coded prediction map onto a base image.

    Args:
        base: Path of the base image, read with ``cv2.imread``.
        pred: Path of the prediction array, read with ``np.load``. A trailing
            singleton axis is squeezed away; the code then indexes it with a
            2-D mask, so a 2-D map is assumed -- TODO confirm.
        mixture: Two weights; the result is
            ``img * mixture[0] + ycolor * mixture[1]``.
        colors: Palette passed to ``savehist`` and ``color_mask``.
        dst: Destination path of the overlay. It is only used here to derive
            the side-output paths (``.ydig.png`` and the histogram written by
            ``savehist``).

    Returns:
        The blended image after ``cv2.convertScaleAbs``.

    Raises:
        IOError: If ``base`` cannot be read as an image.

    NOTE(review): ``args`` (for ``n_colors``) and ``kernel`` are not
    parameters; they are expected to exist at module level.
    """
    img = cv2.imread(base)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read image: {}'.format(base))
    ishape = img.shape[:2][::-1]  # (width, height), the order cv2.resize wants

    gray = np.mean(img, axis=-1)

    y = np.load(pred)
    if y.shape[-1] == 1:
        y = np.squeeze(y)

    # Foreground mask at prediction resolution: near-white and near-black
    # pixels of the downsized grayscale image count as background.
    yshape = y.shape[::-1]
    gray_s = cv2.resize(gray, dsize=yshape, interpolation=cv2.INTER_LINEAR)
    background = np.logical_or(gray_s > 230, gray_s < 20)
    foreground = np.logical_not(background)
    print(y.shape, y.dtype)
    print(background.shape, background.dtype)
    print(foreground.shape, foreground.dtype)

    # Softmax over the foreground values only; background is zeroed.
    y_fg = softmax(y[foreground])
    y[background] = 0.
    y[foreground] = y_fg
    print(y.min(), y.max())
    print('nnz(y) =', (y != 0).sum(), '/', np.prod(y.shape))

    # Quantise into colour bins spanning [0, max(y)].
    bins = np.linspace(0., y.max(), args.n_colors - 1)
    ydig = np.digitize(y, bins)

    # Emphasize sparse positive points
    ydig = cv2.dilate(ydig.astype(np.uint8), kernel=kernel, iterations=1)
    savehist(ydig, colors, dst)
    print('ydig', np.unique(ydig))

    ydig_dst = repext(dst, '.ydig.png')
    print(ydig.shape, '-->', ydig_dst)
    cv2.imwrite(ydig_dst, ydig * (255. / args.n_colors))

    # Nearest-neighbour keeps the bin indices intact while upsizing.
    ydig = cv2.resize(ydig, fx=0, fy=0, dsize=ishape,
                      interpolation=cv2.INTER_NEAREST)

    ycolor = color_mask(ydig, colors)
    img = np.add(img * mixture[0], ycolor * mixture[1])
    return cv2.convertScaleAbs(img)
def savehist(ydig, colors, dst):
    """Save a 50-bin density histogram of ``ydig`` next to ``dst``.

    The figure is written to ``repext(dst, '.hist.png')``. Bar ``k`` is
    coloured with ``colors[k]``. When ``colors`` holds fewer than 50 entries
    only the first ``len(colors)`` bars are recoloured and the rest keep the
    default face colour.

    Args:
        ydig: Array of values to histogram; it is flattened first.
        colors: Sequence of colours accepted by ``set_facecolor``.
        dst: Path the histogram file name is derived from.
    """
    plt.clf()
    hist = repext(dst, '.hist.png')
    print('hist --> {}'.format(hist))
    _, _, patches = plt.hist(ydig.ravel(), bins=50, log=False, density=True)
    # zip stops at the shorter sequence, so a short palette cannot index
    # past its end.
    for patch, color in zip(patches, colors):
        patch.set_facecolor(color)
    plt.savefig(hist, bbox_inches='tight')
def overlay_img(base, pred, mixture, colors, dst):
    """Blend one channel of a prediction array onto a base image.

    Args:
        base: Path of the base image, read with ``cv2.imread``.
        pred: Path of the prediction array, read with ``np.load``. It is
            indexed as ``y[:, :, args.c]``, so it must have at least three
            axes.
        mixture: Two weights; the result is
            ``img * mixture[0] + ycolor * mixture[1]``.
        colors: Palette passed to ``savehist`` and ``color_mask``.
        dst: Destination path of the overlay. It is only used here to derive
            the side-output paths (``.ydig.png`` and the histogram written by
            ``savehist``).

    Returns:
        The blended image after ``cv2.convertScaleAbs``, with near-white and
        near-black pixels of the base image forced to 255.

    Raises:
        IOError: If ``base`` cannot be read as an image.

    NOTE(review): ``args`` (for ``c`` and ``n_colors``) and ``kernel`` are not
    parameters; they are expected to exist at module level.
    """
    img = cv2.imread(base)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Could not read image: {}'.format(base))
    ishape = img.shape[:2][::-1]  # (width, height), the order cv2.resize wants

    # Find pure black and white in the img
    gray = np.mean(img, axis=-1)
    img_w = gray > 220
    img_b = gray < 10

    y = np.load(pred)
    y = y[:, :, args.c]
    if y.shape[-1] == 1:
        y = np.squeeze(y)

    # Quantise into colour bins over the fixed range [0, 1].
    bins = np.linspace(0., 1., args.n_colors - 1)
    ydig = np.digitize(y, bins)

    # Emphasize sparse positive points
    ydig = cv2.dilate(ydig.astype(np.uint8), kernel=kernel, iterations=1)

    # The histogram only covers pixels that are not near-white in the
    # (downsized) base image.
    img_w_s = cv2.resize(gray, dsize=(ydig.shape[::-1])) > 220
    ydig_nowhite = ydig[np.logical_not(img_w_s)]
    savehist(ydig_nowhite, colors, dst)

    ydig_dst = repext(dst, '.ydig.png')
    print(ydig.shape, '-->', ydig_dst)
    cv2.imwrite(ydig_dst, ydig * (255. / args.n_colors))

    # Nearest-neighbour keeps the bin indices intact while upsizing.
    ydig = cv2.resize(ydig, fx=0, fy=0, dsize=ishape,
                      interpolation=cv2.INTER_NEAREST)

    ycolor = color_mask(ydig, colors)
    img = np.add(img * mixture[0], ycolor * mixture[1])

    # Whiten the background: a 2-D boolean mask on an (H, W, 3) array
    # assigns every channel of the selected pixels at once.
    img[np.logical_or(img_w, img_b)] = 255
    return cv2.convertScaleAbs(img)
def main(args):
    """Compute an attention map for every listed slide and save it with np.save.

    ``args.iter_type`` selects how tiles are fed to the model: ``'python'``
    uses ``PythonIterator``, ``'tf'`` uses ``TensorflowIterator`` in eager
    mode. ``args.slides`` is a text file with one slide path per line.

    Errors raised while processing one slide are printed and do not stop the
    loop; the ramdisk copy of the slide is always removed.

    Raises:
        ValueError: If ``args.iter_type`` is neither ``'python'`` nor ``'tf'``.
    """
    # compute_fn does three things:
    #   1. define an iterator over the slide's tiles
    #   2. compute an output with the given model
    #   3. return the assembled output image
    if args.iter_type == 'python':
        def compute_fn(slide, args, model=None):
            print('Slide with {}'.format(len(slide.tile_list)))
            it_factory = PythonIterator(slide, args)
            for k, (img, idx) in enumerate(it_factory.yield_batch()):
                prob = model(img)
                if k % 50 == 0:
                    print('Batch #{:04d} idx:{} img:{} prob:{}'.format(
                        k, idx.shape, img.shape, prob.shape))
                # NOTE(review): this branch writes to the 'prob' output while
                # the loop below only initializes 'att' -- confirm that
                # 'prob' exists on the slide when this path is used.
                slide.place_batch(prob, idx, 'prob', mode='tile')
            return slide.output_imgs['prob']

    # Tensorflow multithreaded queue-based iterator (in eager mode)
    elif args.iter_type == 'tf':
        def compute_fn(slide, args, model=None):
            assert tf.executing_eagerly()
            print('Slide with {}'.format(len(slide.tile_list)))

            # In eager mode the iterator can be consumed directly; queueing
            # and multithreading are handled by the tf.data.Dataset ops.
            eager_iterator = TensorflowIterator(slide, args).make_iterator()

            features, indices = [], []
            for k, (img, idx) in enumerate(eager_iterator):
                features.append(
                    model.encode_bag(img, training=False, return_z=True))
                indices.append(idx.numpy())
                if k % 50 == 0:
                    # Convert to numpy only when the shape is actually logged.
                    print('Batch #{:04d}\t{}'.format(k, img.numpy().shape))

            # Attention is computed once over the features of all tiles.
            features = tf.concat(features, axis=0)
            z_att, att = model.mil_attention(features, training=False,
                                             return_raw_att=True)
            att = np.squeeze(att)
            indices = np.concatenate(indices)
            slide.place_batch(att, indices, 'att', mode='tile')
            return slide.output_imgs['att']

    else:
        # Fail before building the model: with no compute_fn bound, every
        # slide would fail later with a swallowed NameError.
        raise ValueError(
            "Unknown iter_type {!r}; expected 'python' or 'tf'".format(
                args.iter_type))

    # Set up the model first
    encoder_args = get_encoder_args(args.encoder)
    model = MilkEager(encoder_args=encoder_args,
                      mil_type=args.mil,
                      deep_classifier=args.deep_classifier,
                      batch_size=args.batchsize,
                      temperature=args.temperature,
                      heads=args.heads)

    # One forward pass on a dummy input before load_weights is called.
    x = tf.zeros((1, 1, args.process_size, args.process_size, 3))
    _ = model(x, verbose=True, head='all', training=True)
    model.load_weights(args.snapshot, by_name=True)

    # keras Model subclass
    model.summary()

    # Read list of inputs
    with open(args.slides, 'r') as f:
        slides = [x.strip() for x in f]

    # Loop over slides
    for src in slides:
        # The destination is the source path with its extension replaced.
        # Set the --suffix option to reflect the model / type of processed
        # output.
        # NOTE(review): there is no "skip if dst exists" check here, so an
        # existing output is recomputed and overwritten.
        dst = repext(src, args.suffix)

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('File:', rdsrc)

        # try/except/finally so the temporary copy is cleaned up even when
        # processing fails part-way through.
        try:
            # Initialize the slide from the temporary path with the
            # command-line arguments.
            slide = Slide(rdsrc, args)
            slide.initialize_output('att', args.n_classes, mode='tile',
                                    compute_fn=compute_fn)
            ret = slide.compute('att', args, model=model)
            print('{} --> {}'.format(ret.shape, dst))
            # The last axis is reversed before saving.
            np.save(dst, ret[:, :, ::-1])
        except Exception as e:
            print(e)
            traceback.print_tb(e.__traceback__)
        finally:
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)
def main(args):
    """Save an attention map and the encoded tile features for every slide.

    ``args.slides`` is a text file with one slide path per line. For each
    slide two arrays are written with ``np.save``: the attention image at
    ``repext(src, args.suffix)`` and the feature matrix at
    ``repext(src, args.suffix + '.feat.npy')``.

    Errors raised while processing one slide are printed and do not stop the
    loop; the ramdisk copy of the slide is always removed.
    """
    # Classifier heads evaluated for the per-slide prediction printout.
    # NOTE(review): fixed at ten heads regardless of args.heads.
    all_heads = list(range(10))

    def compute_fn(slide, args, model=None, n_dropout=10):
        """Encode all tiles, run attention once, and place it on the slide.

        Returns the 'att' output image together with the features as a numpy
        array. ``n_dropout`` is currently unused; it belonged to a
        sample-dropout experiment (attention averaged over random 70%
        subsets of the instances) that is disabled.
        """
        assert tf.executing_eagerly()
        print('Slide with {}'.format(len(slide.tile_list)))

        # In eager mode the iterator is consumed directly; queueing and
        # multithreading are handled by the tf.data.Dataset ops.
        tile_iter = TensorflowIterator(slide, args).make_iterator()

        encoded = []
        tile_indices = []
        for batch_no, (img, idx) in enumerate(tile_iter):
            z = model.encode_bag(img, training=False, return_z=True)
            encoded.append(z)
            tile_indices.append(idx.numpy())
            img, idx = img.numpy(), idx.numpy()
            if batch_no % 50 == 0:
                print('Batch #{:04d}\t{}'.format(batch_no, img.shape))

        features = tf.concat(encoded, axis=0)

        z_att, att = model.mil_attention(features, training=False,
                                         return_raw_att=True)
        yhat_multihead = model.apply_classifier(z_att, heads=all_heads,
                                                training=False)
        print('yhat mean {}'.format(np.mean(yhat_multihead, axis=0)))

        indices = np.concatenate(tile_indices)
        att = np.squeeze(att)
        slide.place_batch(att, indices, 'att', mode='tile')

        att_img = slide.output_imgs['att']
        print('Got attention image: {}'.format(att_img.shape))
        return att_img, features.numpy()

    ## Begin main script:
    # Set up the model first
    model = MilkEager(
        encoder_args=get_encoder_args(args.encoder),
        mil_type=args.mil,
        deep_classifier=args.deep_classifier,
        batch_size=args.batchsize,
        temperature=args.temperature,
        heads=args.heads,
    )

    # One forward pass on a dummy input before load_weights is called.
    dummy = tf.zeros((1, 1, args.process_size, args.process_size, 3))
    _ = model(dummy, verbose=True, heads=all_heads, training=True)
    model.load_weights(args.snapshot, by_name=True)

    # keras Model subclass
    model.summary()

    # Read list of inputs
    with open(args.slides, 'r') as list_file:
        slides = [line.strip() for line in list_file]

    for src in slides:
        # Destinations are the source path with its extension replaced; set
        # --suffix to reflect the model / type of processed output.
        dst = repext(src, args.suffix)
        featdst = repext(src, args.suffix+'.feat.npy')

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('\n\nFile:', rdsrc)

        # try/except/finally so the temporary copy is cleaned up even when
        # processing fails part-way through.
        try:
            slide = Slide(rdsrc, args)
            slide.initialize_output('att', args.n_classes, mode='tile',
                                    compute_fn=compute_fn)
            att_img, features = slide.compute('att', args, model=model)
            print('{} --> {}'.format(att_img.shape, dst))
            print('{} --> {}'.format(features.shape, featdst))
            np.save(dst, att_img)
            np.save(featdst, features)
        except Exception as err:
            print(err)
            traceback.print_tb(err.__traceback__)
        finally:
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)
def main(args, sess):
    """Run a tile classifier over every listed slide and save the result.

    For each path in ``args.slides`` (one per line) the slide is copied to
    the ramdisk, processed tile-by-tile through ``model.inference``, and the
    assembled 'prob' image is scaled by 255, cast to uint8 and written with
    ``np.save``. Slides whose destination already exists are skipped.
    Per-slide timing statistics for the slides that finished are handed to
    ``write_times`` at the end.

    Errors raised while processing one slide are printed and do not stop the
    loop. The slide object is closed and its ramdisk copy removed in every
    case.

    Args:
        args: Parsed command-line options.
        sess: TensorFlow session used to build the model and to pull batches
            from the tile iterator.
    """

    def compute_fn(slide, args, sess=None):
        """Classify every tile batch and return the slide's 'prob' image."""
        print('\n\nSlide with {}'.format(len(slide.tile_list)))

        # The iterator is kept separate from the inference function, so each
        # batch is fetched with sess.run and then passed to the model.
        tf_iterator = TensorflowIterator(slide, args).make_iterator()
        img_op, idx_op = tf_iterator.get_next()

        k, nk = 0, 0  # batches processed, tiles processed
        while True:
            try:
                img, idx = sess.run([img_op, idx_op])
                prob = model.inference(img)
                nk += img.shape[0]
                slide.place_batch(prob, idx, 'prob', mode='full',
                                  clobber=True)
                k += 1

                if k % 50 == 0:
                    print('Batch #{:04d} idx:{} img:{} ({:2.2f}-{:2.2f}) '
                          'prob:{} T {}'.format(k, idx.shape, img.shape,
                                                img.min(), img.max(),
                                                prob.shape, nk))
                    if args.verbose:
                        print('More info: ')
                        print('img: ', img.dtype, img.min(), img.max(),
                              img.mean())
                        # Count how many predictions fall in each class.
                        pmax = np.argmax(prob, axis=-1).ravel()
                        for u in range(args.n_classes):
                            count_u = (pmax == u).sum()
                            print('- class {:02d} : {}'.format(u, count_u))

            except tf.errors.OutOfRangeError:
                # Normal termination: the iterator is exhausted.
                print('Finished.')
                print('Total: {}'.format(nk))
                break

            except Exception as e:
                # Best effort: report and stop iterating; whatever has been
                # placed so far is still assembled and returned below.
                print(e)
                traceback.print_tb(e.__traceback__)
                break

        # We've exited the loop. Clean up the iterator
        del tf_iterator, idx_op, img_op

        slide.make_outputs()
        return slide.output_imgs['prob']

    # Set up the model first
    model = gg.get_model(args.model, sess, args.process_size, args.n_classes)

    # Initialize first, THEN restore, so the restore op replaces the
    # already-created weights.
    sess.run(tf.global_variables_initializer())
    model.restore(args.snapshot)

    # Read list of inputs
    with open(args.slides, 'r') as f:
        slides = [x.strip() for x in f]

    # Loop over slides; Record times
    nslides = len(slides)
    successes, ntiles, total_time, fpss = [], [], [], []
    for i, src in enumerate(slides):
        # The destination is the source path with its extension replaced.
        # Computed first so finished slides can be skipped cheaply. Set the
        # --suffix option to reflect the model / type of processed output.
        # NOTE(review): np.save appends '.npy' when the name lacks it, so
        # this check only matches if args.suffix already ends in '.npy'.
        dst = repext(src, args.suffix)
        if os.path.exists(dst):
            print('{} Exists.'.format(dst))
            continue

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)

        # Reset per iteration so the cleanup below never touches a slide
        # object left over from an earlier iteration.
        slide = None
        try:
            print('\n\n-------------------------------')
            print('File:', rdsrc, '{:04d} / {:04d}'.format(i, nslides))

            t0 = time.time()
            slide = Slide(rdsrc, args)

            # TODO pull the expected output size from the model.. ?
            # support common model types - keras, tfmodels, tfhub..
            slide.initialize_output('prob', args.n_classes, mode='full',
                                    compute_fn=compute_fn)
            ret = slide.compute('prob', args, sess=sess)
            print('{} --> {}'.format(ret.shape, dst))
            ret = (ret * 255).astype(np.uint8)
            np.save(dst, ret)

            # If it finishes, record some stats
            deltat = time.time() - t0
            n_slide_tiles = len(slide.tile_list)
            successes.append(rdsrc)
            ntiles.append(n_slide_tiles)
            total_time.append(deltat)
            fpss.append(n_slide_tiles / float(deltat))

        except Exception as e:
            print(e)
            traceback.print_tb(e.__traceback__)

        finally:
            # Close the slide before deleting the file it was opened from;
            # a failure to close must not prevent the file removal.
            if slide is None:
                print('No slide object not found to clean up ?')
            else:
                print('Cleaning slide object')
                try:
                    slide.close()
                except Exception as e:
                    print('Failed to close slide: {}'.format(e))
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)

    write_times(args.timefile, successes, ntiles, total_time, fpss)
def main(args, sess):
    """Run a loaded model over every listed slide and save the 'prob' image.

    A float32 placeholder of shape
    ``(None, args.process_size, args.process_size, 3)`` feeds the model
    returned by ``load_model(args.snapshot)``. For each path in
    ``args.slides`` (one per line) the slide is copied to the ramdisk,
    processed batch by batch, and the assembled output is written with
    ``np.save``.

    Errors raised while processing one slide are printed and do not stop the
    loop; the ramdisk copy of the slide is always removed.

    Args:
        args: Parsed command-line options.
        sess: TensorFlow session used for all ``run`` calls.
    """
    # compute_fn does three things:
    #   1. define an iterator over the slide's tiles
    #   2. compute an output with the given model
    #   3. return the assembled output image
    # Only the TensorFlow iterator is used here; a PythonIterator-based
    # variant (model.predict_on_batch per batch) was an earlier alternative.
    def compute_fn(slide, args, sess=None, img_pl=None, prob_op=None):
        """Classify every tile batch and return the slide's 'prob' image."""
        print('\n\nSlide with {}'.format(len(slide.tile_list)))

        tf_iterator = TensorflowIterator(slide, args).make_iterator()
        img_op, idx_op = tf_iterator.get_next()

        k, nk = 0, 0  # batches processed, tiles processed
        while True:
            try:
                # Pull a batch, then feed it through the placeholder.
                img, idx = sess.run([img_op, idx_op])
                prob = sess.run(prob_op, {img_pl: img})
                nk += img.shape[0]
                if k % 50 == 0:
                    # "img:" shows the batch shape, the parenthesis its max.
                    print('Batch #{:04d} idx:{} img:{} ({}) prob:{} T {}'
                          .format(k, idx.shape, img.shape, img.max(),
                                  prob.shape, nk))
                slide.place_batch(prob, idx, 'prob', mode='tile')
                k += 1
            except tf.errors.OutOfRangeError:
                # Normal termination: the iterator is exhausted.
                print('Finished.')
                print('Total: {}'.format(nk))
                break

        # Fetch the assembled output once, after every batch is placed.
        return slide.output_imgs['prob']

    # Set up the model first
    # Set up a placeholder for the input
    img_pl = tf.placeholder(
        tf.float32, (None, args.process_size, args.process_size, 3))
    model = load_model(args.snapshot)
    prob_op = model(img_pl)
    # NOTE(review): the initializer runs after load_model; confirm this does
    # not reset the weights that were just loaded.
    sess.run(tf.global_variables_initializer())

    # Read list of inputs
    with open(args.slides, 'r') as f:
        slides = [x.strip() for x in f]

    # Loop over slides
    for src in slides:
        # The destination is the source path with its extension replaced.
        # Set the --suffix option to reflect the model / type of processed
        # output.
        # NOTE(review): there is no "skip if dst exists" check here, so an
        # existing output is recomputed and overwritten.
        dst = repext(src, args.suffix)

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('File:', rdsrc)

        # try/except/finally so the temporary copy is cleaned up even when
        # processing fails part-way through.
        try:
            print('\n\n-------------------------------')
            slide = Slide(rdsrc, args)

            # NOTE(review): the output is initialized with a fixed 4 as its
            # second argument; sibling scripts pass args.n_classes.
            slide.initialize_output('prob', 4, mode='tile',
                                    compute_fn=compute_fn)
            ret = slide.compute('prob', args, sess=sess, img_pl=img_pl,
                                prob_op=prob_op)
            print('{} --> {}'.format(ret.shape, dst))
            np.save(dst, ret)
        except Exception as e:
            print(e)
            traceback.print_tb(e.__traceback__)
        finally:
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)