Example #1
0
def main(args):
    """Overlay every listed prediction on its base image and save the result.

    ``args.s`` and ``args.p`` name text files holding one path per line
    (base images and predictions respectively); entries are paired by
    position. Each output path comes from ``repext(prediction, args.suffix)``
    and pairs whose output file already exists are skipped.
    """
    # Resolve the palette; any failure on a named colormap falls back to RdBu_r
    if args.color != 'cubehelix':
        try:
            palette = sns.color_palette(args.color, n_colors=args.n_colors)
        except Exception as err:
            traceback.print_tb(err.__traceback__)
            print('{} not a valid seaborn colormap name'.format(args.color))
            print('Defaulting to "RdBu_r"')
            palette = sns.color_palette("RdBu_r", n_colors=args.n_colors)
    else:
        palette = sns.cubehelix_palette(args.n_colors)

    # Blend weights handed to overlay_img as its `mixture` argument
    blend = [0.3, 0.7]

    def _read_list(path):
        # One stripped entry per line of the list file
        with open(path, 'r') as handle:
            return [entry.strip() for entry in handle]

    base_paths = _read_list(args.s)
    pred_paths = _read_list(args.p)

    for base_path, pred_path in zip(base_paths, pred_paths):
        out_path = repext(pred_path, args.suffix)
        if not os.path.exists(out_path):
            blended = overlay_img(base_path, pred_path, blend, palette,
                                  out_path)
            print('{} --> {}'.format(blended.shape, out_path))
            cv2.imwrite(out_path, blended)
        else:
            print('Exists {}'.format(out_path))
def main(args):
    """Write a '.ovr.jpg' overlay for every (slide, probability) pair.

    ``args.slides`` and ``args.probs`` are text files with one path per
    line. The two lists are paired by position, so they must be written in
    matching order. Outputs that already exist are left alone.
    """
    def _read_list(path):
        # One stripped entry per line of the list file
        with open(path, 'r') as handle:
            return [entry.strip() for entry in handle]

    slide_paths = _read_list(args.slides)
    prob_paths = _read_list(args.probs)

    palette = define_colors(args.colorname,
                            args.n_colors,
                            add_white=True,
                            shuffle=False)
    print(palette)

    # Counts only the overlays actually written (skipped ones don't count)
    n_written = 0
    for slide_path, prob_path in zip(slide_paths, prob_paths):
        out_path = repext(prob_path, '.ovr.jpg')
        if os.path.exists(out_path):
            print('{} Exists.'.format(out_path))
            continue

        print(slide_path, '-->', out_path)
        overlay = overlay_img(slide_path, prob_path, palette,
                              mixture=[0.3, 0.7])
        cv2.imwrite(out_path, overlay)

        n_written += 1
        # Progress marker every tenth written overlay
        if not n_written % 10:
            print(n_written)
Example #3
0
def main(args):
    """Compute the 'wsi' output for every slide in ``args.lst`` and save it.

    ``args.lst`` is a text file with one slide path per line. For each
    slide the destination is ``repext(src, args.suffix)``; slides whose
    destination already exists are skipped before any copying happens.
    Otherwise the slide is staged with ``cpramdisk`` and processed from
    that staged path, the result is written with ``cv2.imwrite``, and the
    staged copy is removed whether or not processing succeeded.

    Errors while processing one slide are printed as a traceback and do not
    stop the loop.
    """
    with open(args.lst, 'r') as f:
        srclist = [x.strip() for x in f]

    for src in srclist:
        dst = repext(src, args.suffix)
        if os.path.exists(dst):
            print('Exists', src, '-->', dst)
            continue

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('File:', rdsrc)

        try:
            # Open the staged copy. Opening `src` here would make the
            # ramdisk copy above pure overhead.
            slide = Slide(rdsrc, args)
            slide.initialize_output('wsi',
                                    3,
                                    mode='full',
                                    compute_fn=compute_fn)
            ret = slide.compute('wsi', args)
            print('Saving {} --> {}'.format(ret.shape, dst))
            cv2.imwrite(dst, ret)
        except Exception as e:
            # Best effort: report and move on to the next slide
            traceback.print_tb(e.__traceback__)
        finally:
            # Always drop the staged copy so the ramdisk doesn't fill up
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)
Example #4
0
def overlay_img(base, pred, mixture, colors, dst):
    """Blend a colorized, digitized prediction map over a base image.

    Besides returning the blended image, this writes two side files derived
    from ``dst``: a histogram (via ``savehist``) and the digitized map as
    ``repext(dst, '.ydig.png')``.

    Reads the module-level ``args.n_colors`` and ``kernel``.

    Args:
        base: path of the base image, read with ``cv2.imread``.
        pred: path of a ``.npy`` prediction array. A trailing singleton
            axis is squeezed away; the array is presumably 2-D afterwards,
            since its reversed shape is used as a resize target (confirm
            against the producer).
        mixture: two weights, ``mixture[0]`` for the base image and
            ``mixture[1]`` for the colorized prediction.
        colors: palette passed on to ``savehist`` and ``color_mask``.
        dst: output path that the side-file names are derived from.

    Returns:
        The blended image after ``cv2.convertScaleAbs``.

    Raises:
        IOError: if ``base`` cannot be read as an image.
    """
    img = cv2.imread(base)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising
        raise IOError('Could not read image {}'.format(base))
    ishape = img.shape[:2][::-1]

    gray = np.mean(img, axis=-1)

    y = np.load(pred)
    if y.shape[-1] == 1:
        y = np.squeeze(y)

    # Using a foreground mask :
    # near-white and near-black pixels of the downscaled gray image are
    # treated as background.
    yshape = y.shape[::-1]
    gray_s = cv2.resize(gray, dsize=yshape, interpolation=cv2.INTER_LINEAR)
    # logical_or already yields a bool array; the removed `np.bool` alias
    # is not needed.
    background = np.logical_or(gray_s > 230, gray_s < 20)
    foreground = np.logical_not(background)
    print(y.shape, y.dtype)
    print(background.shape, background.dtype)
    print(foreground.shape, foreground.dtype)

    # Softmax is applied over the foreground values only; background is
    # zeroed out.
    y_fg = y[foreground]
    y_fg = softmax(y_fg)
    y[background] = 0.
    y[foreground] = y_fg
    print(y.min(), y.max())
    print('nnz(y) =', (y != 0).sum(), '/', np.prod(y.shape))

    bins = np.linspace(0., y.max(), args.n_colors - 1)
    ydig = np.digitize(y, bins)
    # Emphasize sparse positive points
    ydig = cv2.dilate(ydig.astype(np.uint8), kernel=kernel, iterations=1)
    savehist(ydig, colors, dst)
    print('ydig', np.unique(ydig))
    ydig_dst = repext(dst, '.ydig.png')
    print(ydig.shape, '-->', ydig_dst)
    cv2.imwrite(ydig_dst, ydig * (255. / args.n_colors))

    # Back up to the base image size; nearest-neighbour keeps bin indices
    # intact.
    ydig = cv2.resize(ydig,
                      fx=0,
                      fy=0,
                      dsize=ishape,
                      interpolation=cv2.INTER_NEAREST)

    ycolor = color_mask(ydig, colors)
    img = np.add(img * mixture[0], ycolor * mixture[1])
    return cv2.convertScaleAbs(img)
Example #5
0
def savehist(ydig, colors, dst):
    """Save a 50-bin density histogram of ``ydig`` next to ``dst``.

    The figure is written to ``repext(dst, '.hist.png')``. Bar ``k`` is
    filled with ``colors[k]``, so ``colors`` must provide at least as many
    entries as there are bars.
    """
    n_bins = 50

    # Start from an empty current figure
    plt.clf()
    hist_path = repext(dst, '.hist.png')
    print('hist --> {}'.format(hist_path))

    _, _, bars = plt.hist(ydig.ravel(), bins=n_bins, log=False, density=True)
    for k, bar in enumerate(bars):
        bar.set_facecolor(colors[k])

    plt.savefig(hist_path, bbox_inches='tight')
Example #6
0
def overlay_img(base, pred, mixture, colors, dst):
    """Blend one channel of a prediction over a base image.

    Selects channel ``args.c`` of the prediction, digitizes it into
    ``args.n_colors - 1`` evenly spaced bins on [0, 1], colorizes it and
    blends it with the base image. Near-white and near-black pixels of the
    base image are then forced to 255 in every channel.

    Side files derived from ``dst``: a histogram of the non-white region
    (via ``savehist``) and the digitized map as
    ``repext(dst, '.ydig.png')``.

    Reads the module-level ``args.c``, ``args.n_colors`` and ``kernel``.

    Args:
        base: path of the base image, read with ``cv2.imread``.
        pred: path of a ``.npy`` prediction array indexed as
            ``y[:, :, args.c]``.
        mixture: two weights, ``mixture[0]`` for the base image and
            ``mixture[1]`` for the colorized prediction.
        colors: palette passed on to ``savehist`` and ``color_mask``.
        dst: output path that the side-file names are derived from.

    Returns:
        The blended image after ``cv2.convertScaleAbs``.

    Raises:
        IOError: if ``base`` cannot be read as an image.
    """
    img = cv2.imread(base)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising
        raise IOError('Could not read image {}'.format(base))
    ishape = img.shape[:2][::-1]

    # Find pure black and white in the img
    gray = np.mean(img, axis=-1)
    img_w = gray > 220
    img_b = gray < 10

    y = np.load(pred)
    y = y[:, :, args.c]
    if y.shape[-1] == 1:
        y = np.squeeze(y)

    bins = np.linspace(0., 1., args.n_colors - 1)
    ydig = np.digitize(y, bins)

    # Emphasize sparse positive points
    ydig = cv2.dilate(ydig.astype(np.uint8), kernel=kernel, iterations=1)

    # The histogram only covers pixels that are not white in the base image
    img_w_s = cv2.resize(gray, dsize=(ydig.shape[::-1])) > 220
    ydig_nowhite = ydig[np.logical_not(img_w_s)]
    savehist(ydig_nowhite, colors, dst)

    ydig_dst = repext(dst, '.ydig.png')
    print(ydig.shape, '-->', ydig_dst)
    cv2.imwrite(ydig_dst, ydig * (255. / args.n_colors))

    # Back up to the base image size; nearest-neighbour keeps bin indices
    # intact.
    ydig = cv2.resize(ydig,
                      fx=0,
                      fy=0,
                      dsize=ishape,
                      interpolation=cv2.INTER_NEAREST)

    ycolor = color_mask(ydig, colors)
    img = np.add(img * mixture[0], ycolor * mixture[1])

    # Whiten the background: a 2-D boolean mask on the leading axes sets
    # every channel of the selected pixels at once.
    img[np.logical_or(img_w, img_b)] = 255
    return cv2.convertScaleAbs(img)
Example #7
0
def main(args):
    """Run a MilkEager model over each listed slide and save the result.

    ``args.slides`` is a text file with one slide path per line. Each slide
    is staged with ``cpramdisk``, opened as a ``Slide``, processed by the
    ``compute_fn`` selected through ``args.iter_type`` ('python' or 'tf'),
    and the returned array is saved with ``np.save`` (last axis reversed)
    to ``repext(src, args.suffix)``. The staged copy is always removed.

    Errors while processing one slide are printed and do not stop the loop.

    Raises:
        ValueError: if ``args.iter_type`` is neither 'python' nor 'tf'.
    """
    # Define a compute_fn that should do three things:
    # 1. define an iterator over the slide's tiles
    # 2. compute an output with given model parameter
    # 3. return the assembled output image

    if args.iter_type == 'python':

        def compute_fn(slide, args, model=None):
            """Apply the model batch by batch and place each result."""
            print('Slide with {}'.format(len(slide.tile_list)))
            it_factory = PythonIterator(slide, args)
            for k, (img, idx) in enumerate(it_factory.yield_batch()):
                prob = model(img)
                if k % 50 == 0:
                    print('Batch #{:04d} idx:{} img:{} prob:{}'.format(
                        k, idx.shape, img.shape, prob.shape))
                # NOTE(review): this branch writes/reads the 'prob' output
                # while main() below initializes 'att' -- confirm the
                # 'python' path is still expected to work.
                slide.place_batch(prob, idx, 'prob', mode='tile')
            ret = slide.output_imgs['prob']
            return ret

    # Tensorflow multithreaded queue-based iterator (in eager mode)
    elif args.iter_type == 'tf':

        def compute_fn(slide, args, model=None):
            """Encode all tiles, then run MIL attention over the full bag."""
            assert tf.executing_eagerly()
            print('Slide with {}'.format(len(slide.tile_list)))

            # In eager mode, we return a tf.contrib.eager.Iterator
            eager_iterator = TensorflowIterator(slide, args).make_iterator()

            # The iterator can be used directly. Queueing and multithreading
            # are handled in the backend by the tf.data.Dataset ops
            features, indices = [], []
            for k, (img, idx) in enumerate(eager_iterator):
                # img = tf.expand_dims(img, axis=0)
                features.append(
                    model.encode_bag(img, training=False, return_z=True))
                indices.append(idx.numpy())

                img, idx = img.numpy(), idx.numpy()
                if k % 50 == 0:
                    print('Batch #{:04d}\t{}'.format(k, img.shape))

            # Attention is computed once, over the features of every batch
            features = tf.concat(features, axis=0)
            z_att, att = model.mil_attention(features,
                                             training=False,
                                             return_raw_att=True)
            att = np.squeeze(att)
            indices = np.concatenate(indices)
            slide.place_batch(att, indices, 'att', mode='tile')
            ret = slide.output_imgs['att']
            return ret

    else:
        # Without this, compute_fn stays undefined and the problem only
        # shows up later as a swallowed NameError on every single slide.
        raise ValueError(
            'Unknown iter_type {!r}; expected "python" or "tf"'.format(
                args.iter_type))

    # Set up the model first
    encoder_args = get_encoder_args(args.encoder)
    model = MilkEager(encoder_args=encoder_args,
                      mil_type=args.mil,
                      deep_classifier=args.deep_classifier,
                      batch_size=args.batchsize,
                      temperature=args.temperature,
                      heads=args.heads)

    # Call the model once on a dummy input before loading the snapshot
    # (presumably so the weights exist for load_weights to fill -- confirm).
    x = tf.zeros((1, 1, args.process_size, args.process_size, 3))
    _ = model(x, verbose=True, head='all', training=True)
    model.load_weights(args.snapshot, by_name=True)

    # keras Model subclass
    model.summary()

    # Read list of inputs
    with open(args.slides, 'r') as f:
        slides = [x.strip() for x in f]

    # Loop over slides
    for src in slides:
        # Dirty substitution of the file extension give us the
        # destination.
        # Set the --suffix option to reflect the model / type of processed
        # output.
        # NOTE(review): nothing here skips a slide whose destination already
        # exists; existing outputs are recomputed and overwritten.
        dst = repext(src, args.suffix)

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('File:', rdsrc)

        # Wrapped inside of a try-except-finally.
        # We want to make sure the staged copy gets removed in case
        # there's an error or stop signal in the middle of processing.
        try:
            # Initialize the slide from our temporary path, with
            # the arguments passed in from command-line.
            # This returns an svsutils.Slide object
            slide = Slide(rdsrc, args)

            # This step will eventually be included in slide creation
            # with some default compute_fn's provided by svsutils
            # For now, do it case-by-case, and use the compute_fn
            # that we defined just above.
            slide.initialize_output('att',
                                    args.n_classes,
                                    mode='tile',
                                    compute_fn=compute_fn)

            # Call the compute function to compute this output.
            # Again, this may change to something like...
            #     slide.compute_all
            # which would loop over all the defined output types.
            ret = slide.compute('att', args, model=model)
            print('{} --> {}'.format(ret.shape, dst))
            # The last axis is reversed before saving
            np.save(dst, ret[:, :, ::-1])
        except Exception as e:
            # Best effort: report and move on to the next slide
            print(e)
            traceback.print_tb(e.__traceback__)
        finally:
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)
Example #8
0
def main(args):
  """Save per-tile attention and encoded features for each listed slide.

  ``args.slides`` is a text file with one slide path per line. For every
  slide two arrays are written with ``np.save``: the attention image to
  ``repext(src, args.suffix)`` and the encoded features to
  ``repext(src, args.suffix + '.feat.npy')``. Each slide is staged with
  ``cpramdisk`` first and the staged copy is always removed afterwards.
  Errors while processing one slide are printed and do not stop the loop.
  """


  # Define a compute_fn that should do three things:
  # 1. define an iterator over the slide's tiles
  # 2. compute an output with a given model / arguments
  # 3. return a reconstructed slide
  def compute_fn(slide, args, model=None, n_dropout=10 ):
    """Encode every tile batch, run MIL attention, and place it on the slide.

    Returns a tuple ``(ret, features)``: the slide's 'att' output image and
    the concatenated encoded features as a numpy array.

    ``n_dropout`` is not used by the active code; only the commented-out
    sample-dropout sketch below refers to it.
    """
    assert tf.executing_eagerly()
    print('Slide with {}'.format(len(slide.tile_list)))

    # In eager mode, we return a tf.contrib.eager.Iterator
    eager_iterator = TensorflowIterator(slide, args).make_iterator()

    # The iterator can be used directly. Queueing and multithreading
    # are handled in the backend by the tf.data.Dataset ops
    features, indices = [], []
    for k, (img, idx) in enumerate(eager_iterator):
      # img = tf.expand_dims(img, axis=0)
      features.append( model.encode_bag(img, training=False, return_z=True) )
      indices.append(idx.numpy())

      img, idx = img.numpy(), idx.numpy()
      if k % 50 == 0:
        print('Batch #{:04d}\t{}'.format(k, img.shape))

    # Attention below is computed once, over the features of every batch
    features = tf.concat(features, axis=0)

    ## Sample-dropout (disabled sketch, kept for reference)
    # features = features.numpy()
    # print(features.shape)
    # n_instances = features.shape[0]
    # att = np.zeros(n_instances)
    # n_choice = int(n_instances * 0.7)
    # all_heads = list(range(args.heads))
    # for j in range(n_dropout):
    #   idx = np.random.choice(range(n_instances), n_choice, replace=False)
    #   print(idx)
    #   fdrop = features[idx, :]

    z_att, att = model.mil_attention(features,
                                     training=False, 
                                     return_raw_att=True)

    # att[idx] += np.squeeze(attdrop)
    # `all_heads` is not a local here: it is looked up in the enclosing
    # main() scope, where it is assigned further down, before compute_fn
    # is ever called.
    yhat_multihead = model.apply_classifier(z_att, heads=all_heads, 
      training=False)
    print('yhat mean {}'.format(np.mean(yhat_multihead, axis=0)))

    indices = np.concatenate(indices)
    att = np.squeeze(att)
    slide.place_batch(att, indices, 'att', mode='tile')
    ret = slide.output_imgs['att']
    print('Got attention image: {}'.format(ret.shape))

    return ret, features.numpy()




  ## Begin main script:
  # Set up the model first
  encoder_args = get_encoder_args(args.encoder)
  model = MilkEager(encoder_args=encoder_args,
                    mil_type=args.mil,
                    deep_classifier=args.deep_classifier,
                    batch_size=args.batchsize,
                    temperature=args.temperature,
                    heads = args.heads)
  
  # Call the model once on a dummy input before loading the snapshot
  # (presumably so the weights exist for load_weights to fill -- confirm).
  x = tf.zeros((1, 1, args.process_size,
                args.process_size, 3))
  # NOTE(review): hard-coded to ten heads regardless of args.heads --
  # confirm this matches the snapshot being loaded.
  all_heads = [0,1,2,3,4,5,6,7,8,9]
  _ = model(x, verbose=True, heads=all_heads, training=True)
  model.load_weights(args.snapshot, by_name=True)

  # keras Model subclass
  model.summary()

  # Read list of inputs
  with open(args.slides, 'r') as f:
    slides = [x.strip() for x in f]

  # Loop over slides
  for src in slides:
    # Dirty substitution of the file extension give us the
    # destination.
    # Set the --suffix option to reflect the model / type of processed output
    # NOTE(review): nothing here skips a slide whose destination already
    # exists; existing outputs are recomputed and overwritten.
    dst = repext(src, args.suffix)
    featdst = repext(src, args.suffix+'.feat.npy')

    # Loading data from ramdisk incurs a one-time copy cost
    rdsrc = cpramdisk(src, args.ramdisk)
    print('\n\nFile:', rdsrc)

    # Wrapped inside of a try-except-finally.
    # We want to make sure the staged copy gets removed in case
    # there's an error or stop signal in the middle of processing.
    try:
      # Initialize the slide from our temporary path, with
      # the arguments passed in from command-line.
      # This returns an svsutils.Slide object
      slide = Slide(rdsrc, args)

      # This step will eventually be included in slide creation
      # with some default compute_fn's provided by svsutils
      # For now, do it case-by-case, and use the compute_fn
      # that we defined just above.
      slide.initialize_output('att', args.n_classes, mode='tile',
        compute_fn=compute_fn)

      # Call the compute function to compute this output.
      # Again, this may change to something like...
      #     slide.compute_all
      # which would loop over all the defined output types.
      # compute_fn returns a pair, so the result is unpacked here.
      ret, features = slide.compute('att', args, model=model)
      print('{} --> {}'.format(ret.shape, dst))
      print('{} --> {}'.format(features.shape, featdst))
      np.save(dst, ret)
      np.save(featdst, features)
    except Exception as e:
      # Best effort: report and move on to the next slide
      print(e)
      traceback.print_tb(e.__traceback__)
    finally:
      print('Removing {}'.format(rdsrc))
      os.remove(rdsrc)
Example #9
0
def main(args, sess):
    """Run graph-mode inference over each listed slide and record timings.

    ``args.slides`` is a text file with one slide path per line. Slides
    whose destination ``repext(src, args.suffix)`` already exists are
    skipped. Every other slide is staged with ``cpramdisk``, processed, and
    its 'prob' output is scaled by 255, cast to uint8 and saved with
    ``np.save``. The staged copy is always removed and the slide object is
    closed when one was created.

    For slides that finish, the staged path, tile count, elapsed time and
    tiles-per-second are collected and passed to ``write_times`` at the end.
    Errors while processing one slide are printed and do not stop the loop.

    Args:
        args: parsed command-line options.
        sess: the TensorFlow session used for the iterator and the model.
    """
    # Define a compute_fn that should do three things:
    # 1. define an iterator over the slide's tiles
    # 2. compute an output with given model parameter
    # 3. assemble / gather the output
    #
    # compute_fn - function can define part of a computation
    # graph in eager mode -- possibly in graph mode.
    # We should completely reset the graph each call then
    # I still don't know how nodes are actually represented in memory
    # or if keeping them around has a real cost.

    def compute_fn(slide, args, sess=None):
        """Pull batches until the iterator is exhausted and place each result.

        ``model`` is resolved from the enclosing main() scope. Any error
        other than the iterator running out is printed and ends the loop
        early; the outputs placed so far are still assembled and returned.
        """
        # assert tf.executing_eagerly()
        print('\n\nSlide with {}'.format(len(slide.tile_list)))

        # I'm not sure if spinning up new ops every time is bad.
        # In this example the iterator is separate from the
        # inference function, it can also be set up with the two
        # connected to skip the feed_dict
        tf_iterator = TensorflowIterator(slide, args).make_iterator()
        img_op, idx_op = tf_iterator.get_next()
        # prob_op = model(img_op)
        # sess.run(tf.global_variables_initializer())

        # The iterator can be used directly. Queueing and multithreading
        # are handled in the backend by the tf.data.Dataset ops
        # for k, (img, idx) in enumerate(eager_iterator):
        # k counts batches, nk counts images seen so far
        k, nk = 0, 0
        while True:
            try:
                img, idx = sess.run([
                    img_op,
                    idx_op,
                ])
                prob = model.inference(img)
                nk += img.shape[0]
                slide.place_batch(prob, idx, 'prob', mode='full', clobber=True)
                k += 1

                if k % 50 == 0:
                    prstr = 'Batch #{:04d} idx:{} img:{} ({:2.2f}-{:2.2f}) prob:{} T {} \
          '.format(k, idx.shape, img.shape, img.min(), img.max(), prob.shape,
                    nk)
                    print(prstr)
                    if args.verbose:
                        print('More info: ')
                        print('img: ', img.dtype, img.min(), img.max(),
                              img.mean())
                        # Per-class count of the arg-max prediction
                        pmax = np.argmax(prob, axis=-1).ravel()
                        for u in range(args.n_classes):
                            count_u = (pmax == u).sum()
                            print('- class {:02d} : {}'.format(u, count_u))

            except tf.errors.OutOfRangeError:
                # Normal termination: the iterator has no more batches
                print('Finished.')
                print('Total: {}'.format(nk))
                break

            except Exception as e:
                print(e)
                traceback.print_tb(e.__traceback__)
                break

        # We've exited the loop. Clean up the iterator
        del tf_iterator, idx_op, img_op

        slide.make_outputs()
        ret = slide.output_imgs['prob']
        return ret

    # Set up the model first
    model = gg.get_model(args.model, sess, args.process_size, args.n_classes)
    # NOTE big time wasted because you have to initialize,
    # THEN run the restore op to replace the already-created weights
    sess.run(tf.global_variables_initializer())
    model.restore(args.snapshot)

    # Read list of inputs
    with open(args.slides, 'r') as f:
        slides = [x.strip() for x in f]

    # Loop over slides; Record times
    nslides = len(slides)
    successes, ntiles, total_time, fpss = [], [], [], []
    for i, src in enumerate(slides):
        # Dirty substitution of the file extension give us the
        # destination. Do this first so we can just skip the slide
        # if this destination already exists.
        # Set the --suffix option to reflect the model / type of processed output
        dst = repext(src, args.suffix)
        if os.path.exists(dst):
            print('{} Exists.'.format(dst))
            continue

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)

        # Wrapped inside of a try-except-finally.
        # We want to make sure the slide gets cleaned from
        # memory in case there's an error or stop signal in the
        # middle of processing.
        try:
            # Initialize the slide from our temporary path, with
            # the arguments passed in from command-line.
            # This returns an svsutils.Slide object
            print('\n\n-------------------------------')
            print('File:', rdsrc, '{:04d} / {:04d}'.format(i, nslides))
            t0 = time.time()
            slide = Slide(rdsrc, args)

            # This step will eventually be included in slide creation
            # with some default compute_fn's provided by svsutils
            # For now, do it case-by-case, and use the compute_fn
            # that we defined just above.
            # TODO pull the expected output size from the model.. ?
            # support common model types - keras, tfmodels, tfhub..
            slide.initialize_output('prob',
                                    args.n_classes,
                                    mode='full',
                                    compute_fn=compute_fn)

            # Call the compute function to compute this output.
            # Again, this may change to something like...
            #     slide.compute_all
            # which would loop over all the defined output types.
            ret = slide.compute('prob', args, sess=sess)
            print('{} --> {}'.format(ret.shape, dst))
            ret = (ret * 255).astype(np.uint8)
            np.save(dst, ret)

            # If it finishes, record some stats
            tend = time.time()
            deltat = tend - t0
            fps = len(slide.tile_list) / float(deltat)
            successes.append(rdsrc)
            ntiles.append(len(slide.tile_list))
            total_time.append(deltat)
            fpss.append(fps)
        except Exception as e:
            # Best effort: report and move on to the next slide
            print(e)
            traceback.print_tb(e.__traceback__)
        finally:
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)
            try:
                print('Cleaning slide object')
                slide.close()
                del slide
            except Exception:
                # Reached when Slide() itself failed, so `slide` is unbound
                # (it is deleted after every successful cleanup). Catching
                # Exception rather than everything lets KeyboardInterrupt
                # and SystemExit propagate.
                print('No slide object not found to clean up ?')

    write_times(args.timefile, successes, ntiles, total_time, fpss)
Example #10
0
def main(args, sess):
    """Run a loaded model over each listed slide in graph mode and save 'prob'.

    ``args.slides`` is a text file with one slide path per line. Each slide
    is staged with ``cpramdisk``, opened as a ``Slide``, processed through
    ``compute_fn`` and the returned array is saved with ``np.save`` to
    ``repext(src, args.suffix)``. The staged copy is always removed.
    Errors while processing one slide are printed and do not stop the loop.

    Args:
        args: parsed command-line options.
        sess: the TensorFlow session used for the iterator and the model op.
    """
    # Define a compute_fn that should do three things:
    # 1. define an iterator over the slide's tiles
    # 2. compute an output with given model parameter
    # 3. return the assembled output image

    # Disabled pure-python iterator variant, kept for reference:
    # def compute_fn(slide, args, model=None):
    #   print('Slide with {}'.format(len(slide.tile_list)))
    #   it_factory = PythonIterator(slide, args)
    #   for k, (img, idx) in enumerate(it_factory.yield_batch()):
    #     prob = model.predict_on_batch(img)
    #     if k % 50 == 0:
    #       print('Batch #{:04d} idx:{} img:{} prob:{} \
    #       '.format(k, idx.shape, img.shape, prob.shape))
    #     slide.place_batch(prob, idx, 'prob', mode='tile')
    #   ret = slide.output_imgs['prob']
    #   return ret

    # Tensorflow multithreaded queue-based iterator (in eager mode)
    # elif args.iter_type == 'tf':

    def compute_fn(slide, args, sess=None, img_pl=None, prob_op=None):
        """Pull batches until the iterator is exhausted and place each result.

        Each image batch is fetched with one ``sess.run`` and then fed to
        ``prob_op`` through the ``img_pl`` placeholder with a second one.
        Only the iterator running out is handled here; any other error
        propagates to the caller.
        """
        # assert tf.executing_eagerly()
        print('\n\nSlide with {}'.format(len(slide.tile_list)))

        # I'm not sure if spinning up new ops every time is bad.
        tf_iterator = TensorflowIterator(slide, args).make_iterator()
        img_op, idx_op = tf_iterator.get_next()
        # prob_op = model(img_op)
        # sess.run(tf.global_variables_initializer())

        # The iterator can be used directly. Queueing and multithreading
        # are handled in the backend by the tf.data.Dataset ops
        # for k, (img, idx) in enumerate(eager_iterator):
        # k counts batches, nk counts images seen so far
        k, nk = 0, 0
        while True:
            try:
                img, idx = sess.run([
                    img_op,
                    idx_op,
                ])
                prob = sess.run(prob_op, {img_pl: img})
                nk += img.shape[0]
                if k % 50 == 0:
                    # NOTE(review): the arguments are img.max() then
                    # img.shape, so "img:" prints the max and the
                    # parentheses print the shape -- possibly swapped.
                    print('Batch #{:04d} idx:{} img:{} ({}) prob:{} T {} \
          '.format(k, idx.shape, img.max(), img.shape, prob.shape, nk))
                slide.place_batch(prob, idx, 'prob', mode='tile')
                k += 1
            except tf.errors.OutOfRangeError:
                # Normal termination: the iterator has no more batches
                print('Finished.')
                print('Total: {}'.format(nk))
                break
            finally:
                # Runs on every iteration (and on the break above), so
                # `ret` is always bound by the time the loop exits.
                ret = slide.output_imgs['prob']
        return ret

    # Set up the model first

    # Set up a placeholder for the input
    img_pl = tf.placeholder(tf.float32,
                            (None, args.process_size, args.process_size, 3))
    model = load_model(args.snapshot)
    prob_op = model(img_pl)
    # NOTE(review): the initializer runs after load_model; if the loaded
    # weights are variables in this session's graph this may reset them --
    # confirm.
    sess.run(tf.global_variables_initializer())

    # Read list of inputs
    with open(args.slides, 'r') as f:
        slides = [x.strip() for x in f]

    # Loop over slides
    for src in slides:
        # Dirty substitution of the file extension give us the
        # destination.
        # Set the --suffix option to reflect the model / type of processed output
        # NOTE(review): nothing here skips a slide whose destination already
        # exists; existing outputs are recomputed and overwritten.
        dst = repext(src, args.suffix)

        # Loading data from ramdisk incurs a one-time copy cost
        rdsrc = cpramdisk(src, args.ramdisk)
        print('File:', rdsrc)

        # Wrapped inside of a try-except-finally.
        # We want to make sure the staged copy gets removed in case
        # there's an error or stop signal in the middle of processing.
        try:
            # Initialize the slide from our temporary path, with
            # the arguments passed in from command-line.
            # This returns an svsutils.Slide object
            print('\n\n-------------------------------')
            slide = Slide(rdsrc, args)

            # This step will eventually be included in slide creation
            # with some default compute_fn's provided by svsutils
            # For now, do it case-by-case, and use the compute_fn
            # that we defined just above.
            # NOTE(review): the second argument is hard-coded to 4 here,
            # not taken from args -- confirm it matches the loaded model.
            slide.initialize_output('prob',
                                    4,
                                    mode='tile',
                                    compute_fn=compute_fn)

            # Call the compute function to compute this output.
            # Again, this may change to something like...
            #     slide.compute_all
            # which would loop over all the defined output types.
            ret = slide.compute('prob',
                                args,
                                sess=sess,
                                img_pl=img_pl,
                                prob_op=prob_op)
            print('{} --> {}'.format(ret.shape, dst))
            np.save(dst, ret)
        except Exception as e:
            # Best effort: report and move on to the next slide
            print(e)
            traceback.print_tb(e.__traceback__)
        finally:
            print('Removing {}'.format(rdsrc))
            os.remove(rdsrc)