Example #1
0
 def setUp(self):
     """Reset the default TF graph and build the resize config fixture.

     The minimum and maximum image sizes are read from the
     ``dataset.image_preprocessing`` section of the Faster R-CNN model
     class' ``base_config`` and stored on ``self.config``.
     """
     tf.reset_default_graph()
     preprocessing = (
         get_model('fasterrcnn').base_config.dataset.image_preprocessing
     )
     min_size = preprocessing.min_size
     max_size = preprocessing.max_size
     self.config = EasyDict({
         'image_resize_min': min_size,
         'image_resize_max': max_size,
     })
Example #2
0
def get_config(config_files, override_params=None):
    """Build the final model config from user config files.

    Args:
        config_files: Passed to `load_config_files` to obtain the custom
            config; its ``['model']['type']`` entry selects the model class.
        override_params: Optional overrides forwarded to `get_model_config`.

    Returns:
        The value returned by `get_model_config` for the model's base config,
        the custom config and `override_params`.
    """
    user_config = load_config_files(config_files)
    base_config = get_base_config(get_model(user_config['model']['type']))
    return get_model_config(base_config, user_config, override_params)
Example #3
0
 def setUp(self):
     """Reset the default TF graph and build the resize config fixture.

     The sizes come from the ``dataset.image_preprocessing`` section of the
     base config obtained via `get_base_config` for the Faster R-CNN model.
     """
     tf.reset_default_graph()
     faster_rcnn_config = get_base_config(get_model("fasterrcnn"))
     preprocessing = faster_rcnn_config.dataset.image_preprocessing
     resize_settings = {
         "image_resize_min": preprocessing.min_size,
         "image_resize_max": preprocessing.max_size,
     }
     self.config = EasyDict(resize_settings)
Example #4
0
def get_config(config_files, override_params=None):
    """Load the custom config and merge it over the model's base config.

    Args:
        config_files: Forwarded to `load_config_files`. The loaded config must
            provide ``['model']['type']``, which is used to look up the model.
        override_params: Optional overrides forwarded to `get_model_config`.

    Returns:
        The merged config produced by `get_model_config`.
    """
    loaded = load_config_files(config_files)
    model_type = loaded['model']['type']
    defaults = get_base_config(get_model(model_type))
    return get_model_config(defaults, loaded, override_params)
Example #5
0
 def setUp(self):
     """Reset the default TF graph and build the resize config fixture.

     Reads ``min_size``/``max_size`` from the Faster R-CNN base config's
     ``dataset.image_preprocessing`` section and exposes them on
     ``self.config``.
     """
     tf.reset_default_graph()
     defaults = get_base_config(get_model('fasterrcnn'))
     preprocessing = defaults.dataset.image_preprocessing
     resize_settings = {
         'image_resize_min': preprocessing.min_size,
         'image_resize_max': preprocessing.max_size,
     }
     self.config = EasyDict(resize_settings)
    def get_config(self, model_type, override_params=None):
        """Return a merged config whose ``model.type`` is `model_type`.

        The base config always comes from the Faster R-CNN model class; the
        custom config is loaded from ``self.config.config_files``. After
        merging, ``model.type`` is overwritten with `model_type`.
        """
        user_config = load_config_files(self.config.config_files)
        defaults = get_base_config(get_model('fasterrcnn'))
        merged = get_model_config(defaults, user_config, override_params)
        merged.model.type = model_type
        return merged
Example #7
0
def get_prediction(model_name, image, checkpoint_file=None, classes_file=None):
    """Run the model named `model_name` on `image` and return its detections.

    The graph, session and tensors built for a model are stored in
    `LOADED_MODELS` under `model_name` and reused by later calls, so
    `checkpoint_file` is only consulted the first time a model is requested.

    Args:
        model_name: Name passed to `get_model`; also the cache key.
        image: Image handed to `resize_image` before being fed to the model.
        checkpoint_file: Optional checkpoint to restore. When omitted, the
            variables are initialized with the global/local initializers.
        classes_file: Optional path to a JSON file used to translate the
            predicted labels into class names.

    Returns:
        A dict with the keys ``objects``, ``objects_labels``,
        ``objects_labels_prob``, ``inference_time`` (seconds spent in
        ``session.run``) and ``scale_factor`` (as returned by
        `resize_image`).
    """

    model_class = get_model(model_name)

    if model_name in LOADED_MODELS:
        image_tensor, output, graph, session = LOADED_MODELS[model_name]
    else:
        graph = tf.Graph()
        session = tf.Session(graph=graph)

        with graph.as_default():
            image_tensor = tf.placeholder(tf.float32, (1, None, None, 3))
            model = model_class(model_class.base_config)
            output = model(image_tensor)
            if checkpoint_file:
                saver = tf.train.Saver(sharded=True, allow_empty=True)
                saver.restore(session, checkpoint_file)
            else:
                init_op = tf.group(tf.global_variables_initializer(),
                                   tf.local_variables_initializer())
                session.run(init_op)

        LOADED_MODELS[model_name] = (image_tensor, output, graph, session)

    classification_prediction = output['classification_prediction']
    objects_tf = classification_prediction['objects']
    objects_labels_tf = classification_prediction['labels']
    objects_labels_prob_tf = classification_prediction['probs']
    image_resize_config = model_class.base_config.dataset.image_preprocessing

    image_array, scale_factor = resize_image(
        image, float(image_resize_config.min_size),
        float(image_resize_config.max_size))

    # Only the `session.run` call is timed, not the resize above.
    start_time = time.time()
    objects, objects_labels, objects_labels_prob = session.run(
        [objects_tf, objects_labels_tf, objects_labels_prob_tf],
        feed_dict={image_tensor: image_array})
    end_time = time.time()

    if classes_file:
        # Gets the names of the classes. The file is opened in a `with`
        # block so the handle is closed once the JSON has been parsed.
        with tf.gfile.GFile(classes_file) as classes_fp:
            class_labels = json.load(classes_fp)
        objects_labels = [class_labels[obj] for obj in objects_labels]

    else:
        objects_labels = objects_labels.tolist()

    return {
        'objects': objects.tolist(),
        'objects_labels': objects_labels,
        'objects_labels_prob': objects_labels_prob.tolist(),
        'inference_time': end_time - start_time,
        'scale_factor': scale_factor,
    }
Example #8
0
    def get_config(self, model_type, override_params=None):
        """Build a merged config and force its ``model.type`` to `model_type`.

        The custom config is loaded from ``self.config.config_files`` and
        merged over the Faster R-CNN base config (together with
        `override_params`) before ``model.type`` is overwritten.
        """
        loaded = load_config_files(self.config.config_files)
        faster_rcnn_defaults = get_base_config(get_model('fasterrcnn'))
        result = get_model_config(
            faster_rcnn_defaults, loaded, override_params
        )
        result.model.type = model_type
        return result
Example #9
0
def eval(
    dataset_split,
    config_files,
    watch,
    from_global_step,
    override_params,
    files_per_class,
    max_detections,
):
    """Evaluate models using dataset.

    Builds the model and dataset graph once, then evaluates the checkpoints
    found under ``<job_dir>/<run_name>`` with `evaluate_once`.

    Args:
        dataset_split: Dataset split to evaluate; written to
            ``config.dataset.split`` and used to name losses and metrics.
        config_files: Config files forwarded to `get_config`.
        watch: When falsy, return after one pass over the checkpoints (only
            the last one is requested). When truthy, keep polling for new
            checkpoints every 5 seconds.
        from_global_step: Initial global step passed to `get_checkpoints`.
        override_params: Config overrides forwarded to `get_config`.
        files_per_class: Forwarded to `evaluate_once`.
        max_detections: Maximum number of detections written into the model
            config before the model is built.

    Raises:
        KeyError: If `get_config` raises `KeyError`, or if ``job_dir`` or
            ``run_name`` is not set in the config.
        ValueError: If the model type is neither ``fasterrcnn`` nor ``ssd``,
            or (when not watching) if `get_checkpoints` raises it.
    """

    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError("model.type should be set on the custom config.")

    if not config.train.job_dir:
        raise KeyError("`job_dir` should be set.")
    if not config.train.run_name:
        raise KeyError("`run_name` should be set.")

    # `run_dir` is where the actual checkpoint and logs are located.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)

    # Only activate debug if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == "debug"

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split

    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Attempt to get class names, if available. The file is opened in a
    # `with` block so the handle is closed once the JSON has been parsed.
    classes_file = os.path.join(config.dataset.dir, "classes.json")
    if tf.gfile.Exists(classes_file):
        with tf.gfile.GFile(classes_file) as classes_fp:
            class_labels = json.load(classes_fp)
    else:
        class_labels = None

    if config.model.type == "fasterrcnn":
        # Override max detections with specified value.
        if config.model.network.with_rcnn:
            config.model.rcnn.proposals.total_max_detections = max_detections
        else:
            config.model.rpn.proposals.post_nms_top_n = max_detections

        # Also overwrite `min_prob_threshold` in order to use all detections.
        config.model.rcnn.proposals.min_prob_threshold = 0.0
    elif config.model.type == "ssd":
        config.model.proposals.total_max_detections = max_detections
        config.model.proposals.min_prob_threshold = 0.0
    else:
        raise ValueError("Model type '{}' not supported".format(
            config.model.type))

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Seed setup.
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training.
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset["image"]
    train_objects = train_dataset["bboxes"]
    train_filename = train_dataset["filename"]

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(train_image, train_objects)

    if config.model.type == "ssd" or config.model.network.with_rcnn:
        pred = prediction_dict["classification_prediction"]
        pred_objects = pred["objects"]
        pred_objects_classes = pred["labels"]
        pred_objects_scores = pred["probs"]
    else:
        # Force the num_classes to 1.
        config.model.network.num_classes = 1

        pred = prediction_dict["rpn_prediction"]
        pred_objects = pred["proposals"]
        pred_objects_scores = pred["scores"]
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros((tf.shape(pred_objects_scores)[0], ),
                                        dtype=tf.int32)

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor,
            name=loss_name,
            metrics_collections="metrics",
            updates_collections="metric_ops",
        )
        full_loss_name = "{}_losses/{}".format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    # Collected after the loop above so it contains every update op that the
    # `tf.metrics.mean` calls registered.
    metric_ops = tf.get_collection("metric_ops")

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        "init_op": init_op,
        "metric_ops": metric_ops,
        "pred_objects": pred_objects,
        "pred_objects_classes": pred_objects_classes,
        "pred_objects_scores": pred_objects_scores,
        "train_objects": train_objects,
        "losses": losses,
        "prediction_dict": prediction_dict,
        "filename": train_filename,
        "train_image": train_image,
    }

    metrics_scope = "{}_metrics".format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(run_dir)

    # Shared across `evaluate_once` calls.
    files_to_visualize = {}

    last_global_step = from_global_step
    while True:
        # Get the checkpoint files to evaluate.
        try:
            checkpoints = get_checkpoints(run_dir,
                                          last_global_step,
                                          last_only=not watch)
        except ValueError as e:
            if not watch:
                tf.logging.error("Missing checkpoint.")
                raise e

            tf.logging.warning(
                "Missing checkpoint; Checking again in a moment")
            time.sleep(5)
            continue

        for checkpoint in checkpoints:
            # Always returned in order, so it's safe to assign directly.
            tf.logging.info(
                "Evaluating global_step {} using checkpoint '{}'".format(
                    checkpoint["global_step"], checkpoint["file"]))
            try:
                start = time.time()
                evaluate_once(
                    config,
                    writer,
                    saver,
                    ops,
                    checkpoint,
                    class_labels=class_labels,
                    metrics_scope=metrics_scope,
                    image_vis=config.eval.image_vis,
                    files_per_class=files_per_class,
                    files_to_visualize=files_to_visualize,
                )
                last_global_step = checkpoint["global_step"]
                tf.logging.info("Evaluated in {:.2f}s".format(time.time() -
                                                              start))
            except tf.errors.NotFoundError:
                # The checkpoint is not ready yet. It was written in the
                # checkpoints file, but it still hasn't been completely saved.
                # `last_global_step` is not advanced in this case.
                tf.logging.info("Checkpoint {} is not ready yet. "
                                "Checking again in a moment.".format(
                                    checkpoint["file"]))
                time.sleep(5)
                continue

        # If no watching was requested, finish the execution.
        if not watch:
            return

        # Sleep for a moment and check for new checkpoints.
        tf.logging.info("All checkpoints evaluated; sleeping for a moment")
        time.sleep(5)
Example #10
0
def evaluate(dataset_split, config_files, job_dir, watch,
             from_global_step, override_params, files_per_class):
    """
    Evaluate models using dataset.

    Builds the model and dataset graph once, then evaluates the checkpoints
    returned by `get_checkpoints` with `evaluate_once`.

    Args:
        dataset_split: Dataset split to evaluate; written to
            ``config.dataset.split`` and used to name losses and metrics.
        config_files: Config files forwarded to `get_config`.
        job_dir: When truthy, overrides ``config.train.job_dir``.
        watch: When falsy, return after one pass over the checkpoints. When
            truthy, keep polling for new checkpoints every 60 seconds.
        from_global_step: Initial global step passed to `get_checkpoints`.
        override_params: Config overrides forwarded to `get_config`.
        files_per_class: Forwarded to `evaluate_once`.

    Raises:
        KeyError: If `get_config` raises `KeyError`.
        ValueError: Re-raised from `get_checkpoints` when not watching.
    """
    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError('model.type should be set on the custom config.')

    config.train.job_dir = job_dir or config.train.job_dir

    # Only activate debug if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == 'debug'

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split
    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Seed setup
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset['image']
    train_objects = train_dataset['bboxes']
    train_filename = train_dataset['filename']

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(
        train_image, train_objects
    )

    if config.model.network.with_rcnn:
        pred = prediction_dict['classification_prediction']
        pred_objects = pred['objects']
        pred_objects_classes = pred['labels']
        pred_objects_scores = pred['probs']
    else:
        # Force the num_classes to 1
        config.model.network.num_classes = 1

        pred = prediction_dict['rpn_prediction']
        pred_objects = pred['proposals']
        pred_objects_scores = pred['scores']
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros(
            (tf.shape(pred_objects_scores)[0],), dtype=tf.int32
        )

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor, name=loss_name,
            metrics_collections='metrics',
            updates_collections='metric_ops',
        )
        full_loss_name = '{}_losses/{}'.format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    # Collected after the loop above so it contains every update op that the
    # `tf.metrics.mean` calls registered.
    metric_ops = tf.get_collection('metric_ops')

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        'init_op': init_op,
        'metric_ops': metric_ops,
        'pred_objects': pred_objects,
        'pred_objects_classes': pred_objects_classes,
        'pred_objects_scores': pred_objects_scores,
        'train_objects': train_objects,
        'losses': losses,
        'prediction_dict': prediction_dict,
        'filename': train_filename,
        'train_image': train_image
    }

    metrics_scope = '{}_metrics'.format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(config.train.job_dir)

    # Shared across `evaluate_once` calls.
    files_to_visualize = {}

    last_global_step = from_global_step
    while True:
        # Get the checkpoint files to evaluate.
        try:
            checkpoints = get_checkpoints(config, last_global_step)
        except ValueError as e:
            if not watch:
                tf.logging.error('Missing checkpoint.')
                raise e

            tf.logging.warning(
                'Missing checkpoint; Checking again in a minute')
            time.sleep(60)
            continue

        for checkpoint in checkpoints:
            # Always returned in order, so it's safe to assign directly.
            tf.logging.info(
                'Evaluating global_step {} using checkpoint \'{}\''.format(
                    checkpoint['global_step'], checkpoint['file']
                )
            )
            try:
                start = time.time()
                evaluate_once(
                    config, writer, saver, ops, checkpoint,
                    metrics_scope=metrics_scope,
                    image_vis=config.eval.image_vis,
                    files_per_class=files_per_class,
                    files_to_visualize=files_to_visualize
                )
                last_global_step = checkpoint['global_step']
                tf.logging.info('Evaluated in {:.2f}s'.format(
                    time.time() - start
                ))
            except tf.errors.NotFoundError:
                # The checkpoint is not ready yet. It was written in the
                # checkpoints file, but it still hasn't been completely saved.
                # `last_global_step` is not advanced in this case.
                tf.logging.info(
                    'Checkpoint {} is not ready yet. '
                    'Checking again in a minute.'.format(
                        checkpoint['file']
                    )
                )
                time.sleep(60)
                continue

        # If no watching was requested, finish the execution.
        if not watch:
            return

        # Sleep for a minute and check for new checkpoints.
        tf.logging.info('All checkpoints evaluated; sleeping for a minute')
        time.sleep(60)
Example #11
0
def evaluate(model_type, dataset_split, config_file, job_dir, watch,
             from_global_step, override_params, image_vis, files_per_class):
    """
    Evaluate models using dataset.

    Builds the model and a `TFRecordDataset` graph once, then evaluates the
    checkpoints returned by `get_checkpoints` with `evaluate_once`.

    Args:
        model_type: Name passed to `get_model` to select the model class.
        dataset_split: Dataset split to evaluate; written to
            ``config.dataset.split`` and used to name losses and metrics.
        config_file: Custom config forwarded to `get_model_config`.
        job_dir: When truthy, overrides ``config.train.job_dir``.
        watch: When falsy, return after one pass over the checkpoints. When
            truthy, keep polling for new checkpoints every 60 seconds.
        from_global_step: Initial global step passed to `get_checkpoints`.
        override_params: Config overrides forwarded to `get_model_config`.
        image_vis: Assigned to ``config.train.debug`` and forwarded to
            `evaluate_once`.
        files_per_class: Forwarded to `evaluate_once`.

    Raises:
        ValueError: Re-raised from `get_checkpoints` when not watching.
    """
    model_cls = get_model(model_type)

    config = get_model_config(model_cls.base_config, config_file,
                              override_params)

    config.train.job_dir = job_dir or config.train.job_dir
    # Only activate debug for image visualizations.
    config.train.debug = image_vis

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split
    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    # Seed setup
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training
    config.base_network.trainable = False

    model = model_cls(config)
    dataset = TFRecordDataset(config)
    train_dataset = dataset()

    train_image = train_dataset['image']
    train_objects = train_dataset['bboxes']
    train_filename = train_dataset['filename']

    # TODO: This is not the best place to configure rank? Why is rank not
    # transmitted through the queue
    train_image.set_shape((None, None, 3))
    # We add fake batch dimension to train data. TODO: DEFINITELY NOT THE BEST
    # PLACE
    train_image = tf.expand_dims(train_image, 0)

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(train_image, train_objects)

    pred = prediction_dict['classification_prediction']
    pred_objects = pred['objects']
    pred_objects_classes = pred['labels']
    pred_objects_scores = pred['probs']

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor,
            name=loss_name,
            metrics_collections='metrics',
            updates_collections='metric_ops',
        )
        full_loss_name = '{}_losses/{}'.format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    # Collected after the loop above so it contains every update op that the
    # `tf.metrics.mean` calls registered.
    metric_ops = tf.get_collection('metric_ops')

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict.
    ops = {
        'init_op': init_op,
        'metric_ops': metric_ops,
        'pred_objects': pred_objects,
        'pred_objects_classes': pred_objects_classes,
        'pred_objects_scores': pred_objects_scores,
        'train_objects': train_objects,
        'losses': losses,
        'prediction_dict': prediction_dict,
        'filename': train_filename
    }

    metrics_scope = '{}_metrics'.format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(config.train.job_dir)

    # Shared across `evaluate_once` calls.
    files_to_visualize = {}

    last_global_step = from_global_step
    while True:
        # Get the checkpoint files to evaluate.
        try:
            checkpoints = get_checkpoints(config, last_global_step)
        except ValueError as e:
            if not watch:
                tf.logging.error('Missing checkpoint.')
                raise e

            tf.logging.warning(
                'Missing checkpoint; Checking again in a minute')
            time.sleep(60)
            continue

        for checkpoint in checkpoints:
            # Always returned in order, so it's safe to assign directly.
            tf.logging.info(
                'Evaluating global_step {} using checkpoint \'{}\''.format(
                    checkpoint['global_step'], checkpoint['file']))
            try:
                start = time.time()
                evaluate_once(writer,
                              saver,
                              ops,
                              config.network.num_classes,
                              checkpoint,
                              metrics_scope=metrics_scope,
                              image_vis=image_vis,
                              files_per_class=files_per_class,
                              files_to_visualize=files_to_visualize)
                last_global_step = checkpoint['global_step']
                tf.logging.info('Evaluated in {:.2f}s'.format(time.time() -
                                                              start))
            except tf.errors.NotFoundError:
                # The checkpoint is not ready yet. It was written in the
                # checkpoints file, but it still hasn't been completely saved.
                # `last_global_step` is not advanced in this case.
                tf.logging.info('Checkpoint {} is not ready yet. '
                                'Checking again in a minute.'.format(
                                    checkpoint['file']))
                time.sleep(60)
                continue

        # If no watching was requested, finish the execution.
        if not watch:
            return

        # Sleep for a minute and check for new checkpoints.
        tf.logging.info('All checkpoints evaluated; sleeping for a minute')
        time.sleep(60)
Example #12
0
def run_local(config, environment=None):
    """Train the configured model in a single process across local GPUs.

    One forward/backward pass is built per GPU device, gradients are
    all-reduced with `batch_allreduce`, and the resulting train op is run in
    a `tf.train.MonitoredTrainingSession` until the input queue raises
    `tf.errors.OutOfRangeError` or the coordinator asks to stop.

    Args:
        config: Full run config. ``config.model.type`` and
            ``config.dataset.type`` select the model and dataset classes, and
            ``config.train`` supplies the training options read below.
        environment: Forwarded to `save_run` after the first step.

    Returns:
        The last global step value fetched from the session, or ``-1`` if no
        step was run.
    """
    model_class = get_model(config.model.type)
    image_vis = config.train.get('image_vis')
    var_vis = config.train.get('var_vis')

    if config.train.get('seed') is not None:
        tf.set_random_seed(config.train.seed)

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    model = model_class(config)

    num_gpus = config.train.get('num_gpus')
    if num_gpus is None:
        num_gpus = 1
    gpu_devices = ['gpu:{}'.format(i) for i in range(num_gpus)]
    gpu_indices = list(range(num_gpus))

    global_step = tf.train.get_or_create_global_step()

    optimizer = get_optimizer(config.train, global_step)

    def forward_pass_and_gradients(train_dataset):
        """
        Create forward loss and grads on each device
        """
        train_image = train_dataset['image']
        train_bboxes = train_dataset['bboxes']

        prediction_dict = model(train_image, train_bboxes, is_training=True)
        total_loss = model.loss(prediction_dict)

        # TODO: Is this necessary? Couldn't we just get them from the
        # trainable vars collection? We should probably improve our
        # usage of collections.
        trainable_vars = model.get_trainable_vars()

        # Compute, clip and apply gradients
        with tf.name_scope('gradients'):
            grads_and_vars = optimizer.compute_gradients(
                total_loss, trainable_vars)

            if config.train.clip_by_norm:
                grads_and_vars = clip_gradients_by_norm(grads_and_vars)

        return prediction_dict, total_loss, grads_and_vars

    def build_train_ops(device_grads):
        """Reduce per-device gradients and group the apply ops into one op."""
        training_ops = []
        # average all gradients
        grads_to_reduce = [[g for g, _ in grad_vars]
                           for grad_vars in device_grads]
        algorithm = batch_allreduce.AllReduceSpecAlgorithm(
            'nccl', gpu_indices, 0, 10)
        reduced_grads, _ = algorithm.batch_all_reduce(grads_to_reduce, 0, 0, 0)
        # Re-pair each reduced gradient with the variable it belongs to.
        reduced_device_grads = [[
            (g, v) for g, (_, v) in zip(grads, grad_vars)
        ] for grads, grad_vars in zip(reduced_grads, device_grads)]

        for i, device in enumerate(gpu_devices):
            with tf.device(device):
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    train_op = optimizer.apply_gradients(
                        reduced_device_grads[i], global_step=global_step)
                    training_ops.append(train_op)
        train_ops = tf.group(*(training_ops), name='train_ops_group')
        return train_ops

    try:
        dataset_class = get_dataset(config.dataset.type)
        dataset = dataset_class(config)
    except InvalidDataDirectory as exc:
        tf.logging.error("Error while reading dataset, {}".format(exc))
        sys.exit(1)
    device_losses = []
    device_gradvars = []

    for device in gpu_devices:
        train_dataset = dataset()
        with tf.device(device):
            prediction_dict, loss, gradvars = forward_pass_and_gradients(
                train_dataset)
            device_losses.append(loss)
            device_gradvars.append(gradvars)

    # `train_dataset` and `prediction_dict` refer to the last device built in
    # the loop above; they are what the log line and the vis hooks use.
    train_filename = train_dataset['filename']

    train_op = build_train_ops(device_gradvars)
    # average losses
    average_loss = tf.reduce_mean(device_losses)

    # Create custom init for slots in optimizer, as we don't save them to
    # our checkpoints. An example of slots in an optimizer are the Momentum
    # variables in MomentumOptimizer. We do this because slot variables can
    # effectively duplicate the size of your checkpoint!
    trainable_vars = model.get_trainable_vars()
    slot_variables = [
        optimizer.get_slot(var, name) for name in optimizer.get_slot_names()
        for var in trainable_vars
    ]
    slot_init = tf.variables_initializer(slot_variables,
                                         name='optimizer_slots_initializer')

    # Create saver for saving/restoring model
    model_saver = tf.train.Saver(
        set(tf.global_variables()) - set(slot_variables),
        name='model_saver',
        max_to_keep=config.train.get('checkpoints_max_keep', 1),
    )

    # Create saver for loading pretrained checkpoint into base network
    base_checkpoint_vars = model.get_base_network_checkpoint_vars()
    checkpoint_file = model.get_checkpoint_file()
    if base_checkpoint_vars and checkpoint_file:
        base_net_checkpoint_saver = tf.train.Saver(
            base_checkpoint_vars, name='base_net_checkpoint_saver')

        # We'll send this fn to Scaffold init_fn
        def load_base_net_checkpoint(_, session):
            base_net_checkpoint_saver.restore(session, checkpoint_file)
    else:
        load_base_net_checkpoint = None

    tf.logging.info('Starting training for {}'.format(model))

    run_options = None
    if config.train.full_trace:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

    # Create custom Scaffold to make sure we run our own init_op when model
    # is not restored from checkpoint.
    summary_op = [model.summary]
    summaries = tf.summary.merge_all()
    if summaries is not None:
        summary_op.append(summaries)
    summary_op = tf.summary.merge(summary_op)

    # `ready_for_local_init_op` is hardcoded to 'ready' as local init doesn't
    # depend on global init and `local_init_op` only runs when it is set as
    # 'ready' (an empty string tensor sets it as ready).
    is_chief = True
    local_var_init_op = tf.local_variables_initializer()
    table_init_ops = tf.tables_initializer()
    variable_mgr_init_ops = [local_var_init_op, table_init_ops, slot_init]
    local_var_init_op_group = tf.group(*variable_mgr_init_ops)
    scaffold = tf.train.Scaffold(
        saver=model_saver,
        init_op=tf.global_variables_initializer() if is_chief else tf.no_op(),
        local_init_op=local_var_init_op_group,
        ready_for_local_init_op=tf.constant([], dtype=tf.string),
        summary_op=summary_op,
        init_fn=load_base_net_checkpoint,
    )

    # Custom hooks for our session
    hooks = []
    chief_only_hooks = []

    if config.train.tf_debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        debug_hook.add_tensor_filter('has_inf_or_nan', tf_debug.has_inf_or_nan)
        hooks.extend([debug_hook])

    if not config.train.job_dir:
        tf.logging.warning(
            '`job_dir` is not defined. Checkpoints and logs will not be saved.'
        )
        checkpoint_dir = None
    elif config.train.run_name:
        # Use run_name when available
        checkpoint_dir = os.path.join(config.train.job_dir,
                                      config.train.run_name)
    else:
        checkpoint_dir = config.train.job_dir

    # The hooks write to `checkpoint_dir`, so it must be set regardless of
    # which display interval is configured. The explicit parentheses matter:
    # `a or b and c` parses as `a or (b and c)`, which skipped the directory
    # check whenever `display_every_steps` was set.
    should_add_hooks = ((config.train.display_every_steps
                         or config.train.display_every_secs)
                        and checkpoint_dir is not None)
    if should_add_hooks:
        if not config.train.debug and image_vis == 'debug':
            tf.logging.warning('ImageVisHook will not run without debug mode.')
        elif image_vis is not None:
            # ImageVis only runs on the chief.
            chief_only_hooks.append(
                ImageVisHook(prediction_dict,
                             image=train_dataset['image'],
                             gt_bboxes=train_dataset['bboxes'],
                             config=config.model,
                             output_dir=checkpoint_dir,
                             every_n_steps=config.train.display_every_steps,
                             every_n_secs=config.train.display_every_secs,
                             image_visualization_mode=image_vis))

        if var_vis is not None:
            # VarVis only runs on the chief.
            chief_only_hooks.append(
                VarVisHook(
                    every_n_steps=config.train.display_every_steps,
                    every_n_secs=config.train.display_every_secs,
                    mode=var_vis,
                    output_dir=checkpoint_dir,
                    vars_summary=model.vars_summary,
                ))

    # Returned as-is if the training loop never completes a step.
    step = -1
    target = ''
    config_proto = tf.ConfigProto()
    config_proto.allow_soft_placement = True
    with tf.train.MonitoredTrainingSession(
            master=target,
            is_chief=is_chief,
            checkpoint_dir=checkpoint_dir,
            scaffold=scaffold,
            hooks=hooks,
            chief_only_hooks=chief_only_hooks,
            save_checkpoint_secs=config.train.save_checkpoint_secs,
            save_summaries_steps=config.train.save_summaries_steps,
            save_summaries_secs=config.train.save_summaries_secs,
            config=config_proto,
    ) as sess:

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                before = time.time()
                _, train_loss, step, filename = sess.run(
                    [train_op, average_loss, global_step, train_filename],
                    options=run_options)

                # TODO: Add image summary every once in a while.

                tf.logging.info(
                    'step: {}, file: {}, train_loss: {}, in {:.2f}s'.format(
                        step, filename, train_loss,
                        time.time() - before))

                if is_chief and step == 1:
                    # We save the run after first batch to make sure everything
                    # works properly.
                    save_run(config, environment=environment)

        except tf.errors.OutOfRangeError:
            # NOTE(review): `log_prefix` is not defined in this function; it
            # must exist at module level or this handler raises NameError.
            # Confirm against the rest of the file.
            tf.logging.info('{}finished training after {} epoch limit'.format(
                log_prefix, config.train.num_epochs))

            # TODO: Print summary
        finally:
            coord.request_stop()

        # Wait for all threads to stop.
        coord.join(threads)

        return step
Example #13
0
def get_prediction(image,
                   config,
                   total=None,
                   session=None,
                   fetches=None,
                   image_tensor=None,
                   class_labels=None,
                   return_tf_vars=False):
    """
    Gets the prediction of the model described by `config` for `image`.

    When `session`, `fetches` and `image_tensor` are all None, a new graph and
    session are built from `config`; if `config.train.job_dir` is set the last
    checkpoint found there is restored, otherwise the variables are just
    initialized. When the three of them are given, they are reused as-is and
    `config` is not read.

    Args:
        image: Image to predict on; it is converted with `np.array` and fed
            to `image_tensor`.
        config: Configuration used to build the dataset and the model. Its
            `dataset.data_augmentation` is set to None when a graph is built.
        total: If not None, keep only the first `total` results.
        session: Session to run the fetches with.
        fetches: Dict of tensors with (at least) the keys 'objects',
            'labels', 'probs' and 'scale_factor'.
        image_tensor: Placeholder the image is fed into.
        class_labels: Optional sequence used to translate label indexes
            into class names.
        return_tf_vars: If True, also return 'image_tensor', 'fetches' and
            'session' so that they can be passed back in a later call.

    Returns:
        A dictionary with 'objects' (already divided by the scale factor),
        'objects_labels', 'objects_labels_prob' and 'inference_time'. The
        scale factor itself is not part of the result.

    Raises:
        ValueError: If only some of `session`, `fetches` and `image_tensor`
            are given, or if `config.train.job_dir` is set but no checkpoint
            is found in it.
    """

    # Tracks whether the session is ours to close; a session supplied by the
    # caller is never closed here.
    created_session = False

    if session is None and fetches is None and image_tensor is None:
        # Don't use data augmentation in predictions
        config.dataset.data_augmentation = None

        dataset_class = get_dataset(config.dataset.type)
        model_class = get_model(config.model.type)
        dataset = dataset_class(config)
        model = model_class(config)

        graph = tf.Graph()
        session = tf.Session(graph=graph)
        created_session = True

        try:
            with graph.as_default():
                image_tensor = tf.placeholder(tf.float32, (None, None, 3))
                image_tf, _, process_meta = dataset.preprocess(image_tensor)
                pred_dict = model(image_tf)

                # Restore checkpoint
                if config.train.job_dir:
                    job_dir = config.train.job_dir
                    if config.train.run_name:
                        job_dir = os.path.join(job_dir, config.train.run_name)
                    ckpt = tf.train.get_checkpoint_state(job_dir)
                    if not ckpt or not ckpt.all_model_checkpoint_paths:
                        raise ValueError(
                            'Could not find checkpoint in {}.'.format(job_dir))
                    ckpt = ckpt.all_model_checkpoint_paths[-1]
                    saver = tf.train.Saver(sharded=True, allow_empty=True)
                    saver.restore(session, ckpt)
                    tf.logging.info('Loaded checkpoint.')
                else:
                    # A prediction without checkpoint is just used for testing
                    tf.logging.warning(
                        'Could not load checkpoint. Using initialized model.')
                    init_op = tf.group(tf.global_variables_initializer(),
                                       tf.local_variables_initializer())
                    session.run(init_op)

                if config.model.network.with_rcnn:
                    cls_prediction = pred_dict['classification_prediction']
                    objects_tf = cls_prediction['objects']
                    objects_labels_tf = cls_prediction['labels']
                    objects_labels_prob_tf = cls_prediction['probs']
                else:
                    rpn_prediction = pred_dict['rpn_prediction']
                    objects_tf = rpn_prediction['proposals']
                    objects_labels_prob_tf = rpn_prediction['scores']
                    # All labels without RCNN are zero
                    objects_labels_tf = tf.zeros(
                        tf.shape(objects_labels_prob_tf), dtype=tf.int32)

                fetches = {
                    'objects': objects_tf,
                    'labels': objects_labels_tf,
                    'probs': objects_labels_prob_tf,
                    'scale_factor': process_meta['scale_factor']
                }

                # If in debug mode, return the full prediction dictionary.
                if config.train.debug:
                    fetches['_debug'] = pred_dict
        except Exception:
            # Nobody else holds this session, so release it before failing.
            session.close()
            raise

    elif session is None or fetches is None or image_tensor is None:
        raise ValueError(
            'Either all `session`, `fetches` and `image_tensor` are None, '
            'or neither of them are.')

    try:
        start_time = time.time()
        fetched = session.run(
            fetches, feed_dict={image_tensor: np.array(image)})
        end_time = time.time()
    except Exception:
        if created_session:
            session.close()
        raise
    else:
        # The session is only kept alive when it is handed back to the
        # caller (or when it was the caller's to begin with).
        if created_session and not return_tf_vars:
            session.close()

    objects = fetched['objects']
    objects_labels = fetched['labels']
    objects_labels_prob = fetched['probs']
    scale_factor = fetched['scale_factor']

    objects_labels = objects_labels.tolist()

    if class_labels is not None:
        objects_labels = [class_labels[obj] for obj in objects_labels]

    # Scale objects to original image dimensions
    objects /= scale_factor

    objects = objects.tolist()
    objects_labels_prob = objects_labels_prob.tolist()

    if total is not None:
        objects = objects[:total]
        objects_labels = objects_labels[:total]
        objects_labels_prob = objects_labels_prob[:total]

    res = {
        'objects': objects,
        'objects_labels': objects_labels,
        'objects_labels_prob': objects_labels_prob,
        'inference_time': end_time - start_time,
    }

    if return_tf_vars:
        res['image_tensor'] = image_tensor
        res['fetches'] = fetches
        res['session'] = session

    return res
Example #14
0
def train(job_id, service_account_json, bucket_name, region, config_files,
          dataset, scale_tier, master_type, worker_type, worker_count,
          parameter_server_type, parameter_server_count):
    """Upload the package and config to a bucket and submit a training job."""
    # Steps, in order:
    #   1. Resolve the project id (ValueError if it is missing), the bucket
    #      name (defaults to 'luminoth-<client_id>') and the credentials.
    #   2. Build the package into the bucket under 'lumi_<job_id>'.
    #   3. Merge the custom config files with the model's base config,
    #      overriding `dataset.dir`, and upload the result to the bucket.
    #   4. Create the job through the 'ml' cloud service; on success the run
    #      is recorded with `save_run`, on failure the reason is echoed.
    #
    # The master/worker/parameter-server arguments are only used when
    # `scale_tier` is 'CUSTOM'.

    project_id = get_project_id(service_account_json)
    if project_id is None:
        raise ValueError(
            'Missing "project_id" in service_account_json "{}"'.format(
                service_account_json))

    if bucket_name is None:
        client_id = get_client_id(service_account_json)
        bucket_name = 'luminoth-{}'.format(client_id)
        click.echo(
            'Bucket name not specified. Using "{}".'.format(bucket_name))

    credentials = get_credentials(service_account_json)
    validate_region(region, project_id, credentials)

    # Creates bucket for logs and models if it doesn't exist
    bucket = get_bucket(service_account_json, bucket_name)

    if not job_id:
        job_id = 'train_{}'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))

    # Define path in bucket to store job's config, logs, etc.
    base_path = 'lumi_{}'.format(job_id)

    package_path = build_package(bucket, base_path)

    # Check if absolute or relative dataset path
    if not dataset.startswith('gs://'):
        dataset = 'gs://{}'.format(dataset)

    # The dataset location is baked into the uploaded config, so the job
    # itself only needs to be pointed at that config file.
    override_params = [
        'dataset.dir={}'.format(dataset),
    ]

    custom_config = load_config(config_files)
    model_class = get_model(custom_config.model.type)
    config = get_model_config(
        model_class.base_config,
        custom_config,
        override_params,
    )
    # We should validate config before submitting job

    # Update final config file to job bucket
    config_path = os.path.join(base_path, DEFAULT_CONFIG_FILENAME)
    upload_data(bucket, config_path, dump_config(config))

    args = ['--config', 'gs://{}/{}'.format(bucket_name, config_path)]

    cloudml = cloud_service(credentials, 'ml')

    training_inputs = {
        'scaleTier': scale_tier,
        'packageUris': ['gs://{}/{}'.format(bucket_name, package_path)],
        'pythonModule': 'luminoth.train',
        'args': args,
        'region': region,
        'jobDir': 'gs://{}/{}/'.format(bucket_name, base_path),
        'runtimeVersion': RUNTIME_VERSION
    }

    if scale_tier == 'CUSTOM':
        training_inputs['masterType'] = master_type
        training_inputs['workerType'] = worker_type
        training_inputs['workerCount'] = worker_count
        if parameter_server_count > 0:
            training_inputs['parameterServerCount'] = parameter_server_count
            training_inputs['parameterServerType'] = parameter_server_type

    job_spec = {'jobId': job_id, 'trainingInput': training_inputs}

    jobrequest = cloudml.projects().jobs().create(
        body=job_spec, parent='projects/{}'.format(project_id))

    click.echo('Submitting training job.')
    try:
        res = jobrequest.execute()
    except Exception as err:
        # Only some errors expose `_get_reason`; fall back to the error
        # itself so that reporting never fails with an AttributeError.
        get_reason = getattr(err, '_get_reason', None)
        reason = get_reason() if callable(get_reason) else err
        click.echo('There was an error creating the training job. '
                   'Check the details: \n{}'.format(reason))
    else:
        click.echo('Job {} submitted successfully.'.format(job_id))
        click.echo('state = {}, createTime = {}'.format(
            res.get('state'), res.get('createTime')))

        # Kept out of the `try` so that a failure here is not reported as a
        # job creation error.
        save_run(config, environment='gcloud', extra_config=job_spec)
def eval(dataset_split, config_files, watch, from_global_step,
         override_params, files_per_class, iou_threshold, min_probability):
    """Evaluate models using dataset."""
    # Overview:
    #   * Loads the config and requires `train.job_dir` and `train.run_name`
    #     (KeyError otherwise); checkpoints and logs live in their join.
    #   * Builds the model over the `dataset_split` split with augmentation
    #     disabled and a single epoch, plus streaming means of every loss.
    #   * Evaluates the checkpoints returned by `get_checkpoints` through
    #     `evaluate_once`. With `watch` unset it returns after one pass;
    #     otherwise it keeps polling for new checkpoints every 5 seconds.
    #   * `files_per_class`, `iou_threshold` and `min_probability` are passed
    #     through to `evaluate_once`; `min_probability` is also written into
    #     the proposals config of the model.

    # If the config file is empty, our config will be the base_config for the
    # default model.
    try:
        config = get_config(config_files, override_params=override_params)
    except KeyError:
        raise KeyError('model.type should be set on the custom config.')

    if not config.train.job_dir:
        raise KeyError('`job_dir` should be set.')
    if not config.train.run_name:
        raise KeyError('`run_name` should be set.')

    # `run_dir` is where the actual checkpoint and logs are located.
    run_dir = os.path.join(config.train.job_dir, config.train.run_name)

    # Only activate debug for if needed for debug visualization mode.
    if not config.train.debug:
        config.train.debug = config.eval.image_vis == 'debug'

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Build the dataset tensors, overriding the default dataset split.
    config.dataset.split = dataset_split
    # Disable data augmentation.
    config.dataset.data_augmentation = []

    # Only a single run over the dataset to calculate metrics.
    config.train.num_epochs = 1

    if config.model.network.with_rcnn:
        config.model.rcnn.proposals.min_prob_threshold = min_probability
    else:
        config.model.rpn.proposals.min_prob_threshold = min_probability

    # Seed setup
    if config.train.seed:
        tf.set_random_seed(config.train.seed)

    # Set pretrained as not training
    config.model.base_network.trainable = False

    model_class = get_model(config.model.type)
    model = model_class(config)
    dataset_class = get_dataset(config.dataset.type)
    dataset = dataset_class(config)
    train_dataset = dataset()

    train_image = train_dataset['image']
    train_objects = train_dataset['bboxes']
    train_filename = train_dataset['filename']

    # Build the graph of the model to evaluate, retrieving required
    # intermediate tensors.
    prediction_dict = model(train_image, train_objects)

    if config.model.network.with_rcnn:
        pred = prediction_dict['classification_prediction']
        pred_objects = pred['objects']
        pred_objects_classes = pred['labels']
        pred_objects_scores = pred['probs']
    else:
        # Force the num_classes to 1
        config.model.network.num_classes = 1

        pred = prediction_dict['rpn_prediction']
        pred_objects = pred['proposals']
        pred_objects_scores = pred['scores']
        # When using only RPN all classes are 0.
        pred_objects_classes = tf.zeros(
            (tf.shape(pred_objects_scores)[0],), dtype=tf.int32
        )

    # Retrieve *all* the losses from the model and calculate their streaming
    # means, so we get the loss over the whole dataset.
    batch_losses = model.loss(prediction_dict, return_all=True)
    losses = {}
    for loss_name, loss_tensor in batch_losses.items():
        loss_mean, _ = tf.metrics.mean(
            loss_tensor, name=loss_name,
            metrics_collections='metrics',
            updates_collections='metric_ops',
        )
        full_loss_name = '{}_losses/{}'.format(dataset_split, loss_name)
        losses[full_loss_name] = loss_mean

    metric_ops = tf.get_collection('metric_ops')

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    # Using a global saver instead of the one for the model.
    saver = tf.train.Saver(sharded=True, allow_empty=True)

    # Aggregate the required ops to evaluate into a dict..
    ops = {
        'init_op': init_op,
        'metric_ops': metric_ops,
        'pred_objects': pred_objects,
        'pred_objects_classes': pred_objects_classes,
        'pred_objects_scores': pred_objects_scores,
        'train_objects': train_objects,
        'losses': losses,
        'prediction_dict': prediction_dict,
        'filename': train_filename,
        'train_image': train_image
    }

    metrics_scope = '{}_metrics'.format(dataset_split)

    # Use global writer for all checkpoints. We don't want to write different
    # files for each checkpoint.
    writer = tf.summary.FileWriter(run_dir)

    files_to_visualize = {}

    last_global_step = from_global_step
    try:
        while True:
            # Get the checkpoint files to evaluate.
            try:
                checkpoints = get_checkpoints(
                    run_dir, last_global_step, last_only=not watch
                )
            except ValueError:
                if not watch:
                    tf.logging.error('Missing checkpoint.')
                    raise

                tf.logging.warning(
                    'Missing checkpoint; Checking again in a moment')
                time.sleep(5)
                continue

            for checkpoint in checkpoints:
                # Always returned in order, so it's safe to assign directly.
                tf.logging.info(
                    'Evaluating global_step {} using checkpoint \'{}\''.format(
                        checkpoint['global_step'], checkpoint['file']
                    )
                )
                try:
                    start = time.time()
                    evaluate_once(
                        config, writer, saver, ops, checkpoint,
                        metrics_scope=metrics_scope,
                        image_vis=config.eval.image_vis,
                        files_per_class=files_per_class,
                        files_to_visualize=files_to_visualize,
                        iou_threshold=iou_threshold,
                        min_probability=min_probability
                    )
                    last_global_step = checkpoint['global_step']
                    tf.logging.info('Evaluated in {:.2f}s'.format(
                        time.time() - start
                    ))
                except tf.errors.NotFoundError:
                    # The checkpoint is not ready yet. It was written in the
                    # checkpoints file, but it still hasn't been completely
                    # saved.
                    tf.logging.info(
                        'Checkpoint {} is not ready yet. '
                        'Checking again in a moment.'.format(
                            checkpoint['file']
                        )
                    )
                    time.sleep(5)
                    continue

            # If no watching was requested, finish the execution.
            if not watch:
                return

            # Sleep for a moment and check for new checkpoints.
            tf.logging.info(
                'All checkpoints evaluated; sleeping for a moment')
            time.sleep(5)
    finally:
        # Flush and release the event file on every way out of the loop:
        # normal return, missing checkpoint or an interrupted watch.
        writer.close()
Example #16
0
def get_prediction(model_type, image, config_file, session=None,
                   prediction_dict=None, image_tensor=None,
                   return_tf_vars=False):
    """
    Gets the prediction given by the model `model_type` of the image `image`.

    Unless `session`, `prediction_dict` and `image_tensor` are all given, a
    new graph and session are created (replacing whichever of them were
    passed). If both `train.job_dir` and `train.run_name` are set in the
    config, the last checkpoint found in their join is restored; otherwise
    the variables are just initialized.

    If `dataset.dir` is set in the config, the class names are read from its
    `classes.json` file and used as labels; otherwise the labels are the
    plain indexes.

    Returns a dictionary with the objects, their labels and probabilities,
    the inference time and the scale factor. Also if the `return_tf_vars` is
    True, returns the image tensor, the entire prediction of the model and
    the session.

    Raises ValueError if a checkpoint was expected but could not be found.
    """
    model_class = get_model(model_type)
    config = get_model_config(
        model_class.base_config, config_file, None
    )

    if session is None or prediction_dict is None or image_tensor is None:
        graph = tf.Graph()
        session = tf.Session(graph=graph)

        with graph.as_default():
            image_tensor = tf.placeholder(tf.float32, (1, None, None, 3))
            # NOTE(review): the model is built from the base config rather
            # than from the merged `config` -- confirm this is intended.
            model = model_class(model_class.base_config)
            prediction_dict = model(image_tensor)

            # Restore checkpoint
            if config.train.job_dir and config.train.run_name:
                run_dir = os.path.join(
                    config.train.job_dir, config.train.run_name)
                ckpt = tf.train.get_checkpoint_state(run_dir)
                if not ckpt or not ckpt.all_model_checkpoint_paths:
                    # Report the directory that was actually searched.
                    raise ValueError(
                        'Could not find checkpoint in {}.'.format(run_dir))
                ckpt = ckpt.all_model_checkpoint_paths[-1]
                ckpt_dir = os.path.join('.', ckpt)
                saver = tf.train.Saver(sharded=True, allow_empty=True)
                saver.restore(session, ckpt_dir)
            # A prediction without checkpoint is just used for testing
            else:
                init_op = tf.group(
                    tf.global_variables_initializer(),
                    tf.local_variables_initializer()
                )
                session.run(init_op)

    classification_prediction = prediction_dict['classification_prediction']
    objects_tf = classification_prediction['objects']
    objects_labels_tf = classification_prediction['labels']
    objects_labels_prob_tf = classification_prediction['probs']
    image_resize_config = model_class.base_config.dataset.image_preprocessing

    image_array, scale_factor = resize_image(
        image, float(image_resize_config.min_size),
        float(image_resize_config.max_size)
    )

    start_time = time.time()
    objects, objects_labels, objects_labels_prob = session.run([
        objects_tf, objects_labels_tf, objects_labels_prob_tf
    ], feed_dict={
        image_tensor: image_array
    })
    end_time = time.time()

    if config.dataset.dir:
        # Gets the names of the classes
        classes_file = os.path.join(config.dataset.dir, 'classes.json')
        # Context manager so the file handle is released once parsed.
        with tf.gfile.GFile(classes_file) as classes_fp:
            class_labels = json.load(classes_fp)
        objects_labels = [class_labels[obj] for obj in objects_labels]

    else:
        objects_labels = objects_labels.tolist()

    res = {
        'objects': objects.tolist(),
        'objects_labels': objects_labels,
        'objects_labels_prob': objects_labels_prob.tolist(),
        'inference_time': end_time - start_time,
        'scale_factor': scale_factor,
    }

    if return_tf_vars:
        res['image_tensor'] = image_tensor
        res['prediction_dict'] = prediction_dict
        res['session'] = session

    return res
Example #17
0
    def __init__(self, config):
        """Build the prediction graph and session described by `config`.

        Sets the following attributes:
            class_labels: Content of `<dataset.dir>/classes.json` when that
                file exists, None otherwise.
            session: Session bound to a newly created graph, with the last
                checkpoint of the job restored when `train.job_dir` is set
                (or with freshly initialized variables otherwise).
            image_placeholder: Placeholder of shape (None, None, 3) that the
                image has to be fed into.
            fetches: Dict with the 'objects', 'labels', 'probs' and
                'scale_factor' tensors (plus '_debug' in debug mode).

        `config.dataset.data_augmentation` is set to None as a side effect.

        Raises:
            ValueError: If `train.job_dir` is set but holds no checkpoint,
                or if the model type is neither 'ssd' nor 'fasterrcnn'.
        """

        # Always define the attribute, so that it can be read safely even
        # when no dataset directory is configured.
        self.class_labels = None
        if config.dataset.dir:
            # Gets the names of the classes
            classes_file = os.path.join(config.dataset.dir, 'classes.json')
            if tf.gfile.Exists(classes_file):
                # Context manager so the file handle is released once parsed.
                with tf.gfile.GFile(classes_file) as classes_fp:
                    self.class_labels = json.load(classes_fp)

        # Don't use data augmentation in predictions
        config.dataset.data_augmentation = None

        dataset_class = get_dataset(config.dataset.type)
        model_class = get_model(config.model.type)
        dataset = dataset_class(config)
        model = model_class(config)

        graph = tf.Graph()
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        self.session = tf.Session(config=tf_config, graph=graph)

        with graph.as_default():
            self.image_placeholder = tf.placeholder(
                tf.float32, (None, None, 3)
            )
            image_tf, _, process_meta = dataset.preprocess(
                self.image_placeholder
            )
            pred_dict = model(image_tf)

            # Restore checkpoint
            if config.train.job_dir:
                job_dir = config.train.job_dir
                if config.train.run_name:
                    job_dir = os.path.join(job_dir, config.train.run_name)
                ckpt = tf.train.get_checkpoint_state(job_dir)
                if not ckpt or not ckpt.all_model_checkpoint_paths:
                    raise ValueError('Could not find checkpoint in {}.'.format(
                        job_dir
                    ))
                ckpt = ckpt.all_model_checkpoint_paths[-1]
                saver = tf.train.Saver(sharded=True, allow_empty=True)
                saver.restore(self.session, ckpt)
                tf.logging.info('Loaded checkpoint.')
            else:
                # A prediction without checkpoint is just used for testing
                tf.logging.warning(
                    'Could not load checkpoint. Using initialized model.')
                init_op = tf.group(
                    tf.global_variables_initializer(),
                    tf.local_variables_initializer()
                )
                self.session.run(init_op)

            if config.model.type == 'ssd':
                cls_prediction = pred_dict['classification_prediction']
                objects_tf = cls_prediction['objects']
                objects_labels_tf = cls_prediction['labels']
                objects_labels_prob_tf = cls_prediction['probs']
            elif config.model.type == 'fasterrcnn':
                if config.model.network.get('with_rcnn', False):
                    cls_prediction = pred_dict['classification_prediction']
                    objects_tf = cls_prediction['objects']
                    objects_labels_tf = cls_prediction['labels']
                    objects_labels_prob_tf = cls_prediction['probs']
                else:
                    rpn_prediction = pred_dict['rpn_prediction']
                    objects_tf = rpn_prediction['proposals']
                    objects_labels_prob_tf = rpn_prediction['scores']
                    # All labels without RCNN are zero
                    objects_labels_tf = tf.zeros(
                        tf.shape(objects_labels_prob_tf), dtype=tf.int32
                    )
            else:
                raise ValueError(
                    "Model type '{}' not supported".format(config.model.type)
                )

            self.fetches = {
                'objects': objects_tf,
                'labels': objects_labels_tf,
                'probs': objects_labels_prob_tf,
                'scale_factor': process_meta['scale_factor']
            }

            # If in debug mode, return the full prediction dictionary.
            if config.train.debug:
                self.fetches['_debug'] = pred_dict
Example #18
0
def detect_tile_nuclei(slide_path, tile_position, args, it_kwargs,
                       src_mu_lab=None, src_sigma_lab=None, debug=False):
    """Detect the nuclei of a single slide tile and collect timing stats.

    The tile is loaded, color-normalized and deconvolved into two stain
    channels, whose per-pixel minimum (replicated on 3 channels) is fed to a
    'fasterrcnn' model restored from `CKPT_DIR`. The detections are then
    converted into annotations.

    Args:
        slide_path: Path of the slide, opened with `large_image`.
        tile_position: Position of the tile, passed to `getSingleTile`.
        args: Options object; this function reads `deconv_method`,
            `stain_1`, `stain_2`, `max_det`, `min_prob`,
            `ignore_border_nuclei`, `analysis_tile_size` and
            `nuclei_annotation_format` (others may be read by the
            `cli_utils` helpers it is passed to).
        it_kwargs: Extra keyword arguments for `getSingleTile`.
        src_mu_lab: Passed to the Reinhard normalization as `src_mu`.
        src_sigma_lab: Passed to the Reinhard normalization as `src_sigma`.
        debug: Not used by this function.

    Returns:
        A dict with the timings ('PreparationTime', 'ColorDeconvTime',
        'TotalTileLoadingTime', 'CKPTLoadingTime', 'ModelInfernceTime',
        'DetectionTime', 'AnnotationWritingTime'), the tile shape
        ('ROIShape'), the detections ('ObjectsDict', 'NumObjects') and the
        generated 'AnnotationDict' and 'AnalysisDict'.

    Raises:
        ValueError: If `args.deconv_method` is neither 'ruifrok' nor
            'macenko'.
    """

    # =========================================================================
    # ======================= Tile Loading ====================================
    # =========================================================================
    print('\n>> Loading Tile ... \n')

    csv_dict = {}

    csv_dict['PreparationTime'] = []
    csv_dict['ColorDeconvTime'] = []
    csv_dict['TotalTileLoadingTime'] = []

    csv_dict['CKPTLoadingTime'] = []
    csv_dict['ModelInfernceTime'] = []
    csv_dict['DetectionTime'] = []

    csv_dict['ROIShape'] = []
    csv_dict['ObjectsDict'] = []
    csv_dict['NumObjects'] = []

    csv_dict['AnnotationWritingTime'] = []

    csv_dict['AnnotationDict'] = []
    csv_dict['AnalysisDict'] = []

    start_time = time.time()
    total_tileloading_start_time = time.time()

    ts = large_image.getTileSource(slide_path)
    tile_info = ts.getSingleTile(
        tile_position=tile_position,
        format=large_image.tilesource.TILE_FORMAT_NUMPY,
        **it_kwargs)
    # Keep the first three channels only.
    im_tile = tile_info['tile'][:, :, :3]
    csv_dict['ROIShape'] = im_tile.shape[:2]

    prep_time = time.time() - start_time
    csv_dict['PreparationTime'] = round(prep_time, 3)

    # =========================================================================
    # =================Img Normalization & Color Deconv========================
    # =========================================================================
    print('\n>> Color Deconvolving ... \n')
    start_time = time.time()

    im_nmzd = htk_cnorm.reinhard(
        im_tile,
        REFERENCE_MU_LAB,
        REFERENCE_STD_LAB,
        src_mu=src_mu_lab,
        src_sigma=src_sigma_lab
    )

    # perform color decovolution
    # `float` replaces the `np.float` alias, which newer NumPy releases no
    # longer provide; both name the same dtype.
    if args.deconv_method == 'ruifrok':

        w = cli_utils.get_stain_matrix(args)
        im_stains = htk_cdeconv.color_deconvolution(
            im_nmzd, w).Stains.astype(float)[:, :, :2]

    elif args.deconv_method == 'macenko':

        w_est = htk_cdeconv.rgb_separate_stains_macenko_pca(im_tile, 255)
        im_stains = htk_cdeconv.color_deconvolution(
            im_tile, w_est, 255).Stains.astype(float)
        ch1 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_1], w_est)
        ch2 = htk_cdeconv.find_stain_index(
            htk_cdeconv.stain_color_map[args.stain_2], w_est)
        im_stains = im_stains[:, :, [ch1, ch2]]

    else:

        raise ValueError('Invalid deconvolution method parameter.')

    # =========================================================================
    # ====================== Fuse the stain1 & stain2 pix======================
    # =========================================================================

    # compute nuclear foreground mask
    # NOTE(review): `im_fgnd_seg_mask` is not read anywhere below in this
    # function -- confirm whether it is still needed.
    im_fgnd_mask_stain_1 = im_stains[
        :, :, 0] < threshold_yen(im_stains[:, :, 0])
    im_fgnd_mask_stain_2 = im_stains[
        :, :, 1] < threshold_yen(im_stains[:, :, 1])
    im_fgnd_seg_mask = im_fgnd_mask_stain_1 | im_fgnd_mask_stain_2

    # segment nuclei
    im_nuc_det_input = np.squeeze(np.min(im_stains[:, :, :2], axis=2))
    print('---> Fusing 2 Stains')
    deconv_time = time.time() - start_time
    csv_dict['ColorDeconvTime'] = round(deconv_time, 3)

    # =========================================================================
    # ================= Nuclie Detection Deep Learning Block ==================
    # =========================================================================

    total_tileloading_time = time.time() - total_tileloading_start_time
    csv_dict['TotalTileLoadingTime'] = round(total_tileloading_time, 3)

    start_time = time.time()

    config = get_config(CONFIG)
    config.model.rcnn.proposals.total_max_detections = args.max_det
    config.model.rcnn.proposals.min_prob_threshold = args.min_prob
    # Replicate the single fused channel to get a 3-channel image.
    im_nuc_det_input = np.stack((im_nuc_det_input,) * 3, axis=-1)

    # =========================================================================
    tf.reset_default_graph()

    dataset_class = get_dataset('object_detection')
    model_class = get_model('fasterrcnn')
    dataset = dataset_class(config)
    model = model_class(config)

    graph = tf.Graph()
    session = tf.Session(graph=graph)

    # A session is created on every call, so it has to be released here;
    # otherwise each processed tile would leave one open.
    try:
        with graph.as_default():
            image_placeholder = tf.placeholder(
                tf.float32, (None, None, 3), name='Input_Placeholder'
            )
            pred_dict = model(image_placeholder)

            ckpt_loading_start_time = time.time()

            saver = tf.train.Saver(sharded=True, allow_empty=True)
            saver.restore(session, CKPT_DIR)
            tf.logging.info('Loaded checkpoint.')

            ckpt_loading_time = time.time() - ckpt_loading_start_time
            csv_dict['CKPTLoadingTime'] = round(ckpt_loading_time, 3)

            inference_start_time = time.time()

            cls_prediction = pred_dict['classification_prediction']
            objects_tf = cls_prediction['objects']
            objects_labels_tf = cls_prediction['labels']
            objects_labels_prob_tf = cls_prediction['probs']

            fetches = {
                'objects': objects_tf,
                'labels': objects_labels_tf,
                'probs': objects_labels_prob_tf,
            }

            fetched = session.run(fetches, feed_dict={
                image_placeholder: np.array(im_nuc_det_input)
            })

            inference_time = time.time() - inference_start_time
            csv_dict['ModelInfernceTime'] = round(inference_time, 3)

            objects = fetched['objects']
            labels = fetched['labels'].tolist()
            probs = fetched['probs'].tolist()

            # Cast to int to consistently return the same type in Python 2
            # and 3
            objects = [
                [int(round(coord)) for coord in obj]
                for obj in objects.tolist()
            ]

            # Highest probability first.
            predictions = sorted([
                {
                    'bbox': obj,
                    'label': label,
                    'prob': round(prob, 4),
                } for obj, label, prob in zip(objects, labels, probs)
            ], key=lambda x: x['prob'], reverse=True)
    finally:
        session.close()

    print('\n>> Finishing Detection ... \n')
    print('***** Number of Detected Cells ****** : ', len(predictions))
    detection_time = time.time() - start_time
    csv_dict['DetectionTime'] = round(detection_time, 3)
    csv_dict['NumObjects'] = len(predictions)
    csv_dict['ObjectsDict'] = predictions

    # =========================================================================
    # ======================= TODO: Implement border deletion =================
    # =========================================================================

    # =========================================================================
    # ======================= Write Annotations ===============================
    # =========================================================================

    start_time = time.time()

    # `objects` and `probs` keep the order returned by the model (they are
    # not sorted like `predictions`), so they stay aligned with each other.
    objects_df = pd.DataFrame(objects)
    formatted_annot_list,\
        formatter_analysis_list = cli_utils.convert_preds_to_utilformat(
            objects_df,
            probs,
            args.ignore_border_nuclei,
            im_tile_size=args.analysis_tile_size)

    nuclei_annot_list = cli_utils.create_tile_nuclei_annotations(
        formatted_annot_list, tile_info, args.nuclei_annotation_format)
    csv_dict['AnnotationDict'] = nuclei_annot_list

    csv_dict['AnalysisDict'] = formatter_analysis_list

    anot_time = time.time() - start_time
    csv_dict['AnnotationWritingTime'] = round(anot_time, 3)

    return csv_dict