def main(args):
    """ main """

    if not os.path.exists(args.test_ds):
        print("{} does not exists".format(args.test_ds))
        return 1

    # export model.pb from session dir. Skip if model.pb already exists
    model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-best-0",
                 train.MODEL_PATH)

    results_dir = "{}/results".format(
        os.path.dirname(os.path.abspath(__file__)))
    files = {
        label:
        open(results_dir + "/VOC2012/Main/comp3_det_test_{}.txt".format(label),
             "w")
        for label in pascal.CLASSES
    }

    graph = model.load(train.MODEL_PATH, args.device)
    with graph.as_default():
        # (?, n, n, NUM_CLASSES) tensor
        logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0")
        images_ = graph.get_tensor_by_name(model.INPUT_TENSOR_NAME + ":0")
        # each cell at coordinates (batch_position, i, j) is a probability vector
        per_region_probabilities = tf.nn.softmax(
            tf.reshape(logits, [-1, train.NUM_CLASSES]))
        # [tested positions, train.NUM_CLASSES]

        # top_k[0] = values, top_k[1] = indices
        # keep every probability, because localization can be reused for classification
        top_k = tf.nn.top_k(per_region_probabilities, k=train.NUM_CLASSES)
        # each with shape [tested_positions, k]

        k = 2
        input_side = model.INPUT_SIDE + model.DOWNSAMPLING_FACTOR * model.LAST_CONV_INPUT_STRIDE * k
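        # widening the input by k strides of the last conv input should turn
        # the single-point output into a (k + 1) x (k + 1) probability map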

        test_queue, test_filename_queue = pascal.test(
            args.test_ds, 29, input_side,
            args.test_ds + "/ImageSets/Main/test.txt")

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:

            sess.run(init_op)
            coordinator = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess,
                                                   coord=coordinator)

            try:
                processed = 0
                while not coordinator.should_stop():
                    image_batch, filename_batch = sess.run(
                        [test_queue, test_filename_queue])

                    probability_map, top_values, top_indices = sess.run(
                        [logits, top_k[0], top_k[1]],
                        feed_dict={images_: image_batch})

                    # Think of the net as one big convolution whose last layer
                    # (before the FC layers used for classification) has a
                    # receptive field of LAST_KERNEL_SIDE x LAST_KERNEL_SIDE,
                    # and approximate the net with this last kernel:
                    # if the image is scaled down to LAST_KERNEL_SIDE x
                    # LAST_KERNEL_SIDE, the output is a single point.
                    # If the image is scaled down to something bigger (that
                    # makes the convolution's output side an integer), the
                    # result is a spatial map of points, each with a depth of
                    # NUM_CLASSES.

                    # for every image in the input batch
                    probability_coords = 0
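                    # probability_coords is a flat index into top_values and
                    # top_indices, whose first dimension enumerates the
                    # (batch, y, x) positions in the same order as the loops below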
                    for batch_elem_id in range(len(image_batch)):
                        # scaling factor between original image and resized image
                        decoded_filename = filename_batch[
                            batch_elem_id].decode("utf-8")

                        image = sess.run(
                            image_processing.read_image_jpg(args.test_ds +
                                                            "/JPEGImages/" +
                                                            decoded_filename +
                                                            ".jpg"))
                        full_image_scaling_factors = np.array([
                            image.shape[1] / input_side,
                            image.shape[0] / input_side
                        ])

                        glance = defaultdict(list)

                        group = defaultdict(lambda: defaultdict(float))
                        for pmap_y in range(probability_map.shape[1]):
                            # calculate position in the downsampled image ds
                            ds_y = pmap_y * model.LAST_CONV_OUTPUT_STRIDE
                            for pmap_x in range(probability_map.shape[2]):
                                ds_x = pmap_x * model.LAST_CONV_OUTPUT_STRIDE

                                if top_indices[probability_coords][
                                        0] != pascal.BACKGROUND_CLASS_ID:

                                    # create coordinates of rect in the downsampled image
                                    # convert to numpy array in order to use broadcast ops
                                    coord = [
                                        ds_x, ds_y,
                                        ds_x + model.LAST_KERNEL_SIDE,
                                        ds_y + model.LAST_KERNEL_SIDE
                                    ]
                                    # if something is found, append the rectangle
                                    # to the per-class map of rectangles
                                    rect = utils.upsample_and_shift(
                                        coord, model.DOWNSAMPLING_FACTOR,
                                        [0, 0], full_image_scaling_factors)

                                    prob = top_values[probability_coords][0]
                                    label = pascal.CLASSES[
                                        top_indices[probability_coords][0]]

                                    rect_prob = [rect, prob]
                                    glance[label].append(rect_prob)
                                    group[label]["count"] += 1
                                    group[label]["prob"] += prob

                                # update probability coord value
                                probability_coords += 1

                        classes = list(group.keys())
                        print('Found {} classes: {}'.format(
                            len(classes), classes))

                        # find the minimum amount of intersection among regions
                        # in the original image needed to trigger a match.
                        # shape index 1 or 2 is equivalent, since the map is
                        # square; index 0 is the batch dimension
                        map_side = probability_map.shape[1]
                        map_area = map_side**2

                        min_intersection = map_side

                        # Save the relative frequency for every class
                        # To trigger a match, at least a fraction of intersection should be present
                        for label in group:
                            group[label]["prob"] /= group[label]["count"]
                            group[label][
                                "rf"] = group[label]["count"] / map_area

                        # merge overlapping rectangles for each class.
                        # returns a map of {"label": [[rect, prob, count], ...]}
                        localize = utils.group_overlapping_regions(
                            glance, eps=RECT_SIMILARITY)

                        detected_labels = set()
                        for label, rect_prob_list in localize.items():
                            for rect_prob in rect_prob_list:
                                count = rect_prob[2]
                                freq = group[label]["rf"]
                                if count >= min_intersection and freq > 0.1:
                                    detected_labels.add(label)
                                    confidence = rect_prob[1]
                                    rect = rect_prob[0]
                                    left = rect[0]
                                    top = rect[1]
                                    right = rect[2]
                                    bottom = rect[3]
                                    files[label].write(
                                        "{} {} {} {} {} {}\n".format(
                                            decoded_filename, confidence, left,
                                            top, right, bottom))

                        processed += 1

            except tf.errors.OutOfRangeError:
                print("[I] Done. Test completed!")
                print("Processed {} images".format(processed))
            finally:
                coordinator.request_stop()

            coordinator.join(threads)

        for label in files:
            files[label].close()
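
# The files written above follow the PASCAL VOC detection-results format: one
# "<image_id> <confidence> <left> <top> <right> <bottom>" line per detection.
# A minimal sketch of a reader for such a file; this helper is not part of
# the original code:
def read_voc_detections(path):
    """Yield (image_id, confidence, (left, top, right, bottom)) tuples."""
    with open(path) as results:
        for line in results:
            image_id, confidence, left, top, right, bottom = line.split()
            yield (image_id, float(confidence),
                   (float(left), float(top), float(right), float(bottom)))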
# Example #2
def main(args):
    """ main """

    if not os.path.exists(args.image_path):
        print("{} does not exists".format(args.image_path))
        return 1

    # export model.pb from session dir. Skip if model.pb already exists
    model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-best-0",
                 train.MODEL_PATH)

    graph = model.load(train.MODEL_PATH, args.device)
    with graph.as_default():
        # (?, n, n, NUM_CLASSES) tensor
        logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0")
        images_ = graph.get_tensor_by_name(model.INPUT_TENSOR_NAME + ":0")
        # each cell at coordinates (batch_position, i, j) is a probability vector
        per_region_probabilities = tf.nn.softmax(
            tf.reshape(logits, [-1, train.NUM_CLASSES]))
        # [tested positions, train.NUM_CLASSES]

        # top_k[0] = values, top_k[1] = indices
        # keep every probability, because localization can be reused for classification
        top_k = tf.nn.top_k(per_region_probabilities, k=train.NUM_CLASSES)
        # each with shape [tested_positions, k]

        original_image = tf.image.convert_image_dtype(
            image_processing.read_image(
                tf.constant(args.image_path), 3,
                args.image_path.split('.')[-1]),
            dtype=tf.uint8)
        original_image_dim = tf.shape(original_image)

        k = 2
        eval_image_side = tf.cond(
            tf.less_equal(
                tf.minimum(original_image_dim[0], original_image_dim[1]),
                tf.constant(model.INPUT_SIDE)),
            lambda: tf.constant(model.INPUT_SIDE),
            lambda: tf.constant(model.INPUT_SIDE + model.DOWNSAMPLING_FACTOR * model.LAST_CONV_INPUT_STRIDE * k)
        )
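        # images whose shorter side is already <= INPUT_SIDE are evaluated at
        # INPUT_SIDE (a single output point); larger images use the widened
        # side, so the fully-convolutional pass yields a spatial map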

        eval_image = tf.expand_dims(
            image_processing.zm_mp(
                image_processing.resize_bl(original_image, eval_image_side)), 0)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:

            input_image, input_image_side, image = sess.run(
                [eval_image, eval_image_side, original_image])

            start = time.time()
            probability_map, top_values, top_indices = sess.run(
                [logits, top_k[0], top_k[1]], feed_dict={images_: input_image})

            # Think of the net as one big convolution whose last layer
            # (before the FC layers used for classification) has a receptive
            # field of LAST_KERNEL_SIDE x LAST_KERNEL_SIDE, and approximate
            # the net with this last kernel: if the image is scaled down to
            # LAST_KERNEL_SIDE x LAST_KERNEL_SIDE, the output is a single
            # point. If the image is scaled down to something bigger (that
            # makes the convolution's output side an integer), the result is
            # a spatial map of points, each with a depth of NUM_CLASSES.

            # for every image in the input batch
            probability_coords = 0
            for _ in range(len(input_image)):
                # scaling factor between original image and resized image
                full_image_scaling_factors = np.array([
                    image.shape[1] / input_image_side,
                    image.shape[0] / input_image_side
                ])

                glance = defaultdict(list)
                # select count(*), avg(prob) from map group by label, order by count, avg.
                group = defaultdict(lambda: defaultdict(float))
                for pmap_y in range(probability_map.shape[1]):
                    # calculate position in the downsampled image ds
                    ds_y = pmap_y * model.LAST_CONV_OUTPUT_STRIDE
                    for pmap_x in range(probability_map.shape[2]):
                        ds_x = pmap_x * model.LAST_CONV_OUTPUT_STRIDE

                        if top_indices[probability_coords][
                                0] != pascal.BACKGROUND_CLASS_ID:

                            # create coordinates of rect in the downsampled image
                            # convert to numpy array in order to use broadcast ops
                            coord = [
                                ds_x, ds_y, ds_x + model.LAST_KERNEL_SIDE,
                                ds_y + model.LAST_KERNEL_SIDE
                            ]
                            # if something is found, append the rectangle
                            # to the per-class map of rectangles
                            rect = utils.upsample_and_shift(
                                coord, model.DOWNSAMPLING_FACTOR, [0, 0],
                                full_image_scaling_factors)

                            prob = top_values[probability_coords][0]
                            label = pascal.CLASSES[top_indices[
                                probability_coords][0]]

                            rect_prob = [rect, prob]
                            glance[label].append(rect_prob)
                            group[label]["count"] += 1
                            group[label]["prob"] += prob

                        # update probability coord value
                        probability_coords += 1

                classes = list(group.keys())
                print('Found {} classes: {}'.format(len(classes), classes))

                # find the minimum amount of intersection among regions
                # in the original image needed to trigger a match.
                # shape index 1 or 2 is equivalent, since the map is square;
                # index 0 is the batch dimension
                map_side = probability_map.shape[1]
                map_area = map_side**2

                min_intersection = map_side
                print('min intersection: ', min_intersection)

                # Save the relative frequency for every class
                # To trigger a match, at least a fraction of intersection should be present
                for label in group:
                    group[label]["prob"] /= group[label]["count"]
                    group[label]["rf"] = group[label]["count"] / map_area
                    print(label, group[label])

                # merge overlapping rectangles for each class.
                # returns a map of {"label": [[rect, prob, count], ...]}
                localize = utils.group_overlapping_regions(
                    glance, eps=RECT_SIMILARITY)
                end_time = time.time() - start
                print("time: {}".format(end_time))

                # now RGB can be converted to BGR to display the image with
                # OpenCV; it can't be done earlier, because the ROIs must be
                # extracted from the RGB image for TensorFlow to process them
                # without errors
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                for label, rect_prob_list in localize.items():
                    for rect_prob in rect_prob_list:
                        rect = rect_prob[0]
                        prob = rect_prob[1]
                        count = rect_prob[2]
                        freq = group[label]["rf"]
                        if count >= min_intersection and freq > 0.1:
                            utils.draw_box(
                                image,
                                rect,
                                "{}({:.3})".format(label, prob),
                                utils.LABEL_COLORS[label],
                                thickness=2)

                cv2.imshow("img", image)
                cv2.waitKey(0)
                return 0
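
# A standalone sketch of the match-triggering heuristic above, with made-up
# numbers. For each merged rectangle, "count" is how many raw regions were
# merged into it; "rf" is the fraction of probability-map cells that voted
# for the label. A rectangle is kept only when count >= map_side and rf > 0.1.
map_side = 8
map_area = map_side ** 2
label_cells = {"dog": 20, "chair": 3}    # hypothetical firing-cell counts
merged_counts = {"dog": 12, "chair": 3}  # hypothetical merged-region counts
for label in label_cells:
    rf = label_cells[label] / map_area
    kept = merged_counts[label] >= map_side and rf > 0.1
    print(label, "kept" if kept else "dropped")  # dog kept, chair dropped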
def main(args):
    """ main """

    if not os.path.exists(args.test_ds):
        print("{} does not exists".format(args.test_ds))
        return 1

    # export model.pb from session dir. Skip if model.pb already exists
    model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-0",
                 train.MODEL_PATH)

    graph = model.load(train.MODEL_PATH, args.device)
    with graph.as_default():
        logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0")
        logits = tf.squeeze(logits, [1, 2])

        # sparse labels, pgnet output -> 20 possible values
        labels_ = tf.placeholder(tf.int64, [None])

        predicted_labels = tf.argmax(logits, 1)

        top_1_op = tf.nn.in_top_k(logits, labels_, 1)
        top_5_op = tf.nn.in_top_k(logits, labels_, 5)

        image_queue, label_queue = pascifar.test(args.test_ds, BATCH_SIZE,
                                                 model.INPUT_SIDE,
                                                 args.test_ds + "/ts.csv")

        # initialize all variables
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            sess.run(init_op)

            # Start input enqueue threads.
            print("Starting input enqueue threads. Please wait...")
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                count_top_1 = 0.0
                count_top_5 = 0.0
                processed = 0
                while not coord.should_stop():
                    image_batch, label_batch = sess.run(
                        [image_queue, label_queue])
                    print(label_batch)

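                    # the input placeholder is fed by tensor name ("images_:0")
                    # because the graph was imported from model.pb and the
                    # placeholder object is not available in this scope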
                    top_1, top_5, pred_lab = sess.run(
                        [top_1_op, top_5_op, predicted_labels],
                        feed_dict={
                            "images_:0": image_batch,
                            labels_: label_batch,
                        })
                    count_top_1 += np.sum(top_1)
                    count_top_5 += np.sum(top_5)
                    processed += 1
                    print(pred_lab)
                    print(label_batch)
                    print(top_1, top_5)

            except tf.errors.OutOfRangeError:
                total_sample_count = processed * BATCH_SIZE
                precision_at_1 = count_top_1 / total_sample_count
                recall_at_5 = count_top_5 / total_sample_count

                print(
                    'precision @ 1 = {} recall @ 5 = {} [{} examples]'.format(
                        precision_at_1, recall_at_5, total_sample_count))
            finally:
                # When done, ask the threads to stop.
                coord.request_stop()

            # Wait for threads to finish.
            coord.join(threads)
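
# tf.nn.in_top_k(logits, labels, k) returns one boolean per example, True
# when the correct label is among the k largest logits. A numpy sketch of
# the top-1 accounting done above, with made-up logits:
import numpy as np

toy_logits = np.array([[0.1, 2.0, 0.3],
                       [1.5, 0.2, 0.1]])
toy_labels = np.array([1, 2])  # second example is misclassified
top_1 = np.argmax(toy_logits, axis=1) == toy_labels
print(top_1.sum() / len(toy_labels))  # 0.5, i.e. count_top_1 / samples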
def main(args):
    """ main """

    if not os.path.exists(args.image_path):
        print("{} does not exists".format(args.image_path))
        return 1

    # export model.pb from session dir. Skip if model.pb already exists
    model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-best-0",
                 train.MODEL_PATH)

    graph = model.load(train.MODEL_PATH, args.device)
    with graph.as_default():
        # (?, n, n, NUM_CLASSES) tensor
        logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0")
        images_ = graph.get_tensor_by_name(model.INPUT_TENSOR_NAME + ":0")
        # each cell at coordinates (batch_position, i, j) is a probability vector
        per_region_probabilities = tf.nn.softmax(
            tf.reshape(logits, [-1, train.NUM_CLASSES]))
        # [tested positions, train.NUM_CLASSES]

        # top_k[0] = values, top_k[1] = indices
        # keep every probability, because localization can be reused for classification
        top_k = tf.nn.top_k(per_region_probabilities, k=train.NUM_CLASSES)
        # each with shape [tested_positions, k]

        original_image = tf.image.convert_image_dtype(
            image_processing.read_image(
                tf.constant(args.image_path), 3,
                args.image_path.split('.')[-1]),
            dtype=tf.uint8)

        original_image_dim = tf.shape(original_image)

        k = 2
        eval_image_side = tf.cond(
            tf.less_equal(
                tf.minimum(original_image_dim[0], original_image_dim[1]),
                tf.constant(model.INPUT_SIDE)),
            lambda: tf.constant(model.INPUT_SIDE),
            lambda: tf.constant(model.INPUT_SIDE + model.DOWNSAMPLING_FACTOR * model.LAST_CONV_INPUT_STRIDE * k)
        )

        eval_image = tf.expand_dims(
            image_processing.zm_mp(
                image_processing.resize_bl(original_image, eval_image_side)), 0)

        # roi placeholder
        roi_ = tf.placeholder(tf.uint8)
        # roi preprocessing, single image classification
        roi_preproc = image_processing.zm_mp(
            image_processing.resize_bl(
                tf.image.convert_image_dtype(roi_, tf.float32),
                model.INPUT_SIDE))
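        # roi_ has no static shape, so crops of any size can be fed one at a
        # time and resized to the model input side before classification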

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:

            input_image, input_image_side, image = sess.run(
                [eval_image, eval_image_side, original_image])

            start = time.time()
            probability_map, top_values, top_indices = sess.run(
                [logits, top_k[0], top_k[1]], feed_dict={images_: input_image})

            # Think of the net as one big convolution whose last layer
            # (before the FC layers used for classification) has a receptive
            # field of LAST_KERNEL_SIDE x LAST_KERNEL_SIDE, and approximate
            # the net with this last kernel: if the image is scaled down to
            # LAST_KERNEL_SIDE x LAST_KERNEL_SIDE, the output is a single
            # point. If the image is scaled down to something bigger (that
            # makes the convolution's output side an integer), the result is
            # a spatial map of points, each with a depth of NUM_CLASSES.

            # for every image in the input batch
            probability_coords = 0
            for _ in range(len(input_image)):
                # scaling factor between original image and resized image
                full_image_scaling_factors = np.array([
                    image.shape[1] / input_image_side,
                    image.shape[0] / input_image_side
                ])

                glance = defaultdict(list)
                # select count(*), avg(prob) from map group by label, order by count, avg.
                group = defaultdict(lambda: defaultdict(float))
                for pmap_y in range(probability_map.shape[1]):
                    # calculate position in the downsampled image ds
                    ds_y = pmap_y * model.LAST_CONV_OUTPUT_STRIDE
                    for pmap_x in range(probability_map.shape[2]):
                        ds_x = pmap_x * model.LAST_CONV_OUTPUT_STRIDE

                        if top_indices[probability_coords][
                                0] != pascal.BACKGROUND_CLASS_ID:
                            # create coordinates of rect in the downsampled image
                            # convert to numpy array in order to use broadcast ops
                            coord = [
                                ds_x, ds_y, ds_x + model.LAST_KERNEL_SIDE,
                                ds_y + model.LAST_KERNEL_SIDE
                            ]
                            # if something is found, append the rectangle
                            # to the per-class map of rectangles
                            rect = utils.upsample_and_shift(
                                coord, model.DOWNSAMPLING_FACTOR, [0, 0],
                                full_image_scaling_factors)

                            prob = top_values[probability_coords][0]
                            label = pascal.CLASSES[top_indices[
                                probability_coords][0]]

                            rect_prob = [rect, prob]
                            glance[label].append(rect_prob)
                            group[label]["count"] += 1
                            group[label]["prob"] += prob

                        # update probability coord value
                        probability_coords += 1

                classes = list(group.keys())
                print('Found {} classes: {}'.format(len(classes), classes))

                # merge overlapping rectangles for each class
                global_rect_prob = utils.group_overlapping_regions(
                    glance, eps=RECT_SIMILARITY)

                # loop preserving order, because rois are evaluated in order
                rois = []
                rois_count = 0
                for label, rect_prob_list in sorted(global_rect_prob.items()):
                    # extract the rectangle from the image and classify it.
                    # if the classification gives the same label as the global
                    # one at top-1 (top-2, top-3?), draw it; else skip it.

                    for rect_prob in rect_prob_list:
                        rect = rect_prob[0]
                        y2 = rect[3]
                        y1 = rect[1]
                        x2 = rect[2]
                        x1 = rect[0]
                        roi = image[y1:y2, x1:x2]

                        rois.append(
                            sess.run(roi_preproc, feed_dict={roi_: roi}))
                        rois_count += 1

                # evaluate top values for every image in the batch of rois
                rois_top_values, rois_top_indices = sess.run(
                    [top_k[0], top_k[1]], feed_dict={images_: rois})

                roi_id = 0
                # localization dictionary. ["label"] => [[rect, prob], ...]
                localize = defaultdict(list)
                # classification dictionary.
                # [(rect)] => [top_values[0..num_cl], top_indices[0..num_cl]]
                classify = defaultdict(list)

                for label, rect_prob_list in sorted(global_rect_prob.items()):
                    # loop over rect with the current label
                    for rect_prob in rect_prob_list:
                        # remove the background class from the available
                        # classes. tolist is needed because
                        # rois_top_indices[roi_id] is an ndarray (TensorFlow
                        # always returns ndarrays, even when the data is 1-D)
                        bg_pos = rois_top_indices[roi_id].tolist().index(
                            pascal.BACKGROUND_CLASS_ID)
                        roi_top_probs = np.delete(rois_top_values[roi_id],
                                                  bg_pos)
                        roi_top_indices = np.delete(rois_top_indices[roi_id],
                                                    bg_pos)

                        roi_label = pascal.CLASSES[roi_top_indices[0]]
                        if label == roi_label:
                            localize[label].append(
                                [rect_prob[0], roi_top_probs[0]])

                            classify[tuple(rect_prob[0])] = [
                                roi_top_indices, roi_top_probs
                            ]
                        roi_id += 1

                end_time = time.time() - start
                print("time: {}".format(end_time))

                # now RGB can be converted to BGR to display the image with
                # OpenCV; it can't be done earlier, because the ROIs must be
                # extracted from the RGB image for TensorFlow to process them
                # without errors
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                for label, rect_prob_list in localize.items():
                    for rect_prob in rect_prob_list:
                        utils.draw_box(
                            image,
                            rect_prob[0],
                            "{}({:.3})".format(label, rect_prob[1]),
                            utils.LABEL_COLORS[label],
                            thickness=2)

                cv2.imshow("img", image)
                cv2.waitKey(0)
                return 0
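
# A standalone sketch of the background-removal step above: np.delete drops
# the background entry from both the sorted values and their class indices,
# so index 0 becomes the most probable foreground class. Ids and values here
# are made up.
import numpy as np

BACKGROUND_ID = 0  # hypothetical background class id
roi_indices = np.array([0, 7, 3])      # classes sorted by probability
roi_values = np.array([0.6, 0.3, 0.1])
bg_pos = roi_indices.tolist().index(BACKGROUND_ID)
print(np.delete(roi_indices, bg_pos))  # [7 3]
print(np.delete(roi_values, bg_pos))   # [0.3 0.1]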
def main(args):
    """ main """

    if not os.path.exists(args.test_ds):
        print("{} does not exists".format(args.test_ds))
        return 1

    # export model.pb from session dir. Skip if model.pb already exists
    model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-best-0",
                 train.MODEL_PATH)

    results_dir = "{}/results".format(
        os.path.dirname(os.path.abspath(__file__)))
    files = {
        label:
        open(results_dir + "/VOC2012/Main/comp3_det_test_{}.txt".format(label),
             "w")
        for label in pascal.CLASSES
    }

    graph = model.load(train.MODEL_PATH, args.device)
    with graph.as_default():
        # (?, n, n, NUM_CLASSES) tensor
        logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0")
        images_ = graph.get_tensor_by_name(model.INPUT_TENSOR_NAME + ":0")
        # each cell at coordinates (batch_position, i, j) is a probability vector
        per_region_probabilities = tf.nn.softmax(
            tf.reshape(logits, [-1, train.NUM_CLASSES]))
        # [tested positions, train.NUM_CLASSES]

        # top_k[0] = values, top_k[1] = indices
        # keep every probability, because localization can be reused for classification
        top_k = tf.nn.top_k(per_region_probabilities, k=train.NUM_CLASSES)
        # each with shape [tested_positions, k]

        k = 2
        input_side = model.INPUT_SIDE + model.DOWNSAMPLING_FACTOR * model.LAST_CONV_INPUT_STRIDE * k

        test_queue, test_filename_queue = pascal.test(
            args.test_ds, 1, input_side,
            args.test_ds + "/ImageSets/Main/test.txt")

        # roi placeholder
        roi_ = tf.placeholder(tf.uint8)
        # roi preprocessing, single image classification
        roi_preproc = image_processing.zm_mp(
            image_processing.resize_bl(
                tf.image.convert_image_dtype(roi_, tf.float32),
                model.INPUT_SIDE))

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:

            sess.run(init_op)
            coordinator = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coordinator)

            try:
                processed = 0
                while not coordinator.should_stop():
                    image_batch, filename_batch = sess.run(
                        [test_queue, test_filename_queue])

                    probability_map, top_values, top_indices = sess.run(
                        [logits, top_k[0], top_k[1]],
                        feed_dict={images_: image_batch})

                    # Think of the net as one big convolution whose last layer
                    # (before the FC layers used for classification) has a
                    # receptive field of LAST_KERNEL_SIDE x LAST_KERNEL_SIDE,
                    # and approximate the net with this last kernel:
                    # if the image is scaled down to LAST_KERNEL_SIDE x
                    # LAST_KERNEL_SIDE, the output is a single point.
                    # If the image is scaled down to something bigger (that
                    # makes the convolution's output side an integer), the
                    # result is a spatial map of points, each with a depth of
                    # NUM_CLASSES.

                    # for every image in the input batch
                    probability_coords = 0
                    for batch_elem_id in range(len(image_batch)):
                        # scaling factor between original image and resized image
                        decoded_filename = filename_batch[batch_elem_id].decode(
                            "utf-8")

                        image = sess.run(
                            image_processing.read_image_jpg(
                                args.test_ds + "/JPEGImages/" + decoded_filename
                                + ".jpg"))
                        full_image_scaling_factors = np.array([
                            image.shape[1] / input_side,
                            image.shape[0] / input_side
                        ])

                        glance = defaultdict(list)

                        group = defaultdict(lambda: defaultdict(float))
                        for pmap_y in range(probability_map.shape[1]):
                            # calculate position in the downsampled image ds
                            ds_y = pmap_y * model.LAST_CONV_OUTPUT_STRIDE
                            for pmap_x in range(probability_map.shape[2]):
                                ds_x = pmap_x * model.LAST_CONV_OUTPUT_STRIDE

                                if top_indices[probability_coords][
                                        0] != pascal.BACKGROUND_CLASS_ID:

                                    # create coordinates of rect in the downsampled image
                                    # convert to numpy array in order to use broadcast ops
                                    coord = [
                                        ds_x, ds_y,
                                        ds_x + model.LAST_KERNEL_SIDE,
                                        ds_y + model.LAST_KERNEL_SIDE
                                    ]
                                    # if something is found, append the rectangle
                                    # to the per-class map of rectangles
                                    rect = utils.upsample_and_shift(
                                        coord, model.DOWNSAMPLING_FACTOR,
                                        [0, 0], full_image_scaling_factors)

                                    prob = top_values[probability_coords][0]
                                    label = pascal.CLASSES[top_indices[
                                        probability_coords][0]]

                                    rect_prob = [rect, prob]
                                    glance[label].append(rect_prob)
                                    group[label]["count"] += 1
                                    group[label]["prob"] += prob

                                # update probability coord value
                                probability_coords += 1

                        classes = list(group.keys())
                        print('Found {} classes: {}'.format(
                            len(classes), classes))

                        # merge overlapping rectangles for each class
                        global_rect_prob = utils.group_overlapping_regions(
                            glance, eps=RECT_SIMILARITY)

                        # loop preserving order, because rois are evaluated in order
                        rois = []
                        rois_count = 0
                        for label, rect_prob_list in sorted(
                                global_rect_prob.items()):
                            # extract the rectangle from the image and
                            # classify it. if the classification gives the
                            # same label as the global one at top-1 (top-2,
                            # top-3?), keep it; else skip it.

                            for rect_prob in rect_prob_list:
                                rect = rect_prob[0]
                                y2 = rect[3]
                                y1 = rect[1]
                                x2 = rect[2]
                                x1 = rect[0]
                                roi = image[y1:y2, x1:x2]

                                rois.append(
                                    sess.run(roi_preproc,
                                             feed_dict={roi_: roi}))
                                rois_count += 1

                        # evaluate top values for every image in the batch of rois
                        rois_top_values, rois_top_indices = sess.run(
                            [top_k[0], top_k[1]], feed_dict={images_: rois})

                        roi_id = 0
                        detected_labels = set()
                        for label, rect_prob_list in sorted(
                                global_rect_prob.items()):
                            # loop over rect with the current label
                            for rect_prob in rect_prob_list:
                                # remove the background class from the
                                # available classes. tolist is needed because
                                # rois_top_indices[roi_id] is an ndarray
                                # (TensorFlow always returns ndarrays, even
                                # when the data is 1-D)
                                bg_pos = rois_top_indices[roi_id].tolist().index(
                                    pascal.BACKGROUND_CLASS_ID)
                                roi_top_probs = np.delete(
                                    rois_top_values[roi_id], bg_pos)
                                roi_top_indices = np.delete(
                                    rois_top_indices[roi_id], bg_pos)

                                roi_label = pascal.CLASSES[roi_top_indices[0]]
                                if label == roi_label:
                                    detected_labels.add(label)
                                    confidence = roi_top_probs[0]
                                    rect = rect_prob[0]
                                    left = rect[0]
                                    top = rect[1]
                                    right = rect[2]
                                    bottom = rect[3]
                                    files[label].write(
                                        "{} {} {} {} {} {}\n".format(
                                            decoded_filename, confidence, left,
                                            top, right, bottom))

                                roi_id += 1

                        processed += 1

            except tf.errors.OutOfRangeError:
                print("[I] Done. Test completed!")
                print("Processed {} images".format(processed))
            finally:
                coordinator.request_stop()

            coordinator.join(threads)

        for label in files:
            files[label].close()
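
# utils.upsample_and_shift is not shown in these snippets. A plausible
# re-implementation, assuming it scales a rect from the downsampled feature
# space by DOWNSAMPLING_FACTOR, applies an (x, y) shift, then rescales to
# the original image; the real helper may differ:
import numpy as np

def upsample_and_shift_sketch(coord, downsampling_factor, shift, scaling):
    """coord is [x1, y1, x2, y2]; shift is [dx, dy]; scaling is [sx, sy]."""
    rect = np.array(coord, dtype=np.float64) * downsampling_factor
    rect += np.tile(shift, 2)    # [dx, dy, dx, dy]
    rect *= np.tile(scaling, 2)  # [sx, sy, sx, sy]
    return rect.astype(np.int64).tolist()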
Exemple #8
0
def main(args):
    """ main """

    if not os.path.exists(args.test_ds):
        print("{} does not exists".format(args.test_ds))
        return 1

    # export model.pb from session dir. Skip if model.pb already exists
    model.export(train.NUM_CLASSES, train.SESSION_DIR, "model-best-0",
                 train.MODEL_PATH)

    results_dir = "{}/results".format(
        os.path.dirname(os.path.abspath(__file__)))
    files = {
        label:
        open(results_dir + "/VOC2012/Main/comp3_det_test_{}.txt".format(label),
             "w")
        for label in pascal.CLASSES
    }

    graph = model.load(train.MODEL_PATH, args.device)
    with graph.as_default():
        # (?, n, n, NUM_CLASSES) tensor
        logits = graph.get_tensor_by_name(model.OUTPUT_TENSOR_NAME + ":0")
        images_ = graph.get_tensor_by_name(model.INPUT_TENSOR_NAME + ":0")
        # each cell at coords (batch_position, i, j) is a probability vector
        per_region_probabilities = tf.nn.softmax(
            tf.reshape(logits, [-1, train.NUM_CLASSES]))
        # [tested positions, train.NUM_CLASSES]

        # array[0]=values, [1]=indices
        # get every probability, because we can use localization to do classification
        top_k = tf.nn.top_k(per_region_probabilities, k=train.NUM_CLASSES)
        # each with shape [tested_positions, k]
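        # e.g. top_k[0][r][0] / top_k[1][r][0] are the best probability and
        # class id for region r (tf.nn.top_k returns (values, indices))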

        k = 2
        input_side = model.INPUT_SIDE + model.DOWNSAMPLING_FACTOR * model.LAST_CONV_INPUT_STRIDE * k

        test_queue, test_filename_queue = pascal.test(
            args.test_ds, 1, input_side,
            args.test_ds + "/ImageSets/Main/test.txt")

        # ROI placeholder
        roi_ = tf.placeholder(tf.uint8)
        # ROI preprocessing, single image classification
        roi_preproc = image_processing.zm_mp(
            image_processing.resize_bl(
                tf.image.convert_image_dtype(roi_, tf.float32),
                model.INPUT_SIDE))
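        # the preprocessing chain: uint8 -> float32 in [0, 1]
        # (convert_image_dtype), bilinear resize to INPUT_SIDE (resize_bl),
        # then zm_mp -- presumably a zero-mean normalization, judging by the
        # name; both helpers come from image_processing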

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:

            sess.run(init_op)
            coordinator = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess,
                                                   coord=coordinator)

            try:
                processed = 0
                while not coordinator.should_stop():
                    image_batch, filename_batch = sess.run(
                        [test_queue, test_filename_queue])

                    probability_map, top_values, top_indices = sess.run(
                        [logits, top_k[0], top_k[1]],
                        feed_dict={images_: image_batch})

                    # Let's think of the net as one big convolution, where the
                    # last layer (before the FC layers used for classification)
                    # has a receptive field of LAST_KERNEL_SIDE x LAST_KERNEL_SIDE.
                    # Approximating the net with this last kernel:
                    # if the image is scaled down to LAST_KERNEL_SIDE x LAST_KERNEL_SIDE,
                    # the output is a single point.
                    # If the image is scaled down to something bigger
                    # (that makes the convolution output side an integer), the
                    # result is a spatial map of points, each with a depth of
                    # NUM_CLASSES.
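                    # worked example (hypothetical numbers): with
                    # LAST_KERNEL_SIDE = 7 and LAST_CONV_OUTPUT_STRIDE = 1, a
                    # 9x9 downsampled input yields a 3x3 output map, since
                    # (9 - 7) / 1 + 1 = 3 positions per axis; every position
                    # holds a NUM_CLASSES-deep probability vector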

                    # for every image in the input batch
                    probability_coords = 0
                    for batch_elem_id in range(len(image_batch)):
                        decoded_filename = filename_batch[
                            batch_elem_id].decode("utf-8")

                        image = sess.run(
                            image_processing.read_image_jpg(args.test_ds +
                                                            "/JPEGImages/" +
                                                            decoded_filename +
                                                            ".jpg"))
                        # scaling factors between the original image and the
                        # resized network input (x, y)
                        full_image_scaling_factors = np.array([
                            image.shape[1] / input_side,
                            image.shape[0] / input_side
                        ])

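                        # glance: label -> list of [rect, prob] candidates
                        # group: label -> {"count": n, "prob": summed prob},
                        # only used for the summary print below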
                        glance = defaultdict(list)

                        group = defaultdict(lambda: defaultdict(float))
                        for pmap_y in range(probability_map.shape[1]):
                            # calculate position in the downsampled image ds
                            ds_y = pmap_y * model.LAST_CONV_OUTPUT_STRIDE
                            for pmap_x in range(probability_map.shape[2]):
                                ds_x = pmap_x * model.LAST_CONV_OUTPUT_STRIDE

                                if top_indices[probability_coords][
                                        0] != pascal.BACKGROUND_CLASS_ID:

                                    # create coordinates of rect in the downsampled image
                                    # convert to numpy array in order to use broadcast ops
                                    coord = [
                                        ds_x, ds_y,
                                        ds_x + model.LAST_KERNEL_SIDE,
                                        ds_y + model.LAST_KERNEL_SIDE
                                    ]
                                    # if something is found, append the
                                    # rectangle to the map of rectangles per class
                                    rect = utils.upsample_and_shift(
                                        coord, model.DOWNSAMPLING_FACTOR,
                                        [0, 0], full_image_scaling_factors)

                                    prob = top_values[probability_coords][0]
                                    label = pascal.CLASSES[
                                        top_indices[probability_coords][0]]

                                    rect_prob = [rect, prob]
                                    glance[label].append(rect_prob)
                                    group[label]["count"] += 1
                                    group[label]["prob"] += prob

                                # update probability coord value
                                probability_coords += 1

                        classes = list(group.keys())
                        print('Found {} classes: {}'.format(
                            len(classes), classes))

                        # merge overlapping rectangles for each class
                        global_rect_prob = utils.group_overlapping_regions(
                            glance, eps=RECT_SIMILARITY)
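                        # eps = RECT_SIMILARITY presumably sets how close two
                        # same-class rectangles must be to be merged into a
                        # single region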

                        # loop in sorted order, because the ROIs are evaluated
                        # below in the same order
                        rois = []
                        rois_count = 0
                        for label, rect_prob_list in sorted(
                                global_rect_prob.items()):
                            # Extract each merged rectangle from the image and
                            # classify it. If the ROI classification returns the
                            # same label as the global one as its top-1 (2, 3?)
                            # prediction, keep the detection, else skip it.

                            for rect_prob in rect_prob_list:
                                rect = rect_prob[0]
                                y2 = rect[3]
                                y1 = rect[1]
                                x2 = rect[2]
                                x1 = rect[0]
                                roi = image[y1:y2, x1:x2]

                                rois.append(
                                    sess.run(roi_preproc,
                                             feed_dict={roi_: roi}))
                                rois_count += 1

                        # evaluate top values for every image in the batch of rois
                        rois_top_values, rois_top_indices = sess.run(
                            [top_k[0], top_k[1]], feed_dict={images_: rois})

                        roi_id = 0
                        detected_labels = set()
                        for label, rect_prob_list in sorted(
                                global_rect_prob.items()):
                            # loop over rect with the current label
                            for rect_prob in rect_prob_list:
                                # remove the background class from the
                                # available classes. tolist() is needed because
                                # rois_top_indices[roi_id] is an ndarray
                                # (TensorFlow always returns an ndarray, even
                                # for 1-D data)
                                bg_pos = rois_top_indices[roi_id].tolist().index(
                                    pascal.BACKGROUND_CLASS_ID)
                                roi_top_probs = np.delete(
                                    rois_top_values[roi_id], bg_pos)
                                roi_top_indices = np.delete(
                                    rois_top_indices[roi_id], bg_pos)

                                roi_label = pascal.CLASSES[roi_top_indices[0]]
                                if label == roi_label:
                                    detected_labels.add(label)
                                    confidence = roi_top_probs[0]
                                    rect = rect_prob[0]
                                    left = rect[0]
                                    top = rect[1]
                                    right = rect[2]
                                    bottom = rect[3]
                                    files[label].write(
                                        "{} {} {} {} {} {}\n".format(
                                            decoded_filename, confidence, left,
                                            top, right, bottom))

                                roi_id += 1

                        processed += 1

            except tf.errors.OutOfRangeError:
                print("[I] Done. Test completed!")
                print("Processed {} images".format(processed))
            finally:
                coordinator.request_stop()

            coordinator.join(threads)

        for label in files:
            files[label].close()
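The ROI filtering above (drop the background entry from the top-k output, then
require the ROI's top-1 class to agree with the label proposed by the
localization pass) can be distilled into a standalone numpy sketch.
BACKGROUND_ID and CLASSES below are hypothetical stand-ins for
pascal.BACKGROUND_CLASS_ID and pascal.CLASSES:

import numpy as np

BACKGROUND_ID = 0
CLASSES = ["background", "cat", "dog"]

def roi_agrees(global_label, top_values, top_indices):
    """Return (agrees, confidence) for one ROI, ignoring the background."""
    bg_pos = top_indices.tolist().index(BACKGROUND_ID)
    probs = np.delete(top_values, bg_pos)
    indices = np.delete(top_indices, bg_pos)
    return CLASSES[indices[0]] == global_label, probs[0]

# Example top_k output for one ROI, sorted by probability:
print(roi_agrees("dog", np.array([0.7, 0.2, 0.1]), np.array([2, 0, 1])))
# -> (True, 0.7)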