def __init__(self,
                 model_dir,
                 in_dir,
                 out_dir,
                 nms_th_for_all_scale=0.5,
                 score_th=0.2,
                 scales=([384, 384], [768, 384], [768, 768]),
                 min_side_scale=384,
                 save_res_path='eval_res.txt'
                 ):
        if os.path.exists(in_dir):
            self.in_dir = in_dir
        else:
            raise ValueError('{} does not exist!'.format(in_dir))

        self.out_dir = out_dir
        self.suffixes = ['.png', '.PNG', '.jpg', '.jpeg']

        self.img_path, self.img_num = self.get_img_path()

        self.nms_th_for_all_scale = nms_th_for_all_scale
        self.nms_threshold = 0.45
        self.score_th = score_th
        print('self.score_th', self.score_th)
        self.make_out_dir()
        self.text_scales = scales
        self.data_format = 'NHWC'
        self.select_threshold = 0.01
        self.min_side_scale = min_side_scale
        self.max_side_scale = self.min_side_scale * 2  # 384 * 2
        self.save_xml_flag = True
        self.save_txt_flag = True
        self.dynamic_scale_flag = False
        self.allow_padding = False
        self.allow_post_processing = False
        self.allow_eval_flag = False
        self.resize_flag = False
        self.save_eval_resut_path = save_res_path
        self.model_path = None

        self.config = tf.ConfigProto(allow_soft_placement=True)
        self.config.gpu_options.allow_growth = True

        self.graph = tf.Graph()
        self.session_text = tf.Session(graph=self.graph, config=self.config)
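        # Build the detector in its own graph and session so it can coexist
        # with other TensorFlow models loaded in the same process.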

        with self.session_text.as_default():
            with self.graph.as_default():
                self.img_text = tf.placeholder(
                    tf.float32, shape=(None, None, 3))
                print('number of test scales:', len(self.text_scales))
                self.scale_text = tf.placeholder(tf.int32, shape=(2,))

                img_pre_text, label_pre_text, bboxes_pre_text, self.bboxes_img_text, xs_text, ys_text = ssd_vgg_preprocessing.preprocess_for_eval(
                    self.img_text,
                    None,
                    None,
                    None,
                    None,
                    self.scale_text,
                    self.data_format,
                    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
                image_text_4d = tf.expand_dims(img_pre_text, 0)
                image_text_4d = tf.cast(image_text_4d, tf.float32)
                self.image_text_4d = image_text_4d
                self.net_text = txtbox_384.TextboxNet()
                with slim.arg_scope(
                        self.net_text.arg_scope(data_format=self.data_format)):
                    self.predictions_text, self.localisations_text, self.logits_text, self.endpoints_text, self.l_shape = self.net_text.net(
                        self.image_text_4d,
                        is_training=False,
                        reuse=tf.AUTO_REUSE,
                        update_feat_shapes=True)
                saver_text = tf.train.Saver()
                if os.path.isdir(model_dir):
                    # latest_checkpoint already returns the full checkpoint path.
                    ckpt_path = tf.train.latest_checkpoint(model_dir)
                else:
                    ckpt_path = model_dir
                self.model_path = ckpt_path
                print('restoring from', ckpt_path)
                saver_text.restore(self.session_text, ckpt_path)

        logging.info("Textbox++ model initialized.")
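
A minimal usage sketch for this constructor (the enclosing class name is not shown in the excerpt, so `TextDetector` below is an assumed placeholder, as are the directory paths):

# Hypothetical usage; `TextDetector` stands in for the class this __init__ belongs to.
detector = TextDetector(model_dir='model/',
                        in_dir='test_images/',
                        out_dir='results/',
                        score_th=0.2)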
Example 2
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img, xs, ys = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input,
    None,
    None,
    None,
    None,
    net_shape,
    data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)
image_4d = tf.cast(image_4d, tf.float32)
# Define the txt_box model.
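# Reuse variables if `txt_net` was already built, e.g. when re-running this
# snippet in the same interactive session.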
reuse = True if 'txt_net' in locals() else None

txt_net = txtbox_384.TextboxNet()
print(txt_net.params.img_shape)
print('reuse:', reuse)

with slim.arg_scope(txt_net.arg_scope(data_format=data_format)):
    predictions, localisations, logits, end_points = txt_net.net(
        image_4d, is_training=False, reuse=reuse)

ckpt_dir = 'model'

isess.run(tf.compat.v1.global_variables_initializer())

saver = tf.compat.v1.train.Saver()

ckpt_filename = tf.train.latest_checkpoint(ckpt_dir)
if ckpt_dir and ckpt_filename:
    # Restore the model weights from the latest checkpoint.
    saver.restore(isess, ckpt_filename)
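
With the checkpoint restored, inference is a single session run. A sketch, assuming `img` is an H x W x 3 RGB numpy array fed through the `img_input` placeholder used in the pre-processing above:

rimg, rpredictions, rlocalisations = isess.run(
    [image_4d, predictions, localisations],
    feed_dict={img_input: img})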
Example 3
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    # Sets the threshold for what messages will be logged. (DEBUG / INFO / WARN / ERROR / FATAL)
    tf.logging.set_verbosity(tf.logging.DEBUG)

    with tf.Graph().as_default():
        # Config model_deploy. Keep TF Slim Models structure.
        # Useful if you want to use multiple GPUs and/or servers in the future.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)

        # Create global_step, the training iteration counter.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Select the dataset.
        dataset = TFrecords2Dataset.get_datasets(FLAGS.dataset_dir)

        # Get the TextBoxes++ network and its anchors.
        text_net = txtbox_384.TextboxNet()

        # Stage 2 training using the 768x768 input size.
        if FLAGS.large_training:
            # Replace the input image shape and the feature map sizes
            # extracted from the layers feeding each textbox layer.
            text_net.params = text_net.params._replace(img_shape=(768, 768))
            text_net.params = text_net.params._replace(
                feat_shapes=[(96, 96), (48, 48), (24, 24), (12,
                                                            12), (10,
                                                                  10), (8, 8)])

        img_shape = text_net.params.img_shape
        print('img_shape: ' + str(img_shape))

        # Compute the default anchor boxes with the given image shape, get anchor list.
        text_anchors = text_net.anchors(img_shape)

        # Print the training configuration before training.
        tf_utils.print_configuration(FLAGS.__flags, text_net.params,
                                     dataset.data_sources, FLAGS.train_dir)

        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device(deploy_config.inputs_device()):
            # setting the dataset provider
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=1000 * FLAGS.batch_size,
                    common_queue_min=300 * FLAGS.batch_size,
                    shuffle=True)
            # Get for SSD network: image, labels, bboxes.
            [image, shape, glabels, gbboxes, x1, x2, x3, x4, y1, y2, y3,
             y4] = provider.get([
                 'image', 'shape', 'object/label', 'object/bbox',
                 'object/oriented_bbox/x1', 'object/oriented_bbox/x2',
                 'object/oriented_bbox/x3', 'object/oriented_bbox/x4',
                 'object/oriented_bbox/y1', 'object/oriented_bbox/y2',
                 'object/oriented_bbox/y3', 'object/oriented_bbox/y4'
             ])
            gxs = tf.transpose(tf.stack([x1, x2, x3, x4]))  # shape = (N, 4)
            gys = tf.transpose(tf.stack([y1, y2, y3, y4]))  # shape = (N, 4)
            image = tf.identity(image, 'input_image')
            init_op = tf.global_variables_initializer()

            # Pre-processing image, labels and bboxes.
            training_image_crop_area = FLAGS.training_image_crop_area
            area_split = training_image_crop_area.split(',')
            assert len(area_split) == 2
            training_image_crop_area = [
                float(area_split[0]),
                float(area_split[1])
            ]

            image, glabels, gbboxes, gxs, gys= \
                ssd_vgg_preprocessing.preprocess_for_train(image, glabels, gbboxes, gxs, gys,
                                                        img_shape,
                                                        data_format='NHWC', crop_area_range=training_image_crop_area)

            # Encode groundtruth labels and bboxes.
            image = tf.identity(image, 'processed_image')

            glocalisations, gscores, glabels = \
                text_net.bboxes_encode( glabels, gbboxes, text_anchors, gxs, gys)
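            # batch_shape: one image tensor plus one localisations, one scores
            # and one labels tensor per anchor layer (hence the factor of 3).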
            batch_shape = [1] + [len(text_anchors)] * 3

            # Training batches and queue.
            r = tf.train.batch(tf_utils.reshape_list(
                [image, glocalisations, gscores, glabels]),
                               batch_size=FLAGS.batch_size,
                               num_threads=FLAGS.num_preprocessing_threads,
                               capacity=5 * FLAGS.batch_size)

            b_image, b_glocalisations, b_gscores, b_glabels= \
                tf_utils.reshape_list(r, batch_shape)

            # Intermediate queueing: unique batch computation pipeline for all
            # GPUs running the training.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                tf_utils.reshape_list(
                    [b_image, b_glocalisations, b_gscores, b_glabels]),
                capacity=2 * deploy_config.num_clones)

        # =================================================================== #
        # Define the model running on every GPU.
        # =================================================================== #
        def clone_fn(batch_queue):

            # Allows data parallelism by creating multiple clones of network_fn.
            # Dequeue a batch.
            b_image, b_glocalisations, b_gscores, b_glabels = \
                tf_utils.reshape_list(batch_queue.dequeue(), batch_shape)

            # Construct TextBoxes network.
            arg_scope = text_net.arg_scope(weight_decay=FLAGS.weight_decay)
            with slim.arg_scope(arg_scope):
                predictions,localisations, logits, end_points = \
                    text_net.net(b_image, is_training=True)
            # Add the loss function.
            text_net.losses(logits,
                            localisations,
                            b_glabels,
                            b_glocalisations,
                            b_gscores,
                            match_threshold=FLAGS.match_threshold,
                            negative_ratio=FLAGS.negative_ratio,
                            alpha=FLAGS.loss_alpha,
                            label_smoothing=FLAGS.label_smoothing,
                            batch_size=FLAGS.batch_size)
            return end_points

        # Gather initial tensorboard summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # =================================================================== #
        # Add summaries from first clone.
        # =================================================================== #
        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))
        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        # Add summaries for extra losses.
        for loss in tf.get_collection('EXTRA_LOSSES'):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # =================================================================== #
        # Configure the moving averages.
        # =================================================================== #
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # =================================================================== #
        # Configure the optimization procedure.
        # =================================================================== #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(
                FLAGS, dataset.num_samples, global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            # Add summaries for learning_rate.
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = tf_utils.get_variables_to_train(FLAGS)

        # Compute the total loss over all clones and the corresponding gradients.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)

        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
                                                          total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by clone_fn and by the optimization step.
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # =================================================================== #
        # Kicks off the training.
        # =================================================================== #
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)

        config = tf.ConfigProto(log_device_placement=False,
                                allow_soft_placement=True,
                                gpu_options=gpu_options)

        saver = tf.train.Saver(max_to_keep=100,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)

        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            # init_op=init_op,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,  # write merged summaries to logdir
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=config,
            sync_optimizer=None)
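
The module entry point is not included in the excerpt; with TF1-style flag parsing it would typically look like this (a sketch, not shown in the source):

if __name__ == '__main__':
    tf.app.run()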
Example 4
def run():
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    print('-----start test-------')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    with tf.device('/GPU:0'):
        dataset = TFrecords2Dataset.get_datasets(FLAGS.dataset_dir)
        print(dataset)
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=FLAGS.num_readers,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size,
            shuffle=True)
        print('provider:', provider)
        [image, shape, glabels, gbboxes, x1, x2, x3, x4, y1, y2, y3,
         y4] = provider.get([
             'image', 'shape', 'object/label', 'object/bbox',
             'object/oriented_bbox/x1', 'object/oriented_bbox/x2',
             'object/oriented_bbox/x3', 'object/oriented_bbox/x4',
             'object/oriented_bbox/y1', 'object/oriented_bbox/y2',
             'object/oriented_bbox/y3', 'object/oriented_bbox/y4'
         ])
        print('image:', image)
        print('shape:', shape)
        print('glabel:', glabels)
        print('gboxes:', gbboxes)

        gxs = tf.transpose(tf.stack([x1, x2, x3, x4]))  # shape = (N, 4)
        gys = tf.transpose(tf.stack([y1, y2, y3, y4]))  # shape = (N, 4)

        image = tf.identity(image, 'input_image')
        text_shape = (384, 384)
        image, glabels, gbboxes, gxs, gys = ssd_vgg_preprocessing.preprocess_image(
            image,
            glabels,
            gbboxes,
            gxs,
            gys,
            text_shape,
            is_training=True,
            data_format='NHWC')

        x1, x2, x3, x4 = tf.unstack(gxs, axis=1)
        y1, y2, y3, y4 = tf.unstack(gys, axis=1)

        text_net = txtbox_384.TextboxNet()
        text_anchors = text_net.anchors(text_shape)
        e_localisations, e_scores, e_labels = text_net.bboxes_encode(
            glabels, gbboxes, text_anchors, gxs, gys)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        j = 0
        all_time = 0
        try:
            while not coord.should_stop() and j < show_pic_sum:
                start_time = time.time()
                image_sess, label_sess, gbbox_sess, x1_sess, x2_sess, x3_sess, x4_sess, y1_sess, y2_sess, y3_sess, y4_sess, p_localisations, p_scores, p_labels = sess.run(
                    [
                        image, glabels, gbboxes, x1, x2, x3, x4, y1, y2, y3,
                        y4, e_localisations, e_scores, e_labels
                    ])
                end_time = time.time() - start_time
                all_time += end_time
                image_np = image_sess

                p_labels_concat = np.concatenate(p_labels)
                p_scores_concat = np.concatenate(p_scores)
                debug = False
                if debug:
                    print(p_labels)
                    print('p_labels:',
                          len(p_labels_concat[p_labels_concat.nonzero()]),
                          p_labels_concat[p_labels_concat.nonzero()])
                    print('p_scores:',
                          len(p_scores_concat[p_scores_concat.nonzero()]),
                          p_scores_concat[p_scores_concat.nonzero()])
                    print('label_sess:',
                          np.array(list(label_sess)).shape, list(label_sess))
                img_np = np.array(image_np)
                cv2.imwrite('{}/{}.png'.format(save_dir, j), img_np)
                img_np = cv2.imread('{}/{}.png'.format(save_dir, j))

                h, w, d = img_np.shape

                label_sess = list(label_sess)
                num_correct = 0

                for i, label in enumerate(label_sess):
                    if int(label) == 1:
                        num_correct += 1
                        img_np = draw_polygon(img_np, x1_sess[i] * w,
                                              y1_sess[i] * h, x2_sess[i] * w,
                                              y2_sess[i] * h, x3_sess[i] * w,
                                              y3_sess[i] * h, x4_sess[i] * w,
                                              y4_sess[i] * h)
                    if int(label) == 0:
                        img_np = draw_polygon(img_np,
                                              x1_sess[i] * w,
                                              y1_sess[i] * h,
                                              x2_sess[i] * w,
                                              y2_sess[i] * h,
                                              x3_sess[i] * w,
                                              y3_sess[i] * h,
                                              x4_sess[i] * w,
                                              y4_sess[i] * h,
                                              color=(0, 0, 255))
                img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB)
                cv2.imwrite(
                    '{}'.format(os.path.join(save_dir,
                                             str(j) + '.png')), img_np)
                j += 1
                print('correct:', num_correct)
        except tf.errors.OutOfRangeError:
            print('done')
        finally:
            coord.request_stop()
            coord.request_stop()
        print('all time:', all_time, 'average:', all_time / show_pic_sum)
        coord.join(threads=threads)
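
As with the training script, the entry point is omitted; a minimal sketch, assuming FLAGS, save_dir and show_pic_sum are defined elsewhere in the module:

if __name__ == '__main__':
    run()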