Example #1
def tower_loss(images,
               score_maps,
               geo_maps,
               training_masks,
               reuse_variables=None):
    # Build inference graph
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        # Model definition: f_score is a per-pixel foreground-probability map the same size as the input image;
        # f_geometry holds 5 maps: 4 with the distances to the top/bottom/left/right box edges and 1 with the rotation angle
        f_score, f_geometry = model.model(images, is_training=True)

    # model.loss signature: loss(y_true_cls, y_pred_cls, y_true_geo, y_pred_geo, training_mask)
    model_loss = model.loss(score_maps, f_score, geo_maps, f_geometry,
                            training_masks)

    total_loss = tf.add_n(
        [model_loss] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    tf.summary.image('input', images)
    tf.summary.image('score_map', score_maps)
    tf.summary.image('score_map_pred', f_score * 255)
    tf.summary.image('geo_map_0', geo_maps[:, :, :, 0:1])
    tf.summary.image('geo_map_0_pred', f_geometry[:, :, :, 0:1])
    tf.summary.image('geo_map_1_pred', f_geometry[:, :, :, 1:2])
    tf.summary.image('training_masks', training_masks)
    tf.summary.scalar('model_loss', model_loss)
    tf.summary.scalar('total_loss', total_loss)

    return total_loss, model_loss, f_score, f_geometry
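
A minimal multi-GPU driver sketch for tower_loss above; the optimizer opt, the per-GPU input splits, and the GPU id list are assumptions, not part of the example:

def build_towers(gpus, opt, images_split, score_split, geo_split, mask_split):
    tower_grads = []
    reuse = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % i):
                total_loss, model_loss, _, _ = tower_loss(
                    images_split[i], score_split[i], geo_split[i],
                    mask_split[i], reuse_variables=reuse)
                # Towers after the first reuse the first tower's variables.
                reuse = True
                tower_grads.append(opt.compute_gradients(total_loss))
    return tower_grads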
Example #2
def tower_loss(images, gt_score_maps, gt_threshold_map, gt_score_mask,
               gt_thresh_mask, reuse_variables):

    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        binarize_map, threshold_map, thresh_binary = model.model(images, is_training=True)

    model_loss = compute_loss(binarize_map, threshold_map, thresh_binary,
                              gt_score_maps, gt_threshold_map, gt_score_mask, gt_thresh_mask)

    total_loss = tf.add_n([model_loss] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # add summary
    if reuse_variables is None:
        tf.summary.image('gt/input_imgs', images)
        tf.summary.image('gt/score_map', gt_score_maps)
        tf.summary.image('gt/threshold_map', gt_threshold_map * 255)
        tf.summary.image('gt/score_mask', gt_score_mask)
        tf.summary.image('gt/thresh_mask', gt_thresh_mask)

        tf.summary.image('pred/binarize_map', binarize_map)
        tf.summary.image('pred/threshold_map', threshold_map * 255)
        tf.summary.image('pred/thresh_binary', thresh_binary)

        tf.summary.scalar('model_loss', model_loss)
        tf.summary.scalar('total_loss', total_loss)

    return total_loss, model_loss, binarize_map, threshold_map, thresh_binary
Example #3
def predict(im):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        seg_maps_pred = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)

            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            start_time = time.time()
            im_resized, (ratio_h, ratio_w) = resize_image(im)
            h, w, _ = im_resized.shape
            timer = {'net': 0, 'pse': 0}
            start = time.time()
            seg_maps = sess.run(seg_maps_pred,
                                feed_dict={input_images: [im_resized]})
            timer['net'] = time.time() - start

            boxes, kernels, timer = detect(seg_maps=seg_maps,
                                           timer=timer,
                                           image_w=w,
                                           image_h=h)

            if boxes is not None:
                boxes = boxes.reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                h, w, _ = im.shape
                boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

            duration = time.time() - start_time
            logger.info('[timing] {}'.format(duration))

            # return boxes
            return im, boxes
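
A hypothetical call site for predict; the image path is an assumption:

im = cv2.imread('test.jpg')[:, :, ::-1]  # BGR -> RGB, matching the code above
im_out, boxes = predict(im)
if boxes is not None:
    print('detected {} text boxes'.format(len(boxes)))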
Example #4
def tower_loss(images, annotation, class_labels, reuse_variables=None):
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        logits = model.model(images, is_training=True)
    pred = tf.argmax(logits, dimension=3)

    model_loss = model.loss(annotation, logits, class_labels)
    total_loss = tf.add_n([model_loss] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # add summary
    if reuse_variables is None:
        tf.summary.scalar('model_loss', model_loss)
        tf.summary.scalar('total_loss', total_loss)
    return total_loss, model_loss, pred
Example #5
def main(argv=None):
    import os
    if os.path.exists(FLAGS.result_path):
        shutil.rmtree(FLAGS.result_path)
    os.makedirs(FLAGS.result_path)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    pascal_voc_lut = pascal_segmentation_lut()

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        logits = model.model(input_images, is_training=False)
        pred = tf.argmax(logits, dimension=3)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                im = cv2.imread(im_fn)[:, :, ::-1]
                im_resized, (ratio_h, ratio_w) = resize_image(im, size=32)

                start = time.time()
                pred_re = sess.run([pred],
                                   feed_dict={input_images: [im_resized]})
                pred_re = np.array(np.squeeze(pred_re))

                img = visualize_segmentation_adaptive(pred_re, pascal_voc_lut)
                _diff_time = time.time() - start
                cv2.imwrite(
                    os.path.join(FLAGS.result_path, os.path.basename(im_fn)),
                    img)

                print('{}: cost {:.0f}ms'.format(im_fn, _diff_time * 1000))
Example #6
def tower_loss(images,
               score_maps,
               geo_maps,
               training_masks,
               reuse_variables=None):
    # Build inference graph
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        f_score, f_geometry = model.model(images, is_training=True)

    model_loss = model.loss(score_maps, f_score, geo_maps, f_geometry,
                            training_masks)

    total_loss = tf.add_n(
        [model_loss] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # cls_score = f_score
    cls_score = tf.nn.softmax(f_score)
    geo_score_1 = tf.nn.softmax(f_geometry[:, :, :, 0:2])
    # geo_score_2 = tf.nn.softmax(f_geometry[:,:,:,2:4])
    # geo_score_3 = tf.nn.softmax(f_geometry[:,:,:,4:6])
    # geo_score_4 = tf.nn.softmax(f_geometry[:,:,:,6:8])
    # geo_score_5 = tf.nn.softmax(f_geometry[:,:,:,8:10])
    # geo_score_6 = tf.nn.softmax(f_geometry[:,:,:,10:12])
    # geo_score_7 = tf.nn.softmax(f_geometry[:,:,:,12:14])
    # geo_score_8 = tf.nn.softmax(f_geometry[:,:,:,14:16])

    # add summary
    if reuse_variables is None:
        tf.summary.image('input', images, max_outputs=1)
        tf.summary.image('score_map', score_maps, max_outputs=1)
        # tf.summary.image('score_map_pred', cls_score * 255, max_outputs=1)
        tf.summary.image('score_map_pred',
                         cls_score[:, :, :, 1:2] * 255,
                         max_outputs=1)
        tf.summary.image('geo_map_0', geo_maps[:, :, :, 0:1], max_outputs=1)
        tf.summary.image('geo_map_1_pred',
                         geo_score_1[:, :, :, 1:2] * 255,
                         max_outputs=1)
        # tf.summary.image('geo_map_2_pred', geo_score_2[:, :, :, 1:2] * 255, max_outputs=1)
        # tf.summary.image('geo_map_3_pred', geo_score_3[:, :, :, 1:2] * 255, max_outputs=1)
        # tf.summary.image('geo_map_4_pred', geo_score_4[:, :, :, 1:2] * 255, max_outputs=1)
        # tf.summary.image('geo_map_5_pred', geo_score_5[:, :, :, 1:2] * 255, max_outputs=1)
        # tf.summary.image('geo_map_6_pred', geo_score_6[:, :, :, 1:2] * 255, max_outputs=1)
        # tf.summary.image('geo_map_7_pred', geo_score_7[:, :, :, 1:2] * 255, max_outputs=1)
        # tf.summary.image('geo_map_8_pred', geo_score_8[:, :, :, 1:2] * 255, max_outputs=1)
        tf.summary.scalar('model_loss', model_loss)
        tf.summary.scalar('total_loss', total_loss)

    return total_loss, model_loss
Example #7
def tower_loss(images, seg_maps_gt, training_masks, reuse_variables=None):
    # Build inference graph
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        seg_maps_pred = model.model(images, is_training=True)

    model_loss = model.loss(seg_maps_gt, seg_maps_pred, training_masks)
    total_loss = tf.add_n([model_loss] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # add summary
    if reuse_variables is None:
        tf.summary.image('input', images)
        tf.summary.image('seg_map_0_gt', seg_maps_gt[:, :, :, 0:1] * 255)
        tf.summary.image('seg_map_0_pred', seg_maps_pred[:, :, :, 0:1] * 255)
        tf.summary.image('training_masks', training_masks)
        tf.summary.scalar('model_loss', model_loss)
        tf.summary.scalar('total_loss', total_loss)

    return total_loss, model_loss
Example #8
    def __init__(self, ckpt_path, gpuid='0'):
        os.environ['CUDA_VISIBLE_DEVICES'] = gpuid
        tf.reset_default_graph()
        self._input_images = tf.placeholder(tf.float32,
                                            shape=[None, None, None, 3],
                                            name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        self._binarize_map, self._threshold_map, self._thresh_binary = model.model(
            self._input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        gpu_config = tf.ConfigProto(log_device_placement=False,
                                    gpu_options=gpu_options,
                                    allow_soft_placement=True)
        self.sess = tf.Session(config=gpu_config)
        saver.restore(self.sess, ckpt_path)
        self.decoder = SegDetectorRepresenter()
        print('restore model from:', ckpt_path)
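
A minimal usage sketch, assuming the enclosing class is named Detector and that image is an RGB array supplied by the caller:

detector = Detector(ckpt_path='path/to/model.ckpt', gpuid='0')  # hypothetical path
binarize_map, threshold_map, thresh_binary = detector.sess.run(
    [detector._binarize_map, detector._threshold_map, detector._thresh_binary],
    feed_dict={detector._input_images: [image]})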
Example #9
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    if not os.path.exists(FLAGS.result_path):
        os.makedirs(FLAGS.result_path)

    filename_queue = tf.train.string_input_producer([FLAGS.test_data_path],
                                                    num_epochs=1)
    image, annotation = read_tfrecord_and_decode_into_image_annotation_pair_tensors(
        filename_queue)

    image_batch_tensor = tf.expand_dims(image, axis=0)
    annotation_batch_tensor = tf.expand_dims(annotation, axis=0)

    input_image_shape = tf.shape(image_batch_tensor)
    image_height_width = input_image_shape[1:3]
    image_height_width_float = tf.to_float(image_height_width)
    image_height_width_multiple = tf.to_int32(
        tf.round(image_height_width_float / 32) * 32)

    image_batch_tensor = tf.image.resize_images(image_batch_tensor,
                                                image_height_width_multiple)

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    logits = model.model(FLAGS.model_type,
                         image_batch_tensor,
                         is_training=False)
    pred = tf.argmax(logits, dimension=3)
    pred = tf.expand_dims(pred, 3)
    pred = tf.image.resize_bilinear(images=pred, size=image_height_width)
    annotation_batch_tensor = tf.image.resize_bilinear(
        images=annotation_batch_tensor, size=image_height_width)
    annotation_batch_tensor = tf.div(annotation_batch_tensor, 255)

    pred = tf.reshape(pred, [-1])
    gt = tf.reshape(annotation_batch_tensor, [-1])

    acc, acc_update_op = tf.contrib.metrics.streaming_accuracy(pred, gt)
    miou, miou_update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=FLAGS.num_classes)

    with tf.get_default_graph().as_default():
        global_vars_init_op = tf.global_variables_initializer()
        local_vars_init_op = tf.local_variables_initializer()
        init = tf.group(local_vars_init_op, global_vars_init_op)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False,
                                gpu_options=gpu_options)
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init)
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            for i in range(150):
                start = time.time()
                image_np, annotation_np, pred_np, tmp_acc, tmp_miou = sess.run(
                    [image, annotation, pred, acc_update_op, miou_update_op])
                _diff_time = time.time() - start
                print('{}: cost {:.0f}ms'.format(i, _diff_time * 1000))
                # upsampled_predictions = pred_np.squeeze()
                # plt.subplot(131)
                # plt.imshow(image_np)
                # plt.subplot(132)
                # plt.imshow(annotation_np.squeeze(), cmap='gray')
                # plt.subplot(133)
                # plt.imshow(np.reshape(pred_np, (annotation_np.shape[0], annotation_np.shape[1])).squeeze(), cmap='gray')
                # plt.savefig(os.path.join(FLAGS.result_path, str(i) + '.png'))
                prediction = np.reshape(
                    pred_np, (annotation_np.shape[0],
                              annotation_np.shape[1])).squeeze() * 255
                cv2.imwrite(os.path.join(FLAGS.result_path,
                                         str(i) + '.png'), prediction)
            print('Test Finished !')

            coord.request_stop()
            coord.join(threads)
Example #10
def main(argv=None):

    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        seg_maps_pred = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                im = cv2.imread(im_fn)[:, :, ::-1]
                logger.debug('image file:{}'.format(im_fn))

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                h, w, _ = im_resized.shape
                # options = tf.RunOptions(trace_level = tf.RunOptions.FULL_TRACE)
                # run_metadata = tf.RunMetadata()
                timer = {'net': 0, 'pse': 0}
                start = time.time()
                seg_maps = sess.run(seg_maps_pred,
                                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start
                # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                # chrome_trace = fetched_timeline.generate_chrome_trace_format()
                # with open(os.path.join(FLAGS.output_dir, os.path.basename(im_fn).split('.')[0]+'.json'), 'w') as f:
                #     f.write(chrome_trace)

                boxes, kernels, timer = detect(seg_maps=seg_maps,
                                               timer=timer,
                                               image_w=w,
                                               image_h=h)
                logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                    im_fn, timer['net'] * 1000, timer['pse'] * 1000))

                if boxes is not None:
                    boxes = boxes.reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                    h, w, _ = im.shape
                    boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                    boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

                duration = time.time() - start_time
                logger.info('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, '{}.txt'.format(
                            os.path.splitext(os.path.basename(im_fn))[0]))

                    with open(res_file, 'w') as f:
                        num = 0
                        for i in range(len(boxes)):
                            # to avoid submitting errors
                            box = boxes[i]
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue

                            num += 1

                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=2)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])

    # =========================================================================================================
    # Convert the 8-coordinate results to 4-coordinate (xmin ymin xmax ymax) txt files

    path = test_data_path + '/'  # input images
    gt_path = output_dir + '/'  # 8-coordinate txt
    out_path = APP_ROOT + '/output_label'  # 4-coordinate txt

    if not os.path.exists(out_path):
        os.makedirs(out_path)
    else:
        shutil.rmtree(out_path)
        os.mkdir(out_path)

    files = os.listdir(path)
    files.sort()
    #files=files[:100]
    for file in files:
        _, basename = os.path.split(file)
        if basename.lower().split('.')[-1] not in ['jpg', 'png', 'jpeg']:
            continue
        stem, ext = os.path.splitext(basename)
        gt_file = os.path.join(gt_path + stem + '.txt')
        img_path = os.path.join(path, file)
        print('Reading image ' + os.path.splitext(file)[0])
        img = cv2.imread(img_path)
        img_size = img.shape
        im_size_min = np.min(img_size[0:2])
        im_size_max = np.max(img_size[0:2])

        with open(gt_file, 'r') as f:
            lines = f.readlines()
        for line in lines:
            splitted_line = line.strip().lower().split(',')
            pt_x = np.zeros((4, 1))
            pt_y = np.zeros((4, 1))
            pt_x[0, 0] = int(float(splitted_line[0]))
            pt_y[0, 0] = int(float(splitted_line[1]))
            pt_x[1, 0] = int(float(splitted_line[2]))
            pt_y[1, 0] = int(float(splitted_line[3]))
            pt_x[2, 0] = int(float(splitted_line[4]))
            pt_y[2, 0] = int(float(splitted_line[5]))
            pt_x[3, 0] = int(float(splitted_line[6]))
            pt_y[3, 0] = int(float(splitted_line[7]))

            ind_x = np.argsort(pt_x, axis=0)
            pt_x = pt_x[ind_x]
            pt_y = pt_y[ind_x]

            if pt_y[0] < pt_y[1]:
                pt1 = (pt_x[0], pt_y[0])
                pt3 = (pt_x[1], pt_y[1])
            else:
                pt1 = (pt_x[1], pt_y[1])
                pt3 = (pt_x[0], pt_y[0])

            if pt_y[2] < pt_y[3]:
                pt2 = (pt_x[2], pt_y[2])
                pt4 = (pt_x[3], pt_y[3])
            else:
                pt2 = (pt_x[3], pt_y[3])
                pt4 = (pt_x[2], pt_y[2])

            xmin = int(min(pt1[0], pt2[0]))
            ymin = int(min(pt1[1], pt2[1]))
            xmax = int(max(pt2[0], pt4[0]))
            ymax = int(max(pt3[1], pt4[1]))

            if xmin < 0:
                xmin = 0
            if xmax > img_size[1] - 1:
                xmax = img_size[1] - 1
            if ymin < 0:
                ymin = 0
            if ymax > img_size[0] - 1:
                ymax = img_size[0] - 1

            with open(os.path.join(out_path, stem) + '.txt', 'a') as f:
                f.write('{} {} {} {}\n'.format(int(xmin), int(ymin),
                                               int(xmax), int(ymax)))
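
The quadrilateral-to-rectangle logic above can be approximated with a global min/max bounding box; a sketch under that simplification (not byte-for-byte equivalent to the pairwise sorting above):

import numpy as np

def quad_to_bbox(line, img_h, img_w):
    # Parse the 8 comma-separated coordinates into a (4, 2) point array.
    pts = np.array([float(v) for v in line.strip().split(',')[:8]],
                   dtype=np.float32).reshape(4, 2)
    xmin, ymin = pts.min(axis=0)
    xmax, ymax = pts.max(axis=0)
    # Clamp to the image bounds, as the loop above does.
    xmin = int(max(xmin, 0))
    ymin = int(max(ymin, 0))
    xmax = int(min(xmax, img_w - 1))
    ymax = int(min(ymax, img_h - 1))
    return xmin, ymin, xmax, ymax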
Example #11
def main(argv=None):
    import os
    # os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    t0 = time.time()
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise

    im_fn_list = get_images()
    for im_fn in im_fn_list:
        points_list = []
        tf.reset_default_graph()
        with tf.get_default_graph().as_default():
            input_images = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 3],
                                          name='input_images')
            global_step = tf.get_variable(
                'global_step', [],
                initializer=tf.constant_initializer(0),
                trainable=False)
            seg_maps_pred = model.model(input_images, is_training=False)

            variable_averages = tf.train.ExponentialMovingAverage(
                0.997, global_step)
            saver = tf.train.Saver(variable_averages.variables_to_restore())
            with tf.Session(config=tf.ConfigProto(
                    allow_soft_placement=True)) as sess:
                ckpt_state = tf.train.get_checkpoint_state(
                    FLAGS.checkpoint_path)
                model_path = os.path.join(
                    FLAGS.checkpoint_path,
                    os.path.basename(ckpt_state.model_checkpoint_path))

                logger.info('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)

                im = cv2.imread(im_fn)[:, :, ::-1]
                draw_img = im[:, :, ::-1].copy()
                logger.debug('image file:{}'.format(im_fn))

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                h, w, _ = im_resized.shape
                # options = tf.RunOptions(trace_level = tf.RunOptions.FULL_TRACE)
                # run_metadata = tf.RunMetadata()
                timer = {'net': 0, 'pse': 0}
                start = time.time()
                seg_maps = sess.run(seg_maps_pred,
                                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start
                # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                # chrome_trace = fetched_timeline.generate_chrome_trace_format()
                # with open(os.path.join(FLAGS.output_dir, os.path.basename(im_fn).split('.')[0]+'.json'), 'w') as f:
                #     f.write(chrome_trace)

                boxes, kernels, timer = detect(seg_maps=seg_maps,
                                               timer=timer,
                                               image_w=w,
                                               image_h=h)
                logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                    im_fn, timer['net'] * 1000, timer['pse'] * 1000))

                if boxes is not None:
                    boxes = boxes.reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                    h, w, _ = im.shape
                    boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                    boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

                duration = time.time() - start_time
                logger.info('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, '{}.txt'.format(
                            os.path.splitext(os.path.basename(im_fn))[0]))

                    with open(res_file, 'w') as f:
                        num = 0
                        for i in range(len(boxes)):
                            # to avoid submitting errors
                            box = boxes[i]
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue

                            num += 1

                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))

                            yDim, xDim = im[:, :, ::-1].shape[:2]
                            if box[0, 0] > box[2, 0]:  # box point 1 is at the bottom-right, clockwise order
                                pt1 = (max(1, box[2, 0]), max(1, box[2, 1]))
                                pt2 = (box[3, 0], box[3, 1])
                                pt3 = (min(box[0, 0],
                                           xDim - 2), min(yDim - 2, box[0, 1]))
                                pt4 = (box[1, 0], box[1, 1])
                            else:  # box point 1 is at the bottom-left, clockwise order
                                pt1 = (max(1, box[1, 0]), max(1, box[2, 1]))
                                pt2 = (box[2, 0], box[2, 1])
                                pt3 = (min(box[3, 0],
                                           xDim - 2), min(yDim - 2, box[3, 1]))
                                pt4 = (box[0, 0], box[0, 1])

                            points = [pt1, pt2, pt3, pt4]
                            points_list.append(points)

                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=2)

        tf.reset_default_graph()
        keras.backend.clear_session()
        input = Input(shape=(32, None, 1), name='the_input')
        y_pred = dense_cnn(input, nclass)
        recognition_model = Model(inputs=input, outputs=y_pred)
        model_path = './recognition/...'
        if os.path.exists(model_path):
            print('loading recognition model')
            recognition_model.load_weights(model_path)
        else:
            print('recognition model does not exist')
            break

        j = 0
        txt_path = os.path.join(FLAGS.output_dir,
                                im_fn.split('/')[-1].split('.')[0])
        with open('{}.txt'.format(txt_path), 'a', encoding='utf-8') as outf:
            for points in points_list:
                j += 1
                pt1 = points[0]
                pt2 = points[1]
                pt3 = points[2]
                pt4 = points[3]
                degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0]))
                text_img = dumpRotateImage(im[:, :, ::-1], degree, pt1, pt2,
                                           pt3, pt4)
                text_img = cv2.cvtColor(text_img, cv2.COLOR_BGR2GRAY)

                text_h, text_w = text_img.shape[:2]
                if text_h // text_w > 1:
                    continue
                dst_h = 32
                dst_w = text_w * dst_h // text_h
                text_img = cv2.resize(text_img, (dst_w, dst_h))
                X = text_img.reshape([1, 32, -1, 1])
                y_pred = recognition_model.predict(X)
                y_pred = y_pred[:, :, :]
                out = _decode(y_pred)
                img_PIL = Image.fromarray(
                    cv2.cvtColor(draw_img, cv2.COLOR_BGR2RGB))
                font = ImageFont.truetype('./utils/simsun.ttc', 12)
                fillColor = (255, 0, 0)
                draw = ImageDraw.Draw(img_PIL)
                if out is None:
                    out = ''
                draw.text(pt4, out, font=font, fill=fillColor)
                draw_img = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
                outf.write('{}. \t{}\n'.format(j, out))

            if not FLAGS.no_write_images:
                img_path = os.path.join(FLAGS.output_dir,
                                        os.path.basename(im_fn))
                cv2.imwrite(img_path, draw_img)

    print('total time = ', time.time() - t0)
Example #12
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise

    if not os.path.isdir(os.path.join(FLAGS.output_dir, "crop")):
        os.makedirs(os.path.join(FLAGS.output_dir, "crop"))

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        seg_maps_pred = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            # model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            # logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, FLAGS.checkpoint_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                im = cv2.imread(im_fn)[:, :, ::-1]
                logger.debug('image file:{}'.format(im_fn))

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                h, w, _ = im_resized.shape
                # options = tf.RunOptions(trace_level = tf.RunOptions.FULL_TRACE)
                # run_metadata = tf.RunMetadata()
                timer = {'net': 0, 'pse': 0}
                start = time.time()
                seg_maps = sess.run(seg_maps_pred,
                                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start
                # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                # chrome_trace = fetched_timeline.generate_chrome_trace_format()
                # with open(os.path.join(FLAGS.output_dir, os.path.basename(im_fn).split('.')[0]+'.json'), 'w') as f:
                #     f.write(chrome_trace)

                boxes, kernels, timer = detect(seg_maps=seg_maps,
                                               timer=timer,
                                               image_w=w,
                                               image_h=h)
                logger.info('{} : net {:.0f}ms, pse {:.0f}ms'.format(
                    im_fn, timer['net'] * 1000, timer['pse'] * 1000))

                if boxes is not None:
                    boxes = boxes.reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                    h, w, _ = im.shape
                    boxes[:, :, 0] = np.clip(boxes[:, :, 0], 0, w)
                    boxes[:, :, 1] = np.clip(boxes[:, :, 1], 0, h)

                duration = time.time() - start_time
                logger.info('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, '{}.txt'.format(
                            os.path.splitext(os.path.basename(im_fn))[0]))

                    with open(res_file, 'w') as f:
                        num = 0
                        for i in range(len(boxes)):
                            # to avoid submitting errors
                            box = boxes[i]
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue

                            num += 1

                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
                            if not FLAGS.is_cropping:
                                cv2.polylines(
                                    im[:, :, ::-1],
                                    [box.astype(np.int32).reshape((-1, 1, 2))],
                                    True,
                                    color=(255, 255, 0),
                                    thickness=2)
                            else:
                                lt_x = box[2, 0]
                                lt_y = box[2, 1]
                                rt_x = box[3, 0]
                                rt_y = box[3, 1]
                                lb_x = box[1, 0]
                                lb_y = box[1, 1]
                                rb_x = box[0, 0]
                                rb_y = box[0, 1]
                                if lt_x > lb_x:
                                    lt_x = lb_x
                                if lt_y > rt_y:
                                    lt_y = rt_y
                                if rt_x < rb_x:
                                    rt_x = rb_x
                                if rt_y > lt_y:
                                    rt_y = lt_y
                                if lb_x > lt_x:
                                    lb_x = lt_x
                                if lb_y < rb_y:
                                    lb_y = rb_y
                                if rb_x < rt_x:
                                    rb_x = rt_x
                                if rb_y < lb_y:
                                    rb_y = lb_y


                                # padding = 3
                                # lt_x -= padding
                                # lt_y -= padding
                                # lb_x -= padding
                                # lb_y += padding
                                # rt_x += padding
                                # rt_y -= padding
                                # rb_x += padding
                                # rb_y += padding
                                crop_img = im[int(lt_y):int(lb_y),
                                              int(lt_x):int(rt_x)]
                                cv2.imwrite(
                                    os.path.join(FLAGS.output_dir, "crop",
                                                 ("%d_" % i) +
                                                 os.path.basename(im_fn)),
                                    crop_img[:, :, ::-1])

                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])
Example #13
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        cls_score = tf.nn.softmax(f_score)[:, :, :, 1:2]

        pixel_score = tf.reshape(f_geometry, [-1, 2])
        pixel_score = tf.nn.softmax(pixel_score)
        pixel_shape = tf.shape(f_geometry)
        pixel_score = tf.reshape(
            pixel_score,
            [pixel_shape[0], pixel_shape[1], pixel_shape[2], pixel_shape[3]])

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        timer = {'net': 0}

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                print('{} {}'.format(ratio_h, ratio_w))
                # score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: [im_resized]})
                score, geometry = sess.run(
                    [cls_score, pixel_score],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start_time

                print('net time: ' + str(timer['net'] * 1000) + 'ms')

                cv2.imwrite('./score_map.jpg',
                            np.array(score[0, :, :, 0] * 255, dtype=np.uint8))

                score_map_res = pixel_detect(score_map=score, geo_map=geometry)

                cv2.imwrite('./img.jpg', score_map_res * 255)

                # pdb.set_trace()

                boxes = []

                im2, contours, hierarchy = cv2.findContours(
                    score_map_res, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                # pdb.set_trace()

                im_ori = cv2.imread(im_fn)
                im_ori_resize = cv2.resize(im_ori,
                                           (int(im_ori.shape[1] * ratio_w),
                                            int(im_ori.shape[0] * ratio_h)))

                for i in range(len(contours)):
                    np_contours = np.array(np.reshape(contours[i], [-1, 2]),
                                           dtype=np.float32)
                    rectangle = cv2.minAreaRect(np_contours)
                    box = np.int0(cv2.boxPoints(rectangle))

                    box[:, 0] = box[:, 0] * 4
                    box[:, 1] = box[:, 1] * 4

                    cv2.drawContours(im_ori_resize, [box], -1, (0, 255, 0), 3)

                    # pdb.set_trace()
                    box[:, 0] = box[:, 0] / ratio_w
                    box[:, 1] = box[:, 1] / ratio_h
                    boxes.append(box)

                img_path = os.path.join(FLAGS.output_dir,
                                        os.path.basename(im_fn))
                cv2.imwrite(img_path, im_ori_resize)

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir, 'res_{}.txt'.format(
                            os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # pdb.set_trace()
                            # box = sort_poly(box.astype(np.int32))
                            box = order_points(box)
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
Example #14
    def train(self):
        # iteration number
        global_step = tf.Variable(1,
                                  dtype=tf.int32,
                                  trainable=False,
                                  name='iter_number')

        # training graph
        iterator = self._data_layer()
        image_orig, image_noisy = iterator.get_next()
        training = tf.placeholder(tf.bool, name='is_training')
        logits = model(image_noisy, training=training)
        loss = self._loss_functions(logits, image_orig)
        optimizer = self._optimizer(loss, global_step)

        # summary placeholders
        streaming_loss_p = tf.placeholder(tf.float32)
        validation_loss_p = tf.placeholder(tf.float32)
        summ_op_train = tf.summary.scalar('streaming_loss', streaming_loss_p)
        summ_op_test = tf.summary.scalar('validation_loss', validation_loss_p)

        # don't allocate entire gpu memory
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(iterator.initializer)

            writer = tf.summary.FileWriter(self.checkpoint_path, sess.graph)

            saver = tf.train.Saver(max_to_keep=None)  # keep all checkpoints
            ckpt = tf.train.get_checkpoint_state(self.checkpoint_path)

            # resume training if a checkpoint exists
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loaded parameters from {}'.format(
                    ckpt.model_checkpoint_path))

            initial_step = global_step.eval()

            # train the model
            streaming_loss = 0
            for i in range(initial_step, self.num_iter + 1):
                _, loss_batch = sess.run([optimizer, loss],
                                         feed_dict={training: True})

                if not np.isfinite(loss_batch):
                    print('loss diverged, stopping')
                    exit()

                # log summary
                streaming_loss += loss_batch
                if i % self.log_iter == self.log_iter - 1:
                    streaming_loss /= self.log_iter
                    print(i + 1, streaming_loss)
                    summary_train = sess.run(
                        summ_op_train,
                        feed_dict={streaming_loss_p: streaming_loss})
                    writer.add_summary(summary_train, global_step=i)
                    streaming_loss = 0

                # save model
                if i % self.save_iter == self.save_iter - 1:
                    saver.save(sess,
                               os.path.join(self.checkpoint_path,
                                            'checkpoint'),
                               global_step=global_step)
                    print("Model saved!")

                # run validation
                if i % self.val_iter == self.val_iter - 1:
                    print("Running validation.")
                    self.data_generator.set_mode(is_training=False)
                    sess.run(iterator.initializer)

                    validation_loss = 0
                    for j in range(self.data_generator.num_val //
                                   self.batch_size):
                        loss_batch = sess.run(loss,
                                              feed_dict={training: False})
                        validation_loss += loss_batch
                    validation_loss /= (j + 1)  # average over all validation batches

                    print("Validation loss: {}".format(validation_loss))

                    summary_test = sess.run(
                        summ_op_test,
                        feed_dict={validation_loss_p: validation_loss})
                    writer.add_summary(summary_test, global_step=i)

                    self.data_generator.set_mode(is_training=True)
                    sess.run(iterator.initializer)

            writer.close()
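
A hypothetical driver, assuming the enclosing class is called Trainer and its constructor wires up the attributes used above (checkpoint_path, num_iter, log_iter, save_iter, val_iter, batch_size, data_generator):

trainer = Trainer(checkpoint_path='./checkpoints', num_iter=100000,
                  log_iter=100, save_iter=1000, val_iter=1000, batch_size=32)
trainer.train()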
Example #15
def init_model():
    input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
    _, classes = model.model(input_image)
    return input_image, classes
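
A hypothetical way to drive init_model; the checkpoint path and the image array img are assumptions:

input_image, classes = init_model()
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, 'path/to/model.ckpt')  # assumed checkpoint
    cls = sess.run(classes, feed_dict={input_image: [img]})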
Example #16
def main(argv=None):
    import os
    if os.path.exists(FLAGS.result_path):
        shutil.rmtree(FLAGS.result_path)
    os.makedirs(FLAGS.result_path)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    pascal_voc_lut = pascal_segmentation_lut()

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        logits = model.model(input_images, is_training=False)
        pred = tf.argmax(logits, dimension=3)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        # saver = tf.train.Saver(variable_averages.variables_to_restore())
        saver = tf.train.Saver(tf.global_variables())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            # model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))

            # ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            # restore_step=int(ckpt.split('.')[0].split('_')[-1])

            model_path = FLAGS.checkpoint_path

            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            ###############################
            pkl_file = 'data/arroyo_seg.pkl'
            with open(pkl_file, 'rb') as f:
                obj = pickle.load(f)
            im_fn_list, anno_files = obj

            # im_fn_list = get_images()
            for im_fn, seg_fn in zip(im_fn_list, anno_files):
                # im = cv2.imread(im_fn)[:, :, ::-1]
                im = np.array(Image.open(im_fn))
                seg = np.array(Image.open(seg_fn))
                im_resized, (ratio_h, ratio_w) = resize_image(im, size=32)
                # import ipdb; ipdb.set_trace()

                start = time.time()
                pred_re = sess.run([pred],
                                   feed_dict={input_images: [im_resized]})
                pred_re = np.array(np.squeeze(pred_re))

                seg[seg == 255] = 0

                img = visualize_segmentation_adaptive(pred_re, pascal_voc_lut)
                img_seg = visualize_segmentation_adaptive(seg, pascal_voc_lut)

                # import ipdb; ipdb.set_trace()
                #img_true=return_overlayed_img(Image.fromarray(img), Image.fromarray(seg))
                #img_pred=return_overlayed_img(Image.fromarray(img), Image.fromarray(pred_re))

                _diff_time = time.time() - start
                cv2.imwrite(
                    os.path.join(FLAGS.result_path, os.path.basename(im_fn)),
                    np.hstack((img, img_seg)))

                print('{}: cost {:.0f}ms'.format(im_fn, _diff_time * 1000))
Example #17
def ckpt2pb(ckptpath):

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    tf.reset_default_graph()
    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    binarize_map, threshold_map, thresh_binary = model.model(input_images,
                                                             is_training=False)

    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    gpu_config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options,
                                allow_soft_placement=True)
    sess = tf.Session(config=gpu_config)
    saver.restore(sess, ckptpath)
    input_graph_def = sess.graph.as_graph_def()

    for node in input_graph_def.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
            for index in range(len(node.input)):
                if 'moving_' in node.input[index]:
                    node.input[index] = node.input[index] + '/read'
        elif node.op == 'AssignSub':
            node.op = 'Sub'
            if 'use_locking' in node.attr: del node.attr['use_locking']
        elif node.op == 'AssignAdd':
            node.op = 'Add'
            if 'use_locking' in node.attr: del node.attr['use_locking']

    constant_graph = graph_util.convert_variables_to_constants(
        sess, input_graph_def,
        ['feature_fusion/binarize_branch/Conv2d_transpose_1/Sigmoid'])

    output_graph_def = optimize_for_inference(
        input_graph_def=constant_graph,
        input_node_names=['input_images'],
        output_node_names=[
            'feature_fusion/binarize_branch/Conv2d_transpose_1/Sigmoid'
        ],
        placeholder_type_enum=[tf.float32.as_datatype_enum])
    # Convert to a TFLite file (left disabled)
    #converter = tf.contrib.lite.TFLiteConverter.from_frozen_graph(args.output_file, ['image_batch'],
    #                                                              ['pfld_inference/fc/BiasAdd'],
    #                                                              {"image_batch": [1, 112, 112, 3]
    #                                                               }
    #                                                              )
    # converter.allow_custom_ops = True
    # converter.inference_type = _types_pb2.QUANTIZED_UINT8
    #converter.post_training_quantize = True
    #tflite_model = converter.convert()

    with tf.gfile.FastGFile('db.pb', mode='wb') as f:
        f.write(output_graph_def.SerializeToString())
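
A minimal sketch for loading the frozen db.pb written above; the tensor names match the node names passed to optimize_for_inference, and image is assumed to be an RGB float array:

import tensorflow as tf

with tf.gfile.GFile('db.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')
    inp = graph.get_tensor_by_name('input_images:0')
    out = graph.get_tensor_by_name(
        'feature_fusion/binarize_branch/Conv2d_transpose_1/Sigmoid:0')
    with tf.Session(graph=graph) as sess:
        binarize = sess.run(out, feed_dict={inp: [image]})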
Example #18
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list


    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        # [1/4H, 1/4W,1], [1/4*h,1/4*w,4]
        f_score, f_geometry = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.model_path)
            model_path = os.path.join(FLAGS.model_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                # Resize the image to a multiple of 32 while roughly keeping its original size

                im_resized, ratio_h, ratio_w = data_util.resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)
                print('{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                    im_fn, timer['net']*1000, timer['restore']*1000, timer['nms']*1000))

                if boxes is not None:
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                # save the detections to a text file; draw on a contiguous BGR
                # copy, since cv2 drawing functions do not accept the
                # negative-stride view im[:, :, ::-1]
                im_bgr = im[:, :, ::-1].copy()
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(
                            os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # sort the vertices to avoid submission errors
                            box = sort_poly(box.astype(np.int32))
                            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5:
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1], box[2, 0], box[2, 1], box[3, 0], box[3, 1],
                            ))
                            cv2.polylines(im_bgr, [box.astype(np.int32).reshape((-1, 1, 2))], True, color=(255, 255, 0), thickness=1)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir, os.path.basename(im_fn))
                    cv2.imwrite(img_path, im_bgr)
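data_util.resize_image is not shown in this snippet; a plausible implementation, matching the comment above (both sides rounded to multiples of 32, size roughly preserved) and the ratio_h/ratio_w convention used by the caller, could look like this:

import cv2

def resize_image(im, max_side_len=2400):
    # Sketch: keep both sides multiples of 32 (the network stride).
    h, w = im.shape[:2]
    resize_h, resize_w = h, w
    # Shrink first if the longer side exceeds max_side_len.
    if max(resize_h, resize_w) > max_side_len:
        ratio = float(max_side_len) / max(resize_h, resize_w)
        resize_h, resize_w = int(resize_h * ratio), int(resize_w * ratio)
    # Round both sides down to multiples of 32.
    resize_h = max(32, resize_h // 32 * 32)
    resize_w = max(32, resize_w // 32 * 32)
    im = cv2.resize(im, (resize_w, resize_h))
    # Dividing resized coordinates by these ratios maps them back to the original image.
    return im, resize_h / float(h), resize_w / float(w)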
Beispiel #19
0
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    pascal_voc_lut = pascal_segmentation_lut()

    filename_queue = tf.train.string_input_producer([FLAGS.test_data_path],
                                                    num_epochs=1)
    image, annotation = read_tfrecord_and_decode_into_image_annotation_pair_tensors(
        filename_queue)

    image_batch_tensor = tf.expand_dims(image, axis=0)
    annotation_batch_tensor = tf.expand_dims(annotation, axis=0)

    input_image_shape = tf.shape(image_batch_tensor)
    image_height_width = input_image_shape[1:3]
    image_height_width_float = tf.to_float(image_height_width)
    image_height_width_multiple = tf.to_int32(
        tf.round(image_height_width_float / 32) * 32)

    image_batch_tensor = tf.image.resize_images(image_batch_tensor,
                                                image_height_width_multiple)

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    logits = model.model(image_batch_tensor, is_training=False)
    pred = tf.argmax(logits, axis=3)
    pred = tf.expand_dims(pred, 3)
    pred = tf.image.resize_nearest_neighbor(images=pred,
                                            size=image_height_width)
    annotation_batch_tensor = tf.image.resize_nearest_neighbor(
        images=annotation_batch_tensor, size=image_height_width)

    pred = tf.reshape(pred, [-1])
    gt = tf.reshape(annotation_batch_tensor, [-1])
    # Pixels whose label exceeds num_classes - 1 (the "ignore" label) get weight 0
    # and are remapped to class 0 so the streaming metrics skip them
    temp = tf.less_equal(gt, FLAGS.num_classes - 1)
    weights = tf.cast(temp, tf.int32)
    gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))
    acc, acc_update_op = tf.contrib.metrics.streaming_accuracy(pred,
                                                               gt,
                                                               weights=weights)
    miou, miou_update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=FLAGS.num_classes, weights=weights)

    with tf.get_default_graph().as_default():
        global_vars_init_op = tf.global_variables_initializer()
        local_vars_init_op = tf.local_variables_initializer()
        init = tf.group(local_vars_init_op, global_vars_init_op)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            sess.run(init)
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            for i in range(1449):  # 1449 images in the Pascal VOC 2012 validation set
                start = time.time()
                image_np, annotation_np, pred_np, tmp_acc, tmp_miou = sess.run(
                    [image, annotation, pred, acc_update_op, miou_update_op])
                _diff_time = time.time() - start
                print('{}: cost {:.0f}ms'.format(i, _diff_time * 1000))
                #upsampled_predictions = pred_np.squeeze()
                #plt.imshow(image_np)
                #plt.show()
                #visualize_segmentation_adaptive(upsampled_predictions, pascal_voc_lut)
            acc_res = sess.run(acc)
            miou_res = sess.run(miou)
            print("Pascal VOC 2012 validation dataset pixel accuracy: " +
                  str(acc_res))
            print("Pascal VOC 2012 validation dataset Mean IoU: " +
                  str(miou_res))

            coord.request_stop()
            coord.join(threads)
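The streaming metrics above accumulate confusion counts across repeated runs of the update ops, and the metric tensor reads the running value; a self-contained sketch of that pattern with toy data (placeholder names are illustrative):

import tensorflow as tf

preds = tf.placeholder(tf.int64, [None])
labels = tf.placeholder(tf.int64, [None])
miou, miou_update_op = tf.contrib.metrics.streaming_mean_iou(
    preds, labels, num_classes=3)

with tf.Session() as sess:
    # The metric's accumulators are local variables, hence this initializer.
    sess.run(tf.local_variables_initializer())
    for p, g in [([0, 1, 2, 1], [0, 1, 2, 2]),
                 ([2, 2, 0, 1], [2, 1, 0, 1])]:
        sess.run(miou_update_op, feed_dict={preds: p, labels: g})
    # Read the accumulated metric once at the end, like acc_res/miou_res above.
    print('mean IoU:', sess.run(miou))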
Beispiel #20
0
def main(argv=None):

    # Select the GPU
    if FLAGS.gpu != "1" and FLAGS.gpu != "0":
        logger.error("Cannot determine which GPU to use, exiting")
        exit()
    logger.info("Training on GPU %s", FLAGS.gpu)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger.info(
        "Parameters for this run:\nlearning_rate:%f\ndecay_steps:%f\nmax_steps:%d\nevaluate_steps:%d\nmodel:%s\nlambda1:%d\nlogs_path:%s\nrestore:%r\ndebug:%r\nsave_checkpoint_steps:%d",
        FLAGS.learning_rate,
        FLAGS.decay_steps,
        FLAGS.max_steps,
        FLAGS.evaluate_steps,
        FLAGS.model,
        FLAGS.lambda1,
        FLAGS.logs_path,
        FLAGS.restore,
        FLAGS.debug,
        FLAGS.save_checkpoint_steps)

    now = datetime.datetime.now()
    StyleTime = now.strftime("%Y-%m-%d-%H-%M-%S")
    os.makedirs(os.path.join(FLAGS.logs_path, StyleTime))
    if not os.path.exists(FLAGS.model):
        os.makedirs(FLAGS.model)


    # Input image dimensions: [batch, height, width, 3 channels]
    ph_input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='ph_input_image')
    ph_label = tf.placeholder(tf.int64,   shape=[None], name='ph_label')
    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False)

    tf.summary.scalar('learning_rate', learning_rate)
    adam_opt = tf.train.AdamOptimizer(learning_rate)  # Adam's default learning_rate is 0.001 and it keeps adapting from the gradients, so this number rarely needs setting

    # gpu_id = int(FLAGS.gpu)
    # with tf.device('/gpu:%d' % gpu_id):
    #     with tf.name_scope('model_%d' % gpu_id) as scope:
    cls_prob, cls_preb = model.model(ph_input_image)
    cross_entropy = model.loss(cls_prob, ph_label)
    batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS))
    # Compute the gradients
    grads = adam_opt.compute_gradients(cross_entropy)
    # logger.info("Graph definition complete, placed on gpu:%d", gpu_id)
    # Apply the computed gradients to update the corresponding variables
    apply_gradient_op = adam_opt.apply_gradients(grads, global_step=global_step)

    # Variables holding recall, precision, and F1 for the summaries
    v_recall = tf.Variable(0.001, trainable=False)
    v_precision = tf.Variable(0.001, trainable=False)
    v_accuracy = tf.Variable(0.001, trainable=False)
    v_f1 = tf.Variable(0.001, trainable=False)
    tf.summary.scalar("Recall",v_recall)
    tf.summary.scalar("Precision",v_precision)
    tf.summary.scalar("F1",v_f1)
    summary_op = tf.summary.merge_all()
    logger.info("summary定义完毕")

    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # tf.control_dependencies() expresses execution dependencies between ops:
    # train_op below runs only after the listed ops have run
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')  # no_op does nothing itself, but everything it depends on gets run

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
    summary_writer = tf.summary.FileWriter(os.path.join(FLAGS.logs_path,StyleTime), tf.get_default_graph())

    if FLAGS.pretrained_model_path is not None:
        logger.info('Loading the VGG model: %s', FLAGS.pretrained_model_path)
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
                                                             slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
    # Variables used for early stopping
    best_f1 = 0
    early_stop_counter = 0

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        if FLAGS.restore:
            ckpt = tf.train.latest_checkpoint(FLAGS.model)
            logger.debug("最新的模型文件:%s",ckpt) #有点担心learning rate也被恢复
            saver.restore(sess, ckpt)
        else:
            logger.info("从头开始训练模型")
            sess.run(tf.global_variables_initializer())
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        logger.debug("开始加载训练数据")
        # 是的,get_batch返回的是一个generator
        data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers,label_file=FLAGS.train_label,batch_num=FLAGS.train_batch)
        start = time.time()
        train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(start))

        logger.debug("开始训练")
        for step in range(FLAGS.max_steps):

            image_list, label_list = next(data_generator)  # next(<generator>) yields the next batch
            logger.debug("Loaded %d images and %d labels", len(image_list), len(label_list))

            image_list = data_util.prepare4vgg(image_list)
            logger.debug("开始第%d步训练,运行sess.run,数据shape:%r",step,image_list.shape)

            _, summary_str, classes = sess.run([train_op, summary_op, cls_prob],
                feed_dict={ph_input_image: image_list, ph_label: label_list})  # data[3] is the image path; it is fed in only for debug plotting
            logger.info("Finished training step %d, sess.run done", step)
            summary_writer.add_summary(summary_str, global_step=step)

            if step != 0 and step % FLAGS.evaluate_steps == 0:
                logger.info("Step %d: starting model evaluation", step)

                # data[4] holds the big-box coordinates, an array of 8 values
                accuracy_value,precision_value,recall_value,f1_value = validate(sess,cls_preb,ph_input_image,ph_label)

                if f1_value > best_f1:
                    logger.info("New F1 [%f] beats the best so far [%f]; resetting the early-stop counter", f1_value, best_f1)
                    best_f1 = f1_value
                    early_stop_counter = 0
                    # Save a checkpoint every time the model improves
                    filename = ('ctpn-{:s}-{:d}'.format(train_start_time, step + 1) + '.ckpt')
                    filename = os.path.join(FLAGS.model, filename)
                    saver.save(sess, filename)
                    logger.info("Step %d: saved the best model so far: %s, F1: %f", step, filename, best_f1)
                else:
                    logger.info("New F1 [%f] does not beat the best so far [%f]; early-stop counter +1", f1_value, best_f1)
                    early_stop_counter += 1

                # Update the F1, recall, and precision summary variables
                # (note: tf.assign here creates new ops on every evaluation, which slowly grows the graph)
                sess.run([tf.assign(v_f1,        f1_value),
                          tf.assign(v_recall,    recall_value),
                          tf.assign(v_precision, precision_value),
                          tf.assign(v_accuracy,  accuracy_value)])
                logger.info("Step %d: model evaluation finished", step)

                if early_stop_counter > FLAGS.early_stop:
                    logger.warning("Early-stop count reached (%d); stopping training early", early_stop_counter)
                    break

            if step != 0 and step % FLAGS.decay_steps == 0:
                logger.info("Learning rate decay: %f => %f", learning_rate.eval(), learning_rate.eval() * FLAGS.decay_rate)
                sess.run(tf.assign(learning_rate, learning_rate.eval() * FLAGS.decay_rate))
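For comparison, the manual assign-based decay above could also be written with tf.train.exponential_decay, which derives the current rate from global_step; a sketch reusing the same FLAGS, not the original author's code:

import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
learning_rate = tf.train.exponential_decay(
    FLAGS.learning_rate,            # initial rate
    global_step,
    decay_steps=FLAGS.decay_steps,  # decay every this many steps
    decay_rate=FLAGS.decay_rate,    # multiplicative factor per decay
    staircase=True)                 # discrete drops, like the manual loop above
adam_opt = tf.train.AdamOptimizer(learning_rate)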