def image_resize(self, images, resize_shape):
        """
        Resize every image of a batch to one common shape.

        :param images: batch of images, indexed along its first axis
            (documented layout: [count, height, width, channels])
        :param resize_shape: target shape, indexed as [height, width, channels]
        :return: uint8 array of shape [count, height, width, channels]
            holding the resized images
        """
        print("进行图像大小的重塑...")
        # Number of images in the batch.
        total = images.shape[0]
        # Pre-allocate the output buffer: one slot per image in the target shape.
        out_shape = [total] + [resize_shape[axis] for axis in range(3)]
        resize_images = np.zeros(shape=out_shape, dtype=np.uint8)

        # Resize the images one by one and store each at its original position.
        position = 0
        while position < total:
            resize_images[position] = scipy.misc.imresize(
                images[position], size=resize_shape, interp='bicubic')
            position += 1
            tool.view_bar("重塑图像大小", position, total)
        return resize_images
Exemple #2
0
def split_coco(imgs_path,
               annotaions_path,
               dst_dir,
               num_catetory=20,
               num_per_category=18):
    """
    Extract a subset of a COCO-style dataset into ``dst_dir``.

    The annotation file is loaded and indexed with ``create_index``; the
    images and categories to keep are chosen by ``get_img_per_categorise``.
    The reduced annotation set is written to
    ``<dst_dir>/Annotations/instances.json`` and the selected images are
    copied to ``<dst_dir>/Images/<img_id>.jpg``.

    :param imgs_path: directory of the source images; each file is expected
        to be named as the image id zero-padded to 12 digits plus ``.jpg``
    :param annotaions_path: path of the source annotation JSON file
    :param dst_dir: output root directory
    :param num_catetory: forwarded to ``get_img_per_categorise``
        (presumably the number of categories to keep -- confirm there)
    :param num_per_category: forwarded to ``get_img_per_categorise``
        (presumably the number of images per category -- confirm there)
    :return: None
    """
    # Context manager so the annotation file handle is closed right away.
    with open(annotaions_path, 'r') as fr:
        dataset = json.load(fr)

    sub_annotations_path = os.path.join(dst_dir, 'Annotations')
    sub_img_path = os.path.join(dst_dir, 'Images')

    anns, cats, imgs, img_anns, cate_imgs = create_index(dataset)

    img_id_list, category_id_list = get_img_per_categorise(
        cate_imgs, num_catetory, num_per_category)

    # Map every selected image id to its source file name (12-digit id).
    img_name_dict = {
        img_id: '{0}.jpg'.format(str(img_id).zfill(12))
        for img_id in img_id_list
    }

    # ---------------------------- write annotation info ----------------------
    images_list, annotations_list = get_images_annotaion_info(
        img_id_list, imgs, img_anns, category_id_list)
    new_dataset = {
        'info': dataset['info'],
        'licenses': dataset['licenses'],
        'images': images_list,
        'annotations': annotations_list,
        'categories': dataset['categories'],
    }

    makedir(sub_annotations_path)
    json_path = os.path.join(sub_annotations_path, 'instances.json')
    with open(json_path, 'w') as fw:
        json.dump(new_dataset, fw)
    print(
        'Successful write the number of {0} annotations respect to {1} images to {2}'
        .format(len(new_dataset['annotations']), len(new_dataset['images']),
                json_path))

    # ---------------------------- copy images --------------------------------
    makedir(sub_img_path)

    for num_samples, (img_id, img_name) in enumerate(img_name_dict.items(),
                                                     start=1):
        # The copy is renamed to the plain (unpadded) image id.
        shutil.copy(os.path.join(imgs_path, img_name),
                    os.path.join(sub_img_path, '{0}.jpg'.format(img_id)))
        view_bar("split coco:", num_samples, len(img_name_dict))

    print('Successful copy the number of {0} images to {1}'.format(
        len(img_name_dict), sub_img_path))
Exemple #3
0
def convert_pascal_to_tfrecord():
    """
    Convert a PASCAL VOC style dataset into a single TFRecord file.

    All paths come from ``FLAGS``: annotations are read from
    ``FLAGS.VOC_dir/FLAGS.xml_dir``, images from
    ``FLAGS.VOC_dir/FLAGS.image_dir`` and the result is written to
    ``FLAGS.save_dir/<dataset>_<save_name>.tfrecord``. For every ``*.xml``
    file, the image with the same stem plus ``FLAGS.img_format`` is stored
    together with its ground-truth boxes. Annotations whose image is missing
    or cannot be decoded are reported and skipped.

    :return: None
    """
    xml_path = os.path.join(FLAGS.VOC_dir, FLAGS.xml_dir)
    image_path = os.path.join(FLAGS.VOC_dir, FLAGS.image_dir)
    save_path = os.path.join(
        FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord')
    makedirs(FLAGS.save_dir)

    # List the annotations once; the list also gives the progress total.
    xml_files = glob.glob(os.path.join(xml_path, '*.xml'))
    total = len(xml_files)

    writer = tf.python_io.TFRecordWriter(path=save_path)
    try:
        for count, xml in enumerate(xml_files):
            # basename/splitext work for any path separator and keep dots
            # that are part of the file stem.
            img_name = os.path.splitext(
                os.path.basename(xml))[0] + FLAGS.img_format
            img_path = os.path.join(image_path, img_name)

            if not os.path.exists(img_path):
                print('{} is not exist!'.format(img_path))
                continue

            img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml)

            bgr_img = cv2.imread(img_path)
            if bgr_img is None:
                # cv2.imread returns None instead of raising on a bad file.
                print('{} can not be read!'.format(img_path))
                continue
            # Reverse the channel axis: OpenCV loads BGR, the record holds RGB.
            img = bgr_img[:, :, ::-1]

            feature = tf.train.Features(
                feature={
                    'img_name': _bytes_feature(img_name.encode()),
                    'img_height': _int64_feature(img_height),
                    'img_width': _int64_feature(img_width),
                    # tobytes() is the supported spelling of tostring().
                    'img': _bytes_feature(img.tobytes()),
                    'gtboxes_and_label':
                    _bytes_feature(gtbox_label.tobytes()),
                    'num_objects': _int64_feature(gtbox_label.shape[0])
                })

            example = tf.train.Example(features=feature)
            writer.write(example.SerializeToString())

            view_bar('Conversion progress', count + 1, total)

        print('\nConversion is complete!')
    finally:
        # Close the record file even if a sample fails.
        writer.close()
0
def face_filter(src_path, dst_path):
    """
    Clean a face dataset laid out as ``src_path/<person>/<image>``.

    Every image is passed to ``fdetector.detect``:

    * no face found: the image is deleted when ``dst_path`` is None,
      otherwise it is moved to ``dst_path/<person>/``;
    * exactly one face: the image is left untouched;
    * several faces: each face box is enlarged by its own width and height
      on every side, clipped to the image, and saved next to the source
      image as ``<stem>-<index>.jpg``. The source image itself is kept.

    :param src_path: root directory containing one sub-directory per person
    :param dst_path: directory receiving images without a face, or None to
        delete them
    :return: None
    """
    people_list = os.listdir(src_path)
    for i, people in enumerate(people_list):
        people_image_path = os.path.join(src_path, people)
        # Listed once up front, so crops written below are not re-processed.
        for image_name in os.listdir(people_image_path):
            image_path = os.path.join(people_image_path, image_name)
            print(image_path)
            bboxes = fdetector.detect(image_path, remove_inner_face=False)
            if len(bboxes) == 0:
                if dst_path is None:
                    os.remove(image_path)
                else:
                    people_image_dstpath = os.path.join(dst_path, people)
                    os.makedirs(people_image_dstpath, exist_ok=True)
                    shutil.move(
                        image_path,
                        os.path.join(people_image_dstpath, image_name))
            elif len(bboxes) > 1:
                img = cv2.imread(image_path)
                if img is None:
                    # cv2.imread returns None when the file cannot be decoded.
                    print('{} can not be read!'.format(image_path))
                    continue
                img_h, img_w = img.shape[0], img.shape[1]
                # splitext keeps dots inside the stem, so "a.b.jpg" and
                # "a.c.jpg" do not both collapse to "a".
                stem = os.path.splitext(image_name)[0]
                for j, bbox in enumerate(bboxes):
                    x1, y1, x2, y2 = bbox
                    width_delta = (x2 - x1)
                    height_delta = (y2 - y1)

                    # Grow the box by one box-size per side, then clip.
                    x1 = max(int(x1 - width_delta), 0)
                    y1 = max(int(y1 - height_delta), 0)
                    x2 = min(int(x2 + width_delta), img_w)
                    y2 = min(int(y2 + height_delta), img_h)

                    crop = img[y1:y2, x1:x2, :]
                    if crop.size == 0:
                        # A degenerate box gives an empty crop, which
                        # cv2.imwrite cannot encode.
                        continue
                    crop_path = os.path.join(
                        people_image_path, stem + '-' + str(j) + '.jpg')
                    cv2.imwrite(crop_path, crop)

        tools.view_bar('face_filter: ', i + 1, len(people_list))
Exemple #5
0
def _copy_pascal_subset(names, image_src, xml_src, image_dst, xml_dst,
                        message):
    """Copy the ``.jpg`` / ``.xml`` pair of every name and report progress."""
    total = len(names)
    for n, image in enumerate(names, start=1):
        shutil.copy(os.path.join(image_src, image + '.jpg'),
                    os.path.join(image_dst, image + '.jpg'))
        shutil.copy(os.path.join(xml_src, image + '.xml'),
                    os.path.join(xml_dst, image + '.xml'))
        # 1-based count so the bar reaches ``total`` on the last item.
        view_bar(message=message, num=n, total=total)
    print('Total of {0} data split to {1}'.format(
        total, os.path.dirname(image_dst)))


def split_pascal(origin_path, dst_path, split_rate=0.8):
    """
    Randomly split a PASCAL VOC style dataset into train and val subsets.

    Image stems are taken from ``origin_path/JPEGImages``, shuffled with
    ``np.random.permutation`` and cut at ``ceil(count * split_rate)``. For
    each stem the ``.jpg`` image and the ``.xml`` annotation are copied to
    ``dst_path/train`` or ``dst_path/val`` (``JPEGImages`` and
    ``Annotations`` sub-directories).

    :param origin_path: dataset root containing ``JPEGImages`` and
        ``Annotations``
    :param dst_path: output root directory
    :param split_rate: fraction of the samples assigned to the train subset
    :return: None
    """
    image_path = os.path.join(origin_path, 'JPEGImages')
    xml_path = os.path.join(origin_path, 'Annotations')

    image_train_path = os.path.join(dst_path, 'train', 'JPEGImages')
    xml_train_path = os.path.join(dst_path, 'train', 'Annotations')
    image_val_path = os.path.join(dst_path, 'val', 'JPEGImages')
    xml_val_path = os.path.join(dst_path, 'val', 'Annotations')
    makedir(image_train_path)
    makedir(xml_train_path)
    makedir(image_val_path)
    makedir(xml_val_path)

    # splitext strips only the extension, so stems containing dots survive.
    image_name = [
        os.path.splitext(image)[0] for image in os.listdir(image_path)
    ]
    image_name = np.random.permutation(image_name)
    num_train = int(math.ceil(len(image_name) * split_rate))
    train_image_name = image_name[:num_train]
    val_image_name = image_name[num_train:]

    _copy_pascal_subset(train_image_name, image_path, xml_path,
                        image_train_path, xml_train_path,
                        "split train dataset:")
    _copy_pascal_subset(val_image_name, image_path, xml_path,
                        image_val_path, xml_val_path,
                        "split val dataset:")
Exemple #6
0
            # 若读完整个数据则不再循环
            if j > len(file_list) - 1:
                break

            # 预测结果
            outputs = net(images)
            # outputs = F.softmax(outputs, dim=1)
            # _, preds = torch.max(outputs, 1)
            preds = torch.argmax(outputs, 1)
            predict_result = preds.numpy().tolist()
            # print(predict_result)
            # print(preds.numpy().tolist())
            # print(type(preds))
            # print(j)
            content = '{} {}\n'.format(file_list[j],
                                       class_name[predict_result[0]])
            file.write(content)
            j = j + 1
            tool.view_bar('测试数据:', j + 1, len(file_list))

        # # 将结果写入结果文件中
        # with open(result_file, mode='a+') as file:
        #     for i in range(images.size(0)):
        #         content = '{} {}\n'.format(file_list[j], class_name[predict_result[i]])
        #         file.write(content)
        #         j = j+1
        # print('结果保存完成...')

# print()
# print('micro_f1_score:{}, macro_f1_score:{}'.format(micro_f1, macro_f1))
def convert_pascal_to_tfrecord():
    """
    Convert a JSON-annotated dataset into a single TFRecord file.

    Despite its name, this function reads a JSON annotation file
    (``FLAGS.root_dir/FLAGS.json_file``) holding ``images`` and
    ``annotations`` lists. Annotations are grouped per image file name; each
    ``rbbox`` is converted with ``coordinate_convert_r`` and stored with its
    ``category_id``. Images are read from
    ``FLAGS.root_dir/FLAGS.image_dir`` and the result is written to
    ``FLAGS.save_dir/<dataset>_<save_name>.tfrecord``.

    ``image_id`` is used as a 1-based position in the ``images`` list;
    annotations whose id falls outside that list are skipped, as are images
    that cannot be decoded.

    :return: None
    """
    json_file = os.path.join(FLAGS.root_dir, FLAGS.json_file)
    image_path = os.path.join(FLAGS.root_dir, FLAGS.image_dir)
    save_path = os.path.join(
        FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord')
    makedirs(FLAGS.save_dir)

    with open(json_file, 'r') as fr:
        all_gts = json.load(fr)
    images = all_gts['images']
    annotations = all_gts['annotations']

    # file name -> {'height', 'width', 'gtboxes', 'labels'}
    all_gt_label = {}
    for annotation in annotations:
        image_id = annotation['image_id']
        # Ids <= 0 would index from the end of the list and attach the
        # annotation to the wrong image, so reject them with ids > len.
        if not 1 <= image_id <= len(images):
            continue
        image_info = images[image_id - 1]
        entry = all_gt_label.setdefault(
            image_info['file_name'], {
                'height': image_info['height'],
                'width': image_info['width'],
                'gtboxes': [],
                'labels': []
            })
        entry['gtboxes'].append(coordinate_convert_r(annotation['rbbox']))
        entry['labels'].append(annotation['category_id'])

    total = len(all_gt_label)
    writer = tf.python_io.TFRecordWriter(path=save_path)
    try:
        for count, (img_name, gt) in enumerate(all_gt_label.items(),
                                               start=1):
            # The image is stored in OpenCV channel order as loaded.
            img = cv2.imread(os.path.join(image_path, img_name))
            if img is None:
                # cv2.imread returns None instead of raising on a bad file.
                print('{} can not be read!'.format(
                    os.path.join(image_path, img_name)))
                view_bar('Conversion progress', count, total)
                continue

            # Each box is flattened to 8 values and the label is appended
            # as a 9th column.
            gtboxes = np.array(gt['gtboxes']).reshape([-1, 8])
            labels = np.array(gt['labels']).reshape([-1, 1])
            gtboxes_and_label = np.array(
                np.concatenate([gtboxes, labels], axis=-1), np.int32)

            feature = tf.train.Features(
                feature={
                    'img_name': _bytes_feature(img_name.encode()),
                    'img_height': _int64_feature(gt['height']),
                    'img_width': _int64_feature(gt['width']),
                    # tobytes() is the supported spelling of tostring().
                    'img': _bytes_feature(img.tobytes()),
                    'gtboxes_and_label': _bytes_feature(
                        gtboxes_and_label.tobytes()),
                    'num_objects': _int64_feature(gtboxes_and_label.shape[0])
                })

            example = tf.train.Example(features=feature)
            writer.write(example.SerializeToString())

            view_bar('Conversion progress', count, total)

        print('\nConversion is complete!')
    finally:
        # Close the record file even if a sample fails.
        writer.close()
Exemple #8
0
    def exucute_detect(self, image_path, save_path):
        """
        Run object detection on one image or on every image of a directory.

        For each image, the annotated result is written to
        ``save_path/<image name>``. The detections that pass
        ``cfgs.SHOW_SCORE_THRSHOLD``, rescaled to the original image size,
        are appended to ``save_path/detect_bbox.txt``.

        :param image_path: path of a single image file, or of a directory
            whose ``.jpg/.png/.jpeg/.tif/.tiff`` files are processed
        :param save_path: output directory (created with ``makedir``)
        :return: None
        :raises AssertionError: if no supported image is found
        """
        input_image = tf.placeholder(dtype=tf.uint8,
                                     shape=(None, None, 3),
                                     name='inputs_images')

        resize_img = self.image_process(input_image)
        # Add the batch dimension.
        image_batch = tf.expand_dims(input=resize_img,
                                     axis=0)  # (1, None, None, 3)

        self.detect_net.images_batch = image_batch
        # Build the detection graph.
        detection_boxes, detection_scores, detection_category = \
            self.detect_net.inference()

        # Restorer for the pretrained weights.
        restorer, restore_ckpt = self.detect_net.get_restorer()
        # Let TensorFlow grow GPU memory on demand.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        with tf.Session(config=config) as sess:
            sess.run(init_op)

            if restorer is not None:
                restorer.restore(sess, save_path=restore_ckpt)
                print('Successful restore model from {0}'.format(restore_ckpt))

            # Build the list of image names relative to image_dir.
            format_list = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')
            if os.path.isfile(image_path):
                # Split into directory + name so the joins below neither
                # duplicate the path nor write over the source image.
                image_dir, single_name = os.path.split(image_path)
                image_name_list = [single_name]
            else:
                image_dir = image_path
                image_name_list = [
                    img_name for img_name in os.listdir(image_dir)
                    if img_name.endswith(format_list)
                    and os.path.isfile(os.path.join(image_dir, img_name))
                ]

            # The text is the assertion message; it must not be printed on
            # every successful run.
            assert len(image_name_list) != 0, \
                "test_dir has no imgs there. Note that, we only support img format of {0}".format(
                    format_list)

            makedir(save_path)
            with open(os.path.join(save_path, 'detect_bbox.txt'), 'w') as fw:
                for index, img_name in enumerate(image_name_list):
                    bgr_img = cv.imread(os.path.join(image_dir, img_name))
                    if bgr_img is None:
                        # cv.imread returns None when decoding fails.
                        print('{0} can not be read!'.format(
                            os.path.join(image_dir, img_name)))
                        continue
                    # OpenCV loads BGR; the network is fed RGB.
                    rgb_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2RGB)

                    start_time = time.perf_counter()
                    feed_dict = {input_image: rgb_img}
                    resized_img, detected_boxes, detected_scores, detected_categories = \
                        sess.run([resize_img, detection_boxes, detection_scores, detection_category],
                                 feed_dict=feed_dict)
                    end_time = time.perf_counter()

                    # Keep only detections above the display threshold.
                    object_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                    object_scores = detected_scores[object_indices]
                    object_boxes = detected_boxes[object_indices]
                    object_categories = detected_categories[object_indices]

                    final_detections_img = draw_box_in_img.draw_boxes_with_label_and_scores(
                        resized_img,
                        boxes=object_boxes,
                        labels=object_categories,
                        scores=object_scores)
                    final_detections_img = cv.cvtColor(final_detections_img,
                                                       cv.COLOR_RGB2BGR)
                    cv.imwrite(os.path.join(save_path, img_name),
                               final_detections_img)

                    # Rescale the boxes from the resized image back to the
                    # raw image. Height and width are the two axes before
                    # the channel axis, whether or not a batch axis exists.
                    raw_h, raw_w = rgb_img.shape[0], rgb_img.shape[1]
                    resized_h = resized_img.shape[-3]
                    resized_w = resized_img.shape[-2]
                    x_min, y_min, x_max, y_max = object_boxes[:, 0], object_boxes[:, 1], \
                        object_boxes[:, 2], object_boxes[:, 3]
                    x_min = x_min * raw_w / resized_w
                    y_min = y_min * raw_h / resized_h
                    x_max = x_max * raw_w / resized_w
                    y_max = y_max * raw_h / resized_h

                    object_boxes = np.stack([x_min, y_min, x_max, y_max],
                                            axis=1)

                    fw.write(f'\n{img_name}')
                    for score, boxes, categories in zip(object_scores,
                                                        object_boxes,
                                                        object_categories):
                        fw.write('\n\tscore:' + str(score))
                        fw.write('\tbboxes:' + str(boxes))
                        fw.write('\tcategories:' + str(categories))

                    view_bar(
                        '{} image cost {} second'.format(
                            img_name, (end_time - start_time)), index + 1,
                        len(image_name_list))
0
def convert_pascal_to_tfrecord(dataset_path,
                               save_path,
                               record_capacity=2000,
                               shuffling=False):
    """
    Convert PASCAL VOC datasets into sharded TFRecord files.

    For every year listed in ``FLAGS.year`` (comma separated) the ``*.xml``
    annotations under ``dataset_path/VOC<year>/FLAGS.xml_dir`` are paired
    with the image of the same stem and extension ``FLAGS.img_format`` under
    ``dataset_path/VOC<year>/FLAGS.image_dir``. Samples are written to
    ``save_path/<shard index>.record`` with at most ``record_capacity``
    samples per shard. A sample that fails to convert is reported and
    skipped.

    :param dataset_path: root directory containing the ``VOC<year>`` folders
    :param save_path: output directory for the ``.record`` shards
    :param record_capacity: maximum number of samples per shard
    :param shuffling: shuffle the samples (fixed seed 0) before sharding
    :return: None
    """
    years = [s.strip() for s in FLAGS.year.split(',')]

    # Parallel lists: image file and its annotation file.
    img_name_list = []
    img_xml_list = []

    for year in years:
        img_path = os.path.join(dataset_path, 'VOC' + year, FLAGS.image_dir)
        xml_path = os.path.join(dataset_path, 'VOC' + year, FLAGS.xml_dir)
        xml_list = glob.glob(os.path.join(xml_path, '*.xml'))
        # Swap only the extension; a plain str.replace would also rewrite
        # an "xml" that is part of the file stem.
        img_list = [
            os.path.join(
                img_path,
                os.path.splitext(os.path.basename(xml))[0] + '.' +
                FLAGS.img_format) for xml in xml_list
        ]
        img_name_list.extend(img_list)
        img_xml_list.extend(xml_list)

    if shuffling:
        # Shuffle an index list so both lists stay aligned.
        shuffled_index = list(range(len(img_name_list)))
        random.seed(0)
        random.shuffle(shuffled_index)
        img_name_list = [img_name_list[index] for index in shuffled_index]
        img_xml_list = [img_xml_list[index] for index in shuffled_index]

    total = len(img_name_list)
    # Ceiling division: number of shards needed for all samples.
    num_record = (total + record_capacity - 1) // record_capacity

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    num_samples = 0
    for index in range(num_record):
        # Slicing clamps at the end of the list, so the last shard needs no
        # special case. The former remainder-based slice was empty whenever
        # the total was an exact multiple of record_capacity.
        start = index * record_capacity
        sub_img_list = img_name_list[start:start + record_capacity]
        sub_xml_list = img_xml_list[start:start + record_capacity]

        record_filename = os.path.join(save_path, f'{index}.record')
        write = tf.io.TFRecordWriter(record_filename)
        try:
            for img_file, xml_file in zip(sub_img_list, sub_xml_list):
                # Per-sample handling: one bad sample must not drop the rest
                # of the shard.
                try:
                    img_height, img_width, gtbox_label = read_xml_gtbox_and_label(
                        xml_file)
                    # OpenCV loads BGR; the record stores RGB.
                    bgr_image = cv.imread(img_file)
                    rgb_image = cv.cvtColor(bgr_image, cv.COLOR_BGR2RGB)

                    image_record = serialize_example(image=rgb_image,
                                                     img_height=img_height,
                                                     img_width=img_width,
                                                     img_depth=3,
                                                     filename=img_file,
                                                     gtbox_label=gtbox_label)
                    write.write(record=image_record)

                    num_samples += 1
                    view_bar(message='\nConversion progress',
                             num=num_samples,
                             total=total)
                except Exception as e:
                    print(e)
                    continue
        finally:
            # Always flush and close the shard.
            write.close()
    print('\nThere are {0} samples convert to {1}'.format(
        num_samples, save_path))
Exemple #10
0
    def exucute_detect(self, image_path, save_path):
        """
        Run object detection on one image or on every image of a directory.

        For each image, the three prediction tensors of ``self.detector``
        are evaluated, merged, and filtered with
        ``box_utils.postprocess_boxes`` and ``box_utils.nms``. The annotated
        image is saved to ``save_path/<image name>`` and the detections are
        appended to ``save_path/detect_bbox.txt``.

        :param image_path: path of a single image file, or of a directory
            whose ``.jpg/.png/.jpeg/.tif/.tiff`` files are processed
        :param save_path: output directory (created with ``makedir``)
        :return: None
        :raises AssertionError: if no supported image is found
        """
        # Prediction tensors of the detection network.
        pred_sbbox_batch = self.detector.pred_sbbox
        pred_mbbox_batch = self.detector.pred_mbbox
        pred_lbbox_batch = self.detector.pred_lbbox
        # Let TensorFlow grow GPU memory on demand.
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        with tf.Session(config=config) as sess:
            sess.run(init_op)

            # Restore weights, remembering the path actually used so the
            # message below does not report None.
            if self.ckpt_path is not None:
                restorer = tf.train.Saver()
                restore_path = self.ckpt_path
            else:
                restorer, restore_path = self.detector.get_restorer(
                    is_training=False)
            restorer.restore(sess, restore_path)
            print('*' * 80 +
                  '\nSuccessful restore model from {0}\n'.format(restore_path) +
                  '*' * 80)

            # Build the list of image names relative to image_dir.
            format_list = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')
            if os.path.isfile(image_path):
                # Split into directory + name so the joins below neither
                # duplicate the path nor write over the source image.
                image_dir, single_name = os.path.split(image_path)
                image_name_list = [single_name]
            else:
                image_dir = image_path
                image_name_list = [
                    img_name for img_name in os.listdir(image_dir)
                    if img_name.endswith(format_list)
                    and os.path.isfile(os.path.join(image_dir, img_name))
                ]

            # The text is the assertion message; it must not be printed on
            # every successful run.
            assert len(image_name_list) != 0, \
                "test_dir has no imgs there. Note that, we only support img format of {0}".format(
                    format_list)

            makedir(save_path)
            with open(os.path.join(save_path, 'detect_bbox.txt'), 'w') as fw:
                for index, img_name in enumerate(image_name_list):
                    original_image, image_batch, original_size = self.image_process(
                        img_path=os.path.join(image_dir, img_name))

                    start_time = time.perf_counter()
                    feed_dict = {self.input_data: image_batch,
                                 self.trainable: False}

                    pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                        [pred_sbbox_batch, pred_mbbox_batch, pred_lbbox_batch],
                        feed_dict=feed_dict)

                    # Flatten the three outputs into one prediction table.
                    num_columns = 5 + self.num_classes
                    pred_bbox = np.concatenate(
                        [np.reshape(pred_sbbox, (-1, num_columns)),
                         np.reshape(pred_mbbox, (-1, num_columns)),
                         np.reshape(pred_lbbox, (-1, num_columns))], axis=0)

                    bboxes = box_utils.postprocess_boxes(
                        pred_bbox, original_size, self.input_size[0],
                        self.score_threshold)
                    bboxes = box_utils.nms(bboxes, self.num_threshold,
                                           method='nms')
                    end_time = time.perf_counter()

                    image = draw_box_in_image.draw_bbox(
                        original_image, bboxes, classes=self.class_name)
                    image = Image.fromarray(image)
                    image.save(os.path.join(save_path, img_name))

                    # Columns used below: 0-3 box, 4 score, 5 class.
                    bboxes = np.array(bboxes)
                    if bboxes.size == 0:
                        # No detections: keep the array 2-D so the column
                        # slices below do not raise.
                        bboxes = bboxes.reshape(0, 6)
                    rbboxes = bboxes[:, :4]
                    rscores = bboxes[:, 4]
                    rclasses = bboxes[:, 5]

                    fw.write(f'\n{img_name}')
                    for score, boxes, categories in zip(rscores, rbboxes,
                                                        rclasses):
                        fw.write('\n\tscore:' + str(score))
                        fw.write('\tbboxes:' + str(boxes))
                        fw.write('\tcategories:' + str(int(categories)))

                    view_bar(
                        '{} image cost {} second'.format(
                            img_name, (end_time - start_time)), index + 1,
                        len(image_name_list))
Exemple #11
0
def convert_coco_to_tfrecord(src_path,
                             save_path,
                             record_capacity=2000,
                             raw_coco=True):
    """
    Convert a COCO-style dataset into sharded TFRecord files.

    The first ``*.json`` file found under ``src_path/FLAGS.anns_dir`` is
    indexed with ``create_index``. Every image id that has annotations is
    serialized together with its ground-truth boxes into
    ``save_path/<shard index>.record``, at most ``record_capacity`` samples
    per shard. A sample that fails to convert is reported and skipped.

    :param src_path: dataset root containing ``FLAGS.image_dir`` and
        ``FLAGS.anns_dir``
    :param save_path: output directory for the ``.record`` shards
    :param record_capacity: maximum number of samples per shard
    :param raw_coco: if True, image files are named as the image id
        zero-padded to 12 digits plus ``FLAGS.img_format``; otherwise as
        ``<img_id>.jpg``
    :return: None
    """
    imgs_path = os.path.join(src_path, FLAGS.image_dir)
    anns_path = os.path.join(src_path, FLAGS.anns_dir)

    annotation_list = glob.glob(os.path.join(anns_path, '*.json'))
    # NOTE(review): only the first annotation file returned by glob is used.
    anns, cats, imgs, img_anns, cate_imgs = create_index(annotation_list[0])
    image_id_list = list(img_anns)

    total = len(image_id_list)
    # Ceiling division: number of shards needed for all samples.
    num_record = (total + record_capacity - 1) // record_capacity

    makedir(save_path)

    # Running count across shards; it drives the progress bar, whose total
    # is the whole dataset.
    num_converted = 0
    for index in range(num_record):
        # Slicing clamps at the end of the list, so the last shard needs no
        # special case. The former remainder-based slice was empty whenever
        # the total was an exact multiple of record_capacity.
        start = index * record_capacity
        sub_img_id_list = image_id_list[start:start + record_capacity]

        record_filename = os.path.join(save_path, f'{index}.record')
        write = tf.io.TFRecordWriter(record_filename)

        # Samples written to this shard, reported once the shard is closed.
        num_samples = 0
        try:
            for img_id in sub_img_id_list:
                try:
                    gtbox_label = read_json_gtbox_label(img_anns[img_id])
                    if raw_coco:
                        img_name = f'{str(img_id).zfill(12)}.{FLAGS.img_format}'
                    else:
                        img_name = '{0}.jpg'.format(img_id)

                    img_path = os.path.join(imgs_path, img_name)

                    # OpenCV loads BGR; the record stores RGB.
                    bgr_image = cv.imread(img_path)
                    rgb_image = cv.cvtColor(bgr_image, cv.COLOR_BGR2RGB)
                    img_height = rgb_image.shape[0]
                    img_width = rgb_image.shape[1]

                    image_record = serialize_example(image=rgb_image,
                                                     img_height=img_height,
                                                     img_width=img_width,
                                                     img_depth=3,
                                                     filename=img_name,
                                                     gtbox_label=gtbox_label)
                    write.write(record=image_record)

                    num_samples += 1
                    num_converted += 1
                    view_bar(message='\nConversion progress',
                             num=num_converted,
                             total=len(img_anns))

                except Exception as e:
                    print(e)
                    continue
        finally:
            # Always flush and close the shard.
            write.close()
        print('There are {0} samples convert to {1}'.format(
            num_samples, save_path))
Exemple #12
0
def convert_pascal_to_tfrecord(dataset_path,
                               save_path,
                               record_capacity=2000,
                               shuffling=False):
    """
    Convert a Pascal VOC style dataset into a series of TFRecord shards.

    For every year listed in ``FLAGS.year`` (comma separated) the annotation
    files ``<dataset_path>/VOC<year>/<FLAGS.xml_dir>/*.xml`` are collected and
    each one is paired with the image of the same base name (extension
    ``FLAGS.img_format``) in ``<dataset_path>/VOC<year>/<FLAGS.image_dir>``.
    Samples are written to ``<save_path>/<shard index>.record``.

    A sample that fails to be processed or serialized is reported with
    ``print`` and skipped; the remaining samples of the shard are still
    written.

    :param dataset_path: root directory that contains the ``VOC<year>`` folders
    :param save_path: directory the ``<index>.record`` files are written to
    :param record_capacity: maximum number of samples stored in one record file
    :param shuffling: if True, shuffle the samples (fixed seed 0) before sharding
    :return: None
    """
    # invert the index -> name mapping so class names resolve to integer ids
    index_name = read_class_names(cfgs.CLASSES)
    name_index = {name: int(index) for index, name in index_name.items()}
    years = [s.strip() for s in FLAGS.year.split(',')]

    # get image and xml list (kept in lockstep: entry i of both lists is one sample)
    img_name_list = []
    img_xml_list = []
    for year in years:
        img_path = os.path.join(dataset_path, 'VOC' + year, FLAGS.image_dir)
        xml_path = os.path.join(dataset_path, 'VOC' + year, FLAGS.xml_dir)
        xml_list = glob.glob(os.path.join(xml_path, '*.xml'))
        # Swap only the extension: str.replace('xml', ...) would also rewrite
        # an 'xml' that happens to occur inside the file stem.
        img_list = [
            os.path.join(
                img_path,
                os.path.splitext(os.path.basename(xml))[0] + '.' +
                FLAGS.img_format) for xml in xml_list
        ]
        img_name_list.extend(img_list)
        img_xml_list.extend(xml_list)

    if shuffling:
        # one shared permutation keeps images and annotations aligned
        shuffled_index = list(range(len(img_name_list)))
        random.seed(0)
        random.shuffle(shuffled_index)
        img_name_list = [img_name_list[i] for i in shuffled_index]
        img_xml_list = [img_xml_list[i] for i in shuffled_index]

    num_total = len(img_name_list)
    # ceiling division: a partially filled last shard still needs its own file
    num_record = (num_total + record_capacity - 1) // record_capacity

    num_samples = 0
    for index in range(num_record):
        # Slicing clamps at the end of the list, so the last shard needs no
        # special case (the old remainder-based slice was empty whenever the
        # sample count was an exact multiple of record_capacity).
        start = index * record_capacity
        stop = start + record_capacity
        sub_img_list = img_name_list[start:stop]
        sub_xml_list = img_xml_list[start:stop]

        record_filename = os.path.join(save_path, f'{index}.record')
        write = tf.io.TFRecordWriter(record_filename)
        try:
            for img_file, xml_file in zip(sub_img_list, sub_xml_list):
                # best effort per sample: one broken sample must not discard
                # the rest of the shard
                try:
                    image, shape, bboxes, labels, labels_text, difficult, truncated = process_image(
                        img_file, xml_file, class_name=name_index)

                    image_record = serialize_example(img_file, image, labels,
                                                     labels_text, bboxes,
                                                     shape, difficult,
                                                     truncated)
                    write.write(record=image_record)
                except Exception as e:
                    print(e)
                    continue

                num_samples += 1
                view_bar(message='\nConversion progress',
                         num=num_samples,
                         total=num_total)
        finally:
            # always release the writer, even if something unexpected escapes
            write.close()
    print('\nThere are {0} samples convert to {1}'.format(
        num_samples, save_path))
Exemple #13
0
    def exucute_detect(self, image_path, save_path):
        """
        Run object detection on one image or on every image of a directory.

        For each image the detections are drawn onto a copy that is written
        to ``save_path`` under the image's file name, and the scores, boxes
        and categories are appended to ``<save_path>/detect_bbox.txt``.

        :param image_path: path of a single image file, or of a directory
            whose files ending in .jpg/.png/.jpeg/.tif/.tiff are processed
        :param save_path: output directory (created through ``makedir``)
        :return: None
        :raises AssertionError: if no image is found under ``image_path``
        """
        input_image = tf.placeholder(dtype=tf.uint8,
                                     shape=(None, None, 3),
                                     name='inputs_images')

        image_pre, labels_pre, bboxes_pre = self.image_process(
            input_image, img_shape=self.net_shape, img_format=self.data_format)
        # expand dimension: the network consumes a batch, here of size 1
        image_batch = tf.expand_dims(input=image_pre, axis=0)

        # load detect network
        # NOTE(review): 'ssd_net' is an attribute of self and never a local
        # name, so this expression always evaluates to None -- confirm whether
        # variable reuse was intended for repeated calls.
        reuse = True if 'ssd_net' in locals() else None
        with slim.arg_scope(
                self.ssd_net.arg_scope(data_format=self.data_format)):
            detection_category, detection_bbox, _, _ = self.ssd_net.net(
                image_batch, is_training=False, reuse=reuse)

        # restore pretrain weight
        restorer = tf.train.Saver()

        # let TensorFlow grow GPU memory on demand instead of grabbing it all
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        with tf.Session(config=config) as sess:
            sess.run(init_op)

            if self.ckpt_path is not None:
                restorer.restore(sess, self.ckpt_path)
            else:
                self.ckpt_path = self.ssd_net.restore_ckpt(sess)
            print('*' * 80 + '\nSuccessful restore model from {0}\n'.format(
                self.ckpt_path) + '*' * 80)

            # construct image name list
            format_list = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')
            if os.path.isfile(image_path):
                # Split into directory + file name so that the joins below
                # neither duplicate the path when reading nor resolve to the
                # input file itself (and overwrite it) when writing.
                image_dir, single_name = os.path.split(image_path)
                image_name_list = [single_name]
            else:
                image_dir = image_path
                image_name_list = [
                    img_name for img_name in os.listdir(image_dir)
                    if img_name.endswith(format_list)
                    and os.path.isfile(os.path.join(image_dir, img_name))
                ]

            # This text used to be printed unconditionally after a bare
            # assert; it is the failure message and must only show on failure.
            if not image_name_list:
                raise AssertionError(
                    "test_dir has no imgs there. Note that, we only support img format of {0}"
                    .format(format_list))

            # ---------------------------- start detect ----------------------------
            makedir(save_path)
            num_images = len(image_name_list)
            # context manager closes the report even if an image fails midway
            with open(os.path.join(save_path, 'detect_bbox.txt'), 'w') as fw:
                for index, img_name in enumerate(image_name_list):
                    img_file = os.path.join(image_dir, img_name)
                    bgr_img = cv.imread(img_file)
                    if bgr_img is None:
                        # cv.imread signals an unreadable file by returning None
                        print('Skip unreadable image: {0}'.format(img_file))
                        continue
                    # convert channel from BGR to RGB (cv is BGR)
                    rgb_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2RGB)

                    start_time = time.perf_counter()
                    # run the SSD network on the raw RGB image
                    feed_dict = {input_image: rgb_img}
                    _, category, bbox = sess.run(
                        [image_batch, detection_category, detection_bbox],
                        feed_dict=feed_dict)

                    # Get classes and bboxes from the net outputs.
                    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
                        category,
                        bbox,
                        self.ssd_anchors,
                        select_threshold=self.select_threshold,
                        img_shape=self.net_shape,
                        num_classes=self.num_classes,
                        decode=True)

                    rbboxes = np_methods.bboxes_clip(self.bbox_image, rbboxes)
                    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
                        rclasses, rscores, rbboxes, top_k=400)
                    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
                        rclasses,
                        rscores,
                        rbboxes,
                        nms_threshold=self.nms_threshold)
                    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
                    rbboxes = np_methods.bboxes_resize(self.bbox_image,
                                                       rbboxes)
                    end_time = time.perf_counter()

                    # draw the detections and save the annotated image
                    rbboxes = np_methods.bboxes_recover(rbboxes, rgb_img)
                    final_detections_img = draw_box_in_image.draw_boxes_with_label_and_scores(
                        rgb_img, rbboxes, rclasses, rscores)
                    # cv.imwrite expects BGR channel order
                    final_detections_img = cv.cvtColor(final_detections_img,
                                                       cv.COLOR_RGB2BGR)
                    cv.imwrite(os.path.join(save_path, img_name),
                               final_detections_img)

                    # one header line per image, then one line per detection
                    fw.write(f'\n{img_name}')
                    for score, boxes, categories in zip(
                            rscores, rbboxes, rclasses):
                        fw.write('\n\tscore:' + str(score))
                        fw.write('\tbboxes:' + str(boxes))
                        fw.write('\tcategories:' + str(int(categories)))

                    view_bar(
                        '{} image cost {} second'.format(
                            img_name, (end_time - start_time)), index + 1,
                        num_images)