def image_resize(self, images, resize_shape):
    """
    Resize every image of a batch to one common shape.

    :param images: batch of images laid out as [num_images, height, width, channels]
    :param resize_shape: target shape laid out as [height, width, channels]
    :return: uint8 array of shape [num_images, height, width, channels]
        holding the resized images
    """
    print("进行图像大小的重塑...")
    total = images.shape[0]
    # Allocate the whole output batch up front; each slot is filled below.
    out_shape = [total, resize_shape[0], resize_shape[1], resize_shape[2]]
    resize_images = np.zeros(shape=out_shape, dtype=np.uint8)

    done = 0
    while done < total:
        # Bicubic interpolation of one image into its slot of the output batch.
        resize_images[done] = scipy.misc.imresize(images[done],
                                                  size=resize_shape,
                                                  interp='bicubic')
        done += 1
        tool.view_bar("重塑图像大小", done, total)
    return resize_images
def split_coco(imgs_path, annotaions_path, dst_dir, num_catetory=20, num_per_category=18):
    """
    Build a reduced copy of a COCO dataset under ``dst_dir``.

    The selected annotations are written to
    ``<dst_dir>/Annotations/instances.json`` and the matching images are
    copied to ``<dst_dir>/Images/<img_id>.jpg``.

    :param imgs_path: directory holding the source images, each named as the
        image id left-padded with zeros to 12 characters plus ``.jpg``
    :param annotaions_path: path of the source COCO annotation JSON file
    :param dst_dir: root directory receiving the ``Annotations`` and
        ``Images`` sub-directories
    :param num_catetory: forwarded to ``get_img_per_categorise``
        (presumably the number of categories to keep -- confirm against helper)
    :param num_per_category: forwarded to ``get_img_per_categorise``
        (presumably the number of images per category -- confirm against helper)
    :return: None
    """
    # Read through a context manager so the annotation file handle is released.
    with open(annotaions_path, 'r') as fr:
        dataset = json.load(fr)

    sub_annotations_path = os.path.join(dst_dir, 'Annotations')
    sub_img_path = os.path.join(dst_dir, 'Images')

    anns, cats, imgs, img_anns, cate_imgs = create_index(dataset)
    img_id_list, category_id_list = get_img_per_categorise(
        cate_imgs, num_catetory, num_per_category)

    # Map every selected image id to its zero-padded source file name.
    img_name_dict = {
        img_id: '{0:0>12}.jpg'.format(img_id) for img_id in img_id_list
    }

    # ------------------------- write annotation info -------------------------
    images_list, annotations_list = get_images_annotaion_info(
        img_id_list, imgs, img_anns, category_id_list)

    new_dataset = defaultdict(list)
    new_dataset['info'] = dataset['info']
    new_dataset['licenses'] = dataset['licenses']
    new_dataset['images'] = images_list
    new_dataset['annotations'] = annotations_list
    new_dataset['categories'] = dataset['categories']

    makedir(sub_annotations_path)
    json_path = os.path.join(sub_annotations_path, 'instances.json')
    with open(json_path, 'w') as fw:
        json.dump(new_dataset, fw)
    print(
        'Successful write the number of {0} annotations respect to {1} images to {2}'
        .format(len(new_dataset['annotations']), len(new_dataset['images']),
                json_path))

    # ------------------------------ copy images ------------------------------
    makedir(sub_img_path)
    total = len(img_name_dict)
    for num_samples, (img_id, img_name) in enumerate(img_name_dict.items(), start=1):
        # The copy is renamed to the bare (unpadded) image id.
        shutil.copy(os.path.join(imgs_path, img_name),
                    os.path.join(sub_img_path, '{0}.jpg'.format(img_id)))
        view_bar("split coco:", num_samples, total)
    print('Successful copy the number of {0} images to {1}'.format(
        total, sub_img_path))
def convert_pascal_to_tfrecord():
    """
    Convert a Pascal-VOC style dataset into a single TFRecord file.

    Paths and naming come from ``FLAGS`` (``VOC_dir``, ``xml_dir``,
    ``image_dir``, ``save_dir``, ``dataset``, ``save_name``, ``img_format``).
    Every ``*.xml`` annotation whose image exists and can be decoded is
    serialized as one ``tf.train.Example``; the others are reported and skipped.

    :return: None
    """
    xml_path = os.path.join(FLAGS.VOC_dir, FLAGS.xml_dir)
    image_path = os.path.join(FLAGS.VOC_dir, FLAGS.image_dir)
    save_path = os.path.join(
        FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord')
    makedirs(FLAGS.save_dir)

    # List the annotations once; the total is reused by the progress bar.
    xml_files = glob.glob(xml_path + '/*.xml')
    num_xml = len(xml_files)

    writer = tf.python_io.TFRecordWriter(path=save_path)
    try:
        for count, xml in enumerate(xml_files):
            # basename/splitext is separator independent and keeps dotted stems.
            stem = os.path.splitext(os.path.basename(xml))[0]
            img_name = stem + FLAGS.img_format
            img_path = os.path.join(image_path, img_name)

            if not os.path.exists(img_path):
                print('{} is not exist!'.format(img_path))
                continue

            img_height, img_width, gtbox_label = read_xml_gtbox_and_label(xml)

            bgr_img = cv2.imread(img_path)
            if bgr_img is None:
                # cv2.imread signals an undecodable file by returning None.
                print('{} can not be read!'.format(img_path))
                continue
            # Reverse the channel axis: OpenCV loads BGR, the record stores RGB.
            img = bgr_img[:, :, ::-1]

            feature = tf.train.Features(
                feature={
                    'img_name': _bytes_feature(img_name.encode()),
                    'img_height': _int64_feature(img_height),
                    'img_width': _int64_feature(img_width),
                    # tobytes() replaces tostring(), which NumPy 2.0 removed.
                    'img': _bytes_feature(img.tobytes()),
                    'gtboxes_and_label': _bytes_feature(gtbox_label.tobytes()),
                    'num_objects': _int64_feature(gtbox_label.shape[0])
                })
            example = tf.train.Example(features=feature)
            writer.write(example.SerializeToString())

            view_bar('Conversion progress', count + 1, num_xml)
    finally:
        # Always close the writer so already written records are not lost.
        writer.close()
    print('\nConversion is complete!')
def face_filter(src_path, dst_path):
    """
    Clean a ``<src_path>/<person>/<image>`` face dataset with the face detector.

    For every image:
      * no detected face   -> the image is deleted (``dst_path`` is None) or
        moved to ``<dst_path>/<person>/``;
      * exactly one face   -> the image is left untouched;
      * more than one face -> every face is cropped with a margin of one box
        width/height on each side (clipped to the image) and saved beside the
        original as ``<stem>-<j>.jpg``.

    :param src_path: dataset root containing one sub-directory per person
    :param dst_path: directory receiving images without faces, or None to
        delete them instead
    :return: None
    """
    people_list = os.listdir(src_path)
    for i, people in enumerate(people_list):
        people_image_path = os.path.join(src_path, people)
        # Listing is taken once, so crops written below are not re-processed.
        for image_name in os.listdir(people_image_path):
            image_path = os.path.join(people_image_path, image_name)
            print(image_path)
            bboxes = fdetector.detect(image_path, remove_inner_face=False)

            if len(bboxes) == 0:
                if dst_path is None:
                    os.remove(image_path)
                else:
                    people_image_dstpath = os.path.join(dst_path, people)
                    # exist_ok avoids the check-then-create race.
                    os.makedirs(people_image_dstpath, exist_ok=True)
                    shutil.move(image_path,
                                os.path.join(people_image_dstpath, image_name))
            elif len(bboxes) > 1:
                img = cv2.imread(image_path)
                if img is None:
                    # cv2.imread returns None for files it cannot decode.
                    print('{} can not be read!'.format(image_path))
                    continue
                for j, bbox in enumerate(bboxes):
                    x1, y1, x2, y2 = bbox
                    width_delta = (x2 - x1)
                    height_delta = (y2 - y1)
                    # Grow the box by its own size on every side, then clip.
                    x1 = max(int(x1 - width_delta), 0)
                    y1 = max(int(y1 - height_delta), 0)
                    x2 = min(int(x2 + width_delta), img.shape[1])
                    y2 = min(int(y2 + height_delta), img.shape[0])
                    imsave = img[y1:y2, x1:x2, :]
                    crop_image_name = image_name.split('.')[0] + '-' + str(j) + '.jpg'
                    # Separate name: do not clobber the source image path.
                    crop_image_path = os.path.join(people_image_path,
                                                   crop_image_name)
                    cv2.imwrite(crop_image_path, imsave)
        tools.view_bar('face_filter: ', i + 1, len(people_list))
def split_pascal(origin_path, dst_path, split_rate=0.8):
    """
    Randomly split a Pascal-VOC style dataset into train and val subsets.

    Images are read from ``<origin_path>/JPEGImages`` and annotations from
    ``<origin_path>/Annotations``; each sample (``<name>.jpg`` plus
    ``<name>.xml``) is copied to ``<dst_path>/train/...`` or
    ``<dst_path>/val/...``.

    :param origin_path: root of the source dataset
    :param dst_path: root receiving the ``train`` and ``val`` directories
    :param split_rate: fraction of samples assigned to train; the train count
        is ``ceil(num_samples * split_rate)``
    :return: None
    """
    image_path = os.path.join(origin_path, 'JPEGImages')
    xml_path = os.path.join(origin_path, 'Annotations')

    image_train_path = os.path.join(dst_path, 'train', 'JPEGImages')
    xml_train_path = os.path.join(dst_path, 'train', 'Annotations')
    image_val_path = os.path.join(dst_path, 'val', 'JPEGImages')
    xml_val_path = os.path.join(dst_path, 'val', 'Annotations')

    makedir(image_train_path)
    makedir(xml_train_path)
    makedir(image_val_path)
    makedir(xml_val_path)

    # splitext keeps stems that themselves contain dots intact.
    image_name = [os.path.splitext(image)[0] for image in os.listdir(image_path)]
    image_name = np.random.permutation(image_name)

    # Compute the boundary once so both subsets agree on it.
    num_train = int(math.ceil(len(image_name) * split_rate))
    train_image_name = image_name[:num_train]
    val_image_name = image_name[num_train:]

    subsets = (
        ("split train dataset:", train_image_name, image_train_path, xml_train_path),
        ("split val dataset:", val_image_name, image_val_path, xml_val_path),
    )
    for message, names, image_dst, xml_dst in subsets:
        for n, image in enumerate(names):
            shutil.copy(os.path.join(image_path, image + '.jpg'),
                        os.path.join(image_dst, image + '.jpg'))
            shutil.copy(os.path.join(xml_path, image + '.xml'),
                        os.path.join(xml_dst, image + '.xml'))
            # Report the number of finished samples so the bar reaches total.
            view_bar(message=message, num=n + 1, total=len(names))
        print('Total of {0} data split to {1}'.format(
            len(names), os.path.dirname(image_dst)))
# 若读完整个数据则不再循环 if j > len(file_list) - 1: break # 预测结果 outputs = net(images) # outputs = F.softmax(outputs, dim=1) # _, preds = torch.max(outputs, 1) preds = torch.argmax(outputs, 1) predict_result = preds.numpy().tolist() # print(predict_result) # print(preds.numpy().tolist()) # print(type(preds)) # print(j) content = '{} {}\n'.format(file_list[j], class_name[predict_result[0]]) file.write(content) j = j + 1 tool.view_bar('测试数据:', j + 1, len(file_list)) # # 将结果写入结果文件中 # with open(result_file, mode='a+') as file: # for i in range(images.size(0)): # content = '{} {}\n'.format(file_list[j], class_name[predict_result[i]]) # file.write(content) # j = j+1 # print('结果保存完成...') # print() # print('micro_f1_score:{}, macro_f1_score:{}'.format(micro_f1, macro_f1))
def convert_pascal_to_tfrecord():
    """
    Convert a COCO-like JSON annotation file with rotated boxes to one TFRecord.

    Paths and naming come from ``FLAGS`` (``root_dir``, ``json_file``,
    ``image_dir``, ``save_dir``, ``dataset``, ``save_name``). Annotations are
    grouped per image file; each image yields one ``tf.train.Example`` whose
    ``gtboxes_and_label`` is an int32 array with 8 box values plus the
    category id per row.

    :return: None
    """
    json_file = os.path.join(FLAGS.root_dir, FLAGS.json_file)
    image_path = os.path.join(FLAGS.root_dir, FLAGS.image_dir)
    save_path = os.path.join(
        FLAGS.save_dir, FLAGS.dataset + '_' + FLAGS.save_name + '.tfrecord')
    makedirs(FLAGS.save_dir)

    with open(json_file, 'r') as fr:
        all_gts = json.load(fr)
    images = all_gts['images']
    annotations = all_gts['annotations']

    # Group boxes and labels by image file name.
    all_gt_label = {}
    for annotation in annotations:
        image_id = annotation['image_id']
        # The lookup below treats image_id as a 1-based position in `images`
        # (NOTE(review): confirm ids really are 1..len(images) in this file).
        # Ids outside that range are dropped; ids < 1 would otherwise wrap
        # around and silently pick an image from the end of the list.
        if not 1 <= image_id <= len(images):
            continue
        image_info = images[image_id - 1]
        entry = all_gt_label.setdefault(
            image_info['file_name'], {
                'height': image_info['height'],
                'width': image_info['width'],
                'gtboxes': [],
                'labels': []
            })
        entry['gtboxes'].append(coordinate_convert_r(annotation['rbbox']))
        entry['labels'].append(annotation['category_id'])

    total = len(all_gt_label)
    # Open the writer only after parsing succeeded, and always close it.
    writer = tf.python_io.TFRecordWriter(path=save_path)
    try:
        for count, (img_name, gt) in enumerate(all_gt_label.items(), start=1):
            img = cv2.imread(os.path.join(image_path, img_name))
            if img is None:
                # cv2.imread returns None for missing or undecodable files.
                print('{} can not be read!'.format(
                    os.path.join(image_path, img_name)))
            else:
                gtboxes = np.array(gt['gtboxes']).reshape([-1, 8])
                labels = np.array(gt['labels']).reshape([-1, 1])
                gtboxes_and_label = np.array(
                    np.concatenate([gtboxes, labels], axis=-1), np.int32)

                feature = tf.train.Features(
                    feature={
                        'img_name': _bytes_feature(img_name.encode()),
                        'img_height': _int64_feature(gt['height']),
                        'img_width': _int64_feature(gt['width']),
                        # tobytes() replaces tostring(), removed in NumPy 2.0.
                        'img': _bytes_feature(img.tobytes()),
                        'gtboxes_and_label': _bytes_feature(
                            gtboxes_and_label.tobytes()),
                        'num_objects': _int64_feature(gtboxes_and_label.shape[0])
                    })
                example = tf.train.Example(features=feature)
                writer.write(example.SerializeToString())

            view_bar('Conversion progress', count, total)
    finally:
        writer.close()
    print('\nConversion is complete!')
def exucute_detect(self, image_path, save_path):
    """
    Run object detection on one image or on every image of a directory.

    For each image the annotated picture is written to
    ``<save_path>/<image name>`` and the scores, boxes (rescaled to the raw
    image size) and categories above ``cfgs.SHOW_SCORE_THRSHOLD`` are appended
    to ``<save_path>/detect_bbox.txt``.

    :param image_path: path of a single image file, or of a directory whose
        ``.jpg/.png/.jpeg/.tif/.tiff`` files are processed
    :param save_path: output directory (created through ``makedir``)
    :return: None
    :raises AssertionError: when no image is found
    """
    input_image = tf.placeholder(dtype=tf.uint8,
                                 shape=(None, None, 3),
                                 name='inputs_images')
    resize_img = self.image_process(input_image)
    # Add the batch axis expected by the network.
    image_batch = tf.expand_dims(input=resize_img, axis=0)
    self.detect_net.images_batch = image_batch

    # Build the detection graph.
    detection_boxes, detection_scores, detection_category = \
        self.detect_net.inference()
    restorer, restore_ckpt = self.detect_net.get_restorer()

    # Let TensorFlow grow GPU memory on demand instead of grabbing all of it.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, save_path=restore_ckpt)
            print('Successful restore model from {0}'.format(restore_ckpt))

        # Build (directory, file names). A single file is split into its
        # directory and base name so that reading joins the right path and
        # the result is written under save_path, never over the input image.
        format_list = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')
        if os.path.isfile(image_path):
            image_dir, single_name = os.path.split(image_path)
            image_name_list = [single_name]
        else:
            image_dir = image_path
            image_name_list = [
                img_name for img_name in os.listdir(image_path)
                if img_name.endswith(format_list)
                and os.path.isfile(os.path.join(image_path, img_name))
            ]
        if not image_name_list:
            # Report the problem only when it actually occurs.
            raise AssertionError(
                "test_dir has no imgs there. Note that, we only support img format of {0}"
                .format(format_list))

        makedir(save_path)
        num_images = len(image_name_list)
        # The context manager closes the result file even if an image fails.
        with open(os.path.join(save_path, 'detect_bbox.txt'), 'w') as fw:
            for index, img_name in enumerate(image_name_list):
                bgr_img = cv.imread(os.path.join(image_dir, img_name))
                # OpenCV decodes to BGR; the network is fed RGB.
                rgb_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2RGB)

                start_time = time.perf_counter()
                feed_dict = {input_image: rgb_img}
                resized_img, detected_boxes, detected_scores, detected_categories = \
                    sess.run([resize_img, detection_boxes, detection_scores,
                              detection_category],
                             feed_dict=feed_dict)
                end_time = time.perf_counter()

                # Keep only detections at or above the display threshold.
                object_indices = detected_scores >= cfgs.SHOW_SCORE_THRSHOLD
                object_scores = detected_scores[object_indices]
                object_boxes = detected_boxes[object_indices]
                object_categories = detected_categories[object_indices]

                final_detections_img = draw_box_in_img.draw_boxes_with_label_and_scores(
                    resized_img,
                    boxes=object_boxes,
                    labels=object_categories,
                    scores=object_scores)
                final_detections_img = cv.cvtColor(final_detections_img,
                                                   cv.COLOR_RGB2BGR)
                cv.imwrite(os.path.join(save_path, img_name),
                           final_detections_img)

                # Rescale the boxes from the resized image to the raw image.
                raw_h, raw_w = rgb_img.shape[0], rgb_img.shape[1]
                # NOTE(review): axes 1 and 2 are read as (height, width), which
                # presumes a leading batch axis, yet `resize_img` is the tensor
                # taken before expand_dims -- confirm against image_process.
                resized_h, resized_w = resized_img.shape[1], resized_img.shape[2]
                x_min, y_min, x_max, y_max = object_boxes[:, 0], object_boxes[:, 1], \
                    object_boxes[:, 2], object_boxes[:, 3]
                x_min = x_min * raw_w / resized_w
                y_min = y_min * raw_h / resized_h
                x_max = x_max * raw_w / resized_w
                y_max = y_max * raw_h / resized_h
                object_boxes = np.stack([x_min, y_min, x_max, y_max], axis=1)

                fw.write(f'\n{img_name}')
                for score, boxes, categories in zip(object_scores, object_boxes,
                                                    object_categories):
                    fw.write('\n\tscore:' + str(score))
                    fw.write('\tbboxes:' + str(boxes))
                    fw.write('\tcategories:' + str(categories))

                view_bar(
                    '{} image cost {} second'.format(img_name,
                                                     (end_time - start_time)),
                    index + 1, num_images)
def convert_pascal_to_tfrecord(dataset_path, save_path, record_capacity=2000, shuffling=False):
    """
    Convert Pascal VOC data to sharded TFRecord files ``<save_path>/<k>.record``.

    The years to convert come from the comma separated ``FLAGS.year``; images
    and annotations are looked up under
    ``<dataset_path>/VOC<year>/<FLAGS.image_dir | FLAGS.xml_dir>``.

    :param dataset_path: root directory containing the ``VOC<year>`` folders
    :param save_path: directory receiving the record shards
    :param record_capacity: maximum number of samples per shard
    :param shuffling: when True, shuffle the samples with a fixed seed (0)
        before sharding
    :return: None
    """
    years = [s.strip() for s in FLAGS.year.split(',')]
    img_ext = '.' + FLAGS.img_format.lstrip('.')

    # Collect matching image / annotation paths for every requested year.
    img_name_list = []
    img_xml_list = []
    for year in years:
        img_path = os.path.join(dataset_path, 'VOC' + year, FLAGS.image_dir)
        xml_path = os.path.join(dataset_path, 'VOC' + year, FLAGS.xml_dir)
        xml_list = glob.glob(os.path.join(xml_path, '*.xml'))
        # Swap only the extension; str.replace('xml', ...) would also rewrite
        # any "xml" occurring inside the file stem.
        img_list = [
            os.path.join(
                img_path,
                os.path.splitext(os.path.basename(xml))[0] + img_ext)
            for xml in xml_list
        ]
        img_name_list.extend(img_list)
        img_xml_list.extend(xml_list)

    if shuffling:
        # Shuffle one index list so images and annotations stay aligned.
        shuffled_index = list(range(len(img_name_list)))
        random.seed(0)
        random.shuffle(shuffled_index)
        img_name_list = [img_name_list[index] for index in shuffled_index]
        img_xml_list = [img_xml_list[index] for index in shuffled_index]

    total = len(img_name_list)
    # Ceiling division: number of shards needed to hold every sample.
    num_record = (total + record_capacity - 1) // record_capacity

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    num_samples = 0
    for index in range(num_record):
        # Plain slicing clamps at the end of the list, so the last shard is
        # correct both for a partial shard and for an exactly full one.
        start = index * record_capacity
        stop = start + record_capacity
        sub_img_list = img_name_list[start:stop]
        sub_xml_list = img_xml_list[start:stop]

        record_filename = os.path.join(save_path, f'{index}.record')
        write = tf.io.TFRecordWriter(record_filename)
        try:
            for img_file, xml_file in zip(sub_img_list, sub_xml_list):
                # Best effort per sample: report a failure and keep going
                # instead of dropping the remainder of the shard.
                try:
                    img_height, img_width, gtbox_label = read_xml_gtbox_and_label(
                        xml_file)
                    # OpenCV decodes to BGR; records store RGB.
                    bgr_image = cv.imread(img_file)
                    rgb_image = cv.cvtColor(bgr_image, cv.COLOR_BGR2RGB)

                    image_record = serialize_example(image=rgb_image,
                                                     img_height=img_height,
                                                     img_width=img_width,
                                                     img_depth=3,
                                                     filename=img_file,
                                                     gtbox_label=gtbox_label)
                    write.write(record=image_record)

                    num_samples += 1
                    view_bar(message='\nConversion progress',
                             num=num_samples,
                             total=total)
                except Exception as e:
                    print(e)
                    continue
        finally:
            # Close the shard even after failures so its data is flushed.
            write.close()
    print('\nThere are {0} samples convert to {1}'.format(
        num_samples, save_path))
def exucute_detect(self, image_path, save_path):
    """
    Run object detection on one image or on every image of a directory.

    For each image the annotated picture is saved to
    ``<save_path>/<image name>`` and the scores, boxes and class ids of the
    detections kept after NMS are appended to ``<save_path>/detect_bbox.txt``.

    :param image_path: path of a single image file, or of a directory whose
        ``.jpg/.png/.jpeg/.tif/.tiff`` files are processed
    :param save_path: output directory (created through ``makedir``)
    :return: None
    :raises AssertionError: when no image is found
    """
    # Prediction tensors of the three detection heads.
    pred_sbbox_batch, pred_mbbox_batch, pred_lbbox_batch = (
        self.detector.pred_sbbox, self.detector.pred_mbbox,
        self.detector.pred_lbbox)

    # Let TensorFlow grow GPU memory on demand instead of grabbing all of it.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    with tf.Session(config=config) as sess:
        sess.run(init_op)

        # Restore weights from the explicit checkpoint, else from the
        # detector's own restorer.
        if self.ckpt_path is not None:
            ckpt_path = self.ckpt_path
            restorer = tf.train.Saver()
        else:
            restorer, ckpt_path = self.detector.get_restorer(is_training=False)
        restorer.restore(sess, ckpt_path)
        # Report the checkpoint that was actually loaded.
        print('*' * 80 + '\nSuccessful restore model from {0}\n'.format(ckpt_path)
              + '*' * 80)

        # Build (directory, file names). A single file is split into its
        # directory and base name so that reading joins the right path and
        # the result is written under save_path, never over the input image.
        format_list = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')
        if os.path.isfile(image_path):
            image_dir, single_name = os.path.split(image_path)
            image_name_list = [single_name]
        else:
            image_dir = image_path
            image_name_list = [
                img_name for img_name in os.listdir(image_path)
                if img_name.endswith(format_list)
                and os.path.isfile(os.path.join(image_path, img_name))
            ]
        if not image_name_list:
            # Report the problem only when it actually occurs.
            raise AssertionError(
                "test_dir has no imgs there. Note that, we only support img format of {0}"
                .format(format_list))

        makedir(save_path)
        num_images = len(image_name_list)
        row_width = 5 + self.num_classes
        # The context manager closes the result file even if an image fails.
        with open(os.path.join(save_path, 'detect_bbox.txt'), 'w') as fw:
            for index, img_name in enumerate(image_name_list):
                original_image, image_batch, original_size = self.image_process(
                    img_path=os.path.join(image_dir, img_name))

                start_time = time.perf_counter()
                feed_dict = {self.input_data: image_batch, self.trainable: False}
                pred_sbbox, pred_mbbox, pred_lbbox = sess.run(
                    [pred_sbbox_batch, pred_mbbox_batch, pred_lbbox_batch],
                    feed_dict=feed_dict)

                # Flatten the three heads into one (num_boxes, 5 + classes) array.
                pred_bbox = np.concatenate(
                    [np.reshape(pred_sbbox, (-1, row_width)),
                     np.reshape(pred_mbbox, (-1, row_width)),
                     np.reshape(pred_lbbox, (-1, row_width))],
                    axis=0)

                bboxes = box_utils.postprocess_boxes(pred_bbox, original_size,
                                                     self.input_size[0],
                                                     self.score_threshold)
                bboxes = box_utils.nms(bboxes, self.num_threshold, method='nms')
                end_time = time.perf_counter()

                image = draw_box_in_image.draw_bbox(original_image, bboxes,
                                                    classes=self.class_name)
                image = Image.fromarray(image)
                image.save(os.path.join(save_path, img_name))

                bboxes = np.array(bboxes)
                if bboxes.size == 0:
                    # No detection: keep a 2-D shape so the column slicing
                    # below yields empty arrays instead of raising IndexError.
                    bboxes = bboxes.reshape(0, 6)
                # Columns: 0-3 box coordinates, 4 score, 5 class id.
                rbboxes = bboxes[:, :4]
                rscores = bboxes[:, 4]
                rclasses = bboxes[:, 5]

                fw.write(f'\n{img_name}')
                for score, boxes, categories in zip(rscores, rbboxes, rclasses):
                    fw.write('\n\tscore:' + str(score))
                    fw.write('\tbboxes:' + str(boxes))
                    fw.write('\tcategories:' + str(int(categories)))

                view_bar('{} image cost {} second'.format(
                    img_name, (end_time - start_time)), index + 1, num_images)
def convert_coco_to_tfrecord(src_path, save_path, record_capacity=2000, raw_coco=True):
    """
    Convert a COCO style dataset to sharded TFRecord files
    ``<save_path>/<k>.record``.

    Images are read from ``<src_path>/<FLAGS.image_dir>`` and the annotation
    index is built from the first ``*.json`` file found in
    ``<src_path>/<FLAGS.anns_dir>``.

    :param src_path: dataset root directory
    :param save_path: directory receiving the record shards
    :param record_capacity: maximum number of samples per shard
    :param raw_coco: True when image files use the 12-character zero-padded id
        with extension ``FLAGS.img_format``; False when they are named
        ``<img_id>.jpg``
    :return: None
    """
    imgs_path = os.path.join(src_path, FLAGS.image_dir)
    anns_path = os.path.join(src_path, FLAGS.anns_dir)

    annotation_list = glob.glob(os.path.join(anns_path, '*.json'))
    anns, cats, imgs, img_anns, cate_imgs = create_index(annotation_list[0])

    image_id_list = list(img_anns.keys())
    total = len(image_id_list)
    # Ceiling division: number of shards needed to hold every sample.
    num_record = (total + record_capacity - 1) // record_capacity

    makedir(save_path)
    # Counted across all shards so progress and the summary cover everything.
    num_samples = 0
    for index in range(num_record):
        # Plain slicing clamps at the end of the list, so the last shard is
        # correct both for a partial shard and for an exactly full one.
        start = index * record_capacity
        sub_img_id_list = image_id_list[start:start + record_capacity]

        record_filename = os.path.join(save_path, f'{index}.record')
        write = tf.io.TFRecordWriter(record_filename)
        try:
            for img_id in sub_img_id_list:
                # Best effort per sample: report a failure and keep going.
                try:
                    gtbox_label = read_json_gtbox_label(img_anns[img_id])

                    if raw_coco:
                        img_name = f'{img_id:0>12}.{FLAGS.img_format}'
                    else:
                        img_name = '{0}.jpg'.format(img_id)
                    img_path = os.path.join(imgs_path, img_name)

                    # OpenCV decodes to BGR; records store RGB.
                    bgr_image = cv.imread(img_path)
                    rgb_image = cv.cvtColor(bgr_image, cv.COLOR_BGR2RGB)
                    img_height = rgb_image.shape[0]
                    img_width = rgb_image.shape[1]

                    image_record = serialize_example(image=rgb_image,
                                                     img_height=img_height,
                                                     img_width=img_width,
                                                     img_depth=3,
                                                     filename=img_name,
                                                     gtbox_label=gtbox_label)
                    write.write(record=image_record)

                    num_samples += 1
                    view_bar(message='\nConversion progress',
                             num=num_samples,
                             total=total)
                except Exception as e:
                    print(e)
                    continue
        finally:
            # Close the shard even after failures so its data is flushed.
            write.close()
    print('There are {0} samples convert to {1}'.format(
        num_samples, save_path))
def convert_pascal_to_tfrecord(dataset_path, save_path, record_capacity=2000, shuffling=False):
    """
    Convert Pascal VOC data to sharded TFRecord files ``<save_path>/<k>.record``.

    Class names are read from ``cfgs.CLASSES``; the years to convert come from
    the comma separated ``FLAGS.year``; images and annotations are looked up
    under ``<dataset_path>/VOC<year>/<FLAGS.image_dir | FLAGS.xml_dir>``.

    :param dataset_path: root directory containing the ``VOC<year>`` folders
    :param save_path: directory receiving the record shards
    :param record_capacity: maximum number of samples per shard
    :param shuffling: when True, shuffle the samples with a fixed seed (0)
        before sharding
    :return: None
    """
    # Invert the index -> name table into name -> integer index.
    index_name = read_class_names(cfgs.CLASSES)
    name_index = {name: int(index) for index, name in index_name.items()}

    years = [s.strip() for s in FLAGS.year.split(',')]
    img_ext = '.' + FLAGS.img_format.lstrip('.')

    # Collect matching image / annotation paths for every requested year.
    img_name_list = []
    img_xml_list = []
    for year in years:
        img_path = os.path.join(dataset_path, 'VOC' + year, FLAGS.image_dir)
        xml_path = os.path.join(dataset_path, 'VOC' + year, FLAGS.xml_dir)
        xml_list = glob.glob(os.path.join(xml_path, '*.xml'))
        # Swap only the extension; str.replace('xml', ...) would also rewrite
        # any "xml" occurring inside the file stem.
        img_list = [
            os.path.join(
                img_path,
                os.path.splitext(os.path.basename(xml))[0] + img_ext)
            for xml in xml_list
        ]
        img_name_list.extend(img_list)
        img_xml_list.extend(xml_list)

    if shuffling:
        # Shuffle one index list so images and annotations stay aligned.
        shuffled_index = list(range(len(img_name_list)))
        random.seed(0)
        random.shuffle(shuffled_index)
        img_name_list = [img_name_list[index] for index in shuffled_index]
        img_xml_list = [img_xml_list[index] for index in shuffled_index]

    total = len(img_name_list)
    # Ceiling division: number of shards needed to hold every sample.
    num_record = (total + record_capacity - 1) // record_capacity

    if save_path:
        os.makedirs(save_path, exist_ok=True)

    num_samples = 0
    for index in range(num_record):
        # Plain slicing clamps at the end of the list, so the last shard is
        # correct both for a partial shard and for an exactly full one.
        start = index * record_capacity
        stop = start + record_capacity
        sub_img_list = img_name_list[start:stop]
        sub_xml_list = img_xml_list[start:stop]

        record_filename = os.path.join(save_path, f'{index}.record')
        write = tf.io.TFRecordWriter(record_filename)
        try:
            for img_file, xml_file in zip(sub_img_list, sub_xml_list):
                # Best effort per sample: report a failure and keep going
                # instead of dropping the remainder of the shard.
                try:
                    image, shape, bboxes, labels, labels_text, difficult, truncated = \
                        process_image(img_file, xml_file, class_name=name_index)

                    image_record = serialize_example(img_file, image, labels,
                                                     labels_text, bboxes, shape,
                                                     difficult, truncated)
                    write.write(record=image_record)

                    num_samples += 1
                    view_bar(message='\nConversion progress',
                             num=num_samples,
                             total=total)
                except Exception as e:
                    print(e)
                    continue
        finally:
            # Close the shard even after failures so its data is flushed.
            write.close()
    print('\nThere are {0} samples convert to {1}'.format(
        num_samples, save_path))
def exucute_detect(self, image_path, save_path):
    """
    Run SSD object detection on one image or on every image of a directory.

    For each image the annotated picture is written to
    ``<save_path>/<image name>`` and the scores, boxes and class ids kept
    after selection, sorting and NMS are appended to
    ``<save_path>/detect_bbox.txt``.

    When ``self.ckpt_path`` is None it is set to the checkpoint returned by
    ``self.ssd_net.restore_ckpt``.

    :param image_path: path of a single image file, or of a directory whose
        ``.jpg/.png/.jpeg/.tif/.tiff`` files are processed
    :param save_path: output directory (created through ``makedir``)
    :return: None
    :raises AssertionError: when no image is found
    """
    input_image = tf.placeholder(dtype=tf.uint8,
                                 shape=(None, None, 3),
                                 name='inputs_images')
    image_pre, labels_pre, bboxes_pre = self.image_process(
        input_image, img_shape=self.net_shape, img_format=self.data_format)
    # Add the batch axis expected by the network.
    image_batch = tf.expand_dims(input=image_pre, axis=0)

    # Build the detection graph. The previous `'ssd_net' in locals()` check
    # could never be true inside this method, so `reuse` was always None.
    with slim.arg_scope(self.ssd_net.arg_scope(data_format=self.data_format)):
        detection_category, detection_bbox, _, _ = self.ssd_net.net(
            image_batch, is_training=False, reuse=None)

    restorer = tf.train.Saver()

    # Let TensorFlow grow GPU memory on demand instead of grabbing all of it.
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(log_device_placement=False,
                            gpu_options=gpu_options)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    with tf.Session(config=config) as sess:
        sess.run(init_op)

        if self.ckpt_path is not None:
            restorer.restore(sess, self.ckpt_path)
        else:
            self.ckpt_path = self.ssd_net.restore_ckpt(sess)
        print('*' * 80 + '\nSuccessful restore model from {0}\n'.format(
            self.ckpt_path) + '*' * 80)

        # Build (directory, file names). A single file is split into its
        # directory and base name so that reading joins the right path and
        # the result is written under save_path, never over the input image.
        format_list = ('.jpg', '.png', '.jpeg', '.tif', '.tiff')
        if os.path.isfile(image_path):
            image_dir, single_name = os.path.split(image_path)
            image_name_list = [single_name]
        else:
            image_dir = image_path
            image_name_list = [
                img_name for img_name in os.listdir(image_path)
                if img_name.endswith(format_list)
                and os.path.isfile(os.path.join(image_path, img_name))
            ]
        if not image_name_list:
            # Report the problem only when it actually occurs.
            raise AssertionError(
                "test_dir has no imgs there. Note that, we only support img format of {0}"
                .format(format_list))

        makedir(save_path)
        num_images = len(image_name_list)
        # The context manager closes the result file even if an image fails.
        with open(os.path.join(save_path, 'detect_bbox.txt'), 'w') as fw:
            for index, img_name in enumerate(image_name_list):
                bgr_img = cv.imread(os.path.join(image_dir, img_name))
                # OpenCV decodes to BGR; the network is fed RGB.
                rgb_img = cv.cvtColor(bgr_img, cv.COLOR_BGR2RGB)

                start_time = time.perf_counter()
                feed_dict = {input_image: rgb_img}
                _, category, bbox = sess.run(
                    [image_batch, detection_category, detection_bbox],
                    feed_dict=feed_dict)

                # Select, clip, sort and de-duplicate the raw predictions.
                rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
                    category,
                    bbox,
                    self.ssd_anchors,
                    select_threshold=self.select_threshold,
                    img_shape=self.net_shape,
                    num_classes=self.num_classes,
                    decode=True)
                rbboxes = np_methods.bboxes_clip(self.bbox_image, rbboxes)
                rclasses, rscores, rbboxes = np_methods.bboxes_sort(
                    rclasses, rscores, rbboxes, top_k=400)
                rclasses, rscores, rbboxes = np_methods.bboxes_nms(
                    rclasses, rscores, rbboxes,
                    nms_threshold=self.nms_threshold)
                rbboxes = np_methods.bboxes_resize(self.bbox_image, rbboxes)
                end_time = time.perf_counter()

                rbboxes = np_methods.bboxes_recover(rbboxes, rgb_img)
                final_detections_img = draw_box_in_image.draw_boxes_with_label_and_scores(
                    rgb_img, rbboxes, rclasses, rscores)
                final_detections_img = cv.cvtColor(final_detections_img,
                                                   cv.COLOR_RGB2BGR)
                cv.imwrite(os.path.join(save_path, img_name),
                           final_detections_img)

                fw.write(f'\n{img_name}')
                for score, boxes, categories in zip(rscores, rbboxes, rclasses):
                    fw.write('\n\tscore:' + str(score))
                    fw.write('\tbboxes:' + str(boxes))
                    fw.write('\tcategories:' + str(int(categories)))

                view_bar(
                    '{} image cost {} second'.format(img_name,
                                                     (end_time - start_time)),
                    index + 1, num_images)