def main():
    """Run the configured detector over the evaluation data and show results.

    For every item yielded by the data loader the model is run without
    gradients, the raw outputs are turned into boxes by ``inference_utils``
    (box decoding, per-class NMS, top-k selection) and the surviving
    detections are displayed next to the ground-truth annotations through
    ``visualization.show_prediction``.

    Raises:
        ValueError: if ``cfg.model`` does not name a supported model.
    """
    # Debug mode: configure cuDNN and, when deterministic, seed every RNG.
    cudnn.deterministic = cfg.cudnn_deterministic
    cudnn.benchmark = cfg.cudnn_benchmark
    if cudnn.deterministic:
        torch.manual_seed(1)
        torch.cuda.manual_seed_all(1)
        np.random.seed(1)
        random.seed(1)

    use_gpu = torch.cuda.is_available() and cfg.cuda
    device = torch.device('cuda' if use_gpu else 'cpu')

    # Data loading.
    annotations = get_all_loader_annotations(print_fn=print, training=False)
    dataset = Dataset(annotations, batch_size=1, sub_means=True,
                      training=False)
    # Pinned host memory only helps host-to-GPU copies, so request it only
    # when the run actually uses the GPU.
    data_loader = data.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=False,
                                  num_workers=0,
                                  pin_memory=bool(use_gpu))

    # Model.
    if cfg.model == 'vgg16':
        model = Vgg16()
    else:
        # Fail early with a clear message instead of crashing on
        # ``None.to(device)`` below.
        raise ValueError('Unsupported model: {!r}'.format(cfg.model))
    model = model.to(device)  # move to the selected device
    net_utils.load_model(cfg.test.model, model, use_gpu=use_gpu)  # restore weights
    model.eval()  # inference mode

    # The loader is unshuffled with batch_size=1, so ``i`` is also the index
    # of the sample inside ``dataset`` / ``annotations``.
    for i, items in enumerate(data_loader):
        images, im_info, gt_boxes, num_boxes = [x.to(device) for x in items]

        # Forward pass.
        with torch.no_grad():
            result = model(images, im_info, gt_boxes, num_boxes)
        rois, pred_cls_prob, pred_loc, _, _, _, _, _ = result

        # Decode the predictions on the CPU.
        im_info, rois, pred_cls_prob, pred_loc = (im_info.cpu(), rois.cpu(),
                                                  pred_cls_prob.cpu(),
                                                  pred_loc.cpu())
        scores, boxes = inference_utils.process_boxes(im_info, rois,
                                                      pred_cls_prob, pred_loc)

        # Per-class NMS, then keep the top-k boxes.
        results = inference_utils.nms_all(scores, boxes, score_threshold=0.05)
        results = inference_utils.get_top_k_boxes(results)

        # Flatten the per-class results into the layout the visualizer takes.
        # The empty float32 seeds keep the output well-formed when nothing
        # was detected; concatenating once avoids re-copying per class.
        box_parts = [np.zeros([0, 4], dtype=np.float32)]
        score_parts = [np.zeros([0], dtype=np.float32)]
        pred_labels = []
        for j in range(len(results)):
            obj_boxes = results[j]
            if len(obj_boxes) == 0:
                continue
            box_parts.append(obj_boxes[:, 0:4])
            score_parts.append(obj_boxes[:, 4])
            pred_labels += [j] * len(obj_boxes)

        pred_boxes = np.concatenate(box_parts, axis=0).reshape(-1, 4)
        pred_scores = np.concatenate(score_parts, axis=0).reshape(-1)
        pred_labels = np.array(pred_labels, dtype=np.int32).reshape(-1)

        image = dataset.load_image(i)[:, :, ::-1]  # bgr -> rgb
        gt_for_display = np.concatenate(
            [annotations[i]['boxes'],
             annotations[i]['labels'].reshape(-1, 1)],
            axis=1)
        visualization.show_prediction(image,
                                      (pred_boxes, pred_labels, pred_scores),
                                      gt_for_display,
                                      add_mean=False)
class TFRecord(object):
    """Write the trainval split to a TFRecord file and read it back in batches.

    Paths, sizes and solver options are taken from the module-level
    ``path_params``, ``model_params`` and ``solver_params`` dictionaries.
    Each record holds two byte strings: ``'image'`` and ``'label'``.
    """

    def __init__(self):
        self.data_path = path_params['data_path']
        self.tfrecord_dir = path_params['tfrecord_dir']
        self.train_tfrecord_name = path_params['train_tfrecord_name']
        self.test_tfrecord_name = path_params['test_tfrecord_name']
        self.image_size = model_params['image_size']
        self.cell_size = model_params['cell_size']
        self.class_num = model_params['num_classes']
        self.class_ind = dict(zip(CLASSES, range(self.class_num)))
        self.batch_size = solver_params['batch_size']
        self.flipped = solver_params['flipped']
        self.dataset = Dataset()

    def _int64_feature(self, value):
        """Wrap a single integer in an ``Int64List`` feature."""
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    def _bytes_feature(self, value):
        """Wrap a single byte string in a ``BytesList`` feature."""
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    @staticmethod
    def _read_image_ids(list_path):
        """Return the non-empty, whitespace-stripped lines of ``list_path``."""
        with open(list_path, 'r') as list_file:
            stripped = (line.strip() for line in list_file)
            # ``strip`` (rather than dropping the last character) keeps the
            # final id intact when the file has no trailing newline.
            return [image_id for image_id in stripped if image_id]

    def _make_example(self, image, label):
        """Pack one image/label array pair into a ``tf.train.Example``."""
        return tf.train.Example(features=tf.train.Features(
            feature={
                'image': self._bytes_feature(image.tobytes()),
                'label': self._bytes_feature(label.tobytes())
            }))

    def create_tfrecord(self):
        """Write every trainval sample to the training TFRecord file.

        Nothing is done when the target file already exists. When
        ``self.flipped`` is set, ``load_image`` / ``load_annotation`` are
        expected to return an (original, flipped) pair and both samples are
        written, original first.
        """
        # Image ids that make up the trainval split.
        trainval_path = os.path.join(self.data_path, 'ImageSets', 'Main',
                                     'trainval.txt')
        tf_file = os.path.join(self.tfrecord_dir, self.train_tfrecord_name)
        if os.path.exists(tf_file):
            return

        # Read the id list before creating the output file so that a missing
        # list does not leave an empty record file behind (which would make
        # the existence check above skip regeneration forever).
        image_ids = self._read_image_ids(trainval_path)

        writer = tf.python_io.TFRecordWriter(tf_file)
        try:
            for image_num in image_ids:
                if self.flipped:
                    image, image_flipped = self.dataset.load_image(
                        image_num=image_num)
                    label, label_flipped = self.dataset.load_annotation(
                        image_num=image_num)
                    samples = [(image, label), (image_flipped, label_flipped)]
                else:
                    samples = [(self.dataset.load_image(image_num),
                                self.dataset.load_annotation(image_num))]

                for sample_image, sample_label in samples:
                    example = self._make_example(sample_image, sample_label)
                    writer.write(example.SerializeToString())
        finally:
            # Always release the file handle, even if a sample fails to load.
            writer.close()
        print('Finish trainval.tfrecord Done')

    def parse_single_example(self, file_name):
        """Build the ops that read one serialized sample from the train file.

        :param file_name: name of the TFRecord file to parse.
            NOTE(review): this argument is currently ignored and the training
            file is always read; confirm with callers before honoring it.
        :return: the raw ``image`` and ``label`` string tensors of one sample
        """
        tfrecord_file = os.path.join(self.tfrecord_dir,
                                     self.train_tfrecord_name)

        # Reader op for TFRecord files.
        reader = tf.TFRecordReader()

        # Queue of input file names.
        filename_queue = tf.train.string_input_producer([tfrecord_file])

        # Read and parse a single serialized example.
        _, serialized_example = reader.read(filename_queue)
        features = tf.parse_single_example(
            serialized_example,
            features={
                'image': tf.FixedLenFeature([], tf.string),
                'label': tf.FixedLenFeature([], tf.string)
            })

        return features['image'], features['label']

    def parse_batch_examples(self, file_name):
        """Build the ops that yield shuffled, decoded batches.

        :param file_name: forwarded to ``parse_single_example``
        :return: ``image_batch`` reshaped to
            ``[batch_size, image_size, image_size, 3]`` and ``label_batch``
            reshaped to ``[batch_size, cell_size, cell_size, 5 + class_num]``
        """
        batch_size = self.batch_size
        min_after_dequeue = 100
        num_threads = 8
        capacity = min_after_dequeue + 3 * batch_size

        image, label = self.parse_single_example(file_name)
        image_batch, label_batch = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=num_threads,
            capacity=capacity,
            min_after_dequeue=min_after_dequeue)

        # Decode the raw bytes.
        # assumes the stored arrays are float32 — TODO confirm against Dataset
        image_batch = tf.decode_raw(image_batch, tf.float32)
        label_batch = tf.decode_raw(label_batch, tf.float32)

        # Reshape to what the network expects.
        image_batch = tf.reshape(
            image_batch,
            [self.batch_size, self.image_size, self.image_size, 3])
        label_batch = tf.reshape(label_batch, [
            self.batch_size, self.cell_size, self.cell_size,
            5 + self.class_num
        ])

        return image_batch, label_batch
class TFRecord(object):
    """Write the trainval split to a TFRecord file and expose it as a dataset.

    Paths, sizes and solver options are taken from the module-level
    ``path_params``, ``model_params`` and ``solver_params`` dictionaries.
    Each record stores the raw image bytes, the zero-padded box array and the
    image height and width.
    """

    # Box arrays with fewer rows than this are zero-padded up to it on write.
    MAX_BOXES = 300

    def __init__(self):
        self.data_path = path_params['data_path']
        self.tfrecord_dir = path_params['tfrecord_dir']
        self.train_tfrecord_name = path_params['train_tfrecord_name']
        self.input_width = model_params['input_width']
        self.input_height = model_params['input_height']
        self.channels = model_params['channels']
        self.class_num = len(model_params['classes'])
        self.batch_size = solver_params['batch_size']
        self.dataset = Dataset()

    def _int64_feature(self, value):
        """Wrap an integer, or a list of integers, in an ``Int64List`` feature."""
        if not isinstance(value, list):
            value = [value]
        # ``value`` is already a list here; wrapping it again would nest it.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

    def _float_feature(self, value):
        """Wrap a float, or a list of floats, in a ``FloatList`` feature."""
        if not isinstance(value, list):
            value = [value]
        return tf.train.Feature(float_list=tf.train.FloatList(value=value))

    def _bytes_feature(self, value):
        """Wrap a byte string, or a list of them, in a ``BytesList`` feature."""
        if not isinstance(value, list):
            value = [value]
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))

    @staticmethod
    def _read_image_ids(list_path):
        """Return the non-empty, whitespace-stripped lines of ``list_path``."""
        with open(list_path, 'r') as list_file:
            stripped = (line.strip() for line in list_file)
            # ``strip`` (rather than dropping the last character) keeps the
            # final id intact when the file has no trailing newline.
            return [image_id for image_id in stripped if image_id]

    @staticmethod
    def _pad_boxes(boxes, max_boxes):
        """Return ``boxes`` as float32, zero-padded to at least ``max_boxes`` rows."""
        boxes = np.asarray(boxes, dtype=np.float32)
        missing = max_boxes - len(boxes)
        if missing > 0:
            padding = np.zeros((missing, 5), dtype=np.float32)
            boxes = np.concatenate([boxes, padding], axis=0)
        return boxes

    def create_tfrecord(self):
        """(Re)write the training TFRecord file from the trainval image list.

        An existing file is replaced. Images whose label list is empty are
        skipped.
        """
        # Image ids that make up the trainval split.
        trainval_path = os.path.join(self.data_path, 'ImageSets', 'Main',
                                     'trainval.txt')
        tf_file = os.path.join(self.tfrecord_dir, self.train_tfrecord_name)

        # Read the list first so that a missing list file does not cost us
        # the previously generated record file.
        image_ids = self._read_image_ids(trainval_path)

        if os.path.exists(tf_file):
            os.remove(tf_file)

        writer = tf.python_io.TFRecordWriter(tf_file)
        try:
            for image_id in image_ids:
                image = self.dataset.load_image(image_id)
                boxes = self.dataset.load_label(image_id)

                if len(boxes) == 0:
                    continue

                boxes = self._pad_boxes(boxes, self.MAX_BOXES)
                height, width = image.shape[0], image.shape[1]

                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'image': self._bytes_feature(image.tobytes()),
                        'bbox': self._bytes_feature(boxes.tobytes()),
                        'height': self._int64_feature(height),
                        'width': self._int64_feature(width),
                    }))
                writer.write(example.SerializeToString())
        finally:
            # Always release the file handle, even if a sample fails to load.
            writer.close()
        print('Finish trainval.tfrecord Done')

    def parse_single_example(self, serialized_example):
        """Decode one serialized record and run the dataset preprocessing.

        :param serialized_example: scalar string tensor holding one record
        :return: the four float32 tensors produced by
            ``self.dataset.preprocess_data``: the image and the three
            ``y_true`` targets
        """
        features = tf.parse_single_example(
            serialized_example,
            features={
                'image': tf.FixedLenFeature([], tf.string),
                'bbox': tf.FixedLenFeature([], tf.string),
                'height': tf.FixedLenFeature([], tf.int64),
                'width': tf.FixedLenFeature([], tf.int64)
            })

        # assumes images were stored as uint8 with 3 channels — TODO confirm
        # against Dataset.load_image
        tf_image = tf.decode_raw(features['image'], tf.uint8)
        tf_bbox = tf.decode_raw(features['bbox'], tf.float32)
        tf_height = features['height']
        tf_width = features['width']

        # Restore the array shapes. The row count of the boxes is inferred so
        # it always agrees with whatever number of rows was written.
        tf_image = tf.reshape(tf_image, [tf_height, tf_width, 3])
        tf_label = tf.reshape(tf_bbox, [-1, 5])

        # Preprocess in Python.
        tf_image, y_true_13, y_true_26, y_true_52 = tf.py_func(
            self.dataset.preprocess_data,
            inp=[tf_image, tf_label, self.input_height, self.input_width],
            Tout=[tf.float32, tf.float32, tf.float32, tf.float32])

        return tf_image, y_true_13, y_true_26, y_true_52

    def create_dataset(self,
                       filenames,
                       batch_num,
                       batch_size=1,
                       is_shuffle=False):
        """Build an endlessly repeating, batched ``tf.data`` pipeline.

        :param filenames: record file names
        :param batch_num: shuffle buffer size, used only when ``is_shuffle``
        :param batch_size: batch size, also used as the prefetch depth
        :param is_shuffle: whether to shuffle the parsed samples
        :return: the resulting dataset
        """
        dataset = tf.data.TFRecordDataset(filenames)
        dataset = dataset.map(self.parse_single_example, num_parallel_calls=8)
        if is_shuffle:
            dataset = dataset.shuffle(batch_num)
        dataset = dataset.batch(batch_size)
        dataset = dataset.repeat()
        dataset = dataset.prefetch(batch_size)

        return dataset