def yolov4_model_create(input_size,
                        num_class=len(utils.read_class_names(cfg.YOLO.CLASSES))):
    """
    Build the YOLOv4 inference model.

    YOLOv4 returns [conv_sbbox, conv_mbbox, conv_lbbox]; each decoded output is a
    tensor of shape [batch_size, output_size, output_size, anchor_per_scale,
    5 + num_classes] containing (x, y, w, h, score, probability).
    """
    # input layer
    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    # YOLOv4 -> [conv_sbbox, conv_mbbox, conv_lbbox], len = 3
    sml_feature_maps = YOLOv4(input_layer, num_class)

    # decode each feature map -> output_layers
    output_layers = []
    for feature_map in sml_feature_maps:
        # print("feature_map: ", feature_map)
        output_layer = decode(feature_map, num_class)
        # print("decode -> ", output_layer.shape)
        output_layers.append(output_layer)

    # create model
    model = tf.keras.Model(input_layer, output_layers, name="YOLOv4_jam")
    # model.summary()
    logging.info("YOLOv4 Model built.")
    return model
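# --- Usage sketch (not from the original source) ---
# A minimal, hedged example of how yolov4_model_create is typically used: build
# the model, load pre-trained Darknet weights (load_darknet_weights is assumed
# to live in the same module; it is called as api.load_darknet_weights
# elsewhere in this section), then run a forward pass. The weights path is a
# hypothetical example.
def _example_create_and_predict():
    model = yolov4_model_create(input_size=608)
    load_darknet_weights(model, "./data/yolov4.weights")  # hypothetical path
    dummy = np.zeros((1, 608, 608, 3), dtype=np.float32)  # one blank input image
    pred_sbbox, pred_mbbox, pred_lbbox = model.predict(dummy)  # one decoded output per scale
    print(pred_sbbox.shape, pred_mbbox.shape, pred_lbbox.shape)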
def draw_bbox(image,
              bboxes,
              classes=utils.read_class_names(cfg.YOLO.CLASSES),
              show_label=True):
    """
    bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
    """
    num_classes = len(classes)
    image_h, image_w, _ = image.shape

    # one HSV-derived color per class
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))

    # shuffle the colors deterministically
    random.seed(0)
    random.shuffle(colors)
    random.seed(None)

    for i, bbox in enumerate(bboxes):
        coor = np.array(bbox[:4], dtype=np.int32)
        fontScale = 0.6
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = colors[class_ind]
        bbox_thick = int(0.6 * (image_h + image_w) / 600)
        c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            t_size = cv2.getTextSize(bbox_mess, 0, fontScale,
                                     thickness=bbox_thick // 2)[0]
            cv2.rectangle(image, c1,
                          (c1[0] + t_size[0], c1[1] - t_size[1] - 3),
                          bbox_color, -1)  # filled label background
            cv2.putText(image, bbox_mess, (c1[0], c1[1] - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, fontScale, (0, 0, 0),
                        bbox_thick // 2, lineType=cv2.LINE_AA)

    return image
def detecting_info(bboxes):
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    for bbox in bboxes:
        coordinate = np.array(bbox[:4], dtype=np.int32)
        score = bbox[4]
        class_index = int(bbox[5])
        class_name = CLASSES[class_index]
        score = '%.4f' % score
        xmin, ymin, xmax, ymax = list(map(str, coordinate))
        print(
            f"{class_name:8}: {score:6}, coordinate: ({xmin}, {ymin}, {xmax}, {ymax})"
        )
    logging.info("Detecting done...")
def __init__(self, dataset_type):
    self.annot_path = cfg.TRAIN.ANNOT_PATH if dataset_type == 'train' else cfg.TEST.ANNOT_PATH
    self.input_sizes = cfg.TRAIN.INPUT_SIZE if dataset_type == 'train' else cfg.TEST.INPUT_SIZE
    self.batch_size = cfg.TRAIN.BATCH_SIZE if dataset_type == 'train' else cfg.TEST.BATCH_SIZE
    self.data_aug = cfg.TRAIN.DATA_AUG if dataset_type == 'train' else cfg.TEST.DATA_AUG

    self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
    self.strides = np.array(cfg.YOLO.STRIDES)
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_classes = len(self.classes)
    self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE  # 3
    self.max_bbox_per_scale = 150

    self.annotations = self.load_annotations(dataset_type)
    self.num_samples = len(self.annotations)  # number of samples
    self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))  # number of batches
    self.batch_count = 0
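# --- Sketch (not from the original source) ---
# The training loop later in this section uses `len(dataset_train)` for
# steps_per_epoch and iterates with `for image_data, target in dataset_train`,
# so the Dataset class is assumed to implement the sized-iterator protocol
# roughly as below. This stub only illustrates the contract; the real __next__
# builds image/target batches from self.annotations.
class _DatasetIterationContract:
    def __init__(self, num_batchs):
        self.num_batchs = num_batchs
        self.batch_count = 0

    def __len__(self):
        return self.num_batchs  # number of batches per epoch

    def __iter__(self):
        return self

    def __next__(self):
        if self.batch_count < self.num_batchs:
            self.batch_count += 1
            return "image_data", "target"  # placeholder batch
        self.batch_count = 0  # reset so the dataset can be re-iterated next epoch
        raise StopIteration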
def yolov4_model_create_4_train(input_size=cfg.TRAIN.INPUT_SIZE,
                                num_class=len(utils.read_class_names(cfg.YOLO.CLASSES)),
                                anchors=utils.get_anchors(cfg.YOLO.ANCHORS),
                                strides=np.array(cfg.YOLO.STRIDES),
                                xyscale=cfg.YOLO.XYSCALE):
    input_layer = tf.keras.layers.Input([input_size, input_size, 3])
    feature_maps = YOLOv4(input_layer, num_class)

    # keep both the raw conv output (used by compute_loss) and the decoded
    # prediction for each scale
    bbox_tensors = []
    for i, fm in enumerate(feature_maps):
        bbox_tensor = decode_train(fm, num_class, strides, anchors, i, xyscale)
        bbox_tensors.append(fm)
        bbox_tensors.append(bbox_tensor)

    model = tf.keras.Model(input_layer, bbox_tensors, name="YOLOv4_4_training")
    logging.info("YOLOv4 Model built.")
    return model
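# --- Sketch (not from the original source) ---
# The training model returns six tensors, interleaved per scale as
# [conv_sbbox, pred_sbbox, conv_mbbox, pred_mbbox, conv_lbbox, pred_lbbox];
# the train_step in main() below indexes them as pred_result[i * 2] and
# pred_result[i * 2 + 1]. A minimal check of that ordering (416 is just an
# example input size):
def _example_train_model_outputs():
    model = yolov4_model_create_4_train(input_size=416)
    assert len(model.outputs) == 6  # a (conv, pred) pair for each of the 3 scales
    for i in range(3):
        conv, pred = model.outputs[i * 2], model.outputs[i * 2 + 1]
        print(i, conv.shape, pred.shape)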
class YOLO(object):
    # basic settings
    _default_settings = {
        "STRIDES": np.array(cfg.YOLO.STRIDES),
        "ANCHORS": utils.get_anchors(cfg.YOLO.ANCHORS),
        "SCORE_THRESHOLD": cfg.YOLO.SCORE_THRESHOLD,
        "IOU_THRESHOLD": cfg.YOLO.IOU_THRESHOLD,
        "NUM_CLASS": len(utils.read_class_names(cfg.YOLO.CLASSES)),
        "CLASSES": utils.read_class_names(cfg.YOLO.CLASSES),
        "XYSCALE": cfg.YOLO.XYSCALE,
        "YOLOv4_WEIGHTS": cfg.YOLO.WEIGHTS,
        "IMAGE_OUTPUT_PATH": cfg.YOLO.IMAGE_OUTPUT_PATH,
        "VIDEO_OUTPUT_PATH": cfg.YOLO.VIDEO_OUTPUT_PATH,
        "SAVE_OR_NOT": cfg.SAVE_OR_NOT
    }

    # init
    def __init__(self, input_size, **params):
        self.INPUT_SIZE = input_size
        self.__dict__.update(self._default_settings)

    # create model and load weights
    def create_model(self, input_size=cfg.YOLO.INPUT_SIZE, darknet_weights=None):
        # create model
        model = api.yolov4_model_create(input_size)
        # load weights
        if darknet_weights is not None:
            api.load_darknet_weights(model, darknet_weights)
        return model

    # image detect
    def image_detecting(self, image, show_info=False):
        # pre-process the image (must be an image read by OpenCV)
        image_original, image_processed, image_original_size = api.image_preprocess_before_predicting(
            image=image, fixed_size=self.INPUT_SIZE)

        # create model & load weights
        model = self.create_model(self.INPUT_SIZE, self.YOLOv4_WEIGHTS)

        time_begin = time.time()
        # predict
        bboxes_predicted = model.predict(image_processed)
        # bbox post-processing
        bboxes_processed = api.bboxes_optimizing(
            bboxes_pred=bboxes_predicted,
            image_original_size=image_original_size,
            input_size=self.INPUT_SIZE,
            anchors=self.ANCHORS,
            strides=self.STRIDES,
            xyscale=self.XYSCALE,
            score_thr=self.SCORE_THRESHOLD,
            iou_thr=self.IOU_THRESHOLD,
            nms_method="nms",
            show_info=show_info)
        # draw bboxes
        image_with_bboxes = api.draw_bbox(image_original, bboxes_processed)
        time_end = time.time()
        logging.info(f"Time consumed: {time_end - time_begin}")

        # show
        image = Image.fromarray(image_with_bboxes)
        image.show()
        if self.SAVE_OR_NOT:
            image.save(self.IMAGE_OUTPUT_PATH)

    # video detect
    def video_detecting(self, video, video_saver, show_info=False):
        # create model & load weights
        model = self.create_model(self.INPUT_SIZE, self.YOLOv4_WEIGHTS)

        # per-frame execution times
        times = []
        time_begin = time.time()
        while True:
            """
            video.read() returns (ret, frame):
            - ret: bool, True while frames are read correctly, False at the end of the video
            - frame: the image, a 3-dim array
            """
            return_value, frame = video.read()
            if return_value:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame)
            else:
                time_end = time.time()
                logging.info("Video detecting done.")
                logging.info(f"Time consumed: {time_end - time_begin} s")
                break

            frame, image_processed, frame_size = api.image_preprocess_before_predicting(
                image=frame, fixed_size=self.INPUT_SIZE)
            # # get size
            # frame_size = frame.shape[:2]
            # # frame (image) pre-process
            # image_processed = utils.image_preporcess(np.copy(frame), [self.INPUT_SIZE, self.INPUT_SIZE])

            # time at which detection of this frame starts
            prev_time = time.time()
            # logging.info("Video detecting...")

            # predict
            bboxes_predicted = model.predict(image_processed)
            # bbox post-processing
            bboxes_processed = api.bboxes_optimizing(
                bboxes_pred=bboxes_predicted,
                image_original_size=frame_size,
                input_size=self.INPUT_SIZE,
                anchors=self.ANCHORS,
                strides=self.STRIDES,
                xyscale=self.XYSCALE,
                score_thr=0.25,
                iou_thr=0.213,
                nms_method="nms",
                show_info=show_info)
            curr_time = time.time()  # time at which detection of this frame ends
            exec_time = curr_time - prev_time  # execution time for this frame
            times.append(exec_time)

            image = api.draw_bbox(frame, bboxes_processed)
            image = cv2.putText(
                image, "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
                (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if self.SAVE_OR_NOT:
                video_saver.write(result)  # write the BGR frame expected by cv2.VideoWriter

            cv2.imshow("result", result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
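# --- Usage sketch (not from the original source) ---
# A hedged example of driving the YOLO wrapper above. The image and video paths
# and the VideoWriter settings are hypothetical; thresholds, anchors and output
# paths come from cfg via _default_settings.
def _example_yolo_detection():
    yolo = YOLO(input_size=608)

    # image detection: pass an image read by OpenCV
    image = cv2.imread("./data/kite.jpg")  # hypothetical path
    yolo.image_detecting(image, show_info=True)

    # video detection: pass a capture plus a writer for the annotated frames
    video = cv2.VideoCapture("./data/road.mp4")  # hypothetical path
    fps = video.get(cv2.CAP_PROP_FPS)
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    video_saver = cv2.VideoWriter(yolo.VIDEO_OUTPUT_PATH, fourcc, fps, (width, height))
    yolo.video_detecting(video, video_saver, show_info=False)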
def main(_argv):
    # file ops
    predicted_dir_path = './mAP/predicted'
    ground_truth_dir_path = './mAP/ground-truth'
    if os.path.exists(predicted_dir_path):
        shutil.rmtree(predicted_dir_path)
    if os.path.exists(ground_truth_dir_path):
        shutil.rmtree(ground_truth_dir_path)
    if os.path.exists(cfg.TEST.DECTECTED_IMAGE_PATH):
        shutil.rmtree(cfg.TEST.DECTECTED_IMAGE_PATH)

    os.mkdir(predicted_dir_path)
    os.mkdir(ground_truth_dir_path)
    os.mkdir(cfg.TEST.DECTECTED_IMAGE_PATH)

    # GPU or CPU
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        print("GPU found.")
    else:
        print("No GPU found, running on CPU.")

    # settings prep
    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)

    # create model
    model = api.yolov4_model_create(FLAGS.input_size)
    # load weights
    api.load_darknet_weights(model, cfg.YOLO.WEIGHTS)

    # number of lines in the annotation file
    num_lines = sum(1 for line in open(cfg.TEST.ANNOT_PATH))

    with open(cfg.TEST.ANNOT_PATH, 'r') as annotation_file:
        # example annotation line:
        # ./data/coco/images/val2017/000000289343.jpg 473,395,511,423,16 204,235,264,412,0 0,499,339,605,13 204,304,256,456,1
        for num, line in enumerate(annotation_file):
            # strip() removes leading/trailing whitespace such as \t\r\n;
            # split() returns a list of the space-separated chunks.
            # The example line above becomes:
            # ['./data/coco/images/val2017/000000289343.jpg',
            #  '473,395,511,423,16',
            #  '204,235,264,412,0',
            #  '0,499,339,605,13',
            #  '204,304,256,456,1']
            annotation = line.strip().split()
            image_path = annotation[0]
            image_name = image_path.split('/')[-1]
            image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # ground-truth bbox data -> [x1, y1, x2, y2, class]
            bbox_data_gt = np.array(
                [list(map(int, box.split(','))) for box in annotation[1:]])

            if len(bbox_data_gt) == 0:
                bboxes_gt = []
                classes_gt = []
            else:
                bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
            ground_truth_path = os.path.join(ground_truth_dir_path, str(num) + '.txt')

            print('=> ground truth of %s:' % image_name)
            num_bbox_gt = len(bboxes_gt)
            """
            ground-truth: the ground-truth-bbox txt files, matching the mAP
            script get_gt_txt.py. Each line is "<class> <xmin> <ymin> <xmax> <ymax>", e.g.:
            3 194 400 208 414
            0 43 372 57 386
            0 277 201 291 215
            1 143 134 199 190
            1 299 49 341 91
            5 150 218 192 260
            5 303 170 331 198
            7 101 92 129 120
            1 150 293 206 349
            5 0 102 112 214
            8 200 89 312 201
            """
            with open(ground_truth_path, 'w') as f:
                for i in range(num_bbox_gt):
                    class_name = CLASSES[classes_gt[i]]
                    xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
                    bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())

            print('=> predict result of %s:' % image_name)
            predict_result_path = os.path.join(predicted_dir_path, str(num) + '.txt')

            image_original, image_processed, image_original_size = api.image_preprocess_before_predicting(
                image=image, fixed_size=FLAGS.input_size)

            # predict; image_processed shape -> (1, 608, 608, 3)
            bboxes_predicted = model.predict(image_processed)
            bboxes_processed = api.bboxes_optimizing(
                bboxes_pred=bboxes_predicted,
                image_original_size=image_original_size,
                input_size=FLAGS.input_size,
                show_info=False,
                show_detected_objects_numbers=False)

            # previous predict process:
            # image_size = image.shape[:2]
            # image_data = utils.image_preporcess(np.copy(image), [input_size, input_size])
            # pred_bbox = model.predict(image_data)
            # XYSCALE = cfg.YOLO.XYSCALE
            # pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=XYSCALE)
            # pred_bbox = tf.concat(pred_bbox, axis=0)
            # bboxes = utils.postprocess_boxes(pred_bbox, image_size, input_size, cfg.TEST.SCORE_THRESHOLD)
            # bboxes = utils.nms(bboxes, cfg.TEST.IOU_THRESHOLD, method='nms')

            # save predicted test images
            if cfg.TEST.DECTECTED_IMAGE_PATH is not None:
                image = api.draw_bbox(image, bboxes_processed)
                cv2.imwrite(cfg.TEST.DECTECTED_IMAGE_PATH + image_name, image)

            """
            detection-results: the predicted-bbox txt files, matching the mAP
            script get_dr_txt.py. Each line is "<class> <score> <xmin> <ymin> <xmax> <ymax>", e.g.:
            0 0.9426 277 201 291 214
            0 0.9347 43 372 57 386
            1 0.9877 143 133 199 189
            1 0.9842 150 293 205 348
            1 0.9663 299 49 341 90
            5 0.9919 302 169 330 198
            5 0.9823 0 102 112 213
            5 0.9684 150 218 190 259
            7 0.9927 101 92 129 119
            8 0.9695 199 88 314 202
            """
            with open(predict_result_path, 'w') as f:
                for bbox in bboxes_processed:
                    coor = np.array(bbox[:4], dtype=np.int32)
                    score = bbox[4]
                    class_ind = int(bbox[5])
                    class_name = CLASSES[class_ind]
                    score = '%.4f' % score
                    xmin, ymin, xmax, ymax = list(map(str, coor))
                    bbox_mess = ' '.join(
                        [class_name, score, xmin, ymin, xmax, ymax]) + '\n'
                    f.write(bbox_mess)
                    print('\t' + str(bbox_mess).strip())
            print(num, num_lines)
def main(_argv):
    # train data
    dataset_train = Dataset(dataset_type="train")
    print(type(dataset_train))

    # log dir
    logdir_train = "logs/gradient_tape/" + datetime.now().strftime("%Y%m%d-%H%M%S") + "/train"

    steps_per_epoch = len(dataset_train)
    first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS  # 20
    second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS  # 30
    global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
    warmup_steps = cfg.TRAIN.WARMUP_EPOCHS * steps_per_epoch  # 5 * steps_per_epoch
    total_steps = (first_stage_epochs + second_stage_epochs) * steps_per_epoch

    NUM_CLASS = len(utils.read_class_names(cfg.YOLO.CLASSES))
    STRIDES = np.array(cfg.YOLO.STRIDES)
    IOU_LOSS_THRESH = cfg.YOLO.IOU_LOSS_THRESH
    XYSCALE = cfg.YOLO.XYSCALE
    ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS)
    LOAD_WEIGHTS = None

    model = api.yolov4_model_create_4_train(input_size=cfg.TRAIN.INPUT_SIZE)

    if LOAD_WEIGHTS is None:
        print("Training from scratch")
    else:
        utils.load_weights_tiny(model, FLAGS.weights)

    optimizer = tf.keras.optimizers.Adam()
    train_summary_writer = tf.summary.create_file_writer(logdir_train)

    def train_step(image_data, target):
        with tf.GradientTape() as tape:
            pred_result = model(image_data, training=True)
            ciou_loss = conf_loss = prob_loss = 0

            # optimizing process: sum the loss over the three scales; the model
            # outputs are interleaved as (conv, pred) pairs at indices (0,1), (2,3), (4,5)
            for i in range(3):
                conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
                loss_items = compute_loss(pred,
                                          conv,
                                          target[i][0],
                                          target[i][1],
                                          STRIDES=STRIDES,
                                          NUM_CLASS=NUM_CLASS,
                                          IOU_LOSS_THRESH=IOU_LOSS_THRESH,
                                          i=i)
                ciou_loss += loss_items[0]
                conf_loss += loss_items[1]
                prob_loss += loss_items[2]

            total_loss = ciou_loss + conf_loss + prob_loss

            gradients = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            tf.print(
                "=> STEP %4d lr: %.6f ciou_loss: %4.2f conf_loss: %4.2f "
                "prob_loss: %4.2f total_loss: %4.2f" %
                (global_steps, optimizer.lr.numpy(), ciou_loss, conf_loss,
                 prob_loss, total_loss))

            # update learning rate: linear warmup, then cosine decay
            global_steps.assign_add(1)
            if global_steps < warmup_steps:
                lr = global_steps / warmup_steps * cfg.TRAIN.LR_INIT  # cfg.TRAIN.LR_INIT = 1e-3, cfg.TRAIN.LR_END = 1e-6
            else:
                lr = cfg.TRAIN.LR_END + 0.5 * (cfg.TRAIN.LR_INIT - cfg.TRAIN.LR_END) * (
                    (1 + tf.cos((global_steps - warmup_steps) /
                                (total_steps - warmup_steps) * np.pi)))
            optimizer.lr.assign(lr.numpy())

            # writing summary data
            with train_summary_writer.as_default():
                tf.summary.scalar("lr", optimizer.lr, step=global_steps)
                tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
                tf.summary.scalar("loss/ciou_loss", ciou_loss, step=global_steps)
                tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
                tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
            train_summary_writer.flush()

    for epoch in range(first_stage_epochs + second_stage_epochs):
        for image_data, target in dataset_train:
            train_step(image_data, target)
        model.save_weights("./checkpoints/yolov4")
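# --- Sketch (not from the original source) ---
# The learning-rate schedule in train_step is a linear warmup to LR_INIT
# followed by a cosine decay down to LR_END. A small standalone version of the
# same formula for illustration (defaults mirror the cfg values noted above:
# LR_INIT = 1e-3, LR_END = 1e-6):
def _example_lr_schedule(step, warmup_steps, total_steps, lr_init=1e-3, lr_end=1e-6):
    if step < warmup_steps:
        # warmup: ramp linearly from 0 up to lr_init
        return step / warmup_steps * lr_init
    # cosine decay from lr_init down to lr_end over the remaining steps
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return lr_end + 0.5 * (lr_init - lr_end) * (1 + np.cos(progress * np.pi))

# e.g. with warmup_steps=500 and total_steps=5000:
#   _example_lr_schedule(250, 500, 5000)  -> 5e-4  (mid-warmup)
#   _example_lr_schedule(500, 500, 5000)  -> 1e-3  (peak, start of decay)
#   _example_lr_schedule(5000, 500, 5000) -> 1e-6  (end of training)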