def __init__(self):
    # hyper-parameters for bounding box shape
    self.frame_window = 10
    self.emotion_offsets = (20, 40)

    # loading models
    self.face_detector = Decode('data/voc_classes.txt', './weights/best_model.h5')
    self.emotion_classifier = load_model(
        'model/fer2013_mini_XCEPTION.102-0.66.hdf5', compile=False)
    self.emotion_labels = get_labels('fer2013')

    # getting input model shapes for inference
    self.emotion_target_size = self.emotion_classifier.input_shape[1:3]

    # starting lists for calculating modes
    self.emotion_window = []
def __init__(self):
    # A larger self.input_shape raises accuracy but lowers speed.
    # self.input_shape = (320, 320)
    self.input_shape = (416, 416)
    # self.input_shape = (608, 608)

    # COCO
    self.file = 'data/coco_classes.txt'
    self.annotation_path = 'annotation/coco2017_val.txt'
    self.classes = read_class_names(self.file)

    # whether to save the images with drawn boxes
    # self.write_image = False
    self.write_image = True
    self.write_image_path = "./mAP/detection/"
    self.show_label = True
    self.num_classes = len(self.classes)

    # Keras only
    self._decode = Decode(0.3, 0.45, self.input_shape, 'yolo_bgr_mAP_46.h5', self.file)
class_names = get_classes(cfg.classes_path)
num_classes = len(class_names)
_anchors = copy.deepcopy(cfg.anchors)
num_anchors = len(cfg.anchor_masks[0])  # number of anchors per output layer
_anchors = np.array(_anchors)
_anchors = np.reshape(_anchors, (-1, num_anchors, 2))
_anchors = _anchors.astype(np.float32)

# step id; no need to set it, it is read automatically.
iter_id = 0

# multi-scale training
inputs = layers.Input(shape=(None, None, 3))
model_body = YOLOv4(inputs, num_classes, num_anchors)
_decode = Decode(cfg.conf_thresh, cfg.nms_thresh, cfg.input_shape, model_body, class_names)

# Mode. 0 - train from scratch; 1 - load a previous model and keep training
# (model_path can be 'yolov4.h5', './weights/step00001000.h5', etc.)
pattern = cfg.pattern
if pattern == 1:
    model_body.load_weights(cfg.model_path, by_name=True)  # , skip_mismatch=True)
    strs = cfg.model_path.split('step')
    if len(strs) == 2:
        iter_id = int(strs[1][:8])

# Freezing reduces the GPU memory needed. Recommended for 6 GB cards;
# with 11 GB cards, not freezing is recommended.
# freeze_before = 'conv2d_60'
# freeze_before = 'conv2d_72'
freeze_before = 'conv2d_86'
for i in range(len(model_body.layers)):
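
# A standalone sketch (not from the source) of what the anchor reshape above
# produces: with the standard 9 YOLOv4 COCO anchors and num_anchors = 3 per
# output layer, (-1, num_anchors, 2) groups them into one (3, 2) block per
# detection head. The anchor values are the assumed YOLOv4 defaults.
import numpy as np

anchors = [[12, 16], [19, 36], [40, 28],
           [36, 75], [76, 55], [72, 146],
           [142, 110], [192, 243], [459, 401]]
grouped = np.reshape(np.array(anchors, dtype=np.float32), (-1, 3, 2))
print(grouped.shape)  # (3, 3, 2): 3 output layers x 3 anchors x (w, h)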
# multi-scale training
inputs = P.data(name='input_1', shape=[-1, 3, -1, -1], append_batch_size=False, dtype='float32')
if algorithm == 'YOLOv4':
    output_l, output_m, output_s = YOLOv4(inputs, num_classes, num_anchors, is_test=False, trainable=True)
elif algorithm == 'YOLOv3':
    backbone = Resnet50Vd()
    head = YOLOv3Head(keep_prob=1.0)  # keep_prob must be 1.0 to get consistent inference results
    yolov3 = YOLOv3(backbone, head)
    output_l, output_m, output_s = yolov3(inputs)
eval_fetch_list = [output_l, output_m, output_s]
eval_prog = eval_prog.clone(for_test=True)

gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)

fluid.load(eval_prog, model_path, executor=exe)
_decode = Decode(algorithm, anchors, conf_thresh, nms_thresh, input_shape, exe, eval_prog, all_classes)
test_dev(_decode, eval_fetch_list, images, test_pre_path, eval_batch_size, draw_image)
output_l, output_m, output_s = YOLOv4(inputs, num_classes, num_anchors, is_test=False, trainable=True)
eval_fetch_list = [output_l, output_m, output_s]
eval_prog = eval_prog.clone(for_test=True)

# random parameter initialization
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)

compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)
_decode = Decode(cfg.conf_thresh, cfg.nms_thresh, cfg.input_shape, exe, compiled_eval_prog, class_names)
if cfg.pattern == 1:
    fluid.load(train_prog, cfg.model_path, executor=exe)
    strs = cfg.model_path.split('weights/')
    if len(strs) == 2:
        iter_id = int(strs[1])

# category ids
_catid2clsid = copy.deepcopy(catid2clsid)
_clsid2catid = copy.deepcopy(clsid2catid)
if num_classes != 80:  # a custom dataset rather than COCO
    _catid2clsid = {}
    _clsid2catid = {}
    for k in range(num_classes):
        _catid2clsid[k] = k
num_classes = len(class_names)

# build the model
Backbone = select_backbone(cfg.backbone_type)
backbone = Backbone(**cfg.backbone)
Head = select_head(cfg.head_type)
head = Head(yolo_loss=None, nms_cfg=cfg.nms_cfg, **cfg.head)
model = PPYOLO(backbone, head)
if use_gpu:
    model = model.cuda()
model.load_state_dict(torch.load(model_path))
model.eval()  # model.eval() must be called before inference so that dropout and batch-normalization layers switch to evaluation mode.
head.set_dropblock(is_test=True)
_decode = Decode(model, class_names, use_gpu, cfg, for_test=True)

if not os.path.exists('images/res/'):
    os.mkdir('images/res/')
path_dir = os.listdir('images/test')

# data-reading thread
test_dic = {}
thr = threading.Thread(target=read_test_data, args=(path_dir, _decode, test_dic))
thr.start()
key_list = list(test_dic.keys())
key_len = len(key_list)
while key_len == 0:
    time.sleep(0.01)
    key_list = list(test_dic.keys())
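
# A minimal alternative sketch (an assumption, not the repo's code) of the same
# producer/consumer handoff using queue.Queue, which blocks on get() instead of
# busy-waiting on a shared dict; read_test_data_q is a hypothetical producer.
import queue
import threading

def read_test_data_q(paths, out_q):
    for p in paths:
        out_q.put(p)  # producer: push each item as soon as it is ready

q = queue.Queue()
threading.Thread(target=read_test_data_q, args=(['a.jpg', 'b.jpg'], q), daemon=True).start()
first = q.get()  # consumer: blocks until an item is available, no sleep loop
print(first)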
classes_path = 'data/coco_classes.txt'
# model_path can be 'yolov4.h5', './weights/step00001000.h5', etc.
# model_path = 'yolov4.h5'
model_path = './weights/step00001000.h5'

# A larger input_shape raises accuracy but lowers speed.
# input_shape = (320, 320)
input_shape = (416, 416)
# input_shape = (608, 608)

num_anchors = 3
all_classes = get_classes(classes_path)
num_classes = len(all_classes)

inputs = layers.Input(shape=(None, None, 3))
yolo = YOLOv4(inputs, num_classes, num_anchors)
yolo.load_weights(model_path, by_name=True)
_decode = Decode(0.05, 0.45, input_shape, yolo, all_classes)

# detect images in the test folder.
for (root, dirs, files) in os.walk('images/test'):
    if files:
        start = time.time()
        for f in files:
            path = os.path.join(root, f)
            image = cv2.imread(path)
            image, boxes, scores, classes = _decode.detect_image(image, draw_image=True)
            cv2.imwrite('images/res/' + f, image)
        print('total time: {0:.6f}s'.format(time.time() - start))
set_session(tf.Session(config=config))

# parameters for loading data and images
facesDetect = {
    'classes_path': 'data/voc_classes.txt',
    'model_path': './weights/best_model.h5'
}
emotion_model_path = 'model/fer2013_mini_XCEPTION.102-0.66.hdf5'
emotion_labels = get_labels('fer2013')

# hyper-parameters for bounding box shape
frame_window = 10
emotion_offsets = (20, 40)

# loading models
face_detector = Decode(facesDetect['classes_path'], facesDetect['model_path'])
emotion_classifier = load_model(emotion_model_path, compile=False)

# getting input model shapes for inference
emotion_target_size = emotion_classifier.input_shape[1:3]

# starting lists for calculating modes
emotion_window = []


def job1():
    global frame1
    global a
    capture = cv2.VideoCapture(0)
    if capture.isOpened():
        capture.set(cv2.CAP_PROP_FRAME_WIDTH, 240)
import cv2
import os
import numpy as np
import tensorflow as tf
from model.decode_np import Decode

# On a 6 GB card, set use_gpu = False if you want to predict while training;
# otherwise GPU memory runs out.
use_gpu = False
use_gpu = True

# GPU memory allocation.
if use_gpu:
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
else:
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.75
set_session(tf.Session(config=config))

if __name__ == '__main__':
    classes_path = 'data/voc_classes.txt'
    model_path = './weights/best_model.h5'

    # Whether to draw boxes on the image. Skipping the drawing is faster;
    # image reading and post-processing can be optimized further.
    draw_image = True
    # draw_image = False

    _decode = Decode(classes_path, model_path)
    _decode.webcam_detect(draw_image)
backbone = Resnet50Vd()
head = YOLOv3Head(keep_prob=1.0)  # keep_prob must be 1.0 to get consistent inference results
yolov3 = YOLOv3(backbone, head)
output_l, output_m, output_s = yolov3(inputs)
eval_fetch_list = [output_l, output_m, output_s]
eval_prog = eval_prog.clone(for_test=True)

# random parameter initialization
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)

compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)
_decode = Decode(algorithm, cfg.anchors, cfg.conf_thresh, cfg.nms_thresh, cfg.input_shape, exe, compiled_eval_prog, class_names)
if cfg.pattern == 1:
    fluid.load(train_prog, cfg.model_path, executor=exe)
    strs = cfg.model_path.split('weights/')
    if len(strs) == 2:
        iter_id = int(strs[1])

# category ids
_catid2clsid = copy.deepcopy(catid2clsid)
_clsid2catid = copy.deepcopy(clsid2catid)
if num_classes != 80:  # a custom dataset rather than COCO
    _catid2clsid = {}
    _clsid2catid = {}
    for k in range(num_classes):
        _catid2clsid[k] = k
                                        num_filters, None, None),
                                 name='target2', dtype='float32')
    targets = [target0_tensor, target1_tensor, target2_tensor]
else:
    targets = [target0_tensor, target1_tensor]
loss_list = keras.layers.Lambda(yolo.get_loss, name='yolo_loss',
                                arguments={'target_num': target_num, })([*outputs, gt_bbox_tensor, *targets])
train_model = keras.models.Model(inputs=[x, gt_bbox_tensor, *targets], outputs=loss_list)
loss_n = len(loss_list)
_decode = Decode(predict_model, class_names, use_gpu, cfg, for_test=False)

# load weights
if cfg.train_cfg['model_path'] is not None:
    # load parameters, skipping those whose shapes don't match.
    train_model.load_weights(cfg.train_cfg['model_path'], by_name=True, skip_mismatch=True)
    strs = cfg.train_cfg['model_path'].split('step')
    if len(strs) == 2:
        iter_id = int(strs[1][:8])

# Freezing reduces the GPU memory needed. Recommended for low-memory cards.
backbone.freeze()
ins_anno_ids = val_dataset.getAnnIds(imgIds=img_id, iscrowd=False)  # ids of all annotations of this image
if len(ins_anno_ids) == 0:
    continue
img_anno = val_dataset.loadImgs(img_id)[0]
images.append(img_anno)

all_classes = get_classes(cfg.classes_path)
num_classes = len(all_classes)

# build the model
Backbone = select_backbone(cfg.backbone_type)
backbone = Backbone(**cfg.backbone)
Head = select_head(cfg.head_type)
head = Head(yolo_loss=None, nms_cfg=cfg.nms_cfg, **cfg.head)
ppyolo = PPYOLO(backbone, head)
if use_gpu:
    ppyolo = ppyolo.cuda()
ppyolo.load_state_dict(torch.load(model_path))
ppyolo.eval()  # model.eval() must be called before inference so that dropout and batch-normalization layers switch to evaluation mode; otherwise inference results are inconsistent.

_clsid2catid = copy.deepcopy(clsid2catid)
if num_classes != 80:  # a custom dataset rather than COCO
    _clsid2catid = {}
    for k in range(num_classes):
        _clsid2catid[k] = k

_decode = Decode(ppyolo, all_classes, use_gpu, cfg, for_test=False)
box_ap = eval(_decode, images, eval_pre_path, anno_file, eval_batch_size, _clsid2catid, draw_image, draw_thresh)
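
# A small generic PyTorch sketch (not this repo's code) of why the
# model.eval() call above matters: in train mode dropout is stochastic, so two
# forward passes on the same input can differ; in eval mode dropout is a no-op
# and the outputs are deterministic.
import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
x = torch.ones(1, 4)

net.train()
print(torch.equal(net(x), net(x)))  # usually False: dropout masks differ

net.eval()
print(torch.equal(net(x), net(x)))  # True: dropout disabled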
images.append(img_anno)

all_classes = get_classes(cfg.classes_path)
num_classes = len(all_classes)

# build the model
Backbone = select_backbone(cfg.backbone_type)
backbone = Backbone(**cfg.backbone)
Head = select_head(cfg.head_type)
cfg.head['drop_block'] = False  # disable DropBlock at prediction time to get consistent inference results.
head = Head(yolo_loss=None, nms_cfg=cfg.nms_cfg, **cfg.head)
yolo = YOLO(backbone, head)

x = keras.layers.Input(shape=(None, None, 3), name='x', dtype='float32')
im_size = keras.layers.Input(shape=(2, ), name='im_size', dtype='int32')
outputs = yolo.get_outputs(x)
preds = yolo.get_prediction(outputs, im_size)
predict_model = keras.models.Model(inputs=[x, im_size], outputs=preds)
predict_model.load_weights(model_path, by_name=True, skip_mismatch=True)
predict_model.summary(line_length=130)

_clsid2catid = copy.deepcopy(clsid2catid)
if num_classes != 80:  # a custom dataset rather than COCO
    _clsid2catid = {}
    for k in range(num_classes):
        _clsid2catid[k] = k

_decode = Decode(predict_model, all_classes, use_gpu, cfg, for_test=False)
box_ap = eval(_decode, images, eval_pre_path, anno_file, eval_batch_size, _clsid2catid, draw_image, draw_thresh)
def job1():
    global count1
    global frame
    use_gpu = True

    # GPU memory allocation.
    if use_gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    set_session(tf.Session(config=config))

    facesDetect = {'classes_path': 'data/voc_classes.txt', 'model_path': './weights/best_model.h5'}
    emotion_model_path = 'model/fer2013_mini_XCEPTION.102-0.66.hdf5'
    emotion_labels = get_labels('fer2013')

    # hyper-parameters for bounding box shape
    frame_window = 10
    emotion_offsets = (20, 40)

    # loading models
    face_detector = Decode(facesDetect['classes_path'], facesDetect['model_path'])
    emotion_classifier = load_model(emotion_model_path, compile=False)

    # getting input model shapes for inference
    emotion_target_size = emotion_classifier.input_shape[1:3]

    emotion_window1 = []
    record1 = {'angry': [0], 'disgust': [0], 'fear': [0], 'happy': [0],
               'sad': [0], 'surprise': [0], 'neutral': [0]}
    # record_diff = {'angry': [], 'disgust': [], 'fear': [], 'happy': [], 'sad': [], 'surprise': [], 'neutral': []}
    emo_record1 = []

    capture = cv2.VideoCapture(0)
    while True:
        bgr_image = capture.read()[1]
        gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        faces = face_detector.detect_image(bgr_image)[1]
        if faces is None:
            faces = ()
        for face_coordinates in faces:
            x1, y1, x2, y2 = face_coordinates
            face_coordinates = [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
            x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
            gray_face = gray_image[y1:y2, x1:x2]
            try:
                gray_face = cv2.resize(gray_face, emotion_target_size)
            except Exception:
                continue

            gray_face = preprocess_input(gray_face, True)
            gray_face = np.expand_dims(gray_face, 0)
            gray_face = np.expand_dims(gray_face, -1)
            emotion_prediction = emotion_classifier.predict(gray_face)

            # ---- moving-average formula (disabled, kept for reference) ----
            # for idx, probability in enumerate(emotion_prediction[0]):
            #     alpha = 0.5
            #     record[emotion_labels[idx]].append(
            #         record[emotion_labels[idx]][-1]
            #         + alpha * (round(probability * 100, 2) - record[emotion_labels[idx]][-1]))
            #     emotion_prediction[0][idx] = record[emotion_labels[idx]][-1]
            #     if len(record[emotion_labels[idx]]) > 10:
            #         record[emotion_labels[idx]].pop(0)

            # ---- custom weighting ----
            emotion_prediction[0] = weights_change(emo_record1, emotion_prediction[0])
            data = []
            for idx, probability in enumerate(emotion_prediction[0]):
                data.append((emotion_labels[idx], probability))
            rd1.append(data)
            count1 += 1
            emo_record1.append(np.argmax(emotion_prediction))
            if len(emo_record1) > 10:
                emo_record1.pop(0)

            emotion_probability = np.max(emotion_prediction)
            emotion_label_arg = np.argmax(emotion_prediction)
            emotion_text = emotion_labels[emotion_label_arg]
            emotion_window1.append(emotion_text)
            if len(emotion_window1) > frame_window:
                emotion_window1.pop(0)
            try:
                emotion_mode1 = mode(emotion_window1)
            except Exception:
                continue

            if emotion_text == 'angry':
                color = emotion_probability * np.asarray((255, 0, 0))
            elif emotion_text == 'sad':
                color = emotion_probability * np.asarray((0, 0, 255))
            elif emotion_text == 'happy':
                color = emotion_probability * np.asarray((255, 255, 0))
            elif emotion_text == 'surprise':
                color = emotion_probability * np.asarray((0, 255, 255))
            else:
                color = emotion_probability * np.asarray((0, 255, 0))
            color = color.astype(int)
            color = color.tolist()
            draw_bounding_box(face_coordinates, rgb_image, color)
            draw_text(face_coordinates, rgb_image, emotion_mode1, color, 0, -45, 1, 1)

        bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
        cv2.imshow('window_frame1', bgr_image)
        if cv2.waitKey(1) & 0xFF == 27:
            break
    cv2.destroyAllWindows()
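
# A minimal sketch (an assumption, not the repo's code) of the exponential
# moving average behind the disabled "moving-average formula" block above:
# each new score is pulled toward the previous smoothed value by a factor
# alpha, which damps frame-to-frame jitter in the emotion probabilities.
def ema_update(prev, new, alpha=0.5):
    # prev + alpha * (new - prev) is equivalent to (1 - alpha) * prev + alpha * new
    return prev + alpha * (new - prev)

smoothed = 0.0
for raw in [0.9, 0.1, 0.8, 0.2]:  # noisy per-frame probabilities
    smoothed = ema_update(smoothed, raw)
    print(smoothed)  # approximately 0.45, 0.275, 0.5375, 0.36875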
import cv2
import os
import time
from model.decode_np import Decode

if __name__ == '__main__':
    file = 'data/coco_classes.txt'
    model_path = 'yolov4.h5'

    # A larger input_shape raises accuracy but lowers speed.
    # input_shape = (320, 320)
    input_shape = (416, 416)
    # input_shape = (608, 608)

    _decode = Decode(0.05, 0.45, input_shape, model_path, file)

    # detect images in the test folder.
    for (root, dirs, files) in os.walk('images/test'):
        if files:
            start = time.time()
            for f in files:
                # print(f)
                path = os.path.join(root, f)
                image = cv2.imread(path)
                image = _decode.detect_image(image)
                cv2.imwrite('images/res/' + f, image)
            print('total time: {0:.6f}s'.format(time.time() - start))

    # detect videos one at a time in the videos/test folder
    # video = 'library1.mp4'
all_classes = get_classes(classes_path)
num_classes = len(all_classes)

startup_prog = fluid.Program()
eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
    with fluid.unique_name.guard():
        # multi-scale training
        inputs = P.data(name='input_1', shape=[-1, 3, -1, -1], append_batch_size=False, dtype='float32')
        output_l, output_m, output_s = YOLOv4(inputs, num_classes, num_anchors, is_test=False, trainable=True)
        eval_fetch_list = [output_l, output_m, output_s]
eval_prog = eval_prog.clone(for_test=True)

gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)

fluid.load(eval_prog, model_path, executor=exe)
_decode = Decode(conf_thresh, nms_thresh, input_shape, exe, eval_prog, all_classes)
test_dev(_decode, eval_fetch_list, images, test_pre_path, test_batch_size, draw_image)
# step id; no need to set it, it is read automatically.
iter_id = 0

# validation
# A larger input_shape raises accuracy but lowers speed.
# input_shape = (320, 320)
# input_shape = (416, 416)
input_shape = (608, 608)
# score threshold and nms_iou threshold used during validation
conf_thresh = 0.05
nms_thresh = 0.45

# multi-scale training
inputs = layers.Input(shape=(None, None, 3))
model_body = YOLOv4(inputs, num_classes, num_anchors)
_decode = Decode(conf_thresh, nms_thresh, input_shape, model_body, class_names)

# Mode. 0 - train from scratch; 1 - load a previous model and keep training
# (model_path can be 'yolov4.h5', './weights/step00001000.h5', etc.)
pattern = 1
max_bbox_per_scale = 150
iou_loss_thresh = 0.7
if pattern == 1:
    lr = 0.0001
    batch_size = 8
    model_path = 'yolov4.h5'
    # model_path = './weights/step00001000.h5'
    model_body.load_weights(model_path, by_name=True)
    strs = model_path.split('step')
    if len(strs) == 2:
        iter_id = int(strs[1][:8])
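
# A minimal sketch (an assumption, not this repo's Decode) of how the two
# thresholds above are typically used: conf_thresh drops low-score boxes
# before NMS, and nms_thresh is the IoU above which an overlapping box is
# suppressed in favor of a higher-scoring one.
import numpy as np

def iou(a, b):
    # a, b: [x1, y1, x2, y2]
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def nms(boxes, scores, conf_thresh=0.05, nms_thresh=0.45):
    # keep only boxes above the score threshold, highest score first
    order = [i for i in np.argsort(scores)[::-1] if scores[i] >= conf_thresh]
    keep = []
    while order:
        best = order.pop(0)
        keep.append(best)
        # discard remaining boxes that overlap the kept box too much
        order = [i for i in order if iou(boxes[best], boxes[i]) <= nms_thresh]
    return keep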
iou_loss = IouLoss(**cfg.iou_loss)
iou_aware_loss = None
if cfg.head['iou_aware']:
    IouAwareLoss = select_loss(cfg.iou_aware_loss_type)
    iou_aware_loss = IouAwareLoss(**cfg.iou_aware_loss)
Loss = select_loss(cfg.yolo_loss_type)
yolo_loss = Loss(iou_loss=iou_loss, iou_aware_loss=iou_aware_loss, **cfg.yolo_loss)
Head = select_head(cfg.head_type)
head = Head(yolo_loss=yolo_loss, is_train=True, nms_cfg=cfg.nms_cfg, **cfg.head)
ppyolo = PPYOLO(backbone, head)
_decode = Decode(ppyolo, class_names, use_gpu, cfg, for_test=False)

# load weights
if cfg.train_cfg['model_path'] is not None:
    # load parameters, skipping those whose shapes don't match.
    load_weights(ppyolo, cfg.train_cfg['model_path'])
    strs = cfg.train_cfg['model_path'].split('step')
    if len(strs) == 2:
        iter_id = int(strs[1][:8])

# Freezing reduces the GPU memory needed. Recommended for low-memory cards.
backbone.freeze()

if use_gpu:
    # if a GPU is available, the model (including its weights) lives in GPU memory
    ppyolo = ppyolo.cuda()
    continue
img_anno = val_dataset.loadImgs(img_id)[0]
images.append(img_anno)

all_classes = get_classes(cfg.classes_path)
num_classes = len(all_classes)

# build the model
Backbone = select_backbone(cfg.backbone_type)
backbone = Backbone(**cfg.backbone)
Fpn = select_fpn(cfg.fpn_type)
fpn = Fpn(**cfg.fpn)
Head = select_head(cfg.head_type)
head = Head(fcos_loss=None, nms_cfg=cfg.nms_cfg, **cfg.head)
fcos = FCOS(backbone, fpn, head)
if use_gpu:
    fcos = fcos.cuda()
fcos.load_state_dict(torch.load(model_path))
fcos.eval()  # model.eval() must be called before inference so that dropout and batch-normalization layers switch to evaluation mode.

_clsid2catid = copy.deepcopy(clsid2catid)
if num_classes != 80:  # a custom dataset rather than COCO
    _clsid2catid = {}
    for k in range(num_classes):
        _clsid2catid[k] = k

_decode = Decode(fcos, all_classes, use_gpu, cfg, for_test=False)
box_ap = eval(_decode, images, eval_pre_path, anno_file, eval_batch_size, _clsid2catid, draw_image, draw_thresh)
# Whether to draw boxes on the image. Skipping the drawing is faster;
# image reading and post-processing can be optimized further.
draw_image = True
# draw_image = False

num_anchors = 3
all_classes = get_classes(classes_path)
num_classes = len(all_classes)

yolo = YOLOv4(num_classes, num_anchors)
if torch.cuda.is_available():
    # if a GPU is available, the model (including its weights) lives in GPU memory
    yolo = yolo.cuda()
yolo.load_state_dict(torch.load(model_path))
yolo.eval()  # model.eval() must be called before inference so that dropout and batch-normalization layers switch to evaluation mode; otherwise inference results are inconsistent.

_decode = Decode(conf_thresh, nms_thresh, input_shape, yolo, all_classes)

if not os.path.exists('images/res/'):
    os.mkdir('images/res/')
path_dir = os.listdir('images/test')

# warm up
if use_gpu:
    for k, filename in enumerate(path_dir):
        image = cv2.imread('images/test/' + filename)
        image, boxes, scores, classes = _decode.detect_image(image, draw_image=False)
        if k == 10:
            break

time_stat = deque(maxlen=20)
start_time = time.time()
num_classes = len(class_names)

# step id; no need to set it, it is read automatically.
iter_id = 0

# build the model
Backbone = select_backbone(cfg.backbone_type)
backbone = Backbone(**cfg.backbone)
Fpn = select_fpn(cfg.fpn_type)
fpn = Fpn(**cfg.fpn)
Loss = select_loss(cfg.fcos_loss_type)
fcos_loss = Loss(**cfg.fcos_loss)
Head = select_head(cfg.head_type)
head = Head(fcos_loss=fcos_loss, nms_cfg=cfg.nms_cfg, **cfg.head)
fcos = FCOS(backbone, fpn, head)
_decode = Decode(fcos, class_names, use_gpu, cfg, for_test=False)

# load weights
if cfg.train_cfg['model_path'] is not None:
    # load parameters, skipping those whose shapes don't match.
    load_weights(fcos, cfg.train_cfg['model_path'])
    strs = cfg.train_cfg['model_path'].split('step')
    if len(strs) == 2:
        iter_id = int(strs[1][:8])

# Freezing reduces the GPU memory needed. Recommended for low-memory cards.
backbone.freeze()

if use_gpu:
    # if a GPU is available, the model (including its weights) lives in GPU memory
    fcos = fcos.cuda()
import cv2
import os
import time
from model.decode_np import Decode

if __name__ == '__main__':
    file = 'data/coco_classes.txt'
    model_path = 'yolo_bgr_mAP_46.h5'

    # A larger input_shape raises accuracy but lowers speed.
    # input_shape = (320, 320)
    input_shape = (416, 416)
    # input_shape = (608, 608)

    _decode = Decode(0.6, 0.5, input_shape, model_path, file)

    # detect images in the test folder.
    for (root, dirs, files) in os.walk('images/test'):
        if files:
            start = time.time()
            for f in files:
                # print(f)
                path = os.path.join(root, f)
                image = cv2.imread(path)
                image = _decode.detect_image(image)
                cv2.imwrite('images/res/' + f, image)
            print('total time: {0:.6f}s'.format(time.time() - start))

    # detect videos one at a time in the videos/test folder
    # video = 'library1.mp4'
cfg = TrainConfig()
class_names = get_classes(cfg.classes_path)
num_classes = len(class_names)
_anchors = copy.deepcopy(cfg.anchors)
num_anchors = len(cfg.anchor_masks[0])  # number of anchors per output layer
_anchors = np.array(_anchors)
_anchors = np.reshape(_anchors, (-1, num_anchors, 2))
_anchors = _anchors.astype(np.float32)

# step id; no need to set it, it is read automatically.
iter_id = 0

# build the model
yolo = YOLOv4(num_classes, num_anchors)
_decode = Decode(cfg.conf_thresh, cfg.nms_thresh, cfg.input_shape, yolo, class_names)

# Mode. 0 - train from scratch; 1 - load a previous model and keep training
# (model_path can be 'yolov4.h5', './weights/step00001000.h5', etc.)
pattern = cfg.pattern
if pattern == 1:
    # load parameters, skipping those whose shapes don't match.
    yolo_state_dict = yolo.state_dict()
    pretrained_dict = torch.load(cfg.model_path)
    new_state_dict = OrderedDict()
    for k, v in pretrained_dict.items():
        if k in yolo_state_dict:
            shape_1 = yolo_state_dict[k].shape
            shape_2 = pretrained_dict[k].shape
            if shape_1 == shape_2:
                new_state_dict[k] = v
            else:
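
# A self-contained sketch (an assumption, not the repo's exact code) of the
# shape-aware partial loading the truncated block above is building: keep only
# pretrained tensors whose name AND shape match the target model, then load
# with strict=False so the skipped keys stay at their initialized values.
from collections import OrderedDict

import torch
import torch.nn as nn

def load_matching_weights(model: nn.Module, checkpoint_path: str) -> None:
    model_state = model.state_dict()
    pretrained = torch.load(checkpoint_path, map_location='cpu')
    filtered = OrderedDict(
        (k, v) for k, v in pretrained.items()
        if k in model_state and v.shape == model_state[k].shape)
    model.load_state_dict(filtered, strict=False)  # skip missing/mismatched keys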