class Detector(nn.Module):
    def __init__(self, config_path, weights_path, input_size=None,
                 conf_thresh=0.5, nms_thresh=0.4):
        super(Detector, self).__init__()
        self.input_size = input_size
        self.conf_thresh = conf_thresh
        self.nms_thresh = nms_thresh
        # Initialize Darknet for detection
        self.model = Darknet(config_path, input_size=input_size)
        self.model.load_weights(weights_path)
        self.model.eval()

    def device(self):
        return next(self.model.parameters()).device

    def forward(self, frame, swapRB=False):
        x = image_to_tensor(frame, swapRB)
        _, _, fh, fw = x.size()
        device = self.device()
        x = x.to(device)
        x = letterbox_resize(x, self.input_size, constant_value=127.5)
        x = x / 255.0
        with torch.no_grad():
            y = self.model.forward(x)
        output = []
        for i, prediction in enumerate(y):  # Enumerate on batch
            detection = non_max_suppression(prediction.cpu(),
                                            self.conf_thresh, self.nms_thresh)
            if detection is not None:
                detection = bbox_fit(detection, (fh, fw), self.input_size).to(device)
            output.append(detection)
        return output

    def update(self, conf_thresh=None, nms_thresh=None, weights_path=None):
        if conf_thresh is not None:
            self.conf_thresh = conf_thresh
        if nms_thresh is not None:
            self.nms_thresh = nms_thresh
        if weights_path is not None:
            device = self.device()
            self.model.cpu().load_weights(weights_path)
            self.model.to(device)
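The helper image_to_tensor used by Detector.forward is defined elsewhere in the repo. A minimal sketch of what such a helper might look like, assuming the input frame is an OpenCV-style HxWxC uint8 array (BGR by default); the actual implementation may differ.

# Sketch only: assumes an HxWxC uint8 frame in, NCHW float tensor out.
# Values stay in [0, 255] because Detector.forward divides by 255 after letterboxing.
import numpy as np
import torch

def image_to_tensor_sketch(frame, swapRB=False):
    if swapRB:
        frame = frame[:, :, ::-1]  # optionally swap BGR -> RGB
    x = torch.from_numpy(np.ascontiguousarray(frame)).float()
    x = x.permute(2, 0, 1).unsqueeze(0)  # HWC -> 1xCxHxW
    return x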
def test_car_detect(car_cfg_path='./car.cfg',
                    car_det_weights_path='g:/Car_DR/car_360000.weights'):
    """
    Run vehicle detection on a single test image.
    """
    inp_dim = 768
    prob_th = 0.2   # vehicle detection probability threshold
    nms_th = 0.4    # NMS threshold
    num_cls = 1     # only one class (vehicle) is detected

    # initialize the vehicle detection model and its parameters
    Net = Darknet(car_cfg_path)
    Net.load_weights(car_det_weights_path)
    Net.net_info['height'] = inp_dim  # input resolution for vehicle detection
    Net.to(device)
    Net.eval()  # evaluation mode
    print('=> car detection model initiated.')

    # read image data
    img = Image.open(
        'f:/FaceRecognition_torch_0_4/imgs_21/det_2018_08_21_63_1.jpg')
    img2det = process_img(img, inp_dim)
    img2det = img2det.to(device)  # move image data to device

    # run vehicle detection
    prediction = Net.forward(img2det, CUDA=True)

    # compute the scaling factor and post-process the predictions
    orig_img_size = list(img.size)
    output = process_predict(prediction, prob_th, num_cls, nms_th,
                             inp_dim, orig_img_size)

    orig_img = np.asarray(img)
    if type(output) != int:
        # draw the detection bboxes on the original image
        draw_car_bbox(output, orig_img)
    cv2.imshow('test', orig_img)
    cv2.waitKey()
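process_img is called throughout this repo but not shown here. A minimal sketch of what it plausibly does, assuming a PIL image in and a letterboxed (1, 3, inp_dim, inp_dim) tensor out; the gray padding value and resize method are assumptions consistent with the offset math in process_predict.

# Sketch only: centered letterbox resize to inp_dim x inp_dim, then NCHW tensor in [0, 1].
import numpy as np
import torch
from PIL import Image

def process_img_sketch(img, inp_dim):
    w, h = img.size
    scale = min(inp_dim / w, inp_dim / h)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = img.resize((new_w, new_h), Image.BILINEAR)
    # paste onto a gray canvas so the aspect ratio is preserved
    canvas = Image.new('RGB', (inp_dim, inp_dim), (128, 128, 128))
    canvas.paste(resized, ((inp_dim - new_w) // 2, (inp_dim - new_h) // 2))
    x = torch.from_numpy(np.asarray(canvas)).float() / 255.0
    return x.permute(2, 0, 1).unsqueeze(0)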
optimizer = optim.Adam(model.parameters())
dataloader = DataLoader(cd_dataset, batch_size=4, shuffle=True)
print(dataloader)

# training the model
epoch_loss = 0.0
for i_batch, sample_batched in enumerate(dataloader, 1):
    print("loading batch")
    print(i_batch, sample_batched['image'].size(), sample_batched['bBox'].size())
    loss = model.forward(sample_batched['image'], sample_batched['bBox'], None)
    optimizer.zero_grad()
    epoch_loss += loss.item()
    loss.backward()
    optimizer.step()

# testing the model: grab a single batch from the dataloader
for i_batch, sample_batched in enumerate(dataloader, 1):
    break
print("loading batch")
print(i_batch, sample_batched['image'].size(), sample_batched['bBox'].size())
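The snippet above runs a single pass over the data. A sketch of how it could be wrapped into a multi-epoch loop with explicit train/eval modes, reusing the same model, dataloader and optimizer; num_epochs and val_dataloader are assumptions, not part of the original script.

# Sketch only: num_epochs and val_dataloader are hypothetical names.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    for sample_batched in dataloader:
        loss = model.forward(sample_batched['image'], sample_batched['bBox'], None)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"epoch {epoch}: mean train loss {epoch_loss / len(dataloader):.4f}")

    # quick sanity check on one held-out batch (val_dataloader is hypothetical)
    model.eval()
    with torch.no_grad():
        sample_batched = next(iter(val_dataloader))
        val_loss = model.forward(sample_batched['image'], sample_batched['bBox'], None)
        print(f"epoch {epoch}: val batch loss {val_loss.item():.4f}")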
imgfile2 = 'inria/Train/pos/crop001002.png'
sized = read_and_size_image(imgfile, darknet_model.width, darknet_model.height)
sized2 = read_and_size_image(imgfile2, darknet_model.width, darknet_model.height)
img2 = read_and_size_image(imgfile2)
sized3 = torch.randn(sized2.shape)
batch = torch.cat([sized, sized2, sized3], dim=0)
# sized = img.resize((darknet_model.width, darknet_model.height))

# move the darknet model to the GPU
darknet_model = darknet_model
sized = torch.autograd.Variable(batch)
output = darknet_model.forward(batch)
get_max_probability(output, 0, 80)

'''remove this
img_interp = F.interpolate(img, size=(200, 200), mode='bilinear', align_corners=True)
# zien = tvfunc.to_pil_image(img_interp.squeeze(0))
# zien.show()

printability_file = 'non_printability/30values.txt'
img_height = 500
img_width = 500
printability_array = get_printability_array(printability_file, img_height, img_width)

# good_patch = torch.from_numpy(np.tile([0.7098, 0.32157, 0.2], (img_height, img_width, 1))).float()
# good_patch = good_patch.view(img_height, img_width, 3).transpose(0, 1).transpose(0, 2).contiguous().unsqueeze(0)
good_patch = read_and_size_image('data/horse.jpg')
remove this'''
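get_printability_array is referenced in the disabled block above but defined elsewhere. A minimal sketch of one plausible implementation, assuming 30values.txt holds one comma-separated RGB triplet per line with values in [0, 1]; the real file format and tensor layout may differ.

# Sketch only: the file format is an assumption.
import numpy as np
import torch

def get_printability_array_sketch(printability_file, height, width):
    colors = []
    with open(printability_file) as f:
        for line in f:
            line = line.strip()
            if line:
                colors.append([float(v) for v in line.split(',')])
    # one (3, H, W) plane of constant color per printable color
    planes = [np.stack([np.full((height, width), c, dtype=np.float32)
                        for c in rgb]) for rgb in colors]
    return torch.from_numpy(np.stack(planes))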
class Car_DC(): def __init__(self, src_path, dst_path, car_cfg_path=local_car_cfg_path, car_det_weights_path=local_car_det_weights_path, inp_dim=768, prob_th=0.2, nms_th=0.4, num_classes=1): """ model initialization """ # super parameters self.inp_dim = inp_dim self.prob_th = prob_th self.nms_th = nms_th self.num_classes = num_classes self.dst_path = dst_path self.video_path = src_path # initialize vehicle detection model self.detector = Darknet(car_cfg_path) self.detector.load_weights(car_det_weights_path) # set input dimension of image self.detector.net_info['height'] = self.inp_dim self.detector.to(device) self.detector.eval() # evaluation mode print('=> car detection model initiated.') # initiate multilabel classifier self.classifier = Car_Classifier(num_cls=19, model_path=local_model_path) #init car recognition self.img_width, self.img_height = 224, 224 self.model = load_model() self.model.load_weights('models/model.96-0.89.hdf5') cars_meta = scipy.io.loadmat('devkit/cars_meta') class_names = cars_meta['class_names'] # shape=(1, 196) self.class_names = np.transpose(class_names) def cls_draw_bbox(self, output, orig_img): """ 1. predict vehicle's attributes based on bbox of vehicle 2. draw bbox to orig_img """ pt_1s = [] pt_2s = [] label_1 = [] label_2 = [] label_3 = [] # 1 for det in output: # rectangle points pt_1 = tuple(det[1:3].int()) # the left-up point pt_2 = tuple(det[3:5].int()) # the right down point pt_1s.append(pt_1) pt_2s.append(pt_2) # turn BGR back to RGB ROI = Image.fromarray( orig_img[pt_1[1]: pt_2[1], pt_1[0]: pt_2[0]][:, :, ::-1]) # ROI.show() # call classifier to predict car_color, car_direction, car_type = self.classifier.predict(ROI) label = str(car_color + ' ' + car_direction + ' ' + car_type) print('=> predicted label: ', label) label_1.append(str(car_color)) label_2.append(str(car_direction)) label_3.append(str(car_type)) # 2 color = (0, 255, 0) for i, det in enumerate(output): pt_1 = pt_1s[i] pt_2 = pt_2s[i] # draw bounding box cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2) # get str text size txt_size = cv2.getTextSize( label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] # Convert cv2 numpy array to PIL image cv2_im = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB) pil_im = Image.fromarray(cv2_im) # Draw a label with a chinese name in the filled box font = ImageFont.truetype('./font/simhei.ttf', int(txt_size[1] * 0.8), encoding="utf-8")# draw = ImageDraw.Draw(pil_im) # draw text background rect and text #car color pt_11 = pt_2[0], pt_1[1] pt_12 = pt_2[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 3 fname = color_dict[label_1[i]] draw.text((pt_11[0], pt_11[1]), ' 颜色: ' + fname, fill=(0, 255, 0), font=font) #car direction pt_21 = pt_2[0], pt_12[1] pt_22 = pt_2[0] + txt_size[0] + 3, pt_12[1] + txt_size[1] + 3 fname = direction_dict[label_2[i]] draw.text((pt_21[0], pt_21[1]), ' 朝向: ' + fname, fill=(0, 255, 0), font=font) #car type pt_31 = pt_2[0], pt_22[1] fname = type_dict[label_3[i]] draw.text((pt_31[0], pt_31[1]), ' 车型: ' + fname, fill=(0, 255, 0), font=font) # Convert PIL image to cv2 numpy array orig_img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR) return orig_img def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim, orig_img_size): """ processing detections """ scaling_factor = min([inp_dim / float(x) for x in orig_img_size]) # W, H scaling factor output = post_process(prediction, prob_th, num_cls, nms=True, nms_conf=nms_th, CUDA=True) # post-process such as nms if type(output) != int: output[:, [1, 3]] -= (inp_dim - scaling_factor * 
orig_img_size[0]) / 2.0 # x, w output[:, [2, 4]] -= (inp_dim - scaling_factor * orig_img_size[1]) / 2.0 # y, h output[:, 1:5] /= scaling_factor for i in range(output.shape[0]): output[i, [1, 3]] = torch.clamp( output[i, [1, 3]], 0.0, orig_img_size[0]) output[i, [2, 4]] = torch.clamp( output[i, [2, 4]], 0.0, orig_img_size[1]) return output def car_recognition(self, output, orig_img): labels = [] pt_1s = [] pt_2s = [] for det in output: # rectangle points pt_1 = tuple(det[1:3].int()) # the left-up point pt_2 = tuple(det[3:5].int()) # the right down point pt_1s.append(pt_1) pt_2s.append(pt_2) # turn BGR back to RGB ROI = Image.fromarray( orig_img[pt_1[1]: pt_2[1], pt_1[0]: pt_2[0]][:, :, ::-1]) img = cv2.cvtColor(np.asarray(ROI),cv2.COLOR_RGB2BGR) bgr_img = cv2.resize(img, (self.img_width, self.img_height), cv2.INTER_CUBIC) rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB) rgb_img = np.expand_dims(rgb_img, 0) preds = self.model.predict(rgb_img) prob = np.max(preds) class_id = np.argmax(preds) label = str(self.class_names[class_id][0][0]) #print(label) labels.append(label) text = ('{}, {}'.format(self.class_names[class_id][0][0], prob)) for i, det in enumerate(output): pt_1 = pt_1s[i] pt_2 = pt_2s[i] # get str text size txt_size = cv2.getTextSize( label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] # Convert cv2 numpy array to PIL image cv2_im = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB) pil_im = Image.fromarray(cv2_im) # Draw a label with a chinese name in the filled box font = ImageFont.truetype('./font/simhei.ttf', int(txt_size[1] * 0.8), encoding="utf-8")# draw = ImageDraw.Draw(pil_im) # draw text background rect and text #car color pt_11 = pt_2[0], pt_1[1] + (txt_size[1] + 3) * 3 pt_12 = pt_2[0] + txt_size[0] + 3, pt_11[1] + txt_size[1] + 3 fname = car_label[labels[i]] draw.text((pt_11[0], pt_11[1]), ' 车类: ' + fname, fill=(0, 255, 0), font=font) # Convert PIL image to cv2 numpy array orig_img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR) return orig_img def detect_classify(self): """ detect and classify """ #read and save video cap = cv2.VideoCapture(self.video_path) fourcc = cv2.VideoWriter_fourcc(*'XVID') out = cv2.VideoWriter(self.dst_path, fourcc, 25.0, (960, 540)) while(cap.isOpened()): # read image data ret, x = cap.read() re_img = cv2.resize(x, (960, 540), cv2.INTER_LINEAR) img = Image.fromarray(cv2.cvtColor(re_img, cv2.COLOR_BGR2RGB)) img2det = process_img(img, self.inp_dim) img2det = img2det.to(device) # put image data to device # vehicle detection prediction = self.detector.forward(img2det, CUDA=True) # calculating scaling factor orig_img_size = list(img.size) output = self.process_predict(prediction, self.prob_th, self.num_classes, self.nms_th, self.inp_dim, orig_img_size) orig_img = cv2.cvtColor(np.asarray( img), cv2.COLOR_RGB2BGR) # RGB => BGR if type(output) != int: orig_img = self.cls_draw_bbox(output, orig_img) orig_img = self.car_recognition(output, orig_img) out.write(orig_img) cap.release() out.release()
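A minimal usage sketch for the video pipeline above; the source video and output paths are placeholders, not taken from the original code.

# Sketch only: src_path and dst_path are placeholder paths.
if __name__ == '__main__':
    dc = Car_DC(src_path='./test.avi', dst_path='./result.avi')
    dc.detect_classify()
    print('=> video processed.')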
def start(self):
    # Initialization of global variables
    global classes, BBox, colors, phase, frame, initBBox, true_class_filter

    # DETECTION PHASE SETUP
    CUDA = torch.cuda.is_available()
    text = 'No class filter selected'
    classes = load_classes('model/{}/model.names'.format(self.model_folder))
    colors = pkl.load(open('pallete', 'rb'))
    num_classes = len(classes)
    if [i for i in self.class_filter if not (i in classes)]:
        if self.label_info:
            text = 'WARNING: {} class/classes are not included in the selected model. Updating the searching list...'.format(
                [i for i in self.class_filter if not (i in classes)])
            self.label_info.setText(text)
        else:
            print('WARNING: {} class/classes are not included in the selected model. Updating the searching list...'.format(
                [i for i in self.class_filter if not (i in classes)]))
    true_class_filter = [i for i in self.class_filter if (i in classes)]

    # Network setup
    if self.label_info:
        text += '\nLoading network...'
        self.label_info.setText(text)
    else:
        print('Loading network.....')
    model = Darknet('model/{}/model.cfg'.format(self.model_folder))
    model.load_weights('model/{}/model.weights'.format(self.model_folder))
    if self.label_info:
        text += '\nNetwork successfully loaded'
        self.label_info.setText(text)
    else:
        print('Network successfully loaded')
    model.net_info['height'] = self.reso_det
    inp_dim_det = int(model.net_info['height'])
    assert inp_dim_det % 32 == 0
    assert inp_dim_det > 32

    # If a CUDA device is available, move the model onto it
    if CUDA:
        model.cuda()
    # Put the model in evaluation mode
    model.eval()

    # TRACKING PHASE SETUP
    inp_dim_track = int(self.reso_track)
    OPENCV_OBJECT_TRACKERS = {
        'csrt': cv2.TrackerCSRT_create,
        'kcf': cv2.TrackerKCF_create,
        'boosting': cv2.TrackerBoosting_create,
        'mil': cv2.TrackerMIL_create,
        'tld': cv2.TrackerTLD_create,
        'medianflow': cv2.TrackerMedianFlow_create,
        'mosse': cv2.TrackerMOSSE_create
    }

    # SOURCE INITIALIZATION
    if self.source == '0' or self.source == '1':
        self.cap = cv2.VideoCapture(int(self.source))
        mode = 'cam'
        self.window_name = 'Camera ' + self.source
    else:
        if self.label_info:
            # via the GUI the full path is provided
            self.cap = cv2.VideoCapture(self.source)
        else:
            # via the terminal only the file name is given
            self.cap = cv2.VideoCapture('videos/{}'.format(self.source))
        mode = 'file'
        self.window_name = self.source
    assert self.cap.isOpened(), 'Cannot capture source'

    phase = 'det'
    initBBox = []
    cont = 0
    frames = 0
    cv2.namedWindow(self.window_name)
    cv2.setMouseCallback(self.window_name, click_det2track)

    while self.cap.isOpened():
        grab, frame = self.cap.read()
        start = time.time()
        if grab:
            # Detection phase
            if phase == 'det':
                if mode == 'cam':
                    img = prep_image_c(frame, inp_dim_det)
                elif mode == 'file':
                    img = prep_image_f(frame, inp_dim_det)
                im_dim = frame.shape[1], frame.shape[0]
                im_dim = torch.FloatTensor(im_dim).repeat(1, 2)
                if CUDA:
                    im_dim = im_dim.cuda()
                    img = img.cuda()

                # Initialize the list of detected BBoxes
                BBox = []
                output = model.forward(Variable(img), CUDA)
                output = write_results(output, self.confidence, num_classes,
                                       nms_conf=self.nms_thresh)
                if type(output) == int:
                    frames += 1
                    cv2.imshow(self.window_name, frame)
                    key = cv2.waitKey(1)
                    if key & 0xFF == ord('q'):
                        break
                    continue

                if mode == 'cam':
                    output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim_det))
                    im_dim = im_dim.repeat(output.size(0), 1) / inp_dim_det
                    output[:, 1:5] *= im_dim
                elif mode == 'file':
                    im_dim = im_dim.repeat(output.size(0), 1)
                    scaling_factor = torch.min(inp_dim_det / im_dim, 1)[0].view(-1, 1)
                    output[:, [1, 3]] -= (inp_dim_det - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
                    output[:, [2, 4]] -= (inp_dim_det - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
                    output[:, 1:5] /= scaling_factor
                    for i in range(output.shape[0]):
                        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

                list(map(lambda x: write(x, frame), output))
                cv2.imshow(self.window_name, frame)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                frames += 1
                if self.label_info:
                    self.label_info.setText(
                        text + '\nDETECTION PHASE:' + '\n {0: .2f} fps'.format(
                            float(1 / (time.time() - start))))

            # Tracking phase
            elif phase == 'track':
                ratio = frame.shape[0] / inp_dim_track
                img = imutils.resize(frame, height=inp_dim_track)
                if initBBox:
                    (success, box) = tracker.update(img)
                    if success:
                        cont = 0
                        (x, y, w, h) = [int(v) for v in box]
                        x, y, w, h = prep_rect(x, y, w, h, ratio)
                        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    else:
                        cont += 1
                        if self.label_info:
                            self.label_info.setText(
                                text + '\nTRACKING PHASE' + '\nObject lost ({})'.format(cont))
                        else:
                            print('Object lost ', cont)
                else:
                    (x, y, w, h) = [int(v) for v in track_rect]
                    initBBox = (prep_rect(x, y, w, h, float(1 / ratio)))
                    tracker = OPENCV_OBJECT_TRACKERS[self.tracker_alg]()
                    tracker.init(img, initBBox)
                if cont > 100:
                    phase = 'det'
                    cont = 0
                    initBBox = []
                cv2.imshow(self.window_name, frame)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                frames += 1
                if self.label_info:
                    self.label_info.setText(
                        text + '\nTRACKING PHASE:' + '\n {0: .2f} fps'.format(
                            float(1 / (time.time() - start))))
            else:
                break
        else:
            break

    if not self.label_info:
        cv2.destroyWindow(self.window_name)
    self.cap.release()
    torch.cuda.empty_cache()
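prep_rect is used above to map tracker boxes between the downscaled tracking frame and the full-resolution frame. A minimal sketch of what it might do, assuming it simply scales all four values by the given ratio; the real helper may also clip to the frame bounds.

# Sketch only: a plain uniform scaling of an (x, y, w, h) rectangle.
def prep_rect_sketch(x, y, w, h, ratio):
    return (int(x * ratio), int(y * ratio), int(w * ratio), int(h * ratio))

# e.g. a box found on the frame resized to height inp_dim_track is mapped back to
# original-frame coordinates with ratio = frame.shape[0] / inp_dim_track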
class Car_DC(): def __init__(self, src_dir, dst_dir, car_cfg_path=local_car_cfg_path, car_det_weights_path=local_car_det_weights_path, inp_dim=768, prob_th=0.2, nms_th=0.4, num_classes=1): """ model initialization """ # super parameters self.inp_dim = inp_dim self.prob_th = prob_th self.nms_th = nms_th self.num_classes = num_classes self.dst_dir = dst_dir # clear dst_dir if os.path.exists(self.dst_dir): for x in os.listdir(self.dst_dir): if x.endswith('.jpg'): os.remove(self.dst_dir + '/' + x) else: os.makedirs(self.dst_dir) # initialize vehicle detection model self.detector = Darknet(car_cfg_path) self.detector.load_weights(car_det_weights_path) # set input dimension of image self.detector.net_info['height'] = self.inp_dim self.detector.to(device) self.detector.eval() # evaluation mode print('=> car detection model initiated.') # initiate multilabel classifier self.classifier = Car_Classifier(num_cls=19, model_path=local_model_path) # initiate imgs_path self.imgs_path = [os.path.join(src_dir, x) for x in os.listdir( src_dir) if x.endswith('.jpg')] def cls_draw_bbox(self, output, orig_img): """ 1. predict vehicle's attributes based on bbox of vehicle 2. draw bbox to orig_img """ labels = [] pt_1s = [] pt_2s = [] # 1 for det in output: # rectangle points pt_1 = tuple(det[1:3].int()) # the left-up point pt_2 = tuple(det[3:5].int()) # the right down point pt_1s.append(pt_1) pt_2s.append(pt_2) # turn BGR back to RGB ROI = Image.fromarray( orig_img[pt_1[1]: pt_2[1], pt_1[0]: pt_2[0]][:, :, ::-1]) # ROI.show() # call classifier to predict car_color, car_direction, car_type = self.classifier.predict(ROI) label = str(car_color + ' ' + car_direction + ' ' + car_type) labels.append(label) print('=> predicted label: ', label) # 2 color = (0, 215, 255) for i, det in enumerate(output): pt_1 = pt_1s[i] pt_2 = pt_2s[i] # draw bounding box cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2) # get str text size txt_size = cv2.getTextSize( label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5 pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5 # draw text background rect cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1) # text # draw text cv2.putText(orig_img, labels[i], (pt_1[0], pt_1[1]), # pt_1[1] + txt_size[1] + 4 cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2) def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim, orig_img_size): """ processing detections """ scaling_factor = min([inp_dim / float(x) for x in orig_img_size]) # W, H scaling factor output = post_process(prediction, prob_th, num_cls, nms=True, nms_conf=nms_th, CUDA=True) # post-process such as nms if type(output) != int: output[:, [1, 3]] -= (inp_dim - scaling_factor * orig_img_size[0]) / 2.0 # x, w output[:, [2, 4]] -= (inp_dim - scaling_factor * orig_img_size[1]) / 2.0 # y, h output[:, 1:5] /= scaling_factor for i in range(output.shape[0]): output[i, [1, 3]] = torch.clamp( output[i, [1, 3]], 0.0, orig_img_size[0]) output[i, [2, 4]] = torch.clamp( output[i, [2, 4]], 0.0, orig_img_size[1]) return output def detect_classify(self): """ detect and classify """ for x in self.imgs_path: # read image data img = Image.open(x) img2det = process_img(img, self.inp_dim) img2det = img2det.to(device) # put image data to device # vehicle detection prediction = self.detector.forward(img2det, CUDA=True) # calculating scaling factor orig_img_size = list(img.size) output = self.process_predict(prediction, self.prob_th, self.num_classes, self.nms_th, self.inp_dim, 
orig_img_size) orig_img = cv2.cvtColor(np.asarray( img), cv2.COLOR_RGB2BGR) # RGB => BGR if type(output) != int: self.cls_draw_bbox(output, orig_img) dst_path = self.dst_dir + '/' + os.path.split(x)[1] if not os.path.exists(dst_path): cv2.imwrite(dst_path, orig_img)
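The coordinate mapping inside process_predict above undoes the letterbox padding before clamping. The same mapping is written out below for a single (x1, y1, x2, y2) box, as a standalone reference sketch; it assumes the detector input was produced by a centered letterbox resize to inp_dim x inp_dim, as process_img appears to do.

# Sketch only: the per-box form of the vectorized math in process_predict.
def unletterbox_box_sketch(box, inp_dim, orig_w, orig_h):
    x1, y1, x2, y2 = box
    scale = min(inp_dim / float(orig_w), inp_dim / float(orig_h))
    pad_x = (inp_dim - scale * orig_w) / 2.0
    pad_y = (inp_dim - scale * orig_h) / 2.0
    x1, x2 = (x1 - pad_x) / scale, (x2 - pad_x) / scale
    y1, y2 = (y1 - pad_y) / scale, (y2 - pad_y) / scale
    # clamp to the original image
    x1, x2 = max(0.0, min(x1, orig_w)), max(0.0, min(x2, orig_w))
    y1, y2 = max(0.0, min(y1, orig_h)), max(0.0, min(y2, orig_h))
    return x1, y1, x2, y2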
class Car_DC(): def __init__(self, src_dir, dst_dir, car_cfg_path=local_car_cfg_path, car_det_weights_path=local_car_det_weights_path, inp_dim=768, prob_th=0.2, nms_th=0.4, num_classes=1): """ model initialization """ # super parameters self.inp_dim = inp_dim self.prob_th = prob_th self.nms_th = nms_th self.num_classes = num_classes self.dst_dir = dst_dir # clear dst_dir if os.path.exists(self.dst_dir): for x in os.listdir(self.dst_dir): if x.endswith('.jpg'): os.remove(self.dst_dir + '/' + x) else: os.makedirs(self.dst_dir) # initialize vehicle detection model self.detector = Darknet(car_cfg_path) self.detector.load_weights(car_det_weights_path) # set input dimension of image self.detector.net_info['height'] = self.inp_dim self.detector.to(device) self.detector.eval() # evaluation mode print('=> car detection model initiated.') # initiate multilabel classifier self.classifier = Car_Classifier(num_cls=19, model_path=local_model_path) # initiate imgs_path # self.imgs_path = [os.path.join(src_dir, x) for x in os.listdir(src_dir) if x.endswith('.jpg') or x.endswith('.png')] # MODIFIED! self.imgs_path = [ os.path.join(src_dir, x) for x in os.listdir(src_dir) if x.startswith('set') and x.endswith('_image') ] self.imgs_path = [ os.path.join(x, y) for x in self.imgs_path for y in os.listdir(x) ] self.imgs_path.sort() self.imgs_path = [ os.path.join(x, y) for x in self.imgs_path for y in os.listdir(x) ] self.imgs_path = [ os.path.join(x, y) for x in self.imgs_path for y in os.listdir(x) if y.endswith('.jpg') or y.endswith('.png') ] def cls_draw_bbox(self, output, orig_img): """ 1. predict vehicle's attributes based on bbox of vehicle 2. draw bbox to orig_img """ labels = [] pt_1s = [] pt_2s = [] car_color, car_direction, car_type = None, None, None # 1 for det in output: if len(det) == 7: continue # rectangle points pt_1 = tuple(det[1:3].int()) # the left-up point pt_2 = tuple(det[3:5].int()) # the right down point pt_1s.append(pt_1) pt_2s.append(pt_2) # turn BGR back to RGB ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1], pt_1[0]:pt_2[0]][:, :, ::-1]) # # ROI.show() # # call classifier to predict car_color, car_direction, car_type = self.classifier.predict(ROI) label = str(car_color + ' ' + car_direction + ' ' + car_type) labels.append(label) print('=> predicted label: ', label) break # 2 color = (0, 215, 255) for i, det in enumerate(output): if len(det) == 7: continue pt_1 = pt_1s[i] pt_2 = pt_2s[i] # draw bounding box cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2) # get str text size txt_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5 pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5 # # draw text background rect cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1) # text # draw text cv2.putText( orig_img, labels[i], (pt_1[0], pt_1[1]), # pt_1[1] + txt_size[1] + 4 cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2) break return car_color, car_direction, car_type def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim, orig_img_size): """ processing detections """ scaling_factor = min([inp_dim / float(x) for x in orig_img_size]) # W, H scaling factor output = post_process(prediction, prob_th, num_cls, nms=True, nms_conf=nms_th, CUDA=True) # post-process such as nms if type(output) != int: output[:, [1, 3]] -= (inp_dim - scaling_factor * orig_img_size[0]) / 2.0 # x, w output[:, [2, 4]] -= (inp_dim - scaling_factor * orig_img_size[1]) / 2.0 # y, h output[:, 1:5] /= scaling_factor for i in 
range(output.shape[0]): output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, orig_img_size[0]) output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, orig_img_size[1]) return output def detect_classify(self, query_pair): pre_path = '' color_dict = {} type_dict = {} # cars = [] # all_cars_per_camera = {} index_list_all = [] index_list_per_camera = [] pre_camera_id = self.imgs_path[0].split('/')[3] stream_i = 0 print("\n\nProcessing stream %d...\n" % stream_i) tracklet_i = 0 """ detect and classify """ for x in self.imgs_path: curr_path = os.path.split(x)[0] # read image data img = cv2.imread(x) img = cv2.copyMakeBorder(img, BORDER, BORDER, BORDER, BORDER, cv2.BORDER_CONSTANT, value=(100, 100, 100)) img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_RGB2BGR)) img2det = process_img(img, self.inp_dim) img2det = img2det.to(device) # put image data to device # vehicle detection prediction = self.detector.forward(img2det, CUDA=True) # calculating scaling factor orig_img_size = list(img.size) output = self.process_predict(prediction, self.prob_th, self.num_classes, self.nms_th, self.inp_dim, orig_img_size) orig_img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) # RGB => BGR if type(output) != int: # print('\n', x) car_color, car_direction, car_type = self.cls_draw_bbox( output, orig_img) dst_path = self.dst_dir + '/' + os.path.split(x)[1] # if not os.path.exists(dst_path): # cv2.imwrite(dst_path, orig_img) if curr_path != pre_path and pre_path != '': start_length = os.path.split(os.path.split(pre_path)[0])[1] detect_color = max(color_dict, key=color_dict.get) detect_type = max(type_dict, key=type_dict.get) print("Tracklet %d detects " % tracklet_i, detect_color, detect_type) # add_to_all(all_cars_per_camera, detect_color, detect_type) compare_query_append(query_pair, detect_color, detect_type, index_list_per_camera, tracklet_i, start_length) tracklet_i += 1 color_dict.clear() type_dict.clear() curr_camera_id = x.split('/')[3] if curr_camera_id != pre_camera_id: print("The query result on stream %d:" % stream_i, index_list_per_camera) index_list_all.append(deepcopy(index_list_per_camera)) index_list_per_camera.clear() pre_camera_id = curr_camera_id stream_i += 1 tracklet_i = 0 print("\n\nProcessing stream %d...\n" % stream_i) if car_color != None: if car_color not in color_dict: color_dict[car_color] = 0 color_dict[car_color] += 1 if car_type != None: if car_type not in type_dict: type_dict[car_type] = 0 type_dict[car_type] += 1 pre_path = curr_path # add the last one if pre_path != '': start_length = os.path.split(os.path.split(pre_path)[0])[1] detect_color = max(color_dict, key=color_dict.get) detect_type = max(type_dict, key=type_dict.get) print("Tracklet %d detects " % tracklet_i, detect_color, detect_type) compare_query_append(query_pair, detect_color, detect_type, index_list_per_camera, tracklet_i, start_length) # print(all_cars_per_camera) color_dict.clear() type_dict.clear() print("The query result on stream %d:" % stream_i, index_list_per_camera) index_list_all.append(deepcopy(index_list_per_camera)) return index_list_all
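compare_query_append is called above but not defined in this file. A minimal sketch of one plausible implementation, assuming query_pair is a (color, type) tuple and that matching tracklets are recorded as (tracklet index, start_length) pairs; the real function may differ.

# Sketch only: the matching rule and the appended record format are assumptions.
def compare_query_append_sketch(query_pair, detect_color, detect_type,
                                index_list_per_camera, tracklet_i, start_length):
    query_color, query_type = query_pair
    if detect_color == query_color and detect_type == query_type:
        index_list_per_camera.append((tracklet_i, start_length))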
# load the image
# The batch obtained here holds batch_size images (fewer for the last batch).
start = time.time()
if CUDA:
    batch = batch.cuda()

# torch.no_grad() disables gradient tracking; gradients are not needed for inference.
with torch.no_grad():
    # call the forward function of the Darknet class
    # prediction is 3-D:
    # [batch_size, number of predicted boxes per image, box attributes (85 columns)]
    prediction = model.forward(Variable(batch), CUDA)

prediction = write_results(prediction, confidence, num_classes, nms_conf=nms_thesh)
# After NMS, prediction is 2-D:
# [total number of boxes in this batch,
#  (image index within the batch, top-left corner, bottom-right corner,
#   objectness confidence, class confidence, class index)]
end = time.time()

# If the output of the write_results function for this batch is an int (0),
# meaning there is no detection, we use continue to skip the rest of the loop.
if type(prediction) == int:
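The NMS inside write_results relies on pairwise IoU between boxes given as (x1, y1, x2, y2). A standalone sketch of that computation for reference; the repo's own IoU helper may differ in broadcasting details.

# Sketch only: intersection-over-union for corner-format boxes.
import torch

def box_iou_sketch(box1, box2):
    # intersection rectangle
    ix1 = torch.max(box1[..., 0], box2[..., 0])
    iy1 = torch.max(box1[..., 1], box2[..., 1])
    ix2 = torch.min(box1[..., 2], box2[..., 2])
    iy2 = torch.min(box1[..., 3], box2[..., 3])
    inter = (ix2 - ix1).clamp(min=0) * (iy2 - iy1).clamp(min=0)
    # union = area1 + area2 - intersection
    area1 = (box1[..., 2] - box1[..., 0]) * (box1[..., 3] - box1[..., 1])
    area2 = (box2[..., 2] - box2[..., 0]) * (box2[..., 3] - box2[..., 1])
    return inter / (area1 + area2 - inter + 1e-16)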
# change resolution
# cap = pic.set(3, 680)
# cap = pic.set(4, 480)
while True:
    _, image = cap.read()
    h, w = image.shape[:2]

    blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416),
                                 swapRB=True, crop=False)
    net.setInput(blob)
    start = time.perf_counter()
    layer_outputs = net.forward(ln)
    time_took = time.perf_counter() - start
    # print("Time took:", time_took)
    print("FPS: ", 1 / time_took)

    boxes, confidences, class_ids = [], [], []

    # loop over each of the layer outputs
    for output in layer_outputs:
        # loop over each of the object detections
        for detection in output:
            # extract the class id (label) and confidence (as a probability) of
            # the current object detection
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            # discard weak predictions by ensuring the detected
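The loop above is cut off after the "discard weak predictions" comment. A self-contained sketch of the usual continuation for cv2.dnn YOLO output: filter by confidence, convert center-format boxes to corner format, then suppress overlaps with cv2.dnn.NMSBoxes. The CONFIDENCE and IOU_THRESHOLD values are assumptions, not taken from the original script.

# Sketch only: threshold values are assumed defaults.
import numpy as np
import cv2

def postprocess_sketch(layer_outputs, w, h, CONFIDENCE=0.5, IOU_THRESHOLD=0.4):
    boxes, confidences, class_ids = [], [], []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > CONFIDENCE:
                # YOLO gives center x/y and width/height relative to the input
                # blob; scale back to the original frame size
                cx, cy, bw, bh = detection[0:4] * np.array([w, h, w, h])
                boxes.append([int(cx - bw / 2), int(cy - bh / 2), int(bw), int(bh)])
                confidences.append(float(confidence))
                class_ids.append(int(class_id))
    # non-maximum suppression over the surviving boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE, IOU_THRESHOLD)
    return boxes, confidences, class_ids, idxs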
class Car_DC(): def __init__(self, src_dir, dst_dir, car_cfg_path=local_car_cfg_path, car_det_weights_path=local_car_det_weights_path, inp_dim=768, prob_th=0.2, nms_th=0.4, num_classes=1): """ model initialization """ # super parameters self.inp_dim = inp_dim self.prob_th = prob_th self.nms_th = nms_th self.num_classes = num_classes self.dst_dir = dst_dir # clear dst_dir if os.path.exists(self.dst_dir): for x in os.listdir(self.dst_dir): if x.endswith('.jpg'): os.remove(self.dst_dir + '/' + x) else: os.makedirs(self.dst_dir) # initialize vehicle detection model self.detector = Darknet(car_cfg_path) self.detector.load_weights(car_det_weights_path) # set input dimension of image self.detector.net_info['height'] = self.inp_dim self.detector.to(device) self.detector.eval() # evaluation mode #print('=> car detection model initiated.') # initiate multilabel classifier self.classifier = Car_Classifier(num_cls=19, model_path=local_model_path) # initiate imgs_path self.imgs_path = [os.path.join(src_dir, x) for x in os.listdir( src_dir) if x.endswith('.jpg')] aaa = 1 def cls_draw_bbox(self, output, orig_img): """ 1. predict vehicle's attributes based on bbox of vehicle 2. draw bbox to orig_img """ labels = [] pt_1s = [] pt_2s = [] # 1 for det in output: # rectangle points pt_1 = tuple(det[1:3].int()) # the left-up point pt_2 = tuple(det[3:5].int()) # the right down point pt_1s.append(pt_1) pt_2s.append(pt_2) # turn BGR back to RGB ROI = Image.fromarray( orig_img[pt_1[1]: pt_2[1], pt_1[0]: pt_2[0]][:, :, ::-1]) # ROI.show() # call classifier to predict car_color, car_direction, car_type = self.classifier.predict(ROI) label = str(car_color + ' ' + car_direction + ' ' + car_type) labels.append(label) print('=> predicted label: ', label) # 2 color = (0, 215, 255) for i, det in enumerate(output): pt_1 = pt_1s[i] pt_2 = pt_2s[i] # draw bounding box cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2) # get str text size txt_size = cv2.getTextSize( label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5 pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5 # draw text background rect cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1) # text # draw text cv2.putText(orig_img, labels[i], (pt_1[0], pt_1[1]), # pt_1[1] + txt_size[1] + 4 cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2) def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim, orig_img_size): """ processing detections """ scaling_factor = min([inp_dim / float(x) for x in orig_img_size]) # W, H scaling factor output = post_process(prediction, prob_th, num_cls, nms=True, nms_conf=nms_th, CUDA=True) # post-process such as nms if type(output) != int: output[:, [1, 3]] -= (inp_dim - scaling_factor * orig_img_size[0]) / 2.0 # x, w output[:, [2, 4]] -= (inp_dim - scaling_factor * orig_img_size[1]) / 2.0 # y, h output[:, 1:5] /= scaling_factor for i in range(output.shape[0]): output[i, [1, 3]] = torch.clamp( output[i, [1, 3]], 0.0, orig_img_size[0]) output[i, [2, 4]] = torch.clamp( output[i, [2, 4]], 0.0, orig_img_size[1]) #只识别较大的车 tmp = 0 list_width = [] if(len(output) == 1): return output else: while(tmp<len(output)): list_width.append(output[:, 3].tolist()[tmp] - output[:, 1].tolist()[tmp]) tmp+=1 max_width = max(list_width) #最大宽度 max_index =np.argmax(np.array(max_width)) #最大索引 output = output[max_index].unsqueeze(0) #output上升一级维度 return output def detect_classify(self): """ detect and classify """ for x in self.imgs_path: # read image data img = Image.open(x) img2det = 
process_img(img, self.inp_dim) img2det = img2det.to(device) # put image data to device # vehicle detection prediction = self.detector.forward(img2det, CUDA=True) # calculating scaling factor orig_img_size = list(img.size) output = self.process_predict(prediction, self.prob_th, self.num_classes, self.nms_th, self.inp_dim, orig_img_size) #orig_img = cv2.cvtColor(np.asarray( #img), cv2.COLOR_RGB2BGR) # RGB => BGR #if type(output) != int: #self.cls_draw_bbox(output, orig_img) #dst_path = self.dst_dir + '/' + os.path.split(x)[1] #if not os.path.exists(dst_path): #cv2.imwrite(dst_path, orig_img) # [left-up(x),left-up(y),right-down(x),right-down(y)] --当检测到的目标大于一个 x_left = output[:, 1].item() y_left = output[:, 2].item() x_right = output[:, 3].item() y_right = output[:, 4].item() # centriod[x,y] x_centriod = (x_left + x_right) / 2 y_centriod = (y_left + y_right) / 2 w_rect = x_right - x_left h_rect = y_right - y_left # 4 corners point x_leftup = x_centriod - w_rect/2 y_leftup = y_centriod - h_rect/2 x_leftdown = x_centriod - w_rect / 2 y_leftdown = y_centriod + h_rect / 2 x_rightup = x_centriod + w_rect / 2 y_rightup = y_centriod - h_rect / 2 x_rightdown = x_centriod + w_rect / 2 y_rightdown = y_centriod + h_rect / 2 # new lists to deposit the 4 corners point leftup = [int(x_leftup), int(y_leftup)] leftdown = [int(x_leftdown), int(y_leftdown)] rightup = [int(x_rightup), int(y_rightup)] rightdown = [int(x_rightdown), int(y_rightdown)] return leftup, leftdown, rightup, rightdown
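The "keep only the larger car" filter in process_predict above calls np.argmax on max_width (a scalar), which always yields index 0. A sketch of what appears to be the intended behaviour, selecting the detection with the largest box width; this is an interpretation, not the original code.

# Sketch only: selects the widest detection from the post-processed output tensor.
import numpy as np

def keep_widest_detection_sketch(output):
    if len(output) <= 1:
        return output
    widths = (output[:, 3] - output[:, 1]).tolist()
    max_index = int(np.argmax(np.array(widths)))
    # unsqueeze(0) restores the 2-D shape expected downstream
    return output[max_index].unsqueeze(0)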
class Car_DC(): def __init__(self, src_dir, dst_dir, car_cfg_path=local_car_cfg_path, car_det_weights_path=local_car_det_weights_path, inp_dim=768, prob_th=0.2, nms_th=0.4, num_classes=1): """ model initialization """ # super parameters self.inp_dim = inp_dim self.prob_th = prob_th self.nms_th = nms_th self.num_classes = num_classes self.dst_dir = dst_dir # clear dst_dir if os.path.exists(self.dst_dir): for x in os.listdir(self.dst_dir): if x.endswith('.jpg'): os.remove(self.dst_dir + '/' + x) else: os.makedirs(self.dst_dir) # initialize vehicle detection model self.detector = Darknet(car_cfg_path) self.detector.load_weights(car_det_weights_path) # set input dimension of image self.detector.net_info['height'] = self.inp_dim self.detector.to(device) self.detector.eval() # evaluation mode print('=> car detection model initiated.') # initiate multilabel classifier self.classifier = CarClassifier(num_cls=19, model_path=local_model_path) # initiate imgs_path self.imgs_path = [ os.path.join(src_dir, x) for x in os.listdir(src_dir) if x.endswith('.jpg') ] def cls_draw_bbox_write(self, output, orig_img, imgobj, img_path): """ 1. predict vehicle's attributes based on bbox of vehicle 2. draw bbox to orig_img """ labels = [] pt_1s = [] pt_2s = [] # 1 for det in output: # rectangle points pt_1 = tuple(det[1:3].int()) # the left-up point pt_2 = tuple(det[3:5].int()) # the right down point pt_1s.append(pt_1) pt_2s.append(pt_2) try: # turn BGR back to RGB ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1], pt_1[0]:pt_2[0]][:, :, ::-1]) # ROI.show() # call classifier to predict car_color, car_direction, car_type = self.classifier.predict( ROI) label = str(car_color + ' ' + car_direction + ' ' + car_type) labels.append(label) print('=> predicted label: ', label) except: print('no detected area') return # 2 color = (0, 215, 255) # 框的颜色 for i, det in enumerate(output): pt_1 = pt_1s[i] pt_2 = pt_2s[i] # draw bounding box cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2) img_temp = imgobj[pt_1[1]:pt_2[1], pt_1[0]:pt_2[0]] dst_path = self.dst_dir + '/' + os.path.split( img_path)[1] + labels[i] + str(i) + '.jpg' if not os.path.exists(dst_path): cv2.imwrite(dst_path, img_temp) # get str text size txt_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5 pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5 # draw text background rect cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1) # text # draw text cv2.putText( orig_img, labels[i], (pt_1[0], pt_1[1]), # pt_1[1] + txt_size[1] + 4 cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2) return labels def cls_draw_bbox(self, output, orig_img): """ 1. predict vehicle's attributes based on bbox of vehicle 2. 
draw bbox to orig_img """ labels = [] pt_1s = [] pt_2s = [] # 1 for det in output: # rectangle points pt_1 = tuple(det[1:3].int()) # the left-up point pt_2 = tuple(det[3:5].int()) # the right down point pt_1s.append(pt_1) pt_2s.append(pt_2) try: # turn BGR back to RGB ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1], pt_1[0]:pt_2[0]][:, :, ::-1]) # ROI.show() # call classifier to predict car_color, car_direction, car_type = self.classifier.predict( ROI) label = str(car_color + ' ' + car_direction + ' ' + car_type) labels.append(label) # print('=> predicted label: ', label) except: print('no detected area') return # 2 color = (0, 215, 255) for i, det in enumerate(output): pt_1 = pt_1s[i] pt_2 = pt_2s[i] # draw bounding box cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2) # get str text size txt_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5 pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5 # draw text background rect cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1) # text # draw text cv2.putText( orig_img, labels[i], (pt_1[0], pt_1[1]), # pt_1[1] + txt_size[1] + 4 cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2) def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim, orig_img_size): """ processing detections """ scaling_factor = min([inp_dim / float(x) for x in orig_img_size]) # W, H scaling factor output = post_process(prediction, prob_th, num_cls, nms=True, nms_conf=nms_th, CUDA=use_cuda) # post-process such as nms if type(output) != int: output[:, [1, 3]] -= (inp_dim - scaling_factor * orig_img_size[0]) / 2.0 # x, w output[:, [2, 4]] -= (inp_dim - scaling_factor * orig_img_size[1]) / 2.0 # y, h output[:, 1:5] /= scaling_factor for i in range(output.shape[0]): output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, orig_img_size[0]) output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, orig_img_size[1]) return output def detect_classify(self): """ detect and classify """ for x in self.imgs_path: # read image data img = Image.open(x) imgobj = cv2.imread(x) img2det = process_img(img, self.inp_dim) img2det = img2det.to(device) # put image data to device # vehicle detection prediction = self.detector.forward(img2det, CUDA=use_cuda) # calculating scaling factor orig_img_size = list(img.size) output = self.process_predict(prediction, self.prob_th, self.num_classes, self.nms_th, self.inp_dim, orig_img_size) orig_img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) # RGB => BGR if type(output) != int: car_info = self.cls_draw_bbox_write(output, orig_img, imgobj, x) dst_path = self.dst_dir + '/' + os.path.split(x)[1] if not os.path.exists(dst_path): cv2.imwrite(dst_path, orig_img) return len(car_info) else: return 0 def detect_classify_video(self, video_path, res_path): """ detect in video frames """ # myvideo = cv2.VideoCapture(video_path) # retval = cv2.VideoCapture.grab() # 获得视频的格式 videoCapture = cv2.VideoCapture(video_path) # 获得码率及尺寸 fps = videoCapture.get(cv2.CAP_PROP_FPS) size = (int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT))) # 指定写视频的格式, I420-avi, MJPG-mp4 videoWriter = cv2.VideoWriter( res_path, cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), fps, size) # 读帧 success, frame = videoCapture.read() while success: # cv2.imshow("Oto Video", frame) #显示 cv2.waitKey(int(1000 / int(fps))) # 延迟 # 检测图片 img = cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR) img2det = process_img(img, self.inp_dim) img2det = img2det.to(device) # put 
image data to device # vehicle detection prediction = self.detector.forward(img2det, CUDA=use_cuda) # calculating scaling factor orig_img_size = list(size) output = self.process_predict(prediction, self.prob_th, self.num_classes, self.nms_th, self.inp_dim, orig_img_size) orig_img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) # RGB => BGR if type(output) != int: self.cls_draw_bbox(output, orig_img) videoWriter.write(orig_img) # 写视频帧 success, frame = videoCapture.read() # 获取下一帧
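Note: detect_classify_video above stops when videoCapture.read() fails but never releases its handles. A hedged sketch of the cleanup that would typically follow the read loop (an assumption, not part of the original method):

#     videoCapture.release()
#     videoWriter.release()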
class Car_DR():
    def __init__(self,
                 src_dir, dst_dir,
                 car_cfg_path='./car.cfg',
                 car_det_weights_path='g:/Car_DR/car_360000.weights',
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        model initialization
        """
        # hyper-parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize the vehicle detection model and its parameters
        self.Net = Darknet(car_cfg_path)
        self.Net.load_weights(car_det_weights_path)
        self.Net.net_info['height'] = self.inp_dim  # input resolution for vehicle detection
        self.Net.to(device)
        self.Net.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initialize the multi-label vehicle classification manager
        self.manager = Manager(model_path=model_path, attrib_path=attrib_path)

        # collect the files in src_dir
        self.imgs_path = [
            os.path.join(src_dir, x) for x in os.listdir(src_dir)
            if x.endswith('.jpg')
        ]

    def cls_draw_bbox(self, output, orig_img):
        """
        orig_img is a numpy array read with OpenCV (BGR channel order).
        Predict vehicle attributes from each bbox,
        then draw the bboxes on the original image.
        """
        labels = []
        pt_1s = []
        pt_2s = []

        # get the vehicle attribute labels
        for det in output:
            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # call the classifier to predict vehicle attributes: BGR => RGB
            ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1],
                                           pt_1[0]:pt_2[0]][:, :, ::-1])
            # ROI.show()
            car_color, car_direction, car_type = self.manager.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            labels.append(label)
            print('=> predicted label: ', label)

        # draw the bboxes on the original image
        color = (0, 215, 255)
        for i, det in enumerate(output):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw the bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get the text size
            txt_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # draw the text background rectangle
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)  # text

            # draw the text
            cv2.putText(
                orig_img,
                labels[i],
                (pt_1[0], pt_1[1]),  # pt_1[1] + txt_size[1] + 4
                cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2)

    def cls_and_draw(self, output, orig_img):
        """
        orig_img is a PIL Image.
        Predict vehicle attributes from each bbox,
        then draw the bboxes on the original image.
        """
        labels = []
        x_ys = []
        w_hs = []

        # get the vehicle attribute labels
        for det in output:
            # rectangle
            x_y = tuple(det[1:3].int())  # x, y
            w_h = tuple(det[3:5].int())  # w, h
            x_ys.append(x_y)
            w_hs.append(w_h)

            # call the classifier to predict vehicle attributes
            box = (int(x_y[0]), int(x_y[1]),
                   int(x_y[0] + w_h[0]), int(x_y[1] + w_h[1]))  # left, upper, right, lower
            ROI = orig_img.crop(box)
            car_color, car_direction, car_type = self.manager.predict(ROI)
            label = car_color + ' ' + car_direction + ' ' + car_type
            print('=> label: ', label)
            labels.append(label)

        # draw the bboxes on the original image
        for i, det in enumerate(output):
            x_y = x_ys[i]
            w_h = w_hs[i]
            color = (0, 215, 255)
            cv2.rectangle(np.asarray(orig_img), x_y, w_h, color, thickness=2)  # bounding box
            txt_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]  # text size
            w_h = x_y[0] + txt_size[0] + 4, x_y[1] + txt_size[1] + 4
            cv2.rectangle(np.asarray(orig_img), x_y, w_h, color, thickness=-1)  # text
            cv2.putText(np.asarray(orig_img), labels[i],
                        (x_y[0], x_y[1] + txt_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2)

    def predict(self):
        """
        Detect and recognize in batch; write the results to dst_dir.
        """
        for x in self.imgs_path:
            # read image data
            img = Image.open(x)
            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # move image data to device

            # vehicle detection
            prediction = self.Net.forward(img2det, CUDA=True)

            # compute the scaling factor
            orig_img_size = list(img.size)
            output = process_predict(prediction, self.prob_th,
                                     self.num_classes, self.nms_th,
                                     self.inp_dim, orig_img_size)
            orig_img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)  # RGB => BGR
            if type(output) != int:
                # draw the detection bboxes on the original image
                # draw_car_bbox(output, orig_img)
                self.cls_draw_bbox(output, orig_img)
                # self.cls_and_draw(output, img)
                dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                if not os.path.exists(dst_path):
                    cv2.imwrite(dst_path, orig_img)
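A minimal usage sketch for the Car_DR pipeline above; the source and destination directories are placeholders, not paths from the original code.

# Sketch only: src_dir and dst_dir are placeholder paths.
if __name__ == '__main__':
    dr = Car_DR(src_dir='./test_imgs', dst_dir='./test_result')
    dr.predict()
    print('=> detection and recognition done.')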