class Detector(nn.Module):
    def __init__(self, config_path, weights_path, input_size=None, conf_thresh=0.5, nms_thresh=0.4):
        super(Detector, self).__init__()
        self.input_size = input_size
        self.conf_thresh = conf_thresh
        self.nms_thresh = nms_thresh

        # Initialize Darknet for detection
        self.model = Darknet(config_path, input_size=input_size)
        self.model.load_weights(weights_path)
        self.model.eval()

    def device(self):
        return next(self.model.parameters()).device

    def forward(self, frame, swapRB=False):
        x = image_to_tensor(frame, swapRB)
        _, _, fh, fw = x.size()

        device = self.device()
        x = x.to(device)
        x = letterbox_resize(x, self.input_size, constant_value=127.5)
        x = x / 255.0

        with torch.no_grad():
            y = self.model.forward(x)

        output = []
        for i, prediction in enumerate(y):  # Enumerate on batch
            detection = non_max_suppression(prediction.cpu(), self.conf_thresh, self.nms_thresh)
            if detection is not None:
                detection = bbox_fit(detection, (fh, fw), self.input_size).to(device)
            output.append(detection)

        return output

    def update(self, conf_thresh=None, nms_thresh=None, weights_path=None):
        if conf_thresh is not None:
            self.conf_thresh = conf_thresh

        if nms_thresh is not None:
            self.nms_thresh = nms_thresh

        if weights_path is not None:
            device = self.device()
            self.model.cpu().load_weights(weights_path)
            self.model.to(device)
def test_car_detect(car_cfg_path='./car.cfg',
                    car_det_weights_path='g:/Car_DR/car_360000.weights'):
    """
    imgs_path: 图像数据路径
    """
    inp_dim = 768
    prob_th = 0.2  # 车辆检测概率阈值
    nms_th = 0.4  # NMS阈值
    num_cls = 1  # 只检测车辆1类

    # initialize the vehicle detection model and its parameters
    Net = Darknet(car_cfg_path)
    Net.load_weights(car_det_weights_path)
    Net.net_info['height'] = inp_dim  # input resolution for vehicle detection
    Net.to(device)
    Net.eval()  # evaluation mode
    print('=> car detection model initiated.')

    # read image data
    img = Image.open(
        'f:/FaceRecognition_torch_0_4/imgs_21/det_2018_08_21_63_1.jpg')
    img2det = process_img(img, inp_dim)
    img2det = img2det.to(device)  # move the image data to the device

    # run vehicle detection
    prediction = Net.forward(img2det, CUDA=True)

    # compute the scaling factor
    orig_img_size = list(img.size)
    output = process_predict(prediction, prob_th, num_cls, nms_th, inp_dim,
                             orig_img_size)

    orig_img = np.asarray(img)
    if type(output) != int:
        # draw the detected bboxes on the original image
        draw_car_bbox(output, orig_img)

    cv2.imshow('test', orig_img)
    cv2.waitKey()
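
# Hedged sketch of the process_img helper used above (its definition is not
# shown in this snippet): letterbox a PIL image onto an inp_dim x inp_dim
# canvas and return a normalized 1x3xHxW float tensor. Details such as the
# padding value are assumptions.
def process_img_sketch(img, inp_dim):
    w, h = img.size
    scale = min(inp_dim / w, inp_dim / h)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = np.asarray(img.resize((new_w, new_h)))
    canvas = np.full((inp_dim, inp_dim, 3), 128, dtype=np.uint8)  # gray padding
    top, left = (inp_dim - new_h) // 2, (inp_dim - new_w) // 2
    canvas[top:top + new_h, left:left + new_w] = resized
    x = torch.from_numpy(canvas).float().permute(2, 0, 1) / 255.0  # HWC -> CHW
    return x.unsqueeze(0)  # add the batch dimension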
Example #3
optimizer = optim.Adam(model.parameters())
dataloader = DataLoader(cd_dataset, batch_size=4, shuffle=True)

# training the model
epoch_loss = 0.0
for i_batch, sample_batched in enumerate(dataloader, 1):
    print(i_batch, sample_batched['image'].size(),
          sample_batched['bBox'].size())

    loss = model.forward(sample_batched['image'], sample_batched['bBox'], None)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    epoch_loss += loss.item()

# testing the model

for i_batch, sample_batched in enumerate(dataloader, 1):
    print(i_batch, sample_batched['image'].size(),
          sample_batched['bBox'].size())

    # NOTE: imgfile (the path to a first test image) is assumed to be
    # defined elsewhere; imgfile2 is a second test image.
    imgfile2 = 'inria/Train/pos/crop001002.png'
    sized = read_and_size_image(imgfile, darknet_model.width,
                                darknet_model.height)
    sized2 = read_and_size_image(imgfile2, darknet_model.width,
                                 darknet_model.height)
    sized3 = torch.randn(sized2.shape)  # a random-noise third input

    batch = torch.cat([sized, sized2, sized3], dim=0)

    # run the darknet model on the 3-image batch
    output = darknet_model.forward(batch)
    get_max_probability(output, 0, 80)
    break  # only test the first batch
'''disabled block (author's note "deze wegdoen" = "remove this")
img_interp = F.interpolate(img, size=(200,200), mode='bilinear', align_corners=True)

#zien = tvfunc.to_pil_image(img_interp.squeeze(0))
#zien.show()

printability_file = 'non_printability/30values.txt'
img_height = 500
img_width = 500
printability_array = get_printability_array(printability_file, img_height, img_width)
#good_patch = torch.from_numpy(np.tile([0.7098,0.32157,0.2],(img_height,img_width,1))).float()
#good_patch = good_patch.view(img_height, img_width, 3).transpose(0,1).transpose(0,2).contiguous().unsqueeze(0)
good_patch = read_and_size_image('data/horse.jpg')
#end of disabled block'''
Example #5
class Car_DC():
    def __init__(self,
                 src_path,
                 dst_path,
                 car_cfg_path=local_car_cfg_path,
                 car_det_weights_path=local_car_det_weights_path,
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        model initialization
        """
        # super parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_path = dst_path
        self.video_path = src_path

        # initialize vehicle detection model
        self.detector = Darknet(car_cfg_path)
        self.detector.load_weights(car_det_weights_path)
        # set input dimension of image
        self.detector.net_info['height'] = self.inp_dim
        self.detector.to(device)
        self.detector.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initiate multilabel classifier
        self.classifier = Car_Classifier(num_cls=19,
                                         model_path=local_model_path)
        
        # initialize the car recognition model
        self.img_width, self.img_height = 224, 224
        self.model = load_model()
        self.model.load_weights('models/model.96-0.89.hdf5')

        cars_meta = scipy.io.loadmat('devkit/cars_meta')
        class_names = cars_meta['class_names']  # shape=(1, 196)
        self.class_names = np.transpose(class_names)

    def cls_draw_bbox(self, output, orig_img):
        """
        1. predict vehicle's attributes based on bbox of vehicle
        2. draw bbox to orig_img
        """
        pt_1s = []
        pt_2s = []
        label_1 = []
        label_2 = []
        label_3 = []
        # 1
        for det in output:
            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # turn BGR back to RGB
            ROI = Image.fromarray(
                orig_img[pt_1[1]: pt_2[1],
                         pt_1[0]: pt_2[0]][:, :, ::-1])
            # ROI.show()

            # call classifier to predict
            car_color, car_direction, car_type = self.classifier.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            print('=> predicted label: ', label)
            label_1.append(str(car_color))
            label_2.append(str(car_direction))
            label_3.append(str(car_type))

        # 2
        color = (0, 255, 0)
        for i, det in enumerate(output):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get text size for this detection's label
            label = '{} {} {}'.format(label_1[i], label_2[i], label_3[i])
            txt_size = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            
            # Convert cv2 numpy array to PIL image
            cv2_im = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im)
            # draw a label with Chinese text in the filled box
            font = ImageFont.truetype('./font/simhei.ttf', int(txt_size[1] * 0.8), encoding="utf-8")
            draw = ImageDraw.Draw(pil_im)

            # draw text background rect and text
            # car color
            pt_11 = pt_2[0], pt_1[1]
            pt_12 = pt_2[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 3
            fname = color_dict[label_1[i]]
            draw.text((pt_11[0], pt_11[1]), ' 颜色: ' + fname, fill=(0, 255, 0), font=font)  # '颜色' = color
    
            # car direction
            pt_21 = pt_2[0], pt_12[1]
            pt_22 = pt_2[0] + txt_size[0] + 3, pt_12[1] + txt_size[1] + 3
            fname = direction_dict[label_2[i]]
            draw.text((pt_21[0], pt_21[1]), ' 朝向: ' + fname, fill=(0, 255, 0), font=font)  # '朝向' = direction

            # car type
            pt_31 = pt_2[0], pt_22[1]
            fname = type_dict[label_3[i]]
            draw.text((pt_31[0], pt_31[1]), ' 车型: ' + fname, fill=(0, 255, 0), font=font)  # '车型' = car type
                        
            # Convert PIL image to cv2 numpy array
            orig_img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
        return orig_img

    def process_predict(self,
                        prediction,
                        prob_th,
                        num_cls,
                        nms_th,
                        inp_dim,
                        orig_img_size):
        """
        processing detections
        """
        scaling_factor = min([inp_dim / float(x)
                              for x in orig_img_size])  # W, H scaling factor
        output = post_process(prediction,
                              prob_th,
                              num_cls,
                              nms=True,
                              nms_conf=nms_th,
                              CUDA=True)  # post-process such as nms

        if type(output) != int:
            output[:, [1, 3]] -= (inp_dim - scaling_factor *
                                  orig_img_size[0]) / 2.0  # x, w
            output[:, [2, 4]] -= (inp_dim - scaling_factor *
                                  orig_img_size[1]) / 2.0  # y, h
            output[:, 1:5] /= scaling_factor
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(
                    output[i, [1, 3]], 0.0, orig_img_size[0])
                output[i, [2, 4]] = torch.clamp(
                    output[i, [2, 4]], 0.0, orig_img_size[1])
        return output
    
    def car_recognition(self, output, orig_img):
        labels = []
        pt_1s = []
        pt_2s = []
        for det in output:
            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # turn BGR back to RGB
            ROI = Image.fromarray(
                orig_img[pt_1[1]: pt_2[1],
                         pt_1[0]: pt_2[0]][:, :, ::-1])
            img = cv2.cvtColor(np.asarray(ROI), cv2.COLOR_RGB2BGR)
            bgr_img = cv2.resize(img, (self.img_width, self.img_height),
                                 interpolation=cv2.INTER_CUBIC)
            rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
            rgb_img = np.expand_dims(rgb_img, 0)
            preds = self.model.predict(rgb_img)
            prob = np.max(preds)
            class_id = np.argmax(preds)
            label = str(self.class_names[class_id][0][0])
            labels.append(label)
            
        for i, det in enumerate(output):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # get text size for this detection's label
            txt_size = cv2.getTextSize(
                labels[i], cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            
            # Convert cv2 numpy array to PIL image
            cv2_im = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im)
            # draw a label with Chinese text in the filled box
            font = ImageFont.truetype('./font/simhei.ttf', int(txt_size[1] * 0.8), encoding="utf-8")
            draw = ImageDraw.Draw(pil_im)
            # draw text background rect and text
            # car class
            pt_11 = pt_2[0], pt_1[1] + (txt_size[1] + 3) * 3
            pt_12 = pt_2[0] + txt_size[0] + 3, pt_11[1] + txt_size[1] + 3
            fname = car_label[labels[i]]
            draw.text((pt_11[0], pt_11[1]), ' 车类: ' + fname, fill=(0, 255, 0), font=font)  # '车类' = car class
                       
            # Convert PIL image to cv2 numpy array
            orig_img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
        return orig_img            
            

    def detect_classify(self):
        """        
        detect and classify
        """
        #read and save video
        cap = cv2.VideoCapture(self.video_path)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(self.dst_path, fourcc, 25.0, (960, 540))
        
        while cap.isOpened():
            # read image data
            ret, x = cap.read()
            if not ret:
                break
            re_img = cv2.resize(x, (960, 540), interpolation=cv2.INTER_LINEAR)
            img = Image.fromarray(cv2.cvtColor(re_img, cv2.COLOR_BGR2RGB))
            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data to device

            # vehicle detection
            prediction = self.detector.forward(img2det, CUDA=True)

            # calculating scaling factor
            orig_img_size = list(img.size)
            output = self.process_predict(prediction,
                                          self.prob_th,
                                          self.num_classes,
                                          self.nms_th,
                                          self.inp_dim,
                                          orig_img_size)

            orig_img = cv2.cvtColor(np.asarray(
                img), cv2.COLOR_RGB2BGR)  # RGB => BGR
            if type(output) != int:
                orig_img = self.cls_draw_bbox(output, orig_img)
                orig_img = self.car_recognition(output, orig_img)
            out.write(orig_img)
        cap.release()
        out.release()
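
# Worked example of the inverse letterbox mapping in process_predict above,
# assuming inp_dim=768 and a 1920x1080 source image.
inp_dim = 768
orig_w, orig_h = 1920, 1080
scaling_factor = min(inp_dim / orig_w, inp_dim / orig_h)  # 0.4

pad_x = (inp_dim - scaling_factor * orig_w) / 2.0  # (768 - 768) / 2 = 0
pad_y = (inp_dim - scaling_factor * orig_h) / 2.0  # (768 - 432) / 2 = 168

# a box corner at (100, 200) in network input space maps back to:
x_orig = (100 - pad_x) / scaling_factor  # 250.0
y_orig = (200 - pad_y) / scaling_factor  # 80.0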
Example #6
    def start(self):

        # Initialize global variables
        global classes, BBox, colors, phase, frame, initBBox, true_class_filter

        # DETECTION PHASE SETUP

        CUDA = torch.cuda.is_available()

        text = 'No class filter selected'

        classes = load_classes('model/{}/model.names'.format(
            self.model_folder))
        colors = pkl.load(open('pallete', 'rb'))

        num_classes = len(classes)
        if [i for i in self.class_filter if not (i in classes)]:
            if self.label_info:
                text = 'WARNING: {} class/classes are not included in the selected model. Updating the searching list...'.format(
                    [i for i in self.class_filter if not (i in classes)])
                self.label_info.setText(text)
            else:
                print(
                    'WARNING: {} class/classes are not included in the selected model. Updating the searching list...'
                    .format(
                        [i for i in self.class_filter if not (i in classes)]))
        true_class_filter = [i for i in self.class_filter if (i in classes)]

        # Network configuration
        if self.label_info:
            text += '\nLoading network...'
            self.label_info.setText(text)
        else:
            print('Loading network.....')

        model = Darknet('model/{}/model.cfg'.format(self.model_folder))
        model.load_weights('model/{}/model.weights'.format(self.model_folder))

        if self.label_info:
            text += '\nNetwork successfully loaded'
            self.label_info.setText(text)
        else:
            print('Network successfully loaded')

        model.net_info['height'] = self.reso_det
        inp_dim_det = int(model.net_info['height'])
        assert inp_dim_det % 32 == 0
        assert inp_dim_det > 32

        # If a CUDA device is available, load the model onto it
        if CUDA:
            model.cuda()

        # Model in evaluation mode
        model.eval()

        # TRACKING PHASE SETUP

        inp_dim_track = int(self.reso_track)

        OPENCV_OBJECT_TRACKERS = {
            'csrt': cv2.TrackerCSRT_create,
            'kcf': cv2.TrackerKCF_create,
            'boosting': cv2.TrackerBoosting_create,
            'mil': cv2.TrackerMIL_create,
            'tld': cv2.TrackerTLD_create,
            'medianflow': cv2.TrackerMedianFlow_create,
            'mosse': cv2.TrackerMOSSE_create
        }

        # SOURCE INITIALIZATION

        if self.source == '0' or self.source == '1':
            self.cap = cv2.VideoCapture(int(self.source))
            mode = 'cam'
            self.window_name = 'Camera ' + self.source
        else:
            if self.label_info:  # via the GUI we get the full path
                self.cap = cv2.VideoCapture(self.source)
            else:  # via the terminal we only give the file name
                self.cap = cv2.VideoCapture('videos/{}'.format(self.source))
            mode = 'file'
            self.window_name = self.source
        assert self.cap.isOpened(), 'Cannot capture source'

        phase = 'det'
        initBBox = []
        cont = 0
        frames = 0

        cv2.namedWindow(self.window_name)
        cv2.setMouseCallback(self.window_name, click_det2track)

        while self.cap.isOpened():
            grab, frame = self.cap.read()

            start = time.time()

            if grab:
                # Detection phase
                if phase == 'det':

                    if mode == 'cam':
                        img = prep_image_c(frame, inp_dim_det)
                    elif mode == 'file':
                        img = prep_image_f(frame, inp_dim_det)

                    im_dim = frame.shape[1], frame.shape[0]
                    im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

                    if CUDA:
                        im_dim = im_dim.cuda()
                        img = img.cuda()

                    # Initialize the list of detected BBoxes
                    BBox = []

                    output = model.forward(Variable(img), CUDA)
                    output = write_results(output,
                                           self.confidence,
                                           num_classes,
                                           nms_conf=self.nms_thresh)

                    if type(output) == int:
                        frames += 1
                        cv2.imshow(self.window_name, frame)
                        key = cv2.waitKey(1)
                        if key & 0xFF == ord('q'):
                            break
                        continue

                    if mode == 'cam':

                        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                                     float(inp_dim_det))

                        im_dim = im_dim.repeat(output.size(0), 1) / inp_dim_det
                        output[:, 1:5] *= im_dim

                    elif mode == 'file':

                        im_dim = im_dim.repeat(output.size(0), 1)
                        scaling_factor = torch.min(inp_dim_det / im_dim,
                                                   1)[0].view(-1, 1)

                        output[:, [1, 3]] -= (inp_dim_det - scaling_factor *
                                              im_dim[:, 0].view(-1, 1)) / 2
                        output[:, [2, 4]] -= (inp_dim_det - scaling_factor *
                                              im_dim[:, 1].view(-1, 1)) / 2

                        output[:, 1:5] /= scaling_factor

                        for i in range(output.shape[0]):
                            output[i, [1, 3]] = torch.clamp(
                                output[i, [1, 3]], 0.0, im_dim[i, 0])
                            output[i, [2, 4]] = torch.clamp(
                                output[i, [2, 4]], 0.0, im_dim[i, 1])

                    list(map(lambda x: write(x, frame), output))

                    cv2.imshow(self.window_name, frame)
                    key = cv2.waitKey(1)
                    if key & 0xFF == ord('q'):
                        break
                    frames += 1

                    if self.label_info:
                        self.label_info.setText(
                            text + '\nDETECTION PHASE:' +
                            '\n   {0: .2f} fps'.format(
                                float(1 / (time.time() - start))))

                # Tracking phase
                elif phase == 'track':

                    ratio = frame.shape[0] / inp_dim_track

                    img = imutils.resize(frame, height=inp_dim_track)

                    if initBBox:
                        (success, box) = tracker.update(img)

                        if success:
                            cont = 0
                            (x, y, w, h) = [int(v) for v in box]
                            x, y, w, h = prep_rect(x, y, w, h, ratio)
                            cv2.rectangle(frame, (x, y), (x + w, y + h),
                                          (0, 255, 0), 2)

                        else:
                            cont += 1
                            if self.label_info:
                                self.label_info.setText(
                                    text + '\nTRACKING PHASE' +
                                    '\nObject lost ({})'.format(cont))
                            else:
                                print('Object lost ', cont)

                    else:
                        (x, y, w, h) = [int(v) for v in track_rect]
                        initBBox = (prep_rect(x, y, w, h, float(1 / ratio)))
                        tracker = OPENCV_OBJECT_TRACKERS[self.tracker_alg]()
                        tracker.init(img, initBBox)

                    if cont > 100:
                        phase = 'det'
                        cont = 0
                        initBBox = []

                    cv2.imshow(self.window_name, frame)
                    key = cv2.waitKey(1)
                    if key & 0xFF == ord('q'):
                        break
                    frames += 1

                    if self.label_info:
                        self.label_info.setText(
                            text + '\nTRACKING PHASE:' +
                            '\n   {0: .2f} fps'.format(
                                float(1 / (time.time() - start))))

                else:
                    break

            else:
                break

        if not self.label_info:
            cv2.destroyWindow(self.window_name)

        self.cap.release()

        torch.cuda.empty_cache()
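
# Hedged sketch of the click_det2track mouse callback registered above (its
# real definition is not shown). Assumption: BBox holds (top-left,
# bottom-right) corner pairs collected during the detection phase.
def click_det2track_sketch(event, x, y, flags, param):
    global phase, track_rect, initBBox
    if event == cv2.EVENT_LBUTTONDOWN and phase == 'det':
        for (c1, c2) in BBox:
            if c1[0] <= x <= c2[0] and c1[1] <= y <= c2[1]:
                # store the clicked box as (x, y, w, h) and switch phase
                track_rect = (c1[0], c1[1], c2[0] - c1[0], c2[1] - c1[1])
                initBBox = []
                phase = 'track'
                break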
Example #7
class Car_DC():
    def __init__(self,
                 src_dir,
                 dst_dir,
                 car_cfg_path=local_car_cfg_path,
                 car_det_weights_path=local_car_det_weights_path,
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        model initialization
        """
        # super parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize vehicle detection model
        self.detector = Darknet(car_cfg_path)
        self.detector.load_weights(car_det_weights_path)
        # set input dimension of image
        self.detector.net_info['height'] = self.inp_dim
        self.detector.to(device)
        self.detector.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initiate multilabel classifier
        self.classifier = Car_Classifier(num_cls=19,
                                         model_path=local_model_path)

        # initiate imgs_path
        self.imgs_path = [os.path.join(src_dir, x) for x in os.listdir(
            src_dir) if x.endswith('.jpg')]

    def cls_draw_bbox(self, output, orig_img):
        """
        1. predict vehicle's attributes based on bbox of vehicle
        2. draw bbox to orig_img
        """
        labels = []
        pt_1s = []
        pt_2s = []

        # 1
        for det in output:
            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # turn BGR back to RGB
            ROI = Image.fromarray(
                orig_img[pt_1[1]: pt_2[1],
                         pt_1[0]: pt_2[0]][:, :, ::-1])
            # ROI.show()

            # call classifier to predict
            car_color, car_direction, car_type = self.classifier.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            labels.append(label)
            print('=> predicted label: ', label)

        # 2
        color = (0, 215, 255)
        for i, det in enumerate(output):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get str text size
            txt_size = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # draw text background rect
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)  # text

            # draw text
            cv2.putText(orig_img, labels[i], (pt_1[0], pt_1[1]),  # pt_1[1] + txt_size[1] + 4
                        cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2)

    def process_predict(self,
                        prediction,
                        prob_th,
                        num_cls,
                        nms_th,
                        inp_dim,
                        orig_img_size):
        """
        processing detections
        """
        scaling_factor = min([inp_dim / float(x)
                              for x in orig_img_size])  # W, H scaling factor
        output = post_process(prediction,
                              prob_th,
                              num_cls,
                              nms=True,
                              nms_conf=nms_th,
                              CUDA=True)  # post-process such as nms

        if type(output) != int:
            output[:, [1, 3]] -= (inp_dim - scaling_factor *
                                  orig_img_size[0]) / 2.0  # x, w
            output[:, [2, 4]] -= (inp_dim - scaling_factor *
                                  orig_img_size[1]) / 2.0  # y, h
            output[:, 1:5] /= scaling_factor
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(
                    output[i, [1, 3]], 0.0, orig_img_size[0])
                output[i, [2, 4]] = torch.clamp(
                    output[i, [2, 4]], 0.0, orig_img_size[1])
        return output

    def detect_classify(self):
        """
        detect and classify
        """
        for x in self.imgs_path:
            # read image data
            img = Image.open(x)
            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data to device

            # vehicle detection
            prediction = self.detector.forward(img2det, CUDA=True)

            # calculating scaling factor
            orig_img_size = list(img.size)
            output = self.process_predict(prediction,
                                          self.prob_th,
                                          self.num_classes,
                                          self.nms_th,
                                          self.inp_dim,
                                          orig_img_size)

            orig_img = cv2.cvtColor(np.asarray(
                img), cv2.COLOR_RGB2BGR)  # RGB => BGR
            if type(output) != int:
                self.cls_draw_bbox(output, orig_img)
                dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                if not os.path.exists(dst_path):
                    cv2.imwrite(dst_path, orig_img)
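
# Hypothetical entry point for the batch-image pipeline above; the src/dst
# directories are assumptions.
if __name__ == '__main__':
    dc = Car_DC(src_dir='./test_imgs', dst_dir='./test_result')
    dc.detect_classify()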
Example #8
class Car_DC():
    def __init__(self,
                 src_dir,
                 dst_dir,
                 car_cfg_path=local_car_cfg_path,
                 car_det_weights_path=local_car_det_weights_path,
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        model initialization
        """
        # super parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize vehicle detection model
        self.detector = Darknet(car_cfg_path)
        self.detector.load_weights(car_det_weights_path)
        # set input dimension of image
        self.detector.net_info['height'] = self.inp_dim
        self.detector.to(device)
        self.detector.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initiate multilabel classifier
        self.classifier = Car_Classifier(num_cls=19,
                                         model_path=local_model_path)

        # initiate imgs_path
        # self.imgs_path = [os.path.join(src_dir, x) for x in os.listdir(src_dir) if x.endswith('.jpg') or x.endswith('.png')]

        # MODIFIED!
        self.imgs_path = [
            os.path.join(src_dir, x) for x in os.listdir(src_dir)
            if x.startswith('set') and x.endswith('_image')
        ]
        self.imgs_path = [
            os.path.join(x, y) for x in self.imgs_path for y in os.listdir(x)
        ]
        self.imgs_path.sort()
        self.imgs_path = [
            os.path.join(x, y) for x in self.imgs_path for y in os.listdir(x)
        ]
        self.imgs_path = [
            os.path.join(x, y) for x in self.imgs_path for y in os.listdir(x)
            if y.endswith('.jpg') or y.endswith('.png')
        ]

    def cls_draw_bbox(self, output, orig_img):
        """
        1. predict vehicle's attributes based on bbox of vehicle
        2. draw bbox to orig_img
        """
        labels = []
        pt_1s = []
        pt_2s = []

        car_color, car_direction, car_type = None, None, None

        # 1
        for det in output:
            if len(det) == 7:
                continue

            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # turn BGR back to RGB
            ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1],
                                           pt_1[0]:pt_2[0]][:, :, ::-1])
            # # ROI.show()

            # # call classifier to predict
            car_color, car_direction, car_type = self.classifier.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            labels.append(label)
            print('=> predicted label: ', label)
            break

        # 2
        color = (0, 215, 255)
        for i, det in enumerate(output):
            if len(det) == 7:
                continue

            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get str text size
            txt_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # # draw text background rect
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)  # text

            # draw text
            cv2.putText(
                orig_img,
                labels[i],
                (pt_1[0], pt_1[1]),  # pt_1[1] + txt_size[1] + 4
                cv2.FONT_HERSHEY_PLAIN,
                2,
                [225, 255, 255],
                2)
            break

        return car_color, car_direction, car_type

    def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim,
                        orig_img_size):
        """
        processing detections
        """
        scaling_factor = min([inp_dim / float(x)
                              for x in orig_img_size])  # W, H scaling factor

        output = post_process(prediction,
                              prob_th,
                              num_cls,
                              nms=True,
                              nms_conf=nms_th,
                              CUDA=True)  # post-process such as nms

        if type(output) != int:
            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * orig_img_size[0]) / 2.0  # x, w
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * orig_img_size[1]) / 2.0  # y, h
            output[:, 1:5] /= scaling_factor
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                orig_img_size[0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                orig_img_size[1])

        return output

    def detect_classify(self, query_pair):
        pre_path = ''
        color_dict = {}
        type_dict = {}
        # cars = []
        # all_cars_per_camera = {}
        index_list_all = []
        index_list_per_camera = []

        pre_camera_id = self.imgs_path[0].split('/')[3]

        stream_i = 0
        print("\n\nProcessing stream %d...\n" % stream_i)

        tracklet_i = 0
        """
        detect and classify
        """
        for x in self.imgs_path:
            curr_path = os.path.split(x)[0]

            # read image data
            img = cv2.imread(x)
            img = cv2.copyMakeBorder(img,
                                     BORDER,
                                     BORDER,
                                     BORDER,
                                     BORDER,
                                     cv2.BORDER_CONSTANT,
                                     value=(100, 100, 100))
            img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data to device

            # vehicle detection
            prediction = self.detector.forward(img2det, CUDA=True)

            # calculating scaling factor
            orig_img_size = list(img.size)
            output = self.process_predict(prediction, self.prob_th,
                                          self.num_classes, self.nms_th,
                                          self.inp_dim, orig_img_size)

            orig_img = cv2.cvtColor(np.asarray(img),
                                    cv2.COLOR_RGB2BGR)  # RGB => BGR
            if type(output) != int:
                # print('\n', x)
                car_color, car_direction, car_type = self.cls_draw_bbox(
                    output, orig_img)
                dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                # if not os.path.exists(dst_path):
                # cv2.imwrite(dst_path, orig_img)

            if curr_path != pre_path and pre_path != '':
                start_length = os.path.split(os.path.split(pre_path)[0])[1]
                detect_color = max(color_dict, key=color_dict.get)
                detect_type = max(type_dict, key=type_dict.get)
                print("Tracklet %d detects " % tracklet_i, detect_color,
                      detect_type)
                # add_to_all(all_cars_per_camera, detect_color, detect_type)
                compare_query_append(query_pair, detect_color, detect_type,
                                     index_list_per_camera, tracklet_i,
                                     start_length)
                tracklet_i += 1

                color_dict.clear()
                type_dict.clear()

                curr_camera_id = x.split('/')[3]
                if curr_camera_id != pre_camera_id:
                    print("The query result on stream %d:" % stream_i,
                          index_list_per_camera)
                    index_list_all.append(deepcopy(index_list_per_camera))
                    index_list_per_camera.clear()

                    pre_camera_id = curr_camera_id

                    stream_i += 1
                    tracklet_i = 0
                    print("\n\nProcessing stream %d...\n" % stream_i)

            if car_color is not None:
                if car_color not in color_dict:
                    color_dict[car_color] = 0
                color_dict[car_color] += 1

            if car_type is not None:
                if car_type not in type_dict:
                    type_dict[car_type] = 0
                type_dict[car_type] += 1

            pre_path = curr_path

        # add the last one
        if pre_path != '':
            start_length = os.path.split(os.path.split(pre_path)[0])[1]
            detect_color = max(color_dict, key=color_dict.get)
            detect_type = max(type_dict, key=type_dict.get)
            print("Tracklet %d detects " % tracklet_i, detect_color,
                  detect_type)
            compare_query_append(query_pair, detect_color, detect_type,
                                 index_list_per_camera, tracklet_i,
                                 start_length)
            # print(all_cars_per_camera)
            color_dict.clear()
            type_dict.clear()

            print("The query result on stream %d:" % stream_i,
                  index_list_per_camera)
            index_list_all.append(deepcopy(index_list_per_camera))

        return index_list_all
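
# Hypothetical invocation of the tracklet-query pipeline above. The
# query_pair format (color, type) is an assumption inferred from how
# compare_query_append is called with detect_color and detect_type.
dc = Car_DC(src_dir='./data', dst_dir='./out')
index_list_all = dc.detect_classify(query_pair=('black', 'suv'))
print(index_list_all)  # per-stream lists of matching tracklet indices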
Example #9
    # load the image
    """
    The batch obtained here holds image data for batch_size images
    (except possibly the last, smaller group).
    """
    start = time.time()
    if CUDA:
        batch = batch.cuda()
    # torch.no_grad() disables gradient tracking for inference
    with torch.no_grad():
        # call the Darknet class's forward function
        """
        The prediction returned here is 3-D:
        [batch_size, number of predicted boxes per image,
         attributes per bounding box (85 columns)]
        """
        prediction = model.forward(Variable(batch), CUDA)

    prediction = write_results(prediction,
                               confidence,
                               num_classes,
                               nms_conf=nms_thesh)
    """
    这里返回的就是2维的了。进过NMS处理过后的prediction。维度信息分别是
    [本次batch_size个图片所有的bounding box数,每个bounding box的维度信息(batch_size内图片索引,***左上角坐标***,***右下角坐标***,置信度,属于这个类别的置信度,属于那个类别)]
    """

    end = time.time()
    # If the output of the write_results function for batch is an int(0),
    # meaning there is no detection, we use continue to skip the rest loop.
    if type(prediction) == int:
Example #10
# change resolution
# cap = pic.set(3,680)
# cap = pic.set(4,480)

while True:
    _, image = cap.read()

    h, w = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image,
                                 1 / 255.0, (416, 416),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    start = time.perf_counter()
    layer_outputs = net.forward(ln)
    time_took = time.perf_counter() - start
    #print("Time took:", time_took)
    print("FPS: ", 1 / time_took)
    boxes, confidences, class_ids = [], [], []

    # loop over each of the layer outputs
    for output in layer_outputs:
        # loop over each of the object detections
        for detection in output:
            # extract the class id (label) and confidence (as a probability) of
            # the current object detection
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            # discard weak predictions by ensuring the detected
            # probability is above a minimum confidence
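            # Hedged completion of the truncated loop above; the 0.5
            # confidence threshold is an assumption.
            if confidence > 0.5:
                # YOLO outputs normalized center-x, center-y, width, height;
                # scale them back to the original frame size
                box = detection[:4] * np.array([w, h, w, h])
                center_x, center_y, width, height = box.astype('int')
                boxes.append([int(center_x - width / 2),
                              int(center_y - height / 2),
                              int(width), int(height)])
                confidences.append(float(confidence))
                class_ids.append(class_id)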
Example #11
class Car_DC():
    def __init__(self,
                 src_dir,
                 dst_dir,
                 car_cfg_path=local_car_cfg_path,
                 car_det_weights_path=local_car_det_weights_path,
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        model initialization
        """
        # super parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize vehicle detection model
        self.detector = Darknet(car_cfg_path)
        self.detector.load_weights(car_det_weights_path)
        # set input dimension of image
        self.detector.net_info['height'] = self.inp_dim
        self.detector.to(device)
        self.detector.eval()  # evaluation mode
        #print('=> car detection model initiated.')

        # initiate multilabel classifier
        self.classifier = Car_Classifier(num_cls=19,
                                         model_path=local_model_path)

        # initiate imgs_path
        self.imgs_path = [os.path.join(src_dir, x) for x in os.listdir(
            src_dir) if x.endswith('.jpg')]


    def cls_draw_bbox(self, output, orig_img):
        """
        1. predict vehicle's attributes based on bbox of vehicle
        2. draw bbox to orig_img
        """
        labels = []
        pt_1s = []
        pt_2s = []

        # 1
        for det in output:
            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # turn BGR back to RGB
            ROI = Image.fromarray(
                orig_img[pt_1[1]: pt_2[1],
                         pt_1[0]: pt_2[0]][:, :, ::-1])
            # ROI.show()

            # call classifier to predict
            car_color, car_direction, car_type = self.classifier.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            labels.append(label)
            print('=> predicted label: ', label)

        # 2
        color = (0, 215, 255)
        for i, det in enumerate(output):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get str text size
            txt_size = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # draw text background rect
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)  # text

            # draw text
            cv2.putText(orig_img, labels[i], (pt_1[0], pt_1[1]),  # pt_1[1] + txt_size[1] + 4
                        cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2)

    def process_predict(self,
                        prediction,
                        prob_th,
                        num_cls,
                        nms_th,
                        inp_dim,
                        orig_img_size):
        """
        processing detections
        """
        scaling_factor = min([inp_dim / float(x)
                              for x in orig_img_size])  # W, H scaling factor
        output = post_process(prediction,
                              prob_th,
                              num_cls,
                              nms=True,
                              nms_conf=nms_th,
                              CUDA=True)  # post-process such as nms

        if type(output) != int:

            output[:, [1, 3]] -= (inp_dim - scaling_factor *
                                  orig_img_size[0]) / 2.0  # x, w
            output[:, [2, 4]] -= (inp_dim - scaling_factor *
                                  orig_img_size[1]) / 2.0  # y, h
            output[:, 1:5] /= scaling_factor
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(
                    output[i, [1, 3]], 0.0, orig_img_size[0])
                output[i, [2, 4]] = torch.clamp(
                    output[i, [2, 4]], 0.0, orig_img_size[1])

        # keep only the largest (widest) detected vehicle
        if type(output) == int or len(output) == 1:
            return output
        else:
            widths = (output[:, 3] - output[:, 1]).tolist()  # bbox widths
            max_index = int(np.argmax(widths))               # index of the widest box
            output = output[max_index].unsqueeze(0)          # keep the batch dimension
            return output

    def detect_classify(self):
        """
        detect and classify
        """
        for x in self.imgs_path:
            # read image data
            img = Image.open(x)
            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data to device

            # vehicle detection
            prediction = self.detector.forward(img2det, CUDA=True)

            # calculating scaling factor
            orig_img_size = list(img.size)
            output = self.process_predict(prediction,
                                          self.prob_th,
                                          self.num_classes,
                                          self.nms_th,
                                          self.inp_dim,
                                          orig_img_size)

            #orig_img = cv2.cvtColor(np.asarray(
                #img), cv2.COLOR_RGB2BGR)  # RGB => BGR
            #if type(output) != int:
                #self.cls_draw_bbox(output, orig_img)
                #dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                #if not os.path.exists(dst_path):
                    #cv2.imwrite(dst_path, orig_img)

            # [left-up(x), left-up(y), right-down(x), right-down(y)] -- when more than one target was detected
            x_left = output[:, 1].item()
            y_left = output[:, 2].item()
            x_right = output[:, 3].item()
            y_right = output[:, 4].item()

            # centroid [x, y]
            x_centriod = (x_left + x_right) / 2
            y_centriod = (y_left + y_right) / 2
            w_rect = x_right - x_left
            h_rect = y_right - y_left

            # 4 corner points
            x_leftup = x_centriod - w_rect/2
            y_leftup = y_centriod - h_rect/2
            x_leftdown = x_centriod - w_rect / 2
            y_leftdown = y_centriod + h_rect / 2
            x_rightup = x_centriod + w_rect / 2
            y_rightup = y_centriod - h_rect / 2
            x_rightdown = x_centriod + w_rect / 2
            y_rightdown = y_centriod + h_rect / 2

            # new lists to deposit the 4 corners point
            leftup = [int(x_leftup), int(y_leftup)]
            leftdown = [int(x_leftdown), int(y_leftdown)]
            rightup = [int(x_rightup), int(y_rightup)]
            rightdown = [int(x_rightdown), int(y_rightdown)]

            return leftup, leftdown, rightup, rightdown
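
# Hypothetical driver: this Car_DC variant returns the four corner points of
# the widest detected vehicle in the first image it processes.
dc = Car_DC(src_dir='./imgs', dst_dir='./out')
leftup, leftdown, rightup, rightdown = dc.detect_classify()
print(leftup, leftdown, rightup, rightdown)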
Example #12
class Car_DC():
    def __init__(self,
                 src_dir,
                 dst_dir,
                 car_cfg_path=local_car_cfg_path,
                 car_det_weights_path=local_car_det_weights_path,
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        model initialization
        """
        # super parameters
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize vehicle detection model
        self.detector = Darknet(car_cfg_path)
        self.detector.load_weights(car_det_weights_path)
        # set input dimension of image
        self.detector.net_info['height'] = self.inp_dim
        self.detector.to(device)
        self.detector.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initiate multilabel classifier
        self.classifier = CarClassifier(num_cls=19,
                                        model_path=local_model_path)

        # initiate imgs_path
        self.imgs_path = [
            os.path.join(src_dir, x) for x in os.listdir(src_dir)
            if x.endswith('.jpg')
        ]

    def cls_draw_bbox_write(self, output, orig_img, imgobj, img_path):
        """
        1. predict vehicle's attributes based on bbox of vehicle
        2. draw bbox to orig_img
        """
        labels = []
        pt_1s = []
        pt_2s = []

        # 1
        for det in output:
            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            try:
                # turn BGR back to RGB
                ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1],
                                               pt_1[0]:pt_2[0]][:, :, ::-1])
                # ROI.show()
                # call classifier to predict
                car_color, car_direction, car_type = self.classifier.predict(
                    ROI)
                label = str(car_color + ' ' + car_direction + ' ' + car_type)
                labels.append(label)
                print('=> predicted label: ', label)
            except Exception:
                print('no detected area')
                return

        # 2
        color = (0, 215, 255)  # bbox color
        for i, det in enumerate(output):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)
            img_temp = imgobj[pt_1[1]:pt_2[1], pt_1[0]:pt_2[0]]
            dst_path = self.dst_dir + '/' + os.path.split(
                img_path)[1] + labels[i] + str(i) + '.jpg'
            if not os.path.exists(dst_path):
                cv2.imwrite(dst_path, img_temp)

            # get text size for this detection's label
            txt_size = cv2.getTextSize(labels[i], cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # draw text background rect
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)  # text

            # draw text
            cv2.putText(
                orig_img,
                labels[i],
                (pt_1[0], pt_1[1]),  # pt_1[1] + txt_size[1] + 4
                cv2.FONT_HERSHEY_PLAIN,
                2,
                [225, 255, 255],
                2)
        return labels

    def cls_draw_bbox(self, output, orig_img):
        """
        1. predict vehicle's attributes based on bbox of vehicle
        2. draw bbox to orig_img
        """
        labels = []
        pt_1s = []
        pt_2s = []

        # 1
        for det in output:
            # rectangle points
            pt_1 = tuple(det[1:3].int())  # the left-up point
            pt_2 = tuple(det[3:5].int())  # the right down point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            try:
                # turn BGR back to RGB
                ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1],
                                               pt_1[0]:pt_2[0]][:, :, ::-1])
                # ROI.show()
                # call classifier to predict
                car_color, car_direction, car_type = self.classifier.predict(
                    ROI)
                label = str(car_color + ' ' + car_direction + ' ' + car_type)
                labels.append(label)
                # print('=> predicted label: ', label)
            except Exception:
                print('no detected area')
                return
        # 2
        color = (0, 215, 255)
        for i, det in enumerate(output):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get text size for this detection's label
            txt_size = cv2.getTextSize(labels[i], cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
            # pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] + txt_size[1] + 5
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # draw text background rect
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)  # text

            # draw text
            cv2.putText(
                orig_img,
                labels[i],
                (pt_1[0], pt_1[1]),  # pt_1[1] + txt_size[1] + 4
                cv2.FONT_HERSHEY_PLAIN,
                2,
                [225, 255, 255],
                2)

    def process_predict(self, prediction, prob_th, num_cls, nms_th, inp_dim,
                        orig_img_size):
        """
        processing detections
        """
        scaling_factor = min([inp_dim / float(x)
                              for x in orig_img_size])  # W, H scaling factor
        output = post_process(prediction,
                              prob_th,
                              num_cls,
                              nms=True,
                              nms_conf=nms_th,
                              CUDA=use_cuda)  # post-process such as nms

        if type(output) != int:
            output[:,
                   [1, 3]] -= (inp_dim -
                               scaling_factor * orig_img_size[0]) / 2.0  # x, w
            output[:,
                   [2, 4]] -= (inp_dim -
                               scaling_factor * orig_img_size[1]) / 2.0  # y, h
            output[:, 1:5] /= scaling_factor
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                orig_img_size[0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                orig_img_size[1])
        return output

    def detect_classify(self):
        """
        detect and classify
        """
        for x in self.imgs_path:
            # read image data
            img = Image.open(x)
            imgobj = cv2.imread(x)
            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data to device

            # vehicle detection
            prediction = self.detector.forward(img2det, CUDA=use_cuda)

            # calculating scaling factor
            orig_img_size = list(img.size)
            output = self.process_predict(prediction, self.prob_th,
                                          self.num_classes, self.nms_th,
                                          self.inp_dim, orig_img_size)

            orig_img = cv2.cvtColor(np.asarray(img),
                                    cv2.COLOR_RGB2BGR)  # RGB => BGR
            if type(output) != int:
                car_info = self.cls_draw_bbox_write(output, orig_img, imgobj,
                                                    x)
                dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                if not os.path.exists(dst_path):
                    cv2.imwrite(dst_path, orig_img)
                return len(car_info) if car_info else 0
            else:
                return 0

    def detect_classify_video(self, video_path, res_path):
        """
        detect in video frames
        """
        # open the video
        videoCapture = cv2.VideoCapture(video_path)

        # get the frame rate and frame size
        fps = videoCapture.get(cv2.CAP_PROP_FPS)
        size = (int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        # specify the output video codec: I420 for .avi, MJPG for .mp4 (XVID used here)
        videoWriter = cv2.VideoWriter(
            res_path, cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), fps, size)

        # read the first frame
        success, frame = videoCapture.read()

        while success:
            # cv2.imshow("Oto Video", frame)  # display
            cv2.waitKey(int(1000 / int(fps)))  # delay to roughly match the source fps
            # frames from VideoCapture are BGR; swap channels to RGB for detection
            img = cv2.cvtColor(np.asarray(frame), cv2.COLOR_BGR2RGB)
            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data to device

            # vehicle detection
            prediction = self.detector.forward(img2det, CUDA=use_cuda)

            # compute the scaling factor and post-process detections
            orig_img_size = list(size)
            output = self.process_predict(prediction, self.prob_th,
                                          self.num_classes, self.nms_th,
                                          self.inp_dim, orig_img_size)

            orig_img = cv2.cvtColor(np.asarray(img),
                                    cv2.COLOR_RGB2BGR)  # back to BGR for drawing/writing
            if type(output) != int:
                self.cls_draw_bbox(output, orig_img)
                videoWriter.write(orig_img)  # write the annotated frame

            success, frame = videoCapture.read()  # read the next frame

        # release handles so the output container is finalized
        videoCapture.release()
        videoWriter.release()
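
# A hedged usage sketch for the video pipeline above: the enclosing class's
# name and constructor sit outside this snippet, so `app` below stands in
# for an already-constructed instance, and both paths are placeholders:
#
#   app.detect_classify_video('demo.avi', 'demo_result.avi')
#
# Each frame is detected, annotated, and appended to the XVID output stream
# at the source video's fps and frame size.
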
class Car_DR():
    def __init__(self,
                 src_dir,
                 dst_dir,
                 car_cfg_path='./car.cfg',
                 car_det_weights_path='g:/Car_DR/car_360000.weights',
                 inp_dim=768,
                 prob_th=0.2,
                 nms_th=0.4,
                 num_classes=1):
        """
        模型初始化
        """
        # 超参数
        self.inp_dim = inp_dim
        self.prob_th = prob_th
        self.nms_th = nms_th
        self.num_classes = num_classes
        self.dst_dir = dst_dir

        # clear stale .jpg results from dst_dir
        if os.path.exists(self.dst_dir):
            for x in os.listdir(self.dst_dir):
                if x.endswith('.jpg'):
                    os.remove(self.dst_dir + '/' + x)
        else:
            os.makedirs(self.dst_dir)

        # initialize the vehicle detection model and its parameters
        self.Net = Darknet(car_cfg_path)
        self.Net.load_weights(car_det_weights_path)
        self.Net.net_info['height'] = self.inp_dim  # detector input resolution
        self.Net.to(device)
        self.Net.eval()  # evaluation mode
        print('=> car detection model initiated.')

        # initialize the multi-label vehicle attribute classifier
        self.manager = Manager(model_path=model_path, attrib_path=attrib_path)

        # collect the .jpg files under src_dir
        self.imgs_path = [
            os.path.join(src_dir, x) for x in os.listdir(src_dir)
            if x.endswith('.jpg')
        ]

    def cls_draw_bbox(self, output, orig_img):
        """
        orig_img是通过opencv读取的numpy array格式: 通道顺序BGR
        在bbox基础上预测车辆属性
        将bbox绘制到原图上
        """
        labels = []
        pt_1s = []
        pt_2s = []

        # predict vehicle attribute labels
        for det in output:
            # rectangle points
            pt_1 = tuple(det[1:3].int())  # top-left point
            pt_2 = tuple(det[3:5].int())  # bottom-right point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # run the attribute classifier on the cropped ROI: BGR => RGB
            ROI = Image.fromarray(orig_img[pt_1[1]:pt_2[1],
                                           pt_1[0]:pt_2[0]][:, :, ::-1])
            # ROI.show()

            car_color, car_direction, car_type = self.manager.predict(ROI)
            label = str(car_color + ' ' + car_direction + ' ' + car_type)
            labels.append(label)
            print('=> predicted label: ', label)

        # draw bboxes and labels onto the original image
        color = (0, 215, 255)
        for i, det in enumerate(output):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            # draw bounding box
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=2)

            # get the text size of this box's label
            txt_size = cv2.getTextSize(labels[i], cv2.FONT_HERSHEY_PLAIN, 2,
                                       2)[0]
            # the text background extends upward from the box's top-left corner
            pt_2 = pt_1[0] + txt_size[0] + 3, pt_1[1] - txt_size[1] - 5

            # draw the filled text background rect
            cv2.rectangle(orig_img, pt_1, pt_2, color, thickness=-1)

            # draw the label text
            cv2.putText(
                orig_img,
                labels[i],
                (pt_1[0], pt_1[1]),
                cv2.FONT_HERSHEY_PLAIN,
                2,
                [225, 255, 255],
                2)
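
    # Note on the ROI slice in cls_draw_bbox: orig_img[y1:y2, x1:x2][:, :, ::-1]
    # first crops the box region, then reverses the channel axis, turning the
    # OpenCV BGR crop into the RGB order the attribute classifier expects; the
    # channel reversal is a numpy view, not a copy.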

    def cls_and_draw(self, output, orig_img):
        """
        orig_img is a PIL Image (RGB channel order);
        predict vehicle attributes for each bbox, then draw the bboxes
        and labels. NB: each output row holds corner coordinates
        (x1, y1, x2, y2), and drawing on a fresh np.asarray copy per call
        would be thrown away, so draw on a single BGR array and return it.
        """
        labels = []
        pt_1s = []
        pt_2s = []

        # predict vehicle attribute labels
        for det in output:
            # rectangle corners
            pt_1 = tuple(det[1:3].int())  # top-left point
            pt_2 = tuple(det[3:5].int())  # bottom-right point
            pt_1s.append(pt_1)
            pt_2s.append(pt_2)

            # crop the ROI from the PIL image: (left, upper, right, lower)
            box = (int(pt_1[0]), int(pt_1[1]), int(pt_2[0]), int(pt_2[1]))
            ROI = orig_img.crop(box)

            car_color, car_direction, car_type = self.manager.predict(ROI)
            label = car_color + ' ' + car_direction + ' ' + car_type
            print('=> label: ', label)
            labels.append(label)

        # draw bboxes and labels onto one BGR copy of the original image
        img_draw = cv2.cvtColor(np.asarray(orig_img), cv2.COLOR_RGB2BGR)
        color = (0, 215, 255)
        for i in range(len(pt_1s)):
            pt_1 = pt_1s[i]
            pt_2 = pt_2s[i]

            cv2.rectangle(img_draw, pt_1, pt_2, color,
                          thickness=2)  # bounding box

            txt_size = cv2.getTextSize(labels[i], cv2.FONT_HERSHEY_PLAIN, 2,
                                       2)[0]  # text size
            bg_pt = pt_1[0] + txt_size[0] + 4, pt_1[1] + txt_size[1] + 4
            cv2.rectangle(img_draw, pt_1, bg_pt, color,
                          thickness=-1)  # text background
            cv2.putText(img_draw, labels[i],
                        (pt_1[0], pt_1[1] + txt_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 2, [225, 255, 255], 2)
        return img_draw

    def predict(self):
        """
        批量检测和识别, 将检测, 识别结果输出到dst_dir
        """
        for x in self.imgs_path:
            # read image data
            img = Image.open(x)
            img2det = process_img(img, self.inp_dim)
            img2det = img2det.to(device)  # put image data on the device

            # vehicle detection
            prediction = self.Net.forward(img2det, CUDA=use_cuda)

            # compute the scaling factor and post-process detections
            orig_img_size = list(img.size)
            output = process_predict(prediction, self.prob_th,
                                     self.num_classes, self.nms_th,
                                     self.inp_dim, orig_img_size)

            orig_img = cv2.cvtColor(np.asarray(img),
                                    cv2.COLOR_RGB2BGR)  # RGB => BGR
            if type(output) != int:
                # draw detection bboxes onto the original image
                # draw_car_bbox(output, orig_img)
                self.cls_draw_bbox(output, orig_img)
                # self.cls_and_draw(output, img)
                dst_path = self.dst_dir + '/' + os.path.split(x)[1]
                if not os.path.exists(dst_path):
                    cv2.imwrite(dst_path, orig_img)
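

# A minimal usage sketch for Car_DR (both directories are placeholders;
# cfg/weights paths fall back to the defaults baked into __init__):
if __name__ == '__main__':
    dr = Car_DR(src_dir='./test_imgs',     # placeholder input directory
                dst_dir='./test_result')   # placeholder output directory
    dr.predict()  # annotated copies are written to dst_dir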