Example no. 1
def process_detections(det, img_shape, img0):
    """Process detections."""
    output_dict = {"shellfishDetection": list()}
    gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
    if det is not None and len(det):
        # Rescale boxes from img_size to img0 size
        det[:, :4] = scale_coords(img_shape, det[:, :4], img0.shape).round()

        # Write results
        for *xyxy, conf, cls in reversed(det):
            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                    gn).view(-1).tolist()  # normalized xywh
            output_dict["shellfishDetection"].append({
                "boundingPoly": {
                    "normalizedVertices": [{
                        "x": xywh[0],
                        "y": xywh[1],
                        "width": xywh[2],
                        "height": xywh[3],
                    }]
                },
                "name":
                NAMES[int(cls)],
                "score":
                float(conf.numpy()),
            })

            label = '%s %.2f' % (NAMES[int(cls)], conf)
            plot_one_box(xyxy,
                         img0,
                         label=label,
                         color=COLORS[int(cls)],
                         line_thickness=3)
    return output_dict
def corner_detection(corner_model, imgple, im0, xyxy, colors, cls, c1, c2):
    imgple_ori = deepcopy(imgple)
    imgple = cv2.cvtColor(imgple, cv2.COLOR_BGR2RGB)
    h_ple, w_ple, _ = imgple.shape
    imgple = Image.fromarray(imgple).convert('RGB')
    imgple = test_data_transforms(imgple)
    if torch.cuda.is_available():
        imgple = imgple.unsqueeze(0).cuda()
    else:
        imgple = imgple.unsqueeze(0)
    output = corner_model(imgple)
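    # The corner model is assumed to output 8 normalized values: the (x, y) of the
    # left-up, left-down, right-down and right-up corners of the crop, each in [0, 1].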
    luc_x, luc_y, ldc_x, ldc_y, rdc_x, rdc_y, ruc_x, ruc_y = tuple(
        output.detach().cpu().numpy()[0])
    # corner coordinates within the plate crop
    luc_xo, luc_yo = int(luc_x * w_ple), int(luc_y * h_ple)
    ldc_xo, ldc_yo = int(ldc_x * w_ple), int(ldc_y * h_ple)
    rdc_xo, rdc_yo = int(rdc_x * w_ple), int(rdc_y * h_ple)
    ruc_xo, ruc_yo = int(ruc_x * w_ple), int(ruc_y * h_ple)
    # corner coordinates in the full image, offset by the crop origin c1
    luc_x, luc_y = int(luc_x * w_ple + c1[0]), int(luc_y * h_ple + c1[1])
    ldc_x, ldc_y = int(ldc_x * w_ple + c1[0]), int(ldc_y * h_ple + c1[1])
    rdc_x, rdc_y = int(rdc_x * w_ple + c1[0]), int(rdc_y * h_ple + c1[1])
    ruc_x, ruc_y = int(ruc_x * w_ple + c1[0]), int(ruc_y * h_ple + c1[1])
    cv2.circle(im0, (rdc_x, rdc_y), 3, [255, 0, 0], 1)
    cv2.circle(im0, (ldc_x, ldc_y), 3, [255, 0, 0], 1)
    cv2.circle(im0, (luc_x, luc_y), 3, [255, 0, 0], 1)
    cv2.circle(im0, (ruc_x, ruc_y), 3, [255, 0, 0], 1)
    mean_width = 190  # np.mean(widths)
    mean_height = 60  # np.mean(height)
    plot_one_box(xyxy, im0, color=colors[int(cls)], line_thickness=3)
    start_points = [[luc_xo, luc_yo], [ruc_xo, ruc_yo], [ldc_xo, ldc_yo],
                    [rdc_xo, rdc_yo]]
    warp_img = warp(mean_height, mean_width, start_points, imgple_ori)
    return warp_img, im0
def main_process(input_img):
	img0 = input_img.copy()

	img = letterbox(img0, new_shape=imgsz)[0]
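	# letterbox resizes and pads the input to the model size while keeping the aspect ratio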
	img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
	img = np.ascontiguousarray(img)

	img = torch.from_numpy(img).to(device)
	img = img.half() if half else img.float()
	img /= 255.0
	if img.ndimension() == 3:
		img = img.unsqueeze(0)

	t1 = time_synchronized()
	pred = model(img, augment=True)[0]
	pred = non_max_suppression(pred, my_confidence, my_threshold, classes=my_filterclasses, agnostic=None)
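	# non_max_suppression filters overlapping boxes using the confidence and IoU thresholds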
	t2 = time_synchronized()

	total = 0
	for i, det in enumerate(pred):
		gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]
		if det is not None and len(det):
			det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
			for *xyxy, conf, cls in reversed(det):
				xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
				label = '%sbaht (%.0f%%)' % (names[int(cls)], conf*100)
				total += int(names[int(cls)])
				plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)
				print(label)
	print('Done. (%.3fs)' % (t2 - t1))
	
	# cv2.rectangle(img0,(0,10),(250,90),(0,0,0),-1)
	img0 = cv2.putText(img0, "total "+str(total)+" Baht", (10,45+30*3), cv2.FONT_HERSHEY_DUPLEX, 1, (0,0,255), 2)
	
	return img0
def objectdetect(frame, count):
    dict_object = {}
    img = letterbox(frame, new_shape=imgsz)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)
    pred = model(img, augment=opt.augment)[0]
    pred = non_max_suppression(pred,
                               opt.conf_thres,
                               opt.iou_thres,
                               classes=opt.classes)
    for index, detect in enumerate(pred):
        if detect is not None and len(detect):
            # Rescale boxes from img_size to im0 size
            detect[:, :4] = scale_coords(img.shape[2:], detect[:, :4],
                                         frame.shape).round()
            for *xyxy, conf, cls in detect:
                label = names[int(cls)]
                x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(
                    xyxy[3])
                dict_object[label] = frame[y1:y2, x1:x2]
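                # crops are keyed by class name, so a later detection of the same class overwrites the earlier crop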
                plot_one_box(xyxy,
                             frame,
                             label=label,
                             color=colors[int(cls)],
                             line_thickness=3)
    cv2.imwrite(f'images/frame{count}.jpg', frame)
    plt.imshow(frame)
    plt.show()
    return dict_object
def webcam_out(q1, q2, q3, q4):
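    # Queues: q1 = frames, q2 = (box, label, colors, name) from the detector, q3 = (poses, crop region); the box is forwarded to q4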
    score = 0
    Before_flag = False
    while True:
        if not q1.empty():
            frame = q1.get()
            if not q2.empty():
                q2num, label, colors, name = q2.get()
                q4.put(q2num)
            if not q3.empty():
                poses, num = q3.get()
            try:
                frame2 = frame[num[1]:num[3], num[0]:num[2]].copy()
                canvas, score, Before_flag, status = draw_person_pose(
                    frame2, poses, score, Before_flag, name)
                frame[num[1]:num[3], num[0]:num[2]] = canvas
            except:
                pass
            try:
                plot_one_box(q2num,
                             frame,
                             label=label,
                             color=colors,
                             line_thickness=3)
            except:
                pass
            cv2.imshow("webcam", frame)
        if cv2.waitKey(1) > 0: break
Example no. 6
def draw_bbox(img, pred, boxes):

    img_c = img.copy()
    if boxes.shape != torch.Size([0]):
        for box in boxes:

            x1 = int((box[1] - box[3] // 2))
            y1 = int((box[2] - box[4] // 2))
            x2 = int((box[1] + box[3] // 2))
            y2 = int((box[2] + box[4] // 2))
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
    if pred is not None:
        for box in pred:
            new_line = None
            x1 = int(box[0])
            y1 = int(box[1])
            x2 = int(box[2])
            y2 = int(box[3])
            conf = box[4]
            cls = box[5]
            # if cls == 0:
            # x = int((x2 - x1)/2+x1)
            # y = int((y2 - y1)/2+y1)
            # (x,y1)
            # (x,y2)
            # (x1,y)
            # (x2,y)
            # quartering guide lines
            # cv2.line(img,(x,y1),(x,y2),(114,114,114),2)
            # cv2.line(img,(x1,y),(x2,y),(114,114,114),2)
            # determine whether the detection is the vehicle front or rear
            # for box1 in pred:
            #     cls1 = box1[5]
            #     if cls1 > 5:
            #         x1_ = int(box1[0])
            #         y1_ = int(box1[1])
            #         x2_ = int(box1[2])
            #         y2_ = int(box1[3])
            #         if x1_>x1 and x2_<x2 and y1_>y1 and y2_<y2:
            #             c_y1_ = (y2_-y1_)/2+y1_
            #             d = c_y1_ - y1
            #             if d > (y2 - y1)/2:
            #                 new_line = ' FRONT'
            obj_conf = box[6]
            cls_conf = box[7]
            text = '%s|%.2f' % (names[int(cls)], conf)
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.putText(img, text, (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX,
                        0.75, (0, 0, 255), 2)
            label = '%s %.2f %.5f %.5f' % (names[int(cls)], conf, obj_conf,
                                           cls_conf)
            if new_line:
                label += new_line
            plot_one_box(box,
                         img_c,
                         label=label,
                         color=colors[int(cls)],
                         line_thickness=3)
    return img, img_c
def process_image(transform,processing_model,img):
    global network, class_names, class_colors
    tracks = []
    # imgs = []
    (device,model,names,colors,imgsz) = processing_model
    # view_img = True
    try:
        im0 = img.copy()

        img = letterbox(im0)[0] #, new_shape=(imgsz,imgsz))[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        img = torch.from_numpy(img).to(device)
        
        img = img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        pred = model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred, 0.25, 0.45, classes=0)#, agnostic=opt.agnostic_nms)

        # # Apply Classifier
        # if classify:
        #     pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image

            s = '%g: ' % i

            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):

                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

        img = im0
        tracks = pred
    except Exception as e:
        track = traceback.format_exc()
        print(track)
        print("YOLO 5 Exception",e)
        pass                
    return tracks,img
Example no. 8
def detect():
    imgsz = check_img_size(512, s=model.stride.max())  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference

    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model_reco(
        img.half() if half else img) if device.type != 'cpu' else None
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once to warm up
    img_list = [
        file for file in os.listdir('test_img') if file.endswith('.jpg')
    ]
    for j in img_list:
        start = time.time()
        new_name = j[:-4] + '.png'
        img0 = cv2.imread('test_img/' + j)
        img = letterbox(img0, new_shape=512)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        pred = model(img, augment=False)[0]
        pred = non_max_suppression(pred, 0.4, 0.5, classes=0, agnostic=False)

        # Process detections
        for i, det in enumerate(pred):
            gn = torch.tensor(img0.shape)[[1, 0, 1,
                                           0]]  # normalization gain whwh
            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          img0.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(
                        xyxy[2]), int(xyxy[3])
                    crop = img0[y1:y2, x1:x2]
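                    # each detected region is cropped and passed to the second-stage recognition model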
                    value = final_test.detect(crop, device, model_reco, half)
                    print(value)
                    plot_one_box(xyxy,
                                 img0,
                                 label=value,
                                 color=colors[int(cls)],
                                 line_thickness=3)
        cv2.imwrite('result/{}'.format(new_name), img0)
        end = time.time()
        print('Time::', end - start)
Example no. 9
    def prediction(self, frame, sceneId, timeID):
        is_warning = False
        scene = self.sm.get_scene(sceneId)
        img_org = frame.copy()
        img = letterbox(frame, new_shape=640)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device).half()
        img /= 255.0
        img = img.unsqueeze(0)
        t1 = time_synchronized()
        # Inference
        pred = self.model(img)[0]
        pred = non_max_suppression(pred, self.conf_thresh, self.iou_thresh)
        t2 = time_synchronized()
        #print('detect inference cost. (%.3fs)' % (t2 - t1))
        # Process detections
        for i, det in enumerate(pred):
            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], frame.shape).round()

        if det is not None and len(det):
            for *box, conf, cls in reversed(det):
                label = f'{self.names[int(cls)]} {conf:.2f}'
                c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
                w_c = int(box[2])-int(box[0])
                h_c = int(box[3])-int(box[1])
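                # crop roughly the upper-body band of the person box: from 1/6 to 7/12 of its height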
                c1_new,c2_new = (int(box[0]), int(box[1]+h_c/6)), (int(box[2]), int(box[1]+h_c*7/12))
                point = (int((box[0]+box[2])/2), int(box[3]))
                b_in_zone = scene.point_warn_zone_test(point)
                if not b_in_zone:
                    continue
                if self.names[int(cls)] in ['person']:
                    #plot_one_box(box, frame, label=label, color=(0,255,0), line_thickness=3)
                    #frame_crop = frame[c1[1]:c2[1], c1[0]:c2[0]]
                    frame_crop = frame[c1_new[1]:c2_new[1], c1_new[0]:c2_new[0]]
                    is_warning = self.prediction2(frame_crop)
                    plot_one_box(box, frame, label=label, color=(0,255,0), line_thickness=3)
                    cv2.rectangle(frame, c1_new, c2_new, (0,0,255), 3)
                    if is_warning:
                        break
                    
        cv2.polylines(frame, scene.warn_polygons, True, (0, 255, 255), 2)
        if is_warning:
            cv2.putText(frame, "WARNING", (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1,  (0, 0, 255), 2)
            self.write_frame(img_org, sceneId, timeID)
        else:
            cv2.putText(frame, "NORMAL", (5,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        imshow_name = "image"+str(sceneId)
        frame_resize = cv2.resize(frame, (1280, 720))
        cv2.imshow(imshow_name, frame_resize)
        if cv2.waitKey(1) & 0xFF in (ord('q'), ord('Q')):
            raise Exception("exit")
        return is_warning, frame
Example no. 10
    def prediction(self, frame, sceneId, timeID, zone):
        is_warning = False
        img_org = frame.copy()
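        # reshape the zone polygon to the (N, 1, 2) point layout expected by cv2.polylines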
        zone = [zone.reshape(zone.shape[0], 1, zone.shape[1])]
        img = letterbox(frame, new_shape=640)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device).half()
        img /= 255.0
        img = img.unsqueeze(0)
        t1 = time_synchronized()
        # Inference
        with torch.no_grad():
            pred = self.model(img)[0]
        pred = non_max_suppression(pred, self.conf_thresh, self.iou_thresh)
        t2 = time_synchronized()
        #print('Vehicle detect inference cost. (%.3fs)' % (t2 - t1))
        # Process detections
        for i, det in enumerate(pred):
            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          frame.shape).round()

        if det is not None and len(det):
            for *box, conf, cls in reversed(det):
                label = f'{self.names[int(cls)]} {conf:.2f}'
                c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
                point = (int((box[0] + box[2]) / 2), int(
                    (box[1] + box[3]) / 2))
                b_in_zone = self.point_zone_test(point, zone)
                if not b_in_zone:
                    continue
                if self.names[int(cls)] in [
                        'car', 'motorcycle', 'bus', 'truck'
                ]:
                    plot_one_box(box,
                                 frame,
                                 label=label,
                                 color=(0, 0, 255),
                                 line_thickness=3)
                    is_warning = True
        cv2.polylines(frame, zone, True, (0, 255, 255), 2)
        if is_warning:
            cv2.putText(frame, "WARNING", (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 0, 255), 2)
            self.write_frame(img_org, sceneId, timeID)
        else:
            cv2.putText(frame, "NORMAL", (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 0), 2)
        imshow_name = "vehicle" + str(sceneId)
        cv2.imshow(imshow_name, frame)
        if cv2.waitKey(1) & 0xFF in (ord('q'), ord('Q')):
            raise Exception("exit")
        return is_warning, frame
def webcam_out(q1, q2, q3, q4, q5, q6, q7):
    Squat_score, Bench_score, Dead_score = 0, 0, 0
    Squat_Before_flag, Bench_Before_flag, Dead_Before_flag = False, False, False
    fourcc = 'mp4v'  # output video codec
    x = 0
    while True:
        if not q1.empty():
            frame = q1.get()
            if not q2.empty():
                q2num, label, colors, name = q2.get()
                q4.put(q2num)
            if not q3.empty():
                poses, num = q3.get()
            try:
                frame2 = frame[num[1]:num[3], num[0]:num[2]].copy()
                canvas, Squat_score, Bench_score, Dead_score, Squat_Before_flag, Bench_Before_flag, Dead_Before_flag, \
                squat_status, bench_status, dead_status = draw_person_pose(frame2, poses, Squat_score, Bench_score, Dead_score,\
                     Squat_Before_flag, Bench_Before_flag, Dead_Before_flag, name)
                frame[num[1]:num[3], num[0]:num[2]] = canvas
                outq6 = Squat_score, Bench_score, Dead_score
                q6.put(outq6)
            except:
                pass
            try:
                plot_one_box(q2num,
                             frame,
                             label=label,
                             color=colors,
                             line_thickness=3)
            except:
                pass
            q5.put(frame)
        if not q7.empty():
            fps, w, h, source, ret = q7.get()
            try:
                vid_writer
            except UnboundLocalError:
                vid_writer = cv2.VideoWriter(
                    f"{source[:-4]}_pose_estimation.mp4",
                    cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
            try:
                vid_writer.write(frame)
            except UnboundLocalError:
                pass
            x = 0
        elif q7.empty():
            try:
                vid_writer
                if x == 2000:
                    vid_writer.release()
                    break
            except UnboundLocalError:
                pass
            x += 1
Example no. 12
def inference(img_path: Path):
    img, size_orig, size = load_image(str(img_path),
                                      img_size=imgsz,
                                      augment=augment)
    # img = cv2.imread(str(img_path))
    if rgb:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    im0s = img.copy()

    img = torch.from_numpy(img).to(device)
    img = img.permute(2, 0, 1)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    t1 = time_synchronized()
    pred = model(img, augment=augment)[0]

    # Apply NMS
    pred = non_max_suppression(pred,
                               conf_thres,
                               iou_thres,
                               classes=classes,
                               agnostic=agnostic_nms)
    t2 = time_synchronized()

    torch.cuda.synchronize()
    # INFO: Apply Classifier deleted

    # Process detections
    for i, det in enumerate(pred):  # detections per image
        p, s, im0 = img_path, '', im0s

        s += '%gx%g ' % img.shape[2:]  # print string
        h, w = im0s.shape[:2]
        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      im0.shape).round()

            for *xyxy, conf, cls in reversed(det):
                label = '%s %.2f' % (names[int(cls)], conf)
                im0 = plot_one_box(xyxy,
                                   im0,
                                   label=label,
                                   color=colors[int(cls)],
                                   line_thickness=3,
                                   font=font)

        # Print time (inference + NMS)
        logger.info('%sDone. (%.3fs)' % (s, t2 - t1))

    return im0
Example no. 13
    def __call__(self, trte='test', cf=False):
        p = f'{self.dp}/images/{trte}'

        imgs = [
            f'{self.dp}/images/{trte}/{x}' for x in os.listdir(p)
            if x.endswith('.jpg')
        ]
        imgs = sorted(imgs)
        for i, imgp in enumerate(imgs):
            stem = Path(imgp).stem
            labelp = f'{self.dp}/labels/{trte}/{Path(imgp).stem}.txt'
            img = cv2.imread(imgp)
            h, w, c = img.shape

            with open(labelp, 'r') as f:
                label = f.readlines()
            label = np.array([x.split() for x in label], dtype=np.float32)

            classes = label[:, 0]
            bboxes = label[:, 1::]
            bboxes = xywh2xyxy(bboxes)
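            # labels are normalized xywh; convert to xyxy and scale to pixel coordinates before drawing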
            for j in range(len(label)):
                cls = classes[j]
                bbox = bboxes[j]
                bbox[0] *= w
                bbox[1] *= h
                bbox[2] *= w
                bbox[3] *= h
                plot_one_box(bbox,
                             img,
                             label=f'{self.names[int(cls)]}',
                             color=self.colors[int(cls)])

            print(
                f'imgs: {len(imgs)} stem: {stem} img_shape: {img.shape} lb: {label}'
            )
            # cr = np.any(label[:, 0] == 1)
            crit = 'fogged' in stem if cf else True
            if crit:
                cv2.imshow('xx', img)
                if cv2.waitKey(0) == ord('q'):
                    exit()
Example no. 14
def detect(source, weights, view_img=True, imgsz=640, conf_thres=0.8, iou_thres=0.7, classes=None, agnostic_nms=True,
           focal_distance=0.03, car_height=1.7):
    device = select_device('0')
    half = True
    model = attempt_load(weights, map_location=device)
    model.half()
    dataset = LoadImages(source, img_size=imgsz)
    colors = (0, 0, 255)
    names = model.module.names if hasattr(model, 'module') else model.names

    img = torch.zeros((1, 3, imgsz, imgsz), device=device)
    _ = model(img.half())

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred = model(img, augment=True)[0]
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        for i, det in enumerate(pred):
            p, s, im0 = path, '', im0s
            s += '%gx%g ' % img.shape[2:]

            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                # print(det)
                # print(det.shape)
                # print(det[0][4])
                # max_det = det[0]
                for *xyxy, conf, cls in reversed(det):
                    if view_img:

                        car_img_height = xyxy[3] - xyxy[1]
                        label = '%s %.2f' % (names[int(cls)], conf)
                        # distance = (focal length / object height in image) * real height
                        distance = (focal_distance / car_img_height) * car_height * 10000 - 1
                        result_distance = str(round(distance.item())) + 'm'
                        inner = plot_one_box(xyxy, im0, label=label, color=colors, line_thickness=3)
                        tl = 3 or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # always evaluates to 3; the round(...) fallback is dead code
                        c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
                        tf = max(tl - 1, 1)

                        if inner is True:
                            cv2.putText(im0, result_distance, (c1[0] - 15, c1[1] + 75), 0, tl / 3, [225, 255, 255],
                                        thickness=tf,
                                        lineType=cv2.LINE_AA)

            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration
Example no. 15
def detect(source, save_img=False):
    weights= 'final_weights.pt'
    imgsz = 832
    # Padded resize
    img = letterbox(source, new_shape=imgsz)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = ascontiguousarray(img)

    set_logging()
    device = select_device('')

    model = attempt_load(weights, map_location=device)
    check_img_size(imgsz, s = model.stride.max())

    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0,255) for _ in range(3)] for _ in range(len(names))]

    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    pred = model(img, augment=False)[0]
    pred = non_max_suppression(pred, 0.4, 0.5, classes=None, agnostic=False)

    for i, det in enumerate(pred):

        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], source.shape).round()

            for *xyxy, conf, cls in reversed(det):
                label = '%s %.2f' % (names[int(cls)], conf)
                plot_one_box(xyxy, source, label=label, color=colors[int(cls)], line_thickness=3)

    # cv2.imshow('abc', source)
    # cv2.waitKey(5000)
    return source
Example no. 16
    def predict(self, img0, img=None, draw_bndbox=False, bndbox_format='min_max_list'):
        if img is None:
            img = self.send_whatever_to_device(img0)
        else:
            img = self.send_to_device(img)

        pred = self.model(img, augment=self.augment)[0]
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms)

        det = pred[0]

        if det is not None and len(det):
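            # rescale boxes from the letterboxed input size back to the original image size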
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

        if draw_bndbox:
            for *xyxy, conf, cls in det:
                label = '%s %.2f' % (self.names[int(cls)], conf)
                plot_one_box(xyxy, img0, label=label, color=self.colors[int(cls)])

        if bndbox_format == 'min_max_list':
            min_max_list = self.min_max_list(det)
            return min_max_list
Example no. 17
def draw_bbox(img,pred,boxes):
    img_c = img.copy()
    for box in boxes:
        x1 = int((box[1] - box[3] / 2))
        y1 = int((box[2] - box[4] / 2))
        x2 = int((box[1] + box[3] / 2))
        y2 = int((box[2] + box[4] / 2))
        cv2.rectangle(img, (x1, y1), (x2, y2), (255,0,0), 2)
    if pred is not None:
        for box in pred:
            x1 = int(box[0])
            y1 = int(box[1])
            x2 = int(box[2])
            y2 = int(box[3])
            conf = box[4]
            cls = box[5]
            obj_conf = box[6]
            cls_conf = box[7]
            text = '%s|%.2f'%(names[int(cls)], conf)
            cv2.rectangle(img, (x1, y1), (x2, y2), (0,0,255), 2)
            cv2.putText(img, text, (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,255), 2)
            label = '%s %.2f %.2f %.2f' % (names[int(cls)], conf, obj_conf, cls_conf)
            plot_one_box(box, img_c, label=label, color=colors[int(cls)], line_thickness=3)
    return img,img_c
Example no. 18
def draw_frame(frame_read,
               img,
               pred,
               cls_names,
               colors,
               out_img_name,
               save_img=True):

    # Process detections for per image
    for i, det in enumerate(pred):
        print("det: ", det)
        s_log = ''
        if det is not None and len(det):
            # Rescale boxes from img_size to im_ori size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                      frame_read.shape).round()

            # Print results
            for c in det[:, -1].unique():
                n = (det[:, -1] == c).sum()  # detections per class
                s_log += '%g %ss, ' % (n, cls_names[int(c)])  # add to string

            # Write results
            for *xyxy, conf, cls in reversed(det):
                # Add bbox to image
                label = '%s %.2f' % (cls_names[int(cls)], conf)
                plot_one_box(xyxy,
                             frame_read,
                             label=label,
                             color=colors[int(cls)])

            print('s_log:  %s Done' % (s_log))

            # Save results (image with detections)
            if save_img:
                cv2.imwrite(out_img_name, frame_read)
def detect():
    # Run inference
    pipe = 0
    pipe = 'http://192.168.1.7:8080/video'
    # pipe = 'video/MVI_4381.MOV'
    cap = cv2.VideoCapture(pipe)
    while True:
        ret_val, frame = cap.read()
        img = letterbox(frame, new_shape=imgsz)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred = model(img, augment=opt.augment)[0]
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes)
        for index, detect in enumerate(pred):
            if detect is not None and len(detect):
                # Rescale boxes from img_size to frame size
                detect[:, :4] = scale_coords(img.shape[2:], detect[:, :4],
                                             frame.shape).round()
                for *xyxy, conf, cls in detect:
                    label = names[int(cls)]
                    plot_one_box(xyxy,
                                 frame,
                                 label=label,
                                 color=colors[int(cls)],
                                 line_thickness=2)
        cv2.imshow("ObjectDetect", frame)
        if cv2.waitKey(1) == ord('q'):  # q to quit
            raise StopIteration
Example no. 20
                    xywh = xyxy2xywh(torch.tensor(xyxy).view(1, 4)).view(-1).tolist()
                    cls = int(cls)
                    img_object.append(xywh)
                    cls_object.append(names[cls])

                    if names[cls] == "hero" and conf > hero_conf:
                        hero_conf = conf
                        hero_index = idx

                    if view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     img0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=2)

                # game logic
                thx = 30  # x-direction threshold when picking up items
                thy = 30  # y-direction threshold when picking up items
                attx = 150  # x-direction threshold when attacking
                atty = 50  # y-direction threshold when attacking

                if current_door(
                        img0) == 1 and time.time() - door1_time_start > 10:
                    door1_time_start = time.time()
                    # move(direct="RIGHT", action_cache=action_cache, press_delay=press_delay,
                    #      release_delay=release_delay)
                    # ReleaseKey(direct_dic["RIGHT"])
Example no. 21
def detect(weights='mdp/weights/weights.pt',
           source='mdp/videos',
           output='mdp/output',
           img_size=416,
           conf_thres=0.01,
           iou_thres=0.5,
           device='',
           classes=None,
           agnostic_nms=False,
           augment=False,
           update=False,
           scale_percent=50):

    save_img = True
    predicted_label = None
    out, imgsz = output, img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        # save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    row_num = 0
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].detach().unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    predicted_label = names[int(cls)]
                    if predicted_label:
                        label_id = label_id_mapping.get(predicted_label)

                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh

                        print(('%s ' * 5 + '\n') % (label_id, *xywh))  # label format

                        # r = requests.post(source, json={'label': label_id})  # send result to rpi
                        # print(r.text)

                        if False and conf < confidence_threshold(label_id):  # fine tune for up arrow (white)
                            # cv2.imshow('ImageWindow', im0)
                            break
                        # if not check_bounding_box(xywh):
                        #     # cv2.imshow('ImageWindow', im0)
                        #     break

                        label = '%s %.2f' % (label_id, conf)
                        good, text = check_bounding_box(xywh, im0.shape[0], im0.shape[1])
                        if not good:
                            label = text

                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                        # cv2.imshow('ImageWindow', im0)

                        break
            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

            if cv2.waitKey(1) == ord('q'):  # q to quit
                raise StopIteration
Example no. 22
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')
    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    # add by zlf at 20201027
    # load FP16 model
    # model=torch.load(weights)['model']
    # for n,p in model.named_parameters():
    #     print(p.dtype)

    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # 20201019 load model by zlf
    # new method of loading weight;only for 'torch.save(model,state_dict())'
    # net = Model('./models/yolov5s.yaml').cuda()
    # state_dict = torch.jit.load('QuantCRNN_1_14000.pt', map_location=torch.device('cpu'))
    # model = state_dict
    # model.half().cuda()
    # model_dict = net.state_dict()
    #
    # for k, v in state_dict.items():
    #     name = k[7:]  # remove `module.`
    #     model_dict[name] = v
    # net.load_state_dict(model_dict, strict=True)
    # model = net
    # imgsz = 320
    # add by zlf at 20201009

    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        #TODO:cudnn.benchmark = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    # names = model.module.names if hasattr(model, 'module') else model.names
    names = [
        'CAR', 'CARPLATE', 'BICYCLE', 'TRICYCLE', 'PEOPLE', 'MOTORCYCLE',
        'LOGO_AUDI', 'LOGO_BENZE', 'LOGO_BENZC', 'LOGO_BMW', 'LOGO_BUICK',
        'LOGO_CHEVROLET', 'LOGO_CITROEN', 'LOGO_FORD', 'LOGO_HONDA',
        'LOGO_HYUNDAI', 'LOGO_KIA', 'LOGO_MAZDA', 'LOGO_NISSAN',
        'LOGO_PEUGEOT', 'LOGO_SKODA', 'LOGO_SUZUKI', 'LOGO_TOYOTA',
        'LOGO_VOLVO', 'LOGO_VW', 'LOGO_ZHONGHUA', 'LOGO_SUBARU', 'LOGO_LEXUS',
        'LOGO_CADILLAC', 'LOGO_LANDROVER', 'LOGO_JEEP', 'LOGO_BYD',
        'LOGO_BYDYUAN', 'LOGO_BYDTANG', 'LOGO_CHERY', 'LOGO_CARRY',
        'LOGO_HAVAL', 'LOGO_GREATWALL', 'LOGO_GREATWALLOLD', 'LOGO_ROEWE',
        'LOGO_JAC', 'LOGO_HAFEI', 'LOGO_SGMW', 'LOGO_CASY', 'LOGO_CHANAJNX',
        'LOGO_CHANGAN', 'LOGO_CHANA', 'LOGO_CHANGANCS', 'LOGO_XIALI',
        'LOGO_FAW', 'LOGO_YQBT', 'LOGO_REDFLAG', 'LOGO_GEELY', 'LOGO_EMGRAND',
        'LOGO_GLEAGLE', 'LOGO_ENGLON', 'LOGO_BAOJUN', 'LOGO_DF', 'LOGO_JINBEI',
        'LOGO_BAIC', 'LOGO_WEIWANG', 'LOGO_HUANSU', 'LOGO_FOTON', 'LOGO_HAIMA',
        'LOGO_ZOTYEAUTO', 'LOGO_MITSUBISHI', 'LOGO_RENAULT', 'LOGO_MG',
        'LOGO_DODGE', 'LOGO_FIAT', 'LOGO_INFINITI', 'LOGO_MINI', 'LOGO_TESLA',
        'LOGO_SMART', 'LOGO_BORGWARD', 'LOGO_JAGUAR', 'LOGO_HUMMER',
        'LOGO_PORSCHE', 'LOGO_LAMBORGHINI', 'LOGO_DS', 'LOGO_CROWN',
        'LOGO_LUXGEN', 'LOGO_ACURA', 'LOGO_LINCOLN', 'LOGO_SOUEAST',
        'LOGO_VENUCIA', 'LOGO_TRUMPCHI', 'LOGO_LEOPAARD', 'LOGO_ZXAUTO',
        'LOGO_LIFAN', 'LOGO_HUANGHAI', 'LOGO_HAWTAI', 'LOGO_REIZ',
        'LOGO_CHANGHE', 'LOGO_GOLDENDRAGON', 'LOGO_YUTONG', 'LOGO_HUIZHONG',
        'LOGO_JMC', 'LOGO_JMCYUSHENG', 'LOGO_LANDWIND', 'LOGO_NAVECO',
        'LOGO_QOROS', 'LOGO_OPEL', 'LOGO_YUEJING'
    ]

    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        f1.write('%s:' % (path.split('/')[-1]))
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()

        pred = model(img, augment=opt.augment)[0]
        # pred = model(img.cuda())

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls, obj_conf, cls_conf in reversed(
                        det):  # add by zlf at 20201026
                    # add by zlf at 20201019
                    x1 = int(xyxy[0].item())
                    y1 = int(xyxy[1].item())
                    x2 = int(xyxy[2].item())
                    y2 = int(xyxy[3].item())
                    f1.write(
                        "[%s,%.2f,%d,%d,%d,%d]" %
                        (names[int(cls.item())], round(
                            (conf.item() * 100), 2), x1, y1, x2, y2))
                    # add by zlf at 20201019

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s|%.2f|%.2f|%.2f' % (names[int(cls)], conf,
                                                       obj_conf, cls_conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
            f1.write('\n')
            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    if os.path.exists(opt.features):
        shutil.rmtree(opt.features)  # delete features output folder
    if os.path.exists(opt.crops):
        shutil.rmtree(opt.crops)  # delete output folder with object crops
    os.makedirs(out)  # make new output folder
    os.makedirs(opt.features)  # make new output folder
    os.makedirs(opt.crops)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # log file dictionary: save frames when track_id object is detected
    log_frames = {"FPS": dataset.cap.get(cv2.CAP_PROP_FPS)}
    print("FRAMES PER SECOND ", dataset.cap.get(cv2.CAP_PROP_FPS))

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Find index corresponding to a person
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (
                    det[:, -1] == idx_person
                ).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :
                               -1]  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
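                    # each tracker row d holds the box corners d[:4] and the track id d[4]; features holds the per-track appearance vectors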
                    for d in trackers:
                        ##### DEEP SORT feature object saver ####
                        track_id = d[4]
                        fname_features = opt.features + '/ID_{}'.format(
                            track_id)
                        fname_crops = opt.crops + '/ID_{}'.format(track_id)
                        if not os.path.exists(fname_features):
                            os.mkdir(fname_features)
                            os.mkdir(fname_crops)
                            log_frames['ID_' + str(track_id)] = []

                        # choose format to save feature arrays on your machine:
                        # https://machinelearningmastery.com/how-to-save-a-numpy-array-to-file-for-machine-learning/
                        save_format = 'csv'
                        filename = fname_features + "/feature_frame_" + str(
                            dataset.frame)
                        if save_format == 'csv':
                            savetxt(filename + '.csv',
                                    features[track_id],
                                    delimiter=',')
                            #data = numpy.loadtxt('data.csv', delimiter=',')
                        elif save_format == 'npy':
                            save(filename + '.npy', features[track_id])
                            #data = numpy.load('data.npy')
                        elif save_format == 'npz':
                            savez_compressed(filename + '.npz',
                                             features[track_id])
                            # dict_data = load('data.npz'); data = dict_data['arr_0']
                        # update log file with track_id detection history
                        log_frames['ID_' + str(track_id)].append(dataset.frame)
                        # save cropped image
                        im_crop = im0[d[1]:d[3], d[0]:d[2], :]
                        cv2.imwrite(filename=fname_crops + "/image_crop_" +
                                    str(dataset.frame) + '.jpg',
                                    img=im_crop)
                        plot_one_box(d[:4],
                                     im0,
                                     label='ID' + str(int(d[4])),
                                     color=colors[1],
                                     line_thickness=1)

            # DEEP SORT: save updated log file
            log_format = 'txt'
            if log_format == 'txt':
                f_log = open(opt.features + "/log_detection.txt", "w")
                f_log.write(str(log_frames))
            elif log_format == 'pkl':
                f_log = open(opt.features + "/log_detection.pkl", "wb")
                pickle.dump(log_frames, f_log)
            f_log.close()
            ###################################

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)
    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
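The snippets above read every runtime setting from a global `opt` namespace that is defined elsewhere in the original files. A minimal sketch of how such an `opt` is typically built with argparse follows; the flag names are inferred from the `opt.*` attributes referenced above (conf_thres, iou_thres, classes, agnostic_nms, features, crops, update, ...) and the defaults are placeholders, not the original author's values.

# Hedged sketch only: flag names inferred from the opt.* attributes used above.
import argparse

def build_opt_sketch():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt')
    parser.add_argument('--source', type=str, default='inference/videos')
    parser.add_argument('--output', type=str, default='inference/output')
    parser.add_argument('--img-size', type=int, default=640)        # -> opt.img_size
    parser.add_argument('--conf-thres', type=float, default=0.4)    # -> opt.conf_thres
    parser.add_argument('--iou-thres', type=float, default=0.5)     # -> opt.iou_thres
    parser.add_argument('--device', default='')
    parser.add_argument('--view-img', action='store_true')
    parser.add_argument('--save-txt', action='store_true')
    parser.add_argument('--classes', nargs='+', type=int)           # filter detections by class id
    parser.add_argument('--agnostic-nms', action='store_true')
    parser.add_argument('--augment', action='store_true')
    parser.add_argument('--update', action='store_true')
    parser.add_argument('--features', type=str, default='inference/features')    # Deep SORT feature dumps
    parser.add_argument('--crops', type=str, default='inference/image_crops')    # per-track image crops
    return parser.parse_args()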
Example n. 24
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    folder_main = out.split('/')[0]
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    folder_features = folder_main + '/features'
    if os.path.exists(folder_features):
        shutil.rmtree(folder_features)  # delete features output folder
    folder_crops = folder_main + '/image_crops'
    if os.path.exists(folder_crops):
        shutil.rmtree(folder_crops)  # delete output folder with object crops
    os.makedirs(out)  # make new output folder
    os.makedirs(folder_features)  # make new output folder
    os.makedirs(folder_crops)  # make new output folder

    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # frames per second
    fps = dataset.cap.get(cv2.CAP_PROP_FPS)
    critical_time_frames = opt.time * fps

    # COUNTER: initialization
    counter = VoteCounter(critical_time_frames, fps)
    print('CRITICAL TIME IS ', opt.time, 'sec, or ', counter.critical_time,
          ' frames')

    # Find index corresponding to a person
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # AlphaPose: initialization
    args_p = update_config(opt.config_alphapose)
    cfg_p = update_config(args_p.ALPHAPOSE.cfg)

    args_p.ALPHAPOSE.tracking = args_p.ALPHAPOSE.pose_track or args_p.ALPHAPOSE.pose_flow

    demo = SingleImageAlphaPose(args_p.ALPHAPOSE, cfg_p, device)

    output_pose = opt.output.split('/')[0] + '/pose'
    if not os.path.exists(output_pose):
        os.mkdir(output_pose)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # COUNTER: compute the urn centroid (1st frame only) and plot a bounding box around it
        if dataset.frame == 1:
            counter.read_urn_coordinates(opt.urn, im0s, opt.radius)
        counter.plot_urn_bbox(im0s)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (
                    det[:, -1] == idx_person
                ).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :
                               -1]  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
                    # tracks inside a critical sphere
                    trackers_inside = []
                    for i, d in enumerate(trackers):
                        plot_one_box(d[:-1],
                                     im0,
                                     label='ID' + str(int(d[-1])),
                                     color=colors[1],
                                     line_thickness=1)

                        # COUNTER
                        d_include = counter.centroid_distance(
                            d, im0, colors[1], dataset.frame)
                        if d_include:
                            trackers_inside.append(d)

                    # ALPHAPOSE: show skeletons for bounding boxes inside the critical sphere
                    if len(trackers_inside) > 0:
                        pose = demo.process('frame_' + str(dataset.frame), im0,
                                            trackers_inside)
                        im0 = demo.vis(im0, pose)
                        demo.writeJson([pose],
                                       output_pose,
                                       form=args_p.ALPHAPOSE.format,
                                       for_eval=args_p.ALPHAPOSE.eval)

                        counter.save_features_and_crops(
                            im0, dataset.frame, trackers_inside, features,
                            folder_main)

            cv2.putText(im0, 'Voted ' + str(len(counter.voters_count)),
                        (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255),
                        2)

            print('NUM VOTERS', len(counter.voters))
            print(list(counter.voters.keys()))

            # COUNTER
            if len(counter.voters) > 0:
                counter.save_voter_trajectory(dataset.frame, folder_main)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
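The example above relies on a VoteCounter class that is not shown in the snippet. The stub below only records the interface implied by the call sites (constructor arguments, method names, and the voters / voters_count / critical_time attributes); the bodies are placeholders, not the original implementation.

class VoteCounterSketch:
    """Hedged stub of the VoteCounter interface inferred from its usage above."""

    def __init__(self, critical_time_frames, fps):
        self.critical_time = critical_time_frames  # frames a track must spend near the urn
        self.fps = fps
        self.voters = {}        # track_id -> trajectory / vote metadata
        self.voters_count = []  # track ids already counted as voters

    def read_urn_coordinates(self, urn, im0s, radius):
        """Locate the urn centroid on the first frame (e.g. from opt.urn) and store the critical radius."""

    def plot_urn_bbox(self, im0s):
        """Draw the critical sphere / box around the urn on the current frame."""

    def centroid_distance(self, track, im0, color, frame):
        """Return True when the track centroid lies inside the critical sphere."""
        return False

    def save_features_and_crops(self, im0, frame, trackers_inside, features, folder_main):
        """Persist Re-ID features and image crops for tracks inside the sphere."""

    def save_voter_trajectory(self, frame, folder_main):
        """Write out the trajectory of every track that has been counted as a voter."""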
Example n. 25
def detect(opt, save_img=False):
    out, source, weights, imgsz, namelist = \
        opt.output, opt.source, opt.weights, opt.img_size, opt.namelist

    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder

    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None

    save_img = True
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half()
              if half else img)  #if device.type != 'cpu' else None  # run once
    idx = 0
    ckname = []
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        idx += 1
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        cnt = 0
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0 = path, '', im0s
            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()
                cntname = 0
                # Write results
                img2 = im0.copy()
                nperson = []
                nname = []
                for *xyxy, conf, cls in reversed(det):

                    if save_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        ########################################################################################################
                        ## create a `classes` variable (the class name)
                        classes = names[int(cls)]
                        ## pass the `classes` variable into the plotting function below
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3,
                                     classes=classes)
                        ## store the coordinates of every object classified as a person in a list
                        if classes == 'person':
                            nperson.append([
                                int(xyxy[0]),
                                int(xyxy[1]),
                                int(xyxy[2]),
                                int(xyxy[3])
                            ])
                        if classes == 'name':
                            nname.append([
                                int(xyxy[0]),
                                int(xyxy[1]),
                                int(xyxy[2]),
                                int(xyxy[3])
                            ])
                ## when the name list is non-empty, overwrite matched regions with the area from the frame copied beforehand
                #print(len(nperson))
                if len(nname) > 0:
                    key = 45
                    for pi in range(len(nperson)):
                        check = False
                        for ii in range(len(nname)):
                            if nname[ii][1]>=nperson[pi][1] and nname[ii][3]<=nperson[pi][3] and nname[ii][0]>=nperson[pi][0] \
                                    and nname[ii][2]<=nperson[pi][2] and check==False:
                                check = True
                                proi = img2[nname[ii][1]:nname[ii][3],
                                            nname[ii][0]:nname[ii][2]]
                                temp_img = "{0}_{1}_{2}_{3}.jpg".format(
                                    nname[ii][1], nname[ii][3], nname[ii][0],
                                    nname[ii][2])
                                image_path = "./temp/{0}".format(temp_img)
                                img_shape = proi.shape
                                # print(proi)
                                #image_path2 = "./temp/tt_{0}".format(temp_img)

                                #######################################
                                encrypt_function(proi, image_path, key)
                                # os.remove(image_path)
                                text_ = decrypt_function(
                                    image_path, key, img_shape)
                                #cv2.imwrite(image_path2, text_)
                                #########################################
                                #print("coord:",nname[ii][1],nname[ii][3],nname[ii][0],nname[ii][2])

                                # OCR (check for a name match) => returns True / False
                                result, check_name = ocr.check_name(
                                    text_, namelist)
                                if result == True:
                                    cntname += 1
                                    if check_name not in ckname:
                                        ckname.append(check_name)
                                    roi = img2[nperson[pi][1]:nperson[pi][3],
                                               nperson[pi][0]:nperson[pi][2]]
                                    im0[nperson[pi][1]:nperson[pi][3],
                                        nperson[pi][0]:nperson[pi][2]] = roi
                #cv2.imwrite('.\check\{}.jpg'.format(idx),im0)
                ########################################################################################################
            # Print time (inference + NMS)
            #print('%sDone. (%.3fs)' % (s, t2 - t1))
            removeAllFile('./temp')
            # Save results (image with detections)
            if save_img:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    fourcc = 'mp4v'  # output video codec
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(
                        './output.mp4', cv2.VideoWriter_fourcc(*fourcc), fps,
                        (w, h))
                vid_writer.write(im0)
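removeAllFile('./temp') is called once per frame above but is not defined in the snippet; a plausible minimal version is sketched below.

import os

def removeAllFile(dir_path):
    """Hedged sketch: delete every regular file in dir_path (used here to clear ./temp each frame)."""
    if not os.path.isdir(dir_path):
        return
    for name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, name)
        if os.path.isfile(file_path):
            os.remove(file_path)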
def detect(save_img=False):
    print_div('INIT')
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    # Initialize
    print_div('GET DEVICE')
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    print_div('LOAD MODEL')
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    print_div('LOAD MODEL_CLASSIFIER')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Get names and colors
    print_div('SET LABEL COLOR')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    ###############################################################################
    print_div("RUN INFERENCE")

    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    video_path = source
    cap = cv2.VideoCapture(video_path)

    print_div('Start Play VIDEO')
    while cap.isOpened():

        ret, frame = cap.read()
        t0 = time.time()

        if not ret:
            print_div('No Frame')
            break

        fps_t1 = time.time()

        img, img0 = img_preprocess(frame)  # img: resized, img0: original
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS: get the values of each prediction
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier: get the label for those values
        if classify:
            pred = apply_classifier(pred, modelc, img, img0)

        # Draw Box
        for i, det in enumerate(pred):

            s = '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(img0.shape)[[1, 0, 1,
                                           0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          img0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):

                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy,
                                 img0,
                                 label=label,
                                 color=colors[int(cls)],
                                 line_thickness=3)

        # Print Results(inference + NMS)
        print_div('%sDone. (%.3fs)' % (s, t2 - t1))

        # Draw Image
        x, y, w, h = (img0.shape[1] // 4), 25, (img0.shape[1] // 2), 30
        cv2.rectangle(img0, (x, 10), (x + w, y + h), (0, 0, 0), -1)

        rescale = 0.5
        re_img0 = (int(img0.shape[1] * rescale), int(img0.shape[0] * rescale))

        cv2.putText(
            img0, '{} | inference: {:.4f}s | fps: {:.4f}'.format(
                opt.weights[0], t2 - t1, 1 / (time.time() - t0)),
            (x + 20, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.imshow('Stream_Detected', cv2.resize(img0, re_img0))

        key = cv2.waitKey(1)
        if key == ord('q'): break

    # After break
    cap.release()
    cv2.destroyAllWindows()
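print_div is used throughout this example as a section banner but is not defined in the snippet; a minimal compatible sketch:

def print_div(title=''):
    """Hedged sketch: print a visual divider with an optional title, matching the calls above."""
    print('-' * 30, title, '-' * 30)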
Example n. 27
def detect(opt, dp, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
     opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size: if not a multiple of 32, round up to the next multiple and print a warning

    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if opt.use_roi:
        print(dp.cl)
        # print(dp.cl[0], dp.cl[1])
        # cl = opt.control_line
        cl = dp.cl
        roi_in_pixels = np.array([0, cl[0], 1280,
                                  cl[1]])  # two-point coords: x1, y1, x2, y2
    else:
        roi_in_pixels = None

    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, roi=roi_in_pixels)

    # Get names and colors
    names = model.module.names if hasattr(
        model, 'module') else model.names  # handle the extra 'module' attribute of DataParallel models saved on GPU
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]  # one random color per class name

    # prune
    # torch_utils.prune(model, 0.7)
    # model.eval()

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once as a warm-up pass

    detected_img_id = 0
    time_list = [None] * len(dataset)
    for iii, (path, img, im0s, vid_cap, recover) in enumerate(dataset):
        # print(img.shape, im0s.shape, vid_cap)
        # exit()

        # img.shape [3, 384, 640] im0s.shape [720, 1280, 3] None
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # expand from [3, h, w] to [batch_size, 3, h, w]

        # Inference
        t1 = time_synchronized()
        # print('aug', opt.augment)  # False
        pred = model(img, augment=opt.augment)[0]
        # print(pred.shape) [1, 15120, 25]
        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        print(f'infer time:{t2-t1:.4f}s ', end='')
        time_list[iii] = t2 - t1

        # print('\n', len(pred), pred, recover)  # a list of length batch_size, one tensor per image; each tensor is [n, 6]: xyxy, conf, cls

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if opt.use_roi and det is not None:
                small_img_shape = torch.from_numpy(
                    np.array([recover[1], recover[0]]).astype(np.float))
                det[:,
                    0], det[:,
                            2] = det[:, 0] + recover[2], det[:, 2] + recover[2]
                det[:,
                    1], det[:,
                            3] = det[:, 1] + recover[3], det[:, 3] + recover[3]
            else:
                small_img_shape = img.shape[2::]
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s  # im0s is the original image

            save_path = str(Path(out) /
                            Path(p).name)  # output/filenamexxxx.jpg
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            # output/filenamexxxx.txt
            s += '%gx%g ' % img.shape[2:]  # print string, 640x640
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            # im0.shape is e.g. [720, 1280, 3]; re-indexing gives [1280, 720, 1280, 720]
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    small_img_shape, det[:, :4],
                    im0.shape).round()  # convert to x1 y1 x2 y2 pixel coordinates in the original image
                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)]
                                       )  # add to string # i.e. 1 crosswalk
                # s += f'{det[:, 4].item():.4f} '
                # print(n)

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            x, y, w, h = xywh
                            string = f"{int(cls)} {conf.item():.4f} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n"
                            f.write(string)  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        # print(type(im0), im0.shape) array, 720, 1280, 3
                        if names[int(cls)] in opt.plot_classes:
                            # color = colors[int(cls)]
                            color = (255, 85, 33)
                            plot_one_box(xyxy,
                                         im0,
                                         label=label,
                                         color=color,
                                         line_thickness=5)

            # Print time (inference + NMS)
            prt_str = '%sDone. (%.5fs)' % (s, t2 - t1)
            print(prt_str)
            os.system(f'echo "{prt_str}" >> {opt.output}/detect.log')

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    im0 = dp.dmpost(im0,
                                    det,
                                    det_id=detected_img_id,
                                    filename=Path(p).name,
                                    names=names)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    # print(detected_img_id, p, txt_path)
                    tmp_filename = Path(txt_path).stem
                    im0 = dp.dmpost(im0,
                                    det,
                                    det_id=detected_img_id,
                                    filename=tmp_filename,
                                    names=names)
                    vid_writer.write(im0)
            detected_img_id += 1

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    time_arr = np.array(time_list)
    prnt = f'Done. Network mean inference time: {np.mean(time_arr):.5f}s,  Mean FPS: {1/np.mean(time_arr):.4f}.'
    print(f'\nModel size {opt.img_size} inference {prnt}')
    os.system(f'echo "{prnt}" >> {opt.output}/detect.log')
    os.system(f'echo "useroi {opt.img_size} {prnt}" >> detect2.log')
Example n. 28
    def detect(self, save_img=False):
        # Get names and colors
        names = self.model.module.names if hasattr(
            self.model, 'module') else self.model.names
        #colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]
        # Run inference
        #t0 = time.time()
        img = torch.zeros((1, 3, self.imgsz, self.imgsz),
                          device=self.device)  # init img
        _ = self.model(img.half() if self.half else img
                       ) if self.device.type != 'cpu' else None  # run once
        for path, img, im0s, vid_cap in self.dataset:
            img = torch.from_numpy(img).to(self.device)
            img = img.half() if self.half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = self.model(img, augment=self.opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred,
                                       self.opt.conf_thres,
                                       self.opt.iou_thres,
                                       classes=self.opt.classes,
                                       agnostic=self.opt.agnostic_nms)
            t2 = time_synchronized()

            # Apply Classifier
            if self.classify:
                pred = apply_classifier(pred, self.modelc, img, im0s)
            #print("pred",pred)
            # Process detections
            for i, det in enumerate(pred):  # detections per image
                '''
                if self.webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                '''
                p, s, im0 = path, '', im0s

                save_path = str(Path(self.out) / Path(p).name)
                txt_path = str(Path(self.out) / Path(p).stem) + (
                    '_%g' %
                    self.dataset.frame if self.dataset.mode == 'video' else '')
                #s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1,
                                              0]]  # normalization gain whwh
                # when something was detected
                if det is not None and len(det):
                    total = 0.0
                    # Rescale boxes from img_size to im0 size
                    #print("type : " , type(det))
                    #print("det : " , det)
                    goods_type = None
                    percent = None
                    more_than_90 = 0
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %s, ' % (n, names[int(c)])  # add to string

                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        if self.save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                    gn).view(-1).tolist()  # normalized xywh
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * 5 + '\n') %
                                        (cls, *xywh))  # label format
                        #print(cls)
                        if self.save_img or self.view_img:  # Add bbox to image
                            goods_type = names[int(cls)]
                            percent = '%.2f' % (conf)
                            #print(type(percent))
                            percent = float(percent)
                            #print("percent",type(percent))
                            label = '%s %.2f' % (names[int(cls)], conf)
                            print(percent)
                            if percent > 0.85:
                                #plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                                plot_one_box(xyxy,
                                             im0,
                                             label=label,
                                             color=(0, 0, 255),
                                             line_thickness=3)
                                total = total + percent
                                more_than_90 += 1

                    #avg = total/len(det)
                    if more_than_90 != 0:
                        avg = total / more_than_90
                        avg = round(avg, 2)
                        #print(total)
                        #print(more_than_90)
                        #print(avg)
                        print("names : ", names[int(cls)])
                        #print("확률 : %.2f", percent )
                        cv_img, name, color = self.d.load_image(goods_type)
                        if cv_img is not None:
                            qt_img = self.convert_cv_qt(cv_img)
                            self.updateFeatureLable(qt_img)
                            #self.infomsg_append("[DETECT] 품종 : %s, 코드 : %s, 개수 : %d" % (name, goods_type, len(det)))

                            #img_time = datetime.datetime.now().strftime("%Y-%m-%d,%H:%M:%S")
                            img_time = datetime.datetime.now().strftime(
                                "%H:%M:%S")
                            img_date = datetime.datetime.now().strftime(
                                "%Y_%m_%d")
                            self.infomsg_append(
                                img_time + ",%s,%s,%d" %
                                (name, goods_type, more_than_90))
                            #log_string = img_time + "," + name + ","+goods_type+"," + str(len(det)) +","+ avg
                            log_string = img_time + "," + name + "," + goods_type + "," + str(
                                more_than_90) + "," + str(avg)
                            try:
                                if not os.path.exists("log"):
                                    os.makedirs("log")
                            except OSError:
                                print('Error: Creating directory. log')
                            f = open("./log/" + img_date + '_log.csv',
                                     mode='at',
                                     encoding='utf-8')
                            f.writelines(log_string + '\n')
                            f.close()
                            print(log_string)
                            #print("db 이미지 업로드 성공")

                    # no detections above the confidence threshold
                    else:
                        print("item could not be found in the DB")
                        self.iv.clear()
                        self.f_label.clear()
                else:
                    print("detect 없음")
                    #self.infomsg_append("[DETECT] 위 품종은 신규 학습이 필요합니다.")
                    #self.infomsg_append("detect 학습 필요")

                print(s)
                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))
                self.iv.setImage(self.convert_cv_qt(im0))

                # Stream results
                if self.view_img:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if self.save_img:
                    if self.dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release(
                                )  # release previous video writer

                            fourcc = 'mp4v'  # output video codec
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(
                                save_path, cv2.VideoWriter_fourcc(*fourcc),
                                fps, (w, h))
                        vid_writer.write(im0)
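self.convert_cv_qt() is called above to push OpenCV frames into the Qt widgets but is not shown; a typical PyQt5 conversion helper consistent with those calls is sketched below (names and details are assumptions).

import cv2
from PyQt5 import QtGui

def convert_cv_qt(cv_img):
    """Hedged sketch: convert an OpenCV BGR image into a QPixmap for display in a QLabel."""
    rgb = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
    h, w, ch = rgb.shape
    qimage = QtGui.QImage(rgb.data, w, h, ch * w, QtGui.QImage.Format_RGB888)
    return QtGui.QPixmap.fromImage(qimage)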
Example n. 29
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
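The "normalization gain whwh" pattern that appears in every example above is just [w, h, w, h] taken from im0.shape, so dividing an xywh box by it gives YOLO-style 0..1 normalized labels. A small self-contained illustration:

import torch

def xyxy_to_normalized_xywh(xyxy, im0_shape):
    """xyxy: 4 pixel coords (x1, y1, x2, y2); im0_shape: (height, width, channels)."""
    gn = torch.tensor(im0_shape)[[1, 0, 1, 0]].float()  # normalization gain: [w, h, w, h]
    x1, y1, x2, y2 = [float(v) for v in xyxy]
    xywh = torch.tensor([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1])
    return (xywh / gn).tolist()  # centre-x, centre-y, width, height, each in 0..1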
Example n. 30
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per img
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            img_crops = im0s.copy()  # keep a clean copy for cropping while im0 receives the drawn bounding boxes

            out_path = str(Path(out))    
            file_name = str(Path(p).name).split('.')[0] # gets name of file without extension
            save_path = f"{Path(out)}/{Path(p).name}"
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                
                handles_ymax = []
                handle_mids = []

                tailgates_ymin = []
                tailgates_ymax = []
                tailgate_ythird_coord = []

                px_ratio = 1

                crop_coords = {}

                info_to_csv = {
                    'file': file_name, 'objects_detected':True, 'handle_loc':None, 'handle_width':None, 
                    'handle_height':None, 'handle_process':None, 'tg_width':None, 'tg_height':None, 
                    'tg_process':None, 'px_ratio':None}
                
                field_names = ['file','objects_detected','handle_loc','handle_width','handle_height',
                                'handle_process', 'tg_width','tg_height','tg_process','px_ratio']

                csv_filepath = f'./tailgate_data.csv'

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                det_sorted = sorted(det, key=lambda x: x[-1]) # sort detected items by last index which is class

                # Write results
                for *xyxy, conf, cls in reversed(det_sorted):  # coords, confidence, class; reversing puts the license plate (cls 2) ahead of handle/tailgate so px_ratio is set before their labels are drawn
                    x1, y1, x2, y2 = int(xyxy[0]),int(xyxy[1]),int(xyxy[2]),int(xyxy[3])

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format


                    if int(cls) == 3: #truck cropping (future development; requires retraining with a truck class)
                        img_crops = img_crops[y1:y2, x1:x2]

                    elif int(cls) == 2: # license plate
                        license_width = abs(int(x2 - x1))
                        px_ratio = license_width / 12   # number of pixels per inch as license plates are 12"
                        info_to_csv['px_ratio'] = px_ratio
                        # im_p = img_crops[y1:y2, x1:x2] # currently no need to crop 
                        # cv2.imwrite(f'{out_path}/{file_name}_p_edge.png', im_p) # currently no need to output the picture of the license plate
                    
                    elif int(cls) == 1: #handle
                        # print(f'handle y1,y2,x1,x2: {y1},{y2},{x1},{x2}')
                        im_h = img_crops[y1:y2, x1:x2]
                        crop_coords['h'] = [y1,y2,x1,x2]

                        cv2.imwrite(f'{out_path}/{file_name}_yolo_h.png', im_h)
                        
                    elif int(cls) == 0: #tailgate
                        im_t = img_crops[y1:y2, x1:x2]
                        # print(f'tailgate y1,y2,x1,x2: {y1},{y2},{x1},{x2}')
                        crop_coords['tg'] = [y1,y2,x1,x2]

                        cv2.imwrite(f'{out_path}/{file_name}_yolo_tg.png', im_t)

                    if save_img or view_img:  # Add bbox to image
                        #label = '%s %.2f' % (names[int(cls)], conf) #confidence not needed
                        label = '%s ' % (names[int(cls)])
                        coord1, coord2, dim_label = plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], 
                                                                line_thickness=3, px_ratio=px_ratio)
                        
                        #get important points for line drawing
                        if int(cls) == 1 and int(abs(y1-y2)) < 175: #handle
                            # box already drawn above; reuse coord1/coord2 from that call
                            ymax = max(coord1[1], coord2[1])
                            handles_ymax.append(ymax)

                            xmid = int((coord1[0] + coord2[0]) / 2)
                            ymid = int((coord1[1] + coord2[1]) / 2)
                            handle_mids.append([xmid, ymid])
                            
                            #im_h = im0[coord1[0]:coord2[0], coord1[1]:coord2[1]]
                        
                        elif int(cls) == 0: #tailgate
                            # box already drawn above; reuse coord1/coord2 from that call
                            tailgate_xmin = min(coord1[0], coord2[0])

                            ymax = max(coord1[1], coord2[1])
                            tailgates_ymax.append(ymax)
                            ymin = min(coord1[1], coord2[1])
                            tailgates_ymin.append(ymin)
                            tailgate_ythird = int(abs(coord1[1]-coord2[1])/3+ymin)
                            tailgate_ythird_coord.append([tailgate_xmin, tailgate_ythird])

                            #im_t = img[coord1[0]:coord2[0], coord1[1]:coord2[1]]

                        else:
                            pass
                            
                            

                # draw_dist_btm_h_to_btm_t draws and labels the distance from the bottom of the
                # handle to the bottom of the tailgate; if the handle sits in the top 1/3 of the
                # tailgate it returns the y coord of the handle bottom, otherwise it returns False
                adj_tailgate_top, info_to_csv = draw_dist_btm_h_to_btm_t(im0, handle_mids, handles_ymax, 
                                                            tailgates_ymax, tailgate_ythird_coord, px_ratio, info_to_csv)

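                # if the handle sits in the top third of the tailgate, push the tailgate crop's top
                # edge down below the handle and skip the separate handle mask; otherwise mask the
                # handle crop on its own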
                if adj_tailgate_top > crop_coords['tg'][0]:
                    # This all affects final_tailgate()
                    crop_coords['diff_adjust'] = int(adj_tailgate_top - crop_coords['tg'][0])
                    # crop_coords['diff_adjust'] = int(adj_tailgate_top)
                    crop_coords['tg'][0] = int(adj_tailgate_top)
                    transp_h = False
                else:
                    transp_h, full_handle_process = handle_detect_and_mask(im_h)
                    info_to_csv['handle_process'] = (" >>> ").join(full_handle_process)
                    crop_coords['diff_adjust'] = False
                    try:
                        cv2.imwrite(f'{out_path}/{file_name}_transparent_h.png', transp_h)
                    except Exception:
                        pass  # skip saving if handle masking returned nothing writable

                # function gets the tailgate surrounded by transparency
                transp_tg, full_tailgate_process = tailgate_detect_and_mask(im_t)
                info_to_csv['tg_process'] = (" >>> ").join(full_tailgate_process)
                try:
                    cv2.imwrite(f'{out_path}/{file_name}_transparent_tg.png', transp_tg)
                except Exception:
                    pass  # skip saving if tailgate masking returned nothing writable
                

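                # composite the masked tailgate (and handle, if one was isolated) back onto
                # the truck image using the recorded crop coordinates and top-edge adjustment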
                final_image, info_to_csv = final_truck(img_crops, transp_tg, transp_h, 
                                            crop_coords['tg'], crop_coords['h'], crop_coords['diff_adjust'], info_to_csv)

                cv2.imwrite(f'{out_path}/{file_name}_full_transparency.png', final_image)
                
            else:
                # no detections: define the CSV row here as well, otherwise the write below
                # would reference names that only exist inside the detection branch
                csv_filepath = './tailgate_data.csv'
                field_names = ['file', 'objects_detected', 'handle_loc', 'handle_width', 'handle_height',
                               'handle_process', 'tg_width', 'tg_height', 'tg_process', 'px_ratio']
                info_to_csv = dict.fromkeys(field_names)
                info_to_csv.update({'file': file_name, 'objects_detected': False})


            # write or append info_to_csv
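            # append_dict_as_row / create_csv_headers_from_dict are assumed to be small
            # csv.DictWriter-style helpers defined elsewhere in this script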
            if os.path.isfile(csv_filepath):
                append_dict_as_row(csv_filepath, info_to_csv, field_names)
            else:
                create_csv_headers_from_dict(csv_filepath, info_to_csv, field_names)
                append_dict_as_row(csv_filepath, info_to_csv, field_names)  


            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                    
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))