Python Sort.update Exemples, utils.sort.Sort.update Python Exemples

Exemple #1

0

Afficher le fichier

def main():
    print("[INFO] sampling frames...")
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(
        'model/shape_predictor_5_face_landmarks.dat')
    time.sleep(2.0)
    stream = WebcamVideoStream(args['src']).start()
    fps = FPS().start()
    start = time.time()
    mot_tracker = Sort()
    grabbed, frame = stream.read()
    while grabbed:
        frame = cv2.resize(frame, (1280, 720))
        if fps._numFrames % 3 == 0:
            rects = detector(frame, 0)
        dets = np.array([get_pos_from_rect(rect) for rect in rects])
        ages = np.empty((len(dets)))
        genders = np.empty((len(dets)))
        if len(rects) > 0:
            shapes = dlib.full_object_detections()
            for rect in rects:
                shapes.append(predictor(frame, rect))
            faces = dlib.get_face_chips(frame, shapes, size=96, padding=0.4)
            faces = np.array(faces)
            faces = model.prep_image(faces)
            result = get_result(faces)
            genders, ages = model.decode_prediction(result)

        mot_tracker.update(dets, genders, ages)
        for tracker in mot_tracker.trackers:
            (left, top, right, bottom) = convert_x_to_bbox(
                tracker.kf.x[:4, :]).astype('int').flatten()
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
            age = tracker.smooth_age()
            gender = 'M' if tracker.smooth_gender() == 1 else 'F'
            cv2.putText(frame, "id: {} {} {}".format(tracker.id, gender, age),
                        (left - 10, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)
        cv2.putText(frame, "{:.1f} FPS".format(fps.fps()), (1100, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        cv2.namedWindow("Frame", cv2.WINDOW_NORMAL)
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        fps.update()
        grabbed, frame = stream.read()
    fps.stop()
    stream.release()
    cv2.destroyAllWindows()

Exemple #2

0

Afficher le fichier

def kalman_filter_tracking(det_bb, video_n_frames, model_type):
    """
    This function assigns a track value to an object by using kalman filters.
    It also adjust bounding box coordinates based on tracking information.
    """
    bb_id_updated = []
    tracker = Sort(model_type=model_type)
    for frame_num in range(1, video_n_frames + 1):
        #Get only bb of current frame
        dets_all_info = list(filter(lambda x: x[0] == frame_num, det_bb))
        dets = np.array([[bb[3], bb[4], bb[5], bb[6]]
                         for bb in dets_all_info])  #[[x1,y1,x2,y2]]
        #Apply kalman filtering
        trackers = tracker.update(dets)
        #Obtain id and bb in correct format
        for bb_dets, bb_update in zip(dets_all_info, trackers):
            bb_id_updated.append([
                bb_dets[0], bb_dets[1],
                int(bb_update[4]), bb_update[0], bb_update[1], bb_update[2],
                bb_update[3], bb_dets[7]
            ])
            # bb_id_updated.append([bb_dets[0], bb_dets[1], int(bb_update[4]), bb_dets[3],bb_dets[4], bb_dets[5], bb_dets[6], bb_dets[7]])
    return bb_id_updated

Exemple #3

0

Afficher le fichier

Fichier : run6.py Projet : Defrawy/Real-Time-Action-Recognition

class Ui_MainWindow(QtWidgets.QWidget):
    def __init__(self, parent=None):
        super(Ui_MainWindow, self).__init__(parent)
        self.tracker = Sort(settings.sort_max_age, settings.sort_min_hit)
        self.timer_camera = QtCore.QTimer()
        # self.cap = cv2.VideoCapture()
        # self.cap = cv2.VideoCapture()
        self.CAM_NUM = 0
        self.set_ui()
        self.slot_init()
        self.__flag_mode = 0
        self.fps = 0.00
        self.data = {}
        self.memory = {}
        self.joints = []
        self.current = []
        self.previous = []
        self.fourcc = cv2.VideoWriter_fourcc(
            *'mp4v')  # Be sure to use lower case
        self.out = cv2.VideoWriter('x.mp4', self.fourcc, 1,
                                   (settings.winWidth, settings.winHeight))

    def set_ui(self):

        self.__layout_main = QtWidgets.QHBoxLayout()
        self.__layout_fun_button = QtWidgets.QVBoxLayout()
        self.__layout_data_show = QtWidgets.QVBoxLayout()

        self.button_open_camera = QtWidgets.QPushButton(u'Camera OFF')

        self.button_mode_1 = QtWidgets.QPushButton(u'Attitude Estimation OFF')
        self.button_mode_2 = QtWidgets.QPushButton(u'Multiplayer tracking OFF')
        self.button_mode_3 = QtWidgets.QPushButton(u'Behavior recognition OFF')
        self.button_video = QtWidgets.QPushButton("Load Video")

        self.button_close = QtWidgets.QPushButton(u'Exit')

        self.button_open_camera.setMinimumHeight(50)
        self.button_mode_1.setMinimumHeight(50)
        self.button_mode_2.setMinimumHeight(50)
        self.button_mode_3.setMinimumHeight(50)
        self.button_video.setMinimumHeight(50)

        self.button_close.setMinimumHeight(50)

        self.button_close.move(10, 100)

        self.infoBox = QtWidgets.QTextBrowser(self)
        self.infoBox.setGeometry(QtCore.QRect(10, 300, 200, 180))

        # 信息显示
        self.label_show_camera = QtWidgets.QLabel()
        self.label_move = QtWidgets.QLabel()
        self.label_move.setFixedSize(200, 200)

        self.label_show_camera.setFixedSize(settings.winWidth + 1,
                                            settings.winHeight + 1)
        self.label_show_camera.setAutoFillBackground(True)

        self.__layout_fun_button.addWidget(self.button_open_camera)
        self.__layout_fun_button.addWidget(self.button_mode_1)
        self.__layout_fun_button.addWidget(self.button_mode_2)
        self.__layout_fun_button.addWidget(self.button_mode_3)
        self.__layout_fun_button.addWidget(self.button_video)
        self.__layout_fun_button.addWidget(self.button_close)
        self.__layout_fun_button.addWidget(self.label_move)

        self.__layout_main.addLayout(self.__layout_fun_button)
        self.__layout_main.addWidget(self.label_show_camera)

        self.setLayout(self.__layout_main)
        self.label_move.raise_()
        self.setWindowTitle(
            u'Real-time multi-person attitude estimation and behavior recognition system'
        )

    def slot_init(self):
        self.button_open_camera.clicked.connect(self.switch_to_camera)
        self.timer_camera.timeout.connect(self.show_camera)

        self.button_mode_1.clicked.connect(self.button_event2)
        self.button_mode_2.clicked.connect(self.button_event2)
        self.button_mode_3.clicked.connect(self.button_event2)
        self.button_video.clicked.connect(self.getfile)
        self.button_close.clicked.connect(self.close)

    def button_event2(self):
        sender = self.sender()
        if sender == self.button_mode_1 and self.timer_camera.isActive():
            if self.__flag_mode != 1:
                self.__flag_mode = 1
                self.button_mode_1.setText(u'Attitude Estimation ON')
                self.button_mode_2.setText(u'Multiplayer Tracking OFF')
                self.button_mode_3.setText(u'Behavior Recognition OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_1.setText(u'Attitude Estimation OFF')
                self.infoBox.setText(u'Camera is on')
        elif sender == self.button_mode_2 and self.timer_camera.isActive():
            if self.__flag_mode != 2:
                self.__flag_mode = 2
                self.button_mode_1.setText(u'Attitude Estimation OFF')
                self.button_mode_2.setText(u'Multiplayer tracking ON')
                self.button_mode_3.setText(u'Behavior recognition OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_2.setText(u'Multiplayer tracking OFF')
                self.infoBox.setText(u'camera已打开')
        elif sender == self.button_mode_3 and self.timer_camera.isActive():
            if self.__flag_mode != 3:
                self.__flag_mode = 3
                self.button_mode_1.setText(u'Attitude Estimation OFF')
                self.button_mode_2.setText(u'Multiplayer tracking OFF')
                self.button_mode_3.setText(u'Behavior recognition ON')
            else:
                self.__flag_mode = 0
                self.button_mode_3.setText(u'Behavior recognition OFF')
                self.infoBox.setText(u'camera已打开')
        else:
            self.__flag_mode = 0
            self.button_mode_1.setText(u'Attitude Estimation OFF')
            self.button_mode_2.setText(u'Multiplayer tracking OFF')
            self.button_mode_3.setText(u'Behavior recognition OFF')

    def show_camera(self):
        start = time.time()
        ret, frame = self.cap.read()
        # if not ret:
        #     print('this should save the video')
        #     self.timer_camera.stop()
        #     self.out.release()

        if ret:
            show_s = cv2.resize(frame, (settings.winWidth, settings.winHeight))
            show = cv2.cvtColor(show_s, cv2.COLOR_BGR2RGB)
            if self.__flag_mode == 1:
                self.infoBox.setText(u'当前为人体Attitude Estimation模式')
                humans = poseEstimator.inference(show)
                show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

            elif self.__flag_mode == 2:
                self.infoBox.setText(u'当前为Multiplayer tracking模式')
                humans = poseEstimator.inference(show)
                show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                    show, humans, imgcopy=False)
                show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)

                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            # This is the part to get the kt points
            elif self.__flag_mode == 3:
                self.infoBox.setText(u'当前为人体Behavior recognition模式')
                humans = poseEstimator.inference(show)
                ori = np.copy(show)
                show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                    show, humans, imgcopy=False)
                show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)
                    self.current = [i[-1] for i in trackers]

                    if len(self.previous) > 0:
                        for item in self.previous:
                            if item not in self.current and item in self.data:
                                del self.data[item]
                            if item not in self.current and item in self.memory:
                                del self.memory[item]

                    self.previous = self.current
                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        try:
                            j = np.argmin(
                                np.array([
                                    abs(i - (xmax + xmin) / 2.)
                                    for i in xcenter
                                ]))
                        except:
                            j = 0
                        if joint_filter(joints[j]):
                            joints[j] = joint_completion(
                                joint_completion(joints[j]))
                            if label not in self.data:
                                self.data[label] = [joints[j]]
                                self.memory[label] = 0
                            else:
                                self.data[label].append(joints[j])

                            if len(self.data[label]) == settings.L:
                                pred = actionPredictor().move_status(
                                    self.data[label])
                                if pred == 0:
                                    pred = self.memory[label]
                                else:
                                    self.memory[label] = pred
                                self.data[label].pop(0)

                                location = self.data[label][-1][1]
                                if location[0] <= 30:
                                    location = (51, location[1])
                                if location[1] <= 10:
                                    location = (location[0], 31)

                                cv2.putText(
                                    show, settings.move_status[pred],
                                    (location[0] - 30, location[1] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0),
                                    2)

                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            self.out.write(show_s)  # Write out frame to video

            end = time.time()
            self.fps = 1. / (end - start)
            cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                                     QtGui.QImage.Format_RGB888)
            self.label_show_camera.setPixmap(
                QtGui.QPixmap.fromImage(showImage))
        else:
            print('this should save the video')
            self.timer_camera.stop()
            self.out.release()

    def closeEvent(self, event):
        # self.out.release()
        ok = QtWidgets.QPushButton()
        cancel = QtWidgets.QPushButton()

        msg = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Warning,
                                    u"shut down",
                                    u"Are you sure you want to quit?")

        msg.addButton(ok, QtWidgets.QMessageBox.ActionRole)
        msg.addButton(cancel, QtWidgets.QMessageBox.RejectRole)
        ok.setText(u'Yes')
        cancel.setText(u'Cancel')
        if msg.exec_() == QtWidgets.QMessageBox.RejectRole:
            event.ignore()
        else:
            if self.cap.isOpened():
                self.cap.release()
            if self.timer_camera.isActive():
                self.timer_camera.stop()
            event.accept()
            print("System exited.")

    def getfile(self):
        dlg = QFileDialog()
        dlg.setFileMode(QFileDialog.AnyFile)
        fname = dlg.getOpenFileName(self, 'Open file', 'c:\\')
        # self.le.setPixmap(QPixmap(fname))
        if fname:
            self.load_video(fname[0])

# def getfiles(self):
#    dlg = QFileDialog()
#    dlg.setFileMode(QFileDialog.AnyFile)
#    dlg.setFilter("Text files (*.txt)")
#    filenames = QStringList()

#    if dlg.exec_():
#       filenames = dlg.selectedFiles()
#       f = open(filenames[0], 'r')

#       with f:
#          data = f.read()
#          self.contents.setText(data)

    def load_video(self, name):
        print(name)
        self.cap = cv2.VideoCapture(name)
        # self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, settings.winWidth)
        # self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, settings.winHeight)
        self.timer_camera.stop()
        self.timer_camera.start(1)

    def switch_to_camera(self):
        indicators['camera'] = not indicators['camera']
        [self.stop_camera, self.start_camera][int(indicators['camera'])]()
        self.button_open_camera.setText(['Camera OFF', 'Camera ON'
                                         ][int(indicators['camera'])])

    def stop_camera(self):
        self.out.release()
        self.cap.release()
        self.timer_camera.stop()

    def start_camera(self):
        self.cap = cv2.VideoCapture(0)
        self.timer_camera.stop()
        self.timer_camera.start(1)

Exemple #4

0

Afficher le fichier

Fichier : run.py Projet : shivam2003sy/Virtual-Trial-Room

class Ui_MainWindow(QtWidgets.QWidget):

    def __init__(self, parent=None):
        super(Ui_MainWindow, self).__init__(parent)
        self.tracker = Sort(settings.sort_max_age, settings.sort_min_hit)
        self.timer_camera = QtCore.QTimer()
        self.cap = cv2.VideoCapture()
        self.CAM_NUM = 0
        self.set_ui()
        self.slot_init()
        self.__flag_mode = 0
        self.fps = 0.00
        self.data = {}
        self.memory = {}
        self.joints = []
        self.current = []
        self.previous = []

    def set_ui(self):

        self.__layout_main = QtWidgets.QHBoxLayout()
        self.__layout_fun_button = QtWidgets.QVBoxLayout()
        self.__layout_data_show = QtWidgets.QVBoxLayout()

        self.button_open_camera = QtWidgets.QPushButton(u'camera OFF')

        self.button_mode_1 = QtWidgets.QPushButton(u'Attitude estimation OFF')
        self.button_mode_2 = QtWidgets.QPushButton(u'Multiplayer tracking OFF')
        self.button_mode_3 = QtWidgets.QPushButton(u'Behavior recognition OFF')

        self.button_close = QtWidgets.QPushButton(u'Exit')

        self.button_open_camera.setMinimumHeight(50)
        self.button_mode_1.setMinimumHeight(50)
        self.button_mode_2.setMinimumHeight(50)
        self.button_mode_3.setMinimumHeight(50)

        self.button_close.setMinimumHeight(50)

        self.button_close.move(10, 100)

        self.infoBox = QtWidgets.QTextBrowser(self)
        self.infoBox.setGeometry(QtCore.QRect(10, 300, 200, 180))

        # 信息显示
        self.label_show_camera = QtWidgets.QLabel()
        self.label_move = QtWidgets.QLabel()
        self.label_move.setFixedSize(200, 200)

        self.label_show_camera.setFixedSize(settings.winWidth + 1, settings.winHeight + 1)
        self.label_show_camera.setAutoFillBackground(True)

        self.__layout_fun_button.addWidget(self.button_open_camera)
        self.__layout_fun_button.addWidget(self.button_mode_1)
        self.__layout_fun_button.addWidget(self.button_mode_2)
        self.__layout_fun_button.addWidget(self.button_mode_3)
        self.__layout_fun_button.addWidget(self.button_close)
        self.__layout_fun_button.addWidget(self.label_move)

        self.__layout_main.addLayout(self.__layout_fun_button)
        self.__layout_main.addWidget(self.label_show_camera)

        self.setLayout(self.__layout_main)
        self.label_move.raise_()
        self.setWindowTitle(u'Real-time multi-person attitude estimation and behavior recognition system')

    def slot_init(self):
        self.button_open_camera.clicked.connect(self.button_event)
        self.timer_camera.timeout.connect(self.show_camera)

        self.button_mode_1.clicked.connect(self.button_event)
        self.button_mode_2.clicked.connect(self.button_event)
        self.button_mode_3.clicked.connect(self.button_event)
        self.button_close.clicked.connect(self.close)

    def button_event(self):
        sender = self.sender()
        if sender == self.button_mode_1 and self.timer_camera.isActive():
            if self.__flag_mode != 1:
                self.__flag_mode = 1
                self.button_mode_1.setText(u'Attitude estimation ON')
                self.button_mode_2.setText(u'Multiplayer tracking OFF')
                self.button_mode_3.setText(u'Behavior recognition OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_1.setText(u'Attitude estimation OFF')
                self.infoBox.setText(u'Camera is on')
        elif sender == self.button_mode_2 and self.timer_camera.isActive():
            if self.__flag_mode != 2:
                self.__flag_mode = 2
                self.button_mode_1.setText(u'Attitude estimation OFF')
                self.button_mode_2.setText(u'Multiplayer tracking ON')
                self.button_mode_3.setText(u'Behavior recognition OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_2.setText(u'Multiplayer tracking OFF')
                self.infoBox.setText(u'Camera is on')
        elif sender == self.button_mode_3 and self.timer_camera.isActive():
            if self.__flag_mode != 3:
                self.__flag_mode = 3
                self.button_mode_1.setText(u'Attitude estimation OFF')
                self.button_mode_2.setText(u'Multiplayer tracking OFF')
                self.button_mode_3.setText(u'Behavior recognitionON')
            else:
                self.__flag_mode = 0
                self.button_mode_3.setText(u'Behavior recognition OFF')
                self.infoBox.setText(u'Camera is on')
        else:
            self.__flag_mode = 0
            self.button_mode_1.setText(u'Attitude estimation OFF')
            self.button_mode_2.setText(u'Multiplayer tracking OFF')
            self.button_mode_3.setText(u'Behavior recognition OFF')
            if self.timer_camera.isActive() == False:
                flag = self.cap.open(self.CAM_NUM)
                self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, settings.winWidth)
                self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, settings.winHeight)
                if flag == False:
                    msg = QtWidgets.QMessageBox.warning(self, u"Warning", u"Please check if the camera and computer are connected correctly",
                                                        buttons=QtWidgets.QMessageBox.Ok,
                                                        defaultButton=QtWidgets.QMessageBox.Ok)
                else:
                    self.timer_camera.start(1)
                    self.button_open_camera.setText(u'camera ON')
                    self.infoBox.setText(u'Camera is on')
            else:
                self.timer_camera.stop()
                self.cap.release()
                self.label_show_camera.clear()
                self.button_open_camera.setText(u'camera OFF')
                self.infoBox.setText(u'Camera is off')

    def show_camera(self):
        start = time.time()
        ret, frame = self.cap.read()
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)
        if ret:
            if self.__flag_mode == 1:
                self.infoBox.setText(u'Current pose estimation model')
                humans = poseEstimator.inference(show)
                
                
                show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

            elif self.__flag_mode == 2:
                self.infoBox.setText(u'Currently multiplayer tracking mode')
                humans = poseEstimator.inference(show)
                show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)

                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            elif self.__flag_mode == 3:
                self.infoBox.setText(u'Current human behavior recognition model')
                humans = poseEstimator.inference(show)
                ori = np.copy(show)
                show, joints, bboxes, xcenter, sk= TfPoseEstimator.get_skeleton(show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)
                    self.current = [i[-1] for i in trackers]

                    if len(self.previous) > 0:
                        for item in self.previous:
                            if item not in self.current and item in self.data:
                                del self.data[item]
                            if item not in self.current and item in self.memory:
                                del self.memory[item]

                    self.previous = self.current
                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        try:
                            j = np.argmin(np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                        except:
                            j = 0
                        if joint_filter(joints[j]):
                            joints[j] = joint_completion(joint_completion(joints[j]))
                            if label not in self.data:
                                self.data[label] = [joints[j]]
                                self.memory[label] = 0
                            else:
                                self.data[label].append(joints[j])

                            if len(self.data[label]) == settings.L:
                                pred = actionPredictor().move_status(self.data[label])
                                if pred == 0:
                                    pred = self.memory[label]
                                else:
                                    self.memory[label] = pred
                                self.data[label].pop(0)

                                location = self.data[label][-1][1]
                                if location[0] <= 30:
                                    location = (51, location[1])
                                if location[1] <= 10:
                                    location = (location[0], 31)

                                cv2.putText(show, settings.move_status[pred], (location[0] - 30, location[1] - 10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                            (0, 255, 0), 2)

                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            end = time.time()
            self.fps = 1. / (end - start)
            cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0], QtGui.QImage.Format_RGB888)
            self.label_show_camera.setPixmap(QtGui.QPixmap.fromImage(showImage))

    def closeEvent(self, event):
        ok = QtWidgets.QPushButton()
        cancel = QtWidgets.QPushButton()

        msg = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Warning, u"Close", u"yes")

        msg.addButton(ok, QtWidgets.QMessageBox.ActionRole)
        msg.addButton(cancel, QtWidgets.QMessageBox.RejectRole)
        ok.setText(u'determine')
        cancel.setText(u'No')
        if msg.exec_() == QtWidgets.QMessageBox.RejectRole:
            event.ignore()
        else:
            if self.cap.isOpened():
                self.cap.release()
            if self.timer_camera.isActive():
                self.timer_camera.stop()
            event.accept()
            print("System exited.")

Exemple #5

0

Afficher le fichier

Fichier : main_task_kalman.py Projet : amirunpri2018/mcv-m6-2019-team1

def main():

    # Ground truth file path:

    if INPUT == 'gt_txt':

        gt_file = os.path.join(ROOT_DIR, 'data', 'AICity_data', 'train', 'S03',
                               'c010', 'gt', 'gt.txt')

        # Get BBox detection from list
        df = ut.get_bboxes_from_MOTChallenge(gt_file)

    elif INPUT == 'yolo3':

        gt_file = os.path.join(ROOT_DIR, 'data', 'AICity_data', 'train', 'S03',
                               'c010', 'det', 'det_yolo3.txt')

        # Get BBox detection from list
        df = ut.get_bboxes_from_MOTChallenge(gt_file)

    elif INPUT == 'gt_xml':

        gt_file = os.path.join(ROOT_DIR, 'data', 'AICity_data', 'train', 'S03',
                               'c010', 'gt', 'm6-full_annotation_crop.xml')

        # Get BBox detection from list
        df = ut_xml.get_bboxes_from_aicity_file(gt_file)

        # Adapt GT to final metric calculation (add bbox and track_id columns):
        df.loc[:, 'track_id'] = df['id'].values.tolist()
        df.loc[:, 'img_id'] = df['frame'].values.tolist()

        boxes = []
        for index, row in df.iterrows():
            #boxes.append([row['ytl'], row['xtl'], row['ybr'], row['xbr']])
            boxes.append([row['xtl'], row['ytl'], row['xbr'], row['ybr']])

        df['boxes'] = boxes

    elif INPUT == 'cnn_out':

        #gt_file = os.path.join(ROOT_DIR,
        #                      'data', 'AICity_data', 'train', 'S03',
        #                      'c010', 'det', 'results_cnn.csv')

        gt_file = os.path.join(ROOT_DIR, 'data', 'AICity_data', 'train', 'S03',
                               'c010', 'det', 'results_for_map.csv')

        # Get BBox detection from list
        df = pd.read_csv(gt_file)

        # Adapt GT to final metric calculation (add bbox and track_id columns):

        boxes = []
        for index, row in df.iterrows():
            #boxes.append([row['ytl'], row['xtl'], row['ybr'], row['xbr']])
            #boxes.append([row['xtl'], row['ytl'], row['xbr'], row['ybr']])  # Total output from Cnn GOOD ONE!
            boxes.append([row['xbr'], row['ybr'], row['xtl'],
                          row['ytl']])  # Total output from Cnn TEST
        df['boxes'] = boxes

    # Display data:
    colours = np.random.rand(32, 3)  # Used only for display

    # Sort and group bbox by frame:
    df.sort_values(by=['frame'])
    df_grouped = df.groupby('frame')

    total_time = 0.0
    total_frames = 0
    out = []

    if display:
        plt.ion()  # for iterative display
        fig, ax = plt.subplots(1, 2, figsize=(20, 20))

    # Create instance of the SORT tracker
    mot_tracker = Sort()

    for f, df_group in df_grouped:

        frame = int(df_group['frame'].values[0])
        print(frame)
        if frame > 220:
            break

        if INPUT == 'gt_txt' or INPUT == 'yolo3':
            df_gt = df_group[['ymin', 'xmin', 'ymax', 'xmax']].values.tolist()

        elif INPUT == 'gt_xml':
            df_gt = df_group[['ytl', 'xtl', 'ybr', 'xbr']].values.tolist()

        elif INPUT == 'cnn_out':
            #ToDo: When we correct the order of CNN output CHANGE this order to the same as gt_xml!!
            #df_gt = df_group[['ybr', 'xtl', 'ytl', 'xbr']].values.tolist()
            df_gt = df_group[['ybr', 'xbr', 'ytl', 'xtl']].values.tolist()

        # Reshape GT format for Kalman tracking algorithm:
        # [x1,y1,x2,y2] format for the tracker input:

        df_gt = np.asarray(df_gt)
        dets = np.stack([df_gt[:, 1], df_gt[:, 0], df_gt[:, 3], df_gt[:, 2]],
                        axis=1)
        dets = np.reshape(dets, (len(dets), -1))
        dets = np.asarray(dets, dtype=np.float64, order='C')

        if (display):
            fn = frames_dir + '/frame_%03d.jpg' % (frame)  # read the frame
            im = io.imread(fn)
            ax[0].imshow(im)
            ax[0].axis('off')
            ax[0].set_title('Original R-CNN detections (untracked)')
            for j in range(np.shape(dets)[0]):
                color = 'r'
                coords = (dets[j, 0].astype(np.float),
                          dets[j, 1].astype(np.float)), dets[j, 2].astype(
                              np.float) - dets[j, 0].astype(
                                  np.float), dets[j, 3].astype(
                                      np.float) - dets[j, 1].astype(np.float)
                ax[0].add_patch(
                    plt.Rectangle(*coords, fill=False, edgecolor=color, lw=3))

        total_frames += 1

        if (display):
            ax[1].imshow(im)
            ax[1].axis('off')
            ax[1].set_title('Tracked Targets')

        start_time = time.time()
        trackers = mot_tracker.update(dets)
        cycle_time = time.time() - start_time
        total_time += cycle_time

        out.append([frame, trackers])

        for d in trackers:
            if (display):
                d = d.astype(np.uint32)
                ax[1].add_patch(
                    patches.Rectangle((d[0], d[1]),
                                      d[2] - d[0],
                                      d[3] - d[1],
                                      fill=False,
                                      lw=3,
                                      ec=colours[d[4] % 32, :]))
                ax[1].set_adjustable('box-forced')

        if (save):
            plt.savefig(
                os.path.join(results_dir,
                             'video_kalman_' + str(frame) + '.png'))

        if (display):
            dp.clear_output(wait=True)
            dp.display(plt.gcf())
            time.sleep(0.000001)
            ax[0].cla()
            ax[1].cla()

    plt.show()

    print("Total Tracking took: %.3f for %d frames or %.1f FPS" %
          (total_time, total_frames, total_frames / total_time))

    ############################################################################################################

    # Result of Kalman tracking (pandas format):

    df_kalman = ut.kalman_out_to_pandas_for_map(out)

    if INPUT == 'gt_txt' or INPUT == 'yolo3':
        df_gt = ut.get_bboxes_from_MOTChallenge_for_map(gt_file)

    elif INPUT == 'gt_xml':
        df_gt = df

    elif INPUT == 'cnn_out':
        df_gt = None

    # If ground truth was used, save ground truth adapted to map_metric.py:

    if df_gt is not None:

        ut.save_pkl(df_gt, os.path.join(results_dir, INPUT + '_gt_panda.pkl'))
        df_gt_corr = ut.panda_to_json_gt(df_gt)
        ut.save_json(
            df_gt_corr,
            os.path.join(results_dir, INPUT + '_gt_ground_truth_boxes.json'))

    # Save kalman filter output:

    ut.save_pkl(df_kalman,
                os.path.join(results_dir, INPUT + '_kalman_predictions.pkl'))
    df_pred_corr = ut.panda_to_json_predicted(df_kalman)
    ut.save_json(df_pred_corr,
                 os.path.join(results_dir, INPUT + '_predicted_boxes.json'))

Exemple #6

0

Afficher le fichier

def main():

    for CAM in cameras:
        # Set useful directories
        frames_dir = os.path.join(ROOT_DIR, 'train', SEQ, CAM, 'frames')

        results_dir = os.path.join(OUTPUT_DIR, WEEK, TASK)

        # Create folders if they don't exist
        if not os.path.isdir(results_dir):
            os.mkdir(results_dir)

        tstamp_path = os.path.join(ROOT_DIR, 'cam_timestamp', SEQ + '.txt')
        time_offset, fps = ut.obtain_timeoff_fps(tstamp_path, CAM)

        # Ground truth file path:

        gt_det = os.path.join(ROOT_DIR, 'train', SEQ, CAM, 'det',
                              INPUT + '.txt')

        # Save gt file as pkl:

        if CREATE_GT_PKL == True:
            gt_txt = os.path.join(ROOT_DIR, 'train', SEQ, CAM, 'gt', 'gt.txt')
            save_gt_pkl = os.path.join(ROOT_DIR, 'train', SEQ, CAM, 'gt',
                                       'gt.pkl')
            code_ut.getBBox_from_gt(gt_txt, save_in=save_gt_pkl)

        # Get BBox detection from list
        df = ut.get_bboxes_from_MOTChallenge(gt_det)
        df.loc[:, 'time_stamp'] = df['frame'].values.tolist()

        # Display data:
        colours = np.random.rand(32, 3)  # Used only for display

        # Sort and group bbox by frame:
        df.sort_values(by=['frame'])
        df_grouped = df.groupby('frame')

        total_time = 0.0
        total_frames = 0
        out = []

        if display:
            plt.ion()  # for iterative display
            fig, ax = plt.subplots(1, 2, figsize=(20, 20))

        # Create instance of the SORT tracker
        mot_tracker = Sort()

        for f, df_group in df_grouped:

            frame = int(df_group['frame'].values[0])

            time_stamp = ut.timestamp_calc(frame, time_offset, fps)

            df_gt = df_group[['ymin', 'xmin', 'ymax', 'xmax']].values.tolist()

            # Reshape GT format for Kalman tracking algorithm:
            # [x1,y1,x2,y2] format for the tracker input:

            df_gt = np.asarray(df_gt)
            dets = np.stack(
                [df_gt[:, 1], df_gt[:, 0], df_gt[:, 3], df_gt[:, 2]], axis=1)
            dets = np.reshape(dets, (len(dets), -1))
            dets = np.asarray(dets, dtype=np.float64, order='C')

            if (display):
                fn = frames_dir + '/frame_%04d.jpg' % (frame)  # read the frame
                im = io.imread(fn)
                ax[0].imshow(im)
                ax[0].axis('off')
                ax[0].set_title('Original R-CNN detections (untracked)')
                for j in range(np.shape(dets)[0]):
                    color = 'r'
                    coords = (dets[j, 0].astype(np.float), dets[j, 1].astype(
                        np.float)), dets[j, 2].astype(
                            np.float) - dets[j, 0].astype(
                                np.float), dets[j, 3].astype(
                                    np.float) - dets[j, 1].astype(np.float)
                    ax[0].add_patch(
                        plt.Rectangle(*coords,
                                      fill=False,
                                      edgecolor=color,
                                      lw=3))

            total_frames += 1

            if (display):
                ax[1].imshow(im)
                ax[1].axis('off')
                ax[1].set_title('Tracked Targets')

            start_time = time.time()
            trackers = mot_tracker.update(dets)
            cycle_time = time.time() - start_time
            total_time += cycle_time

            out.append([frame, trackers, time_stamp])

            for d in trackers:
                if (display):
                    d = d.astype(np.uint32)
                    ax[1].add_patch(
                        patches.Rectangle((d[0], d[1]),
                                          d[2] - d[0],
                                          d[3] - d[1],
                                          fill=False,
                                          lw=3,
                                          ec=colours[d[4] % 32, :]))
                    ax[1].set_adjustable('box-forced')

            if (save):
                plt.savefig(
                    os.path.join(results_dir,
                                 'video_kalman_' + str(frame) + '.png'))

            if (display):
                dp.clear_output(wait=True)
                dp.display(plt.gcf())
                time.sleep(0.000001)
                ax[0].cla()
                ax[1].cla()

        plt.show()

        print("Total Tracking took: %.3f for %d frames or %.1f FPS" %
              (total_time, total_frames, total_frames / total_time))

        ############################################################################################################

        # Result of Kalman tracking (pandas format):

        df_kalman = ut.kalman_out_to_pandas_for_map(out)

        # Save kalman filter output:

        ut.save_pkl(
            df_kalman,
            os.path.join(results_dir,
                         INPUT + SEQ + CAM + '_kalman_predictions.pkl'))

Exemple #7

0

Afficher le fichier

class Ui_MainWindow(QtWidgets.QWidget):
    def __init__(self, parent=None):
        super(Ui_MainWindow, self).__init__(parent)
        self.tracker = Sort(settings.sort_max_age, settings.sort_min_hit)
        self.timer_camera = QtCore.QTimer()
        self.cap = cv2.VideoCapture()
        self.CAM_NUM = 0
        self.set_ui()
        self.slot_init()
        self.__flag_mode = 0
        self.fps = 0.00
        self.data = {}
        self.memory = {}
        self.joints = []
        self.current = []
        self.previous = []

        self.frames_count = 0  #用于计数每一次预测前摄入的帧数
        self.pred_num_frames = 16  #设置多少帧预测一次
        self.pred_data = {}  #用于存储每一次预测用的数据
        self.one_frame = []

    def set_ui(self):

        self.__layout_main = QtWidgets.QHBoxLayout()
        self.__layout_fun_button = QtWidgets.QVBoxLayout()
        self.__layout_data_show = QtWidgets.QVBoxLayout()

        self.button_open_camera = QtWidgets.QPushButton(u'相机 OFF')

        self.button_mode_1 = QtWidgets.QPushButton(u'姿态估计 OFF')
        self.button_mode_2 = QtWidgets.QPushButton(u'多人跟踪 OFF')
        self.button_mode_3 = QtWidgets.QPushButton(u'行为识别 OFF')

        self.button_close = QtWidgets.QPushButton(u'退出')

        self.button_open_camera.setMinimumHeight(50)
        self.button_mode_1.setMinimumHeight(50)
        self.button_mode_2.setMinimumHeight(50)
        self.button_mode_3.setMinimumHeight(50)

        self.button_close.setMinimumHeight(50)

        self.button_close.move(10, 100)

        self.infoBox = QtWidgets.QTextBrowser(self)
        self.infoBox.setGeometry(QtCore.QRect(10, 300, 200, 180))

        # 信息显示
        self.label_show_camera = QtWidgets.QLabel()
        self.label_move = QtWidgets.QLabel()
        self.label_move.setFixedSize(200, 200)

        self.label_show_camera.setFixedSize(settings.winWidth + 1,
                                            settings.winHeight + 1)
        self.label_show_camera.setAutoFillBackground(True)

        self.__layout_fun_button.addWidget(self.button_open_camera)
        self.__layout_fun_button.addWidget(self.button_mode_1)
        self.__layout_fun_button.addWidget(self.button_mode_2)
        self.__layout_fun_button.addWidget(self.button_mode_3)
        self.__layout_fun_button.addWidget(self.button_close)
        self.__layout_fun_button.addWidget(self.label_move)

        self.__layout_main.addLayout(self.__layout_fun_button)
        self.__layout_main.addWidget(self.label_show_camera)

        self.setLayout(self.__layout_main)
        self.label_move.raise_()
        self.setWindowTitle(u'实时多人姿态估计与行为识别系统')

    def slot_init(self):
        self.button_open_camera.clicked.connect(self.button_event)
        self.timer_camera.timeout.connect(self.show_camera)

        self.button_mode_1.clicked.connect(self.button_event)
        self.button_mode_2.clicked.connect(self.button_event)
        self.button_mode_3.clicked.connect(self.button_event)
        self.button_close.clicked.connect(self.close)

    def button_event(self):
        sender = self.sender()
        if sender == self.button_mode_1 and self.timer_camera.isActive():
            if self.__flag_mode != 1:
                self.__flag_mode = 1
                self.button_mode_1.setText(u'姿态估计 ON')
                self.button_mode_2.setText(u'多人跟踪 OFF')
                self.button_mode_3.setText(u'行为识别 OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_1.setText(u'姿态估计 OFF')
                self.infoBox.setText(u'相机已打开')
        elif sender == self.button_mode_2 and self.timer_camera.isActive():
            if self.__flag_mode != 2:
                self.__flag_mode = 2
                self.button_mode_1.setText(u'姿态估计 OFF')
                self.button_mode_2.setText(u'多人跟踪 ON')
                self.button_mode_3.setText(u'行为识别 OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_2.setText(u'多人跟踪 OFF')
                self.infoBox.setText(u'相机已打开')
        elif sender == self.button_mode_3 and self.timer_camera.isActive():
            if self.__flag_mode != 3:
                self.__flag_mode = 3
                self.button_mode_1.setText(u'姿态估计 OFF')
                self.button_mode_2.setText(u'多人跟踪 OFF')
                self.button_mode_3.setText(u'行为识别 ON')
            else:
                self.__flag_mode = 0
                self.button_mode_3.setText(u'行为识别 OFF')
                self.infoBox.setText(u'相机已打开')
        else:
            self.__flag_mode = 0
            self.button_mode_1.setText(u'姿态估计 OFF')
            self.button_mode_2.setText(u'多人跟踪 OFF')
            self.button_mode_3.setText(u'行为识别 OFF')
            if self.timer_camera.isActive() == False:
                flag = self.cap.open(self.CAM_NUM)
                self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, settings.winWidth)
                self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, settings.winHeight)
                if flag == False:
                    msg = QtWidgets.QMessageBox.warning(
                        self,
                        u"Warning",
                        u"请检测相机与电脑是否连接正确",
                        buttons=QtWidgets.QMessageBox.Ok,
                        defaultButton=QtWidgets.QMessageBox.Ok)
                else:
                    self.timer_camera.start(1)
                    self.button_open_camera.setText(u'相机 ON')
                    self.infoBox.setText(u'相机已打开')
            else:
                self.timer_camera.stop()
                self.cap.release()
                self.label_show_camera.clear()
                self.button_open_camera.setText(u'相机 OFF')
                self.infoBox.setText(u'相机已关闭')

    def show_camera(self):
        start = time.time()
        ret, frame = self.cap.read()
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)
        if ret:
            if self.__flag_mode == 1:
                self.infoBox.setText(u'当前为人体姿态估计模式')
                humans = poseEstimator.inference(show)
                show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

            elif self.__flag_mode == 2:
                self.infoBox.setText(u'当前为多人跟踪模式')
                humans = poseEstimator.inference(show)
                show, joints, bboxes, xcenter = TfPoseEstimator.get_skeleton(
                    show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)

                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            elif self.__flag_mode == 3:
                self.infoBox.setText(u'当前为人体行为识别模式')
                humans = poseEstimator.inference(show)
                ori = np.copy(show)
                show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                    show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)  #有几个框就生成几个追踪器
                    self.current = [i[-1] for i in trackers]

                    if len(self.previous) > 0:
                        for item in self.previous:
                            if item not in self.current and item in self.data:
                                del self.data[item]
                            if item not in self.current and item in self.memory:
                                del self.memory[item]

                    self.previous = self.current
                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])

                        try:
                            j = np.argmin(
                                np.array([
                                    abs(i - (xmax + xmin) / 2.)
                                    for i in xcenter
                                ]))
                        except:
                            j = 0

                        if joint_filter(joints[j]):
                            pred = 0
                            joints[j] = joint_completion(
                                joint_completion(joints[j]))
                            if len(joints[j]) == 18:
                                if label not in self.data:
                                    self.data[label] = [
                                        joints[j]
                                    ]  #self.data[label]:[{0:(xj1,yj1),1:(xj2,yj2),...,n:(xjn,yjn)},{},{}]
                                    self.memory[label] = 0
                                else:
                                    self.data[label].append(joints[j])

                            #print('len(self.data[label]):{0}'.format(len(self.data[label])))
                            if len(
                                    self.data[label]
                            ) == settings.L:  #self.data[label]:[16,{18,key:(x,y)}]
                                for i in range(settings.L):
                                    keys = self.data[label][i].keys()
                                    keys = sorted(keys)
                                    for key in keys:
                                        self.one_frame.append(
                                            np.array(self.data[label][i][key]))
                                    if label not in self.pred_data:
                                        self.pred_data[label] = [
                                            np.array(self.one_frame)
                                        ]
                                    else:
                                        self.pred_data[label].append(
                                            np.array(self.one_frame)
                                        )  #[[[x0 y0],..[x17 y17]],..[]]
                                    self.one_frame = []
                                self.pred_data[label] = np.array(
                                    self.pred_data[label])

                                pred = sk_cnn_actionPredicter.predict(
                                    self.pred_data[label])

                                #print('预测动作下标:{0}'.format(pred))
                                self.pred_data[label] = []
                                self.data[label].pop(0)

                            location = self.data[label][-1][1]
                            if location[0] <= 30:
                                location = (51, location[1])
                            if location[1] <= 10:
                                location = (location[0], 31)

                            #print('预测动作类别:{0}'.format(settings.move_status[pred]))
                            cv2.putText(show, settings.move_status[pred],
                                        (location[0] - 30, location[1] - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                        (0, 255, 0), 2)

                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            end = time.time()
            self.fps = 1. / (end - start)
            cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0],
                                     QtGui.QImage.Format_RGB888)
            self.label_show_camera.setPixmap(
                QtGui.QPixmap.fromImage(showImage))

    def closeEvent(self, event):
        ok = QtWidgets.QPushButton()
        cancel = QtWidgets.QPushButton()

        msg = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Warning, u"关闭",
                                    u"是否关闭！")

        msg.addButton(ok, QtWidgets.QMessageBox.ActionRole)
        msg.addButton(cancel, QtWidgets.QMessageBox.RejectRole)
        ok.setText(u'确定')
        cancel.setText(u'取消')
        if msg.exec_() == QtWidgets.QMessageBox.RejectRole:
            event.ignore()
        else:
            if self.cap.isOpened():
                self.cap.release()
            if self.timer_camera.isActive():
                self.timer_camera.stop()
            event.accept()
            print("System exited.")

Exemple #8

0

Afficher le fichier

def main(cap_queue):

    yolo = YOLO(res="1024")
    sort = Sort(iou_threshold=0.05)
    whitelist = [
        "person",
        "bicycle",
        "car",
        "motorbike",
        "aeroplane",
        "bus",
        "truck",
        "boat",
        "fire hydrant",
        "stop sign",
        "parking meter",
        "bird",
        "cat",
        "dog",
        "backpack",
        "umbrella",
        "handbag",
        "suitcase",
    ]
    frames = 0
    while True:
        frame, original_dim = cap_queue.get(block=True)
        frames += 1
        outputs = yolo.get_results(frame)
        detections = []
        labels = []

        for output in outputs:
            label = yolo.classes[int(output[-1])]
            if not label in whitelist:
                del label
                continue

            tl = tuple(output[1:3].int())
            br = tuple(output[3:5].int())

            detections.append(
                np.array(
                    [tl[0].item(), tl[1].item(), br[0].item(), br[1].item()]))
            labels.append(label)

            del tl, br, label
        del outputs

        detections = np.array(detections)
        should_write = False

        if detections.shape[0] > 0:
            try:
                alive, _ = sort.update(detections, labels)
            except IndexError:
                del frame, detections, labels
                continue

            should_write = False
            for trk in alive:
                t = trk.get_state()[0]
                try:
                    bbox = [int(t[0]), int(t[1]), int(t[2]), int(t[3])]
                except ValueError:
                    continue
                cv2.rectangle(
                    frame,
                    (int(bbox[0]), int(bbox[1])),
                    (int(bbox[2]), int(bbox[3])),
                    trk.color,
                    2,
                )
                cv2.putText(
                    frame,
                    "{}:id {}".format(trk.get_label(), str(trk.id)),
                    (int(bbox[0]), int(bbox[1]) - 12),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.0005 * frame.shape[0],
                    trk.color,
                    2,
                )
                for location in trk.locations:
                    x1, y1, x2, y2 = location[1]
                    cv2.circle(frame, (((int(x1) + int(x2)) // 2), int(y2)), 3,
                               trk.color, -1)
                    del x1, y1, x2, y2

                pred_x, pred_y = predict_location(trk, amount_to_predict=10)
                center_x = (int(bbox[0]) + int(bbox[2])) // 2
                center_y = (int(bbox[1]) + int(bbox[3])) // 2
                cv2.line(
                    frame,
                    (int(pred_x), int(pred_y)),
                    (center_x, center_y),
                    trk.color,
                    8,
                )
                # text = "{}".format(
                #     distance_from_xy((center_x, center_y), (int(pred_x), int(pred_y)))
                # )
                # midpoint = ((center_x + pred_x) // 2, (center_y + pred_y) // 2)
                # cv2.putText(frame, text, (int(midpoint[0]), int(midpoint[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, trk.color)
                if len(alive) > 1:
                    for trk2 in alive:
                        if trk.id == trk2.id:
                            continue
                    ttc = time_til_collision(trk, trk2)
                    ttc_thresh = 5
                    if ttc > 0 and ttc < ttc_thresh:

                        print(
                            "Potential accident between ID {} and {}.\nTTC:{}s"
                            .format(trk.id, trk2.id, ttc))
                        cv2.putText(
                            frame,
                            "ID {} and {} have TTC {}".format(
                                trk.id, trk2.id, ttc),
                            (0, 20),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            1,
                            (0, 0, 255),
                        )
                        should_write = True
                    del ttc_thresh, ttc

                del t, bbox, pred_x, pred_y, center_x, center_y
        # write_queue.put(frame[0 : original_dim[0], 0 : original_dim[1]])
        cv2.imshow("Object Tracking", frame[0:original_dim[0],
                                            0:original_dim[1]])

        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            raise KeyboardInterrupt
        del should_write

Exemple #9

0

Afficher le fichier

Fichier : run.py Projet : freakcap/action-recognition-anomaly-detection

class Ui_MainWindow(QtWidgets.QWidget):

    def __init__(self, parent=None):
        super(Ui_MainWindow, self).__init__(parent)
        self.tracker = Sort(settings.sort_max_age, settings.sort_min_hit)
        self.timer_camera = QtCore.QTimer()
        self.cap = cv2.VideoCapture()
        self.CAM_NUM = 0
        self.set_ui()
        self.slot_init()
        self.__flag_mode = 0
        self.fps = 0.00
        self.data = {}
        self.memory = {}
        self.joints = []
        self.current = []
        self.previous = []

    def set_ui(self):

        self.__layout_main = QtWidgets.QHBoxLayout()
        self.__layout_fun_button = QtWidgets.QVBoxLayout()
        self.__layout_data_show = QtWidgets.QVBoxLayout()
        self.__layout_time_input_from = QtWidgets.QHBoxLayout()
        self.__layout_time_input_to = QtWidgets.QHBoxLayout()

        self.button_open_camera = QtWidgets.QPushButton(u'Realtime (Camera) OFF')

        self.button_mode_1 = QtWidgets.QPushButton(u'Attitude estimation OFF')
        self.button_mode_2 = QtWidgets.QPushButton(u'Multiplayer tracking OFF')
        self.button_mode_3 = QtWidgets.QPushButton(u'Behavior recognition OFF')
        self.textbox_from = QLineEdit(self)
        self.textbox_to = QLineEdit(self)
        self.nameLabel_from = QLabel(self)
        self.nameLabel_to = QLabel(self)
       # self.nameLabel_uploadInfo = QLabel(self)
        
        
        self.button_mode_4 = QtWidgets.QPushButton(u'Upload prerecorded file')
        self.button_mode_5 = QtWidgets.QPushButton(u'Face recognition OFF')

        self.button_close = QtWidgets.QPushButton(u'Exit')

        self.button_open_camera.setMinimumHeight(50)
        self.button_mode_1.setMinimumHeight(50)
        self.button_mode_2.setMinimumHeight(50)
        self.button_mode_3.setMinimumHeight(50)
        self.button_mode_4.setMinimumHeight(50)
        self.button_mode_5.setMinimumHeight(50)
        self.textbox_from.setMinimumHeight(35)
        self.textbox_to.setMinimumHeight(35)
        self.nameLabel_from.setText('FROM:  ')
        self.nameLabel_to.setText('To:       ')
      #  self.nameLabel_uploadInfo.setText('For Scanning a prerecorded footage ( insert time period (from and to) to narrow down search):')
       

        self.button_close.setMinimumHeight(50)

        self.button_close.move(10, 100)

        # self.infoBox = QtWidgets.QTextBrowser(self)
        # self.infoBox.setGeometry(QtCore.QRect(10, 300, 200, 180))

     
        self.label_show_camera = QtWidgets.QLabel()
        self.label_move = QtWidgets.QLabel()
        self.label_move.setFixedSize(200,200)

        self.label_show_camera.setFixedSize(settings.winWidth + 1, settings.winHeight + 1)
        self.label_show_camera.setAutoFillBackground(True)
	

        self.__layout_time_input_from.addWidget(self.nameLabel_from)
        self.__layout_time_input_from.addWidget(self.textbox_from)
        self.__layout_time_input_to.addWidget(self.nameLabel_to)
        self.__layout_time_input_to.addWidget(self.textbox_to)

        self.__layout_fun_button.addWidget(self.button_open_camera)
        self.__layout_fun_button.addWidget(self.button_mode_1)
        self.__layout_fun_button.addWidget(self.button_mode_2)
        self.__layout_fun_button.addWidget(self.button_mode_3)
        self.__layout_fun_button.addWidget(self.button_mode_5)
        #self.__layout_fun_button.addWidget(self.nameLabel_uploadInfo)
        self.__layout_fun_button.addLayout(self.__layout_time_input_from)
        self.__layout_fun_button.addLayout(self.__layout_time_input_to)
        
        self.__layout_fun_button.addWidget(self.button_mode_4)
        self.__layout_fun_button.addWidget(self.button_close)
        self.__layout_fun_button.addWidget(self.label_move)

        self.__layout_main.addLayout(self.__layout_fun_button)
        self.__layout_main.addWidget(self.label_show_camera)

        self.setLayout(self.__layout_main)
        self.label_move.raise_()
        self.setWindowTitle(u'Anomaly Detection using Action Recognition')

    def slot_init(self):
        self.button_open_camera.clicked.connect(self.button_event)
        self.timer_camera.timeout.connect(self.show_camera)

        self.button_mode_1.clicked.connect(self.button_event)
        self.button_mode_2.clicked.connect(self.button_event)
        self.button_mode_3.clicked.connect(self.button_event)
        self.button_mode_5.clicked.connect(self.button_event)
        self.button_mode_4.clicked.connect(self.timestamp)  # Upload button
        self.button_close.clicked.connect(self.close)

    def button_event(self):
        sender = self.sender()
        if sender == self.button_mode_1 and self.timer_camera.isActive():
            if self.__flag_mode != 1:
                self.__flag_mode = 1
                self.button_mode_1.setText(u'Attitude estimation ON')
                self.button_mode_2.setText(u'Multiplayer tracking OFF')
                self.button_mode_3.setText(u'Behavior recognition OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_1.setText(u'Attitude estimation OFF')
               # self.infoBox.setText(u'Camera is on')
        elif sender == self.button_mode_2 and self.timer_camera.isActive():
            if self.__flag_mode != 2:
                self.__flag_mode = 2
                self.button_mode_1.setText(u'Attitude estimation OFF')
                self.button_mode_2.setText(u'Multiplayer tracking ON')
                self.button_mode_3.setText(u'Behavior recognition OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_2.setText(u'Multiplayer tracking OFF')
               # self.infoBox.setText(u'Camera is on')
        elif sender == self.button_mode_5 and self.timer_camera.isActive():
            if self.__flag_mode != 5:
                self.__flag_mode = 5
                self.button_mode_1.setText(u'Attitude estimation OFF')
                self.button_mode_2.setText(u'Multiplayer tracking OFF')
                self.button_mode_3.setText(u'Behavior recognition OFF')
                self.button_mode_5.setText(u'Face recognition ON')
            else:
                self.__flag_mode = 0
                self.button_mode_5.setText(u'Face recognition OFF')
               # self.infoBox.setText(u'Camera is on')
        elif sender == self.button_mode_3 and self.timer_camera.isActive():
            if self.__flag_mode != 3:
                self.__flag_mode = 3
                self.button_mode_1.setText(u'Attitude estimation OFF')
                self.button_mode_2.setText(u'Multiplayer tracking OFF')
                self.button_mode_3.setText(u'Behavior recognition ON')
            else:
                self.__flag_mode = 0
                self.button_mode_3.setText(u'Behavior recognition OFF')
               # self.infoBox.setText(u'Camera is on')
       
        else:
            self.__flag_mode = 0
            self.button_mode_1.setText(u'Attitude estimation OFF')
            self.button_mode_2.setText(u'Multiplayer tracking OFF')
            self.button_mode_3.setText(u'Behavior recognition OFF')
            self.button_mode_5.setText(u'Face recognition OFF')
            if self.timer_camera.isActive() == False:
                flag = self.cap.open(self.CAM_NUM)
                self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, settings.winWidth)
                self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, settings.winHeight)
                if flag == False:
                    msg = QtWidgets.QMessageBox.warning(self, u"Warning", u"Please check if the camera and computer are connected correctly",
                                                        buttons=QtWidgets.QMessageBox.Ok,
                                                        defaultButton=QtWidgets.QMessageBox.Ok)
                else:
                    self.timer_camera.start(1)
                    self.button_open_camera.setText(u'Realtime (Camera) ON')
                  #  self.infoBox.setText(u'Camera is on')
            else:
                self.timer_camera.stop()
                self.cap.release()
                self.label_show_camera.clear()
                self.button_open_camera.setText(u'Realtime (Camera) OFF')
               # self.infoBox.setText(u'Camera is off')


    def timestamp(self):
	    filePath = QFileDialog.getOpenFileName(self,'Single File',"~/",'*.*')
	    print(filePath[0])


	    prototxt = './weights/deploy.prototxt'


		# The path to the pretrained Caffe model.
	    model = './weights/weights.caffemodel'


		# for counting how many images detected in video
	    counter = 0
	    confidence_thr = 0.5
	    cap = cv2.VideoCapture(filePath[0])
	    net = cv2.dnn.readNetFromCaffe(prototxt, model)
	    ip_time = time.strptime(self.textbox_from.text(), "%H.%M.%S")
	    stop_time = time.strptime(self.textbox_to.text(), "%H.%M.%S")

	    seconds = (ip_time.tm_hour*60*60*1000 + ip_time.tm_min*60*1000 + ip_time.tm_sec*1000)/1000
	    stop_seconds = (stop_time.tm_hour*60*60*1000 + stop_time.tm_min*60*1000 + stop_time.tm_sec*1000)/1000

	    cap.set(0,seconds*1000)
		#print('playing from ' + str(int(seconds)) + ' seconds')
		#print('till ' + str(int(stop_seconds)) + ' seconds')
		#print('press q to abort')
	    timestamp = 0

	    faces = [];  #list containing the times at which faces occur in the stream

	    st_time = time.time() 
	    while(timestamp<=stop_seconds):

	        ret, frame = cap.read()
	        timestamp = seconds + time.time() - st_time
		    
		    
	        frame = cv2.putText(frame, "{0:.0f}".format(timestamp), (30,30),cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0,0,0), 2)

	        if str(type(frame)) == "<class 'NoneType'>":
	            break
		    


		    #kernel = np.array([[0,-1,0], [-1,8,-1], [0,-1,0]])
		    #frame = cv2.filter2D(frame, -1, kernel)

	        (h, w) = frame.shape[:2]
	        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,(300, 300), (104.0, 177.0, 123.0))
		 
	        net.setInput(blob)
	        detections = net.forward()


	        for i in range(0, detections.shape[2]):

	            confidence = detections[0, 0, i, 2]
	            if confidence < 0.5:
	                continue
		        
	            faces.append(int(timestamp))
		        
	            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
	            coordinate = (startX, startY, endX, endY) = box.astype("int")
	            detected_face = frame[startY:endY, startX:endX]
	            name_for_face = './Cropped_Faces/face_at_'+str(int(timestamp))+'.jpg'
	            counter += 1

	            cv2.imwrite(name_for_face, detected_face)
	            text = "{:.2f}%".format(confidence * 100)
	            y = startY - 10 if startY - 10 > 10 else startY + 10
	            cv2.rectangle(frame, (startX, startY), (endX, endY),(0, 255, 0), 2)
	            cv2.putText(frame, text, (startX, y),cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

	        cv2.imshow("Feed", frame)
	        key = cv2.waitKey(1) & 0xFF
	        if key == ord("q"):
	        	break   
	    cap.release()
	    cv2.destroyAllWindows()    


	    faces_unique = [];            #list containing the unique times (in seconds) at which faces occured  

	    for i in range(1,len(faces)):
	        if(faces[i] != faces[i-1]):
	            faces_unique.append(faces[i])


    


    def show_camera(self):
        start = time.time()
        ret, frame = self.cap.read()
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)
        if ret:
            if self.__flag_mode == 1:
              #  self.infoBox.setText(u'Current human body Attitude estimation Mode')
                humans = poseEstimator.inference(show)
                show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

            elif self.__flag_mode == 2:
              #  self.infoBox.setText(u'Currently Multiplayer tracking Mode')
                humans = poseEstimator.inference(show)
                show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)

                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)
        
            elif self.__flag_mode == 3:
              #  self.infoBox.setText(u'Current human body Behavior recognition Mode')
                humans = poseEstimator.inference(show)
                ori = np.copy(show)
                show, joints, bboxes, xcenter, sk= TfPoseEstimator.get_skeleton(show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)
                    self.current = [i[-1] for i in trackers]

                    if len(self.previous) > 0:
                        for item in self.previous:
                            if item not in self.current and item in self.data:
                                del self.data[item]
                            if item not in self.current and item in self.memory:
                                del self.memory[item]

                    self.previous = self.current
                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        try:
                            j = np.argmin(np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                        except:
                            j = 0
                        if joint_filter(joints[j]):
                            joints[j] = joint_completion(joint_completion(joints[j]))
                            if label not in self.data:
                                self.data[label] = [joints[j]]
                                self.memory[label] = 0
                            else:
                                self.data[label].append(joints[j])

                            if len(self.data[label]) == settings.L:
                                pred = actionPredictor().move_status(self.data[label])
                                if pred == 0:
                                    pred = self.memory[label]
                                else:
                                    self.memory[label] = pred
                                self.data[label].pop(0)

                                location = self.data[label][-1][1]
                                if location[0] <= 30:
                                    location = (51, location[1])
                                if location[1] <= 10:
                                    location = (location[0], 31)

                                cv2.putText(show, settings.move_status[pred], (location[0] - 30, location[1] - 10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                            (0, 255, 0), 2)

                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            end = time.time()
            self.fps = 1. / (end - start)
            cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0], QtGui.QImage.Format_RGB888)
            self.label_show_camera.setPixmap(QtGui.QPixmap.fromImage(showImage))

    def closeEvent(self, event):
        ok = QtWidgets.QPushButton()
        cancel = QtWidgets.QPushButton()

        msg = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Warning, u"Shut down", u"Confirm Shut down！")

        msg.addButton(ok, QtWidgets.QMessageBox.ActionRole)
        msg.addButton(cancel, QtWidgets.QMessageBox.RejectRole)
        ok.setText(u'Yes')
        cancel.setText(u'No')
        if msg.exec_() == QtWidgets.QMessageBox.RejectRole:
            event.ignore()
        else:
            if self.cap.isOpened():
                self.cap.release()
            if self.timer_camera.isActive():
                self.timer_camera.stop()
            event.accept()
            print("System exited.")

Exemple #10

0

Afficher le fichier

class Ui_MainWindow(object):
    def __init__(self, parent=None):
        #super(Ui_MainWindow, self).__init__(parent)
        self.tracker = Sort(settings.sort_max_age, settings.sort_min_hit)
        # self.timer_camera = QtCore.QTimer()
        #self.cap = cv2.VideoCapture()
        self.cap = cv2.VideoCapture('movie.mp4')
        self.CAM_NUM = 0
        #self.set_ui()
        #self.slot_init()
        self.__flag_mode = 3
        self.fps = 0.00
        self.data = {}
        self.memory = {}
        self.joints = []
        self.current = []
        self.previous = []

        self.fourcc = cv2.VideoWriter_fourcc(
            *'mp4v')  # Be sure to use lower case
        self.out = cv2.VideoWriter('x.mp4', self.fourcc, 1, (432, 368))
        # Define the codec and create VideoWriter object

    def show_camera(self, frame):
        start = time.time()
        # ret, frame = self.cap.read()
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)
        if ret:
            if self.__flag_mode == 1:

                humans = poseEstimator.inference(show)
                show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

            elif self.__flag_mode == 2:
                humans = poseEstimator.inference(show)
                show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                    show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)

                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            # This is the part to get the kt points
            elif self.__flag_mode == 3:
                humans = poseEstimator.inference(show)
                ori = np.copy(show)
                show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
                    show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)
                    self.current = [i[-1] for i in trackers]

                    if len(self.previous) > 0:
                        for item in self.previous:
                            if item not in self.current and item in self.data:
                                del self.data[item]
                            if item not in self.current and item in self.memory:
                                del self.memory[item]

                    self.previous = self.current
                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        try:
                            j = np.argmin(
                                np.array([
                                    abs(i - (xmax + xmin) / 2.)
                                    for i in xcenter
                                ]))
                        except:
                            j = 0
                        if joint_filter(joints[j]):
                            joints[j] = joint_completion(
                                joint_completion(joints[j]))
                            if label not in self.data:
                                self.data[label] = [joints[j]]
                                self.memory[label] = 0
                            else:
                                self.data[label].append(joints[j])

                            if len(self.data[label]) == settings.L:
                                pred = actionPredictor().move_status(
                                    self.data[label])
                                if pred == 0:
                                    pred = self.memory[label]
                                else:
                                    self.memory[label] = pred
                                self.data[label].pop(0)

                                location = self.data[label][-1][1]
                                if location[0] <= 30:
                                    location = (51, location[1])
                                if location[1] <= 10:
                                    location = (location[0], 31)

                                cv2.putText(
                                    show, settings.move_status[pred],
                                    (location[0] - 30, location[1] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0),
                                    2)

                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            cv2.imshow('window', cv2.resize(show, (432, 368)))
            self.out.write(show)  # Write out frame to video
            end = time.time()

    def release(self):
        self.out.release()

Exemple #11

0

Afficher le fichier

Fichier : actions.py Projet : zhijunhan/Real-Time-Action-Recognition

class actions(object):
    def __init__(self, arguments):
        self.arguments = arguments

        # Frame window dim
        self.winWidth = 640
        self.winHeight = 480

        actionPredictor_params.__init__(self)

        self.fps_time = 0
        #self.step = 15
        self.mode = {
            'Pose Estimation': 'estimation',
            'Tracking': 'tracking',
            'Action Recognition': 'recognition'
        }

        w, h = model_wh(self.arguments.resize)
        if w > 0 and h > 0:
            self.estimator = TfPoseEstimator(get_graph_path(
                self.arguments.model),
                                             target_size=(w, h))
        else:
            self.estimator = TfPoseEstimator(get_graph_path(
                self.arguments.model),
                                             target_size=(432, 368))

        self.cam = cv2.VideoCapture(self.arguments.camera)

        # Tracker based on Sort
        self.sort_max_age = 20
        self.sort_min_hit = 3
        self.tracker = Sort(self.sort_max_age, self.sort_min_hit)

    def proceed(self):
        self._read_frame_()

        if self.ret_val and self.arguments.mode == self.mode['Pose Estimation']:
            self._perform_estimation_()

        elif self.ret_val and self.arguments.mode == self.mode['Tracking']:
            self._perform_tracking_()

        elif self.ret_val and self.arguments.mode == self.mode[
                'Action Recognition']:
            self._perform_recognition_()

        else:
            sys.exit(
                'Abort...please choose correct action mode from "estimation" "tracking" "recognition"'
            )

        self._output_()

    def _joint_filter_(self, joint):
        if 1 not in joint:
            return False
        # Check exist of hip
        if 8 not in joint and 11 not in joint:
            return False
        # Check exist of knee
        if 9 not in joint and 12 not in joint:
            return False
        return True

    def _joint_complete_(self, joint):
        if 8 in joint and 11 not in joint:
            joint[11] = joint[8]
        elif 8 not in joint and 11 in joint:
            joint[8] = joint[11]
        if 9 in joint and 12 not in joint:
            joint[12] = joint[9]
        elif 9 not in joint and 12 in joint:
            joint[9] = joint[12]

        return joint

    def _perform_estimation_(self):
        self.humans = self.estimator.inference(self.image)
        self.image = TfPoseEstimator.draw_humans(self.image,
                                                 self.humans,
                                                 imgcopy=False)

    def _perform_tracking_(self):
        self.humans = self.estimator.inference(self.image)
        self.image, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
            self.image, self.humans, imgcopy=False)

        height = self.image.shape[0]
        width = self.image.shape[1]

        if bboxes:
            result = np.array(bboxes)
            det = result[:, 0:5]
            det[:, 0] = det[:, 0] * width
            det[:, 1] = det[:, 1] * height
            det[:, 2] = det[:, 2] * width
            det[:, 3] = det[:, 3] * height
            trackers = self.tracker.update(det)

            for d in trackers:
                xmin = int(d[0])
                ymin = int(d[1])
                xmax = int(d[2])
                ymax = int(d[3])
                label = int(d[4])
                cv2.rectangle(
                    self.image, (xmin, ymin), (xmax, ymax),
                    (int(self.c[label % 32, 0]), int(
                        self.c[label % 32, 1]), int(self.c[label % 32, 2])), 4)

    def _perform_recognition_(self):

        self.predictor = actionPredictor()

        self.humans = self.estimator.inference(self.image)
        self.image, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(
            self.image, self.humans, imgcopy=False)

        height = self.image.shape[0]
        width = self.image.shape[1]

        if bboxes:
            result = np.array(bboxes)
            det = result[:, 0:5]
            det[:, 0] = det[:, 0] * width
            det[:, 1] = det[:, 1] * height
            det[:, 2] = det[:, 2] * width
            det[:, 3] = det[:, 3] * height
            trackers = self.tracker.update(det)

            self.current = [i[-1] for i in trackers]

            if len(self.previous) > 0:
                for item in self.previous:
                    if item not in self.current and item in self.data:
                        del self.data[item]
                    if item not in self.current and item in self.memory:
                        del self.memory[item]
            self.previous = self.current

            for d in trackers:
                xmin = int(d[0])
                ymin = int(d[1])
                xmax = int(d[2])
                ymax = int(d[3])
                label = int(d[4])

                #logger.debug('label is: %d' % (label))

                # Locate the current person object in current frame
                # Iterated thru xcenter for finding minimum distance between object center coord
                try:
                    j = np.argmin(
                        np.array(
                            [abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                except:
                    j = 0
                # Check if major skeleton points are existing
                if self._joint_filter_(joints[j]):
                    joints[j] = self._joint_complete_(
                        self._joint_complete_(joints[j]))

                    if label not in self.data:
                        #logger.debug('label is: %d' % (label))

                        self.data[label] = [joints[j]]
                        self.memory[label] = 0
                    else:
                        self.data[label].append(joints[j])

                    if len(self.data[label]) == self.step:
                        pred = self.predictor.move_status(self.data[label])

                        #logger.debug(len(self.data[label]))

                        if pred == 0:
                            pred = self.memory[label]
                        else:
                            self.memory[label] = pred
                        self.data[label].pop(0)

                        location = self.data[label][-1][1]
                        #location = functools.reduce(lambda x, y: x + y, self.data[label][:][1]) / len(self.data[label][:][1])
                        #location = sum(self.data[label][:][1]) / float(len(self.data[label][:][1]))

                        if location[0] <= 30:
                            location = (51, location[1])
                        if location[1] <= 10:
                            location = (location[0], 31)

                        cv2.putText(self.image, self.move_status[pred],
                                    (location[0] - 30, location[1] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                    (255, 255, 255), 2)

                cv2.rectangle(
                    self.image, (xmin, ymin), (xmax, ymax),
                    (int(self.c[label % 32, 0]), int(
                        self.c[label % 32, 1]), int(self.c[label % 32, 2])), 4)

    def _read_frame_(self):
        self.ret_val, self.image = self.cam.read()

        self.image = cv2.resize(self.image, (self.winWidth, self.winHeight))

    def _output_(self):
        # Calculate frame averaging step
        FPS = float(1.0 / (time.time() - self.fps_time))
        #logger.debug('FPS: %f' % FPS)

        #self.step = int(0.7 * FPS)
        #logger.debug('step: %d' % self.step)

        cv2.putText(self.image,
                    "FPS: %f" % (1.0 / (time.time() - self.fps_time)),
                    (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255),
                    2)
        cv2.imshow('tf-pose-estimation result', self.image)
        self.fps_time = time.time()

Exemple #12

0

Afficher le fichier

Fichier : run.py Projet : zhanghaowei01/behavior-recognition

class Ui_MainWindow(QtWidgets.QWidget):

    def __init__(self, parent=None):
        super(Ui_MainWindow, self).__init__(parent)
        self.tracker = Sort(settings.sort_max_age, settings.sort_min_hit)
        self.timer_camera = QtCore.QTimer()
        self.cap = cv2.VideoCapture()
        self.CAM_NUM = 0
        self.set_ui()
        self.slot_init()
        self.__flag_mode = 0
        self.fps = 0.00
        self.data = {}
        self.memory = {}
        self.joints = []
        self.current = []
        self.previous = []
        self.font = ImageFont.truetype(settings.fontpath, settings.fontsize)

    def set_ui(self):

        self.__layout_main = QtWidgets.QHBoxLayout()
        self.__layout_fun_button = QtWidgets.QVBoxLayout()
        self.__layout_data_show = QtWidgets.QVBoxLayout()

        self.button_open_camera = QtWidgets.QPushButton(u'相机 OFF')

        self.button_mode_1 = QtWidgets.QPushButton(u'姿态估计 OFF')
        self.button_mode_2 = QtWidgets.QPushButton(u'多人跟踪 OFF')
        self.button_mode_3 = QtWidgets.QPushButton(u'行为识别 OFF')

        self.button_close = QtWidgets.QPushButton(u'退出')

        self.button_open_camera.setMinimumHeight(50)
        self.button_mode_1.setMinimumHeight(50)
        self.button_mode_2.setMinimumHeight(50)
        self.button_mode_3.setMinimumHeight(50)

        self.button_close.setMinimumHeight(50)

        self.button_close.move(10, 100)

        self.infoBox = QtWidgets.QTextBrowser(self)
        self.infoBox.setGeometry(QtCore.QRect(10, 300, 200, 180))

        # 信息显示
        self.label_show_camera = QtWidgets.QLabel()
        self.label_move = QtWidgets.QLabel()
        self.label_move.setFixedSize(200, 200)

        self.label_show_camera.setFixedSize(settings.winWidth + 1, settings.winHeight + 1)
        self.label_show_camera.setAutoFillBackground(True)

        self.__layout_fun_button.addWidget(self.button_open_camera)
        self.__layout_fun_button.addWidget(self.button_mode_1)
        self.__layout_fun_button.addWidget(self.button_mode_2)
        self.__layout_fun_button.addWidget(self.button_mode_3)
        self.__layout_fun_button.addWidget(self.button_close)
        self.__layout_fun_button.addWidget(self.label_move)

        self.__layout_main.addLayout(self.__layout_fun_button)
        self.__layout_main.addWidget(self.label_show_camera)

        self.setLayout(self.__layout_main)
        self.label_move.raise_()
        self.setWindowTitle(u'实时多人姿态估计与行为识别系统')

    def slot_init(self):
        self.button_open_camera.clicked.connect(self.button_event)
        self.timer_camera.timeout.connect(self.show_camera)

        self.button_mode_1.clicked.connect(self.button_event)
        self.button_mode_2.clicked.connect(self.button_event)
        self.button_mode_3.clicked.connect(self.button_event)
        self.button_close.clicked.connect(self.close)

    def button_event(self):
        sender = self.sender()
        if sender == self.button_mode_1 and self.timer_camera.isActive():
            if self.__flag_mode != 1:
                self.__flag_mode = 1
                self.button_mode_1.setText(u'姿态估计 ON')
                self.button_mode_2.setText(u'多人跟踪 OFF')
                self.button_mode_3.setText(u'行为识别 OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_1.setText(u'姿态估计 OFF')
                self.infoBox.setText(u'相机已打开')
        elif sender == self.button_mode_2 and self.timer_camera.isActive():
            if self.__flag_mode != 2:
                self.__flag_mode = 2
                self.button_mode_1.setText(u'姿态估计 OFF')
                self.button_mode_2.setText(u'多人跟踪 ON')
                self.button_mode_3.setText(u'行为识别 OFF')
            else:
                self.__flag_mode = 0
                self.button_mode_2.setText(u'多人跟踪 OFF')
                self.infoBox.setText(u'相机已打开')
        elif sender == self.button_mode_3 and self.timer_camera.isActive():
            if self.__flag_mode != 3:
                self.__flag_mode = 3
                self.button_mode_1.setText(u'姿态估计 OFF')
                self.button_mode_2.setText(u'多人跟踪 OFF')
                self.button_mode_3.setText(u'行为识别 ON')
            else:
                self.__flag_mode = 0
                self.button_mode_3.setText(u'行为识别 OFF')
                self.infoBox.setText(u'相机已打开')
        else:
            self.__flag_mode = 0
            self.button_mode_1.setText(u'姿态估计 OFF')
            self.button_mode_2.setText(u'多人跟踪 OFF')
            self.button_mode_3.setText(u'行为识别 OFF')
            if self.timer_camera.isActive() == False:
                flag = self.cap.open(self.CAM_NUM)
                self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, settings.winWidth)
                self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, settings.winHeight)
                if flag == False:
                    msg = QtWidgets.QMessageBox.warning(self, u"Warning", u"请检测相机与电脑是否连接正确",
                                                        buttons=QtWidgets.QMessageBox.Ok,
                                                        defaultButton=QtWidgets.QMessageBox.Ok)
                else:
                    self.timer_camera.start(1)
                    self.button_open_camera.setText(u'相机 ON')
                    self.infoBox.setText(u'相机已打开')
            else:
                self.timer_camera.stop()
                self.cap.release()
                self.label_show_camera.clear()
                self.button_open_camera.setText(u'相机 OFF')
                self.infoBox.setText(u'相机已关闭')

    def show_camera(self):
        start = time.time()
        ret, frame = self.cap.read()
        show = cv2.resize(frame, (settings.winWidth, settings.winHeight))
        show = cv2.cvtColor(show, cv2.COLOR_BGR2RGB)
        if ret:
            if self.__flag_mode == 1:
                self.infoBox.setText(u'当前为人体姿态估计模式')
                humans = poseEstimator.inference(show)
                show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)

            elif self.__flag_mode == 2:
                self.infoBox.setText(u'当前为多人跟踪模式')
                humans = poseEstimator.inference(show)
                show, joints, bboxes, xcenter, sk = TfPoseEstimator.get_skeleton(show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)

                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      (int(settings.c[label % 32, 0]),
                                       int(settings.c[label % 32, 1]),
                                       int(settings.c[label % 32, 2])), 4)

            elif self.__flag_mode == 3:
                self.infoBox.setText(u'当前为人体行为识别模式')
                humans = poseEstimator.inference(show)
                show = TfPoseEstimator.draw_humans(show, humans, imgcopy=False)
                ori = np.copy(show)
                show, joints, bboxes, xcenter, sk= TfPoseEstimator.get_skeleton(show, humans, imgcopy=False)
                height = show.shape[0]
                width = show.shape[1]
                if bboxes:
                    result = np.array(bboxes)
                    det = result[:, 0:5]
                    det[:, 0] = det[:, 0] * width
                    det[:, 1] = det[:, 1] * height
                    det[:, 2] = det[:, 2] * width
                    det[:, 3] = det[:, 3] * height
                    trackers = self.tracker.update(det)
                    self.current = [i[-1] for i in trackers]

                    if len(self.previous) > 0:
                        for item in self.previous:
                            if item not in self.current and item in self.data:
                                del self.data[item]
                            if item not in self.current and item in self.memory:
                                del self.memory[item]

                    self.previous = self.current
                    for d in trackers:
                        xmin = int(d[0])
                        ymin = int(d[1])
                        xmax = int(d[2])
                        ymax = int(d[3])
                        label = int(d[4])
                        try:
                            j = np.argmin(np.array([abs(i - (xmax + xmin) / 2.) for i in xcenter]))
                        except:
                            j = 0

                        alert = False
                        if joint_filter(joints[j]):
                            joints[j] = joint_completion(joint_completion(joints[j]))
                            if label not in self.data:
                                self.data[label] = [joints[j]]
                                self.memory[label] = 0
                            else:
                                self.data[label].append(joints[j])

                            if len(self.data[label]) == settings.L:
                                pred = actionPredictor().move_status(self.data[label])
                                if pred == 0:
                                    pred = self.memory[label]
                                else:
                                    self.memory[label] = pred
                                self.data[label].pop(0)

                                location = self.data[label][-1][1]
                                if location[0] <= 30:
                                    location = (51, location[1])
                                if location[1] <= 10:
                                    location = (location[0], 31)

                                ## Use simsum.ttc to write Chinese.
                                b, g, r, a = 0, 255, 0, 0
                                img_pil = Image.fromarray(show)
                                draw = ImageDraw.Draw(img_pil)
                                # print('pred', pred)
                                if pred == 8:
                                    draw.text((location[0] - 30, location[1] - 10), settings.move_status[pred], font=self.font, fill=(255, 0, 0, a))
                                    alert = True
                                else:
                                    draw.text((location[0] - 30, location[1] - 10), settings.move_status[pred], font=self.font, fill=(b, g, r, a))
                                show = np.array(img_pil)

                                # cv2.putText(show, settings.move_status[pred], (location[0] - 30, location[1] - 10),
                                #             cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                #             (0, 255, 0), 2)
                        if alert:
                            colors = (255, 0, 0)
                        else:
                            colors = (int(settings.c[label % 32, 0]),
                                           int(settings.c[label % 32, 1]),
                                           int(settings.c[label % 32, 2]))
                        cv2.rectangle(show, (xmin, ymin), (xmax, ymax),
                                      colors, 4)

                        if alert:
                            cv2.imwrite('files/alerts/fall_' + str(int(time.time())) + '.png', show)

            end = time.time()
            self.fps = 1. / (end - start)
            cv2.putText(show, 'FPS: %.2f' % self.fps, (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            showImage = QtGui.QImage(show.data, show.shape[1], show.shape[0], QtGui.QImage.Format_RGB888)
            self.label_show_camera.setPixmap(QtGui.QPixmap.fromImage(showImage))

    def closeEvent(self, event):
        ok = QtWidgets.QPushButton()
        cancel = QtWidgets.QPushButton()

        msg = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Warning, u"关闭", u"是否关闭！")

        msg.addButton(ok, QtWidgets.QMessageBox.ActionRole)
        msg.addButton(cancel, QtWidgets.QMessageBox.RejectRole)
        ok.setText(u'确定')
        cancel.setText(u'取消')
        if msg.exec_() == QtWidgets.QMessageBox.RejectRole:
            event.ignore()
        else:
            if self.cap.isOpened():
                self.cap.release()
            if self.timer_camera.isActive():
                self.timer_camera.stop()
            event.accept()
            print("System exited.")

Exemple #13

0

Afficher le fichier

def main(cap_queue):
    yolo = YOLO()
    sort = Sort(iou_threshold=0.05)
    whitelist = [
        "person",
        "bicycle",
        "car",
        "motorbike",
        "aeroplane",
        "bus",
        "truck",
        "boat",
        "fire hydrant",
        "stop sign",
        "parking meter",
        "bird",
        "cat",
        "dog",
        "backpack",
        "umbrella",
        "handbag",
        "suitcase",
    ]
    tally = 0
    while tally < 1332:
        frame = cap_queue.get(block=True)
        if type(frame) == int and frame == -1:
            return
        outputs = yolo.get_results(frame)
        detections = []
        labels = []

        for output in outputs:
            label = yolo.classes[int(output[-1])]
            if not label in whitelist:
                del label
                continue

            tl = tuple(output[1:3].int())
            br = tuple(output[3:5].int())

            detections.append(
                np.array(
                    [tl[0].item(), tl[1].item(), br[0].item(), br[1].item()]))
            labels.append(label)

            del tl, br, label
        del outputs

        detections = np.array(detections)
        if detections.shape[0] > 0:
            try:
                alive, _ = sort.update(detections, labels)
            except IndexError:
                del frame, detections, labels
                continue

            for trk in alive:
                t = trk.get_state()[0]
                try:
                    bbox = [int(t[0]), int(t[1]), int(t[2]), int(t[3])]
                except ValueError:
                    continue
                cv2.rectangle(
                    frame,
                    (int(bbox[0]), int(bbox[1])),
                    (int(bbox[2]), int(bbox[3])),
                    trk.color,
                    2,
                )
                cv2.putText(
                    frame,
                    "{}:id {}".format(trk.get_label(), str(trk.id)),
                    (int(bbox[0]), int(bbox[1]) - 12),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.0005 * frame.shape[0],
                    trk.color,
                    2,
                )
                for location in trk.locations:
                    x1, y1, x2, y2 = location[1]
                    cv2.circle(frame, (((int(x1) + int(x2)) // 2), int(y2)), 3,
                               trk.color, -1)
                    del x1, y1, x2, y2

                if len(alive) > 1:
                    for trk2 in alive:
                        if trk == trk2:
                            continue
                        t2 = trk2.get_state()[0]
                        try:
                            bbox2 = [
                                int(t2[0]),
                                int(t2[1]),
                                int(t2[2]),
                                int(t2[3])
                            ]
                        except ValueError:
                            continue

                        d = distance(bbox, bbox2)
                        color = (255, 255, 255) if d > 50 else (0, 255, 255)

                        cv2.line(
                            frame,
                            ((bbox[0] + bbox[2]) // 2, int(bbox[3])),
                            ((bbox2[0] + bbox2[2]) // 2, int(bbox2[3])),
                            color,
                            2,
                        )
                        cv2.putText(
                            frame,
                            str(d),
                            midpoint(bbox, bbox2),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.0005 * frame.shape[0],
                            color,
                            2,
                        )
                        del d

                del t, bbox

        cv2.imshow("Object Tracking", frame)
        cv2.imwrite("./tracked/{}.png".format(tally), frame)
        tally += 1
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            raise KeyboardInterrupt

Exemple #14

0

Afficher le fichier

Fichier : test.py Projet : ArunaavReddy/Liveness-Detection

        if confidence > args["confidence"]:
            rect = []
            # compute the (x, y)-coordinates of the bounding box for
            # the face and extract the face ROI
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            rect.append(int(detections[0, 0, i, 3] * w))
            rect.append(int(detections[0, 0, i, 4] * h))
            rect.append(int(detections[0, 0, i, 5] * w))
            rect.append(int(detections[0, 0, i, 6] * h))
            rect.append(detections[0, 0, i, 2])
            rect.append(detections[0, 0, i, 1])
            rect.append(detections[0, 0, i, 0])
            boxes.append(rect)

    objects = ct.update(torch.FloatTensor(boxes))
    if objects is not None:

        for x1, y1, x2, y2, obj_id, cls_pred in objects:
            text = "ID {}".format(obj_id)
            cv2.putText(frame, text, (int(x1) + 5, int(y1) + 20),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0), 2)

            # extract the face ROI and then preproces it in the exact
            # same manner as our training data
            face = frame[int(y1):int(y2), int(x1):int(x2)]
            gray_face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            face = cv2.resize(face, (32, 32))
            face = face.astype("float") / 255.0
            face = img_to_array(face)
            face = np.expand_dims(face, axis=0)