Example #1
def run_video_detection(scoreThreshold):
    classes = {}
    inputs = tf.placeholder(tf.float32, [None, 256, 256, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)
    with tf.Session() as sess:
        sess.run(model.pretrained())
        cap = cv2.VideoCapture(0)
        while (cap.isOpened()):
            # currentIndicesDectecting and currentClassDetecting are globals
            # defined elsewhere in the original project (the class being searched for)
            classes[currentIndicesDectecting] = currentClassDetecting
            list_of_classes = [currentIndicesDectecting]
            ret, frame = cap.read()
            if not ret:
                break
            img = cv2.resize(frame, (256, 256))
            imge = np.array(img).reshape(-1, 256, 256, 3)
            start_time = time.time()
            preds = sess.run(model.preds, {inputs: model.preprocess(imge)})

            boxes = model.get_boxes(preds, imge.shape[1:3])
            cv2.namedWindow('Live Camera', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('Live Camera', 500, 500)
            boxes1 = np.array(boxes)

            for j in list_of_classes:
                count = 0
                if j in classes:
                    lab = classes[j]
                else:
                    lab = 'background'
                if len(boxes1) != 0:

                    for i in range(len(boxes1[j])):
                        box = boxes1[j][i]
                        if boxes1[j][i][4] >= scoreThreshold:
                            count += 1
                            obj = lab
                            cv2.rectangle(img, (box[0], box[1]),
                                          (box[2], box[3]), (0, 255, 0), 1)
                            cv2.putText(img,
                                        lab, (box[0], box[1]),
                                        cv2.FONT_HERSHEY_SIMPLEX,
                                        .5, (0, 0, 255),
                                        lineType=cv2.LINE_AA)
                            global coun
                            if coun == 1 and lab == currentClassDetecting:
                                engine.say(
                                    str(currentClassDetecting) + " found")
                                engine.runAndWait()
                                engine.stop()
                                coun = 2

            cv2.imshow("Live Camera", img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    cap.release()
    cv2.destroyAllWindows()
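
Example #1 relies on several names defined elsewhere in its original project: currentClassDetecting, currentIndicesDectecting, coun, and a text-to-speech engine. The following is only a minimal sketch of what that surrounding setup might look like; every value here is an assumption inferred from how the names are used above.

# Hypothetical globals assumed by run_video_detection (not part of the original snippet)
import pyttsx3

currentClassDetecting = 'person'   # COCO label being searched for
currentIndicesDectecting = 0       # matching COCO class index
coun = 1                           # 1 = announce the first find, 2 = already announced
engine = pyttsx3.init()            # text-to-speech engine for the "found" announcement

run_video_detection(scoreThreshold=0.40)
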
Example #2
def check_frame(frame):
    tf.disable_v2_behavior()

    inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)
    classes = {
        '0': 'person'
    }  # '1':'bicycle','2':'car','3':'bike','5':'bus','7':'truck'
    list_of_classes = [0, 1, 2, 3, 5, 7]

    with tf.Session() as sess:
        sess.run(model.pretrained())

        print('Checking Frame')

        img = cv2.resize(frame, (416, 416))
        imge = np.array(img).reshape(-1, 416, 416, 3)
        start_time = time.time()

        preds = sess.run(model.preds, {inputs: model.preprocess(imge)})
        boxes = model.get_boxes(preds, imge.shape[1:3])

        boxes1 = np.array(boxes)

        ret = {}

        for j in list_of_classes:
            count = 0
            if str(j) not in classes:
                # Only labelled classes are counted here ('person'); skip the rest
                continue
            lab = classes[str(j)]
            if len(boxes1) != 0:
                for i in range(len(boxes1[j])):
                    box = boxes1[j][i]

                    if boxes1[j][i][4] >= .40:

                        count += 1

                        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                                      (0, 255, 0), 1)

            ret[lab] = count

        return ret
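
A hedged usage sketch for check_frame; the image path is a placeholder. The function returns a dictionary mapping each counted label to the number of detections, and note that it rebuilds the graph and reloads the pretrained weights on every call, so it is expensive to run per frame.

frame = cv2.imread('street.jpg')   # placeholder image path
counts = check_frame(frame)        # e.g. {'person': 3}
print(counts.get('person', 0), 'people detected')
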
Example #3
    def __init__(self):

        self.url1 = "black.mp4"
        self.inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
        self.model = nets.YOLOv3COCO(self.inputs, nets.Darknet19)
        # model = nets.YOLOv2(inputs, nets.Darknet19)

        # frame=cv2.imread("D://pyworks//yolo//truck.jpg",1)
        self.count = 0
        self.classes = {
            '0': 'person',
            '1': 'bicycle',
            '2': 'car',
            '3': 'bike',
            '5': 'bus',
            '7': 'truck',
            '8': 'chair'
        }
        self.list_of_classes = [0, 1, 2, 3, 5, 7, 8]
Example #4
    def yolo(self):
        self.__inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
        self.__model = nets.YOLOv3COCO(self.__inputs, nets.Darknet19)
        self.__classes = {
            '0': 'person',
            '1': 'bicycle',
            '2': 'car',
            '3': 'bike',
            '5': 'bus',
            '7': 'truck'
        }
        self.__list_of_classes = [0, 1, 2, 3, 5, 7]
        # To display other detected objects, change the classes dict and the
        # list of classes to their respective COCO indices (available on the
        # COCO website). Here index 0 is for person, 1 for bicycle, and so on.
        # To detect all the classes, add every index to this list.
        # (See the configuration sketch after this example.)
        try:
            self.__sess = tf.Session()
            self.__sess.run(self.__model.pretrained())
        except Exception as e:
            print(e)
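
As the comments above note, other COCO classes can be enabled by extending the index-to-label mapping. Below is a small hedged sketch of what a wider configuration might look like; the extra labels are taken from the standard 80-class COCO ordering (see the all_classes list in Example #6 below) and are not part of the original code.

# Hypothetical wider configuration: people, vehicles and traffic lights
classes = {
    '0': 'person',
    '2': 'car',
    '5': 'bus',
    '7': 'truck',
    '9': 'traffic light',
}
list_of_classes = [0, 2, 5, 7, 9]
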
Example #5
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior() 
import tensornets as nets
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join
import time


mypath = 'input'

inputs = tf.placeholder(tf.float32, [None, 416, 416, 3]) 
model = nets.YOLOv3COCO(inputs, nets.Darknet19)

classes = {'0': 'person'}
list_of_classes = [0]

with tf.Session() as sess:
	sess.run(model.pretrained())
	
	files = [f for f in listdir(mypath) if isfile(join(mypath, f))]
	# change the path to your directory, or use 0 for the webcam
	print(files)
	for file in files:
		
		try:
			frame = cv2.imread('input/%s' % file)
		except:
			continue
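
The snippet above ends after reading each file; the per-image detection step inside the loop is not shown. Below is a hedged sketch of what that step typically looks like, following the same pattern as the other examples on this page; it assumes the sess, inputs, model, classes and list_of_classes defined above are in scope.

def detect_people(sess, frame):
    # Sketch only: resize, run the network, and count 'person' boxes above a 0.40 score
    img = cv2.resize(frame, (416, 416))
    imge = np.array(img).reshape(-1, 416, 416, 3)
    preds = sess.run(model.preds, {inputs: model.preprocess(imge)})
    boxes = np.array(model.get_boxes(preds, imge.shape[1:3]))
    for j in list_of_classes:
        lab = classes[str(j)]
        count = sum(1 for box in boxes[j] if box[4] >= 0.40)
        print(lab, ':', count)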
		
Example #6
def countVehicles(param):
    # param -> path of the video, relative to the current working directory

    tf.disable_v2_behavior()

    # Image size must be '416x416' as YoloV3 network expects that specific image size as input
    img_size = 416
    inputs = tf.placeholder(tf.float32, [None, img_size, img_size, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)

    ct = CentroidTracker(
        maxDisappeared=5, maxDistance=50
    )  # Look into 'CentroidTracker' for further info about parameters
    trackers = []  # List of all dlib trackers
    trackableObjects = {}  # Dictionary of trackable objects: object ID -> its corresponding centroids
    skip_frames = 10  # Numbers of frames to skip from detecting
    confidence_level = 0.40  # The confidence level of a detection
    total = 0  # Total number of detected objects from classes of interest
    # Show the original video size in the output window instead of the 416x416
    # image used as the YOLOv3 input (detection still runs at 416x416; only the
    # display size changes when this is True)
    use_original_video_size_as_output_size = True

    video_path = os.getcwd() + param  # "/videos/4.mp4"
    video_name = os.path.basename(video_path)

    # print("Loading video {video_path}...".format(video_path=video_path))
    if not os.path.exists(video_path):
        print("File does not exist. Exited.")
        exit()

    # YoloV3 detects 80 classes represented below
    all_classes = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", \
         "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", \
         "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", \
         "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", \
         "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", \
         "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", \
         "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", \
         "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", \
         "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", \
         "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

    # Classes of interest (with their corresponding indexes for easier looping)
    classes = {1: 'bicycle', 2: 'car', 3: 'motorbike', 5: 'bus', 7: 'truck'}

    with tf.Session() as sess:
        sess.run(model.pretrained())
        cap = cv2.VideoCapture(video_path)

        # Get video size (just for log purposes)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Scale used for output window size and net size
        width_scale = 1
        height_scale = 1

        if use_original_video_size_as_output_size:
            width_scale = width / img_size
            height_scale = height / img_size

        def drawRectangleCV2(img,
                             pt1,
                             pt2,
                             color,
                             thickness,
                             width_scale=width_scale,
                             height_scale=height_scale):
            point1 = (int(pt1[0] * width_scale), int(pt1[1] * height_scale))
            point2 = (int(pt2[0] * width_scale), int(pt2[1] * height_scale))
            return cv2.rectangle(img, point1, point2, color, thickness)

        def drawTextCV2(img,
                        text,
                        pt,
                        font,
                        font_scale,
                        color,
                        lineType,
                        width_scale=width_scale,
                        height_scale=height_scale):
            pt = (int(pt[0] * width_scale), int(pt[1] * height_scale))
            cv2.putText(img, text, pt, font, font_scale, color, lineType)

        def drawCircleCV2(img,
                          center,
                          radius,
                          color,
                          thickness,
                          width_scale=width_scale,
                          height_scale=height_scale):
            center = (int(center[0] * width_scale),
                      int(center[1] * height_scale))
            cv2.circle(img, center, radius, color, thickness)

        # Python 3.5.6 does not support f-strings (next line will generate syntax error)
        #print(f"Loaded {video_path}. Width: {width}, Height: {height}")
        # print("Loaded {video_path}. Width: {width}, Height: {height}".format(video_path=video_path, width=width, height=height))

        skipped_frames_counter = 0

        while (cap.isOpened()):
            try:
                ret, frame = cap.read()
                img = cv2.resize(frame, (img_size, img_size))
            except:
                # End of the video (or a failed read): print the final count and stop
                print("Total count: " + str(total))
                break

            output_img = frame if use_original_video_size_as_output_size else img

            tracker_rects = []

            if skipped_frames_counter == skip_frames:

                # Detecting happens after number of frames have passes specified by 'skip_frames' variable value
                # print("[DETECTING]")

                trackers = []
                skipped_frames_counter = 0  # reset counter

                np_img = np.array(img).reshape(-1, img_size, img_size, 3)

                start_time = time.time()
                predictions = sess.run(model.preds,
                                       {inputs: model.preprocess(np_img)})
                # print("Detection took %s seconds" % (time.time() - start_time))

                # model.get_boxes returns a 80 element array containing information about detected classes
                # each element contains a list of detected boxes, confidence level ...
                detections = model.get_boxes(predictions, np_img.shape[1:3])
                np_detections = np.array(detections)

                # Loop only through classes we are interested in
                for class_index in classes.keys():
                    local_count = 0
                    class_name = classes[class_index]

                    # Loop through detected infos of a class we are interested in
                    for i in range(len(np_detections[class_index])):
                        box = np_detections[class_index][i]

                        if np_detections[class_index][i][4] >= confidence_level:
                            # print("Detected ", class_name, " with confidence of ", np_detections[class_index][i][4])

                            local_count += 1
                            startX, startY, endX, endY = box[0], box[1], box[
                                2], box[3]

                            drawRectangleCV2(output_img, (startX, startY),
                                             (endX, endY), (0, 255, 0), 1)
                            drawTextCV2(output_img, class_name,
                                        (startX, startY),
                                        cv2.FONT_HERSHEY_SIMPLEX, .5,
                                        (0, 0, 255), 1)

                            # Construct a dlib rectangle object from the bounding box coordinates and then start the dlib correlation
                            tracker = dlib.correlation_tracker()
                            rect = dlib.rectangle(int(startX), int(startY),
                                                  int(endX), int(endY))
                            tracker.start_track(img, rect)

                            # Add the tracker to our list of trackers so we can utilize it during skip frames
                            trackers.append(tracker)

                    # Write the total number of detected objects for a given class on this frame
                    # print(class_name," : ", local_count)
            else:

                # If detection is not happening then track previously detected objects (if any)
                # print("[TRACKING]")

                skipped_frames_counter += 1  # Increase the number frames for which we did not use detection

                # Loop through tracker, update each of them and display their rectangle
                for tracker in trackers:
                    tracker.update(img)
                    pos = tracker.get_position()

                    # Unpack the position object
                    startX = int(pos.left())
                    startY = int(pos.top())
                    endX = int(pos.right())
                    endY = int(pos.bottom())

                    # Add the bounding box coordinates to the tracking rectangles list
                    tracker_rects.append((startX, startY, endX, endY))

                    # Draw tracking rectangles
                    drawRectangleCV2(output_img, (startX, startY),
                                     (endX, endY), (255, 0, 0), 1)

            # Use the centroid tracker to associate the (1) old object centroids with (2) the newly computed object centroids
            objects = ct.update(tracker_rects)

            # Loop over the tracked objects
            for (objectID, centroid) in objects.items():
                # Check to see if a trackable object exists for the current object ID
                to = trackableObjects.get(objectID, None)

                if to is None:
                    # If there is no existing trackable object, create one
                    to = TrackableObject(objectID, centroid)
                else:
                    to.centroids.append(centroid)

                    # If the object has not been counted, count it and mark it as counted
                    if not to.counted:
                        total += 1
                        to.counted = True

                # Store the trackable object in our dictionary
                trackableObjects[objectID] = to

                # Draw both the ID of the object and the centroid of the object on the output frame
                object_id = "ID {}".format(objectID)
                drawTextCV2(output_img, object_id,
                            (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                drawCircleCV2(output_img, (centroid[0], centroid[1]), 2,
                              (0, 255, 0), -1)

                # Display the total count so far
                total_str = str(total)
                drawTextCV2(output_img, total_str, (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

            # Display the current frame (with all annotations drawn up to this point)
            cv2.imshow(video_name, output_img)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):  # QUIT (exits)
                break
            elif key == ord('p'):
                cv2.waitKey(0)  # PAUSE (Enter any key to continue)

    cap.release()
    cv2.destroyAllWindows()
    print("Exited")
    """
Example #7
    def __init__(self):
        self.inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
        self.model = nets.YOLOv3COCO(self.inputs)
        self.person_class = 0
        self.in_size = (416, 416)
Example #8
def deeplearning(cond,turn):
    print('deeplearning start')
    inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)
    # model = nets.YOLOv2(inputs, nets.Darknet19)

    # frame=cv2.imread("D://pyworks//yolo//truck.jpg",1)

    # classes={'0':'person','1':'bicycle','2':'car','3':'bike','5':'bus','7':'truck'}
    classes = {'0': 'person'}

    # list_of_classes=[0,1,2,3,5,7]
    list_of_classes = [0]

    try:
        with tf.Session() as sess:
            cond.acquire()  ### mutex_lock
            sess.run(model.pretrained())
            # "D://pyworks//yolo//videoplayback.mp4"
            while turn.myTurn != 1: cond.wait()  # added: wait while it is not this thread's turn

            # cap=cv2.imread('test.jpg',cv2.IMREAD_COLOR)
            cap = cv2.imread('image.JPG', cv2.IMREAD_COLOR)

            # The placeholder expects 416x416 input, so resize to that size
            img = cv2.resize(cap, (416, 416), interpolation=cv2.INTER_AREA)

            image = np.array(img).reshape(-1, 416, 416, 3)
            start_time = time.time()
            preds = sess.run(model.preds, {inputs: model.preprocess(image)})

            print(time.time() - start_time)
            boxes = model.get_boxes(preds, image.shape[1:3])
            cv2.namedWindow('image', cv2.WINDOW_NORMAL)

            cv2.resizeWindow('image', 700, 700)

            # print("--- %s seconds ---" % (time.time() - start_time))
            boxes1 = np.array(boxes)
            for j in list_of_classes:
                count = 0
                if str(j) in classes:
                    lab = classes[str(j)]
                if len(boxes1) != 0:

                    for i in range(len(boxes1[j])):
                        box = boxes1[j][i]

                        if boxes1[j][i][4] >= .40:
                            count += 1

                            cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 1)
                            cv2.putText(img, lab, (box[0], box[1]), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 255),
                                        lineType=cv2.LINE_AA)
                print(lab, ": ", count)
                ref3.set({'people_number': count})
                ref1.set({'people_number': random.randint(0, 10)})
                ref2.set({'people_number': random.randint(0, 10)})
                ref4.set({'people_number': random.randint(0, 10)})


            cv2.imwrite("result_image.png", img)

            fileUpload("result_image.png")
            print("save end")

        cv2.destroyAllWindows()
        turn.myTurn = 0
        cond.notifyAll()  # notify to all consumers
        cond.release()  ### mutex_unlock

        print('deeplearning end')

    except Exception as e:
        print(str(e))
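
deeplearning(cond, turn) is written to run in a worker thread gated by a shared Condition and a "turn" object, neither of which is defined in the snippet; ref1..ref4 and fileUpload come from the project's Firebase code. Below is a hedged sketch of how the threading side might be wired up, with all names hypothetical.

import threading

class Turn:
    def __init__(self):
        self.myTurn = 1   # 1 means it is the detection thread's turn

cond = threading.Condition()
turn = Turn()

worker = threading.Thread(target=deeplearning, args=(cond, turn))
worker.start()
worker.join()
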
Example #9
def main(stream, photo):
    with tf.compat.v1.Session() as sess:
        # Get YOLOV3 model from tensornet and store it.
        inputs = tf.compat.v1.placeholder(tf.float32, [None, 416, 416, 3]) 
        model = nets.YOLOv3COCO(inputs, nets.Darknet19)
        # Choose the class to identify. I only need the person class to count people.
        classes = {'0' : 'person'}
        indexOfClasses = [0]
        # Load the pretrained COCO weights (this is not training).
        sess.run(model.pretrained())

        # Open the video source. Pass 0 for the webcam, or a file path for a saved image/video.
        webcamStream = cv.VideoCapture(photo)
        # Start main loop for the webcam stream.
        while(webcamStream.isOpened()):
            # Get the image of the stream.
            ret, frame = webcamStream.read()
            if not ret:
                break

            # Create new image based on the stream shape.
            img = cv.resize(frame, (416, 416))
            imgTmp = np.array(img)
            imgTmp = np.reshape(imgTmp, (-1, 416, 416, 3))

            # Run model with the image of the stream for classification and identifying people.
            preds = sess.run(model.preds, {inputs: model.preprocess(imgTmp)})

            # Create boxes for people in the image.
            boxes = model.get_boxes(preds, imgTmp.shape[1:3])

            # Create a windows to display the image.
            cv.namedWindow('image', cv.WINDOW_NORMAL)
            cv.resizeWindow('image', 500, 500)

            # Loop over the people found in the image, draw a box around each one, and count them.
            # If 2 people are on camera it should print 'Number of person: 2'.
            boxes1 = np.array(boxes)
            for classe in indexOfClasses:
                count = 0
                label = classes[str(classe)]
                if len(boxes1) != 0:
                    for index in range(len(boxes1[classe])): 
                        box = boxes1[classe][index]
                        if boxes1[classe][index][4] >= .40: 
                            count += 1
                            cv.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 3)
                            cv.putText(img, label, (box[0], box[1]), cv.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), lineType=cv.LINE_AA)
                print('Number of person: ', count)

            # Display the image.
            cv.imshow("image", img)

            # If you give an image as input, show the result for 5 seconds and stop the program.
            if photo != 0:
                cv.waitKey(5000)
                webcamStream.release()
                cv.destroyAllWindows()
                break

            # If you're in a real-time stream and you press 'q', this will stop the program.
            if stream == True:
                if cv.waitKey(1) & 0xFF == ord('q'):
                    webcamStream.release()
                    cv.destroyAllWindows()
                    break
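
A hedged usage note for this main(stream, photo): photo is passed straight to cv.VideoCapture, so 0 selects the default webcam and a file path opens a saved video, while stream controls whether the 'q'-to-quit branch is active. The file name below is only a placeholder.

main(stream=True, photo=0)              # live webcam; press 'q' to quit
main(stream=False, photo='people.mp4')  # saved file; the first frame is shown for 5 seconds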
Example #10
def main(args=None):
    data_dir_path = '/media/venkatesh/HDD_2/data/AI_city/AIC20_track1/Dataset_A'
    csv_val = 'data/val.csv'
    csv_classes = 'data/class_list.csv'
    model = 'model_files/model_final.pt'
    video_file_info = '/home/venkatesh/Desktop/AIC20_track1/Dataset_A/list_video_id.txt'
    trainedWidth = 416
    trainedHeight = 416
    Output_Folder_Path = 'Results'
    video_path = 'input/cam_1.mp4'

    roi_path = '/media/venkatesh/HDD_2/data/AI_city/AIC20_track1/ROIs/cam_1.txt'

    classes = {
        '0': 'person',
        '1': 'bicycle',
        '2': 'car',
        '3': 'bike',
        '5': 'bus',
        '7': 'truck'
    }
    #    valid_class = cfg.class_labels
    valid_class = [label.lower() for label in cfg.class_labels]
    list_of_classes = [2, 7]

    video_file_list = read_video_info(
        video_file_info)  #glob.glob(os.path.join(data_dir_path, '*.mp4'))
    #

    # Save_Output_Frame = os.path.join(Output_Folder_Path)
    if not os.path.exists(Output_Folder_Path):
        os.makedirs(Output_Folder_Path)

    detection_log_dir = os.path.join(Output_Folder_Path, 'detection')
    if not os.path.exists(detection_log_dir):
        os.makedirs(detection_log_dir)

    #    dataset_val    = CSVDataset(data_dir = data_dir_path, train_file=csv_val, class_list=csv_classes,
    #                             transform=transforms.Compose([Normalizer(), Resizer()]))
    #
    #    sampler_val    = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    #    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    inputs = tf.placeholder(tf.float32, [None, trainedWidth, trainedHeight, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)

    Threshold_obj_score = 0.1

    ComputationTime_Filename = 'detection_computation_time.txt'
    ComputationTimeFile = open(
        os.path.join(detection_log_dir, ComputationTime_Filename), "w+")
    ComputationTimeFile.write('filename total_time_msec avg_time_msec\n')
    ComputationTimeFile.close()

    with tf.Session() as sess:
        sess.run(model.pretrained())

        for video_file in video_file_list:

            video_path = os.path.join(data_dir_path, video_file)

            det_filename = os.path.splitext(
                os.path.basename(video_file))[0] + '.txt'
            DectionFile = open(os.path.join(detection_log_dir, det_filename),
                               "w+")
            ComputationTimeFile = open(
                os.path.join(detection_log_dir, ComputationTime_Filename),
                "a+")

            video_reader = cv2.VideoCapture(video_path)
            num_files = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

            with tqdm(total=num_files, file=sys.stdout) as pbar:
                fCount = 0
                DP_computation_Time = []
                for count in range(num_files):
                    _, image = video_reader.read()
                    cam_image_h, cam_image_w, _ = image.shape

                    org_img = image.copy()
                    image_h, image_w, _ = org_img.shape
                    scale_w = cam_image_w / trainedWidth
                    scale_h = cam_image_h / trainedHeight

                    img = cv2.resize(image, (trainedWidth, trainedHeight))
                    _data = np.array(img).reshape(-1, trainedWidth,
                                                  trainedHeight, 3)

                    DP_time_start = time.perf_counter()  # time.clock() was removed in Python 3.8

                    # Detection module call
                    preds = sess.run(model.preds,
                                     {inputs: model.preprocess(_data)})
                    boxes = model.get_boxes(preds, _data.shape[1:3])

                    DP_time_elapsed = (time.perf_counter() - DP_time_start)
                    DP_computation_Time.append(DP_time_elapsed)

                    temp_boxes = np.array(boxes)
                    obj_bbox = []
                    obj_class = []
                    for obj_class_id in list_of_classes:
                        if str(obj_class_id) in classes:
                            label_name = classes[str(obj_class_id)]
                        if len(temp_boxes) != 0:
                            for i in range(len(temp_boxes[obj_class_id])):
                                bbox = temp_boxes[obj_class_id][i]
                                obj_confidence_score = bbox[4]
                                if obj_confidence_score >= Threshold_obj_score:
                                    x1 = int(bbox[0] * scale_w)
                                    y1 = int(bbox[1] * scale_h)
                                    x2 = int(bbox[2] * scale_w)
                                    y2 = int(bbox[3] * scale_h)
                                    obj_bbox.append([x1, y1, x2, y2])
                                    #                            label_name = labels[int(classification[idxs[0][j]])]
                                    obj_index = valid_class.index(label_name)
                                    obj_class.append(obj_index)
                                    DectionFile.write(
                                        '{} {} {} {} {} {} {:0.4f}\n'.format(
                                            fCount + 1, obj_index, x1, y1, x2,
                                            y2, obj_confidence_score))

                    if cv2.waitKey(25) & 0xFF == ord('q'):
                        cv2.destroyAllWindows()
                        break

                    fCount += 1
                    pbar.set_description('processed: %d' % (fCount))
                    pbar.update(1)

            ComputationTimeFile.write('{} {:0.2f} {:0.2f}\n'.format(
                os.path.basename(video_file),
                np.sum(DP_computation_Time) * 1000,
                (np.mean(DP_computation_Time)) * 1000))
            DectionFile.close()
            ComputationTimeFile.close()
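
read_video_info is not shown in this example; judging from how it is used (it stands in for a glob over *.mp4 and reads list_video_id.txt), it most likely returns a list of video file names. A purely hypothetical sketch:

def read_video_info(list_file_path):
    # Hypothetical helper: read video file names from the list file, one entry per line
    video_files = []
    with open(list_file_path) as f:
        for line in f:
            line = line.strip()
            if line:
                # assume the video file name is the last token on each line
                video_files.append(line.split()[-1])
    return video_files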