Code Example #1
File: demo.py  Project: devedipoint/Deep-SORT-YOLOv4
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = 'video.webm'
    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        features = encoder(frame, boxes)
        detections = [
            Detection(bbox, confidence, cls,
                      feature) for bbox, confidence, cls, feature in zip(
                          boxes, confidence, classes, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.cls for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for det in detections:
            bbox = det.to_tlbr()
            if show_detections and len(classes) > 0:
                det_cls = det.cls
                score = "%.2f" % (det.confidence * 100) + "%"
                cv2.putText(frame,
                            str(det_cls) + " " + score,
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()

            adc = "%.2f" % (track.adc *
                            100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "ID: " + str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0],
                        (0, 255, 0), 1)
            if not show_detections:
                track_cls = track.cls
                cv2.putText(frame, str(track_cls),
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.putText(
                    frame, 'ADC: ' + adc,
                    (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0,
                    1e-3 * frame.shape[0], (0, 255, 0), 1)

        #cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()
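Note: Example #1 passes a class label into Detection(bbox, confidence, cls, feature), which the stock deep_sort Detection does not accept, so this fork presumably extends it. A minimal sketch of such a holder, written here as an assumption rather than the repository's actual class:

import numpy as np

class Detection:
    """Detection in tlwh format plus confidence, class label and appearance feature.
    Sketch only; the fork used by demo.py may differ in detail."""

    def __init__(self, tlwh, confidence, cls, feature):
        self.tlwh = np.asarray(tlwh, dtype=np.float64)  # (top-left x, top-left y, width, height)
        self.confidence = float(confidence)
        self.cls = cls                                   # detected class label
        self.feature = np.asarray(feature, dtype=np.float32)

    def to_tlbr(self):
        # convert (top-left x, y, width, height) to (min x, min y, max x, max y)
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret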
Code Example #2
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0
    TestOutput = []
    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print(
                'Video has ended or failed, try a different video format! (test response)'
            )
            AlexFun(TestOutput, video_path)
            #print(TestOutput)
            break
        frame_num += 1
        #print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1],
                                                pred[0],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0],
                                                pred[1],
                                                score_threshold=0.25,
                                                input_shape=tf.constant(
                                                    [input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf,
                (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score)

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]

        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']
        #allowed_classes = ['person', 'car', 'truck']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count),
                        (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2,
                        (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                bboxes, scores, names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()

            # draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(frame, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

            # if enable info flag then print details about each track
            if FLAGS.info:
                #print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]),int(bbox[1]),int(bbox[2]), int(bbox[3]))))
                #TestOutput.append("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))
                TestXCO = (int(bbox[0]) + int(bbox[2])) / 2
                TestYCO = (int(bbox[1]) + int(bbox[3])) / 2
                TestTemp = [frame_num]
                TestTemp.append(str(track.track_id))
                TestTemp.append(class_name)
                TestTemp.append(TestXCO)
                TestTemp.append(TestYCO)
                TestTemp.append(int(bbox[0]))
                TestTemp.append(int(bbox[1]))
                TestTemp.append(int(bbox[2]))
                TestTemp.append(int(bbox[3]))
                TestOutput.append(TestTemp)
                # append this row to the list every frame; may also need to append when the frame starts, or include it inline
        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        #print("FPS: %.2f" % fps)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
    cv2.destroyAllWindows()
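Note: AlexFun is not defined in Example #2. From how TestOutput is built (one row per confirmed track: frame number, track id, class, centre x/y and the box corners), it presumably exports those rows once the video ends. A hypothetical stand-in, assuming a simple CSV export:

import csv

def AlexFun(test_output, video_path):
    # Hypothetical helper: dump the per-frame track rows collected in TestOutput
    # to a CSV file named after the input video. The real AlexFun may differ.
    out_file = str(video_path) + '_tracks.csv'
    header = ['frame', 'track_id', 'class', 'center_x', 'center_y',
              'xmin', 'ymin', 'xmax', 'ymax']
    with open(out_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(test_output)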
Code Example #3
def main(yolo):

    # Determining the FPS of a video having variable frame rate
    # cv2.CAP_PROP_FPS is not used since it returns 'infinity' for variable frame rate videos
    filename = "Cafe_Hyperion.avi"
    # Determining the total duration of the video
    clip = VideoFileClip(filename)

    cap2 = cv2.VideoCapture(filename)
    co = 0
    ret2 = True
    while ret2:
        ret2, frame2 = cap2.read()
        # Determining the total number of frames (count only successful reads)
        if ret2:
            co += 1
    cap2.release()

    # Computing the average FPS of the video
    Input_FPS = co / clip.duration

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    frame_count = 0
    
    # Implementing Deep Sort algorithm
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1)
    
    # Cosine distance is used as the metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    
    video_capture = cv2.VideoCapture(filename)

    # Define the codec and create a VideoWriter object to save the output video
    out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'MP4V'), Input_FPS, (int(video_capture.get(3)), int(video_capture.get(4))))

    # To calculate the frames processed by the deep sort algorithm per second
    fps = 0.0

    # Initializing empty variables for counting and tracking purpose
    queue_track_dict = {}         # Count time in queue
    alley_track_dict = {}         # Count time in alley
    store_track_dict = {}         # Count total time in store
    latest_frame = {}             # Track the last frame in which a person was identified
    reidentified = {}             # Yes or No : whether the person has been re-identified at a later point in time
    plot_head_count_store = []    # y-axis for Footfall Analysis
    plot_head_count_queue = []    # y-axis for Footfall Analysis
    plot_time = []                # x-axis for Footfall Analysis

    # Loop to process each frame and track people
    while True:
        ret, frame = video_capture.read()
        if ret != True:
            break

        head_count_store = 0
        head_count_queue = 0
        t1 = time.time()

        image = Image.fromarray(frame[...,::-1])   # BGR to RGB conversion
        boxs = yolo.detect_image(image)
        features = encoder(frame,boxs)
        
        # Getting the detections having score of 0.0 to 1.0
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        
        # Run non-maxima suppression on the bounding boxes
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        
        # Call the tracker to associate tracking boxes to detection boxes
        tracker.predict()
        tracker.update(detections)

        # Defining the co-ordinates of the area of interest
        pts = np.array([[0, 0],[105,0],[170,85],[0,143]], np.int32)
        pts = pts.reshape((-1,1,2))     # Queue Area
        pts2 = np.array([[105,0],[320,0],[320,240],[0,240],[0,143],[170,85]], np.int32)
        pts2 = pts2.reshape((-1,1,2))   # Alley Region
        cv2.polylines(frame, [pts], True, (0,255,255), thickness=2)
        cv2.polylines(frame, [pts2], True, (255,0,255), thickness=1)
        
        # Drawing tracker boxes and frame count for people inside the areas of interest
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue 
            bbox = track.to_tlbr()

            # Checking if the person is within an area of interest
            queue_point_test = center_point_inside_polygon(bbox, pts2)
            alley_point_test = center_point_inside_polygon(bbox, pts)

            # Checking if a person has been reidentified in a later frame
            if queue_point_test == 'inside' or alley_point_test == 'inside':
                if track.track_id in latest_frame.keys():
                    if latest_frame[track.track_id] != frame_count - 1:
                        reidentified[track.track_id] = 1

            # Initializing variables incase a new person has been seen by the model
            if queue_point_test == 'inside' or alley_point_test == 'inside':
                head_count_store += 1
                if track.track_id not in store_track_dict.keys():
                    store_track_dict[track.track_id] = 0
                    queue_track_dict[track.track_id] = 0
                    alley_track_dict[track.track_id] = 0
                    reidentified[track.track_id] = 0

            # Processing for people inside the Queue Area
            if queue_point_test == 'inside':
                head_count_queue += 1
                queue_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2)
                wait_time = round((queue_track_dict[track.track_id] / Input_FPS), 2)
                cv2.putText(frame, str(track.track_id) + ": " + str(wait_time) + "s", (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4)
                cv2.putText(frame, str(track.track_id) + ": " + str(wait_time) + "s", (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2)

            # Processing for people inside the Alley Region
            if alley_point_test == 'inside':
                alley_track_dict[track.track_id] += 1
                latest_frame[track.track_id] = frame_count
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,255,255), 2)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 0, 0), 4)
                cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 0.8, (0, 255, 77), 2)

            # Getting the total Store time for a person
            if track.track_id in store_track_dict.keys():
                store_track_dict[track.track_id] = queue_track_dict[track.track_id] + alley_track_dict[track.track_id]

        # Drawing bounding box detections for people inside the store
        for det in detections:
            bbox = det.to_tlbr()

            # Checking if the person is within an area of interest
            queue_point_test = center_point_inside_polygon(bbox, pts)
            alley_point_test = center_point_inside_polygon(bbox, pts2)

            if queue_point_test == 'inside' or alley_point_test == 'inside':
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,0,0), 2)

        # Video Overlay - Head Count Data at that instant
        cv2.putText(frame, "Count: " + str(head_count_store), ( 30, 610 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Count: " + str(head_count_store), ( 30, 610 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False)

        # Calculating the average wait time in queue
        total_people = len([v for v in queue_track_dict.values() if v > 0])
        total_queue_frames = sum(v for v in queue_track_dict.values() if v > 0)
        avg_queue_frames = 0
        if total_people != 0:
            avg_queue_frames = total_queue_frames / total_people
        avg_queue_time = round((avg_queue_frames / Input_FPS), 2)

        # Video Overlay - Average Wait Time in Queue
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', ( 30, 690 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Queue Time: " + str(avg_queue_time) + 's', ( 30, 690 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False)

        # Calculating the average wait time in the store
        total_people = len(store_track_dict)
        total_store_frames = sum(store_track_dict.values())
        avg_store_frames = 0
        if total_people != 0:
            avg_store_frames = total_store_frames / total_people
        avg_store_time = round((avg_store_frames / Input_FPS), 2)

        # Video Overlay - Average Store time
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', ( 30, 650 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 0, 0), 3, cv2.LINE_AA, False)
        cv2.putText(frame, "Avg Store Time: " + str(avg_store_time) + 's', ( 30, 650 ), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5, (0, 255, 77), 2, cv2.LINE_AA, False)

        # Write the frame onto the VideoWriter object
        out.write(frame)

        # Calculating the frames processed per second by the model  
        fps  = ( fps + (1./(time.time()-t1)) ) / 2
        frame_count += 1

        # Printing processing status to track completion
        op = "FPS_" + str(frame_count) + "/" + str(co) + ": " + str(round(fps, 2))
        print("\r" + op , end = "")

        # Adding plot values for Footfall Analysis every 2 seconds (hard coded for now)
        if frame_count % 50 == 0:
            plot_time.append(round((frame_count / Input_FPS), 2))
            plot_head_count_store.append(head_count_store)
            plot_head_count_queue.append(head_count_queue)
        
        # Press Q to stop the video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Data Processed as per the video provided
    print("\n-----------------------------------------------------------------------")
    print("QUEUE WAIT TIME ( Unique Person ID -> Time spent )\n")
    for k, v in queue_track_dict.items():
        print(k, "->", str(round((v/Input_FPS), 2)) + " seconds")

    print("\n-----------------------------------------------------------------------")
    print("ALLEY TIME ( Unique Person ID -> Time spent )\n")
    for k, v in alley_track_dict.items():
        print(k, "->", str(round((v/Input_FPS), 2)) + " seconds")

    print("\n-----------------------------------------------------------------------")
    print("STORE TIME ( Unique Person ID -> Time spent  )\n")
    for k, v in store_track_dict.items():
        print(k, "->", str(round((v/Input_FPS), 2)) + " seconds")

    # Defining data to be written into the csv file - Detailed Report
    csv_columns = ['Unique Person ID', 'Queue Time in AOI', 'Total Store Time', 'Re-Identified']
    csv_data = []
    csv_row = {}
    detailed_csv_file = 'Detailed_Store_Report.csv'
    for k, v in store_track_dict.items():
         csv_row = {}
         if reidentified[k] == 1:
             reid = 'Yes'
         else:
             reid = 'No'
         csv_row = {csv_columns[0]: k, csv_columns[1]: round((queue_track_dict[k] / Input_FPS), 2), csv_columns[2]: round((v / Input_FPS), 2), csv_columns[3]: reid}
         csv_data.append(csv_row)

    # Writing the data into the csv file - Detailed Report
    with open(detailed_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        for data in csv_data:
            writer.writerow(data)

    # Defining data to be written into the csv file - Brief Report
    csv_columns_brief = ['Total Head Count', 'Total Queue Time', 'Average Queue Time', 'Total Store Time', 'Average Store Time']
    brief_csv_file = 'Brief_Store_Report.csv'
    csv_data_brief = {csv_columns_brief[0]: len(store_track_dict), csv_columns_brief[1]: round((sum(queue_track_dict.values()) / Input_FPS), 2), csv_columns_brief[2]: avg_queue_time, csv_columns_brief[3]: round((sum(store_track_dict.values()) / Input_FPS), 2), csv_columns_brief[4]: avg_store_time}

    # Writing the data into the csv file - Brief Report
    with open(brief_csv_file, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns_brief)
        writer.writeheader()
        writer.writerow(csv_data_brief)

    # Plotting a time-series line graph for store and queue head count data and saving it as a .png file
    plt.plot(plot_time, plot_head_count_queue)
    plt.plot(plot_time, plot_head_count_store)
    plt.legend(['Queue Head Count', 'Store Head Count'], loc='upper left')
    plt.xlabel('Time Stamp (in seconds)')
    plt.ylabel('Head Count')
    plt.xlim(0, round(frame_count / Input_FPS) + 1)
    plt.ylim(0, max(plot_head_count_store) + 2)
    plt.title('Footfall Analysis')
    plt.savefig('Footfall_Analysis.png', bbox_inches='tight')

    # Printing plot data
    for i in range(len(plot_time)):
        print(plot_time[i], plot_head_count_queue[i], plot_head_count_store[i]) 

    # Releasing objects created
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
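Note: center_point_inside_polygon is used but not shown in Example #3. From its call sites it takes a tlbr box and a polygon (as produced by pts.reshape((-1, 1, 2))) and returns the string 'inside' when the box centre lies inside the polygon. A minimal sketch under that assumption, using cv2.pointPolygonTest:

import cv2

def center_point_inside_polygon(bbox, polygon_pts):
    # bbox is (xmin, ymin, xmax, ymax); polygon_pts is an (N, 1, 2) int32 array.
    cx = (bbox[0] + bbox[2]) / 2.0
    cy = (bbox[1] + bbox[3]) / 2.0
    # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside
    result = cv2.pointPolygonTest(polygon_pts, (float(cx), float(cy)), False)
    return 'inside' if result >= 0 else 'outside'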
Code Example #4
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    interpreter = None

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video
    # get video name by using split method
    video_name = video_path.split('/')[-1]
    video_name = video_name.split('.')[0]

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()

    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    # Try to read video if valid
    return_value, frame = vid.read()
    if return_value:
        pass
    else:
        print('Invalid video directory!')

    filename = video_path.split('.')[-2]
    # VideoOut = None
    MinimapOut = None

    # Get total number of frames in a video
    TotalFrames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))

        time_milli = vid.get(cv2.CAP_PROP_POS_MSEC)
        time_milli = time_milli / 1000

        # set frame per seconds
        vid.set(cv2.CAP_PROP_FPS, 1000)
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)

    frame_num = 0
    count = 10
    ObjectDetector = DetectObject()

    for _ in tqdm(range(TotalFrames)):
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break

        # pass in the object detector
        ObjectDetector.interpreter = interpreter
        bboxes, frame, result = ObjectDetector.analyzeDetection(
            return_value, frame, frame_num, FLAGS, infer, encoder,
            nms_max_overlap, tracker)

        # loop through the bounding box and export into the ROI folder.
        for i, j in bboxes.items():
            xmin, ymin, w, h = int(j[0]), int(j[1]), int(j[2]), int(j[3])
            if w <= 0 or h <= 0:
                pass
            else:
                # ROI Extraction
                maskedImage = frame[ymin:ymin + h, xmin:xmin + w]

                roi_name = "./ROI/ROI_frame_%s.jpg" % (str(frame_num))
                cv2.imwrite(roi_name,
                            maskedImage)  # save transformed image to path

        # cv2.imshow('frame',result)

        frame_num += 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
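Note: the ROI crops in Example #4 are written with cv2.imwrite to ./ROI/, and cv2.imwrite returns False rather than raising when that directory does not exist. A small guard worth adding before the frame loop (a suggestion, not part of the original):

import os

os.makedirs('./ROI', exist_ok=True)  # make sure the output directory exists before saving crops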
Code Example #5
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if FLAGS.tiny:
        STRIDES = np.array(cfg.YOLO.STRIDES_TINY)
        XYSCALE = cfg.YOLO.XYSCALE_TINY
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_TINY_V3, FLAGS.tiny)
    else:
        STRIDES = np.array(cfg.YOLO.STRIDES)
        XYSCALE = cfg.YOLO.XYSCALE
        if FLAGS.model == 'yolov4':
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS, FLAGS.tiny)
        else:
            ANCHORS = utils.get_anchors(cfg.YOLO.ANCHORS_V3, FLAGS.tiny)

    CLASSES = utils.read_class_names(cfg.YOLO.CLASSES)
    NUM_CLASSES = len(CLASSES)
    input_size = FLAGS.size
    try:
        vid = cv2.VideoCapture(int(FLAGS.video))
    except:
        vid = cv2.VideoCapture(FLAGS.video)

    times = []
    if FLAGS.output:
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    if FLAGS.framework == 'tf':
        input_layer = tf.keras.layers.Input([input_size, input_size, 3])
        if FLAGS.tiny:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3_tiny(input_layer, NUM_CLASSES)
            else:
                feature_maps = YOLOv4_tiny(input_layer, NUM_CLASSES)
            bbox_tensors = []
            for i, fm in enumerate(feature_maps):
                bbox_tensor = decode(fm, NUM_CLASSES, i)
                bbox_tensors.append(bbox_tensor)
            model = tf.keras.Model(input_layer, bbox_tensors)
            utils.load_weights_tiny(model, FLAGS.weights, FLAGS.model)
        else:
            if FLAGS.model == 'yolov3':
                feature_maps = YOLOv3(input_layer, NUM_CLASSES)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASSES, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                utils.load_weights_v3(model, FLAGS.weights)
            elif FLAGS.model == 'yolov4':
                feature_maps = YOLOv4(input_layer, NUM_CLASSES)
                bbox_tensors = []
                for i, fm in enumerate(feature_maps):
                    bbox_tensor = decode(fm, NUM_CLASSES, i)
                    bbox_tensors.append(bbox_tensor)
                model = tf.keras.Model(input_layer, bbox_tensors)
                if FLAGS.weights.split(".")[len(FLAGS.weights.split(".")) -
                                            1] == "weights":
                    utils.load_weights(model, FLAGS.weights)
                else:
                    model.load_weights(FLAGS.weights).expect_partial()
        model.summary()
    elif FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
    elif FLAGS.framework == 'trt':
        saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                                 tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    max_cosine_distance = 0.7  # 0.5 / 0.7
    nn_budget = None
    model_filename = './weights/tracker/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    key_list = list(CLASSES.keys())
    val_list = list(CLASSES.values())
    Track_only = []

    logging.info("Models loaded!")
    while True:
        return_value, frame = vid.read()
        if not return_value:
            logging.warning("Empty Frame")
            break

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_size = frame.shape[:2]

        image_data = utils.image_preprocess(np.copy(frame),
                                            [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)

        t1 = time.time()
        if FLAGS.framework == 'tf':
            pred_bbox = model.predict(image_data)
        elif FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred_bbox = [
                interpreter.get_tensor(output_details[i]['index'])
                for i in range(len(output_details))
            ]
        elif FLAGS.framework == 'trt':
            batched_input = tf.constant(image_data)
            pred_bbox = []
            result = infer(batched_input)
            for _, value in result.items():
                value = value.numpy()
                pred_bbox.append(value)
        t2 = time.time()
        times.append(t2 - t1)
        times = times[-20:]
        ms = sum(times) / len(times) * 1000
        fps = 1000 / ms

        if FLAGS.model == 'yolov4':
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES,
                                                XYSCALE)
        else:
            pred_bbox = utils.postprocess_bbbox(pred_bbox, ANCHORS, STRIDES)
        bboxes = utils.postprocess_boxes(pred_bbox, frame_size, input_size,
                                         0.5)  # 0.25
        bboxes = utils.nms(bboxes, 0.5, method='nms')  # 0.213

        boxes, scores, names = [], [], []
        for bbox in bboxes:
            # keep every class when Track_only is empty, otherwise only the listed classes
            if len(Track_only) == 0 or CLASSES[int(bbox[5])] in Track_only:
                boxes.append([
                    bbox[0].astype(int), bbox[1].astype(int),
                    bbox[2].astype(int) - bbox[0].astype(int),
                    bbox[3].astype(int) - bbox[1].astype(int)
                ])
                scores.append(bbox[4])
                names.append(CLASSES[int(bbox[5])])

        boxes = np.array(boxes)
        names = np.array(names)
        scores = np.array(scores)
        features = np.array(encoder(frame, boxes))
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]

        tracker.predict()
        tracker.update(detections)

        tracked_bboxes = []
        for track in tracker.tracks:
            if not track.is_confirmed(
            ) or track.time_since_update > 1:  # 1 / 5
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            tracking_id = track.track_id
            index = key_list[val_list.index(class_name)]
            tracked_bboxes.append(bbox.tolist() + [tracking_id, index])

        image = utils.draw_bbox(frame,
                                tracked_bboxes,
                                classes=CLASSES,
                                tracking=True)

        image = cv2.putText(
            image,
            "Time: {:.2f}ms".format(sum(times) / len(times) * 1000),
            (0, 36),  # 24
            cv2.FONT_HERSHEY_SIMPLEX,
            1.5,
            (0, 0, 255),
            2)

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Detections", cv2.WINDOW_AUTOSIZE)
        cv2.imshow("Detections", image)
        if FLAGS.output:
            out.write(image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    vid.release()
    if FLAGS.output:
        out.release()
    cv2.destroyAllWindows()
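Note: utils.image_preprocess in Example #5 is assumed to be the usual YOLO letterbox preprocessing (resize while keeping aspect ratio, pad with gray, scale to [0, 1]). A sketch under that assumption; the repository's own utils may differ in detail:

import cv2
import numpy as np

def image_preprocess(image, target_size):
    # Letterbox resize: keep aspect ratio, pad with 128 gray, normalise to [0, 1].
    ih, iw = target_size
    h, w, _ = image.shape
    scale = min(iw / w, ih / h)
    nw, nh = int(scale * w), int(scale * h)
    image_resized = cv2.resize(image, (nw, nh))

    image_padded = np.full((ih, iw, 3), 128.0, dtype=np.float32)
    dw, dh = (iw - nw) // 2, (ih - nh) // 2
    image_padded[dh:nh + dh, dw:nw + dw, :] = image_resized
    return image_padded / 255.0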
Code Example #6
File: model3.py  Project: zyg11/WorkControl
def read(stack):
    print('Process to read: %s' % os.getpid())
    yolo = YOLO()
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    max_boxs = 0
    face = ['17208019']

    # last-frame point of each target
    history = {}
    # dict mapping track id to its label
    person = {}
    # list of track ids that have already been given a new label
    change = []
    while True:
        if len(stack) != 0:
            frame = stack.pop()
            t1 = time.time()
            frame_count = 0
            localtime = time.asctime(time.localtime(time.time()))
            utils.draw(frame, line.readline())
            # get the warning line (boundary) used for crossing detection
            transboundaryline = line.readline()
            utils.draw(frame, transboundaryline)
            img = Image.fromarray(frame)
            #img.save('frame.jpg')
            '''
            cv2.line(frame, (837, 393), (930, 300), (0, 255, 255), 3)
            transboundaryline = t.line_detect_possible_demo(frame)
            '''
            # image = Image.fromarray(frame)
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
            boxs = yolo.detect_image(image)
            # print("box_num",len(boxs))
            features = encoder(frame, boxs)

            # detection score is fixed to 1.0 here
            detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]
            if len(boxs) > max_boxs:
                max_boxs = len(boxs)
            # Call the tracker
            tracker.predict()
            tracker.update(detections)
            # per-frame info
            info = {}
            target = []
            for track in tracker.tracks:
                # one target in this frame
                per_info = {}
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                if track.track_id not in person:
                    person[track.track_id] = str(track.track_id)
                bbox = track.to_tlbr()
                PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                PointY = bbox[3]
                dis = int(PointX) - 1200
                try:
                    if dis<15:
                        if track.track_id not in change:
                            person[track.track_id] = face.pop(0)
                            change.append(track.track_id)
                except:
                    print('非法入侵')  # "unauthorized entry": no registered face id left to assign
                # current target id
                if track.track_id not in change:
                    per_info['worker_id'] = 'unknown' + str(track.track_id)
                else:
                    per_info['worker_id'] = person[track.track_id]
                # current target coordinates
                yoloPoint = (int(PointX), int(PointY))
                per_info['current_point'] = yoloPoint

                # Kalman filter prediction of the next point
                if per_info['worker_id'] not in utils.KalmanNmae:
                    utils.myKalman(per_info['worker_id'])
                if per_info['worker_id'] not in utils.lmp:
                    utils.setLMP(per_info['worker_id'])
                cpx, cpy = utils.predict(yoloPoint[0], yoloPoint[1], per_info['worker_id'])

                if cpx[0] == 0.0 or cpy[0] == 0.0:
                    cpx[0] = yoloPoint[0]
                    cpy[0] = yoloPoint[1]
                if frame_count>20:
                    per_info['next_point'] = (int(cpx), int(cpy))
                else:
                    per_info['next_point'] = yoloPoint

                # record whether the target crossed the warning line
                if per_info['worker_id'] in history:
                    per_info['transboundary'] = 'no'
                    #print(transboundaryline)

                    line1 = [per_info['next_point'],history[per_info['worker_id']]]
                    a = line.IsIntersec2(transboundaryline,line1)

                    if a == '有交点':  # '有交点' means an intersection was found
                        print('越线提醒')  # line-crossing alert
                        per_info['transboundary'] = 'yes'

                history[per_info['worker_id']] = per_info['current_point']

                frame_count = frame_count+1
                #print(per_info)
                # draw the target label (the bounding-box drawing below is commented out)
                #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, per_info['worker_id'], (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)
                target.append(per_info)
            info['time'] = localtime
            #info['frame'] = str(img.tolist()).encode('base64')
            info['frame'] = 'frame'
            info['target'] = target
            # write the per-frame info out as JSON
            info_json = json.dumps(info)
            info_queue.put(info_json)
            getInfo(info_queue)
            cv2.imshow("img", frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
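Note: line.IsIntersec2 in Example #6 is an external helper; it compares the warning line with the segment from a track's predicted next point to its previous point and returns '有交点' ("intersection found") when they cross. A minimal cross-product sketch, assuming each line is a pair of (x, y) points (touching and collinear cases are not handled):

def IsIntersec2(line_a, line_b):
    # line_a and line_b are each ((x1, y1), (x2, y2)); sketch only, the
    # project's own helper may use a different format or return value.
    (ax1, ay1), (ax2, ay2) = line_a
    (bx1, by1), (bx2, by2) = line_b

    def cross(ox, oy, px, py, qx, qy):
        # z component of (P - O) x (Q - O)
        return (px - ox) * (qy - oy) - (py - oy) * (qx - ox)

    d1 = cross(bx1, by1, bx2, by2, ax1, ay1)
    d2 = cross(bx1, by1, bx2, by2, ax2, ay2)
    d3 = cross(ax1, ay1, ax2, ay2, bx1, by1)
    d4 = cross(ax1, ay1, ax2, ay2, bx2, by2)
    if ((d1 > 0) != (d2 > 0)) and ((d3 > 0) != (d4 > 0)):
        return '有交点'  # intersection found
    return '无交点'      # no intersection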
Code Example #7
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0

    # initialize deep sort
    model_filename = cfg.PATH + '/model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size

    # tf
    saved_model_loaded = tf.saved_model.load(FLAGS.weights,
                                             tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    while True:
        data = sys.stdin.readline()
        if data:
            data = json.loads(data)
            if data['end']:
                break
            frame = np.array(data['frame_image'], dtype=np.uint8)

            image_data = frame / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)

            # tf
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf,
                    (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=FLAGS.iou,
                score_threshold=FLAGS.score)

            # convert data to numpy arrays and slice out unused elements
            num_objects = valid_detections.numpy()[0]
            bboxes = boxes.numpy()[0]
            bboxes = bboxes[0:int(num_objects)]
            scores = scores.numpy()[0]
            scores = scores[0:int(num_objects)]
            classes = classes.numpy()[0]
            classes = classes[0:int(num_objects)]

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
            original_h, original_w, _ = frame.shape
            bboxes = utils.format_boxes(bboxes, original_h, original_w)

            # store all predictions in one parameter for simplicity when calling functions
            pred_bbox = [bboxes, scores, classes, num_objects]

            # read in all class names from config
            class_names = utils.read_class_names(cfg.YOLO.CLASSES)

            # by default allow all classes in .names file
            allowed_classes = list(class_names.values())

            # custom allowed classes (uncomment line below to customize tracker for only people)
            #allowed_classes = ['person']

            # loop through objects and use class index to get class name, allow only classes in allowed_classes list
            names = []
            deleted_indx = []
            for i in range(num_objects):
                class_indx = int(classes[i])
                class_name = class_names[class_indx]
                if class_name not in allowed_classes:
                    deleted_indx.append(i)
                else:
                    names.append(class_name)
            names = np.array(names)

            # delete detections that are not in allowed_classes
            bboxes = np.delete(bboxes, deleted_indx, axis=0)
            scores = np.delete(scores, deleted_indx, axis=0)

            # encode yolo detections and feed to tracker
            features = encoder(frame, bboxes)
            detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature in zip(
                    bboxes, scores, names, features)
            ]

            # run non-maxima supression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(
                boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # ds = []
            # for detection in detections:
            #     d = dict()
            #     d["bbox"] = detection.tlwh.tolist()
            #     d["confidence"] = detection.confidence
            #     d["class"] = detection.class_name
            #     ds.append(d)
            #
            # # send data to Node (without tracking...)
            # print(json.dumps(ds))

            #Call the tracker
            tracker.predict()
            tracker.update(detections)

            # Store tracks for json...
            tracks = []

            # update tracks
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                class_name = track.get_class()
                t = dict()
                bbs = track.to_tlbr().tolist()
                t["class"] = class_name
                bbox = dict()
                bbox["left"] = bbs[0]
                bbox["top"] = bbs[1]
                bbox["right"] = bbs[2]
                bbox["bottom"] = bbs[3]
                t["bbox"] = bbox
                t["id"] = track.track_id
                t["score"] = track.detection_actual_score
                tracks.append(t)

            #send data to Node!
            print(json.dumps(tracks))
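Note: Example #7 is meant to be driven over stdin/stdout by another process (the comments mention Node): each input line is a JSON object with a frame_image array and an end flag, and each output line is the JSON list of tracks. A hedged sketch of the producing side, written in Python purely for illustration; the script name and the details of the Node integration are assumptions:

import json
import subprocess

import cv2

# Launch the tracker script unbuffered (-u) so its per-frame JSON lines can be read back.
proc = subprocess.Popen(['python', '-u', 'tracker_stdin.py'],  # script name is an assumption
                        stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)

cap = cv2.VideoCapture('video.mp4')
while True:
    ok, frame = cap.read()
    if not ok:
        break
    msg = {'end': False, 'frame_image': frame.tolist()}  # matches data['frame_image'] above
    proc.stdin.write(json.dumps(msg) + '\n')
    proc.stdin.flush()
    tracks = json.loads(proc.stdout.readline())          # one JSON line of tracks per frame

proc.stdin.write(json.dumps({'end': True}) + '\n')
proc.stdin.flush()
cap.release()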
Code Example #8
def main(_argv):
    avg=[]
    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 1.0
    #regression model load
    weight_path='./2_input_model_2-3.5%/'
    loaded_model = tf.keras.models.load_model(weight_path)

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
    input_size = FLAGS.size
    video_path = FLAGS.video

    # load tflite model if flag is set
    if FLAGS.framework == 'tflite':
        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
        interpreter.allocate_tensors()
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()
        print(input_details)
        print(output_details)
    # otherwise load standard tensorflow saved model
    else:
        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
        infer = saved_model_loaded.signatures['serving_default']

    # begin video capture
    try:
        vid = cv2.VideoCapture(int(video_path))
    except:
        vid = cv2.VideoCapture(video_path)

    out = None

    # get video ready to save locally if flag is set
    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))

    frame_num = 0
    # while video is running
    while True:
        return_value, frame = vid.read()
        if return_value:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame)
        else:
            print('Video has ended or failed, try a different video format!')
            break
        frame_num +=1
        print('Frame #: ', frame_num)
        frame_size = frame.shape[:2]
        image_data = cv2.resize(frame, (input_size, input_size))
        image_data = image_data / 255.
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        start_time = time.time()

        # run detections on tflite if flag is set
        if FLAGS.framework == 'tflite':
            interpreter.set_tensor(input_details[0]['index'], image_data)
            interpreter.invoke()
            pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
            # run detections using yolov3 if flag is set
            if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
            else:
                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                input_shape=tf.constant([input_size, input_size]))
        else:
            batch_data = tf.constant(image_data)
            pred_bbox = infer(batch_data)
            for key, value in pred_bbox.items():
                boxes = value[:, :, 0:4]
                pred_conf = value[:, :, 4:]

        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
            scores=tf.reshape(
                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
            max_output_size_per_class=50,
            max_total_size=50,
            iou_threshold=FLAGS.iou,
            score_threshold=FLAGS.score
        )

        # convert data to numpy arrays and slice out unused elements
        num_objects = valid_detections.numpy()[0]
        bboxes = boxes.numpy()[0]
        bboxes = bboxes[0:int(num_objects)]
        scores = scores.numpy()[0]
        scores = scores[0:int(num_objects)]
        classes = classes.numpy()[0]
        classes = classes[0:int(num_objects)]

        # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
        original_h, original_w, _ = frame.shape
        bboxes = utils.format_boxes(bboxes, original_h, original_w)

        # store all predictions in one parameter for simplicity when calling functions
        pred_bbox = [bboxes, scores, classes, num_objects]
        #print("pred_bbox: ",pred_bbox[0])
        #print("scores: ",pred_bbox[1])
        #print("classes :",pred_bbox[2])
        #print("num :",pred_bbox[3])
        #print("width :",width)
        #print("height :",height)
        # read in all class names from config
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)

        # by default allow all classes in .names file
        allowed_classes = list(class_names.values())

        # custom allowed classes (uncomment line below to customize tracker for only people)
        #allowed_classes = ['person']

        # loop through objects and use class index to get class name, allow only classes in allowed_classes list
        names = []
        deleted_indx = []
        for i in range(num_objects):
            class_indx = int(classes[i])
            class_name = class_names[class_indx]
            if class_name not in allowed_classes:
                deleted_indx.append(i)
            else:
                names.append(class_name)
        names = np.array(names)
        count = len(names)
        if FLAGS.count:
            cv2.putText(frame, "Objects being tracked: {}".format(count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
            print("Objects being tracked: {}".format(count))
        # delete detections that are not in allowed_classes
        bboxes = np.delete(bboxes, deleted_indx, axis=0)
        scores = np.delete(scores, deleted_indx, axis=0)

        # encode yolo detections and feed to tracker
        features = encoder(frame, bboxes)
        detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(bboxes, scores, names, features)]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima supression
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        #print("boxs ",boxs)
        #print("scores ",scores)
        #print("classes ",classes)
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        #print("indices ",indices)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        cv2.putText(frame, "using regress", (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)
        #cv2.putText(frame, "Objects being detected: {}".format(count), (5, 350), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 0, 255), 2)
        cv2.putText(frame, "frame# {}".format(frame_num), (750, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (255, 0, 255), 2)


        # update tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            if 'entrance' not in classes:
                if len(classes)>1:
                    if(contains_duplicates(classes)==False):
                        #color = (50, 89, 170)
                        check_rect=0
                        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
                        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        ########## set sticker as low priority#############
                        if classes[0] in ('mat', 'sensor') and classes[1] in ('mat', 'sensor'):
                            print("*************NO STK**********************************")
                            color = (50, 89, 170)
                            # xywh to xmin ymin xmax ymax
                            x1, y1, x2, y2 = convert2(width, height, int(boxs[0][0]), int(boxs[0][1]), int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))
                            x3, y3, x4, y4 = convert2(width, height, int(bboxes[1][0]), int(bboxes[1][1]), int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))
                            reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2, class_index(classes[1]), x3, y3, x4, y4]])
                            predictions = loaded_model.predict(reg_input)
                            a1_pred = predictions[0]
                            b1_pred = predictions[1]
                            c1_pred = predictions[2]
                            d1_pred = predictions[3]
                            xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                            start_point = (xmin, ymin)
                            end_point = (xmax, ymax)
                            rect1 = xmax - xmin
                            rect2 = ymax - ymin
                            check_rect = rect2 / rect1
                        ################ else condition for sticker ######
                        else:
                            print("*************USE STK**********************************")
                            if classes[0] in ('famSticker', 'okmartSticker', 'sevenSticker'):
                                color = (60, 120, 40)
                                # xywh to xmin ymin xmax ymax
                                x1, y1, x2, y2 = convert2(width, height, int(boxs[0][0]), int(boxs[0][1]), int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))
                                x3, y3, x4, y4 = convert2(width, height, int(bboxes[1][0]), int(bboxes[1][1]), int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))
                                reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2, class_index(classes[1]), x3, y3, x4, y4]])

                                #### ratio ####
                                # distance between the two box centers
                                # (a standalone helper version of this rectangle-distance test is sketched after this example)
                                C1_x = boxs[0][0] + (boxs[0][2] / 2)
                                C1_y = boxs[0][1] + (boxs[0][3] / 2)
                                C2_x = bboxes[1][0] + (bboxes[1][2] / 2)
                                C2_y = bboxes[1][1] + (bboxes[1][3] / 2)
                                Dx = C2_x - C1_x
                                Dy = C2_y - C1_y
                                # The rectangles overlap along the X axis only: the minimum distance is the gap
                                # between the bottom edge of the upper rectangle and the top edge of the lower one.
                                if (Dx < (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2) and (Dy >= (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2):
                                    min_dist = Dy - (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2
                                # The rectangles overlap along the Y axis only: the minimum distance is the gap
                                # between the right edge of the left rectangle and the left edge of the right one.
                                elif (Dx >= (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2) and (Dy < (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2):
                                    min_dist = Dx - (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2
                                # No overlap on either axis: the minimum distance is the distance between the
                                # two closest corners (Pythagorean theorem).
                                elif (Dx >= (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2) and (Dy >= (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2):
                                    delta_x = Dx - (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2
                                    delta_y = Dy - (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2
                                    min_dist = np.sqrt(delta_x * delta_x + delta_y * delta_y)
                                # The rectangles intersect: flag the distance as -1.
                                else:
                                    min_dist = -1
                                if classes[1] == 'mat':
                                    if (min_dist / Dy) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                                elif classes[1] == 'sensor':
                                    if (min_dist / Dx) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                            elif classes[1] in ('famSticker', 'okmartSticker', 'sevenSticker'):
                                color = (60, 120, 40)
                                # xywh to xmin ymin xmax ymax
                                x1, y1, x2, y2 = convert2(width, height, int(boxs[0][0]), int(boxs[0][1]), int(boxs[0][0] + boxs[0][2]), int(boxs[0][1] + boxs[0][3]))
                                x3, y3, x4, y4 = convert2(width, height, int(bboxes[1][0]), int(bboxes[1][1]), int(bboxes[1][0] + bboxes[1][2]), int(bboxes[1][1] + bboxes[1][3]))
                                reg_input = np.array([[class_index(classes[0]), x1, y1, x2, y2, class_index(classes[1]), x3, y3, x4, y4]])

                                #### ratio ####
                                # distance between the two box centers
                                C1_x = boxs[0][0] + (boxs[0][2] / 2)
                                C1_y = boxs[0][1] + (boxs[0][3] / 2)
                                C2_x = bboxes[1][0] + (bboxes[1][2] / 2)
                                C2_y = bboxes[1][1] + (bboxes[1][3] / 2)
                                Dx = C2_x - C1_x
                                Dy = C2_y - C1_y
                                # Same minimum-distance test as above, with the sticker detected in classes[1].
                                if (Dx < (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2) and (Dy >= (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2):
                                    min_dist = Dy - (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2
                                elif (Dx >= (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2) and (Dy < (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2):
                                    min_dist = Dx - (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2
                                elif (Dx >= (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2) and (Dy >= (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2):
                                    delta_x = Dx - (int(boxs[0][0] + boxs[0][2]) + int(bboxes[1][0] + bboxes[1][2])) / 2
                                    delta_y = Dy - (int(boxs[0][1] + boxs[0][3]) + int(bboxes[1][1] + bboxes[1][3])) / 2
                                    min_dist = np.sqrt(delta_x * delta_x + delta_y * delta_y)
                                else:
                                    min_dist = -1
                                if classes[0] == 'mat':
                                    if (min_dist / Dy) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")
                                elif classes[0] == 'sensor':
                                    if (min_dist / Dx) < 3:
                                        predictions = loaded_model.predict(reg_input)
                                        a1_pred = predictions[0]
                                        b1_pred = predictions[1]
                                        c1_pred = predictions[2]
                                        d1_pred = predictions[3]
                                        xmin, xmax, ymin, ymax = convert(width, height, a1_pred, b1_pred, c1_pred, d1_pred)
                                        start_point = (xmin, ymin)
                                        end_point = (xmax, ymax)
                                        rect1 = xmax - xmin
                                        rect2 = ymax - ymin
                                        check_rect = rect2 / rect1
                                    else:
                                        print("not predict")


                        ##########################################
                        ######## check door size and display #########
                        # if check_rect > 1 and frame_num != 104:

                        print("check_rect:{}".format(check_rect))
                        if check_rect > 1:
                            blk = np.zeros(frame.shape, np.uint8)
                            cv2.rectangle(blk, start_point, end_point, color, cv2.FILLED)
                            frame =cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
                            print("predict_BBox Coords (xmin, ymin, xmax, ymax): {}".format((xmin,ymin,xmax,ymax)))
                        else:
                            print("not show predicted bbox")
                        ###############################
            ########
            #      select one entrace
            ########
            #if classes.count('entrance')>1:
            #    entrance_num=[]
            #    iou_list=[]
            #    iou_check=[]
            #    for i in range(len(classes)):
            #        if classes[i]=='entrance'
            #        entrance_num.append(i)
            #        if len(classes)>1:
            #            if(contains_duplicates(classes)==False):
            #                color = (50, 89, 170)
            #                width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            #                height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #                x1,y1,x2,y2=convert2(width,height,int(boxs[0][0]),int(boxs[0][1]),int(boxs[0][0]+boxs[0][2]),int(boxs[0][1]+boxs[0][3]))#xywh to xmin ymin xmax ymax
            #                x3,y3,x4,y4=convert2(width,height,int(bboxes[1][0]),int(bboxes[1][1]),int(bboxes[1][0]+bboxes[1][2]),int(bboxes[1][1]+bboxes[1][3]))#xywh to xmin ymin xmax ymax
            #                reg_input=np.array([[class_index(classes[0]),x1,y1,x2,y2,class_index(classes[1]),x3,y3,x4,y4]])
            #                predictions = loaded_model.predict(reg_input)
            #                a1_pred = predictions[0]
            #                b1_pred = predictions[1]
            #                c1_pred = predictions[2]
            #                d1_pred = predictions[3]
            #                xmin,xmax,ymin,ymax=convert(width,height,a1_pred,b1_pred,c1_pred,d1_pred)
            #                ###IOU###
            #                GT_bbox_area = (xmax -  xmin + 1) * (  ymax -ymin + 1)
            #                ###########
            #                ##check entrace##
            #                Pred_bbox_area =(x_bottomright_p - x_topleft_p + 1 ) * ( y_bottomright_p -y_topleft_p + 1)
            #                x_top_left =np.max([x_topleft_gt, x_topleft_p])
            #                y_top_left = np.max([y_topleft_gt, y_topleft_p])
            #                x_bottom_right = np.min([x_bottomright_gt, x_bottomright_p])
            #                y_bottom_right = np.min([y_bottomright_gt, y_bottomright_p])
            #
            #                intersection_area = (x_bottom_right- x_top_left + 1) * (y_bottom_right-y_top_left  + 1)
            #
            #                union_area = (GT_bbox_area + Pred_bbox_area - intersection_area)
            #
            #                iou_check.append(intersection_area/union_area)
            #
            #        for j in len(iou_check):
            #           if entrance_num[j]<iou_check.max:
            #               track.delete
            #if(int(track.track_id)>=3 or (int(track.track_id)>10 and int(track.track_id)<20 ) ):
            #frame_num
            ###################### draw bbox on screen
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            if(class_name=='entrance'):
                if( int(track.track_id)==1 and frame_num>121):
                    print("skip Tracker ID: {}, Class: {}".format(str(track.track_id), class_name))
                else:
                    print("RED Tracker ID: {}, Class: {}".format(str(track.track_id), class_name))
                    blk = np.zeros(frame.shape, np.uint8)
                    cv2.rectangle(blk,(int(bbox[0]*1.05), int(bbox[1]*1.05)), (int(bbox[2]*0.95), int(bbox[3]*0.95)), (255, 0, 0), cv2.FILLED)
                    frame =cv2.addWeighted(frame, 1.0, blk, 0.5, 1)
                    cv2.rectangle(frame, (int(bbox[0]*1.05), int(bbox[1]*1.05)), (int(bbox[2]*0.95), int(bbox[3]*0.95)), color, 2)
                    cv2.rectangle(frame, (int(bbox[0]*1.05), int(bbox[1]*1.05-30)), (int(bbox[0]*1.05)+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1]*1.05)), color, -1)
                    cv2.putText(frame, class_name + "-" + str(track.track_id),(int(bbox[0]*1.05), int(bbox[1]*1.05-10)),0, 0.75, (255,255,255),2)


            # if the info flag is enabled, print details about each track
            if FLAGS.info:
                print("Tracker ID: {}, Class: {},  BBox Coords (xmin, ymin, xmax, ymax): {}".format(str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))))

        # calculate frames per second of running detections
        fps = 1.0 / (time.time() - start_time)
        print("FPS: %.2f" % fps)
        avg.append(fps)
        print("avg fps {}".format(statistics.mean(avg)))
        cv2.putText(frame, "FPS: %.2f" % fps, (50, 500), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (66, 245, 141), 2)
        result = np.asarray(frame)
        result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        if not FLAGS.dont_show:
            cv2.imshow("Output Video", result)

        # if output flag is set, save video file
        if FLAGS.output:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
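
The inline `min_dist` branches in the example above re-implement the textbook minimum distance between two axis-aligned rectangles twice. For comparison, here is a small standalone helper that computes the same quantity directly from (x, y, w, h) boxes. It is only a sketch, not part of the original project: the name `min_rect_distance` is made up, and it returns 0 for intersecting boxes where the example flags intersection with -1.

import numpy as np

def min_rect_distance(box_a, box_b):
    # Minimum distance between two axis-aligned rectangles given as (x, y, w, h).
    ax, ay, aw, ah = box_a
    bx, by, bw, bh = box_b
    # gap along each axis; negative means the boxes overlap on that axis
    dx = max(ax - (bx + bw), bx - (ax + aw))
    dy = max(ay - (by + bh), by - (ay + ah))
    if dx < 0 and dy < 0:
        return 0.0                            # rectangles intersect
    if dx < 0:
        return float(dy)                      # overlap on the X axis: vertical gap
    if dy < 0:
        return float(dx)                      # overlap on the Y axis: horizontal gap
    return float(np.sqrt(dx * dx + dy * dy))  # disjoint: distance between closest corners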
コード例 #9
0
def deepsort(yolo, args):
    #nms_max_overlap = 0.3 #nms threshold

    images_input = True if os.path.isdir(args.input) else False
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input ==
                                         '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = True if args.output != "" else False
    if save_output:
        if images_input:
            raise IOError("image folder input could be saved to video file")

        # here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
        # to convert it to x264 to reduce file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        #video_FourCC    = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(
            *'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC,
                              (5. if args.input == '0' else video_fps),
                              video_size)

    if args.tracking_classes_path:
        # load the object classes used in tracking if have, other class
        # from detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    #create deep_sort box encoder
    encoder = create_box_encoder(args.deepsort_model_path, batch_size=1)

    #create deep_sort tracker
    max_cosine_distance = 0.5  #threshold for cosine distance
    nn_budget = None
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    # alloc a set of queues to record motion trace
    # for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:

        def get_frame():
            # get frame from video or image folder
            if images_input:
                if i >= len(frame_capture):
                    ret = False
                    frame = None
                else:
                    ret = True
                    image_file = frame_capture[i]
                    frame = cv2.imread(image_file)
            else:
                ret, frame = frame_capture.read()

            return ret, frame

        ret, frame = get_frame()
        if ret != True:
            break
        #time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect object from image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects and convert bbox from (xmin,ymin,xmax,ymax) to (x,y,w,h)
        boxes, class_names, scores = get_tracking_object(
            out_boxes, out_classnames, out_scores, tracking_class_names)

        # get encoded features of bbox area image
        features = encoder(frame, boxes)

        # form up detection records
        detections = [
            Detection(bbox, score, feature, class_name)
            for bbox, score, class_name, feature in zip(
                boxes, scores, class_names, features)
        ]

        # Run non-maximum suppression.
        #nms_boxes = np.array([d.tlwh for d in detections])
        #nms_scores = np.array([d.confidence for d in detections])
        #indices = preprocessing.non_max_suppression(nms_boxes, nms_max_overlap, nms_scores)
        #detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # show all detection result as white box
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(det.class_name),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # record tracking info and get bbox
            track_indexes.append(int(track.track_id))
            total_obj_counter.append(int(track.track_id))
            bbox = track.to_tlbr()

            # show all tracking result as color box
            color = [
                int(c)
                for c in COLORS[track_indexes[track_count] % len(COLORS)]
            ]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150,
                        (color), 2)
            if track.class_name:
                cv2.putText(frame, str(track.class_name),
                            (int(bbox[0] + 30), int(bbox[1] - 20)), 0,
                            5e-3 * 150, (color), 2)

            track_count += 1

            # get center point (x,y) of current track bbox and record in queue
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            motion_traces[track.track_id].append(center)

            # draw current center point
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)
            #draw motion trace
            motion_trace = motion_traces[track.track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]),
                         (color), thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num),
                    (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count),
                    (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 200, (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("DeepSORT", 0)
        cv2.resizeWindow('DeepSORT', 1024, 768)
        cv2.imshow('DeepSORT', frame)

        if save_output:
            #save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2
        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything if job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
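
`deepsort()` above relies on a project helper, `get_tracking_object`, to keep only the tracked classes and to convert boxes from (xmin, ymin, xmax, ymax) to the (x, y, w, h) layout the Deep SORT encoder expects. The real implementation lives in the source project; a minimal reconstruction based only on the call site and its comment could look like this:

def get_tracking_object(boxes, class_names, scores, tracking_class_names=None):
    # Keep only detections whose class is tracked (None means "track everything")
    # and convert (xmin, ymin, xmax, ymax) boxes to (x, y, w, h).
    out_boxes, out_class_names, out_scores = [], [], []
    for box, class_name, score in zip(boxes, class_names, scores):
        if tracking_class_names is not None and class_name not in tracking_class_names:
            continue
        xmin, ymin, xmax, ymax = box
        out_boxes.append([xmin, ymin, xmax - xmin, ymax - ymin])
        out_class_names.append(class_name)
        out_scores.append(score)
    return out_boxes, out_class_names, out_scores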
コード例 #10
0
def Object_tracking(Yolo,
                    video_path,
                    output_path,
                    class_names,
                    image_size=416,
                    show=False,
                    rectangle_colors=''):
    # Definition of the parameters
    max_cosine_distance = 0.7
    nn_budget = None

    # initialize deep sort object
    model_filename = 'models/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    if video_path:
        vid = cv2.VideoCapture(video_path)  # detect on video
    else:
        vid = cv2.VideoCapture(0)  # detect from webcam
    width, height, fps = get_video_capture_info(vid)
    codec = cv2.VideoWriter_fourcc(*'XVID')
    # output_path must be .mp4
    out = cv2.VideoWriter(output_path, codec, fps, (width, height))

    key_list = list(class_names.keys())
    val_list = list(class_names.values())

    detection_times, tracking_times = [], []
    _, frame = vid.read()  # BGR

    while frame is not None:
        # create the original_frame for display purposes (draw_bboxes)
        original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB)
        # preprocessing found in datasets.py
        img = preprocess_image(frame, image_size)

        t1 = time.time()
        boxes, class_inds, scores = yolo_predict(yolo, img, frame)
        t2 = time.time()
        names = []
        for clss in class_inds:
            names.append(class_names[clss])
        features = np.array(encoder(original_frame, boxes))
        # Pass detections to the deepsort object and obtain the track information.
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                boxes, scores, names, features)
        ]
        tracker.predict()
        tracker.update(detections)

        # Obtain info from the tracks
        tracked_bboxes = get_tracker_info(tracker, val_list, key_list)

        # update the times information
        t3 = time.time()
        detection_times.append(t2 - t1)
        tracking_times.append(t3 - t1)
        detection_times = detection_times[-20:]
        tracking_times = tracking_times[-20:]

        ms, fps, fps2 = efficiency_statistics(detection_times, tracking_times)

        # draw detection on frame
        image = draw_bbox(original_frame,
                          tracked_bboxes,
                          class_names,
                          tracking=True,
                          rectangle_colors=rectangle_colors)
        image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)

        # get next frame
        _, frame = vid.read()  # BGR

        # show and store the results
        print(
            "Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(
                ms, fps, fps2))
        if output_path != '':
            out.write(image)
        if show:
            cv2.imshow('output', image)
            if cv2.waitKey(25) & 0xFF == ord("q"):
                cv2.destroyAllWindows()
                break

    cv2.destroyAllWindows()
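
`Object_tracking()` above calls `get_video_capture_info(vid)` to read the stream geometry before creating the `VideoWriter`. That helper comes from the source project; a minimal sketch of what it presumably returns, based on how its results are used, is:

import cv2

def get_video_capture_info(vid):
    # Read frame width, height and FPS from an opened cv2.VideoCapture.
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vid.get(cv2.CAP_PROP_FPS))
    return width, height, fps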
コード例 #11
0
def main(yolo):

    start = time.time()
    #Definition of the parameters
    max_cosine_distance = 0.5  #0.9 cosine distance gating threshold
    nn_budget = None
    nms_max_overlap = 0.3  # non-maximum suppression threshold
    vio_counter = 0
    counter = []
    #frame counting

    #deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture(args["input"])

    video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    original_fps = video_capture.get(cv2.CAP_PROP_FPS)

    output_size = (200, 200)
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out2 = cv2.VideoWriter('%s_output.mp4' % (args["input"].split('.')[0]),
                           fourcc, original_fps, output_size)
    if writeVideo_flag:

        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        #fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter(
            './output/' + args["input"][43:57] + "_" + args["class"] +
            '_output.mp4', fourcc, original_fps, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0

    first, mouse_frame = video_capture.read()
    cv2.namedWindow('DrawLine')
    cv2.resizeWindow('DrawLine', 1280, 720)

    while True:
        cv2.setMouseCallback('DrawLine', draw_line)

        cv2.imshow('DrawLine', mouse_frame)

        if cv2.waitKey(0) == ord('c'):
            break
    cv2.destroyAllWindows()
    ##
    first, mouse_frame = video_capture.read()
    cv2.namedWindow('DrawALine')
    cv2.resizeWindow('DrawALine', 1280, 720)

    while True:
        cv2.setMouseCallback('DrawALine', draw_Aline)

        cv2.imshow('DrawALine', mouse_frame)

        if cv2.waitKey(0) == ord('a'):
            break
    cv2.destroyAllWindows()
    ##
    first, mouse_frame = video_capture.read()
    cv2.namedWindow('DrawBLine')
    cv2.resizeWindow('DrawBLine', 1280, 720)

    while True:
        cv2.setMouseCallback('DrawBLine', draw_Bline)

        cv2.imshow('DrawBLine', mouse_frame)

        if cv2.waitKey(0) == ord('b'):
            break
    cv2.destroyAllWindows()

    while True:

        ret, frame = video_capture.read()
        if ret != True:
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs, class_names = yolo.detect_image(image)
        features = encoder(frame, boxs)
        # (detection confidence score is set to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        i = int(0)
        indexIDs = []
        c = []

        for det in detections:
            bbox = det.to_tlbr()

        for track in tracker.tracks:

            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # draw bounding box
            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()

            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 100,
                        (0, 255, 0), 2)
            if len(class_names) > 0:
                class_name = class_names[0]
                cv2.putText(frame, str(class_names[0]),
                            (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 100,
                            (0, 255, 0), 2)

            i += 1
            # bbox_center_point(x,y)
            center = (int(
                ((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            # track_id[center]
            pts[track.track_id].append(center)
            bts[track.track_id].append(center)
            thickness = 2
            # center point
            cv2.circle(frame, (center), 1, (0, 255, 0), 2)

            #intersect A line
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][0] is None or pts[
                        track.track_id][1] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]),
                         (pts[track.track_id][j]), (255, 0, 0), 2)
                if intersect(pts[track.track_id][j - 1],
                             pts[track.track_id][j], line[0], line[1]):
                    violation_id[track.track_id] = True
                #if intersect(bts[track.track_id][0], bts[track.track_id][1], A_line[0], A_line[1]):
                #frame_count[track.track_id] = frame_index

                #if intersect(bts[track.track_id][0], bts[track.track_id][1], B_line[0], B_line[1]):
                #if frame_index == frame_count[track.track_id]:
                #continue
                #speed[track.track_id]=324./(frame_index-frame_count[track.track_id])
                #print(str(speed[track.track_id])+"km/h id:"+str(track.track_id))
                #if speed[track.track_id] >20:
                #highspeed.append(speed[track.track_id])
            #this is for speed meter
            for j in range(1, len(bts[track.track_id])):
                if bts[track.track_id][0] is None or bts[
                        track.track_id][1] is None:
                    continue
                if intersect(bts[track.track_id][0], bts[track.track_id][1],
                             A_line[0], A_line[1]):
                    frame_count[track.track_id] = frame_index

                if intersect(bts[track.track_id][0], bts[track.track_id][1],
                             B_line[0], B_line[1]):
                    if frame_index == frame_count[track.track_id]:
                        continue
                    speed[track.track_id] = 324. / (
                        frame_index - frame_count[track.track_id])
                    print(
                        str(speed[track.track_id]) + "km/h id:" +
                        str(track.track_id))
                    if speed[track.track_id] > 20:
                        highspeed.append(speed[track.track_id])

            if violation_id[track.track_id] == True:

                indexIDs.append(int(track.track_id))
                counter.append(int(track.track_id))
                bbox = track.to_tlbr()

                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
                cv2.line(frame, (int(bbox[0]), int(bbox[1])),
                         (int(bbox[2]), int(bbox[3])), (0, 0, 255), 2)
                cv2.line(frame, (int(bbox[0]), int(bbox[3])),
                         (int(bbox[2]), int(bbox[1])), (0, 0, 255), 2)
                cv2.putText(frame,
                            str(track.track_id) + "offender",
                            (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 100,
                            (0, 0, 255), 2)
                if len(class_names) > 0:
                    class_name = class_names[0]
                    cv2.putText(frame, str(class_names[0]),
                                (int(bbox[0]), int(bbox[1] - 20)), 0,
                                5e-3 * 100, (0, 255, 255), 2)

                i += 1
                # bbox_center_point(x,y)
                center = (int(((bbox[0]) + (bbox[2])) / 2),
                          int(((bbox[1]) + (bbox[3])) / 2))
                # track_id[center]
                pts[track.track_id].append(center)
                thickness = 2
                result_top = int(center[1] - output_size[1] / 2)
                result_bottom = int(center[1] + output_size[1] / 2)
                result_left = int(center[0] - output_size[0] / 2)
                result_right = int(center[0] + output_size[0] / 2)
                if result_top > 0 and result_bottom > 0 and result_left > 0 and result_right > 0:
                    result_img = frame[result_top:result_bottom,
                                       result_left:result_right].copy()
                    out2.write(result_img)
                    cv2.imshow('result_img', result_img)

                # center point
                #cv2.circle(frame, (center), 1, (20,20,20), 1)
                #cv2.circle(frame, (center), 1, (20, 20, 20), thickness)

        count = len(set(counter))

        vio_counter = violation_id.count(True)
        cv2.line(frame, line[0], line[1], (0, 255, 255), 2)
        cv2.line(frame, A_line[0], A_line[1], (0, 255, 255), 2)
        cv2.line(frame, B_line[0], B_line[1], (0, 255, 255), 2)
        #cv2.line(frame, line[0], line[1], (0, 255, 255), 1)
        cv2.putText(
            frame,
            "Speed meter:" + str(round(highspeed[len(highspeed) - 1], 2)) +
            "km/h id:" + str(track.track_id), (int(20), int(180)), 0,
            5e-3 * 120, (0, 0, 255), 2)
        cv2.putText(frame, "Violated Counter: " + str(vio_counter),
                    (int(20), int(150)), 0, 5e-3 * 120, (0, 0, 255), 2)
        cv2.putText(frame, "Total Object Counter: " + str(count),
                    (int(20), int(120)), 0, 5e-3 * 120, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(i),
                    (int(20), int(80)), 0, 5e-3 * 120, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0,
                    5e-3 * 100, (0, 255, 0), 2)
        #cv2.putText(frame, "Violated Counter: " + str(vio_counter), (int(20), int(150)),0, 5e-3 * 100, (0, 0 ,255),1)
        #cv2.putText(frame, "Total Object Counter: "+str(count),(int(20), int(120)),0, 5e-3 * 100, (0,255,0),1)
        #cv2.putText(frame, "Current Object Counter: "+str(i),(int(20), int(80)),0, 5e-3 * 100, (0,255,0),1)
        #cv2.putText(frame, "FPS: %f"%(fps),(int(20), int(40)),0, 5e-3 * 100, (0,255,0),1)
        cv2.namedWindow("YOLO3_Deep_SORT", 0)
        cv2.resizeWindow('YOLO3_Deep_SORT', 1280, 720)
        cv2.imshow('YOLO3_Deep_SORT', frame)

        if writeVideo_flag:
            #save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')
        fps = (fps + (1. / (time.time() - t1))) / 2
        #fpss  = 1./(time.time()-t1)
        #print(set(counter))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            #video_capture.stop()
            break
    print(" ")
    print("[Finish]")
    end = time.time()

    if len(pts[track.track_id]) != 0:
        print(args["input"][43:57] + ": " + str(count) + " " +
              str(class_name) + ' Found')

    else:
        print("[No Found]")

    video_capture.release()

    if writeVideo_flag:
        #video_capture.stop()
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
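
The line-crossing and speed checks above depend on an `intersect()` helper that tests whether the segment between two consecutive track centers crosses one of the drawn lines. The project's own helper is not shown here; the implementation commonly used in these counting demos is the counter-clockwise orientation test, sketched below:

def ccw(A, B, C):
    # True if the points A, B, C are arranged counter-clockwise.
    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])

def intersect(A, B, C, D):
    # True if segment AB crosses segment CD.
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)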
コード例 #12
0
def process_frame():
    # Definition of the parameters

    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = False

    #video_capture = cv2.VideoCapture(0)
    producer = KafkaProducer(
        bootstrap_servers='master:6667',
        value_serializer=lambda m: json.dumps(m).encode('utf8'))
    consumer = KafkaConsumer('test', bootstrap_servers=['master:6667'])
    for msg in consumer:
        json_from_consumer = json.loads(msg[-6])

        decoded = base64.b64decode(json_from_consumer['image'])
        filename = '/home/haohsiang/Vigilancia-Distributed/codev1frame.jpg'  # I assume you have a way of picking unique filenames
        with open(filename, 'wb') as f:
            f.write(decoded)
        frame = cv2.imread(filename)
        #ret, frame = video_capture.read()  # frame shape 640*480*3
        #if ret != True:
        #    break
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs = yolo.detect_image(image)
        print("box_num", len(boxs))
        features = encoder(frame, boxs)

        # (detection confidence score is set to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)
            print(
                str(track.track_id) + ' :' + str(bbox[0]) + ' ' +
                str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3]))
            print(dt.datetime.now().time())
            result = {
                'ID': str(track.track_id),
                'timestamp': dt.datetime.now().isoformat(),
                'location_x': str(bbox[0]),
                'w': str(bbox[2])
            }
            producer.send('resultstream', result)
            time.sleep(0.3)
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')
        fps = 0.0
        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
コード例 #13
0
ファイル: demo.py プロジェクト: alievilya/yolov4-doorman
def main(yolo):
    # Definition of the parameters
    max_cosine_distance = 0.2
    nn_budget = None
    nms_max_overlap = 1.0

    output_format = 'mp4'
    video_name = 'bus4_2in_4out.mp4'
    file_path = join('data_files/videos', video_name)
    output_name = 'save_data/out_' + video_name[0:-3] + output_format
    initialize_door_by_yourself = False
    door_array = None
    # Deep SORT
    model_filename = '../model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    show_detections = True
    writeVideo_flag = True
    asyncVideo_flag = False

    counter = Counter(counter_in=0, counter_out=0, track_id=0)

    if asyncVideo_flag:
        video_capture = VideoCaptureAsync(file_path)
    else:
        video_capture = cv2.VideoCapture(file_path)

    if asyncVideo_flag:
        video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = int(video_capture.get(3))
            h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_name, fourcc, 15, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()

    ret, first_frame = video_capture.read()

    if door_array is None:
        if initialize_door_by_yourself:
            door_array = select_object(first_frame)[0]
            print(door_array)
        else:
            all_doors = read_door_info('data_files/doors_info_links.json')
            door_array = all_doors[video_name]

    border_door = door_array[3]
    error_values = []
    truth = get_truth(video_name)
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if not ret:
            total_count = counter.return_total_count()
            true_total = truth.inside + truth.outside
            err = abs(total_count - true_total) / true_total
            log_res = "in video: {}\n predicted / true\n counter in: {} / {}\n counter out: {} / {}\n" \
                      " total: {} / {}\n error: {}\n______________\n".format(video_name, counter.counter_in,
                                                                             truth.inside,
                                                                             counter.counter_out, truth.outside,
                                                                             total_count, true_total, err)
            with open('../log_results.txt', 'w') as file:
                file.write(log_res)
            print(log_res)
            error_values.append(err)
            break

        t1 = time.time()

        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        boxes, confidence, classes = yolo.detect_image(image)

        features = encoder(frame, boxes)
        detections = [
            Detection(bbox, confidence, cls,
                      feature) for bbox, confidence, cls, feature in zip(
                          boxes, confidence, classes, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.cls for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        cv2.rectangle(frame, (int(door_array[0]), int(door_array[1])),
                      (int(door_array[2]), int(door_array[3])), (23, 158, 21),
                      2)

        for det in detections:
            bbox = det.to_tlbr()
            if show_detections and len(classes) > 0:
                score = "%.2f" % (det.confidence * 100) + "%"
                rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
                rect_door = Rectangle(int(door_array[0]), int(door_array[1]),
                                      int(door_array[2]), int(door_array[3]))
                intersection = rect_head & rect_door

                if intersection:
                    squares_coeff = rect_square(*intersection) / rect_square(
                        *rect_head)
                    cv2.putText(
                        frame,
                        score + " inter: " + str(round(squares_coeff, 3)),
                        (int(bbox[0]), int(bbox[3])), 0, 1e-3 * frame.shape[0],
                        (0, 100, 255), 5)
                    cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])), (255, 0, 0), 3)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            # first appearance of object with id=track.track_id

            if track.track_id not in counter.people_init or counter.people_init[
                    track.track_id] == 0:
                counter.obj_initialized(track.track_id)
                rect_head = Rectangle(bbox[0], bbox[1], bbox[2], bbox[3])
                rect_door = Rectangle(door_array[0], door_array[1],
                                      door_array[2], door_array[3])
                res = rect_head & rect_door
                if res:

                    inter_square = rect_square(*res)
                    head_square = rect_square(*rect_head)
                    #     was initialized in door, probably going in
                    if (inter_square / head_square) >= 0.8:
                        counter.people_init[track.track_id] = 2
                        #     initialized in the bus, mb going out
                    elif (inter_square /
                          head_square) <= 0.4 or bbox[3] > border_door:
                        counter.people_init[track.track_id] = 1
                # res is None, means that object is not in door contour
                else:
                    counter.people_init[track.track_id] = 1

                counter.people_bbox[track.track_id] = bbox
            counter.cur_bbox[track.track_id] = bbox

            adc = "%.2f" % (track.adc *
                            100) + "%"  # Average detection confidence
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, "ID: " + str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 1e-3 * frame.shape[0],
                        (0, 255, 0), 5)

            if not show_detections:
                track_cls = track.cls
                cv2.putText(frame, str(track_cls),
                            (int(bbox[0]), int(bbox[3])), 0,
                            1e-3 * frame.shape[0], (0, 255, 0), 1)
                cv2.putText(
                    frame, 'ADC: ' + adc,
                    (int(bbox[0]), int(bbox[3] + 2e-2 * frame.shape[1])), 0,
                    1e-3 * frame.shape[0], (0, 255, 0), 1)

        id_get_lost = [
            track.track_id for track in tracker.tracks
            if track.time_since_update >= 25 and track.age >= 29
        ]
        id_inside_tracked = [
            track.track_id for track in tracker.tracks if track.age > 60
        ]
        for val in counter.people_init.keys():
            # check bbox also
            cur_c = find_centroid(counter.cur_bbox[val])
            init_c = find_centroid(counter.people_bbox[val])
            vector_person = (cur_c[0] - init_c[0], cur_c[1] - init_c[1])

            if val in id_get_lost and counter.people_init[val] != -1:
                # if vector_person < 0 then current coord is less than initialized, it means that man is going
                # in the exit direction
                if vector_person[1] > 70 and counter.people_init[
                        val] == 2:  # and counter.people_bbox[val][3] > border_door \
                    counter.get_in()

                elif vector_person[1] < -70 and counter.people_init[val] == 1:
                    counter.get_out()

                counter.people_init[val] = -1
                print(f"person left frame")
                print(f"current centroid - init : {cur_c} - {init_c}\n")
                print(f"vector: {vector_person}\n")

                del val
            # elif val in id_inside_tracked and val not in id_get_lost and counter.people_init[val] == 1 \
            #         and bb_intersection_over_union(counter.cur_bbox[val], door_array) <= 0.3 \
            #         and vector_person[1] > 0:  # and \
            #     # counter.people_bbox[val][3] > border_door:
            #     counter.get_in()
            #
            #     counter.people_init[val] = -1
            #     print(f"person is tracked for a long time")
            #     print(f"current centroid - init : {cur_c} - {init_c}\n")
            #     print(f"vector: {vector_person}\n")
            #     imaggg = cv2.line(frame, find_centroid(counter.cur_bbox[val]),
            #                       find_centroid(counter.people_bbox[val]),
            #                       (0, 0, 255), 7)

            # cv2.imshow('frame', imaggg)
            # cv2.waitKey(0)

        ins, outs = counter.show_counter()
        cv2.putText(frame, "in: {}, out: {} ".format(ins, outs), (10, 30), 0,
                    1e-3 * frame.shape[0], (255, 0, 0), 5)

        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('image', 1400, 800)
        cv2.imshow('image', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            # print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()

    mean_error = np.mean(error_values)
    print("mean error for {} video: {}".format(video_name, mean_error))
Code Example #14
File: deep_sort_app.py    Project: MoonBlvd/deep_sort
def run_multiple(sequence_dir, detection_dir, output_dir, min_confidence,
                 nms_max_overlap, min_detection_height, max_cosine_distance,
                 max_age, nn_budget, display, save_images_dir):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to a directory that contains one MOTChallenge sequence directory
        per video.
    detection_dir : str
        Directory containing one detection ``.npy`` file per sequence.
    output_dir : str
        Directory in which the tracking results are written, one ``.npy``
        file per sequence.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    max_age : int
        Maximum number of consecutive misses before a track is deleted
        (forwarded to the Tracker as ``max_age``).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.
    save_images_dir : Optional[str]
        If not None, save the tracking visualization images to the indicated
        directory.

    """
    all_sequences = sorted(glob.glob(os.path.join(sequence_dir, '*')))
    if len(all_sequences) == 0:
        raise ValueError("There is no folder in " + sequence_dir)
    for sequence_dir in all_sequences:
        video_name = sequence_dir.split('/')[-1]
        output_file = os.path.join(output_dir, video_name + '.npy')  #'.txt')
        print(video_name)
        detection_file = os.path.join(detection_dir, video_name + '.npy')
        try:
            os.stat(detection_file)
            os.stat(sequence_dir)
        except OSError:
            raise FileNotFoundError(detection_file + ' or ' + sequence_dir +
                                    " doesn't exist!")
        seq_info = gather_sequence_info(sequence_dir, detection_file)
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric, max_age=max_age)
        results = []

        def frame_callback(vis, frame_idx):
            # print("Processing frame %05d" % frame_idx)

            # Load image and generate detections.
            detections = create_detections(seq_info["detections"], frame_idx,
                                           min_detection_height)
            detections = [
                d for d in detections if d.confidence >= min_confidence
            ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Update tracker.
            tracker.predict()
            tracker.update(detections)

            # Update visualization.
            if display:
                image = cv2.imread(seq_info["image_filenames"][frame_idx],
                                   cv2.IMREAD_COLOR)
                image_name = seq_info["image_filenames"][frame_idx].split(
                    '/')[-1]
                vis.set_image(image.copy(), image_name)
                # vis.draw_detections(detections)
                vis.draw_trackers(tracker.tracks)

            # Store results.
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlwh()
                # print("track._class:",track._class)
                # print("track.confidence:",track.confidence)
                # print("track.feature_to_save:",track.feature_to_save.shape)
                results.append(
                    np.hstack([
                        frame_idx, track.track_id, bbox[0], bbox[1], bbox[2],
                        bbox[3], track._class, track.confidence,
                        track.feature_to_save
                    ]))

        # Run tracker.
        if display:
            visualizer = visualization.Visualization(
                seq_info, update_ms=5, save_images_dir=save_images_dir)
        else:
            visualizer = visualization.NoVisualization(seq_info)
        visualizer.run(frame_callback)

        # Store results.
        np.save(output_file, np.array(results))

        # f = open(output_file, 'w')
        # for row in results:
        #     print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' % (
        #         row[0], row[1], row[2], row[3], row[4], row[5]),file=f)
        # f.close()
        # shutdown the window
        if display:
            cv2.destroyWindow(visualizer.viewer._caption)
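
For reference, a hypothetical way to drive run_multiple from a script; every path and threshold below is a placeholder for illustration, not a value taken from the project:

# Placeholder paths and thresholds, for illustration only.
run_multiple(
    sequence_dir="MOT16/test",          # one sub-folder per sequence
    detection_dir="detections/test",    # one <sequence>.npy detection file per sequence
    output_dir="results",               # tracking output written as <sequence>.npy
    min_confidence=0.3,
    nms_max_overlap=1.0,
    min_detection_height=0,
    max_cosine_distance=0.2,
    max_age=30,
    nn_budget=100,
    display=False,
    save_images_dir=None)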
Code Example #15
File: main.py    Project: dzc15331066/detect-client
def main():

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    tsocket = TSocket.TSocket(__HOST, __PORT)
    transport = TTransport.TFramedTransport(tsocket)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Client(protocol)
    extract_rate = 5  # frame extraction rate (frames sampled per second)
    transport.open()

    # deep_sort
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture("MOT16-09.mp4")
    frame_rate = video_capture.get(cv2.CAP_PROP_FPS)
    sample_interval = 1. / extract_rate
    print(frame_rate, extract_rate, sample_interval)
    delay = 1. / frame_rate
    print(delay)

    fps = 0.0
    ##############################################
    loc_dic = {}
    in_count = 0  # "in" counter
    out_count = 0  # "out" counter
    ##############################################
    frame_count = 0
    global last_stat_time
    last_stat_time = time.time()
    w = 640
    h = 480
    last_sample_time = 0.0
    while True:
        start = time.time()
        ret, frame = video_capture.read()
        if ret != True:
            break
        frame = cv2.resize(frame, (w, h))
        now = time.time()
        if last_sample_time + sample_interval <= now:
            t1 = time.time()
            # compress the frame to JPEG, send it to the GPU server for YOLOv3
            # detection, and get the bounding boxes and appearance features back
            boxes, features = encode(client, frame)
            last_sample_time = time.time()
            nfps = 1. / (time.time() - t1)
            print(nfps)
            if fps <= 0.1:
                fps = nfps
            else:
                fps = (fps + nfps) / 2
            print("detection fps= %f" % (fps))
            #print(features[0])#128
            tt1 = time.time()
            detections = [
                Detection(bbox, 1.0, feature)
                for bbox, feature in zip(boxes, features)
            ]
            #print(detections)

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)
            print("tracker used:", time.time() - tt1)

            for track in tracker.tracks:
                #print(track.track_id)
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                            (0, 255, 0), 2)
                id_num = str(track.track_id)
                if id_num in loc_dic:
                    # check the motion direction against the previous frame
                    # moving right and crossing the dividing line
                    last_x = loc_dic[id_num]
                    if bbox[0] > last_x and (bbox[0] > float(w / 2)
                                             and last_x < float(w / 2)):
                        print("##################in one#################")
                        loc_dic[id_num] = bbox[0]
                        in_count += 1
                    # moving left and crossing the dividing line
                    elif bbox[0] < last_x and (bbox[0] < float(w / 2)
                                               and last_x > float(w / 2)):
                        print("###################out one################")
                        loc_dic[id_num] = bbox[0]
                        out_count += 1
                else:
                    loc_dic[id_num] = bbox[0]
            for det in detections:
                bbox = det.to_tlbr()
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
        frame_count += 1

        cv2.line(frame, (int(w / 2), int(0)), (int(w / 2), int(h)),
                 (255, 255, 255))
        cv2.putText(frame, "in number:" + str(in_count), (10, 40), 0, 1e-3 * h,
                    (255, 0, 0), 2)
        cv2.putText(frame, "out number:" + str(out_count), (10, 60), 0,
                    1e-3 * h, (255, 0, 0), 2)
        ret, frame = cv2.imencode('.jpg', frame)
        dt["img"] = frame.tobytes()
        wait_time = delay - (time.time() - start)
        #print(wait_time)
        if wait_time > 0:
            time.sleep(wait_time)

    video_capture.release()
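
The in/out counting above reduces to one check: between two updates of the same track, did its x-coordinate cross the vertical line at w / 2, and in which direction? A self-contained sketch of that check follows; it is a simplified variant that always refreshes the stored position, and the function and argument names are made up for illustration:

def update_crossing(loc_dic, track_id, x, line_x):
    """Return +1 if the track crossed line_x moving right ("in"),
    -1 if it crossed moving left ("out"), 0 otherwise."""
    last_x = loc_dic.get(track_id)
    loc_dic[track_id] = x          # remember the latest position
    if last_x is None:
        return 0                   # first observation, nothing to compare
    if last_x < line_x <= x:
        return 1                   # moved right across the line
    if x <= line_x < last_x:
        return -1                  # moved left across the line
    return 0

# Example: a track previously at x=290 now at x=310 crosses line_x=300 -> "in".
counts = {"in": 0, "out": 0}
loc = {7: 290.0}
delta = update_crossing(loc, 7, 310.0, 300.0)
if delta > 0:
    counts["in"] += 1
elif delta < 0:
    counts["out"] += 1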
Code Example #16
def run(sequence_dir, detection_file, output_file, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance, nn_budget,
        display, lambda_):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.
    lambda_ : float
        Weighting factor forwarded to the Tracker as ``_lambda``.

    """
    seq_info = gather_sequence_info(sequence_dir, detection_file)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric, _lambda=lambda_)
    results = []
    print("processing " + sequence_dir)

    def frame_callback(vis, frame_idx):
        if frame_idx % 100 == 0:
            print("Processing frame %05d" % frame_idx)

        # Load image and generate detections.
        detections = create_detections(seq_info["detections"], frame_idx,
                                       min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display:
            image = cv2.imread(seq_info["image_filenames"][frame_idx],
                               cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            vis.draw_detections(detections)
            vis.draw_trackers(tracker.tracks)

        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlwh()
            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]
            ])

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)

    # Store results.
    with open(output_file, 'w') as f:
        for row in results:
            print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
                  (row[0], row[1], row[2], row[3], row[4], row[5]),
                  file=f)
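
A hypothetical single-sequence invocation of run; the paths and the lambda_ weight below are placeholders, not values from the project:

# Placeholder paths and values, for illustration only.
run(sequence_dir='MOT16/train/MOT16-02',
    detection_file='detections/MOT16-02.npy',
    output_file='results/MOT16-02.txt',
    min_confidence=0.3,
    nms_max_overlap=1.0,
    min_detection_height=0,
    max_cosine_distance=0.2,
    nn_budget=100,
    display=False,
    lambda_=0.5)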
Code Example #17
File: initial.py    Project: sirkpp/major_project
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # Deep SORT
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    tracking = False
    writeVideo_flag = True
    asyncVideo_flag = False

    file_path = ['out.mp4']
    #file_path = ['veed.mp4']
    cols = math.ceil(math.sqrt(len(file_path)))
    rows = math.ceil(len(file_path) / cols)
    singleHeight = int(screenHeight / rows)
    singleWidth = int(screenWidth / cols)
    out_image = np.zeros((screenHeight, screenWidth, 3), np.uint8)

    #if asyncVideo_flag :
    #    video_capture = VideoCaptureAsync(file_path)
    #else:
    #    video_capture = cv2.VideoCapture(file_path)

    video_captures = []
    cameras = []
    prvTimes = []
    localgloballink = []
    imgsSaved = 2

    for i in range(len(file_path)):
        video_captures.append(cv2.VideoCapture(file_path[i]))
        cameras.append(Camera())
        prvTimes.append(time.time())

    #if asyncVideo_flag:
    #    video_capture.start()

    if writeVideo_flag:
        if asyncVideo_flag:
            w = int(video_capture.cap.get(3))
            h = int(video_capture.cap.get(4))
        else:
            w = screenWidth
            h = screenHeight
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('output_yolov4.avi', fourcc, 30, (w, h))
        frame_index = -1

    fps = 0.0
    fps_imutils = imutils.video.FPS().start()
    frame = []
    globalPersonCount = 1
    for file in file_path:
        frame.append(None)

    curFrame = 1
    gtIndex = 0

    while True:
        allimages = []

        for index in range(len(file_path)):
            cur = time.time()
            ret, frame[index] = video_captures[index].read(
            )  # frame shape 640*480*3
            if ret != True:
                break

            t1 = time.time()

            image = Image.fromarray(frame[index][..., ::-1])  # bgr to rgb
            boxes, confidence, classes = yolo.detect_image(image)

            if tracking:
                features = encoder(frame[index], boxes)

                detections = [
                    Detection(bbox, confidence, cls, feature)
                    for bbox, confidence, cls, feature in zip(
                        boxes, confidence, classes, features)
                ]
            else:
                detections = [
                    Detection_YOLO(bbox, confidence,
                                   cls) for bbox, confidence, cls in zip(
                                       boxes, confidence, classes)
                ]

            # Run non-maxima suppression.
            boxes = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            indices = preprocessing.non_max_suppression(
                boxes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            for det in detections:
                bbox = det.to_tlbr()
                score = "%.2f" % round(det.confidence * 100, 2) + "%"
                cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
                if len(classes) > 0:
                    cls = det.cls
                    cv2.putText(frame[index],
                                str(cls) + " " + score,
                                (int(bbox[0]), int(bbox[3])), 0,
                                1e-3 * frame[index].shape[0], (0, 255, 0), 1)

            #nabin's code
            hsvImage = cv2.cvtColor(frame[index], cv2.COLOR_BGR2HSV)

            hungarianmatrix = []
            indexx = 0
            if (len(cameras[index].PersonData) > 0):
                diff = cur - prvTimes[index]
                times = int(diff / 0.05)
                prvTimes[index] = cur
                for data in cameras[index].PersonData:
                    if (data.kf != None):
                        for i in range(times):
                            data.kf.predict()
            nodata = len(cameras[index].PersonData)
            for z in range(len(cameras[index].PersonData)):
                cameras[index].PersonData[z].updated = False
            for det in detections:
                bbox = det.to_tlbr()
                if (nodata == 0):
                    persondata = PersonData()
                    persondata.color = [
                        int(random.randint(0, 255)),
                        int(random.randint(0, 255)),
                        int(random.randint(0, 255))
                    ]
                    persondata.positions.append([(bbox[0] + bbox[2]) / 2,
                                                 (bbox[1] + bbox[3]) / 2])
                    persondata.positions.append([(bbox[0] + bbox[2]) / 2 + 0.1,
                                                 (bbox[1] + bbox[3]) / 2 + 0.1
                                                 ])
                    persondata.top = bbox[0]
                    persondata.left = bbox[1]
                    persondata.lastPosition = bbox
                    persondata.localPersonIndex = cameras[
                        index].localPersonCount
                    persondata.kf = KF(persondata.positions[0][0],
                                       persondata.positions[0][1], 0, 0)
                    persondata.globalPersonIndex = globalPersonCount
                    localgloballink.append([
                        globalPersonCount, index, persondata.localPersonIndex
                    ])
                    globalPersonCount = globalPersonCount + 1
                    cameras[index].localPersonCount = cameras[
                        index].localPersonCount + 1
                    hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]),
                                               int(bbox[0]):int(bbox[2])]
                    persondata.histogram_h = cv2.calcHist([hsvCroppedImage],
                                                          [0], None, [180],
                                                          [0, 180])
                    persondata.histogram_h = np.divide(persondata.histogram_h,
                                                       ((bbox[3] - bbox[1]) *
                                                        (bbox[2] - bbox[0])))
                    cameras[index].PersonData.append(persondata)
                else:
                    hungarianmatrix.append([])
                    hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]),
                                               int(bbox[0]):int(bbox[2])]
                    histogram_h = cv2.calcHist([hsvCroppedImage], [0], None,
                                               [180], [0, 180])
                    histogram_h = np.divide(histogram_h, ((bbox[3] - bbox[1]) *
                                                          (bbox[2] - bbox[0])))

                    for z in range(len(cameras[index].PersonData)):
                        postions = len(
                            cameras[index].PersonData[z].positions) - 1
                        cov = np.cov(
                            np.asarray(
                                cameras[index].PersonData[z].positions).T)
                        #mahal=(distance.mahalanobis([cameras[index].PersonData[z].kf.calulatedmean[0],cameras[index].PersonData[z].kf.calulatedmean[2]],[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2],cov))/ frame[index].shape[0]
                        mahal = math.sqrt((cameras[index].PersonData[z].
                                           positions[postions][0] -
                                           (bbox[0] + bbox[2]) / 2)**2 +
                                          (cameras[index].PersonData[z].
                                           positions[postions][1] -
                                           (bbox[1] + bbox[3]) / 2)**2
                                          ) / frame[index].shape[0]
                        #mahal=math.sqrt((cameras[index].PersonData[z].kf.calulatedmean[0]-(bbox[0]+bbox[2])/2)**2+(cameras[index].PersonData[z].kf.calulatedmean[1]-(bbox[1]+bbox[3])/2)**2)/ frame[index].shape[0]
                        #mahal=getMahalanbolisDist(cameras[index].PersonData[z].positions,[(bbox[0]+bbox[2])/2,(bbox[1]+bbox[3])/2])
                        mahal += (np.sum(
                            np.absolute(
                                np.subtract(
                                    histogram_h,
                                    cameras[index].PersonData[z].histogram_h)))
                                  )
                        hungarianmatrix[indexx].append(mahal)
                    indexx = indexx + 1
                print(hungarianmatrix)
            if (nodata != 0):
                row_ind = []
                col_ind = []
                if (hungarianmatrix != []):
                    row_ind, col_ind = linear_sum_assignment(hungarianmatrix)
                indexx = 0
                for pos in range(len(col_ind)):
                    if (hungarianmatrix[row_ind[pos]][col_ind[pos]] <
                            2 - detections[row_ind[pos]].confidence):
                        bbox = detections[row_ind[pos]].to_tlbr()
                        detections[row_ind[pos]].localProcessed = True
                        cameras[index].PersonData[col_ind[pos]].updated = True
                        cameras[index].PersonData[col_ind[pos]].top = bbox[0]
                        cameras[index].PersonData[col_ind[pos]].left = bbox[1]
                        cameras[index].PersonData[col_ind[pos]].kf.update([
                            (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
                        ])
                        cameras[index].PersonData[
                            col_ind[pos]].lastPosition = bbox
                        cameras[index].PersonData[
                            col_ind[pos]].positions.append([
                                (bbox[0] + bbox[2]) / 2,
                                (bbox[1] + bbox[3]) / 2
                            ])
                        hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]),
                                                   int(bbox[0]):int(bbox[2])]
                        toadd = detections[row_ind[pos]].confidence - 0.7
                        cameras[index].PersonData[
                            col_ind[pos]].histogram_h = np.add(
                                np.multiply(
                                    cv2.calcHist([hsvCroppedImage], [0], None,
                                                 [180], [0, 180]),
                                    toadd * 1 / (((bbox[3] - bbox[1]) *
                                                  (bbox[2] - bbox[0])))),
                                np.multiply(
                                    cameras[index].PersonData[
                                        col_ind[pos]].histogram_h, 1 - toadd))
                        if (len(cameras[index].PersonData[
                                col_ind[pos]].positions) > 6):
                            cameras[index].PersonData[
                                col_ind[pos]].positions.pop(0)

                for pos in range(len(detections)):
                    if (hasattr(detections[pos], 'localProcessed') == False):
                        bbox = detections[pos].to_tlbr()
                        #if(bbox[1]>hsvImage.shape[0]):
                        #    continue
                        ndata = PersonData()
                        ndata.top = bbox[0]
                        ndata.left = bbox[1]
                        ndata.positions.append([(bbox[0] + bbox[2]) / 2,
                                                (bbox[1] + bbox[3]) / 2])
                        ndata.positions.append([(bbox[0] + bbox[2]) / 2 + 0.1,
                                                (bbox[1] + bbox[3]) / 2 + 0.1])
                        ndata.kf = KF((bbox[0] + bbox[2]) / 2,
                                      (bbox[1] + bbox[3]) / 2, 0, 0)
                        ndata.color = [
                            int(random.randint(0, 255)),
                            int(random.randint(0, 255)),
                            int(random.randint(0, 255))
                        ]
                        ndata.localPersonIndex = cameras[
                            index].localPersonCount
                        ndata.lastPosition = bbox

                        ndata.kf = KF(ndata.positions[0][0],
                                      ndata.positions[0][1], 0, 0)
                        cameras[index].localPersonCount = cameras[
                            index].localPersonCount + 1
                        localgloballink.append(
                            [globalPersonCount, index, ndata.localPersonIndex])
                        ndata.globalPersonIndex = globalPersonCount
                        globalPersonCount = globalPersonCount + 1
                        hsvCroppedImage = hsvImage[int(bbox[1]):int(bbox[3]),
                                                   int(bbox[0]):int(bbox[2])]
                        ndata.histogram_h = cv2.calcHist([hsvCroppedImage],
                                                         [0], None, [180],
                                                         [0, 180])
                        ndata.histogram_h = np.divide(ndata.histogram_h,
                                                      ((bbox[3] - bbox[1]) *
                                                       (bbox[2] - bbox[0])))

                        cameras[index].PersonData.append(ndata)

            #allimages.append([])
            if (len(file_path)) != 1:
                for pdata in cameras[index].PersonData:
                    if (pdata.updated):
                        nimg = cv2.resize(
                            frame[index][int(pdata.lastPosition[1]
                                             ):int(pdata.lastPosition[3]),
                                         int(pdata.lastPosition[0]
                                             ):int(pdata.lastPosition[2])],
                            (64, 128),
                            interpolation=cv2.INTER_AREA)
                        #allimages[len(allimages)-1].append(np.array(nimg))
                        pdata.imgs.append(nimg)
                        if (len(pdata.imgs) == imgsSaved + 1):
                            pdata.imgs.pop(0)
            #nabin's code ends

            if tracking:
                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    bbox = track.to_tlbr()
                    cv2.rectangle(frame[index], (int(bbox[0]), int(bbox[1])),
                                  (int(bbox[2]), int(bbox[3])),
                                  (255, 255, 255), 2)
                    cv2.putText(frame[index], "ID: " + str(track.track_id),
                                (int(bbox[0]), int(bbox[1])), 0,
                                1e-3 * frame[index].shape[0], (0, 255, 0), 1)

            #if(len(cameras)==2):
            #globalHungarian=[]
            #    for fdata in range(len(cameras[0].PersonData)):
            #        globalHungarian.append([])
            #        for pdata in cameras[1].PersonData:
            #            globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(pdata.histogram_h,cameras[0].PersonData[fdata].histogram_h))))
            #
            #    row_ind, col_ind = linear_sum_assignment(globalHungarian)
            #    for row in range(len(row_ind)):
            #        cv2.putText(frame[0], chr(ord('a')+row),(int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][0]), int(cameras[0].PersonData[row_ind[row]].positions[len(cameras[0].PersonData[row_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2)
            #        cv2.putText(frame[1], chr(ord('a')+row),(int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][0]), int(cameras[1].PersonData[col_ind[row]].positions[len(cameras[1].PersonData[col_ind[row]].positions)-1][1])),0, 5e-3 * 200, (0,255,0),2)

        if (len(cameras) == 1):
            hypos = []
            hyposPos = []
            for person in cameras[0].PersonData:
                if (person.updated == True):
                    cv2.putText(
                        frame[0], str(person.localPersonIndex),
                        (int(person.positions[len(person.positions) - 1][0]),
                         int(person.positions[len(person.positions) - 1][1])),
                        0, 1e-3 * frame[index].shape[0], (0, 255, 0), 1)
                if (person.updated == True):
                    hypos.append(person.localPersonIndex + 1)
                    hyposPos.append([person.top, person.left])
            gts = []
            gtsPos = []
            while gtIndex < len(gt) and gt[gtIndex][0] == curFrame:
                gts.append(gt[gtIndex][1])
                gtsPos.append([gt[gtIndex][2], gt[gtIndex][3]])
                gtIndex = gtIndex + 1
            curFrame = curFrame + 1
            dis = mm.distances.norm2squared_matrix(np.array(gtsPos),
                                                   np.array(hyposPos))
            acc.update(gts, hypos, dis)

        else:
            edges = []
            globalHungarian = []
            for i in range(len(cameras)):
                for j in range(i + 1, len(cameras)):
                    globalHungarian = []
                    x = 0
                    xindexes = []
                    yindexes = []
                    stackedimgages = []
                    for pos in range(imgsSaved):
                        stackedimgages.append([])
                        for person in cameras[j].PersonData:
                            if (person.updated == True
                                    and len(person.imgs) == imgsSaved):
                                stackedimgages[pos].append(person.imgs[pos])
                    for fdata in range(len(cameras[i].PersonData)):
                        if (cameras[i].PersonData[fdata].updated == False
                                or len(cameras[i].PersonData[fdata].imgs) !=
                                imgsSaved):
                            continue
                        xindexes.append(fdata)
                        y = 0
                        triplet = test(cameras[i].PersonData[fdata].imgs[0],
                                       stackedimgages[0])
                        for pos in range(1, imgsSaved):
                            triplet = np.add(
                                triplet,
                                test(cameras[i].PersonData[fdata].imgs[pos],
                                     stackedimgages[pos]))
                        globalHungarian.append([])
                        for pdata in range(len(cameras[j].PersonData)):
                            if (cameras[j].PersonData[pdata].updated == False
                                    or len(cameras[j].PersonData[pdata].imgs)
                                    != imgsSaved):
                                continue
                            #globalHungarian[x].append(triplet[y])
                            globalHungarian[x].append(
                                np.sum(
                                    np.absolute(
                                        np.subtract(
                                            cameras[j].PersonData[pdata].
                                            histogram_h, cameras[i].
                                            PersonData[fdata].histogram_h))) *
                                2 + triplet[y])
                            if (x == 0):
                                yindexes.append(pdata)
                            #globalHungarian[fdata].append(np.sum(np.absolute(np.subtract(cameras[j].PersonData[pdata].histogram_h,cameras[i].PersonData[fdata].histogram_h))))
                            #globalHungarian[fdata].append(triplet[pdata])
                            y = y + 1
                        x = x + 1
                    if (len(globalHungarian) != 0):
                        row_ind, col_ind = linear_sum_assignment(
                            globalHungarian)
                        print(globalHungarian)
                        for pos in range(len(row_ind)):
                            if (globalHungarian[row_ind[pos]][col_ind[pos]] <
                                    3.2):
                                edges.append(
                                    (cameras[i].PersonData[xindexes[
                                        row_ind[pos]]].globalPersonIndex,
                                     cameras[j].PersonData[yindexes[
                                         col_ind[pos]]].globalPersonIndex))

            Allcliques = cliques(edges, len(cameras),
                                 globalPersonCount).getCliques()

            for cam in cameras:
                for person in cam.PersonData:
                    in_some_clique = False
                    for clique in Allcliques:
                        if person.globalPersonIndex in clique:
                            in_some_clique = True
                            break
                    if not in_some_clique:
                        Allcliques.append([person.globalPersonIndex])

            for sclique in Allcliques:
                indexes = []
                cur = min(sclique)

                for i in range(len(sclique)):
                    isInclique = False
                    prvIndex = cameras[localgloballink[
                        sclique[i] - 1][1]].PersonData[localgloballink[
                            sclique[i] - 1][2]].prvglobalFoundOutPersonIndex
                    if prvIndex == -1:
                        isInclique = True
                    else:
                        for snclique in Allcliques:
                            if prvIndex in snclique:
                                isInclique = True
                                break
                    if isInclique == True:
                        cameras[localgloballink[sclique[i] - 1][1]].PersonData[
                            localgloballink[sclique[i] - 1]
                            [2]].globalFoundOutPersonIndex = cur
                    else:
                        cameras[localgloballink[sclique[i] - 1][1]].PersonData[
                            localgloballink[sclique[i] - 1]
                            [2]].globalFoundOutPersonIndex = prvIndex

            for cam in range(len(cameras)):
                for person in cameras[cam].PersonData:
                    if person.updated == True:
                        cv2.putText(
                            frame[cam], str(person.globalFoundOutPersonIndex),
                            (int(person.positions[len(person.positions) -
                                                  1][0]),
                             int(person.positions[len(person.positions) -
                                                  1][1])), 0,
                            1e-3 * frame[index].shape[0], (0, 255, 0), 2)

            for sclique in Allcliques:
                for i in range(len(sclique)):
                    cameras[localgloballink[sclique[i] - 1][1]].PersonData[
                        localgloballink[sclique[i] - 1]
                        [2]].prvglobalFoundOutPersonIndex = cameras[
                            localgloballink[sclique[i] - 1][1]].PersonData[
                                localgloballink[sclique[i] - 1]
                                [2]].globalFoundOutPersonIndex
        out_image.fill(0)
        vindex = 0
        for row in range(rows):
            for col in range(cols):
                if (vindex == len(file_path)):
                    break
                vidshape = frame[vindex].shape
                curvidheightratio = vidshape[0] / singleHeight
                curvidwidthratio = vidshape[1] / singleWidth

                if (curvidwidthratio < curvidheightratio):
                    # scale to the cell height and center horizontally
                    resizedwidth = int(vidshape[1] / vidshape[0] *
                                       singleHeight)
                    nimg = cv2.resize(frame[vindex],
                                      (resizedwidth, singleHeight),
                                      interpolation=cv2.INTER_AREA)
                    widthpos = int(
                        (singleWidth - resizedwidth) / 2) + col * singleWidth
                    out_image[row * singleHeight:(row + 1) * singleHeight,
                              widthpos:widthpos + resizedwidth] = nimg
                else:
                    # scale to the cell width and center vertically
                    resizedheight = int(vidshape[0] / vidshape[1] *
                                        singleWidth)
                    nimg = cv2.resize(frame[vindex],
                                      (singleWidth, resizedheight),
                                      interpolation=cv2.INTER_AREA)
                    heightpos = int(((singleHeight - resizedheight) / 2) +
                                    row * singleHeight)
                    out_image[heightpos:heightpos + resizedheight,
                              col * singleWidth:(col + 1) * singleWidth] = nimg
                vindex = vindex + 1

        if (len(cameras) == 1):
            mh = mm.metrics.create()
            summary = mh.compute(acc,
                                 metrics=['num_frames', 'mota', 'motp'],
                                 name='acc')
            print(summary)
        cv2.imshow('', out_image)

        if writeVideo_flag:  # and not asyncVideo_flag:
            # save a frame
            out.write(out_image)
            frame_index = frame_index + 1

        fps_imutils.update()

        if not asyncVideo_flag:
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("FPS = %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps_imutils.stop()
    print('imutils FPS: {}'.format(fps_imutils.fps()))

    if asyncVideo_flag:
        video_capture.stop()
    else:
        video_capture.release()

    if writeVideo_flag:
        out.release()

    cv2.destroyAllWindows()
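
The per-camera data association above builds a cost matrix (centroid distance plus an HSV-histogram difference) and solves it with scipy.optimize.linear_sum_assignment. Below is a stripped-down sketch of that pattern, using only centroid distance and an illustrative acceptance threshold; both are simplifications, not the exact costs used above:

import numpy as np
from scipy.optimize import linear_sum_assignment

def match_by_position(track_centroids, det_centroids, max_cost=100.0):
    """One-to-one assignment between existing tracks and new detections.

    Both inputs are lists of (x, y) centroids. Returns (track_idx, det_idx)
    pairs whose assignment cost stays below max_cost.
    """
    if not track_centroids or not det_centroids:
        return []
    cost = np.zeros((len(track_centroids), len(det_centroids)))
    for i, (tx, ty) in enumerate(track_centroids):
        for j, (dx, dy) in enumerate(det_centroids):
            cost[i, j] = np.hypot(tx - dx, ty - dy)   # Euclidean distance
    rows, cols = linear_sum_assignment(cost)          # Hungarian algorithm
    return [(r, c) for r, c in zip(rows, cols) if cost[r, c] < max_cost]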
Code Example #18
def main():

    yolo = YOLO()
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    parser = argparse.ArgumentParser(
        description='Training codes for Openpose using Tensorflow')
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default='checkpoints/train/2018-12-13-16-56-49/')
    parser.add_argument('--backbone_net_ckpt_path',
                        type=str,
                        default='checkpoints/vgg/vgg_19.ckpt')
    parser.add_argument('--image', type=str, default=None)
    # parser.add_argument('--run_model', type=str, default='img')
    parser.add_argument('--video', type=str, default=None)
    parser.add_argument('--train_vgg', type=bool, default=True)
    parser.add_argument('--use_bn', type=bool, default=False)
    parser.add_argument('--save_video', type=str, default='result/our.mp4')
    args = parser.parse_args()
    checkpoint_path = args.checkpoint_path
    logger.info('checkpoint_path: ' + checkpoint_path)

    with tf.name_scope('inputs'):
        raw_img = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        img_size = tf.placeholder(dtype=tf.int32,
                                  shape=(2, ),
                                  name='original_image_size')

    img_normalized = raw_img / 255 - 0.5

    # define vgg19
    with slim.arg_scope(vgg.vgg_arg_scope()):
        vgg_outputs, end_points = vgg.vgg_19(img_normalized)

    # get net graph
    logger.info('initializing model...')
    net = PafNet(inputs_x=vgg_outputs, use_bn=args.use_bn)
    hm_pre, cpm_pre, added_layers_out = net.gen_net()
    hm_up = tf.image.resize_area(hm_pre[5], img_size)
    cpm_up = tf.image.resize_area(cpm_pre[5], img_size)
    # hm_up = hm_pre[5]
    # cpm_up = cpm_pre[5]
    smoother = Smoother({'data': hm_up}, 25, 3.0)
    gaussian_heatMat = smoother.get_output()

    max_pooled_in_tensor = tf.nn.pool(gaussian_heatMat,
                                      window_shape=(3, 3),
                                      pooling_type='MAX',
                                      padding='SAME')
    tensor_peaks = tf.where(tf.equal(gaussian_heatMat, max_pooled_in_tensor),
                            gaussian_heatMat, tf.zeros_like(gaussian_heatMat))
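    # The two ops above perform peak extraction on the smoothed heatmaps:
    # the 3x3 max-pool computes each pixel's local maximum, and tf.where
    # keeps a heatmap value only where it equals that local maximum,
    # zeroing everything else. The surviving non-zero pixels are the
    # candidate joint locations used by the PAF-based pose decoding below.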

    logger.info('initialize saver...')
    # trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers')
    # trainable_var_list = []
    trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='openpose_layers')
    if args.train_vgg:
        trainable_var_list = trainable_var_list + tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19')

    restorer = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_19'),
                              name='vgg_restorer')
    saver = tf.train.Saver(trainable_var_list)

    logger.info('initialize session...')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.group(tf.global_variables_initializer()))
        logger.info('restoring vgg weights...')
        restorer.restore(sess, args.backbone_net_ckpt_path)
        logger.info('restoring from checkpoint...')
        #saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path))
        saver.restore(sess, args.checkpoint_path + 'model-59000.ckpt')
        logger.info('initialization done')
        writeVideo_flag = True
        if args.image is None:
            if args.video is not None:
                cap = cv2.VideoCapture(args.video)
                w = int(cap.get(3))
                h = int(cap.get(4))

            else:
                cap = cv2.VideoCapture("images/video.mp4")
                #cap = cv2.VideoCapture("rtsp://*****:*****@192.168.43.51:554//Streaming/Channels/1")
                #cap = cv2.VideoCapture("http://*****:*****@192.168.1.111:8081")
                #cap = cv2.VideoCapture("rtsp://*****:*****@192.168.1.106:554//Streaming/Channels/1")
            _, image = cap.read()
            #print(_,image)
            if image is None:
                logger.error("Can't read video")
                sys.exit(-1)
            fps = cap.get(cv2.CAP_PROP_FPS)
            ori_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            ori_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #print(fps,ori_w,ori_h)
            if args.save_video is not None:
                fourcc = cv2.VideoWriter_fourcc(*'MP4V')
                video_saver = cv2.VideoWriter(args.save_video, fourcc, fps,
                                              (ori_w, ori_h))
                logger.info('recording video to %s' % args.save_video)
            logger.info('fps@%f' % fps)
            size = [int(654 * (ori_h / ori_w)), 654]
            h = int(654 * (ori_h / ori_w))
            time_n = time.time()
            #print(time_n)

            max_boxs = 0
            person_track = {}
            yolo2 = YOLO2()

            while True:
                face = []
                cur1 = conn.cursor()  # get a database cursor
                sql = "select * from worker"
                cur1.execute(sql)
                data = cur1.fetchall()
                for d in data:
                    # note: int fields must be converted with str()
                    name = str(d[1]) + '_' + d[2]

                    face.append(name)
                cur1.close()  # close the cursor

                _, image_fist = cap.read()
                # detect whether protective equipment is being worn

                img = Image.fromarray(
                    cv2.cvtColor(image_fist, cv2.COLOR_BGR2RGB))
                image, wear = yolo2.detect_image(img)
                image = np.array(image)
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # # get the warning (boundary) line
                cv2.line(image, (837, 393), (930, 300), (0, 255, 255), 3)
                transboundaryline = t.line_detect_possible_demo(image)

                # OpenPose 2D pose estimation
                img = np.array(cv2.resize(image, (654, h)))
                # cv2.imshow('raw', img)
                img_corner = np.array(
                    cv2.resize(image, (360, int(360 * (ori_h / ori_w)))))
                img = img[np.newaxis, :]
                peaks, heatmap, vectormap = sess.run(
                    [tensor_peaks, hm_up, cpm_up],
                    feed_dict={
                        raw_img: img,
                        img_size: size
                    })
                bodys = PoseEstimator.estimate_paf(peaks[0], heatmap[0],
                                                   vectormap[0])

                image, person = TfPoseEstimator.draw_humans(image,
                                                            bodys,
                                                            imgcopy=False)
                # keypoints: 10 = right foot, 13 = left foot

                foot = []
                if len(person) > 0:
                    for p in person:
                        foot_lr = []
                        if 10 in p and 13 in p:
                            foot_lr.append(p[10])
                            foot_lr.append(p[13])

                        if len(foot_lr) > 1:
                            foot.append(foot_lr)

                fps = round(1 / (time.time() - time_n), 2)
                image = cv2.putText(image,
                                    str(fps) + 'fps', (10, 15),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                    (255, 255, 255))
                time_n = time.time()

                # deep-learning object detection
                image2 = Image.fromarray(image_fist)
                boxs = yolo.detect_image(image2)
                features = encoder(image, boxs)
                detections = [
                    Detection(bbox, 1.0, feature)
                    for bbox, feature in zip(boxs, features)
                ]
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(
                    boxes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]
                if len(boxs) > max_boxs:
                    max_boxs = len(boxs)
                # print(max_boxs)

                # Call the tracker
                tracker.predict()
                tracker.update(detections)

                for track in tracker.tracks:

                    if max_boxs < track.track_id:
                        tracker.tracks.remove(track)
                        tracker._next_id = max_boxs + 1

                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue

                    bbox = track.to_tlbr()
                    PointX = bbox[0] + ((bbox[2] - bbox[0]) / 2)
                    PointY = bbox[3]

                    if track.track_id not in person_track:
                        track2 = copy.deepcopy(track)
                        person_track[track.track_id] = track2

                    else:

                        track2 = copy.deepcopy(track)
                        bbox2 = person_track[track.track_id].to_tlbr()
                        PointX2 = bbox2[0] + ((bbox2[2] - bbox2[0]) / 2)
                        PointY2 = bbox2[3]
                        distance = math.sqrt(
                            pow(PointX - PointX2, 2) +
                            pow(PointY - PointY2, 2))
                        if distance < 120:
                            person_track[track.track_id] = track2

                        else:

                            # print('last',track.track_id)
                            dis = {}
                            for key in person_track:
                                bbox3 = person_track[key].to_tlbr()
                                PointX3 = bbox3[0] + (
                                    (bbox3[2] - bbox3[0]) / 2)
                                PointY3 = bbox3[3]

                                d = math.sqrt(
                                    pow(PointX3 - PointX, 2) +
                                    pow(PointY3 - PointY, 2))
                                dis[key] = d
                            dis = sorted(dis.items(),
                                         key=operator.itemgetter(1),
                                         reverse=False)

                            track2.track_id = dis[0][0]
                            person_track[dis[0][0]] = track2
                            tracker.tracks.remove(track)
                            tracker.tracks.append(person_track[track.track_id])

                    # record the class

                    try:
                        box_title = face[track2.track_id - 1]
                    except Exception as e:
                        box_title = str(track2.track_id) + "_" + "unknown"
                    if box_title not in workers:
                        wid = box_title.split('_')[0]
                        localtime = time.asctime(time.localtime(time.time()))
                        workers[box_title] = wk.Worker()
                        workers[box_title].set(box_title, localtime,
                                               (int(PointX), int(PointY)))
                        cur2 = conn.cursor()  # get a database cursor
                        sql2 = "UPDATE worker SET in_time='" + localtime + "' WHERE worker_id= '" + wid + "'"
                        cur2.execute(sql2)
                        cur2.close()  # close the cursor

                    else:
                        localtime = time.asctime(time.localtime(time.time()))
                        yoloPoint = (int(PointX), int(PointY))
                        foot_dic = {}
                        wear_dic = {}

                        for f in foot:
                            fp = []
                            footCenter = ((f[0][0] + f[1][0]) / 2,
                                          (f[0][1] + f[1][1]) / 2)
                            foot_dis = int(
                                math.sqrt(
                                    pow(footCenter[0] - yoloPoint[0], 2) +
                                    pow(footCenter[1] - yoloPoint[1], 2)))
                            #print(foot_dis)
                            fp.append(f)
                            fp.append(footCenter)
                            foot_dic[foot_dis] = fp

                        #print(box_title, 'sss', foot_dic)
                        foot_dic = sorted(foot_dic.items(),
                                          key=operator.itemgetter(0),
                                          reverse=False)
                        workers[box_title].current_point = foot_dic[0][1][1]
                        workers[box_title].track_point.append(
                            workers[box_title].current_point)

                        #print(box_title,'sss',foot_dic[0][1][1])
                        mytrack = str(workers[box_title].track_point)
                        wid = box_title.split('_')[0]
                        # Kalman filter prediction
                        if wid not in KalmanNmae:
                            myKalman(wid)
                        if wid not in lmp:
                            setLMP(wid)
                        cpx, cpy = predict(workers[box_title].current_point[0],
                                           workers[box_title].current_point[1],
                                           wid)

                        if cpx[0] == 0.0 or cpy[0] == 0.0:
                            cpx[0] = workers[box_title].current_point[0]
                            cpy[0] = workers[box_title].current_point[1]
                        workers[box_title].next_point = (int(cpx), int(cpy))

                        workers[box_title].current_footR = foot_dic[0][1][0][0]
                        workers[box_title].current_footL = foot_dic[0][1][0][1]
                        cur3 = conn.cursor()  # get a database cursor
                        sql = "UPDATE worker SET current_point= '" + str(
                            workers[box_title].current_point
                        ) + "' , current_footR = '" + str(
                            workers[box_title].current_footR
                        ) + "',current_footL = '" + str(
                            workers[box_title].current_footL
                        ) + "',track_point = '" + mytrack + "',next_point = '" + str(
                            workers[box_title].next_point
                        ) + "' WHERE worker_id= '" + wid + "'"
                        cur3.execute(sql)
                        cur3.close()
                        # Record safety-gear (PPE) violations
                        if len(wear) > 0:
                            for w in wear:
                                wear_dis = int(
                                    math.sqrt(
                                        pow(w[0] - yoloPoint[0], 2) +
                                        pow(w[1] - yoloPoint[1], 2)))
                                wear_dic[wear_dis] = w
                            wear_dic = sorted(wear_dic.items(),
                                              key=operator.itemgetter(0),
                                              reverse=False)

                            if wear_dic[0][0] < 120:
                                cur4 = conn.cursor()  # get a cursor

                                if wear[wear_dic[0][1]] == 1:
                                    if len(workers[box_title].wear['no helmet']
                                           ) == 0:
                                        workers[box_title].wear[
                                            'no helmet'].append(localtime)

                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor

                                    else:
                                        if localtime not in workers[
                                                box_title].wear['no helmet']:

                                            workers[box_title].wear[
                                                'no helmet'].append(localtime)
                                            sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no_helmet',abnormal_time = '" + localtime + "'"
                                            cur4.execute(sql)
                                            cur4.close()  # close the cursor

                                elif wear[wear_dic[0][1]] == 2:
                                    if len(workers[box_title].
                                           wear['no work cloths']) == 0:
                                        workers[box_title].wear[
                                            'no work cloths'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                                    else:
                                        if localtime not in workers[
                                                box_title].wear[
                                                    'no work cloths']:
                                            workers[box_title].wear[
                                                'no work cloths'].append(
                                                    localtime)
                                            sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'no work cloths',abnormal_time = '" + localtime + "'"
                                            cur4.execute(sql)
                                            cur4.close()  # close the cursor
                                elif wear[wear_dic[0][1]] == 3:
                                    if len(workers[box_title].
                                           wear['unsafe wear']) == 0:
                                        workers[box_title].wear[
                                            'unsafe wear'].append(localtime)
                                        sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'"
                                        cur4.execute(sql)
                                        cur4.close()  # close the cursor
                                    else:
                                        if localtime not in workers[
                                                box_title].wear['unsafe wear']:
                                            workers[box_title].wear[
                                                'unsafe wear'].append(
                                                    localtime)
                                            sql = "INSERT INTO wear SET worker_id = '" + wid + "', type = 'unsafe wear',abnormal_time = '" + localtime + "'"
                                            cur4.execute(sql)
                                            cur4.close()  # close the cursor

                        # Record boundary-crossing events

                        if len(workers[box_title].track_point) > 4:

                            for i in range(len(transboundaryline)):
                                p1 = (transboundaryline[i][0],
                                      transboundaryline[i][1])
                                p2 = (transboundaryline[i][2],
                                      transboundaryline[i][3])
                                p3 = workers[box_title].track_point[-2]
                                p4 = workers[box_title].track_point[-1]
                                a = t.IsIntersec(p1, p2, p3, p4)
                                if a == '有交点':  # intersection found, i.e. the line was crossed

                                    cur5 = conn.cursor()  # get a cursor
                                    cur6 = conn.cursor()  # get a cursor
                                    cur5.execute(
                                        "select time from transboundary where worker_id = '"
                                        + wid + "' ")
                                    qurrytime = cur5.fetchone()
                                    cur5.close()  # close the cursor
                                    if qurrytime is None:
                                        print('越线')  # boundary crossing
                                        sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'"
                                        cur6.execute(sql)
                                        cur6.close()  # close the cursor
                                    else:
                                        temp1 = 0
                                        for qt in qurrytime:

                                            if qt == localtime:
                                                temp1 = 1
                                        if temp1 == 0:
                                            print('越线')  # boundary crossing
                                            sql = "INSERT INTO transboundary SET worker_id = '" + wid + "',time = '" + localtime + "'"
                                            cur6.execute(sql)
                                            cur6.close()  # close the cursor
                        if len(workers[box_title].track_point) >= 20:
                            workers[box_title].previous_point = workers[
                                box_title].track_point[-5]
                    conn.commit()
                    try:
                        cv2.putText(image, face[track2.track_id - 1],
                                    (int(bbox[0]), int(bbox[1])), 0,
                                    5e-3 * 200, (0, 255, 0), 2)
                    except Exception as e:
                        cv2.putText(image, "unknow",
                                    (int(bbox[0]), int(bbox[1])), 0,
                                    5e-3 * 200, (0, 255, 0), 2)

                if args.video is not None:
                    image[27:img_corner.shape[0] +
                          27, :img_corner.shape[1]] = img_corner  # [3:-10, :]
                cv2.imshow(' ', image)
                if args.save_video is not None:
                    video_saver.write(image)
                cv2.waitKey(1)
            else:

                image = common.read_imgfile(args.image)
                if image is None:
                    logger.error('Image cannot be read, path=%s' % args.image)
                    sys.exit(-1)
                size = [image.shape[0], image.shape[1]]
                h = int(654 * (size[0] / size[1]))
                img = np.array(cv2.resize(image, (654, h)))
                cv2.imshow('ini', img)
                img = img[np.newaxis, :]
                peaks, heatmap, vectormap = sess.run(
                    [tensor_peaks, hm_up, cpm_up],
                    feed_dict={
                        raw_img: img,
                        img_size: size
                    })
                cv2.imshow('in', vectormap[0, :, :, 0])
                bodys = PoseEstimator.estimate_paf(peaks[0], heatmap[0],
                                                   vectormap[0])
                image = TfPoseEstimator.draw_humans(image,
                                                    bodys,
                                                    imgcopy=False)
                cv2.imshow(' ', image)
                cv2.waitKey(0)
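Note on the database access above: the UPDATE/INSERT statements are assembled by string concatenation, which breaks on quotes in the data and is open to SQL injection. A minimal sketch of the same writes with parameterized queries, assuming a DB-API driver such as pymysql and the same worker/wear tables:

# Hedged sketch: parameterized versions of the UPDATE/INSERT statements above,
# assuming a DB-API connection `conn` (e.g. pymysql) and the existing schema.
def update_worker_state(conn, wid, current_point, next_point, track_point):
    with conn.cursor() as cur:
        cur.execute(
            "UPDATE worker SET current_point=%s, next_point=%s, track_point=%s "
            "WHERE worker_id=%s",
            (str(current_point), str(next_point), str(track_point), wid))
    conn.commit()

def insert_wear_event(conn, wid, wear_type, abnormal_time):
    with conn.cursor() as cur:
        cur.execute(
            "INSERT INTO wear (worker_id, type, abnormal_time) VALUES (%s, %s, %s)",
            (wid, wear_type, abnormal_time))
    conn.commit()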
コード例 #19
0
def main(yolo):

    # Determining the FPS of a video having variable frame rate
    # cv2.CAP_PROP_FPS is not used since it returns 'infinity' for variable frame rate videos
    filename = "clip1.mp4"
    # Determining the total duration of the video
    clip = VideoFileClip(filename)

    cap2 = cv2.VideoCapture(filename)
    co = 0
    while True:
        ret2, frame2 = cap2.read()
        if not ret2:
            break
        # Counting the total number of frames
        co += 1
    cap2.release()

    # Computing the average FPS of the video
    Input_FPS = co / clip.duration

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    frame_count = 0

    # Implementing Deep Sort algorithm
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    # Cosine distance is used as the metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    video_capture = cv2.VideoCapture(filename)

    # Define the codec and create a VideoWriter object to save the output video
    out = cv2.VideoWriter(
        'output.mp4', cv2.VideoWriter_fourcc(*'MP4V'), Input_FPS,
        (int(video_capture.get(3)), int(video_capture.get(4))))

    # To calculate the frames processed by the deep sort algorithm per second
    fps = 0.0

    # Loop to process each frame and track people
    while True:
        ret, frame = video_capture.read()
        if ret != True:
            break
        t1 = time.time()

        step1 = cv2.edgePreservingFilter(frame,
                                         flags=1,
                                         sigma_s=15,
                                         sigma_r=0.1)
        step2 = cv2.detailEnhance(step1, sigma_s=40, sigma_r=0.1)
        cv2.imwrite('preprocessing.jpg', step2)

        im = Image.open("preprocessing.jpg")
        enhancer = ImageEnhance.Sharpness(im)
        enhanced_im = enhancer.enhance(6.0)
        enhanced_im.save("enhanced.jpg")

        frame = cv2.imread('enhanced.jpg')

        image = Image.fromarray(frame[..., ::-1])  # BGR to RGB conversion
        boxs = yolo.detect_image(image)
        features = encoder(frame, boxs)

        # Build Detection objects; the confidence score is fixed to 1.0 here
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression on the bounding boxes
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        head_count = 0

        # Drawing bounding box detections for people inside the store
        for det in detections:
            head_count += 1
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.putText(frame, str(head_count), (50, 50), 0, 1.5, (0, 255, 77), 2)

        # Write the frame onto the VideoWriter object
        out.write(frame)

        # Calculating the frames processed per second by the model
        fps = (fps + (1. / (time.time() - t1))) / 2
        frame_count += 1
        # Printing processing status to track completion
        op = "FPS_" + str(frame_count) + "/" + str(co) + ": " + str(
            round(fps, 2))
        print("\r" + op, end="")

    # Releasing objects created
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
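The example above decodes the whole clip once just to count frames. If the container reports a frame count, cv2.CAP_PROP_FRAME_COUNT avoids that extra pass; it can be unreliable for some variable-frame-rate files, which is presumably why the author counts manually, so a sketch that falls back to counting:

# Hedged sketch: prefer the container's reported frame count, fall back to
# the manual counting loop used above when the report is missing or zero.
import cv2

def estimate_average_fps(filename, duration_seconds):
    cap = cv2.VideoCapture(filename)
    frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if frames <= 0:
        frames = 0
        while True:
            ret, _ = cap.read()
            if not ret:
                break
            frames += 1
    cap.release()
    return frames / duration_seconds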
コード例 #20
0
 def Setup(self):
     metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, None)
     self.tracker = Tracker(metric, max_iou_distance=0.7, max_age=200, n_init=4)
     self.log('init')
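Example #20 only configures the tracker; in the deep_sort reference implementation these arguments control track lifecycle. A commented sketch of the same construction with the parameter roles spelled out (a sketch, following the upstream deep_sort semantics):

# Hedged sketch: the same construction with the parameter roles annotated.
from deep_sort import nn_matching
from deep_sort.tracker import Tracker

metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.2, None)  # appearance gate 0.2, unlimited gallery
tracker = Tracker(metric,
                  max_iou_distance=0.7,  # IOU gate for the fallback IOU matching
                  max_age=200,           # frames a track may go unmatched before deletion
                  n_init=4)              # consecutive hits required to confirm a track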
コード例 #21
0
def main(_argv):
    # Definition of the parameters
    max_cosine_distance = 0.5
    nn_budget = None
    nms_max_overlap = 1.0

    #initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if len(physical_devices) > 0:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    if FLAGS.tiny:
        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
    else:
        yolo = YoloV3(classes=FLAGS.num_classes)

    yolo.load_weights(FLAGS.weights)
    logging.info('weights loaded')

    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    vid = stream  # capture.open(best.url)

    out = None

    if FLAGS.output:
        # by default VideoCapture returns float instead of int
        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(vid.get(cv2.CAP_PROP_FPS))
        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    count = 0
    while True:
        img = vid.read()

        if img is None:
            logging.warning("Empty Frame")
            time.sleep(0.1)
            count += 1
            if count < 3:
                continue
            else:
                break

        img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_in = tf.expand_dims(img_in, 0)
        img_in = transform_images(img_in, FLAGS.size)

        t1 = time.time()
        boxes, scores, classes, nums = yolo.predict(img_in)
        classes = classes[0]
        names = []
        for i in range(len(classes)):
            names.append(class_names[int(classes[i])])
        names = np.array(names)
        converted_boxes = convert_boxes(img, boxes[0])
        features = encoder(img, converted_boxes)
        detections = [
            Detection(bbox, score, class_name, feature)
            for bbox, score, class_name, feature in zip(
                converted_boxes, scores[0], names, features)
        ]

        #initialize color map
        cmap = plt.get_cmap('tab20b')
        colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

        # run non-maxima suppresion
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes,
                                                    nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1] - 30)),
                          (int(bbox[0]) +
                           (len(class_name) + len(str(track.track_id))) * 17,
                           int(bbox[1])), color, -1)
            cv2.putText(img, class_name + "-" + str(track.track_id),
                        (int(bbox[0]), int(bbox[1] - 10)), 0, 0.75,
                        (255, 255, 255), 2)

        ### UNCOMMENT BELOW IF YOU WANT CONSTANTLY CHANGING YOLO DETECTIONS TO BE SHOWN ON SCREEN
        #for det in detections:
        #    bbox = det.to_tlbr()
        #    cv2.rectangle(img,(int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(255,0,0), 2)

        # print fps on screen
        fps = (fps + (1. / (time.time() - t1))) / 2
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
        cv2.imshow('output', img)
        if FLAGS.output:
            out.write(img)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(converted_boxes) != 0:
                for i in range(0, len(converted_boxes)):
                    list_file.write(
                        str(converted_boxes[i][0]) + ' ' +
                        str(converted_boxes[i][1]) + ' ' +
                        str(converted_boxes[i][2]) + ' ' +
                        str(converted_boxes[i][3]) + ' ')
            list_file.write('\n')

        # press q to quit
        if cv2.waitKey(1) == ord('q'):
            break
    vid.release()
    if FLAGS.output:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
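The FPS overlay in the example uses the recurrence fps = (fps + 1/dt) / 2, which weights the most recent frame at 50% and can jitter. A small rolling-window meter is one alternative; this is a sketch, not part of the original code:

# Hedged sketch: rolling-window FPS meter as an alternative to the 50/50 recurrence.
import time
from collections import deque

class FpsMeter:
    def __init__(self, window=30):
        self.stamps = deque(maxlen=window)

    def tick(self):
        # Call once per processed frame.
        self.stamps.append(time.time())

    def fps(self):
        if len(self.stamps) < 2:
            return 0.0
        return (len(self.stamps) - 1) / (self.stamps[-1] - self.stamps[0])

Usage would be meter.tick() once per loop iteration and meter.fps() when drawing the overlay.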
コード例 #22
0
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = True

    video_capture = cv2.VideoCapture('top_view1.avi')
    video_capture_1 = cv2.VideoCapture('demo1.avi')

    # if writeVideo_flag:
    #     # Define the codec and create VideoWriter object
    #     w = int(video_capture.get(3))
    #     h = int(video_capture.get(4))
    #     fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    #     out = cv2.VideoWriter('output1.avi', fourcc, 15, (w, h))
    #     list_file = open('detection.txt', 'w')
    #     frame_index = -1

    fps = 0.0
    fig = plt.figure()
    fig1 = plt.figure()
    count = 0
    count1 = 0
    x_list = []
    y_list = []
    x_list1 = []
    y_list1 = []
    # ax1 = fig.add_subplot(1, 1, 1)
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        ret1, frame1 = video_capture_1.read()  # frame shape 640*480*3
        if not ret or not ret1:
            break
        # if ret == True:
        #     print(' VIDEO FOUND')
        #  t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb
        image1 = Image.fromarray(frame1[..., ::-1])  # bgr to rgb
        boxs = yolo.detect_image(image)
        boxs1 = yolo.detect_image(image1)
        print("box_co-ordinate = ", (boxs))
        print("box_co-ordinate = ", (boxs1))
        features = encoder(frame, boxs)
        features1 = encoder(frame1, boxs1)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        detections1 = [
            Detection1(bbox1, 1.0, feature1)
            for bbox1, feature1 in zip(boxs1, features1)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        boxes1 = np.array([d.tlwh for d in detections1])
        scores1 = np.array([d.confidence for d in detections1])
        indices1 = preprocessing.non_max_suppression(boxes1, nms_max_overlap,
                                                     scores1)
        detections1 = [detections1[i] for i in indices1]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        # tracker1.predict()
        # tracker1.update(detections1)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        # for track1 in tracker1.tracks:
        #     if not track1.is_confirmed() or track1.time_since_update > 1:
        #         continue
        #     bbox1 = track1.to_tlbr()
        #     cv2.rectangle(frame1, (int(bbox1[0]), int(bbox1[1])), (int(bbox1[2]), int(bbox1[3])), (255, 255, 255), 2)
        #     cv2.putText(frame1, str(track1.track_id), (int(bbox1[0]), int(bbox1[1])), 0, 5e-3 * 200, (0, 255, 0), 2)

        for det in detections:

            bbox = det.to_tlbr()

            # print((type(bbox)))
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            # print("The co-ordinates are:", int(bbox[0]), int(bbox[1]))
        # for det1 in detections1:

        #     bbox1 = det1.to_tlbr()

        #     # print((type(bbox)))
        #     cv2.rectangle(frame1, (int(bbox1[0]), int(bbox1[1])), (int(bbox1[2]), int(bbox1[3])), (255, 0, 0), 2)
        #     # print("The co-ordinates are:", int(bbox[0]), int(bbox[1]))

        try:

            for i in boxs:
                x = (i[0] + i[2]) / 2
                y = (i[1] + i[3]) / 2
                count += 1
                x_list.append(x)
                y_list.append(y)
                if count == 1:
                    points = plt.scatter(x_list, y_list)
                elif count > 1:
                    print('x:', x_list, 'y:', y_list)
                    points.remove()
                    points = plt.scatter(x_list, y_list)
                    # plt.pause(0.9)
            x_list.clear()
            y_list.clear()
        except:
            continue

        try:

            for i in boxs1:
                x = (i[0] + i[2]) / 2
                y = (i[1] + i[3]) / 2
                count1 += 1
                x_list1.append(x)
                y_list1.append(y)
                if count1 == 1:
                    points = plt.scatter(x_list1, y_list1)
                elif count1 > 1:
                    print('x:', x_list1, 'y:', y_list1)
                    points.remove()
                    points = plt.scatter(x_list1, y_list1)
                    # plt.pause(0.9)
            x_list1.clear()
            y_list1.clear()

        except:
            continue

        #     # redraw the canvas
        fig.canvas.draw()
        fig1.canvas.draw()

        # convert canvas to image
        img = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep='')
        img = img.reshape(fig.canvas.get_width_height()[::-1] + (3, ))

        # img is rgb, convert to opencv's default bgr
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # for second frame

        img1 = np.fromstring(fig1.canvas.tostring_rgb(),
                             dtype=np.uint8,
                             sep='')
        img1 = img1.reshape(fig1.canvas.get_width_height()[::-1] + (3, ))

        # img is rgb, convert to opencv's default bgr
        img1 = cv2.cvtColor(img1, cv2.COLOR_RGB2BGR)
        # display image with opencv or any operation you like
        cv2.imshow("plot", img)

        cv2.imshow('frame', frame)

        cv2.imshow("plot2", img1)

        cv2.imshow('frame1', frame1)

        # if writeVideo_flag:
        #     # save a frame
        #     out.write(frame)
        #     frame_index = frame_index + 1
        #     list_file.write(str(frame_index) + ' ')
        #     if len(boxs) != 0:
        #         for i in range(0, len(boxs)):
        #             list_file.write(str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' + str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
        #     list_file.write('\n')

        # fps = (fps + (1. / (time.time() - t1))) / 2
        # print("fps= %f" % (fps))

        # # Press Q to stop!
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break

    video_capture.release()
    # if writeVideo_flag:
    #     out.release()
    #     list_file.close()
    cv2.destroyAllWindows()
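The canvas-to-image conversion in example #22 relies on np.fromstring and fig.canvas.tostring_rgb(), both deprecated in recent NumPy/Matplotlib releases. A sketch of the same conversion via the canvas's RGBA buffer, assuming an Agg-based backend (which the default backends use):

# Hedged sketch: convert a Matplotlib figure to a BGR image for cv2.imshow.
import cv2
import numpy as np

def figure_to_bgr(fig):
    fig.canvas.draw()
    rgba = np.asarray(fig.canvas.buffer_rgba())    # H x W x 4 uint8, RGBA
    return cv2.cvtColor(rgba, cv2.COLOR_RGBA2BGR)  # drop alpha and reorder to BGR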
コード例 #23
0
ファイル: receive.py プロジェクト: caoabc/Test
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet
from deep_sort.detection import Detection as ddet
warnings.filterwarnings('ignore')

max_cosine_distance = 0.3
nn_budget = None
nms_max_overlap = 1.0

# deep_sort
model_filename = '/home/nvidia/hello_rospy/src/beginner_tutorials/scripts/model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
tracker = Tracker(metric)
writeVideo_flag = True

#video_capture = cv2.VideoCapture(0)

if writeVideo_flag:
    # Define the codec and create VideoWriter object
    #w = int(video_capture.get(3))
    #h = int(video_capture.get(4))
    #fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    #out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
    list_file = open('detection.txt', 'w')
    frame_index = -1

fps = 0.0
yolo=YOLO()
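Example #23 shows only the initialization half of a ROS node (receive.py); the frames would normally arrive through a sensor_msgs/Image subscriber and be converted with cv_bridge before being fed to yolo, encoder and tracker. A minimal sketch of that plumbing, with the topic name being an assumption:

# Hedged sketch: ROS image callback feeding the objects set up above.
# The topic name '/camera/image_raw' is an assumption.
import rospy
from sensor_msgs.msg import Image as ImageMsg
from cv_bridge import CvBridge

bridge = CvBridge()

def image_callback(msg):
    frame = bridge.imgmsg_to_cv2(msg, desired_encoding='bgr8')
    # run yolo.detect_image / encoder / tracker on `frame` as in the other examples

rospy.init_node('deep_sort_receiver')
rospy.Subscriber('/camera/image_raw', ImageMsg, image_callback, queue_size=1)
rospy.spin()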
コード例 #24
0
def run(sequence_dir, detection_file, output_file, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance, nn_budget,
        display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    sequence_dir : str
        Path to the MOTChallenge sequence directory.
    detection_file : str
        Path to the detections file.
    output_file : str
        Path to the tracking output file. This file will contain the tracking
        results on completion.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.
    Notes
    -----
    IDnum, the track ID to follow, is read interactively at runtime rather
    than being passed as a parameter.

    """

    # Frame image paths and the sequence are in seq_info["image_filenames"]
    # A single frame is seq_info["image_filenames"][frame_idx]
    # All of this is provided by gather_sequence_info

    if 'y' == input("is there ID [y/n] : "):
        IDnum = int(input("ID_num for tracking : "))  #########
        #range_down = input("ID tracking range_down : ")             #########
        #range_up = input("ID tracking range_up : ")                 #########
    else:
        IDnum = 0

    if 'y' == input("foot display [y/n] : "):  ############# y값 함수 구하고나면 발 위치보기
        foot_dis = True
    else:
        foot_dis = False

    seq_info = gather_sequence_info(sequence_dir, detection_file)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    results = []
    target_h = []

    update_ms = seq_info["update_ms"]
    max_frame_idx = seq_info["max_frame_idx"]

    i = 0

    def frame_callback(vis, frame_idx):
        #print("Processing frame %05d" % frame_idx)

        # Load image and generate detections.
        detections = create_detections(seq_info["detections"], frame_idx,
                                       min_detection_height)
        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])  # (x, y, w, h)
        scores = np.array([d.confidence
                           for d in detections])  # Detector confidence score
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)

        # Update visualization.
        if display or foot_dis:
            image = cv2.imread(seq_info["image_filenames"][frame_idx],
                               cv2.IMREAD_COLOR)
            vis.set_image(image.copy())
            if IDnum == 0:
                vis.draw_detections(detections)
                vis.draw_trackers(tracker.tracks)
            else:  # when a target ID is specified
                vis.draw_target_trackers(tracker.tracks, IDnum)
            if foot_dis:  # show only the tracked ID and mark its feet
                vis.draw_foot(tracker.tracks, IDnum)

        # Save h
        h_file = os.path.dirname(output_file)  # result/text/
        with open(h_file + '/ID_h.txt', 'r') as f_hi:
            line_splits = [
                int(l.split(',')[1]) for l in f_hi.read().splitlines()[1:]
            ]

        # Store results.
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            # Compute using only the bbox instead of the full tracking state
            bbox = track.to_tlwh()

            results.append([
                frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]
            ])
            if int(track.track_id) == int(IDnum):
                #print("find ID-01")
                if bbox[1] + bbox[3] < seq_info["image_size"][0]:
                    target_h.append(
                        [track.track_id, bbox[1] + bbox[3],
                         bbox[3]])  # h per y-value of this ID

                if frame_idx >= 40:  # the start frame is set here
                    # In MOT16-02, it takes about 260 frames until the old woman stops
                    endT = 117 * update_ms / 1000
                    # endT = 5
                    vel_py.foot_world(frame_idx, track.track_id, bbox, IDnum,
                                      update_ms, max_frame_idx, endT)
                    # Once velocity estimation finishes, stop by setting count = 0

                # Check against the 10 foot-position reference lines
                for i in range(10):
                    if int(bbox[1] + bbox[3]) > line_splits[i] - 1 and int(
                            bbox[1] + bbox[3]) < line_splits[i] + 1:
                        print(int(bbox[1] + bbox[3]))
                        vis.draw_foot(tracker.tracks, IDnum)

    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=5)
    else:
        visualizer = visualization.NoVisualization(seq_info)

    ##### Save video ###########
    #video_output_dir = os.path.curdir + '/result/video'
    #video_filename = os.path.join(video_output_dir, "%s_all_tracking.avi" % (os.path.basename(sequence_dir)))       # video name은 seq_info["sequence_name"]
    #video_filename = os.path.join(video_output_dir, "%s_ID%s_tracking.avi" % (os.path.basename(sequence_dir), IDnum))
    #video_filename = os.path.join(video_output_dir, "%s_ID%s_foot 10.avi" % (os.path.basename(sequence_dir), IDnum))
    #if video_filename is not None:
    #    visualizer.viewer.enable_videowriter(video_filename)
    #########################
    visualizer.run(frame_callback)

    # Store results.
    f = open(output_file, 'w')
    for row in results:
        print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1' %
              (row[0], row[1], row[2], row[3], row[4], row[5]),
              file=f)
        # [frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]]
    f.close()
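The output file written above follows the MOTChallenge text format: one line per track per frame, "frame, id, bb_left, bb_top, bb_width, bb_height, conf, x, y, z", with the last three fields fixed to -1 for 2D tracking. A small sketch of a reader for those lines:

# Hedged sketch: parse the MOTChallenge-style result lines written above.
def read_mot_results(path):
    results = []
    with open(path) as f:
        for line in f:
            fields = line.strip().split(',')
            frame_idx, track_id = int(fields[0]), int(fields[1])
            x, y, w, h = map(float, fields[2:6])
            results.append((frame_idx, track_id, (x, y, w, h)))
    return results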
コード例 #25
0
def get_detect(id_video):
    global control_color
    dq = deque(maxlen=1)

    t_read_video = threading.Thread(target=read_video, args=(dq, ))
    t_put_data = threading.Thread(target=put_data)
    t_read_video.start()
    t_put_data.start()

    # myout = save_video(video_reader, "./video.mp4", sz)
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    counter = []
    my_track_dict = {}  #save the info of track_id
    track_smooth_dict = {}  #smooth the imshow
    pts = [deque(maxlen=30) for _ in range(9999)]

    #deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)
    list_file = open('detection_rslt.txt', 'w')

    save_file = mk_dir()
    num = 0
    t1 = time.time()

    while True:
        #avoid the memory error.
        if len(my_track_dict) > 50:
            my_track_dict = {}
        print(len(my_track_dict))

        if dq:
            img = dq.pop()
        else:
            time.sleep(0.05)
            continue

        start_time = time.time()

        num += 1
        if num % 500 == 1:
            cv2.imwrite(save_file + "/_{}.jpg".format(num), img)
        img_h, img_w, img_ch = img.shape
        print(img.shape)
        # 2. Keep clean copies so drawn boxes are not picked up when cropping or running inference
        show_image = img.copy()
        frame = img.copy()

        # person detection
        boxs, confidence, class_names = [], [], []
        out = preson_detect(img)

        # transform the detection output into tracker input
        for i in range(len(out)):
            #========my_setting==============
            if out[i, 2] > 0.7:
                # print(out[i])
                left = int(out[i, 3] * img_w)
                top = int(out[i, 4] * img_h)
                p_w = int(out[i, 5] * img_w - out[i, 3] * img_w)
                p_h = int(out[i, 6] * img_h - out[i, 4] * img_h)

                right = left + p_w
                bottom = top + p_h

                # keep only persons inside the configured area
                point1 = [int((left + right) / 2), bottom]
                my_index = inner_point(point1)
                if my_index:
                    boxs.append([left, top, p_w, p_h])
                    class_names.append("person")
                    confidence.append(out[i, 2])

        # run the tracker
        features = encoder(frame, boxs)
        # score to 1.0 here.
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]
        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]
        # Call the tracker
        tracker.predict()
        tracker.update(detections)
        i = int(0)
        indexIDs = []
        #setting detect time
        t2 = time.time()
        detect_time = t2 - t1
        #========my_setting==============
        control_time = 0.2  # re-run attribute detection at most once per control_time seconds
        if detect_time > control_time:
            t1 = time.time()

        for det, track in zip(detections, tracker.tracks):
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            #print(track.track_id)
            #draw the boxs of object detection.
            pbox = det.to_tlbr()
            #cv2.rectangle(frame,(int(pbox[0]), int(pbox[1])), (int(pbox[2]), int(pbox[3])),(255,255,255), 2)

            my_key = str(int(track.track_id))
            #========my_setting==============
            # if my_key is new or the detection interval has elapsed, re-run attribute detection
            if my_key not in my_track_dict.keys(
            ) or detect_time > control_time:
                # print(my_key)
                # print(my_track_dict.keys())
                #the code of processing the person box.
                label_dict = get_labels(img, pbox)
                print("**" * 20, label_dict)

                if type(label_dict) == type(None):
                    continue

                if "coat" not in label_dict.keys():
                    continue
                my_track_dict[my_key] = label_dict

            # draw the attr of person.
            frame = draw_person_attr(frame, my_track_dict[my_key], pbox,
                                     control_color)

            indexIDs.append(int(track.track_id))
            counter.append(int(track.track_id))
            bbox = track.to_tlbr()
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]

            #define the color of rectangle.
            if my_track_dict[my_key]["coat"] == "Yes":
                color_rect = (0, 255, 0)
            else:
                color_rect = (0, 0, 255)

            #center_loc = [int((bbox[0]+bbox[2])/2), int((bbox[1]+bbox[3])/2)]
            if my_key not in track_smooth_dict.keys():
                print("---------------------------------------------------->")
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (color_rect), 3)
                track_smooth_dict[my_key] = bbox
            else:
                fbox = track_smooth_dict[my_key]
                a = int((bbox[0] + fbox[0]) / 2)
                b = int((bbox[1] + fbox[1]) / 2)
                c = int((bbox[2] + fbox[2]) / 2)
                d = int((bbox[3] + fbox[3]) / 2)
                cv2.rectangle(frame, (a, b), (c, d), (color_rect), 3)
                track_smooth_dict[my_key] = bbox

            #draw the boxs of track.
            #cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),(color), 3)
            if True:
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150,
                            (color), 2)
                if len(class_names) > 0:
                    class_name = class_names[0]
                    cv2.putText(frame, str(class_names[0]),
                                (int(bbox[0]), int(bbox[1] - 20)), 0,
                                5e-3 * 150, (color), 2)
            i += 1

            # throttle the upload frequency
            if num % 200 == 1:
                my_result = my_track_dict[my_key]
                pic_name = str(int(time.time())) + "_" + my_key
                # put_data(my_key, my_result, frame)
                q_put_img.append([pic_name, my_result, frame])

        count = len(set(counter))
        # draw the guard line.
        draw_muti(frame)

        # cv2.putText(frame, "Total Pedestrian Counter: "+str(count),(int(20), int(120)),0, 5e-3 * 200, (0,255,0),2)
        # cv2.putText(frame, "Current Pedestrian Counter: "+str(i),(int(20), int(80)),0, 5e-3 * 200, (0,255,0),2)

        end_time = time.time()
        my_one_time = (end_time - start_time) * 1000
        print("====={}=====".format(num), my_one_time)

        frame = cv2.resize(frame, (640, 360))
        ret2, jpeg = cv2.imencode('.jpg', frame)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' +
               jpeg.tobytes() + b'\r\n\r\n')
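get_detect() is a generator that yields multipart JPEG chunks, so it is meant to back an HTTP streaming endpoint. A hedged sketch of serving it with Flask (the web framework is an assumption; any server that supports streamed responses would work):

# Hedged sketch: expose get_detect() as an MJPEG stream, assuming Flask.
from flask import Flask, Response

app = Flask(__name__)

@app.route('/video/<int:id_video>')
def video_feed(id_video):
    return Response(get_detect(id_video),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)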
コード例 #26
0
def run(video_file, min_confidence,
        nms_max_overlap, min_detection_height, max_cosine_distance,
        nn_budget, display):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    video_file : str
        Path to the video file.
    min_confidence : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap: float
        Maximum detection overlap (non-maxima suppression threshold).
    min_detection_height : int
        Detection height threshold. Disregard all detections that have
        a height lower than this value.
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.

    """

    cfg_file = "yolo3/cfg/yolov3.cfg"
    weight_file = "yolo3/yolov3.weights"
    #weight_file = 'yolo3/backup/MOT17Det/yolov3-mot17det_10000.weights'
    use_cuda = 1


    det_model = create_model(cfg_file, weight_file, use_cuda)

    seq_info = gather_video_info(video_file)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)

    tracker = Tracker(metric)
    # just for warming up
    img = cv2.imread('./000001.jpg')

    sized = cv2.resize(img, (det_model.width, det_model.height))
    sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
    boxes = do_detect(det_model, sized, 0.5, 0.4, use_cuda)


    def frame_callback(vis, frame_idx):
        #print("Processing frame %05d" % frame_idx)
        #global total_frames, total_time
        ret, img = seq_info['video_cap'].read()
        if not ret:
            print('there is no frame!')
            sys.exit(1)

        #time_0 = time.time()
        # Load image and generate detections.
        detections = create_det_from_model(det_model, img, 0.5, 0.4, min_detection_height, use_cuda)

        #detections = create_detections(
        #    seq_info["detections"], frame_idx, min_detection_height)
        #if seq_info['groundtruth'] is not None:
        #    gts = create_gts(seq_info['groundtruth'], frame_idx)

        #detections = create_detections(
        #    seq_info["detections"], frame_idx, seq_info["image_filenames"][frame_idx], encoder, min_detection_height)


        detections = [d for d in detections if d.confidence >= min_confidence]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(
            boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # Update tracker.
        tracker.predict()
        tracker.update(detections)
        #time_1 = time.time()
        #total_time += time_1 - time_0
        #total_frames += 1
        # Update visualization.
        if display:
            #image = cv2.imread(
            #    seq_info["image_filenames"][frame_idx], cv2.IMREAD_COLOR)
            #vis.set_image(image.copy())
            vis.set_image(img.copy())
            #vis.draw_detections(detections)
            #vis.draw_detections(gts)
            vis.draw_trackers(tracker.tracks)

        # Store results.
        # NOTE: store from n_init frame(1-based index)


        # for track in tracker.tracks:
        #     # NOTE: the condition is different from that in drawing tracks
        #     if not track.is_confirmed() or track.time_since_update > 1:
        #         continue
        #     # NOTE: store estimated state instead of observation
        #     bbox = track.to_tlwh()
        #     results.append([
        #         frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])


    # Run tracker.
    if display:
        visualizer = visualization.Visualization(seq_info, update_ms=1)
    else:
        visualizer = visualization.NoVisualization(seq_info)
    visualizer.run(frame_callback)
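create_det_from_model is not shown here; whatever it returns, the subsequent gating mirrors the other examples: discard detections below min_confidence and below min_detection_height, then run NMS. A compact sketch of that gating on deep_sort Detection objects (tlwh boxes):

# Hedged sketch: the confidence/height gating implied by min_confidence and
# min_detection_height, for deep_sort Detection objects whose tlwh is (x, y, w, h).
def filter_detections(detections, min_confidence, min_detection_height):
    return [d for d in detections
            if d.confidence >= min_confidence and d.tlwh[3] >= min_detection_height]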
コード例 #27
0
    def get_keypoints_and_id_from_img_without_normalize(self, img):
        # KP ordering of body parts
        NECK = 1
        R_SHOULDER = 2
        R_ELBOW = 3
        R_WRIST = 4
        L_SHOULDER = 5
        L_ELBOW = 6
        L_WRIST = 7
        MID_HIP = 8
        R_HIP = 9
        R_KNEE = 10
        R_ANKLE = 11
        L_HIP = 12
        L_KNEE = 13
        L_ANKLE = 14

        # Define bodyparts to get the selected keypoints
        BODY_PARTS = [
            NECK, R_SHOULDER, R_ELBOW, R_WRIST, L_SHOULDER, L_ELBOW, L_WRIST,
            MID_HIP, R_HIP, R_KNEE, R_ANKLE, L_HIP, L_KNEE, L_ANKLE
        ]

        # Set tracker
        max_cosine_distance = 0.2
        nn_budget = 100
        metric = nn_matching.NearestNeighborDistanceMetric(
            "cosine", max_cosine_distance, nn_budget)
        tracker = Tracker(metric)

        # Get data points (datum)
        datum = op.Datum()
        datum.cvInputData = img
        self.opWrapper.emplaceAndPop(op.VectorDatum([datum]))

        # Initialize lists
        arr = []
        boxes = []
        list_of_pose_temp = []
        list_of_pose_and_id = []
        try:
            # Get highest and lowest keypoints
            for kp_idx, keypoint in enumerate(datum.poseKeypoints):
                pop_all(arr)
                x_high = 0
                x_low = 9999
                y_high = 0
                y_low = 9999

                for count, x in enumerate(keypoint):
                    # Skip keypoints at x=0, y=0, which mark joints that were not detected.
                    # This "if" finds the LOWEST and HIGHEST detected keypoints.
                    if x[0] != 0 and x[1] != 0:
                        if x_high < x[0]:
                            x_high = x[0]
                        if x_low > x[0]:
                            x_low = x[0]
                        if y_high < x[1]:
                            y_high = x[1]
                        if y_low > x[1]:
                            y_low = x[1]

                    # Add pose keypoints to a dictionary
                    if count in BODY_PARTS:
                        KP = {'x': x[0], 'y': x[1]}

                        # Append dictionary to array
                        arr.append(KP)

                # Find the highest and lowest position of x and y
                # (Used to draw rectangle)
                if y_high - y_low > x_high - x_low:
                    height = y_high - y_low
                    width = x_high - x_low
                else:
                    height = x_high - x_low
                    width = y_high - y_low

                # Draw rectangle (get width and height)
                y_high = int(y_high + height / 40)
                y_low = int(y_low - height / 12)
                x_high = int(x_high + width / 5)
                x_low = int(x_low - width / 5)

                # # Normalize keypoint
                list_of_pose_temp.append(arr)

                # Make the box
                boxes.append([x_low, y_low, width, height])

                # Encode the features inside the designated box
                features = self.encoder(datum.cvOutputData, boxes)

                # For a non-empty item add to the detection array
                def nonempty(xywh):
                    return xywh[2] != 0 and xywh[3] != 0

                detections = [
                    Detection(bbox, 1.0, feature)
                    for bbox, feature in zip(boxes, features) if nonempty(bbox)
                ]

                # Run non-maxima suppression.
                np_boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(
                    np_boxes, self.nms_max_overlap, scores)
                detections = [detections[i] for i in indices]

                # Update tracker.
                tracker.predict()
                tracker.update(detections)

                # Make pose and person ID list
                if kp_idx == len(datum.poseKeypoints) - 1:
                    for track_idx, track in enumerate(tracker.tracks):
                        bbox = track.to_tlwh()
                        list_of_pose_and_id.append({
                            "Keypoints":
                            list_of_pose_temp[track_idx],
                            "ID":
                            track.track_id
                        })

            return list_of_pose_and_id
        except Exception as e:
            # No poses were detected in this frame; return nothing.
            print(end="")
コード例 #28
0
def main(yolo):

    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    # deep_sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)

    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    writeVideo_flag = False

    os.environ["OPENCV_FFMPEG_CAPTURE_OPTIONS"] = "rtsp_transport;udp"
    video_capture = cv2.VideoCapture("rtsp://192.168.4.103:8080/h264.sdp",
                                     cv2.CAP_FFMPEG)

    if writeVideo_flag:
        # Define the codec and create VideoWriter object
        w = int(video_capture.get(3))
        h = int(video_capture.get(4))
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        out = cv2.VideoWriter('output.avi', fourcc, 15, (w, h))
        list_file = open('detection.txt', 'w')
        frame_index = -1

    fps = 0.0
    while True:
        ret, frame = video_capture.read()  # frame shape 640*480*3
        if ret != True:
            break
        frame = cv2.resize(frame, (640, 360))
        t1 = time.time()

        # image = Image.fromarray(frame)
        image = Image.fromarray(frame[..., ::-1])  #bgr to rgb
        boxs = yolo.detect_image(image)
        # print("box_num",len(boxs))
        features = encoder(frame, boxs)

        # score to 1.0 here).
        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(boxs, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)
        detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(track.track_id),
                        (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200,
                        (0, 255, 0), 2)

        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                          (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)

        cv2.imshow('', frame)

        if writeVideo_flag:
            # save a frame
            out.write(frame)
            frame_index = frame_index + 1
            list_file.write(str(frame_index) + ' ')
            if len(boxs) != 0:
                for i in range(0, len(boxs)):
                    list_file.write(
                        str(boxs[i][0]) + ' ' + str(boxs[i][1]) + ' ' +
                        str(boxs[i][2]) + ' ' + str(boxs[i][3]) + ' ')
            list_file.write('\n')

        fps = (fps + (1. / (time.time() - t1))) / 2
        print("fps= %f" % (fps))

        # Press Q to stop!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    if writeVideo_flag:
        out.release()
        list_file.close()
    cv2.destroyAllWindows()
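An RTSP source like the one opened above can stall or disconnect mid-run, in which case read() keeps returning (False, None). A hedged sketch of a read wrapper that reopens the stream after repeated failures:

# Hedged sketch: reopen an RTSP capture after consecutive read failures.
import time
import cv2

def read_with_reconnect(cap, url, max_failures=25):
    failures = 0
    while True:
        ret, frame = cap.read()
        if ret:
            return cap, frame
        failures += 1
        if failures >= max_failures:
            cap.release()
            time.sleep(1.0)
            cap = cv2.VideoCapture(url, cv2.CAP_FFMPEG)
            failures = 0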
コード例 #29
0
        if cv2.__version__.split(".")[0] == "2":
          frame_count = vcap.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
        else:
          # opencv 3/4
          frame_count = vcap.get(cv2.CAP_PROP_FRAME_COUNT)

      # initialize tracking module
      if args.get_tracking:
        tracking_objs = args.tracking_objs.split(",")
        tracker_dict = {}
        tracking_results_dict = {}
        tmp_tracking_results_dict = {}
        for tracking_obj in tracking_objs:
          metric = nn_matching.NearestNeighborDistanceMetric(
              "cosine", args.max_cosine_distance, args.nn_budget)
          tracker_dict[tracking_obj] = Tracker(
              metric, max_iou_distance=args.max_iou_distance)
          tracking_results_dict[tracking_obj] = []
          tmp_tracking_results_dict[tracking_obj] = {}

      # videoname = os.path.splitext(os.path.basename(videofile))[0]
      videoname = os.path.basename(videofile)
      video_obj_out_path = None
      if args.obj_out_dir is not None:  # not saving box json to save time
        video_obj_out_path = os.path.join(args.obj_out_dir, videoname)
        if not os.path.exists(video_obj_out_path):
          os.makedirs(video_obj_out_path)

      # 3. read frame one by one
      cur_frame = 0
      vis_count = 0
      frame_stack = []
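Example #29 builds one Tracker per tracked class (tracker_dict); the per-frame step this implies is the usual predict/update pair applied class by class. A minimal sketch, assuming detections have already been grouped by class name:

# Hedged sketch: per-class predict/update for a tracker_dict like the one above.
def step_trackers(tracker_dict, detections_by_class):
    for obj_class, tracker in tracker_dict.items():
        tracker.predict()
        tracker.update(detections_by_class.get(obj_class, []))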
コード例 #30
0
def main():
    start = time.time()
    first = start
    #Definition of the parameters
    max_cosine_distance = 0.5  # 0.9; gating threshold for the cosine distance
    nn_budget = None
    nms_max_overlap = 0.3  # non-maximum suppression threshold

    counter = []

    #deep_sort
    model_filename = 'model_data/market1501.pb'
    encoder = gdet.create_box_encoder(model_filename,batch_size=1)

    find_objects = ['person', 'fire_extinguisher', 'fireplug', 'car', 'bicycle', 'motorcycle']
    yolo = YOLO()

    for cnt in range(1, 2):
        video_path = "./t1_video/t1_video_%05d" % cnt
        images = os.listdir(video_path)
        images.sort()
        print(images[0])
        trackers = []
        counters = []
        for idx in range(0, 6):
            metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
            trackers.append(Tracker(metric))
            count = []
            counters.append(count)
        tracker_time = 0
        yolo_time = 0
        for image_path in images:
            # image_path = video_path + "/t1_video_%05d_%05d.jpg" % (1, fc)
            t1 = time.time()
            # print(video_path + "/" + image_path)
            frame = cv2.imread(video_path + "/" + image_path)
            image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

            yolo_start = time.time()
            yolo_dict = yolo.detect_image(image)
            yolo_end = time.time()
            yolo_time += (yolo_end - yolo_start)

            for idx in range(0, 6):
                # print(idx)
                tracker = trackers[idx]
                counter = counters[idx]

                boxs = yolo_dict.get(find_objects[idx])
                if boxs is None:
                    continue

                features = encoder(frame, boxs)
                # score to 1.0 here).
                detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
                # Run non-maxima suppression.
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]

                # Call the tracker
                t_start = time.time()
                tracker.predict()
                tracker.update(detections)
                t_end = time.time()
                tracker_time += (t_end - t_start)

                for track in tracker.tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue
                    #boxes.append([track[0], track[1], track[2], track[3]])

                    counter.append(int(track.track_id))
        #######################################
        num_person = len(set(counters[0]))
        num_fire_extinguisher = len(set(counters[1]))
        num_fireplug = len(set(counters[2]))
        num_car = len(set(counters[3]))
        num_bicycle = len(set(counters[4]))
        num_motorcycle = len(set(counters[5]))
        ress.append(present_result(cnt, num_person,
                                   num_fire_extinguisher,
                                   num_fireplug,
                                   num_car,
                                   num_bicycle,
                                   num_motorcycle))

        t1_res_cai["track1_results"] = ress
        with open('t1_res_cai.json', 'w') as make_file:
            json.dump(t1_res_cai, make_file, ensure_ascii=False, indent=4)
        #######################################


        for idx in range(0, 6):
            print(len(set(counters[idx])), end=" ")
        end = time.time()
        print(str(':: total:%.2f yolo:%.2f tracker:%.2f' % ((end - start),yolo_time, tracker_time)))
        start = end
    last = time.time()
    print(str(':: %.2f' % (last - first)))
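present_result, ress and t1_res_cai are defined outside this snippet; judging from the json.dump call, they accumulate one per-video record of unique-track counts. A hypothetical sketch of that bookkeeping (names and field layout are assumptions, not from the original):

# Hedged sketch with assumed structure: one record of per-class unique-ID counts per video.
def present_result(video_idx, num_person, num_fire_extinguisher, num_fireplug,
                   num_car, num_bicycle, num_motorcycle):
    return {
        "id": video_idx,
        "person": num_person,
        "fire_extinguisher": num_fire_extinguisher,
        "fireplug": num_fireplug,
        "car": num_car,
        "bicycle": num_bicycle,
        "motorcycle": num_motorcycle,
    }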