def run_video_detection(scoreThreshold):
    global coun
    classes = {}
    inputs = tf.placeholder(tf.float32, [None, 256, 256, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)
    with tf.Session() as sess:
        sess.run(model.pretrained())
        cap = cv2.VideoCapture(0)
        while cap.isOpened():
            classes[currentIndicesDectecting] = currentClassDetecting
            list_of_classes = [currentIndicesDectecting]
            ret, frame = cap.read()
            img = cv2.resize(frame, (256, 256))
            imge = np.array(img).reshape(-1, 256, 256, 3)
            start_time = time.time()
            preds = sess.run(model.preds, {inputs: model.preprocess(imge)})
            boxes = model.get_boxes(preds, imge.shape[1:3])
            cv2.namedWindow('Live Camera', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('Live Camera', 500, 500)
            boxes1 = np.array(boxes)
            for j in list_of_classes:
                count = 0
                lab = classes[j] if j in classes else 'background'
                if len(boxes1) != 0:
                    for i in range(len(boxes1[j])):
                        box = boxes1[j][i]
                        # Use the caller-supplied threshold instead of a hard-coded 0.40.
                        if boxes1[j][i][4] >= scoreThreshold:
                            count += 1
                            cv2.rectangle(img, (int(box[0]), int(box[1])),
                                          (int(box[2]), int(box[3])), (0, 255, 0), 1)
                            cv2.putText(img, lab, (int(box[0]), int(box[1])),
                                        cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 255),
                                        lineType=cv2.LINE_AA)
                            # Announce the first sighting of the target class only once.
                            if coun == 1 and lab == currentClassDetecting:
                                engine.say(str(currentClassDetecting) + " FOUND")
                                engine.runAndWait()
                                engine.stop()
                                coun = 2
            cv2.imshow("Live Camera", img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()
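# A minimal sketch of the module-level state run_video_detection() relies on.
# It assumes 'engine' is a pyttsx3 speech engine and that the globals below are
# initialized elsewhere in this module; the COCO index/label pair shown here is
# only illustrative.
import pyttsx3

engine = pyttsx3.init()
currentIndicesDectecting = 0        # COCO class index to watch (0 == 'person')
currentClassDetecting = 'person'    # label spoken when the class is first seen
coun = 1                            # 1 -> announce on next detection, 2 -> already announced

run_video_detection(0.40)           # detect with a 0.40 confidence threshold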
def check_frame(frame):
    tf.disable_v2_behavior()
    inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)
    classes = {'0': 'person'}  # '1':'bicycle','2':'car','3':'bike','5':'bus','7':'truck'
    list_of_classes = [0, 1, 2, 3, 5, 7]
    with tf.Session() as sess:
        sess.run(model.pretrained())
        print('Checking Frame')
        img = cv2.resize(frame, (416, 416))
        imge = np.array(img).reshape(-1, 416, 416, 3)
        start_time = time.time()
        preds = sess.run(model.preds, {inputs: model.preprocess(imge)})
        boxes = model.get_boxes(preds, imge.shape[1:3])
        boxes1 = np.array(boxes)
        ret = {}
        for j in list_of_classes:
            count = 0
            # Only tally classes that have a label; without this guard, a class
            # missing from 'classes' would overwrite the previous label's count.
            if str(j) in classes:
                lab = classes[str(j)]
                if len(boxes1) != 0:
                    for i in range(len(boxes1[j])):
                        box = boxes1[j][i]
                        if boxes1[j][i][4] >= .40:
                            count += 1
                            cv2.rectangle(img, (int(box[0]), int(box[1])),
                                          (int(box[2]), int(box[3])), (0, 255, 0), 1)
                ret[lab] = count
        return ret
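# A hedged usage sketch: run check_frame() on a single image loaded from disk.
# 'sample.jpg' is a placeholder path, not a file shipped with this code.
frame = cv2.imread('sample.jpg')
if frame is not None:
    counts = check_frame(frame)  # e.g. {'person': 2}
    print(counts.get('person', 0), 'person(s) detected')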
def __init__(self):
    self.url1 = "black.mp4"
    self.inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
    self.model = nets.YOLOv3COCO(self.inputs, nets.Darknet19)
    # model = nets.YOLOv2(inputs, nets.Darknet19)
    # frame = cv2.imread("D://pyworks//yolo//truck.jpg", 1)
    self.count = 0
    self.classes = {
        '0': 'person', '1': 'bicycle', '2': 'car', '3': 'bike',
        '5': 'bus', '7': 'truck', '8': 'chair'
    }
    self.list_of_classes = [0, 1, 2, 3, 5, 7, 8]
def yolo(self):
    self.__inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
    self.__model = nets.YOLOv3COCO(self.__inputs, nets.Darknet19)
    self.__classes = {
        '0': 'person', '1': 'bicycle', '2': 'car',
        '3': 'bike', '5': 'bus', '7': 'truck'
    }
    self.__list_of_classes = [0, 1, 2, 3, 5, 7]
    # To display other detected objects, change 'classes' and
    # 'list_of_classes' to their respective COCO indices, available on the
    # COCO website. Index 0 is 'person', 1 is 'bicycle', and so on.
    # To detect all classes, add every index to this list.
    try:
        self.__sess = tf.Session()
        self.__sess.run(self.__model.pretrained())
    except Exception as e:
        print(e)
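# The session opened in yolo() is never closed in this snippet. A minimal
# companion method (an assumption, not part of the original code) could be
# added to the same class; it must live in that class so the name-mangled
# '__sess' attribute resolves correctly.
def close(self):
    if self.__sess is not None:
        self.__sess.close()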
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import tensornets as nets
import cv2
import numpy as np
from os import listdir
from os.path import isfile, join
import time

mypath = 'input'  # change to your own directory, or use '0' for the webcam
inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
model = nets.YOLOv3COCO(inputs, nets.Darknet19)
classes = {'0': 'person'}
list_of_classes = [0]
with tf.Session() as sess:
    sess.run(model.pretrained())
    files = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    print(files)
    for file in files:
        # cv2.imread returns None (it does not raise) for unreadable files,
        # so check the result instead of relying on an exception.
        frame = cv2.imread('input/%s' % file)
        if frame is None:
            continue
def countVehicles(param):
    # param -> path of the video, relative to the current working directory
    # (The original comments also mention 'list' and 'index' parameters for
    # writing vehicle counts, but they are not part of this signature.)
    tf.disable_v2_behavior()

    # Image size must be 416x416, as the YoloV3 network expects that specific
    # input size.
    img_size = 416
    inputs = tf.placeholder(tf.float32, [None, img_size, img_size, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)

    # See 'CentroidTracker' for further info about these parameters.
    ct = CentroidTracker(maxDisappeared=5, maxDistance=50)
    trackers = []          # list of all dlib correlation trackers
    trackableObjects = {}  # maps an object's ID to its TrackableObject (ID + centroids)

    skip_frames = 10          # number of frames to skip between detections
    confidence_level = 0.40   # minimum confidence for a detection to count
    total = 0                 # total number of detected objects from classes of interest

    # Show the original video as output rather than the 416x416 image used as
    # YoloV3 input. (Detection still happens at 416x416; only the display is
    # scaled back to the original size when this is True.)
    use_original_video_size_as_output_size = True

    video_path = os.getcwd() + param  # e.g. "/videos/4.mp4"
    video_name = os.path.basename(video_path)
    # print("Loading video {video_path}...".format(video_path=video_path))
    if not os.path.exists(video_path):
        print("File does not exist. Exited.")
        exit()

    # YoloV3 detects the 80 COCO classes listed below.
    all_classes = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
                   "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
                   "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                   "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
                   "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
                   "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
                   "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
                   "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
                   "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
                   "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

    # Classes of interest (with their corresponding indices for easier looping).
    classes = {1: 'bicycle', 2: 'car', 3: 'motorbike', 5: 'bus', 7: 'truck'}

    with tf.Session() as sess:
        sess.run(model.pretrained())
        cap = cv2.VideoCapture(video_path)

        # Get video size (just for log purposes).
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Scale used for output window size and net size.
        width_scale = 1
        height_scale = 1
        if use_original_video_size_as_output_size:
            width_scale = width / img_size
            height_scale = height / img_size

        def drawRectangleCV2(img, pt1, pt2, color, thickness,
                             width_scale=width_scale, height_scale=height_scale):
            point1 = (int(pt1[0] * width_scale), int(pt1[1] * height_scale))
            point2 = (int(pt2[0] * width_scale), int(pt2[1] * height_scale))
            return cv2.rectangle(img, point1, point2, color, thickness)

        def drawTextCV2(img, text, pt, font, font_scale, color, lineType,
                        width_scale=width_scale, height_scale=height_scale):
            pt = (int(pt[0] * width_scale), int(pt[1] * height_scale))
            cv2.putText(img, text, pt, font, font_scale, color, lineType)

        def drawCircleCV2(img, center, radius, color, thickness,
                          width_scale=width_scale, height_scale=height_scale):
            center = (int(center[0] * width_scale), int(center[1] * height_scale))
            cv2.circle(img, center, radius, color, thickness)

        # Python 3.5.6 does not support f-strings (the next line would be a syntax error):
        # print(f"Loaded {video_path}. Width: {width}, Height: {height}")
        # print("Loaded {video_path}. Width: {width}, Height: {height}".format(video_path=video_path, width=width, height=height))

        skipped_frames_counter = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or frame is None:
                # End of video (or a read failure): stop cleanly instead of
                # crashing on the resize below.
                break
            img = cv2.resize(frame, (img_size, img_size))

            output_img = frame if use_original_video_size_as_output_size else img
            tracker_rects = []

            if skipped_frames_counter == skip_frames:
                # Detection happens once per 'skip_frames' frames.
                # print("[DETECTING]")
                trackers = []
                skipped_frames_counter = 0  # reset counter

                np_img = np.array(img).reshape(-1, img_size, img_size, 3)
                start_time = time.time()
                predictions = sess.run(model.preds, {inputs: model.preprocess(np_img)})
                # print("Detection took %s seconds" % (time.time() - start_time))

                # model.get_boxes returns an 80-element array with information
                # about the detected classes; each element holds a list of
                # detected boxes, confidence levels, etc.
                detections = model.get_boxes(predictions, np_img.shape[1:3])
                np_detections = np.array(detections)

                # Loop only through the classes we are interested in.
                for class_index in classes.keys():
                    local_count = 0
                    class_name = classes[class_index]
                    # Loop through the detections of this class.
                    for i in range(len(np_detections[class_index])):
                        box = np_detections[class_index][i]
                        if np_detections[class_index][i][4] >= confidence_level:
                            # print("Detected ", class_name, " with confidence of ", np_detections[class_index][i][4])
                            local_count += 1
                            startX, startY, endX, endY = box[0], box[1], box[2], box[3]
                            drawRectangleCV2(output_img, (startX, startY), (endX, endY), (0, 255, 0), 1)
                            drawTextCV2(output_img, class_name, (startX, startY),
                                        cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 255), 1)

                            # Construct a dlib rectangle from the bounding-box
                            # coordinates and start a dlib correlation tracker.
                            tracker = dlib.correlation_tracker()
                            rect = dlib.rectangle(int(startX), int(startY), int(endX), int(endY))
                            tracker.start_track(img, rect)

                            # Keep the tracker so we can use it during skipped frames.
                            trackers.append(tracker)
                    # Total number of detected objects for this class on this frame:
                    # print(class_name, " : ", local_count)
            else:
                # When not detecting, track previously detected objects (if any).
                # print("[TRACKING]")
                skipped_frames_counter += 1  # one more frame without detection

                # Update each tracker and display its rectangle.
                for tracker in trackers:
                    tracker.update(img)
                    pos = tracker.get_position()

                    # Unpack the position object.
                    startX = int(pos.left())
                    startY = int(pos.top())
                    endX = int(pos.right())
                    endY = int(pos.bottom())

                    # Add the bounding-box coordinates to the tracking-rectangles list.
                    tracker_rects.append((startX, startY, endX, endY))

                    # Draw tracking rectangles.
                    drawRectangleCV2(output_img, (startX, startY), (endX, endY), (255, 0, 0), 1)

            # Use the centroid tracker to associate (1) old object centroids
            # with (2) the newly computed object centroids.
            objects = ct.update(tracker_rects)

            # Loop over the tracked objects.
            for (objectID, centroid) in objects.items():
                # Check whether a trackable object already exists for this ID.
                to = trackableObjects.get(objectID, None)
                if to is None:
                    # If there is no existing trackable object, create one.
                    to = TrackableObject(objectID, centroid)
                else:
                    to.centroids.append(centroid)
                    # If the object has not been counted yet, count it and mark it.
                    if not to.counted:
                        total += 1
                        to.counted = True

                # Store the trackable object in our dictionary.
                trackableObjects[objectID] = to

                # Draw both the ID and the centroid of the object on the output frame.
                object_id = "ID {}".format(objectID)
                drawTextCV2(output_img, object_id, (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                drawCircleCV2(output_img, (centroid[0], centroid[1]), 2, (0, 255, 0), -1)

            # Display the running total.
            total_str = str(total)
            drawTextCV2(output_img, total_str, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

            # Display the current frame (with all annotations drawn so far).
            cv2.imshow(video_name, output_img)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):    # QUIT
                break
            elif key == ord('p'):  # PAUSE (press any key to continue)
                cv2.waitKey(0)

        cap.release()
        cv2.destroyAllWindows()
        print("Exited")
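# A hedged usage sketch: count vehicles in a video under the current working
# directory. The "/videos/4.mp4" path mirrors the example in the comments
# above; since countVehicles() appends 'param' to os.getcwd(), the argument
# must start with a path separator. Adjust it to a file that actually exists.
countVehicles("/videos/4.mp4")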
def __init__(self):
    self.inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
    # tensornets' YOLOv3COCO takes the backbone as its second argument
    # (nets.Darknet19, as in the other snippets here).
    self.model = nets.YOLOv3COCO(self.inputs, nets.Darknet19)
    self.person_class = 0
    self.in_size = (416, 416)
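# A minimal companion method (an assumption; not part of the original class)
# showing how the attributes above would typically be used, following the
# detection pattern of the other snippets in this file.
def detect_people(self, sess, frame):
    img = cv2.resize(frame, self.in_size)
    batch = np.array(img).reshape(-1, self.in_size[0], self.in_size[1], 3)
    preds = sess.run(self.model.preds, {self.inputs: self.model.preprocess(batch)})
    boxes = self.model.get_boxes(preds, batch.shape[1:3])
    # Keep only 'person' boxes above a 0.40 confidence threshold.
    return [b for b in np.array(boxes)[self.person_class] if b[4] >= .40]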
def deeplearning(cond, turn):
    print('deeplearning start')
    inputs = tf.placeholder(tf.float32, [None, 416, 416, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)
    # model = nets.YOLOv2(inputs, nets.Darknet19)
    # frame = cv2.imread("D://pyworks//yolo//truck.jpg", 1)
    # classes = {'0':'person','1':'bicycle','2':'car','3':'bike','5':'bus','7':'truck'}
    classes = {'0': 'person'}
    # list_of_classes = [0, 1, 2, 3, 5, 7]
    list_of_classes = [0]
    try:
        with tf.Session() as sess:
            cond.acquire()  # mutex lock
            sess.run(model.pretrained())
            # "D://pyworks//yolo//videoplayback.mp4"
            while turn.myTurn != 1:
                cond.wait()  # added: wait while it is not this thread's turn
            # cap = cv2.imread('test.jpg', cv2.IMREAD_COLOR)
            cap = cv2.imread('image.JPG', cv2.IMREAD_COLOR)
            # The input must match the 416x416 placeholder above, and the third
            # positional argument of cv2.resize is 'dst', so the interpolation
            # flag has to be passed by keyword.
            img = cv2.resize(cap, (416, 416), interpolation=cv2.INTER_AREA)
            image = np.array(img).reshape(-1, 416, 416, 3)
            start_time = time.time()
            preds = sess.run(model.preds, {inputs: model.preprocess(image)})
            print(time.time() - start_time)
            boxes = model.get_boxes(preds, image.shape[1:3])
            cv2.namedWindow('image', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('image', 700, 700)
            # print("--- %s seconds ---" % (time.time() - start_time))
            boxes1 = np.array(boxes)
            for j in list_of_classes:
                count = 0
                if str(j) in classes:
                    lab = classes[str(j)]
                    if len(boxes1) != 0:
                        for i in range(len(boxes1[j])):
                            box = boxes1[j][i]
                            if boxes1[j][i][4] >= .40:
                                count += 1
                                cv2.rectangle(img, (int(box[0]), int(box[1])),
                                              (int(box[2]), int(box[3])), (0, 255, 0), 1)
                                cv2.putText(img, lab, (int(box[0]), int(box[1])),
                                            cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 255),
                                            lineType=cv2.LINE_AA)
                    print(lab, ": ", count)
                    ref3.set({'people_number': count})
                    ref1.set({'people_number': random.randint(0, 10)})
                    ref2.set({'people_number': random.randint(0, 10)})
                    ref4.set({'people_number': random.randint(0, 10)})
            cv2.imwrite("result_image.png", img)
            fileUpload("result_image.png")
            print("save end")
            cv2.destroyAllWindows()
            turn.myTurn = 0
            cond.notify_all()  # wake all waiting consumers
            cond.release()  # mutex unlock
            print('deeplearning end')
    except Exception as e:
        print(str(e))
def main(stream, photo):
    with tf.compat.v1.Session() as sess:
        # Get the YOLOv3 model from tensornets and store it.
        inputs = tf.compat.v1.placeholder(tf.float32, [None, 416, 416, 3])
        model = nets.YOLOv3COCO(inputs, nets.Darknet19)

        # Choose the classes to identify. Only the 'person' class is needed
        # here, to count people.
        classes = {'0': 'person'}
        indexOfClasses = [0]

        # Load the pretrained weights.
        sess.run(model.pretrained())

        # Open the capture source; 0 selects the webcam.
        webcamStream = cv.VideoCapture(photo)

        # Main loop over the stream.
        while webcamStream.isOpened():
            # Get one image from the stream.
            ret, frame = webcamStream.read()

            # Create a new image matching the network's input shape.
            img = cv.resize(frame, (416, 416))
            imgTmp = np.array(img)
            imgTmp = np.reshape(imgTmp, (-1, 416, 416, 3))

            # Run the model on the image to classify and identify people.
            preds = sess.run(model.preds, {inputs: model.preprocess(imgTmp)})

            # Get boxes for the people found in the image.
            boxes = model.get_boxes(preds, imgTmp.shape[1:3])

            # Create a window to display the image.
            cv.namedWindow('image', cv.WINDOW_NORMAL)
            cv.resizeWindow('image', 500, 500)

            # Draw a box around each person found in the image and display the
            # number of people identified; with 2 people on camera this prints
            # 'Number of person: 2'.
            boxes1 = np.array(boxes)
            for classe in indexOfClasses:
                count = 0
                label = classes[str(classe)]
                if len(boxes1) != 0:
                    for index in range(len(boxes1[classe])):
                        box = boxes1[classe][index]
                        if boxes1[classe][index][4] >= .40:
                            count += 1
                            cv.rectangle(img, (int(box[0]), int(box[1])),
                                         (int(box[2]), int(box[3])), (0, 255, 0), 3)
                            cv.putText(img, label, (int(box[0]), int(box[1])),
                                       cv.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255),
                                       lineType=cv.LINE_AA)
                print('Number of person: ', count)

            # Display the image.
            cv.imshow("image", img)

            # If an image was given as input, show the result for 5 seconds
            # and stop the program.
            if photo != 0:
                cv.waitKey(5000)
                webcamStream.release()
                cv.destroyAllWindows()
                break

            # In a real-time stream, pressing 'q' stops the program.
            if stream == True:
                if cv.waitKey(1) & 0xFF == ord('q'):
                    webcamStream.release()
                    cv.destroyAllWindows()
                    break
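# Hedged usage sketches for main(); both argument combinations below are
# assumptions inferred from how 'stream' and 'photo' are used above, and
# 'crowd.mp4' is a placeholder file name.
main(True, 0)             # live webcam stream; press 'q' to quit
main(False, 'crowd.mp4')  # run on a file and exit after showing the result for 5 s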
def main(args=None):
    data_dir_path = '/media/venkatesh/HDD_2/data/AI_city/AIC20_track1/Dataset_A'
    csv_val = 'data/val.csv'
    csv_classes = 'data/class_list.csv'
    # Renamed from 'model' so the TF model built below does not shadow it.
    model_path = 'model_files/model_final.pt'
    video_file_info = '/home/venkatesh/Desktop/AIC20_track1/Dataset_A/list_video_id.txt'
    trainedWidth = 416
    trainedHeight = 416
    Output_Folder_Path = 'Results'
    video_path = 'input/cam_1.mp4'
    roi_path = '/media/venkatesh/HDD_2/data/AI_city/AIC20_track1/ROIs/cam_1.txt'
    classes = {
        '0': 'person', '1': 'bicycle', '2': 'car',
        '3': 'bike', '5': 'bus', '7': 'truck'
    }
    # valid_class = cfg.class_labels
    valid_class = [label.lower() for label in cfg.class_labels]
    list_of_classes = [2, 7]
    video_file_list = read_video_info(video_file_info)  # glob.glob(os.path.join(data_dir_path, '*.mp4'))

    # Save_Output_Frame = os.path.join(Output_Folder_Path)
    if not os.path.exists(Output_Folder_Path):
        os.makedirs(Output_Folder_Path)
    detection_log_dir = os.path.join(Output_Folder_Path, 'detection')
    if not os.path.exists(detection_log_dir):
        os.makedirs(detection_log_dir)

    # dataset_val = CSVDataset(data_dir=data_dir_path, train_file=csv_val, class_list=csv_classes,
    #                          transform=transforms.Compose([Normalizer(), Resizer()]))
    # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    # dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    inputs = tf.placeholder(tf.float32, [None, trainedWidth, trainedHeight, 3])
    model = nets.YOLOv3COCO(inputs, nets.Darknet19)
    Threshold_obj_score = 0.1

    ComputationTime_Filename = 'detection_computation_time.txt'
    ComputationTimeFile = open(os.path.join(detection_log_dir, ComputationTime_Filename), "w+")
    ComputationTimeFile.write('filename total_time_msec avg_time_msec\n')
    ComputationTimeFile.close()

    with tf.Session() as sess:
        sess.run(model.pretrained())
        for video_file in video_file_list:
            video_path = os.path.join(data_dir_path, video_file)
            det_filename = os.path.splitext(os.path.basename(video_file))[0] + '.txt'
            DectionFile = open(os.path.join(detection_log_dir, det_filename), "w+")
            ComputationTimeFile = open(os.path.join(detection_log_dir, ComputationTime_Filename), "a+")
            video_reader = cv2.VideoCapture(video_path)
            num_files = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
            with tqdm(total=num_files, file=sys.stdout) as pbar:
                fCount = 0
                DP_computation_Time = []
                for count in range(num_files):
                    _, image = video_reader.read()
                    cam_image_h, cam_image_w, _ = image.shape
                    org_img = image.copy()
                    image_h, image_w, _ = org_img.shape
                    scale_w = cam_image_w / trainedWidth
                    scale_h = cam_image_h / trainedHeight
                    img = cv2.resize(image, (trainedWidth, trainedHeight))
                    _data = np.array(img).reshape(-1, trainedWidth, trainedHeight, 3)

                    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
                    DP_time_start = time.perf_counter()
                    # Detection module call
                    preds = sess.run(model.preds, {inputs: model.preprocess(_data)})
                    boxes = model.get_boxes(preds, _data.shape[1:3])
                    DP_time_elapsed = time.perf_counter() - DP_time_start
                    DP_computation_Time.append(DP_time_elapsed)

                    temp_boxes = np.array(boxes)
                    obj_bbox = []
                    obj_class = []
                    for obj_class_id in list_of_classes:
                        if str(obj_class_id) in classes:
                            label_name = classes[str(obj_class_id)]
                            if len(temp_boxes) != 0:
                                for i in range(len(temp_boxes[obj_class_id])):
                                    bbox = temp_boxes[obj_class_id][i]
                                    obj_confidence_score = bbox[4]
                                    if obj_confidence_score >= Threshold_obj_score:
                                        # Scale the 416x416 detection back to the original frame size.
                                        x1 = int(bbox[0] * scale_w)
                                        y1 = int(bbox[1] * scale_h)
                                        x2 = int(bbox[2] * scale_w)
                                        y2 = int(bbox[3] * scale_h)
                                        obj_bbox.append([x1, y1, x2, y2])
                                        # label_name = labels[int(classification[idxs[0][j]])]
                                        obj_index = valid_class.index(label_name)
                                        obj_class.append(obj_index)
                                        DectionFile.write('{} {} {} {} {} {} {:0.4f}\n'.format(
                                            fCount + 1, obj_index, x1, y1, x2, y2, obj_confidence_score))
                    # '&' (not '|') is required here: 'waitKey(25) | 0xFF' is
                    # always truthy, so the original broke out on every frame.
                    if cv2.waitKey(25) & 0xFF == ord('q'):
                        cv2.destroyAllWindows()
                        break
                    fCount += 1
                    pbar.set_description('processed: %d' % fCount)
                    pbar.update(1)
            ComputationTimeFile.write('{} {:0.2f} {:0.2f}\n'.format(
                os.path.basename(video_file),
                np.sum(DP_computation_Time) * 1000,
                np.mean(DP_computation_Time) * 1000))
            DectionFile.close()
            ComputationTimeFile.close()
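# Standard entry point so the script can be run directly; an assumption, since
# the original snippet does not show how main() is invoked.
if __name__ == '__main__':
    main()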