def detect_emotion_in_frame():
    global ct, er
    detected_emotions_on_faces = OrderedDict()
    request_data = request.get_json()
    if request_data["clearGlobals"]:
        print("reset signal sent")
        ct = CentroidTracker(max_dissapeared=10)
        er = EmotionRecognition()
        gc.collect()
    try:
        # the [23:] slice strips the "data:image/jpeg;base64," data-URL prefix
        frame = cv2.imdecode(
            np.frombuffer(
                base64.b64decode(request_data["capturedFrame"][23:]),
                np.uint8),
            cv2.IMREAD_COLOR)
    except Exception:
        return jsonify({"error": "error"})
    rects = er.detect_faces(frame)
    objects, dissapeared = ct.track(rects)
    frame, detected_emotions_on_faces = er.detect_emotions(objects)
    flag, encoded_img = cv2.imencode(".jpg", frame)
    return jsonify({
        "frame": base64.b64encode(encoded_img).decode("utf-8"),
        "detectedEmotionsOnFaces": list(detected_emotions_on_faces.values()),
        "dissapearedFaces": [{"id": d[0], "counter": d[1]}
                             for d in dissapeared.items()]
    })

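# The endpoint above expects "capturedFrame" to carry a data-URL-prefixed
# base64 JPEG: the [23:] slice drops the 23-character "data:image/jpeg;base64,"
# prefix. A minimal sketch of a matching client call; the /detect route and
# the local port are assumptions, not taken from the snippet:
import base64

import requests

with open("face.jpg", "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")
payload = {
    "clearGlobals": False,
    "capturedFrame": "data:image/jpeg;base64," + encoded,
}
resp = requests.post("http://localhost:5000/detect", json=payload)
print(resp.json().get("detectedEmotionsOnFaces"))
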
def track(video_name):
    ct = CentroidTracker()
    (H, W) = (None, None)
    net = cv2.dnn.readNetFromCaffe("deploy.prototxt",
                                   "res10_300x300_ssd_iter_140000.caffemodel")
    print("[INFO] starting video stream...")
    stream = cv2.VideoCapture(video_name)
    fps = FPS().start()
    while True:
        (grabbed, frame) = stream.read()
        if not grabbed:
            break
        # frame = imutils.resize(frame, width=400)
        if W is None or H is None:
            (H, W) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(frame, 1.0, (W, H), (104.0, 177.0, 123.0))
        net.setInput(blob)
        detections = net.forward()
        print(detections.shape[2])
        rects = []
        for i in range(0, detections.shape[2]):
            if detections[0, 0, i, 2] > 0.85:
                box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
                rects.append(box.astype("int"))
                (startX, startY, endX, endY) = box.astype("int")
                det = frame[startY:endY, startX:endX]
                # cv2.rectangle(frame, (startX, startY), (endX, endY),
                #               (0, 255, 0), 2)
        # update our centroid tracker using the computed set of bounding
        # box rectangles
        objects = ct.update(rects, frame)
        # loop over the tracked objects
        if len(rects) != 0:
            for (objectID, centroid) in objects.items():
                text = "ID {}".format(objectID)
                cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(frame, (centroid[0], centroid[1]), 4,
                           (0, 255, 0), -1)
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        fps.update()
        if key == ord("q"):
            break
    cv2.destroyAllWindows()
    fps.stop()
    return ct.pics, ct.objects

def __init__(self, maxDisappeared=50, maxDistance=10, classify=False):
    self.ct = CentroidTracker(maxDisappeared=maxDisappeared,
                              maxDistance=maxDistance)
    self.trackers = []
    self.trackableObjects = {}
    self.frameID = 0
    self.lastObjects = []
    self.classify = classify

def __init__(self, fcfg='sim.json'):
    # initialize variables
    self.env = env
    self.countAll = 0
    self.ldir = ['E', 'NE', 'N', 'NW', 'W', 'SW', 'S', 'SE']
    self.ldirpop = ['EV1D' + Dir for Dir in self.ldir]
    self.lratepop = ['ER']  # populations that we calculate rates for
    for d in self.ldir:
        self.lratepop.append('EV1D' + d)
    # NEURON Vectors for firing rate calculations
    self.dFVec = OrderedDict({pop: h.Vector() for pop in self.lratepop})
    # python objects for firing rate calculations
    self.dFiringRates = OrderedDict(
        {pop: np.zeros(dconf['net'][pop]) for pop in self.lratepop})
    # receptive field peak angles for the direction selective populations
    # (evenly spaced 45 degree steps, so SW is 225.0)
    self.dAngPeak = OrderedDict({
        'EV1DE': 0.0, 'EV1DNE': 45.0,
        'EV1DN': 90.0, 'EV1DNW': 135.0,
        'EV1DW': 180.0, 'EV1DSW': 225.0,
        'EV1DS': 270.0, 'EV1DSE': 315.0
    })
    # angular receptive field (RF) sigma squared, used for dir selective
    # neuron RFs
    self.AngRFSigma2 = dconf['net']['AngRFSigma']**2
    if self.AngRFSigma2 <= 0.0:
        self.AngRFSigma2 = 1.0
    # input image XY plane width/height
    self.input_dim = int(np.sqrt(dconf['net']['ER']))
    # direction sensitive neuron XY plane width/height
    self.dirSensitiveNeuronDim = int(np.sqrt(dconf['net']['EV1DE']))
    # min, max firing rate (Hz) for dir sensitive neurons
    self.dirSensitiveNeuronRate = (dconf['net']['DirMinRate'],
                                   dconf['net']['DirMaxRate'])
    # integrate this many actions together before returning reward
    # information to the model
    self.intaction = int(dconf['actionsPerPlay'])
    # these are Pong-specific coordinate ranges; should later move out of
    # this function into Pong-specific functions
    self.courtYRng = (34, 194)    # court y range
    self.courtXRng = (20, 140)    # court x range
    self.racketXRng = (141, 144)  # racket x range
    self.dObjPos = {'racket': [], 'ball': []}
    self.last_obs = []       # previous observation
    self.last_ball_dir = 0   # last ball direction
    self.FullImages = []     # full resolution images from game environment
    # low resolution images from game environment used as input to the
    # neuronal network model
    self.ReducedImages = []
    self.ldflow = []  # list of dictionaries of optical flow (motion) fields
    if dconf['DirectionDetectionAlgo']['CentroidTracker']:
        self.ct = CentroidTracker()
        self.objects = OrderedDict()       # objects detected in current frame
        self.last_objects = OrderedDict()  # objects detected in previous frame

def __init__(self, proxy_map):
    super(SpecificWorker, self).__init__(proxy_map)
    # self.Period = 2000
    # self.timer.start(self.Period)
    self.available_trackers = ["dlib", "mosse", "csrt", "kcf", "medianflow"]
    self.classes = [
        "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
        "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse",
        "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
        "tvmonitor"
    ]
    self.opencv_trackers = {
        "csrt": cv2.TrackerCSRT_create,
        "kcf": cv2.TrackerKCF_create,
        "medianflow": cv2.TrackerMedianFlow_create,
        "mosse": cv2.TrackerMOSSE_create
    }
    self.input = None
    self.save_results = None
    self.selected_tracker = None
    self.model = None
    self.prototxt = None
    self.net = None
    self.fps = None
    self.stream = None
    self.frame = None
    self.rgb = None
    self.rects = []
    self.trackers = None
    self.confidence = 0.5
    self.skip_frames = 30
    self.width = None
    self.height = None
    self.read_ipstream = False
    self.writer = None
    self.ct = CentroidTracker(maxDisappeared=50, maxDistance=40)
    self.trackableObjects = {}
    self.totalFrames = 0
    self.peopleInside = 0
    self.Up = 0
    self.Down = 0
    self.dict_id_position = {}
    # note: selected_tracker is still None at this point, so this branch
    # always creates a MultiTracker at construction time
    if self.selected_tracker != "dlib":
        self.trackers = cv2.MultiTracker_create()
    else:
        self.trackers = []
    self.peopleCounterMachine.start()

def __init__(self, usePiCamera, width, height, camPort):
    self.ct = CentroidTracker()
    self.targetID = -1000000
    self.targetCentroid = []
    self.radius = 20
    self.cap = VideoStream(usePiCamera, width, height, camPort).start()
    self.haar_cascade = cv2.CascadeClassifier("data/HS.xml")
    self.frameCount = 0
    self.frameCaptureNumber = 7
    self.thresholdX = int(width / 2)
    self.thresholdY = int(height / 2)
    self.width = width
    self.height = height

def centroids_listener():
    rospy.init_node('centriods_listener', anonymous=True)
    # initialize the centroid tracker
    global ct
    ct = CentroidTracker()
    rospy.Subscriber('centroid2', Floatlist, callback)
    rospy.spin()

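# The node above hands each Floatlist message to a callback that is not shown.
# A minimal sketch of what it might look like, assuming msg.data is a flat
# list of floats packing one (startX, startY, endX, endY) box per detection
# (the field name and layout are assumptions):
def callback(msg):
    rects = [tuple(int(v) for v in msg.data[i:i + 4])
             for i in range(0, len(msg.data), 4)]
    objects = ct.update(rects)
    for object_id, centroid in objects.items():
        rospy.loginfo("ID %d at %s", object_id, centroid)
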
def ObjectTracking():
    ct = CentroidTracker()
    camera = cv2.VideoCapture(gstreamer_pipeline(flip_method=0),
                              cv2.CAP_GSTREAMER)
    if not camera.isOpened():
        raise RuntimeError('Could not start camera.')
    try:
        while True:
            _, img = camera.read()
            boxes, confs, clss = detector.prediction(img, conf_class=[1])
            img = detector.draw_boxes(img, boxes, confs, clss)
            objects = ct.update(boxes)
            if len(boxes) > 0 and 1 in clss:
                print("detected {} {} {}".format(confs, objects, boxes))
                # print("conf", confs)
                # print('clss', clss)
                # print('boxes', boxes)
                # loop over the tracked objects
                for (objectID, centroid) in objects.items():
                    text = "ID {}".format(objectID)
                    cv2.putText(img, text,
                                (centroid[0] - 10, centroid[1] - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                    cv2.circle(img, (centroid[0], centroid[1]), 4,
                               (0, 255, 0), -1)
                day = datetime.now().strftime("%Y%m%d")
                directory = os.path.join(IMAGE_FOLDER, 'pi', day)
                if not os.path.exists(directory):
                    os.makedirs(directory)
                hour = datetime.now().strftime("%H%M%S")
                filename_output = os.path.join(
                    directory, "{}_{}_.jpg".format(hour, "person"))
                cv2.imwrite(filename_output, img)
    except KeyboardInterrupt:
        print('interrupted!')
        camera.release()
        print(type(objects))
        print(objects)
    except Exception as e:
        print('interrupted! by:')
        print(e)
        camera.release()
        print(type(objects))
        print(objects)

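# gstreamer_pipeline is referenced above but not defined in the snippet. A
# common formulation for Jetson CSI cameras looks like the sketch below; the
# exact capture parameters are assumptions:
def gstreamer_pipeline(capture_width=1280, capture_height=720,
                       display_width=1280, display_height=720,
                       framerate=30, flip_method=0):
    # build a GStreamer string: CSI camera source -> flip -> BGR appsink
    return (
        "nvarguscamerasrc ! "
        "video/x-raw(memory:NVMM), width=(int)%d, height=(int)%d, "
        "format=(string)NV12, framerate=(fraction)%d/1 ! "
        "nvvidconv flip-method=%d ! "
        "video/x-raw, width=(int)%d, height=(int)%d, format=(string)BGRx ! "
        "videoconvert ! video/x-raw, format=(string)BGR ! appsink"
        % (capture_width, capture_height, framerate, flip_method,
           display_width, display_height)
    )
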
def __init__(self, fullPath, enableImshow=0, enableGPU=0, printData=0,
             printProgress=1, exportJSON=0, exportCSV=1):
    self.printData = printData
    self.fullPath = fullPath
    self.enableImshow = enableImshow
    self.exportJSON = exportJSON
    self.exportCSV = exportCSV
    self.printProgress = printProgress
    self.basepath, self.filename, self.videoStartDatetime = \
        self.getStartDatetime(self.fullPath)
    self.tracker = CentroidTracker(maxDisappeared=20, maxDistance=90)
    self.net, self.faceNet, self.ageNet, self.genderNet = \
        self.modelsInit(enableGPU)
    self.timestamp = self.videoStartDatetime
    self.DATADICT = {
        "timestamp": "", "screenTime": 0, "dwellTime": 0, "totalCount": 0,
        "impressions": 0, "males": 0, "females": 0, "young": 0,
        "middleAged": 0, "elderly": 0
    }
    if exportJSON:
        data = {self.filename: []}
        self.jsonPathName = os.path.join(self.basepath,
                                         self.filename + ".json")
        self.write_json(data)
    if exportCSV:
        data = ['timestamp', 'screenTime', 'dwellTime', 'totalCount',
                'impressions', 'males', 'females', 'young', 'middleAged',
                'elderly']
        self.csvPathName = os.path.join(self.basepath, self.filename + ".csv")
        self.write_csv(data, 'w')
    self.run()
    if exportJSON:
        print("JSON- ", self.jsonPathName)
    if exportCSV:
        print("CSV- ", self.csvPathName)

def __init__(self, args):
    if args['method'] == 'HOG':
        self.method = Hog(args['confidence'])
    elif args['method'] == 'CAFFE':
        self.method = Caffe(args['confidence'])
    elif args['method'] == 'YOLO':
        self.method = Yolo(args['confidence'])
    else:
        print('Fail, unknown algorithm. Try with -m HOG, -m CAFFE, -m YOLO')
        rospy.signal_shutdown('Quit')
    # initialize the bridge that converts images arriving on ROS topics
    self.bridge = CvBridge()
    # initialize our centroid tracker and frame dimensions
    self.ct = CentroidTracker()
    # initialize the subscriber and publisher
    self.image_sub = rospy.Subscriber("/ardrone/image_raw", Image,
                                      self.callback)
    self.image_pub = rospy.Publisher("image_processed", Image, queue_size=10)
    self.controller = DroneController(args)
    self.droneStatus = DroneStatus()
    self.previous_position = None
    self.actual_position = None
    self.detection_status = DETECTION_STATUS['SearchingPerson']
    self.last_action = {'roll': 0, 'pitch': 0, 'yaw_velocity': 0,
                        'z_velocity': 0}
    self.shutdown = 500

class Camera(BaseCamera):
    video_source = 0
    ct = CentroidTracker()

    @staticmethod
    def set_video_source(source):
        Camera.video_source = source

    @staticmethod
    def frames():
        camera = cv2.VideoCapture(Camera.video_source)
        if not camera.isOpened():
            raise RuntimeError('Could not start camera.')
        while True:
            # read current frame
            _, img = camera.read()
            yield img

    @staticmethod
    def prediction(img):
        output = detector.prediction(img)
        df = detector.filter_prediction(output, img)
        img = detector.draw_boxes(img, df)
        return img

    @staticmethod
    def object_track(img):
        output = detector.prediction(img)
        df = detector.filter_prediction(output, img)
        img = detector.draw_boxes(img, df)
        boxes = df[['x1', 'y1', 'x2', 'y2']].values
        print(df)
        objects = Camera.ct.update(boxes)
        if len(boxes) > 0 and df['class_name'].str.contains('person').any():
            for (objectID, centroid) in objects.items():
                text = "ID {}".format(objectID)
                cv2.putText(img, text, (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(img, (centroid[0], centroid[1]), 4,
                           (0, 255, 0), -1)
        return img

    @staticmethod
    def img_to_base64(img):
        """Encode as a JPEG image and return it as a base64 string."""
        buffer = cv2.imencode('.jpg', img)[1].tobytes()
        jpg_as_text = base64.b64encode(buffer)
        base64_string = jpg_as_text.decode('utf-8')
        return base64_string

def display_wrapper(out, FLAGS, in_queue: Queue, in2_queue: Queue):
    print("display_wrapper: {}".format(threading.current_thread()))
    global stop_threads
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    data_log = {}
    frame_count = 0
    ct = CentroidTracker()
    while True:
        data = in_queue.get()
        img = in2_queue.get()
        if data is None or img is None:
            break
        boxes, scores, classes, nums, fps = (data['boxes'], data['scores'],
                                             data['classes'], data['nums'],
                                             data['fps'])
        with TimeMeasure('Display frame:' + str(frame_count)):
            img, rects, log = draw_outputs(img,
                                           (boxes, scores, classes, nums),
                                           class_names)
            img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
                              cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                              (0, 0, 255), 2)
            objects = ct.update(rects)
            if FLAGS.output:
                out.write(img)
            data_log['frame{}'.format(str(frame_count))] = log
            frame_count += 1
            cv2.imshow('output', img)
            if cv2.waitKey(1) == ord('q'):
                stop_threads = True
                break
    with open(FLAGS.logs, 'w') as f:
        json.dump(data_log, f)
    cv2.destroyAllWindows()

class Camera(BaseCamera):
    video_source = 0
    ct = CentroidTracker()

    @staticmethod
    def set_video_source(source):
        Camera.video_source = source

    @staticmethod
    def frames():
        camera = cv2.VideoCapture(gstreamer_pipeline(flip_method=0),
                                  cv2.CAP_GSTREAMER)
        if not camera.isOpened():
            raise RuntimeError('Could not start camera.')
        while True:
            # read current frame
            _, img = camera.read()
            yield img

    @staticmethod
    def prediction(img, conf_class=[]):
        boxes, confs, clss = detector.prediction(img, conf_class=conf_class)
        img = detector.draw_boxes(img, boxes, confs, clss)
        return img

    @staticmethod
    def object_track(img, conf_class=[]):
        boxes, confs, clss = detector.prediction(img, conf_class=conf_class)
        img = detector.draw_boxes(img, boxes, confs, clss)
        objects = Camera.ct.update(boxes)
        if len(boxes) > 0 and 1 in clss:
            for (objectID, centroid) in objects.items():
                text = "ID {}".format(objectID)
                cv2.putText(img, text, (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(img, (centroid[0], centroid[1]), 4,
                           (0, 255, 0), -1)
        return img

    @staticmethod
    def img_to_base64(img):
        """Encode as a JPEG image and return it as a base64 string."""
        buffer = cv2.imencode('.jpg', img)[1].tobytes()
        jpg_as_text = base64.b64encode(buffer)
        base64_string = jpg_as_text.decode('utf-8')
        return base64_string

def _smooth_objects(all_frames):
    tracker_list = []
    # Since we assume that objects cannot change types, we separately run the
    # object tracking algorithm for each object type
    unique_obj_types = set(
        obj['name'] for frame in all_frames for obj in frame)
    print('Unique object types: ' + str(unique_obj_types))
    # Initialize a centroid tracker for each object type
    trackers = {
        obj_type: CentroidTracker(maxDisappeared=15)
        for obj_type in unique_obj_types
    }
    for (frame_idx, frame) in enumerate(all_frames):
        new_frame_list = []
        for obj_type in unique_obj_types:
            # Update the centroid tracker (string equality, so '==' rather
            # than the identity check 'is' used in the original)
            trackers[obj_type].update([
                obj['box_points'] for obj in frame if obj['name'] == obj_type
            ])
            for (ID, centroid) in trackers[obj_type].objects.items():
                # concatenate object type with object ID
                new_ID = obj_type + '_' + str(ID)
                for obj in frame:
                    if obj['name'] == obj_type:
                        # Since we need to output (ID, bounding box) and the
                        # centroid tracker doesn't record bounding boxes,
                        # match each ID with its bounding box by centroid;
                        # this solution implicitly assumes each object of a
                        # specified type within each frame has a distinct
                        # centroid
                        obj_centroid = calc_centroid(obj['box_points'])
                        if (abs(obj_centroid[0] - centroid[0]) < sys.float_info.epsilon) and \
                           (abs(obj_centroid[1] - centroid[1]) < sys.float_info.epsilon):
                            new_frame_list.append(
                                (new_ID, obj['box_points'],
                                 obj['percentage_probability']))
        tracker_list.append(new_frame_list)
    return tracker_list

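# _smooth_objects relies on a calc_centroid helper that is not shown. For the
# epsilon comparison above to succeed, it has to derive centroids exactly the
# way the tracker does; a plausible sketch, assuming the usual
# (startX, startY, endX, endY) box layout and integer-truncated midpoints:
def calc_centroid(box_points):
    (start_x, start_y, end_x, end_y) = box_points
    return (int((start_x + end_x) / 2.0), int((start_y + end_y) / 2.0))
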
    output_dict = sess.run(tensor_dict,
                           feed_dict={image_tensor: np.expand_dims(image, 0)})
    # all outputs are float32 numpy arrays, so convert types as appropriate
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    output_dict['detection_classes'] = \
        output_dict['detection_classes'][0].astype(np.uint8)
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict


# initialize our centroid tracker and frame dimensions
ct = CentroidTracker()
(H, W) = (None, None)

# load our serialized model from disk
print("[INFO] loading model...")
# net = cv2.dnn.readNetFromCaffe("deploy.prototxt",
#                                "res10_300x300_ssd_iter_140000.caffemodel")
# net = cv2.dnn.readNetFromTensorflow("./frozen_inference_graph.pb",
#                                     "./label_map.pbtxt")
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
test = cv2.imread("board.jpg")

# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = VideoStream(src=1).start()
time.sleep(2.0)

            rects.append((startX, startY, endX, endY))
    return rects


class TrackableObject:
    # an object in the frame
    def __init__(self, objectID, centroid):
        self.objectID = objectID  # unique id of each object
        self.centroids = [centroid]
        self.counted = False


if args.Bonus:
    # load the model
    net = cv2.dnn.readNetFromCaffe("MODEL/SSD_MODEL.txt",
                                   "MODEL/SSD_MODEL.bin")
    # centroid tracker for tracking bounding boxes (uses euclidean distance
    # between centroids)
    ct = CentroidTracker(maxDisappeared=40, maxDistance=50)
    trackableObjects = {}
    total = 0
    totalFrames = 0
    vs = cv2.VideoCapture("video/sk.mp4")
    cap1 = cv2.VideoCapture("video/mt.mp4")
    cap2 = cv2.VideoCapture("video/muqi.mp4")
    writer = None
    while True:
        (grabbed, frame) = cap1.read()
        (grabbed1, frame1) = cap2.read()
        (grabbed2, frame2) = vs.read()
        if not grabbed or not grabbed2 or not grabbed1:
            break

class FetchData:
    def __init__(self, fullPath, enableImshow=0, enableGPU=0, printData=0,
                 printProgress=1, exportJSON=0, exportCSV=1):
        self.printData = printData
        self.fullPath = fullPath
        self.enableImshow = enableImshow
        self.exportJSON = exportJSON
        self.exportCSV = exportCSV
        self.printProgress = printProgress
        self.basepath, self.filename, self.videoStartDatetime = \
            self.getStartDatetime(self.fullPath)
        self.tracker = CentroidTracker(maxDisappeared=20, maxDistance=90)
        self.net, self.faceNet, self.ageNet, self.genderNet = \
            self.modelsInit(enableGPU)
        self.timestamp = self.videoStartDatetime
        self.DATADICT = {
            "timestamp": "", "screenTime": 0, "dwellTime": 0,
            "totalCount": 0, "impressions": 0, "males": 0, "females": 0,
            "young": 0, "middleAged": 0, "elderly": 0
        }
        if exportJSON:
            data = {self.filename: []}
            self.jsonPathName = os.path.join(self.basepath,
                                             self.filename + ".json")
            self.write_json(data)
        if exportCSV:
            data = ['timestamp', 'screenTime', 'dwellTime', 'totalCount',
                    'impressions', 'males', 'females', 'young', 'middleAged',
                    'elderly']
            self.csvPathName = os.path.join(self.basepath,
                                            self.filename + ".csv")
            self.write_csv(data, 'w')
        self.run()
        if exportJSON:
            print("JSON- ", self.jsonPathName)
        if exportCSV:
            print("CSV- ", self.csvPathName)

    def write_csv(self, data, m):
        with open(self.csvPathName, mode=m) as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerow(data)

    def write_json(self, data):
        with open(self.jsonPathName, 'w') as f:
            json.dump(data, f, indent=2)

    def read_json(self):
        with open(self.jsonPathName) as f:
            return json.load(f)

    def run(self):
        cap = cv2.VideoCapture(self.fullPath)
        video_framerate = cap.get(cv2.CAP_PROP_FPS)
        video_frametime = timedelta(seconds=(1 / video_framerate))
        totalFrames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        ln = self.net.getLayerNames()
        ln = [ln[i[0] - 1] for i in self.net.getUnconnectedOutLayers()]
        object_id_list = []
        impressionsObjectId = []
        genderDecidedConfidence = {}
        ageDecidedConfidence = {}
        gender = {}
        age = {}
        total_count = 0
        screenTime_Time = dict()
        dwellTime_Time = dict()
        AccumulatedScreenTime = 0
        AccumulatedDwellTime = 0
        fps = 0
        frameNo = 0
        while True:
            start_time = time.time()
            ret, image = cap.read()
            if not ret:
                break
            image_cpy = copy.deepcopy(image)
            unique_count = 0
            current_count = 0
            h, w = image.shape[:2]
            DwellYLevel = h // 2
            cv2.line(image, (0, DwellYLevel), (w, DwellYLevel),
                     (255, 128, 0), 2)
            cv2.putText(image, "Dwell Area", (w - 250, DwellYLevel + 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 128, 0), 1)
            blob = cv2.dnn.blobFromImage(image_cpy, 1 / 255.0, (416, 416),
                                         swapRB=True, crop=False)
            self.net.setInput(blob)
            layer_outputs = self.net.forward(ln)
            outputs = np.vstack(layer_outputs)
            boxes, confidences, class_ids, rects = [], [], [], []
            for output in outputs:
                scores = output[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if class_id == 0 and confidence > 0.5:
                    box = output[:4] * np.array([w, h, w, h])
                    (centerX, centerY, width, height) = box.astype("int")
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)
            indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.6, 0.3)
            if len(indices) > 0:
                for i in indices.flatten():
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])
                    cv2.rectangle(image, (x, y), (x + w, y + h),
                                  (0, 0, 255), 1)
                    person_box = [x, y, x + w, y + h]
                    rects.append(person_box)
            boundingboxes = np.array(rects)
            boundingboxes = boundingboxes.astype(int)
            objects, dereg = self.tracker.update(rects)
            for (objectId, bbox) in objects.items():
                x1, y1, x2, y2 = bbox
                x1 = abs(int(x1))
                y1 = abs(int(y1))
                x2 = abs(int(x2))
                y2 = abs(int(y2))
                centroidX = int((x1 + x2) / 2)
                centroidY = int((y1 + y2) / 2)
                cv2.circle(image, (centroidX, centroidY), 5,
                           (255, 255, 255), thickness=-1)
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
                if objectId not in object_id_list:
                    # new person: remember the ID and initialize its counters
                    object_id_list.append(objectId)
                    unique_count += 1
                    screenTime_Time[objectId] = 0
                    dwellTime_Time[objectId] = 0
                    genderDecidedConfidence[objectId] = 0
                    gender[objectId] = ""
                    ageDecidedConfidence[objectId] = 0
                    age[objectId] = ""
                else:
                    # existing person
                    screenTime_Time[objectId] += 1 / video_framerate
                    AccumulatedScreenTime += 1 / video_framerate
                    if centroidY > DwellYLevel:
                        dwellTime_Time[objectId] += 1 / video_framerate
                        AccumulatedDwellTime += 1 / video_framerate
                        if objectId not in impressionsObjectId:
                            impressionsObjectId.append(objectId)
                            self.DATADICT["impressions"] += 1
                        personCrop = image_cpy[y1:y2, x1:x2]
                        frapersonCropEnlarge = cv2.resize(personCrop, (0, 0),
                                                          fx=2, fy=2)
                        # cv2.imshow("person", frapersonCropEnlarge)
                        ageGenderResult = self.detect_and_predict_age(
                            frapersonCropEnlarge)
                        if ageGenderResult:
                            if ageGenderResult[0]["gender"][1] > genderDecidedConfidence[objectId]:
                                gender[objectId] = ageGenderResult[0]["gender"][0]
                                genderDecidedConfidence[objectId] = ageGenderResult[0]["gender"][1]
                            if ageGenderResult[0]["age"][1] > ageDecidedConfidence[objectId]:
                                age[objectId] = ageGenderResult[0]["age"][0]
                                ageDecidedConfidence[objectId] = ageGenderResult[0]["age"][1]
                # cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 2)
                ID_text = "Person:" + str(objectId)
                ScreenTimeText = "ScreenTime: {:.2f}s".format(
                    screenTime_Time[objectId])
                DwellTimeText = "DwellTime: {:.2f}s".format(
                    dwellTime_Time[objectId])
                cv2.putText(image, ID_text, (x1, y1 - 7),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,
                            (0, 0, 255), 2)
                cv2.putText(image, ScreenTimeText, (x1, y1 - 30),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,
                            (0, 0, 255), 2)
                cv2.putText(image, DwellTimeText, (x1, y1 - 60),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1.5,
                            (0, 0, 255), 2)
                current_count += 1
            for l in list(dereg.items()):
                objectID = l[0]
                if age[objectID] and gender[objectID]:
                    if age[objectID] == "Young":
                        self.DATADICT["young"] += 1
                    elif age[objectID] == "Middle":
                        self.DATADICT["middleAged"] += 1
                    elif age[objectID] == "Elderly":
                        self.DATADICT["elderly"] += 1
                    if gender[objectID] == "MALE":
                        self.DATADICT["males"] += 1
                    elif gender[objectID] == "FEMALE":
                        self.DATADICT["females"] += 1
                self.tracker.deleteDereg(l[0])
            total_count += unique_count
            self.DATADICT["totalCount"] = total_count
            total_count_text = "Total Count:" + str(total_count)
            cv2.putText(image, total_count_text, (5, 400),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
            current_count_text = "Current Count:" + str(current_count)
            cv2.putText(image, current_count_text, (5, 450),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
            self.DATADICT["screenTime"] = round(AccumulatedScreenTime / 60, 2)
            AccumulatedScreenTime_Text = "Total ScreenTime: {:.2f}m".format(
                AccumulatedScreenTime / 60)
            cv2.putText(image, AccumulatedScreenTime_Text, (5, 550),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
            self.DATADICT["dwellTime"] = round(AccumulatedDwellTime / 60, 2)
            AccumulatedDwellTime_Text = "Total DwellTime: {:.2f}m".format(
                AccumulatedDwellTime / 60)
            cv2.putText(image, AccumulatedDwellTime_Text, (5, 600),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
            end_time = time.time()
            frameTime = end_time - start_time
            frameTimeDatetime = timedelta(seconds=frameTime)
            self.timestamp += video_frametime
            self.DATADICT["timestamp"] = self.timestamp.strftime(
                "%m/%d/%Y, %H:%M:%S")
            fps = 1 / frameTime
            fps_text = "FPS: {:.2f}".format(fps)
            cv2.putText(image, fps_text, (5, 30), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (0, 0, 255), 2)
            frameNo += 1
            if self.enableImshow:
                cv2.imshow("Application", image)
                key = cv2.waitKey(1)
                if key == ord("q"):
                    break
            # publish to the module-level globals consumed elsewhere
            global DATADICT, FRAME
            DATADICT = self.DATADICT.copy()
            FRAME = copy.deepcopy(image)
            if self.printData:
                print(DATADICT)
            if self.printProgress:
                print("Processed", frameNo, "of", totalFrames, "Frames |",
                      round((frameNo / totalFrames) * 100, 2),
                      "% Completion |", fps_text)
            if self.exportJSON:
                data = self.read_json()
                data[self.filename].append(DATADICT)
                self.write_json(data)
            if self.exportCSV:
                data = []
                for key in DATADICT:
                    data.append(DATADICT[key])
                self.write_csv(data, 'a')
        cap.release()
        cv2.destroyAllWindows()
        return

    def detect_and_predict_age(self, frame):
        faceNet = self.faceNet
        ageNet = self.ageNet
        genderNet = self.genderNet
        minConf = 0.5
        AGE_BUCKETS = ["(0-2)", "(4-6)", "(8-12)", "(15-20)", "(25-32)",
                       "(38-43)", "(48-53)", "(60-100)"]
        AGE_CATEGORY = {
            "Young": ["(0-2)", "(4-6)", "(8-12)", "(15-20)"],
            "Middle": ["(25-32)", "(38-43)"],
            "Elderly": ["(48-53)", "(60-100)"]
        }
        GENDER_BUCKETS = ["MALE", "FEMALE"]
        results = []
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
                                     (104.0, 177.0, 123.0))
        faceNet.setInput(blob)
        detections = faceNet.forward()
        for i in range(0, detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > minConf:
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype("int")
                face = frame[startY:endY, startX:endX]
                # ensure the face ROI is sufficiently large
                if face.shape[0] < 20 or face.shape[1] < 20:
                    continue
                faceBlob = cv2.dnn.blobFromImage(
                    face, 1.0, (227, 227),
                    (78.4263377603, 87.7689143744, 114.895847746),
                    swapRB=False)
                ageNet.setInput(faceBlob)
                preds = ageNet.forward()
                i = preds[0].argmax()
                age = AGE_BUCKETS[i]
                for key in AGE_CATEGORY:
                    if age in AGE_CATEGORY[key]:
                        ageCategory = key
                ageConfidence = preds[0][i]
                genderNet.setInput(faceBlob)
                genderPred = genderNet.forward()
                j = genderPred[0].argmax()
                gender = GENDER_BUCKETS[j]
                genderConfidence = genderPred[0][j]
                d = {
                    "loc": (startX, startY, endX, endY),
                    "age": (ageCategory, ageConfidence),
                    "gender": (gender, genderConfidence),
                }
                results.append(d)
        return results

    def getStartDatetime(self, path):
        directory = os.path.split(path)[0]
        basename = os.path.basename(path)
        filename = os.path.splitext(basename)[0]
        try:
            year = int(filename[1:5])
            month = int(filename[5:7])
            day = int(filename[7:9])
            hours = int(filename[9:11])
            minutes = int(filename[11:13])
            seconds = int(filename[13:15])
            videoStartDatetime = datetime(year, month, day, hours, minutes,
                                          seconds, 0)
        except Exception:
            videoStartDatetime = datetime.now()
            print("Invalid Video Name for DateTime extraction. "
                  "Setting start time to ", videoStartDatetime)
        return directory, filename, videoStartDatetime

    def modelsInit(self, enableGPU):
        labels = open("ModelsAndWeights/data/coco.names") \
            .read().strip().split("\n")
        net = cv2.dnn.readNetFromDarknet("ModelsAndWeights/cfg/yolov4.cfg",
                                         "ModelsAndWeights/yolov4.weights")
        faceNet = cv2.dnn.readNet(
            "ModelsAndWeights/face_detector/deploy.prototxt",
            "ModelsAndWeights/face_detector/res10_300x300_ssd_iter_140000.caffemodel",
        )
        ageNet = cv2.dnn.readNet(
            "./ModelsAndWeights/age_detector/age_deploy.prototxt",
            "./ModelsAndWeights/age_detector/age_net.caffemodel",
        )
        genderNet = cv2.dnn.readNet(
            "./ModelsAndWeights/gender_detector/gender_deploy.prototxt",
            "./ModelsAndWeights/gender_detector/gender_net.caffemodel",
        )
        if enableGPU:
            net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
            faceNet.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            faceNet.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
            ageNet.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            ageNet.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
            genderNet.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
            genderNet.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
        return net, faceNet, ageNet, genderNet

def Stream():
    st.title("Customer Tracker")
    st.text("This application will track how many customers enter and exit "
            "your premises")
    st.markdown("\n", unsafe_allow_html=True)
    camera = st.text_input("Enter Camera/Webcam Path")
    col1, col2 = st.beta_columns(2)
    if col1.button('Start ▶️') and not col2.button("Stop ⏹️"):
        if camera.isnumeric():
            camera = int(camera)
            st.info("Live Streaming")
        elif camera is not None:
            st.error("Please Enter the Correct Camera Path")
        image_placeholder = st.empty()
        # confidenceValue = 0.4
        # frameValue = 30
        # initialize the list of class labels MobileNet SSD was trained to
        # detect
        CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
                   "bottle", "bus", "car", "cat", "chair", "cow",
                   "diningtable", "dog", "horse", "motorbike", "person",
                   "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
        # load our serialized model from disk
        net = cv2.dnn.readNetFromCaffe("MobileNetSSD_deploy.prototxt",
                                       "MobileNetSSD_deploy.caffemodel")
        print("[INFO] Starting the video..")
        vs = cv2.VideoCapture(camera)
        # initialize the frame dimensions (we'll set them as soon as we read
        # the first frame from the video)
        W = None
        H = None
        # instantiate our centroid tracker, then initialize a list to store
        # each of our dlib correlation trackers, followed by a dictionary to
        # map each unique object ID to a TrackableObject
        ct = CentroidTracker(maxDisappeared=80, maxDistance=50)
        trackers = []
        trackableObjects = {}
        # initialize the total number of frames processed thus far, along
        # with the total number of objects that have moved either up or down
        totalFrames = 0
        totalDown = 0
        totalUp = 0
        x = []
        empty = []
        empty1 = []
        # start the frames per second throughput estimator
        fps = FPS().start()
        # reference time for the optional auto-stop timer below (t0 was
        # undefined in the original)
        t0 = time.time()
        # loop over frames from the video stream
        while True:
            # grab the next frame
            ret, frame = vs.read()
            # resize the frame to have a maximum width of 700 pixels (the
            # less data we have, the faster we can process it), then convert
            # the frame from BGR to RGB for dlib
            frame = imutils.resize(frame, width=700)  # default width = 500
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # if the frame dimensions are empty, set them
            if W is None or H is None:
                (H, W) = frame.shape[:2]
            # initialize the current status along with our list of bounding
            # box rectangles returned by either (1) our object detector or
            # (2) the correlation trackers
            status = "Waiting"
            rects = []
            # check to see if we should run a more computationally expensive
            # object detection method to aid our tracker
            if totalFrames % 30 == 0:
                # set the status and initialize our new set of object trackers
                status = "Detecting"
                trackers = []
                # convert the frame to a blob and pass the blob through the
                # network and obtain the detections
                blob = cv2.dnn.blobFromImage(frame, 0.007843, (W, H), 127.5)
                net.setInput(blob)
                detections = net.forward()
                # loop over the detections
                for i in np.arange(0, detections.shape[2]):
                    # extract the confidence (i.e., probability) associated
                    # with the prediction
                    confidence = detections[0, 0, i, 2]
                    # filter out weak detections by requiring a minimum
                    # confidence
                    if confidence > 0.4:
                        # extract the index of the class label from the
                        # detections list
                        idx = int(detections[0, 0, i, 1])
                        # if the class label is not a person, ignore it
                        if CLASSES[idx] != "person":
                            continue
                        # compute the (x, y)-coordinates of the bounding box
                        # for the object
                        box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
                        (startX, startY, endX, endY) = box.astype("int")
                        # construct a dlib rectangle object from the bounding
                        # box coordinates and then
                        # start the dlib correlation tracker
                        tracker = dlib.correlation_tracker()
                        rect = dlib.rectangle(startX, startY, endX, endY)
                        tracker.start_track(rgb, rect)
                        # add the tracker to our list of trackers so we can
                        # utilize it during skip frames
                        trackers.append(tracker)
            # otherwise, we should utilize our object *trackers* rather than
            # object *detectors* to obtain a higher frame processing
            # throughput
            else:
                # loop over the trackers
                for tracker in trackers:
                    # set the status of our system to be 'tracking' rather
                    # than 'waiting' or 'detecting'
                    status = "Tracking"
                    # update the tracker and grab the updated position
                    tracker.update(rgb)
                    pos = tracker.get_position()
                    # unpack the position object
                    startX = int(pos.left())
                    startY = int(pos.top())
                    endX = int(pos.right())
                    endY = int(pos.bottom())
                    # add the bounding box coordinates to the rectangles list
                    rects.append((startX, startY, endX, endY))
            # draw a horizontal line in the center of the frame -- once an
            # object crosses this line we will determine whether they were
            # moving 'up' or 'down'
            cv2.line(frame, (0, H // 2), (W, H // 2), (0, 0, 255), 3)
            cv2.putText(frame, "Prediction border",
                        (10, H - ((i * 20) + 200)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
            # use the centroid tracker to associate the (1) old object
            # centroids with (2) the newly computed object centroids
            objects = ct.update(rects)
            # loop over the tracked objects
            for (objectID, centroid) in objects.items():
                # check to see if a trackable object exists for the current
                # object ID
                to = trackableObjects.get(objectID, None)
                # if there is no existing trackable object, create one
                if to is None:
                    to = TrackableObject(objectID, centroid)
                # otherwise, there is a trackable object so we can utilize it
                # to determine direction
                else:
                    # the difference between the y-coordinate of the
                    # *current* centroid and the mean of *previous* centroids
                    # will tell us in which direction the object is moving
                    # (negative for 'up' and positive for 'down')
                    y = [c[1] for c in to.centroids]
                    direction = centroid[1] - np.mean(y)
                    to.centroids.append(centroid)
                    # check to see if the object has been counted or not
                    if not to.counted:
                        # if the direction is negative (indicating the object
                        # is moving up) AND the centroid is above the center
                        # line, count the object
                        if direction < 0 and centroid[1] < H // 2:
                            totalUp += 1
                            empty.append(totalUp)
                            to.counted = True
                        # if the direction is positive (indicating the object
                        # is moving down) AND the centroid is below the
                        # center line, count the object
                        elif direction > 0 and centroid[1] > H // 2:
                            totalDown += 1
                            empty1.append(totalDown)
                            # print(empty1[-1])
                            x = []
                            # compute the total number of people inside
                            x.append(len(empty1) - len(empty))
                            # print("Total people inside:", x)
                            # if the people limit is exceeded, send an email
                            # alert
                            if sum(x) >= config.Threshold:
                                cv2.putText(frame,
                                            "-ALERT: People limit exceeded-",
                                            (10, frame.shape[0] - 80),
                                            cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                            (0, 0, 255), 2)
                                if config.ALERT:
                                    print("[INFO] Sending email alert..")
                                    Mailer().send(config.MAIL)
                                    print("[INFO] Alert sent")
                            to.counted = True
                # store the trackable object in our dictionary
                trackableObjects[objectID] = to
                # draw both the ID of the object and the centroid of the
                # object on the output frame
                text = "ID {}".format(objectID)
                cv2.putText(frame, text,
                            (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (255, 255, 255), 2)
                cv2.circle(frame, (centroid[0], centroid[1]), 4,
                           (255, 255, 255), -1)
            # construct a tuple of information we will be displaying on the
            # frame
            info = [
                ("Exit", totalUp),
                ("Enter", totalDown),
                ("Status", status),
            ]
            info2 = [
                ("Total people inside", x),
            ]
            # display the output
            for (i, (k, v)) in enumerate(info):
                text = "{}: {}".format(k, v)
                cv2.putText(frame, text, (10, H - ((i * 20) + 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 250), 2)
            for (i, (k, v)) in enumerate(info2):
                text = "{}: {}".format(k, v)
                cv2.putText(frame, text, (265, H - ((i * 20) + 60)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                            (255, 255, 255), 2)
            # Log.csv: initiate a simple log to save data at the end of the
            # day
            # if config.Log:
            #     datetimee = [datetime.datetime.now()]
            #     d = [datetimee, empty1, empty, x]
            #     export_data = zip_longest(*d, fillvalue='')
            #     with open('Log.csv', 'w', newline='') as myfile:
            #         wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            #         wr.writerow(("End Time", "In", "Out", "Total Inside"))
            #         wr.writerows(export_data)
            # cv2.imshow("Real-Time Monitoring/Analysis Window", frame)
            # show the output frame in the Streamlit placeholder
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image_placeholder.image(frame)
            # key = cv2.waitKey(1) & 0xFF
            # if the `q` key was pressed, break from the loop
            # if key == ord("q"):
            #     break
            # increment the total number of frames processed thus far and
            # then update the FPS counter
            totalFrames += 1
            fps.update()
            if config.Timer:
                # automatic timer to stop the live stream, set to 8 hours
                # (28800 s)
                t1 = time.time()
                num_seconds = t1 - t0
                if num_seconds > 28800:
                    break
        # stop the timer and display FPS information
        fps.stop()
        print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
        print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
        # if we are not using a video file, stop the camera video stream
        # if not args.get("input", False):
        #     vs.stop()
        # otherwise, release the video file pointer
        # else:
        #     vs.release()
        # close any open windows
        cv2.destroyAllWindows()

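# The up/down decision in the counting loop above hinges on one quantity: the
# current centroid's y minus the mean y of the object's previous centroids.
# A tiny illustration of the sign convention (the values are made up):
import numpy as np

previous_ys = [300, 280, 260]  # the object has been drifting upward
current_centroid_y = 240
direction = current_centroid_y - np.mean(previous_ys)  # 240 - 280 = -40
# negative => moving up (toward smaller y); the loop additionally requires
# the centroid to be past the H // 2 line before it counts the object
print(direction < 0)  # True
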
def processVideo(prototxt, model, filepath):
    print("[INFO] Filepath: " + filepath)
    print("[INFO] model: " + model)
    print("[INFO] prototxt: " + prototxt)
    outputPath = "./userapp/output.avi"
    skipframes = 30
    conf = 0.4
    # the argument parser this function was adapted from:
    # ap = argparse.ArgumentParser()
    # ap.add_argument(prototxt)
    # ap.add_argument(model)
    # ap.add_argument(filepath)
    # ap.add_argument("-o", "--output", type=str,
    #                 help="path to optional output video file")
    # ap.add_argument("-c", "--confidence", type=float, default=0.4,
    #                 help="minimum probability to filter weak detections")
    # ap.add_argument("-s", "--skip-frames", type=int, default=30,
    #                 help="# of skip frames between detections")
    # args = vars(ap.parse_args())
    # initialize the list of class labels MobileNet SSD was trained to
    # detect
    CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
               "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
               "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
               "sofa", "train", "tvmonitor"]
    # load our serialized model from disk
    print("[INFO] loading model...")
    net = cv2.dnn.readNetFromCaffe(prototxt, model)
    # if a video path was not supplied, grab a reference to the webcam
    # if not args.get("input", False):
    #     print("[INFO] starting video stream...")
    #     vs = VideoStream(src=0).start()
    #     time.sleep(2.0)
    # otherwise, grab a reference to the video file
    # else:
    #     print("[INFO] opening video file...")
    #     vs = cv2.VideoCapture(args["input"])
    vs = cv2.VideoCapture(filepath)
    # initialize the video writer (we'll instantiate it later if need be)
    writer = None
    # initialize the frame dimensions (we'll set them as soon as we read
    # the first frame from the video)
    W = None
    H = None
    # instantiate our centroid tracker, then initialize a list to store
    # each of our dlib correlation trackers, followed by a dictionary to
    # map each unique object ID to a TrackableObject
    ct = CentroidTracker(maxDisappeared=40, maxDistance=50)
    trackers = []
    trackableObjects = {}
    # initialize the total number of frames processed thus far, along
    # with the total number of objects that have moved either up or down
    totalFrames = 0
    totalDown = 0
    totalUp = 0
    # start the frames per second throughput estimator
    fps = FPS().start()
    # loop over frames from the video stream
    while True:
        # grab the next frame and handle if we are reading from either
        # VideoCapture or VideoStream
        frame = vs.read()
        frame = frame[1] if filepath else frame
        # if we are viewing a video and we did not grab a frame then we
        # have reached the end of the video
        if filepath is not None and frame is None:
            break
        # resize the frame to have a maximum width of 500 pixels (the
        # less data we have, the faster we can process it), then convert
        # the frame from BGR to RGB for dlib
        frame = imutils.resize(frame, width=500)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # if the frame dimensions are empty, set them
        if W is None or H is None:
            (H, W) = frame.shape[:2]
        # if we are supposed to be writing a video to disk, initialize
        # the writer
        if outputPath is not None and writer is None:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(outputPath, fourcc, 30, (W, H), True)
        # initialize the current status along with our list of bounding
        # box rectangles returned by either (1) our object detector or
        # (2) the correlation trackers
= "Waiting" print("[INFO] loading model...after vs") rects = [] print("[INFO] loading model...after cv2") # check to see if we should run a more computationally expensive # object detection method to aid our tracker if totalFrames % skipframes == 0: # set the status and initialize our new set of object trackers status = "Detecting" trackers = [] # convert the frame to a blob and pass the blob through the # network and obtain the detections blob = cv2.dnn.blobFromImage(frame, 0.007843, (W, H), 127.5) net.setInput(blob) detections = net.forward() # loop over the detections for i in np.arange(0, detections.shape[2]): # extract the confidence (i.e., probability) associated # with the prediction confidence = detections[0, 0, i, 2] # filter out weak detections by requiring a minimum # confidence if confidence > conf: # extract the index of the class label from the # detections list idx = int(detections[0, 0, i, 1]) # if the class label is not a person, ignore it if CLASSES[idx] != "person": continue # compute the (x, y)-coordinates of the bounding box # for the object box = detections[0, 0, i, 3:7] * np.array([W, H, W, H]) (startX, startY, endX, endY) = box.astype("int") # construct a dlib rectangle object from the bounding # box coordinates and then start the dlib correlation # tracker tracker = dlib.correlation_tracker() rect = dlib.rectangle(startX, startY, endX, endY) tracker.start_track(rgb, rect) # add the tracker to our list of trackers so we can # utilize it during skip frames trackers.append(tracker) # otherwise, we should utilize our object *trackers* rather than # object *detectors* to obtain a higher frame processing throughput else: # loop over the trackers for tracker in trackers: # set the status of our system to be 'tracking' rather # than 'waiting' or 'detecting' status = "Tracking" # update the tracker and grab the updated position tracker.update(rgb) pos = tracker.get_position() # unpack the position object startX = int(pos.left()) startY = int(pos.top()) endX = int(pos.right()) endY = int(pos.bottom()) # add the bounding box coordinates to the rectangles list rects.append((startX, startY, endX, endY)) # draw a horizontal line in the center of the frame -- once an # object crosses this line we will determine whether they were # moving 'up' or 'down' cv2.line(frame, (0, H // 2), (W, H // 2), (0, 255, 255), 2) # use the centroid tracker to associate the (1) old object # centroids with (2) the newly computed object centroids objects = ct.update(rects) # loop over the tracked objects for (objectID, centroid) in objects.items(): # check to see if a trackable object exists for the current # object ID to = trackableObjects.get(objectID, None) # if there is no existing trackable object, create one if to is None: to = TrackableObject(objectID, centroid) # otherwise, there is a trackable object so we can utilize it # to determine direction else: # the difference between the y-coordinate of the *current* # centroid and the mean of *previous* centroids will tell # us in which direction the object is moving (negative for # 'up' and positive for 'down') y = [c[1] for c in to.centroids] direction = centroid[1] - np.mean(y) to.centroids.append(centroid) # check to see if the object has been counted or not if not to.counted: # if the direction is negative (indicating the object # is moving up) AND the centroid is above the center # line, count the object if direction < 0 and centroid[1] < H // 2: totalUp += 1 to.counted = True # if the direction is positive (indicating the object # is moving 
                    elif direction > 0 and centroid[1] > H // 2:
                        totalDown += 1
                        to.counted = True
            # store the trackable object in our dictionary
            trackableObjects[objectID] = to
            # draw both the ID of the object and the centroid of the
            # object on the output frame
            text = "ID {}".format(objectID)
            cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)
        # construct a tuple of information we will be displaying on the
        # frame
        info = [
            ("Up", totalUp),
            ("Down", totalDown),
            ("Status", status),
        ]
        # loop over the info tuples and draw them on our frame
        for (i, (k, v)) in enumerate(info):
            text = "{}: {}".format(k, v)
            cv2.putText(frame, text, (10, H - ((i * 20) + 20)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
        # check to see if we should write the frame to disk
        if writer is not None:
            writer.write(frame)
        # show the output frame
        # cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
        # increment the total number of frames processed thus far and
        # then update the FPS counter
        totalFrames += 1
        fps.update()
    # stop the timer and display FPS information
    fps.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
    # check to see if we need to release the video writer pointer
    if writer is not None:
        writer.release()
    print("[INFO] Total Up Count: " + str(totalUp))
    # if we are not using a video file, stop the camera video stream
    # if filepath is not None:
    #     vs.stop()
    # otherwise, release the video file pointer
    # else:
    #     vs.release()
    # close any open windows
    cv2.destroyAllWindows()

import math
from itertools import combinations

import cv2  # assumed missing import: cv2.dnn is used below
import numpy as np

from centroidtracker import CentroidTracker

protopath = "MobileNetSSD_deploy.prototxt"
modelpath = "MobileNetSSD_deploy.caffemodel"
detector = cv2.dnn.readNetFromCaffe(prototxt=protopath, caffeModel=modelpath)

CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle",
           "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
           "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
           "train", "tvmonitor"]

tracker = CentroidTracker(maxDisappeared=80, maxDistance=90)


def non_max_suppression_fast(boxes, overlapThresh):
    try:
        if len(boxes) == 0:
            return []
        if boxes.dtype.kind == "i":
            boxes = boxes.astype("float")
        pick = []
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]

class PersonDetector():
    def __init__(self, usePiCamera, width, height, camPort):
        self.ct = CentroidTracker()
        self.targetID = -1000000
        self.targetCentroid = []
        self.radius = 20
        self.cap = VideoStream(usePiCamera, width, height, camPort).start()
        self.haar_cascade = cv2.CascadeClassifier("data/HS.xml")
        self.frameCount = 0
        self.frameCaptureNumber = 7
        self.thresholdX = int(width / 2)
        self.thresholdY = int(height / 2)
        self.width = width
        self.height = height

    def frameAdd(self):
        self.frameCount += 1

    def checkFrameCount(self):
        if self.frameCount % self.frameCaptureNumber == 0:
            return True
        return False

    def robotMoveDirection(self, centroid, center):
        print(centroid, " ", center)
        cX = center[0]
        cY = center[1]
        targetX = centroid[0]
        targetY = centroid[1]
        diffX = cX - targetX
        diffY = cY - targetY
        hors = "Move: "
        verts = " and "
        if targetX < cX - self.radius:
            hors += "Left " + str(np.abs(diffX)) + " pixels"
        elif targetX > cX + self.radius:
            hors += "Right " + str(np.abs(diffX)) + " pixels"
        else:
            hors = ""
            verts = "Move: "
        if targetY < cY - self.radius:
            verts += "Up " + str(np.abs(diffY)) + " pixels"
        elif targetY > cY + self.radius:
            verts += "Down " + str(np.abs(diffY)) + " pixels"
        else:
            verts = ""
        return hors + verts

    def updateObjectIDs(self, upper_body):
        return self.ct.update(upper_body)

    def detectPerson(self):
        capturedframe = self.cap.getFrame()
        frame = capturedframe[0:self.height, 0:int(self.width / 2)]
        centerX = -10000
        centerY = -100000
        cColor = (0, 0, 255)
        if self.checkFrameCount():
            try:
                # use a grayscale picture, also for faster detection
                gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
                upper_body = self.haar_cascade.detectMultiScale(
                    gray,
                    scaleFactor=1.1,
                    minNeighbors=12,
                    # min size for a valid detection; changes according to
                    # video size or body size in the video
                    minSize=(50, 100),
                    flags=cv2.CASCADE_SCALE_IMAGE)
                if len(upper_body) > 0:
                    (x, y, w, h) = upper_body[0]
                    # draw a green rectangle with a thickness of 1
                    centerX = int(x + w / 2)
                    centerY = int(y + h / 2)
                    cv2.rectangle(capturedframe, (x, y), (x + w, y + h),
                                  (0, 255, 0), 1)
                    cv2.line(capturedframe, (centerX, y), (centerX, y + h),
                             (255, 0, 0), 1)
                    cv2.line(capturedframe, (x, centerY), (x + w, centerY),
                             (255, 0, 0), 1)
                    cv2.circle(capturedframe, (centerX, centerY), 1,
                               (0, 0, 255), 1)
                    # green label text, size 0.5, thickness 2
                    cv2.putText(capturedframe, "Head and Shoulders Detected",
                                (x + 5, y + 15), cv2.FONT_HERSHEY_SIMPLEX,
                                0.5, (0, 255, 0), 2)
                    if np.abs(centerX - self.thresholdX) <= 20 and \
                       np.abs(centerY - self.thresholdY) <= 20:
                        cColor = (0, 255, 0)
                    else:
                        cColor = (0, 0, 255)
                objects = self.updateObjectIDs(upper_body)
                # check whether the id is the same as the one detected
                if len(list(objects.items())) > 0:
                    targetID = list(objects.items())[0][0]
                    targetCentroid = list(objects.items())[0][1]
                    if targetCentroid[0] == centerX and \
                       targetCentroid[1] == centerY:
                        cv2.putText(capturedframe, str(targetID),
                                    (targetCentroid[0], targetCentroid[1]),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (0, 255, 0), 2)
                        cv2.circle(capturedframe,
                                   (targetCentroid[0], targetCentroid[1]),
                                   4, (0, 255, 0), -1)
                else:
                    targetID = -100000
                    targetCentroid = []
                print(self.robotMoveDirection(
                    targetCentroid, (self.width / 2, self.height / 2)))
            except Exception as e:
                print(str(e))
                return capturedframe
        # threshold center lines
        cv2.circle(capturedframe, (self.thresholdX, self.thresholdY),
                   self.radius, cColor, 3)
        return capturedframe

    def release(self):
        self.cap.release()

import math
from itertools import combinations

import cv2  # assumed missing imports, added so the snippet runs standalone
import numpy as np

from centroidtracker import CentroidTracker

protopath = "MobileNetSSD_deploy.prototxt"
modelpath = "MobileNetSSD_deploy.caffemodel"
detector = cv2.dnn.readNetFromCaffe(prototxt=protopath, caffeModel=modelpath)
# detector.setPreferableBackend(cv2.dnn.DNN_BACKEND_INFERENCE_ENGINE)
# detector.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle",
           "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
           "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
           "train", "tvmonitor"]

tracker = CentroidTracker(maxDisappeared=40, maxDistance=50)


def non_max_suppression_fast(boxes, overlapThresh):
    try:
        if len(boxes) == 0:
            return []
        if boxes.dtype.kind == "i":
            boxes = boxes.astype("float")
        pick = []
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]

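# Both copies of non_max_suppression_fast above are cut off mid-function. For
# reference, a standalone sketch of the standard Malisiewicz-style fast NMS
# they appear to follow (an assumption; the original continuation is not
# shown):
import numpy as np


def non_max_suppression_fast_sketch(boxes, overlapThresh):
    if len(boxes) == 0:
        return []
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")
    pick = []
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    while len(idxs) > 0:
        # pick the box with the largest y2 still in play
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # overlap of the picked box with every remaining box
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:last]]
        # drop the picked box and everything overlapping it too much
        idxs = np.delete(idxs, np.concatenate(
            ([last], np.where(overlap > overlapThresh)[0])))
    return boxes[pick].astype("int")
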
def detect(opt, save_img=False):
    ct = CentroidTracker()
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, \
        opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or \
        source.startswith('http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    now = datetime.datetime.now().strftime("%Y/%m/%d/%H:%M:%S")  # current time

    # Load model
    model = torch.load(weights, map_location=device)['model'].float()  # FP32
    model.to(device).eval()

    # =========================================================================
    filepath_mask = 'D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/yolov5/weights/mask.pt'
    model_mask = torch.load(filepath_mask, map_location=device)['model'].float()
    model_mask.to(device).eval()
    if half:
        model_mask.half()
    names_m = model_mask.module.names if hasattr(model_mask, 'module') \
        else model_mask.names
    # =========================================================================

    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = False
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'
    memory = {}
    people_counter = 0
    in_people = 0
    out_people = 0
    people_mask = 0
    people_none = 0
    time_sum = 0
    # now_time = datetime.datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        pred_mask = model_mask(img)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        pred_mask = non_max_suppression(pred_mask, 0.4, 0.5,
                                        classes=[0, 1, 2], agnostic=None)
        if pred_mask is None:
            continue
        classification = torch.cat(pred_mask)[:, -1]
        if len(classification) == 0:
            print("----", None)
            continue
        index = int(classification[0])
        mask_class = names_m[index]
        print("MASK CLASS>>>>>>>\n", mask_class)
\n", mask_class) # ============================================================================= # Create the haar cascade # cascPath = "D:/Internship Crime Detection/YOLOv5 person detection/AjnaTask/Mytracker/haarcascade_frontalface_alt2.xml" # faceCascade = cv2.CascadeClassifier(cascPath) t2 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s s += '%gx%g ' % img.shape[2:] # print string save_path = str(Path(out) / Path(p).name) img_center_y = int(im0.shape[0]//2) # line = [(int(im0.shape[1]*0.258),int(img_center_y*1.3)),(int(im0.shape[1]*0.55),int(img_center_y*1.3))] # print("LINE>>>>>>>>>", line,"------------") # line = [(990, 672), (1072, 24)] line = [(1272, 892), (1800, 203)] # [(330, 468), (704, 468)] print("LINE>>>>>>>>>", line,"------------") cv2.line(im0,line[0],line[1],(0,0,255),5) # ============================================================================= # gray = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) # # Detect faces in the image # faces = faceCascade.detectMultiScale( # gray, # scaleFactor=1.1, # minNeighbors=5, # minSize=(30, 30) # ) # # Draw a rectangle around the faces # for (x, y, w, h) in faces: # cv2.rectangle(im0, (x, y), (x+w, y+h), (0, 255, 0), 2) # text_x = x # text_y = y+h # cv2.putText(im0, mask_class, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, # 1, (0, 0, 255), thickness=1, lineType=2) # ============================================================================= if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords( img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string bbox_xywh = [] confs = [] bbox_xyxy = [] rects = [] # Is it correct? 
# Adapt detections to deep sort input format for *xyxy, conf, cls in det: x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy) # label = f'{names[int(cls)]}' xyxy_list = torch.tensor(xyxy).view(1,4).view(-1).tolist() plot_one_box(xyxy, im0, label='person', color=colors[int(cls)], line_thickness=3) rects.append(xyxy_list) obj = [x_c, y_c, bbox_w, bbox_h,int(cls)] #cv2.circle(im0,(int(x_c),int(y_c)),color=(0,255,255),radius=12,thickness = 10) bbox_xywh.append(obj) # bbox_xyxy.append(rec) confs.append([conf.item()]) xywhs = torch.Tensor(bbox_xywh) confss = torch.Tensor(confs) # Pass detections to deepsort outputs = ct.update(rects) # xyxy # outputs = deepsort.update(xywhs, confss, im0) # deepsort index_id = [] previous = memory.copy() memory = {} boxes = [] names_ls = [] # draw boxes for visualization if len(outputs) > 0: # print('output len',len(outputs)) for id_,centroid in outputs.items(): # boxes.append([output[0],output[1],output[2],output[3]]) # index_id.append('{}-{}'.format(names_ls[-1],output[-2])) index_id.append(id_) boxes.append(centroid) memory[index_id[-1]] = boxes[-1] i = int(0) print(">>>>>>>",boxes) for box in boxes: # extract the bounding box coordinates # (x, y) = (int(box[0]), int(box[1])) # (w, h) = (int(box[2]), int(box[3])) x = int(box[0]) y = int(box[1]) # GGG if index_id[i] in previous: previous_box = previous[index_id[i]] (x2, y2) = (int(previous_box[0]), int(previous_box[1])) # (w2, h2) = (int(previous_box[2]), int(previous_box[3])) p0 = (x,y) p1 = (x2,y2) cv2.line(im0, p0, p1, (0,255,0), 3) # current frame obj center point - before frame obj center point if intersect(p0, p1, line[0], line[1]): people_counter += 1 print('==============================') print(p0,"---------------------------",p0[1]) print('==============================') print(line[1][1],'------------------',line[0][0],'-----------------', line[1][0],'-------------',line[0][1]) # if p0[1] <= line[1][1]: # in_people +=1 # else: # # if mask_class == 'mask': # # print("COUNTING MASK..", mask_class) # # people_mask += 1 # # if mask_class == 'none': # # people_none += 1 # out_people +=1 if p0[1] >= line[1][1]: in_people += 1 if mask_class == 'mask': people_mask += 1 else: people_none += 1 else: out_people += 1 i += 1 # Write MOT compliant results to file if save_txt and len(outputs) != 0: for j, output in enumerate(outputs): bbox_left = output[0] bbox_top = output[1] bbox_w = output[2] bbox_h = output[3] identity = output[-1] with open(txt_path, 'a') as f: f.write(('%g ' * 10 + '\n') % (frame_idx, identity, bbox_left, bbox_top, bbox_w, bbox_h, -1, -1, -1, -1)) # label format else: deepsort.increment_ages() cv2.putText(im0, 'Person [down][up] : [{}][{}]'.format(out_people,in_people),(130,50),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3) cv2.putText(im0, 'Person [mask][no_mask] : [{}][{}]'.format(people_mask, people_none), (130,100),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3) # Print time (inference + NMS) print('%sDone. 
(%.3fs)' % (s, t2 - t1)) time_sum += t2-t1 # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': # im0= cv2.resize(im0,(0,0),fx=0.5,fy=0.5,interpolation=cv2.INTER_LINEAR) cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % os.getcwd() + os.sep + out) if platform == 'darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
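# The counting logic above depends on an `intersect` helper that is not shown
# in this snippet. A minimal sketch of one common implementation, using the
# standard counter-clockwise (CCW) orientation test for segment intersection;
# the helper actually used by this project may differ.
def ccw(A, B, C):
    # True if points A, B, C are arranged counter-clockwise
    return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0])


def intersect(A, B, C, D):
    # Segments AB and CD cross iff A and B lie on opposite sides of CD
    # and C and D lie on opposite sides of AB
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)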
EMOTIONS = ["angry", "disgust", "fear", "happy", "sad", "suprised", "neutral"] EMOTION_COLORS = [(0, 0, 255), (0, 255, 0), (0, 0, 0), (255, 255, 0), (255, 0, 0), (0, 255, 255), (255, 255, 255)] CAFFE_NET = cv2.dnn.readNetFromCaffe("models/face_detection/deploy.prototxt.txt", "models/face_detection/res10_300x300_ssd_iter_140000.caffemodel") EMOTION_CLASSIFIER = load_model( "models/trained_fer_models/mini_xception.0.65-119.hdf5") # %% app = Flask(__name__) cors = CORS(app, resources={r"/api/*": {"origins": "*"}}) faces = OrderedDict() ct = CentroidTracker(max_dissapeared=10) lock = threading.Lock() @app.route("/api/emotion-detection", methods=["POST"]) def detect_emotion_in_frame(): global faces, ct, lock detected_emotions_on_faces = OrderedDict() with lock: request_data = request.get_json() frame = cv2.imdecode(np.frombuffer(base64.b64decode( request_data["capturedFrame"][23:]), np.uint8), cv2.IMREAD_COLOR) (h, w) = frame.shape[:2] blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video", help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=64, help="max buffer size")
ap.add_argument("-c", "--color", default="green", help="color to track")
ap.add_argument("-r", "--radius", type=int, default=20,
                help="minimum radius to be tracked")
ap.add_argument("-n", "--numpoint", type=int, default=2,
                help="maximum number of objects to be plotted in a frame")
args = vars(ap.parse_args())

# Set up the trackable object
ct = CentroidTracker(maxDisappeared=5, maxDistance=70)
trackableObjects = {}

# define the lower and upper boundaries of the several colors in the HSV
# color space, then initialize the list of tracked points
greenLower = (29, 86, 6)
greenUpper = (64, 255, 255)
red1Lower = (0, 20, 6)
red1Upper = (20, 255, 230)
red2Lower = (160, 20, 6)
red2Upper = (179, 255, 230)
blueLower = (95, 100, 80)
blueUpper = (125, 255, 170)
colormap = {
    # reconstructed from context (the original snippet breaks off here): each
    # color name maps to its HSV bound pairs; red needs two ranges because
    # its hue wraps around 180
    "green": [(greenLower, greenUpper)],
    "red": [(red1Lower, red1Upper), (red2Lower, red2Upper)],
    "blue": [(blueLower, blueUpper)],
}
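# Sketch of how HSV bounds like the ones above are typically turned into
# rectangles for the centroid tracker: threshold with cv2.inRange, clean the
# mask, find contours, and keep those whose enclosing circle exceeds the
# --radius argument. The helper itself is an assumption, not code from this
# script, and the findContours signature assumes OpenCV 4.x.
def color_rects(frame, lower, upper, min_radius):
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.erode(mask, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)
    cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    rects = []
    for c in cnts:
        (_, _), r = cv2.minEnclosingCircle(c)
        if r >= min_radius:
            bx, by, bw, bh = cv2.boundingRect(c)
            rects.append((bx, by, bx + bw, by + bh))
    return rects  # feed to ct.update(rects)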
path4 = r"C:\Users\hp\Desktop\priyasoftweb\person_tracker_id_frame_time\input\VPD_part2.mp4" path33 = r"C:\Users\hp\Desktop\priyasoftweb\person_tracker_id_frame_time\input\VPD_part11.mp4" path5 = r"C:\Users\hp\Desktop\priyasoftweb\person_tracker_id_frame_time\input\motor_bike.mp4" #image_pth = r"E:\softweb\ML\project\object_detection\data\living_room.jpg" # load the COCO class labels our YOLO model was trained on #image_pth = r"E:\softweb\ML\project\object_detection-transfer-learning-yolo-ssd-mobilenet\data\living_room.jpg" # load the COCO class labels our YOLO model was trained on labelsPath = r'E:\softweb\Pretrained_model\yolo_weights\coco.names' # derive the paths to the YOLO weights and model configuration weightsPath = r'E:\softweb\Pretrained_model\yolo_weights\yolov3.weights' configPath = r'E:\softweb\Pretrained_model\yolo_weights\yolov3.cfg' #centroid #tracker = CentroidTracker(maxDisappeared=80, maxDistance=90) tracker = CentroidTracker(maxDisappeared=50, maxDistance=50) LABELS = [] with open(labelsPath, "r") as f: LABELS = f.read().strip("\n").split("\n") # initialize a list of colors to represent each possible class label np.random.seed(42) COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8") net = cv2.dnn.readNetFromDarknet(configPath, weightsPath) #net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV) #net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU) #image =cv2.imread(image_pth)
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        # load_state_dict() returns a result object, not the module, so the
        # original chained .to(device).eval() was a bug; split the calls
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    ct = CentroidTracker()

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    memory = {}
    people_counter = 0
    detect_frame_num = 0
    before = []
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # img_center_y = int(im0.shape[0]//2)
        # line = [(0,int(img_center_y*1.3)),(int(im0.shape[1]*0.55),int(img_center_y*1.3))]
        # cv2.line(im0,line[0],line[1],(0,0,255),5)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            index_id = []
            previous = memory.copy()
            memory = {}
            boxes = []
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    xyxy_list = torch.tensor(xyxy).view(1, 4).view(-1).tolist()
                    # center_x = int(np.mean([xyxy_list[0],xyxy_list[2]]))
                    # center_y = int(np.mean([xyxy_list[1],xyxy_list[3]]))
                    # cv.circle(im,(center_x))
                    xywh_list = xyxy2xywh(torch.tensor(xyxy).view(1, 4)).view(-1).tolist()
                    boxes.append(xywh_list)
                    for box in boxes:
                        (x, y) = (int(box[0]), int(box[1]))
                        (w, h) = (int(box[2]), int(box[3]))

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]}'
                        plot_one_box(xyxy, im0, label=label,
                                     color=colors[int(cls)], line_thickness=3)

            # cv2.putText(im0,'Person : {}'.format(final_person_cnt),(130,100),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)
            # cv2.putText(im0,'Car : {}'.format(final_car_cnt),(130,150),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
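# The function above reads its options from a module-level `opt`. A sketch of
# the argparse block that typically produces it: flag names are inferred from
# the attributes used above (conf_thres, iou_thres, agnostic_nms, etc.), so
# the defaults here are assumptions, and the usual imports (argparse, torch)
# are assumed to be present.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', default='yolov5s.pt')
    parser.add_argument('--source', default='data/images')
    parser.add_argument('--img-size', type=int, default=640)
    parser.add_argument('--conf-thres', type=float, default=0.25)
    parser.add_argument('--iou-thres', type=float, default=0.45)
    parser.add_argument('--device', default='')
    parser.add_argument('--view-img', action='store_true')
    parser.add_argument('--save-txt', action='store_true')
    parser.add_argument('--save-conf', action='store_true')
    parser.add_argument('--classes', nargs='+', type=int)
    parser.add_argument('--agnostic-nms', action='store_true')
    parser.add_argument('--augment', action='store_true')
    parser.add_argument('--project', default='runs/detect')
    parser.add_argument('--name', default='exp')
    parser.add_argument('--exist-ok', action='store_true')
    opt = parser.parse_args()
    with torch.no_grad():
        detect()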
# Set up a list of class labels. There's a TensorFlow method for this, but in
# this case I'm just creating a list since there are only 10 or so classes...
# Refactor later. See ./ssd_inc...1_29/labels.pbtxt
# Upon further experimentation, this list seems to be out of order.
LABELS = [
    'Ferry', 'Buoy', 'Vessel/ship', 'Speed boat', 'Boat', 'Kayak',
    'Sail boat', 'Swimming person', 'Flying bird/plane', 'Other', '????',
    'dock?'
]

# initialize a list of colors to represent each possible class label
np.random.seed(38)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# Initialize centroid tracker. Needed to match objects between frames.
ct = CentroidTracker()

# This is the main loop that processes frames from video cameras or input
# video. For a camera, use:
#   cap = cv2.VideoCapture(<camera number>)
# To test, I recommend using a video from the Singapore Maritime dataset.
# Files are too large for GitHub, so they must be downloaded independently.
# cap = cv2.VideoCapture("testvid.mp4")
cap = cv2.VideoCapture("MVI_0797_VIS_OB.avi")

while True:
    # Input image
    ret, img = cap.read()
    if not ret:
        break
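# The loop above breaks off before the detection step. A minimal sketch of
# the usual continuation, assuming a TF Object Detection API style output
# dict (detection_boxes normalized as [y_min, x_min, y_max, x_max]); the
# helper name and score threshold are assumptions, not this script's code.
def boxes_to_rects(output_dict, img, min_score=0.5):
    h, w = img.shape[:2]
    rects = []
    for score, (y0, x0, y1, x1) in zip(output_dict['detection_scores'],
                                       output_dict['detection_boxes']):
        if score > min_score:
            # scale normalized coordinates up to pixel rects
            rects.append((int(x0 * w), int(y0 * h), int(x1 * w), int(y1 * h)))
    return rects  # feed to ct.update(rects) to get stable per-object IDs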
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

# Initialize the parameters
confThreshold = 0.3  # Confidence threshold
nmsThreshold = 0.4   # Non-maximum suppression threshold
inpWidth = 416       # Width of network's input image
inpHeight = 416      # Height of network's input image

parser = argparse.ArgumentParser(
    description='Object Detection using YOLO in OPENCV')
parser.add_argument('--image', help='Path to image file.')
parser.add_argument('--video', help='Path to video file.')
args = parser.parse_args()

# initialize centroid tracker and frame dimensions
ct = CentroidTracker()
(H, W) = (None, None)

# Load names of classes
classesFile = "coco.names"
classes = None
with open(classesFile, 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Give the configuration and weight files for the model and load the network using them.
modelConfiguration = "yolov3.cfg"
modelWeights = "yolov3.weights"
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# initialize variable for use in count_frames_manual
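# The shapely imports above suggest a polygonal region-of-interest test on
# tracked centroids. A minimal sketch; the polygon vertices here are
# placeholders, not values from this script.
roi = Polygon([(100, 100), (500, 100), (500, 400), (100, 400)])


def centroid_in_roi(cx, cy):
    # True if the tracked centroid falls inside the counting region
    return roi.contains(Point(cx, cy))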
def run_inference(model, category_index, cap, labels, roi_position=0.6,
                  threshold=0.5, x_axis=True, skip_frames=20, save_path='',
                  show=True):
    counter = [0, 0, 0, 0]  # left, right, up, down
    total_frames = 0

    ct = CentroidTracker(maxDisappeared=40, maxDistance=50)
    trackers = []
    trackableObjects = {}

    # Check if results should be saved
    if save_path:
        width = int(cap.get(3))
        height = int(cap.get(4))
        fps = cap.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(
            'M', 'J', 'P', 'G'), fps, (width, height))

    while cap.isOpened():
        ret, image_np = cap.read()
        if not ret:
            break

        height, width, _ = image_np.shape
        rgb = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)

        status = "Waiting"
        rects = []

        if total_frames % skip_frames == 0:
            status = "Detecting"
            trackers = []

            # Actual detection.
            output_dict = run_inference_for_single_image(model, image_np)

            for i, (y_min, x_min, y_max, x_max) in enumerate(output_dict['detection_boxes']):
                if output_dict['detection_scores'][i] > threshold and (
                        labels is None or
                        category_index[output_dict['detection_classes'][i]]['name'] in labels):
                    tracker = dlib.correlation_tracker()
                    rect = dlib.rectangle(int(x_min * width), int(y_min * height),
                                          int(x_max * width), int(y_max * height))
                    tracker.start_track(rgb, rect)
                    trackers.append(tracker)
        else:
            status = "Tracking"
            for tracker in trackers:
                # update the tracker and grab the updated position
                tracker.update(rgb)
                pos = tracker.get_position()

                # unpack the position object
                x_min, y_min = int(pos.left()), int(pos.top())
                x_max, y_max = int(pos.right()), int(pos.bottom())

                # add the bounding box coordinates to the rectangles list
                rects.append((x_min, y_min, x_max, y_max))

        objects = ct.update(rects)

        for (objectID, centroid) in objects.items():
            to = trackableObjects.get(objectID, None)

            if to is None:
                to = TrackableObject(objectID, centroid)
            else:
                if x_axis and not to.counted:
                    x = [c[0] for c in to.centroids]
                    direction = centroid[0] - np.mean(x)

                    if centroid[0] > roi_position * width and direction > 0:
                        counter[1] += 1  # right
                        to.counted = True
                    elif centroid[0] < roi_position * width and direction < 0:
                        counter[0] += 1  # left
                        to.counted = True

                elif not x_axis and not to.counted:
                    y = [c[1] for c in to.centroids]
                    direction = centroid[1] - np.mean(y)

                    if centroid[1] > roi_position * height and direction > 0:
                        counter[3] += 1  # down
                        to.counted = True
                    elif centroid[1] < roi_position * height and direction < 0:
                        counter[2] += 1  # up
                        to.counted = True

                to.centroids.append(centroid)

            trackableObjects[objectID] = to

            text = "ID {}".format(objectID)
            cv2.putText(image_np, text, (centroid[0] - 10, centroid[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
            cv2.circle(image_np, (centroid[0], centroid[1]), 4, (255, 255, 255), -1)

        # Draw ROI line
        if x_axis:
            cv2.line(image_np, (int(roi_position * width), 0),
                     (int(roi_position * width), height), (0xFF, 0, 0), 5)
        else:
            cv2.line(image_np, (0, int(roi_position * height)),
                     (width, int(roi_position * height)), (0xFF, 0, 0), 5)

        # display count and status (the original passed a font constant as
        # the lineType argument; cv2.LINE_AA is a valid lineType)
        font = cv2.FONT_HERSHEY_SIMPLEX
        if x_axis:
            cv2.putText(image_np, f'Left: {counter[0]}; Right: {counter[1]}',
                        (10, 35), font, 0.8, (0, 0xFF, 0xFF), 2, cv2.LINE_AA)
        else:
            cv2.putText(image_np, f'Up: {counter[2]}; Down: {counter[3]}',
                        (10, 35), font, 0.8, (0, 0xFF, 0xFF), 2, cv2.LINE_AA)
        cv2.putText(image_np, 'Status: ' + status, (10, 70), font, 0.8,
                    (0, 0xFF, 0xFF), 2, cv2.LINE_AA)

        if show:
            cv2.imshow('cumulative_object_counting', image_np)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break

        if save_path:
            out.write(image_np)

        total_frames += 1

    cap.release()
    if save_path:
        out.release()
    cv2.destroyAllWindows()
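# Hedged usage sketch for run_inference(). Loading a SavedModel and a label
# map follows the standard TF Object Detection API pattern; the model
# directory and label-map path are placeholders, and the
# run_inference_for_single_image helper is assumed to accept the loaded model.
import tensorflow as tf
from object_detection.utils import label_map_util

detect_fn = tf.saved_model.load('exported_model/saved_model')
category_index = label_map_util.create_category_index_from_labelmap(
    'label_map.pbtxt', use_display_name=True)
cap = cv2.VideoCapture('input.mp4')
run_inference(detect_fn, category_index, cap, labels=['person'],
              roi_position=0.5, threshold=0.5, x_axis=True,
              skip_frames=30, save_path='out.avi', show=True)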