def detection_gpu(self, model, frame, cnt):
    # print('Detecting...')
    start_time = time.time()
    frame_tensor = cv_image2tensor(frame, self.input_size).unsqueeze(0)
    frame_tensor = Variable(frame_tensor)
    if self.cuda:
        frame_tensor = frame_tensor.cuda()
    detections = self.model(frame_tensor, self.cuda).cpu()
    detections = process_result(detections, self.confidence, self.nms_thresh)
    print("number of detected objects: ", len(detections))
    # one [score, class, x1, y1, x2, y2] entry per detection, for logging
    a = [[] for _ in range(len(detections))]
    if len(detections) != 0:
        detections = transform_result(detections, [frame], self.input_size)
        for idx, detection in enumerate(detections):
            a[idx].append(float(detection[6]))               # prediction score
            a[idx].append(self.classes[int(detection[-1])])  # prediction class
            a[idx].append(int(detection[1]))                 # x1
            a[idx].append(int(detection[2]))                 # y1
            a[idx].append(int(detection[3]))                 # x2
            a[idx].append(int(detection[4]))                 # y2
            self.draw_bbox([frame], detection, self.colors, self.classes)
    # save frames if you need to:
    # cv2.imwrite('frame' + str(cnt) + '.jpg', frame)
    end_time = time.time()
    print("[INFO] detection done. It took " + str(end_time - start_time) + " seconds")
    self.logfile3.write(str(cnt) + "\t" + str(len(detections)) + "\t" + str(a) + "\t" + str(end_time - start_time) + "\n")
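
# --- Hedged example (not part of the original source) ---
# detection_gpu() appends tab-separated rows of the form
#   <frame count>\t<num detections>\t<list of [score, class, x1, y1, x2, y2]>\t<elapsed seconds>
# to self.logfile3. A minimal sketch of a reader for that format, assuming the
# file was written exactly as above; parse_detection_log is a hypothetical helper.
import ast

def parse_detection_log(path):
    rows = []
    with open(path) as f:
        for line in f:
            cnt, num, boxes, elapsed = line.rstrip("\n").split("\t")
            rows.append({
                "frame": int(cnt),
                "num_detections": int(num),
                "boxes": ast.literal_eval(boxes),  # list of [score, class, x1, y1, x2, y2]
                "seconds": float(elapsed),
            })
    return rows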
def detect_image(model, args):
    print('Loading input image(s)...')
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    batch_size = 1  # int(model.net_info['batch'])
    imlist, imgs = load_images(args.input)
    print('Input image(s) loaded')
    print("Loaded {} images".format(len(imgs)))
    img_batches = create_batches(imgs, batch_size)

    # load colors and classes
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("cfg/garb.names")

    if not osp.exists(args.outdir):
        os.makedirs(args.outdir)

    start_time = datetime.now()
    print('Detecting...')
    detected_trash = []  # number of detections per batch
    for batchi, img_batch in tqdm(enumerate(img_batches)):
        img_tensors = [cv_image2tensor(img, input_size) for img in img_batch]
        img_tensors = torch.stack(img_tensors)
        img_tensors = Variable(img_tensors)
        if args.cuda:
            img_tensors = img_tensors.cuda()
        detections = model(img_tensors, args.cuda).cpu()
        detections = process_result(detections, args.obj_thresh, args.nms_thresh)
        detected_trash.append(len(detections))
        if len(detections) == 0:
            continue
        detections = transform_result(detections, img_batch, input_size)

        for detection in detections:
            draw_bbox(img_batch, detection, colors, classes, 0, args.outdir)

        for i, img in enumerate(img_batch):
            save_path = osp.join(args.outdir, osp.basename(imlist[batchi * batch_size + i]))
            cv2.imwrite(save_path, img)
            print(save_path, 'saved')

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    return detected_trash
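
# --- Hedged example (not part of the original source) ---
# A minimal sketch of how this detect_image() might be driven. The Darknet
# constructor / weight-loading calls are assumptions based on common YOLOv3
# PyTorch ports, not code shown in this listing; adapt them to whatever model
# class this repo actually uses.
from argparse import Namespace

# model = Darknet("cfg/yolov3-garb.cfg")     # assumed constructor
# model.load_weights("yolov3-garb.weights")  # assumed loader
# model.eval()
args = Namespace(input="images/", outdir="det/", cuda=False,
                 obj_thresh=0.5, nms_thresh=0.4)
# counts = detect_image(model, args)  # one entry per batch: number of detections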
def detect_image(model, args):
    print('Loading input image(s)...')
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    batch_size = int(model.net_info['batch'])
    imlist, imgs = load_images(args.input)
    print('Input image(s) loaded')
    img_batches = create_batches(imgs, batch_size)

    # load colors and classes
    colors = pkl.load(open(
        "/home/autonomous-car/Desktop/Autonomous_car_ros2/src/stereo_yolo3/stereo_yolo3/pallete",
        "rb"))
    classes = load_classes(
        "/home/autonomous-car/Desktop/Autonomous_car_ros2/src/stereo_yolo3/stereo_yolo3/data/coco.names")

    if not osp.exists(args.outdir):
        os.makedirs(args.outdir)

    start_time = datetime.now()
    print('Detecting...')
    for batchi, img_batch in enumerate(img_batches):
        img_tensors = [cv_image2tensor(img, input_size) for img in img_batch]
        img_tensors = torch.stack(img_tensors)
        img_tensors = Variable(img_tensors)
        if args.cuda:
            img_tensors = img_tensors.cuda()
        detections = model(img_tensors, args.cuda).cpu()
        detections = process_result(detections, args.obj_thresh, args.nms_thresh)
        if len(detections) == 0:
            continue
        detections = transform_result(detections, img_batch, input_size)

        for detection in detections:
            draw_bbox(img_batch, detection, colors, classes)

        for i, img in enumerate(img_batch):
            save_path = osp.join(args.outdir, 'dete_' + osp.basename(imlist[batchi * batch_size + i]))
            cv2.imwrite(save_path, img)

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    return
def detect_image(self, path,
                 colors=[(39, 129, 113), (164, 80, 133), (83, 122, 114)],
                 classes=['container_small', 'garbage_bag', 'cardboard']):
    print('Loading input image(s)...')
    input_size = [int(self.model.net_info['height']), int(self.model.net_info['width'])]
    batch_size = int(self.model.net_info['batch'])
    imlist, imgs = load_images(path)
    print('Input image(s) loaded')
    img_batches = create_batches(imgs, batch_size)

    start_time = datetime.now()
    print('Detecting...')
    all_images_attributes = []
    for batchi, img_batch in enumerate(img_batches):
        img_tensors = [cv_image2tensor(img, input_size) for img in img_batch]
        img_tensors = torch.stack(img_tensors)
        img_tensors = Variable(img_tensors)
        if self.cuda:
            img_tensors = img_tensors.cuda()
        detections = self.model(img_tensors, self.cuda).cpu()
        detections = process_result(detections, self.obj_thresh, self.nms_thresh)
        if len(detections) == 0:
            continue
        detections = transform_result(detections, img_batch, input_size)

        boxes = []
        for detection in detections:
            boxes.append(create_output_json(img_batch, detection, colors, classes))

        images_attributes = {}
        images_attributes['image_meta'] = {'width': input_size[1], 'height': input_size[0]}
        images_attributes['boxes'] = boxes
        images_attributes['counts'] = {x: 0 for x in classes}
        images_attributes['counts']['total'] = 0
        for box in boxes:
            images_attributes['counts'][box['class']] += 1
            images_attributes['counts']['total'] += 1
        all_images_attributes.append(images_attributes)

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    return all_images_attributes
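
# --- Hedged example (not part of the original source) ---
# This detect_image() returns one dict per image whose 'counts' entry maps
# class name -> count, plus a 'total' key. A small sketch that aggregates
# those counts over a whole run; aggregate_counts is a hypothetical helper.
from collections import Counter

def aggregate_counts(all_images_attributes):
    totals = Counter()
    for attrs in all_images_attributes:
        totals.update(attrs['counts'])
    return dict(totals)  # e.g. {'garbage_bag': 7, 'cardboard': 3, ..., 'total': 12}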
def detect_image(model, fname, i):
    print('Loading input image(s)...')
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    batch_size = int(model.net_info['batch'])
    imlist, imgs = load_images(fname)
    print('Input image(s) loaded')
    img_batches = create_batches(imgs, batch_size)

    # load colors and classes
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("darknet/cfg/trash.names")

    start_time = datetime.now()
    print('Detecting...')
    # NOTE: the original batch loop has been short-circuited; the function
    # returns on the first iteration and everything after the return is
    # unreachable leftover code (img_tensors and args are undefined there).
    for batchi, img_batch in zip((1, 2), (3, 4)):
        # img_tensors = [cv_image2tensor(img, input_size) for img in img_batch]
        # img_tensors = torch.stack(img_tensors)
        # img_tensors = Variable(img_tensors)
        return model.new_detect(batchi, i)

        detections = model(img_tensors, False).cpu()
        detections = process_result(detections, 100, 102)
        if len(detections) == 0:
            continue
        detections = transform_result(detections, img_batch, input_size)
        for detection in detections:
            draw_bbox(img_batch, detection, colors, classes, 0, args.outdir)
        for i, img in enumerate(img_batch):
            save_path = osp.join(args.outdir, osp.basename(imlist[batchi * batch_size + i]))
            cv2.imwrite(save_path, img)
            print(save_path, 'saved')

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    return
def detection_gpu_return(self, model, frame, cnt):
    # print('Detecting...')
    personcnt = 0
    start_time = time.time()
    frame_tensor = cv_image2tensor(frame, self.input_size).unsqueeze(0)
    frame_tensor = Variable(frame_tensor)
    if self.cuda:
        frame_tensor = frame_tensor.cuda()
    detections = self.model(frame_tensor, self.cuda).cpu()
    detections = process_result(detections, self.confidence, self.nms_thresh)
    if len(detections) != 0:
        detections = transform_result(detections, [frame], self.input_size)
        for detection in detections:
            if self.classes[int(detection[-1])] == "person":
                personcnt += 1
    print(personcnt)
    return personcnt
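
# --- Hedged example (not part of the original source) ---
# A sketch of using detection_gpu_return() as a per-frame person counter.
# It assumes `self` is the surrounding detector object and `cap` is an open
# cv2.VideoCapture (both assumptions; adapt to the real call site):
# cnt = 0
# while cap.isOpened():
#     ok, frame = cap.read()
#     if not ok:
#         break
#     cnt += 1
#     persons = self.detection_gpu_return(self.model, frame, cnt)
#     if persons > 0:
#         print("frame {}: {} person(s) detected".format(cnt, persons))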
def detect_frame(model, frame):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]

    frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
    frame_tensor = Variable(frame_tensor)
    frame_tensor = frame_tensor.cuda()
    detections = model(frame_tensor, True).cpu()
    # process_result changes the variable 'detections'
    detections = process_result(detections, 0.5, 0.4)
    cls_conf = detections[:, 6].cpu().data.numpy()
    cls_ids = detections[:, 7].cpu().data.numpy()
    if len(detections) != 0:
        detections = transform_result(detections, [frame], input_size)
        # CAUTION: xywh is a view into detections, so writing xywh[:, 0] and
        # xywh[:, 1] also overwrites detections[:, 1] and detections[:, 2]
        # (they become the centers); that is why the width/height lines below
        # need the * 2 factor to come out at the true box size
        xywh = detections[:, 1:5]
        xywh[:, 0] = (detections[:, 1] + detections[:, 3]) / 2
        xywh[:, 1] = (detections[:, 2] + detections[:, 4]) / 2
        xywh[:, 2] = abs(detections[:, 3] - detections[:, 1]) * 2
        xywh[:, 3] = abs(detections[:, 2] - detections[:, 4]) * 2
        # the final bounding boxes that can be fed into DeepSORT
        xywh = xywh.cpu().data.numpy()
    return xywh, cls_conf, cls_ids
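
# --- Hedged example (not part of the original source) ---
# A standalone corner-to-center converter in plain NumPy. It copies the input
# first, so it sidesteps the view-aliasing pitfall noted above and needs no
# * 2 compensation; a sketch for reference, not the function the repo uses.
import numpy as np

def xyxy_to_xywh(boxes_xyxy):
    """boxes_xyxy: (N, 4) array of [x1, y1, x2, y2] corner boxes."""
    boxes = np.array(boxes_xyxy, dtype=np.float32)  # copy, not a view
    xywh = np.empty_like(boxes)
    xywh[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2    # center x
    xywh[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2    # center y
    xywh[:, 2] = np.abs(boxes[:, 2] - boxes[:, 0])  # width
    xywh[:, 3] = np.abs(boxes[:, 3] - boxes[:, 1])  # height
    return xywh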
def detect_video(model):
    kinect = PyKinectRuntime.PyKinectRuntime(PyKinectV2.FrameSourceTypes_Depth)
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open("yolov3-master/pallete", "rb"))
    classes = load_classes("yolov3-master/data/coco.names")
    colors = [colors[1]]
    # cap is the video captured by camera;
    # on a Surface: 0 is the front camera, 1 is the back camera, 2 is the Kinect
    cap = cv2.VideoCapture(1 + cv2.CAP_DSHOW)
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    read_frames = 0
    start_time = datetime.now()
    # location samples used to smooth the detected position
    locationx = []
    locationy = []
    locationd = []
    while cap.isOpened():
        retflag, frame = cap.read()
        # the frame is cropped and then resized to match the 512x424 depth frame
        cv2.circle(frame, (960, 540), 5, [0, 255, 255], thickness=-1)
        # calibration notes: 2 feet-41, 3 feet-41, 4 feet-37
        y = 0
        h = 1080  # h=1272
        w = 1920 / 84.1 * 70.6  # w=1611
        w = int(w)
        x = (1920 - w) / 2 + 140
        x = int(x)
        dim = (512, 380)
        frame = frame[y:y + h, x:x + w]
        frame = cv2.resize(frame, dim)
        read_frames += 1
        if retflag:
            # get depth frame
            if kinect.has_new_depth_frame():
                Dframe = kinect.get_last_depth_frame()
                frameD = kinect._depth_frame_data
                Dframe = Dframe.astype(np.uint8)
                Dframe = np.reshape(Dframe, (424, 512))
                dx = 0
                dy = 22
                dh = 380
                dw = 512
                dim = (512, 380)
                Dframe = Dframe[dy:dy + dh, dx:dx + dw]
                frame = cv2.resize(frame, dim)
                Dframe = cv2.cvtColor(Dframe, cv2.COLOR_GRAY2RGB)

                # NOTE: this callback is defined but never registered with
                # cv2.setMouseCallback in this snippet
                def click_event(event, x, y, flags, param):
                    if event == cv2.EVENT_RBUTTONDOWN:
                        print(x, y)
                    if event == cv2.EVENT_LBUTTONDOWN:
                        Pixel_Depth = frameD[(((22 + y) * 512) + x)]
                        print("x ", x, "y ", y, "Depth", Pixel_Depth)

            # get RGB frame
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)
            frame_tensor = frame_tensor.cuda()
            detections = model(frame_tensor, True).cpu()
            # orange order
            flag1 = 0  # count of oranges seen in this frame
            detections = process_result(detections, 0.5, 0.4)
            if len(detections) != 0:
                global flag
                print(flag)
                time.sleep(0.1)  # original called time.sleep() with no argument (a TypeError); 0.1 s assumed
                detections = transform_result(detections, [frame], input_size)
                num = len(detections)
                for detection in detections:
                    Label = int(detection[-1])
                    if Label == 49:  # COCO class 49 is 'orange'
                        flag1 = flag1 + 1
                    center = draw_bbox([frame], detection, colors, classes)
                    Dcenter = draw_bbox([Dframe], detection, colors, classes)
                    # redraw the boundary box on the depth frame
                    img = Dframe
                    nx, x, y, d = get_depth(center, kinect)
                    cv2.circle(img, (nx, y), 5, [0, 0, 255], thickness=-1)
                    # convert to the robot's coordinate frame
                    x = center[0]
                    y = center[1]
                    x = x - 512 / 2 + 40
                    y = y - 318 / 2 - 35
                    y = -y
                    x = -x
                    # position
                    print("x ", x, "y ", y, "d ", d)
                    l = len(locationx)
                    # pick up the nth orange (n == flag)
                    if flag1 == flag:
                        # accept only an appropriate depth range
                        if d > 500 and d < 1050:
                            locationx.append(x)
                            locationy.append(y)
                            locationd.append(d)
                            print(locationx)
                            print(l)
                            if l > 6:
                                x1 = locationx[l - 6:l - 1]
                                y1 = locationy[l - 6:l - 1]
                                d1 = locationd[l - 6:l - 1]
                                diff = np.var(x1) + np.var(y1) + np.var(d1)
                                print(x1)
                                print(y1)
                                print(d1)
                                print(diff)
                                # accept only if the recent samples fluctuate little
                                if diff < 20:
                                    print("get position")
                                    x = np.average(x1)
                                    y = np.average(y1)
                                    d = np.average(d1)
                                    # scale x, y
                                    k1 = 1.48
                                    x = x * k1 * d / 512
                                    y = y * k1 * d / 318 * 0.72
                                    # actual position
                                    print(flag, x, y, d)
                                    # send to the Raspberry Pi
                                    flag = ClientSocket(num, flag, x, y, d)
                                    # reset the sampled data
                                    locationx = []
                                    locationy = []
                                    locationd = []
            cv2.imshow('RGBFrame', frame)
            cv2.imshow('DepthFrame', Dframe)
            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    cv2.destroyAllWindows()
    return
def detect_video(model, args):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open("yolov3-master/pallete", "rb"))
    classes = load_classes("yolov3-master/data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input) + cv2.CAP_DSHOW)  # 0 is the front cam, 1 is the back cam
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(args.outdir, 'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("wid: ", width, "hei:", height)
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # frames are resized to 512x424 below, so the writer must be opened with
    # that size (the original used the capture size, which makes VideoWriter
    # silently drop every frame)
    dim = (512, 424)
    out = cv2.VideoWriter(output_path, fourcc, fps, dim)
    read_frames = 0
    start_time = datetime.now()
    print('Start Detect')
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            # resize only after a successful read (frame is None at end of stream)
            frame = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)
            if args.cuda:
                frame_tensor = frame_tensor.cuda()
            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh, args.nms_thresh)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                for detection in detections:
                    draw_bbox([frame], detection, colors, classes)
            cv2.imshow('frame', frame)
            out.write(frame)
            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)
            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    if not args.no_show:
        cv2.destroyAllWindows()
    print('Detected video saved to ' + output_path)
    return
def detect_video(model, args):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(args.outdir, 'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # TODO: Change output path? (the writer ignores output_path and always writes output.avi)
    out = cv2.VideoWriter("output.avi", fourcc, fps, (width, height))
    read_frames = 0
    mot_tracker = Sort()
    start_time = datetime.now()
    print('Detecting...')
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)
            if args.cuda:
                frame_tensor = frame_tensor.cuda()
            detections = model(frame_tensor, args.cuda).cpu()
            # process_result changes the variable 'detections'
            # (thresholds hard-coded instead of args.obj_thresh / args.nms_thresh)
            detections = process_result(detections, 0.5, 0.4)
            cls_confs = detections[:, 6].cpu().data.numpy()
            cls_ids = detections[:, 7].cpu().data.numpy()
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                xywh = detections[:, 1:5]
                # center/size conversion disabled in the original; boxes stay [x1, y1, x2, y2]
                # xywh[:, 0] = (detections[:, 1] + detections[:, 3]) / 2
                # xywh[:, 1] = (detections[:, 2] + detections[:, 4]) / 2
                # xywh[:, 2] = abs(detections[:, 3] - detections[:, 1]) * 2
                # xywh[:, 3] = abs(detections[:, 2] - detections[:, 4]) * 2
                # the final bounding boxes that can be fed into the tracker
                xywh = xywh.cpu().data.numpy()
                xs = xywh[:, 0]
                ys = xywh[:, 1]
                ws = xywh[:, 2]
                hs = xywh[:, 3]
                # convert to MOT format: one [x, y, w, h, conf] row per detection
                MOT16_bbox = np.empty((0, 5))
                for cls_id, cls_conf, x, y, w, h in zip(cls_ids, cls_confs, xs, ys, ws, hs):
                    # MOT16_temp = [read_frames, cls_id, x, y, w, h, cls_conf, -1, -1, -1]
                    MOT16_temp = [x, y, w, h, cls_conf]
                    np.set_printoptions(precision=2, linewidth=150)
                    MOT16_bbox = np.append(MOT16_bbox, [MOT16_temp], axis=0)
                tracking_boxes = mot_tracker.update(MOT16_bbox)
                for tracking_box in tracking_boxes:
                    draw_mot_bbox(frame, torch.from_numpy(tracking_box), colors, classes)
            out.write(frame)
            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)
        else:
            break
    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    print('Detected video saved to ' + output_path)
    return
def detect_image(model, args):
    countbin = 0
    countgar = 0
    counttrash = 0
    a = []  # collected rectangles for cv2.groupRectangles
    print('Loading input image(s)...')
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    batch_size = int(model.net_info['batch'])
    imlist, imgs = load_images(args.input)
    print('Input image(s) loaded')
    img_batches = create_batches(imgs, batch_size)

    # load colors and classes
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("cfg/obj.names")

    if not osp.exists(args.outdir):
        os.makedirs(args.outdir)

    start_time = datetime.now()
    print('Detecting...')
    for batchi, img_batch in enumerate(img_batches):
        img_tensors = [cv_image2tensor(img, input_size) for img in img_batch]
        img_tensors = torch.stack(img_tensors)
        img_tensors = Variable(img_tensors)
        if args.cuda:
            img_tensors = img_tensors.cuda()
        detections = model(img_tensors, args.cuda).cpu()
        detections = process_result(detections, args.obj_thresh, args.nms_thresh)
        if len(detections) == 0:
            continue
        detections = transform_result(detections, img_batch, input_size)

        for detection in detections:
            draw_bbox(img_batch, detection, colors, classes)
            # NOTE: the per-class counting is disabled, so countgar and
            # counttrash stay 0 and the grouping branch below never runs
            # la = classes[int(detection[-1])]
            # if la == 'bin':
            #     countbin = countbin + 1
            # if la == 'plastic_bag':
            #     countgar = countgar + 1
            # if la == 'trash':
            #     counttrash = counttrash + 1
            if countgar >= 3 or counttrash >= 5:
                p1 = tuple(detection[1:3].int())
                p2 = tuple(detection[3:5].int())
                combined_array = np.append(p1, p2)
                combined_list = combined_array.tolist()
                a.append(combined_list)
                result = cv2.groupRectangles(a, 1, 0.85)
                for (x, y, w, h) in result[0]:
                    img = img_batch[int(detection[0])]
        # print("---------Sum Amount-----------")
        # print("bin = ", countbin)
        # print("plastic_bag = ", countgar)
        # print("counttrash = ", counttrash)

        for i, img in enumerate(img_batch):
            save_path = osp.join(args.outdir, 'det_' + osp.basename(imlist[batchi * batch_size + i]))
            cv2.imwrite(save_path, img)
            print(save_path, 'saved')

    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    return
def detect_video(model, args):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    mot_tracker = Sort(min_hits=args.min_hits, max_age=args.max_age)
    colors = pkl.load(open("cfg/pallete", "rb"))
    classes = load_classes("cfg/coco.names")

    # TODO: Turn this into an external config file (relevant classes and mapping)
    relevant_classes = ["boat"]
    relevant_classes_indices = [classes.index(cls) for cls in relevant_classes]
    # If you want to merge classes together
    class_mapping = {
        classes.index("boat"): [classes.index(cls) for cls in []]
    }

    if not osp.isdir(args.outdir):
        os.mkdir(args.outdir)
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(args.outdir, 'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')

    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_path = output_path.replace('.avi', '.mp4')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    read_frames = 0
    start_time = datetime.now()
    print('Detecting...')
    lod = []  # one row per tracked box, dumped to CSV below
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)
            if args.cuda:
                frame_tensor = frame_tensor.cuda()
            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh, args.nms_thresh,
                                        relevant_classes_indices, class_mapping)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
            if len(detections) == 0:
                tracked_objects = mot_tracker.update(detections.cpu().detach().numpy())
            else:
                tracked_objects = mot_tracker.update(detections[:, 1:].cpu().detach().numpy())
            if args.debug_trackers:
                for n, tracker in enumerate(mot_tracker.trackers):
                    tracker_bb = tracker.predict()[0]
                    tracker_id = tracker.id
                    draw_simple_bbox(frame, tracker_bb, f"{tracker_id}")
            else:
                if len(tracked_objects) != 0:
                    for obj in tracked_objects:
                        bbox = obj[:4]
                        uid = int(obj[4])
                        cls_ind = int(obj[5])
                        draw_bbox([frame], bbox, uid, cls_ind, colors, classes)
                        lod.append(format_output(bbox, uid, cls_ind, classes, read_frames, output_path, fps))
            if not args.no_show:
                cv2.imshow('frame', frame)
            out.write(frame)
            if read_frames % 30 == 0:
                print(f'Frames processed: {read_frames/total_frames*100:0.2f}%')
            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    end_time = datetime.now()
    print(f'Detection finished in {end_time - start_time}')
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    if not args.no_show:
        cv2.destroyAllWindows()
    print('Detected video saved to ' + output_path)
    name = output_path.replace('.mp4', '.csv')
    pd.DataFrame(lod).to_csv(name, index=False)
    print('Detected meta data saved as ' + name)
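
# --- Hedged example (not part of the original source) ---
# detect_video() writes one row per tracked box via format_output() and saves
# them as a .csv next to the output video. The exact columns come from
# format_output(), which is not shown in this listing; a minimal sketch of
# loading the file back (load_track_metadata is a hypothetical helper):
import pandas as pd

def load_track_metadata(csv_path):
    """csv_path: the .csv written next to the output video."""
    return pd.read_csv(csv_path)

# df = load_track_metadata('det/det_input.csv')  # hypothetical path
# print(df.head())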
def detect_video(model, args):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open(
        "/home/autonomous-car/Desktop/Autonomous_car_ros2/src/stereo_yolo3/stereo_yolo3/pallete",
        "rb"))
    classes = load_classes(
        "/home/autonomous-car/Desktop/Autonomous_car_ros2/src/stereo_yolo3/stereo_yolo3/data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap1 = cv2.VideoCapture(int(args.input))
        cap2 = cv2.VideoCapture(int(args.input) + 2)
        output_path = osp.join(args.outdir, 'dete_webcam.avi')
    else:
        cap1 = cv2.VideoCapture(int(args.input))
        cap2 = cv2.VideoCapture(int(args.input) + 2)
        output_path = osp.join(args.outdir, 'dete_' + osp.basename(args.input).rsplit('.')[0] + '.avi')
    # desired_width / desired_height are module-level constants not shown here
    cap1.set(3, desired_width)   # CAP_PROP_FRAME_WIDTH
    cap1.set(4, desired_height)  # CAP_PROP_FRAME_HEIGHT
    cap2.set(3, desired_width)
    cap2.set(4, desired_height)
    width, height = int(cap1.get(3)) * 2, int(cap1.get(4))
    fps = cap1.get(cv2.CAP_PROP_FPS)
    print(width)
    print(height)
    print('Detecting...')
    while cap1.isOpened():
        retflag, frame1 = cap1.read()
        retflag, frame2 = cap2.read()  # NOTE: overwrites cap1's return flag
        if retflag:
            # side-by-side stereo pair (concatenate only after a successful read)
            frame = np.concatenate((frame1, frame2), axis=1)
            # edge detection
            edge = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            edge = cv2.Canny(edge, 100, 200)
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)
            if args.cuda:
                frame_tensor = frame_tensor.cuda()
            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh, args.nms_thresh)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                for detection in detections:
                    draw_bbox([frame], detection, colors, classes)
            if not args.no_show:
                cv2.imshow('frame', frame)
            gray_scale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            stereo_image_pub.image_pub(gray_scale, edge)
            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    cap1.release()
    cap2.release()
    if not args.no_show:
        cv2.destroyAllWindows()
    return
def image_dequeue_proc(self):
    framecnt = 0
    endcnt = 0  # if idle for 2 minutes (120 x 1 s), save and quit
    frame_start_time = time.time()
    self.encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 99]
    while True:
        if self.imgq.empty():
            if endcnt >= 120:
                self.logfile.close()
                self.logfile2.close()
                print("byebye")
                sys.exit(0)
            else:
                print('nothing in q, sleeping..')
                time.sleep(1)
                endcnt += 1
        else:
            # could fetch several frames at once to speed this up; not yet though
            device_name = self.dev_nameq.get()
            counter = int(self.framecntq.get())
            frame = self.imgq.get()
            ty = self.typeq.get()
            if self.cuda:
                if ty == 'img':
                    # just detection, nothing else
                    self.detection_gpu(self.model, frame, counter)  # was `cnt`, which is undefined here
                elif ty == 'img_tracking':  # for tracking
                    self.width = frame.shape[0]
                    self.height = frame.shape[1]
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    positions = []
                    if int(counter) % self.frame_skip == 0:
                        # re-detect every frame_skip frames and rebuild the trackers
                        self.trackers = []
                        frame_tensor = cv_image2tensor(frame, self.input_size).unsqueeze(0)
                        frame_tensor = Variable(frame_tensor)
                        if self.cuda:
                            frame_tensor = frame_tensor.cuda()
                        detections = self.model(frame_tensor, self.cuda).cpu()
                        detections = process_result(detections, self.confidence, self.nms_thresh)
                        if len(detections) != 0:
                            detections = transform_result(detections, [frame], self.input_size)
                            for idx, detection in enumerate(detections):
                                if self.classes[int(detection[-1])] == "person":
                                    if float(detection[6]) > self.confidence:
                                        print("found!")
                                        pre_score = float(detection[6])               # prediction score
                                        pre_class = self.classes[int(detection[-1])]  # prediction class
                                        pre_x1 = int(detection[1])  # x1
                                        pre_y1 = int(detection[2])  # y1
                                        pre_x2 = int(detection[3])  # x2
                                        pre_y2 = int(detection[4])  # y2
                                        # self.draw_bbox([frame], detection, self.colors, self.classes)
                                        tracker = dlib.correlation_tracker()
                                        rect = dlib.rectangle(pre_x1, pre_y1, pre_x2, pre_y2)
                                        tracker.start_track(rgb, rect)
                                        self.trackers.append(tracker)
                    else:
                        # in-between frames: update the dlib trackers only
                        for tracker in self.trackers:
                            tracker.update(rgb)
                            pos = tracker.get_position()
                            startX = int(pos.left())
                            startY = int(pos.top())
                            endX = int(pos.right())
                            endY = int(pos.bottom())
                            positions.append((startX, startY, endX, endY))
                    objects = self.ct.update(positions)
                    for (objectID, centroid) in objects.items():
                        to = self.trobs.get(objectID, None)
                        if self.ct.checknumberofexisting():
                            self.sumframebytes += sys.getsizeof(frame)
                        if to == None:
                            to = trackableobject.TrackableObject(objectID, centroid)
                        else:
                            cv2.circle(frame, (centroid[0], centroid[1]), 4, (255, 255, 255), -1)
                            y = [c[1] for c in to.centroids]
                            x = [c[0] for c in to.centroids]
                            dirY = centroid[1] - np.mean(y)
                            dirX = centroid[0] - np.mean(x)
                            to.centroids.append(centroid)
                            if not to.counted:
                                if self.tr == "dr":
                                    prex, prey = self.ct.predict(objectID, 30)
                                    print("predicted obj movement..x,y: ", prex, prey)
                                    if self.checkboundary_dir(prex, prey) == "R":
                                        print("we need to send msg to right")
                                        p = self.ct.get_object_rect_by_id(objectID)  # x1, y1, x2, y2
                                        if p[0] <= 0 or p[1] <= 0 or p[2] <= 0 or p[3] <= 0:
                                            pass
                                        else:
                                            cv2.rectangle(frame, (p[0], p[1]), (p[2], p[3]), (255, 0, 0), 2)
                                            croppedimg = frame[p[1]:p[3], p[0]:p[2]]
                                            jsonified_data = MessageBus.create_message_list_numpy_handoff(croppedimg, self.encode_param, device_name, self.timegap)
                                            # self.msg_bus.send_message_str(self.center_device_ip_int, self.center_device_port, jsonified_data)  # send msg
                                    elif self.checkboundary_dir(prex, prey) == "L":
                                        print("we need to send msg to left")
                                        p = self.ct.get_object_rect_by_id(objectID)  # x1, y1, x2, y2
                                        if p[0] <= 0 or p[1] <= 0 or p[2] <= 0 or p[3] <= 0:
                                            pass
                                        else:
                                            cv2.rectangle(frame, (p[0], p[1]), (p[2], p[3]), (255, 0, 0), 2)
                                            croppedimg = frame[p[1]:p[3], p[0]:p[2]]
                                            print((p[0], p[1]), (p[2], p[3]))
                                            # cv2.imwrite(str(counter) + ".jpg", croppedimg)
                                            jsonified_data = MessageBus.create_message_list_numpy_handoff(croppedimg, self.encode_param, device_name, self.timegap)
                                            # self.msg_bus.send_message_str(self.left_device_ip_int, self.left_device_port, jsonified_data)
                                    elif self.checkboundary_dir(prex, prey) == "D":
                                        print("we need to send msg to down")
                                    elif self.checkboundary_dir(prex, prey) == "U":
                                        print("we need to send msg to up")
                                elif self.tr == "bc":
                                    self.checkboundary(centroid, objectID)
                                    # print("loc of object in frame: ", objectID, self.boundary[objectID])
                                    self.checkdir(dirX, dirY, objectID)
                                    # print("moving direction of the object: ", objectID, self.objstatus[objectID])
                                    self.where[objectID] = self.checkhandoff(objectID)
                                    # send hand-off msg here: throw the cropped img template to the
                                    # neighbor; upon receiving it, the neighbor does template matching
                                    # and adds the object to its dlib tracking queue
                                    if self.where[objectID] == "RIGHT":
                                        print("we need to send msg to right")
                                        p = self.ct.get_object_rect_by_id(objectID)  # x1, y1, x2, y2
                                        cv2.rectangle(frame, (p[0], p[1]), (p[2], p[3]), (255, 0, 0), 2)
                                        croppedimg = frame[p[1]:p[3], p[0]:p[2]]
                                        jsonified_data = MessageBus.create_message_list_numpy_handoff(croppedimg, self.encode_param, device_name, self.timegap)
                                        self.msg_bus.send_message_str(self.center_device_ip_int, self.center_device_port, jsonified_data)
                                    elif self.where[objectID] == "LEFT":
                                        print("we need to send msg to left")
                                        p = self.ct.get_object_rect_by_id(objectID)  # x1, y1, x2, y2
                                        cv2.rectangle(frame, (p[0], p[1]), (p[2], p[3]), (255, 0, 0), 2)
                                        croppedimg = frame[p[1]:p[3], p[0]:p[2]]
                                        jsonified_data = MessageBus.create_message_list_numpy_handoff(croppedimg, self.encode_param, device_name, self.timegap)
                                        self.msg_bus.send_message_str(self.right_device_ip_int, self.right_device_port, jsonified_data)
                                    elif self.where[objectID] == "TOP":
                                        print("we need to send msg to top")
                                    elif self.where[objectID] == "BOTTOM":
                                        print("we need to send msg to bottom")
                                    else:
                                        print("nothing is happening")
                        self.trobs[objectID] = to
                        text = "ID {}".format(objectID) + " " + str(centroid[0]) + " " + str(centroid[1])
                        cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)  # ID of the object
                    sendablecnt = 0
                    # cv2.imwrite('frame' + str(counter) + '.jpg', frame)
                    if self.display == "on":
                        cv2.imshow('NCS live inference', frame)
                        if cv2.waitKey(3) & 0xFF == ord('q'):
                            break
                elif ty == 'img_tracking_check':  # dummy here
                    # thing = self.detection_gpu_return(self.model, frame, counter)
                    print('not imp')
            else:
                # no GPU enabled
                if ty == 'img':
                    self.detection(frame)
                elif ty == 'img_tracking':
                    print("NOT IMP YET")
                    # self.detection_tracking(self.model, self.imgq.get(), counter)
                elif ty == 'img_tracking_check':
                    print("NOT IMP YET")
                    # thing = self.detection_gpu_return(self.model, self.imgq.get(), counter)
            curT = datetime.utcnow().strftime('%H:%M:%S.%f')  # string format
            decay = datetime.strptime(curT, '%H:%M:%S.%f') - self.timerq.get()
            frame_end_time = time.time()
            cumulative_fps = int(counter) / (frame_end_time - frame_start_time)
            print("estimated dequeue speed: ", str(cumulative_fps))
            # (this probably belongs in the processing thread rather than here)
            print(str(counter) + "\t" + str(device_name) + "\t" + str(decay.total_seconds()))
def detect_video(model, args):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(args.outdir, 'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    read_frames = 0
    start_time = datetime.now()
    print('Detecting...')
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)
            if args.cuda:
                frame_tensor = frame_tensor.cuda()
            detections = model(frame_tensor, args.cuda).cpu()
            print(detections.shape)
            # process_result changes the variable 'detections'
            detections = process_result(detections, args.obj_thresh, args.nms_thresh)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                for detection in detections:
                    draw_bbox([frame], detection, colors, classes)
                # CAUTION: xywh is a view into detections, so the two writes
                # below also overwrite detections[:, 1] and detections[:, 2];
                # with the * 2 factors commented out, the computed width and
                # height come out half-sized
                xywh = detections[:, 1:5]
                xywh[:, 0] = (detections[:, 1] + detections[:, 3]) / 2
                xywh[:, 1] = (detections[:, 2] + detections[:, 4]) / 2
                xywh[:, 2] = abs(detections[:, 3] - detections[:, 1])  # * 2
                xywh[:, 3] = abs(detections[:, 2] - detections[:, 4])  # * 2
                # the final bounding boxes that can be fed into DeepSORT
                xywh = xywh.cpu().data.numpy()
            out.write(frame)
            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)
        else:
            break
    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    print('Detected video saved to ' + output_path)
    return
def detect_video(model, args):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'det_webcam.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(args.outdir, 'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    read_frames = 0
    start_time = datetime.now()
    print('Detecting...')
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)
            if args.cuda:
                frame_tensor = frame_tensor.cuda()
            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh, args.nms_thresh)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                for detection in detections:
                    draw_bbox([frame], detection, colors, classes)
            if not args.no_show:
                cv2.imshow('frame', frame)
            out.write(frame)
            if read_frames % 2 == 0:
                print('Number of frames processed:', read_frames)
            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    if not args.no_show:
        cv2.destroyAllWindows()
    print('Detected video saved to ' + output_path)
    return
def detect_video(model, args):
    input_size = [int(model.net_info['height']), int(model.net_info['width'])]
    colors = pkl.load(open("pallete", "rb"))
    classes = load_classes("data/coco.names")
    colors = [colors[1]]
    if args.webcam:
        cap = cv2.VideoCapture(int(args.input))
        output_path = osp.join(args.outdir, 'gpu.avi')
    else:
        cap = cv2.VideoCapture(args.input)
        output_path = osp.join(args.outdir, 'det_' + osp.basename(args.input).rsplit('.')[0] + '.avi')
    width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    # NOTE: these run after width/height were read, so the writer below may be
    # opened with a size that no longer matches the capture
    cap.set(3, 1280)  # CAP_PROP_FRAME_WIDTH
    cap.set(4, 720)   # CAP_PROP_FRAME_HEIGHT
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    read_frames = 0
    start_time = datetime.now()
    print('Detecting...')
    while cap.isOpened():
        retflag, frame = cap.read()
        read_frames += 1
        if retflag:
            # was `original_frame = frame`, which aliases the frame that draw_bbox mutates
            original_frame = frame.copy()
            frame_tensor = cv_image2tensor(frame, input_size).unsqueeze(0)
            frame_tensor = Variable(frame_tensor)
            if args.cuda:
                frame_tensor = frame_tensor.cuda()
            detections = model(frame_tensor, args.cuda).cpu()
            detections = process_result(detections, args.obj_thresh, args.nms_thresh)
            if len(detections) != 0:
                detections = transform_result(detections, [frame], input_size)
                labels = []
                for detection in detections:
                    labels.append(classes[int(detection[-1])])
                state = state_trigger(labels)
                if state == 0:
                    for detection in detections:
                        draw_bbox([frame], detection, colors, classes)
                    cv2.imshow('frame', frame)
                elif state == 1:
                    frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2GRAY)
                    # bottomLeftCornerOfText, font, fontScale, fontColor and
                    # lineType are module-level constants not shown here
                    cv2.putText(frame, 'PHONE DETECTED', bottomLeftCornerOfText,
                                font, fontScale, fontColor, lineType)
                    for detection in detections:
                        if classes[int(detection[-1])] == "cell phone":
                            draw_bbox([frame], detection, colors, classes)
                    cv2.imshow('frame', frame)
                    # sleep(0.65)
                else:
                    cv2.imshow('frame', original_frame)
            out.write(frame)
            if read_frames % 30 == 0:
                print('Number of frames processed:', read_frames)
            if not args.no_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    end_time = datetime.now()
    print('Detection finished in %s' % (end_time - start_time))
    print('Total frames:', read_frames)
    cap.release()
    out.release()
    if not args.no_show:
        cv2.destroyAllWindows()
    print('Detected video saved to ' + output_path)
    return
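
# --- Hedged example (not part of the original source) ---
# state_trigger() is not defined in this listing. From how its result is used
# above (state 1 -> 'PHONE DETECTED' overlay, state 0 -> draw all boxes, and a
# third fall-through branch that shows the untouched frame), a minimal sketch
# consistent with that usage might be the following; the real function may well
# return more states or use debouncing across frames.
def state_trigger(labels):
    """Return 1 if a cell phone is among the detected labels, else 0."""
    return 1 if "cell phone" in labels else 0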