def build_sot(self):
    """Create the single-object tracker described by the command-line args.

    Merges ``self.args.config`` into the global ``cfg``, loads the weights
    stored in ``self.args.snapshot`` into a fresh ``ModelBuilder`` and wraps
    the network with ``build_tracker``.

    Returns:
        The object returned by ``build_tracker``.
    """
    cfg.merge_from_file(self.args.config)
    cfg.CUDA = torch.cuda.is_available()
    target_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    net = ModelBuilder()
    # Storages are deserialised on the CPU; the network is moved to the
    # target device only after the weights are in place.
    weights = torch.load(self.args.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(target_device)

    return build_tracker(net)
def test_snapshot(epoch: int, snapshot: str, test_path: str):
    """Run a training snapshot on a few validation pairs and save the images.

    A random sample folder below ``<COCO root>/val2017`` is chosen, the
    tracker is initialised on each template image (``*.z.jpg``) and run on
    the matching search image (``*.x.jpg``).  The predicted box (blue in
    BGR), the annotated box (red) and the score are drawn on the search
    image, which is written together with the template below
    ``test_path/<folder name>``.

    Args:
        epoch: Not used by this function; kept for caller compatibility.
        snapshot: Path of a checkpoint whose ``'state_dict'`` entry holds
            the model weights.
        test_path: Directory that receives the visualisations.
    """
    max_img = 8

    # model (always evaluated on the CPU here)
    model = ModelBuilder()
    data = torch.load(snapshot, map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(data['state_dict'])
    model.eval().to(torch.device('cpu'))
    tracker = build_tracker(model)

    root = cfg.DATASET.COCO.ROOT
    cur_path = os.path.dirname(os.path.realpath(__file__))
    root = os.path.join(cur_path, '../../', root)
    anno_path = os.path.join(root, '../', "val2017.json")
    with open(anno_path, 'r') as f:
        anno = json.load(f)
    anno = filter_zero(anno)

    dataset = os.path.join(root, "val2017")
    folder = random.choice(glob.glob(f"{dataset}/**"))
    zs = sorted(glob.glob(f"{folder}/*.z.jpg"))
    xs = sorted(glob.glob(f"{folder}/*.x.jpg"))

    # zip() pairs templates with search images and stops at the shorter
    # list, so a folder with fewer search images no longer raises
    # IndexError.  Annotations are looked up only for the pairs in use.
    for z_path, x_path in zip(zs[:max_img], xs):
        bbox = get_anno_from_img_path(anno, x_path)
        z = cv2.imread(z_path)
        x = cv2.imread(x_path)

        tracker.init_(z)
        cls, (x1, y1, x2, y2) = tracker.track(x)

        cv2.rectangle(x, (x1, y1), (x2, y2), (255, 0, 0), 2)
        a1, b1, a2, b2 = bbox
        cv2.rectangle(x, (a1, b1), (a2, b2), (0, 0, 255), 2)
        cv2.putText(x, 'Acc: ' + cls.astype('str'), (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        parent_dir = f"{test_path}/{os.path.basename(Path(z_path).parent)}"
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(parent_dir, exist_ok=True)
        cv2.imwrite(f"{parent_dir}/{os.path.basename(x_path)}", x)
        cv2.imwrite(f"{parent_dir}/{os.path.basename(z_path)}", z)
def __init__(self):
    """Build the tracker and then expose it through ROS.

    The model is loaded *before* the image subscriber and the ``init_rect``
    service are registered: both are bound to methods of this object
    (``receive_frame_and_track`` / ``set_init_rect``), and registering them
    first would allow a callback to run while ``self.tracker`` does not
    exist yet.
    """
    self.init_rect = None

    # Tracker first, so every callback registered below sees a complete object.
    cfg.TRACK.TYPE = config.TRACK_TYPE
    cfg.merge_from_file(config.CONFIG_PATH)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')
    model = ModelBuilder()
    model.load_state_dict(
        torch.load(config.MODEL_PATH,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)
    self.tracker = build_tracker(model)

    # ROS endpoints; the subscriber goes last because it is the one that
    # starts delivering frames.
    self.pysot_pub = rospy.Publisher(config.TRACK_PUB_TOPIC,
                                     Int32MultiArray, queue_size=10)
    self.service = rospy.Service("init_rect", InitRect, self.set_init_rect)
    self.img_sub = rospy.Subscriber(config.IMAGE_SUB_TOPIC, Image,
                                    self.receive_frame_and_track)
def init_track(self):
    """Load the SiamRPN AlexNet model and store the tracker on the instance.

    Reads the config and weights from ``./models/siamrpn_alex_dwxcorr`` and
    assigns the resulting tracker to ``self.tracker``.
    """
    # Location of the config file.
    config_path = './models/siamrpn_alex_dwxcorr/config.yaml'
    # Location of the snapshot (weights) file.
    snapshot_path = './models/siamrpn_alex_dwxcorr/model.pth'

    # Merge the file into the global configuration; CUDA is used only when
    # it is both available and enabled in that configuration.
    cfg.merge_from_file(config_path)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    run_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # Create the network and load its weights (deserialised on the CPU).
    net = ModelBuilder()
    state = torch.load(snapshot_path,
                       map_location=lambda storage, loc: storage.cpu())
    net.load_state_dict(state)
    net.eval().to(run_device)

    # Create the tracker.
    self.tracker = build_tracker(net)
def __init__(self, parent=None):
    """Wire up the main window's widgets and load the default tracker.

    Args:
        parent: Optional parent object forwarded to the base class.
    """
    super(MyMainWindow, self).__init__(parent)
    self.isDracula = False

    # Connect every widget signal to its handler.
    signal_handlers = (
        (self.pushButton_locationLoading.clicked, self.location_loading),
        (self.pushButton_videoLoading.clicked, self.video_loading),
        (self.pushButton_cameraLoading.clicked, self.camera_loading),
        (self.pushButton_bboxSetting.clicked, self.bbox_setting),
        (self.pushButton_algorithmProcessing.clicked,
         self.algorithm_processing),
        (self.scrollBar.valueChanged, self.slider_change),
        (self.selectBox.valueChanged, self.select_change),
        (self.checkBox.stateChanged, self.checkbox_change),
    )
    for signal, handler in signal_handlers:
        signal.connect(handler)

    # Flags controlling whether the message boxes are still shown.
    self.bbox_tips = True
    self.save_tips = True

    # Tracker files; the tracker is named after its experiment directory.
    model_location = './pysot/experiments/siammaske_r50_l3'
    self.config = f'{model_location}/config.yaml'
    self.snapshot = f'{model_location}/model.pth'
    self.tracker_name = model_location.rsplit('/', 1)[-1]
    self.video_name = ''

    cfg.merge_from_file(self.config)
    cfg.CUDA = torch.cuda.is_available()
    run_device = torch.device('cuda' if cfg.CUDA else 'cpu')
    net = ModelBuilder()
    weights = torch.load(self.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(run_device)
    self.tracker = build_tracker(net)

    # Remaining session state.
    self.vs = None
    self.analysis_box = None
    self.analysis_max = 10
    self.save_location = ''
    self.afterCamera = False
    self.bbox_list_predict = []  # [time][tracker]
def run_tracker_pysot(args):
    """Track one sequence with a pysot model and write the boxes to disk.

    The first frame is initialised from ``initial_BB.txt`` inside
    ``<args.path>/Sequences/<YT_ID>_<ID>``; every later frame contributes
    the tracker's ``'bbox'`` output.  All boxes are written, one
    comma-separated line per frame, to
    ``<sequence dir>/results/<tracker_name>/<video name>.txt``.

    Args:
        args: Namespace providing ``tracker_name``, ``YT_ID``, ``ID`` and
            ``path``; it is also passed on to ``get_frames``.

    Raises:
        SystemExit: If the initial bounding box cannot be read.  (The
            original exited silently; the reason is now reported.)
    """
    # load config
    config = f'pysot/experiments/{args.tracker_name}/config.yaml'
    snapshot = f'pysot/experiments/{args.tracker_name}/model.pth'
    cfg.merge_from_file(config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and load its weights
    model = ModelBuilder()
    model.load_state_dict(
        torch.load(snapshot, map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    if args.YT_ID:
        video_name = args.YT_ID.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'

    # One place for the sequence directory used for both input and output.
    sequence_dir = os.path.join(args.path, 'Sequences',
                                f'{args.YT_ID}_{args.ID}')

    pred_bboxes = []
    for frame_idx, frame in enumerate(get_frames(args)):
        if frame_idx == 0:
            init_path = os.path.join(sequence_dir, 'initial_BB.txt')
            try:
                init_rect = np.loadtxt(init_path, delimiter=',',
                                       dtype=np.float64)
            except (OSError, ValueError) as err:
                raise SystemExit(
                    f'cannot read initial bounding box from {init_path}: '
                    f'{err}') from err
            tracker.init(frame, init_rect)
            pred_bboxes.append(init_rect)
        else:
            pred_bboxes.append(tracker.track(frame)['bbox'])

    model_path = os.path.join(sequence_dir, 'results', args.tracker_name)
    os.makedirs(model_path, exist_ok=True)
    result_path = os.path.join(model_path, f'{video_name}.txt')
    with open(result_path, 'w') as f:
        f.writelines(','.join(str(v) for v in box) + '\n'
                     for box in pred_bboxes)
def main():
    """Track a test sequence from its ground-truth box, display it, report FPS.

    Command-line defaults come from the module-level ``config``,
    ``snapshot`` and ``video`` names.  The first frame is initialised from
    the first line of
    ``testing_dataset/rssrai/<sequence>/groundtruth.txt``; every later frame
    is tracked, annotated and shown.  Only the time spent in
    ``tracker.track`` is accumulated for the FPS figure printed at the end.

    Raises:
        SystemExit: If no video name is given or the ground-truth file
            cannot be read or parsed.
    """
    # load parameters
    parser = argparse.ArgumentParser(description='tracking demo')
    parser.add_argument('--config', type=str, help='config file',
                        default=config)
    parser.add_argument('--snapshot', type=str, help='model name',
                        default=snapshot)
    parser.add_argument('--video_name', type=str,
                        help='videos or image files', default=video)
    args = parser.parse_args()

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and load its weights
    model = ModelBuilder()
    model.load_state_dict(torch.load(
        args.snapshot, map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        path_parts = args.video_name.split('/')
        video_name = path_parts[-1].split('.')[0]
        # The sequence name is the directory two levels above the file.
        imgname = path_parts[-3].split('.')[0]
        name_fields = imgname.split('_')
        imgname2 = name_fields[-2] + '_' + name_fields[-1]
        print(imgname2)
        print('model:' + param + ' video_name:' + imgname)
    else:
        video_name = 'webcam'
        imgname = imgname2 = None
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    directory = 'testing_dataset/result/' + param + '/'
    os.makedirs(directory, exist_ok=True)

    timer = 0  # seconds spent inside tracker.track()
    num = 0    # frames seen, including the initialisation frame

    for frame in get_frames(args.video_name):
        start = cv2.getTickCount()
        num += 1
        if first_frame:
            if imgname is None:
                raise SystemExit(
                    'a --video_name is required: the initial box is read '
                    'from the sequence ground truth')
            # Change this to the location of your own test sequences.
            gt_path = 'testing_dataset/rssrai/' + imgname + '/groundtruth.txt'
            try:
                # `with` closes the file; the original leaked the handle.
                with open(gt_path, 'r') as gt_file:
                    init_rect = list(map(int, gt_file.readline().split(',')))
            except (OSError, ValueError) as err:
                raise SystemExit(
                    f'cannot read initial box from {gt_path}: {err}') from err
            print(init_rect)
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            end = cv2.getTickCount()
            timer += (end - start) / cv2.getTickFrequency()

            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask, mask * 255]).transpose(1, 2, 0)
                gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
                _, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
                # [-2] selects the contour list on both OpenCV 3.x (three
                # return values) and 4.x (two return values).
                contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                            cv2.CHAIN_APPROX_NONE)[-2]
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
                if len(contours) > 0:
                    # Box around the largest contour; an empty mask simply
                    # draws no box instead of raising IndexError.
                    largest = max(contours, key=cv2.contourArea)
                    bx, by, bw, bh = cv2.boundingRect(largest)
                    cv2.rectangle(frame, (bx, by), (bx + bw, by + bh),
                                  (0, 0, 255), 2)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 2)

            cv2.putText(frame, imgname2, (5, 50),
                        cv2.FONT_HERSHEY_COMPLEX, 2.0, (255, 0, 0), 2)
            cv2.putText(frame, str(num), (5, 120),
                        cv2.FONT_HERSHEY_COMPLEX, 2.0, (255, 0, 0), 2)
            cv2.namedWindow(video_name, 0)
            cv2.resizeWindow(video_name, 1000, 800)
            cv2.imshow(video_name, frame)
            cv2.waitKey(30)

    # Nothing was timed when fewer than two frames were read; report 0
    # instead of dividing by zero.
    fps = int(num / timer) if timer > 0 else 0
    print('FPS:%d' % (fps))
def main():
    """Interactive tracking demo that also reports the mean iteration time.

    The user selects the target on the first frame.  For every later frame
    the tracker output is shown: a mask overlay when the tracker returns a
    polygon, otherwise a binary box mask next to the annotated frame.
    Pressing ESC stops the loop.  The mean time per tracked frame
    (tracking, drawing and display) is printed at the end.

    Raises:
        SystemExit: If the ROI selection fails.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and load its weights
    model = ModelBuilder()
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    stat_time = []
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except cv2.error as err:
                raise SystemExit(f'ROI selection failed: {err}') from err
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        cur_time = time.time()
        outputs = tracker.track(frame)
        if 'polygon' in outputs:
            polygon = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            final_frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
        else:
            bbox = list(map(int, outputs['bbox']))
            frame_showing = frame.copy()
            cv2.rectangle(frame_showing, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 0), 3)
            # Clamp to the image: a negative index would otherwise wrap
            # around and paint the wrong region (or nothing at all).
            x1, y1 = max(bbox[0], 0), max(bbox[1], 0)
            x2, y2 = max(bbox[0] + bbox[2], 0), max(bbox[1] + bbox[3], 0)
            frame_output = np.zeros_like(frame)
            frame_output[y1:y2, x1:x2] = 255
            final_frame = cv2.hconcat((frame_output, frame_showing))

        cv2.imshow(video_name, final_frame)
        # After masking with 0xff, ESC is always 27; the extra comparison
        # with 1048603 (0x10001B) in the original could never be true.
        key_pressed = cv2.waitKey(1) & 0xff
        if key_pressed == 27:
            print('exited the program by pressing ESC')
            break
        stat_time.append(time.time() - cur_time)

    if stat_time:
        print('average iteration time =', np.average(stat_time))
    else:
        # np.average([]) would only produce NaN and a RuntimeWarning.
        print('average iteration time = n/a (no frame was tracked)')
def main():
    """Semi-automatic labelling: track a target and write one label per frame.

    Each frame is histogram-equalised.  The user selects the target on the
    first frame (repeated until ``check_rect`` accepts it); later frames
    are tracked.  When the tracker's ``best_score`` drops below 0.9 the
    frame is skipped and the user re-selects on the next frame.  For every
    accepted frame ``generate_label`` receives the box as
    ``[x1, y1, x2, y2, class_name]``.

    Raises:
        SystemExit: If the ROI selection fails.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and load its weights
    model = ModelBuilder()
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    class_name = get_classname(args.video_name)
    for frame, image_path in get_frames(args.video_name, args.start_index):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame = cv2.equalizeHist(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)

        if first_frame:
            while True:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                except cv2.error as err:
                    raise SystemExit(f'ROI selection failed: {err}') from err
                if check_rect(init_rect):
                    break
            tracker.init(frame, init_rect)
            first_frame = False
            labels = [[
                init_rect[0], init_rect[1],
                init_rect[0] + init_rect[2], init_rect[1] + init_rect[3]
            ]]
        else:
            outputs = tracker.track(frame)
            # Read the box for every tracker type.  The original bound
            # `bbox` only in the non-polygon branch, so the label below
            # used an unbound (or stale) box for mask trackers.
            bbox = list(map(int, outputs['bbox']))
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)

            print(outputs['best_score'])
            if outputs['best_score'] < 0.9:
                # Low confidence: drop this frame and ask for a new box.
                first_frame = True
                continue

            labels = [[bbox[0], bbox[1],
                       bbox[0] + bbox[2], bbox[1] + bbox[3]]]
            cv2.imshow(video_name, frame)
            cv2.waitKey(40)

        labels[0].append(class_name)
        generate_label(labels, frame.shape[:2], path=image_path)
frame = cv2.imread(img) yield frame if __name__ == '__main__': # load config cfg.merge_from_file(args.config) cfg.CUDA = torch.cuda.is_available() device = torch.device('cuda' if cfg.CUDA else 'cpu') # create model model = ModelBuilder() # load model model.load_state_dict( torch.load(args.snapshot, map_location=lambda storage, loc: storage.cpu())) model.eval().to(device) # build tracker tracker = build_tracker(model) # init_frame = np.load('../chainer-pysot/init.npz')['frame'] # init_rect = np.load('../chainer-pysot/init.npz')['init_rect'] # second_frame = np.load('../chainer-pysot/output.npz')['second_frame'] # tracker.init(init_frame, init_rect) # np.savez('../chainer-pysot/init.npz', # frame=init_frame, zfs=[f.detach().cpu().numpy() for f in tracker.model.zf], init_rect=init_rect) # tracker.track(second_frame) # raise ValueError
def showImage():
    """ROS node loop: track a mouse-selected target and publish its offset.

    Loads the SiamRPN model, subscribes to ``/camera/rgb/image_raw`` and
    publishes a ``Pose`` on ``/vision/target``.  ``position.x`` and
    ``position.y`` hold the box centre's offset from the image centre,
    normalised by half the image size.  ``position.z`` is 0 before
    tracking starts, 1 while tracking and -1 once the target is flagged
    as lost (score below 0.5 on more than four consecutive frames).

    The selection state (``x1, y1, x2, y2, drawing, init, start``) and the
    latest frame (``image``, ``getim``) are module globals; the code that
    updates them is outside this function.
    """
    global x1, y1, x2, y2, drawing, init, flag, image, getim, start
    rospy.init_node('RPN', anonymous=True)
    flag = 1
    init = False
    drawing = False
    getim = False
    start = False
    x1, x2, y1, y2 = -1, -1, -1, -1
    flag_lose = False
    count_lose = 0

    print('laoding model...........')
    path = sys.path[0]
    path = path[0:-5] + 'third-party/pysot/'
    cfg.merge_from_file(
        path + '/experiments/siamrpn_r50_l234_dwxcorr/config.yaml')
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')
    model = ModelBuilder()
    # Honour the device chosen above.  The original computed `device` but
    # then loaded without map_location and forced the model onto CUDA,
    # which fails on CPU-only hosts and disagrees with cfg.CUDA when CUDA
    # is disabled in the config.
    pre = torch.load(path + 'pretrained/model.pth', map_location=device)
    model.load_state_dict(pre)
    model.eval().to(device)
    tracker = build_tracker(model)
    print('ready for starting!')

    rospy.Subscriber('/camera/rgb/image_raw', Image, callback)
    pub = rospy.Publisher('/vision/target', Pose, queue_size=10)
    cv2.namedWindow('image')
    cv2.setMouseCallback('image', draw_circle)
    rate = rospy.Rate(30)

    i = 1
    t = time.time()
    fps = 0
    while not rospy.is_shutdown():
        if getim:
            idd = readid(image)
            pose = Pose()
            pose.position.z = 0

            if start is False and init is True:
                # A selection was completed: (re)initialise the tracker.
                init_rect = np.array([x1, y1, x2 - x1, y2 - y1])
                tracker.init(image, init_rect)
                start = True
                flag_lose = False
                continue

            if start is True:
                outputs = tracker.track(image)
                bbox = list(map(int, outputs['bbox']))
                corner = (bbox[0] + bbox[2], bbox[1] + bbox[3])
                cv2.rectangle(image, (bbox[0], bbox[1]), corner,
                              (0, 255, 255), 2)
                half_w = image.shape[1] / 2
                half_h = image.shape[0] / 2
                pose.position.x = (bbox[0] + bbox[2] / 2 - half_w) / half_w
                pose.position.y = (bbox[1] + bbox[3] / 2 - half_h) / half_h
                cv2.putText(image, str(outputs['best_score']), corner,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
                pose.position.z = 1
                if outputs['best_score'] < 0.5:
                    count_lose = count_lose + 1
                else:
                    count_lose = 0
                if count_lose > 4:
                    flag_lose = True

            if flag_lose is True:
                cv2.putText(image, 'target is lost!', (200, 200),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)
                pose.position.z = -1
            if drawing is True:
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cv2.putText(image, '#' + str(idd), (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            # Cross-hair at the image centre.
            cx = int(image.shape[1] / 2)
            cy = int(image.shape[0] / 2)
            cv2.line(image, (cx - 20, cy), (cx + 20, cy), (255, 255, 255), 2)
            cv2.line(image, (cx, cy - 20), (cx, cy + 20), (255, 255, 255), 2)
            pub.publish(pose)

            # The frame rate is refreshed once every five tracked frames.
            if start is True:
                i = i + 1
            if i > 5:
                i = 1
                fps = 5 / (time.time() - t)
                t = time.time()
            cv2.putText(image, 'fps=' + str(fps), (200, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cv2.imshow('image', image)
            cv2.waitKey(1)
            getim = False
        rate.sleep()
def main(threshold_correct, tolerance):
    """Track a selected object and classify the tracked region per frame.

    After the user selects the target on the first frame, every later
    frame is tracked, the region inside the tracked box is classified with
    a KNN classifier and the stability of the prediction is monitored.

    Args:
        threshold_correct: Number of predictions equal to the previous one
            that must accumulate before the result is announced.  The
            original compared against a hard-coded 10 and ignored this
            parameter.
        tolerance: Number of changed predictions after which both counters
            are reset.

    Raises:
        SystemExit: If the ROI selection fails.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and load its weights
    model = ModelBuilder()
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    counter_threshold_correct = 0
    counter_tolerance = 0
    previous_color = "none"
    clf = KNNClassifier(1)
    # NOTE(review): machine-specific absolute path, left unchanged here.
    clf.load(
        "/Users/aussabbood/github/SwaliO/pysot/tools/saved_models/model.pkl")

    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except cv2.error as err:
                raise SystemExit(f'ROI selection failed: {err}') from err
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        outputs = tracker.track(frame)
        # Needed below for every tracker type; the original bound `bbox`
        # only in the non-polygon branch.
        bbox = list(map(int, outputs['bbox']))
        if 'polygon' in outputs:
            polygon = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) *
                    255).astype(np.uint8)
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
        else:
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 255, 0), 3)

        frame_excerpt = frame[bbox[1]:(bbox[1] + bbox[3]),
                              bbox[0]:(bbox[0] + bbox[2])]
        try:
            pred = clf.predict(frame_excerpt)
        except cv2.error:
            print("[WARNING] Force none due to error!")
            pred = ['none']
        print(pred[0])

        if pred[0] == previous_color:
            counter_threshold_correct += 1
        else:
            counter_tolerance += 1

        # Counters grow by one per frame, so `>=` fires on the same frame
        # as `==` for integer limits and cannot be skipped otherwise.
        if counter_tolerance >= tolerance:
            counter_threshold_correct = 0
            counter_tolerance = 0
        if counter_threshold_correct >= threshold_correct:
            print(f"Its a {pred[0]} bottle")
            counter_threshold_correct = 0
            counter_tolerance = 0

        previous_color = pred[0]
        print(f"threshold:{counter_threshold_correct}")
        print(f"tollerance:{counter_tolerance}")
        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def main():
    """Edge-side loop: detect key frames in the cloud, track the rest locally.

    The first image, and later every image where ``index % 10 == 0`` and
    either ``is_key`` reports a change or ``index % 20 == 0``, is published
    to the cloud; the returned boxes re-initialise the pool of tracker
    workers.  All other images are tracked by the workers.  Every
    annotated image is written to ``./test/output/<n>.jpg``.
    """
    # Initialize ecci sdk and connect to the broker in edge-cloud
    ecci_client = Client()
    mqtt_thread = threading.Thread(target=ecci_client.initialize)
    mqtt_thread.start()
    ecci_client.wait_for_ready()
    print('edge start --------')

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and load its weights; map_location keeps a checkpoint
    # saved on a GPU loadable when `device` is the CPU
    model = ModelBuilder()
    checkpoint = torch.load(args.snapshot, map_location=device)
    model.load_state_dict(checkpoint)
    for param in model.parameters():
        param.requires_grad = False
    model.eval().to(device)

    # multiprocessing: a fixed pool of tracker workers sharing one queue
    manager = mp.Manager()
    resQueue = manager.Queue()
    multiProcess = []
    label = []
    probs = []
    for _ in range(10):
        worker = build_multitracker(model, label, probs, resQueue)
        multiProcess.append(worker)
        worker.start()

    def rate(elapsed):
        """Return operations per second, tolerating a zero-length interval."""
        return 1 / elapsed if elapsed > 0 else float('inf')

    def detect_on_cloud(frame):
        """Send `frame` to the cloud, re-init the workers, draw the boxes."""
        payload = {"type": "data", "contents": {"frame": frame}}
        print("####################", payload)
        ecci_client.publish(payload, "cloud")
        cloud_data = ecci_client.get_sub_data_payload_queue().get()
        print("###########recieve data from cloud", cloud_data)
        boxes = cloud_data["bbox"]
        names = cloud_data["label"]
        scores = cloud_data["probs"]

        count = len(boxes)
        if count > len(multiProcess):
            # The pool is fixed; indexing past it raised IndexError before.
            print('[WARNING] %d detections but only %d trackers; '
                  'extra detections are ignored' % (count, len(multiProcess)))
            count = len(multiProcess)

        t_detect_start = time.time()
        for i in range(count):
            bx1, by1, bx2, by2 = boxes[i][:4]
            init_rect = [bx1, by1, bx2 - bx1, by2 - by1]
            multiProcess[i].init(frame, init_rect, names[i], scores[i])
            cv2.rectangle(frame, (int(bx1), int(by1)), (int(bx2), int(by2)),
                          (0, 0, 255), 3)
            # Text origin cast to int like the rectangle corners above.
            cv2.putText(frame, '%s: %.3f' % (names[i], scores[i]),
                        (int(bx1), int(by1) - 15), cv2.FONT_HERSHEY_PLAIN,
                        1.0, (0, 0, 255), thickness=1)
        print("detect fps : ", rate(time.time() - t_detect_start))
        return names, scores, count

    def track_locally(frame, names, scores, count):
        """Ask `count` workers to track `frame` and draw their results."""
        t_track_start = time.time()
        for i in range(count):
            multiProcess[i].track(frame)
        print("track fps : ", rate(time.time() - t_track_start))
        for i in range(count):
            resDict = resQueue.get()
            print(resDict)
            box = list(map(int, resDict['bbox']))
            cv2.rectangle(frame, (box[0], box[1]),
                          (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 3)
            cv2.putText(frame, '%s: %.3f' % (names[i], scores[i]),
                        (box[0], box[1] - 15), cv2.FONT_HERSHEY_PLAIN,
                        1.0, (0, 255, 0), thickness=1)

    first_frame = True
    key_frame = None
    index = 1
    num_process = 0
    image_files = sorted(glob.glob('./test/image/*.JPEG'))
    for f, image_file in enumerate(image_files):
        frame = cv2.imread(image_file)
        # is_key() runs only on every tenth frame since the last key frame.
        needs_cloud = first_frame or (
            index % 10 == 0
            and (is_key(key_frame, frame) or index % 20 == 0))
        if needs_cloud:
            # keyframe need to be uploaded to cloud
            print('first frame' if first_frame else 'key frame')
            key_frame = frame
            label, probs, num_process = detect_on_cloud(frame)
            first_frame = False
            index = 1
        else:
            print('non-key frame')
            track_locally(frame, label, probs, num_process)
            index += 1
        cv2.imwrite('./test/output/%s.jpg' % f, frame)

    # Join the workers started above.  The original also appended ten new,
    # never-started trackers here, which was a copy/paste slip.
    for worker in multiProcess:
        worker.join()
def main():
    """Semi-automatic multi-object labelling driven by ``MOTrackerWrapper``.

    On an initialisation frame the user draws boxes until ``check_rect``
    rejects one; the accepted boxes initialise the wrapper.  On other
    frames the wrapper tracks, the boxes are shown and the program waits
    for a key: ``m``, or an uncertainty reported by the wrapper, discards
    the frame and requests a new initialisation.  Frames with at least one
    label are passed to ``generate_label``.
    """
    # Configuration and device.
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    run_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # Network with weights deserialised on the CPU.
    net = ModelBuilder()
    weights = torch.load(args.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(run_device)

    tracker = build_tracker(net)

    video_name = (args.video_name.split('/')[-1].split('.')[0]
                  if args.video_name else 'webcam')
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    class_name = get_classname(args.video_name)
    mot_tracker = MOTrackerWrapper(tracker, class_name)

    needs_init = True
    for frame, image_path in get_frames(args.video_name, args.start_index):
        # Histogram-equalise the frame, keeping three channels.
        grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        grey = cv2.equalizeHist(grey)
        frame = cv2.cvtColor(grey, cv2.COLOR_GRAY2RGB)

        if needs_init:
            init_rects = []
            while True:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                    # init_rect = (311, 136, 120, 125)
                except:
                    exit()
                if not check_rect(init_rect):
                    break
                init_rects.append(init_rect)
            mot_tracker.init(frame, init_rects)
            needs_init = False
        else:
            has_uncertainty = mot_tracker.track(frame)
            labels = mot_tracker.labels
            for box in labels:
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]),
                              (0, 255, 0), 3)
            cv2.imshow(video_name, frame)
            cv2.waitKey(40)
            # Blocks for a key press unless the tracker is already unsure.
            if has_uncertainty or (cv2.waitKey(0) == ord('m')):
                needs_init = True
                continue

        labels = mot_tracker.labels
        if len(labels) != 0:
            generate_label(labels, frame.shape[:2], path=image_path)
def main():
    """Track every start-second proposal through its clip and pickle the result.

    Clips named ``tracker%video%person%sec_start%..%sec_end.<ext>`` are
    indexed by ``video.person``.  For each entry of the selected slice of
    ``vp_list`` and each proposal at the clip's first second, the tracker
    runs over the clip.  Once per second (every ``fps`` frames) the
    tracked box is matched to the proposal with the highest IoU, if that
    second has proposals, and the tracker is re-initialised on it.  The
    per-second boxes are written to ``outputs/<video>%<person>%<obj>.pkl``.
    """
    os.makedirs('outputs/', exist_ok=True)
    video_dir = '/home/pris1/Downloads/clips'

    vp_to_file_dict = dict()
    for videofile in os.listdir(video_dir):
        # `tracker_name`, not `tracker`: the original reused the name of
        # the tracker object created further down.
        tracker_name, video, person, sec_start, _, _ = \
            os.path.splitext(videofile)[0].split('%')
        if tracker_name == args.human_tracker:
            vp_to_file_dict[video + '.' + person] = [videofile, int(sec_start)]

    # load config (this script requires CUDA)
    cfg.merge_from_file(args.config)
    cfg.CUDA = True
    device = torch.device('cuda')

    # create model and load its weights
    model = ModelBuilder()
    model.load_state_dict(torch.load(
        args.snapshot, map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    # NOTE(review): pickle.load executes arbitrary code from the file;
    # only use it on trusted inputs.
    with open(args.vp_list, 'rb') as fh:
        vp_list = pickle.load(fh)
    with open(args.proposal_dict, 'rb') as fh:
        proposal_dict = pickle.load(fh)
    with open(args.vp_dict, 'rb') as fh:
        vp_dict = pickle.load(fh)

    # -1 as the last bound means "up to the end".  The original wrote `==`
    # here, a no-op comparison, so -1 reached the slice and silently
    # dropped the last entry.
    bounds = list(args.range)
    if bounds[-1] == -1:
        bounds[-1] = len(vp_dict)
    vp_range = vp_list[bounds[0]:bounds[1]]

    for vp_idx, vp in enumerate(vp_range):
        videofile, sec_start = vp_to_file_dict[vp]
        video, person = vp.split('.')
        start_vf = video + '.' + str(sec_start)
        if start_vf not in proposal_dict:
            continue

        start_length = len(proposal_dict[start_vf])
        for obj_idx in range(start_length):
            print('vp: {} / {}, obj: {} / {}'.format(
                vp_idx + 1, len(vp_range), obj_idx + 1, start_length))
            track_id = '%'.join([video, person, str(obj_idx)])
            track_dict = dict()
            start_rect = copy.deepcopy(proposal_dict[start_vf][obj_idx])

            cap = cv2.VideoCapture(os.path.join(video_dir, videofile))
            try:
                # A missing FPS property reads as 0; fall back to 1 so the
                # per-second arithmetic below cannot divide by zero.
                fps = max(int(cap.get(cv2.CAP_PROP_FPS)), 1)
                frame_idx = 0
                first_frame = True
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    sec = frame_idx // fps + sec_start
                    if frame_idx % fps != 0:
                        # Between seconds: just keep the tracker updated.
                        tracker.track(frame)
                    elif first_frame:
                        init_rect = copy.deepcopy(start_rect)
                        track_dict[sec] = copy.deepcopy(init_rect)
                        # corner form (x1, y1, x2, y2) -> (x, y, w, h)
                        init_rect[2] -= init_rect[0]
                        init_rect[3] -= init_rect[1]
                        tracker.init(frame, init_rect)
                        first_frame = False
                    else:
                        outputs = tracker.track(frame)
                        bbox = outputs['bbox']
                        # (x, y, w, h) -> corner form (x1, y1, x2, y2)
                        bbox[2] += bbox[0]
                        bbox[3] += bbox[1]
                        vf = video + '.' + str(sec)
                        if vf in proposal_dict:
                            # Snap to the best-overlapping proposal and
                            # restart the tracker from it.
                            proposals = copy.deepcopy(proposal_dict[vf])
                            track_results = np.expand_dims(
                                np.array(bbox), axis=0)
                            iou_mat = calc_iou(track_results, proposals)[0]
                            max_proposal_idx = np.argmax(iou_mat)
                            proposal_adjust = copy.deepcopy(
                                proposals[max_proposal_idx])
                            track_dict[sec] = copy.deepcopy(proposal_adjust)
                            proposal_adjust[2] -= proposal_adjust[0]
                            proposal_adjust[3] -= proposal_adjust[1]
                            tracker.init(frame, proposal_adjust)
                        else:
                            track_dict[sec] = copy.deepcopy(bbox)
                    frame_idx += 1
            finally:
                # The original never released the capture handle.
                cap.release()

            with open('outputs/{}.pkl'.format(track_id), 'wb') as fh:
                pickle.dump(track_dict, fh)
def main():
    """Interactive tracking demo that also crops the tracked region.

    The user selects the target on the first frame; each later frame is
    tracked and displayed with either the mask overlay (polygon trackers)
    or the bounding box.  The crops taken from the selected and tracked
    regions are computed but not used further.
    """
    # Configuration and device.
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    run_device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # Network with weights deserialised on the CPU.
    net = ModelBuilder()
    weights = torch.load(args.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    net.load_state_dict(weights)
    net.eval().to(run_device)

    tracker = build_tracker(net)

    video_name = (args.video_name.split('/')[-1].split('.')[0]
                  if args.video_name else 'webcam')
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    awaiting_selection = True
    for frame in get_frames(args.video_name):
        if awaiting_selection:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            # Crop of the user-selected region (x, y, w, h).
            left, top = init_rect[0], init_rect[1]
            crop_initial = frame[top:top + init_rect[3],
                                 left:left + init_rect[2]]
            awaiting_selection = False
            continue

        outputs = tracker.track(frame)

        # Truncate the box to integers in place (same object as
        # outputs['bbox']), then crop the tracked region.
        track_rect = outputs['bbox']
        for k in range(4):
            track_rect[k] = int(outputs['bbox'][k])
        crop_tracked = frame[track_rect[1]:track_rect[1] + track_rect[3],
                             track_rect[0]:track_rect[0] + track_rect[2]]

        if 'polygon' in outputs:
            outline = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [outline.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            overlay = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            overlay = overlay.astype(np.uint8)
            overlay = np.stack(
                [overlay, overlay * 255, overlay]).transpose(1, 2, 0)
            frame = cv2.addWeighted(frame, 0.77, overlay, 0.23, -1)
        else:
            box = list(map(int, outputs['bbox']))
            top_left = (box[0], box[1])
            bottom_right = (box[0] + box[2], box[1] + box[3])
            cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 3)

        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def main():
    """Edge-side loop: track locally, ask the cloud for detections on key frames.

    For every image of the dataset the function either
    * publishes the frame to the "cloud" topic, waits for the returned
      boxes/labels/scores, writes them with ``write_txt`` and restarts one
      tracker worker per box, or
    * lets the running workers track the frame and writes their results.

    A frame goes to the cloud when the dataset flags it as the first frame
    of a sequence, or when, on every 5th frame since the last upload,
    ``is_key`` reports a key frame or 15 frames have passed. ``map_compute``
    is called once after the whole dataset has been processed.
    """
    # Initialize ecci sdk and connect to the broker in edge-cloud
    ecci_client = Client()
    mqtt_thread = threading.Thread(target=ecci_client.initialize)
    mqtt_thread.start()
    ecci_client.wait_for_ready()
    print('edge start --------')

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model; map to CPU first so a snapshot saved on a GPU machine
    # still loads when this host falls back to the CPU device
    checkpoint = torch.load(args.snapshot,
                            map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(checkpoint)
    for param in model.parameters():
        param.requires_grad = False
    model.eval().to(device)

    # multiprocessing
    manager = mp.Manager()
    resQueue = manager.Queue()
    multiProcess = []

    # VID dataloader
    dataset = ImagenetDataset(args.dataset, is_val=True)
    true_case_stat, all_gb_boxes = group_annotation_by_class(dataset)

    def detect_on_cloud(frame, frame_idx, running):
        """Upload `frame`, record the returned detections, restart the workers."""
        # close the last multiprocessing
        for worker in running:
            worker.join()

        # send frame to cloud
        payload = {"type": "data", "contents": {"frame": frame}}
        print("####################", payload)
        ecci_client.publish(payload, "cloud")

        # get rect from cloud
        cloud_data = ecci_client.get_sub_data_payload_queue().get()
        print("###########recieve data from cloud", cloud_data)
        bbox = cloud_data["bbox"]
        label = cloud_data["label"]
        probs = cloud_data["probs"]

        # write txt
        for i in range(len(bbox)):
            write_txt(dataset, frame_idx, bbox[i], label[i], probs[i])

        # start multiprocessing: one worker per detected box
        workers = [
            build_multitracker(model, label[i], probs[i], resQueue)
            for i in range(len(bbox))
        ]
        for worker, box in zip(workers, bbox):
            # the boxes are used as corner pairs here; the worker is given
            # (x, y, width, height)
            init_rect = [box[0], box[1], box[2] - box[0], box[3] - box[1]]
            worker.init(frame, init_rect)
            worker.start()
        return workers

    def track_locally(frame, frame_idx, running, report_time=False):
        """Track `frame` with every running worker and write the results."""
        for worker in running:
            worker.track(frame)
        started = time.time()
        for _ in running:
            resDict = resQueue.get()
            box = resDict['bbox']
            # convert (x, y, width, height) back to corner form
            resDict['bbox'] = [box[0], box[1],
                               box[0] + box[2], box[1] + box[3]]
            # tracked results are written with the score lowered by 0.1
            write_txt(dataset, frame_idx, resDict['bbox'],
                      resDict['label'], resDict['probs'] - 0.1)
        if report_time:
            print(time.time() - started)

    # defined up front so a dataset that does not start with a flagged first
    # frame cannot hit an unbound name
    key_frame = None
    index = 1

    for f in range(len(dataset)):
        frame, first_frame = dataset.get_image(f)
        if first_frame:
            # keyframe need to be uploaded to cloud
            print('first frame upload to cloud')
            multiProcess = detect_on_cloud(frame, f, multiProcess)
            key_frame = frame
            index = 1
        elif index % 5 == 0:
            if is_key(key_frame, frame) or index % 15 == 0:
                # keyframe need to be uploaded to cloud
                print('key frame upload to cloud')
                multiProcess = detect_on_cloud(frame, f, multiProcess)
                key_frame = frame
                index = 1
            else:
                print('track locally')
                track_locally(frame, f, multiProcess)
                index += 1
        else:
            print('track locally')
            track_locally(frame, f, multiProcess, report_time=True)
            index += 1

    map_compute()
def main():
    """Edge-side video demo: cloud detection on key frames, local tracking otherwise.

    Reads ``./demo/video.avi`` frame by frame, annotates every frame with
    either the cloud detections (red) or the local tracker results (green)
    and writes the annotated frames to ``./demo/output/video.avi``.

    A frame goes to the cloud when it is the first frame, or when, on every
    5th frame since the last upload, ``is_key`` reports a key frame or 10
    frames have passed.
    """
    # Initialize ecci sdk and connect to the broker in edge-cloud
    ecci_client = Client()
    mqtt_thread = threading.Thread(target=ecci_client.initialize)
    mqtt_thread.start()
    ecci_client.wait_for_ready()
    print('edge start --------')

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model; map to CPU first so a snapshot saved on a GPU machine
    # still loads when this host falls back to the CPU device
    checkpoint = torch.load(args.snapshot,
                            map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(checkpoint)
    for param in model.parameters():
        param.requires_grad = False
    model.eval().to(device)

    # multiprocessing: a fixed pool of 10 tracker workers started up front
    manager = mp.Manager()
    resQueue = manager.Queue()
    multiProcess = []
    label = []
    probs = []
    for i in range(10):
        multiProcess.append(build_multitracker(model, label, probs, resQueue))
        multiProcess[i].start()

    def fps(elapsed):
        """Return 1/elapsed, or infinity when the clock did not advance."""
        return 1 / elapsed if elapsed > 0 else float('inf')

    def detect_on_cloud(frame):
        """Upload `frame`, re-initialise the workers and draw the detections.

        Returns (clean copy of the frame, labels, scores, workers in use).
        """
        # communication
        payload = {"type": "data", "contents": {"frame": frame}}
        print("####################", payload)
        ecci_client.publish(payload, "cloud")
        cloud_data = ecci_client.get_sub_data_payload_queue().get()
        print("###########recieve data from cloud", cloud_data)
        bbox = cloud_data["bbox"]
        labels = cloud_data["label"]
        scores = cloud_data["probs"]

        # the pool has a fixed size; detections beyond it cannot be tracked
        num_process = min(len(bbox), len(multiProcess))

        # the workers must see the frame without the boxes drawn below
        clean_frame = frame.copy()

        t_detect_start = time.time()
        for i in range(num_process):
            left, top = int(bbox[i][0]), int(bbox[i][1])
            right, bottom = int(bbox[i][2]), int(bbox[i][3])
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 3)
            # OpenCV drawing calls need integer pixel coordinates
            cv2.putText(frame, '%s: %.3f' % (labels[i], scores[i]),
                        (left, top - 15), cv2.FONT_HERSHEY_PLAIN,
                        1.0, (0, 0, 255), thickness=1)
            init_rect = [
                bbox[i][0], bbox[i][1],
                bbox[i][2] - bbox[i][0], bbox[i][3] - bbox[i][1]
            ]
            multiProcess[i].init(clean_frame, init_rect, labels[i], scores[i])
        print("detect fps : ", fps(time.time() - t_detect_start))
        return clean_frame, labels, scores, num_process

    def track_locally(frame, labels, scores, num_process):
        """Track `frame` with the workers in use and draw their boxes."""
        t_track_start = time.time()
        for i in range(num_process):
            multiProcess[i].track(frame)
        print("track fps : ", fps(time.time() - t_track_start))
        for i in range(num_process):
            # NOTE(review): results are labelled by dequeue position, which
            # presumably matches worker order -- confirm against the worker.
            resDict = resQueue.get()
            print(resDict)
            box = list(map(int, resDict['bbox']))
            cv2.rectangle(frame, (box[0], box[1]),
                          (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 3)
            cv2.putText(frame, '%s: %.3f' % (labels[i], scores[i]),
                        (box[0], box[1] - 15), cv2.FONT_HERSHEY_PLAIN,
                        1.0, (0, 255, 0), thickness=1)

    first_frame = True
    key_frame = None
    num_process = 0
    index = 1

    filename = "./demo/video.avi"
    camera = cv2.VideoCapture(filename)
    fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', '2')
    demo_out = cv2.VideoWriter('./demo/output/video.avi', fourcc, 25,
                               (640, 368))
    try:
        while camera.isOpened():
            ret, frame = camera.read()
            if not ret:
                break
            print(frame.shape)
            if first_frame:
                # keyframe need to be uploaded to cloud
                print('first frame')
                key_frame, label, probs, num_process = detect_on_cloud(frame)
                first_frame = False
                index = 1
            elif index % 5 == 0:
                if is_key(key_frame, frame) or index % 10 == 0:
                    # keyframe need to be uploaded to cloud
                    print('key frame')
                    key_frame, label, probs, num_process = detect_on_cloud(
                        frame)
                    index = 1
                else:
                    print('non-key frame')
                    track_locally(frame, label, probs, num_process)
                    index += 1
            else:
                print('non-key frame')
                track_locally(frame, label, probs, num_process)
                index += 1
            print("writing")
            demo_out.write(frame)
    finally:
        # release the capture and the writer even when a frame fails
        camera.release()
        demo_out.release()
        cv2.destroyAllWindows()
def ObjectTracking():
    """Track a user-selected object and re-initialise when the score drops.

    The user selects the object on the first frame. For every following
    frame the tracker output is drawn when ``best_score`` is above 0.6;
    otherwise the tracker is re-initialised from the last confident frame
    and box. Depending on ``args`` the moving path is drawn and each
    processed frame is saved as a JPEG file.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model; weights are mapped to CPU first and moved to `device` below
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    # parameters init
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    first_frame = True
    cnt_ = 0
    pre_frame = None   # last confident frame, kept free of drawings
    pre_rect = None    # box that belongs to pre_frame
    points = []
    polygon = []       # last polygon returned by the tracker, if any

    # main loop for getting frame and tracking object
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                # to select object
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except (cv2.error, KeyboardInterrupt):
                exit()
            # init model
            tracker.init(frame, init_rect)
            # the selection is the first known-good state to fall back to
            pre_frame = frame.copy()
            pre_rect = init_rect
            first_frame = False
        else:
            cnt_ += 1
            # make prediction
            outputs = tracker.track(frame)
            if outputs['best_score'] > 0.6:
                # copy before drawing so a later re-init sees a clean image
                pre_frame = frame.copy()
                if 'bbox' in outputs:
                    pre_rect = list(map(int, outputs['bbox']))
                if 'polygon' in outputs:
                    polygon = np.array(outputs['polygon']).astype(np.int32)
                    cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    # make mask
                    mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                    mask = mask.astype(np.uint8)
                    mask = np.stack([mask, mask * 255,
                                     mask]).transpose(1, 2, 0)
                    frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
                else:
                    bbox = list(map(int, outputs['bbox']))
                    cv2.rectangle(frame, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 0), 3)
            else:
                # re-init model using previous location
                tracker.init(pre_frame, pre_rect)

            # draw moving path (skipped until a polygon has been seen)
            if args.draw_moving_path and len(polygon) != 0:
                points = draw_moving_path(polygon, points, frame)

            # save frame as JPEG file
            if args.saveframe and os.path.isdir(args.framepath):
                # join handles a framepath with or without a trailing slash
                fullpath = os.path.join(args.framepath,
                                        "frame{0:0>3}.jpg".format(cnt_))
                cv2.imwrite(fullpath, frame)

            cv2.imshow(video_name, frame)
            # may need to adjust based on your hardware
            cv2.waitKey(20)
def main():
    """Make the iiwa end effector follow an object tracked in the D435 image.

    The user selects the object in the first colour image. For every later
    frame the tracked box centre is deprojected to a 3D point with the
    aligned depth frame, transformed into ``iiwa_link_0`` and used as the
    y/z target of a cartesian move (x stays at the constant ``X``). Pressing
    ESC or ``q`` ends the loop and sends the robot back to ``JOINT_PERCH``.
    """
    # instantiate iiwa
    iiwa = iiwaRobot()
    time.sleep(4)  # allow iiwa taking some time to wake up

    # zero joints
    iiwa.move_joint(commit=True)

    # iiwa get ready
    iiwa.move_joint(JOINT_PERCH, commit=True)
    time.sleep(4)
    rospy.loginfo("iiwa is ready")

    # read TCP orientation; it is reused unchanged for every cartesian goal
    QUAT = Quaternion()
    QUAT.x = iiwa.cartesian_pose.orientation.x
    QUAT.y = iiwa.cartesian_pose.orientation.y
    QUAT.z = iiwa.cartesian_pose.orientation.z
    QUAT.w = iiwa.cartesian_pose.orientation.w

    # Configure realsense D435 depth and color streams
    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    profile = pipeline.start(config)

    try:
        # Create an align object
        align_to = rs.stream.color
        align = rs.align(align_to)

        # load siammask config
        cfg.merge_from_file(sys.path[0] + "/siammask_r50_l3/config.yaml")
        cfg.CUDA = torch.cuda.is_available()
        device = torch.device('cuda' if cfg.CUDA else 'cpu')

        # create model
        model = ModelBuilder()

        # load model
        model.load_state_dict(
            torch.load(sys.path[0] + "/siammask_r50_l3/model.pth",
                       map_location=lambda storage, loc: storage.cpu()))
        model.eval().to(device)

        # build tracker
        tracker = build_tracker(model)

        # label object
        video_name = 'D435_color'
        cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
        first_frame = True

        while True:
            # wait image stream and select object of interest
            frames = pipeline.wait_for_frames()

            # Align the depth frame to color frame
            aligned_frames = align.process(frames)
            color_frame = aligned_frames.get_color_frame()
            depth_frame = aligned_frames.get_depth_frame()
            if not color_frame or not depth_frame:
                # nothing usable in this frameset; wait for the next one
                continue
            depth_intrinsics = rs.video_stream_profile(
                depth_frame.profile).get_intrinsics()

            # convert image to numpy arrays
            color_image = np.asanyarray(color_frame.get_data())
            depth_image = np.asanyarray(depth_frame.get_data())

            if first_frame:
                try:
                    init_rect = cv2.selectROI(video_name, color_image,
                                              False, False)
                except (cv2.error, KeyboardInterrupt):
                    exit()
                tracker.init(color_image, init_rect)
                first_frame = False
            else:
                # start tracking
                outputs = tracker.track(color_image)
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(color_image, [polygon.reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                color_image = cv2.addWeighted(color_image, 0.77, mask, 0.23,
                                              -1)

                # centre of the tracked box, clamped to the image so the
                # depth lookup cannot be asked for a pixel outside the frame
                bbox = list(map(int, outputs['bbox']))
                height, width = color_image.shape[:2]
                poi_pixel = [
                    min(max(int(bbox[0] + 0.5 * bbox[2]), 0), width - 1),
                    min(max(int(bbox[1] + 0.5 * bbox[3]), 0), height - 1),
                ]
                poi_depth = depth_frame.get_distance(poi_pixel[0],
                                                     poi_pixel[1])
                poi_rs = rs.rs2_deproject_pixel_to_point(
                    depth_intrinsics, poi_pixel, poi_depth)
                print("Object 3D position w.r.t. camera frame: {}".format(
                    poi_rs))

                # an all-zero point means no depth reading at that pixel
                if not np.allclose(poi_rs, np.zeros(3)):
                    # compute transformed position of poi w.r.t. iiwa_link_0
                    # NOTE(review): the returned time is not used; the call
                    # is kept as-is because it may act as an availability
                    # check for the transform -- confirm.
                    iiwa.tf_listener.getLatestCommonTime('/iiwa_link_0',
                                                         '/rs_d435')
                    pos_rs = PoseStamped()
                    pos_rs.header.frame_id = 'rs_d435'
                    pos_rs.pose.orientation.w = 1.
                    pos_rs.pose.position.x = poi_rs[0]
                    pos_rs.pose.position.y = poi_rs[1]
                    pos_rs.pose.position.z = poi_rs[2]
                    pos_iiwa = iiwa.tf_listener.transformPose('/iiwa_link_0',
                                                              pos_rs)
                    rospy.loginfo("Object 3D position w.r.t. iiwa base from: {}\n ee w.r.t. iiwa base: {}".format(pos_iiwa.pose.position, iiwa.cartesian_pose.position))

                    # set cartesian goal
                    iiwa.goal_carte_pose.header.frame_id = 'iiwa_link_0'
                    iiwa.goal_carte_pose.pose.position.x = X
                    iiwa.goal_carte_pose.pose.position.y = np.clip(
                        pos_iiwa.pose.position.y, Y_MIN, Y_MAX)
                    iiwa.goal_carte_pose.pose.position.z = np.clip(
                        pos_iiwa.pose.position.z, Z_MIN, Z_MAX)
                    iiwa.goal_carte_pose.pose.orientation = QUAT
                    iiwa.move_cartesian(cartesian_pose=iiwa.goal_carte_pose)

            # display image stream, press 'ESC' or 'q' to terminate
            cv2.imshow(video_name, color_image)
            key = cv2.waitKey(40)
            if key in (27, ord("q")):
                break

        time.sleep(4)
        iiwa.move_joint(joint_position=JOINT_PERCH)
        time.sleep(4)
    finally:
        # always stop streaming, also when the loop ends with an error
        pipeline.stop()
    rospy.loginfo("Finished")
def main():
    """Tracking demo that re-initialises the tracker on low-confidence frames.

    The user selects the object on the first frame. For every later frame
    whose ``best_score`` is below 0.95 the tracker is re-initialised from the
    last accepted frame and box and the frame is skipped; otherwise the
    result is drawn and shown.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(torch.load(args.snapshot, map_location=device))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    last_frame = None   # last accepted frame, kept free of drawings
    last_bbox = None    # box that belongs to last_frame
    for frame in get_frames(args.video_name):
        # copy taken before anything is drawn, so a later re-init does not
        # learn the overlay as part of the target
        clean_frame = frame.copy()
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except (cv2.error, KeyboardInterrupt):
                exit()
            bbox = init_rect
            print(init_rect)
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            score = outputs["best_score"]
            if score < 0.95:
                # low confidence: fall back to the last accepted state and
                # do not display this frame
                tracker.init(last_frame, last_bbox)
                continue
            print(score)
            if 'bbox' in outputs:
                # keep the fallback box current in the mask branch as well
                bbox = list(map(int, outputs['bbox']))
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
        last_frame = clean_frame
        last_bbox = bbox
        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def main():
    """Run a tracking worker that is driven by a server over ZeroMQ.

    The worker subscribes to the server's publisher socket (port 5556) and
    pushes its answers to port 5557. It registers itself once the
    subscription handshake succeeds and then handles these message types:

    * ``FRAME``    -- track the attached frame and push a ``TRACK`` result
                      (ignored until a support image has been received)
    * ``SUPPORT``  -- initialise the tracker from a base64 image and a box
    * ``LOCATION`` -- forward a position to ``tracker.update``
    * ``PING``     -- answer with ``PONG``

    Ctrl+C sends ``FIN`` and waits for the server's ``FIN``; a second Ctrl+C
    ends the wait.
    """
    import shutil
    import subprocess

    # selecting a GPU only makes sense when CUDA is present; the code below
    # already falls back to the CPU otherwise
    if torch.cuda.is_available():
        torch.cuda.set_device(args.gpu_id)

    model_dir = "./experiments/siamrpn_r50_l234_dwxcorr/model.pth"
    model_config = "./experiments/siamrpn_r50_l234_dwxcorr/config.yaml"
    if os.path.isfile(model_dir):
        print("model file {} found".format(model_dir))
    else:
        print("model file {} not found, starting download".format(model_dir))
        # argument lists instead of shell strings; a failed download stops
        # here instead of surfacing later as a missing file
        subprocess.run(
            ["gdown",
             "https://drive.google.com/uc?id=1-tEtYQdT1G9kn8HsqKNDHVqjE16F8YQH"],
            check=True)
        shutil.move("model.pth", model_dir)

    # load config
    cfg.merge_from_file(model_config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(model_dir,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # create an unique identifier
    worker_id = uuid.uuid4()

    # build tracker
    tracker = build_tracker(model)

    # one context is enough for both sockets
    context = zmq.Context()

    # set up frame listening socket
    sub_socket = context.socket(zmq.SUB)
    sub_socket.connect("tcp://{}:5556".format(args.server_ip))
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, "frame_")
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, str(worker_id))

    # setup push socket
    push_socket = context.socket(zmq.PUSH)
    push_socket.connect("tcp://{}:5557".format(args.server_ip))

    # event monitoring
    # used to register worker once connection is established
    EVENT_MAP = {
        getattr(zmq, name): name
        for name in dir(zmq) if name.startswith('EVENT_')
    }

    # monitor thread function
    def event_monitor(monitor):
        while monitor.poll():
            evt = recv_monitor_message(monitor)
            evt.update({'description': EVENT_MAP[evt['event']]})
            if evt['event'] == zmq.EVENT_HANDSHAKE_SUCCEEDED:
                push_socket.send_json(
                    {"type": "REGISTER", "id": str(worker_id)})
            if evt['event'] == zmq.EVENT_MONITOR_STOPPED:
                break
        monitor.close()

    # register monitor; daemon so the blocking poll cannot keep the process
    # alive after the main thread has exited
    monitor = sub_socket.get_monitor_socket()
    t = threading.Thread(target=event_monitor, args=(monitor,), daemon=True)
    t.start()

    support = None
    try:
        while True:
            # wait for next message: topic frame, then a JSON header
            _ = sub_socket.recv()
            md = sub_socket.recv_json()
            msg_type = md['type']
            if msg_type == 'FRAME':
                # the raw pixels follow the header as a third part and must
                # be read even when the frame is not going to be used
                msg = sub_socket.recv()
                buf = memoryview(msg)
                frame = np.frombuffer(
                    buf, dtype=md['dtype']).reshape(md['shape'])
                if support is None:
                    continue
                outputs = tracker.track(frame)
                bbox = list(map(int, outputs['bbox']))
                # send result
                push_socket.send_json(
                    {
                        "type": "TRACK",
                        "bbox": bbox,
                        "score": outputs['best_score'].tolist(),
                        "time": md['time'],
                        "id": str(worker_id)
                    })
                print('message: {}'.format(md['time']), end='\r')
            elif msg_type == 'SUPPORT':
                frame_raw = md['data']['img']
                # base 64 png image, decoded as RGB and reversed to BGR
                frame = np.array(
                    Image.open(
                        io.BytesIO(base64.b64decode(frame_raw))
                    ).convert('RGB'))[:, :, ::-1]
                bbox = [int(float(i)) for i in md['data']['bbox'].split(",")]
                tracker.init(frame, bbox)
                support = (frame, bbox)
                print('Support received, tracking will now start')
            elif msg_type == 'LOCATION':
                # make sure tracker has been initialized
                if support is not None:
                    center_pos = np.array(md['data'])
                    tracker.update(center_pos)
            elif msg_type == 'PING':
                push_socket.send_json({"type": "PONG", "id": str(worker_id)})
            else:
                print('Invalid message type received: {}'.format(md['type']))
    except KeyboardInterrupt:
        print('Exiting... notifying server of disconnect')
        push_socket.send_json(
            {"type": "FIN", "id": str(worker_id)})
        # wait for the server to respond or let the user forcefully close
        print("Waiting for server response. Press CTRL+C again to forcefully close")
        while True:
            _ = sub_socket.recv()
            md = sub_socket.recv_json()
            if md['type'] == "FIN":
                print('Server responded, now exiting')
                exit(0)
            elif md['type'] == "FRAME":
                # we have to accept the incoming frame to properly accept
                # future messages
                msg = sub_socket.recv()
def main():
    """Experiment: trace parts of the SiamRPN model into TorchScript files.

    Crops a template and a search window from a fixed benchmark image with
    the same arithmetic the inline note attributes to ``tracker.init``,
    then builds three wrapper modules around the backbone, neck and RPN
    head:

    * ``ArcTemplate``   -- template features only, traced to ``archine.pt``
    * ``FrostTemplate`` -- the full template/search forward pass, traced to
                           ``frost.pt``
    * ``HeadTemplate``  -- the RPN head alone (run once, not saved)

    Finally the hard-coded box is drawn on the image and shown.
    """
    config_path = "experiments/siamrpn_mobilev2_l234_dwxcorr/config.yaml"
    weights_path = "experiments/siamrpn_mobilev2_l234_dwxcorr/model.pth"

    cfg.merge_from_file(config_path)
    device = torch.device("cuda")

    def load_weights(module):
        """Load the snapshot into `module`, switch to eval and move it to `device`."""
        module.load_state_dict(
            torch.load(weights_path,
                       map_location=lambda storage, loc: storage.cpu()))
        module.eval().to(device)
        return module

    model = load_weights(ModelBuilder())
    tracker = build_tracker(model)

    frame = cv2.imread("image/benchmark_5.jpg")
    bbox = (280, 80, 200, 270)

    """ This part is inhirited from tracker.init method, trying to serialize tracker.template """
    # tracker.init(frame, bbox)
    left, top, box_w, box_h = bbox
    center_pos = np.array([left + (box_w - 1) / 2, top + (box_h - 1) / 2])
    size = np.array([box_w, box_h])

    # context-padded template size and the matching search-window size
    w_z = size[0] + 0.5 * np.sum(size)
    h_z = size[1] + 0.5 * np.sum(size)
    s_z = round(np.sqrt(w_z * h_z))
    s_x = 255 / 127 * s_z

    # calculate channel average
    channel_average = np.mean(frame, axis=(0, 1))
    z_crop = tracker.get_subwindow(frame, center_pos, 127, s_z,
                                   channel_average)
    x_crop = tracker.get_subwindow(frame, center_pos, 255, s_x,
                                   channel_average)

    class _SiamParts(torch.nn.Module):
        """Shared constructor: backbone, neck and RPN head built from cfg."""

        def __init__(self):
            super().__init__()
            self.backbone = get_backbone(cfg.BACKBONE.TYPE,
                                         **cfg.BACKBONE.KWARGS)
            self.neck = get_neck(cfg.ADJUST.TYPE, **cfg.ADJUST.KWARGS)
            self.rpn_head = get_rpn_head(cfg.RPN.TYPE, **cfg.RPN.KWARGS)

    #########################################################################
    # a new script model inherited from template
    class ArcTemplate(_SiamParts):
        def forward(self, z_crop):
            features = self.backbone(z_crop)
            return self.neck(features)

    arc = load_weights(ArcTemplate())
    zrf = arc(z_crop)
    for feature in zrf:
        print(feature.shape)
    torch.jit.trace(arc, z_crop).save("archine.pt")

    #########################################################################
    # a new script model inherited from track
    class FrostTemplate(_SiamParts):
        # the frost is the combination of backbone/neck/rpn_head network
        def forward(self, z, x):
            zf = self.neck(self.backbone(z))
            xf = self.neck(self.backbone(x))
            print("zf shape:\n", zf[0].shape, "\t", zf[1].shape, "\t",
                  zf[2].shape)
            print("xf shape:\n", xf[0].shape, "\t", xf[1].shape, "\t",
                  xf[2].shape)
            return self.rpn_head(zf, xf)

    fro = load_weights(FrostTemplate())
    fro(z_crop, x_crop)
    torch.jit.trace(fro, (z_crop, x_crop)).save("frost.pt")

    #########################################################################
    # the rpn head model
    class HeadTemplate(_SiamParts):
        def forward(self, z, x):
            return self.rpn_head(z, x)

    hed = load_weights(HeadTemplate())
    hed(zrf, zrf)
    # the model works. However, we don't know how to save them into script
    # model

    #########################################################################
    # visualization
    cv2.rectangle(frame, (280, 80), (480, 350), (0, 0, 255), 2)
    cv2.imshow("_", frame)
    cv2.waitKey(0)
def main():
    """Track every detected body in a video with its own single-object tracker.

    Trackers are created for all regions of the first frame that contains a
    detection. On each later frame all trackers are updated, and a detected
    region that is at least 200 pixels away from every confident tracker
    result (score above 0.5) gets a new tracker. The tracker ids are drawn
    into the frame together with the total tracker count.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    cap = cv2.VideoCapture(args.video_name)
    body_detector = BodyDetector()
    body_detector.load_model(path_to_model=args.model_path)

    first_frame_with_detection = False
    updating_frame = False
    sot_trackers = {}
    counter = 0

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # end of the video or a read error: there is no frame
                break

            regions, _, _ = body_detector.process(frame)
            if len(regions) > 0 and not first_frame_with_detection:
                first_frame_with_detection = True

            if first_frame_with_detection and not updating_frame:
                # first detections: one tracker per region, nothing to show
                for r in regions:
                    counter += 1
                    sot_trackers[counter] = MSTracker(model=model,
                                                      tracker_id=counter)
                    sot_trackers[counter].tracker_init(frame, r)
                updating_frame = True
                print(f"Init number of MSTracker: {counter}")
                continue

            if updating_frame:
                current_frame_sot_regions = []
                for sot_tracker in sot_trackers.values():
                    sot_region, sot_score = sot_tracker.update(frame)
                    if sot_score > 0.5:
                        current_frame_sot_regions.append(sot_region)
                current_detected_regions = regions

                # compare SOT region and detected region to decide whether
                # fire up a new MSTracker
                for d_region in current_detected_regions:
                    already_tracked = any(
                        math.sqrt((d_region.x - sot_region.x) ** 2
                                  + (d_region.y - sot_region.y) ** 2) < 200
                        for sot_region in current_frame_sot_regions)
                    if not already_tracked:
                        counter += 1
                        sot_trackers[counter] = MSTracker(model=model,
                                                          tracker_id=counter)
                        sot_trackers[counter].tracker_init(frame, d_region)
                        print(f"New Tracker: {counter}")

                # display
                for r in current_frame_sot_regions:
                    t_id = r.data["sot_id"]
                    frame = write_into_region(frame, str(t_id), r,
                                              show_region_outline=True)

            cv2.putText(frame, f"MSTracker: {len(sot_trackers)}", (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0))
            cv2.imshow("body", frame)
            cv2.waitKey(1)
    finally:
        # release the capture and the window even when a frame fails
        cap.release()
        cv2.destroyAllWindows()
def main(args):
    """Run multi-object tracking on one image sequence and save the results.

    For every frame of the sequence ``args.seq_name`` the function

    1. loads the detections of that frame and applies non-maximum
       suppression,
    2. lets the already tracked trackers follow their target, then tries to
       re-associate the lost ones with the remaining detections,
    3. starts a new tracker for every detection no tracker has claimed,
    4. resolves conflicts between trackers and interpolates the tracklets,
    5. writes a crop of every live tracker into the ``img_traj`` folder.

    After the last frame all tracker results are collected, sorted by their
    first column and written to ``<results_dir>/<seq_name>.txt``.
    """
    seq_name = args.seq_name

    # the packages of trackers
    from pysot.core.config import cfg  # use the modified config file to reset the tracking system
    from pysot.models.model_builder import ModelBuilder
    # modified single tracker with wrapper
    from mot_zj.MUST_sot_builder import build_tracker
    from mot_zj.MUST_utils import draw_bboxes, find_candidate_detection, handle_conflicting_trackers, sort_trackers
    from mot_zj.MUST_ASSO.MUST_asso_model import AssociationModel
    from mot_zj.MUST_utils import traj_interpolate

    dataset_dir = os.path.join(root, 'result')
    seq_type = 'img'

    # set the path of config parameters and model weights
    config_path = os.path.join(track_dir, "mot_zj", "MUST_config_file",
                               "alex_config.yaml")
    model_params = os.path.join(params_dir, "alex_model.pth")

    # enable the visualisation or not
    is_visualisation = False
    # print the information of the tracking process or not
    is_print = True

    results_dir = os.path.join(dataset_dir, 'track')
    os.makedirs(results_dir, exist_ok=True)

    # crops of a previous run of this sequence are thrown away
    img_traj_dir = os.path.join(track_dir, "img_traj")
    if os.path.exists(os.path.join(img_traj_dir, seq_name)):
        shutil.rmtree(os.path.join(img_traj_dir, seq_name))

    seq_dir = os.path.join(dataset_dir, seq_type)

    # record the processing time
    start_point = time.time()

    # load config
    cfg.merge_from_file(config_path)
    # set the flag that CUDA is available
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create the tracker model and load its weights
    track_model = ModelBuilder()
    track_model.load_state_dict(
        torch.load(model_params,
                   map_location=lambda storage, loc: storage.cpu()))
    track_model.eval().to(device)

    # create association model
    asso_model = AssociationModel(args)

    seq_det_path = os.path.join(seq_dir, seq_name, 'det')
    seq_img_path = os.path.join(seq_dir, seq_name, 'img1')

    # print path and dataset information
    if is_print:
        print('preparing for the sequence: {}'.format(seq_name))
        print('-----------------------------------------------')
        print("detection result path: {}".format(seq_det_path))
        print("image files path: {}".format(seq_img_path))
        print('-----------------------------------------------')

    # read the detection results
    det_results = np.loadtxt(os.path.join(seq_det_path, 'det.txt'),
                             dtype=float, delimiter=',')

    # read images from each sequence
    images = sorted(glob.glob(os.path.join(seq_img_path, '*.jpg')))

    # the container of trackers
    trackers = []

    # visualisation settings
    if is_visualisation:
        cv2.namedWindow(seq_name, cv2.WINDOW_NORMAL)

    # init(reset) the identifier
    id_num = 0

    # states in which a tracker currently follows its target
    following_states = (cfg.STATE.TRACKED, cfg.STATE.ACTIVATED)

    # tracking process in each frame
    for nn, im_path in enumerate(images):
        each_start = time.time()
        frame = nn + 1
        img = cv2.imread(im_path)
        print('Frame {} is loaded'.format(frame))

        # load the detection results of this frame
        pre_frame_det_results = det_results[det_results[:, 0] == frame]

        # non-maximal suppression [frame, id, x, y, w, h, score]
        indices = nms.boxes(pre_frame_det_results[:, 2:6],
                            pre_frame_det_results[:, 6])
        frame_det_results = pre_frame_det_results[indices, :]

        # extract the bbox [fr, id, (x, y, w, h), score]
        bboxes = frame_det_results[:, 2:6]

        ############################################
        # ***multiple tracking and associating***  #
        ############################################
        # 1. sort trackers
        index1, index2 = sort_trackers(trackers)

        # 2. save the processed index of trackers
        index_processed = []
        track_time = 0
        asso_time = 0

        # process trackers in the first class, then in the second class
        for index_track in (index1, index2):
            track_start = time.time()
            for ind in index_track:
                if trackers[ind].track_state in following_states:
                    indices = find_candidate_detection(
                        [trackers[i] for i in index_processed], bboxes)
                    to_track_bboxes = bboxes[
                        indices, :] if not bboxes.size == 0 else np.array([])
                    # MOT_track (tracking process)
                    trackers[ind].track(img, to_track_bboxes, frame)
                    # if the tracker keeps its previous tracking state
                    # (tracked or activated)
                    if trackers[ind].track_state in following_states:
                        index_processed.append(ind)
            track_time += time.time() - track_start

            asso_start = time.time()
            for ind in index_track:
                if trackers[ind].track_state == cfg.STATE.LOST:
                    indices = find_candidate_detection(
                        [trackers[i] for i in index_processed], bboxes)
                    to_associate_bboxes = bboxes[
                        indices, :] if not bboxes.size == 0 else np.array([])
                    # MOT_track (association process): use the candidates
                    # computed for this lost tracker, not the ones left over
                    # from the tracking pass
                    trackers[ind].track(img, to_associate_bboxes, frame)
                    # add process flag
                    index_processed.append(ind)
            asso_time += time.time() - asso_start

        ############################################
        #        ***init new trackers ***          #
        ############################################
        # find the candidate bboxes to init new trackers
        indices = find_candidate_detection(trackers, bboxes)

        # one new tracker per unclaimed detection
        for index in indices:
            id_num += 1
            new_tracker = build_tracker(track_model)
            new_tracker.init(img, bboxes[index, :], id_num, frame, seq_name,
                             asso_model)
            trackers.append(new_tracker)

        # find conflict of trackers
        trackers = handle_conflicting_trackers(trackers, bboxes)

        # interpolate the tracklet results
        for tracker in trackers:
            if tracker.track_state in following_states:
                bbox = tracker.tracking_bboxes[-1, :]
                traj_interpolate(tracker, bbox, tracker.frames[-1], 30)

        ############################################
        #      ***collect tracking results***      #
        ############################################
        # collect the tracking results (all the results, without selected)
        if frame == len(images):
            results_bboxes = np.array([])
            for tracker in trackers:
                res = tracker.results_return()
                if results_bboxes.size == 0:
                    results_bboxes = res
                elif not res.size == 0:
                    results_bboxes = np.concatenate((results_bboxes, res),
                                                    axis=0)

            filename = '{}.txt'.format(seq_name)
            if results_bboxes.size == 0:
                # nothing was tracked: keep a 2-D shape that matches the six
                # columns of the output format so an empty file is written
                results_bboxes = np.empty((0, 6))
            else:
                results_bboxes = results_bboxes[np.argsort(
                    results_bboxes[:, 0])]
            print(results_bboxes.shape[0])

            # detections filter: for sequence 'b1' drop rows whose column 3
            # exceeds 540 and whose column 4 * column 5 is below 10000
            if seq_name == 'b1':
                drop = ((results_bboxes[:, 3] > 540)
                        & (results_bboxes[:, 4] * results_bboxes[:, 5]
                           < 10000))
                results_bboxes = results_bboxes[~drop]
            np.savetxt(os.path.join(results_dir, filename), results_bboxes,
                       fmt='%d,%d,%.1f,%.1f,%.1f,%.1f')

        ############################################
        #        ***crop tracklet image***         #
        ############################################
        for tracker in trackers:
            if (tracker.track_state == cfg.STATE.START
                    or tracker.track_state in following_states):
                bbox = tracker.tracking_bboxes[-1, :]
                # clip the box to the image before cropping
                x1 = int(np.floor(np.maximum(1, bbox[0])))
                y1 = int(np.ceil(np.maximum(1, bbox[1])))
                x2 = int(np.ceil(np.minimum(img.shape[1], bbox[0] + bbox[2])))
                y2 = int(np.ceil(np.minimum(img.shape[0], bbox[1] + bbox[3])))
                img_traj = img[y1:y2, x1:x2, :]

                traj_path = os.path.join(img_traj_dir, seq_name,
                                         str(tracker.id_num))
                os.makedirs(traj_path, exist_ok=True)
                tracklet_img_path = os.path.join(traj_path,
                                                 str(tracker.frames[-1]))
                cv2.imwrite("{}.jpg".format(tracklet_img_path), img_traj)

        each_time = time.time() - each_start
        print("period: {}s, track: {}s({:.2f}), asso: {}s({:.2f})".format(
            each_time, track_time, (track_time / each_time) * 100,
            asso_time, (asso_time / each_time) * 100))

        if is_visualisation:
            ##########################################
            #   information print and visualisation  #
            ##########################################
            active_trackers = [
                tracker.id_num for tracker in trackers
                if tracker.track_state in following_states
                or tracker.track_state == cfg.STATE.LOST
            ]
            print("The number of active trackers: {}".format(
                len(active_trackers)))
            print(active_trackers)
            anno_img = draw_bboxes(img, bboxes)
            cv2.imshow(seq_name, anno_img)
            cv2.waitKey(1)

        print("The running time is: {} s".format(time.time() - start_point))

    print("The total processing time is: {} s".format(time.time() -
                                                       start_point))
class SOTTracker:
    """Single-object tracker that follows one target through image files.

    The config and model weights are loaded once in the constructor. Boxes
    exchanged with callers are relative: they are scaled by the width and
    height of the initial frame on the way in and divided by them on the
    way out.
    """

    def __init__(self, config_file, model_file):
        """Load config and weights, then build the tracker.

        Args:
            config_file: Path handed to ``cfg.merge_from_file``.
            model_file: Path of the state dict handed to ``torch.load``.
        """
        self.config_file = config_file
        self.model_file = model_file

        # load config
        cfg.merge_from_file(self.config_file)
        cfg.CUDA = torch.cuda.is_available()
        self.device = torch.device('cuda' if cfg.CUDA else 'cpu')

        # load model (weights are mapped to CPU first, then moved to device)
        self.model = ModelBuilder()
        self.model.load_state_dict(
            torch.load(model_file,
                       map_location=lambda storage, loc: storage.cpu()))
        self.model.eval().to(self.device)

        # build tracker
        self.tracker = build_tracker(self.model)

    def tracking(self, init_img, init_bbox, imglist_to_track):
        """Initialise on ``init_img`` and track through ``imglist_to_track``.

        Args:
            init_img: Path of the image used to initialise the tracker.
            init_bbox: Four relative values; elements 0 and 2 are scaled by
                the initial frame width, elements 1 and 3 by its height.
            imglist_to_track: Iterable of image paths to track, in order.

        Returns:
            Dict keyed by image path. Each value has the keys ``'polygon'``,
            ``'mask'`` and ``'bbox'``; an entry stays ``None`` when the
            tracker output does not contain it. ``'bbox'`` is divided by the
            initial frame's width/height.
        """
        # init tracker
        init_frame = cv2.imread(init_img)
        height, width = init_frame.shape[:2]

        # convert bbox from relative coordinates to actual values
        init_bbox_coord = [
            int(init_bbox[0] * width),
            int(init_bbox[1] * height),
            int(init_bbox[2] * width),
            int(init_bbox[3] * height),
        ]
        self.tracker.init(init_frame, init_bbox_coord)

        # do tracking
        results = {
            img: {'polygon': None, 'mask': None, 'bbox': None}
            for img in imglist_to_track
        }
        for img in imglist_to_track:
            frame = cv2.imread(img)
            outputs = self.tracker.track(frame)
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                results[img]['polygon'] = [polygon.reshape((-1, 1, 2))]
                results[img]['mask'] = outputs['mask']
            if 'bbox' in outputs:
                bbox = list(map(float, outputs['bbox']))
                results[img]['bbox'] = [
                    bbox[0] / width,
                    bbox[1] / height,
                    bbox[2] / width,
                    bbox[3] / height,
                ]
        return results

    def tracking_json_query(self, query_json):
        """Run :meth:`tracking` from a JSON-encoded query.

        Args:
            query_json: JSON object with the keys ``'init_img'``,
                ``'init_bbox'`` and ``'imglist_to_track'``.

        Returns:
            The result of :meth:`tracking`, or ``None`` (after printing a
            message) when one of the required keys is missing.

        Raises:
            AssertionError: If ``imglist_to_track`` is empty.
        """
        query = json.loads(query_json)
        try:
            init_img = query['init_img']
            # The box is relative (see tracking), so it must stay fractional;
            # casting with int() would truncate values such as 0.25 to 0.
            init_bbox = [float(value) for value in query['init_bbox']]
            imglist_to_track = query['imglist_to_track']
            assert len(imglist_to_track) > 0
            return self.tracking(init_img, init_bbox, imglist_to_track)
        except KeyError:
            print('invalid query json')
            return None

    @staticmethod
    def result2json(results):
        """Serialise ``results`` with ``json.dumps`` and return the string.

        NOTE(review): results that carry numpy polygons/masks are not
        serialisable by the default encoder -- confirm how callers use this.
        """
        json_string = json.dumps(results)
        return json_string

    @staticmethod
    def vis_tracking_result(img_file, result):
        """Draw one tracking result on its image and return the frame.

        Args:
            img_file: Path of the image to draw on.
            result: One per-image entry as produced by :meth:`tracking`.

        Returns:
            The image with the polygon and mask overlay when a polygon is
            present, otherwise with the bounding box when one is present,
            otherwise unchanged.
        """
        vis_frame = cv2.imread(img_file)
        height, width = vis_frame.shape[:2]
        if result['polygon'] is not None:
            cv2.polylines(vis_frame, result['polygon'], True, (0, 255, 0), 3)
            mask = ((result['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            # NOTE(review): mask is already uint8 here, so `mask * 255` is
            # computed in uint8 -- confirm the intended overlay colour.
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            vis_frame = cv2.addWeighted(vis_frame, 0.77, mask, 0.23, -1)
        elif result['bbox'] is not None:
            bbox = result['bbox']
            # the stored box is relative; scale it back to pixels
            cv2.rectangle(vis_frame,
                          (int(bbox[0] * width), int(bbox[1] * height)),
                          (int((bbox[0] + bbox[2]) * width),
                           int((bbox[1] + bbox[3]) * height)),
                          (0, 255, 0), 3)
        return vis_frame
def main():
    """Track every annotated object in every demo video and dump CSV files.

    For each ``*.mp4`` found in the demo video folder, the CSV with the same
    stem under ``./demo/vids/`` is read; each of its rows is used as the
    initial rectangle of one object. The tracker is re-initialised per
    object on the first frame, and one output CSV is written per object.
    """
    # load config
    cfg.merge_from_file('./experiments/siamrpn_alex_dwxcorr/config.yaml')
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')
    print(device)

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load('./experiments/siamrpn_alex_dwxcorr/model.pth',
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    video_list = glob1("/home/developer/kashyap/pysot-master/demo/vids/", "*.mp4")
    for video_name in video_list:
        video_name_str = os.path.splitext(video_name)[0]
        df = pd.read_csv('./demo/vids/' + video_name_str + '.csv',
                         delimiter=',', header=None)
        cords = [list(x) for x in df.values]

        for object_counter, cord in enumerate(cords, start=1):
            first_frame = True
            frame_count = 1
            # row layout: frame index, object index, box, video file name
            mylist = [[frame_count, object_counter, cord, video_name]]

            # NOTE(review): video_name is the bare file name returned by
            # glob1 -- confirm get_frames resolves it against the right
            # directory.
            for frame in get_frames(video_name):
                if first_frame:
                    tracker.init(frame, cord)
                    first_frame = False
                else:
                    outputs = tracker.track(frame)
                    if 'polygon' in outputs:
                        # polygon output is not handled by this script
                        exit()
                    else:
                        bbox = list(map(int, outputs['bbox']))
                        frame_count = frame_count + 1
                        mylist.append(
                            [frame_count, object_counter, bbox, video_name])

            out_name = ('vid-' + str(video_name) + '-tracking-'
                        + str(object_counter) + '-object-' + str(cord) + '.csv')
            with open(out_name, 'w', newline='') as csvfile:
                # The original call passed '\n' positionally after a keyword
                # argument, which is a SyntaxError; it is given here as the
                # line terminator.
                writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL,
                                    lineterminator='\n')
                # NOTE(review): writerow emits all collected entries as ONE
                # csv row -- confirm whether writerows (one row per frame)
                # was intended.
                writer.writerow(mylist)
def main():
    """Track a user-selected region and save video, masks, crops and corners.

    The first frame is used to select the region of interest and initialise
    the tracker. For every later frame the tracker output is drawn on the
    frame; when a polygon is returned, the mask image, a crop of the mask
    around the polygon, and the four polygon corners are saved as well.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    save = 'C:\\Users\\biosi\\pysot\\demo'
    video_names = 'vita2'
    os.makedirs('%s/%s' % (save, video_names), exist_ok=True)
    os.makedirs('%s/%s/video' % (save, video_names), exist_ok=True)
    os.makedirs('%s/%s/mask' % (save, video_names), exist_ok=True)
    os.makedirs('%s/%s/crop' % (save, video_names), exist_ok=True)
    filename = 'vita2_output.mp4'
    video_path = '%s\\%s\\video\\%s' % (save, video_names, filename)
    csv_path = '%s\\%s\\crop\\%s' % (save, video_names, "vita2_output.csv")

    # per-frame polygon corner coordinates
    x1 = []
    x2 = []
    x3 = []
    x4 = []
    y1 = []
    y2 = []
    y3 = []
    y4 = []

    # The writer is created from the first frame inside the loop; reading
    # the frame size up front would consume the whole frame source first.
    out = None
    try:
        for idx, frame in enumerate(get_frames(args.video_name)):
            if first_frame:
                out = cv2.VideoWriter(
                    video_path,
                    cv2.VideoWriter_fourcc(*'mp4v'),
                    29.97,
                    (int(frame.shape[1]), int(frame.shape[0])))
                try:
                    init_rect = cv2.selectROI(video_names, frame, False, False)
                except Exception:
                    exit()
                tracker.init(frame, init_rect)
                first_frame = False
            else:
                outputs = tracker.track(frame)
                if 'polygon' in outputs:
                    polygon = np.array(outputs['polygon']).astype(np.int32)
                    polygon = polygon.reshape((-1, 1, 2))
                    cv2.polylines(frame, [polygon], True, (0, 255, 0), 3)
                    mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                    mask = mask.astype(np.uint8)
                    mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                    cv2.imwrite(
                        "%s\\%s\\mask\\%04d.png" % (save, video_names, idx + 1),
                        mask)

                    # first four polygon points, as (x, y) corner lists
                    x = [polygon[i, 0, 0] for i in range(4)]
                    y = [polygon[i, 0, 1] for i in range(4)]
                    x1.append(x[0])
                    x2.append(x[1])
                    x3.append(x[2])
                    x4.append(x[3])
                    y1.append(y[0])
                    y2.append(y[1])
                    y3.append(y[2])
                    y4.append(y[3])

                    # Bounds of the corners, clamped to be non-negative.
                    # The y bounds come from the y values (the original
                    # seeded them from x[0]).
                    x_min = max(0, min(x))
                    x_max = max(0, max(x))
                    y_min = max(0, min(y))
                    y_max = max(0, max(y))

                    # crop the mask image around the polygon
                    crop = mask[int(y_min):int(y_max), int(x_min):int(x_max), :]
                    print(y_min, y_max, x_min, x_max)
                    if crop.size > 0:
                        # skip degenerate (empty) crops instead of passing
                        # an empty image to imwrite
                        cv2.imwrite(
                            "%s\\%s\\crop\\%04d.png" % (save, video_names,
                                                        idx + 1),
                            crop)
                    frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
                else:
                    bbox = list(map(int, outputs['bbox']))
                    cv2.rectangle(frame, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 0), 3)

                # The corner table is rewritten after every tracked frame,
                # so the file on disk always reflects the frames seen so far.
                dataframe = pd.DataFrame({
                    'y1': y1,
                    'x1': x1,
                    'y2': y2,
                    'x2': x2,
                    'y3': y3,
                    'x3': x3,
                    'y4': y4,
                    'x4': x4
                })
                dataframe.to_csv(csv_path, index=False)
                out.write(frame)
                cv2.imshow(video_names, frame)
                cv2.waitKey(40)
    finally:
        # release the writer even when the loop exits early
        if out is not None:
            out.release()
# Module-level buffers shared between the detection loop and the rest of the
# script; every buffer starts as an all-zero uint8 array.
color_img = np.zeros(shape=(1280, 720, 3), dtype=np.uint8)
result_mask_img = np.zeros(shape=(1280, 720, 3), dtype=np.uint8)
result_bbox_img = np.zeros(shape=(1280, 720, 3), dtype=np.uint8)
result_mask = np.zeros(shape=(1280, 720), dtype=np.uint8)
pysot_img = np.zeros(shape=(1280, 720, 3), dtype=np.uint8)
mask_rcnn_flag = 0
pysot_mask = np.zeros(shape=(1280, 720), dtype=np.uint8)
pysot_contour_img = np.zeros(shape=(1280, 720, 3), dtype=np.uint8)

# Tracker setup: merge the config, pick the device, build the tracker around
# the model, then load the weights (mapped to CPU first) and move the model
# to the device in eval mode.
cfg.merge_from_file('config.yaml')
cfg.CUDA = torch.cuda.is_available()
device = torch.device('cuda') if cfg.CUDA else torch.device('cpu')
model_pysot = ModelBuilder()
tracker = build_tracker(model_pysot)
model_pysot.load_state_dict(
    torch.load('model.pth',
               map_location=lambda storage, loc: storage.cpu()))
model_pysot.eval().to(device)


def run_maskrcnn():
    """Run the detector on ``color_img`` forever and publish its results.

    Each iteration sets ``mask_rcnn_flag`` to 1, runs ``inference_detector``
    on the current ``color_img`` and stores the three values returned by
    ``show_result`` in the module-level result buffers. ``model`` is a
    module-level name that is not defined in this part of the file.
    """
    global color_img, result_mask_img, result_bbox_img
    global result_mask, mask_rcnn_flag, inds_len

    while True:
        mask_rcnn_flag = 1
        detections = inference_detector(model, color_img)
        result_mask_img, result_bbox_img, result_mask = show_result(
            color_img, detections, model.CLASSES)
def main():
    """Track from a fixed initial rectangle and write an annotated video.

    The initial rectangle comes from ``args.init_rect`` as comma-separated
    integers. The first frame initialises the tracker and fixes the output
    video size; on every later frame each box in the tracker's ``'bbox'``
    output is drawn, and the frame is written to ``args.output_video`` and
    to ``test.jpg``.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    init_string = args.init_rect
    init_rect = list(map(int, init_string.split(',')))
    print("initial rectangle selected as: ", init_rect)
    print("output video is: ", args.output_video)

    first_frame = True
    out = None
    try:
        for frame in get_frames(args.video_name):
            if first_frame:
                frame_size = frame.shape
                print(frame_size)
                # VideoWriter takes (width, height); shape is (rows, cols, ...)
                out = cv2.VideoWriter(args.output_video,
                                      cv2.VideoWriter_fourcc(*'DIVX'), 30,
                                      (frame_size[1], frame_size[0]))
                tracker.init(frame, init_rect)
                first_frame = False
            else:
                all_outputs = tracker.track(frame)
                # this tracker variant yields several boxes per frame
                for outputs in all_outputs['bbox']:
                    bbox = list(map(int, outputs))
                    cv2.rectangle(frame, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 0), 2)
                cv2.imwrite("test.jpg", frame)
                out.write(frame)
    finally:
        # `out` is still None when the source yielded no frames; releasing
        # here also closes the file when the loop raises.
        if out is not None:
            out.release()