def main():
    """Build the SiamRPN MobileNetV2 tracker and print exemplar-crop statistics.

    Loads the config and weights from
    ``experiments/siamrpn_mobilev2_l234_dwxcorr``, reads
    ``image/benchmark_5.jpg``, crops it with ``get_subwindow`` and prints the
    mean and standard deviation of the crop.

    Raises:
        FileNotFoundError: If the benchmark image cannot be read.
    """
    # Initialize the tracker.
    cfg.merge_from_file("experiments/siamrpn_mobilev2_l234_dwxcorr/config.yaml")
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device("cuda" if cfg.CUDA else "cpu")
    model = ModelBuilder()
    model.load_state_dict(
        torch.load("experiments/siamrpn_mobilev2_l234_dwxcorr/model.pth",
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)
    tracker = build_tracker(model)

    # Initialize the benchmark parameters.
    image_path = "image/benchmark_5.jpg"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising, which would otherwise surface as an obscure error in np.mean.
        raise FileNotFoundError("cannot read image: " + image_path)
    bbox = (131, 122, 92, 118)

    # Calculate the per-channel average over the whole image.
    channel_average = np.mean(img, axis=(0, 1))

    # EXEMPLAR_SIZE of mobilenetV2 is 127
    z_new_crop = get_subwindow(img, 127, bbox, channel_average)
    print(z_new_crop.mean(), "\t", z_new_crop.std())
def __init__(self, classid=0, anchorid=0, init_frame=None, init_bbox=None):
    """Build a tracker from the siamrpn_r50_l234_dwxcorr experiment.

    Args:
        classid: Stored unchanged as ``self.classId``.
        anchorid: Stored unchanged as ``self.anchorId``.
        init_frame: Stored unchanged as ``self.init_frame``.
        init_bbox: Stored unchanged as ``self.init_bbox``.
    """
    def to_cpu(storage, loc):
        # Deserialize every tensor onto the CPU, whatever device saved it.
        return storage.cpu()

    cfg_path = '../object_detection/pysot/experiments/siamrpn_r50_l234_dwxcorr/config.yaml'
    snapshot = '../object_detection/pysot/experiments/siamrpn_r50_l234_dwxcorr/model.pth'

    # CUDA is used only when it is both available and enabled in the config.
    cfg.merge_from_file(cfg_path)
    use_cuda = torch.cuda.is_available() and cfg.CUDA
    cfg.CUDA = use_cuda
    device = torch.device('cuda' if use_cuda else 'cpu')

    network = ModelBuilder()
    network.load_state_dict(torch.load(snapshot, map_location=to_cpu))
    network.eval()
    network.to(device)

    self.tracker = build_tracker(network)
    self.classId = classid
    self.anchorId = anchorid
    self.init_bbox = init_bbox
    self.init_frame = init_frame
def generate_data():
    """Run ``detection_by_tracking`` with the siammask_r50_l3 model.

    Input and output locations come from the module-level ``args``
    (``frame_dir``, ``json_file``, ``tracking_json``); all other settings are
    fixed below. The whole run executes with gradients disabled.
    """
    cfg.merge_from_file("experiments/siammask_r50_l3/config.yaml")

    # Fixed settings passed through to detection_by_tracking.
    fixed_settings = dict(
        detection_threshold=0.8,
        tracking_threshold=0.9,
        offset=0,
        high=12,
        low=12,
        step=24,
        parallel=False,
        multithreading=False,
    )

    with torch.no_grad():
        # Load the forward model.
        network = ModelBuilder()
        weights = torch.load("experiments/siammask_r50_l3/model.pth",
                             map_location=CPU)
        network.load_state_dict(weights)
        network.share_memory()
        network.eval()
        network.to(DEVICE)

        detection_by_tracking(frame_dir=args.frame_dir,
                              json_file=args.json_file,
                              save_json_file=args.tracking_json,
                              tracker_model=network,
                              **fixed_settings)
def main():
    """Track a user-selected region and record the annotated frames.

    The model is built from ``args.config`` and ``args.snapshot``. On the first
    frame the user selects a region of interest; every following frame is
    tracked, annotated, displayed and appended to ``result.avi``.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model and load the weights stored under the 'state_dict' key
    model = ModelBuilder()
    checkpoint = torch.load(args.snapshot,
                            map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(checkpoint['state_dict'])
    model.eval().to(device)

    # build tracker
    if cfg.RPN.TYPE == "YOLO":
        tracker = build_tracker(model, True)
    else:
        tracker = build_tracker(model)

    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    output_size = (480, 360)  # (width, height) of the recorded video
    writer = cv2.VideoWriter("result.avi", cv2.VideoWriter_fourcc(*'XVID'),
                             25.0, output_size)
    first_frame = True
    try:
        for frame in get_frames(args.video_name):
            if first_frame:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                except Exception:
                    exit()
                tracker.init(frame, init_rect)
                first_frame = False
            else:
                outputs = tracker.track(frame)
                if 'polygon' in outputs:
                    polygon = np.array(outputs['polygon']).astype(np.int32)
                    cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                                  True, (0, 255, 0), 3)
                    mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                    mask = mask.astype(np.uint8)
                    # NOTE(review): mask is already 0/255 uint8 here, so
                    # `mask * 255` appears to wrap around; left unchanged to
                    # keep the overlay colours as they are.
                    mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                    frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
                else:
                    bbox = list(map(int, outputs['bbox']))
                    cv2.rectangle(frame, (bbox[0], bbox[1]),
                                  (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                                  (0, 255, 0), 3)
                cv2.imshow(video_name, frame)
                # VideoWriter expects frames of the size it was opened with,
                # so frames of any other size are resized before writing.
                recorded = frame
                if (frame.shape[1], frame.shape[0]) != output_size:
                    recorded = cv2.resize(frame, output_size)
                writer.write(recorded)
                cv2.waitKey(40)
    finally:
        # Finalize the video file even when the loop exits early.
        writer.release()
class SiamRPNPP(BaseTracker):
    """SiamRPN++ tracker that is driven with image file paths."""

    def __init__(self):
        """Load the SiamRPN++ config and snapshot named in ``path_config``."""
        super(SiamRPNPP, self).__init__("SiamRPN++")

        config_file = path_config.SIAMRPNPP_CONFIG
        snapshot_file = path_config.SIAMRPNPP_SNAPSHOT

        cfg.merge_from_file(config_file)
        cfg.CUDA = torch.cuda.is_available()
        device_name = "cuda" if cfg.CUDA else "cpu"
        device = torch.device(device_name)

        # Weights are deserialized on the CPU, then the model is moved.
        self.model = ModelBuilder()
        weights = torch.load(snapshot_file,
                             map_location=lambda storage, loc: storage.cpu())
        self.model.load_state_dict(weights)
        self.model.eval()
        self.model.to(device)

        self.tracker = build_tracker(self.model)

    def initialize(self, image_file, box):
        """Read ``image_file`` and initialize the tracker with ``box``."""
        self.tracker.init(cv2.imread(image_file), box)

    def track(self, image_file):
        """Read ``image_file`` and return the ``"bbox"`` entry of the result."""
        result = self.tracker.track(cv2.imread(image_file))
        return result["bbox"]
def __init__(self, parent=None):
    """Wire up the UI callbacks and build the tracker.

    Args:
        parent: Forwarded to the base-class constructor.
    """
    super(MyMainWindow, self).__init__(parent)

    # Connect the on-clicked functions.
    click_handlers = (
        (self.pushButton_locationLoading, self.location_loading),
        (self.pushButton_videoLoading, self.video_loading),
        (self.pushButton_cameraLoading, self.camera_loading),
        (self.pushButton_bboxSetting, self.bbox_setting),
        (self.pushButton_algorithmProcessing, self.algorithm_processing),
    )
    for button, handler in click_handlers:
        button.clicked.connect(handler)
    self.scrollBar.valueChanged.connect(self.slider_change)
    self.checkBox.stateChanged.connect(self.checkbox_change)

    # Flags that control whether the message boxes are still shown.
    self.bbox_tips = True
    self.save_tips = True

    # Initialize the tracker from the experiment directory.
    model_location = './pysot/experiments/siammaske_r50_l3'
    self.config = model_location + '/config.yaml'
    self.snapshot = model_location + '/model.pth'
    self.tracker_name = model_location.split('/')[-1]
    self.video_name = ''

    cfg.merge_from_file(self.config)
    cfg.CUDA = torch.cuda.is_available()
    device_name = 'cuda' if cfg.CUDA else 'cpu'
    device = torch.device(device_name)

    network = ModelBuilder()
    weights = torch.load(self.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    network.load_state_dict(weights)
    network.eval()
    network.to(device)
    self.tracker = build_tracker(network)

    # Remaining window state.
    self.vs = None
    self.analysis_box = None
    self.analysis_max = 10
    self.save_location = ''
    self.afterCamera = False
def main():
    """Interactive tracking demo with an optional transformer output format.

    The first frame is used to select the target; every later frame is
    tracked, annotated and displayed. When ``cfg.TRANSFORMER.TRANSFORMER`` is
    set the tracker output is unpacked as ``(acc, (x1, y1, x2, y2))``;
    otherwise it is treated as a mapping with ``polygon``/``mask`` or ``bbox``.
    """
    cfg.merge_from_file(args.config)
    use_cuda = torch.cuda.is_available() and cfg.CUDA
    cfg.CUDA = use_cuda
    device = torch.device('cuda' if use_cuda else 'cpu')

    network = ModelBuilder()
    checkpoint = torch.load(args.snapshot,
                            map_location=lambda storage, loc: storage.cpu())
    network.load_state_dict(checkpoint['state_dict'])
    network.eval()
    network.to(device)
    tracker = build_tracker(network)

    video_name = (args.video_name.split('/')[-1].split('.')[0]
                  if args.video_name else 'webcam')
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    first_frame = True
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        outputs = tracker.track(frame)
        if cfg.TRANSFORMER.TRANSFORMER:
            acc, (x1, y1, x2, y2) = outputs
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
            cv2.putText(frame, 'Acc: ' + acc.astype('str'), (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
        elif 'polygon' in outputs:
            outline = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [outline.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
        else:
            box = list(map(int, outputs['bbox']))
            top_left = (box[0], box[1])
            bottom_right = (box[0] + box[2], box[1] + box[3])
            cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 3)

        # Every tracked frame is displayed for 40 ms.
        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def build_model(self):
    """Build a tracker from the snapshot named by the module-level ``args``.

    The model is moved to the module-level ``device``.

    Returns:
        The object returned by ``build_tracker`` for the loaded model.
    """
    def to_cpu(storage, loc):
        # Deserialize every tensor onto the CPU, whatever device saved it.
        return storage.cpu()

    network = ModelBuilder()
    weights = torch.load(args.snapshot, map_location=to_cpu)
    network.load_state_dict(weights)
    network.eval()
    network.to(device)
    return build_tracker(network)
def __init__(self, config_file, model_path):
    """Build a CUDA-resident tracker.

    Args:
        config_file: Path passed to ``cfg.merge_from_file``.
        model_path: Path of the state dict passed to ``torch.load``.
    """
    super().__init__()
    cfg.merge_from_file(config_file)

    network = ModelBuilder()
    weights = torch.load(model_path,
                         map_location=lambda storage, loc: storage.cpu())
    network.load_state_dict(weights)
    # The model is always moved to CUDA here; there is no CPU fallback.
    network.eval()
    network.cuda()

    self.tracker = build_tracker(network)
def main():
    """Track a user-selected region and save each tracked frame as a JPEG.

    The first frame is used to select the target. Every later frame is
    annotated and written to ``/home/tempuser1/pysot/demo/ouput/<n>.jpg``,
    where ``n`` counts tracked frames from 0.
    """
    cfg.merge_from_file(args.config)
    use_cuda = torch.cuda.is_available() and cfg.CUDA
    cfg.CUDA = use_cuda
    device = torch.device('cuda' if use_cuda else 'cpu')

    network = ModelBuilder()
    weights = torch.load(args.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    network.load_state_dict(weights)
    network.eval()
    network.to(device)
    tracker = build_tracker(network)

    video_name = (args.video_name.split('/')[-1].split('.')[0]
                  if args.video_name else 'webcam')

    i = 0
    first_frame = True
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        outputs = tracker.track(frame)
        if 'polygon' in outputs:
            outline = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [outline.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
            frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
        else:
            box = list(map(int, outputs['bbox']))
            top_left = (box[0], box[1])
            bottom_right = (box[0] + box[2], box[1] + box[3])
            cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 3)

        # Frames are written to disk instead of being displayed.
        print(i)
        cv2.imwrite(filename="/home/tempuser1/pysot/demo/ouput/" + str(i) + '.jpg',
                    img=frame)
        i += 1
class SiamRPNPPGroup(BaseTracker):
    """SiamRPN++ tracker whose weights are chosen by backbone and target."""

    def __init__(self, backbone, target):
        """Load the config and snapshot for a backbone/target pair.

        Args:
            backbone: One of "AlexNet", "ResNet-50", "MobileNetV2", "SiamMask".
            target: One of "OTB", "VOT", "VOTLT" (only some pairs are valid).

        Raises:
            ValueError: If the pair is not one of the supported combinations.
        """
        super(SiamRPNPPGroup, self).__init__(f"SiamRPN++Group/{backbone}/{target}")

        # (backbone, target, prefix of the path_config attribute pair)
        variants = (
            ("AlexNet", "OTB", "SIAMRPNPP_ALEXNET_OTB"),
            ("AlexNet", "VOT", "SIAMRPNPP_ALEXNET"),
            ("ResNet-50", "OTB", "SIAMRPNPP_RESNET_OTB"),
            ("ResNet-50", "VOT", "SIAMRPNPP_RESNET"),
            ("ResNet-50", "VOTLT", "SIAMRPNPP_RESNET_LT"),
            ("MobileNetV2", "VOT", "SIAMRPNPP_MOBILENET"),
            ("SiamMask", "VOT", "SIAMPRNPP_SIAMMASK"),
        )
        for known_backbone, known_target, prefix in variants:
            if backbone == known_backbone and target == known_target:
                config = getattr(path_config, prefix + "_CONFIG")
                snapshot = getattr(path_config, prefix + "_SNAPSHOT")
                break
        else:
            raise ValueError("Invalid backbone and target")

        cfg.merge_from_file(config)
        cfg.CUDA = torch.cuda.is_available()
        device_name = "cuda" if cfg.CUDA else "cpu"
        device = torch.device(device_name)

        # Weights are deserialized on the CPU, then the model is moved.
        self.model = ModelBuilder()
        weights = torch.load(snapshot,
                             map_location=lambda storage, loc: storage.cpu())
        self.model.load_state_dict(weights)
        self.model.eval()
        self.model.to(device)

        self.tracker = build_tracker(self.model)

    def initialize(self, image_file, box):
        """Read ``image_file`` and initialize the tracker with ``box``."""
        self.tracker.init(cv2.imread(image_file), box)

    def track(self, image_file):
        """Read ``image_file`` and return the ``"bbox"`` entry of the result."""
        result = self.tracker.track(cv2.imread(image_file))
        return result["bbox"]
def build_model(self):
    """Build a tracker whose model lives on this worker's device.

    With ``cfg.CUDA`` set, the CUDA device index is
    ``self.index // self.num_workers``; otherwise the CPU is used. The chosen
    device is printed.

    Returns:
        The object returned by ``build_tracker`` for the loaded model.
    """
    network = ModelBuilder()
    weights = torch.load(args.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    network.load_state_dict(weights)

    if cfg.CUDA:
        gpu_index = int(self.index // self.num_workers)
        device = torch.device('cuda:{}'.format(gpu_index))
    else:
        device = torch.device('cpu')
    print(device)

    network.eval()
    network.to(device)
    return build_tracker(network)
class Tracking(object):
    """Thin wrapper around a pysot model and the tracker built from it."""

    def __init__(self, config, snapshot):
        """Load the model and build the tracker.

        Args:
            config: Path passed to ``cfg.merge_from_file``.
            snapshot: Path of the state dict passed to ``torch.load``.
        """
        cfg.merge_from_file(config)
        # CUDA is used only when it is both available and enabled in the config.
        cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
        device = torch.device('cuda' if cfg.CUDA else 'cpu')

        # create model
        self.model = ModelBuilder()

        # load model
        self.model.load_state_dict(
            torch.load(snapshot,
                       map_location=lambda storage, loc: storage.cpu()))
        self.model.eval().to(device)

        # build tracker
        self.tracker = build_tracker(self.model)

        self.center_pos = None
        self.size = None

    def init(self, frame, init_rect):
        """Initialize the tracker.

        Args:
            frame: Image handed to ``self.tracker.init``.
            init_rect: Comma-separated integers as a string, e.g. ``"1,2,3,4"``.
        """
        print("initial rectangle selected as: ", init_rect)
        init_rect = list(map(int, init_rect.split(',')))
        self.tracker.init(frame, init_rect)

    def update(self, bbox):
        """Forward ``bbox`` to ``self.tracker.update``; callers must call this."""
        self.tracker.update(bbox)

    def get_roi(self, img, instance_size):
        """Return ``self.tracker.get_roi(img, instance_size)``."""
        return self.tracker.get_roi(img, instance_size)

    def track(self, frame, x_crop, scale_z, instance_size):
        """Return ``self.tracker.track`` for an already-cropped search region."""
        return self.tracker.track(frame, x_crop, scale_z, instance_size)

    # The following methods are used for template update.
    def templateFeature(self, z):
        """Return ``self.model.templateFeature(z)``."""
        return self.model.templateFeature(z)

    def zf(self):
        """Return the model's current ``zf`` attribute."""
        return self.model.zf

    def updateTemplate(self, zf):
        """Replace the model's ``zf`` attribute with ``zf``."""
        # The wrapped model is stored on the instance; the previous bare
        # `model` name was undefined here and raised NameError.
        self.model.zf = zf
def load_tracker(self, tracker_config, tracker_snapshot):
    """Load the selected pysot tracker.

    The tracker is stored on ``self.tracker``.

    Args:
        tracker_config (str): Path to pysot config file for the tracker
        tracker_snapshot (str): Path to .pth file of pysot tracker
    """
    cfg.merge_from_file(tracker_config)
    # CUDA is used only when it is both available and enabled in the config.
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    model = ModelBuilder()
    # Deserialize onto the CPU first: without map_location a snapshot saved
    # from a GPU cannot be loaded on a host without CUDA. The model is moved
    # to the selected device right after.
    model.load_state_dict(
        torch.load(tracker_snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    self.tracker = build_tracker(model)
def main():
    """Interactive tracking demo.

    The ROI is chosen on the first frame and tracked on the following frames;
    each tracked frame is annotated with a polygon and mask overlay, or with a
    bounding box, and then displayed.
    """
    cfg.merge_from_file(args.config)
    use_cuda = torch.cuda.is_available() and cfg.CUDA
    cfg.CUDA = use_cuda
    device = torch.device('cuda' if use_cuda else 'cpu')

    network = ModelBuilder()
    weights = torch.load(args.snapshot,
                         map_location=lambda storage, loc: storage.cpu())
    network.load_state_dict(weights)
    network.eval()
    network.to(device)
    tracker = build_tracker(network)

    video_name = (args.video_name.split('/')[-1].split('.')[0]
                  if args.video_name else 'webcam')
    cv2.namedWindow(video_name, cv2.WINDOW_NORMAL)

    first_frame = True
    for frame in get_frames(args.video_name):
        if first_frame:
            # Choose a rectangle as ROI and initialize the tracker with it.
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        outputs = tracker.track(frame)
        if 'polygon' in outputs:
            # Draw the polygon, reshaped to (vertex_count, 1, 2).
            outline = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(frame, [outline.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            mask = np.stack([mask, mask*255, mask]).transpose(1, 2, 0)
            # Blend the mask into the frame; the weights set the transparency.
            frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
        else:
            # Convert the box to ints and draw it on the frame.
            box = list(map(int, outputs['bbox']))
            top_left = (box[0], box[1])
            bottom_right = (box[0]+box[2], box[1]+box[3])
            cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 3)

        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def load_pysot_model(tracker_type):
    """Load the pysot model registered for ``tracker_type``.

    Args:
        tracker_type: Key into the module-level ``PYSOT_TRACKERS`` mapping;
            the mapped name selects the experiment config and the ``.pth``
            weights file.

    Returns:
        The model returned by ``load_pretrain``, in eval mode, on CUDA when
        available and on the CPU otherwise.
    """
    configpath = "./week3/kalman/pysot/experiments/" + PYSOT_TRACKERS[tracker_type] + \
        "/config.yaml"
    modelpath = "./week3/kalman/pysot/models/" + PYSOT_TRACKERS[
        tracker_type] + ".pth"

    cfg.merge_from_file(configpath)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # load model
    model = ModelBuilder()
    model.load_state_dict(
        torch.load(modelpath,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # Move to the device selected above rather than calling .cuda()
    # unconditionally, which fails on hosts without CUDA.
    return load_pretrain(model, modelpath).to(device).eval()
def PYSOTINIT():
    """Build a tracker from the module-level ``tracker_config`` and ``snapshot``.

    Returns:
        The object returned by ``build_tracker`` for the loaded model.
    """
    def to_cpu(storage, loc):
        # Deserialize every tensor onto the CPU, whatever device saved it.
        return storage.cpu()

    cfg.merge_from_file(tracker_config)
    cfg.CUDA = torch.cuda.is_available()
    device_name = 'cuda' if cfg.CUDA else 'cpu'
    device = torch.device(device_name)

    network = ModelBuilder()
    weights = torch.load(snapshot, map_location=to_cpu)
    network.load_state_dict(weights)
    network.eval()
    network.to(device)

    return build_tracker(network)
def test_snapshot(epoch: int, snapshot: str, test_path: str):
    """Run a snapshot on a few validation pairs and save annotated images.

    A random folder under the COCO ``val2017`` directory is picked. Its sorted
    ``*.z.jpg`` and ``*.x.jpg`` files are paired by position, at most 8 pairs.
    For each pair the tracker prediction (blue) and the annotation (red) are
    drawn on the search image, and both images are written below ``test_path``.

    Args:
        epoch: Not used by this function.
        snapshot: Checkpoint path; the weights are read from its
            ``'state_dict'`` entry.
        test_path: Directory under which the output folder is created.
    """
    max_img = 8

    # The model always runs on the CPU here.
    model = ModelBuilder()
    data = torch.load(snapshot, map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(data['state_dict'])
    model.eval().to(torch.device('cpu'))
    tracker = build_tracker(model)

    root = cfg.DATASET.COCO.ROOT
    cur_path = os.path.dirname(os.path.realpath(__file__))
    root = os.path.join(cur_path, '../../', root)
    anno_path = os.path.join(root, '../', "val2017.json")
    with open(anno_path, 'r') as f:
        anno = json.load(f)
    anno = filter_zero(anno)

    dataset = os.path.join(root, "val2017")
    folder = random.choice(glob.glob(f"{dataset}/**"))
    zs = sorted(glob.glob(f"{folder}/*.z.jpg"))
    xs = sorted(glob.glob(f"{folder}/*.x.jpg"))
    xs = [(x, get_anno_from_img_path(anno, x)) for x in xs]

    # zip stops at the shorter list, so a folder with fewer search images than
    # templates no longer raises IndexError.
    for z_path, (x_path, bbox) in zip(zs[:max_img], xs):
        z = cv2.imread(z_path)
        x = cv2.imread(x_path)

        tracker.init_(z)
        cls, (x1, y1, x2, y2) = tracker.track(x)
        cv2.rectangle(x, (x1, y1), (x2, y2), (255, 0, 0), 2)
        a1, b1, a2, b2 = bbox
        cv2.rectangle(x, (a1, b1), (a2, b2), (0, 0, 255), 2)
        cv2.putText(x, 'Acc: ' + cls.astype('str'), (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        parent_dir = f"{test_path}/{os.path.basename(Path(z_path).parent)}"
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(parent_dir, exist_ok=True)
        cv2.imwrite(f"{parent_dir}/{os.path.basename(x_path)}", x)
        cv2.imwrite(f"{parent_dir}/{os.path.basename(z_path)}", z)
def __init__(self):
    """Build the tracker, then register the ROS publisher, subscriber and service.

    Topic names, tracker type and model paths are read from the module-level
    ``config`` object.
    """
    self.init_rect = None

    # Build the tracker before registering any callback: a subscriber or
    # service callback may be invoked as soon as it is registered, and must
    # not run before self.tracker exists.
    cfg.TRACK.TYPE = config.TRACK_TYPE
    cfg.merge_from_file(config.CONFIG_PATH)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')
    model = ModelBuilder()
    model.load_state_dict(
        torch.load(config.MODEL_PATH,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)
    self.tracker = build_tracker(model)

    self.pysot_pub = rospy.Publisher(config.TRACK_PUB_TOPIC,
                                     Int32MultiArray, queue_size=10)
    self.img_sub = rospy.Subscriber(config.IMAGE_SUB_TOPIC, Image,
                                    self.receive_frame_and_track)
    self.service = rospy.Service("init_rect", InitRect, self.set_init_rect)
def init_track(self):
    """Build the siamrpn_alex_dwxcorr tracker and store it on ``self.tracker``."""
    # Path to the config file.
    config_path = './models/siamrpn_alex_dwxcorr/config.yaml'
    # Path to the snapshot file.
    snapshot_path = './models/siamrpn_alex_dwxcorr/model.pth'

    # Merge the parameters; CUDA is used only when it is both available and
    # enabled in the config.
    cfg.merge_from_file(config_path)
    use_cuda = torch.cuda.is_available() and cfg.CUDA
    cfg.CUDA = use_cuda
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Create the model and load its weights.
    network = ModelBuilder()
    weights = torch.load(snapshot_path,
                         map_location=lambda storage, loc: storage.cpu())
    network.load_state_dict(weights)
    network.eval()
    network.to(device)

    # Create the tracker.
    self.tracker = build_tracker(network)
def main():
    """Track every listed object in every demo video and dump the boxes to CSV.

    For each ``*.mp4`` found in the demo video directory, the rows of the
    matching ``./demo/vids/<name>.csv`` are used as initial rectangles. Each
    rectangle is tracked through the video, and the collected
    ``[frame_count, object_counter, bbox, video_name]`` records are written to
    ``vid-<video>-tracking-<n>-object-<rect>.csv``.
    """
    # load config
    cfg.merge_from_file('./experiments/siamrpn_alex_dwxcorr/config.yaml')
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')
    print(device)

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load('./experiments/siamrpn_alex_dwxcorr/model.pth',
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    video_list = glob1("/home/developer/kashyap/pysot-master/demo/vids/", "*.mp4")
    for video_name in video_list:
        video_name_str = os.path.splitext(video_name)[0]
        df = pd.read_csv('./demo/vids/'+video_name_str+'.csv',
                         delimiter=',', header=None)
        cords = [list(x) for x in df.values]

        for object_counter, cord in enumerate(cords, start=1):
            frame_count = 1
            mylist = [[frame_count, object_counter, cord, video_name]]

            first_frame = True
            for frame in get_frames(video_name):
                if first_frame:
                    tracker.init(frame, cord)
                    first_frame = False
                    continue

                outputs = tracker.track(frame)
                if 'polygon' in outputs:
                    # Polygon outputs are not handled by this script.
                    exit()
                bbox = list(map(int, outputs['bbox']))
                frame_count = frame_count + 1
                mylist.append([frame_count, object_counter, bbox, video_name])

            output_name = ('vid-'+str(video_name)+'-tracking-'+str(object_counter)
                           + '-object-'+str(cord)+'.csv')
            with open(output_name, 'w', newline='') as csvfile:
                # The line terminator has to be a keyword argument; passing it
                # positionally after `quoting=` was a syntax error.
                writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL,
                                    lineterminator='\n')
                # NOTE(review): writerow puts all records on a single CSV row;
                # confirm whether writerows (one record per row) was intended.
                writer.writerow(mylist)
def main():
    """Follow a user-selected object with the iiwa arm using a RealSense camera.

    The robot is moved to JOINT_PERCH, the camera streams and the siammask
    tracker are set up, and the user selects the object on the first frame.
    On every later frame the tracked box centre is deprojected to a 3D point,
    transformed into the '/iiwa_link_0' frame and sent as a cartesian goal.
    The loop ends when ESC or 'q' is pressed; the arm then returns to
    JOINT_PERCH and the camera pipeline is stopped.
    """
    # instantiate iiwa
    iiwa = iiwaRobot()
    time.sleep(4)  # allow iiwa taking some time to wake up
    # zero joints
    iiwa.move_joint(commit=True)
    # iiwa get ready
    iiwa.move_joint(JOINT_PERCH, commit=True)
    time.sleep(4)
    rospy.loginfo("iiwa is ready")
    # read TCP orientation; it is reused unchanged for every cartesian goal
    QUAT = Quaternion()
    QUAT.x = iiwa.cartesian_pose.orientation.x
    QUAT.y = iiwa.cartesian_pose.orientation.y
    QUAT.z = iiwa.cartesian_pose.orientation.z
    QUAT.w = iiwa.cartesian_pose.orientation.w
    # Configure realsense D435 depth and color streams
    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    profile = pipeline.start(config)
    # Create an align object
    align_to = rs.stream.color
    align = rs.align(align_to)
    # load siammask config (paths are relative to sys.path[0])
    cfg.merge_from_file(sys.path[0]+"/siammask_r50_l3/config.yaml")
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')
    # create model
    model = ModelBuilder()
    # load model
    model.load_state_dict(torch.load(sys.path[0]+"/siammask_r50_l3/model.pth",
        map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)
    # build tracker
    tracker = build_tracker(model)
    # label object
    video_name = 'D435_color'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    first_frame = True
    while True:
        # wait image stream and select object of interest
        frames = pipeline.wait_for_frames()
        # Align the depth frame to color frame
        aligned_frames = align.process(frames)
        color_frame = aligned_frames.get_color_frame()
        depth_frame = aligned_frames.get_depth_frame()
        depth_intrinsics = rs.video_stream_profile(depth_frame.profile).get_intrinsics()
        # convert image to numpy arrays
        # NOTE(review): when color_frame is falsy the images from the previous
        # iteration are reused (or are undefined on the very first one).
        if color_frame:
            color_image = np.asanyarray(color_frame.get_data())
            depth_image = np.asanyarray(depth_frame.get_data())
        if first_frame:
            # let the user draw the initial box; any failure ends the program
            try:
                init_rect = cv2.selectROI(video_name, color_image, False, False)
            except:
                exit()
            tracker.init(color_image, init_rect)
            first_frame = False
        else:
            # start tracking
            outputs = tracker.track(color_image)
            polygon = np.array(outputs['polygon']).astype(np.int32)
            cv2.polylines(color_image, [polygon.reshape((-1, 1, 2))],
                          True, (0, 255, 0), 3)
            mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
            mask = mask.astype(np.uint8)
            mask = np.stack([mask, mask*255, mask]).transpose(1, 2, 0)
            color_image = cv2.addWeighted(color_image, 0.77, mask, 0.23, -1)
            bbox = list(map(int, outputs['bbox']))
            # pixel at the centre of the tracked box
            poi_pixel = [int(bbox[0]+0.5*bbox[2]), int(bbox[1]+0.5*bbox[3])]
            poi_depth = depth_frame.get_distance(poi_pixel[0], poi_pixel[1])
            poi_rs = rs.rs2_deproject_pixel_to_point(depth_intrinsics, poi_pixel, poi_depth)
            print("Object 3D position w.r.t. camera frame: {}".format(poi_rs))
            # an all-zero point is skipped: no goal is sent for it
            if not np.allclose(poi_rs, np.zeros(3)):
                # compute transformed position of poi w.r.t. iiwa_link_0
                # (the returned time is not used afterwards)
                transfrom = iiwa.tf_listener.getLatestCommonTime('/iiwa_link_0', '/rs_d435')
                pos_rs = PoseStamped()
                pos_rs.header.frame_id = 'rs_d435'
                pos_rs.pose.orientation.w = 1.
                pos_rs.pose.position.x = poi_rs[0]
                pos_rs.pose.position.y = poi_rs[1]
                pos_rs.pose.position.z = poi_rs[2]
                pos_iiwa = iiwa.tf_listener.transformPose('/iiwa_link_0', pos_rs)
                rospy.loginfo("Object 3D position w.r.t. iiwa base from: {}\n ee w.r.t. iiwa base: {}".format(pos_iiwa.pose.position, iiwa.cartesian_pose.position))
                # set cartesian goal: x is fixed to X, y and z follow the
                # object but are clipped to the [Y_MIN, Y_MAX] / [Z_MIN, Z_MAX] ranges
                iiwa.goal_carte_pose.header.frame_id = 'iiwa_link_0'
                iiwa.goal_carte_pose.pose.position.x = X
                iiwa.goal_carte_pose.pose.position.y = np.clip(pos_iiwa.pose.position.y, Y_MIN, Y_MAX)
                iiwa.goal_carte_pose.pose.position.z = np.clip(pos_iiwa.pose.position.z, Z_MIN, Z_MAX)
                iiwa.goal_carte_pose.pose.orientation = QUAT
                iiwa.move_cartesian(cartesian_pose=iiwa.goal_carte_pose)
        # display image stream, press 'ESC' or 'q' to terminate
        cv2.imshow(video_name, color_image)
        key = cv2.waitKey(40)
        if key in (27, ord("q")):
            break
    # return the arm to its perch pose before shutting the camera down
    time.sleep(4)
    iiwa.move_joint(joint_position=JOINT_PERCH)
    time.sleep(4)
    pipeline.stop()
    rospy.loginfo("Finished")
if __name__ == '__main__': # load config cfg.merge_from_file(args.config) cfg.CUDA = torch.cuda.is_available() device = torch.device('cuda' if cfg.CUDA else 'cpu') # create model model = ModelBuilder() # load model model.load_state_dict( torch.load(args.snapshot, map_location=lambda storage, loc: storage.cpu())) model.eval().to(device) # build tracker tracker = build_tracker(model) # init_frame = np.load('../chainer-pysot/init.npz')['frame'] # init_rect = np.load('../chainer-pysot/init.npz')['init_rect'] # second_frame = np.load('../chainer-pysot/output.npz')['second_frame'] # tracker.init(init_frame, init_rect) # np.savez('../chainer-pysot/init.npz', # frame=init_frame, zfs=[f.detach().cpu().numpy() for f in tracker.model.zf], init_rect=init_rect) # tracker.track(second_frame) # raise ValueError first_frame = True if args.video_name:
def main():
    """Trace sub-networks of the SiamRPN MobileNetV2 model to TorchScript.

    Exemplar and search crops are taken from ``image/benchmark_5.jpg`` around
    a fixed box. Three wrappers around backbone/neck/rpn_head are loaded from
    the same snapshot: the template branch is traced to ``archine.pt``, the
    full template+search network to ``frost.pt``, and the head-only wrapper is
    just executed. Finally the box is drawn and the frame displayed.
    """
    config_file = "experiments/siamrpn_mobilev2_l234_dwxcorr/config.yaml"
    weights_file = "experiments/siamrpn_mobilev2_l234_dwxcorr/model.pth"

    cfg.merge_from_file(config_file)
    device = torch.device("cuda")

    def load_weights(module):
        """Load the snapshot into ``module``, set eval mode, move to ``device``."""
        state = torch.load(weights_file,
                           map_location=lambda storage, loc: storage.cpu())
        module.load_state_dict(state)
        module.eval()
        module.to(device)
        return module

    model = load_weights(ModelBuilder())
    tracker = build_tracker(model)

    frame = cv2.imread("image/benchmark_5.jpg")
    bbox = (280, 80, 200, 270)

    # This part is inherited from the tracker.init method, trying to serialize
    # tracker.template.
    left, top, width, height = bbox
    center_pos = np.array([left + (width - 1) / 2, top + (height - 1) / 2])
    size = np.array([width, height])
    context = 0.5 * np.sum(size)
    w_z = size[0] + context
    h_z = size[1] + context
    s_z = round(np.sqrt(w_z * h_z))
    s_x = 255 / 127 * s_z

    # Calculate the channel average and cut the exemplar and search crops.
    channel_average = np.mean(frame, axis=(0, 1))
    z_crop = tracker.get_subwindow(frame, center_pos, 127, s_z, channel_average)
    x_crop = tracker.get_subwindow(frame, center_pos, 255, s_x, channel_average)

    class _SiamParts(torch.nn.Module):
        """Shared backbone/neck/rpn_head layout of the three wrappers below."""

        def __init__(self):
            super().__init__()
            self.backbone = get_backbone(cfg.BACKBONE.TYPE, **cfg.BACKBONE.KWARGS)
            self.neck = get_neck(cfg.ADJUST.TYPE, **cfg.ADJUST.KWARGS)
            self.rpn_head = get_rpn_head(cfg.RPN.TYPE, **cfg.RPN.KWARGS)

    # A new script model inherited from the template branch.
    class ArcTemplate(_SiamParts):
        def forward(self, z_crop):
            return self.neck(self.backbone(z_crop))

    arc = load_weights(ArcTemplate())
    zrf = arc(z_crop)
    for z in zrf:
        print(z.shape)
    torch.jit.trace(arc, z_crop).save("archine.pt")

    # A new script model inherited from the track step: the frost is the
    # combination of the backbone/neck/rpn_head networks.
    class FrostTemplate(_SiamParts):
        def forward(self, z, x):
            zf = self.neck(self.backbone(z))
            xf = self.neck(self.backbone(x))
            print("zf shape:\n", zf[0].shape, "\t", zf[1].shape, "\t", zf[2].shape)
            print("xf shape:\n", xf[0].shape, "\t", xf[1].shape, "\t", xf[2].shape)
            return self.rpn_head(zf, xf)

    fro = load_weights(FrostTemplate())
    res = fro(z_crop, x_crop)
    torch.jit.trace(fro, (z_crop, x_crop)).save("frost.pt")

    # The rpn head model.
    class HeadTemplate(_SiamParts):
        def forward(self, z, x):
            return self.rpn_head(z, x)

    hed = load_weights(HeadTemplate())
    # The model works. However, we don't know how to save it as a script model.
    zeta = hed(zrf, zrf)

    # Visualization.
    cv2.rectangle(frame, (280, 80), (480, 350), (0, 0, 255), 2)
    cv2.imshow("_", frame)
    cv2.waitKey(0)
def main():
    """Run the edge side of the edge-cloud tracking pipeline.

    Connects to the broker through the ECCI client, loads the tracker model,
    starts a pool of 10 tracker workers and then walks over
    ``./test/image/*.JPEG`` in sorted order:

    * Key frames (the first frame, and every 10th frame since the last key
      frame when ``is_key`` says so or 20 frames have passed) are published to
      the cloud; the returned boxes / labels / scores re-initialise the
      workers and are drawn in red.
    * All other frames are tracked locally by the workers and the results
      read from the shared queue are drawn in green.

    Every processed frame is written to ``./test/output/<index>.jpg``.
    """
    pool_size = 10

    # Initialize ecci sdk and connect to the broker in edge-cloud
    ecci_client = Client()
    mqtt_thread = threading.Thread(target=ecci_client.initialize)
    mqtt_thread.start()
    ecci_client.wait_for_ready()
    print('edge start --------')

    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model; map_location lets a GPU-saved checkpoint load on CPU hosts
    checkpoint = torch.load(args.snapshot, map_location=device)
    model.load_state_dict(checkpoint)
    for param in model.parameters():
        param.requires_grad = False
    model.eval().to(device)

    # multiprocessing
    manager = mp.Manager()
    resQueue = manager.Queue()
    multiProcess = []
    label = []
    probs = []
    for _ in range(pool_size):
        worker = build_multitracker(model, label, probs, resQueue)
        multiProcess.append(worker)
        worker.start()

    def rate(start, end):
        """Return 1 / elapsed, or infinity when the clock did not advance."""
        elapsed = end - start
        return 1 / elapsed if elapsed > 0 else float('inf')

    def detect_on_cloud(frame):
        """Send ``frame`` to the cloud, re-init the workers, draw detections.

        Returns ``(label, probs, num_process)`` as delivered by the cloud.
        """
        payload = {"type": "data", "contents": {"frame": frame}}
        print("####################", payload)
        ecci_client.publish(payload, "cloud")
        cloud_data = ecci_client.get_sub_data_payload_queue().get()
        print("###########recieve data from cloud", cloud_data)
        boxes = cloud_data["bbox"]
        labels = cloud_data["label"]
        scores = cloud_data["probs"]
        # Never index past the worker pool when the cloud returns more
        # detections than there are workers.
        count = min(len(boxes), len(multiProcess))
        if count < len(boxes):
            print('only %d of %d detections can be tracked'
                  % (count, len(boxes)))
        t_detect_start = time.time()
        for i in range(count):
            x1, y1, x2, y2 = boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]
            init_rect = [x1, y1, x2 - x1, y2 - y1]
            multiProcess[i].init(frame, init_rect, labels[i], scores[i])
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                          (0, 0, 255), 3)
            # OpenCV needs integer pixel coordinates here as well.
            cv2.putText(frame, '%s: %.3f' % (labels[i], scores[i]),
                        (int(x1), int(y1) - 15), cv2.FONT_HERSHEY_PLAIN,
                        1.0, (0, 0, 255), thickness=1)
        t_detect_end = time.time()
        print("detect fps : ", rate(t_detect_start, t_detect_end))
        return labels, scores, count

    def track_locally(frame, labels, scores, count):
        """Let the first ``count`` workers track ``frame`` and draw the results."""
        print('non-key frame')
        t_track_start = time.time()
        for i in range(count):
            multiProcess[i].track(frame)
        t_track_end = time.time()
        print("track fps : ", rate(t_track_start, t_track_end))
        for i in range(count):
            resDict = resQueue.get()
            print(resDict)
            box = list(map(int, resDict['bbox']))
            cv2.rectangle(frame, (box[0], box[1]),
                          (box[0] + box[2], box[1] + box[3]), (0, 255, 0), 3)
            cv2.putText(frame, '%s: %.3f' % (labels[i], scores[i]),
                        (box[0], box[1] - 15), cv2.FONT_HERSHEY_PLAIN, 1.0,
                        (0, 255, 0), thickness=1)

    first_frame = True
    key_frame = None
    num_process = 0
    index = 0  # frames since the last key frame; set to 1 on every key frame
    image_files = sorted(glob.glob('./test/image/*.JPEG'))
    for f, image_file in enumerate(image_files):
        frame = cv2.imread(image_file)
        if first_frame:
            # keyframe need to be uploaded to cloud
            print('first frame')
            key_frame = frame
            label, probs, num_process = detect_on_cloud(frame)
            first_frame = False
            index = 1
        elif index % 10 == 0 and (is_key(key_frame, frame)
                                  or index % 20 == 0):
            # keyframe need to be uploaded to cloud
            print('key frame')
            key_frame = frame
            label, probs, num_process = detect_on_cloud(frame)
            index = 1
        else:
            track_locally(frame, label, probs, num_process)
            index += 1
        cv2.imwrite('./test/output/%s.jpg' % f, frame)

    # Wait for the workers started above; no new trackers are created here.
    for worker in multiProcess:
        worker.join()
default='experiments/siammaske_r50_l3/config.yaml', help='config file') parser.add_argument('--snapshot', type=str, default='experiments/siammaske_r50_l3/model.pth', help='model name') args = parser.parse_args() cfg.merge_from_file(args.config) cfg.CUDA = torch.cuda.is_available() and cfg.CUDA # create model model = ModelBuilder() # load model model.load_state_dict( torch.load(args.snapshot, map_location=lambda storage, loc: storage.cpu())) model.eval().cuda() reset = 1 temp_path = 'updatenet_lasot_dataset' if not os.path.isdir(temp_path): os.makedirs(temp_path) video_path = '../LaSOT/' category = os.listdir(video_path) category.sort() template_acc = None template_cur = None template_gt = None init0 = [] init = []
def main():
    """Interactive tracking demo that re-initialises the tracker on low scores.

    The user selects the target on the first frame. For every later frame the
    tracker is run; when ``best_score`` drops below 0.95 the tracker is
    re-initialised from the last confidently tracked frame and box and the
    frame is skipped, otherwise the result (polygon + mask overlay, or a
    rectangle) is drawn and shown.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(torch.load(args.snapshot, map_location=device))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    first_frame = True
    last_frame = None  # clean copy of the last confidently tracked frame
    last_bbox = None   # box that belongs to last_frame
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                exit()
            print(init_rect)
            tracker.init(frame, init_rect)
            first_frame = False
            last_frame = frame.copy()
            last_bbox = init_rect
        else:
            outputs = tracker.track(frame)
            score = outputs["best_score"]
            if score < 0.95:
                # Fall back to the last trusted state and skip this frame.
                tracker.init(last_frame, last_bbox)
                continue
            print(score)

            # Snapshot the frame BEFORE drawing on it: the drawing calls
            # below modify `frame` in place, and a later re-initialisation
            # must not see the overlay as part of the target.
            last_frame = frame.copy()
            if 'bbox' in outputs:
                last_bbox = list(map(int, outputs['bbox']))

            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
        cv2.imshow(video_name, frame)
        cv2.waitKey(40)
def main():
    """Run a tracking worker that talks to a server over ZeroMQ.

    The worker downloads the model if it is missing, builds the tracker,
    subscribes to the ``frame_`` topic and to its own id on port 5556 of
    ``args.server_ip`` and pushes results to port 5557. It registers itself
    once the subscriber handshake succeeds (detected by a socket monitor
    thread) and then handles messages until interrupted:

    * ``FRAME``    -- track the frame (ignored until a support was received)
                      and push a ``TRACK`` message with box, score and time.
    * ``SUPPORT``  -- decode the base64 image, initialise the tracker.
    * ``LOCATION`` -- pass the new centre position to ``tracker.update``.
    * ``PING``     -- answer with ``PONG``.

    On Ctrl+C a ``FIN`` message is sent and the worker waits for the server's
    ``FIN`` before exiting; a second Ctrl+C aborts that wait.
    """
    # Selecting a CUDA device only makes sense when CUDA exists; the rest of
    # the function already supports running on CPU.
    if torch.cuda.is_available():
        torch.cuda.set_device(args.gpu_id)

    model_dir = "./experiments/siamrpn_r50_l234_dwxcorr/model.pth"
    model_config = "./experiments/siamrpn_r50_l234_dwxcorr/config.yaml"
    if os.path.isfile(model_dir):
        print("model file {} found".format(model_dir))
    else:
        print("model file {} not found, starting download".format(model_dir))
        os.system(
            "gdown https://drive.google.com/uc?id=1-tEtYQdT1G9kn8HsqKNDHVqjE16F8YQH")
        os.system("mv model.pth ./experiments/siamrpn_r50_l234_dwxcorr")

    # load config
    cfg.merge_from_file(model_config)
    cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(model_dir,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # create an unique identifier
    worker_id = uuid.uuid4()

    # build tracker
    tracker = build_tracker(model)

    # One context serves both sockets.
    context = zmq.Context()

    # set up frame listening socket
    sub_socket = context.socket(zmq.SUB)
    sub_socket.connect("tcp://{}:5556".format(args.server_ip))
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, "frame_")
    sub_socket.setsockopt_string(zmq.SUBSCRIBE, str(worker_id))

    # setup push socket
    push_socket = context.socket(zmq.PUSH)
    push_socket.connect("tcp://{}:5557".format(args.server_ip))

    # event monitoring
    # used to register worker once connection is established
    EVENT_MAP = {}
    for name in dir(zmq):
        if name.startswith('EVENT_'):
            value = getattr(zmq, name)
            EVENT_MAP[value] = name

    # monitor thread function
    def event_monitor(monitor):
        while monitor.poll():
            evt = recv_monitor_message(monitor)
            evt.update({'description': EVENT_MAP[evt['event']]})
            if evt['event'] == zmq.EVENT_HANDSHAKE_SUCCEEDED:
                push_socket.send_json(
                    {"type": "REGISTER", "id": str(worker_id)})
            if evt['event'] == zmq.EVENT_MONITOR_STOPPED:
                break
        monitor.close()

    # register monitor; the thread is a daemon so that it cannot keep the
    # process alive after main() exits while it is blocked in poll().
    monitor = sub_socket.get_monitor_socket()
    t = threading.Thread(target=event_monitor, args=(monitor,))
    t.daemon = True
    t.start()

    support = None

    try:
        while True:
            # wait for next message: topic frame first, then JSON metadata
            _ = sub_socket.recv()
            md = sub_socket.recv_json()

            if md['type'] == 'FRAME':
                # The payload must always be received to keep the stream in
                # sync, but it is only decoded once tracking can start.
                msg = sub_socket.recv()
                if support is None:
                    continue
                buf = memoryview(msg)
                frame = np.frombuffer(
                    buf, dtype=md['dtype']).reshape(md['shape'])

                outputs = tracker.track(frame)
                bbox = list(map(int, outputs['bbox']))

                # send result
                push_socket.send_json(
                    {
                        "type": "TRACK",
                        "bbox": bbox,
                        "score": outputs['best_score'].tolist(),
                        "time": md['time'],
                        "id": str(worker_id)
                    })
                print('message: {}'.format(md['time']), end='\r')
            elif md['type'] == 'SUPPORT':
                frame_raw = md['data']['img']
                # base 64 png image; the channel order is reversed after
                # decoding to RGB
                frame = np.array(
                    Image.open(
                        io.BytesIO(
                            base64.b64decode(frame_raw)
                        )
                    ).convert('RGB'))[:, :, ::-1]
                bbox = [int(float(i)) for i in md['data']['bbox'].split(",")]
                tracker.init(frame, bbox)
                support = (frame, bbox)
                print('Support received, tracking will now start')
            elif md['type'] == 'LOCATION':
                # make sure tracker has been initialized
                if support is not None:
                    center_pos = np.array(md['data'])
                    tracker.update(center_pos)
            elif md['type'] == 'PING':
                push_socket.send_json({"type": "PONG", "id": str(worker_id)})
            else:
                print('Invalid message type received: {}'.format(md['type']))
    except KeyboardInterrupt:
        print('Exiting... notifying server of disconnect')
        push_socket.send_json(
            {"type": "FIN", "id": str(worker_id)})
        # wait for the server to respond or let the user forcefully close
        print("Waiting for server response. Press CTRL+C again to forcefully close")
        while True:
            _ = sub_socket.recv()
            md = sub_socket.recv_json()
            if md['type'] == "FIN":
                print('Server responded, now exiting')
                exit(0)
            elif md['type'] == "FRAME":
                # we have to accept the incoming frame to properly accept future messages
                msg = sub_socket.recv()
# Module-level buffers, all pre-allocated as zero-filled uint8 arrays with
# 1280 rows and 720 columns. run_maskrcnn() below rebinds the three
# ``result_*`` names through ``global``.
# NOTE(review): the ``pysot_*`` buffers are presumably written by tracking
# code outside this view -- confirm.
result_mask_img = np.zeros((1280, 720, 3), dtype=np.uint8)
result_bbox_img = np.zeros((1280, 720, 3), dtype=np.uint8)
result_mask = np.zeros((1280, 720), dtype=np.uint8)
pysot_img = np.zeros((1280, 720, 3), dtype=np.uint8)
# Set to 1 by run_maskrcnn() at the start of every detection pass.
mask_rcnn_flag = 0
pysot_mask = np.zeros((1280, 720), dtype=np.uint8)
pysot_contour_img = np.zeros((1280, 720, 3), dtype=np.uint8)

# Load the tracker configuration and pick the device.
cfg.merge_from_file('config.yaml')
cfg.CUDA = torch.cuda.is_available()
device = torch.device('cuda' if cfg.CUDA else 'cpu')

# The tracker is built around the model object first; the weights are loaded
# into that same object afterwards.
model_pysot = ModelBuilder()
tracker = build_tracker(model_pysot)
model_pysot.load_state_dict(
    torch.load('model.pth',
               map_location=lambda storage, loc: storage.cpu()))
model_pysot.eval().to(device)


def run_maskrcnn() -> None:
    """Run the detector on ``color_img`` in an endless loop.

    Each pass sets ``mask_rcnn_flag`` to 1, calls ``inference_detector`` on
    the current global ``color_img`` and stores the three values returned by
    ``show_result`` in the global ``result_mask_img``, ``result_bbox_img``
    and ``result_mask``. The loop has no exit condition.

    NOTE(review): ``model`` and ``color_img`` are defined outside this view,
    and ``inds_len`` is declared global but not used in the visible body --
    confirm whether it is still needed.
    """
    global color_img
    global result_mask_img
    global result_bbox_img
    global result_mask
    global mask_rcnn_flag
    global inds_len
    while 1:
        mask_rcnn_flag = 1
        result = inference_detector(model, color_img)
        result_mask_img, result_bbox_img, result_mask = show_result(
            color_img, result, model.CLASSES)
        #print(result)
def ObjectTracking():
    """Track a user-selected object through a video and visualise the result.

    The target is selected on the first frame. For every later frame the
    tracker is run: results with ``best_score`` above 0.6 are drawn (polygon
    and mask overlay, or a rectangle) and remembered; otherwise the tracker
    is re-initialised from the last remembered frame and box. Depending on
    ``args`` the moving path is drawn and each tracked frame is saved as
    ``frameNNN.jpg`` inside ``args.framepath``. Every tracked frame is shown.
    """
    # load config
    cfg.merge_from_file(args.config)
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create model
    model = ModelBuilder()

    # load model
    model.load_state_dict(
        torch.load(args.snapshot,
                   map_location=lambda storage, loc: storage.cpu()))
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    # parameters init
    if args.video_name:
        video_name = args.video_name.split('/')[-1].split('.')[0]
    else:
        video_name = 'webcam'
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    first_frame = True
    cnt_ = 0
    pre_frame = None  # clean copy of the last confidently tracked frame
    pre_rect = None   # box that belongs to pre_frame
    points = []
    # Defined up front so the moving-path check below never reads an
    # unassigned name when the tracker only yields bounding boxes.
    polygon = []

    # main loop for getting frame and tracking object
    for frame in get_frames(args.video_name):
        if first_frame:
            try:
                # to select object
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                exit()
            # Remember the initial state so that an early low score can
            # re-initialise from something valid.
            pre_rect = init_rect
            pre_frame = frame.copy()
            # init model
            tracker.init(frame, init_rect)
            first_frame = False
            continue

        cnt_ += 1
        # make prediction
        outputs = tracker.track(frame)

        if outputs['best_score'] > 0.6:
            # Copy BEFORE drawing: the drawing calls below modify `frame`
            # in place, and the overlay must not leak into a later re-init.
            pre_frame = frame.copy()
            if 'bbox' in outputs:
                pre_rect = list(map(int, outputs['bbox']))
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
                # make mask
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
        else:
            # re-init model using previous location
            tracker.init(pre_frame, pre_rect)

        # draw moving path
        if args.draw_moving_path and len(polygon) != 0:
            points = draw_moving_path(polygon, points, frame)

        # save frame as JPEG file inside the requested directory
        if args.saveframe and os.path.isdir(args.framepath):
            fullpath = os.path.join(args.framepath,
                                    "frame{0:0>3}.jpg".format(cnt_))
            cv2.imwrite(fullpath, frame)

        cv2.imshow(video_name, frame)
        # may need to adjust based on your hardware
        cv2.waitKey(20)