class tsn_classifier: def __init__(self): global mypath # services provided self.reconfig_srv_ = rospy.Service('reconf_split',split, self.reconfig_srv) self.start_vidscores = rospy.Service('start_vidscores', Empty, self.start_vidscores) self.stop_vidscores = rospy.Service('stop_vidscores', Empty, self.stop_vidscores) # topics published # self.image_pub = rospy.Publisher("class_overlay_image_raw",Image, queue_size=1) # self.label_fw_pub = rospy.Publisher("action_fw", String, queue_size=1) # self.label_pub = rospy.Publisher("action", String, queue_size=1) # self.ownlabel_pub = rospy.Publisher("action_own", String, queue_size=1) # parameters self.dataset = rospy.get_param('~dataset','hmdb51') self.device_id = rospy.get_param('~device_id',0) self.split = rospy.get_param('~split',1) self.step = rospy.get_param('~step',6) # this should actually be # step = (frame_cnt - stack_depth) / (args.num_frame_per_video-1) # it will change depending on the action length, a value I don't have if I am classifying real time, but that I could get if I am doing it by service calls! self.stack_depth = rospy.get_param('~stack_depth',5) # stack_depth is 1 for rgb and 5 for flows. I am letting it be 5 to test creating an array of cv_images self.classwindow = rospy.get_param('~classification_frame_window',50) #whatswrong = (rospy.resolve_name('~action_list')) #rospy.spin() self.actionlist = rosparam.get_param(rospy.resolve_name('~action_list')) #"['brush_hair','cartwheel','catch','chew','clap','climb','climb_stairs','dive','draw_sword','dribble','drink','eat','fall_floor','fencing','flic_flac','golf','handstand','hit','hug','jump','kick','kick_ball','kiss','laugh','pick','pour','pullup','punch','push','pushup','ride_bike','ride_horse','run','shake_hands','shoot_ball','shoot_bow','shoot_gun','sit','situp','smile','smoke','somersault','stand','swing_baseball','sword','sword_exercise','talk','throw','turn','walk','wave']") #if type(self.actionlist) is str: # self.actionlist = eval(self.actionlist) self.actionlist.sort() self.chooselist = rosparam.get_param(rospy.resolve_name('~choose_list')) ## I must be doing something wrong here for this name not to be resolved. maybe it is because each node here should probably have its own init_node and it doesn't #if type(self.chooselist) is str: # self.chooselist = eval(self.chooselist) self.chooselist.sort() ###probably should use the nice rosparam thingy here to avoid these problems... self.framesize_width = rospy.get_param('~framesize_width',340) self.framesize_height = rospy.get_param('~framesize_height',256) # topics subscribed self.image_sub = rospy.Subscriber('video_topic', Image,self.callback,queue_size=1) # internals self.bridge = CvBridge() from pyActionRecog.utils.video_funcs import default_aggregation_func if self.chooselist: keepi = [] rospy.logwarn('defined own subset of actions! classification will be reduced to smaller set of choices, namely:'+str(self.chooselist)) #print(range(0,len(self.actionlist))) for i in range(0,len(self.actionlist)): for j in range(0, len(self.chooselist)): #print(self.actionlist[i]) #print( self.chooselist[j]) if self.actionlist[i] == self.chooselist[j]: keepi.append(i) tobedeleted = set(range(0,len(self.actionlist)))-set(keepi) #print(tobedeleted) self.defprox = lambda x: np.delete(default_aggregation_func(x),list(tobedeleted)) self.actionlist = self.chooselist else: rospy.logwarn('No choose_list defined. Will classify within the whole set. ') self.defprox = default_aggregation_func self.frame_scores = [] self.prototxt = mypath+'/models/'+ self.dataset +'/tsn_bn_inception_rgb_deploy.prototxt' self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_rgb_reference_bn_inception.caffemodel' self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id) self.ownvidscores = [] # when I instantiate the classifier, the startedownvid is working already. this influences how vsmf_srv will behave, so it needs to be like this, I think. self.startedownvid = True self.lock = threading.Lock() #publishers self.label_fw_pub = FunnyPublisher("action_fw", self.actionlist, self.defprox) self.label_pub = FunnyPublisher("action", self.actionlist, self.defprox) self.ownlabel_pub = FunnyPublisher("action_own", self.actionlist, self.defprox) rospy.set_param('~alive',0.5) rospy.loginfo("waiting for callback from " +rospy.resolve_name('video_topic') +" to do anything") def start_vidscores(self,req): # I will need to use locks here, I think... with self.lock: self.startedownvid = True rospy.logwarn("Started classifying own vid!") return [] def stop_vidscores(self,req): # I will need to use locks here, I think... with self.lock: self.startedownvid = False if self.ownvidscores: self.ownlabel_pub.pub(self.ownvidscores) else: rospy.logerr('ownvidscores is empty!!!!!!!!!!!!!!! are we locking for too long?') self.ownvidscores = [] rospy.logdebug("published the label for the own video version!") rospy.logwarn("stopped classifying own vid") return [] def callback(self,data): rospy.logdebug("reached callback. that means I can read the Subscriber!") rospy.set_param('~alive',1) try: cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8") except CvBridgeError as e: print(e) scores = self.net.predict_single_frame([cv_image,], 'fc-action', frame_size=(self.framesize_width, self.framesize_height)) #print((scores)) #this publishes the instant time version, aka, per frame self.label_pub.pub([scores]) rospy.logdebug("published the label for instant time version!") #this part publishes the frame_window version self.frame_scores.append(scores) if len(self.frame_scores)>self.classwindow: self.frame_scores.pop(0) self.label_fw_pub.pub(self.frame_scores) rospy.logdebug("published the label for the frame window version!") self.lock.acquire() if self.startedownvid: self.ownvidscores.append(scores) else: rospy.logdebug_throttle(20,"waiting for start_vidscores call to start classifying ownvid") self.lock.release() # try: # self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image, "bgr8")) # rospy.logdebug("published image") # except CvBridgeError as e: # print(e) def reconfig_srv(self, req): # why not use standard ros reconfigure stds? self.image_sub.unregister() self.split = req.Split rospy.loginfo("reading split:"+str(req.Split)) #print(req.Split) self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_rgb_reference_bn_inception.caffemodel' self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id) self.image_sub = rospy.Subscriber(self.videotopic, Image,self.callback,queue_size=1) #print('Dum') return []
class tsn_classifier: def __init__(self): global mypath # services provided self.reconfig_srv_ = rospy.Service('reconf_split',split, self.reconfig_srv) self.start_vidscores = rospy.Service('start_vidscores', Empty, self.start_vidscores) self.stop_vidscores = rospy.Service('stop_vidscores', Empty, self.stop_vidscores) # topics published self.image_pub = rospy.Publisher("class_overlay_image_raw",Image, queue_size=1) self.label_fw_pub = rospy.Publisher("action_fw", String, queue_size=1) self.label_pub = rospy.Publisher("action", String, queue_size=1) self.ownlabel_pub = rospy.Publisher("action_own", String, queue_size=1) # parameters self.dataset = rospy.get_param('~dataset','hmdb51') self.device_id = rospy.get_param('~device_id',0) self.split = rospy.get_param('~split',1) self.videotopic = rospy.get_param('~video_topic','videofiles/image_raw') self.classwindow = rospy.get_param('~classification_frame_window',50) self.actionlist = rospy.get_param('~action_list', ['brush_hair','cartwheel','catch','chew','clap','climb','climb_stairs','dive','draw_sword','dribble','drink','eat','fall_floor','fencing','flic_flac','golf','handstand','hit','hug','jump','kick','kick_ball','kiss','laugh','pick','pour','pullup','punch','push','pushup','ride_bike','ride_horse','run','shake_hands','shoot_ball','shoot_bow','shoot_gun','sit','situp','smile','smoke','somersault','stand','swing_baseball','sword','sword_exercise','talk','throw','turn','walk','wave']) if type(self.actionlist) is str: self.actionlist = eval(self.actionlist) self.actionlist.sort() self.chooselist = rospy.get_param('~choose_list',[]) if type(self.chooselist) is str: self.chooselist = eval(self.chooselist) self.chooselist.sort() ###probably should use the nice rosparam thingy here to avoid these problems... self.framesize_width = rospy.get_param('~framesize_width',340) self.framesize_height = rospy.get_param('~framesize_height',256) # topics subscribed self.image_sub = rospy.Subscriber(self.videotopic, Image,self.callback,queue_size=1) # internals self.bridge = CvBridge() from pyActionRecog.utils.video_funcs import default_aggregation_func if self.chooselist: keepi = [] rospy.logwarn('defined own subset of actions! classification will be reduced to smaller set of choices, namely:'+str(self.chooselist)) #print(range(0,len(self.actionlist))) for i in range(0,len(self.actionlist)): for j in range(0, len(self.chooselist)): #print(self.actionlist[i]) #print( self.chooselist[j]) if self.actionlist[i] == self.chooselist[j]: keepi.append(i) tobedeleted = set(range(0,len(self.actionlist)))-set(keepi) #print(tobedeleted) self.defprox = lambda x: np.delete(default_aggregation_func(x),list(tobedeleted)) self.actionlist = self.chooselist else: rospy.logwarn('No choose_list defined. Will classify within the whole set. ') self.defprox = default_aggregation_func self.frame_scores = [] self.prototxt = mypath+'/models/'+ self.dataset +'/tsn_bn_inception_rgb_deploy.prototxt' self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_rgb_reference_bn_inception.caffemodel' self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id) self.font = cv2.FONT_HERSHEY_SIMPLEX #print('hio') self.ownvidscores = [] # when I instantiate the classifier, the startedownvid is working already. this influences how vsmf_srv will behave, so it needs to be like this, I think. self.startedownvid = True self.lock = threading.Lock() rospy.loginfo("waiting for callback from " + self.videotopic +" to do anything") def start_vidscores(self,req): # I will need to use locks here, I think... self.lock.acquire() self.startedownvid = True rospy.logwarn("Started classifying own vid!") self.lock.release() return [] def stop_vidscores(self,req): # I will need to use locks here, I think... self.lock.acquire() self.startedownvid = False if self.ownvidscores: currown = self.actionlist[np.argmax(self.defprox(self.ownvidscores))] self.ownlabel_pub.publish(String(currown)) else: rospy.logerr('ownvidscores is empty!!!!!!!!!!!!!!! are we locking for too long?') self.ownvidscores = [] rospy.logdebug("published the label for the own video version!") rospy.logwarn("stopped classifying own vid") self.lock.release() return [] def callback(self,data): rospy.logdebug("reached callback. that means I can read the Subscriber!") try: cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8") except CvBridgeError as e: print(e) #(rows,cols,channels) = cv_image.shape #if cols > 60 and rows > 60 : #cv2.circle(cv_image, (50,50), 10, 255) #TODO: frame_size should be a parameter scores = self.net.predict_single_frame([cv_image,], 'fc-action', frame_size=(self.framesize_width, self.framesize_height)) #print((scores)) #this publishes the instant time version, aka, per frame self.label_pub.publish(String(self.actionlist[np.argmax(self.defprox([scores]))])) rospy.logdebug("published the label for instant time version!") #this part publishes the frame_window version self.frame_scores.append(scores) if len(self.frame_scores)>self.classwindow: curract_fw = self.actionlist[np.argmax(self.defprox(self.frame_scores))] cv2.putText(cv_image,curract_fw,(10,226), self.font, 1,(255,255,255),2) self.frame_scores.pop(0) self.label_fw_pub.publish(String(curract_fw)) rospy.logdebug("published the label for the frame window version!") self.lock.acquire() if self.startedownvid: self.ownvidscores.append(scores) else: rospy.logdebug_throttle(20,"waiting for start_vidscores call to start classifying ownvid") self.lock.release() try: self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image, "bgr8")) rospy.logdebug("published image") except CvBridgeError as e: print(e) def reconfig_srv(self, req): self.image_sub.unregister() self.split = req.Split rospy.loginfo("reading split:"+str(req.Split)) #print(req.Split) self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_rgb_reference_bn_inception.caffemodel' self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id) self.image_sub = rospy.Subscriber(self.videotopic, Image,self.callback,queue_size=1) #print('Dum') return []
class tsn_classifier: def __init__(self): global mypath # services provided self.reconfig_srv_ = rospy.Service('reconf_split', split, self.reconfig_srv) self.start_vidscores = rospy.Service('start_vidscores', Empty, self.start_vidscores) self.stop_vidscores = rospy.Service('stop_vidscores', Empty, self.stop_vidscores) # topics published self.scores_pub = rospy.Publisher("scores", Float32MultiArray, queue_size=1) self.score_fw_pub = rospy.Publisher("scores_fw", caffe_tsn_ros.msg.ScoreArray, queue_size=1) # self.label_pub = rospy.Publisher("action", String, queue_size=1) self.ownlabel_pub = rospy.Publisher("scores_own", caffe_tsn_ros.msg.ScoreArray, queue_size=1) # parameters self.dataset = rospy.get_param('~dataset', 'hmdb51') self.device_id = rospy.get_param('~device_id', 0) self.split = rospy.get_param('~split', 1) self.rgbOrFlow = rospy.get_param('~classifier_type') self.clear_fw_after_new_vid = rospy.get_param( '~clear_fw_after_new_vid', True) self.step = rospy.get_param('~step', 6) # this should actually be # step = (frame_cnt - stack_depth) / (args.num_frame_per_video-1) # it will change depending on the action length, a value I don't have if I am classifying real time, but that I could get if I am doing it by service calls! # stack_depth is 1 for rgb and 5 for flows. I am letting it be 5 to test creating an array of cv_images self.stack_depth = rospy.get_param('~stack_depth', 5) self.stack_count = 0 self.cv_image_stack = [] self.classwindow = rospy.get_param('~classification_frame_window', 'undefined') assert not self.classwindow == 'undefined' ###probably should use the nice rosparam thingy here to avoid these problems... self.framesize_width = rospy.get_param('~framesize_width', 340) self.framesize_height = rospy.get_param('~framesize_height', 256) # topics subscribed self.image_sub = rospy.Subscriber('video_topic', Image, self.callback, queue_size=1) # internals self.bridge = CvBridge() self.prototxt = mypath + '/models/' + self.dataset + '/tsn_bn_inception_' + self.rgbOrFlow + '_deploy.prototxt' self.caffemodel = mypath + '/models/' + self.dataset + '_split_' + str( self.split ) + '_tsn_' + self.rgbOrFlow + '_reference_bn_inception.caffemodel' rospy.loginfo("loading prototxt {}".format(self.prototxt)) rospy.loginfo("loading caffemodel {}".format(self.caffemodel)) self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id) self.frame_scores = caffe_tsn_ros.msg.ScoreArray() self.ownvidscores = caffe_tsn_ros.msg.ScoreArray() # when I instantiate the classifier, the startedownvid is working already. this influences how vsmf_srv will behave, so it needs to be like this, I think. self.startedownvid = True self.lock = threading.Lock() self.published_fw = False rospy.set_param('~alive', 0.5) rospy.loginfo("waiting for callback from " + rospy.resolve_name('video_topic') + " to do anything") def start_vidscores(self, req): # I will need to use locks here, I think... with self.lock: self.startedownvid = True self.published_fw = False rospy.loginfo("Started classifying own vid!") return [] def stop_vidscores(self, req): # I will need to use locks here, I think... with self.lock: self.startedownvid = False if self.ownvidscores: #### I am going to publish now a set of matrices, right? self.ownlabel_pub.publish(self.ownvidscores) rospy.logdebug("length of ownvidscores:%d" % (len(self.ownvidscores.data))) #pass else: rospy.logerr( 'ownvidscores is empty!!!!!!!!!!!!!!! are we locking for too long?' ) self.ownvidscores = caffe_tsn_ros.msg.ScoreArray() if not self.published_fw: rospy.logwarn("did not publish a single fw for this video") rospy.logwarn("length of fw:%d" % (len(self.frame_scores.data))) if self.clear_fw_after_new_vid: self.frame_scores = caffe_tsn_ros.msg.ScoreArray() rospy.logdebug("published the label for the own video version!") rospy.loginfo("stopped classifying own vid") return [] def callback(self, data): rospy.logdebug( "reached callback. that means I can read the Subscriber!") rospy.set_param('~alive', 1) try: cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8") except CvBridgeError as e: #print(e) rospy.logerr(e) #since I am not using stacks for rgb images, I can prevent from making the rgb version any slower by using an if statement here if self.rgbOrFlow == 'flow': ## and I want the combined flow version here, don't I? so I need to strip the frame apart into components. I think it is better than #self.cv_image_stack.append(cv_image) #this would be incorrect, i need to get each matrix and put it together in a stack. first x then y # from ros_flow_bg, I can see that x is the 0 component and y the 1 component. I hope bgr8 stays bgr8 and we don't flip things around here! self.cv_image_stack.append(cv_image[:, :, 0]) self.cv_image_stack.append(cv_image[:, :, 1]) # self.cv_image_stack.extend([cv_image[:,:,0], cv_image[:,:,1]]) if len( self.cv_image_stack ) > 2 * self.stack_depth: #keeps at most 2*self.stack_depth images in cv_image_stack, the 2 comes from the fact that we are using flow_x and flow_y self.cv_image_stack.pop(0) self.cv_image_stack.pop(0) if self.stack_count % self.step == 0: rospy.logdebug("reached execution part of callback!") self.stack_count = 0 ## we don't keep a large number here. scores = None ### i can maybe use a lambda to abstract this. is it faster than an if though? if self.rgbOrFlow == 'rgb': scores = self.net.predict_single_frame( [ cv_image, ], 'global_pool', frame_size=(self.framesize_width, self.framesize_height)) elif self.rgbOrFlow == 'flow' and len(self.cv_image_stack) == 10: scores = self.net.predict_single_flow_stack( self.cv_image_stack, 'global_pool', frame_size=(self.framesize_width, self.framesize_height)) #print(type(scores)) #print(scores.dtype) #scoremsg = caffe_tsn_ros.msg.Scores() #scoremsg.test = 'hello' #scoremsg.scores = scores #print(scoremsg) #scores = np.squeeze(scores) #scores = np.array([[[1.,2.],[3.,4.],[5.,6.]],[[11.,12.],[13.,14.],[15.,16.]]],dtype='float32') #self.scores_pub.publish(self.bridge.cv2_to_imgmsg(scores, '32FC1')) #self.scores_pub.publish(scoremsg) #print(self.scores_pub.get_num_connections()) #print((scores)) #print(np.shape(scores)) #print(scores.shape) if isinstance(scores, np.ndarray): #this publishes the instant time version, aka, per frame #self.label_pub.pub([scores]) ## Not sure if this MultiArrayDimension thing is working. In any ## case, it is already a LOT of data per frame; publishing the chunk ## at the end of the video, all at the same time would probably ## cause a lot of unnecessary waiting # TO CONSIDER: this is fast, so I think it doesn't matter, but I # believe the layout could be pre-set, since it doesn't change on a # frame by frame basis. myheader = Header() myheader.stamp = rospy.Time.now() scoresmsg = Float32MultiArray() scoresmsg.layout.dim = [] dims = np.array(scores.shape) scoresize = dims.prod() / float(scores.nbytes) for i in range(0, len(dims)): #print(i) scoresmsg.layout.dim.append(MultiArrayDimension()) scoresmsg.layout.dim[i].size = dims[i] scoresmsg.layout.dim[i].stride = dims[i:].prod( ) / scoresize scoresmsg.layout.dim[i].label = 'dim_%d' % i #print(scoresmsg.layout.dim[i].size) #print(scoresmsg.layout.dim[i].stride) scoresmsg.data = np.frombuffer(scores.tobytes(), 'float32') self.scores_pub.publish(scoresmsg) #rospy.logdebug("published the label for instant time version!") #this part publishes the frame_window version self.frame_scores.data.append(scoresmsg) #rospy.logwarn("FISHY: length of fw:%d"%(len(self.frame_scores.data))) #rospy.logwarn("FISHY: self.classwindow:%d"%self.classwindow) if len(self.frame_scores.data) > self.classwindow: #rospy.logwarn("FISHY: got into the loop") self.frame_scores.header = myheader self.frame_scores.data.pop(0) self.score_fw_pub.publish(self.frame_scores) #rospy.logwarn("FISHY: passed the publisher, so it must have worked?") with self.lock: self.published_fw = True ## it was not working without the lock rospy.logdebug( "published the label for the frame window version!") with self.lock: if self.startedownvid: self.ownvidscores.data.append(scoresmsg) self.ownvidscores.header = myheader #pass else: rospy.logdebug_throttle( 20, "waiting for start_vidscores call to start classifying ownvid" ) self.stack_count = self.stack_count + 1 # try: # self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image, "bgr8")) # rospy.logdebug("published image") # except CvBridgeError as e: # print(e) def reconfig_srv(self, req): # why not use standard ros reconfigure stds? self.image_sub.unregister() self.split = req.Split rospy.loginfo("reading split:" + str(req.Split)) #print(req.Split) self.caffemodel = mypath + '/models/' + self.dataset + '_split_' + str( self.split ) + '_tsn_' + self.rgbOrFlow + '_reference_bn_inception.caffemodel' self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id) self.image_sub = rospy.Subscriber('video_topic', Image, self.callback, queue_size=1) #print('Dum') return []