def output(video_scores, prior=None):
    """Print per-video predictions and accuracy statistics.

    Args:
        video_scores: sequence of per-video records. Element ``[0]`` of a
            record is handed to ``default_aggregation_func``; element ``[1]``
            is used as the ground-truth label.
        prior: optional sequence with one value per class. When given, every
            aggregated score vector is divided element-wise by it before the
            arg-max is taken.

    Prints one line per video (aggregated scores, predicted class and the
    matching entry of the module-level ``eval_video_list``), followed by the
    confusion matrix, the per-class accuracy and the summary accuracies.
    Nothing is returned.
    """
    # Aggregate once per video; the result is needed for the prediction and
    # for the per-video report line.
    agg_scores = [default_aggregation_func(x[0]) for x in video_scores]

    # `is not None` instead of `!= None`: with a numpy array as prior the
    # comparison is element-wise and its truth value is ambiguous.
    if prior is not None:
        # list(...) keeps the result a real sequence on Python 3 as well.
        ranked = [list(map(operator.truediv, x, prior)) for x in agg_scores]
    else:
        ranked = agg_scores
    video_pred = [np.argmax(x) for x in ranked]

    # The report line always shows the scores before division by the prior.
    for index, scores in enumerate(agg_scores):
        print("%s %s %s" % (scores, video_pred[index],
                            eval_video_list[index]))

    video_labels = [x[1] for x in video_scores]
    cf = confusion_matrix(video_labels, video_pred).astype(float)
    print(cf)

    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)
    cls_acc = cls_hit / cls_cnt
    print(cls_acc)

    mean_cls_acc = np.mean(cls_acc) * 100
    print('Mean accuracy over classes {:.02f}%'.format(mean_cls_acc))
    print('Accuracy over classes: %s' % (mean_cls_acc,))
    # Sample accuracy is total hits over total samples; the previous code
    # divided the per-class hit vector and printed an array here.
    print('Accuracy over samples: %s' % (np.sum(cls_hit) / np.sum(cf) * 100,))
def get_score(score_files, xxxx=0.4):
    """Fuse two score files with the weights ``xxxx`` and ``1 - xxxx``.

    Args:
        score_files: paths of exactly two archives readable by ``np.load``,
            each providing a ``scores`` and a ``labels`` entry. The third
            ``_``-separated field of the first path is printed as the split.
        xxxx: weight of the first file; the second file gets ``1.0 - xxxx``.

    Returns:
        tuple: ``(ff, labels)`` where ``ff`` holds ``x[0][0]`` for every
        fused per-video score ``x`` and ``labels`` is the ``labels`` entry
        of the first file.

    Raises:
        ValueError: if the number of score files is not two.
    """
    score_weights = [xxxx, 1.0 - xxxx]
    # Validate before any file is opened or any score is aggregated.
    if len(score_weights) != len(score_files):
        raise ValueError(
            "Only {} weight specifed for a total of {} score files".format(
                len(score_weights), len(score_files)))

    score_list = []
    label_list = []
    for path in score_files:
        archive = np.load(path)
        try:
            # Indexing materialises the arrays, so the archive can be
            # closed right afterwards instead of leaking the file handle.
            score_list.append(archive['scores'][:, 0])
            label_list.append(archive['labels'])
        finally:
            archive.close()

    agg_score_list = [
        np.array([
            default_aggregation_func(x, normalization=False, crop_agg=np.mean)
            for x in score_vec
        ])
        for score_vec in score_list
    ]

    split = score_files[0].split("_")[2]

    final_scores = np.zeros_like(agg_score_list[0])
    for weight, agg_score in zip(score_weights, agg_score_list):
        final_scores += agg_score * weight

    print("split:  %s" % (split,))
    ff = [x[0][0] for x in final_scores]
    return ff, label_list[0]
def gated_fusion(i):
    """Print the class-wise accuracy obtained from the ``i``-th score stream.

    Works on the module-level ``video_scores``: entry ``i`` of every record
    is aggregated (without normalization) and arg-maxed to get the
    prediction, entry ``num_scores`` of the record is taken as the label.
    Prints the per-class accuracy vector and its mean as a percentage.
    """
    predictions = []
    truths = []
    for record in video_scores:
        aggregated = default_aggregation_func(record[i], normalization=False)
        predictions.append(np.argmax(aggregated))
        truths.append(record[num_scores])

    confusion = confusion_matrix(truths, predictions).astype(float)
    samples_per_class = confusion.sum(axis=1)
    hits_per_class = np.diag(confusion)
    per_class_accuracy = hits_per_class / samples_per_class

    print(per_class_accuracy)
    print('Accuracy {:.02f}%'.format(np.mean(per_class_accuracy) * 100))
def eval_scores(score_files, score_weights, agg_method):
    """Fuse the score files of different models.

    Args:
        score_files (list(str)): file names of the score files; each must
            provide a ``scores`` and a ``labels`` entry.
        score_weights (list(float)): one weight per score file, or ``None``
            to weight every file with 1.
        agg_method (str): name of the numpy function (looked up with
            ``getattr(np, agg_method)``) used to aggregate the segment level
            scores. This is needed because the stored scores are segment
            level scores. See test_models.py.

    Returns:
        tuple: ``(acc, softmax_scores)`` where ``acc`` is whatever
        ``mean_class_accuracy`` returns for the fused scores and the labels
        of the first file, and ``softmax_scores`` is a list with
        ``softmax(vec)`` for every fused score vector.

    Raises:
        ValueError: if the number of weights differs from the number of
            score files.
    """
    if score_weights is None:
        score_weights = [1] * len(score_files)
    elif len(score_weights) != len(score_files):
        raise ValueError(
            "Only {} weight specifed for a total of {} score files".format(
                len(score_weights), len(score_files)))

    score_list = []
    label_list = []
    for path in score_files:
        archive = np.load(path)
        try:
            # Column 0 of 'scores' holds the segment level scores.
            # Indexing loads the data, so the archive can be closed at once.
            score_list.append(archive['scores'][:, 0])
            label_list.append(archive['labels'])
        finally:
            archive.close()

    # Resolve the aggregation function once instead of once per video.
    crop_agg = getattr(np, agg_method)

    # Video level scores, one array per score file.
    agg_score_list = [
        np.array([
            default_aggregation_func(x, normalization=False,
                                     crop_agg=crop_agg)
            for x in score_vec
        ])
        for score_vec in score_list
    ]

    final_scores = np.zeros_like(agg_score_list[0])
    for weight, agg_score in zip(score_weights, agg_score_list):
        final_scores += agg_score * weight

    # accuracy
    acc = mean_class_accuracy(final_scores, label_list[0])
    # softmax score
    softmax_scores = [softmax(vec) for vec in final_scores]
    return acc, softmax_scores
flow_stack.append(cv2.imread(os.path.join(video_frame_path, y_name), cv2.IMREAD_GRAYSCALE)) scores = net.predict_single_flow_stack(np.array(flow_stack), score_name, frame_size=(340, 256)) frame_scores.append(scores) print 'video {} done'.format(vid) sys.stdin.flush() return np.array(frame_scores), label if args.num_worker > 1: pool = multiprocessing.Pool(args.num_worker, initializer=build_net) video_scores = pool.map(eval_video, eval_video_list) else: build_net() video_scores = map(eval_video, eval_video_list) video_pred = [np.argmax(default_aggregation_func(x[0])) for x in video_scores] video_labels = [x[1] for x in video_scores] cf = confusion_matrix(video_labels, video_pred).astype(float) cls_cnt = cf.sum(axis=1) cls_hit = np.diag(cf) cls_acc = cls_hit/cls_cnt print cls_acc print 'Accuracy {:.02f}%'.format(np.mean(cls_acc)*100) if args.save_scores is not None: np.savez(args.save_scores, scores=video_scores, labels=video_labels)
# Weighted fusion of several score files, followed by an accuracy report.
# Reads `args.score_files`, `args.score_weights` and `args.crop_agg`.
score_npz_files = [np.load(x) for x in args.score_files]

# Default to a weight of 1 per file; otherwise require one weight per file.
if args.score_weights is None:
    score_weights = [1] * len(score_npz_files)
else:
    score_weights = args.score_weights
    if len(score_weights) != len(score_npz_files):
        raise ValueError("Only {} weight specifed for a total of {} score files"
                         .format(len(score_weights), len(score_npz_files)))

# Column 0 of each 'scores' entry is aggregated below; 'labels' is kept as is.
score_list = [x['scores'][:, 0] for x in score_npz_files]
label_list = [x['labels'] for x in score_npz_files]

# label verification
# NOTE(review): no verification is performed here; only the labels of the
# first file are used further down.

# score_aggregation
agg_score_list = []
for score_vec in score_list:
    # The aggregation function is the numpy function named by args.crop_agg.
    agg_score_vec = [default_aggregation_func(x, normalization=False, crop_agg=getattr(np, args.crop_agg)) for x in score_vec]
    agg_score_list.append(np.array(agg_score_vec))

# Weighted sum of the aggregated scores, accumulated in place.
final_scores = np.zeros_like(agg_score_list[0])
for i, agg_score in enumerate(agg_score_list):
    final_scores += agg_score * score_weights[i]

# accuracy
# Here mean_class_accuracy is unpacked into an overall value and a per-class
# sequence; both are printed as percentages.
acc, cls_acc = mean_class_accuracy(final_scores, label_list[0])
print('Classes results: ' + str(['{:02.2f}%'.format(a * 100) for a in cls_acc]))
print('Final accuracy {:02.2f}%'.format(acc * 100))
# Name of the numpy function used to aggregate scores (see getattr below).
parser.add_argument('--crop_agg', type=str, choices=['max', 'mean'], default='mean')
args = parser.parse_args()

# Weighted fusion of several score files, followed by an accuracy report.
score_npz_files = [np.load(x) for x in args.score_files]

# Default to a weight of 1 per file; otherwise require one weight per file.
if args.score_weights is None:
    score_weights = [1] * len(score_npz_files)
else:
    score_weights = args.score_weights
    if len(score_weights) != len(score_npz_files):
        raise ValueError("Only {} weight specifed for a total of {} score files"
                         .format(len(score_weights), len(score_npz_files)))

# Column 0 of each 'scores' entry is aggregated below; 'labels' is kept as is.
score_list = [x['scores'][:, 0] for x in score_npz_files]
label_list = [x['labels'] for x in score_npz_files]

# label verification
# NOTE(review): no verification is performed here; only the labels of the
# first file are used further down.

# score_aggregation
agg_score_list = []
for score_vec in score_list:
    agg_score_vec = [default_aggregation_func(x, normalization=False, crop_agg=getattr(np, args.crop_agg)) for x in score_vec]
    agg_score_list.append(np.array(agg_score_vec))

# Weighted sum of the aggregated scores, accumulated in place.
final_scores = np.zeros_like(agg_score_list[0])
for i, agg_score in enumerate(agg_score_list):
    final_scores += agg_score * score_weights[i]

# accuracy
acc = mean_class_accuracy(final_scores, label_list[0])
# NOTE(review): '{:02f}' is a minimum width of 2 with the default six
# decimals; '{:.02f}' may have been intended - confirm before changing.
print 'Final accuracy {:02f}%'.format(acc * 100)
frame_scores.append(scores) global ii ii += 1 print ii, 'video {} done'.format(vid) sys.stdin.flush() return np.array(frame_scores), label if num_worker > 1: pool = multiprocessing.Pool(num_worker, initializer=build_net) video_scores = pool.map(eval_video, eval_video_list) else: build_net() video_scores = map(eval_video, eval_video_list) video_pred = [np.argmax(default_aggregation_func(x[0])) for x in video_scores] video_labels = [x[1] for x in video_scores] cf = confusion_matrix(video_labels, video_pred).astype(float) cls_cnt = cf.sum(axis=1) cls_hit = np.diag(cf) cls_acc = cls_hit / cls_cnt print cls_acc print 'Accuracy {:.02f}%'.format(np.mean(cls_acc) * 100) if save_scores is not None: np.savez(save_scores, scores=video_scores, labels=video_labels)
# Optional list of float weights, one per score file (checked below).
parser.add_argument('--score_weights', nargs='+', type=float, default=None)
args = parser.parse_args()

# Weighted fusion of several score files, followed by an accuracy report.
score_npz_files = [np.load(x) for x in args.score_files]

# Default to a weight of 1 per file; otherwise require one weight per file.
if args.score_weights is None:
    score_weights = [1] * len(score_npz_files)
else:
    score_weights = args.score_weights
    if len(score_weights) != len(score_npz_files):
        raise ValueError("Only {} weight specifed for a total of {} score files"
                         .format(len(score_weights), len(score_npz_files)))

# Column 0 of each 'scores' entry is aggregated below; 'labels' is kept as is.
score_list = [x['scores'][:, 0] for x in score_npz_files]
label_list = [x['labels'] for x in score_npz_files]

# label verification
# NOTE(review): no verification is performed here; only the labels of the
# first file are used further down.

# score_aggregation
agg_score_list = []
for score_vec in score_list:
    # No crop_agg is passed, so default_aggregation_func uses its own default.
    agg_score_vec = [default_aggregation_func(x, normalization=False) for x in score_vec]
    agg_score_list.append(np.array(agg_score_vec))

# Weighted sum of the aggregated scores, accumulated in place.
final_scores = np.zeros_like(agg_score_list[0])
for i, agg_score in enumerate(agg_score_list):
    final_scores += agg_score * score_weights[i]

# accuracy
acc = mean_class_accuracy(final_scores, label_list[0])
# NOTE(review): '{:02f}' is a minimum width of 2 with the default six
# decimals; '{:.02f}' may have been intended - confirm before changing.
print 'Final accuracy {:02f}%'.format(acc * 100)
def __init__(self):
    """Create the ROS services, read the parameters and load the RGB network.

    Reads the private parameters ``~dataset``, ``~device_id``, ``~split``,
    ``~step``, ``~stack_depth``, ``~classification_frame_window``,
    ``~action_list``, ``~choose_list``, ``~framesize_width`` and
    ``~framesize_height``, subscribes to ``video_topic``, builds the
    ``CaffeNet`` from files below the module-level ``mypath`` and creates
    the three ``FunnyPublisher`` label publishers.

    When ``~choose_list`` is non-empty, ``self.defprox`` removes the scores
    of all actions that are not chosen and ``self.actionlist`` is reduced to
    the chosen actions, in the same order as the remaining scores.
    """
    global mypath

    # services provided
    self.reconfig_srv_ = rospy.Service('reconf_split', split, self.reconfig_srv)
    # NOTE(review): the next two assignments store the Service under the
    # same attribute name as the handler method passed to it, so after this
    # point the attribute no longer refers to the method.
    self.start_vidscores = rospy.Service('start_vidscores', Empty, self.start_vidscores)
    self.stop_vidscores = rospy.Service('stop_vidscores', Empty, self.stop_vidscores)

    # parameters
    self.dataset = rospy.get_param('~dataset', 'hmdb51')
    self.device_id = rospy.get_param('~device_id', 0)
    self.split = rospy.get_param('~split', 1)
    # this should actually be
    #   step = (frame_cnt - stack_depth) / (args.num_frame_per_video-1)
    # it will change depending on the action length, a value that is not
    # available when classifying in real time, but that could be obtained
    # when classifying through service calls.
    self.step = rospy.get_param('~step', 6)
    # stack_depth is 1 for rgb and 5 for flows. It is left at 5 to test
    # creating an array of cv_images.
    self.stack_depth = rospy.get_param('~stack_depth', 5)
    self.classwindow = rospy.get_param('~classification_frame_window', 50)
    self.actionlist = rosparam.get_param(rospy.resolve_name('~action_list'))
    self.actionlist.sort()
    self.chooselist = rosparam.get_param(rospy.resolve_name('~choose_list'))
    self.chooselist.sort()
    self.framesize_width = rospy.get_param('~framesize_width', 340)
    self.framesize_height = rospy.get_param('~framesize_height', 256)

    # topics subscribed
    # NOTE(review): the subscription is created before self.net and
    # self.lock exist; confirm self.callback tolerates an early message.
    self.image_sub = rospy.Subscriber('video_topic', Image, self.callback, queue_size=1)

    # internals
    self.bridge = CvBridge()
    from pyActionRecog.utils.video_funcs import default_aggregation_func

    if self.chooselist:
        rospy.logwarn('defined own subset of actions! classification will be reduced to smaller set of choices, namely:'+str(self.chooselist))
        chosen = set(self.chooselist)
        unknown = sorted(chosen - set(self.actionlist))
        if unknown:
            rospy.logwarn('choose_list entries missing from action_list are ignored: ' + str(unknown))
        # Score indices of every action that was not chosen.
        tobedeleted = [
            index for index, action in enumerate(self.actionlist)
            if action not in chosen
        ]
        self.defprox = lambda x: np.delete(default_aggregation_func(x), tobedeleted)
        # Keep exactly the labels whose scores survive the deletion above.
        # Assigning choose_list directly misaligned labels and scores when
        # it held unknown or duplicated names.
        self.actionlist = [action for action in self.actionlist if action in chosen]
    else:
        rospy.logwarn('No choose_list defined. Will classify within the whole set. ')
        self.defprox = default_aggregation_func

    self.frame_scores = []
    self.prototxt = mypath+'/models/'+ self.dataset +'/tsn_bn_inception_rgb_deploy.prototxt'
    self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_rgb_reference_bn_inception.caffemodel'
    self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id)

    self.ownvidscores = []
    # When the classifier is instantiated, startedownvid is already active.
    # This influences how vsmf_srv behaves, so it needs to stay like this.
    self.startedownvid = True
    self.lock = threading.Lock()

    # publishers
    self.label_fw_pub = FunnyPublisher("action_fw", self.actionlist, self.defprox)
    self.label_pub = FunnyPublisher("action", self.actionlist, self.defprox)
    self.ownlabel_pub = FunnyPublisher("action_own", self.actionlist, self.defprox)

    rospy.set_param('~alive', 0.5)
    rospy.loginfo("waiting for callback from " +rospy.resolve_name('video_topic') +" to do anything")
def _read_flow_stack(video_frame_path, frame_idx, name_format):
    """Read the x/y flow images of ``frame_idx`` as one interleaved list."""
    flow_stack = []
    for idx in frame_idx:
        x_name = name_format.format(args.flow_x_prefix, idx)
        y_name = name_format.format(args.flow_y_prefix, idx)
        flow_stack.append(
            cv2.imread(os.path.join(video_frame_path, x_name),
                       cv2.IMREAD_GRAYSCALE))
        flow_stack.append(
            cv2.imread(os.path.join(video_frame_path, y_name),
                       cv2.IMREAD_GRAYSCALE))
    return flow_stack


def eval_video(video):
    """Score one video with the global ``net`` and report the prediction.

    Args:
        video: pair ``(vid, label)``; ``vid`` indexes the module-level
            ``f_info`` tables.

    Returns:
        tuple: ``(np.array(frame_scores), label)``; when the module-level
        ``attention_name`` is not ``None`` a third element,
        ``np.array(frame_attentions)``, is appended.

    Raises:
        ValueError: if ``args.modality`` is not one of ``rgb``, ``flow``,
            ``c3d_rgb`` or ``c3d_flow``.
    """
    global net
    vid = video[0]
    label = video[1]
    video_frame_path = f_info[0][vid]

    modality = args.modality
    if modality == 'rgb':
        stack_depth = 1
    elif modality == 'flow':
        stack_depth = 5
    elif modality in ('c3d_rgb', 'c3d_flow'):
        stack_depth = args.depth
    else:
        raise ValueError(args.modality)

    # Every modality reads its frame count from table 1 of f_info.
    frame_cnt = f_info[1][vid]

    num_ticks = args.num_frame_per_video
    if num_ticks > 1:
        step = 1.0 * (frame_cnt - stack_depth) / (num_ticks - 1)
    else:
        # A single tick has no spacing; this avoids dividing by zero.
        step = 0
    if step > 0:
        frame_ticks = np.arange(1, 2 + frame_cnt - stack_depth, step)
    else:
        frame_ticks = [1] * num_ticks
    frame_ticks = np.floor(frame_ticks).astype(int)

    frame_scores = []
    for tick in frame_ticks:
        if modality == 'rgb':
            name = '{}{:05d}.jpg'.format(args.rgb_prefix, tick)
            frame = cv2.imread(os.path.join(video_frame_path, name),
                               cv2.IMREAD_COLOR)
            scores = net.predict_single_frame([frame], score_name,
                                              frame_size=(340, 256),
                                              attention_name=attention_name)
            frame_scores.append(scores)
            continue

        # Stack indices are clamped to the last available frame.
        frame_idx = [
            min(frame_cnt, tick + offset) for offset in range(stack_depth)
        ]
        if modality == 'flow':
            flow_stack = _read_flow_stack(video_frame_path, frame_idx,
                                          '{}{:06d}.jpg')
            scores = net.predict_single_flow_stack(
                flow_stack, score_name, frame_size=(340, 256),
                attention_name=attention_name)
        elif modality == 'c3d_flow':
            flow_stack = _read_flow_stack(video_frame_path, frame_idx,
                                          '{}{:05d}.jpg')
            scores = net.predict_single_c3d_flow_stack(
                flow_stack, score_name, frame_size=(170, 128))
        else:  # c3d_rgb
            skip = args.skip
            c3d_stack = []
            # Keep every `skip`-th frame of the stack, counting from 1.
            for position, idx in enumerate(frame_idx, 1):
                if position % skip == 0:
                    x_name = '{}{:06d}.jpg'.format(args.c3d_prefix, idx)
                    img = cv2.imread(os.path.join(video_frame_path, x_name),
                                     cv2.IMREAD_COLOR)
                    c3d_stack.append(img)
            scores = net.predict_single_c3d_rgb_stack(
                c3d_stack, score_name, frame_size=(170, 128))
        frame_scores.append(scores)

    if attention_name is not None:
        # With attention enabled each entry is a (scores, attention) pair.
        frame_attentions = [x[1] for x in frame_scores]
        frame_scores = [x[0] for x in frame_scores]

    predicted = np.argmax(default_aggregation_func(frame_scores))
    ii = 1 if predicted == label else 0
    print('video {0} pred {1} label {2} same {3} done'.format(
        vid, predicted, label, ii))
    # Flush stdout so the progress line shows up promptly; the previous
    # code flushed stdin, which does not affect the printed output.
    sys.stdout.flush()

    if attention_name is None:
        return np.array(frame_scores), label
    return np.array(frame_scores), label, np.array(frame_attentions)
num_worker = args.num_worker split_source(args.source_path, trunk_net_base, num_worker) write_proto_template(args.net_proto_template, num_worker, trunk_net_base) if len(gpu_list) > 1: cnn_worker = multiprocessing.Pool(len(gpu_list)) raw_video_scores = cnn_worker.map(eval_video, gpu_list) video_scores = merge_worker_result(raw_video_scores) else: video_scores = eval_video(0) with open('{}.pickle'.format(args.save_scores), 'w') as fv_score: pickle.dump(video_scores, fv_score) video_pred = [ np.argmax(default_aggregation_func(x[0], crop_agg=np.max)) for x in video_scores ] video_labels = [x[1] for x in video_scores] video_names = [x[2] for x in video_scores] cf = confusion_matrix(video_labels, video_pred).astype(float) cls_cnt = cf.sum(axis=1) cls_hit = np.diag(cf) cls_acc = cls_hit / cls_cnt print cls_acc if args.check_result: cnt = 0 for vid, video_label in enumerate(video_labels): if video_label != video_pred[vid]:
diff_score = pickle.load(fmotion) aligned_flow_score = [None for _ in xrange(len(flow_score))] for mid, motion_sample in enumerate(motion_score): for fid, flow_sample in enumerate(flow_score): if motion_sample[2] == flow_sample[2]: aligned_flow_score[mid] = flow_sample # RGB : RGB OFF : FLOW : FLOW OFF -- 1:1.5:0.8:1.8 # RGB : RGB OFF : (RGB DIFF : RGB DIFF OFF : RGB DIFF OFF 14)= 1:1.8: (1:2:0.5)*0.8 # 0~25: Scores from Feature Generation Network # 25~49: Score from OFF-sub-network on 7x7 # 49~73: Score from OFF-sub-network on 14x14 video_pred = [ np.argmax( default_aggregation_func( x[0][:25, ...], normalization=False, crop_agg=np.max) * 1 + default_aggregation_func( x[0][25:49, ...], normalization=False, crop_agg=np.max) * 1.5 + default_aggregation_func( y[0][:25, ...], normalization=False, crop_agg=np.max) * 0.8 + default_aggregation_func( y[0][25:49, ...], normalization=False, crop_agg=np.max) * 1.8) for x, y, z in zip(motion_score, aligned_flow_score, diff_score) ] video_labels = [x[1] for x in motion_score] video_names = [x[2] for x in motion_score] cf = confusion_matrix(video_labels, video_pred).astype(float) cls_cnt = cf.sum(axis=1) cls_hit = np.diag(cf)
else: score_weights = args.score_weights if len(score_weights) != len(score_npz_files): raise ValueError( "Only {} weight specifed for a total of {} score files".format( len(score_weights), len(score_npz_files))) score_list = [x['scores'][:, 0] for x in score_npz_files] label_list = [x['labels'] for x in score_npz_files] # score_aggregation agg_score_list = [] for score_vec in score_list: agg_score_vec = [ default_aggregation_func(x, normalization=False, crop_agg=getattr(np, args.crop_agg)) for x in score_vec ] agg_score_list.append(np.array(agg_score_vec)) final_scores = np.zeros_like(agg_score_list[0]) for i, agg_score in enumerate(agg_score_list): final_scores += agg_score * score_weights[i] # generate prediction predict = np.argmax(final_scores, axis=1) label = label_list[0] num_classes = label.max() + 1 analysis_file = args.cf_analysis_file
def main(argss):
    """Start the ROS node, score the configured video list and spin.

    Args:
        argss: unused; the command line is replaced by a hard-coded
            ``sys.argv`` inside the function before it is parsed.

    Side effects: initialises the ``image_converter`` ROS node, sets the
    module-level ``args`` and ``net``, prints the predictions and blocks in
    ``rospy.spin()`` until interrupted.
    """

    def build_net():
        """Create the global CaffeNet for this process."""
        global net
        my_id = multiprocessing.current_process()._identity[0] \
            if args.num_worker > 1 else 1
        if gpu_list is None:
            net = CaffeNet(args.net_proto, args.net_weights, my_id - 1)
        else:
            net = CaffeNet(args.net_proto, args.net_weights,
                           gpu_list[my_id - 1])

    def eval_video(video):
        """Return ``(np.array(frame_scores), label)`` for one ``(vid, label)``."""
        global net
        label = video[1]
        vid = video[0]
        video_frame_path = f_info[0][vid]

        if args.modality == 'rgb':
            cnt_indexer = 1
            stack_depth = 1
        elif args.modality == 'flow':
            cnt_indexer = 2
            stack_depth = 5
        else:
            raise ValueError(args.modality)
        frame_cnt = f_info[cnt_indexer][vid]

        num_ticks = args.num_frame_per_video
        if num_ticks > 1:
            # Floor division keeps the step an integer on Python 3 as well;
            # range() below needs an integer step.
            step = (frame_cnt - stack_depth) // (num_ticks - 1)
        else:
            # A single tick has no spacing; this avoids dividing by zero.
            step = 0
        if step > 0:
            frame_ticks = range(
                1, min((2 + step * (num_ticks - 1)), frame_cnt + 1), step)
        else:
            frame_ticks = [1] * num_ticks
        assert (len(frame_ticks) == num_ticks)

        frame_scores = []
        for tick in frame_ticks:
            if args.modality == 'rgb':
                name = '{}{:05d}.jpg'.format(args.rgb_prefix, tick)
                frame_file = os.path.join(video_frame_path, name)
                frame = cv2.imread(frame_file, cv2.IMREAD_COLOR)
                try:
                    scores = net.predict_single_frame([frame], score_name,
                                                      frame_size=(340, 256))
                except Exception:
                    # Report the offending file, then fail. The previous
                    # bare except fell through to an undefined or stale
                    # `scores`.
                    print(frame_file)
                    raise
                frame_scores.append(scores)
            if args.modality == 'flow':
                # Stack indices are clamped to the last available frame.
                frame_idx = [
                    min(frame_cnt, tick + offset)
                    for offset in range(stack_depth)
                ]
                flow_stack = []
                for idx in frame_idx:
                    x_name = '{}{:05d}.jpg'.format(args.flow_x_prefix, idx)
                    y_name = '{}{:05d}.jpg'.format(args.flow_y_prefix, idx)
                    flow_stack.append(
                        cv2.imread(os.path.join(video_frame_path, x_name),
                                   cv2.IMREAD_GRAYSCALE))
                    flow_stack.append(
                        cv2.imread(os.path.join(video_frame_path, y_name),
                                   cv2.IMREAD_GRAYSCALE))
                scores = net.predict_single_flow_stack(flow_stack, score_name,
                                                       frame_size=(340, 256))
                frame_scores.append(scores)

        print('video {} done'.format(vid))
        # Flush stdout so the progress line shows up promptly; the previous
        # code flushed stdin.
        sys.stdout.flush()
        return np.array(frame_scores), label

    global args
    rospy.init_node('image_converter', anonymous=True)
    ic = image_converter()

    # A lot of these parameters will not be necessary any more once rgb and
    # flow are split and part of the multiprocessing is removed; kept as is
    # for now.
    # NOTE(review): the real command line is overwritten here.
    sys.argv = ['', 'hmdb51', '1', 'rgb', '/temporal-segment-networks/my_of/',
                'models/hmdb51/tsn_bn_inception_rgb_deploy.prototxt',
                'models/hmdb51_split_1_tsn_rgb_reference_bn_inception.caffemodel',
                '--num_worker', '1', '--save_scores', 'myscores_fre.txt']

    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', type=str, choices=['ucf101', 'hmdb51'])
    parser.add_argument('split', type=int, choices=[1, 2, 3],
                        help='on which split to test the network')
    parser.add_argument('modality', type=str, choices=['rgb', 'flow'])
    parser.add_argument('frame_path', type=str,
                        help="root directory holding the frames")
    parser.add_argument('net_proto', type=str)
    parser.add_argument('net_weights', type=str)
    parser.add_argument('--rgb_prefix', type=str,
                        help="prefix of RGB frames", default='img_')
    parser.add_argument('--flow_x_prefix', type=str,
                        help="prefix of x direction flow images",
                        default='flow_x_')
    parser.add_argument('--flow_y_prefix', type=str,
                        help="prefix of y direction flow images",
                        default='flow_y_')
    # The help text used to be a copy of the flow_y_prefix help.
    parser.add_argument('--num_frame_per_video', type=int, default=25,
                        help="number of frames sampled from each video")
    parser.add_argument('--save_scores', type=str, default=None,
                        help='the filename to save the scores in')
    parser.add_argument('--num_worker', type=int, default=1)
    parser.add_argument("--caffe_path", type=str,
                        default='./lib/caffe-action/',
                        help='path to the caffe toolbox')
    parser.add_argument("--gpus", type=int, nargs='+', default=None,
                        help='specify list of gpu to use')
    args = parser.parse_args()
    print(args)

    # The project imports below need the caffe python directory on the path.
    sys.path.append(os.path.join(args.caffe_path, 'python'))
    from pyActionRecog import parse_directory
    from pyActionRecog import parse_split_file
    from pyActionRecog.utils.video_funcs import default_aggregation_func
    from pyActionRecog.action_caffe import CaffeNet

    # build necessary information
    print(args.dataset)
    split_tp = parse_split_file(args.dataset)
    f_info = parse_directory(args.frame_path, args.rgb_prefix,
                             args.flow_x_prefix, args.flow_y_prefix)
    gpu_list = args.gpus
    eval_video_list = split_tp[args.split - 1][1]
    score_name = 'fc-action'

    # NOTE(review): debugging override - the split's video list is always
    # replaced by this single entry.
    if 1:
        eval_video_list = [('ua', 1)]
    print(eval_video_list[0])
    print(f_info)

    if args.num_worker > 1:
        # NOTE(review): build_net and eval_video are nested functions;
        # confirm they can be sent to worker processes before relying on
        # this branch.
        pool = multiprocessing.Pool(args.num_worker, initializer=build_net)
        video_scores_rgb = pool.map(eval_video, eval_video_list)
    else:
        build_net()
        video_scores_rgb = [eval_video(video) for video in eval_video_list]

    video_pred = [
        np.argmax(default_aggregation_func(x[0])) for x in video_scores_rgb
    ]
    print(video_pred)

    try:
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down")
    cv2.destroyAllWindows()
def eval_video(video):
    """Score one video with the global ``net``.

    Args:
        video: pair ``(vid, label)``; ``vid`` indexes the module-level
            ``f_info`` tables.

    Returns:
        tuple: ``(np.array(frame_scores), label)``.

    Raises:
        ValueError: if the module-level ``modality`` is neither ``rgb`` nor
            ``flow``.
    """
    global net
    label = video[1]
    vid = video[0]
    video_frame_path = f_info[0][vid]

    if modality == 'rgb':
        cnt_indexer = 1
        stack_depth = 1
    elif modality == 'flow':
        cnt_indexer = 2
        stack_depth = 5
    else:
        raise ValueError(modality)
    frame_cnt = f_info[cnt_indexer][vid]

    num_ticks = args.num_frame_per_video
    if num_ticks > 1:
        # Floor division keeps the step an integer on Python 3 as well;
        # range() below needs an integer step.
        step = (frame_cnt - stack_depth) // (num_ticks - 1)
    else:
        # A single tick has no spacing; this avoids dividing by zero.
        step = 0
    if step > 0:
        frame_ticks = range(
            1, min((2 + step * (num_ticks - 1)), frame_cnt + 1), step)
    else:
        frame_ticks = [1] * num_ticks
    assert (len(frame_ticks) == num_ticks)

    frame_scores = []
    for tick in frame_ticks:
        if modality == 'rgb':
            name = '{}{:05d}.jpg'.format(args.rgb_prefix, tick)
            frame = cv2.imread(os.path.join(video_frame_path, name),
                               cv2.IMREAD_COLOR)
            scores = net.predict_single_frame([frame], score_name,
                                              frame_size=(340, 256))
            frame_scores.append(scores)
        if modality == 'flow':
            # Stack indices are clamped to the last available frame.
            frame_idx = [
                min(frame_cnt, tick + offset) for offset in range(stack_depth)
            ]
            flow_stack = []
            for idx in frame_idx:
                x_name = '{}{:05d}.jpg'.format(args.flow_x_prefix, idx)
                y_name = '{}{:05d}.jpg'.format(args.flow_y_prefix, idx)
                flow_stack.append(
                    cv2.imread(os.path.join(video_frame_path, x_name),
                               cv2.IMREAD_GRAYSCALE))
                flow_stack.append(
                    cv2.imread(os.path.join(video_frame_path, y_name),
                               cv2.IMREAD_GRAYSCALE))
            scores = net.predict_single_flow_stack(flow_stack, score_name,
                                                   frame_size=(340, 256))
            frame_scores.append(scores)

    print('video {} done'.format(vid))
    print(np.argmax(default_aggregation_func(frame_scores)))
    print(label)
    # Flush stdout so the lines above show up promptly; the previous code
    # flushed stdin.
    sys.stdout.flush()
    return np.array(frame_scores), label
print 'ii', iii score_npz_files = [np.load(x) for x in score_files[iii]] score_list = [x['scores'][:, 0] for x in score_npz_files] label_list = [x['labels'] for x in score_npz_files] for ii in xrange(11): print ii score_weights = [ii, 10 - ii] # label verification # score_aggregation agg_score_list = [] for score_vec in score_list: agg_score_vec = [ default_aggregation_func(x, normalization=False, crop_agg=getattr(np, 'mean')) for x in score_vec ] #print len(agg_score_vec) agg_score_list.append(np.array(agg_score_vec)) final_scores = np.zeros_like(agg_score_list[0]) for i, agg_score in enumerate(agg_score_list): final_scores += agg_score * score_weights[i] # accuracy acc = mean_class_accuracy(final_scores, label_list[0]) #print acc print 'Final accuracy {:02f}%'.format(acc * 100) accuracy[iii].append(np.array(acc)) #print accuracy
score_weights = args.score_weights if len(score_weights) != len(score_npz_files): raise ValueError( "Only {} weight specifed for a total of {} score files".format( len(score_weights), len(score_npz_files))) score_list = [x['scores'][:, 0] for x in score_npz_files ] # each score (test_num, (25,10, class_num)) label_list = [x['labels'] for x in score_npz_files] # score_aggregation agg_score_list = [] for score_vec in score_list: agg_score_vec = [ default_aggregation_func(x.reshape((25, 10, -1)), normalization=False, crop_agg=getattr(np, args.crop_agg)) for x in score_vec ] agg_score_list.append(np.array(agg_score_vec)) final_scores = np.zeros_like(agg_score_list[0]) for i, agg_score in enumerate(agg_score_list): final_scores += agg_score * score_weights[i] # size: (test_num, class_num) # output: confusion matrix, combined cf, accuracy in total # confusion matrix video_pred = [np.argmax(x) for x in final_scores] video_labels = label_list[0] cf = confusion_matrix(video_labels, video_pred).astype(float)
def __init__(self):
    """Create the ROS services and publishers, read the parameters and load the RGB network.

    Reads the private parameters ``~dataset``, ``~device_id``, ``~split``,
    ``~video_topic``, ``~classification_frame_window``, ``~action_list``,
    ``~choose_list``, ``~framesize_width`` and ``~framesize_height``,
    subscribes to the configured video topic and builds the ``CaffeNet``
    from files below the module-level ``mypath``.

    ``~action_list`` and ``~choose_list`` may be lists or strings holding a
    Python list literal. When ``~choose_list`` is non-empty,
    ``self.defprox`` removes the scores of all actions that are not chosen
    and ``self.actionlist`` is reduced to the chosen actions, in the same
    order as the remaining scores.
    """
    import ast

    global mypath

    # services provided
    self.reconfig_srv_ = rospy.Service('reconf_split', split, self.reconfig_srv)
    # NOTE(review): the next two assignments store the Service under the
    # same attribute name as the handler method passed to it, so after this
    # point the attribute no longer refers to the method.
    self.start_vidscores = rospy.Service('start_vidscores', Empty, self.start_vidscores)
    self.stop_vidscores = rospy.Service('stop_vidscores', Empty, self.stop_vidscores)

    # topics published
    self.image_pub = rospy.Publisher("class_overlay_image_raw", Image, queue_size=1)
    self.label_fw_pub = rospy.Publisher("action_fw", String, queue_size=1)
    self.label_pub = rospy.Publisher("action", String, queue_size=1)
    self.ownlabel_pub = rospy.Publisher("action_own", String, queue_size=1)

    # parameters
    self.dataset = rospy.get_param('~dataset', 'hmdb51')
    self.device_id = rospy.get_param('~device_id', 0)
    self.split = rospy.get_param('~split', 1)
    self.videotopic = rospy.get_param('~video_topic', 'videofiles/image_raw')
    self.classwindow = rospy.get_param('~classification_frame_window', 50)
    self.actionlist = rospy.get_param('~action_list', ['brush_hair','cartwheel','catch','chew','clap','climb','climb_stairs','dive','draw_sword','dribble','drink','eat','fall_floor','fencing','flic_flac','golf','handstand','hit','hug','jump','kick','kick_ball','kiss','laugh','pick','pour','pullup','punch','push','pushup','ride_bike','ride_horse','run','shake_hands','shoot_ball','shoot_bow','shoot_gun','sit','situp','smile','smoke','somersault','stand','swing_baseball','sword','sword_exercise','talk','throw','turn','walk','wave'])
    # Parameters that arrive as strings are parsed as Python literals.
    # SECURITY: this used to call eval(), which executes arbitrary
    # expressions taken from the parameter server; literal_eval only
    # accepts literals such as a list of strings.
    if isinstance(self.actionlist, str):
        self.actionlist = ast.literal_eval(self.actionlist)
    self.actionlist.sort()
    self.chooselist = rospy.get_param('~choose_list', [])
    if isinstance(self.chooselist, str):
        self.chooselist = ast.literal_eval(self.chooselist)
    self.chooselist.sort()
    self.framesize_width = rospy.get_param('~framesize_width', 340)
    self.framesize_height = rospy.get_param('~framesize_height', 256)

    # topics subscribed
    # NOTE(review): the subscription is created before self.net and
    # self.lock exist; confirm self.callback tolerates an early message.
    self.image_sub = rospy.Subscriber(self.videotopic, Image, self.callback, queue_size=1)

    # internals
    self.bridge = CvBridge()
    from pyActionRecog.utils.video_funcs import default_aggregation_func

    if self.chooselist:
        rospy.logwarn('defined own subset of actions! classification will be reduced to smaller set of choices, namely:'+str(self.chooselist))
        chosen = set(self.chooselist)
        unknown = sorted(chosen - set(self.actionlist))
        if unknown:
            rospy.logwarn('choose_list entries missing from action_list are ignored: ' + str(unknown))
        # Score indices of every action that was not chosen.
        tobedeleted = [
            index for index, action in enumerate(self.actionlist)
            if action not in chosen
        ]
        self.defprox = lambda x: np.delete(default_aggregation_func(x), tobedeleted)
        # Keep exactly the labels whose scores survive the deletion above.
        # Assigning choose_list directly misaligned labels and scores when
        # it held unknown or duplicated names.
        self.actionlist = [action for action in self.actionlist if action in chosen]
    else:
        rospy.logwarn('No choose_list defined. Will classify within the whole set. ')
        self.defprox = default_aggregation_func

    self.frame_scores = []
    self.prototxt = mypath+'/models/'+ self.dataset +'/tsn_bn_inception_rgb_deploy.prototxt'
    self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_rgb_reference_bn_inception.caffemodel'
    self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id)
    self.font = cv2.FONT_HERSHEY_SIMPLEX

    self.ownvidscores = []
    # When the classifier is instantiated, startedownvid is already active.
    # This influences how vsmf_srv behaves, so it needs to stay like this.
    self.startedownvid = True
    self.lock = threading.Lock()
    rospy.loginfo("waiting for callback from " + self.videotopic +" to do anything")
x_name = '{}{:05d}.jpg'.format(flow_x_prefix, idx) y_name = '{}{:05d}.jpg'.format(flow_y_prefix, idx) flow_stack.append( cv2.imread(os.path.join(video_frame_path, x_name), cv2.IMREAD_GRAYSCALE)) flow_stack.append( cv2.imread(os.path.join(video_frame_path, y_name), cv2.IMREAD_GRAYSCALE)) scores = net.predict_single_flow_stack(flow_stack, score_name, frame_size=(340, 256)) frame_scores.append(scores) print 'video {} done'.format(videoname) sys.stdin.flush() final = {'seg_swin': seg_swin_tsn, 'res': res} scipy.io.savemat('%s/seg_swin.m' % final_path, final, appendmat=False) return np.array(frame_scores) print("OPERATING PAD 2 SECONDS") sport = sys.argv[1] for modality in ['flow', 'rgb']: net_weights = "/media/data/mtriet/temporal-segment-networks/models/huawei_%s/%s_%s.caffemodel" % ( sport, sport, modality) net_proto = "/media/data/mtriet/temporal-segment-networks/models/huawei_%s/tsn_bn_inception_%s_deploy.prototxt" % ( sport, modality) build_net(net_proto, net_weights) video_scores = eval_video(sport, modality, 2, 2) video_pred = np.argmax(default_aggregation_func(video_scores))
if args.num_worker > 1: pool = multiprocessing.Pool(args.num_worker, initializer=build_net) video_scores = pool.map(eval_video, eval_video_list) else: build_net() video_scores = map(eval_video, eval_video_list) ''' print 'video_scores' print video_scores print video_scores[0][0][0] print np.array(video_scores).shape print np.array(video_scores).shape() ''' video_pred = [np.argmax(default_aggregation_func(x[0])) for x in video_scores] #预测结果 print 'video_pred:' print video_pred video_labels = [x[1] for x in video_scores] #真实标签 ''' cf = confusion_matrix(video_labels, video_pred).astype(float) cls_cnt = cf.sum(axis=1) cls_hit = np.diag(cf) cls_acc = cls_hit/cls_cnt print cls_acc