def output(video_scores, prior=None):
    if prior is not None:
        temp = [default_aggregation_func(x[0]) for x in video_scores]
        temp = [map(operator.truediv, x, prior) for x in temp]
        video_pred = [np.argmax(x) for x in temp]
        max_scores = [np.max(x) for x in temp]
    else:
        video_pred = [
            np.argmax(default_aggregation_func(x[0])) for x in video_scores
        ]
        max_scores = [
            np.max(default_aggregation_func(x[0])) for x in video_scores
        ]

    for index, x in enumerate(max_scores):
        print "%s %s %s" % (default_aggregation_func(
            video_scores[index][0]), video_pred[index], eval_video_list[index])
    video_labels = [x[1] for x in video_scores]

    cf = confusion_matrix(video_labels, video_pred).astype(float)
    print cf
    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)

    cls_acc = cls_hit / cls_cnt

    print cls_acc

    print 'Mean accuracy over classes {:.02f}%'.format(np.mean(cls_acc) * 100)
    print 'Accuracy over samples: {:.02f}%'.format(np.sum(cls_hit) / np.sum(cf) * 100)
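
Dividing the aggregated scores by a class prior, as the first branch above does, boosts classes that the prior says are rare before taking the argmax. A minimal self-contained sketch of just that step (scores and prior invented for illustration):

import numpy as np

# Hypothetical aggregated scores for 3 videos over 4 classes.
scores = np.array([[0.5, 0.3, 0.1, 0.1],
                   [0.4, 0.4, 0.1, 0.1],
                   [0.2, 0.2, 0.3, 0.3]])
# Hypothetical class prior, e.g. estimated from the training label distribution.
prior = np.array([0.5, 0.3, 0.1, 0.1])

raw_pred = np.argmax(scores, axis=1)          # without the prior
adj_pred = np.argmax(scores / prior, axis=1)  # prior-corrected
print(raw_pred)  # -> [0 0 2]
print(adj_pred)  # -> [0 1 2]; the lower-prior class wins the tie in video 1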
Example #2
def get_score(score_files, weight=0.4):
    crop_agg = "mean"
    score_npz_files = [np.load(x) for x in score_files]
    score_list = [x['scores'][:, 0] for x in score_npz_files]
    label_list = [x['labels'] for x in score_npz_files]
    agg_score_list = []
    for score_vec in score_list:
        agg_score_vec = [
            default_aggregation_func(x,
                                     normalization=False,
                                     crop_agg=getattr(np, crop_agg))
            for x in score_vec
        ]
        agg_score_list.append(np.array(agg_score_vec))
    split = score_files[0].split("_")[2]

    score_weights = [weight, 1.0 - weight]

    if len(score_weights) != len(score_npz_files):
        raise ValueError(
            "Only {} weights specified for a total of {} score files".format(
                len(score_weights), len(score_npz_files)))

    final_scores = np.zeros_like(agg_score_list[0])
    for i, agg_score in enumerate(agg_score_list):
        final_scores += agg_score * score_weights[i]
    print "split: ", split
    ff = [x[0][0] for x in final_scores]
    return ff, label_list[0]
Example #3
def gated_fusion(i):
    # Evaluate predictions from the i-th score stream of each video.
    video_pred = [
        np.argmax(default_aggregation_func(x[i], normalization=False))
        for x in video_scores
    ]
    video_labels = [x[num_scores] for x in video_scores]

    cf = confusion_matrix(video_labels, video_pred).astype(float)

    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)
    cls_acc = cls_hit / cls_cnt
    print cls_acc
    print 'Accuracy {:.02f}%'.format(np.mean(cls_acc) * 100)
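
Every example here derives per-class accuracy the same way: row sums of the confusion matrix give the per-class sample counts, and the diagonal gives the hits. A minimal sketch with made-up labels:

import numpy as np
from sklearn.metrics import confusion_matrix

# Hypothetical ground truth and predictions for 6 videos, 3 classes.
video_labels = [0, 0, 1, 1, 2, 2]
video_pred = [0, 1, 1, 1, 2, 0]

cf = confusion_matrix(video_labels, video_pred).astype(float)
cls_cnt = cf.sum(axis=1)     # samples per ground-truth class (rows)
cls_hit = np.diag(cf)        # correct predictions per class
cls_acc = cls_hit / cls_cnt  # per-class recall
print(cls_acc)               # -> [0.5 1.  0.5]
print('Accuracy {:.02f}%'.format(np.mean(cls_acc) * 100))  # -> Accuracy 66.67%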
Example #4
def eval_scores(score_files, score_weights, agg_method):
    """Fuse the score files of different models
    
    Args:
        list(str) score_files: file names of the score files
        list(float) score_weights: fusion weights, one per score file
        str agg_method: name of the NumPy function used to aggregate the
            segment-level scores into video-level scores. The stored scores
            are segment level; see test_models.py.
    Returns:
        tuple: the fused accuracy and the per-video softmax scores.
    """
    score_npz_files = [np.load(x) for x in score_files]

    if score_weights is None:
        score_weights = [1] * len(score_npz_files)
    else:
        if len(score_weights) != len(score_npz_files):
            raise ValueError(
                "Only {} weight specifed for a total of {} score files".format(
                    len(score_weights), len(score_npz_files)))

    # x['scores'] has two columns: [segment-level score, label]
    score_list = [x['scores'][:, 0] for x in score_npz_files]
    label_list = [x['labels'] for x in score_npz_files]

    # label verification

    # score_aggregation
    agg_score_list = []
    for score_vec in score_list:
        agg_score_vec = [
            default_aggregation_func(x,
                                     normalization=False,
                                     crop_agg=getattr(np, agg_method))
            for x in score_vec
        ]  # video-level scores
        agg_score_list.append(np.array(agg_score_vec))

    final_scores = np.zeros_like(agg_score_list[0])
    for i, agg_score in enumerate(agg_score_list):
        final_scores += agg_score * score_weights[i]

    # accuracy
    acc = mean_class_accuracy(final_scores, label_list[0])

    # softmax score
    softmax_scores = [softmax(vec) for vec in final_scores]
    return acc, softmax_scores
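
The late fusion performed by eval_scores reduces to a weighted sum of per-model score matrices. A small sketch on synthetic arrays (shapes and weights are illustrative only):

import numpy as np

# Two hypothetical models' video-level scores: 5 videos x 3 classes.
rng = np.random.RandomState(0)
agg_score_list = [rng.rand(5, 3), rng.rand(5, 3)]
score_weights = [0.6, 0.4]

final_scores = np.zeros_like(agg_score_list[0])
for w, agg_score in zip(score_weights, agg_score_list):
    final_scores += w * agg_score

# Equivalent vectorized form:
stacked = np.stack(agg_score_list)  # shape (2, 5, 3)
assert np.allclose(final_scores, np.tensordot(score_weights, stacked, axes=1))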
Example #5
                flow_stack.append(cv2.imread(os.path.join(video_frame_path, y_name), cv2.IMREAD_GRAYSCALE))
            scores = net.predict_single_flow_stack(np.array(flow_stack), score_name, frame_size=(340, 256))
            frame_scores.append(scores)

    print 'video {} done'.format(vid)
    sys.stdout.flush()
    return np.array(frame_scores), label

if args.num_worker > 1:
    pool = multiprocessing.Pool(args.num_worker, initializer=build_net)
    video_scores = pool.map(eval_video, eval_video_list)
else:
    build_net()
    video_scores = map(eval_video, eval_video_list)

video_pred = [np.argmax(default_aggregation_func(x[0])) for x in video_scores]
video_labels = [x[1] for x in video_scores]

cf = confusion_matrix(video_labels, video_pred).astype(float)

cls_cnt = cf.sum(axis=1)
cls_hit = np.diag(cf)

cls_acc = cls_hit/cls_cnt

print cls_acc

print 'Accuracy {:.02f}%'.format(np.mean(cls_acc)*100)

if args.save_scores is not None:
    np.savez(args.save_scores, scores=video_scores, labels=video_labels)
Example #6
score_npz_files = [np.load(x) for x in args.score_files]

if args.score_weights is None:
    score_weights = [1] * len(score_npz_files)
else:
    score_weights = args.score_weights
    if len(score_weights) != len(score_npz_files):
        raise ValueError("Only {} weight specifed for a total of {} score files"
                         .format(len(score_weights), len(score_npz_files)))

score_list = [x['scores'][:, 0] for x in score_npz_files]
label_list = [x['labels'] for x in score_npz_files]

# label verification

# score_aggregation
agg_score_list = []
for score_vec in score_list:
    agg_score_vec = [default_aggregation_func(x, normalization=False, crop_agg=getattr(np, args.crop_agg)) for x in score_vec]
    agg_score_list.append(np.array(agg_score_vec))

final_scores = np.zeros_like(agg_score_list[0])
for i, agg_score in enumerate(agg_score_list):
    final_scores += agg_score * score_weights[i]

# accuracy
acc, cls_acc = mean_class_accuracy(final_scores, label_list[0])
print('Classes results: ' + str(['{:02.2f}%'.format(a * 100) for a in cls_acc]))
print('Final accuracy {:02.2f}%'.format(acc * 100))
Example #7
parser.add_argument('--crop_agg', type=str, choices=['max', 'mean'], default='mean')
args = parser.parse_args()

score_npz_files = [np.load(x) for x in args.score_files]

if args.score_weights is None:
    score_weights = [1] * len(score_npz_files)
else:
    score_weights = args.score_weights
    if len(score_weights) != len(score_npz_files):
        raise ValueError("Only {} weight specifed for a total of {} score files"
                         .format(len(score_weights), len(score_npz_files)))

score_list = [x['scores'][:, 0] for x in score_npz_files]
label_list = [x['labels'] for x in score_npz_files]

# label verification

# score_aggregation
agg_score_list = []
for score_vec in score_list:
    agg_score_vec = [default_aggregation_func(x, normalization=False, crop_agg=getattr(np, args.crop_agg)) for x in score_vec]
    agg_score_list.append(np.array(agg_score_vec))

final_scores = np.zeros_like(agg_score_list[0])
for i, agg_score in enumerate(agg_score_list):
    final_scores += agg_score * score_weights[i]

# accuracy
acc = mean_class_accuracy(final_scores, label_list[0])
print 'Final accuracy {:.02f}%'.format(acc * 100)
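
The crop_agg option works because getattr(np, 'mean') and getattr(np, 'max') return the NumPy reductions themselves, which is why the argparse choices above are restricted to those two names. A quick sketch of the lookup:

import numpy as np

for name in ('mean', 'max'):
    func = getattr(np, name)  # np.mean or np.max
    print('{} -> {}'.format(name, func([1, 2, 3])))  # mean -> 2.0, max -> 3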
Example #8
        frame_scores.append(scores)
    global ii
    ii += 1
    print ii, 'video {} done'.format(vid)
    sys.stdout.flush()
    return np.array(frame_scores), label


if num_worker > 1:
    pool = multiprocessing.Pool(num_worker, initializer=build_net)
    video_scores = pool.map(eval_video, eval_video_list)
else:
    build_net()
    video_scores = map(eval_video, eval_video_list)

video_pred = [np.argmax(default_aggregation_func(x[0])) for x in video_scores]
video_labels = [x[1] for x in video_scores]

cf = confusion_matrix(video_labels, video_pred).astype(float)

cls_cnt = cf.sum(axis=1)
cls_hit = np.diag(cf)

cls_acc = cls_hit / cls_cnt

print cls_acc

print 'Accuracy {:.02f}%'.format(np.mean(cls_acc) * 100)

if save_scores is not None:
    np.savez(save_scores, scores=video_scores, labels=video_labels)
Example #9
parser.add_argument('--score_weights', nargs='+', type=float, default=None)
args = parser.parse_args()

score_npz_files = [np.load(x) for x in args.score_files]

if args.score_weights is None:
    score_weights = [1] * len(score_npz_files)
else:
    score_weights = args.score_weights
    if len(score_weights) != len(score_npz_files):
        raise ValueError("Only {} weight specifed for a total of {} score files"
                         .format(len(score_weights), len(score_npz_files)))

score_list = [x['scores'][:, 0] for x in score_npz_files]
label_list = [x['labels'] for x in score_npz_files]

# label verification

# score_aggregation
agg_score_list = []
for score_vec in score_list:
    agg_score_vec = [default_aggregation_func(x, normalization=False) for x in score_vec]
    agg_score_list.append(np.array(agg_score_vec))

final_scores = np.zeros_like(agg_score_list[0])
for i, agg_score in enumerate(agg_score_list):
    final_scores += agg_score * score_weights[i]

# accuracy
acc = mean_class_accuracy(final_scores, label_list[0])
print 'Final accuracy {:.02f}%'.format(acc * 100)
Example #10
  def __init__(self):
    global mypath
    # services provided
    self.reconfig_srv_ = rospy.Service('reconf_split',split, self.reconfig_srv)
    self.start_vidscores = rospy.Service('start_vidscores', Empty, self.start_vidscores)
    self.stop_vidscores = rospy.Service('stop_vidscores', Empty, self.stop_vidscores)
    # topics published
    # self.image_pub = rospy.Publisher("class_overlay_image_raw",Image, queue_size=1)
    # self.label_fw_pub = rospy.Publisher("action_fw", String, queue_size=1)
    # self.label_pub = rospy.Publisher("action", String, queue_size=1)
    # self.ownlabel_pub = rospy.Publisher("action_own", String, queue_size=1)

    # parameters
    self.dataset = rospy.get_param('~dataset','hmdb51')
    self.device_id = rospy.get_param('~device_id',0)
    self.split = rospy.get_param('~split',1)
    self.step = rospy.get_param('~step',6)
    # this should actually be:
    #   step = (frame_cnt - stack_depth) / (args.num_frame_per_video - 1)
    # but that depends on the action length, which is unavailable when classifying
    # in real time (though it could be obtained when running via service calls!)

    self.stack_depth = rospy.get_param('~stack_depth',5)
    # stack_depth is 1 for rgb and 5 for flows. I am letting it be 5 to test creating an array of cv_images

    self.classwindow = rospy.get_param('~classification_frame_window',50)
    #whatswrong = (rospy.resolve_name('~action_list'))
    #rospy.spin()
    self.actionlist = rosparam.get_param(rospy.resolve_name('~action_list')) #"['brush_hair','cartwheel','catch','chew','clap','climb','climb_stairs','dive','draw_sword','dribble','drink','eat','fall_floor','fencing','flic_flac','golf','handstand','hit','hug','jump','kick','kick_ball','kiss','laugh','pick','pour','pullup','punch','push','pushup','ride_bike','ride_horse','run','shake_hands','shoot_ball','shoot_bow','shoot_gun','sit','situp','smile','smoke','somersault','stand','swing_baseball','sword','sword_exercise','talk','throw','turn','walk','wave']")
    #if type(self.actionlist) is str:
    #    self.actionlist = eval(self.actionlist)
    self.actionlist.sort()
    self.chooselist = rosparam.get_param(rospy.resolve_name('~choose_list')) ## I must be doing something wrong here for this name not to be resolved. maybe it is because each node here should probably have its own init_node and it doesn't
    #if type(self.chooselist) is str:
    #    self.chooselist = eval(self.chooselist)
    self.chooselist.sort()
    ###probably should use the nice rosparam thingy here to avoid these problems...
    self.framesize_width = rospy.get_param('~framesize_width',340)
    self.framesize_height = rospy.get_param('~framesize_height',256)

    # topics subscribed
    self.image_sub = rospy.Subscriber('video_topic', Image,self.callback,queue_size=1)

    # internals
    self.bridge = CvBridge()
    from pyActionRecog.utils.video_funcs import default_aggregation_func
    if self.chooselist:
        keepi = []
        rospy.logwarn('defined own subset of actions! classification will be reduced to smaller set of choices, namely:'+str(self.chooselist))
        #print(range(0,len(self.actionlist)))
        for i in range(0, len(self.actionlist)):
            for j in range(0, len(self.chooselist)):
                # print(self.actionlist[i])
                # print(self.chooselist[j])
                if self.actionlist[i] == self.chooselist[j]:
                    keepi.append(i)
        tobedeleted = set(range(0,len(self.actionlist)))-set(keepi)
        #print(tobedeleted)
        self.defprox = lambda x: np.delete(default_aggregation_func(x),list(tobedeleted))
        self.actionlist = self.chooselist
    else:
        rospy.logwarn('No choose_list defined. Will classify within the whole set. ')
        self.defprox = default_aggregation_func
    self.frame_scores = []
    self.prototxt = mypath+'/models/'+ self.dataset +'/tsn_bn_inception_rgb_deploy.prototxt'
    self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_rgb_reference_bn_inception.caffemodel'
    self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id)

    self.ownvidscores = []
    # when I instantiate the classifier, the startedownvid is working already. this influences how vsmf_srv will behave, so it needs to be like this, I think.
    self.startedownvid = True
    self.lock = threading.Lock()

    #publishers
    self.label_fw_pub = FunnyPublisher("action_fw", self.actionlist, self.defprox)
    self.label_pub = FunnyPublisher("action", self.actionlist, self.defprox)
    self.ownlabel_pub = FunnyPublisher("action_own", self.actionlist, self.defprox)
    rospy.set_param('~alive',0.5)
    rospy.loginfo("waiting for callback from " +rospy.resolve_name('video_topic') +" to do anything")
Example #11
def eval_video(video):
    global net
    label = video[1]
    vid = video[0]

    video_frame_path = f_info[0][vid]
    if args.modality in ('rgb', 'flow', 'c3d_rgb', 'c3d_flow'):
        cnt_indexer = 1
    else:
        raise ValueError(args.modality)
    frame_cnt = f_info[cnt_indexer][vid]

    stack_depth = 0
    if args.modality == 'rgb':
        stack_depth = 1
    elif args.modality == 'flow':
        stack_depth = 5
    elif args.modality == 'c3d_rgb':
        stack_depth = args.depth
    elif args.modality == 'c3d_flow':
        stack_depth = args.depth
    else:
        raise ValueError('Invalid modality: ' + args.modality)

    step = 1.0 * (frame_cnt - stack_depth) / (args.num_frame_per_video - 1)
    # step = min(6.0, frame_cnt/10.0)
    if step > 0:
        frame_ticks = np.arange(1, 2 + frame_cnt - stack_depth, step)
    else:
        frame_ticks = [1] * args.num_frame_per_video

    frame_ticks = np.floor(frame_ticks)
    frame_ticks = frame_ticks.astype(int)
    #    assert(len(frame_ticks) == args.num_frame_per_video)

    frame_scores = []
    for tick in frame_ticks:
        if args.modality == 'rgb':
            name = '{}{:05d}.jpg'.format(args.rgb_prefix, tick)
            frame = cv2.imread(os.path.join(video_frame_path, name),
                               cv2.IMREAD_COLOR)
            scores = net.predict_single_frame([
                frame,
            ],
                                              score_name,
                                              frame_size=(340, 256),
                                              attention_name=attention_name)
            frame_scores.append(scores)
        if args.modality == 'flow':
            frame_idx = [
                min(frame_cnt, tick + offset) for offset in xrange(stack_depth)
            ]
            flow_stack = []
            for idx in frame_idx:
                x_name = '{}{:06d}.jpg'.format(args.flow_x_prefix, idx)
                y_name = '{}{:06d}.jpg'.format(args.flow_y_prefix, idx)
                flow_stack.append(
                    cv2.imread(os.path.join(video_frame_path, x_name),
                               cv2.IMREAD_GRAYSCALE))
                flow_stack.append(
                    cv2.imread(os.path.join(video_frame_path, y_name),
                               cv2.IMREAD_GRAYSCALE))
            scores = net.predict_single_flow_stack(
                flow_stack,
                score_name,
                frame_size=(340, 256),
                attention_name=attention_name)
            frame_scores.append(scores)
        if args.modality == 'c3d_flow':
            frame_idx = [
                min(frame_cnt, tick + offset) for offset in xrange(stack_depth)
            ]
            flow_stack = []
            for idx in frame_idx:
                x_name = '{}{:05d}.jpg'.format(args.flow_x_prefix, idx)
                y_name = '{}{:05d}.jpg'.format(args.flow_y_prefix, idx)
                flow_stack.append(
                    cv2.imread(os.path.join(video_frame_path, x_name),
                               cv2.IMREAD_GRAYSCALE))
                flow_stack.append(
                    cv2.imread(os.path.join(video_frame_path, y_name),
                               cv2.IMREAD_GRAYSCALE))
            scores = net.predict_single_c3d_flow_stack(flow_stack,
                                                       score_name,
                                                       frame_size=(170, 128))
            frame_scores.append(scores)
        if args.modality == 'c3d_rgb':
            frame_idx = [
                min(frame_cnt, tick + offset) for offset in xrange(stack_depth)
            ]
            c3d_stack = []
            i = 0
            skip = args.skip
            for idx in frame_idx:
                i = i + 1
                if (i % skip == 0):
                    x_name = '{}{:06d}.jpg'.format(args.c3d_prefix, idx)
                    img = cv2.imread(os.path.join(video_frame_path, x_name),
                                     cv2.IMREAD_COLOR)
                    c3d_stack.append(img)
            scores = net.predict_single_c3d_rgb_stack(c3d_stack,
                                                      score_name,
                                                      frame_size=(170, 128))
            frame_scores.append(scores)

    ii = 0
    if attention_name is not None:
        frame_attentions = [x[1] for x in frame_scores]
        frame_scores = [x[0] for x in frame_scores]

    if np.argmax(default_aggregation_func(frame_scores)) == label:
        ii = 1
    print 'video {0} pred {1} label {2} same {3} done'.format(
        vid, np.argmax(default_aggregation_func(frame_scores)), label, ii)
    sys.stdout.flush()
    if attention_name is None:
        return np.array(frame_scores), label
    else:
        return np.array(frame_scores), label, np.array(frame_attentions)
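
The tick computation in eval_video spreads num_frame_per_video start positions evenly over the usable range [1, frame_cnt - stack_depth + 1], falling back to repeating frame 1 for clips shorter than the stack. A standalone sketch of just that arithmetic (the counts are made up):

import numpy as np

frame_cnt, stack_depth, num_frame_per_video = 120, 5, 25

step = 1.0 * (frame_cnt - stack_depth) / (num_frame_per_video - 1)
if step > 0:
    frame_ticks = np.arange(1, 2 + frame_cnt - stack_depth, step)
else:
    frame_ticks = [1] * num_frame_per_video  # very short video: reuse frame 1
frame_ticks = np.floor(frame_ticks).astype(int)

print(len(frame_ticks))  # -> 25 evenly spaced start frames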
Example #12
    num_worker = args.num_worker

    split_source(args.source_path, trunk_net_base, num_worker)
    write_proto_template(args.net_proto_template, num_worker, trunk_net_base)
    if len(gpu_list) > 1:
        cnn_worker = multiprocessing.Pool(len(gpu_list))
        raw_video_scores = cnn_worker.map(eval_video, gpu_list)
        video_scores = merge_worker_result(raw_video_scores)
    else:
        video_scores = eval_video(0)

    with open('{}.pickle'.format(args.save_scores), 'wb') as fv_score:
        pickle.dump(video_scores, fv_score)

    video_pred = [
        np.argmax(default_aggregation_func(x[0], crop_agg=np.max))
        for x in video_scores
    ]
    video_labels = [x[1] for x in video_scores]
    video_names = [x[2] for x in video_scores]

    cf = confusion_matrix(video_labels, video_pred).astype(float)
    cls_cnt = cf.sum(axis=1)
    cls_hit = np.diag(cf)
    cls_acc = cls_hit / cls_cnt

    print cls_acc
    if args.check_result:
        cnt = 0
        for vid, video_label in enumerate(video_labels):
            if video_label != video_pred[vid]:
Example #13
    diff_score = pickle.load(fmotion)

aligned_flow_score = [None for _ in xrange(len(flow_score))]
for mid, motion_sample in enumerate(motion_score):
    for fid, flow_sample in enumerate(flow_score):
        if motion_sample[2] == flow_sample[2]:
            aligned_flow_score[mid] = flow_sample

# RGB : RGB OFF : FLOW : FLOW OFF -- 1:1.5:0.8:1.8
# RGB : RGB OFF : (RGB DIFF : RGB DIFF OFF : RGB DIFF OFF 14)= 1:1.8:  (1:2:0.5)*0.8
# 0~25: Scores from Feature Generation Network
# 25~49: Score from OFF-sub-network on 7x7
# 49~73: Score from OFF-sub-network on 14x14
video_pred = [
    np.argmax(
        default_aggregation_func(
            x[0][:25, ...], normalization=False, crop_agg=np.max) * 1 +
        default_aggregation_func(
            x[0][25:49, ...], normalization=False, crop_agg=np.max) * 1.5 +
        default_aggregation_func(
            y[0][:25, ...], normalization=False, crop_agg=np.max) * 0.8 +
        default_aggregation_func(
            y[0][25:49, ...], normalization=False, crop_agg=np.max) * 1.8)
    for x, y, z in zip(motion_score, aligned_flow_score, diff_score)
]

video_labels = [x[1] for x in motion_score]
video_names = [x[2] for x in motion_score]

cf = confusion_matrix(video_labels, video_pred).astype(float)
cls_cnt = cf.sum(axis=1)
cls_hit = np.diag(cf)
Example #14
else:
    score_weights = args.score_weights
    if len(score_weights) != len(score_npz_files):
        raise ValueError(
            "Only {} weight specifed for a total of {} score files".format(
                len(score_weights), len(score_npz_files)))

score_list = [x['scores'][:, 0] for x in score_npz_files]
label_list = [x['labels'] for x in score_npz_files]

# score_aggregation
agg_score_list = []
for score_vec in score_list:
    agg_score_vec = [
        default_aggregation_func(x,
                                 normalization=False,
                                 crop_agg=getattr(np, args.crop_agg))
        for x in score_vec
    ]
    agg_score_list.append(np.array(agg_score_vec))

final_scores = np.zeros_like(agg_score_list[0])
for i, agg_score in enumerate(agg_score_list):
    final_scores += agg_score * score_weights[i]

# generate prediction
predict = np.argmax(final_scores, axis=1)
label = label_list[0]
num_classes = label.max() + 1

analysis_file = args.cf_analysis_file
Example #15
def main(argv):
    def build_net():
        global net
        my_id = multiprocessing.current_process()._identity[0] \
            if args.num_worker > 1 else 1
        if gpu_list is None:
            net = CaffeNet(args.net_proto, args.net_weights, my_id - 1)
        else:
            net = CaffeNet(args.net_proto, args.net_weights,
                           gpu_list[my_id - 1])

    def eval_video(video):
        global net
        label = video[1]
        vid = video[0]

        video_frame_path = f_info[0][vid]
        if args.modality == 'rgb':
            cnt_indexer = 1
        elif args.modality == 'flow':
            cnt_indexer = 2
        else:
            raise ValueError(args.modality)
        frame_cnt = f_info[cnt_indexer][vid]

        stack_depth = 0
        if args.modality == 'rgb':
            stack_depth = 1
        elif args.modality == 'flow':
            stack_depth = 5

        step = (frame_cnt - stack_depth) / (args.num_frame_per_video - 1)
        if step > 0:
            frame_ticks = range(
                1,
                min((2 + step * (args.num_frame_per_video - 1)),
                    frame_cnt + 1), step)
        else:
            frame_ticks = [1] * args.num_frame_per_video

        assert (len(frame_ticks) == args.num_frame_per_video)

        frame_scores = []
        for tick in frame_ticks:
            if args.modality == 'rgb':
                name = '{}{:05d}.jpg'.format(args.rgb_prefix, tick)
                frame = cv2.imread(os.path.join(video_frame_path, name),
                                   cv2.IMREAD_COLOR)
                try:
                    scores = net.predict_single_frame([
                        frame,
                    ],
                                                      score_name,
                                                      frame_size=(340, 256))
                except Exception:
                    print(os.path.join(video_frame_path, name))
                    Image(os.path.join(video_frame_path, name))

                frame_scores.append(scores)
            if args.modality == 'flow':
                frame_idx = [
                    min(frame_cnt, tick + offset)
                    for offset in xrange(stack_depth)
                ]
                flow_stack = []
                for idx in frame_idx:
                    x_name = '{}{:05d}.jpg'.format(args.flow_x_prefix, idx)
                    y_name = '{}{:05d}.jpg'.format(args.flow_y_prefix, idx)
                    flow_stack.append(
                        cv2.imread(os.path.join(video_frame_path, x_name),
                                   cv2.IMREAD_GRAYSCALE))
                    flow_stack.append(
                        cv2.imread(os.path.join(video_frame_path, y_name),
                                   cv2.IMREAD_GRAYSCALE))
                scores = net.predict_single_flow_stack(flow_stack,
                                                       score_name,
                                                       frame_size=(340, 256))
                frame_scores.append(scores)

        print('video {} done'.format(vid))
        sys.stdin.flush()
        return np.array(frame_scores), label

    global args
    rospy.init_node('image_converter', anonymous=True)
    ic = image_converter()

    ##### A lot of these parameters will not be necessary anymore after splitting into rgb and flow and removing some of the multiprocessing code; keeping the mess for now...
    sys.argv = ['','hmdb51','1','rgb','/temporal-segment-networks/my_of/','models/hmdb51/tsn_bn_inception_rgb_deploy.prototxt',\
              'models/hmdb51_split_1_tsn_rgb_reference_bn_inception.caffemodel' ,  '--num_worker', '1', '--save_scores', 'myscores_fre.txt']

    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', type=str, choices=['ucf101', 'hmdb51'])
    parser.add_argument('split',
                        type=int,
                        choices=[1, 2, 3],
                        help='on which split to test the network')
    parser.add_argument('modality', type=str, choices=['rgb', 'flow'])
    parser.add_argument('frame_path',
                        type=str,
                        help="root directory holding the frames")
    parser.add_argument('net_proto', type=str)
    parser.add_argument('net_weights', type=str)
    parser.add_argument('--rgb_prefix',
                        type=str,
                        help="prefix of RGB frames",
                        default='img_')
    parser.add_argument('--flow_x_prefix',
                        type=str,
                        help="prefix of x direction flow images",
                        default='flow_x_')
    parser.add_argument('--flow_y_prefix',
                        type=str,
                        help="prefix of y direction flow images",
                        default='flow_y_')
    parser.add_argument('--num_frame_per_video',
                        type=int,
                        default=25,
                        help="number of frames to sample per video")
    parser.add_argument('--save_scores',
                        type=str,
                        default=None,
                        help='the filename to save the scores in')
    parser.add_argument('--num_worker', type=int, default=1)
    parser.add_argument("--caffe_path",
                        type=str,
                        default='./lib/caffe-action/',
                        help='path to the caffe toolbox')
    parser.add_argument("--gpus",
                        type=int,
                        nargs='+',
                        default=None,
                        help='specify list of gpu to use')
    args = parser.parse_args()
    print(args)

    sys.path.append(os.path.join(args.caffe_path, 'python'))
    from pyActionRecog import parse_directory
    from pyActionRecog import parse_split_file
    from pyActionRecog.utils.video_funcs import default_aggregation_func
    from pyActionRecog.action_caffe import CaffeNet

    # build necessary information
    print(args.dataset)
    split_tp = parse_split_file(args.dataset)
    f_info = parse_directory(args.frame_path, args.rgb_prefix,
                             args.flow_x_prefix, args.flow_y_prefix)

    gpu_list = args.gpus

    eval_video_list = split_tp[args.split - 1][1]

    score_name = 'fc-action'

    if 1:  # debug override: evaluate a single hard-coded video list
        eval_video_list = [('ua', 1)]
        print(eval_video_list[0])
        print(f_info)

    if args.num_worker > 1:
        pool = multiprocessing.Pool(args.num_worker, initializer=build_net)
        video_scores_rgb = pool.map(eval_video, eval_video_list)
    else:
        build_net()
        video_scores_rgb = map(eval_video, eval_video_list)

    video_pred = [
        np.argmax(default_aggregation_func(x[0])) for x in video_scores_rgb
    ]
    print(video_pred)

    try:
        rospy.spin()
    except KeyboardInterrupt:
        print("Shutting down")
    cv2.destroyAllWindows()
Example #16
def eval_video(video):
    global net
    label = video[1]
    vid = video[0]

    video_frame_path = f_info[0][vid]
    if modality == 'rgb':
        cnt_indexer = 1
    elif modality == 'flow':
        cnt_indexer = 2
    else:
        raise ValueError(modality)
    frame_cnt = f_info[cnt_indexer][vid]

    stack_depth = 0
    if modality == 'rgb':
        stack_depth = 1
    elif modality == 'flow':
        stack_depth = 5

    step = (frame_cnt - stack_depth) / (args.num_frame_per_video - 1)
    if step > 0:
        frame_ticks = range(
            1, min((2 + step * (args.num_frame_per_video - 1)), frame_cnt + 1),
            step)
    else:
        frame_ticks = [1] * args.num_frame_per_video

    assert (len(frame_ticks) == args.num_frame_per_video)

    frame_scores = []
    for tick in frame_ticks:
        if modality == 'rgb':
            name = '{}{:05d}.jpg'.format(args.rgb_prefix, tick)
            frame = cv2.imread(os.path.join(video_frame_path, name),
                               cv2.IMREAD_COLOR)
            scores = net.predict_single_frame([
                frame,
            ],
                                              score_name,
                                              frame_size=(340, 256))
            frame_scores.append(scores)
        if modality == 'flow':
            frame_idx = [
                min(frame_cnt, tick + offset) for offset in xrange(stack_depth)
            ]
            flow_stack = []
            for idx in frame_idx:
                x_name = '{}{:05d}.jpg'.format(args.flow_x_prefix, idx)
                y_name = '{}{:05d}.jpg'.format(args.flow_y_prefix, idx)
                flow_stack.append(
                    cv2.imread(os.path.join(video_frame_path, x_name),
                               cv2.IMREAD_GRAYSCALE))
                flow_stack.append(
                    cv2.imread(os.path.join(video_frame_path, y_name),
                               cv2.IMREAD_GRAYSCALE))
            scores = net.predict_single_flow_stack(flow_stack,
                                                   score_name,
                                                   frame_size=(340, 256))
            frame_scores.append(scores)

    print 'video {} done'.format(vid)
    print np.argmax(default_aggregation_func(frame_scores))
    print label

    sys.stdout.flush()
    return np.array(frame_scores), label
Example #17
    print 'iii', iii
    score_npz_files = [np.load(x) for x in score_files[iii]]
    score_list = [x['scores'][:, 0] for x in score_npz_files]
    label_list = [x['labels'] for x in score_npz_files]

    for ii in xrange(11):
        print ii
        score_weights = [ii, 10 - ii]
        # label verification
        # score_aggregation
        agg_score_list = []
        for score_vec in score_list:
            agg_score_vec = [
                default_aggregation_func(x,
                                         normalization=False,
                                         crop_agg=getattr(np, 'mean'))
                for x in score_vec
            ]
            #print len(agg_score_vec)
            agg_score_list.append(np.array(agg_score_vec))

        final_scores = np.zeros_like(agg_score_list[0])
        for i, agg_score in enumerate(agg_score_list):
            final_scores += agg_score * score_weights[i]
        # accuracy
        acc = mean_class_accuracy(final_scores, label_list[0])
        #print acc
        print 'Final accuracy {:.02f}%'.format(acc * 100)
        accuracy[iii].append(np.array(acc))
        #print accuracy
Example #18
    score_weights = args.score_weights
    if len(score_weights) != len(score_npz_files):
        raise ValueError(
            "Only {} weight specifed for a total of {} score files".format(
                len(score_weights), len(score_npz_files)))

score_list = [x['scores'][:, 0] for x in score_npz_files]  # each score: (test_num, (25, 10, class_num))
label_list = [x['labels'] for x in score_npz_files]

# score_aggregation
agg_score_list = []
for score_vec in score_list:
    agg_score_vec = [
        default_aggregation_func(x.reshape((25, 10, -1)),
                                 normalization=False,
                                 crop_agg=getattr(np, args.crop_agg))
        for x in score_vec
    ]
    agg_score_list.append(np.array(agg_score_vec))

final_scores = np.zeros_like(agg_score_list[0])
for i, agg_score in enumerate(agg_score_list):
    final_scores += agg_score * score_weights[i]  # size: (test_num, class_num)

# output: confusion matrix, combined cf, accuracy in total
# confusion matrix
video_pred = [np.argmax(x) for x in final_scores]
video_labels = label_list[0]

cf = confusion_matrix(video_labels, video_pred).astype(float)
Example #19
  def __init__(self):
    global mypath
    # services provided
    self.reconfig_srv_ = rospy.Service('reconf_split',split, self.reconfig_srv)
    self.start_vidscores = rospy.Service('start_vidscores', Empty, self.start_vidscores)
    self.stop_vidscores = rospy.Service('stop_vidscores', Empty, self.stop_vidscores)
    # topics published
    self.image_pub = rospy.Publisher("class_overlay_image_raw",Image, queue_size=1)
    self.label_fw_pub = rospy.Publisher("action_fw", String, queue_size=1)
    self.label_pub = rospy.Publisher("action", String, queue_size=1)
    self.ownlabel_pub = rospy.Publisher("action_own", String, queue_size=1)
    # parameters
    self.dataset = rospy.get_param('~dataset','hmdb51')
    self.device_id = rospy.get_param('~device_id',0)
    self.split = rospy.get_param('~split',1)
    self.videotopic = rospy.get_param('~video_topic','videofiles/image_raw')
    self.classwindow = rospy.get_param('~classification_frame_window',50)
    self.actionlist = rospy.get_param('~action_list', ['brush_hair','cartwheel','catch','chew','clap','climb','climb_stairs','dive','draw_sword','dribble','drink','eat','fall_floor','fencing','flic_flac','golf','handstand','hit','hug','jump','kick','kick_ball','kiss','laugh','pick','pour','pullup','punch','push','pushup','ride_bike','ride_horse','run','shake_hands','shoot_ball','shoot_bow','shoot_gun','sit','situp','smile','smoke','somersault','stand','swing_baseball','sword','sword_exercise','talk','throw','turn','walk','wave'])
    if type(self.actionlist) is str:
        self.actionlist = eval(self.actionlist)
        self.actionlist.sort()
    self.chooselist = rospy.get_param('~choose_list',[])
    if type(self.chooselist) is str:
        self.chooselist = eval(self.chooselist)
        self.chooselist.sort()
    ###probably should use the nice rosparam thingy here to avoid these problems...
    self.framesize_width = rospy.get_param('~framesize_width',340)
    self.framesize_height = rospy.get_param('~framesize_height',256)

    # topics subscribed
    self.image_sub = rospy.Subscriber(self.videotopic, Image,self.callback,queue_size=1)

    # internals
    self.bridge = CvBridge()
    from pyActionRecog.utils.video_funcs import default_aggregation_func
    if self.chooselist:
        keepi = []
        rospy.logwarn('defined own subset of actions! classification will be reduced to smaller set of choices, namely:'+str(self.chooselist))
        #print(range(0,len(self.actionlist)))
        for i in range(0, len(self.actionlist)):
            for j in range(0, len(self.chooselist)):
                # print(self.actionlist[i])
                # print(self.chooselist[j])
                if self.actionlist[i] == self.chooselist[j]:
                    keepi.append(i)
        tobedeleted = set(range(0,len(self.actionlist)))-set(keepi)
        #print(tobedeleted)
        self.defprox = lambda x: np.delete(default_aggregation_func(x),list(tobedeleted))
        self.actionlist = self.chooselist
    else:
        rospy.logwarn('No choose_list defined. Will classify within the whole set. ')
        self.defprox = default_aggregation_func
    self.frame_scores = []
    self.prototxt = mypath+'/models/'+ self.dataset +'/tsn_bn_inception_rgb_deploy.prototxt'
    self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_rgb_reference_bn_inception.caffemodel'
    self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id)
    self.font = cv2.FONT_HERSHEY_SIMPLEX
    #print('hio')
    self.ownvidscores = []
    # when I instantiate the classifier, the startedownvid is working already. this influences how vsmf_srv will behave, so it needs to be like this, I think.
    self.startedownvid = True
    self.lock = threading.Lock()
    rospy.loginfo("waiting for callback from " + self.videotopic +" to do anything")
Example #20
                    x_name = '{}{:05d}.jpg'.format(flow_x_prefix, idx)
                    y_name = '{}{:05d}.jpg'.format(flow_y_prefix, idx)
                    flow_stack.append(
                        cv2.imread(os.path.join(video_frame_path, x_name),
                                   cv2.IMREAD_GRAYSCALE))
                    flow_stack.append(
                        cv2.imread(os.path.join(video_frame_path, y_name),
                                   cv2.IMREAD_GRAYSCALE))
                scores = net.predict_single_flow_stack(flow_stack,
                                                       score_name,
                                                       frame_size=(340, 256))
                frame_scores.append(scores)

    print 'video {} done'.format(videoname)
    sys.stdout.flush()
    final = {'seg_swin': seg_swin_tsn, 'res': res}
    scipy.io.savemat('%s/seg_swin.m' % final_path, final, appendmat=False)
    return np.array(frame_scores)


print("OPERATING PAD 2 SECONDS")
sport = sys.argv[1]
for modality in ['flow', 'rgb']:
    net_weights = "/media/data/mtriet/temporal-segment-networks/models/huawei_%s/%s_%s.caffemodel" % (
        sport, sport, modality)
    net_proto = "/media/data/mtriet/temporal-segment-networks/models/huawei_%s/tsn_bn_inception_%s_deploy.prototxt" % (
        sport, modality)
    build_net(net_proto, net_weights)
    video_scores = eval_video(sport, modality, 2, 2)
    video_pred = np.argmax(default_aggregation_func(video_scores))
Example #21

if args.num_worker > 1:
    pool = multiprocessing.Pool(args.num_worker, initializer=build_net)
    video_scores = pool.map(eval_video, eval_video_list)
else:
    build_net()
    video_scores = map(eval_video, eval_video_list)
'''
print 'video_scores'
print video_scores
print video_scores[0][0][0]
print np.array(video_scores).shape
print np.array(video_scores).shape()
'''
video_pred = [np.argmax(default_aggregation_func(x[0]))
              for x in video_scores]  # predicted labels

print 'video_pred:'
print video_pred
video_labels = [x[1] for x in video_scores]  # ground-truth labels
'''
cf = confusion_matrix(video_labels, video_pred).astype(float)

cls_cnt = cf.sum(axis=1)
cls_hit = np.diag(cf)

cls_acc = cls_hit/cls_cnt

print cls_acc