Example 1
import glob
import json

import numpy as np
import scipy.io
from keras.models import load_model
from keras.preprocessing import sequence

# skeleton_prune and vizutil are project-local helpers assumed to be
# importable.


def without_segmentation_sequence_test(metadata_path, model_path, maxlen=200):
    data_dim = 33
    label_list = [
        'null', 'reaching', 'moving', 'placing', 'opening', 'cleaning',
        'closing', 'pouring', 'eating', 'drinking'
    ]
    path_skeleton = metadata_path + 'aligned_skeletons/'
    path_label = metadata_path + 'sequence_label.json'
    with open(path_label, 'r') as f:
        sequence_label = json.load(f)
    skeleton_list = glob.glob(path_skeleton + '*.mat')
    video_index = 5
    sequence_id = skeleton_list[video_index].split('/')[-1][:-4]
    cur_skeleton = np.transpose(
        scipy.io.loadmat(skeleton_list[video_index])['skeleton'])
    num_frame = 0
    for sequence_cut in sequence_label:
        if sequence_id == sequence_cut['sequence_id']:
            if sequence_cut['end_frame'] > num_frame:
                num_frame = sequence_cut['end_frame']
    y_all = np.zeros(num_frame)
    x_all = np.zeros((num_frame, maxlen, data_dim))
    for sequence_cut in sequence_label:
        if sequence_id == sequence_cut['sequence_id']:
            start_frame = sequence_cut['start_frame']
            end_frame = sequence_cut['end_frame']
            y_all[start_frame:end_frame] = label_list.index(
                sequence_cut['sequence_label'])
    for frame in range(num_frame):
        # Use the frames up to the current one, capped at a sliding window of
        # maxlen frames, then zero-pad to a fixed length for the model input.
        if frame < maxlen:
            x_temp = skeleton_prune(cur_skeleton[:, :frame + 1])
            x_temp = np.reshape(x_temp, (data_dim, frame + 1))
        else:
            x_temp = skeleton_prune(
                cur_skeleton[:, frame - (maxlen - 1):frame + 1])
        x_all[frame, :, :] = np.transpose(
            sequence.pad_sequences(x_temp, dtype=float, maxlen=maxlen))
    model = load_model(model_path)
    prediction = model.predict(x_all)
    predict_result = np.zeros(num_frame)
    for i in range(num_frame):
        predict_result[i] = int(np.argmax(prediction[i, :]))
        print(label_list[int(y_all[i])], label_list[int(predict_result[i])])
    # Plot ground truth, prediction, and their frame-wise agreement over all
    # num_frame frames.
    vizutil.plot_segmentation(
        [y_all, predict_result, (y_all - predict_result) == 0], num_frame)
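These examples all call plot_segmentation with a list of three equal-length tracks (ground truth, prediction, and a boolean agreement mask) plus the track length. A minimal toy call, assuming the project-local vizutil is importable and its signature matches this usage:

import numpy as np

# vizutil is the project-local plotting helper used throughout these examples.
gt = np.array([0, 1, 1, 2, 2, 2, 3, 3])      # toy ground-truth labels
pred = np.array([0, 1, 2, 2, 2, 3, 3, 3])    # toy predicted labels

# Three equal-length tracks: ground truth, prediction, and their agreement.
vizutil.plot_segmentation([gt, pred, (gt - pred) == 0], len(gt))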
Example 2
import glob
import json

import numpy as np
import scipy.io
from keras.models import load_model

# skeleton_prune and vizutil are project-local helpers assumed to be
# importable.


def without_segmentation_sequence_test_per_frame(metadata_path, model_path):
    data_dim = 33
    label_list = [
        'null', 'reaching', 'moving', 'placing', 'opening', 'cleaning',
        'closing', 'pouring', 'eating', 'drinking'
    ]
    path_skeleton = metadata_path + 'aligned_skeletons/'
    path_label = metadata_path + 'sequence_label.json'
    with open(path_label, 'r') as f:
        sequence_label = json.load(f)
    skeleton_list = glob.glob(path_skeleton + '*.mat')
    video_index = 60
    sequence_id = skeleton_list[video_index].split('/')[-1][:-4]
    cur_skeleton = np.transpose(
        scipy.io.loadmat(skeleton_list[video_index])['skeleton'])
    num_frame = 0
    for sequence_cut in sequence_label:
        if sequence_id == sequence_cut['sequence_id']:
            if sequence_cut['end_frame'] > num_frame:
                num_frame = sequence_cut['end_frame']
    y_all = np.zeros(num_frame)
    x_all = np.zeros((num_frame, data_dim))
    for sequence_cut in sequence_label:
        if sequence_id == sequence_cut['sequence_id']:
            start_frame = sequence_cut['start_frame']
            end_frame = sequence_cut['end_frame']
            y_all[start_frame:end_frame] = label_list.index(
                sequence_cut['sequence_label'])
    for frame in range(num_frame):
        x_temp = skeleton_prune(cur_skeleton[:, frame])
        x_all[frame, :] = x_temp
    model = load_model(model_path)
    prediction = model.predict(x_all)
    predict_result = np.zeros(num_frame)
    correct_num = 0
    for i in range(num_frame):
        predict_result[i] = int(np.argmax(prediction[i, :]))
        if predict_result[i] == int(y_all[i]):
            correct_num += 1
        print(i, label_list[int(y_all[i])], label_list[int(predict_result[i])])
    vizutil.plot_segmentation(
        [y_all, predict_result, (y_all - predict_result) == 0], num_frame)
    print('accuracy', float(correct_num) / num_frame)
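The per-frame argmax loop above can be collapsed into vectorized NumPy. A minimal self-contained sketch with toy stand-ins for the model output (the shapes mirror this example, the data does not):

import numpy as np

rng = np.random.default_rng(0)
prediction = rng.random((100, 10))       # toy stand-in for model.predict(x_all)
y_all = rng.integers(0, 10, size=100)    # toy ground-truth label indices

predict_result = prediction.argmax(axis=1)           # per-frame decoding
accuracy = float((predict_result == y_all).mean())   # fraction of agreements
print('accuracy', accuracy)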
Example 3
import json
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np

# sequential_model, fill_in and vizutil are project-local helpers assumed to
# be importable.


def without_segmentation_sequence_test_per_frame_sequential(
        data_root, metadata_path):
    model_path = metadata_path + 'models/cnn/'
    relative_path = 'flipped/all/activity_corpus.p'
    if os.path.exists(metadata_path + relative_path):
        with open(metadata_path + relative_path, 'rb') as f:
            activity_corpus = pickle.load(f)
    else:
        # activity_corpus is used unconditionally below, so fail fast.
        raise IOError('missing ' + metadata_path + relative_path)
    model_name = 'affordance_mixed_feature_epoch_30_with_dropout_3_layer_with_weight_1.4_with_initialization.h5'
    result_path = metadata_path + 'data/affordance_result/'
    tpg_id = dict()
    for activity, tpgs in activity_corpus.items():
        for tpg in tpgs:
            tpg_id[tpg.id] = tpg.terminals
    print('Successfully loaded the model!')
    if not os.path.exists(result_path):
        os.mkdir(result_path)
    test_set = [1, 3, 4, 5]
    model = sequential_model(weights_path=model_path + model_name)
    for subject_index in test_set:
        if not os.path.exists(result_path + 'subject' + str(subject_index)):
            os.mkdir(result_path + 'subject' + str(subject_index))
        subject_path = metadata_path + 'data/subject' + str(subject_index)
        frame_count_path = subject_path + '/' + 'affordance_frame_count.json'
        subject = 'Subject' + str(subject_index) + '_rgbd_images/'
        action = os.listdir(data_root + subject)
        gt_path = subject_path + '/' + 'affordance_gt.npy'
        feature_path = subject_path + '/' + 'affordance_sequential_feature.npy'
        label_path = subject_path + '/' + 'affordance_object_label_feature.npy'
        gt_all = np.load(gt_path)
        feature_all = np.load(feature_path)
        label_all = np.load(label_path)
        with open(frame_count_path, 'r') as f:
            frame_count = json.load(f)
        prediction = model.predict([feature_all, label_all])
        for action_category in action:
            video = os.listdir(data_root + subject + action_category)
            for sequence_id in video:
                video_prediction = list()
                video_gt = list()
                index = 0
                num_obj = len(frame_count[sequence_id]['object'])
                sequence_prediction = np.zeros(
                    (num_obj, frame_count[sequence_id]['length'], 12))
                for obj_index in frame_count[sequence_id]['frame_record']:
                    add_index = 0
                    for sequence_list in obj_index:
                        if add_index == 0:
                            video_prediction.append(prediction[
                                sequence_list[0]:sequence_list[1], :])
                            video_gt.append(
                                gt_all[sequence_list[0]:sequence_list[1], :])
                        else:
                            video_prediction[index] = np.concatenate(
                                (video_prediction[index], prediction[
                                    sequence_list[0]:sequence_list[1], :]),
                                axis=0)
                            video_gt[index] = np.concatenate(
                                (video_gt[index],
                                 gt_all[sequence_list[0]:sequence_list[1], :]),
                                axis=0)
                        add_index += 1
                    frame_length = frame_count[sequence_id]['length']
                    if frame_length - video_gt[index].shape[0] != 0:
                        # Front-pad short object tracks so every track spans
                        # the full video length.
                        video_prediction[index] = np.concatenate(
                            (fill_in(
                                frame_length -
                                video_prediction[index].shape[0], 12, 0.8),
                             video_prediction[index]),
                            axis=0)
                        video_gt[index] = np.concatenate(
                            (fill_in(frame_length - video_gt[index].shape[0],
                                     12, 0.8), video_gt[index]),
                            axis=0)
                    predict_result = np.zeros(frame_length)
                    y_all = np.zeros(frame_length)
                    correct_num = 0
                    for i in range(frame_length):
                        predict_result[i] = int(
                            np.argmax(video_prediction[index][i, :]))
                        y_all[i] = np.argmax(video_gt[index][i, :])
                        if y_all[i] == predict_result[i]:
                            correct_num += 1
                    vizutil.plot_segmentation(
                        [y_all, predict_result,
                         (y_all - predict_result) == 0], frame_length)
                    plt.savefig(
                        result_path + 'subject' + str(subject_index) + '/' +
                        sequence_id + '_' +
                        str(frame_count[sequence_id]['object'][index]) +
                        str(index) + '_' +
                        str(float(correct_num) / frame_length) + '.png')
                    plt.close()
                    sequence_prediction[index, :, :] = video_prediction[index]
                    index += 1
                # Save the per-object predictions for this sequence; np.save
                # writes binary data, so it gets the path directly rather
                # than a text-mode file handle.
                np.save(
                    result_path + 'subject' + str(subject_index) + '/' +
                    sequence_id + '.npy', sequence_prediction)
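fill_in is a project-local helper; judging only from its call sites above, it appears to produce an (n, dim) block of constant filler used to front-pad tracks that start late in the video. A plausible reconstruction, offered purely as an assumption:

import numpy as np

def fill_in(num_rows, num_cols, value):
    # Hypothetical reconstruction: constant-valued padding rows, e.g.
    # fill_in(5, 12, 0.8) -> a (5, 12) array filled with 0.8.
    return np.full((num_rows, num_cols), value)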
Example 4
import os
import time

import sklearn.metrics
import torch

# datasets, logutil, utils, vizutil and the inference/predict helpers are
# project-local modules assumed to be importable.


def validate(val_loader, model, args, test=False):
    def compute_accuracy(gt_results, results, metric='micro'):
        return sklearn.metrics.precision_recall_fscore_support(
            gt_results, results, labels=range(10), average=metric)

    batch_time = logutil.AverageMeter()
    baseline_acc_ratio = logutil.AverageMeter()
    subactivity_acc_ratio = logutil.AverageMeter()
    seg_pred_acc_ratio = logutil.AverageMeter()
    frame_pred_acc_ratio = logutil.AverageMeter()

    all_baseline_detections = list()
    all_gt_detections = list()
    all_detections = list()
    all_gt_seg_predictions = list()
    all_gt_frame_predictions = list()
    all_seg_predictions = list()
    all_frame_predictions = list()

    # switch to evaluate mode
    model.eval()

    end_time = time.time()
    for i, (features, labels, probs, total_lengths, ctc_labels, ctc_lengths,
            activities, sequence_ids) in enumerate(val_loader):
        features = utils.to_variable(features, args.cuda)
        labels = utils.to_variable(labels, args.cuda)

        # Legacy (pre-0.4) PyTorch idiom: Variables wrap tensors, and the
        # .data[0] reads below extract scalar values (.item() on modern
        # PyTorch).
        total_lengths = torch.autograd.Variable(total_lengths)

        # Inference
        model_outputs = model(features)
        pred_labels, batch_earley_pred_labels, batch_tokens, batch_seg_pos = inference(
            model_outputs, activities, sequence_ids, ctc_labels, args)

        # Visualize results
        for batch_i in range(labels.size()[1]):
            vizutil.plot_segmentation(
                [
                    labels[:, batch_i].squeeze(),
                    pred_labels[:, batch_i].squeeze(),
                    batch_earley_pred_labels[batch_i]
                ],
                int(total_lengths[batch_i]),
                filename=os.path.join(
                    args.tmp_root, 'visualize', 'segmentation', 'cad',
                    '{}_{}.pdf'.format(activities[batch_i],
                                       sequence_ids[batch_i])),
                border=False,
                vmax=len(datasets.cad_metadata.subactivities))

        # Evaluation
        # Frame-wise detection
        baseline_detections = pred_labels.cpu().data.numpy().flatten().tolist()
        gt_detections = labels.cpu().data.numpy().flatten().tolist()
        detections = [
            l for pred_labels in batch_earley_pred_labels
            for l in pred_labels.tolist()
        ]
        all_baseline_detections.extend(baseline_detections)
        all_gt_detections.extend(gt_detections)
        all_detections.extend(detections)
        baseline_micro_result = compute_accuracy(gt_detections,
                                                 baseline_detections)
        subact_micro_result = compute_accuracy(gt_detections, detections)

        gt_seg_predictions, gt_frame_predictions, seg_predictions, frame_predictions = predict(
            activities, total_lengths, labels, ctc_labels, batch_tokens,
            batch_seg_pos)
        all_gt_seg_predictions.extend(gt_seg_predictions)
        all_gt_frame_predictions.extend(gt_frame_predictions)
        all_seg_predictions.extend(seg_predictions)
        all_frame_predictions.extend(frame_predictions)
        seg_pred_result = compute_accuracy(gt_seg_predictions, seg_predictions)
        frame_pred_result = compute_accuracy(gt_frame_predictions,
                                             frame_predictions)

        baseline_acc_ratio.update(baseline_micro_result[0],
                                  torch.sum(total_lengths).data[0])
        subactivity_acc_ratio.update(subact_micro_result[0],
                                     torch.sum(total_lengths).data[0])
        seg_pred_acc_ratio.update(seg_pred_result[0],
                                  torch.sum(total_lengths).data[0])
        frame_pred_acc_ratio.update(frame_pred_result[0],
                                    len(all_gt_frame_predictions))

        # Measure elapsed time
        batch_time.update(time.time() - end_time)
        end_time = time.time()

    print(' * Baseline Accuracy Ratio {base_acc.avg:.3f}; '.format(
        base_acc=baseline_acc_ratio))
    print(
        ' * Detection Accuracy Ratio {act_acc.avg:.3f}; Segment Prediction Accuracy Ratio Batch Avg {seg_pred_acc.avg:.3f}; Frame Prediction Accuracy Ratio Batch Avg {frame_pred_acc.avg:.3f}; Time {b_time.avg:.3f}'
        .format(act_acc=subactivity_acc_ratio,
                seg_pred_acc=seg_pred_acc_ratio,
                frame_pred_acc=frame_pred_acc_ratio,
                b_time=batch_time))
    print(
        compute_accuracy(all_gt_detections,
                         all_baseline_detections,
                         metric='macro'))
    print(compute_accuracy(all_gt_detections, all_detections, metric='macro'))
    print(
        compute_accuracy(all_gt_seg_predictions,
                         all_seg_predictions,
                         metric='macro'))
    print(
        compute_accuracy(all_gt_frame_predictions,
                         all_frame_predictions,
                         metric='macro'))

    confusion_matrix = sklearn.metrics.confusion_matrix(
        all_gt_detections,
        all_detections,
        labels=range(len(datasets.cad_metadata.subactivities)))
    vizutil.plot_confusion_matrix(confusion_matrix,
                                  datasets.cad_metadata.subactivities[:],
                                  normalize=True,
                                  title='',
                                  filename=os.path.join(
                                      args.tmp_root, 'visualize', 'confusion',
                                      'cad', 'detection.pdf'))
    confusion_matrix = sklearn.metrics.confusion_matrix(
        all_gt_frame_predictions,
        all_frame_predictions,
        labels=range(len(datasets.cad_metadata.subactivities)))
    vizutil.plot_confusion_matrix(confusion_matrix,
                                  datasets.cad_metadata.subactivities[:],
                                  normalize=True,
                                  title='',
                                  filename=os.path.join(
                                      args.tmp_root, 'visualize', 'confusion',
                                      'cad', 'prediction_frame.pdf'))
    confusion_matrix = sklearn.metrics.confusion_matrix(
        all_gt_seg_predictions,
        all_seg_predictions,
        labels=range(len(datasets.cad_metadata.subactivities)))
    vizutil.plot_confusion_matrix(confusion_matrix,
                                  datasets.cad_metadata.subactivities[:],
                                  normalize=True,
                                  title='',
                                  filename=os.path.join(
                                      args.tmp_root, 'visualize', 'confusion',
                                      'cad', 'prediction_seg.pdf'))

    return 1.0 - subactivity_acc_ratio.avg
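compute_accuracy above wraps sklearn.metrics.precision_recall_fscore_support, first with micro averaging per batch and then with macro averaging over the accumulated results. A toy illustration of the difference between the two settings, assuming only that scikit-learn is installed:

import sklearn.metrics

gt = [0, 0, 1, 1, 2, 2]      # toy ground-truth labels
pred = [0, 1, 1, 1, 2, 0]    # toy predictions

# Micro averaging pools every frame into one count; macro averages the
# per-class scores with equal weight, so rare classes matter more.
for metric in ('micro', 'macro'):
    p, r, f, _ = sklearn.metrics.precision_recall_fscore_support(
        gt, pred, labels=range(3), average=metric)
    print(metric, p, r, f)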
Example 5
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import sklearn.metrics
from keras.layers import Activation, Dense, Dropout
from keras.models import Sequential

# my_init (a custom weight initializer) and vizutil are project-local helpers
# assumed to be importable.


def without_segmentation_sequence_test_per_frame_sequential(metadata_path):
    label_list = [
        'null', 'reaching', 'moving', 'placing', 'opening', 'cleaning',
        'closing', 'pouring', 'eating', 'drinking'
    ]
    test_path = metadata_path + 'data/test'
    model_path = metadata_path + 'models/cnn/'
    model_name = 'mixed_feature_last_try_epoch_150_layer_3_with_initialization.h5'
    result_path = metadata_path + 'data/subactivity_result/'
    model = Sequential()
    model.add(Dense(512, init=my_init, input_dim=1452, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu', init=my_init))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu', init=my_init))
    model.add(Dense(10, init=my_init))
    model.add(Activation('softmax'))
    model.load_weights(model_path + model_name)
    print('Successfully loaded the model!')
    if not os.path.exists(result_path):
        os.mkdir(result_path)
    if not os.path.exists(test_path):
        os.mkdir(test_path)
    test_set = [1, 3, 4, 5]
    for subject_index in test_set:
        path_test = list()
        subject_path = metadata_path + 'data/subject' + str(subject_index)
        img_path = subject_path + '/' + 'img_path.txt'
        gt_path = subject_path + '/' + 'subactivity_gt.npy'
        frame_count_path = subject_path + '/' + 'frame_count.json'
        sk_sq_path = subject_path + '/' + 'sk_sq.npy'
        # Results are written into a per-subject folder below; make sure it
        # exists before saving.
        if not os.path.exists(result_path + 'subject' + str(subject_index)):
            os.mkdir(result_path + 'subject' + str(subject_index))
        with open(img_path, 'r') as f:
            path_test.extend(f.readlines())
        label_all = np.load(gt_path)
        num_frame = len(path_test)
        y_all = np.zeros(num_frame)
        sk_sq = np.load(sk_sq_path)
        prediction = model.predict(sk_sq)
        predict_result = np.zeros(num_frame)
        print('Successfully predicted the data!')
        for i in range(num_frame):
            predict_result[i] = int(np.argmax(prediction[i, :]))
            y_all[i] = np.argmax(label_all[i, :])
        with open(frame_count_path, 'r') as f:
            video_count = json.load(f)
        for video in video_count:
            print(video)
            correct_num = 0
            start_num = video_count[video]['start_num']
            end_num = video_count[video]['end_num']
            for j in range(end_num - start_num):
                if int(predict_result[start_num + j]) == int(y_all[start_num + j]):
                    correct_num += 1
            vizutil.plot_segmentation([
                y_all[start_num:end_num], predict_result[start_num:end_num],
                (y_all[start_num:end_num] - predict_result[start_num:end_num])
                == 0
            ], end_num - start_num)
            plt.savefig(result_path + 'subject' + str(subject_index) + '/' +
                        video + '_' +
                        str(float(correct_num) /
                            (end_num - start_num)) + '.png')
            plt.close()
            cm = sklearn.metrics.confusion_matrix(
                y_all[start_num:end_num],
                predict_result[start_num:end_num],
                labels=range(10))
            vizutil.plot_confusion_matrix(cm,
                                          classes=label_list,
                                          normalize=True,
                                          filename=result_path + 'subject' +
                                          str(subject_index) + '/' + video +
                                          '_confusion.png')
            # Save the per-frame predictions for this video; np.save gets the
            # path directly rather than a text-mode file handle.
            np.save(
                result_path + 'subject' + str(subject_index) + '/' + video +
                '.npy', prediction[start_num:end_num, :])
        cm = sklearn.metrics.confusion_matrix(y_all,
                                              predict_result,
                                              labels=range(10))
        vizutil.plot_confusion_matrix(cm,
                                      classes=label_list,
                                      normalize=True,
                                      filename=result_path + 'subject' +
                                      str(subject_index) + '/' +
                                      'a_confusion_all.png')
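The init= keyword in the model definition above is the Keras 1 signature; under Keras 2 the same stack would be written with kernel_initializer. A sketch of the equivalent definition, assuming the project-local my_init initializer is importable:

from keras.layers import Activation, Dense, Dropout
from keras.models import Sequential

model = Sequential()
model.add(Dense(512, kernel_initializer=my_init, input_dim=1452,
                activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu', kernel_initializer=my_init))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu', kernel_initializer=my_init))
model.add(Dense(10, kernel_initializer=my_init))
model.add(Activation('softmax'))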
Example 6
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import sklearn.metrics

# vgg_16, img_from_list_test, get_f1_score and vizutil are project-local
# helpers assumed to be importable.


def without_segmentation_sequence_test_per_frame_vgg16(metadata_path):
    nb_classes = 10
    batch_size = 1
    test_path = metadata_path + 'data/test'
    model_path = metadata_path + 'models/cnn/'
    model_name = 'vgg_tune_subactivity_train_134_learning_rate_-5_.h5'
    result_path = metadata_path + 'data/subactivity_result/'
    model = vgg_16(model_path + model_name, nb_classes)
    print('Successfully loaded the model!')
    if not os.path.exists(result_path):
        os.mkdir(result_path)
    if not os.path.exists(test_path):
        os.mkdir(test_path)
    test_set = [5]
    for subject_index in test_set:
        path_test = list()
        subject_path = metadata_path + 'data/subject' + str(subject_index)
        img_path = subject_path + '/' + 'img_path.txt'
        gt_path = subject_path + '/' + 'subactivity_gt.npy'
        bdb_path = subject_path + '/' + 'bdb_gt.npy'
        frame_count_path = subject_path + '/' + 'frame_count.json'
        # Results are written into a per-subject folder below; make sure it
        # exists before saving.
        if not os.path.exists(result_path + 'subject' + str(subject_index)):
            os.mkdir(result_path + 'subject' + str(subject_index))
        with open(img_path, 'r') as f:
            path_test.extend(f.readlines())
        label_all = np.load(gt_path)
        num_frame = len(path_test)
        # Frames left over after the generator's whole batches.
        print(num_frame - (num_frame // batch_size) * batch_size)
        y_all = np.zeros(num_frame)
        test_generator = img_from_list_test(batch_size, img_path, bdb_path)
        prediction = model.predict_generator(test_generator,
                                             val_samples=num_frame)
        predict_result = np.zeros(num_frame)
        print('Successfully predicted the data!')
        for i in range(num_frame):
            predict_result[i] = int(np.argmax(prediction[i, :]))
            y_all[i] = np.argmax(label_all[i, :])
        with open(frame_count_path, 'r') as f:
            video_count = json.load(f)
        for video in video_count:
            print(video)
            correct_num = 0
            start_num = video_count[video]['start_num']
            end_num = video_count[video]['end_num']
            for j in range(end_num - start_num):
                if int(predict_result[start_num + j]) == int(y_all[start_num + j]):
                    correct_num += 1
            vizutil.plot_segmentation([
                y_all[start_num:end_num], predict_result[start_num:end_num],
                (y_all[start_num:end_num] - predict_result[start_num:end_num])
                == 0
            ], end_num - start_num)
            # Save the per-frame predictions for this video; np.save gets the
            # path directly rather than a text-mode file handle.
            np.save(
                result_path + 'subject' + str(subject_index) + '/' + video +
                '.npy', prediction[start_num:end_num, :])
            plt.savefig(result_path + 'subject' + str(subject_index) + '/' +
                        video + '_' +
                        str(float(correct_num) /
                            (end_num - start_num)) + '.png')
            plt.close()
        precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(
            y_all, predict_result, labels=range(10), average='micro')
        print('micro result')
        print(precision, recall, beta_score, support)
        print(get_f1_score(precision, recall))
        precision, recall, beta_score, support = sklearn.metrics.precision_recall_fscore_support(
            y_all, predict_result, labels=range(10), average='macro')
        print('macro result')
        print(precision, recall, beta_score, support)
        print(get_f1_score(precision, recall))
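get_f1_score is project-local; since it is called with a precision and a recall, it presumably computes the standard F1 score. A plausible reconstruction, offered as an assumption:

def get_f1_score(precision, recall):
    # Hypothetical reconstruction: harmonic mean of precision and recall,
    # guarding against a zero denominator.
    if precision + recall == 0:
        return 0.0
    return 2.0 * precision * recall / (precision + recall)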