예제 #1
0
def main():
    """Train a model on the 'train' split, passing the 'test' split along.

    Fixes the seed through ``set_determinstic_mode``, parses the command
    line, builds a ``Trainer`` from the ``config`` and ``device`` arguments
    and hands both data splits (as returned by
    ``breakfast.get_mstcn_data``) to ``Trainer.train``.
    """
    set_determinstic_mode(seed=1538574472)
    cli_args = _parse_args()
    model_trainer = Trainer(cli_args.config, cli_args.device)
    # The 'train' split is loaded before the 'test' split, as arguments are
    # evaluated left to right.
    model_trainer.train(
        breakfast.get_mstcn_data('train'),
        breakfast.get_mstcn_data('test'),
    )
def main():
    """Run a trained model on the test split and write a submission.

    The trainer class is imported lazily according to ``args.model``; a
    fresh submission directory ``SUBMISSION_DIR/<model>/<config>`` is
    created, predictions are computed and passed to ``get_cls_results``.

    Returns:
        Whatever ``get_cls_results`` returns for the generated submission.

    Raises:
        ValueError: if ``args.model`` is not one of the supported models, or
            if the submission directory already exists.
    """
    set_determinstic_mode()
    args = _parse_args()

    # Import only the trainer that is actually needed.
    if args.model == 'mstcn':
        from scripts.action_segmentation.train_mstcn import Trainer
    elif args.model == 'coarse-inputs':
        from scripts.action_segmentation.train_coarse_inputs import Trainer
    elif args.model == 'coarse-inputs-boundary-true':
        from scripts.action_segmentation.train_coarse_inputs_boundary_true import Trainer
    else:
        raise ValueError('no such model')

    submission_dir = os.path.join(SUBMISSION_DIR, args.model, args.config)
    # Create the directory in a single step instead of "check, then create":
    # this removes the window in which another process could create it
    # between the existence check and makedirs.
    try:
        os.makedirs(submission_dir)
    except FileExistsError:
        raise ValueError(
            submission_dir +
            ' exists, please delete if you want a new submission with this name'
        ) from None

    trainer = Trainer(args.config, args.device)
    if 'boundary' in args.model:  # this is for boundary aware models
        i3d_feats, timestamps = get_boundary_aware_submission_data()
        frame_level_predictions = trainer.predict(i3d_feats, timestamps)
    else:
        submission_segments, _, _ = breakfast.get_mstcn_data(split='test')
        frame_level_predictions = trainer.predict(submission_segments)
    return get_cls_results(frame_level_predictions, submission_dir)
예제 #3
0
def get_mstcn_action_reg_data(split):
    """Cut the per-frame labels of a split into non-'SIL' segments.

    For every video returned by ``get_mstcn_data(split=split)`` the label
    sequence is scanned for runs of identical labels. Each run whose label
    is not ``'SIL'`` contributes one entry to each of the returned lists.

    Args:
        split: forwarded unchanged to ``get_mstcn_data``.

    Returns:
        A tuple ``(reg_files, segment_windows, reg_labels, reg_logits)`` of
        equally long lists: the feature file of the video, the
        ``[start_idx, end_idx]`` window of the run (``end_idx`` is the index
        of the first frame after the run), the label of the run, and the
        ``logits`` entry at the first frame of the run.
    """
    i3d_files, labels, logits = get_mstcn_data(split=split)
    reg_files, segment_windows, reg_labels, reg_logits = [], [], [], []

    def _record_run(video_idx, video_file, start, stop, run_label):
        # 'SIL' runs are not kept.
        if run_label != 'SIL':
            segment_windows.append([start, stop])  # start idx, end_idx
            reg_files.append(video_file)
            reg_labels.append(run_label)
            reg_logits.append(logits[video_idx][start])

    for video_idx, frame_labels in enumerate(labels):
        video_file = i3d_files[video_idx]
        run_label = frame_labels[0]
        run_start = 0
        n_frames = len(frame_labels)
        for frame_idx in range(1, n_frames):
            frame_label = frame_labels[frame_idx]
            if frame_label != run_label:
                # The label changed: close the run that just ended.
                _record_run(video_idx, video_file, run_start, frame_idx,
                            run_label)
                run_label = frame_label
                run_start = frame_idx
        # Close the run that reaches the end of the video.
        _record_run(video_idx, video_file, run_start, n_frames, run_label)

    assert len(reg_files) == len(segment_windows) == len(reg_labels) == len(
        reg_logits)
    return reg_files, segment_windows, reg_labels, reg_logits
def main():
    """Turn stored frame-level predictions into a segment-level submission.

    For every test video, the second line of the file
    ``SUBMISSION_DIR/zhangcan/<config>/<video name>`` is read, split on
    spaces and mapped through ``breakfast.read_mapping_file()``. Each pair of
    consecutive timestamps in ``breakfast.SUBMISSION_LABEL_FILE`` delimits a
    segment; the segment's category is the most frequent mapped prediction
    among its frames. The result is written to ``submission.csv`` in the
    same directory and passed to ``get_submission_accuracy``.
    """
    set_determinstic_mode()
    args = _parse_args()

    submission_dir = os.path.join(SUBMISSION_DIR, 'zhangcan', args.config)
    test_feat_files, _, _ = breakfast.get_mstcn_data(split='test')

    # File name without directory and without anything after the first '.'.
    video_names = [
        os.path.basename(feat_path).split('.')[0]
        for feat_path in test_feat_files
    ]
    mapping = breakfast.read_mapping_file()

    per_video_predictions = []
    for name in video_names:
        with open(os.path.join(submission_dir, name), 'r') as pred_file:
            prediction_line = pred_file.readlines()[1]
        per_video_predictions.append(
            [mapping[token] for token in prediction_line.strip().split(' ')])

    with open(breakfast.SUBMISSION_LABEL_FILE, 'r') as label_file:
        timestamp_lines = label_file.readlines()
    all_boundaries = [
        np.array(raw.strip().split(' ')).astype(int)
        for raw in timestamp_lines
    ]

    rows = ['Id,Category\n']
    segment_id = 0
    for video_idx, video_predictions in enumerate(per_video_predictions):
        boundaries = all_boundaries[video_idx]
        for start, end in zip(boundaries[:-1], boundaries[1:]):
            # Majority vote over the frames of the segment.
            votes = np.bincount(video_predictions[start:end])
            winner = np.argmax(votes).item()
            rows.append('{0},{1}\n'.format(segment_id, winner))
            segment_id += 1

    submission_file = os.path.join(submission_dir, 'submission.csv')
    with open(submission_file, 'w') as out_file:
        out_file.write(''.join(rows))
    get_submission_accuracy(submission_file)
def get_boundary_aware_submission_data():
    """Load the test feature files together with their segment timestamps.

    Every line of ``breakfast.SUBMISSION_LABEL_FILE`` is split on spaces and
    converted to an integer array; the feature files come from
    ``breakfast.get_mstcn_data(split='test')``.

    Returns:
        A tuple ``(i3d_files, submission_timestamps)`` where
        ``submission_timestamps`` is a list holding one integer array per
        line of the label file. Both have the same length (asserted).
    """
    with open(breakfast.SUBMISSION_LABEL_FILE, 'r') as label_file:
        raw_lines = label_file.readlines()
    boundaries_per_video = [
        np.array(raw.strip().split(' ')).astype(int) for raw in raw_lines
    ]

    feature_files, _, _ = breakfast.get_mstcn_data(split='test')
    assert len(boundaries_per_video) == len(feature_files)
    return feature_files, boundaries_per_video
def get_cls_results(segment_predictions,
                    submission_dir,
                    postprocess='midpoint'):
    """Write segment-level predictions to ``submission.csv`` and score them.

    Segments are enumerated video by video: each pair of consecutive
    timestamps on a line of ``breakfast.SUBMISSION_LABEL_FILE`` is one
    segment, and the k-th segment overall receives
    ``segment_predictions[k]``.

    Args:
        segment_predictions: one prediction per segment, in enumeration
            order; its length must equal the total number of segments.
        submission_dir: directory in which ``submission.csv`` is written.
        postprocess: not used by this function.

    Returns:
        The result of ``get_submission_accuracy`` on the written file.
    """
    test_feat_files, _, _ = breakfast.get_mstcn_data(split='test')
    video_names = [os.path.basename(path) for path in test_feat_files]

    with open(breakfast.SUBMISSION_LABEL_FILE, 'r') as label_file:
        raw_lines = label_file.readlines()
    boundaries_per_video = [
        np.array(raw.strip().split(' ')).astype(int) for raw in raw_lines
    ]

    rows = ['Id,Category\n']
    # Serves both as the running CSV id and as the index into
    # segment_predictions.
    segment_id = 0
    for video_idx in range(len(video_names)):
        boundaries = boundaries_per_video[video_idx]
        for start, end in zip(boundaries[:-1], boundaries[1:]):
            assert end - start > 0
            prediction = segment_predictions[segment_id]
            rows.append('{0},{1}\n'.format(segment_id, prediction))
            segment_id += 1
    assert segment_id == len(segment_predictions)

    submission_file = os.path.join(submission_dir, 'submission.csv')
    with open(submission_file, 'w') as out_file:
        out_file.write(''.join(rows))
    return get_submission_accuracy(submission_file)
def get_cls_results(frame_level_predictions,
                    submission_dir,
                    postprocess='midpoint'):
    """Dump frame-level predictions and derive a segment-level submission.

    For every test video the frame predictions are written, space separated,
    to ``<submission_dir>/frame-level-predictions/<video name>.txt``. The
    predictions are then collapsed into runs of identical labels; the
    longest runs are kept (in temporal order) until the video has as many
    runs as it has ground-truth segments, i.e. the number of timestamps on
    its line of ``breakfast.SUBMISSION_LABEL_FILE`` minus one. If there are
    too few runs, the last run is repeated. One CSV row is emitted per kept
    run.

    Args:
        frame_level_predictions: per-video sequences of per-frame class
            predictions, ordered like the test split.
        submission_dir: directory receiving ``submission.csv`` and the
            ``frame-level-predictions`` sub-directory.
        postprocess: not used by this function.

    Returns:
        The result of ``get_submission_accuracy`` on the written file.

    Raises:
        ValueError: if a video needs at least one segment prediction but
            none of its frame predictions lies strictly between 0 and 48.
    """
    submission_feats, _, _ = breakfast.get_mstcn_data(split='test')
    frame_prediction_dir = os.path.join(submission_dir,
                                        'frame-level-predictions')
    os.makedirs(frame_prediction_dir, exist_ok=True)

    video_names = [
        os.path.split(feat_file)[-1].split('.')[0]
        for feat_file in submission_feats
    ]
    for i, video_name in enumerate(video_names):
        frame_prediction_file = os.path.join(frame_prediction_dir,
                                             video_name + '.txt')
        frame_predictions = np.array(frame_level_predictions[i])
        with open(frame_prediction_file, 'w') as f:
            f.write(' '.join(frame_predictions.astype(str)) + '\n')

    with open(breakfast.SUBMISSION_LABEL_FILE, 'r') as f:
        submission_timestamps = [
            np.array(line.strip().split(' ')).astype(int) for line in f
        ]

    print('INFO: creating segment unaware predictions')
    n_segments = 0
    rows = ['Id,Category\n']
    for i, video_name in enumerate(tqdm(video_names)):
        n_gt_segments = len(submission_timestamps[i]) - 1
        if n_gt_segments <= 0:
            # Nothing to predict for this video.
            continue

        runs = _collapse_valid_runs(frame_level_predictions[i])
        if not runs:
            raise ValueError(
                'no frame prediction strictly between 0 and 48 for video '
                '{0}; cannot produce {1} segment predictions'.format(
                    video_name, n_gt_segments))

        for run_label, _ in _select_longest_runs(runs, n_gt_segments):
            rows.append('{0},{1}\n'.format(n_segments, run_label))
            n_segments += 1

    submission_file = os.path.join(submission_dir, 'submission.csv')
    with open(submission_file, 'w') as f:
        f.write(''.join(rows))
    return get_submission_accuracy(submission_file)


def _collapse_valid_runs(frame_predictions):
    """Return ``[label, run_length]`` for each run with ``0 < label < 48``."""
    runs = []
    if len(frame_predictions) == 0:
        return runs

    # NOTE(review): presumably 0 is the background class and 48 the number
    # of classes -- confirm against the mapping file.
    def _keep(label, length):
        if 0 < label < 48:
            runs.append([label, length])

    # The first run starts at the first frame of this video (the original
    # code indexed with the video index here).
    run_label = frame_predictions[0]
    run_length = 1
    for j in range(1, len(frame_predictions)):
        frame_label = frame_predictions[j]
        if frame_label != run_label:
            _keep(run_label, run_length)
            run_label = frame_label
            run_length = 1
        else:
            run_length += 1
    # The run reaching the end of the video must be recorded as well.
    _keep(run_label, run_length)
    return runs


def _select_longest_runs(runs, n_wanted):
    """Pick ``n_wanted`` runs, favouring long ones, in temporal order.

    ``runs`` must be non-empty. If it has fewer than ``n_wanted`` entries
    the last run is repeated until it has enough.
    """
    padded = list(runs)
    padded.extend([padded[-1]] * (n_wanted - len(padded)))

    lengths = np.array([run_length for _, run_length in padded])
    is_selected = np.zeros(len(padded), dtype=bool)
    # Lower the length threshold step by step until enough runs qualify;
    # np.unique returns the distinct lengths in ascending order.
    for threshold in np.unique(lengths)[::-1]:
        if np.sum(is_selected) >= n_wanted:
            break
        is_selected = lengths >= threshold

    chosen = [run for run, keep in zip(padded, is_selected) if keep]
    # Ties on the last threshold can select too many runs; keep the earliest.
    return chosen[:n_wanted]