Example 1
def eval_split_by_lev(model, vocab, split):
    # Greedy-decode every video in the split and report the word error rate
    # (WER) over the concatenated gloss sequences.
    df = get_split_df(split)
    pp = ProgressPrinter(df.shape[0], 5)
    hypes = []
    gts = []
    with torch.no_grad():
        for idx in range(df.shape[0]):
            row = df.iloc[idx]
            gt = vocab.encode(row.annotation)
            video_path, feat_path = get_video_path(row, split)
            tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)
            pred = model(tensor_video).squeeze(1).log_softmax(dim=1).argmax(
                dim=1).cpu().numpy()

            # Greedy CTC-style decoding: drop blanks (index 0) and collapse
            # consecutive repeats.
            hypo = []
            for i in range(len(pred)):
                if pred[i] == 0 or (i > 0 and pred[i] == pred[i - 1]):
                    continue
                hypo.append(pred[i])

            gts += gt
            hypes += hypo
            pp.show(idx)

        pp.end()

        hypes = "".join([chr(x) for x in hypes])
        gts = "".join([chr(x) for x in gts])
        wer = Lev.distance(hypes, gts) / len(gts) * 100

        print(wer)
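A minimal usage sketch for the evaluation helper above. The vocabulary class and model loader shown here (Vocab, get_end2end_model) are assumptions modeled on Example 8, not confirmed APIs of the project.

# Hypothetical usage: load the end-to-end model and report WER on the test split.
vocab = Vocab()
model, loaded = get_end2end_model(vocab, True, STF_TYPE, USE_ST_FEAT)
if loaded:
    model.eval()
    eval_split_by_lev(model, vocab, "test")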
Example 2
def generate_openpose_features_split(pose_estimator, split):
    # Extract OpenPose keypoint features for every video in the split and
    # cache them as .npy files, skipping videos that were already processed.
    with torch.no_grad():
        df = get_split_df(split)
        print(SOURCE, "Feature extraction:", STF_MODEL, split, "split")
        L = df.shape[0]

        pp = ProgressPrinter(L, 1)
        for idx in range(L):
            row = df.iloc[idx]
            video_dir, feat_path = get_video_path(row, split, feat_ext=".npy")

            if os.path.exists(feat_path):
                pp.omit()
                continue

            feat_dir = os.path.split(feat_path)[0]

            feats = pose_estimator.estimate_video_pose(video_dir)

            if not os.path.exists(feat_dir):
                os.makedirs(feat_dir)
            np.save(feat_path, feats)

            if SHOW_PROGRESS:
                pp.show(idx)

        if SHOW_PROGRESS:
            pp.end()

        print()
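A hedged sketch of how this extraction might be driven across splits; PoseEstimator is a placeholder name for whatever OpenPose wrapper exposes estimate_video_pose(video_dir) in this project.

# Hypothetical usage: extract and cache pose features for every split.
pose_estimator = PoseEstimator()
for split in ["train", "dev", "test"]:
    generate_openpose_features_split(pose_estimator, split)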
Example 3
    def _get_feat(self, row, glosses=None):
        # Load raw video frames; skip samples with fewer than four frames
        # per gloss.
        video_path, feat_path = get_video_path(row, self.split)

        feat = get_images(video_path)
        feat_len = len(feat)

        if feat_len < len(glosses) * 4:
            return None, None, None

        return video_path, feat, feat_len
Example 4
    def _get_feat(self, row, glosses=None):
        # Load pre-extracted feature tensors; skip missing or malformed samples.
        video_path, feat_path = get_video_path(row, self.split)

        if not os.path.exists(feat_path):
            return None, None, None

        feat = torch.load(feat_path)
        feat_len = len(feat)

        if feat_len < len(glosses) or len(feat.shape) < 2:
            return None, None, None

        return feat_path, feat, feat_len
Example 5
    def _get_feat(self, row, glosses=None):
        # Pose-based models store features as .npy arrays; image-based models
        # store them as .pt tensors.
        ext = ".npy" if STF_MODEL.startswith("pose") else ".pt"
        video_path, feat_path = get_video_path(row, self.split, feat_ext=ext, stf_feat=False)

        if not os.path.exists(feat_path):
            return None, None, None

        feat = np.load(feat_path) if STF_MODEL.startswith("pose") else torch.load(feat_path)
        feat_len = len(feat)

        if feat_len < len(glosses) or len(feat.shape) < 2:
            return None, None, None

        return feat_path, feat, feat_len
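The three _get_feat variants above (Examples 3-5) share one contract: return a feature reference, the features, and their length, or (None, None, None) when the sample should be skipped. Below is a minimal sketch of a dataset-building loop that relies on that contract; self.df, self.vocab, and self.split are assumptions modeled on the surrounding examples.

    def _build_samples(self):
        # Hypothetical sketch: keep only the samples that _get_feat accepts.
        samples = []
        for idx in range(self.df.shape[0]):
            row = self.df.iloc[idx]
            glosses = self.vocab.encode(row.annotation)
            feat_ref, feat, feat_len = self._get_feat(row, glosses)
            if feat is None:
                continue  # missing file, too few frames, or malformed features
            samples.append((feat_ref, glosses, feat_len))
        return samples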
Example 6
def clean_anno_KRSL(split, save=True):
    # Drop annotation rows whose video file is missing on disk and optionally
    # write the cleaned split back to its CSV file.
    df = get_split_df(split)
    L = df.shape[0]
    to_remove = []
    for i in range(L):
        row = df.iloc[i]

        video_path, _ = get_video_path(row, split)
        if not os.path.exists(video_path):
            to_remove.append(i)

    df = df.drop(df.index[to_remove])
    if save:
        df.to_csv(os.path.join(ANNO_DIR, split + ".csv"), index=False)

    print("Cleaned ", split, "dataset, from", L, "to", df.shape[0])
Example 7
def gen_img_feat_split(model, preprocess, split):
    # KRSL names its development split "val" rather than "dev".
    if SOURCE == "KRSL" and split == "dev":
        split = "val"

    df = get_split_df(split)

    print(SOURCE, STF_MODEL, "feature extraction:", split, "split")
    L = df.shape[0]

    pp = ProgressPrinter(L, 10)
    for idx in range(L):
        row = df.iloc[idx]
        video_path, feat_path = get_video_path(row, split, stf_feat=False)
        if os.path.exists(feat_path) and not FEAT_OVERRIDE:
            pp.omit()
            continue

        feat_dir = os.path.split(feat_path)[0]

        images = get_images(video_path)
        if len(images) < 4:
            continue

        tensor_video = get_tensor_video(images, preprocess, "2D")
        inp = tensor_video.to(DEVICE)
        # Feature extraction only: run the backbone without tracking gradients.
        with torch.no_grad():
            feat = model(inp).cpu()

        if not os.path.exists(feat_dir):
            os.makedirs(feat_dir)

        torch.save(feat, feat_path)

        if SHOW_PROGRESS:
            pp.show(idx)

    if SHOW_PROGRESS:
        pp.end()
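A minimal sketch of driving the 2D feature extraction; get_stf_backbone is a hypothetical placeholder for whatever loads the image model and its preprocessing transform in this project.

# Hypothetical usage: extract and cache 2D image features for every split.
model, preprocess = get_stf_backbone(STF_MODEL)
model = model.to(DEVICE).eval()
for split in ["train", "dev", "test"]:
    gen_img_feat_split(model, preprocess, split)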
Example 8
def generate_gloss_dataset(vocab, stf_type=STF_TYPE, use_feat=USE_ST_FEAT):
    # Cut each training video into fixed-stride gloss clips and label them
    # with the end-to-end model's greedy predictions.
    print("Generation of the Gloss-Recognition Dataset")
    model, loaded = get_end2end_model(vocab, True, stf_type, use_feat)

    mode = "3D" if stf_type else "2D"

    if not loaded:
        print("STF or SEQ2SEQ model doesn't exist")
        exit(1)

    model.eval()

    temp_stride = 4

    rerun_out_dir = os.path.join(GR_DATASET_DIR, "STF_RERUN")
    rerun_out_path = os.path.join(rerun_out_dir, STF_MODEL + ".bin")

    # Reuse cached per-video metadata from a previous feature-based run so the
    # raw frames don't have to be re-read.
    stf_rerun = use_feat and os.path.exists(rerun_out_path)

    if stf_rerun:
        with open(rerun_out_path, 'rb') as f:
            feats_rerun_data = pickle.load(f)
    else:
        feats_rerun_data = {"frame_n": [], "gloss_paths": [], "gloss_lens": []}

    df = get_split_df("train")
    Y = []
    X = []
    X_lens = []

    pp = ProgressPrinter(df.shape[0], 5)
    cur_n_gloss = 0
    for idx in range(df.shape[0]):
        row = df.iloc[idx]
        video_path, feat_path = get_video_path(row, "train")

        if stf_rerun:
            frame_n = feats_rerun_data["frame_n"][idx]

            if frame_n < temp_stride:
                pp.omit()
                continue

            gloss_paths = feats_rerun_data["gloss_paths"][idx]
            gloss_lens = feats_rerun_data["gloss_lens"][idx]

            with torch.no_grad():
                tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)

        else:
            images = get_images(video_path)
            frame_n = len(images)
            feats_rerun_data["frame_n"].append(frame_n)

            if frame_n < temp_stride:
                pp.omit()
                feats_rerun_data["gloss_paths"].append("")
                feats_rerun_data["gloss_lens"].append(0)
                continue

            gloss_paths, gloss_lens = get_gloss_paths(images, cur_n_gloss, temp_stride, mode)
            feats_rerun_data["gloss_paths"].append(gloss_paths)
            feats_rerun_data["gloss_lens"].append(gloss_lens)

            with torch.no_grad():
                if use_feat:
                    tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)
                else:
                    tensor_video = get_tensor_video(images, preprocess_3d, mode).unsqueeze(0).to(DEVICE)

        X += gloss_paths
        X_lens += gloss_lens
        Y += get_decoded_prediction(model, tensor_video, vocab.encode(row.annotation))

        assert (len(Y) == len(X) == len(X_lens))

        cur_n_gloss = len(X)
        if SHOW_PROGRESS:
            pp.show(idx)

    shuffle_and_save_dataset(X, X_lens, Y)
    if use_feat and not stf_rerun:
        # Cache the per-video metadata so later runs can skip frame extraction.
        if not os.path.exists(rerun_out_dir):
            os.makedirs(rerun_out_dir)
        with open(rerun_out_path, 'wb') as f:
            pickle.dump(feats_rerun_data, f)

    if SHOW_PROGRESS:
        pp.end()
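A hedged sketch of invoking the generator above; Vocab is assumed to be the project's vocabulary class (its encode method is used throughout these examples) and is not a confirmed API.

# Hypothetical usage: build the gloss-recognition dataset from the train split.
vocab = Vocab()
generate_gloss_dataset(vocab, stf_type=STF_TYPE, use_feat=USE_ST_FEAT)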