Example #1
def eval_split_by_lev(model, vocab, split):
    """Greedy-decode every video in a split and report the error rate as a
    character-level Levenshtein distance over the encoded gloss sequences."""
    df = get_split_df(split)
    pp = ProgressPrinter(df.shape[0], 5)
    hypes = []
    gts = []
    with torch.no_grad():
        for idx in range(df.shape[0]):
            row = df.iloc[idx]
            gt = vocab.encode(row.annotation)
            video_path, feat_path = get_video_path(row, split)
            tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)
            pred = model(tensor_video).squeeze(1).log_softmax(dim=1).argmax(
                dim=1).cpu().numpy()

            # CTC-style collapse: drop blanks (index 0) and merge repeats.
            hypo = []
            for i in range(len(pred)):
                if pred[i] == 0 or (i > 0 and pred[i] == pred[i - 1]):
                    continue
                hypo.append(pred[i])

            gts += gt
            hypes += hypo
            pp.show(idx)

        pp.end()

        hypes = "".join([chr(x) for x in hypes])
        gts = "".join([chr(x) for x in gts])
        wer = Lev.distance(hypes, gts) / len(gts) * 100

        print(wer)
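The chr() mapping above is what lets a plain string edit distance stand in for an edit distance over gloss-ID sequences. A minimal, self-contained sketch of the same computation, assuming Lev is python-Levenshtein imported as Lev and using made-up gloss IDs:

import Levenshtein as Lev

ref = [5, 12, 7]  # hypothetical reference gloss IDs (from vocab.encode)
hyp = [5, 7]      # hypothetical decoded gloss IDs

# Map each integer label to a single character so the string distance
# counts insertions, deletions and substitutions of whole labels.
ref_s = "".join(chr(x) for x in ref)
hyp_s = "".join(chr(x) for x in hyp)

wer = Lev.distance(hyp_s, ref_s) / len(ref_s) * 100
print(round(wer, 2))  # 33.33: one deletion against three reference tokens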
Example #2
def generate_openpose_features_split(pose_estimator, split):
    """Extract OpenPose pose features for every video in a split and cache
    them as .npy files."""
    with torch.no_grad():
        df = get_split_df(split)
        print(SOURCE, "Feature extraction:", STF_MODEL, split, "split")
        L = df.shape[0]

        pp = ProgressPrinter(L, 1)
        for idx in range(L):
            row = df.iloc[idx]
            video_dir, feat_path = get_video_path(row, split, feat_ext=".npy")

            # Skip videos whose features were already extracted.
            if os.path.exists(feat_path):
                pp.omit()
                continue

            feat_dir = os.path.split(feat_path)[0]

            feats = pose_estimator.estimate_video_pose(video_dir)

            if not os.path.exists(feat_dir):
                os.makedirs(feat_dir)
            np.save(feat_path, feats)

            if SHOW_PROGRESS:
                pp.show(idx)

        if SHOW_PROGRESS:
            pp.end()

        print()
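A hedged sketch of how this helper might be driven; only estimate_video_pose() appears in the snippet above, so the PoseEstimator constructor and the split names are assumptions:

# Hypothetical driver: extract and cache OpenPose features for every split.
pose_estimator = PoseEstimator()  # assumption: the project's pose wrapper
for split in ["train", "dev", "test"]:
    generate_openpose_features_split(pose_estimator, split)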
Example #3
    def _build_dataset(self):
        """Load the cached (X, Y, X_lens) pickles for this split when
        self.load is set and they exist; otherwise build them from the
        annotation dataframe and cache them."""

        dataset_dir = os.sep.join([END2END_DATASETS_DIR, self._get_ffm()])

        X_path = os.sep.join([dataset_dir, "X_" + self.split + ".pkl"])
        Y_path = os.sep.join([dataset_dir, "Y_" + self.split + ".pkl"])
        X_lens_path = os.sep.join(
            [dataset_dir, "X_lens_" + self.split + ".pkl"])

        if os.path.exists(X_path) and os.path.exists(
                Y_path) and os.path.exists(X_lens_path) and self.load:
            with open(X_path, 'rb') as f:
                self.X = pickle.load(f)

            with open(Y_path, 'rb') as f:
                self.Y = pickle.load(f)

            with open(X_lens_path, 'rb') as f:
                self.X_lens = pickle.load(f)

            print(self.split[0].upper() + self.split[1:], "dataset loaded")
        else:
            print("Building", self.split, "dataset")
            df = get_split_df(self.split)
            self.X = []
            self.Y = []
            self.X_lens = []

            pp = ProgressPrinter(df.shape[0], 5)
            for idx in range(df.shape[0]):
                row = df.iloc[idx]
                glosses = self.vocab.encode(row.annotation)
                feat_path, feat, feat_len = self._get_feat(row, glosses)
                if feat is None:
                    continue

                self.X.append(feat_path)
                self.Y.append(glosses)
                self.X_lens.append(feat_len)

                if self._show_progress():
                    pp.show(idx)

            if self._show_progress():
                pp.end()

            if not os.path.exists(dataset_dir):
                os.makedirs(dataset_dir)

            with open(X_path, 'wb') as f:
                pickle.dump(self.X, f)

            with open(Y_path, 'wb') as f:
                pickle.dump(self.Y, f)

            with open(X_lens_path, 'wb') as f:
                pickle.dump(self.X_lens, f)

        self.length = len(self.X)
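The load-or-build caching used by _build_dataset is a generic pattern; a minimal standalone sketch of the same idea (the names below are illustrative, not from the project):

import os
import pickle

def load_or_build(path, build_fn):
    # Return the cached object if the pickle exists, otherwise build and cache it.
    if os.path.exists(path):
        with open(path, "rb") as f:
            return pickle.load(f)
    data = build_fn()
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(data, f)
    return data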
Example #4
def clean_anno_KRSL(split, save=True):
    """Drop annotation rows whose video file is missing from a KRSL split
    and optionally write the cleaned CSV back to ANNO_DIR."""
    df = get_split_df(split)
    L = df.shape[0]
    to_remove = []
    for i in range(L):
        row = df.iloc[i]

        video_path, _ = get_video_path(row, split)
        if not os.path.exists(video_path):
            to_remove.append(i)

    df = df.drop(df.index[to_remove])
    if save:
        df.to_csv(os.path.join(ANNO_DIR, split + ".csv"), index=False)

    print("Cleaned", split, "dataset, from", L, "to", df.shape[0])
Example #5
def gen_img_feat_split(model, preprocess, split):
    """Run a 2D image model over every video in a split and cache the
    resulting per-frame features with torch.save."""
    # KRSL names its development split "val" rather than "dev".
    if SOURCE == "KRSL" and split == "dev":
        split = "val"

    df = get_split_df(split)

    print(SOURCE, STF_MODEL, "feature extraction:", split, "split")
    L = df.shape[0]

    pp = ProgressPrinter(L, 10)
    for idx in range(L):
        row = df.iloc[idx]
        video_path, feat_path = get_video_path(row, split, stf_feat=False)
        # Reuse cached features unless FEAT_OVERRIDE forces re-extraction.
        if os.path.exists(feat_path) and not FEAT_OVERRIDE:
            pp.omit()
            continue

        feat_dir = os.path.split(feat_path)[0]

        images = get_images(video_path)
        if len(images) < 4:
            # Skip clips with fewer than four frames.
            continue

        tensor_video = get_tensor_video(images, preprocess, "2D")
        inp = tensor_video.to(DEVICE)
        feat = model(inp).cpu()

        if not os.path.exists(feat_dir):
            os.makedirs(feat_dir)

        torch.save(feat, feat_path)

        if SHOW_PROGRESS:
            pp.show(idx)

    if SHOW_PROGRESS:
        pp.end()
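For context, one way the (model, preprocess) pair might be prepared, using a torchvision ResNet-18 as a stand-in for the unspecified STF_MODEL backbone; the preprocessing expected by get_tensor_video() is not shown, so the transforms are an ImageNet-style guess:

import torch
from torchvision import models, transforms

# Stand-in 2D backbone: ResNet-18 with its classification head removed,
# so each frame maps to a 512-d feature vector.
backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model = torch.nn.Sequential(*list(backbone.children())[:-1],
                            torch.nn.Flatten()).to(DEVICE).eval()

# ImageNet-style per-frame preprocessing (assumption).
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

gen_img_feat_split(model, preprocess, "train")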
Example #6
def generate_gloss_dataset(vocab, stf_type=STF_TYPE, use_feat=USE_ST_FEAT):
    """Generate the gloss-recognition dataset by running the trained
    end-to-end model over the train split and pairing its decoded
    predictions with per-gloss clip paths."""
    print("Generation of the Gloss-Recognition Dataset")
    model, loaded = get_end2end_model(vocab, True, stf_type, use_feat)

    mode = "3D" if stf_type else "2D"

    if not loaded:
        print("STF or SEQ2SEQ model doesn't exist")
        exit(1)

    model.eval()

    temp_stride = 4

    rerun_out_dir = os.path.join(GR_DATASET_DIR, "STF_RERUN")
    rerun_out_path = os.path.join(rerun_out_dir, STF_MODEL + ".bin")

    # Reuse per-video metadata cached by a previous STF feature run, if any.
    stf_rerun = use_feat and os.path.exists(rerun_out_path)

    if stf_rerun:
        with open(rerun_out_path, 'rb') as f:
            feats_rerun_data = pickle.load(f)
    else:
        feats_rerun_data = {"frame_n": [], "gloss_paths": [], "gloss_lens": []}

    df = get_split_df("train")
    Y = []
    X = []
    X_lens = []

    pp = ProgressPrinter(df.shape[0], 5)
    cur_n_gloss = 0
    for idx in range(df.shape[0]):
        row = df.iloc[idx]
        video_path, feat_path = get_video_path(row, "train")

        if stf_rerun:
            frame_n = feats_rerun_data["frame_n"][idx]

            if frame_n < temp_stride:
                pp.omit()
                continue

            gloss_paths = feats_rerun_data["gloss_paths"][idx]
            gloss_lens = feats_rerun_data["gloss_lens"][idx]

            with torch.no_grad():
                tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)

        else:
            images = get_images(video_path)
            frame_n = len(images)
            feats_rerun_data["frame_n"].append(frame_n)

            if frame_n < temp_stride:
                pp.omit()
                feats_rerun_data["gloss_paths"].append("")
                feats_rerun_data["gloss_lens"].append(0)
                continue

            gloss_paths, gloss_lens = get_gloss_paths(images, cur_n_gloss, temp_stride, mode)
            feats_rerun_data["gloss_paths"].append(gloss_paths)
            feats_rerun_data["gloss_lens"].append(gloss_lens)

            with torch.no_grad():
                if use_feat:
                    tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)
                else:
                    tensor_video = get_tensor_video(images, preprocess_3d, mode).unsqueeze(0).to(DEVICE)

        X += gloss_paths
        X_lens += gloss_lens
        Y += get_decoded_prediction(model, tensor_video, vocab.encode(row.annotation))

        assert (len(Y) == len(X) == len(X_lens))

        cur_n_gloss = len(X)
        if SHOW_PROGRESS:
            pp.show(idx)

    shuffle_and_save_dataset(X, X_lens, Y)
    if use_feat and not stf_rerun:
        if not os.path.exists(rerun_out_dir):
            os.makedirs(rerun_out_dir)
        with open(rerun_out_path, 'wb') as f:
            pickle.dump(feats_rerun_data, f)

    if SHOW_PROGRESS:
        pp.end()
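A hedged sketch of a typical invocation; Vocab is assumed to be the project's vocabulary class (only its encode() method is visible in the snippets above):

# Hypothetical driver: rebuild the gloss-recognition dataset from the
# trained end-to-end model's predictions on the train split.
vocab = Vocab()  # assumption: project vocabulary class
generate_gloss_dataset(vocab, stf_type=STF_TYPE, use_feat=USE_ST_FEAT)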