def eval_split_by_lev(model, vocab, split):
    """Print and return the label-level edit-distance error rate of a split.

    For every row of the split the pre-computed feature tensor is loaded,
    passed through ``model`` and decoded greedily: the arg-max label is taken
    per step, consecutive repeats are merged and label ``0`` is dropped
    (presumably the CTC blank -- confirm against the vocabulary).  All
    hypotheses and all encoded annotations are concatenated, each label id is
    mapped to a single character, and the Levenshtein distance between the
    two strings is normalised by the reference length.

    Args:
        model: callable applied to a loaded feature tensor with a leading
            batch dimension added.
        vocab: object whose ``encode(annotation)`` returns a sequence of
            integer label ids.
        split: split name forwarded to ``get_split_df`` / ``get_video_path``.

    Returns:
        The error rate in percent, or ``float("nan")`` when the split
        contains no reference labels (the rate is undefined in that case).
    """
    # Function-scope import keeps this block self-contained.
    from itertools import groupby

    df = get_split_df(split)
    n_rows = df.shape[0]
    pp = ProgressPrinter(n_rows, 5)
    hypes = []
    gts = []
    with torch.no_grad():
        for idx in range(n_rows):
            row = df.iloc[idx]
            gt = vocab.encode(row.annotation)
            _, feat_path = get_video_path(row, split)
            tensor_video = torch.load(feat_path).unsqueeze(0).to(DEVICE)
            pred = (model(tensor_video).squeeze(1).log_softmax(dim=1)
                    .argmax(dim=1).cpu().numpy())

            # groupby merges runs of equal labels; zeros are then filtered out.
            # This keeps exactly the labels that are non-zero and differ from
            # their immediate predecessor.
            hypes.extend(label for label, _ in groupby(pred) if label != 0)
            gts += gt
            pp.show(idx)
    pp.end()

    if not gts:
        # No reference labels at all: avoid dividing by zero.
        wer = float("nan")
    else:
        # One character per label id so a string edit distance counts labels.
        hyp_str = "".join(chr(x) for x in hypes)
        ref_str = "".join(chr(x) for x in gts)
        wer = Lev.distance(hyp_str, ref_str) / len(ref_str) * 100

    print(wer)
    return wer
def generate_openpose_features_split(pose_estimator, split):
    """Estimate and store pose features for every video of a split.

    Rows whose ``.npy`` feature file already exists are skipped (and reported
    to the progress printer via ``omit``).  For the remaining rows
    ``pose_estimator.estimate_video_pose`` is called on the video location and
    the result is written with ``np.save``, creating the target directory
    first when it is missing.

    Args:
        pose_estimator: object exposing ``estimate_video_pose(video_dir)``.
        split: split name forwarded to ``get_split_df`` / ``get_video_path``.
    """
    with torch.no_grad():
        split_df = get_split_df(split)
        print(SOURCE, "Feature extraction:", STF_MODEL, split, "split")

        total = split_df.shape[0]
        progress = ProgressPrinter(total, 1)

        for row_no in range(total):
            sample = split_df.iloc[row_no]
            video_dir, feat_path = get_video_path(sample, split,
                                                  feat_ext=".npy")

            # Features already on disk: nothing to do for this row.
            if os.path.exists(feat_path):
                progress.omit()
                continue

            out_dir, _ = os.path.split(feat_path)
            pose_feats = pose_estimator.estimate_video_pose(video_dir)

            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            np.save(feat_path, pose_feats)

            if SHOW_PROGRESS:
                progress.show(row_no)

        if SHOW_PROGRESS:
            progress.end()

        print()
def _build_dataset(self):
    """Populate ``self.X``, ``self.Y``, ``self.X_lens`` and ``self.length``.

    Three pickle files (``X_<split>.pkl``, ``Y_<split>.pkl`` and
    ``X_lens_<split>.pkl``) under ``END2END_DATASETS_DIR/<self._get_ffm()>``
    act as a cache.  When all three exist and ``self.load`` is truthy they are
    loaded; otherwise the lists are rebuilt from the split's annotation frame
    and the cache files are (re)written.

    While rebuilding, each row contributes its feature path to ``X``, its
    encoded annotation to ``Y`` and its feature length to ``X_lens``; rows for
    which ``self._get_feat`` returns ``None`` as the feature are skipped.
    """
    dataset_dir = os.sep.join([END2END_DATASETS_DIR, self._get_ffm()])

    def cache_file(prefix):
        # e.g. prefix "X_" -> <dataset_dir>/X_<split>.pkl
        return os.sep.join([dataset_dir, prefix + self.split + ".pkl"])

    X_path = cache_file("X_")
    Y_path = cache_file("Y_")
    X_lens_path = cache_file("X_lens_")

    # (attribute name, cache file) pairs, in the order they are read/written.
    cached_attrs = (("X", X_path), ("Y", Y_path), ("X_lens", X_lens_path))

    cache_usable = (os.path.exists(X_path)
                    and os.path.exists(Y_path)
                    and os.path.exists(X_lens_path)
                    and self.load)

    if cache_usable:
        for attr_name, file_path in cached_attrs:
            with open(file_path, 'rb') as fh:
                setattr(self, attr_name, pickle.load(fh))
        print(self.split[0].upper() + self.split[1:], "dataset loaded")
    else:
        print("Building", self.split, "dataset")
        split_df = get_split_df(self.split)
        self.X = []
        self.Y = []
        self.X_lens = []

        n_rows = split_df.shape[0]
        progress = ProgressPrinter(n_rows, 5)
        for row_no in range(n_rows):
            sample = split_df.iloc[row_no]
            glosses = self.vocab.encode(sample.annotation)
            feat_path, feat, feat_len = self._get_feat(sample, glosses)
            if feat is None:
                # No usable feature for this row.
                continue

            self.X.append(feat_path)
            self.Y.append(glosses)
            self.X_lens.append(feat_len)

            if self._show_progress():
                progress.show(row_no)

        if self._show_progress():
            progress.end()

        if not os.path.exists(dataset_dir):
            os.makedirs(dataset_dir)

        for attr_name, file_path in cached_attrs:
            with open(file_path, 'wb') as fh:
                pickle.dump(getattr(self, attr_name), fh)

    self.length = len(self.X)
def clean_anno_KRSL(split, save=True):
    """Remove annotation rows whose video does not exist on disk.

    Args:
        split: split name forwarded to ``get_split_df`` / ``get_video_path``;
            also used as the CSV file stem when saving.
        save: when true, write the cleaned frame to
            ``ANNO_DIR/<split>.csv`` without the index column.

    Returns:
        The cleaned DataFrame, so the result is usable even with
        ``save=False``.
    """
    df = get_split_df(split)
    n_before = df.shape[0]

    # Collect *positions* to keep and select with ``iloc``.  Selecting by
    # position (rather than dropping by index label) stays correct even when
    # the frame's index contains duplicate labels.
    keep = []
    for pos in range(n_before):
        video_path, _ = get_video_path(df.iloc[pos], split)
        if os.path.exists(video_path):
            keep.append(pos)
    df = df.iloc[keep]

    if save:
        df.to_csv(os.path.join(ANNO_DIR, split + ".csv"), index=False)

    print("Cleaned ", split, "dataset, from", n_before, "to", df.shape[0])
    return df
def gen_img_feat_split(model, preprocess, split):
    """Extract per-video image features for a split and save them to disk.

    For the ``KRSL`` source the ``dev`` split is looked up under the name
    ``val``.  Rows whose feature file already exists are skipped unless
    ``FEAT_OVERRIDE`` is set; videos with fewer than 4 images are skipped as
    well.  The remaining videos are preprocessed in ``"2D"`` mode, passed
    through ``model`` and the CPU copy of the output is stored with
    ``torch.save``.

    Args:
        model: callable applied to the preprocessed video tensor.
        preprocess: preprocessing object forwarded to ``get_tensor_video``.
        split: split name forwarded to ``get_split_df`` / ``get_video_path``.
    """
    if SOURCE == "KRSL" and split == "dev":
        split = "val"

    df = get_split_df(split)
    print(SOURCE, STF_MODEL, "feature extraction:", split, "split")

    min_frames = 4
    L = df.shape[0]
    pp = ProgressPrinter(L, 10)
    for idx in range(L):
        row = df.iloc[idx]
        video_path, feat_path = get_video_path(row, split, stf_feat=False)
        if os.path.exists(feat_path) and not FEAT_OVERRIDE:
            pp.omit()
            continue

        images = get_images(video_path)
        if len(images) < min_frames:
            continue

        tensor_video = get_tensor_video(images, preprocess, "2D")
        # Pure inference: without no_grad an autograd graph would be built
        # for every video and the saved tensor would carry requires_grad.
        with torch.no_grad():
            feat = model(tensor_video.to(DEVICE)).cpu()

        feat_dir = os.path.dirname(feat_path)
        if feat_dir:
            # exist_ok avoids the check-then-create race; the guard allows a
            # feature path without any directory component.
            os.makedirs(feat_dir, exist_ok=True)
        torch.save(feat, feat_path)

        if SHOW_PROGRESS:
            pp.show(idx)

    if SHOW_PROGRESS:
        pp.end()
def generate_gloss_dataset(vocab, stf_type=STF_TYPE, use_feat=USE_ST_FEAT):
    """Build the gloss-recognition dataset from the ``train`` split.

    An end-to-end model is obtained via ``get_end2end_model``; if it could
    not be loaded a message is printed and the process exits.  For every
    training video the gloss paths/lengths are determined and the model's
    decoded prediction (given the encoded annotation) is appended as the
    labels.  The accumulated lists are finally handed to
    ``shuffle_and_save_dataset``.

    When ``use_feat`` is set, per-video bookkeeping (frame count, gloss paths,
    gloss lengths) is cached in ``GR_DATASET_DIR/STF_RERUN/<STF_MODEL>.bin``.
    If that file already exists it is reused instead of reading the images
    again; otherwise it is written at the end of the run.

    Args:
        vocab: vocabulary forwarded to ``get_end2end_model``; its ``encode``
            is applied to each row's annotation.
        stf_type: forwarded to ``get_end2end_model``; a truthy value selects
            mode ``"3D"``, otherwise ``"2D"``.
        use_feat: when truthy, load stored feature tensors instead of
            building the input tensor from the images.
    """
    print("Generation of the Gloss-Recognition Dataset")
    model, loaded = get_end2end_model(vocab, True, stf_type, use_feat)

    if stf_type:
        mode = "3D"
    else:
        mode = "2D"

    if not loaded:
        print("STF or SEQ2SEQ model doesn't exist")
        exit(0)

    model.eval()

    # Videos with fewer frames than this are skipped.
    temp_stride = 4

    rerun_out_dir = os.path.join(GR_DATASET_DIR, "STF_RERUN")
    rerun_out_path = os.path.join(rerun_out_dir, STF_MODEL + ".bin")
    stf_rerun = use_feat and os.path.exists(rerun_out_path)

    if not stf_rerun:
        feats_rerun_data = {"frame_n": [], "gloss_paths": [], "gloss_lens": []}
    else:
        with open(rerun_out_path, 'rb') as cache_file:
            feats_rerun_data = pickle.load(cache_file)

    train_df = get_split_df("train")
    X, X_lens, Y = [], [], []

    n_videos = train_df.shape[0]
    progress = ProgressPrinter(n_videos, 5)
    n_gloss_so_far = 0

    for row_no in range(n_videos):
        sample = train_df.iloc[row_no]
        video_path, feat_path = get_video_path(sample, "train")

        if stf_rerun:
            # Reuse the bookkeeping recorded by an earlier run.
            n_frames = feats_rerun_data["frame_n"][row_no]
            if n_frames < temp_stride:
                progress.omit()
                continue

            paths = feats_rerun_data["gloss_paths"][row_no]
            lens = feats_rerun_data["gloss_lens"][row_no]

            with torch.no_grad():
                clip = torch.load(feat_path).unsqueeze(0).to(DEVICE)
        else:
            frames = get_images(video_path)
            n_frames = len(frames)
            feats_rerun_data["frame_n"].append(n_frames)

            if n_frames < temp_stride:
                progress.omit()
                # Placeholders keep the cached lists aligned with row numbers.
                feats_rerun_data["gloss_paths"].append("")
                feats_rerun_data["gloss_lens"].append(0)
                continue

            paths, lens = get_gloss_paths(frames, n_gloss_so_far,
                                          temp_stride, mode)
            feats_rerun_data["gloss_paths"].append(paths)
            feats_rerun_data["gloss_lens"].append(lens)

            with torch.no_grad():
                if use_feat:
                    raw_clip = torch.load(feat_path)
                else:
                    raw_clip = get_tensor_video(frames, preprocess_3d, mode)
                clip = raw_clip.unsqueeze(0).to(DEVICE)

        X.extend(paths)
        X_lens.extend(lens)
        Y.extend(get_decoded_prediction(model, clip,
                                        vocab.encode(sample.annotation)))

        # The three lists must stay aligned element by element.
        assert len(Y) == len(X) == len(X_lens)
        n_gloss_so_far = len(X)

        if SHOW_PROGRESS:
            progress.show(row_no)

    shuffle_and_save_dataset(X, X_lens, Y)

    if use_feat and not stf_rerun:
        if not os.path.exists(rerun_out_dir):
            os.makedirs(rerun_out_dir)
        with open(rerun_out_path, 'wb') as cache_file:
            pickle.dump(feats_rerun_data, cache_file)

    if SHOW_PROGRESS:
        progress.end()