# __init__ of the SyncI3d-based TVHID pairs handler (loads a trained SyncI3d
# checkpoint instead of plain I3D weights).
def __init__(
        self,
        checkpoint_file="/home/acances/Code/human_interaction_SyncI3d/checkpoints/checkpoint_size4000_lr0.01_marg1.5_epoch49.pt"):
    self.w = 224
    self.h = 224
    self.alpha = 0.1

    self.frames_dir = "/home/acances/Data/TVHID/keyframes"
    self.tracks_dir = "/home/acances/Data/TVHID/tracks"
    self.pairs_dir = "/home/acances/Data/TVHID/pairs16"
    self.features_dir = "/home/acances/Data/TVHID/features16_{}".format(
        "baseline" if checkpoint_file is None else "ii3d")

    self.frame_processor = FrameProcessor(self.w, self.h, self.alpha,
                                          self.frames_dir, self.tracks_dir)

    self.checkpoint_file = checkpoint_file
    self.prepare_synci3d()

    self.class_indices = {
        "negative": 0,
        "handShake": 1,
        "highFive": 2,
        "hug": 3,
        "kiss": 4
    }
    # Map indices back to names (the original built a set of (index, name)
    # tuples, which is almost certainly a bug given the attribute name)
    self.class_names = {self.class_indices[name]: name for name in self.class_indices}

    self.gather_positive_pairs()
    self.gather_negative_pairs()
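# Minimal usage sketch. The class name SyncI3dPairsHandler is hypothetical
# (only the __init__ is shown above), and compute_all_features is assumed to
# exist by analogy with the I3D handlers below.
if __name__ == "__main__":
    handler = SyncI3dPairsHandler()  # trained SyncI3d features ("ii3d" suffix)
    baseline = SyncI3dPairsHandler(checkpoint_file=None)  # "baseline" features
    handler.compute_all_features()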
# Imports needed by this handler (the FrameProcessor import path is a
# project-local assumption).
import glob
import pickle
from pathlib import Path

import torch
import torch.nn.functional as F
import tqdm

from frame_processor import FrameProcessor  # assumed project-local import


class TVHIDPairsHandler:
    def __init__(self, MODEL):
        self.model_class = MODEL[0]
        self.weights_file = MODEL[1]
        self.suffix = MODEL[2]

        self.w = 224
        self.h = 224
        self.alpha = 0.1

        self.frames_dir = "/home/acances/Data/TVHID/keyframes"
        self.tracks_dir = "/home/acances/Data/TVHID/tracks"
        self.pairs_dir = "/home/acances/Data/TVHID/pairs16"
        self.features_dir = "/home/acances/Data/TVHID/features16_{}".format(self.suffix)

        self.frame_processor = FrameProcessor(self.w, self.h, self.alpha,
                                              self.frames_dir, self.tracks_dir)

        self.prepare_i3d()

        self.class_indices = {
            "negative": 0,
            "handShake": 1,
            "highFive": 2,
            "hug": 3,
            "kiss": 4
        }
        # Map indices back to names (the original built a set of (index, name)
        # tuples, which is almost certainly a bug given the attribute name)
        self.class_names = {self.class_indices[name]: name for name in self.class_indices}

        self.gather_positive_pairs()
        self.gather_negative_pairs()

    def prepare_i3d(self):
        self.model = self.model_class()
        weights = torch.load(self.weights_file)
        self.model.load_state_dict(weights)
        self.model.cuda()

    def gather_positive_pairs(self):
        self.positive_pairs_by_video = {}
        pairs_files = glob.glob("{}/positive/*".format(self.pairs_dir))
        for file in pairs_files:
            # Strip the "pairs_" prefix (6 characters) and the extension to
            # recover the video id; its first token is the class name
            video_id = file.split("/")[-1].split(".")[0][6:]
            class_name = file.split("/")[-1].split("_")[1]
            class_index = self.class_indices[class_name]
            self.positive_pairs_by_video[video_id] = []
            with open(file, "r") as f:
                for line in f:
                    pair = line.strip().split(",")
                    self.positive_pairs_by_video[video_id].append(pair + [class_index])

    def gather_negative_pairs(self):
        self.negative_pairs_by_video = {}
        pairs_files = glob.glob("{}/negative/*".format(self.pairs_dir))
        for file in pairs_files:
            video_id = file.split("/")[-1].split(".")[0][6:]
            self.negative_pairs_by_video[video_id] = []
            with open(file, "r") as f:
                for line in f:
                    pair = line.strip().split(",")
                    self.negative_pairs_by_video[video_id].append(pair + [0])

    def get_features(self, pair):
        video_id1, track_id1, begin1, end1, video_id2, track_id2, begin2, end2 = pair[:8]
        track_id1, begin1, end1 = list(map(int, [track_id1, begin1, end1]))
        track_id2, begin2, end2 = list(map(int, [track_id2, begin2, end2]))
        assert end1 - begin1 == end2 - begin2

        tensor1 = self.frame_processor.processed_frames(video_id1, track_id1, begin1, end1)
        tensor2 = self.frame_processor.processed_frames(video_id2, track_id2, begin2, end2)

        # Add batch dimension and transfer to GPU
        tensor1 = tensor1.unsqueeze(0).cuda()
        tensor2 = tensor2.unsqueeze(0).cuda()

        self.model.eval()
        # no_grad avoids building the autograd graph during feature extraction
        # (present in the augmented variant below, missing here in the original)
        with torch.no_grad():
            features1 = self.model.extract_features(tensor1)
            features2 = self.model.extract_features(tensor2)

        # Flatten output
        features1 = torch.flatten(features1, start_dim=1)
        features2 = torch.flatten(features2, start_dim=1)

        # Normalize each feature vector (separately)
        features1 = F.normalize(features1, p=2, dim=1)
        features2 = F.normalize(features2, p=2, dim=1)

        # Remove batch dimension and transfer to CPU
        features1 = features1[0].cpu()
        features2 = features2[0].cpu()

        return features1, features2

    def compute_all_features(self):
        print("Computing features for positive pairs")
        for video_id in tqdm.tqdm(self.positive_pairs_by_video):
            class_name = video_id.split("_")[0]
            class_index = self.class_indices[class_name]
            pairs = self.positive_pairs_by_video[video_id]
            features_subdir = "{}/positive/{}".format(self.features_dir, video_id)
            Path(features_subdir).mkdir(parents=True, exist_ok=True)
            for i, pair in enumerate(pairs):
                features_1, features_2 = self.get_features(pair)
                output_file = "{}/features_pair{}.pkl".format(features_subdir, i)
                with open(output_file, "wb") as f:
                    pickle.dump((features_1, features_2, class_index), f)

        print("Computing features for negative pairs")
        for video_id in tqdm.tqdm(self.negative_pairs_by_video):
            class_name = "negative"
            class_index = self.class_indices[class_name]
            pairs = self.negative_pairs_by_video[video_id]
            features_subdir = "{}/negative/{}".format(self.features_dir, video_id)
            Path(features_subdir).mkdir(parents=True, exist_ok=True)
            for i, pair in enumerate(pairs):
                features_1, features_2 = self.get_features(pair)
                output_file = "{}/features_pair{}.pkl".format(features_subdir, i)
                with open(output_file, "wb") as f:
                    pickle.dump((features_1, features_2, class_index), f)
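# Minimal usage sketch (hypothetical): MODEL is a (model_class, weights_file,
# suffix) triple. The InceptionI3d import and weights path below are
# placeholders, not the repo's actual values.
if __name__ == "__main__":
    from pytorch_i3d import InceptionI3d  # assumed import
    MODEL = (InceptionI3d, "/path/to/i3d_weights.pt", "i3d")
    handler = TVHIDPairsHandler(MODEL)
    handler.compute_all_features()  # writes one .pkl per pair under features16_i3d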
# Augmented variant of the handler: same pipeline, but each pair is expanded
# into several augmented versions before feature extraction. In addition to
# the imports above, this file needs numpy, cv2, torchvision transforms and a
# video_transforms module (assumed to come from a video-transform package
# such as torch_videovision).
import numpy as np
import cv2
from torchvision import transforms


class TVHIDPairsHandler:
    def __init__(self, MODEL):
        self.model_class = MODEL[0]
        self.weights_file = MODEL[1]
        self.suffix = MODEL[2]

        self.w = 224
        self.h = 224
        self.alpha = 0.1

        self.frames_dir = "/home/acances/Data/TVHID/keyframes"
        self.tracks_dir = "/home/acances/Data/TVHID/tracks"
        self.pairs_dir = "/home/acances/Data/TVHID/pairs16"
        self.features_dir = "/home/acances/Data/TVHID/features16_aug_{}".format(self.suffix)

        self.frame_processor = FrameProcessor(self.w, self.h, self.alpha,
                                              self.frames_dir, self.tracks_dir)

        self.prepare_i3d()

        self.class_indices = {
            "negative": 0,
            "handShake": 1,
            "highFive": 2,
            "hug": 3,
            "kiss": 4
        }
        # Map indices back to names (the original built a set of (index, name)
        # tuples, which is almost certainly a bug given the attribute name)
        self.class_names = {self.class_indices[name]: name for name in self.class_indices}

        self.gather_positive_pairs()
        self.gather_negative_pairs()

    def prepare_i3d(self):
        self.model = self.model_class()
        weights = torch.load(self.weights_file)
        self.model.load_state_dict(weights)
        self.model.cuda()

    def gather_positive_pairs(self):
        self.positive_pairs_by_video = {}
        pairs_files = glob.glob("{}/positive/*".format(self.pairs_dir))
        for file in pairs_files:
            video_id = file.split("/")[-1].split(".")[0][6:]
            class_name = file.split("/")[-1].split("_")[1]
            class_index = self.class_indices[class_name]
            self.positive_pairs_by_video[video_id] = []
            with open(file, "r") as f:
                for line in f:
                    pair = line.strip().split(",")
                    self.positive_pairs_by_video[video_id].append(pair + [class_index])

    def gather_negative_pairs(self):
        self.negative_pairs_by_video = {}
        pairs_files = glob.glob("{}/negative/*".format(self.pairs_dir))
        for file in pairs_files:
            video_id = file.split("/")[-1].split(".")[0][6:]
            self.negative_pairs_by_video[video_id] = []
            with open(file, "r") as f:
                for line in f:
                    pair = line.strip().split(",")
                    self.negative_pairs_by_video[video_id].append(pair + [0])

    def get_tensors(self, pair):
        video_id1, track_id1, begin1, end1, video_id2, track_id2, begin2, end2 = pair[:8]
        track_id1, begin1, end1 = list(map(int, [track_id1, begin1, end1]))
        track_id2, begin2, end2 = list(map(int, [track_id2, begin2, end2]))
        assert end1 - begin1 == end2 - begin2

        tensor1 = self.frame_processor.processed_frames(video_id1, track_id1, begin1, end1)
        tensor2 = self.frame_processor.processed_frames(video_id2, track_id2, begin2, end2)
        return tensor1, tensor2  # each 3x16x224x224

    def get_i3d_features(self, tensor):
        # Add batch dimension and transfer to GPU
        tensor = tensor.unsqueeze(0).cuda()

        # Extract features
        self.model.eval()
        with torch.no_grad():
            features = self.model.extract_features(tensor)

        # Flatten output
        features = torch.flatten(features, start_dim=1)

        # Normalize feature vector
        features = F.normalize(features, p=2, dim=1)

        # Remove batch dimension and transfer to CPU
        features = features[0].cpu()
        return features

    def get_features(self, pair):
        tensor1, tensor2 = self.get_tensors(pair)
        features1 = self.get_i3d_features(tensor1)
        features2 = self.get_i3d_features(tensor2)
        return features1, features2

    def tensor_to_PIL(self, tensor):
        # Convert a 3xTxHxW float tensor (values in 0-255) to a list of T PIL images
        return [transforms.functional.to_pil_image(tensor[:, i] / 255)
                for i in range(tensor.shape[1])]

    def PIL_to_tensor(self, pil_list):
        # Convert a list of T PIL images back to a 3xTxHxW float tensor
        return torch.stack([torch.tensor(np.array(e).transpose(2, 0, 1))
                            for e in pil_list], dim=1).float()

    def apply_transformations(self, tensor, transformations):
        pil_list = self.tensor_to_PIL(tensor)
        for transf in transformations:
            pil_list = transf(pil_list)
        aug_tensor = self.PIL_to_tensor(pil_list)
        return aug_tensor

    def get_augmented_tensors(self, pair):
        tensor1, tensor2 = self.get_tensors(pair)

        # Transformation functions. The original called RandomVerticalFlip
        # here; the variable name indicates a horizontal flip was intended,
        # assuming video_transforms provides RandomHorizontalFlip.
        h_flip = video_transforms.RandomHorizontalFlip(p=1)
        color_jitter = video_transforms.ColorJitter(brightness=0.5, contrast=0.5,
                                                    saturation=0.5, hue=0.25)
        grayscale = video_transforms.RandomGrayscale(p=1)

        # Add untransformed tensors
        augmented_tensors = [(tensor1, tensor2)]

        # Concatenate the two tensors along the time axis so that the same
        # (random) transformation is applied to both members of the pair
        tensor12 = torch.cat([tensor1, tensor2], dim=1)  # 3x32x224x224

        transformation_lists = [[grayscale], [h_flip, grayscale]] \
            + [[color_jitter] for i in range(4)] \
            + [[h_flip, color_jitter] for i in range(4)]
        for transformations in transformation_lists:
            aug_tensor12 = self.apply_transformations(tensor12, transformations)
            aug_tensor1 = aug_tensor12[:, :16]
            aug_tensor2 = aug_tensor12[:, 16:]
            augmented_tensors.append((aug_tensor1, aug_tensor2))

        return augmented_tensors

    def print_augmented_tensors(self, pair):
        augmented_tensors = self.get_augmented_tensors(pair)
        for index, (tensor1, tensor2) in enumerate(augmented_tensors):
            self.print_tensors(tensor1, tensor2,
                               "augmented_tensors/version_{}".format(index))

    def get_augmented_features(self, pair):
        augmented_tensors = self.get_augmented_tensors(pair)
        augmented_features = []
        for tensor1, tensor2 in augmented_tensors:
            features1 = self.get_i3d_features(tensor1)
            features2 = self.get_i3d_features(tensor2)
            augmented_features.append((features1, features2))
        return augmented_features

    def print_tensors(self, tensor1, tensor2, subdir):
        Path(subdir).mkdir(parents=True, exist_ok=True)
        for i in range(tensor1.shape[1]):
            filename1 = "{}/tensor1_frame_{}.jpg".format(subdir, i + 1)
            # CxHxW -> HxWxC for cv2.imwrite (the original swapped H and W)
            frame1 = tensor1[:, i, :, :].numpy().transpose(1, 2, 0)
            cv2.imwrite(filename1, frame1)

            filename2 = "{}/tensor2_frame_{}.jpg".format(subdir, i + 1)
            frame2 = tensor2[:, i, :, :].numpy().transpose(1, 2, 0)
            cv2.imwrite(filename2, frame2)

    def compute_all_features(self):
        print("Computing features for positive pairs")
        for video_id in tqdm.tqdm(self.positive_pairs_by_video):
            class_name = video_id.split("_")[0]
            class_index = self.class_indices[class_name]
            pairs = self.positive_pairs_by_video[video_id]
            for i, pair in enumerate(pairs):
                features_subdir = "{}/positive/{}/pair_{}".format(self.features_dir, video_id, i)
                Path(features_subdir).mkdir(parents=True, exist_ok=True)
                augmented_features = self.get_augmented_features(pair)
                for index, (features_1, features_2) in enumerate(augmented_features):
                    output_file = "{}/features_v{}.pkl".format(features_subdir, index)
                    with open(output_file, "wb") as f:
                        pickle.dump((features_1, features_2, class_index), f)

        print("Computing features for negative pairs")
        for video_id in tqdm.tqdm(self.negative_pairs_by_video):
            class_name = "negative"
            class_index = self.class_indices[class_name]
            pairs = self.negative_pairs_by_video[video_id]
            for i, pair in enumerate(pairs):
                features_subdir = "{}/negative/{}/pair_{}".format(self.features_dir, video_id, i)
                Path(features_subdir).mkdir(parents=True, exist_ok=True)
                augmented_features = self.get_augmented_features(pair)
                for index, (features_1, features_2) in enumerate(augmented_features):
                    output_file = "{}/features_v{}.pkl".format(features_subdir, index)
                    with open(output_file, "wb") as f:
                        pickle.dump((features_1, features_2, class_index), f)
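# Minimal usage sketch for the augmented variant (same hypothetical MODEL
# triple as above). Each pair yields 1 original + 10 augmented versions, so
# expect 11 feature files per pair under features16_aug_<suffix>.
if __name__ == "__main__":
    from pytorch_i3d import InceptionI3d  # assumed import
    handler = TVHIDPairsHandler((InceptionI3d, "/path/to/i3d_weights.pt", "i3d"))
    handler.compute_all_features()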
# Dataset of person-track pairs from AVA v2.2 (imports for this file; the
# FrameProcessor import path is a project-local assumption).
import glob
import random

import tqdm
import torch.utils.data as data

from frame_processor import FrameProcessor  # assumed project-local import


class AvaPairs(data.Dataset):
    def __init__(self, phase="train", nb_positives=None, seed=0):
        self.w = 224
        self.h = 224
        self.alpha = 0.1
        self.phase = phase

        self.frames_dir = "/home/adrien/Data/Ava_v2.2/correct_frames"
        self.shots_dir = "/home/adrien/Data/Ava_v2.2/final_shots"
        self.tracks_dir = "/home/adrien/Data/Ava_v2.2/tracks_SORT"
        self.pairs_dir = "/home/adrien/Data/Ava_v2.2/pairs16_SORT"

        self.frame_processor = FrameProcessor(self.w, self.h, self.alpha, self.phase,
                                              self.frames_dir, self.shots_dir, self.tracks_dir)

        random.seed(seed)

        self.nb_positives = nb_positives
        self.gather_positive_pairs()
        self.gather_negative_pairs()

    def gather_positive_pairs(self):
        print("Gathering positive pairs")
        self.positive_pairs = []
        pairs_files = glob.glob("{}/{}/positive/*".format(self.pairs_dir, self.phase))
        for file in tqdm.tqdm(pairs_files):
            with open(file, "r") as f:
                for line in f:
                    pair = line.strip().split(",")
                    self.positive_pairs.append(pair + [1])
        if self.nb_positives is None:
            self.nb_positives = len(self.positive_pairs)
        self.positive_pairs = random.sample(self.positive_pairs, self.nb_positives)
        random.shuffle(self.positive_pairs)

    def gather_negative_pairs(self):
        # Target negative counts: 2 hard, 0.5 medium and 0.5 easy negatives
        # per positive, i.e. a 4:1:1 hard/medium/easy split
        nb_hard_negatives = 2 * self.nb_positives
        nb_medium_negatives = self.nb_positives // 2
        nb_easy_negatives = self.nb_positives // 2
        self.one_epoch_data_size = self.nb_positives \
            + nb_hard_negatives \
            + nb_medium_negatives \
            + nb_easy_negatives

        print("Gathering negative pairs")
        # Hard negatives
        self.hard_negative_pairs = []
        pairs_files = glob.glob("{}/{}/hard_negative/*".format(self.pairs_dir, self.phase))
        for file in tqdm.tqdm(pairs_files):
            with open(file, "r") as f:
                for line in f:
                    pair = line.strip().split(",")
                    self.hard_negative_pairs.append(pair + [0])

        # Medium negatives
        self.medium_negative_pairs = []
        pairs_files = glob.glob("{}/{}/medium_negative/*".format(self.pairs_dir, self.phase))
        for file in tqdm.tqdm(pairs_files):
            with open(file, "r") as f:
                for line in f:
                    pair = line.strip().split(",")
                    self.medium_negative_pairs.append(pair + [0])

        # Easy negatives
        self.easy_negative_pairs = []
        pairs_files = glob.glob("{}/{}/easy_negative/*".format(self.pairs_dir, self.phase))
        for file in tqdm.tqdm(pairs_files):
            with open(file, "r") as f:
                for line in f:
                    pair = line.strip().split(",")
                    self.easy_negative_pairs.append(pair + [0])

        # Make sure the proportions of the negative pairs are correct (4:1:1),
        # capped by whichever category is scarcest
        number = min(
            len(self.hard_negative_pairs) // 2,
            len(self.medium_negative_pairs) * 2,
            len(self.easy_negative_pairs) * 2)
        if self.phase == "train":
            self.negative_pairs = []
            self.negative_pairs += random.sample(self.hard_negative_pairs, number * 2)
            self.negative_pairs += random.sample(self.medium_negative_pairs, number // 2)
            self.negative_pairs += random.sample(self.easy_negative_pairs, number // 2)
        else:
            self.hard_negative_pairs = random.sample(self.hard_negative_pairs, nb_hard_negatives)
            self.medium_negative_pairs = random.sample(self.medium_negative_pairs, nb_medium_negatives)
            self.easy_negative_pairs = random.sample(self.easy_negative_pairs, nb_easy_negatives)
            self.negative_pairs = self.hard_negative_pairs \
                + self.medium_negative_pairs \
                + self.easy_negative_pairs
        random.shuffle(self.negative_pairs)

    def __getitem__(self, index):
        """Generates one sample of data"""
        assert index < self.one_epoch_data_size
        # For positive pairs, choose among the selected positive pairs
        if index < self.nb_positives:
            pair = self.positive_pairs[index]
        # For negative pairs, sample randomly among all negative pairs during
        # training; index deterministically otherwise
        else:
            if self.phase == "train":
                pair = random.choice(self.negative_pairs)
            else:
                index_in_negative_pairs = index - self.nb_positives
                pair = self.negative_pairs[index_in_negative_pairs]

        video_id1, shot_id1, i1, begin1, end1, video_id2, shot_id2, i2, begin2, end2, label = pair
        shot_id1, track_id1, begin1, end1 = list(map(int, [shot_id1, i1, begin1, end1]))
        shot_id2, track_id2, begin2, end2 = list(map(int, [shot_id2, i2, begin2, end2]))
        assert end1 - begin1 == end2 - begin2

        tensor1 = self.frame_processor.processed_frames(video_id1, shot_id1, track_id1, begin1, end1)
        tensor2 = self.frame_processor.processed_frames(video_id2, shot_id2, track_id2, begin2, end2)

        return tensor1, tensor2, label

    def __len__(self):
        """Denotes the total number of samples"""
        return self.one_epoch_data_size
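# Minimal consumption sketch (batch size and worker count are arbitrary;
# nb_positives=4000 mirrors the "size4000" checkpoint name above).
if __name__ == "__main__":
    dataset = AvaPairs(phase="train", nb_positives=4000)
    loader = data.DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
    for tensor1, tensor2, label in loader:
        # tensor1, tensor2: (B, 3, 16, 224, 224); label: (B,)
        pass  # forward both clips through the twin I3D streams here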