def get(self, rgb_frames, phase_frames):
    assert len(rgb_frames) == len(phase_frames)
    assert len(rgb_frames) <= self.max_len
    # Load every phase frame as a single-channel (grayscale) PIL image.
    phase_images = []
    for frames in phase_frames:
        phase_img_list = []
        for frame in frames:
            img = Image.open(frame).convert('L')
            phase_img_list.append(img)
        phase_images.append(phase_img_list)
    if not self.test_mode:
        # Share one seed so the flip and crop are applied identically
        # to every image in the group.
        random_seed = np.random.randint(250)
        W, H = phase_images[0][0].size
        phase_transform = torchvision.transforms.Compose([
            GroupRandomHorizontalFlip(seed=random_seed),
            GroupRandomCrop(size=int(W * 0.85), seed=random_seed),
            GroupScale(size=self.phase_size),
            Stack(),
            ToTorchFormatTensor()
        ])
    else:
        phase_transform = torchvision.transforms.Compose([
            GroupScale(size=self.phase_size),
            Stack(),
            ToTorchFormatTensor()
        ])
    # Flatten the nested list so the group transform sees one image sequence.
    flat_phase_images = []
    for sublist in phase_images:
        flat_phase_images.extend(sublist)
    flat_phase_images_trans = phase_transform(flat_phase_images)
    phase_images = flat_phase_images_trans.view(
        len(phase_images), self.num_phase + 1,
        self.phase_size, self.phase_size)
    phase_images = phase_images.type('torch.FloatTensor').cuda()
    phase_batch_0, phase_batch_1 = phase_2_output(
        phase_images, self.steerable_pyramid, return_phase=self.return_phase)
    # Look up the pre-extracted feature for each RGB frame.
    rgb_features = []
    for frame in rgb_frames:
        video = frame.split('/')[-4]
        utter = frame.split('/')[-3]
        index = int(frame.split('/')[-1].split('.')[0].split('_')[-1])
        path = os.path.join(self.pretrained_feature_root, video,
                            utter + '.mp4', '{:05d}.npy'.format(index))
        try:
            rgb_features.append(np.load(path))
        except OSError:
            raise ValueError('Incorrect feature path: {}'.format(path))
    return [phase_batch_0, phase_batch_1, np.array(rgb_features)]
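# Why the shared seed matters: every frame in a clip must receive the same
# flip/crop decision, or temporal consistency across the group breaks. A
# minimal sketch of the seeding pattern (hypothetical class; the real
# GroupRandomHorizontalFlip implementation is not shown above):
import random
from PIL import Image

class SeededGroupFlip:
    def __init__(self, seed, p=0.5):
        self.seed = seed
        self.p = p

    def __call__(self, img_group):
        # Re-seeding with the shared value makes the coin flip identical
        # for every group transformed with the same seed.
        rng = random.Random(self.seed)
        if rng.random() < self.p:
            return [img.transpose(Image.FLIP_LEFT_RIGHT) for img in img_group]
        return img_group

frames = [Image.new('L', (64, 64)) for _ in range(5)]
flipped = SeededGroupFlip(seed=42)(frames)  # all 5 frames share one decision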
def __init__(self, cfg: DictConfig):
    super().__init__()
    self.train_gulp_dir = Path(cfg.data.train_gulp_dir)
    self.val_gulp_dir = Path(cfg.data.val_gulp_dir)
    self.test_gulp_dir = Path(cfg.data.test_gulp_dir)
    self.cfg = cfg

    channel_count = (3 if self.cfg.modality == "RGB"
                     else 2 * self.cfg.data.segment_length)
    common_transform = Compose([
        Stack(bgr=self.cfg.modality == "RGB"
              and self.cfg.data.preprocessing.get("bgr", False)),
        ToTorchFormatTensor(div=self.cfg.data.preprocessing.rescale),
        GroupNormalize(
            mean=list(self.cfg.data.preprocessing.mean),
            std=list(self.cfg.data.preprocessing.std),
        ),
        ExtractTimeFromChannel(channel_count),
    ])
    self.train_transform = Compose([
        GroupMultiScaleCrop(
            self.cfg.data.preprocessing.input_size,
            self.cfg.data.train_augmentation.multiscale_crop_scales,
        ),
        GroupRandomHorizontalFlip(is_flow=self.cfg.modality == "Flow"),
        common_transform,
    ])
    self.test_transform = Compose([
        GroupScale(self.cfg.data.test_augmentation.rescale_size),
        GroupCenterCrop(self.cfg.data.preprocessing.input_size),
        common_transform,
    ])
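# A sketch of what ExtractTimeFromChannel is assumed to do: Stack() produces a
# (T*C, H, W) tensor, and this transform splits the time axis back out so a
# 3D model sees (C, T, H, W). Hypothetical implementation for illustration
# only; the real class is not shown above:
import torch

class ExtractTimeFromChannelSketch:
    def __init__(self, channel_count):
        # channel_count = 3 for RGB, 2 * segment_length for flow (u, v pairs)
        self.channel_count = channel_count

    def __call__(self, tensor):
        tc, h, w = tensor.shape
        t = tc // self.channel_count
        return tensor.view(t, self.channel_count, h, w).permute(1, 0, 2, 3)

stacked = torch.randn(8 * 3, 224, 224)           # 8 RGB frames stacked on channels
clip = ExtractTimeFromChannelSketch(3)(stacked)  # -> (3, 8, 224, 224)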
from opts import parser
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

plt.ion()
plt.show()

global best_prec1
best_prec1 = 0
args = parser.parse_args()

transform_list = transcompose([
    GroupScale((150, 150)),
    Augmentation(),
    Stack(),
    ToTorchFormatTensor(div=True)
])

print("Loading training dataset")
train_loader = torch.utils.data.DataLoader(
    DataSetPol("/media/data_cifs/curvy_2snakes_300/",
               args.train_list,
               transform=transform_list),
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True)

print("Loading validation dataset")
# Truncated in the original; assumed to mirror train_loader with the val list.
val_loader = torch.utils.data.DataLoader(
    DataSetPol("/media/data_cifs/curvy_2snakes_300/",
               args.val_list,
               transform=transform_list),
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=4,
    pin_memory=True)
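# A quick shape sanity check before training; a hedged sketch, assuming the
# loaders above built successfully and DataSetPol yields (frames, label) pairs:
for frames, labels in train_loader:
    print(frames.shape, labels.shape)  # e.g. (batch_size, C*T, 150, 150)
    break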
def main(conf, test_set, test_part=-1):
    gulp_path = os.path.join(conf.gulp_test_dir, conf.modality.lower(),
                             'test', test_set)
    gulp_path = os.path.realpath(gulp_path)
    gulp_path = Path(gulp_path)

    classes_map = pickle.load(open(conf.classes_map, "rb"))
    conf.num_classes = count_num_classes(classes_map)

    net = TSN(conf.num_classes,
              1,
              conf.modality,
              base_model=conf.arch,
              consensus_type=conf.crop_fusion_type,
              dropout=conf.dropout)

    checkpoint = torch.load(conf.weights)
    print("Model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))
    # Strip the leading "module." prefix left by DataParallel checkpoints.
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    net.load_state_dict(base_dict)

    if conf.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif conf.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported, but got {}".format(
                conf.test_crops))

    class_type = 'verb+noun' if conf.class_type == 'action' else conf.class_type
    if conf.modality == 'Flow':
        dataset = EpicVideoFlowDataset(gulp_path=gulp_path,
                                       class_type=class_type)
    else:
        dataset = EpicVideoDataset(gulp_path=gulp_path, class_type=class_type)

    data_loader = torch.utils.data.DataLoader(
        EpicTSNTestDataset(
            dataset,
            classes_map,
            num_segments=conf.test_segments,
            new_length=1 if conf.modality == "RGB" else 5,
            modality=conf.modality,
            transform=torchvision.transforms.Compose([
                cropping,
                # BNInception expects BGR input in [0, 255]; other backbones
                # take RGB scaled to [0, 1].
                Stack(roll=conf.arch == 'BNInception'),
                ToTorchFormatTensor(div=conf.arch != 'BNInception'),
                GroupNormalize(net.input_mean, net.input_std),
            ]),
            part=test_part),
        batch_size=1,
        shuffle=False,
        num_workers=conf.workers * 2,
        pin_memory=True)

    net = torch.nn.DataParallel(net, device_ids=conf.gpus).cuda()
    net.eval()

    total_num = len(data_loader.dataset)
    output = []
    proc_start_time = time.time()
    for i, (keys, input_) in enumerate(data_loader):
        rst = eval_video(conf, (i, keys, input_), net)
        output.append(rst[1:])
        cnt_time = time.time() - proc_start_time
        print('video {} done, total {}/{}, average {} sec/video'.format(
            i, i + 1, total_num, float(cnt_time) / (i + 1)))

    video_index = [x[0] for x in output]
    scores = [x[1] for x in output]

    save_scores = './{}/tsn_{}_{}_testset_{}_{}_lr_{}_model_{:03d}.npz'.format(
        conf.checkpoint, conf.class_type, conf.modality.lower(), test_set,
        conf.arch, conf.lr, checkpoint['epoch'])
    if test_part > 0:
        save_scores = save_scores.replace('.npz',
                                          '_part-{}.npz'.format(test_part))
    np.savez(save_scores, segment_indices=video_index, scores=scores)
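# eval_video is not shown above. In standard TSN test scripts it reshapes the
# oversampled batch into individual crops, runs the network, and averages the
# scores. A hedged sketch under that assumption (name, signature, and return
# layout are hypothetical):
import torch

def eval_video_sketch(conf, video_data, net, num_classes):
    i, keys, input_ = video_data
    num_crop = conf.test_crops
    # (1, crops*segments*C, H, W) -> (crops*segments, C, H, W)
    length = 3 if conf.modality == 'RGB' else 10  # 5 flow pairs = 10 channels
    input_ = input_.view(-1, length, input_.size(2), input_.size(3))
    with torch.no_grad():
        rst = net(input_).cpu().numpy()
    # Average over crops and segments to one score vector per video.
    rst = rst.reshape(num_crop, -1, num_classes).mean(axis=(0, 1))
    return i, keys, rst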
crop_count = 10
if crop_count == 1:
    cropping = Compose([
        GroupScale(model.scale_size),
        GroupCenterCrop(model.input_size),
    ])
elif crop_count == 10:
    cropping = GroupOverSample(model.input_size, model.scale_size)
else:
    raise ValueError(
        "Only a crop_count of 1 or 10 is supported, but got {}".format(
            crop_count))

transform = Compose([
    cropping,
    # The original compared base_model with itself (always True / always
    # False); the intended test is for the BNInception backbone, as in the
    # test loader above.
    Stack(roll=base_model == 'BNInception'),
    ToTorchFormatTensor(div=base_model != 'BNInception'),
    GroupNormalize(model.input_mean, model.input_std),
])

pred_verb_indices = []
pred_noun_indices = []
pred_verb_classes = []
pred_noun_classes = []
gt_verb_indices = []
gt_noun_indices = []
gt_verb_classes = []
gt_noun_classes = []

# Truncated in the original; presumably it collects the lists declared above.
d = {
    'pred_verb_indices': [],
    'pred_noun_indices': [],
    'pred_verb_classes': [],
    'pred_noun_classes': [],
    'gt_verb_indices': [],
    'gt_noun_indices': [],
    'gt_verb_classes': [],
    'gt_noun_classes': [],
}
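# With crop_count == 10, GroupOverSample emits 5 crops (4 corners + center)
# plus their horizontal flips, so scores arrive one row per crop/segment and
# must be averaged back to a single vector per video. A hedged shape sketch
# with hypothetical dimensions:
import torch

segments, crops, num_classes = 8, 10, 100                 # illustrative sizes
scores = torch.randn(crops * segments, num_classes)       # one row per crop/segment
video_score = scores.view(crops, segments, num_classes).mean(dim=(0, 1))
pred_index = int(video_score.argmax())                    # feeds the lists above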
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loss criterion
criterion = torch.nn.CrossEntropyLoss().to(device)

# Move to GPU if available and set to evaluation mode
model.eval()
model.to(device)

# Validation transform
cropping = GroupOverSample(model.tsn_model.input_size,
                           model.tsn_model.scale_size)
val_transform = Compose([
    cropping,
    # The original compared base_model with itself (always True / always
    # False); the intended test is for the BNInception backbone.
    Stack(roll=args['base_model'] == 'BNInception'),
    ToTorchFormatTensor(div=args['base_model'] != 'BNInception'),
    GroupNormalize(model.tsn_model.input_mean, model.tsn_model.input_std),
])

# Datasets
val_dataset = KFCDataset(args['dataset_csv'],
                         segment_count=args['segment_count'],
                         transform=val_transform,
                         debug=True)

pred_verb_indices = []
pred_noun_indices = []
pred_verb_classes = []
pred_noun_classes = []
gt_verb_indices = []
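# The lists above are presumably filled by a loop like the following; a hedged
# sketch, assuming KFCDataset yields (frames, verb_idx, noun_idx) and the model
# returns (verb_logits, noun_logits) as in two-head TSN variants:
with torch.no_grad():
    loader = torch.utils.data.DataLoader(val_dataset, batch_size=1)
    for frames, verb_idx, noun_idx in loader:
        verb_logits, noun_logits = model(frames.to(device))
        pred_verb_indices.append(int(verb_logits.argmax(dim=1)))
        pred_noun_indices.append(int(noun_logits.argmax(dim=1)))
        gt_verb_indices.append(int(verb_idx))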