Example #1
    def get(self, rgb_frames, phase_frames):
        assert len(rgb_frames) == len(phase_frames)
        assert len(rgb_frames) <= self.max_len
        phase_images = []
        for frames in phase_frames:
            phase_img_list = []
            for frame in frames:
                img = Image.open(frame).convert('L')
                phase_img_list.append(img)
            phase_images.append(phase_img_list)

        if not self.test_mode:
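            # One seed is shared by the flip and crop below so every frame in
            # the clip receives the identical random augmentation.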
            random_seed = np.random.randint(250)
            W, H = phase_images[0][0].size
            phase_transform = torchvision.transforms.Compose([
                GroupRandomHorizontalFlip(seed=random_seed),
                GroupRandomCrop(size=int(W * 0.85), seed=random_seed),
                GroupScale(size=self.phase_size),
                Stack(),
                ToTorchFormatTensor()
            ])
        else:
            phase_transform = torchvision.transforms.Compose([
                GroupScale(size=self.phase_size),
                Stack(),
                ToTorchFormatTensor()
            ])

        flat_phase_images = []
        for sublist in phase_images:
            flat_phase_images.extend(sublist)
        flat_phase_images_trans = phase_transform(flat_phase_images)
        phase_images = flat_phase_images_trans.view(len(phase_images),
                                                    self.num_phase + 1,
                                                    self.phase_size,
                                                    self.phase_size)
        phase_images = phase_images.float().cuda()
        phase_batch_0, phase_batch_1 = phase_2_output(
            phase_images,
            self.steerable_pyramid,
            return_phase=self.return_phase)
        rgb_features = []
        for frame in rgb_frames:
            video = frame.split('/')[-4]
            utter = frame.split("/")[-3]
            index = int(frame.split("/")[-1].split(".")[0].split("_")[-1])
            path = os.path.join(self.pretrained_feature_root, video,
                                utter + ".mp4", "{:05d}.npy".format(index))
            try:
                rgb_features.append(np.load(path))
            except IOError as e:
                raise ValueError(
                    "Incorrect feature path: {}".format(path)) from e
        return [phase_batch_0, phase_batch_1, np.array(rgb_features)]
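
The train branch above passes one shared seed into both random transforms. A minimal sketch of that seeding idea with a hypothetical stand-in class (not the repo's GroupRandomHorizontalFlip):

# Hypothetical stand-in illustrating the seeded group-transform pattern.
import random
from PIL import Image

class SeededGroupFlip:
    def __init__(self, seed):
        self.seed = seed

    def __call__(self, imgs):
        # Re-seeding before the draw makes the flip decision deterministic,
        # so a crop transform built with the same seed stays in sync.
        random.seed(self.seed)
        if random.random() < 0.5:
            return [img.transpose(Image.FLIP_LEFT_RIGHT) for img in imgs]
        return imgs

frames = [Image.new('L', (64, 64)) for _ in range(3)]
frames = SeededGroupFlip(seed=42)(frames)
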
Example #2
    def __init__(self, cfg: DictConfig):
        super().__init__()
        self.train_gulp_dir = Path(cfg.data.train_gulp_dir)
        self.val_gulp_dir = Path(cfg.data.val_gulp_dir)
        self.test_gulp_dir = Path(cfg.data.test_gulp_dir)
        self.cfg = cfg

        channel_count = (3 if self.cfg.modality == "RGB"
                         else 2 * self.cfg.data.segment_length)
        common_transform = Compose([
            Stack(bgr=self.cfg.modality == "RGB"
                  and self.cfg.data.preprocessing.get("bgr", False)),
            ToTorchFormatTensor(div=self.cfg.data.preprocessing.rescale),
            GroupNormalize(
                mean=list(self.cfg.data.preprocessing.mean),
                std=list(self.cfg.data.preprocessing.std),
            ),
            ExtractTimeFromChannel(channel_count),
        ])
        self.train_transform = Compose([
            GroupMultiScaleCrop(
                self.cfg.data.preprocessing.input_size,
                self.cfg.data.train_augmentation.multiscale_crop_scales,
            ),
            GroupRandomHorizontalFlip(is_flow=self.cfg.modality == "Flow"),
            common_transform,
        ])
        self.test_transform = Compose([
            GroupScale(self.cfg.data.test_augmentation.rescale_size),
            GroupCenterCrop(self.cfg.data.preprocessing.input_size),
            common_transform,
        ])
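
Only a handful of config fields are read by this constructor. A hypothetical minimal config that would satisfy it, with the keys inferred from the attribute accesses above and purely illustrative values:

from omegaconf import OmegaConf

# Hypothetical values; only the key names come from the code above.
cfg = OmegaConf.create({
    "modality": "RGB",
    "data": {
        "train_gulp_dir": "/path/to/gulp/train",
        "val_gulp_dir": "/path/to/gulp/val",
        "test_gulp_dir": "/path/to/gulp/test",
        "segment_length": 1,
        "preprocessing": {
            "bgr": False,
            "rescale": True,
            "mean": [0.485, 0.456, 0.406],  # illustrative ImageNet stats
            "std": [0.229, 0.224, 0.225],
            "input_size": 224,
        },
        "train_augmentation": {
            "multiscale_crop_scales": [1.0, 0.875, 0.75, 0.66],
        },
        "test_augmentation": {"rescale_size": 256},
    },
})
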
Example #3
from opts import parser

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

plt.ion()
plt.show()

global best_prec1
best_prec1 = 0
args = parser.parse_args()

transform_list = Compose([
    GroupScale((150, 150)),
    Augmentation(),
    Stack(),
    ToTorchFormatTensor(div=True)
])

print("Loading training dataset")
train_loader = torch.utils.data.DataLoader(DataSetPol(
    "/media/data_cifs/curvy_2snakes_300/",
    args.train_list,
    transform=transform_list),
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=4,
                                           pin_memory=True)

print("Loading validation dataset")
val_loader = torch.utils.data.DataLoader(DataSetPol(
    "/media/data_cifs/curvy_2snakes_300/",
    args.val_list,  # assumed counterpart of args.train_list above
    transform=transform_list),
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         num_workers=4,
                                         pin_memory=True)
Example #4
def main(conf, test_set, test_part=-1):
    gulp_path = os.path.join(conf.gulp_test_dir, conf.modality.lower(), 'test',
                             test_set)
    gulp_path = os.path.realpath(gulp_path)
    gulp_path = Path(gulp_path)

    with open(conf.classes_map, "rb") as f:
        classes_map = pickle.load(f)
    conf.num_classes = count_num_classes(classes_map)

    net = TSN(conf.num_classes,
              1,
              conf.modality,
              base_model=conf.arch,
              consensus_type=conf.crop_fusion_type,
              dropout=conf.dropout)

    checkpoint = torch.load(conf.weights)
    print("Model epoch {} best prec@1: {}".format(checkpoint['epoch'],
                                                  checkpoint['best_prec1']))

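    # The checkpoint was saved from a DataParallel-wrapped model, so every
    # key carries a leading "module." prefix; strip it before loading.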
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    net.load_state_dict(base_dict)

    if conf.test_crops == 1:
        cropping = torchvision.transforms.Compose([
            GroupScale(net.scale_size),
            GroupCenterCrop(net.input_size),
        ])
    elif conf.test_crops == 10:
        cropping = torchvision.transforms.Compose(
            [GroupOverSample(net.input_size, net.scale_size)])
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported, but got {}".format(
                conf.test_crops))

    class_type = 'verb+noun' if conf.class_type == 'action' else conf.class_type
    if conf.modality == 'Flow':
        dataset = EpicVideoFlowDataset(gulp_path=gulp_path,
                                       class_type=class_type)
    else:
        dataset = EpicVideoDataset(gulp_path=gulp_path, class_type=class_type)

    data_loader = torch.utils.data.DataLoader(EpicTSNTestDataset(
        dataset,
        classes_map,
        num_segments=conf.test_segments,
        new_length=1 if conf.modality == "RGB" else 5,
        modality=conf.modality,
        transform=torchvision.transforms.Compose([
            cropping,
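            # BNInception is ported from Caffe and expects BGR channel order
            # with pixel values in [0, 255], hence roll=True and div=False
            # when that backbone is selected.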
            Stack(roll=conf.arch == 'BNInception'),
            ToTorchFormatTensor(div=conf.arch != 'BNInception'),
            GroupNormalize(net.input_mean, net.input_std),
        ]),
        part=test_part),
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=conf.workers * 2,
                                              pin_memory=True)

    net = torch.nn.DataParallel(net, device_ids=conf.gpus).cuda()
    net.eval()

    total_num = len(data_loader.dataset)
    output = []

    proc_start_time = time.time()
    for i, (keys, input_) in enumerate(data_loader):
        rst = eval_video(conf, (i, keys, input_), net)
        output.append(rst[1:])
        cnt_time = time.time() - proc_start_time
        print('video {} done, total {}/{}, average {} sec/video'.format(
            i, i + 1, total_num,
            float(cnt_time) / (i + 1)))

    video_index = [x[0] for x in output]
    scores = [x[1] for x in output]

    save_scores = './{}/tsn_{}_{}_testset_{}_{}_lr_{}_model_{:03d}.npz'.format(
        conf.checkpoint, conf.class_type, conf.modality.lower(), test_set,
        conf.arch, conf.lr, checkpoint['epoch'])
    if test_part > 0:
        save_scores = save_scores.replace('.npz',
                                          '_part-{}.npz'.format(test_part))
    np.savez(save_scores, segment_indices=video_index, scores=scores)
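
eval_video is referenced but not shown in this example. A hedged sketch of what the call site implies, loosely modeled on the reference TSN test script (the reshape and averaging are assumptions, and it reuses the example's torch import):

def eval_video(conf, video_data, net):
    # Sketch only: the (index, keys, scores) return matches the unpacking
    # at the call site above.
    i, keys, input_ = video_data
    length = 3 if conf.modality == "RGB" else 10  # channels per segment
    data = input_.view(-1, length, input_.size(-2), input_.size(-1))
    with torch.no_grad():
        # Average class scores over all crops and segments of the video.
        scores = net(data.cuda()).mean(dim=0).cpu().numpy()
    return i, keys, scores
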
Example #5
    crop_count = 10

    if crop_count == 1:
        cropping = Compose([
            GroupScale(model.scale_size),
            GroupCenterCrop(model.input_size),
        ])
    elif crop_count == 10:
        cropping = GroupOverSample(model.input_size, model.scale_size)
    else:
        raise ValueError(
            "Only 1 and 10 crops are supported, but got {}".format(crop_count))

    transform = Compose([
        cropping,
        Stack(roll=base_model == 'BNInception'),
        ToTorchFormatTensor(div=base_model != 'BNInception'),
        GroupNormalize(model.input_mean, model.input_std),
    ])

    pred_verb_indices = []
    pred_noun_indices = []
    pred_verb_classes = []
    pred_noun_classes = []
    gt_verb_indices = []
    gt_noun_indices = []
    gt_verb_classes = []
    gt_noun_classes = []

    d = {
        # Remaining keys assumed to mirror the result lists initialised above.
        'pred_verb_indices': [],
        'pred_noun_indices': [],
        'pred_verb_classes': [],
        'pred_noun_classes': [],
        'gt_verb_indices': [],
        'gt_noun_indices': [],
        'gt_verb_classes': [],
        'gt_noun_classes': [],
    }
Example #6
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loss criterion
criterion = torch.nn.CrossEntropyLoss().to(device)

# Move to GPU if available and set to evaluation
model.eval()
model.to(device)

# Val transform
cropping = GroupOverSample(model.tsn_model.input_size,
                           model.tsn_model.scale_size)
val_transform = Compose([
    cropping,
    Stack(roll=args['base_model'] == 'BNInception'),
    ToTorchFormatTensor(div=args['base_model'] != 'BNInception'),
    GroupNormalize(model.tsn_model.input_mean, model.tsn_model.input_std),
])

# Datasets
val_dataset = KFCDataset(args['dataset_csv'],
                         segment_count=args['segment_count'],
                         transform=val_transform,
                         debug=True)

pred_verb_indices = []
pred_noun_indices = []
pred_verb_classes = []
pred_noun_classes = []
gt_verb_indices = []