Example 1
def get_predictions_for_8_frames(net, frames):
    a_t = time.time()

    args_arch = "BNInception"
    transform = torchvision.transforms.Compose([
        transforms.GroupOverSample(net.input_size, net.scale_size),
        transforms.Stack(roll=(args_arch in ['BNInception', 'InceptionV3'])),
        transforms.ToTorchFormatTensor(
            div=(args_arch not in ['BNInception', 'InceptionV3'])),
        transforms.GroupNormalize(net.input_mean, net.input_std),
    ])

    data = transform(frames)
    inputs = data.view(-1, 3, data.size(1), data.size(2)).unsqueeze(0)
    with torch.no_grad():
        logits = net(inputs)
        torch.onnx.export(net,
                          inputs,
                          "plm.onnx",
                          verbose=True,
                          input_names=["input"],
                          output_names=["output"])
        h_x = torch.mean(F.softmax(logits, 1), dim=0).data
        probs, idx = h_x.sort(0, True)

    b_t = time.time()

    print(f'Elapsed: {b_t - a_t}')

    # Print the top-5 predictions.
    for i in range(5):
        print('{:.3f} -> {}'.format(probs[i], categories[idx[i]]))
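A minimal driver sketch for the function above, assuming `net` is a TSN-style model exposing `input_size`, `scale_size`, `input_mean`, and `input_std`, and that the frames arrive as PIL images (the frame paths are hypothetical):

from PIL import Image

frame_paths = ['frames/frame_{:02d}.jpg'.format(i) for i in range(8)]  # hypothetical paths
frames = [Image.open(p).convert('RGB') for p in frame_paths]
get_predictions_for_8_frames(net, frames)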
Example 2
    def __init__(self, root,
                 hierarchy=1,
                 rescale_size=256,
                 crop_size=224,
                 seed=1):
        if not os.path.exists(root):
            raise ValueError('Root does not exist')
        self.root = root
        if hierarchy not in (1, 2):
            raise ValueError('Bad `hierarchy`, must be one of 1, 2.')
        self.hierarchy = hierarchy
        if self.hierarchy == 1:
            self.videos = [os.path.join(self.root, v) for v in os.listdir(self.root)]
        else:
            cls = os.listdir(self.root)
            self.videos = [os.path.join(self.root, c, v)
                           for c in cls
                           for v in os.listdir(os.path.join(self.root, c))]
        self.videos = sorted(self.videos, key=str.lower)
        self.rescale_size = rescale_size
        self.crop_size = crop_size
        self.seed = seed
        random.seed(self.seed)
        np.random.seed(self.seed)
        self.offset = 0

        # Alternative per-image ImageNet-style pipeline, kept for reference:
        # self.trans = torchvision.transforms.Compose([
        #     torchvision.transforms.Resize(self.rescale_size, interpolation=Image.BILINEAR),
        #     torchvision.transforms.CenterCrop(self.crop_size),
        #     torchvision.transforms.ToTensor(),
        #     torchvision.transforms.Normalize(
        #         mean=[.485, .456, .406],
        #         std=[.229, .224, .225])
        # ])
        self.trans = torchvision.transforms.Compose([
            transforms.GroupScale(self.rescale_size, interpolation=Image.BILINEAR),
            transforms.GroupCenterCrop(self.crop_size),
            transforms.Stack4d(roll=False),
            transforms.ToTorchFormatTensor4d(div=True),
            transforms.GroupNormalize(
                mean=[.5, .5, .5],
                std=[.5, .5, .5])
        ])
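Hypothetical usage of this constructor (the enclosing class is not shown in the snippet, so the name VideoDataset below is an assumption): hierarchy=1 expects a flat root/<video> layout, hierarchy=2 a class-structured root/<class>/<video> layout.

# VideoDataset is a hypothetical name for the class this __init__ belongs to.
ds = VideoDataset('/data/videos', hierarchy=2, rescale_size=256, crop_size=224)
print(len(ds.videos), ds.videos[:3])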
Example 3
    # Alternative: prune keys from a converted state dict and load non-strictly.
    # del base_dict[key]
    # net.load_state_dict(base_dict, strict=False)
    net.load_state_dict(checkpoint, strict=True)
    net.eval()
    net.cuda()

    # Initialize frame transforms.
    transform = torchvision.transforms.Compose([
        transforms.GroupOverSample(net.module.input_size, net.module.scale_size),
        transforms.Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
        transforms.ToTorchFormatTensor(div=(args.arch not in ['BNInception', 'InceptionV3'])),
        transforms.GroupNormalize(net.module.input_mean, net.module.input_std),
    ])

    segments_gt = [0, 0, 1, 1, 0, 0, 0,
                   0, 0, 1, 1, 1, 1, 0,
                   1, 0, 0, 0, 0, 0, 0,
                   1, 1, 1, 0, 0, 0, 0,
                   1, 1, 1, 0, 0, 1, 1,
                   2, 2, 0, 0, 1, 1, 1,
                   0, 0, 0, 0, 2]
    

    pred = [2] * len(segments_gt)
    video_dir = 'segments_2_slow/*.mp4'
    for video_file_name in sorted(glob.glob(video_dir)):
        print('best acc : {}, best cp : {}'.format(best_acc, best_cp))
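The loop body is truncated in this excerpt, and best_acc / best_cp are computed elsewhere; a sketch of the per-segment accuracy bookkeeping they presumably track (an assumption, not the project's code):

# Hypothetical accuracy bookkeeping over the per-segment predictions.
correct = sum(int(p == gt) for p, gt in zip(pred, segments_gt))
best_acc = correct / float(len(segments_gt))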
Example 4
def main(argv):
    # Read arguments passed
    (opts, args) = parser.parse_args(argv)

    # Reading config
    cfg = config(opts.config,
                 debugging=False,
                 additionalText="training_ERM_seen_resnet18")

    # Use CUDA
    # os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    use_cuda = torch.cuda.is_available()

    # If the manual seed has not been chosen yet
    if cfg.manualSeed is None:
        cfg.manualSeed = 1

    # Set seed for reproducibility of the CPU and GPU randomization processes
    random.seed(cfg.manualSeed)
    torch.manual_seed(cfg.manualSeed)

    if use_cuda:
        torch.cuda.manual_seed_all(cfg.manualSeed)

    dataloader_train = None
    if hasattr(cfg, "train_mode"):

        # Group-wise preprocessing (transform) pipeline for training
        transformation_train = torchvision.transforms.Compose([
            transforms.GroupMultiScaleCrop(224, [1, 0.875, 0.75, 0.66]),
            transforms.GroupRandomHorizontalFlip(is_flow=False),
            transforms.Stack(),  # concatenation of images
            transforms.ToTorchFormatTensor(),  # to torch
            transforms.GroupNormalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224,
                                           0.225]),  # Normalization
        ])

        if cfg.algo == "ERM" or cfg.algo == "MTGA":
            # Loading training Dataset with N segment for TSN
            EPICdata_train = EPIC(
                mode=cfg.train_mode,
                cfg=cfg,
                transforms=transformation_train,
            )

            # Creating training dataloader
            # batch_size = 16, num_workers = 8 are a good fit for a 12 GB GPU and >= 16 GB RAM
            dataloader_train = DataLoader(
                EPICdata_train,
                batch_size=cfg.train_batch_size,
                shuffle=True,
                num_workers=cfg.num_worker_train,
                pin_memory=True,
            )
        elif cfg.algo == "IRM":
            df = pd.read_csv(cfg.anno_path)
            p_ids = list(set(df["participant_id"].tolist()))

            dataloader_train = []
            for p_id in p_ids:
                tmp_dataset = EPIC(
                    mode=cfg.train_mode,
                    cfg=cfg,
                    transforms=transformation_train,
                    participant_id=p_id,
                )

                if tmp_dataset.haveData:
                    dataloader_train.append(
                        DataLoader(
                            tmp_dataset,
                            batch_size=cfg.train_batch_size,
                            shuffle=True,
                            num_workers=cfg.num_worker_train,
                            pin_memory=True,
                        ))
        elif cfg.algo == "FSL":
            dataloader_train = {}
            # Loading training Dataset with N segment for TSN
            EPICdata_train_verb = EPIC(mode=cfg.train_mode,
                                       cfg=cfg,
                                       transforms=transformation_train)
            sampler = CategoriesSampler(EPICdata_train_verb.verb_label, 200,
                                        cfg.way, cfg.shot + cfg.query)
            dataloader_train["verb"] = DataLoader(
                dataset=EPICdata_train_verb,
                batch_sampler=sampler,
                num_workers=cfg.num_worker_train,
                pin_memory=True,
            )

            EPICdata_train_noun = EPIC(mode=cfg.train_mode,
                                       cfg=cfg,
                                       transforms=transformation_train)
            sampler = CategoriesSampler(EPICdata_train_noun.noun_label, 200,
                                        cfg.way, cfg.shot + cfg.query)
            dataloader_train["noun"] = DataLoader(
                dataset=EPICdata_train_noun,
                batch_sampler=sampler,
                num_workers=cfg.num_worker_train,
                pin_memory=True,
            )

    dataloader_val = None
    if hasattr(cfg, "val_mode") and hasattr(cfg, "train_mode"):
        # Group-wise preprocessing (transform) pipeline for validation
        transformation_val = torchvision.transforms.Compose([
            transforms.GroupOverSample(
                224, 256),  # group sampling from images using multiple crops
            transforms.Stack(),  # concatenation of images
            transforms.ToTorchFormatTensor(),  # to torch
            transforms.GroupNormalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224,
                                           0.225]),  # Normalization
        ])

        # Loading validation Dataset with N segment for TSN
        EPICdata_val = EPIC(
            mode=cfg.val_mode,
            cfg=cfg,
            transforms=transformation_val,
        )

        # Creating validation dataloader
        dataloader_val = DataLoader(
            EPICdata_val,
            batch_size=cfg.val_batch_size,
            shuffle=False,
            num_workers=cfg.num_worker_val,
            pin_memory=True,
        )

    # Loading Models (Resnet50)
    model = EPICModel(config=cfg)

    if not cfg.feature_extraction:
        if hasattr(cfg, "train_mode"):
            policies = model.get_optim_policies()

            # for group in policies:
            #     print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            #         group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

            # Optimizer
            # initial lr = 0.01
            # momentum = 0.9
            # weight_decay = 5e-4
            optimizer = torch.optim.SGD(policies,
                                        lr=cfg.lr,
                                        momentum=cfg.momentum,
                                        weight_decay=cfg.weight_decay)

            # Loss function (CrossEntropy); IRM needs per-sample losses
            if cfg.algo == "IRM":
                criterion = torch.nn.CrossEntropyLoss(reduction="none")
            else:  # ERM, MTGA, FSL
                criterion = torch.nn.CrossEntropyLoss()

            # If multiple GPUs are available (and bridged)
            # if torch.cuda.device_count() > 1:
            #     print("Let's use", torch.cuda.device_count(), "GPUs!")
            #     model = torch.nn.DataParallel(model)

            # Convert model and loss function to GPU if available for faster computation
            if use_cuda:
                model = model.cuda()
                criterion = criterion.cuda()

            # Loading Trainer
            experiment = Experiment(
                cfg=cfg,
                model=model,
                loss=criterion,
                optimizer=optimizer,
                use_cuda=use_cuda,
                data_train=dataloader_train,
                data_val=dataloader_val,
                debugging=False,
            )

            # Train the model
            experiment.train()

        else:

            # Load Model Checkpoint
            checkpoint = torch.load(cfg.checkpoint_filename_final)
            model.load_state_dict(checkpoint["model_state_dict"])

            if use_cuda:
                model = model.cuda()

            transformation = torchvision.transforms.Compose([
                transforms.GroupOverSample(
                    224,
                    256),  # group sampling from images using multiple crops
                transforms.Stack(),  # concatenation of images
                transforms.ToTorchFormatTensor(),  # to torch
                transforms.GroupNormalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224,
                                               0.225]),  # Normalization
            ])

            # Loading Predictor
            experiment = Experiment(cfg=cfg,
                                    model=model,
                                    use_cuda=use_cuda,
                                    debugging=False)

            filenames = ["seen.json", "unseen.json"]
            for filename in filenames:
                EPICdata = EPIC(
                    mode=cfg.val_mode,
                    cfg=cfg,
                    transforms=transformation,
                    test_mode=filename[:-5],
                )

                data_loader = torch.utils.data.DataLoader(EPICdata,
                                                          batch_size=8,
                                                          shuffle=False,
                                                          num_workers=4,
                                                          pin_memory=True)
                experiment.data_val = data_loader
                experiment.predict(filename)
    else:
        # Load Model Checkpoint
        checkpoint = torch.load(cfg.checkpoint_filename_final)
        model.load_state_dict(checkpoint["model_state_dict"])

        if use_cuda:
            model = model.cuda()

        model.eval()

        transformation = torchvision.transforms.Compose([
            transforms.GroupOverSample(
                224, 256),  # group sampling from images using multiple crops
            transforms.Stack(),  # concatenation of images
            transforms.ToTorchFormatTensor(),  # to torch
            transforms.GroupNormalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224,
                                           0.225]),  # Normalization
        ])

        # Loading Predictor
        experiment = Experiment(cfg=cfg,
                                model=model,
                                use_cuda=use_cuda,
                                debugging=False)

        with torch.no_grad():
            modes = ["train-unseen", "val-unseen"]
            for mode in modes:
                # Zero initial rows: np.empty((1, 2050)) would prepend a row of
                # uninitialized memory that then gets saved with the features.
                data = np.empty((0, 2050))
                EPICdata = EPIC(
                    mode=mode,
                    cfg=cfg,
                    transforms=transformation,
                )

                data_loader = torch.utils.data.DataLoader(EPICdata,
                                                          batch_size=1,
                                                          shuffle=False,
                                                          num_workers=0,
                                                          pin_memory=True)

                for i, sample_batch in enumerate(data_loader):
                    output = experiment.extract_features(sample_batch)
                    verb_ann = sample_batch["verb_id"].data.item()
                    noun_ann = sample_batch["noun_id"].data.item()
                    out = np.append(np.mean(output, 0), verb_ann)
                    out = np.append(out, noun_ann)
                    data = np.concatenate((data, np.expand_dims(out, 0)), 0)
                np.save(mode, data)
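A hedged sketch of reading back the arrays written by np.save above: each row holds a 2048-dimensional feature vector followed by the verb and noun ids (the .npy file name follows from the mode string):

# Load the features saved for one mode, e.g. 'val-unseen'.
feats = np.load('val-unseen.npy')
X = feats[:, :2048]                    # frame features
verb_ids = feats[:, 2048].astype(int)  # verb annotation
noun_ids = feats[:, 2049].astype(int)  # noun annotation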
Example 5
    t2 = t_stack(t1)
    print('t2', len(t2), t2.shape)
    #

    #----------------------------
    t_start = time.time()

    t_torch = transforms.ToTorchFormatTensor(div=False)  # chw
    t3 = t_torch(t2)
    print('t3', t3.shape)

    t_end = time.time()
    t_diff = t_end - t_start
    print('cost {}s'.format(t_diff))  #0.055351972579956055s
    #----------------------------
    t_norm = transforms.GroupNormalize(mean=[104, 117, 128], std=[1])
    t4 = t_norm(t3)
    print('t4', t4.shape)

    #imshow(t2[:,:,:3])
    #imshow(t1[0])
    #print(t3[:3,:8,:8])
    #plt.imshow(np.transpose(t3[:3,:,:], (1, 2,0)))
    print(t3.size(0))

#---------------------------------------------------------------------------------
#print('-'*10,'pickle','-'*20)
#
xy_train = EpicLoad.read_pickle('epic_kitchens/train.pkl')
xy_test = EpicLoad.read_pickle('epic_kitchens/test.pkl')
xy_val = EpicLoad.read_pickle('epic_kitchens/val.pkl')
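EpicLoad.read_pickle is not defined in this excerpt; a minimal sketch of what such a helper might look like (an assumption, not the project's implementation):

import pickle

def read_pickle(path):
    # Deserialize one pickled object from disk.
    with open(path, 'rb') as f:
        return pickle.load(f)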
Example 6
# Show all entrypoints and their help strings
for entrypoint in torch.hub.list(repo):
    print(entrypoint)
    print(torch.hub.help(repo, entrypoint))

cap = cv2.VideoCapture(videofile)
P_VAL = 0.85

# Initialize frame transforms.
transform = torchvision.transforms.Compose([
    transforms.GroupScale(tsn.scale_size),
    transforms.GroupCenterCrop(tsn.input_size),
    transforms.Stack(roll=(args.arch in ['BNInception', 'InceptionV3'])),
    transforms.ToTorchFormatTensor(
        div=(args.arch not in ['BNInception', 'InceptionV3'])),
    transforms.GroupNormalize(tsn.input_mean, tsn.input_std),
])


def load_frames(frames, num_frames=8):
    """
    Convert video frame images to list of tensors
    """
    if len(frames) >= num_frames:
        return frames[::int(np.ceil(len(frames) / float(num_frames)))]
    else:
        raise ValueError(
            'Video must have at least {} frames'.format(num_frames))
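A hedged sketch of feeding load_frames from the cv2.VideoCapture opened above, converting the BGR ndarrays OpenCV returns into the PIL images the group transforms expect:

from PIL import Image

raw_frames = []
ok, frame = cap.read()
while ok:
    # OpenCV decodes to BGR; the transforms above expect RGB PIL images.
    raw_frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    ok, frame = cap.read()
selected = load_frames(raw_frames)  # raises ValueError below 8 frames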


# check if CUDA is available