Example #1
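# Imports and module-level names assumed by these excerpts (not shown in the
# original snippet):
#   import os
#   import torch
#   import torch.backends.cudnn as cudnn
#   from torch.autograd import Variable
#   from torch.utils import data
#   from torchvision import utils
#   import SalEMA
#   from datasets import Poles, Equator, TEST  # project classes; module path is a guess
# clip_length, frame_size, CLIP_LENGTH, and EMA_LOC are assumed to be
# module-level constants defined elsewhere in the original file.
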
def main(args, params = params):
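    # `params` defaults to a module-level dict of DataLoader keyword arguments
    # (e.g. batch_size, shuffle, num_workers); it is captured at definition time
    # and passed straight through as data.DataLoader(train_set, **params).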

    # =================================================
    # ================ Data Loading ===================

    # Expect an error if either the validation or the train split has size 1
    if args.dataset == "Poles":
        print("Commencing training on dataset {}".format(args.dataset))
        train_set = Poles(
            root_path = args.src,
            load_gt = True,
            number_of_videos = int(args.end),
            starting_video = int(args.start),
            clip_length = clip_length,
            resolution = frame_size,
            val_perc = args.val_perc,
            split = "train")
        print("Size of train set is {}".format(len(train_set)))
        train_loader = data.DataLoader(train_set, **params)

        if args.val_perc > 0:
            val_set = Poles(
                root_path = args.src,
                load_gt = True,
                number_of_videos = int(args.end),
                starting_video = int(args.start),
                clip_length = clip_length,
                resolution = frame_size,
                val_perc = args.val_perc,
                split = "validation")
            print("Size of validation set is {}".format(len(val_set)))
            val_loader = data.DataLoader(val_set, **params)

    if  args.dataset == "Equator":
        print("Commencing training on dataset {}".format(args.dataset))
        train_set = Equator(
            root_path = args.src,
            load_gt = True,
            number_of_videos = int(args.end),
            starting_video = int(args.start),
            clip_length = clip_length,
            resolution = frame_size,
            val_perc = args.val_perc,
            split = "train")
        print("Size of train set is {}".format(len(train_set)))
        train_loader = data.DataLoader(train_set, **params)

        if args.val_perc > 0:
            val_set = Equator(
                root_path = args.src,
                load_gt = True,
                number_of_videos = int(args.end),
                starting_video = int(args.start),
                clip_length = clip_length,
                resolution = frame_size,
                val_perc = args.val_perc,
                split = "validation")
            print("Size of validation set is {}".format(len(val_set)))
            val_loader = data.DataLoader(val_set, **params)


    else:
        print('Your dataset was not recognized. Check the name again.')
        exit()
    # =================================================
    # ================ Define Model ===================

    # The seed pertains to initializing the weights with a normal distribution
    # Brute-forcing 100 seeds, I found that 65 provides a good starting point
    # (one that looks close to a saliency map predicted by the original SalGAN).
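    # A minimal sketch of how that seed would be applied (torch.manual_seed is
    # the standard PyTorch call; the value 65 comes from the search described above):
    #   torch.manual_seed(65)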
    temporal = True

    if 'EMA' in args.new_model:

        if 'Poles' in args.new_model:
            model = SalEMA.Poles_EMA(alpha=None, ema_loc=args.ema_loc)
            print("Initialized {} with EMA at location {}".format(args.new_model, args.ema_loc))
        elif 'Equator' in args.new_model:
            model = SalEMA.Equator_EMA(alpha=None, ema_loc=args.ema_loc)
            print("Initialized {} with EMA at location {}".format(args.new_model, args.ema_loc))

Example #2

def main(args):


    dst = os.path.join(args.dst, "{}_predictions".format(args.pt_model.replace(".pt", "")))
    print("Output directory {}".format(dst))

    # =================================================
    # ================ Data Loading ===================

    # Expect an error if either the validation or the train split has size 1

    if args.dataset == "Equator" or args.dataset == "Poles" or args.dataset == "other" :
        print("Commencing inference for dataset {}".format(args.dataset))
        dataset = TEST(
            root_path = args.src,
            clip_length = CLIP_LENGTH,
            resolution = frame_size)
        video_name_list = dataset.video_names()  # maps an index to the sample's video name
    else:
        print("Dataset {} not recognized; check the name and try again.".format(args.dataset))
        exit()


    print("Size of test set is {}".format(len(dataset)))

    loader = data.DataLoader(dataset, **params)

    # =================================================
    # ================= Load Model ====================

    # Using same kernel size as they do in the DHF1K paper
    # Amaia uses default hidden size 128
    # input size is 1 since we have grayscale images
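    # A hypothetical cell matching these comments (ConvLSTMCell is illustrative,
    # not necessarily this project's actual class name):
    #   cell = ConvLSTMCell(input_size=1, hidden_size=128, kernel_size=3)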

    if "EMA" in args.pt_model:
        if "poles" in args.pt_model:
            model = SalEMA.Poles_EMA(alpha=args.alpha, ema_loc=EMA_LOC)
        elif "equator" in args.pt_model:
            model = SalEMA.Equator_EMA(alpha=args.alpha, ema_loc=EMA_LOC)
        
        load_model(args.pt_model, model)
        print("Pre-trained model {} loaded succesfully".format(args.pt_model))

        TEMPORAL = True
        print("Alpha tuned to {}".format(model.alpha))

    else:
        print("Your model was not recognized not (pole or equator), check the name of the model and try again.")
        exit()
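
    # load_model is a project helper not shown in this excerpt; a minimal sketch
    # of the standard PyTorch checkpoint-loading pattern it presumably wraps:
    #   def load_model(path, model):
    #       checkpoint = torch.load(path, map_location="cpu")
    #       model.load_state_dict(checkpoint.get("state_dict", checkpoint))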

    dtype = torch.FloatTensor
    if args.use_gpu:
        assert torch.cuda.is_available(), \
            "CUDA is not available in your machine"
        cudnn.benchmark = True 
        model = model.cuda()
        dtype = torch.cuda.FloatTensor


    # ================== Inference =====================

    if not os.path.exists(dst):
        os.mkdir(dst)
    else:
        print(" you are about to write on an existing folder {}. If this is not intentional cancel now.".format(dst))

    # switch to evaluate mode
    model.eval()

    for i, video in enumerate(loader):

        count = 0
        state = None # Initially no hidden state

        elif args.dataset == "Poles" or args.dataset == "Equator":

            video_dst = os.path.join(dst, video_name_list[i])
            # if "shooting" in video_dst:
            #     # CUDA error: out of memory is encountered whenever inference reaches that vid.
            #     continue
            print("Destination: {}".format(video_dst))
            if not os.path.exists(video_dst):
                os.mkdir(video_dst)

            for j, (clip, _) in enumerate(video):
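                # Swap batch and time dims so clip[idx] below yields one frame batch at a time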
                clip = Variable(clip.type(dtype).transpose(0,1), requires_grad=False)

                for idx in range(clip.size()[0]):
                    # Compute output
                    if TEMPORAL:
                        state, saliency_map = model.forward(input_ = clip[idx], prev_state = state)
                    else:
                        saliency_map = model.forward(input_ = clip[idx])

                    
                    saliency_map = saliency_map.squeeze(0)
    
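                    # Min-max normalize the map to [0, 1] so it saves cleanly as a PNG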
                    post_process_saliency_map = (saliency_map-torch.min(saliency_map))/(torch.max(saliency_map)-torch.min(saliency_map))
                    utils.save_image(post_process_saliency_map, os.path.join(video_dst, "{}.png".format(str(count).zfill(4))))
                    if count == 0:
                        print("The final destination is {}".format(os.path.join(video_dst)))
                    count+=1
                if TEMPORAL:
                    state = repackage_hidden(state)
            print("Video {} done".format(i+int(args.start)))