Example #1
def init_dataloaders(args):
    loaders = {}

    # init dataloaders for training and validation
    for split in ['train', 'val']:
        batch_size = args.batch_size
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        image_transforms = transforms.Compose([to_tensor, normalize])

        if args.dataset == 'youtube':
            dataset = get_dataset(args,
                                  split=split,
                                  image_transforms=image_transforms,
                                  target_transforms=None,
                                  augment=args.augment and split == 'train',
                                  inputRes=(256, 448),
                                  video_mode=True,
                                  use_prev_mask=False)
        else:
            dataset = get_dataset(args,
                                  split=split,
                                  image_transforms=image_transforms,
                                  target_transforms=None,
                                  augment=args.augment and split == 'train',
                                  video_mode=True,
                                  use_prev_mask=False)

        loaders[split] = data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers,
                                         drop_last=True)
    return loaders
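All of these snippets assume the usual imports (transforms from torchvision, data from torch.utils) plus the project's own get_dataset factory. A minimal, hypothetical call site for Example #1 could look like this; the argparse fields are assumptions inferred from the attributes the function reads:

import argparse

from torch.utils import data          # data.DataLoader
from torchvision import transforms    # ToTensor / Normalize / Compose

# Hypothetical namespace: only the fields init_dataloaders() reads.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='youtube')
parser.add_argument('--batch_size', type=int, default=4)
parser.add_argument('--num_workers', type=int, default=4)
parser.add_argument('--augment', action='store_true')
args = parser.parse_args([])

loaders = init_dataloaders(args)  # requires the project's get_dataset
train_loader, val_loader = loaders['train'], loaders['val']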
Example #2
def init_dataloaders(args):
    loaders = {}

    # init dataloaders for training and validation
    for split in ['train', 'val']:
        batch_size = args.batch_size
        imsize = args.imsize
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        image_transforms = transforms.Compose([to_tensor, normalize])

        # dataset and loaders for training and validation splits
        dataset = get_dataset(args,
                              split=split,
                              image_transforms=image_transforms,
                              augment=args.augment and split == 'train',
                              imsize=imsize)

        loaders[split] = data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers,
                                         drop_last=True)
        class_names = dataset.get_classes()
    return loaders, class_names
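This variant differs from Example #1 in that it also returns the dataset's class names. A hypothetical call site (same assumed fields as above, plus an imsize field):

# Hypothetical usage; args additionally carries an 'imsize' field.
loaders, class_names = init_dataloaders(args)
print('number of classes:', len(class_names))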
Example #3
def init_dataloaders(args, e, batch_sz):
    loaders = {}

    # init dataloaders for training and validation
    for split in ['train', 'val']:

        if e == 0:
            batch_size = args.batch_size
        else:
            batch_size = batch_sz

        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        image_transforms = transforms.Compose([to_tensor, normalize])

        if args.dataset == 'davis2017':
            dataset = get_dataset(
                args,
                split=split,
                e=0,
                image_transforms=image_transforms,
                target_transforms=None,
                augment=args.augment and split == 'train',
                inputRes=(240, 427),
                video_mode=True,
                use_prev_mask=False,
                eval=False)  # use_prev_mask is True only for evaluation
        else:  # args.dataset == 'youtube'
            dataset = get_dataset(
                args,
                split=split,
                e=0,
                image_transforms=image_transforms,
                target_transforms=None,
                augment=args.augment and split == 'train',
                inputRes=(287, 950),
                video_mode=True,
                use_prev_mask=False,
                eval=False)  # use_prev_mask is True only for evaluation

        loaders[split] = data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers,
                                         drop_last=True)
    return loaders
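Here the caller supplies the epoch index e and a batch size of its own choosing; args.batch_size is only used in the first epoch. A hypothetical driver loop that rebuilds the loaders each epoch (the halving schedule and args.max_epochs are illustrations, not taken from the project):

# Illustrative schedule only: max_epochs and the halving rule are assumed.
batch_sz = args.batch_size
for e in range(args.max_epochs):
    if e > 0:
        batch_sz = max(1, batch_sz // 2)
    loaders = init_dataloaders(args, e, batch_sz)
    for batch in loaders['train']:
        pass  # one training step per batch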
Example #4
    def __init__(self, args):

        self.split = args.eval_split
        self.dataset = args.dataset
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        image_transforms = transforms.Compose([to_tensor, normalize])

        if args.dataset == 'youtube':
            dataset = get_dataset(args,
                                  split=self.split,
                                  image_transforms=image_transforms,
                                  target_transforms=None,
                                  augment=args.augment
                                  and self.split == 'train',
                                  inputRes=(256, 448),
                                  video_mode=True,
                                  use_prev_mask=False)
        else:
            dataset = get_dataset(args,
                                  split=self.split,
                                  image_transforms=image_transforms,
                                  target_transforms=None,
                                  augment=args.augment
                                  and self.split == 'train',
                                  video_mode=True,
                                  use_prev_mask=False)

        self.loader = data.DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      shuffle=False,
                                      num_workers=args.num_workers,
                                      drop_last=False)

        self.args = args

        print(args.model_name)
        encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, load_args = load_checkpoint(
            args.model_name, args.use_gpu)
        load_args.use_gpu = args.use_gpu
        self.encoder = FeatureExtractor(load_args)
        self.decoder = RSIS(load_args)

        print(load_args)

        if args.ngpus > 1 and args.use_gpu:
            self.decoder = torch.nn.DataParallel(self.decoder,
                                                 device_ids=range(args.ngpus))
            self.encoder = torch.nn.DataParallel(self.encoder,
                                                 device_ids=range(args.ngpus))

        encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
        self.encoder.load_state_dict(encoder_dict)
        # drop the 'fc_stop' head from the checkpoint before loading the decoder
        to_be_deleted_dec = []
        for k in decoder_dict.keys():
            if 'fc_stop' in k:
                to_be_deleted_dec.append(k)
        for k in to_be_deleted_dec:
            del decoder_dict[k]
        self.decoder.load_state_dict(decoder_dict)

        if args.use_gpu:
            self.encoder.cuda()
            self.decoder.cuda()

        self.encoder.eval()
        self.decoder.eval()
        if load_args.length_clip == 1:
            self.video_mode = False
            print('video mode not activated')
        else:
            self.video_mode = True
            print('video mode activated')
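The checkpoint filtering above can be expressed as a small standalone helper; the function below is a hypothetical utility, not part of the original project:

from collections import OrderedDict


def drop_keys_containing(state_dict, substring):
    # Return a copy of the state dict without entries whose key
    # contains the given substring (e.g. 'fc_stop').
    return OrderedDict(
        (k, v) for k, v in state_dict.items() if substring not in k)

With it, the deletion loop in Example #4 collapses to decoder_dict = drop_keys_containing(decoder_dict, 'fc_stop').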
Example #5
    def __init__(self, args):
        self.split = args.eval_split
        self.display = args.display
        self.dataset = args.dataset
        self.all_classes = args.all_classes
        self.T = args.maxseqlen
        self.batch_size = args.batch_size

        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        image_transforms = transforms.Compose([to_tensor, normalize])

        dataset = get_dataset(args,
                              self.split,
                              image_transforms,
                              augment=False)

        self.loader = data.DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      shuffle=False,
                                      num_workers=args.num_workers,
                                      drop_last=False)

        self.sample_list = dataset.get_sample_list()
        self.args = args

        encoder_dict, decoder_dict, _, _, load_args = load_checkpoint(
            args.model_name)
        self.args.use_feedback = load_args.use_feedback
        self.args.base_model = load_args.base_model
        self.hidden_size = load_args.hidden_size
        self.args.nconvlstm = load_args.nconvlstm
        self.encoder = FeatureExtractor(load_args)
        self.decoder = RSIS(load_args)

        if args.ngpus > 1 and args.use_gpu:
            self.decoder = torch.nn.DataParallel(self.decoder,
                                                 device_ids=range(args.ngpus))
            self.encoder = torch.nn.DataParallel(self.encoder,
                                                 device_ids=range(args.ngpus))

        # check if the model was trained using multiple gpus
        trained_parallel = False
        for k, v in encoder_dict.items():
            if k[:7] == "module.":
                trained_parallel = True
            break  # only the first checkpoint key needs to be inspected

        if trained_parallel and not args.ngpus > 1:
            # create new OrderedDict that does not contain "module."
            new_encoder_state_dict = OrderedDict()
            new_decoder_state_dict = OrderedDict()
            for k, v in encoder_dict.items():
                name = k[7:]  # remove "module."
                new_encoder_state_dict[name] = v
            for k, v in decoder_dict.items():
                name = k[7:]  # remove "module."
                new_decoder_state_dict[name] = v
            encoder_dict = new_encoder_state_dict
            decoder_dict = new_decoder_state_dict

        self.encoder.load_state_dict(encoder_dict)
        self.decoder.load_state_dict(decoder_dict)

        if args.use_gpu:
            self.encoder.cuda()
            self.decoder.cuda()

        self.encoder.eval()
        self.decoder.eval()
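The "module." handling above is the standard fix for loading a torch.nn.DataParallel checkpoint into a non-parallel module: DataParallel prefixes every parameter name with "module." when saving. A self-contained sketch of the same conversion (hypothetical helper mirroring the loop in Example #5):

from collections import OrderedDict


def strip_module_prefix(state_dict):
    # Remove the 'module.' prefix torch.nn.DataParallel adds to
    # every parameter name, so a plain module can load the dict.
    cleaned = OrderedDict()
    for key, value in state_dict.items():
        cleaned[key[7:] if key.startswith('module.') else key] = value
    return cleaned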
Example #6
    def __init__(self, args):

        self.split = args.eval_split
        self.display = args.display
        self.no_display_text = args.no_display_text
        self.dataset = args.dataset
        self.all_classes = args.all_classes
        self.use_cats = args.use_cats
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        image_transforms = transforms.Compose([to_tensor, normalize])

        dataset = get_dataset(args,
                              self.split,
                              image_transforms,
                              augment=False,
                              imsize=args.imsize)

        self.sample_list = dataset.get_sample_list()
        self.class_names = dataset.get_classes()
        if args.dataset == 'pascal':
            self.gt_file = pickle.load(
                open(
                    os.path.join(args.pascal_dir,
                                 'VOCGT_%s.pkl' % (self.split)), 'rb'))
            self.key_to_anns = dict()
            self.ignoremasks = {}
            for ann in self.gt_file:
                if ann['ignore'] == 1:
                    if isinstance(ann['segmentation']['counts'], list):
                        im_height = ann['segmentation']['size'][0]
                        im_width = ann['segmentation']['size'][1]
                        rle = mask.frPyObjects([ann['segmentation']],
                                               im_height, im_width)
                    else:
                        rle = [ann['segmentation']]
                    m = mask.decode(rle)
                    self.ignoremasks[ann['image_id']] = m
                if ann['image_id'] in self.key_to_anns:
                    self.key_to_anns[ann['image_id']].append(ann)
                else:
                    self.key_to_anns[ann['image_id']] = [ann]
            self.coco = create_coco_object(args, self.sample_list,
                                           self.class_names)
        self.loader = data.DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      shuffle=False,
                                      num_workers=args.num_workers,
                                      drop_last=False)

        self.args = args
        self.colors = []
        palette = sequence_palette()
        inv_palette = {}
        for k, v in palette.items():
            inv_palette[v] = k
        num_colors = len(inv_palette)
        for i in range(num_colors):
            if i == 0 or i == 21:
                continue
            c = inv_palette[i]
            self.colors.append(c)

        encoder_dict, decoder_dict, _, _, load_args = load_checkpoint(
            args.model_name, args.use_gpu)
        load_args.use_gpu = args.use_gpu
        self.encoder = FeatureExtractor(load_args)
        self.decoder = RSIS(load_args)

        print(load_args)

        if args.ngpus > 1 and args.use_gpu:
            self.decoder = torch.nn.DataParallel(self.decoder,
                                                 device_ids=range(args.ngpus))
            self.encoder = torch.nn.DataParallel(self.encoder,
                                                 device_ids=range(args.ngpus))

        encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
        self.encoder.load_state_dict(encoder_dict)
        self.decoder.load_state_dict(decoder_dict)

        if args.use_gpu:
            self.encoder.cuda()
            self.decoder.cuda()

        self.encoder.eval()
        self.decoder.eval()
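The ignore-mask branch in Example #6 relies on pycocotools: mask.frPyObjects compresses an uncompressed RLE (a dict whose 'counts' is a plain list) and mask.decode turns it into a binary array. A minimal standalone illustration with made-up values:

from pycocotools import mask as mask_utils

# Toy uncompressed RLE for a 4x4 image: run lengths of 0s and 1s in
# column-major order (6 background, 4 foreground, 6 background pixels).
segmentation = {'counts': [6, 4, 6], 'size': [4, 4]}
rle = mask_utils.frPyObjects([segmentation], 4, 4)
m = mask_utils.decode(rle)      # uint8 array of shape (4, 4, 1)
print(m[:, :, 0])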