def init_dataloaders(args):
    """Build the training and validation DataLoaders.

    Args:
        args: parsed argument namespace; must provide ``batch_size``,
            ``dataset``, ``augment`` and ``num_workers``.

    Returns:
        dict mapping 'train' and 'val' to a ``torch.utils.data.DataLoader``.
    """
    loaders = {}

    # The image transforms are split-independent, so build them once
    # instead of once per loop iteration (ImageNet mean/std normalization).
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    image_transforms = transforms.Compose([to_tensor, normalize])

    for split in ['train', 'val']:
        # Keyword arguments shared by both dataset branches; only the
        # youtube dataset additionally fixes the input resolution.
        common_kwargs = dict(split=split,
                             image_transforms=image_transforms,
                             target_transforms=None,
                             augment=args.augment and split == 'train',
                             video_mode=True,
                             use_prev_mask=False)
        if args.dataset == 'youtube':
            dataset = get_dataset(args, inputRes=(256, 448), **common_kwargs)
        else:
            dataset = get_dataset(args, **common_kwargs)

        # drop_last keeps batch shapes constant during training.
        loaders[split] = data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers,
                                         drop_last=True)
    return loaders
def init_dataloaders(args):
    """Build training/validation DataLoaders and return the class names.

    Args:
        args: parsed argument namespace; must provide ``batch_size``,
            ``imsize``, ``augment`` and ``num_workers``.

    Returns:
        tuple ``(loaders, class_names)`` where ``loaders`` maps
        'train'/'val' to a DataLoader and ``class_names`` comes from the
        dataset object.
    """
    loaders = {}

    # Transforms do not depend on the split; build them once
    # (ImageNet mean/std normalization).
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    image_transforms = transforms.Compose([to_tensor, normalize])

    for split in ['train', 'val']:
        dataset = get_dataset(args,
                              split=split,
                              image_transforms=image_transforms,
                              augment=args.augment and split == 'train',
                              imsize=args.imsize)
        loaders[split] = data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers,
                                         drop_last=True)
        # NOTE(review): overwritten each iteration, so the returned value
        # is the *last* split's ('val') class list — presumably identical
        # across splits; confirm against get_dataset.
        class_names = dataset.get_classes()

    return loaders, class_names
def init_dataloaders(args, e, batch_sz):
    """Build training/validation DataLoaders with an epoch-dependent batch size.

    Args:
        args: parsed argument namespace.
        e: current epoch index; epoch 0 uses ``args.batch_size``, later
            epochs use ``batch_sz`` (caller may shrink/grow it).
        batch_sz: batch size to use for epochs after the first.

    Returns:
        dict mapping 'train'/'val' to a ``torch.utils.data.DataLoader``.
    """
    loaders = {}

    # Loop-invariant choices hoisted out of the split loop.
    batch_size = args.batch_size if e == 0 else batch_sz

    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    image_transforms = transforms.Compose([to_tensor, normalize])

    # Only the input resolution differs between the two datasets.
    input_res = (240, 427) if args.dataset == 'davis2017' else (287, 950)

    for split in ['train', 'val']:
        # NOTE(review): e is forwarded as the literal 0 regardless of the
        # e argument (as in the original code) — confirm this is intended.
        dataset = get_dataset(
            args,
            split=split,
            e=0,
            image_transforms=image_transforms,
            target_transforms=None,
            augment=args.augment and split == 'train',
            inputRes=input_res,
            video_mode=True,
            use_prev_mask=False,  # use_prev_mask is True only for evaluation
            eval=False)

        loaders[split] = data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers,
                                         drop_last=True)
    return loaders
def __init__(self, args):
    """Prepare evaluation: data loader plus encoder/decoder restored from a checkpoint.

    Args:
        args: parsed argument namespace (eval_split, dataset, augment,
            batch_size, num_workers, model_name, use_gpu, ngpus).
    """
    self.split = args.eval_split
    self.dataset = args.dataset

    # ImageNet mean/std normalization.
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    image_transforms = transforms.Compose([to_tensor, normalize])

    if args.dataset == 'youtube':
        # YouTube-VOS uses a fixed input resolution.
        dataset = get_dataset(args,
                              split=self.split,
                              image_transforms=image_transforms,
                              target_transforms=None,
                              augment=args.augment and self.split == 'train',
                              inputRes=(256, 448),
                              video_mode=True,
                              use_prev_mask=False)
    else:
        dataset = get_dataset(args,
                              split=self.split,
                              image_transforms=image_transforms,
                              target_transforms=None,
                              augment=args.augment and self.split == 'train',
                              video_mode=True,
                              use_prev_mask=False)

    # Evaluation loader: preserve sample order, keep the final partial batch.
    self.loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  drop_last=False)
    self.args = args

    print(args.model_name)
    encoder_dict, decoder_dict, enc_opt_dict, dec_opt_dict, load_args = load_checkpoint(
        args.model_name, args.use_gpu)
    load_args.use_gpu = args.use_gpu
    self.encoder = FeatureExtractor(load_args)
    self.decoder = RSIS(load_args)
    print(load_args)

    if args.ngpus > 1 and args.use_gpu:
        self.decoder = torch.nn.DataParallel(self.decoder, device_ids=range(args.ngpus))
        self.encoder = torch.nn.DataParallel(self.encoder, device_ids=range(args.ngpus))

    encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
    self.encoder.load_state_dict(encoder_dict)

    # Discard the stop-branch weights before loading — presumably the eval
    # decoder has no fc_stop module, so these keys would break load_state_dict.
    # TODO(review): confirm against the RSIS decoder definition.
    for key in [k for k in decoder_dict if 'fc_stop' in k]:
        del decoder_dict[key]
    self.decoder.load_state_dict(decoder_dict)

    if args.use_gpu:
        self.encoder.cuda()
        self.decoder.cuda()

    self.encoder.eval()
    self.decoder.eval()

    # A clip length of 1 means single-frame inputs: temporal (video) mode off.
    if load_args.length_clip == 1:
        self.video_mode = False
        print('video mode not activated')
    else:
        self.video_mode = True
        print('video mode activated')
def __init__(self, args):
    """Prepare evaluation: dataset/loader plus a model restored from a checkpoint.

    Handles checkpoints saved by ``torch.nn.DataParallel`` (whose state-dict
    keys carry a ``module.`` prefix) when loading into a single-GPU model.

    Args:
        args: parsed argument namespace (eval_split, display, dataset,
            all_classes, maxseqlen, batch_size, num_workers, model_name,
            use_gpu, ngpus).
    """
    self.split = args.eval_split
    self.display = args.display
    self.dataset = args.dataset
    self.all_classes = args.all_classes
    self.T = args.maxseqlen
    self.batch_size = args.batch_size

    # ImageNet mean/std normalization.
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    image_transforms = transforms.Compose([to_tensor, normalize])

    dataset = get_dataset(args, self.split, image_transforms, augment=False)
    # Evaluation loader: preserve sample order, keep the final partial batch.
    self.loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  drop_last=False)
    self.sample_list = dataset.get_sample_list()
    self.args = args

    encoder_dict, decoder_dict, _, _, load_args = load_checkpoint(args.model_name)
    # Propagate architecture hyper-parameters stored with the checkpoint so
    # the rebuilt model matches the trained one.
    self.args.use_feedback = load_args.use_feedback
    self.args.base_model = load_args.base_model
    self.hidden_size = load_args.hidden_size
    self.args.nconvlstm = load_args.nconvlstm

    self.encoder = FeatureExtractor(load_args)
    self.decoder = RSIS(load_args)

    if args.ngpus > 1 and args.use_gpu:
        self.decoder = torch.nn.DataParallel(self.decoder, device_ids=range(args.ngpus))
        self.encoder = torch.nn.DataParallel(self.encoder, device_ids=range(args.ngpus))

    # Checkpoints written through DataParallel prefix every key with
    # "module."; strip that prefix when loading into a non-parallel model.
    trained_parallel = any(k.startswith("module.") for k in encoder_dict)
    if trained_parallel and not args.ngpus > 1:
        encoder_dict = OrderedDict((k[7:], v) for k, v in encoder_dict.items())
        decoder_dict = OrderedDict((k[7:], v) for k, v in decoder_dict.items())

    self.encoder.load_state_dict(encoder_dict)
    self.decoder.load_state_dict(decoder_dict)

    if args.use_gpu:
        self.encoder.cuda()
        self.decoder.cuda()

    self.encoder.eval()
    self.decoder.eval()
def __init__(self, args):
    """Prepare evaluation: dataset, PASCAL ground-truth/ignore masks, display
    colors, and a model restored from a checkpoint.

    Args:
        args: parsed argument namespace (eval_split, display,
            no_display_text, dataset, all_classes, use_cats, imsize,
            pascal_dir, batch_size, num_workers, model_name, use_gpu, ngpus).
    """
    self.split = args.eval_split
    self.display = args.display
    self.no_display_text = args.no_display_text
    self.dataset = args.dataset
    self.all_classes = args.all_classes
    self.use_cats = args.use_cats

    # ImageNet mean/std normalization.
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    image_transforms = transforms.Compose([to_tensor, normalize])

    dataset = get_dataset(args, self.split, image_transforms,
                          augment=False, imsize=args.imsize)
    self.sample_list = dataset.get_sample_list()
    self.class_names = dataset.get_classes()

    if args.dataset == 'pascal':
        # Trusted project artifact; never pickle.load untrusted data.
        # Use a context manager so the file handle is closed (the original
        # leaked it).
        gt_path = os.path.join(args.pascal_dir, 'VOCGT_%s.pkl' % (self.split))
        with open(gt_path, 'rb') as gt_f:
            self.gt_file = pickle.load(gt_f)

        self.key_to_anns = dict()
        self.ignoremasks = {}
        for ann in self.gt_file:
            if ann['ignore'] == 1:
                # Uncompressed RLE (counts stored as a list) must be
                # converted with frPyObjects before decoding.
                if isinstance(ann['segmentation']['counts'], list):
                    im_height = ann['segmentation']['size'][0]
                    im_width = ann['segmentation']['size'][1]
                    rle = mask.frPyObjects([ann['segmentation']],
                                           im_height, im_width)
                else:
                    rle = [ann['segmentation']]
                m = mask.decode(rle)
                self.ignoremasks[ann['image_id']] = m
            # Group annotations by image id.
            if ann['image_id'] in self.key_to_anns.keys():
                self.key_to_anns[ann['image_id']].append(ann)
            else:
                self.key_to_anns[ann['image_id']] = [ann]
        self.coco = create_coco_object(args, self.sample_list, self.class_names)

    # Evaluation loader: preserve sample order, keep the final partial batch.
    self.loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  drop_last=False)
    self.args = args

    # Build the display color table from the palette, skipping index 0
    # (background) and 21 — presumably the void/boundary color; confirm
    # against sequence_palette().
    self.colors = []
    palette = sequence_palette()
    inv_palette = {}
    # BUGFIX: dict.iteritems() is Python 2 only and raises AttributeError
    # on Python 3; items() is the portable equivalent.
    for k, v in palette.items():
        inv_palette[v] = k
    num_colors = len(inv_palette.keys())
    for i in range(num_colors):
        if i == 0 or i == 21:
            continue
        c = inv_palette[i]
        self.colors.append(c)

    encoder_dict, decoder_dict, _, _, load_args = load_checkpoint(
        args.model_name, args.use_gpu)
    load_args.use_gpu = args.use_gpu
    self.encoder = FeatureExtractor(load_args)
    self.decoder = RSIS(load_args)
    print(load_args)

    if args.ngpus > 1 and args.use_gpu:
        self.decoder = torch.nn.DataParallel(self.decoder, device_ids=range(args.ngpus))
        self.encoder = torch.nn.DataParallel(self.encoder, device_ids=range(args.ngpus))

    encoder_dict, decoder_dict = check_parallel(encoder_dict, decoder_dict)
    self.encoder.load_state_dict(encoder_dict)
    self.decoder.load_state_dict(decoder_dict)

    if args.use_gpu:
        self.encoder.cuda()
        self.decoder.cuda()

    self.encoder.eval()
    self.decoder.eval()