def setup_data(self):
    """Build the train/test datasets and their loaders from ``self.opts``."""
    tfm = unrel.TRANSFORM

    # Training data: optionally restrict to the first `train_size` examples.
    self.trainset = data.Dataset(split='train', pairs='annotated', transform=tfm)
    batch_sampler = None
    if self.opts.train_size:
        print('Using subset of %d from train_set' % self.opts.train_size)
        batch_sampler = sampler.SequentialSampler(range(self.opts.train_size))
    self.trainloader = data.FauxDataLoader(
        self.trainset,
        sampler=batch_sampler,
        batch_size=self.opts.batch_size,
    )

    # Test data: only built when validation is requested.
    if not self.opts.do_validation:
        print('No testset')
        self.testloaders = []
        return
    self.testset = data.Dataset(split='test', pairs='annotated', transform=tfm)
    # Load the test set without shuffling so that we can use Tyler's
    # RecallEvaluator (it expects a fixed example order).
    test_sampler = sampler.BatchSampler(
        sampler.SequentialSampler(self.testset),
        self.opts.test_batch_size,
        False,
    )
    self.testloaders = [data.FauxDataLoader(self.testset, sampler=test_sampler)]
def get_dataloaders(train_batchsize, val_batchsize):
    """Create train / train-for-validation / validation dataloaders.

    Returns:
        (train_loader, train4val_loader, val_loader,
         num_of_images_by_class, mapping, imgs)
        where ``mapping`` translates train class indices to val class indices
        (classes missing from the val set map to its 'UNKNOWN' class) and
        ``imgs`` is the (path, label) array of the validation set.
    """
    kwargs = {'num_workers': 20, 'pin_memory': True}
    input_size = INFO['model-info']['input-size']
    base = '{}/{}'.format(os.environ['datadir-base'], INFO['dataset'])
    normalize = T.Normalize(mean=INFO['dataset-info']['normalization']['mean'],
                            std=INFO['dataset-info']['normalization']['std'])
    # NOTE(review): np.random.random() below is evaluated ONCE when the
    # transform pipeline is built, not per image — the jitter ranges are
    # fixed for the whole run. Confirm this is intended.
    transform = {
        'train': T.Compose([
            T.Resize(tuple([int(x * (4 / 3)) for x in input_size])),  # enlarge
            T.RandomResizedCrop(input_size),  # random crop, then resize
            T.RandomHorizontalFlip(0.5),  # random horizontal flip
            T.RandomVerticalFlip(0.5),  # random vertical flip
            T.RandomApply([T.RandomRotation(90)], 0.5),  # random 90/270 deg rotation
            T.RandomApply([T.RandomRotation(180)], 0.25),  # random 180 deg rotation
            T.RandomApply([T.ColorJitter(brightness=np.random.random() / 5 + 0.9)], 0.5),  # random brightness
            T.RandomApply([T.ColorJitter(contrast=np.random.random() / 5 + 0.9)], 0.5),  # random contrast
            T.RandomApply([T.ColorJitter(saturation=np.random.random() / 5 + 0.9)], 0.5),  # random saturation
            T.ToTensor(),
            normalize
        ]),
        'val': T.Compose([
            T.Resize(input_size),
            T.ToTensor(),
            normalize
        ])
    }
    train_dset = dset.ImageFolder('{}/{}'.format(base, 'Train'),
                                  transform=transform['train'])
    train4val_dset = dset.ImageFolder('{}/{}'.format(base, 'Train'),
                                      transform=transform['val'])
    val_dset = dset.ImageFolder('{}/{}'.format(base, 'Val'),
                                transform=transform['val'])

    # Per-class image counts. bincount is a single O(N) pass; the previous
    # torch.where loop was O(N * num_classes).
    labels = torch.from_numpy(np.array(train_dset.imgs)[:, 1].astype(int))
    num_of_images_by_class = torch.bincount(
        labels, minlength=len(train_dset.classes)).float()

    # Map train class indices onto val class indices; classes absent from
    # the val set (and the sentinel -1) fall back to 'UNKNOWN'.
    mapping = {}
    for c in train_dset.classes:
        if c in val_dset.classes:
            mapping[train_dset.class_to_idx[c]] = val_dset.class_to_idx[c]
        else:
            mapping[train_dset.class_to_idx[c]] = val_dset.class_to_idx['UNKNOWN']
    mapping[-1] = val_dset.class_to_idx['UNKNOWN']

    train_len = len(train_dset)
    val_len = len(val_dset)
    train_loader = DataLoader(train_dset, batch_size=train_batchsize,
                              sampler=sampler.RandomSampler(range(train_len)),
                              **kwargs)
    train4val_loader = DataLoader(train4val_dset, batch_size=val_batchsize,
                                  sampler=sampler.SequentialSampler(range(train_len)),
                                  **kwargs)
    val_loader = DataLoader(val_dset, batch_size=val_batchsize,
                            sampler=sampler.SequentialSampler(range(val_len)),
                            **kwargs)
    imgs = np.array(val_dset.imgs)
    return train_loader, train4val_loader, val_loader, num_of_images_by_class, mapping, imgs
def get_dataloaders(train_batchsize, val_batchsize):
    """Validation-only loaders; returns (None, None, val_loader, None, None, None)."""
    kwargs = {'num_workers': 20, 'pin_memory': True}
    input_size = INFO['model-info']['input-size']
    base = '{}/{}'.format(os.environ['datadir-base'], INFO['dataset'])
    normalize = T.Normalize(mean=INFO['dataset-info']['normalization']['mean'],
                            std=INFO['dataset-info']['normalization']['std'])
    # Fixed 608 -> random 456 crop pipeline for validation images.
    val_tfm = T.Compose([
        T.Resize(608),
        T.RandomResizedCrop(456),
        T.ToTensor(),
        normalize,
    ])
    transform = {'val': val_tfm}
    val_dset = dset.ImageFolder('{}/{}'.format(base, 'Val'),
                                transform=transform['val'])
    val_len = len(val_dset)
    val_loader = DataLoader(
        val_dset,
        batch_size=val_batchsize,
        sampler=sampler.SequentialSampler(range(val_len)),
        **kwargs,
    )
    # Only the validation loader is produced in this variant.
    return None, None, val_loader, None, None, None
def __init__(self, opt):
    """Daemon loader thread: wraps the configured dataset(s) in a DataLoader."""
    super().__init__(daemon=True)
    dataset_classes = get_dataset_classes(opt)
    if len(dataset_classes) > 1:
        # Several teacher tasks: instantiate each dataset and concatenate.
        datasets = []
        for cls, collate_fn, task_name in dataset_classes:
            opt['pytorch_teacher_task'] = task_name
            opt['task'] = task_name
            datasets.append(cls(opt))
            self.collate = collate_fn
        self.dataset = ParlAIConcatDataset(datasets)
    else:
        cls, self.collate, task_name = dataset_classes[0]
        self.dataset = cls(opt)
    self.bsz = opt.get('batchsize', 1)
    self.num_workers = opt.get('num_workers', 4)
    # Sequential sampling: the loader thread preserves example order.
    self.dataloader = DataLoader(
        self.dataset,
        batch_size=self.bsz,
        shuffle=False,
        sampler=sampler.SequentialSampler(self.dataset),
        num_workers=self.num_workers,
        collate_fn=self.collate,
        pin_memory=False,
        drop_last=False,
    )
    self.datatype = opt.get('datatype')
    self.data = enumerate(self.dataloader)
    self.batch_sort = opt.get('pytorch_teacher_batch_sort')
    self.batch_cache_type = opt.get('batch_sort_cache_type')
    self.batch_length_range = opt.get('batch_length_range')
    self.batch_sort_field = opt.get('batch_sort_field')
def evaluate_model(model, dataset, classes, examples=None, batch_size=16,
                  dtype=torch.float32, device=DEFAULT_DEVICE):
    """Score `model` over up to `examples` items of `dataset` in order and
    return the aggregated prediction statistics via `evaluate`.

    Args:
        examples: optional cap on how many examples to score; falsy values
            (None or 0) mean "use the whole dataset".

    BUG FIX: the original `min(examples, len(dataset)) or len(dataset)`
    raised TypeError when `examples` was None (its default), because
    `min(None, int)` is invalid in Python 3.
    """
    examples = min(examples, len(dataset)) if examples else len(dataset)
    model.eval()
    loader = DataLoader(dataset, batch_size=batch_size,
                        sampler=sampler.SequentialSampler(range(examples)))
    stats = zero_statistics(len(classes))
    # Inference only: disable autograd to save memory.
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)
            scores = model(x).cpu().numpy()
            predictions = scores.argmax(axis=1)
            stats = combine(
                stats,
                compute_prediction_statistics(predictions, y.cpu().numpy(),
                                              classes))
    return evaluate(stats)
def get_subm_link(criterion):
    """Predict on the test set, write a submission CSV, and return a FileLink.

    Note: `criterion` is accepted for API compatibility but is not used —
    the original body only had a no-op `criterion = criterion`
    self-assignment, which has been removed.
    """
    test_dataset = XRayDataset(f'{data_folder}test.csv', f'{data_folder}',
                               transform=tfms, is_train=False)
    test_samplers = {'test': sampler.SequentialSampler(test_dataset)}
    test_dataloaders = {
        'test': DataLoader(test_dataset, batch_size=32,
                           sampler=test_samplers['test'],
                           num_workers=8, pin_memory=True)
    }
    test_dt_szs = get_dt_szs(test_samplers)
    t_pdted, t_lbs = predict(dropout_model, 'test', test_dataloaders,
                             test_dt_szs)
    # Show the distribution of predicted class indices.
    print(np.bincount(t_pdted))
    test_df = pd.read_csv(f'{data_folder}test.csv')
    # Map predicted class indices back to label names.
    test_df['detected'] = pd.Series([
        transformed_dataset.idx_to_classes[i] for i in t_pdted
    ]).astype('category')
    test_df.drop(['age', 'gender', 'view_position', 'image_name'],
                 axis=1).to_csv('sdir/fst.csv', index=False)
    return FileLink('./sdir/fst.csv')
def __init__(self, opt, shared=None):
    """Stream teacher init: builds (or inherits via `shared`) the DataLoader."""
    opt['batch_sort'] = False
    super().__init__(opt, shared)
    self.use_batch_act = self.bsz > 1
    self.num_workers = opt['numworkers']
    # One can specify a collate function to use for preparing a batch
    collate_fn = opt.get('collate_fn', default_collate)
    if shared:
        # Re-use the dataset/loader built by the parent instance.
        self.dataset = shared['dataset']
        self.pytorch_dataloader = shared['pytorch_dataloader']
        self.lastYs = shared['lastYs']
    else:
        self.dataset = StreamDataset(opt)
        # Streams are consumed in order; no shuffling.
        self.pytorch_dataloader = DataLoader(
            self.dataset,
            batch_size=self.bsz,
            shuffle=False,
            sampler=sampler.SequentialSampler(self.dataset),
            num_workers=self.num_workers,
            collate_fn=collate_fn,
            pin_memory=False,
            drop_last=False,
        )
        self.lastYs = [None] * self.bsz
    self.num_batches = math.ceil(self.dataset.num_examples() / self.bsz)
    self.reset()
def get_dataloaders(train_batchsize, val_batchsize):
    """Validation-only loaders over raw (untransformed) ImageFolders.

    Returns (None, None, val_loader, None, mapping, imgs): only the val
    loader, the train->val class-index mapping, and the val (path, label)
    array are produced; train loaders are intentionally omitted.

    Cleanup: the unused locals `input_size`, `labels` and `train_len` from
    the original have been removed — none of them fed into the result.
    """
    kwargs = {'num_workers': 20, 'pin_memory': True}
    base = '{}/{}'.format(os.environ['datadir-base'], INFO['dataset'])
    # The train set is loaded only to recover its class list for `mapping`.
    train_dset = dset.ImageFolder('{}/{}'.format(base, 'Train'))
    val_dset = dset.ImageFolder('{}/{}'.format(base, 'Val'))

    # Translate train class indices to val class indices; classes missing
    # from the val set (and the sentinel -1) map to 'UNKNOWN'.
    mapping = {}
    for c in train_dset.classes:
        if c in val_dset.classes:
            mapping[train_dset.class_to_idx[c]] = val_dset.class_to_idx[c]
        else:
            mapping[train_dset.class_to_idx[c]] = val_dset.class_to_idx['UNKNOWN']
    mapping[-1] = val_dset.class_to_idx['UNKNOWN']

    val_len = len(val_dset)
    val_loader = DataLoader(val_dset, batch_size=val_batchsize,
                            sampler=sampler.SequentialSampler(range(val_len)),
                            **kwargs)
    imgs = np.array(val_dset.imgs)
    return None, None, val_loader, None, mapping, imgs
def make_loader(self, batch_size=16, num_workers=0, shuffle=False,
                pin_memory=False, resize_rate=10, drop_last=False):
    """
    Build a DataLoader with a multiscale batch sampler over this dataset.

    When shuffling, batches are also re-scaled every `resize_rate` batches
    (multiscale training); otherwise ordering is sequential and the scale
    is fixed.

    CommandLine:
        python ~/code/netharn/examples/yolo_voc.py YoloVOCDataset.make_loader

    Example:
        >>> # DISABLE_DOCTSET
        >>> torch.random.manual_seed(0)
        >>> self = YoloVOCDataset(split='train')
        >>> self.augmenter = None
        >>> loader = self.make_loader(batch_size=1, shuffle=True)
        >>> # training batches should have multiple shapes
        >>> shapes = set()
        >>> for batch in ub.ProgIter(iter(loader), total=len(loader)):
        >>>     inputs, labels = batch
        >>>     # test to see multiscale works
        >>>     shapes.add(inputs.shape[-1])
        >>>     if len(shapes) > 1:
        >>>         break
        >>> assert len(shapes) > 1
    """
    import torch.utils.data.sampler as torch_sampler
    assert len(self) > 0, 'must have some data'
    if shuffle:
        sampler = torch_sampler.RandomSampler(self)
        # Only resample the input scale when training (shuffled) data.
        resample_freq = resize_rate
    else:
        sampler = torch_sampler.SequentialSampler(self)
        resample_freq = None
    # use custom sampler that does multiscale training
    batch_sampler = multiscale_batch_sampler.MultiScaleBatchSampler(
        sampler, batch_size=batch_size, resample_freq=resample_freq,
        drop_last=drop_last,
    )
    # torch.utils.data.sampler.WeightedRandomSampler
    loader = torch_data.DataLoader(self, batch_sampler=batch_sampler,
                                   collate_fn=nh.data.collate.padded_collate,
                                   num_workers=num_workers,
                                   pin_memory=pin_memory)
    if loader.batch_size != batch_size:
        try:
            # Hack: ensure dataloader has batch size attr
            # (DataLoader refuses batch_size changes after init, so the
            # private "initialized" flag is temporarily cleared).
            loader._DataLoader__initialized = False
            loader.batch_size = batch_size
            loader._DataLoader__initialized = True
        except Exception:
            pass
    return loader
def run_oof_binary(args, session_backup, read_img, read_mask,
                   img_group_id_colname=None):
    """Run out-of-fold (OOF) inference for a binary segmentation model.

    For each cross-validation fold, loads the fold's network, predicts
    sigmoid masks for the fold's validation split, and writes them as
    8-bit PNG/whatever-cv2-infers files under
    ``<snapshots_root>/<snapshot>/oof_inference[/<group_id>]/<fname>``.

    Args:
        args: namespace with snapshots_root, snapshot, tta, bs, n_threads.
        session_backup: dict-like with 'metadata', 'cv_split' and 'val_trf'
            entries (each wrapped in a one-element container).
        read_img / read_mask: callables handed to SegmentationDataset.
        img_group_id_colname: optional metadata column; when given, output
            is grouped into one sub-directory per group id.
    """
    metadata = session_backup[f'metadata'][0]
    # Pre-create the output directory tree (one dir per group if grouping).
    if img_group_id_colname is not None:
        for group_name, _ in metadata.groupby(by=img_group_id_colname):
            os.makedirs(os.path.join(args.snapshots_root, args.snapshot,
                                     'oof_inference', group_name),
                        exist_ok=True)
    else:
        os.makedirs(os.path.join(args.snapshots_root, args.snapshot,
                                 'oof_inference'), exist_ok=True)
    for fold_id, _, val_set in session_backup['cv_split'][0]:
        print(colored('====> ', 'green') + f'Loading fold [{fold_id}]')
        net = load_fold(args, fold_id)
        if args.tta:
            raise NotImplementedError('TTA is not yet supported')
        val_dataset = SegmentationDataset(split=val_set,
                                          trf=session_backup['val_trf'][0],
                                          read_img=read_img,
                                          read_mask=read_mask,
                                          img_group_id_colname=img_group_id_colname)
        # Sequential sampling keeps predictions aligned with the split order.
        val_loader = DataLoader(val_dataset, batch_size=args.bs,
                                num_workers=args.n_threads,
                                sampler=sampler.SequentialSampler(val_dataset))
        with torch.no_grad():
            for batch in tqdm(val_loader, total=len(val_loader),
                              desc=f'Predicting fold {fold_id}:'):
                img = batch['img']
                if img_group_id_colname is not None:
                    group_ids = batch['group_id']
                else:
                    group_ids = None
                fnames = batch['fname']
                # Sigmoid probabilities scaled to 0-255 for 8-bit output.
                predicts = torch.sigmoid(
                    net(img)).mul(255).to('cpu').numpy().astype(np.uint8)
                for idx, fname in enumerate(fnames):
                    pred_mask = predicts[idx].squeeze()
                    if img_group_id_colname is not None:
                        cv2.imwrite(
                            os.path.join(args.snapshots_root, args.snapshot,
                                         'oof_inference', group_ids[idx],
                                         fname), pred_mask)
                    else:
                        cv2.imwrite(
                            os.path.join(args.snapshots_root, args.snapshot,
                                         'oof_inference', fname), pred_mask)
def get_simple_loader(dataset, batch_size=1):
    """Sequential, MIL-collated DataLoader; worker processes only on CUDA."""
    worker_kwargs = {'num_workers': 4} if device.type == "cuda" else {}
    return DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler.SequentialSampler(dataset),
        collate_fn=collate_MIL,
        **worker_kwargs,
    )
def make_loader(self, batch_size=16, num_workers=0, shuffle=False,
                pin_memory=False):
    """
    Build a DataLoader with a multiscale batch sampler over this dataset.

    When shuffling, samples are drawn with per-example weights (the data
    is not balanced) and the input scale is resampled every 10 batches.

    Cleanup: the original wrapped the weighted-sampler path in a dead
    ``if True:`` whose ``else`` (plain RandomSampler) was unreachable;
    that dead branch has been removed.

    Example:
        >>> torch.random.manual_seed(0)
        >>> dset = coco_api.CocoDataset(coco_api.demo_coco_data())
        >>> self = YoloCocoDataset(dset, train=1)
        >>> loader = self.make_loader(batch_size=1)
        >>> train_iter = iter(loader)
        >>> # training batches should have multiple shapes
        >>> shapes = set()
        >>> for batch in train_iter:
        >>>     shapes.add(batch[0].shape[-1])
        >>>     if len(shapes) > 1:
        >>>         break
        >>> #assert len(shapes) > 1
    """
    assert len(self) > 0, 'must have some data'
    if shuffle:
        # Balance training draws via per-example weights.
        index_to_weight = self._training_sample_weights()
        num_samples = len(self)
        index_to_weight = index_to_weight[:num_samples]
        sampler = torch_sampler.WeightedRandomSampler(index_to_weight,
                                                      num_samples,
                                                      replacement=True)
        sampler.data_source = self  # hack for use with multiscale
        resample_freq = 10
    else:
        sampler = torch_sampler.SequentialSampler(self)
        resample_freq = None
    # use custom sampler that does multiscale training
    batch_sampler = multiscale_batch_sampler.MultiScaleBatchSampler(
        sampler, batch_size=batch_size, resample_freq=resample_freq,
    )
    loader = torch_data.DataLoader(self, batch_sampler=batch_sampler,
                                   collate_fn=nh.data.collate.padded_collate,
                                   num_workers=num_workers,
                                   pin_memory=pin_memory)
    if loader.batch_size != batch_size:
        try:
            # Best-effort: make the loader report the requested batch size.
            loader.batch_size = batch_size
        except Exception:
            pass
    return loader
def data_sampler(dataset, shuffle, distributed):
    """Pick the sampler matching the run configuration.

    Distributed runs always get a DistributedSampler; otherwise a random or
    sequential sampler depending on `shuffle`.
    """
    if distributed:
        return DistributedSampler(dataset, shuffle=shuffle)
    return (sampler.RandomSampler(dataset) if shuffle
            else sampler.SequentialSampler(dataset))
def main():
    """Train a hero-embedding model (CBOH) end to end from two CSV inputs.

    argv[1]: matches CSV, argv[2]: hero->index mapping CSV. Trains, reports
    test accuracy, and saves embeddings, the pickled model, and plots.
    """
    data_dir = sys.argv[1]
    hero2ix_dir = sys.argv[2]
    # import DataFrame and hero2ix dictionary
    heroes_df = pd.read_csv(data_dir, index_col=0)
    hero2ix_df = pd.read_csv(hero2ix_dir, index_col=0)
    heroes_df = heroes_df.dropna().reset_index(drop=True)
    hero2ix = dict(zip(hero2ix_df.hero, hero2ix_df.ID))
    # heroes = hero2ix_df['hero'].values
    # train test split (chronological 90/10, no shuffling)
    split = int(len(heroes_df)*0.9)
    heroes_train = heroes_df.iloc[:split]
    heroes_test = heroes_df.iloc[split:]
    # build dataset generator
    train_gen = DataFrameIterator(heroes_train, hero2ix)
    test_gen = DataFrameIterator(heroes_test, hero2ix)
    # Use Dataloader class in pytorch to generate batched data
    batch_size = 16
    loader_train = DataLoader(train_gen, batch_size=batch_size,
                              sampler=sampler.RandomSampler(train_gen),
                              num_workers=4)
    # Test loader is sequential so accuracy is computed in a fixed order.
    loader_test = DataLoader(test_gen, batch_size=batch_size,
                             sampler=sampler.SequentialSampler(test_gen),
                             num_workers=4)
    # define model, totally three models in hetor2vec.py
    model = CBOH(embedding_dim=10, heropool_size=len(hero2ix))
    # define loss function
    loss_function = nn.CrossEntropyLoss()
    # run train
    losses = train(model=model, dataloader=loader_train,
                   loss_function=loss_function, init_lr=0.1, epochs=20,
                   lr_decay_epoch=8, print_epoch=2, gpu=False)
    # check test accuracy
    print('accuracy: ', accuracy(model, dataloader=loader_test,
                                 batch_size=batch_size, gpu=False))
    # save embeddings as numpy arrays
    output_dir = './output/hero/hero_embeddings.npy'
    save_embeddings(model, filename=output_dir)
    # pickle model
    pickle_dir = './output/hero/model.p'
    pickle.dump(obj=model, file=open(pickle_dir, 'wb'))
    # plot loss vs epoch
    plot_loss(losses, './output/hero/loss_hitory.png')
    # project embeddings to 2d plane
    plot_embeddings(model, hero2ix)
def __init__(self, opt, shared=None):
    """Teacher init: builds datasets/dataloader, or inherits them via `shared`.

    Ordering is forced (sequential sampler) for 'ordered' datatypes, for
    un-shuffled streams, and outside training.
    """
    opt['batch_sort'] = False
    super().__init__(opt, shared)
    self.use_batch_act = self.bsz > 1
    self.num_workers = opt['numworkers']
    self.batch_sort = opt.get('pytorch_teacher_batch_sort')
    self.batch_cache_type = opt.get('batch_sort_cache')
    self.batch_sort_field = opt.get('batch_sort_field')
    # One can specify a collate function to use for preparing a batch
    self.opt = opt.copy()
    self.is_shared = shared is not None
    dataset_classes = self.get_dataset_class(opt)
    # Ordered if explicitly requested, or when streaming without shuffle.
    self.ordered = ('ordered' in self.datatype or
                    ('stream' in self.datatype and not opt.get('shuffle')))
    if not shared:
        if len(dataset_classes) > 1:
            # Multiple teacher tasks are concatenated into one dataset.
            datasets = []
            for class_name, collate_fn, task_name in dataset_classes:
                opt['pytorch_teacher_task'] = task_name
                opt['task'] = task_name
                datasets.append(class_name(opt))
                self.collate_fn = collate_fn
            self.dataset = ParlAIConcatDataset(datasets)
        else:
            class_name, self.collate_fn, task_name = dataset_classes[0]
            self.dataset = class_name(opt)
        if self.ordered or not self.training:
            data_sampler = sampler.SequentialSampler(self.dataset)
            pin_memory = False
        else:
            data_sampler = sampler.RandomSampler(self.dataset)
            pin_memory = True
        self.pytorch_dataloader = DataLoader(
            self.dataset,
            batch_size=self.bsz,
            sampler=data_sampler,
            num_workers=self.num_workers,
            collate_fn=self.collate_fn,
            pin_memory=pin_memory,
            drop_last=False,
        )
        self.lastYs = [None] * self.bsz
        if self.batch_sort:
            # Background process that pre-sorts batches by length.
            self.loader_process = LoaderProcess(opt)
            self.loader_process.start()
        self.data = enumerate(self.pytorch_dataloader)
    else:
        # Shared teacher: reuse the already-built loader state.
        self.dataset = shared['dataset']
        self.pytorch_dataloader = shared['pytorch_dataloader']
        self.lastYs = shared['lastYs']
        self.data = shared['data']
    self.num_batches = math.ceil(self.dataset.num_episodes() / self.bsz)
    self.reset()
def __init__(self, dataset, batch_size, shuffle = True, drop_last = False):
    """Bucketed batch sampler: groups examples by context length, then
    batches within each bucket (randomly when shuffling)."""
    # Group examples into buckets keyed on context length.
    self.buckets = bucket(dataset)
    if shuffle:
        # Shuffle the bucket order in place, and sample randomly inside
        # each bucket.
        np.random.shuffle(self.buckets)
        per_bucket = [sampler.RandomSampler(b) for b in self.buckets]
    else:
        per_bucket = [sampler.SequentialSampler(b) for b in self.buckets]
    # One BatchSampler per bucket.
    self.sampler = [sampler.BatchSampler(s, batch_size, drop_last)
                    for s in per_bucket]
def create_training_batch(train_data, batch_size):
    """Split the student ids in `train_data` into ordered batches.

    Args:
        train_data: mapping keyed by student id (insertion order is kept).
        batch_size: maximum ids per batch; the last batch may be smaller
            (drop_last=False).

    Returns:
        List of lists of student ids. Empty input yields [].
    """
    # list(mapping) iterates its keys — no need for an explicit comprehension.
    stud_ids = list(train_data)
    batches = sampler.BatchSampler(sampler.SequentialSampler(stud_ids),
                                   batch_size=batch_size,
                                   drop_last=False)
    return [[stud_ids[i] for i in batch] for batch in batches]
def __init__(self, data_source, shuffle=False, batch_size=16, drop_last=False, resample_frequency=10):
    """Multiscale batch-sampler state: ordering, batching, and how often
    the input scale is resampled."""
    # Random ordering when shuffling, deterministic order otherwise.
    self.sampler = (torch_sampler.RandomSampler(data_source) if shuffle
                    else torch_sampler.SequentialSampler(data_source))
    self.shuffle = shuffle
    self.batch_size = batch_size
    self.drop_last = drop_last
    # One scale entry per configured multiscale input size.
    self.num_scales = len(data_source.multi_scale_inp_size)
    self.resample_frequency = resample_frequency
def create_dataloader(config, data, mode):
    """Build a DataLoader for 'train', 'val' or test mode.

    Train/val use an explicit BatchSampler (distributed-aware); any other
    mode is treated as test and batches directly.
    """
    dataset = create_dataset(config, data, mode)
    if mode == 'train':
        # create Sampler (distributed-aware: DistributedSampler when a
        # process group is initialized, random sampling otherwise)
        if dist.is_available() and dist.is_initialized():
            train_RandomSampler = distributed.DistributedSampler(dataset)
        else:
            train_RandomSampler = sampler.RandomSampler(dataset,
                                                        replacement=False)
        train_BatchSampler = sampler.BatchSampler(
            train_RandomSampler,
            batch_size=config.train.batch_size,
            drop_last=config.train.dataloader.drop_last)
        # Augment
        collator = get_collate_fn(config)
        # DataLoader
        data_loader = DataLoader(dataset=dataset,
                                 batch_sampler=train_BatchSampler,
                                 collate_fn=collator,
                                 pin_memory=config.train.dataloader.pin_memory,
                                 num_workers=config.train.dataloader.work_nums)
    elif mode == 'val':
        if dist.is_available() and dist.is_initialized():
            val_SequentialSampler = distributed.DistributedSampler(dataset)
        else:
            val_SequentialSampler = sampler.SequentialSampler(dataset)
        val_BatchSampler = sampler.BatchSampler(
            val_SequentialSampler,
            batch_size=config.val.batch_size,
            drop_last=config.val.dataloader.drop_last)
        data_loader = DataLoader(dataset,
                                 batch_sampler=val_BatchSampler,
                                 pin_memory=config.val.dataloader.pin_memory,
                                 num_workers=config.val.dataloader.work_nums)
    else:
        # Test mode: a None sampler means DataLoader's default ordering.
        if dist.is_available() and dist.is_initialized():
            test_SequentialSampler = distributed.DistributedSampler(dataset)
        else:
            test_SequentialSampler = None
        # NOTE(review): this branch reads pin_memory/work_nums from
        # config.val, not config.test — confirm that is intentional.
        data_loader = DataLoader(dataset,
                                 sampler=test_SequentialSampler,
                                 batch_size=config.test.batch_size,
                                 pin_memory=config.val.dataloader.pin_memory,
                                 num_workers=config.val.dataloader.work_nums)
    return data_loader
def __init__(self, opt, shared=None):
    """Dataset teacher init: builds the dataset/dataloader (or reuses the
    shared ones) and optionally starts a batch-sort loader process.

    Random sampling is used only for non-stream training data; streams and
    evaluation are read sequentially without pinned memory.
    """
    opt['batch_sort'] = False
    super().__init__(opt, shared)
    self.use_batch_act = self.bsz > 1
    self.num_workers = opt['numworkers']
    self.batch_cache_type = opt.get('batch_sort_cache')
    # One can specify a collate function to use for preparing a batch
    self.opt = copy.deepcopy(opt)
    self.is_shared = shared is not None
    dataset_class, self.collate_fn = self.get_dataset_class(opt)
    # Stash class/collate in opt so the LoaderProcess can rebuild them.
    opt['dataset_class'] = dataset_class
    opt['collate_fn'] = self.collate_fn
    if not shared:
        self.dataset = dataset_class(opt)
        if self.datatype == 'train' and not isinstance(
                self.dataset, StreamDataset):
            data_sampler = sampler.RandomSampler(self.dataset)
        else:
            data_sampler = sampler.SequentialSampler(self.dataset)
        # Pinning memory is incompatible with the stream dataset here.
        pin_memory = not isinstance(self.dataset, StreamDataset)
        self.pytorch_dataloader = DataLoader(
            self.dataset,
            batch_size=self.bsz,
            shuffle=False,
            sampler=data_sampler,
            num_workers=self.num_workers,
            collate_fn=self.collate_fn,
            pin_memory=pin_memory,
            drop_last=False,
        )
        self.lastYs = [None] * self.bsz
        if self.batch_cache_type != 'none':
            # Background process that pre-fills the batch cache.
            self.loader_process = LoaderProcess(opt)
            self.loader_process.start()
        self.data = enumerate(self.pytorch_dataloader)
    else:
        # Shared teacher: reuse the already-built loader state.
        self.dataset = shared['dataset']
        self.pytorch_dataloader = shared['pytorch_dataloader']
        self.lastYs = shared['lastYs']
        self.data = shared['data']
    self.num_batches = math.ceil(self.dataset.num_episodes() / self.bsz)
    self.reset()
def __init__(self, opt):
    """Daemon loader thread over a sequentially-sampled dataset built from
    the class/collate stored in `opt`."""
    super().__init__(daemon=True)
    self.dataset = opt['dataset_class'](opt)
    self.bsz = opt.get('batchsize', 1)
    self.num_workers = opt.get('num_workers', 4)
    collator = opt.get('collate_fn', default_collate)
    # Sequential sampling: the loader preserves example order.
    self.dataloader = DataLoader(
        self.dataset,
        batch_size=self.bsz,
        shuffle=False,
        sampler=sampler.SequentialSampler(self.dataset),
        num_workers=self.num_workers,
        collate_fn=collator,
        pin_memory=False,
        drop_last=False,
    )
    self.datatype = opt.get('datatype')
    self.data = enumerate(self.dataloader)
    self.batch_cache_type = opt.get('batch_sort_cache')
    self.batch_length_range = opt.get('batch_length_range')
def make_train_valid_loaders(self, distributed=False
                             ) -> Tuple[DataLoader, DataLoader]:
    """Build the training loader (uniform weighted sampling with a fixed
    number of draws per epoch) and the sequential validation loader."""
    train_dataset, valid_dataset = self.make_train_valid_datasets()
    # Weights are uniform: WeightedRandomSampler is used only to pin the
    # number of samples drawn per epoch to batch_size * steps_per_epoch.
    draws = (self._data_params['batch_size'] *
             self._data_params['steps_per_epoch'])
    train_sampler = sampler.WeightedRandomSampler(
        weights=torch.DoubleTensor([1.0] * len(train_dataset)),
        num_samples=draws,
    )
    train_loader = self._make_loader(train_dataset, train_sampler,
                                     mode='train', distributed=distributed)
    valid_loader = self._make_loader(
        valid_dataset,
        sampler.SequentialSampler(valid_dataset),
        mode='valid',
        distributed=distributed,
    )
    return train_loader, valid_loader
def evaluate_pred(config):
    """Evaluate a video-prediction model (FutureGAN, CopyLast baseline, or
    pre-computed external predictions) on a test video dataset.

    Predicts `nframes_pred` future frames per clip, optionally saves the
    input/predicted frames and gifs, and computes/saves the configured
    evaluation metrics.

    Fixes over the original:
      * `x is not 0` -> `x != 0`: identity comparison with an int literal
        is implementation-defined and a SyntaxWarning on CPython >= 3.8.
      * `data_iter_pred = iter(dataloader_pred)` is now created only in the
        branch where `dataloader_pred` exists; previously it raised a
        NameError for FutureGAN/CopyLast runs.
    """
    # define directories
    model_name = config.model
    test_data_root = config.data_root
    if config.deep_pred > 1:
        test_dir = config.test_dir + '/' + config.experiment_name + \
            '/deep-pred{}/'.format(config.deep_pred) + model_name
    else:
        test_dir = config.test_dir + '/' + config.experiment_name + \
            '/pred/' + model_name
    if not os.path.exists(test_dir):
        os.makedirs(test_dir)
    sample_dir = test_dir + '/samples'
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)

    nframes_in = config.nframes_in
    # Recursive prediction multiplies the horizon by deep_pred.
    nframes_pred = config.nframes_pred * config.deep_pred
    nframes = nframes_in + nframes_pred
    img_size = int(config.resl)
    nworkers = 4

    # load model (only FutureGAN needs a network; the baselines do not)
    if config.model == 'FutureGAN':
        ckpt = torch.load(config.model_path)
        # model structure
        G = ckpt['G_structure']
        # load model parameters
        G.load_state_dict(ckpt['state_dict'])
        G.eval()
        G = G.module.model
        if use_cuda:
            G = G.cuda()
        print(' ... loading FutureGAN`s FutureGenerator from checkpoint: {}'.
              format(config.model_path))

    # load test dataset
    transform = transforms.Compose([
        transforms.Resize(size=(img_size, img_size),
                          interpolation=Image.NEAREST),
        transforms.ToTensor(),
    ])
    data_iter_pred = None
    if config.model == 'FutureGAN' or config.model == 'CopyLast':
        dataset_gt = VideoFolder(video_root=test_data_root,
                                 video_ext=config.ext,
                                 nframes=nframes,
                                 loader=video_loader,
                                 transform=transform)
        dataloader_gt = DataLoader(
            dataset=dataset_gt,
            batch_size=config.batch_size,
            sampler=sampler.SequentialSampler(dataset_gt),
            num_workers=nworkers)
    else:
        # External predictions: ground truth and predicted clips live in
        # parallel directory trees.
        dataset_gt = VideoFolder(video_root=test_data_root + '/in_gt',
                                 video_ext=config.ext,
                                 nframes=nframes,
                                 loader=video_loader,
                                 transform=transform)
        dataset_pred = VideoFolder(video_root=test_data_root + '/in_pred',
                                   video_ext=config.ext,
                                   nframes=nframes,
                                   loader=video_loader,
                                   transform=transform)
        dataloader_pred = DataLoader(
            dataset=dataset_pred,
            batch_size=config.batch_size,
            sampler=sampler.SequentialSampler(dataset_pred),
            num_workers=nworkers)
        dataloader_gt = DataLoader(
            dataset=dataset_gt,
            batch_size=config.batch_size,
            sampler=sampler.SequentialSampler(dataset_gt),
            num_workers=nworkers)
        data_iter_pred = iter(dataloader_pred)
    test_len = len(dataset_gt)
    data_iter_gt = iter(dataloader_gt)

    # save model structure to file
    if config.model == 'FutureGAN':
        # count model parameters
        nparams_g = count_model_params(G)
        with open(
                test_dir +
                '/model_structure_{}x{}.txt'.format(img_size, img_size),
                'w') as f:
            print('--------------------------------------------------', file=f)
            print('Sequences in test dataset: ', len(dataset_gt), file=f)
            print('Number of model parameters: ', file=f)
            print(nparams_g, file=f)
            print('--------------------------------------------------', file=f)
            print('Model structure: ', file=f)
            print(G, file=f)
            print('--------------------------------------------------', file=f)
            print(
                ' ... FutureGAN`s FutureGenerator has been loaded successfully from checkpoint ... '
            )
            print(' ... saving model struture to {}'.format(f))

    # save test configuration
    with open(test_dir + '/eval_config.txt', 'w') as f:
        print('------------- test configuration -------------', file=f)
        for l, m in vars(config).items():
            print(('{}: {}').format(l, m), file=f)
        print(' ... loading test configuration ... ')
        print(' ... saving test configuration {}'.format(f))

    # define tensors
    if config.model == 'FutureGAN':
        print(' ... testing FutureGAN ...')
        if config.deep_pred > 1:
            print(
                ' ... recursively predicting {}x{} future frames from {} input frames ...'
                .format(config.deep_pred, config.nframes_pred, nframes_in))
        else:
            print(' ... predicting {} future frames from {} input frames ...'.
                  format(nframes_pred, nframes_in))
    z = Variable(
        torch.FloatTensor(config.batch_size, config.nc, nframes_in, img_size,
                          img_size))
    z_in = Variable(
        torch.FloatTensor(config.batch_size, config.nc, nframes_in, img_size,
                          img_size))
    x_pred = Variable(
        torch.FloatTensor(config.batch_size, config.nc, nframes_pred,
                          img_size, img_size))
    x = Variable(
        torch.FloatTensor(config.batch_size, config.nc, nframes, img_size,
                          img_size))
    x_eval = Variable(
        torch.FloatTensor(config.batch_size, config.nc, nframes_pred,
                          img_size, img_size))

    # define tensors for evaluation
    if config.metrics is not None:
        print(' ... evaluating {} ...'.format(model_name))
        if 'ms_ssim' in config.metrics and img_size < 32:
            raise ValueError(
                'For calculating `ms_ssim`, your dataset must consist of images at least of size 32x32!'
            )
        metrics_values = {}
        for metric_name in config.metrics:
            metrics_values['{}_frames'.format(metric_name)] = torch.zeros_like(
                torch.FloatTensor(test_len, nframes_pred))
            metrics_values['{}_avg'.format(metric_name)] = torch.zeros_like(
                torch.FloatTensor(test_len, 1))
            print(' ... calculating {} ...'.format(metric_name))

    # test loop
    if config.metrics is not None:
        # Per-metric running index into the per-video result tensors.
        metrics_i_video = {}
        for metric_name in config.metrics:
            metrics_i_video['{}_i_video'.format(metric_name)] = 0
    i_save_video = 1
    i_save_gif = 1
    for step in tqdm(range(len(data_iter_gt))):
        # input frames: split each clip into conditioning (z) and target
        # (x_eval) segments.
        x.data = next(data_iter_gt)
        x_eval.data = x.data[:, :, nframes_in:, :, :]
        z.data = x.data[:, :, :nframes_in, :, :]
        if use_cuda:
            x = x.cuda()
            x_eval = x_eval.cuda()
            z = z.cuda()
            x_pred = x_pred.cuda()

        # predict video frames
        # !!! TODO !!! for deep_pred > 1: correctly implemented only if
        # nframes_in == nframes_pred
        if config.model == 'FutureGAN':
            z_in.data = z.data
            for i_deep_pred in range(0, config.deep_pred):
                # Feed each prediction window back in as the next input.
                x_pred[:z_in.size(0), :,
                       i_deep_pred * config.nframes_pred:
                       (i_deep_pred * config.nframes_pred) +
                       config.nframes_pred, :, :] = G(z_in).detach()
                z_in.data = x_pred.data[:, :,
                                        i_deep_pred * config.nframes_pred:
                                        (i_deep_pred * config.nframes_pred) +
                                        config.nframes_pred, :, :]
        elif config.model == 'CopyLast':
            # Baseline: repeat the last observed frame.
            for i_baseline_frame in range(x_pred.size(2)):
                x_pred.data[:x.size(0), :,
                            i_baseline_frame, :, :] = x.data[:, :,
                                                             nframes_in -
                                                             1, :, :]
        else:
            x_pred.data = next(data_iter_pred)[:x.size(0), :,
                                               nframes_in:, :, :]

        # calculate eval statistics
        if config.metrics is not None:
            for metric_name in config.metrics:
                calculate_metric = getattr(
                    eval_metrics, 'calculate_{}'.format(metric_name))
                for i_batch in range(x.size(0)):
                    for i_frame in range(nframes_pred):
                        metrics_values['{}_frames'.format(metric_name)][
                            metrics_i_video['{}_i_video'.format(metric_name)],
                            i_frame] = calculate_metric(
                                x_pred[i_batch, :, i_frame, :, :],
                                x_eval[i_batch, :, i_frame, :, :])
                    metrics_values['{}_avg'.format(metric_name)][
                        metrics_i_video['{}_i_video'.format(
                            metric_name)]] = torch.mean(
                                metrics_values['{}_frames'.format(
                                    metric_name)][metrics_i_video[
                                        '{}_i_video'.format(metric_name)]])
                    metrics_i_video['{}_i_video'.format(
                        metric_name
                    )] = metrics_i_video['{}_i_video'.format(metric_name)] + 1

        # save frames
        if config.save_frames_every != 0 and config.model == 'FutureGAN':
            if step % config.save_frames_every == 0 or step == 0:
                for i_save_batch in range(x.size(0)):
                    if not os.path.exists(
                            sample_dir +
                            '/in_gt/video{:04d}'.format(i_save_video)):
                        os.makedirs(sample_dir +
                                    '/in_gt/video{:04d}'.format(i_save_video))
                    if not os.path.exists(
                            sample_dir +
                            '/in_pred/video{:04d}'.format(i_save_video)):
                        os.makedirs(
                            sample_dir +
                            '/in_pred/video{:04d}'.format(i_save_video))
                    # Conditioning frames go to both in_gt and in_pred dirs.
                    for i_save_z in range(z.size(2)):
                        save_image_grid(
                            z.data[i_save_batch, :, i_save_z, :, :].unsqueeze(0),
                            sample_dir +
                            '/in_gt/video{:04d}/video{:04d}_frame{:04d}_R{}x{}.png'
                            .format(i_save_video, i_save_video, i_save_z + 1,
                                    img_size, img_size), img_size, 1)
                        save_image_grid(
                            z.data[i_save_batch, :, i_save_z, :, :].unsqueeze(0),
                            sample_dir +
                            '/in_pred/video{:04d}/video{:04d}_frame{:04d}_R{}x{}.png'
                            .format(i_save_video, i_save_video, i_save_z + 1,
                                    img_size, img_size), img_size, 1)
                    # Target frames to in_gt, predicted frames to in_pred.
                    for i_save_x_pred in range(x_pred.size(2)):
                        save_image_grid(
                            x_eval.data[i_save_batch, :,
                                        i_save_x_pred, :, :].unsqueeze(0),
                            sample_dir +
                            '/in_gt/video{:04d}/video{:04d}_frame{:04d}_R{}x{}.png'
                            .format(i_save_video, i_save_video,
                                    i_save_x_pred + 1 + nframes_in, img_size,
                                    img_size), img_size, 1)
                        save_image_grid(
                            x_pred.data[i_save_batch, :,
                                        i_save_x_pred, :, :].unsqueeze(0),
                            sample_dir +
                            '/in_pred/video{:04d}/video{:04d}_frame{:04d}_R{}x{}.png'
                            .format(i_save_video, i_save_video,
                                    i_save_x_pred + 1 + nframes_in, img_size,
                                    img_size), img_size, 1)
                    i_save_video = i_save_video + 1

        # save gifs
        if config.save_gif_every != 0:
            if step % config.save_gif_every == 0 or step == 0:
                for i_save_batch in range(x.size(0)):
                    if not os.path.exists(
                            sample_dir +
                            '/in_gt/video{:04d}'.format(i_save_gif)):
                        os.makedirs(sample_dir +
                                    '/in_gt/video{:04d}'.format(i_save_gif))
                    if not os.path.exists(
                            sample_dir +
                            '/in_pred/video{:04d}'.format(i_save_gif)):
                        os.makedirs(sample_dir +
                                    '/in_pred/video{:04d}'.format(i_save_gif))
                    # Ground-truth gif: conditioning frames + real futures.
                    frames = []
                    for i_save_z in range(z.size(2)):
                        frames.append(
                            get_image_grid(
                                z.data[i_save_batch, :,
                                       i_save_z, :, :].unsqueeze(0), img_size,
                                1, config.in_border, config.npx_border))
                    for i_save_x_pred in range(x_pred.size(2)):
                        frames.append(
                            get_image_grid(
                                x_eval.data[i_save_batch, :,
                                            i_save_x_pred, :, :].unsqueeze(0),
                                img_size, 1, config.out_border,
                                config.npx_border))
                    imageio.mimsave(
                        sample_dir +
                        '/in_gt/video{:04d}/video{:04d}_R{}x{}.gif'.format(
                            i_save_gif, i_save_gif, img_size, img_size),
                        frames)
                    # Prediction gif: conditioning frames + predicted futures.
                    frames = []
                    for i_save_z in range(z.size(2)):
                        frames.append(
                            get_image_grid(
                                z.data[i_save_batch, :,
                                       i_save_z, :, :].unsqueeze(0), img_size,
                                1, config.in_border, config.npx_border))
                    for i_save_x_pred in range(x_pred.size(2)):
                        frames.append(
                            get_image_grid(
                                x_pred.data[i_save_batch, :,
                                            i_save_x_pred, :, :].unsqueeze(0),
                                img_size, 1, config.out_border,
                                config.npx_border))
                    imageio.mimsave(
                        sample_dir +
                        '/in_pred/video{:04d}/video{:04d}_R{}x{}.gif'.format(
                            i_save_gif, i_save_gif, img_size, img_size),
                        frames)
                    i_save_gif = i_save_gif + 1

    if config.save_frames_every != 0 and config.model == 'FutureGAN':
        print(' ... saving video frames to dir: {}'.format(sample_dir))
    if config.save_gif_every != 0:
        print(' ... saving gifs to dir: {}'.format(sample_dir))

    # calculate and save mean eval statistics
    if config.metrics is not None:
        metrics_mean_values = {}
        for metric_name in config.metrics:
            metrics_mean_values['{}_frames'.format(metric_name)] = torch.mean(
                metrics_values['{}_frames'.format(metric_name)], 0)
            metrics_mean_values['{}_avg'.format(metric_name)] = torch.mean(
                metrics_values['{}_avg'.format(metric_name)], 0)
            torch.save(
                metrics_mean_values['{}_frames'.format(metric_name)],
                os.path.join(test_dir, '{}_frames.pt'.format(metric_name)))
            torch.save(metrics_mean_values['{}_avg'.format(metric_name)],
                       os.path.join(test_dir, '{}_avg.pt'.format(metric_name)))
        print(' ... saving evaluation statistics to dir: {}'.format(test_dir))
def __init__(self, opt, shared=None):
    """Build a batched teacher backed by a torch ``DataLoader``.

    :param opt: ParlAI-style option dict; consulted keys include
        'numworkers', 'pytorch_teacher_batch_sort', 'batch_sort_cache_type',
        'batch_sort_field', 'shuffle', and the datatype read by the parent.
    :param shared: when not None, state (dataset, dataloader, lastYs, data,
        id) is reused from another instance instead of being rebuilt.
    """
    # Batch sorting is handled here, not by the parent teacher.
    opt['batch_sort'] = False
    super().__init__(opt, shared)
    self.use_batch_act = self.bsz > 1
    self.num_workers = opt['numworkers']
    # Only sort batches while training (assumes sorting is a train-time
    # throughput optimization — TODO confirm against teacher docs).
    self.batch_sort = opt.get('pytorch_teacher_batch_sort') and \
        'train' in self.datatype
    self.batch_cache_type = opt.get('batch_sort_cache_type')
    self.batch_sort_field = opt.get('batch_sort_field')
    # One can specify a collate function to use for preparing a batch
    self.opt = opt.copy()
    self.is_shared = shared is not None
    dataset_classes = self.get_dataset_class(opt)
    # Ordered mode: explicit 'ordered' datatype, or streaming without shuffle.
    self.ordered = ('ordered' in self.datatype or
                    ('stream' in self.datatype and not opt.get('shuffle')))
    if self.ordered:
        # force index for ordered, so that we see every example
        warn_once('\nNote: You are using PytorchDataTeacher with ordered '
                  'examples. Please specify `--shuffle` if you would like '
                  'to have examples loaded in randomized order.\n')
        self.batch_cache_type = 'index'
    if not shared:
        # Primary instance: build the dataset(s) and dataloader from scratch.
        BatchSortCache.create()
        if len(dataset_classes) > 1:
            # Multi-task: instantiate each dataset with its own task option
            # and concatenate them. NOTE(review): collate_fn keeps only the
            # last task's collate function — presumably all tasks share one;
            # verify.
            datasets = []
            for class_name, collate_fn, task_name in dataset_classes:
                dataset_opt = opt.copy()
                dataset_opt['pytorch_teacher_task'] = task_name
                dataset_opt['task'] = task_name
                datasets.append(class_name(dataset_opt))
                self.collate_fn = collate_fn
            # Teacher id is the comma-joined task names.
            self.id = ','.join([d[2] for d in dataset_classes])
            self.dataset = ParlAIConcatDataset(datasets)
        else:
            class_name, self.collate_fn, task_name = dataset_classes[0]
            self.id = task_name
            self.dataset = class_name(opt)
        # Deterministic order for ordered mode and for evaluation; random
        # sampling otherwise.
        if self.ordered or not self.training:
            data_sampler = sampler.SequentialSampler(self.dataset)
        else:
            data_sampler = sampler.RandomSampler(self.dataset)
        self.pytorch_dataloader = DataLoader(
            self.dataset,
            batch_size=self.bsz,
            sampler=data_sampler,
            num_workers=self.num_workers,
            collate_fn=self.collate_fn,
            pin_memory=False,
            drop_last=False,
        )
        # One slot per batch row for the most recent labels.
        self.lastYs = [None] * self.bsz
        if self.batch_sort:
            # Background process that fills the batch-sort cache.
            self.loader_process = LoaderProcess(opt)
            self.loader_process.start()
        self.data = enumerate(self.pytorch_dataloader)
    else:
        # Shared instance: reuse the primary instance's state.
        self.dataset = shared['dataset']
        self.pytorch_dataloader = shared['pytorch_dataloader']
        self.lastYs = shared['lastYs']
        self.data = shared['data']
        self.id = shared['id']
    self.num_batches = math.ceil(self.dataset.num_episodes() / self.bsz)
    self.reset()
def train_sup(self, epoch_lim, data, valid_data, early_stopping_lim,
              batch_size, num_workers, track_embeddings, validation_rate,
              loss_weight_base=1, value_weight=0, value_ratio=0):
    """
    Training loop
    :param epoch_lim: total number of training epochs
    :param data: training data
    :param valid_data: validation data
    :param early_stopping_lim: Number of epochs to run without validation
    improvement before stopping if None, never stop early
    :param batch_size: training batch_size
    :param num_workers: number of CPU workers to use for data loading
    :param track_embeddings: Save out embedding information at end of run
    :param validation_rate: Check validation performance every
    validation_rate training epochs
    :param loss_weight_base: A constant between 0 and 1 used to interpolate
    between Single (=0) and Multi (=1) Step forecasting.
    :param value_weight: A constant multiplier for the real-value loss,
    set to 0 in the paper
    :param value_ratio: The proportion of loss used for the MSE loss term
    (as opposed for the cross-entropy loss), set to 0 in the paper
    :return loss array, model:
    """
    # "Never stop early" is implemented by making the limit unreachable.
    if early_stopping_lim is None:
        early_stopping_lim = epoch_lim
    # Training batches are drawn in random order; incomplete final batch
    # is dropped. Validation is iterated sequentially and in full.
    train_sampler = sampler.RandomSampler(np.arange(len(data)))
    data_train = DataLoader(data, batch_size=batch_size,
                            sampler=train_sampler, drop_last=True)
    valid_sampler = sampler.SequentialSampler(np.arange(len(valid_data)))
    data_valid = DataLoader(valid_data, batch_size=batch_size,
                            sampler=valid_sampler)
    step = 0                         # global batch counter (for logging)
    bsf_loss = np.inf                # best-so-far validation loss
    epochs_without_improvement = 0
    improvements = []                # history returned to the caller
    for epoch in range(epoch_lim):
        if epochs_without_improvement > early_stopping_lim:
            print('Exceeded early stopping limit, stopping')
            break
        # Validate every `validation_rate` epochs (including epoch 0).
        if epoch % validation_rate == 0:
            valid_loss = self.validation(data_valid=data_valid,
                                         step=step,
                                         data=data,
                                         loss_weight_base=loss_weight_base,
                                         value_weight=value_weight,
                                         value_ratio=value_ratio)
            (bsf_loss,
             epochs_without_improvement,
             improvements) = self.manage_early_stopping(
                bsf_loss=bsf_loss,
                early_stopping_lim=early_stopping_lim,
                epochs_without_improvement=epochs_without_improvement,
                valid_loss=valid_loss,
                validation_rate=validation_rate,
                improvements=improvements)
        running_train_loss = 0
        for inp, out, out_real, lens in tqdm(data_train):
            loss, y_p = forecast_model.get_loss(inp=inp,
                                                out=out,
                                                lens=lens,
                                                cuda=True,
                                                gn=self.model,
                                                glucose_dat=data,
                                                criterion=self.criterion,
                                                base=loss_weight_base,
                                                out_real=out_real,
                                                value_weight=value_weight,
                                                value_ratio=value_ratio)
            step += 1
            # NOTE(review): `[0]` scalar indexing of a 0-d tensor is the
            # pre-0.4 PyTorch idiom; on modern torch this would be
            # `loss.item()` — confirm the pinned torch version.
            running_train_loss += loss.data.cpu().numpy()[0]
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        # Mean loss over the epoch's batches, logged at the current step.
        running_train_loss = running_train_loss/len(data_train)
        self.writer.add_scalar(tag='train_loss',
                               scalar_value=running_train_loss,
                               global_step=step)
    # Persist final weights regardless of early stopping.
    torch.save(self.model.state_dict(),
               '{}/final_sup.pt'.format(self.model_dir))
    if track_embeddings:
        self.embed(data_valid, step, embed_batch=100)
    return improvements
def main():
    """Train hero2vec embeddings from a drafts CSV and save the artifacts.

    Command-line arguments:
        sys.argv[1]: path to the heroes DataFrame CSV
        sys.argv[2]: path to the hero->index JSON mapping

    Side effects: writes embeddings, a pickled model, and a loss plot under
    ./output/hero/, and shows/saves an embedding projection.
    """
    data_dir = sys.argv[1]
    hero2ix_dir = sys.argv[2]
    # import DataFrame and hero2ix dictionary
    heroes_df_dota = pd.read_csv(data_dir, index_col=0)
    heroes_df_dota = heroes_df_dota.dropna().reset_index(drop=True)
    with open(hero2ix_dir, 'r') as fp:
        hero2ix = json.load(fp)
    print(len(heroes_df_dota))
    # train/dev/test split: 80% / 10% / 10%
    split_1 = int(len(heroes_df_dota) * 0.8)
    split_2 = int(len(heroes_df_dota) * 0.9)
    heroes_train_dota = heroes_df_dota.iloc[:split_1]
    heroes_dev_dota = heroes_df_dota.iloc[split_1:split_2]
    heroes_test_dota = heroes_df_dota.iloc[split_2:]
    # build dataset generator
    train_gen = DataFrameIterator(heroes_train_dota, hero2ix)
    dev_gen = DataFrameIterator(heroes_dev_dota, hero2ix)
    test_gen = DataFrameIterator(heroes_test_dota, hero2ix)
    # Use Dataloader class in pytorch to generate batched data.
    # Train/dev are sampled randomly; test is sequential for reproducibility.
    batch_size = 16
    loader_train = DataLoader(train_gen, batch_size=batch_size,
                              sampler=sampler.RandomSampler(train_gen),
                              num_workers=4)
    loader_dev = DataLoader(dev_gen, batch_size=batch_size,
                            sampler=sampler.RandomSampler(dev_gen),
                            num_workers=4)
    loader_test = DataLoader(test_gen, batch_size=batch_size,
                             sampler=sampler.SequentialSampler(test_gen),
                             num_workers=4)
    # define model (one of the models in hetor2vec.py)
    model = CBOHBilayer(embedding_dim=20, heropool_size=len(hero2ix),
                        hidden_dim=20)
    # define loss function
    loss_function = nn.CrossEntropyLoss()
    # run train
    losses = train(model=model, dataloader=loader_train,
                   devloader=loader_dev, loss_function=loss_function,
                   init_lr=0.1, epochs=20, lr_decay_epoch=8, print_epoch=2,
                   gpu=True)
    # check test accuracy
    print(
        'Top3, Top5 and Top 10 accuracy: ',
        accuracy_in_train(model, dataloader=loader_test,
                          batch_size=batch_size, gpu=False))
    # save embeddings as numpy arrays
    output_dir = './output/hero/hero_embeddings.npy'
    save_embeddings(model, filename=output_dir)
    # pickle model — use a context manager so the file handle is flushed
    # and closed (the original leaked an open 'wb' handle).
    pickle_dir = './output/hero/model.p'
    with open(pickle_dir, 'wb') as model_file:
        pickle.dump(obj=model, file=model_file)
    # plot loss vs epoch (path typo 'loss_hitory' kept: it is the on-disk name)
    plot_loss(losses, './output/hero/loss_hitory.png')
    # project embeddings to 2d plane
    plot_embeddings(model, hero2ix)
# --- Continuation fragment: tail of a sampler-normalizing method whose
# `def` line precedes this chunk (it reads `kwargs`, `sampler`,
# `batch_size`, `self.dataset` from that unseen scope). It coerces any
# sampler-like argument into a torch BatchSampler. ---
drop_last = kwargs.get('drop_last', False)
# Already a BatchSampler: pass through unchanged.
if isinstance(sampler, torchsampler.BatchSampler):
    return sampler
# No sampler given: default to random order over the whole dataset.
if sampler == None:
    sampler = torchsampler.RandomSampler(self.dataset)
# Any other iterable (e.g. a range of indices): wrap it so it can batch.
elif not isinstance(sampler, torchsampler.Sampler):
    sampler = torchsampler.RandomSampler(sampler)
return torchsampler.BatchSampler(sampler, batch_size, drop_last)

# Test this module
if __name__ == '__main__':
    # NOTE(review): N_TESTS says 4 but only 2 test sections are visible in
    # this chunk — the remaining tests presumably follow elsewhere.
    N_TESTS = 4
    passed = 0
    dataset = Dataset(transform=unrel.TRANSFORM)
    # Test on a subset sampler
    batch_sampler = torchsampler.SequentialSampler(range(14))
    dataloader = FauxDataLoader(dataset, sampler=batch_sampler)
    for batch_i, batch in enumerate(dataloader):
        # Each batch exposes a list of image tensors.
        assert isinstance(batch['image'], list)
        for image in batch['image']:
            assert isinstance(image, torch.Tensor)
        print('dataset count %3d / %3d'
              % ((1+batch_i) * dataloader.sampler.batch_size,
                 len(dataloader)))
    passed += 1; print('OK %d/%d' % (passed, N_TESTS))
    # Test on a batched subset sampler
    batch_sampler = torchsampler.BatchSampler(
        torchsampler.SequentialSampler(range(14)), 3, False)
    dataloader = FauxDataLoader(dataset, sampler=batch_sampler)
    for batch_i, batch in enumerate(dataloader):
        assert isinstance(batch['image'], list)
        for image in batch['image']:
            assert isinstance(image, torch.Tensor)
        print('dataset count %3d / %3d'
              % ((1+batch_i) * dataloader.sampler.batch_size,
                 len(dataloader)))
def train_template_network(loss='default'):
    """Train and return the CIFAR10 template network.

    Hyper-parameters follow the original ResNet paper (SGD, step-decayed
    learning rate, 45k/5k train/val split of the CIFAR10 training set).

    Args:
        loss: One of 'default' (cross-entropy), 'sgm', or 'l2'.

    Returns:
        The trained model.

    Raises:
        NameError: If `loss` is not a recognized option.
    """
    # Use training parameters of original ResNet paper.
    split_index = 45000
    batch_size = 128
    lr = 1e-1
    momentum = 0.9
    weight_decay = 1e-4
    epoch = 180
    decay_milestones = [90, 120]
    decay_factor = 0.1
    # SGM/L2 specific parameters
    aux_loss_wt = 0.02

    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std)
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std)
    ])

    # Dataset 0 is the CIFAR10 train split, dataset 1 the test split.
    image_datasets = {
        0: datasets.CIFAR10(root=args.cifar10_dir, train=True,
                            download=True, transform=train_transform),
        1: datasets.CIFAR10(root=args.cifar10_dir, train=False,
                            download=True, transform=test_transform),
    }

    # train/val come from disjoint random subsets of the training split;
    # test iterates the test split sequentially.
    pin = 'cpu' not in args.device
    split_samplers = {
        'train': sampler.SubsetRandomSampler(range(split_index)),
        'val': sampler.SubsetRandomSampler(
            range(split_index, len(image_datasets[0]))),
        'test': sampler.SequentialSampler(image_datasets[1]),
    }
    split_source = {'train': 0, 'val': 0, 'test': 1}
    dataloaders = {
        split: DataLoader(image_datasets[split_source[split]],
                          batch_size=batch_size,
                          sampler=split_samplers[split],
                          num_workers=args.num_workers,
                          pin_memory=pin)
        for split in ('train', 'val', 'test')
    }
    dataset_sizes = {
        'train': split_index,
        'val': len(image_datasets[0]) - split_index,
        'test': len(image_datasets[1]),
    }

    model = mutil.get_model(args.arch).to(device)
    if loss == 'default':
        criterion = torch.nn.CrossEntropyLoss().to(device)
    elif loss in ('sgm', 'l2'):
        criterion = GenericLoss(loss, aux_loss_wt, model.linear.out_features)
    else:
        raise NameError('{} is not recognized.'.format(loss))

    optimizer = torch.optim.SGD(mutil.get_model_trainable_parameters(model),
                                lr=lr,
                                momentum=momentum,
                                weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=decay_milestones, gamma=decay_factor)

    model, _ = mutil.train_model(model, criterion, optimizer, dataloaders,
                                 dataset_sizes, scheduler=scheduler,
                                 num_epochs=epoch, device=device)
    mutil.eval_model(model, dataloaders['test'], dataset_sizes['test'],
                     device=device)
    return model
def extract_activation(self, data, labels=None, customfunction=None):
    """Run the wrapped model over `data` and collect flattened activations.

    Results are served from / written to a cache keyed by the data, model
    path, and layer selection. Activations are gathered by hooks that
    append into ``self.activations`` during the forward passes.

    :param data: path to an ``.npy`` file or an ``np.ndarray`` of images
        (assumes a 4-D NHWC or NCHW batch — TODO confirm against callers).
    :param labels: optional path to an ``.npy`` file or ``np.ndarray`` of
        labels; when ``self.conditional`` is set, they replace the model's
        predicted classes for grouping.
    :param customfunction: optional callable applied to each layer's raw
        activation array before flattening.
    :return: ``(per_label_dict, None)`` when ``self.conditional``,
        otherwise ``(merged_activations, predicted_or_given_labels)``.
    :raises ValueError: if no activations were captured for the requested
        layers.
    """
    datapath = None
    cachekey, cacheddata = get_cached_data(
        data, self.modelpath, self.layers,
        conditional=self.conditional, classpath=self.classpath,
        customfunction=customfunction)
    if cachekey is not None:
        # Cache hit: non-conditional results are stored as an
        # (activations, predictions) tuple, conditional ones as a dict.
        if isinstance(cacheddata, tuple):
            return cacheddata[0], cacheddata[1]
        else:
            return cacheddata, None

    # Reset the hook accumulator for this extraction run.
    self.activations = collections.defaultdict(list)
    # Accept either an on-disk .npy path or an in-memory array.
    # (Removed the original no-op `data = data` / `labels = labels`
    # branches for ndarray inputs.)
    if isinstance(data, str):
        datapath = data
        data = np.load(data)
    if isinstance(labels, str):
        labels = np.load(labels)

    channel = data.shape[3]
    # transpose data if data in nhwc format i.e channel is 3
    # TODO more elegant to assert the shape of the data based on the
    # input expected by the model (grayscale NHWC would slip through).
    if channel == 3:
        data = data.transpose(0, 3, 1, 2)

    torchdata = torch.stack([torch.Tensor(i) for i in data])
    dataset = tdata.TensorDataset(torchdata)
    dataloader = tdata.DataLoader(dataset, batch_size=100, shuffle=False,
                                  num_workers=2,
                                  sampler=sampler.SequentialSampler(dataset))

    # Forward passes: hooks fill self.activations; we also keep the
    # argmax class predictions.
    y_prob = None
    for inputs in dataloader:
        y_pred = self.modelinstance(inputs[0])
        y_prob_ = np.argmax(y_pred.detach().numpy(), axis=1)
        if y_prob is None:
            y_prob = y_prob_
        else:
            y_prob = np.concatenate((y_prob, y_prob_), axis=0)

    # One tensor per hooked layer, concatenated over batches.
    activations = {
        name: torch.cat(outputs, 0)
        for name, outputs in self.activations.items()
    }
    merged_acts = None
    if not activations:
        raise ValueError('Could not extract from specified layer')
    for _, act in activations.items():
        act = act.detach().numpy()
        # apply transformation on activations as defined by custom function
        if customfunction is not None and callable(customfunction):
            act = customfunction(act)
        # Flatten conv maps (N,C,H,W) -> (N, C*H*W); keep 2-D as-is.
        if len(act.shape) == 4:
            act = np.reshape(
                act,
                (act.shape[0], act.shape[1] * act.shape[2] * act.shape[3]))
        else:
            act = np.reshape(act, (act.shape[0], act.shape[1]))
        if merged_acts is None:
            merged_acts = act
        else:
            merged_acts = np.concatenate((merged_acts, act), axis=-1)

    if self.conditional:
        # Group activations per label; explicit labels override predictions.
        if labels is not None:
            y_prob = labels
        y_prob = y_prob.reshape((-1, ))
        unique_labels = set(y_prob)
        conditional_f_acts = {}
        for label in unique_labels:
            indices = [x == label for x in y_prob]
            selected_f_acts = merged_acts[indices]
            conditional_f_acts[label] = selected_f_acts
        cachekey = write_to_cache(conditional_f_acts, datapath,
                                  self.modelpath, self.layers,
                                  classpath=self.classpath,
                                  conditional=self.conditional)
        return conditional_f_acts, None
    else:
        cachekey = write_to_cache((merged_acts, y_prob), datapath,
                                  self.modelpath, self.layers,
                                  classpath=self.classpath,
                                  conditional=self.conditional)
        return merged_acts, y_prob
weight_method=params['weight_method'], create_cache=params['create_cache'], num_channels=1)) dataset = ConcatDataset(dataset) if len(dataset) > 1 else dataset[0] target_type = params['target_type'] if params['target_type'] != 'spatial_bootstrap' else 'psa' val_dataset = WSJ0(folder=params['validation_folder'], length='full', n_fft=params['n_fft'], hop_length=params['hop_length'], output_type=target_type, create_cache=True, #params['create_cache'], num_channels=1) if args.sample_strategy == 'sequential': sample_strategy = sampler.SequentialSampler(dataset) elif args.sample_strategy == 'random': sample_strategy = sampler.RandomSampler(dataset) dataloader = DataLoader(dataset, batch_size=params['batch_size'], num_workers=params['num_workers'], sampler=sample_strategy) dummy_input, _, _, _, _, dummy_one_hot = dataset[0] params['num_attractors'] = dummy_one_hot.shape[-1] params['num_sources'] = params['num_attractors'] params['sample_rate'] = dataset.sr dataset.reorder_sources = args.reorder_sources val_dataset.reorder_sources = args.reorder_sources