def load_dataset(config):
    """
    Loads and returns the testing dataset loader.

    NOTE(review): the previous docstring claimed a training loader was also
    returned; this function only builds the test loader.

    :param config: configs for training/testing
    :return test_loader: dataloader for testing data
    :return num_of_classes: total classes in the test set
    """
    # Standard ImageNet channel statistics (torchvision convention).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    data_transforms = transforms.Compose(
        [transforms.Resize((224, 224)), transforms.ToTensor(), normalize])
    test_dataset = ImageDataset(root_dir=config["TEST_DATA_PATH"],
                                model_save_path=config["MODEL_SAVE_PATH"],
                                testing=True,
                                transform=data_transforms)
    # shuffle=False keeps evaluation order deterministic.
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=config["TEST_BATCH_SIZE"], shuffle=False,
        num_workers=config["TEST_WORKERS"], pin_memory=True)
    return test_loader, test_dataset.get_total_classes()
def __init__(self, network, learning_rate):
    # Date-ranged image datasets: 2018 data for training, 2019 for testing.
    train_set = ImageDataset('data/2018-01-01-2019-01-01-20-False')
    # NOTE(review): shuffle=False on the *training* loader is unusual —
    # confirm the data is meant to be consumed in chronological order
    # (the date-ranged paths suggest time-series data).
    self.train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=10, shuffle=False, drop_last=True)
    test_set = ImageDataset('data/2019-01-01-2020-01-01-20-False')
    self.test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=10, shuffle=False, drop_last=True)
    self.learning_rate = learning_rate
    self.model = network
    # NOTE(review): BCELoss expects sigmoid-activated outputs in [0, 1];
    # the original author flagged this choice as uncertain — verify the
    # network's final activation.
    self.lossfn = nn.BCELoss() #not sure for this
    self.optimizer = optim.SGD(self.model.parameters(), lr=learning_rate, momentum=0.9)
def get_data(train_df, val_df, config):
    """Build train/val datasets and loaders from the given dataframes.

    Returns a (dataloaders, dataset_sizes) pair keyed by 'train'/'val'.
    The val loader uses a doubled batch size and no shuffling.
    """
    # Identical preprocessing for both splits, built per split.
    data_transforms = {
        split: transforms.Compose([
            transforms.ToTensor(),
            transforms.CenterCrop(config['input_size']),
        ])
        for split in ('train', 'val')
    }
    image_datasets = {
        'train': ImageDataset(train_df, transforms=data_transforms['train']),
        'val': ImageDataset(val_df, transforms=data_transforms['val']),
    }
    dataloaders = {
        'train': DataLoader(image_datasets['train'],
                            batch_size=config['batch_size'],
                            shuffle=True,
                            num_workers=config['workers']),
        'val': DataLoader(image_datasets['val'],
                          batch_size=2 * config['batch_size'],
                          shuffle=False,
                          num_workers=config['workers']),
    }
    dataset_sizes = {split: len(ds) for split, ds in image_datasets.items()}
    return dataloaders, dataset_sizes
def prepare_data(self):
    """Create train/val datasets, optionally restricted to one CV fold."""
    train_dataset = ImageDataset(
        dataset_cfg=self.cfg.dataset, transform=self.transforms["train"]
    )
    # Same underlying data as the train set; only the transform differs.
    # The fold indices below carve out disjoint train/val subsets.
    val_dataset = ImageDataset(
        dataset_cfg=self.cfg.dataset, transform=self.transforms["val"]
    )
    # cross validation
    k_fold = self.cfg.dataset.k_fold
    val_k = self.cfg.dataset.val_k
    if k_fold > 0:
        X, y = train_dataset.get_all_data()
        # shuffle=False keeps the fold assignment deterministic across runs,
        # so a given val_k always selects the same samples.
        skf = StratifiedKFold(n_splits=k_fold, shuffle=False)
        fold_set = [(train_idx, val_idx) for train_idx, val_idx in skf.split(X, y)]
        train_dataset = Subset(
            train_dataset, indices=fold_set[val_k][0])
        val_dataset = Subset(
            val_dataset, indices=fold_set[val_k][1])
    self.datasets = {
        "train": train_dataset,
        "val": val_dataset,
    }
def setup_dataset(config, label_a, label_b):
    """Build paired train/test datasets and loaders for two classes (a/b)."""
    train_dir_a = os.path.join(config['dataset']['train']['dirname'], label_a)
    train_dir_b = os.path.join(config['dataset']['train']['dirname'], label_b)
    test_dir_a = os.path.join(config['dataset']['test']['dirname'], label_a)
    test_dir_b = os.path.join(config['dataset']['test']['dirname'], label_b)
    # Fail fast if any class directory is missing.
    # NOTE(review): assert is stripped under `python -O`; consider raising.
    assert os.path.exists(train_dir_a) and os.path.exists(train_dir_b)
    assert os.path.exists(test_dir_a) and os.path.exists(test_dir_b)
    img_size = config['img_size']
    # Train-time augmentation options come from the config; test uses none.
    # NOTE(review): 'trainsform' looks like a typo for 'transform', but the
    # key must match the config files on disk — confirm before renaming.
    train_transform = setup_image_transform(img_size=img_size, **config['trainsform'])
    test_transform = setup_image_transform(img_size=img_size)
    train_dataset_a = ImageDataset(train_dir_a, train_transform, config['dataset']['train']['ext'])
    train_dataset_b = ImageDataset(train_dir_b, train_transform, config['dataset']['train']['ext'])
    test_dataset_a = ImageDataset(test_dir_a, test_transform, config['dataset']['test']['ext'])
    test_dataset_b = ImageDataset(test_dir_b, test_transform, config['dataset']['test']['ext'])
    # Only the training loaders shuffle; test loaders keep config defaults.
    train_loader_a = DataLoader(train_dataset_a, **config['loader'], shuffle=True)
    train_loader_b = DataLoader(train_dataset_b, **config['loader'], shuffle=True)
    test_loader_a = DataLoader(test_dataset_a, **config['loader'])
    test_loader_b = DataLoader(test_dataset_b, **config['loader'])
    return {
        'train_loader_a': train_loader_a,
        'train_loader_b': train_loader_b,
        'test_loader_a': test_loader_a,
        'test_loader_b': test_loader_b,
    }
def get_object_confusion(class1, class2, similarity_model, config):
    """
    Measure how well `similarity_model` distinguishes two classes.

    Runs the similarity model over pairs drawn from the two classes and
    computes a distinguishability score (accuracy of the pairwise
    predictions against ground truth).

    :param class1: first class name
    :param class2: second class name
    :param similarity_model: model exposing predict_on_batch()
    :param config: dict with 'model' and 'benchmark' sections
    :return: distinguishability score (float)
    """
    model_config = config['model']
    benchmark_config = config['benchmark']
    # (removed: unused local `model_path` read from model_config)
    dataset_path = benchmark_config['dataset_path']
    params = {
        'dim': model_config['input_shape'],
        'batch_size': benchmark_config['batch_size'],
        'shuffle': False
    }
    test_dataset = ImageDataset(dataset_path, 'validation')
    test_dataset.prepare_specific(benchmark_config['test_cases'] // 2, class1, class2)
    test_generator = DataGenerator(test_dataset, **params)
    preds = np.array([])
    gts = np.array([])
    # Visualize roughly 5 evenly spaced batches over the run; max(1, ...)
    # guards against a zero divisor for tiny test_cases values.
    vis_every = max(1, benchmark_config['test_cases'] // (5 * benchmark_config['batch_size']))
    for i in tqdm(range(len(test_generator))):
        batch = test_generator[i]
        pred = similarity_model.predict_on_batch(batch[0])
        preds = np.append(preds, pred.flatten())
        gts = np.append(gts, batch[1])
        # BUG FIX: the original `not i % tc // (5 * bs)` parsed as
        # `not ((i % tc) // (5 * bs))` due to operator precedence, which
        # fired for the first 5*bs batches instead of periodically.
        if benchmark_config['vis_output'] and i % vis_every == 0:
            show_output(batch[0][0], batch[0][1], pred, batch[1])
    te_acc = compute_accuracy(preds, gts)
    print("Class 1: " + class1 + ", Class2: " + class2 +
          ", Distinguishability Score: " + str(te_acc))
    return te_acc
def train(epoch, args):
    """Run one training epoch over the CASIA landmark dataset.

    Streams batches from ImageDataset.get() until it yields None, performs
    one optimizer step per batch, and prints a running loss/accuracy line.
    Relies on module globals: net, optimizer, criterion, use_cuda.
    """
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    batch_idx = 0
    ds = ImageDataset(args.dataset, dataset_load, 'data/casia_landmark.txt',
                      name=args.net + ':train',
                      bs=args.bs, shuffle=True, nthread=6, imagesize=128)
    while True:
        img, label = ds.get()
        if img is None:
            break
        inputs = torch.from_numpy(img).float()
        targets = torch.from_numpy(label[:, 0]).long()
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        # BUG FIX: the original wrote `outputs = loss = criterion(outputs, targets)`
        # which referenced `outputs` before it was ever assigned (NameError —
        # the net's result was unpacked into outputs_1/outputs_2) and then
        # clobbered `outputs` with the scalar loss. Keep the full
        # (cos_theta, phi_theta) tuple for the margin loss.
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        # loss.data[0] is legacy pre-0.4 PyTorch; .item() is the modern form
        # (matching the sibling train() elsewhere in this file).
        lossd = loss.item()
        loss.backward()
        optimizer.step()
        train_loss += lossd
        cos_theta = outputs[0]  # 0=cos_theta 1=phi_theta
        _, predicted = torch.max(cos_theta.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        printoneline(dt(), 'Te=%d Loss=%.4f | AccT=%.4f%% (%d/%d) %.4f %.2f %d' %
                     (epoch, train_loss / (batch_idx + 1), 100.0 * correct / total,
                      correct, total, lossd, criterion.lamb, criterion.it))
        batch_idx += 1
    print('')
def __init__(self, server, port, scenario=0, dataset='MNIST'):
    """Client state: connection info, local model, and dataset loaders."""
    # connection parameters
    self.host = server
    self.port = port
    # attack scenario flag (semantics defined by the caller; 0 by default)
    self.attack = scenario
    # model trained locally on this client
    self.local_model = ClassNet()
    # the dataset wrapper supplies both loaders in one call
    self.evaluator = ImageDataset(dataset)
    self.train_loader, self.test_loader = self.evaluator.load_data()
def train(epoch,args):
    """One epoch of adversarial mask training.

    The mask net is optimized to *hurt* classification (negated criterion2)
    while staying spatially smooth (Laplacian penalty) and sparse (L1),
    and the FC head is separately optimized to classify through the masked
    features. Relies on module globals: featureNet, maskNet, fcNet,
    optimizerMask, optimizerFC, criterion, criterion2, laplacianKernel.
    """
    featureNet.train()
    maskNet.train()
    fcNet.train()
    train_loss = 0
    classification_loss = 0
    correct = 0
    total = 0
    batch_idx = 0
    # Streaming dataset: get() yields (img, label) batches, (None, ...) at epoch end.
    ds = ImageDataset(args.dataset,dataset_load,'data/casia_landmark.txt',name=args.net+':train',
        bs=args.bs,shuffle=True,nthread=6,imagesize=128)
    while True:
        img,label = ds.get()
        if img is None:
            break
        inputs = torch.from_numpy(img).float()
        targets = torch.from_numpy(label[:,0]).long()
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizerMask.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        features = featureNet(inputs)
        mask = maskNet(features)
        # element-wise gating of the feature maps by the learned mask
        maskedFeatures = torch.mul(mask, features)
        outputs = fcNet(maskedFeatures)
        outputs1 = outputs[0] # 0=cos_theta 1=phi_theta
        _, predicted = torch.max(outputs1.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        # training the advNet:
        # NOTE(review): lossAdv is computed but never used below — dead code?
        lossAdv = criterion(outputs, targets)
        # smoothness penalty: Laplacian response is low for spatially smooth masks
        lossCompact = torch.sum(conv2d(mask, laplacianKernel, stride=1, groups=512)) # lossSize
        #L1 norm of the mask to make the mask sparse.
        # NOTE(review): size_average is deprecated in modern PyTorch
        # (use reduction='sum' instead).
        lossSize = F.l1_loss(mask, target=torch.ones(mask.size()).cuda(), size_average = False)
        print("advnet:", - criterion2(outputs1, targets).data/10, lossCompact.data/1000000, lossSize.data/10000)
        # adversarial objective: maximize classification loss (negative sign),
        # balanced against compactness and sparsity with hand-tuned scales
        loss = - criterion2(outputs1, targets)/10 + lossCompact/1000000 + lossSize/10000
        lossd = loss.data
        # retain_graph so the FC-head backward below can reuse the same graph
        loss.backward(retain_graph=True)
        optimizerMask.step()
        optimizerFC.zero_grad()
        lossC = criterion(outputs, targets)
        lossClassification = lossC.data
        lossC.backward()
        optimizerFC.step()
        classification_loss += lossClassification
        train_loss += loss.data
        print("classification loss:", classification_loss / (batch_idx + 1))
        printoneline(dt(),'Te=%d Loss=%.4f | AccT=%.4f%% (%d/%d) %.4f %.2f %d\n'
            % (epoch,train_loss/(batch_idx+1), 100.0*correct/total, correct, total, lossd, criterion.lamb, criterion.it))
        batch_idx += 1
        # break
    print('')
def __init__(self, host, port, scenario=None, dataset='MNIST', delta=None):
    """Server state: global model, dataset evaluator, sync primitives; ends by calling startup()."""
    # pick the architecture matching the dataset
    if dataset == 'MNIST':
        self.global_model = ClassNet()
    else:
        self.global_model = CIFARNet()
    self.host = host
    self.port = port
    self.evaluator = ImageDataset(dataset)
    self.test_loader = None
    self.scenario = scenario
    self.isStart = False
    self.delta = delta
    # NOTE(review): presumably seconds to wait between rounds — confirm
    self.wait_time = 30
    self._lock = threading.Lock()
    self.startup()
def image_output(model, images):
    """Print the model's argmax prediction for each image, one at a time."""
    model.eval()
    with torch.no_grad():
        # wrap raw images in a dataset; label 0 and empty tag are placeholders
        dataset = ImageDataset()
        for img in images:
            dataset.append([img, 0, ''])
        loader = utils.DataLoader(dataset, batch_size=1, num_workers=1,
                                  pin_memory=True)
        for batch in loader:
            prediction = model(batch[0], output='argmax')
            print(f'Output: {prediction}')
def dataloader(train_dir, test_dir, crop_size, batch_size):
    """Create (train, test) DataLoaders that resize images to crop_size x crop_size."""
    preprocess = transforms.Compose([
        transforms.Resize((crop_size, crop_size)),
        transforms.ToTensor(),
    ])
    # same preprocessing pipeline for both splits
    loaders = [
        DataLoader(dataset=ImageDataset(directory, preprocess),
                   batch_size=batch_size)
        for directory in (train_dir, test_dir)
    ]
    return loaders[0], loaders[1]
def train_model(model, train):
    """Train `model` for 10 epochs on the provided features/labels dict and return it."""
    print('>>> Train model ...')
    dataset = ImageDataset(train['features'], train['labels'])
    dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
    for epoch in range(10):
        model.train()
        running_loss = 0.0
        for step, (batch_x, batch_y) in enumerate(dataloader):
            optimizer.zero_grad()
            loss = criterion(model(batch_x), batch_y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # log the average loss every 2000 mini-batches
            if step % 2000 == 1999:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, step + 1, running_loss / 2000))
                running_loss = 0.0
    return model
def evaluate():
    """Evaluate an ensemble of saved checkpoints on the test images and dump a CSV.

    Loads each checkpoint listed in args.models from <save_loc>/<run_id>,
    runs test_cls over the (sorted) test image list, and writes the assigned
    labels via get_csv. Raises AssertionError if the run directory is missing.
    """
    args = arg_parser()
    save_loc = os.path.join(args.save_loc, args.run_id)
    if not os.path.exists(save_loc):
        raise AssertionError('No directory named'
                             ' as {} found'.format(save_loc))
    # Ensemble of models (a single model can also be passed for testing).
    # BUG FIX: the original loaded every checkpoint into ONE shared network
    # object and appended that same instance repeatedly, so every "ensemble
    # member" ended up aliasing the last checkpoint's weights. Build a
    # fresh network per checkpoint instead.
    models = list()
    for model in args.models:
        network = resnet50(args.num_classes)
        network.load_state_dict(torch.load(os.path.join(save_loc, model)))
        models.append(network)
    if args.device == 'cuda' and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    img_list = parse_data(args.test_data)
    # NOTE: You may sort the data according to your usecase
    # NOTE(review): str.strip('.jpg') strips *characters*, not the suffix;
    # it works here only when the filename stem is purely numeric — confirm.
    img_list.sort(key=lambda x: int(x[39:].strip('.jpg')))
    # Dataset and Dataloader
    test_dataset = ImageDataset(img_list)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=False, num_workers=args.num_workers)
    assigned_labels = test_cls(models, test_loader, args.num_classes, device)
    # NOTE: You may amend this function for your usecase
    get_csv(assigned_labels, save_loc)
def main(image_dir, checkpoint_path, coloured_images_dir):
    """Colourize all grayscale test images with a trained checkpoint.

    :param image_dir: directory whose 'test' subfolder holds the input images
    :param checkpoint_path: path to a checkpoint containing 'model_state_dict'
    :param coloured_images_dir: output directory for colourized JPEGs
    """
    test_data = ImageDataset(image_dir)
    num_images = len(os.listdir(f"{image_dir}/test"))
    # one batch covering the whole test set
    test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=num_images)
    model = Network()
    model = model.to(device)
    # map_location lets a CUDA-saved checkpoint load on a CPU-only host
    if device == torch.device("cpu"):
        checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
    elif device == torch.device("cuda"):
        checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    # BUG FIX: `iter(...).next()` is Python 2 syntax and raises
    # AttributeError on Python 3; use the builtin next().
    img_gray, img_ab, img_inception = next(iter(test_dataloader))
    img_gray, img_ab, img_inception = img_gray.to(device), img_ab.to(
        device), img_inception.to(device)
    with torch.no_grad():
        output = model(img_gray, img_inception)
    for idx in range(num_images):
        try:
            _, predicted_image, _ = convert_to_rgb(img_gray[idx].cpu(),
                                                   output[idx].cpu(),
                                                   img_ab[idx].cpu())
            plt.imsave(arr=predicted_image,
                       fname=f"{coloured_images_dir}/colourized_{idx}.jpg")
        except IndexError:
            # fewer outputs than listed files; stop cleanly
            break
def main(cfg):
    """Extract features for all images, compute pairwise distances, and upload results.

    Pipeline: dataset -> feature bank -> distance matrix -> storage upload ->
    argsort -> database upload. Large intermediates are deleted as soon as
    the next stage has consumed them to bound peak memory.
    """
    # data loader
    print("start DataLoader")
    print(f"dataset dir: {cfg['datasets']['dir_path']}")
    dataset = ImageDataset(cfg)
    data_loader = DataLoader(dataset,
                             batch_size=cfg["dataloader"]["batch_size"],
                             shuffle=False,
                             num_workers=cfg["dataloader"]["num_workers"],
                             collate_fn=dataloader_collate_fn)
    print(f"There are {len(dataset)} images to be processed")
    # feature extractor model
    print("start initialize model")
    model = build_model(cfg)
    model.load_param(cfg["model"]["weight"])
    # BUG FIX: the original else-branch assigned the undefined name
    # `img_batch` (NameError on CPU-only hosts). Simply keep the model on
    # CPU when no GPU is available.
    if torch.cuda.device_count() >= 1:
        model = model.cuda(0)
    model.eval()
    feature_bank = process_featerbank(data_loader, dataset, model, cfg)
    img_paths = list(feature_bank.keys())
    distance_mat = process_distancing(feature_bank, cfg)
    del feature_bank
    upload_to_storage(img_paths, cfg)
    argsorted_distance = process_argsort(distance_mat, cfg)
    del distance_mat
    upload_to_database(argsorted_distance, img_paths, cfg)
    del argsorted_distance
    print("processing has been completed")
def test_web_imgs(model, features):
    """Run the model on scraped web images and print top-5 softmax scores.

    :param model: trained classifier in eval mode after this call
    :param features: feature tensors for the five scraped images
    """
    print('>>> Test web images ...')
    # Hard-coded ground-truth labels for the five scraped images.
    dataset = ImageDataset(features, np.array([13, 22, 15, 4, 38]))
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
    model.eval()
    # FIX: the original re-used `i` for the outer loop and both inner loops
    # and shadowed the `features` parameter with the loop variable; renamed
    # for clarity (behavior unchanged — the shadowed names were never read
    # afterwards).
    for batch_idx, (inputs, labels) in enumerate(dataloader):
        outputs = model(inputs)
        print('label', labels)
        print('outputs', outputs)
        _, predicted = torch.max(outputs, 1)
        # FIX: nn.Softmax() without dim is deprecated; dim=1 is what the
        # implicit choice resolves to for 2-D logits.
        m = torch.nn.Softmax(dim=1)
        softmax = m(outputs)
        print('softmax', softmax)
        print('predicted', predicted)
        sorts = []
        for cls_idx, score in enumerate(softmax[0]):
            sorts.append((score.item(), cls_idx))
        sorts = sorted(sorts, key=lambda x: x[0], reverse=True)
        print(sorts)
        # top-5 probabilities
        for rank in range(5):
            print('%.4f' % sorts[rank][0])
def main():
    """Train a synthetic-vs-real image classifier and checkpoint the best epoch."""
    syn_path = './data/Synthetic'
    real_path = './data/Real'
    n_batch = 4
    num_epochs = 20
    learning_rate = 0.005
    train_dataset = ImageDataset(syn_path, real_path)
    train_loader = DataLoader(train_dataset, batch_size=n_batch, shuffle=True,
                              num_workers=12)
    # NOTE(review): validation iterates the *training* dataset (no held-out
    # split is visible here), so the accuracy below is training accuracy.
    valid_loader = DataLoader(train_dataset, batch_size=1, shuffle=False,
                              num_workers=12)
    classifier = ImageClassifier()
    classifier = classifier.cuda()
    loss_fn = nn.CrossEntropyLoss()
    loss_fn = loss_fn.cuda()
    # only the classifier head is optimized
    optimizer = torch.optim.Adam([{
        'params': classifier.classifier.parameters()
    }], lr=learning_rate, weight_decay=1e-4)
    # BUG FIX: best_acc was reset to 0 *inside* the epoch loop right before
    # the comparison, so the checkpoint was overwritten every single epoch.
    # Track the best accuracy across epochs instead.
    best_acc = 0
    best_epoch = 0
    for epoch in range(num_epochs):
        classifier.train()
        for i, (images, label) in enumerate(train_loader):
            label = torch.LongTensor(label)
            images, label = images.cuda(), label.cuda()
            pred = classifier(images)
            loss = loss_fn(pred, label)
            print('eopch:', epoch, i + 1, loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        with torch.no_grad():
            classifier.eval()
            correct = 0
            total = 0
            for i, (images, label) in enumerate(valid_loader):
                label = torch.LongTensor(label)
                images, label = images.cuda(), label.cuda()
                pred = classifier(images)
                pred = pred.argmax(dim=1, keepdim=True).cpu()
                print(pred, label)
                correct += pred.eq(label.view_as(pred).cpu()).cpu().sum()
                total += pred.size(0)
            accuracy = float(correct) / total
            print(accuracy)
            if accuracy >= best_acc:
                best_acc = accuracy
                best_epoch = epoch
                torch.save(classifier.state_dict(), './classifier.pth')
def __create_data(self):
    """Build per-split image datasets and loaders for test/train/validation."""
    splits = ['test', 'train', 'validation']
    if self.use_transform:
        # standard ImageNet preprocessing with horizontal-flip augmentation
        im_transform = transforms.Compose([
            transforms.Resize(self.input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
    else:
        im_transform = None
    # only the test split returns the full sample tuple
    im_dataset = {}
    for split in splits:
        im_dataset[split] = ImageDataset(image_path_names=self.data_dict[split],
                                         captions_int=self.captions_int,
                                         im_addr=self.image_addr,
                                         transformer=im_transform,
                                         return_all=(split == 'test'))
    im_loader = {
        split: DataLoader(im_dataset[split],
                          batch_size=self.batch_size,
                          shuffle=self.shuffle,
                          num_workers=self.num_works,
                          drop_last=True)
        for split in splits
    }
    return im_dataset, im_loader
def inference(cfg):
    """Run the checkpointed lightning module over the test set and report metrics."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # restore the trained module in eval mode on the chosen device
    pl_module = (LightningModuleInference
                 .load_from_checkpoint(CHECKPOINT, cfg=cfg)
                 .eval()
                 .to(device))
    transform = get_transform(cfg.transform.val)
    dataset = ImageDataset(Dict({"root": TEST_ROOT_PATH}), transform)
    dataloader = DataLoader(
        dataset=dataset,
        batch_size=cfg.data.dataloader.batch_size // 2,
        num_workers=cfg.data.dataloader.num_workers,
        shuffle=cfg.data.dataloader.shuffle,
    )
    Path(OUTPUT_PATH).mkdir(parents=True, exist_ok=True)
    labels = []
    predicts = []
    for imgs, targets in tqdm(dataloader):
        # renamed from `input` to avoid shadowing the builtin
        batch_inputs = imgs.to(device, non_blocking=True)
        outputs = pl_module(batch_inputs)
        _, predicted_indexes = torch.max(outputs.data, 1)
        labels.extend(targets.cpu().numpy().tolist())
        predicts.extend(predicted_indexes.cpu().numpy().tolist())
    calc_eval(labels, predicts)
def __init__(self, config):
    """Set up distributed info, output dirs, loggers, DGP model(s), and the data loader."""
    self.rank, self.world_size = 0, 1
    if config['dist']:
        self.rank = dist.get_rank()
        self.world_size = dist.get_world_size()
    self.mode = config['dgp_mode']
    assert self.mode in [
        'reconstruct', 'colorization', 'SR', 'hybrid', 'inpainting',
        'morphing', 'defence', 'jitter'
    ]
    # Only rank 0 touches the filesystem and owns the loggers.
    if self.rank == 0:
        # mkdir path
        if not os.path.exists('{}/images'.format(config['exp_path'])):
            os.makedirs('{}/images'.format(config['exp_path']))
        if not os.path.exists('{}/images_sheet'.format(
                config['exp_path'])):
            os.makedirs('{}/images_sheet'.format(config['exp_path']))
        if not os.path.exists('{}/logs'.format(config['exp_path'])):
            os.makedirs('{}/logs'.format(config['exp_path']))
        # prepare logger
        # tensorboardX is imported lazily so it is only required when enabled
        if not config['no_tb']:
            try:
                from tensorboardX import SummaryWriter
            except ImportError:
                raise Exception("Please switch off \"tensorboard\" "
                                "in your config file if you do not "
                                "want to use it, otherwise install it.")
            self.tb_logger = SummaryWriter('{}'.format(config['exp_path']))
        else:
            self.tb_logger = None
        self.logger = utils.create_logger(
            'global_logger',
            '{}/logs/log_train.txt'.format(config['exp_path']))
    self.model = models.DGP(config)
    # morphing interpolates between two independently optimized models,
    # so two extra DGP instances are created for that mode
    if self.mode == 'morphing':
        self.model2 = models.DGP(config)
        self.model_interp = models.DGP(config)
    # Data loader
    train_dataset = ImageDataset(
        config['root_dir'],
        config['list_file'],
        image_size=config['resolution'],
        normalize=True)
    sampler = utils.DistributedSampler(
        train_dataset) if config['dist'] else None
    # batch_size=1: DGP optimizes one image at a time
    self.train_loader = DataLoader(
        train_dataset,
        batch_size=1,
        shuffle=False,
        sampler=sampler,
        num_workers=1,
        pin_memory=False)
    self.config = config
def load_dataset() -> "tuple[DataLoader, DataLoader]":
    """
    Build the train and validation DataLoaders from the module-level config.

    :return: (train_dataloader, valid_dataloader)
    """
    # NOTE(review): the annotation was the list literal `[DataLoader,
    # DataLoader]`, which is not a valid type; tightened to a tuple type
    # matching the actual return value.
    train_datasets = ImageDataset(config.train_image_dir, config.image_size, config.upscale_factor, "train")
    valid_datasets = ImageDataset(config.valid_image_dir, config.image_size, config.upscale_factor, "valid")
    # Only training shuffles; persistent workers keep loader processes alive
    # across epochs.
    train_dataloader = DataLoader(train_datasets,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=config.num_workers,
                                  pin_memory=True,
                                  persistent_workers=True)
    valid_dataloader = DataLoader(valid_datasets,
                                  batch_size=config.batch_size,
                                  shuffle=False,
                                  num_workers=config.num_workers,
                                  pin_memory=True,
                                  persistent_workers=True)
    return train_dataloader, valid_dataloader
def train(epoch, args):
    """One epoch of training; prints running loss/accuracy every 10 batches.

    Relies on module globals: net, optimizer, criterion, use_cuda.
    """
    net.train()
    epoch_loss = 0
    n_correct = 0
    n_seen = 0
    step = 0
    ds = ImageDataset(imageroot=args.dataset,
                      callback=dataset_load,
                      imagelistfile=args.data_list,
                      name=args.net + ':train',
                      batchsize=args.batchsize,
                      shuffle=True,
                      nthread=args.nthread,
                      imagesize=128)
    batch_num = ds.imagenum // args.batchsize
    # ds.get() streams batches and signals the end of the epoch with None
    while True:
        img, label = ds.get()
        if img is None:
            break
        inputs = torch.from_numpy(img).float()
        targets = torch.from_numpy(label[:, 0]).long()
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        cos_theta = outputs[0]  # 0=cos_theta 1=phi_theta
        _, predicted = torch.max(cos_theta.data, 1)
        n_seen += targets.size(0)
        n_correct += predicted.eq(targets.data).sum().item()
        if step % 10 == 0:
            print(
                dt(), 'Epoch=%d batch: %d/%d Loss=%.4f | Acc=%.4f%%' %
                (epoch, step, batch_num, epoch_loss / (step + 1),
                 n_correct * 100.0 / n_seen))
        step += 1
def get_dataloader(data_dir, subdir, train=None):
    """Return a shuffling DataLoader over data_dir/subdir, or None if the directory is missing."""
    target_dir = os.path.join(data_dir, subdir)
    # guard clause: nothing to load if the directory does not exist
    if not os.path.exists(target_dir):
        return None
    dataset = ImageDataset(target_dir, train)
    return data.DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)
def main():
    """
    Compute MAV for all the training examples
    """
    parser = argparse.ArgumentParser(
        description='BC learning for image classification')
    parser.add_argument('--dataset', required=True, choices=['cifar10', 'cifar100'])
    parser.add_argument('--netType', required=True, choices=['convnet'])
    parser.add_argument('--data', required=True, help='Path to dataset')
    parser.add_argument('--save', default='None', help='Directory to save the results')
    parser.add_argument('--resume', required=True)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--BC', action='store_true', help='BC learning')
    parser.add_argument('--plus', action='store_true', help='Use BC+')
    parser.add_argument('--batchSize', type=int, default=128)
    parser.add_argument('--seed', type=int, default=1701)
    parser.add_argument('--val', action='store_true')
    parser.add_argument('--nb_vals', type=int, default=10000)
    opt = parser.parse_args()
    # NOTE(review): nClasses is hard-coded to 10 even though --dataset
    # allows cifar100 — confirm this is intentional.
    opt.nClasses = 10
    model = getattr(models, opt.netType)(opt.nClasses)
    serializers.load_npz(opt.resume, model)
    model.to_gpu(opt.gpu)
    train_images, train_labels = load_dataset(opt)
    # Iterate the training set once, in order, without augmentation.
    train_data = ImageDataset(train_images, train_labels, opt, train=False)
    train_iter = chainer.iterators.SerialIterator(train_data, opt.batchSize, repeat=False, shuffle=False)
    # Inference mode: no train-time layers, no autograd graph.
    chainer.config.train = False
    chainer.config.enable_backprop = False
    # Per-class lists of penultimate scores for correctly classified examples.
    scores = [[] for _ in range(opt.nClasses)]
    for i, batch in enumerate(train_iter):
        x_array, t_array = chainer.dataset.concat_examples(batch)
        x = chainer.Variable(cuda.to_gpu(x_array, opt.gpu))
        fc6 = cuda.to_cpu(model(x).data) # (B, 10)
        # Keep only correctly classified examples for each class's MAV.
        for score, (x, t) in zip(fc6, batch):
            if np.argmax(score) == t:
                scores[t].append(score)
    # Add channel axis (needed at multi-crop evaluation)
    scores = [np.array(x)[:, np.newaxis, :] for x in scores] # (N_c, 1, C) * C
    # Mean activation vector (MAV) per class.
    mavs = np.array([np.mean(x, axis=0) for x in scores]) # (C, 1, C)
    joblib.dump(scores, os.path.join(opt.save, "train_scores.joblib"))
    joblib.dump(mavs, os.path.join(opt.save, "mavs.joblib"))
def predict(config):
    """Run the trained classifier and regressor over the submission set.

    Returns (clf_preds_accum, reg_preds_accum): rounded sigmoid class
    predictions and raw regression outputs, as numpy arrays.
    """
    since = time.time()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.CenterCrop(config['input_size']),
    ])
    sub_df = pd.read_csv(sub_df_path)
    test_dataset = ImageDataset(df=sub_df, transforms=test_transforms,
                                is_train=False)
    test_loader = DataLoader(test_dataset,
                             batch_size=config['batch_size'] * 2,
                             shuffle=False,
                             num_workers=config['workers'])

    def _restore(weights_path):
        # helper: rebuild a resnet18-backed TransferNet from a checkpoint
        net = TransferNet(models.resnet18(pretrained=False))
        net.load_state_dict(torch.load(weights_path))
        net.to(device)
        return net

    reg_model = _restore(trained_models_path + "reg.pth")
    clf_model = _restore(trained_models_path + "clf.pth")

    reg_preds_accum = []
    clf_preds_accum = []
    for batch in test_loader:
        image = batch['image'].to(device)
        with torch.no_grad():
            reg_preds = reg_model(image)
            clf_preds = clf_model(image)
        reg_preds_accum.extend(reg_preds.cpu().detach().numpy())
        clf_preds_accum.extend(clf_preds.cpu().detach().numpy())
    time_elapsed = time.time() - since
    print('Inference complete {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                      time_elapsed % 60))
    reg_preds_accum = np.array(reg_preds_accum)
    # threshold the classifier logits at 0.5 via sigmoid + round
    clf_preds_accum = torch.round(
        torch.sigmoid(torch.tensor(clf_preds_accum))).numpy()
    return clf_preds_accum, reg_preds_accum
def train_loaders():
    """Build the training loader (with augmentation) and the validation loader."""
    augmentations = transforms.Compose([
        transforms.ColorJitter(0.5, 0.5, 0.5),
        transforms.RandomAffine(degrees=10),
        transforms.GaussianBlur(5),
        transforms.RandomPerspective(distortion_scale=0.2),
    ])
    # validation data gets no augmentation
    datasets = {
        'train': ImageDataset(img_dir=dir_train, augment=augmentations),
        'val': ImageDataset(img_dir=dir_val),
    }

    def _make_loader(ds):
        # shared loader settings for both splits
        return utils.DataLoader(ds,
                                batch_size=batch_size,
                                shuffle=shuffle_dataset,
                                num_workers=1,
                                pin_memory=True)

    return _make_loader(datasets['train']), _make_loader(datasets['val'])
def run(file, sim_matrix, data_path, annoy_distrance='angular', annoy_num_tree=50, output_path="./results"):
    """Find the 10 nearest neighbours of every lot and dump them as JSON.

    :param file: CSV of lot metadata (header row; must contain an 'id' column)
    :param sim_matrix: .npy file of per-image feature vectors
    :param data_path: image directory consumed by ImageDataset
    :param annoy_distrance: annoy distance metric (name kept for
        backward compatibility despite the typo)
    :param annoy_num_tree: number of annoy trees
    :param output_path: directory for the resulting JSON file
    """
    import csv
    # BUG FIX: the CSV handle was opened inline and never closed; use a
    # context manager.
    with open(file, 'r') as csv_file:
        reader = list(csv.reader(csv_file))
    cols = reader[0]
    lot_dict = dict()
    for row in reader[1:]:
        lot = dict(zip(cols, row))
        lot_dict[lot['id']] = lot
    ds = ImageDataset(data_path)
    # map annoy indices (dense ints) <-> lot ids (image filename stems)
    ann_indices = list(range(len(ds.image_paths)))
    data_indices = list(
        map(lambda x: x.split('/')[-1].split('.')[0], ds.image_paths))
    ann_to_data = dict(zip(ann_indices, data_indices))
    data_to_ann = dict(zip(data_indices, ann_indices))
    features = np.load(sim_matrix)
    ann = construct_ann_index(annoy_distrance, annoy_num_tree, features)
    arr_results = []
    # BUG FIX: the original looped `for k in list(data_indices)` and then
    # re-checked `if k in data_indices` — always true and O(n) per item.
    for k in data_indices:
        ann_idx = data_to_ann[k]
        # ask for 11 so we can drop the query item itself (first result)
        similar_images, distances = ann.get_nns_by_item(
            ann_idx, 11, include_distances=True)
        similar_images, distances = similar_images[1:], distances[1:]
        try:
            item = dict(artwork=dict(lot_dict[k]))
            sims = list(
                map(lambda s: lot_dict[ann_to_data[s]].copy(), similar_images))
            for idx in range(len(sims)):
                sims[idx]['score'] = 1 - float(distances[idx])
            item['sims'] = list(
                sorted(sims, key=lambda x: x['score'], reverse=True))
            arr_results.append(item)
        except KeyError:
            # BUG FIX: was a bare `except:` that swallowed every error;
            # only a lot id missing from the CSV is expected here.
            print('this one fails %d' % ann_idx)
    sim_name = sim_matrix.split('/')[-1]
    with open('%s/nn-from-%s.json' % (output_path, sim_name), 'w') as outfile:
        json.dump(arr_results, outfile)
def get_dataset_list(path):
    '''
    Scan `path` for dataset sub-directories and wrap them in dataset objects.

    Directories named `image_<name>` become ImageDataset entries (with a
    sorted file list and per-image dimensions); `text_<name>` become
    TextDataset entries. Special directories are acknowledged with a log
    line. Errors are printed and the (possibly partial) list is returned.

    :param path: root directory to scan
    :return: list of ImageDataset / TextDataset objects
    '''
    datasets = []
    try:
        dataset_id = 0
        for subdir in os.listdir(path):
            if subdir.startswith('.'):
                continue  # skip hidden entries
            dataset_path = os.path.join(path, subdir)
            kind = subdir.split('_')[0]
            if kind == 'image':
                file_list = []
                nr_images = 0
                for file in os.listdir(dataset_path):
                    if file.endswith(('.JPEG', '.jpg', '.png')):
                        # BUG FIX: the Image.open handle was never closed
                        # (file-descriptor leak over large datasets); use a
                        # context manager. Also build the path with
                        # os.path.join instead of '/' concatenation.
                        with Image.open(os.path.join(dataset_path, file)) as im:
                            width, height = im.size
                        nr_images += 1
                        file_list.append({
                            "src": file,
                            "width": width,
                            "height": height
                        })
                file_list = sorted(file_list, key=itemgetter('src'))
                datasets.append(
                    ImageDataset(dataset_id, dataset_path,
                                 subdir.split('_')[1], file_list, nr_images))
                dataset_id = dataset_id + 1
            elif kind == 'text':
                datasets.append(
                    TextDataset(dataset_id, dataset_path, subdir.split('_')[1]))
                dataset_id = dataset_id + 1
            elif subdir == "tcav_concepts":
                print("found concept directory for tcav")
            elif subdir == 'current_explanations':
                print(
                    "found existing directory for explanation images. Images in the directory may be owerwritten."
                )
            else:
                print("{0} is not a valid dataset directory".format(subdir))
    except Exception as e:
        # best-effort scan: report the error and return what was collected
        print(e)
    finally:
        return datasets
def get_data_loaders(input_dir, batch_size=10, num_workers=6, val_size=100, test_size=100):
    """Load data from the given folder and split into train/validation/test.

    Returns (train_loader, val_loader, test_loader); train and val shuffle,
    test does not.
    """
    transform = get_transformation()
    # settings shared by all three splits
    common = dict(input_dir=input_dir,
                  test_size=test_size,
                  val_size=val_size,
                  transform=transform)
    train_dataset = ImageDataset(**common)
    val_dataset = ImageDataset(val=True, **common)
    test_dataset = ImageDataset(train=False, **common)

    def _make_loader(ds, shuffle):
        return torch.utils.data.DataLoader(dataset=ds,
                                           batch_size=batch_size,
                                           shuffle=shuffle,
                                           num_workers=num_workers)

    train_loader = _make_loader(train_dataset, True)
    val_loader = _make_loader(val_dataset, True)
    test_loader = _make_loader(test_dataset, False)
    return train_loader, val_loader, test_loader