def get_clusters(dataset, num_clusters, model_type="resnet50_128", batch_size=64, n_batches=500):
    """Extract L2-normalized embeddings from `dataset` and cluster them with faiss k-means.

    Runs the encoder over `n_batches` balanced batches (one sample per class,
    `batch_size` classes per batch), stores the descriptors and the resulting
    `num_clusters` centroids in an HDF5 cache file under ROOT_DIR/centroids.

    Args:
        dataset: dataset compatible with BalanceBatchSampler; items yield
            (data, target, img_file, class_id).
        num_clusters: number of k-means centroids to compute.
        model_type: encoder architecture name passed to Net().
        batch_size: number of descriptors collected per batch.
        n_batches: number of batches to extract; total descriptors is
            batch_size * n_batches.
    """
    # NOTE(review): the doubled '_' separator looks accidental, but the same
    # path is presumably reconstructed by the centroid loader elsewhere, so it
    # is deliberately kept byte-identical here.
    initcache = os.path.join(ROOT_DIR, 'centroids',
                             model_type + '_' + '_' + str(num_clusters) + '_desc_cen.hdf5')
    model = Net(model_type).to(device)

    # Fix: the sampler previously hard-coded n_classes=64, which silently
    # disagreed with nDescriptors below whenever batch_size != 64. Using
    # batch_size keeps the default (64 * 1 = 64) identical.
    batch_sampler = BalanceBatchSampler(dataset=dataset, n_classes=batch_size,
                                        n_samples=1, n_batches_epoch=n_batches)
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_sampler=batch_sampler,
                                              num_workers=2)
    nDescriptors = batch_size * n_batches

    # Race-free directory creation (replaces the exists()/makedirs() pair).
    os.makedirs(os.path.join(ROOT_DIR, 'centroids'), exist_ok=True)

    with h5py.File(initcache, mode='w') as h5:
        with torch.no_grad():
            model.eval()
            print('====> Extracting Descriptors')
            dbFeat = h5.create_dataset("descriptors",
                                       [nDescriptors, model.encoder_dim],
                                       dtype=np.float32)
            for iteration, (data, target, img_file, class_id) in enumerate(data_loader):
                data = data.to(device)
                feats = F.normalize(model(data), p=2, dim=1).cpu().numpy()
                idx = iteration * batch_size
                # Fix: write only the rows actually produced so a short final
                # batch cannot raise a shape-mismatch on assignment.
                dbFeat[idx:idx + feats.shape[0], :] = feats

        print('====> Clustering..')
        niter = 100
        kmeans = faiss.Kmeans(model.encoder_dim, num_clusters, niter=niter, verbose=False)
        kmeans.train(dbFeat[...])

        print('====> Storing centroids', kmeans.centroids.shape)
        h5.create_dataset('centroids', data=kmeans.centroids)
        print('====> Done!')
torchvision.transforms.Resize(256), torchvision.transforms.CenterCrop(224), torchvision.transforms.ToTensor(), torchvision.transforms.Normalize(mean=mean_rgb, std=std_rgb) ])) elif exp_name == 'vgg2': validation_dataset_root = '/nfs/nas4/marzieh/marzieh/VGG_Face2/test/' dataset_validation = VGG_Faces2(validation_dataset_root, split='validation', upper=upper_vgg) # -------------------------------------------------------------------------------------- # Batch Sampling: n_samples * n_samples # -------------------------------------------------------------------------------------- batch_size = n_classes * n_samples batch_sampler_t = BalanceBatchSampler(dataset=dataset_train, n_classes=n_classes, n_samples=n_samples, n_batches_epoch=n_batches_train) train_loader = torch.utils.data.DataLoader(dataset_train, batch_sampler=batch_sampler_t, num_workers=num_workers) batch_sampler_v = BalanceBatchSampler(dataset=dataset_validation, n_classes=n_classes, n_samples=n_samples, n_batches_epoch=n_batches_valid) validation_loader = torch.utils.data.DataLoader( dataset_validation, batch_sampler=batch_sampler_v, num_workers=num_workers) batch_sampler_H0t = BalanceBatchSampler(dataset=dataset_train, n_classes=n_classes * 2,