Example #1
0
def get_clusters(dataset, num_clusters, model_type="resnet50_128", batch_size=64, n_batches=500):
    """Build a k-means vocabulary from deep descriptors of `dataset`.

    Runs the encoder over `n_batches` balanced batches, L2-normalizes the
    embeddings, stores them in an HDF5 cache under <ROOT_DIR>/centroids,
    clusters them with faiss k-means, and appends the resulting centroids
    to the same HDF5 file.
    """
    cache_dir = os.path.join(ROOT_DIR, 'centroids')
    # NOTE(review): the doubled '_' in the filename looks like a leftover from a
    # removed name component — kept as-is so existing cache paths stay valid.
    initcache = os.path.join(cache_dir,
                             model_type + '_' + '_' + str(num_clusters) + '_desc_cen.hdf5')

    model = Net(model_type).to(device)
    sampler = BalanceBatchSampler(dataset=dataset, n_classes=64, n_samples=1,
                                  n_batches_epoch=n_batches)
    loader = torch.utils.data.DataLoader(dataset=dataset, batch_sampler=sampler,
                                         num_workers=2)

    # One descriptor row per sample; the sampler yields n_batches batches of
    # batch_size samples each (64 classes x 1 sample).
    total_descriptors = batch_size * n_batches
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    with h5py.File(initcache, mode='w') as h5:
        with torch.no_grad():
            model.eval()
            print('====> Extracting Descriptors')
            feats = h5.create_dataset("descriptors",
                                      [total_descriptors, model.encoder_dim],
                                      dtype=np.float32)

            for batch_idx, (data, target, img_file, class_id) in enumerate(loader):
                start = batch_idx * batch_size
                normalized = F.normalize(model(data.to(device)), p=2, dim=1)
                feats[start:start + batch_size, :] = normalized.cpu().numpy()

        print('====> Clustering..')
        kmeans = faiss.Kmeans(model.encoder_dim, num_clusters, niter=100, verbose=False)
        kmeans.train(feats[...])  # materialize the full HDF5 dataset for faiss

        print('====> Storing centroids', kmeans.centroids.shape)
        h5.create_dataset('centroids', data=kmeans.centroids)
        print('====> Done!')
Example #2
0
                torchvision.transforms.Resize(256),
                torchvision.transforms.CenterCrop(224),
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(mean=mean_rgb, std=std_rgb)
            ]))
    elif exp_name == 'vgg2':
        validation_dataset_root = '/nfs/nas4/marzieh/marzieh/VGG_Face2/test/'
        dataset_validation = VGG_Faces2(validation_dataset_root,
                                        split='validation',
                                        upper=upper_vgg)
    #  --------------------------------------------------------------------------------------
    # Batch Sampling: n_samples * n_samples
    #  --------------------------------------------------------------------------------------
    batch_size = n_classes * n_samples
    batch_sampler_t = BalanceBatchSampler(dataset=dataset_train,
                                          n_classes=n_classes,
                                          n_samples=n_samples,
                                          n_batches_epoch=n_batches_train)
    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_sampler=batch_sampler_t,
                                               num_workers=num_workers)

    batch_sampler_v = BalanceBatchSampler(dataset=dataset_validation,
                                          n_classes=n_classes,
                                          n_samples=n_samples,
                                          n_batches_epoch=n_batches_valid)
    validation_loader = torch.utils.data.DataLoader(
        dataset_validation,
        batch_sampler=batch_sampler_v,
        num_workers=num_workers)
    batch_sampler_H0t = BalanceBatchSampler(dataset=dataset_train,
                                            n_classes=n_classes * 2,