Example #1
def get_preprocessor(preproc_str, preproc_cachefile):
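    # Train the preprocessor described by preproc_str, caching it on disk with
    # faiss.write_VectorTransform and reloading the cache on later runs; fall
    # back to an identity preprocessor when no preproc_str is given.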
    if preproc_str:
        if not preproc_cachefile or not os.path.exists(preproc_cachefile):
            preproc = train_preprocessor(preproc_str)
            if preproc_cachefile:
                print("store", preproc_cachefile)
                faiss.write_VectorTransform(preproc, preproc_cachefile)
        else:
            print("load", preproc_cachefile)
            preproc = faiss.read_VectorTransform(preproc_cachefile)
    else:
        preproc = IdentPreproc(dim)
    return preproc
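All of the snippets on this page revolve around the same write/read round trip for a trained VectorTransform. The following is a minimal, self-contained sketch of that pattern, assuming random 64-dimensional training data and a hypothetical cache file pca64.vt:

import os

import numpy as np
import faiss

d_in, d_out = 64, 16
cache = "pca64.vt"  # hypothetical cache path

if os.path.exists(cache):
    # reuse the previously trained transform
    vt = faiss.read_VectorTransform(cache)
else:
    # train a PCA reduction from 64 to 16 dimensions and cache it
    xt = np.random.rand(10000, d_in).astype('float32')
    vt = faiss.PCAMatrix(d_in, d_out)
    vt.train(xt)
    faiss.write_VectorTransform(vt, cache)

xq = np.random.rand(5, d_in).astype('float32')
xq_reduced = vt.apply_py(xq)  # shape (5, 16)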
Example #2
def get_preprocessor():
    if preproc_str:
        if not preproc_cachefile or not os.path.exists(preproc_cachefile):
            preproc = train_preprocessor()
            if preproc_cachefile:
                print("store", preproc_cachefile)
                faiss.write_VectorTransform(preproc, preproc_cachefile)
        else:
            print("load", preproc_cachefile)
            preproc = faiss.read_VectorTransform(preproc_cachefile)
    else:
        d = xb.shape[1]
        preproc = IdentPreproc(d)
    return preproc
 def zipBinaryTrainingParams(self, preproc, coarse_quantizer, codesIndex):
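     # Serialize the trained codes index and preprocessor to temporary files,
     # then concatenate them into one byte string, each blob preceded by its
     # length as a 12-digit ASCII field.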
     faiss.write_index(codesIndex, 'tmp1')
     # faiss.write_ProductQuantizer(coarse_quantizer,'tmp2')
     faiss.write_VectorTransform(preproc, 'tmp3')
     with open('tmp1', 'r+b') as fp:
         bin_index = fp.read()
     # with open('tmp2','r+b') as fp:
     #     bin_coarsequantizer = fp.read()
     with open('tmp3', 'r+b') as fp:
         bin_preproc = fp.read()
     index_length = ("%012d" % len(bin_index)).encode('ascii')
     # quantizer_length   = ("%012d"%len(bin_coarsequantizer)).encode('ascii')
     preproc_length = ("%012d" % len(bin_preproc)).encode('ascii')
     data = index_length + bin_index + preproc_length + bin_preproc
     return data
 def train_preprocessor(self, preproc_str_local, xt_local):
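     # Parse the preprocessing spec ("OPQ..." or "PCAR..."), train the matching
     # faiss transform on the training vectors and cache it with
     # write_VectorTransform; if the cache file already exists, just reload it.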
     if not self.preproc_cachefile or not os.path.exists(
             self.preproc_cachefile):
         print("train preproc", preproc_str_local)
         d = xt_local.shape[1]
         t0 = time.time()
         if preproc_str_local.startswith('OPQ'):
             fi = preproc_str_local[3:].split('_')
             m = int(fi[0])
             dout = int(fi[1]) if len(fi) == 2 else d
             preproc = faiss.OPQMatrix(d, m, dout)
         elif preproc_str_local.startswith('PCAR'):
             dout = int(preproc_str_local[4:-1])
             preproc = faiss.PCAMatrix(d, dout, 0, True)
         else:
             assert False
         preproc.train(indexfunctions.sanitize(xt_local[:100000000]))
         print("preproc train done in %.3f s" % (time.time() - t0))
         faiss.write_VectorTransform(preproc, self.preproc_cachefile)
     else:
         print("load preproc ", self.preproc_cachefile)
         preproc = faiss.read_VectorTransform(self.preproc_cachefile)
     return preproc
Example #6
 def savemat(self, path):
     faiss.write_VectorTransform(self.mat, path)
    def serializeIndex(self, indexFilePath=None, mmapPath=None):
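        # Memory-map the serialized index, the empty codes index, the cached
        # preprocessor and the ID-to-name map, concatenate them (each preceded
        # by a 12-digit ASCII length) into tmp_binary_index.dat, and return the
        # resulting bytes. If mmapPath is given, the IVF mmap path and data are
        # appended as well.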
        import mmap
        index_tmp_path = 'tmpIndex_%03d' % self.machine_num
        if indexFilePath is None:
            indexFilePath = index_tmp_path
            faiss.write_index(self.index, index_tmp_path)
        with open(indexFilePath, 'r+b') as fp:
            # bin_index = fp.read()
            bin_index_map = mmap.mmap(fp.fileno(), 0)
        with open(self.emptyIndexPath, 'r+b') as fp:
            # bin_codes = fp.read()
            bin_codes_map = mmap.mmap(fp.fileno(), 0)
        if not os.path.exists(self.preproc_cachefile):
            faiss.write_VectorTransform(self.preproc, self.preproc_cachefile)
        with open(self.preproc_cachefile, 'r+b') as fp:
            # bin_preproc = fp.read()
            bin_preproc_map = mmap.mmap(fp.fileno(), 0)
        if mmapPath is not None:
            print('saving ivf mmap data to binary...')
            with open(mmapPath, 'r+b') as fp:
                bin_ivf_mmap_map = mmap.mmap(fp.fileno(), 0)
                ivf_length = ("%012d" % len(bin_ivf_mmap_map)).encode('ascii')
                ivf_mmap_path = mmapPath.encode('ascii')
                ivf_mmap_path_length = ("%012d" %
                                        len(ivf_mmap_path)).encode('ascii')

        bin_IDtoNameMap = json.dumps(self.IDtoNameMap).encode('ascii')
        index_length = ("%012d" % len(bin_index_map)).encode('ascii')
        codes_length = ("%012d" % len(bin_codes_map)).encode('ascii')
        preproc_length = ("%012d" % len(bin_preproc_map)).encode('ascii')
        map_length = ("%012d" % len(bin_IDtoNameMap)).encode('ascii')
        totalLength = len(index_length) + len(bin_index_map) + len(
            codes_length) + len(bin_codes_map) + len(preproc_length) + len(
                bin_preproc_map) + len(map_length) + len(bin_IDtoNameMap)
        if mmapPath is not None:
            totalLength += len(ivf_length) + len(bin_ivf_mmap_map) + len(
                ivf_mmap_path_length) + len(ivf_mmap_path)
        print('creating final binary file of size ', totalLength / 1024 / 1024,
              ' MB')
        with open('tmp_binary_index.dat', 'wb') as fp:
            #final_index_bin_map = mmap.mmap(fp.fileno(),totalLength)
            #final_index_bin_map.seek(0)
            print('writing binary to mmaped file...')
            fp.write(index_length)
            fp.write(bin_index_map)
            fp.write(codes_length)
            fp.write(bin_codes_map)
            fp.write(preproc_length)
            fp.write(bin_preproc_map)
            fp.write(map_length)
            fp.write(bin_IDtoNameMap)
            if mmapPath is not None:
                print('writing ivf mmap data')
                print('length: ', ivf_mmap_path_length)
                fp.write(ivf_mmap_path_length)
                print('path: ', ivf_mmap_path)
                fp.write(ivf_mmap_path)
                print('length: ', ivf_length)
                fp.write(ivf_length)
                print('map size: ', len(bin_ivf_mmap_map))
                fp.write(bin_ivf_mmap_map)

        print('Memory mapping final index file...')
        with open('tmp_binary_index.dat', 'r+b') as fp:
            final_index_bin_map = mmap.mmap(fp.fileno(), 0)
        print('returning final binary')
        #final_index_bin = index_length + bin_index_map[:] + codes_length + bin_codes_map[:] + preproc_length + bin_preproc_map[:] + map_length + bin_IDtoNameMap
        return final_index_bin_map[:]
            train_subset[subset_i:subset_i+n_features] = features[:n_features]
            #for n_feature in range(n_features):
            #    index_dict[subset_i+n_feature] = int(label)
            subset_i += n_features

    if pca:
        if os.path.exists(INDEX_FILENAME_PCA):
            mat = faiss.read_VectorTransform(INDEX_FILENAME_PCA)
        else:
            mat = faiss.PCAMatrix(FEATURES_NUMBER, PCA_FEATURES)

            print("PCA training... started")
            mat.train(train_subset)
            print("PCA training... finished")
            
            faiss.write_VectorTransform(mat, INDEX_FILENAME_PCA)

    if pca:
        print("PCA transformation... started")
        train_subset = mat.apply_py(train_subset)
        print("PCA transformation... finished")

    cpu_index = faiss.IndexFlatL2(PCA_FEATURES if pca else FEATURES_NUMBER) 
    #cpu_index =  faiss.index_factory(PCA_FEATURES if pca else FEATURES_NUMBER, "IVF4096,Flat")
    index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co) if gpu else cpu_index
    #nlist = 1000
    if train:
        print("Training index... started")
        #quantizer = faiss.IndexFlatL2(FEATURES_NUMBER)  # the other index
        #index = faiss.IndexIVFFlat(quantizer, FEATURES_NUMBER, nlist, faiss.METRIC_L2)
        # faster, uses more memory
Example #9
def export(args, model, dataloader, dataset):
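    # Compute features for the whole dataset, cluster them, save the trained
    # PCA transform with faiss.write_VectorTransform, and export per-cluster
    # pose maps and exemplar GIFs into a table written by TableWriter.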


    # remove head
    model.top_layer = None
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

    # get the features for the whole dataset
    features, idxs, pos1 = dc_main.compute_features(dataloader, model, len(dataset), args)

    # compute the sort order once, before idxs itself is reordered
    order = np.argsort(idxs)
    idxs = idxs[order]
    features = features[order]
    
    if args.group > 1:
        args.group = args.ep_length - args.traj_length + 1

    # clustering algorithm to use
    deepcluster = clustering.__dict__[args.clustering](args.nmb_cluster, group=args.group)

    # cluster the features
    clustering_loss = deepcluster.cluster(features, verbose=args.verbose)

    centroids = deepcluster.clus.centroids
    
    # centroids = faiss.vector_float_to_array(deepcluster.clus.get_means_and_variances)
    # centroids = centroids.reshape(nmb_cluster, 256)

    # import pdb; pdb.set_trace()
    
    # self_index = faiss.IndexFlatL2(centroids.shape[1])   # build the index
    # self_index.add(centroids)         
    # self_dists = self_index.search(centroids, centroids.shape[0])

    _, (mean, std), _, _ = vis_utils.make_transform(args.data)

    model.features = model.features.module

    c_mean, c_cov, c_var = get_means_and_variances(deepcluster, features, args)
    resume = args.resume if len(args.resume) > 0 else args.exp

    out = {
            'state_dict': model.state_dict(), 'centroids': centroids,
            'pca_path': resume + '.pca',
            'mean': mean, 'std': std,
            # 'cluster_mean': c_mean, 'cluster_cov': c_cov,
            'clus': deepcluster.clus,
            }

    if args.export > 0:
        faiss.write_VectorTransform(deepcluster.mat, resume + '.pca')
        torch.save(out,
            resume + '.clus')
    out['pca'] = deepcluster.mat

    T = args.traj_length


    pos = pos1

    if sum(sum(pos)) == 0:
        meta = torch.load('%s/meta.dict' % args.data)

        pos = np.array(meta['pos'])

        pos_idx = np.arange(pos.shape[0]*pos.shape[1])
        pos_idx = pos_idx.reshape(pos.shape[0], pos.shape[1])[:, T-1:]
        pos_idx = pos_idx.reshape(pos_idx.shape[0] * pos_idx.shape[1])

        pos = pos.reshape(pos.shape[0]*pos.shape[1], pos.shape[2])
    else:
        meta = torch.load('/data3/ajabri/vizdoom/single_env_hard_fixed1/0/meta.dict')

    # import pdb; pdb.set_trace()

    sz = 30

    from scipy.ndimage.filters import gaussian_filter

    def get_obj_masks(objs):
        out = np.zeros((3, sz, sz))
        for o in objs[0]:
            # import pdb; pdb.set_trace()
            x, y = o
            x, y = int((x - x0)/x1 *sz), int((y-y0)/y1 * sz)
            out[:, x:x+1, y:y+1] = 1

        return out        

    def get_mask_from_coord(coord):
        import matplotlib.cm as cm

        x, y, a = coord
        x, y = int(x), int(y)
        out = np.zeros((3, sz, sz))
        out[:, x, y] = cm.jet(a)[:3]

        return out

    # import pdb; pdb.set_trace()

    # sorted_self_dists = np.argsort(self_dists[0][:, 1])[::-1]
    # sorted_self_dists = np.argsort(self_dists[0].sum(axis=-1))[::-1]
    smoother1 = models.mini_models.GaussianSmoothing(3, 5, 5)
    smoother2 = models.mini_models.GaussianSmoothing(3, 7, 5)
    smoother3 = models.mini_models.GaussianSmoothing(3, 7, 7)
    smoother4 = models.mini_models.GaussianSmoothing(3, 9, 7)

    exp_name = args.resume.split('/')[-2] if args.resume != '' else args.exp.split('/')[-1]
    out_root = '%s/%s' % (args.export_path, exp_name)

    # import pdb; pdb.set_trace()
    if not os.path.exists(out_root):
        os.makedirs(out_root)

    table = Table()

    num_show = 8

    sorted_variance = np.argsort(c_var)[::-1]
    sorted_sizes = np.argsort([len(ll) for ll in deepcluster.images_dists])[::-1]

    # import pdb; pdb.set_trace()
    
    for c, clus_idx in enumerate(sorted_sizes):
    # for c, clus_idx in enumerate(sorted_variance):
    # for c, clus_idx in enumerate(sorted_self_dists):

        l = deepcluster.images_dists[clus_idx]

        if len(l) == 0:
            continue

        ll = [ii[0] for ii in sorted(l, key=lambda x: x[1])[::-1]][:num_show//2]
        ll += [ii[0] for ii in random.sample(l, min(num_show//2, len(l)))]

        l = [ii[0] for ii in l]

        row = TableRow(rno=c)

        e = Element()
        e.addTxt('size: %s <br>variance: %s' % (len(deepcluster.images_dists[clus_idx]), c_var[clus_idx]))
        row.addElement(e)

        # import pdb; pdb.set_trace()

        ## MAP
        poo = []
        for t in range(T):
            poo += [pos[np.array(l) - t]]
        
        posum = env.make_pose_map(np.concatenate(poo), meta['objs'][0], sz=sz)

        # posum *= 255.0
        # vis.image((posum*255.).astype(np.uint8), opts=dict(width=300, height=300))
        # vis.image(gaussian_filter((posum*255.), sigma=1).astype(np.uint8), opts=dict(width=300, height=300))

        # gifname = '%s/%s_%s.png' % (exp_name, c, 'map')
        gifname = '%s_%s.png' % (c, 'map')
        gifpath = '%s/%s' % (out_root, gifname)

        imageio.imwrite(gifpath,
            cv2.resize((posum*255.).astype(np.uint8).transpose(1, 2, 0), 
                (0,0), fx=5, fy=5, interpolation = cv2.INTER_AREA))

        e = Element()
        e.addImg(gifname, width=180)
        row.addElement(e)

        ## EXEMPLARS
        for iii, i in enumerate(ll):
            # import pdb; pdb.set_trace()
            imgs = vis_utils.unnormalize_batch(dataset[i][0], mean, std)
            # vis.images(imgs, opts=dict(title=f"{c} of length {len(l)}"))
            # vis.images(smoother1(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
            # vis.images(smoother2(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
            # vis.images(smoother3(torch.Tensor(imgs)).numpy(),  opts=dict(title=f"{c} of length {len(l)}"))
            # vis.images(smoother4(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))

            # gifname = '%s/%s_%s.gif' % (exp_name, c, i)
            gifname = '%s_%s.gif' % (c, i)
            gifpath = '%s/%s' % (out_root, gifname)

            vis_utils.make_gif_from_tensor(imgs.astype(np.uint8), gifpath)
            e = Element()
            if iii < num_show // 2:
                e.addTxt('rank %i<br>' % iii)
            else:
                e.addTxt('random<br>')

            e.addImg(gifname, width=128)
            row.addElement(e)


        ## EXEMPLARS
        # import visdom
        # vis =  visdom.Visdom(port=8095, env='main', use_incoming_socket=False)
        # vis.images(vis_utils.unnormalize_batch(
        #     np.stack([dataset[iii][0][0] for iii in range(-100, -50)]), mean, std
        #     ))
        # import pdb; pdb.set_trace()

        # gl = np.array(l).reshape(-1, args.group)
        # if args.group > 10:
        #     exemplars = gl[random.sample(list(range(gl.shape[0])), 4)]
        # else:
        #     exemplars = gl[random.sample(list(range(gl.shape[0])), 10)]

        # for iii, i in enumerate(exemplars):
        #     # import pdb; pdb.set_trace()
        #     # imgs = vis_utils.unnormalize_batch(dataset[i][0], mean, std)
        #     imgs = np.stack([dataset[_idx][0][0] for _idx in i])
        #     imgs = vis_utils.unnormalize_batch(imgs, mean, std)
        #     # import pdb; pdb.set_trace()
    
        #     # imgs = vis_utils.unnormalize_batch(dataset[i][0], mean, std)


        #     # vis.images(imgs, opts=dict(title=f"{c} of length {len(l)}"))
        #     # vis.images(smoother1(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
        #     # vis.images(smoother2(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))
        #     # vis.images(smoother3(torch.Tensor(imgs)).numpy(),  opts=dict(title=f"{c} of length {len(l)}"))
        #     # vis.images(smoother4(torch.Tensor(imgs)).numpy(), opts=dict(title=f"{c} of length {len(l)}"))

        #     # gifname = '%s/%s_%s.gif' % (exp_name, c, i)
        #     gifname = '%s_%s.gif' % (c, i[0])
        #     gifpath = '%s/%s' % (out_root, gifname)

        #     vis_utils.make_gif_from_tensor(imgs.astype(np.uint8), gifpath)
        #     e = Element()
        #     if iii < num_show // 2:
        #         e.addTxt('rank %i<br>' % iii)
        #     else:
        #         e.addTxt('random<br>')

        #     e.addImg(gifname, width=128)
        #     row.addElement(e)

        table.addRow(row)

    tw = TableWriter(table, '%s/%s' % (args.export_path, exp_name), rowsPerPage=min(args.nmb_cluster,100))
    tw.write()

    # import pdb; pdb.set_trace()

    return out
def store_styleft():
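    # Load a DeepCluster-style VGG16 checkpoint, extract patch features for the
    # Van Gogh crops, reduce them with PCA (saved via write_VectorTransform),
    # and dump both the reduced index and the raw features to JSON files.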
    vgg_ = vgg16().cuda()
    for param in vgg_.parameters():
        param.requires_grad = False
    vgg_.top_layer = None
    encoder_path = '/home/visiting/Projects/levishery/checkpoint.pth.tar'
    data_path = '/home/visiting/datasets/crop_vangogh_original'
    if os.path.isfile(encoder_path):
        print("=> loading encoder '{}'".format(encoder_path))
        checkpoint = torch.load(encoder_path)
        # remove top_layer and classifier parameters from checkpoint
        for key in list(checkpoint['state_dict']):
            if 'top_layer' in key:
                del checkpoint['state_dict'][key]
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in checkpoint['state_dict'].items():
            if 'feature' in k:
                name = k[:8] + k[15:]  # remove `module.`
                new_state_dict[name] = v
            else:
                new_state_dict[k] = v
        vgg_.load_state_dict(new_state_dict)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            encoder_path, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(encoder_path))

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [transforms.Resize(240), transforms.ToTensor(), normalize]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(data_path,
                                   transform=transforms.Compose(tra))
    print('Load dataset: {0:.2f} s'.format(time.time() - end))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=16,
                                             num_workers=4)

    for i, (input_tensor, _) in enumerate(dataloader):
        input_var = torch.autograd.Variable(input_tensor.cuda(), volatile=True)
        features = vgg_.features(input_var)
        PATCH_NUM = 10
        features = features.unfold(2, 3, 3).unfold(3, 3, 3)
        features = features.permute(0, 2, 3, 1, 4, 5)
        x = features.reshape(features.size(0) * PATCH_NUM * PATCH_NUM, -1)
        x = vgg_.classifier(x).cpu().numpy()
        features = features.cpu().numpy()
        if i == 0:
            store_features = np.zeros(
                (len(dataset.imgs), features.shape[1], features.shape[2],
                 features.shape[3], features.shape[4],
                 features.shape[5])).astype('float32')
            store_linear = np.zeros((len(dataset.imgs) * PATCH_NUM * PATCH_NUM,
                                     x.shape[1])).astype('float32')
        if i < len(dataloader) - 1:
            store_features[i * 16:(i + 1) * 16] = features.astype('float32')
            store_linear[i * 16 * PATCH_NUM * PATCH_NUM:(i + 1) * 16 *
                         PATCH_NUM * PATCH_NUM] = x.astype('float32')
        else:
            # special treatment for final batch
            store_features[i * 16:] = features.astype('float32')
            store_linear[i * 16 * PATCH_NUM * PATCH_NUM:] = x.astype('float32')

    small_ft, pca = index_features(store_linear)
    faiss.write_VectorTransform(pca, "vangogh.pca")
    small_ft = small_ft.tolist()
    store_features = store_features.tolist()

    file_name = '/home/visiting/Projects/levishery/reconstruction/vangogh_index.json'
    print('start writing index')
    with open(file_name, 'w') as file_object:
        json.dump(small_ft, file_object)

    file_name = '/home/visiting/Projects/levishery/reconstruction/vangogh_features.json'
    print('start writing features')
    with open(file_name, 'w') as file_object:
        json.dump(store_features, file_object)

fv3_dir = os.getenv('DDIR') + '/features/'

if 'train' in todo:

    f = h5py.File(fv3_dir + 'f100m/block0.hdf5', 'r')

    count = f['count'][0]
    labels = f['all_labels'][:count]
    features = f['all_feats'][:count]

    pca = faiss.PCAMatrix(2048, 256, 0, True)
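    # PCAMatrix arguments above: d_in=2048, d_out=256, eigen_power=0 (no
    # whitening), random_rotation=True (the "R" in the PCAR256 filename below).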

    pca.train(features)
    faiss.write_VectorTransform(pca, fv3_dir + 'PCAR256.vt')

if 'apply' in todo:
    pca = faiss.read_VectorTransform(fv3_dir + 'PCAR256.vt')

    def load_block(i):
        f = h5py.File(fv3_dir + 'f100m/block%d.hdf5' % i, 'r')
        count = f['count'][0]
        # labels = f['all_labels'][:count]
        features = f['all_feats'][:count]
        return features

    # one read thread, one PCA computation thread, and main thread writes result.
    src = rate_limited_imap(load_block, range(100))
    src2 = rate_limited_imap(pca.apply_py, src)
    f = open(fv3_dir + '/concatenated_PCAR256.raw', 'w')
Example #12
def store_styleft():
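    # Build the model from the parsed args, optionally resume from a
    # checkpoint, compute features for the dataset, reduce them with PCA
    # (saved via write_VectorTransform), and dump the result to JSON.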
    global args
    args = parser.parse_args()

    # fix random seeds
    # torch.manual_seed(args.seed)
    # torch.cuda.manual_seed_all(args.seed)
    # np.random.seed(args.seed)

    # CNN
    if args.verbose:
        print('Architecture: {}'.format(args.arch))
    model = models.__dict__[args.arch](sobel=args.sobel)
    fd = int(model.top_layer.weight.size()[1])
    model.top_layer = None
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
    cudnn.benchmark = True

    # create optimizer
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=10**args.wd,
    )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # remove top_layer parameters from checkpoint
            for key in list(checkpoint['state_dict']):
                if 'top_layer' in key:
                    del checkpoint['state_dict'][key]
            model.load_state_dict(checkpoint['state_dict'])
            #            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # creating checkpoint repo
    exp_check = os.path.join(args.exp, 'checkpoints')
    if not os.path.isdir(exp_check):
        os.makedirs(exp_check)

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [transforms.Resize(224), transforms.ToTensor(), normalize]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(args.data,
                                   transform=transforms.Compose(tra))
    if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch,
                                             num_workers=args.workers)

    # remove head
    model.top_layer = None
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

    # get the features and dominate angles for the whole dataset
    if args.patch_rotate:
        angles = []
        for file in dataset.imgs:
            filepath = file[0]
            img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (16, 16))
            angles.append(calc_phase(img))
    features = compute_features(dataloader, model, len(dataset))
    small_ft, pca = preprocess_features(features)
    faiss.write_VectorTransform(pca, "gauguin-all.pca")
    #mat = faiss.read_VectorTransform("PCA_128.pca")
    #print(mat)
    small_ft = small_ft.tolist()

    file_name = '/home/visiting/Projects/levishery/deep_cluster/gauguin_all.json'
    with open(file_name, 'w') as file_object:
        json.dump(small_ft, file_object)
    if args.patch_rotate:
        file_name = '/home/visiting/Projects/levishery/deep_cluster/angle_128.json'
        with open(file_name, 'w') as file_object:
            json.dump(angles, file_object)