Example #1
def swap_feature_test(classifier, store_feature, store_index, pca_mat):
    PATCH_NUM = 10
    PATCH_SIZE = 3
    store_index = np.asarray(store_index).astype('float32')
    store_feature = np.asarray(store_feature).astype('float32')
    mat = faiss.read_VectorTransform(pca_mat)
    query = store_feature[1, :, :, :, :, :]
    query = classifier(
        torch.tensor(query.reshape(PATCH_NUM * PATCH_NUM,
                                   -1)).cuda()).cpu().numpy()
    query = mat.apply_py(query)
    index = faiss.IndexFlatL2(64)  # build the index
    print(index.is_trained)
    index.add(store_index)  # add vectors to the index
    print(index.ntotal)
    D, I = index.search(query, 1)
    print(I)
    print(D)
    # D, I = index.search(store_index[:5], 10)
    # print(I)
    # print(D)
    store_feature = torch.from_numpy(store_feature)
    transfered = torch.zeros(
        (1, 512, PATCH_SIZE * PATCH_NUM, PATCH_SIZE * PATCH_NUM))
    num = 0
    for item in I:
        # num // 10 and num % 10 give the target patch position in the 10 x 10 grid;
        # the flat id returned by faiss decodes as image = item // 100,
        # row = (item % 100) // 10, column = item % 10.
        row = num // 10 * PATCH_SIZE
        col = num % 10 * PATCH_SIZE
        transfered[:, :, row:row + PATCH_SIZE,
                   col:col + PATCH_SIZE] = store_feature[item // 100,
                                                         (item % 100) // 10,
                                                         item % 10, :, :, :]
        num += 1
    return transfered.cuda()
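The loop above decodes each flat nearest-neighbour id into (image, patch row, patch column), which assumes store_index was built by flattening a 10 x 10 patch grid per stored image. A tiny stand-alone illustration of that decoding (the concrete id is arbitrary):

flat_id = 237
image = flat_id // 100        # -> 2 (third stored image)
row = (flat_id % 100) // 10   # -> 3 (fourth patch row)
col = flat_id % 10            # -> 7 (eighth patch column)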
Example #2
def deserializeIndex(self, indexFileResource, id=None):
    # the joined index file contains populated_index, empty_trained_index, preproc,
    # IDMap, and a (possible) merged IVF file, in that order
    bytearrays = fileutil.splitMultiFileByteArray(indexFileResource._data, 12,
                                                  4)
    tmppath = 'tmp'
    mv = memoryview(indexFileResource._data)
    if id is not None:
        tmppath += '_' + str(id)
    all_tmp_paths = []
    count = 0
    for bytearray in bytearrays:
        p = tmppath + str(count) + '.dat'
        with open(p, 'wb') as fp:
            fp.write(mv[bytearray[0]:bytearray[1]])
        count += 1
        all_tmp_paths.append(p)
    index = faiss.read_index(all_tmp_paths[0], faiss.IO_FLAG_MMAP)
    emptyIndex = faiss.read_index(all_tmp_paths[1])
    preproc = faiss.read_VectorTransform(all_tmp_paths[2])
    IDToName = json.loads(
        bytes(mv[bytearrays[3][0]:bytearrays[3][1]]).decode('ascii'))
    print('index size: ', index.ntotal)
    print('map size:', len(IDToName))
    del bytearrays
    del indexFileResource
    print('initializing index...')
    print('index size: ', index.ntotal)
    self.isDeserialized = True
    return (index, emptyIndex, preproc, IDToName, all_tmp_paths)
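The IO_FLAG_MMAP flag used above keeps the populated index's inverted lists on disk instead of loading them into RAM. A minimal, self-contained sketch of that round trip (file name and toy data are placeholders, not from the original code):

import faiss
import numpy as np

d, nlist = 64, 16
xb = np.random.RandomState(0).rand(10000, d).astype('float32')

quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFFlat(quantizer, d, nlist)
index.train(xb)
index.add(xb)
faiss.write_index(index, 'demo.ivf')

# Reopen memory-mapped; search works the same, but the inverted lists stay on disk.
mmap_index = faiss.read_index('demo.ivf', faiss.IO_FLAG_MMAP)
D, I = mmap_index.search(xb[:5], 4)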
Example #3
def load_traintest(nl, class_set=1, seed=1234,
                   include_base_class=True,
                   pca256=False, nnonlabeled=0):

    # imagenet validation images    
    Xte, Yte, eval_classes_subset = load_bharath_fv3(
        class_set, include_base_class, 'val')
    # imagenet train images
    Xtr, Ytr, eval_classes_subset = load_bharath_fv3(
        class_set, include_base_class, 'train')

    Yte = BharathEval(Yte, eval_classes_subset)

    # reduce labels to consecutive numbers that start at 0
    label_map = np.cumsum(np.bincount(Ytr).astype(bool)) - 1
    Ytr = label_map[Ytr]
    Yte.remap_labels(label_map)
    eval_classes_subset = label_map[eval_classes_subset]
    
    nclasses = Ytr.max() + 1

    print("selecting images, seed=%d" % seed)
    rs = np.random.RandomState(seed)

    perm1 = []
    perm0 = []
    base = []
    for cl in range(nclasses):
        imnos = (Ytr == cl).nonzero()[0]
        if cl in eval_classes_subset:
            rs.shuffle(imnos)
            perm1.append(imnos[:nl])
            perm0.append(imnos[nl:])
        else:
            base.append(imnos)

    if nnonlabeled == 0:
        perm = np.hstack(base + perm1)
    else:
        perm = np.hstack(base + perm1 + perm0)
        nnonlabeled = np.hstack(perm0).size

    Ytr = Ytr[perm]
    if nnonlabeled != 0:
        Ytr[-nnonlabeled:] = -1
    Xtr = Xtr[perm]

    if pca256:

        pca_fname = fv3_dir + 'PCAR256.vt'
        print("load", pca_fname)
        pcar = faiss.read_VectorTransform(pca_fname)
        Xtr = pcar.apply_py(Xtr)
        Xte = pcar.apply_py(Xte)

    return Xtr, Ytr, Xte, Yte
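The one-liner `label_map = np.cumsum(np.bincount(Ytr).astype(bool)) - 1` is what maps the surviving class ids to consecutive labels starting at 0. A small stand-alone illustration with arbitrary labels:

import numpy as np

Ytr = np.array([3, 3, 7, 2, 7, 9])
label_map = np.cumsum(np.bincount(Ytr).astype(bool)) - 1
print(label_map[Ytr])   # [1 1 2 0 2 3] -- classes 2, 3, 7, 9 become 0, 1, 2, 3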
Example #4
def get_preprocessor(preproc_str, preproc_cachefile):
    if preproc_str:
        if not preproc_cachefile or not os.path.exists(preproc_cachefile):
            preproc = train_preprocessor(preproc_str)
            if preproc_cachefile:
                print("store", preproc_cachefile)
                faiss.write_VectorTransform(preproc, preproc_cachefile)
        else:
            print("load", preproc_cachefile)
            preproc = faiss.read_VectorTransform(preproc_cachefile)
    else:
        # `dim` (the raw vector dimensionality) is not defined in this function;
        # like `train_preprocessor`, it is assumed to come from the enclosing module.
        preproc = IdentPreproc(dim)
    return preproc
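`IdentPreproc` is not part of the faiss API; in the faiss benchmark scripts it is a small pass-through object exposing the same d_in/d_out/apply_py interface as a VectorTransform. A minimal sketch along those lines:

class IdentPreproc:
    """Identity preprocessor with the d_in/d_out/apply_py interface of a faiss VectorTransform."""

    def __init__(self, d):
        self.d_in = self.d_out = d

    def apply_py(self, x):
        return x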
Example #5
def get_preprocessor():
    if preproc_str:
        if not preproc_cachefile or not os.path.exists(preproc_cachefile):
            preproc = train_preprocessor()
            if preproc_cachefile:
                print "store", preproc_cachefile
                faiss.write_VectorTransform(preproc, preproc_cachefile)
        else:
            print "load", preproc_cachefile
            preproc = faiss.read_VectorTransform(preproc_cachefile)
    else:
        d = xb.shape[1]
        preproc = IdentPreproc(d)
    return preproc
Example #7
    def deserializeIndex(self, indexFileResource, id=None):
        # the joined index file contains populated_index, empty_trained_index, preproc,
        # IDMap, a (possible) IVF file, in that order
        bytearrays = fileutil.splitMultiFileByteArray(indexFileResource._data,
                                                      12, 6)
        tmppath = 'tmp'
        mv = memoryview(indexFileResource._data)
        if id is not None:
            tmppath += '_' + str(id)
        all_tmp_paths = []
        mmap_path = None
        count = 0
        for bytearray in bytearrays:
            if count == 4:
                # This is the path to where the ivf mmap file must be stored
                mmap_path = mv[bytearray[0]:bytearray[1]].tobytes().decode(
                    'ascii')
                print('making directory ', os.path.dirname(mmap_path),
                      ' to store ivf data')
                if not os.path.exists(os.path.dirname(mmap_path)):
                    os.makedirs(os.path.dirname(mmap_path))
            p = tmppath + str(count) + '.dat'
            if count == 5:
                p = mmap_path
            with open(p, 'wb') as fp:
                fp.write(mv[bytearray[0]:bytearray[1]])
            count += 1
            all_tmp_paths.append(p)

        self.preproc = faiss.read_VectorTransform(all_tmp_paths[2])
        self.IDToImage = json.loads(
            mv[bytearrays[3][0]:bytearrays[3][1]].tobytes().decode('ascii'))
        #print(mv[bytearrays[3][0]:bytearrays[3][1]].tobytes())
        del bytearrays
        del indexFileResource
        print('initializing index...')
        self.index = faiss.read_index(all_tmp_paths[0], faiss.IO_FLAG_MMAP)

        print('index size: ', self.index.ntotal)
        print('map size:', len(self.IDToImage))
        self.isDeserialized = True
        return (self.index, self.preproc, self.IDToImage)
Example #8
def loadIndexParameters(indexParametersResource):
    if indexParametersResource is not None:
        indexParameterData = indexParametersResource._data  #Takes 150 Mb of memory
        bytearrays = fileutil.splitMultiFileByteArray(
            indexParameterData, 12, 2)  #Takes another 150 Mb of memory
        tmppath = 'tmp'
        mv = memoryview(indexParameterData)
        all_tmp_paths = []
        count = 0
        for bytearray in bytearrays:
            p = tmppath + str(count) + '.dat'
            with open(p, 'wb') as fp:
                fp.write(mv[bytearray[0]:bytearray[1]])
            count += 1
            all_tmp_paths.append(p)
        index = faiss.read_index(all_tmp_paths[0])  #WHY 12.5 GB?!!?!
        emptyIndexPath = all_tmp_paths[0]
        preproc = faiss.read_VectorTransform(all_tmp_paths[1])
        return (index, preproc, emptyIndexPath)
    else:
        return (None, None, None)
Example #9
    def train_preprocessor(self, preproc_str_local, xt_local):
        if not self.preproc_cachefile or not os.path.exists(
                self.preproc_cachefile):
            print("train preproc", preproc_str_local)
            d = xt_local.shape[1]
            t0 = time.time()
            if preproc_str_local.startswith('OPQ'):
                fi = preproc_str_local[3:].split('_')
                m = int(fi[0])
                dout = int(fi[1]) if len(fi) == 2 else d
                preproc = faiss.OPQMatrix(d, m, dout)
            elif preproc_str_local.startswith('PCAR'):
                dout = int(preproc_str_local[4:-1])
                preproc = faiss.PCAMatrix(d, dout, 0, True)
            else:
                assert False
            preproc.train(indexfunctions.sanitize(xt_local[:100000000]))
            print("preproc train done in %.3f s" % (time.time() - t0))
            faiss.write_VectorTransform(preproc, self.preproc_cachefile)
        else:
            print("load preproc ", self.preproc_cachefile)
            preproc = faiss.read_VectorTransform(self.preproc_cachefile)
        return preproc
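For the 'OPQ' branch above, a preproc_str_local such as 'OPQ16_64' parses to m = 16 sub-quantizers with a 64-dimensional output. A minimal, self-contained sketch of training and persisting such a transform (dimensions and file name are placeholders, not from the original code):

import faiss
import numpy as np

d, m, dout = 128, 16, 64
xt = np.random.RandomState(0).rand(20000, d).astype('float32')

opq = faiss.OPQMatrix(d, m, dout)   # rotate and reduce to dout dims, grouped for m sub-quantizers
opq.train(xt)
faiss.write_VectorTransform(opq, 'demo_opq16_64.vt')

opq2 = faiss.read_VectorTransform('demo_opq16_64.vt')
xt_out = opq2.apply_py(xt)          # -> (20000, 64) float32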
Example #10
    def __init__(self, encoder, pca_model_path):
        self.encoder = encoder
        self.pca_mat = faiss.read_VectorTransform(pca_model_path)
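A minimal, self-contained sketch of how a wrapper like the one above is typically used: reduce encoder output with the stored PCA transform. The class name, file name, and the identity stand-in for the encoder are placeholders, not from the original code:

import faiss
import numpy as np

d_in, d_out = 512, 64
x = np.random.rand(1000, d_in).astype('float32')

# Train and store a PCA transform once, standing in for the real pca_model_path file.
pca = faiss.PCAMatrix(d_in, d_out)
pca.train(x)
faiss.write_VectorTransform(pca, 'demo.pca')

class PCAEncoder:  # hypothetical wrapper mirroring Example #10
    def __init__(self, encoder, pca_model_path):
        self.encoder = encoder
        self.pca_mat = faiss.read_VectorTransform(pca_model_path)

    def __call__(self, batch):
        return self.pca_mat.apply_py(self.encoder(batch))

enc = PCAEncoder(lambda batch: batch, 'demo.pca')  # identity "encoder", demo only
reduced = enc(x)                                   # -> (1000, 64) float32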
Example #11
    def loadmat(self, path):
        self.mat = faiss.read_VectorTransform(path)
Example #12
                name = k[:8] + k[15:]  # remove `module.`
                new_state_dict[name] = v
            else:
                new_state_dict[k] = v
        vgg_.load_state_dict(new_state_dict)
        print("=> loaded checkpoint '{}' (epoch {})".format(
            opt.encoder, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(opt.encoder))

    with open(opt.store_feature, 'r') as file_object:
        store_features = json.load(file_object)
    with open(opt.store_index, 'r') as file_object:
        store_index = json.load(file_object)

    mat = faiss.read_VectorTransform(opt.store_pca)
    store_index = np.asarray(store_index).astype('float32')
    index = faiss.IndexFlatL2(64)  # build the index
    print(index.is_trained)
    index.add(store_index)  # add vectors to the index
    store_feature = np.asarray(store_features)
    store_feature = torch.from_numpy(store_feature)

    for file in os.listdir(opt.dataroot):
        file_path = opt.dataroot + file
        img_tensor = image_loader(file_path)
        content_feature = vgg_.features(img_tensor)
        PATCH_NUM = 10
        x = content_feature.unfold(2, 3, 3).unfold(3, 3, 3)
        x = x.permute(0, 2, 3, 1, 4, 5)
        x = x.reshape(x.size(0) * PATCH_NUM * PATCH_NUM, -1)
Example #13
INDEX_FILENAME     = INDEX_FILENAME_PRE + '.index'
INDEX_FILENAME_PK  = INDEX_FILENAME_PRE + '.pk'
INDEX_FILENAME_PCA = INDEX_FILENAME_PRE + '.pca' + str(args.pca)

res = faiss.StandardGpuResources()  # use a single GPU
co = faiss.GpuClonerOptions()
# here we are using a 64-byte PQ, so we must set the lookup tables to
# 16 bit float (this is due to the limited temporary memory).
if args.float16: co.useFloat16 = True

if os.path.exists(INDEX_FILENAME):
    cpu_index = faiss.read_index(INDEX_FILENAME)
    index = faiss.index_cpu_to_gpu(res, 0, cpu_index, co) if gpu else cpu_index

    if pca:
        mat = faiss.read_VectorTransform(INDEX_FILENAME_PCA) # todo calculate it if not there

    with open(INDEX_FILENAME_PK, 'rb') as fp:
        index_dict = pickle.load(fp)
else:
    files = sorted(glob.glob(FEATURES_NPY))
    index_dict = { }
    label_features = { }
    i = 0
    n_train_subset = 0
    for file_name in tqdm(files):
        label = file_name.split('/')[-1].split('.')[0]
        if len(label) == 16:
            continue
        features = np.load(file_name)
        assert features.shape[1] == FEATURES_NUMBER
Example #14
if 'train' in todo:

    f = h5py.File(fv3_dir + 'f100m/block0.hdf5', 'r')

    count = f['count'][0]
    labels = f['all_labels'][:count]
    features = f['all_feats'][:count]

    pca = faiss.PCAMatrix(2048, 256, 0, True)

    pca.train(features)
    faiss.write_VectorTransform(pca, fv3_dir + 'PCAR256.vt')

if 'apply' in todo:
    pca = faiss.read_VectorTransform(fv3_dir + 'PCAR256.vt')

    def load_block(i):
        f = h5py.File(fv3_dir + 'f100m/block%d.hdf5' % i, 'r')
        count = f['count'][0]
        # labels = f['all_labels'][:count]
        features = f['all_feats'][:count]
        return features

    # one read thread, one PCA computation thread, and main thread writes result.
    src = rate_limited_imap(load_block, range(100))
    src2 = rate_limited_imap(pca.apply_py, src)
    f = open(fv3_dir + '/concatenated_PCAR256.raw', 'w')

    i = 0
    for x in src2:
Example #15
def main():
    global args
    args = parser.parse_args()

    # fix random seeds
    # torch.manual_seed(args.seed)
    # torch.cuda.manual_seed_all(args.seed)
    # np.random.seed(args.seed)

    # CNN
    if args.verbose:
        print('Architecture: {}'.format(args.arch))
    model = models.__dict__[args.arch](sobel=args.sobel)
    fd = int(model.top_layer.weight.size()[1])
    model.top_layer = None
    model.features = torch.nn.DataParallel(model.features)
    model.cuda()
    cudnn.benchmark = True

    # create optimizer
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=10**args.wd,
    )

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # remove top_layer parameters from checkpoint
            for key in list(checkpoint['state_dict']):  # copy the keys: deleting while iterating the dict raises RuntimeError
                if 'top_layer' in key:
                    del checkpoint['state_dict'][key]
            model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # creating checkpoint repo
    exp_check = os.path.join(args.exp, 'checkpoints')
    if not os.path.isdir(exp_check):
        os.makedirs(exp_check)

    # preprocessing of data
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    tra = [transforms.Resize(224), transforms.ToTensor(), normalize]

    # load the data
    end = time.time()
    dataset = datasets.ImageFolder(args.data,
                                   transform=transforms.Compose(tra))
    if args.verbose: print('Load dataset: {0:.2f} s'.format(time.time() - end))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch,
                                             num_workers=args.workers)

    # remove head
    model.top_layer = None
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

    # get the features for the whole dataset
    features, patch_num, angles = compute_patchfeatures(
        '/home/visiting/datasets/source/vincent-van-gogh_road-with-cypresses-1890/0606.jpg',
        model, 224)

    file_name = '/home/visiting/Projects/levishery/deep_cluster/vangogh_all.json'

    with open(file_name, 'r') as file_object:
        contents = json.load(file_object)
    if args.patch_rotate:
        file_name = '/home/visiting/Projects/levishery/deep_cluster/angle_128.json'
        with open(file_name, 'r') as file_object:
            angle_style = json.load(file_object)

    style_ft = np.asarray(contents)
    style_ft = style_ft.astype('float32')
    mean_s, std_s = calc_mean_std(style_ft)
    mat = faiss.read_VectorTransform("vangogh-all.pca")
    photo_ft = mat.apply_py(features)
    mean_c, std_c = calc_mean_std(photo_ft)
    size = photo_ft.shape
    #photo_ft = ((photo_ft - mean_c) / std_c) * std_s + mean_s

    index = faiss.IndexFlatL2(256)  # build the index
    print(index.is_trained)
    index.add(style_ft)  # add vectors to the index
    print(index.ntotal)
    D, I = index.search(photo_ft, 1)
    print(I)
    print(D)
    dataset = datasets.ImageFolder(
        '/home/visiting/datasets/van-gogh_patch_112/')
    out_dir = '/home/visiting/datasets/result/'
    num = 0
    result = np.zeros(
        (args.patch_size * patch_num[1], args.patch_size * patch_num[0], 3))
    match_im = np.zeros((args.patch_size * 2, args.patch_size, 3))
    photo = cv2.imread(
        '/home/visiting/datasets/source/vincent-van-gogh_road-with-cypresses-1890/0606.jpg'
    )
    for i in range(patch_num[0]):
        for j in range(patch_num[1]):
            style_patch = cv2.imread(dataset.imgs[I[num][0]][0])
            match_im[:args.patch_size, :args.patch_size] = photo[
                j * args.patch_size:(j + 1) * args.patch_size,
                i * args.patch_size:(i + 1) * args.patch_size, :]
            #angle1 = angle_style[I[num][0]]
            if args.patch_rotate:
                angle1_ = calc_phase(
                    cv2.resize(
                        cv2.imread(dataset.imgs[I[num][0]][0],
                                   cv2.IMREAD_GRAYSCALE), (16, 16)))
                style_patch = rotate_image(style_patch, angles[num] - angle1_,
                                           True)
                #angle2 = calc_phase(cv2.resize(cv2.cvtColor(style_patch, cv2.COLOR_BGR2GRAY), (16, 16)))
                #angle2_ = angles[num]
            match_im[args.patch_size:2 *
                     args.patch_size, :args.patch_size] = style_patch
            result[j * args.patch_size:(j + 1) * args.patch_size, i *
                   args.patch_size:(i + 1) * args.patch_size, :] = style_patch
            filename = '/home/visiting/datasets/result/' + str(num) + 's.jpg'
            cv2.imwrite(filename, match_im)
            num += 1
    file_name = '/home/visiting/datasets/result/r.jpg'
    cv2.imwrite(file_name, result)
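`calc_mean_std` is not defined in this snippet; for the (commented-out) renormalisation `((photo_ft - mean_c) / std_c) * std_s + mean_s` to broadcast over the (n_patches, 256) feature arrays, it presumably returns per-dimension statistics. A minimal sketch under that assumption, not the repository's actual definition:

import numpy as np

def calc_mean_std(feat, eps=1e-5):
    # Assumed behaviour: per-dimension mean / std over all patches; eps guards
    # against division by zero when a feature dimension is constant.
    mean = feat.mean(axis=0, keepdims=True)
    std = feat.std(axis=0, keepdims=True) + eps
    return mean, std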