Example #1
def hyrule_vocab_test():
    import logging
    import sklearn.cluster
    import numpy as np
    import utool as ut
    from os.path import join
    from yael.yutils import load_ext

    logger = logging.getLogger(__name__)

    dbdir = ut.truepath('/raid/work/Oxford')
    datadir = join(dbdir, 'smk_data_iccv_2013', 'data')

    # Files storing descriptors/geometry for Oxford5k dataset
    test_sift_fname = join(datadir, 'oxford_sift.uint8')
    # test_nf_fname = join(datadir, 'oxford_nsift.uint32')
    all_vecs = load_ext(test_sift_fname, ndims=128,
                        verbose=True).astype(np.float32)
    logger.info(ut.print_object_size(all_vecs))
    # nfeats_list = load_ext(test_nf_fname, verbose=True)

    with ut.embed_on_exception_context:
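        # embed_on_exception_context drops into an interactive shell if the
        # block raises, so a long-running fit is not lost to a late error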
        rng = np.random.RandomState(13421421)
        # init_size = int(config['num_words'] * 8)
        num_words = int(2**16)
        init_size = num_words * 4
        # converged after 26043 iterations
        minibatch_params = dict(
            n_clusters=num_words,
            init='k-means++',
            # init='random',
            init_size=init_size,
            n_init=1,
            max_iter=100,
            batch_size=1000,
            tol=0.0,
            max_no_improvement=10,
            reassignment_ratio=0.01,
        )
        clusterer = sklearn.cluster.MiniBatchKMeans(compute_labels=False,
                                                    random_state=rng,
                                                    verbose=1,
                                                    **minibatch_params)
        clusterer.fit(all_vecs)
        words = clusterer.cluster_centers_
        logger.info(words.shape)
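A minimal sketch of how the learned vocabulary might be reused (the helper name and batching scheme are assumptions, not part of the original): after `clusterer.fit` finishes, new descriptors can be quantized against `words` by nearest-centroid assignment, either via `clusterer.predict` or manually:

import numpy as np

def assign_to_words(words, vecs, batch_size=10000):
    # Brute-force nearest-centroid assignment, batched so the
    # (batch_size x num_words) distance matrix fits in memory.
    # ||x - c||^2 = ||x||^2 - 2*x.c + ||c||^2; the ||x||^2 term is
    # constant per row, so it cannot change the argmin and is dropped.
    c_sqnorm = (words ** 2).sum(axis=1)[None, :]
    wx_list = []
    for start in range(0, len(vecs), batch_size):
        chunk = vecs[start:start + batch_size].astype(np.float32)
        dists = c_sqnorm - 2 * chunk.dot(words.T)
        wx_list.append(dists.argmin(axis=1))
    return np.concatenate(wx_list)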
Example #2
def load_oxford_2013():
    """
    Found this data in the README of the SMK publication
    https://hal.inria.fr/hal-00864684/document
    http://people.rennes.inria.fr/Herve.Jegou/publications.html
    which comes with a download script:

    CommandLine:
        # Download oxford13 data
        cd ~/work/Oxford
        mkdir -p smk_data_iccv_2013
        cd smk_data_iccv_2013
        wget -nH --cut-dirs=4 -r -Pdata/ ftp://ftp.irisa.fr/local/texmex/corpus/iccv2013/

    This dataset has 5063 images, whereas the 2007 release has 5062.
    This dataset seems to contain an extra junk image:
        ashmolean_000214

    # Remember that MATLAB is 1-indexed!
    # DON'T FORGET TO CONVERT TO 0-INDEXING!
    """
    from yael.ynumpy import fvecs_read
    from yael.yutils import load_ext
    from os.path import join
    import logging
    import numpy as np
    import scipy.io
    import utool as ut
    import vtool as vt

    logger = logging.getLogger(__name__)

    dbdir = ut.truepath('/raid/work/Oxford')
    datadir = join(dbdir, 'smk_data_iccv_2013', 'data')

    # we are not retraining, so this is unused
    # # Training data descriptors for Paris6k dataset
    # train_sift_fname = join(datadir, 'paris_sift.uint8')  # NOQA
    # # File storing visual words of Paris6k descriptors used in our ICCV paper
    # train_vw_fname = join(datadir, 'clust_preprocessed/oxford_train_vw.int32')

    # Pre-learned quantizer used in ICCV paper (used if docluster=false)
    codebook_fname = join(datadir, 'clust_preprocessed/oxford_codebook.fvecs')

    # Files storing descriptors/geometry for Oxford5k dataset
    test_sift_fname = join(datadir, 'oxford_sift.uint8')
    test_geom_fname = join(datadir, 'oxford_geom_sift.float')
    test_nf_fname = join(datadir, 'oxford_nsift.uint32')

    # File storing visual words of Oxford5k descriptors used in our ICCV paper
    test_vw_fname = join(datadir, 'clust_preprocessed/oxford_vw.int32')
    # Ground-truth for Oxford dataset
    gnd_fname = join(datadir, 'gnd_oxford.mat')

    oxford_vecs = load_ext(test_sift_fname, ndims=128, verbose=True)
    oxford_nfeats = load_ext(test_nf_fname, verbose=True)
    oxford_words = fvecs_read(codebook_fname)
    # visual word ids are stored 1-indexed (MATLAB); shift to 0-indexing
    oxford_wids = load_ext(test_vw_fname, verbose=True) - 1

    # Cache the converted keypoints next to the raw geometry file so the
    # Z-format -> invV conversion only runs on the first load
    test_geom_invV_fname = test_geom_fname + '.invV.pkl'
    try:
        all_kpts = ut.load_data(test_geom_invV_fname)
        logger.info('loaded invV keypoints')
    except IOError:
        oxford_kptsZ = load_ext(test_geom_fname, ndims=5, verbose=True)
        logger.info('converting to invV keypoints')
        all_kpts = vt.convert_kptsZ_to_kpts(oxford_kptsZ)
        ut.save_data(test_geom_invV_fname, all_kpts)

    gnd_ox = scipy.io.loadmat(gnd_fname)
    imlist = [x[0][0] for x in gnd_ox['imlist']]
    # qidx is 1-indexed (MATLAB); map each query to its database image index
    qx_to_dx = gnd_ox['qidx'] - 1

    data_uri_order = imlist
    query_uri_order = ut.take(data_uri_order, qx_to_dx)

    # offset_list[i]:offset_list[i + 1] is the slice of all_vecs (and
    # all_kpts) belonging to the i-th image in data_uri_order
    offset_list = np.hstack(([0], oxford_nfeats.cumsum())).astype(np.int64)

    # query_gnd = gnd_ox['gnd'][0][0]
    # bboxes = query_gnd[0]
    # qx_to_ok_gtidxs1 = [x[0] for x in query_gnd[1][0]]
    # qx_to_junk_gtidxs2 = [x[0] for x in query_gnd[2][0]]
    # # ut.depth_profile(qx_to_gtidxs1)
    # # ut.depth_profile(qx_to_gtidxs2)

    # Sanity checks: per-image feature counts must tile all_vecs exactly,
    # every descriptor gets a word id, and word ids must index the codebook
    assert sum(oxford_nfeats) == len(oxford_vecs)
    assert offset_list[-1] == len(oxford_vecs)
    assert len(oxford_wids) == len(oxford_vecs)
    assert oxford_wids.max() == len(oxford_words) - 1

    data = {
        'offset_list': offset_list,
        'all_kpts': all_kpts,
        'all_vecs': oxford_vecs,
        'words': oxford_words,
        'idx_to_wx': oxford_wids,
        'data_uri_order': data_uri_order,
        'query_uri_order': query_uri_order,
    }
    return data
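Both examples lean on yael's binary readers. `fvecs_read` parses the INRIA texmex `.fvecs` layout, where each record is an int32 dimension followed by that many float32 components, and `load_ext` appears to read a flat binary array whose dtype is named by the file extension ('.uint8', '.uint32', '.float'), reshaped to `ndims` columns. If yael is unavailable, numpy-only stand-ins along these lines should behave equivalently (a sketch under those format assumptions, not yael's actual implementation):

import numpy as np

def fvecs_read_fallback(fname):
    # texmex .fvecs: each record is [int32 d][d x float32]
    data = np.fromfile(fname, dtype=np.int32)
    dim = int(data[0])
    recs = data.reshape(-1, dim + 1)
    assert (recs[:, 0] == dim).all(), 'expected a uniform dimension'
    # drop the leading dimension column, reinterpret the payload as float32
    return recs[:, 1:].copy().view(np.float32)

def load_ext_fallback(fname, ndims=None, verbose=False):
    # map the file extension to an element dtype ('.float' taken as float32)
    ext = fname.rsplit('.', 1)[1]
    dtype = np.float32 if ext == 'float' else np.dtype(ext)
    arr = np.fromfile(fname, dtype=dtype)
    if ndims is not None:
        arr = arr.reshape(-1, ndims)
    if verbose:
        print('loaded %r %s array from %s' % (arr.shape, arr.dtype, fname))
    return arr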