def lsh(p_hash_size, distance_func):
    """Simulate KNN with locality-sensitive hashing for a random vipno.

    :param p_hash_size: multiplied by the deduplicated vipno count to form the
        final hash_size
    :param distance_func: distance function used by the LSH query
        (the docstring previously said ``distance_funcs`` -- the parameter is
        singular)
    :return: (5 nearest vipnos excluding the query itself, the queried vipno)
    """
    datas_set, datas_matrix = get_data()
    # Number of distinct vipnos (columns of the matrix).
    vipno_nums = len(datas_matrix[0])
    # Pick one vipno at random (its column index).
    random_vipno = random.randint(0, vipno_nums - 1)
    # Renamed from `lsh` so the local no longer shadows this function's name.
    hasher = LSHash(int(vipno_nums * p_hash_size), len(datas_matrix[:, 0]))
    for i in range(vipno_nums):
        # extra_data carries the column's vipno so query hits map back to vipnos.
        hasher.index(datas_matrix[:, i], extra_data=datas_set.columns[i])
    vipno_res = []
    # num_results=6: the first hit is the query column itself, keep 5 neighbours.
    for res in hasher.query(datas_matrix[:, random_vipno], num_results=6,
                            distance_func=distance_func):
        vipno_res.append(res[0][1])
    print("distance func:", distance_func)
    print("knn output(from 1 to 5): {}".format(vipno_res[1:]))
    return vipno_res[1:], datas_set.columns[random_vipno]
def build_index():
    """Build an LSHash index over every stored image hash, keyed by image id."""
    index = LSHash(32, HASH_SIZE ** 2)
    rows = session.query(models.Image.hash, models.Image.id).all()
    for hex_digest, image_id in rows:
        # Rehydrate the perceptual hash from its hex form and index its bytes.
        flat_bits = hash.hex_to_hash(hex_digest).hash.flatten()
        index.index(flat_bits.view(np.uint8), extra_data=image_id)
    return index
def constructfeature(self, hash_size, input_dim, num_hashtables):
    """Extract deep features for all images and index them with LSH.

    :param hash_size: length of the binary hash produced by LSHash
    :param input_dim: dimensionality of the indexed feature vectors
    :param num_hashtables: number of hash tables for multiple lookups
    :return: (feature_dict mapping image path -> feature vector, LSHash index)
    """
    print(">> Loading network:\n>>>> '{}'".format(self.network))
    state = torch.load(self.network)

    # Parse net params from meta: architecture, pooling, mean, std are
    # required; the rest fall back to defaults when absent.
    net_params = {
        'architecture': state['meta']['architecture'],
        'pooling': state['meta']['pooling'],
        'local_whitening': state['meta'].get('local_whitening', False),
        'regional': state['meta'].get('regional', False),
        'whitening': state['meta'].get('whitening', False),
        'mean': state['meta']['mean'],
        'std': state['meta']['std'],
        'pretrained': False,
    }

    # Network initialization.
    net = init_network(net_params)
    net.load_state_dict(state['state_dict'])
    print(">>>> loaded network: ")
    print(net.meta_repr())

    # Multi-scale evaluation scales; previously built via eval('[1]') --
    # a plain literal gives the same value without eval().
    ms = [1]
    print(">>>> Evaluating scales: {}".format(ms))

    # Move network to GPU when available; always switch to eval mode.
    if torch.cuda.is_available():
        net.cuda()
    net.eval()

    # Set up the image transform from the network's own normalization stats.
    normalize = transforms.Normalize(mean=net.meta['mean'], std=net.meta['std'])
    transform = transforms.Compose([transforms.ToTensor(), normalize])

    # Extract database vectors.
    print('>> database images...')
    images = ImageProcess(self.img_dir).process()
    vecs, img_paths = extract_vectors(net, images, 1024, transform, ms=ms)
    feature_dict = dict(zip(img_paths, list(vecs.detach().cpu().numpy().T)))

    # Index every feature vector, keyed by its image path.
    lsh = LSHash(hash_size=int(hash_size), input_dim=int(input_dim),
                 num_hashtables=int(num_hashtables))
    for img_path, vec in feature_dict.items():
        lsh.index(vec.flatten(), extra_data=img_path)

    print("extract feature is done")
    return feature_dict, lsh
def getLSHashOutput(filename, hash_size, k):
    """Index every column of the data matrix with LSH, then print the k+1
    nearest columns (the query column itself included) for a random column."""
    matrix = getMatrix(filename)
    n_cols = len(matrix.iloc[0])
    n_rows = len(matrix.iloc[:, 0])
    table = LSHash(hash_size=int(hash_size * n_cols), input_dim=n_rows)
    for col in range(n_cols):
        table.index(input_point=matrix.iloc[:, col],
                    extra_data=matrix.columns[col])
    picked = rand.randint(0, n_cols - 1)
    # Several lshash distance functions exist; 'euclidean' is the default.
    print(table.query(query_point=matrix.iloc[:, picked],
                      num_results=k + 1,
                      distance_func='euclidean'))
def sphere():
    """Index 1000 random unit vectors in 3-D and sanity-check a cosine query."""
    points = np.random.normal(size=(1000, 3))
    table = LSHash(10, 3, num_hashtables=5)
    for p in points:
        # Project onto the unit sphere (mutates `points` in place), then index.
        p /= np.linalg.norm(p)
        table.index(p)
    # Query a slightly perturbed copy of the first point.
    perturbed = points[0] + np.array([-0.001, 0.001, -0.001])
    closest = table.query(perturbed, distance_func="cosine")
    assert len(closest) >= 10
    assert 0.05 >= closest[9][-1] > 0.0003
def build_index(self, k: int, hash_size: int = 10, num_hashtables: int = 1,
                store_file: str = None, overwrite: bool = False):
    """Build an LSH index over all pictures.

    K-means first finds ``k`` key features among the previously extracted
    features; each image is then summarised by the histogram of its feature
    assignments (its fingerprint), and the histograms are indexed with
    LSHash (locality-sensitive hashing).

    Args:
        k: number of K-means centroids (key features).
        hash_size: length of the resulting binary hash array.
        num_hashtables: number of hash tables for multiple lookups.
        store_file: path to the .npz file where the random matrices are
            stored, or will be stored if the file does not exist yet.
        overwrite: whether to overwrite the matrices file if it exists.

    Returns:
    """
    assert 0 < k < len(self._all_feats)
    assert hash_size > 0 and num_hashtables > 0

    # Mini-batch K-means copes better with large amounts of data.
    logger.info('Calculating {} key featurs...'.format(k))
    self._kmeans = MiniBatchKMeans(n_clusters=k)
    self._kmeans.fit(np.array(self._all_feats))
    assignments = self._kmeans.labels_

    logger.info('Start indexing each image.')
    self._lsh = LSHash(hash_size=hash_size, input_dim=k,
                       num_hashtables=num_hashtables,
                       matrices_filename=store_file, overwrite=overwrite)

    success = 0
    progress = tqdm(total=len(self._img_dict))
    edges = np.arange(-0.5, k + 0.5, 1)  # one bin per centroid id
    for img_name, meta in self._img_dict.items():
        try:
            lo = meta['start_index']
            hi = lo + meta['num_feats']
            # Histogram of this image's feature-to-centroid assignments.
            hist, _ = np.histogram(assignments[lo:hi], bins=edges)
            meta['histogram'] = hist
            # Store the fingerprint in the hash tables, keyed by image name.
            self._lsh.index(input_point=hist, extra_data=img_name)
            success += 1
        except Exception as e:
            logger.warning(e)
            logger.warning('Error when indexing image: {}'.format(img_name))
        progress.update(1)
    progress.close()
    logger.info('Successfully indexed {} images.'.format(success))
def hyperspheres(X=16, num_samples=200000):
    """ Demonstrate curse of dimensionality and where LSH starts to fail

    Returns:
      lsh, X, secondclosest, tenthclosest

    >>> import pandas as pd
    >>> lsh, vectors, dfs = hyperspheres(16)
    >>> for df in dfs:
    ...     print(df)
    """
    # An int X is the dimensionality -> draw uniform random samples.
    X = np.random.uniform(size=(num_samples, X)) if isinstance(X, int) else X
    closest = []
    secondclosest = []
    tenthclosest = []
    hundredthclosest = []
    for D in range(2, X.shape[1] + 1):
        # Shrink the hash as D grows, but add D more tables per dimension.
        lsh = LSHash(int(64 / D) + D, D, num_hashtables=D)
        # query vector
        q = np.random.uniform(size=(D,))
        q /= np.linalg.norm(q)
        distances = []
        for x in X[:, :D]:
            # NOTE: in-place normalisation mutates X itself; rows are indexed
            # only after being normalised.
            x /= np.linalg.norm(x)
            distances += [1. - np.sum(x * q)]  # cosine similarity
            lsh.index(x)
        # Brute-force ground truth for comparison with the LSH result.
        distances = sorted(distances)
        print(distances[:10])
        closest10 = lsh.query(q, distance_func='cosine')
        N = len(closest10)
        # Each record: [dimension, hits, LSH distance at rank r, true distance
        # at rank r]; distance 2. is a sentinel for "no hits at all".
        hundredthclosest += [[D, N, closest10[min(99, N - 1)][-1] if N else 2., distances[min(99, N - 1)]]]
        tenthclosest += [[D, N, closest10[min(9, N - 1)][-1] if N else 2., distances[min(9, N - 1)]]]
        secondclosest += [[D, N, closest10[min(1, N - 1)][-1] if N else 2., distances[min(1, N - 1)]]]
        closest += [[D, N, closest10[0][-1] if N else 2., distances[0]]]
        print("is correct: 100th 10th 2nd 1st")
        print(round(hundredthclosest[-1][-1], 14) == round(hundredthclosest[-1][-2], 14))
        print(round(tenthclosest[-1][-1], 14) == round(tenthclosest[-1][-2], 14))
        print(round(secondclosest[-1][-1], 14) == round(secondclosest[-1][-2], 14))
        print(round(closest[-1][-1], 14) == round(closest[-1][-2], 14))
        print("distances: 100th 10th 2nd 1st")
        print(hundredthclosest[-1])
        print(tenthclosest[-1])
        print(secondclosest[-1])
        print(closest[-1])
    # Summarise each rank as a DataFrame and flag where LSH agrees with the
    # brute-force ground truth.
    dfs = []
    for k, (i, df) in enumerate(zip([100, 10, 2, 1], [hundredthclosest, tenthclosest, secondclosest, closest])):
        df = pd.DataFrame(df, columns='D N dist{} true_dist{}'.format(i, i).split()).round(14)
        df['correct{}'.format(i)] = df['dist{}'.format(i)] == df['true_dist{}'.format(i)]
        dfs += [df]
    # for i, tc in enumerate(tenthclosest):
    #     assert 1e-9 < tc[-2] or 1e-6 < 0.2
    return lsh, X, dfs
def b(r, dim, vector):
    """Time a 10-NN cosine query against an LSHash index built from `xxx`.

    Returns (similarity of the best hit, elapsed query seconds); similarity
    is -2 when the query returns nothing.
    NOTE(review): `xxx` is a free name expected at module level -- confirm.
    """
    table = LSHash(r, dim)
    for _, vec in xxx:
        table.index(vec.tolist())
    t0 = time.perf_counter()
    raw = table.query(vector.tolist(), 10, 'cosine')
    t1 = time.perf_counter()
    # Convert cosine distance to similarity (1 - distance).
    scored = [(item, 1 - dist) for item, dist in raw]
    if scored:
        return scored[0][1], t1 - t0
    return -2, t1 - t0
def save_embedding_hash(hash_params, save_path, img_names, features_dict):
    """Build an LSH index over all image feature vectors and pickle it.

    :param hash_params: dict with 'hash_size' (hash length), 'num_tables'
        (number of hash tables) and 'dim' (feature-vector dimension)
    :param save_path: destination path for the pickled index
    :param img_names: unused here; kept for interface compatibility
    :param features_dict: mapping of image path -> feature vector
    """
    # Locality Sensitive Hashing parameters.
    k = hash_params['hash_size']   # hash size
    L = hash_params['num_tables']  # number of tables
    d = hash_params['dim']         # dimension of the feature vector

    lsh = LSHash(hash_size=k, input_dim=d, num_hashtables=L)

    # LSH on all the images.
    for img_path, vec in tqdm(features_dict.items()):
        lsh.index(vec.flatten(), extra_data=img_path)

    # Export as pickle; `with` closes the handle the original version leaked.
    with open(save_path, "wb") as fh:
        pickle.dump(lsh, fh)
def lsh(p_hash_size, distance_funcs):
    """KNN via locality-sensitive hashing, benchmarked per distance function.

    :param p_hash_size: multiplied by the deduplicated vipno count to form the
        final hash_size
    :param distance_funcs: distance functions to compare; elapsed query times
        are plotted as a bar chart
    """
    datas_set, datas_matrix = get_data()
    vipno_nums = len(datas_matrix[0])                  # distinct vipno count
    random_vipno = random.randint(0, vipno_nums - 1)   # random query column
    hasher = LSHash(int(vipno_nums * p_hash_size), len(datas_matrix[:, 0]))
    for col in range(vipno_nums):
        # extra_data carries the vipno so query hits map back to vipnos.
        hasher.index(datas_matrix[:, col], extra_data=datas_set.columns[col])
    print("hash size: {}".format(vipno_nums * p_hash_size))
    print("input vipno: {}".format(datas_set.columns[random_vipno]))
    ends = []
    for distance_func in distance_funcs:
        start = datetime.datetime.now()
        # num_results=6: first hit is the query column itself, keep 5.
        vipno_res = [res[0][1]
                     for res in hasher.query(datas_matrix[:, random_vipno],
                                             num_results=6,
                                             distance_func=distance_func)]
        end = (datetime.datetime.now() - start).total_seconds()
        ends.append(end)
        print("distance func:", distance_func)
        print("knn output(from 1 to 5): {}".format(vipno_res[1:]))
        print("time:", end)
    # Bar chart comparing elapsed query time per distance function.
    plt.bar(distance_funcs, ends, alpha=0.9, width=0.35,
            facecolor='lightskyblue', edgecolor='white', label='time', lw=1)
    plt.legend(loc="upper left")
    plt.show()
def hyperspheres_10D(X=None):
    """ Demonstrate curse of dimensionality and where LSH starts to fail

    Args:
        X: (num_samples, dims) sample matrix; a fresh 200000x10 uniform random
           matrix is drawn when omitted.  The previous default
           ``X=np.random.uniform(size=(200000, 10))`` was evaluated once at
           import time and mutated in place by the normalisation below,
           corrupting later calls -- hence the None sentinel.

    Returns:
      lsh, X, closest, secondclosest, tenthclosest

    >>> import pandas as pd
    >>> lsh, vectors, rank1, rank2, rank10 = test_hyperspheres()
    >>> pd.DataFrame(rank2)
    >>> pd.DataFrame(rank10)
    """
    if X is None:
        X = np.random.uniform(size=(200000, 10))
    tenthclosest = []
    secondclosest = []
    closest = []
    for D in range(2, X.shape[1]):
        lsh = LSHash(int(1024 * 8182. / D) + D, D, num_hashtables=D)
        # query vector, normalised onto the unit hypersphere
        q = np.random.uniform(size=(D, ))
        q /= np.linalg.norm(q)
        distances = []
        for x in X[:, :D]:
            # NOTE(review): rows are indexed *before* normalisation here,
            # unlike the distance computed below -- confirm intentional.
            lsh.index(x)
            x /= np.linalg.norm(x)  # in place: mutates X
            distances += [1. - np.sum(x * q)]  # cosine similarity
        # Brute-force ground truth to compare against the LSH answer.
        distances = sorted(distances)
        print(distances[:10])
        closest10 = lsh.query(q, distance_func='cosine')
        N = len(closest10)
        # Record: [dimension, rank actually available (-1 if no hits),
        # LSH distance (2. sentinel when empty), true distance at that rank].
        tenthclosest += [[
            D,
            min(9, N - 1) if N else -1,
            closest10[min(9, N - 1)][-1] if N else 2.,
            distances[min(9, N - 1)]
        ]]
        secondclosest += [[
            D,
            min(1, N - 1) if N else -1,
            closest10[min(1, N - 1)][-1] if N else 2.,
            distances[min(1, N - 1)]
        ]]
        closest += [[
            D,
            0 if N else -1,
            closest10[0][-1] if N else 2.,
            distances[0]
        ]]
        print(tenthclosest[-1])
        print(secondclosest[-1])
        print(closest[-1])
    # for i, tc in enumerate(tenthclosest):
    #     assert 1e-9 < tc[-2] or 1e-6 < 0.2
    return lsh, X, closest, secondclosest, tenthclosest
def generate_lsh(images):
    """Build an LSHash engine indexing the four edges of every image.

    :param images: array whose first axis is the image index and whose second
        axis length is used as the LSH input dimension
    :return: the populated engine; each entry's extra_data is
        (image index, edge tag)
    """
    # (removed unused local `num_images = images.shape[0]`)
    image_size = images.shape[1]
    engine = LSHash(8, image_size)
    # Create a locality-sensitive hash from all four edges of every image.
    for idx, image in enumerate(tqdm(images)):
        (top, right, bottom, left) = get_all_edges_from_array(image)
        engine.index(top, extra_data=(idx, TOP))
        engine.index(right, extra_data=(idx, RIGHT))
        engine.index(bottom, extra_data=(idx, BOTTOM))
        engine.index(left, extra_data=(idx, LEFT))
    return engine
def knn(df, k, coefficient):
    """Return a random vipno and its k nearest neighbours via LSH.

    :param df: DataFrame whose columns are vipnos and rows are features
    :param k: number of neighbours to return
    :param coefficient: multiplied by the column count to form the hash size
    :return: (the randomly chosen vipno, up to k neighbouring vipnos)
    """
    hash_size = int(coefficient * df.shape[1])
    lsh = LSHash(hash_size, input_dim=df.shape[0])
    for vipno in df:
        lsh.index(df[vipno], extra_data=vipno)
    # Sample one column as the query point.
    random_column = df[df.columns.to_series().sample(1)]
    random_vip = random_column.columns.values[0]
    logging.info('random vipno: {}'.format(random_vip))
    # k + 1 results because the query column itself is expected among them.
    res = lsh.query(random_column.values.flatten())[0:k + 1]
    logging.info('vipno in ranked order using kNN(k = {}):'.format(k))
    knns = []
    for item in res:
        if item[0][1] != random_vip:  # drop the query vip itself
            logging.info(item[0][1])
            knns.append(item[0][1])
    # Previously truncated to a hard-coded 5; use the k parameter instead
    # (identical behaviour for the original k=5 usage).
    return random_vip, knns[:k]
def getLSHashOutput(filename, hash_size, k):
    """LSH-query a random column and return the matrix indices of its bucket.

    :param filename: data file passed to getMatrix
    :param hash_size: multiplied by the column count to form the hash size
    :param k: number of neighbours requested (k + 1 results including self)
    :return: list of column indices of the returned vipnos
    """
    matrix = getMatrix(filename)
    # (removed an unused accumulator that shadowed the builtin `list` and was
    # populated but never read)
    total_num = len(matrix.iloc[0])
    lsh = LSHash(hash_size=int(hash_size * total_num),
                 input_dim=len(matrix.iloc[:, 0]))
    for i in range(total_num):
        lsh.index(input_point=matrix.iloc[:, i], extra_data=matrix.columns[i])
    out_num = rand.randint(0, total_num - 1)
    # Several lshash distance functions exist; 'euclidean' is the default.
    m = lsh.query(query_point=matrix.iloc[:, out_num], num_results=k + 1,
                  distance_func='euclidean')
    print("输入的vipno是" + str(matrix.columns[out_num]) + "\n其桶中的vipno有:")
    bucket = []
    for i in range(len(m)):
        print(m[i][0][1])
        # Map the returned vipno back to its column index.
        tag = np.argwhere(matrix.columns == m[i][0][1])
        bucket.append(int(tag))
    return bucket
def __init__(self,
             hash_size,
             input_dim,
             num_of_hashtables=1,
             storage=None,
             matrices_filename=None,
             overwrite=False):
    """Construct the wrapped LSHash object.

    Attributes:
    :param hash_size: The length of the resulting binary hash in integer.
        E.g., 32 means the resulting binary hash will be 32-bit long.
    :param input_dim: The dimension of the input vector. E.g., a grey-scale
        picture of 30x30 pixels will have an input dimension of 900.
    :param num_of_hashtables: (optional) The number of hash tables used for
        multiple lookups.
    :param storage: (optional) Name of the storage backend used for the
        index; options include "redis".
    :param matrices_filename: (optional) Path to the compressed numpy file
        ending with extension `.npz`, where the uniform random planes are
        stored, or to be stored if the file does not exist yet.
    :param overwrite: (optional) Whether to overwrite the matrices file if
        it already exists.
    """
    self.hash_object = LSHash(
        hash_size=hash_size,                  # length of the binary hash
        input_dim=input_dim,                  # dimension of the input vector
        num_of_hashtables=num_of_hashtables,  # hash tables for multiple lookups (optional)
        storage=storage,                      # (optional) storage backend name, e.g. "redis"
        matrices_filename=matrices_filename,  # (optional) .npz path for the stored random planes
        overwrite=overwrite)                  # (optional) overwrite the matrices file if present
# NOTE(review): tail of a word-count vectoriser whose `def` line lies above
# this chunk -- counts each input word against the vocabulary list.
for word in inputset:
    if word in vocablist:
        # Bag-of-words: increment the count at the word's vocabulary index.
        returnvec[vocablist.index(word)] += 1
    else:
        print('word:', word, 'is not in the list_vec')
return returnvec


if __name__ == '__main__':
    # Build each paper's word set, vectorise the corpus and LSH-index it,
    # then find the nearest training vector to a test document.
    datalist, classlist, vocabset = textprocess('./paper')  # word set per paper
    stop_word_file = './stopwords_cn.txt'
    stop_word_set = make_word_set(stop_word_file)
    feature_words = word_dict(vocabset, 0, stop_word_set)
    trainMat = []
    lsh = LSHash(hash_size=10, input_dim=len(feature_words))
    for postinDoc in datalist:
        trainMat_vec = bagof_word2vec(feature_words, postinDoc)  # vectorise the training set
        trainMat.append(trainMat_vec)
        lsh.index(trainMat_vec)
    testfile = './test.txt'
    testlist = []
    with open(testfile, 'r', encoding='utf-8') as f:
        sequence = f.read()
    testlist.append(jieba.lcut(sequence, cut_all=False))
    testvect = bagof_word2vec(feature_words, testlist[0])
    # Nearest training vector; re[0][0] is the stored input point.
    re = lsh.query(testvect, num_results=1)
    print(list(re[0][0]))
    print(trainMat.index(list(re[0][0])))
trade_mat # show the table # # 2. LSH # In[9]: from lshash.lshash import LSHash import random # In[10]: o = open('lsh_output.txt', 'w') # create a file to write the results # loop with different hash size for e in [0.01, 0.05, 0.1, 0.2, 0.3, 0.5]: lsh = LSHash(round(n_vip * e), n_plu) for v in vipno: feature = list(trade_mat[v]) lsh.index(feature, extra_data=v) # pick up a random vipno pick_vip = random.randint(1, n_vip) pick_vip = vipno[pick_vip] o.write("Hash_size = {} * n_plu \n".format(e)) o.write("Pick up a vip: {}\n".format(pick_vip)) # lsh query and write the results candi = lsh.query(list(trade_mat[pick_vip]), 6, distance_func='hamming') # print(len(candi)) for i, item in enumerate(candi[1:]): dist = item[1]
# convert the words into word frequency matrix vectorizer1 = CountVectorizer() X = vectorizer1.fit_transform(corpus) # get the keywords in corpus word = vectorizer1.get_feature_names() transformer = TfidfTransformer() # calculate the TF-IDF values tfidf = transformer.fit_transform(X) lsh = LSHash(6, 8) # construct the centriodSet centriodSet = [] Ind = 0 for i in range(0, DBconnection.DBconnection.count(a.dbconnect_to_collection()) - 1): centriod = [] j = 0 while j < (tfidf.indptr[i + 1] - tfidf.indptr[i]): if j > 7: break centriod.append(round(tfidf.data[Ind + j], 2)) j += 1 if len(centriod) < 8: for index in range(8 - len(centriod)): centriod.append(0 + 1) lsh.index(centriod)
from src.utils import generate_data
import numpy as np
from lshash.lshash import LSHash

np.random.seed(18)  # fixed seed so both implementations see identical data

dimension = 10
size = 1000
hash_size = 3
num_tables = 8

if __name__ == '__main__':
    # Cross-check a custom LSH implementation against the reference LSHash:
    # with identical projection planes, every hash table must agree.
    # NOTE(review): `LSH` is not imported in this chunk -- presumably a
    # project-local class; verify.
    data = generate_data(size, dimension)
    # =============
    lsh = LSH(hash_size, dimension, num_tables)
    lsh.index(data)
    lsh_1 = LSHash(hash_size, dimension, num_tables)
    # make the projection planes the same
    lsh_1.uniform_planes = lsh.projections
    for d in data:
        lsh_1.index(d)
    # Every bucket of every table must hold the same points in both indexes.
    for i in range(num_tables):
        t1 = lsh.hash_tables[i]
        t2 = lsh_1.hash_tables[i]
        for k in t1:
            assert t1[k] == t2.get_val(k)
# NOTE(review): this chunk begins mid-way through a model/optimizer
# construction call -- its opening lines are outside this view.
        weight_decay=1e-4),
    ContrastiveLoss(),
    metric=None,
    device='cuda')
model.load_weights(
    '/home/palm/PycharmProjects/seven2/snapshots/pairs/4/epoch_0_0.016697616640688282.pth'
)
model.model.eval()
# ImageNet normalisation statistics for the 224x224 input transform.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
transform = transforms.Compose(
    [transforms.Resize((224, 224)),
     transforms.ToTensor(), normalize])
lsh = LSHash(hash_size=16, input_dim=1024, num_hashtables=5)
target_path = '/home/palm/PycharmProjects/seven/images/cropped2/unknown/obj'
query_path = '/home/palm/PycharmProjects/seven/images/cropped2/train'
cache_path = '/home/palm/PycharmProjects/seven/caches'
cache_dict = {}
with torch.no_grad():
    # Embed every target crop and search for its closest training match
    # (the inner loop continues beyond this chunk).
    for target_image_path in os.listdir(target_path):
        target = os.path.join(target_path, target_image_path)
        target_image_ori = Image.open(target)
        target_image = transform(target_image_ori)
        x = torch.zeros((1, 3, 224, 224))
        x[0] = target_image
        target_features = model.model._forward_impl(x.cuda())
        minimum = (float('inf'), 0)  # (best distance so far, its label)
        for query_folder in os.listdir(query_path):
def test_lshash():
    """Smoke test: build a 6-bit hash over 8-dim points and query a neighbour."""
    table = LSHash(6, 8)
    samples = [
        [1, 2, 3, 4, 5, 6, 7, 8],
        [2, 3, 4, 5, 6, 7, 8, 9],
        [10, 12, 99, 1, 5, 31, 2, 3],
    ]
    for point in samples:
        table.index(point)
    # Query a point one unit away from the first sample.
    print(table.query([1, 2, 3, 4, 5, 6, 7, 7]))
feature_dict = dict(zip(img_path,sf.features)) # key val of 'image_path':'512_dim_visual_embedding' pickle.dump(feature_dict, open(path/"feature_dict.p", "wb")) feature_dict = pickle.load(open(path/'feature_dict.p','rb')) ## Locality Sensitive Hashing # params k = 10 # hash size L = 5 # number of tables d = 512 # Dimension of Feature vector lsh = LSHash(hash_size=k, input_dim=d, num_hashtables=L) # LSH on all the images # for img_path, vec in tqdm_notebook(feature_dict.items()): for img_path, vec in (feature_dict.items()): print(img_path) print(vec) lsh.index(vec.flatten(), extra_data=img_path) ## Exporting as pickle pickle.dump(lsh, open(path/'lsh.p', "wb"))
# @Time    : 2017/10/15 21:35
# @Author  : Jalin Hu
# @File    : note.py
# @Software: PyCharm
from lshash.lshash import LSHash

if __name__ == '__main__':
    # Build a 6-bit LSH over 8-dimensional points and query the two nearest.
    table = LSHash(hash_size=6, input_dim=8)
    points = [
        [1, 2, 3, 4, 5, 6, 7, 8],
        [2, 3, 4, 5, 6, 7, 8, 9],
        [3, 4, 5, 6, 7, 8, 9, 10],
        [10, 12, 99, 1, 5, 6, 24, 20],
    ]
    for point in points:
        table.index(point)
    res = table.query([1, 2, 3, 4, 5, 6, 7, 7], num_results=2)
    print(res)
def createlshash():
    """Create the LSHash used for 128-dim features: 32-bit hashes, 8 tables,
    fixed seed for reproducible projection planes."""
    hash_bits = 32   # hash size (previously tried: 12)
    n_tables = 8     # number of tables (previously tried: 8)
    feat_dim = 128   # dimension of the feature vector
    return LSHash(hash_size=hash_bits, input_dim=feat_dim,
                  num_hashtables=n_tables, seed=40)
import gym
import numpy as np
from PIL import Image
from lshash.lshash import LSHash
from collections import deque
from random import random
from diskcache import FanoutCache, Cache

# Disk-backed Q-table, cleared at start-up.
qtable = Cache('cache')
qtable.clear()
env = gym.make('Breakout-v0')
# LSH over flattened binary frames.
# NOTE(review): a 64x64 1-bit frame flattens to 4096 values, but the index is
# built with input_dim=8192 -- verify the intended dimension.
lshs = LSHash(500, 8192)

LEARNING_RATE = 0.15
DISCOUNT = 0.95
EPISODES = 25000


def preprocess(obs):
    """Downscale an observation to a flat 64x64 binary uint8 array.

    NOTE(review): the body reads the global `observation`, not the `obs`
    parameter -- almost certainly a bug; confirm and switch to `obs`.
    """
    image = Image.fromarray(observation)
    image = image.resize((64, 64))
    image = image.convert(mode='1')  # 1-bit black & white
    array = np.array(image, dtype=np.uint8).flatten()
    return array


def get_action(obs_seq):
    # Look the state up in the LSH table; unseen states get indexed with an
    # optimistic all-ones action-value row.
    # NOTE(review): this definition continues beyond the visible chunk.
    query = lshs.query(obs_seq, num_results=1)
    if len(query) <= 0:
        lshs.index(obs_seq)
        actions = np.ones(env.action_space.n)
        qtable[obs_seq] = actions
    elif query[0][1] >= 10:
        lshs.index(obs_seq)