Esempi in Python per HammingHash

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: hamminghash

Classe/tipologia: HammingHash

Esempi su hotexamples.com: 2

HammingHash in Python: 2 esempi trovati. Questi sono i migliori esempi reali in Python per hamminghash.HammingHash, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

do_hashing(1)

do_hashing_index(1)

Esempio n. 1

Mostra file

File: lsh.py Progetto: jasonding1354/locality_sensitive_hashing

    def __init__(self, L, k, d, **kw):
        """Initialise the parent class and create the hash and bucket helpers.

        L, k and d are forwarded unchanged to the parent initialiser (along
        with any extra keyword arguments in kw), to HammingHash and to
        LshBucket.
        """
        # The same three keyword arguments configure both helper objects.
        shared = {'L': L, 'k': k, 'd': d}

        super(LSH, self).__init__(L=L, k=k, d=d, **kw)
        self._check_parameters()

        self.hash = HammingHash(**shared)
        self.bucket = LshBucket(**shared)
        # Flag: whether queries should use the stored indexes.
        self.index = False

Esempio n. 2

Mostra file

File: lsh.py Progetto: jasonding1354/locality_sensitive_hashing

class LSH(LSHBase):
    """
    Nearest-neighbour search backed by HammingHash and LshBucket.

    >>> from LSH import LSH

    >>> lsh = LSH(
    ...     L=L,   # number of hash functions
    ...     k=k,   # number of reductions
    ...     d=d,   # number of dimensions
    ... )
    """

    def __init__(self, L, k, d, **kw):
        """Initialise the parent class and create the hash and bucket helpers.

        L, k and d are forwarded unchanged to the parent initialiser (along
        with any extra keyword arguments in kw), to HammingHash and to
        LshBucket.
        """
        super(LSH, self).__init__(L=L, k=k, d=d, **kw)
        self._check_parameters()

        self.hash = HammingHash(L=L, k=k, d=d)
        self.bucket = LshBucket(L=L, k=k, d=d)
        # Flag: whether the most recent query used the stored indexes.
        self.index = False

    def loadDataSet(self, filename, delim=','):
        """Insert every vector found in a delimited text file, then store the buckets.

        Each non-blank line is split on ``delim`` and every field is converted
        with ``int``; the resulting list is passed to ``insert``. Blank lines
        (for example a trailing newline at the end of the file) are skipped.

        Raises:
            ValueError: if a field cannot be converted to an integer.
        """
        # The context manager guarantees the file is closed even when a line
        # fails to parse.
        with open(filename) as fr:
            for line in fr:
                line = line.strip()
                if not line:
                    continue
                # Transform the string fields to integers.
                self.insert([int(x) for x in line.split(delim)])

        # Store all the data and index.
        self.bucket.store_buckets()

    def insert(self, vector):
        """Hash ``vector`` with ``do_hashing`` and insert it into the buckets."""
        hashed_array = self.hash.do_hashing(vector)
        self.bucket.insert(vector, hashed_array)

    def insert_index(self, vector):
        """Hash ``vector`` with ``do_hashing_index`` and insert it into the buckets."""
        hashed_array = self.hash.do_hashing_index(vector)
        self.bucket.insert(vector, hashed_array)

    def nn(self, vector, without_itself=False, index=False):
        """Return the nearest neighbour of ``vector`` among the bucket candidates.

        The result is a dict with the keys ``'vector'`` and ``'distance'``, or
        an empty dict when the buckets yield no candidate.
        """
        neighbours = self._neighbours(vector, without_itself, index)
        return self._nearest(vector, neighbours)

    def knn(self, vector, kk, without_itself=False, index=False):
        """Return up to ``kk`` nearest neighbours of ``vector``.

        The result is a list of dicts with the keys ``'distance'`` and
        ``'vector'``, ordered from the smallest distance to the largest.
        """
        neighbours = self._neighbours(vector, without_itself, index)
        return self._k_nearest(vector, kk, neighbours)

    def _neighbours(self, vector, without_itself=False, index=False):
        """Select the candidate vectors that share buckets with the query vector.

        When ``index`` is truthy the buckets are loaded first and the query is
        hashed with ``do_hashing_index``; otherwise ``do_hashing`` is used.
        The chosen mode is remembered in ``self.index``.
        """
        self.index = index
        # Truthiness test: a falsy value such as None must not select the
        # stored-index path (``None == False`` is False, so the old comparison
        # sent None down the index branch).
        if self.index:
            # NOTE(review): the buckets are reloaded on every indexed query;
            # confirm that load_buckets is cheap or caches its result.
            self.bucket.load_buckets()
            hashed_array = self.hash.do_hashing_index(vector)
        else:
            hashed_array = self.hash.do_hashing(vector)

        return self.bucket.select(vector, hashed_array, without_itself)

    def _k_nearest(self, vector, kk, neighbours):
        """Return the ``kk`` candidates closest to ``vector`` by Euclidean distance.

        Each entry is a dict ``{'distance': ..., 'vector': ...}``. The list is
        sorted from the smallest distance to the largest and holds fewer than
        ``kk`` entries when there are not enough candidates. A non-positive
        ``kk`` yields an empty list.
        """
        if kk <= 0:
            return []

        candidates = [
            {'distance': self._euclidean_dist(vector, n_vector),
             'vector': n_vector}
            for n_vector in neighbours
        ]
        # list.sort is stable, so among equal distances the candidate seen
        # first is kept, matching the strict '<' replacement rule used before.
        candidates.sort(key=lambda x: x['distance'])
        return candidates[:kk]

    def _nearest(self, vector, neighbours):
        """Pick the candidate closest to ``vector`` by Euclidean distance.

        Returns a dict with the keys ``'vector'`` and ``'distance'``, or an
        empty dict when ``neighbours`` is empty. On ties the first candidate
        encountered wins.
        """
        nearest = {}
        for n_vector in neighbours:
            dist = self._euclidean_dist(vector, n_vector)
            # Create the entry on the first candidate, then replace it only
            # when a strictly shorter distance is found.
            if "distance" not in nearest or dist < nearest["distance"]:
                nearest.update(vector=n_vector, distance=dist)
        return nearest

    def _hamming_dist(self, hashval1, hashval2):
        """Return the number of set bits in the XOR of the two hash values.

        Both arguments are converted with ``bitarray`` before the XOR.
        """
        xor_result = bitarray(hashval1) ^ bitarray(hashval2)
        return xor_result.count()

    def _euclidean_dist(self, vector1, vector2):
        """Return the Euclidean distance between two vectors.

        Components are paired with ``zip``, so any extra trailing components
        of the longer vector are ignored.
        """
        squared = sum((x1 - x2) ** 2 for x1, x2 in zip(vector1, vector2))
        return math.sqrt(squared)