def __init__(self, size=16, alphaMax=0.5, alphaLowest=0.25):
    """Create an empty table with ``size`` slots and two random hash parameter sets.

    Args:
        size: Initial number of slots in the table.
        alphaMax: Upper load-factor threshold, stored for the resize logic
            (the resize decision itself is made outside this method).
        alphaLowest: Lower load-factor threshold, stored for the resize logic.

    Raises:
        ZeroDivisionError: If ``size`` is 0 (the load factor cannot be computed).
    """
    self._size_ = size  # number of slots in the hash table
    # Every slot is a two-element list [node, flag]; it starts as [None, False].
    # NOTE(review): the flag is presumably a "deleted" marker -- confirm
    # against the add/remove methods.
    self._hashTable_ = [[None, False] for _ in range(self._size_)]
    self._occupied_ = 0  # how many slots are currently in use
    self._alpha_ = self._occupied_ / self._size_  # current load factor
    self._alphaMax_ = alphaMax
    self._alphaLowest_ = alphaLowest

    # Pick primes one bit wider than the table size, but never narrower
    # than 16 bits.
    bitLength = max(16, self._size_.bit_length() + 1)

    # Parameters (prime, a, b) of the first hash function.
    self._prime1_ = generatePrimeNumber(bitLength)
    self._uni_a1_ = randrange(1, self._prime1_ - 1)
    self._uni_b1_ = randrange(0, self._prime1_ - 1)

    # Parameters of the second hash function.  The multiplier starts at 1,
    # matching _uni_a1_ above: a multiplier of 0 would presumably make the
    # hash ignore the key entirely.
    self._prime2_ = generatePrimeNumber(bitLength)
    self._uni_a2_ = randrange(1, self._prime2_ - 1)
    self._uni_b2_ = randrange(0, self._prime2_ - 1)
def find(txt, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``txt``.

    Uses a rolling hash over a window of ``len(pattern)`` characters and
    confirms every hash match with a direct string comparison, so a hash
    collision can never produce a false positive.

    Args:
        txt: The string to search in.
        pattern: The string to search for.

    Returns:
        The 0-based start index of the first match, or -1 when ``pattern``
        does not occur in ``txt``.  An empty ``pattern`` matches at index 0,
        mirroring ``str.find``.
    """
    size = len(pattern)
    if size == 0:
        # The empty pattern trivially matches at the very start.
        return 0
    if size > len(txt):
        # A window of ``size`` characters does not fit into ``txt``; without
        # this guard priming the window below would index past the end.
        return -1

    base = getBaseByString(txt)
    prime = generatePrimeNumber()
    rtxt = RollingHash(base, prime)
    rpat = RollingHash(base, prime)

    # Hash of the pattern.
    for ch in pattern:
        rpat.append(ord(ch))

    # Hash of the first window of the text.
    for ch in txt[:size]:
        rtxt.append(ord(ch))

    # Equal hashes are only a hint; compare the characters to confirm.
    if rtxt.hash == rpat.hash and txt[:size] == pattern:
        return 0

    # Slide the window one character at a time: add the incoming character,
    # then drop the one that just left the window.
    for i in range(size, len(txt)):
        rtxt.append(ord(txt[i]))
        rtxt.skip(ord(txt[i - size]))
        start = i - size + 1
        if rtxt.hash == rpat.hash and txt[start:i + 1] == pattern:
            return start
    return -1
def __update__(self, updateFactor):
    """Resize the table by ``updateFactor`` and re-insert every stored entry.

    A fresh list of empty ``LinkedList`` buckets is allocated, new hash
    parameters are drawn for the new size, and every (key, value) pair of
    the old buckets is added again through ``self.__add__``.

    Args:
        updateFactor: Multiplier applied to the current size, e.g. ``2`` to
            double the table or ``1/2`` to halve it.
    """
    old_buckets = self._hashTable_
    # Keep at least one bucket: int() truncation could otherwise yield a
    # size of 0 and the load-factor division below would fail.
    self._size_ = max(1, int(self._size_ * updateFactor))
    # A NumPy object array could be used here instead of a list; every
    # element would then have to be filled with its own LinkedList().
    self._hashTable_ = [LinkedList() for _ in range(self._size_)]
    # Reset the counters before re-inserting.
    # NOTE(review): presumably __add__ updates them again -- confirm.
    self._occupied_ = 0
    self._alpha_ = self._occupied_ / self._size_

    # Draw new hash parameters sized for the new table.
    bitLength = self._size_.bit_length() + 1
    self._prime_ = generatePrimeNumber(bitLength)
    # The multiplier must be non-zero: a == 0 would presumably send every
    # key to the same bucket.  The upper bound is the prime itself so the
    # range is never empty, even for the smallest possible prime.
    self._uni_a_ = randrange(1, self._prime_)
    self._uni_b_ = randrange(0, self._prime_ - 1)

    for bucket in old_buckets:
        curr = bucket.head
        if curr is None:
            # Empty bucket: nothing to move (and, as before, not destroyed).
            continue
        while curr is not None:
            self.__add__(curr.key, curr.value)
            curr = curr.next
        # Release the old chain once all of its entries have been moved.
        bucket.destroy()
def __update__(self, updateFactor):
    """Resize the table by ``updateFactor`` and re-insert every stored node.

    A fresh slot list is allocated, both sets of hash parameters are drawn
    again for the new size, and every node found in the old slots is added
    again through ``self.__add__``.

    Args:
        updateFactor: Multiplier applied to the current size, e.g. ``2`` to
            double the table or ``1/2`` to halve it.
    """
    old_slots = self._hashTable_
    # Keep at least one slot: int() truncation could otherwise yield a size
    # of 0 and the load-factor division below would fail.
    self._size_ = max(1, int(self._size_ * updateFactor))
    self._hashTable_ = [[None, False] for _ in range(self._size_)]
    # Reset the counters before re-inserting.
    # NOTE(review): presumably __add__ updates them again -- confirm.
    self._occupied_ = 0
    self._alpha_ = self._occupied_ / self._size_

    # Primes are one bit wider than the table size, but never narrower
    # than 16 bits.
    bitLength = max(16, self._size_.bit_length() + 1)

    # Parameters (prime, a, b) of the first hash function.
    self._prime1_ = generatePrimeNumber(bitLength)
    self._uni_a1_ = randrange(1, self._prime1_ - 1)
    self._uni_b1_ = randrange(0, self._prime1_ - 1)

    # Parameters of the second hash function.
    self._prime2_ = generatePrimeNumber(bitLength)
    self._uni_a2_ = randrange(1, self._prime2_ - 1)
    self._uni_b2_ = randrange(0, self._prime2_ - 1)

    # Each old slot is a two-element list; its first element is the stored
    # node, or None when the slot holds nothing to move.
    for slot in old_slots:
        node = slot[0]
        if node is None:
            continue
        self.__add__(node.key, node.value)
def __init__(self, size=16, alphaMax=0.5, alphaLowest=0.25):
    """Create an empty chained hash table with ``size`` buckets.

    Args:
        size: Initial number of buckets.
        alphaMax: Load factor at which the table size doubles, e.g. 0.5
            means the table doubles once it is 50% full.
        alphaLowest: Load factor at which the table size is halved, e.g.
            0.25 means the table shrinks once only 25% of it is in use.

    Raises:
        ZeroDivisionError: If ``size`` is 0 (the load factor cannot be computed).
    """
    self._size_ = size  # number of buckets in the hash table
    # One LinkedList per bucket.  A NumPy object array could be used
    # instead of a list; every element would then have to be filled with
    # its own LinkedList().
    self._hashTable_ = [LinkedList() for _ in range(self._size_)]
    self._occupied_ = 0  # how many nodes are currently in use
    # alpha is the current utilization (load factor) of the table.
    self._alpha_ = self._occupied_ / self._size_
    self._alphaMax_ = alphaMax
    self._alphaLowest_ = alphaLowest

    # To use multiplicationHash() instead of universalHashing(), an odd
    # random multiplier would be needed as well:
    # self.rand_a = randrange(self._size_//2, self._size_) | 1

    # Parameters for universalHashing().  The prime must stay within the
    # machine word size (e.g. 64 bits on a 64-bit architecture).
    bitLength = self._size_.bit_length() + 1
    self._prime_ = generatePrimeNumber(bitLength)
    # The multiplier must be non-zero: a == 0 would presumably send every
    # key to the same bucket.  The upper bound is the prime itself so the
    # range is never empty, even for the smallest possible prime.
    self._uni_a_ = randrange(1, self._prime_)
    self._uni_b_ = randrange(0, self._prime_ - 1)