예제 #1
0
 def __init__(self, size=16, alphaMax=0.5, alphaLowest=0.25):
     """Create an empty table of ``size`` slots and draw two sets of hash parameters.

     Args:
         size: Initial number of slots in the table.
         alphaMax: Stored as ``_alphaMax_``; presumably the load factor
             above which the table is enlarged (the code that reads it is
             outside this method).
         alphaLowest: Stored as ``_alphaLowest_``; presumably the load
             factor below which the table is shrunk.
     """
     self._size_ = size
     # Every slot is a two-element list that starts out as [None, False]:
     # the stored entry and a boolean flag whose meaning is defined by the
     # methods that use the table.
     self._hashTable_ = [[None, False] for _ in range(self._size_)]
     self._occupied_ = 0  # how many slots are currently in use
     self._alpha_ = self._occupied_ / self._size_  # current load factor
     self._alphaMax_ = alphaMax
     self._alphaLowest_ = alphaLowest

     # Primes are generated with at least 16 bits, whatever the table size.
     bitLength = max(self._size_.bit_length() + 1, 16)

     # First (prime, a, b) parameter set.
     self._prime1_ = generatePrimeNumber(bitLength)
     self._uni_a1_ = randrange(1, self._prime1_ - 1)
     self._uni_b1_ = randrange(0, self._prime1_ - 1)

     # Second parameter set.  The multiplier is drawn from 1 upwards, like
     # ``_uni_a1_``: a zero multiplier would make the hash ignore the key.
     self._prime2_ = generatePrimeNumber(bitLength)
     self._uni_a2_ = randrange(1, self._prime2_ - 1)
     self._uni_b2_ = randrange(0, self._prime2_ - 1)
def find(txt, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``txt``.

    The search compares a rolling hash of each window of ``txt`` with the
    hash of ``pattern`` and confirms every hash match with a direct string
    comparison, so a hash collision can never produce a wrong answer.

    Args:
        txt: The string to search in.
        pattern: The string to search for.

    Returns:
        The lowest index at which ``pattern`` starts in ``txt``, ``0`` for
        an empty pattern, or ``-1`` when there is no occurrence (including
        when ``pattern`` is longer than ``txt``).
    """
    size = len(pattern)
    # A pattern longer than the text cannot match; without this guard the
    # initial window fill below would index past the end of ``txt``.
    if size > len(txt):
        return -1
    # The empty pattern matches at the very start of any text.
    if size == 0:
        return 0

    base = getBaseByString(txt)
    prime = generatePrimeNumber()
    rtxt = RollingHash(base, prime)
    rpat = RollingHash(base, prime)

    # Hash the pattern and the first window of the text.
    for ch in pattern:
        rpat.append(ord(ch))
    for ch in txt[:size]:
        rtxt.append(ord(ch))

    if rtxt.hash == rpat.hash and txt[:size] == pattern:
        return 0

    # Slide the window one character at a time: add the incoming character
    # first, then drop the one that has just left the window.
    for i in range(size, len(txt)):
        start = i - size + 1          # first index of the new window
        rtxt.append(ord(txt[i]))
        rtxt.skip(ord(txt[start - 1]))
        if rtxt.hash == rpat.hash and txt[start:i + 1] == pattern:
            return start
    return -1
예제 #3
0
 def __update__(self, updateFactor):
     """Resize the table by ``updateFactor`` and re-insert every stored pair.

     A fresh list of empty ``LinkedList`` buckets is allocated, new hash
     parameters are drawn for the new size, and each key/value pair from
     the old buckets is added again through ``__add__``.

     Args:
         updateFactor: Multiplier applied to the current size, e.g. ``2``
             to double the table or ``1/2`` to halve it.
     """
     old_table = self._hashTable_
     # Never go below one bucket: a size of zero would make the load-factor
     # division below raise ZeroDivisionError.
     self._size_ = max(1, int(self._size_ * updateFactor))
     self._hashTable_ = [LinkedList() for _ in range(self._size_)]

     # To use NumPy instead of a list, replace the line above with:
     #   self._hashTable_ = np.empty(self._size_, dtype=LinkedList)
     #   for i in range(self._size_): self._hashTable_[i] = LinkedList()

     self._occupied_ = 0
     self._alpha_ = self._occupied_ / self._size_

     # Draw new hash parameters sized for the new table.
     bitLength = self._size_.bit_length() + 1
     self._prime_ = generatePrimeNumber(bitLength)
     self._uni_a_ = randrange(0, self._prime_ - 1)
     self._uni_b_ = randrange(0, self._prime_ - 1)

     # Move every pair from the old buckets into the new table.
     for bucket in old_table:
         if bucket.head is None:
             continue
         node = bucket.head
         while node is not None:
             self.__add__(node.key, node.value)
             node = node.next
         bucket.destroy()
예제 #4
0
    def __update__(self, updateFactor):
        """Resize the table by ``updateFactor`` and re-insert every stored entry.

        A fresh table of ``[None, False]`` slots is allocated, both sets of
        hash parameters are drawn again, and each entry found in the old
        table is added back through ``__add__``.

        Args:
            updateFactor: Multiplier applied to the current size, e.g. ``2``
                to double the table or ``1/2`` to halve it.
        """
        old_table = self._hashTable_
        # Never go below one slot: a size of zero would make the load-factor
        # division below raise ZeroDivisionError.
        self._size_ = max(1, int(self._size_ * updateFactor))
        self._hashTable_ = [[None, False] for _ in range(self._size_)]

        self._occupied_ = 0
        self._alpha_ = self._occupied_ / self._size_

        # Primes are generated with at least 16 bits, whatever the table size.
        bitLength = max(self._size_.bit_length() + 1, 16)
        self._prime1_ = generatePrimeNumber(bitLength)
        self._uni_a1_ = randrange(1, self._prime1_ - 1)
        self._uni_b1_ = randrange(0, self._prime1_ - 1)
        self._prime2_ = generatePrimeNumber(bitLength)
        self._uni_a2_ = randrange(1, self._prime2_ - 1)
        self._uni_b2_ = randrange(0, self._prime2_ - 1)

        # The first element of each old slot is the stored entry (or None).
        for slot in old_table:
            entry = slot[0]
            if entry is None:
                continue
            self.__add__(entry.key, entry.value)
예제 #5
0
 def __init__(self, size = 16, alphaMax = 0.5, alphaLowest = 0.25):
     """Create an empty chained hash table with ``size`` buckets.

     Args:
         size: Initial number of buckets.
         alphaMax: Load factor at which the table is meant to double in
             size (0.5 means "when half full").
         alphaLowest: Load factor at which the table is meant to shrink to
             half its size (0.25 means "when only a quarter full").
     """
     self._size_ = size
     self._hashTable_ = [LinkedList() for _ in range(self._size_)]

     # To use NumPy instead of a list, replace the line above with:
     #   self._hashTable_ = np.empty(self._size_, dtype=LinkedList)
     #   for i in range(self._size_): self._hashTable_[i] = LinkedList()

     self._occupied_ = 0  # how many nodes are currently stored
     self._alpha_ = self._occupied_ / self._size_  # current utilisation
     self._alphaMax_ = alphaMax
     self._alphaLowest_ = alphaLowest

     # To use multiplicationHash() instead of universalHashing(), enable the
     # line below; the "| 1" forces rand_a to be odd.
     #   self.rand_a = randrange(self._size_//2, self._size_) | 1

     # Parameters for universalHashing().  Author's note: the prime must not
     # exceed the machine word size (e.g. 64 bits on a 64-bit architecture).
     bitLength = self._size_.bit_length() + 1
     self._prime_ = generatePrimeNumber(bitLength)
     # The multiplier is drawn from 1..prime-1.  Assuming universalHashing()
     # has the usual ((a*key + b) mod prime) mod size form, a multiplier of
     # zero would map every key to the same bucket.
     self._uni_a_ = randrange(1, self._prime_)
     self._uni_b_ = randrange(0, self._prime_ - 1)