def decideWhereToLeap(self, highwayA, highwayB, laneA, laneB, areAdjacentLanes=True):
    """
    Decide at which column to leap from highway A (in laneA) to highway B (in laneB).

    Highways are given as integers used as bit vectors; the overlap logic below
    counts "0" characters as the desirable ones and "1" characters as hurdles.

    For adjacent lanes we can either leap at the end of the first highway or at
    the start of the second highway. If the two highways do not overlap, the
    start of highway B is returned together with the gap between the two
    candidates. If they overlap, the bits of both lanes inside the overlap are
    compared and the candidate whose overlap contains more zeros wins (ties go
    to candidate A).

    For non-adjacent lanes (areAdjacentLanes=False) the smaller of the two
    candidate columns is returned with a hurdle count of 0.

    Both candidates are clamped so they are never smaller than the current
    position projected onto laneA.

    Returns a 3-tuple. The caller in editDistance unpacks it as
    (leapColumn, hurdles, hasOverlap):
      * the column on which to leap,
      * 0, `choiceB - choiceA`, or the number of "1" bits in the chosen overlap,
      * a boolean flag (True in the overlap branch and when candidate A is
        chosen for non-adjacent lanes, False otherwise).

    NOTE(review): the nesting of this method was reconstructed from a
    whitespace-collapsed source; in particular confirm that the
    `minRequirement` clamp applies to both branches.
    """
    # Column offset incurred by leaping from laneA to laneB (helper defined elsewhere).
    dColumn = leapForwardColumn(laneA, laneB)
    if self.debug:
        print("dcolumn", dColumn)
    if areAdjacentLanes:
        # Index of the first 1-bit that follows the first 0-bit of highwayA.
        choiceA = gmpy.scan1(highwayA >> gmpy.scan0(highwayA)) + gmpy.scan0(highwayA)#self.length - gmpy.scan0(int(format(highwayA, "b")[::-1], 2)) # End of highway A
        # Index of the first 0-bit of highwayB, shifted back by the leap offset.
        choiceB = gmpy.scan0(highwayB) - dColumn # Start of highway B
    else:
        # Reverse the binary digits of highwayB so the scans run from the other end.
        # NOTE(review): format(..., "b") drops leading zeros, so this presumably
        # relies on highwayB having exactly self.length significant bits - confirm.
        highwayBReversed = int(format(highwayB, "b")[::-1], 2)
        choiceA = self.length - (gmpy.scan1(highwayBReversed >> gmpy.scan0(highwayBReversed)) + gmpy.scan0(highwayBReversed)) - dColumn # after the last hurdle of highway B
        #firstHurdle = max(self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], laneA), gmpy.scan0(highwayA))
        choiceB = gmpy.scan1(highwayA >> gmpy.scan0(highwayA)) + gmpy.scan0(highwayA) # Before the First hurdle of highway A
    # Never propose a column that lies before the current position as seen from laneA.
    minRequirement = self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], laneA)
    choiceA = max(minRequirement, choiceA)
    choiceB = max(minRequirement, choiceB)
    #choiceA = max(self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], laneA), )
    if self.debug:
        print("A", choiceA, "B", choiceB)
    if not areAdjacentLanes:
        # Non-adjacent lanes: take the earlier candidate, no hurdles are counted here.
        if choiceA < choiceB:
            return choiceA, 0, True
        else:
            return choiceB, 0, False
    if choiceA < choiceB:
        # No overlap: leap at B's start; the gap between the candidates is the second value.
        return choiceB, choiceB - choiceA, False
    else:
        # Overlap: slice the same column window out of both lanes. The strings are
        # reversed so that string index i corresponds to bit i.
        overlapA = format(highwayA, "b")[::-1][choiceB:choiceA]
        overlapB = format(highwayB, "b")[::-1][(choiceB+dColumn):(choiceA+dColumn)]
        if self.debug:
            print("overlaps", overlapA, overlapB)
        if overlapA.count("0") >= overlapB.count("0"):
            if self.debug:
                print("choosing choice A", choiceA, "over B", choiceB)
            return choiceA, overlapA.count("1"), True
        else:
            if self.debug:
                print("choosing choice B", choiceB, "over A", choiceA)
            return choiceB, overlapB.count("1"), True
def extractHighway(self, shift, col):
    """
    Build the bit vector of the highway around column `col` in lane `shift`.

    Two scans are made from `col`: one over `self.processedBits` and one over
    `self.reversedProcessedBits` (the latter starting at `self.length - col`).
    Each yields the distance to the nearest set bit. The result is a
    `self.length`-wide pattern of ones with a run of zeros covering bit
    positions `col - rightDistance` up to (but excluding) `col + leftDistance`,
    OR-ed with the lane's entry in `self.bits`.

    If either scan reports a negative index (no set bit found), an all-ones
    mask of `self.length` bits is returned instead.
    """
    row = shift + self.k
    aheadDistance = gmpy.scan1(self.processedBits[row] >> min(col, self.length))
    behindDistance = gmpy.scan1(
        self.reversedProcessedBits[row] >> max(self.length - col, 0))

    if aheadDistance >= 0 and behindDistance >= 0:
        # Most significant digit first: ones / zeros (the highway) / ones.
        pattern = "".join((
            "1" * (self.length - col - aheadDistance),
            "0" * (aheadDistance + behindDistance),
            "1" * (col - behindDistance),
        ))
        return int(pattern, 2) | self.bits[row]

    # No bounding set bit on at least one side: report a lane with no highway.
    return int("1" * self.length, 2)
def getFirstHighwayLength(self, l, col):
    """
    Return the length of the first highway in lane `l`.

    The lane's processed bits are shifted right by the column reported by
    `self.findFirstHighway(l, col)`; the index of the lowest set bit of the
    shifted value is the result.
    """
    laneBits = self.processedBits[l + self.k]
    firstHighwayCol = self.findFirstHighway(l, col)
    return gmpy.scan1(laneBits >> firstHighwayCol)
def A1_with_prime(elements, prime_number):
    """
    Return 2 ** R, where R is the largest lowest-set-bit index seen among the
    hashed elements (never less than 0).

    Each element `e` is hashed as `(a * e + b) % prime_number` with `a` and `b`
    drawn uniformly from `1 .. prime_number - 1`, so the result varies between
    calls unless the `random` module is seeded.
    """
    upper = prime_number - 1
    scale = random.randint(1, upper)
    offset = random.randint(1, upper)

    deepest = 0
    for item in elements:
        hashed = (scale * item + offset) % prime_number
        lowest_set = gmpy.scan1(int(hashed))
        if lowest_set > deepest:
            deepest = lowest_set
    return 2 ** deepest
def count(elements, n):
    """
    Return a power-of-two estimate derived from the hashed `elements`.

    Each element `e` is hashed as `((a * e + b) % p) % n`, where `p` comes from
    `findPrimeNumber(n, 2 * n)` and `a`, `b` are random in `1 .. p - 1`. The
    result is `2 ** R`, with `R` the largest lowest-set-bit index among the
    non-zero hash values.
    NOTE(review): this looks like a Flajolet-Martin style distinct-count
    estimate - confirm against the caller.

    Returns 0 for an empty input. For a non-empty input the result is always an
    integer >= 1. Previously, when every hash value was 0 (always the case for
    `n == 1`), the running maximum stayed at its initial -1 and the function
    returned `2 ** -1 == 0.5`.
    """
    # len() rather than truthiness so array-like inputs keep working.
    if len(elements) == 0:
        return 0

    p = findPrimeNumber(n, 2 * n)
    a = random.randint(1, p - 1)
    b = random.randint(1, p - 1)

    max_bit = 0
    for e in elements:
        remainder = ((a * e + b) % p) % n
        # A zero hash value has no set bit; skip it instead of feeding the
        # scan result for 0 into the maximum.
        if remainder:
            max_bit = max(max_bit, gmpy.scan1(int(remainder)))
    return 2 ** max_bit
def findBestShortHighwayNearby(self, pos, targetLane, maxCol):
    """
    If leaping through multiple lanes, find if we can take short highways
    between the lanes we are leaping.

    pos is a (lane, column) pair. Candidate lanes span from one lane below the
    lower of (pos[0], targetLane) to one lane above the higher of them, clamped
    to [-self.k, self.k]. For each candidate lane the nearest 0-bit at or after
    the position reachable from `pos` is located; it only qualifies if it lies
    before `maxCol` once the leap from that lane to `targetLane` is accounted
    for.

    Returns (bestLane, (startCol, endCol)) for the best-scoring short highway,
    or (None, None) when no lane qualifies.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed source;
    the scoring is placed inside the qualifying `if` because `length` is only
    assigned there.
    """
    currentLane, currentCol = pos
    maxScore = float('-inf')
    bestLane = None
    bestHighway = None
    # Determine the range of lanes
    if pos[0] > targetLane:
        leapingLanes = range(max(targetLane - 1, -self.k), min(pos[0] + 2, self.k + 1))
    else:
        leapingLanes = range(max(pos[0] - 1, -self.k), min(targetLane + 2, self.k + 1))
    for l in leapingLanes:
        # First 0-bit of lane l at or after the column we would land on when
        # leaping there from the current position, expressed as an absolute column.
        nearestHighwayCol = gmpy.scan0(self.hurdleBits.bits[l + self.k] >> (currentCol + leapForwardColumn(currentLane, l))) + currentCol + leapForwardColumn(currentLane, l)
        #print(l, nearestHighwayCol)
        if nearestHighwayCol < maxCol - leapForwardColumn(l, targetLane):
            # Distance from that column to the next 1-bit, i.e. the run of 0-bits.
            length = gmpy.scan1(self.hurdleBits.bits[l + self.k] >> nearestHighwayCol)
            if self.debug:
                print("small highway found:", l, nearestHighwayCol, length)
            #length = self.hurdleBits.getFirstHighwayLength(l, currentCol)
            #highway, processedHighway = self.extractHighway(l, self.nearestHighways[l + self.k], length)
            #print(format(highway, "b"))
            #hurdlesToCross = self.nearestHighways[l + self.k] - currentCol - leapForwardColumn(currentLane, l)
            # Columns to cross before the highway starts, plus half the lane-leap penalty.
            dist = nearestHighwayCol - currentCol - leapForwardColumn(currentLane, l) + leapLanePenalty(currentLane, l) * 0.5
            # Lanes outside the closed span between pos[0] and targetLane are penalised below.
            if pos[0] > targetLane:
                isOutsideBoundary = (l < targetLane or l > pos[0])
            else:
                isOutsideBoundary = (l < pos[0] or l > targetLane)
            # Highways further than 2 away get no credit for their length.
            if dist > 2:
                score = - dist - isOutsideBoundary * 2
            else:
                score = length - dist - isOutsideBoundary * 2
            if self.debug:
                print(l, score, "dist", dist)
            # Ties are broken in favour of the smaller lane-leap penalty. bestLane is
            # never None in the tie test: a finite score can only equal maxScore after
            # maxScore has been replaced at least once.
            if score > maxScore or (score == maxScore and leapLanePenalty(currentLane, l) < leapLanePenalty(currentLane, bestLane)):
                maxScore = score
                bestHighway = (nearestHighwayCol, nearestHighwayCol + length)
                bestLane = l
    return bestLane, bestHighway
def hash_mapping():
    """
    Hash 10000 uniformly drawn numbers and tally the lowest-set-bit index of
    each hash value.

    Every number `k` is mapped to `floor(m * 80000 * frac(k * a))`, where `a`
    is a random multiplier and `m` is `(sqrt(5) - 1) / 2`. `lsb_freq` maps each
    lowest-set-bit index (as reported by `gmpy.scan1`) to how often it occurred.

    NOTE(review): nothing is returned, and `expected`, `observed`, `h`, `p` and
    `lsb_list` are not consumed in the visible code; this may be an excerpt of
    a longer routine, so they are kept unchanged.
    """
    expected = np.array([5000, 2500, 1250, 625, 312, 151, 75, 37, 18, 9, 4, 2, 1])
    observed = np.array([5082, 2439, 1259, 590, 322, 157, 71, 43, 18, 10, 6, 2, 1])
    m = 0.5 * (math.sqrt(5) - 1)
    a = random.random()
    nums = 20000 * numpy.random.uniform(0, 1, size=10000)  # generateNumbers(10000)
    h = []
    p = 0
    lsb_freq = {}
    lsb_list = []
    for k in nums:
        s = k * a
        x = 80000 * math.modf(s)[0]  # fractional part of s, scaled
        h.append(math.floor(m * x))
        j = int(math.floor(m * x))
        lsb = gmpy.scan1(j)
        lsb_list.append(lsb)
        # dict.get with a default replaces the has_key() test, which no longer
        # exists on Python 3 dictionaries; behaviour on Python 2 is unchanged.
        lsb_freq[lsb] = lsb_freq.get(lsb, 0) + 1
def gen_hierarchical_slices(tile_width, start_index_in, sentinel_index_in):
    """
    Yield (begin, end) column pairs for every tile of a binary tile hierarchy.

    Level 0 covers the interval with tiles of `tile_width` columns; each
    following level doubles the tile width, up to a single tile spanning the
    whole interval. Levels are yielded from narrowest to widest, tiles within a
    level from left to right. `end` is exclusive.

    @param tile_width: width of the smallest tile
    @param start_index_in: index of the first column
    @param sentinel_index_in: index of the sentinel column
    @raise ValueError: if the interval is empty, is not an exact multiple of
        `tile_width`, or the number of smallest tiles is not a power of two

    Because this is a generator, the checks run on first iteration, not at
    call time.
    """
    ncolumns = sentinel_index_in - start_index_in
    if ncolumns < 1:
        raise ValueError('bad interval')
    if ncolumns % tile_width:
        raise ValueError('the tiles should exactly cover the interval')
    # Floor division keeps the tile count an int on Python 3 as well; true
    # division would hand a float to the bit tests and to range().
    nbase_tiles = ncolumns // tile_width
    # A positive power of two has exactly one set bit, so n & (n - 1) == 0.
    if nbase_tiles < 1 or nbase_tiles & (nbase_tiles - 1):
        raise ValueError('the number of tiles should be a power of two')
    # For a power of two, bit_length() == log2(n) + 1 == number of levels.
    nlevels = nbase_tiles.bit_length()
    width = tile_width
    for _ in range(nlevels):
        for a in range(start_index_in, sentinel_index_in, width):
            yield a, a + width
        width *= 2
def gen_hierarchical_slices(tile_width, start_index_in, sentinel_index_in):
    """
    Yield (begin, end) column pairs for every tile of a binary tile hierarchy.

    Level 0 covers the interval with tiles of `tile_width` columns; each
    following level doubles the tile width, up to a single tile spanning the
    whole interval. Levels are yielded from narrowest to widest, tiles within a
    level from left to right. `end` is exclusive.

    @param tile_width: width of the smallest tile
    @param start_index_in: index of the first column
    @param sentinel_index_in: index of the sentinel column
    @raise ValueError: if the interval is empty, is not an exact multiple of
        `tile_width`, or the number of smallest tiles is not a power of two

    Because this is a generator, the checks run on first iteration, not at
    call time.
    """
    ncolumns = sentinel_index_in - start_index_in
    if ncolumns < 1:
        raise ValueError('bad interval')
    if ncolumns % tile_width:
        raise ValueError('the tiles should exactly cover the interval')
    # Floor division keeps the tile count an int on Python 3 as well; true
    # division would hand a float to the bit tests and to range().
    nbase_tiles = ncolumns // tile_width
    # A positive power of two has exactly one set bit, so n & (n - 1) == 0.
    if nbase_tiles < 1 or nbase_tiles & (nbase_tiles - 1):
        raise ValueError('the number of tiles should be a power of two')
    # For a power of two, bit_length() == log2(n) + 1 == number of levels.
    nlevels = nbase_tiles.bit_length()
    width = tile_width
    for _ in range(nlevels):
        for a in range(start_index_in, sentinel_index_in, width):
            yield a, a + width
        width *= 2
def hashFunction(r, v):
    """
    Hash the numbers from `generateNumbers(v)`, dump the hash values to
    "hashval.txt" and print a power-of-two summary.

    Each number `k` is mapped to `floor(m * r * frac(k * a))`, where `a` is a
    random multiplier and `m` is `(sqrt(5) - 1) / 2`. The lowest set bit of
    every hash value is OR-ed into an accumulator `p`; the function prints
    `2 ** bit_length(p)`.

    Side effects: overwrites "hashval.txt" in the current working directory
    with the list of hash values and writes one line to stdout. Returns None.

    @param r: scale applied to the fractional part before hashing
    @param v: argument forwarded to `generateNumbers`
    """
    m = 0.5 * (math.sqrt(5) - 1)
    a = random.random()
    nums = generateNumbers(v)
    h = []
    p = 0
    for k in nums:
        s = k * a
        x = r * (math.modf(s)[0])  # fractional part of s, scaled by r
        bucket = math.floor(m * x)
        h.append(bucket)
        j = int(bucket)
        # j & -j isolates the least significant set bit (0 when j is 0), the
        # same value the scan-then-pow sequence produced, using exact integer
        # arithmetic.
        p |= j & -j
    # The context manager closes the file; the earlier `f.close` lacked the
    # call parentheses and therefore never closed it.
    with open("hashval.txt", "w") as f:
        f.write(str(h) + "\n")
    # Method call instead of int.bit_length(p): also accepts a Python 2 long.
    l = p.bit_length()
    # Call form prints the same text on Python 2 and Python 3.
    print(math.pow(2, l))
def lsvalue(j):
    """
    Return the value of the least significant set bit of the integer `j`.

    For example 12 (0b1100) gives 4. Returns 0 when `j` is 0. Negative values
    are treated as two's complement, so -8 gives 8.

    The result is computed with exact integer arithmetic. The earlier float
    detour through `math.pow` raised OverflowError once the bit index reached
    1024, and relied on the scan result for 0 being a number.
    """
    # In two's complement, -j flips every bit above the lowest set bit, so the
    # AND keeps only that bit.
    return int(j & -j)
# Cross-check the pure-Python bit helpers against their gmpy counterparts for
# the current `n` and bit index `i` (both supplied by the surrounding code).
# On a mismatch the offending `n` is printed before the AssertionError is
# re-raised, so the failing input is visible in the output.
# The messages are built with `%` so that `%d` is actually substituted, and
# print is used in call form so the statements parse on Python 2 and 3.
try:
    assert gmpy.lowbits(n, i + 1) == lowbits(n, i + 1)
except AssertionError:
    print('lowbits fail %d' % n)
    raise
try:
    assert gmpy.hamdist(n, i) == hamdist(n, i)
except AssertionError:
    print('hamdist fail %d' % n)
    raise
try:
    # Flipping bit i must change the value by exactly 2**i.
    assert abs(flipbit(n, i) - n) == 2**i
except AssertionError:
    print('flipbit fail %d' % n)
    raise
try:
    assert gmpy.scan0(n, i) == scan0(n, i)
except AssertionError:
    print('scan0 fail %d' % n)
    raise
try:
    assert gmpy.scan1(n, i + 1) == scan1(n, i + 1)
except AssertionError:
    # This check exercises scan1; the message previously said scan0.
    print('scan1 fail %d' % n)
    raise
# The pure-Python scan1 reports "no set bit" as None.
assert scan1(0) is None
def editDistance(self):
    """
    Walk from self.currentPosition towards (self.destinationLane,
    self.length - 1), hopping from highway to highway, and return the
    accumulated cost `self.hurdleCost + self.leapCost`.

    Every move is recorded through self._updateMatch, which is what is
    expected to maintain self.hurdleCost, self.leapCost and self.match
    (presumably - its body is not visible here). Between two long highways the
    method may take short highways found by findBestShortHighwayNearby.

    Side effects: updates self.currentPosition, trims
    `self.maxZerosIgnored + 5` entries from both ends of self.match["dna1"]
    and self.match["dna2"], and prints self.match unconditionally.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed source
    and should be checked against the original file.
    """
    # Store leap and hurdle costs
    # NOTE(review): these two locals are never read; the return value uses the
    # instance attributes of the same names.
    leapCost = 0
    hurdleCost = 0
    # Find the first highway
    lane, highway, length = self.findBestHighwayNearby(self.currentPosition)
    if self.debug:
        print(lane, highway, length)
    #highwayLength = self.hurdleBits.getFirstHighwayLength(self.currentPosition[0], self.currentPosition[1])
    # NOTE(review): leapCol is not used below except in commented-out code.
    leapCol = leapForwardColumn(self.currentPosition[0], lane) #FIXME
    # Update position to the end of the highway
    # The highway's binary digits are reversed and the lowest 0-bit of the
    # reversed value is located; subtracting from self.length turns that back
    # into a column counted from the other end.
    colAfterLeap = self.length - gmpy.scan0(int(format(highway, "b")[::-1], 2)) #self.length - gmpy.scan0(int(format(highway, "b")[::-1], 2))#self.currentPosition[1] + leapCol + length
    while colAfterLeap < self.length - 1 and self.currentPosition[1] < self.length - 1:
        if self.debug:
            print(format(highway, "b"))
            print("new position,", lane, colAfterLeap)
        # find the next highway
        lane_, highway_, length_ = self.findBestHighwayNearby((lane, colAfterLeap))
        if highway_ is None:
            break
        # Disabled branch: the original adjacent-lane condition is commented out,
        # so the `else` path below always runs.
        if False:#abs(lane - lane_) <= 1:
            # In adjacent lanes
            leapColumn, hurdles, _ = self.decideWhereToLeap(highway, highway_, lane, lane_)
            if self.debug:
                print("leaping at column", leapColumn)
            newPosition = (lane_, leapColumn + leapForwardColumn(lane, lane_))
            self._updateMatch(self.currentPosition, newPosition)
            self.currentPosition = newPosition
            if self.debug:
                print("current position", self.currentPosition)
        else:
            # Lanes at most one apart use the adjacent-lane rule, others the
            # non-adjacent rule of decideWhereToLeap.
            if abs(lane - lane_) <= 1:
                leapColumn, hurdles, hasOverlap = self.decideWhereToLeap(highway, highway_, lane, lane_)
            else:
                leapColumn, hurdles, hasOverlap = self.decideWhereToLeap(highway, highway_, lane, lane_, areAdjacentLanes=False)
            if self.debug:
                print("leaping at column", leapColumn, "has overlap", hasOverlap)
            if hasOverlap:
                # Leap straight onto the next highway at the chosen column.
                newPosition = (lane_, leapColumn + leapForwardColumn(lane, lane_))
                self._updateMatch(self.currentPosition, newPosition)
                self.currentPosition = newPosition
                if self.debug:
                    print("current position", self.currentPosition)
            else:
                #self._updateMatch(self.currentPosition[0], lane, leapColumn - self.currentPosition[1])
                #self.currentPosition = (lane, leapColumn)
                if self.debug:
                    print("current position", self.currentPosition)
                # Column derived from the bit-reversed next highway: self.length minus
                # the index of the first 1-bit that follows its first 0-bit.
                highwayReversed_ = int(format(highway_, "b")[::-1], 2)
                highwayStartCol_ = self.length - (gmpy.scan1(highwayReversed_ >> gmpy.scan0(highwayReversed_)) + gmpy.scan0(highwayReversed_))
                if self.debug:
                    print("starting col", highwayStartCol_, gmpy.scan0(highway_))
                # Fill the gap before the next highway with short highways while any are found.
                while self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], lane_) < highwayStartCol_ and self.currentPosition[1] < self.length - 1:
                    shortHighwayLane, shortHighway = self.findBestShortHighwayNearby(self.currentPosition, lane_, highwayStartCol_)
                    if shortHighwayLane is None:
                        break
                    else:
                        if self.debug:
                            print("chosen small highway", shortHighwayLane, shortHighway)
                        # Move to the end column of the chosen short highway.
                        newPosition = (shortHighwayLane, shortHighway[1])
                        self._updateMatch(self.currentPosition, newPosition, leapType="before")
                        self.currentPosition = newPosition
                        if self.currentPosition[1] >= self.length - 1:
                            break
                        if self.debug:
                            print("current position", self.currentPosition)
                # If we are still short of the next highway's first 0-bit, or in a
                # different lane, step onto the next highway's lane.
                if (self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], lane_) < gmpy.scan0(highway_) or self.currentPosition[0] != lane_) and self.currentPosition[1] < self.length - 1:
                    #print(format(highway_, "b"))
                    newPosition = (lane_, max(gmpy.scan0(highway_), self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], lane_)))
                    self._updateMatch(self.currentPosition, newPosition, leapType="before")
                    self.currentPosition = newPosition
                    if self.debug:
                        print("current position", self.currentPosition)
        if self.debug:
            print("hurdle cost", self.hurdleCost)
            print("leap cost", self.leapCost)
        # The next highway becomes the current one; advance along it.
        lane, highway, length = lane_, highway_, length_
        colAfterLeap = self.length - gmpy.scan0(int(format(highway, "b")[::-1], 2))
        newPosition = (lane, colAfterLeap)
        self._updateMatch(self.currentPosition, newPosition)
        #print(colAfterLeap - self.currentPosition[1])
        # see if we can find smaller highways in between
        #leapCost += leapLanePenalty(self.currentPosition[0], lane)
        self.currentPosition = (lane, colAfterLeap)
    # Finish at the destination cell if the walk stopped in another lane or
    # before the last column.
    if lane != self.destinationLane or self.currentPosition[1] < self.length - 1:
        self._updateMatch(self.currentPosition, (self.destinationLane, self.length - 1))
    if self.debug:
        print("hurdle cost", self.hurdleCost)
        print("leap cost", self.leapCost)
    # Adjust match
    # Drop maxZerosIgnored + 5 entries from both ends of each aligned sequence.
    # NOTE(review): presumably this strips padding added elsewhere - confirm.
    self.match["dna1"] = self.match["dna1"][self.maxZerosIgnored+5:-(self.maxZerosIgnored+5)]
    self.match["dna2"] = self.match["dna2"][self.maxZerosIgnored+5:-(self.maxZerosIgnored+5)]
    # NOTE(review): printed even when self.debug is false.
    print(self.match)
    return self.hurdleCost + self.leapCost