def decideWhereToLeap(self, highwayA, highwayB, laneA, laneB, areAdjacentLanes=True):
        """
        Pick the column at which to leap from laneA to laneB.

        Both highways are integers used as bit vectors. Two candidate columns are
        computed from the positions of their lowest clear/set bits, each is clamped
        from below by the column reachable from self.currentPosition, and one of
        them is selected:

        * non-adjacent lanes: the smaller candidate wins and the middle value is 0;
        * adjacent lanes without overlap (candidate A < candidate B): candidate B is
          returned together with the gap between the two candidates;
        * adjacent lanes with overlap: the overlapping bit strings of both highways
          are compared and the side with at least as many "0" characters wins; the
          middle value is the number of "1" characters in the winning overlap.

        Returns a 3-tuple (column, count, flag). Callers in this file unpack it as
        (leapColumn, hurdles, hasOverlap).
        """
        columnShift = leapForwardColumn(laneA, laneB)
        if self.debug:
            print("dcolumn", columnShift)

        # Index of the first set bit that follows the lowest clear bit of highwayA.
        firstZeroA = gmpy.scan0(highwayA)
        runEndA = firstZeroA + gmpy.scan1(highwayA >> firstZeroA)

        if areAdjacentLanes:
            choiceA = runEndA
            choiceB = gmpy.scan0(highwayB) - columnShift
        else:
            # Same scan, but on the bit-reversed binary text of highwayB.
            mirroredB = int(format(highwayB, "b")[::-1], 2)
            firstZeroMirroredB = gmpy.scan0(mirroredB)
            runEndMirroredB = firstZeroMirroredB + gmpy.scan1(mirroredB >> firstZeroMirroredB)
            choiceA = self.length - runEndMirroredB - columnShift
            choiceB = runEndA

        # Neither candidate may lie before the column reachable from the current position.
        earliestColumn = self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], laneA)
        choiceA = max(earliestColumn, choiceA)
        choiceB = max(earliestColumn, choiceB)
        if self.debug:
            print("A", choiceA, "B", choiceB)

        if not areAdjacentLanes:
            return (choiceA, 0, True) if choiceA < choiceB else (choiceB, 0, False)

        if choiceA < choiceB:
            return choiceB, choiceB - choiceA, False

        # Candidates overlap: compare the bit strings between the two candidate columns.
        reversedTextA = format(highwayA, "b")[::-1]
        reversedTextB = format(highwayB, "b")[::-1]
        overlapA = reversedTextA[choiceB:choiceA]
        overlapB = reversedTextB[(choiceB + columnShift):(choiceA + columnShift)]
        if self.debug:
            print("overlaps", overlapA, overlapB)

        if overlapA.count("0") >= overlapB.count("0"):
            if self.debug:
                print("choosing choice A", choiceA, "over B", choiceB)
            return choiceA, overlapA.count("1"), True

        if self.debug:
            print("choosing choice B", choiceB, "over A", choiceA)
        return choiceB, overlapB.count("1"), True
 def extractHighway(self, shift, col):
     """
     Build the highway bit vector for lane `shift` around column `col`.

     The lowest set bit is located in the lane's processed bits shifted right by
     min(col, self.length), and in the reversed processed bits shifted right by
     max(self.length - col, 0). If either scan yields a negative result, a value
     with self.length one-bits is returned. Otherwise a ones/zeros/ones mask is
     assembled from those two offsets and OR-ed with the lane's entry in self.bits.
     """
     laneIndex = shift + self.k
     forwardBits = self.processedBits[laneIndex] >> min(col, self.length)
     backwardBits = self.reversedProcessedBits[laneIndex] >> max(self.length - col, 0)
     leftBound = gmpy.scan1(forwardBits)
     rightBound = gmpy.scan1(backwardBits)

     if leftBound < 0 or rightBound < 0:
         return int("1" * self.length, 2)

     maskText = "".join((
         "1" * (self.length - col - leftBound),
         "0" * (leftBound + rightBound),
         "1" * (col - rightBound),
     ))
     return int(maskText, 2) | self.bits[laneIndex]
 def getFirstHighwayLength(self, l, col):
     """
     Return the length of the first highway in lane l.

     The lane's processed bits are shifted right by the column reported by
     self.findFirstHighway(l, col); the result is the index of the lowest set
     bit of the shifted value.
     """
     laneBits = self.processedBits[l + self.k]
     highwayStart = self.findFirstHighway(l, col)
     return gmpy.scan1(laneBits >> highwayStart)
Esempio n. 4
0
def A1_with_prime(elements, prime_number):
    """
    Hash every element with a random linear function modulo prime_number and
    return 2 raised to the largest lowest-set-bit index seen among the hashes.

    The exponent never drops below 0, so an empty input yields 1.
    """
    multiplier = random.randint(1, prime_number - 1)
    offset = random.randint(1, prime_number - 1)

    lowest_bit_indices = [
        gmpy.scan1(int((multiplier * item + offset) % prime_number))
        for item in elements
    ]
    # The trailing 0 is the floor the original running maximum started from.
    return 2 ** max(lowest_bit_indices + [0])
def count(elements, n):
    """
    Return 0 for an empty input; otherwise hash each element with a random
    linear function modulo a prime from findPrimeNumber(n, 2*n), reduce it
    modulo n, and return 2 raised to the largest lowest-set-bit index observed.
    """
    if not len(elements):
        return 0

    prime = findPrimeNumber(n, 2 * n)
    multiplier = random.randint(1, prime - 1)
    offset = random.randint(1, prime - 1)

    lowest_bit_indices = [
        gmpy.scan1(int(((multiplier * item + offset) % prime) % n))
        for item in elements
    ]
    # The leading -1 is the starting value of the original running maximum.
    return 2 ** max([-1] + lowest_bit_indices)
    def findBestShortHighwayNearby(self, pos, targetLane, maxCol):
        """
        Look for a short highway to ride while leaping from `pos` towards targetLane.

        Every lane from one below the lower of (current lane, targetLane) to one
        above the higher of the two, clipped to [-self.k, self.k], is examined.
        A lane is a candidate when its nearest highway starts before
        maxCol - leapForwardColumn(lane, targetLane). Candidates are scored by
        highway length minus distance (length is ignored when the distance
        exceeds 2), with a penalty of 2 for lanes outside the span between the
        current lane and targetLane; ties go to the smaller lane-leap penalty.

        Returns (bestLane, (startCol, endCol)), or (None, None) when no lane qualifies.
        """
        currentLane, currentCol = pos
        lowLane = min(currentLane, targetLane)
        highLane = max(currentLane, targetLane)

        topScore = float('-inf')
        bestLane = None
        bestHighway = None

        for l in range(max(lowLane - 1, -self.k), min(highLane + 2, self.k + 1)):
            laneBits = self.hurdleBits.bits[l + self.k]
            entryCol = currentCol + leapForwardColumn(currentLane, l)
            nearestHighwayCol = gmpy.scan0(laneBits >> entryCol) + entryCol
            if nearestHighwayCol >= maxCol - leapForwardColumn(l, targetLane):
                continue

            length = gmpy.scan1(laneBits >> nearestHighwayCol)
            if self.debug:
                print("small highway found:", l, nearestHighwayCol, length)

            lanePenalty = leapLanePenalty(currentLane, l)
            dist = nearestHighwayCol - entryCol + lanePenalty * 0.5
            isOutsideBoundary = l < lowLane or l > highLane

            # Far-away highways get no credit for their length.
            base = -dist if dist > 2 else length - dist
            score = base - isOutsideBoundary * 2
            if self.debug:
                print(l, score, "dist", dist)

            isBetter = score > topScore
            if not isBetter and score == topScore:
                isBetter = lanePenalty < leapLanePenalty(currentLane, bestLane)
            if isBetter:
                topScore = score
                bestLane = l
                bestHighway = (nearestHighwayCol, nearestHighwayCol + length)

        return bestLane, bestHighway
Esempio n. 7
0
File: hash.py Progetto: emmanuj/kemi
def hash_mapping():
    """
    Hash 10000 uniform random numbers in [0, 20000) and tally the index of the
    lowest set bit of each hash value.

    Each number is multiplied by a random factor, its fractional part is scaled
    by 80000 and by the golden-ratio conjugate, and floored. The lowest-set-bit
    index of every floored value is appended to a list and counted in a dict.

    NOTE(review): as shown the function returns nothing and never reads
    `expected`, `observed`, `h`, `p`, `lsb_list` or `lsb_freq` afterwards; the
    snippet is presumably truncated - confirm against the full source.
    """
    expected = np.array([5000, 2500, 1250, 625, 312, 151, 75, 37, 18, 9, 4, 2, 1])
    observed = np.array([5082, 2439, 1259, 590, 322, 157, 71, 43, 18, 10, 6, 2, 1])
    m = 0.5 * (math.sqrt(5) - 1)
    a = random.random()
    nums = 20000 * numpy.random.uniform(0, 1, size=10000)
    h = []
    p = 0
    lsb_freq = {}
    lsb_list = []
    for k in nums:
        s = k * a
        x = 80000 * math.modf(s)[0]
        hashed = math.floor(m * x)
        h.append(hashed)
        lsb = gmpy.scan1(int(hashed))
        lsb_list.append(lsb)
        # dict.has_key() no longer exists on Python 3; get() with a default
        # performs the same tally on both major versions.
        lsb_freq[lsb] = lsb_freq.get(lsb, 0) + 1
Esempio n. 8
0
def gen_hierarchical_slices(tile_width, start_index_in, sentinel_index_in):
    """
    Yield (begin, end) column intervals for every level of a tile hierarchy.

    The first level covers the interval with tiles of width tile_width; each
    following level doubles the tile width until a single tile spans the whole
    interval. Intervals are yielded level by level, left to right.

    @param tile_width: width of the smallest tile
    @param start_index_in: index of the first column
    @param sentinel_index_in: index of the sentinel column
    @raise ValueError: if the interval is empty, is not an exact multiple of
        tile_width, or the number of smallest tiles is not a power of two
    """
    ncolumns = sentinel_index_in - start_index_in
    if ncolumns < 1:
        raise ValueError('bad interval')
    if ncolumns % tile_width:
        raise ValueError('the tiles should exactly cover the interval')
    # Floor division keeps the tile count an integer on Python 3 as well as 2.
    nbase_tiles = ncolumns // tile_width
    # A positive integer is a power of two iff clearing its lowest set bit gives 0.
    if nbase_tiles < 1 or nbase_tiles & (nbase_tiles - 1):
        raise ValueError('the number of tiles should be a power of two')
    width = tile_width
    while width <= ncolumns:
        for a in range(start_index_in, sentinel_index_in, width):
            yield a, a + width
        width *= 2
Esempio n. 9
0
def gen_hierarchical_slices(tile_width, start_index_in, sentinel_index_in):
    """
    Generate the (begin, end) column pairs of a binary hierarchy of tiles.

    Tiles of width tile_width are yielded first, then tiles of twice that
    width, and so on up to one tile covering the full interval. Within a level
    the tiles are yielded in increasing column order.

    @param tile_width: width of the smallest tile
    @param start_index_in: index of the first column
    @param sentinel_index_in: index of the sentinel column
    @raise ValueError: if the interval is empty, is not an exact multiple of
        tile_width, or the number of smallest tiles is not a power of two
    """
    ncolumns = sentinel_index_in - start_index_in
    if ncolumns < 1:
        raise ValueError('bad interval')
    if ncolumns % tile_width:
        raise ValueError('the tiles should exactly cover the interval')
    # Integer division: true division would produce a float on Python 3 and
    # break both the power-of-two test and range().
    smallest_tile_count = ncolumns // tile_width
    is_power_of_two = (
        smallest_tile_count >= 1
        and smallest_tile_count & (smallest_tile_count - 1) == 0
    )
    if not is_power_of_two:
        raise ValueError('the number of tiles should be a power of two')
    width = tile_width
    while width <= ncolumns:
        for j in range(ncolumns // width):
            a = start_index_in + j * width
            yield a, a + width
        width *= 2
Esempio n. 10
0
File: new.py Progetto: emmanuj/kemi
def hashFunction(r, v):
    """
    Hash the numbers from generateNumbers(v), write the hash values to
    "hashval.txt", and print 2 raised to the bit length of the OR of the
    lowest-set-bit values of all hashes.

    Each number is multiplied by a random factor; the fractional part of the
    product is scaled by `r` and by the golden-ratio conjugate and floored.

    @param r: scale applied to the fractional part of each product
    @param v: argument forwarded to generateNumbers
    """
    m = 0.5 * (math.sqrt(5) - 1)
    a = random.random()
    nums = generateNumbers(v)
    h = []
    p = 0
    for k in nums:
        s = k * a
        x = r * (math.modf(s)[0])
        hashed = math.floor(m * x)
        h.append(hashed)
        lsp = gmpy.scan1(int(hashed))  # index of the least significant set bit
        lsv = int(math.pow(2, lsp))  # value of that bit
        p |= lsv
    # The original referenced f.close without calling it, so the file was never
    # explicitly closed; the context manager closes it even if the write fails.
    with open("hashval.txt", "w") as f:
        f.write("%s\n" % (h,))
    # p.bit_length() also works when p is a Python 2 long.
    l = p.bit_length()
    print(math.pow(2, l))
Esempio n. 11
0
def lsvalue(j):
    """
    Return the value of the least significant set bit of integer j.

    Returns 0 when j is 0. Uses the two's-complement identity j & -j, which is
    exact for arbitrarily large integers; the previous float-based
    math.pow(2, index) overflowed once the bit index reached 1024 and relied
    on the bit scan reporting -1 for zero.
    """
    return int(j & -j)
Esempio n. 12
0
                try:
                    assert gmpy.lowbits(n, i + 1) == lowbits(n, i + 1)
                except AssertionError:
                    print 'lowbits fail %d', n
                    raise
                try:
                    assert gmpy.hamdist(n, i) == hamdist(n, i)
                except AssertionError:
                    print 'hamdist fail %d', n
                    raise
                try:
                    assert abs(flipbit(n, i) - n) == 2**i
                except AssertionError:
                    print 'flipbit fail %d', n
                    raise
                try:
                    assert gmpy.scan0(n, i) == scan0(n, i)
                except AssertionError:
                    print 'scan0 fail %d', n
                    raise
                try:
                    assert gmpy.scan1(n, i + 1) == scan1(n, i + 1)
                except AssertionError:
                    print 'scan0 fail %d', n
                    raise
        assert scan1(0) is None




    def editDistance(self):
        """
        Walk from self.currentPosition through successive highways and return
        self.hurdleCost + self.leapCost.

        Starting with the highway returned by findBestHighwayNearby for the
        current position, the loop repeatedly asks for the next highway, uses
        decideWhereToLeap to choose the leap column, and records each move with
        self._updateMatch. When decideWhereToLeap reports no overlap, short
        highways from findBestShortHighwayNearby are taken on the way to the next
        highway. After the loop a final move to
        (self.destinationLane, self.length - 1) is recorded if not already there.

        Side effects: updates self.currentPosition, trims self.match["dna1"] and
        self.match["dna2"] by self.maxZerosIgnored + 5 characters at both ends,
        and prints self.match unconditionally.

        NOTE(review): self.hurdleCost / self.leapCost are presumably accumulated
        inside _updateMatch - confirm, since this method never assigns them.
        """

        # Store leap and hurdle costs
        # NOTE(review): these two locals are never read; the return value uses
        # the instance attributes of the same names.
        leapCost = 0
        hurdleCost = 0

        # Find the first highway
        lane, highway, length = self.findBestHighwayNearby(self.currentPosition)
        if self.debug:
            print(lane, highway,  length)

        #highwayLength = self.hurdleBits.getFirstHighwayLength(self.currentPosition[0], self.currentPosition[1])
        # NOTE(review): leapCol is computed but not used below.
        leapCol = leapForwardColumn(self.currentPosition[0], lane)

        #FIXME
        # Update position to the end of the highway:
        # self.length minus the lowest clear bit of the bit-reversed binary text of `highway`.
        colAfterLeap = self.length - gmpy.scan0(int(format(highway, "b")[::-1], 2))
        #self.length - gmpy.scan0(int(format(highway, "b")[::-1], 2))#self.currentPosition[1] + leapCol + length

        while colAfterLeap < self.length - 1 and self.currentPosition[1] < self.length - 1:
            if self.debug:
                print(format(highway, "b"))
                print("new position,", lane, colAfterLeap)
            # find the next highway
            lane_, highway_, length_ = self.findBestHighwayNearby((lane, colAfterLeap))
            if highway_ is None:
                break

            if False:  # disabled branch (never executes); condition used to be abs(lane - lane_) <= 1
                # In adjacent lanes
                leapColumn, hurdles, _ = self.decideWhereToLeap(highway, highway_, lane, lane_)
                if self.debug:
                    print("leaping at column", leapColumn)
                newPosition = (lane_, leapColumn + leapForwardColumn(lane, lane_))
                self._updateMatch(self.currentPosition, newPosition)
                self.currentPosition = newPosition
                if self.debug:
                    print("current position", self.currentPosition)
            else:
                # Lanes at most one apart use the adjacent-lane rule; otherwise the
                # non-adjacent rule is requested explicitly.
                # NOTE(review): `hurdles` is assigned here but never read.
                if abs(lane - lane_) <= 1:
                    leapColumn, hurdles, hasOverlap = self.decideWhereToLeap(highway, highway_, lane, lane_)
                else:
                    leapColumn, hurdles, hasOverlap = self.decideWhereToLeap(highway, highway_, lane, lane_, areAdjacentLanes=False)
                if self.debug:
                    print("leaping at column", leapColumn, "has overlap", hasOverlap)
                if hasOverlap:
                    # Leap directly into the next lane at the chosen column.
                    newPosition = (lane_, leapColumn + leapForwardColumn(lane, lane_))
                    self._updateMatch(self.currentPosition, newPosition)
                    self.currentPosition = newPosition
                    if self.debug:
                        print("current position", self.currentPosition)
                else:
                    #self._updateMatch(self.currentPosition[0], lane, leapColumn - self.currentPosition[1])
                    #self.currentPosition = (lane, leapColumn)
                    if self.debug:
                        print("current position", self.currentPosition)

                    # Same reversed-bits scan as in decideWhereToLeap's non-adjacent case,
                    # here without the column shift.
                    highwayReversed_ = int(format(highway_, "b")[::-1], 2)
                    highwayStartCol_ = self.length - (gmpy.scan1(highwayReversed_ >> gmpy.scan0(highwayReversed_)) + gmpy.scan0(highwayReversed_))
                    if self.debug:
                        print("starting col", highwayStartCol_, gmpy.scan0(highway_))
                    # Take short highways until the next highway's start column is reached,
                    # none is found, or the last column is hit.
                    while self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], lane_) < highwayStartCol_  and self.currentPosition[1] < self.length - 1:
                        shortHighwayLane, shortHighway = self.findBestShortHighwayNearby(self.currentPosition, lane_, highwayStartCol_)

                        if shortHighwayLane is None:
                            break

                        else:
                            if self.debug:
                                print("chosen small highway", shortHighwayLane, shortHighway)
                            # Move to the end column of the chosen short highway.
                            newPosition = (shortHighwayLane, shortHighway[1])
                            self._updateMatch(self.currentPosition, newPosition, leapType="before")
                            self.currentPosition = newPosition
                            if self.currentPosition[1] >= self.length - 1:
                                break

                        if self.debug:
                            print("current position", self.currentPosition)

                    # If still short of the next highway (or in a different lane), move
                    # into lane_ at the later of the highway's lowest clear bit and the
                    # column reachable from the current position.
                    if (self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], lane_) < gmpy.scan0(highway_) or self.currentPosition[0] != lane_) and self.currentPosition[1] < self.length - 1:
                        #print(format(highway_, "b"))
                        newPosition = (lane_, max(gmpy.scan0(highway_), self.currentPosition[1] + leapForwardColumn(self.currentPosition[0], lane_)))
                        self._updateMatch(self.currentPosition, newPosition, leapType="before")
                        self.currentPosition = newPosition
                        if self.debug:
                            print("current position", self.currentPosition)



            if self.debug:
                print("hurdle cost", self.hurdleCost)
                print("leap cost", self.leapCost)


            # Advance to the highway just handled and recompute its end column.
            lane, highway, length = lane_, highway_, length_
            colAfterLeap = self.length - gmpy.scan0(int(format(highway, "b")[::-1], 2))



        # Record the ride to the end of the last highway.
        newPosition = (lane, colAfterLeap)
        self._updateMatch(self.currentPosition, newPosition)
        #print(colAfterLeap - self.currentPosition[1])
                # see if we can find smaller highways in between

        #leapCost += leapLanePenalty(self.currentPosition[0], lane)


        self.currentPosition = (lane, colAfterLeap)
        # Finish at the destination lane's last column if not already there.
        if lane != self.destinationLane or self.currentPosition[1] < self.length - 1:
            self._updateMatch(self.currentPosition, (self.destinationLane, self.length - 1))

        if self.debug:
            print("hurdle cost", self.hurdleCost)
            print("leap cost", self.leapCost)

        # Adjust match: drop maxZerosIgnored + 5 characters from both ends.
        self.match["dna1"] = self.match["dna1"][self.maxZerosIgnored+5:-(self.maxZerosIgnored+5)]
        self.match["dna2"] = self.match["dna2"][self.maxZerosIgnored+5:-(self.maxZerosIgnored+5)]
        # NOTE(review): printed regardless of self.debug - confirm this is intended.
        print(self.match)


        return self.hurdleCost + self.leapCost