Code Example #1
File: dijkstra.py  Project: MattRijk/data-structures
import sys

# BinaryHeap is defined elsewhere in this project: an indexed min-heap
# supporting isEmpty, pop, and decreaseKey.

def singleSourceShortest(G, src):
    """
    Given graph G, return a dictionary mapping each vertex to its
    shortest-path distance from vertex src. All vertices in G must be
    drawn from the range 0..n-1, and src must be in the same range.
    """
    # Initialize dist to infinity for every vertex except src
    infinity = sys.maxsize
    n = len(G)
    dist = {v: infinity for v in range(n)}

    dist[src] = 0

    # Build the heap directly: src enters with key 0, all others with infinity
    pq = BinaryHeap(n, src, infinity)

    while not pq.isEmpty():
        u = pq.pop()
        for v, weight in G.neighbors(u):
            # Relax edge (u, v)
            newLen = dist[u] + weight
            if newLen < dist[v]:
                pq.decreaseKey(v, newLen)
                dist[v] = newLen

    return dist
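
The example above assumes an indexed min-heap: BinaryHeap(n, src, infinity) builds a heap over vertices 0..n-1 with src at key 0 and every other vertex at infinity, and decreaseKey(v, key) lowers a vertex's key in place (the tests in Code Examples #3-#5 and #12 exercise exactly this interface). The project's own implementation is not shown here, so the following is only a minimal sketch consistent with that interface; all internal details are assumptions.

class BinaryHeap:
    """Minimal indexed min-heap sketch: src starts at key 0, all other
    vertices at the given infinity value."""

    def __init__(self, n, src, infinity):
        # heap stores (key, vertex); pos maps vertex -> index in heap
        self.heap = [(0, src)] + [(infinity, v) for v in range(n) if v != src]
        self.pos = {v: i for i, (_, v) in enumerate(self.heap)}

    def isEmpty(self):
        return not self.heap

    def pop(self):
        # remove and return the vertex with the smallest key
        key, v = self.heap[0]
        del self.pos[v]
        last = self.heap.pop()
        if self.heap:
            self.heap[0] = last
            self.pos[last[1]] = 0
            self._siftDown(0)
        return v

    def decreaseKey(self, v, newKey):
        # lower v's key; calls that would not decrease it are ignored
        i = self.pos[v]
        if newKey < self.heap[i][0]:
            self.heap[i] = (newKey, v)
            self._siftUp(i)

    def _siftUp(self, i):
        while i > 0 and self.heap[i][0] < self.heap[(i - 1) // 2][0]:
            self._swap(i, (i - 1) // 2)
            i = (i - 1) // 2

    def _siftDown(self, i):
        n = len(self.heap)
        while True:
            smallest, l, r = i, 2 * i + 1, 2 * i + 2
            if l < n and self.heap[l][0] < self.heap[smallest][0]:
                smallest = l
            if r < n and self.heap[r][0] < self.heap[smallest][0]:
                smallest = r
            if smallest == i:
                return
            self._swap(i, smallest)
            i = smallest

    def _swap(self, i, j):
        self.heap[i], self.heap[j] = self.heap[j], self.heap[i]
        self.pos[self.heap[i][1]] = i
        self.pos[self.heap[j][1]] = j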
Code Example #2
    def _orient_normals(self, k):
        print('Orienting normals')
        # find pt with maximum z value
        index = np.argmax([pt.position[2] for pt in self.points])
        root = self.points[index]
        # force the root normal to point in the -z direction
        if root.normal[2] > 0:
            root.normal *= -1
        parents = {}
        heap = BinaryHeap()
        for pt in self.points:
            if pt == root:
                heap.insert(0, pt)
                parents[root] = root
            else:
                heap.insert(float('inf'), pt)
        while not heap.is_empty():
            pt = heap.extract_min()
            if pt in parents:
                prev = parents[pt]
            else:
                # attach to the nearest already-oriented point
                prev = self.nearest_neighbors(pt, 1, parents.keys())[0]
                parents[pt] = prev
            # flip pt's normal if it disagrees with its parent's
            if np.dot(prev.normal, pt.normal) < 0:
                pt.normal *= -1

            neighbors = self.nearest_neighbors(pt, k)
            for pt2 in neighbors:
                if pt2 not in parents:
                    old_dist = heap.get_key(pt2)
                    dist = 1. - np.abs(np.dot(pt.normal, pt2.normal))
                    if dist < old_dist:
                        parents[pt2] = pt
                        heap.update_key(dist, pt2)
        return parents
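
This is the classic minimum-spanning-tree approach to normal orientation (in the spirit of Hoppe et al.'s surface-reconstruction work): one extreme point is oriented by fiat, and orientation is then propagated greedily along edges whose endpoint normals are most nearly parallel. The propagation weight 1 - |n1 . n2| is near 0 for parallel normals and near 1 for perpendicular ones; a small standalone illustration (not project code):

import numpy as np

def orientation_cost(n1, n2):
    # low cost when the two unit normals are nearly parallel
    return 1.0 - np.abs(np.dot(n1, n2))

up = np.array([0.0, 0.0, 1.0])
tilted = np.array([0.0, 0.1, 1.0])
tilted /= np.linalg.norm(tilted)
side = np.array([1.0, 0.0, 0.0])

print(orientation_cost(up, tilted))  # ~0.005: a reliable edge to propagate along
print(orientation_cost(up, side))    # 1.0: an unreliable edge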
Code Example #3
    def test_decreaseKeyMany(self):
        self.bh = BinaryHeap(1000, 0, 999)
        for i in range(999, 0, -1):
            self.bh.decreaseKey(i, i)

        for i in range(1000):
            self.assertEqual(i, self.bh.pop())
Code Example #4
    def test_decreaseKeyRandom(self):
        self.bh = BinaryHeap(1000, 0, 999)
        for i in range(999, 0, -1):
            self.bh.decreaseKey(i, random.randint(1, 999))

        result = []
        for _ in range(1000):
            result.append(self.bh.pop())
        self.assertEqual(list(range(1000)), sorted(result))
Code Example #5
    def test_multipleDecreaseKey(self):
        self.bh = BinaryHeap(50, 0, 999)
        for n in range(1, 50):
            for p in range(1, n):
                self.bh.decreaseKey(n, p)

        result = []
        for _ in range(50):
            result.append(self.bh.pop())

        self.assertEqual(list(range(50)), sorted(result))
Code Example #6
File: prioritizedRL.py  Project: only4hj/DeepRL
    def __init__(self):
        self.totalState = 10
        self.totalAction = 2
        self.epsilon = 0.1
        self.stepSize = 0.25
        #self.stepSize = 0.05
        self.discount = 1.0 - 1.0 / self.totalState
        self.maxIter = 10**6
        self.repeatNo = 10
        self.minibatch = 5
        self.alpha = 0.7
        self.beta = 0.5
        self.replayMemory = []
        self.maxWeight = 0

        self.mode = 'FA'
        #self.mode = 'Tablar'

        #self.samplePolicy = 'uniform'
        #self.samplePolicy = 'maxPriority'
        self.samplePolicy = 'rank'
        self.binaryHeap = BinaryHeap()

        print('replay memory size : %s' % (2**(self.totalState + 1) - 2))
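
The printed size is not arbitrary: generateReplay (shown in full in Code Example #11) appends two transitions for each of 2**(totalState - s - 1) repeats of every state s, which sums to 2 * (2**totalState - 1) = 2**(totalState + 1) - 2. A quick check of that arithmetic:

totalState = 10
size = sum(2 * 2 ** (totalState - s - 1) for s in range(totalState))
assert size == 2 ** (totalState + 1) - 2  # 2046 transitions for 10 states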
Code Example #7
import random

def test_random_input():
    h = BinaryHeap()
    for i in random.sample(range(100), 100):
        h.push(i)
    assert all_items(h) == list(range(100))
Code Example #8
def test_load_up_the_list():
    heap = BinaryHeap()
    for i in range(10000, 0, -1):
        heap.push(i)
    assert all_items(heap) == list(range(1, 10001))
Code Example #9
def test_reverse_range():
    heap = BinaryHeap()
    for i in range(5, 0, -1):
        heap.push(i)
    assert all_items(heap) == list(range(1, 6))
Code Example #10
def test_one_push_pop():
    bin_list = BinaryHeap()
    bin_list.push(10)
    assert bin_list.pop() == 10
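
Code Examples #7-#10 drain the heap through a helper all_items that is not shown. Presumably it pops until the heap is empty, so a min-heap yields its contents in ascending order; here is a minimal sketch under that assumption (the __len__ check is likewise an assumption about this heap's API):

def all_items(heap):
    # pop repeatedly; a min-heap returns its items in ascending order
    items = []
    while len(heap) > 0:  # assumes the heap exposes its size via __len__
        items.append(heap.pop())
    return items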
Code Example #11
File: prioritizedRL.py  Project: only4hj/DeepRL
import random
import time

import numpy as np
# BinaryHeap is this project's own priority-queue class (not shown here)

class Tester:
    def __init__(self):
        self.totalState = 10
        self.totalAction = 2
        self.epsilon = 0.1 
        self.stepSize = 0.25
        #self.stepSize = 0.05
        self.discount = 1.0 - 1.0 / self.totalState
        self.maxIter = 10**6
        self.repeatNo = 10
        self.minibatch = 5
        self.alpha = 0.7
        self.beta = 0.5
        self.replayMemory = []
        self.maxWeight = 0
        
        self.mode = 'FA'
        #self.mode = 'Tablar'
        
        #self.samplePolicy = 'uniform'
        #self.samplePolicy = 'maxPriority'
        self.samplePolicy = 'rank'
        self.binaryHeap = BinaryHeap()
        
        print('replay memory size : %s' % (2**(self.totalState + 1) - 2))
        
    def initialize(self):
        self.Qval = np.random.normal(0, 0.1, (self.totalState, self.totalAction)).astype(np.float32)
        self.params = np.random.normal(0, 0.1, (self.totalState * self.totalAction + 1)).astype(np.float32)
        self.wrongActions = np.zeros((self.totalState), dtype=np.int8)
        # mark one 'wrong' action per state, alternating with the state index
        for i in range(self.totalState):
            if i % 2 == 0:
                self.wrongActions[i] = 1
        self.replayMemory = []
        self.generateReplay()
    
    def generateReplay(self):
        for s in range(self.totalState-1, -1, -1):
            repeat = 2 ** (self.totalState - s - 1)
            for _ in range(repeat):
                a = self.getTrueAction(s)
                s2, r = self.doAction(s, a)
                self.replayMemory.append((s, a, r, s2))

                a = self.getWrongAction(s)
                s2, r = self.doAction(s, a)
                self.replayMemory.append((s, a, r, s2))
        random.shuffle(self.replayMemory)

        # Generate binary heap
        rank = 1
        rankSum = 0
        for data in self.replayMemory:
            self.binaryHeap.add(data, 1.0)
            rankSum += (1.0 / rank) ** self.alpha
            rank += 1
        
        self.rankIndex = []
        segment = rankSum / self.minibatch 
        if self.samplePolicy == 'rank':
            rank = 1
            segmentRankSum = 0
            segmentIndex = 0
            for i in range(1, len(self.replayMemory)):
                segmentRankSum += (1.0 / rank) ** self.alpha
                rank += 1
                if segmentRankSum >= segment:
                    self.rankIndex.append(i)
                    segmentIndex += 1
                    segmentRankSum = 0
            self.rankIndex.append(len(self.replayMemory) - 1)
        
    def getAction(self, s):
        if random.random() < self.epsilon:
            return random.randint(0, 1) 
        else:
            return np.argmax(self.getQval(s))
    
    def getFeatures(self, s, a):
        features = np.zeros((self.totalState * self.totalAction + 1), dtype=int)
        features[s * self.totalAction + a] = 1                      
        features[self.totalState * self.totalAction] = 1       # bias
        return features
        
    def getQval(self, s, a=None):
        if self.mode == 'FA':
            if a is None:
                values = []
                values.append(self.params.dot(self.getFeatures(s, 0)))
                values.append(self.params.dot(self.getFeatures(s, 1)))
                return values
            else:
                return self.params.dot(self.getFeatures(s, a))
        else:
            if a is None:
                return self.Qval[s]
            else:
                return self.Qval[s, a]
        
    def isWrongAction(self, s, a):
        return self.wrongActions[s] == a
        
    def getTrueAction(self, s):
        return 1 - self.wrongActions[s]
        
    def getWrongAction(self, s):
        return self.wrongActions[s]
        
    def doAction(self, s, a):
        """ returns next state and reward """
         
        if self.isWrongAction(s, a):
            return 0, 0
        else:
            if s < self.totalState - 1:
                return s+1, 0
            else:
                return 0, 1

    def updateValue(self, s, a, td):
        if self.mode == 'FA':
            self.params += self.stepSize * td * self.getFeatures(s, a)
        else:
            self.Qval[s, a] = self.getQval(s, a) + self.stepSize * td

        if self.samplePolicy == 'maxPriority':
            self.binaryHeap.reorderTop(np.abs(td))
            
    def isComplete(self):
        R = 1.0
        error = 0
        for s in range(self.totalState-1, -1, -1):
            for a in range(self.totalAction):
                estimate = self.getQval(s, a)
                if self.isWrongAction(s, a):
                    groundTruth = 0
                else:
                    groundTruth = R
                error += np.square(groundTruth - estimate)
            R *= self.discount
            
        # converged when the mean squared error over all (s, a) is small
        return error / (self.totalState * self.totalAction) <= 10**-3

    def sampleReplay(self, segment):
        if self.samplePolicy == 'uniform':
            index = random.randint(0, len(self.replayMemory)-1)
            return index, 1.0, self.replayMemory[index]
        elif self.samplePolicy == 'maxPriority':
            index = 1
            item = self.binaryHeap.getTop()
            return index, 1.0, item[0]
        elif self.samplePolicy == 'rank':
            if segment == 0:
                index1 = 1
            else:
                index1 = self.rankIndex[segment-1] + 1
            index2 = self.rankIndex[segment]
            index = random.randint(index1, index2)
            item = self.binaryHeap.heap[index]
            weight = (1.0 / index / self.totalState) ** self.beta
            
            if weight > self.maxWeight:
                self.maxWeight = weight
            
            weight = weight / self.maxWeight
            #weight = 1.0
            return index, weight, item[0]

    def gogoReplay(self):
        print('Training replay : policy %s' % self.samplePolicy)

        startTime = time.time()
        trainDone = []
        paramSum = np.zeros((self.totalState * self.totalAction + 1))
        for repeat in range(self.repeatNo):
            self.initialize()
            
            for i in range(self.maxIter):
                paramSum.fill(0)
                for m in range(self.minibatch):
                    index, weight, (s, a, r, s2) = self.sampleReplay(m)
                    if s2 == 0:     # terminal state
                        td = r - self.getQval(s, a)
                    else:
                        td = r + self.discount * np.max(self.getQval(s2)) - self.getQval(s, a)

                    if self.mode == 'FA':
                        paramSum += self.stepSize * weight * td * self.getFeatures(s, a)
                    else:
                        self.Qval[s, a] = self.getQval(s, a) + self.stepSize * td
            
                    self.binaryHeap.reorder(index, np.abs(td))

                self.params += paramSum

                if i % 10 == 0:
                    if self.isComplete():
                        print('training done %s out of %s' % (repeat+1, self.repeatNo))
                        trainDone.append(i)
                        break
        
        print('%s' % trainDone)
        print('%s state training complete with %s mean iters = %.0f' % (self.totalState, self.mode, np.mean(trainDone)))
        print('elapsed : %.1fs' % (time.time() - startTime))
        
    def gogoOnline(self):
        print('Training online')

        trainDone = []
        for repeat in range(self.repeatNo):
            s = 0
            self.initialize()
            
            for i in range(self.maxIter):
                a = self.getAction(s)
                s2, r = self.doAction(s, a)
                if s2 == 0:     # terminal state
                    td = r - self.getQval(s, a)
                else:
                    td = r + self.discount * np.max(self.getQval(s2)) - self.getQval(s, a)
                self.updateValue(s, a, td)
                s = s2
                
                if i % 10 == 0:
                    if self.isComplete():
                        #print('training done %s out of %s' % (repeat+1, self.repeatNo))
                        trainDone.append(i)
                        break
        
        print('%s' % trainDone)
        print('%s state training complete with %s mean iters = %.0f' % (self.totalState, self.mode, np.mean(trainDone)))
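
generateReplay and sampleReplay together implement rank-based prioritized experience replay: the transition at rank r is sampled with probability proportional to (1/r)**alpha, the total probability mass is split into minibatch segments of equal mass, one index is drawn uniformly from each segment, and the importance weight (1/(N * P(i)))**beta, normalized here by its running maximum, corrects the sampling bias. A standalone sketch of the segment construction, with hypothetical names of my own choosing:

import random

def rank_segments(n, alpha, k):
    # Boundaries (last index of each segment) splitting ranks 1..n into
    # k segments of roughly equal mass under P(rank) ~ (1/rank)**alpha.
    masses = [(1.0 / r) ** alpha for r in range(1, n + 1)]
    target = sum(masses) / k
    boundaries, acc = [], 0.0
    for i, m in enumerate(masses):
        acc += m
        if acc >= target and len(boundaries) < k - 1:
            boundaries.append(i)
            acc = 0.0
    boundaries.append(n - 1)
    return boundaries

def sample_one_per_segment(boundaries):
    # One uniform draw per segment, mirroring the 'rank' branch above.
    start, picks = 0, []
    for end in boundaries:
        picks.append(random.randint(start, end))
        start = end + 1
    return picks

print(sample_one_per_segment(rank_segments(2046, 0.7, 5)))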
Code Example #12
class TestBinaryHeap(unittest.TestCase):
    def setUp(self):
        # create a BinaryHeap of 15 elements with src = 2, using
        # 999 as infinity
        self.bh = BinaryHeap(15, 2, 999)

    def tearDown(self):
        self.bh = None

    def test_basic(self):
        self.assertEqual(2, self.bh.pop())

    def test_decreaseKey(self):
        self.bh.decreaseKey(0, 19)
        self.bh.decreaseKey(1, 7)
        self.bh.decreaseKey(3, 22)
        self.assertEqual(2, self.bh.pop())
        self.assertEqual(1, self.bh.pop())
        self.assertEqual(0, self.bh.pop())
        self.assertEqual(3, self.bh.pop())

    def test_decreaseKeyMany(self):
        self.bh = BinaryHeap(1000, 0, 999)
        for i in range(999, 0, -1):
            self.bh.decreaseKey(i, i)

        for i in range(1000):
            self.assertEqual(i, self.bh.pop())

    def test_multipleDecreaseKey(self):
        self.bh = BinaryHeap(50, 0, 999)
        for n in range(1, 50):
            for p in range(1, n):
                self.bh.decreaseKey(n, p)

        result = []
        for _ in range(50):
            result.append(self.bh.pop())

        self.assertEqual(list(range(50)), sorted(result))

    def test_decreaseKeyRandom(self):
        self.bh = BinaryHeap(1000, 0, 999)
        for i in range(999, 0, -1):
            self.bh.decreaseKey(i, random.randint(1, 999))

        result = []
        for _ in range(1000):
            result.append(self.bh.pop())
        self.assertEqual(list(range(1000)), sorted(result))
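
For reference, the expected pop order in test_decreaseKey follows directly from the keys in play after the three decreaseKey calls; a hypothetical restatement with a plain dict:

keys = {v: 999 for v in range(15)}   # the infinity sentinel
keys[2] = 0                          # src
keys.update({0: 19, 1: 7, 3: 22})
print(sorted(keys, key=keys.get)[:4])  # [2, 1, 0, 3]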
Code Example #13
    def setUp(self):
        # create a BinaryHeap of 15 elements with src = 2, using
        # 999 as infinity
        self.bh = BinaryHeap(15, 2, 999)