def singleSourceShortest(G, src):
    """
    Compute shortest-path distances from vertex src to the vertices of G.

    The vertices of G must be the integers 0..n-1 (with n = len(G)) and
    src must be drawn from the same range. G must provide neighbors(u),
    yielding (vertex, weight) pairs.

    Returns a dictionary mapping each vertex to its computed distance;
    entries that are never lowered keep the value sys.maxsize.
    """
    unreachable = sys.maxsize
    vertex_count = len(G)

    # Every vertex starts at "infinity" except the source itself.
    dist = {vertex: unreachable for vertex in range(vertex_count)}
    dist[src] = 0

    # Optimized BinaryHeap construction: (size, source vertex, infinity).
    queue = BinaryHeap(vertex_count, src, unreachable)
    while not queue.isEmpty():
        current = queue.pop()
        for neighbor, weight in G.neighbors(current):
            candidate = dist[current] + weight
            if candidate < dist[neighbor]:
                # Found a shorter route: update the queue and the table.
                queue.decreaseKey(neighbor, candidate)
                dist[neighbor] = candidate

    return dist
def _orient_normals(self, k):
    """Flip point normals so that each one agrees in sign with its parent.

    Starting from the point with the largest z coordinate, points are
    taken from a priority heap one at a time. Each extracted point is
    compared with its parent (the point recorded for it in ``parents``,
    or its nearest already-processed point when none was recorded) and
    its normal is negated when the dot product of the two normals is
    negative.

    Args:
        k: number of neighbours requested from ``self.nearest_neighbors``
            when propagating from an extracted point.

    Returns:
        dict mapping every processed point to the point it was oriented
        against; the root maps to itself.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('Orienting normals')

    # find pt with maximum z value
    index = np.argmax([pt.position[2] for pt in self.points])
    root = self.points[index]
    # The root normal is forced to have a non-positive z component.
    if root.normal[2] > 0:
        root.normal *= -1

    parents = {}
    heap = BinaryHeap()
    for pt in self.points:
        if pt == root:
            # Key 0 makes the root the first point extracted.
            heap.insert(0, pt)
            parents[root] = root
        else:
            heap.insert(float('inf'), pt)

    while not heap.is_empty():
        pt = heap.extract_min()
        if pt in parents:
            prev = parents[pt]
        else:
            # No neighbour claimed this point yet: attach it to the
            # closest already-processed point. A list snapshot is passed
            # because ``parents`` is mutated on the next line (on
            # Python 3, .keys() would be a live view).
            prev = self.nearest_neighbors(pt, 1, list(parents))[0]
            parents[pt] = prev

        if np.dot(prev.normal, pt.normal) < 0:
            pt.normal *= -1

        neighbors = self.nearest_neighbors(pt, k)
        for pt2 in neighbors:
            if pt2 not in parents:
                old_dist = heap.get_key(pt2)
                # Cost is 0 for (anti)parallel normals, 1 for orthogonal.
                dist = 1. - np.abs(np.dot(pt.normal, pt2.normal))
                if dist < old_dist:
                    parents[pt2] = pt
                    heap.update_key(dist, pt2)

    return parents
def test_decreaseKeyMany(self):
    """Lower ids 1..999 to a priority equal to the id; pops must be 0..999 in order."""
    self.bh = BinaryHeap(1000, 0, 999)

    # Walk the ids from 999 down to 1, as the priorities are assigned.
    for vertex in reversed(range(1, 1000)):
        self.bh.decreaseKey(vertex, vertex)

    for expected in range(1000):
        self.assertEqual(expected, self.bh.pop())
def test_decreaseKeyRandom(self):
    """Assign random priorities to ids 1..999; every id must be popped exactly once."""
    self.bh = BinaryHeap(1000, 0, 999)

    for vertex in reversed(range(1, 1000)):
        priority = random.randint(1, 999)
        self.bh.decreaseKey(vertex, priority)

    popped = [self.bh.pop() for _ in range(1000)]

    # Pop order depends on the random priorities, so only the set is checked.
    self.assertEqual(list(range(1000)), sorted(popped))
def test_multipleDecreaseKey(self):
    """Call decreaseKey repeatedly on the same ids; every id must still be popped once."""
    self.bh = BinaryHeap(50, 0, 999)

    # Id `vertex` receives the priorities 1, 2, ..., vertex - 1 in turn.
    for vertex in range(1, 50):
        for priority in range(1, vertex):
            self.bh.decreaseKey(vertex, priority)

    popped = [self.bh.pop() for _ in range(50)]
    self.assertEqual(list(range(50)), sorted(popped))
def __init__(self):
    """Set the experiment hyper-parameters and create an empty BinaryHeap."""
    # Problem size.
    self.totalState = 10
    self.totalAction = 2

    # Learning settings.
    self.epsilon = 0.1
    self.stepSize = 0.25      # alternative kept from the original: 0.05
    self.discount = 1.0 - 1.0 / self.totalState
    self.maxIter = 10**6
    self.repeatNo = 10
    self.minibatch = 5

    # Exponents used for rank-based sampling and its weights.
    self.alpha = 0.7
    self.beta = 0.5

    self.replayMemory = []
    self.maxWeight = 0

    # 'FA' selects the linear parameter vector; the original also listed
    # 'Tablar' as an alternative value.
    self.mode = 'FA'
    # Alternatives listed in the original: 'uniform', 'maxPriority'.
    self.samplePolicy = 'rank'
    self.binaryHeap = BinaryHeap()

    # print(...) with one argument behaves the same on Python 2 and 3.
    print('replay memory size : %s' % (2**(self.totalState + 1) - 2))
def test_random_input():
    """Push 0..99 in random order and expect all_items() to list them ascending."""
    h = BinaryHeap()
    for i in random.sample(range(100), 100):
        h.push(i)
    # range() is not a list on Python 3, so it is materialised before the
    # comparison (assumes all_items returns a list -- confirm).
    assert all_items(h) == list(range(0, 100))
def test_load_up_the_list():
    """Push 10000..1 in descending order and expect all_items() to list 1..10000."""
    heap = BinaryHeap()
    for i in range(10000, 0, -1):
        heap.push(i)
    # range() is not a list on Python 3, so it is materialised before the
    # comparison (assumes all_items returns a list -- confirm).
    assert all_items(heap) == list(range(1, 10001))
def test_reverse_range():
    """Push 5..1 in descending order and expect all_items() to list 1..5."""
    heap = BinaryHeap()
    for i in range(5, 0, -1):
        heap.push(i)
    # range() is not a list on Python 3, so it is materialised before the
    # comparison (assumes all_items returns a list -- confirm).
    assert all_items(heap) == list(range(1, 6))
def test_one_push_pop():
    """A single pushed value is what the next pop returns."""
    heap = BinaryHeap()
    heap.push(10)
    popped = heap.pop()
    assert popped == 10
class Tester:
    """Chain-of-states Q-learning experiment with several replay sampling policies.

    Each state has two actions. The "wrong" one ends the episode with
    reward 0; the other moves to the next state, and from the last state
    ends the episode with reward 1 (see doAction). Values are held either
    in a linear parameter vector (mode 'FA') or in a table (any other
    mode).
    """

    def __init__(self):
        """Set the experiment hyper-parameters and create an empty BinaryHeap."""
        self.totalState = 10
        self.totalAction = 2
        self.epsilon = 0.1
        self.stepSize = 0.25      # alternative kept from the original: 0.05
        self.discount = 1.0 - 1.0 / self.totalState
        self.maxIter = 10**6
        self.repeatNo = 10
        self.minibatch = 5
        self.alpha = 0.7
        self.beta = 0.5
        self.replayMemory = []
        self.maxWeight = 0
        # 'FA' selects the linear parameter vector; the original also
        # listed 'Tablar' as an alternative value.
        self.mode = 'FA'
        # Alternatives listed in the original: 'uniform', 'maxPriority'.
        self.samplePolicy = 'rank'
        self.binaryHeap = BinaryHeap()
        print('replay memory size : %s' % (2**(self.totalState + 1) - 2))

    def initialize(self):
        """Draw fresh random values/parameters and rebuild the replay memory."""
        self.Qval = np.random.normal(
            0, 0.1, (self.totalState, self.totalAction)).astype(np.float32)
        self.params = np.random.normal(
            0, 0.1, (self.totalState * self.totalAction + 1)).astype(np.float32)
        self.wrongActions = np.zeros((self.totalState), dtype=np.int8)
        # NOTE(review): this loops over totalAction although wrongActions
        # has one entry per state, so only wrongActions[0] becomes 1 with
        # the default settings -- confirm whether totalState was intended.
        for i in range(self.totalAction):
            if i % 2 == 0:
                self.wrongActions[i] = 1
        self.replayMemory = []
        # NOTE(review): self.binaryHeap and self.maxWeight are not reset
        # here, so they carry over between repeats -- confirm intended.
        self.generateReplay()

    def generateReplay(self):
        """Fill the replay memory, load it into the heap and compute rank segments."""
        for s in range(self.totalState - 1, -1, -1):
            repeat = 2 ** (self.totalState - s - 1)
            for _ in range(repeat):
                a = self.getTrueAction(s)
                s2, r = self.doAction(s, a)
                self.replayMemory.append((s, a, r, s2))
                a = self.getWrongAction(s)
                s2, r = self.doAction(s, a)
                self.replayMemory.append((s, a, r, s2))
        random.shuffle(self.replayMemory)

        # Generate binary heap: every transition is added with value 1.0
        # while the total of rank ** -alpha over all ranks is accumulated.
        rank = 1
        rankSum = 0
        for data in self.replayMemory:
            self.binaryHeap.add(data, 1.0)
            rankSum += (1.0 / rank) ** self.alpha
            rank += 1

        self.rankIndex = []
        segment = rankSum / self.minibatch
        if self.samplePolicy == 'rank':
            # Record the index at which each segment's accumulated
            # rank ** -alpha reaches one minibatch share of the total.
            rank = 1
            segmentRankSum = 0
            for i in range(1, len(self.replayMemory)):
                segmentRankSum += (1.0 / rank) ** self.alpha
                rank += 1
                if segmentRankSum >= segment:
                    self.rankIndex.append(i)
                    segmentRankSum = 0
            self.rankIndex.append(len(self.replayMemory) - 1)

    def getAction(self, s):
        """Return an epsilon-greedy action for state s."""
        if random.random() < self.epsilon:
            return random.randint(0, self.totalAction - 1)
        return np.argmax(self.getQval(s))

    def getFeatures(self, s, a):
        """Return the one-hot feature vector of (s, a) with a trailing bias entry."""
        # Built-in int replaces the np.int alias removed in NumPy 1.24.
        features = np.zeros((self.totalState * self.totalAction + 1), dtype=int)
        features[s * self.totalAction + a] = 1
        features[self.totalState * self.totalAction] = 1  # bias
        return features

    def getQval(self, s, a=None):
        """Return the value of (s, a), or the values of all actions when a is None."""
        if self.mode == 'FA':
            if a is None:
                return [self.params.dot(self.getFeatures(s, action))
                        for action in range(self.totalAction)]
            return self.params.dot(self.getFeatures(s, a))
        if a is None:
            return self.Qval[s]
        return self.Qval[s, a]

    def isWrongAction(self, s, a):
        """Return True when a is the wrong action of state s."""
        return bool(self.wrongActions[s] == a)

    def getTrueAction(self, s):
        """Return the action of state s that is not the wrong one (two actions assumed)."""
        return 1 - self.wrongActions[s]

    def getWrongAction(self, s):
        """Return the wrong action of state s."""
        return self.wrongActions[s]

    def doAction(self, s, a):
        """
        returns next state and reward
        """
        if self.isWrongAction(s, a):
            return 0, 0
        if s < self.totalState - 1:
            return s + 1, 0
        return 0, 1

    def updateValue(self, s, a, td):
        """Apply one update of size stepSize * td to the value of (s, a)."""
        if self.mode == 'FA':
            self.params += self.stepSize * td * self.getFeatures(s, a)
        else:
            self.Qval[s, a] = self.getQval(s, a) + self.stepSize * td
        if self.samplePolicy == 'maxPriority':
            self.binaryHeap.reorderTop(np.abs(td))

    def isComplete(self):
        """Return True when the mean squared error against the ground truth is <= 1e-3."""
        R = 1.0
        error = 0
        # The ground truth of the right action is 1 in the last state and
        # is multiplied by the discount for every step further from it.
        for s in range(self.totalState - 1, -1, -1):
            for a in range(self.totalAction):
                estimate = self.getQval(s, a)
                if self.isWrongAction(s, a):
                    groundTruth = 0
                else:
                    groundTruth = R
                error += np.square(groundTruth - estimate)
            R *= self.discount
        return bool(error / (self.totalState * self.totalAction) <= 10**-3)

    def sampleReplay(self, segment):
        """Return (index, weight, transition) drawn according to samplePolicy.

        segment selects which rank segment to draw from and is only used
        by the 'rank' policy.
        """
        if self.samplePolicy == 'uniform':
            index = random.randint(0, len(self.replayMemory) - 1)
            return index, 1.0, self.replayMemory[index]
        elif self.samplePolicy == 'maxPriority':
            index = 1
            item = self.binaryHeap.getTop()
            return index, 1.0, item[0]
        elif self.samplePolicy == 'rank':
            if segment == 0:
                index1 = 1
            else:
                index1 = self.rankIndex[segment - 1] + 1
            index2 = self.rankIndex[segment]
            index = random.randint(index1, index2)
            item = self.binaryHeap.heap[index]
            weight = (1.0 / index / self.totalState) ** self.beta
            # Weights are scaled by the largest weight seen so far.
            if weight > self.maxWeight:
                self.maxWeight = weight
            weight = weight / self.maxWeight
            # DJDJ
            #weight = 1.0
            return index, weight, item[0]

    def gogoReplay(self):
        """Train from the replay memory repeatNo times and print iteration statistics."""
        print('Training replay : policy %s' % self.samplePolicy)
        startTime = time.time()
        trainDone = []
        paramSum = np.zeros((self.totalState * self.totalAction + 1))
        for repeat in range(self.repeatNo):
            self.initialize()
            for i in range(self.maxIter):
                paramSum.fill(0)
                for m in range(self.minibatch):
                    index, weight, (s, a, r, s2) = self.sampleReplay(m)
                    if s2 == 0:  # terminal state
                        td = r - self.getQval(s, a)
                    else:
                        td = (r + self.discount * np.max(self.getQval(s2))
                              - self.getQval(s, a))
                    if self.mode == 'FA':
                        # Accumulated here, applied once per minibatch below.
                        paramSum += (self.stepSize * weight * td
                                     * self.getFeatures(s, a))
                    else:
                        self.Qval[s, a] = self.getQval(s, a) + self.stepSize * td
                    self.binaryHeap.reorder(index, np.abs(td))
                self.params += paramSum
                if i % 10 == 0:
                    if self.isComplete():
                        print('training done %s out of %s'
                              % (repeat + 1, self.repeatNo))
                        trainDone.append(i)
                        break
        print('%s' % trainDone)
        print('%s state training complete with %s mean iters = %.0f'
              % (self.totalState, self.mode, np.mean(trainDone)))
        print('elapsed : %.1fs' % (time.time() - startTime))

    def gogoOnline(self):
        """Train online with epsilon-greedy actions repeatNo times and print statistics."""
        print('Training online')
        trainDone = []
        for repeat in range(self.repeatNo):
            s = 0
            self.initialize()
            for i in range(self.maxIter):
                a = self.getAction(s)
                s2, r = self.doAction(s, a)
                if s2 == 0:  # terminal state
                    td = r - self.getQval(s, a)
                else:
                    td = (r + self.discount * np.max(self.getQval(s2))
                          - self.getQval(s, a))
                self.updateValue(s, a, td)
                s = s2
                if i % 10 == 0:
                    if self.isComplete():
                        #print 'training done %s out of %s' % (repeat+1, self.repeatNo)
                        trainDone.append(i)
                        break
        print('%s' % trainDone)
        print('%s state training complete with %s mean iters = %.0f'
              % (self.totalState, self.mode, np.mean(trainDone)))
class TestBinaryHeap(unittest.TestCase):
    """Exercise pop() and decreaseKey() of the pre-sized BinaryHeap."""

    def setUp(self):
        """Create the default heap: first argument 15, src 2, 999 as infinity.

        NOTE(review): the previous comment spoke of 4 initial elements,
        while the first constructor argument is 15 -- confirm its meaning.
        """
        size, source, infinity = 15, 2, 999
        self.bh = BinaryHeap(size, source, infinity)

    def tearDown(self):
        """Drop the heap reference after each test."""
        self.bh = None

    def test_basic(self):
        """The source id is the first one popped."""
        first = self.bh.pop()
        self.assertEqual(2, first)

    def test_decreaseKey(self):
        """Ids come out by ascending priority once their keys were lowered."""
        for vertex, priority in ((0, 19), (1, 7), (3, 22)):
            self.bh.decreaseKey(vertex, priority)

        # Source first, then priorities 7, 19 and 22.
        for expected in (2, 1, 0, 3):
            self.assertEqual(expected, self.bh.pop())

    def test_decreaseKeyMany(self):
        """Lower ids 1..999 to a priority equal to the id; pops must be 0..999 in order."""
        self.bh = BinaryHeap(1000, 0, 999)
        for vertex in reversed(range(1, 1000)):
            self.bh.decreaseKey(vertex, vertex)
        for expected in range(1000):
            self.assertEqual(expected, self.bh.pop())

    def test_multipleDecreaseKey(self):
        """Call decreaseKey repeatedly on the same ids; every id must still be popped once."""
        self.bh = BinaryHeap(50, 0, 999)
        for vertex in range(1, 50):
            for priority in range(1, vertex):
                self.bh.decreaseKey(vertex, priority)

        popped = [self.bh.pop() for _ in range(50)]
        self.assertEqual(list(range(50)), sorted(popped))

    def test_decreaseKeyRandom(self):
        """Assign random priorities to ids 1..999; every id must be popped exactly once."""
        self.bh = BinaryHeap(1000, 0, 999)
        for vertex in reversed(range(1, 1000)):
            priority = random.randint(1, 999)
            self.bh.decreaseKey(vertex, priority)

        popped = [self.bh.pop() for _ in range(1000)]
        self.assertEqual(list(range(1000)), sorted(popped))
class TestBinaryHeap(unittest.TestCase):
    """Unit tests for BinaryHeap.pop() and BinaryHeap.decreaseKey()."""

    def setUp(self):
        """Build the heap shared by the basic tests: src is 2, 999 stands for infinity.

        NOTE(review): the previous comment mentioned 4 initial elements,
        but the first constructor argument is 15 -- confirm its meaning.
        """
        self.bh = BinaryHeap(15, 2, 999)

    def tearDown(self):
        """Release the heap built in setUp."""
        self.bh = None

    def test_basic(self):
        """Popping a fresh heap returns the source id."""
        self.assertEqual(2, self.bh.pop())

    def test_decreaseKey(self):
        """After lowering three keys, ids are popped by ascending priority."""
        self.bh.decreaseKey(0, 19)
        self.bh.decreaseKey(1, 7)
        self.bh.decreaseKey(3, 22)

        pop_order = [2, 1, 0, 3]
        for expected_id in pop_order:
            self.assertEqual(expected_id, self.bh.pop())

    def test_decreaseKeyMany(self):
        """With priority == id for ids 1..999, pops return 0, 1, ..., 999."""
        self.bh = BinaryHeap(1000, 0, 999)
        for ident in range(999, 0, -1):
            self.bh.decreaseKey(ident, ident)
        for expected_id in range(1000):
            self.assertEqual(expected_id, self.bh.pop())

    def test_multipleDecreaseKey(self):
        """Repeated decreaseKey calls on one id must not lose or duplicate ids."""
        self.bh = BinaryHeap(50, 0, 999)
        for ident in range(1, 50):
            for key in range(1, ident):
                self.bh.decreaseKey(ident, key)

        drained = []
        while len(drained) < 50:
            drained.append(self.bh.pop())
        self.assertEqual(list(range(50)), sorted(drained))

    def test_decreaseKeyRandom(self):
        """Random priorities for ids 1..999 still yield each id exactly once."""
        self.bh = BinaryHeap(1000, 0, 999)
        for ident in range(999, 0, -1):
            self.bh.decreaseKey(ident, random.randint(1, 999))

        drained = []
        while len(drained) < 1000:
            drained.append(self.bh.pop())
        self.assertEqual(list(range(1000)), sorted(drained))
def setUp(self):
    """Create the heap used by each test: src is 2 and 999 stands for infinity.

    NOTE(review): the previous comment mentioned 4 initial elements, but
    the first constructor argument is 15 -- confirm its meaning.
    """
    size, source, infinity = 15, 2, 999
    self.bh = BinaryHeap(size, source, infinity)