def bubbleSort(data):
    data_length = len(data)
    for i in range(data_length - 1):
        for j in range(data_length - i - 1):
            if data[j] > data[j + 1]:
                data[j], data[j + 1] = data[j + 1], data[j]
                draw(j + 1, data)
def selectionSort(data):
    data_length = len(data)
    for i in range(data_length - 1):
        least = i
        for j in range(i + 1, data_length):
            if data[j] < data[least]:
                least = j
        if i != least:
            data[i], data[least] = data[least], data[i]
        draw(i + 1, data)
def insertionSort(data):
    data_length = len(data)
    for i in range(1, data_length):
        key = data[i]
        j = i - 1
        # Test j >= 0 before indexing, so data[-1] (the last element,
        # under Python's negative indexing) is never compared
        while j >= 0 and data[j] > key:
            data[j + 1] = data[j]
            j -= 1
        data[j + 1] = key
        draw(j + 1, data)
def heapify(data, index, size):
    largest = index
    left = 2 * index + 1
    right = 2 * index + 2
    if left < size and data[left] > data[largest]:
        largest = left
    if right < size and data[right] > data[largest]:
        largest = right
    if largest != index:
        data[largest], data[index] = data[index], data[largest]
        heapify(data, largest, size)
        draw(largest, data)
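# heapify() only restores the max-heap property at a single node; sorting
# needs a driver that builds the heap and repeatedly extracts the maximum.
# The sketch below is one minimal way to drive it (an assumption, not taken
# from this file), reusing the draw(index, data) visualization callback that
# the sorts above rely on.
def heapSort(data):
    data_length = len(data)
    # Build a max-heap bottom-up, starting from the last internal node
    for i in range(data_length // 2 - 1, -1, -1):
        heapify(data, i, data_length)
    # Move the current maximum to the end, then re-heapify the shrunk prefix
    for i in range(data_length - 1, 0, -1):
        data[0], data[i] = data[i], data[0]
        draw(i, data)
        heapify(data, 0, i)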
def testQLearning(gamma, epsilon, surprise):
    global state_sequence
    print('** Testing Q-Learning algorithm **')
    print('Gamma: ', gamma)
    print('Epsilon: ', epsilon)
    print('Surprise: ', surprise)
    qLearning(gamma, epsilon, surprise)
    # Show only the last 20 states of the generated walk
    l = len(state_sequence)
    print('\tState sequence ', state_sequence[l - 20:])
    animate.draw(state_sequence[l - 20:])
def main():
    # Variables used for reinforcement learning (presumably: T is the number
    # of training steps, n the learning rate, s the number of states)
    gamma = 0.7
    T = 10000
    epsilon = 0.2
    s = 12
    n = 0.1
    e = Environment(s)
    Q = calculate_route(T, n, gamma, e, s, epsilon)
    print("calculation done")
    draw(e.get_loop(Q, s))
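# For reference, the core step a routine like calculate_route() presumably
# performs is the standard tabular Q-learning update. The sketch below is
# illustrative only: q_update and its arguments are hypothetical names, while
# n (learning rate) and gamma (discount factor) match main() above.
def q_update(Q, state, action, reward, next_state, n, gamma):
    # Q is a table of shape [num_states][num_actions]; move Q(s, a) toward
    # the one-step bootstrapped target r + gamma * max_a' Q(s', a')
    best_next = max(Q[next_state])
    Q[state][action] += n * (reward + gamma * best_next - Q[state][action])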
def testPolicyIteration(rew, gamma, iterations):
    print('** Testing policy iteration algorithm **')
    print('Gamma: ', gamma)
    print('Iterations: ', iterations)
    policyIteration(rew, gamma, iterations)
    print('\tOptimal policy: ', policy)
    optimal_state_map = [trans[a][policy[a]] for a in range(len(policy))]
    print('\tOptimal state map', optimal_state_map)
    # Start from a random state and follow the optimal policy for 20 steps
    current_state = int(random.random() * len(policy))
    path = [current_state]
    for i in range(20):
        current_state = optimal_state_map[current_state]
        path += [current_state]
    print('\t(Hopefully) correct walking path', path)
    animate.draw(path)
        # Fragment: apparently the value-update step inside pol_iter's
        # evaluation loop, with debug prints showing the reward and backup
        a = policy[s]
        print(str(rew[s][a]) + "= " + str(s) + "|" + str(a))
        print(str(rew[s][a]) + " + " + str(gamma) + " * " + str(value[trans[s][a]]))
        value[s] = rew[s][a] + gamma * value[trans[s][a]]

def move(state, steps):
    '''Follow the current policy from state for the given number of steps.'''
    ret = [state]
    for s in range(steps):
        state = trans[state][policy[state]]
        ret.append(state)
    return ret

print(rew)
print("=== before ")
print(policy)
print(value)
pol_iter(100)
print("=== after ")
print(policy)
print(value)
result = move(0, 20)
a.draw(result)  # a is presumably the animation module here
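# The pol_iter() loop the fragment above belongs to is not shown. The sketch
# below is one plausible shape for it (an assumption, not the author's code):
# a sweep of Bellman updates under the current policy followed by greedy
# improvement, with deterministic transitions trans[s][a] as in move() above.
def pol_iter(iterations):
    for _ in range(iterations):
        # Policy evaluation: one sweep of the deterministic Bellman update
        for s in range(len(policy)):
            a = policy[s]
            value[s] = rew[s][a] + gamma * value[trans[s][a]]
        # Policy improvement: act greedily on the one-step lookahead
        for s in range(len(policy)):
            policy[s] = max(range(len(trans[s])),
                            key=lambda a: rew[s][a] + gamma * value[trans[s][a]])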
          pylab.imread('step5.png'), pylab.imread('step6.png'),
          pylab.imread('step7.png'), pylab.imread('step8.png'),
          pylab.imread('step9.png'), pylab.imread('step10.png'),
          pylab.imread('step11.png'), pylab.imread('step12.png'),
          pylab.imread('step13.png'), pylab.imread('step14.png'),
          pylab.imread('step15.png'), pylab.imread('step16.png'))

# comic = numpy.concatenate([images[i] for i in result], axis=1)
# pylab.imshow(comic)
# pylab.show()

env = Environment()
epsilon = 0.01
eta = 0.1
gamma = 0.9
q = qlearn(env, 200000, epsilon, eta, gamma)
print(q)
moves = qMove(0, 20, q)
print(moves)
a.draw(moves)
comic = numpy.concatenate([images[i] for i in moves], axis=1)
pylab.imshow(comic)
pylab.show()
          pylab.imread('step9.png'), pylab.imread('step10.png'),
          pylab.imread('step11.png'), pylab.imread('step12.png'),
          pylab.imread('step13.png'), pylab.imread('step14.png'),
          pylab.imread('step15.png'), pylab.imread('step16.png'))

# Visualization of the robot walk
# comic = numpy.concatenate([images[i] for i in sequence], axis=1)
# pylab.imshow(comic)
# pylab.show()

animate.draw(sequence)


class Environment:
    '''Representation of the environment for the Q-learning algorithm'''

    def __init__(self, state=0):
        self.state = state
        self.trans = ((1, 3, 4, 12),
                      (0, 2, 5, 13),
                      (3, 1, 6, 14),
                      (2, 0, 7, 15),
                      (5, 7, 0, 8),
                      (4, 6, 1, 9),