def testAlgo(init=0):
    """Play one episode greedily from the trained Q-network, animating each step.

    init selects the board setup: 0 -> initGrid(), 1 -> initGridPlayer(),
    anything else -> initGridRand().

    Returns the terminal reward, or -10 if the episode exceeds 10 moves.
    """
    arrow = ["^", "v", "<", ">"]  # printable glyph per action index
    i = 0
    if init == 0:
        state = initGrid()
    elif init == 1:
        state = initGridPlayer()
    else:
        state = initGridRand()
    drawgridworld.draw_state(state, 0)
    time.sleep(0.5)
    #print("Initial State:")
    #print(dispGrid(state))
    status = 1
    #while game still in progress
    while (status == 1):
        qval = model.predict(state.reshape(1, 64), batch_size=1)
        action = (np.argmax(qval))  #take action with highest Q-value
        print('Move #: %s; Taking action: %s' % (i, arrow[action]))
        state = makeMove(state, action)
        #print(dispGrid(state))
        reward = getReward(state)
        if reward != -1:  # any non-step reward is terminal
            status = 0
            print("Reward: %s" % (reward, ))
        i += 1
        # NOTE(review): second arg is i - 10, negative for early moves —
        # confirm draw_state's frame-index semantics.
        drawgridworld.draw_state(state, i - 10)
        time.sleep(0.5)
        #If we're taking more than 10 actions, just stop, we probably can't win this game
        # FIX: only abort on overtime while the game is still in progress;
        # previously this clobbered a terminal reward earned on move 11 with -10.
        if status == 1 and i > 10:
            print("Game lost; too many moves.")
            reward = -10
            break
    return reward
def testAlgo(init=0):
    """Play one greedy episode, buffering a transcript that is printed on exit.

    init selects the board setup: 0 -> initGrid(), 1 -> initGridPlayer(),
    anything else -> initGridRand().

    Returns the terminal reward, or -10 if no terminal state is reached
    within 10 moves.
    """
    i = 0
    if init == 0:
        state = initGrid()
    elif init == 1:
        state = initGridPlayer()
    else:
        # FIX: was `elif init == 2`, which left `state` unbound (NameError)
        # for any other init value; sibling variants use a catch-all else.
        state = initGridRand()
    str_to_print = 'Initial State:'
    str_to_print += '\n%s' % dispGrid(state)
    # while game still in progress (hard cap of 10 moves)
    for i in range(10):
        qval = model.predict(state.reshape(1, 64), batch_size=1)
        str_to_print += '\n%s' % str(qval)
        action = (np.argmax(qval))  #take action with highest Q-value
        new_state = makeMove(state, action)
        reward = getReward(new_state)
        state = new_state
        str_to_print += '\n%s' % 'Move #: %s; Taking action: %s' % (i, action)
        str_to_print += '\n%s' % dispGrid(state)
        if reward != -1 and reward != -5:  # -1/-5 are non-terminal step rewards
            # NOTE(review): transcript is only printed on a -10 loss; wins
            # return silently — confirm this is intentional.
            if reward == -10:
                print(str_to_print + '\n%s' % "Reward: %s" % (reward, ))
            return reward
    print(str_to_print + '\n%s' % "Game lost; too many moves.")
    return -10
def testAlgo(init=0):
    """Run one episode with the greedy policy, drawing each state transition.

    init selects the board setup: 0 -> initGrid(), 1 -> initGridPlayer(),
    anything else -> initGridRand().

    Returns the terminal reward, or -10 after more than 10 moves.
    """
    arrow = ["^", "v", "<", ">"]  # display glyph for each action index
    i = 0
    if init == 0:
        state = initGrid()
    elif init == 1:
        state = initGridPlayer()
    else:
        state = initGridRand()
    drawgridworld.draw_state(state, 0)
    time.sleep(0.5)
    #print("Initial State:")
    #print(dispGrid(state))
    status = 1
    #while game still in progress
    while (status == 1):
        qval = model.predict(state.reshape(1, 64), batch_size=1)
        action = (np.argmax(qval))  #take action with highest Q-value
        print('Move #: %s; Taking action: %s' % (i, arrow[action]))
        state = makeMove(state, action)
        #print(dispGrid(state))
        reward = getReward(state)
        if reward != -1:  # any non-step reward ends the game
            status = 0
            print("Reward: %s" % (reward,))
        i += 1
        # NOTE(review): i - 10 is negative for the first ten frames —
        # confirm what draw_state expects for its second argument.
        drawgridworld.draw_state(state, i - 10)
        time.sleep(0.5)
        #If we're taking more than 10 actions, just stop, we probably can't win this game
        # FIX: apply the move cap only while still in progress, so a terminal
        # reward reached on move 11 is not overwritten with -10.
        if status == 1 and i > 10:
            print("Game lost; too many moves.")
            reward = -10
            break
    return reward
def testAlgo(init=0):
    """Interactive episode: the trained agent and a human alternate moves.

    The agent moves first each round using the greedy action from the
    Q-network; then the human enters a move (0-3) on stdin. The loop ends
    when either side reaches a terminal reward (+/-10). Returns None.

    NOTE(review): `init` is accepted but unused — the board is always
    initGrid(); confirm whether the other init modes were intended here.
    """
    state = initGrid()
    print("Initial State:")
    dispGrid(state)
    status = 1  # loop flag; never cleared — the loop exits only via break
    i = 0       # move counter (display only; there is no move cap here)
    #while game still in progress
    while (status == 1):
        # Agent's turn: greedy action from the network's Q-values.
        # NOTE(review): reshape(1, 80) implies an 80-element state encoding
        # here, unlike the 64-element variants elsewhere — confirm.
        qval = model.predict(state.reshape(1, 80), batch_size=1)
        action = (np.argmax(qval))  #take action with highest Q-value
        print('Move #: %s; Taking action: %s' % (i, action))
        # Third argument 4 presumably identifies the agent's piece/player —
        # TODO confirm makeMove's signature.
        state = makeMove(state, action, 4)
        dispGrid(state)
        reward = getReward(state)
        if reward == -10:
            print("The agent steped on the pit.. You won!")
            state = 0
            break
        elif reward == 10:
            print("The agent won!")
            state = 0
            break
        # Human's turn: read an action index from stdin.
        # NOTE(review): no validation — non-integer input raises ValueError.
        print("Enter your move (0,1,2,3) for (up,down,left,right)")
        action = int(input())
        # Third argument 3 presumably identifies the human's piece/player.
        state = makeMove(state, action, 3)
        # NOTE(review): getReward is called with an extra arg (3) here but
        # with one arg for the agent above — confirm the intended signature.
        reward = getReward(state, 3)
        dispGrid(state)
        if reward == -10:
            print("You Lost!")
            state = 0
            break
        elif reward == 10:
            print("You won!")
            state = 0
            break
        i += 1
#model.add(Dropout(0.2)) model.add(Dense(4, init='lecun_uniform')) model.add(Activation( 'linear')) #linear output so we can have range of real-valued outputs rms = RMSprop() model.compile(loss='mse', optimizer=rms) epochs = 1000 gamma = 0.9 #since it may take several moves to goal, making gamma high epsilon = 1 for i in range(epochs): state = initGrid() status = 1 #while game still in progress while (status == 1): #We are in state S #Let's run our Q function on S to get Q values for all possible actions qval = model.predict(state.reshape(1, 80), batch_size=1) if (random.random() < epsilon): #choose random action action = np.random.randint(0, 4) else: #choose best action from Q(s,a) values action = (np.argmax(qval)) #Take action, observe new state S' new_state = makeMove(state, action) new_state = makeMove(new_state, np.random.randint(0, 4), 3) #Observe reward reward = getReward(new_state)