Example #1
    def jugar(self):
        cur = 1
        it = 1
        ql = Qlearning()

        while True:
            self.dist_prev = -1
            self.player.__init__()
            print("intento: ", cur)
            while not self.game_over:
                pred = ql.predecir(self.get_estado())
                if mal_direc[self.player.direcc] == pred:
                    self.game_over = True
                else:
                    self.player.direcc = pred
                    self.player.mover()
                    ql.actualizar(self.get_premio(), self.get_estado())

                if self.revisar():
                    ql.actualizar(10, self.get_estado())
                    self.dist_prev = -1

                it += 1

            print("iteration: ", it)
            # don't lose progress: update with the penalty before restarting
            ql.actualizar(-10, self.get_estado())
            self.game_over = False
            #print('Game over')
            cur += 1
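
Example #1 calls a Qlearning helper with predecir/actualizar methods that is not shown. A minimal tabular sketch of such an interface, assuming hashable states, four integer actions, and an epsilon-greedy policy (all assumptions, not the original author's class), could look like this:

import random
from collections import defaultdict

class Qlearning:
    """Minimal tabular Q-learning sketch matching the predecir/actualizar calls above."""

    def __init__(self, n_actions=4, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.q = defaultdict(lambda: [0.0] * n_actions)  # Q[state] -> list of action values
        self.n_actions = n_actions
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        self.last_state = None
        self.last_action = None

    def predecir(self, state):
        # epsilon-greedy action selection; remember (state, action) for the next update
        if random.random() < self.epsilon:
            action = random.randrange(self.n_actions)
        else:
            values = self.q[state]
            action = values.index(max(values))
        self.last_state, self.last_action = state, action
        return action

    def actualizar(self, reward, new_state):
        # one-step Q-learning update: Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        if self.last_state is None:
            return
        target = reward + self.gamma * max(self.q[new_state])
        current = self.q[self.last_state][self.last_action]
        self.q[self.last_state][self.last_action] = current + self.alpha * (target - current)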
Example #2
    def __init__(self, root):
        self.root = root
        self.root.title("My GridWorld")
        self.root.resizable(width=FALSE, height=FALSE)
        self.rightFrame = Frame(self.root)
        self.rightFrame.pack(side=RIGHT, fill=Y)
        self.leftFrame = Frame(self.root)
        self.leftFrame.pack(side=LEFT, fill=Y)
        Label(self.rightFrame, text="    ", fg='white').grid(row=0, column=2, sticky=W)
        Label(self.rightFrame, text="    ", fg='white').grid(row=0, column=0, sticky=E)

        self.grid = Canvas(self.leftFrame, width=640, height=640)

        self.gridMatrix = [[0 for row in range(self.grid_y)] for col in range(self.grid_x)]
        self.qMatrix = [[0 for row in range(self.grid_y)] for col in range(self.grid_x)]

        self.qlearner = Qlearning.Algorithm(self.gridMatrix)
        self.qMatrix_calculated = False
        # Radio Buttons for Add/remove cell section
        self.cell_radio_btn_val = IntVar()
        self.cell_radio_btn_val.set(1)
        # Radio Buttons for QL selection policies
        self.policy_radio_btn_val = IntVar()
        self.policy_radio_btn_val.set(1)
        # Radio Buttons for heat map selection
        self.heatmap_radio_btn_val = IntVar()
        self.heatmap_radio_btn_val.set(2)
        self.set_new_grid()
        self.create_left_side_elements()
        self.grid.bind('<Button-2>', self.reset_start_goal_cell)
        self.grid.bind('<Button-1>', self.add_start_goal_cell)
        self.alpha_text_var = StringVar()
        self.gamma_text_var = StringVar()
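
A minimal usage sketch for the Tkinter GUI above, assuming the surrounding class is called GridWorldApp (the name is not shown in the excerpt):

from tkinter import Tk

root = Tk()
app = GridWorldApp(root)  # placeholder class name; the excerpt only shows its __init__
root.mainloop()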
Example #3
def main():
    maze = readMaze('maze2.txt')
    #printMaze(maze)
    findStartAndFinish(maze)

    maze[start[0]][start[1]] = PLAYER

    #solveAStar(maze, start, finish)

    #debugWM(maze, start, Directions.UP, 10, 10)

    #play(maze, start, Directions.UP, 5, 5)

    startDir = Directions.UP
    Qlearning(3000, 0.4, 0.1, maze, start, startDir, 5, 5)
Example #4
    def __init__(self):

        self.env = Qgame()
        self.env.reset()
        self.action = -1
        done = False
        Q = Qlearning.Qlearning(self)

        while True:
            # suc = self.getSuccessors(self.getStartState())
            #
            # if len(suc) < 1:
            #     print("lose")
            #     self.env.setlose()
            #     done = True
            #     # step = random.randrange(0, 3)
            #     # self.env.step(step)
            # else:
            #     step = random.choice(suc)
            #     #print(step)
            #
            #     self.env.render()
            #     self.env.step(step[1])

            self.env.render()
            currentstate = self.getGrid().index(self.getStartState())
            food = self.getGrid().index(self.env.getFood())
            actions = Q[food][currentstate]
            action = actions.index(max(actions))
            # print(action, self.getStartState())
            previouscell = self.env.getSnake()[0]

            done = self.env.step(action)
            print(previouscell, action,
                  self.env.getSnake()[0], self.env.getFood())

            if done:
                break
Example #5
def run_q_learning():
    game_count = 1
    learner = Qlearning.Learner(DIS_WIDTH, DIS_HEIGHT, BLOCK_SIZE)
    scores_imp = []
    condition = True
    while condition:
        learner.Reset()
        if game_count > 100:
            ticks = 50
            learner.epsilon = 0
        else:
            learner.epsilon = .1
        
        score, reason = Q_GameLoop(learner)
        print(f"Games: {game_count}; Score: {score}; Reason: {reason}") # Output results of each game to console to monitor as agent is training
        game_count += 1
        if game_count % 100 == 0: # Save Q-values every 100 games
            print("Save Qvals")
            learner.SaveQvalues()
        if game_count > 300 and game_count < 315:
            scores_imp.append(score)
        if game_count == 316:
            condition = False
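
The loop above switches epsilon from 0.1 straight to 0 after 100 games. A common alternative, shown here only as an assumed sketch and not part of the original learner, is a smooth exponential decay:

def decayed_epsilon(game_count, eps_start=0.1, eps_min=0.0, decay=0.97):
    # exploration rate shrinks geometrically with the number of games played
    return max(eps_min, eps_start * decay ** game_count)

# inside run_q_learning: learner.epsilon = decayed_epsilon(game_count)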
Example #6
###################
#COMPARISON BETWEEN DDMQ AND BASIC FLAT QLEARNING

reward_no_option = np.zeros(nbEpisodsTotal)
n_steps_no_option = np.zeros(nbEpisodsTotal)

reward_option = np.zeros(nbEpisodsTotal)
n_steps_option = np.zeros(nbEpisodsTotal)

score_concepts_50_mean = np.zeros(env.n_states)

for k in range(N_runs):

    print("BASIC Q-LEARNING")
    print('Visualise cumulated discounted reward from the first trajectory')
    Q, V, policy, traj_without_option, reward_without_option = Qlearning(
        env, eps_0, nbEpisodsTotal, Tmax, plot=False, return_trajectories=True)

    print("MACRO Q-LEARNING")
    MQL, traj_with_option, reward_with_option, score_concepts_50 = DDMQ(
        plot=False)

    reward_no_option += np.array(reward_without_option)
    n_steps_no_option += np.array([len(t) for t in traj_without_option])
    reward_option += np.array(reward_with_option)
    n_steps_option += np.array([len(t) for t in traj_with_option])

    score_concepts_50_mean += score_concepts_50

reward_no_option /= N_runs
reward_option /= N_runs
n_steps_no_option /= N_runs
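
The averaged arrays are typically plotted after the loop. A minimal matplotlib sketch, assuming the arrays computed above (and finishing the averaging for the two arrays the excerpt leaves out), could be:

import matplotlib.pyplot as plt

n_steps_option /= N_runs          # not averaged in the excerpt above
score_concepts_50_mean /= N_runs  # idem

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(reward_no_option, label='flat Q-learning')
ax1.plot(reward_option, label='macro Q-learning (DDMQ)')
ax1.set_xlabel('episode')
ax1.set_ylabel('cumulated discounted reward')
ax1.legend()
ax2.plot(n_steps_no_option, label='flat Q-learning')
ax2.plot(n_steps_option, label='macro Q-learning (DDMQ)')
ax2.set_xlabel('episode')
ax2.set_ylabel('steps per episode')
ax2.legend()
plt.tight_layout()
plt.show()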
Example #7
def learnFrozenLake(fn, isStochastic, lrStochastic, lrDeepQ, isPrint, deviceName):
    
    ## 4-0. Read the map and initialize the Q-table ##
    info = mapReader.readMap(fn, False) # read the map

    map_ = info[0]
    width = info[1] # map width
    height = info[2] # map height

    Qtable = mapReader.initQtable(width, height) # initialize the Q-table

    ## 4-1. Train with Q-learning first ##
    if isPrint: print('\n\n---< 1. Training with Q-learning >---')
    if isStochastic: data = Qlearning_stochastic.learning(map_, width, height, Qtable, 1, lrStochastic, isPrint)
    else: data = Qlearning.learning(map_, width, height, Qtable, 0, isPrint) # a list of (S, A, R, S) transitions

    ## 4-2. Convert the results of step 1 into usable data and build the training set
    # array storing (state, reward per action) pairs for training the neural network
    stateInfo = [] # format: [row (y)][column (x)][reward per action]
    for i in range(height):
        temp = []
        for j in range(width):
            temp.append([0, 0, 0, 0])
        stateInfo.append(temp)

    # build the training data (state, reward per action)
    states = [] # one-hot encoded
    outputs = []

    # Apply one-hot encoding to state0 and state1.
    # If width=W and height=H, then state = [w0, w1, ..., w(W-1), h0, h1, ..., h(H-1)].
    # If the current position is (wX, hY), then wX=1 and hY=1 in the state and all other entries are 0.

    # Apply one-hot encoding to the action.
    # Initialize action = [0, 0, 0, 0]; if the action index is A, set action[A]=1 so only that entry is 1.
    
    for i in range(len(data)):
        # state(t)
        state0 = [0]*(width+height)
        # if the coordinates of state(t) are (wX, hY),
        state0[data[i][0][1]] = 1 # set wX of state(t) to 1
        state0[width + data[i][0][0]] = 1 # set hY of state(t) to 1

        # action(t)
        action0 = [0, 0, 0, 0]
        action0[data[i][1]] = 1

        # reward(t+1)
        reward1 = data[i][2]

        # state(t+1)
        state1 = [0]*(width+height)
        # if the coordinates of state(t+1) are (wX, hY),
        state1[data[i][3][1]] = 1 # set wX of state(t+1) to 1
        state1[width + data[i][3][0]] = 1 # set hY of state(t+1) to 1

        # after every action, update the per-action reward in the stateInfo table
        stateInfo[data[i][0][0]][data[i][0][1]][data[i][1]] = reward1

        if i >= len(data)-800: # train on the last 800 samples only
            states.append(state0) # state part of the training data
            temp = []
            for j in range(4):
                rewardVal = stateInfo[data[i][0][0]][data[i][0][1]][j]
                rewardVal = 1 / (1 + math.exp(-rewardVal)) # apply the sigmoid function
                temp.append(rewardVal) # per-action reward part of the training data
            outputs.append(temp)

    # print part of the training data
    if isPrint:
        print('<training data sample>')
        for i in range(50):
            print(str(states[i]) + ': ' + str(np.array(outputs[i])))

    ## 4-3. Train with Deep Q-Learning ##
    if isPrint: print('\n\n---< 2. Training with deep learning >---')

    model = create_model([tf.keras.layers.Flatten(input_shape=(width+height,)),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(4, activation='sigmoid')],
                         tf.keras.optimizers.Adam(0.001), 'mean_squared_error', isPrint)

    learning(model, [states], [outputs], 'FrozenLake', 4, deviceName)    

    ## 4-4. Test ##
    if isPrint: print('\n\n---< 3. Test >---')

    # load the previously trained model
    jsonFile = open('FrozenLake.json', 'r')
    loaded_model_json = jsonFile.read()
    jsonFile.close()
    newModel = tf.keras.models.model_from_json(loaded_model_json)
    newModel.load_weights('FrozenLake.h5')
    
    if isPrint: newModel.summary()
    newModel.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='mean_squared_error', metrics=['accuracy'])

    Qtable = mapReader.initQtable(width, height) # reinitialize the Q-table
    state = [0, 0] # initialize the state

    # replace the Q-table values with the reward values obtained from the model
    input_ = newModel.input
    output_ = [layer.output for layer in newModel.layers]
    func = K.function([input_, K.learning_phase()], output_)

    # take the states [i, j] as inputs
    testInput = []
    for i in range(height):
        for j in range(width):
            temp = [0]*(width+height) # array to append to testInput
            temp[j] = 1
            temp[width+i] = 1
            testInput.append(temp)
    testInput = np.array(testInput)

    # set each Q-table entry to 9.9 times the output-layer value (per-action reward) for state [i, j]
    result = func([testInput, 1])
    print('\n\n< deep-learned Q-table >')
    for i in range(height):
        for j in range(width):
            for k in range(4): Qtable[i][j][k] = result[3][i*width+j][k] * 9.9
            if isPrint: print('Qtable index ' + str(i) + ',' + str(j) + ' -> ' + str(result[3][i*width+j]))

    # run the agent and print the map
    if isStochastic: Qlearning_stochastic.execute(map_, width, height, Qtable, True)
    else: Qlearning.execute(map_, width, height, Qtable, True)
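
The one-hot state encoding described in the comments of Example #7 can be factored into a small helper; the sketch below is an assumption for illustration, not a function from the original code:

def one_hot_state(row, col, width, height):
    # (width + height)-long vector: 1 at the column index and 1 at width + row index
    state = [0] * (width + height)
    state[col] = 1           # wX = 1
    state[width + row] = 1   # hY = 1
    return state

# e.g. one_hot_state(row=1, col=2, width=4, height=3) -> [0, 0, 1, 0, 0, 1, 0]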
Example #8
if __name__ == "__main__":
	# First grid:
	env = grid.gridenv()
	# Second grid:
	env2 = grid.gridenv()
	# Combined grid
	env3 = grid.gridenv()
	# Probabilistic grid
	env4 = grid.gridenv()

	# Q-learning on first grid, test on first
	ploc, x = env.initDeterministicgrid(1)
	# v = valueIteration.ValueIteration(env, x, 0.5)
	# v.runIterations()
	# print v.run_agent(ploc, env)
	q = Qlearning.Qlearning(env, x)
	rewards = q.runIterations()
	print "?"
	env2.initProbalisticgrid(1)
	print "done"
	print q.run_agent(ploc, env2)



	exit()

	# Plot for first part
	plt.plot(rewards)
	plt.ylabel('Reward')
	plt.xlabel('Episode')
	plt.title('Reward per episode for Q learning')
Example #9
import Qlearning as Q
import Agent as A

#___________Initialisation  step : Get the initBoard of the game__________________________
Outputs = []  # list of outputs
# Create a Random agent first to get the init board
method = RM.RandomMethod()
playerAgent = A.Agent(method)

initGame = GIA.LaunchGame(playerAgent, Outputs, True)

InitBoard, initPosition = initGame.simpleBoard, initGame.initialPosition

#_____________Board initialized : we can make the real IA play____________________________
Outputs = []
method = Q.Qlearning()
playerAgent = A.Agent(
    method)  # we will be able to update the parameters of our agent easily
playerAgent.brain.initBoard_Qtable(InitBoard)
playerAgent.display()

#GIA.LaunchGame(playerAgent,Outputs) #not trained yet : performs really badly

#_____________Training the agent, and making tests________________________
trainIterations = 3
i = 0
epochs = 50

for j in range(trainIterations):
    while i % epochs != 0:
        etha = 5 / (j + 5)
master.bind("<Down>", call_down)
master.bind("<Right>", call_right)
master.bind("<Left>", call_left)
me = board.create_rectangle(player[0]*Width+Width*2/10, player[1]*Width+Width*2/10,
                            player[0]*Width+Width*8/10, player[1]*Width+Width*8/10, fill="blue", width=1, tag="me")
board.grid(row=0, column=0)


''' ============== 3. Construct Environment with the data provided. ============== '''
env = Environment(n,m,transitions,rewards)


''' ============== 4. Call the Q-learning class to build Q-matrix.  ============== '''
print("Building Q-Matrix...")
q_matrix = []
q_1 = Qlearning.q_learning(env,unsafe_dictionary,state_goal,0.95)
q_matrix.append(q_1)


''' ==== 5. Construct agent with data provided and Q-matrix (Knowledge Base). ==== '''
i_agent = Agent(state_0,env,q_matrix)


''' ===================== 6. Build path (non-deterministic). ===================== '''
discount = 0.3
steps = []
actual_steps = []
while True:
    next_action = i_agent.agent_function(0)
    next_state, new_action = i_agent.env.transition(i_agent.current_state,next_action)
    steps.append(str(new_action)+","+str(next_action))
    '''
    I don't know why there are always bugs when running MonteCarlo in this file;
    I tried to fix them but failed. The MonteCarlo test is done in the
    MonteCarlo_learning file instead; I hope you can understand that. I even
    spent several days trying to rewrite the code, but it still did not work.
    
    running_time_M, episode_M, plot_sum_reward_M = MON.m_test(20)
    episode_M_.append(episode_M)
    running_time_M_.append(running_time_M)
    plot_sum_reward_M_.append(plot_sum_reward_M)
    '''

    running_time_S, episode_S, plot_sum_reward_S = SAR.s_test(20)
    episode_S_.append(episode_S)
    running_time_S_.append(running_time_S)
    plot_sum_reward_S_.append(plot_sum_reward_S)

    running_time_Q, episode_Q, plot_sum_reward_Q = QL.q_test(20)
    episode_Q_.append(episode_Q)
    running_time_Q_.append(running_time_Q)
    plot_sum_reward_Q_.append(plot_sum_reward_Q)

Sarsa_run_time = sum(running_time_S_) / train_volumn
average_S_episode = sum(episode_S_) / train_volumn

Q_run_time = sum(running_time_Q_) / train_volumn
average_Q_episode = sum(episode_Q_) / train_volumn

print(f'Sarsa_run_time:{Sarsa_run_time},Q_run_time:{Q_run_time}')
print(f'running_time_S_:{running_time_S_}')
print(f'running_time_Q_:{running_time_Q_}')
# print(f'sum_reward_M:{plot_sum_reward_M_},sum_reward_S:{plot_sum_reward_S_},sum_reward_Q:{plot_sum_reward_Q_}')
print(f'episode_S_:{episode_S_}')
Example #12
def main(win, rows):
    grid = make_grid(win_size, rows)

    lookup = lookup_table(rows)
    q_learning = Qlearning(epsilon=0.7,
                           alpha=0.5,
                           gamma=0.99,
                           episodes=10000,
                           rows=rows)
    q_table = q_learning.q_table()

    start = None
    end = None

    run = True
    while run:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                run = False

            if pygame.mouse.get_pressed()[0]:
                pos = pygame.mouse.get_pos()
                row, col = get_mouse_pos(pos, win_size, rows)
                spot = grid[row][col]

                if not start and spot != end:
                    start = spot
                    start.make_start()
                elif not end and spot != start:
                    end = spot
                    end.make_end()
                elif spot != start and spot != end:
                    spot.make_barrier()

            if pygame.mouse.get_pressed()[2]:
                pos = pygame.mouse.get_pos()
                row, col = get_mouse_pos(pos, win_size, rows)
                spot = grid[row][col]
                spot.reset()

                if spot == start:
                    start = None
                elif spot == end:
                    end = None

            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_SPACE:
                    gridworld = Gridworld(grid, win_size, rows)
                    s, e, b = gridworld.get_start_end_barrier()

                    print('Training the model')
                    q_table = q_learning.fit(s, e, q_table, gridworld)

                    path = q_learning.return_path(s, e, q_table, gridworld)
                    print(path)

                    for p in path:
                        if p in b:
                            print('Tune the Hyperparameters')

                    for i in path[1:-1]:
                        spot = grid[i[1]][i[0]]
                        spot.make_path()

            draw(win, rows, grid)
master.bind("<Right>", call_right)
master.bind("<Left>", call_left)
me = board.create_rectangle(player[0] * Width + Width * 2 / 10,
                            player[1] * Width + Width * 2 / 10,
                            player[0] * Width + Width * 8 / 10,
                            player[1] * Width + Width * 8 / 10,
                            fill="blue",
                            width=1,
                            tag="me")
board.grid(row=0, column=0)
''' ============== 3. Construct Environment with the data provided. ============== '''
env = Environment(n, m, transitions, rewards)
''' ============== 4. Call the Q-learning class to build Q-matrix.  ============== '''
print("Building Q-Matrix...\n")
q_matrix = []
q_1 = Qlearning.q_learning(env, unsafe_dictionary, state_goal, 0.9)
env.exchange_start_goal(state_0, state_goal, unsafe_dictionary)
q_2 = Qlearning.q_learning(env, punishment_dict, state_0, 0.9)
env.exchange_start_goal(state_goal, state_0, unsafe_dictionary)
q_matrix.append(q_1)
q_matrix.append(q_2)
''' ==== 5. Construct agent with data provided and Q-matrix (Knowledge Base). ==== '''
i_agent = Agent(state_0, env, q_matrix)
''' ===================== 6. Build path (non-deterministic). ===================== '''
discount = 0.3
steps = [[], []]
for i in range(len(q_matrix)):
    while True:
        next_action = i_agent.agent_function(i)
        next_state, new_action = i_agent.env.transition(
            i_agent.current_state, next_action)
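
A usage sketch for the main(win, rows) function at the top of this example (assumed, not shown in the original; win_size, rows, and the caption are placeholders):

import pygame

win_size, rows = 800, 20
win = pygame.display.set_mode((win_size, win_size))
pygame.display.set_caption('Q-learning grid world')
main(win, rows)
pygame.quit()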
Example #14
    def __init__(self, knowledge_filename, unjustified=0.1, *args, **kwargs):
        self.mind = Qlearning.Qlearning(MF=unjustified)
        self.knowledge_filename = knowledge_filename
        super(CompPlayer, self).__init__(*args, **kwargs)
# Environment definition and initialisation

env = GridWorld_ORIGINAL_PB
#env = GridWorld_SIMPLIFIED_PB

env.reset()

# Parameters
Tmax=100             #Time horizon
nbEpisods=10000      #number of episodes
eps=0.4 #0.7         #initial exploration/exploitation tradeoff
    
#Q learning

print('Visualise cumulated discounted reward from the first trajectory')
Q, V, policy = Qlearning(env, eps, nbEpisods, Tmax, plot=True, return_trajectories=False)
print("final V",V)
print("final policy : ", policy)

#Visualise policy
print('Visualise policy')
gui.render_policy(env,policy)

#Trajectory visualisation
print('Visualise trajectory')

for t in range(10):
    env.render = True
    state = env.reset()
    fps = 10
    for i in range(20):  # originally range(Tmax)
Example #16
# str = 'we have a small house, we been livd here for 2 years, my children like to play in the back yard'
def lexical_diversity(text):
    return len(set(text)) / len(text)


# print lexical_diversity(str)

#print sentiment("what is the weather?")

reward_table = pd.read_pickle("reward2.pkl")

turn_list = ['0', '1', '2']
rate_table = pd.DataFrame([(0.02, 0.01, 0.005)], columns=turn_list)

q_learning = Qlearning.Qlearning(actions=action_list, learning_rate=rate_table)
q_table = q_learning.q_table
for state in state_list:
    q_table = q_table.append(
        pd.Series(
            [0] * len(action_list),
            index=q_table.columns,
            name=state,
        ))
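# Note (not from the original code): DataFrame.append was removed in pandas 2.0,
# so the loop above fails on current pandas. An equivalent construction with
# pd.concat, assuming the same state_list and an initially empty q_table, is:
#
#     q_table = pd.concat(
#         [q_table, pd.DataFrame(0, index=state_list, columns=q_table.columns)])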

value = []

for file in all_convs:
    with codecs.open("NewData2/" + file, 'r', 'utf-8') as f:
        lines = f.readlines()
        utt_list = []
Example #17
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 30 11:09:02 2019

@author: nizar
"""
import Qlearning

print("-----------------Trial Watkins for 4 * 4----------------")

#Qlearning.Qlearning('Tabular', 'FrozenLake-v0', 10000, 100, 0.1, 0.999, 1, 0.4, 1, 1) ###### goes up to 0.9 average reward on the last 1000 runs

print("-----------------Trial for 8 * 8----------------")
#Qlearning.Qlearning('Tabular', 'FrozenLake8x8-v0', 100000, 100, 0.1, 0.9, 1, 0.5, 1, 1) ####### doesn't work with the same number of episodes, but if you
###### multiply it by 10 you can reach an average reward of 0.5 on the last 1000 runs

print("-----------------Trial Zap for 4 * 4----------------")

Qlearning.Qlearning('Zap', 'CartPole-v1', 3000, 100, 0.9, 0.9, 1, 0.3, 1, 1)

print("-----------------Trial Zap for 8 * 8----------------")

#Qlearning.Qlearning('Zap', 'FrozenLake8x8-v0', 5000, 100, 0.9, 0.9, 1, 0.3, 1, 1)
Example #18
from tictactoe.TicTacToeGame import TicTacToeGame, display
from tictactoe.TicTacToePlayers import *
#from gobang.GobangGame import GobangGame, display
#from gobang.GobangPlayers import *
import numpy as np
from MCS import *
from MCTS import *
from Qlearning import *
from utils import *
import Arena  # Arena.Arena is used below but not imported in the excerpt
"""
use this script to play any two agents against each other, or play manually with
any agent.
"""
#define games
#g = Connect4Game(4)
g = TicTacToeGame()
#g = GobangGame(7)

#define players
rp = RandomPlayer(g).play
mcsp = MCS(g, 100).play
mctsp = MCTS(g, 100).play
qlp = Qlearning(g, 100, 0.01, 0.9, 0.9).play

arena_rp_op = Arena.Arena(mcsp, rp, g, display=display)
print(arena_rp_op.playGames(10, verbose=False))
arena_rp_op = Arena.Arena(mctsp, rp, g, display=display)
print(arena_rp_op.playGames(10, verbose=False))
arena_rp_op = Arena.Arena(qlp, rp, g, display=display)
print(arena_rp_op.playGames(10, verbose=False))
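
The three playGames calls above print bare result tuples. A small labelling sketch (an assumed addition; it relies on playGames returning a (wins, losses, draws) tuple, as the prints above suggest):

for name, player in [('MCS', mcsp), ('MCTS', mctsp), ('Q-learning', qlp)]:
    arena = Arena.Arena(player, rp, g, display=display)
    wins, losses, draws = arena.playGames(10, verbose=False)
    print(f'{name} vs random: {wins} wins, {losses} losses, {draws} draws')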