def __init__(self, gamma=0.9, alpha=0.1, epsilon=0.1, num_episodes=500):
    """Initialize a Q-learning agent over a fixed 4x4 grid environment.

    Args:
        gamma: discount factor for future rewards.
        alpha: learning rate for Q-value updates.
        epsilon: exploration probability for epsilon-greedy action choice.
        num_episodes: number of training episodes to run.
    """
    # One Q-table row per (row, col) grid cell, one column per action,
    # all values initialised to zero.
    coords = [0, 1, 2, 3]
    self.q_table = pd.DataFrame(
        0,
        index=pd.MultiIndex.from_product([coords, coords]),
        columns=['UP', 'DOWN', 'LEFT', 'RIGHT'],
    )
    self.alpha = alpha
    self.gamma = gamma
    self.epsilon = epsilon
    self.num_episodes = num_episodes
    self.env1 = env.ENV()
    self.action_count, self.actions = self.env1.getActionItems()
def __init__(self, gamma=0.9, alpha=0.1, epsilon=0.1, num_episodes=500,
             storage_value=None, row_num=5, col_num=5):
    """Initialize a Q-learning agent over a row_num x col_num grid environment.

    Args:
        gamma: discount factor for future rewards.
        alpha: learning rate for Q-value updates.
        epsilon: exploration probability for epsilon-greedy action choice.
        num_episodes: number of training episodes to run.
        storage_value: values forwarded to env.ENV; defaults to an empty list.
        row_num: number of grid rows.
        col_num: number of grid columns.
    """
    # Fix the mutable-default-argument pitfall: the original used
    # storage_value=[] which is shared across every call. Create a fresh
    # list per call instead (behaviour for all existing callers unchanged).
    if storage_value is None:
        storage_value = []
    self.col_list = list(range(col_num))
    self.row_list = list(range(row_num))
    # One Q-table row per (row, col) grid cell, one column per action,
    # all values initialised to zero.
    self.q_table = pd.DataFrame(
        0,
        index=pd.MultiIndex.from_product([self.row_list, self.col_list]),
        columns=['UP', 'DOWN', 'LEFT', 'RIGHT'],
    )
    self.alpha = alpha
    self.gamma = gamma
    self.epsilon = epsilon
    self.num_episodes = num_episodes
    self.env1 = env.ENV(storage_value, row_num, col_num)
    self.action_count, self.actions = self.env1.getActionItems()
def test(self):
    """Run one greedy episode using the Q-table saved during training.

    Loads 'q_table.pkl' from disk; if loading fails (file missing or
    unreadable), shows a hint in the text widget and returns without
    stepping the environment.
    """
    try:
        # Read the Q-table saved by the training phase.
        q_table = pd.read_pickle('q_table.pkl')
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; still best-effort for any load failure.
        txt.delete("1.0", "end")
        txt.insert(END, 'Try to train the Agent before testing it\n')
        return
    if q_table is not None:
        env1 = env.ENV()
        # display_environment(env1)  # display the init state of the environment
        done = False
        while not done:  # step greedily until the destination is reached
            state = env1.getAgentPosition()  # get current position
            # Best action = column label with the maximal Q-value for this
            # state; idxmax() picks the first maximum, matching the original
            # index[values.argmax()] tie-breaking.
            action = q_table.loc[state, :].idxmax()
            _, _, done = env1.step(action)  # execute the action
def handle_click(event):
    """Replay a fixed, scripted path through the environment on click.

    Draws the initial state, then for each scripted move waits one second,
    applies the move, and redraws — identical call order to the original
    copy-pasted sequence, expressed as a data-driven loop.
    """
    env1 = env.ENV()
    update(env1)
    # Hard-coded demo trajectory; sleep(1) between steps keeps the UI visible.
    for action in ('RIGHT', 'RIGHT', 'LEFT', 'DOWN', 'DOWN',
                   'DOWN', 'DOWN', 'RIGHT', 'RIGHT'):
        time.sleep(1)
        env1.step(action)
        update(env1)
import time
import numpy as np
import config
import data_loader
import env
import agent
import policy

# Load the evaluation split of the dataset.
DL = data_loader.DataLoader()
DL.read_data(split='test')

# NOTE(review): this rebinding shadows the imported `env` module (and
# `agent` below shadows the `agent` module) — works here, but confirm no
# later code needs the module names again.
env = env.ENV(DL)

# Deterministic masked-greedy policy for evaluation; masked multinomial
# (stochastic) policy for training-time exploration.
greedy = policy.MaskedGreedyPolicy()
multi = policy.MaskedMultiDisPolicy()

agent = agent.pgAgent(env=env, nb_warm_up=400, policy=multi,
                      testPolicy=greedy, gamma=0.95, lr=0.0001,
                      memory_limit=2000, batchsize=64, train_interval=200)

time1 = time.time()
# NOTE(review): `round` shadows the builtin of the same name — consider
# renaming if this script is edited.
for round in range(1000):
    print("------------------------------------------------------")
    # Log the round index and elapsed minutes since training started.
    print('\n\n train ' + str(round) + '_' + str((time.time() - time1) / 60))
def reset_environment(self):
    """Replace the current environment with a fresh instance.

    Returns:
        The agent's starting position in the new environment.
    """
    # Rebinding self.env1 drops the old instance on its own; the explicit
    # `del` the original used before reassignment was redundant.
    self.env1 = env.ENV()
    return self.env1.getAgentPosition()
def reset_environment(self, storage_value, row_num, col_num):
    """Replace the current environment with a fresh instance.

    Args:
        storage_value: values forwarded to env.ENV.
        row_num: number of grid rows.
        col_num: number of grid columns.

    Returns:
        The agent's starting position in the new environment.
    """
    # Rebinding self.env1 drops the old instance on its own; the explicit
    # `del` the original used before reassignment was redundant.
    self.env1 = env.ENV(storage_value, row_num, col_num)
    return self.env1.getAgentPosition()