コード例 #1
0
    def __init__(self, gamma=0.9, alpha=0.1, epsilon=0.1, num_episodes=500):
        """Set up the learning hyper-parameters, Q table, and environment.

        Args:
            gamma: discount factor for future rewards.
            alpha: learning rate.
            epsilon: exploration rate for epsilon-greedy selection.
            num_episodes: number of training episodes to run.
        """
        # Fixed 4x4 grid: one Q row per (row, col) cell, one column per action.
        grid_states = pd.MultiIndex.from_product([[0, 1, 2, 3], [0, 1, 2, 3]])
        self.q_table = pd.DataFrame(
            0, index=grid_states, columns=['UP', 'DOWN', 'LEFT', 'RIGHT'])

        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.num_episodes = num_episodes
        self.env1 = env.ENV()
        self.action_count, self.actions = self.env1.getActionItems()
コード例 #2
0
    def __init__(self,
                 gamma=0.9,
                 alpha=0.1,
                 epsilon=0.1,
                 num_episodes=500,
                 storage_value=None,
                 row_num=5,
                 col_num=5):
        """Set up the learning hyper-parameters, Q table, and environment.

        Args:
            gamma: discount factor for future rewards.
            alpha: learning rate.
            epsilon: exploration rate for epsilon-greedy selection.
            num_episodes: number of training episodes to run.
            storage_value: initial storage layout forwarded to the
                environment; defaults to an empty list.
            row_num: number of grid rows.
            col_num: number of grid columns.
        """
        # Fix for the mutable-default-argument pitfall: the previous
        # `storage_value=[]` default was one shared list reused by every
        # instance that did not pass its own.
        if storage_value is None:
            storage_value = []
        self.col_list = list(range(col_num))
        self.row_list = list(range(row_num))
        # One Q row per (row, col) grid cell, one column per action.
        self.q_table = pd.DataFrame(
            0,
            index=pd.MultiIndex.from_product([self.row_list, self.col_list]),
            columns=['UP', 'DOWN', 'LEFT', 'RIGHT'])

        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.num_episodes = num_episodes
        self.env1 = env.ENV(storage_value, row_num, col_num)
        self.action_count, self.actions = self.env1.getActionItems()
コード例 #3
0
    def test(self):
        """Replay the greedy policy from the saved Q table until done."""
        try:
            # Q table persisted by a previous training run.
            q_table = pd.read_pickle('q_table.pkl')
        except Exception:
            # Was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; Exception is the widest safe net here.
            # A missing or corrupt pickle means the agent is untrained.
            txt.delete("1.0", "end")
            txt.insert(END, 'Try to train the Agent before testing it\n')
            return

        if q_table is not None:
            env1 = env.ENV()
            done = False
            while not done:  # until the agent reaches the destination
                state = env1.getAgentPosition()  # current position
                # Greedy action: idxmax() returns the column label of the
                # largest Q value (same first-max tie-breaking as the old
                # .index[...values.argmax()] chain).
                action = q_table.loc[state, :].idxmax()
                _, _, done = env1.step(action)  # execute the action
コード例 #4
0
def handle_click(event):
    """Walk the agent through a fixed demo route, redrawing after each move.

    Args:
        event: Tk callback event object; not used.
    """
    # Scripted route; was nine copy-pasted step/update/sleep stanzas.
    route = ['RIGHT', 'RIGHT', 'LEFT', 'DOWN', 'DOWN',
             'DOWN', 'DOWN', 'RIGHT', 'RIGHT']
    env1 = env.ENV()
    update(env1)  # show the initial state
    for action in route:
        time.sleep(1)  # pause so each move is visible (none after the last)
        env1.step(action)
        update(env1)
コード例 #5
0
ファイル: main.py プロジェクト: gzlong96/Refresh-K
import time
import numpy as np

import config
import data_loader
import env
import agent
import policy

# Load the evaluation split once at module level.
DL = data_loader.DataLoader()
DL.read_data(split='test')

# NOTE(review): rebinding `env` shadows the imported `env` module from here on.
env = env.ENV(DL)

greedy = policy.MaskedGreedyPolicy()
multi = policy.MaskedMultiDisPolicy()

# Policy-gradient agent: trains with the stochastic `multi` policy and
# evaluates with the `greedy` one.
# NOTE(review): rebinding `agent` likewise shadows the imported `agent` module.
agent = agent.pgAgent(env=env,
                      nb_warm_up=400,
                      policy=multi,
                      testPolicy=greedy,
                      gamma=0.95,
                      lr=0.0001,
                      memory_limit=2000,
                      batchsize=64,
                      train_interval=200)

time1 = time.time()
# NOTE(review): `round` shadows the builtin; the loop body appears to
# continue beyond this chunk, so only the progress prints are visible here.
for round in range(1000):
    print("------------------------------------------------------")
    print('\n\n train ' + str(round) + '_' + str((time.time() - time1) / 60))
コード例 #6
0
 def reset_environment(self):
     """Replace the environment with a fresh instance.

     Returns:
         The agent's starting position in the new environment.
     """
     # Rebinding self.env1 drops the old instance on its own; the previous
     # explicit `del (self.env1)` was redundant.
     self.env1 = env.ENV()
     return self.env1.getAgentPosition()
コード例 #7
0
 def reset_environment(self, storage_value, row_num, col_num):
     """Replace the environment with a fresh instance of the given layout.

     Args:
         storage_value: initial storage layout for the new environment.
         row_num: number of grid rows.
         col_num: number of grid columns.

     Returns:
         The agent's starting position in the new environment.
     """
     # Rebinding self.env1 drops the old instance on its own; the previous
     # explicit `del (self.env1)` was redundant.
     self.env1 = env.ENV(storage_value, row_num, col_num)
     return self.env1.getAgentPosition()