コード例 #1
0
def run():
    """Play 10 test episodes with a previously saved Q matrix and print results.

    The environment is built without wrapping, tail, obstacles or
    multipliers and with a single food item. The Q matrix is created with
    ``Qmatrix(2, env)`` (mode 2 = loaded from a text file) and is only read
    here, never updated. Each step takes the highest-valued action for the
    current state, except for a 0.5% chance of a sampled random action.
    """
    render_to_screen = True

    # Same fixed configuration that the Q matrix is expected to match.
    env = Environment(
        wrap=False,
        grid_size=GRID_SIZE,
        rate=80,
        max_time=100,
        tail=False,
        food_count=1,
        obstacle_count=0,
        multiplier_count=0,
        map_path=None,
        action_space=5,
    )

    if render_to_screen:
        env.prerender()

    q_table = Qmatrix(2, env)  # 0 - zeros, 1 - random, 2 - textfile

    # Keep a tiny amount of exploration so testing is not purely greedy.
    explore_probability = 0.005
    episode_count = 10

    for episode in range(episode_count):
        state, info = env.reset()
        finished = False

        while not finished:
            if render_to_screen:
                env.render()

            # Draw first, then decide: random action or best known action.
            explore = np.random.rand() <= explore_probability
            if explore:
                action = env.sample_action()
            else:
                action = np.argmax(q_table[env.state_index(state)])

            # The reward is not needed while testing; only the next state,
            # the termination flag and the episode statistics are used.
            state, _reward, finished, info = env.step(action)

        # `info` holds the statistics returned by the last step of the episode.
        print("Episode:", episode, "\tScore:", info["score"], "\tTime:",
              info["time"])
コード例 #2
0
def train():
    """Train the Q matrix with epsilon-greedy Q-learning and save it to disk.

    Runs ``total_episodes`` episodes in an environment without wrapping,
    tail, obstacles or multipliers and with a single food item. The Q
    matrix is created with ``Qmatrix(1, env)`` (mode 1 = random) and updated
    after every step with the standard temporal-difference rule::

        Q[s, a] += alpha * (reward + gamma * max(Q[s']) - Q[s, a])

    Epsilon decays linearly from ``epsilon_start`` to ``epsilon_end`` over
    the first ``epsilon_percentage`` of the episodes and is then held at
    ``epsilon_end``.

    Every ``print_episode`` episodes (and after the final episode) the
    average time and score since the previous report are printed and the Q
    matrix is written to ``Q_textfile_path_save``.

    Raises:
        KeyboardInterrupt: re-raised after the current Q matrix has been
            written to ``Q_textfile_path_save``.
    """
    render_to_screen = False

    # Setting up the environment
    env = Environment(wrap=False,
                      grid_size=GRID_SIZE,
                      rate=80,
                      max_time=100,
                      tail=False,
                      food_count=1,
                      obstacle_count=0,
                      multiplier_count=0,
                      map_path=None,
                      action_space=5)

    if render_to_screen:
        env.prerender()

    Q = Qmatrix(1, env)  # 0 - zeros, 1 - random, 2 - textfile

    alpha = 0.15  # Learning rate, i.e. which fraction of the Q values should be updated
    gamma = 0.99  # Discount factor, i.e. to which extent the algorithm considers possible future rewards
    epsilon = 0.1  # Probability of a random action; only used as-is when epsilon_function is False

    # Linear epsilon schedule.
    epsilon_function = True
    epsilon_start = 0.8
    epsilon_end = 0.05
    epsilon_percentage = 0.6  # fraction of total_episodes over which epsilon decays

    print_episode = 1000
    total_episodes = 10000

    # Loop-invariant slope of the linear epsilon decay.
    epsilon_decay = (epsilon_start - epsilon_end) / (epsilon_percentage *
                                                     total_episodes)

    def save_q():
        # Builtin float replaces the np.float alias, which was removed in
        # NumPy 1.24 and was only ever an alias for the builtin.
        np.savetxt(Q_textfile_path_save,
                   Q.astype(float),
                   fmt='%f',
                   delimiter=" ")

    # Running totals since the last report, plus how many episodes they cover
    # so the printed averages use the real window size.
    total_time = 0
    total_score = 0
    episodes_in_window = 0

    # The guard wraps the whole training loop so that cancelling at any point
    # (not only in the middle of a step) still saves the Q lookup table.
    try:
        for episode in range(total_episodes):
            # Reset the environment
            state, info = env.reset()
            done = False

            if epsilon_function:
                # Linear decay, clamped so it never drops below epsilon_end.
                epsilon = max(epsilon_end,
                              epsilon_start - epsilon_decay * episode)

            while not done:
                if render_to_screen:
                    env.render()

                # Epsilon-greedy action selection.
                if np.random.rand() <= epsilon:
                    action = env.sample_action()
                else:
                    action = np.argmax(Q[env.state_index(state)])

                new_state, reward, done, info = env.step(action)

                # Temporal-difference update towards reward + discounted
                # best value of the next state.
                state_idx = env.state_index(state)
                next_idx = env.state_index(new_state)
                td_target = reward + gamma * np.max(Q[next_idx])
                Q[state_idx, action] += alpha * (td_target -
                                                 Q[state_idx, action])

                state = new_state

            # The episode has ended; `info` comes from its final step.
            total_time += info["time"]
            total_score += info["score"]
            episodes_in_window += 1

            is_report_episode = episode % print_episode == 0 and episode != 0
            is_last_episode = episode == total_episodes - 1
            if is_report_episode or is_last_episode:
                print("Episode:", episode,
                      "\tavg t: {0:.3f}".format(total_time /
                                                episodes_in_window),
                      "\tavg score: {0:.3f}".format(total_score /
                                                    episodes_in_window),
                      "\tepsilon {0:.3f}".format(epsilon))
                save_q()
                total_time = 0
                total_score = 0
                episodes_in_window = 0

    except KeyboardInterrupt:
        save_q()
        print("Saved Q matrix to text file")
        raise

    # No extra save is needed here: the final episode always triggers one.
    print("Simulation finished. \nSaved Q matrix to text file at:",
          Q_textfile_path_save)