Example 1
import gym
import numpy as np

from MemoryClass import Memory


def process(render=False):

    print("CartPole main start..")
    env = gym.make('CartPole-v0')

    # Initialize the simulation
    env.reset()
    # Take one random step to get the pole and cart moving
    state, reward, done, _ = env.step(env.action_space.sample())

    memory = Memory(max_size=memory_size)

    # Make a bunch of random actions and store the experiences
    for ii in range(pretrain_length):
        # Render the simulation if requested
        if render:
            env.render()

        # Make a random action
        action = env.action_space.sample()
        next_state, reward, done, _ = env.step(action)

        if done:
            # The simulation fails so no next state
            next_state = np.zeros(state.shape)
            # Add experience to memory
            memory.add((state, action, reward, next_state))

            # Start new episode
            env.reset()
            # Take one random step to get the pole and cart moving
            state, reward, done, _ = env.step(env.action_space.sample())
        else:
            # Add experience to memory
            memory.add((state, action, reward, next_state))
            state = next_state

    #memory.checkBuffer()

    return memory, state, env
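process() also relies on two module-level settings, memory_size and pretrain_length, that are defined elsewhere in the source file. A minimal, hypothetical way to call it; the values below are assumptions borrowed from the defaults of keepMemory() in Example 4.

# Hypothetical module-level settings assumed by process()
memory_size = 10000       # capacity of the replay buffer
pretrain_length = 5000    # number of random warm-up steps

memory, state, env = process(render=False)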
Example 2
import gym
import numpy as np

from skimage.transform import resize
import matplotlib.pyplot as plt

from MemoryClass import Memory
from StateClass import SteteClass
#from env import setEnv
from AgentClass import AgentClass

from PIL import Image

myMemory = Memory(max_size=10)

# push 20 items into a buffer whose capacity is 10;
# only the 10 most recent items should remain
x = range(20)

for item in x:
    myMemory.add(item)

print(myMemory.checkBuffer())
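The Memory class itself is not shown in any of these examples. Below is a minimal sketch of a replay buffer that would satisfy the calls used here (add, sample, checklength, checkBuffer); it is an assumption, not the repository's actual MemoryClass implementation.

import random
from collections import deque

class Memory:
    """Hypothetical bounded replay buffer matching the calls in these examples."""

    def __init__(self, max_size=1000):
        # deque drops the oldest entries once max_size is reached
        self.buffer = deque(maxlen=max_size)

    def add(self, experience):
        self.buffer.append(experience)

    def sample(self, batch_size):
        # uniform random mini-batch of stored experiences
        return random.sample(self.buffer, batch_size)

    def checklength(self):
        return len(self.buffer)

    def checkBuffer(self):
        # return the raw contents for inspection (as printed in Example 2)
        return list(self.buffer)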
Example 3
                #print("** selected action : ", a)
                #print(qout)
                #a = myAgent.sess.run(myAgent.predict,feed_dict={myAgent.x:state_image})[0]

            observation, reward, done, info = env.step(action)
            # increment the total step counter
            total_steps += 1

            # accumulate the episode reward
            episode_reward += reward
            processed_image = preprocess(observation)

            # drop the oldest stacked frame and append the newest processed frame
            next_image = np.append(state_image[:, :, 1:], processed_image[:, :, np.newaxis], axis=2)
            # normalize pixel values to [0, 1] before storing the transition
            state_image /= 255.0
            next_image /= 255.0
            memory.add((state_image, action, reward, done, next_image))

            # start learning only after pre_train_steps (10000) random exploration steps
            if total_steps > pre_train_steps:

                # epsilon-greedy exploration: e starts at 1.0 and decays
                # by stepDrop per step until it reaches endE = 0.1
                if e > endE:
                    e -= stepDrop

                # run a training batch every update_freq (4) steps
                if total_steps % update_freq == 0:
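Example 3 calls a preprocess() helper that is not shown. Given the skimage.transform.resize import in Example 2, a plausible sketch is a grayscale 84x84 downscale that keeps the 0-255 range (so the later /= 255.0 normalisation makes sense); the output shape and grayscale conversion are assumptions.

import numpy as np
from skimage.transform import resize

def preprocess(observation):
    # Hypothetical sketch: RGB Atari frame -> grayscale 84x84,
    # keeping the original 0-255 range so the caller can divide by 255
    gray = observation.mean(axis=2)
    return resize(gray, (84, 84), preserve_range=True)

# The frame stacking done in Example 3: drop the oldest of the stacked
# frames and append the newest processed frame along the channel axis.
observation = np.zeros((210, 160, 3), dtype=np.uint8)    # dummy Atari-sized frame
state_image = np.zeros((84, 84, 4), dtype=np.float32)
processed_image = preprocess(observation)
next_image = np.append(state_image[:, :, 1:], processed_image[:, :, np.newaxis], axis=2)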
Example 4
import numpy as np

from MemoryClass import Memory
from StateClass import SteteClass
from AgentClass import AgentClass
from env import setEnv


def keepMemory(memory_size=10000, pretrain_length=5000, render=False):

    #print("CartPole main start..")
    #env = gym.make('CartPole-v0')

    envs = setEnv()

    #env = envs["BreakGame"]
    env = envs["SpaceInvador"]

    # Initialize the simulation
    #observation = env.reset()
    stateCls = SteteClass(env)
    stateCls.initial_buffer()

    # current state == initial screen state; no action has been taken yet
    curr_state = stateCls.convertAndConcatenateBuffer()
    curr_state = curr_state[np.newaxis,:,:,:]

    #print("initial state size ...", state.shape)
    # Take one random step to get the pole and cart moving
    #state, reward, done, _ = env.step(env.action_space.sample())

    memory = Memory(max_size=memory_size)

    # AgentClass section: initialize the Q network (6 discrete actions)
    myAgent = AgentClass(6)

    MINIBATCH_SIZE = 32
    MIN_OBSERVATION = 500

    epsilon = 1.0
    EPSILON_DECAY = 300
    FINAL_EPS = 0.1

    NUM_FRAMES = 3

    observation_num = 0
    alive_frame = 0
    total_reward = 0

    curr_state_actions = []

    MEMORY_FULL = False
    # Make a bunch of random actions and store the experiences
    for ii in range(pretrain_length):
        # Uncomment the line below to watch the simulation
        #if render:
        #    env.render()
        #stateCls.render()

        init_state = stateCls.convertAndConcatenateBuffer()
        action, q_values = myAgent.get_action(curr_state)
        #curr_state_actions.append(action)

        #print("** action and q_value ... ",action, q_values)
        #myAgent.copyTargetQNetwork()
        #return False,False,False
        #next_state, reward, done, _ = env.step(action)

        obs, rewards, done = stateCls.add_frame(action, NUM_FRAMES)

        #if observation_num % 500 == 0:
        #    print("observation_num / q_values ..",observation_num,q_values)

        if done:
            # Episode finished: report stats, then reset the environment
            if MEMORY_FULL:
                print("memory full.....")

            print("** rewards from done ...", total_reward)
            print("** maximum lived frames .. ", alive_frame)

            # Start a new episode
            stateCls.envReset()
            alive_frame = 0
            total_reward = 0

        new_state = stateCls.convertAndConcatenateBuffer()
        # add the transition to replay memory
        memory.add((init_state, action, rewards, done, new_state))
        total_reward += rewards

        if memory.checklength() > MIN_OBSERVATION:
            MEMORY_FULL = True
            # sample a mini-batch of MINIBATCH_SIZE (32) transitions and train on it
            mini_batch = memory.sample(MINIBATCH_SIZE)
            myAgent.train(mini_batch)

            #s_batch, a_batch, r_batch, d_batch, s2_batch = memory.sample(MINIBATCH_SIZE)
            #self.deep_q.train(s_batch, a_batch, r_batch, d_batch, s2_batch, observation_num)
            #self.deep_q.target_train()


        observation_num += 1
        alive_frame += 1


    print(memory.checklength())
    #print("curr action", curr_state_actions)

    #print("Total rewards from all episodes..", total_reward)

    return curr_state_actions
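myAgent.train(mini_batch) is not shown in these examples. Under the standard DQN recipe, each sampled (state, action, reward, done, next_state) tuple is turned into a Bellman target before fitting the network; below is a minimal numpy sketch of that target construction. The discount factor, the helper name, and its arguments are assumptions, not the repository's AgentClass API.

import numpy as np

GAMMA = 0.99  # assumed discount factor

def build_q_targets(mini_batch, q_current, q_next):
    # mini_batch: list of (state, action, reward, done, next_state) tuples as stored above
    # q_current:  network predictions Q(s, .)  for the batch, shape (batch, n_actions)
    # q_next:     network predictions Q(s', .) for the batch, shape (batch, n_actions)
    targets = q_current.copy()
    for i, (_, action, reward, done, _) in enumerate(mini_batch):
        if done:
            targets[i, action] = reward                        # terminal state: no bootstrap
        else:
            targets[i, action] = reward + GAMMA * np.max(q_next[i])
    return targets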