    def __init__(self, env, actor, critic, DISCOUNT_FACTOR, MINIBATCH_SIZE,
                 REPLAY_MEMORY_SIZE, TARGET_DISCOUNT, continue_execution, MEMORIES):
        # Environment details
        self.env = env
        self.actor = actor
        self.critic = critic
        self.MINIBATCH_SIZE = MINIBATCH_SIZE
        self.DISCOUNT = DISCOUNT_FACTOR
        self.TARGET_DISCOUNT = TARGET_DISCOUNT
        self.bg_noise = None
        self.action_dim = self.env.action_space.shape[0]

        # Replay memory to store experiences of the model with the environment
        # self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
        if continue_execution:
            self.replay_memory = memory.Memory(REPLAY_MEMORY_SIZE, load=continue_execution, memories=MEMORIES)
        else:
            self.replay_memory = memory.Memory(REPLAY_MEMORY_SIZE)
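
    # A minimal sketch of how TARGET_DISCOUNT could drive the soft target-network
    # update in the usual DDPG fashion (target <- tau * online + (1 - tau) * target,
    # with tau = TARGET_DISCOUNT). Illustration only: the method name and the
    # Keras-style get_weights()/set_weights() interface are assumptions, not
    # confirmed by this excerpt.
    def _soft_update_target_sketch(self, online_model, target_model):
        online_weights = online_model.get_weights()
        target_weights = target_model.get_weights()
        # Blend each weight tensor of the online network into the target network
        blended = [self.TARGET_DISCOUNT * w_online + (1.0 - self.TARGET_DISCOUNT) * w_target
                   for w_online, w_target in zip(online_weights, target_weights)]
        target_model.set_weights(blended)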
        _state += list(states[i]) + list(actions[i])
    _state += list(states[len(states) - 1])
    return states, actions, np.asarray(tuple(_state))


# Start iterating from the current episode
for episode in xrange(CURRENT_EPISODE + 1, EPISODES + 1, 1):
    done = False
    first_state = env.reset()
    first_action = np.array([0, 0, 0])
    states = [first_state, first_state, first_state]
    actions = [first_action, first_action]
    states, actions, cur_state = make_state(states, actions, first_state, first_action)
    action_memory = memory.Memory(STEPS)
    episode_reward = 0
    episode_step = 0
    new_episode = True

    while not done:
        action, action_step = actor_critic.act(cur_state, new_episode, GREEDY_RATE)
        _next_state, reward, done, _ = env.step(action_step)
        states, actions, next_state = make_state(states, actions, _next_state, action)
        episode_reward += reward
        action_memory.addMemory(cur_state, action, reward, next_state, done)
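        # From here a DDPG-style step would usually advance cur_state to next_state
        # and, once the replay memory holds enough transitions, sample a minibatch
        # and fit the critic toward the bootstrapped target
        #     y = r + DISCOUNT * Q_target(s', actor_target(s'))
        # before updating the actor and the target networks. Those calls are not
        # shown in this excerpt; the description above is the standard formulation,
        # not the file's confirmed behaviour.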