def main():
    """Train a controller on ``CartPole-v0`` and optionally replay it.

    Episodes are run until either more than ``Config.STREAKS`` consecutive
    episodes have lasted at least ``Config.DURATION`` steps, or the episode
    counter reaches ``MAX_EPISODE``.  After every environment step the
    controller is updated through ``controller.update_q``.

    When ``ANIMATE`` is truthy, one extra episode is rendered using
    ``controller.take_best_action``.
    """
    env = gym.make('CartPole-v0')
    controller = Controller()
    num_streaks = 0
    episode = 0

    while True:
        state = State(env.reset()).discretize_features()
        steps = 0
        done = False
        while not done:
            steps += 1
            action = controller.take_action(state, episode)
            observation, reward, done, _ = env.step(action)
            next_state = State(observation).discretize_features()
            controller.update_q(next_state, state, action, reward, episode)
            state = next_state

        # The inner loop only ends once the episode is done, so the streak
        # bookkeeping can live here: a long enough episode extends the
        # streak, anything shorter resets it.
        if steps >= Config.DURATION:
            num_streaks += 1
        else:
            num_streaks = 0

        if num_streaks > Config.STREAKS or episode >= MAX_EPISODE:
            break
        episode += 1

    if not ANIMATE:
        return

    # Replay a single rendered episode with the controller's best actions.
    state = State(env.reset()).discretize_features()
    done = False
    points = 0  # number of rendered steps; counted but not reported
    while not done:
        env.render()
        action = controller.take_best_action(state)
        observation, reward, done, _ = env.step(action)
        state = State(observation).discretize_features()
        points += 1
def removeWaypointBack(self):
    """Remove and return the waypoint stored at the rear of the buffer.

    The rear slot is located at ``(front + size - 1) % buff`` and is reset
    to a blank ``State()`` after its content has been taken out.

    Returns:
        The object that was stored in the rear slot, or ``None`` when the
        buffer holds no waypoints.  In the empty case nothing is modified.
    """
    # Guard against underflow: without it ``size`` would drop to -1 and
    # every later rear-index computation would be off by one slot.
    if self.size <= 0:
        return None

    self.size -= 1
    # With ``size`` already decremented this is the index of the last
    # occupied slot, wrapped around the end of the backing list.
    rear = (self.front + self.size) % self.buff
    removed = self.Waypoint[rear]
    self.Waypoint[rear] = State()  # put a blank placeholder back
    return removed
def removeWaypointFront(self):
    """Remove and return the waypoint stored at the front of the buffer.

    The front slot is reset to a blank ``State()`` and ``front`` advances
    by one position, wrapping around modulo ``buff``.

    Returns:
        The object that was stored in the front slot, or ``None`` when the
        buffer holds no waypoints.  In the empty case nothing is modified.
    """
    # Guard against underflow: without it ``size`` would become negative
    # and ``front`` would advance past a slot that was never filled.
    if self.size <= 0:
        return None

    self.size -= 1
    head = self.front
    removed = self.Waypoint[head]
    self.Waypoint[head] = State()  # put a blank placeholder back
    self.front = (head + 1) % self.buff
    return removed
def __init__(self, size=10):
    """Create an empty waypoint buffer with ``size`` slots.

    Args:
        size: Number of slots in the backing list (stored as ``buff``).

    Every slot starts out holding a blank ``State()`` placeholder;
    ``size`` (the element count) and ``front`` both start at zero.
    """
    self.buff = size
    self.size = 0
    self.front = 0
    # One distinct placeholder object per slot.
    self.Waypoint = [State() for _ in range(self.buff)]
def play(env, path=None):
    """Render one episode driven by the controller's best actions.

    Args:
        env: Environment object; ``reset``, ``render`` and ``step`` are
            called on it.
        path: Optional value forwarded to ``Controller``; when falsy the
            controller is built without arguments.

    The rewards returned by ``env.step`` are summed and the total is
    printed once the episode reports ``done``.
    """
    controller = Controller(path) if path else Controller()

    state = State(env.reset()).discretize_features()
    total_reward = 0
    finished = False
    while not finished:
        env.render()
        chosen = controller.take_best_action(state)
        observation, reward, finished, _ = env.step(chosen)
        total_reward += reward
        state = State(observation).discretize_features()

    print(total_reward)
def main():
    """Entry point for the ``LunarLander-v2`` experiment.

    As written, the function replays ``'hein.npy'`` through ``play`` and
    then calls ``exit(0)``, so the training loop below is never reached.
    The training code runs ``Config.ITERATIONS`` episodes, updating the
    controller after each step, tracks the best episode reward, saves
    ``controller.q_table`` and optionally replays via ``play``.
    """
    env = gym.make('LunarLander-v2')

    # NOTE(review): unconditional replay-and-exit; everything after
    # ``exit(0)`` is unreachable.  Confirm whether this is a leftover
    # debugging toggle before removing either half.
    play(env, 'hein.npy')
    exit(0)

    controller = Controller()
    best_reward = -500
    episode = 0
    while episode < Config.ITERATIONS:
        state = State(env.reset()).discretize_features()
        steps = 0
        total_reward = 0
        done = False
        while not done:
            steps += 1
            action = controller.take_action(state, episode)
            observation, reward, done, _ = env.step(action)
            total_reward += reward
            next_state = State(observation).discretize_features()
            controller.update_q(next_state, state, action, reward, episode)
            state = next_state

        # Progress report every 30th episode; it shows the best reward
        # seen *before* this episode is taken into account.
        if episode % 30 == 0:
            print("Finished {} with {} steps and {} reward {}.".format(
                episode, steps, total_reward, best_reward))
        if total_reward > best_reward:
            best_reward = total_reward
        episode += 1

    controller.q_table.save("{}.txt".format(best_reward))

    if ANIMATE:
        play(env)
def addWaypointBack(self, x, y, z):
    """Store a new ``State(x, y, z)`` just behind the last waypoint.

    Args:
        x, y, z: Values passed straight through to ``State``.

    Returns:
        ``False`` if the buffer already holds ``buff`` waypoints (nothing
        is changed), otherwise ``True`` after the waypoint was stored.
    """
    count = self.size
    capacity = self.buff
    # First free slot after the occupied run, wrapped around the list end.
    slot = (self.front + count) % capacity

    if count == capacity:
        return False

    self.size = count + 1
    self.Waypoint[slot] = State(x, y, z)
    return True
def addWaypointFront(self, x, y, z):
    """Store a new ``State(x, y, z)`` just ahead of the first waypoint.

    Args:
        x, y, z: Values passed straight through to ``State``.

    Returns:
        ``False`` if the buffer already holds ``buff`` waypoints (nothing
        is changed), otherwise ``True`` after the waypoint was stored.
    """
    if self.size == self.buff:
        return False

    # Step ``front`` back by one slot; Python's ``%`` already yields a
    # non-negative index, so stepping back from slot 0 wraps to the end.
    self.front = (self.front - 1) % self.buff
    self.size += 1
    self.Waypoint[self.front] = State(x, y, z)
    return True
# ``time.sleep`` is used by the enter hooks below; no import of ``time`` is
# visible in this section, so it is imported here (harmless if repeated).
import time

from controller import Delta
from controller import State
from controller import FSMD


def locked_on_enter():
    """Enter hook for the "locked" state: announce it, then pause 3 seconds."""
    # Parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print("Entered locked state")
    time.sleep(3)


def locked_on_leave():
    """Leave hook for the "locked" state; intentionally does nothing."""
    pass


locked = State("locked")
locked.on_enter = locked_on_enter
locked.on_leave = locked_on_leave


def unlocked_on_enter():
    """Enter hook for the "unlocked" state: announce it, then pause 3 seconds."""
    print("Entered unlocked state")
    time.sleep(3)


def unlocked_on_leave():
    """Leave hook for the "unlocked" state; intentionally does nothing."""
    pass


unlocked = State("unlocked")
unlocked.on_enter = unlocked_on_enter
# Attach the leave hook as well, mirroring the "locked" state above; the
# function was defined but never assigned.
unlocked.on_leave = unlocked_on_leave
def __init__(self, myState=None, rawImage=None):
    """Bind a coordinate state and a raw image to this object.

    Args:
        myState: Object stored as ``co_ordinate``.  When omitted (or
            ``None``) a fresh ``State()`` is created for this instance.
        rawImage: Object stored as ``Image``.  Defaults to ``None`` so the
            signature is valid: a parameter without a default may not
            follow one that has a default.

    ``isSafe`` always starts out as ``False``.
    """
    # Build the default per call.  A ``State()`` default in the signature
    # would be evaluated once at definition time and shared by every
    # instance created without an explicit state.
    self.co_ordinate = State() if myState is None else myState
    self.Image = rawImage
    self.isSafe = False