scores = []
epsHistory = []
numGames = 500
batch_size = 50
# env = wrappers.Monitor(env, "tmp500/space-invaders-1", video_callable=lambda episode_id: True, force=True)

for i in range(numGames):
    print('starting game ', i + 1, 'epsilon: %.4f' % brain.EPSILON)
    epsHistory.append(brain.EPSILON)
    done = False
    observation = env.reset()
    frames = [np.sum(observation[15:200, 30:125], axis=2)]
    score = 0
    lastAction = 0

    while not done:
        if len(frames) == 3:
            action = brain.chooseAction(frames)
            frames = []
        else:
            action = lastAction

        observation_, reward, done, info = env.step(action)
        score += reward
        frames.append(np.sum(observation_[15:200, 30:125], axis=2))
        if done and info['ale.lives'] == 0:
            reward = -100
        brain.storeTransition(
            np.mean(observation[15:200, 30:125], axis=2), action, reward,
            np.mean(observation_[15:200, 30:125], axis=2))
        observation = observation_
        brain.learn(batch_size)
        lastAction = action
        env.render()
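# The Space Invaders loop above crops each 210x160x3 Atari frame to the play
# area and collapses the colour channels before stacking or storing it. Below
# is a minimal sketch of that preprocessing as a standalone helper; the
# function name preprocess_frame is illustrative and not part of the original
# code.
import numpy as np

def preprocess_frame(observation):
    # Crop rows 15:200 and columns 30:125 of the RGB frame, then average the
    # colour channels, matching the np.mean(...) calls in the training loop.
    return np.mean(observation[15:200, 30:125], axis=2)

# A dummy 210x160 RGB frame becomes a 185x95 single-channel array.
frame = np.zeros((210, 160, 3), dtype=np.uint8)
print(preprocess_frame(frame).shape)  # (185, 95)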
msg = world_state.observations[-1].text
observations = json.loads(msg)
grid = observations.get(u'floor9x9', 0)
state = get_state(grid)

# Loop until mission ends
while world_state.is_mission_running:
    print("-", end="")
    time.sleep(0.1)

    # Action based on current state
    # print(state)
    # action_taken = np.random.choice(len(action_space))
    # agent_host.sendCommand(action_space[action_taken])
    action = brain.chooseAction(state)
    agent_host.sendCommand(action_space[action])

    world_state = agent_host.getWorldState()
    for error in world_state.errors:
        print("Error:", error.text)

    # Have we received any observations?
    if (world_state.number_of_observations_since_last_state > 0
            and world_state.number_of_rewards_since_last_state > 0):
        msg = world_state.observations[-1].text
        observations = json.loads(msg)
        grid = observations.get(u'floor9x9', 0)
        # Get new_state & reward
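# The Malmo mission loop above relies on a helper get_state(grid) to turn the
# raw 'floor9x9' observation (a flat list of 81 block names returned by
# Malmo's ObservationFromGrid handler) into a state the agent can consume.
# Below is a minimal, hypothetical sketch of such a helper; the
# block-name-to-integer mapping is an assumption for illustration only, not
# the project's actual encoding.
import numpy as np

BLOCK_IDS = {"air": 0, "stone": 1, "lava": 2, "diamond_block": 3}

def get_state(grid):
    # Map each block name to an integer id and reshape the flat 9x9 list
    # into a 2D array centred on the agent; unknown blocks fall back to 0.
    ids = [BLOCK_IDS.get(block, 0) for block in grid]
    return np.array(ids, dtype=np.int64).reshape(9, 9)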
def train():
    print()
    print("RUNNING THE MINECRAFT SIMULATION")
    print()

    RENDER = False
    # RENDER = True
    LOAD_MODEL = False
    # LOAD_MODEL = True
    start_eps = 0.8

    WRAP = False
    GRID_SIZE = 7
    LOCAL_GRID_SIZE = 9  # Has to be an odd number (I think...)
    SEED = 1
    FOOD_COUNT = 1
    OBSTACLE_COUNT = 0
    # MAP_PATH = "./Maps/Grid{}/map2.txt".format(GRID_SIZE)
    MAP_PATH = None

    env = Environment(wrap=WRAP, grid_size=GRID_SIZE, rate=80, max_time=30,
                      food_count=FOOD_COUNT, obstacle_count=OBSTACLE_COUNT,
                      lava_count=0, zombie_count=0, action_space=5,
                      map_path=MAP_PATH)

    brain = Agent(gamma=0.99, epsilon=start_eps, alpha=0.01,
                  maxMemorySize=10000, replace=10)

    if LOAD_MODEL:
        try:
            path = "./Models/Torch/my_model.pth"
            brain.load_model(path)
            print("Model loaded from path:", path)
            print()
            brain.EPSILON = 0.05
        except Exception:
            print('Could not load model')
            print('Press <ENTER> to continue with random initialisation')
            print()
            input()
            # quit()

    if RENDER:
        env.prerender()

    games_played = 0

    print("INITIALISING REPLAY MEMORY")
    while brain.memCntr < brain.memSize:
        observation, _ = env.reset()
        # print(observation)
        # observation = env.local_state_vector_3D()
        done = False
        if RENDER:
            env.render()  # Render first screen
        while not done:
            action = brain.chooseAction(observation)
            observation_, reward, done, info = env.step(action)
            # observation_ = env.local_state_vector_3D()
            # print(observation_)
            if done:
                # reward = -1
                games_played += 1
            brain.storeTransition(observation, action, reward, done, observation_)
            observation = observation_
            if RENDER:
                env.render()

    print("Done initialising replay memory. Played {} games".format(games_played))

    scores = []
    epsHistory = []
    numGames = 100000
    print_episode = 100
    batch_size = 16

    avg_score = 0
    avg_time = 0
    avg_loss = 0

    print()
    print("TRAINING MODEL")
    print()

    for i in range(numGames):
        epsHistory.append(brain.EPSILON)
        done = False
        observation, _ = env.reset()
        # observation = env.local_state_vector_3D()
        score = 0
        lastAction = 0
        if RENDER:
            env.render()  # Render first screen

        while not done:
            action = brain.chooseAction(observation)
            observation_, reward, done, info = env.step(action)
            # observation_ = env.local_state_vector_3D()
            # score += reward
            # print(observation_)
            brain.storeTransition(observation, action, reward, done, observation_)
            observation = observation_
            loss = brain.learn(batch_size)
            lastAction = action
            if RENDER:
                env.render()

        avg_score += info["score"]
        avg_time += info["time"]
        avg_loss += loss.item()

        if (i % print_episode == 0 and i != 0) or i == numGames - 1:
            print("Episode", i,
                  "\tavg time: {0:.3f}".format(avg_time / print_episode),
                  "\tavg score: {0:.3f}".format(avg_score / print_episode),
                  "\tavg loss: {0:.3f}".format(avg_loss / print_episode),
                  "\tepsilon: %.4f" % brain.EPSILON)
            brain.save_model("./Models/Torch/my_model{}.pth".format(i))
            avg_loss = 0
            avg_score = 0
            avg_time = 0

        # scores.append(score)
        # print("score:", score)

    brain.save_model("./Models/Torch/my_model.pth")
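# The loops in this file all drive a similar small Agent interface:
# chooseAction() (epsilon-greedy over a Q-network), storeTransition() (append
# to a bounded replay memory) and learn() (sample a minibatch and take one
# DQN update step); the exact storeTransition signature varies between
# versions. The class below is a minimal, hypothetical sketch of that
# interface, assuming a tiny fully connected PyTorch network; it illustrates
# the usual DQN pattern and is NOT the project's actual Agent implementation.
import random
from collections import deque

import numpy as np
import torch
import torch.nn as nn

class SketchAgent:
    def __init__(self, n_inputs, n_actions, epsilon=0.8, gamma=0.99,
                 alpha=0.01, maxMemorySize=10000):
        self.EPSILON = epsilon
        self.GAMMA = gamma
        self.memSize = maxMemorySize
        self.memCntr = 0
        self.memory = deque(maxlen=maxMemorySize)
        self.n_actions = n_actions
        # A deliberately tiny stand-in for the real Q-network.
        self.Q_eval = nn.Sequential(nn.Linear(n_inputs, 64), nn.ReLU(),
                                    nn.Linear(64, n_actions))
        self.optimizer = torch.optim.Adam(self.Q_eval.parameters(), lr=alpha)
        self.loss_fn = nn.MSELoss()

    def chooseAction(self, observation):
        # Epsilon-greedy: random action with probability EPSILON, otherwise
        # the argmax of the network's Q-values for this observation.
        if random.random() < self.EPSILON:
            return random.randrange(self.n_actions)
        state = torch.tensor(np.asarray(observation, dtype=np.float32)).flatten().unsqueeze(0)
        with torch.no_grad():
            return int(self.Q_eval(state).argmax().item())

    def storeTransition(self, state, action, reward, done, state_):
        # The deque drops the oldest transition once the buffer is full.
        self.memory.append((state, action, reward, done, state_))
        self.memCntr += 1

    def learn(self, batch_size):
        # Sample a minibatch and take one temporal-difference update step.
        batch = random.sample(self.memory, min(batch_size, len(self.memory)))
        states = torch.tensor(np.asarray([np.ravel(s) for s, *_ in batch], dtype=np.float32))
        actions = torch.tensor([a for _, a, *_ in batch], dtype=torch.int64)
        rewards = torch.tensor([r for _, _, r, *_ in batch], dtype=torch.float32)
        dones = torch.tensor([d for _, _, _, d, _ in batch], dtype=torch.bool)
        states_ = torch.tensor(np.asarray([np.ravel(s_) for *_, s_ in batch], dtype=np.float32))

        q_pred = self.Q_eval(states).gather(1, actions.unsqueeze(1)).squeeze(1)
        q_next = self.Q_eval(states_).max(dim=1).values
        q_target = rewards + self.GAMMA * q_next * (~dones)

        loss = self.loss_fn(q_pred, q_target.detach())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss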
# brain.load_model("./Models/Torch/my_model.pth")
# except Exception:
#     print('Could not load model')
#     quit()

for i in range(numGames):
    epsHistory.append(brain.EPSILON)
    done = False
    obs, _ = env.reset()
    observation = env.state_vector_3D()
    score = 0
    lastAction = 0

    while not done:
        action = brain.chooseAction(observation)
        observation_, reward, done, info = env.step(action)
        observation_ = env.state_vector_3D()
        score += reward
        brain.storeTransition(observation, action, reward, observation_)
        observation = observation_
        if TRAIN:
            loss = brain.learn(batch_size)
        lastAction = action
        if RENDER:
            env.render()

    avg_score += info["score"]
    if TRAIN:
        avg_loss += loss.item()