scores = []
epsHistory = []
numGames = 500
batch_size = 50
# env = wrappers.Monitor(env, "tmp500/space-invaders-1", video_callable=lambda episode_id: True, force=True)
for i in range(numGames):
    print('starting game ', i + 1, 'epsilon: %.4f' % brain.EPSILON)
    epsHistory.append(brain.EPSILON)
    done = False
    observation = env.reset()
    # Crop to the playing field and average the RGB channels to grayscale.
    frames = [np.mean(observation[15:200, 30:125], axis=2)]
    score = 0
    lastAction = 0
    while not done:
        # Act on every third frame; repeat the last action in between.
        if len(frames) == 3:
            action = brain.chooseAction(frames)
            frames = []
        else:
            action = lastAction
        observation_, reward, done, info = env.step(action)
        score += reward
        frames.append(np.mean(observation_[15:200, 30:125], axis=2))
        # Penalise losing the final life so the agent learns to avoid it.
        if done and info['ale.lives'] == 0:
            reward = -100
        brain.storeTransition(
            np.mean(observation[15:200, 30:125], axis=2), action, reward,
            np.mean(observation_[15:200, 30:125], axis=2))
        observation = observation_
        brain.learn(batch_size)
        lastAction = action
        env.render()
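
The loop above assumes an agent (here `brain`) exposing `EPSILON`, `chooseAction`, `storeTransition`, and `learn`. A minimal epsilon-greedy sketch of that interface, with a plain list as replay memory; the attribute names, decay schedule, and placeholder `forward` are assumptions, not the original implementation:

import random
import numpy as np

class Agent:
    """Minimal sketch of the interface the training loop relies on."""

    def __init__(self, n_actions, eps=1.0, eps_min=0.05, eps_dec=1e-4):
        self.EPSILON = eps
        self.eps_min = eps_min
        self.eps_dec = eps_dec
        self.n_actions = n_actions
        self.memory = []  # (state, action, reward, next_state) tuples

    def forward(self, state):
        # Placeholder for the Q-network forward pass (omitted here).
        return np.zeros(self.n_actions)

    def chooseAction(self, frames):
        # Epsilon-greedy: explore with probability EPSILON, else exploit.
        if random.random() < self.EPSILON:
            return random.randrange(self.n_actions)
        return int(np.argmax(self.forward(np.stack(frames))))

    def storeTransition(self, state, action, reward, state_):
        self.memory.append((state, action, reward, state_))

    def learn(self, batch_size):
        if len(self.memory) < batch_size:
            return
        batch = random.sample(self.memory, batch_size)
        # ... temporal-difference update on the Q-network goes here ...
        self.EPSILON = max(self.eps_min, self.EPSILON - self.eps_dec)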
Example #2
    msg = world_state.observations[-1].text
    observations = json.loads(msg)
    grid = observations.get(u'floor9x9', 0)
    state = get_state(grid)

    # Loop until mission ends
    while world_state.is_mission_running:
        print("-", end="")
        time.sleep(0.1)

        # action based on current state
        # print(state)
        # action_taken = np.random.choice(len(action_space))
        # agent_host.sendCommand(action_space[action_taken])

        action = brain.chooseAction(state)
        agent_host.sendCommand(action_space[action])

        world_state = agent_host.getWorldState()

        for error in world_state.errors:
            print("Error:", error.text)

        # Have we received any new observations and rewards?
        if (world_state.number_of_observations_since_last_state > 0
                and world_state.number_of_rewards_since_last_state > 0):

            msg = world_state.observations[-1].text
            observations = json.loads(msg)
            grid = observations.get(u'floor9x9', 0)

            # Get new_state & reward
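
The example calls `get_state(grid)` on the `floor9x9` observation, but the function itself is not shown. One plausible sketch, assuming `floor9x9` is the usual Malmo list of 81 block names (row-major, agent at the centre); the block vocabulary below is an assumption:

def get_state(grid):
    # 'grid' is assumed to be 81 block-name strings from 'floor9x9'.
    # The block-to-id mapping is illustrative, not from the original.
    block_ids = {"air": 0, "stone": 1, "lava": 2, "diamond_block": 3}
    return tuple(block_ids.get(block, 0) for block in grid)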
Example #3

def train():
    print()
    print("RUNNING THE MINECRAFT SIMULATION")
    print()

    RENDER = False
    # RENDER = True
    LOAD_MODEL = False
    # LOAD_MODEL = True
    start_eps = 0.8

    WRAP = False
    GRID_SIZE = 7
    LOCAL_GRID_SIZE = 9  # Must be odd so the agent sits at the centre of the local view
    SEED = 1
    FOOD_COUNT = 1
    OBSTACLE_COUNT = 0
    # MAP_PATH = "./Maps/Grid{}/map2.txt".format(GRID_SIZE)
    MAP_PATH = None

    env = Environment(wrap=WRAP,
                      grid_size=GRID_SIZE,
                      rate=80,
                      max_time=30,
                      food_count=FOOD_COUNT,
                      obstacle_count=OBSTACLE_COUNT,
                      lava_count=0,
                      zombie_count=0,
                      action_space=5,
                      map_path=MAP_PATH)

    brain = Agent(gamma=0.99,
                  epsilon=start_eps,
                  alpha=0.01,
                  maxMemorySize=10000,
                  replace=10)

    if LOAD_MODEL:
        try:
            path = "./Models/Torch/my_model.pth"
            brain.load_model(path)
            print("Model loaded from path:", path)
            print()
            brain.EPSILON = 0.05
        except Exception:
            print('Could not load model')
            print('Press <ENTER> to continue with random initialisation')
            print()
            input()
            # quit()

    if RENDER: env.prerender()

    games_played = 0

    print("INITIALISING REPLAY MEMORY")

    while brain.memCntr < brain.memSize:
        observation, _ = env.reset()
        # print(observation)
        # observation = env.local_state_vector_3D()
        done = False

        if RENDER: env.render()  # Render first screen
        while not done:

            action = brain.chooseAction(observation)

            observation_, reward, done, info = env.step(action)
            # observation_ = env.local_state_vector_3D()
            # print(observation_)
            if done:
                # reward = -1
                games_played += 1
            brain.storeTransition(observation, action, reward, done,
                                  observation_)

            observation = observation_
            if RENDER: env.render()

    print("Done initialising replay memory. Played {} games".format(
        games_played))

    scores = []
    epsHistory = []
    numGames = 100000
    print_episode = 100
    batch_size = 16

    avg_score = 0
    avg_time = 0
    avg_loss = 0

    print()
    print("TRAINING MODEL")
    print()

    for i in range(numGames):
        epsHistory.append(brain.EPSILON)
        done = False
        observation, _ = env.reset()
        # observation = env.local_state_vector_3D()
        score = 0
        lastAction = 0

        if RENDER: env.render()  # Render first screen
        while not done:
            action = brain.chooseAction(observation)

            observation_, reward, done, info = env.step(action)

            # observation_ = env.local_state_vector_3D()
            # score += reward

            # print(observation_)

            brain.storeTransition(observation, action, reward, done,
                                  observation_)

            observation = observation_
            loss = brain.learn(batch_size)
            lastAction = action
            if RENDER: env.render()

        avg_score += info["score"]
        avg_time += info["time"]
        avg_loss += loss.item()

        if (i % print_episode == 0 and i != 0) or i == numGames - 1:
            print("Episode", i,
                  "\tavg time: {0:.3f}".format(avg_time / print_episode),
                  "\tavg score: {0:.3f}".format(avg_score / print_episode),
                  "\tavg loss: {0:.3f}".format(avg_loss / print_episode),
                  "\tepsilon: %.4f" % brain.EPSILON)
            brain.save_model("./Models/Torch/my_model{}.pth".format(i))
            avg_loss = 0
            avg_score = 0
            avg_time = 0

        # scores.append(score)
        # print("score:", score)

    brain.save_model("./Models/Torch/my_model.pth")
Example #4
	# 	brain.load_model("./Models/Torch/my_model.pth")
	# except Exception:
	# 	print('Could not load model')
	# 	quit()


	for i in range(numGames):
		epsHistory.append(brain.EPSILON)
		done = False
		obs, _ = env.reset()
		observation = env.state_vector_3D()
		score = 0
		lastAction = 0

		while not done:
			action = brain.chooseAction(observation)

			observation_, reward, done, info = env.step(action)
			
			observation_ = env.state_vector_3D()
			score += reward

			brain.storeTransition(observation, action, reward, observation_)

			observation = observation_
			if TRAIN: loss = brain.learn(batch_size)
			lastAction = action
			if RENDER: env.render()

		avg_score += info["score"]
		if TRAIN: avg_loss += loss.item()
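
Example #4 feeds env.state_vector_3D() to the agent instead of the raw observation. That method is not shown; a plausible sketch, assuming one binary grid channel per entity type (the attribute names here are hypothetical):

import numpy as np

def state_vector_3D(self):
    # One binary grid per entity type: [agent, food, obstacles].
    # Channel layout and attribute names are assumptions.
    state = np.zeros((3, self.grid_size, self.grid_size), dtype=np.float32)
    ax, ay = self.agent_pos
    state[0, ay, ax] = 1.0
    for fx, fy in self.food_positions:
        state[1, fy, fx] = 1.0
    for ox, oy in self.obstacle_positions:
        state[2, oy, ox] = 1.0
    return state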