Ejemplo n.º 1
0
from past.utils import old_div
import MalmoPython
import os
import random
import sys
import time
import json
import random
import errno
import math
import malmoutils
import numpy as np

import agentMC

# Apply malmoutils' print fix before anything is written to the console.
malmoutils.fix_print()

# A single agent host serves the whole script; its command line is parsed
# once and the recordings directory is derived from it.
agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)
recordingsDirectory = malmoutils.get_recordings_directory(agent_host)

# The video producer XML fragment is only emitted when the "record_video"
# argument was received; otherwise the fragment is left empty.
if agent_host.receivedArgument("record_video"):
    video_requirements = '<VideoProducer><Width>860</Width><Height>480</Height></VideoProducer>'
else:
    video_requirements = ''

# Task parameters:
MAX_DISTANCE = 40
MAX_ZOMBIES = 16
####### SPEED OF GAME #######
SPEED = 8
# The arena is square: both sides equal MAX_DISTANCE.
ARENA_WIDTH = ARENA_BREADTH = MAX_DISTANCE
Ejemplo n.º 2
0
from builtins import range
from past.utils import old_div
import MalmoPython
import os
import random
import sys
import time
import json
import copy
import errno
import xml.etree.ElementTree
from collections import deque
import malmoutils

# Apply malmoutils' print fix before anything is written to the console.
malmoutils.fix_print()

# Create the agent host, parse its command line and look up where
# recordings should go.
agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)
recordingsDirectory = malmoutils.get_recordings_directory(agent_host)

# Set up some palettes: each one is a list of six block type strings.
colourful = [
    "stained_glass",
    "diamond_block",
    "lapis_block",
    "gold_block",
    "redstone_block",
    "obsidian",
]
fiery = [
    "stained_glass WHITE",
    "stained_glass PINK",
    "stained_glass ORANGE",
    "stained_glass RED",
    "wool BLACK",
    "glowstone",
]
oresome = [
    "gold_ore",
    "lapis_ore",
    "iron_ore",
    "emerald_ore",
    "redstone_ore",
    "quartz_ore",
]
frilly = [
    "skull",
    "stained_glass WHITE",
    "wool PINK",
    "wool WHITE",
    "stained_hardened_clay PINK",
    "stained_hardened_clay WHITE",
]
# Palettes with repeated entries are spelled with list repetition.
icepalace = ["ice"] + ["stained_glass"] * 4 + ["snow"]
volatile = ["tnt"] + ["stained_glass"] * 2 + ["redstone_block"] + ["stained_glass"] * 2
oak = ["planks"] * 4 + ["lapis_block"] * 2
sponge = ["sponge", "glass"] * 3

# Every palette, in the same order as they are defined above.
palletes = [colourful, fiery, oresome, frilly, icepalace, volatile, oak, sponge]
Ejemplo n.º 3
0
def run(argv=['']):
    """Start a short Malmo mission and drive the agent with random turns.

    When MALMO_XSD_PATH is missing from the environment a hint is printed
    and the function returns without touching Malmo. Otherwise a default
    mission (10 second limit, 320x240 video, a reward for reaching a
    position) is started against a client at 127.0.0.1:10000 with up to
    three attempts, and the agent is sent "move"/"turn" commands every half
    second while the mission is running. Rewards, errors and video frame
    sizes are printed as they arrive.

    argv is passed, together with the agent host, to
    malmoutils.parse_command_line.
    """
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return

    malmoutils.fix_print()

    host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(host, argv)

    # Default mission spec with a time limit, a video request and one reward.
    mission = MalmoPython.MissionSpec()
    mission.timeLimitInSeconds(10)
    mission.requestVideo(320, 240)
    mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100.0, 1.1)

    recording = malmoutils.get_default_recording_object(host, "saved_data")

    # One local client; the same ClientInfo is later given to restart_minecraft.
    client = MalmoPython.ClientInfo('127.0.0.1', 10000)
    clients = MalmoPython.ClientPool()
    clients.add(client)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Try to start the mission; pause between failed attempts and give up
    # (exit status 1) after the last one.
    attempt = 0
    while attempt < max_retries:
        try:
            host.startMission(mission, clients, recording, 0, experiment_id)
            break
        except RuntimeError as err:
            if attempt != max_retries - 1:
                time.sleep(2)
            else:
                print("Error starting mission:", err)
                exit(1)
        attempt += 1

    print("Waiting for the mission to start", end=' ')
    wait_began = time.time()
    state = host.getWorldState()
    while not state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        waited = time.time() - wait_began
        if waited > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(state, host, client, "begin mission")
        state = host.getWorldState()
        for problem in state.errors:
            print("Error:", problem.text)
    print()

    # Time of the most recent poll that delivered anything new.
    last_activity = time.time()
    # main loop:
    while state.is_mission_running:
        host.sendCommand("move 1")
        steer = random.random() * 2 - 1
        host.sendCommand("turn " + str(steer))
        time.sleep(0.5)
        state = host.getWorldState()

        frames_seen = state.number_of_video_frames_since_last_state
        observations_seen = state.number_of_observations_since_last_state
        rewards_seen = state.number_of_rewards_since_last_state
        print("video,observations,rewards received:", frames_seen, observations_seen, rewards_seen)

        got_something = frames_seen > 0 or observations_seen > 0 or rewards_seen > 0
        if got_something:
            last_activity = time.time()
        elif time.time() - last_activity > max_response_time:
            print("Max delay exceeded for world state change")
            restart_minecraft(state, host, client, "world state change")

        for earned in state.rewards:
            print("Summed reward:", earned.getValue())
        for problem in state.errors:
            print("Error:", problem.text)
        for image in state.video_frames:
            print("Frame:", image.width, 'x', image.height, ':', image.channels, 'channels')
    print("Mission has stopped.")
Ejemplo n.º 4
0
def run(argv=['']):
    """Start a short Malmo mission and drive the agent with random turns.

    When MALMO_XSD_PATH is missing from the environment a hint is printed
    and the function returns without touching Malmo. Otherwise a default
    mission (10 second limit, 320x240 video, a reward for reaching a
    position) is started against a client at 127.0.0.1:10000 with up to
    three attempts, and the agent is sent "move"/"turn" commands every half
    second while the mission is running. Rewards, errors and video frame
    sizes are printed as they arrive.

    argv is passed, together with the agent host, to
    malmoutils.parse_command_line.
    """
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return

    malmoutils.fix_print()

    host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(host, argv)

    # Default mission spec with a time limit, a video request and one reward.
    mission = MalmoPython.MissionSpec()
    mission.timeLimitInSeconds(10)
    mission.requestVideo(320, 240)
    mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100.0, 1.1)

    recording = malmoutils.get_default_recording_object(host, "saved_data")

    # One local client; the same ClientInfo is later given to restart_minecraft.
    client = MalmoPython.ClientInfo('127.0.0.1', 10000)
    clients = MalmoPython.ClientPool()
    clients.add(client)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Try to start the mission; pause between failed attempts and give up
    # (exit status 1) after the last one.
    attempt = 0
    while attempt < max_retries:
        try:
            host.startMission(mission, clients, recording, 0, experiment_id)
            break
        except RuntimeError as err:
            if attempt != max_retries - 1:
                time.sleep(2)
            else:
                print("Error starting mission:", err)
                exit(1)
        attempt += 1

    print("Waiting for the mission to start", end=' ')
    wait_began = time.time()
    state = host.getWorldState()
    while not state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        waited = time.time() - wait_began
        if waited > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(state, host, client, "begin mission")
        state = host.getWorldState()
        for problem in state.errors:
            print("Error:", problem.text)
    print()

    # Time of the most recent poll that delivered anything new.
    last_activity = time.time()
    # main loop:
    while state.is_mission_running:
        host.sendCommand("move 1")
        steer = random.random() * 2 - 1
        host.sendCommand("turn " + str(steer))
        time.sleep(0.5)
        state = host.getWorldState()

        frames_seen = state.number_of_video_frames_since_last_state
        observations_seen = state.number_of_observations_since_last_state
        rewards_seen = state.number_of_rewards_since_last_state
        print("video,observations,rewards received:", frames_seen, observations_seen, rewards_seen)

        got_something = frames_seen > 0 or observations_seen > 0 or rewards_seen > 0
        if got_something:
            last_activity = time.time()
        elif time.time() - last_activity > max_response_time:
            print("Max delay exceeded for world state change")
            restart_minecraft(state, host, client, "world state change")

        for earned in state.rewards:
            print("Summed reward:", earned.getValue())
        for problem in state.errors:
            print("Error:", problem.text)
        for image in state.video_frames:
            print("Frame:", image.width, 'x', image.height, ':', image.channels, 'channels')
    print("Mission has stopped.")
Ejemplo n.º 5
0
def main(agent_host):
    """Run Malmo episodes and train a Q-network until the step budget is used.

    Each outer-loop iteration is one episode: the mission is (re)started via
    init_malmo, the agent is steered from the latest video frame and from the
    Q-network's chosen action, transitions are pushed into a bounded replay
    buffer, and the network is trained every LEARN_FREQUENCY steps once
    START_TRAINING steps have passed. Progress is shown on a tqdm bar and
    log_returns is called every tenth episode. When the budget
    (Hyperparameters.MAX_GLOBAL_STEPS) is reached, the collected vision
    records are saved to "images/image_labels" with np.save.

    Args:
        agent_host: object handed to malmoutils.parse_command_line and to
            init_malmo; it is rebound to init_malmo's return value at the
            start of every episode.

    Reads the module-level names VISION_ENABLED, GET_VISION_DATA,
    video_width, video_height and current_yaw_delta_from_depth.
    """
    device = torch.device("cpu")
    if VISION_ENABLED:
        eyes = Eyes()
    if GET_VISION_DATA:
        clear_images()
    malmoutils.fix_print()
    malmoutils.parse_command_line(agent_host)
    recordingsDirectory = malmoutils.get_recordings_directory(agent_host)

    # Online network and a target network that starts as an exact copy.
    q_network = QNetwork((2, Hyperparameters.OBS_SIZE, Hyperparameters.OBS_SIZE), len(Hyperparameters.ACTION_DICT))
    target_network = QNetwork((2, Hyperparameters.OBS_SIZE, Hyperparameters.OBS_SIZE), len(Hyperparameters.ACTION_DICT))
    target_network.load_state_dict(q_network.state_dict())

    optim = torch.optim.Adam(q_network.parameters(), lr=Hyperparameters.LEARNING_RATE)

    # Oldest transitions fall off once REPLAY_BUFFER_SIZE entries are held.
    replay_buffer = deque(maxlen=Hyperparameters.REPLAY_BUFFER_SIZE)

    global_step = 0
    num_episode = 0
    epsilon = 1
    start_time = time.time()
    returns = []
    steps = []
    loss_array = []

    loop = tqdm(total=Hyperparameters.MAX_GLOBAL_STEPS, position=0, leave=False)

    # Records returned by view_surrounding; saved to disk at the very end.
    result_dataset = []

    print("Global Step", Hyperparameters.MAX_GLOBAL_STEPS)
    while global_step < Hyperparameters.MAX_GLOBAL_STEPS:
        episode_step = 0
        episode_return = 0
        episode_loss = 0
        done = False

        # Initialize: start a mission and wait until it has begun.
        agent_host = init_malmo(agent_host, recordingsDirectory, video_width, video_height)
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
        obs = get_observation(world_state, agent_host)

        # Testing
        agent_host.sendCommand("move 1")

        while world_state.is_mission_running:
            # Depth implementation: poll until at least one video frame has
            # arrived (or the mission stops).
            while world_state.number_of_video_frames_since_last_state < 1 and world_state.is_mission_running:
                time.sleep(0.05)
                world_state = agent_host.getWorldState()

            if world_state.is_mission_running:
                frame = world_state.video_frames[0].pixels
                # NOTE(review): presumably processFrame refreshes the global
                # current_yaw_delta_from_depth read below - confirm.
                processFrame(frame)

                if GET_VISION_DATA:
                    try:
                        result_dataset.append(view_surrounding(video_height, video_width, frame, global_step))
                    except Exception as exc:
                        # Fix: this used to be a bare "except:", which also
                        # swallowed KeyboardInterrupt/SystemExit and hid the
                        # cause. Data capture stays best-effort, but the
                        # reason is now reported and Ctrl-C still works.
                        print("Error in getting image for training data.", exc)

                elif VISION_ENABLED:
                    # The result is not used here; the call is kept as in
                    # the original in case get_img has side effects.
                    input_img_temp = get_img(world_state, frame, agent_host, eyes, device, video_width, video_height)

                print("Yaw Delta ", current_yaw_delta_from_depth)

                # Steer with action 1 or 2 depending on the sign of the delta.
                if current_yaw_delta_from_depth > 0:
                    agent_host.sendCommand(Hyperparameters.ACTION_DICT[1])
                else:
                    agent_host.sendCommand(Hyperparameters.ACTION_DICT[2])

            action_idx = get_action(obs, q_network, epsilon)
            command = Hyperparameters.ACTION_DICT[action_idx]

            agent_host.sendCommand(command)

            episode_step += 1
            # The episode is flagged as done when the step cap is hit, or
            # when the observation cell just past the centre holds -1 and the
            # chosen command was 'movesouth 1'.
            if episode_step >= Hyperparameters.MAX_EPISODE_STEPS or \
                    (obs[0, int(Hyperparameters.OBS_SIZE/2)+1, int(Hyperparameters.OBS_SIZE/2)] == -1 and \
                    command == 'movesouth 1'):
                done = True
                time.sleep(2)

            world_state = agent_host.getWorldState()

            for error in world_state.errors:
                print("Error:", error.text)

            next_obs = get_observation(world_state, agent_host)

            # Sum every reward delivered with this world state.
            reward = 0
            for r in world_state.rewards:
                reward += r.getValue()
            episode_return += reward

            replay_buffer.append((obs, action_idx, next_obs, reward, done))
            obs = next_obs

            global_step += 1
            if global_step == Hyperparameters.MAX_GLOBAL_STEPS:
                break

            if global_step > Hyperparameters.START_TRAINING and global_step % Hyperparameters.LEARN_FREQUENCY == 0:
                batch = prepare_batch(replay_buffer)
                loss = learn(batch, optim, q_network, target_network)
                episode_loss += loss

                # Decay exploration until the floor is reached.
                if epsilon > Hyperparameters.MIN_EPSILON:
                    epsilon *= Hyperparameters.EPSILON_DECAY

                # Periodically copy the online weights into the target network.
                if global_step % Hyperparameters.TARGET_UPDATE == 0:
                    target_network.load_state_dict(q_network.state_dict())

        num_episode += 1
        returns.append(episode_return)
        loss_array.append(episode_loss)
        steps.append(global_step)
        # Mean over the last ten returns (or fewer, early on).
        avg_return = sum(returns[-min(len(returns), 10):]) / min(len(returns), 10)
        loop.update(episode_step)
        loop.set_description('Episode: {} Steps: {} Time: {:.2f} Loss: {:.2f} Last Return: {:.2f} Avg Return: {:.2f}'.format(
            num_episode, global_step, (time.time() - start_time) / 60, episode_loss, episode_return, avg_return))

        if num_episode > 0 and num_episode % 10 == 0:
            log_returns(steps, loss_array)

    np.save("images/image_labels", np.array(result_dataset))
Ejemplo n.º 6
0
# Printable names for the single-letter facings used by run().
_FACING_NAMES = {"S": "South", "W": "West", "N": "North", "E": "East"}

# Indices into the second group of nine grid entries (blocks[1]) that run()
# probes for each facing. These are exactly the cells the original code
# tested.
# NOTE(review): "S" and "N" probe the same three cells, and the yaw
# thresholds in _facing_from_yaw look unusual; both are preserved as-is
# because the intended geometry cannot be confirmed from this code alone.
_PROBED_CELLS = {
    "S": (0, 1, 2),
    "W": (0, 3, 6),
    "E": (2, 5, 8),
    "N": (0, 1, 2),
}


def _facing_from_yaw(yaw):
    """Map a yaw reading to "S", "W", "N" or "E" (original thresholds)."""
    if yaw + 180 < 90:
        return "S"
    if yaw < 180:
        return "W"
    if yaw < 270:
        return "N"
    return "E"


def _split_layers(cells, layers=4, per_layer=9):
    """Cut the flat grid list into consecutive groups of per_layer entries."""
    return [list(cells[k * per_layer:(k + 1) * per_layer]) for k in range(layers)]


def run(argv=['']):
    """Run a Malmo mission in which the agent weaves forward and hops obstacles.

    When MALMO_XSD_PATH is missing from the environment a hint is printed
    and the function returns without touching Malmo. Otherwise the mission
    described by the embedded XML is started against a client at
    127.0.0.1:10000 (up to three attempts). While the mission runs the agent
    moves forward, alternates its turn direction with the sign of a sine
    wave, and uses the latest "all_the_blocks" grid observation to decide
    when to jump or make an extra turn.

    argv is passed, together with the agent host, to
    malmoutils.parse_command_line.
    """
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
            <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
            
              <About>
                <Summary>Hello world!</Summary>
              </About>
              
              <ServerSection>
                <ServerHandlers>
                  <DefaultWorldGenerator forceReset="true" />
                  <ServerQuitFromTimeUp timeLimitMs="30000"/>
                  <ServerQuitWhenAnyAgentFinishes/>
                </ServerHandlers>
              </ServerSection>
              
              
              <AgentSection mode="Survival">
                <Name>MalmoTutorialBot</Name>
                <AgentStart>
                    <Inventory>
                        <InventoryItem slot="8" type="diamond_pickaxe"/>
                    </Inventory>
                </AgentStart>
                <AgentHandlers>
                    <ObservationFromFullStats/>
                    <ObservationFromGrid>
                        <Grid name="all_the_blocks" >
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="2" z="1"/>
                        </Grid>
                    </ObservationFromGrid>
                    <ContinuousMovementCommands turnSpeedDegs="180"/>
                </AgentHandlers>
              </AgentSection>
            </Mission>'''

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.timeLimitInSeconds(300)
    my_mission.requestVideo(640, 480)

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")

    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Try to start the mission; pause between failed attempts and give up
    # (exit status 1) after the last one.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0,
                                    experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info,
                              "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # Time of the most recent poll that delivered anything new.
    last_delta = time.time()

    # main loop:
    TURN = 0    # phase of the sine wave that picks the weave direction
    TURN2 = 0   # countdown of extra "turn 1" commands
    JUMP = 0    # countdown until the pending jump is released with "jump 0"
    while world_state.is_mission_running:
        print("New Iteration")

        if JUMP > 0:
            JUMP = JUMP - 1
        if JUMP == 0:
            agent_host.sendCommand("jump 0")
            JUMP = JUMP - 1
        agent_host.sendCommand("move 1")
        if math.sin(TURN) / 3 >= 0:
            agent_host.sendCommand("turn 0.15")
        else:
            agent_host.sendCommand("turn -0.2")
        print(TURN, " ", math.sin(TURN))
        TURN = TURN + 0.3

        time.sleep(0.5)
        world_state = agent_host.getWorldState()

        # Fix: the original indexed observations[-1] unconditionally, which
        # raises IndexError whenever a poll returns no observation (for
        # example once the mission has ended). Observation-driven steering is
        # now skipped for such polls; the watchdog and reporting below still
        # run.
        if world_state.observations:
            y = json.loads(world_state.observations[-1].text)

            facing = _facing_from_yaw(y["Yaw"])
            print("Facing " + _FACING_NAMES[facing])

            blocks = _split_layers(y["all_the_blocks"])

            # Jump when any probed cell for the current facing is not air.
            # Fix: the debug line now always reports the cell that was
            # actually tested (the "N" branch used to print cell j + 6 while
            # testing cell j).
            willjump = False
            for cell in _PROBED_CELLS[facing]:
                if blocks[1][cell] != "air":
                    willjump = True
                print(cell, blocks[1][cell], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")

            # Cells 2 and 4 being non-air in both groups 1 and 2 requests
            # extra turning (the original repeated each clause twice).
            if any(blocks[1][k] != "air" and blocks[2][k] != "air"
                   for k in (2, 4)):
                TURN2 = 2

            if TURN2 >= 0:
                agent_host.sendCommand("turn 1")
                TURN2 = TURN2 - 1

        if (world_state.number_of_video_frames_since_last_state > 0
                or world_state.number_of_observations_since_last_state > 0
                or world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info,
                                  "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        # One blank line is printed per received video frame.
        for frame in world_state.video_frames:
            print()
    print("Mission has stopped.")
Ejemplo n.º 7
0
def run(size, algo1, algo2):
    #algorithms = {"reflex": reflex.reflex, "hiddenMarkov": hiddenMarkov.hiddenMarkov, "minimax":minimax.minimax, "expectimax": expectimax.expectimax}
    algorithms = {
        "reflex": reflex.reflex,
        'random': randomagent.randommove,
        'smartrandom': smartrandomagent.randommove,
        'astarreflex': AStarReflex.search,
        "minimax": minimax.minmax
    }
    #assert len(sys.argv) == 4, "Wrong number of arguments, the form is: mapSize, agent algorithm, enemy alogrithm"

    malmoutils.fix_print()

    # -- set up two agent hosts --
    agent_host1 = MalmoPython.AgentHost()
    agent_host2 = MalmoPython.AgentHost()
    #map_size = str(sys.argv[1])
    map_size = int(size)
    map_minus = str(map_size - 1)
    agentAlgo = algorithms[algo1]
    enemyAlgo = algorithms[algo2]
    #agentAlgo =  algorithms[sys.argv[2]]
    #enemyAlgo = algorithms[sys.argv[3]]

    # Use agent_host1 for parsing the command-line options.
    # (This is why agent_host1 is passed in to all the subsequent malmoutils calls, even for
    # agent 2's setup.)
    malmoutils.parse_command_line(agent_host1)

    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
                <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
                
                  <About>
                    <Summary>Hello world!</Summary>
                  </About>
                  
                  <ServerSection>
                    <ServerInitialConditions>
                      <Time>
                        <StartTime>12000</StartTime>
                        <AllowPassageOfTime>false</AllowPassageOfTime>
                      </Time>
                    </ServerInitialConditions>
                    <ServerHandlers>
                      <FlatWorldGenerator generatorString="3;7,220*1,5*3,2;3;,biome_1"/>
                      <DrawingDecorator>
                        <!-- coordinates for cuboid are inclusive -->
                        <DrawCuboid x1="0" y1="45" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="300" z2=''' + '"' + map_minus + '"' + ''' type="air" />            <!-- limits of our arena -->
                        <DrawCuboid x1="0" y1="40" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="44" z2=''' + '"' + map_minus + '"' + ''' type="lava" />           <!-- lava floor -->
                        <DrawCuboid x1="0"  y1="46" z1="0"  x2=''' + '"' + map_minus + '"' + ''' y2="46" z2=''' + '"' + map_minus + '"' + ''' type="snow" />
                      </DrawingDecorator>
                      <ServerQuitFromTimeUp timeLimitMs="30000"/>
                      
                    </ServerHandlers>
                  </ServerSection>
                  
                  <AgentSection mode="Survival">
                    <Name>Agent</Name>
                    <AgentStart>
                        <Inventory>
                            <InventoryItem slot="0" type="diamond_shovel"/>
                        </Inventory>
                        <Placement x="0.5" y="47.0" z="0.5" pitch="50" yaw="0"/>
                    </AgentStart>
                    <AgentHandlers>
                      <ObservationFromFullStats/>
                      <ObservationFromGrid>
                          <Grid name="floor3x3W">
                            <min x="-1" y="0" z="-1"/>
                            <max x="1" y="0" z="1"/>
                          </Grid>
                          <Grid name="floor3x3F">
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="-1" z="1"/>
                          </Grid>
                      </ObservationFromGrid>
                      <DiscreteMovementCommands/>
                    </AgentHandlers>
                  </AgentSection>
                  
                  <AgentSection mode="Survival">
                    <Name>Enemy</Name>
                    <AgentStart>
                        <Inventory>
                            <InventoryItem slot="0" type="diamond_shovel"/>
                        </Inventory>
                        <Placement x=''' + '"' + str(
        float(map_size) - 0.5) + '"' + ''' y="47.0" z=''' + '"' + str(
            float(map_size) - 0.5) + '"' + ''' pitch="50" yaw="180"/>
                    </AgentStart>
                    
                    <AgentHandlers>
                      <ObservationFromFullStats/>
                      <DiscreteMovementCommands/>
                      <ObservationFromGrid>
                          <Grid name="floor3x3W">
                            <min x="-1" y="0" z="-1"/>
                            <max x="1" y="0" z="1"/>
                          </Grid>
                          <Grid name="floor3x3F">
                            <min x="-1" y="-1" z="-1"/>
                            <max x="1" y="-1" z="1"/>
                          </Grid>
                      </ObservationFromGrid>
                      <RewardForTouchingBlockType>
                        <Block reward="-100.0" type="lava" behaviour="onceOnly"/>
                      </RewardForTouchingBlockType>
                      <AgentQuitFromTouchingBlockType>
                        <Block type="lava" />
                      </AgentQuitFromTouchingBlockType>
                    </AgentHandlers>
                  </AgentSection>
                </Mission>'''

    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(missionXML, True)

    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)
    my_mission_record = MalmoPython.MissionRecordSpec()

    def safeStartMission(agent_host, mission, client_pool, recording, role,
                         experimentId):
        used_attempts = 0
        max_attempts = 5
        print("Calling startMission for role", role)
        while True:
            try:
                agent_host.startMission(mission, client_pool, recording, role,
                                        experimentId)
                break
            except MalmoPython.MissionException as e:
                errorCode = e.details.errorCode
                if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP:
                    print("Server not quite ready yet - waiting...")
                    time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_INSUFFICIENT_CLIENTS_AVAILABLE:
                    print("Not enough available Minecraft instances running.")
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait in case they are starting up.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_NOT_FOUND:
                    print(
                        "Server not found - has the mission with role 0 been started yet?"
                    )
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait and retry.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                else:
                    print("Other error:", e.message)
                    print("Waiting will not help here - bailing immediately.")
                    exit(1)
            if used_attempts == max_attempts:
                print("All chances used up - bailing now.")
                exit(1)
        print("startMission called okay.")

    def safeWaitForStart(agent_hosts):
        print("Waiting for the mission to start", end=' ')
        start_flags = [False for a in agent_hosts]
        start_time = time.time()
        time_out = 120  # Allow two minutes for mission to start.
        while not all(start_flags) and time.time() - start_time < time_out:
            states = [a.peekWorldState() for a in agent_hosts]
            start_flags = [w.has_mission_begun for w in states]
            errors = [e for w in states for e in w.errors]
            if len(errors) > 0:
                print("Errors waiting for mission start:")
                for e in errors:
                    print(e.text)
                print("Bailing now.")
                exit(1)
            time.sleep(0.1)
            print(".", end=' ')
        print()
        if time.time() - start_time >= time_out:
            print("Timed out waiting for mission to begin. Bailing.")
            exit(1)
        print("Mission has started.")

    safeStartMission(agent_host1, my_mission, client_pool, my_mission_record,
                     0, '')
    safeStartMission(agent_host2, my_mission, client_pool, my_mission_record,
                     1, '')
    safeWaitForStart([agent_host1, agent_host2])

    def movement(ah, direction, pos):
        if direction == "north":
            ah.sendCommand("movenorth 1")
            position = (pos[0], pos[1] - 1)
        elif direction == "south":
            ah.sendCommand("movesouth 1")
            position = (pos[0], pos[1] + 1)
        elif direction == "west":
            ah.sendCommand("movewest 1")
            position = (pos[0] - 1, pos[1])
        elif direction == "east":
            ah.sendCommand("moveeast 1")
            position = (pos[0] + 1, pos[1])
        else:
            position = (pos[0], pos[1])
        time.sleep(0.1)
        return position

    def attack(ah, index, pos, map, enemy=False):
        """Break the block next to a player and record it in the arena map.

        The player turns to face the target, attacks once, and turns back to
        its resting heading. As in the original layout, the enemy rests
        facing north and the agent rests facing south:

            Enemy: 0 1 0  Agent: 0 3 0
                   4 X 2         2 X 4
                   0 3 0         0 1 0

        Args:
            ah: Agent host that receives the commands via ``sendCommand``.
            index: Direction of the block to break: "north", "east",
                "south" or "west". Any other value does nothing.
            pos: Sequence whose first two items are the player's (x, z);
                fractional coordinates are floored to a cell index.
            map: 2-D list indexed as ``map[x][z]``; the targeted cell is set
                to ``False`` in place.
            enemy: True when ``ah`` is the enemy (rests facing north), False
                for the agent (rests facing south).

        A target cell outside ``map`` is still attacked, but nothing is
        recorded for it: a negative index would otherwise wrap around and
        clear a cell on the opposite edge, and an index past the end would
        raise IndexError.
        """
        # Compass points in clockwise order. In the original command
        # sequences "turn 1" moves the heading one step along this order
        # (north -> east) and "turn -1" one step back.
        compass = ("north", "east", "south", "west")
        # (dx, dz) of the neighbouring cell in each direction.
        offsets = {
            "north": (0, -1),
            "east": (1, 0),
            "south": (0, 1),
            "west": (-1, 0),
        }
        if index not in offsets:
            return

        def send(command):
            # Every command is followed by the same short pause.
            ah.sendCommand(command)
            time.sleep(0.1)

        facing = "north" if enemy else "south"
        quarter_turns = (compass.index(index) - compass.index(facing)) % 4
        if quarter_turns == 3:
            # Three steps one way is a single step the other way.
            turn, undo, quarter_turns = "turn -1", "turn 1", 1
        else:
            turn, undo = "turn 1", "turn -1"

        for _ in range(quarter_turns):
            send(turn)
        send("attack 1")
        for _ in range(quarter_turns):
            send(undo)

        # int() keeps the indices usable where math.floor returns a float.
        dx, dz = offsets[index]
        x = int(math.floor(pos[0])) + dx
        y = int(math.floor(pos[1])) + dz
        if 0 <= x < len(map) and 0 <= y < len(map[x]):
            map[x][y] = False

    '''
    Sample Observation:
    {"DistanceTravelled":0,"TimeAlive":50,"MobsKilled":0,"PlayersKilled":0,"DamageTaken":0,"DamageDealt":0,
    "Life":20.0,"Score":0,"Food":20,"XP":0,"IsAlive":true,"Air":300,"Name":"Enemy","XPos":5.5,"YPos":47.0,
    "ZPos":5.5,"Pitch":50.0,"Yaw":180.0,"WorldTime":12000,"TotalTime":57}

    '''

    agent_score = 0
    #count = 0
    agent_ob = None
    enemy_ob = None

    map = [[True for i in range(0, int(map_size))]
           for j in range(0, int(map_size))]
    # for i in map:
    # print(i)

    while True:
        #Scores should decrease with time and get a bonus if they win
        agent_score -= 1
        agent_state = agent_host1.peekWorldState()
        enemy_state = agent_host2.peekWorldState()
        if agent_state.number_of_observations_since_last_state > 0:
            agent_ob = json.loads(agent_state.observations[-1].text)

        if enemy_state.number_of_observations_since_last_state > 0:
            enemy_ob = json.loads(enemy_state.observations[-1].text)
        if agent_ob is None or enemy_ob is None:
            continue
        if agent_state.is_mission_running == False:
            break
        agent_position = (agent_ob["XPos"], agent_ob["ZPos"])
        enemy_position = (enemy_ob["XPos"], enemy_ob["ZPos"])

        agent_grid = agent_ob.get(u'floor3x3F', 0)
        enemy_grid = enemy_ob.get(u'floor3x3F', 0)

        if "lava" in agent_grid:
            print("Enemy Won!")
            agent_score -= 100
            for i in map:
                print(i)
            return 0
            break
        if "lava" in enemy_grid:
            print("Agent Won!")
            agent_score += 100
            for i in map:
                print(i)
            return 1
            break

        agentMoveString, agentBreakIndex = agentAlgo(agent_host1,
                                                     agent_position,
                                                     enemy_position,
                                                     agent_grid, map)
        enemyMoveString, enemyBreakIndex = enemyAlgo(agent_host2,
                                                     enemy_position,
                                                     agent_position,
                                                     enemy_grid, map)

        # #Agent Turn to Break
        attack(agent_host1, agentBreakIndex, agent_position, map)
        # #Enemy Turn to Move
        pos = movement(agent_host2, enemyMoveString, enemy_position)

        # #Enemy Turn to Break
        attack(agent_host2, enemyBreakIndex, pos, map, enemy=True)
        # #Agent Turn to Move
        movement(agent_host1, agentMoveString, agent_position)
    for i in map:
        print(i)
    return 2
def main():
    """Run the mission from ../custom_xmls/usar.xml once and record it.

    The code below builds the mission spec, adds a grid observation, enables
    reward/observation/command/MP4 recording into ../human_trajectories,
    starts the mission (up to three attempts) and then polls video frames
    until the mission stops.
    """
    # (min, max) bounds per axis, passed to observeGrid() further down.
    sight = {'x': (-30, 30), 'z': (-30, 30), 'y': (-1, 1)}

    # Number of cells covered along each axis.
    # NOTE(review): these three values are not used in the code shown here.
    range_x = abs(sight['x'][1] - sight['x'][0]) + 1
    range_y = abs(sight['y'][1] - sight['y'][0]) + 1
    range_z = abs(sight['z'][1] - sight['z'][0]) + 1

    malmoutils.fix_print()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host)
    recordingsDirectory = malmoutils.get_recordings_directory(agent_host)
    # The directory obtained from malmoutils is immediately replaced by this
    # hard-coded path, so recordingsDirectory is always non-empty below.
    recordingsDirectory = "../human_trajectories"
    if (not os.path.exists(recordingsDirectory)):
        os.mkdir(recordingsDirectory)
    logging.basicConfig(level=logging.INFO)
    # pdb.set_trace()
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # set to INFO if you want fewer messages

    # NOTE(review): video_width/video_height are not referenced in the code
    # shown here; the frame size is hard-coded again in Image.frombytes().
    video_width = 640
    video_height = 480
    # NOTE(review): bare expression with no effect.
    sys.argv

    mission_xml_path = "../custom_xmls/usar.xml"
    validate = True
    # my_mission = MalmoPython.MissionSpec(missionXML, validate)
    my_mission = MalmoPython.MissionSpec(getMissionXML(mission_xml_path),
                                         validate)

    # ObservationFromGrid
    my_mission.observeGrid(sight['x'][0], sight['y'][0], sight['z'][0],
                           sight['x'][1], sight['y'][1], sight['z'][1],
                           'relative_view')

    # agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

    # NOTE(review): num_reps is computed but the mission loop below iterates
    # over range(1) regardless.
    if agent_host.receivedArgument("test"):
        num_reps = 1
    else:
        num_reps = 30000

    my_mission_record = MalmoPython.MissionRecordSpec()
    if recordingsDirectory:
        my_mission_record.recordRewards()
        my_mission_record.recordObservations()
        my_mission_record.recordCommands()
        # if agent_host.receivedArgument("record_video"): # my_mission_record.recordMP4(24,2000000)
        my_mission_record.recordMP4(24, 2000000)
    # The timestamp is taken once, before the loop, and names the .tgz file.
    recording_name = datetime.now().strftime("%Y-%m-%d_%I-%M-%S_%p")
    for iRepeat in range(1):
        my_mission_record.setDestination(
            os.path.join(recordingsDirectory, recording_name + ".tgz"))
        # Try to start the mission up to max_retries times, pausing 2 s
        # between attempts; the last failure is logged and exits the process.
        max_retries = 3
        for retry in range(max_retries):
            try:
                agent_host.startMission(my_mission, my_mission_record)
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    logger.error("Error starting mission: %s" % e)
                    exit(1)
                else:
                    time.sleep(2)

        logger.info('Mission %s', iRepeat)
        logger.info("Waiting for the mission to start")
        # Poll every 0.1 s until the world state reports the mission began.
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
        print()

        # Counts the frames converted below.
        img_counter = 0
        # print('observations', world_state.observations)
        while world_state.is_mission_running:
            world_state = agent_host.getWorldState()

            # Observations
            # msg = observe(agent_host)
            # if msg is not None:
            #     print('timestamp: ', msg['timestamp'])

            # NOTE : Nothing recorded in world state. Uncomment to test it out.

            # if world_state.number_of_observations_since_last_state > 0:
            #     timestamp = world_state.observations[-1].timestamp
            #     msg = world_state.observations[-1].text
            #     obs = json.loads(msg)
            #     print("{'timestamp': timestamp, 'observations': obs}")

            # Video Frames
            # Poll every 0.05 s until a frame arrives or the mission stops.
            while world_state.number_of_video_frames_since_last_state < 1 and world_state.is_mission_running:
                logger.info("Waiting for frames...")
                time.sleep(0.05)
                world_state = agent_host.getWorldState()

            # NOTE(review): this is logged even when the wait above ended
            # because the mission stopped rather than because a frame came in.
            logger.info("Got frame!")
            # import ipdb; ipdb.set_trace
            # print('observations', world_state.observations)
            # world_state.observations
            if world_state.is_mission_running:
                # timestamp = world_state.observations[-1].timestamp
                # msg = world_state.observations[-1].text
                # print(timestamp)
                # print(msg)
                frame = world_state.video_frames[-1]
                # NOTE(review): assumes 640x480 RGB frames -- confirm against
                # the mission XML. The image is built but not saved here
                # (the imsave call below is commented out).
                img = Image.frombytes('RGB', (640, 480), bytes(frame.pixels))
                # imageio.imsave("./tmp_imgs/{}.png".format(img_counter), img)
                img_counter += 1
        logger.info("Mission has stopped.")
        time.sleep(1)  # let the Mod recover