Example #1
    def configure_gym_marlo(self, env_name):
        """ Configure environment for Marlo platform
            Require previously launched minecraft environment
            - to launch: $MALMO_MINECRAFT_ROOT/launchClient.sh -p 10000

            Available params defined in:
            - https://github.com/crowdAI/marLo/blob/8652f8daef2caf9202881d002a2d3c28c882d941/marlo/base_env_builder.py
        """

        import marlo
        marlo.logger.setLevel('ERROR')

        # client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
        client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
        join_tokens = marlo.make(
            env_name,
            params={
                "client_pool": client_pool,
                "videoResolution": [84, 84],
                "tick_length": 1,
                # "prioritise_offscreen_rendering": False,
                # "comp_all_commands": ['move', "turn"],
            })
        join_token = join_tokens[0]
        env = marlo.init(join_token)

        return env
    def __init__(self):
        client_pool = [('127.0.0.1', 10000)]

        join_tokens = marlo.make('MarLo-CliffWalking-v0',
                                  params={
                                    "client_pool": client_pool,
                                    "comp_all_commands": ["movenorth", "movesouth", "moveeast", "movewest"],
                                    "prioritise_offscreen_rendering":False
                                  })

        join_token = join_tokens[0]

        self.env = marlo.init(join_token)
        self.env.params.suppress_info = False
        self.env.mission_spec.setViewpoint(1)
        for z in range(0, 13):
            x = 2
            self.env.mission_spec.drawBlock(x, 45, z, "lava")

        # command_parser = commands.CommandParser()
        # self.commands = command_parser.get_commands(self.env.params.mission_xml.encode('ascii'), self.env.params.role)
        # self.commands_size = len(self.commands)
        for _space, _commands in zip(self.env.action_spaces, self.env.action_names):
            self.commands = _commands
            self.commands_size = len(_commands) - 1
            print(self.commands)

        self.previousState = None
        self.alpha = 0.1
        self.gamma = 1.0
        self.epsilon = 0.1
        self.q_table = {}
        self.test = 0
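A minimal sketch of how configure_gym_marlo above might be exercised, assuming it lives on some environment-builder class (the EnvBuilder name below is hypothetical) and that a Minecraft client is already listening on port 10000:

builder = EnvBuilder()  # hypothetical class that owns configure_gym_marlo
env = builder.configure_gym_marlo('MarLo-FindTheGoal-v0')
obs = env.reset()
done = False
while not done:
    # sample random actions just to check the connection works
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()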
Example #3
def make_env(self, env_id):
    params = merge(params_default, {
        'client_pool': self.client_pool
    })
    print("{} Minecraft clients available".format(len(self.client_pool)))
    join_tokens = marlo.make(env_id, params)
    return marlo.init(join_tokens[0])
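The snippet above assumes a params_default dict and a merge helper that are not shown; a minimal sketch of what they might look like (the names and default values are assumptions, not taken from the original repository):

# Hypothetical defaults and a non-destructive dict merge, as assumed by make_env.
params_default = {
    'videoResolution': [84, 84],
}


def merge(base, override):
    merged = dict(base)
    merged.update(override)
    return merged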
Example #4
def make_env(env_seed=0):
    join_tokens = marlo.make("MarLo-FindTheGoal-v0",
                             params=dict(
                                 allowContinuousMovement=["move", "turn"],
                                 videoResolution=[84, 84],
                                 kill_clients_after_num_rounds=500))
    env = marlo.init(join_tokens[0])

    obs = env.reset()
    # env.render(mode="rgb_array")
    action = env.action_space.sample()
    obs, r, done, info = env.step(action)
    env.seed(int(env_seed))
    return env
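Example #5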
def make_env(env_seed=0):
    join_tokens = marlo.make("MarLo-FindTheGoal-v0",
                             params=dict(
                                 comp_all_commands=["move", "turn"],
                                 allowContinuousMovement=True,
                                 videoResolution=[336, 336],
                             ))
    env = marlo.init(join_tokens[0])

    obs = env.reset()
    action = env.action_space.sample()
    obs, r, done, info = env.step(action)
    env.seed(int(env_seed))
    return env
Example #6
def create_marlo(env_config: dict):

    config = deepcopy(MALMO_DEFAULTS)
    config.update(env_config)

    client_pool = [('127.0.0.1', 10000)]
    join_tokens = marlo.make('MarLo-DefaultFlatWorld-v0',
                             params=dict(
                                 allowContinuousMovement=["move", "turn"],
                                 client_pool=client_pool,
                                 videoResolution=[84, 84]
                             ))
    
    env = marlo.init(join_tokens[0])

    return env
def get_join_tokens():
    if marlo.is_grading():
        """
            In the crowdAI evaluation environment, obtain the join_tokens
            from the evaluator.

            The `params` argument passed to `evaluator_join_token` only allows
            the following keys:
                    "seed",
                    "tick_length",
                    "max_retries",
                    "retry_sleep",
                    "step_sleep",
                    "skip_steps",
                    "videoResolution",
                    "continuous_to_discrete",
                    "allowContinuousMovement",
                    "allowDiscreteMovement",
                    "allowAbsoluteMovement",
                    "add_noop_command",
                    "comp_all_commands"
                    # TODO: Add this to the official documentation ? 
                    # Help Wanted :D Pull Requests welcome :D 
        """
        join_tokens = marlo.evaluator_join_token(params={})

    else:
        """
            When debugging locally,
            please ensure that you have Minecraft clients running on ports
            10000 and 10001 by doing:
            $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
            $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10001
        """
        print("Generating join tokens locally...")
        client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
        join_tokens = marlo.make('MarLo-BuildbattleTrain1-v0',
                                 params={
                                    "client_pool": client_pool,
                                    "agent_names" : [
                                        "MarLo-Agent-0",
                                        "MarLo-Agent-1"
                                    ]
                                 })
    return join_tokens
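As a sketch of how the whitelisted keys listed in the docstring above could be used on the grading side (the specific values are illustrative, not prescribed by the challenge):

# Illustrative only: request a smaller video resolution and a restricted
# command set from the evaluator, using keys from the whitelist above.
join_tokens = marlo.evaluator_join_token(params={
    "videoResolution": [84, 84],
    "comp_all_commands": ["move", "turn"],
    "add_noop_command": True,
})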
def get_join_tokens():
    if marlo.is_grading():
        """
            In the crowdAI evaluation environment, obtain the join_tokens
            from the evaluator.

            The `params` argument passed to `evaluator_join_token` only allows
            the following keys:
                    "seed",
                    "tick_length",
                    "max_retries",
                    "retry_sleep",
                    "step_sleep",
                    "skip_steps",
                    "videoResolution",
                    "continuous_to_discrete",
                    "allowContinuousMovement",
                    "allowDiscreteMovement",
                    "allowAbsoluteMovement",
                    "add_noop_command",
                    "comp_all_commands"
                    # TODO: Add this to the official documentation ? 
                    # Help Wanted :D Pull Requests welcome :D 
        """
        join_tokens = marlo.evaluator_join_token(params={})

    else:
        """
            When debugging locally,
            please ensure that you have Minecraft clients running on ports
            10000 and 10001 by doing:
            $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
            $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10001
        """
        print("Generating join tokens locally...")
        client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
        join_tokens = marlo.make('MarLo-BuildbattleTrain1-v0',
                                 params={
                                     "client_pool":
                                     client_pool,
                                     "agent_names":
                                     ["MarLo-Agent-0", "MarLo-Agent-1"]
                                 })
    return join_tokens
def init_environment(env_name):

    client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
    join_tokens = marlo.make(env_name,
                             params={
                                "client_pool": client_pool
                             })

    # As this is a single-agent scenario, there will be just a single token
    # assert len(join_tokens) == 1
    join_token = join_tokens[0]

    # initialize environment    
    env = marlo.init(join_token)

    # env.configure(videoResolution=[84, 84])
    # TODO: figure out where in marlo the video resolution can be fixed;
    # probably in the params passed to marlo.make.

    # env.configure(stack_frames=4)
    # The frame-stacking logic lives in the environment wrapper. The
    # observation either has to be transposed here, or the DQN dropped;
    # transposing is the simpler option. The catch is that the environment
    # cannot be observed four times per step: it is turn-based, so every
    # observation taken costs one step.

    # Create the env and add Malmo-specific configuration.

    # Most of this can probably be configured in the wrapper. It remains to
    # be checked which parts of the wrapper still apply to the new
    # environment, but the logic of collecting four consecutive states needs
    # to move into the wrapper. Currently the wrapper only casts observations
    # to float; the grayscale transformation is not applied either, so there
    # is an extra dimension of 3 channels instead of 4. Since the value 4 is
    # not hard-coded anywhere, this should still work.
    env = wrap_env_marlo(env)

    if RANDOM_SEED:
        env.seed(RANDOM_SEED)

    print("Observation Space: ", env.observation_space)
    print("Action Space: ", env.action_space)

    return env
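wrap_env_marlo is not shown in this listing; based on the notes above (only a float cast today, with frame stacking and grayscale still to be added), a sketch of what such a wrapper could look like follows. Everything here is an assumption about that helper, not its actual implementation:

import collections

import gym
import numpy as np


def wrap_env_marlo(env, stack_frames=4):
    """Hypothetical wrapper: grayscale, float cast, and stacking of the last
    `stack_frames` observations along the channel axis."""

    class MarloWrapper(gym.Wrapper):
        def __init__(self, env):
            super().__init__(env)
            self.frames = collections.deque(maxlen=stack_frames)

        def _process(self, obs):
            # Convert HxWx3 uint8 frames to HxWx1 float32 and stack them.
            gray = np.mean(obs, axis=2, keepdims=True).astype(np.float32)
            self.frames.append(gray)
            while len(self.frames) < stack_frames:
                self.frames.append(gray)
            return np.concatenate(list(self.frames), axis=2)

        def reset(self, **kwargs):
            self.frames.clear()
            return self._process(self.env.reset(**kwargs))

        def step(self, action):
            obs, reward, done, info = self.env.step(action)
            return self._process(obs), reward, done, info

    return MarloWrapper(env)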
Example #10
def get_join_tokens():
    if marlo.is_grading():
        """
            In the crowdAI evaluation environment, obtain the join_tokens
            from the evaluator.
        """
        join_tokens = marlo.evaluator_join_token()

    else:
        """
            When debugging locally,
            please ensure that you have a Minecraft client running on port 10000
            by doing:
            $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
        """
        client_pool = [('127.0.0.1', 10000)]
        join_tokens = marlo.make('MarLo-FindTheGoal-v0',
                                 params={"client_pool": client_pool})
    return join_tokens
Example #11
def env_creator(env_config):
    time.sleep(0.5)
    tokens = marlo.make('MarLo-SimpleRoom-v0',
                        params=dict(
                        role=getRole(),
                        client_pool=getClientPool(),
                        observeGrid=[-1, -1, -1, 1, 0, 1],  # (x1, y1, z1, x2, y2, z2, name)
                        gridSize=18,
                        episode_timelimit=40e3,
                        seed=10,
                        comp_all_commands=['move', 'turn'],
                        add_noop_command=False,
                        tick_length=50,
                        forceWorldReset=False,
                        # "recordDestination": "data.tgz",
                        # "recordMP4": [20, 400000],
                        ))
    join_token = tokens[0]                   
    return marlo.init(join_token)
Example #12
def make_env(env_name, env_seed=0, demo=False):
    join_tokens = marlo.make(env_name,
                             params=dict(
                                 comp_all_commands=["move", "turn"],
                                 allowContinuousMovement=True,
                                 videoResolution=[336, 336],
                                 kill_clients_retry=10,
                                 step_sleep=0.01,
                                 kill_clients_after_num_rounds=100,
                                 prioritise_offscreen_rendering=not demo,
                             ))
    env = marlo.init(join_tokens[0])
    env = Monitor(env)

    obs = env.reset()
    action = env.action_space.sample()
    obs, r, done, info = env.step(action)
    env.seed(int(env_seed))
    return env
Example #13
def setupEnv(mission='MarLo-FindTheGoal-v0', videoResolution=[800, 600], port=10000):
    # Sets up marlo environment
    client_pool = [('127.0.0.1', port)]
    # Step sleep set to 0.2 to handle lag between MarLo and Malmo
    join_tokens = marlo.make(mission, params={
        "client_pool": client_pool,
        'suppress_info': False,
        'videoResolution': videoResolution,
        'tick_length': 50,
        'step_sleep': 0.2})
    # As this is a single agent scenario,
    # there will just be a single token
    assert len(join_tokens) == 1
    join_token = join_tokens[0]
    env = marlo.init(join_token)
    # Change the spec of the mission by loading xml from file
    # This is necessary to use our custom XML files
    missionXML = loadMissionFile(mission + '.xml')
    env.mission_spec = MalmoPython.MissionSpec(missionXML, True)
    return env
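loadMissionFile is referenced but not defined in the excerpt; a minimal sketch of such a helper, assuming the custom mission XML files sit next to the script:

def loadMissionFile(filename):
    # Hypothetical helper: read a custom mission XML file from disk.
    with open(filename, 'r') as f:
        return f.read()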
Example #14
def env_creator(env_config):
    print(
        '---------------------------------------------\n\n\nENV_CREATE FUNCTION CALLED\n\n\n---------------------------------------------'
    )
    join_tokens = marlo.make(
        'MarLo-SimpleRoom-v0',
        params=dict(
            client_pool=None,
            observeGrid=[-1, -1, -1, 1, 0, 1],  # (x1, y1, z1, x2, y2, z2, name)
            gridSize=18,
            episode_timelimit=40e3,
            seed=10,
            comp_all_commands=['move', 'turn'],
            add_noop_command=False,
            tick_length=10,
            forceWorldReset=False,
        ))
    join_token = join_tokens[0]
    return marlo.init(join_token)
Example #15
import marlo
import time
import json
import base64

client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
join_tokens = marlo.make('MarLo-MazeRunner-v0',
                         params={
                             "agent_names": ["MarLo-Agent0"],
                             "videoResolution": [800, 600],
                             "forceWorldReset": True,
                             "client_pool": client_pool,
                             "allowContinuousMovement": ["move", "turn"],
                             "recordMP4": "video"
                         })


@marlo.threaded
def run_agent(join_token):
    env = marlo.init(join_token)
    frame = env.reset()
    done = False
    while not done:
        _action = env.action_space.sample()
        obs, reward, done, info = env.step(_action)
        time.sleep(0.5)
        print("reward:", reward)
        print("done:", done)
        print("info", info)

Example #16
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
out_dir_logs = out_dir + '/logging'
if not os.path.exists(out_dir_logs):
    os.makedirs(out_dir_logs)
if save_dir and not os.path.exists(save_dir):
    os.makedirs(save_dir)

experiments.set_log_base_dir(out_dir)

# Ensure that you have a Minecraft client running with: marlo-server --port 10000
# "MarLo-FindTheGoal-v0"
# 'MarLo-CatchTheMob-v0'
join_tokens = marlo.make("MarLo-FindTheGoal-v0",
                         params=dict(allowContinuousMovement=["move", "turn"],
                                     videoResolution=[84, 84],
                                     kill_clients_after_num_rounds=500))
env = marlo.init(join_tokens[0])

obs = env.reset()
env.render(mode="rgb_array")
print('initial observation:', obs)

action = env.action_space.sample()
obs, r, done, info = env.step(action)
print('next observation:', obs)
print('reward:', r)
print('done:', done)
print('info:', info)

print('actions:', str(env.action_space))
Example #17
parser.add_argument('--turn_based', action='store_true')
args = parser.parse_args()

turn_based = args.turn_based
number_of_rollouts = args.rollouts

# Register the multi-agent environment.
env_name = 'malmo-multi-agent-v0'

register(
    id=env_name,
    entry_point='marlo.envs:MinecraftEnv',
    # Make sure mission xml is in the marlo/assets directory.
    kwargs={'mission_file': args.mission_file})

env = marlo.make(env_name)

resolution = [84, 84]  # [800, 600]
config = {
    'allowDiscreteMovement': ["move", "turn"],
    'videoResolution': resolution,
    "turn_based": turn_based
}

join_agents = start_agents(env,
                           env_name,
                           None,
                           config,
                           number_of_rollouts + 1,
                           daemon=True)
Example #18
import marlo
import time
import json
import base64

client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-MazeRunner-v0',
                         params={
                             "videoResolution": [800, 600],
                             "client_pool": client_pool,
                             "agent_names": ["MarLo-Agent0"],
                             "allowContinuousMovement": ["move", "turn"],
                         })


def run_agent(join_token):
    env = marlo.init(join_token)
    frame = env.reset()
    done = False
    count = 0
    while not done:
        _action = env.action_space.sample()
        # obs, reward, done, info = env.step(_action)
        time.sleep(0.5)
        # print("reward:", reward)
        # print("done:", done)
        # print("info", info)
    env.close()


for _join_token in join_tokens:
    run_agent(_join_token)
Example #19
import pickle
from enum import Enum
import matplotlib.pyplot as plt
from lxml import etree
import logging

import marlo
import tensorflow as tf

frame_size = [60, 60]
num_input = frame_size[0] * frame_size[1]

client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
join_tokens = marlo.make("MarLo-MobchaseTrain1-v0",
                         params=dict(
                             client_pool=client_pool,
                             agent_names=["MarLo-Agent-0", "MarLo-Agent-1"],
                             videoResolution=frame_size,
                             kill_clients_after_num_rounds=500,
                             forceWorldReset=False,
                             max_retries=500,
                             retry_sleep=0.1,
                             step_sleep=0.1,
                             prioritise_offscreen_rendering=False,
                             suppress_info=False))
assert len(join_tokens) == 2


class Convolutional():
    def __init__(self, num_classes, learningRate):
        self.x = tf.placeholder("float", [None, num_input])

        # Reshape the flattened data
        self.input_layer = tf.reshape(self.x,
                                      [-1, frame_size[1], frame_size[0], 1])
Example #20
def train(arglist):

    ############################################

    @marlo.threaded
    def funcion(env, action, agent_num):

        contador = 0
        while True:  # Execute the action while avoiding errors
            _, r, done, info, new_obs = env.step(np.argmax(action) + 1)
            new_obs = new_obs['observation']
            if new_obs is None:
                new_obs = last_obs[agent_num]
            else:
                new_obs = [
                    new_obs.get('XPos'),
                    new_obs.get('ZPos'),
                    new_obs.get('Yaw')
                ]
            contador += 1
            if r != 0:
                break
            elif info is not None:
                if "caught_the_Chicken" in info:
                    r += 1
                    print("THE SCORE HAS BEEN HARD-CODED ", done, " ", info)
                    break

                if "Agent0_defaulted" in info:
                    r = -0.02
                    break

                if "Agent1_defaulted" in info:
                    r = -0.02
                    break

            elif contador >= 100:
                print("SE HA TARDADO MUCHO EN REALIZAR LA ACCION")
                break
        return new_obs, r, done, info


#######################################################

    with U.single_threaded_session():

        # Create environment

        client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
        join_tokens = marlo.make(
            "MarLo-MobchaseTrain1-v0",
            params=dict(client_pool=client_pool,
                        agent_names=["MarLo-Agent-0", "MarLo-Agent-1"],
                        videoResolution=[64, 64],
                        kill_clients_after_num_rounds=500,
                        forceWorldReset=False,
                        max_retries=500,
                        retry_sleep=0.1,
                        step_sleep=0.1,
                        prioritise_offscreen_rendering=False,
                        suppress_info=False))
        assert len(join_tokens) == 2

        # Create agent trainers
        #obs_shape_n = [(64,64,3,),(64,64,3,)]
        observation_space = [
            gym.spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(6, ),
                           dtype=np.float32),
            gym.spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(6, ),
                           dtype=np.float32)
        ]
        obs_shape_n = [observation_space[i].shape for i in range(2)]
        action_space = [gym.spaces.Discrete(4), gym.spaces.Discrete(4)]
        num_adversaries = 0
        trainers = get_trainers(num_adversaries, obs_shape_n, action_space,
                                arglist)

        # Initialize
        U.initialize()

        epis_trans = 0
        epsilon = 0.0

        # Load previous results, if necessary
        if arglist.load_dir == "":
            arglist.load_dir = arglist.save_dir
        if arglist.restore:
            print('Loading previous state...')
            resbuf = pickle.load(open("./saves/losbuffers.p", "rb"))
            epis_trans = resbuf[2]
            epsilon = resbuf[3]
            U.load_state(arglist.load_dir + str(epis_trans))
            trainers[0].replay_buffer = resbuf[0]
            trainers[1].replay_buffer = resbuf[1]

        episode_rewards = []
        agent_rewards = [
            [] for _ in range(2)
        ]  # list of each agent's per-episode reward sums
        final_ep_rewards = []  # sum of rewards for training curve
        final_ep_ag_rewards = []  # agent rewards for training curve
        saver = tf.train.Saver()
        t_start = time.time()

        #inicial0 = [1.5, 2.5, 270, 5.5, 6.5, 180]
        #inicial1 = [5.5, 6.5, 180, 1.5, 2.5, 270]
        inicial0 = [1.5, 2.5, 270, 3.5, 4.5, 180]
        inicial1 = [3.5, 4.5, 180, 1.5, 2.5, 270]

        while True:
            #NEW

            last_obs = []

            agent_rewards[0].append(0)
            agent_rewards[1].append(0)

            env0 = marlo.init(join_tokens[0])
            env1 = marlo.init(join_tokens[1])

            # Run agent-0
            agent_thread_0, res0 = reiniciar(env0)
            # Run agent-1
            agent_thread_1, res1 = reiniciar(env1)

            obs0 = res0.get()
            obs1 = res1.get()

            obs0 = inicial0
            obs1 = inicial1

            done0 = False
            done1 = False

            num_eps = 0

            # Run 10 episodes
            while True:

                if random() > epsilon:
                    # take the action chosen by the agent's policy
                    action0 = trainers[0].action(np.array(obs0))
                else:
                    action0 = np.random.dirichlet(np.ones(4), size=1)[0]

                if random() > epsilon:
                    # take the action chosen by the agent's policy
                    action1 = trainers[1].action(np.array(obs1))
                else:
                    action1 = np.random.dirichlet(np.ones(4), size=1)[0]
                # print("They are inside")
                # Run agent-0
                agent_thread_0, resul0 = funcion(env0, action0, 0)
                # Run agent-1
                agent_thread_1, resul1 = funcion(env1, action1, 1)

                # Wait for both the threads to complete execution
                agent_thread_0.join()
                # print("Thread 0 is out")
                agent_thread_1.join()
                # print("Both threads are out")

                nob0, r0, done0, i0 = resul0.get()
                nob1, r1, done1, i1 = resul1.get()

                last_obs = [copy.deepcopy(nob0), copy.deepcopy(nob1)]

                # The new observations
                varhelp = copy.deepcopy(nob0)
                nob0.extend(nob1)
                nob1.extend(varhelp)

                # print("THESE ARE THE OBSERVATIONS")
                # print(nob0)
                # print(nob1)

                trainers[0].experience(np.array(obs0), action0, r0,
                                       np.array(nob0), done0, False)
                trainers[1].experience(np.array(obs1), action1, r1,
                                       np.array(nob1), done1, False)

                agent_rewards[0][-1] += r0
                agent_rewards[1][-1] += r1

                obs0 = nob0
                obs1 = nob1

                if done0 or done1:
                    print("EPISODIO NUMERO:", num_eps)
                    # Run agent-0
                    agent_thread_0, res0 = reiniciar(env0)
                    # Run agent-1
                    agent_thread_1, res1 = reiniciar(env1)

                    obs0 = res0.get()
                    obs1 = res1.get()
                    obs0 = inicial0
                    obs1 = inicial1
                    done0 = False
                    done1 = False
                    num_eps += 1

                    loss = None
                    for agent in trainers:
                        agent.preupdate()
                    for agent in trainers:
                        loss = agent.update(trainers)
                        print("LA LOSS", loss)

                    if num_eps % epi_per_iter == 0:
                        break
                    agent_rewards[0].append(0)
                    agent_rewards[1].append(0)

            # End of the 10-episode run
            print("END OF THE SAMPLE")

            # A list of tuples is built that pairs the two agents' rewards by
            # episode, using the episodes generated in this iteration.
            # Each tuple is converted to a list and sum() is applied.
            # The result is appended to the end of episode_rewards.
            #
            # In short: the agents' latest per-episode rewards are summed and added to the list.
            episode_rewards.extend(
                list(
                    map(
                        sumtuple,
                        list(
                            zip(agent_rewards[0][epis_trans:],
                                agent_rewards[1][epis_trans:])))))

            epis_trans += 10
            if epsilon > 0.1:
                epsilon -= 0.002

            print("TOTAL DE EPISODIOS TRANSCURRIDOS: ", epis_trans,
                  " Epsilon: ", epsilon)

            # update all trainers, if not in display or benchmark mode

            # save model, display training output
            if (epis_trans % arglist.save_rate == 0):
                U.save_state(arglist.save_dir + str(epis_trans), saver=saver)
                losbuffers = [
                    trainers[0].replay_buffer, trainers[1].replay_buffer,
                    epis_trans, epsilon
                ]
                pickle.dump(
                    losbuffers,
                    open("./saves/losbuffers" + str(epis_trans) + ".p", "wb"))
                pickle.dump(losbuffers, open("./saves/losbuffers.p", "wb"))
            if (epis_trans % 1000 == 0):
                break
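train() relies on two helpers that are not shown in this excerpt, reiniciar (a threaded environment reset) and sumtuple; minimal sketches consistent with how they are called above (both are assumptions, not the original code):

@marlo.threaded
def reiniciar(env):
    # Reset the environment in a background thread; the caller retrieves the
    # initial observation through the returned result's .get().
    return env.reset()


def sumtuple(pair):
    # Sum the two agents' rewards for a single episode.
    return sum(list(pair))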
Example #21
#!/usr/bin/env python
# Please ensure that you have two Minecraft clients running on ports 10000
# and 10001 by doing:
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10001

import marlo
client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
join_tokens = marlo.make('MarLo-MazeRunner-v0',
                         params={
                             "client_pool": client_pool,
                             "agent_names": ["MarLo-Agent-0", "MarLo-Agent-1"]
                         })
# As this is a two-agent scenario,
# there will be just two join tokens
assert len(join_tokens) == 2


@marlo.threaded
def run_agent(join_token):
    env = marlo.init(join_token)
    observation = env.reset()
    done = False
    count = 0
    while not done:
        _action = env.action_space.sample()
        obs, reward, done, info = env.step(_action)
        print("reward:", reward)
        print("done:", done)
        print("info", info)
    env.close()
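As in the other threaded examples, the excerpt stops before the two agents are started; the missing driver probably resembles this sketch (assumed, not part of the original):

# Start one threaded agent per join token and wait for both to finish;
# both agents must be running for the two-agent mission to begin.
agent_threads = []
for _join_token in join_tokens:
    thread_handle, _ = run_agent(_join_token)
    agent_threads.append(thread_handle)
for thread_handle in agent_threads:
    thread_handle.join()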
Example #22
#!/usr/bin/env python
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000

import marlo
import time
import random

client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-MineField-v0',
                         params=dict(
                             client_pool=client_pool,
                             observeGrid=[-1, 0, -1, 1, 0, 1],  # (x1, y1, z1, x2, y2, z2, name)
                             gridSize=9,
                             seed=1,  # seed=99 !!!
                             episode_timelimit=60e3,
                             comp_all_commands=['move', 'turn'],
                             add_noop_command=False,
                             videoResolution=None,
                         ))

# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)
observation = env.reset()

done = False
total_reward = 0
i = 0
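The listing cuts off after the counters are initialised; a rollout loop consistent with the variables above (done, total_reward, i) might look like this sketch:

while not done:
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    total_reward += reward
    i += 1
    print("step:", i, "reward:", reward, "total:", total_reward)
env.close()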
Example #23
n_hidden_channels = args.n_hidden_channels
n_hidden_layers = args.n_hidden_layers

if DEBUG_ON:
    print("n_hidden_channels " + str(n_hidden_channels) + " n_hidden_layers " +
          str(n_hidden_layers))

# GAME SELECTION AND CONNECTION TO THE CLIENT

# Ensure that you have a Minecraft client running with: marlo-server --port 10000
client_pool = [('127.0.0.1', 10020)]
if DEBUG_ON:
    print("Game:", GAME)
join_tokens = marlo.make(GAME,
                         params=dict(videoResolution=[VIDEO_RES, VIDEO_RES],
                                     kill_clients_after_num_rounds=500))
env = marlo.init(join_tokens[0])

# ------------------------------------------

obs = env.reset()
env.render(mode="rgb_array")
if DEBUG_ON:
    print('initial observation:', obs)

action = env.action_space.sample()
obs, r, done, info = env.step(action)

if DEBUG_ON:
    print('next observation:', obs)
Example #24
#!/usr/bin/env python
# Please ensure that you have a Minecraft client running on port 10000
# by doing:
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000

import marlo

client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-MazeRunner-v0',
                         params={"client_pool": client_pool})
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)

observation = env.reset()

done = False
while not done:
    _action = env.action_space.sample()
    obs, reward, done, info = env.step(_action)
    print("reward:", reward)
    print("done:", done)
    print("info", info)
env.close()
Example #25
import marlo
# import os
#
#
client_pool = [('127.0.0.1', 10000)]
#
# join_tokens = marlo.make('MarLo-FindTheGoal-v0',params={"client_pool": client_pool})
join_tokens = marlo.make('MarLo-CatchTheMob-v0',
                         params={"client_pool": client_pool})

assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)

observation = env.reset()

done = False
while not done:
    _action = env.action_space.sample()
    obs, reward, done, info = env.step(_action)
    print("reward:", reward)
    print("done:", done)
    print("info", info)
env.close()
Example #26
#!/usr/bin/env python
# Please ensure that you have a Minecraft client running on port 10000
# by doing:
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000

import marlo

client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-FindTheGoal-v0',
                         params={"client_pool": client_pool})
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)

observation = env.reset()

done = False
while not done:
    _action = env.action_space.sample()
    obs, reward, done, info = env.step(_action)
    print("reward:", reward)
    print("done:", done)
    print("info", info)
env.close()
Example #27
import random

import ray
from ray import tune
from ray.tune.registry import register_env
from ray.rllib.agents.pg import PGTrainer

import marlo

#client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make(
    'MarLo-SimpleRoom-v0',
    params=dict(
        client_pool=None,
        observeGrid=[-1, -1, -1, 1, 0, 1],  # (x1, y1, z1, x2, y2, z2, name)
        gridSize=18,
        episode_timelimit=40e3,
        seed=10,
        comp_all_commands=['move', 'turn'],
        add_noop_command=False,
        tick_length=10,  #in millisec
        forceWorldReset=False,
        #  "recordDestination": "data.tgz",
        #  "recordMP4": [20, 400000],
    ))

assert len(join_tokens) == 1
join_token = join_tokens[0]


def env_creator(env_config):
    return marlo.init(join_token)
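The ray/rllib imports suggest that env_creator is meant to be registered with Tune; a sketch of how that registration and a PG training run could look (the registered name and config keys are assumptions, and details vary with the Ray version):

if __name__ == "__main__":
    ray.init()
    # Register the creator so RLlib can build the MarLo env by name.
    register_env("marlo_simple_room", env_creator)
    tune.run(PGTrainer,
             config={
                 "env": "marlo_simple_room",
                 "num_workers": 0,  # a single Minecraft client is available
             })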
Example #28
target_update_interval = 10 ** 2
update_interval = 1
target_update_method = 'hard'
soft_update_tau = 1e-2
rbuf_capacity = 5 * 10 ** 5
steps = 10 ** 5
eval_n_runs = 100
eval_interval = 10 ** 4

def phi(obs):
    return obs.astype(np.float32)

# Ensure that you have a Minecraft client running with: marlo-server --port 10000
join_tokens = marlo.make('MinecraftCliffWalking1-v0', 
                params=dict(
                    allowContinuousMovement=["move", "turn"],
                    videoResolution=[800, 600]
                ))
env = marlo.init(join_tokens[0])


obs = env.reset()
env.render(mode="rgb_array")
print('initial observation:', obs)

action = env.action_space.sample()
obs, r, done, info = env.step(action)
print('next observation:', obs)
print('reward:', r)
print('done:', done)
print('info:', info)
Example #29
# import malmo.minecraftbootstrap; malmo.minecraftbootstrap.set_malmo_xsd_path()
#!/usr/bin/env python
# Please ensure that you have a Minecraft client running on port 10000
# by doing:
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000

import marlo
client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-BuildbattleTrainX-v0',
                          params={
                            "client_pool": client_pool
                          })
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)

observation = env.reset()

done = False
while not done:
    _action = env.action_space.sample()
    obs, reward, done, info = env.step(_action)
    print("reward:", reward)
    print("done:", done)
    print("info", info)
env.close()
Example #30
from collections import deque
import marlo
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.optimizers import RMSprop, Adam
import numpy as np
from enum import Enum
import QLearn
from QAgent import QAgent
print("testo")
episodes = 1000
client_pool = [('127.0.0.1', 10000)]

join_tokens = marlo.make('MarLo-CliffWalking-v0',
                         params={
                             "client_pool": client_pool
                         })
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]
env = marlo.init(join_token)
if __name__ == "__main__":
    env = marlo.init(join_token)
    trials = 1000
    trial_len = 200

    updateTargetNework = 100
    DQN_agent = QLearn.DQN(env=env)
    steps = []
    print('number of trials: ', trials)
import marlo
from Agent import Agent
from GameState import GameState
import numpy as np
from bs4 import BeautifulSoup
mission_name = 'MarLo-FindTheGoal-v0'
from marlo.base_env_builder import dotdict
from MonteCarlo import MonteCarlo
from Q import Q

# config
client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make(mission_name,
                         params={
                             "client_pool": client_pool,
                             "suppress_info": False,
                             "skip_steps": 2,
                             "step_sleep": 0.5
                         })

# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

# define the environment variable and reset observation
env = marlo.init(join_token)

env.default_base_params.allowDiscreteMovement = True
env.default_base_params.allowContinuousMovement = False
env.default_base_params.allowAbsoluteMovement = False