def configure_gym_marlo(self, env_name):
    """
    Configure an environment for the MarLo platform.
    Requires a previously launched Minecraft client - to launch:
        $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
    Available params are defined in:
    - https://github.com/crowdAI/marLo/blob/8652f8daef2caf9202881d002a2d3c28c882d941/marlo/base_env_builder.py
    """
    import marlo
    marlo.logger.setLevel('ERROR')
    client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
    join_tokens = marlo.make(
        env_name,
        params={
            "client_pool": client_pool,
            "videoResolution": [84, 84],
            "tick_length": 1,
            # "prioritise_offscreen_rendering": False,
            # "comp_all_commands": ['move', "turn"],
        })
    join_token = join_tokens[0]
    env = marlo.init(join_token)
    return env
def __init__(self):
    client_pool = [('127.0.0.1', 10000)]
    join_tokens = marlo.make('MarLo-CliffWalking-v0',
                             params={
                                 "client_pool": client_pool,
                                 "comp_all_commands": ["movenorth", "movesouth", "moveeast", "movewest"],
                                 "prioritise_offscreen_rendering": False
                             })
    join_token = join_tokens[0]
    self.env = marlo.init(join_token)
    self.env.params.suppress_info = False
    self.env.mission_spec.setViewpoint(1)

    for z in range(0, 13, 1):
        x = 2
        self.env.mission_spec.drawBlock(x, 45, z, "lava")

    # command_parser = commands.CommandParser()
    # self.commands = command_parser.get_commands(self.env.params.mission_xml.encode('ascii'), self.env.params.role)
    # self.commands_size = len(self.commands)
    for _space, _commands in zip(self.env.action_spaces, self.env.action_names):
        self.commands = _commands
        self.commands_size = len(_commands) - 1
        print(self.commands)

    self.previousState = None
    self.alpha = 0.1
    self.gamma = 1.0
    self.epsilon = 0.1
    self.q_table = {}
    self.test = 0
def make_env(self, env_id):
    # Merge the default parameters with this runner's client pool
    params = merge(params_default, {'client_pool': self.client_pool})
    print("{} Minecraft clients available".format(len(self.client_pool)))
    join_tokens = marlo.make(env_id, params)
    return marlo.init(join_tokens[0])
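# A minimal sketch of the helpers assumed by make_env above. `merge` and
# `params_default` are not shown in the original snippet, so the names and
# default values here are assumptions for illustration only.
params_default = {
    "videoResolution": [84, 84],   # assumed default frame size
    "tick_length": 50,             # assumed Malmo tick length in ms
}

def merge(base, overrides):
    """Return a new dict with `overrides` applied on top of `base`."""
    merged = dict(base)
    merged.update(overrides)
    return merged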
def make_env(env_seed=0):
    join_tokens = marlo.make("MarLo-FindTheGoal-v0",
                             params=dict(
                                 allowContinuousMovement=["move", "turn"],
                                 videoResolution=[84, 84],
                                 kill_clients_after_num_rounds=500))
    env = marlo.init(join_tokens[0])
    obs = env.reset()
    # env.render(mode="rgb_array")
    action = env.action_space.sample()
    obs, r, done, info = env.step(action)
    env.seed(int(env_seed))
    return env
def make_env(env_seed=0):
    join_tokens = marlo.make("MarLo-FindTheGoal-v0",
                             params=dict(
                                 comp_all_commands=["move", "turn"],
                                 allowContinuousMovement=True,
                                 videoResolution=[336, 336],
                             ))
    env = marlo.init(join_tokens[0])
    obs = env.reset()
    action = env.action_space.sample()
    obs, r, done, info = env.step(action)
    env.seed(int(env_seed))
    return env
def create_marlo(env_config: dict):
    config = deepcopy(MALMO_DEFAULTS)
    config.update(env_config)
    # NOTE: `config` is built but not forwarded below; marlo.make currently
    # uses the hard-coded params only.
    client_pool = [('127.0.0.1', 10000)]
    join_tokens = marlo.make('MarLo-DefaultFlatWorld-v0',
                             params=dict(
                                 allowContinuousMovement=["move", "turn"],
                                 client_pool=client_pool,
                                 videoResolution=[84, 84]
                             ))
    env = marlo.init(join_tokens[0])
    return env
def get_join_tokens():
    if marlo.is_grading():
        """
        In the crowdAI evaluation environment, obtain the join_tokens
        from the evaluator.

        The `params` parameter passed to `evaluator_join_token` only allows
        the following keys:
            "seed", "tick_length", "max_retries", "retry_sleep", "step_sleep",
            "skip_steps", "videoResolution", "continuous_to_discrete",
            "allowContinuousMovement", "allowDiscreteMovement",
            "allowAbsoluteMovement", "add_noop_command", "comp_all_commands"

        # TODO: Add this to the official documentation?
        # Help wanted :D Pull requests welcome :D
        """
        join_tokens = marlo.evaluator_join_token(params={})
    else:
        """
        When debugging locally, please ensure that you have Minecraft
        clients running on ports 10000 and 10001 by doing:

        $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
        $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10001
        """
        print("Generating join tokens locally...")
        client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
        join_tokens = marlo.make('MarLo-BuildbattleTrain1-v0',
                                 params={
                                     "client_pool": client_pool,
                                     "agent_names": ["MarLo-Agent-0", "MarLo-Agent-1"]
                                 })
    return join_tokens
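# A minimal sketch (not part of the original snippet) of how the tokens
# returned by get_join_tokens() are typically consumed: one marlo.init() per
# join token, with each agent run on its own thread via @marlo.threaded.
import marlo

@marlo.threaded
def run_agent(join_token):
    env = marlo.init(join_token)
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()

if __name__ == "__main__":
    for token in get_join_tokens():
        run_agent(token)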
def init_environment(env_name):
    client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
    join_tokens = marlo.make(env_name, params={"client_pool": client_pool})
    # As this is a single agent scenario, there will just be a single token
    # assert len(join_tokens) == 1
    join_token = join_tokens[0]

    # initialize environment
    env = marlo.init(join_token)

    # env.configure(videoResolution=[84, 84])
    # TODO: figure out where in marlo the video resolution is fixed --
    # probably in the params passed to marlo.make.
    # env.configure(stack_frames=4)
    # The frame-stacking logic lives in the wrapper. Because the environment
    # is turn based, every observation costs one step, so we cannot simply
    # observe four times per action: collecting four consecutive frames has
    # to happen inside the wrapper, and the frames may also need to be
    # transposed for the DQN input.
    # Currently the wrapper only converts observations to float; the
    # grayscale transformation is not applied either, so observations keep
    # an extra dimension of 3 colour channels instead of 4 stacked frames.
    # Since the value 4 is not hard-coded anywhere, this should still work.
    env = wrap_env_marlo(env)

    if RANDOM_SEED:
        env.seed(RANDOM_SEED)

    print("Observation Space: ", env.observation_space)
    print("Action Space: ", env.action_space)
    return env
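# A minimal sketch (an assumption, not the project's actual wrap_env_marlo)
# of the grayscale + 4-frame stacking wrapper discussed in the comments above.
from collections import deque
import gym
import numpy as np

class GrayscaleFrameStack(gym.ObservationWrapper):
    def __init__(self, env, n_frames=4):
        super().__init__(env)
        self.n_frames = n_frames
        self.frames = deque(maxlen=n_frames)
        h, w, _ = env.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(n_frames, h, w), dtype=np.float32)

    def observation(self, obs):
        # RGB -> grayscale, scaled to [0, 1]
        gray = obs.astype(np.float32).mean(axis=2) / 255.0
        self.frames.append(gray)
        # Pad with copies of the current frame until the stack is full
        while len(self.frames) < self.n_frames:
            self.frames.append(gray)
        # Channel-first stack, as a DQN would expect
        return np.stack(self.frames, axis=0)

    def reset(self, **kwargs):
        self.frames.clear()
        return super().reset(**kwargs)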
def get_join_tokens():
    if marlo.is_grading():
        """
        In the crowdAI evaluation environment, obtain the join_tokens
        from the evaluator.
        """
        join_tokens = marlo.evaluator_join_token()
    else:
        """
        When debugging locally, please ensure that you have a Minecraft
        client running on port 10000 by doing:

        $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
        """
        client_pool = [('127.0.0.1', 10000)]
        join_tokens = marlo.make('MarLo-FindTheGoal-v0',
                                 params={"client_pool": client_pool})
    return join_tokens
def env_creator(env_config):
    time.sleep(0.5)
    tokens = marlo.make('MarLo-SimpleRoom-v0',
                        params=dict(
                            role=getRole(),
                            client_pool=getClientPool(),
                            observeGrid=[-1, -1, -1, 1, 0, 1],  # (x1, y1, z1, x2, y2, z2, name)
                            gridSize=18,
                            episode_timelimit=40e3,
                            seed=10,
                            comp_all_commands=['move', 'turn'],
                            add_noop_command=False,
                            tick_length=50,
                            forceWorldReset=False,
                            # "recordDestination": "data.tgz",
                            # "recordMP4": [20, 400000],
                        ))
    join_token = tokens[0]
    return marlo.init(join_token)
def make_env(env_name, env_seed=0, demo=False):
    join_tokens = marlo.make(env_name,
                             params=dict(
                                 comp_all_commands=["move", "turn"],
                                 allowContinuousMovement=True,
                                 videoResolution=[336, 336],
                                 kill_clients_retry=10,
                                 step_sleep=0.01,
                                 kill_clients_after_num_rounds=100,
                                 prioritise_offscreen_rendering=not demo,
                             ))
    env = marlo.init(join_tokens[0])
    env = Monitor(env)
    obs = env.reset()
    action = env.action_space.sample()
    obs, r, done, info = env.step(action)
    env.seed(int(env_seed))
    return env
def setupEnv(mission='MarLo-FindTheGoal-v0', videoResolution=[800, 600], port=10000):
    # Sets up the marlo environment
    client_pool = [('127.0.0.1', port)]
    # step_sleep set to 0.2 to handle lag between marlo and Malmo
    join_tokens = marlo.make(mission,
                             params={
                                 "client_pool": client_pool,
                                 'suppress_info': False,
                                 'videoResolution': videoResolution,
                                 'tick_length': 50,
                                 'step_sleep': 0.2})
    # As this is a single agent scenario,
    # there will just be a single token
    assert len(join_tokens) == 1
    join_token = join_tokens[0]
    env = marlo.init(join_token)

    # Change the spec of the mission by loading XML from file.
    # This is necessary to use our custom XML files.
    missionXML = loadMissionFile(mission + '.xml')
    env.mission_spec = MalmoPython.MissionSpec(missionXML, True)
    return env
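# A possible implementation of the loadMissionFile helper referenced above.
# The helper is not shown in the original snippet, so reading the XML from
# the current directory is an assumption for illustration.
def loadMissionFile(filename):
    """Read a Malmo mission XML file from disk and return it as a string."""
    with open(filename, 'r') as f:
        return f.read()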
def env_creator(env_config):
    print('---------------------------------------------\n\n\n'
          'ENV_CREATE FUNCTION CALLED\n\n\n'
          '---------------------------------------------')
    join_tokens = marlo.make(
        'MarLo-SimpleRoom-v0',
        params=dict(
            client_pool=None,
            observeGrid=[-1, -1, -1, 1, 0, 1],  # (x1, y1, z1, x2, y2, z2, name)
            gridSize=18,
            episode_timelimit=40e3,
            seed=10,
            comp_all_commands=['move', 'turn'],
            add_noop_command=False,
            tick_length=10,
            forceWorldReset=False,
        ))
    join_token = join_tokens[0]
    return marlo.init(join_token)
import marlo
import time
import json
import base64

client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]

join_tokens = marlo.make('MarLo-MazeRunner-v0',
                         params={
                             "agent_names": ["MarLo-Agent0"],
                             "videoResolution": [800, 600],
                             "forceWorldReset": True,
                             "client_pool": client_pool,
                             "allowContinuousMovement": ["move", "turn"],
                             "recordMP4": "video"
                         })


@marlo.threaded
def run_agent(join_token):
    env = marlo.init(join_token)
    frame = env.reset()
    done = False
    while not done:
        _action = env.action_space.sample()
        obs, reward, done, info = env.step(_action)
        time.sleep(0.5)
        print("reward:", reward)
        print("done:", done)
        print("info", info)
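# Not shown in the original snippet: the usual MarLo pattern is to start one
# threaded run_agent() call per join token and let the decorator manage the
# threads, e.g.:
for _join_token in join_tokens:
    run_agent(_join_token)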
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
out_dir_logs = out_dir + '/logging'
if not os.path.exists(out_dir_logs):
    os.makedirs(out_dir_logs)
if save_dir and not os.path.exists(save_dir):
    os.makedirs(save_dir)

experiments.set_log_base_dir(out_dir)

# Ensure that you have a minecraft-client running with: marlo-server --port 10000
# "MarLo-FindTheGoal-v0"
# 'MarLo-CatchTheMob-v0'
join_tokens = marlo.make("MarLo-FindTheGoal-v0",
                         params=dict(allowContinuousMovement=["move", "turn"],
                                     videoResolution=[84, 84],
                                     kill_clients_after_num_rounds=500))
env = marlo.init(join_tokens[0])

obs = env.reset()
env.render(mode="rgb_array")
print('initial observation:', obs)

action = env.action_space.sample()
obs, r, done, info = env.step(action)
print('next observation:', obs)
print('reward:', r)
print('done:', done)
print('info:', info)
print('actions:', str(env.action_space))
parser.add_argument('--turn_based', action='store_true')
args = parser.parse_args()
turn_based = args.turn_based
number_of_rollouts = args.rollouts

# Register the multi-agent environment.
env_name = 'malmo-multi-agent-v0'
register(
    id=env_name,
    entry_point='marlo.envs:MinecraftEnv',
    # Make sure the mission xml is in the marlo/assets directory.
    kwargs={'mission_file': args.mission_file})

env = marlo.make(env_name)

resolution = [84, 84]  # [800, 600]
config = {
    'allowDiscreteMovement': ["move", "turn"],
    'videoResolution': resolution,
    "turn_based": turn_based
}

join_agents = start_agents(env, env_name, None, config,
                           number_of_rollouts + 1, daemon=True)
import marlo
import time
import json
import base64

client_pool = [('127.0.0.1', 10000)]

join_tokens = marlo.make('MarLo-MazeRunner-v0',
                         params={
                             "videoResolution": [800, 600],
                             "client_pool": client_pool,
                             "agent_names": ["MarLo-Agent0"],
                             "allowContinuousMovement": ["move", "turn"],
                         })


def run_agent(join_token):
    env = marlo.init(join_token)
    frame = env.reset()
    done = False
    count = 0
    # NOTE: with env.step() commented out, `done` never changes and this
    # loop will not terminate.
    while not done:
        _action = env.action_space.sample()
        # obs, reward, done, info = env.step(_action)
        time.sleep(0.5)
        # print("reward:", reward)
        # print("done:", done)
        # print("info", info)
    env.close()


for _join_token in join_tokens:
    run_agent(_join_token)
import pickle
from enum import Enum
import matplotlib.pyplot as plt
from lxml import etree
import logging

import marlo
import tensorflow as tf

frame_size = [60, 60]
num_input = frame_size[0] * frame_size[1]

client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
join_tokens = marlo.make("MarLo-MobchaseTrain1-v0",
                         params=dict(
                             client_pool=client_pool,
                             agent_names=["MarLo-Agent-0", "MarLo-Agent-1"],
                             videoResolution=frame_size,
                             kill_clients_after_num_rounds=500,
                             forceWorldReset=False,
                             max_retries=500,
                             retry_sleep=0.1,
                             step_sleep=0.1,
                             prioritise_offscreen_rendering=False,
                             suppress_info=False))
assert len(join_tokens) == 2


class Convolutional():
    def __init__(self, num_classes, learningRate):
        self.x = tf.placeholder("float", [None, num_input])
        # Reshape the flattened data
        self.input_layer = tf.reshape(self.x, [-1, frame_size[1], frame_size[0], 1])
def train(arglist):
    ############################################
    @marlo.threaded
    def funcion(env, action, agent_num):
        contador = 0
        while True:
            # Execute the action, guarding against errors
            _, r, done, info, new_obs = env.step(np.argmax(action) + 1)
            new_obs = new_obs['observation']
            if new_obs == None:
                new_obs = last_obs[agent_num]
            else:
                new_obs = [
                    new_obs.get('XPos'),
                    new_obs.get('ZPos'),
                    new_obs.get('Yaw')
                ]
            contador += 1
            if r != 0:
                break
            elif info != None:
                if "caught_the_Chicken" in info:
                    r += 1
                    print("THE REWARD WAS HARD-CODED ", done, " ", info)
                    break
                if "Agent0_defaulted" in info:
                    r = -0.02
                    break
                if "Agent1_defaulted" in info:
                    r = -0.02
                    break
            elif contador >= 100:
                print("THE ACTION TOOK TOO LONG TO EXECUTE")
                break
        return new_obs, r, done, info
    #######################################################

    with U.single_threaded_session():
        # Create environment
        client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
        join_tokens = marlo.make(
            "MarLo-MobchaseTrain1-v0",
            params=dict(client_pool=client_pool,
                        agent_names=["MarLo-Agent-0", "MarLo-Agent-1"],
                        videoResolution=[64, 64],
                        kill_clients_after_num_rounds=500,
                        forceWorldReset=False,
                        max_retries=500,
                        retry_sleep=0.1,
                        step_sleep=0.1,
                        prioritise_offscreen_rendering=False,
                        suppress_info=False))
        assert len(join_tokens) == 2

        # Create agent trainers
        # obs_shape_n = [(64,64,3,),(64,64,3,)]
        observation_space = [
            gym.spaces.Box(low=-np.inf, high=+np.inf, shape=(6,), dtype=np.float32),
            gym.spaces.Box(low=-np.inf, high=+np.inf, shape=(6,), dtype=np.float32)
        ]
        obs_shape_n = [observation_space[i].shape for i in range(2)]
        action_space = [gym.spaces.Discrete(4), gym.spaces.Discrete(4)]
        num_adversaries = 0
        trainers = get_trainers(num_adversaries, obs_shape_n, action_space, arglist)

        # Initialize
        U.initialize()
        epis_trans = 0
        epsilon = 0.0

        # Load previous results, if necessary
        if arglist.load_dir == "":
            arglist.load_dir = arglist.save_dir
        if arglist.restore:
            print('Loading previous state...')
            resbuf = pickle.load(open("./saves/losbuffers.p", "rb"))
            epis_trans = resbuf[2]
            epsilon = resbuf[3]
            U.load_state(arglist.load_dir + str(epis_trans))
            trainers[0].replay_buffer = resbuf[0]
            trainers[1].replay_buffer = resbuf[1]

        episode_rewards = []
        agent_rewards = [[] for _ in range(2)]  # list of per-episode reward sums for each agent
        final_ep_rewards = []  # sum of rewards for training curve
        final_ep_ag_rewards = []  # agent rewards for training curve
        saver = tf.train.Saver()
        t_start = time.time()

        # inicial0 = [1.5, 2.5, 270, 5.5, 6.5, 180]
        # inicial1 = [5.5, 6.5, 180, 1.5, 2.5, 270]
        inicial0 = [1.5, 2.5, 270, 3.5, 4.5, 180]
        inicial1 = [3.5, 4.5, 180, 1.5, 2.5, 270]

        while True:
            # NEW
            last_obs = []
            agent_rewards[0].append(0)
            agent_rewards[1].append(0)
            env0 = marlo.init(join_tokens[0])
            env1 = marlo.init(join_tokens[1])
            # Run agent-0
            agent_thread_0, res0 = reiniciar(env0)
            # Run agent-1
            agent_thread_1, res1 = reiniciar(env1)
            obs0 = res0.get()
            obs1 = res1.get()
            obs0 = inicial0
            obs1 = inicial1
            done0 = False
            done1 = False
            num_eps = 0

            # Run 10 episodes
            while True:
                if (random() > epsilon):
                    # Get the action chosen by the current policy
                    action0 = trainers[0].action(np.array(obs0))
                else:
                    action0 = np.random.dirichlet(np.ones(4), size=1)[0]
                if (random() > epsilon):
                    # Get the action chosen by the current policy
                    action1 = trainers[1].action(np.array(obs0))
                else:
                    action1 = np.random.dirichlet(np.ones(4), size=1)[0]

                # Run agent-0
                agent_thread_0, resul0 = funcion(env0, action0, 0)
                # Run agent-1
                agent_thread_1, resul1 = funcion(env1, action1, 1)

                # Wait for both the threads to complete execution
                agent_thread_0.join()
                agent_thread_1.join()

                nob0, r0, done0, i0 = resul0.get()
                nob1, r1, done1, i1 = resul1.get()
                # The new observations
                last_obs = [copy.deepcopy(nob0), copy.deepcopy(nob1)]
                varhelp = copy.deepcopy(nob0)
                nob0.extend(nob1)
                nob1.extend(varhelp)
                # print(nob0)
                # print(nob1)

                trainers[0].experience(np.array(obs0), action0, r0, np.array(nob0), done0, False)
                trainers[1].experience(np.array(obs1), action1, r1, np.array(nob1), done1, False)
                agent_rewards[0][-1] += r0
                agent_rewards[1][-1] += r1
                obs0 = nob0
                obs1 = nob1

                if done0 or done1:
                    print("EPISODE NUMBER:", num_eps)
                    # Run agent-0
                    agent_thread_0, res0 = reiniciar(env0)
                    # Run agent-1
                    agent_thread_1, res1 = reiniciar(env1)
                    obs0 = res0.get()
                    obs1 = res1.get()
                    obs0 = inicial0
                    obs1 = inicial1
                    done0 = False
                    done1 = False
                    num_eps += 1

                    loss = None
                    for agent in trainers:
                        agent.preupdate()
                    for agent in trainers:
                        loss = agent.update(trainers)
                    print("LOSS:", loss)

                    if num_eps % epi_per_iter == 0:
                        break
                    agent_rewards[0].append(0)
                    agent_rewards[1].append(0)
            # End of the 10-episode run
            print("END OF SAMPLE")

            # Pair up the two agents' rewards for the episodes generated in
            # this iteration, sum each pair, and append the results to
            # episode_rewards. In short: the agents' latest per-episode
            # rewards are summed and added to the list.
            episode_rewards.extend(
                list(map(sumtuple,
                         list(zip(agent_rewards[0][epis_trans:],
                                  agent_rewards[1][epis_trans:])))))
            epis_trans += 10
            if epsilon > 0.1:
                epsilon -= 0.002
            print("TOTAL EPISODES ELAPSED: ", epis_trans, " Epsilon: ", epsilon)

            # Update all trainers, if not in display or benchmark mode
            # Save model, display training output
            if epis_trans % arglist.save_rate == 0:
                U.save_state(arglist.save_dir + str(epis_trans), saver=saver)
                losbuffers = [
                    trainers[0].replay_buffer, trainers[1].replay_buffer,
                    epis_trans, epsilon
                ]
                pickle.dump(losbuffers,
                            open("./saves/losbuffers" + str(epis_trans) + ".p", "wb"))
                pickle.dump(losbuffers, open("./saves/losbuffers.p", "wb"))
            if epis_trans % 1000 == 0:
                break
#!/usr/bin/env python
# Please ensure that you have two Minecraft clients running on port 10000 and
# port 10001 by doing :
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10001
import marlo

client_pool = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
join_tokens = marlo.make('MarLo-MazeRunner-v0',
                         params={
                             "client_pool": client_pool,
                             "agent_names": ["MarLo-Agent-0", "MarLo-Agent-1"]
                         })
# As this is a two-agent scenario,
# there will be just two join tokens
assert len(join_tokens) == 2


@marlo.threaded
def run_agent(join_token):
    env = marlo.init(join_token)
    observation = env.reset()
    done = False
    count = 0
    while not done:
        _action = env.action_space.sample()
        obs, reward, done, info = env.step(_action)
        print("reward:", reward)
        print("done:", done)
        print("info", info)
    env.close()
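# Not shown in the original snippet: with the @marlo.threaded decorator, each
# call below starts run_agent() on its own thread, one per join token, which
# is the usual way to drive a two-agent MarLo mission.
for _join_token in join_tokens:
    run_agent(_join_token)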
#!/usr/bin/env python
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
import marlo
import time
import random

client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-MineField-v0',
                         params=dict(
                             client_pool=client_pool,
                             observeGrid=[-1, 0, -1, 1, 0, 1],  # (x1, y1, z1, x2, y2, z2, name)
                             gridSize=9,
                             seed=1,  # seed=99 !!!
                             episode_timelimit=60e3,
                             comp_all_commands=['move', 'turn'],
                             add_noop_command=False,
                             videoResolution=None,
                         ))
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)
observation = env.reset()

done = False
total_reward = 0
i = 0
n_hidden_channels = args.n_hidden_channels
n_hidden_layers = args.n_hidden_layers
if DEBUG_ON:
    print("n_hidden_channels " + str(n_hidden_channels) +
          " n_hidden_layers " + str(n_hidden_layers))

# GAME SELECTION AND CONNECTION TO THE CLIENT
# Ensure that you have a minecraft-client running with: marlo-server --port 10000
client_pool = [('127.0.0.1', 10020)]
if DEBUG_ON:
    print("Game:", GAME)
join_tokens = marlo.make(GAME,
                         params=dict(videoResolution=[VIDEO_RES, VIDEO_RES],
                                     kill_clients_after_num_rounds=500))
env = marlo.init(join_tokens[0])
# ------------------------------------------

obs = env.reset()
env.render(mode="rgb_array")
if DEBUG_ON:
    print('initial observation:', obs)

action = env.action_space.sample()
obs, r, done, info = env.step(action)
if DEBUG_ON:
    print('next observation:', obs)
#!/usr/bin/env python
# Please ensure that you have a Minecraft client running on port 10000
# by doing :
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
import marlo

client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-MazeRunner-v0',
                         params={"client_pool": client_pool})
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)
observation = env.reset()

done = False
while not done:
    _action = env.action_space.sample()
    obs, reward, done, info = env.step(_action)
    print("reward:", reward)
    print("done:", done)
    print("info", info)
env.close()
import marlo
# import os

client_pool = [('127.0.0.1', 10000)]
# join_tokens = marlo.make('MarLo-FindTheGoal-v0', params={"client_pool": client_pool})
join_tokens = marlo.make('MarLo-CatchTheMob-v0', params={"client_pool": client_pool})
assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)
observation = env.reset()

done = False
while not done:
    _action = env.action_space.sample()
    obs, reward, done, info = env.step(_action)
    print("reward:", reward)
    print("done:", done)
    print("info", info)
env.close()
#!/usr/bin/env python
# Please ensure that you have a Minecraft client running on port 10000
# by doing :
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
import marlo

client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-FindTheGoal-v0',
                         params={"client_pool": client_pool})
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)
observation = env.reset()

done = False
while not done:
    _action = env.action_space.sample()
    obs, reward, done, info = env.step(_action)
    print("reward:", reward)
    print("done:", done)
    print("info", info)
env.close()
import random

import marlo
import ray
from ray import tune
from ray.tune.registry import register_env
from ray.rllib.agents.pg import PGTrainer

# client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make(
    'MarLo-SimpleRoom-v0',
    params=dict(
        client_pool=None,
        observeGrid=[-1, -1, -1, 1, 0, 1],  # (x1, y1, z1, x2, y2, z2, name)
        gridSize=18,
        episode_timelimit=40e3,
        seed=10,
        comp_all_commands=['move', 'turn'],
        add_noop_command=False,
        tick_length=10,  # in milliseconds
        forceWorldReset=False,
        # "recordDestination": "data.tgz",
        # "recordMP4": [20, 400000],
    ))
assert len(join_tokens) == 1
join_token = join_tokens[0]


def env_creator(env_config):
    return marlo.init(join_token)
target_update_interval = 10 ** 2
update_interval = 1
target_update_method = 'hard'
soft_update_tau = 1e-2
rbuf_capacity = 5 * 10 ** 5
steps = 10 ** 5
eval_n_runs = 100
eval_interval = 10 ** 4


def phi(obs):
    return obs.astype(np.float32)


# Ensure that you have a minecraft-client running with: marlo-server --port 10000
join_tokens = marlo.make('MinecraftCliffWalking1-v0',
                         params=dict(
                             allowContinuousMovement=["move", "turn"],
                             videoResolution=[800, 600]
                         ))
env = marlo.init(join_tokens[0])
obs = env.reset()
env.render(mode="rgb_array")
print('initial observation:', obs)

action = env.action_space.sample()
obs, r, done, info = env.step(action)
print('next observation:', obs)
print('reward:', r)
print('done:', done)
print('info:', info)
# import malmo.minecraftbootstrap; malmo.minecraftbootstrap.set_malmo_xsd_path()
#!/usr/bin/env python
# Please ensure that you have a Minecraft client running on port 10000
# by doing :
# $MALMO_MINECRAFT_ROOT/launchClient.sh -port 10000
import marlo

client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-BuildbattleTrainX-v0',
                         params={"client_pool": client_pool})
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

env = marlo.init(join_token)
observation = env.reset()

done = False
while not done:
    _action = env.action_space.sample()
    obs, reward, done, info = env.step(_action)
    print("reward:", reward)
    print("done:", done)
    print("info", info)
env.close()
from collections import deque
from enum import Enum

import marlo
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.optimizers import RMSprop, Adam
import numpy as np

import QLearn
from QAgent import QAgent

print("testo")

episodes = 1000
client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make('MarLo-CliffWalking-v0',
                         params={"client_pool": client_pool})
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]
env = marlo.init(join_token)

if __name__ == "__main__":
    env = marlo.init(join_token)
    trials = 1000
    trial_len = 200
    updateTargetNework = 100
    DQN_agent = QLearn.DQN(env=env)
    steps = []
    print('number of trials: ', trials)
import marlo
from Agent import Agent
from GameState import GameState
import numpy as np
from bs4 import BeautifulSoup

mission_name = 'MarLo-FindTheGoal-v0'

from marlo.base_env_builder import dotdict
from MonteCarlo import MonteCarlo
from Q import Q

# config
client_pool = [('127.0.0.1', 10000)]
join_tokens = marlo.make(mission_name,
                         params={
                             "client_pool": client_pool,
                             "suppress_info": False,
                             "skip_steps": 2,
                             "step_sleep": 0.5
                         })
# As this is a single agent scenario,
# there will just be a single token
assert len(join_tokens) == 1
join_token = join_tokens[0]

# define the environment variable and reset observation
env = marlo.init(join_token)
env.default_base_params.allowDiscreteMovement = True
env.default_base_params.allowContinuousMovement = False
env.default_base_params.allowAbsoluteMovement = False