def configure(self, environment_definitions, port_inc=0):
    self.action_space = spaces.Discrete(environment_definitions['action_shape'][0])
    min_value = environment_definitions['min_value']
    max_value = environment_definitions['max_value']
    state_shape = environment_definitions['state_shape']
    state_type = environment_definitions['state_type']
    self.observation_space = spaces.Box(low=min_value, high=max_value,
                                        shape=state_shape, dtype=state_type)
    self.n_envs = environment_definitions['n_envs']
    self.actions = environment_definitions['actions']
    if 'action_meaning' in environment_definitions:
        self.action_meaning = environment_definitions['action_meaning']
    else:
        self.action_meaning = ['action'] * len(self.actions)
    if 'agent' in environment_definitions:
        if inspect.isclass(environment_definitions['agent']):
            self.agent = environment_definitions['agent']()
        else:
            raise Exception("Agent object is not a class!")
    else:
        self.agent = BasicAgent()
    host = environment_definitions['host']
    input_port = environment_definitions['input_port']
    output_port = environment_definitions['output_port']
    self.remoteenv = RemoteEnv(host, output_port + port_inc, input_port + port_inc)
    self.remoteenv.open(0)
    self.nlives = 1
    self.configureFlag = True
    self.state = None
def agent():
    env = RemoteEnv(IN_PORT=8080, OUT_PORT=7070)
    env.open(0)
    for i in range(10000000):
        sum_energy = 0
        state = env.step("restart")
        prev_energy = state['energy']
        print(prev_energy)
        done = state['done']
        while not done:
            frame = image_decode(state['frame'], 20, 20)
            print(frame)
            action = int(input('action'))
            for i in range(8):
                state = env.step("act", action)
                state = env.step('get_status')
                done = state["done"]
                energy = state['energy']
                frame = state['frame']
                touched = state['touched']
                ID = state['id']
                delta = (energy - prev_energy)
                print(delta)
                sum_energy += delta
                prev_energy = energy
        print(sum_energy)
    env.close()
def train(): actions = [("fx", 0.1), ("fx", -0.1), ("fz", 0.1), ("fz", -0.1)] agent = KerasDQNAgent(build_model, action_size=4, decay_steps=20000) agent.eps_min = 0.001 t = Thread(target=run_worker, args=(agent, )) t.start() env = RemoteEnv() env.open(0) fields = None total_steps = 0 for e in range(1000000): print("Starting new episode ", e) fields = env.step("restart") done = fields['done'] reward = 0 initial_state = get_state_from_fields(fields).reshape(1, 7) steps = 0 sum_rewards = 0 while not done: if total_steps >= 10000: agent.training = True action = agent.predict(initial_state) else: action = agent.predict(initial_state, True) reward = 0.0 for _ in range(4): fields = env.step(actions[action][0], actions[action][1]) reward += fields['reward'] if fields['done'] == True: break final_state = get_state_from_fields(fields).reshape(1, 7) reward = np.clip(reward, -1.0, 1.0) sum_rewards += reward done = fields['done'] if total_steps % 20 == 0: print("\nLOSS ", agent.last_loss, "\n") agent.add_sample(initial_state, action, final_state, reward, done) if steps >= 2000 and not done: done = True if not done: initial_state = np.copy(final_state) steps += 1 total_steps += 1 print("Sum of rewards ", sum_rewards, ", Steps by episode: ", steps, ", Total Steps: ", total_steps, ", EPS: ", agent.eps, ", TOTAL SAMPLES: ", agent.mem_size()) if e % 50 == 0: agent.model.save_weights("model.h5", save_format='h5') agent.done = True agent.training = False env.close()
class Environment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.ale = AleWrapper(self)

    def configure(self, environment_definitions, port_inc=0):
        self.action_space = spaces.Discrete(environment_definitions['action_shape'][0])
        min_value = environment_definitions['min_value']
        max_value = environment_definitions['max_value']
        state_shape = environment_definitions['state_shape']
        state_type = environment_definitions['state_type']
        self.observation_space = spaces.Box(low=min_value, high=max_value,
                                            shape=state_shape, dtype=state_type)
        self.n_envs = environment_definitions['n_envs']
        self.actions = environment_definitions['actions']
        self.action_meaning = environment_definitions['action_meaning']
        self.state_wrapper = environment_definitions['state_wrapper']
        host = environment_definitions['host']
        input_port = environment_definitions['input_port']
        output_port = environment_definitions['output_port']
        self.remoteenv = RemoteEnv(host, output_port + port_inc, input_port + port_inc)
        self.remoteenv.open(0)
        self.nlives = 1

    def get_action_meanings(self):
        return [self.action_meaning[i] for i in range(len(self.actions))]

    def reset(self):
        fields = self.remoteenv.step('restart')
        self.state, self.reward, self.done, self.info = self.state_wrapper(fields)
        return self.state

    def render(self, mode='human', close=False):
        return self.state

    def close(self):
        self.remoteenv.close()

    def step(self, action):
        fields = self.remoteenv.step(self.actions[action][0], self.actions[action][1])
        self.state, self.reward, self.done, self.info = self.state_wrapper(fields)
        return (self.state, self.reward, self.done, self.info)

    def __del__(self):
        self.remoteenv.close()
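The Environment above does not build observations itself; it delegates that to the state_wrapper callable taken from environment_definitions, which must map the raw fields returned by RemoteEnv.step() to a (state, reward, done, info) tuple. A minimal sketch of such a wrapper is given below; the field names ('state', 'reward', 'done') are assumptions borrowed from the other listings, not a fixed contract of the library.

import numpy as np

def example_state_wrapper(fields):
    # Hypothetical wrapper (illustration only): convert the dictionary of
    # fields returned by RemoteEnv.step() into the (state, reward, done, info)
    # tuple that Environment.reset() and Environment.step() unpack.
    state = np.asarray(fields['state'], dtype=np.float32)  # assumed field name
    reward = float(fields.get('reward', 0.0))
    done = bool(fields['done'])
    info = {'raw_fields': fields}
    return state, reward, done, info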
def configure(self, environment_definitions, port_inc=0):
    self.action_space = spaces.Discrete(environment_definitions['action_shape'][0])
    min_value = environment_definitions['min_value']
    max_value = environment_definitions['max_value']
    state_shape = environment_definitions['state_shape']
    state_type = environment_definitions['state_type']
    self.observation_space = spaces.Box(low=min_value, high=max_value,
                                        shape=state_shape, dtype=state_type)
    self.n_envs = environment_definitions['n_envs']
    self.actions = environment_definitions['actions']
    self.action_meaning = environment_definitions['action_meaning']
    self.state_wrapper = environment_definitions['state_wrapper']
    host = environment_definitions['host']
    input_port = environment_definitions['input_port']
    output_port = environment_definitions['output_port']
    self.remoteenv = RemoteEnv(host, output_port + port_inc, input_port + port_inc)
    self.remoteenv.open(0)
    self.nlives = 1
def agent():
    env = RemoteEnv(IN_PORT=8081, OUT_PORT=7071)
    env.open(0)
    for i in range(10000000):
        state = env.step("restart")
        done = state['done']
        print('new episode')
        while not done:
            frame = image_from_str(state['state'], 10, 10)
            print(frame)
            action = int(input('action'))
            state = env.step("move", action)
            done = state['done']
            print(state)
    env.close()
def agent():
    env = RemoteEnv(IN_PORT=8085, OUT_PORT=7075)
    env.open(0)
    for i in range(10000000):
        state = env.step("restart")
        prev_energy = state['energy']
        done = state['done']
        while not done:
            action = int(input('action'))
            # action = np.random.choice([0, 1])
            reward_sum = 0
            touched = -1
            for i in range(8):
                state = env.step("act", action)
                state = env.step('get_status')
                energy = state['energy']
                if touched == -1:
                    touched = state['touched']
                elif state['touched'] not in [0, 1]:
                    touched = state['touched']
                if state['done']:
                    break
            done = state['done']
            reward_sum += (energy - prev_energy)
            prev_energy = energy
            frame = state['frame']
            ID = state['id']
            frame = image_decode(state['frame'], 20, 20)
            print(frame)
            print(reward_sum)
            print(touched)
            print(state['signal'])
            print(state['done'])
            prev_energy = energy
            if i >= 2:
                break
    env.close()
from unityremote.core import RemoteEnv
import numpy as np

env = RemoteEnv()
env.open(0)

if __name__ == "__main__":
    for i in range(10):
        sum_reward = 0
        state = env.step("restart")
        print(state)
        done = state['done']
        while not done:
            frame = None
            action = int(input("action-----------------------------------------"))
            if action == 0:
                state = env.step("fx", 0.1)
            elif action == 1:
                state = env.step("fz", 0.1)
            else:
                state = env.step("noop", 0.0)
            done = state["done"]
            reward = state['reward']
            sum_reward += reward
        print(sum_reward)
    env.close()
class Environment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.ale = AleWrapper(self)
        self.configureFlag = False

    def configure(self, environment_definitions, port_inc=0):
        self.action_space = spaces.Discrete(environment_definitions['action_shape'][0])
        min_value = environment_definitions['min_value']
        max_value = environment_definitions['max_value']
        state_shape = environment_definitions['state_shape']
        state_type = environment_definitions['state_type']
        self.observation_space = spaces.Box(low=min_value, high=max_value,
                                            shape=state_shape, dtype=state_type)
        self.n_envs = environment_definitions['n_envs']
        self.actions = environment_definitions['actions']
        if 'action_meaning' in environment_definitions:
            self.action_meaning = environment_definitions['action_meaning']
        else:
            self.action_meaning = ['action'] * len(self.actions)
        if 'agent' in environment_definitions:
            if inspect.isclass(environment_definitions['agent']):
                self.agent = environment_definitions['agent']()
            else:
                raise Exception("Agent object is not a class!")
        else:
            self.agent = BasicAgent()
        host = environment_definitions['host']
        input_port = environment_definitions['input_port']
        output_port = environment_definitions['output_port']
        self.remoteenv = RemoteEnv(host, output_port + port_inc, input_port + port_inc)
        self.remoteenv.open(0)
        self.nlives = 1
        self.configureFlag = True
        self.state = None

    def get_action_meanings(self):
        self.__check_configuration_()
        return [self.action_meaning[i] for i in range(len(self.actions))]

    def reset(self):
        self.__check_configuration_()
        return self.agent.reset(self)

    def render(self, mode='human', close=False):
        self.__check_configuration_()
        self.agent.render()

    def close(self):
        self.__check_configuration_()
        self.remoteenv.close()

    def one_step(self, action):
        return self.remoteenv.step(self.actions[action][0], self.actions[action][1])

    def step(self, action, info=None):
        self.__check_configuration_()
        return self.agent.act(self, action, info)

    def __del__(self):
        self.__check_configuration_()
        self.remoteenv.close()

    def __check_configuration_(self):
        if not self.configureFlag:
            raise Exception("The environment is not configured. "
                            "Try to set up the environment before trying again!")
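For reference, the dictionary passed to configure() must provide at least the keys read above. A minimal sketch is shown below; the concrete values (ports, shapes, command names) are placeholders chosen to match the other listings, not defaults of the library.

import numpy as np

# Hypothetical configuration: only the keys are dictated by configure() above,
# all values here are illustrative placeholders.
environment_definitions = {
    'action_shape': (4,),                    # Discrete(4) action space
    'min_value': 0,
    'max_value': 255,
    'state_shape': (20, 20),                 # observation shape
    'state_type': np.uint8,                  # dtype passed to spaces.Box
    'n_envs': 1,
    'actions': [('fx', 0.1), ('fx', -0.1), ('fz', 0.1), ('fz', -0.1)],
    'action_meaning': ['fx+', 'fx-', 'fz+', 'fz-'],
    'host': '127.0.0.1',
    'input_port': 8080,
    'output_port': 7070,
    # 'agent': MyAgent,                      # optional; must be a class, not an instance
}

env = Environment()
env.configure(environment_definitions)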
def Restart(action):
    global env
    return env.step(actions['r'])


def apply(action):
    if action == 0:
        return Up(None)
    elif action == 1:
        return Down(None)


if __name__ == "__main__":
    agent = KerasDQNAgent(build_model)
    t = Thread(target=run_worker, args=(agent, ))
    t.start()
    env = RemoteEnv()
    env.open(0)
    fields = None
    total_steps = 0
    for e in range(100000000):
        print("Starting new episode ", e)
        state = Restart(None)
        seq = deque(maxlen=4)
        frame = get_image(state['frame'])
        done = state['done']
        reward = state['reward']
        for _ in range(4):
            seq.append(frame)
        initial_state = np.array(seq).reshape(1, 84, 84, 4)
        steps = 0
        sum_rewards = 0
from unityremote.core import RemoteEnv

env = RemoteEnv()
env.open()
env.step('tx', 5)
env.close()
def run_agent(inport, outport):
    env = RemoteEnv(IN_PORT=inport, OUT_PORT=outport)
    env.open(0)
    env.step("restart")
    speed = 100
    angular_speed = 50
    actions = [('walk', 15), ('run', 30), ('walk_in_circle', 1), ('left_turn', 1),
               ('right_turn', 1), ('up', 1), ('down', 1), ('jump', True),
               ('pickup', True), ('pickup', False), ('noop', -1)]
    action_size = len(actions)
    for i in range(100000):
        sum_rewards = 0
        touchID = 0
        energy = 0
        # idx = np.random.choice(len(actions))
        idx = int(input())
        for i in range(8):
            env_info = env.step(actions[idx][0], actions[idx][1])
            done = env_info['done']
            if done:
                break
        if not done:
            env_info = env.step('get_result', -1)
            sum_rewards += env_info['reward']
            touchID = env_info['touchID']
            energy = env_info['energy']
        if done:
            sum_rewards += env_info['reward']
            touchID = env_info['touchID']
            energy = env_info['energy']
            env_info = env.step('restart', -1)
        print("Object touched ---------------- ", touchID)
        print("Reward sum -------------------- ", sum_rewards)
        print("Done ---------------------------", env_info['done'])
        print("--------------------------------------------------------------------------------------")
        print_matrix(get_frame_from_fields(env_info['frame']))
        print("-----------------------------------------------")
    env.close()
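Since configure() offsets its ports by port_inc, several agent/environment pairs can be served on consecutive port pairs. A possible launcher for run_agent() under that assumption is sketched below; the base ports and the number of workers are placeholders, not values required by the library.

from threading import Thread

# Hypothetical launcher (illustration only): run two agents against
# environments assumed to listen on consecutive input/output port pairs.
if __name__ == "__main__":
    workers = []
    for inc in range(2):
        t = Thread(target=run_agent, args=(8080 + inc, 7070 + inc))
        t.start()
        workers.append(t)
    for t in workers:
        t.join()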
from unityremote.core import RemoteEnv
import numpy as np

env = RemoteEnv()
env.open(0)

if __name__ == "__main__":
    for i in range(10000):
        c = np.random.choice([0, 1, 2, 3])
        if c == 0:
            env.stepfv("move", [2.0, 0.0])
        elif c == 1:
            env.stepfv("move", [0.0, 2.0])
        elif c == 2:
            env.stepfv("move", [-2.0, 0.0])
        elif c == 3:
            env.stepfv("move", [0.0, -2.0])
        state = env.step("NoOp")
        print(state)
    env.close()