Example #1
    def configure(self, environment_definitions, port_inc=0):
        self.action_space = spaces.Discrete(
            environment_definitions['action_shape'][0])
        min_value = environment_definitions['min_value']
        max_value = environment_definitions['max_value']
        state_shape = environment_definitions['state_shape']
        state_type = environment_definitions['state_type']
        self.observation_space = spaces.Box(low=min_value,
                                            high=max_value,
                                            shape=state_shape,
                                            dtype=state_type)
        self.n_envs = environment_definitions['n_envs']
        self.actions = environment_definitions['actions']
        if 'action_meaning' in environment_definitions:
            self.action_meaning = environment_definitions['action_meaning']
        else:
            self.action_meaning = ['action'] * len(self.actions)

        # Instantiate the provided agent class if one is given; otherwise use BasicAgent.
        if 'agent' in environment_definitions:
            if inspect.isclass(environment_definitions['agent']):
                self.agent = environment_definitions['agent']()
            else:
                raise Exception("The 'agent' entry must be a class")
        else:
            self.agent = BasicAgent()

        host = environment_definitions['host']
        input_port = environment_definitions['input_port']
        output_port = environment_definitions['output_port']
        self.remoteenv = RemoteEnv(host, output_port + port_inc,
                                   input_port + port_inc)
        self.remoteenv.open(0)
        self.nlives = 1
        self.configureFlag = True
        self.state = None
Example #2
def agent():
    env = RemoteEnv(IN_PORT=8080, OUT_PORT=7070)
    env.open(0)
    for i in range(10000000):
        sum_energy = 0
        state = env.step("restart")
        prev_energy = state['energy']
        print(prev_energy)
        done = state['done']
        while not done:
            frame = image_decode(state['frame'], 20, 20)
            print(frame)
            action = int(input('action'))
            # Repeat the chosen action for 8 simulation steps before reading the status.
            for _ in range(8):
                state = env.step("act", action)
            state = env.step('get_status')
            done = state["done"]
            energy = state['energy']
            frame = state['frame']
            touched = state['touched']
            ID = state['id']
            delta = (energy - prev_energy)
            print(delta)
            sum_energy += delta
            prev_energy = energy
        print(sum_energy)
    env.close()
Example #3
def train():
    actions = [("fx", 0.1), ("fx", -0.1), ("fz", 0.1), ("fz", -0.1)]
    agent = KerasDQNAgent(build_model, action_size=4, decay_steps=20000)
    agent.eps_min = 0.001
    t = Thread(target=run_worker, args=(agent, ))
    t.start()
    env = RemoteEnv()
    env.open(0)
    fields = None
    total_steps = 0
    for e in range(1000000):
        print("Starting new episode ", e)
        fields = env.step("restart")
        done = fields['done']
        reward = 0
        initial_state = get_state_from_fields(fields).reshape(1, 7)
        steps = 0
        sum_rewards = 0
        while not done:
            # Enable training only after 10,000 warm-up steps have been collected.
            if total_steps >= 10000:
                agent.training = True
                action = agent.predict(initial_state)
            else:
                action = agent.predict(initial_state, True)

            reward = 0.0

            # Frame skip: repeat the selected action for 4 steps, accumulating reward.
            for _ in range(4):
                fields = env.step(actions[action][0], actions[action][1])
                reward += fields['reward']
                if fields['done']:
                    break
            final_state = get_state_from_fields(fields).reshape(1, 7)

            reward = np.clip(reward, -1.0, 1.0)
            sum_rewards += reward
            done = fields['done']
            if total_steps % 20 == 0:
                print("\nLOSS ", agent.last_loss, "\n")

            agent.add_sample(initial_state, action, final_state, reward, done)

            # Cap episodes at 2000 steps.
            if steps >= 2000 and not done:
                done = True
            if not done:
                initial_state = np.copy(final_state)

            steps += 1
            total_steps += 1

        print("Sum of rewards ", sum_rewards, ", Steps by episode: ", steps,
              ", Total Steps: ", total_steps, ", EPS: ", agent.eps,
              ", TOTAL SAMPLES: ", agent.mem_size())
        if e % 50 == 0:
            agent.model.save_weights("model.h5", save_format='h5')

    agent.done = True
    agent.training = False
    env.close()
Example #4
class Environment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.ale = AleWrapper(self)

    def configure(self, environment_definitions, port_inc=0):
        self.action_space = spaces.Discrete(
            environment_definitions['action_shape'][0])
        min_value = environment_definitions['min_value']
        max_value = environment_definitions['max_value']
        state_shape = environment_definitions['state_shape']
        state_type = environment_definitions['state_type']
        self.observation_space = spaces.Box(low=min_value,
                                            high=max_value,
                                            shape=state_shape,
                                            dtype=state_type)
        self.n_envs = environment_definitions['n_envs']
        self.actions = environment_definitions['actions']
        self.action_meaning = environment_definitions['action_meaning']
        self.state_wrapper = environment_definitions['state_wrapper']
        host = environment_definitions['host']
        input_port = environment_definitions['input_port']
        output_port = environment_definitions['output_port']
        self.remoteenv = RemoteEnv(host, output_port + port_inc,
                                   input_port + port_inc)
        self.remoteenv.open(0)
        self.nlives = 1

    def get_action_meanings(self):
        return [self.action_meaning[i] for i in range(len(self.actions))]

    def reset(self):
        fields = self.remoteenv.step('restart')
        self.state, self.reward, self.done, self.info = self.state_wrapper(
            fields)
        return self.state

    def render(self, mode='human', close=False):
        return self.state

    def close(self):
        self.remoteenv.close()

    def step(self, action):
        fields = self.remoteenv.step(self.actions[action][0],
                                     self.actions[action][1])
        self.state, self.reward, self.done, self.info = self.state_wrapper(
            fields)
        return (self.state, self.reward, self.done, self.info)

    def __del__(self):
        # Guard against configure() never having been called.
        if hasattr(self, 'remoteenv'):
            self.remoteenv.close()
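For reference, a minimal sketch of how the Environment class above might be wired up. The host, ports, shapes, actions, and the my_state_wrapper helper are illustrative assumptions rather than values taken from the project, and the sketch presumes the supporting classes (AleWrapper, RemoteEnv) and a running Unity server are available.

import numpy as np

def my_state_wrapper(fields):
    # Hypothetical wrapper: turn the raw fields dict into (state, reward, done, info).
    state = np.zeros((10, 10), dtype=np.float32)  # placeholder instead of decoding fields['frame']
    return state, fields['reward'], fields['done'], {}

environment_definitions = {
    'action_shape': (4,),
    'min_value': 0.0,
    'max_value': 255.0,
    'state_shape': (10, 10),
    'state_type': np.float32,
    'n_envs': 1,
    'actions': [("fx", 0.1), ("fx", -0.1), ("fz", 0.1), ("fz", -0.1)],
    'action_meaning': ['fx+', 'fx-', 'fz+', 'fz-'],
    'state_wrapper': my_state_wrapper,
    'host': '127.0.0.1',
    'input_port': 8080,
    'output_port': 7070,
}

env = Environment()
env.configure(environment_definitions)
state = env.reset()
state, reward, done, info = env.step(0)  # apply the first action, ("fx", 0.1)
env.close()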
Example #5
    def configure(self, environment_definitions, port_inc=0):
        self.action_space = spaces.Discrete(
            environment_definitions['action_shape'][0])
        min_value = environment_definitions['min_value']
        max_value = environment_definitions['max_value']
        state_shape = environment_definitions['state_shape']
        state_type = environment_definitions['state_type']
        self.observation_space = spaces.Box(low=min_value,
                                            high=max_value,
                                            shape=state_shape,
                                            dtype=state_type)
        self.n_envs = environment_definitions['n_envs']
        self.actions = environment_definitions['actions']
        self.action_meaning = environment_definitions['action_meaning']
        self.state_wrapper = environment_definitions['state_wrapper']
        host = environment_definitions['host']
        input_port = environment_definitions['input_port']
        output_port = environment_definitions['output_port']
        self.remoteenv = RemoteEnv(host, output_port + port_inc,
                                   input_port + port_inc)
        self.remoteenv.open(0)
        self.nlives = 1
Example #6
def agent():
    env = RemoteEnv(IN_PORT=8081, OUT_PORT=7071)
    env.open(0)
    for i in range(10000000):
        state = env.step("restart")
        done = state['done']
        print('new episode')
        while not done:
            frame = image_from_str(state['state'], 10, 10)
            print(frame)
            action = int(input('action'))
            state = env.step("move", action)
            done = state['done']
            print(state)
    env.close()
Example #7
def agent():
    env = RemoteEnv(IN_PORT=8085, OUT_PORT=7075)
    env.open(0)
    for i in range(10000000):
        state = env.step("restart")
        prev_energy = state['energy']
        done = state['done']
        while not done:
            action = int(input('action'))
            #action = np.random.choice([0, 1])
            reward_sum = 0
            touched = -1
            # Repeat the action for 8 steps; remember any 'touched' value outside {0, 1}.
            for _ in range(8):
                state = env.step("act", action)
                state = env.step('get_status')
                energy = state['energy']
                if touched == -1:
                    touched = state['touched']
                elif state['touched'] not in (0, 1):
                    touched = state['touched']
                if state['done']:
                    break
            done = state['done']
            reward_sum += (energy - prev_energy)
            prev_energy = energy
            ID = state['id']
            frame = image_decode(state['frame'], 20, 20)
            print(frame)
            print(reward_sum)
            print(touched)
            print(state['signal'])
            print(state['done'])
        # Stop after the third episode.
        if i >= 2:
            break
    env.close()
Example #8
from unityremote.core import RemoteEnv
import numpy as np

env = RemoteEnv()
env.open(0)

if __name__ == "__main__":
    for i in range(10):
        sum_reward = 0
        state = env.step("restart")
        print(state)
        done = state['done']
        while not done:
            frame = None
            action = int(
                input("action-----------------------------------------"))
            if action == 0:
                state = env.step("fx", 0.1)
            elif action == 1:
                state = env.step("fz", 0.1)
            else:
                state = env.step("noop", 0.0)

            done = state["done"]
            reward = state['reward']
            sum_reward += reward
        print(sum_reward)
env.close()
Example #9
class Environment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.ale = AleWrapper(self)
        self.configureFlag = False

    def configure(self, environment_definitions, port_inc=0):
        self.action_space = spaces.Discrete(
            environment_definitions['action_shape'][0])
        min_value = environment_definitions['min_value']
        max_value = environment_definitions['max_value']
        state_shape = environment_definitions['state_shape']
        state_type = environment_definitions['state_type']
        self.observation_space = spaces.Box(low=min_value,
                                            high=max_value,
                                            shape=state_shape,
                                            dtype=state_type)
        self.n_envs = environment_definitions['n_envs']
        self.actions = environment_definitions['actions']
        if 'action_meaning' in environment_definitions:
            self.action_meaning = environment_definitions['action_meaning']
        else:
            self.action_meaning = ['action'] * len(self.actions)

        if 'agent' in environment_definitions:
            if inspect.isclass(environment_definitions['agent']):
                self.agent = environment_definitions['agent']()
            else:
                raise Exception("Agent object is not a class!!!!")
        else:
            self.agent = BasicAgent()

        host = environment_definitions['host']
        input_port = environment_definitions['input_port']
        output_port = environment_definitions['output_port']
        self.remoteenv = RemoteEnv(host, output_port + port_inc,
                                   input_port + port_inc)
        self.remoteenv.open(0)
        self.nlives = 1
        self.configureFlag = True
        self.state = None

    def get_action_meanings(self):
        self.__check_configuration_()
        return [self.action_meaning[i] for i in range(len(self.actions))]

    def reset(self):
        self.__check_configuration_()
        return self.agent.reset(self)

    def render(self, mode='human', close=False):
        self.__check_configuration_()
        self.agent.render()

    def close(self):
        self.__check_configuration_()
        self.remoteenv.close()

    def one_step(self, action):
        return self.remoteenv.step(self.actions[action][0],
                                   self.actions[action][1])

    def step(self, action, info=None):
        self.__check_configuration_()
        return self.agent.act(self, action, info)

    def __del__(self):
        # Only close the connection if configure() was actually called.
        if self.configureFlag:
            self.remoteenv.close()

    def __check_configuration_(self):
        if not self.configureFlag:
            raise Exception(
                "The environment is not configured. Call configure() before using it."
            )
Example #10
    global env
    return env.step(actions['r'])


def apply(action):
    if action == 0:
        return Up(None)
    elif action == 1:
        return Down(None)


if __name__ == "__main__":
    agent = KerasDQNAgent(build_model)
    t = Thread(target=run_worker, args=(agent, ))
    t.start()
    env = RemoteEnv()
    env.open(0)
    fields = None
    total_steps = 0
    for e in range(100000000):
        print("Starting new episode ", e)
        state = Restart(None)
        seq = deque(maxlen=4)
        frame = get_image(state['frame'])
        done = state['done']
        reward = state['reward']
        for _ in range(4):
            seq.append(frame)
        initial_state = np.array(seq).reshape(1, 84, 84, 4)
        steps = 0
        sum_rewards = 0
Example #11
from unityremote.core import RemoteEnv

env = RemoteEnv()
env.open()

env.step('tx', 5)

env.close()
Example #12
def run_agent(inport, outport):
    env = RemoteEnv(IN_PORT=inport, OUT_PORT=outport)
    env.open(0)
    env.step("restart")
    speed = 100
    angular_speed = 50

    actions = [('walk', 15), ('run', 30), ('walk_in_circle', 1), ('left_turn', 1),
               ('right_turn', 1), ('up', 1), ('down', 1), ('jump', True),
               ('pickup', True), ('pickup', False), ('noop', -1)]

    action_size = len(actions)

    for i in range(100000):
        sum_rewards = 0
        touchID = 0
        energy = 0
        #idx = np.random.choice(len(actions))
        idx = int(input())
        # Repeat the chosen action for 8 steps or until the episode ends.
        for _ in range(8):
            env_info = env.step(actions[idx][0], actions[idx][1])
            done = env_info['done']
            if done:
                break
        if not done:
            env_info = env.step('get_result', -1)
            sum_rewards += env_info['reward']
            touchID = env_info['touchID']
            energy = env_info['energy']
        else:
            sum_rewards += env_info['reward']
            touchID = env_info['touchID']
            energy = env_info['energy']
            env_info = env.step('restart', -1)
        print("Object touched ---------------- ", touchID)
        print("Reward sum -------------------- ", sum_rewards)
        print("Done ---------------------------", env_info['done'])
        print("--------------------------------------------------------------------------------------")
        print_matrix(get_frame_from_fields(env_info['frame']))
        print("-----------------------------------------------")
    env.close()
Example #13
from unityremote.core import RemoteEnv
import numpy as np

env = RemoteEnv()
env.open(0)

if __name__ == "__main__":
    for i in range(10000):
        c = np.random.choice([0, 1, 2, 3])
        if c == 0:
            env.stepfv("move", [2.0, 0.0])
        elif c == 1:
            env.stepfv("move", [0.0, 2.0])
        elif c == 2:
            env.stepfv("move", [-2.0, 0.0])
        elif c == 3:
            env.stepfv("move", [0.0, -2.0])
        state = env.step("NoOp")
        print(state)
env.close()