Example #1
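# Excerpt from a PPO training script for a visual Unity environment: the helper
# returns advantages in time order, and the __main__ loop buffers transitions
# and triggers a PPO update every BATCH steps.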
        return np.array(adv)[::-1]

if __name__ == "__main__":
    env = UnityEnv('test.app', 0, use_visual=True)
    ppo = PPO(env)
    all_ep_r = []
    t = 0
    for ep in range(EP_MAX):
        s = env.reset()
        
        ep_r = 0
        
        done = False
        while not done:
            t += 1
            env.render()
            a,v = ppo.choose_action(s)
            s_, r, done, _ = env.step(a)
            ppo.buffer_s.append(s)
            ppo.buffer_a.append(a)
            ppo.buffer_r.append(r)
            ppo.buffer_v.append(v)
            ppo.buffer_done.append(done)
            s = s_
            ep_r += r

            # update ppo
            if (t+1) % BATCH == 0:
                print("updating...")
                t = 0
                v_s_ = v
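
The excerpt stops right after the bootstrap value v_s_ is taken from the last step. A minimal sketch of how such a buffer is commonly turned into discounted returns (the helper name and the gamma value are illustrative assumptions, not part of the original snippet):

def discounted_returns(rewards, dones, v_s_, gamma=0.99):
    # walk the buffer backwards, bootstrapping from the final value estimate
    returns = []
    for r, d in zip(reversed(rewards), reversed(dones)):
        v_s_ = r + gamma * v_s_ * (1 - d)  # reset the bootstrap at episode ends
        returns.append(v_s_)
    return returns[::-1]  # flip back into time order, matching the fragment above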
Example #2
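# Multi-agent DQN variant: every agent starts from a shared DQNSolver instance,
# surviving agents' brains are copied back into learning_brain each step, and
# the models are saved to .h5 files when the first agent reports terminal.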
def cartpole():
    env = UnityEnv(environment_filename=ENV_NAME,
                   worker_id=2,
                   use_visual=False,
                   multiagent=True)
    score_logger = ScoreLogger(ENV_NAME)
    agents_brain = []
    agents_action = []
    index_list = []
    agents_alive = []
    count = 0
    count1 = 0
    num_agents = env.number_agents
    print("___________Number of agents in cartpole __")
    print(num_agents)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    print("__dqn solver______")
    print(dqn_solver)
    #model = tf.keras.models.load_model("")
    for x in range((env.number_agents)):
        agents_brain.append(dqn_solver)
        print("______agentbrain____")
        print(agents_brain)
        print("_Agent action___")
        print(agents_action)

    learning_brain = copy.deepcopy(agents_brain)
    run = 0
    state = env.reset()
    initialstate = copy.deepcopy(state)
    while True:
        run += 1
        env.reset()
        print("____________STATE____________-")
        print(state[0])
        state = copy.deepcopy(initialstate)
        agents_brain = []
        agents_action = []
        index_list = []
        agents_alive = []
        count = 0
        count1 = 0
        num_agents = int(state[0][-5])
        agents_brain = copy.deepcopy(learning_brain)
        print(learning_brain)
        print(agents_brain)
        print(state)
        #for x in range ( (env.number_agents - 1) ):

        step = 0
        while True:
            step += 1
            env.render()
            print("___________STatte Lenth_______")
            print(len(state))
            print("______selffish___")
            print(state[0])
            agents_action = [1] * len(state)
            copied_agents_alive = copy.deepcopy(agents_alive)
            print("__________numagents_____")
            for x in range(num_agents - 1):
                state[x] = np.reshape(state[x], [1, observation_space])
                agents_action[x] = agents_brain[x].act(state[x])
            print(agents_action)
            state_next, reward, terminal, info = env.step(
                agents_action, num_agents)
            print("_______Reward________")
            print(reward)
            print("_____________NEXT STATE LENGTH____________")
            print(len(state_next))
            if (len(state_next) == 0):
                break
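            # the tail of each observation encodes per-agent alive flags
            # ([-13:-5]) and the current number of live agents ([-5])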
            agents_alive = state_next[0][-13:-5]
            num_agents = int(state_next[0][-5])
            print("_______num agnets in cartpole________")
            print(num_agents)
            print("_____index list")
            print(index_list)
            print(agents_alive)
            agents_alive1 = np.delete(agents_alive, index_list)
            print("_______Alive agent list_______")
            print(agents_alive1)
            # record each newly-alive agent's index exactly once
            for x in range(len(agents_alive)):
                if agents_alive[x] == 1.0 and x not in index_list:
                    index_list.append(x)
            index_to_remove = []
            for x in range(len(agents_alive1)):
                if (agents_alive1[x] == float(1)):
                    learning_brain[index_list[count]] = agents_brain[x]
                    index_to_remove.append(x)
                    count = count + 1

            agents_brain = [
                i for j, i in enumerate(agents_brain)
                if j not in index_to_remove
            ]
            print("____________AGENTS_BRAIN_________")
            print(len(agents_brain))
            print("_______________Terminal_____________")
            print(terminal)
            if terminal[0]:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                score_logger.add_score(step, run)
                for x in range(len(copied_agents_alive)):
                    learning_brain[x] = agents_brain[count1]
                    count1 = count1 + 1
                for x in range(len(learning_brain)):
                    learning_brain[x].save(str(run) + "brain" + str(x) + ".h5")

                break

            for x in range(num_agents - 1):
                state[x] = np.reshape(state[x], [1, observation_space])
                state_next[x] = np.reshape(state_next[x],
                                           [1, observation_space])
                agents_brain[x].remember(state[x], agents_action[x], reward[x],
                                         state_next[x], terminal[x])
                agents_brain[x].experience_replay()
            state = state_next
Example #3
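# Per-agent DQN variant: each agent owns its own DQNSolver, and the brain that
# acts on (and learns from) a given observation is selected by the agent index
# stored at element 12 of that observation.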
def cartpole():
    env = UnityEnv(environment_filename=ENV_NAME, worker_id=5, use_visual=False, multiagent=True)
    score_logger = ScoreLogger(ENV_NAME)
    agents_brain = []
    agents_action = []

    num_agents = env.number_agents
    observation_space = env.observation_space.shape[0]
    print("____________Observation_space")
    print(observation_space)
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    for x in range ((env.number_agents)):
        agents_brain.append(DQNSolver(observation_space, action_space))
    print ("Length of BrainList:    ",len(agents_brain))
    run = 0
    state = env.reset()
    print("______INITIAL______")
    print(state)
    #initialstate = copy.deepcopy(state)
    print("*****************************initial state for unity  envirmonet**************")
    #print(initialstate)
    jk = 1
    while True:
        run += 1
        state = env.reset()
        #state = copy.deepcopy(initialstate)
        num_agents = int(state[0][-5])
        print("_____________State _______________")
        print(int(state[0][12]))
        step = 0

        print("################################This is loop################################# :" , jk)
        while True:
            step += 1
            env.render()
            agents_action = [1] * len(state)
            print(state[0])
            print("*******************Length of state******************")
            print(len(state))
            for x in range(len(state)):
                state[x] = np.reshape(state[x], [1, observation_space])
                agents_action[x] = agents_brain[int(state[x][0,12]) - 1].act(state[x])
            print("Agents Actions List: ",agents_action)
            state_next, reward, terminal, info = env.step(agents_action)
            #print ("_____________STATE_NEXT___________")
            #print (state_next)
            if (len(state_next) == 0):
                break
            agents_alive = state_next[0][-13:-5]
            print ("Agents_alive:    ", agents_alive)
            print ("Rewards:    ",reward)
            num_agents = int(state_next[0][-5])
            print ("Number of agents:   ",num_agents)
            print("_________Terminal list_______" , terminal)
            if terminal[0]:
                print("**************************Brain saved******************************")
                for x in range(len(agents_brain)):
                    agents_brain[x].save(str(run) + "brain" + str(x) + ".h5")

                jk+=1
                print("#####################################Loop is######################## :" , jk)
                #break

            for x in range(len(state_next)):
                state[x] = np.reshape(state[x], [1, observation_space])
                state_next[x] = np.reshape(state_next[x], [1, observation_space])
                agents_brain[int(state_next[x][0,12]) - 1].remember(state[x], agents_action[x], reward[x], state_next[x], terminal[x])
                agents_brain[int(state_next[x][0,12]) - 1].experience_replay()
            state = state_next
Example #4
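# Same per-agent DQN setup, extended with logging: "share" (action 5) and "eat"
# (action 6) choices are counted, the corresponding next states are appended to
# CSV files, and each brain's Keras model is saved when an episode terminates.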
def cartpole():
    env = UnityEnv(environment_filename=ENV_NAME,
                   worker_id=1,
                   use_visual=False,
                   multiagent=True)
    #score_logger = ScoreLogger(ENV_NAME)
    agents_brain = []
    agents_action = []
    pathname = "C:/HinaProgramm/testingFolder/Unity Environment"
    num_agents = env.number_agents
    print("Number of agents in enviroment : ", num_agents)
    observation_space = env.observation_space.shape[0]
    print("____________Observation_space______________")
    print(observation_space)
    print("__________Action Space________________")
    action_space = env.action_space.n
    print(action_space)
    dqn_solver = DQNSolver(observation_space, action_space)
    for x in range((num_agents)):
        agents_brain.append(DQNSolver(observation_space, action_space))
    print("Length of BrainList:    ", len(agents_brain))
    run = 0
    state = env.reset()
    #print("______INITIAL______")
    #print(state)
    initialstate = copy.deepcopy(state)
    #print("*****************************initial state for unity  envirmonet**************")
    #print(initialstate)
    jk = 1
    sharecount = 0
    eatcount = 0
    filecount = 0
    #f = str(filecount) + "sahre.csv"
    f = open(str(filecount) + "sahre.csv", 'ab')
    #J = str(filecount) + "eat.csv"
    J = open(str(filecount) + "eat.csv", 'ab')
    while True:
        run += 1
        env.reset()

        state = copy.deepcopy(initialstate)
        num_agents = int(state[0][-8])
        print("_numagents__________", num_agents)
        print("_____________State _______________")
        print(int(state[0][12]))
        step = 0
        print(
            "################################This is loop################################# :",
            jk)
        print("_____Run _______ :", run)
        while True:

            #print("************Number of agents *********")
            #print(env.number_agents)
            step += 1
            env.render()
            agents_action = [1] * len(state)
            #print(state[0])
            #print("*******************Length of state******************")
            #print(len(state))
            for x in range(len(state)):
                state[x] = np.reshape(state[x], [1, observation_space])
                agents_action[x] = agents_brain[int(state[x][0, 12]) - 1].act(
                    state[x])

            # count how often the "share" (5) and "eat" (6) actions were chosen
            sharecount += agents_action.count(5)
            eatcount += agents_action.count(6)
            #print("Agents Actions List: ",agents_action)

            state_next, reward, terminal, info = env.step(agents_action)
            for x in range(len(agents_action)):
                if (agents_action[x] == 5):
                    new = np.asarray([state_next[x]])
                    np.savetxt(f, new, delimiter=",")
                    #f.write(str(state_next[x])+"\r\n")
                if (agents_action[x] == 6):
                    #J.write(str(state_next[x])+"\r\n")
                    new = np.asarray([state_next[x]])
                    np.savetxt(J, new, delimiter=",")
            print("_____________STATE_NEXT___________")
            print(state_next)
            if (len(state_next) == 0):
                #f.write(str(sharecount))
                #J.write(str(eatcount))
                #f.close()
                #J.close()
                filecount += 1
                # np.savetxt expects an array-like, so wrap the scalar counts
                np.savetxt(f, [sharecount], delimiter=",")
                np.savetxt(J, [eatcount], delimiter=",")
                break
            agents_alive = state_next[0][-16:-8]
            print("Agents_alive:    ", agents_alive)
            print("Rewards:    ", reward)
            num_agents = int(state_next[0][-8])
            print("Number of agents:   ", num_agents)
            #print("_________Terminal list_______" , terminal)
            if terminal[0]:
                print(
                    "**************************Brain saved******************************"
                )
                for x in range(len(agents_brain)):
                    agents_brain[x].model.save(pathname + str(run) + "brain" +
                                               str(x) + ".h5")

                jk += 1
                print(
                    "#####################################Loop is######################## :",
                    jk)
                #f.write(str(sharecount))
                #J.write(str(eatcount))
                #f.close()
                #J.close()
                filecount += 1
                break

            for x in range(len(state_next)):
                state[x] = np.reshape(state[x], [1, observation_space])
                state_next[x] = np.reshape(state_next[x],
                                           [1, observation_space])
                agents_brain[int(state_next[x][0, 12]) - 1].remember(
                    state[x], agents_action[x], reward[x], state_next[x],
                    terminal[x])
                agents_brain[int(state_next[x][0, 12]) - 1].experience_replay()
            state = state_next
Example #5
class Drone_Racing(Environment):
    worker_id = 0

    def __init__(self, platform):
        if platform == OSName.MAC:
            env_filename = EnvironmentName.DRONE_RACING_MAC.value
        elif platform == OSName.WINDOWS:
            env_filename = EnvironmentName.DRONE_RACING_WINDOWS.value
        else:
            env_filename = None

        self.env = UnityEnv(
            environment_filename=env_filename,
            worker_id=randrange(65536),
            use_visual=False,
            multiagent=False
        ).unwrapped

        super(Drone_Racing, self).__init__()
        Drone_Racing.worker_id += 1
        self.action_shape = self.get_action_shape()
        self.action_space = self.env.action_space

        self.continuous = False

        self.skipping_state_fq = 3
        self.skipping_state_index = 0

        self.WIN_AND_LEARN_FINISH_SCORE = 200

    def get_n_states(self):
        return self.env.observation_space.shape[0]

    def get_n_actions(self):
        return self.env.action_space.shape[0]

    def get_state_shape(self):
        return self.env.observation_space

    def get_action_shape(self):
        return self.env.action_space

    def get_action_space(self):
        return self.env.action_space

    @property
    def action_meanings(self):
        action_meanings = ["FORWARD", "BACKWARD", "RIGHT", "LEFT", "UP", "DOWN", "R_ROTATE", "L_ROTATE", "HOVER"]
        return action_meanings

    def reset(self):
        state = self.env.reset()
        return state

    def step(self, action):
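        # Frame skipping: during skip frames the wrapper issues the HOVER action
        # (index 8) instead of the chosen action; the agent's action is only
        # forwarded once skipping_state_index reaches skipping_state_fq, after
        # which the counter resets.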
        action_list = [0] * 9

        if self.is_skip_phase():
            action_list[8] = 1  # hover action
        else:
            action_list[action] = 1

        next_state, reward, done, info = self.env.step(action_list)
        adjusted_reward = reward

        info["skipping"] = True
        if not self.is_skip_phase():
            self.skipping_state_index = 0
            info["skipping"] = False

        self.skipping_state_index += 1

        return next_state, reward, adjusted_reward, done, info

    def render(self):
        self.env.render()

    def close(self):
        self.env.close()

    def is_skip_phase(self):
        return self.skipping_state_index != self.skipping_state_fq
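
A minimal sketch of how a wrapper like this might be driven; the random-action policy, the episode count, and running with OSName.MAC are illustrative assumptions rather than part of the original example.

if __name__ == "__main__":
    from random import randrange

    env = Drone_Racing(OSName.MAC)
    for episode in range(3):
        state = env.reset()
        done, total_reward = False, 0.0
        while not done:
            # pick a random discrete action; a trained policy would go here
            action = randrange(len(env.action_meanings))
            state, reward, adjusted_reward, done, info = env.step(action)
            total_reward += reward
        print("episode", episode, "total reward", total_reward)
    env.close()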