예제 #1
0
def randomGame():
    """Run a two-round demo on a randomly generated 20x20 map.

    Builds a game, populates it with a random map, configures scoring,
    prints the full map, then plays two rounds in which the three agents
    receive the same movement commands. After every round the console
    view and the current score are printed.
    """
    print("\n== init random game ==")
    game = Game(20, 20)  # height, width
    game.setRandomMap(3, 3, 4)  # numbers of agents, targets, obstacles
    game.setScore(100, 20, 10, -2, -0.04, -20)

    game.printGodInfo()

    # Each entry is (agent id, dx, dy); both rounds issue the same moves.
    moves = ((0, 1, 1), (1, -1, 1), (2, 1, -1))

    for banner in ("\n== 1st round ==", "\n== 2ed round =="):
        print(banner)
        commands = [Command(agent_id, dx, dy) for agent_id, dx, dy in moves]

        game.runOneRound(commands)

        game.printConsoleInfo()
        print("Score: " + str(game.outputScore()))
예제 #2
0
def manualGame():
    """Run a three-round demo on a hand-built 20x20 map.

    Obstacles, targets and agents are placed at fixed coordinates, the
    scoring parameters are configured, and the initial map and score are
    printed. Three rounds of fixed commands are then played, printing the
    console view and the score after each one.
    """
    print("\n== init manual setting game ==")
    height = width = 20
    game = Game(height, width)

    # Obstacles and targets all sit on the main diagonal (x == y).
    game.setObstacles([{"x": c, "y": c} for c in (1, 2, 3)])
    game.setTargets([{"x": c, "y": c} for c in (10, 12, 13)])

    # (id, x, y) start positions; agents are keyed by their id.
    starts = ((0, 0, 0), (1, 14, 14), (2, 15, 15))
    game.setAgents({
        agent_id: Agent(agent_id, x, y, height, width)
        for agent_id, x, y in starts
    })
    game.setScore(100, 20, 10, -0.0005, 0, -20)

    game.printGodInfo()
    print("Score: " + str(game.outputScore()))

    # One entry per round: banner text and the (id, dx, dy) move of each agent.
    rounds = (
        ("\n== 1st round ==", ((0, 0, 1), (1, -1, 1), (2, 1, -1))),
        ("\n== 2ed round ==", ((0, 1, 1), (1, -1, 1), (2, 1, -1))),
        ("\n== 3ed round ==", ((0, 1, 1), (1, -1, 1), (2, -1, 0))),
    )

    for banner, moves in rounds:
        print(banner)
        commands = [Command(agent_id, dx, dy) for agent_id, dx, dy in moves]

        game.runOneRound(commands)

        game.printConsoleInfo()
        print("Score: " + str(game.outputScore()))
예제 #3
0
class ReplayBuffer():
    """Replay buffer that drives the route-planning game and stores transitions.

    The buffer owns a ``Game`` instance, steps it with randomly selected
    actions and appends ``Data(state, action, next_state)`` records to
    ``self.memory`` (capped at ``self.buffer_limit`` entries, oldest
    dropped first).
    """

    def __init__(self, agent_num, height, width, model, modelpath, game_round, cuda=True):
        """Store the configuration and start the first game.

        Args:
            agent_num: number of agents in the game.
            height: map height passed to ``Game``.
            width: map width passed to ``Game``.
            model: PyTorch model whose weights are synced in ``collect``.
            modelpath: file path used by ``loadWeight``.
            game_round: round limit passed to ``Game`` and checked in ``collect``.
            cuda: when true, the model is moved to the GPU in ``collect``.
        """
        self.height = height
        self.width = width
        self.game_round = game_round
        self.agent_num = agent_num
        self.model = model
        self.modelpath = modelpath
        self.cuda = cuda

        self.state = torch.zeros([self.agent_num, 3, self.height, self.width], dtype=torch.float32)
        # Frame stack consumed by play(); built lazily from the first
        # observation so its shape always matches the game's images.
        self.states = None

        # Default score settings handed to Game.setScore in initialGame().
        self.acquisition_sum = 400
        self.explored_target_sum = 70
        self.explored_sum = 40
        self.total_score = self.acquisition_sum + self.explored_target_sum + self.explored_sum
        self.time_decrease = -0.00005
        self.crash_time_penalty = -0.0001
        self.crash_sum = -400
        self.reg_val = 1

        self.memory = []
        self.buffer_limit = 1000

        self.game = None
        self.score = None
        self.initialGame()

    def _observe(self):
        """Return the stacked per-agent images of the current game as a float32 tensor."""
        frames = np.array([self.game.outputAgentImage(i) for i in range(self.agent_num)])
        return torch.from_numpy(frames.astype(np.float32))

    @staticmethod
    def _to_rgb_image(arr):
        """Convert a channel-first image (3, H, W) into an (H, W, 3) uint8 array."""
        arr = np.asarray(arr)
        img = np.zeros(tuple(arr.shape[1:3]) + (3,), dtype=np.uint8)
        for channel in range(3):
            img[:, :, channel] = arr[channel, :, :]
        return img

    def collect(self, model, verbose=1):
        """Sync weights from ``model`` and record one game step.

        If the current game is finished (inactive or past ``game_round``)
        a new game is started instead and nothing is recorded.

        Args:
            model: source of the weights copied into ``self.model``.
            verbose: when equal to 1, the game score is printed after the step.
        """
        if self.cuda:
            self.model.cuda()

        self.model.load_state_dict(model.state_dict())

        if not (self.game.active and self.game.state < self.game_round):
            # The game is over: start a fresh one.
            self.initialGame()
            return

        action = self.select_action()
        self.game.runOneRound([intoCommand(i, action[i]) for i in range(self.agent_num)])

        next_state = self._observe()

        if verbose == 1:
            print(self.game.outputScore())

        self.memory.append(Data(self.state, action, next_state))
        self.state = next_state

        if len(self.memory) > self.buffer_limit:
            # Drop the oldest transition by position (no equality search).
            del self.memory[0]

    def select_action(self):
        """Return one uniformly random action index in [0, 4] per agent."""
        return torch.tensor([random.randint(0, 4) for i in range(self.agent_num)])

    def save_state(self, path):
        """Display every agent's current image in a subplot grid.

        NOTE: ``path`` is accepted for interface compatibility but is not
        used; the figure is only shown, not written to disk.
        """
        # Square grid; stays 2x2 for up to four agents and grows beyond that.
        side = 2
        while side * side < self.agent_num:
            side += 1

        for i in range(self.agent_num):
            plt.subplot(side, side, i + 1)
            plt.imshow(self._to_rgb_image(self.game.outputAgentImage(i)))
        plt.show()

    def play(self, model, game_round):
        """Play the current game with ``model`` choosing the actions.

        Keeps a rolling stack of the four most recent observations in
        ``self.states`` and feeds the latest three to ``model`` each step.

        Args:
            model: callable mapping the stacked frames to per-agent action scores.
            game_round: the loop stops once ``self.game.state`` reaches this value.
        """
        while self.game.active and self.game.state < game_round:
            frame = self._observe()
            if getattr(self, "states", None) is None:
                # First use: allocate an all-zero history of four frames.
                self.states = torch.zeros((4,) + tuple(frame.shape), dtype=frame.dtype)

            # Shift the history by one slot and append the newest frame.
            self.states[0] = self.states[1]
            self.states[1] = self.states[2]
            self.states[2] = self.states[3]
            self.states[3] = frame

            action = model(self.states[1:].unsqueeze(0)).max(2)[1].view(self.agent_num)
            print(action)
            action = [intoCommand(i, action[i]) for i in range(self.agent_num)]
            self.game.runOneRound(action)
            print(self.game.outputScore())
            sleep(0.1)

    def initialGame(self):
        """Create a new random game and capture its initial state and score."""
        t = (self.height + self.width) / 2

        self.game = Game(self.height, self.width, self.game_round)
        # The second and third counts scale with the mean side length of the map.
        self.game.setRandomMap(self.agent_num, int(t * 0.3) ** 2, int(t * 0.1) ** 2)
        self.game.setScore(self.acquisition_sum, self.explored_target_sum, self.explored_sum, self.time_decrease, self.crash_time_penalty, self.crash_sum, self.reg_val)

        # Run one round of zero-displacement commands before the first observation.
        self.game.runOneRound([Command(i, 0, 0) for i in range(self.agent_num)])
        self.score = np.array([self.game.outputScore() for i in range(self.agent_num)])

        self.state = self._observe()

        print('New Game!')

    def loadWeight(self):
        """Load model weights from ``self.modelpath`` if the file exists.

        When ``self.cuda`` is false the weights are mapped onto the CPU.
        A message is printed and nothing is loaded if the file is missing.
        """
        if not os.path.exists(self.modelpath):
            print('Model weight [{0}] not found'.format(self.modelpath))
            return

        # map_location=None keeps torch.load's default device placement.
        location = None if self.cuda else 'cpu'
        self.model.load_state_dict(torch.load(self.modelpath, map_location=location))