Python Dynamics 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: DiscreteHFO.EnvironmentDynamics

클래스/타입: Dynamics

hotexamples.com에서의 예제들: 2

Python Dynamics - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 DiscreteHFO.EnvironmentDynamics.Dynamics에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Dynamics(1)

sampleDynamics(1)

자주 사용되는 메소드들

Dynamics (1)

sampleDynamics (1)

예제 #1

파일 보기

파일: HFOAttackingPlayer.py 프로젝트: cdroutsas/Reinforcement_Learning_HFO

	def __init__(self, config_dir = '../../../bin/teams/base/config/formations-dt', agentId=0,
		port = 2207, server_addr = 'localhost', team_name = 'base_left', play_goalie = False,
		initDiscCoordX = 0, initDiscCoordY = 0, numOpponents = 0, numTeammates = 0,
		collisionPenalty = 0.4, dribbleAccuracy = 0.2, kickAccuracy = None,
		actionDurations = 40, initFileLoc = "initCoordinates.txt"):
		"""Store the configuration and load the initial positions.

		Every argument is stored on the instance under the same name.
		``kickAccuracy`` defaults to a freshly built 5x5 nested list filled
		with 0.8 when it is omitted or ``None``.

		Construction also creates an ``HFOEnvironment`` and a ``Dynamics``
		object and finishes by calling ``self.readInitLocFinal()``.
		"""
		# Build the default per call: a list literal in the signature would be
		# shared by every instance, and ``[[0.8] * 5] * 5`` would additionally
		# alias the same inner row five times.
		if kickAccuracy is None:
			kickAccuracy = [[0.8] * 5 for _ in range(5)]

		self.hfo = HFOEnvironment()
		self.config_dir = config_dir
		self.port = port
		self.server_addr = server_addr
		self.team_name = team_name
		self.play_goalie = play_goalie
		self.initDiscCoordY = initDiscCoordY
		self.initDiscCoordX = initDiscCoordX
		self.numTeammates = numTeammates
		self.numOpponents = numOpponents
		self.collisionPenalty = collisionPenalty
		self.curState = None
		self.dribbleAccuracy = dribbleAccuracy
		self.kickAccuracy = kickAccuracy
		self.possibleActions = ['DRIBBLE_UP', 'DRIBBLE_DOWN', 'DRIBBLE_LEFT', 'DRIBBLE_RIGHT', 'KICK']
		self.actionDurations =  actionDurations
		self.initPositions = []
		self.oppoPositions = []
		self.agentId = agentId
		self.episode = 0
		self.initFileLoc = initFileLoc
		self.dynamics = Dynamics()
		# Fills self.initPositions / self.oppoPositions from initFileLoc.
		self.readInitLocFinal()

예제 #2

파일 보기

파일: HFOAttackingPlayer.py 프로젝트: nanton96/Reinforcement-Learning-Algorithms

class HFOAttackingPlayer(object):
    def __init__(self,
                 config_dir='../../../bin/teams/base/config/formations-dt',
                 agentId=0,
                 port=6000,
                 server_addr='localhost',
                 team_name='base_left',
                 play_goalie=False,
                 initDiscCoordX=0,
                 initDiscCoordY=0,
                 numOpponents=0,
                 numTeammates=0,
                 collisionPenalty=0.4,
                 dribbleAccuracy=0.2,
                 kickAccuracy=[[0.8] * 5] * 5,
                 actionDurations=40,
                 initFileLoc="initCoordinates.txt"):

        self.hfo = HFOEnvironment()
        self.config_dir = config_dir
        self.port = port
        self.server_addr = server_addr
        self.team_name = team_name
        self.play_goalie = play_goalie
        self.initDiscCoordY = initDiscCoordY
        self.initDiscCoordX = initDiscCoordX
        self.numTeammates = numTeammates
        self.numOpponents = numOpponents
        self.collisionPenalty = collisionPenalty
        self.curState = None
        self.dribbleAccuracy = dribbleAccuracy
        self.kickAccuracy = kickAccuracy
        self.possibleActions = [
            'DRIBBLE_UP', 'DRIBBLE_DOWN', 'DRIBBLE_LEFT', 'DRIBBLE_RIGHT',
            'KICK'
        ]
        self.actionDurations = actionDurations
        self.initPositions = []
        self.oppoPositions = []
        self.agentId = agentId
        self.episode = 0
        self.initFileLoc = initFileLoc
        self.dynamics = Dynamics()
        self.readInitLocFinal()

    # Restarts episode by resetting the current state of the environment to the initial state.
    def reset(self):
        self.curState = [(self.initPositions[self.episode][0],
                          self.initPositions[self.episode][1])]
        for oppoIndex in range(len(self.oppoPositions[self.episode])):
            self.curState.append(
                (self.oppoPositions[self.episode][oppoIndex][0],
                 self.oppoPositions[self.episode][oppoIndex][1]))

        self.initDiscCoordX = self.initPositions[self.episode][0]
        self.initDiscCoordY = self.initPositions[self.episode][1]
        self.initGame()
        self.episode += 1

        return self.curState

    # Establish connection with HFO server
    def connectToServer(self):
        """Connect ``self.hfo`` to the server using the stored settings.

        Requests the HIGH_LEVEL_FEATURE_SET and forwards the config
        directory, port, server address, team name and goalie flag that were
        given to the constructor.
        """
        connectionArgs = (self.config_dir, self.port, self.server_addr,
                          self.team_name, self.play_goalie)
        self.hfo.connectToServer(HIGH_LEVEL_FEATURE_SET, *connectionArgs)

    # From a location feature given by HFO, output the discrete representation of that location
    def getDiscretizedLocation(self, coordX, coordY):
        discCoordX = int(math.floor((coordX + (1.0 / 11.0)) / 0.34))
        discCoordY = int(math.floor((coordY) / 0.275))

        return discCoordX, discCoordY

    # Based on gridworld coordinate, get the coordinates of the centroid of that
    # grid in the real HFO state representation.

    def getCentroidCoord(self, discCoordX, discCoordY):
        centroidX = (-1.0 / 1.1) + discCoordX * 0.34 + 0.17
        centroidY = -0.825 + discCoordY * 0.275 + 0.1375

        return centroidX, centroidY

    # Method to move agent to its initial position

    def moveToInitLocs(self):
        """Issue one DRIBBLE_TO toward the initial cell and advance one step.

        The target is the centroid of (initDiscCoordX, initDiscCoordY).
        ``self.curState`` is not modified here.
        """
        targetX, targetY = self.getCentroidCoord(self.initDiscCoordX,
                                                 self.initDiscCoordY)
        env = self.hfo
        env.act(DRIBBLE_TO, targetX, targetY)
        env.step()

    # Method updates the discrete state representation of the environment after
    # the agent does an action. Stochasticity of the environment is implemented here.

    def act(self, actionString):

        resultingStatus = 0
        counter = 0
        agentCurrentState = self.curState[0]
        actionString = self.dynamics.sampleDynamics(actionString,
                                                    agentCurrentState)
        if actionString == 'DRIBBLE_UP':
            nextDiscX = self.curState[0][0]
            nextDiscY = max(self.curState[0][1] - 1, 0)

        elif actionString == 'DRIBBLE_DOWN':
            nextDiscX = self.curState[0][0]
            nextDiscY = min(self.curState[0][1] + 1, 5)

        elif actionString == 'DRIBBLE_LEFT':
            nextDiscX = max(0, self.curState[0][0] - 1)
            nextDiscY = self.curState[0][1]

        elif actionString == 'DRIBBLE_RIGHT':
            nextDiscX = min(4, self.curState[0][0] + 1)
            nextDiscY = self.curState[0][1]

        if actionString != 'KICK' and actionString != 'KICK_WAYWARD':
            destinationX, destinationY = self.getCentroidCoord(
                nextDiscX, nextDiscY)

            for index in range(1, len(self.curState)):
                if (nextDiscX, nextDiscY) == self.curState[index]:
                    destinationX -= 0.05
                    destinationY -= 0.05
                    break
            resultingStatus = self.visualizeDribbles(destinationX,
                                                     destinationY)
            self.curState[0] = (nextDiscX, nextDiscY)

        else:
            kickSuccessFlag = False
            if actionString == 'KICK':
                kickSuccessFlag = True
                self.curState = "GOAL"
                resultingStatus = GOAL
            else:
                self.curState = "OUT_OF_BOUNDS"
                resultingStatus = OUT_OF_BOUNDS

            self.visualizeKicks(kickSuccessFlag)

        return resultingStatus, self.curState

    # Visualizes the DRIBBLE_* actions taken by agent. Action is
    # completed only if the environment decides to stop the game
    # or if the action duration surpasses self.actionDurations iterations.
    # Returns the status after the dribble action is completed

    def visualizeDribbles(self, destinationX, destinationY):
        """Play out a dribble toward (destinationX, destinationY).

        Runs at most ``self.actionDurations`` environment steps and stops
        early as soon as ``self.hfo.step()`` returns a non-zero status.
        Returns the last status seen (0 if no step was taken).
        """
        status = 0
        elapsed = 0

        while elapsed < self.actionDurations:
            features = self.hfo.getState()

            # features[5] == 1 means the agent has the ball: dribble to the
            # destination. Otherwise it first has to get closer to the ball.
            if features[5] == 1:
                self.hfo.act(DRIBBLE_TO, destinationX, destinationY)
            else:
                self.hfo.act(GO_TO_BALL)

            status = self.hfo.step()
            elapsed += 1
            if status != 0:
                break

        return status

    # Visualizes the KICK action taken by agent. Action is
    # completed only if the ball gets out of the play, the episode finishes,
    # or a goal happened.

    def visualizeKicks(self, kickSuccessFlag):
        """Play out a kick until the environment reports a non-zero status.

        When ``kickSuccessFlag`` is true the agent repeatedly issues SHOOT.
        Otherwise (wayward kick) it issues KICK_TO toward a fixed point
        chosen from the signs of the first two state features (read here as
        the agent's x and y position), so the ball is sent away from the goal.
        After every kick the agent chases the ball with GO_TO_BALL until
        feature 5 equals 1 again or the status becomes non-zero.

        Returns:
            The final status from ``self.hfo.step()``. The previous
            implementation always returned 0 because its result variable was
            never updated.
        """
        currentState = self.hfo.getState()
        status = 0

        while status == 0:
            if kickSuccessFlag:
                # Shoot the ball to the goal
                self.hfo.act(SHOOT)
            else:
                # Shoot the ball to a point close to the agent which isn't
                # the goal
                curPosX, curPosY = currentState[0], currentState[1]
                if curPosX > 0 and curPosY > 0:
                    self.hfo.act(KICK_TO, 0.5, 1.0, 3)
                elif curPosX <= 0 and curPosY > 0:
                    self.hfo.act(KICK_TO, -0.5, 1.0, 3)
                elif curPosX > 0 and curPosY <= 0:
                    self.hfo.act(KICK_TO, 1.0, -0.5, 3)
                else:
                    self.hfo.act(KICK_TO, -1.0, -0.5, 3)

            status = self.hfo.step()
            currentState = self.hfo.getState()

            # In case that the kick is too weak, agent must chase the ball
            while currentState[5] != 1 and status == 0:
                self.hfo.act(GO_TO_BALL)
                status = self.hfo.step()
                currentState = self.hfo.getState()

        return status

    # Defined reward for this environment
    # Subtract self.collisionPenalty (0.4 by default) if the attacking agent
    # occupies the same grid cell as an opponent. Also, give +1 for a goal.
    def get_reward(self, status, nextState):
        """Compute the reward for a transition.

        +1 when ``status`` equals GOAL. When ``nextState`` is a list of
        cells (i.e. not the terminal markers "GOAL" / "OUT_OF_BOUNDS"),
        ``self.collisionPenalty`` is subtracted once if the agent's cell
        (index 0) equals any of the remaining cells.
        """
        reward = 1 if status == GOAL else 0

        # Terminal states carry no positions to compare.
        if nextState == "GOAL" or nextState == "OUT_OF_BOUNDS":
            return reward

        if any(nextState[0] == other for other in nextState[1:]):
            reward -= self.collisionPenalty

        return reward

    # Discretize the state representation given by the HFO environment.
    # Discretization is done to the locations of the ball and agents
    def process_state(self, state):
        discretizedState = self.getDiscretizedLocation(state[0], state[1])
        offset = 10 + 6 * self.numTeammates

        infoList = [discretizedState]
        for i in range(self.numOpponents):
            oppoLocX = offset + 3 * i
            oppoLocY = offset + 3 * i + 1
            infoList.append(
                self.getDiscretizedLocation(state[oppoLocX], state[oppoLocY]))
        return infoList

    # Method that serves as an interface between a script controlling the agent
    # and the environment. Method returns the nextState, reward, flag indicating
    # end of episode, and current status of the episode

    def step(self, action_params):
        """Run one action and report the transition.

        Returns a 4-tuple ``(state, reward, done, status)`` where ``state``
        is ``self.curState`` after ``act``, ``reward`` comes from
        ``get_reward`` and ``done`` is true once ``status`` differs from
        IN_GAME.
        """
        outcome = self.act(action_params)
        status, nextState = outcome
        episodeOver = status != IN_GAME
        reward = self.get_reward(status, nextState)
        return self.curState, reward, episodeOver, status

    def waste_one_episode(self):
        """Issue DASH(0, 0) every step until the status leaves IN_GAME."""
        while True:
            self.hfo.act(DASH, 0, 0)
            if self.hfo.step() != IN_GAME:
                break

    def waste_one_step(self):
        """Issue a single DASH(0, 0) and advance the environment one step."""
        idleAction = (DASH, 0, 0)
        env = self.hfo
        env.act(*idleAction)
        env.step()

    def quitGame(self):
        """Send the QUIT action to the HFO environment."""
        env = self.hfo
        env.act(QUIT)

    # For the first 150 iterations, reposition agents
    # to initial position
    def initGame(self):
        frameCounters = 0
        while frameCounters < 150:
            self.moveToInitLocs()
            frameCounters += 1

    def readInitLocFinal(self):
        filename = self.initFileLoc
        file = open(filename, "r")

        self.initPositions = []

        for line in file:
            episodeOpponents = []
            listPos = ast.literal_eval(line)
            for index in range(self.numOpponents + 1):
                if index == self.agentId:
                    self.initPositions.append(listPos[index])
                else:
                    episodeOpponents.append(listPos[index])
            self.oppoPositions.append(episodeOpponents)