def getQPred(pStates, rl, next_move, action_not_same):
    #print("Q state:", pStates.qs, "\nLast Q:", pStates.last_qs)
    #print("\nNext move: ", next_move)
    # update the Q-table: small positive reward (0.1) when the action is unchanged, otherwise 0
    if action_not_same:
        rl.update(pStates.last_qs, next_move, pStates.qs, 0)
    else:
        rl.update(pStates.last_qs, next_move, pStates.qs, 0.1)
    return indexToAction(rl.action(pStates.qs))
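# Hedged usage sketch (not from the original source): one Q-learning step.
# `rl` is assumed to be a QLearning controller like the one built in Main(),
# and `pongStates` is assumed to expose the discretised `qs`/`last_qs` vectors.
def exampleQStep(pongStates, rl, prev_move, prev_prev_move):
    # one reading of action_not_same: whether the last two executed moves differed
    action_not_same = (prev_move != prev_prev_move)
    return getQPred(pongStates, rl, prev_move, action_not_same)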
def getMPCPred(self, pongStates):
    # if the states are valid, use nxtMPC
    if not self.checkState(pongStates):
        return 0, -1
    predY, criticalT = getTrajectory(pongStates.statek1, self.justLeftOfPaddle,
                                     self.maxIter, self.c)
    #print("\n\nPrediction: ", predY)
    if (self.learnMode):
        addState(pongStates.statek, pongStates.statek1, self.c)
    if predY is not None:
        nextU = self.nxtMPC(predY, pongStates.agent, self.m, self.y, self.u)
        #nextU = 0
        nextInd = int(round(nextU))
        #print("Action: ", nextInd)
        next_move = indexToAction(nextInd)
        return next_move, predY
    # otherwise, return zero
    else:
        return 0, -1
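# Hedged sketch (not from the original source) of the control loop outlined in
# the comments inside Main(): downsample the frame, update the state tracker,
# query the MPC and Q-learning predictors, then step the environment.
# `downsample`, `chooseAction` and `pongStates.updateStates` are assumed
# project helpers that are not defined in this file.
def exampleControlStep(env, observation, pongStates, mpc, rl, last_move, i):
    dsImg = downsample(observation)
    pongStates.updateStates(dsImg, i)
    mpcAction, predY = mpc.getMPCPred(pongStates)
    rlAction = getQPred(pongStates, rl, last_move, action_not_same=False)
    next_move = chooseAction(pongStates, rlAction, mpcAction)
    return env.step(next_move)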
# track the paddle position over a fixed control sequence
paddle = np.zeros([1, len(controlSeq)])
paddle[0][0] = 25
paddle[0][1] = 25
len(paddle)

env = gym.make("PongDeterministic-v4")  # skip 4 frames every time and no random action
observation = env.reset()
gs_agent = 92
downsample_factor = np.array([2, 4])
zfactor = 4
steps = len(controlSeq) - 1
i = 1
while i < steps:
    next_move = indexToAction(controlSeq[i])
    observation, reward, done, info = env.step(next_move)
    i = i + 1
    # allow opencv to interpret the image
    observation_rgb = cv2.cvtColor(observation, cv2.COLOR_BGR2RGB)
    # remove color
    dsNoColour = copy.deepcopy(observation_rgb[:, :, grayscale])
    # downsample
    downsampled = copy.deepcopy(
        dsNoColour[::downsample_factor[1], ::downsample_factor[0]])
    paddle[0][i] = findAgent(downsampled, gs_agent, paddle_aoi_full)
    # turn grayscale back into rgb for data visualisation purposes
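# Hedged visualisation sketch (not from the original source): plots the paddle
# y-positions recorded above against the time step, assuming matplotlib is
# available in the environment.
import matplotlib.pyplot as plt
plt.plot(paddle[0])
plt.xlabel("time step")
plt.ylabel("paddle y position (downsampled pixels)")
plt.title("Tracked paddle position over the control sequence")
plt.show()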
def Main():
    m, y, u = initGekko(alpha, beta, 25, 0)
    env = gym.make("PongDeterministic-v4")  # skip 4 frames every time and no random action
    observation = env.reset()
    found = False

    sqlite3.register_adapter(np.int32, lambda val: int(val))
    init_PT_db(db_file_name)
    d = getcwd() + "\\Database\\" + db_file_name  # get path to db
    c = create_connection(d)

    observation, reward, done, info = env.step(random.choice(actions))

    # TODO remove this ->
    aoi_ds = np.array([[8, 12], [72, 48]])  # in (x, y), (x, y) format
    # with a y downsample factor of 4 the ball is always found and is always 1 pixel;
    # at 3 it is sometimes 2 pixels in the y direction; at 5 it sometimes disappears

    # Debug statements
    print("Area of Interest:\n", aoi_orig)
    print("Downsampled aoi:\n", aoi_ds)
    print("Paddle area:\n", paddle_aoi_full)

    # initialise some variables
    i = 0                 # the time step of the episode we are currently on
    found3 = False        # whether we have found the last three positions of the ball
    next_move = 0         # the next move our agent will take
    episode_rewards = []  # a list of rewards from previous episodes
    reward_sum = 0        # the sum of rewards from the current episode
    episode_number = 1    # the episode number we are on
    last_action = deque(maxlen=3)
    last_action.append(0)
    last_action.append(0)
    last_action.append(0)
    if visualMode:
        grayscale = 0
    else:
        grayscale = 2
    paddle_aoi = copy.deepcopy(paddle_aoi_full)
    foundAgent = False
    foundBall = False

    # qlearning stuff
    index_q = 0
    controller = QLearning(ball_x=82, ball_y=82, ai_pos_y=84, v_x=11, v_y=11,
                           n_action=3)
    qstate = [0, 0, 0, 0, 0]
    game_actions = []
    game_states = []

    # wait until the ball and other player is in the game
    while episode_number <= numOfEps:
        #####################################
        # hopefully the loop will look like this
        # dsImg = downsample(observation)
        # pongStates.updateStates(dsImg, i)
        # mpcAction = mpc.getMPCPred(pongStates)
        # rlAction = rl.getQPred(pongStates)
        # next_move = chooseAction(pongStates, rlAction, mpcAction)
        # observation, reward, done, info = env.step(next_move)
        # show (if visual mode)
        #####################################
        #next_move = random.choice(actions)
        next_move = 0

        if (visualMode):
            # allow opencv to interpret the image
            observation = cv2.cvtColor(observation, cv2.COLOR_BGR2RGB)
        # remove color
        dsNoColour = copy.deepcopy(observation[:, :, grayscale])
        # downsample
        downsampled = copy.deepcopy(
            dsNoColour[::downsample_factor[1], ::downsample_factor[0]])
        # turn grayscale back into rgb for data visualisation purposes
        ds_nc_u = cv2.cvtColor(downsampled, cv2.COLOR_GRAY2RGB)

        # if we found the ball in the last frame, we only check a small area
        # around the ball, rather than the whole frame
        if foundBall:
            foundBall, pos = findBall(downsampled, gs_ball, aoi)
        # otherwise check the whole downsampled frame
        else:
            foundBall, pos = findBall(downsampled, gs_ball, aoi_ds)

        #print(downsampled[paddlePos[0,1]:paddlePos[1,1], paddlePos[0,0]:paddlePos[1,0]])
        if foundAgent:
            paddlePos = findAgent(downsampled, gs_agent, paddle_aoi)
        else:
            paddlePos = findAgent(downsampled, gs_agent, paddle_aoi_full)

        # if we have found the ball in this time step
        if foundBall:
            # append the ball position to the queue
            last_pos.append([pos[0, 0], pos[0, 1], i])
            # check if we have found the last three positions consecutively
            if len(last_pos) > 2:
                if (last_pos[2][2] - last_pos[1][2] == 1) and (last_pos[1][2] - last_pos[0][2] == 1):
                    found3 = True
                else:
                    found3 = False
            else:
                found3 = False
            #print("Position:\n", pos)
            aoi = findaoi(pos)
        else:
            #print("Could not find at timestep:", i)
            #aoi = np.empty([2,2])
            last_pos.append([-1, -1, -1])

        if paddlePos != -1:
            #print("found agent")
            foundAgent = True
            # the area to search for the paddle in the next time step
            paddle_aoi[0, 1] = paddlePos - agentBuffer
            paddle_aoi[1, 1] = paddlePos + agentBuffer
            #print("aoi before rejig:\n", aoi)
            # if this is outside the bounds of the aoi of the whole downsampled
            # version, clamp it, since we do not want to search outside this area
            if (paddle_aoi[0, 1] < paddle_aoi_full[0, 1]):
                paddle_aoi[0, 1] = paddle_aoi_full[0, 1]
            if (paddle_aoi[1, 1] > paddle_aoi_full[1, 1]):
                paddle_aoi[1, 1] = paddle_aoi_full[1, 1]
            #print("Next paddle aoi: ", paddle_aoi, " paddle position: ", paddlePos, " paddle aoi full: ", paddle_aoi_full)
        else:
            foundAgent = False

        if (found3):
            #print("found last three. Last position: ", last_pos)
            # we want to predict the trajectory if 1. the ball is coming towards us,
            # 2. it is past the point specified by startPredictPoint, and
            # 3. we detected the ball position during this timestep
            if (((last_pos[1][0] - last_pos[0][0]) > 0)
                    and last_pos[1][0] > startPredictPoint
                    and i == last_pos[2][2]):
                #print("Ball is coming towards us, and in our half")
                # find the velocity at the last few timesteps
                velxk1 = last_pos[2][0] - last_pos[1][0]
                velyk1 = last_pos[2][1] - last_pos[1][1]
                velxk = last_pos[1][0] - last_pos[0][0]
                velyk = last_pos[1][1] - last_pos[0][1]
                # debug statement
                #if abs(velxk1) > 6:
                #    print("last Position: ", last_pos)

                # the last two states according to our data
                statek = (last_pos[1][0].astype(int), last_pos[1][1].astype(int),
                          velxk.astype(float), velyk.astype(float))
                statek1 = (last_pos[2][0].astype(int), last_pos[2][1].astype(int),
                           velxk1.astype(float), velyk1.astype(float))
                #print("adding states\nstatek: ", statek, "\nstatek1: ", statek1)
                if learningMode:
                    # add this data to the database
                    addState(statek, statek1, c)

                # get the prediction of the ball trajectory
                ball_y, T = getTrajectory(statek1, justLeftOfPaddle, maxIter, c)

                # get all the points the ball will hit on its trajectory
                # for visualisation purposes only
                if (visualMode):
                    prediction = getTrajectoryAll(statek1, justLeftOfPaddle,
                                                  maxIter, c)
                    # plot the trajectory the agent thinks the ball will take
                    for n in prediction:
                        ds_nc_u[n[1], n[0]] = (40, 166, 255)

                #print("Prediction: ", ball_y)
                # if no prediction, do nothing
                if ball_y is None:
                    #print("No Prediction at time: ", i)
                    next_move = 0
                    last_action.append(next_move)
                # if the agent isn't found make a random choice TODO justify this
                elif (paddlePos == -1):
                    next_move = random.choice(actions)
                    # debug stuff
                    #print("Can't see agent at time: ", i)
                    #cv2.imshow("Test", downsampled[paddlePos[0,1]:paddlePos[0,0], paddlePos[1,1]:paddlePos[1,0]])
                    #if cv2.waitKey() & 0xFF == ord('q'):
                    #    break
                # this is where I need to put MPC
                else:
                    nextU = nxtMPC(ball_y, paddlePos, m, y, u)
                    nextInd = int(round(nextU))
                    next_move = indexToAction(nextInd)
                    print(nextU)

                if visualMode:
                    if ball_y is not None:
                        # if we have a trajectory guess, paint the pixel red
                        # visualisation only
                        ds_nc_u[ball_y, justLeftOfPaddle] = (0, 0, 255)

        # get the observation (image), reward, done flag and info (unused)
        observation, reward, done, info = env.step(next_move)
        if (visualMode):
            bre = showImg(ds_nc_u)
            if bre:
                break
        i = i + 1
        reward_sum += reward

        # stop the game from getting stuck in an infinite loop, or reset when an episode is done
        if done or i > 10000:
            observation = env.reset()  # reset env
            episode_rewards.append(reward_sum)  # add episode reward to reward list
            print('episode:', episode_number,
                  ' reward total was %f' % (reward_sum))  #. running mean: %f' % (reward_sum, running_reward))
            reward_sum = 0
            i = 0
            episode_number += 1

        if not visualMode:
            if i % 100 == 0:
                print("Time: ", i, ". Reward sum: ", reward_sum)

    print("committing and closing")
    c.commit()
    c.close()
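# Standard entry point so the script runs Main() when executed directly.
if __name__ == "__main__":
    Main()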