def runAgent():
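    # NOTE: envSize, nRobot, nHidden, lR, maxIter, rewType, nEpisode and gamma
    # are assumed to be module-level settings defined elsewhere in this example.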
    exp = AGENT(envSize,
                nRobot,
                nHidden,
                lR,
                maxIter=maxIter,
                rewardType=rewType)
    exp.reinforce(nEpisode, gamma, returnDF=False)
    return exp
Example #2
    def __init__(self, args, Ite):

        self.args = args
        self.Ite = Ite

        #Dim of state and action
        self.num_states = 3
        self.num_actions = 1

        #Initialize Agent
        self.agent = AGENT(self.num_states, self.num_actions, self.args)
    def __init__(self, args, Ite):

        self.args = args
        self.Ite = Ite

        #Dim of state and action
        self.num_states = 3  #x=[sin\theta cos\theta \omega]
        self.num_actions = 1  #a=[a]

        #Initialize Agent
        self.agent = AGENT(self.num_states, self.num_actions, self.args)
def login(accountListFile, transactionSummary):
    try:
        choice = input("Welcome to the front end: \n")
        if choice == 'login':  # user entered 'login'
            print("Successfully logged in.")
            status = False  # becomes True once a valid mode has been chosen
            while not status:  # loop until a valid mode is entered
                mode = input("Select mode to enter: \n")  # select mode
                if mode == "atm":  # user entered 'atm'
                    print("Successfully entered ATM mode.")
                    newAtm = ATM(accountListFile, transactionSummary)                                           # create new atm object
                    status = True                                              
                elif mode == "agent":  # user entered 'agent'
                    print("Successfully entered agent mode.")
                    newAgent = AGENT(accountListFile, transactionSummary)                                       # create new agent object
                    status = True
                elif mode == "logout":
                    status = True
                    with open(transactionSummary, "w") as f:
                        f.write("EOS")
                else:  # any other input is invalid
                    print("Error: invalid mode choice, please enter a valid mode!")
            print("Successfully logged out.")
            login(accountListFile, transactionSummary)
        else:                                                                               # invalid input
            print("Error:Please login first!")
            login(accountListFile, transactionSummary)
    except Exception:
        quit()  # exit the program
Example #5
class ENVIRONMENT:
    def __init__(self, args, Ite):

        self.args = args
        self.Ite = Ite

        #Dim of state and action
        self.num_states = 3
        self.num_actions = 1

        #Initialize Agent
        self.agent = AGENT(self.num_states, self.num_actions, self.args)

    def run(self):

        xlist = np.zeros((201, 201))
        ylist = np.zeros((201, 201))
        ulist = np.zeros((201, 201))

        for episode in range(1):
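            # Sweep a 201x201 grid over x_1 = theta (roughly [-pi, pi]) and
            # x_2 = omega ([-4, 4]), recording the agent's action u at each state.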

            for xx in range(201):
                for yy in range(201):
                    reset_x_1 = -np.pi + 0.0314 * xx
                    reset_x_2 = -4.0 + 0.04 * yy

                    plot_x = np.array([[reset_x_1,
                                        reset_x_2]])  # np.array(1,2)

                    xlist[xx, yy] = plot_x[0, 0]
                    ylist[xx, yy] = plot_x[0, 1]

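                    # Encode the state as the 3-dim observation [sin(x_1), cos(x_1), x_2]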
                    input_state = torch.zeros(1, 3)
                    input_state[0, 0] = np.sin(plot_x[0, 0])
                    input_state[0, 1] = np.cos(plot_x[0, 0])
                    input_state[0, 2] = plot_x[0, 1]

                    u = self.agent.get_action(input_state, None)

                    u = u.cpu()  # Torch -> numpy
                    u = u.detach().numpy()
                    u = u.reshape(1, 1)  # numpy.array(1,1)

                    ulist[xx, yy] = u

            plt.rcParams['font.family'] = 'Times New Roman'
            plt.rcParams["mathtext.fontset"] = 'cm'
            plt.rcParams['mathtext.default'] = 'it'

            fig, ax = plt.subplots()
            cs = ax.pcolormesh(xlist,
                               ylist,
                               ulist,
                               shading='auto',
                               cmap='seismic',
                               vmin=-1.0,
                               vmax=1.0)  # seismic,hot
            fig.colorbar(cs)

            ax.set_xlabel('$x_1$', fontsize=18)
            ax.set_ylabel('$x_2$', fontsize=18)

            # Plot a cross mark at the fixed point (0, 0)
            point = {'fixedpoint': [0.0, 0.0]}
            ax.plot(*point['fixedpoint'], 'x', color="gray", markersize=12)

            fig.savefig('mu_1.eps', pad_inches=0.05)
            fig.savefig('mu_1.png', pad_inches=0.05)
Example #6
class ENVIRONMENT:
    def __init__(self, args, Ite):

        self.args = args
        self.Ite = Ite

        #Dim of state and action
        self.num_states = 3
        self.num_actions = 1

        #Initialize Agent
        self.agent = AGENT(self.num_states, self.num_actions, self.args)

    def run(self):

        xlist = np.array(
            [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
        ylist = np.arange(5.0, 51.0, 1.0)  # 5.0, 6.0, ..., 50.0
        vallist = np.zeros((45, 10))

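        # Sweep a_param over {0.05, 0.15, ..., 0.95} and b_param over
        # {5.5, 6.5, ..., 49.5}; roll the fixed policy out for 1000 steps on
        # each (a, b) system and record the summed reward in vallist.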
        for a_iteration in range(10):
            a_param = 0.05 + a_iteration * 0.1
            for b_iteration in range(45):
                b_param = 5.5 + b_iteration * 1.0

                max_reward = -10000.0  # not used in this loop

                # (a,b)'s score
                rewards = 0

                theta = np.pi
                omega = 0.0
                state = np.array([[theta, omega]])

                for test_step in range(1000):

                    current_obs = torch.Tensor([[
                        np.sin(state[0, 0]),
                        np.cos(state[0, 0]), state[0, 1]
                    ]])  #state: numpy(1,2) -> torch.Tensor(1,3)
                    action = self.agent.get_action(current_obs,
                                                   None)  #Not input ounoise
                    action = action.detach().numpy()[
                        0]  #action: torch.Tensor(1,1) -> numpy(scaler)

                    next_state, reward, done = dynamics.Dynamics(
                        state, action, a_param,
                        b_param)  #next_state: numpy(1,2)

                    rewards += reward

                    state = next_state

                print("(a,b)=(" + str(a_param) + "," + str(b_param) +
                      ") : reward " + str(rewards))
                vallist[b_iteration, a_iteration] = rewards

        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams["mathtext.fontset"] = 'cm'
        plt.rcParams['mathtext.default'] = 'it'

        fig, ax = plt.subplots()
        cs = ax.pcolormesh(xlist,
                           ylist,
                           vallist,
                           cmap="jet",
                           vmin=-4000.0,
                           vmax=-50.0)  #seismic,hot
        fig.colorbar(cs)
        ax.set_xlim(0.0, 1.0)
        ax.set_ylim(5.0, 50.0)
        ax.set_xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
        ax.set_yticks(
            [5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0])
        ax.set_xlabel(r'$\xi_{1}$', fontsize=18)
        ax.set_ylabel(r'$\xi_{2}$', fontsize=18)

        fig.savefig('Score_of_mu_1.eps', pad_inches=0.05)
        fig.savefig('Score_of_mu_1.png', pad_inches=0.05)
class ENVIRONMENT:
    def __init__(self, args, Ite):

        self.args = args
        self.Ite = Ite

        #Dim of state and action
        self.num_states = 3  #x=[sin\theta cos\theta \omega]
        self.num_actions = 1  #a=[a]

        #Initialize Agent
        self.agent = AGENT(self.num_states, self.num_actions, self.args)

    def run(self):
        #Initialize Replay Memory (class)
        memory = MEMORY(self.args.replay_buffer_size)

        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams["mathtext.fontset"] = 'cm'
        plt.rcParams['mathtext.default'] = 'it'
        params = {'legend.fontsize': 12, 'legend.handlelength': 3}
        plt.rcParams.update(params)

        fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(9, 6))
        plt.subplots_adjust(hspace=0.5)

        sum_of_rewards = 0

        a_param = self.args.a_param
        b_param = self.args.b_param

        #Learning Phase
        state = dynamics.Initialize()  # Get the initial state s_0 (numpy(1,2))
        print("Initial State is " + str(state))

        print('This episode mass:' + str(a_param))
        print('This episode length:' + str(b_param))

        MAX_STEP = 1000

        time_list = []
        a_list = []
        x_1_list = []
        x_2_list = []

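        # Learning loop: act with exploration noise, store small-action
        # transitions in the replay memory, and update the main and target
        # networks once the buffer holds more than one batch.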
        for learning_step in range(MAX_STEP):
            #gradually increase b_param by 45.0 in total over the first 200 steps
            if learning_step < 200:
                b_param += 45.0 / 200
            x_1_list.append(state[0, 0])
            x_2_list.append(state[0, 1])
            time_list.append(learning_step)

            current_obs = torch.Tensor(
                [[np.sin(state[0, 0]),
                  np.cos(state[0, 0]),
                  state[0, 1]]])  #state: numpy(1,2) -> torch.Tensor(1,3)
            action = self.agent.get_action(
                current_obs,
                None)  #exploration action by agent (torch.Tensor(1,1))
            action = action.detach().numpy()[
                0]  #action: torch.Tensor(1,1) -> numpy(1,)
            #exploration noise (only injected while the state is at least 0.05 from the origin)
            noise = 0.0
            if np.sqrt(state[0, 0]**2 + state[0, 1]**2) >= 0.05:
                noise = 0.1 * np.random.normal()
            action = action + noise
            a_list.append(action)

            next_state, reward, done = dynamics.Dynamics(
                state, action, a_param, b_param)  #next_state: numpy(1,2)
            sum_of_rewards += reward

            #Make Exploration
            action = torch.Tensor([action
                                   ])  #action: numpy(1,) -> torch.Tensor(1,1)
            mask = torch.Tensor(
                [not done])  #mask: bool(False) -> torch.Tensor(1)(True)
            next_obs = torch.Tensor([[
                np.sin(next_state[0, 0]),
                np.cos(next_state[0, 0]), next_state[0, 1]
            ]])  #next_state: numpy(1,2) -> torch.Tensor(1,3)
            reward = torch.Tensor(
                [reward])  #reward: numpy(scalar) -> torch.Tensor(1)

            #Do not store experiences whose action magnitude is larger than 1.0
            if abs(action[0]) <= 1.0:
                memory.push(current_obs, action, mask, next_obs, reward)  # all torch.Tensor

            state = next_state

            #Update main DNN and target DNN
            if len(memory) > self.args.batch_size:
                transitions = memory.sample(
                    self.args.batch_size)  #Make exploration_batch
                batch = Transition(*zip(*transitions))

                self.agent.update_DNNs(batch)  #Update DNN
                self.agent.update_target_DNNs()  #Update Target DNN

            #if done:
            #break

        print("Sum of rewards is " + str(sum_of_rewards))

        axes[0].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[0].plot(time_list, a_list, linewidth=2)
        axes[0].set_xlim(0.0, MAX_STEP)
        axes[0].set_ylim(-1, 1)
        axes[0].set_xlabel('$k$', fontsize=16)
        axes[0].set_ylabel('$a[k]$', fontsize=16)
        axes[0].grid(True)

        axes[1].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[1].plot(time_list, x_1_list, linewidth=2)
        axes[1].set_xlim(0.0, MAX_STEP)
        axes[1].set_ylim(-np.pi, np.pi)
        axes[1].set_xlabel('$k$', fontsize=16)
        axes[1].set_ylabel('$x_1[k]$', fontsize=16)
        axes[1].grid(True)

        axes[2].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[2].plot(time_list, x_2_list, linewidth=2)
        axes[2].set_xlim(0.0, MAX_STEP)
        axes[2].set_ylim(-7, 7)
        axes[2].set_xlabel('$k$', fontsize=16)
        axes[2].set_ylabel('$x_2[k]$', fontsize=16)
        axes[2].grid(True)

        fig.savefig('standard_from5_to_50.eps',
                    bbox_inches="tight",
                    pad_inches=0.05)
        fig.savefig('standard_from5_to_50.png',
                    bbox_inches="tight",
                    pad_inches=0.05)
class ENVIRONMENT:
    def __init__(self, args, Ite):

        self.args = args
        self.Ite = Ite

        #Dim of state and action
        self.num_states = 3
        self.num_actions = 1

        #Initialize Agent
        self.agent = AGENT(self.num_states, self.num_actions, self.args)

    def run(self):
        #episode_final = False
        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams["mathtext.fontset"] = 'cm'
        plt.rcParams['mathtext.default'] = 'it'
        params = {'legend.fontsize': 12, 'legend.handlelength': 3}
        plt.rcParams.update(params)
        fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(9, 10))
        plt.subplots_adjust(hspace=0.8)
        #reward list
        sum_reward_list = []

        #======================Hyper Parameter=====================
        weight = np.array([[1 / 4, 1 / 4, 1 / 4, 1 / 4]])
        learn_alpha = 5e-5
        gamma = 0.99
        MAX_STEP = 1000
        #==========================================================

        max_reward = -10000.0

        a_param = self.args.a_param
        b_param = self.args.b_param

        weight_1_list = []
        weight_2_list = []
        weight_3_list = []
        weight_4_list = []

        time_list = []
        a_list = []
        x_1_list = []
        x_2_list = []

        td_error_list = []

        Discrete_time = 0

        state = np.array([[np.pi, 0.0]])

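        # Each step: act with decaying exploration noise, evaluate the N=4
        # characteristic Q-values, and update the combination weights w by a
        # TD-error gradient step with a log-barrier term (keeping the weights
        # positive), step-halving if a weight still turns negative, and a
        # final normalization so that the weights sum to one.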
        for test_step in range(MAX_STEP):

            weight_1_list.append(weight[0, 0])  #store the current weights
            weight_2_list.append(weight[0, 1])
            weight_3_list.append(weight[0, 2])
            weight_4_list.append(weight[0, 3])

            time_list.append(test_step)
            x_1_list.append(state[0, 0])
            x_2_list.append(state[0, 1])

            current_obs = torch.Tensor([[state[0, 0], state[0, 1]]])
            action = self.agent.get_action(current_obs,
                                           weight)  #Not input ounoise
            action = action.detach().numpy()[
                0]  #action: torch.Tensor(1,1) -> numpy(scaler)

            #exploration noise: decays linearly to zero over the first 400 steps
            noise = max((400 - Discrete_time), 0.0) / 400 * 0.1 * np.random.normal()

            action = action + noise
            a_list.append(action)

            action = torch.Tensor([action])
            Q_vec = self.agent.get_Q_value(
                current_obs,
                action)  # Q(x[k],a[k]) as characteristic functions
            action = action.detach().numpy()[
                0]  #action: torch.Tensor(1,1) -> numpy(1,)

            next_state, reward, done = dynamics.Dynamics(
                state, action, a_param, b_param)  #next_state: numpy(1,2)
            next_obs = torch.Tensor([[next_state[0, 0], next_state[0, 1]]])

            #update of the parameters
            max_Q_next_vec = self.agent.get_next_value(next_obs, weight)

            param = np.array(
                [[weight[0, 0], weight[0, 1], weight[0, 2],
                  weight[0, 3]]])  #w=[w_{1},...,w_{N}]
            td_error = param @ Q_vec.T - (reward + gamma *
                                          (param @ max_Q_next_vec.T))
            td_error_list.append(abs(td_error[0, 0]))

            chara_vec = np.array(
                [[Q_vec[0, 0], Q_vec[0, 1], Q_vec[0, 2], Q_vec[0, 3]]])

            update_vec = td_error * chara_vec

            #Barrier
            eta = 1e-7
            epsilon_w = 1e-9
            barrier_vec = eta*np.array([[-1/(weight[0,0]+epsilon_w),\
                                        -1/(weight[0,1]+epsilon_w),\
                                        -1/(weight[0,2]+epsilon_w),\
                                        -1/(weight[0,3]+epsilon_w)]])

            update_vec = update_vec + barrier_vec

            pre_weight = weight  #memorize pre_weight
            weight = weight - learn_alpha * (update_vec
                                             )  #weight is next weight

            if (weight[0, 0] <
                    0.0) or (weight[0, 1] < 0.0) or (weight[0, 2] < 0.0) or (
                        weight[0, 3] < 0.0):  #If some weights are negative
                update_error_count = 1
                while (True):
                    weight = pre_weight
                    weight = weight - (2**(
                        -update_error_count)) * learn_alpha * (update_vec)
                    update_error_count += 1
                    if (weight[0, 0] >= 0.0) and (weight[0, 1] >= 0.0) and (
                            weight[0, 2] >= 0.0) and (weight[0, 3] >= 0.0):
                        break
            #Normalize the weights so that they sum to one
            weight_sum = weight[0, 0] + weight[0, 1] + weight[0, 2] + weight[0, 3]
            weight = weight / weight_sum

            state = next_state
            Discrete_time += 1

        axes[0].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[0].plot(time_list, a_list, linewidth=2)
        axes[0].set_xlim(0.0, MAX_STEP)
        axes[0].set_ylim(-1, 1)
        axes[0].set_xlabel('$k$', fontsize=16)
        axes[0].set_ylabel('$a[k]$', fontsize=16)
        axes[0].grid(True)

        axes[1].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[1].plot(time_list, x_1_list, linewidth=2)
        axes[1].set_xlim(0.0, MAX_STEP)
        axes[1].set_ylim(-np.pi, np.pi)
        axes[1].set_xlabel('$k$', fontsize=16)
        axes[1].set_ylabel('$x_1[k]$', fontsize=16)
        axes[1].grid(True)

        axes[2].plot([0, MAX_STEP], [0, 0], "red", linestyle='dashed')
        axes[2].plot(time_list, x_2_list, linewidth=2)
        axes[2].set_xlim(0.0, MAX_STEP)
        axes[2].set_ylim(-7, 7)
        axes[2].set_xlabel('$k$', fontsize=16)
        axes[2].set_ylabel('$x_2[k]$', fontsize=16)
        axes[2].grid(True)

        axes[3].plot(time_list, weight_1_list, linewidth=2, label="$w_1$")
        axes[3].plot(time_list, weight_2_list, linewidth=2, label="$w_2$")
        axes[3].plot(time_list, weight_3_list, linewidth=2, label="$w_3$")
        axes[3].plot(time_list, weight_4_list, linewidth=2, label="$w_4$")
        axes[3].set_xlim(0.0, MAX_STEP)
        axes[3].set_ylim(0, 1)
        axes[3].set_xlabel('$k$', fontsize=16)
        axes[3].set_ylabel('$w[k]$', fontsize=16)
        axes[3].grid(True)
        axes[3].legend(loc='upper left', ncol=4)

        axes[4].plot(time_list, td_error_list, linewidth=2)
        axes[4].set_xlim(0.0, MAX_STEP)
        # axes[4].set_ylim(0,0.5)
        axes[4].set_xlabel('$k$', fontsize=16)
        axes[4].set_ylabel(r'$|\delta[k]|$', fontsize=16)
        axes[4].grid(True)

        fig.savefig('TR_N4_case1.eps', bbox_inches="tight", pad_inches=0.05)
        fig.savefig('TR_N4_case1.png', bbox_inches="tight", pad_inches=0.05)
Example #9
def runGame():
    # setup variables for the start of the game
    board = getBlankBoard()
    lastMoveDownTime = time.time()
    lastMoveSidewaysTime = time.time()
    lastFallTime = time.time()
    movingDown = False  # note: there is no movingUp variable
    movingLeft = False
    movingRight = False
    score = 0
    reward = 0
    level, fallFreq = calculateLevelAndFallFreq(score)

    # make agent for RL
    agent = AGENT()
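    # The agent supplies the action each frame (replacing keyboard input) and
    # receives the updated board and the reward back through agent.giveData().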

    fallingPiece = getNewPiece()
    nextPiece = getNewPiece()

    while True:  # game loop
        if fallingPiece is None:
            # No falling piece in play, so start a new piece at the top
            fallingPiece = nextPiece
            nextPiece = getNewPiece()
            lastFallTime = time.time()  # reset lastFallTime

            if not isValidPosition(board, fallingPiece):
                return  # can't fit a new piece on the board, so game over

        checkForQuit()

        # copy the board for agent
        board_copy = deepcopy(board)
        board_copy = addToBoard(board_copy, fallingPiece)
        action = agent.getAction(board_copy)

        # event handling loop
        # NOTE: when action == "KEYUP", none of the nested comparisons below can
        # match (K_p is a pygame key constant and the other branches test for
        # different strings), so this branch effectively does nothing.
        if action == "KEYUP":
            if (action == K_p):
                # Pausing the game
                DISPLAYSURF.fill(BGCOLOR)
                pygame.mixer.music.stop()
                showTextScreen('Paused')  # pause until a key press
                pygame.mixer.music.play(-1, 0.0)
                lastFallTime = time.time()
                lastMoveDownTime = time.time()
                lastMoveSidewaysTime = time.time()
            elif (action == "K_LEFT"):
                movingLeft = False
            elif (action == "K_RIGHT"):
                movingRight = False
            elif (action == "K_DOWN"):
                movingDown = False

        else:
            # moving the piece sideways
            if (action == "K_LEFT") and isValidPosition(
                    board, fallingPiece, adjX=-1):
                fallingPiece['x'] -= 1
                movingLeft = True
                movingRight = False
                lastMoveSidewaysTime = time.time()

            elif (action == "K_RIGHT") and isValidPosition(
                    board, fallingPiece, adjX=1):
                fallingPiece['x'] += 1
                movingRight = True
                movingLeft = False
                lastMoveSidewaysTime = time.time()

                # rotating the piece (if there is room to rotate)
            elif (action == "K_UP"):
                fallingPiece['rotation'] = (fallingPiece['rotation'] +
                                            1) % len(
                                                PIECES[fallingPiece['shape']])
                if not isValidPosition(board, fallingPiece):
                    fallingPiece['rotation'] = (
                        fallingPiece['rotation'] - 1) % len(
                            PIECES[fallingPiece['shape']])
            elif (action == K_q):  # rotate the other direction
                fallingPiece['rotation'] = (fallingPiece['rotation'] -
                                            1) % len(
                                                PIECES[fallingPiece['shape']])
                if not isValidPosition(board, fallingPiece):
                    fallingPiece['rotation'] = (
                        fallingPiece['rotation'] + 1) % len(
                            PIECES[fallingPiece['shape']])

                # making the piece fall faster with the down key
            elif (action == "K_DOWN"):
                movingDown = True
                if isValidPosition(board, fallingPiece, adjY=1):
                    fallingPiece['y'] += 1
                lastMoveDownTime = time.time()

                # move the current piece all the way down
            elif action == "K_SPACE":
                movingDown = False
                movingLeft = False
                movingRight = False
                for i in range(1, BOARDHEIGHT):
                    if not isValidPosition(board, fallingPiece, adjY=i):
                        break
                fallingPiece['y'] += i - 1

        # handle moving the piece because of user input
        if (movingLeft or movingRight
            ) and time.time() - lastMoveSidewaysTime > MOVESIDEWAYSFREQ:
            if movingLeft and isValidPosition(board, fallingPiece, adjX=-1):
                fallingPiece['x'] -= 1
            elif movingRight and isValidPosition(board, fallingPiece, adjX=1):
                fallingPiece['x'] += 1
            lastMoveSidewaysTime = time.time()

        if movingDown and time.time(
        ) - lastMoveDownTime > MOVEDOWNFREQ and isValidPosition(
                board, fallingPiece, adjY=1):
            fallingPiece['y'] += 1
            lastMoveDownTime = time.time()

        # let the piece fall if it is time to fall
        if time.time() - lastFallTime > fallFreq:
            # see if the piece has landed
            if not isValidPosition(board, fallingPiece, adjY=1):
                # falling piece has landed, set it on the board
                addToBoard(board, fallingPiece)
                reward = removeCompleteLines(board)
                score += reward
                level, fallFreq = calculateLevelAndFallFreq(score)
                fallingPiece = None
            else:
                # piece did not land, just move the piece down
                fallingPiece['y'] += 1
                lastFallTime = time.time()

        board_copy = deepcopy(board)
        if fallingPiece is not None:
            board_copy = addToBoard(board_copy, fallingPiece)

        agent.giveData(board_copy, reward)

        # drawing everything on the screen
        DISPLAYSURF.fill(BGCOLOR)
        drawBoard(board)
        drawStatus(score, level)
        drawNextPiece(nextPiece)
        if fallingPiece is not None:
            drawPiece(fallingPiece)

        pygame.display.update()
        FPSCLOCK.tick(FPS)
Example #10
class ENVIRONMENT:
    
    def __init__(self, args, Ite):
        
        self.args = args
        self.Ite = Ite
        
        #Dim of state and action
        self.num_states = 3
        self.num_actions = 1
        
        #Initialize Agent
        self.agent = AGENT(self.num_states, self.num_actions, self.args)
        
    def run(self):
        xlist = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
        ylist = np.arange(5.0, 51.0, 1.0)  # 5.0, 6.0, ..., 50.0
        vallist = np.zeros((45,10))

        for a_iteration in range(10):
            a_param = 0.05 + a_iteration*0.1
            for b_iteration in range(45):
                b_param = 5.5 + b_iteration*1.0

                #======================Hyper Parameter=====================
                weight = np.array([[1/8,1/8,1/8,1/8,1/8,1/8,1/8,1/8]])
                learn_alpha = 5e-5
                gamma = 0.99
                MAX_STEP = 1000
                #==========================================================

                state = np.array([[np.pi, 0.0]])            
                rewards = 0 #sum of rewards for each system (a,b)

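                # For each (a, b) system, roll out 1000 steps while adapting
                # the N=8 combination weights online: TD-error gradient step
                # with a log-barrier keeping the weights positive, step-halving
                # if a weight turns negative, then normalization to sum to one.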
                for test_step in range(MAX_STEP):
                    current_obs = torch.Tensor([[state[0,0],state[0,1]]])
                    action = self.agent.get_action(current_obs, weight) 
                    action = action.detach().numpy()[0] #action: torch.Tensor(1,1) -> numpy(1,)

                    #exploration noise=========================
                    noise = max((400-test_step),0.0)/400*0.1*np.random.normal()
                    action = action + noise #numpy(1,)
                    #==========================================

                    action = torch.Tensor([action])
                    Q_vec = self.agent.get_Q_value(current_obs, action) # Q(x[k],a[k]) as characteristic functions
                    action = action.detach().numpy()[0] #action: torch.Tensor(1,1) -> numpy(1,)

                    next_state, reward, done = dynamics.Dynamics(state, action, a_param, b_param)  #next_state: numpy(1,2)
                    next_obs = torch.Tensor([[next_state[0,0], next_state[0,1]]])

                    #update of the parameters 
                    max_Q_next_vec = self.agent.get_next_value(next_obs, weight)

                    param = weight.copy()  #w=[w_{1},...,w_{N}]
                    td_error = param @ Q_vec.T - (reward + gamma * (param @ max_Q_next_vec.T))

                    chara_vec = np.array([[Q_vec[0,0], Q_vec[0,1], Q_vec[0,2], Q_vec[0,3], Q_vec[0,4], Q_vec[0,5], Q_vec[0,6], Q_vec[0,7]]])

                    update_vec = td_error * chara_vec

                    #Barrier
                    eta = 1e-7
                    epsilon_w = 1e-9
                    barrier_vec = eta*np.array([[-1/(weight[0,0]+epsilon_w),\
                                                -1/(weight[0,1]+epsilon_w),\
                                                -1/(weight[0,2]+epsilon_w),\
                                                -1/(weight[0,3]+epsilon_w),\
                                                -1/(weight[0,4]+epsilon_w),\
                                                -1/(weight[0,5]+epsilon_w),\
                                                -1/(weight[0,6]+epsilon_w),\
                                                -1/(weight[0,7]+epsilon_w)]])

                    update_vec = update_vec + barrier_vec
                        
                    pre_weight = weight #memorize pre_weight
                    weight = weight - learn_alpha * (update_vec)
                        

                    if np.any(weight < 0.0):  #If some weight is negative, backtrack with a smaller step
                        update_error_count = 1
                        while(True):
                            weight = pre_weight 
                            weight = weight - (2**(-update_error_count))*learn_alpha * (update_vec)
                            update_error_count += 1
                            if np.all(weight >= 0.0):
                                break

                    #Normalize the weights so that they sum to one
                    weight_sum = weight.sum()
                    weight = weight / weight_sum
                    
                    rewards += reward
                    state = next_state

                  
                print("##########################################################################################")
                print("(a,b)=("+str(a_param)+","+str(b_param)+") : reward "+str(rewards)+" Weight is "+str(weight))
                print("Last State is "+str(state))
                print("##########################################################################################")
                vallist[b_iteration,a_iteration] = rewards

        plt.rcParams['font.family'] = 'Times New Roman'
        plt.rcParams["mathtext.fontset"] = 'cm'
        plt.rcParams['mathtext.default'] = 'it'

        fig, ax = plt.subplots()
        cs = ax.pcolormesh(xlist, ylist, vallist,  cmap="jet", vmin=-4000.0, vmax=-50.0)#seismic,hot
        fig.colorbar(cs)
        ax.set_xlim(0.0, 1.0)
        ax.set_ylim(5.0, 50.0)
        ax.set_xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
        ax.set_yticks([5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0])
        ax.set_xlabel(r'$\xi_{1}$',fontsize=16)
        ax.set_ylabel(r'$\xi_{2}$',fontsize=16)
  
        fig.savefig('N8.eps', pad_inches=0.05) 
        fig.savefig('N8.png', pad_inches=0.05)
from visualize_test import GraphicDisplay
from environment import grid_world
from agent import AGENT


WORLD_HEIGHT = 5
WORLD_WIDTH = 10

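# Display the value table on a 5x10 grid world whose goal is the bottom-right
# cell; is_upload=True presumably restores a previously saved agent.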
env = grid_world(WORLD_HEIGHT, WORLD_WIDTH,
                 GOAL=[[WORLD_HEIGHT - 1, WORLD_WIDTH - 1]],
                 OBSTACLES=[[0, 2], [1, 2], [2, 2], [2, 4], [3, 4], [2, 6], [3, 6], [4, 6]])

agent = AGENT(env, is_upload=True)
grid_world_vis = GraphicDisplay(env, agent)
grid_world_vis.print_value_table()
grid_world_vis.mainloop()


from environment import grid_world
from agent import AGENT

WORLD_HEIGHT = 5
WORLD_WIDTH = 10

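# Train an agent from scratch (is_upload=False) with Q-learning; epsilon=0.4 is
# decayed by decay_rate=0.8 every decay_period=10000 (units defined by AGENT).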
env = grid_world(WORLD_HEIGHT,
                 WORLD_WIDTH,
                 GOAL=[[WORLD_HEIGHT - 1, WORLD_WIDTH - 1]],
                 OBSTACLES=[[0, 2], [1, 2], [2, 2], [0, 4], [2, 4], [4, 4],
                            [2, 6], [3, 6], [4, 6], [2, 7], [2, 8]])

agent = AGENT(env, is_upload=False)
agent.Q_learning(epsilon=0.4, decay_period=10000, decay_rate=0.8)
from agent import AGENT
from environment import grid_world

WORLD_HEIGHT = 5
WORLD_WIDTH = 10

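# Run TD control and then Monte Carlo control from scratch on the same grid world.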
env = grid_world(WORLD_HEIGHT, WORLD_WIDTH,
                 GOAL=[[WORLD_HEIGHT - 1, WORLD_WIDTH - 1]],
                 OBSTACLES=[[0, 2], [1, 2], [2, 2], [2, 4], [3, 4], [2, 6], [3, 6], [4, 6]])
agent = AGENT(env, is_upload=False)
agent.TD_Control(epsilon=0.4, decay_period=20000, decay_rate=0.9)
agent.Monte_Carlo_Control(epsilon=0.4, decay_period=20000, decay_rate=0.9)