Example #1
def get_action(State, Action, robot, agent):
	# Try the action on a copy so a failed action does not corrupt the original robot.
	robot_copy = robot.copy()

	reward, nextState = robot_copy.take_action(Action)
	#print("Reward, State:", reward, nextState)
	agent.learn(State, nextState, Action, reward)
	# Keep the updated robot only if the action led to a valid state.
	if nextState is not None:
		robot = robot_copy.copy()
	return robot
def run_episode_with_Qlearning(env, agent, render=False):
    steps = 0
    total_reward = 0
    state = env.reset()
    while True:
        action = agent.sample(state)
        next_state, reward, done, _ = env.step(action)  # take one step in the environment
        # train Q-learning
        agent.learn(state, action, reward, next_state, done)
        state = next_state  # remember the latest observation
        total_reward += reward
        steps += 1  # count the steps
        if render:
            env.render()  # render a new frame
        if done:
            break
    return total_reward, steps
    def run_episode(self, agent):
        self.reward = 0
        s = self.env.reset()
        done = False
        while not done:
            self.env.render()
            a = agent.act(s)
            s_, r, done, _ = self.env.step(a)
            agent.learn((s, a, s_, r, done))
            self.reward += r
            s = s_

        self.episode_count += 1
        self.reward_buffer.append(self.reward)
        average = sum(self.reward_buffer) / len(self.reward_buffer)

        print("Episode Nr. {} \nScore: {} \nAverage: {}".format(
            self.episode_count, self.reward, average))
def run_episode_with_sarsa(env, agent, render=False):
    steps = 0
    total_reward = 0
    state = env.reset()
    action = agent.sample(state)
    while True:
        next_state, reward, done, _ = env.step(action)  # take one step in the environment
        next_action = agent.sample(next_state)  # choose the next action with the algorithm's policy
        # train Sarsa
        agent.learn(state, action, reward, next_state, next_action, done)
        action = next_action
        state = next_state  # remember the latest observation
        total_reward += reward
        steps += 1  # count the steps
        if render:
            env.render()  # render a new frame
        if done:
            break
    return total_reward, steps
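The Q-learning and SARSA loops above only assume an agent that exposes sample(state) and a learn(...) method with the signatures shown. As a point of reference, here is a minimal tabular sketch of such an agent; the class names, hyperparameters, and the epsilon-greedy policy are illustrative assumptions, not part of the original examples.

import numpy as np

class TabularQAgent:
    """Hypothetical tabular agent matching the sample/learn interface used above."""
    def __init__(self, n_states, n_actions, lr=0.1, gamma=0.9, epsilon=0.1):
        self.Q = np.zeros((n_states, n_actions))  # Q-value table
        self.lr = lr            # learning rate (alpha)
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration probability

    def sample(self, state):
        # epsilon-greedy action selection
        if np.random.rand() < self.epsilon:
            return np.random.randint(self.Q.shape[1])
        return int(np.argmax(self.Q[state]))

    def learn(self, state, action, reward, next_state, done):
        # off-policy Q-learning: bootstrap from the greedy value of the next state
        target = reward
        if not done:
            target += self.gamma * np.max(self.Q[next_state])
        self.Q[state, action] += self.lr * (target - self.Q[state, action])

class TabularSarsaAgent(TabularQAgent):
    """Same hypothetical agent, but with the on-policy SARSA update."""
    def learn(self, state, action, reward, next_state, next_action, done):
        # on-policy SARSA: bootstrap from the action actually chosen for the next step
        target = reward
        if not done:
            target += self.gamma * self.Q[next_state, next_action]
        self.Q[state, action] += self.lr * (target - self.Q[state, action])

The only difference between the two updates is the bootstrap target, which is why the SARSA loop has to pick next_action before calling learn.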
Example #5
def run_episodes(mode, N, robot, Q):
	old_output = robot.measure_output(mode)
	State = get_state(robot)
	
	plt.axes()
	rectangle = plt.Rectangle((-25, -25), 60, 60, fc='w')
	plt.gca().add_patch(rectangle)
	plt.axis('scaled')
	plt.ion()
	plt.show()
			
	for i in range(N):
		Action = get_policy(State,Q)

		if Action is None:
			Move = robot.get_random_action()
			Action = (Move[0],Move[1],Move[2][:2],Move[3][2])
		else:
			Move = robot.get_move(Action)
		
		robot_fell = not robot.take_action(Move)
		#robot.draw(plt)
		NewState = get_state(robot)

		new_output = robot.measure_output(mode)
		if robot_fell:
			reward = -10
		elif Move[1] == Move[2] == Move[3] == (0,0,0):
			reward = -0.5  # small penalty for a null move
			#print("Reward =", reward)
		else:
			reward = get_reward(mode, old_output, new_output, Move)
		
		#print "Reward:", reward
		old_output = new_output

		learn(NewState, State, Action, reward, Q)
		State = NewState

		if robot_fell: break
Example #6
def run_episode(env, agent, rpm):
    obs = env.reset()
    step = 0
    total_reward = 0
    while True:
        action = agent.predict(obs)  # predict an action
        action = np.clip(np.random.normal(action, opt["NOISE"]), -1.0, 1.0)  # add exploration noise and clip
        next_obs, reward, done, info = env.step(action)
        rpm.append((obs, action, opt["REWARD_SCALE"] * reward, next_obs, done))

        if len(rpm) > opt["MEMORY_WARMUP_SIZE"] and (step % opt["LEARN_FREQ"]) == 0:
            (batch_obs, batch_action, batch_reward, batch_next_obs,
             batch_done) = rpm.sample(opt["BATCH_SIZE"])
            agent.learn(batch_obs, batch_action, batch_reward, batch_next_obs,
                        batch_done)

        obs = next_obs
        total_reward += reward
        step += 1
        if done or step >= 200:
            break
    return step, total_reward
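In this example, rpm is assumed to be an experience replay buffer with append, sample, and len support, and opt a plain configuration dict. A minimal sketch of such a buffer, stated as an assumption rather than the original implementation:

import random
import collections
import numpy as np

class ReplayMemory:
    """Hypothetical fixed-size replay buffer matching the rpm usage above."""
    def __init__(self, max_size):
        self.buffer = collections.deque(maxlen=max_size)

    def append(self, experience):
        # experience is an (obs, action, reward, next_obs, done) tuple
        self.buffer.append(experience)

    def sample(self, batch_size):
        # draw a random minibatch and split it into per-field arrays
        batch = random.sample(self.buffer, batch_size)
        obs, action, reward, next_obs, done = zip(*batch)
        return (np.array(obs), np.array(action), np.array(reward),
                np.array(next_obs), np.array(done))

    def __len__(self):
        return len(self.buffer)

Sampling only after MEMORY_WARMUP_SIZE transitions, as the loop above does, keeps early, highly correlated experience from dominating the first updates.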
Example #7
    def updateInfo(self, environment, agent):
        # get old state
        old_state = self.environment.current_location

        old_action = self.action

        # read the current location and let the agent choose the next action
        self.environment.current_location = self.getCurrentLocation()

        self.action = agent.choose_action(self.environment.actions)

        # environment reward for the step just taken
        reward = self.environment.make_step(self.action)

        print(self.pushButton_3.isEnabled())
        if self.pushButton_3.isEnabled():
            self.humanRewardFeedback = 0.2

        self.activateButton()

        self.reward_record.append(reward)
        print(self.humanRewardFeedback)
        self.human_reward_record.append(self.humanRewardFeedback)
        agent.learn(old_state, reward, self.humanRewardFeedback,
                    self.environment.current_location, old_action)
Example #8
    def run_episode(self, agent):
        self.reward = 0
        s = self.env.reset()
        done = False
        step = 0
        r = 0
        actions = np.zeros(5)  # count how often each of the 5 actions is taken
        while not done:
            step += 1
            a = agent.act(s)
            if a == 0:
                actions[0] += 1
                r -= 1

            elif a == 1:
                actions[1] += 1
                r += 5

            elif a == 2:
                actions[2] += 1
                r += 5

            elif a == 3:
                actions[3] += 1
                r += 1

            elif a == 4:
                actions[4] += 1
                r += 1

            index, s_, price, gain, terminal, money = self.env.step(a)

            gain = gain if not terminal else 0

            if terminal:
                r -= 4000
                print("step: " + str(step) + " money: " +str(money), " rewards: " + str(r), " action", actions)
                self.steps.append(step)
                self.wallet.append(money)
                self.rewards.append(r)
                done = True

            elif step > 3300:
                if money > 3000:
                    r += 5000
                print("step: " + str(step) + " money: " + str(money)," rewards:"+ str(r), " action", actions)
                self.steps.append(step)
                self.wallet.append(money)
                self.rewards.append(r)
                done = True

            if gain > 0:
                r += 200
            if money > 3000:
                r += 15
            r += 1  # small per-step reward
            agent.learn((s, a, s_, r, terminal))
            self.reward += r
            s = s_

        self.episode_count += 1
        self.reward_buffer.append(self.reward)
        average = sum(self.reward_buffer) / len(self.reward_buffer)

        print("Episode Nr. {} \nScore: {} \nAverage: {}".format(
            self.episode_count, self.reward, average))
Example #9
#!/usr/bin/env python

'''
Skeleton from https://github.com/joacar/reinforcement-learning/blob/master/rl.py
'''

from __future__ import print_function
import numpy as np
import random
import environment
import agent

x_bound = 2
y_bound = 2
states = x_bound*y_bound

# learning_rate = 0.9
learning_rate = 0.1
learning_step = 50 * states * (3**states)
discount_rate = 0.9
curiosity = 0.4

#random.seed(13)

agent = agent.AgentQLearn((x_bound,y_bound),curiosity,discount_rate)

agent.learn(learning_rate, learning_step)
agent.print_policy()
Example #10
def play_a_game(opponent, commentary = False):
    board = init_board() # initialize the board
    player = np.random.randint(2)*2-1 # which player begins?
    y_old = 0
    y_old_p2 = 0
    firstMove = True
    firstMove_p2 = True
    # load the previously saved model and move it to the GPU
    with open("randommodel.pickle", "rb") as pickle_in:
        model = pickle.load(pickle_in)
    model = model.cuda()
    # play on
    while not game_over(board) and not check_for_error(board):
        if commentary: print("lets go player ",player)
        
        # roll dice
        dice = roll_dice()
        if commentary: print("rolled dices:", dice)
            
        # make a move (2 moves if the same number appears on the dice)
        for i in range(1+int(dice[0] == dice[1])):
            board_copy = np.copy(board) 

            # make the move (agent vs agent):
            if(opponent == "agent"):
                if player == 1:
                    move, y_old = agent.action(board_copy,dice,player,i, y_old, model, firstMove, True)
                    # update the board
                    if len(move) != 0:
                        for m in move:
                            board = update_board(board, m, player)
                    if(firstMove):
                        firstMove = False
                elif player == -1:
                    flipped_board = flipped_agent.flip_board(board_copy)
                    move, y_old_p2 = agent.action(flipped_board,dice,1,i, y_old_p2, model, firstMove_p2, True)
                    if len(move) != 0:
                        for m in move:
                            flipped_board = update_board(flipped_board, m, 1)
                    board = flipped_agent.flip_board(flipped_board)
            
                    if(firstMove_p2):
                        firstMove_p2 = False
            elif(opponent == "human"):
                pretty_print(board)
                if player == 1:
                    print("Computer's turn")
                    move, y_old = agent.action(board_copy,dice,player,i, y_old, model, firstMove, False)
                    print("Computer's move", move)
                elif player == -1:
                    print("Human's turn")
                    possible_moves, possible_boards = legal_moves(board_copy, dice, player)
                    print("dice:", dice)
                    printing.moves_to_string(possible_moves)
                    text = input("prompt")
                    move = possible_moves[int(text)]

                if len(move) != 0:
                    for m in move:
                        board = update_board(board, m, player)
            # if you're playing against the random agent:
            elif opponent == "random":
                if player == 1:
                    move, y_old = agent.action(board_copy, dice, player, i, y_old, model, firstMove, False)
                elif player == -1:
                    move = random_agent(board_copy, dice, player, i)
                if len(move) != 0:
                    for m in move:
                        board = update_board(board, m, player)
            # update the board

            
            # give status after every move:         
            if commentary: 
                print("move from player",player,":")
                pretty_print(board)
        

        # players take turns 
        player = -player


    # return the winner
    winner = -1*player
    if(opponent == "agent"):
        if(winner == 1):
            agent.learn(y_old, model, board_copy, "yes")
            agent.learn(y_old_p2, model, board_copy, "no")
        else:
            agent.learn(y_old, model, board_copy, "no")
            agent.learn(y_old_p2, model, board_copy, "yes")
        
    #print("Winner is player", winner)
    # save the updated model back to disk
    with open("randommodel.pickle", "wb") as pickle_out:
        pickle.dump(model, pickle_out)
    return winner
Example #11
board, rewardLocation, agentLocation, reward, done = env.newAction(None)

print("REMEMBER THE NONE ACTION")
prevBoard, prevRewardLocation, prevAgentLocation = board, rewardLocation, agentLocation

episode = 0
totalReward = 0
game = 0
gameLength = 0

while True:
    episode += 1
    print("EPISODE : ", episode)
    print("GAME : ", game)

    action = agent.chooseAction(board, rewardLocation)

    board, rewardLocation, agentLocation, reward, done = env.newAction(action)

    agent.remember(prevBoard, action, reward, rewardLocation, board, done)

    prevBoard, prevRewardLocation, prevAgentLocation = board, rewardLocation, agentLocation

    if done:
        game += 1
    gameLength += 1
    if done and game % 1 == 0:  # learn after every finished game (a modulus of 1 means every game)
        agent.learn(gameLength)
        gameLength = 0

    os.system('cls' if os.name == 'nt' else 'clear')  # clear the console between episodes