Code Example #1
File: cliff_S.py Project: trigrass2/blog-2
 def __init__(self):
     self.ai = sarsa.Sarsa(actions=range(directions),
                           epsilon=0.1,
                           alpha=0.1,
                           gamma=0.9)
     self.lastAction = None
     self.score = 0
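This excerpt only constructs the agent. For context, here is a minimal sketch of the per-step loop such an agent is typically driven by, using the chooseAction/learn interface that appears verbatim in Code Example #9; the getState/getReward helpers and the lastState attribute are assumptions, not part of the project:

 def step(self):
     state = self.getState()    # hypothetical: encode the current grid position
     reward = self.getReward()  # hypothetical: e.g. -1 per move, large penalty on the cliff
     # epsilon-greedy action selection from the current Q estimates
     action = self.ai.chooseAction(state)
     # SARSA is on-policy: update Q(s, a) toward r + gamma * Q(s', a'),
     # where a' is the action actually chosen in s'
     if self.lastAction is not None:
         self.ai.learn(self.lastState, self.lastAction, reward, state, action)
     self.lastState = state
     self.lastAction = action
     self.score += reward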
Code Example #2
 def __init__(self,
              randomReward=True,
              case1=0.25,
              case2=0.5,
              case3=0.5,
              case4=0.75):
     self.ai = sarsa.Sarsa(actions=["left", "right"],
                           epsilon=0.1,
                           alpha=0.1,
                           gamma=0.9)
     self.lastAction = None
     self.lastBoardState = None
     self.currBoardState = 0  # there are three possible boards (states)
     self.currAction = None
     self.currReward = 0
     # parameters for the random walk of the reward probabilities
     self.SD = 0.025
     self.lowerBoundary = 0.25
     self.upperBoundary = 0.75
     if randomReward:
         self.case1RewardProb = self.initializeReward()
         self.case2RewardProb = self.initializeReward()
         self.case3RewardProb = self.initializeReward()
         self.case4RewardProb = self.initializeReward()
     else:
         self.case1RewardProb = case1
         self.case2RewardProb = case2
         self.case3RewardProb = case3
         self.case4RewardProb = case4
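initializeReward is not shown in this excerpt. Given the SD, lowerBoundary, and upperBoundary attributes above, a plausible sketch is a uniform draw inside the band, with each reward probability then drifting one Gaussian step at a time, clipped at the boundaries. Both method bodies below are assumptions, not the project's actual code:

import random

 def initializeReward(self):
     # start each reward probability uniformly inside [lowerBoundary, upperBoundary]
     return random.uniform(self.lowerBoundary, self.upperBoundary)

 def walkReward(self, prob):
     # one Gaussian step of the random walk, clipped to the allowed band
     prob += random.gauss(0, self.SD)
     return min(self.upperBoundary, max(self.lowerBoundary, prob))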
Code Example #3
File: zhed_bot.py Project: Pastilhas/IART-FEUP-2
 def __init__(self):
     self.run = True
     self.win = False
     self.level = 6
     self.board = []
     self.last_state = None
     self.last_action = None
     self.curr_state = []
     self.generate_board()
     self.actions = self.get_actions()
     self.ai = sarsa.Sarsa(len(self.actions), 0.2, 0.9)
     self.file = open("result.txt", "w")
Code Example #4
File: Robot.py Project: martin-abadi/Sarsa
 def __init__(self):
     self.ai = sarsa.Sarsa(actions=range(0, 6),
                           epsilon=0.1,
                           alpha=0.1,
                           gamma=0.9)
     self.lastAction = None
     self.lastPosition = 1
     self.pos = 1
     self.score = 0
     self.misses = 0
     self.normalReward = 0
     self.missReward = -10
     self.goalReward = 100
     self.hit = 0
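The three reward constants above imply a simple three-way reward signal. A minimal sketch of how they might be combined; the calcReward name and the goal/miss flags are assumptions, not the project's code:

 def calcReward(self, reachedGoal, missed):
     if reachedGoal:
         return self.goalReward   # +100 for reaching the goal
     if missed:
         return self.missReward   # -10 per miss
     return self.normalReward     # 0 for an ordinary step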
Code Example #5
    last_time_steps = numpy.ndarray(0)

    # Loads parameters from the ROS param server
    # Parameters are stored in a yaml file inside the config directory
    # They are loaded at runtime by the launch file
    Alpha = rospy.get_param("/cartpole_v0/alpha")
    Epsilon = rospy.get_param("/cartpole_v0/epsilon")
    Gamma = rospy.get_param("/cartpole_v0/gamma")
    epsilon_discount = rospy.get_param("/cartpole_v0/epsilon_discount")
    nepisodes = rospy.get_param("/cartpole_v0/nepisodes")
    nsteps = rospy.get_param("/cartpole_v0/nsteps")
    running_step = rospy.get_param("/cartpole_v0/running_step")

    # Initialises the algorithm that we are going to use for learning
    sarsa = sarsa.Sarsa(actions=range(env.action_space.n),
                        alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
    initial_epsilon = sarsa.epsilon

    start_time = time.time()
    highest_reward = 0

    # Starts the main training loop: the one about the episodes to do
    for x in range(nepisodes):
        rospy.logdebug("############### START EPISODE => " + str(x))

        cumulated_reward = 0
        done = False
        if sarsa.epsilon > 0.05:
            sarsa.epsilon *= epsilon_discount

        # Initialize the environment and get first state of the robot
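The get_param calls above imply a YAML file under the cartpole_v0 namespace in the config directory. A minimal sketch of that file; only the keys are taken from the code, and every value here is a placeholder:

cartpole_v0:
  alpha: 0.1
  epsilon: 0.9
  gamma: 0.9
  epsilon_discount: 0.999
  nepisodes: 1000
  nsteps: 1000
  running_step: 0.04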
Code Example #6
if __name__ == '__main__':

    env = gym.make('GazeboCircuit2TurtlebotLidar-v0')

    #outdir = '/home/venky/Construct/jupyter_notebook/gazebo_gym_experiments'
    outdir = '/home/user/catkin_ws/src/gym_construct/src/gazebo_gym_experiments'
    # env.monitor.start(outdir, force=True, seed=None)  # old monitor API, commented out;
    env = wrappers.Monitor(env, outdir,
                           force=True)  # wrappers.Monitor is used instead to avoid warnings
    # plotter = LivePlot(outdir)

    last_time_steps = numpy.ndarray(0)

    sarsa = sarsa.Sarsa(actions=range(env.action_space.n),
                        epsilon=0.9,
                        alpha=0.2,
                        gamma=0.9)

    initial_epsilon = sarsa.epsilon

    epsilon_discount = 0.9986

    start_time = time.time()
    total_episodes = 10
    highest_reward = 0

    for x in range(total_episodes):
        done = False

        cumulated_reward = 0  # Should going forward give more reward than L/R?
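The excerpt ends just before the inner step loop. A minimal sketch of what that loop typically looks like around this SARSA interface, assuming the standard gym step API and a string encoding of the observation (both assumptions here):

        observation = env.reset()
        state = ''.join(map(str, observation))
        action = sarsa.chooseAction(state)

        while not done:
            observation, reward, done, info = env.step(action)
            cumulated_reward += reward
            highest_reward = max(highest_reward, cumulated_reward)
            next_state = ''.join(map(str, observation))
            # on-policy: pick a' from the same policy, then update on (s, a, r, s', a')
            next_action = sarsa.chooseAction(next_state)
            sarsa.learn(state, action, reward, next_state, next_action)
            state, action = next_state, next_action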
Code Example #7
import matplotlib.pyplot as plt
import sarsa


episodes = 3000
steps = 200
environment = 'Taxi-v3'  # alternatives: 'HotterColder-v0', 'FrozenLake-v0'

agent = sarsa.Sarsa()

# collect the per-episode returns and plot them
Gvector = agent.run(episodes,
                    steps,
                    environment)

plt.plot(Gvector)
plt.show()
Code Example #8
File: cartpole_sarsa.py Project: Sanyam07/opengym
    return total_reward


print("")
print("-------------------------------------")
print("")
print("Initialize SARSA agent.")

agent_settings = {
    'alpha': 0.2,  # default to a low(-ish) learning rate
    'gamma': 0.8,  # default to a high(-ish) dependence on future expectations
    'defaultReward': 0,
    'epsilon': 0.001,
    'policy': 'epsilonGreedy'
}
basic_agent = sarsa.Sarsa(config=agent_settings)

agents = [basic_agent]
agents.append(
    sarsa.TransformState(lambda state: state[:2],
                         sarsa.Sarsa(config=agent_settings)))
agents.append(
    sarsa.TransformState(lambda state: state[2:],
                         sarsa.Sarsa(config=agent_settings)))
agents.append(
    sarsa.TransformState(lambda state: [state[0], state[2]],
                         sarsa.Sarsa(config=agent_settings)))
agents.append(
    sarsa.TransformState(lambda state: [state[1], state[3]],
                         sarsa.Sarsa(config=agent_settings)))
for i in range(4):
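TransformState evidently wraps an agent and projects the 4-component CartPole observation (cart position, cart velocity, pole angle, pole angular velocity) before delegating to it. A minimal sketch of such a wrapper, assuming the wrapped agent exposes the chooseAction/learn interface used elsewhere on this page:

class TransformState:
    def __init__(self, transform, agent):
        self.transform = transform  # e.g. lambda state: state[:2]
        self.agent = agent

    def chooseAction(self, state):
        # project the observation, then let the wrapped agent act on it
        return self.agent.chooseAction(self.transform(state))

    def learn(self, state1, action1, reward, state2, action2):
        self.agent.learn(self.transform(state1), action1, reward,
                         self.transform(state2), action2)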
Code Example #9
File: driver.py Project: MarkerDaSharker/StockBot
def processInput(i):
    # run 10 independent training passes and collect each final portfolio value
    ending = []
    startingM = 10000  # starting cash
    for k in range(0, 10):
        lastState = None
        lastAction = None

        ai = sarsa.Sarsa(actions=range(5),
                         epsilon=epsilon0,
                         alpha=alpha0,
                         gamma=gamma0)

        count = 0
        count2 = 0
        stock = 0
        money = startingM
        value = startingM
        while count < len(apple):
            lastValue = value
            value = money + apple[count][5] * stock
            if count > 0:
                adjclosediff = apple[count][5] - apple[count - 1][5]
            else:
                adjclosediff = 0

            state = calcState(adjclosediff, count2)
            #reward = adjclosediff*stock

            reward = lastValue - value + adjclosediff * stock - np.sign(
                state) * money

            action = ai.chooseAction(state)

            if lastAction is not None:
                ai.learn(lastState, lastAction, reward, state, action)

            if action == 1:  # buy 50 shares if affordable
                if money >= apple[count][5] * 50:
                    stock += 50
                    money += -apple[count][5] * 50

                else:
                    action = 0

            if action == 2:  # buy 100 shares, falling back to 50
                if money >= apple[count][5] * 100:
                    stock += 100
                    money += -apple[count][5] * 100

                else:
                    if money >= apple[count][5] * 50:
                        stock += 50
                        money += -apple[count][5] * 50
                        action = 1

                    else:
                        action = 0

            if action == 3:  # sell 50 shares
                if stock >= 50:
                    stock += -50
                    money += apple[count][5] * 50

                else:
                    action = 0

            if action == 4:  # sell 100 shares, falling back to 50
                if stock >= 100:
                    stock += -100
                    money += apple[count][5] * 100

                else:
                    if stock >= 50:
                        stock += -50
                        money += apple[count][5] * 50
                        action = 3
                    else:
                        action = 0

            lastState = state
            lastAction = action

            count += 1

            #print(money, stock)

        ending.append(money + apple[count - 1][5] * stock)
    return ending
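processInput takes an index i that the excerpt never uses, which suggests it is mapped over worker indices (or hyperparameter settings) in parallel. A minimal sketch of such a driver using the standard library; the pool size and range are placeholders:

from multiprocessing import Pool

if __name__ == '__main__':
    with Pool(processes=4) as pool:
        # each call returns the 10 final portfolio values from one run
        results = pool.map(processInput, range(8))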