def __init__(self):
    self.ai = sarsa.Sarsa(actions=range(directions), epsilon=0.1, alpha=0.1, gamma=0.9)
    self.lastAction = None
    self.score = 0
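
# These snippets each build an agent from a project-local `sarsa` module whose
# interface varies from project to project. For orientation, a minimal sketch
# of the tabular SARSA(0) agent they all approximate; the dict-backed Q-table
# and method names here are illustrative assumptions, not any one project's API.
import random
from collections import defaultdict

class TabularSarsa:
    def __init__(self, actions, epsilon=0.1, alpha=0.1, gamma=0.9):
        self.q = defaultdict(float)   # (state, action) -> value, defaults to 0
        self.actions = list(actions)
        self.epsilon, self.alpha, self.gamma = epsilon, alpha, gamma

    def chooseAction(self, state):
        # Epsilon-greedy: explore with probability epsilon, else exploit.
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        return max(self.actions, key=lambda a: self.q[(state, a)])

    def learn(self, s, a, reward, s2, a2):
        # SARSA update: Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a)).
        # Unlike Q-learning, the target uses the action actually chosen next.
        target = reward + self.gamma * self.q[(s2, a2)]
        self.q[(s, a)] += self.alpha * (target - self.q[(s, a)])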

def __init__(self, randomReward=True, case1=0.25, case2=0.5, case3=0.5, case4=0.75):
    self.ai = sarsa.Sarsa(actions=["left", "right"], epsilon=0.1, alpha=0.1, gamma=0.9)
    self.lastAction = None
    self.lastBoardState = None
    self.currBoardState = 0  # there are three possible boards (states)
    self.currAction = None
    self.currReward = 0

    # Parameters for the random walk of the reward probabilities.
    self.SD = 0.025
    self.lowerBoundary = 0.25
    self.upperBoundary = 0.75

    if randomReward:
        self.case1RewardProb = self.initializeReward()
        self.case2RewardProb = self.initializeReward()
        self.case3RewardProb = self.initializeReward()
        self.case4RewardProb = self.initializeReward()
    else:
        self.case1RewardProb = case1
        self.case2RewardProb = case2
        self.case3RewardProb = case3
        self.case4RewardProb = case4
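
# initializeReward and the per-step walk are not shown above. A plausible
# sketch, assuming the initial probability is drawn uniformly between the
# boundaries and each step adds Gaussian noise (SD = 0.025) clamped to stay
# inside [lowerBoundary, upperBoundary]; both method bodies are hypothetical.
import random

def initializeReward(self):
    return random.uniform(self.lowerBoundary, self.upperBoundary)

def walkReward(self, prob):
    prob += random.gauss(0, self.SD)
    return min(self.upperBoundary, max(self.lowerBoundary, prob))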

def __init__(self):
    self.run = True
    self.win = False
    self.level = 6
    self.board = []
    self.last_state = None
    self.last_action = None
    self.curr_state = []
    self.generate_board()
    self.actions = self.get_actions()
    # This Sarsa variant takes positional arguments: (n_actions, alpha, gamma).
    self.ai = sarsa.Sarsa(len(self.actions), 0.2, 0.9)
    self.file = open("result.txt", "w")

def __init__(self):
    self.ai = sarsa.Sarsa(actions=range(0, 6), epsilon=0.1, alpha=0.1, gamma=0.9)
    self.lastAction = None
    self.lastPosition = 1
    self.pos = 1
    self.score = 0
    self.misses = 0
    # Reward shaping: zero by default, a penalty for a miss, a bonus at the goal.
    self.normalReward = 0
    self.missReward = -10
    self.goalReward = 100
    self.hit = 0

last_time_steps = numpy.ndarray(0)

# Load parameters from the ROS param server. They are stored in a yaml file
# inside the config directory and are loaded at runtime by the launch file.
Alpha = rospy.get_param("/cartpole_v0/alpha")
Epsilon = rospy.get_param("/cartpole_v0/epsilon")
Gamma = rospy.get_param("/cartpole_v0/gamma")
epsilon_discount = rospy.get_param("/cartpole_v0/epsilon_discount")
nepisodes = rospy.get_param("/cartpole_v0/nepisodes")
nsteps = rospy.get_param("/cartpole_v0/nsteps")
running_step = rospy.get_param("/cartpole_v0/running_step")

# Initialise the learning algorithm (note: this rebinds the name `sarsa`
# from the imported module to the agent instance).
sarsa = sarsa.Sarsa(actions=range(env.action_space.n),
                    alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
initial_epsilon = sarsa.epsilon

start_time = time.time()
highest_reward = 0

# Start the main training loop, one iteration per episode.
for x in range(nepisodes):
    rospy.logdebug("############### START EPISODE => " + str(x))

    cumulated_reward = 0
    done = False
    # Decay exploration until epsilon reaches its 0.05 floor.
    if sarsa.epsilon > 0.05:
        sarsa.epsilon *= epsilon_discount

    # Initialize the environment and get the first state of the robot.
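    # --- Hedged continuation sketch (not part of the original snippet): the
    # per-step SARSA loop, assuming a Gym-style env with reset()/step() and a
    # string-join state discretisation; variable names are illustrative.
    observation = env.reset()
    state = ''.join(map(str, observation))
    action = sarsa.chooseAction(state)

    for i in range(nsteps):
        observation, reward, done, info = env.step(action)
        cumulated_reward += reward
        if highest_reward < cumulated_reward:
            highest_reward = cumulated_reward
        next_state = ''.join(map(str, observation))
        next_action = sarsa.chooseAction(next_state)
        # On-policy update: the target uses the action actually taken next.
        sarsa.learn(state, action, reward, next_state, next_action)
        state, action = next_state, next_action
        if done:
            break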

if __name__ == '__main__':
    env = gym.make('GazeboCircuit2TurtlebotLidar-v0')
    outdir = '/home/user/catkin_ws/src/gym_construct/src/gazebo_gym_experiments'
    # env.monitor.start(outdir, force=True, seed=None) is deprecated;
    # the Monitor wrapper below is used instead to avoid warnings.
    env = wrappers.Monitor(env, outdir, force=True)
    # plotter = LivePlot(outdir)

    last_time_steps = numpy.ndarray(0)
    sarsa = sarsa.Sarsa(actions=range(env.action_space.n),
                        epsilon=0.9, alpha=0.2, gamma=0.9)
    initial_epsilon = sarsa.epsilon
    epsilon_discount = 0.9986
    start_time = time.time()
    total_episodes = 10
    highest_reward = 0

    for x in range(total_episodes):
        done = False
        cumulated_reward = 0  # Should going forward give more reward than L/R?

import matplotlib.pyplot as plt

import sarsa

episodes = 3000
steps = 200
environment = 'Taxi-v3'  # alternatives: 'HotterColder-v0', 'FrozenLake-v0'

# Renamed from `Sarsa` to avoid shadowing the class with its instance.
agent = sarsa.Sarsa()
Gvector = agent.run(episodes, steps, environment)

plt.plot(Gvector)
plt.show()

    return total_reward


print("")
print("-------------------------------------")
print("")
print("Initialize SARSA agent.")

agent_settings = {
    'alpha': 0.2,         # default to a low(-ish) learning rate
    'gamma': 0.8,         # default to a high(-ish) dependence on future expectation
    'defaultReward': 0,
    'epsilon': 0.001,
    'policy': 'epsilonGreedy'
}

# One agent sees the full state; four more see only a projection of it.
basic_agent = sarsa.Sarsa(config=agent_settings)
agents = [basic_agent]
agents.append(sarsa.TransformState(lambda state: state[:2],
                                   sarsa.Sarsa(config=agent_settings)))
agents.append(sarsa.TransformState(lambda state: state[2:],
                                   sarsa.Sarsa(config=agent_settings)))
agents.append(sarsa.TransformState(lambda state: [state[0], state[2]],
                                   sarsa.Sarsa(config=agent_settings)))
agents.append(sarsa.TransformState(lambda state: [state[1], state[3]],
                                   sarsa.Sarsa(config=agent_settings)))

for i in range(4):
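
# TransformState is used above as TransformState(transform, agent); a minimal
# sketch of what such a wrapper presumably does (project the state before
# delegating to the inner agent), assuming the agent exposes chooseAction and
# learn. The real interface of this project's sarsa module is not shown here.
class TransformState:
    def __init__(self, transform, agent):
        self.transform = transform   # e.g. lambda state: state[:2]
        self.agent = agent

    def chooseAction(self, state):
        return self.agent.chooseAction(self.transform(state))

    def learn(self, s, a, reward, s2, a2):
        # Project both the current and the successor state the same way.
        self.agent.learn(self.transform(s), a, reward, self.transform(s2), a2)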

def processInput(i):
    ending = []
    startingM = 10000  # starting cash
    for k in range(0, 10):
        lastState = None
        lastAction = None
        ai = sarsa.Sarsa(actions=range(5), epsilon=epsilon0, alpha=alpha0, gamma=gamma0)
        count = 0
        count2 = 0
        stock = 0
        money = startingM
        value = startingM
        while count < len(apple):
            lastValue = value
            value = money + apple[count][5] * stock  # column 5 is the adjusted close
            if count > 0:
                adjclosediff = apple[count][5] - apple[count - 1][5]
            else:
                adjclosediff = 0
            state = calcState(adjclosediff, count2)
            # reward = adjclosediff * stock
            reward = lastValue - value + adjclosediff * stock - np.sign(state) * money
            action = ai.chooseAction(state)
            if lastAction is not None:
                ai.learn(lastState, lastAction, reward, state, action)
            # Actions: 0 = hold, 1/2 = buy 50/100 shares, 3/4 = sell 50/100 shares.
            # If the chosen trade is not feasible, fall back to the smaller one.
            if action == 1:
                if money >= apple[count][5] * 50:
                    stock += 50
                    money -= apple[count][5] * 50
                else:
                    action = 0
            if action == 2:
                if money >= apple[count][5] * 100:
                    stock += 100
                    money -= apple[count][5] * 100
                elif money >= apple[count][5] * 50:
                    stock += 50
                    money -= apple[count][5] * 50
                    action = 1
                else:
                    action = 0
            if action == 3:
                if stock > 50:
                    stock -= 50
                    money += apple[count][5] * 50
                else:
                    action = 0
            if action == 4:
                if stock > 100:
                    stock -= 100
                    money += apple[count][5] * 100
                elif stock > 50:
                    stock -= 50
                    money += apple[count][5] * 50
                    action = 3
                else:
                    action = 0
            lastState = state
            lastAction = action
            count += 1
        # print(money, stock)
        ending.append(money + apple[count - 1][5] * stock)
    return ending
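
# processInput(i) takes a worker index and returns the final portfolio values
# of ten runs, which suggests it is fanned out across processes. A usage
# sketch assuming the standard library's multiprocessing.Pool; the worker
# count of 4 is illustrative.
from multiprocessing import Pool

if __name__ == '__main__':
    with Pool(processes=4) as pool:
        results = pool.map(processInput, range(4))
    # Flatten the per-worker lists and report the average final value.
    endings = [value for run in results for value in run]
    print("mean final value:", sum(endings) / len(endings))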