def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.successes = []
    self.Q = {}
    self.numOfStates = 0
    self.time = 0.0
    self.alpha = 1.0
    self.gamma = 0.9
    self.epsilon = 1.0
    self.oFile = open('log.txt', 'w')
    self.iterations = 0
    self.write = False
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    self.total_reward = 0
    self.trip_reward = 0
    self.reset()
    self.state = OrderedDict()
    self.q_table = OrderedDict()
    self.learning_rate = 1.0  # begin as an "eager" learner; will decay over time
    self.exploitation_factor = 0.0  # a beginner has nothing to exploit
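# A minimal sketch of the decay mentioned above ("will decay over time"),
# assuming a hypothetical per-trial hook; the method name and the 1/t
# schedule are illustrative, not part of this agent.
def decay_learning_rate(self, trial_number):
    # Harmonic decay: the "eager" rate of 1.0 at trial 0 shrinks as trials accumulate.
    self.learning_rate = 1.0 / (1.0 + trial_number)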
def __init__(self, env):
    super(LearningAgent, self).__init__(env)
    self.color = 'red'
    self.planner = RoutePlanner(self.env, self)
    self.q_states = {}
    self.n_states = {}
    self.alpha = 0.5
    self.gamma = 0.5
    self.epsilon = 0.5
    self.trial_stats_columns = [
        'total_reward', 'negative_reward', 'trial_length', 'reached_destination'
    ]
    self.trial_stats = pd.DataFrame(columns=self.trial_stats_columns)
    self.actions = ['forward', 'right', 'left', None]
    self.possible_states = self.state_permutations()
    self.verbose_debugging = False
def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
    super(LearningAgent, self).__init__(env)  # Set the agent in the environment
    self.planner = RoutePlanner(self.env, self)  # Create a route planner
    self.valid_actions = self.env.valid_actions  # The set of valid actions
    # Set parameters of the learning agent
    self.learning = learning  # Whether the agent is expected to learn
    self.Q = dict()  # Create a Q-table which will be a dictionary of tuples
    self.epsilon = epsilon  # Random exploration factor
    self.alpha = alpha  # Learning factor
    self.maxQ = 0.0  # Deprecated
    self.waypoint = None  # Saves the waypoint
    self.statesList = list()  # List that contains the tuples of all states
    self.t = 1  # t = number of trials
    self.optimized = True  # Discards the alpha and function changes of the optimized mode
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.q = {}
    self.learningRate = 1.0
    self.epsilon = 1.0
    self.discountFactor = 0.3
    self.actions = [None, 'forward', 'left', 'right']
    self.trip = 0
    self.minusReward = 0
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    # Seed the class-level Q-table with the initial (state, action) pair;
    # 0 matches the initial entries of memR and memStaAct below
    LearningAgent.qTable[(None, None)] = 0
    self.counter = 0
    self.endCounter = 0
    self.badTripCounter = 0
    self.badMoveCounter = 0
    self.optimalMoveCounter = 0
    self.Q = 0
    self.memR = [0]
    self.memStaAct = [(None, None)]
def __init__(self, env, alpha, gamma, epsilon):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.alpha = alpha
    self.one_minus_alpha = 1.0 - alpha
    self.gamma = gamma
    self.epsilon = epsilon
    self.q_values = dict()
    for s in self.STATE_SPACE:
        action_values = dict()
        for w in self.env.valid_actions:
            action_values[w] = 0.0
        self.q_values[s] = action_values
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    self.alpha = 0.4
    self.gamma = 0.7
    self.Q_table = {}
    # Enumerate every (state, action) pair, giving each an initial value of 3
    for waypoint in ['left', 'right', 'forward']:
        for light in ['red', 'green']:
            for oncoming in self.env.valid_actions:
                for left in self.env.valid_actions:
                    for right in self.env.valid_actions:
                        for action in self.env.valid_actions:
                            self.Q_table[((waypoint, light, oncoming, left, right), action)] = 3
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    ## Actions
    self.A = ['forward', 'left', 'right', None]  # all available actions
    self.trial = 0  # the number of trials
    # Initialize the Q-table, keyed on (light, oncoming, next_waypoint)
    self.Q = {}
    for i in ['green', 'red']:  # possible lights
        for j in [None, 'forward', 'left', 'right']:  # possible oncoming traffic
            for k in ['forward', 'left', 'right']:  # possible next_waypoints
                self.Q[(i, j, k)] = [1] * len(self.A)  # one initial Q-value per action
def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
    super(LearningAgent, self).__init__(env)  # Set the agent in the environment
    self.planner = RoutePlanner(self.env, self)  # Create a route planner
    self.valid_actions = self.env.valid_actions  # The set of valid actions
    # Set parameters of the learning agent
    self.learning = learning  # Whether the agent is expected to learn
    self.Q = dict()  # Create a Q-table which will be a dictionary of tuples
    self.epsilon = epsilon  # Random exploration factor
    self.alpha = alpha  # Learning factor
    ###########
    ## TO DO ##
    ###########
    # Set any additional class parameters as needed
    self.initialtrial = 0
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'black'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.state = None
    self.action = None
    self.alpha = 0.50
    self.gamma = 0.05
    self.epsilon = 0.01
    self.q_table = {}
    self.actions = [None, 'forward', 'left', 'right']
    self.trips = 0
    self.softmax_probabilities = {}  # stored on self; a bare local would be discarded when __init__ returns
    self.previous_state = None
    self.last_action = None
    self.last_reward = None
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.qEstimate = {}
    self.learningRate = 1
    self.discountRate = 0
    self.randomExploration = 0
    self.randomExplorationDecay = 0.00075
    # Use the following to keep track of progress over the trials
    self.randomExplorationTrack = [0]
    self.cumScore = [0]
    self.completionTime = [0]
    self.wrongMoves = [0]
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.prevReward = 0
    self.prevAction = None
    self.Q = {}
    self.alpha = 0.6
    self.epsilon = 0.0
    self.gamma = 0.35  # discount value
    self.totalReward = 0.0
    self.totalActions = 0.0
    self.success = 0.0
    self.negativeRewards = 0.0
def __init__(self, env):
    # sets self.env = env, state = None, next_waypoint = None,
    # and a default color
    super(LearningAgent, self).__init__(env)
    self.color = 'red'  # override color
    # simple route planner to get next_waypoint
    self.planner = RoutePlanner(self.env, self)
    # TODO: Initialize any additional variables here
    # Define the Q matrix
    self.Q = pd.DataFrame(columns=['None', 'forward', 'left', 'right'])
    # Define gamma of the Q-learning algorithm
    self.gamma = 0.2
    # Define alpha of the Q-learning algorithm
    self.alpha = 0.8
    # Define the initialisation value of Q
    self.q_init = 4.0
    # Define the epsilon for Q
    self.epsilon = 0.01
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.next_waypoint = None
    self.reward = 0
    self.q_table = self.initialize_q_table()
    self.penalty = 0
    self.successes = 0
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.actions = [None, 'forward', 'left', 'right']
    self.q = {}
    self.alpha = -1  # sentinel; set to a real learning rate before running
    self.gamma = -1  # sentinel; set to a real discount factor before running
    self.epsilon = 0.01
    self.lastState = None
    self.lastAction = None
    self.run_mode = None
    self.enforce_deadline = False
    self.trial_num = 0
    self.report_stats = []  # collect data for visualization and reporting
def __init__(self, env, learning=False, epsilon=1.0, alpha=0.8, decay_rate=0.9975):
    super(LearningAgent, self).__init__(env)  # Set the agent in the environment
    self.planner = RoutePlanner(self.env, self)  # Create a route planner
    self.valid_actions = self.env.valid_actions  # The set of valid actions
    # Set parameters of the learning agent
    self.learning = learning  # Whether the agent is expected to learn
    self.Q = dict()  # Create a Q-table which will be a dictionary of tuples
    self.epsilon = epsilon  # Random exploration factor
    self.alpha = alpha  # Learning factor
    self.decay_rate = decay_rate  # Multiplicative decay factor for epsilon
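# A minimal sketch of how the decay_rate above might be applied, assuming a
# hypothetical per-trial hook; multiplying epsilon by a factor just under 1
# (0.9975 here) gives a slow geometric decay of exploration.
def decay_epsilon(self):
    if self.learning:
        self.epsilon *= self.decay_rate  # e.g. 1.0 drops to roughly 0.61 after 200 trials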
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.availableAction = [None, 'forward', 'left', 'right']
    self.next_waypoint = None
    self.goal = 0
    self.steps = 0
    self.features = []
    self.Qtable = {}
    self.epsilon = 0.95
    self.epsilon_end = 0.03125
    self.gamma = 0.05
    self.gamma_end = 0.125
    self.total_reward = [0]
    self.alpha = 0.5
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # Initialize any additional variables here
    self.state = None
    self.deadline = self.env.get_deadline(self)
    self.trip_len = []
    self.next_waypoint = random.choice(self.env.valid_actions[1:])
    self.qtable = dict()
    self.alpha = 0.6
    self.gamma = 0.4
    self.e = 0.1
def __init__(self, env):
    super(QLearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.all_actions = ['left', 'right', 'forward', None]
    self.q_value_state_action_dict = dict()
    # self.alpha = 0.6    # learning rate in [0, 1]: 0 means nothing is learned (Q-values never update); 0.9 lets learning occur quickly
    # self.discount = 0.5 # discount factor in [0, 1]: lower values make future rewards worth less than immediate rewards
    # self.epsilon = 0.2  # higher epsilon means more random actions; lower epsilon mostly picks the current best action
    # self.exploration_of_unknown_state_actions = 5  # reward for exploring exceeds that of the known optimal move (left, right, or forward) in any state of light or oncoming traffic
    self.reached_destination_count = 0
    self.wait_seconds_prior_to_next_run = 0
    self.run_iteration = 0
    self.total_reward = 0
    # self.urgency = 'low'
    self.initial_q_0_value = 0
    self.agent_has_reached_destination_in_cur_iteration = 0
def __init__(self, env, alpha=0.9, epsilon=0.01, gamma=0.1, init_q=2):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # Initialize Q-learning values
    self.alpha = alpha      # learning rate
    self.epsilon = epsilon  # exploration bound - no exploration at eps = 0
    self.gamma = gamma      # discount rate
    self.init_q = init_q    # initial q value for unexplored (state, action) pairs
    # Variables to track the total number of penalties and updates
    self.penalties = 0.0
    self.updates = 0.0
    # TODO: Initialize any additional variables here
    self.reward = 0.0  # cumulative reward for a trial
    # Initiate a Q-table to hold values, built once when creating the agent.
    # The state tuple is (light, oncoming, left, next_waypoint), giving
    # 2 * 4 * 4 * 3 = 96 states and 96 * 4 = 384 (state, action) Q-values.
    ss = tuple(itertools.product(['red', 'green'], self.env.valid_actions,
                                 self.env.valid_actions, ['forward', 'left', 'right']))
    # q is really q-hat until the learner converges
    q = {}
    for s in ss:
        q[s] = {}
        for act in self.env.valid_actions:
            q[s][act] = self.init_q
    self.q = q
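# A minimal sketch of epsilon-greedy selection over the q table built above,
# assuming a hypothetical choose_action method and an `import random` at module
# level; ties among equally good actions are broken at random. With init_q = 2
# the optimistic initial values also pull the greedy branch toward unexplored
# actions early on.
def choose_action(self, state):
    if random.random() < self.epsilon:
        return random.choice(self.env.valid_actions)  # explore
    best = max(self.q[state].values())
    return random.choice([a for a, v in self.q[state].items() if v == best])  # exploit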
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.Q = {}
    self.alpha = 0.3
    self.gamma = 0.3
    self.explore = 0.99
    self.stateHist = {}
    self.run = 1      # Iteration counter for each alpha/gamma change
    self.trip = 0     # Trip counter in each iteration
    self.counter = 0  # Step counter in each run
    self.glNum = 0    # Global counter
    self.rewards = 0
    self.experiment = 'None'
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    # State variables
    self.next_waypoint = None
    self.state_0 = None
    self.reward_0 = None
    self.action_0 = None
    self.total_reward = 0.
    # Q-table parameters
    self.alpha = 0.75    # learning rate
    self.gamma = 0.25    # discounted reward factor
    self.epsilon = 0.95  # exploitation-exploration trade-off
    # Create the Q-table and initialize all entries to zero. Keys are
    # ((waypoint, light, left, oncoming), action) pairs; traffic from the
    # right is deliberately excluded from the state.
    self.Qdict = {}
    waypoints = ['left', 'right', 'forward']
    lights = ['red', 'green']
    for waypoint in waypoints:
        for light in lights:
            for left in Environment.valid_actions:
                for oncoming in Environment.valid_actions:
                    for act in Environment.valid_actions:
                        self.Qdict[(waypoint, light, left, oncoming), act] = 0.
    # Counters
    self.ndeadline = []
    self.tlist = []
    self.trial_reward = 0
    self.cum_rewards = []
def __init__(self, env):
    super(QLearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # Initialize the Q-table here
    self.qDict = dict()
    self.alpha = 0.9
    self.epsilon = 0.0  # initial probability of flipping the coin
    self.gamma = 0.35
    self.discount = self.gamma
    self.previous_state = None
    self.state = None
    self.previous_action = None
    self.deadline = self.env.get_deadline(self)
    self.previous_reward = None
    self.cumulativeRewards = 0
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.qtable = {}
    self.actions = ["right", "forward", "left", None]
    self.previousstate = None
    self.previousreward = 0
    self.previousaction = None
    self.alpha = 0.9
    self.gamma = 0
    self.epsilon = 0
    self.success = 0
    self.totalpenalties = 0
    self.penalties = 0
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    # Initialize the Q-table as a defaultdict: unseen keys read as 0.0
    self.q_table = defaultdict(float)
    # Initialize variables for state, previous state, previous action, previous reward
    self.state = None
    self.prev_state = ('red', 'safe', 'safe', 'forward')  # arbitrary starting value
    self.prev_action = None  # arbitrary starting value
    self.prev_reward = 0
    # Initialize Q-learning parameters
    self.epsilon = 0.1  # the probability of taking a random action (exploration)
    self.alpha = 0.9    # the learning rate
    self.gamma = 0.4    # the discount factor
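# A minimal sketch of the temporal-difference update these attributes support,
# assuming a hypothetical learn method and the usual [None, 'forward', 'left',
# 'right'] action set; because q_table is a defaultdict(float), unseen
# (state, action) keys read as 0.0 and need no explicit initialization.
def learn(self):
    old_q = self.q_table[(self.prev_state, self.prev_action)]
    best_next = max(self.q_table[(self.state, a)]
                    for a in [None, 'forward', 'left', 'right'])
    self.q_table[(self.prev_state, self.prev_action)] = \
        old_q + self.alpha * (self.prev_reward + self.gamma * best_next - old_q)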
def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
    super(LearningAgent, self).__init__(env)  # Set the agent in the environment
    self.planner = RoutePlanner(self.env, self)  # Create a route planner
    self.valid_actions = self.env.valid_actions  # The set of valid actions
    # Set parameters of the learning agent
    self.learning = learning  # Whether the agent is expected to learn
    self.Q = dict()  # Create a Q-table which will be a dictionary of tuples
    self.epsilon = epsilon  # Random exploration factor
    self.alpha = alpha  # Learning factor
    ### Setting additional parameters
    self.testing = False  # flag used to check for a testing trial
    self.initialQ = 0.0   # initialization value for new entries in the Q-table
    self.timer = 0        # timer counter, increments by 1 at every time step
    self.a = 0.9          # base for the epsilon decay schedule: a^t
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'yellow'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.reward = 0
    self.next_waypoint = None
    # Declare the variable used to calculate the success percentage rate
    self.S = 0
    # Declare the trial counter variable
    self.var_trial = 0
    # Declare the variable used to track the success rate per trial
    self.var_trial_S = {}
def __init__(self, env, policy, alpha, gamma, no_plot):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    self.policy = policy
    # State descriptors
    self.actions = self.env.valid_actions
    self.lights = ['green', 'red']
    # For a global tally of events over the n trials
    self.total_time = 0
    self.trial = -1
    self.no_plot = no_plot  # activate plots or not
    # number_trials is assumed to be a module-level constant
    self.bad_actions = [0 for trial in range(number_trials)]   # bad actions performed in a given trial
    self.out_of_times = [0 for trial in range(number_trials)]  # trials in which the agent ran out of time
    # For the Q-learning implementation
    self.gamma = gamma
    self.alpha = alpha
    # On a green light the relevant traffic is oncoming; on a red light it is
    # the traffic from the left. waypoint is only None once the target is
    # reached, so None is dropped from its slot in the state.
    self.Q = {(action, 'green', oncoming, waypoint): 1
              for action in self.actions
              for oncoming in self.actions
              for waypoint in self.actions[1:]}
    red_Q = {(action, 'red', left, waypoint): 1
             for action in self.actions
             for left in self.actions
             for waypoint in self.actions[1:]}
    self.Q.update(red_Q)  # combine the two main learning scenarios into one dictionary Q
def __init__(self, env):
    super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
    self.color = 'red'  # override color
    self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
    # TODO: Initialize any additional variables here
    self.Qtable = {}
    self.Q_0 = 15  # initial Q-value for every action
    self.Q_init = {None: self.Q_0, 'forward': self.Q_0, 'left': self.Q_0, 'right': self.Q_0}
    self.gamma = 0.2
    self.alpha = 0.9
    self.state = None
    self.prev_state = None
    self.prev_action = None
    self.prev_reward = None
    if 1:  # debug toggle: set to 0 to start from an empty Q-table instead of loading one
        self.load()
        print(len(self.Qtable))
        pp.pprint(self.Qtable)