Example #1
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     self.successes = []
     self.Q = {}
     self.numOfStates = 0
     self.time = 0.0
     self.alpha = 1.0
     self.gamma = 0.9
     self.epsilon = 1.0
     self.oFile = open('log.txt', 'w')
     self.iterations = 0
     self.write = False
Example #2
    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint

        self.total_reward = 0

        self.trip_reward = 0

        self.reset()

        self.state = OrderedDict()
        self.q_table = OrderedDict()
        self.learning_rate = 1.0  # begin as an "eager" learner; will decay over time
        self.exploitation_factor = 0.0  # a beginner has nothing to exploit
Example #3
    def __init__(self, env):
        super(LearningAgent, self).__init__(env)
        self.color = 'red'
        self.planner = RoutePlanner(self.env, self)

        self.q_states = {}
        self.n_states = {}
        self.alpha = 0.5
        self.gamma = 0.5
        self.epsilon = 0.5
        self.trial_stats_columns = [
            'total_reward', 'negative_reward', 'trial_length',
            'reached_destination'
        ]
        self.trial_stats = pd.DataFrame(columns=self.trial_stats_columns)
        self.actions = ['forward', 'right', 'left', None]
        self.possible_states = self.state_permutations()
        self.verbose_debugging = False
Example #4
    def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
        super(LearningAgent, self).__init__(env)  # Set the agent in the environment
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Set parameters of the learning agent
        self.learning = learning  # Whether the agent is expected to learn
        self.Q = dict()  # Create a Q-table which will be a dictionary of tuples
        self.epsilon = epsilon  # Random exploration factor
        self.alpha = alpha  # Learning factor

        self.maxQ = 0.0  # Deprecated
        self.waypoint = None  # Saves the waypoint
        self.statesList = list()  # List that contains the tuples of all states
        self.t = 1  # t = number of trials
        self.optimized = True  # Just to discard alpha and function changes of the optimized mode
Example #5
    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint

        # TODO: Initialize any additional variables here
        self.q = {}

        self.learningRate = 1.0
        self.epsilon = 1.0
        self.discountFactor = 0.3
        self.actions = [None, 'forward', 'left', 'right']

        self.trip = 0
        self.minusReward = 0
Example #6
    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint

        # TODO: Initialize any additional variables here
        LearningAgent.qTable[(None, None)] = 0  # seed the class-level Q-table with the initial (state, action) pair
        self.counter = 0
        self.endCounter = 0
        self.badTripCounter = 0
        self.badMoveCounter = 0
        self.optimalMoveCounter = 0
        self.Q = 0
        self.memR = [0]
        self.memStaAct = [(None, None)]
Example #7
 def __init__(self, env, alpha, gamma, epsilon):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     self.alpha = alpha
     self.one_minus_alpha = 1.0 - alpha
     self.gamma = gamma
     self.epsilon = epsilon
     self.q_values = dict()
     for s in self.STATE_SPACE:
         action_values = dict()
         for w in self.env.valid_actions:
             action_values[w] = float(0)
         self.q_values[s] = action_values
Example #8
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     self.alpha = 0.4
     self.gamma = 0.7
     self.Q_table = {}
     for waypoint in ['left', 'right', 'forward']:
         for light in ['red', 'green']:
             for oncoming in self.env.valid_actions:
                 for left in self.env.valid_actions:
                     for right in self.env.valid_actions:
                         for action in self.env.valid_actions:
                             self.Q_table[((waypoint, light, oncoming, left,
                                            right), action)] = 3
Example #9
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     ## Actions
     self.A = ['forward', 'left', 'right', None]  # all available actions
     self.trial = 0  # the number of trials
     # Initialize Q table keyed by (light, oncoming, next_waypoint)
     self.Q = {}
     for i in ['green', 'red']:  # possible lights
         for j in [None, 'forward', 'left', 'right']:  # possible oncoming
             for k in ['forward', 'left', 'right']:  # possible next_waypoints
                 self.Q[(i, j, k)] = [1] * len(self.A)  # one Q value per action, initialized to 1
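
Because this snippet stores Q values as a list aligned with self.A rather than a dict keyed by action, lookups have to go through the action's index. A minimal sketch of two helper methods for reading and writing one entry, assuming the same self.Q and self.A as above (the names get_q/set_q are illustrative, not from the original agent):

 def get_q(self, state, action):
     # state is a (light, oncoming, next_waypoint) tuple; the Q row is a
     # list indexed by the action's position in self.A
     return self.Q[state][self.A.index(action)]

 def set_q(self, state, action, value):
     self.Q[state][self.A.index(action)] = value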
Example #10
    def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
        super(LearningAgent, self).__init__(env)  # Set the agent in the environment
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Set parameters of the learning agent
        self.learning = learning  # Whether the agent is expected to learn
        self.Q = dict()  # Create a Q-table which will be a dictionary of tuples
        self.epsilon = epsilon  # Random exploration factor
        self.alpha = alpha  # Learning factor

        ###########
        ## TO DO ##
        ###########
        # Set any additional class parameters as needed
        self.initialtrial = 0
Example #11
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'black'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     
     # TODO: Initialize any additional variables here
     self.state = None
     self.action = None
     self.alpha = 0.50
     self.gamma = 0.05
     self.epsilon = .01
     self.q_table = {}
     self.actions = [None, 'forward', 'left', 'right']
     self.trips = 0 
     self.softmax_probabilities = {}  # action probabilities for a softmax policy
     self.previous_state = None
     self.last_action = None 
     self.last_reward = None
Example #12
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     self.qEstimate = {}
     self.learningRate = 1
     self.discountRate = 0
     self.randomExploration = 0
     self.randomExplorationDecay = 0.00075
     #Use the following to keep track of progress over the trials
     self.randomExplorationTrack = [0]
     self.cumScore = [0]
     self.completionTime = [0]
     self.wrongMoves = [0]
Example #13
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     self.prevReward = 0
     self.prevAction = None
     self.Q = {}
     self.alpha = 0.6
     self.epsilon = 0.0
     self.gamma = 0.35  #discount value
     self.totalReward = 0.0
     self.totalActions = 0.0
     self.success = 0.0
     self.negativeRewards = 0.0
Example #14
 def __init__(self, env):
     # sets self.env = env, state = None, next_waypoint = None,
     # and a default color
     super(LearningAgent, self).__init__(env)  
     self.color = 'red'  # override color
     # simple route planner to get next_waypoint
     self.planner = RoutePlanner(self.env, self)  
     # TODO: Initialize any additional variables here
     # Define the q matrix
     self.Q = pd.DataFrame(columns=['None', 'forward', 'left', 'right'])
     # Define Gamma of the Q learning algorithm
     self.gamma = 0.2
     # Define alpha of the Q-learning algorithm
     self.alpha = 0.8
     # Define the initialisation of Q
     self.q_init = 4.0
     # Define the Epsilon for Q
     self.epsilon = 0.01
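
Since the Q matrix here is a pandas DataFrame with one column per action, a state that has never been visited has no row yet and must be added before it can be read. A small sketch of that step, assuming the state is encoded as a string used for the row index (the ensure_state name is illustrative, not from the original file):

 def ensure_state(self, state):
     # Lazily add a row of q_init values the first time a state is seen.
     if state not in self.Q.index:
         self.Q.loc[state] = [self.q_init] * len(self.Q.columns)
     return self.Q.loc[state]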
Example #15
    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
        # TODO: Initialize any additional variables here
        # self.q_table = [(self.state, random.choice([None,'forward','left','right']))]
        # self.q_table = [(self.state,  (random.choice([None,'forward','left','right'])))]
        # self.q_value = {}

        # def initialize_Q_values(self, val=4.0):
        self.next_waypoint = None
        self.reward = 0
        self.q_table = self.initialize_q_table()
        self.penalty = 0
        self.successes = 0
Example #16
File: agent.py Project: valeman/udacity
    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
        
        # TODO: Initialize any additional variables here
        self.actions = [None, 'forward', 'left', 'right']
        self.q = {}
        self.alpha = -1
        self.gamma = -1
        self.epsilon = 0.01
        self.lastState = None
        self.lastAction = None
        self.run_mode = None
        self.enforce_deadline = False

        self.trial_num = 0
        self.report_stats = [] # collect data for visualization and reporting
Example #17
    def __init__(self,
                 env,
                 learning=False,
                 epsilon=1.0,
                 alpha=0.8,
                 decay_rate=0.9975):
        super(LearningAgent, self).__init__(env)  # Set the agent in the environment
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Set parameters of the learning agent
        self.learning = learning  # Whether the agent is expected to learn
        self.Q = dict()  # Create a Q-table which will be a dictionary of tuples
        self.epsilon = epsilon  # Random exploration factor
        self.alpha = alpha  # Learning factor
        self.decay_rate = decay_rate  #decay rate
Example #18
File: agent.py Project: llathrop/smartcab
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     self.availableAction = [None, 'forward', 'left', 'right']
     self.next_waypoint = None
     self.goal = 0
     self.steps = 0
     self.features = []
     self.Qtable = {}
     self.epsilon = 0.95
     self.epsilon_end = 0.03125
     self.gamma = 0.05
     self.gamma_end = 0.125
     self.total_reward = [0]
     self.alpha = 0.5
Example #19
    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint

        # Initialize any additional variables here
        self.state = None
        self.deadline = self.env.get_deadline(self)
        self.trip_len = []
        self.next_waypoint = random.choice(self.env.valid_actions[1:])

        self.qtable = dict()

        self.alpha = 0.6
        self.gamma = 0.4
        self.e = 0.1
Example #20
    def __init__(self, env):
        super(QLearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
        # TODO: Initialize any additional variables here
        self.all_actions = ['left','right','forward', None]
        self.q_value_state_action_dict = dict()
        #self.alpha = 0.6  # learning rate between 0 and 1. Setting it to 0 means nothing is learned, i.e. Q-values are never updated, while setting it near 1 means learning occurs quickly
        #self.discount = 0.5  # discount factor between 0 and 1. Lower means that future rewards are worth less than immediate rewards.
        #self.epsilon = 0.2   # higher epsilon means more random action choices (exploration); lower epsilon means the current best action is chosen more often (exploitation)

        #self.exploration_of_unknown_state_actions = 5 # reward for exploring is higher than for moving toward the optimal action, i.e. left, right or forward, in any state of light or oncoming traffic
        self.reached_destination_count = 0
        self.wait_seconds_prior_to_next_run = 0
        self.run_iteration = 0
        self.total_reward = 0
        #self.urgency = 'low'
        self.initial_q_0_value = 0
        self.agent_has_reached_destination_in_cur_iteration = 0
Example #21
File: agent96.py Project: sunnycd/Udacity
    def __init__(self, env, alpha=0.9, epsilon=0.01, gamma=0.1, init_q=2):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint

        # Initialize Q-learning values
        self.alpha = alpha  # learning rate
        self.epsilon = epsilon  # exploration probability (eps = 0 means no exploration)
        self.gamma = gamma  # discount factor
        self.init_q = init_q  # initial q value for unexplored state/action pairs

        # Need variables to track total number of penalties and turns
        self.penalties = 0.0
        self.updates = 0.0

        #        with open("logs.txt", 'wb') as logs:
        #            logs.write('Alpha: {}'.format(self.alpha))
        #            logs.write('Gamma: {}'.format(self.gamma))
        #            logs.write('Epsilon: {}'.format(self.epsilon))
        #            logs.write('Q Initialization: {}'.format(self.init_q))

        # TODO: Initialize any additional variables here
        self.reward = 0.0  # cumulative reward for a trial

        # Initialize a Q table to hold values only when creating the agent.
        # The sample space is 96 states x 4 actions = 384 Q-table entries;
        # each state tuple is (light, oncoming, left, next_waypoint).

        ss = tuple(itertools.product(['red', 'green'], self.env.valid_actions,
                                     self.env.valid_actions,
                                     ['forward', 'left', 'right']))
        #keys = tuple(itertools.product(ss, self.env.valid_actions))
        # q values are the values of the q dict

        ### q is really q-hat until the learner converges
        q = {}
        for s in ss:
            q[s] = {}
            for act in self.env.valid_actions:
                q[s][act] = self.init_q
        self.q = q
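
With the nested dictionary above (state tuple → {action: q value}), greedy selection and the standard Q-learning update stay compact. A hedged sketch of both; the method names and the reward/next_state arguments are assumptions rather than code from the original file:

    def best_action(self, state):
        # Pick the action with the highest q value for this state.
        return max(self.q[state], key=self.q[state].get)

    def update_q(self, state, action, reward, next_state):
        # Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))
        future = max(self.q[next_state].values())
        self.q[state][action] = ((1 - self.alpha) * self.q[state][action]
                                 + self.alpha * (reward + self.gamma * future))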
Example #22
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     self.Q = {}
     self.alpha = 0.3
     self.gamma = 0.3
     self.explore = 0.99
     self.stateHist = {}
     self.run = 1  # Iteration counter for each alpha/gamma change
     self.trip = 0  # Trip counter in each iteration
     self.counter = 0  # Step counter in each run
     self.glNum = 0  # global counter
     self.rewards = 0
     self.experiment = 'None'
Example #23
    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint

        # TODO: Initialize any additional variables here

        #state variables
        self.next_waypoint = None
        self.state_0 = None
        self.reward_0 = None
        self.action_0 = None
        self.total_reward = 0.
        waypoints = ['left', 'right', 'forward']
        lights = ['red', 'green']

        #Qtable parameters
        self.alpha = 0.75  #Learning Rate
        self.gamma = 0.25  #Discounted Reward Factor
        self.epsilon = 0.95  #exploitation-exploration

        #Create the Q-table and initialize all to zero:
        self.Qdict = {}  # dictionary mapping ((state), action) keys to Q-values
        waypoints = ['left', 'right', 'forward']
        lights = ['red', 'green']
        for waypoint in waypoints:
            for light in lights:
                for left in Environment.valid_actions:
                    #for right in Environment.valid_actions:
                    for oncoming in Environment.valid_actions:
                        for act in Environment.valid_actions:
                            self.Qdict[(waypoint, light, left, oncoming),
                                       act] = 0.

        #Counters:
        self.ndeadline = []
        self.tlist = []
        self.trial_reward = 0
        self.cum_rewards = []
Example #24
 def __init__(self, env):
     super(QLearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     ##initialize q table here
     self.qDict = dict()
     self.alpha = 0.9
     self.epsilon = 0.0  ##initial probability of flipping the coin
     self.gamma = 0.35
     self.discount = self.gamma
     self.previous_state = None
     self.state = None
     self.previous_action = None
     self.deadline = self.env.get_deadline(self)
     self.previous_reward = None
     self.cumulativeRewards = 0
Example #25
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     self.qtable = {}
     self.actions = ["right", "forward", "left", None]
     self.previousstate = None
     self.previousreward = 0
     self.previousaction = None
     self.alpha = 0.9
     self.gamma = 0
     self.epsilon = 0
     self.success = 0
     self.totalpenalties = 0
     self.penalties = 0
Example #26
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     # Initialize q-table as a default dictionary
     self.q_table = defaultdict(float)
     # Initialize variables for state, previous state, previous action, previous reward
     self.state = None
     self.prev_state = ('red', 'safe', 'safe', 'forward') # chosen randomly
     self.prev_action = 'None' # chosen randomly
     self.prev_reward = 0
     # Initialize Q-learning parameters
     # The probability of taking a random action (exploration)
     self.epsilon = 0.1
     # The learning rate
     self.alpha = 0.9
     # The discount
     self.gamma = 0.4
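
The epsilon, alpha and gamma above drive an epsilon-greedy policy over the flat (state, action)-keyed defaultdict. A minimal sketch of such a choice, assuming `import random` and an actions list like [None, 'forward', 'left', 'right'] (both assumptions, not shown in this snippet):

 def choose_action(self, state, actions):
     # With probability epsilon take a random action (explore),
     # otherwise take the action with the highest Q value (exploit).
     if random.random() < self.epsilon:
         return random.choice(actions)
     return max(actions, key=lambda a: self.q_table[(state, a)])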
Example #27
    def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
        super(LearningAgent, self).__init__(env)  # Set the agent in the environment
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Set parameters of the learning agent
        self.learning = learning  # Whether the agent is expected to learn
        self.Q = dict()  # Create a Q-table which will be a dictionary of tuples
        self.epsilon = epsilon  # Random exploration factor
        self.alpha = alpha  # Learning factor

        ### Setting additional parameters

        self.testing = False  # flag used to check for testing trial
        self.initialQ = 0.0  # initialization value for new entries in the Q-table
        self.timer = 0  # timer counter increments by 1 at every time step
        self.a = 0.9  # used for epsilon decay factor : a^t
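
The last comment describes an exponential schedule, epsilon = a^t. A hedged sketch of how a reset() hook could apply it once per trial; the signature follows the other project snippets, but the body here is only illustrative:

    def reset(self, destination=None, testing=False):
        # Route to the new destination and advance the trial counter.
        self.planner.route_to(destination)
        self.timer += 1
        if testing:
            # During testing trials: no exploration, no learning.
            self.epsilon = 0.0
            self.alpha = 0.0
        else:
            # Exponential decay: epsilon = a ** t
            self.epsilon = self.a ** self.timer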
Example #28
    def __init__(self, env):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'yellow'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
        # TODO: Initialize any additional variables here
        self.reward = 0
        self.next_waypoint = None

        # Declaring the variable to calculate the success percentage rate

        self.S = 0

        #Declaring the trial counter variable

        self.var_trial = 0

        # Declaring the variable to track success rate

        self.var_trial_S = {}
Example #29
    def __init__(self, env, policy, alpha, gamma, no_plot):
        super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
        self.color = 'red'  # override color
        self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
        self.policy = policy

        # State descriptors
        self.actions = self.env.valid_actions
        self.lights = ['green', 'red']

        # For a global tally of events over the n trials
        self.total_time = 0
        self.trial = -1
        self.no_plot = no_plot  # activate plots or not
        self.bad_actions = [0 for trial in range(number_trials)
                            ]  # bad actions performed in a given trial
        self.out_of_times = [0 for trial in range(number_trials)
                             ]  # trials that the agent ran out of time

        # For Q learning implementation
        self.gamma = gamma
        self.alpha = alpha

        self.Q = {
                  (action, 'green', oncoming, waypoint) : 1 \
                  for action   in self.actions              \
                  for oncoming in self.actions              \
                  for waypoint in self.actions[1:]            ## waypoint is only None when target is reached
            }

        red_Q =  {
                  (action, 'red', left, waypoint) : 1 \
                  for action   in self.actions        \
                  for left     in self.actions        \
                  for waypoint in self.actions[1:]
            }

        self.Q.update(red_Q)  # combine the two main learning scenarios into dictionary Q
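
Because the keys above put the action first, the greedy move for a given situation is found by scanning every action for that (light, traffic, waypoint) combination. A brief illustrative lookup; the method name and the `traffic` argument (standing for oncoming on green, left on red) are assumptions:

    def best_action(self, light, traffic, waypoint):
        # Return the action with the highest Q value for this situation.
        return max(self.actions,
                   key=lambda a: self.Q[(a, light, traffic, waypoint)])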
Example #30
 def __init__(self, env):
     super(LearningAgent, self).__init__(env)  # sets self.env = env, state = None, next_waypoint = None, and a default color
     self.color = 'red'  # override color
     self.planner = RoutePlanner(self.env, self)  # simple route planner to get next_waypoint
     # TODO: Initialize any additional variables here
     self.Qtable = {}
     self.Q_0 = 15
     self.Q_init = {None : self.Q_0, 'forward' : self.Q_0, 'left' : self.Q_0, 'right' : self.Q_0}
     self.gamma = 0.2 
     self.alpha =   0.9
     self.state = None
     self.prev_state  = None
     self.prev_action = None
     self.prev_reward = None
     
     if 1:
         self.load()
         print(len(self.Qtable))
         pp.pprint(self.Qtable)