Example 1

    def __init__(self, params):
        """See documentation in the base class"""
        Agent.__init__(self, params)
        # Q-values initialised randomly, one entry per (state, action) pair.
        self.q = np.random.rand(self.num_states, self.num_actions)
        # Observed returns per (state, action), used for Monte Carlo averaging.
        self.returns = {(state, action): []
                        for state in range(self.num_states)
                        for action in range(self.num_actions)}
        # Exploration probability; the agent starts fully exploratory.
        self.epsilon = 1
        self.tuple_state_agent_met = []
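The returns dictionary above is the bookkeeping structure of Monte Carlo control: one list of observed returns per (state, action) pair. As a rough illustration, an end-of-episode update could look like the sketch below; the function name, the episode format, and the first-visit convention are assumptions, not code from this repository.

import numpy as np

def mc_update(agent, episode, gamma=1.0):
    """Hypothetical first-visit Monte Carlo update; episode is assumed to be
    a list of (state, action, reward) tuples observed during one episode."""
    g = 0.0
    for t in range(len(episode) - 1, -1, -1):
        state, action, reward = episode[t]
        g = gamma * g + reward
        # First-visit check: update only if (state, action) does not occur earlier.
        if all((s, a) != (state, action) for s, a, _ in episode[:t]):
            agent.returns[(state, action)].append(g)
            agent.q[state, action] = np.mean(agent.returns[(state, action)])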
Example 2
    def __init__(self, params):
        """See documentation in the base class"""
        Agent.__init__(self, params)

        # The following three attributes are set later by the environment.
        self.n_x = None
        self.n_y = None
        self.states = None

        # Training memory for the function approximator.
        self.X_memory = []
        self.Y_memory = []
        # Neural network approximating the Q-function; warm_start=True lets
        # successive fit() calls continue from the current weights.
        self.ia = MLPRegressor(warm_start=True,
                               max_iter=200,
                               early_stopping=False,
                               hidden_layer_sizes=(20, 10, 5),
                               learning_rate_init=1e-3,
                               activation='identity')

        self.epsilon = 0.5  # exploration probability
        self.gamma = 0.8    # discount factor
        self.alpha = 0.5    # learning rate

        self.last_action = None
        self.last_state = None
        self.is_action_possible = None
        self.n_max_action = params["max_action_per_episode"]

        self.first_fit = True
        self.key_taken = []
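For context, a warm-started regressor like the one above is normally refit on the growing memory as training proceeds. The sketch below illustrates the idea under stated assumptions: the helper names and the use of X_memory/Y_memory are hypothetical, but fit(), predict(), and the warm_start behaviour are standard scikit-learn.

import numpy as np

def q_estimates(agent, encoded_inputs):
    """Hypothetical helper: predict Q-values, with a neutral fallback before
    the first fit (MLPRegressor raises NotFittedError until fit() is called)."""
    if agent.first_fit:
        return np.zeros(len(encoded_inputs))
    return agent.ia.predict(np.asarray(encoded_inputs))

def refit_on_memory(agent):
    """Hypothetical helper: refit on the accumulated memory. Because the
    regressor was built with warm_start=True, each fit() call continues
    from the current weights rather than reinitialising them."""
    agent.ia.fit(np.asarray(agent.X_memory), np.asarray(agent.Y_memory))
    agent.first_fit = False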
Example 3

    def __init__(self, params):
        """See documentation in the base class"""
        Agent.__init__(self, params)

        # Initial strategy: at the start the agent does not know the optimal moves.
        # Table of size number of states * number of possible actions.
        self.Q = np.zeros((int(params['num_cells_grid1D']), 2))

        # Exploration probability
        self.exploration = 0.05
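An exploration probability like this one usually drives an epsilon-greedy choice between the two actions. A minimal sketch, assuming the method name and that the two columns of Q are the two moves along the 1D grid:

import numpy as np

rng = np.random.default_rng()

def choose_action(agent, state):
    """Hypothetical epsilon-greedy selection for the 1D grid agent above."""
    if rng.random() < agent.exploration:
        # Explore with probability agent.exploration: pick a random action.
        return int(rng.integers(agent.Q.shape[1]))
    # Otherwise exploit the current Q estimates.
    return int(np.argmax(agent.Q[state]))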
Example 4

    def __init__(self, params):
        """See documentation in the base class"""
        Agent.__init__(self, params)

        self.epsilon = 1      # exploration probability, fully exploratory at first
        self.n_key_max = 5    # maximum number of keys to collect
        self.alpha = 0.4      # learning rate
        self.gamma = 0.9999   # discount factor
        self.key_taken = []
        # Q-values indexed by (state, action, number of keys taken).
        self.q = np.random.rand(self.num_states, self.num_actions, self.n_key_max)
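Given the extra key dimension of this Q table, a plausible update indexes the third axis by the number of keys collected so far. A sketch under that assumption (the method signature is hypothetical):

import numpy as np

def q_update_with_keys(agent, state, action, reward, next_state, done):
    """Hypothetical Q-learning step for the three-dimensional table above;
    the third axis is assumed to index the number of keys collected so far."""
    # Clamp so that holding all n_key_max keys still maps to a valid slice.
    k = min(len(agent.key_taken), agent.n_key_max - 1)
    target = reward
    if not done:
        target += agent.gamma * np.max(agent.q[next_state, :, k])
    agent.q[state, action, k] += agent.alpha * (target - agent.q[state, action, k])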
Example 5
    def __init__(self, params):
        """See documentation in the base class"""
        Agent.__init__(self, params)

        # Number of rows
        self.l = int(params['ligne'])
        # Number of columns
        self.c = int(params['colonne'])

        # Initial strategy: at the start the agent does not know the optimal moves.
        # Table of size number of states * number of possible actions.
        self.Q = np.zeros((self.c * self.l, 4))

        # Exploration probability
        self.exploration = 0.05
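A Q-table of shape (l * c, 4) implies that each grid cell is flattened to a single state index. A minimal sketch of one common convention (row-major order; the environment's actual encoding may differ):

def state_index(agent, row, col):
    """Hypothetical row-major mapping from a grid cell to the flat state
    index used by the (l * c, 4) Q-table."""
    return row * agent.c + col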
Example 6

    def __init__(self, params):
        """Initialise the agent."""
        Agent.__init__(self, params)

        self.l = int(params['ligne'])    # number of rows
        self.c = int(params['colonne'])  # number of columns

        # Initial strategy: at the start the agent does not know the optimal moves.
        # Table of size number of states * number of possible actions.
        self.Q = np.zeros((self.l * self.c, 4))

        # Exploration probability
        self.exploration = 0.05

        self.alpha = 0.6  # optimised learning rate
        self.gamma = 1    # discount factor
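These alpha and gamma values feed the standard tabular Q-learning update. A minimal sketch, with the method signature assumed:

import numpy as np

def q_update(agent, state, action, reward, next_state):
    """Hypothetical tabular Q-learning step using the coefficients above:
    Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))."""
    td_error = reward + agent.gamma * np.max(agent.Q[next_state]) - agent.Q[state, action]
    agent.Q[state, action] += agent.alpha * td_error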
Example 7
    def __init__(self, params):
        """See documentation in the base class"""
        Agent.__init__(self, params)

        # Number of rows
        self.l = int(params['ligne'])
        # Number of columns
        self.c = int(params['colonne'])

        # Initial strategy: at the start the agent does not know the optimal moves.
        # Table of size number of states * number of possible actions.
        self.Q = np.zeros((self.c * self.l, 4))

        # Exploration probability
        self.exploration = 0.05

        # Coefficients for the Q-update formula
        self.alpha = 1
        self.gamma = 1

        # Convergence speed (the episode from which total rewards exceed 60)
        self.vitesse = 0
        # Index of the episode that has just finished
        self.ep = 0
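The vitesse and ep counters suggest bookkeeping done at the end of each episode. A sketch of what that could look like, matching the 60-reward threshold mentioned in the comment (the helper name is hypothetical):

def end_of_episode(agent, total_reward):
    """Hypothetical bookkeeping: remember the first episode whose total
    reward exceeds 60 as the convergence point."""
    agent.ep += 1
    if agent.vitesse == 0 and total_reward > 60:
        agent.vitesse = agent.ep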
Example 8

    def __init__(self, params):
        """See documentation in the base class"""
        Agent.__init__(self, params)
Example 9
    def __init__(self, params):
        """See documentation in the base class"""
        Agent.__init__(self, params)
        # Cache the terminal state of the 2D grid environment.
        self.final_state = EnvironmentGrid2D(params).terminal_state
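A cached terminal state like this is typically used to detect the end of an episode; a minimal sketch, with the helper name assumed:

def is_terminal(agent, state):
    """Hypothetical check: the episode ends when the agent reaches the
    terminal state cached from EnvironmentGrid2D."""
    return state == agent.final_state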