def __init__(self, params): """See documentation in the base class""" Agent.__init__(self, params) self.q = np.random.rand(self.num_states, self.num_actions) self.returns = {(state, action):[] for state in range(self.num_states) for action in range(self.num_actions)} self.epsilon = 1 self.tuple_state_agent_met=[]
def __init__(self, params):
    Agent.__init__(self, params)
    # The 3 following attributes will be given by the env.
    self.n_x = None
    self.n_y = None
    self.states = None
    # Experience buffers used to (re)fit the regressor.
    self.X_memory = []
    self.Y_memory = []
    # Neural-network regressor used as a function approximator.
    self.ia = MLPRegressor(warm_start=True,
                           max_iter=200,
                           early_stopping=False,
                           hidden_layer_sizes=(20, 10, 5),
                           learning_rate_init=1e-3,
                           activation='identity')
    self.epsilon = 0.5  # exploration probability
    self.gamma = 0.8    # discount factor
    self.alpha = 0.5    # learning rate
    self.last_action = None
    self.last_state = None
    self.is_action_possible = None
    self.n_max_action = params["max_action_per_episode"]
    self.first_fit = True
    self.key_taken = []
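# A minimal sketch of how such a warm-started regressor is typically
# driven: read Q-values with predict() and periodically refit on the
# accumulated (features, target) buffers. Method names and the
# fallback value are illustrative assumptions, not code from the
# class above.
def q_value(self, features):
    # predict() raises NotFittedError before the first fit() call,
    # hence the first_fit guard.
    if self.first_fit:
        return 0.0
    return float(self.ia.predict(np.asarray(features).reshape(1, -1))[0])

def refit(self):
    # warm_start=True makes successive fit() calls continue from the
    # current weights instead of reinitializing the network.
    self.ia.fit(np.asarray(self.X_memory), np.asarray(self.Y_memory))
    self.first_fit = False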
def __init__(self, params): """See documentation in the base class""" Agent.__init__(self, params) #strategie initiale (au depart l'agent ne connait pas les deplacements optimaux a effectuer) #tableau de taille nombre d'etat * nombre d'actions possibles self.Q = np.zeros((int(params['num_cells_grid1D']),2)) #Probabilite d'exploration self.exploration = 0.05
def __init__(self, params): """See documentation in the base class""" Agent.__init__(self, params) self.epsilon = 1 self.n_key_max = 5 self.alpha = 0.4 self.gamma = 0.9999 self.key_taken = [] self.q = np.random.rand(self.num_states, self.num_actions, self.n_key_max)
def __init__(self, params): """See documentation in the base class""" Agent.__init__(self, params) #Nombre de lignes self.l = int(params['ligne']) #Nombre de colonnes self.c = int(params['colonne']) #strategie initiale (au depart l'agent ne connait pas les deplacements optimaux a effectuer) #tableau de taille nombre d'etat * nombre d'actions possibles self.Q = np.zeros((self.c * self.l, 4)) #Probabilite d'exploration self.exploration = 0.05
def __init__(self, params): """initialisation de l'agent""" Agent.__init__(self, params) self.l = int(params['ligne']) self.c = int(params['colonne']) #strategie initiale (au depart l'agent ne connait pas les deplacements optimaux a effectuer) #tableau de taille nombre d'etat * nombre d'actions possibles self.Q = np.zeros((self.l * self.c, 4)) #Probabilite d'exploration self.exploration = 0.05 self.alpha = 0.6 #coefficient optimises self.gamma = 1
def __init__(self, params): """See documentation in the base class""" Agent.__init__(self, params) #Nombre de lignes self.l = int(params['ligne']) #Nombre de colonnes self.c = int(params['colonne']) #strategie initiale (au depart l'agent ne connait pas les deplacements optimaux a effectuer) #tableau de taille nombre d'etat * nombre d'actions possibles self.Q = np.zeros((self.c * self.l, 4)) #Probabilite d'exploration self.exploration = 0.05 #Coefficients pour la formule de Q self.alpha = 1 self.gamma = 1 #vitesse de convergence initialisee (etape a partir de laquelle les recompenses totales sont superieures a 60 self.vitesse = 0 #numero de l'episode qui vient de finir self.ep = 0
def __init__(self, params): """See documentation in the base class""" Agent.__init__(self, params)
def __init__(self, params): """See documentation in the base class""" Agent.__init__(self, params) self.final_state = EnvironmentGrid2D(params).terminal_state