class ControloAprendRef(Controlo):
    """Controller that learns from reinforcement while choosing movements.

    The constructor wires together a ``MemoriaEsparsa`` learning memory, a
    ``SelAccaoEGreedy`` action selector and an ``AprendQ`` learning mechanism.
    NOTE(review): the helper names suggest Q-learning with epsilon-greedy
    selection; confirm against their definitions.
    """

    def __init__(self, r_max=100, a=None, s=None):
        """Set up the learning components.

        Args:
            r_max: Magnitude added to (load) or subtracted from (collision)
                the reinforcement computed in ``_reforco``.
            a: Previously selected action, or ``None`` when there is none yet.
            s: Previously observed state, or ``None`` when there is none yet.
        """
        self._r_max = r_max
        self._a = a
        self._s = s
        # Fixed hyper-parameters handed to the learner / action selector.
        self._alpha = 1
        self._gama = 0.9
        self._epsilon = 0.01
        self._accoes = dirmov()
        self._mem_aprend = MemoriaEsparsa()
        self._mec_sel_accao = SelAccaoEGreedy(
            self._mem_aprend, self._accoes, self._epsilon
        )
        self._mec_aprend = AprendQ(
            self._mem_aprend, self._mec_sel_accao, self._alpha, self._gama
        )

    def _processar(self, percepcao):
        """Learn from the last transition and pick the next action.

        Args:
            percepcao: Perception object; ``posicao`` is read here and
                ``colisao`` / ``carga`` are read by ``_reforco``.

        Returns:
            ``MOVER(an)`` for the selected action ``an``, or ``None`` when the
            selector returns no action.
        """
        sn = percepcao.posicao
        # Learning needs a previous action, so it is skipped until one exists.
        if self._a is not None:
            r = self._reforco(percepcao, self._s, sn)
            self._mec_aprend._aprender(self._s, self._a, r, sn)
            # Presumably refreshes the action-value visualisation; confirm
            # against psa.vismod.
            psa.vismod.accaovalordir(self._mem_aprend._memoria, self._accoes)
        an = self._mec_sel_accao._selecionar_accao(sn)
        # Remember the current state/action for the next learning step.
        self._s = sn
        self._a = an
        if an is not None:
            return MOVER(an)
        return None

    def _reforco(self, percepcao, s, sn):
        """Compute the reinforcement for the transition from ``s`` to ``sn``.

        The base value is ``-dist(s, sn)``. A collision subtracts ``r_max``;
        otherwise a load adds ``r_max``.

        Args:
            percepcao: Perception object providing ``colisao`` and ``carga``.
            s: Previous state.
            sn: New state.

        Returns:
            The reinforcement value.
        """
        r = -dist(s, sn)
        if percepcao.colisao:
            r -= self._r_max
        elif percepcao.carga:
            r += self._r_max
        return r