class ControloAprendRef(Controlo):
    """Controller that learns from a reward signal while it acts.

    On every perception it optionally updates an ``AprendQ`` mechanism
    with the previous (state, action) pair, then asks a
    ``SelAccaoEGreedy`` selector for the next action.
    """

    def __init__(self, r_max=100, a=None, s=None):
        """Store the initial state/action and build the learning parts.

        Args:
            r_max: Magnitude added to / subtracted from the reward in
                ``_reforco`` on load / collision.
            a: Initial "previous action"; ``None`` means no learning
                update is done on the first call to ``_processar``.
            s: Initial "previous state".
        """
        self._r_max, self._a, self._s = r_max, a, s
        # Values handed to SelAccaoEGreedy / AprendQ below; presumably
        # learning rate, discount factor and exploration rate -- confirm
        # against those classes.
        self._alpha, self._gama, self._epsilon = 1, 0.9, 0.01
        self._accoes = dirmov()

        memoria = MemoriaEsparsa()
        self._mem_aprend = memoria
        seletor = SelAccaoEGreedy(memoria, self._accoes, self._epsilon)
        self._mec_sel_accao = seletor
        self._mec_aprend = AprendQ(memoria, seletor, self._alpha, self._gama)

    def _processar(self, percepcao):
        """Learn from the last transition and choose the next action.

        Args:
            percepcao: Perception object; its ``posicao`` attribute is
                used as the new state.

        Returns:
            ``MOVER(action)`` for the selected action, or ``None`` when
            the selector returns ``None``.
        """
        estado_novo = percepcao.posicao

        if self._a is not None:
            # A previous action exists, so the transition
            # (self._s, self._a) -> estado_novo can be learned from.
            reforco = self._reforco(percepcao, self._s, estado_novo)
            self._mec_aprend._aprender(self._s, self._a, reforco, estado_novo)
            # NOTE(review): the original layout was flattened onto one
            # line; this visualisation call is assumed to belong to the
            # learning branch -- confirm against the original file.
            psa.vismod.accaovalordir(self._mem_aprend._memoria, self._accoes)

        accao_nova = self._mec_sel_accao._selecionar_accao(estado_novo)

        # Remember the current pair for the next learning update.
        self._s = estado_novo
        self._a = accao_nova

        if accao_nova is None:
            return None
        return MOVER(accao_nova)

    def _reforco(self, percepcao, s, sn):
        """Compute the reward for moving from ``s`` to ``sn``.

        The base value is ``-dist(s, sn)``.  ``self._r_max`` is
        subtracted when ``percepcao.colisao`` is truthy; otherwise it is
        added when ``percepcao.carga`` is truthy.

        Args:
            percepcao: Perception providing ``colisao`` and ``carga``.
            s: Previous state.
            sn: New state.

        Returns:
            The reward value.
        """
        custo_movimento = -dist(s, sn)
        if percepcao.colisao:
            return custo_movimento - self._r_max
        if percepcao.carga:
            return custo_movimento + self._r_max
        return custo_movimento
def __init__(self, accoes):
    """Build the memory, action selector and learner for ``accoes``.

    Args:
        accoes: Collection of actions, stored on the instance and
            passed to ``SelAccaoEGreedy``.
    """
    # Values handed to SelAccaoEGreedy / AprendQ below; presumably
    # learning rate, discount factor and exploration rate -- confirm
    # against those classes.
    self._alfa, self._gama, self._epsilon = 0.5, 0.9, 0.01
    self._accoes = accoes

    # The same memory object is shared by the selector and the learner.
    memoria = MemoriaEsparsa()
    self.memoria = memoria
    seletor = SelAccaoEGreedy(memoria, accoes, self._epsilon)
    self.sel_accao = seletor
    self.aprend_ref = AprendQ(memoria, seletor, self._alfa, self._gama)
def __init__(self, r_max=100, a=None, s=None):
    """Store the initial state/action and build the learning parts.

    Args:
        r_max: Stored as ``self._r_max``.
        a: Initial action, stored as ``self._a``.
        s: Initial state, stored as ``self._s``.
    """
    self._r_max, self._a, self._s = r_max, a, s
    # Values handed to SelAccaoEGreedy / AprendQ below; presumably
    # learning rate, discount factor and exploration rate -- confirm
    # against those classes.
    self._alpha, self._gama, self._epsilon = 1, 0.9, 0.01
    self._accoes = dirmov()

    # The same memory object is shared by the selector and the learner.
    memoria = MemoriaEsparsa()
    self._mem_aprend = memoria
    seletor = SelAccaoEGreedy(memoria, self._accoes, self._epsilon)
    self._mec_sel_accao = seletor
    self._mec_aprend = AprendQ(memoria, seletor, self._alpha, self._gama)
class MecAprend:
    """Bundle a memory, an action selector and a learner behind one API.

    Wraps a ``MemoriaEsparsa``, a ``SelAccaoEGreedy`` and an ``AprendQ``
    and forwards learning / action-selection calls to them.
    """

    def __init__(self, accoes):
        """Build the memory, action selector and learner for ``accoes``.

        Args:
            accoes: Collection of actions, stored on the instance and
                passed to ``SelAccaoEGreedy``.
        """
        # Values handed to SelAccaoEGreedy / AprendQ below; presumably
        # learning rate, discount factor and exploration rate -- confirm
        # against those classes.
        self._alfa, self._gama, self._epsilon = 0.5, 0.9, 0.01
        self._accoes = accoes

        # The same memory object is shared by the selector and the learner.
        memoria = MemoriaEsparsa()
        self.memoria = memoria
        seletor = SelAccaoEGreedy(memoria, accoes, self._epsilon)
        self.sel_accao = seletor
        self.aprend_ref = AprendQ(memoria, seletor, self._alfa, self._gama)

    def aprender(self, s, a, r, sn):
        """Refresh the display, then forward the transition to the learner.

        Args:
            s: State before the action.
            a: Action taken.
            r: Reward received.
            sn: State after the action.
        """
        # Display is refreshed before the update is applied.
        self.mostrar(s)
        self.aprend_ref.aprender(s, a, r, sn)

    def seleccionar_accao(self, s):
        """Return the action chosen by the selector for state ``s``."""
        accao = self.sel_accao.seleccionar_accao(s)
        return accao

    def mostrar(self, s):
        """Clear visualiser 1 and draw the learner on it.

        Args:
            s: Unused; kept for interface compatibility.
        """
        psa.vis(1).limpar()
        psa.vis(1).aprendref(self.aprend_ref)