def __init__(self, n='bot', c=False, sc=COBALTGREEN, sb=GOLDENROD, r=20, p=10, a=-15, g=[50,-10,30,20,35,100,30]):
    """Initialise the snake state and build the MPF hierarchy that steers it.

    The game grid is cut into ``UNIT_VF``-sized patches; each patch gets one
    sensor unit, and a single 4-input actuator unit (one input per
    direction) is appended after all sensors. ``self._grid_map`` records,
    patch by patch, the flat grid index of every covered cell so a flattened
    grid can be reordered into the layout the sensor units expect.

    Parameters:
        n, c, sc, sb: forwarded unchanged to ``Snake.__init__``.
        r, p, a, g: accepted but never read in this body (presumably kept
            for signature compatibility with a sibling constructor --
            TODO confirm). ``g`` is a mutable default, but it is never
            mutated here.
    """
    # The base initialiser is called on Snake directly.
    Snake.__init__(self, n, c, sc, sb)

    # Direction lookup tables: index -> name and name -> index.
    self._dirs_names = [LEFT, UP, RIGHT, DOWN]
    self._dirs_ids = {LEFT: 0, UP: 1, RIGHT: 2, DOWN: 3}

    # Flat input: one slot per grid cell plus 4 one-hot actuator slots.
    # NOTE(review): this line reads the grid through ``methods`` while the
    # loops below use ``mth``; presumably both alias the same module --
    # confirm against the file's imports.
    self._input_vec = np.zeros(methods._nd_array_grid.shape[0]*methods._nd_array_grid.shape[1] + 4)
    self.reinforcement = 0.0
    self._iterations_num = 0

    # init MPF hierarchy
    self.UNIT_VF = (4, 4)
    patch_rows = self.UNIT_VF[0]
    patch_cols = self.UNIT_VF[1]
    patch_size = patch_rows * patch_cols
    self._l0_units = []
    self._grid_map = []

    # Loop-invariant grid dimensions, read once.
    grid_rows = mth._nd_array_grid.shape[0]
    grid_cols = mth._nd_array_grid.shape[1]

    # Explicit floor division: only whole patches get a sensor unit.
    # NOTE(review): if the grid dimensions are not multiples of UNIT_VF the
    # map covers fewer cells than ``_input_vec[:-4]`` holds.
    for patch_i in xrange(grid_rows // patch_rows):
        for patch_j in xrange(grid_cols // patch_cols):
            self._l0_units.append(
                MPF_Unit_RL(
                    ss_class=Miller_SOM,
                    ts_class=Miller_SOM,
                    input_dim=patch_size,
                    ss_shape=(10, 10),
                    ts_shape=(8, 8),
                    parent_unit=None,
                    unit_type=MPF_UT_SENSOR,
                )
            )
            # Row-major flat indices of the cells inside this patch.
            row_start = patch_i * patch_rows
            col_start = patch_j * patch_cols
            for x in xrange(row_start, row_start + patch_rows):
                for y in xrange(col_start, col_start + patch_cols):
                    self._grid_map.append(x * grid_cols + y)

    # The actuator unit goes last, matching the trailing 4 input slots.
    self._l0_units.append(
        MPF_Unit_RL(
            ss_class=Miller_SOM,
            ts_class=Miller_SOM,
            input_dim=4,
            ss_shape=(1, 4),
            ts_shape=(1, 10),
            parent_unit=None,
            unit_type=MPF_UT_ACTUATOR,
        )
    )

    # Earlier experiments (disabled in the original) seeded the actuator
    # neurons by hand and called ``init_generative_gmm`` on individual
    # units before building the hierarchy.

    # The positional arguments 4 and 20 and the path string are interpreted
    # by MPF_Hierarchy, which is not visible here.
    self._mpf_h = MPF_Hierarchy(self._l0_units, 4, 20, "K:/__temp/mpf_rl_dumps")

    # Further disabled experiments called ``init_generative_gmm('full')``
    # on the top unit and its children.

    # initial direction
    self.direction = self._dirs_names[1]
class OpponentMPF(Opponent):
    """Opponent snake that picks its direction by sampling an MPF hierarchy.

    Every call to ``updateDirection`` feeds the current grid and the
    direction actually taken into the hierarchy, together with the running
    reinforcement value, and samples the next direction from the
    distribution the hierarchy returns.
    """

    def __init__(self, n='bot', c=False, sc=COBALTGREEN, sb=GOLDENROD, r=20, p=10, a=-15, g=[50,-10,30,20,35,100,30]):
        """Initialise the snake state and build the MPF hierarchy.

        The game grid is cut into ``UNIT_VF``-sized patches; each patch gets
        one sensor unit, and a single 4-input actuator unit (one input per
        direction) is appended after all sensors. ``self._grid_map``
        records, patch by patch, the flat grid index of every covered cell.

        Parameters:
            n, c, sc, sb: forwarded unchanged to ``Snake.__init__``.
            r, p, a, g: accepted but never read in this body (presumably
                kept for signature compatibility with ``Opponent`` --
                TODO confirm). ``g`` is a mutable default, but it is never
                mutated here.
        """
        # The base initialiser is called on Snake directly, so
        # Opponent.__init__ does not run.
        Snake.__init__(self, n, c, sc, sb)

        # Direction lookup tables: index -> name and name -> index.
        self._dirs_names = [LEFT, UP, RIGHT, DOWN]
        self._dirs_ids = {LEFT: 0, UP: 1, RIGHT: 2, DOWN: 3}

        # Flat input: one slot per grid cell plus 4 one-hot actuator slots.
        # NOTE(review): this line reads the grid through ``methods`` while
        # the loops below use ``mth``; presumably both alias the same
        # module -- confirm against the file's imports.
        self._input_vec = np.zeros(methods._nd_array_grid.shape[0]*methods._nd_array_grid.shape[1] + 4)
        self.reinforcement = 0.0
        self._iterations_num = 0

        # init MPF hierarchy
        self.UNIT_VF = (4, 4)
        patch_rows = self.UNIT_VF[0]
        patch_cols = self.UNIT_VF[1]
        patch_size = patch_rows * patch_cols
        self._l0_units = []
        self._grid_map = []

        # Loop-invariant grid dimensions, read once.
        grid_rows = mth._nd_array_grid.shape[0]
        grid_cols = mth._nd_array_grid.shape[1]

        # Explicit floor division: only whole patches get a sensor unit.
        # NOTE(review): if the grid dimensions are not multiples of UNIT_VF
        # the map covers fewer cells than ``_input_vec[:-4]`` holds.
        for patch_i in xrange(grid_rows // patch_rows):
            for patch_j in xrange(grid_cols // patch_cols):
                self._l0_units.append(
                    MPF_Unit_RL(
                        ss_class=Miller_SOM,
                        ts_class=Miller_SOM,
                        input_dim=patch_size,
                        ss_shape=(10, 10),
                        ts_shape=(8, 8),
                        parent_unit=None,
                        unit_type=MPF_UT_SENSOR,
                    )
                )
                # Row-major flat indices of the cells inside this patch.
                row_start = patch_i * patch_rows
                col_start = patch_j * patch_cols
                for x in xrange(row_start, row_start + patch_rows):
                    for y in xrange(col_start, col_start + patch_cols):
                        self._grid_map.append(x * grid_cols + y)

        # The actuator unit goes last, matching the trailing 4 input slots.
        self._l0_units.append(
            MPF_Unit_RL(
                ss_class=Miller_SOM,
                ts_class=Miller_SOM,
                input_dim=4,
                ss_shape=(1, 4),
                ts_shape=(1, 10),
                parent_unit=None,
                unit_type=MPF_UT_ACTUATOR,
            )
        )

        # Earlier experiments (disabled in the original) seeded the actuator
        # neurons by hand and called ``init_generative_gmm`` on individual
        # units before building the hierarchy.

        # The positional arguments 4 and 20 and the path string are
        # interpreted by MPF_Hierarchy, which is not visible here.
        self._mpf_h = MPF_Hierarchy(self._l0_units, 4, 20, "K:/__temp/mpf_rl_dumps")

        # Further disabled experiments called ``init_generative_gmm('full')``
        # on the top unit and its children.

        # initial direction
        self.direction = self._dirs_names[1]

    def ressurect(self):
        """Revive a dead snake at the fixed start position, with a penalty.

        When ``self.alive`` is false: marks the snake alive, resets its
        coordinates to ``mth.getStartCoords(1)`` and its direction to
        ``self._dirs_names[1]``, subtracts 1.0 from ``self.reinforcement``
        and resets the iteration counter. Does nothing for a live snake.
        """
        # NOTE(review): the source's indentation was lost; the penalty and
        # the counter reset are assumed to sit inside this guard -- confirm.
        if self.alive:
            return

        self.alive = True
        self.coords = mth.getStartCoords(1)
        self.direction = self._dirs_names[1]
        # A randomised start (random.randint(1, 4) for both the start slot
        # and the direction) was tried and left disabled in the original.
        self.reinforcement -= 1.0
        self._iterations_num = 0

    def __sample_from_pmf(self, pmf, nominals):
        """Draw one element of ``nominals`` with probability given by ``pmf``.

        Parameters:
            pmf: array of weights; it is replaced IN PLACE by its absolute
                values (``abs(pmf, out=pmf)``), so the caller sees the
                rectified array afterwards.
            nominals: indexable sequence; entry ``k`` is returned when the
                draw falls into bin ``k`` of the normalised cumulative sum.

        Returns:
            The selected element of ``nominals``.

        Raises:
            ValueError: if the weights sum to zero, so no distribution can
                be formed.
        """
        # normalize PMF to make it "proper"
        abs(pmf, out=pmf)
        # -- CDF
        bins = cumsum(pmf)
        if bins[-1] == 0.0:
            raise ValueError("cannot sample from an all-zero PMF: %r" % (pmf,))
        bins /= bins[-1]    # normalization

        # digitize returns a 1-element array; take the scalar explicitly
        # instead of relying on implicit array-to-index conversion.
        bin_index = int(digitize(random.random_sample(1), bins)[0])
        return nominals[bin_index]

    def updateDirection(self, grid):
        """Feed the current state to the hierarchy and sample a new direction.

        Parameters:
            grid: object providing ``flatten()`` whose result can be indexed
                with the list ``self._grid_map`` (presumably the numpy game
                grid -- TODO confirm).

        Side effects: overwrites ``self._input_vec``, may raise
        ``self.reinforcement`` (capped at 1.0), sets ``self.direction``,
        increments ``self._iterations_num`` and prints debug output.
        """
        # map grid to linear input vector
        self._input_vec[:-4] = grid.flatten()[self._grid_map]
        # actuator value from _really_ taken action (one-hot over 4 slots)
        self._input_vec[-4:] = 0.0
        self._input_vec[-4 + self._dirs_ids[self.direction]] = 1.0

        # evaluate hierarchy
        res_dir_distr = self._mpf_h.evaluate(self._input_vec, self.reinforcement)

        # Survival bonus on every positive multiple of 20 iterations.
        # NOTE(review): the bonus and the clamp are assumed to belong to
        # this branch (indentation was lost in the source) -- confirm.
        survived = self._iterations_num
        if survived >= 20 and survived % 20 == 0:
            print("\n !!! Stil alive for %d !!!\n" % survived)
            self.reinforcement += 0.1
            if self.reinforcement > 1.0:
                self.reinforcement = 1.0

        # decode actuator's value
        # Sorted ids give a deterministic bin -> direction id mapping;
        # dict.values() order is arbitrary. Assumes bin k of the returned
        # distribution corresponds to direction id k, mirroring the one-hot
        # encoding above -- TODO confirm against MPF_Hierarchy.evaluate.
        direction_ids = sorted(self._dirs_ids.values())
        chosen_id = self.__sample_from_pmf(res_dir_distr, direction_ids)
        self.direction = self._dirs_names[chosen_id]
        print("%s %s %s" % (res_dir_distr, self.direction, self._mpf_h.reinforcement_prime))
        self._iterations_num += 1

    def apply_reinforcement(self, grid, reinforcement):
        """Currently a no-op; both arguments are ignored."""
        # Disabled implementation, kept for reference:
        #
        #   print "\n========== PUNISHMENT ============="
        #   #for i in xrange(15):
        #   #self.reinforcement -= 0.9 / 15.0
        #   #if (self.reinforcement < -1.0): self.reinforcement = -1.0
        #   # map grid to linear input vector
        #   self._input_vec[:-4] = grid.flatten()[self._grid_map]
        #   # actuator value from _really_ taken action
        #   self._input_vec[-4:] = 0.0
        #   self._input_vec[-4 + self._dirs_ids[self.direction]] = 1.0
        #   # evaluate hierarchy
        #   self.reinforcement = reinforcement
        #   res_dir_distr = self._mpf_h.evaluate(self._input_vec, reinforcement)
        #   print res_dir_distr, self.direction, self._mpf_h.reinforcement_prime
        #   print "========== END OF PUNISHMENT =============\n"
        pass