예제 #1
0
파일: snake.py 프로젝트: xpinguin/mpf_rl
	def __init__(self, n='bot', c=False, sc=COBALTGREEN, sb=GOLDENROD, r=20, p=10, a=-15, g=[50,-10,30,20,35,100,30]):
		"""
		Create a snake that is steered by an MPF hierarchy.

		n, c, sc and sb are passed straight to Snake.__init__. The remaining
		parameters (r, p, a, g) are accepted but never read here; presumably
		they mirror another opponent's constructor -- confirm. g has a list
		default that is shared between calls, which is harmless only because
		it is never touched.

		The grid is split into UNIT_VF-sized tiles. Each tile gets one sensor
		unit, a single 4-input actuator unit is added for the direction, and
		all of them are handed to MPF_Hierarchy.
		"""
		Snake.__init__(self, n, c, sc, sb)

		# direction <-> actuator slot, in both directions
		self._dirs_names = [LEFT, UP, RIGHT, DOWN]
		self._dirs_ids = {LEFT: 0, UP: 1, RIGHT: 2, DOWN: 3}

		self.reinforcement = 0.0
		self._iterations_num = 0

		# init MPF hierarchy
		self.UNIT_VF = (4, 4)
		vf_rows, vf_cols = self.UNIT_VF
		UNIT_INP_DIM = vf_rows*vf_cols

		grid_rows = mth._nd_array_grid.shape[0]
		grid_cols = mth._nd_array_grid.shape[1]

		self._l0_units = []
		self._grid_map = []

		# Floor division: rows/columns that do not fill a whole tile are left
		# out of the map.
		for i in xrange(grid_rows // vf_rows):
			for j in xrange(grid_cols // vf_cols):
				self._l0_units.append(MPF_Unit_RL(
					ss_class = Miller_SOM,
					ts_class = Miller_SOM,
					input_dim = UNIT_INP_DIM,
					ss_shape = (10, 10),
					ts_shape = (8, 8),
					parent_unit = None,
					unit_type = MPF_UT_SENSOR
				))

				# row-major flat indices of the cells covered by tile (i, j)
				for x in xrange(i*vf_rows, (i+1)*vf_rows):
					for y in xrange(j*vf_cols, (j+1)*vf_cols):
						self._grid_map.append(x*grid_cols + y)

		# One slot per mapped grid cell plus 4 actuator slots. The length is
		# taken from _grid_map (not from the full grid) so the vector always
		# has exactly one slot per mapped cell, even when the grid size is not
		# a multiple of UNIT_VF.
		self._input_vec = np.zeros(len(self._grid_map) + 4)

		# the actuator unit goes last in the list
		self._l0_units.append(MPF_Unit_RL(
			ss_class = Miller_SOM,
			ts_class = Miller_SOM,
			input_dim = 4,
			ss_shape = (1, 4),
			ts_shape = (1, 10),
			parent_unit = None,
			unit_type = MPF_UT_ACTUATOR)
		)

		# NOTE(review): earlier experiments (manual initialisation of the
		# actuator's ss.neurons, init_generative_gmm on the units) were
		# disabled in the original code and are not performed here.

		# NOTE(review): the dump path is a hard-coded drive-letter path; the
		# meaning of 4 and 20 is defined by MPF_Hierarchy -- confirm there.
		self._mpf_h = MPF_Hierarchy(self._l0_units, 4, 20, "K:/__temp/mpf_rl_dumps")

		# initial direction (index 1 of _dirs_names, i.e. UP)
		self.direction = self._dirs_names[1]
예제 #2
0
파일: snake.py 프로젝트: xpinguin/mpf_rl
class OpponentMPF(Opponent):
	"""
	Opponent snake whose direction is sampled from the output of an MPF
	hierarchy, with a scalar reinforcement value fed back on every step.
	"""

	def __init__(self, n='bot', c=False, sc=COBALTGREEN, sb=GOLDENROD, r=20, p=10, a=-15, g=[50,-10,30,20,35,100,30]):
		"""
		Create the snake and build its MPF hierarchy.

		n, c, sc and sb are passed straight to Snake.__init__ (Opponent's own
		constructor is not called). The remaining parameters (r, p, a, g) are
		accepted but never read here; presumably they mirror Opponent's
		signature -- confirm. g has a list default that is shared between
		calls, which is harmless only because it is never touched.

		The grid is split into UNIT_VF-sized tiles. Each tile gets one sensor
		unit, a single 4-input actuator unit is added for the direction, and
		all of them are handed to MPF_Hierarchy.
		"""
		Snake.__init__(self, n, c, sc, sb)

		# direction <-> actuator slot, in both directions
		self._dirs_names = [LEFT, UP, RIGHT, DOWN]
		self._dirs_ids = {LEFT: 0, UP: 1, RIGHT: 2, DOWN: 3}

		self.reinforcement = 0.0
		self._iterations_num = 0

		# init MPF hierarchy
		self.UNIT_VF = (4, 4)
		vf_rows, vf_cols = self.UNIT_VF
		UNIT_INP_DIM = vf_rows*vf_cols

		grid_rows = mth._nd_array_grid.shape[0]
		grid_cols = mth._nd_array_grid.shape[1]

		self._l0_units = []
		self._grid_map = []

		# Floor division: rows/columns that do not fill a whole tile are left
		# out of the map.
		for i in xrange(grid_rows // vf_rows):
			for j in xrange(grid_cols // vf_cols):
				self._l0_units.append(MPF_Unit_RL(
					ss_class = Miller_SOM,
					ts_class = Miller_SOM,
					input_dim = UNIT_INP_DIM,
					ss_shape = (10, 10),
					ts_shape = (8, 8),
					parent_unit = None,
					unit_type = MPF_UT_SENSOR
				))

				# row-major flat indices of the cells covered by tile (i, j)
				for x in xrange(i*vf_rows, (i+1)*vf_rows):
					for y in xrange(j*vf_cols, (j+1)*vf_cols):
						self._grid_map.append(x*grid_cols + y)

		# One slot per mapped grid cell plus 4 actuator slots. The length is
		# taken from _grid_map (not from the full grid) so the slice
		# assignment in updateDirection matches even when the grid size is
		# not a multiple of UNIT_VF.
		self._input_vec = np.zeros(len(self._grid_map) + 4)

		# the actuator unit goes last in the list
		self._l0_units.append(MPF_Unit_RL(
			ss_class = Miller_SOM,
			ts_class = Miller_SOM,
			input_dim = 4,
			ss_shape = (1, 4),
			ts_shape = (1, 10),
			parent_unit = None,
			unit_type = MPF_UT_ACTUATOR)
		)

		# NOTE(review): earlier experiments (manual initialisation of the
		# actuator's ss.neurons, init_generative_gmm on the units) were
		# disabled in the original code and are not performed here.

		# NOTE(review): the dump path is a hard-coded drive-letter path; the
		# meaning of 4 and 20 is defined by MPF_Hierarchy -- confirm there.
		self._mpf_h = MPF_Hierarchy(self._l0_units, 4, 20, "K:/__temp/mpf_rl_dumps")

		# initial direction (index 1 of _dirs_names, i.e. UP)
		self.direction = self._dirs_names[1]

	def ressurect(self):
		"""
		Revive a dead snake: reset its start coordinates and direction, lower
		the reinforcement by 1.0 and restart the iteration counter.

		Does nothing while the snake is still alive.
		"""
		if self.alive:
			return

		self.alive = True
		self.coords = mth.getStartCoords(1)
		self.direction = self._dirs_names[1]

		# dying is penalised
		self.reinforcement -= 1.0

		self._iterations_num = 0

	def __sample_from_pmf(self, pmf, nominals):
		"""
		Draw one element of `nominals` with probability proportional to the
		absolute value of the matching `pmf` entry.

		pmf is overwritten in place with its absolute values. nominals must
		provide one entry per pmf entry. Raises ValueError when all pmf
		entries are zero.
		"""
		# `abs` has to be the NumPy ufunc here (the builtin takes no `out`);
		# presumably it comes from a star import of numpy -- confirm.
		abs(pmf, out = pmf)

		# -- CDF
		bins = cumsum(pmf)

		if (bins[-1] == 0.0):
			raise ValueError("cannot sample from an all-zero PMF: %s" % (pmf,))

		bins /= bins[-1] # normalization

		# digitize returns a 1-element array for the 1-element sample; pull
		# out a plain int, since indexing a sequence with an array is
		# rejected by newer NumPy releases.
		slot = int(digitize(random.random_sample(1), bins)[0])

		# list() so that non-list iterables (e.g. dict views) can be indexed
		return list(nominals)[slot]

	def updateDirection(self, grid):
		"""
		Run one step of the hierarchy on `grid` and pick the next direction.

		The mapped grid cells and a one-hot encoding of the current direction
		form the input vector; the hierarchy's output is treated as a
		distribution over the four direction slots and sampled. Every 20th
		iteration survived raises the reinforcement by 0.1, capped at 1.0.
		"""
		# map grid to linear input vector
		self._input_vec[:-4] = grid.flatten()[self._grid_map]

		# actuator value from _really_ taken action (one-hot over 4 slots)
		self._input_vec[-4:] = 0.0
		self._input_vec[-4 + self._dirs_ids[self.direction]] = 1.0

		# evaluate hierarchy
		res_dir_distr = self._mpf_h.evaluate(self._input_vec, self.reinforcement)

		if self._iterations_num > 0 and self._iterations_num % 20 == 0:
			print("\n !!! Stil alive for %d !!!\n" % (self._iterations_num))

			self.reinforcement = min(self.reinforcement + 0.1, 1.0)

		# Decode the actuator's value. Slots are addressed by index so that
		# slot i always means _dirs_names[i], matching the one-hot encoding
		# above; the order of dict.values() is not a reliable slot order.
		dir_id = self.__sample_from_pmf(res_dir_distr, range(len(self._dirs_names)))
		self.direction = self._dirs_names[dir_id]
		print("%s %s %s" % (res_dir_distr, self.direction, self._mpf_h.reinforcement_prime))

		self._iterations_num += 1

	def apply_reinforcement(self, grid, reinforcement):
		"""
		Currently a no-op; both arguments are ignored.

		The original "punishment" pass is disabled. It rebuilt the input
		vector from `grid` and the current direction, stored `reinforcement`
		on the instance, evaluated the hierarchy once with it and printed the
		result.
		"""
		pass