# Example 1
# 0
def valueIteration(discountFactor):
	"""Run value iteration for the predator-prey pursuit task on an 11x11 toroidal grid.

	A state is the pair (predator_location, prey_location); states where the two
	coincide are terminal (prey caught). Catching the prey yields an immediate
	reward of 10.0; all other transitions yield 0 and the discounted expected
	value of the successor state.

	Args:
		discountFactor: discount applied to future state values (gamma).

	Returns:
		(numIt, values, policy) where numIt is the number of sweeps until
		convergence, values maps (predloc, preyloc) -> optimal value, and
		policy maps a state to the greedy predator move.
	"""
	# All cells of the 11x11 grid.
	alllocations = [(x, y) for x in range(11) for y in range(11)]

	# Initialize V(s) = 0 for every non-terminal state; terminal states
	# (predator on top of prey) are never stored or backed up.
	values = {}
	bestMoves = {}
	for predloc in alllocations:
			for preyloc in alllocations:
				if preyloc != predloc:
					values[(predloc, preyloc)] = 0

	agent = Agent(0, 0)

	deltas = []
	epsilon = 0.01
	delta = 1
	numIt = 0
	# Perform value iteration according to the standard pseudo-code:
	# sweep all states until the largest value change drops below epsilon.
	while delta > epsilon:
		delta = 0
		newValues = {}
		# Loop over all non-terminal states.
		for predloc in alllocations:
			for preyloc in alllocations:
				if predloc == preyloc:
					continue  # terminal state, skip
				agent.setLocation(predloc)
				prey = Prey(*preyloc)
				temp = values[(predloc, preyloc)]
				# Bellman optimality backup: maximize over predator moves.
				bestVal = 0
				bestMove = (0, 0)
				for _prob, predMove in agent.getMoveList():
					preySum = 0
					# Predator moves with wrap-around (toroidal grid).
					newPredloc = ((predloc[0] + predMove[0]) % 11, (predloc[1] + predMove[1]) % 11)
					if newPredloc == preyloc:
						# Capture: immediate reward, no future term (terminal).
						preySum += 10.0
					else:
						# Expectation over the prey's stochastic responses.
						for preyProb, newPreyloc in prey.expand(newPredloc):
							preySum += preyProb * discountFactor * values[(newPredloc, newPreyloc)]
					if bestVal <= preySum:
						bestVal = preySum
						bestMove = predMove
				newValues[(predloc, preyloc)] = bestVal
				bestMoves[(predloc, preyloc)] = bestMove
				delta = max(delta, np.abs(bestVal - temp))
		values = newValues
		deltas.append(delta)
		numIt += 1

	# Greedy policy with respect to the optimal values computed above.
	def policy(state):
		"""Return the greedy predator move for the given (predloc, preyloc) state."""
		predloc, preyloc = state
		return bestMoves[(predloc, preyloc)]

	return numIt, values, policy
# Example 2
# 0
def valueIteration():
	"""Run value iteration on the predator-prey task using relative state encoding.

	Instead of absolute (predloc, preyloc) pairs, states are encoded via
	rewriteStates as the relative offset between predator and prey, which
	shrinks the state space to the offsets in [-5, 5] x [-5, 5]. The offset
	(0, 0) (prey caught) is terminal and excluded from the sweeps.

	Returns:
		policy: a function mapping an absolute (predloc, preyloc) state to
		the greedy predator move under the converged values.
	"""
	# All relative offsets; (0, 0) is the terminal "caught" state.
	alldiffs = [(x, y) for x in range(-5, 6) for y in range(-5, 6)]
	alldiffs.remove((0, 0))

	# The relative positions vary from -5 up to 5, in both dimensions.
	values = {}
	for x in range(-5, 6):
		for y in range(-5, 6):
			values[(x, y)] = 0

	bestMoves = {}
	agent = Agent(0, 0)

	deltas = []
	discountFactor = 0.8
	epsilon = 0.01
	delta = 1
	# Sweep all relative states until the largest value change drops below epsilon.
	while delta > epsilon:
		delta = 0
		newValues = {}
		for diff in alldiffs:
			# We place the predator in the middle of the world; this is valid
			# because positions are encoded relatively via rewriteStates.
			predloc = (5, 5)
			preyloc = (predloc[0] + diff[0], predloc[1] + diff[1])
			curKey = rewriteStates(predloc, preyloc)
			agent.setLocation(predloc)
			prey = Prey(*preyloc)
			temp = values[curKey]
			# Bellman optimality backup: maximize over predator moves.
			bestVal = 0
			bestMove = (0, 0)
			for _prob, predMove in agent.getMoveList():
				preySum = 0
				newPredloc = agent.locAfterMove(predMove)
				if newPredloc == preyloc:
					# Capture: immediate reward, no future term (terminal).
					preySum += 10.0
				else:
					# Expectation over the prey's responses, looked up in
					# relative coordinates via rewriteStates.
					for preyProb, newPreyloc in prey.expand(newPredloc):
						preySum += preyProb * discountFactor * values[rewriteStates(newPredloc, newPreyloc)]
				if bestVal <= preySum:
					bestVal = preySum
					bestMove = predMove
			newValues[curKey] = bestVal
			bestMoves[curKey] = bestMove
			delta = max(delta, np.abs(bestVal - temp))
		values = newValues
		deltas.append(delta)

	# Greedy policy with respect to the optimal values computed above.
	def policy(state):
		"""Return the greedy predator move for the given (predloc, preyloc) state."""
		predloc, preyloc = state
		return bestMoves[rewriteStates(predloc, preyloc)]

	return policy