def play(agent):
    """Play one game to completion with ``agent`` and return the final score.

    Each turn the flattened grid is handed to the agent as its observation,
    the agent's chosen action is sent to the game API, and the agent is
    rewarded with the points that move earned. A move that leaves the grid
    unchanged is penalised with a reward of -50 and followed by a random
    move (0-3) so the game keeps progressing.

    Args:
        agent: Object exposing ``integrateObservation``, ``getAction`` and
            ``giveReward`` (presumably a PyBrain agent -- confirm with the
            caller). The first element of the returned action is used as
            the move.

    Returns:
        The ``'score'`` entry of the last response returned by ``api_move``.
    """
    start_data = api_game_start()
    session_id = start_data['session_id']
    grid = start_data['grid']
    turn = 0

    while True:
        # Open question from the original author: in what form should the
        # observation be passed to the agent?  See
        # lib/pybrain/rl/experiments/experiment.py and
        # lib/pybrain/rl/environments/mazes/tasks/mdp.py for reference.
        agent.integrateObservation(numpy.array(grid).ravel())
        move = agent.getAction()
        data = api_move(session_id, move[0])

        if grid == data['grid']:
            # The chosen move changed nothing: penalise the agent, then play
            # a random move so the game cannot stall on the same state.
            agent.giveReward(numpy.array([-50]))
            rand_move = numpy.random.randint(4, size=1)[0]
            data = api_move(session_id, rand_move)
        else:
            agent.giveReward(numpy.array([data['points']]))

        turn += 1
        grid = data['grid']
        print_state(turn, data)

        if data['over']:
            # Emit a blank line after the final state; print('') behaves the
            # same under Python 2 and Python 3 (a bare ``print`` does not).
            print('')
            break

    return data['score']
def play(agent):
    """Run a single game driven by ``agent`` and return its final score.

    On every turn the current grid is flattened and given to the agent as
    an observation; the first element of the agent's action is submitted as
    the move. The agent receives the move's ``'points'`` as reward, or -50
    when the move did not change the grid, in which case a random move
    (0-3) is played instead to keep the game moving.

    Args:
        agent: Object providing ``integrateObservation``, ``getAction`` and
            ``giveReward`` (presumably a PyBrain agent -- confirm with the
            caller).

    Returns:
        The ``'score'`` value of the final ``api_move`` response.
    """
    start_data = api_game_start()
    session_id = start_data['session_id']
    grid = start_data['grid']
    turn = 0

    while True:
        # Open question from the original author: in what form should the
        # observation be passed to the agent?  See
        # lib/pybrain/rl/experiments/experiment.py and
        # lib/pybrain/rl/environments/mazes/tasks/mdp.py for reference.
        observation = numpy.array(grid).ravel()
        agent.integrateObservation(observation)
        move = agent.getAction()
        data = api_move(session_id, move[0])

        if grid == data['grid']:
            # Grid unchanged means the move had no effect: give a fixed
            # penalty and fall back to a random move.
            agent.giveReward(numpy.array([-50]))
            rand_move = numpy.random.randint(4, size=1)[0]
            data = api_move(session_id, rand_move)
        else:
            agent.giveReward(numpy.array([data['points']]))

        turn += 1
        grid = data['grid']
        print_state(turn, data)

        if data['over']:
            # Blank separator line once the game ends; print('') works on
            # both Python 2 and Python 3, unlike a bare ``print``.
            print('')
            break

    return data['score']
def play(agent):
    """Play one "simple" game with random moves while feeding ``agent``.

    The game is started through ``api_simple_game_start`` (the regular
    ``api_game_start`` was used previously). Each turn the flattened grid
    is given to the agent and its action is requested, but the move
    actually submitted is random (0-3). The agent is rewarded with the
    move's ``'points'``, or with -50 when the grid did not change, in which
    case a second random move is played.

    Args:
        agent: Object providing ``integrateObservation``, ``getAction`` and
            ``giveReward`` (presumably a PyBrain agent -- confirm with the
            caller).

    Returns:
        The ``'score'`` value of the final ``api_move`` response.
    """
    start_data = api_simple_game_start()
    session_id = start_data['session_id']
    grid = start_data['grid']
    turn = 0

    while True:
        # Open question from the original author: in what form should the
        # observation be passed to the agent?  See
        # lib/pybrain/rl/experiments/experiment.py and
        # lib/pybrain/rl/environments/mazes/tasks/mdp.py for reference.
        agent.integrateObservation(numpy.array(grid).ravel())

        # The action is still requested from the agent even though it is
        # not played below.
        # NOTE(review): the agent is rewarded for a random move rather than
        # the action it chose (previously ``api_move(session_id, move[0])``)
        # -- confirm this is an intentional experiment.
        agent.getAction()
        data = api_move(session_id, numpy.random.randint(4, size=1)[0])

        if grid == data['grid']:
            # The move changed nothing: penalise and try another random move.
            agent.giveReward(numpy.array([-50]))
            data = api_move(session_id, numpy.random.randint(4, size=1)[0])
        else:
            agent.giveReward(numpy.array([data['points']]))
        # An alternative reward scheme was tried here and disabled: reward
        # the agent with data['score'] each time the score passes the next
        # 100-point threshold (then raise the threshold by 100), and with 0
        # otherwise.

        turn += 1
        grid = data['grid']
        print_state(turn, data)

        if data['over']:
            # Blank separator line once the game ends; print('') works on
            # both Python 2 and Python 3, unlike a bare ``print``.
            print('')
            break

    return data['score']