def run():
    """Main control loop of the spiking actor network.

    Repeatedly: wire the Poisson stimulus onto the state population for the
    agent's current position, let the WTA action populations compete under
    noise, pick the action population that spiked most since the last
    decision, execute that action in the environment, and — when an end
    position is reached — reset for a new trial.

    Relies on module-level globals defined elsewhere in this file (not all
    visible in this chunk): ``nest``, ``env``, ``stimulus``, ``states``,
    ``wta_noise``, ``sd_actions``, ``NUM_ITERATIONS``.
    """
    # Main loop
    #values_hist = [np.ravel(values.copy())]
    actions_executed = 0
    # Simulation-time cutoff (ms): only spikes after this count toward the
    # next action decision.
    last_action_time = 0
    position = env.getState().copy()
    in_end_position = False
    while actions_executed < NUM_ITERATIONS:
        if not in_end_position:
            # stimulate new state: detach the stimulus from the previous
            # position's state population, then attach it to the current one.
            nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.})
            position = env.getState().copy()
            nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 1.})
            # Turn noise on so the WTA circuit can break symmetry and pick a winner.
            nest.SetStatus(wta_noise, {'rate': 3000.})
            # 8 x 5 ms = 40 ms of simulation for the competition phase.
            for t in range(8):
                nest.Simulate(5)
                time.sleep(0.01)
            # Read out the winner: the action population with the most spikes
            # recorded since last_action_time.
            max_rate = -1
            chosen_action = -1
            for i in range(len(sd_actions)):
                rate = len([e for e in nest.GetStatus([sd_actions[i]], keys='events')[0]['times'] if e > last_action_time]) # calc the "firerate" of each actor population
                if rate > max_rate:
                    max_rate = rate # the population with the highest rate wins
                    chosen_action = i
            nest.SetStatus(stimulus, {'rate': 5000.})
            possible_actions = env.get_possible_actions()
            # NOTE(review): new_position and outcome are unused in this loop —
            # presumably consumed by learning/plasticity code elsewhere; verify.
            new_position, outcome, in_end_position = env.move(possible_actions[chosen_action])
            # Silence the WTA noise and let the network settle (4 x 5 ms = 20 ms).
            nest.SetStatus(wta_noise, {'rate': 0.})
            for t in range(4):
                nest.Simulate(5)
                time.sleep(0.01)
            # 40 ms competition + 20 ms settling = 60 ms of simulated time per action.
            last_action_time += 60
            actions_executed += 1
        else:
            # Trial finished: reset the environment and detach the stimulus
            # from the terminal position's state population.
            position = env.get_agent_pos().copy()
            _, in_end_position = env.init_new_trial()
            nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.})
import time
import os
import json

# BUGFIX: numpy was used below (numpy.array / numpy.random.rand) but never
# imported, which would raise NameError at module load.
import numpy

import mpi_environment as env

# File names used to persist the learned policy / value tables between runs.
policy_filename = "pong_policy.dat"
values_filename = "pong_values.dat"

# Learning hyper-parameters.
alpha = 0.1  # values / critic learning parameter
beta = 0.1   # actor learning parameter
gamma = 0.9  # error signal: future states parameter

# Environment dimensions; env.getWorldDim() returns (rows, cols) = (y, x).
world_dim = env.getWorldDim()
world_dim = {'y': world_dim[0], 'x': world_dim[1]}
num_possible_moves = env.getActionDim()
state = env.getState()

pol_file = None
val_file = None

if os.path.exists(policy_filename):
    # Resume from a previously saved policy (JSON-encoded array).
    # The context manager replaces the manual open/close; pol_file is still
    # bound to the (closed) r+ handle afterwards, as before.
    with open(policy_filename, 'r+') as pol_file:
        policy = numpy.array(json.loads(pol_file.read()))
else:
    # No saved policy: start from a uniformly random one.
    # print(...) instead of the Python-2-only print statement.
    print(num_possible_moves)
    policy = numpy.random.rand(world_dim['y'], world_dim['x'], num_possible_moves)
    #pol_file = open(policy_filename, 'w+')