def critic(state, last_state, reward):
    """TD(0) critic: return the temporal-difference error for one transition.

    error = reward + gamma * V(state) - V(last_state), where V is read from
    the module-level `values` grid indexed as values[y, x].
    NOTE(review): relies on module-level `values` and `gamma`.
    """
    return (reward - values[last_state[1], last_state[0]]
            + gamma * values[state[1], state[0]])


# --- Tabular actor-critic training loop (list-based [x, y] state) ---
i = 0
while i < iterations:
    i += 1
    # Crude progress indicator: fraction of iterations completed.
    sys.stdout.write(str(float(i) / iterations) + "\r")

    direction = pick_action(state)
    # Shallow copy is sufficient: state is a flat [x, y] list, and the
    # original `state[:][:]` just copied the copy again.
    last_state = state[:]
    state, outcome = env.move(direction)
    error = critic(state, last_state, outcome)

    # Update value/policy tables only when something happened: a reward
    # was received or the agent actually changed position.
    if outcome != 0 or state != last_state:
        values[last_state[1], last_state[0]] += alpha * error
        policy[last_state[1], last_state[0], direction] += beta * error
# --- Tabular actor-critic training loop (dict state with 'x'/'y' keys) ---
i = 0
in_end_pos = False
while i < iterations:
    # Re-read the environment state every iteration; it is a dict with
    # 'x' and 'y' keys, copied so `last_state` is not aliased.
    state = env.getState().copy()
    # Keep the previous action set while sitting in an end position.
    # NOTE(review): presumably the env resets after an end position, so the
    # stale action list is intentional — confirm against env implementation.
    if not in_end_pos:
        possible_actions = env.get_possible_actions()

    i += 1
    # Crude progress indicator: fraction of iterations completed.
    sys.stdout.write(str(float(i) / iterations) + "\r")

    direction = pick_action(state)
    last_state = state.copy()
    state, outcome, in_end_pos = env.move(possible_actions[direction])
    # Reward is scaled by 100 before computing the TD error.
    error = critic(state, last_state, outcome * 100)

    # Update value/policy tables only when something happened: a reward
    # was received or the agent actually changed position.
    if outcome != 0 or state != last_state:
        values[last_state['y'], last_state['x']] += alpha * error
        policy[last_state['y'], last_state['x'], direction] += beta * error
# --- One decision step of the spiking actor-critic (NEST simulator) ---
# Drive the state population coding the current grid position and let the
# winner-take-all action circuit compete under noise for 100 ms.
nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 1.})
nest.SetStatus(wta_noise, {'rate': 3000.})
nest.Simulate(100)

# Pick the action whose spike detector fired most since the last action.
max_rate = -1
chosen_action = -1
for i in range(len(sd_actions)):
    # calc the "firerate" of each actor population: count spike events
    # recorded after last_action_time
    rate = len([e for e in nest.GetStatus([sd_actions[i]], keys='events')[0]['times'] if e > last_action_time])
    if rate > max_rate:
        max_rate = rate  # the population with the highest rate wins
        chosen_action = i

nest.SetStatus(stimulus, {'rate': 5000.})

# Execute the winning action in the grid-world environment and update the
# value table; update_values returns the TD prediction error.
possible_actions = env.get_possible_actions()
new_position, outcome, in_end_position = env.move(possible_actions[chosen_action])
prediction_error = update_values(position, chosen_action, new_position, outcome)
print "iteration:", actions_executed, "action:", chosen_action,
print "new pos:", new_position, "reward:", outcome, "updated values:", values[position['x']][position['y']], "prediction error:", prediction_error

# Re-scale the state->action connection weights of the state just left,
# from the freshly updated value table.
for i in range(num_actions):
    nest.SetStatus(nest.GetConnections(states[position['x']][position['y']], actions[i]), {'weight': values[position['x']][position['y']][i] * WEIGHT_SCALING})

# stimulate new state: switch the stimulus from the old position to the new
# one, silence the WTA noise, and settle the network for 50 ms.
nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.})
nest.SetStatus(nest.GetConnections(stimulus, states[new_position['x']][new_position['y']]), {'weight': 1.})
nest.SetStatus(wta_noise, {'rate': 0.})
nest.Simulate(50.)