# NOTE(review): the statements on the next line appear to have been collapsed
# onto a single physical line (original newlines and indentation are lost).
# As written, everything after the first '#' is parsed as a comment, and the
# `else:` has no visible matching `if`/loop header -- presumably it pairs with
# a header above this fragment. Restore the original layout before running.
#
# What the visible statements do, in the order they appear:
#   * `error` is the result of critic(state, last_state, outcome * 100).
#   * A guard `outcome != 0 or state != last_state` follows; which of the
#     later statements belong to its body cannot be determined from here.
#   * values[last_state['y'], last_state['x']] is incremented by alpha * error
#     and policy[last_state['y'], last_state['x'], direction] by beta * error.
#   * pol_file / val_file are rewound with seek(0), overwritten with
#     json.dumps(policy.tolist()) / json.dumps(values.tolist()), and then
#     truncate() is called on each -- presumably to drop leftover bytes from a
#     longer previous dump (assumes these are regular file objects; confirm).
#   * In the `else:` branch env.init_new_trial() is called; its first return
#     value is discarded and the second is stored in `in_end_pos`.
#   * Finally `values` is printed (Python 2 print statement) and both files
#     are closed.
error = critic(state, last_state, outcome * 100) if outcome != 0 or state != last_state: # print "error ", error values[last_state['y'], last_state['x']] += alpha * error policy[last_state['y'], last_state['x'], direction] += beta * error # if outcome != 0: # for row in values: # print numpy.array(row, dtype=int) pol_file.seek(0) val_file.seek(0) pol_file.write(json.dumps(policy.tolist())) val_file.write(json.dumps(values.tolist())) pol_file.truncate() val_file.truncate() else: _, in_end_pos = env.init_new_trial() print values pol_file.close() val_file.close()
# NOTE(review): the statements on the next line appear to have been collapsed
# onto a single physical line (original newlines and indentation are lost).
# As written, everything after the first '#' is parsed as a comment, and the
# `else:` most likely pairs with an `if`/loop header above this fragment
# rather than with the visible `for`. Restore the original layout before
# running.
#
# What the visible statements do, in the order they appear:
#   * For each i in range(num_actions), the 'weight' of the connections from
#     states[position['x']][position['y']] to actions[i] is set to
#     values[position['x']][position['y']][i] * WEIGHT_SCALING.
#   * "stimulate new state": the stimulus -> states[position] connection
#     weight is set to 0. and the stimulus -> states[new_position] connection
#     weight is set to 1.
#   * wta_noise 'rate' is set to 0., nest.Simulate(50.) is run,
#     last_action_time is advanced by 150 and actions_executed by 1.
#     NOTE(review): Simulate advances by 50. while last_action_time advances
#     by 150 -- presumably earlier Simulate calls outside this view account
#     for the difference; confirm.
#   * In the `else:` branch a copy of env.get_agent_pos() is stored in
#     `position`, env.init_new_trial() is called (second return value kept as
#     `in_end_position`), and the stimulus -> states[position] weight is set
#     to 0.
#   * Afterwards SaveNetworkToFile("connections.dat", all_states, all_actions)
#     is called and rplt.from_device() is invoked for sd_wta and sd_states
#     before rplt.show().
#   * The trailing '#fig', '#plt...', '#ax', '#x, y' pieces are commented-out
#     3D plotting code.
for i in range(num_actions): nest.SetStatus(nest.GetConnections(states[position['x']][position['y']], actions[i]), {'weight': values[position['x']][position['y']][i] * WEIGHT_SCALING}) # stimulate new state nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.}) nest.SetStatus(nest.GetConnections(stimulus, states[new_position['x']][new_position['y']]), {'weight': 1.}) nest.SetStatus(wta_noise, {'rate': 0.}) nest.Simulate(50.) last_action_time += 150 actions_executed += 1 else: position = env.get_agent_pos().copy() _, in_end_position = env.init_new_trial() nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.}) SaveNetworkToFile("connections.dat", all_states, all_actions) rplt.from_device(sd_wta, title="WTA circuit") rplt.from_device(sd_states, title="states") rplt.show() #fig = plt.figure() #plt.xlabel("# action") #plt.ylabel("valence") #plt.title("valence of each action") #ax = fig.add_subplot(111, projection='3d') #x, y = np.meshgrid(range(world_dim['x'] * num_actions), range(NUM_ITERATIONS))