Example #1
import time
import os
import json
import numpy
import pong_environment_play_muscle as env

policy_filename = "pong_policy.dat"
values_filename = "pong_values.dat"

alpha = 0.1  # critic (value function) learning rate
beta = 0.1   # actor (policy) learning rate
gamma = 0.9  # discount factor for future state values in the TD error

world_dim = env.getWorldDim()
world_dim = {'y': world_dim[0], 'x': world_dim[1]}
num_possible_moves = env.getActionDim()
state = env.getState()

pol_file = None
val_file = None

if os.path.exists(policy_filename):
	pol_file = open(policy_filename, 'r+')
	policy = numpy.array(json.loads(pol_file.read()))
	pol_file.close()
else:
	# no saved policy found: initialize a random policy over (y, x, action)
	policy = numpy.random.rand(world_dim['y'], world_dim['x'], num_possible_moves)

#pol_file = open(policy_filename, 'w+')
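The commented-out `open(policy_filename, 'w+')` hints that the learned policy is written back to disk elsewhere. A minimal sketch of that save step (a reconstruction, not part of the original code), mirroring the JSON load above; `tolist()` is needed because `json` cannot serialize numpy arrays directly:

def save_policy(policy, filename=policy_filename):
	# mirror of the load path: numpy array -> nested lists -> JSON text
	with open(filename, 'w') as f:
		f.write(json.dumps(policy.tolist()))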
Example #2
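This snippet is the body of an update function; the enclosing definition and imports are missing from the excerpt. A plausible reconstruction of the signature and the modules the code relies on (an assumption, inferred from the names used below, not taken from the original source):

import numpy as np
import matplotlib.pyplot as plt
import nest  # NEST neural network simulator
import pong_environment_play_muscle as env

# values, gamma, LEARNING_RATE, NUM_ITERATIONS, stimulus, states and
# wta_noise are assumed to be defined at module level, as in Example #1
def update_values(position, new_position, chosen_action, outcome):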
    # temporal-difference (TD) prediction error:
    # delta = reward + gamma * max_a' Q(s', a') - Q(s, a)
    best_new_action = values[new_position['x']][new_position['y']].argmax()
    prediction_error = (outcome
                        + gamma * values[new_position['x']][new_position['y']][best_new_action]
                        - values[position['x']][position['y']][chosen_action])

    # move the value of the chosen action toward the TD target
    values[position['x']][position['y']][chosen_action] += prediction_error * LEARNING_RATE

    return prediction_error
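
The update above is the standard Q-learning TD rule. A self-contained numeric sketch of a single update step (hypothetical toy values, not from the original code):

import numpy as np

GAMMA = 0.9          # corresponds to gamma above
LEARNING_RATE = 0.1

# hypothetical 2x1 world with 2 actions, indexed values[x][y][action]
values = np.zeros((2, 1, 2))
position = {'x': 0, 'y': 0}
new_position = {'x': 1, 'y': 0}
chosen_action = 0
outcome = 1.0  # reward received for the transition

best_new_action = values[new_position['x']][new_position['y']].argmax()
prediction_error = (outcome
                    + GAMMA * values[new_position['x']][new_position['y']][best_new_action]
                    - values[position['x']][position['y']][chosen_action])
values[position['x']][position['y']][chosen_action] += prediction_error * LEARNING_RATE

print(values[0][0][0])  # 0.1: the TD error of 1.0 scaled by the learning rate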



# Main loop
#values_hist = [np.ravel(values.copy())]
actions_executed = 0
last_action_time = 0
position = env.getState().copy()
in_end_position = False

# interactive plotting
fig, ax = plt.subplots()
plt.ion()

while actions_executed < NUM_ITERATIONS:
    if not in_end_position:
        # move the stimulation from the previous state's neuron population
        # to the population coding the state the environment is now in
        nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 0.})
        position = env.getState().copy()
        nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]), {'weight': 1.})
        
        # drive the winner-take-all (WTA) circuit with background noise
        nest.SetStatus(wta_noise, {'rate': 3000.})
        for t in range(8):