import time
import os
import json

import numpy

import pong_environment_play_muscle as env

policy_filename = "pong_policy.dat"
values_filename = "pong_values.dat"

alpha = 0.1  # values / critic learning parameter
beta = 0.1   # actor learning parameter
gamma = 0.9  # error signal: future states parameter

world_dim = env.getWorldDim()
world_dim = {'y': world_dim[0], 'x': world_dim[1]}
num_possible_moves = env.getActionDim()
state = env.getState()

pol_file = None
val_file = None

if os.path.exists(policy_filename):
    pol_file = open(policy_filename, 'r+')
    policy = numpy.array(json.loads(pol_file.read()))
    pol_file.close()
else:
    # create a random initial policy
    #print num_possible_moves
    policy = numpy.random.rand(world_dim['y'], world_dim['x'], num_possible_moves)
    #pol_file = open(policy_filename, 'w+')
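# --- A minimal sketch (an assumption, not in the original source): the critic's
# value table is presumably loaded/initialized symmetrically to the policy
# above. The (y, x, action) shape mirroring the policy array is assumed.
if os.path.exists(values_filename):
    val_file = open(values_filename, 'r+')
    values = numpy.array(json.loads(val_file.read()))
    val_file.close()
else:
    # assumed: start from zero-initialized action values
    values = numpy.zeros((world_dim['y'], world_dim['x'], num_possible_moves))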
    # prediction error (one-step temporal-difference error)
    best_new_action = values[new_position['x']][new_position['y']].argmax()
    prediction_error = (outcome
                        + gamma * values[new_position['x']][new_position['y']][best_new_action]
                        - values[position['x']][position['y']][chosen_action])

    # update values
    values[position['x']][position['y']][chosen_action] += prediction_error * LEARNING_RATE

    return prediction_error


# Main loop
#values_hist = [np.ravel(values.copy())]
actions_executed = 0
last_action_time = 0
position = env.getState().copy()
in_end_position = False

# interactive plotting
fig, ax = plt.subplots()
plt.ion()

while actions_executed < NUM_ITERATIONS:
    if not in_end_position:
        # stimulate new state: unweight the old state's stimulus, then drive the new one
        nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]),
                       {'weight': 0.})
        position = env.getState().copy()
        nest.SetStatus(nest.GetConnections(stimulus, states[position['x']][position['y']]),
                       {'weight': 1.})
        nest.SetStatus(wta_noise, {'rate': 3000.})
        for t in range(8):
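# For reference, a self-contained sketch (an assumption, not part of the
# original script) of the tabular one-step Q-learning update that the
# prediction_error computation above implements:
#   delta = r + gamma * max_a' Q(s', a') - Q(s, a);  Q(s, a) += lr * delta
import numpy as np

def q_update(q_table, s, a, reward, s_next, gamma=0.9, lr=0.1):
    """One tabular Q-learning step; q_table has shape (n_states, n_actions)."""
    delta = reward + gamma * q_table[s_next].max() - q_table[s, a]
    q_table[s, a] += lr * delta
    return delta

# toy usage: 4 states, 2 actions, single update from state 0 to state 2
Q = np.zeros((4, 2))
td_error = q_update(Q, s=0, a=1, reward=1.0, s_next=2)  # td_error == 1.0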
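# The loop above re-routes a single stimulus onto the population encoding the
# current state by rewriting connection weights. Below is a hedged,
# self-contained illustration of that pattern (NEST 2.x API assumed; the
# population names here are hypothetical, not from the original script):
import nest

nest.ResetKernel()
stim = nest.Create('poisson_generator', params={'rate': 3000.})
state_a = nest.Create('iaf_psc_alpha', 5)
state_b = nest.Create('iaf_psc_alpha', 5)
nest.Connect(stim, state_a, syn_spec={'weight': 1.})  # currently driven state
nest.Connect(stim, state_b, syn_spec={'weight': 0.})  # silent state

# switch the drive from state_a to state_b, as the main loop does on a state change
nest.SetStatus(nest.GetConnections(stim, state_a), {'weight': 0.})
nest.SetStatus(nest.GetConnections(stim, state_b), {'weight': 1.})
nest.Simulate(100.)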