values_filename = "pong_values.dat"

# Discard any previously saved value / policy tables before starting.
# NOTE(review): since the policy file is removed right here, the
# "load existing policy" branch further down is normally not taken --
# confirm that starting from a fresh random policy is intended.
if os.path.exists(values_filename):
    os.remove(values_filename)
if os.path.exists(policy_filename):
    os.remove(policy_filename)

alpha = 0.05  # values / critic learning parameter
beta = 0.01  # actor learning parameter
gamma = 0.5  # error signal: future states parameter

world_dim = env.getWorldDim()
num_possible_moves = env.getActionDim()
state = env.getState()

# Taken from the first command-line argument; raises IndexError when the
# argument is missing and ValueError when it is not an integer.
iterations = int(sys.argv[1])

pol_file = None
val_file = None

if os.path.exists(policy_filename):
    # Load the policy stored as JSON. The context manager closes the file
    # even if reading or parsing fails; afterwards `pol_file` is bound to a
    # closed handle, exactly as with the former explicit close().
    with open(policy_filename, 'r+') as pol_file:
        policy = numpy.array(json.loads(pol_file.read()))
else:
    # Create a random policy: uniform samples in [0, 1) with shape
    # (world_dim[1], world_dim[0], num_possible_moves).
    policy = numpy.random.rand(world_dim[1], world_dim[0], num_possible_moves)
    # Deliberately left open: presumably written and closed by code further
    # down the script -- TODO confirm.
    pol_file = open(policy_filename, 'w+')
import os
import json
# Alternative environment module, currently disabled:
# import pong_environment as env
import pong_environment_training as env

# NOTE(review): `sys` and `numpy` are used below but not imported in this
# part of the file -- assumed to be imported earlier; confirm.

policy_filename = "pong_policy.dat"
values_filename = "pong_values.dat"

alpha = 0.1  # values / critic learning parameter
beta = 0.01  # actor learning parameter
gamma = 0.5  # error signal: future states parameter

world_dim = env.getWorldDim()
num_possible_moves = env.getActionDim()
state = env.getState()

# Taken from the first command-line argument; raises IndexError when the
# argument is missing and ValueError when it is not an integer.
iterations = int(sys.argv[1])

pol_file = None
val_file = None

if os.path.exists(policy_filename):
    # Resume from the policy stored as JSON. The context manager closes the
    # file even if reading or parsing fails; afterwards `pol_file` is bound
    # to a closed handle, exactly as with the former explicit close().
    with open(policy_filename, 'r+') as pol_file:
        policy = numpy.array(json.loads(pol_file.read()))
else:
    # Create a random policy: uniform samples in [0, 1) with shape
    # (world_dim[1], world_dim[0], num_possible_moves).
    policy = numpy.random.rand(world_dim[1], world_dim[0], num_possible_moves)
    # Deliberately left open: presumably written and closed by code further
    # down the script -- TODO confirm.
    pol_file = open(policy_filename, 'w+')