Example #1
0
values_filename = "pong_values.dat"

# Delete any existing values file and policy file, in that order, before
# the rest of the script runs.
for _stale_path in (values_filename, policy_filename):
    if os.path.exists(_stale_path):
        os.remove(_stale_path)

alpha = 0.05  # values / critic learning parameter
beta = 0.01   # actor learning parameter
gamma = 0.5   # error signal: future states parameter

# Values reported by the environment module; world_dim is indexed as
# world_dim[0] / world_dim[1] when the policy array is created below.
world_dim = env.getWorldDim()
num_possible_moves = env.getActionDim()

state = env.getState()

# The iteration count is the first command-line argument. Exit with a usage
# message instead of a bare IndexError when it is missing; a non-integer
# value still raises ValueError from int().
if len(sys.argv) < 2:
    sys.exit("usage: %s <iterations>" % sys.argv[0])
iterations = int(sys.argv[1])

# File-object placeholders; pol_file is bound to the policy file below.
pol_file = None
val_file = None

# NOTE(review): the policy file is removed near the top of this script, so
# the load branch appears unreachable here unless something recreates the
# file in between -- confirm whether it is still wanted.
if os.path.exists(policy_filename):
    # Load a previously saved policy stored as JSON. The with-block closes
    # the file even if reading or parsing raises; pol_file is left bound to
    # the closed file object afterwards.
    with open(policy_filename, 'r+') as pol_file:
        policy = numpy.array(json.load(pol_file))
else:
    # No saved policy: create a random one with shape
    # (world_dim[1], world_dim[0], num_possible_moves).
    policy = numpy.random.rand(world_dim[1], world_dim[0], num_possible_moves)

# Reopen the policy file for writing. Mode 'w+' truncates any existing
# content as soon as the file is opened.
# NOTE(review): no matching close() is visible in this excerpt -- confirm
# the handle is closed later in the script.
pol_file = open(policy_filename, 'w+')
Example #2
0
import os
import json
#import pong_environment as env 
import pong_environment_training as env

# Files read and written by this script.
policy_filename = "pong_policy.dat"
values_filename = "pong_values.dat"

alpha = 0.1   # values / critic learning parameter
beta = 0.01   # actor learning parameter
gamma = 0.5   # error signal: future states parameter

# Values reported by the environment module; world_dim is indexed as
# world_dim[0] / world_dim[1] when the policy array is created below.
world_dim = env.getWorldDim()
num_possible_moves = env.getActionDim()

state = env.getState()

# The iteration count is the first command-line argument. Exit with a usage
# message instead of a bare IndexError when it is missing; a non-integer
# value still raises ValueError from int().
if len(sys.argv) < 2:
    sys.exit("usage: %s <iterations>" % sys.argv[0])
iterations = int(sys.argv[1])

# File-object placeholders; pol_file is bound to the policy file below.
pol_file = None
val_file = None

if os.path.exists(policy_filename):
    # Load a previously saved policy stored as JSON. The with-block closes
    # the file even if reading or parsing raises; pol_file is left bound to
    # the closed file object afterwards.
    with open(policy_filename, 'r+') as pol_file:
        policy = numpy.array(json.load(pol_file))
else:
    # No saved policy: create a random one with shape
    # (world_dim[1], world_dim[0], num_possible_moves).
    policy = numpy.random.rand(world_dim[1], world_dim[0], num_possible_moves)

# Reopen the policy file for writing. Mode 'w+' truncates any existing
# content as soon as the file is opened, including a policy loaded above.
# NOTE(review): no matching close() is visible in this excerpt -- confirm
# the handle is closed later in the script.
pol_file = open(policy_filename, 'w+')