def test_physicality_hanging():
    """
    Test that energy does not spontaneously enter system.

    The pendulum starts at rest at 179.6 degrees (hanging almost straight
    down; vertical up is angle 0).  Five no-force steps are applied and,
    after each one, the test checks that

    1) the magnitude of the angular rate (``state[1]``) stays <= 0.1, and
    2) the energy reported by ``_cartPoleEnergy`` differs by less than 0.01
       from the energy of the state before that step.

    Finally it checks that states whose third component is +2.5 or -2.5
    (cart outside the x bounds) are reported as terminal.
    """
    # Action encoding used by these tests: 0 = left force, 1 = no force,
    # 2 = right force.  Only the no-force action is needed here.
    NO_FORCE = 1

    domain = FiniteCartPoleBalanceModern()
    domain.force_noise_max = 0  # no stochasticity in applied force
    # Widen the angle limits: we actually want to test hanging.
    domain.ANGLE_LIMITS = [-np.pi, np.pi]

    # Positive angle (right), pendulum hanging down.
    s = np.array([179.6 * np.pi / 180.0, 0.0, -2.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)

        # angular rate does not increase
        assert np.abs(domain.state[1]) <= 0.1

        # no energy should enter or leave system under no force action
        energy_change = (_cartPoleEnergy(domain, s)
                         - _cartPoleEnergy(domain, domain.state))
        assert np.abs(energy_change) < 0.01

        # Keep an independent snapshot as the reference for the next step,
        # so the comparison stays meaningful even if the domain were to
        # update its state array in place.
        s = domain.state.copy()

    # Ensure that running out of x bounds causes experiment to terminate
    assert domain.isTerminal(s=np.array([0.0, 0.0, 2.5, 0.0]))
    assert domain.isTerminal(s=np.array([0.0, 0.0, -2.5, 0.0]))
def test_physicality_hanging():
    """
    Test that energy does not spontaneously enter system.

    The pendulum starts at rest at 179.6 degrees (hanging almost straight
    down; vertical up is angle 0).  Five no-force steps are applied and,
    after each one, the test checks that

    1) the magnitude of the angular rate (``state[1]``) stays <= 0.1, and
    2) the energy reported by ``_cartPoleEnergy`` differs by less than 0.01
       from the energy of the state before that step.

    Finally it checks that states whose third component is +2.5 or -2.5
    (cart outside the x bounds) are reported as terminal.
    """
    # Action encoding used by these tests: 0 = left force, 1 = no force,
    # 2 = right force.  Only the no-force action is needed here.
    NO_FORCE = 1

    domain = FiniteCartPoleBalanceModern()
    domain.force_noise_max = 0  # no stochasticity in applied force
    # Widen the angle limits: we actually want to test hanging.
    domain.ANGLE_LIMITS = [-np.pi, np.pi]

    # Positive angle (right), pendulum hanging down.
    s = np.array([179.6 * np.pi / 180.0, 0.0, -2.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)

        # angular rate does not increase
        assert np.abs(domain.state[1]) <= 0.1

        # no energy should enter or leave system under no force action
        energy_change = (_cartPoleEnergy(domain, s)
                         - _cartPoleEnergy(domain, domain.state))
        assert np.abs(energy_change) < 0.01

        # Keep an independent snapshot as the reference for the next step,
        # so the comparison stays meaningful even if the domain were to
        # update its state array in place.
        s = domain.state.copy()

    # Ensure that running out of x bounds causes experiment to terminate
    assert domain.isTerminal(s=np.array([0.0, 0.0, 2.5, 0.0]))
    assert domain.isTerminal(s=np.array([0.0, 0.0, -2.5, 0.0]))
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=2120, initial_learn_rate=.26, lambda_=0.9,
                    resolution=8, num_rbfs=4958):
    """
    Assemble an Experiment: Q-learning over an RBF representation on
    FiniteCartPoleBalanceModern.

    :param exp_id: Experiment id; also used as the seed of the RBF
        representation.
    :param path: Output path template handed to the Experiment.
    :param boyan_N0: ``boyan_N0`` argument of the Q-learning agent (the
        learn rate decay mode is fixed to "boyan").
    :param initial_learn_rate: Initial learn rate of the agent.
    :param lambda_: ``lambda_`` argument of the agent.
    :param resolution: Used for both ``resolution_min`` and
        ``resolution_max`` of the RBF representation.
    :param num_rbfs: Number of RBFs; cast to ``int`` before use.
    :return: The configured Experiment (30000 max steps, 20 policy checks,
        10 checks per policy).
    """
    # Build the learning stack bottom-up: domain -> features -> policy -> agent.
    domain = FiniteCartPoleBalanceModern()
    rbf_count = int(num_rbfs)  # tolerate float-valued settings
    representation = RBF(domain,
                         num_rbfs=rbf_count,
                         resolution_max=resolution,
                         resolution_min=resolution,
                         const_feature=False,
                         normalize=True,
                         seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    agent = Q_LEARNING(policy,
                       representation,
                       discount_factor=domain.discount_factor,
                       lambda_=lambda_,
                       initial_learn_rate=initial_learn_rate,
                       learn_rate_decay_mode="boyan",
                       boyan_N0=boyan_N0)

    # Everything the Experiment constructor receives, in one place.
    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 30000,
        "num_policy_checks": 20,
        "checks_per_policy": 10,
        "domain": domain,
        "agent": agent
    }
    return Experiment(**opt)
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.21, boyan_N0=200.,
                    initial_learn_rate=.1, kernel_resolution=13.14):
    """
    Assemble an Experiment: Q-learning over a KernelizediFDD representation
    (Gaussian kernel) on FiniteCartPoleBalanceModern.

    :param exp_id: Experiment id handed to the Experiment.
    :param path: Output path template handed to the Experiment.
    :param discover_threshold: ``discover_threshold`` argument of the
        KernelizediFDD representation.
    :param boyan_N0: ``boyan_N0`` argument of the Q-learning agent (the
        learn rate decay mode is fixed to "boyan").
    :param initial_learn_rate: Initial learn rate of the agent.
    :param kernel_resolution: Divisor applied to the extent of each state
        dimension to obtain the kernel width for that dimension.
    :return: The configured Experiment (100000 max steps, 20 policy checks,
        1 check per policy).
    """
    # Representation settings that are fixed for this experiment.
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = FiniteCartPoleBalanceModern()

    # Kernel width per dimension: (upper limit - lower limit) / resolution.
    limits = domain.statespace_limits
    state_extent = limits[:, 1] - limits[:, 0]
    kernel_width = old_div(state_extent, kernel_resolution)

    representation = KernelizediFDD(domain,
                                    sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)

    # epsilon is 0., so the policy never takes exploratory random actions.
    policy = eGreedy(representation, epsilon=0.)

    # Q-learning with lambda_ fixed at 0.9 and Boyan learn rate decay.
    agent = Q_LEARNING(policy,
                       representation,
                       discount_factor=domain.discount_factor,
                       lambda_=0.9,
                       initial_learn_rate=initial_learn_rate,
                       learn_rate_decay_mode="boyan",
                       boyan_N0=boyan_N0)

    # Everything the Experiment constructor receives, in one place.
    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 100000,
        "num_policy_checks": 20,
        "checks_per_policy": 1,
        "domain": domain,
        "agent": agent
    }
    return Experiment(**opt)
def test_physicality():
    """
    Test coordinate system [vertical up is 0]

    1) gravity acts in proper direction based on origin
    2) force actions behave as expected in that frame

    Every scenario runs five steps and, after each step, compares the new
    angle and angular velocity (``state[0:2]``) against the state before
    that step.  The two no-force scenarios additionally require the energy
    reported by ``_cartPoleEnergy`` to change by less than 0.01 per step.
    """
    LEFT_FORCE = 0
    NO_FORCE = 1
    RIGHT_FORCE = 2

    domain = FiniteCartPoleBalanceModern()
    domain.force_noise_max = 0  # no stochasticity in applied force
    domain.int_type = 'rk4'  # presumably selects the integrator -- confirm

    # --- No force: gravity alone must keep accelerating the pendulum -----

    # Slightly positive angle, just right of vertical up
    s = np.array([10.0 * np.pi / 180.0, 0.0, 0.0, 0.0])
    # Hand the domain its own copy so the reference `s` can never alias the
    # array the domain works on.
    domain.state = s.copy()
    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # angle and angular velocity increase
        assert np.all(domain.state[0:2] > s[0:2])
        # no energy should enter or leave system under no force action
        energy_change = (_cartPoleEnergy(domain, s)
                         - _cartPoleEnergy(domain, domain.state))
        assert np.abs(energy_change) < 0.01
        s = domain.state.copy()  # reference for the next step

    # Negative angle (left): pendulum slightly left of vertical up
    s = np.array([-10.0 * np.pi / 180.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()
    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # angle and angular velocity decrease
        assert np.all(domain.state[0:2] < s[0:2])
        # no energy should enter or leave system under no force action
        energy_change = (_cartPoleEnergy(domain, s)
                         - _cartPoleEnergy(domain, domain.state))
        assert np.abs(energy_change) < 0.01
        s = domain.state.copy()

    # --- Start vertical: a force on the cart must rotate the pendulum ----

    # Negative force on cart, yielding positive rotation
    s = np.array([0.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()
    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(LEFT_FORCE)
        # angle and angular velocity increase
        assert np.all(domain.state[0:2] > s[0:2])
        s = domain.state.copy()

    # Positive force on cart, yielding negative rotation
    s = np.array([0.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()
    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(RIGHT_FORCE)
        # angle and angular velocity decrease
        assert np.all(domain.state[0:2] < s[0:2])
        s = domain.state.copy()
def test_physicality():
    """
    Test coordinate system [vertical up is 0]

    1) gravity acts in proper direction based on origin
    2) force actions behave as expected in that frame

    Every scenario runs five steps and, after each step, compares the new
    angle and angular velocity (``state[0:2]``) against the state before
    that step.  The two no-force scenarios additionally require the energy
    reported by ``_cartPoleEnergy`` to change by less than 0.01 per step.
    """
    LEFT_FORCE = 0
    NO_FORCE = 1
    RIGHT_FORCE = 2

    domain = FiniteCartPoleBalanceModern()
    domain.force_noise_max = 0  # no stochasticity in applied force
    domain.int_type = 'rk4'  # presumably selects the integrator -- confirm

    # --- No force: gravity alone must keep accelerating the pendulum -----

    # Slightly positive angle, just right of vertical up
    s = np.array([10.0 * np.pi / 180.0, 0.0, 0.0, 0.0])
    # Hand the domain its own copy so the reference `s` can never alias the
    # array the domain works on.
    domain.state = s.copy()
    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # angle and angular velocity increase
        assert np.all(domain.state[0:2] > s[0:2])
        # no energy should enter or leave system under no force action
        energy_change = (_cartPoleEnergy(domain, s)
                         - _cartPoleEnergy(domain, domain.state))
        assert np.abs(energy_change) < 0.01
        s = domain.state.copy()  # reference for the next step

    # Negative angle (left): pendulum slightly left of vertical up
    s = np.array([-10.0 * np.pi / 180.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()
    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # angle and angular velocity decrease
        assert np.all(domain.state[0:2] < s[0:2])
        # no energy should enter or leave system under no force action
        energy_change = (_cartPoleEnergy(domain, s)
                         - _cartPoleEnergy(domain, domain.state))
        assert np.abs(energy_change) < 0.01
        s = domain.state.copy()

    # --- Start vertical: a force on the cart must rotate the pendulum ----

    # Negative force on cart, yielding positive rotation
    s = np.array([0.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()
    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(LEFT_FORCE)
        # angle and angular velocity increase
        assert np.all(domain.state[0:2] > s[0:2])
        s = domain.state.copy()

    # Positive force on cart, yielding negative rotation
    s = np.array([0.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()
    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(RIGHT_FORCE)
        # angle and angular velocity decrease
        assert np.all(domain.state[0:2] < s[0:2])
        s = domain.state.copy()