from typing import List, Tuple

import numpy as np

from mathy_envs.envs.poly_simplify import PolySimplify
from mathy_envs.state import MathyEnvState, MathyObservation


def test_state_to_observation_normalization():
    """The normalize argument converts all values to the range 0.0-1.0"""
    env_state = MathyEnvState(problem="4+2")
    obs: MathyObservation = env_state.to_observation(normalize=False)
    assert np.max(obs.values) == 4.0
    norm: MathyObservation = env_state.to_observation(normalize=True)
    assert np.max(norm.values) == 1.0
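

# A sketch (not part of the original suite) of the rescaling that the two
# assertions above imply: dividing the raw node values by their maximum so the
# largest value becomes 1.0. The actual normalization inside mathy_envs may
# differ in detail.
def example_max_rescaling_sketch():
    raw = np.array([4.0, 0.0, 2.0], dtype="float32")
    scaled = raw / np.max(raw)
    assert np.max(scaled) == 1.0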


def test_state_encode_player():
    env_state = MathyEnvState(problem="4x+2")
    env_state = env_state.get_out_state(
        problem="2+4x", moves_remaining=10, action=(0, 0)
    )
    agent = env_state.agent
    assert agent.problem == "2+4x"
    assert agent.moves_remaining == 10
    assert agent.action == (0, 0)


def test_state_to_observation_normalized_problem_type():
    """The normalize argument converts all values and the problem type hash to
    the range 0.0-1.0"""
    env_state = MathyEnvState(problem="4+2")
    obs: MathyObservation = env_state.to_observation()
    assert np.max(obs.time) <= 1.0
    assert np.min(obs.time) >= 0.0
    assert np.max(obs.values) <= 1.0
    assert np.min(obs.values) >= 0.0
    assert np.max(obs.type) <= 1.0
    assert np.min(obs.type) >= 0.0


def test_state_serialize_numpy():
    env_state = MathyEnvState(problem="4x+2")
    # Build up a history of transitions so serialization has steps to preserve
    for i in range(10):
        env_state = env_state.get_out_state(
            problem="2+4x", moves_remaining=10 - i, action=(i, i)
        )
    state_np = env_state.to_np()
    compare = MathyEnvState.from_np(state_np)
    assert env_state.agent.problem == compare.agent.problem
    assert env_state.agent.moves_remaining == compare.agent.moves_remaining
    for one, two in zip(env_state.agent.history, compare.agent.history):
        assert one.raw == two.raw
        assert one.action == two.action
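

# A sketch (not part of the original suite) of persisting the numpy-encoded
# state to disk, assuming `to_np()` returns a plain ndarray, as `from_np()`
# accepting one suggests. The file name is only for illustration.
def example_save_and_restore_sketch(path: str = "state.npy"):
    state = MathyEnvState(problem="4x+2")
    np.save(path, state.to_np())
    restored = MathyEnvState.from_np(np.load(path))
    assert restored.agent.problem == "4x+2"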


def test_state_encodes_hierarchy():
    """Verify that the generated observation encodes hierarchy properly so the
    model can determine the precise nodes to act on"""
    diff_pairs: List[Tuple[str, str]] = [
        ("4x + (3u + 7x + 3u) + 4u", "4x + 3u + 7x + 3u + 4u"),
        ("7c * 5", "7 * (c * 5)"),
        ("5v + 20b + (10v + 7b)", "5v + 20b + 10v + 7b"),
        ("5s + 60 + 12s + s^2", "5s + 60 + (12s + s^2)"),
    ]
    env = PolySimplify()
    for one, two in diff_pairs:
        state_one = MathyEnvState(problem=one)
        obs_one = state_one.to_observation(env.get_valid_moves(state_one))
        state_two = MathyEnvState(problem=two)
        obs_two = state_two.to_observation(env.get_valid_moves(state_two))
        assert obs_one.nodes != obs_two.nodes


def test_state_to_observation():
    """to_observation has defaults that allow calling it with no arguments"""
    env_state = MathyEnvState(problem="4x+2")
    assert env_state.to_observation() is not None


def test_state_sanity():
    state = MathyEnvState(problem="4+4")
    assert state is not None


#!pip install gym
from mathy.solver import SwarmConfig, mathy_swarm
from mathy_envs.state import MathyEnvState

# Which values do we want from the history tree?
history_names = ["states", "actions", "rewards"]

# Configure the swarm
swarm = mathy_swarm(SwarmConfig(history=True, history_names=history_names))

# Run the swarm to generate tree history
swarm.run()

# Sample random batches from the history tree
random_batches = swarm.tree.iterate_nodes_at_random(
    batch_size=32, names=history_names
)
total_set = set()
total_generated = 0
for states, actions, rewards in random_batches:
    texts = [MathyEnvState.from_np(s).agent.problem for s in states]
    total_generated += len(texts)
    total_set.update(texts)

best_state = MathyEnvState.from_np(swarm.walkers.states.best_state)
swarm.env._env._env.mathy.print_history(best_state)
print(f"Generated {total_generated} states, {len(total_set)} of which are unique")
print(f"Highest reward encountered: {swarm.walkers.states.best_reward}")
print(best_state.agent.problem)
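

# Illustrative only (not part of the mathy API): one way to keep just the
# higher-reward samples, assuming `rewards` lines up one-to-one with `states`
# as the tuple unpacking in the loop above implies.
high_reward_problems = []
for states, actions, rewards in swarm.tree.iterate_nodes_at_random(
    batch_size=32, names=history_names
):
    for state, reward in zip(states, rewards):
        if reward > 0.0:
            high_reward_problems.append(MathyEnvState.from_np(state).agent.problem)
print(f"Kept {len(high_reward_problems)} sampled states with positive reward")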