def __init__(self, agent_class_name, agent_module_path, mdp_class_name, mdp_module_path):
    agent_class = import_from_strings(agent_class_name, agent_module_path)
    mdp_class = import_from_strings(mdp_class_name, mdp_module_path)
    dummy_environment = Environment(mdp_class((1, 1)))
    self.spec = dummy_environment.spec()
    self.agent_class = agent_class
    self.mdp_class = mdp_class
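# import_from_strings is not shown in this snippet. Below is a minimal sketch
# of what such a helper might look like, assuming it resolves a class by name
# from a dotted module path via importlib -- the name and behavior are
# inferred from the call sites above, not confirmed by the source.
import importlib


def import_from_strings(class_name, module_path):
    # e.g. import_from_strings('QAgent', 'agents.q_agent') -- hypothetical names
    module = importlib.import_module(module_path)
    return getattr(module, class_name)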
def __init__(self):
    env = FlappyBird()
    self.p = PLE(env, add_noop_action=True)
    self.p.init()
    self.win_score = 10.
    action_space = len(self.p.getActionSet())
    state_space = len(self.p.getGameState())
    actions = ["up", "nothing"]
    state_names = list(self.p.getGameState().keys())
    Environment.__init__(self, env, action_space, state_space, actions, state_names)
def collectData(info):
    i, location, ID = info
    print('Start', ID)
    disablePrint()
    agent = Agent(memory=i)
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act, clean(obs).detach(), rew,
                                 h0.detach(), c0.detach(), hn.detach(), cn.detach(),
                                 int(not done))
            env.render()
            if done:
                break
    env.close()
    saveData(agent, location, ID)
    enablePrint()
    print('Done', ID)
    return os.getpid()
def collectData(agent):
    print('Start', agent.memory.size)
    disablePrint()
    i = agent.memory.size
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act, clean(obs).detach(), rew,
                                 h0.detach(), c0.detach(), hn.detach(), cn.detach(),
                                 int(not done))
            env.render()
            if done:
                break
    env.close()
    enablePrint()
    print('Done')
    return agent.memory.memory
def dumps(data: dict, environment: Environment):
    """
    Dumps the given data into the dumps directory.
    :param environment: environment whose class name is used to build the file name
    :param data: data to serialize to YAML
    :return:
    """
    timestamp = int(time.time())

    # Get environment name in snake case
    environment = um.str_to_snake_case(environment.__class__.__name__)

    # Get only the first letter of each word
    env_name_abbr = ''.join([word[0] for word in environment.split('_')])

    # Specify full path
    file_path = Path(__file__).parent.parent.joinpath(
        'dumps/w/train_data/{}_w_{}_{}.yml'.format(env_name_abbr, timestamp,
                                                   Vector.decimal_precision))

    # If any parent directory doesn't exist, create it.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    with file_path.open(mode='w+', encoding='UTF-8') as f:
        f.write(um.structures_to_yaml(data=data))
def do(args, env):
    do_env = Environment(name="do", outer=env)
    if len(args) == 0:
        throw_error(
            "syntax",
            "Incorrect use of (do ...): must take at least one argument.")
    result = None
    for a in args:
        result = ev.evaluate(a, do_env)
    return result
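# Usage note (assumed interpreter conventions, not from the source): the
# (do ...) special form above evaluates each argument in order inside a child
# scope and returns the value of the last one, e.g.
#   (do (define x 2) (+ x 3))  =>  5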
def anonymous(*arguments):
    # print("inside anonymous function")
    # print("arguments(" + str(len(arguments)) + "):", arguments)
    if len(arguments) != len(largs):
        throw_error(
            "syntax",
            "This function takes " + str(len(largs)) + " arguments (" +
            str(len(arguments)) + " provided).")
    lenv = Environment(name="anon_fn", outer=env, variables=largs, values=arguments)
    return ev.evaluate(lbody, lenv)
def test_synergies(self):
    cfg = KinArmSynergies2D.defcfg._deepcopy()
    cfg.dim = 3
    cfg.limits = (-180.0, 180.0)
    cfg.lengths = 1.0
    cfg.syn_span = 3
    cfg.syn_res = 3
    kin_env = Environment.create(cfg)
    m_signal = {'j0': 0.0, 'j1': 0.0, 'j2': 0.0, 's0': 1.0}
    feedback = kin_env.execute(m_signal)
    s_signal = feedback['s_signal']
    self.assertTrue(near(s_signal['y'], 0.0))
    self.assertTrue(near(s_signal['x'], -1.0))
import physics_engine as pe
import neural_network as nn
from environments import Environment
import numpy as np
from time import time
import math

start_time = time()  # just a timer


# hyperbolic tangent function, similar to sigmoid but has a range of (-1, 1)
# as opposed to (0, 1); used for squashing, but supports negative values
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


# config
order = 5
e = Environment(solids=[pe.Circle(pos=[-100, .001])],
                g_type='uniform',
                g_strength=[0, -9.81])
destination = np.array([100, 0])
n = nn.NeuralNetwork(inputs=np.array([[
    e.g_strength[1] / 10,
    (destination[0] - e.solids[0].pos[0]) / 100,
    (destination[1] - e.solids[0].pos[1]) / 100
]]),
                     l1_size=4)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))

    # turn the inputs into outputs using existing weights
    n.feedforward()
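# Not in the original script: a quick sanity check that the hand-rolled tanh
# above is numerically equivalent to Python's built-in math.tanh.
import math


def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


for x in (-2.0, -0.5, 0.0, 0.5, 3.0):
    assert abs(tanh(x) - math.tanh(x)) < 1e-9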
    plotting.circle(xs[ 1:-1], ys[ 1:-1], radius=0.008, **kwargs)
    plotting.circle(xs[-1:  ], ys[-1:  ], radius=0.01, color='red')
    plotting.hold(False)


if __name__ == '__main__':
    from environments import Environment
    from environments.envs import KinematicArm2D

    # Arm with same-length segments
    cfg = KinematicArm2D.defcfg._deepcopy()
    cfg.dim = 20
    cfg.limits = (-150.0, 150.0)
    cfg.lengths = 1/cfg.dim
    cfg.full_sensors = True
    kin_env = Environment.create(cfg)

    # Arm with segments of decreasing length
    cfg2 = cfg._deepcopy()
    cfg2.lengths = np.array([0.9**i for i in range(cfg2.dim)])
    cfg2.lengths = cfg2.lengths/sum(cfg2.lengths)
    kin_env2 = Environment.create(cfg2)

    m_signals = [
        {'j0': -31.23, 'j1': -44.21, 'j2': -20.18, 'j3': +31.55, 'j4': +35.66,
         'j5':  +5.19, 'j6': +17.34, 'j7': +24.51, 'j8':  -2.69, 'j9': +26.52,
         'j10': -34.87, 'j11': +10.72, 'j12': -19.38, 'j13': -33.49, 'j14': +13.78,
         'j15': -22.43, 'j16': +33.61, 'j17': -28.95, 'j18': +34.31, 'j19': 45.75},
        {'j0': -53.66, 'j1': -56.20, 'j2': -56.67, 'j3': -34.83, 'j4': -20.29,
         'j5':  +7.51, 'j6': +20.92, 'j7': +25.51, 'j8': -17.59, 'j9':  +6.51,
         'j10':  -9.65, 'j11': +45.70, 'j12': +20.88, 'j13': +24.25, 'j14': +28.65,
         'j15': -42.79, 'j16': +34.45, 'j17': -39.90, 'j18':  +2.74, 'j19': -11.12},
        {'j0': +58.13, 'j1': +45.43, 'j2': -21.01, 'j3':  +2.35, 'j4': -38.90,
         'j5': -39.23, 'j6': +45.14, 'j7': -57.58, 'j8': +39.49, 'j9': +29.01,
         'j10':  -0.09, 'j11': -56.19, 'j12': +56.07, 'j13':  +5.91, 'j14': +36.61,
         'j15': -52.65, 'j16': -58.60, 'j17': +32.45, 'j18': +43.69, 'j19': -120.77},
        {'j0': +53.09, 'j1': +55.83, 'j2': -51.08, 'j3': +41.44, 'j4': +44.43,
         'j5':  +4.67, 'j6':  +2.15, 'j7': +37.23, 'j8':  -3.77, 'j9': -46.70,
         'j10': +56.41, 'j11': -21.08, 'j12': +13.73, 'j13': +47.23, 'j14':  +7.94,
         'j15': -27.26, 'j16': +56.54, 'j17':  -7.77, 'j18': -18.98, 'j19': +149.46},
    ]

    plotting.output_file('html/arm_vizu.html')

    for i, m_signal in enumerate(m_signals):
def test_agents(environment: Environment,
                hv_reference: Vector,
                variable: str,
                agents_configuration: dict,
                graph_configuration: dict,
                epsilon: float = None,
                alpha: float = None,
                max_steps: int = None,
                states_to_observe: list = None,
                number_of_agents: int = 30,
                gamma: float = 1.,
                solution: list = None,
                initial_q_value: Vector = None,
                evaluation_mechanism: EvaluationMechanism = None):
    """
    If DATA_PER_STATE is chosen in graph_configuration, the agent trains for
    `limit` steps and only collects train_data at the last step (`interval` is
    ignored). If MEMORY is chosen, the agent trains for `limit` steps and
    collects train_data every `interval` steps.
    :param initial_q_value:
    :param graph_configuration:
    :param solution:
    :param environment:
    :param hv_reference:
    :param variable:
    :param agents_configuration:
    :param epsilon:
    :param alpha:
    :param max_steps:
    :param states_to_observe:
    :param number_of_agents:
    :param gamma:
    :param evaluation_mechanism:
    :return:
    """

    # Extract graph_types
    graph_types = set(graph_configuration.keys())

    if len(graph_types) > 2:
        print("More than 2 graphs is not recommended")

    # Parameters
    if states_to_observe is None:
        states_to_observe = {environment.initial_state}

    complex_states = isinstance(environment.observation_space[0], gym.spaces.Tuple)

    # DATA_PER_STATE needs simple states (observation_space[0].n is used below
    # as the column count), so it is disabled for complex (nested tuple) states.
    if complex_states and GraphType.DATA_PER_STATE in graph_types:
        print("This environment has complex states, so the DATA_PER_STATE graph is disabled.")
        graph_configuration.pop(GraphType.DATA_PER_STATE)

    # Build environment
    env_name = environment.__class__.__name__
    env_name_snake = str_to_snake_case(env_name)

    # File timestamp
    timestamp = int(time.time())

    # Write all information in configuration path
    write_config_file(timestamp=timestamp,
                      number_of_agents=number_of_agents,
                      env_name_snake=env_name_snake,
                      seed=','.join(map(str, range(number_of_agents))),
                      epsilon=epsilon,
                      alpha=alpha,
                      gamma=gamma,
                      max_steps=max_steps,
                      variable=variable,
                      agents_configuration=agents_configuration,
                      graph_configuration=graph_configuration,
                      evaluation_mechanism=evaluation_mechanism)

    # Create graphs structure
    graphs, graphs_info = initialize_graph_data(
        graph_types=graph_types, agents_configuration=agents_configuration)

    # Show information
    print('Environment: {}'.format(env_name))

    for graph_type in graph_types:
        # Extract interval and limit
        interval = graph_configuration[graph_type].get('interval', 1)
        limit = graph_configuration[graph_type]['limit']

        # Show information
        print(('\t' * 1) + "Graph type: {} - [{}/{}]".format(graph_type, limit, interval))

        # Set interval to get train_data
        Agent.interval_to_get_data = interval

        # Execute an iteration with a different initial seed for each agent
        for seed in range(number_of_agents):
            # Show information
            print(('\t' * 2) + "Execution: {}".format(seed + 1))

            # For each configuration
            for agent_type in agents_configuration:
                # Show information
                print(('\t' * 3) + 'Agent: {}'.format(agent_type.value))

                # Extract configuration for that agent
                for configuration in agents_configuration[agent_type].keys():
                    # Show information
                    print(('\t' * 4) + '{}: {}'.format(variable, configuration), end=' ')

                    # Mark of time
                    t0 = time.time()

                    # Reset environment
                    environment.reset()
                    environment.seed(seed=seed)

                    # Variable parameters
                    parameters = {
                        'epsilon': epsilon,
                        'alpha': alpha,
                        'gamma': gamma,
                        'max_steps': max_steps,
                        'evaluation_mechanism': evaluation_mechanism,
                        'initial_value': initial_q_value
                    }

                    if variable == 'decimal_precision':
                        Vector.set_decimal_precision(decimal_precision=configuration)
                    else:
                        # Modify current configuration
                        parameters.update({variable: configuration})

                    agent, v_s_0 = train_agent_and_get_v_s_0(
                        agent_type=agent_type,
                        environment=environment,
                        graph_type=graph_type,
                        graph_types=graph_types,
                        hv_reference=hv_reference,
                        limit=limit,
                        seed=seed,
                        parameters=parameters,
                        states_to_observe=states_to_observe)

                    print('-> {:.2f}s'.format(time.time() - t0))

                    train_data = dict()

                    if agent_type is AgentType.PQL and graph_type is GraphType.DATA_PER_STATE:
                        train_data.update({
                            'vectors': {
                                state: {
                                    action: agent.q_set(state=state, action=action)
                                    for action in agent.nd[state].keys()
                                }
                                for state in agent.nd.keys()
                            }
                        })

                    # Order vectors by origin Vec(0) nearest
                    train_data.update({
                        'v_s_0': Vector.order_vectors_by_origin_nearest(vectors=v_s_0),
                        # 'q': agent.q,
                        # 'v': agent.v
                    })

                    # Write vectors found into path
                    dumps_train_data(
                        timestamp=timestamp,
                        seed=seed,
                        env_name_snake=env_name_snake,
                        train_data=train_data,
                        variable=variable,
                        agent_type=agent_type,
                        configuration=configuration,
                        evaluation_mechanism=evaluation_mechanism,
                        columns=environment.observation_space[0].n)

                    # Update graphs
                    update_graphs(graphs=graphs,
                                  agent=agent,
                                  graph_type=graph_type,
                                  configuration=str(configuration),
                                  agent_type=agent_type,
                                  states_to_observe=states_to_observe,
                                  graphs_info=graphs_info,
                                  solution=solution)

    prepare_data_and_show_graph(timestamp=timestamp,
                                env_name=env_name,
                                env_name_snake=env_name_snake,
                                graphs=graphs,
                                number_of_agents=number_of_agents,
                                agents_configuration=agents_configuration,
                                alpha=alpha,
                                epsilon=epsilon,
                                gamma=gamma,
                                graph_configuration=graph_configuration,
                                max_steps=max_steps,
                                initial_state=environment.initial_state,
                                variable=variable,
                                graphs_info=graphs_info,
                                evaluation_mechanism=evaluation_mechanism,
                                solution=solution)
class TestEnvironment(unittest.TestCase):
    def setUp(self):
        # An observation space
        observation_space = gym.spaces.Discrete(7)

        # Default reward
        default_reward = Vector([1, 2, 1])

        # Set the initial seed to 0 for testing.
        self.environment = Environment(observation_space=observation_space,
                                       default_reward=default_reward,
                                       seed=0)

    def tearDown(self):
        self.environment = None

    def test_init(self):
        """
        Testing if the constructor works
        :return:
        """
        # The environment must have the following attributes
        self.assertTrue(hasattr(self.environment, '_actions'))
        self.assertTrue(hasattr(self.environment, '_icons'))
        self.assertTrue(hasattr(self.environment, 'actions'))
        self.assertTrue(hasattr(self.environment, 'icons'))
        self.assertTrue(hasattr(self.environment, 'action_space'))
        self.assertTrue(hasattr(self.environment, 'observation_space'))
        self.assertTrue(hasattr(self.environment, 'np_random'))
        self.assertTrue(hasattr(self.environment, 'initial_seed'))
        self.assertTrue(hasattr(self.environment, 'initial_state'))
        self.assertTrue(hasattr(self.environment, 'current_state'))
        self.assertTrue(hasattr(self.environment, 'finals'))
        self.assertTrue(hasattr(self.environment, 'obstacles'))
        self.assertTrue(hasattr(self.environment, 'default_reward'))

        # The environment must have the following methods.
        self.assertTrue(hasattr(self.environment, 'step'))
        self.assertTrue(hasattr(self.environment, 'initial_seed'))
        self.assertTrue(hasattr(self.environment, 'reset'))
        self.assertTrue(hasattr(self.environment, 'render'))
        self.assertTrue(hasattr(self.environment, 'next_state'))
        self.assertTrue(hasattr(self.environment, 'is_final'))

        self.assertIsInstance(self.environment.observation_space, gym.spaces.Space)
        self.assertIsInstance(self.environment.action_space, gym.spaces.Space)

        self.assertEqual(self.environment.initial_state, self.environment.current_state)

    def test_icons(self):
        """
        Testing the icons property
        :return:
        """
        self.assertEqual(self.environment._icons, self.environment.icons)

    def test_actions(self):
        """
        Testing the actions property
        :return:
        """
        self.assertEqual(self.environment._actions, self.environment.actions)

    def test_action_space_length(self):
        pass

    def test_seed(self):
        """
        Testing the seed method
        :return:
        """
        self.environment.seed(seed=0)
        n1_1 = self.environment.np_random.randint(0, 10)
        n1_2 = self.environment.np_random.randint(0, 10)

        self.environment.seed(seed=0)
        n2_1 = self.environment.np_random.randint(0, 10)
        n2_2 = self.environment.np_random.randint(0, 10)

        self.assertEqual(n1_1, n2_1)
        self.assertEqual(n1_2, n2_2)

    def test_reset(self):
        """
        Testing the reset method
        :return:
        """
        # Set current state to a random position
        self.environment.current_state = self.environment.observation_space.sample()

        # Reset environment
        self.environment.reset()

        # Asserts
        self.assertEqual(self.environment.initial_state, self.environment.current_state)

    def test_states(self):
        """
        Testing that all states must be contained in the observation space
        :return:
        """
        pass

    def test_reachable_states(self):
        pass

    def test_transition_probability(self):
        pass

    def test_transition_reward(self):
        pass
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys

# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

environment = Environment()

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n
print("State %d" % state_num)
print("Action %d" % action_num)

agent = DeepQAgent(state_num, action_num)

if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
import physics_engine as pe
import neural_network as nn
from environments import Environment
import numpy as np
from time import time
import math

start_time = time()  # just a timer


# hyperbolic tangent function, similar to sigmoid but has a range of (-1, 1)
# as opposed to (0, 1); used for squashing, but supports negative values
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


# config
order = 3
e = Environment(solids=[
    pe.Circle(pos=[-100, 0], mass=100, static=True),
    pe.Circle(pos=[0, 0], velocity=[4, 0], mass=1, radius=1),
    pe.Circle(pos=[50, 0], velocity=[0, 2.582], mass=20)
],
                g_type='nonuniform',
                g_strength=10)
n = nn.NeuralNetwork(inputs=np.array(
    [[e.g_strength / 10, e.solids[0].pos[0] / 10, e.solids[0].pos[1] / 10]]),
                     l1_size=8)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))

    # # switch variables every 20 iterations
    # if i % 20 == 0:
# This is the one thing that is provided de-facto with every application
# whether you like it or not.
from twitter.common import options


class AppException(Exception):
    pass


options.add(
    '--env', '--environment',
    action='callback',
    callback=Environment._option_parser,
    default='DEVELOPMENT',
    metavar='ENV',
    dest='twitter_common_app_environment',
    help="The environment in which to run this Python application. "
         "Known environments: %s [default: %%default]" % ' '.join(Environment.names()))

options.add(
    '--app_debug',
    action='store_true',
    default=False,
    dest='twitter_common_app_debug',
    help="Print extra debugging information during application initialization.")

_APP_REGISTRY = {}
_APP_NAME = None
_APP_INITIALIZED = False

__all__ = [
    # exceptions
    'AppException',
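# Environment._option_parser is referenced above but not shown. A hedged
# sketch only: optparse-style callbacks have the signature
# (option, opt_str, value, parser), and this assumes the option is declared
# with type='string' so the callback receives the argument. The environment
# names and validation logic below are assumptions, not the actual
# twitter.common implementation.
KNOWN_ENVIRONMENTS = ('DEVELOPMENT', 'STAGING', 'PRODUCTION')


def _option_parser(option, opt_str, value, parser):
    if value not in KNOWN_ENVIRONMENTS:
        raise ValueError('Unknown environment: %r' % value)
    setattr(parser.values, option.dest, value)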
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys

# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

cartpole = 'CartPole-v0'
mountaincar = 'MountainCar-v0'
# stockmarket = 'StockMarket'
current_env = mountaincar

environment = Environment(current_env)

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n

agent = DeepQAgent(state_num, action_num, current_env)

if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
import physics_engine as pe
import neural_network as nn
from environments import Environment
import numpy as np
from time import time
import math

start_time = time()  # just a timer

# config
order = 6
e = Environment(solids=[pe.Circle(pos=[-100, .001])],
                g_type='uniform',
                g_strength=[0, -9.81])
destinations = [[0, 0], [100, 0], [200, 0], [100, 0], [200, 0]]
gravities = [-9.81, -9.81, -9.81, -20, -20]
n = nn.NeuralNetwork(inputs=np.array([[0, 0, 0]]), l1_size=8)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))

    # switch variables every 20 iterations
    if i % 20 == 0:
        destination = destinations[int(i / 20) % 5]
        e.g_strength[1] = gravities[int(i / 20) % 5]
        n.inputs[0] = [
            e.g_strength[1] / 10,
            (destination[0] + 100) / 100,
gen_count = 300  # for how many generations training will last
mutate_chance = .5  # the odds of an organism being mutated on any given generation
full_mutate_chance = .2  # odds of an organism being replaced by a randomized organism instead of just being tweaked according to the normal distribution
standard_deviations = [.1 for i in range(3)]  # how much each gene is mutated by; mutations follow a normal distribution with these standard deviations
gene_ranges = [(-5, 5) for i in range(3)]
pop_size = 100  # number of organisms in the population
time_limit = 50  # how long each fitness test will run for before just giving up
tick_length = .2  # how often the physics engine will update; smaller values create more precise simulations but take longer

start_pos = [0, 11.001]
x = start_pos[0]
y = start_pos[1]
e = Environment(solids=[pe.Circle(static=True),
                        pe.Circle(radius=1, pos=start_pos)],
                g_type='nonuniform',
                g_strength=10)

# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(randrange(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))
p = Population(initial_population)

# iterates through all generations
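# The comments above describe the mutation scheme, but the mutation code
# itself is outside this excerpt. A minimal sketch under those stated
# assumptions (the function name and structure are hypothetical):
import numpy as np


def mutate(dna, mutate_chance, full_mutate_chance, standard_deviations, gene_ranges):
    if np.random.random() >= mutate_chance:
        return dna  # organism left unchanged this generation
    if np.random.random() < full_mutate_chance:
        # replaced outright by a fresh randomized organism
        return [np.random.uniform(lo, hi) for lo, hi in gene_ranges]
    # otherwise tweak each gene with normally distributed noise
    return [g + np.random.normal(0, sd) for g, sd in zip(dna, standard_deviations)]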
    if r.random() < .5:
        return x - delta
    return x + delta


# all 6 possible orders in which the algorithm will be introduced to the environments
orders = [['PS', 'TD', 'SV'], ['PS', 'SV', 'TD'], ['TD', 'PS', 'SV'],
          ['TD', 'SV', 'PS'], ['SV', 'TD', 'PS'], ['SV', 'PS', 'TD']]

# possible start locations for PS_1's rocket (solids[1])
ps1_starts = [[-11.001, .1], [.1, -11.001], [11.001, .1], [.1, 11.001],
              [7.8, 7.8], [-7.8, 7.8], [-7.8, -7.8], [7.8, -7.8]]

# 6 environments for the LT ML algorithm to use, initialized only with the
# instance variables that will be kept constant
PS_1 = Environment(
    solids=[pe.Circle(static=True), pe.Circle(radius=1, pos=[0, 11.01])],
    g_type='nonuniform',
    g_strength=100)
PS_2 = Environment(solids=[
    pe.Circle(static=True, pos=[-100, 0], mass=100),
    pe.Circle(radius=1, pos=[-88.99, 0], mass=1),
    pe.Circle(radius=3, pos=[1, 0], velocity=[0, 3.162])
],
                   g_type='nonuniform',
                   g_strength=10)
TD_1 = Environment(solids=[
    pe.Circle(pos=[1, 1]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
def __init__(self, timer, dimensions):
    Dimensioned.__init__(self, Vertex(*dimensions))
    Environment.__init__(self, timer)
mutate_chance = .8  # the odds of an organism being mutated on any given generation
full_mutate_chance = .4  # odds of an organism being replaced by a randomized organism instead of just being tweaked according to the normal distribution
standard_deviations = [.05 for i in range(2)]  # how much each gene is mutated by; mutations follow a normal distribution with these standard deviations
gene_ranges = [(-3, 3) for i in range(2)]
pop_size = 20  # number of organisms in the population
time_limit = 10**10  # how long each fitness test will run for before just giving up
tick_length = .2  # how often the physics engine will update; smaller values create more precise simulations but take longer

e = Environment(solids=[
    pe.Circle(pos=[-100, -100]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
],
                g_type='downward',
                g_strength=.2)

# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(np.random.uniform(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))
p = Population(initial_population)
import random

import numpy as np

from environments import Environment
from agents import RandomAgent
from agents import ValueApproxAgent

num_gen = 1000
tot_reward = 0

env = Environment(6)
agent = ValueApproxAgent(env.action_space, 0.05)

for i in range(num_gen):
    curr_arm = agent.choose_action()
    curr_reward = env.try_arm(curr_arm)
    agent.learn(curr_arm, curr_reward)
    tot_reward += curr_reward

print('Total Reward: ', tot_reward)
print('Original Probabilities: ', env._probs)
print('Computed Probabilities: ', agent.approx_values)
def __init__(self, name):
    Environment.__init__(self, name)
def test():
    envStrings = genEnvStrings(20, 10, 10)
    env = Environment(envStrings, selectRandomStart(envStrings))
    e = Explorer(env)
    e.explore()