Example #1
 def __init__(self, agent_class_name, agent_module_path, mdp_class_name, mdp_module_path):
     agent_class = import_from_strings(agent_class_name, agent_module_path)
     mdp_class = import_from_strings(mdp_class_name, mdp_module_path)
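     # Build a throwaway environment around the MDP just to read off its spec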
     dummy_environment = Environment(mdp_class((1, 1)))
     self.spec = dummy_environment.spec()
     self.agent_class = agent_class
     self.mdp_class = mdp_class
Example #2
    def setUp(self):
        # An observation space
        observation_space = gym.spaces.Discrete(7)

        # Default reward
        default_reward = Vector([1, 2, 1])

        # Set initial_seed to 0 for testing.
        self.environment = Environment(observation_space=observation_space,
                                       default_reward=default_reward,
                                       seed=0)
Example #3
    def __init__(self):
        env = FlappyBird()
        self.p = PLE(env, add_noop_action=True)
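        # PLE wraps the FlappyBird game; add_noop_action=True also exposes a "do nothing" action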
        self.p.init()
        self.win_score = 10.
        action_space = len(self.p.getActionSet())
        state_space = len(self.p.getGameState())
        actions = ["up", "nothing"]
        state_names = list(self.p.getGameState().keys())

        Environment.__init__(self, env, action_space, state_space, actions,
                             state_names)
Example #4
def collectData(info):
    i, location, ID = info
    print('Start', ID)
    disablePrint()
    agent = Agent(memory=i)
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
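        # zero-initialised hidden and cell states for the agent's recurrent network (an LSTM, judging by the (hn, cn) pair)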
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act,
                                 clean(obs).detach(), rew, h0.detach(),
                                 c0.detach(), hn.detach(), cn.detach(),
                                 int(not done))
            env.render()
            if done:
                break
        env.close()
    saveData(agent, location, ID)
    enablePrint()
    print('Done', ID)
    return os.getpid()
Example #5
def collectData(agent):
    print('Start', agent.memory.size)
    disablePrint()
    i = agent.memory.size
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act,
                                 clean(obs).detach(), rew, h0.detach(),
                                 c0.detach(), hn.detach(), cn.detach(),
                                 int(not done))
            env.render()
            if done:
                break
        env.close()
    enablePrint()
    print('Done')
    return agent.memory.memory
Example #6
def dumps(data: dict, environment: Environment):
    """
    Dumps the given data into the dumps directory
    :param environment:
    :param data:
    :return:
    """

    timestamp = int(time.time())

    # Get environment name in snake case
    environment = um.str_to_snake_case(environment.__class__.__name__)

    # Get only first letter of each word
    env_name_abbr = ''.join([word[0] for word in environment.split('_')])

    # Specify full path
    file_path = Path(__file__).parent.parent.joinpath(
        'dumps/w/train_data/{}_w_{}_{}.yml'.format(env_name_abbr, timestamp,
                                                   Vector.decimal_precision))

    # If any parent directory doesn't exist, create it.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    with file_path.open(mode='w+', encoding='UTF-8') as f:
        f.write(um.structures_to_yaml(data=data))
Example #7
def do(args, env):
    do_env = Environment(name="do", outer=env)
    if len(args) == 0:
        throw_error(
            "syntax",
            "Incorrect use of (do ...): must take at least one argument.")
    result = None
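    # Evaluate each argument in order inside the child scope; the value of the last one is returned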
    for a in args:
        result = ev.evaluate(a, do_env)
    return result
Example #8
 def anonymous(*arguments):
     # print("inside anonymous function")
     # print("arguments(" + str(len(arguments)) + "):", arguments)
     if len(arguments) != len(largs):
         throw_error(
             "syntax", "This function takes " + str(len(largs)) +
             " arguments (" + str(len(arguments)) + " provided).")
     lenv = Environment(name="anon_fn",
                        outer=env,
                        variables=largs,
                        values=arguments)
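     # Evaluate the lambda body in the new scope where the parameters (largs) are bound to the supplied arguments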
     return ev.evaluate(lbody, lenv)
Example #9
    def test_synergies(self):

        cfg = KinArmSynergies2D.defcfg._deepcopy()
        cfg.dim = 3
        cfg.limits = (-180.0, 180.0)
        cfg.lengths = 1.0
        cfg.syn_span = 3
        cfg.syn_res = 3

        kin_env = Environment.create(cfg)
        m_signal = {'j0': 0.0, 'j1': 0.0, 'j2': 0.0, 's0': 1.0}

        feedback = kin_env.execute(m_signal)
        s_signal = feedback['s_signal']
        self.assertTrue(near(s_signal['y'], 0.0))
        self.assertTrue(near(s_signal['x'], -1.0))
Example #10
    def test_synergies(self):

        cfg = KinArmSynergies2D.defcfg._deepcopy()
        cfg.dim = 3
        cfg.limits = (-180.0, 180.0)
        cfg.lengths = 1.0
        cfg.syn_span = 3
        cfg.syn_res  = 3

        kin_env = Environment.create(cfg)
        m_signal = {'j0': 0.0, 'j1': 0.0, 'j2': 0.0, 's0': 1.0}

        feedback = kin_env.execute(m_signal)
        s_signal = feedback['s_signal']
        self.assertTrue(near(s_signal['y'],  0.0))
        self.assertTrue(near(s_signal['x'], -1.0))
Example #11
import physics_engine as pe
import neural_network as nn
from environments import Environment
import numpy as np
from time import time
import math

start_time = time()  # just a timer


# hyperbolic tangent function: similar to the sigmoid but with a range of (-1, 1) instead of (0, 1); used for squashing while still allowing negative values
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


# config
order = 5
e = Environment(solids=[pe.Circle(pos=[-100, .001])],
                g_type='uniform',
                g_strength=[0, -9.81])
destination = np.array([100, 0])
n = nn.NeuralNetwork(inputs=np.array([[
    e.g_strength[1] / 10, (destination[0] - e.solids[0].pos[0]) / 100,
    (destination[1] - e.solids[0].pos[1]) / 100
]]),
                     l1_size=4)

# run neural network
for i in range(10**order):
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))

    # turn the inputs into outputs using existing weights
    n.feedforward()
Example #12
    plotting.circle(xs[ 1:-1], ys[ 1:-1], radius=0.008, **kwargs)
    plotting.circle(xs[-1:  ], ys[-1:  ], radius=0.01, color='red')
    plotting.hold(False)


if __name__ == '__main__':
    from environments import Environment
    from environments.envs import KinematicArm2D

    # Arm with same length segments
    cfg = KinematicArm2D.defcfg._deepcopy()
    cfg.dim = 20
    cfg.limits = (-150.0, 150.0)
    cfg.lengths = 1/cfg.dim
    cfg.full_sensors = True
    kin_env = Environment.create(cfg)

    # Arm with decreasing-length segments
    cfg2 = cfg._deepcopy()
    cfg2.lengths = np.array([0.9**i for i in range(cfg2.dim)])
    cfg2.lengths = cfg2.lengths/sum(cfg2.lengths)
    kin_env2 = Environment.create(cfg2)

    m_signals = [{'j0': -31.23, 'j1': -44.21, 'j2': -20.18, 'j3': +31.55, 'j4': +35.66, 'j5':  +5.19, 'j6': +17.34, 'j7': +24.51, 'j8':  -2.69, 'j9': +26.52, 'j10': -34.87, 'j11': +10.72, 'j12': -19.38, 'j13': -33.49, 'j14': +13.78, 'j15': -22.43, 'j16': +33.61, 'j17': -28.95, 'j18': +34.31, 'j19':   45.75},
                 {'j0': -53.66, 'j1': -56.20, 'j2': -56.67, 'j3': -34.83, 'j4': -20.29, 'j5':  +7.51, 'j6': +20.92, 'j7': +25.51, 'j8': -17.59, 'j9':  +6.51, 'j10':  -9.65, 'j11': +45.70, 'j12': +20.88, 'j13': +24.25, 'j14': +28.65, 'j15': -42.79, 'j16': +34.45, 'j17': -39.90, 'j18':  +2.74, 'j19':  -11.12},
                 {'j0': +58.13, 'j1': +45.43, 'j2': -21.01, 'j3':  +2.35, 'j4': -38.90, 'j5': -39.23, 'j6': +45.14, 'j7': -57.58, 'j8': +39.49, 'j9': +29.01, 'j10':  -0.09, 'j11': -56.19, 'j12': +56.07, 'j13':  +5.91, 'j14': +36.61, 'j15': -52.65, 'j16': -58.60, 'j17': +32.45, 'j18': +43.69, 'j19': -120.77},
                 {'j0': +53.09, 'j1': +55.83, 'j2': -51.08, 'j3': +41.44, 'j4': +44.43, 'j5':  +4.67, 'j6':  +2.15, 'j7': +37.23, 'j8':  -3.77, 'j9': -46.70, 'j10': +56.41, 'j11': -21.08, 'j12': +13.73, 'j13': +47.23, 'j14':  +7.94, 'j15': -27.26, 'j16': +56.54, 'j17':  -7.77, 'j18': -18.98, 'j19': +149.46}]

    plotting.output_file('html/arm_vizu.html')

    for i, m_signal in enumerate(m_signals):
Example #13
def test_agents(environment: Environment,
                hv_reference: Vector,
                variable: str,
                agents_configuration: dict,
                graph_configuration: dict,
                epsilon: float = None,
                alpha: float = None,
                max_steps: int = None,
                states_to_observe: list = None,
                number_of_agents: int = 30,
                gamma: float = 1.,
                solution: list = None,
                initial_q_value: Vector = None,
                evaluation_mechanism: EvaluationMechanism = None):
    """
    If DATA_PER_STATE is chosen in graph_configuration, the agent trains for `limit` steps and only collects
    train_data in the last steps (`interval` is ignored).

    If MEMORY is chosen in graph_configuration, the agent trains for `limit` steps and collects train_data every
    `interval` steps.

    :param initial_q_value:
    :param graph_configuration:
    :param solution:
    :param environment:
    :param hv_reference:
    :param variable:
    :param agents_configuration:
    :param epsilon:
    :param alpha:
    :param max_steps:
    :param states_to_observe:
    :param number_of_agents:
    :param gamma:
    :param evaluation_mechanism:
    :return:
    """

    # Extract graph_types
    graph_types = set(graph_configuration.keys())

    if len(graph_types) > 2:
        print("More than 2 graphs is not recommended")

    # Parameters
    if states_to_observe is None:
        states_to_observe = {environment.initial_state}

    complex_states = isinstance(environment.observation_space[0],
                                gym.spaces.Tuple)

    if complex_states and GraphType.DATA_PER_STATE in graph_types:
        print(
            "This environment has complex states, so DATA_PER_STATE graph is disabled."
        )
        graph_configuration.pop(GraphType.DATA_PER_STATE)

    # Build environment
    env_name = environment.__class__.__name__
    env_name_snake = str_to_snake_case(env_name)

    # File timestamp
    timestamp = int(time.time())

    # Write all information in configuration path
    write_config_file(timestamp=timestamp,
                      number_of_agents=number_of_agents,
                      env_name_snake=env_name_snake,
                      seed=','.join(map(str, range(number_of_agents))),
                      epsilon=epsilon,
                      alpha=alpha,
                      gamma=gamma,
                      max_steps=max_steps,
                      variable=variable,
                      agents_configuration=agents_configuration,
                      graph_configuration=graph_configuration,
                      evaluation_mechanism=evaluation_mechanism)

    # Create graphs structure
    graphs, graphs_info = initialize_graph_data(
        graph_types=graph_types, agents_configuration=agents_configuration)

    # Show information
    print('Environment: {}'.format(env_name))

    for graph_type in graph_types:

        # Extract interval and limit
        interval = graph_configuration[graph_type].get('interval', 1)
        limit = graph_configuration[graph_type]['limit']

        # Show information
        print(('\t' * 1) +
              "Graph type: {} - [{}/{}]".format(graph_type, limit, interval))

        # Set interval to get train_data
        Agent.interval_to_get_data = interval

        # Execute an iteration with a different initial_seed for each agent
        for seed in range(number_of_agents):

            # Show information
            print(('\t' * 2) + "Execution: {}".format(seed + 1))

            # For each configuration
            for agent_type in agents_configuration:

                # Show information
                print(('\t' * 3) + 'Agent: {}'.format(agent_type.value))

                # Extract configuration for that agent
                for configuration in agents_configuration[agent_type].keys():

                    # Show information
                    print(
                        ('\t' * 4) + '{}: {}'.format(variable, configuration),
                        end=' ')

                    # Mark of time
                    t0 = time.time()

                    # Reset environment
                    environment.reset()
                    environment.seed(seed=seed)

                    # Variable parameters
                    parameters = {
                        'epsilon': epsilon,
                        'alpha': alpha,
                        'gamma': gamma,
                        'max_steps': max_steps,
                        'evaluation_mechanism': evaluation_mechanism,
                        'initial_value': initial_q_value
                    }

                    if variable == 'decimal_precision':
                        Vector.set_decimal_precision(
                            decimal_precision=configuration)
                    else:
                        # Modify current configuration
                        parameters.update({variable: configuration})

                    agent, v_s_0 = train_agent_and_get_v_s_0(
                        agent_type=agent_type,
                        environment=environment,
                        graph_type=graph_type,
                        graph_types=graph_types,
                        hv_reference=hv_reference,
                        limit=limit,
                        seed=seed,
                        parameters=parameters,
                        states_to_observe=states_to_observe)

                    print('-> {:.2f}s'.format(time.time() - t0))

                    train_data = dict()

                    if agent_type is AgentType.PQL and graph_type is GraphType.DATA_PER_STATE:
                        train_data.update({
                            'vectors': {
                                state: {
                                    action: agent.q_set(state=state,
                                                        action=action)
                                    for action in agent.nd[state].keys()
                                }
                                for state in agent.nd.keys()
                            }
                        })

                    # Order vectors by origin Vec(0) nearest
                    train_data.update({
                        'v_s_0':
                        Vector.order_vectors_by_origin_nearest(vectors=v_s_0),
                        # 'q': agent.q,
                        # 'v': agent.v
                    })

                    # Write vectors found into path
                    dumps_train_data(
                        timestamp=timestamp,
                        seed=seed,
                        env_name_snake=env_name_snake,
                        train_data=train_data,
                        variable=variable,
                        agent_type=agent_type,
                        configuration=configuration,
                        evaluation_mechanism=evaluation_mechanism,
                        columns=environment.observation_space[0].n)

                    # Update graphs
                    update_graphs(graphs=graphs,
                                  agent=agent,
                                  graph_type=graph_type,
                                  configuration=str(configuration),
                                  agent_type=agent_type,
                                  states_to_observe=states_to_observe,
                                  graphs_info=graphs_info,
                                  solution=solution)

    prepare_data_and_show_graph(timestamp=timestamp,
                                env_name=env_name,
                                env_name_snake=env_name_snake,
                                graphs=graphs,
                                number_of_agents=number_of_agents,
                                agents_configuration=agents_configuration,
                                alpha=alpha,
                                epsilon=epsilon,
                                gamma=gamma,
                                graph_configuration=graph_configuration,
                                max_steps=max_steps,
                                initial_state=environment.initial_state,
                                variable=variable,
                                graphs_info=graphs_info,
                                evaluation_mechanism=evaluation_mechanism,
                                solution=solution)
Example #14
class TestEnvironment(unittest.TestCase):
    def setUp(self):
        # An observation space
        observation_space = gym.spaces.Discrete(7)

        # Default reward
        default_reward = Vector([1, 2, 1])

        # Set initial_seed to 0 for testing.
        self.environment = Environment(observation_space=observation_space,
                                       default_reward=default_reward,
                                       seed=0)

    def tearDown(self):
        self.environment = None

    def test_init(self):
        """
        Testing whether the constructor works
        :return:
        """

        # The environment must have the following attributes
        self.assertTrue(hasattr(self.environment, '_actions'))
        self.assertTrue(hasattr(self.environment, '_icons'))
        self.assertTrue(hasattr(self.environment, 'actions'))
        self.assertTrue(hasattr(self.environment, 'icons'))
        self.assertTrue(hasattr(self.environment, 'action_space'))
        self.assertTrue(hasattr(self.environment, 'observation_space'))
        self.assertTrue(hasattr(self.environment, 'np_random'))
        self.assertTrue(hasattr(self.environment, 'initial_seed'))
        self.assertTrue(hasattr(self.environment, 'initial_state'))
        self.assertTrue(hasattr(self.environment, 'current_state'))
        self.assertTrue(hasattr(self.environment, 'finals'))
        self.assertTrue(hasattr(self.environment, 'obstacles'))
        self.assertTrue(hasattr(self.environment, 'default_reward'))

        # The environment must have the following methods.
        self.assertTrue(hasattr(self.environment, 'step'))
        self.assertTrue(hasattr(self.environment, 'initial_seed'))
        self.assertTrue(hasattr(self.environment, 'reset'))
        self.assertTrue(hasattr(self.environment, 'render'))
        self.assertTrue(hasattr(self.environment, 'next_state'))
        self.assertTrue(hasattr(self.environment, 'is_final'))

        self.assertIsInstance(self.environment.observation_space,
                              gym.spaces.Space)
        self.assertIsInstance(self.environment.action_space, gym.spaces.Space)

        self.assertEqual(self.environment.initial_state,
                         self.environment.current_state)

    def test_icons(self):
        """
        Testing icons property
        :return:
        """
        self.assertEqual(self.environment._icons, self.environment.icons)

    def test_actions(self):
        """
        Testing actions property
        :return:
        """
        self.assertEqual(self.environment._actions, self.environment.actions)

    def test_action_space_length(self):
        pass

    def test_seed(self):
        """
        Testing initial_seed method
        :return:
        """

        self.environment.seed(seed=0)
        n1_1 = self.environment.np_random.randint(0, 10)
        n1_2 = self.environment.np_random.randint(0, 10)

        self.environment.seed(seed=0)
        n2_1 = self.environment.np_random.randint(0, 10)
        n2_2 = self.environment.np_random.randint(0, 10)

        self.assertEqual(n1_1, n2_1)
        self.assertEqual(n1_2, n2_2)

    def test_reset(self):
        """
        Testing reset method
        :return:
        """

        # Set current position to random position
        self.environment.current_state = self.environment.observation_space.sample()

        # Reset environment
        self.environment.reset()

        # Asserts
        self.assertEqual(self.environment.initial_state,
                         self.environment.current_state)

    def test_states(self):
        """
        Testing that all states are contained in the observation space
        :return:
        """
        pass

    def test_reachable_states(self):
        pass

    def test_transition_probability(self):
        pass

    def test_transition_reward(self):
        pass
Example #15
    plotting.circle(xs[1:-1], ys[1:-1], radius=0.008, **kwargs)
    plotting.circle(xs[-1:], ys[-1:], radius=0.01, color='red')
    plotting.hold(False)


if __name__ == '__main__':
    from environments import Environment
    from environments.envs import KinematicArm2D

    # Arm with same length segments
    cfg = KinematicArm2D.defcfg._deepcopy()
    cfg.dim = 20
    cfg.limits = (-150.0, 150.0)
    cfg.lengths = 1 / cfg.dim
    cfg.full_sensors = True
    kin_env = Environment.create(cfg)

    # Arm with decreasing-length segments
    cfg2 = cfg._deepcopy()
    cfg2.lengths = np.array([0.9**i for i in range(cfg2.dim)])
    cfg2.lengths = cfg2.lengths / sum(cfg2.lengths)
    kin_env2 = Environment.create(cfg2)

    m_signals = [{
        'j0': -31.23,
        'j1': -44.21,
        'j2': -20.18,
        'j3': +31.55,
        'j4': +35.66,
        'j5': +5.19,
        'j6': +17.34,
Example #16
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys
# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

environment = Environment()

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n

print("State %d" % state_num)
print("Action %d" % action_num)

agent = DeepQAgent(state_num, action_num)

if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
Example #17
import physics_engine as pe
import neural_network as nn
from environments import Environment
import numpy as np
from time import time
import math

start_time = time()  # just a timer


# hyperbolic tangent function: similar to the sigmoid but with a range of (-1, 1) instead of (0, 1); used for squashing while still allowing negative values
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


# config
order = 3
e = Environment(solids=[
    pe.Circle(pos=[-100, 0], mass=100, static=True),
    pe.Circle(pos=[0, 0], velocity=[4, 0], mass=1, radius=1),
    pe.Circle(pos=[50, 0], velocity=[0, 2.582], mass=20)
],
                g_type='nonuniform',
                g_strength=10)
n = nn.NeuralNetwork(inputs=np.array(
    [[e.g_strength / 10, e.solids[0].pos[0] / 10, e.solids[0].pos[1] / 10]]),
                     l1_size=8)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))

    # # switch variables every 5 iterations
    # if i % 20 == 0:
Example #18
# This is the one thing that is provided de-facto with every application
# whether you like it or not.
from twitter.common import options

class AppException(Exception): pass

options.add(
 '--env', '--environment',
 action='callback',
 callback=Environment._option_parser,
 default='DEVELOPMENT',
 metavar='ENV',
 dest='twitter_common_app_environment',
 help="The environment in which to run this Python application. "
 "Known environments: %s [default: %%default]" % ' '.join(Environment.names()))

options.add(
 '--app_debug',
 action='store_true',
 default=False,
 dest='twitter_common_app_debug',
 help="Print extra debugging information during application initialization.")

_APP_REGISTRY = {}
_APP_NAME = None
_APP_INITIALIZED = False

__all__ = [
  # exceptions
  'AppException',
Example #19
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys
# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

cartpole = 'CartPole-v0'
mountaincar = 'MountainCar-v0'
# stockmarket = 'StockMarket'

current_env = mountaincar
environment = Environment(current_env)

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n

agent = DeepQAgent(state_num, action_num, current_env)

if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
Example #20
import physics_engine as pe
import neural_network as nn
from environments import Environment
import numpy as np
from time import time
import math

start_time = time()  # just a timer

# config
order = 6
e = Environment(solids=[pe.Circle(pos=[-100, .001])],
                g_type='uniform',
                g_strength=[0, -9.81])
destinations = [[0, 0], [100, 0], [200, 0], [100, 0], [200, 0]]
gravities = [-9.81, -9.81, -9.81, -20, -20]
n = nn.NeuralNetwork(inputs=np.array([[0, 0, 0]]), l1_size=8)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))

    # switch variables every 5 iterations
    if i % 20 == 0:
        destination = destinations[int(i / 20) % 5]
        e.g_strength[1] = gravities[int(i / 20) % 5]

    n.inputs[0] = [
        e.g_strength[1] / 10, (destination[0] + 100) / 100,
Example #21
gen_count = 300 # for how many generations training will last
mutate_chance = .5 # the odds of an organism being mutated on any given generation
full_mutate_chance = .2 # odds of an organism being replaced by a randomized organism instead of just being tweaked according to the normal distribution
standard_deviations = [.1 for i in range(3)] # how much each gene is mutated by, follows normal distribution so
gene_ranges = [(-5, 5) for i in range(3)]
pop_size = 100 # number of organisms in the population

time_limit = 50 # how long each fitness test will run for before just giving up
tick_length = .2 # how often the physics engine will update, smaller values create more precise simulations but take longer

start_pos = [0, 11.001]
x = start_pos[0]
y = start_pos[1]
e = Environment(solids=[pe.Circle(static=True),
                        pe.Circle(radius=1, pos=start_pos)],
                g_type='nonuniform',
                g_strength=10)


# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(randrange(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))

p = Population(initial_population)


# iterates through all generations
Example #22
    if r.random() < .5:
        return x - delta
    return x + delta


# all 6 possible orders in which the algorithm will be introduced to the environments
orders = [['PS', 'TD', 'SV'], ['PS', 'SV', 'TD'], ['TD', 'PS', 'SV'],
          ['TD', 'SV', 'PS'], ['SV', 'TD', 'PS'], ['SV', 'PS', 'TD']]
# possible start locations for PS_1's rocket (solids[1])
ps1_starts = [[-11.001, .1], [.1, -11.001], [11.001, .1], [.1, 11.001],
              [7.8, 7.8], [-7.8, 7.8], [-7.8, -7.8], [7.8, -7.8]]

# 6 environments for the LT ML algorithm to use, only initialized with instance variables that will be kept constant
PS_1 = Environment(
    solids=[pe.Circle(static=True),
            pe.Circle(radius=1, pos=[0, 11.01])],
    g_type='nonuniform',
    g_strength=100)
PS_2 = Environment(solids=[
    pe.Circle(static=True, pos=[-100, 0], mass=100),
    pe.Circle(radius=1, pos=[-88.99, 0], mass=1),
    pe.Circle(radius=3, pos=[1, 0], velocity=[0, 3.162])
],
                   g_type='nonuniform',
                   g_strength=10)
TD_1 = Environment(solids=[
    pe.Circle(pos=[1, 1]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
Example #23

class AppException(Exception):
    pass


options.add('--env',
            '--environment',
            action='callback',
            callback=Environment._option_parser,
            default='DEVELOPMENT',
            metavar='ENV',
            dest='twitter_common_app_environment',
            help="The environment in which to run this Python application. "
            "Known environments: %s [default: %%default]" %
            ' '.join(Environment.names()))

options.add(
    '--app_debug',
    action='store_true',
    default=False,
    dest='twitter_common_app_debug',
    help="Print extra debugging information during application initialization."
)

_APP_REGISTRY = {}
_APP_NAME = None
_APP_INITIALIZED = False

__all__ = [
    # exceptions
Example #24
 def __init__(self, timer, dimensions):
     Dimensioned.__init__(self, Vertex(*dimensions))
     Environment.__init__(self, timer)
Example #25
mutate_chance = .8  # the odds of an organism being mutated on any given generation
full_mutate_chance = .4  # odds of an organism being replaced by a randomized organism instead of just being tweaked according to the normal distribution
standard_deviations = [
    .05 for i in range(2)
]  # how much each gene is mutated by, follows normal distribution so
gene_ranges = [(-3, 3) for i in range(2)]
pop_size = 20  # number of organisms in the population

time_limit = 10**10  # how long each fitness test will run for before just giving up
tick_length = .2  # how often the physics engine will update, smaller values create more precise simulations but take longer

e = Environment(solids=[
    pe.Circle(pos=[-100, -100]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
],
                g_type='downward',
                g_strength=.2)

# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(np.random.uniform(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))

p = Population(initial_population)
Example #26
import random
import numpy as np
from environments import Environment
from agents import RandomAgent
from agents import ValueApproxAgent

num_gen = 1000
tot_reward = 0

env = Environment(6)
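# Environment(6) appears to set up a 6-armed bandit: try_arm returns a reward and env._probs holds the hidden per-arm probabilities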
agent = ValueApproxAgent(env.action_space, 0.05)

for i in range(num_gen):
    curr_arm = agent.choose_action()
    curr_reward = env.try_arm(curr_arm)
    agent.learn(curr_arm, curr_reward)
    tot_reward += curr_reward

print('Total Reward: ', tot_reward)
print('Original Probabilities: ', env._probs)
print('Computed Probabilities: ', agent.approx_values)
Example #27
 def __init__(self, name):
     Environment.__init__(self, name)
Example #28
def test():
    envStrings = genEnvStrings(20, 10, 10)
    env = Environment(envStrings, selectRandomStart(envStrings))
    e = Explorer(env)
    e.explore()