def collectData(agent):
    print('Start', agent.memory.size)
    disablePrint()
    i = agent.memory.size
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
        # fresh recurrent state (hn, cn) for each new episode
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act, clean(obs).detach(),
                                 rew, h0.detach(), c0.detach(), hn.detach(),
                                 cn.detach(), int(not done))
            env.render()
            if done:
                break
    env.close()
    enablePrint()
    print('Done')
    return agent.memory.memory
def collectData(info):
    i, location, ID = info
    print('Start', ID)
    disablePrint()
    agent = Agent(memory=i)
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
        # fresh recurrent state (hn, cn) for each new episode
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act, clean(obs).detach(),
                                 rew, h0.detach(), c0.detach(), hn.detach(),
                                 cn.detach(), int(not done))
            env.render()
            if done:
                break
    env.close()
    saveData(agent, location, ID)
    enablePrint()
    print('Done', ID)
    return os.getpid()
def do(args, env):
    do_env = Environment(name="do", outer=env)
    if len(args) == 0:
        throw_error(
            "syntax",
            "Incorrect use of (do ...): must take at least one argument.")
    # evaluate each form in the child scope; the value of the last one wins
    result = None
    for a in args:
        result = ev.evaluate(a, do_env)
    return result
def setUp(self):
    # An observation space
    observation_space = gym.spaces.Discrete(7)

    # Default reward
    default_reward = Vector([1, 2, 1])

    # Seed the environment with 0 for reproducible tests.
    self.environment = Environment(observation_space=observation_space,
                                   default_reward=default_reward,
                                   seed=0)
def anonymous(*arguments):
    # print("inside anonymous function")
    # print("arguments(" + str(len(arguments)) + "):", arguments)
    if len(arguments) != len(largs):
        throw_error(
            "syntax",
            "This function takes " + str(len(largs)) + " arguments (" +
            str(len(arguments)) + " provided).")
    # bind the formal parameters to the supplied values in a new scope
    lenv = Environment(name="anon_fn",
                       outer=env,
                       variables=largs,
                       values=arguments)
    return ev.evaluate(lbody, lenv)
mutate_chance = .8  # odds of an organism being mutated in any given generation
full_mutate_chance = .4  # odds that a mutated organism is replaced by a fully random one instead of tweaked via the normal distribution
standard_deviations = [.05 for i in range(2)]  # how much each gene is mutated by; tweaks are drawn from a normal distribution
gene_ranges = [(-3, 3) for i in range(2)]
pop_size = 20  # number of organisms in the population
time_limit = 10**10  # how long each fitness test will run before giving up
tick_length = .2  # physics timestep; smaller values are more precise but slower

# arena: a circle boxed in by four static walls, with downward gravity
e = Environment(solids=[
    pe.Circle(pos=[-100, -100]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
],
                g_type='downward',
                g_strength=.2)

# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(np.random.uniform(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))
p = Population(initial_population)
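# A minimal sketch of the mutation rule the constants above describe; the
# function name and signature are illustrative, not from the project.
def mutate(dna, mutate_chance, full_mutate_chance, standard_deviations,
           gene_ranges):
    # most generations leave the organism untouched
    if np.random.random() >= mutate_chance:
        return list(dna)
    # occasionally replace the organism with a fully random one
    if np.random.random() < full_mutate_chance:
        return [np.random.uniform(lo, hi) for lo, hi in gene_ranges]
    # otherwise tweak each gene with normally distributed noise
    return [gene + np.random.normal(0, sd)
            for gene, sd in zip(dna, standard_deviations)]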
import numpy as np
from time import time
import math

start_time = time()  # just a timer


# hyperbolic tangent; like sigmoid but squashes to (-1, 1) instead of (0, 1),
# so it can also produce negative outputs
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


# config
order = 5
e = Environment(solids=[pe.Circle(pos=[-100, .001])],
                g_type='uniform',
                g_strength=[0, -9.81])
destination = np.array([100, 0])
n = nn.NeuralNetwork(inputs=np.array([[
    e.g_strength[1] / 10,
    (destination[0] - e.solids[0].pos[0]) / 100,
    (destination[1] - e.solids[0].pos[1]) / 100
]]),
                     l1_size=4)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))
    # turn the inputs into outputs using existing weights
    n.feedforward()
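# Standalone sanity check (added; not part of the original script): the
# hand-rolled tanh above is algebraically tanh(x) = (e^{2x} - 1) / (e^{2x} + 1),
# so it should agree with the standard library's math.tanh.
import math

for v in (-2.0, -0.5, 0.0, 0.5, 2.0):
    hand = (math.e**(2 * v) - 1) / (math.e**(2 * v) + 1)
    assert abs(hand - math.tanh(v)) < 1e-12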
import math
import numpy as np
from time import time

start_time = time()  # just a timer


# hyperbolic tangent; like sigmoid but squashes to (-1, 1) instead of (0, 1),
# so it can also produce negative outputs
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


# config
order = 3
e = Environment(solids=[
    pe.Circle(pos=[-100, 0], mass=100, static=True),
    pe.Circle(pos=[0, 0], velocity=[4, 0], mass=1, radius=1),
    pe.Circle(pos=[50, 0], velocity=[0, 2.582], mass=20)
],
                g_type='nonuniform',
                g_strength=10)
n = nn.NeuralNetwork(inputs=np.array(
    [[e.g_strength / 10, e.solids[0].pos[0] / 10, e.solids[0].pos[1] / 10]]),
                     l1_size=8)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))
    # # switch variables every 5 iterations
    # if i % 20 == 0:
gen_count = 300  # how many generations training will last
mutate_chance = .5  # odds of an organism being mutated in any given generation
full_mutate_chance = .2  # odds that a mutated organism is replaced by a fully random one instead of tweaked via the normal distribution
standard_deviations = [.1 for i in range(3)]  # how much each gene is mutated by; tweaks are drawn from a normal distribution
gene_ranges = [(-5, 5) for i in range(3)]
pop_size = 100  # number of organisms in the population
time_limit = 50  # how long each fitness test will run before giving up
tick_length = .2  # physics timestep; smaller values are more precise but slower

start_pos = [0, 11.001]
x = start_pos[0]
y = start_pos[1]
e = Environment(solids=[pe.Circle(static=True),
                        pe.Circle(radius=1, pos=start_pos)],
                g_type='nonuniform',
                g_strength=10)

# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(randrange(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))
p = Population(initial_population)

# iterates through all generations
    if r.random() < .5:
        return x - delta
    return x + delta


# all 6 possible orders in which the algorithm will be introduced to the
# environments
orders = [['PS', 'TD', 'SV'], ['PS', 'SV', 'TD'], ['TD', 'PS', 'SV'],
          ['TD', 'SV', 'PS'], ['SV', 'TD', 'PS'], ['SV', 'PS', 'TD']]
# possible start locations for PS_1's rocket (solids[1])
ps1_starts = [[-11.001, .1], [.1, -11.001], [11.001, .1], [.1, 11.001],
              [7.8, 7.8], [-7.8, 7.8], [-7.8, -7.8], [7.8, -7.8]]

# 6 environments for the LT ML algorithm to use, initialized only with the
# instance variables that will be kept constant
PS_1 = Environment(
    solids=[pe.Circle(static=True),
            pe.Circle(radius=1, pos=[0, 11.01])],
    g_type='nonuniform',
    g_strength=100)
PS_2 = Environment(solids=[
    pe.Circle(static=True, pos=[-100, 0], mass=100),
    pe.Circle(radius=1, pos=[-88.99, 0], mass=1),
    pe.Circle(radius=3, pos=[1, 0], velocity=[0, 3.162])
],
                   g_type='nonuniform',
                   g_strength=10)
TD_1 = Environment(solids=[
    pe.Circle(pos=[1, 1]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys

# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

environment = Environment()

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n
print("State %d" % state_num)
print("Action %d" % action_num)

agent = DeepQAgent(state_num, action_num)
if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
import random
import numpy as np

from environments import Environment
from agents import RandomAgent
from agents import ValueApproxAgent

num_gen = 1000
tot_reward = 0
env = Environment(6)
agent = ValueApproxAgent(env.action_space, 0.05)

for i in range(num_gen):
    curr_arm = agent.choose_action()
    curr_reward = env.try_arm(curr_arm)
    agent.learn(curr_arm, curr_reward)
    tot_reward += curr_reward

print('Total Reward: ', tot_reward)
print('Original Probabilities: ', env._probs)
print('Computed Probabilities: ', agent.approx_values)
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys

# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

cartpole = 'CartPole-v0'
mountaincar = 'MountainCar-v0'
# stockmarket = 'StockMarket'
current_env = mountaincar

environment = Environment(current_env)

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n

agent = DeepQAgent(state_num, action_num, current_env)
if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
def test():
    envStrings = genEnvStrings(20, 10, 10)
    env = Environment(envStrings, selectRandomStart(envStrings))
    e = Explorer(env)
    e.explore()