Example #1
def collectData(agent):
    print('Start', agent.memory.size)
    disablePrint()
    i = agent.memory.size
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act,
                                 clean(obs).detach(), rew, h0.detach(),
                                 c0.detach(), hn.detach(), cn.detach(),
                                 int(not done))
            env.render()
            if done:
                break
        env.close()
    enablePrint()
    print('Done')
    return agent.memory.memory
Example #2
def collectData(info):
    i, location, ID = info
    print('Start', ID)
    disablePrint()
    agent = Agent(memory=i)
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act,
                                 clean(obs).detach(), rew, h0.detach(),
                                 c0.detach(), hn.detach(), cn.detach(),
                                 int(not done))
            env.render()
            if done:
                break
        env.close()
    saveData(agent, location, ID)
    enablePrint()
    print('Done', ID)
    return os.getpid()
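
Both collectData variants lean on helpers defined elsewhere in the project (disablePrint, enablePrint, clean). A minimal sketch of what they might look like, assuming clean just converts a raw image observation into a normalized float tensor; the bodies below are guesses, not the project's actual code:

import os
import sys

import torch


def disablePrint():
    # silence stdout while the environment is being stepped
    sys.stdout = open(os.devnull, 'w')


def enablePrint():
    # restore the real stdout
    sys.stdout = sys.__stdout__


def clean(obs):
    # HxWxC uint8 image -> normalized 1xCxHxW float tensor
    obs = torch.as_tensor(obs, dtype=torch.float32) / 255.0
    return obs.permute(2, 0, 1).unsqueeze(0)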
Example #3
def do(args, env):
    do_env = Environment(name="do", outer=env)
    if len(args) == 0:
        throw_error(
            "syntax",
            "Incorrect use of (do ...): must take at least one argument.")
    result = None
    for a in args:
        result = ev.evaluate(a, do_env)
    return result
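
(do ...) simply evaluates its arguments in order inside a child scope and returns the last value. A self-contained toy illustration of that behavior, with a stub evaluator standing in for ev.evaluate (everything here is hypothetical except the last-value-wins logic):

class ToyEnv(dict):
    # stand-in for Environment: a dict of bindings with an outer link
    def __init__(self, outer=None):
        super().__init__()
        self.outer = outer


def toy_evaluate(expr, env):
    # ints evaluate to themselves, ('set', name, value) creates a binding,
    # and a bare name looks itself up
    if isinstance(expr, int):
        return expr
    if isinstance(expr, tuple) and expr[0] == 'set':
        env[expr[1]] = toy_evaluate(expr[2], env)
        return env[expr[1]]
    return env[expr]


def toy_do(args, env):
    do_env = ToyEnv(outer=env)
    result = None
    for a in args:
        result = toy_evaluate(a, do_env)
    return result


print(toy_do([('set', 'x', 1), 'x', 41], ToyEnv()))  # prints 41: the last value wins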
Example #4
    def setUp(self):
        # An observation space
        observation_space = gym.spaces.Discrete(7)

        # Default reward
        default_reward = Vector([1, 2, 1])

        # Set seed to 0 for testing.
        self.environment = Environment(observation_space=observation_space,
                                       default_reward=default_reward,
                                       seed=0)
Example #5
def anonymous(*arguments):
    # print("inside anonymous function")
    # print("arguments(" + str(len(arguments)) + "):", arguments)
    if len(arguments) != len(largs):
        throw_error(
            "syntax", "This function takes " + str(len(largs)) +
            " arguments (" + str(len(arguments)) + " provided).")
    lenv = Environment(name="anon_fn",
                       outer=env,
                       variables=largs,
                       values=arguments)
    return ev.evaluate(lbody, lenv)
Example #6
mutate_chance = .8  # the odds of an organism being mutated on any given generation
full_mutate_chance = .4  # odds of an organism being replaced by a randomized organism instead of just being tweaked according to the normal distribution
standard_deviations = [
    .05 for i in range(2)
]  # how much each gene is mutated by; mutations follow a normal distribution with these standard deviations
gene_ranges = [(-3, 3) for i in range(2)]
pop_size = 20  # number of organisms in the population

time_limit = 10**10  # how long each fitness test will run for before just giving up
tick_length = .2  # how often the physics engine will update, smaller values create more precise simulations but take longer

e = Environment(solids=[
    pe.Circle(pos=[-100, -100]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
],
                g_type='downward',
                g_strength=.2)

# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(np.random.uniform(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))

p = Population(initial_population)
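
The mutation parameters above are only configuration; a minimal sketch of how they could be applied to an organism (the dna attribute matches Organism(dna) above, but the helper name and logic are assumptions):

import numpy as np


def mutate(organism, mutate_chance, full_mutate_chance,
           standard_deviations, gene_ranges):
    # with probability mutate_chance the organism is mutated at all;
    # a mutated organism is either fully re-randomized (full_mutate_chance)
    # or each gene gets a normally distributed nudge
    if np.random.random() >= mutate_chance:
        return organism
    if np.random.random() < full_mutate_chance:
        organism.dna = [np.random.uniform(lo, hi) for lo, hi in gene_ranges]
    else:
        organism.dna = [g + np.random.normal(0, sd)
                        for g, sd in zip(organism.dna, standard_deviations)]
    return organism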
Example #7
import numpy as np
from time import time
import math

start_time = time()  # just a timer


# hyperbolic tangent function: like sigmoid, but with a range of (-1, 1) instead of (0, 1), used for squashing while still allowing negative values
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)
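
A quick sanity check of the helper (Python's built-in math.tanh computes the same value and could be used instead):

print(tanh(0))   # 0.0
print(tanh(1))   # ~0.7616, same as math.tanh(1)
print(tanh(-1))  # ~-0.7616; outputs stay within (-1, 1)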


# config
order = 5
e = Environment(solids=[pe.Circle(pos=[-100, .001])],
                g_type='uniform',
                g_strength=[0, -9.81])
destination = np.array([100, 0])
n = nn.NeuralNetwork(inputs=np.array([[
    e.g_strength[1] / 10, (destination[0] - e.solids[0].pos[0]) / 100,
    (destination[1] - e.solids[0].pos[1]) / 100
]]),
                     l1_size=4)

# run neural network
for i in range(10**order):
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))

    # turn the inputs into outputs using existing weights
    n.feedforward()
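
nn.NeuralNetwork is a project module that is not shown here. As a rough sketch, a single-hidden-layer feedforward pass with l1_size hidden units might look like this (layer sizes, weight initialization, and the tanh output layer are assumptions, not the project's implementation):

import numpy as np


class TinySketchNetwork:
    def __init__(self, inputs, l1_size, out_size=2):
        self.inputs = inputs                                # shape (1, n_in)
        self.w1 = np.random.randn(inputs.shape[1], l1_size)
        self.w2 = np.random.randn(l1_size, out_size)

    def feedforward(self):
        # squash activations with tanh so they stay in (-1, 1)
        self.hidden = np.tanh(self.inputs @ self.w1)
        self.output = np.tanh(self.hidden @ self.w2)
        return self.output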
Example #8
import numpy as np
from time import time
import math

start_time = time()  # just a timer


# hyperbolic tangent function: like sigmoid, but with a range of (-1, 1) instead of (0, 1), used for squashing while still allowing negative values
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


# config
order = 3
e = Environment(solids=[
    pe.Circle(pos=[-100, 0], mass=100, static=True),
    pe.Circle(pos=[0, 0], velocity=[4, 0], mass=1, radius=1),
    pe.Circle(pos=[50, 0], velocity=[0, 2.582], mass=20)
],
                g_type='nonuniform',
                g_strength=10)
n = nn.NeuralNetwork(inputs=np.array(
    [[e.g_strength / 10, e.solids[0].pos[0] / 10, e.solids[0].pos[1] / 10]]),
                     l1_size=8)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))

    # # switch variables every 5 iterations
    # if i % 20 == 0:
Example #9
gen_count = 300  # for how many generations training will last
mutate_chance = .5  # the odds of an organism being mutated on any given generation
full_mutate_chance = .2  # odds of an organism being replaced by a randomized organism instead of just being tweaked according to the normal distribution
standard_deviations = [.1 for i in range(3)]  # how much each gene is mutated by; mutations follow a normal distribution with these standard deviations
gene_ranges = [(-5, 5) for i in range(3)]
pop_size = 100  # number of organisms in the population

time_limit = 50  # how long each fitness test will run for before just giving up
tick_length = .2  # how often the physics engine will update; smaller values create more precise simulations but take longer

start_pos = [0, 11.001]
x = start_pos[0]
y = start_pos[1]
e = Environment(solids=[pe.Circle(static=True),
                        pe.Circle(radius=1, pos=start_pos)],
                g_type='nonuniform',
                g_strength=10)


# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(randrange(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))

p = Population(initial_population)


# iterates through all generations
Example #10
    if r.random() < .5:
        return x - delta
    return x + delta
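
The fragment above is the tail of a mutation helper that nudges a value up or down by delta with equal probability; a self-contained version might look like this (the function name is an assumption):

import random as r


def nudge(x, delta):
    # move x down or up by delta with a 50/50 chance
    if r.random() < .5:
        return x - delta
    return x + delta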


# all 6 possible orders in which the algorithm will be introduced to the environments
orders = [['PS', 'TD', 'SV'], ['PS', 'SV', 'TD'], ['TD', 'PS', 'SV'],
          ['TD', 'SV', 'PS'], ['SV', 'TD', 'PS'], ['SV', 'PS', 'TD']]
# possible start locations for PS_1's rocket (solids[1])
ps1_starts = [[-11.001, .1], [.1, -11.001], [11.001, .1], [.1, 11.001],
              [7.8, 7.8], [-7.8, 7.8], [-7.8, -7.8], [7.8, -7.8]]

# 6 environments for the LT ML algorithm to use, only initialized with instance variables that will be kept constant
PS_1 = Environment(
    solids=[pe.Circle(static=True),
            pe.Circle(radius=1, pos=[0, 11.01])],
    g_type='nonuniform',
    g_strength=100)
PS_2 = Environment(solids=[
    pe.Circle(static=True, pos=[-100, 0], mass=100),
    pe.Circle(radius=1, pos=[-88.99, 0], mass=1),
    pe.Circle(radius=3, pos=[1, 0], velocity=[0, 3.162])
],
                   g_type='nonuniform',
                   g_strength=10)
TD_1 = Environment(solids=[
    pe.Circle(pos=[1, 1]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
Example #11
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys
# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

environment = Environment()

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n

print("State %2f" % state_num)
print("Action %2f" % action_num)

agent = DeepQAgent(state_num, action_num)

if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
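
Usage note: the agent trains only when 'train' is passed as the first command-line argument; any other invocation runs the agent without training. Assuming the script is saved as run.py (the filename is hypothetical):

python run.py train
python run.py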
Example #12
import random
import numpy as np
from environments import Environment
from agents import RandomAgent
from agents import ValueApproxAgent

num_gen = 1000
tot_reward = 0

env = Environment(6)
agent = ValueApproxAgent(env.action_space, 0.05)

for i in range(num_gen):
    curr_arm = agent.choose_action()
    curr_reward = env.try_arm(curr_arm)
    agent.learn(curr_arm, curr_reward)
    tot_reward += curr_reward

print('Total Reward: ', tot_reward)
print('Original Probabilities: ', env._probs)
print('Computed Probabilities: ', agent.approx_values)
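
ValueApproxAgent is not shown here; its learn step is presumably the usual constant-step-size bandit update Q(a) <- Q(a) + alpha * (reward - Q(a)) with alpha = 0.05. A minimal sketch of an agent with that behavior (the epsilon-greedy choice and every name below are assumptions):

import numpy as np


class SketchValueAgent:
    def __init__(self, n_arms, alpha, epsilon=0.1):
        self.approx_values = np.zeros(n_arms)  # running value estimate per arm
        self.alpha = alpha
        self.epsilon = epsilon

    def choose_action(self):
        # explore with probability epsilon, otherwise pick the best-looking arm
        if np.random.random() < self.epsilon:
            return np.random.randint(len(self.approx_values))
        return int(np.argmax(self.approx_values))

    def learn(self, arm, reward):
        # move the estimate for this arm toward the observed reward
        self.approx_values[arm] += self.alpha * (reward - self.approx_values[arm])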
Example #13
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys
# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

cartpole = 'CartPole-v0'
mountaincar = 'MountainCar-v0'
# stockmarket = 'StockMarket'

current_env = mountaincar
environment = Environment(current_env)

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n

agent = DeepQAgent(state_num, action_num, current_env)

if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
Example #14
def test():
    envStrings = genEnvStrings(20, 10, 10)
    env = Environment(envStrings, selectRandomStart(envStrings))
    e = Explorer(env)
    e.explore()