def collectData(agent):
    print('Start', agent.memory.size)
    disablePrint()
    i = agent.memory.size
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
        # fresh recurrent state (hn, cn) for each new episode
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act, clean(obs).detach(),
                                 rew, h0.detach(), c0.detach(), hn.detach(),
                                 cn.detach(), int(not done))
            env.render()
            if done:
                break
    env.close()
    enablePrint()
    print('Done')
    return agent.memory.memory
def collectData(info):
    i, location, ID = info
    print('Start', ID)
    disablePrint()
    agent = Agent(memory=i)
    env = Environment(render=False).fruitbot
    while i > 0:
        obs = clean(env.reset())
        # fresh recurrent state (hn, cn) for each new episode
        hn = torch.zeros(2, 1, hidden_size, device=device)
        cn = torch.zeros(2, 1, hidden_size, device=device)
        while i > 0:
            i -= 1
            # hn, cn = hn.detach(), cn.detach()
            act, obs_old, h0, c0, hn, cn = agent.choose(obs, hn, cn)
            obs, rew, done, _ = env.step(act)
            obs = agent.remember(obs_old.detach(), act, clean(obs).detach(),
                                 rew, h0.detach(), c0.detach(), hn.detach(),
                                 cn.detach(), int(not done))
            env.render()
            if done:
                break
    env.close()
    saveData(agent, location, ID)
    enablePrint()
    print('Done', ID)
    return os.getpid()
def do(args, env):
    do_env = Environment(name="do", outer=env)
    if len(args) == 0:
        throw_error(
            "syntax",
            "Incorrect use of (do ...): must take at least one argument.")
    # evaluate each form in the child scope; the value of the last one wins
    result = None
    for a in args:
        result = ev.evaluate(a, do_env)
    return result
def setUp(self):
    # An observation space
    observation_space = gym.spaces.Discrete(7)

    # Default reward
    default_reward = Vector([1, 2, 1])

    # Seed the environment with 0 for reproducible tests.
    self.environment = Environment(observation_space=observation_space,
                                   default_reward=default_reward,
                                   seed=0)
def anonymous(*arguments):
    # print("inside anonymous function")
    # print("arguments(" + str(len(arguments)) + "):", arguments)
    if len(arguments) != len(largs):
        throw_error(
            "syntax",
            "This function takes " + str(len(largs)) + " arguments (" +
            str(len(arguments)) + " provided).")
    # bind the formal parameters to the supplied values in a new scope
    lenv = Environment(name="anon_fn",
                       outer=env,
                       variables=largs,
                       values=arguments)
    return ev.evaluate(lbody, lenv)
mutate_chance = .8  # odds of an organism being mutated in any given generation
full_mutate_chance = .4  # odds that a mutated organism is replaced by a fully random one instead of tweaked via the normal distribution
standard_deviations = [.05 for i in range(2)]  # how much each gene is mutated by; tweaks are drawn from a normal distribution
gene_ranges = [(-3, 3) for i in range(2)]
pop_size = 20  # number of organisms in the population
time_limit = 10**10  # how long each fitness test will run before giving up
tick_length = .2  # physics timestep; smaller values are more precise but slower

# arena: a circle boxed in by four static walls, with downward gravity
e = Environment(solids=[
    pe.Circle(pos=[-100, -100]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
],
                g_type='downward',
                g_strength=.2)

# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(np.random.uniform(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))
p = Population(initial_population)
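# A minimal sketch of the mutation rule the constants above describe; the
# function name and signature are illustrative, not from the project.
def mutate(dna, mutate_chance, full_mutate_chance, standard_deviations,
           gene_ranges):
    # most generations leave the organism untouched
    if np.random.random() >= mutate_chance:
        return list(dna)
    # occasionally replace the organism with a fully random one
    if np.random.random() < full_mutate_chance:
        return [np.random.uniform(lo, hi) for lo, hi in gene_ranges]
    # otherwise tweak each gene with normally distributed noise
    return [gene + np.random.normal(0, sd)
            for gene, sd in zip(dna, standard_deviations)]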
import numpy as np
from time import time
import math

start_time = time()  # just a timer


# hyperbolic tangent; like sigmoid but squashes to (-1, 1) instead of (0, 1),
# so it can also produce negative outputs
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


# config
order = 5
e = Environment(solids=[pe.Circle(pos=[-100, .001])],
                g_type='uniform',
                g_strength=[0, -9.81])
destination = np.array([100, 0])
n = nn.NeuralNetwork(inputs=np.array([[
    e.g_strength[1] / 10,
    (destination[0] - e.solids[0].pos[0]) / 100,
    (destination[1] - e.solids[0].pos[1]) / 100
]]),
                     l1_size=4)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))
    # turn the inputs into outputs using existing weights
    n.feedforward()
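# Standalone sanity check (added; not part of the original script): the
# hand-rolled tanh above is algebraically tanh(x) = (e^{2x} - 1) / (e^{2x} + 1),
# so it should agree with the standard library's math.tanh.
import math

for v in (-2.0, -0.5, 0.0, 0.5, 2.0):
    hand = (math.e**(2 * v) - 1) / (math.e**(2 * v) + 1)
    assert abs(hand - math.tanh(v)) < 1e-12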
import math
import numpy as np
from time import time

start_time = time()  # just a timer


# hyperbolic tangent; like sigmoid but squashes to (-1, 1) instead of (0, 1),
# so it can also produce negative outputs
def tanh(x):
    return (math.e**(2 * x) - 1) / (math.e**(2 * x) + 1)


# config
order = 3
e = Environment(solids=[
    pe.Circle(pos=[-100, 0], mass=100, static=True),
    pe.Circle(pos=[0, 0], velocity=[4, 0], mass=1, radius=1),
    pe.Circle(pos=[50, 0], velocity=[0, 2.582], mass=20)
],
                g_type='nonuniform',
                g_strength=10)
n = nn.NeuralNetwork(inputs=np.array(
    [[e.g_strength / 10, e.solids[0].pos[0] / 10, e.solids[0].pos[1] / 10]]),
                     l1_size=8)

# run neural network
for i in range(10**order):
    # print percent progress
    if i % ((10**order) / 100) == 0:
        print(i / (10**(order - 2)))
    # # switch variables every 5 iterations
    # if i % 20 == 0:
gen_count = 300  # how many generations training will last
mutate_chance = .5  # odds of an organism being mutated in any given generation
full_mutate_chance = .2  # odds that a mutated organism is replaced by a fully random one instead of tweaked via the normal distribution
standard_deviations = [.1 for i in range(3)]  # how much each gene is mutated by; tweaks are drawn from a normal distribution
gene_ranges = [(-5, 5) for i in range(3)]
pop_size = 100  # number of organisms in the population
time_limit = 50  # how long each fitness test will run before giving up
tick_length = .2  # physics timestep; smaller values are more precise but slower

start_pos = [0, 11.001]
x = start_pos[0]
y = start_pos[1]
e = Environment(solids=[pe.Circle(static=True),
                        pe.Circle(radius=1, pos=start_pos)],
                g_type='nonuniform',
                g_strength=10)

# initialize population with random genes
initial_population = []
for i in range(pop_size):
    dna = []
    for gene_range in gene_ranges:
        dna.append(randrange(gene_range[0], gene_range[1]))
    initial_population.append(Organism(dna))
p = Population(initial_population)

# iterates through all generations
    if r.random() < .5:
        return x - delta
    return x + delta


# all 6 possible orders in which the algorithm will be introduced to the
# environments
orders = [['PS', 'TD', 'SV'], ['PS', 'SV', 'TD'], ['TD', 'PS', 'SV'],
          ['TD', 'SV', 'PS'], ['SV', 'TD', 'PS'], ['SV', 'PS', 'TD']]
# possible start locations for PS_1's rocket (solids[1])
ps1_starts = [[-11.001, .1], [.1, -11.001], [11.001, .1], [.1, 11.001],
              [7.8, 7.8], [-7.8, 7.8], [-7.8, -7.8], [7.8, -7.8]]

# 6 environments for the LT ML algorithm to use, initialized only with the
# instance variables that will be kept constant
PS_1 = Environment(
    solids=[pe.Circle(static=True),
            pe.Circle(radius=1, pos=[0, 11.01])],
    g_type='nonuniform',
    g_strength=100)
PS_2 = Environment(solids=[
    pe.Circle(static=True, pos=[-100, 0], mass=100),
    pe.Circle(radius=1, pos=[-88.99, 0], mass=1),
    pe.Circle(radius=3, pos=[1, 0], velocity=[0, 3.162])
],
                   g_type='nonuniform',
                   g_strength=10)
TD_1 = Environment(solids=[
    pe.Circle(pos=[1, 1]),
    pe.Rect(static=True, pos=[-155, 0], height=300),
    pe.Rect(static=True, pos=[155, 0], height=300),
    pe.Rect(static=True, pos=[0, -155], width=300),
    pe.Rect(static=True, pos=[0, 155], width=300)
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys

# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

environment = Environment()

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n
print("State %d" % state_num)
print("Action %d" % action_num)

agent = DeepQAgent(state_num, action_num)
if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
import random
import numpy as np

from environments import Environment
from agents import RandomAgent
from agents import ValueApproxAgent

num_gen = 1000
tot_reward = 0
env = Environment(6)
agent = ValueApproxAgent(env.action_space, 0.05)

for i in range(num_gen):
    curr_arm = agent.choose_action()
    curr_reward = env.try_arm(curr_arm)
    agent.learn(curr_arm, curr_reward)
    tot_reward += curr_reward

print('Total Reward: ', tot_reward)
print('Original Probabilities: ', env._probs)
print('Computed Probabilities: ', agent.approx_values)
from environments import Environment
from agents import DeepQAgent
import os, warnings, sys

# hide warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

cartpole = 'CartPole-v0'
mountaincar = 'MountainCar-v0'
# stockmarket = 'StockMarket'
current_env = mountaincar

environment = Environment(current_env)

# get the shape of the observation and action space
state_num = environment.env.observation_space.shape[0]
action_num = environment.env.action_space.n

agent = DeepQAgent(state_num, action_num, current_env)
if len(sys.argv) > 1 and sys.argv[1] == 'train':
    environment.train(agent)
else:
    agent.is_training = False
    environment.run(agent)
def test():
    envStrings = genEnvStrings(20, 10, 10)
    env = Environment(envStrings, selectRandomStart(envStrings))
    e = Explorer(env)
    e.explore()