def __init__(self):
    self.gamma = 0.99                                   # discount factor
    self.critic_learning_rate = 0.0002
    self.actor_learning_rate = 0.0001
    self.tau = 0.001                                    # soft target-update rate
    self.batch_size = 64
    self.update_frequency = 4                           # learn every 4th step
    self.exploration = RLAgents.DecayConst(0.1, 0.1)    # constant exploration noise
    self.experience_replay_size = 200000
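# --- illustrative sketch (not part of the library) ---
# The tau above is the soft (Polyak) target-update rate standard in DDPG.
# A minimal numpy sketch of that update, assuming hypothetical parameter
# lists for the online and target networks:
import numpy

def soft_update(target_params, online_params, tau=0.001):
    # each target parameter moves a small step (tau) toward its online counterpart
    for i in range(len(target_params)):
        target_params[i] = (1.0 - tau) * target_params[i] + tau * online_params[i]

online = [numpy.ones((4, 4)), numpy.ones(4)]
target = [numpy.zeros((4, 4)), numpy.zeros(4)]
soft_update(target, online)   # target drifts slowly toward the online weights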
def __init__(self):
    self.gamma = 0.99                                   # discount factor
    self.critic_learning_rate = 0.0002
    self.actor_learning_rate = 0.0001
    self.forward_learning_rate = 0.0002                 # forward (curiosity) model
    self.autoencoder_learning_rate = 0.0002
    self.tau = 0.001                                    # soft target-update rate
    self.beta1 = 0.1
    self.beta2 = 0.1
    self.episodic_memory_size = 128
    self.batch_size = 64
    self.update_frequency = 4                           # learn every 4th step
    self.exploration = RLAgents.DecayConst(0.1, 0.1)    # constant exploration noise
    self.experience_replay_size = 200000
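# --- illustrative sketch (not part of the library) ---
# A hedged guess at how beta1 / beta2 above weight the intrinsic signals
# against the environment reward; the two bonus arguments are hypothetical
# placeholders for the curiosity and entropy terms the agent computes.
def combined_reward(reward_ext, reward_curiosity, reward_entropy, beta1=0.1, beta2=0.1):
    # external reward plus weighted intrinsic-motivation bonuses
    return reward_ext + beta1 * reward_curiosity + beta2 * reward_entropy

print(combined_reward(1.0, 0.25, 0.5))   # -> 1.075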
import RLAgents
import matplotlib.pyplot as plt

result_path = "./results/"

# eight independent runs per agent, aggregated by RLStatsCompute
files = ["./models/ddpg_baseline/run_" + str(run) + "/result/result.log" for run in range(8)]
ddpg_baseline = RLAgents.RLStatsCompute(files)

files = ["./models/ddpg_curiosity/run_" + str(run) + "/result/result.log" for run in range(8)]
ddpg_curiosity_rnd = RLAgents.RLStatsCompute(files)

'''
files = []
files.append("./models/ddpg_entropy/run_0/result/result.log")
files.append("./models/ddpg_entropy/run_1/result/result.log")
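# --- illustrative sketch (not part of the library) ---
# RLStatsCompute presumably aggregates the eight per-run logs into mean
# and spread curves; a numpy/matplotlib sketch of that aggregation, with
# synthetic data standing in for the log contents (their format is not
# shown in this repo excerpt):
import numpy
import matplotlib.pyplot as plt

def plot_runs(run_curves, label):
    runs = numpy.stack(run_curves)          # shape: (n_runs, n_points)
    mean = runs.mean(axis=0)
    std = runs.std(axis=0)
    x = numpy.arange(runs.shape[1])
    plt.plot(x, mean, label=label)
    plt.fill_between(x, mean - std, mean + std, alpha=0.3)

rng = numpy.random.default_rng(0)
plot_runs([numpy.cumsum(rng.normal(size=100)) for _ in range(8)], "ddpg_baseline")
plt.legend()
plt.show()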
import gym
import pybullet_envs
import numpy
import time
import RLAgents

import models.ddpg_baseline.model.src.model_critic as ModelCritic
import models.ddpg_baseline.model.src.model_actor as ModelActor
import models.ddpg_baseline.model.src.config as Config

path = "models/ddpg_baseline/model/"

env = gym.make("AntBulletEnv-v0")
agent = RLAgents.AgentDDPG(env, ModelCritic, ModelActor, Config)

max_iterations = 4 * (10**6)
training = RLAgents.TrainingIterations(env, agent, max_iterations, path, 1000)
training.run()

'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
'''
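# --- illustrative sketch (not part of the library) ---
# What RLAgents.TrainingIterations presumably does with its arguments:
# step the agent for max_iterations, checkpointing every `saving_period`
# steps.  agent.main() returning (reward, done) matches the evaluation
# loops in this repo; agent.save(path) is an assumed checkpoint hook.
def training_iterations(agent, max_iterations, path, saving_period=1000):
    for iteration in range(max_iterations):
        reward, done = agent.main()      # one env step + one learning step
        if iteration % saving_period == 0:
            agent.save(path)             # assumed checkpointing hook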
import gym
import gym_aeris
import numpy
import time
import RLAgents

import models.ddpg_curiosity.model.src.model_critic as ModelCritic
import models.ddpg_curiosity.model.src.model_actor as ModelActor
import models.ddpg_curiosity.model.src.model_forward as ModelForward
import models.ddpg_curiosity.model.src.model_forward_target as ModelForwardTarget
import models.ddpg_curiosity.model.src.config as Config

path = "models/ddpg_curiosity/model/"

env = gym.make("TargetNavigate-v0", render=False)
agent = RLAgents.AgentDDPGCuriosity(env, ModelCritic, ModelActor, ModelForward, ModelForwardTarget, Config)

max_iterations = 1 * (10**6)
training = RLAgents.TrainingIterations(env, agent, max_iterations, path, 10000)
training.run()

'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
'''
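# --- illustrative sketch (not part of the library) ---
# The ModelForward / ModelForwardTarget pair suggests an RND-style
# curiosity signal: a frozen random target network and a predictor
# distilled onto it, with prediction error as the novelty bonus.  A
# linear numpy sketch; shapes and the learning rate are illustrative.
import numpy

rng = numpy.random.default_rng(0)
W_target = rng.normal(size=(16, 8))     # frozen random projection
W_forward = numpy.zeros((16, 8))        # trained to imitate the target

def curiosity_step(states, lr=0.0002):
    global W_forward
    error = states @ W_forward - states @ W_target          # (batch, 8)
    W_forward -= lr * 2.0 * states.T @ error / error.size   # gradient of the MSE
    return (error ** 2).mean(axis=1)                        # per-state novelty

batch = rng.normal(size=(64, 16))
print(curiosity_step(batch).mean())   # high at first, decays as states grow familiar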
import gym
import gym_line_follower
import numpy
import time
import RLAgents

import models.ppo_baseline_continuous.src.model as Model
import models.ppo_baseline_continuous.src.config as Config

path = "models/ppo_baseline_continuous/"

config = Config.Config()
envs = RLAgents.MultiEnvSeq('LineFollower-v0', None, config.actors)
agent = RLAgents.AgentPPOContinuous(envs, Model, Config)

max_iterations = 1 * (10**6)
training = RLAgents.TrainingIterations(envs, agent, max_iterations, path, 1000)
training.run()

'''
agent.load(path)
agent.disable_training()
while True:
    agent.main()
    envs.render(0)
'''
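# --- illustrative sketch (not part of the library) ---
# A guess at the contract behind RLAgents.MultiEnvSeq: N env copies
# stepped sequentially so PPO can collect config.actors parallel
# trajectories.  Method names, auto-reset behaviour, and the classic
# 4-tuple gym step API are assumptions for illustration.
import gym

class MultiEnvSeqSketch:
    def __init__(self, env_name, count):
        self.envs = [gym.make(env_name) for _ in range(count)]

    def reset(self, i):
        return self.envs[i].reset()

    def step(self, actions):
        results = []
        for env, action in zip(self.envs, actions):
            obs, reward, done, info = env.step(action)
            if done:
                obs = env.reset()        # auto-reset finished episodes
            results.append((obs, reward, done, info))
        return results

envs = MultiEnvSeqSketch("CartPole-v1", 4)
obs = [envs.reset(i) for i in range(4)]
results = envs.step([e.action_space.sample() for e in envs.envs])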
import gym
import pybullet_envs
import numpy
import time
import RLAgents

import models.ppo_baseline.model.src.model as Model
import models.ppo_baseline.model.src.config as Config

path = "models/ppo_baseline/model/"

config = Config.Config()
envs = RLAgents.MultiEnvSeq("AntBulletEnv-v0", None, config.actors)
#envs.render(0)

agent = RLAgents.AgentPPOContinuous(envs, Model, Config)

max_iterations = 1 * (10**6)
training = RLAgents.TrainingIterations(envs, agent, max_iterations, path, 1000)
training.run()

'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
'''
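# --- illustrative sketch (not part of the library) ---
# The clipped surrogate objective at the heart of any PPO agent, written
# out in numpy for one batch; whether AgentPPOContinuous implements
# exactly this variant is not confirmed by this repo excerpt.
import numpy

def ppo_clip_loss(log_prob_new, log_prob_old, advantage, epsilon=0.2):
    ratio = numpy.exp(log_prob_new - log_prob_old)
    clipped = numpy.clip(ratio, 1.0 - epsilon, 1.0 + epsilon)
    # maximizing the surrogate == minimizing its negation
    return -numpy.mean(numpy.minimum(ratio * advantage, clipped * advantage))

advantage = numpy.array([1.0, -0.5, 2.0])
print(ppo_clip_loss(numpy.array([-1.0, -2.0, -0.5]),
                    numpy.array([-1.1, -1.9, -0.6]), advantage))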
import gym
import gym_aeris
import numpy
import time
import RLAgents

import models.ddpg_baseline.model.src.model_critic as ModelCritic
import models.ddpg_baseline.model.src.model_actor as ModelActor
import models.ddpg_baseline.model.src.config as Config

path = "models/ddpg_baseline/model/"

env = gym.make("FoodGatheringAdvanced-v0", render=True)
agent = RLAgents.AgentDDPG(env, ModelCritic, ModelActor, Config)

max_iterations = 4 * (10**6)
#training = RLAgents.TrainingIterations(env, agent, max_iterations, path, 1000)
#training.run()

# evaluate a trained policy
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
        duration=duration, loop=0)


if __name__ == '__main__':
    start = time.time()
    EpsImagePlugin.gs_windows_binary = r'C:\Program Files\gs\gs9.54.0\bin\gswin64c'
    #np.seterr(all='raise')
    random.seed('sdsc8006')

    layouts = ['small', 'medium']
    pacmans = {
        'MC': RLAgents.MonteCarloAgent(eps0=1e1, gamma=1),
        'TD': TDAgent(eps0=1e1, gamma=1),
        'QL': RLAgents.QLearningAgent(eps0=1, gamma=1, alpha=1e-4),
        # alpha for w update, beta for theta update
        'AC': RLAgents.ActorCriticAgent(gamma=1, alpha=1e-4, beta=1e-4),
    }

    argsList, options = readCommand(sys.argv[1:])
    if not options.noRun:
        for args in argsList:
            print(args)
            runGames(**args)

    if options.testAll:
        plotAll(layouts,
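# --- illustrative sketch (not part of the game code) ---
# The "alpha for w update" note above points at linear approximate
# Q-learning: Q(s, a) = w . f(s, a), with w nudged along the TD error.
# The feature extractor here is a toy placeholder.
import numpy

w = numpy.zeros(4)
alpha, gamma = 1e-4, 1.0

def features(state, action):
    return numpy.ones(4)                 # hypothetical feature extractor

def q_value(state, action):
    return w @ features(state, action)

def q_update(state, action, reward, next_state, legal_actions):
    best_next = max(q_value(next_state, a) for a in legal_actions)
    td_error = reward + gamma * best_next - q_value(state, action)
    return w + alpha * td_error * features(state, action)

w = q_update("s0", "North", 1.0, "s1", ["North", "South"])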
import gym
import gym_aeris
import numpy
import time
import RLAgents

import models.ddpg_entropy.model.src.model_critic as ModelCritic
import models.ddpg_entropy.model.src.model_actor as ModelActor
import models.ddpg_entropy.model.src.model_forward as ModelForward
import models.ddpg_entropy.model.src.model_forward_target as ModelForwardTarget
import models.ddpg_entropy.model.src.model_ae as ModelAutoencoder
import models.ddpg_entropy.model.src.config as Config

path = "models/ddpg_entropy/model/"

env = gym.make("TargetNavigate-v0", render=False)
agent = RLAgents.AgentDDPGEntropy(env, ModelCritic, ModelActor, ModelForward, ModelForwardTarget, ModelAutoencoder, Config)

max_iterations = 1 * (10**6)
training = RLAgents.TrainingIterations(env, agent, max_iterations, path, 10000)
training.run()

'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
'''
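# --- illustrative sketch (not part of the library) ---
# A hedged guess at how the autoencoder and episodic_memory_size from
# the entropy config could combine into a novelty bonus: embed each
# state, compare it against a small FIFO memory, reward distance to the
# nearest stored embeddings.  The embedding, k, and eviction policy are
# all illustrative assumptions.
import numpy

rng = numpy.random.default_rng(0)
W_encoder = rng.normal(size=(16, 8))     # stand-in for the trained encoder
memory = []                              # episodic memory, capped below

def episodic_bonus(state, k=8, memory_size=128):
    z = state @ W_encoder
    if memory:
        dists = sorted(float(numpy.linalg.norm(z - m)) for m in memory)
        bonus = sum(dists[:k]) / min(k, len(dists))   # far from memory == novel
    else:
        bonus = 1.0
    memory.append(z)
    if len(memory) > memory_size:
        memory.pop(0)                    # FIFO eviction
    return bonus

print(episodic_bonus(rng.normal(size=16)))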