Example 1
import RLAgents

class Config:
    def __init__(self):
        self.gamma = 0.99
        self.critic_learning_rate = 0.0002
        self.actor_learning_rate = 0.0001
        self.tau = 0.001

        self.batch_size = 64
        self.update_frequency = 4

        self.exploration = RLAgents.DecayConst(0.1, 0.1)

        self.experience_replay_size = 200000
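Here gamma is the discount factor, tau is presumably the soft-update coefficient for the target networks, and DecayConst(0.1, 0.1) presumably keeps the exploration noise constant at 0.1. As a generic illustration of what tau controls in DDPG (a sketch of standard Polyak averaging, not RLAgents' internal code):

import torch

def soft_update(target_net: torch.nn.Module, online_net: torch.nn.Module, tau: float = 0.001):
    # Move each target parameter a small step (tau) toward the online parameter.
    for t_param, o_param in zip(target_net.parameters(), online_net.parameters()):
        t_param.data.copy_((1.0 - tau) * t_param.data + tau * o_param.data)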
Example 2
import RLAgents

class Config:
    def __init__(self):
        self.gamma                          = 0.99
        self.critic_learning_rate           = 0.0002
        self.actor_learning_rate            = 0.0001
        self.forward_learning_rate          = 0.0002
        self.autoencoder_learning_rate      = 0.0002

        self.tau                            = 0.001 
        self.beta1                          = 0.1
        self.beta2                          = 0.1

        self.episodic_memory_size           = 128 
        
        self.batch_size                     = 64
        self.update_frequency               = 4

        self.exploration   = RLAgents.DecayConst(0.1, 0.1)

        self.experience_replay_size = 200000
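        # Compared with the baseline config above, this variant adds learning rates for
        # the forward and autoencoder models, beta weights and an episodic memory size;
        # these match the curiosity- and entropy-driven DDPG agents in the later examples,
        # which take ModelForward, ModelForwardTarget and ModelAutoencoder modules.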
Example 3
import RLAgents

import matplotlib.pyplot as plt

result_path = "./results/"

files = []
files.append("./models/ddpg_baseline/run_0/result/result.log")
files.append("./models/ddpg_baseline/run_1/result/result.log")
files.append("./models/ddpg_baseline/run_2/result/result.log")
files.append("./models/ddpg_baseline/run_3/result/result.log")
files.append("./models/ddpg_baseline/run_4/result/result.log")
files.append("./models/ddpg_baseline/run_5/result/result.log")
files.append("./models/ddpg_baseline/run_6/result/result.log")
files.append("./models/ddpg_baseline/run_7/result/result.log")
ddpg_baseline = RLAgents.RLStatsCompute(files)

files = []
files.append("./models/ddpg_curiosity/run_0/result/result.log")
files.append("./models/ddpg_curiosity/run_1/result/result.log")
files.append("./models/ddpg_curiosity/run_2/result/result.log")
files.append("./models/ddpg_curiosity/run_3/result/result.log")
files.append("./models/ddpg_curiosity/run_4/result/result.log")
files.append("./models/ddpg_curiosity/run_5/result/result.log")
files.append("./models/ddpg_curiosity/run_6/result/result.log")
files.append("./models/ddpg_curiosity/run_7/result/result.log")
ddpg_curiosity_rnd = RLAgents.RLStatsCompute(files)
'''
files = []
files.append("./models/ddpg_entropy/run_0/result/result.log")
files.append("./models/ddpg_entropy/run_1/result/result.log")
Example 4
import gym
import pybullet_envs   # registers AntBulletEnv-v0 and the other Bullet environments with gym
import numpy
import time

import RLAgents

import models.ddpg_baseline.model.src.model_critic as ModelCritic
import models.ddpg_baseline.model.src.model_actor as ModelActor
import models.ddpg_baseline.model.src.config as Config

path = "models/ddpg_baseline/model/"

env = gym.make("AntBulletEnv-v0")

agent = RLAgents.AgentDDPG(env, ModelCritic, ModelActor, Config)

max_iterations = 4 * (10**6)
training = RLAgents.TrainingIterations(env, agent, max_iterations, path, 1000)
training.run()
'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
'''
Example 5
import gym
import gym_aeris
import numpy
import time

import RLAgents


import models.ddpg_curiosity.model.src.model_critic     as ModelCritic
import models.ddpg_curiosity.model.src.model_actor      as ModelActor
import models.ddpg_curiosity.model.src.model_forward      as ModelForward
import models.ddpg_curiosity.model.src.model_forward_target      as ModelForwardTarget
import models.ddpg_curiosity.model.src.config           as Config

path = "models/ddpg_curiosity/model/"

env = gym.make("TargetNavigate-v0", render = False)

agent = RLAgents.AgentDDPGCuriosity(env, ModelCritic, ModelActor, ModelForward, ModelForwardTarget, Config)

max_iterations = 1*(10**6)
training = RLAgents.TrainingIterations(env, agent, max_iterations, path, 10000)
training.run()

'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
'''
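ModelForward and ModelForwardTarget suggest an RND-style curiosity signal: a trainable predictor tries to reproduce the output of a fixed, randomly initialized target network, and the prediction error serves as intrinsic reward. A minimal PyTorch-style sketch of that idea (an illustration only, not RLAgents' actual implementation; beta is a hypothetical scaling weight):

import torch

def curiosity_reward(forward_model, forward_target, state, beta=0.1):
    # The target network stays fixed; only forward_model is trained (elsewhere) to match it.
    with torch.no_grad():
        target_features = forward_target(state)
    predicted_features = forward_model(state)
    # Per-sample prediction error; novel states are predicted poorly and get a larger bonus.
    error = ((predicted_features - target_features) ** 2).mean(dim=-1)
    return beta * error.detach()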
Example 6
import gym
import gym_line_follower

import numpy
import time

import RLAgents

import models.ppo_baseline_continuous.src.model as Model
import models.ppo_baseline_continuous.src.config as Config

path = "models/ppo_baseline_continuous/"

config = Config.Config()

envs = RLAgents.MultiEnvSeq('LineFollower-v0', None, config.actors)

agent = RLAgents.AgentPPOContinuous(envs, Model, Config)

max_iterations = 1 * (10**6)

training = RLAgents.TrainingIterations(envs, agent, max_iterations, path, 1000)
training.run()
'''
agent.load(path)
agent.disable_training()

while True:
    agent.main()
    envs.render(0)
'''
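AgentPPOContinuous presumably optimizes the usual PPO clipped surrogate objective. As a generic reminder of that objective (a sketch, not the library's exact code; clip_eps = 0.2 is only a common default, not taken from this config):

import torch

def ppo_clipped_loss(log_prob_new, log_prob_old, advantage, clip_eps=0.2):
    # Probability ratio between the current policy and the policy that collected the data.
    ratio = torch.exp(log_prob_new - log_prob_old)
    unclipped = ratio * advantage
    clipped = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * advantage
    # PPO maximizes the pessimistic (minimum) surrogate; negate it to obtain a loss.
    return -torch.min(unclipped, clipped).mean()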
Example 7
import gym
import pybullet_envs
import numpy
import time

import RLAgents

import models.ppo_baseline.model.src.model as Model
import models.ppo_baseline.model.src.config as Config

path = "models/ppo_baseline/model/"

config = Config.Config()
envs = RLAgents.MultiEnvSeq("AntBulletEnv-v0", None, config.actors)
#envs.render(0)

agent = RLAgents.AgentPPOContinuous(envs, Model, Config)

max_iterations = 1 * (10**6)
training = RLAgents.TrainingIterations(envs, agent, max_iterations, path, 1000)
training.run()
'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
'''
Example 8
import gym
import gym_aeris
import numpy
import time

import RLAgents

import models.ddpg_baseline.model.src.model_critic as ModelCritic
import models.ddpg_baseline.model.src.model_actor as ModelActor
import models.ddpg_baseline.model.src.config as Config

path = "models/ddpg_baseline/model/"

env = gym.make("FoodGatheringAdvanced-v0", render=True)

agent = RLAgents.AgentDDPG(env, ModelCritic, ModelActor, Config)

max_iterations = 4 * (10**6)
#training = RLAgents.TrainingIterations(env, agent, max_iterations, path, 1000)
#training.run()

agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
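The loop above only replays the trained policy. If the per-step reward and done flag returned by agent.main() are to be accumulated into episode returns, the evaluation loop could instead be written as below (a sketch, assuming reward is the last step's reward and done marks the end of an episode):

episode_return = 0.0
while True:
    reward, done = agent.main()
    episode_return += reward
    if done:
        print("episode return =", episode_return)
        episode_return = 0.0
    time.sleep(0.01)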
Example 9
                duration=duration,
                loop=0)


if __name__ == '__main__':
    """
    """
    start = time.time()

    EpsImagePlugin.gs_windows_binary = r'C:\Program Files\gs\gs9.54.0\bin\gswin64c'
    ##np.seterr(all='raise')
    random.seed('sdsc8006')

    layouts = ['small', 'medium']
    pacmans = {
        'MC': RLAgents.MonteCarloAgent(eps0=1e1, gamma=1),
        'TD': TDAgent(eps0=1e1, gamma=1),
        'QL': RLAgents.QLearningAgent(eps0=1, gamma=1, alpha=1e-4),
        # alpha for w update, beta for theta update
        'AC': RLAgents.ActorCriticAgent(gamma=1, alpha=1e-4, beta=1e-4),
    }
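    # The agents above share eps0 (presumably the initial exploration rate), gamma and
    # learning rates alpha / beta.  As an illustration only (not this project's
    # QLearningAgent), a tabular epsilon-greedy Q-learning update with these
    # hyper-parameters would be:
    #     Q[s][a] += alpha * (reward + gamma * max(Q[s_next].values()) - Q[s][a])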

    argsList, options = readCommand(sys.argv[1:])

    if not options.noRun:
        for args in argsList:
            print(args)
            runGames(**args)

    if options.testAll:
        plotAll(layouts,
Example 10
import gym
import gym_aeris
import numpy
import time

import RLAgents

import models.ddpg_entropy.model.src.model_critic as ModelCritic
import models.ddpg_entropy.model.src.model_actor as ModelActor
import models.ddpg_entropy.model.src.model_forward as ModelForward
import models.ddpg_entropy.model.src.model_forward_target as ModelForwardTarget
import models.ddpg_entropy.model.src.model_ae as ModelAutoencoder
import models.ddpg_entropy.model.src.config as Config

path = "models/ddpg_entropy/model/"

env = gym.make("TargetNavigate-v0", render=False)

agent = RLAgents.AgentDDPGEntropy(env, ModelCritic, ModelActor, ModelForward,
                                  ModelForwardTarget, ModelAutoencoder, Config)

max_iterations = 1 * (10**6)
training = RLAgents.TrainingIterations(env, agent, max_iterations, path, 10000)
training.run()
'''
agent.load(path)
agent.disable_training()
while True:
    reward, done = agent.main()
    time.sleep(0.01)
'''
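ModelAutoencoder together with the episodic_memory_size hyper-parameter in the earlier config suggests an entropy-style novelty bonus estimated in a learned latent space. One common way to obtain such a bonus (a sketch of the general idea, not RLAgents' actual method) is a k-nearest-neighbour distance over an episodic memory of encoded states:

import numpy

def knn_entropy_bonus(latent, episodic_memory, k=10):
    # latent: encoding of the current state, shape (d,);
    # episodic_memory: encodings of recently visited states, shape (n, d).
    if len(episodic_memory) < k:
        return 0.0
    distances = numpy.linalg.norm(episodic_memory - latent, axis=1)
    # Mean distance to the k nearest neighbours: larger means the state is more novel.
    return float(numpy.sort(distances)[:k].mean())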