Beispiel #1
0
# Training configuration for a GrowSpace run.
# Every name assigned above the register() call below is part of the namespace
# dict that is handed to experiment_buddy; none of these values are consumed
# inside this section itself.
gae_lambda = 0.95
entropy_coef = 0.01
value_loss_coef = 0.5
max_grad_norm = 0.5
seed = 1  # didn't change
cuda_deterministic = False
num_processes = 1
num_steps = 2500
custom_gym = "growspace"
ppo_epoch = 4
num_mini_batch = 32
clip_param = 0.1
log_interval = 10  # how often we save to wandb (unit not shown in this section)
save_interval = 100  # how often we save internally (unit not shown in this section)
eval_interval = None
num_env_steps = 1e6  # no change; note this literal is a float, not an int
env_name = "GrowSpaceEnv-Control-v0"  # alternative left by the author: "GrowSpaceSpotlight-Mnist4-v0"
log_dir = "/tmp/gym/"
save_dir = "./trained_models/"
use_proper_time_limits = False
recurrent_policy = False
use_linear_lr_decay = True
no_cuda = False
# True only when CUDA has not been disabled via no_cuda AND torch reports it
# as available; computed before register(), so it is part of the passed dict.
cuda = not no_cuda and torch.cuda.is_available()

# Pass the current namespace to experiment_buddy.
# NOTE(review): presumably this exposes the values above as overridable
# hyperparameters — confirm against the experiment_buddy documentation.
experiment_buddy.register(locals())
# The first positional argument is presumably the deploy host (TODO confirm);
# the call also names a sweep file, a process count of 10 and a wandb entity.
tensorboard = experiment_buddy.deploy("mila",
                                      sweep_yaml="pposweep.yaml",
                                      proc_num=10,
                                      wandb_kwargs={"entity": "growspace"})
Beispiel #2
0
# Experiment configuration.
# Every name bound above the register() call is part of the namespace dict
# handed to experiment_buddy, so no helper variables are introduced here.
use_fa = False
horizon = 6 if use_fa else 20
penalty = 0.15 if use_fa else 1.4  # other value left by the author: 1.6
eps = 1e-4
gamma = 0.9
eta = 1.
grid_size = 10
agent = "pg_clip"
save_interval = 10
max_steps = int(2e3)
seed = 984
eval_episodes = 10
data = "data"

# Deployment switches: RUN_SWEEP simply mirrors REMOTE.
REMOTE = 1
RUN_SWEEP = REMOTE
NUM_PROCS = 5
sweep_yaml = "sweep_params.yaml" if RUN_SWEEP else False
HOST = "mila" if REMOTE else ""  # empty string when REMOTE is falsy
# True when the `_pydev_bundle.pydev_log` module has been loaded.
# NOTE(review): presumably this detects a PyDev/PyCharm debugger session — confirm.
# Membership is tested on the mapping directly; a keys() view is unnecessary.
DEBUG = '_pydev_bundle.pydev_log' in sys.modules

render = not DEBUG
# Pass the current namespace (all names above) to experiment_buddy.
experiment_buddy.register(locals())
# wandb mode is "disabled" when DEBUG is set, "online" otherwise.
tb = experiment_buddy.deploy(
    host=HOST,
    sweep_yaml=sweep_yaml,
    proc_num=NUM_PROCS,
    wandb_kwargs={"mode": "disabled" if DEBUG else "online"},
)

# Create the local data directory and a "plots" directory under the path
# exposed by the deploy result; both calls are no-ops if the directory exists.
os.makedirs(data, exist_ok=True)
plot_path = os.path.join(tb.objects_path, "plots")
os.makedirs(plot_path, exist_ok=True)
Beispiel #3
0
# Experiment configuration. Names bound above the register() call are part of
# the namespace dict passed to experiment_buddy.
seed = 984
h_dim = 32
# Disabled alternative kept by the author:
# wandb_mode = "online" if DEBUG else "offline"

use_cuda = False

# NOTE(review): presumably exposes the values above as overridable
# hyperparameters — confirm against the experiment_buddy documentation.
experiment_buddy.register(locals())
# Assigned after the register() call; selects "cuda" only when use_cuda is set.
device = torch.device("cuda" if use_cuda else "cpu")

################################################################
# Derivative parameters
################################################################
# Disabled alternative header block kept by the author:
# esh = """
# #SBATCH --mem=24GB
# """
# Text passed verbatim as `extra_slurm_headers` below. The `#SBATCH` lines are
# string content, not Python comments.
esh = """
#SBATCH --job-name=spython
#SBATCH --output=job_output.txt
#SBATCH --error=job_error.txt
#SBATCH --time=2-00:00
#SBATCH --mem=12GB
#SBATCH --gres=gpu:1
#SBATCH --cpus-per-task=4
#SBATCH --partition=long
#SBATCH --get-user-env=L
"""
# HOST, sweep_yaml and NUM_PROCS are not assigned in this section; they must
# already be in scope when this statement runs.
tb = experiment_buddy.deploy(host=HOST,
                             sweep_yaml=sweep_yaml,
                             extra_slurm_headers=esh,
                             proc_num=NUM_PROCS)
Beispiel #4
0
# Training configuration for a GrowSpace run.
# Every name assigned above the register() call below is part of the namespace
# dict handed to experiment_buddy; none of the values are consumed in this
# section itself.
num_steps = 10000

algo = "ppo"
gail = False
gail_experts_dir = './gail_experts'
gail_batch_size = 128
gail_epoch = 5
alpha = 0.99
# Seed is drawn at random from [1, 80) each time this runs (upper bound exclusive).
# NOTE(review): with size=1, np.random.randint returns a 1-element array, not
# a plain int — confirm that every consumer of `seed` accepts that.
seed = np.random.randint(1, 80, size=1)
cuda_deterministic = False
num_processes = 4
custom_gym = "growspace"
log_interval = 10
save_interval = 100
eval_interval = None
num_updates = 1e3  # float literal, not an int
env_name = "GrowSpaceEnv-Control-v0"  # alternative left by the author: "GrowSpaceSpotlight-MnistMix-v0"
log_dir = "/tmp/gym/"
save_dir = "./trained_models/"
use_proper_time_limits = False
recurrent_policy = False
no_cuda = False
# True only when CUDA has not been disabled via no_cuda AND torch reports it
# as available.
cuda = not no_cuda and torch.cuda.is_available()
momentum = 0.9  # if sgd is used

# NOTE(review): presumably exposes the values above as overridable
# hyperparameters — confirm against the experiment_buddy documentation.
experiment_buddy.register(locals())
# Empty strings are passed for both the first positional argument and
# sweep_yaml. NOTE(review): presumably this means "run locally, no sweep" —
# confirm against the experiment_buddy documentation.
tensorboard = experiment_buddy.deploy("",
                                      sweep_yaml="",
                                      proc_num=3,
                                      wandb_kwargs={"entity": "growspace"})
Beispiel #5
0
import experiment_buddy

# Hyperparameters
# Every name assigned above the register() call below is part of the namespace
# dict handed to experiment_buddy; none of the values are consumed in this
# section itself.
ENV_NAME = 'BipedalWalker-v3'

MAX_ITER = 500000

BATCH_SIZE = 64
PPO_EPOCHS = 7
CLIP_GRADIENT = 0.2
CLIP_EPS = 0.2

TRAJECTORY_SIZE = 2049
GAE_LAMBDA = 0.95
GAMMA = 0.99

## Test Hyperparameters
test_episodes = 50
save_video_test = False
N_ITER_TEST = 100

# Separate learning rates for the policy and the value function (per the names).
POLICY_LR = 0.0004
VALUE_LR = 0.001

# NOTE(review): presumably exposes the values above as overridable
# hyperparameters — confirm against the experiment_buddy documentation.
experiment_buddy.register(locals())
# Empty strings are passed for both the first positional argument and
# sweep_yaml. NOTE(review): presumably this means "run locally, no sweep" —
# confirm against the experiment_buddy documentation.
tensorboard = experiment_buddy.deploy("",
                                      sweep_yaml="",
                                      proc_num=1,
                                      wandb_kwargs={"entity": "ionelia"})
Beispiel #6
0
import experiment_buddy

# Base hyperparameters. Names bound above the register() call are part of the
# namespace dict passed to experiment_buddy.
initial_lr = .0001

decay_steps = 500000
num_hidden = 1024
decay_factor = .5

batch_size = 128
momentum_mass = 0.99
weight_norm = 0.00

num_epochs = 1000

# NOTE(review): presumably exposes the values above as overridable
# hyperparameters — confirm against the experiment_buddy documentation.
experiment_buddy.register(locals())

################################################################
# Derivative parameters
################################################################
# Built from initial_lr, decay_steps and decay_factor with staircase=True.
# NOTE(review): presumably a callable schedule mapping step -> step size —
# confirm against the installed JAX version's optimizers documentation.
learning_rate = jax.experimental.optimizers.inverse_time_decay(initial_lr,
                                                               decay_steps,
                                                               decay_factor,
                                                               staircase=True)
# ceil(num_epochs / 1000); evaluates to 1 for the default num_epochs of 1000.
eval_every = math.ceil(num_epochs / 1000)

# Raises KeyError if the DEPLOY_DESTINATION environment variable is not set.
HOST = os.environ['DEPLOY_DESTINATION']

# Translates the environment value into the host string passed to deploy();
# any value other than 'cluster' or 'local' raises KeyError at the lookup below.
host_map = {'cluster': 'mila', 'local': ''}

tensorboard = experiment_buddy.deploy(host=host_map[HOST], sweep_yaml="")
Beispiel #7
0
import jax.experimental.optimizers

import experiment_buddy

# Base hyperparameters. Names bound above the register() call are part of the
# namespace dict passed to experiment_buddy.
initial_lr = .0001

decay_steps = 500000
num_hidden = 1024
decay_factor = .5

batch_size = 128
momentum_mass = 0.99
weight_norm = 0.00

num_epochs = 1000

# NOTE(review): presumably exposes the values above as overridable
# hyperparameters — confirm against the experiment_buddy documentation.
experiment_buddy.register(locals())

################################################################
# Derivative parameters
################################################################
# Built from initial_lr, decay_steps and decay_factor with staircase=True.
# NOTE(review): presumably a callable schedule mapping step -> step size —
# confirm against the installed JAX version's optimizers documentation.
learning_rate = jax.experimental.optimizers.inverse_time_decay(initial_lr,
                                                               decay_steps,
                                                               decay_factor,
                                                               staircase=True)
# ceil(num_epochs / 1000); evaluates to 1 for the default num_epochs of 1000.
eval_every = math.ceil(num_epochs / 1000)

# Host and sweep file come from the BUDDY_HOST and SWEEP environment
# variables; each falls back to an empty string when the variable is unset.
tensorboard = experiment_buddy.deploy(host=os.environ.get('BUDDY_HOST', ""),
                                      sweep_yaml=os.environ.get('SWEEP', ""))