Example #1
0
from zbatcher import Batcher

# Settings substituted into the run commands below (-s and -n flags).
niceness = -0.001
#niceness = 1
name = "simple_cartpole"

# First command group: one builder.py call given -f models/sc/1.h5.
build_group = [
    'python scripts/builder.py -i 4 -o 2 -s 64 -a relu -A softmax -p adam -l 1e-3 -c 1.0 -f models/sc/1.h5',
]

# Second command group: run_env.py and run_pget.py. Their -u/-p values
# mirror each other (8501/8500 vs 8500/8501) and run_pget.py is given the
# same model path that builder.py received.
env_template = 'python scripts/run_env.py -u 8501 -p 8500 -s {} -n {} --render -e "CartPole-v0"'
agent_template = 'python scripts/run_pget.py -u 8500 -p 8501 -m models/sc/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -g 0.999 -e 0.01'
run_group = [
    env_template.format(niceness, name),
    agent_template.format(niceness, name),
]

Batcher([build_group, run_group]).run()
Example #2
0
from zbatcher import Batcher
# Author's note: seems to work with 1x32 hidden in input/output but 0x hidden in center
# (1.h5 and 3.h5 are built with "-s 32"; 2.h5 is built without an -s flag).

#niceness = -0.1
niceness = 1
lr = 1e-4

# create models
model_group = [
    'python scripts/builder.py -i 8 -o 32 -s 32 -a tanh -A tanh -f models/tal/1.h5',
    'python scripts/builder.py -i 32 -o 32 -a tanh -A tanh -f models/tal/2.h5',
    'python scripts/builder.py -i 32 -o 4 -s 32 -a tanh -A softmax -f models/tal/3.h5',
]

# start proxy
proxy_cmd = 'python scripts/run_reward_proxy.py -u 8001 8002 8003 -p 7999 -s {}'.format(niceness)
env_cmd = 'python scripts/run_env.py -u 8003 -p 8000 -r -1 -s {} -n tal -e "LunarLander-v2"'.format(niceness)

# The three run_pget.py commands all take (niceness, lr), so they are
# formatted together; list order matches the order they are handed to Batcher.
agent_templates = (
    'python scripts/run_pget.py -u 8000 -p 8001 -m models/tal/1.h5 -s {} -n 0.001 -a {}',
    'python scripts/run_pget.py -u 8001 -p 8002 -m models/tal/2.h5 -s {} -n 0.001 -a {}',
    'python scripts/run_pget.py -u 8002 -p 8003 -m models/tal/3.h5 -e discrete -s {} -n 0.01 -a {}',
)
agent_cmds = [template.format(niceness, lr) for template in agent_templates]

Batcher([model_group, [proxy_cmd, env_cmd] + agent_cmds]).run()
Example #3
0
from zbatcher import Batcher

# Settings substituted into the run commands below (-s and -n flags).
niceness = -0.001
#niceness = 1
name = "simple_lunar_continuous"

# First command group: one builder.py call given -f models/slc/1.h5.
build_group = [
    'python scripts/builder.py -i 8 -o 2 -s 64 -a relu -A tanh -f models/slc/1.h5',
]

# Second command group: the environment plus a run_pget.py agent invoked
# with "-t continuous" on the model path used above.
env_cmd = (
    'python scripts/run_env.py -u 8501 -p 8500 -s {} -n {} --render -e "LunarLanderContinuous-v2"'
).format(niceness, name)
agent_cmd = (
    'python scripts/run_pget.py -u 8500 -p 8501 -m models/slc/1.h5 -t continuous -c 3 -k 1e-3 -s {} -n {} -g 0.999 -l 0.99 -e 0.001 -a 1e-3'
).format(niceness, name)

Batcher([build_group, [env_cmd, agent_cmd]]).run()
Example #4
0
from zbatcher import Batcher

# Settings substituted into the commands below: niceness -> -s, name -> -n,
# repeat -> run_retro.py -k, interval -> run_retro.py -i.
niceness = -0.005
#niceness = 1
name = "montezuma-ram"
repeat = 4
interval = 250

# Two separate single-command groups: builder.py, then ae_builder.py.
policy_group = [
    'python scripts/builder.py -i 128 -o 18 -s 64 -a relu -A softmax -p adam -l 1e-4 -c 99999 -f models/monte-ram/1.h5',
]
curiosity_group = [
    'python scripts/ae_builder.py -i 128 -l 64 -s 64 -f models/monte-ram/curiosity.h5',
]

# until we have --no-reward, just proxy the rewards to the curiosity node, since it doesnt use it
retro_cmd = (
    'python scripts/run_retro.py -u 8801 -p 8800 -x 8810 -i {} -k {} -s {} -n {} -o ram -a discrete --render -e "MontezumaRevenge-Atari2600"'
).format(interval, repeat, niceness, name)

# Alternative kept for reference (same command with -b True):
#'python scripts/run_curiosity.py -u 8800 -a 8801 -p 8810 -m models/monte-ram/curiosity.h5 -b True -s {} -n {}'.format(niceness, name),
curiosity_cmd = (
    'python scripts/run_curiosity.py -u 8800 -a 8801 -p 8810 -m models/monte-ram/curiosity.h5 -b False -s {} -n {}'
).format(niceness, name)

agent_cmd = (
    'python scripts/run_pget.py -u 8800 -p 8801 -m models/monte-ram/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -d 10 -g 0.999 -l 0.9 -e 0.1'
).format(niceness, name)

Batcher([
    policy_group,
    curiosity_group,
    [retro_cmd, curiosity_cmd, agent_cmd],
]).run()
Example #5
0
from zbatcher import Batcher

# Settings substituted into the run commands below (-s and -n flags).
niceness = -0.001
#niceness = 1
name = "curiosity_lunar"

# First command group: one builder.py call given -f models/cl/1.h5.
build_group = [
    'python scripts/builder.py -i 8 -o 4 -s 64 -a relu -A softmax -p adam -l 1e-4 -c 1.0 -f models/cl/1.h5',
]

# Disabled group, kept for reference. run_curiosity.py below is still given
# -m models/cl/curiosity.h5.
#['python scripts/ae_builder.py -i 8 -l 4 -s 64 -f models/cl/curiosity.h5'],

# until we have --no-reward, just proxy the rewards to the curiosity node, since it doesnt use it
# All three run commands take (niceness, name), so they are formatted in one pass.
run_templates = (
    'python scripts/run_env.py -u 8301 -p 8300 -x 8302 -s {} -n {} --render -e "LunarLander-v2"',
    'python scripts/run_curiosity.py -u 8300 -a 8301 -p 8302 -m models/cl/curiosity.h5 -b True -s {} -n {}',
    'python scripts/run_pget.py -u 8300 -p 8301 -m models/cl/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -g 0.999 -e 0.1',
)
run_group = [template.format(niceness, name) for template in run_templates]

Batcher([build_group, run_group]).run()
Example #6
0
from zbatcher import Batcher

# Value substituted into the -s flag of both run commands.
#niceness = -0.001
niceness = 1

# First command group: one builder.py call given -f models/sp/1.h5.
build_group = [
    'python scripts/builder.py -i 3 -o 1 -s 64 64 -a relu -A tanh -f models/sp/1.h5',
]

# Second command group: environment and agent. Here -n is written directly
# in each command string rather than taken from a variable.
env_cmd = 'python scripts/run_env.py -u 8101 -p 8100 -s {} -n simple_pendulum -e "Pendulum-v0"'.format(niceness)
agent_cmd = 'python scripts/run_pget.py -u 8100 -p 8101 -m models/sp/1.h5 -s {} -n 0.1 -a 1e-3'.format(niceness)

Batcher([build_group, [env_cmd, agent_cmd]]).run()
Example #7
0
# NOTE(review): this snippet uses Batcher, niceness, action_repeat and lr but
# does not import or assign any of them here - they must already be in scope
# before this point. TODO confirm where they are meant to come from.

# create models
model_group = [
    'python scripts/builder.py -i 8 -o 32 -s 32 32 -a relu -A tanh -f models/dl/1.h5',
    'python scripts/builder.py -i 8 -o 32 -s 32 32 -a relu -A tanh -f models/dl/2.h5',
    #'python scripts/builder.py -i 32 -o 32 -A tanh -f models/dl/3.h5',
    'python scripts/builder.py -i 32 -o 32 -s 32 -a relu -A tanh -f models/dl/3.h5',
    'python scripts/builder.py -i 32 -o 4 -s 32 32 -a relu -A softmax -f models/dl/4.h5',
    'python scripts/builder.py -i 32 -o 4 -s 32 32 -a relu -A softmax -f models/dl/5.h5',
]

# run reward proxy
proxy_cmd = (
    'python scripts/run_reward_proxy.py -p 7999 -c one two -s {} -u 8002 8003 8004 8005 8006'
).format(niceness)

# run envs - both take (niceness, action_repeat)
env_templates = (
    'python scripts/run_env.py -u 8005 -x 7999/one -p 8000 -r -1 -n "dl 1" -e "LunarLander-v2" -s {} -k {}',
    'python scripts/run_env.py -u 8006 -x 7999/two -p 8001 -r -1 -n "dl 2" -e "LunarLander-v2" -s {} -k {}',
)
env_cmds = [template.format(niceness, action_repeat) for template in env_templates]

# run agents - all five take (niceness, lr)
agent_templates = (
    'python scripts/run_pget.py -u 8000 -p 8002 -m models/dl/1.h5 -s {} -n 0.001 -a {}',
    'python scripts/run_pget.py -u 8001 -p 8003 -m models/dl/2.h5 -s {} -n 0.001 -a {}',
    'python scripts/run_pget.py -u 8002 8003 -p 8004 -m models/dl/3.h5 -s {} -n 0.001 -a {}',
    'python scripts/run_pget.py -u 8004 -p 8005 -m models/dl/4.h5 -e discrete -s {} -n 0.01 -a {}',
    'python scripts/run_pget.py -u 8004 -p 8006 -m models/dl/5.h5 -e discrete -s {} -n 0.01 -a {}',
)
agent_cmds = [template.format(niceness, lr) for template in agent_templates]

Batcher([model_group, [proxy_cmd] + env_cmds + agent_cmds]).run()
Example #8
0
from zbatcher import Batcher

# Settings substituted into the run commands below (-s and -n flags).
niceness = -0.001
#niceness = 1
name = "mspacman"

Batcher([
    # Earlier variant of the build command (-l 1e-3), kept for reference:
    #['python scripts/builder.py -i 128 -o 18 -s 64 -a relu -A softmax -p adam -l 1e-3 -c 1.0 -f models/mspacman/1.h5'],
    [
        'python scripts/builder.py -i 128 -o 18 -s 64 -a relu -A softmax -p adam -l 1e-4 -c 1.0 -f models/mspacman/1.h5'
    ],
    [
        # -n now comes from `name` instead of a hard-coded "mspacman", so the
        # environment and the agent always receive the same value; the
        # resulting command string is unchanged for the current setting.
        'python scripts/run_retro.py -u 8201 -p 8200 -s {} -n {} -k 4 -a discrete -e "MsPacMan-Atari2600" -o ram --render'
        .format(niceness, name),
        'python scripts/run_pget.py -u 8200 -p 8201 -m models/mspacman/1.h5 -t discrete -k 1e-6 -s {} -n {} -d 10 -e 0.01 -g 0.999 -l 0.9'
        .format(niceness, name)
    ]
]).run()
Example #9
0
from zbatcher import Batcher

# Settings substituted into the run commands below (-s and -n flags).
niceness = -0.001
#niceness = 1
name = "curiosity_walker"

# First command group: builder.py given -f models/cw/1.h5.
policy_group = [
    'python scripts/builder.py -i 24 -o 4 -s 64 -a relu -A tanh -f models/cw/1.h5',
]
# Disabled alternatives, kept for reference:
#['python scripts/builder.py -i 24 -o 4 -s 64 -a tanh -A tanh -f models/cw/1.h5'],
#['python scripts/ae_builder.py -i 24 -l 10 -s 64 -f models/cw/curiosity.h5'],

# Second command group: ae_builder.py given -f models/cw/curiosity.h5.
curiosity_group = [
    'python scripts/ae_builder.py -i 24 -l 32 -s 64 -f models/cw/curiosity.h5',
]

# until we have --no-reward, just proxy the rewards to the curiosity node, since it doesnt use it
# All three run commands take (niceness, name), so they are formatted in one pass.
run_templates = (
    'python scripts/run_env.py -u 8701 -p 8700 -x 8702 -k 4 -s {} -n {} --render -e "BipedalWalker-v2"',
    'python scripts/run_curiosity.py -u 8700 -a 8701 -p 8702 -m models/cw/curiosity.h5 -s {} -n {}',
    'python scripts/run_pget.py -u 8700 -p 8701 -m models/cw/1.h5 -t continuous -k 1e-3 -s {} -n {} -g 0.999 -e 0.01 -a 1e-3',
)
run_group = [template.format(niceness, name) for template in run_templates]

Batcher([policy_group, curiosity_group, run_group]).run()
Example #10
0
from zbatcher import Batcher

# Settings substituted into the run commands below (-s and -n flags).
#niceness = -0.001
#niceness = -0.01
# Float literal on purpose: with two int operands, `-1 / 60` floor-divides to
# -1 on a Python 2 interpreter. `-1.0 / 60` yields the same fraction on both
# Python 2 and Python 3.
niceness = -1.0 / 60
#niceness = 1
name = "pset_simple_cartpole"

Batcher([
    [
        'python scripts/builder.py -i 4 -o 2 -s 64 -a relu -A softmax -f models/pset-sc/1.h5'
    ],
    [
        'python scripts/run_env.py -u 8601 -p 8600 -s {} -n {} --render -r -1 -e "CartPole-v0"'
        .format(niceness, name),
        # Earlier run_pset.py settings, kept for reference:
        #'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -t discrete -k 1e-1 -s {} -n {} -g 0.999 -e 0.001 -a 1e-3'.format(niceness, name)
        #'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -t discrete -k 1e-5 -c 9999999 -s {} -n {} -g 0.999 -e 0.01 -a 1e-3'.format(niceness, name)
        #'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -t discrete -k 1e-3 -c 999999 -s {} -n {} -g 0.999 -l 0.99 -e 1e-2 -a 1e-2'.format(niceness, name)
        #'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -t discrete -k 1e-2 -c 999999 -s {} -n {} -g 0.999 -l 0.99 -e 1e-2 -a 1e-3'.format(niceness, name)
        'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -o none -t discrete -k 1e-5 -c 999999 -s {} -n {} -g 0.999 -l 0.99 -e 1e-1 -a 1e-1'
        .format(niceness, name)
    ]
]).run()
Example #11
0
from zbatcher import Batcher

# Settings substituted into the gauntlet command below:
# niceness -> -s, name -> -n, frameskip -> -k, step_limit -> -l.
niceness = 1
frameskip = 4
step_limit = 10000

name = "atari_gauntlet"

# First command group: build_sb_ppo.py given -f testagent.
build_group = [
    'python scripts/build_sb_ppo.py -i 128 -o 18 -d True -p MlpPolicy -f testagent',
]

# Second command group: only the gauntlet command is active; the disabled
# entries below are kept for reference.
#'tensorboard --logdir logs',
#'python scripts/run_atari_gauntlet.py -u 8501 -p 8500 -s {} -n {} -g MsPacMan Pong MontezumaRevenge --render -k {} -l {} -o ram'.format(niceness, name, frameskip, step_limit),
gauntlet_cmd = (
    'python scripts/run_atari_gauntlet.py -u 8501 -p 8500 -s {} -n {} --render -k {} -l {} -o ram'
).format(niceness, name, frameskip, step_limit)
#'python scripts/run_sb.py -u 8500 -p 8501 -i 128 -o 18 -d True -f testagent -l logs/sb -s {} -n {}'.format(niceness, name)

Batcher([build_group, [gauntlet_cmd]]).run()
Example #12
0
from zbatcher import Batcher

# Value substituted into the -s flag of every run command below.
niceness = -0.001
#niceness = 1

# First command group: builder.py given -f models/mn_mpm/1.h5.
build_group = [
    'python scripts/builder.py -i 1024 -o 8 -s 64 -a relu -A sigmoid -f models/mn_mpm/1.h5',
]

# Earlier run_pget.py settings, kept for reference:
#'python scripts/run_pget.py -u 8101 -p 8102 -m models/mn_mpm/1.h5 -c 10000 -d 100 -e multibinary -s {} -n 0.1 -l 0.99 -a 1e-3'.format(niceness)
#'python scripts/run_pget.py -u 8101 -p 8102 -m models/mn_mpm/1.h5 -e multibinary -s {} -n 0.1 -l 0.99 -a 1e-3'.format(niceness)
#'python scripts/run_pget.py -u 8101 -p 8102 -m models/mn_mpm/1.h5 -e multibinary -s {} -n 0.1 -g 0.999 -l 0.9 -a 1e-3'.format(niceness)

# Second command group: run_retro.py, run_keras_app.py and run_pget.py.
# Each takes only niceness, so they are formatted in one pass.
run_templates = (
    'python scripts/run_retro.py -u 8102 -p 8100 -s {} -n mn_mpm -k 4 -e "MsPacMan-Atari2600" --render',
    'python scripts/run_keras_app.py -u 8100 -p 8101 -r 80 80 -s {} -a mobilenet',
    'python scripts/run_pget.py -u 8101 -p 8102 -m models/mn_mpm/1.h5 -o none -e multibinary -s {} -n 0.1 -d 100 -g 0.999 -l 0.9 -a 1e-1',
)
run_group = [template.format(niceness) for template in run_templates]

Batcher([build_group, run_group]).run()
Example #13
0
from zbatcher import Batcher

# Settings substituted into the run commands below (-s and -n flags).
niceness = -0.001
#niceness = 1
name = "sb_simple_lunar"

# First command group: build_sb_ppo.py given -f testagent.
build_group = [
    'python scripts/build_sb_ppo.py -i 8 -o 4 -d True -p MlpPolicy -f testagent',
]

# Second command group: the environment plus run_sb.py, which is given the
# same -f testagent value as the build command.
env_cmd = (
    'python scripts/run_env.py -u 8501 -p 8500 -s {} -n {} --render -e "LunarLander-v2"'
).format(niceness, name)
agent_cmd = (
    'python scripts/run_sb.py -u 8500 -p 8501 -i 8 -o 4 -d True -f testagent -s {} -n {}'
).format(niceness, name)

Batcher([build_group, [env_cmd, agent_cmd]]).run()
Example #14
0
from zbatcher import Batcher

# Settings substituted into the run commands below (-s and -n flags).
niceness = -0.001
#niceness = 1
name = "simple_lunar"

# First command group: builder.py given -f models/sl/1.h5.
build_group = [
    'python scripts/builder.py -i 8 -o 4 -s 64 -a relu -A softmax -p adam -l 1e-3 -c 999999 -f models/sl/1.h5',
]

# Second command group: environment and run_pget.py agent; both take
# (niceness, name).
run_templates = (
    'python scripts/run_env.py -u 8501 -p 8500 -s {} -n {} --render -e "LunarLander-v2"',
    'python scripts/run_pget.py -u 8500 -p 8501 -m models/sl/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -g 0.999 -e 0.1',
)
run_group = [template.format(niceness, name) for template in run_templates]

Batcher([build_group, run_group]).run()
Example #15
0
from zbatcher import Batcher

# Settings substituted into the run commands below (-s and -n flags).
niceness = 1
#niceness = 1
name = "sb_double_lunar"

# First command group: two build_sb_ppo.py calls, one per model file.
build_group = [
    'python scripts/build_sb_ppo.py -i 8 -o 32 -d False -p MlpPolicy -l 1e-3 -f models/sb/double_1',
    'python scripts/build_sb_ppo.py -i 32 -o 4 -d True -p MlpPolicy -l 1e-3 -f models/sb/double_2',
]

# Second command group: the environment and two run_sb.py commands. Each
# run_sb.py command gets its own -n value: name with "_1" or "_2" appended.
env_cmd = (
    'python scripts/run_env.py -u 8502 -p 8500 -s {} -n {} --render -e "LunarLander-v2"'
).format(niceness, name)
first_agent_cmd = (
    'python scripts/run_sb.py -u 8500 -p 8501 -i 8 -o 32 -d False -f models/sb/double_1 -l logs/sb -s {} -n {} -r 1'
).format(niceness, name + "_1")
second_agent_cmd = (
    'python scripts/run_sb.py -u 8501 -p 8502 -i 32 -o 4 -d True -f models/sb/double_2 -l logs/sb -s {} -n {} -r 1'
).format(niceness, name + "_2")

Batcher([build_group, [env_cmd, first_agent_cmd, second_agent_cmd]]).run()
Example #16
0
from zbatcher import Batcher
# Author's note: got it to work, needed lower noise on the continuous outputs

#niceness = -0.1
niceness = 1
# Passed as -n to the first run_pget.py command only.
continuous_noise = 0.001
#continuous_noise = 0.0001
lr = 1e-4

# create models
model_group = [
    'python scripts/builder.py -i 8 -o 32 -s 32 32 -a tanh -A tanh -f models/dal/1.h5',
    'python scripts/builder.py -i 32 -o 4 -s 32 32 -a tanh -A softmax -f models/dal/2.h5',
]

# start proxy (its -s value is written directly in the string, not taken from niceness)
proxy_cmd = 'python scripts/run_reward_proxy.py -u 8001 8002 -p 7999 -s -0.1'

# start env
env_cmd = (
    'python scripts/run_env.py -u 8002 -x 7999 -p 8000 -r -1 -s {} -n dal -e "LunarLander-v2"'
).format(niceness)

# start agents
first_agent_cmd = (
    'python scripts/run_pget.py -u 8000 -p 8001 -m models/dal/1.h5 -s {} -n {} -a {}'
).format(niceness, continuous_noise, lr)
second_agent_cmd = (
    'python scripts/run_pget.py -u 8001 -p 8002 -m models/dal/2.h5 -e discrete -s {} -n 0.01 -a {}'
).format(niceness, lr)

Batcher([
    model_group,
    [proxy_cmd, env_cmd, first_agent_cmd, second_agent_cmd],
]).run()
Example #17
0
from zbatcher import Batcher

# Settings substituted into the commands below: niceness -> -s, name -> -n,
# repeat -> run_retro.py -k, interval -> run_retro.py -i.
niceness = -0.005
#niceness = 1
name = "montezuma"
repeat = 4
interval = 250

# First command group: builder.py given -f models/monte/1.h5.
policy_group = [
    'python scripts/builder.py -i 1024 -o 18 -s 64 -a relu -A softmax -p adam -l 1e-4 -c 1.0 -f models/monte/1.h5',
]
# Disabled optimizer variants of the build command, kept for reference:
#['python scripts/builder.py -i 1024 -o 18 -s 64 -a relu -A softmax -p rmsprop -l 1e-4 -c 1.0 -f models/monte/1.h5'],
#['python scripts/builder.py -i 1024 -o 18 -s 64 -a relu -A softmax -p sgd -l 1e-3 -c 1.0 -f models/monte/1.h5'],

# Second command group: ae_builder.py given -f models/monte/curiosity.h5.
curiosity_group = [
    'python scripts/ae_builder.py -i 1024 -l 32 -s 256 -f models/monte/curiosity.h5',
]

# until we have --no-reward, just proxy the rewards to the curiosity node, since it doesnt use it
retro_cmd = (
    'python scripts/run_retro.py -u 8802 -p 8800 -x 8810 -i {} -k {} -s {} -n {} -o image -a discrete --render -e "MontezumaRevenge-Atari2600"'
).format(interval, repeat, niceness, name)

keras_cmd = (
    'python scripts/run_keras_app.py -u 8800 -p 8801 -r 80 80 -s {} -a mobilenet'
).format(niceness)

curiosity_cmd = (
    'python scripts/run_curiosity.py -u 8801 -a 8802 -p 8810 -m models/monte/curiosity.h5 -b True -s {} -n {}'
).format(niceness, name)

# Earlier run_pget.py settings (-d 100), kept for reference:
#'python scripts/run_pget.py -u 8801 -p 8802 -m models/monte/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -d 100 -g 0.9999 -l 0.99 -e 0.1'.format(niceness, name)
agent_cmd = (
    'python scripts/run_pget.py -u 8801 -p 8802 -m models/monte/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -d 10 -g 0.9999 -l 0.99 -e 0.1'
).format(niceness, name)

Batcher([
    policy_group,
    curiosity_group,
    [retro_cmd, keras_cmd, curiosity_cmd, agent_cmd],
]).run()