from zbatcher import Batcher

# Launcher for the "simple_cartpole" run: hands two command groups to Batcher
# (a model-builder command, then a CartPole-v0 environment plus a run_pget.py
# agent) and calls run().

niceness = -0.001
#niceness = 1
name = "simple_cartpole"

# Group 1: builder command targeting models/sc/1.h5, the same path the agent
# below receives through -m.
build_group = [
    'python scripts/builder.py -i 4 -o 2 -s 64 -a relu -A softmax -p adam -l 1e-3 -c 1.0 -f models/sc/1.h5',
]

# Group 2: environment and agent; both are formatted with (niceness, name).
env_cmd = 'python scripts/run_env.py -u 8501 -p 8500 -s {} -n {} --render -e "CartPole-v0"'.format(niceness, name)
agent_cmd = 'python scripts/run_pget.py -u 8500 -p 8501 -m models/sc/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -g 0.999 -e 0.01'.format(niceness, name)

Batcher([build_group, [env_cmd, agent_cmd]]).run()
from zbatcher import Batcher

# Launcher for the "tal" LunarLander-v2 run: three builder commands, then a
# reward proxy, one environment and three run_pget.py processes, all handed
# to Batcher as two command groups.
#
# Author's note: seems to work with 1x32 hidden in input/output but 0x hidden
# in center.

#niceness = -0.1
niceness = 1
lr = 1e-4

# Group 1: create models (models/tal/1.h5 .. 3.h5).
model_cmds = [
    'python scripts/builder.py -i 8 -o 32 -s 32 -a tanh -A tanh -f models/tal/1.h5',
    'python scripts/builder.py -i 32 -o 32 -a tanh -A tanh -f models/tal/2.h5',
    'python scripts/builder.py -i 32 -o 4 -s 32 -a tanh -A softmax -f models/tal/3.h5',
]

# Group 2: start proxy, environment and the three agents. The proxy and the
# environment take only niceness; the agents take (niceness, lr).
proxy_cmd = 'python scripts/run_reward_proxy.py -u 8001 8002 8003 -p 7999 -s {}'.format(niceness)
env_cmd = 'python scripts/run_env.py -u 8003 -p 8000 -r -1 -s {} -n tal -e "LunarLander-v2"'.format(niceness)
first_agent_cmd = 'python scripts/run_pget.py -u 8000 -p 8001 -m models/tal/1.h5 -s {} -n 0.001 -a {}'.format(niceness, lr)
middle_agent_cmd = 'python scripts/run_pget.py -u 8001 -p 8002 -m models/tal/2.h5 -s {} -n 0.001 -a {}'.format(niceness, lr)
last_agent_cmd = 'python scripts/run_pget.py -u 8002 -p 8003 -m models/tal/3.h5 -e discrete -s {} -n 0.01 -a {}'.format(niceness, lr)

run_cmds = [proxy_cmd, env_cmd, first_agent_cmd, middle_agent_cmd, last_agent_cmd]

Batcher([model_cmds, run_cmds]).run()
from zbatcher import Batcher

# Launcher for the "simple_lunar_continuous" run: a builder command followed
# by a LunarLanderContinuous-v2 environment and a run_pget.py agent started
# with "-t continuous". Both groups are handed to Batcher and run() is called.

niceness = -0.001
#niceness = 1
name = "simple_lunar_continuous"

# Group 1: builder command for models/slc/1.h5 (the agent's -m path).
build_group = [
    'python scripts/builder.py -i 8 -o 2 -s 64 -a relu -A tanh -f models/slc/1.h5',
]

# Group 2: environment and agent, each formatted with (niceness, name).
env_cmd = 'python scripts/run_env.py -u 8501 -p 8500 -s {} -n {} --render -e "LunarLanderContinuous-v2"'.format(niceness, name)
agent_cmd = 'python scripts/run_pget.py -u 8500 -p 8501 -m models/slc/1.h5 -t continuous -c 3 -k 1e-3 -s {} -n {} -g 0.999 -l 0.99 -e 0.001 -a 1e-3'.format(niceness, name)

Batcher([build_group, [env_cmd, agent_cmd]]).run()
from zbatcher import Batcher

# Launcher for the "montezuma-ram" run: a policy-model builder, an
# autoencoder builder for the curiosity model, then a run_retro.py
# environment ("-o ram"), a run_curiosity.py process and a run_pget.py agent.
# The three groups are handed to Batcher and run() is called.

niceness = -0.005
#niceness = 1
name = "montezuma-ram"
repeat = 4
interval = 250

# Group 1: builder command for the agent model.
policy_build_group = [
    'python scripts/builder.py -i 128 -o 18 -s 64 -a relu -A softmax -p adam -l 1e-4 -c 99999 -f models/monte-ram/1.h5',
]

# Group 2: builder command for the curiosity model.
curiosity_build_group = [
    'python scripts/ae_builder.py -i 128 -l 64 -s 64 -f models/monte-ram/curiosity.h5',
]

# Group 3: environment, curiosity node and agent.
# until we have --no-reward, just proxy the rewards to the curiosity node,
# since it doesnt use it
retro_cmd = 'python scripts/run_retro.py -u 8801 -p 8800 -x 8810 -i {} -k {} -s {} -n {} -o ram -a discrete --render -e "MontezumaRevenge-Atari2600"'.format(interval, repeat, niceness, name)
# Alternative curiosity command ("-b True"), kept for reference:
#'python scripts/run_curiosity.py -u 8800 -a 8801 -p 8810 -m models/monte-ram/curiosity.h5 -b True -s {} -n {}'.format(niceness, name),
curiosity_cmd = 'python scripts/run_curiosity.py -u 8800 -a 8801 -p 8810 -m models/monte-ram/curiosity.h5 -b False -s {} -n {}'.format(niceness, name)
agent_cmd = 'python scripts/run_pget.py -u 8800 -p 8801 -m models/monte-ram/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -d 10 -g 0.999 -l 0.9 -e 0.1'.format(niceness, name)

Batcher([
    policy_build_group,
    curiosity_build_group,
    [retro_cmd, curiosity_cmd, agent_cmd],
]).run()
from zbatcher import Batcher

# Launcher for the "curiosity_lunar" run: a builder command, then a
# LunarLander-v2 environment, a run_curiosity.py process and a run_pget.py
# agent. The two active groups are handed to Batcher and run() is called.

niceness = -0.001
#niceness = 1
name = "curiosity_lunar"

# Group 1: builder command for the agent model.
build_group = [
    'python scripts/builder.py -i 8 -o 4 -s 64 -a relu -A softmax -p adam -l 1e-4 -c 1.0 -f models/cl/1.h5',
]

# Disabled group: autoencoder builder for the curiosity model.
# NOTE(review): run_curiosity.py below is still given
# models/cl/curiosity.h5, so that file presumably has to exist already --
# confirm before running from a clean checkout.
#['python scripts/ae_builder.py -i 8 -l 4 -s 64 -f models/cl/curiosity.h5'],

# Group 2: environment, curiosity node and agent, each formatted with
# (niceness, name).
# until we have --no-reward, just proxy the rewards to the curiosity node,
# since it doesnt use it
env_cmd = 'python scripts/run_env.py -u 8301 -p 8300 -x 8302 -s {} -n {} --render -e "LunarLander-v2"'.format(niceness, name)
curiosity_cmd = 'python scripts/run_curiosity.py -u 8300 -a 8301 -p 8302 -m models/cl/curiosity.h5 -b True -s {} -n {}'.format(niceness, name)
agent_cmd = 'python scripts/run_pget.py -u 8300 -p 8301 -m models/cl/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -g 0.999 -e 0.1'.format(niceness, name)

Batcher([build_group, [env_cmd, curiosity_cmd, agent_cmd]]).run()
from zbatcher import Batcher

# Launcher for the "simple_pendulum" run: a builder command followed by a
# Pendulum-v0 environment and a run_pget.py agent. Both groups are handed to
# Batcher and run() is called.

#niceness = -0.001
niceness = 1

# Group 1: builder command for models/sp/1.h5 (the agent's -m path).
build_group = [
    'python scripts/builder.py -i 3 -o 1 -s 64 64 -a relu -A tanh -f models/sp/1.h5',
]

# Group 2: environment and agent; only niceness is substituted, the run name
# is written directly into the environment command.
env_cmd = 'python scripts/run_env.py -u 8101 -p 8100 -s {} -n simple_pendulum -e "Pendulum-v0"'.format(niceness)
agent_cmd = 'python scripts/run_pget.py -u 8100 -p 8101 -m models/sp/1.h5 -s {} -n 0.1 -a 1e-3'.format(niceness)

Batcher([build_group, [env_cmd, agent_cmd]]).run()
# Launcher for the "dl" run: five builder commands, then a reward proxy, two
# LunarLander-v2 environments ("dl 1" and "dl 2") and five run_pget.py agents,
# handed to Batcher as two command groups.
#
# NOTE(review): this fragment uses Batcher, niceness, lr and action_repeat
# without importing or assigning them; the script header appears to be
# missing. They must be defined before this call or it raises NameError --
# confirm the intended values against the original script.
Batcher([
    [
        #create models
        'python scripts/builder.py -i 8 -o 32 -s 32 32 -a relu -A tanh -f models/dl/1.h5',
        'python scripts/builder.py -i 8 -o 32 -s 32 32 -a relu -A tanh -f models/dl/2.h5',
        #'python scripts/builder.py -i 32 -o 32 -A tanh -f models/dl/3.h5',
        'python scripts/builder.py -i 32 -o 32 -s 32 -a relu -A tanh -f models/dl/3.h5',
        'python scripts/builder.py -i 32 -o 4 -s 32 32 -a relu -A softmax -f models/dl/4.h5',
        'python scripts/builder.py -i 32 -o 4 -s 32 32 -a relu -A softmax -f models/dl/5.h5'
    ],
    [
        #run reward proxy
        'python scripts/run_reward_proxy.py -p 7999 -c one two -s {} -u 8002 8003 8004 8005 8006'
        .format(niceness),
        #run envs (each is given its own proxy target: 7999/one and 7999/two)
        'python scripts/run_env.py -u 8005 -x 7999/one -p 8000 -r -1 -n "dl 1" -e "LunarLander-v2" -s {} -k {}'
        .format(niceness, action_repeat),
        'python scripts/run_env.py -u 8006 -x 7999/two -p 8001 -r -1 -n "dl 2" -e "LunarLander-v2" -s {} -k {}'
        .format(niceness, action_repeat),
        #run agents (models 1-3 use "-n 0.001"; models 4-5 add "-e discrete" and use "-n 0.01")
        'python scripts/run_pget.py -u 8000 -p 8002 -m models/dl/1.h5 -s {} -n 0.001 -a {}'
        .format(niceness, lr),
        'python scripts/run_pget.py -u 8001 -p 8003 -m models/dl/2.h5 -s {} -n 0.001 -a {}'
        .format(niceness, lr),
        'python scripts/run_pget.py -u 8002 8003 -p 8004 -m models/dl/3.h5 -s {} -n 0.001 -a {}'
        .format(niceness, lr),
        'python scripts/run_pget.py -u 8004 -p 8005 -m models/dl/4.h5 -e discrete -s {} -n 0.01 -a {}'
        .format(niceness, lr),
        'python scripts/run_pget.py -u 8004 -p 8006 -m models/dl/5.h5 -e discrete -s {} -n 0.01 -a {}'
        .format(niceness, lr)
    ]
]).run()
from zbatcher import Batcher

# Launcher for the "mspacman" run: a builder command followed by a
# run_retro.py environment ("-o ram") and a run_pget.py agent. Both groups
# are handed to Batcher and run() is called.

niceness = -0.001
#niceness = 1
name = "mspacman"

# Earlier builder variant ("-l 1e-3"), kept for reference:
#['python scripts/builder.py -i 128 -o 18 -s 64 -a relu -A softmax -p adam -l 1e-3 -c 1.0 -f models/mspacman/1.h5'],

# Group 1: builder command for models/mspacman/1.h5 (the agent's -m path).
build_group = [
    'python scripts/builder.py -i 128 -o 18 -s 64 -a relu -A softmax -p adam -l 1e-4 -c 1.0 -f models/mspacman/1.h5',
]

# Group 2: environment (run name written directly into the command, only
# niceness substituted) and agent (formatted with niceness and name).
retro_cmd = 'python scripts/run_retro.py -u 8201 -p 8200 -s {} -n mspacman -k 4 -a discrete -e "MsPacMan-Atari2600" -o ram --render'.format(niceness)
agent_cmd = 'python scripts/run_pget.py -u 8200 -p 8201 -m models/mspacman/1.h5 -t discrete -k 1e-6 -s {} -n {} -d 10 -e 0.01 -g 0.999 -l 0.9'.format(niceness, name)

Batcher([build_group, [retro_cmd, agent_cmd]]).run()
from zbatcher import Batcher

# Launcher for the "curiosity_walker" run: a policy-model builder, an
# autoencoder builder for the curiosity model, then a BipedalWalker-v2
# environment, a run_curiosity.py process and a run_pget.py agent started
# with "-t continuous". The three groups are handed to Batcher and run() is
# called.

niceness = -0.001
#niceness = 1
name = "curiosity_walker"

# Group 1: builder command for the agent model.
policy_build_group = [
    'python scripts/builder.py -i 24 -o 4 -s 64 -a relu -A tanh -f models/cw/1.h5',
]
# Earlier variants, kept for reference:
#['python scripts/builder.py -i 24 -o 4 -s 64 -a tanh -A tanh -f models/cw/1.h5'],
#['python scripts/ae_builder.py -i 24 -l 10 -s 64 -f models/cw/curiosity.h5'],

# Group 2: builder command for the curiosity model.
curiosity_build_group = [
    'python scripts/ae_builder.py -i 24 -l 32 -s 64 -f models/cw/curiosity.h5',
]

# Group 3: environment, curiosity node and agent, each formatted with
# (niceness, name).
# until we have --no-reward, just proxy the rewards to the curiosity node,
# since it doesnt use it
env_cmd = 'python scripts/run_env.py -u 8701 -p 8700 -x 8702 -k 4 -s {} -n {} --render -e "BipedalWalker-v2"'.format(niceness, name)
curiosity_cmd = 'python scripts/run_curiosity.py -u 8700 -a 8701 -p 8702 -m models/cw/curiosity.h5 -s {} -n {}'.format(niceness, name)
agent_cmd = 'python scripts/run_pget.py -u 8700 -p 8701 -m models/cw/1.h5 -t continuous -k 1e-3 -s {} -n {} -g 0.999 -e 0.01 -a 1e-3'.format(niceness, name)

Batcher([
    policy_build_group,
    curiosity_build_group,
    [env_cmd, curiosity_cmd, agent_cmd],
]).run()
from zbatcher import Batcher

# Launcher for the "pset_simple_cartpole" run: a builder command followed by
# a CartPole-v0 environment and a run_pset.py agent. Both groups are handed
# to Batcher and run() is called.

#niceness = -0.001
#niceness = -0.01
# Float literal on purpose: with two int operands, Python 2's "/" floors
# -1 / 60 to -1, whereas the commented alternatives above show a small
# negative fraction is intended. -1.0 / 60 gives the same value on every
# interpreter version.
niceness = -1.0 / 60
#niceness = 1
name = "pset_simple_cartpole"

Batcher([
    [
        # Builder command for models/pset-sc/1.h5 (the agent's -m path).
        'python scripts/builder.py -i 4 -o 2 -s 64 -a relu -A softmax -f models/pset-sc/1.h5'
    ],
    [
        # Environment, formatted with (niceness, name).
        'python scripts/run_env.py -u 8601 -p 8600 -s {} -n {} --render -r -1 -e "CartPole-v0"'
        .format(niceness, name),
        # Earlier run_pset.py parameter sets, kept for reference:
        #'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -t discrete -k 1e-1 -s {} -n {} -g 0.999 -e 0.001 -a 1e-3'.format(niceness, name)
        #'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -t discrete -k 1e-5 -c 9999999 -s {} -n {} -g 0.999 -e 0.01 -a 1e-3'.format(niceness, name)
        #'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -t discrete -k 1e-3 -c 999999 -s {} -n {} -g 0.999 -l 0.99 -e 1e-2 -a 1e-2'.format(niceness, name)
        #'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -t discrete -k 1e-2 -c 999999 -s {} -n {} -g 0.999 -l 0.99 -e 1e-2 -a 1e-3'.format(niceness, name)
        # Active agent command, formatted with (niceness, name).
        'python scripts/run_pset.py -u 8600 -p 8601 -m models/pset-sc/1.h5 -o none -t discrete -k 1e-5 -c 999999 -s {} -n {} -g 0.999 -l 0.99 -e 1e-1 -a 1e-1'
        .format(niceness, name)
    ]
]).run()
from zbatcher import Batcher

# Launcher for the "atari_gauntlet" run: a build_sb_ppo.py command followed
# by a single run_atari_gauntlet.py process. Both groups are handed to
# Batcher and run() is called. The run_sb.py agent command is currently
# commented out, so only the gauntlet process is in the second group.

niceness = 1
frameskip = 4
step_limit = 10000
name = "atari_gauntlet"

# Group 1: build command writing to "testagent".
build_group = [
    'python scripts/build_sb_ppo.py -i 128 -o 18 -d True -p MlpPolicy -f testagent',
]

# Group 2: gauntlet process, formatted with
# (niceness, name, frameskip, step_limit).
gauntlet_cmd = 'python scripts/run_atari_gauntlet.py -u 8501 -p 8500 -s {} -n {} --render -k {} -l {} -o ram'.format(niceness, name, frameskip, step_limit)

run_group = [
    # Disabled commands, kept for reference:
    #'tensorboard --logdir logs',
    #'python scripts/run_atari_gauntlet.py -u 8501 -p 8500 -s {} -n {} -g MsPacMan Pong MontezumaRevenge --render -k {} -l {} -o ram'.format(niceness, name, frameskip, step_limit),
    gauntlet_cmd,
    #'python scripts/run_sb.py -u 8500 -p 8501 -i 128 -o 18 -d True -f testagent -l logs/sb -s {} -n {}'.format(niceness, name)
]

Batcher([build_group, run_group]).run()
from zbatcher import Batcher

# Launcher for the "mn_mpm" run: a builder command, then a run_retro.py
# MsPacMan environment, a run_keras_app.py process ("-a mobilenet") and a
# run_pget.py agent ("-e multibinary"). Both groups are handed to Batcher and
# run() is called.

niceness = -0.001
#niceness = 1

# Group 1: builder command for models/mn_mpm/1.h5 (the agent's -m path).
build_group = [
    'python scripts/builder.py -i 1024 -o 8 -s 64 -a relu -A sigmoid -f models/mn_mpm/1.h5',
]

# Group 2: environment, keras app and agent; only niceness is substituted.
retro_cmd = 'python scripts/run_retro.py -u 8102 -p 8100 -s {} -n mn_mpm -k 4 -e "MsPacMan-Atari2600" --render'.format(niceness)
keras_cmd = 'python scripts/run_keras_app.py -u 8100 -p 8101 -r 80 80 -s {} -a mobilenet'.format(niceness)
# Earlier agent parameter sets, kept for reference:
#'python scripts/run_pget.py -u 8101 -p 8102 -m models/mn_mpm/1.h5 -c 10000 -d 100 -e multibinary -s {} -n 0.1 -l 0.99 -a 1e-3'.format(niceness)
#'python scripts/run_pget.py -u 8101 -p 8102 -m models/mn_mpm/1.h5 -e multibinary -s {} -n 0.1 -l 0.99 -a 1e-3'.format(niceness)
#'python scripts/run_pget.py -u 8101 -p 8102 -m models/mn_mpm/1.h5 -e multibinary -s {} -n 0.1 -g 0.999 -l 0.9 -a 1e-3'.format(niceness)
agent_cmd = 'python scripts/run_pget.py -u 8101 -p 8102 -m models/mn_mpm/1.h5 -o none -e multibinary -s {} -n 0.1 -d 100 -g 0.999 -l 0.9 -a 1e-1'.format(niceness)

Batcher([build_group, [retro_cmd, keras_cmd, agent_cmd]]).run()
from zbatcher import Batcher

# Launcher for the "sb_simple_lunar" run: a build_sb_ppo.py command followed
# by a LunarLander-v2 environment and a run_sb.py agent. Both groups are
# handed to Batcher and run() is called.

niceness = -0.001
#niceness = 1
name = "sb_simple_lunar"

# Group 1: build command writing to "testagent", the same -f value the agent
# below is given.
build_group = [
    'python scripts/build_sb_ppo.py -i 8 -o 4 -d True -p MlpPolicy -f testagent',
]

# Group 2: environment and agent, each formatted with (niceness, name).
env_cmd = 'python scripts/run_env.py -u 8501 -p 8500 -s {} -n {} --render -e "LunarLander-v2"'.format(niceness, name)
agent_cmd = 'python scripts/run_sb.py -u 8500 -p 8501 -i 8 -o 4 -d True -f testagent -s {} -n {}'.format(niceness, name)

Batcher([build_group, [env_cmd, agent_cmd]]).run()
from zbatcher import Batcher

# Launcher for the "simple_lunar" run: a builder command followed by a
# LunarLander-v2 environment and a run_pget.py agent ("-t discrete"). Both
# groups are handed to Batcher and run() is called.

niceness = -0.001
#niceness = 1
name = "simple_lunar"

# Group 1: builder command for models/sl/1.h5 (the agent's -m path).
build_group = [
    'python scripts/builder.py -i 8 -o 4 -s 64 -a relu -A softmax -p adam -l 1e-3 -c 999999 -f models/sl/1.h5',
]

# Group 2: environment and agent, each formatted with (niceness, name).
env_cmd = 'python scripts/run_env.py -u 8501 -p 8500 -s {} -n {} --render -e "LunarLander-v2"'.format(niceness, name)
agent_cmd = 'python scripts/run_pget.py -u 8500 -p 8501 -m models/sl/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -g 0.999 -e 0.1'.format(niceness, name)

Batcher([build_group, [env_cmd, agent_cmd]]).run()
from zbatcher import Batcher

# Launcher for the "sb_double_lunar" run: two build_sb_ppo.py commands, then
# a LunarLander-v2 environment and two run_sb.py agents. Both groups are
# handed to Batcher and run() is called.

niceness = 1
#niceness = 1
name = "sb_double_lunar"

# Group 1: build commands for models/sb/double_1 and models/sb/double_2.
build_group = [
    'python scripts/build_sb_ppo.py -i 8 -o 32 -d False -p MlpPolicy -l 1e-3 -f models/sb/double_1',
    'python scripts/build_sb_ppo.py -i 32 -o 4 -d True -p MlpPolicy -l 1e-3 -f models/sb/double_2',
]

# Group 2: the environment uses the plain run name; each agent gets the name
# with a "_1" / "_2" suffix.
env_cmd = 'python scripts/run_env.py -u 8502 -p 8500 -s {} -n {} --render -e "LunarLander-v2"'.format(niceness, name)
first_agent_cmd = 'python scripts/run_sb.py -u 8500 -p 8501 -i 8 -o 32 -d False -f models/sb/double_1 -l logs/sb -s {} -n {} -r 1'.format(niceness, name + "_1")
second_agent_cmd = 'python scripts/run_sb.py -u 8501 -p 8502 -i 32 -o 4 -d True -f models/sb/double_2 -l logs/sb -s {} -n {} -r 1'.format(niceness, name + "_2")

Batcher([build_group, [env_cmd, first_agent_cmd, second_agent_cmd]]).run()
from zbatcher import Batcher

# Launcher for the "dal" LunarLander-v2 run: two builder commands, then a
# reward proxy, one environment and two run_pget.py agents. Both groups are
# handed to Batcher and run() is called.
#
# Author's note: got it to work, needed lower noise on the continuous outputs.

#niceness = -0.1
niceness = 1
continuous_noise = 0.001
#continuous_noise = 0.0001
lr = 1e-4

# Group 1: create models (models/dal/1.h5 and 2.h5).
model_cmds = [
    'python scripts/builder.py -i 8 -o 32 -s 32 32 -a tanh -A tanh -f models/dal/1.h5',
    'python scripts/builder.py -i 32 -o 4 -s 32 32 -a tanh -A softmax -f models/dal/2.h5',
]

# start proxy
# NOTE(review): "-s -0.1" is written directly into this command while every
# other command here substitutes niceness (currently 1) -- confirm whether
# the proxy is meant to differ.
proxy_cmd = 'python scripts/run_reward_proxy.py -u 8001 8002 -p 7999 -s -0.1'

# start env
env_cmd = 'python scripts/run_env.py -u 8002 -x 7999 -p 8000 -r -1 -s {} -n dal -e "LunarLander-v2"'.format(niceness)

# start agents: the first takes continuous_noise for -n, the second adds
# "-e discrete" with a fixed "-n 0.01".
continuous_agent_cmd = 'python scripts/run_pget.py -u 8000 -p 8001 -m models/dal/1.h5 -s {} -n {} -a {}'.format(niceness, continuous_noise, lr)
discrete_agent_cmd = 'python scripts/run_pget.py -u 8001 -p 8002 -m models/dal/2.h5 -e discrete -s {} -n 0.01 -a {}'.format(niceness, lr)

Batcher([
    model_cmds,
    [proxy_cmd, env_cmd, continuous_agent_cmd, discrete_agent_cmd],
]).run()
from zbatcher import Batcher

# Launcher for the "montezuma" run: a policy-model builder, an autoencoder
# builder for the curiosity model, then a run_retro.py environment
# ("-o image"), a run_keras_app.py process ("-a mobilenet"), a
# run_curiosity.py process and a run_pget.py agent. The three groups are
# handed to Batcher and run() is called.

niceness = -0.005
#niceness = 1
name = "montezuma"
repeat = 4
interval = 250

# Group 1: builder command for the agent model.
policy_build_group = [
    'python scripts/builder.py -i 1024 -o 18 -s 64 -a relu -A softmax -p adam -l 1e-4 -c 1.0 -f models/monte/1.h5',
]
# Alternative optimizer settings, kept for reference:
#['python scripts/builder.py -i 1024 -o 18 -s 64 -a relu -A softmax -p rmsprop -l 1e-4 -c 1.0 -f models/monte/1.h5'],
#['python scripts/builder.py -i 1024 -o 18 -s 64 -a relu -A softmax -p sgd -l 1e-3 -c 1.0 -f models/monte/1.h5'],

# Group 2: builder command for the curiosity model.
curiosity_build_group = [
    'python scripts/ae_builder.py -i 1024 -l 32 -s 256 -f models/monte/curiosity.h5',
]

# Group 3: environment, keras app, curiosity node and agent.
# until we have --no-reward, just proxy the rewards to the curiosity node,
# since it doesnt use it
retro_cmd = 'python scripts/run_retro.py -u 8802 -p 8800 -x 8810 -i {} -k {} -s {} -n {} -o image -a discrete --render -e "MontezumaRevenge-Atari2600"'.format(interval, repeat, niceness, name)
keras_cmd = 'python scripts/run_keras_app.py -u 8800 -p 8801 -r 80 80 -s {} -a mobilenet'.format(niceness)
curiosity_cmd = 'python scripts/run_curiosity.py -u 8801 -a 8802 -p 8810 -m models/monte/curiosity.h5 -b True -s {} -n {}'.format(niceness, name)
# Earlier agent parameter set ("-d 100"), kept for reference:
#'python scripts/run_pget.py -u 8801 -p 8802 -m models/monte/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -d 100 -g 0.9999 -l 0.99 -e 0.1'.format(niceness, name)
agent_cmd = 'python scripts/run_pget.py -u 8801 -p 8802 -m models/monte/1.h5 -t discrete -c 3 -k 1e-6 -s {} -n {} -d 10 -g 0.9999 -l 0.99 -e 0.1'.format(niceness, name)

Batcher([
    policy_build_group,
    curiosity_build_group,
    [retro_cmd, keras_cmd, curiosity_cmd, agent_cmd],
]).run()