def main():
    """Train PPO on three training environments.

    Opens a TensorFlow session whose GPU options have ``allow_growth``
    enabled, then calls ``ppo2ttifrutti.learn`` with a ``SubprocVecEnv``
    built from ``env.make_train_5``, ``env.make_train_18`` and
    ``env.make_train_43``.  The timestep budget (1e9) is deliberately far
    larger than needed, so the run is expected to end when the environment
    raises an exception.
    """
    session_config = tf.ConfigProto()
    # To hide the GPUs from TensorFlow, enable the following line:
    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    session_config.gpu_options.allow_growth = True  # pylint: disable=E1101

    def learning_rate(_):
        """Constant learning-rate schedule; the argument is ignored."""
        return 2e-4

    def clip_range(_):
        """Constant clip-range schedule; the argument is ignored."""
        return 0.1

    with tf.Session(config=session_config):
        # The vectorized environment is created inside the session block,
        # right before training starts.
        env_makers = [env.make_train_5, env.make_train_18, env.make_train_43]
        vec_env = SubprocVecEnv(env_makers)

        ppo2ttifrutti.learn(
            policy=policies.CnnPolicy,
            env=vec_env,
            nsteps=2048,
            nminibatches=16,
            lam=0.95,
            gamma=0.99,
            noptepochs=4,
            log_interval=1,
            ent_coef=0.01,
            lr=learning_rate,
            cliprange=clip_range,
            # Oversized on purpose: training stops on an exception instead.
            total_timesteps=int(1e9),
            save_interval=25,
        )
def main():
    """Train PPO on all 47 training environments, configured from ``args``.

    Reads ``log_path``, ``nsteps``, ``nminibatches``, ``lr``,
    ``total_timesteps`` and ``train`` from the module-level ``args`` object.
    Ensures that ``args.log_path`` and its ``log/`` and ``model/``
    sub-directories exist, then calls ``ppo2ttifrutti.learn`` inside a
    TensorFlow session whose GPU options have ``allow_growth`` enabled.

    The vectorized environment is a ``SubprocVecEnv`` over
    ``env.make_train_0`` ... ``env.make_train_46``, in that order.
    """
    config = tf.ConfigProto()
    # To hide the GPUs from TensorFlow, enable the following line:
    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    config.gpu_options.allow_growth = True  # pylint: disable=E1101

    # Create the output directories if necessary.  The sub-directory paths
    # are built by plain concatenation (so ``args.log_path`` is expected to
    # end with a path separator), which keeps them consistent with the
    # ``log_path`` value handed to ``learn`` below.
    # ``exist_ok=True`` avoids the check-then-create race of
    # ``isdir`` + ``mkdir`` and also creates missing parent directories.
    train_path = args.log_path
    for sub_dir in ('', 'log/', 'model/'):
        os.makedirs(train_path + sub_dir, exist_ok=True)

    # env.make_train_0 ... env.make_train_46.  For a quicker run, shrink the
    # range (e.g. ``range(7)`` for the first seven environments).
    num_train_envs = 47
    train_env_makers = [
        getattr(env, 'make_train_{}'.format(idx))
        for idx in range(num_train_envs)
    ]

    with tf.Session(config=config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2ttifrutti.learn(
            policy=policies.CnnPolicy,
            env=SubprocVecEnv(train_env_makers),
            nsteps=args.nsteps,
            nminibatches=args.nminibatches,
            lam=0.95,
            gamma=0.99,
            noptepochs=4,
            log_interval=1,
            ent_coef=0.01,
            lr=lambda _: args.lr,
            cliprange=lambda _: 0.1,
            total_timesteps=args.total_timesteps,
            save_interval=25,
            log_path=args.log_path,
            train=args.train,
        )
def main():
    """Train PPO jointly on every train, val and extra environment.

    Builds a ``SubprocVecEnv`` from 98 environment factories of the ``env``
    module, in this order: ``make_train_0`` ... ``make_train_46``,
    ``make_val_0`` ... ``make_val_10``, ``make_extra_0`` ...
    ``make_extra_39``.  ``ppo2ttifrutti.learn`` is then called inside a
    TensorFlow session whose GPU options have ``allow_growth`` enabled.
    The timestep budget (1e9) is deliberately far larger than needed, so the
    run is expected to end when the environment raises an exception.
    """
    session_config = tf.ConfigProto()
    # To hide the GPUs from TensorFlow, enable the following line:
    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    session_config.gpu_options.allow_growth = True  # pylint: disable=E1101

    # (attribute prefix, number of factories) for each environment family,
    # listed in the order the factories are handed to SubprocVecEnv.
    env_families = (
        ('make_train_', 47),
        ('make_val_', 11),
        ('make_extra_', 40),
    )

    def learning_rate(_):
        """Constant learning-rate schedule; the argument is ignored."""
        return 2e-4

    def clip_range(_):
        """Constant clip-range schedule; the argument is ignored."""
        return 0.1

    with tf.Session(config=session_config):
        env_makers = []
        for prefix, count in env_families:
            env_makers.extend(
                getattr(env, prefix + str(idx)) for idx in range(count)
            )

        ppo2ttifrutti.learn(
            policy=policies.CnnPolicy,
            env=SubprocVecEnv(env_makers),
            nsteps=2048,
            nminibatches=16,
            lam=0.95,
            gamma=0.99,
            noptepochs=4,
            log_interval=1,
            ent_coef=0.01,
            lr=learning_rate,
            cliprange=clip_range,
            # Oversized on purpose: training stops on an exception instead.
            total_timesteps=int(1e9),
            save_interval=25,
        )
def main():
    """Run PPO on one validation environment, starting from a checkpoint.

    Reads ``val_idx``, ``log_path``, ``load_path``, ``nsteps``,
    ``nminibatches``, ``lr`` and ``total_timesteps`` from the module-level
    ``args`` object.  ``args.val_idx`` indexes the list
    ``env.make_val_0`` ... ``env.make_val_10``; the selected factory is
    wrapped in a ``DummyVecEnv``.  Output goes to
    ``<args.log_path>val_<idx>/`` with ``log/`` and ``model/``
    sub-directories, which are created if missing.

    Raises:
        FileNotFoundError: if ``tf.train.latest_checkpoint`` finds no
            checkpoint under ``args.load_path``.
    """
    config = tf.ConfigProto()
    # To hide the GPUs from TensorFlow, enable the following line:
    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    config.gpu_options.allow_growth = True  # pylint: disable=E1101

    # env.make_val_0 ... env.make_val_10
    val_envs = [getattr(env, 'make_val_{}'.format(idx)) for idx in range(11)]
    i = args.val_idx

    # latest_checkpoint() returns None when nothing is found; fail with a
    # clear message (and before creating any directories) instead of an
    # opaque TypeError from slicing None.
    checkpoint = tf.train.latest_checkpoint(args.load_path)
    if checkpoint is None:
        raise FileNotFoundError(
            'No checkpoint found in {!r}'.format(args.load_path))
    # NOTE(review): the last three characters of the checkpoint path are
    # dropped, exactly as before -- presumably a suffix that learn() adds
    # back itself; confirm against ppo2ttifrutti.learn.
    load_path = checkpoint[:-3]

    # Create the output directories if necessary.  ``exist_ok=True`` avoids
    # the check-then-create race of ``isdir`` + ``mkdir`` and also creates
    # missing parent directories.
    val_path = args.log_path + 'val_' + str(i) + '/'
    for sub_dir in ('', 'log/', 'model/'):
        os.makedirs(val_path + sub_dir, exist_ok=True)

    # Create session
    with tf.Session(config=config):
        # Take more timesteps than we need to be sure that
        # we stop due to an exception.
        ppo2ttifrutti.learn(
            policy=policies.CnnPolicy,
            env=DummyVecEnv([val_envs[i]]),
            nsteps=args.nsteps,
            nminibatches=args.nminibatches,
            lam=0.95,
            gamma=0.99,
            noptepochs=4,
            log_interval=1,
            ent_coef=0.01,
            lr=lambda _: args.lr,
            cliprange=lambda _: 0.1,
            total_timesteps=args.total_timesteps,
            load_path=load_path,
            log_path=val_path,
        )
def main():
    """Run PPO on the custom environment with ``load_path='model'``.

    Opens a TensorFlow session whose GPU options have ``allow_growth``
    enabled and calls ``ppo2ttifrutti.learn`` on a ``DummyVecEnv`` wrapping
    ``env.make_custom``, with a budget of 1e7 timesteps.  The original
    author's note says the budget is larger than needed so that the run
    stops due to an exception.
    """
    session_config = tf.ConfigProto()
    # To hide the GPUs from TensorFlow, enable the following line:
    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    session_config.gpu_options.allow_growth = True  # pylint: disable=E1101

    def learning_rate(_):
        """Constant learning-rate schedule; the argument is ignored."""
        return 7.5e-5

    def clip_range(_):
        """Constant clip-range schedule; the argument is ignored."""
        return 0.1

    with tf.Session(config=session_config):
        vec_env = DummyVecEnv([env.make_custom])

        ppo2ttifrutti.learn(
            policy=policies.CnnPolicy,
            env=vec_env,
            nsteps=4096,
            nminibatches=4,
            lam=0.95,
            gamma=0.99,
            noptepochs=4,
            log_interval=1,
            ent_coef=0.01,
            lr=learning_rate,
            cliprange=clip_range,
            total_timesteps=int(1e7),
            load_path='model',
        )