def run():
    """Parse CLI arguments, print the generated commands, and execute them.

    Builds the worker command list via ``create_commands``. In dry-run mode
    the commands are only printed; otherwise they are executed, optionally
    wiping an existing log directory after interactive confirmation when
    running in tmux mode.
    """
    args = parser.parse_args()
    args.gpu = util.parse_to_num(args.gpu)
    cmds, notes = create_commands("e", args)
    if args.dry_run:
        print(
            "Dry-run mode due to -d flag, otherwise the following commands would be executed:"
        )
    else:
        print("Executing the following commands:")
    print("\n".join(cmds))
    print("")
    if args.dry_run:
        return
    if args.mode == "tmux":
        # Clear TMUX so a new tmux session can be started from inside one.
        os.environ["TMUX"] = ""
        path = os.path.join(os.getcwd(), args.log)
        if os.path.exists(path):
            # BUG FIX: was raw_input() (Python 2 only); this file uses
            # f-strings elsewhere, so it runs on Python 3 where only
            # input() exists.
            key = input("%s exists. Do you want to delete it? (y/n): " % path)
            if key != 'n':
                # NOTE(review): shell interpolation of a user-supplied path;
                # consider shutil.rmtree(path) instead.
                os.system("rm -rf %s" % path)
    # Both tmux and non-tmux modes end identically (deduplicated from the
    # original's two copies of these two statements).
    os.system("\n".join(cmds))
    print('\n'.join(notes))
def run():
    """Restore a trained agent from a checkpoint and report its reward.

    Parses CLI arguments, builds the environment and the model named by
    ``args.model``, restores weights from ``args.checkpoint``, evaluates
    for ``args.n_play`` episodes and prints the resulting reward.
    """
    args = parser.parse_args()
    args.task = 0
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)
    env = new_env(args)
    # Meta dimension is only available for environments that expose meta().
    args.meta_dim = 0 if not hasattr(env, 'meta') else len(env.meta())
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    config = tf.ConfigProto(device_filters=device,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        if args.alg == 'A3C':
            model_type = 'policy'
        elif args.alg == 'Q':
            model_type = 'q'
        elif args.alg == 'VPN':
            model_type = 'vpn'
        else:
            raise ValueError('Invalid algorithm: ' + args.alg)
        with tf.device(device):
            with tf.variable_scope("local/learner"):
                # BUG FIX: was eval("model." + args.model) — executing a
                # CLI-supplied string; getattr performs the same lookup safely.
                agent = getattr(model, args.model)(
                    env.observation_space.shape, env.action_space.n,
                    type=model_type, gamma=args.gamma, dim=args.dim,
                    f_num=args.f_num, f_stride=args.f_stride,
                    f_size=args.f_size, f_pad=args.f_pad,
                    branch=args.branch, meta_dim=args.meta_dim)
                print("Num parameters: %d" % agent.num_param)
                saver = tf.train.Saver()
                saver.restore(sess, args.checkpoint)
                np.random.seed(args.seed)
                reward = evaluate(env, agent, args.n_play, eps=args.eps)
                print("Reward: %.2f" % (reward))
def run():
    """Restore a VPN agent from a checkpoint and run evaluation.

    Variant of the evaluation entry point with the model type fixed to
    'vpn' and no meta dimension; the evaluation result is discarded
    (``evaluate`` is presumably printing its own statistics — TODO confirm).
    """
    args = parser.parse_args()
    args.task = 0
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)
    env = new_env(args)
    args.meta_dim = 0
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    config = tf.ConfigProto(device_filters=device,
                            gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        model_type = 'vpn'
        with tf.device(device):
            with tf.variable_scope("local/learner"):
                # BUG FIX: was eval("model." + args.model) — executing a
                # CLI-supplied string; getattr performs the same lookup safely.
                agent = getattr(model, args.model)(
                    env.observation_space.shape, env.action_space.n,
                    type=model_type, gamma=args.gamma, dim=args.dim,
                    f_num=args.f_num, f_stride=args.f_stride,
                    f_size=args.f_size, f_pad=args.f_pad,
                    branch=args.branch, meta_dim=args.meta_dim)
                print("Num parameters: %d" % agent.num_param)
                saver = tf.train.Saver()
                saver.restore(sess, args.checkpoint)
                np.random.seed(args.seed)
                evaluate(env, agent, args.n_play, eps=args.eps)
def main(_):
    """Set up TensorFlow for data-parallel work.

    Parses command-line options, installs signal handlers for clean
    shutdown, then starts one of three process roles depending on
    ``--job-name``: a training worker, a tester, or a parameter server
    (which simply sleeps forever while serving variables).
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-gpu', '--gpu', default=0, type=int,
                        help='Number of GPUs')
    parser.add_argument('-v', '--verbose', action='count', dest='verbosity',
                        default=0, help='Set verbosity.')
    parser.add_argument('--task', default=0, type=int, help='Task index')
    parser.add_argument('--job-name', default="worker", help='worker or ps')
    parser.add_argument('--num-workers', default=1, type=int,
                        help='Number of workers')
    parser.add_argument('--num-ps', type=int, default=1,
                        help="Number of parameter servers")
    parser.add_argument('--log', default="/tmp/vpn", help='Log directory path')
    parser.add_argument('--env-id', default="maze", help='Environment id')
    parser.add_argument(
        '-r', '--remotes', default=None,
        help='References to environments to create (e.g. -r 20), '
             'or the address of pre-existing VNC servers and '
             'rewarders to use (e.g. -r vnc://localhost:5900+15900,vnc://localhost:5901+15901)'
    )
    parser.add_argument('-a', '--alg', choices=['A3C', 'Q', 'VPN'],
                        default="A3C")
    parser.add_argument('-mo', '--model', type=str, default="LSTM",
                        help="Name of model: [CNN | LSTM]")
    parser.add_argument('--eval-freq', type=int, default=250000,
                        help="Evaluation frequency")
    parser.add_argument('--eval-num', type=int, default=500,
                        help="Evaluation frequency")
    parser.add_argument('--eval-epoch', type=int, default=0,
                        help="Evaluation epoch")
    parser.add_argument('--seed', type=int, default=0, help="Random seed")
    parser.add_argument('--config', type=str,
                        default="config/collect_deterministic.xml",
                        help="config xml file for environment")
    # Hyperparameters
    parser.add_argument('-n', '--t-max', type=int, default=10,
                        help="Number of unrolling steps")
    parser.add_argument('-g', '--gamma', type=float, default=0.98,
                        help="Discount factor")
    parser.add_argument('-ld', '--ld', type=float, default=1,
                        help="Lambda for GAE")
    parser.add_argument('-lr', '--lr', type=float, default=1e-4,
                        help="Learning rate")
    parser.add_argument('--decay', type=float, default=0.95,
                        help="Learning decay")
    parser.add_argument('-ms', '--max-step', type=int, default=int(15e6),
                        help="Max global step")
    parser.add_argument('--dim', type=int, default=0,
                        help="Number of final hidden units")
    parser.add_argument('--f-num', type=str, default='32,32,64',
                        help="num of conv filters")
    parser.add_argument('--f-pad', type=str, default='SAME',
                        help="padding of conv filters")
    parser.add_argument('--f-stride', type=str, default='1,1,2',
                        help="stride of conv filters")
    parser.add_argument('--f-size', type=str, default='3,3,4',
                        help="size of conv filters")
    parser.add_argument('--h-dim', type=str, default='',
                        help="num of hidden units")
    # Q-Learning parameters
    parser.add_argument('-s', '--sync', type=int, default=10000,
                        help="Target network synchronization frequency")
    parser.add_argument('-f', '--update-freq', type=int, default=1,
                        help="Parameter update frequency")
    parser.add_argument('--eps-step', type=int, default=int(1e6),
                        help="Num of local steps for epsilon scheduling")
    parser.add_argument('--eps', type=float, default=0.05,
                        help="Final epsilon value")
    parser.add_argument('--eps-eval', type=float, default=0.0,
                        help="Epsilon for evaluation")
    # VPN parameters
    parser.add_argument('--prediction-step', type=int, default=3,
                        help="number of prediction steps")
    parser.add_argument('--branch', type=str, default="4,4,4",
                        help="branching factor")
    parser.add_argument('--buf', type=int, default=10**6,
                        help="num of steps for random buffer")

    args = parser.parse_args()
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.h_dim = util.parse_to_num(args.h_dim)
    # Evaluation epsilon must never exceed the training epsilon.
    args.eps_eval = min(args.eps, args.eps_eval)
    args.branch = util.parse_to_num(args.branch)

    spec = cluster_spec(args.num_workers, args.num_ps)
    cluster = tf.train.ClusterSpec(spec).as_cluster_def()

    def shutdown(signum, frame):
        # Renamed from "signal" to avoid shadowing the signal module.
        # BUG FIX: the original log string was broken by a raw newline inside
        # a single-quoted literal (a syntax error); reconstructed.
        # logger.warn is deprecated -> logger.warning.
        logger.warning('Received signal %s: exiting', signum)
        # Exit with the conventional 128+signal status code.
        sys.exit(128 + signum)

    signal.signal(signal.SIGHUP, shutdown)
    signal.signal(signal.SIGINT, shutdown)
    signal.signal(signal.SIGTERM, shutdown)

    # BUG FIX: removed dead "gpu_options = None" that was immediately
    # overwritten by the assignment below.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.15)
    if args.job_name in ("worker", "test"):
        # Worker and tester share an identical server configuration
        # (deduplicated from the original's two copies).
        server = tf.train.Server(
            cluster, job_name="worker", task_index=args.task,
            config=tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1,
                                  gpu_options=gpu_options))
        if args.job_name == "worker":
            run(args, server)
        else:
            run_tester(args, server)
    elif args.job_name == "ps":
        # Parameter servers only need a small slice of GPU memory.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.05)
        server = tf.train.Server(
            cluster, job_name="ps", task_index=args.task,
            config=tf.ConfigProto(device_filters=["/job:ps"],
                                  gpu_options=gpu_options))
        # Keep the process alive; the server thread does the actual work.
        while True:
            time.sleep(1000)
parser.add_argument('--f-size', type=str, default='3,3,4', help="size of conv filters") parser.add_argument('--f-pad', type=str, default='SAME', help="padding of conv filters") # VPN parameters parser.add_argument('--branch', type=str, default="4,4,4", help="branching factor") config = open('config/evaluation_config.xml').read() config = BeautifulSoup(config, "lxml") env = MazeSMDP(gamma=0.99, config=config) # env.visualize().show() eps = 0. args = parser.parse_args() args.task = 0 args.f_num = util.parse_to_num(args.f_num) args.f_stride = util.parse_to_num(args.f_stride) args.f_size = util.parse_to_num(args.f_size) args.branch = util.parse_to_num(args.branch) args.meta_dim = 0 if env.meta() is None else len(env.meta()) device = '/gpu:0' if args.gpu > 0 else '/cpu:0' gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2) config = tf.ConfigProto(device_filters=device, gpu_options=gpu_options, allow_soft_placement=True) with tf.Session(config=config) as sess: if args.alg == 'A3C': model_type = 'policy' elif args.alg == 'Q': model_type = 'q'
def run(envs=None):
    """Evaluate checkpointed VPN agents over a grid of plan-branch shapes.

    For every checkpoint path and every branching factor, the TF graph is
    rebuilt, the checkpoint restored, and the agent evaluated on each
    environment in ``envs``.

    Args:
        envs: optional list of environments; defaults to a single env
            built from the CLI arguments.

    Returns:
        list of evaluation statistics accumulated across all runs.
    """
    stats = []
    args = parser.parse_args()
    args.task = 0
    args.f_num = util.parse_to_num(args.f_num)
    args.f_stride = util.parse_to_num(args.f_stride)
    args.f_size = util.parse_to_num(args.f_size)
    args.branch = util.parse_to_num(args.branch)
    # BUG FIX: the original bound this to "env", which the evaluation loop
    # below shadowed — after the first sweep iteration the agent was built
    # from whatever env happened to be last in ``envs``. A distinct name
    # keeps the canonical env stable (assumes all envs share observation/
    # action spaces — TODO confirm).
    base_env = new_env(args)
    if not envs:  # covers both None and an empty list
        envs = [base_env]
    args.meta_dim = 0 if base_env.meta() is None else len(base_env.meta())
    device = '/gpu:0' if args.gpu > 0 else '/cpu:0'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    branches = [
        [4, 4, 4],
        [4, 4, 4, 4],
        [4, 4, 4, 4, 4],
        [4, 1, 4, 1, 4],
        [1],
        [1, 1, 1],
        [1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [4, 4, 4, 1],
        [4, 4, 4, 1, 1],
        [4, 4, 4, 1, 1, 1],
        [1, 4, 4, 4],
        [1, 1, 1, 4, 4, 4],
        # NOTE(review): duplicate of the previous entry — kept so that
        # count_max and the stats layout are unchanged, but verify whether
        # it was meant to be a different shape.
        [1, 1, 1, 4, 4, 4],
    ]
    paths = [
        f'/home/ikaynov/Repositories/value-prediction-network/Experiments/{x}/best'
        for x in [
            's10_t20_g8_444',
            's10_t20_g8_4444',
            's10_t20_g8_44444',
        ]
    ]
    count = 0
    count_max = len(paths) * len(branches)
    # Hoisted out of the loop: the algorithm choice is loop-invariant.
    if args.alg == 'A3C':
        model_type = 'policy'
    elif args.alg == 'Q':
        model_type = 'q'
    elif args.alg == 'VPN':
        model_type = 'vpn'
    else:
        raise ValueError('Invalid algorithm: ' + args.alg)
    for ck in paths:
        for branch_type in branches:
            print(f'Executing {count}/{count_max}.')
            config = tf.ConfigProto(device_filters=device,
                                    gpu_options=gpu_options,
                                    allow_soft_placement=True)
            tf.reset_default_graph()
            with tf.Session(config=config) as sess:
                with tf.device(device):
                    # np.random.seed(args.seed)
                    with tf.variable_scope("local/learner"):
                        # BUG FIX: was eval("model." + args.model) —
                        # executing a CLI-supplied string; getattr performs
                        # the same lookup safely.
                        agent = getattr(model, args.model)(
                            base_env.observation_space.shape,
                            base_env.action_space.n,
                            type=model_type, gamma=args.gamma, dim=args.dim,
                            f_num=args.f_num, f_stride=args.f_stride,
                            f_size=args.f_size, f_pad=args.f_pad,
                            branch=branch_type, meta_dim=args.meta_dim)
                        # Branch shape the checkpoint was trained with,
                        # parsed from the experiment dir name (e.g. ..._444).
                        agent.train_branch = str([
                            int(x)
                            for x in list(ck.split('/')[-2].split('_')[-1])
                        ])
                        print("Num parameters: %d" % agent.num_param)
                        # BUG FIX: the initializer originally ran right after
                        # reset_default_graph(), before any variables existed
                        # (a no-op); run it after the graph is built, then
                        # let restore overwrite the trained variables.
                        sess.run(tf.global_variables_initializer())
                        saver = tf.train.Saver()
                        saver.restore(sess, ck)
                        for env in envs:
                            run_stats = evaluate(env, agent, args.n_play,
                                                 eps=args.eps)
                            stats += run_stats
            count += 1
    return stats