def _str2bool(value):
    """Parse a textual command-line boolean.

    BUG FIX: the original used ``type=bool``, which is an argparse trap —
    ``bool("False")`` is ``True`` because any non-empty string is truthy, so
    ``--restrict False`` silently *enabled* the flag.  This parser accepts the
    usual spellings and raises ``ValueError`` (which argparse reports as a
    usage error) on anything else.
    """
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ('true', 't', 'yes', '1'):
        return True
    if lowered in ('false', 'f', 'no', '0'):
        return False
    raise ValueError('boolean value expected, got %r' % value)


parser.add_argument("--cfg_path", type=str, default='config.json.dist')
# Boolean flags: a bare flag (e.g. ``--test``) means True via ``const``;
# an explicit value (``--restrict False``) is parsed correctly by _str2bool.
parser.add_argument("--restrict", type=_str2bool, nargs='?', const=True, default=False)
parser.add_argument("--imitation", type=_str2bool, nargs='?', const=True, default=False)
parser.add_argument("--test", type=_str2bool, nargs='?', const=True, default=False)
parser.add_argument("--restore", type=_str2bool, nargs='?', const=True, default=False)
parser.add_argument('--save_replay', type=_str2bool, nargs='?', const=True, default=False)
args = parser.parse_args()

# Pin the visible GPU before TensorFlow builds the session; CUDA reads this
# environment variable at initialization time.
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
tf.reset_default_graph()
sess = tf.Session()

# Build the experiment configuration and make sure its weights directory exists.
config = Config(args.sz, args.map, args.run_id, restrict=args.restrict, imitation=args.imitation)
os.makedirs('weights/' + config.full_id(), exist_ok=True)
cfg_path = 'weights/%s/config.json' % config.full_id()  # where the config is persisted
# On restore, reload the previously saved config; otherwise start from the template.
config.build(cfg_path if args.restore else args.cfg_path)
if not args.restore and not args.test:
    config.save(cfg_path)  # persist config for fresh training runs only

# Wrap the environments, build the A2C agent, and drive the training loop.
envs = EnvWrapper(make_envs(args), config)
agent = A2CAgent(sess, fully_conv, config, args.restore, args.discount, args.lr,
                 args.vf_coef, args.ent_coef, args.clip_grads)
runner = Runner(envs, agent, args.steps)
runner.run(args.updates, not args.test)  # train unless running in test mode

if args.save_replay:
    envs.save_replay()
envs.close()
# NOTE(review): this chunk begins mid-statement — the line below closes an
# add_argument(...) call whose opening is above the visible region.  The
# flags above presumably use type=bool, which maps any non-empty string
# (including "False") to True — TODO confirm and fix upstream.
const=True, default=False)
parser.add_argument('--skip_steps', type=int, default=0)
args = parser.parse_args()

# Select the GPU before TensorFlow initializes the session.
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
tf.reset_default_graph()
sess = tf.Session()

# Build the experiment configuration and ensure its weights directory exists.
config = Config(args.sz, args.map, args.run_id)
os.makedirs('weights/' + config.full_id(), exist_ok=True)
cfg_path = 'weights/%s/config.json' % config.full_id()  # persisted config location
# On restore, reload the saved config; otherwise start from the template path.
config.build(cfg_path if args.restore else args.cfg_path)
if not args.restore and not args.test:
    config.save(cfg_path)  # persist config for fresh training runs only

# Wrap the environments, build the agent, and run the training loop.
envs = EnvWrapper(make_envs(args), config)
agent = A2CAgent(sess, fully_conv, config, args.restore, args.discount, args.lr,
                 args.vf_coef, args.ent_coef, args.clip_grads, args.save_best_only,
                 not args.test)
# NOTE(review): this variant passes the training flag (not args.test) to the
# Runner constructor rather than to run() — verify Runner's signature matches.
runner = Runner(envs, agent, not args.test, args.steps)
runner.run(args.updates)
if args.save_replay:
    envs.save_replay()
envs.close()
    # NOTE(review): indentation reconstructed — this block appears to be the
    # body of an `if` whose header is above the visible region (the `else:`
    # below pairs with it).  TODO confirm against the full file.
    # Enumerate checkpoint basenames under ckptPath, excluding non-weight files.
    weightsList = os.listdir(args.ckptPath)
    weightsList.remove('config.json')
    weightsList.remove('checkpoint')
    weightsList = [oneWeight.split('.')[0] for oneWeight in weightsList]
    weightsList = list(set(weightsList))  # de-duplicate (.index/.data/.meta share a stem)
    # Enumerate already-produced next-state .npy files; the checkpoint id is
    # the part after '_' with the '.npy' suffix stripped.
    npyList = os.listdir(args.nextStatePath)
    npyList1 = [oneNpy.split('_')[1][:-4] for oneNpy in npyList]
    npyList2 = list(set(npyList1))
    for oneCkpt in weightsList:
        tf.reset_default_graph()  # fresh graph per checkpoint evaluation
        # Skip checkpoints whose next-state values were already computed.
        if oneCkpt.split('.')[0][4:] in npyList2:
            continue
        try:
            sess = tf.Session(config=tf_config)
            args.ckptfile = oneCkpt  # tell the agent which checkpoint to load
            agent = A2CAgent(sess, fully_conv, config, args.discount, args.lr,
                             args.vf_coef, args.ent_coef, args.clip_grads,
                             weight_dir, log_dir, args)
            runner = Runner(None, agent, args, args.steps)
            NextStateValue = runner.forwardState()
            # File name keys on the numeric suffix after '-' in the checkpoint name.
            np.save(args.nextStatePath + 'nextState_' + str(oneCkpt.split('-')[1]) + '.npy',
                    NextStateValue)
        except:
            # NOTE(review): bare except silently swallows ALL errors for a
            # checkpoint (including KeyboardInterrupt) — consider narrowing
            # and logging the failure.
            pass
        time.sleep(10)  # presumably lets TF release GPU memory between runs — verify
    os._exit(0)  # hard-exit after processing all checkpoints (skips cleanup)
else:
    args.ckptfile = None  # no specific checkpoint: train from scratch
# begin to train the agent
if not args.distill:
    try:
        sess = tf.Session(config=tf_config)
        # NOTE(review): the remainder of this try block is outside the
        # visible region of the file.
def _str2bool(value):
    """Parse a textual command-line boolean.

    BUG FIX: the original used ``type=bool`` — argparse then treats any
    non-empty string (including "False") as True.  This parser accepts the
    usual spellings and raises ``ValueError`` (reported by argparse as a
    usage error) on anything else.
    """
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ('true', 't', 'yes', '1'):
        return True
    if lowered in ('false', 'f', 'no', '0'):
        return False
    raise ValueError('boolean value expected, got %r' % value)


parser.add_argument('--clip_grads', type=float, default=1.)
parser.add_argument("--run_id", type=int, default=-1)
parser.add_argument("--game", type=str, default='Pong-v0')
# Boolean flags: bare flag means True via ``const``; explicit True/False
# values are parsed correctly by _str2bool.
parser.add_argument("--test", type=_str2bool, nargs='?', const=True, default=False)
parser.add_argument("--restore", type=_str2bool, nargs='?', const=True, default=False)
parser.add_argument("--agenttype", type=str, default="vpg")  # "vpg" or "a2c"
args = parser.parse_args()

# Pin the visible GPU before TensorFlow builds the session.
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
tf.reset_default_graph()
sess = tf.Session()

# Environments are wrapped without specifying a feature set up front.
envs = EnvWrapper(make_envs(args))

if args.game == 'Pong-v0':
    num_action = 3   # Pong needs only NOOP/UP/DOWN
    input_size = 80  # downsampled 80x80 frames
    # BUG FIX: original passed ``agenttype = agenttype`` (a bare name not
    # defined in this block — NameError unless shadowed earlier in the file);
    # args.agenttype is the parsed value and is correct in either case.
    agent = A2CAgent(sess, fully_conv, input_size, num_action, args.game, args.restore,
                     args.discount, args.lr, args.vf_coef, args.ent_coef, args.clip_grads,
                     agenttype=args.agenttype)
    runner = Runner(envs, agent, args.steps, args.game, agenttype=args.agenttype)
    runner.run(args.updates, not args.test)
else:
    # CartPole-style control task: two discrete actions, no image input.
    num_action = 2
    input_size = None
    agent = A2CAgent(sess, carpole_net, input_size, num_action, args.game, args.restore,
                     args.discount, args.lr, args.vf_coef, args.ent_coef, args.clip_grads)
    runner = Runner(envs, agent, args.steps, args.game)
    runner.run(args.updates, not args.test)
envs.close()