# --- Example 1 ---
    # CLI flags.  NOTE: argparse's type=bool is a known trap — bool("False")
    # is truthy, so ANY explicit value enables the flag.  The
    # nargs='?'/const=True form at least lets a bare flag (e.g. --test)
    # enable it; --restrict/--imitation now follow the same pattern as the
    # other boolean flags instead of requiring a (misleading) value.
    parser.add_argument("--cfg_path", type=str, default='config.json.dist')
    parser.add_argument("--restrict", type=bool, nargs='?', const=True, default=False)
    parser.add_argument("--imitation", type=bool, nargs='?', const=True, default=False)
    parser.add_argument("--test", type=bool, nargs='?', const=True, default=False)
    parser.add_argument("--restore", type=bool, nargs='?', const=True, default=False)
    parser.add_argument('--save_replay', type=bool, nargs='?', const=True, default=False)
    args = parser.parse_args()

    # Pin the process to the requested GPU before TF creates the session.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    tf.reset_default_graph()
    sess = tf.Session()

    # Build the run configuration and persist it next to the weights so a
    # later --restore run reloads exactly the same settings.
    config = Config(args.sz, args.map, args.run_id, restrict=args.restrict, imitation=args.imitation)
    os.makedirs('weights/' + config.full_id(), exist_ok=True)
    cfg_path = 'weights/%s/config.json' % config.full_id()
    config.build(cfg_path if args.restore else args.cfg_path)
    if not args.restore and not args.test:
        config.save(cfg_path)  # fresh training run: snapshot the config

    envs = EnvWrapper(make_envs(args), config)  # vectorized env wrapper
    agent = A2CAgent(sess, fully_conv, config, args.restore, args.discount, args.lr, args.vf_coef, args.ent_coef, args.clip_grads)

    runner = Runner(envs, agent, args.steps)
    runner.run(args.updates, not args.test)  # train unless --test

    if args.save_replay:  # optionally dump a replay of the run
        envs.save_replay()

    envs.close()
# --- Example 2 ---
                        const=True,
                        default=False)

    parser.add_argument('--skip_steps', type=int, default=0)

    args = parser.parse_args()

    # Select the GPU before TensorFlow enumerates devices, then start from
    # a clean graph and session.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    tf.reset_default_graph()
    sess = tf.Session()
    # Build the run configuration: a restored run reloads the config saved
    # alongside its weights, a fresh run reads the one given on the CLI.
    config = Config(args.sz, args.map, args.run_id)
    cfg_path = 'weights/%s/config.json' % config.full_id()
    os.makedirs('weights/' + config.full_id(), exist_ok=True)
    config.build(cfg_path if args.restore else args.cfg_path)
    if not (args.restore or args.test):
        config.save(cfg_path)  # snapshot settings for future --restore runs

    # Wire up environments, agent, and the training loop driver.
    envs = EnvWrapper(make_envs(args), config)
    agent = A2CAgent(sess, fully_conv, config, args.restore, args.discount,
                     args.lr, args.vf_coef, args.ent_coef, args.clip_grads,
                     args.save_best_only, not args.test)

    runner = Runner(envs, agent, not args.test, args.steps)
    runner.run(args.updates)

    if args.save_replay:
        envs.save_replay()

    envs.close()
# --- Example 3 ---
        # Collect checkpoint basenames under ckptPath: drop the two
        # non-checkpoint entries, strip file extensions, and dedupe.
        weightsList = os.listdir(args.ckptPath)
        weightsList.remove('config.json')
        weightsList.remove('checkpoint')
        weightsList = [oneWeight.split('.')[0] for oneWeight in weightsList]
        weightsList = list(set(weightsList))
        # Index the next-state dumps already on disk so finished checkpoints
        # can be skipped.  Assumes names like 'nextState_<step>.npy'
        # (split('_')[1][:-4] extracts <step>) — TODO confirm against writer.
        npyList = os.listdir(args.nextStatePath)
        npyList1 = [oneNpy.split('_')[1][:-4] for oneNpy in npyList]
        npyList2 = list(set(npyList1))
        # Evaluate each checkpoint once and dump its forward-state values.
        for oneCkpt in weightsList:
            tf.reset_default_graph()
            # Skip checkpoints whose next-state values were already dumped.
            # (Entries were stripped of their extension above, so the extra
            # split('.') here is a harmless no-op.)
            if oneCkpt.split('.')[0][4:] in npyList2:
                continue
            try:
                sess = tf.Session(config=tf_config)
                args.ckptfile = oneCkpt
                agent = A2CAgent(sess, fully_conv, config, args.discount, args.lr, args.vf_coef, args.ent_coef, args.clip_grads,
                         weight_dir, log_dir, args)
                runner = Runner(None, agent, args, args.steps)
                NextStateValue = runner.forwardState()
                np.save(args.nextStatePath+'nextState_'+str(oneCkpt.split('-')[1]) + '.npy', NextStateValue)
            except Exception as exc:
                # Best-effort per checkpoint: a bad or partial checkpoint must
                # not abort the whole sweep, but don't swallow it silently.
                # (The original bare `except: pass` also ate KeyboardInterrupt
                # and SystemExit, making the sweep impossible to interrupt.)
                print('skipping checkpoint %s: %s' % (oneCkpt, exc))

        # Give any asynchronous writers a moment to flush, then hard-exit
        # this worker (os._exit skips interpreter cleanup/atexit handlers).
        time.sleep(10)
        os._exit(0)
    else:
        # Not in sweep mode: no specific checkpoint file selected.
        args.ckptfile = None

    # begin to train the agent
    if not args.distill:
        try:
            sess = tf.Session(config=tf_config)
# --- Example 4 ---
    parser.add_argument('--clip_grads', type=float, default=1.)
    parser.add_argument("--run_id", type=int, default=-1)
    parser.add_argument("--game", type=str, default='Pong-v0')
    parser.add_argument("--test", type=bool, nargs='?', const=True, default=False)
    parser.add_argument("--restore", type=bool, nargs='?', const=True, default=False)
    parser.add_argument("--agenttype", type=str, default="vpg")  # or "a2c"
    args = parser.parse_args()
    # Pin the requested GPU before creating the TF session.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    tf.reset_default_graph()
    sess = tf.Session()

    envs = EnvWrapper(make_envs(args))
    # Network and input shape depend on the game being played.
    if args.game == 'Pong-v0':
        num_action = 3
        input_size = 80
        # BUG FIX: the original passed `agenttype=agenttype`, but no bare
        # `agenttype` local exists — the parsed flag lives on args.agenttype,
        # so this branch raised NameError before ever training.
        agent = A2CAgent(sess, fully_conv, input_size, num_action, args.game, args.restore, args.discount, args.lr, args.vf_coef, args.ent_coef, args.clip_grads, agenttype=args.agenttype)
        runner = Runner(envs, agent, args.steps, args.game, agenttype=args.agenttype)
        runner.run(args.updates, not args.test)
    else:
        num_action = 2
        input_size = None
        agent = A2CAgent(sess, carpole_net, input_size, num_action, args.game, args.restore, args.discount, args.lr,
                         args.vf_coef, args.ent_coef, args.clip_grads)
        runner = Runner(envs, agent, args.steps, args.game)
        runner.run(args.updates, not args.test)

    envs.close()