Example #1
0
    #observation_dim = 9

    # Define two separate events, "update" and "rolling", used to coordinate
    # the PPO update step with the data-collecting workers.
    UPDATE_EVENT = Event()  # PPO update event
    ROLLING_EVENT = Event()  # worker data-collection (rollout) event
    # Not updating yet: clearing sets the event flag to False, so the PPO
    # update side stays stopped.
    UPDATE_EVENT.clear()
    # Start rolling out: setting the flag to True wakes every waiter that is
    # blocked on this event, so the workers begin working.
    ROLLING_EVENT.set(
    )
    # Re-entrant locks; judging by the names they presumably guard the update
    # counter and the episode counter -- usage is not visible here, TODO confirm.
    update_rlock = RLock()
    episode_rlock = RLock()

    # Shared namespace holding the global training state.
    # NOTE(review): if `Manager` is multiprocessing.Manager, each `Manager()`
    # call starts its own server process, so `ns` and `QUEUE` below would be
    # served by two separate managers -- confirm whether that is intended.
    ns = Manager().Namespace()
    ns.GLOBAL_UPDATE_COUNTER = 0
    ns.GLOBAL_EPISODE = 0
    # NOTE(review): if `ns` is a multiprocessing Namespace proxy, in-place
    # mutation of this list (e.g. `.append`) is not propagated; the attribute
    # has to be reassigned -- verify against the code that updates it.
    ns.GLOBAL_RUNNING_REWARD = []
    ns.coord_status = True

    # Manager-backed queue; presumably carries data collected by the workers
    # to the updater -- TODO confirm against the consumers.
    QUEUE = Manager().Queue()

    class MyManager(BaseManager):
        """Manager subclass on which the PPO type is registered below."""
        pass

    # Register PPO under the type id 'Ppo'; this is what makes the
    # `manager.Ppo(...)` factory used below available.
    MyManager.register('Ppo', PPO)
    manager = MyManager()
    manager.start()
    # Build the PPO object through the manager rather than directly.
    # Assuming `BaseManager` is multiprocessing.managers.BaseManager, `Ppo` is
    # then a proxy to an instance living in the manager's server process --
    # TODO confirm.
    Ppo = manager.Ppo(action_dim, observation_dim, is_train=True)

    # GLOBAL_PPO = PPO(action_dim, observation_dim, is_train=True)  # a single global PPO
    print('<TRAIN_LOG> ', 'GLOBAL_PPO get', ' time:', datetime.now())

    try: