def run(count=10, **kwargs):
    """Execute ``count`` consecutive trials through ``_run``.

    Args:
        count: Number of trials to execute.
        **kwargs: Options forwarded to ``_run``. The ``'xh'`` entry is
            overwritten with the zero-based trial index on every trial.
    """
    force.init()

    for trial_index in range(count):
        kwargs.update(xh=trial_index)
        _run(**kwargs)

        # A fresh environment and network are built after each trial. Neither
        # object is used again in this function.
        # NOTE(review): presumably kept for constructor side effects - confirm.
        cartpole = SingleCartPoleEnv().unwrapped
        action_count = cartpole.action_space.n
        feature_count = cartpole.observation_space.shape[0]
        agent = DeepQNetwork(n_actions=action_count,
                             n_features=feature_count)

        # Install a new force generator before the next trial starts.
        force.force_generator = force.ForceGenerator(0.0, 0.0, 0.0, 1.01)
def run(count=10, **kwargs):
    """Execute ``count`` trials with ``PolicyGradients``.

    For every trial a fresh ``PolicyGradients`` instance is created and its
    ``run`` method is called repeatedly. A trial ends when the mode is
    ``'noreset'`` or when ``run`` returns a falsy value; otherwise a new
    environment and network are created and ``run`` is called again.
    After each trial ``force.force_generator`` is replaced by a new
    ``force.ForceGenerator(0.0, 0.0, 0.0, 1.01)``.

    Args:
        count: Number of trials to execute.
        **kwargs: Overrides for the default options passed to
            ``PolicyGradients.run`` (``mode='reset'``,
            ``maxepochcount=1500``, ``complexunit=100.0``). The ``'xh'``
            entry is always set to the zero-based trial index.
    """
    force.init()

    for i in range(count):
        # Previously the ``kwargs`` parameter was rebound to this literal on
        # every iteration, silently discarding whatever the caller passed.
        # Now the literal only supplies defaults and caller values win.
        run_kwargs = {
            'mode': 'reset',
            'xh': i,
            'maxepochcount': 1500,
            'complexunit': 100.0,
        }
        run_kwargs.update(kwargs)
        run_kwargs['xh'] = i  # the trial index is never caller-controlled

        # NOTE(review): ``env`` is not used in this function; presumably it is
        # constructed for its side effects - confirm before removing.
        env = SingleCartPoleEnv().unwrapped
        net = PolicyGradients()
        while True:
            result = net.run(**run_kwargs)
            # In 'noreset' mode a single call completes the trial; otherwise
            # a falsy result ends the trial.
            if run_kwargs['mode'] == 'noreset' or not result:
                break
            # Start over with a fresh environment and network.
            env = SingleCartPoleEnv().unwrapped
            net = PolicyGradients()

        # Install a new force generator before the next trial starts.
        force.force_generator = force.ForceGenerator(0.0, 0.0, 0.0, 1.01)
Ejemplo n.º 3
0
                if not changed or newcomplex is None or newcomplex == complexes_list[-1]:
                    sess.close()
                    return False
                    break  # 复杂度已经达到最大,结束
                print('新的环境复杂度=%.3f,k=%.2f,w=%.2f,f=%.2f,sigma=%.2f' % (newcomplex, k, w, f, sigma))

                if mode == 'reset':
                    sess.close()
                    return True

                episode_number = 0

        sess.close()

if __name__ == '__main__':
    # Script entry point: run ten trials in 'reset' mode.
    force.init()

    for i in range(10):
        kwargs = {
            'mode': 'reset',
            'xh': i,
            'maxepochcount': 1500,
            'complexunit': 100.0,
        }
        env = SingleCartPoleEnv().unwrapped
        net = PolicyGradients()
        while True:
            result = net.run(**kwargs)
            # Stop after one call in 'noreset' mode, or as soon as run()
            # returns a falsy value.
            if kwargs['mode'] == 'noreset' or not result:
                break
            # Otherwise rebuild the environment and network and run again.
            env = SingleCartPoleEnv().unwrapped
            net = PolicyGradients()

        # Install a new force generator before the next trial starts.
        force.force_generator = force.ForceGenerator(0.0, 0.0, 0.0, 1.01)