예제 #1
0
파일: train.py 프로젝트: yrpang/mindspore
parser.add_argument('--ckpt_path',
                    type=str,
                    default="./ckpt",
                    help='if is test, must provide\
                    path where the trained ckpt file')
args = parser.parse_args()
set_seed(1)

if __name__ == "__main__":
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.device_target)
    env = gym.make('CartPole-v1')
    cfg.state_space_dim = env.observation_space.shape[0]
    cfg.action_space_dim = env.action_space.n
    agent = Agent(**cfg)
    agent.load_dict()

    for episode in range(300):
        s0 = env.reset()
        total_reward = 1
        while True:
            a0 = agent.act(s0)
            s1, r1, done, _ = env.step(a0)

            if done:
                r1 = -1

            agent.put(s0, a0, r1, s1)

            if done:
                break