예제 #1
0
파일: train.py 프로젝트: yrpang/mindspore
    # Train the agent for a fixed number of episodes, then save the policy
    # network as a checkpoint under args.ckpt_path.
    # NOTE(review): load_dict() is defined elsewhere; presumably it syncs the
    # agent's networks (e.g. policy -> target) -- confirm against Agent.
    agent = Agent(**cfg)
    agent.load_dict()

    for episode in range(300):
        s0 = env.reset()
        # Starts at 1 rather than 0: the terminating step breaks out of the
        # loop before its reward is accumulated, so this presumably accounts
        # for that step -- confirm before changing.
        total_reward = 1
        while True:
            a0 = agent.act(s0)
            s1, r1, done, _ = env.step(a0)

            # Override the environment's reward with -1 on the step that
            # ends the episode.
            if done:
                r1 = -1

            # The transition is handed to the agent before the loop exits,
            # so the terminating step is included as well.
            agent.put(s0, a0, r1, s1)

            if done:
                break

            total_reward += r1
            s0 = s1
            agent.learn()
        # Called once more after every episode (see the note at the top).
        agent.load_dict()
        print("episode", episode, "total_reward", total_reward)

    # exist_ok=True creates the directory when missing without the
    # check-then-create race of a separate os.path.exists() test.
    path = os.path.realpath(args.ckpt_path)
    os.makedirs(path, exist_ok=True)

    ckpt_name = os.path.join(path, "dqn.ckpt")
    save_checkpoint(agent.policy_net, ckpt_name)
예제 #2
0
    position_bounds,  # Position bounds
    velocity_bounds  # Velocity bounds
)

# Build the agent. Positional arguments, in order:
#   policy        - NeuralNetwork class
#   model         - the model
#   actions       - actions array (after discretization)
#   episodes      - max number of episodes
#   epoches       - max number of epoches per episode
#   greed_factor  - greed factor
agent = Agent(policy, model, actions, episodes, epoches, greed_factor)

# Run the learning procedure; the result is an array with one entry per
# episode (len(episodes) length, per the original note).
results = agent.learn()

# Keep only the entries whose first "state" component reached the upper
# position bound.
success_results = [
    outcome
    for outcome in results
    if outcome["state"][0] >= position_bounds[1]
]

# Write the run parameters and the successful episodes to the log file.
# NOTE(review): the handle is not closed anywhere in this excerpt -- confirm
# it is closed further down, or switch to a `with` block.
log_file = open("model.log", "w+")

header_lines = (
    "Episodes: {0}\n".format(episodes),
    "Epoches: {0}\n".format(epoches),
    "Epsilon-Greedy: {0}\n".format(greed_factor),
    "\n------- {0} successful episodes -------\n\n".format(
        len(success_results)),
)
log_file.writelines(header_lines)

# One line per successful episode, using the entry's str() form.
for r in success_results:
    log_file.write(str(r) + "\n")