Code example #1
import gym

# Helpers assumed from the surrounding project (not shown here):
# Policy, Agent, train, get_space_dim


def trainer(fargs):
    trainer_id, args = fargs
    print("Trainer id", trainer_id, "started")
    # Create a Gym environment
    env = gym.make(args.env)

    # Set maximum episode length
    if args.episode_steps is not None:
        env._max_episode_steps = args.episode_steps

    # Get dimensionalities of actions and observations
    action_space_dim = get_space_dim(env.action_space)
    observation_space_dim = get_space_dim(env.observation_space)

    # Instantiate agent and its policy
    policy = Policy(observation_space_dim, action_space_dim)
    agent = Agent(policy)

    training_history = train(agent,
                             env,
                             args.train_episodes,
                             silent=True,
                             train_run_id=trainer_id,
                             early_stop=False)

    print("Trainer id", trainer_id, "finished")

    return training_history
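
The tuple-unpacking signature of trainer (a single fargs argument holding (trainer_id, args)) suggests it is written as a worker for multiprocessing.Pool.map, which passes exactly one argument per call. A minimal dispatch sketch under that assumption; parse_args and the worker count are placeholders, not part of the original script:

import multiprocessing as mp

if __name__ == "__main__":
    args = parse_args()  # hypothetical: however the surrounding script builds args
    num_trainers = 4     # assumed worker count

    # Each worker receives one (trainer_id, args) tuple, matching trainer(fargs)
    with mp.Pool(processes=num_trainers) as pool:
        histories = pool.map(trainer, [(i, args) for i in range(num_trainers)])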
Code example #2
import datetime

import gym
import matplotlib.pyplot as plt
import seaborn as sns
import torch

# Helpers assumed from the surrounding project (not shown here):
# Policy, Agent, train, test, get_space_dim


def main(args):
    # Create a Gym environment
    env = gym.make(args.env)

    # Exercise 1
    # For CartPole-v0 - raise the maximum episode length
    env._max_episode_steps = 1000

    # Get dimensionalities of actions and observations
    action_space_dim = get_space_dim(env.action_space)
    observation_space_dim = get_space_dim(env.observation_space)

    # Instantiate agent and its policy
    policy = Policy(observation_space_dim, action_space_dim)
    agent = Agent(policy)

    # Print run configuration
    print("Environment:", args.env)
    print("Training device:", agent.train_device)
    print("Observation space dimensions:", observation_space_dim)
    print("Action space dimensions:", action_space_dim)

    # If no model was passed, train a policy from scratch.
    # Otherwise load the policy from the file and go directly to testing.
    if args.test is None:
        training_history = train(args.position, agent, env,
                                 args.train_episodes, False,
                                 args.render_training)

        # Save the model, tagging the filename with a timestamp.
        # Formatting in a single strftime call avoids querying datetime.now()
        # three times, which could straddle a minute boundary mid-string.
        tt = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
        model_file = "%s_params.mdl" % (args.env + tt + "vel")
        torch.save(policy.state_dict(), model_file)
        print("Model saved to", model_file)

        # Plot rewards
        sns.lineplot(x="episode", y="reward", data=training_history)
        sns.lineplot(x="episode", y="mean_reward", data=training_history)
        plt.legend(["Reward", "100-episode average"])
        plt.title("Reward history (%s)" % args.env)
        # Save the plot, reusing the timestamp from the model filename
        plt.savefig("train_history" + tt + "vel" + ".jpg")
        plt.show()

        print("Training finished.")
    else:
        print("Loading model from", args.test, "...")
        state_dict = torch.load(args.test)
        policy.load_state_dict(state_dict)
        print("Testing...")
        test(args.position, agent, env, args.train_episodes, args.render_test)
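
None of the snippets show how args is built. Judging by the attributes the examples read (env, train_episodes, render_training, render_test, test, position, plus episode_length, episode_steps, and x0 elsewhere), a plausible argparse setup might look like the sketch below; every flag name is inferred from usage and every default is an assumption, not a value from the original scripts.

import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", type=str, default="CartPole-v0",
                        help="Gym environment id")
    parser.add_argument("--train_episodes", type=int, default=500,
                        help="number of training episodes (assumed default)")
    parser.add_argument("--render_training", action="store_true",
                        help="render the environment during training")
    parser.add_argument("--render_test", action="store_true",
                        help="render the environment during testing")
    parser.add_argument("--test", type=str, default=None,
                        help="path to a saved model; skips training")
    parser.add_argument("--position", type=float, default=0.0,
                        help="target cart position (exercise-specific, assumed)")
    parser.add_argument("--episode_length", type=int, default=1000,
                        help="maximum episode length (code example #3)")
    parser.add_argument("--episode_steps", type=int, default=None,
                        help="maximum episode length (code example #1)")
    parser.add_argument("--x0", type=float, default=None,
                        help="initial state override (assumed)")
    return parser.parse_args()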
Code example #3
import gym
import matplotlib.pyplot as plt
import seaborn as sns
import torch

# Helpers assumed from the surrounding project (not shown here):
# Policy, Agent, train, test, get_space_dim


def main(args):
    # Create a Gym environment
    env = gym.make(args.env)

    # Exercise 1: set the maximum episode length from the command line
    env._max_episode_steps = args.episode_length

    # Get dimensionalities of actions and observations
    action_space_dim = get_space_dim(env.action_space)
    observation_space_dim = get_space_dim(env.observation_space)

    # Instantiate agent and its policy
    policy = Policy(observation_space_dim, action_space_dim)
    agent = Agent(policy)

    # Print run configuration
    print("Environment:", args.env)
    print("Training device:", agent.train_device)
    print("Observation space dimensions:", observation_space_dim)
    print("Action space dimensions:", action_space_dim)

    # If no model was passed, train a policy from scratch.
    # Otherwise load the policy from the file and go directly to testing.
    if args.test is None:
        training_history = train(agent,
                                 env,
                                 args.train_episodes,
                                 False,  # silent (cf. the keyword form in example #1)
                                 args.render_training,
                                 x0=args.x0,
                                 args=args,
                                 policy=policy)

        # Save the model
        model_file = "%s_params.mdl" % args.env
        torch.save(policy.state_dict(), model_file)
        print("Model saved to", model_file)

        # Plot rewards
        sns.lineplot(x="episode", y="reward", data=training_history)
        sns.lineplot(x="episode", y="mean_reward", data=training_history)
        plt.legend(["Reward", "100-episode average"])
        plt.title("Reward history (%s)" % args.env)
        plt.show()
        print("Training finished.")
    else:
        print("Loading model from", args.test, "...")
        state_dict = torch.load(args.test)
        policy.load_state_dict(state_dict)
        print("Testing...")
        test(agent, env, args.train_episodes, args.render_test, x0=args.x0)
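
One portability note on the load path above: by default torch.load restores tensors onto the device they were saved from, so a checkpoint trained on a GPU can fail to load on a CPU-only machine. The standard map_location argument to torch.load avoids this:

        # Load the checkpoint onto the CPU regardless of where it was trained
        state_dict = torch.load(args.test, map_location=torch.device("cpu"))
        policy.load_state_dict(state_dict)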
Code example #4
        # Let the agent do its magic (update the policy)
        agent.episode_finished(episode_number)

    # Training is finished - plot rewards
    plt.plot(reward_history)
    plt.plot(average_reward_history)
    plt.legend(["Reward", "100-episode average"])
    plt.title("Reward history")
    plt.show()
    print("Training finished.")


import matplotlib.pyplot as plt
import torch
from gym.envs.classic_control import CartPoleEnv

# Create a Gym environment (instantiated directly rather than via gym.make)
env = CartPoleEnv()

# For CartPole - maximum episode length. Note: a bare CartPoleEnv is not
# wrapped in gym's TimeLimit, so this attribute only takes effect if the
# training loop checks it explicitly.
env._max_episode_steps = 1000

# Get dimensionalities of actions and observations
action_space_dim = get_space_dim(env.action_space)
observation_space_dim = get_space_dim(env.observation_space)

# Create the agent and the policy
policy = Policy(observation_space_dim, action_space_dim)
agent = Agent(policy)

# Train (args is assumed to be parsed earlier in the full script)
train(args.train_episodes, agent)
model_file = "%s_params.mdl" % args.env
torch.save(policy.state_dict(), model_file)
print("Model saved to", model_file)