def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 2000
    file_name = "mini_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True, on_rack=False)

    dt = env._time_step

    movetype = "walk"
    # movetype = "trot"
    # movetype = "bound"
    # movetype = "pace"
    # movetype = "pronk"

    TG = TGPolicy(movetype=movetype,
                  center_swing=0.0,
                  amplitude_extension=0.5,
                  amplitude_lift=0.2)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    # # Initialize Normalizer
    # normalizer = Normalizer(state_dim)

    # # Initialize Policy
    # policy = Policy(state_dim, action_dim)

    # # Initialize Agent with normalizer, policy and gym env
    # agent = ARSAgent(normalizer, policy, env)
    # agent_num = input("Policy Number: ")
    # if os.path.exists(models_path + "/" + file_name + str(agent_num) +
    #                   "_policy"):
    #     print("Loading Existing agent")
    #     agent.load(models_path + "/" + file_name + str(agent_num))
    #     agent.policy.episode_steps = 3000
    #     policy = agent.policy

    env.reset()

    print("STARTED MINITAUR TEST SCRIPT")

    # Sample once to get an array with the correct action shape
    action = env.action_space.sample()

    # ELEMENTS PROVIDED BY POLICY
    f_tg = 4.0
    Beta = 3.0
    h_tg = 0.0
    alpha_tg = 0.5
    intensity = 1.0

    # Record extensions for plotting
    LF_ext = []
    LB_ext = []
    RF_ext = []
    RB_ext = []

    LF_tp = []
    LB_tp = []
    RF_tp = []
    RB_tp = []

    t = 0
    while t < (int(max_timesteps)):
        action[:] = 0.0

        # Get Action from TG [no policies here]
        action = TG.get_utg(action, alpha_tg, h_tg, intensity,
                            env.minitaur.num_motors)

        LF_ext.append(action[env.minitaur.num_motors // 2])
        LB_ext.append(action[1 + env.minitaur.num_motors // 2])
        RF_ext.append(action[2 + env.minitaur.num_motors // 2])
        RB_ext.append(action[3 + env.minitaur.num_motors // 2])
        # Perform action
        next_state, reward, done, _ = env.step(action)

        obs = TG.get_TG_state()
        # LF_tp.append(obs[0])
        # LB_tp.append(obs[1])
        # RF_tp.append(obs[2])
        # RB_tp.append(obs[3])

        # Increment phase
        TG.increment(dt, f_tg, Beta)

        # time.sleep(1.0)

        t += 1

    plt.figure(0)
    plt.plot(LF_ext, label="LF")
    plt.plot(LB_ext, label="LB")
    plt.plot(RF_ext, label="RF")
    plt.plot(RB_ext, label="RB")
    plt.xlabel("t")
    plt.ylabel("EXT")
    plt.title("Leg Extensions")
    plt.legend()
    plt.show()

    env.close()
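
The TGPolicy internals are not listed on this page. As a rough, hypothetical sketch of the idea (SimpleTG and phase_offsets are illustrative names, not the repo's API): each leg is driven by a shared phase that increment() advances by f_tg * dt, and per-leg phase offsets encode the gait.

import numpy as np

# Hypothetical phase offsets per leg; "walk" staggers the legs across the cycle.
phase_offsets = {"walk": [0.0, 0.5, 0.75, 0.25]}  # LF, LB, RF, RB (assumed order)

class SimpleTG:
    def __init__(self, movetype="walk", amplitude_extension=0.5):
        self.offsets = np.array(phase_offsets[movetype])
        self.amp = amplitude_extension
        self.phase = 0.0  # shared phase in [0, 1)

    def increment(self, dt, f_tg):
        # Advance the shared phase by frequency * timestep, wrapping at 1
        self.phase = (self.phase + f_tg * dt) % 1.0

    def extensions(self):
        # Per-leg extension: a sinusoid evaluated at each leg's shifted phase
        return self.amp * np.sin(2.0 * np.pi * (self.phase + self.offsets))
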
Example #2
def main():
    """ The main() function. """

    # Needed for multiprocessing support in frozen builds (e.g. on Windows)
    mp.freeze_support()

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    eval_freq = 1e1
    save_model = True
    file_name = "mini_tg_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=False)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    # TRAJECTORY GENERATOR
    movetype = "walk"
    # movetype = "trot"
    # movetype = "bound"
    # movetype = "pace"
    # movetype = "pronk"
    TG = TGPolicy(movetype=movetype,
                  center_swing=0.0,
                  amplitude_extension=0.6,
                  amplitude_lift=0.3)
    TG_state_dim = len(TG.get_TG_state())
    TG_action_dim = 5  # f_tg, alpha_tg, h_tg, Beta, Intensity
    state_dim = env.observation_space.shape[0] + TG_state_dim
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0] + TG_action_dim
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, TGP=TG)
    agent_num = 0
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))

    # Evaluate untrained agent and init list for storage
    evaluations = []

    env.reset(agent.desired_velocity, agent.desired_rate)
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    # MULTIPROCESSING

    # Create mp pipes
    num_processes = policy.num_deltas
    processes = []
    childPipes = []
    parentPipes = []

    # Store mp pipes
    for pr in range(num_processes):
        parentPipe, childPipe = Pipe()
        parentPipes.append(parentPipe)
        childPipes.append(childPipe)

    # Start multiprocessing
    for proc_num in range(num_processes):
        p = mp.Process(target=ParallelWorker,
                       args=(childPipes[proc_num], env, state_dim))
        p.start()
        processes.append(p)

    print("STARTED MINITAUR ARS")

    t = 0
    while t < (int(max_timesteps)):

        # Maximum timesteps per rollout
        t += policy.episode_steps

        episode_timesteps += 1

        episode_reward = agent.train_parallel(parentPipes)
        # episode_reward = agent.train()
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}, >400: {}".
              format(t, episode_num, policy.episode_steps, episode_reward,
                     agent.successes))
        # Reset environment
        evaluations.append(episode_reward)
        episode_reward = 0
        episode_timesteps = 0

        # Evaluate episode
        if (episode_num + 1) % eval_freq == 0:
            # evaluate_agent(agent, env_name, seed,
            np.save(results_path + "/" + str(file_name), evaluations)
            if save_model:
                agent.save(models_path + "/" + str(file_name) +
                           str(episode_num))
                # replay_buffer.save(t)

        episode_num += 1

    # Close pipes and hence envs
    for parentPipe in parentPipes:
        parentPipe.send([_CLOSE, "pay2"])

    for p in processes:
        p.join()
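
ParallelWorker itself is not shown on this page. Below is a minimal sketch of a pipe-driven worker loop, consistent with the _CLOSE message sent above but otherwise an assumption (the tag values and rollout logic are illustrative; state_dim is kept only to mirror the mp.Process args):

_EXPLORE, _CLOSE = 1, 2  # hypothetical message tags

def parallel_worker_sketch(child_pipe, env, state_dim):
    """Block on the pipe, run a rollout on request, reply with the return."""
    while True:
        tag, payload = child_pipe.recv()
        if tag == _EXPLORE:
            # payload would carry a perturbed policy; here we just act randomly.
            env.reset()
            total_reward, done = 0.0, False
            while not done:
                _, reward, done, _ = env.step(env.action_space.sample())
                total_reward += reward
            child_pipe.send(total_reward)
        elif tag == _CLOSE:
            child_pipe.send("closed")
            break
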
Example #3
def main():
    """ The main() function. """

    print("STARTING MINITAUR TD3")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    file_name = "mini_td3_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    policy = TD3Agent(state_dim, action_dim, max_action)
    policy_num = 2239999
    if os.path.exists(models_path + "/" + file_name + str(policy_num) +
                      "_critic"):
        print("Loading Existing Policy")
        policy.load(models_path + "/" + file_name + str(policy_num))

    replay_buffer = ReplayBuffer()
    # Optionally load an existing replay buffer by setting buffer_number
    buffer_number = 0  # BY DEFAULT WILL LOAD NOTHING, CHANGE THIS
    if os.path.exists(replay_buffer.buffer_path + "/" + "replay_buffer_" +
                      str(buffer_number) + '.data'):
        print("Loading Replay Buffer " + str(buffer_number))
        replay_buffer.load(buffer_number)
        print(replay_buffer.storage)

    # Evaluate untrained policy and init list for storage
    evaluations = []

    state = env.reset()
    done = False
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    for t in range(int(max_timesteps)):

        episode_timesteps += 1
        # Deterministic Policy Action
        action = np.clip(policy.select_action(np.array(state)), -max_action,
                         max_action)
        # rospy.logdebug("Selected Acton: {}".format(action))

        # Perform action
        next_state, reward, done, _ = env.step(action)

        state = next_state
        episode_reward += reward
        # print("DT REWARD: {}".format(reward))

        if done:
            # +1 to account for 0 indexing.
            # +0 on ep_timesteps since it will increment +1 even if done=True
            print(
                "Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
                    t + 1, episode_num, episode_timesteps, episode_reward))
            # Reset environment
            state, done = env.reset(), False
            evaluations.append(episode_reward)
            episode_reward = 0
            episode_timesteps = 0
            episode_num += 1
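
The ReplayBuffer class used above exposes add(), load(), a buffer_path and a storage attribute. A minimal sketch consistent with that interface, as an assumption rather than the repo's implementation:

import random

class ReplayBufferSketch:
    """Fixed-size FIFO buffer of (state, action, next_state, reward, done) tuples."""

    def __init__(self, max_size=int(1e6)):
        self.storage = []
        self.max_size = max_size
        self.ptr = 0

    def add(self, transition):
        if len(self.storage) < self.max_size:
            self.storage.append(transition)
        else:
            self.storage[self.ptr] = transition       # overwrite the oldest entry
            self.ptr = (self.ptr + 1) % self.max_size

    def sample(self, batch_size):
        return random.sample(self.storage, batch_size)
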
Example #4
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    file_name = "mini_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env)
    agent_num = input("Policy Number: ")
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = 3000
        policy = agent.policy

    # Evaluate untrained agent and init list for storage
    evaluations = []

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    t = 0
    while t < (int(max_timesteps)):

        # Maximum timesteps per rollout
        t += policy.episode_steps

        episode_timesteps += 1

        episode_reward = agent.deploy()
        # episode_reward = agent.train()
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, policy.episode_steps, episode_reward))
        # Reset environment
        evaluations.append(episode_reward)
        episode_reward = 0
        episode_timesteps = 0
        episode_num += 1

    env.close()
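
agent.deploy() above only rolls out the learned linear policy. For reference, the core update that ARS training performs, sketched from the standard ARS-v2 rule (Mania et al., 2018); ARSAgent.train may differ in details:

import numpy as np

def ars_update_sketch(theta, deltas, rewards_plus, rewards_minus,
                      lr=0.02, num_best=16):
    """One ARS step: move theta along the best-performing perturbation directions.

    theta:         (action_dim, state_dim) linear policy weights
    deltas:        list of perturbation matrices, same shape as theta
    rewards_plus:  rollout returns with theta + nu * delta
    rewards_minus: rollout returns with theta - nu * delta
    """
    # Rank each direction by the better of its two returns
    scores = np.maximum(rewards_plus, rewards_minus)
    order = np.argsort(scores)[::-1][:num_best]

    # Scale the step by the std of the returns actually used
    used = np.concatenate([np.asarray(rewards_plus)[order],
                           np.asarray(rewards_minus)[order]])
    sigma = used.std() + 1e-8

    step = np.zeros_like(theta)
    for k in order:
        step += (rewards_plus[k] - rewards_minus[k]) * deltas[k]
    return theta + lr / (num_best * sigma) * step
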
Example #5
def main():
    """ The main() function. """

    print("STARTING MINITAUR TD3")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 4e6
    start_timesteps = 1e4  # 1e3 for testing purposes, use 1e4 for real
    expl_noise = 0.1
    batch_size = 100
    eval_freq = 1e4
    save_model = True
    file_name = "mini_td3_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=False)

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    state_dim = env.observation_space.shape[0]
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0]
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    policy = TD3Agent(state_dim, action_dim, max_action)
    policy_num = 0
    if os.path.exists(models_path + "/" + file_name + str(policy_num) +
                      "_critic"):
        print("Loading Existing Policy")
        policy.load(models_path + "/" + file_name + str(policy_num))

    replay_buffer = ReplayBuffer()
    # Optionally load an existing replay buffer by setting buffer_number
    buffer_number = 0  # BY DEFAULT WILL LOAD NOTHING, CHANGE THIS
    if os.path.exists(replay_buffer.buffer_path + "/" + "replay_buffer_" +
                      str(buffer_number) + '.data'):
        print("Loading Replay Buffer " + str(buffer_number))
        replay_buffer.load(buffer_number)
        # print(replay_buffer.storage)

    # Evaluate untrained policy and init list for storage
    evaluations = []

    state = env.reset()
    done = False
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TD3")

    for t in range(int(max_timesteps)):

        episode_timesteps += 1

        # Select action randomly or according to policy
        # Random Action - no training yet, just storing in buffer
        if t < start_timesteps:
            action = env.action_space.sample()
            # rospy.logdebug("Sampled Action")
        else:
            # According to policy + Exploraton Noise
            # print("POLICY Action")
            """ Note we clip at +-0.99.... because Gazebo
                has problems executing actions at the
                position limit (breaks model)
            """
            action = np.clip(
                (policy.select_action(np.array(state)) + np.random.normal(
                    0, max_action * expl_noise, size=action_dim)), -max_action,
                max_action)
            # rospy.logdebug("Selected Acton: {}".format(action))

        # Perform action
        next_state, reward, done, _ = env.step(action)
        done_bool = float(done)

        # Store data in replay buffer
        replay_buffer.add((state, action, next_state, reward, done_bool))

        state = next_state
        episode_reward += reward

        # Train agent after collecting sufficient data for buffer
        if t >= start_timesteps:
            policy.train(replay_buffer, batch_size)

        if done:
            # +1 to account for 0 indexing.
            # +0 on ep_timesteps since it will increment +1 even if done=True
            print(
                "Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
                    t + 1, episode_num, episode_timesteps, episode_reward))
            # Reset environment
            state, done = env.reset(), False
            evaluations.append(episode_reward)
            episode_reward = 0
            episode_timesteps = 0
            episode_num += 1

        # Evaluate episode
        if (t + 1) % eval_freq == 0:
            # evaluate_policy(policy, env_name, seed,
            np.save(results_path + "/" + str(file_name), evaluations)
            if save_model:
                policy.save(models_path + "/" + str(file_name) + str(t))
                # replay_buffer.save(t)

    env.close()
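
policy.train(replay_buffer, batch_size) above runs the TD3 update. One piece worth calling out is the soft (Polyak) target-network update at its core; this is a sketch of the standard rule, not necessarily this TD3Agent's exact code:

import torch

def soft_update_sketch(target_net, source_net, tau=0.005):
    """Polyak-average source weights into the target network (TD3/DDPG style)."""
    with torch.no_grad():
        for tgt, src in zip(target_net.parameters(), source_net.parameters()):
            tgt.mul_(1.0 - tau).add_(tau * src)
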
Example #6
def main():
    """ The main() function. """

    print("STARTING MINITAUR ARS")

    # TRAINING PARAMETERS
    # env_name = "MinitaurBulletEnv-v0"
    seed = 0
    max_timesteps = 1e6
    file_name = "mini_tg_ars_"

    # Find abs path to this file
    my_path = os.path.abspath(os.path.dirname(__file__))
    results_path = os.path.join(my_path, "../results")
    models_path = os.path.join(my_path, "../models")

    if not os.path.exists(results_path):
        os.makedirs(results_path)

    if not os.path.exists(models_path):
        os.makedirs(models_path)

    env = MinitaurBulletEnv(render=True, on_rack=False)

    dt = env._time_step

    # TRAJECTORY GENERATOR
    movetype = "walk"
    # movetype = "trot"
    # movetype = "bound"
    # movetype = "pace"
    # movetype = "pronk"
    TG = TGPolicy(movetype=movetype,
                  center_swing=0.0,
                  amplitude_extension=0.2,
                  amplitude_lift=0.4)
    TG_state_dim = len(TG.get_TG_state())
    TG_action_dim = 5  # f_tg, Beta, alpha_tg, h_tg, intensity
    state_dim = env.observation_space.shape[0] + TG_state_dim
    print("STATE DIM: {}".format(state_dim))
    action_dim = env.action_space.shape[0] + TG_action_dim
    print("ACTION DIM: {}".format(action_dim))
    max_action = float(env.action_space.high[0])

    print("RECORDED MAX ACTION: {}".format(max_action))

    # Initialize Normalizer
    normalizer = Normalizer(state_dim)

    # Initialize Policy
    policy = Policy(state_dim, action_dim)

    # Initialize Agent with normalizer, policy and gym env
    agent = ARSAgent(normalizer, policy, env, TGP=TG)
    agent_num = input("Policy Number: ")
    if os.path.exists(models_path + "/" + file_name + str(agent_num) +
                      "_policy"):
        print("Loading Existing agent")
        agent.load(models_path + "/" + file_name + str(agent_num))
        agent.policy.episode_steps = 1000
        policy = agent.policy

    # Set seeds
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    env.reset()
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0

    print("STARTED MINITAUR TEST SCRIPT")

    # Sample once to get an array with the correct action shape
    action = env.action_space.sample()

    # Record extensions for plotting
    # LF_ext = []
    # LB_ext = []
    # RF_ext = []
    # RB_ext = []

    LF_tp = []
    LB_tp = []
    RF_tp = []
    RB_tp = []

    t = 0
    while t < (int(max_timesteps)):
        action[:] = 0.0

        # # Get Action from TG [no policies here]
        # action = TG.get_utg(action, alpha_tg, h_tg, intensity,
        #                     env.minitaur.num_motors)

        # LF_ext.append(action[env.minitaur.num_motors // 2])
        # LB_ext.append(action[1 + env.minitaur.num_motors // 2])
        # RF_ext.append(action[2 + env.minitaur.num_motors // 2])
        # RB_ext.append(action[3 + env.minitaur.num_motors // 2])
        # # Perform action
        # next_state, reward, done, _ = env.step(action)

        obs = agent.TGP.get_TG_state()
        # LF_tp.append(obs[0])
        # LB_tp.append(obs[1])
        # RF_tp.append(obs[2])
        # RB_tp.append(obs[3])

        # # Increment phase
        # TG.increment(dt, f_tg, Beta)

        # # time.sleep(1.0)

        # t += 1

        # Maximum timesteps per rollout
        t += policy.episode_steps

        episode_timesteps += 1

        episode_reward = agent.deployTG()
        # episode_reward = agent.train()
        print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(
            t, episode_num, policy.episode_steps, episode_reward))
        # Reset environment
        episode_reward = 0
        episode_timesteps = 0
        episode_num += 1

    plt.figure(0)
    plt.plot(LF_tp, label="LF")
    plt.plot(LB_tp, label="LB")
    plt.plot(RF_tp, label="RF")
    plt.plot(RB_tp, label="RB")
    plt.xlabel("t")
    plt.ylabel("EXT")
    plt.title("Leg Extensions")
    plt.legend()
    plt.show()

    env.close()
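
Finally, the Normalizer(state_dim) constructed in the ARS examples maintains running observation statistics. A minimal sketch of such an online mean/std normalizer (a Welford-style assumption in the spirit of the ARS paper, not the repo's class):

import numpy as np

class NormalizerSketch:
    """Running mean/std normalization of observations."""

    def __init__(self, state_dim):
        self.n = 0
        self.mean = np.zeros(state_dim)
        self.m2 = np.zeros(state_dim)   # sum of squared deviations from the mean

    def observe(self, x):
        # Welford's online update for mean and variance
        self.n += 1
        delta = x - self.mean
        self.mean += delta / self.n
        self.m2 += delta * (x - self.mean)

    def normalize(self, x):
        std = np.sqrt(self.m2 / max(self.n, 1)) + 1e-8
        return (x - self.mean) / std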