Example #1
# Assumed imports for this snippet (not shown in the excerpt); deepq and
# models follow the OpenAI Baselines layout this project builds on, and
# env_search_control comes from the project's own environment module.
import copy as cp

import numpy as np

from baselines import deepq, logger
from baselines.common import models


def main(test_episodes=20, test_steps=50):
    env = env_search_control()
    print(env.observation_space)
    print(env.action_space)
    act = deepq.learn(env,
                      network=models.mlp(num_layers=1, num_hidden=64),
                      total_timesteps=0,
                      total_episodes=0,
                      total_steps=0,
                      load_path="assembly_model_fuzzy_final.pkl")
    episode_rewards = []
    episode_states = []
    for i in range(test_episodes):
        obs, done = env.reset()
        episode_rew = 0
        episode_obs = []
        logger.info(
            "================== The {} episode start !!! ===================".
            format(i))
        for j in range(test_steps):
            obs, rew, done, _ = env.step(act(obs[None])[0], j)
            episode_rew += rew
            episode_obs.append(obs)
        episode_rewards.append(cp.deepcopy(episode_rew))
        episode_states.append(cp.deepcopy(episode_obs))
        print("Episode reward", episode_rew)

    np.save('../data/test_episode_reward_fuzzy_final_new', episode_rewards)
    np.save('../data/test_episode_state_fuzzy_final_new', episode_states)
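The act(obs[None])[0] call above relies on a batching convention: the trained policy maps a batch of observations to a batch of actions. A minimal sketch of that convention in NumPy (the observation size is hypothetical):

import numpy as np

obs = np.zeros(12)           # hypothetical single observation vector
batched = obs[None]          # obs[None] prepends a batch axis -> shape (1, 12)
# act(batched) returns a batch of actions; [0] unpacks the single action:
# action = act(batched)[0]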
Example #2
# Assumed imports (not shown in the excerpt): cp is the copy module and
# np is numpy; env_search_control comes from the project's environment module.
import copy as cp

import numpy as np


def search(arg):
    env = env_search_control()
    # pull_finish = env.pull_peg_up()
    # force, state, pull_terminal = env.search_reset()
    #
    # # position control
    # if pull_terminal:
    #     done = env.pos_control()
    state, obs, done = env.reset()
    print('force', state[:6])
    print('state', state[6:])

    Force, State = [], []

    # force control
    if done:
        for i in range(arg['steps']):
            current_state = env.get_state()
            force, state = current_state[:6], current_state[6:]
            Force.append(cp.deepcopy(force))
            State.append(cp.deepcopy(state))
            _, _, finish = env.step(0, i)
            if finish:
                break
        pull_finish = env.pull_search_peg()
    np.save('../data/search_force', Force)
    np.save('../data/search_state', State)
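Examples #1 and #2 both deep-copy values before appending them to a list. If get_state() returned the same mutable array on every call, the list would end up holding many references to the final state; copying freezes each step's values. A hypothetical illustration:

import copy as cp

import numpy as np

buf, state = [], np.zeros(3)
for i in range(3):
    state[:] = i                     # environment mutates the array in place
    buf.append(cp.deepcopy(state))   # without the copy, all entries end as 2.
print([s[0] for s in buf])           # [0.0, 1.0, 2.0]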
Example #3
    # Method from an Agent subclass; the class definition and its imports
    # (copy, AGENT, Agent) are not shown in this excerpt.
    def __init__(self, hyperparams):
        """
        Initialize agent.
        Args:
            hyperparams: Dictionary of hyperparameters.
        """
        config = copy.deepcopy(AGENT)
        config.update(hyperparams)
        Agent.__init__(self, config)

        self._env = env_search_control(step_max=200,
                                       fuzzy=False,
                                       add_noise=False)

        self.x0 = self._hyperparams['x0']

        self.use_tf = False
        self.observations_stale = True
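The __init__ above follows a common default-config pattern: a module-level defaults dictionary is deep-copied so the shared defaults are never mutated, then the caller's hyperparameters are merged on top. A minimal sketch with hypothetical values (AGENT and Agent come from the surrounding project):

import copy

AGENT = {'dt': 0.05, 'x0': None}   # hypothetical project defaults
hyperparams = {'x0': [0.0] * 6}

config = copy.deepcopy(AGENT)      # copy so the shared defaults stay intact
config.update(hyperparams)         # caller-supplied values take precedence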
Example #4
# Assumed imports (not shown in the excerpt), as in Example #1:
from baselines import deepq
from baselines.common import models


def main():
    env = env_search_control()
    act = deepq.learn(
        env,
        network=models.mlp(num_hidden=64, num_layers=1),
        lr=1e-3,
        total_timesteps=5000,
        total_episodes=100,
        total_steps=50,
        target_network_update_freq=20,
        buffer_size=32,
        learning_starts=32,
        learning_times=10,
        exploration_fraction=0.2,
        exploration_final_eps=0.01,
        print_freq=10,
        param_noise=True,
        save_path='_fuzzy_noise_final',
        load_path='assembly_model_fuzzy_final.pkl'
    )
    # load_path = 'assembly_model_fuzzy.pkl'
    # load_path = 'assembly_model.pkl'
    # print("Saving model to assembly_fuzzy_noise.pkl")
    act.save("assembly_model_fuzzy_final.pkl")
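Passing load_path makes this run continue from previously saved weights, and act.save writes the updated model back out. The saved file can then be evaluated without further training, as Example #1 does; a minimal sketch, assuming the same fork of deepq with its total_episodes/total_steps arguments:

act = deepq.learn(env,
                  network=models.mlp(num_hidden=64, num_layers=1),
                  total_timesteps=0,
                  total_episodes=0,
                  total_steps=0,
                  load_path='assembly_model_fuzzy_final.pkl')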
Example #5
        # This excerpt starts inside the training function's epilogue:
        # recorded states and times are saved, then the agent is stored.
        np.save(
            data_path + 'train_states_' + algorithm_name + '_' + noise_type +
            file_name, epochs_states)
        np.save(
            data_path + 'train_times_' + algorithm_name + '_' + noise_type +
            file_name, epochs_times)

    # agent save
    agent.store(model_path + 'train_model_' + algorithm_name + '_' +
                noise_type + file_name)


if __name__ == '__main__':

    algorithm_name = 'dyna_nn_ddpg'
    env = env_search_control(step_max=200, fuzzy=False, add_noise=False)
    data_path = './prediction_data/'
    model_path = './prediction_model/'
    file_name = '_epochs_5_episodes_100_none_fuzzy'
    model_name = './prediction_model/'
    learn(network='mlp',
          env=env,
          data_path=data_path,
          model_based=True,
          memory_extend=False,
          dyna_learning=True,
          model_type='mlp',
          noise_type='normal_0.2',
          file_name=file_name,
          model_path=model_path,
          model_name=model_name,
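The learn(...) call above is truncated in the original snippet. A minimal sketch of reading the arrays saved by this script: np.save appends the '.npy' extension automatically, and allow_pickle is needed if the episodes were stored as Python object arrays (the file name below is composed from the variables in the example):

import numpy as np

states = np.load('./prediction_data/train_states_dyna_nn_ddpg_normal_0.2'
                 '_epochs_5_episodes_100_none_fuzzy.npy', allow_pickle=True)
print(len(states))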
Example #6
    # This excerpt starts inside an argument-parser setup; the enclosing
    # parse_args() function, its imports (argparse, numpy as np, copy as cp),
    # and the parser construction are not shown.
    parser.add_argument('--steps', type=int, default=200)
    parser.add_argument('--memory_size', type=int, default=3000)
    parser.add_argument('--data-file', type=str)
    parser.add_argument('--lambda', type=float, default=0.6)
    parser.add_argument('--meta_step_size', type=float, default=0.00001)
    parser.add_argument('--eta', type=float, default=0.01)
    parser.add_argument('--loop', type=float, default=1)
    parser.add_argument('--noplot', action='store_false', dest='plot')
    parser.add_argument('--record-file', type=str)
    parser.add_argument('--seed', type=int)
    return vars(parser.parse_args())


if __name__ == '__main__':
    args = parse_args()
    env = env_search_control()
    obs, state, _ = env.reset()

    epoch_force_pose = []
    epoch_action = []
    action = np.zeros(6)

    for i in range(args['steps']):
        next_obs, next_state, reward, done, safe_or_not, execute_action = env.step(
            np.array([0., 0, 0., 0., 0., 0.]), i)
        epoch_force_pose.append(cp.deepcopy(next_state))
        epoch_action.append(cp.deepcopy(execute_action))

        if done:
            env.pull_peg_up()
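One detail worth noting: because lambda is a Python keyword, the value parsed for --lambda cannot be read as an attribute (args.lambda is a syntax error), which is why the parser returns vars(parser.parse_args()) so the value can be read as a dictionary key. A self-contained illustration:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--lambda', type=float, default=0.6)
args = vars(parser.parse_args([]))   # parse an empty argv for illustration
print(args['lambda'])                # 0.6; getattr(ns, 'lambda') also works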