Example #1
import numpy as np
import torch

# Agent, STATE_SIZE and ACTION_SIZE are assumed to come from the project's
# DDPG training code (not shown here).
class ModelInterface:
    def __init__(self):
        self.agent = Agent(state_size=STATE_SIZE,
                           action_size=ACTION_SIZE,
                           random_seed=10)
        self.agent.actor_local.load_state_dict(
            torch.load('model/checkpoint_actor.pth', map_location='cpu'))
        self.agent.critic_local.load_state_dict(
            torch.load('model/checkpoint_critic.pth', map_location='cpu'))
        self.agent.actor_local.eval()
        self.agent.critic_local.eval()

    def get_action_q(self, state, action):
        # Pad the single (state, action) pair into the fixed batch shape the
        # critic network was built for; only row 0 carries real data.
        s = np.zeros((128, 6))
        s[0, :] = state

        a = np.zeros((128, 2))
        a[0, :] = action

        state = torch.Tensor(s)
        action = torch.Tensor(a)

        return self.agent.critic_local(state, action).detach().numpy()[0, 0]

    def get_action(self, state):
        return self.agent.act(state)
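
A minimal usage sketch for the interface above, assuming STATE_SIZE = 6 and ACTION_SIZE = 2 (consistent with the zero-padded buffers) and that the checkpoint files exist under model/; the random observation is only an illustration:

import numpy as np

model = ModelInterface()

state = np.random.uniform(-1., 1., size=6)    # one observation
action = model.get_action(state)              # actor's greedy action
q_value = model.get_action_q(state, action)   # critic's Q(s, a) estimate
print('action:', action, 'Q(s, a):', q_value)
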
Example #2
def submit_agent(args, model_params):

    ##########################################################

    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    token = args.token
    client = Client(remote_base)

    # Create environment
    di = client.env_create(token, env_id="ProstheticsEnv")

    stat = []
    ep = 1
    ii = 0
    reward_sum = 0
    print('\n\n#################################################\n\n')
    while True:
        ii += 1
        proj = env.dict_to_vec(di)
        action = actor.act(proj)
        action += np.random.rand(len(action)) / 10.

        [di, reward, done, info] = client.env_step(action.tolist(), True)
        reward_sum += reward
        print('ep: ' + str(ep) + '  >>  step: ' + str(int(ii)) +
              '  >>  reward: ' + format(reward, '.2f') + '  \t' +
              str(int(reward_sum)) + '\t  >>  pelvis X Y Z: \t' +
              format(di['body_pos']['pelvis'][0], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][1], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][2], '.2f'))
        if done:
            print('\n\n#################################################\n\n')
            stat.append([ep, ii, reward_sum])
            di = client.env_reset()
            ep += 1
            ii = 0
            reward_sum = 0
            if not di:
                break
    for e in stat:
        print(e)
    print('\n\nclient.submit()\n\n')
    client.submit()
    ##########################################################
    print('\n\n#################################################\n\n')
    print('DONE\n\n')
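
The loop above adds uniform exploration noise to the actor's output before sending it to the grader. If the action space is muscle activations bounded to [0, 1] (an assumption, not confirmed by this snippet), a small clipping helper keeps the submitted actions valid:

import numpy as np

def noisy_clipped_action(actor, observation, noise_scale=0.1, low=0., high=1.):
    # act, add small uniform exploration noise, clip to the assumed bounds
    action = np.asarray(actor.act(observation), dtype='float32')
    action += np.random.rand(len(action)) * noise_scale
    return np.clip(action, low, high)

# inside the loop above this could replace the two action lines:
# action = noisy_clipped_action(actor, env.dict_to_vec(di))
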
Example #3
def test_agent(args, testing, num_test_episodes, model_params, weights,
               best_reward, updates, global_step, save_dir):
    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    test_rewards_all = []
    test_pelvis_X_all = []

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(
        **model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    # if args.weights is not None:
    #     actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=0)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, info = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards_all.append(test_reward)
        test_pelvis_X_all.append(info['pelvis_X'])
    test_reward_mean = np.mean(test_rewards_all)
    mean_pelvis_X = np.mean(test_pelvis_X_all)
    std_reward = np.std(test_rewards_all)

    test_str = 'global step {}; test_reward_mean: {:.2f}, test_rewards_all: {}; mean_pelvis_X: {:.2f}, test_pelvis_X_all: {} '.\
        format(global_step.value, float(test_reward_mean), test_rewards_all, float(mean_pelvis_X), test_pelvis_X_all)

    print(test_str)
    try:
        with open(os.path.join(save_dir, 'test_report.log'), 'a') as f:
            f.write(test_str + '\n')
    except OSError:
        print('#############################################')
        print('except  »  f.write(test_str )')
        print('#############################################')

    if test_reward_mean > best_reward.value or test_reward_mean > 30 * env.reward_mult:
        if test_reward_mean > best_reward.value:
            best_reward.value = test_reward_mean
        fname = os.path.join(
            save_dir,
            'weights_updates_{}_reward_{:.1f}_pelvis_X_{:.1f}.pkl'.format(
                updates.value, test_reward_mean, mean_pelvis_X))
        actor.save(fname)
    testing.value = 0
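
The shared counters (best_reward.value, updates.value, global_step.value, testing.value) suggest this function runs in its own process and communicates with the trainer through multiprocessing.Value objects. A launch sketch under that assumption; args, model_params, weights, num_test_episodes and save_dir are whatever the training script already has:

from multiprocessing import Process, Value

testing = Value('i', 1)          # 1 while a test run is in progress
best_reward = Value('d', -1e9)   # best mean test reward seen so far
updates = Value('i', 0)
global_step = Value('i', 0)

tester = Process(target=test_agent,
                 args=(args, testing, num_test_episodes, model_params, weights,
                       best_reward, updates, global_step, save_dir))
tester.daemon = True
tester.start()
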
Example #4
def test_agent(args, testing, state_transform, num_test_episodes, model_params,
               weights, best_reward, updates, global_step, save_dir):
    env = RunEnv2(state_transform,
                  visualize=args.test,
                  integrator_accuracy=args.accuracy,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=1)
    test_rewards = []

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=2)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            state, reward, terminal, _ = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards.append(test_reward)
    mean_reward = np.mean(test_rewards)
    std_reward = np.std(test_rewards)

    test_str = 'global step {}; test reward mean: {:.2f}, std: {:.2f}, all: {} '.\
        format(global_step.value, float(mean_reward), float(std_reward), test_rewards)

    print(test_str)
    with open(os.path.join(save_dir, 'test_report.log'), 'a') as f:
        f.write(test_str + '\n')

    if mean_reward > best_reward.value or mean_reward > 30 * env.reward_mult:
        if mean_reward > best_reward.value:
            best_reward.value = mean_reward
        fname = os.path.join(
            save_dir, 'weights_updates_{}_reward_{:.2f}.pkl'.format(
                updates.value, mean_reward))
        actor.save(fname)
    testing.value = 0
Example #5
def test_agent(args, num_test_episodes, model_params):
    env = RunEnv2(visualize=True,
                  model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)
    test_rewards = []

    # train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(**model_params)
    # actor_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(**model_params)
    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    for ep in range(num_test_episodes):
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=0)
        test_reward = 0
        while True:
            state = np.asarray(state, dtype='float32')
            # state = np.concatenate((state,state,state))[:390]  # ndrw tmp
            action = actor.act(state)  # ndrw tmp
            # if args.prosthetic:
            #     action = np.zeros(19)  # ndrw tmp
            # else:
            #     action = np.zeros(22)  # ndrw tmp
            state, reward, terminal, _ = env._step(action)
            test_reward += reward
            if terminal:
                break
        test_rewards.append(test_reward)
    mean_reward = np.mean(test_rewards)
    std_reward = np.std(test_rewards)

    global_step = 0  # standalone test: no shared step counter here
    test_str = 'global step {}; test reward mean: {:.2f}, std: {:.2f}, all: {} '.\
        format(global_step, float(mean_reward), float(std_reward), test_rewards)

    print(test_str)
    with open('test_report.log', 'a') as f:
        f.write(test_str + '\n')
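
This variant expects an args namespace with modeldim, prosthetic, difficulty and weights fields. A hedged entry-point sketch; the flag defaults and the get_model_params() helper are assumptions that stand in for the project's real configuration:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--modeldim', default='3D')
    parser.add_argument('--prosthetic', action='store_true')
    parser.add_argument('--difficulty', type=int, default=0)
    parser.add_argument('--weights', default=None, help='path to a saved actor .pkl')
    args = parser.parse_args()

    model_params = get_model_params()   # hypothetical helper returning build_model_test kwargs
    test_agent(args, num_test_episodes=5, model_params=model_params)
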
Example #6
def run_agent(model_params, weights, state_transform, data_queue, weights_queue,
              process, global_step, updates, best_reward, param_noise_prob, save_dir,
              max_steps=10000000):

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = \
        build_model(**model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)

    env = RunEnv2(state_transform, max_obstacles=config.num_obstacles, skip_frame=config.skip_frames)
    random_process = OrnsteinUhlenbeckProcess(theta=.1, mu=0., sigma=.2, size=env.noutput,
                                              sigma_min=0.05, n_steps_annealing=1e6)
    # prepare buffers for data
    states = []
    actions = []
    rewards = []
    terminals = []

    total_episodes = 0
    start = time()
    action_noise = True
    while global_step.value < max_steps:
        seed = random.randrange(2**32-2)
        state = env.reset(seed=seed, difficulty=2)
        random_process.reset_states()

        total_reward = 0.
        total_reward_original = 0.
        terminal = False
        steps = 0
        
        while not terminal:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            if action_noise:
                action += random_process.sample()

            next_state, reward, next_terminal, info = env.step(action)
            total_reward += reward
            total_reward_original += info['original_reward']
            steps += 1
            global_step.value += 1

            # add data to buffers
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            terminals.append(terminal)

            state = next_state
            terminal = next_terminal

            if terminal:
                break

        total_episodes += 1

        # add data to buffers after episode end
        states.append(state)
        actions.append(np.zeros(env.noutput))
        rewards.append(0)
        terminals.append(terminal)

        states_np = np.asarray(states).astype(np.float32)
        data = (states_np,
                np.asarray(actions).astype(np.float32),
                np.asarray(rewards).astype(np.float32),
                np.asarray(terminals),
                )
        weight_send = None
        if total_reward > best_reward.value:
            weight_send = actor.get_actor_weights()
        # send data for training
        data_queue.put((process, data, weight_send, total_reward))

        # receive weights and set params to weights
        weights = weights_queue.get()

        report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len {}, ' \
                     'reward: {:.2f}, original_reward {:.4f}; best reward: {:.2f} noise {}'. \
            format(global_step.value, 1. * global_step.value / (time() - start), updates.value, steps,
                   total_reward, total_reward_original, best_reward.value, 'actions' if action_noise else 'params')
        print(report_str)

        with open(os.path.join(save_dir, 'train_report.log'), 'a') as f:
            f.write(report_str + '\n')

        actor.set_actor_weights(weights)
        action_noise = np.random.rand() < 1 - param_noise_prob
        if not action_noise:
            set_params_noise(actor, states_np, random_process.current_sigma)

        # clear buffers
        del states[:]
        del actions[:]
        del rewards[:]
        del terminals[:]

        if total_episodes % 100 == 0:
            env = RunEnv2(state_transform, max_obstacles=config.num_obstacles, skip_frame=config.skip_frames)
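
Exploration in this worker comes from an Ornstein-Uhlenbeck process added to the actor's actions. A minimal, self-contained sketch of the standard Euler discretization dx = theta*(mu - x)*dt + sigma*sqrt(dt)*N(0, 1); the real OrnsteinUhlenbeckProcess also anneals sigma (sigma_min, n_steps_annealing), which is omitted here:

import numpy as np

class SimpleOUNoise:
    # temporally correlated noise for continuous-action exploration (sketch only)
    def __init__(self, size, theta=0.1, mu=0., sigma=0.2, dt=0.01):
        self.size, self.theta, self.mu, self.sigma, self.dt = size, theta, mu, sigma, dt
        self.reset_states()

    def reset_states(self):
        self.x = np.ones(self.size) * self.mu

    def sample(self):
        dx = self.theta * (self.mu - self.x) * self.dt \
             + self.sigma * np.sqrt(self.dt) * np.random.randn(self.size)
        self.x = self.x + dx
        return self.x

noise = SimpleOUNoise(size=18)   # size would match env.noutput
print(noise.sample())
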
Example #7
    n_episode += 1

    # observation: [row, column, RGB]
    observation = env.reset()
    agent.reset(observation)

    done = False
    timestep = 0

    print('Episode start: %s' % (episode))

    # Play game
    while not done:
        env.render()

        action = agent.act()
        observation, reward, done, info = env.step(action)
        agent.train(observation, reward, done)

        timestep += 1

    print('Episode finished after timestep: %s' % (timestep))

env.close()
print('Training complete after episode: %s' % (n_episode))

# plt.matshow(observation[:100, 20:80 ,0])
# plt.matshow(observation[:,:,2])
# plt.matshow(observation[:,:,1])
ob = env.reset()
observation.shape
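
This fragment begins mid-loop (note the indentation of n_episode += 1 and the episode variable used in the first print). A hedged reconstruction of the enclosing driver it presumably sits in; env, agent and the episode budget are assumptions:

# hypothetical outer driver; env and agent are created elsewhere,
# and the loop body is the fragment shown above
n_episode = 0
for episode in range(100):        # assumed episode budget
    n_episode += 1
    observation = env.reset()
    agent.reset(observation)

    done = False
    timestep = 0
    while not done:
        action = agent.act()
        observation, reward, done, info = env.step(action)
        agent.train(observation, reward, done)
        timestep += 1
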
Example #8
def run_agent(args,
              model_params,
              weights,
              data_queue,
              weights_queue,
              process,
              global_step,
              updates,
              best_reward,
              param_noise_prob,
              save_dir,
              max_steps=10000000):

    train_fn, actor_fn, target_update_fn, params_actor, params_crit, actor_lr, critic_lr = build_model(
        **model_params)
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=config.skip_frames)
    env.spec.timestep_limit = 3000  # ndrw
    # random_process = OrnsteinUhlenbeckProcess(theta=.1, mu=0., sigma=.3, size=env.noutput, sigma_min=0.05, n_steps_annealing=1e6)

    sigma_rand = random.uniform(0.05, 0.5)
    dt_rand = random.uniform(0.002, 0.02)
    param_noise_prob = random.uniform(param_noise_prob * 0.25,
                                      min(param_noise_prob * 1.5, 1.))

    random_process = OrnsteinUhlenbeckProcess(theta=.1,
                                              mu=0.,
                                              sigma=sigma_rand,
                                              dt=dt_rand,
                                              size=env.noutput,
                                              sigma_min=0.05,
                                              n_steps_annealing=1e6)

    print('OUProcess_sigma = ' + str(sigma_rand) + '    OUProcess_dt = ' +
          str(dt_rand) + '    param_noise_prob = ' + str(param_noise_prob))

    # prepare buffers for data
    states = []
    actions = []
    rewards = []
    terminals = []

    total_episodes = 0
    start = time()
    action_noise = True
    while global_step.value < max_steps:
        seed = random.randrange(2**32 - 2)
        state = env.reset(seed=seed, difficulty=args.difficulty)
        random_process.reset_states()

        total_reward = 0.
        total_reward_original = 0.
        terminal = False
        steps = 0

        while not terminal:
            state = np.asarray(state, dtype='float32')
            action = actor.act(state)
            if action_noise:
                action += random_process.sample()

            next_state, reward, next_terminal, info = env._step(action)
            total_reward += reward
            total_reward_original += info['original_reward']
            steps += 1
            global_step.value += 1

            # add data to buffers
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            terminals.append(terminal)

            state = next_state
            terminal = next_terminal

            if terminal:
                break

        total_episodes += 1

        # add data to buffers after episode end
        states.append(state)
        actions.append(np.zeros(env.noutput))
        rewards.append(0)
        terminals.append(terminal)

        states_np = np.asarray(states).astype(np.float32)
        data = (
            states_np,
            np.asarray(actions).astype(np.float32),
            np.asarray(rewards).astype(np.float32),
            np.asarray(terminals),
        )
        weight_send = None
        if total_reward > best_reward.value:
            weight_send = actor.get_actor_weights()
        # send data for training
        data_queue.put((process, data, weight_send, total_reward))

        # receive weights and set params to weights
        weights = weights_queue.get()

        # report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len: {}, pelvis_X: {:.2f}, reward: {:.2f}, original_reward {:.4f}, best reward: {:.2f}, noise: {}'. \
        #     format(global_step.value, 1. * global_step.value / (time() - start), updates.value, steps, info['pelvis_X'], total_reward, total_reward_original, best_reward.value, 'actions' if action_noise else 'params')
        # report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len: {}, pelvis_X: {:.2f}, reward: {:.2f}, best reward: {:.2f}, noise: {}'. \
        #     format(global_step.value, 1. * global_step.value / (time() - start), updates.value, steps, info['pelvis_X'], total_reward, best_reward.value, 'actions' if action_noise else 'params')
        report_str = 'Global step: {}, steps/sec: {:.2f}, updates: {}, episode len: {}, pelvis_X: {:.2f}, pelvis_Z: {:.2f}, reward: {:.2f}, best reward: {:.2f}, noise: {}'. \
            format(global_step.value, 1. * global_step.value / (time() - start), updates.value, steps, info['pelvis'][0], info['pelvis'][2], total_reward, best_reward.value, 'actions' if action_noise else 'params')
        print(report_str)

        try:
            with open(os.path.join(save_dir, 'train_report.log'), 'a') as f:
                f.write(report_str + '\n')
        except OSError:
            print('#############################################')
            print(
                'except  »  with open(os.path.join(save_dir, train_report.log), a) as f:'
            )
            print('#############################################')

        actor.set_actor_weights(weights)
        action_noise = np.random.rand() < 1 - param_noise_prob
        if not action_noise:
            set_params_noise(actor, states_np, random_process.current_sigma)

        # clear buffers
        del states[:]
        del actions[:]
        del rewards[:]
        del terminals[:]

        if total_episodes % 100 == 0:
            env = RunEnv2(model=args.modeldim,
                          prosthetic=args.prosthetic,
                          difficulty=args.difficulty,
                          skip_frame=config.skip_frames)
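
run_agent is the worker half of an asynchronous setup: it pushes (process, trajectory, maybe-weights, episode reward) onto data_queue and then blocks on weights_queue for refreshed actor weights. A hedged sketch of the trainer-side loop that would sit opposite it; the tuple layout follows the code above, while train_on_batch and the one-queue-per-worker layout are assumptions, not the project's real trainer:

def trainer_loop(data_queue, weights_queues, actor, train_on_batch,
                 global_step, updates, best_reward, max_steps=10000000):
    # hypothetical counterpart of run_agent
    while global_step.value < max_steps:
        process_id, trajectory, worker_weights, episode_reward = data_queue.get()

        # trajectory is (states, actions, rewards, terminals) as packed by run_agent
        train_on_batch(*trajectory)
        updates.value += 1

        if worker_weights is not None and episode_reward > best_reward.value:
            best_reward.value = episode_reward

        # hand the current actor weights back to the worker that sent the data
        weights_queues[process_id].put(actor.get_actor_weights())
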