Code Example #1
import numpy as np
from osim.http.client import Client


class WrapperClient:
    def __init__(self, remote_base):
        self.client = Client(remote_base)
        self.ob_0 = np.zeros(41)
        self.ob_1 = np.zeros(14)
        # self.ob_2 = np.zeros(41)

    def env_create(self, token):
        # use the token that is passed in rather than a hard-coded one
        self.ob_0 = self.preprocess(
            np.array(self.client.env_create(token)))
        # return np.concatenate((self.ob_0,self.ob_1,self.ob_2),axis=0)
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def env_reset(self):
        ob = self.client.env_reset()
        if ob is None:
            return None
        self.ob_0 = self.preprocess(np.array(ob))
        self.ob_0[1] = 0
        self.ob_1 = np.zeros(14)
        # self.ob_2 = np.zeros(41)
        # return np.concatenate((self.ob_0,self.ob_1,self.ob_2),axis=0)
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def env_step(self, action):
        # keep the full [observation, reward, done, info] response, but swap in
        # the processed observation so callers see the same format as env_create
        res = list(self.client.env_step(action))
        ob_0_post = self.ob_0
        # ob_1_post = self.ob_1
        # ob_2_post = self.ob_2
        self.ob_0 = self.preprocess(np.array(res[0]))
        self.ob_0[1] = 0
        # finite-difference velocities of the body-part positions (0.01 s per step)
        self.ob_1 = (self.ob_0[22:36] - ob_0_post[22:36]) / 0.01
        # self.ob_2 = self.ob_1 - ob_1_post
        res[0] = np.concatenate((self.ob_0, self.ob_1), axis=0)
        return res

    def submit(self):
        self.client.submit()

    def preprocess(self, v):
        # Express position-like entries relative to the pelvis
        # (indices follow the 41-dimensional RunEnv observation vector).
        n = [1, 18, 22, 24, 26, 28, 30, 32, 34]  # x coordinates, made relative to pelvis x (v[1])
        m = [19, 23, 25, 27, 29, 31, 33, 35]     # y coordinates, made relative to pelvis y (v[2])
        pelvis_x = v[1]  # capture before the loop so later entries are shifted correctly
        pelvis_y = v[2]
        for i in n:
            v[i] = v[i] - pelvis_x
        for i in m:
            v[i] = v[i] - pelvis_y
        # center-of-mass velocity relative to the pelvis velocity
        v[20] = v[20] - v[4]
        v[21] = v[21] - v[5]
        return v
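
A minimal sketch of how this wrapper might be driven during a submission; the random policy and the placeholder token below are assumptions standing in for a trained agent and a real crowdAI token:

remote_base = "http://grader.crowdai.org:1729"
wrapped = WrapperClient(remote_base)

observation = wrapped.env_create(token="YOUR_CROWDAI_TOKEN")  # placeholder token
while True:
    action = np.random.uniform(0.0, 1.0, size=18).tolist()  # 18 muscle activations in RunEnv
    observation, reward, done, info = wrapped.env_step(action)
    if done:
        observation = wrapped.env_reset()
        if observation is None:
            break
wrapped.submit()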
Code Example #2
def submit():
    # `np`, `sess`, `sy_sampled_ac`, `sy_ob_no`, and `bins` are defined by the
    # enclosing training script and are not part of this snippet.
    from osim.http.client import Client
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = "01342e360022c2def5c2cc04c5843381"
    client = Client(remote_base)
    observation = client.env_create(env_id="ProstheticsEnv", token=crowdai_token)
    while True:
        k = np.reshape(np.array(observation), newshape=(-1, len(observation)))
        ac_ind = sess.run(sy_sampled_ac, feed_dict={sy_ob_no: k})
        ac_ind = np.reshape(ac_ind, newshape=(ac_ind.shape[1]))
        action = bins[ac_ind]
        [observation, reward, done, info] = client.env_step(action, True)
        if done:
            observation = client.env_reset()
            if not observation:
                break
    client.submit()
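
As an illustration only (an assumption, not part of the original script), `bins` could be a uniform discretization of the [0, 1] muscle-activation range that the sampled bin indices are mapped through:

import numpy as np

# Hypothetical discretization: each sampled index selects one activation level per muscle.
n_bins = 10
bins = np.linspace(0.0, 1.0, n_bins)
# With ac_ind an integer array of shape (num_muscles,), bins[ac_ind] yields the action vector.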
Code Example #3
def submit(args):
    print('start submitting')

    remote_base = 'http://grader.crowdai.org:1733'
    client = Client(remote_base)

    ddpg = DDPG()
    ddpg.load_model(args.model, load_memory=False)

    state = client.env_create(TOKEN)
    fg = FeatureGenerator()
    state = fg.gen(state)

    step = 0
    ep_reward = 0

    while True:
        print('selecting action ...', end=' ')
        action = ddpg.select_action(list(state))

        print('client.env_step ...')
        next_state, reward, done, info = client.env_step(action.tolist())
        next_state = fg.gen(next_state)

        print('step: {0:03d}, ep_reward: {1:02.08f}'.format(step, ep_reward))
        state = next_state
        ep_reward += reward
        step += 1

        if done:
            print('done')
            state = client.env_reset()
            if not state:
                break

            step = 0
            ep_reward = 0

            fg = FeatureGenerator()
            state = fg.gen(state)

    client.submit()
Code Example #4
    def submit(self):

        remote_base = 'http://grader.crowdai.org:1729'
        env = RunEnv(visualize=self.visualize)
        client = Client(remote_base)

        # Create environment
        observation = client.env_create(self.submit_token)

        # Run a single step
        #
        # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
        while True:
            [observation, reward, done,
             info] = client.env_step(self.agent.forward(observation))
            if done:
                observation = client.env_reset()
                if not observation:
                    break

        client.submit()
Code Example #5
class RemoteSubmit(object):
    def __init__(self, token, agent_type):
        self.token = token
        self.remote_base = "http://grader.crowdai.org:1729"
        self.client = Client(self.remote_base)

        # TODO:: Add agent selector
        if agent_type == 'random':
            self.agent = RandomAgent()
        elif agent_type == 'fixed-action':
            self.agent = FixedActionAgent()
        elif agent_type == 'a3c':
            self.agent = A3CAgent(num_envs=2, num_steps=50, max_frames=1000)
        else:
            status = {
                'status': 'ERROR',
                'error_msg': 'Unsupported agent type'
            }
            raise Exception(status)

    def run(self):
        try:
            status = self.agent.run()
            observation = self.client.env_create(self.token,
                                                 env_id="ProstheticsEnv")

            while True:
                action = self.agent.get_action(observation)
                [observation, reward, done,
                 info] = self.client.env_step(action, False)
                if done:
                    observation = self.client.env_reset()
                    if not observation:
                        break
            self.client.submit()

        except Exception as e:
            status = {'status': 'ERROR', 'error_msg': e}
            raise Exception(status)
Code Example #6
def main():
    # Settings
    remote_base = 'http://grader.crowdai.org'

    # Command line parameters
    parser = argparse.ArgumentParser(
        description='Submit the result to crowdAI')
    parser.add_argument("hdf")
    parser.add_argument('--token', dest='token', action='store', required=True)
    args = parser.parse_args()

    hdf = h5py.File(args.hdf, 'r')

    env = GaitEnv(visualize=False)

    agent = cPickle.loads(hdf['agent_snapshots']['0995'].value)
    agent.stochastic = False

    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    total_reward = 0
    # Run a single step
    for i in range(501):
        ob = agent.obfilt(observation)
        a, _info = agent.act(ob)
        [observation, reward, done, info] = client.env_step(a.tolist(), True)
        print(i, reward, done)
        total_reward += reward
        if done:
            break

    print('TOTAL REWARD: ', total_reward)
    input('press ENTER to submit')
    client.submit()
Code Example #7
# If a TOKEN is provided, submit the trained agent to the crowdAI grader
if args.token:
    agent.load_weights(args.model)
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)
    total_reward = 0

    # Run a single step
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
    while True:
        v = np.array(observation).reshape((env.observation_space.shape[0]))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        observation = process_observation(observation)
        total_reward += reward
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()
Code Example #8
	def upload(frameskip = 1):

		from osim.http.client import Client

		apikey = open('apikey.txt').read().strip('\n')

		print('Using apikey:', apikey)

		remote_base = "http://grader.crowdai.org:1729"
		crowdai_token = apikey

		print('connecting...')
		client = Client(remote_base)

		observation_d = client.env_create(crowdai_token, env_id="ProstheticsEnv")
		#observation = process_obs_dict(observation_d)

		print('environment created! running...')

		#obs_collect = []
		#a_collect = []

		stepno= 0
		epino = 0
		total_reward = 0

		while True:

			#a = AGENT OUTPUT
			observation = process_obs_dict(observation_d)
			a, q = agent.act(observation)
			a = [float(i) for i in list(a)]

			#obs_collect.append(observation)
			#a_collect.append(a)

			for _ in range(frameskip):

				[observation_d, reward, done, info] = client.env_step(a, True)


				stepno += 1
				total_reward += reward

				print('step',stepno,'total reward',total_reward)

				if done:

					'''
					print('')
					print('saving...')
					print('')
					with open('upload_saves/upload_a_collect_' + str(epino) + '.p', 'wb') as f:
						pickle.dump(a_collect, f)
					with open('upload_saves/upload_obs_collect_' + str(epino) + '.p', 'wb') as f:
						pickle.dump(obs_collect, f)
					'''

					observation_d = client.env_reset()


					print('>> episode',epino,' Done after',stepno,'got reward:',total_reward)
					print('')

					total_reward = 0
					stepno = 0
					epino += 1

					break

			if not observation_d:

				break

		print('Done! Submitting...')
		client.submit()
Code Example #9
def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size, nprocs,
         policy_hid_list, valfunc_hid_list, gpu_pct, restore_path, animate,
         submit):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        batch_size: number of episodes per policy training batch
    """
    # killer = GracefulKiller()

    env, obs_dim, act_dim = init_osim(animate)
    env.seed(111 + mpi_util.rank)
    mpi_util.set_global_seeds(111 + mpi_util.rank)

    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    now = datetime.utcnow().strftime(
        "%b-%d_%H:%M:%S")  # create unique directories
    if mpi_util.rank == 0:
        #aigym_path = os.path.join('/tmp', env_name, now)
        #env = wrappers.Monitor(env, aigym_path, force=True)
        logger = Logger(logname=env_name, now=now)

    episode = 0

    checkpoint = Checkpoint("saves", now)
    # restore from checkpoint?
    if restore_path:
        (policy, val_func, scaler, episode, obs_dim, act_dim,
         kl_targ) = checkpoint.restore(restore_path)
    else:
        policy = Policy(obs_dim, act_dim, kl_targ)
        val_func = NNValueFunction(obs_dim)
        scaler = Scaler(obs_dim)

        if mpi_util.rank == 0:
            # run a few episodes (on node 0) of untrained policy to initialize scaler:
            trajectories = run_policy(env, policy, scaler, episodes=5)

            unscaled = np.concatenate(
                [t['unscaled_obs'] for t in trajectories])
            scaler.update(
                unscaled)  # update running statistics for scaling observations

        # broadcast policy weights, scaler, val_func
        (policy, scaler, val_func) = mpi_util.broadcast_policy_scaler_val(
            policy, scaler, val_func)

        if mpi_util.rank == 0:
            checkpoint.save(policy, val_func, scaler, episode)

    if animate:
        observes, actions, rewards, unscaled_obs = run_episode(env,
                                                               policy,
                                                               scaler,
                                                               animate=animate)
        exit(0)

    if submit:
        # Settings
        #remote_base = 'http://grader.crowdai.org:1729'
        remote_base = 'http://grader.crowdai.org:1730'
        token = 'a83412a94593cae3a491f3ee28ff44e1'

        client = Client(remote_base)

        # Create environment
        observation = client.env_create(token)
        step = 0.0
        observes, actions, rewards, unscaled_obs = [], [], [], []
        scale, offset = scaler.get()
        scale[-1] = 1.0  # don't scale time step feature
        offset[-1] = 0.0  # don't offset time step feature

        # Run a single step
        #
        # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
        while True:
            obs = np.array(observation).astype(np.float32).reshape((1, -1))
            print("OBSERVATION TYPE:", type(obs), obs.shape)
            print(obs)
            obs = np.append(obs, [[step]], axis=1)  # add time step feature
            unscaled_obs.append(obs)
            obs = (obs - offset) * scale  # center and scale observations
            observes.append(obs)

            action = policy.sample(obs).astype(np.float32).reshape((-1, 1))
            print("ACTION TYPE:", type(action), action.shape)
            print(action)
            actions.append(action)

            [observation, reward, done,
             info] = client.env_step(action.tolist())
            print("step:", step, "reward:", reward)

            if not isinstance(reward, float):
                reward = np.asscalar(reward)
            rewards.append(reward)
            step += 1e-3  # increment time step feature

            if done:
                print(
                    "================================== RESTARTING ================================="
                )
                observation = client.env_reset()
                step = 0.0
                observes, actions, rewards, unscaled_obs = [], [], [], []
                scale, offset = scaler.get()
                scale[-1] = 1.0  # don't scale time step feature
                offset[-1] = 0.0  # don't offset time step feature
                if not observation:
                    break

        client.submit()
        exit(0)

    ######

    worker_batch_size = int(batch_size / mpi_util.nworkers)  # HACK
    if (worker_batch_size * mpi_util.nworkers != batch_size):
        print("batch_size:", batch_size, " is not divisible by nworkers:",
              mpi_util.nworkers)
        exit(1)

    batch = 0
    while episode < num_episodes:
        if mpi_util.rank == 0 and batch > 0 and batch % 10 == 0:
            checkpoint.save(policy, val_func, scaler, episode)
        batch = batch + 1

        trajectories = run_policy(env,
                                  policy,
                                  scaler,
                                  episodes=worker_batch_size)
        trajectories = mpi_util.gather_trajectories(trajectories)

        if mpi_util.rank == 0:
            # concatenate trajectories into one list
            trajectories = list(itertools.chain.from_iterable(trajectories))
            print("did a batch of ", len(trajectories), " trajectories")
            print([t['rewards'].sum() for t in trajectories])

            episode += len(trajectories)
            add_value(trajectories,
                      val_func)  # add estimated values to episodes
            add_disc_sum_rew(trajectories,
                             gamma)  # calculated discounted sum of Rs
            add_gae(trajectories, gamma, lam)  # calculate advantage

            # concatenate all episodes into single NumPy arrays
            observes, actions, advantages, disc_sum_rew = build_train_set(
                trajectories)

            # add various stats to training log:
            logger.log({
                '_MeanReward':
                np.mean([t['rewards'].sum() for t in trajectories]),
                'Steps':
                np.sum([t['observes'].shape[0] for t in trajectories])
            })
            log_batch_stats(observes, actions, advantages, disc_sum_rew,
                            logger, episode)

            policy.update(observes, actions, advantages,
                          logger)  # update policy
            val_func.fit(observes, disc_sum_rew,
                         logger)  # update value function

            unscaled = np.concatenate(
                [t['unscaled_obs'] for t in trajectories])
            scaler.update(
                unscaled)  # update running statistics for scaling observations

            logger.write(
                display=True)  # write logger results to file and stdout

        # if mpi_util.rank == 0 and killer.kill_now:
        #     if input('Terminate training (y/[n])? ') == 'y':
        #         break
        #     killer.kill_now = False

        # broadcast policy weights, scaler, val_func
        (policy, scaler, val_func) = mpi_util.broadcast_policy_scaler_val(
            policy, scaler, val_func)

    if mpi_util.rank == 0: logger.close()
    policy.close_sess()
    if mpi_util.rank == 0: val_func.close_sess()
Code Example #10
File: submit.py (project: wiplug/osim-rl)
import numpy as np
import argparse

from osim.env import RunEnv
from osim.http.client import Client

# Settings
remote_base = 'http://grader.crowdai.org:1729'

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store', required=True)
args = parser.parse_args()

env = RunEnv(visualize=False)
client = Client(remote_base)

# Create environment
observation = client.env_create(args.token)

# Run a single step
#
# The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
while True:
    v = np.array(observation).reshape((-1,1,env.observation_space.shape[0]))
    [observation, reward, done, info] = client.env_step(env.action_space.sample().tolist())
    print(observation)
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
Code Example #11
File: submit.py (project: nagyistge/osim-rl)
# `env`, `args`, `remote_base`, and `np` are defined earlier in the original script.
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

nb_actions = env.action_space.shape[0]

# Load the actor network
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))
actor.load_weights(args.model)

client = Client(remote_base)

# Create environment
observation = client.env_create(args.token)

# Run a single step
for i in range(501):
    v = np.array(observation).reshape((-1,1,env.observation_space.shape[0]))
    [observation, reward, done, info] = client.env_step(args.token, actor.predict(v)[0].tolist(), True)
    if done:
        break

client.submit(args.token)

Code Example #12
def main():
    args = parse_args()
    logger.configure()
    gamma = 0.99
    tau = 0.01
    normalize_returns = False
    normalize_observations = True
    batch_size = 64
    action_noise = None
    stddev = 0.2
    param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev),
                                         desired_action_stddev=float(stddev))
    critic_l2_reg = 1e-2
    actor_lr = 1e-4
    critic_lr = 1e-3
    popart = False
    clip_norm = None
    reward_scale = 1.

    env = prosthetics_env.Wrapper(osim_env.ProstheticsEnv(visualize=False),
                                  frameskip=4,
                                  reward_shaping=True,
                                  reward_shaping_x=1,
                                  feature_embellishment=True,
                                  relative_x_pos=True,
                                  relative_z_pos=True)

    top_model_dir = 'top-models/'

    # create tf sessions and graphs
    sess_list = []
    graph_list = []
    for i in range(len(args.model_files)):
        graph_list.append(tf.Graph())
        sess_list.append(tf.Session(graph=graph_list[i]))
    ddpg_agents = []
    for i in range(len(args.model_files)):
        model_name = args.model_files[i]
        sess = sess_list[i]
        graph = graph_list[i]
        l_size = args.layer_sizes[i]
        with sess.as_default():
        #with U.make_session(num_cpu=1, graph=g) as sess:
            with graph.as_default():
                #tf.global_variables_initializer()

                # restore agents from model files and store in ddpg_agents
                print("Restoring from..." + model_name)

                # Configure components.
                memory = Memory(limit=int(1e6), action_shape=env.action_space.shape,
                                observation_shape=env.observation_space.shape)
                critic = Critic(layer_norm=True, activation='relu', layer_sizes=[l_size, l_size])
                actor = Actor(env.action_space.shape[-1], layer_norm=True,
                              activation='relu', layer_sizes=[l_size, l_size])
                agent = DDPG(actor, critic, memory, env.observation_space.shape,
                             env.action_space.shape, gamma=gamma, tau=tau,
                             normalize_returns=normalize_returns,
                             normalize_observations=normalize_observations,
                             batch_size=batch_size, action_noise=action_noise,
                             param_noise=param_noise, critic_l2_reg=critic_l2_reg,
                             actor_lr=actor_lr, critic_lr=critic_lr,
                             enable_popart=popart, clip_norm=clip_norm,
                             reward_scale=reward_scale)

                # restore adam state and param noise
                restore_model_path = top_model_dir + model_name
                saver = tf.train.Saver(max_to_keep=500)

                # restore network weights
                saver.restore(sess, restore_model_path)

                adam_optimizer_store = pickle.load(open(restore_model_path
                                                        + ".pkl", "rb"))
                agent.actor_optimizer.m = adam_optimizer_store['actor_optimizer']['m']
                agent.actor_optimizer.v = adam_optimizer_store['actor_optimizer']['v']
                agent.actor_optimizer.t = adam_optimizer_store['actor_optimizer']['t']
                agent.critic_optimizer.m = adam_optimizer_store['critic_optimizer']['m']
                agent.critic_optimizer.v = adam_optimizer_store['critic_optimizer']['v']
                agent.critic_optimizer.t = adam_optimizer_store['critic_optimizer']['t']
                if 'param_noise' in adam_optimizer_store:
                    agent.param_noise = adam_optimizer_store['param_noise']

                # initialize and prepare agent session.
                agent.initialize(sess)
                #sess.graph.finalize()
                agent.reset()

                ddpg_agents.append(agent)

    agent = BlendedAgent(ddpg_agents, sess_list, graph_list)

    if args.evaluation:
        # setup eval env
        eval_env = prosthetics_env.EvaluationWrapper(osim_env.ProstheticsEnv(visualize=False),
                                                     frameskip=4,
                                                     reward_shaping=True,
                                                     reward_shaping_x=1,
                                                     feature_embellishment=True,
                                                     relative_x_pos=True,
                                                     relative_z_pos=True)
        eval_env.change_model(model=('3D').upper(), prosthetic=True, difficulty=0, seed=0)
        eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval'))

        nb_eval_steps = 1000
        # reward, mean_q, final_steps = evaluate_one_episode(eval_env, ddpg_agents, sess_list, graph_list,
        #                                                    nb_eval_steps=nb_eval_steps,
        #                                                    render=False)
        reward, mean_q, final_steps = evaluate_one_episode(eval_env, agent, nb_eval_steps, render=False)
        print("Reward: " + str(reward))
        print("Mean Q: " + str(mean_q))
        print("Final num steps: " + str(final_steps))

    # Submit to crowdai competition. What a hack. :)
    # if crowdai_client is not None and crowdai_token is not None and eval_env is not None:
    crowdai_submit_count = 0
    if args.crowdai_submit:
        remote_base = "http://grader.crowdai.org:1729"
        crowdai_client = Client(remote_base)
        eval_obs_dict = crowdai_client.env_create(args.crowdai_token, env_id="ProstheticsEnv")
        eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
            eval_obs_dict,
            reward_shaping=True,
            reward_shaping_x=1.,
            feature_embellishment=True,
            relative_x_pos=True,
            relative_z_pos=True)
        while True:
            action, _ = agent.pi(eval_obs_projection, apply_noise=False, compute_Q=False)
            submit_action = prosthetics_env.openai_to_crowdai_submit_action(action)
            clipped_submit_action = np.clip(submit_action, 0., 1.)
            actions_equal = clipped_submit_action == submit_action
            if not np.all(actions_equal):
                logger.debug("crowdai_submit_count:", crowdai_submit_count)
                logger.debug("  openai-action:", action)
                logger.debug("  submit-action:", submit_action)
            crowdai_submit_count += 1
            [eval_obs_dict, reward, done, info] = crowdai_client.env_step(clipped_submit_action.tolist(), True)
            # [eval_obs_dict, reward, done, info] = crowdai_client.env_step(agent.pi(eval_obs_projection, apply_noise=False, compute_Q=False), True)
            eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
                eval_obs_dict,
                reward_shaping=True,
                reward_shaping_x=1.,
                feature_embellishment=True,
                relative_x_pos=True,
                relative_z_pos=True)
            if done:
                logger.debug("done: crowdai_submit_count:", crowdai_submit_count)
                eval_obs_dict = crowdai_client.env_reset()
                if not eval_obs_dict:
                    break
                logger.debug("done: eval_obs_dict exists after reset")
                eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
                    eval_obs_dict,
                    reward_shaping=True,
                    reward_shaping_x=1.,
                    feature_embellishment=True,
                    relative_x_pos=True,
                    relative_z_pos=True)
        crowdai_client.submit()

    for i in range(len(sess_list)):
        sess_list[i].close()
Code Example #13
# `tf`, `np`, `agent`, `saver`, `crowdai_token`, `dict_to_list`, and `Client` come from
# the enclosing script; `Client` is an already-constructed osim.http.client.Client instance.
sess = tf.InteractiveSession()
agent.initialize(sess)
sess.graph.finalize()
agent.reset()
filename = "/home/vaisakhs_shaj/Desktop/MODEL/tfSteps" + str(30000) + ".model"
saver.restore(sess, filename)

# Create environment
observation = Client.env_create(env_id="ProstheticsEnv", token=crowdai_token)

# print([n.name for n in tf.get_default_graph().as_graph_def().node])


def my_controller(obs):
    obs = np.array(dict_to_list(obs))
    action = agent.pi(obs, apply_noise=False, compute_Q=False)[0]
    return action.tolist()


while True:
    [observation, reward, done, info] = Client.env_step(my_controller(observation), True)

    if done:
        observation = Client.env_reset()
        if not observation:
            break

Client.submit()
Code Example #14
File: ddpg2.py (project: dpduanpu/stanford-osrl)
    def up():
        # uploading to CrowdAI

        # global _stepsize
        # _stepsize = 0.01

        apikey = open('apikey.txt').read().strip('\n')
        print('apikey is', apikey)

        import opensim as osim
        from osim.http.client import Client
        from osim.env import RunEnv

        # Settings
        remote_base = "http://grader.crowdai.org:1729"
        crowdai_token = apikey

        client = Client(remote_base)
        ob_log = ''  # string to log observations

        # Create environment
        observation = client.env_create(crowdai_token)
        # old_observation = None
        stepno = 0
        epino = 0
        total_reward = 0
        old_observation = None

        def obg(plain_obs):
            nonlocal old_observation, stepno, ob_log

            # log csv observation into string
            ob_log += ','.join([str(i) for i in plain_obs]) + '\n'

            processed_observation, old_observation = go(plain_obs,
                                                        old_observation,
                                                        step=stepno)
            return np.array(processed_observation)

        print('environment created! running...')
        # Run a single step
        while True:
            proc_observation = obg(observation)

            [observation, reward, done, info] = client.env_step(
                [float(i) for i in list(agent.act(proc_observation)[0])], True)
            stepno += 1
            total_reward += reward
            print('step', stepno, 'total reward', total_reward)
            # print(observation)
            if done:
                observation = client.env_reset()
                old_observation = None

                print('>>>>>>>episode', epino, ' DONE after', stepno,
                      'got_reward', total_reward)
                total_reward = 0
                stepno = 0
                epino += 1

                if not observation:
                    break

        print('submitting...')
        client.submit()

        print('saving to file...')
        with open('sublog.csv', 'w') as f:
            f.write(ob_log)
Code Example #15
File: ddpg2.py (project: paulgowdy/NIPS18_Prosthetics)
    def up():

        # uploading to CrowdAI

        # global _stepsize
        # _stepsize = 0.01

        apikey = open('apikey.txt').read().strip('\n')
        print('apikey is', apikey)

        import opensim as osim
        from osim.http.client import Client
        from osim.env import ProstheticsEnv as RunEnv

        # Settings
        remote_base = "http://grader.crowdai.org:1729"
        crowdai_token = apikey

        client = Client(remote_base)
        ob_log = ''  # string to log observations

        # Create environment
        observation = client.env_create(crowdai_token, env_id="ProstheticsEnv")

        #print('pg test 717:', observation)

        #observation = obs_dict_to_list(observation)

        #print('pg test 734:', observation)

        # old_observation = None
        stepno = 0
        epino = 0
        total_reward = 0
        old_observation = None
        '''
        def obg(plain_obs):
            nonlocal old_observation, stepno, ob_log

            # log csv observation into string
            ob_log += ','.join([str(i) for i in plain_obs]) + '\n'

            processed_observation, old_observation = go(plain_obs, old_observation, step=stepno)
            return np.array(processed_observation)
        '''
        def obg(plain_obs):
            # observation generator
            # derivatives of observations extracted here.
            #print('pg multi.py 21, plain_obs:', len(plain_obs))

            #processed_observation, self.old_observation = go(plain_obs, self.old_observation, step=self.stepcount)

            observation = plain_obs
            obs = []

            obs.extend(observation['misc']['mass_center_pos'])  # x, y, z
            obs.extend(observation['misc']['mass_center_vel'])  # x, y, z
            obs.extend(observation['misc']['mass_center_acc'])  # x, y, z

            # joint body, positions and vels relative to pelvis

            # Absolute Joint Positions
            obs.extend(observation['joint_pos']['ground_pelvis'])

            obs.extend(observation['joint_pos']['hip_r'])
            obs.extend(observation['joint_pos']['knee_r'])
            obs.extend(observation['joint_pos']['ankle_r'])

            obs.extend(observation['joint_pos']['hip_l'])
            obs.extend(observation['joint_pos']['knee_l'])
            obs.extend(observation['joint_pos']['ankle_l'])
            '''

            # Relative Joint Positions
            #print(observation['joint_pos']['ground_pelvis'])
            obs.extend(observation['joint_pos']['ground_pelvis']) # 6 elements

            #print(rel_to_A(observation['joint_pos']['hip_r'], observation['body_pos']['pelvis']))
            obs.extend(rel_to_A(observation['joint_pos']['hip_r'], observation['body_pos']['pelvis'])) # 3e
            obs.extend(rel_to_A(observation['joint_pos']['knee_r'], observation['body_pos']['pelvis'])) # 1e
            obs.extend(rel_to_A(observation['joint_pos']['ankle_r'], observation['body_pos']['pelvis'])) # 1e

            obs.extend(rel_to_A(observation['joint_pos']['hip_l'], observation['body_pos']['pelvis'])) # 3e
            obs.extend(rel_to_A(observation['joint_pos']['knee_l'], observation['body_pos']['pelvis'])) # 1e
            obs.extend(rel_to_A(observation['joint_pos']['ankle_l'], observation['body_pos']['pelvis'])) # 1e
            '''

            # Absolute Joint Vel

            obs.extend(observation['joint_vel']['ground_pelvis'])

            obs.extend(observation['joint_vel']['hip_r'])
            obs.extend(observation['joint_vel']['knee_r'])
            obs.extend(observation['joint_vel']['ankle_r'])

            obs.extend(observation['joint_vel']['hip_l'])
            obs.extend(observation['joint_vel']['knee_l'])
            obs.extend(observation['joint_vel']['ankle_l'])

            # Absolute Joint Acc

            obs.extend(observation['joint_acc']['ground_pelvis'])

            obs.extend(observation['joint_acc']['hip_r'])
            obs.extend(observation['joint_acc']['knee_r'])
            obs.extend(observation['joint_acc']['ankle_r'])

            obs.extend(observation['joint_acc']['hip_l'])
            obs.extend(observation['joint_acc']['knee_l'])
            obs.extend(observation['joint_acc']['ankle_l'])

            b = [
                'body_pos', 'body_vel', 'body_acc', 'body_pos_rot',
                'body_vel_rot', 'body_acc_rot'
            ]
            parts = [
                'pelvis', 'femur_r', 'pros_tibia_r', 'pros_foot_r', 'femur_l',
                'tibia_l', 'talus_l', 'calcn_l', 'toes_l', 'torso', 'head'
            ]

            for i in b:

                for j in parts:

                    obs.extend(observation[i][j])

            forces_subkeys = observation['forces'].keys()

            for k in forces_subkeys:

                obs.extend(observation['forces'][k])

            #print('pg multi.py 25, proc_obs:', len(processed_observation))

            return np.array(obs)

        #print(observation)

    #print(obg(observation).shape)

        print('environment created! running...')
        # Run a single step
        while True:
            proc_observation = obg(observation)

            a = [float(i) for i in list(agent.act(proc_observation)[0])]
            #print(a)

            [observation, reward, done, info] = client.env_step(a, True)
            stepno += 1
            total_reward += reward
            print('step', stepno, 'total reward', total_reward)
            # print(observation)
            if done:
                observation = client.env_reset()
                old_observation = None

                print('>>>>>>>episode', epino, ' DONE after', stepno,
                      'got_reward', total_reward)
                total_reward = 0
                stepno = 0
                epino += 1

                if not observation:
                    break

        print('submitting...')
        client.submit()

        print('saving to file...')
        with open('sublog.csv', 'w') as f:
            f.write(ob_log)
Code Example #16
from gym.spaces import Box


class SubmitEnv:
    def __init__(self):
        from osim.http.client import Client
        remote_base = "http://grader.crowdai.org:1729"
        self.crowdai_token = "e47cb9f7fd533dc036dbd5d65d0d68c3"
        self.client = Client(remote_base)
        self.first_reset = True
        self.action_space = Box(low=0, high=1, shape=[19])
        self.observation_space = Box(low=-3, high=3, shape=[224])
        self.episodic_length = 0
        self.score = 0.0

        self.reward_range = None
        self.metadata = None

    def reset(self):
        self.episodic_length = 0
        self.score = 0
        if self.first_reset:
            self.first_reset = False
            return self.get_observation(
                self.client.env_create(self.crowdai_token,
                                       env_id="ProstheticsEnv"))
        else:
            obs = self.client.env_reset()
            if obs is None:
                self.client.submit()
                print('SUBMITTED')
                import sys
                sys.exit(0)
            return self.get_observation(obs)

    def step(self, action):
        [obs, rew, done, info] = self.client.env_step(action.tolist(), True)
        self.episodic_length += 1
        self.score += rew
        pelvis_vx = obs['body_vel']['pelvis'][0]
        print(
            f'timestamp={self.episodic_length:3d} score={self.score:5.2f} velocity={pelvis_vx:3.2f}'
        )
        import sys
        sys.stdout.flush()
        return self.get_observation(obs), rew, done, info

    def close(self):
        pass

    def get_observation(self, state_desc):
        res = []
        pelvis = None

        for body_part in [
                "pelvis", "head", "torso", "toes_l", "talus_l", "pros_foot_r",
                "pros_tibia_r"
        ]:
            cur = []
            cur += state_desc["body_pos"][body_part]
            cur += state_desc["body_vel"][body_part]
            cur += state_desc["body_acc"][body_part]
            cur += state_desc["body_pos_rot"][body_part]
            cur += state_desc["body_vel_rot"][body_part]
            cur += state_desc["body_acc_rot"][body_part]
            if body_part == "pelvis":
                pelvis = cur
                res += cur[1:]  # drop pelvis x; the absolute forward position is not informative
            else:
                cur[0] -= pelvis[0]
                cur[2] -= pelvis[2]  # relative position work for x / z axis
                res += cur

        for joint in [
                "ankle_l", "ankle_r", "back", "hip_l", "hip_r", "knee_l",
                "knee_r"
        ]:
            res += state_desc["joint_pos"][joint]
            res += state_desc["joint_vel"][joint]
            res += state_desc["joint_acc"][joint]

        for muscle in sorted(state_desc["muscles"].keys()):
            res += [state_desc["muscles"][muscle]["activation"]]
            res += [state_desc["muscles"][muscle]["fiber_length"]]
            res += [state_desc["muscles"][muscle]["fiber_velocity"]]

        cm_pos = state_desc["misc"][
            "mass_center_pos"]  # relative x / z axis center of mass position
        cm_pos[0] -= pelvis[0]
        cm_pos[2] -= pelvis[2]
        res = res + cm_pos + state_desc["misc"][
            "mass_center_vel"] + state_desc["misc"]["mass_center_acc"]

        return res
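
A minimal sketch of how SubmitEnv might be driven end to end; the random policy is an assumption standing in for a trained controller:

# Hypothetical driver loop; env.reset() submits and exits once the grader has no more episodes.
env = SubmitEnv()
obs = env.reset()
while True:
    action = env.action_space.sample()  # 19 muscle activations in [0, 1]
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()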