class WrapperClient():
    """Wraps the grader ``Client`` and augments observations with velocities.

    Every method returns the 41-element preprocessed raw observation
    (selected coordinates made pelvis-relative, see ``preprocess``)
    concatenated with a 14-element finite-difference velocity estimate of
    elements 22:36 -- i.e. a 55-element vector.
    """

    def __init__(self, remote_base):
        self.client = Client(remote_base)
        # Previous preprocessed observation (41 values).
        # NOTE(review): np.array(41) is a 0-d scalar, not np.zeros(41); it is
        # always overwritten by env_create() before being read, so kept as-is.
        self.ob_0 = np.array(41)
        # Last velocity estimate (14 values).
        self.ob_1 = np.zeros(14)

    def env_create(self, token):
        """Create the remote environment; return the initial 55-dim observation.

        Fix: ``token`` was previously ignored in favour of a hard-coded
        credential; the caller-supplied token is now forwarded to the grader.
        """
        self.ob_0 = self.preprocess(
            np.array(self.client.env_create(token)))
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def env_reset(self):
        """Reset the remote environment; return the 55-dim observation or None."""
        ob = self.client.env_reset()
        if ob is None:
            # Grader has no more episodes; propagate the sentinel.
            return None
        self.ob_0 = self.preprocess(np.array(ob))
        self.ob_0[1] = 0  # zero pelvis x so absolute position is hidden
        self.ob_1 = np.zeros(14)  # no previous frame -> zero velocities
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def env_step(self, action):
        """Step the remote environment; return the next 55-dim observation.

        NOTE(review): reward/done/info returned by the underlying client are
        discarded by this wrapper (the original code had an unreachable
        ``return res`` after the observation return; removed as dead code).
        """
        res = self.client.env_step(action)
        ob_0_post = self.ob_0
        self.ob_0 = self.preprocess(np.array(res[0]))
        self.ob_0[1] = 0  # zero pelvis x, as in env_reset()
        # Finite-difference velocity over the 0.01s simulation timestep.
        self.ob_1 = (self.ob_0[22:36] - ob_0_post[22:36]) / 0.01
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def submit(self):
        """Finalize the grader submission."""
        self.client.submit()

    def preprocess(self, v):
        """Make selected coordinates of raw observation ``v`` pelvis-relative.

        Mutates and returns ``v``. NOTE(review): index 1 is processed first,
        which zeroes v[1]; the remaining indices in ``n`` therefore subtract 0.
        This looks unintended but is preserved because trained models depend
        on the exact feature values.
        """
        n = [1, 18, 22, 24, 26, 28, 30, 32, 34]  # x-coords, made relative to v[1]
        m = [19, 23, 25, 27, 29, 31, 33, 35]     # y-coords, made relative to v[2]
        for i in n:
            v[i] = v[i] - v[1]
        for i in m:
            v[i] = v[i] - v[2]
        v[20] = v[20] - v[4]
        v[21] = v[21] - v[5]
        return v
def submit():
    """Run the trained discretized policy against the crowdAI grader and submit.

    Fix: the grader connection was previously bound as ``Client =
    Client(remote_base)``, shadowing the imported ``Client`` class with an
    instance; the instance is now bound to ``client``.

    Relies on module-level globals ``sess``, ``sy_sampled_ac``, ``sy_ob_no``,
    ``bins`` and ``np`` -- NOTE(review): confirm these are defined at module
    scope before calling.
    """
    from osim.http.client import Client

    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = "01342e360022c2def5c2cc04c5843381"

    client = Client(remote_base)
    observation = client.env_create(env_id="ProstheticsEnv", token=crowdai_token)

    # The grader serves several episodes; keep stepping until env_reset()
    # returns a falsy observation, then submit.
    while True:
        # Batch of one observation for the policy network.
        k = np.reshape(np.array(observation), newshape=(-1, len(observation)))
        ac_ind = sess.run(sy_sampled_ac, feed_dict={sy_ob_no: k})
        ac_ind = np.reshape(ac_ind, newshape=(ac_ind.shape[1]))
        action = bins[ac_ind]  # map discrete indices back to action values
        [observation, reward, done, info] = client.env_step(action, True)
        if done:
            observation = client.env_reset()
            if not observation:
                break
    client.submit()
def submit(args):
    """Replay a trained DDPG policy against the remote crowdAI grader.

    Loads weights from ``args.model``, steps the grader environment episode
    by episode, and finalizes the submission once the grader reports that no
    episodes remain.
    """
    print('start submitting')

    grader = Client('http://grader.crowdai.org:1733')
    agent = DDPG()
    agent.load_model(args.model, load_memory=False)

    state = grader.env_create(TOKEN)
    features = FeatureGenerator()
    state = features.gen(state)

    step, ep_reward = 0, 0
    while True:
        print('selecting action ...', end=' ')
        chosen = agent.select_action(list(state))
        print('client.env_step ...')
        raw_state, reward, done, info = grader.env_step(chosen.tolist())
        state = features.gen(raw_state)
        print('step: {0:03d}, ep_reward: {1:02.08f}'.format(step, ep_reward))
        ep_reward += reward
        step += 1
        if not done:
            continue
        # Episode finished: start the next one, or stop when none are left.
        print('done')
        state = grader.env_reset()
        if not state:
            break
        step, ep_reward = 0, 0
        features = FeatureGenerator()
        state = features.gen(state)
    grader.submit()
def submit(self):
    """Run this agent against the remote crowdAI grader and submit.

    The grader runs 3 simulations of at most 1000 steps each; we keep
    stepping until env_reset() returns a falsy observation.
    """
    grader_url = 'http://grader.crowdai.org:1729'
    local_env = RunEnv(visualize=self.visualize)  # kept for construction side effects
    grader = Client(grader_url)

    # Creating the remote environment yields the first observation.
    obs = grader.env_create(self.submit_token)

    finished = False
    while not finished:
        obs, reward, done, info = grader.env_step(self.agent.forward(obs))
        if done:
            obs = grader.env_reset()
            finished = not obs
    grader.submit()
class RemoteSubmit(object):
    """Runs a locally-trained agent against the remote crowdAI grader.

    Parameters
    ----------
    token : str
        crowdAI API token used to create the remote environment.
    agent_type : str
        One of 'random', 'fixed-action' or 'a3c'; any other value raises
        ``Exception`` with a status dict.
    """

    def __init__(self, token, agent_type):
        self.token = token
        self.remote_base = "http://grader.crowdai.org:1729"
        self.client = Client(self.remote_base)

        # TODO:: Add agent selector
        if agent_type == 'random':
            self.agent = RandomAgent()
        elif agent_type == 'fixed-action':
            self.agent = FixedActionAgent()
        elif agent_type == 'a3c':
            self.agent = A3CAgent(num_envs=2, num_steps=50, max_frames=1000)
        else:
            status = {
                'status': 'ERROR',
                'error_msg': 'Not supported agent-type'
            }
            raise Exception(status)

    def run(self):
        """Drive the agent through every grader episode, then submit.

        Raises
        ------
        Exception
            Wraps any error raised while stepping the grader. Fix: the
            original exception is now chained (``from e``) so its traceback
            is preserved instead of being discarded.
        """
        try:
            self.agent.run()  # fix: result was bound to an unused local
            observation = self.client.env_create(self.token, env_id="ProstheticsEnv")
            while True:
                action = self.agent.get_action(observation)
                [observation, reward, done, info] = self.client.env_step(action, False)
                if done:
                    observation = self.client.env_reset()
                    if not observation:
                        break
            self.client.submit()
        except Exception as e:
            status = {'status': 'ERROR', 'error_msg': e}
            raise Exception(status) from e
def main():
    # Submit a pickled policy snapshot to the crowdAI grader.
    # NOTE(review): Python 2 script (print statements, raw_input, cPickle).

    # Settings
    remote_base = 'http://grader.crowdai.org'

    # Command line parameters
    parser = argparse.ArgumentParser(
        description='Submit the result to crowdAI')
    parser.add_argument("hdf")  # path to the HDF5 file holding agent snapshots
    parser.add_argument('--token', dest='token', action='store', required=True)
    args = parser.parse_args()

    hdf = h5py.File(args.hdf, 'r')
    # NOTE(review): env is never used below -- presumably constructed for its
    # side effects (OpenSim init); confirm before removing.
    env = GaitEnv(visualize=False)
    # Unpickle the snapshot stored under key '0995' (episode 995).
    agent = cPickle.loads(hdf['agent_snapshots']['0995'].value)
    agent.stochastic = False  # deterministic actions for grading

    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    total_reward = 0
    # Run a single step
    for i in range(501):
        ob = agent.obfilt(observation)  # agent's observation filter/normalizer
        a, _info = agent.act(ob)
        [observation, reward, done, info] = client.env_step(a.tolist(), True)
        print i, reward, done
        total_reward += reward
        if done:
            break
    print 'TOTAL REWARD: ', total_reward
    # Require manual confirmation before the final, irreversible submit.
    raw_input('press ENTER to submit')
    client.submit()
# if not observation: # break # client.submit() # If TEST and no TOKEN, run some test experiments if args.token: agent.load_weights(args.model) remote_base = 'http://grader.crowdai.org:1729' client = Client(remote_base) # Create environment observation = client.env_create(args.token) # Run a single step # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one while True: v = np.array(observation).reshape((env.observation_space.shape[0])) action = agent.forward(v) [observation, reward, done, info] = client.env_step(action.tolist()) observation = process_observation(observation) total_reward += reward if done: observation = client.env_reset() if not observation: break client.submit() # Finally, evaluate our algorithm for 1 episode. #
def upload(frameskip = 1):
    # Run the (module-global) `agent` against the crowdAI ProstheticsEnv
    # grader, repeating each action `frameskip` times, then submit.
    from osim.http.client import Client
    apikey = open('apikey.txt').read().strip('\n')
    print('Using apikey:', apikey)

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = apikey

    print('connecting...')
    client = Client(remote_base)

    # Create environment (returns a dict-style observation).
    observation_d = client.env_create(crowdai_token, env_id="ProstheticsEnv")
    #observation = process_obs_dict(observation_d)
    print('environment created! running...')

    #obs_collect = []
    #a_collect = []
    stepno= 0
    epino = 0
    total_reward = 0
    while True:
        #a = AGENT OUTPUT
        # Convert the grader's dict observation to the agent's flat format.
        observation_d = observation_d
        observation = process_obs_dict(observation_d)
        a, q = agent.act(observation)
        a = [float(i) for i in list(a)]  # grader requires plain floats
        #obs_collect.append(observation)
        #a_collect.append(a)
        # Repeat the chosen action `frameskip` times.
        for _ in range(frameskip):
            [observation_d, reward, done, info] = client.env_step(a, True)
            stepno += 1
            total_reward += reward
            print('step',stepno,'total reward',total_reward)
            if done:
                '''
                print('')
                print('saving...')
                print('')
                with open('upload_saves/upload_a_collect_' + str(epino) + '.p', 'wb') as f:
                    pickle.dump(a_collect, f)
                with open('upload_saves/upload_obs_collect_' + str(epino) + '.p', 'wb') as f:
                    pickle.dump(obs_collect, f)
                '''
                observation_d = client.env_reset()
                print('>> episode',epino,' Done after',stepno,'got reward:',total_reward)
                print('')
                total_reward = 0
                stepno = 0
                epino += 1
                # Leave the frameskip loop early; the episode is over.
                break
        # env_reset() returns a falsy observation when no episodes remain.
        if not observation_d:
            break
    print('Done! Submitting...')
    client.submit()
def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size, nprocs,
         policy_hid_list, valfunc_hid_list, gpu_pct, restore_path, animate,
         submit):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)
        batch_size: number of episodes per policy training batch
        nprocs, policy_hid_list, valfunc_hid_list, gpu_pct: accepted but not
            read in this function body -- presumably consumed elsewhere (TODO confirm)
        restore_path: checkpoint directory to resume from (optional)
        animate: if truthy, render a single episode and exit
        submit: if truthy, run against the crowdAI grader and exit
    """
    # killer = GracefulKiller()
    env, obs_dim, act_dim = init_osim(animate)
    # Per-rank seeding keeps MPI workers decorrelated but reproducible.
    env.seed(111 + mpi_util.rank)
    mpi_util.set_global_seeds(111 + mpi_util.rank)

    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    now = datetime.utcnow().strftime(
        "%b-%d_%H:%M:%S")  # create unique directories
    if mpi_util.rank == 0:
        #aigym_path = os.path.join('/tmp', env_name, now)
        #env = wrappers.Monitor(env, aigym_path, force=True)
        logger = Logger(logname=env_name, now=now)

    episode = 0
    checkpoint = Checkpoint("saves", now)
    # restore from checkpoint?
    if restore_path:
        (policy, val_func, scaler, episode, obs_dim, act_dim,
         kl_targ) = checkpoint.restore(restore_path)
    else:
        policy = Policy(obs_dim, act_dim, kl_targ)
        val_func = NNValueFunction(obs_dim)
        scaler = Scaler(obs_dim)

        if mpi_util.rank == 0:
            # run a few episodes (on node 0) of untrained policy to initialize scaler:
            trajectories = run_policy(env, policy, scaler, episodes=5)
            unscaled = np.concatenate(
                [t['unscaled_obs'] for t in trajectories])
            scaler.update(
                unscaled)  # update running statistics for scaling observations

        # broadcast policy weights, scaler, val_func
        (policy, scaler, val_func) = mpi_util.broadcast_policy_scaler_val(
            policy, scaler, val_func)

        if mpi_util.rank == 0:
            checkpoint.save(policy, val_func, scaler, episode)

    if animate:
        # Render one episode with the current policy, then quit.
        observes, actions, rewards, unscaled_obs = run_episode(env,
                                                               policy,
                                                               scaler,
                                                               animate=animate)
        exit(0)

    if submit:
        # Settings
        #remote_base = 'http://grader.crowdai.org:1729'
        remote_base = 'http://grader.crowdai.org:1730'
        token = 'a83412a94593cae3a491f3ee28ff44e1'
        client = Client(remote_base)

        # Create environment
        observation = client.env_create(token)
        step = 0.0
        observes, actions, rewards, unscaled_obs = [], [], [], []
        scale, offset = scaler.get()
        scale[-1] = 1.0  # don't scale time step feature
        offset[-1] = 0.0  # don't offset time step feature

        # Run a single step
        #
        # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
        while True:
            obs = np.array(observation).astype(np.float32).reshape((1, -1))
            print("OBSERVATION TYPE:", type(obs), obs.shape)
            print(obs)
            obs = np.append(obs, [[step]], axis=1)  # add time step feature
            unscaled_obs.append(obs)
            obs = (obs - offset) * scale  # center and scale observations
            observes.append(obs)

            action = policy.sample(obs).astype(np.float32).reshape((-1, 1))
            print("ACTION TYPE:", type(action), action.shape)
            print(action)
            actions.append(action)

            [observation, reward, done, info] = client.env_step(action.tolist())
            print("step:", step, "reward:", reward)

            if not isinstance(reward, float):
                reward = np.asscalar(reward)  # unwrap 0-d numpy reward
            rewards.append(reward)
            step += 1e-3  # increment time step feature

            if done:
                print(
                    "================================== RESTARTING ================================="
                )
                observation = client.env_reset()
                # Reset per-episode buffers and re-fetch scaler stats.
                step = 0.0
                observes, actions, rewards, unscaled_obs = [], [], [], []
                scale, offset = scaler.get()
                scale[-1] = 1.0  # don't scale time step feature
                offset[-1] = 0.0  # don't offset time step feature
                if not observation:
                    break

        client.submit()
        exit(0)

    ######

    worker_batch_size = int(batch_size / mpi_util.nworkers)  # HACK
    if (worker_batch_size * mpi_util.nworkers != batch_size):
        print("batch_size:", batch_size, " is not divisible by nworkers:",
              mpi_util.nworkers)
        exit(1)

    batch = 0
    while episode < num_episodes:
        if mpi_util.rank == 0 and batch > 0 and batch % 10 == 0:
            checkpoint.save(policy, val_func, scaler, episode)
        batch = batch + 1

        # Every worker collects its share of trajectories; rank 0 trains.
        trajectories = run_policy(env, policy, scaler,
                                  episodes=worker_batch_size)
        trajectories = mpi_util.gather_trajectories(trajectories)

        if mpi_util.rank == 0:
            # concatentate trajectories into one list
            trajectories = list(itertools.chain.from_iterable(trajectories))
            print("did a batch of ", len(trajectories), " trajectories")
            print([t['rewards'].sum() for t in trajectories])
            episode += len(trajectories)

            add_value(trajectories, val_func)  # add estimated values to episodes
            add_disc_sum_rew(trajectories, gamma)  # calculated discounted sum of Rs
            add_gae(trajectories, gamma, lam)  # calculate advantage

            # concatenate all episodes into single NumPy arrays
            observes, actions, advantages, disc_sum_rew = build_train_set(
                trajectories)

            # add various stats to training log:
            logger.log({
                '_MeanReward':
                np.mean([t['rewards'].sum() for t in trajectories]),
                'Steps':
                np.sum([t['observes'].shape[0] for t in trajectories])
            })
            log_batch_stats(observes, actions, advantages, disc_sum_rew,
                            logger, episode)

            policy.update(observes, actions, advantages, logger)  # update policy
            val_func.fit(observes, disc_sum_rew, logger)  # update value function

            unscaled = np.concatenate(
                [t['unscaled_obs'] for t in trajectories])
            scaler.update(
                unscaled)  # update running statistics for scaling observations
            logger.write(
                display=True)  # write logger results to file and stdout

        # if mpi_util.rank == 0 and killer.kill_now:
        #     if input('Terminate training (y/[n])? ') == 'y':
        #         break
        #     killer.kill_now = False

        # broadcast policy weights, scaler, val_func
        (policy, scaler, val_func) = mpi_util.broadcast_policy_scaler_val(
            policy, scaler, val_func)

    if mpi_util.rank == 0:
        logger.close()
    policy.close_sess()
    if mpi_util.rank == 0:
        val_func.close_sess()
import numpy as np
import argparse

# Submit random actions to the crowdAI grader (sanity-check script).
# NOTE(review): RunEnv and Client must be imported elsewhere in this file.

# Settings
remote_base = 'http://grader.crowdai.org:1729'

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store', required=True)
args = parser.parse_args()

env = RunEnv(visualize=False)
client = Client(remote_base)

# Create environment
observation = client.env_create(args.token)

# Run a single step
#
# The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
while True:
    # NOTE(review): v is computed but unused -- random actions are submitted.
    v = np.array(observation).reshape((-1,1,env.observation_space.shape[0]))
    [observation, reward, done, info] = client.env_step(env.action_space.sample().tolist())
    print(observation)
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
nb_actions = env.action_space.shape[0] # Load the acton actor = Sequential() actor.add(Flatten(input_shape=(1,) + env.observation_space.shape)) actor.add(Dense(32)) actor.add(Activation('relu')) actor.add(Dense(32)) actor.add(Activation('relu')) actor.add(Dense(32)) actor.add(Activation('relu')) actor.add(Dense(nb_actions)) actor.add(Activation('sigmoid')) actor.load_weights(args.model) client = Client(remote_base) # Create environment observation = client.env_create(args.token) # Run a single step for i in range(501): v = np.array(observation).reshape((-1,1,env.observation_space.shape[0])) [observation, reward, done, info] = client.env_step(args.token, actor.predict(v)[0].tolist(), True) if done: break client.submit(args.token)
def main():
    # Restore one or more DDPG agents from checkpoints, optionally evaluate
    # them locally, and optionally submit the blended policy to the crowdAI
    # grader.
    args = parse_args()
    logger.configure()

    # DDPG hyperparameters (must match the values used at training time so
    # the checkpoints restore cleanly).
    gamma = 0.99
    tau = 0.01
    normalize_returns = False
    normalize_observations = True
    batch_size = 64
    action_noise = None
    stddev = 0.2
    param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev),
                                         desired_action_stddev=float(stddev))
    critic_l2_reg = 1e-2
    actor_lr = 1e-4
    critic_lr = 1e-3
    popart = False
    clip_norm = None
    reward_scale = 1.

    env = prosthetics_env.Wrapper(osim_env.ProstheticsEnv(visualize=False),
                                  frameskip=4,
                                  reward_shaping=True,
                                  reward_shaping_x=1,
                                  feature_embellishment=True,
                                  relative_x_pos=True,
                                  relative_z_pos=True)
    top_model_dir = 'top-models/'

    # create tf sessions and graphs -- one isolated graph/session per model
    # file so the restored networks don't collide.
    sess_list = []
    graph_list = []
    for i in range(len(args.model_files)):
        graph_list.append(tf.Graph())
        sess_list.append(tf.Session(graph=graph_list[i]))

    ddpg_agents = []
    for i in range(len(args.model_files)):
        model_name = args.model_files[i]
        sess = sess_list[i]
        graph = graph_list[i]
        l_size = args.layer_sizes[i]
        with sess.as_default():
            #with U.make_session(num_cpu=1, graph=g) as sess:
            with graph.as_default():
                #tf.global_variables_initializer()
                # restore agents from model files and store in ddpg_agents
                print("Restoring from..." + model_name)

                # Configure components.
                memory = Memory(limit=int(1e6),
                                action_shape=env.action_space.shape,
                                observation_shape=env.observation_space.shape)
                critic = Critic(layer_norm=True,
                                activation='relu',
                                layer_sizes=[l_size, l_size])
                actor = Actor(env.action_space.shape[-1],
                              layer_norm=True,
                              activation='relu',
                              layer_sizes=[l_size, l_size])

                agent = DDPG(actor, critic, memory,
                             env.observation_space.shape,
                             env.action_space.shape,
                             gamma=gamma,
                             tau=tau,
                             normalize_returns=normalize_returns,
                             normalize_observations=normalize_observations,
                             batch_size=batch_size,
                             action_noise=action_noise,
                             param_noise=param_noise,
                             critic_l2_reg=critic_l2_reg,
                             actor_lr=actor_lr,
                             critic_lr=critic_lr,
                             enable_popart=popart,
                             clip_norm=clip_norm,
                             reward_scale=reward_scale)

                # restore adam state and param noise
                restore_model_path = top_model_dir + model_name
                saver = tf.train.Saver(max_to_keep=500)
                # restore network weights
                saver.restore(sess, restore_model_path)
                # Optimizer moments/step live in a side-car pickle next to the
                # TF checkpoint.
                adam_optimizer_store = pickle.load(open(restore_model_path + ".pkl", "rb"))
                agent.actor_optimizer.m = adam_optimizer_store['actor_optimizer']['m']
                agent.actor_optimizer.v = adam_optimizer_store['actor_optimizer']['v']
                agent.actor_optimizer.t = adam_optimizer_store['actor_optimizer']['t']
                agent.critic_optimizer.m = adam_optimizer_store['critic_optimizer']['m']
                agent.critic_optimizer.v = adam_optimizer_store['critic_optimizer']['v']
                agent.critic_optimizer.t = adam_optimizer_store['critic_optimizer']['t']
                if 'param_noise' in adam_optimizer_store:
                    agent.param_noise = adam_optimizer_store['param_noise']

                # intialize and prepare agent session.
                agent.initialize(sess)
                #sess.graph.finalize()
                agent.reset()
                ddpg_agents.append(agent)

    # From here on `agent` is the ensemble over all restored agents.
    agent = BlendedAgent(ddpg_agents, sess_list, graph_list)

    if args.evaluation:
        # setup eval env
        eval_env = prosthetics_env.EvaluationWrapper(osim_env.ProstheticsEnv(visualize=False),
                                                     frameskip=4,
                                                     reward_shaping=True,
                                                     reward_shaping_x=1,
                                                     feature_embellishment=True,
                                                     relative_x_pos=True,
                                                     relative_z_pos=True)
        eval_env.change_model(model=('3D').upper(), prosthetic=True, difficulty=0, seed=0)
        eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval'))
        nb_eval_steps = 1000
        # reward, mean_q, final_steps = evaluate_one_episode(eval_env, ddpg_agents, sess_list, graph_list,
        #                                                    nb_eval_steps=nb_eval_steps,
        #                                                    render=False)
        reward, mean_q, final_steps = evaluate_one_episode(eval_env, agent,
                                                           nb_eval_steps,
                                                           render=False)
        print("Reward: " + str(reward))
        print("Mean Q: " + str(mean_q))
        print("Final num steps: " + str(final_steps))

    # Submit to crowdai competition. What a hack. :)
    # if crowdai_client is not None and crowdai_token is not None and eval_env is not None:
    crowdai_submit_count = 0
    if args.crowdai_submit:
        remote_base = "http://grader.crowdai.org:1729"
        crowdai_client = Client(remote_base)
        eval_obs_dict = crowdai_client.env_create(args.crowdai_token, env_id="ProstheticsEnv")
        eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
            eval_obs_dict,
            reward_shaping=True,
            reward_shaping_x=1.,
            feature_embellishment=True,
            relative_x_pos=True,
            relative_z_pos=True)
        while True:
            action, _ = agent.pi(eval_obs_projection, apply_noise=False, compute_Q=False)
            # Map internal action layout to the grader's expected layout and
            # clip into the valid [0, 1] muscle-activation range.
            submit_action = prosthetics_env.openai_to_crowdai_submit_action(action)
            clipped_submit_action = np.clip(submit_action, 0., 1.)
            actions_equal = clipped_submit_action == submit_action
            if not np.all(actions_equal):
                # Clipping changed the action: log it for post-mortem.
                logger.debug("crowdai_submit_count:", crowdai_submit_count)
                logger.debug(" openai-action:", action)
                logger.debug(" submit-action:", submit_action)
            crowdai_submit_count += 1
            [eval_obs_dict, reward, done, info] = crowdai_client.env_step(clipped_submit_action.tolist(), True)
            # [eval_obs_dict, reward, done, info] = crowdai_client.env_step(agent.pi(eval_obs_projection, apply_noise=False, compute_Q=False), True)
            eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
                eval_obs_dict,
                reward_shaping=True,
                reward_shaping_x=1.,
                feature_embellishment=True,
                relative_x_pos=True,
                relative_z_pos=True)
            if done:
                logger.debug("done: crowdai_submit_count:", crowdai_submit_count)
                eval_obs_dict = crowdai_client.env_reset()
                if not eval_obs_dict:
                    break
                logger.debug("done: eval_obs_dict exists after reset")
                eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
                    eval_obs_dict,
                    reward_shaping=True,
                    reward_shaping_x=1.,
                    feature_embellishment=True,
                    relative_x_pos=True,
                    relative_z_pos=True)
        crowdai_client.submit()

    for i in range(len(sess_list)):
        sess_list[i].close()
# Restore a trained policy into a fresh TF session and drive the grader.
sess=tf.InteractiveSession()
agent.initialize(sess)
sess.graph.finalize()
agent.reset()

# Restore weights from the 30000-step checkpoint.
filename="/home/vaisakhs_shaj/Desktop/MODEL/tfSteps"+str(30000)+".model"
saver.restore(sess,filename)

# Create environment
# NOTE(review): `Client` is used like an instance here -- presumably rebound
# to Client(remote_base) earlier in the file; confirm.
observation = Client.env_create(env_id="ProstheticsEnv",token=crowdai_token)
#print([n.name for n in tf.get_default_graph().as_graph_def().node])

def my_controller(obs):
    # Flatten the grader's dict observation and query the deterministic policy.
    obs=np.array(dict_to_list(obs))
    action=agent.pi(obs, apply_noise=False, compute_Q=False)[0]
    action=action.tolist()  # grader requires a plain Python list
    return action

while True:
    [observation, reward, done, info] = Client.env_step(my_controller(observation), True)
    if done:
        observation = Client.env_reset()
        if not observation:
            break

Client.submit()
def up():  # uploading to CrowdAI
    # Runs the (module-global) `agent` against the grader's RunEnv, logging
    # every raw observation to sublog.csv.
    # global _stepsize
    # _stepsize = 0.01
    apikey = open('apikey.txt').read().strip('\n')
    print('apikey is', apikey)

    import opensim as osim
    from osim.http.client import Client
    from osim.env import RunEnv

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = apikey

    client = Client(remote_base)

    ob_log = ''  # string to log observations

    # Create environment
    observation = client.env_create(crowdai_token)
    # old_observation = None
    stepno = 0
    epino = 0
    total_reward = 0
    old_observation = None

    def obg(plain_obs):
        # Observation generator: logs the raw observation and returns the
        # processed feature vector (go() also produces the carried state).
        nonlocal old_observation, stepno, ob_log
        # log csv observation into string
        ob_log += ','.join([str(i) for i in plain_obs]) + '\n'
        processed_observation, old_observation = go(plain_obs,
                                                    old_observation,
                                                    step=stepno)
        return np.array(processed_observation)

    print('environment created! running...')
    # Run a single step
    while True:
        proc_observation = obg(observation)
        # Grader requires plain floats, render flag True.
        [observation, reward, done, info] = client.env_step(
            [float(i) for i in list(agent.act(proc_observation)[0])], True)
        stepno += 1
        total_reward += reward
        print('step', stepno, 'total reward', total_reward)
        # print(observation)
        if done:
            observation = client.env_reset()
            old_observation = None  # restart the derivative history
            print('>>>>>>>episode', epino, ' DONE after', stepno,
                  'got_reward', total_reward)
            total_reward = 0
            stepno = 0
            epino += 1
            if not observation:
                break
    print('submitting...')
    client.submit()

    print('saving to file...')
    with open('sublog.csv', 'w') as f:
        f.write(ob_log)
def up():  # uploading to CrowdAI
    # ProstheticsEnv variant: flattens the grader's dict observation inline
    # (see obg below) and drives the module-global `agent`.
    # global _stepsize
    # _stepsize = 0.01
    apikey = open('apikey.txt').read().strip('\n')
    print('apikey is', apikey)

    import opensim as osim
    from osim.http.client import Client
    from osim.env import ProstheticsEnv as RunEnv

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = apikey

    client = Client(remote_base)

    ob_log = ''  # string to log observations

    # Create environment
    observation = client.env_create(crowdai_token, env_id="ProstheticsEnv")
    #print('pg test 717:', observation)
    #observation = obs_dict_to_list(observation)
    #print('pg test 734:', observation)
    # old_observation = None
    stepno = 0
    epino = 0
    total_reward = 0
    old_observation = None

    '''
    def obg(plain_obs):
        nonlocal old_observation, stepno, ob_log
        # log csv observation into string
        ob_log += ','.join([str(i) for i in plain_obs]) + '\n'
        processed_observation, old_observation = go(plain_obs, old_observation, step=stepno)
        return np.array(processed_observation)
    '''

    def obg(plain_obs):
        # observation generator
        # derivatives of observations extracted here.
        #print('pg multi.py 21, plain_obs:', len(plain_obs))
        #processed_observation, self.old_observation = go(plain_obs, self.old_observation, step=self.stepcount)
        observation = plain_obs
        obs = []
        # Mass-center kinematics first.
        obs.extend(observation['misc']['mass_center_pos'])  # x, y, z
        obs.extend(observation['misc']['mass_center_vel'])  # x, y, z
        obs.extend(observation['misc']['mass_center_acc'])  # x, y, z
        # joint body, positions and vels relative to pelvis
        # Absolute Joint Positions
        obs.extend(observation['joint_pos']['ground_pelvis'])
        obs.extend(observation['joint_pos']['hip_r'])
        obs.extend(observation['joint_pos']['knee_r'])
        obs.extend(observation['joint_pos']['ankle_r'])
        obs.extend(observation['joint_pos']['hip_l'])
        obs.extend(observation['joint_pos']['knee_l'])
        obs.extend(observation['joint_pos']['ankle_l'])
        '''
        # Relative Joint Positions
        #print(observation['joint_pos']['ground_pelvis'])
        obs.extend(observation['joint_pos']['ground_pelvis'])  # 6 elements
        #print(rel_to_A(observation['joint_pos']['hip_r'], observation['body_pos']['pelvis']))
        obs.extend(rel_to_A(observation['joint_pos']['hip_r'], observation['body_pos']['pelvis']))  # 3e
        obs.extend(rel_to_A(observation['joint_pos']['knee_r'], observation['body_pos']['pelvis']))  # 1e
        obs.extend(rel_to_A(observation['joint_pos']['ankle_r'], observation['body_pos']['pelvis']))  # 1e
        obs.extend(rel_to_A(observation['joint_pos']['hip_l'], observation['body_pos']['pelvis']))  # 3e
        obs.extend(rel_to_A(observation['joint_pos']['knee_l'], observation['body_pos']['pelvis']))  # 1e
        obs.extend(rel_to_A(observation['joint_pos']['ankle_l'], observation['body_pos']['pelvis']))  # 1e
        '''
        # Absolute Joint Vel
        obs.extend(observation['joint_vel']['ground_pelvis'])
        obs.extend(observation['joint_vel']['hip_r'])
        obs.extend(observation['joint_vel']['knee_r'])
        obs.extend(observation['joint_vel']['ankle_r'])
        obs.extend(observation['joint_vel']['hip_l'])
        obs.extend(observation['joint_vel']['knee_l'])
        obs.extend(observation['joint_vel']['ankle_l'])
        # Absolute Joint Acc
        obs.extend(observation['joint_acc']['ground_pelvis'])
        obs.extend(observation['joint_acc']['hip_r'])
        obs.extend(observation['joint_acc']['knee_r'])
        obs.extend(observation['joint_acc']['ankle_r'])
        obs.extend(observation['joint_acc']['hip_l'])
        obs.extend(observation['joint_acc']['knee_l'])
        obs.extend(observation['joint_acc']['ankle_l'])
        # Body-segment kinematics: 6 kinematic groups x 11 segments.
        b = [
            'body_pos', 'body_vel', 'body_acc', 'body_pos_rot',
            'body_vel_rot', 'body_acc_rot'
        ]
        parts = [
            'pelvis', 'femur_r', 'pros_tibia_r', 'pros_foot_r', 'femur_l',
            'tibia_l', 'talus_l', 'calcn_l', 'toes_l', 'torso', 'head'
        ]
        for i in b:
            for j in parts:
                obs.extend(observation[i][j])
        # All force sensors, flattened in dict iteration order.
        forces_subkeys = observation['forces'].keys()
        for k in forces_subkeys:
            obs.extend(observation['forces'][k])
        #print('pg multi.py 25, proc_obs:', len(processed_observation))
        return np.array(obs)

    #print(observation)
    #print(obg(observation).shape)
    print('environment created! running...')
    # Run a single step
    while True:
        proc_observation = obg(observation)
        a = [float(i) for i in list(agent.act(proc_observation)[0])]
        #print(a)
        [observation, reward, done, info] = client.env_step(a, True)
        stepno += 1
        total_reward += reward
        print('step', stepno, 'total reward', total_reward)
        # print(observation)
        if done:
            observation = client.env_reset()
            old_observation = None
            print('>>>>>>>episode', epino, ' DONE after', stepno,
                  'got_reward', total_reward)
            total_reward = 0
            stepno = 0
            epino += 1
            if not observation:
                break
    print('submitting...')
    client.submit()

    print('saving to file...')
    with open('sublog.csv', 'w') as f:
        f.write(ob_log)
class SubmitEnv:
    """Gym-style adapter around the crowdAI grader client for submission runs.

    ``reset``/``step`` proxy to the remote grader; when the grader reports no
    more episodes, the results are submitted and the process exits.
    """

    def __init__(self):
        from osim.http.client import Client
        remote_base = "http://grader.crowdai.org:1729"
        self.crowdai_token = "e47cb9f7fd533dc036dbd5d65d0d68c3"
        self.client = Client(remote_base)
        self.first_reset = True  # first reset must create the remote env
        self.action_space = Box(low=0, high=1, shape=[19])
        self.observation_space = Box(low=-3, high=3, shape=[224])
        self.episodic_length = 0
        self.score = 0
        self.reward_range = None
        self.metadata = None

    def reset(self):
        """Start the next grader episode and return its first observation.

        When the grader has no episodes left (env_reset() returns None),
        submit the run and terminate the process.
        """
        self.episodic_length = 0
        self.score = 0
        if self.first_reset:
            self.first_reset = False
            return self.get_observation(
                self.client.env_create(self.crowdai_token,
                                       env_id="ProstheticsEnv"))
        obs = self.client.env_reset()
        if obs is None:
            self.client.submit()
            print('SUBMITTED')
            import sys
            sys.exit(0)
        return self.get_observation(obs)

    def step(self, action):
        """Forward one action to the grader; return (obs, reward, done, info)."""
        [obs, rew, done, info] = self.client.env_step(action.tolist(), True)
        self.episodic_length += 1
        self.score += rew
        pelvis_vx = obs['body_vel']['pelvis'][0]
        print(
            f'timestamp={self.episodic_length:3d} score={self.score:5.2f} velocity={pelvis_vx:3.2f}'
        )
        import sys
        sys.stdout.flush()
        return self.get_observation(obs), rew, done, info

    def close(self):
        pass

    def get_observation(self, state_desc):
        """Flatten the grader's state dict into the 224-feature vector.

        Body-part kinematics are pelvis-relative along x/z; pelvis.x itself
        is dropped so the agent cannot key on absolute position.

        Fix: the relative z of the mass centre previously subtracted the
        pelvis *x* coordinate (``cm_pos[2] -= pelvis[0]``); it now subtracts
        the pelvis z, matching the body-part handling above.
        """
        res = []
        pelvis = None
        for body_part in [
                "pelvis", "head", "torso", "toes_l", "talus_l", "pros_foot_r",
                "pros_tibia_r"
        ]:
            cur = []
            cur += state_desc["body_pos"][body_part]
            cur += state_desc["body_vel"][body_part]
            cur += state_desc["body_acc"][body_part]
            cur += state_desc["body_pos_rot"][body_part]
            cur += state_desc["body_vel_rot"][body_part]
            cur += state_desc["body_acc_rot"][body_part]
            if body_part == "pelvis":
                pelvis = cur
                res += cur[1:]  # make sense, pelvis.x is not important
            else:
                cur[0] -= pelvis[0]
                cur[2] -= pelvis[2]  # relative position work for x / z axis
                res += cur
        for joint in [
                "ankle_l", "ankle_r", "back", "hip_l", "hip_r", "knee_l",
                "knee_r"
        ]:
            res += state_desc["joint_pos"][joint]
            res += state_desc["joint_vel"][joint]
            res += state_desc["joint_acc"][joint]
        for muscle in sorted(state_desc["muscles"].keys()):
            res += [state_desc["muscles"][muscle]["activation"]]
            res += [state_desc["muscles"][muscle]["fiber_length"]]
            res += [state_desc["muscles"][muscle]["fiber_velocity"]]
        cm_pos = state_desc["misc"][
            "mass_center_pos"]  # relative x / z axis center of mass position
        cm_pos[0] -= pelvis[0]
        cm_pos[2] -= pelvis[2]  # FIX: was pelvis[0] (x subtracted from z)
        res = res + cm_pos + state_desc["misc"][
            "mass_center_vel"] + state_desc["misc"]["mass_center_acc"]
        return res