from baselines.ddpg.ddpg import DDPG import baselines.common.tf_util as U from baselines import logger import numpy as np import tensorflow as tf from mpi4py import MPI from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv from osim.env import ProstheticsEnv import sys # Settings remote_base = "http://grader.crowdai.org:1729" crowdai_token = "8592db9b224e4293d437776321861a32" client = Client(remote_base) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) memory=[] agent = DDPG(actor, critic, env.observation_space.shape, env.action_space.shape) # Create environment observation = client.env_create(crowdai_token) # IMPLEMENTATION OF YOUR CONTROLLER # my_controller = ... (for example the one trained in keras_rl) def my_controller(): with U.make_session() as sess: # Prepare everything. agent.initialize(sess) sess.graph.finalize()
help='CGP individual filename') parser.add_argument('--live', action='store_true', default=False) parser.add_argument('--visual', action='store_true', default=False) parser.add_argument('--seed', type=int, default=0, help='random seed for evaluation') args = parser.parse_args() # Settings remote_base = 'http://osim-rl-grader.aicrowd.com/' cgp_id = args.ind # Create environment if args.live: with open(args.token, 'r') as f: aicrowd_token = f.read().strip() client = Client(remote_base) observation = client.env_create(aicrowd_token, env_id='L2M2019Env') else: env = L2M2019Env(visualize=args.visual) observation = env.reset(seed=args.seed) # CGP controller library = build_funcLib() ind = CGP.load_from_file(cgp_id, library) l2meval = L2MEvaluator(1e8, 1) i = 0 j = 0 r_total = 0.0 while True: inputs = l2meval.get_inputs(observation)
import opensim as osim from osim.http.client import Client import random import numpy as np import math import pickle remote_base = "http://grader.crowdai.org:1729" crowdai_token = "d140e3d9a1bf0e4aa9b4efba4d862460" client = Client(remote_base) observation = client.env_create(crowdai_token, env_id="ProstheticsEnv") def sigmoid(x): return 1 / (1 + np.exp(-x)) def relu(x): return np.multiply(x, (x > 0)) with open('normalization.pkl', 'rb') as f: maximum, minimum, srednia = pickle.load(f) with open('evolution.pkl', 'rb') as f: baza = pickle.load(f) def for_prop_ac(theta, x): #x to macierz ileś X 1 siec = [] x = np.matrix(x).transpose() for i in range(len(theta)):
help='render the environment locally') args = parser.parse_args() if args.agent not in globals(): raise ValueError('[run] Agent {} not found.'.format(args.agent)) SpecifiedAgent = globals()[args.agent] if args.submit and args.nb_steps: raise ValueError('[run] Cannot train and submit agent at same time.') if args.submit and args.visualize: raise ValueError('[run] Cannot visualize agent while submitting.') if args.submit: # Submit agent client = Client(remote_base) client.env_create(crowdai_token, env_id='ProstheticsEnv') client_env = ClientToEnv(client) client_env = DictToListFull(client_env) client_env = JSONable(client_env) agent = SpecifiedAgent(client_env.observation_space, client_env.action_space) agent.submit(client_env) elif args.nb_steps: # Train agent locally env = ProstheticsEnv(visualize=args.visualize) env = ForceDictObservation(env) env = DictToListFull(env) env = JSONable(env) agent = SpecifiedAgent(env.observation_space, env.action_space) # obs = env.reset()
import opensim as osim
from osim.http.client import Client
from osim.env import *
import numpy as np
import argparse

# Settings
remote_base = 'http://grader.crowdai.org:1729'

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store', required=True)
args = parser.parse_args()

# The local environment is only used for its observation/action spaces;
# every simulation step is executed by the remote grader through `client`.
env = RunEnv(visualize=False)
client = Client(remote_base)

# Create environment
observation = client.env_create(args.token)

# Run a single step
#
# The grader runs 3 simulations of 500 steps each, hence we attempt to
# run a max of 500x3=1500 steps
for i in range(1500):
    v = np.array(observation).reshape((-1, 1, env.observation_space.shape[0]))
    [observation, reward, done, info] = client.env_step(env.action_space.sample().tolist())
    print(observation)
    if done:
        observation = client.env_reset()
        # A falsy reset result means there is nothing left to simulate.
        # Stop here instead of reshaping an empty observation on the next
        # iteration.
        if not observation:
            break
import opensim as osim from osim.http.client import Client from osim.env import ProstheticsEnv import numpy as np import argparse # Settings remote_base = 'http://grader.crowdai.org:1729' token = 'c4cda3976f22b8f468b78a33e47bb432' client = Client(remote_base) # Create environment observation = client.env_create(token, env_id="ProstheticsEnv") env = ProstheticsEnv(visualize=False) # IMPLEMENTATION OF YOUR CONTROLLER # my_controller = ... (for example the one trained in keras_rl) # Run a single step # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one while True: #print(observation) env.action = [ 0, 0, 0.001, 0.001, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0 ] [observation, reward, done, info] = client.env_step(env.action) if done: observation = client.env_reset() if not observation:
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) # Okay, now it's time to learn something! We visualize the training here for show, but this # slows down training quite a lot. You can always safely abort the training prematurely using # Ctrl + C. if args.train: agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000) # After training is done, we save the final weights. agent.save_weights(args.model, overwrite=True) # If TEST and TOKEN, submit to crowdAI if not args.train and args.token: agent.load_weights(args.model) # Settings remote_base = 'http://grader.crowdai.org:1729' client = Client(remote_base) # Create environment observation = client.env_create(args.token) # Run a single step # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one while True: v = np.array(observation).reshape((env.observation_space.shape[0])) action = agent.forward(v) [observation, reward, done, info] = client.env_step(action.tolist()) if done: observation = client.env_reset() if not observation: break
def upload(frameskip=1):
    """Run the agent against the remote grader and submit the result.

    The API key is read from ``apikey.txt`` in the current directory. Each
    action chosen by ``agent.act`` is sent to the grader ``frameskip`` times
    in a row (or until the episode ends). The loop stops when
    ``client.env_reset()`` returns a falsy value, after which
    ``client.submit()`` is called.

    Args:
        frameskip: Number of consecutive grader steps per agent action.
            Must be at least 1.

    Raises:
        ValueError: If ``frameskip`` is smaller than 1 (the loop would never
            step the environment and therefore never terminate).
    """
    from osim.http.client import Client

    if frameskip < 1:
        raise ValueError('frameskip must be >= 1, got {!r}'.format(frameskip))

    # Use a context manager so the key file is closed deterministically.
    with open('apikey.txt') as key_file:
        apikey = key_file.read().strip('\n')
    # Never echo the full credential to stdout/logs; show only its tail.
    print('Using apikey:', '*' * max(len(apikey) - 4, 0) + apikey[-4:])

    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = apikey

    print('connecting...')
    client = Client(remote_base)
    observation_d = client.env_create(crowdai_token, env_id="ProstheticsEnv")
    print('environment created! running...')

    stepno = 0
    epino = 0
    total_reward = 0
    while True:
        # Convert the grader's dict observation to the agent's input and
        # query the agent once per frameskip group.
        observation = process_obs_dict(observation_d)
        a, q = agent.act(observation)
        a = [float(i) for i in list(a)]

        for _ in range(frameskip):
            [observation_d, reward, done, info] = client.env_step(a, True)
            stepno += 1
            total_reward += reward
            print('step', stepno, 'total reward', total_reward)

            if done:
                observation_d = client.env_reset()
                print('>> episode', epino, ' Done after', stepno, 'got reward:', total_reward)
                print('')
                total_reward = 0
                stepno = 0
                epino += 1
                # Do not repeat the action into the next episode.
                break

        # A falsy reset result means the grader has no more episodes.
        if not observation_d:
            break

    print('Done! Submitting...')
    client.submit()
def up():
    """Upload a run to CrowdAI: drive the remote grader with ``agent`` and submit.

    Reads the API key from ``apikey.txt``, creates a remote ``ProstheticsEnv``,
    feeds flattened observations to ``agent.act`` until ``client.env_reset()``
    returns a falsy value, submits, and finally writes ``sublog.csv``.
    """
    # Use a context manager so the key file is closed deterministically.
    with open('apikey.txt') as key_file:
        apikey = key_file.read().strip('\n')
    # Never echo the full credential to stdout/logs; show only its tail.
    print('apikey is', '*' * max(len(apikey) - 4, 0) + apikey[-4:])

    # NOTE(review): `osim` and `RunEnv` are not used below; the imports are
    # kept in case importing them has side effects the run relies on - confirm.
    import opensim as osim
    from osim.http.client import Client
    from osim.env import ProstheticsEnv as RunEnv

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = apikey

    client = Client(remote_base)

    # NOTE(review): nothing in this version appends to ob_log, so the file
    # written at the end is empty - confirm whether logging should be restored.
    ob_log = ''  # string to log observations

    # Create environment
    observation = client.env_create(crowdai_token, env_id="ProstheticsEnv")

    stepno = 0
    epino = 0
    total_reward = 0

    joint_names = (
        'ground_pelvis', 'hip_r', 'knee_r', 'ankle_r',
        'hip_l', 'knee_l', 'ankle_l',
    )
    body_quantities = (
        'body_pos', 'body_vel', 'body_acc',
        'body_pos_rot', 'body_vel_rot', 'body_acc_rot',
    )
    body_parts = (
        'pelvis', 'femur_r', 'pros_tibia_r', 'pros_foot_r', 'femur_l',
        'tibia_l', 'talus_l', 'calcn_l', 'toes_l', 'torso', 'head',
    )

    def obg(plain_obs):
        """Flatten the grader's observation dict into a 1-D numpy array."""
        obs = []

        # Centre of mass: position, velocity, acceleration.
        for key in ('mass_center_pos', 'mass_center_vel', 'mass_center_acc'):
            obs.extend(plain_obs['misc'][key])

        # Absolute joint positions, then velocities, then accelerations,
        # each in the fixed joint order above.
        for quantity in ('joint_pos', 'joint_vel', 'joint_acc'):
            for joint in joint_names:
                obs.extend(plain_obs[quantity][joint])

        # Per-body kinematics, quantity-major.
        for quantity in body_quantities:
            for part in body_parts:
                obs.extend(plain_obs[quantity][part])

        # Forces, in the dict's own iteration order.
        for force_values in plain_obs['forces'].values():
            obs.extend(force_values)

        return np.array(obs)

    print('environment created! running...')
    # Run a single step
    while True:
        proc_observation = obg(observation)
        a = [float(i) for i in list(agent.act(proc_observation)[0])]

        [observation, reward, done, info] = client.env_step(a, True)
        stepno += 1
        total_reward += reward
        print('step', stepno, 'total reward', total_reward)

        if done:
            observation = client.env_reset()
            print('>>>>>>>episode', epino, ' DONE after', stepno, 'got_reward', total_reward)
            total_reward = 0
            stepno = 0
            epino += 1
            # A falsy reset result means the grader has no more episodes.
            if not observation:
                break

    print('submitting...')
    client.submit()
    print('saving to file...')
    with open('sublog.csv', 'w') as f:
        f.write(ob_log)
nb_max_episode_steps=1000) print('Saving model ' + args.model) agent.save_weights(args.model, overwrite=True) print('Saved model ' + args.model) with open(args.model + '_history', 'w') as f: f.write(str(keras_history.history)) summarize.plot_diagrams(keras_history.history, args.model) if args.test: agent.load_weights(args.model) agent.test(env, nb_episodes=1, nb_max_episode_steps=env.timestep_limit) if args.submit: agent.load_weights(args.model) remote_base = 'http://grader.crowdai.org:1729' token = '688545d8ba985c174b4f967b40924a43' client = Client(remote_base) observation = client.env_create(token) # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one while True: [observation, reward, done, info] = client.env_step(agent.forward(observation).tolist()) print(observation) if done: observation = client.env_reset() if not observation: break client.submit()
class SubmitEnv:
    """Gym-style wrapper that routes reset/step calls to the remote grader.

    The first ``reset`` creates the remote ``ProstheticsEnv``; later resets
    call ``env_reset``. When a reset yields no observation the run is
    submitted and the process exits.
    """

    def __init__(self):
        from osim.http.client import Client
        remote_base = "http://grader.crowdai.org:1729"
        self.crowdai_token = "e47cb9f7fd533dc036dbd5d65d0d68c3"
        self.client = Client(remote_base)
        self.first_reset = True
        self.action_space = Box(low=0, high=1, shape=[19])
        self.observation_space = Box(low=-3, high=3, shape=[224])
        self.episodic_length = 0
        self.score = 0.0
        self.reward_range = None
        self.metadata = None

    def reset(self):
        """Start a new episode and return its first flattened observation.

        Submits the run and exits the process (status 0) when the grader
        returns no observation.
        """
        self.episodic_length = 0
        self.score = 0.0
        if self.first_reset:
            self.first_reset = False
            return self.get_observation(
                self.client.env_create(self.crowdai_token, env_id="ProstheticsEnv"))

        obs = self.client.env_reset()
        # Treat any falsy result (None, empty) as "no more episodes" so an
        # empty observation is never passed on to get_observation.
        if not obs:
            self.client.submit()
            print('SUBMITTED')
            import sys
            sys.exit(0)
        return self.get_observation(obs)

    def step(self, action):
        """Send ``action`` to the grader.

        Args:
            action: Object exposing ``tolist()`` (e.g. a numpy array).

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``observation``
            is the flattened list built by ``get_observation``.
        """
        import sys
        [obs, rew, done, info] = self.client.env_step(action.tolist(), True)
        self.episodic_length += 1
        self.score += rew
        pelvis_vx = obs['body_vel']['pelvis'][0]
        print(
            f'timestamp={self.episodic_length:3d} score={self.score:5.2f} velocity={pelvis_vx:3.2f}'
        )
        sys.stdout.flush()
        return self.get_observation(obs), rew, done, info

    def close(self):
        """No-op; nothing is released here."""
        pass

    def get_observation(self, state_desc):
        """Flatten a state-description dict into a plain list of features.

        ``state_desc`` is not modified.

        Args:
            state_desc: Dict with the keys read below (``body_*``,
                ``joint_*``, ``muscles``, ``misc``).

        Returns:
            List of features: body kinematics (pelvis first, other bodies
            with x/z position relative to the pelvis), joint states, muscle
            states sorted by muscle name, then centre-of-mass data.
        """
        res = []
        pelvis = None

        body_keys = ("body_pos", "body_vel", "body_acc",
                     "body_pos_rot", "body_vel_rot", "body_acc_rot")
        for body_part in [
                "pelvis", "head", "torso", "toes_l", "talus_l", "pros_foot_r",
                "pros_tibia_r"
        ]:
            cur = []
            for key in body_keys:
                cur += state_desc[key][body_part]
            if body_part == "pelvis":
                pelvis = cur
                res += cur[1:]  # pelvis x position is dropped
            else:
                # position relative to the pelvis on the x / z axes
                cur[0] -= pelvis[0]
                cur[2] -= pelvis[2]
                res += cur

        for joint in [
                "ankle_l", "ankle_r", "back", "hip_l", "hip_r", "knee_l",
                "knee_r"
        ]:
            res += state_desc["joint_pos"][joint]
            res += state_desc["joint_vel"][joint]
            res += state_desc["joint_acc"][joint]

        for muscle in sorted(state_desc["muscles"].keys()):
            res += [state_desc["muscles"][muscle]["activation"]]
            res += [state_desc["muscles"][muscle]["fiber_length"]]
            res += [state_desc["muscles"][muscle]["fiber_velocity"]]

        # Work on a copy: subtracting in place would silently change the
        # caller's state_desc["misc"]["mass_center_pos"].
        cm_pos = list(state_desc["misc"]["mass_center_pos"])
        cm_pos[0] -= pelvis[0]
        # NOTE(review): the z component is offset by pelvis[0] (x), unlike the
        # body parts above which use pelvis[2]. Kept as-is because a trained
        # policy may depend on this exact feature - confirm before changing.
        cm_pos[2] -= pelvis[0]
        res = res + cm_pos + state_desc["misc"][
            "mass_center_vel"] + state_desc["misc"]["mass_center_acc"]
        return res
import opensim as osim
from osim.http.client import Client

# Settings
remote_base = "http://127.0.0.1:5000"
crowdai_token = "a66245c8324e2d37b92f098a57ef3f99"

client = Client(remote_base)

# Create environment
observation = client.env_create(crowdai_token, env_id='L2M2019Env')

# IMPLEMENTATION OF YOUR CONTROLLER
# my_controller = ... (for example the one trained in keras_rl)
# Placeholder controller: the same 22 fixed values are sent on every step.
constant_action = (
    0.27365685, 0.3674228, 0.97836083, 0.15261972, 0.3319228, 0.03692374,
    0.09905472, 0.1971763, 0.8908676, 0.5744208, 0.9313108, 0.26675472,
    0.54930794, 0.91221607, 0.7701997, 0.95412385, 0.43612957, 0.2880115,
    0.26009786, 0.27759373, 0.9234055, 0.63657844,
)

while True:
    # Hand the client a fresh list each step, as a list literal would.
    step_result = client.env_step(list(constant_action), True)
    observation, reward, done, info = step_result
    print(observation)
    if not done:
        continue
    observation = client.env_reset()
    # A falsy reset result ends the run.
    if not observation:
        break

client.submit()
# Settings
remote_base = 'http://grader.crowdai.org:1729'
crowdai_token = "68535f05c7d17429755a34fb2bbb30e0"

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store')
parser.add_argument('--model', dest='model', action='store', default="sample.h5f")
args = parser.parse_args()

# The local environment supplies the observation/action space shapes.
env = RunEnv(visualize=False)
client = Client(remote_base)

nb_actions = env.action_space.shape[0]

# Create environment
# Honour --token when it is supplied; previously the option was parsed but
# ignored. Without it, the built-in token is used exactly as before.
observation = client.env_create(args.token or crowdai_token)

# IMPLEMENTATION OF YOUR CONTROLLER
# Actor network: flatten a window of one observation, three 32-unit ReLU
# layers, then a dense layer with one output per action.
actor = Sequential()
actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
current_observation = observation state = torch.Tensor( np.append(np.append(current_observation, context), 0)) episode_reward += reward timestep += 1 print(episode_reward, timestep) print(episode_reward, timestep) else: if submit: remote_base = "http://grader.crowdai.org:1730" crowdai_token = "f5969a7bb0466e0da072c72d6eb6d667" client = Client(remote_base) with open(modeldir + logfile + "/best_trajectory.pkl", "rb") as f: trajectory = pickle.load(f)["trajectory"] done = False timestep = 0 observation = client.env_create(crowdai_token, env_id='ProstheticsEnv') episode_reward = 0 while True: action = trajectory[timestep % 100]["action"] [observation, reward, done, info] = client.env_step(action.detach().numpy().tolist(), True) episode_reward += reward
idt = 100.0 observation[0] = (ob[22] - prevob[22]) * idt observation[41] = (ob[24] - prevob[24]) * idt observation[42] = (ob[26] - prevob[26]) * idt observation[43] = (ob[28] - prevob[28]) * idt observation[44] = (ob[30] - prevob[30]) * idt observation[45] = (ob[32] - prevob[32]) * idt observation[46] = (ob[34] - prevob[34]) * idt for i in range(lac): observation[46 + i + 1] = prevac[i] return observation client = Client(remote_base) # Create environment observation = client.env_create( '96dce98d36c80beead4d56faa0380d4d' ) #'ef125dcc4a82b5f162cc7f401c4c58a1') #args.token) prevac = np.zeros(env.action_space.shape) prevob = np.copy( np.array(observation).reshape((env.observation_space.shape[0]))) #action = action_filter(np.zeros(env.action_space.shape)) # Run a single step # # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one rewards = []
if args.train: agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=env.timestep_limit, log_interval=10000) # After training is done, we save the final weights. agent.save_weights(args.model, overwrite=True) # If TEST and TOKEN, submit to crowdAI if not args.train and args.token: agent.load_weights(args.model) # Settings remote_base = 'http://grader.crowdai.org:1729' client = Client(remote_base) # Create environment observation = client.env_create(args.token) # Run a single step # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one while True: v = np.array(observation).reshape((env.observation_space.shape[0])) action = agent.forward(v) [observation, reward, done, info] = client.env_step(action.tolist()) if done: observation = client.env_reset() if not observation: break
def up():
    """Upload a run to CrowdAI: drive the remote grader with ``agent`` and submit.

    Reads the API key from ``apikey.txt``, creates the remote environment,
    steps it with actions from ``agent.act`` until ``client.env_reset()``
    returns a falsy value, submits, and writes every raw observation as one
    CSV row to ``sublog.csv``.
    """
    # Use a context manager so the key file is closed deterministically.
    with open('apikey.txt') as key_file:
        apikey = key_file.read().strip('\n')
    # Never echo the full credential to stdout/logs; show only its tail.
    print('apikey is', '*' * max(len(apikey) - 4, 0) + apikey[-4:])

    # NOTE(review): `osim` and `RunEnv` are not used below; the imports are
    # kept in case importing them has side effects the run relies on - confirm.
    import opensim as osim
    from osim.http.client import Client
    from osim.env import RunEnv

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = apikey

    client = Client(remote_base)

    # Collect CSV rows in a list and join once at the end; repeated string
    # += grows quadratically over a long run.
    ob_rows = []

    # Create environment
    observation = client.env_create(crowdai_token)

    stepno = 0
    epino = 0
    total_reward = 0
    old_observation = None

    def obg(plain_obs):
        """Log the raw observation and return the processed one as an array."""
        nonlocal old_observation
        # log csv observation
        ob_rows.append(','.join([str(i) for i in plain_obs]))

        processed_observation, old_observation = go(plain_obs, old_observation, step=stepno)
        return np.array(processed_observation)

    print('environment created! running...')
    # Run a single step
    while True:
        proc_observation = obg(observation)

        [observation, reward, done, info] = client.env_step(
            [float(i) for i in list(agent.act(proc_observation)[0])], True)
        stepno += 1
        total_reward += reward
        print('step', stepno, 'total reward', total_reward)

        if done:
            observation = client.env_reset()
            # Reset the previous-observation state passed to go() for the new episode.
            old_observation = None
            print('>>>>>>>episode', epino, ' DONE after', stepno, 'got_reward', total_reward)
            total_reward = 0
            stepno = 0
            epino += 1
            # A falsy reset result means the grader has no more episodes.
            if not observation:
                break

    print('submitting...')
    client.submit()
    print('saving to file...')
    with open('sublog.csv', 'w') as f:
        # Every row is newline-terminated, matching the previous output.
        f.write(''.join(row + '\n' for row in ob_rows))
res += state_desc["joint_acc"][joint] for muscle in sorted(state_desc["muscles"].keys()): res += [state_desc["muscles"][muscle]["activation"]] res += [state_desc["muscles"][muscle]["fiber_length"]] res += [state_desc["muscles"][muscle]["fiber_velocity"]] cm_pos = [state_desc["misc"]["mass_center_pos"][i] - pelvis[i] for i in range(2)] res = res + cm_pos + state_desc["misc"]["mass_center_vel"] + state_desc["misc"]["mass_center_acc"] return res # Settings remote_base = "http://grader.crowdai.org:1729" crowdai_token = "01342e360022c2def5c2cc04c5843381" Client = Client(remote_base) layer_norm=True nb_actions=19 memory = Memory(limit=int(1.5e6), action_shape=(158,), observation_shape=(19,)) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) agent = DDPG(actor, critic, memory, (158,), (19,), gamma=0.99) saver=tf.train.Saver() # IMPLEMENTATION OF YOUR CONTROLLER # my_controller = ... (for example the one trained in keras_rl) sess=tf.InteractiveSession() agent.initialize(sess) sess.graph.finalize()
import opensim as osim from osim.http.client import Client from osim.env import ProstheticsEnv import numpy as np import os import sys import numpy as np import gym from gym import wrappers # Settings remote_base = "http://grader.crowdai.org:1729" crowdai_token = "06a7a9af9a879495223bcf5b9f0be33c" client = Client(remote_base) # Create environment observation = client.env_create(crowdai_token, env_id='ProstheticsEnv') # IMPLEMENTATION OF YOUR CONTROLLER # my_controller = ... (for example the one trained in keras_rl) def my_controller(observation, theta, n, mean, mean_diff, var): obs_std = np.sqrt(var) state = (observation - mean) / obs_std return theta.dot(state) def dict_list(observation): obs = [] for key in observation:
import opensim as osim
from osim.http.client import Client
from osim.env import *
import numpy as np
import argparse

# Settings
remote_base = 'http://grader.crowdai.org:1729'

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store', required=True)
args = parser.parse_args()

# Local environment: used for its observation/action spaces only.
env = RunEnv(visualize=False)
client = Client(remote_base)

# Create environment
observation = client.env_create(args.token)

# Run a single step
#
# The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
while True:
    v = np.array(observation).reshape((-1, 1, env.observation_space.shape[0]))
    # Sample a random action from the local action space and send it to the grader.
    step_result = client.env_step(env.action_space.sample().tolist())
    observation, reward, done, info = step_result
    print(observation)
    if not done:
        continue
    observation = client.env_reset()
    # A falsy reset result ends the loop.
    if not observation:
        break
def __init__(self, remote_base): self.client = Client(remote_base) self.ob_0 = np.array(41) self.ob_1 = np.zeros(14)