Code Example #1
File: submit.py Project: wh-forker/Run-Skeleton-Run
def submit(actor, critic, args, act_update_fn):
    act_fn, _, _ = act_update_fn(actor, critic, None, None, args)

    client = Client(REMOTE_BASE)

    all_episode_metrics = []

    episode_metrics = {
        "reward": 0.0,
        "step": 0,
    }

    observation_handler = create_observation_handler(args)
    action_handler = create_action_handler(args)
    observation = client.env_create(args.token)
    action = np.zeros(ACTION_SHAPE, dtype=np.float32)
    observation = observation_handler(observation, action)

    submitted = False
    while not submitted:
        print(episode_metrics["reward"])
        action = act_fn(observation)

        observation, reward, done, _ = client.env_step(
            action_handler(action).tolist())

        episode_metrics["reward"] += reward
        episode_metrics["step"] += 1

        if done:
            all_episode_metrics.append(episode_metrics)

            episode_metrics = {
                "reward": 0.0,
                "step": 0,
            }

            observation_handler = create_observation_handler(args)
            action_handler = create_action_handler(args)
            observation = client.env_create(args.token)

            if not observation:
                submitted = True
                break

            action = np.zeros(ACTION_SHAPE, dtype=np.float32)
            observation = observation_handler(observation, action)
        else:
            observation = observation_handler(observation, action)

    df = pd.DataFrame(all_episode_metrics)
    pprint(df.describe())

    if query_yes_no("Submit?"):
        client.submit()
Code Example #2
class RemoteProstheticsEnv(gym.Env):
    def __init__(self, base, token, round):
        self.base = base
        self.token = token
        self.client = None
        ## simulate local env
        self.osim_model = OSmodel()
        self.time_limit = 300 if round == 1 else 1000

    def reset(self, project=True):
        if self.client is None:
            self.client = Client(self.base)
            obs = self.client.env_create(self.token, env_id='ProstheticsEnv')
            self.osim_model.istep = 0
            return obs
        else:
            ### Calling reset() twice during submission is not allowed.
            raise NotImplementedError

    def step(self, action, project=True):
        self.osim_model.istep += 1
        [obs, reward, done, info] = self.client.env_step(action.tolist(), render=True)
        if done:
            self.osim_model.istep = 0
            obs = self.client.env_reset()
            if not obs:
                done = True
            else:
                done = False
        return obs, reward, done, info
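A minimal usage sketch for the wrapper above; the base URL, token, and agent policy below are placeholders, not values taken from this example:

# Sketch only: remote base URL, token and agent are placeholders.
env = RemoteProstheticsEnv("http://grader.crowdai.org:1729", token="YOUR_TOKEN", round=1)
obs = env.reset()
done = False
while not done:
    # step() already rolls over to the next grader episode until none remain
    obs, reward, done, info = env.step(np.array(agent.act(obs)))
env.client.submit()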
Code Example #3
File: run_trpo.py Project: JacobImai/learning_to_run
def submit(agent, logger, jump=False):
    token = None
    assert token is not None, "You need to provide your token to submit()"
    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)
    # Create environment
    new_ob = client.env_create(token)
    agent.ob_processor.reset()
    zero_action = np.zeros(agent.env.action_space.shape).tolist()
    first_frame = True
    done = False
    # Run a single step
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
    episode_count = 0
    episode_steps = 0
    episode_reward = 0

    all_rewards = []

    while True:

        # ignore first frame because it contains phantom obstacle
        if first_frame:
            new_ob, reward, done, info = client.env_step(zero_action, True)
            episode_reward += reward
            episode_steps += 1
            first_frame = False
            assert not done, "Episode finished in one step"
            continue

        new_ob = agent.ob_processor.process(new_ob)
        observation = np.reshape(new_ob, [1, -1])
        action, _ = agent.actor.predict(observation)
        action = np.clip(action, agent.act_low, agent.act_high)
        act_to_apply = action.squeeze()
        if jump:
            act_to_apply = np.tile(act_to_apply, 2)
        [new_ob, reward, done, info] = client.env_step(act_to_apply.tolist(),
                                                       True)

        episode_steps += 1
        episode_reward += reward
        logger.info("step={}, reward={}".format(episode_steps, reward))

        if done:
            episode_count += 1
            logger.info("Episode={}, steps={}, reward={}".format(
                episode_count, episode_steps, episode_reward))
            all_rewards.append(episode_reward)

            episode_steps = 0
            episode_reward = 0
            new_ob = client.env_reset()
            agent.ob_processor.reset()
            first_frame = True
            if not new_ob:
                break
    client.submit()
    logger.info("All rewards: {}".format(all_rewards))
Code Example #4
File: ltr.py Project: BotYue/LearningToRun
def submit():
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = "[YOUR_CROWD_AI_TOKEN_HERE]"
    client = Client(remote_base)

    task_fn = lambda: LTR()
    task = task_fn()
    state_dim = task.env.observation_space.shape[0]
    action_dim = task.env.action_space.shape[0]
    with open('data/ddpg-model-LearningToRun.bin', 'rb') as f:
        model = pickle.load(f)
    actor = DDPGActorNet(state_dim, action_dim)
    actor.load_state_dict(model)

    # Create environment
    state = client.env_create(crowdai_token)

    total_reward = 0.0
    while True:
        action = actor.predict(np.stack([state]), to_numpy=True).flatten()
        [state, reward, done, info] = client.env_step(action.tolist(), True)
        total_reward += reward
        print(state)
        if done:
            state = client.env_reset()
            if not state:
                break
    print(total_reward)
    client.submit()
Code Example #5
File: wrappers.py Project: sshkhr/prosthetics-rl
class Client_To_Env:
    def __init__(self, remote_base, crowdai_token):
        """
        Wrapper that reformats client environment to a local environment format,
        complete with observation_space, action_space, reset, step, submit, and
        time_limit.
        """
        
        self.client = Client(remote_base)
        self.crowdai_token = crowdai_token
        self.reset_ = self.client.env_reset
        self.step = self.client.env_step
        self.submit = self.client.submit
        self.time_limit = 300
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(19,), dtype=np.float32)

        self.first_reset = True

    def reset(self):
        if self.first_reset:
            self.first_reset = False
            obs = self.client.env_create(self.crowdai_token, env_id='ProstheticsEnv')
            return obs
        else:
            obs = self.reset_()
            return obs
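A rough usage sketch for the Client_To_Env wrapper above; the URL, token, and my_policy are placeholders rather than values from the original project:

# Sketch only: URL, token and my_policy are placeholders.
env = Client_To_Env("http://grader.crowdai.org:1729", "YOUR_CROWDAI_TOKEN")
obs = env.reset()
while True:
    action = my_policy(obs)  # assumed to return a plain list of 19 floats in [0, 1]
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()  # after the first call this maps to client.env_reset()
        if not obs:
            break
env.submit()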
Code Example #6
def submit(pi):
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = "0dd7c22f5eb61cb4453b5a5b8e510656"

    client = Client(remote_base)
    observation = client.env_create(crowdai_token, env_id="ProstheticsEnv")

    frame = score = 0

    while True:

        a = pi.act(desc_to_list(observation))

        [observation, reward, done, _] = client.env_step(a.tolist(), True)
        score += reward
        frame += 1

        if done:
            print("score=%0.2f in %i frames" % (score, frame))
            frame = score = 0

            observation = client.env_reset()
            if not observation:
                break

    client.submit()
Code Example #7
    def up():
        # uploading to CrowdAI

        # global _stepsize
        # _stepsize = 0.01

        apikey = open('apikey.txt').read().strip('\n')
        print('apikey is', apikey)

        import opensim as osim
        from osim.http.client import Client
        from osim.env import RunEnv

        # Settings
        remote_base = "http://grader.crowdai.org:1729"
        crowdai_token = apikey

        client = Client(remote_base)

        # Create environment
        observation = client.env_create(crowdai_token)
        # old_observation = None
        stepno = 0
        epino = 0
        total_reward = 0
        old_observation = None

        def obg(plain_obs):
            nonlocal old_observation, stepno
            processed_observation, old_observation = go(plain_obs,
                                                        old_observation,
                                                        step=stepno)
            return np.array(processed_observation)

        print('environment created! running...')
        # Run a single step
        while True:
            proc_observation = obg(observation)

            [observation, reward, done, info] = client.env_step(
                [float(i) for i in list(agent.act(proc_observation))], True)
            stepno += 1
            total_reward += reward
            print('step', stepno, 'total reward', total_reward)
            # print(observation)
            if done:
                observation = client.env_reset()
                old_observation = None

                print('>>>>>>>episode', epino, ' DONE after', stepno,
                      'got_reward', total_reward)
                total_reward = 0
                stepno = 0
                epino += 1

                if not observation:
                    break

        print('submitting...')
        client.submit()
Code Example #8
def submit(identifier, policy_fn, seed, iter):

    client = Client(remote_base)

    # Create environment
    observation = client.env_create(crowdai_token, env_id="ProstheticsEnv")

    # IMPLEMENTATION OF YOUR CONTROLLER
    pi = train(identifier,
               policy_fn,
               1,
               1,
               seed,
               save_final=False,
               play=True,
               bend=0)
    load_state(identifier, iter)

    while True:
        ob = state_desc_to_ob(observation)
        action = pi.act(False, np.array(ob))[0].tolist()
        for _ in range(param.action_repeat):
            [observation, reward, done, info] = client.env_step(action, True)
            if done:
                break
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()
Code Example #9
File: submit.py Project: 0123Andrew/submit_l2r
def submit_agent(args, model_params):

    ##########################################################

    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    token = args.token
    client = Client(remote_base)

    # Create environment
    di = client.env_create(token, env_id="ProstheticsEnv")

    stat = []
    ep = 1
    ii = 0
    reward_sum = 0
    print('\n\n#################################################\n\n')
    while True:
        ii += 1
        proj = env.dict_to_vec(di)
        action = actor.act(proj)
        action += np.random.rand(len(action)) / 10.

        [di, reward, done, info] = client.env_step(action.tolist(), True)
        reward_sum += reward
        print('ep: ' + str(ep) + '  >>  step: ' + str(int(ii)) +
              '  >>  reward: ' + format(reward, '.2f') + '  \t' +
              str(int(reward_sum)) + '\t  >>  pelvis X Y Z: \t' +
              format(di['body_pos']['pelvis'][0], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][1], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][2], '.2f'))
        if done:
            print('\n\n#################################################\n\n')
            stat.append([ep, ii, reward_sum])
            di = client.env_reset()
            ep += 1
            ii = 0
            reward_sum = 0
            if not di:
                break
    for e in stat:
        print(e)
    print('\n\nclient.submit()\n\n')
    client.submit()
    ##########################################################
    print('\n\n#################################################\n\n')
    print('DONE\n\n')
Code Example #10
class NIPS(object):

    def __init__(self, visualize=False, token=None, max_obstacles=3):
        logger.info("max_obstacles={}".format(max_obstacles))
        if token is None:
            self.remote_env = False
            self.env = RunEnv(visualize=visualize, max_obstacles=max_obstacles)
        else:
            self.remote_env = True
            self.local_env = RunEnv(visualize=False, max_obstacles=max_obstacles)
            self.token = token
            self.env = Client(GRADER_URL)
            self.env_created = False

    @property
    def observation_space(self):
        if self.remote_env:
            # because Client() has no observation_space
            return self.local_env.observation_space
        else:
            return self.env.observation_space

    @property
    def action_space(self):
        if self.remote_env:
            # because Client() has no action_space
            return self.local_env.action_space
        else:
            return self.env.action_space

    def reset(self):
        if self.remote_env:
            if not self.env_created:
                ob = self.env.env_create(self.token)
                self.env_created = True
            else:
                ob = self.env.env_reset()
        else:
            ob = self.env.reset(difficulty=2)
        return ob

    def step(self, action):
        if self.remote_env:
            ob, reward, done, info = self.env.env_step(action.tolist(), True)
        else:
            ob, reward, done, info = self.env.step(action)
        return ob, reward, done, info

    def close(self):
        if self.remote_env:
            self.env.submit()
        else:
            self.env.close()
Code Example #11
File: wrapperClient.py Project: yychrzh/parallel_rl
class WrapperClient():
    def __init__(self, remote_base):
        self.client = Client(remote_base)
        self.ob_0 = np.zeros(41)
        self.ob_1 = np.zeros(14)
        # self.ob_2 = np.zeros(41)

    def env_create(self, token):
        # use the token passed in rather than a hard-coded one
        self.ob_0 = self.preprocess(np.array(self.client.env_create(token)))
        # return np.concatenate((self.ob_0,self.ob_1,self.ob_2),axis=0)
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def env_reset(self):
        ob = self.client.env_reset()
        if ob is None:
            return None
        self.ob_0 = self.preprocess(np.array(ob))
        self.ob_0[1] = 0
        self.ob_1 = np.zeros(14)
        # self.ob_2 = np.zeros(41)
        # return np.concatenate((self.ob_0,self.ob_1,self.ob_2),axis=0)
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def env_step(self, action):
        res = self.client.env_step(action)
        ob_0_post = self.ob_0
        # ob_1_post = self.ob_1
        # ob_2_post = self.ob_2
        self.ob_0 = self.preprocess(np.array(res[0]))
        self.ob_0[1] = 0
        self.ob_1 = (self.ob_0[22:36] - ob_0_post[22:36]) / 0.01
        # self.ob_2 = self.ob_1 - ob_1_post
        # res[0] = np.concatenate((self.ob_0,self.ob_1,self.ob_2),axis=0)
        # put the preprocessed observation back into the response and return
        # the full [observation, reward, done, info] list
        res[0] = np.concatenate((self.ob_0, self.ob_1), axis=0)
        return res

    def submit(self):
        self.client.submit()

    def preprocess(self, v):
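        # Assumption: v follows the 41-dim RunEnv observation layout; the indices
        # below shift body-part x/y positions to be relative to the pelvis
        # (v[1], v[2]) and v[20], v[21] to be relative to v[4], v[5].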
        n = [1, 18, 22, 24, 26, 28, 30, 32, 34]
        m = [19, 23, 25, 27, 29, 31, 33, 35]
        for i in n:
            v[i] = v[i] - v[1]
        for i in m:
            v[i] = v[i] - v[2]
        v[20] = v[20] - v[4]
        v[21] = v[21] - v[5]
        return v
Code Example #12
def submit():
    from osim.http.client import Client
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = "01342e360022c2def5c2cc04c5843381"
    client = Client(remote_base)  # lowercase name so the Client class is not shadowed
    observation = client.env_create(env_id="ProstheticsEnv", token=crowdai_token)
    while True:
        k = np.reshape(np.array(observation), newshape=(-1, len(observation)))
        ac_ind = sess.run(sy_sampled_ac, feed_dict={sy_ob_no: k})
        ac_ind = np.reshape(ac_ind, newshape=(ac_ind.shape[1]))
        action = bins[ac_ind]
        [observation, reward, done, info] = client.env_step(action, True)
        if done:
            observation = client.env_reset()
            if not observation:
                break
    client.submit()
Code Example #13
class RobotControlNipsClient(RobotControlNipsLocal):
    
    '''
    Initiates the simulator and the connection
    '''

    def __init__(self):
        # Settings
        self.remote_base = "http://osim-rl-grader.aicrowd.com/"
        self.aicrowd_token = "a66245c8324e2d37b92f098a57ef3f99"  # use your aicrowd token
        # your aicrowd token (API KEY) can be found at your profile page at https://www.aicrowd.com

        self.client = Client(self.remote_base)

        # Create environment
        self.observation = self.client.env_create(self.aicrowd_token, env_id='L2M2019Env')
        self.reward = 0
Code Example #14
def submit(args):
    print('start submitting')

    remote_base = 'http://grader.crowdai.org:1733'
    client = Client(remote_base)

    ddpg = DDPG()
    ddpg.load_model(args.model, load_memory=False)

    state = client.env_create(TOKEN)
    fg = FeatureGenerator()
    state = fg.gen(state)

    step = 0
    ep_reward = 0

    while True:
        print('selecting action ...', end=' ')
        action = ddpg.select_action(list(state))

        print('client.env_step ...')
        next_state, reward, done, info = client.env_step(action.tolist())
        next_state = fg.gen(next_state)

        print('step: {0:03d}, ep_reward: {1:02.08f}'.format(step, ep_reward))
        state = next_state
        ep_reward += reward
        step += 1

        if done:
            print('done')
            state = client.env_reset()
            if not state:
                break

            step = 0
            ep_reward = 0

            fg = FeatureGenerator()
            state = fg.gen(state)

    client.submit()
Code Example #15
    def submit(self):

        remote_base = 'http://grader.crowdai.org:1729'
        env = RunEnv(visualize=self.visualize)
        client = Client(remote_base)

        # Create environment
        observation = client.env_create(self.submit_token)

        # Run a single step
        #
        # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
        while True:
            [observation, reward, done,
             info] = client.env_step(self.agent.forward(observation))
            if done:
                observation = client.env_reset()
                if not observation:
                    break

        client.submit()
Code Example #16
class RemoteSubmit(object):
    def __init__(self, token, agent_type):
        self.token = token
        self.remote_base = "http://grader.crowdai.org:1729"
        self.client = Client(self.remote_base)

        # TODO:: Add agent selector
        if agent_type == 'random':
            self.agent = RandomAgent()
        elif agent_type == 'fixed-action':
            self.agent = FixedActionAgent()
        elif agent_type == 'a3c':
            self.agent = A3CAgent(num_envs=2, num_steps=50, max_frames=1000)
        else:
            status = {
                'status': 'ERROR',
                'error_msg': 'Not supported agent-type'
            }
            raise Exception(status)

    def run(self):
        try:
            status = self.agent.run()
            observation = self.client.env_create(self.token,
                                                 env_id="ProstheticsEnv")

            while True:
                action = self.agent.get_action(observation)
                [observation, reward, done,
                 info] = self.client.env_step(action, False)
                if done:
                    observation = self.client.env_reset()
                    if not observation:
                        break
            self.client.submit()

        except Exception as e:
            status = {'status': 'ERROR', 'error_msg': e}
            raise Exception(status)
Code Example #17
def main():
    # Settings
    remote_base = 'http://grader.crowdai.org'

    # Command line parameters
    parser = argparse.ArgumentParser(
        description='Submit the result to crowdAI')
    parser.add_argument("hdf")
    parser.add_argument('--token', dest='token', action='store', required=True)
    args = parser.parse_args()

    hdf = h5py.File(args.hdf, 'r')

    env = GaitEnv(visualize=False)

    agent = cPickle.loads(hdf['agent_snapshots']['0995'].value)
    agent.stochastic = False

    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    total_reward = 0
    # Run a single step
    for i in range(501):
        ob = agent.obfilt(observation)
        a, _info = agent.act(ob)
        [observation, reward, done, info] = client.env_step(a.tolist(), True)
        print i, reward, done
        total_reward += reward
        if done:
            break

    print 'TOTAL REWARD: ', total_reward
    raw_input('press ENTER to submit')
    client.submit()
Code Example #18
File: submit.py Project: wiplug/osim-rl
import numpy as np
import argparse

from osim.env import RunEnv
from osim.http.client import Client

# Settings
remote_base = 'http://grader.crowdai.org:1729'

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store', required=True)
args = parser.parse_args()

env = RunEnv(visualize=False)
client = Client(remote_base)

# Create environment
observation = client.env_create(args.token)

# Run a single step
#
# The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
while True:
    v = np.array(observation).reshape((-1,1,env.observation_space.shape[0]))
    [observation, reward, done, info] = client.env_step(env.action_space.sample().tolist())
    print(observation)
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
Code Example #19
	def upload(frameskip = 1):

		from osim.http.client import Client

		apikey = open('apikey.txt').read().strip('\n')

		print('Using apikey:', apikey)

		remote_base = "http://grader.crowdai.org:1729"
		crowdai_token = apikey

		print('connecting...')
		client = Client(remote_base)

		observation_d = client.env_create(crowdai_token, env_id="ProstheticsEnv")
		#observation = process_obs_dict(observation_d)

		print('environment created! running...')

		#obs_collect = []
		#a_collect = []

		stepno = 0
		epino = 0
		total_reward = 0

		while True:

			#a = AGENT OUTPUT
			observation = process_obs_dict(observation_d)
			a, q = agent.act(observation)
			a = [float(i) for i in list(a)]

			#obs_collect.append(observation)
			#a_collect.append(a)

			for _ in range(frameskip):

				[observation_d, reward, done, info] = client.env_step(a, True)


				stepno += 1
				total_reward += reward

				print('step',stepno,'total reward',total_reward)

				if done:

					'''
					print('')
					print('saving...')
					print('')
					with open('upload_saves/upload_a_collect_' + str(epino) + '.p', 'wb') as f:
						pickle.dump(a_collect, f)
					with open('upload_saves/upload_obs_collect_' + str(epino) + '.p', 'wb') as f:
						pickle.dump(obs_collect, f)
					'''

					observation_d = client.env_reset()


					print('>> episode',epino,' Done after',stepno,'got reward:',total_reward)
					print('')

					total_reward = 0
					stepno = 0
					epino += 1

					break

			if not observation_d:

				break

		print('Done! Submitting...')
		client.submit()
Code Example #20
#     remote_host=REMOTE_HOST,
#     remote_port=REMOTE_PORT
# )

# # Create environment
# observation = client.env_create()

# Settings
remote_base = "http://osim-rl-grader.aicrowd.com/"
aicrowd_token = "b5f5cd09cb870c14547db176596d09e5"  # use your aicrowd token
# your aicrowd token (API KEY) can be found at your profile page at https://www.aicrowd.com

client = Client(remote_base)

# Create environment
observation = client.env_create(aicrowd_token, env_id='L2M2019Env')
"""
The grader runs N simulations of at most 1000 steps each. 
We stop after the last one
A new simulation starts when `clinet.env_step` returns `done==True`
and all the simulations end when the subsequent `client.env_reset()` 
returns a False
"""
mode = '3D'
difficulty = 2
visualize = False
seed = None
sim_dt = 0.01
sim_t = 10
timestep_limit = int(round(sim_t / sim_dt))
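The snippet above stops after setting the simulation parameters; the protocol described in its docstring can be driven by a loop along the following lines (a sketch only: my_controller stands in for whatever policy produces the action list):

# Sketch only: my_controller is a hypothetical policy returning an action list.
while True:
    action = my_controller(observation)
    [observation, reward, done, info] = client.env_step(action)
    if done:
        observation = client.env_reset()
        if not observation:  # env_reset() returns a falsy value once all simulations are done
            break
client.submit()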
Code Example #21
critic = Critic(layer_norm=layer_norm)
actor = Actor(nb_actions, layer_norm=layer_norm)
agent = DDPG(actor, critic, memory, (158,), (19,),
    gamma=0.99)
saver = tf.train.Saver()
# IMPLEMENTATION OF YOUR CONTROLLER
# my_controller = ... (for example the one trained in keras_rl)

sess = tf.InteractiveSession()
agent.initialize(sess)
sess.graph.finalize()
agent.reset()
filename = "/home/vaisakhs_shaj/Desktop/MODEL/tfSteps" + str(30000) + ".model"
saver.restore(sess, filename)
# Create environment
observation = Client.env_create(env_id="ProstheticsEnv", token=crowdai_token)

#print([n.name for n in tf.get_default_graph().as_graph_def().node])

def my_controller(obs):
    obs = np.array(dict_to_list(obs))
    action = agent.pi(obs, apply_noise=False, compute_Q=False)[0]
    action = action.tolist()
    return action


while True:
    [observation, reward, done, info] = Client.env_step(my_controller(observation), True)
    if done:
        observation = Client.env_reset()
        if not observation:
            break

Client.submit()
Code Example #22
import opensim as osim
from osim.http.client import Client
from osim.env import RunEnv

import pickle, sys
import numpy as np

# Settings
remote_base = "http://grader.crowdai.org:1729"
crowdai_token = "e5d9c43bc6add5150e8e23029d118215"

client = Client(remote_base)

# Create environment
observation = client.env_create(crowdai_token)

f = open('values_jump_new.txt', 'rb')
arrs = pickle.load(f)

g = open('values_second_leg.txt', 'rb')
arrs_new = pickle.load(g)


def my_controller(observation, ctr):
    return [float(x) for x in list(arr_list[min(ctr, max_action_steps - 1)])]


ep_no = 2
arr_list = arrs[ep_no]

ep_no_new = 1
Code Example #23
def main():
    args = parse_args()
    logger.configure()
    gamma = 0.99
    tau = 0.01
    normalize_returns = False
    normalize_observations = True
    batch_size = 64
    action_noise = None
    stddev = 0.2
    param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev),
                                         desired_action_stddev=float(stddev))
    critic_l2_reg = 1e-2
    actor_lr = 1e-4
    critic_lr = 1e-3
    popart = False
    clip_norm = None
    reward_scale = 1.

    env = prosthetics_env.Wrapper(osim_env.ProstheticsEnv(visualize=False),
                                  frameskip=4,
                                  reward_shaping=True,
                                  reward_shaping_x=1,
                                  feature_embellishment=True,
                                  relative_x_pos=True,
                                  relative_z_pos=True)

    top_model_dir = 'top-models/'

    # create tf sessions and graphs
    sess_list = []
    graph_list = []
    for i in range(len(args.model_files)):
        graph_list.append(tf.Graph())
        sess_list.append(tf.Session(graph=graph_list[i]))
    ddpg_agents = []
    for i in range(len(args.model_files)):
        model_name = args.model_files[i]
        sess = sess_list[i]
        graph = graph_list[i]
        l_size = args.layer_sizes[i]
        with sess.as_default():
        #with U.make_session(num_cpu=1, graph=g) as sess:
            with graph.as_default():
                #tf.global_variables_initializer()

                # restore agents from model files and store in ddpg_agents
                print("Restoring from..." + model_name)

                # Configure components.
                memory = Memory(limit=int(1e6), action_shape=env.action_space.shape,
                                observation_shape=env.observation_space.shape)
                critic = Critic(layer_norm=True, activation='relu', layer_sizes=[l_size, l_size])
                actor = Actor(env.action_space.shape[-1], layer_norm=True,
                              activation='relu', layer_sizes=[l_size, l_size])
                agent = DDPG(actor, critic, memory, env.observation_space.shape,
                             env.action_space.shape, gamma=gamma, tau=tau,
                             normalize_returns=normalize_returns,
                             normalize_observations=normalize_observations,
                             batch_size=batch_size, action_noise=action_noise,
                             param_noise=param_noise, critic_l2_reg=critic_l2_reg,
                             actor_lr=actor_lr, critic_lr=critic_lr,
                             enable_popart=popart, clip_norm=clip_norm,
                             reward_scale=reward_scale)

                # restore adam state and param noise
                restore_model_path = top_model_dir + model_name
                saver = tf.train.Saver(max_to_keep=500)

                # restore network weights
                saver.restore(sess, restore_model_path)

                adam_optimizer_store = pickle.load(open(restore_model_path
                                                        + ".pkl", "rb"))
                agent.actor_optimizer.m = adam_optimizer_store['actor_optimizer']['m']
                agent.actor_optimizer.v = adam_optimizer_store['actor_optimizer']['v']
                agent.actor_optimizer.t = adam_optimizer_store['actor_optimizer']['t']
                agent.critic_optimizer.m = adam_optimizer_store['critic_optimizer']['m']
                agent.critic_optimizer.v = adam_optimizer_store['critic_optimizer']['v']
                agent.critic_optimizer.t = adam_optimizer_store['critic_optimizer']['t']
                if 'param_noise' in adam_optimizer_store:
                    agent.param_noise = adam_optimizer_store['param_noise']

                # initialize and prepare agent session.
                agent.initialize(sess)
                #sess.graph.finalize()
                agent.reset()

                ddpg_agents.append(agent)

    agent = BlendedAgent(ddpg_agents, sess_list, graph_list)

    if args.evaluation:
        # setup eval env
        eval_env = prosthetics_env.EvaluationWrapper(osim_env.ProstheticsEnv(visualize=False),
                                                     frameskip=4,
                                                     reward_shaping=True,
                                                     reward_shaping_x=1,
                                                     feature_embellishment=True,
                                                     relative_x_pos=True,
                                                     relative_z_pos=True)
        eval_env.change_model(model=('3D').upper(), prosthetic=True, difficulty=0, seed=0)
        eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval'))

        nb_eval_steps = 1000
        # reward, mean_q, final_steps = evaluate_one_episode(eval_env, ddpg_agents, sess_list, graph_list,
        #                                                    nb_eval_steps=nb_eval_steps,
        #                                                    render=False)
        reward, mean_q, final_steps = evaluate_one_episode(eval_env, agent, nb_eval_steps, render=False)
        print("Reward: " + str(reward))
        print("Mean Q: " + str(mean_q))
        print("Final num steps: " + str(final_steps))

    # Submit to crowdai competition. What a hack. :)
    # if crowdai_client is not None and crowdai_token is not None and eval_env is not None:
    crowdai_submit_count = 0
    if args.crowdai_submit:
        remote_base = "http://grader.crowdai.org:1729"
        crowdai_client = Client(remote_base)
        eval_obs_dict = crowdai_client.env_create(args.crowdai_token, env_id="ProstheticsEnv")
        eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
            eval_obs_dict,
            reward_shaping=True,
            reward_shaping_x=1.,
            feature_embellishment=True,
            relative_x_pos=True,
            relative_z_pos=True)
        while True:
            action, _ = agent.pi(eval_obs_projection, apply_noise=False, compute_Q=False)
            submit_action = prosthetics_env.openai_to_crowdai_submit_action(action)
            clipped_submit_action = np.clip(submit_action, 0., 1.)
            actions_equal = clipped_submit_action == submit_action
            if not np.all(actions_equal):
                logger.debug("crowdai_submit_count:", crowdai_submit_count)
                logger.debug("  openai-action:", action)
                logger.debug("  submit-action:", submit_action)
            crowdai_submit_count += 1
            [eval_obs_dict, reward, done, info] = crowdai_client.env_step(clipped_submit_action.tolist(), True)
            # [eval_obs_dict, reward, done, info] = crowdai_client.env_step(agent.pi(eval_obs_projection, apply_noise=False, compute_Q=False), True)
            eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
                eval_obs_dict,
                reward_shaping=True,
                reward_shaping_x=1.,
                feature_embellishment=True,
                relative_x_pos=True,
                relative_z_pos=True)
            if done:
                logger.debug("done: crowdai_submit_count:", crowdai_submit_count)
                eval_obs_dict = crowdai_client.env_reset()
                if not eval_obs_dict:
                    break
                logger.debug("done: eval_obs_dict exists after reset")
                eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
                    eval_obs_dict,
                    reward_shaping=True,
                    reward_shaping_x=1.,
                    feature_embellishment=True,
                    relative_x_pos=True,
                    relative_z_pos=True)
        crowdai_client.submit()

    for i in range(len(sess_list)):
        sess_list[i].close()
Code Example #24

def observation_filter(observation):
    return observation


def action_filter(action):
    return action


def reward_filter(observation, action, reward):
    return reward


# need to change if
observation = observation_filter(client.env_create(args.token))
action = action_filter(np.zeros(env.action_space.shape))
numo = len(observation)
numa = len(action)

print("numo = " + str(numo) + " numa = " + str(numa))
sumreward = 0
numsteps = 0
first = 1
while True:
    message = socket.recv()
    off = 0
    if USE_BINARY_PROTO:
        cmd = struct.unpack_from('@B', message, offset=off)[0]
        off += 1
        req = bytearray()
Code Example #25
            use_lstm = True

if args.token:
    # Submit to competition
    # Reference: https://github.com/stanfordnmbl/osim-rl/blob/master/examples/submit.py

    remote_base = 'http://grader.crowdai.org:1729' # Submission to Round-1
    #remote_base = 'http://grader.crowdai.org:1730' # Submission to Round-2
    crowdai_token = args.token

    # Dummy environment, just need process_state_desc()
    dummy_env = create_env(agent_config['env_config'])

    # Create environment w/ Client
    client = Client(remote_base)
    state_desc = client.env_create(crowdai_token, env_id="ProstheticsEnv")
    state = dummy_env.process_state_desc(state_desc)  # initial state

    if use_lstm:
        # Initial hidden state at start of episode
        hidden = agent.local_evaluator.policy_map['default'].get_initial_state()

    # Evaluation loop
    while True:
        # NOTE TODO: reduce action space is hard-coded in train.py!
        if use_lstm:
            action, hidden, logits_dict = agent.compute_action(observation=state, state=hidden)
        else:
            action = agent.compute_action(state)

        action = dummy_env.expand_action(action)  # get back original action space, this is also a list now
Code Example #26
def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size, nprocs,
         policy_hid_list, valfunc_hid_list, gpu_pct, restore_path, animate,
         submit):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        batch_size: number of episodes per policy training batch
    """
    # killer = GracefulKiller()

    env, obs_dim, act_dim = init_osim(animate)
    env.seed(111 + mpi_util.rank)
    mpi_util.set_global_seeds(111 + mpi_util.rank)

    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    now = datetime.utcnow().strftime(
        "%b-%d_%H:%M:%S")  # create unique directories
    if mpi_util.rank == 0:
        #aigym_path = os.path.join('/tmp', env_name, now)
        #env = wrappers.Monitor(env, aigym_path, force=True)
        logger = Logger(logname=env_name, now=now)

    episode = 0

    checkpoint = Checkpoint("saves", now)
    # restore from checkpoint?
    if restore_path:
        (policy, val_func, scaler, episode, obs_dim, act_dim,
         kl_targ) = checkpoint.restore(restore_path)
    else:
        policy = Policy(obs_dim, act_dim, kl_targ)
        val_func = NNValueFunction(obs_dim)
        scaler = Scaler(obs_dim)

        if mpi_util.rank == 0:
            # run a few episodes (on node 0) of untrained policy to initialize scaler:
            trajectories = run_policy(env, policy, scaler, episodes=5)

            unscaled = np.concatenate(
                [t['unscaled_obs'] for t in trajectories])
            scaler.update(
                unscaled)  # update running statistics for scaling observations

        # broadcast policy weights, scaler, val_func
        (policy, scaler, val_func) = mpi_util.broadcast_policy_scaler_val(
            policy, scaler, val_func)

        if mpi_util.rank == 0:
            checkpoint.save(policy, val_func, scaler, episode)

    if animate:
        observes, actions, rewards, unscaled_obs = run_episode(env,
                                                               policy,
                                                               scaler,
                                                               animate=animate)
        exit(0)

    if submit:
        # Settings
        #remote_base = 'http://grader.crowdai.org:1729'
        remote_base = 'http://grader.crowdai.org:1730'
        token = 'a83412a94593cae3a491f3ee28ff44e1'

        client = Client(remote_base)

        # Create environment
        observation = client.env_create(token)
        step = 0.0
        observes, actions, rewards, unscaled_obs = [], [], [], []
        scale, offset = scaler.get()
        scale[-1] = 1.0  # don't scale time step feature
        offset[-1] = 0.0  # don't offset time step feature

        # Run a single step
        #
        # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
        while True:
            obs = np.array(observation).astype(np.float32).reshape((1, -1))
            print("OBSERVATION TYPE:", type(obs), obs.shape)
            print(obs)
            obs = np.append(obs, [[step]], axis=1)  # add time step feature
            unscaled_obs.append(obs)
            obs = (obs - offset) * scale  # center and scale observations
            observes.append(obs)

            action = policy.sample(obs).astype(np.float32).reshape((-1, 1))
            print("ACTION TYPE:", type(action), action.shape)
            print(action)
            actions.append(action)

            [observation, reward, done,
             info] = client.env_step(action.tolist())
            print("step:", step, "reward:", reward)

            if not isinstance(reward, float):
                reward = np.asscalar(reward)
            rewards.append(reward)
            step += 1e-3  # increment time step feature

            if done:
                print(
                    "================================== RESTARTING ================================="
                )
                observation = client.env_reset()
                step = 0.0
                observes, actions, rewards, unscaled_obs = [], [], [], []
                scale, offset = scaler.get()
                scale[-1] = 1.0  # don't scale time step feature
                offset[-1] = 0.0  # don't offset time step feature
                if not observation:
                    break

        client.submit()
        exit(0)

    ######

    worker_batch_size = int(batch_size / mpi_util.nworkers)  # HACK
    if (worker_batch_size * mpi_util.nworkers != batch_size):
        print("batch_size:", batch_size, " is not divisible by nworkers:",
              mpi_util.nworkers)
        exit(1)

    batch = 0
    while episode < num_episodes:
        if mpi_util.rank == 0 and batch > 0 and batch % 10 == 0:
            checkpoint.save(policy, val_func, scaler, episode)
        batch = batch + 1

        trajectories = run_policy(env,
                                  policy,
                                  scaler,
                                  episodes=worker_batch_size)
        trajectories = mpi_util.gather_trajectories(trajectories)

        if mpi_util.rank == 0:
            # concatenate trajectories into one list
            trajectories = list(itertools.chain.from_iterable(trajectories))
            print("did a batch of ", len(trajectories), " trajectories")
            print([t['rewards'].sum() for t in trajectories])

            episode += len(trajectories)
            add_value(trajectories,
                      val_func)  # add estimated values to episodes
            add_disc_sum_rew(trajectories,
                             gamma)  # calculated discounted sum of Rs
            add_gae(trajectories, gamma, lam)  # calculate advantage

            # concatenate all episodes into single NumPy arrays
            observes, actions, advantages, disc_sum_rew = build_train_set(
                trajectories)

            # add various stats to training log:
            logger.log({
                '_MeanReward':
                np.mean([t['rewards'].sum() for t in trajectories]),
                'Steps':
                np.sum([t['observes'].shape[0] for t in trajectories])
            })
            log_batch_stats(observes, actions, advantages, disc_sum_rew,
                            logger, episode)

            policy.update(observes, actions, advantages,
                          logger)  # update policy
            val_func.fit(observes, disc_sum_rew,
                         logger)  # update value function

            unscaled = np.concatenate(
                [t['unscaled_obs'] for t in trajectories])
            scaler.update(
                unscaled)  # update running statistics for scaling observations

            logger.write(
                display=True)  # write logger results to file and stdout

        # if mpi_util.rank == 0 and killer.kill_now:
        #     if input('Terminate training (y/[n])? ') == 'y':
        #         break
        #     killer.kill_now = False

        # broadcast policy weights, scaler, val_func
        (policy, scaler, val_func) = mpi_util.broadcast_policy_scaler_val(
            policy, scaler, val_func)

    if mpi_util.rank == 0: logger.close()
    policy.close_sess()
    if mpi_util.rank == 0: val_func.close_sess()
Code Example #27
File: submit.py Project: yrevar/osim-rl
from osim.http.client import Client
from osim.env import ProstheticsEnv
import numpy as np
import argparse

# Settings
remote_base = 'http://grader.crowdai.org:1729'

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store', required=True)
args = parser.parse_args()

client = Client(remote_base)

# Create environment
observation = client.env_create(args.token, env_id="ProstheticsEnv")
env = ProstheticsEnv()

# Run a single step
# The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
while True:
    print(observation)
    [observation, reward, done, info] = client.env_step(env.action_space.sample().tolist())
    if done:
        observation = client.env_reset()
        if not observation:
            break
            
client.submit()
Code Example #28
from evo_rbc.main.prosthetic_map_elites.common import get_MAPElites

load_path = "map_elites_repertoire_50.pkl"

map_elites = get_MAPElites()
map_elites.load_repertoire(load_path)

# Settings
remote_base = "http://grader.crowdai.org:1729"
crowdai_token = "f5969a7bb0466e0da072c72d6eb6d667"

client = Client(remote_base)

# Create environment
observation = client.env_create(crowdai_token, env_id="ProstheticsEnv")

# IMPLEMENTATION OF YOUR CONTROLLER
# my_controller = ... (for example the one trained in keras_rl)


def my_controller(observation, time_step):
    bin_index = (0, )
    genome = map_elites.container.grid[bin_index]["genome"]
    action = []
    for muscle_index in range(19):
        action.append(
            genome.control_function(muscle_index=muscle_index,
                                    time_step=time_step)[0])
    return action
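The controller above is only defined, never driven; a sketch of the standard grader loop that would call it (the step counter passed as time_step is illustrative):

# Sketch only: drive my_controller with the usual grader loop.
time_step = 0
while True:
    action = my_controller(observation, time_step)
    [observation, reward, done, info] = client.env_step(action, True)
    time_step += 1
    if done:
        observation = client.env_reset()
        time_step = 0
        if not observation:
            break
client.submit()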
Code Example #29
File: run.py Project: decoderkurt/osim-rl-helper
    args = parser.parse_args()

    if args.agent not in globals():
        raise ValueError('[run] Agent {} not found.'.format(args.agent))
    SpecifiedAgent = globals()[args.agent]

    if args.submit and args.nb_steps:
        raise ValueError('[run] Cannot train and submit agent at same time.')

    if args.submit and args.visualize:
        raise ValueError('[run] Cannot visualize agent while submitting.')

    if args.submit:
        # Submit agent
        client = Client(remote_base)
        client.env_create(crowdai_token, env_id='ProstheticsEnv')
        client_env = ClientToEnv(client)
        client_env = DictToListFull(client_env)
        client_env = JSONable(client_env)
        agent = SpecifiedAgent(client_env.observation_space,
                               client_env.action_space)
        agent.submit(client_env)
    elif args.nb_steps:
        # Train agent locally
        env = ProstheticsEnv(visualize=args.visualize)
        env = ForceDictObservation(env)
        env = DictToListFull(env)
        env = JSONable(env)
        agent = SpecifiedAgent(env.observation_space, env.action_space)
        if MPI.COMM_WORLD.Get_rank() == 0:
            logger.configure()
Code Example #30
    # if done:
    #     observation = env.reset()
    # if not observation:
    #     break

    # client.submit()

# If a TOKEN is provided, submit to the grader; otherwise run some test experiments
if args.token:
    agent.load_weights(args.model)
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)
    total_reward = 0

    # Run a single step
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
    while True:
        v = np.array(observation).reshape((env.observation_space.shape[0]))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        observation = process_observation(observation)
        total_reward += reward
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()
Code Example #31

def str2bool(v):
    return v.lower() in ("yes", "true", "1")


remote_base = 'http://grader.crowdai.org:1729'
client = Client(remote_base)

config = ConfigParser.ConfigParser()
config.readfp(open('config.ini'))

learning = False

env = RunEnv(visualize=False)
observation = client.env_create('2060a86df422ef5a67dea16c5320c8ad')
nb_sensors = env.observation_space.shape[0]
if config.get('simulation', 'agent_type') == 'cacla':
    ag = CaclaAg(env.action_space.shape[0], nb_sensors)
else:
    ag = OffNFACAg(env.action_space.shape[0], nb_sensors)
ag.load(int(config.get('simulation', 'load_episode')))
stop = False
while not stop:
    #env stoch but testing only on one episode
    ag.start_ep(observation, learning)
    ac = ag.run(0, observation, learning, False, False)
    total_reward = 0.0
    step = 0
    while True:
        #-1 1 to 0 1