Code Example #1
File: run_trpo.py Project: JacobImai/learning_to_run
def submit(agent, logger, jump=False):
    token = None
    assert token is not None, "You need to provide your token to submit()"
    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)
    # Create environment
    new_ob = client.env_create(token)
    agent.ob_processor.reset()
    zero_action = np.zeros(agent.env.action_space.shape).tolist()
    first_frame = True
    done = False
    # Run a single step
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
    episode_count = 0
    episode_steps = 0
    episode_reward = 0

    all_rewards = []

    while True:

        # ignore first frame because it contains phantom obstacle
        if first_frame:
            new_ob, reward, done, info = client.env_step(zero_action, True)
            episode_reward += reward
            episode_steps += 1
            first_frame = False
            assert not done, "Episode finished in one step"
            continue

        new_ob = agent.ob_processor.process(new_ob)
        observation = np.reshape(new_ob, [1, -1])
        action, _ = agent.actor.predict(observation)
        action = np.clip(action, agent.act_low, agent.act_high)
        act_to_apply = action.squeeze()
        if jump:
            act_to_apply = np.tile(act_to_apply, 2)
        [new_ob, reward, done, info] = client.env_step(act_to_apply.tolist(),
                                                       True)

        episode_steps += 1
        episode_reward += reward
        logger.info("step={}, reward={}".format(episode_steps, reward))

        if done:
            episode_count += 1
            logger.info("Episode={}, steps={}, reward={}".format(
                episode_count, episode_steps, episode_reward))
            all_rewards.append(episode_reward)

            episode_steps = 0
            episode_reward = 0
            new_ob = client.env_reset()
            agent.ob_processor.reset()
            first_frame = True
            if not new_ob:
                break
    client.submit()
    logger.info("All rewards: {}".format(all_rewards))
Code Example #2
File: ltr.py Project: BotYue/LearningToRun
def submit():
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = "[YOUR_CROWD_AI_TOKEN_HERE]"
    client = Client(remote_base)

    task_fn = lambda: LTR()
    task = task_fn()
    state_dim = task.env.observation_space.shape[0]
    action_dim = task.env.action_space.shape[0]
    with open('data/ddpg-model-LearningToRun.bin', 'rb') as f:
        model = pickle.load(f)
    actor = DDPGActorNet(state_dim, action_dim)
    actor.load_state_dict(model)

    # Create environment
    state = client.env_create(crowdai_token)

    total_reward = 0.0
    while True:
        action = actor.predict(np.stack([state]), to_numpy=True).flatten()
        [state, reward, done, info] = client.env_step(action, True)
        total_reward += reward
        print(state)
        if done:
            state = client.env_reset()
            if not state:
                break
    print(total_reward)
    client.submit()
Code Example #3
    def up():
        # uploading to CrowdAI

        # global _stepsize
        # _stepsize = 0.01

        apikey = open('apikey.txt').read().strip('\n')
        print('apikey is', apikey)

        import opensim as osim
        from osim.http.client import Client
        from osim.env import RunEnv

        # Settings
        remote_base = "http://grader.crowdai.org:1729"
        crowdai_token = apikey

        client = Client(remote_base)

        # Create environment
        observation = client.env_create(crowdai_token)
        # old_observation = None
        stepno = 0
        epino = 0
        total_reward = 0
        old_observation = None

        def obg(plain_obs):
            nonlocal old_observation, stepno
            processed_observation, old_observation = go(plain_obs,
                                                        old_observation,
                                                        step=stepno)
            return np.array(processed_observation)

        print('environment created! running...')
        # Run a single step
        while True:
            proc_observation = obg(observation)

            [observation, reward, done, info] = client.env_step(
                [float(i) for i in list(agent.act(proc_observation))], True)
            stepno += 1
            total_reward += reward
            print('step', stepno, 'total reward', total_reward)
            # print(observation)
            if done:
                observation = client.env_reset()
                old_observation = None

                print('>>>>>>>episode', epino, ' DONE after', stepno,
                      'got_reward', total_reward)
                total_reward = 0
                stepno = 0
                epino += 1

                if not observation:
                    break

        print('submitting...')
        client.submit()
Code Example #4
def submit(identifier, policy_fn, seed, iter):

    client = Client(remote_base)

    # Create environment
    observation = client.env_create(crowdai_token, env_id="ProstheticsEnv")

    # IMPLEMENTATION OF YOUR CONTROLLER
    pi = train(identifier,
               policy_fn,
               1,
               1,
               seed,
               save_final=False,
               play=True,
               bend=0)
    load_state(identifier, iter)

    while True:
        ob = state_desc_to_ob(observation)
        action = pi.act(False, np.array(ob))[0].tolist()
        for _ in range(param.action_repeat):
            [observation, reward, done, info] = client.env_step(action, True)
            if done:
                break
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()
Code Example #5
def submit(pi):
    remote_base = "http://grader.crowdai.org:1729"
    crowdai_token = "0dd7c22f5eb61cb4453b5a5b8e510656"

    client = Client(remote_base)
    observation = client.env_create(crowdai_token, env_id="ProstheticsEnv")

    frame = score = 0

    while True:

        a = pi.act(desc_to_list(observation))

        [observation, reward, done, _] = client.env_step(a.tolist(), True)
        score += reward
        frame += 1

        if done:
            print("score=%0.2f in %i frames" % (score, frame))
            frame = score = 0

            observation = client.env_reset()
            if not observation:
                break

    client.submit()
Code Example #6
class RemoteProstheticsEnv(gym.Env):
    def __init__(self, base, token, round):
        self.base = base
        self.token = token
        self.client = None
        ## simulate local env
        self.osim_model = OSmodel()
        self.time_limit = 300 if round == 1 else 1000

    def reset(self, project=True):
        if self.client is None:
            self.client = Client(self.base)
            obs = self.client.env_create(self.token, env_id='ProstheticsEnv')
            self.osim_model.istep = 0
            return obs
        else:
            ### It is not allowed to call reset() twice in submitting.
            raise NotImplementedError

    def step(self, action, project=True):
        self.osim_model.istep += 1
        [obs, reward, done, info] = self.client.env_step(action.tolist(), render=True)
        if done:
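            # The remote episode ended: reset immediately and only report done once
            # the grader has no more episodes left (env_reset() returns a falsy value).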
            self.osim_model.istep = 0
            obs = self.client.env_reset()
            if not obs:
                done = True
            else:
                done = False
        return obs, reward, done, info
Code Example #7
File: submit.py Project: 0123Andrew/submit_l2r
def submit_agent(args, model_params):

    ##########################################################

    actor_fn, params_actor, params_crit = build_model_test(**model_params)
    weights = [p.get_value() for p in params_actor]
    actor = Agent(actor_fn, params_actor, params_crit)
    actor.set_actor_weights(weights)
    if args.weights is not None:
        actor.load(args.weights)

    env = RunEnv2(model=args.modeldim,
                  prosthetic=args.prosthetic,
                  difficulty=args.difficulty,
                  skip_frame=3)

    # Settings
    remote_base = "http://grader.crowdai.org:1729"
    token = args.token
    client = Client(remote_base)

    # Create environment
    di = client.env_create(token, env_id="ProstheticsEnv")

    stat = []
    ep = 1
    ii = 0
    reward_sum = 0
    print('\n\n#################################################\n\n')
    while True:
        ii += 1
        proj = env.dict_to_vec(di)
        action = actor.act(proj)
        action += np.random.rand(len(action)) / 10.

        [di, reward, done, info] = client.env_step(action.tolist(), True)
        reward_sum += reward
        print('ep: ' + str(ep) + '  >>  step: ' + str(int(ii)) +
              '  >>  reward: ' + format(reward, '.2f') + '  \t' +
              str(int(reward_sum)) + '\t  >>  pelvis X Y Z: \t' +
              format(di['body_pos']['pelvis'][0], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][1], '.2f') + '\t' +
              format(di['body_pos']['pelvis'][2], '.2f'))
        if done:
            print('\n\n#################################################\n\n')
            stat.append([ep, ii, reward_sum])
            di = client.env_reset()
            ep += 1
            ii = 0
            reward_sum = 0
            if not di:
                break
    for e in stat:
        print(e)
    print('\n\nclient.submit()\n\n')
    client.submit()
    ##########################################################
    print('\n\n#################################################\n\n')
    print('DONE\n\n')
Code Example #8
File: submit.py Project: wh-forker/Run-Skeleton-Run
def submit(actor, critic, args, act_update_fn):
    act_fn, _, _ = act_update_fn(actor, critic, None, None, args)

    client = Client(REMOTE_BASE)

    all_episode_metrics = []

    episode_metrics = {
        "reward": 0.0,
        "step": 0,
    }

    observation_handler = create_observation_handler(args)
    action_handler = create_action_handler(args)
    observation = client.env_create(args.token)
    action = np.zeros(ACTION_SHAPE, dtype=np.float32)
    observation = observation_handler(observation, action)

    submitted = False
    while not submitted:
        print(episode_metrics["reward"])
        action = act_fn(observation)

        observation, reward, done, _ = client.env_step(
            action_handler(action).tolist())

        episode_metrics["reward"] += reward
        episode_metrics["step"] += 1

        if done:
            all_episode_metrics.append(episode_metrics)

            episode_metrics = {
                "reward": 0.0,
                "step": 0,
            }

            observation_handler = create_observation_handler(args)
            action_handler = create_action_handler(args)
            observation = client.env_create(args.token)

            if not observation:
                submitted = True
                break

            action = np.zeros(ACTION_SHAPE, dtype=np.float32)
            observation = observation_handler(observation, action)
        else:
            observation = observation_handler(observation, action)

    df = pd.DataFrame(all_episode_metrics)
    pprint(df.describe())

    if query_yes_no("Submit?"):
        client.submit()
Code Example #9
class NIPS(object):

    def __init__(self, visualize=False, token=None, max_obstacles=3):
        logger.info("max_obstacles={}".format(max_obstacles))
        if token is None:
            self.remote_env = False
            self.env = RunEnv(visualize=visualize, max_obstacles=max_obstacles)
        else:
            self.remote_env = True
            self.local_env = RunEnv(visualize=False, max_obstacles=max_obstacles)
            self.token = token
            self.env = Client(GRADER_URL)
            self.env_created = False

    @property
    def observation_space(self):
        if self.remote_env:
            # because Client() has no observation_space
            return self.local_env.observation_space
        else:
            return self.env.observation_space

    @property
    def action_space(self):
        if self.remote_env:
            # because Client() has no action_space
            return self.local_env.action_space
        else:
            return self.env.action_space

    def reset(self):
        if self.remote_env:
            if not self.env_created:
                ob = self.env.env_create(self.token)
                self.env_created = True
            else:
                ob = self.env.env_reset()
        else:
            ob = self.env.reset(difficulty=2)
        return ob

    def step(self, action):
        if self.remote_env:
            ob, reward, done, info = self.env.env_step(action.tolist(), True)
        else:
            ob, reward, done, info = self.env.step(action)
        return ob, reward, done, info

    def close(self):
        if self.remote_env:
            self.env.submit()
        else:
            self.env.close()
Code Example #10
File: wrapperClient.py Project: yychrzh/parallel_rl
class WrapperClient():
    def __init__(self, remote_base):
        self.client = Client(remote_base)
        self.ob_0 = np.zeros(41)
        self.ob_1 = np.zeros(14)
        # self.ob_2 = np.zeros(41)

    def env_create(self, token):
        self.ob_0 = self.preprocess(np.array(self.client.env_create(token)))
        # return np.concatenate((self.ob_0,self.ob_1,self.ob_2),axis=0)
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def env_reset(self):
        ob = self.client.env_reset()
        if ob is None:
            return None
        self.ob_0 = self.preprocess(np.array(ob))
        self.ob_0[1] = 0
        self.ob_1 = np.zeros(14)
        # self.ob_2 = np.zeros(41)
        # return np.concatenate((self.ob_0,self.ob_1,self.ob_2),axis=0)
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def env_step(self, action):
        res = self.client.env_step(action)
        ob_0_post = self.ob_0
        # ob_1_post = self.ob_1
        # ob_2_post = self.ob_2
        self.ob_0 = self.preprocess(np.array(res[0]))
        self.ob_0[1] = 0
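        # Finite-difference velocity estimate for the body-part positions
        # (divided by what is presumably the 0.01 s simulation timestep).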
        self.ob_1 = (self.ob_0[22:36] - ob_0_post[22:36]) / 0.01
        # self.ob_2 = self.ob_1 - ob_1_post
        # res[0] = np.concatenate((self.ob_0,self.ob_1,self.ob_2),axis=0)
        return np.concatenate((self.ob_0, self.ob_1), axis=0)

    def submit(self):
        self.client.submit()

    def preprocess(self, v):
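        # Express selected entries of the raw observation relative to the pelvis:
        # indices in n are shifted by v[1] and indices in m by v[2] (presumably the
        # pelvis x and y coordinates of the RunEnv observation vector).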
        n = [1, 18, 22, 24, 26, 28, 30, 32, 34]
        m = [19, 23, 25, 27, 29, 31, 33, 35]
        for i in n:
            v[i] = v[i] - v[1]
        for i in m:
            v[i] = v[i] - v[2]
        v[20] = v[20] - v[4]
        v[21] = v[21] - v[5]
        return v
Code Example #11
 def submit():
     from osim.http.client import Client
     remote_base = "http://grader.crowdai.org:1729"
     crowdai_token = "01342e360022c2def5c2cc04c5843381"
     client = Client(remote_base)
     observation = client.env_create(env_id="ProstheticsEnv", token=crowdai_token)
     while True:
         k = np.reshape(np.array(observation), newshape=(-1, len(observation)))
         ac_ind = sess.run(sy_sampled_ac, feed_dict={sy_ob_no: k})
         ac_ind = np.reshape(ac_ind, newshape=(ac_ind.shape[1]))
         action = bins[ac_ind]
         [observation, reward, done, info] = client.env_step(action, True)
         if done:
             observation = client.env_reset()
             if not observation:
                 break
     client.submit()
Code Example #12
def submit(args):
    print('start submitting')

    remote_base = 'http://grader.crowdai.org:1733'
    client = Client(remote_base)

    ddpg = DDPG()
    ddpg.load_model(args.model, load_memory=False)

    state = client.env_create(TOKEN)
    fg = FeatureGenerator()
    state = fg.gen(state)

    step = 0
    ep_reward = 0

    while True:
        print('selecting action ...', end=' ')
        action = ddpg.select_action(list(state))

        print('client.env_step ...')
        next_state, reward, done, info = client.env_step(action.tolist())
        next_state = fg.gen(next_state)

        print('step: {0:03d}, ep_reward: {1:02.08f}'.format(step, ep_reward))
        state = next_state
        ep_reward += reward
        step += 1

        if done:
            print('done')
            state = client.env_reset()
            if not state:
                break

            step = 0
            ep_reward = 0

            fg = FeatureGenerator()
            state = fg.gen(state)

    client.submit()
Code Example #13
    def submit(self):

        remote_base = 'http://grader.crowdai.org:1729'
        env = RunEnv(visualize=self.visualize)
        client = Client(remote_base)

        # Create environment
        observation = client.env_create(self.submit_token)

        # Run a single step
        #
        # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
        while True:
            [observation, reward, done,
             info] = client.env_step(self.agent.forward(observation))
            if done:
                observation = client.env_reset()
                if not observation:
                    break

        client.submit()
Code Example #14
class RemoteSubmit(object):
    def __init__(self, token, agent_type):
        self.token = token
        self.remote_base = "http://grader.crowdai.org:1729"
        self.client = Client(self.remote_base)

        # TODO:: Add agent selector
        if agent_type == 'random':
            self.agent = RandomAgent()
        elif agent_type == 'fixed-action':
            self.agent = FixedActionAgent()
        elif agent_type == 'a3c':
            self.agent = A3CAgent(num_envs=2, num_steps=50, max_frames=1000)
        else:
            status = {
                'status': 'ERROR',
                'error_msg': 'Not supported agent-type'
            }
            raise Exception(status)

    def run(self):
        try:
            status = self.agent.run()
            observation = self.client.env_create(self.token,
                                                 env_id="ProstheticsEnv")

            while True:
                action = self.agent.get_action(observation)
                [observation, reward, done,
                 info] = self.client.env_step(action, False)
                if done:
                    observation = self.client.env_reset()
                    if not observation:
                        break
            self.client.submit()

        except Exception as e:
            status = {'status': 'ERROR', 'error_msg': e}
            raise Exception(status)
Code Example #15
def main():
    # Settings
    remote_base = 'http://grader.crowdai.org'

    # Command line parameters
    parser = argparse.ArgumentParser(
        description='Submit the result to crowdAI')
    parser.add_argument("hdf")
    parser.add_argument('--token', dest='token', action='store', required=True)
    args = parser.parse_args()

    hdf = h5py.File(args.hdf, 'r')

    env = GaitEnv(visualize=False)

    agent = cPickle.loads(hdf['agent_snapshots']['0995'].value)
    agent.stochastic = False

    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    total_reward = 0
    # Run a single step
    for i in range(501):
        ob = agent.obfilt(observation)
        a, _info = agent.act(ob)
        [observation, reward, done, info] = client.env_step(a.tolist(), True)
        print i, reward, done
        total_reward += reward
        if done:
            break

    print 'TOTAL REWARD: ', total_reward
    raw_input('press ENTER to submit')
    client.submit()
Code Example #16
File: submit.py Project: d9w/L2M2019
    observation = env.reset(seed=args.seed)

# CGP controller
library = build_funcLib()
ind = CGP.load_from_file(cgp_id, library)
l2meval = L2MEvaluator(1e8, 1)
i = 0
j = 0
r_total = 0.0

while True:
    inputs = l2meval.get_inputs(observation)
    outputs = l2meval.scale_outputs(ind.run(inputs))

    if args.live:
        [observation, reward, done, info] = client.env_step(outputs.tolist())
    else:
        [observation, reward, done, info] = env.step(outputs)
    r_total += reward
    print('%d %d %f %f' % (i, j, reward, r_total))
    i += 1
    if done:
        if args.live:
            i = 0
            j += 1
            r_total = 0
            observation = client.env_reset()
            if not observation:
                break
        else:
            break
Code Example #17
	def upload(frameskip = 1):

		from osim.http.client import Client

		apikey = open('apikey.txt').read().strip('\n')

		print('Using apikey:', apikey)

		remote_base = "http://grader.crowdai.org:1729"
		crowdai_token = apikey

		print('connecting...')
		client = Client(remote_base)

		observation_d = client.env_create(crowdai_token, env_id="ProstheticsEnv")
		#observation = process_obs_dict(observation_d)

		print('environment created! running...')

		#obs_collect = []
		#a_collect = []

		stepno= 0
		epino = 0
		total_reward = 0

		while True:

			#a = AGENT OUTPUT
			observation = process_obs_dict(observation_d)
			a, q = agent.act(observation)
			a = [float(i) for i in list(a)]

			#obs_collect.append(observation)
			#a_collect.append(a)

			for _ in range(frameskip):

				[observation_d, reward, done, info] = client.env_step(a, True)


				stepno += 1
				total_reward += reward

				print('step',stepno,'total reward',total_reward)

				if done:

					'''
					print('')
					print('saving...')
					print('')
					with open('upload_saves/upload_a_collect_' + str(epino) + '.p', 'wb') as f:
						pickle.dump(a_collect, f)
					with open('upload_saves/upload_obs_collect_' + str(epino) + '.p', 'wb') as f:
						pickle.dump(obs_collect, f)
					'''

					observation_d = client.env_reset()


					print('>> episode',epino,' Done after',stepno,'got reward:',total_reward)
					print('')

					total_reward = 0
					stepno = 0
					epino += 1

					break

			if not observation_d:

				break

		print('Done! Submitting...')
		client.submit()
Code Example #18
        client = Client(remote_base)

        with open(modeldir + logfile + "/best_trajectory.pkl", "rb") as f:
            trajectory = pickle.load(f)["trajectory"]

        done = False
        timestep = 0

        observation = client.env_create(crowdai_token, env_id='ProstheticsEnv')
        episode_reward = 0

        while True:
            action = trajectory[timestep % 100]["action"]
            [observation, reward, done,
             info] = client.env_step(action.detach().numpy().tolist(), True)
            episode_reward += reward
            timestep += 1
            print(episode_reward, timestep)
            if done:
                observation = client.env_reset()
                print("Reset")
                if not observation:
                    break

        client.submit()

    else:
        with open(modeldir + logfile + "/best_trajectory.pkl", "rb") as f:
            trajectory = pickle.load(f)["trajectory"]
        done = False
Code Example #19

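# Open-loop controller: replay a pre-recorded action sequence, holding the last
# action once the sequence is exhausted.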
def my_controller(observation, ctr):
    return [float(x) for x in list(arr_list[min(ctr, max_action_steps - 1)])]


ep_no = 2
arr_list = arrs[ep_no]

ep_no_new = 1
arr_list_new = arrs_new[ep_no_new]

arr_list = arr_list[0:180]
arr_list = arr_list + arr_list_new

max_action_steps = len(arr_list)

ctr = 0

while True:
    [observation, reward, done,
     info] = client.env_step(my_controller(observation, ctr), True)
    ctr += 1
    if done:
        observation = client.env_reset()
        ctr = 0
        if not observation:
            break

client.submit()
Code Example #20
# IMPLEMENTATION OF YOUR CONTROLLER
# my_controller = ... (for example the one trained in keras_rl)
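# Linear policy: normalize the observation with the stored running mean and
# variance, then apply the policy matrix theta.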
def my_controller(observation,theta,n,mean,mean_diff,var):
    obs_std = np.sqrt(var)
    state = (observation-mean)/obs_std
    return theta.dot(state)


theta = np.genfromtxt('policy.out', delimiter = ' ', dtype = np.float32)
print("Loading from policy matrix.")
n = np.genfromtxt('n.out', delimiter = ' ', dtype = np.float32)
print("Loading from n matrix.")
mean = np.genfromtxt('mean.out', delimiter = ' ', dtype = np.float32)
print("Loading from mean matrix.")
mean_diff = np.genfromtxt('mean_diff.out', delimiter = ' ', dtype = np.float32)
print("Loading from Mean diff matrix.") 
var = np.genfromtxt('var.out', delimiter = ' ', dtype = np.float32)
print("Loading from Variance matrix.")

tot_reward = 0

while True:
    [observation, reward, done, info] = client.env_step(my_controller(observation,theta,n,mean,mean_diff,var), True)
    tot_reward+=reward
    print(tot_reward)
    if done:
        observation = client.env_reset()
        if not observation:
            break
print(tot_reward)
client.submit()
Code Example #21
def main():
    args = parse_args()
    logger.configure()
    gamma = 0.99
    tau = 0.01
    normalize_returns = False
    normalize_observations = True
    batch_size = 64
    action_noise = None
    stddev = 0.2
    param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev),
                                         desired_action_stddev=float(stddev))
    critic_l2_reg = 1e-2
    actor_lr = 1e-4
    critic_lr = 1e-3
    popart = False
    clip_norm = None
    reward_scale = 1.

    env = prosthetics_env.Wrapper(osim_env.ProstheticsEnv(visualize=False),
                                  frameskip=4,
                                  reward_shaping=True,
                                  reward_shaping_x=1,
                                  feature_embellishment=True,
                                  relative_x_pos=True,
                                  relative_z_pos=True)

    top_model_dir = 'top-models/'

    # create tf sessions and graphs
    sess_list = []
    graph_list = []
    for i in range(len(args.model_files)):
        graph_list.append(tf.Graph())
        sess_list.append(tf.Session(graph=graph_list[i]))
    ddpg_agents = []
    for i in range(len(args.model_files)):
        model_name = args.model_files[i]
        sess = sess_list[i]
        graph = graph_list[i]
        l_size = args.layer_sizes[i]
        with sess.as_default():
        #with U.make_session(num_cpu=1, graph=g) as sess:
            with graph.as_default():
                #tf.global_variables_initializer()

                # restore agents from model files and store in ddpg_agents
                print("Restoring from..." + model_name)

                # Configure components.
                memory = Memory(limit=int(1e6), action_shape=env.action_space.shape,
                                observation_shape=env.observation_space.shape)
                critic = Critic(layer_norm=True, activation='relu', layer_sizes=[l_size, l_size])
                actor = Actor(env.action_space.shape[-1], layer_norm=True,
                              activation='relu', layer_sizes=[l_size, l_size])
                agent = DDPG(actor, critic, memory, env.observation_space.shape,
                             env.action_space.shape, gamma=gamma, tau=tau,
                             normalize_returns=normalize_returns,
                             normalize_observations=normalize_observations,
                             batch_size=batch_size, action_noise=action_noise,
                             param_noise=param_noise, critic_l2_reg=critic_l2_reg,
                             actor_lr=actor_lr, critic_lr=critic_lr,
                             enable_popart=popart, clip_norm=clip_norm,
                             reward_scale=reward_scale)

                # restore adam state and param noise
                restore_model_path = top_model_dir + model_name
                saver = tf.train.Saver(max_to_keep=500)

                # restore network weights
                saver.restore(sess, restore_model_path)

                adam_optimizer_store = pickle.load(open(restore_model_path
                                                        + ".pkl", "rb"))
                agent.actor_optimizer.m = adam_optimizer_store['actor_optimizer']['m']
                agent.actor_optimizer.v = adam_optimizer_store['actor_optimizer']['v']
                agent.actor_optimizer.t = adam_optimizer_store['actor_optimizer']['t']
                agent.critic_optimizer.m = adam_optimizer_store['critic_optimizer']['m']
                agent.critic_optimizer.v = adam_optimizer_store['critic_optimizer']['v']
                agent.critic_optimizer.t = adam_optimizer_store['critic_optimizer']['t']
                if 'param_noise' in adam_optimizer_store:
                    agent.param_noise = adam_optimizer_store['param_noise']

                # intialize and prepare agent session.
                agent.initialize(sess)
                #sess.graph.finalize()
                agent.reset()

                ddpg_agents.append(agent)

    agent = BlendedAgent(ddpg_agents, sess_list, graph_list)

    if args.evaluation:
        # setup eval env
        eval_env = prosthetics_env.EvaluationWrapper(osim_env.ProstheticsEnv(visualize=False),
                                                     frameskip=4,
                                                     reward_shaping=True,
                                                     reward_shaping_x=1,
                                                     feature_embellishment=True,
                                                     relative_x_pos=True,
                                                     relative_z_pos=True)
        eval_env.change_model(model=('3D').upper(), prosthetic=True, difficulty=0, seed=0)
        eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval'))

        nb_eval_steps = 1000
        # reward, mean_q, final_steps = evaluate_one_episode(eval_env, ddpg_agents, sess_list, graph_list,
        #                                                    nb_eval_steps=nb_eval_steps,
        #                                                    render=False)
        reward, mean_q, final_steps = evaluate_one_episode(eval_env, agent, nb_eval_steps, render=False)
        print("Reward: " + str(reward))
        print("Mean Q: " + str(mean_q))
        print("Final num steps: " + str(final_steps))

    # Submit to crowdai competition. What a hack. :)
    # if crowdai_client is not None and crowdai_token is not None and eval_env is not None:
    crowdai_submit_count = 0
    if args.crowdai_submit:
        remote_base = "http://grader.crowdai.org:1729"
        crowdai_client = Client(remote_base)
        eval_obs_dict = crowdai_client.env_create(args.crowdai_token, env_id="ProstheticsEnv")
        eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
            eval_obs_dict,
            reward_shaping=True,
            reward_shaping_x=1.,
            feature_embellishment=True,
            relative_x_pos=True,
            relative_z_pos=True)
        while True:
            action, _ = agent.pi(eval_obs_projection, apply_noise=False, compute_Q=False)
            submit_action = prosthetics_env.openai_to_crowdai_submit_action(action)
            clipped_submit_action = np.clip(submit_action, 0., 1.)
            actions_equal = clipped_submit_action == submit_action
            if not np.all(actions_equal):
                logger.debug("crowdai_submit_count:", crowdai_submit_count)
                logger.debug("  openai-action:", action)
                logger.debug("  submit-action:", submit_action)
            crowdai_submit_count += 1
            [eval_obs_dict, reward, done, info] = crowdai_client.env_step(clipped_submit_action.tolist(), True)
            # [eval_obs_dict, reward, done, info] = crowdai_client.env_step(agent.pi(eval_obs_projection, apply_noise=False, compute_Q=False), True)
            eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
                eval_obs_dict,
                reward_shaping=True,
                reward_shaping_x=1.,
                feature_embellishment=True,
                relative_x_pos=True,
                relative_z_pos=True)
            if done:
                logger.debug("done: crowdai_submit_count:", crowdai_submit_count)
                eval_obs_dict = crowdai_client.env_reset()
                if not eval_obs_dict:
                    break
                logger.debug("done: eval_obs_dict exists after reset")
                eval_obs_dict, eval_obs_projection = prosthetics_env.transform_observation(
                    eval_obs_dict,
                    reward_shaping=True,
                    reward_shaping_x=1.,
                    feature_embellishment=True,
                    relative_x_pos=True,
                    relative_z_pos=True)
        crowdai_client.submit()

    for i in range(len(sess_list)):
        sess_list[i].close()
Code Example #22
def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size, nprocs,
         policy_hid_list, valfunc_hid_list, gpu_pct, restore_path, animate,
         submit):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        batch_size: number of episodes per policy training batch
    """
    # killer = GracefulKiller()

    env, obs_dim, act_dim = init_osim(animate)
    env.seed(111 + mpi_util.rank)
    mpi_util.set_global_seeds(111 + mpi_util.rank)

    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    now = datetime.utcnow().strftime(
        "%b-%d_%H:%M:%S")  # create unique directories
    if mpi_util.rank == 0:
        #aigym_path = os.path.join('/tmp', env_name, now)
        #env = wrappers.Monitor(env, aigym_path, force=True)
        logger = Logger(logname=env_name, now=now)

    episode = 0

    checkpoint = Checkpoint("saves", now)
    # restore from checkpoint?
    if restore_path:
        (policy, val_func, scaler, episode, obs_dim, act_dim,
         kl_targ) = checkpoint.restore(restore_path)
    else:
        policy = Policy(obs_dim, act_dim, kl_targ)
        val_func = NNValueFunction(obs_dim)
        scaler = Scaler(obs_dim)

        if mpi_util.rank == 0:
            # run a few episodes (on node 0) of untrained policy to initialize scaler:
            trajectories = run_policy(env, policy, scaler, episodes=5)

            unscaled = np.concatenate(
                [t['unscaled_obs'] for t in trajectories])
            scaler.update(
                unscaled)  # update running statistics for scaling observations

        # broadcast policy weights, scaler, val_func
        (policy, scaler, val_func) = mpi_util.broadcast_policy_scaler_val(
            policy, scaler, val_func)

        if mpi_util.rank == 0:
            checkpoint.save(policy, val_func, scaler, episode)

    if animate:
        observes, actions, rewards, unscaled_obs = run_episode(env,
                                                               policy,
                                                               scaler,
                                                               animate=animate)
        exit(0)

    if submit:
        # Settings
        #remote_base = 'http://grader.crowdai.org:1729'
        remote_base = 'http://grader.crowdai.org:1730'
        token = 'a83412a94593cae3a491f3ee28ff44e1'

        client = Client(remote_base)

        # Create environment
        observation = client.env_create(token)
        step = 0.0
        observes, actions, rewards, unscaled_obs = [], [], [], []
        scale, offset = scaler.get()
        scale[-1] = 1.0  # don't scale time step feature
        offset[-1] = 0.0  # don't offset time step feature

        # Run a single step
        #
        # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
        while True:
            obs = np.array(observation).astype(np.float32).reshape((1, -1))
            print("OBSERVATION TYPE:", type(obs), obs.shape)
            print(obs)
            obs = np.append(obs, [[step]], axis=1)  # add time step feature
            unscaled_obs.append(obs)
            obs = (obs - offset) * scale  # center and scale observations
            observes.append(obs)

            action = policy.sample(obs).astype(np.float32).reshape((-1, 1))
            print("ACTION TYPE:", type(action), action.shape)
            print(action)
            actions.append(action)

            [observation, reward, done,
             info] = client.env_step(action.tolist())
            print("step:", step, "reward:", reward)

            if not isinstance(reward, float):
                reward = np.asscalar(reward)
            rewards.append(reward)
            step += 1e-3  # increment time step feature

            if done:
                print(
                    "================================== RESTARTING ================================="
                )
                observation = client.env_reset()
                step = 0.0
                observes, actions, rewards, unscaled_obs = [], [], [], []
                scale, offset = scaler.get()
                scale[-1] = 1.0  # don't scale time step feature
                offset[-1] = 0.0  # don't offset time step feature
                if not observation:
                    break

        client.submit()
        exit(0)

    ######

    worker_batch_size = int(batch_size / mpi_util.nworkers)  # HACK
    if (worker_batch_size * mpi_util.nworkers != batch_size):
        print("batch_size:", batch_size, " is not divisible by nworkers:",
              mpi_util.nworkers)
        exit(1)

    batch = 0
    while episode < num_episodes:
        if mpi_util.rank == 0 and batch > 0 and batch % 10 == 0:
            checkpoint.save(policy, val_func, scaler, episode)
        batch = batch + 1

        trajectories = run_policy(env,
                                  policy,
                                  scaler,
                                  episodes=worker_batch_size)
        trajectories = mpi_util.gather_trajectories(trajectories)

        if mpi_util.rank == 0:
            # concatenate trajectories into one list
            trajectories = list(itertools.chain.from_iterable(trajectories))
            print("did a batch of ", len(trajectories), " trajectories")
            print([t['rewards'].sum() for t in trajectories])

            episode += len(trajectories)
            add_value(trajectories,
                      val_func)  # add estimated values to episodes
            add_disc_sum_rew(trajectories,
                             gamma)  # calculated discounted sum of Rs
            add_gae(trajectories, gamma, lam)  # calculate advantage

            # concatenate all episodes into single NumPy arrays
            observes, actions, advantages, disc_sum_rew = build_train_set(
                trajectories)

            # add various stats to training log:
            logger.log({
                '_MeanReward':
                np.mean([t['rewards'].sum() for t in trajectories]),
                'Steps':
                np.sum([t['observes'].shape[0] for t in trajectories])
            })
            log_batch_stats(observes, actions, advantages, disc_sum_rew,
                            logger, episode)

            policy.update(observes, actions, advantages,
                          logger)  # update policy
            val_func.fit(observes, disc_sum_rew,
                         logger)  # update value function

            unscaled = np.concatenate(
                [t['unscaled_obs'] for t in trajectories])
            scaler.update(
                unscaled)  # update running statistics for scaling observations

            logger.write(
                display=True)  # write logger results to file and stdout

        # if mpi_util.rank == 0 and killer.kill_now:
        #     if input('Terminate training (y/[n])? ') == 'y':
        #         break
        #     killer.kill_now = False

        # broadcast policy weights, scaler, val_func
        (policy, scaler, val_func) = mpi_util.broadcast_policy_scaler_val(
            policy, scaler, val_func)

    if mpi_util.rank == 0: logger.close()
    policy.close_sess()
    if mpi_util.rank == 0: val_func.close_sess()
Code Example #23
# If TEST and TOKEN, submit to crowdAI
if not args.train and args.token:
    agent.load_weights(args.model)
    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    # Run a single step
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
    while True:
        v = np.array(observation).reshape((env.observation_space.shape[0]))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()

# If TEST and no TOKEN, run some test experiments
if not args.train and not args.token:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)
Code Example #24
# IMPLEMENTATION OF YOUR CONTROLLER
# my_controller = ... (for example the one trained in keras_rl)


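# Controller backed by a MAP-Elites archive: take the genome stored in the first
# grid cell and evaluate its per-muscle control function at the current time step.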
def my_controller(observation, time_step):
    bin_index = (0, )
    genome = map_elites.container.grid[bin_index]["genome"]
    action = []
    for muscle_index in range(19):
        action.append(
            genome.control_function(muscle_index=muscle_index,
                                    time_step=time_step)[0])
    return action


i = 0
total_reward = 0
time_step = 0
while True:
    time_step += 1
    [observation, reward, done,
     info] = client.env_step(my_controller(observation, time_step), True)
    total_reward += reward
    print(i, total_reward)
    i += 1
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
Code Example #25
File: submit.py Project: yrevar/osim-rl
from osim.http.client import Client
from osim.env import ProstheticsEnv
import numpy as np
import argparse

# Settings
remote_base = 'http://grader.crowdai.org:1729'

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store', required=True)
args = parser.parse_args()

client = Client(remote_base)

# Create environment
observation = client.env_create(args.token, env_id="ProstheticsEnv")
env = ProstheticsEnv()

# Run a single step
# The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
while True:
    print(observation)
    [observation, reward, done, info] = client.env_step(env.action_space.sample().tolist())
    if done:
        observation = client.env_reset()
        if not observation:
            break
            
client.submit()
Code Example #26
File: submit.py Project: wiplug/osim-rl
import numpy as np
import argparse

# Settings
remote_base = 'http://grader.crowdai.org:1729'

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store', required=True)
args = parser.parse_args()

env = RunEnv(visualize=False)
client = Client(remote_base)

# Create environment
observation = client.env_create(args.token)

# Run a single step
#
# The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
while True:
    v = np.array(observation).reshape((-1,1,env.observation_space.shape[0]))
    [observation, reward, done, info] = client.env_step(env.action_space.sample().tolist())
    print(observation)
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
Code Example #27
import numpy as np
import argparse

# Settings
# remote_base = 'http://grader.crowdai.org:1729' # Submission to Round-1
remote_base = 'http://grader.crowdai.org:1730'  # Submission to Round-2

# Command line parameters
parser = argparse.ArgumentParser(description='Submit the result to crowdAI')
parser.add_argument('--token', dest='token', action='store', required=True)
args = parser.parse_args()

client = Client(remote_base)

# Create environment
observation = client.env_create(args.token, env_id="ProstheticsEnv")
env = ProstheticsEnv()

# Run a single step
# The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
while True:
    print(observation)
    [observation, reward, done,
     info] = client.env_step(env.action_space.sample().tolist())
    if done:
        observation = client.env_reset()
        if not observation:
            break

client.submit()
Code Example #28
from osim.env import ProstheticsEnv
from osim.http.client import Client

remote_base = "http://grader.crowdai.org:1729"
crowdai_token = "a6d6c970d3883bee5730708739550518"

client = Client(remote_base)
observation = client.env_create(crowdai_token, env_id="ProstheticsEnv")
# env = ProstheticsEnv(visualize=True)
# i = 0

def exit(default=0):
    import sys
    sys.exit(default)

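# Constant controller: always return the same fixed 19-muscle activation vector.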
def get_default():
    return [0.5488135, 0.71518934, 0.60276335, 0.5448832, 0.4236548, 0.6458941, 0.4375872, 0.891773, 0.96366274, 0.3834415, 0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606, 0.0871293, 0.0202184, 0.83261985, 0.77815676]

def my_controller(observation):
    return get_default()

while True:
    # [observation, reward, done, info] = client.env_step(env.action_space.sample().tolist())
    [observation, reward, done, info] = client.env_step(my_controller(observation), True)
    if done:
        print("done")
        observation = client.env_reset()
        if not observation:
            print("break")
            break

client.submit()
Code Example #29
File: example.py Project: wiplug/osim-rl
    agent.save_weights(args.model, overwrite=True)

# If TEST and TOKEN, submit to crowdAI
if not args.train and args.token:
    agent.load_weights(args.model)
    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    # Run a single step
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
    while True:
        v = np.array(observation).reshape((env.observation_space.shape[0]))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()

# If TEST and no TOKEN, run some test experiments
if not args.train and not args.token:
    agent.load_weights(args.model)
    # Finally, evaluate our algorithm for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)
Code Example #30
        # Initial hidden state at start of episode
        hidden = agent.local_evaluator.policy_map['default'].get_initial_state()

    # Evaluation loop
    while True:
        # NOTE TODO: the reduced action space is hard-coded in train.py!
        if use_lstm:
            action, hidden, logits_dict = agent.compute_action(observation=state, state=hidden)
        else:
            action = agent.compute_action(state)

        action = dummy_env.expand_action(action)  # get back original action space, this is also a list now

        # Repeat same action downsample_factor number of times
        for _ in range(downsample_factor):
            state_desc, reward, done, info = client.env_step(action) #, True)
            if done:
                break

        state = dummy_env.process_state_desc(state_desc)  # "next state"

        if done:
            state_desc = client.env_reset()
            if not state_desc:
                break
            state = dummy_env.process_state_desc(state_desc)

            if use_lstm:
                # Initial hidden state at start of episode
                hidden = agent.local_evaluator.policy_map['default'].get_initial_state()
Code Example #31
    # if not observation:
    #     break

    # client.submit()

# If TOKEN is provided, submit to crowdAI
if args.token:
    agent.load_weights(args.model)
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    # Run a single step
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one
    while True:
        v = np.array(observation).reshape((env.observation_space.shape[0]))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        observation = process_observation(observation)
        total_reward += reward
        if done:
            observation = client.env_reset()
            if not observation:
                break

    client.submit()
    # Finally, evaluate our algorithm for 1 episode.
    #
Code Example #32
File: submit.py Project: nagyistge/osim-rl
nb_actions = env.action_space.shape[0]

# Load the actor
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))
actor.load_weights(args.model)

client = Client(remote_base)

# Create environment
observation = client.env_create(args.token)

# Run a single step
for i in range(501):
    v = np.array(observation).reshape((-1,1,env.observation_space.shape[0]))
    [observation, reward, done, info] = client.env_step(args.token, actor.predict(v)[0].tolist(), True)
    if done:
        break

client.submit(args.token)
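
Every example above follows the same osim-rl HTTP client protocol: env_create() opens the remote environment with a submission token, env_step() advances it, env_reset() starts the next grading episode (and returns a falsy value once all episodes have been run), and submit() finalizes the submission. Below is a minimal sketch of that loop, assuming the osim package is installed; the grader URL, the token placeholder, and the zero-action policy are stand-ins rather than code from any project above.

import numpy as np
from osim.http.client import Client

# Stand-in values: replace with your own grader URL and crowdAI token.
remote_base = "http://grader.crowdai.org:1729"
crowdai_token = "[YOUR_CROWD_AI_TOKEN_HERE]"


def my_policy(observation):
    # Placeholder controller: a zero activation for each of the 18 muscles of the
    # NIPS 2017 RunEnv (ProstheticsEnv expects 19 values instead).
    return np.zeros(18).tolist()


client = Client(remote_base)
observation = client.env_create(crowdai_token)

while True:
    observation, reward, done, info = client.env_step(my_policy(observation), True)
    if done:
        # env_reset() starts the next grading episode, or returns a falsy value
        # once every episode has been evaluated.
        observation = client.env_reset()
        if not observation:
            break

client.submit()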