Example #1
class Environment:
    def __init__(self):

        print("Setting env...")
        self.env = RunEnv(visualize=False)
        print("Env set !")

    def get_state_size(self):
        return list(self.env.observation_space.shape)

    def get_action_size(self):
        return self.env.action_space.shape[0]

    def get_bounds(self):
        return self.env.action_space.low, self.env.action_space.high

    def set_render(self, render):
        self.env = RunEnv(visualize=render)

    def reset(self):
        return self.env.reset(difficulty=0)

    def random(self):
        return self.env.action_space.sample()

    def act(self, action):
        return self.env.step(action)

    def close(self):
        self.env.close()
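A minimal rollout sketch for the wrapper above (assumed driver code, not part of the original snippet): random actions until the episode ends.

# Hedged usage sketch for the Environment wrapper above; the random-action loop is illustrative.
env = Environment()
state = env.reset()                 # difficulty 0, as hard-coded in reset()
done, total_reward = False, 0.0
while not done:
    action = env.random()           # swap in a trained policy for real use
    state, reward, done, info = env.act(action)
    total_reward += reward
env.close()
print("episode reward:", total_reward)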
class LearnToRunEnv(gym.Env):
    """Wrapping LearnToRunEnv in OpenAI Gym"""
    def __init__(self, visualize=False, difficulty=None):
        super(LearnToRunEnv, self).__init__()
        if difficulty is None:
            self.difficulty = random.randint(0, 2)
        else:
            self.difficulty = difficulty
        # seed defaults to None so _reset() works even if _seed() is never called
        self.learntorun_seed = None

        self.learntorun_env = RunEnv(visualize=visualize)
        self.observation_space = self.learntorun_env.observation_space
        self.action_space = self.learntorun_env.action_space

        self._spec = EnvSpec("RunEnv-diff{}-v1".format(self.difficulty))

    def _step(self, action):
        obs, reward, terminal, info = self.learntorun_env.step(action)
        return np.asarray(obs), reward, terminal, info

    def _reset(self):
        obs = self.learntorun_env.reset(difficulty=self.difficulty,\
                                            seed=self.learntorun_seed)
        return np.asarray(obs)

    def _render(self, mode='human', close=False):
        #raise NotImplementedError
        return None

    def _seed(self, seed=None):
        self.learntorun_seed = seed

    def _close(self):
        self.learntorun_env.close()
class LearnToRunEnv(gym.Env):
    """Wrapping LearnToRunEnv in OpenAI Gym"""
    def __init__(self, visualize=False, difficulty=None):
        super(LearnToRunEnv, self).__init__()
        if difficulty is None:
            self.difficulty = random.randint(0, 2)
        else:
            self.difficulty = difficulty
        # seed defaults to None so _reset() works even if _seed() is never called
        self.learntorun_seed = None

        self.learntorun_env = RunEnv(visualize=visualize)
        self.observation_space = self.learntorun_env.observation_space
        self.action_space = self.learntorun_env.action_space

    def _step(self, action):
        return self.learntorun_env.step(action)

    def _reset(self):
        return self.learntorun_env.reset(difficulty=self.difficulty,\
                                            seed=self.learntorun_seed)

    def _render(self, mode='human', close=False):
        #raise NotImplementedError
        return None

    def _seed(self, seed=None):
        self.learntorun_seed = seed

    def _close(self):
        self.learntorun_env.close()
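A short hedged usage sketch, assuming the old-style gym API these `_step`/`_reset`/`_seed` hooks target (env.seed/reset/step dispatch to the underscore methods); `seed()` is called before `reset()` so a specific episode seed reaches RunEnv.reset().

# Illustrative driver for the gym wrapper above (assumed, not from the original repo).
env = LearnToRunEnv(visualize=False, difficulty=0)
env.seed(42)                        # stored as self.learntorun_seed and forwarded to RunEnv.reset()
obs = env.reset()
done, total_reward = False, 0.0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
env.close()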
Example #4
    def __init__(self, game_name, display):
        self.game_name = game_name
        self.display = display

        # self.env = gym.make(game_name)
        self.env = RunEnv(self.display)
        self.reset()
Example #5
class Environment:
    def __init__(self):
        self.env = RunEnv(visualize=False)
        print()
        self.render = False

    def get_state_size(self):
        return list(self.env.observation_space.shape)

    def get_action_size(self):
        return self.env.action_space.shape[0]

    def get_bounds(self):
        return self.env.action_space.low, self.env.action_space.high

    def set_render(self, render):
        visu = render and DISPLAY
        if visu != self.render:
            self.render = visu
            self.env = RunEnv(visualize=visu)
            self.reset()

    def reset(self):
        return np.asarray(self.env.reset(difficulty=0))

    def random(self):
        return self.env.action_space.sample()

    def act(self, action):
        s_, r, d, i = self.env.step(action)
        return np.asarray(s_), r, d, i

    def close(self):
        self.env.close()
Example #6
def test():
    task_fn = lambda: LTR()
    task = task_fn()
    state_dim = task.env.observation_space.shape[0]
    action_dim = task.env.action_space.shape[0]
    with open('data/ddpg-model-LearningToRun.bin', 'rb') as f:
        model = pickle.load(f)
    actor = DDPGActorNet(state_dim, action_dim)
    actor.load_state_dict(model)

    logger = Logger('./log')

    env = RunEnv(visualize=False)
    state = env.reset(difficulty=0)
    print(state)
    done = False
    total_reward = 0.0
    step = 0
    while not done:
        action = actor.predict(np.stack([state]), to_numpy=True).flatten()
        state, reward, done, info = env.step(action)
        total_reward += reward
        step += 1
        logger.histo_summary('input', actor.input, step)
        logger.histo_summary('act1', actor.act1, step)
        logger.histo_summary('act2', actor.act2, step)
        logger.histo_summary('pre_act3', actor.pre_act3, step)
        logger.histo_summary('act3', actor.act3, step)
        for tag, value in actor.named_parameters():
            tag = tag.replace('.', '/')
            logger.histo_summary(tag, value.data.numpy(), step)

    print(total_reward)
    print(step)
Example #7
    def test1(self):
        
        env = RunEnv(visualize=False)
        observation = env.reset()

        action = env.action_space.sample()
        action[5] = np.NaN
        self.assertRaises(ValueError, env.step, action)
Example #8
    def test1(self):

        env = RunEnv(visualize=False)
        observation = env.reset()

        action = env.action_space.sample()
        action[5] = np.NaN
        self.assertRaises(ValueError, env.step, action)
Example #9
    def run(self):
        
        self.env = RunEnv(visualize=False)
        self.env.reset(difficulty = 2, seed = int(time.time()))
        if self.monitor:
            self.env.monitor.start('monitor/', force=True)

        # tensorflow variables (same as in model.py)
        self.observation_size = 55+7
        self.action_size = np.prod(self.env.action_space.shape)
        self.hidden_size = 128
        weight_init = tf.random_uniform_initializer(-0.05, 0.05)
        bias_init = tf.constant_initializer(0)
        # tensorflow model of the policy
        self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
        self.debug = tf.constant([2,2])
        with tf.variable_scope("policy-a"):
            h1 = fully_connected(self.obs, self.observation_size, self.hidden_size, weight_init, bias_init, "policy_h1")
            h1 = tf.nn.relu(h1)
            h2 = fully_connected(h1, self.hidden_size, self.hidden_size, weight_init, bias_init, "policy_h2")
            h2 = tf.nn.relu(h2)
            h3 = fully_connected(h2, self.hidden_size, self.action_size, weight_init, bias_init, "policy_h3_1")
            h3 = tf.nn.tanh(h3,name="policy_h3")
            action_dist_logstd_param = tf.Variable((.01*np.random.randn(1, self.action_size)).astype(np.float32), name="policy_logstd")
        self.action_dist_mu = h3
        self.action_dist_logstd = tf.tile(action_dist_logstd_param, tf.stack((tf.shape(self.action_dist_mu)[0], 1)))

        config = tf.ConfigProto(
            device_count = {'CPU': 0}
        )
        self.session = tf.Session()
        self.session.run(tf.initialize_all_variables())
        var_list = tf.trainable_variables()

        self.set_policy = SetPolicyWeights(self.session, var_list)

        while True:
            # get a task, or wait until it gets one
            next_task = self.task_q.get(block=True)
            if next_task == 1:
                # the task is an actor request to collect experience
                path = self.rollout()
                self.task_q.task_done()
                self.result_q.put(path)
            elif next_task == 2:
                print "kill message"
                if self.monitor:
                    self.env.monitor.close()
                self.task_q.task_done()
                break
            else:
                # the task is to set parameters of the actor policy
                self.set_policy(next_task)
                # super hacky method to make sure when we fill the queue with set parameter tasks,
                # an actor doesn't finish updating before the other actors can accept their own tasks.
                time.sleep(0.1)
                self.task_q.task_done()
        return
 def test_actions(self):
     env = RunEnv(visualize=False)
     env.reset()
     v = env.action_space.sample()
     v[0] = 1.5
     v[1] = -0.5
     observation, reward, done, info = env.step(v)
     self.assertLessEqual(env.last_action[0],1.0)
     self.assertGreaterEqual(env.last_action[1],0.0)
    def __init__(self, visualize=False, difficulty=None):
        super(LearnToRunEnv, self).__init__()
        if difficulty is None:
            self.difficulty = random.randint(0, 2)
        else:
            self.difficulty = difficulty

        self.learntorun_env = RunEnv(visualize=visualize)
        self.observation_space = self.learntorun_env.observation_space
        self.action_space = self.learntorun_env.action_space
Example #12
 def __init__(self, visualize=False, token=None, max_obstacles=3):
     logger.info("max_obstacles={}".format(max_obstacles))
     if token is None:
         self.remote_env = False
         self.env = RunEnv(visualize=visualize, max_obstacles=max_obstacles)
     else:
         self.remote_env = True
         self.local_env = RunEnv(visualize=False, max_obstacles=max_obstacles)
         self.token = token
         self.env = Client(GRADER_URL)
         self.env_created = False
Example #13
def standalone_headless_isolated(pq, cq, plock):
    # locking to prevent mixed-up printing.
    plock.acquire()
    print('starting headless...', pq, cq)
    try:
        from osim.env import RunEnv
        # RunEnv = runenv_with_alternative_obstacle_generation_scheme()
        e = RunEnv(visualize=False, max_obstacles=0)
        # bind_alternative_pelvis_judgement(e)
        # use_alternative_episode_length(e)
    except Exception as err:
        print('error on start of standalone')
        traceback.print_exc()
        plock.release()
        return
    else:
        plock.release()

    def report(e):
        # a way to report errors ( since you can't just throw them over a pipe )
        # e should be a string
        print('(standalone) got error!!!')
        cq.put(('error', e))

    def floatify(np):
        return [float(np[i]) for i in range(len(np))]

    try:
        while True:
            msg = pq.get()
            # messages should be tuples,
            # msg[0] should be string

            # isinstance is dangerous, commented out
            # if not isinstance(msg,tuple):
            #     raise Exception('pipe message received by headless is not a tuple')

            if msg[0] == 'reset':
                o = e.reset(difficulty=0)
                cq.put(floatify(o))
            elif msg[0] == 'step':
                o, r, d, i = e.step(msg[1])
                o = floatify(o)  # floatify the observation
                cq.put((o, r, d, i))
            else:
                cq.close()
                pq.close()
                del e
                break
    except Exception as e:
        traceback.print_exc()
        report(str(e))

    return  # end process
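For context, a hypothetical parent-side driver for the worker above (the function name, queues, and step count are assumptions): it spawns the process with two queues and a lock, then speaks the ('reset' / 'step') protocol shown in the loop. The 18-dimensional action matches RunEnv's muscle activations, as also seen in the other examples in this listing.

# Hypothetical parent-side driver for standalone_headless_isolated (not from the original repo).
import multiprocessing as mp

def example_parent():
    pq, cq = mp.Queue(), mp.Queue()           # parent->child and child->parent queues
    plock = mp.Lock()
    p = mp.Process(target=standalone_headless_isolated, args=(pq, cq, plock))
    p.start()

    pq.put(('reset',))
    obs = cq.get()                             # list of floats, per floatify()
    for _ in range(5):
        pq.put(('step', [0.5] * 18))           # 18 muscle activations
        obs, reward, done, info = cq.get()
        if done:
            break
    pq.put(('close',))                         # any other message shuts the worker down
    p.join()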
Example #14
class OsimEnv(Env):
    def __init__(self,
                 visualize=True,
                 test=False,
                 step_size=0.01,
                 processor=None,
                 timestep_limit=1000):
        self.visualize = visualize
        self._osim_env = RunEnv(visualize=visualize)
        self._osim_env.stepsize = step_size
        self._osim_env.spec.timestep_limit = timestep_limit
        self._osim_env.horizon = timestep_limit
        # self._osim_env.integration_accuracy = 1e-1
        if test:
            self._osim_env.timestep_limit = 1000
        self.processor = processor
        print "stepsize: " + str(self._osim_env.stepsize)

    def reset(self, seed=None, difficulty=2):
        observation = self._osim_env.reset(seed=seed, difficulty=difficulty)
        if self.processor:
            observation, reward, done, info = self.processor.process_step(
                observation, 0.0, False, dict())

        return observation

    def step(self, action):
        if self.processor:
            action = self.processor.process_action(action)

        observation, reward, done, info = self._osim_env.step(action)

        if self.processor:
            observation, reward, done, info = self.processor.process_step(
                observation, reward, done, info)

        return observation, reward, done, info

    def get_observation_dim(self):
        return len(self.reset())

    def get_action_dim(self):
        nb_actions = self._osim_env.action_space.shape[0]
        return nb_actions

    # FOR PICKLING
    def __setstate__(self, state):
        self.__init__(visualize=state['visualize'])

    def __getstate__(self):
        state = {'visualize': self.visualize}
        return state
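A hedged rollout sketch using the wrapper's public methods above; the constant action and the 300-step limit are illustrative assumptions, and the processor is left as None.

# Illustrative smoke test for the OsimEnv wrapper above.
env = OsimEnv(visualize=False, timestep_limit=300)
obs = env.reset(seed=0, difficulty=0)
n_act = env.get_action_dim()
done, ep_return = False, 0.0
while not done:
    obs, reward, done, info = env.step([0.5] * n_act)   # constant activation on all muscles
    ep_return += reward
print("return:", ep_return)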
Example #15
def standalone_headless_isolated(conn, plock):
    # locking to prevent mixed-up printing.
    plock.acquire()
    print('starting headless...', conn)
    try:
        import traceback
        from osim.env import RunEnv
        e = RunEnv(visualize=False)
    except Exception as e:
        print('error on start of standalone')
        traceback.print_exc()

        plock.release()
        return
    else:
        plock.release()

    def report(e):
        # a way to report errors ( since you can't just throw them over a pipe )
        # e should be a string
        print('(standalone) got error!!!')
        conn.send(('error', e))

    def floatify(np):
        return [float(np[i]) for i in range(len(np))]

    try:
        while True:
            msg = conn.recv()
            # messages should be tuples,
            # msg[0] should be string

            # isinstance is dangerous, commented out
            # if not isinstance(msg,tuple):
            #     raise Exception('pipe message received by headless is not a tuple')

            if msg[0] == 'reset':
                o = e.reset(difficulty=2)
                conn.send(floatify(o))
            elif msg[0] == 'step':
                ordi = list(e.step(msg[1]))  # convert the tuple to a list so the observation can be replaced
                ordi[0] = floatify(ordi[0])
                conn.send(ordi)
            else:
                conn.close()
                del e
                break
    except Exception as e:
        traceback.print_exc()
        report(str(e))

    return  # end process
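The matching parent side would mirror the queue-based sketch above, but over a multiprocessing Pipe (again, the names and the single step are assumptions).

# Hypothetical parent-side counterpart using a Pipe.
import multiprocessing as mp

def example_parent():
    parent_conn, child_conn = mp.Pipe()
    plock = mp.Lock()
    p = mp.Process(target=standalone_headless_isolated, args=(child_conn, plock))
    p.start()

    parent_conn.send(('reset',))
    obs = parent_conn.recv()
    parent_conn.send(('step', [0.0] * 18))
    obs, reward, done, info = parent_conn.recv()
    parent_conn.send(('close',))               # any message other than reset/step ends the worker
    p.join()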
Example #16
 def __init__(self,
              game='l2r',
              visualize=False,
              max_obstacles=10,
              skip_count=1):
     self.env = RunEnv(visualize=visualize, max_obstacles=max_obstacles)
     self.step_count = 0
     self.old_observation = None
     self.skip_count = 1  # skip_count  # 4
     self.last_x = 0
     self.current_x = 0
     self.observation_space_shape = (76, )
     self.action_space = self.env.action_space
     self.difficulty = 2
Example #17
class LTR(BasicTask):
    name = 'LearningToRun'
    success_threshold = 2000
    def __init__(self):
        BasicTask.__init__(self)
        self.env = RunEnv(visualize=False)

    def step(self, action):
        action = np.clip(action, 0, 1)
        next_state, reward, done, info = self.env.step(action)
        return np.asarray(next_state) / math.pi, reward, done, info

    def reset(self):
        state = self.env.reset(difficulty=0, seed=np.random.randint(0, 10000000))
        return np.asarray(state) / math.pi
Example #18
def create_env(args):
    env = RunEnv(visualize=True, max_obstacles=args.max_obstacles)

    if hasattr(args, "baseline_wrapper") or hasattr(args, "ddpg_wrapper"):
        env = DdpgWrapper(env, args)

    return env
Example #19
def main():
    env = RunEnv(visualize=False)
    population = [[NN(), 0] for _ in range(100)]
    generation = 0

    for _ in range(2000):
        for i in range(len(population)):
            print(i)
            population[i][1] = run(population[i][0], env)

        population = sorted(population, key=lambda x: x[1], reverse=True)
        print(np.mean([p[1] for p in population[:5]]))
        generation += 1

        population = population[:50]

        for _ in range(20):
            population.append([random.choice(population[:50])[0].mutate(), 0])

        for _ in range(20):
            nn1 = random.choice(population[:20])[0]
            nn2 = random.choice(population[:50])[0]
            population.append([nn1.crossover(nn2), 0])

        for _ in range(10):
            population.append([NN(), 0])

        with open('save.p', 'wb') as f:
            pickle.dump(population, f)
Example #20
 def __init__(self,
              visualize=True,
              test=False,
              step_size=0.01,
              processor=None,
              timestep_limit=1000):
     self.visualize = visualize
     self._osim_env = RunEnv(visualize=visualize)
     self._osim_env.stepsize = step_size
     self._osim_env.spec.timestep_limit = timestep_limit
     self._osim_env.horizon = timestep_limit
     # self._osim_env.integration_accuracy = 1e-1
     if test:
         self._osim_env.timestep_limit = 1000
     self.processor = processor
     print "stepsize: " + str(self._osim_env.stepsize)
Example #21
    def test():

        env = RunEnv(visualize=False)

        observation_d = env.reset(project=False)
        observation = process_obs_dict(observation_d)

        total_reward = 0
        steps = 0

        while True:

            #a = AGENT OUTPUT
            a, q = agent.act(observation)

            observation_d, reward, done, info = env.step(a, project=False)
            observation = process_obs_dict(observation_d)

            total_reward += reward
            steps += 1

            #print(observation)

            print(steps, 'total reward:', total_reward)

            if done:

                break

        print('finished testing!')
Example #22
 def test(skip=4):
     test_env = RunEnv(visualize=True, max_obstacles=0)
     fast_env = FastEnv(test_env, skip)  # 4 is skip factor
     agent.training = False
     agent.play(fast_env, noise_level=1e-11, episode_index=-1)
     agent.training = True
     del test_env
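FastEnv itself is not shown in this listing; a minimal frame-skipping wrapper consistent with how it is called above might look like the sketch below. This is an assumption about its behavior, not the original implementation.

# Hypothetical minimal FastEnv: repeats each action `skip` times and sums the rewards.
class FastEnv:
    def __init__(self, env, skip=4):
        self.env = env
        self.skip = skip
        self.action_space = env.action_space

    def reset(self):
        return self.env.reset(difficulty=0)

    def step(self, action):
        total_reward, done, info = 0.0, False, {}
        for _ in range(self.skip):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, info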
Example #23
	def test(frameskip = 1, vis = False):

		env = RunEnv(visualize=vis)
		#env.change_model(model='2D', prosthetic=True, difficulty=0, seed=None)

		observation_d = env.reset(project = False)
		#observation = process_obs_dict(observation_d)


		total_reward = 0
		steps = 0

		while True:

			#a = AGENT OUTPUT
			observation = process_obs_dict(observation_d)
			a, q = agent.act(observation)

			for _ in range(frameskip):

				observation_d, reward, done, info = env.step(a, project = False)
				#observation = process_obs_dict(observation_d)

				total_reward += reward
				steps += 1

			#print(observation)

			print(steps, 'total reward:', total_reward)

			if done:

				break

		print('finished testing!')
Example #24
def main():
    env = RunEnv(visualize=False)

    s = socket.socket()
    s.bind(("localhost", 8000))
    s.listen(10)  # max number of connections

    while True:
        sc, address = s.accept()
        # read the pickled network until the client closes its sending side
        with open("work.p", 'wb') as f:
            chunk = sc.recv(1024)
            while chunk:
                f.write(chunk)
                chunk = sc.recv(1024)

        with open('work.p', 'rb') as f:
            nn = pickle.load(f)

        reward = run(nn, env)
        sc.send(str(reward).encode())
        sc.close()

    s.close()
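A hypothetical client for the evaluation server above: it pickles a network, streams it to the server, signals end-of-upload, and reads back the reward. The function and host/port defaults are assumptions.

# Hypothetical client matching the server's receive-then-reply protocol.
import pickle
import socket

def evaluate_remote(nn, host="localhost", port=8000):
    payload = pickle.dumps(nn)
    s = socket.socket()
    s.connect((host, port))
    s.sendall(payload)
    s.shutdown(socket.SHUT_WR)        # lets the server's recv loop terminate
    reward = s.recv(1024)
    s.close()
    return float(reward.decode())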
Example #25
class OsimAdapter:
    def __init__(self):
        self.env = RunEnv(visualize=False)
        self.reset()

    def reset(self, difficulty=2):
        self.reward = 0
        self.total_reward = 0
        self.timestamp = 0.
        self.features = np.array(
            (self.env.reset(difficulty=difficulty))).reshape((1, -1))
        self.last_obs = np.zeros(shape=(1, 41))
        self.features = np.concatenate([self.features, self.last_obs], axis=1)
        self.done = False
        return self.features

    def get_action_space(self):
        space = [1] * 18
        return space

    def get_observation_space(self):
        return 41 * 2

    def step(self, actions):
        mean_possible = (np.array(self.env.action_space.low) +
                         np.array(self.env.action_space.high)) / 2.
        actions = np.array(actions) + mean_possible
        actions *= (np.array(self.env.action_space.high) -
                    np.array(self.env.action_space.low))
        actions = np.clip(actions, self.env.action_space.low,
                          self.env.action_space.high)
        obs, reward1, done, _ = self.env.step(actions)
        reward2 = 0
        if not done:
            obs, reward2, done, _ = self.env.step(actions)
        self.features = np.array(obs).reshape((1, -1))
        self.features = np.concatenate(
            [self.features, self.features - self.last_obs], axis=1)
        self.last_obs = np.array(obs).reshape((1, -1))
        self.reward = reward1 + reward2
        self.total_reward += self.reward
        self.done = done
        self.timestamp += 1

    def get_total_reward(self):
        return self.total_reward
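An illustrative loop driving the adapter above (assumed usage): note that step() records its results on the instance rather than returning them, so the loop reads self.done and get_total_reward().

# Hedged usage sketch for OsimAdapter.
adapter = OsimAdapter()
features = adapter.reset(difficulty=0)
while not adapter.done:
    # zero-centred actions, since step() adds the midpoint of the action range
    adapter.step([0.0] * len(adapter.get_action_space()))
print("total reward:", adapter.get_total_reward())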
Example #26
def Simulation(proxy_agent,index, return_dict,  episodes, vis=False):
    print('starting simulation')
    env = RunEnv(visualize=vis)
    observation = env.reset(difficulty=0)

    rewards = np.zeros(episodes)
    totalreward = 0
    for episode in range(0, episodes):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        observation = np.array(observation)
        Preprocess = Preprocessing(observation, delta=0.01)
        prevState = Preprocess.GetState(observation)
        for i in range(1,1000):
            observation, reward, done, info = env.step(action)
            observation = np.array(observation)
            #means it didn't go the full simulation
            if done and i < 1000:
                reward = 0  

            state = Preprocess.GetState(observation)
            s,a,r,sp = Preprocess.ConvertToTensor(prevState,action, reward, state)

            totalreward += reward
            if done:
                env.reset(difficulty = 0, seed = None) #resets the environment if done is true
                print("reseting environment" + str(episode))
                rewards[episode] = totalreward
                totalreward = 0
                break
            action = proxy_agent(Variable(s, volatile=True))
            action = action.data.numpy()
            prevState = state;
    return_dict[index] = np.sum(rewards) / episodes
    return np.sum(rewards) / episodes
Example #27
def build_model(shared_object):
	shared_object['env'] = RunEnv(shared_object.get('visualize',False))
	model_class_name = 'models.agents.' + shared_object.get('model_class',None)
	log_info('importing class : {}'.format(model_class_name))
	model_class = import_class(model_class_name)
	log_info('{} successfully imported'.format(model_class_name))
	log_info('building model')
	model = model_class(shared_object)
	return model
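A hypothetical configuration for build_model, limited to the keys the factory reads above; 'SomeAgent' is a placeholder for a class expected under models.agents, not a name confirmed by the source.

# Illustrative shared_object for build_model (assumed keys and placeholder class name).
shared_object = {
    'visualize': False,
    'model_class': 'SomeAgent',
}
model = build_model(shared_object)   # build_model also stores the RunEnv instance in shared_object['env']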
Example #28
def test(args):
    print('start testing')

    ddpg = DDPG()
    ddpg.load_model(args.model, load_memory=False)
    env = RunEnv(visualize=args.visualize, max_obstacles=args.max_obs)

    np.random.seed(args.seed)
    for i in range(1):
        step = 0
        state = env.reset(difficulty=2)
        fg = FeatureGenerator()

        state = fg.gen(state)
        #obs = fg.traj[0]
        #print(obs.left_knee_r, obs.right_knee_r)

        ep_reward = 0
        ep_memories = []
        while True:
            action = ddpg.select_action(list(state))
            next_state, reward, done, info = env.step(action.tolist())
            next_state = fg.gen(next_state)

            #obs = fg.traj[0]
            #print(obs.left_knee_r, obs.right_knee_r)

            print('step: {0:03d}'.format(step), end=', action: ')
            for act in action:
                print('{0:.3f}'.format(act), end=', ')
            print()

            state = next_state
            ep_reward += reward
            step += 1

            print('reward:', ep_reward)

            if done:
                break

        print('\nEpisode: {} Reward: {}, n_steps: {}'.format(
            i, ep_reward, step))
Example #29
    def create(self, env_id, seed=None):
        try:
            if (env_id == 'osim'):
                from osim.env import RunEnv
                env = RunEnv(visualize=True)
            else:
                env = gym.make(env_id)
            print('making environment')
            if seed:
                env.seed(seed)
        except gym.error.Error:
            raise InvalidUsage(
                "Attempted to look up malformed environment ID '{}'".format(
                    env_id))

        instance_id = str(uuid.uuid4().hex)[:self.id_len]
        self.envs[instance_id] = env
        self.envs_id[instance_id] = env_id
        return instance_id
Example #30
def Simulation(proxy_agent, episodes, vis=False):
    env = RunEnv(visualize=vis)
    observation = env.reset(difficulty=0)
    memory = random.randint(1000, 2000)
    tau = random.uniform(0.01, .9)
    epsilon = random.uniform(.15, .9)
    target = proxy_agent.ProduceTargetActorCritic( memory, tau, epsilon )
    batches =  [ 16, 32, 64, 128]
    batchsize = batches[random.randint(0,len(batches)-1)]
    for episode in range(0, episodes):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        observation = np.array(observation)
        Preprocess = Preprocessing(observation, delta=0.01)
        prevState = Preprocess.GetState(observation)
        if(vis):
            target.OUprocess(0, 0.15, 0.0)
        else:
            target.OUprocess(random.random(), 0.15,0.0)
        pelvis_y = 0

        for i in range(1,1000):
            observation, reward, done, info = env.step(action)
            observation = np.array(observation)
            #means it didn't go the full simulation
            if i > 1:
                reward += (observation[2] - pelvis_y)*0.01 #penalty for pelvis going down
            reward = env.current_state[4] * 0.01
            reward += 0.01  # small reward for still standing
            reward += min(0, env.current_state[22] - env.current_state[1]) * 0.1  # penalty for head behind pelvis
            reward -= sum([max(0.0, k - 0.1) for k in [env.current_state[7], env.current_state[10]]]) * 0.02  # penalty for straight legs


            if done and i < 1000:
                reward = 0

            state = Preprocess.GetState(observation)
            s,a,r,sp = Preprocess.ConvertToTensor(prevState,action, reward, state)
            target.addToMemory(s,a,r,sp)

                #        env.render()
            if done:
                env.reset(difficulty = 0, seed = None) #resets the environment if done is true
                if(target.primedToLearn()):

                    lock.acquire()
                    proxy_agent.PerformUpdate(batchsize, target)
                    target.UpdateTargetNetworks(agent.getCritic(), agent.getActor())
                    print("saving actor")
                    proxy_agent.saveActorCritic()
                    print("actor saved")
                    lock.release()
                print("reseting environment" + str(episode))
                break
            action = target.selectAction(s)
            action = action.numpy()
            prevState = state;
Example #31
class Worker:

     def __init__(self,wid,diff):
          self.wid = wid
          self.env = RunEnv(visualize=False)
          self.dif = diff
          self.Actor = models.create_actor(args.feature,18)

     def choose_action(self,state):
          state = torch.from_numpy(state).unsqueeze(0)
          action_mean, _, action_std = self.Actor(Variable(state))
          action = torch.normal(action_mean, action_std)
          return action

     def work(self,globalPPO):
          self.Actor.load_state_dict(globalPPO.state_dict())
          while True:
               ep_r = 0
               step_count = 0
               state1,state2,state3,state = [0]*60, [0]*60, [0]*60, [0]*60
               balls = []
               state = self.env.reset(difficulty = self.dif)

               state1, state2, state3, state=process_observation(state1, state2, state3, state,balls)
               state = numpy.array(state)
               buffer_s, buffer_a, buffer_r = [], [], []
               while True:
                    if not ROLLING_EVENT.is_set():
                         ROLLING_EVENT.wait()
                         self.Actor.load_state_dict(globalPPO.state_dict())
                         buffer_s, buffer_a, buffer_r = [], [], []
                    a = self.choose_action(state)

                    r = 0
                    _, _r, _, _ = self.env.step(a)
                    r += _r
                    _, _r, _, _ = self.env.step(a)
                    r += _r
                    next_state, _r, done, _ = self.env.step(a)
                    r += _r

                    buffer_s.append(state)
                    buffer_a.append(a)
                    buffer_r.append(r)

                    addball_if_new(next_state,balls)
                    state1, state2, state3, next_state=process_observation(state1, state2, state3, next_state,balls)
                    next_state = numpy.array(next_state)
                    state = next_state
                    ep_r = ep_r + r

                    GLOBAL_UPDATE_COUNTER.set()
                    if done or FILL.is_set():
from osim.env import RunEnv
import numpy as np
import copy
import pickle

env = RunEnv(visualize=False)
observation = env.reset(difficulty = 0)
sin=np.sin
file_Name = "w_best"

array=np.array

T=4


alpha=0.01
alpha_0=0.01
#TODO: we should exploit the Fourier property for which higher harmonics weights tend to decays as 1/x^n for smooth and continous functions

#I initialize to 0 the weights list, 4 weights for each muscle (I compose the periodic function with 4 elements of a Fourier Series)
#I define weights only for 9 periodic functions, as I assume that the legs move symmetrically in time.

w=[]

for i in range(9):
    w.append(np.array([0.,0.,0.,0.,0.,0.,0.,0.]))



def output(a,T,t):
    # Output of a 4th degree Fourier Series of sin.