    def restore(self):
        obs, actions, rewards, next_obs, terminals, weights = [], [], [], [], [], []
        with open(
                "/home/yunke/prl_proj/panda_ws/src/franka_cal_sim/python/replay_buffer.txt",
                "r") as f:
            for line in f:
                cols = line.strip().split('\t')
                obs_t = np.array([float(v) for v in cols[0].split(',')])
                obs.append(obs_t)
                action = np.array([float(v) for v in cols[1].split(',')])
                actions.append(action)
                rewards.append(float(cols[2]))
                obs_tp1 = np.array([float(v) for v in cols[3].split(',')])
                next_obs.append(obs_tp1)
                # bool("False") would be True, so parse the done flag explicitly
                terminals.append(cols[4].strip() in ("True", "true", "1"))
                weights.append(float(cols[5]))

        batch = SampleBatch({
            "obs": obs,
            "actions": actions,
            "rewards": rewards,
            "new_obs": next_obs,
            "dones": terminals,
            "weights": weights
        })

        # Re-insert the restored transitions; DEFAULT_POLICY_ID assumes the
        # single-policy setup used by the other snippets here.
        for i in range(len(obs)):
            self.replay_buffers[DEFAULT_POLICY_ID].add(
                pack_if_needed(obs[i]), actions[i], rewards[i],
                pack_if_needed(next_obs[i]), terminals[i])
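
For reference, a minimal sketch of a companion writer for the tab-separated format that restore() parses: one transition per line, with the obs/actions/new_obs vectors comma-joined. The dump_transitions name and its arguments are illustrative, not part of the original code.

import numpy as np

def dump_transitions(path, transitions):
    # transitions: iterable of (obs, action, reward, new_obs, done, weight)
    with open(path, "w") as f:
        for obs, action, reward, new_obs, done, weight in transitions:
            f.write("\t".join([
                ",".join(str(v) for v in np.asarray(obs).ravel()),
                ",".join(str(v) for v in np.asarray(action).ravel()),
                str(float(reward)),
                ",".join(str(v) for v in np.asarray(new_obs).ravel()),
                str(bool(done)),
                str(float(weight)),
            ]) + "\n")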
Example #2
    def step(self):
        with self.update_weights_timer:
            if self.remote_evaluators:
                weights = ray.put(self.local_evaluator.get_weights())
                for e in self.remote_evaluators:
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.remote_evaluators:
                batch = SampleBatch.concat_samples(
                    ray_get_and_free(
                        [e.sample.remote() for e in self.remote_evaluators]))
            else:
                batch = self.local_evaluator.sample()

            # Handle everything as if multiagent
            if isinstance(batch, SampleBatch):
                batch = MultiAgentBatch({DEFAULT_POLICY_ID: batch},
                                        batch.count)

            for policy_id, s in batch.policy_batches.items():
                for row in s.rows():
                    self.replay_buffers[policy_id].add(
                        pack_if_needed(row["obs"]),
                        row["actions"],
                        row["rewards"],
                        pack_if_needed(row["new_obs"]),
                        row["dones"],
                        weight=None)

        if self.num_steps_sampled >= self.replay_starts:
            self._optimize()

        self.num_steps_sampled += batch.count
Example #3
    def step(self):
        with self.update_weights_timer:
            if self.remote_evaluators:
                weights = ray.put(self.local_evaluator.get_weights())
                for e in self.remote_evaluators:
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.remote_evaluators:
                batch = SampleBatch.concat_samples(
                    ray.get(
                        [e.sample.remote() for e in self.remote_evaluators]))
            else:
                batch = self.local_evaluator.sample()

            # Handle everything as if multiagent
            if isinstance(batch, SampleBatch):
                batch = MultiAgentBatch({
                    DEFAULT_POLICY_ID: batch
                }, batch.count)

            for policy_id, s in batch.policy_batches.items():
                for row in s.rows():
                    self.replay_buffers[policy_id].add(
                        pack_if_needed(row["obs"]),
                        row["actions"],
                        row["rewards"],
                        pack_if_needed(row["new_obs"]),
                        row["dones"],
                        weight=None)

        if self.num_steps_sampled >= self.replay_starts:
            self._optimize()

        self.num_steps_sampled += batch.count
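
The weight sync at the top of step() serializes the weights once with ray.put and hands the resulting object ref to every remote worker, instead of shipping a fresh copy per worker. A minimal standalone sketch of that idiom, with a dummy Worker actor standing in for the RLlib evaluators:

import ray

@ray.remote
class Worker:
    def __init__(self):
        self.weights = None

    def set_weights(self, weights):
        # Ray delivers the value behind the object ref, already deserialized.
        self.weights = weights

ray.init(ignore_reinit_error=True)
workers = [Worker.remote() for _ in range(4)]
weights_ref = ray.put({"fc1": [0.1, 0.2, 0.3]})  # serialize the weights once
ray.get([w.set_weights.remote(weights_ref) for w in workers])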
Example #4
    def __call__(self, batch: SampleBatchType):
        for policy_id, s in batch.policy_batches.items():
            for row in s.rows():
                self.replay_buffers.buffers[policy_id].add(
                    pack_if_needed(row["obs"]),
                    row["actions"],
                    row["rewards"],
                    pack_if_needed(row["new_obs"]),
                    row["dones"],
                    weight=None)
        return batch
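
Most of these snippets run observations through pack_if_needed before inserting them into the buffer, so large arrays are stored compressed. A quick round-trip sketch, assuming the matching unpack_if_needed helper from ray.rllib.utils.compression:

import numpy as np
from ray.rllib.utils.compression import pack_if_needed, unpack_if_needed

obs = np.random.rand(84, 84, 4).astype(np.float32)
packed = pack_if_needed(obs)         # compressed blob when LZ4 is available,
                                     # otherwise the array passes through unchanged
restored = unpack_if_needed(packed)
assert np.allclose(obs, restored)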
Example #5
    def __call__(self, batch: SampleBatchType):
        # Handle everything as if multiagent
        if isinstance(batch, SampleBatch):
            batch = MultiAgentBatch({DEFAULT_POLICY_ID: batch}, batch.count)

        for policy_id, s in batch.policy_batches.items():
            for row in s.rows():
                self.replay_buffers[policy_id].add(
                    pack_if_needed(row["obs"]),
                    row["actions"],
                    row["rewards"],
                    pack_if_needed(row["new_obs"]),
                    row["dones"],
                    weight=None)
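
Every callable here walks the batch with SampleBatch.rows(), which yields one dict per timestep keyed by column name. A minimal sketch with dummy data (the SampleBatch import path varies slightly across RLlib versions):

import numpy as np
from ray.rllib.policy.sample_batch import SampleBatch

demo = SampleBatch({
    "obs": np.zeros((2, 4), dtype=np.float32),
    "actions": np.array([0, 1]),
    "rewards": np.array([0.5, -0.1]),
    "new_obs": np.ones((2, 4), dtype=np.float32),
    "dones": np.array([False, True]),
})
for row in demo.rows():
    # Each row is a plain dict: one observation, one action, one done flag, ...
    print(row["obs"].shape, row["actions"], row["dones"])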
Example #6
    def __call__(self, batch: SampleBatchType):
        x = 0
        for policy_id, s in batch.policy_batches.items():
            if policy_id in self.policies_to_train:
                for row in s.rows():
                    flag = row["mode"] == MODE.best_response.value
                    if flag:
                        # Transition must be inserted in the reservoir buffer
                        self.reservoir_buffers.buffers[policy_id].add(
                            pack_if_needed(row["obs"]), row["actions"])
                        self.replay_buffers.steps[policy_id] += 1

                    bb = SampleBatch({
                        'obs': row["obs"].reshape(1, -1),
                        'actions': row['actions'].reshape(1, -1),
                        'rewards': row['rewards'].reshape(1, -1),
                        'new_obs': row['new_obs'].reshape(1, -1),
                        'dones': np.array([row['dones']]),
                        "eps_id": np.array([row['eps_id']]),
                        'unroll_id': np.array([row['unroll_id']]),
                        'agent_index': np.array([row['agent_index']])
                    })
                    bb.compress(bulk=True)
                    self.replay_buffers.buffers[policy_id].add_batch(bb)
                    self.reservoir_buffers.steps[policy_id] += 1

        return batch
Example #7
    def step(self):
        with self.update_weights_timer:
            if self.workers.remote_workers():
                weights = ray.put(self.workers.local_worker().get_weights())
                for e in self.workers.remote_workers():
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.workers.remote_workers():
                batch = SampleBatch.concat_samples(
                    ray_get_and_free([
                        e.sample.remote()
                        for e in self.workers.remote_workers()
                    ]))
            else:
                batch = self.workers.local_worker().sample()

            # Handle everything as if multiagent
            if isinstance(batch, SampleBatch):
                batch = MultiAgentBatch({DEFAULT_POLICY_ID: batch},
                                        batch.count)

            for policy_id, s in batch.policy_batches.items():
                for row in s.rows():
                    self.replay_buffers[policy_id].add(
                        pack_if_needed(row["obs"]),
                        row["actions"],
                        row["rewards"],
                        pack_if_needed(row["new_obs"]),
                        row["dones"],
                        None,
                        row["action_logp"],
                        # row["diversity_advantages"],
                        row["diversity_rewards"],
                        # row["diversity_value_targets"],
                        # row["my_logits"],
                        row["prev_actions"],
                        row["prev_rewards"])

        if self.num_steps_sampled >= self.replay_starts:
            self._optimize()

        self.num_steps_sampled += batch.count
Example #8
    def step(self, attention_score_dic=None):
        with self.update_weights_timer:
            if self.workers.remote_workers():
                weights = ray.put(self.workers.local_worker().get_weights())
                for e in self.workers.remote_workers():
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.workers.remote_workers():
                batch = SampleBatch.concat_samples(
                    ray_get_and_free([
                        e.sample.remote()
                        for e in self.workers.remote_workers()
                    ]))
            else:
                batch = self.workers.local_worker().sample()

            # Handle everything as if multiagent
            if isinstance(batch, SampleBatch):
                batch = MultiAgentBatch({DEFAULT_POLICY_ID: batch},
                                        batch.count)
            # For Gamma Reward by LJJ (see the local history for the changes)
            for policy_id, s in batch.policy_batches.items():
                for row in s.rows():
                    self.pre_replay_buffers[policy_id].add(
                        pack_if_needed(row["obs"]),
                        row["actions"],
                        row["rewards"],
                        pack_if_needed(row["new_obs"]),
                        row["dones"],
                        weight=None)

            if self.num_steps_presampled >= self.memory_thres:
                self._preprocess(batch, attention_score_dic)

            self.num_steps_presampled += batch.count
Example #9
    def step(self):
        with self.update_weights_timer:
            if self.workers.remote_workers():
                # !!!!! CHANGED FROM ORIGINAL !!!! doesn't sync policies we aren't training
                weights = ray.put(self.workers.local_worker().get_weights(
                    policies=self.workers_only_sync_policy_list))
                for e in self.workers.remote_workers():
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.workers.remote_workers():
                batch = SampleBatch.concat_samples(
                    ray_get_and_free([
                        e.sample.remote()
                        for e in self.workers.remote_workers()
                    ]))
            else:
                batch = self.workers.local_worker().sample()

            # Handle everything as if multiagent
            if isinstance(batch, SampleBatch):
                batch = MultiAgentBatch({DEFAULT_POLICY_ID: batch},
                                        batch.count)

            for policy_id, s in batch.policy_batches.items():
                for row in s.rows():
                    self.replay_buffers[policy_id].add(
                        pack_if_needed(row["obs"]),
                        row["actions"],
                        row["rewards"],
                        pack_if_needed(row["new_obs"]),
                        row["dones"],
                        weight=None)

        if self.num_steps_sampled >= self.replay_starts:
            self._optimize()

        self.num_steps_sampled += batch.count
Example #10
    def step(self):
        with self.update_weights_timer:
            if self.remote_evaluators:
                weights = ray.put(self.local_evaluator.get_weights())
                for e in self.remote_evaluators:
                    e.set_weights.remote(weights)

        with self.sample_timer:
            if self.remote_evaluators:
                batch = SampleBatch.concat_samples(
                    ray.get(
                        [e.sample.remote() for e in self.remote_evaluators]))
            else:
                batch = self.local_evaluator.sample()
            for row in batch.rows():
                self.replay_buffer.add(
                    pack_if_needed(row["obs"]), row["actions"], row["rewards"],
                    pack_if_needed(row["new_obs"]),
                    row["dones"], row["weights"])

        if len(self.replay_buffer) >= self.replay_starts:
            self._optimize()

        self.num_steps_sampled += batch.count
Example #11
    def add(self, **kwargs):
        assert len(kwargs) == len(self.expected_keys) and sorted(
            kwargs.keys()) == self.expected_keys

        for k in kwargs.keys():
            if k in self.can_pack_list:
                kwargs[k] = pack_if_needed(kwargs[k])

        data = [kwargs[k] for k in self.expected_keys]

        if len(self._storage) < self._maxsize:
            self._storage.append(data)
            self._est_size_bytes += sum(sys.getsizeof(d) for d in data)
        else:
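            # Reservoir sampling (Algorithm R): keep the new item with
            # probability _maxsize / (_num_added + 1) and overwrite a
            # uniformly chosen slot, so every item seen so far is equally
            # likely to remain in the buffer.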
            idx = np.random.randint(0, self._num_added + 1)
            if idx < self._maxsize:
                self._storage[idx] = data

                self._evicted_hit_stats.push(self._hit_count[idx])
                self._hit_count[idx] = 0

        self._num_added += 1
Example #12
    def __call__(self, batch: SampleBatchType):
        x = 0
        for policy_id, s in batch.policy_batches.items():
            if policy_id in self.policies_to_train:
                for row in s.rows():
                    if row["mode"] == MODE.best_response.value:
                        # Transition must be inserted in the reservoir buffer
                        self.reservoir_buffers.buffers[policy_id].add(
                            pack_if_needed(row["obs"]), row["actions"])
                        self.replay_buffers.steps[policy_id] += 1

                episode_ids = np.unique(s['eps_id'])
                for ep_id in episode_ids:
                    sample_ids = np.where(s["eps_id"] == ep_id)
                    bb = SampleBatch({
                        'obs': s["obs"][sample_ids],
                        'actions': s['actions'][sample_ids],
                        'rewards': s['rewards'][sample_ids],
                        'new_obs': s['new_obs'][sample_ids],
                        'dones': np.array(s['dones'][sample_ids]),
                        "eps_id": np.array(s['eps_id'][sample_ids]),
                        'unroll_id': np.array(s['unroll_id'][sample_ids]),
                        'agent_index': np.array(s['agent_index'][sample_ids])
                    })
                    bb.compress(bulk=True)
                    self.replay_buffers.buffers[policy_id].add_batch(bb)
                    self.reservoir_buffers.steps[policy_id] += bb.count

        return batch