Beispiel #1
0
 def with_replaced_reward(self, new_r):
     """Return a new TrajBatch that reuses this batch's data but carries `new_r`.

     Each trajectory in `self.trajs` is paired with one element of `new_r`
     (via `util.safezip`, which presumably enforces equal lengths -- confirm
     in `util`) and rebuilt as a `Trajectory` with the same observation,
     observation-feature, action-distribution and action fields and the
     paired element as its reward.

     The batch-level `obs`, `obsfeat`, `adist`, `a` and `time` attributes are
     passed through unchanged; `new_r` itself is passed in the reward slot.
     """
     rebuilt = []
     for old_traj, replacement_r in util.safezip(self.trajs, new_r):
         rebuilt.append(
             Trajectory(
                 old_traj.obs_T_Do,
                 old_traj.obsfeat_T_Df,
                 old_traj.adist_T_Pa,
                 old_traj.a_T_Da,
                 replacement_r,
             )
         )
     return TrajBatch(
         rebuilt,
         self.obs,
         self.obsfeat,
         self.adist,
         self.a,
         new_r,
         self.time,
     )
Beispiel #2
0
 def __init__(self, arrays, lengths=None):
     """Hold variable-length data as a list of arrays and as one stacked array.

     Args:
         arrays: when `lengths` is None, a list of arrays that is
             concatenated along axis 0; otherwise the already-concatenated
             data, which is split along axis 0 according to `lengths`.
         lengths: optional per-item lengths. Values are converted to
             integers before being used as split points.

     Attributes set:
         arrays: the per-item arrays.
         stacked: the concatenated data.
         lengths: integer array of per-item lengths.
         boundaries: cumulative offsets into `stacked`, starting at 0 and
             ending at `len(stacked)`.
     """
     if lengths is None:
         # Without provided lengths, `arrays` is interpreted as a list of arrays
         # and self.lengths is set to the list of lengths for those arrays
         self.arrays = arrays
         self.stacked = np.concatenate(arrays, axis=0)
         self.lengths = np.array([len(a) for a in arrays])
     else:
         # With provided lengths, `arrays` is interpreted as concatenated data
         # and self.lengths is set to the provided lengths.
         # Cast to int *before* computing split points: np.split slices with
         # these values, and float indices would raise a TypeError.
         self.lengths = np.asarray(lengths, dtype=int)
         self.arrays = np.split(arrays, np.cumsum(self.lengths)[:-1])
         self.stacked = arrays
     # Sanity checks: every piece has its recorded length, and the pieces
     # together account for the whole stacked array.
     assert all(len(a) == n for a, n in util.safezip(self.arrays, self.lengths))
     self.boundaries = np.concatenate([[0], np.cumsum(self.lengths)])
     assert self.boundaries[-1] == len(self.stacked)
Beispiel #3
0
 def __init__(self, arrays, lengths=None):
     """Build the ragged container from a list of arrays or from stacked data.

     Args:
         arrays: a list of arrays (when `lengths` is None), concatenated
             along axis 0 to form `self.stacked`; or the concatenated data
             itself (when `lengths` is given), split along axis 0 to form
             `self.arrays`.
         lengths: optional sequence of per-item lengths; coerced to an
             integer array before it is used to compute split points.

     After construction the instance exposes `arrays`, `stacked`,
     `lengths`, and `boundaries` (0 followed by the cumulative sums of
     `lengths`).
     """
     if lengths is None:
         # Without provided lengths, `arrays` is interpreted as a list of arrays
         # and self.lengths is set to the list of lengths for those arrays
         self.arrays = arrays
         self.stacked = np.concatenate(arrays, axis=0)
         self.lengths = np.array([len(a) for a in arrays])
     else:
         # With provided lengths, `arrays` is interpreted as concatenated data
         # and self.lengths is set to the provided lengths.
         # The integer cast happens first so that the split indices handed to
         # np.split are integers even if `lengths` arrived as floats.
         self.lengths = np.asarray(lengths, dtype=int)
         split_points = np.cumsum(self.lengths)[:-1]
         self.arrays = np.split(arrays, split_points)
         self.stacked = arrays
     # Each piece must match its recorded length.
     assert all(len(a) == n for a, n in util.safezip(self.arrays, self.lengths))
     self.boundaries = np.concatenate([[0], np.cumsum(self.lengths)])
     # The last boundary must land exactly at the end of the stacked data.
     assert self.boundaries[-1] == len(self.stacked)
Beispiel #4
0
 def with_replaced_reward(self, new_r):
     """Return a new TrajSeqBatch with per-sequence rewards taken from `new_r`.

     `self.trajseqs` and `new_r` are walked in lockstep through
     `util.safezip` (presumably length-checked -- confirm in `util`). Each
     sequence is rebuilt as a `TrajSequence` from its existing fields plus
     the paired reward. The batch-level `obs`, `obsfeat`, `adist`, `a`, `m`
     and `seqlen` attributes are forwarded unchanged, with `new_r` in the
     reward position.
     """
     paired = util.safezip(self.trajseqs, new_r)
     replaced_seqs = [
         TrajSequence(
             seq.obs_s_T_Do,
             seq.obsfeat_T_Df,
             seq.adist_T_Pa,
             seq.a_T_Da,
             seq_r,
         )
         for seq, seq_r in paired
     ]
     return TrajSeqBatch(
         replaced_seqs,
         self.obs,
         self.obsfeat,
         self.adist,
         self.a,
         new_r,
         self.m,
         self.seqlen,
     )
Beispiel #5
0
 def with_replaced_reward(self, new_r):
     """Return a TrajBatch equal to this one except for its rewards.

     Every trajectory in `self.trajs` is rebuilt with its matching element
     of `new_r` as the reward; pairing goes through `util.safezip`
     (presumably requires equal lengths -- confirm in `util`). The
     batch-level `obs`, `obsfeat`, `adist`, `a` and `time` attributes are
     reused as-is and `new_r` is passed as the batch reward.
     """
     def _with_reward(traj, reward):
         # Same trajectory fields, only the reward slot differs.
         return Trajectory(
             traj.obs_T_Do,
             traj.obsfeat_T_Df,
             traj.adist_T_Pa,
             traj.a_T_Da,
             reward,
         )

     swapped = [_with_reward(t, r) for t, r in util.safezip(self.trajs, new_r)]
     return TrajBatch(
         swapped, self.obs, self.obsfeat, self.adist, self.a, new_r, self.time
     )