def with_replaced_reward(self, new_r):
    """Return a new TrajBatch that carries `new_r` in place of the rewards.

    Each trajectory in `self.trajs` is paired with one element of `new_r`
    and rebuilt as a `Trajectory` that reuses the old observations,
    observation features, action distributions and actions.

    Args:
        new_r: Iterable of per-trajectory rewards, paired one-to-one with
            `self.trajs`. The same object is also handed unchanged to the
            `TrajBatch` constructor as the batch-level reward.

    Returns:
        A `TrajBatch` built from the rebuilt trajectories, this batch's
        `obs`, `obsfeat`, `adist`, `a` and `time`, and `new_r`.
    """
    rebuilt = []
    # NOTE(review): util.safezip presumably rejects length mismatches
    # between self.trajs and new_r -- confirm against util.
    for old_traj, reward in util.safezip(self.trajs, new_r):
        rebuilt.append(
            Trajectory(
                old_traj.obs_T_Do,
                old_traj.obsfeat_T_Df,
                old_traj.adist_T_Pa,
                old_traj.a_T_Da,
                reward,
            )
        )
    return TrajBatch(
        rebuilt,
        self.obs,
        self.obsfeat,
        self.adist,
        self.a,
        new_r,
        self.time,
    )
def __init__(self, arrays, lengths=None):
    """Store variable-length arrays in both per-piece and concatenated form.

    Args:
        arrays: When `lengths` is None, a list of arrays that are
            concatenated along axis 0. Otherwise the already-concatenated
            data, which is split back into pieces using `lengths`.
        lengths: Optional per-piece lengths describing how the
            concatenated `arrays` is partitioned along axis 0.

    Attributes set:
        arrays: The list of individual pieces.
        stacked: The concatenated data.
        lengths: Array holding the length of each piece.
        boundaries: Cumulative lengths with a leading 0; its last entry is
            asserted to equal len(stacked).
    """
    if lengths is not None:
        # Flat input: `arrays` is the concatenated data, so cut it at the
        # running totals of `lengths` (the final total is the end of the
        # data, not a cut point, hence the [:-1]).
        split_points = np.cumsum(lengths)[:-1]
        self.arrays = np.split(arrays, split_points)
        self.stacked = arrays
        self.lengths = np.asarray(lengths, dtype=int)
    else:
        # List input: `arrays` is already a list of pieces, so derive the
        # concatenated view and the lengths from it.
        self.arrays = arrays
        self.stacked = np.concatenate(arrays, axis=0)
        self.lengths = np.array([len(piece) for piece in arrays])

    # Consistency checks: every piece matches its recorded length, and the
    # lengths add up to the size of the concatenated data.
    assert all(
        len(piece) == expected
        for piece, expected in util.safezip(self.arrays, self.lengths)
    )
    offsets = np.cumsum(self.lengths)
    self.boundaries = np.concatenate([[0], offsets])
    assert self.boundaries[-1] == len(self.stacked)
def with_replaced_reward(self, new_r):
    """Return a new TrajSeqBatch that carries `new_r` in place of the rewards.

    Each entry of `self.trajseqs` is paired with one element of `new_r`
    and rebuilt as a `TrajSequence` that reuses the old observation,
    observation-feature, action-distribution and action fields.

    Args:
        new_r: Iterable of per-sequence rewards, paired one-to-one with
            `self.trajseqs`. The same object is also handed unchanged to
            the `TrajSeqBatch` constructor as the batch-level reward.

    Returns:
        A `TrajSeqBatch` built from the rebuilt sequences, this batch's
        `obs`, `obsfeat`, `adist`, `a`, `m` and `seqlen`, and `new_r`.
    """
    rebuilt = []
    for old_seq, reward in util.safezip(self.trajseqs, new_r):
        # NOTE(review): the observation attribute here is `obs_s_T_Do`,
        # unlike the other per-trajectory field names (`obsfeat_T_Df`,
        # ...) -- confirm this matches TrajSequence's attribute name.
        rebuilt.append(
            TrajSequence(
                old_seq.obs_s_T_Do,
                old_seq.obsfeat_T_Df,
                old_seq.adist_T_Pa,
                old_seq.a_T_Da,
                reward,
            )
        )
    return TrajSeqBatch(
        rebuilt,
        self.obs,
        self.obsfeat,
        self.adist,
        self.a,
        new_r,
        self.m,
        self.seqlen,
    )
def with_replaced_reward(self, new_r):
    """Build a copy of this batch whose rewards are taken from `new_r`.

    Args:
        new_r: Iterable of replacement rewards, one per trajectory in
            `self.trajs`. It is also forwarded as-is to the `TrajBatch`
            constructor as the batch-level reward.

    Returns:
        A `TrajBatch` whose trajectories reuse each original trajectory's
        `obs_T_Do`, `obsfeat_T_Df`, `adist_T_Pa` and `a_T_Da` together with
        the matching element of `new_r`. The batch-level `obs`, `obsfeat`,
        `adist`, `a` and `time` are passed through from this batch.
    """
    # NOTE(review): util.safezip is assumed to behave like zip with a
    # length check -- confirm against util.
    pairs = util.safezip(self.trajs, new_r)
    swapped = [
        Trajectory(
            src.obs_T_Do,
            src.obsfeat_T_Df,
            src.adist_T_Pa,
            src.a_T_Da,
            r_for_src,
        )
        for src, r_for_src in pairs
    ]
    return TrajBatch(
        swapped, self.obs, self.obsfeat, self.adist, self.a, new_r, self.time
    )