def generate_samples(
    self, num_transitions, epsilon, with_possible=True
) -> Samples:
    samples = GridworldContinuous.generate_samples(
        self, num_transitions, epsilon, with_possible
    )
    enum_states = []
    for state in samples.states:
        enum_states.append({0: float(list(state.keys())[0])})
    enum_next_states = []
    for state in samples.next_states:
        enum_next_states.append({0: float(list(state.keys())[0])})
    return Samples(
        mdp_ids=samples.mdp_ids,
        sequence_numbers=samples.sequence_numbers,
        states=enum_states,
        actions=samples.actions,
        propensities=samples.propensities,
        rewards=samples.rewards,
        next_states=enum_next_states,
        next_actions=samples.next_actions,
        terminals=samples.terminals,
        possible_next_actions=samples.possible_next_actions,
        reward_timelines=samples.reward_timelines,
    )
def generate_samples(
    self, num_transitions, epsilon, with_possible=True
) -> Samples:
    samples = self.generate_samples_discrete(
        num_transitions, epsilon, with_possible
    )
    continuous_actions = [self.action_to_features(a) for a in samples.actions]
    # Compare with != rather than `is not`: identity comparison against a
    # string literal is interpreter-dependent and a bug.
    continuous_next_actions = [
        self.action_to_features(a) if a != "" else {}
        for a in samples.next_actions
    ]
    continuous_possible_next_actions = []
    for possible_next_action in samples.possible_next_actions:
        continuous_possible_next_actions.append(
            [
                self.action_to_features(a) if a is not None else {}
                for a in possible_next_action
            ]
        )
    return Samples(
        states=samples.states,
        actions=continuous_actions,
        propensities=samples.propensities,
        rewards=samples.rewards,
        next_states=samples.next_states,
        next_actions=continuous_next_actions,
        is_terminal=samples.is_terminal,
        possible_next_actions=continuous_possible_next_actions,
        reward_timelines=samples.reward_timelines,
    )
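# A minimal sketch of what `action_to_features` is assumed to do in the
# conversions above: map a discrete grid action onto a sparse
# {feature_id: value} dict that the continuous pipeline can consume. The
# mapping below is hypothetical, not the repo's actual implementation.
def action_to_features_sketch(action: str) -> dict:
    # One-hot over the four grid moves; the feature ids are arbitrary.
    action_ids = {"U": 0, "D": 1, "L": 2, "R": 3}
    return {action_ids[action]: 1.0}

# e.g. action_to_features_sketch("D") -> {1: 1.0}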
def test_predictor_export(self):
    """Verify that q-values before model export equal q-values after
    model export. Meant to catch issues with export logic."""
    environment = Gridworld()
    trainer = self.get_sarsa_trainer(environment, False)
    samples = Samples(
        mdp_ids=["0"],
        sequence_numbers=[0],
        states=[{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 15: 1.0, 24: 1.0}],
        actions=["D"],
        action_probabilities=[0.5],
        rewards=[0],
        possible_actions=[["R", "D"]],
        next_states=[{5: 1.0}],
        next_actions=["U"],
        terminals=[False],
        possible_next_actions=[["R", "U", "D"]],
    )
    tdps = environment.preprocess_samples(samples, 1)
    pre_export_q_values = trainer.q_network(tdps[0].states).detach().numpy()

    predictor = trainer.predictor()
    with tempfile.TemporaryDirectory() as tmpdirname:
        tmp_path = os.path.join(tmpdirname, "model")
        predictor.save(tmp_path, "minidb")
        new_predictor = DQNPredictor.load(tmp_path, "minidb", False)
        post_export_q_values = new_predictor.predict([samples.states[0]])

    for i, action in enumerate(environment.ACTIONS):
        self.assertAlmostEqual(
            pre_export_q_values[0][i], post_export_q_values[0][action], places=4
        )
def generate_samples(self, num_transitions, epsilon, discount_factor) -> Samples:
    samples = GridworldContinuous.generate_samples(
        self, num_transitions, epsilon, discount_factor
    )
    enum_states = []
    for state in samples.states:
        enum_states.append({0: float(list(state.keys())[0])})
    enum_next_states = []
    for state in samples.next_states:
        enum_next_states.append({0: float(list(state.keys())[0])})
    return Samples(
        mdp_ids=samples.mdp_ids,
        sequence_numbers=samples.sequence_numbers,
        states=enum_states,
        actions=samples.actions,
        action_probabilities=samples.action_probabilities,
        rewards=samples.rewards,
        possible_actions=samples.possible_actions,
        next_states=enum_next_states,
        next_actions=samples.next_actions,
        terminals=samples.terminals,
        possible_next_actions=samples.possible_next_actions,
        episode_values=samples.episode_values,
    )
def generate_samples(
    self,
    num_transitions,
    epsilon,
    discount_factor,
    multi_steps: Optional[int] = None,
) -> Samples:
    samples = self.generate_samples_discrete(
        num_transitions, epsilon, discount_factor, multi_steps
    )
    continuous_actions = [self.action_to_features(a) for a in samples.actions]
    continuous_possible_actions = []
    for possible_action in samples.possible_actions:
        continuous_possible_actions.append(
            [
                self.action_to_features(a) if a is not None else {}
                for a in possible_action
            ]
        )
    if multi_steps is not None:
        # Multi-step samples carry one list of next actions per step, so the
        # converted structure is nested one level deeper.
        continuous_next_actions = [
            [self.action_to_features(a) if a != "" else {} for a in next_action]
            for next_action in samples.next_actions
        ]
        continuous_possible_next_actions = []
        for possible_next_actions in samples.possible_next_actions:
            continuous_possible_next_actions.append(
                [
                    [
                        self.action_to_features(a) if a is not None else {}
                        for a in pna
                    ]
                    for pna in possible_next_actions
                ]
            )
    else:
        continuous_next_actions = [
            self.action_to_features(a) if a != "" else {}
            for a in samples.next_actions
        ]
        continuous_possible_next_actions = []
        for possible_next_action in samples.possible_next_actions:
            continuous_possible_next_actions.append(
                [
                    self.action_to_features(a) if a is not None else {}
                    for a in possible_next_action
                ]
            )
    return Samples(
        mdp_ids=samples.mdp_ids,
        sequence_numbers=samples.sequence_numbers,
        states=samples.states,
        actions=continuous_actions,
        action_probabilities=samples.action_probabilities,
        rewards=samples.rewards,
        possible_actions=continuous_possible_actions,
        next_states=samples.next_states,
        next_actions=continuous_next_actions,
        terminals=samples.terminals,
        possible_next_actions=continuous_possible_next_actions,
    )
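# Shape sketch for the branch above (illustrative values, not from the repo;
# feature dicts assume the hypothetical one-hot mapping sketched earlier).
# With multi_steps=2, next_actions for one transition is a per-step list:
#   samples.next_actions[i]     == ["D", "R"]
#   continuous_next_actions[i]  == [{1: 1.0}, {3: 1.0}]
# With multi_steps=None, it is a single action:
#   samples.next_actions[i]     == "D"
#   continuous_next_actions[i]  == {1: 1.0}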
def test_predictor_torch_export(self):
    """Verify that q-values before model export equal q-values after
    model export. Meant to catch issues with export logic."""
    environment = Gridworld()
    samples = Samples(
        mdp_ids=["0"],
        sequence_numbers=[0],
        sequence_number_ordinals=[1],
        states=[{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 15: 1.0, 24: 1.0}],
        actions=["D"],
        action_probabilities=[0.5],
        rewards=[0],
        possible_actions=[["R", "D"]],
        next_states=[{5: 1.0}],
        next_actions=["U"],
        terminals=[False],
        possible_next_actions=[["R", "U", "D"]],
    )
    tdps = environment.preprocess_samples(samples, 1)
    assert len(tdps) == 1, "Invalid number of data pages"

    trainer, exporter = self.get_modular_sarsa_trainer_exporter(
        environment, {}, False
    )
    input = rlt.PreprocessedState.from_tensor(tdps[0].states)
    pre_export_q_values = trainer.q_network(input).q_values.detach().numpy()

    preprocessor = Preprocessor(environment.normalization, False)
    cpu_q_network = trainer.q_network.cpu_model()
    cpu_q_network.eval()
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(cpu_q_network, preprocessor)
    serving_module = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names=environment.ACTIONS
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        buf = export_module_to_buffer(serving_module)
        tmp_path = os.path.join(tmpdirname, "model")
        with open(tmp_path, "wb") as f:
            f.write(buf.getvalue())
        predictor = DiscreteDqnTorchPredictor(torch.jit.load(tmp_path))
        post_export_q_values = predictor.predict([samples.states[0]])

    for i, action in enumerate(environment.ACTIONS):
        self.assertAlmostEqual(
            float(pre_export_q_values[0][i]),
            float(post_export_q_values[0][action]),
            places=4,
        )
def generate_samples(self, num_transitions, epsilon, discount_factor) -> Samples:
    samples = self.generate_samples_discrete(
        num_transitions, epsilon, discount_factor
    )
    continuous_actions = [self.action_to_features(a) for a in samples.actions]
    continuous_next_actions = [
        self.action_to_features(a) if a != "" else {}
        for a in samples.next_actions
    ]
    continuous_possible_actions = []
    for possible_action in samples.possible_actions:
        continuous_possible_actions.append(
            [
                self.action_to_features(a) if a is not None else {}
                for a in possible_action
            ]
        )
    continuous_possible_next_actions = []
    for possible_next_action in samples.possible_next_actions:
        continuous_possible_next_actions.append(
            [
                self.action_to_features(a) if a is not None else {}
                for a in possible_next_action
            ]
        )
    return Samples(
        mdp_ids=samples.mdp_ids,
        sequence_numbers=samples.sequence_numbers,
        states=samples.states,
        actions=continuous_actions,
        action_probabilities=samples.action_probabilities,
        rewards=samples.rewards,
        possible_actions=continuous_possible_actions,
        next_states=samples.next_states,
        next_actions=continuous_next_actions,
        terminals=samples.terminals,
        possible_next_actions=continuous_possible_next_actions,
        episode_values=samples.episode_values,
    )
def preprocess_samples(
    self,
    samples: Samples,
    minibatch_size: int,
    use_gpu: bool = False,
    one_hot_action: bool = True,
    normalize_actions: bool = True,
) -> List[TrainingDataPage]:
    logger.info("Shuffling...")
    samples.shuffle()
    logger.info("Sparse2Dense...")
    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)
    saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
    sorted_state_features, _ = sort_features_by_normalization(self.normalization)
    state_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_state_features
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
    next_state_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_state_features
    )
    sorted_action_features, _ = sort_features_by_normalization(
        self.normalization_action
    )
    saa = StackedAssociativeArray.from_dict_list(samples.actions, "action")
    action_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_action_features
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_actions, "next_action")
    next_action_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_action_features
    )
    action_probabilities = torch.tensor(
        samples.action_probabilities, dtype=torch.float32
    ).reshape(-1, 1)
    rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)

    # Flatten the ragged possible-next-action lists, remembering each row's
    # length so minibatches can be sliced out of the flat arrays below.
    pnas_lengths_list = []
    pnas_flat: List[Dict[str, float]] = []
    for pnas in samples.possible_next_actions:
        pnas_lengths_list.append(len(pnas))
        pnas_flat.extend(pnas)
    saa = StackedAssociativeArray.from_dict_list(pnas_flat, "possible_next_actions")
    pnas_lengths = torch.tensor(pnas_lengths_list, dtype=torch.int32)
    pna_lens_blob = "pna_lens_blob"
    workspace.FeedBlob(pna_lens_blob, pnas_lengths.numpy())
    possible_next_actions_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_action_features
    )
    state_pnas_tile_blob = C2.LengthsTile(next_state_matrix, pna_lens_blob)

    workspace.RunNetOnce(net)

    logger.info("Preprocessing...")
    state_preprocessor = Preprocessor(self.normalization, False)
    action_preprocessor = Preprocessor(self.normalization_action, False)

    states_ndarray = workspace.FetchBlob(state_matrix)
    states_ndarray = state_preprocessor.forward(states_ndarray)

    actions_ndarray = torch.from_numpy(workspace.FetchBlob(action_matrix))
    if normalize_actions:
        actions_ndarray = action_preprocessor.forward(actions_ndarray)

    next_states_ndarray = workspace.FetchBlob(next_state_matrix)
    next_states_ndarray = state_preprocessor.forward(next_states_ndarray)

    next_actions_ndarray = torch.from_numpy(workspace.FetchBlob(next_action_matrix))
    if normalize_actions:
        next_actions_ndarray = action_preprocessor.forward(next_actions_ndarray)

    logged_possible_next_actions = action_preprocessor.forward(
        workspace.FetchBlob(possible_next_actions_matrix)
    )
    state_pnas_tile = state_preprocessor.forward(
        workspace.FetchBlob(state_pnas_tile_blob)
    )
    logged_possible_next_state_actions = torch.cat(
        (state_pnas_tile, logged_possible_next_actions), dim=1
    )

    logger.info("Reward Timeline to Torch...")
    possible_next_actions_ndarray = logged_possible_next_actions
    possible_next_actions_state_concat = logged_possible_next_state_actions
    time_diffs = torch.ones([len(samples.states), 1])

    tdps = []
    pnas_start = 0
    logger.info("Batching...")
    for start in range(0, states_ndarray.shape[0], minibatch_size):
        end = start + minibatch_size
        if end > states_ndarray.shape[0]:
            break
        pnas_end = pnas_start + torch.sum(pnas_lengths[start:end])
        pnas = possible_next_actions_ndarray[pnas_start:pnas_end]
        pnas_concat = possible_next_actions_state_concat[pnas_start:pnas_end]
        pnas_start = pnas_end
        tdp = TrainingDataPage(
            states=states_ndarray[start:end],
            actions=actions_ndarray[start:end],
            propensities=action_probabilities[start:end],
            rewards=rewards[start:end],
            next_states=next_states_ndarray[start:end],
            next_actions=next_actions_ndarray[start:end],
            possible_next_actions=None,
            not_terminals=(pnas_lengths[start:end] > 0).reshape(-1, 1),
            time_diffs=time_diffs[start:end],
            possible_next_actions_lengths=pnas_lengths[start:end],
            possible_next_actions_state_concat=pnas_concat,
        )
        tdp.set_type(torch.cuda.FloatTensor if use_gpu else torch.FloatTensor)
        tdps.append(tdp)
    return tdps
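# A minimal, standalone sketch (hypothetical data) of the ragged-slicing
# scheme used in the batching loop above: per-row lengths let a flat array of
# possible next actions be carved into per-minibatch chunks.
import torch

pnas_lengths = torch.tensor([2, 3, 1, 4], dtype=torch.int32)  # actions per transition
pnas_flat = torch.arange(int(pnas_lengths.sum()))  # stand-in for the flat rows
start, end = 0, 2  # first minibatch of 2 transitions
pnas_end = int(torch.sum(pnas_lengths[start:end]))  # rows 0..4 belong to it
assert pnas_flat[:pnas_end].tolist() == [0, 1, 2, 3, 4]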
def preprocess_samples(
    self, samples: Samples, minibatch_size: int
) -> List[TrainingDataPage]:
    samples.shuffle()
    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)
    preprocessor = PreprocessorNet(True)
    saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
    state_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "state_norm",
        False,
        False,
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
    next_state_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "next_state_norm",
        False,
        False,
    )
    saa = StackedAssociativeArray.from_dict_list(samples.actions, "action")
    action_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization_action,
        "action_norm",
        False,
        False,
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_actions, "next_action")
    next_action_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization_action,
        "next_action_norm",
        False,
        False,
    )
    propensities = np.array(samples.propensities, dtype=np.float32).reshape(-1, 1)
    rewards = np.array(samples.rewards, dtype=np.float32).reshape(-1, 1)

    # Flatten the ragged possible-next-action lists, keeping per-row lengths.
    pnas_lengths_list = []
    pnas_flat: List[Dict[str, float]] = []
    for pnas in samples.possible_next_actions:
        pnas_lengths_list.append(len(pnas))
        pnas_flat.extend(pnas)
    saa = StackedAssociativeArray.from_dict_list(pnas_flat, "possible_next_actions")
    pnas_lengths = np.array(pnas_lengths_list, dtype=np.int32)
    possible_next_actions_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization_action,
        "possible_next_action_norm",
        False,
        False,
    )
    workspace.RunNetOnce(net)

    states_ndarray = workspace.FetchBlob(state_matrix)
    actions_ndarray = workspace.FetchBlob(action_matrix)
    next_states_ndarray = workspace.FetchBlob(next_state_matrix)
    next_actions_ndarray = workspace.FetchBlob(next_action_matrix)
    possible_next_actions_ndarray = workspace.FetchBlob(
        possible_next_actions_matrix
    )
    tdps = []
    pnas_start = 0
    for start in range(0, states_ndarray.shape[0], minibatch_size):
        end = start + minibatch_size
        if end > states_ndarray.shape[0]:
            break
        pnas_end = pnas_start + np.sum(pnas_lengths[start:end])
        pnas = possible_next_actions_ndarray[pnas_start:pnas_end]
        pnas_start = pnas_end
        tdps.append(
            TrainingDataPage(
                states=states_ndarray[start:end],
                actions=actions_ndarray[start:end],
                propensities=propensities[start:end],
                rewards=rewards[start:end],
                next_states=next_states_ndarray[start:end],
                next_actions=next_actions_ndarray[start:end],
                possible_next_actions=StackedArray(pnas_lengths[start:end], pnas),
                not_terminals=(pnas_lengths[start:end] > 0).reshape(-1, 1),
                reward_timelines=samples.reward_timelines[start:end]
                if samples.reward_timelines
                else None,
            )
        )
    return tdps
def preprocess_samples(
    self, samples: Samples, minibatch_size: int
) -> List[TrainingDataPage]:
    samples.shuffle()
    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)
    preprocessor = PreprocessorNet(True)
    saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
    state_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "state_norm",
        False,
        False,
        False,
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
    next_state_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization,
        "next_state_norm",
        False,
        False,
        False,
    )
    saa = StackedAssociativeArray.from_dict_list(samples.actions, "action")
    action_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization_action,
        "action_norm",
        False,
        False,
        False,
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_actions, "next_action")
    next_action_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization_action,
        "next_action_norm",
        False,
        False,
        False,
    )
    propensities = np.array(samples.propensities, dtype=np.float32).reshape(-1, 1)
    rewards = np.array(samples.rewards, dtype=np.float32).reshape(-1, 1)

    # Flatten the ragged possible-next-action lists, keeping per-row lengths.
    pnas_lengths_list = []
    pnas_flat: List[Dict[str, float]] = []
    for pnas in samples.possible_next_actions:
        pnas_lengths_list.append(len(pnas))
        pnas_flat.extend(pnas)
    saa = StackedAssociativeArray.from_dict_list(pnas_flat, "possible_next_actions")
    pnas_lengths = np.array(pnas_lengths_list, dtype=np.int32)
    pna_lens_blob = "pna_lens_blob"
    workspace.FeedBlob(pna_lens_blob, pnas_lengths)
    possible_next_actions_matrix, _ = preprocessor.normalize_sparse_matrix(
        saa.lengths,
        saa.keys,
        saa.values,
        self.normalization_action,
        "possible_next_action_norm",
        False,
        False,
        False,
    )
    # Tile each next state once per possible next action so state/action
    # pairs can be concatenated row-for-row below.
    state_pnas_tile_blob = C2.LengthsTile(next_state_matrix, pna_lens_blob)

    workspace.RunNetOnce(net)

    state_preprocessor = Preprocessor(self.normalization, False)
    action_preprocessor = Preprocessor(self.normalization_action, False)

    states_ndarray = workspace.FetchBlob(state_matrix)
    states_ndarray = state_preprocessor.forward(states_ndarray).numpy()

    actions_ndarray = workspace.FetchBlob(action_matrix)
    actions_ndarray = action_preprocessor.forward(actions_ndarray).numpy()

    next_states_ndarray = workspace.FetchBlob(next_state_matrix)
    next_states_ndarray = state_preprocessor.forward(next_states_ndarray).numpy()

    next_actions_ndarray = workspace.FetchBlob(next_action_matrix)
    next_actions_ndarray = action_preprocessor.forward(next_actions_ndarray).numpy()

    logged_possible_next_actions = action_preprocessor.forward(
        workspace.FetchBlob(possible_next_actions_matrix)
    )
    state_pnas_tile = state_preprocessor.forward(
        workspace.FetchBlob(state_pnas_tile_blob)
    )
    logged_possible_next_state_actions = torch.cat(
        (state_pnas_tile, logged_possible_next_actions), dim=1
    )

    possible_next_actions_ndarray = logged_possible_next_actions.cpu().numpy()
    next_state_pnas_concat = logged_possible_next_state_actions.cpu().numpy()
    time_diffs = np.ones(len(states_ndarray))

    # Roll each reward timeline up into a discounted episode value.
    episode_values = None
    if samples.reward_timelines is not None:
        episode_values = np.zeros(rewards.shape, dtype=np.float32)
        for i, reward_timeline in enumerate(samples.reward_timelines):
            for time_diff, reward in reward_timeline.items():
                episode_values[i, 0] += reward * (DISCOUNT ** time_diff)

    tdps = []
    pnas_start = 0
    for start in range(0, states_ndarray.shape[0], minibatch_size):
        end = start + minibatch_size
        if end > states_ndarray.shape[0]:
            break
        pnas_end = pnas_start + np.sum(pnas_lengths[start:end])
        pnas = possible_next_actions_ndarray[pnas_start:pnas_end]
        pnas_concat = next_state_pnas_concat[pnas_start:pnas_end]
        pnas_start = pnas_end
        tdps.append(
            TrainingDataPage(
                states=states_ndarray[start:end],
                actions=actions_ndarray[start:end],
                propensities=propensities[start:end],
                rewards=rewards[start:end],
                next_states=next_states_ndarray[start:end],
                next_actions=next_actions_ndarray[start:end],
                possible_next_actions=StackedArray(pnas_lengths[start:end], pnas),
                not_terminals=(pnas_lengths[start:end] > 0).reshape(-1, 1),
                episode_values=episode_values[start:end]
                if episode_values is not None
                else None,
                time_diffs=time_diffs[start:end],
                possible_next_actions_lengths=pnas_lengths[start:end],
                next_state_pnas_concat=pnas_concat,
            )
        )
    return tdps
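# A small standalone check of the discounting used above (hypothetical
# numbers; DISCOUNT is assumed to be 0.9 for this sketch): a timeline mapping
# time-offset -> reward collapses to sum(reward * DISCOUNT ** offset).
DISCOUNT = 0.9  # assumption for this sketch, not the repo's configured value
reward_timeline = {0: 1.0, 2: 5.0}
episode_value = sum(r * DISCOUNT ** t for t, r in reward_timeline.items())
assert abs(episode_value - (1.0 + 5.0 * 0.81)) < 1e-6  # 1.0 + 4.05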
def preprocess_samples(
    self,
    samples: Samples,
    minibatch_size: int,
    use_gpu: bool = False,
    one_hot_action: bool = True,
    normalize_actions: bool = True,
) -> List[TrainingDataPage]:
    logger.info("Shuffling...")
    samples.shuffle()
    logger.info("Sparse2Dense...")
    net = core.Net("gridworld_preprocessing")
    C2.set_net(net)
    saa = StackedAssociativeArray.from_dict_list(samples.states, "states")
    sorted_state_features, _ = sort_features_by_normalization(self.normalization)
    state_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_state_features
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_states, "next_states")
    next_state_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_state_features
    )
    sorted_action_features, _ = sort_features_by_normalization(
        self.normalization_action
    )
    saa = StackedAssociativeArray.from_dict_list(samples.actions, "action")
    action_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_action_features
    )
    saa = StackedAssociativeArray.from_dict_list(samples.next_actions, "next_action")
    next_action_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_action_features
    )
    action_probabilities = torch.tensor(
        samples.action_probabilities, dtype=torch.float32
    ).reshape(-1, 1)
    rewards = torch.tensor(samples.rewards, dtype=torch.float32).reshape(-1, 1)

    # Pad every possible-next-action list to a fixed width (the four grid
    # moves) and record a 0/1 mask so padding rows can be ignored later.
    max_action_size = 4
    pnas_mask_list: List[List[int]] = []
    pnas_flat: List[Dict[str, float]] = []
    for pnas in samples.possible_next_actions:
        pnas_mask_list.append([1] * len(pnas) + [0] * (max_action_size - len(pnas)))
        pnas_flat.extend(pnas)
        for _ in range(max_action_size - len(pnas)):
            pnas_flat.append({})  # Filler
    saa = StackedAssociativeArray.from_dict_list(pnas_flat, "possible_next_actions")
    pnas_mask = torch.Tensor(pnas_mask_list)
    possible_next_actions_matrix, _ = sparse_to_dense(
        saa.lengths, saa.keys, saa.values, sorted_action_features
    )
    workspace.RunNetOnce(net)

    logger.info("Preprocessing...")
    state_preprocessor = Preprocessor(self.normalization, False)
    action_preprocessor = Preprocessor(self.normalization_action, False)

    states_ndarray = workspace.FetchBlob(state_matrix)
    states_ndarray = state_preprocessor.forward(states_ndarray)

    actions_ndarray = torch.from_numpy(workspace.FetchBlob(action_matrix))
    if normalize_actions:
        actions_ndarray = action_preprocessor.forward(actions_ndarray)

    next_states_ndarray = workspace.FetchBlob(next_state_matrix)
    next_states_ndarray = state_preprocessor.forward(next_states_ndarray)

    # Because padding makes every row the same width, the next states can be
    # tiled with a plain repeat instead of LengthsTile.
    state_pnas_tile = next_states_ndarray.repeat(1, max_action_size).reshape(
        -1, next_states_ndarray.shape[1]
    )

    next_actions_ndarray = torch.from_numpy(workspace.FetchBlob(next_action_matrix))
    if normalize_actions:
        next_actions_ndarray = action_preprocessor.forward(next_actions_ndarray)

    logged_possible_next_actions = action_preprocessor.forward(
        workspace.FetchBlob(possible_next_actions_matrix)
    )
    assert state_pnas_tile.shape[0] == logged_possible_next_actions.shape[0], (
        "Invalid shapes: "
        + str(state_pnas_tile.shape)
        + " != "
        + str(logged_possible_next_actions.shape)
    )
    logged_possible_next_state_actions = torch.cat(
        (state_pnas_tile, logged_possible_next_actions), dim=1
    )

    logger.info("Reward Timeline to Torch...")
    time_diffs = torch.ones([len(samples.states), 1])

    tdps = []
    pnas_start = 0
    logger.info("Batching...")
    for start in range(0, states_ndarray.shape[0], minibatch_size):
        end = start + minibatch_size
        if end > states_ndarray.shape[0]:
            break
        # With fixed-width padding, each minibatch owns exactly
        # minibatch_size * max_action_size rows of the flat arrays.
        pnas_end = pnas_start + (minibatch_size * max_action_size)
        tdp = TrainingDataPage(
            states=states_ndarray[start:end],
            actions=actions_ndarray[start:end],
            propensities=action_probabilities[start:end],
            rewards=rewards[start:end],
            next_states=next_states_ndarray[start:end],
            next_actions=next_actions_ndarray[start:end],
            possible_next_actions=None,
            not_terminal=(pnas_mask[start:end, :].sum(dim=1, keepdim=True) > 0),
            time_diffs=time_diffs[start:end],
            possible_next_actions_mask=pnas_mask[start:end, :],
            possible_next_actions_state_concat=logged_possible_next_state_actions[
                pnas_start:pnas_end, :
            ],
        )
        pnas_start = pnas_end
        tdp.set_type(torch.cuda.FloatTensor if use_gpu else torch.FloatTensor)
        tdps.append(tdp)
    return tdps
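# A minimal mask sketch (hypothetical data) for the padded variant above:
# a row with only two legal moves gets two filler action dicts and mask
# [1, 1, 0, 0]; summing each mask row recovers the not-terminal flag. The
# padded layout trades a little wasted compute on filler rows for fixed-size
# slices, which are simpler to batch than the ragged-lengths scheme used in
# the earlier preprocess_samples variants.
import torch

pnas_mask = torch.tensor([[1.0, 1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]])
not_terminal = pnas_mask.sum(dim=1, keepdim=True) > 0
assert not_terminal.flatten().tolist() == [True, False]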