import numpy as np
import tensorflow as tf

# ShuffledHitsTable, filter_by_nb_hits, drop_padded_hits, ShuffledHitsColumns,
# Train80Partitioner, Tensor, OneHot and DatasetIncidentSingle come from the
# project's own modules.


def dataset_fast(path_table, batch_size, is_shuffle, nb_hits, is_train):
    table = ShuffledHitsTable(path_table)
    padding_size = table.padding_size
    table = filter_by_nb_hits(table, nb_hits)
    # table = drop_padded_hits(table, nb_hits)
    if is_train is not None:
        # Keep only the train (or test) partition of the rows.
        table = ShuffledHitsColumns(
            table.dataclass,
            list(Train80Partitioner(is_train).partition(table)))
    dtypes = {
        'hits': np.float32,
        'first_hit_index': np.int32,
        'padded_size': np.int32,
    }
    # Materialize every column as one contiguous numpy array.
    data = {
        k: np.array([getattr(table.data[i], k) for i in range(table.capacity)],
                    dtype=dtypes[k])
        for k in table.columns
    }
    dataset = tf.data.Dataset.from_tensor_slices(data)
    dataset = dataset.repeat()
    if is_shuffle:
        dataset = dataset.shuffle(4 * batch_size)
    dataset = dataset.batch(batch_size)
    tensors = dataset.make_one_shot_iterator().get_next()
    for k in tensors:
        tensors[k] = Tensor(tensors[k])
    # Encode the incident-hit label as a one-hot vector over the hit axis.
    tensors['first_hit_index'] = OneHot(tensors['hits'].shape[1])(
        tensors['first_hit_index'])
    return DatasetIncidentSingle(tensors['hits'], tensors['first_hit_index'],
                                 tensors['padded_size'])
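# A self-contained sketch of the tf.data input pattern used by dataset_fast,
# assuming TensorFlow 1.x. The toy shapes (100 events, 5 hits of 4 features)
# and the batch size are assumptions for illustration only.

import numpy as np
import tensorflow as tf

toy = {
    'hits': np.random.rand(100, 5, 4).astype(np.float32),
    'first_hit_index': np.random.randint(0, 5, size=100).astype(np.int32),
    'padded_size': np.full(100, 5, dtype=np.int32),
}

ds = tf.data.Dataset.from_tensor_slices(toy)
ds = ds.repeat()              # loop over the data indefinitely
ds = ds.shuffle(4 * 32)       # shuffle buffer of four batches, as above
ds = ds.batch(32)

batch = ds.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    print(sess.run(batch)['hits'].shape)  # (32, 5, 4)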
def get_dataset(self, nb_samples=100, nb_epochs=1, batch_size=32,
                is_shuffle=False):
    d = DatasetFromColumns(
        'dataset',
        DataColumnsPartition(RangeColumns(nb_samples),
                             Train80Partitioner(True)),
        nb_epochs=nb_epochs,
        batch_size=batch_size,
        is_shuffle=is_shuffle)
    return d
def photon_pytable(path_table, batch_size, is_shuffle, nb_hits, is_train=None):
    table = load_table(path_table)
    padding_size = table.padding_size
    table = filter_by_nb_hits(table, nb_hits)
    table = drop_padded_hits(table, nb_hits)
    if is_train is not None:
        table = ShuffledHitsColumns(
            table.dataclass,
            list(Train80Partitioner(is_train).partition(table)))
    dataset = DatasetFromColumnsV2('dataset',
                                   table,
                                   batch_size=batch_size,
                                   is_shuffle=is_shuffle)
    dataset.make()
    return post_processing(dataset, nb_hits)
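# Hypothetical usage of photon_pytable; the path and hyperparameters below
# are illustrative assumptions, not values taken from the project.

train_set = photon_pytable('data/photons.h5', batch_size=32,
                           is_shuffle=True, nb_hits=5, is_train=True)
test_set = photon_pytable('data/photons.h5', batch_size=32,
                          is_shuffle=False, nb_hits=5, is_train=False)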
def get_columns(self):
    return DataColumnsPartition(
        PyTablesColumns(self.DATA_PATH, '/train'),
        Train80Partitioner(True))
def get_columns(self):
    nb_samples = 10
    return DataColumnsPartition(RangeColumns(nb_samples),
                                Train80Partitioner(True))
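# The call sites above suggest the partitioning contract: Train80Partitioner
# takes is_train and splits a column of indices 80/20. Below is a minimal
# stand-in sketch of that contract, assuming a deterministic modulo split;
# the real Train80Partitioner lives in the project's modules and may differ.

def partition_80_20(indices, is_train):
    # Every fifth sample goes to the 20% test fold; the rest to train.
    for i in indices:
        if (i % 5 != 4) == is_train:
            yield i

print(list(partition_80_20(range(10), True)))   # [0, 1, 2, 3, 5, 6, 7, 8]
print(list(partition_80_20(range(10), False)))  # [4, 9]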