Beispiel #1
0
    def test_combined_iterator_reporting(self, mnist_factory):
        """Report over the train+test MNIST combination: 70k samples total,
        split 60k/10k, and neither sub-report nests further sub-reports."""
        iterator_train, iterator_train_meta = mnist_factory.get_dataset_iterator(
            split="train")
        iterator_test, iterator_test_meta = mnist_factory.get_dataset_iterator(
            split="test")
        meta_train = MetaFactory.get_dataset_meta(
            identifier="id x",
            dataset_name="MNIST",
            dataset_tag="train",
            iterator_meta=iterator_train_meta)
        # BUG FIX: this meta previously reused dataset_tag="train"
        # (copy-paste from meta_train); the test split is tagged "test".
        meta_test = MetaFactory.get_dataset_meta(
            identifier="id x",
            dataset_name="MNIST",
            dataset_tag="test",
            iterator_meta=iterator_test_meta)

        informed_iterator_train = InformedDatasetFactory.get_dataset_iterator(
            iterator_train, meta_train)
        informed_iterator_test = InformedDatasetFactory.get_dataset_iterator(
            iterator_test, meta_test)

        # Combined meta inherits from the train meta, re-tagged "full".
        meta_combined = MetaFactory.get_dataset_meta_from_existing(
            informed_iterator_train.dataset_meta, dataset_tag="full")

        iterator = InformedDatasetFactory.get_combined_dataset_iterator(
            [informed_iterator_train, informed_iterator_test], meta_combined)
        report = DatasetIteratorReportGenerator.generate_report(iterator)
        assert report.length == 70000 and report.sub_reports[
            0].length == 60000 and report.sub_reports[1].length == 10000
        assert not report.sub_reports[
            0].sub_reports and not report.sub_reports[1].sub_reports
Beispiel #2
0
 def get_shuffled_iterator(identifier: str,
                           iterator: InformedDatasetIteratorIF,
                           seed: int) -> InformedDatasetIteratorIF:
     """Return a shuffled view of *iterator*, deterministically seeded,
     whose meta is a copy of the original re-labeled with *identifier*."""
     relabeled_meta = MetaFactory.get_dataset_meta_from_existing(
         iterator.dataset_meta, identifier=identifier)
     shuffled = InformedDatasetFactory.get_shuffled_dataset_iterator(
         iterator, relabeled_meta, seed)
     return shuffled
Beispiel #3
0
 def get_in_memory_iterator(
         identifier: str,
         iterator: InformedDatasetIteratorIF) -> InformedDatasetIteratorIF:
     """Materialize *iterator* in memory under a meta copied from the
     original and re-labeled with *identifier*."""
     return InformedDatasetFactory.get_in_memory_dataset_iterator(
         iterator,
         MetaFactory.get_dataset_meta_from_existing(iterator.dataset_meta,
                                                    identifier=identifier))
Beispiel #4
0
 def get_iterator_view(
         identifier: str, iterator: InformedDatasetIteratorIF,
         selection_fun: Callable[[DatasetIteratorIF], List[int]],
         view_tags: Dict[str, Any]) -> InformedDatasetIteratorIF:
     """Build a view over *iterator* restricted to the indices that
     *selection_fun* selects, tagged with *view_tags* and re-labeled
     with *identifier*."""
     selected_indices = selection_fun(iterator)
     view_meta = MetaFactory.get_dataset_meta_from_existing(
         iterator.dataset_meta, identifier=identifier)
     return InformedDatasetFactory.get_dataset_iterator_view(
         iterator, view_meta, selected_indices, view_tags)
Beispiel #5
0
 def test_combined_iterator_reporting(self, informed_dataset_iterator):
     """Combining the same 1090-element iterator with itself must report
     2180 samples, split 1090/1090, with two flat sub-reports."""
     combined_meta = MetaFactory.get_dataset_meta_from_existing(
         informed_dataset_iterator.dataset_meta, dataset_tag="full")
     combined_iterator = InformedDatasetFactory.get_combined_dataset_iterator(
         [informed_dataset_iterator, informed_dataset_iterator],
         combined_meta)
     report = DatasetIteratorReportGenerator.generate_report(
         combined_iterator)
     lengths = (report.length, report.sub_reports[0].length,
                report.sub_reports[1].length)
     assert lengths == (2180, 1090, 1090)
     # Neither constituent report should nest further sub-reports.
     assert not report.sub_reports[0].sub_reports
     assert not report.sub_reports[1].sub_reports
Beispiel #6
0
 def get_filtered_labels_iterator(
         identifier: str, iterator: InformedDatasetIteratorIF,
         filtered_labels: List[Any]) -> InformedDatasetIteratorIF:
     """Restrict *iterator* to the samples whose target value is one of
     *filtered_labels*, returning a view re-labeled with *identifier*."""
     # Hoist the invariant target position out of the full scan.
     target_pos = iterator.dataset_meta.target_pos
     matching_indices = [
         index for index in range(len(iterator))
         if iterator[index][target_pos] in filtered_labels
     ]
     filtered_meta = MetaFactory.get_dataset_meta_from_existing(
         iterator.dataset_meta, identifier=identifier)
     return InformedDatasetFactory.get_dataset_iterator_view(
         iterator, filtered_meta, matching_indices)
Beispiel #7
0
    def iterator(self) -> "DatasetIteratorIF":
        """Build a fixture iterator of 600 constant samples whose targets are
        1 (x100), 2 (x200) and 3 (x300); the target column doubles as the tag
        column (tag_pos == target_pos == 1).

        Returns:
            An informed dataset iterator over the synthetic sequence data.
        """
        # BUG FIX: the return annotation previously said ``str`` although an
        # informed dataset iterator is returned; a string annotation is used
        # so no extra import is required at runtime.
        targets = [1] * 100 + [2] * 200 + [3] * 300
        sequence_targets = torch.Tensor(targets)
        sequence_samples = torch.ones_like(sequence_targets)

        iterator = SequenceDatasetIterator([sequence_samples, sequence_targets])
        iterator_meta = MetaFactory.get_iterator_meta(sample_pos=0, target_pos=1, tag_pos=1)
        meta = MetaFactory.get_dataset_meta(identifier="dataset id",
                                            dataset_name="dataset",
                                            dataset_tag="full",
                                            iterator_meta=iterator_meta)
        return InformedDatasetFactory.get_dataset_iterator(iterator, meta)
Beispiel #8
0
 def get_mapped_labels_iterator(
         identifier: str, iterator: DatasetIteratorIF,
         mappings: Dict) -> InformedDatasetIteratorIF:
     """Apply the label *mappings* to the targets/tags of *iterator* via a
     post-processor and wrap the result under a meta re-labeled with
     *identifier*."""
     dataset_meta = iterator.dataset_meta
     mapper = LabelMapperPostProcessor(
         mappings=mappings,
         target_position=dataset_meta.target_pos,
         tag_position=dataset_meta.tag_pos)
     mapped_iterator = PostProcessedDatasetIterator(iterator, mapper)
     new_meta = MetaFactory.get_dataset_meta_from_existing(
         dataset_meta, identifier=identifier)
     return InformedDatasetFactory.get_dataset_iterator(mapped_iterator,
                                                        new_meta)
Beispiel #9
0
 def get_one_hot_encoded_target_iterators(
         identifier: str, iterators: Dict[str, InformedDatasetIteratorIF],
         target_vector_size: int) -> Dict[str, DatasetIteratorIF]:
     """One-hot encode the targets of every iterator in *iterators*.

     Args:
         identifier: Identifier assigned to each resulting iterator's meta.
         iterators: Mapping of split name -> informed iterator; all are
             assumed to share the same target position (taken from the first).
         target_vector_size: Length of the one-hot target vectors.

     Returns:
         Mapping of split name -> post-processed iterator. Empty input yields
         an empty mapping.
     """
     # Robustness: an empty mapping previously raised IndexError.
     if not iterators:
         return {}
     # next(iter(...)) grabs the first value without materializing the
     # whole items() list as list(iterators.items())[0][1] did.
     target_position = next(iter(iterators.values())).dataset_meta.target_pos
     postprocessor = OneHotEncodedTargetPostProcessor(
         target_vector_size=target_vector_size,
         target_position=target_position)
     return {
         name: InformedDatasetFactory.get_dataset_iterator(
             PostProcessedDatasetIterator(iterator, postprocessor),
             MetaFactory.get_dataset_meta_from_existing(
                 iterator.dataset_meta, identifier=identifier))
         for name, iterator in iterators.items()
     }
Beispiel #10
0
 def get_feature_encoded_iterators(
     identifier: str, iterators: Dict[str, InformedDatasetIteratorIF],
     feature_encoding_configs: Dict[str, List[Any]]
 ) -> Dict[str, DatasetIteratorIF]:
     """Feature-encode the samples of every iterator in *iterators*.

     Args:
         identifier: Identifier assigned to each resulting iterator's meta.
         iterators: Mapping of split name -> informed iterator; all are
             assumed to share the same sample position (taken from the first).
         feature_encoding_configs: Encoder configuration per feature.

     Returns:
         Mapping of split name -> post-processed iterator. Empty input yields
         an empty mapping.
     """
     # Robustness: an empty mapping previously raised IndexError.
     if not iterators:
         return {}
     # next(iter(...)) grabs the first value without materializing the
     # whole items() list as list(iterators.items())[0][1] did.
     sample_position = next(iter(iterators.values())).dataset_meta.sample_pos
     feature_encoder_post_processor = FeatureEncoderPostProcessor(
         sample_position=sample_position,
         feature_encoding_configs=feature_encoding_configs)
     # Fit once over all splits so every iterator is encoded consistently.
     feature_encoder_post_processor.fit(iterators)
     return {
         name: InformedDatasetFactory.get_dataset_iterator(
             PostProcessedDatasetIterator(iterator,
                                          feature_encoder_post_processor),
             MetaFactory.get_dataset_meta_from_existing(
                 iterator.dataset_meta, identifier=identifier))
         for name, iterator in iterators.items()
     }
Beispiel #11
0
 def _split(iterator: InformedDatasetIteratorIF, seed: int,
            split_config: Dict) -> Dict[str, InformedDatasetIteratorIF]:
     """Split *iterator* into named sub-iterators according to *split_config*.

     Args:
         iterator: Source iterator to split.
         seed: RNG seed forwarded to the splitter for reproducible splits.
         split_config: Mapping of new split name -> split ratio.

     Returns:
         Mapping of split name -> informed iterator over that split, with each
         meta tagged with the split name.

     NOTE(review): ``stratified`` and ``identifier`` are referenced below but
     are not parameters of this function — presumably they come from an
     enclosing scope not visible here; confirm before reusing this snippet.
     """
     names = list(split_config.keys())
     ratios = list(split_config.values())
     # Pick the splitting strategy (``stratified`` is not defined locally,
     # see NOTE above).
     if stratified:
         splitter = SplitterFactory.get_stratified_splitter(
             ratios=ratios, seed=seed)
     else:
         splitter = SplitterFactory.get_random_splitter(ratios=ratios,
                                                        seed=seed)
     splitted_iterators = splitter.split(iterator)
     # One meta per split: same base meta, re-labeled and tagged per name
     # (``identifier`` is not defined locally, see NOTE above).
     dataset_metas = [
         MetaFactory.get_dataset_meta_from_existing(
             iterator.dataset_meta,
             identifier=identifier,
             dataset_tag=name) for name in names
     ]
     # Pair split iterators with their metas positionally (splitter output
     # order matches the ratio/name order).
     return {
         name: InformedDatasetFactory.get_dataset_iterator(
             splitted_iterators[i], dataset_metas[i])
         for i, name in enumerate(names)
     }
Beispiel #12
0
    def get_combined_iterators(
            identifier: str, iterators: Dict[str,
                                             Dict[str,
                                                  InformedDatasetIteratorIF]],
            combine_configs: Dict) -> Dict[str, InformedDatasetIteratorIF]:
        """Merge existing splits into new combined splits.

        Args:
            identifier (str): Identifier assigned to each combined meta.
            iterators (Dict[str, Dict[str, InformedDatasetIteratorIF]]): Dictionary mapping from iterator_name -> split_name -> iterator
            combine_configs (Dict): Per new split, which old iterators/splits
                to concatenate (keys "new_split" and "old_splits").

        Returns:
            Dict[str, InformedDatasetIteratorIF]: Mapping from new split name
            to the combined iterator.
        """
        def collect_source_iterators(split_config: List):
            # Flatten the iterator_name/split_name references into a flat
            # list, preserving config order.
            collected = []
            for element in split_config:
                for split_name in element["splits"]:
                    collected.append(
                        iterators[element["iterators_name"]][split_name])
            return collected

        result = {}
        for config in combine_configs:
            sources = collect_source_iterators(config["old_splits"])
            # The combined meta is derived from the first source iterator,
            # renamed and untagged.
            combined_meta = MetaFactory.get_dataset_meta_from_existing(
                dataset_meta=sources[0].dataset_meta,
                identifier=identifier,
                dataset_name="combined_dataset",
                dataset_tag=None)
            result[config["new_split"]] = \
                InformedDatasetFactory.get_dataset_iterator(
                    CombinedDatasetIterator(sources), combined_meta)
        return result
Beispiel #13
0
 def informed_dataset_iterator(self, dataset_iterator,
                               dataset_meta) -> DatasetIteratorIF:
     """Fixture: wrap *dataset_iterator* with *dataset_meta* into an
     informed dataset iterator."""
     informed = InformedDatasetFactory.get_dataset_iterator(
         dataset_iterator, dataset_meta)
     return informed