Ejemplo n.º 1
0
    def train_component(self, prediction_postprocessing_registry,
                        loss_function_registry):
        """Construct and return a train component.

        Both registries are wrapped in ``Requirement`` objects and passed,
        together with a loss configuration and a single ARG_MAX
        post-processor configuration, to ``TrainComponentConstructable``.

        Args:
            prediction_postprocessing_registry: Component exposed under the
                "prediction_postprocessing_registry" requirement key.
            loss_function_registry: Component exposed under the
                "loss_function_registry" requirement key.

        Returns:
            The object produced by ``TrainComponentConstructable.construct()``.
        """
        # NOTE(review): presumably a pytest fixture; the decorator is not visible here.
        registry_requirements = dict(
            prediction_postprocessing_registry=Requirement(
                components=prediction_postprocessing_registry),
            loss_function_registry=Requirement(
                components=loss_function_registry),
        )

        loss_settings = dict(
            key="CrossEntropyLoss",
            target_subscription_key="target_key",
            prediction_subscription_key="model_prediction_key",
        )

        argmax_settings = {
            "key": "ARG_MAX",
            "params": dict(
                prediction_subscription_key="model_prediction_key_anchor",
                prediction_publication_key="postprocessing_argmax_key_anchor",
            ),
        }

        return TrainComponentConstructable(
            component_identifier="train_component_constructable",
            requirements=registry_requirements,
            loss_fun_config=loss_settings,
            post_processors_config=[argmax_settings],
            show_progress=False,
        ).construct()
Ejemplo n.º 2
0
 def test_constructable(self, data_loader, train_component):
     """Build a trainer from a data loader and a train component.

     Both arguments are wrapped in ``Requirement`` objects and given to
     ``TrainerConstructable``; the constructed object is returned.
     """
     # NOTE(review): despite the ``test_`` name this asserts nothing and
     # returns a value -- presumably meant as a fixture; confirm.
     wrapped = dict(
         data_loaders=Requirement(components=data_loader),
         train_component=Requirement(components=train_component),
     )
     builder = TrainerConstructable(component_identifier="trainer_constructable",
                                    requirements=wrapped)
     return builder.construct()
    def test_constructable(self, informed_iterators):
        """Check that ShuffledDatasetIteratorConstructable reorders the train split.

        The first element of the original train iterator is compared with the
        first element of the shuffled train iterator (seed 0 for both splits);
        the two samples must differ in at least one entry.
        """
        iterator_requirements = {
            "iterators": Requirement(components=informed_iterators, subscription=["train", "test"]),
        }

        # Reference element taken from the unshuffled train split.
        sample, target, _ = informed_iterators["train"][0]

        builder = ShuffledDatasetIteratorConstructable(
            component_identifier="shuffled_component",
            requirements=iterator_requirements,
            applicable_splits=["train", "test"],
            seeds={"train": 0, "test": 0},
        )
        shuffled_iterators = builder.construct()

        first_shuffled = shuffled_iterators["train"][0]
        shuffled_sample, _, _ = first_shuffled

        assert list(sample.shape) == [28, 28]
        assert isinstance(target, int)

        for split in ("train", "test"):
            assert isinstance(shuffled_iterators[split], InformedDatasetIterator)

        # At least one element must differ between the two first samples.
        assert (shuffled_sample != sample).any()
Ejemplo n.º 4
0
 def test_constructable(self, sampling_strategies, drop_last, batch_size, sampler_class, informed_iterators,
                        data_collator):
     """Check the loaders produced by DataLoadersConstructable.

     Both the "train" and "test" loaders must be ``DatasetLoader`` instances
     carrying the requested ``batch_size`` and ``drop_last``; the train
     loader's sampler must be an instance of ``sampler_class``.
     """
     splits = ("train", "test")
     wrapped = {
         "iterators": Requirement(components=informed_iterators, subscription=["train", "test"]),
         "data_collator": Requirement(components=data_collator, subscription=["train", "test"]),
     }
     loaders = DataLoadersConstructable(
         component_identifier="data_loader_component",
         requirements=wrapped,
         batch_size=batch_size,
         sampling_strategies=sampling_strategies,
         drop_last=drop_last,
     ).construct()

     for split in splits:
         assert isinstance(loaders[split], DatasetLoader)
     for split in splits:
         assert loaders[split].batch_size == batch_size
     for split in splits:
         assert loaders[split].drop_last == drop_last
     # Only the train loader's sampler type is checked.
     assert isinstance(loaders["train"].sampler, sampler_class)
Ejemplo n.º 5
0
 def data_collator(self, informed_iterators, collator_type):
     """Construct a data collator of the given type with empty parameters.

     Args:
         informed_iterators: Iterators subscribed to via the "train" and
             "test" splits.
         collator_type: Passed unchanged to ``DataCollatorConstructable``.

     Returns:
         The object produced by ``DataCollatorConstructable.construct()``.
     """
     # NOTE(review): presumably a pytest fixture; the decorator is not visible here.
     iterator_requirement = Requirement(components=informed_iterators, subscription=["train", "test"])
     builder = DataCollatorConstructable(
         component_identifier="data_collator_component",
         requirements={"iterators": iterator_requirement},
         collator_params={},
         collator_type=collator_type,
     )
     return builder.construct()
Ejemplo n.º 6
0
    def test_constructable(self, informed_iterators, data_collator, weigthed_sampling_split_name, sample_class):
        """Check the loaders produced by DeprecatedDataLoadersConstructable.

        The constructable is configured with batch size 1, label position 2,
        ``drop_last=False`` and seed 0 for both splits. Both loaders must be
        ``DatasetLoader`` instances, and the sampler of the split named by
        ``weigthed_sampling_split_name`` must be an instance of ``sample_class``.
        """
        wrapped = dict(
            iterators=Requirement(components=informed_iterators, subscription=["train", "test"]),
            data_collator=Requirement(components=data_collator, subscription=["train", "test"]),
        )
        loaders = DeprecatedDataLoadersConstructable(
            component_identifier="data_loader_component",
            requirements=wrapped,
            weigthed_sampling_split_name=weigthed_sampling_split_name,
            batch_size=1,
            seeds={'train': 0, 'test': 0},
            label_pos=2,
            drop_last=False,
        ).construct()

        for split in ("train", "test"):
            assert isinstance(loaders[split], DatasetLoader)

        weighted_loader = loaders[weigthed_sampling_split_name]
        assert isinstance(weighted_loader.sampler, sample_class)
    def test_constructable(self, informed_iterators):
        """Check the iterators produced by InMemoryDatasetIteratorConstructable.

        The first train element must unpack into three values, with a 28x28
        sample and an ``int`` target; both splits must be
        ``InformedDatasetIterator`` instances.
        """
        iterator_requirement = Requirement(components=informed_iterators, subscription=["train", "test"])
        in_memory = InMemoryDatasetIteratorConstructable(
            component_identifier="memory_component",
            requirements={"iterators": iterator_requirement},
        ).construct()

        train_iterator = in_memory["train"]
        test_iterator = in_memory["test"]

        sample, target, _ = train_iterator[0]
        assert list(sample.shape) == [28, 28]
        assert isinstance(target, int)

        for split_iterator in (train_iterator, test_iterator):
            assert isinstance(split_iterator, InformedDatasetIterator)
 def data_loader(self, informed_iterators, data_collator):
     """Construct data loaders with a seeded RANDOM strategy for "train".

     The loaders use batch size 16 and ``drop_last=True``; a sampling
     strategy is given for the "train" split only.

     Returns:
         The object produced by ``DataLoadersConstructable.construct()``.
     """
     # NOTE(review): presumably a pytest fixture; the decorator is not visible here.
     train_strategy = {"strategy": "RANDOM", "seed": 0}

     wrapped = dict(
         iterators=Requirement(components=informed_iterators,
                               subscription=["train", "test"]),
         data_collator=Requirement(components=data_collator,
                                   subscription=["train", "test"]),
     )

     builder = DataLoadersConstructable(
         component_identifier="data_loader_component",
         requirements=wrapped,
         batch_size=16,
         sampling_strategies={'train': train_strategy},
         drop_last=True,
     )
     return builder.construct()
 def test_constructable(self, informed_iterators):
     """Check that DatasetIteratorSplitsConstructable splits "train" 70/30.

     The resulting "train_1" and "train_2" iterators must implement
     ``InformedDatasetIteratorIF`` and their lengths must equal the
     truncated 0.7 and 0.3 fractions of the original train length.
     """
     ratio_1, ratio_2 = 0.7, 0.3
     iterator_requirement = Requirement(components=informed_iterators, subscription=["train"])
     splits = DatasetIteratorSplitsConstructable(
         component_identifier="splitted_component",
         requirements={"iterators": iterator_requirement},
         split_configs={"train": {"train_1": ratio_1, "train_2": ratio_2}},
     ).construct()

     first_part = splits["train_1"]
     second_part = splits["train_2"]

     sample, target, _ = first_part[0]
     assert list(sample.shape) == [28, 28]
     assert isinstance(target, int)

     assert isinstance(first_part, InformedDatasetIteratorIF)
     assert isinstance(second_part, InformedDatasetIteratorIF)

     # int() truncates, matching the expected split sizes.
     assert int(len(informed_iterators["train"]) * ratio_1) == len(first_part)
     assert int(len(informed_iterators["train"]) * ratio_2) == len(second_part)
    def test_constructable(self, informed_iterators):
        """Check FeatureEncodedIteratorConstructable with no encoders configured.

        With an empty ``feature_encoding_configs`` list applied to the
        "train" split, the first train element must still have a 28x28
        sample and an ``int`` target, and the iterator must implement
        ``InformedDatasetIteratorIF``.
        """
        iterator_requirement = Requirement(components=informed_iterators, subscription=["train", "test"])

        builder = FeatureEncodedIteratorConstructable(
            component_identifier="feature_encoding_component",
            requirements={"iterators": iterator_requirement},
            feature_encoding_configs=[],
            applicable_splits=["train"],
        )
        encoded_train = builder.construct()["train"]

        sample, target, _ = encoded_train[0]
        assert list(sample.shape) == [28, 28]
        assert isinstance(target, int)

        assert isinstance(encoded_train, InformedDatasetIteratorIF)
    def test_constructable(self, informed_iterators):
        """Check OneHotEncodedTargetsIteratorConstructable on the train split.

        The encoded target of the first train element must have length 10
        and hold a 1 at the index given by the original label of that
        element.
        """
        vector_size = 10
        iterator_requirement = Requirement(components=informed_iterators, subscription=["train", "test"])
        encoded_iterators = OneHotEncodedTargetsIteratorConstructable(
            component_identifier="one_hot_targets_component",
            requirements={"iterators": iterator_requirement},
            target_vector_size=vector_size,
            applicable_splits=["train"],
        ).construct()

        # Original (non-encoded) label of the first train element.
        _, original_label, _ = informed_iterators["train"][0]

        encoded_train = encoded_iterators["train"]
        sample, one_hot_target, _ = encoded_train[0]

        assert list(sample.shape) == [28, 28]
        assert len(one_hot_target) == vector_size
        assert one_hot_target[int(original_label)] == 1

        assert isinstance(encoded_train, InformedDatasetIteratorIF)
Ejemplo n.º 12
0
 def test_constructable(self, mocked_nn_class):
     """Check that ModelConstructable builds an instance of the mocked class.

     ``mocked_nn_class`` is supplied as the "model_registry" requirement and
     the model type "mocked_nn" is requested with an empty definition,
     seed 0 and no prediction publication keys.
     """
     # The mocked class itself is passed as the model registry component.
     registry_requirement = Requirement(components=mocked_nn_class)
     builder = ModelConstructable(
         component_identifier="model_component",
         requirements={"model_registry": registry_requirement},
         model_type="mocked_nn",
         model_definition={},
         seed=0,
         prediction_publication_keys={},
     )
     built_model = builder.construct()
     assert isinstance(built_model, mocked_nn_class)
    def test_constructable(self, informed_iterators):
        """Check FilteredLabelsIteratorConstructable applied to "train" only.

        After filtering with labels ``[1, 3, 5]`` on the train split, every
        element of the filtered train iterator must carry one of those
        labels, while the untouched test split and both original splits must
        still contain at least one other label.

        The ``all``/``any`` checks use generator expressions so they stop at
        the first decisive element instead of materialising a list over the
        whole iterator.
        """
        requirements = {"iterators": Requirement(components=informed_iterators, subscription=["train", "test"])}

        filtered_labels = [1, 3, 5]
        constructable = FilteredLabelsIteratorConstructable(component_identifier="filtered_component",
                                                            requirements=requirements,
                                                            filtered_labels=filtered_labels,
                                                            applicable_splits=["train"])
        iterators = constructable.construct()
        iterator_train_filtered, iterator_test_not_filtered = iterators["train"], iterators["test"]
        sample, target, _ = iterator_train_filtered[0]
        assert list(sample.shape) == [28, 28]
        assert isinstance(target, int)

        assert isinstance(iterator_train_filtered, InformedDatasetIteratorIF)
        assert isinstance(iterator_test_not_filtered, InformedDatasetIteratorIF)

        # NOTE(review): the value compared against the label list is the third
        # tuple element (the position unpacked as the tag above), not the
        # target at index 1 -- confirm this is intended.
        assert all(t in filtered_labels for _, _, t in iterator_train_filtered)
        assert any(t not in filtered_labels for _, _, t in iterator_test_not_filtered)
        assert any(t not in filtered_labels for _, _, t in informed_iterators["train"])
        assert any(t not in filtered_labels for _, _, t in informed_iterators["test"])
    def test_constructable(self, informed_iterators):
        """Check MappedLabelsIteratorConstructable applied to "train" only.

        Labels ``[1, 2, 3, 4]`` are mapped to ``0`` on the train split. No
        element of the mapped train iterator may still carry one of the
        previous labels, while the untouched test split and both original
        splits must still contain at least one of them.

        The ``all``/``any`` checks use generator expressions so they stop at
        the first decisive element instead of materialising a list over the
        whole iterator.
        """
        requirements = {"iterators": Requirement(components=informed_iterators, subscription=["train", "test"])}

        mappings = [{"previous_labels": [1, 2, 3, 4], "new_label": 0}]
        constructable = MappedLabelsIteratorConstructable(component_identifier="mapped_component",
                                                          requirements=requirements,
                                                          mappings=mappings,
                                                          applicable_splits=["train"])
        iterators = constructable.construct()
        iterator_train_mapped, iterator_test_not_mapped = iterators["train"], iterators["test"]
        sample, target, _ = iterator_train_mapped[0]
        assert list(sample.shape) == [28, 28]
        assert isinstance(target, int)

        assert isinstance(iterator_train_mapped, InformedDatasetIteratorIF)
        assert isinstance(iterator_test_not_mapped, InformedDatasetIteratorIF)

        previous_labels = mappings[0]["previous_labels"]
        # NOTE(review): the value compared against the label list is the third
        # tuple element (the position unpacked as the tag above), not the
        # target at index 1 -- confirm this is intended.
        assert all(t not in previous_labels for _, _, t in iterator_train_mapped)
        assert any(t in previous_labels for _, _, t in iterator_test_not_mapped)
        assert any(t in previous_labels for _, _, t in informed_iterators["train"])
        assert any(t in previous_labels for _, _, t in informed_iterators["test"])
 def test_constructable(self, informed_iterators):
     """Check the views produced by IteratorViewConstructable.

     The train view is restricted to the first 15 indices and the test view
     to the first 10. Both views must implement
     ``InformedDatasetIteratorIF``, report those lengths, and indexing the
     train view one past its last valid position must raise.
     """
     requirements = {"iterators": Requirement(components=informed_iterators, subscription=["train", "test"])}
     num_indices_train = 15
     num_indices_test = 10
     constructable = IteratorViewConstructable(component_identifier="mapped_component",
                                               requirements=requirements,
                                               split_indices={"train": list(range(num_indices_train)),
                                                              "test": list(range(num_indices_test))},
                                               applicable_split="train")
     iterator_views = constructable.construct()
     iterator_view_train, iterator_view_test = iterator_views["train"], iterator_views["test"]
     assert isinstance(iterator_view_train, InformedDatasetIteratorIF)
     assert isinstance(iterator_view_test, InformedDatasetIteratorIF)
     assert len(iterator_view_train) == num_indices_train
     assert len(iterator_view_test) == num_indices_test

     # The failure is raised in the ``else`` branch, outside the ``try``
     # body: a bare ``except`` around ``assert False`` would swallow the
     # AssertionError and let the test pass even when no error occurs.
     # The exact exception type raised by the view is not visible here, so
     # any ``Exception`` is accepted.
     try:
         iterator_view_train[num_indices_train]
     except Exception:
         pass
     else:
         raise AssertionError("indexing past the end of the train view did not raise")
    def test_constructable(self, informed_iterators):
        """Check the splits produced by CombinedDatasetIteratorConstructable.

        Three new splits are configured: "full" (train + test), "train" and
        "test". All must implement ``InformedDatasetIteratorIF`` and the
        length of "full" must equal the sum of the other two.
        """
        def combine(new_split, old_split_names):
            # One combine-config entry drawing its splits from the "iterators" requirement.
            return {"new_split": new_split,
                    "old_splits": [{"iterators_name": "iterators", "splits": old_split_names}]}

        iterator_requirement = Requirement(components=informed_iterators, subscription=["train", "test"])
        combined = CombinedDatasetIteratorConstructable(
            component_identifier="combined_component",
            requirements={"iterators": iterator_requirement},
            combine_configs=[
                combine("full", ["train", "test"]),
                combine("train", ["train"]),
                combine("test", ["test"]),
            ],
        ).construct()

        full_iterator = combined["full"]
        train_iterator = combined["train"]
        test_iterator = combined["test"]

        sample, target, _ = full_iterator[0]
        assert list(sample.shape) == [28, 28]
        assert isinstance(target, int)

        for combined_iterator in (full_iterator, train_iterator, test_iterator):
            assert isinstance(combined_iterator, InformedDatasetIteratorIF)

        assert int(len(full_iterator)) == len(train_iterator) + len(test_iterator)
Ejemplo n.º 17
0
 def requirements(self, repository) -> Dict[str, Requirement]:
     """Return a requirements mapping exposing ``repository`` under the key "repository"."""
     repository_requirement = Requirement(components=repository)
     return {"repository": repository_requirement}
Ejemplo n.º 18
0
    def test_constructable(self, data_loader,
                           prediction_postprocessing_registry,
                           loss_function_registry, metric_registry):
        """Check that EvalComponentConstructable builds an ``EvalComponent``.

        The constructable is configured with one macro F1 metric, one
        cross-entropy loss and one ARG_MAX post-processor; the metrics and
        loss computation configs are passed as ``None``.
        """
        # Subscription keys shared by the metric and loss configurations.
        prediction_key = "model_prediction_key"
        target_key = "target_key"

        f1_metric = {
            "key": "F1_SCORE",
            "params": {"average": "macro"},
            "prediction_subscription_key": prediction_key,
            "target_subscription_key": target_key,
            "tag": "F1_SCORE_macro",
        }
        cross_entropy = {
            "prediction_subscription_key": prediction_key,
            "target_subscription_key": target_key,
            "key": "CrossEntropyLoss",
            "tag": "cross_entropy_loss",
        }
        argmax = {
            "key": "ARG_MAX",
            "prediction_subscription_key": prediction_key,
            "prediction_publication_key": "postprocessing_argmax_key",
        }

        wrapped = {
            "data_loaders": Requirement(components=data_loader),
            "prediction_postprocessing_registry": Requirement(
                components=prediction_postprocessing_registry),
            "loss_function_registry": Requirement(
                components=loss_function_registry),
            "metric_registry": Requirement(components=metric_registry),
        }

        builder = EvalComponentConstructable(
            component_identifier="eval_component_constructable",
            requirements=wrapped,
            train_split_name="train",
            metrics_config=[f1_metric],
            loss_funs_config=[cross_entropy],
            post_processors_config=[argmax],
            cpu_target_subscription_keys=[target_key],
            cpu_prediction_subscription_keys=["postprocessing_argmax_key", prediction_key],
            metrics_computation_config=None,
            loss_computation_config=None,
            show_progress=False,
        )
        eval_component = builder.construct()

        assert isinstance(eval_component, EvalComponent)