def __init__(
    self,
    data_source: Optional[DataSource] = None,
    preprocess: Optional[Preprocess] = None,
    postprocess: Optional[Postprocess] = None,
    deserializer: Optional[Deserializer] = None,
    serializer: Optional[Serializer] = None,
) -> None:
    """Wire up the pipeline stages, substituting a default instance for any
    stage the caller did not supply.

    Note: the fallbacks are truthiness-based, so a falsy stage object would
    also be replaced by its default.
    """
    self.data_source = data_source
    self._preprocess_pipeline = preprocess if preprocess else DefaultPreprocess()
    self._postprocess_pipeline = postprocess if postprocess else Postprocess()
    self._serializer = serializer if serializer else Serializer()
    self._deserializer = deserializer if deserializer else Deserializer()
    # No stage (train/val/test/predict) is selected until one is set explicitly.
    self._running_stage = None
def test_data_pipeline_init_and_assignement(use_preprocess, use_postprocess, tmpdir):
    """DataPipeline should fall back to default stages when a stage is omitted,
    and assigning the pipeline to a model should propagate the stage objects."""

    class CustomModel(Task):

        def __init__(self, postprocess: Optional[Postprocess] = None):
            super().__init__(model=torch.nn.Linear(1, 1), loss_fn=torch.nn.MSELoss())
            self._postprocess = postprocess

        def train_dataloader(self) -> Any:
            return DataLoader(DummyDataset())

    class SubPreprocess(DefaultPreprocess):
        pass

    class SubPostprocess(Postprocess):
        pass

    # Build the pipeline with either the subclass or nothing for each stage.
    given_preprocess = SubPreprocess() if use_preprocess else None
    given_postprocess = SubPostprocess() if use_postprocess else None
    data_pipeline = DataPipeline(preprocess=given_preprocess, postprocess=given_postprocess)

    # When a stage was omitted, the pipeline must have created the default one.
    expected_preprocess_cls = SubPreprocess if use_preprocess else DefaultPreprocess
    expected_postprocess_cls = SubPostprocess if use_postprocess else Postprocess
    assert isinstance(data_pipeline._preprocess_pipeline, expected_preprocess_cls)
    assert isinstance(data_pipeline._postprocess_pipeline, expected_postprocess_cls)

    model = CustomModel(postprocess=Postprocess())
    model.data_pipeline = data_pipeline
    # TODO: the line below should make the same effect but it's not
    # data_pipeline._attach_to_model(model)

    if use_preprocess:
        assert isinstance(model._preprocess, SubPreprocess)
    else:
        assert model._preprocess is None or isinstance(model._preprocess, Preprocess)

    if use_postprocess:
        assert isinstance(model._postprocess, SubPostprocess)
    else:
        assert model._postprocess is None or isinstance(model._postprocess, Postprocess)
def __init__(self):
    """Build a minimal task: a 1-in/1-out linear model trained with MSE loss,
    plus a freshly constructed Postprocess attached as ``_postprocess``."""
    tiny_model = torch.nn.Linear(1, 1)
    super().__init__(model=tiny_model, loss_fn=torch.nn.MSELoss())
    self._postprocess = Postprocess()