Example #1
    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_get_path(self, os_mock, datetime_mock):
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        os_mock.path.join.side_effect = [
            "testpath/2442_12_24_01_02_03",
            "testpath/2442_12_24_01_02_03/my_result",
            "testpath/2442_12_24_01_02_03/my_result/result.csv"
        ]

        self.filemanager = FileManager("testpath")

        os_mock.path.isfile.return_value = False
        os_mock.path.split.return_value = ("", "result.csv")

        filepath = self.filemanager.get_path("result.csv", "my_result")

        join_calls = [
            call("testpath", "2442_12_24_01_02_03"),
            call("testpath/2442_12_24_01_02_03", "my_result"),
            call("testpath/2442_12_24_01_02_03/my_result", "result.csv")
        ]
        os_mock.path.join.assert_has_calls(join_calls)
        os_mock.path.split.assert_called_once_with("result.csv")

        self.assertEqual(filepath,
                         "testpath/2442_12_24_01_02_03/my_result/result.csv")
Example #2
    def get_json(self, fm: FileManager):
        json = super().get_json(fm)
        condition_path = None
        train_if_path = None
        callbacks_paths = []
        # Serialize the optional condition and train_if callables with cloudpickle
        # so that they can be restored when the pipeline is loaded again.
        if self.condition:
            condition_path = fm.get_path(f"{self.name}_condition.pickle")
            with open(condition_path, 'wb') as outfile:
                cloudpickle.dump(self.condition, outfile)
        if self.train_if:
            train_if_path = fm.get_path(f"{self.name}_train_if.pickle")
            with open(train_if_path, 'wb') as outfile:
                cloudpickle.dump(self.train_if, outfile)
        # Pickle each callback and collect the resulting file paths.
        for callback in self.callbacks:
            callback_path = fm.get_path(f"{self.name}_callback.pickle")
            with open(callback_path, 'wb') as outfile:
                cloudpickle.dump(callback, outfile)
            callbacks_paths.append(callback_path)
        json.update({
            "callbacks": callbacks_paths,
            "condition": condition_path,
            "train_if": train_if_path,
            "batch_size": self.batch_size
        })
        return json
Example #3
    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_not_allowed_filetype(self, os_mock, datetime_mock):
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        os_mock.path.join.side_effect = ["testpath/2442_12_24_01_02_03"]

        self.filemanager = FileManager("testpath")

        os_mock.path.isfile.return_value = False
        os_mock.path.split.return_value = ("", "result.test")
        with self.assertRaises(IOException) as cm:
            self.filemanager.get_path("result.test")
        self.assertEqual(cm.exception.args, (
            "test is not an allowed file type. Allowed types are ['png', 'csv', 'xlsx', "
            "'pickle', 'tex', 'json', 'h5', 'pt', 'md'].", ))
Example #4
    def to_folder(self, path: Union[str, Path]):
        """
        Saves the pipeline in pipeline.json in the specified folder.

        :param path: path of the folder
        :return: None
        """
        if not isinstance(path, Path):
            path = Path(path)
        save_file_manager = FileManager(path, time_mode=False)

        modules = []
        # 1. Iterate over the steps and collect all modules, mapping each step to its module_id.
        #    For each step, create a dict with the information needed to restore it.
        steps_for_storing = []
        for step in self.id_to_step.values():
            step_json = step.get_json(save_file_manager)
            if isinstance(step, Step):
                if step.module in modules:
                    step_json["module_id"] = modules.index(step.module)
                else:
                    modules.append(step.module)
                    step_json["module_id"] = len(modules) - 1
            steps_for_storing.append(step_json)

        # 2. Iterate over all modules, create their JSON representation,
        #    and save them as pickle, h5, etc. if necessary.
        modules_for_storing = []
        for module in modules:
            stored_module = module.save(save_file_manager)
            modules_for_storing.append(stored_module)

        # 3. Put everything together and dump it.
        stored_pipeline = {
            "name": "Pipeline",
            "id": 1,
            "version": 1,
            "modules": modules_for_storing,
            "steps": steps_for_storing,
            "path":
            self.file_manager.basic_path if self.file_manager else None,
            "batch": str(self.batch) if self.batch else None,
        }
        file_path = save_file_manager.get_path('pipeline.json')
        with open(file_path, 'w') as outfile:
            json.dump(obj=stored_pipeline,
                      fp=outfile,
                      sort_keys=False,
                      indent=4,
                      cls=PyWATTSJsonEncoder)
Example #5
    def save(self, fm: FileManager):
        json = super().save(fm)
        file_path = fm.get_path(f'{self.name}.pickle')
        with open(file_path, 'wb') as outfile:
            pickle.dump(obj=self.module, file=outfile)
        json.update({"sklearn_module": file_path})
        return json
Example #6
    def create_summary(self, summaries: List[SummaryObject], fm: FileManager):
        """
        This method is responsible for creating and storing the summaries as a JSON file.

        :param summaries: The summaries that should be stored.
        :type summaries: List[SummaryObject]
        :param fm: The pyWATTS filemanager.
        :type fm: FileManager
        """
        summary_dict = {}
        for category in [
                SummaryCategory.Summary, SummaryCategory.FitTime,
                SummaryCategory.TransformTime
        ]:
            category_dict = {}
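            # Only include summaries that actually carry information (additional text or key-value pairs).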
            for summary in filter(lambda s: s.category == category, summaries):
                if summary.additional_information != "" or len(
                        summary.k_v) > 0:
                    if isinstance(summary, SummaryObjectList):
                        category_dict.update(self._create_summary(summary))
                    elif isinstance(summary, SummaryObjectTable):
                        category_dict.update(
                            self._create_table_summary(summary))

            summary_dict.update({category.name: category_dict})
        with open(fm.get_path("summary.json"), "w") as file:
            json.dump(summary_dict, file)
        return summary_dict
Example #7
    def create_summary(self, summaries: List[SummaryObject], fm: FileManager):
        """
        This method is responsible for creating and storing the summaries as a Markdown file.

        :param summaries: The summaries that should be stored.
        :type summaries: List[SummaryObject]
        :param fm: The pyWATTS filemanager.
        :type fm: FileManager
        """
        summary_string = "# Summary: \n"
        for category in [
                SummaryCategory.Summary, SummaryCategory.FitTime,
                SummaryCategory.TransformTime
        ]:
            summary_string += f"## {category.name}\n"
            for summary in filter(lambda s: s.category == category, summaries):
                if summary.additional_information != "" or len(
                        summary.k_v) > 0:
                    if isinstance(summary, SummaryObjectList):
                        summary_string += self._create_summary(summary)
                    elif isinstance(summary, SummaryObjectTable):
                        summary_string += self._create_table_summary(summary)

        with open(fm.get_path("summary.md"), "w") as file:
            file.write(summary_string)
        return summary_string
Example #8
    def save(self, fm: FileManager) -> Dict:
        json = super().save(fm)
        if self.filter_method is not None:
            filter_path = fm.get_path(f"{self.name}_filter.pickle")
            with open(filter_path, 'wb') as outfile:
                cloudpickle.dump(self.filter_method, outfile)
            json["filter"] = filter_path
        return json
Example #9
    def save(self, fm: FileManager) -> dict:
        """
        Stores the keras model at the given path
        :param fm: The Filemanager, which contains the path where the model should be stored
        :return: The path where the model is stored.
        """
        json = super().save(fm)
        # Resolve each target path only once: FileManager.get_path appends a counter when the
        # file already exists, so calling it again after saving would return a different path
        # from the one that was just written.
        model_path = fm.get_path(f"{self.name}.h5")
        self.model.save(filepath=model_path)
        aux_models = []
        for name, aux_model in self.aux_models.items():
            aux_path = fm.get_path(f"{self.name}_{name}.h5")
            aux_model.save(filepath=aux_path)
            aux_models.append((name, aux_path))
        json.update({
            "aux_models": aux_models,
            "model": model_path
        })

        return json
Example #10
    def __init__(self,
                 path: str = ".",
                 batch: Optional[pd.Timedelta] = None,
                 name="Pipeline"):
        super().__init__(name)
        self.batch = batch
        self.counter = None
        self.start_steps = dict()
        self.id_to_step: Dict[int, BaseStep] = {}
        self.file_manager = FileManager(path)
Example #11
    def save(self, fm: FileManager):
        """
        Saves the PyTorch wrapper and the model it contains
        :param fm: Filemanager for getting the path
        :type fm: FileManager
        :return: Dictionary with additional information
        :rtype: Dict
        """

        json = super().save(fm)
        file_path = fm.get_path(f'{self.name}.pt')
        loss_fn_path = fm.get_path(f"loss_{self.name}.pickle")
        with open(loss_fn_path, "wb") as file:
            cloudpickle.dump(self.loss_fn, file)
        optimizer_path = fm.get_path(f"optimizer_{self.name}.pickle")
        with open(optimizer_path, "wb") as file:
            cloudpickle.dump(self.optimizer, file)
        torch.save(self.model, file_path)
        json.update({
            "pytorch_module": file_path,
            "optimizer": optimizer_path,
            "loss_fn": loss_fn_path
        })
        return json
Example #12
    @patch("pywatts.core.filemanager.logger")
    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_duplicate_filename(self, os_mock, datetime_mock, logger_mock):
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        os_mock.path.join.side_effect = [
            "testpath/2442_12_24_01_02_03",
            "testpath/2442_12_24_01_02_03/my_result",
            "testpath/2442_12_24_01_02_03/my_result/result.csv"
        ]

        os_mock.path.splitext.return_value = (
            "testpath/2442_12_24_01_02_03/my_result/result", "csv")

        self.filemanager = FileManager("testpath")

        os_mock.path.isfile.return_value = True
        os_mock.path.split.return_value = ("", "result.csv")
        result = self.filemanager.get_path("result.csv")
        self.assertEqual(
            result, 'testpath/2442_12_24_01_02_03/my_result/result_1.csv')
        logger_mock.info.assert_called_with(
            'File %s already exists. We appended %s to the name',
            'testpath/2442_12_24_01_02_03/my_result', 1)
Example #13
    def save(self, fm: FileManager) -> Dict:
        """
        Stores the PNN at the given path

        :param fm: The Filemanager, which contains the path where the model should be stored
        :return: Dictionary with the path where the model is stored.
        """
        json = super().save(fm)
        if self.is_fitted:
            filepath = fm.get_path(f"{self.name}.h5")
            self.pnn.save(filepath=filepath)
            json.update({"pnn": filepath})
        return json
Example #14
    def save(self, fm: FileManager):
        """
        Saves the Conditional module as a pickle file

        :param fm: A FileManager, from which the path where the pickle file is saved is fetched
        :type fm: FileManager
        :return: Dictionary with name, parameters, related module and class, and path to the file
        :rtype: Dict
        """
        json_module = super().save(fm)
        file_path = fm.get_path(f'{self.name}.pickle')
        with open(file_path, 'wb') as outfile:
            cloudpickle.dump(self, file=outfile)
        json_module["pickled_module"] = file_path
        return json_module
Example #15
    def save(self, fm: FileManager):
        """
        Saves the statsmodels wrapper and the model it contains

        :param fm: FileManager for getting the path
        :type fm: FileManager
        :return: Dictionary with all information for restoring the module
        :rtype: Dict
        """
        json = super().save(fm)
        if self.is_fitted:
            model_file_path = fm.get_path(f"{self.name}_fitted_model.pickle")
            self.model.save(model_file_path)
            json.update({"statsmodel_model": model_file_path})
        json.update({
            "sm_class": self.module.__name__,
            "sm_module": self.module.__module__
        })
        return json
Example #16
import unittest
from datetime import datetime
from unittest.mock import call, patch

from pywatts.core.filemanager import FileManager
# Assumption: the exact module providing IOException may differ between pyWATTS versions.
from pywatts.core.exceptions.io_exceptions import IOException


class TestFilemanager(unittest.TestCase):
    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_get_path(self, os_mock, datetime_mock):
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
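        # The mocked datetime fixes the timestamp that appears in the expected directory names below.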
        os_mock.path.join.side_effect = [
            "testpath/2442_12_24_01_02_03",
            "testpath/2442_12_24_01_02_03/my_result",
            "testpath/2442_12_24_01_02_03/my_result/result.csv"
        ]

        self.filemanager = FileManager("testpath")

        os_mock.path.isfile.return_value = False
        os_mock.path.split.return_value = ("", "result.csv")

        filepath = self.filemanager.get_path("result.csv", "my_result")

        join_calls = [
            call("testpath", "2442_12_24_01_02_03"),
            call("testpath/2442_12_24_01_02_03", "my_result"),
            call("testpath/2442_12_24_01_02_03/my_result", "result.csv")
        ]
        os_mock.path.join.assert_has_calls(join_calls)
        os_mock.path.split.assert_called_once_with("result.csv")

        self.assertEqual(filepath,
                         "testpath/2442_12_24_01_02_03/my_result/result.csv")

    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_get_path_filename_with_path(self, os_mock, datetime_mock):
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        os_mock.path.join.side_effect = [
            "testpath/2442_12_24_01_02_03",
            "testpath/2442_12_24_01_02_03/my_result",
            "testpath/2442_12_24_01_02_03/my_result/result.csv"
        ]

        self.filemanager = FileManager("testpath")

        os_mock.path.isfile.return_value = False
        os_mock.path.split.return_value = ("", "result.csv")
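        # get_path is expected to discard the directory prefix of "path/result.csv" and keep only the file name.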

        filepath = self.filemanager.get_path("path/result.csv", "my_result")

        self.assertEqual(filepath,
                         "testpath/2442_12_24_01_02_03/my_result/result.csv")

    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_not_allowed_filetype(self, os_mock, datetime_mock):
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        os_mock.path.join.side_effect = ["testpath/2442_12_24_01_02_03"]

        self.filemanager = FileManager("testpath")

        os_mock.path.isfile.return_value = False
        os_mock.path.split.return_value = ("", "result.test")
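        # The "test" extension is not among the allowed file types, so an IOException is expected.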
        with self.assertRaises(IOException) as cm:
            self.filemanager.get_path("result.test")
        self.assertEqual(cm.exception.args, (
            "test is not an allowed file type. Allowed types are ['png', 'csv', 'xlsx', "
            "'pickle', 'tex', 'json', 'h5', 'pt', 'md'].", ))

    @patch("pywatts.core.filemanager.logger")
    @patch("pywatts.core.filemanager.datetime")
    @patch("pywatts.core.filemanager.os")
    def test_duplicate_filename(self, os_mock, datetime_mock, logger_mock):
        datetime_mock.now.return_value = datetime(2442, 12, 24, 1, 2, 3)
        os_mock.path.join.side_effect = [
            "testpath/2442_12_24_01_02_03",
            "testpath/2442_12_24_01_02_03/my_result",
            "testpath/2442_12_24_01_02_03/my_result/result.csv"
        ]

        os_mock.path.splitext.return_value = (
            "testpath/2442_12_24_01_02_03/my_result/result", "csv")

        self.filemanager = FileManager("testpath")

        os_mock.path.isfile.return_value = True
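        # Simulate that the target file already exists so FileManager appends a counter to the name.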
        os_mock.path.split.return_value = ("", "result.csv")
        result = self.filemanager.get_path("result.csv")
        self.assertEqual(
            result, 'testpath/2442_12_24_01_02_03/my_result/result_1.csv')
        logger_mock.info.assert_called_with(
            'File %s already exists. We appended %s to the name',
            'testpath/2442_12_24_01_02_03/my_result', 1)