Beispiel #1
0
    def deserialize(
        filepath: str,
        source_directory: str = "filestore",
        destination_directory: str = "system_temp",
        **kwargs,
    ) -> Dict[str, str]:
        """Stage a stored file into another registered directory.

        The same relative ``filepath`` is resolved under both the source and
        the destination roots (looked up in ``FILEPATH_REGISTRY``), and the
        two resulting paths are handed to ``DiskIOMethods.copy_file``.

        Args:
            filepath: Path of the file, relative to both registered roots.
            source_directory: Registry key of the root to copy from.
            destination_directory: Registry key of the root to copy to.
            **kwargs: Accepted and ignored.

        Returns:
            A dict holding the unchanged ``filepath`` and, under the
            ``"source_directory"`` key, the *destination* registry key.
        """
        origin = join(FILEPATH_REGISTRY.get(source_directory), filepath)
        target = join(FILEPATH_REGISTRY.get(destination_directory), filepath)
        DiskIOMethods.copy_file(origin, target)

        # The destination key is reported as "source_directory" on purpose:
        # it names the directory the file now sits in.
        return dict(filepath=filepath, source_directory=destination_directory)
Beispiel #2
0
    def serialize(
        obj: ddDataFrame,
        filepath: str,
        format_directory: str = JSON_DIRECTORY,
        format_extension: str = ".jsonl",
        destination_directory: str = "system_temp",
        **kwargs,
    ) -> Dict[str, str]:
        """Write a dask dataframe as JSON files and report the files found.

        The target name is built as
        ``<root>/<format_directory>/<filepath>-*<format_extension>``, where
        ``<root>`` is the ``FILEPATH_REGISTRY`` entry for
        ``destination_directory``. That name is passed to
        ``DaskPersistenceMethods.to_json`` and then reused as a glob pattern
        to collect the files present on disk.

        Args:
            obj: Dataframe handed to ``DaskPersistenceMethods.to_json``.
            filepath: Base name of the output, relative to
                ``format_directory`` and without extension.
            format_directory: Sub-directory of the root used for this format.
            format_extension: Extension appended after the ``-*`` placeholder.
            destination_directory: Registry key of the root to write under.
            **kwargs: Accepted and ignored.

        Returns:
            A dict with ``"filepaths"`` (a list of paths relative to the
            destination root, despite the ``Dict[str, str]`` annotation) and
            ``"source_directory"`` (the destination registry key).
        """
        # Function-scope import so this block does not depend on the module's
        # import list providing ``relpath``.
        from os.path import relpath

        destination_folder = FILEPATH_REGISTRY.get(destination_directory)
        # read_json method expects a * format, so the placeholder is kept in
        # the name that is written and later globbed.
        filename_format = join(format_directory,
                               filepath + "-*" + format_extension)
        full_path = join(destination_folder, filename_format)
        DaskPersistenceMethods.to_json(obj, full_path)

        # NOTE(review): the glob also matches files left by an earlier call
        # with the same name -- confirm callers use unique file names.
        written_filepaths = glob.glob(full_path)

        # Keep the paths relative to the registered root. ``relpath`` is used
        # instead of ``str.split(root)[1]`` because splitting breaks when the
        # root text occurs again further down the path, and it also removes
        # the leading separator on every platform.
        filepaths = [
            relpath(written, destination_folder)
            for written in written_filepaths
        ]

        return {
            "filepaths": filepaths,
            "source_directory": destination_directory
        }
Beispiel #3
0
 def deserialize(
     filepaths: List[str],
     source_directory: str = "system_temp",
     **kwargs,
 ) -> Dict[str, Any]:
     """Load a list of JSON files through ``DaskPersistenceMethods``.

     Args:
         filepaths: Paths relative to the registered source root.
         source_directory: Registry key looked up in ``FILEPATH_REGISTRY``.
         **kwargs: Accepted and ignored.

     Returns:
         A dict whose ``"obj"`` entry is whatever
         ``DaskPersistenceMethods.read_json`` returns for the resolved paths.
     """
     resolved_paths = []
     for relative_path in filepaths:
         # The root is looked up per entry, so an empty list never touches
         # the registry.
         source_root = FILEPATH_REGISTRY.get(source_directory)
         resolved_paths.append(join(source_root, relative_path))

     loaded = DaskPersistenceMethods.read_json(resolved_paths)
     return {"obj": loaded}
Beispiel #4
0
    def serialize(
        obj: Any,
        filepath: str,
        format_directory: str = HDF5_DIRECTORY,
        format_extension: str = ".h5",
        destination_directory: str = "system_temp",
        **kwargs,
    ) -> Dict[str, str]:
        """Save ``obj`` through ``KerasPersistenceMethods`` in h5 format.

        Args:
            obj: Object handed to ``KerasPersistenceMethods.save_model``.
            filepath: Base name of the output, relative to
                ``format_directory`` and without extension.
            format_directory: Sub-directory of the root used for this format.
            format_extension: Extension appended to ``filepath``.
            destination_directory: Registry key of the root to write under.
            **kwargs: Accepted and ignored.

        Returns:
            A dict with ``"filepath"`` (the path relative to the destination
            root, including format directory and extension) and
            ``"source_directory"`` (the destination registry key).
        """
        # Path relative to the registered root; this is what callers get back.
        relative_path = join(format_directory, filepath + format_extension)
        destination_root = FILEPATH_REGISTRY.get(destination_directory)

        KerasPersistenceMethods.save_model(
            obj, join(destination_root, relative_path), save_format="h5"
        )

        return {
            "filepath": relative_path,
            "source_directory": destination_directory,
        }
Beispiel #5
0
    def serialize(
        obj: ddDataFrame,
        filepath: str,
        format_directory: str = ORC_DIRECTORY,
        format_extension: str = ".orc",
        destination_directory: str = "system_temp",
        **kwargs,
    ) -> Dict[str, str]:
        """Write a dask dataframe through ``DaskPersistenceMethods.to_orc``.

        Args:
            obj: Dataframe handed to ``DaskPersistenceMethods.to_orc``.
            filepath: Base name of the output, relative to
                ``format_directory`` and without extension.
            format_directory: Sub-directory of the root used for this format.
            format_extension: Extension appended to ``filepath``.
            destination_directory: Registry key of the root to write under.
            **kwargs: Accepted and ignored.

        Returns:
            A dict with ``"filepath"`` (the path relative to the destination
            root, including format directory and extension) and
            ``"source_directory"`` (the destination registry key).
        """
        # Path relative to the registered root; this is what callers get back.
        relative_path = join(format_directory, filepath + format_extension)
        destination_root = FILEPATH_REGISTRY.get(destination_directory)

        DaskPersistenceMethods.to_orc(obj, join(destination_root, relative_path))

        return dict(
            filepath=relative_path,
            source_directory=destination_directory,
        )
Beispiel #6
0
    def serialize(
        obj: Any,
        filepath: str,
        format_directory: str = PICKLE_DIRECTORY,
        format_extension: str = ".pkl",
        destination_directory: str = "system_temp",
        **kwargs,
    ) -> Dict[str, str]:
        """Dump ``obj`` through ``PicklePersistenceMethods.dump_object``.

        The parent directory of the target file is created first if it does
        not exist yet.

        Args:
            obj: Object handed to ``PicklePersistenceMethods.dump_object``.
            filepath: Base name of the output, relative to
                ``format_directory`` and without extension.
            format_directory: Sub-directory of the root used for this format.
            format_extension: Extension appended to ``filepath``.
            destination_directory: Registry key of the root to write under.
            **kwargs: Accepted and ignored.

        Returns:
            A dict with ``"filepath"`` (the path relative to the destination
            root, including format directory and extension) and
            ``"source_directory"`` (the destination registry key).
        """
        # Path relative to the registered root; this is what callers get back.
        relative_path = join(format_directory, filepath + format_extension)
        target = join(
            FILEPATH_REGISTRY.get(destination_directory), relative_path
        )

        # Create the containing directory up front; an existing one is fine.
        makedirs(dirname(target), exist_ok=True)
        PicklePersistenceMethods.dump_object(obj, target)

        return dict(
            filepath=relative_path,
            source_directory=destination_directory,
        )
Beispiel #7
0
 def deserialize(filepath: str,
                 source_directory: str = "system_temp",
                 **kwargs) -> Dict[str, Any]:
     """Load an object through ``KerasPersistenceMethods.load_model``.

     Args:
         filepath: Path relative to the registered source root.
         source_directory: Registry key looked up in ``FILEPATH_REGISTRY``.
         **kwargs: Accepted and ignored.

     Returns:
         A dict whose ``"obj"`` entry is whatever
         ``KerasPersistenceMethods.load_model`` returns for the resolved path.
     """
     source_root = FILEPATH_REGISTRY.get(source_directory)
     model = KerasPersistenceMethods.load_model(join(source_root, filepath))
     return {"obj": model}
Beispiel #8
0
 def deserialize(
     filepath: str,
     source_directory: str = "system_temp",
     **kwargs,
 ) -> Dict[str, pd.DataFrame]:
     """Read a JSON file through ``PandasPersistenceMethods.read_json``.

     Args:
         filepath: Path relative to the registered source root.
         source_directory: Registry key looked up in ``FILEPATH_REGISTRY``.
         **kwargs: Accepted and ignored.

     Returns:
         A dict whose ``"obj"`` entry is whatever
         ``PandasPersistenceMethods.read_json`` returns for the resolved path.
     """
     source_root = FILEPATH_REGISTRY.get(source_directory)
     frame = PandasPersistenceMethods.read_json(join(source_root, filepath))
     return {"obj": frame}