Beispiel #1
0
    def _zip(self, local_dir, tarball):
        """
        Zip the model information and all files in local_dir into a tarball.

        Entries whose archive name (with a leading "./" removed) is listed
        in ALL_TAR_FILES are excluded from the tarball.

        Args:
            local_dir (str): the local directory.
            tarball (str): the tarball path.

        Returns:
            None.
        """
        # NOTE: the unzipped files of the job tarball should be skipped.
        # The import is kept function-local as in the original code
        # (presumably to avoid an import cycle -- TODO confirm).
        from runtime.pai.prepare_archive import ALL_TAR_FILES

        # Named so that it does not shadow the builtin ``filter``.
        def _skip_job_files(tarinfo):
            """Return None for entries in ALL_TAR_FILES, else the entry."""
            name = tarinfo.name
            # The archive is rooted at "./", so drop that prefix before
            # comparing against the bare names in ALL_TAR_FILES.
            if name.startswith("./"):
                name = name[2:]

            # Returning None is expected to make zip_dir drop the entry
            # (same contract as a tarfile add-filter -- verify in zip_dir).
            return None if name in ALL_TAR_FILES else tarinfo

        zip_dir(local_dir, tarball, arcname="./", filter=_skip_job_files)
Beispiel #2
0
    def test_tar(self):
        """Round-trip a small directory tree through zip_dir and unzip_dir.

        Checks that the extracted tree has the expected layout and that the
        extracted file carries the same content that was written before
        archiving.
        """
        with temp_file.TemporaryDirectory(as_cwd=True):
            # create the test file tree:
            #
            # |-sqlflow_tar
            #   |-sqlflow_sub_dir
            #     |-hello.py
            test_dir = "sqlflow_tar"
            test_sub_dir = "sqlflow_sub_dir"
            test_py_file = "hello.py"
            test_py_content = "print('hello SQLFlow!')"

            fullpath = os.path.join(test_dir, test_sub_dir)
            os.makedirs(fullpath)
            with open(os.path.join(fullpath, test_py_file), "w") as f:
                f.write(test_py_content)

            zip_dir(fullpath, "sqlflow.tar.gz")
            unzip_dir("sqlflow.tar.gz", "output")

            extracted_dir = os.path.join("output", test_dir, test_sub_dir)
            extracted_file = os.path.join(extracted_dir, test_py_file)
            self.assertTrue(os.path.isdir(extracted_dir))
            self.assertTrue(os.path.isfile(extracted_file))
            # Read the *extracted* copy: re-reading the source file would
            # pass even if the archive round-trip corrupted the content.
            with open(extracted_file, "r") as f:
                self.assertEqual(f.read(), test_py_content)
Beispiel #3
0
    def save(self, datasource, table, cwd="./"):
        """Archive the working directory and store the tarball in the DBMS.

        The model object is dumped to model_obj_file, the directory cwd is
        packed into tarball, and the tarball bytes are then written to the
        given table in fixed-size chunks.

        Args:
            datasource: string
                the connection string to DBMS.
            table: string
                the saved table name.
            cwd: string
                the directory to archive; defaults to "./".
        """
        # NOTE(review): model_obj_file and tarball are not defined in this
        # method; presumably module-level names -- confirm.
        _dump_pkl(self, model_obj_file)
        zip_dir(cwd, tarball)

        def _bytes_reader(filename, buf_size=8 * 32):
            """Build a generator function yielding the file in chunks."""
            def _gen():
                with open(filename, "rb") as stream:
                    # iter() with a sentinel stops on the first empty
                    # read, i.e. at end of file.
                    for chunk in iter(lambda: stream.read(buf_size), b""):
                        yield chunk

            return _gen

        write_with_generator(datasource, table, _bytes_reader(tarball))
Beispiel #4
0
    def _zip(self, local_dir, tarball):
        """
        Zip the model information and all files in local_dir into a tarball.

        The model object is dumped to a temporary file inside local_dir so
        that it is archived with the other files; that file is removed
        afterwards, even when archiving fails.

        Args:
            local_dir (str): the local directory.
            tarball (str): the tarball path.

        Returns:
            None.
        """
        model_obj_file = os.path.join(local_dir, MODEL_OBJ_FILE_NAME)
        _dump_pkl(self, model_obj_file)
        try:
            zip_dir(local_dir, tarball, arcname="./")
        finally:
            # Do not leave the temporary model file behind in local_dir
            # if zip_dir raises.
            os.remove(model_obj_file)
Beispiel #5
0
    def _zip(self, local_dir, tarball, save_to_db=False):
        """
        Zip the model information and all files in local_dir into a tarball.

        Unless save_to_db is set, the model metadata is serialized as JSON
        into a temporary file inside local_dir so that it is archived with
        the other files; that file is removed afterwards, even when
        archiving fails.

        Args:
            local_dir (str): the local directory.
            tarball (str): the tarball path.
            save_to_db (bool): when True, no model metadata file is written
                into local_dir before archiving.

        Returns:
            None.
        """
        model_obj_file = None
        if not save_to_db:
            # Serialize before creating the file so a serialization error
            # does not leave an empty metadata file in local_dir.
            payload = json.dumps(self._to_dict(),
                                 cls=JSONEncoderWithFeatureColumn)
            model_obj_file = os.path.join(local_dir, MODEL_OBJ_FILE_NAME)
            with open(model_obj_file, "w") as f:
                f.write(payload)

        try:
            zip_dir(local_dir, tarball, arcname="./")
        finally:
            # Clean up the temporary metadata file whether or not zip_dir
            # succeeded.
            if model_obj_file:
                os.remove(model_obj_file)