예제 #1
0
파일: hdfs.py 프로젝트: shiyuann/determined
class HDFSTensorboardManager(base.TensorboardManager):
    """
    Store tfevents files to HDFS.

    Every path yielded by ``to_sync()`` is uploaded under ``sync_path`` through
    an ``InsecureClient`` whose ``root`` is ``hdfs_path``.
    """
    @util.preserve_random_state
    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """
        Create the HDFS client and make sure the sync directory exists.

        Args:
            hdfs_url: URL passed to ``InsecureClient``.
            hdfs_path: Path passed to the client as its ``root``.
            user: Optional user name passed to the client.
            *args: Positional arguments forwarded to the base class.
            **kwargs: Keyword arguments forwarded to the base class.
        """
        super().__init__(*args, **kwargs)
        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url,
                                     root=self.hdfs_path,
                                     user=self.user)
        self.client.makedirs(str(self.sync_path))

    @util.preserve_random_state
    def sync(self) -> None:
        """Upload every path returned by ``to_sync()`` into ``sync_path``."""
        for path in self.to_sync():
            # Only the file name is kept, so uploads land directly in sync_path.
            file_name = str(self.sync_path.joinpath(path.name))

            logging.debug(f"Uploading {path} to {self.hdfs_path}")

            # Remote destination first, local source second.
            self.client.upload(file_name, str(path))

    def delete(self) -> None:
        """Recursively delete ``sync_path`` through the HDFS client."""
        # Hand the client a plain string, as makedirs() and upload() already do.
        self.client.delete(str(self.sync_path), recursive=True)
예제 #2
0
class HDFSStorageManager(StorageManager):
    """
    Store and load checkpoints from HDFS.

    Checkpoints are addressed on HDFS by ``metadata.storage_id``; the client is
    created with ``hdfs_path`` as its ``root``.
    """
    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        temp_dir: Optional[str] = None,
    ) -> None:
        """
        Create the HDFS client.

        Args:
            hdfs_url: URL passed to ``InsecureClient``.
            hdfs_path: Path passed to the client as its ``root``.
            user: Optional user name passed to the client.
            temp_dir: Directory handed to the base class; defaults to the
                system temporary directory when ``None``.
        """
        super().__init__(
            temp_dir if temp_dir is not None else tempfile.gettempdir())

        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url,
                                     root=self.hdfs_path,
                                     user=self.user)

    def post_store_path(self, storage_id: str, storage_dir: str,
                        metadata: StorageMetadata) -> None:
        """
        Upload the checkpoint in ``storage_dir`` to HDFS, then remove the local copy.

        The local checkpoint directory is removed even when the upload fails.

        Args:
            storage_id: Identifier of the checkpoint (used for logging).
            storage_dir: Local directory holding the checkpoint files.
            metadata: Checkpoint metadata; ``metadata.storage_id`` is the
                remote path the checkpoint is uploaded to.
        """
        try:
            logging.info("Uploading storage {} to HDFS".format(storage_id))
            # The remote path must be the storage id (the same key that
            # restore_path() and delete() use), not the metadata object itself.
            result = self.client.upload(metadata.storage_id, storage_dir)

            logging.info("Uploaded storage {} to HDFS path {}".format(
                storage_id, result))
        finally:
            self._remove_checkpoint_directory(metadata.storage_id)

    @contextlib.contextmanager
    def restore_path(self, metadata: StorageMetadata) -> Iterator[str]:
        """
        Download a checkpoint and yield the local path it was downloaded to.

        The local copy is removed when the context exits.

        Args:
            metadata: Checkpoint metadata; ``metadata.storage_id`` names the
                remote checkpoint to download.

        Yields:
            ``metadata.storage_id`` joined onto ``self._base_path``.
        """
        logging.info("Downloading storage {} from HDFS".format(
            metadata.storage_id))

        self.client.download(metadata.storage_id,
                             self._base_path,
                             overwrite=True)

        try:
            yield os.path.join(self._base_path, metadata.storage_id)
        finally:
            self._remove_checkpoint_directory(metadata.storage_id)

    def delete(self, metadata: StorageMetadata) -> None:
        """Recursively delete the checkpoint named by ``metadata.storage_id`` from HDFS."""
        logging.info("Deleting storage {} from HDFS".format(
            metadata.storage_id))
        self.client.delete(metadata.storage_id, recursive=True)
def run_remove_files(configData: ConfigData):
    """
    Remove the dated data directories, both local and on HDFS.

    The date string from ``configData.get_f_date()`` is appended to the
    configured data, UTF-8 and HDFS paths. The two local directories are
    removed with errors ignored; the HDFS directory is deleted recursively on
    a best-effort basis (a failure is logged, not raised).

    Args:
        configData: Configuration object providing ``get_f_date()``,
            ``get_data_path()``, ``get_utf8_path()``, ``get_hdfs_path()`` and
            ``hdfs_ip()``.
    """
    # Imported locally so the function does not depend on a module-level import.
    import logging

    f_date_str = configData.get_f_date()  # StrTool.get_the_date_str(the_date, delta_day)  # "20181101"
    data_path = os.path.join(configData.get_data_path(), f_date_str)   # allinpay_data_bl
    utf8_path = os.path.join(configData.get_utf8_path(), f_date_str)   # allinpay_utf8_bl
    # HDFS paths always use POSIX separators, whatever the local OS is.
    hdfs_path = str(pathlib.PurePosixPath(configData.get_hdfs_path()).joinpath(f_date_str))    # hdfs_dir_bl

    a_client = InsecureClient(configData.hdfs_ip(), user="******")  # "http://10.2.201.197:50070"

    shutil.rmtree(data_path, ignore_errors=True)
    shutil.rmtree(utf8_path, ignore_errors=True)
    try:
        a_client.delete(hdfs_path, recursive=True)
    except Exception as exc:
        # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit
        # and leaves a trace of what went wrong.
        logging.getLogger(__name__).warning(
            "Failed to delete HDFS path %s: %s", hdfs_path, exc)
예제 #4
0
class HDFSTensorboardManager(base.TensorboardManager):
    """
    Store tfevents files to HDFS.

    Paths selected by ``to_sync()`` are uploaded under ``sync_path`` through an
    ``InsecureClient`` whose ``root`` is ``hdfs_path``.
    """
    @util.preserve_random_state
    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """
        Create the HDFS client and make sure the sync directory exists.

        Args:
            hdfs_url: URL passed to ``InsecureClient``.
            hdfs_path: Path passed to the client as its ``root``.
            user: Optional user name passed to the client.
            *args: Positional arguments forwarded to the base class.
            **kwargs: Keyword arguments forwarded to the base class.
        """
        super().__init__(*args, **kwargs)
        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url,
                                     root=self.hdfs_path,
                                     user=self.user)
        self.client.makedirs(str(self.sync_path))

    @util.preserve_random_state
    def sync(
        self,
        selector: Callable[[pathlib.Path], bool] = lambda _: True,
        mangler: Callable[[pathlib.Path, int], pathlib.Path] = lambda p, __: p,
        rank: int = 0,
    ) -> None:
        """
        Upload the selected files to HDFS, preserving their relative layout.

        Args:
            selector: Predicate forwarded to ``to_sync()``; the default accepts
                every path.
            mangler: Maps a path relative to ``base_path`` (plus ``rank``) to
                the relative path used under ``sync_path``; the default
                returns the path unchanged.
            rank: Value passed as the second argument to ``mangler``.
        """
        for path in self.to_sync(selector):
            relative_path = path.relative_to(self.base_path)
            mangled_relative_path = mangler(relative_path, rank)
            mangled_path = self.sync_path.joinpath(mangled_relative_path)
            file_name = str(mangled_path)
            logging.debug(f"Uploading {path} to {self.hdfs_path}")

            # Remote destination first, local source second.
            self.client.upload(file_name, str(path))

    def delete(self) -> None:
        """Recursively delete ``sync_path`` through the HDFS client."""
        # Hand the client a plain string, as makedirs() and upload() already do.
        self.client.delete(str(self.sync_path), recursive=True)
예제 #5
0
class HDFSStorageManager(storage.CloudStorageManager):
    """
    Checkpoint storage manager that keeps its data on HDFS.

    All transfers go through an ``InsecureClient`` created with ``hdfs_path``
    as its ``root``.
    """

    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        temp_dir: Optional[str] = None,
    ) -> None:
        """
        Initialise the base class and open the HDFS client.

        Args:
            hdfs_url: URL passed to ``InsecureClient``.
            hdfs_path: Path passed to the client as its ``root``.
            user: Optional user name passed to the client.
            temp_dir: Directory handed to the base class; the system temporary
                directory is used when ``None``.
        """
        if temp_dir is None:
            temp_dir = tempfile.gettempdir()
        super().__init__(temp_dir)

        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(
            self.hdfs_url,
            root=self.hdfs_path,
            user=self.user,
        )

    @util.preserve_random_state
    def upload(self, src: Union[str, os.PathLike], dst: str) -> None:
        """Upload the local path ``src`` to the HDFS path ``dst``."""
        local_source = os.fspath(src)
        message = f"Uploading to HDFS: {dst}"
        logging.info(message)
        self.client.upload(dst, local_source)

    @util.preserve_random_state
    def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
        """Download the HDFS path ``src`` to the local path ``dst``, overwriting it."""
        local_target = os.fspath(dst)
        message = f"Downloading {src} from HDFS"
        logging.info(message)
        self.client.download(src, local_target, overwrite=True)

    @util.preserve_random_state
    def delete(self, tgt: str) -> None:
        """Recursively delete the HDFS path ``tgt``."""
        message = f"Deleting {tgt} from HDFS"
        logging.info(message)
        self.client.delete(tgt, recursive=True)
예제 #6
0
import datetime

from hdfs import HdfsError
from hdfs.client import Client, InsecureClient

# Alternative kept for reference: the plain client, created without a user.
# client = Client("http://master.hadoop:50070")

# Client used by the statements below.
client = InsecureClient(
    url='http://master.hadoop:50070',
    user='******',
)

# Disabled exploration snippets (Python 2 print statements):
# print dir(client)
# print client.list("/user")
# print client.makedirs("/tmp/test")

# Recursively delete /user/spider.
client.delete(hdfs_path='/user/spider', recursive=True)

# Disabled sample (Python 2 syntax): append a line to a per-day file and fall
# back to a plain write when the append raises HdfsError.
#
# current_date = datetime.datetime.now().strftime("%Y%m%d")
#
# hdfs_dir = "/user/a/b/c/"
#
# hdfs_path = hdfs_dir + current_date
#
# data = "\n测试"  # the payload means "test"
#
# try:
#     client.write(hdfs_path=hdfs_path, data=data, append=True)
# except HdfsError,e:
#     client.write(hdfs_path=hdfs_path, data=data)