Esempio n. 1
0
class HDFSTensorboardManager(base.TensorboardManager):
    """
    Store and tfevents files to HDFS.
    """
    @util.preserve_random_state
    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        super().__init__(*args, **kwargs)
        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url,
                                     root=self.hdfs_path,
                                     user=self.user)
        self.client.makedirs(str(self.sync_path))

    @util.preserve_random_state
    def sync(self) -> None:
        for path in self.to_sync():
            file_name = str(self.sync_path.joinpath(path.name))

            logging.debug(f"Uploading {path} to {self.hdfs_path}")

            self.client.upload(file_name, str(path))

    def delete(self) -> None:
        self.client.delete(self.sync_path, recursive=True)
Esempio n. 2
0
def run_conv_file_local_to_hdfs(configData: ConfigData):
    """

    # client.upload('/shouyinbao/', "/home/testFolder/logflow/bl_shouyinbao/UTF8/20181101/9999100000/t1_trxrecord_20181101_V2.csv", cleanup=True)
    # dat = client.list('/shouyinbao/', status=False)
    # print(dat)

    # root_path = "/home/bd/桌面/201811_flow/zc_shouyinbao/UNZIP/"
    # dest_dir1 = "/home/bd/桌面/201811_flow/zc_shouyinbao/UTF8/"
    # dest_dir2 = "/shouyinbao/zc_shouyinbao/UTF8/"

    # root_path = "/home/testFolder/logflow/bl_shouyinbao/UNZIP/"
    # dest_dir1 = "/home/testFolder/logflow/bl_shouyinbao/UTF8/"
    # dest_dir2 = "/shouyinbao/zc_shouyinbao/UTF8/"

    # i_file = '/home/testFolder/logflow/bl_shouyinbao/20181101/9999100000/t1_trxrecord_20181101_V2.csv'
    # o_file = '/home/testFolder/logflow/bl_shouyinbao/UTF8/20181101/9999100000/t1_trxrecord_20181101_V2.csv'

    :param configData:
    :param the_date:
    :param is_baoli:
    :return:
    """
    f_date_str = configData.get_f_date()  # "20181101"

    a_client = InsecureClient(configData.hdfs_ip(), user="******")   # "http://10.2.201.197:50070"
    # webhdfs 默认是 dr.who ,不能伪装成其他用户,可以在配置里修改 hadoop.http.staticuser.user=dr.who
    # https://www.cnblogs.com/peizhe123/p/5540845.html
    root_path = os.path.join(configData.get_data_path(), f_date_str)
    dest_dir1 = os.path.join(configData.get_utf8_path(), f_date_str)
    dest_dir2 = str(pathlib.PurePosixPath(configData.get_hdfs_path()).joinpath(f_date_str))

    print("Start\n")

    f_name = configData.get_file_name(f_date_str)  # "t1_trxrecord_" the_date # "_V2.csv"

    branches = MyLocalFile.get_child_dir(root_path)
    for aBranch in branches:
        if MyLocalFile.check_branch(aBranch):
            files = MyLocalFile.get_child_file(aBranch)
            f_a_branch = os.path.basename(aBranch)
            for aFile in files:
                if MyLocalFile.check_file(aFile, f_name):
                    to_file1 = str(pathlib.PurePath(dest_dir1).joinpath(f_a_branch, f_name))
                    to_file2 = str(pathlib.PurePosixPath(dest_dir2).joinpath(f_a_branch, f_name))
                    f_add_head = configData.get_hive_add_date(f_a_branch)
                    f_add_end = configData.get_hive_add_date("789")
                    f_need_head = not configData.get_hive_head()  # False
                    MyLocalFile.conv_file_local(aFile, to_file1, need_first_line=f_need_head,p_add_head=f_add_head, p_add_tail=f_add_end,quoting="")
                    MyHdfsFile.safe_make_dir(a_client, to_file2)
                    # client.newupload(to_file2, to_file1, encoding='utf-8')
                    the_file = a_client.status(to_file2, strict=False)
                    if the_file is None:
                        a_client.upload(to_file2, to_file1) #, encoding='utf-8')
                        a_client.set_permission(to_file2, 777)
                    # a_client.set_owner(thePath,owner='hdfs',group='supergroup')
                    elif the_file['type'].lower() == 'file':  # 'directory'
                        a_client.set_permission(to_file2, 777)
def run_conv_file_local_to_hdfs(configData: ConfigData):
    """

    :param configData:
    :return:
    """
    f_date_str = configData.get_f_date()  # "20181101"
    a_client = InsecureClient(configData.hdfs_ip(), user="******")  # "http://10.2.201.197:50070"

    root_path = os.path.join(configData.get_data_path(), f_date_str)                        # allinpay_data_bl
    dest_dir1 = os.path.join(configData.get_utf8_path(), f_date_str)                        # allinpay_utf8_bl
    dest_dir2 = str(pathlib.PurePosixPath(configData.get_hdfs_path()).joinpath(f_date_str)) # hdfs_dir_bl
    # file_ext7 = configData.get_data("file_ext7")  # _loginfo_rsp_bl_new.csv   # 20181101_loginfo_rsp_bl_new.csv
    # file_ext8 = configData.get_data("file_ext8")  # _rsp_agt_bl_new.del       # 20181101_rsp_agt_bl_new.del
    # file_ext9 = configData.get_data("file_ext9")  # _rxinfo_rsp_bl.txt        # 20181101_rxinfo_rsp_bl.txt

    # f_list = [file_ext7, file_ext8, file_ext9]

    print("Start\n")

    # "file_ext" + str(configData.the_id)
    file_name = configData.get_file_name(f_date_str).lower()

    files = MyLocalFile.get_child_file(root_path)
    for aFile in files:
        short_name = os.path.basename(aFile).lower()
        f_name = pathlib.PurePath(aFile).name
        if short_name == file_name:
            to_file1 = str(pathlib.PurePath(dest_dir1).joinpath(f_name))
            to_file2 = str(pathlib.PurePosixPath(dest_dir2).joinpath(f_name))
            f_add_date = configData.get_hive_add_date(f_date_str)
            f_need_head = not configData.get_hive_head()
            MyLocalFile.conv_file_local(aFile, to_file1, need_first_line=f_need_head, p_add_head=f_add_date)
            MyHdfsFile.safe_make_dir(a_client, to_file2)
            # a_client.newupload(to_file2, to_file1, encoding='utf-8')
            the_file = a_client.status(to_file2, strict=False)
            if the_file is None:
                a_client.upload(to_file2, to_file1)
                a_client.set_permission(to_file2, 777)
            # a_client.set_owner(thePath,owner='hdfs',group='supergroup')
            elif the_file['type'].lower() == 'file':  # 'directory'
                a_client.set_permission(to_file2, 777)
Esempio n. 4
0
class HDFSTensorboardManager(base.TensorboardManager):
    """
    Store and tfevents files to HDFS.
    """
    @util.preserve_random_state
    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        super().__init__(*args, **kwargs)
        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url,
                                     root=self.hdfs_path,
                                     user=self.user)
        self.client.makedirs(str(self.sync_path))

    @util.preserve_random_state
    def sync(
        self,
        selector: Callable[[pathlib.Path], bool] = lambda _: True,
        mangler: Callable[[pathlib.Path, int], pathlib.Path] = lambda p, __: p,
        rank: int = 0,
    ) -> None:
        for path in self.to_sync(selector):
            relative_path = path.relative_to(self.base_path)
            mangled_relative_path = mangler(relative_path, rank)
            mangled_path = self.sync_path.joinpath(mangled_relative_path)
            file_name = str(mangled_path)
            logging.debug(f"Uploading {path} to {self.hdfs_path}")

            self.client.upload(file_name, str(path))

    def delete(self) -> None:
        self.client.delete(self.sync_path, recursive=True)
Esempio n. 5
0
class HDFSStorageManager(StorageManager):
    """
    Store and load checkpoints from HDFS.
    """
    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        temp_dir: Optional[str] = None,
    ) -> None:
        super().__init__(
            temp_dir if temp_dir is not None else tempfile.gettempdir())

        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url,
                                     root=self.hdfs_path,
                                     user=self.user)

    def post_store_path(self, storage_id: str, storage_dir: str,
                        metadata: StorageMetadata) -> None:
        """post_store_path uploads the checkpoint to hdfs and deletes the original files."""
        try:
            logging.info("Uploading storage {} to HDFS".format(storage_id))
            result = self.client.upload(metadata, storage_dir)

            logging.info("Uploaded storage {} to HDFS path {}".format(
                storage_id, result))
        finally:
            self._remove_checkpoint_directory(metadata.storage_id)

    @contextlib.contextmanager
    def restore_path(self, metadata: StorageMetadata) -> Iterator[str]:
        logging.info("Downloading storage {} from HDFS".format(
            metadata.storage_id))

        self.client.download(metadata.storage_id,
                             self._base_path,
                             overwrite=True)

        try:
            yield os.path.join(self._base_path, metadata.storage_id)
        finally:
            self._remove_checkpoint_directory(metadata.storage_id)

    def delete(self, metadata: StorageMetadata) -> None:
        logging.info("Deleting storage {} from HDFS".format(
            metadata.storage_id))
        self.client.delete(metadata.storage_id, recursive=True)
Esempio n. 6
0
class HDFSStorageManager(storage.CloudStorageManager):
    """
    Store and load checkpoints from HDFS.
    """

    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        temp_dir: Optional[str] = None,
    ) -> None:
        super().__init__(temp_dir if temp_dir is not None else tempfile.gettempdir())

        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url, root=self.hdfs_path, user=self.user)

    @util.preserve_random_state
    def upload(self, src: Union[str, os.PathLike], dst: str) -> None:
        src = os.fspath(src)
        logging.info(f"Uploading to HDFS: {dst}")
        self.client.upload(dst, src)

    @util.preserve_random_state
    def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
        dst = os.fspath(dst)
        logging.info(f"Downloading {src} from HDFS")
        self.client.download(src, dst, overwrite=True)

    @util.preserve_random_state
    def delete(self, tgt: str) -> None:
        logging.info(f"Deleting {tgt} from HDFS")
        self.client.delete(tgt, recursive=True)
Esempio n. 7
0
def upload_and_delete_file(file_path, upload_path):
    for file_name in os.listdir(file_path):
        try:
            # session = Session()
            # session.auth = HTTPBasicAuth('username', 'password')
            # client = InsecureClient(url=HDFS_HOSTS, user="******", session=session)
            client = InsecureClient(url=HDFS_HOSTS, user=HDFS_USER)
            # client.delete("/data/scan/sm_word_log_message-2018-06-27-20")
            # print(client.list("/data/scan/sm/", status=False))
            path = client.upload(upload_path + file_name,
                                 file_path + file_name,
                                 cleanup=True)
            if path:
                os.remove(file_path + file_name)
        except Exception as e:
            logger.error("upload_and_delete_file  error = %s", str(e))