Ejemplo n.º 1
0
def test_with_gzip(hdfs_cluster):
    """Round-trip gzip-compressed bytes through a WebHDFS file.

    Writes ``b'hello'`` through a ``GzipFile`` layered on a WebHDFS file
    opened for writing, then reads the same path back through another
    ``GzipFile`` and checks the decompressed payload.
    """
    from gzip import GzipFile
    w = WebHDFS(hdfs_cluster,
                user='******',
                data_proxy={'worker.example.com': 'localhost'})
    fn = '/user/testuser/gzfile'
    with w.open(fn, 'wb') as f:
        # The context manager closes the gzip stream even if the write
        # raises; the underlying file is still closed by the outer ``with``.
        with GzipFile(fileobj=f, mode='w') as gf:
            gf.write(b'hello')
    with w.open(fn, 'rb') as f:
        # Close the reader as well instead of leaving it to the GC.
        with GzipFile(fileobj=f, mode='r') as gf:
            assert gf.read() == b'hello'
Ejemplo n.º 2
0
def test_with_gzip(hdfs_cluster):
    """Round-trip gzip-compressed bytes through a WebHDFS file.

    Writes ``b"hello"`` through a ``GzipFile`` layered on a WebHDFS file
    opened for writing, then reads the same path back through another
    ``GzipFile`` and checks the decompressed payload.
    """
    from gzip import GzipFile

    w = WebHDFS(hdfs_cluster,
                user="******",
                data_proxy={"worker.example.com": "localhost"})
    fn = "/user/testuser/gzfile"
    with w.open(fn, "wb") as f:
        # The context manager closes the gzip stream even if the write
        # raises; the underlying file is still closed by the outer ``with``.
        with GzipFile(fileobj=f, mode="w") as gf:
            gf.write(b"hello")
    with w.open(fn, "rb") as f:
        # Close the reader as well instead of leaving it to the GC.
        with GzipFile(fileobj=f, mode="r") as gf:
            assert gf.read() == b"hello"
Ejemplo n.º 3
0
def test_workflow(hdfs_cluster):
    """Exercise mkdir, write, exists/info/isfile/cat and recursive rm."""
    client = WebHDFS(hdfs_cluster,
                     user='******',
                     data_proxy={'worker.example.com': 'localhost'})
    workdir = '/user/testuser/testrun'
    path = '/user/testuser/testrun/afile'

    client.mkdir(workdir)
    with client.open(path, 'wb') as handle:
        handle.write(b'hello')

    # The file just written must be visible and report its 5-byte payload.
    assert client.exists(path)
    details = client.info(path)
    assert details['size'] == 5
    assert client.isfile(path)
    assert client.cat(path) == b'hello'

    # Removing the directory recursively must take the file with it.
    client.rm(workdir, recursive=True)
    assert not client.exists(path)
Ejemplo n.º 4
0
def test_workflow(hdfs_cluster):
    """Exercise mkdir, write, exists/info/isfile/cat and recursive rm."""
    client = WebHDFS(hdfs_cluster,
                     user="******",
                     data_proxy={"worker.example.com": "localhost"})
    workdir = "/user/testuser/testrun"
    path = "/user/testuser/testrun/afile"

    client.mkdir(workdir)
    with client.open(path, "wb") as handle:
        handle.write(b"hello")

    # The file just written must be visible and report its 5-byte payload.
    assert client.exists(path)
    details = client.info(path)
    assert details["size"] == 5
    assert client.isfile(path)
    assert client.cat(path) == b"hello"

    # Removing the directory recursively must take the file with it.
    client.rm(workdir, recursive=True)
    assert not client.exists(path)
Ejemplo n.º 5
0
def test_webhdfs_cp_file(hdfs_cluster):
    """Copy a file with ``cp_file`` and check both paths hold equal bytes."""
    client = WebHDFS(hdfs_cluster,
                     user="******",
                     data_proxy={"worker.example.com": "localhost"})
    workdir = "/user/testuser/testrun"
    src = "/user/testuser/testrun/f1"
    dst = "/user/testuser/testrun/f2"

    client.mkdir(workdir)
    with client.open(src, "wb") as handle:
        handle.write(b"hello")

    client.cp_file(src, dst)

    # Source must survive the copy and the destination must now exist.
    for path in (src, dst):
        assert client.exists(path)
    assert client.cat(src) == client.cat(dst)
Ejemplo n.º 6
0
def test_workflow_transaction(hdfs_cluster):
    """Check a file written in a transaction appears only after it ends.

    Also compares ``content_summary`` of the directory against the listing
    and removes the directory afterwards.
    """
    client = WebHDFS(hdfs_cluster,
                     user='******',
                     data_proxy={'worker.example.com': 'localhost'})
    workdir = '/user/testuser/testrun'
    path = '/user/testuser/testrun/afile'

    client.mkdirs(workdir)
    with client.transaction:
        with client.open(path, 'wb') as handle:
            handle.write(b'hello')
        # Still inside the transaction: the file must not be visible yet.
        assert not client.exists(path)

    assert client.exists(path)
    assert client.ukey(path)

    listing = client.ls(workdir, True)
    summary = client.content_summary(workdir)
    assert summary['length'] == listing[0]['size']
    assert summary['fileCount'] == 1

    client.rm(workdir, recursive=True)
    assert not client.exists(path)
Ejemplo n.º 7
0
def test_workflow_transaction(hdfs_cluster):
    """Check a file written in a transaction appears only after it ends.

    Also compares ``content_summary`` of the directory against the listing
    and removes the directory afterwards.
    """
    client = WebHDFS(hdfs_cluster,
                     user="******",
                     data_proxy={"worker.example.com": "localhost"})
    workdir = "/user/testuser/testrun"
    path = "/user/testuser/testrun/afile"

    client.mkdirs(workdir)
    with client.transaction:
        with client.open(path, "wb") as handle:
            handle.write(b"hello")
        # Still inside the transaction: the file must not be visible yet.
        assert not client.exists(path)

    assert client.exists(path)
    assert client.ukey(path)

    listing = client.ls(workdir, True)
    summary = client.content_summary(workdir)
    assert summary["length"] == listing[0]["size"]
    assert summary["fileCount"] == 1

    client.rm(workdir, recursive=True)
    assert not client.exists(path)
Ejemplo n.º 8
0
    def open(self, hive=False):
        """Open the HDFS and SQL connections used by this object.

        Builds a WebHDFS client, then a SQL connection: through ``pyodbc``
        when ``IS_WINDOWS`` is true (local Windows), otherwise through
        ``impala.dbapi`` (CDSW). Afterwards a cursor is created, the local
        temp directory is created if it does not exist, and the object is
        flagged as connected with ``db`` reset to ``None``.

        Args:
            hive: If true, connect to Hive; otherwise connect to Impala.
        """
        from fsspec.implementations.webhdfs import WebHDFS

        # Set the CA bundle path before the WebHDFS client is constructed.
        os.environ['REQUESTS_CA_BUNDLE'] = self.__PEM_PATH

        hdfs_options = dict(port=14000,
                            kerberos=True,
                            use_https=True,
                            use_ssl=True,
                            use_listings_cache=False)
        self._hdfs_cnxn = WebHDFS(self._HTTPFS_HOST, **hdfs_options)

        if hive:
            self._engine = "hive"
        else:
            self._engine = 'impala'

        if IS_WINDOWS:
            # LOCAL - Windows: connect through an ODBC DSN.
            from pyodbc import connect
            if hive:
                dsn = 'DSN=DISC DP Hive 64bit'
            else:
                dsn = 'DSN=DISC DP Impala 64bit'
            self._cnxn = connect(dsn, autocommit=True)
        else:
            # CDSW (os.name='POSIX'): connect through impala.dbapi.
            from impala.dbapi import connect
            if hive:
                host, port = self._HIVE_HOST, 10000
            else:
                host, port = self._IMPALA_HOST, 21050
            # auth_mechanism options: ['NOSASL', 'PLAIN', 'GSSAPI', 'LDAP']
            self._cnxn = connect(host=host,
                                 use_ssl=True,
                                 timeout=30,
                                 kerberos_service_name=self._engine,
                                 port=port,
                                 auth_mechanism="GSSAPI")

        self._cursor = self._cnxn.cursor()

        temp_dir = self.__TEMP_LOCAL_DIR
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)

        self._is_disc_connected = True
        self.db = None
Ejemplo n.º 9
0
def test_simple(hdfs_cluster):
    """Check the home directory and that creating ``/root`` is refused."""
    client = WebHDFS(hdfs_cluster, user='******')

    home_dir = client.home_directory()
    assert home_dir == '/user/testuser'

    # Creating a directory at the filesystem root must raise PermissionError.
    with pytest.raises(PermissionError):
        client.mkdir('/root')
Ejemplo n.º 10
0
def test_pickle(hdfs_cluster):
    """Check a WebHDFS instance compares equal to its pickle round-trip."""
    original = WebHDFS(hdfs_cluster, user='******')
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    assert original == restored
Ejemplo n.º 11
0
def test_simple(hdfs_cluster):
    """Check the home directory and that creating ``/root`` is refused."""
    client = WebHDFS(hdfs_cluster, user="******")

    home_dir = client.home_directory()
    assert home_dir == "/user/testuser"

    # Creating a directory at the filesystem root must raise PermissionError.
    with pytest.raises(PermissionError):
        client.mkdir("/root")
Ejemplo n.º 12
0
Archivo: webhdfs.py Proyecto: jhhuh/dvc
    def fs(self):
        """Build a WebHDFS client from ``self.fs_args``.

        The client's session ``verify`` attribute is set to
        ``self._ssl_verify`` before the client is returned.
        """
        from fsspec.implementations.webhdfs import WebHDFS

        client = WebHDFS(**self.fs_args)
        client.session.verify = self._ssl_verify
        return client
Ejemplo n.º 13
0
def test_simple(hdfs_cluster):
    """Check that the reported home directory is ``/user/testuser``."""
    client = WebHDFS(hdfs_cluster, user='******')
    home_dir = client.home_directory()
    assert home_dir == '/user/testuser'
Ejemplo n.º 14
0
    def fs(self):
        """Build and return a WebHDFS client from ``self.fs_args``."""
        from fsspec.implementations.webhdfs import WebHDFS

        client = WebHDFS(**self.fs_args)
        return client