Code example #1
import json
import logging
import os
import traceback
import uuid

import pandas as pd
import webhdfspy


def convert_to_parquet(pJson):
    # Load the source JSON and flatten it into a DataFrame
    with open(pJson, 'r') as f:
        data = json.load(f)
    df = pd.json_normalize(data)

    # Write a uniquely named Parquet file locally
    parqHexVal = uuid.uuid1().hex
    pathParquet = "../output/jsonParquet-" + parqHexVal + ".parquet"
    df.to_parquet(path=pathParquet, compression='gzip')
    print(pd.read_parquet(pathParquet))

    # Upload the Parquet file to HDFS, then delete the local copies
    try:
        webHDFS = webhdfspy.WebHDFSClient("mercury.tritronik.com", 50070,
                                          "hdfs")
        pathHdfs = '/dpi-aggregate/' + parqHexVal + ".parquet"
        webHDFS.copyfromlocal(pathParquet, pathHdfs)
        os.remove(pJson)
        os.remove(pathParquet)
    except Exception:
        logging.error(traceback.format_exc())
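
For context, a minimal driver for convert_to_parquet(), assuming the ../output directory exists and the HDFS host in the snippet is reachable; the sample file name and records are hypothetical:

import json

# Hypothetical input: a small list of flat records
records = [{"id": 1, "bytes": 1024}, {"id": 2, "bytes": 2048}]
with open('../output/sample.json', 'w') as f:
    json.dump(records, f)

convert_to_parquet('../output/sample.json')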
Code example #2
def setUp(self):
    self.webHDFS = webhdfspy.WebHDFSClient('localhost', 50070, 'fabio')
Code example #3
#!/usr/bin/env python
import time
import json
import webhdfspy

c = webhdfspy.WebHDFSClient('1.1.1.1', 8443, 'USER', 'PASS')

print('\n## list dir ##')
print(json.dumps(c.listdir('/tmp'), indent=4))
time.sleep(1)

print('\n## mkdir: /tmp/test_webhdfs ##')
c.mkdir('/tmp/test_webhdfs')
time.sleep(1)

print('\n## create file: /tmp/test_webhdfs/text ##')
c.create('/tmp/test_webhdfs/text', 'text', True)
print(json.dumps(c.listdir('/tmp/test_webhdfs')))
time.sleep(1)

print('\n## copyfromlocal: /etc/hosts to /tmp/test_webhdfs/test_hosts ##')
c.copyfromlocal('/etc/hosts', '/tmp/test_webhdfs/test_hosts', True)
time.sleep(1)

print('\n## rename to /tmp/test_webhdfs/test_hosts_rename ##')
c.rename('/tmp/test_webhdfs/test_hosts', '/tmp/test_webhdfs/test_hosts_rename')
time.sleep(1)

print('\n## open: /tmp/test_webhdfs/test_hosts_rename ##')
print(c.open('/tmp/test_webhdfs/test_hosts_rename'))
time.sleep(1)
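
Note that, unlike the other examples, this script passes a fourth positional argument ('PASS') and connects on port 8443 rather than 50070, which suggests a password-protected HTTPS gateway in front of WebHDFS; treat the extra argument as specific to that setup.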
Code example #4
def setUp(self):
    self.webHDFS = webhdfspy.WebHDFSClient('localhost', 50070, 'fabio')
    self.webHDFS.mkdir(TEST_DIR_PATH)
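
For symmetry, a tearDown would delete the directory created above. This is a hypothetical sketch: a remove(path, recursive) method is assumed, since no deletion call appears in any example on this page:

def tearDown(self):
    # Hypothetical cleanup; assumes WebHDFSClient exposes remove(path, recursive)
    self.webHDFS.remove(TEST_DIR_PATH, True)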
Code example #5
File: 2-4.py Project: chhak/Bigdata
"""
날짜 : 2020/07/22
이름 : 김철학
내용 : 파이썬 Hadoop 실습하기
"""
from pywebhdfs.webhdfs import PyWebHdfsClient as hadoop
import webhdfspy

hdfs = webhdfspy.WebHDFSClient('192.168.100.101', 50070, 'root')
#print(hdfs.listdir('/'))

#hdfs.mkdir('/test1')
hdfs.copyfromlocal(local_path='/home/bigdata/naver',
                   hdfs_path='/naver',
                   overwrite=True)

print('완료')
#Hadoop 접속
#Local의 /home/bigdata/naver/naver-20-xx-xx를 하둡 /naver/ 복사
#Local의 /home/bigdata/naver/naver-20-xx-xx를 삭제
#프로그램 종료
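
The trailing comments describe a copy-then-delete workflow that the snippet above does not actually implement. A minimal sketch of that plan, assuming the naver-20-xx-xx files sit directly under /home/bigdata/naver and reusing only the copyfromlocal() call shown above:

import os
import webhdfspy

hdfs = webhdfspy.WebHDFSClient('192.168.100.101', 50070, 'root')

local_dir = '/home/bigdata/naver'   # local files named naver-20-xx-xx (per the comments)
for name in os.listdir(local_dir):
    src = os.path.join(local_dir, name)
    # Copy each local file to /naver/ on HDFS, then delete the local copy
    hdfs.copyfromlocal(local_path=src, hdfs_path='/naver/' + name, overwrite=True)
    os.remove(src)

print('Done')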
Code example #6
import webhdfspy
import pandas as pd

webHDFS = webhdfspy.WebHDFSClient("host6.cloud.sinocbd.com",
                                  50070,
                                  username='******')

# Load the root directory listing into a DataFrame
data = pd.DataFrame(webHDFS.listdir('/'))
print(data)

# Each pathSuffix is the name of an entry directly under /
pathlist = data['pathSuffix']
for i in pathlist:
    path = "/" + i
    # print(path)
    # print(webHDFS.listdir(path))
Code example #7
"""
    날짜 : 2020/07/22
    이름 : 김동욱
    내용 : 파이썬 Hadoop 실습하기
"""

#from pywebhdfs.webhdfs import PyWebHdfsClient as hadoop
import webhdfspy

#hadoop 접속
hdfs = webhdfspy.WebHDFSClient(host='192.168.100.101',
                               port=50070,
                               username='******')

#HDFS 디렉토리 생성
hdfs.mkdir('/sample')

#HDFS 파일 생성
text = 'Hello Hadoop! 반갑습니다.'
hdfs.create('/sample/test.txt', text.encode('UTF-8'), overwrite=True)

print('프로그램 종료...')
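
To verify the write, the file can be read back with the open() call shown in example #3; that open() returns the raw bytes is an assumption here:

# Read the file back; assumes open() returns the raw bytes written above
content = hdfs.open('/sample/test.txt')
print(content.decode('UTF-8'))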
Code example #8
def setUp(self):
    self.webHDFS = webhdfspy.WebHDFSClient('localhost', 50070,
                                           HADOOP_USERNAME)