예제 #1
0
def basic():
    """Exercise a series of basic HDFS client calls and print their results.

    Connects to the NameNode at ``study:50070`` and, in order: lists the
    root directory, checks whether two paths exist, fetches a file
    checksum, fetches the content summary of ``/``, copies one file from
    HDFS to the local disk and one from the local disk to HDFS, and prints
    the user's home directory.

    ``HdfsClient`` must already be in scope (the method names used here
    match ``pyhdfs.HdfsClient`` -- confirm against the file's imports).
    All paths are hard-coded; nothing is returned.
    """
    client = HdfsClient(hosts='study:50070')
    print(client.list_status('/'))

    # Check whether individual paths exist.
    print('判断某个路径是否存在')
    print(client.exists("/test"))
    print(client.exists("/data/gz/thrift-0.9.2.tar.gz"))

    print(client.get_file_checksum("/data/gz/bison-2.5.1.tar.gz"))

    summary = client.get_content_summary("/")
    print(summary)

    # File copy: from HDFS to the local file system.
    client.copy_to_local("/data/gz/pip-7.1.2.tar.gz", "/root/data/pip-7.1.2.tar.gz")
    # File copy: from the local file system into HDFS.
    client.copy_from_local("/root/data/thrift-0.9.2.tar.gz", "/data/gz/thrift-0.9.2.tar.gz")

    print(client.get_home_directory())
예제 #2
0
def start():
    """Connect to MongoDB and HDFS, then print the contents of one HDFS file.

    Opens the ``gse-transaction`` database via ``MongoCluster`` and looks up
    the ``mrout_6000001-6001000`` collection, then connects to HDFS at
    ``<hdfs_ip>:50070`` and prints the home directory, the active NameNode,
    a directory listing, and every line of
    ``/user/leon/mrout_3_6000001-6001000/part-00000`` decoded as UTF-8.

    ``MongoCluster`` and ``hdfs_ip`` are expected to be defined elsewhere in
    the module. Nothing is returned.
    """
    # Connect to MongoDB. The original note says the intent is to query
    # tokens and look up the latest data on etherscan by contractAddress;
    # only the connection and collection lookup happen here so far.
    client = MongoCluster().connect()
    db = client.get_database('gse-transaction')
    collection = db.get_collection('mrout_6000001-6001000')  # not used yet

    # Connect to HDFS to read the file.
    from pyhdfs import HdfsClient
    # Each host entry is "host:port"; a comma would separate *different*
    # hosts, turning the port number into a bogus second NameNode.
    client2 = HdfsClient(hosts='%s:50070' % hdfs_ip, max_tries=10)
    # Home directory of the current user.
    print(client2.get_home_directory())
    # Currently active NameNode.
    print(client2.get_active_namenode())
    # All entries under the given directory.
    print(client2.listdir("/user/leon/mrout_3_6000001-6001000/"))

    client2.mkdirs("/user/leon")
    inputfile = client2.open('/user/leon/mrout_3_6000001-6001000/part-00000')
    try:
        # The stream yields raw bytes; decode each line to text before
        # printing instead of str()/encode(), which mangles or rejects
        # non-ASCII content.
        for raw_line in inputfile:
            print(raw_line.decode('utf-8'))
    finally:
        # Release the underlying HTTP connection even if decoding fails.
        inputfile.close()