def basic():
    """Run a handful of basic HdfsClient calls against 'study:50070'.

    Prints the root directory listing, two existence checks, a file
    checksum, the content summary of '/', and the home directory. Also
    copies one file from HDFS to the local disk and one file from the
    local disk to HDFS. All paths are hard-coded.
    """
    # One client is enough: the original built a second, identical client
    # halfway through, which added nothing.
    client = HdfsClient(hosts='study:50070')

    # Single-argument print(...) calls behave the same on Python 2 and 3,
    # whereas the bare `print x` statement form is a SyntaxError on Python 3.
    print(client.list_status('/'))

    # Check whether the given paths exist.
    print('判断某个路径是否存在')
    print(client.exists("/test"))
    print(client.exists("/data/gz/thrift-0.9.2.tar.gz"))

    print(client.get_file_checksum("/data/gz/bison-2.5.1.tar.gz"))

    summary = client.get_content_summary("/")
    print(summary)

    # File copy: from HDFS to the local file system.
    client.copy_to_local("/data/gz/pip-7.1.2.tar.gz",
                         "/root/data/pip-7.1.2.tar.gz")
    # File copy: from the local file system into HDFS.
    client.copy_from_local("/root/data/thrift-0.9.2.tar.gz",
                           "/data/gz/thrift-0.9.2.tar.gz")

    print(client.get_home_directory())
def start():
    """Connect to MongoDB and HDFS, then print one HDFS file line by line.

    Obtains the 'mrout_6000001-6001000' collection of the
    'gse-transaction' database (not used further yet), then connects to
    the HDFS NameNode at ``hdfs_ip`` on port 50070, prints the home
    directory, the active NameNode and a directory listing, ensures
    '/user/leon' exists, and prints every line of
    '/user/leon/mrout_3_6000001-6001000/part-00000'.
    """
    # Connect to MongoDB, query tokens, and (per the original note) look up
    # the latest data on etherscan by contractAddress.
    client = MongoCluster().connect()
    db = client.get_database('gse-transaction')
    # NOTE(review): `collection` is not used yet; the original carried a
    # commented-out `collection.insert_one()` placeholder here.
    collection = db.get_collection('mrout_6000001-6001000')

    # Connect to HDFS to read files.
    from pyhdfs import HdfsClient
    # pyhdfs expects "host:port" entries; a comma separates *different*
    # hosts, so the original '%s,50070' registered a bogus second NameNode
    # named "50070" instead of setting the port.
    client2 = HdfsClient(hosts='%s:50070' % hdfs_ip, max_tries=10)

    # Home directory of the current user.
    print(client2.get_home_directory())
    # Currently active NameNode.
    print(client2.get_active_namenode())
    # All entries under the given directory.
    print(client2.listdir("/user/leon/mrout_3_6000001-6001000/"))

    # Read a file.
    client2.mkdirs("/user/leon")
    inputfile = client2.open('/user/leon/mrout_3_6000001-6001000/part-00000')
    try:
        # Show the file contents.
        for r in inputfile:
            # open() yields raw bytes; decode them as UTF-8 text. The
            # original str(r).encode('utf-8') printed the b'...' repr on
            # Python 3 and raised on non-ASCII bytes on Python 2.
            line = r.decode('utf-8')
            print(line)
    finally:
        # Release the underlying HTTP connection even if printing fails.
        inputfile.close()