def connect(self, user, passwd, host, port):
    """
    Connect to an HDFS file system as a specific user.

    @type  user: string
    @param user: user name to log in with
    @type  passwd: string
    @param passwd: password to log in with
    @type  host: string
    @param host: the HDFS host to connect to; see hadoop-site.xml
    @type  port: int
    @param port: the HDFS port, also found in hadoop-site.xml
    @rtype:  hdfsFS
    @return: a handle to the filesystem, or NULL on error
    """
    hdfsFS = pyhdfs.hdfsConnectAsUser(host, port, user, passwd)
    if hdfsFS is None:
        raise StandardError("Could not connect as %s" % user)
    return hdfsFS
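# A minimal standalone sketch of the same connect pattern, kept outside any
# class so it can run on its own; the host, port, and credential values in the
# example call are placeholders, not taken from the original source.
import pyhdfs

def connect_as_user(user, passwd, host, port):
    # Same call order as the method above: host and port first, then credentials.
    fs = pyhdfs.hdfsConnectAsUser(host, port, user, passwd)
    if fs is None:
        raise StandardError("Could not connect as %s" % user)
    return fs

# Example (placeholder values):
# fs = connect_as_user('hdfs_user', 'secret', 'namenode.example.com', 54310)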
def init():
    # Lazily create a single shared filesystem handle; repeated calls reuse
    # the cached connection instead of reconnecting.
    global handle
    if handle is None:
        handle = pyhdfs.hdfsConnectAsUser(NODE_ADDRESS, NODE_PORT, USER_NAME, USE_PASSWORD)
    return handle
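# Hedged sketch of the module-level setup that init() above relies on; the
# original snippet does not show these definitions, and the concrete values
# below are placeholders, not taken from the source.
import pyhdfs

NODE_ADDRESS = 'namenode.example.com'   # assumed placeholder
NODE_PORT = 54310                       # assumed placeholder
USER_NAME = 'hdfs_user'                 # assumed placeholder
USE_PASSWORD = ''                       # assumed placeholder

handle = None   # cached filesystem handle, filled in lazily by init()

# fs = init()   # first call connects; later calls return the cached handle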
import os
import glob

import pyhdfs

name_node_address = 'nj01-nanling-hdfs.dmop.baidu.com'
name_node_port = 54310
user_name = 'tuku'
user_password = '******'

# Load the logging configuration, then connect to the name node as the given user.
pyhdfs.com_loadlog("./conf/", "log.conf")
fs = pyhdfs.hdfsConnectAsUser(name_node_address, name_node_port, user_name, user_password)

path = '/app/tuku/bianyunlong/clickquery/clickquery_merge_triplet.test'
path_out = '/app/tuku/chenghuige/image-text-sim/test'
hdfs_path = "hdfs://{}:{}/{}".format(name_node_address, name_node_port, path)

# List the directory and collect the file name (mName) from each hdfsFileInfo entry.
result, num = pyhdfs.hdfsListDirectory(fs, hdfs_path)
files = [item.mName for item in
         [pyhdfs.hdfsFileInfo_getitem(result, i) for i in xrange(num)]]

# for file in files:
#     print file
#     os.system('rm -rf ./test_src/*')
#     os.system('rm -rf ./test/*')
#     os.system('hadoop fs -get %s ./test_src' % file)
#     for file_ in glob.glob('./test_src/*'):
#         print file_
#         os.system('python /home/img/chenghuige/tools/split.py %s' % file_)
#         os.system('python ./gen-records-nonpsave.py --input %s --output %s --name train' % (file_, './test'))
#         os.system('hadoop fs -put ./test/* %s' % (file_, path_out))
#     for file_ in glob.glob('./test/*'):
#         if file_.endswith('.npy'):
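# Hedged helper sketch (not part of the original script): wraps the listing
# pattern used above so other directories can be listed the same way. It uses
# only the calls already shown (hdfsListDirectory, hdfsFileInfo_getitem) and
# the .mName field of hdfsFileInfo; the function name is an assumption.
def list_hdfs_dir(fs, uri):
    result, num = pyhdfs.hdfsListDirectory(fs, uri)
    return [pyhdfs.hdfsFileInfo_getitem(result, i).mName for i in xrange(num)]

# Example, reusing the connection and path built above:
# files = list_hdfs_dir(fs, hdfs_path)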