Example 1
    def connect(self, user, passwd, host, port):
        """
            Connect to an HDFS filesystem as a specific user.

            @type user: string
            @param user: user name to log in with
            @type passwd: string
            @param passwd: password for the user
            @type host: string
            @param host: the HDFS host to connect to; see hadoop-site.xml
            @type port: int
            @param port: the HDFS port; also found in hadoop-site.xml

            @rtype: hdfsFS
            @return: a handle to the filesystem, or None on error

        """
        hdfsFS = pyhdfs.hdfsConnectAsUser(host, port, user, passwd)
        if hdfsFS is None:
            raise StandardError("Could not connect as %s" % user)
        return hdfsFS
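
A minimal usage sketch for the method above, assuming it lives on a small wrapper class (HdfsClient is a hypothetical name, 'secret' a placeholder password) and that host and port match your hadoop-site.xml:

    client = HdfsClient()
    fs = client.connect('tuku', 'secret', 'nj01-nanling-hdfs.dmop.baidu.com', 54310)
    # fs is an hdfsFS handle usable with the other pyhdfs calls,
    # e.g. pyhdfs.hdfsListDirectory(fs, path)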
Example 2
import pyhdfs

handle = None  # module-level cached connection

def init():
    # Connect lazily on the first call, then reuse the same handle.
    # NODE_ADDRESS, NODE_PORT, USER_NAME, USE_PASSWORD: assumed defined elsewhere.
    global handle
    if handle is None:
        handle = pyhdfs.hdfsConnectAsUser(NODE_ADDRESS, NODE_PORT, USER_NAME,
                                          USE_PASSWORD)
    return handle
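
A brief sketch of how this lazy initializer would be used; the connection constants are assumed to be set in the surrounding module, and the path passed to hdfsListDirectory is hypothetical:

    fs = init()   # first call: connects to the name node
    fs2 = init()  # later calls: return the cached handle without reconnecting
    result, num = pyhdfs.hdfsListDirectory(fs, '/some/hdfs/path')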
Example 3
import glob
import os
import pyhdfs

name_node_address = 'nj01-nanling-hdfs.dmop.baidu.com'
name_node_port = 54310
user_name = 'tuku'
user_password = '******'

# Load the logging configuration, then connect to the name node.
pyhdfs.com_loadlog("./conf/", "log.conf")
fs = pyhdfs.hdfsConnectAsUser(name_node_address, name_node_port, user_name, user_password)

path = '/app/tuku/bianyunlong/clickquery/clickquery_merge_triplet.test'
path_out = '/app/tuku/chenghuige/image-text-sim/test'
# path already begins with '/', so no extra separator is needed.
hdfs_path = "hdfs://{}:{}{}".format(name_node_address, name_node_port, path)

# hdfsListDirectory returns an hdfsFileInfo array plus its length;
# hdfsFileInfo_getitem indexes into it, and mName holds each entry's path.
result, num = pyhdfs.hdfsListDirectory(fs, hdfs_path)
files = [pyhdfs.hdfsFileInfo_getitem(result, i).mName for i in xrange(num)]

# The rest of the original script is commented out: it pulled each HDFS file
# locally, split it, converted it to training records, and pushed the results
# back to HDFS.
# for file in files:
#   print file
#   os.system('rm -rf ./test_src/*')
#   os.system('rm -rf ./test/*')
#   os.system('hadoop fs -get %s ./test_src' % file)
#   for file_ in glob.glob('./test_src/*'):
#     print file_
#     os.system('python /home/img/chenghuige/tools/split.py %s' % file_)
#     os.system('python ./gen-records-nonpsave.py --input %s --output %s --name train' % (file_, './test'))
#   os.system('hadoop fs -put ./test/* %s' % path_out)

# for file_ in glob.glob('./test/*'):
#   if file_.endswith('.npy'):
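
None of these examples release what they acquire. In the libhdfs C API, the array returned by hdfsListDirectory is freed with hdfsFreeFileInfo and the connection is closed with hdfsDisconnect; a hedged cleanup sketch, assuming this pyhdfs binding wraps those two calls as well (not verified for this particular build):

    # Assumption: the binding exposes these libhdfs wrappers.
    pyhdfs.hdfsFreeFileInfo(result, num)  # free the hdfsFileInfo array
    pyhdfs.hdfsDisconnect(fs)             # close the filesystem handle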