예제 #1
0
파일: main.py 프로젝트: jevic/logcenter
def Run(jobTime):
    """Download each domain's log for ``jobTime`` from HDFS and re-upload it.

    Nothing happens unless the success marker (``Log.HPath + Log.success``)
    exists on HDFS. When it does, ``jobTime`` is recorded under ``Rkey`` via
    ``r.rset`` and every domain in ``Domains`` is processed in turn:

    * the domain's log file is copied from HDFS to ``DOWN_DIR``;
    * the local copy is handed to ``Upload`` and the temporary name is
      passed to ``Rename``;
    * the outcome is recorded with ``Write`` (status 200 on success,
      500 on any failure) and the ``Rkey`` entry is removed.

    Domains whose log file does not exist on HDFS are logged and skipped.
    A failure in one domain is logged and does not stop the remaining ones.

    :param jobTime: identifier of the time slot to process; it is passed
        unchanged to ``LogTime``, ``Write`` and the ``r`` helper.
    """
    Client = HdfsClient(hosts=HDFS)
    Log = LogTime(jobTime)

    # Guard clause: without the success marker there is nothing to do.
    if not Client.exists(Log.HPath + Log.success):
        return

    r.rset(Rkey, jobTime, 1)
    logger.info("-------- %s -----------" % jobTime)

    for dm in Domains:
        Files = Log.HPath + dm + Log.LogName
        if not Client.exists(Files):
            logger.warn(Files + ' Non-existent')
            continue

        Dfile = DOWN_DIR + '/' + dm + Log.UpFile
        TmpFile = dm + Log.UpTmp()
        try:
            sts = time.time()
            logger.info('DownloadStart... %s' % Files)
            Client.copy_to_local(Files, Dfile)
            logger.info('DownloadSuccess... %s %s' % (Files, Dfile))
            Upload(TmpFile, Dfile, Log.PathDay)
            # NOTE(review): ``Dday`` is not defined in this function; it is
            # assumed to be a module-level name -- confirm.
            Rename(Dday + "/" + TmpFile)
            logger.info('UploadSuccess... %s' % Dfile)
            ets = time.time()
            Write(jobTime, dm, sts, ets, 200)
            r.rdel(Rkey, jobTime)
        except Exception as e:  # "as" form parses on Python 2.6+ and 3
            # NOTE(review): the success path calls Write with five arguments
            # and this path with three -- confirm Write accepts both forms.
            Write(jobTime, dm, 500)
            r.rdel(Rkey, jobTime)
            logger.error(e)

    r.rdel(Rkey, jobTime)
예제 #2
0
def update_csv():
    """Publish the de-duplicated local CSV to HDFS, replacing any old copy.

    If the remote file does not exist yet, the local CSV is uploaded as-is.
    Otherwise the remote file is first copied to the temporary path, the
    local CSV's lines are de-duplicated into that same temporary file, and
    the temporary file replaces the remote one.

    The remote file is deleted only after the replacement has been written
    locally, so a failure while reading or writing the local files does not
    leave HDFS without the data.
    """
    local = '/Users/constantine/PycharmProjects/test02/data.csv'
    tmpLocal = '/Users/constantine/PycharmProjects/test02/tmpdata.csv'
    remote = '/data/data.csv'
    host = '127.0.0.1:9870'
    user_name = 'host'
    client = HdfsClient(hosts=host, user_name=user_name)

    if not client.exists(remote):
        client.copy_from_local(local, remote)
        return

    # NOTE(review): the downloaded copy is overwritten below without being
    # read, so the old remote content never reaches the new file. If a merge
    # of remote and local rows was intended, this needs rework -- confirm.
    client.copy_to_local(remote, tmpLocal)

    with open(local, 'r') as source:
        content = source.read()

    # NOTE(review): set() ordering is arbitrary, so [1:] drops an arbitrary
    # line and the output order is not stable. Kept as in the original;
    # confirm what the slice was meant to remove (header? empty line?).
    content = '\n'.join(list(set(content.split('\n')))[1:])

    with open(tmpLocal, 'w') as target:
        target.write(content)

    # The upload would collide with an existing remote file, so remove it
    # first -- but only now that the replacement is ready.
    client.delete(remote)
    client.copy_from_local(tmpLocal, remote)
예제 #3
0
def Copy_To_Local(file):
    '''
    Download a file from Hadoop (HDFS) into the current working directory.

    The local copy is stored as ``./<basename of file>``. Any existing local
    file with that name is removed before the download starts.

    :param file: path of the file on HDFS.
    '''
    client = HdfsClient(hosts='localhost:50070')  # connect to HDFS

    # The client opens the local destination as a file for writing, so a bare
    # directory such as './' cannot be used; build an explicit file path.
    local_path = os.path.join('.', os.path.basename(file.rstrip('/')))

    # Remove a stale local copy if one exists.
    if os.path.exists(local_path):
        os.remove(local_path)

    client.copy_to_local(file, local_path)
예제 #4
0
def basic():
    """Walk through a handful of basic HdfsClient calls and print the results.

    Lists the root directory, checks whether two paths exist, prints a file
    checksum and the content summary of ``/``, copies one file from HDFS to
    the local disk and one from the local disk to HDFS, and finally prints
    the home directory.

    Every ``print`` uses the single-argument call form, which produces the
    same output on Python 2 and Python 3.
    """
    client = HdfsClient(hosts='study:50070')
    print(client.list_status('/'))

    print('判断某个路径是否存在')
    print(client.exists("/test"))
    print(client.exists("/data/gz/thrift-0.9.2.tar.gz"))

    print(client.get_file_checksum("/data/gz/bison-2.5.1.tar.gz"))

    summary = client.get_content_summary("/")
    print(summary)

    # File copy: from HDFS to the local file system.
    client.copy_to_local("/data/gz/pip-7.1.2.tar.gz", "/root/data/pip-7.1.2.tar.gz")
    # File copy: from the local file system to HDFS.
    client.copy_from_local("/root/data/thrift-0.9.2.tar.gz", "/data/gz/thrift-0.9.2.tar.gz")

    print(client.get_home_directory())
예제 #5
0
class HDFSClientUtilityTest(unittest.TestCase):
    '''Unit test for hdfsClientUtility.py'''

    def setUp(self):
        '''Load the HDFS settings file and build the client used by the tests.

        The tests are skipped when the settings file cannot be read or
        parsed, instead of failing later on a ``None`` configuration.
        '''
        self.hdfs_file_path = '../../.vscode/hdfsInfo.json'
        self.hdfs_config = None
        try:
            with open(self.hdfs_file_path, 'r') as config_file:
                self.hdfs_config = json.load(config_file)
        except (IOError, OSError, ValueError) as exception:
            print(exception)
            self.skipTest('cannot load HDFS settings from {0}'.format(
                self.hdfs_file_path))

        self.hdfs_client = HdfsClient(
            hosts='{0}:{1}'.format(self.hdfs_config['host'], '50070'),
            user_name=self.hdfs_config['userName'])

    @staticmethod
    def _remove_local_file(path):
        '''Delete a local file if it exists (cleanup helper).'''
        if os.path.exists(path):
            os.remove(path)

    def get_random_name(self, length):
        '''Return ``length`` distinct random ASCII letters/digits as a string.'''
        return ''.join(
            random.sample(string.ascii_letters + string.digits, length))

    def test_copy_file_run(self):
        '''test copyFileToHdfs'''
        user_name = self.hdfs_config['userName']
        file_name = self.get_random_name(8)
        file_content = 'hello world!'
        local_path = './{}'.format(file_name)
        remote_path = '/{0}/{1}'.format(user_name, file_name)

        # Cleanups are registered up front so they run even when an
        # assertion below fails.
        self.addCleanup(self._remove_local_file, local_path)
        with open(local_path, 'w') as handle:
            handle.write(file_content)

        result = copyFileToHdfs(local_path, remote_path, self.hdfs_client)
        self.addCleanup(self.hdfs_client.delete, remote_path)
        self.assertTrue(result)

        file_list = self.hdfs_client.listdir('/{0}'.format(user_name))
        self.assertIn(file_name, file_list)

        # Download the uploaded file under a new name and compare contents.
        hdfs_file_name = self.get_random_name(8)
        downloaded_path = './{}'.format(hdfs_file_name)
        self.addCleanup(self._remove_local_file, downloaded_path)
        self.hdfs_client.copy_to_local(remote_path, downloaded_path)
        self.assertTrue(os.path.exists(downloaded_path))

        with open(downloaded_path, 'r') as handle:
            content = handle.readline()
        self.assertEqual(file_content, content)

    def test_copy_directory_run(self):
        '''test copyDirectoryToHdfs'''
        user_name = self.hdfs_config['userName']
        directory_name = self.get_random_name(8)
        file_name_list = [self.get_random_name(8), self.get_random_name(8)]
        file_content = 'hello world!'
        local_dir = './{}'.format(directory_name)
        remote_dir = '/{0}/{1}'.format(user_name, directory_name)

        os.makedirs(local_dir)
        self.addCleanup(shutil.rmtree, local_dir)
        for file_name in file_name_list:
            with open('./{0}/{1}'.format(directory_name, file_name),
                      'w') as handle:
                handle.write(file_content)

        result = copyDirectoryToHdfs(local_dir, remote_dir, self.hdfs_client)
        # Cleanups run last-in-first-out: register the directory delete
        # first so the files inside it are deleted before it.
        self.addCleanup(self.hdfs_client.delete, remote_dir)
        for file_name in file_name_list:
            self.addCleanup(self.hdfs_client.delete,
                            '{0}/{1}'.format(remote_dir, file_name))
        self.assertTrue(result)

        directory_list = self.hdfs_client.listdir('/{0}'.format(user_name))
        self.assertIn(directory_name, directory_list)

        sub_file_list = self.hdfs_client.listdir(remote_dir)
        for file_name in file_name_list:
            self.assertIn(file_name, sub_file_list)