def Run(jobTime):
    """Download each domain's log for *jobTime* from HDFS and re-upload it.

    If the HDFS _SUCCESS marker for the job hour exists, the run is recorded
    in redis (key ``Rkey``), then every domain's log file is copied to the
    local spool directory, pushed via ``Upload``/``Rename``, and the outcome
    is persisted with ``Write``. Missing per-domain files are only warned
    about. The redis entry is cleared when processing finishes.
    """
    Client = HdfsClient(hosts=HDFS)
    Log = LogTime(jobTime)
    if Client.exists(Log.HPath + Log.success):
        # Mark the job as in-flight so a concurrent run can see it.
        r.rset(Rkey, jobTime, 1)
        logger.info("-------- %s -----------" % jobTime)
        for dm in Domains:
            Files = Log.HPath + dm + Log.LogName
            if Client.exists(Files):
                Dfile = DOWN_DIR + '/' + dm + Log.UpFile
                TmpFile = dm + Log.UpTmp()
                try:
                    sts = time.time()
                    logger.info('DownloadStart... %s' % Files)
                    Client.copy_to_local(Files, Dfile)
                    logger.info('DownloadSuccess... %s %s' % (Files, Dfile))
                    Upload(TmpFile, Dfile, Log.PathDay)
                    Rename(Dday + "/" + TmpFile)
                    logger.info('UploadSuccess... %s' % Dfile)
                    ets = time.time()
                    Write(jobTime, dm, sts, ets, 200)
                    r.rdel(Rkey, jobTime)
                # was `except Exception, e` — Python-2-only syntax; the rest
                # of the file already uses the `as` form.
                except Exception as e:
                    # NOTE(review): success path calls Write with 5 args
                    # (sts/ets included) but this path passes only 3 —
                    # confirm Write's signature accepts both.
                    Write(jobTime, dm, 500)
                    r.rdel(Rkey, jobTime)
                    logger.error(e)
            else:
                logger.warn(Files + ' Non-existent')
                continue
    # NOTE(review): placed at function level in the collapsed original —
    # clears the redis entry whether or not the success marker existed.
    r.rdel(Rkey, jobTime)
def update_csv():
    """Merge the local data.csv into its HDFS copy, de-duplicating lines.

    If the remote file exists it is downloaded, combined with the local
    file's lines (duplicates removed, first-seen order preserved) and the
    merged result is uploaded back. Otherwise the local file is uploaded
    as-is.
    """
    local = '/Users/constantine/PycharmProjects/test02/data.csv'
    tmpLocal = '/Users/constantine/PycharmProjects/test02/tmpdata.csv'
    remote = '/data/data.csv'
    host = '127.0.0.1:9870'
    user_name = 'host'
    client = HdfsClient(hosts=host, user_name=user_name)

    if not client.exists(remote):
        # First upload: nothing to merge with.
        client.copy_from_local(local, remote)
        return

    client.copy_to_local(remote, tmpLocal)
    client.delete(remote)
    # BUGFIX: the original reopened tmpLocal with 'w', truncating the
    # just-downloaded remote content, and wrote the local file's lines once
    # per line (N copies). Read both files instead and merge in memory.
    with open(tmpLocal, 'r') as f:
        merged = f.read().split('\n')
    with open(local, 'r') as f:
        merged += f.read().split('\n')
    # De-duplicate while preserving first-seen order; the original used
    # list(set(...))[1:], which shuffled the lines (set order is arbitrary)
    # and silently dropped an arbitrary one.
    seen = set()
    unique = [ln for ln in merged if not (ln in seen or seen.add(ln))]
    with open(tmpLocal, 'w') as f:
        f.write('\n'.join(unique))
    client.copy_from_local(tmpLocal, remote)
def Copy_To_Local(file):
    """Download *file* from Hadoop into the current working directory.

    A stale local copy at the same path is removed first so the fetch
    starts clean.
    """
    # Drop any previously downloaded copy before re-fetching.
    if os.path.exists(file):
        os.remove(file)
    # Connect to the HDFS NameNode and pull the file down.
    hdfs = HdfsClient(hosts='localhost:50070')
    hdfs.copy_to_local(file, './')
def basic():
    """Demo of basic HdfsClient operations against the 'study' cluster.

    Lists the root, checks path existence, fetches a checksum and content
    summary, and copies one file in each direction between HDFS and the
    local filesystem.
    """
    client = HdfsClient(hosts='study:50070')
    print(client.list_status('/'))
    # BUGFIX: the original mixed Python-2 `print x` statements with
    # `print(x)` calls, making the file a syntax error under Python 3;
    # all prints are now function calls.
    print('判断某个路径是否存在')
    print(client.exists("/test"))
    print(client.exists("/data/gz/thrift-0.9.2.tar.gz"))
    # (The original rebuilt an identical client here; the redundant
    # construction has been removed.)
    print(client.get_file_checksum("/data/gz/bison-2.5.1.tar.gz"))
    summary = client.get_content_summary("/")
    print(summary)
    # File copy -- from HDFS to the local filesystem.
    client.copy_to_local("/data/gz/pip-7.1.2.tar.gz",
                         "/root/data/pip-7.1.2.tar.gz")
    # File copy -- from the local filesystem into HDFS.
    client.copy_from_local("/root/data/thrift-0.9.2.tar.gz",
                           "/data/gz/thrift-0.9.2.tar.gz")
    print(client.get_home_directory())
class HDFSClientUtilityTest(unittest.TestCase):
    '''Unit test for hdfsClientUtility.py: round-trips files and directories
    between the local filesystem and HDFS via copyFileToHdfs /
    copyDirectoryToHdfs.'''

    def setUp(self):
        # Cluster connection details come from a developer-local JSON file;
        # it is expected to contain at least 'host' and 'userName'.
        self.hdfs_file_path = '../../.vscode/hdfsInfo.json'
        self.hdfs_config = None
        try:
            with open(self.hdfs_file_path, 'r') as file:
                self.hdfs_config = json.load(file)
        except Exception as exception:
            # NOTE(review): a missing/unreadable config is only printed;
            # the subscriptions below will then fail with a TypeError.
            print(exception)

        self.hdfs_client = HdfsClient(hosts='{0}:{1}'.format(
            self.hdfs_config['host'], '50070'),
                                      user_name=self.hdfs_config['userName'])

    def get_random_name(self, length):
        # Random alphanumeric name so concurrent runs don't collide.
        return ''.join(
            random.sample(string.ascii_letters + string.digits, length))

    def test_copy_file_run(self):
        '''test copyFileToHdfs'''
        # Create a small local file with known content ...
        file_name = self.get_random_name(8)
        file_content = 'hello world!'
        with open('./{}'.format(file_name), 'w') as file:
            file.write(file_content)
        # ... upload it into the user's HDFS home ...
        result = copyFileToHdfs(
            './{}'.format(file_name),
            '/{0}/{1}'.format(self.hdfs_config['userName'], file_name),
            self.hdfs_client)
        self.assertTrue(result)
        file_list = self.hdfs_client.listdir('/{0}'.format(
            self.hdfs_config['userName']))
        self.assertIn(file_name, file_list)
        # ... then download it under a fresh name and verify the content
        # survived the round trip.
        hdfs_file_name = self.get_random_name(8)
        self.hdfs_client.copy_to_local(
            '/{0}/{1}'.format(self.hdfs_config['userName'], file_name),
            './{}'.format(hdfs_file_name))
        self.assertTrue(os.path.exists('./{}'.format(hdfs_file_name)))
        with open('./{}'.format(hdfs_file_name), 'r') as file:
            content = file.readline()
            self.assertEqual(file_content, content)
        #clean up
        os.remove('./{}'.format(file_name))
        os.remove('./{}'.format(hdfs_file_name))
        self.hdfs_client.delete('/{0}/{1}'.format(
            self.hdfs_config['userName'], file_name))

    def test_copy_directory_run(self):
        '''test copyDirectoryToHdfs'''
        # Build a local directory holding two small files.
        directory_name = self.get_random_name(8)
        file_name_list = [self.get_random_name(8), self.get_random_name(8)]
        file_content = 'hello world!'
        os.makedirs('./{}'.format(directory_name))
        for file_name in file_name_list:
            with open('./{0}/{1}'.format(directory_name, file_name),
                      'w') as file:
                file.write(file_content)
        # Upload the directory and check it (and every file) is listed.
        result = copyDirectoryToHdfs(
            './{}'.format(directory_name),
            '/{0}/{1}'.format(self.hdfs_config['userName'], directory_name),
            self.hdfs_client)
        self.assertTrue(result)
        directory_list = self.hdfs_client.listdir('/{0}'.format(
            self.hdfs_config['userName']))
        self.assertIn(directory_name, directory_list)
        sub_file_list = self.hdfs_client.listdir('/{0}/{1}'.format(
            self.hdfs_config['userName'], directory_name))
        for file_name in file_name_list:
            self.assertIn(file_name, sub_file_list)
            #clean up
            # NOTE(review): per-file delete reconstructed inside this loop so
            # every uploaded file is removed — confirm against upstream.
            self.hdfs_client.delete('/{0}/{1}/{2}'.format(
                self.hdfs_config['userName'], directory_name, file_name))
        self.hdfs_client.delete('/{0}/{1}'.format(
            self.hdfs_config['userName'], directory_name))
        shutil.rmtree('./{}'.format(directory_name))