def hdfs():
    """Yield a connected ``HDFileSystem`` with a fresh ``/tmp/test`` directory.

    Connects to ``localhost:8020``, removes ``/tmp/test`` if it already
    exists, recreates it and yields the client. Once the generator is
    resumed or closed, ``/tmp/test`` is removed again (if present) and the
    client is disconnected.

    NOTE(review): the setup/yield/teardown shape suggests a test fixture, but
    no decorator is visible in this excerpt -- confirm how it is registered.

    Yields:
        The connected ``HDFileSystem`` client.
    """
    scratch_dir = '/tmp/test'
    fs = HDFileSystem(host='localhost', port=8020)
    try:
        if fs.exists(scratch_dir):
            fs.rm(scratch_dir)
        fs.mkdir(scratch_dir)
        yield fs
    finally:
        # Teardown lives in ``finally`` so that a failure during setup, an
        # exception thrown into the generator, or an early close cannot leak
        # the connection or leave the scratch directory behind.
        try:
            if fs.exists(scratch_dir):
                fs.rm(scratch_dir)
        finally:
            fs.disconnect()
# NOTE(review): the statements below, up to ``read_lines``, are the tail of a
# function whose header and opening lines are not part of the visible source
# (presumably ``write_read``, which the entry point further down calls). They
# are kept verbatim and need to be re-attached to that function's body.
with client.open(file, 'rb') as f:
    out = f.read(len(data))
    assert out == data


def read_lines(client):
    """Write a two-line file and check that it reads back as two lines.

    Writes ``b"Hello\\nHadoop!"`` to ``/tmp/test/line`` (replication factor 1),
    reopens the file for reading and calls ``readlines()`` on it.

    Args:
        client: Connected file-system client exposing
            ``open(path, mode, replication=...)``.

    Raises:
        AssertionError: If ``readlines()`` does not return exactly two items.
    """
    path = '/tmp/test/line'
    with client.open(path, 'wb', replication=1) as writer:
        writer.write(b"Hello\nHadoop!")
    with client.open(path, 'rb') as reader:
        lines = reader.readlines()
    assert len(lines) == 2


if __name__ == '__main__':
    # NOTE(review): the host string carries an ``http://`` scheme -- confirm
    # the client accepts that form rather than a bare hostname / IP address.
    host = 'http://192.168.58.128'
    port = 8020
    HDFS_client = HDFileSystem(host=host, port=port)
    try:
        file_exists(HDFS_client)
        write_read(HDFS_client)
        read_lines(HDFS_client)
    finally:
        # Always release the connection, even when one of the checks fails.
        HDFS_client.disconnect()
    print('-' * 20)
    print('Hello Hadoop!')
# Read station records from a fixed-width text file stored on HDFS, build a
# ``Station`` object for every line after the header, and print them all.

HEADER_LINES = 22  # leading lines of the file that are skipped, not parsed

file = '/test/isd-history.txt'
station = []
n = 0  # number of lines read so far (header lines included)

# connect to HDFS and read the file
hdfs_client = HDFileSystem(host=test_host, port=test_port)
try:
    with hdfs_client.open(file, 'rb') as f:
        line = f.readline()
        while line:
            n += 1
            if n > HEADER_LINES:
                # Fixed column ranges of one record. The file is opened in
                # 'rb' mode, so these slices are presumably bytes -- verify
                # that ``Station`` expects that.
                USAF = line[0:6]
                NAME = line[13:42]
                FIPS = line[43:45]
                ELEV = line[74:81]
                print("I'm converting the number " + str(n) + " station ...")
                station.append(Station(USAF, NAME, FIPS, ELEV))
            line = f.readline()
finally:
    # Release the connection even if reading or parsing fails.
    hdfs_client.disconnect()

# display the station info
# Count what was actually collected: subtracting the header size from the
# line counter goes negative when the file has fewer lines than the header.
n = len(station)
print("There are", n, "stations :")
for i, record in enumerate(station):
    print(i, end=' ')
    record.display()
print("-" * 20 + "END" + "-" * 20)
test_host = 'localhost'
test_port = 9000


def hdfs_exists(hdfs_client):
    """Recreate the scratch directory ``/tmp/test``.

    Removes the directory when it already exists, then creates it again.

    Args:
        hdfs_client: Connected client exposing ``exists``, ``rm`` and
            ``makedirs``.
    """
    # Absolute path, so it is the same directory the other checks write into.
    path = '/tmp/test'
    if hdfs_client.exists(path):
        hdfs_client.rm(path)
    hdfs_client.makedirs(path)


def hdfs_write_read(hdfs_client):
    """Write 100 bytes to ``/tmp/test/file_a`` and verify they read back.

    Args:
        hdfs_client: Connected client exposing
            ``open(path, mode, replication=...)``.

    Raises:
        AssertionError: If the bytes read differ from the bytes written.
    """
    data = b"hello" * 20
    file_a = '/tmp/test/file_a'
    with hdfs_client.open(file_a, 'wb', replication=1) as f:
        f.write(data)
    with hdfs_client.open(file_a, 'rb') as f:
        out = f.read(len(data))
    # Without this comparison the round trip would not check anything.
    assert out == data


def hdfs_readline(hdfs_client):
    """Write a two-line file to ``/tmp/test/file_b`` and count its lines.

    Args:
        hdfs_client: Connected client exposing ``open(path, mode)``.

    Raises:
        AssertionError: If ``readlines()`` does not return exactly two items.
    """
    file_b = '/tmp/test/file_b'
    with hdfs_client.open(file_b, 'wb') as f:
        f.write(b"hello\nhadoop")
    with hdfs_client.open(file_b, 'rb') as f:
        # readlines() returns one item per line; readline() would return only
        # the first line, whose length is not the line count.
        lines = f.readlines()
    assert len(lines) == 2


if __name__ == "__main__":
    hdfs_client = HDFileSystem(host=test_host, port=test_port)
    try:
        hdfs_exists(hdfs_client)
        hdfs_write_read(hdfs_client)
        hdfs_readline(hdfs_client)
    finally:
        # Always release the connection, even when one of the checks fails.
        hdfs_client.disconnect()
    # NOTE(review): ``HelloWorld`` is not defined in the visible source --
    # presumably provided elsewhere in the file; verify before running.
    HelloWorld()