class HbaseUtil:
    '''
    Helper bound to a single HBase table, reached through the Thrift gateway.

    Constructing it opens the connection and creates the table, with the
    columns 'page:title' and 'page:article', when the server does not list
    a table of that name yet.
    '''

    def __init__(self, tableName, host='localhost', port=9090):
        '''
        Connect to the Thrift server and make sure the table exists.

        tableName -- name of the table every method of this helper works on
        host      -- Thrift server host
        port      -- Thrift server port
        '''
        self.tableName = tableName
        socket = TSocket.TSocket(host, port)
        self.transport = TTransport.TBufferedTransport(socket)
        # The protocol has to sit on the buffered transport. Building it on
        # the raw socket would send every call around the buffering layer.
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Client(protocol)
        self.transport.open()

        if tableName not in self.client.getTableNames():
            print('creating table %s' % tableName)
            titleColumn = ColumnDescriptor()
            titleColumn.name = 'page:title'
            titleColumn.maxVersions = 10
            articleColumn = ColumnDescriptor()
            articleColumn.name = 'page:article'
            self.client.createTable(tableName, [titleColumn, articleColumn])

    def close(self):
        ''' close the Thrift transport opened by the constructor '''
        self.transport.close()

    def insert(self, rowkey, title, content):
        '''
        Store one page under rowkey.

        title and content are encoded as UTF-8 and written to 'page:title'
        and 'page:article' in a single mutateRow call.
        '''
        mutations = [
            Mutation(column='page:article', value=content.encode('utf-8')),
            Mutation(column='page:title', value=title.encode('utf-8')),
        ]
        self.client.mutateRow(self.tableName, rowkey, mutations, {})

    def existRowKey(self, rowkey):
        '''
        Return True when getRow yields at least one result for rowkey,
        False otherwise.
        '''
        # Same call style (trailing attribute dict) as mutateRow/scannerOpen
        # elsewhere in this class.
        return bool(self.client.getRow(self.tableName, rowkey, {}))

    def printAll(self):
        '''
        Scan the whole table from the first row, restricted to 'page:title',
        and print each row key followed by its cells.
        '''
        print('starting scanner...')
        scanner = self.client.scannerOpen(self.tableName, '', ['page:title'], {})
        try:
            rows = self.client.scannerGet(scanner)
            while rows:
                for row in rows:
                    print(row.row)
                    for name, cell in row.columns.items():
                        print('%s %s %s' % (name, cell.value, cell.timestamp))
                        print(cell.value)
                rows = self.client.scannerGet(scanner)
        finally:
            # Always hand the scanner back, even if printing a row fails.
            self.client.scannerClose(scanner)
        print('scanner finished ')
class HBaseClient:
    ''' Hbase client: thin wrapper around the Thrift client on localhost:9090 '''

    def __init__(self):
        ''' open a buffered binary-protocol connection to localhost:9090 '''
        transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9090))
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        self.__client = Client(protocol)
        # Keep a handle on the transport so close() can release it.
        self.__transport = transport
        transport.open()

    def close(self):
        ''' close the transport opened by the constructor '''
        self.__transport.close()

    def getTableNames(self):
        ''' get table names '''
        return self.__client.getTableNames()

    def deleteTable(self, tName):
        ''' delete table name, disabling the table first when it is enabled '''
        if self.__client.isTableEnabled(tName):
            self.__client.disableTable(tName)
        self.__client.deleteTable(tName)

    def createTable(self, tName, ColumnDescriptors):
        '''
        create table tName with the given column descriptors

        raises UfException(Errors.HBASE_CREATE_ERROR) when the server
        reports ttypes.AlreadyExists
        '''
        try:
            self.__client.createTable(tName, ColumnDescriptors)
        except ttypes.AlreadyExists as excp:
            colNames = [col.name for col in ColumnDescriptors]
            raise UfException(
                Errors.HBASE_CREATE_ERROR,
                "AlreadyExists Error when creating table %s with cols: %s): %s"
                % (tName, colNames, excp.message))

    def getColumnDescriptors(self, tName):
        '''
        get column descriptors of table tName

        raises UfException(Errors.UNKNOWN_ERROR) on any failure
        '''
        try:
            return self.__client.getColumnDescriptors(tName)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            raise UfException(Errors.UNKNOWN_ERROR,
                              "Error when getting column descriptors table %s" % tName)

    def updateRow(self, tName, rowName, mutations, timestamp=None):
        '''
        add row to table

        uses mutateRowTs when a timestamp is given, mutateRow otherwise;
        raises UfException(Errors.HBASE_UPDATE_ERROR) on any failure
        '''
        try:
            if timestamp is None:
                self.__client.mutateRow(tName, rowName, mutations)
            else:
                self.__client.mutateRowTs(tName, rowName, mutations, timestamp)
        except Exception as excp:
            raise UfException(
                Errors.HBASE_UPDATE_ERROR,
                "Error when updating table %s - rowName %s - mutations %s: %s"
                % (tName, rowName, mutations, excp))

    def getRow(self, tName, rowName):
        ''' get row: first result, or the empty result itself when nothing matched '''
        result = self.__client.getRow(tName, rowName)
        return result[0] if result else result

    def scanTable(self, tName, columns, startRow="", endRow=None):
        '''
        scan a table and return the rows as a list

        scans from startRow to the end of the table, or up to endRow when
        one is given; only the first element of each scannerGet batch is kept
        '''
        if endRow is None:
            scanner = self.__client.scannerOpen(tName, startRow, columns)
        else:
            scanner = self.__client.scannerOpenWithStop(tName, startRow, endRow, columns)

        ret = []
        try:
            row = self.__client.scannerGet(scanner)
            while row:
                ret.append(row[0])
                row = self.__client.scannerGet(scanner)
        finally:
            # Release the scanner whether the scan finished or failed.
            self.__client.scannerClose(scanner)
        return ret

    def getClient(self):
        ''' return client, in case low level api is needed '''
        return self.__client
# Write rows with an empty, a valid-utf8 and an invalid row key, scan what was
# written, then start a pass over row keys 100 down to 1.
client.mutateRow(t, "", mutations)

# this row name is valid utf8
mutations = [Mutation(column="entry:foo", value=valid)]
client.mutateRow(t, valid, mutations)

# non-utf8 is not allowed in row names
try:
    mutations = [Mutation(column="entry:foo", value=invalid)]
    client.mutateRow(t, invalid, mutations)
except ttypes.IOError as e:
    print('expected exception: %s' % (e.message))

# Run a scanner on the rows we just created
print("Starting scanner...")
scanner = client.scannerOpen(t, "", ["entry:"])
try:
    r = client.scannerGet(scanner)
    while r:
        printRow(r[0])
        r = client.scannerGet(scanner)
finally:
    # Release the scanner even when printing a row fails.
    client.scannerClose(scanner)
print("Scanner finished")

#
# Run some operations on a bunch of rows.
#
for e in range(100, 0, -1):
    # format row keys as "00000" to "00100"
    row = "%0.5d" % (e)
    mutations = [Mutation(column="unused:", value="DELETE_ME")]
# Same walkthrough against the dict-argument Mutation API, where scannerGet
# signals the end of the scan by raising ttypes.NotFound.
client.mutateRow(t, "", mutations)

# this row name is valid utf8
mutations = [Mutation({"column": "entry:foo", "value": valid})]
client.mutateRow(t, valid, mutations)

# non-utf8 is not allowed in row names
try:
    mutations = [Mutation({"column": "entry:foo", "value": invalid})]
    client.mutateRow(t, invalid, mutations)
except ttypes.IOError as e:
    print("expected exception: %s" % (e.message))

# Run a scanner on the rows we just created
print("Starting scanner...")
scanner = client.scannerOpen(t, "", ["entry:"])
try:
    while 1:
        printRow(client.scannerGet(scanner))
except ttypes.NotFound:
    # NotFound is how this API reports that the scanner is exhausted.
    print("Scanner finished")
finally:
    # Release the scanner however the loop ended.
    client.scannerClose(scanner)

#
# Run some operations on a bunch of rows.
#
for e in range(100, 0, -1):
    # format row keys as "00000" to "00100"
    row = "%0.5d" % (e)
    mutations = [Mutation({"column": "unused:", "value": "DELETE_ME"})]
    client.mutateRow(t, row, mutations)
# try empty strings
# cell value empty
mutations = [Mutation(column='entry:foo', value='')]
# rowkey empty
client.mutateRow(t, '', mutations)

# this row name is valid utf8
mutations = [Mutation(column='entry:foo', value=valid)]
client.mutateRow(t, valid, mutations)

# run a scanner on the rows we just created
# full table scan
print('starting scanner...')
scanner = client.scannerOpen(t, '', ['entry:'])
try:
    r = client.scannerGet(scanner)
    while r:
        # rows are drained without being printed
        r = client.scannerGet(scanner)
finally:
    # Release the scanner once the scan is over.
    client.scannerClose(scanner)
print('scanner finished ')

# range scan: collect the column names of the table, each with ':' appended
columnNames = []
for (col, desc) in client.getColumnDescriptors(t).items():
    print('column with name: %s' % (desc.name,))
    print(desc)
    columnNames.append(desc.name + ':')
print('stating scanner...')