def setupHbase():
    '''
    (Re)creates the nodes and ways tables and opens handles to them.

    Each table gets a single column family ("nodeData" for the nodes
    table, "wayData" for the ways table). A table that already exists is
    disabled and deleted before being created again, so any existing
    table definition is replaced. The resulting HTable handles are
    published through the module globals ``nodesTable`` and ``waysTable``.
    '''
    global nodesTable, waysTable

    hbaseAdmin = HBaseAdmin(conf)

    # Build both schemas up front, nodes first, then ways.
    nodeSchema = HTableDescriptor(nodesTablename)
    nodeSchema.addFamily(HColumnDescriptor("nodeData"))
    waySchema = HTableDescriptor(waysTablename)
    waySchema.addFamily(HColumnDescriptor("wayData"))

    # Drop-if-present, then create; same sequence for each table.
    for name, schema in ((nodesTablename, nodeSchema),
                         (waysTablename, waySchema)):
        if hbaseAdmin.tableExists(name):
            # The table is disabled first, then deleted.
            hbaseAdmin.disableTable(name)
            hbaseAdmin.deleteTable(name)
        hbaseAdmin.createTable(schema)

    nodesTable = HTable(conf, nodesTablename)
    waysTable = HTable(conf, waysTablename)
class Admin(object):
    '''
    Utility class for administering HBase tables.

    This class basically just provides more convenient access to the
    commonly used org.apache.hadoop.hbase.client.HBaseAdmin methods.
    The wrapped HBaseAdmin instance is available as ``self.hadmin``.
    '''

    def __init__(self, conf=None):
        '''
        Wraps a new HBaseAdmin built from ``conf``. When ``conf`` is None
        a fresh HBaseConfiguration() is used instead.
        '''
        if conf is None:
            conf = HBaseConfiguration()
        self.hadmin = HBaseAdmin(conf)

    def create(self, tablename, cols, tableopts=None):
        '''
        Creates a new HBase table with the given name, column families
        and options.

        tablename -- name passed to HTableDescriptor.
        cols      -- dict mapping each column family name to a dict of
                     per-family options keyed by BLOCKCACHE, BLOOMFILTER,
                     COMPRESSION, MEMORY, INDEXINTERVAL, LENGTH, VERSIONS
                     and TTL. A value of None means "no options".
        tableopts -- optional dict of table-level options keyed by
                     MEMORY, FILESIZE, MEMCACHEFLUSH and READONLY.

        Option values are coerced with bool() / int() before being handed
        to the descriptor setters (COMPRESSION is passed through as-is).
        '''
        tdef = HTableDescriptor(tablename)

        # set any table options
        # (values are read from ``tableopts``; the previous code indexed an
        # undefined name ``tableopt`` and raised NameError here)
        if tableopts is not None:
            if MEMORY in tableopts:
                tdef.setInMemory(bool(tableopts[MEMORY]))
            if FILESIZE in tableopts:
                tdef.setMaxFileSize(int(tableopts[FILESIZE]))
            if MEMCACHEFLUSH in tableopts:
                tdef.setMemcacheFlushSize(int(tableopts[MEMCACHEFLUSH]))
            if READONLY in tableopts:
                tdef.setReadOnly(bool(tableopts[READONLY]))

        # setup column families and options
        for k, v in cols.items():
            colfamily = HColumnDescriptor(str(k))
            if v is None:
                # family declared without options: keep descriptor defaults
                v = {}
            if BLOCKCACHE in v:
                colfamily.setBlockCacheEnabled(bool(v[BLOCKCACHE]))
            if BLOOMFILTER in v:
                colfamily.setBloomfilter(bool(v[BLOOMFILTER]))
            if COMPRESSION in v:
                colfamily.setCompressionType(v[COMPRESSION])
            if MEMORY in v:
                colfamily.setInMemory(bool(v[MEMORY]))
            if INDEXINTERVAL in v:
                colfamily.setMapFileIndexInterval(int(v[INDEXINTERVAL]))
            if LENGTH in v:
                colfamily.setMaxValueLength(int(v[LENGTH]))
            if VERSIONS in v:
                colfamily.setMaxVersions(int(v[VERSIONS]))
            if TTL in v:
                colfamily.setTimeToLive(int(v[TTL]))
            tdef.addFamily(colfamily)

        self.hadmin.createTable(tdef)

    def drop(self, tablename):
        '''Convenience method to first disable and then delete a given table.'''
        self.disable(tablename)
        self.delete(tablename)

    def describe(self, tablename):
        '''
        Returns the full org.apache.hadoop.hbase.HTableDescriptor
        instance for the given table
        '''
        return self.hadmin.getTableDescriptor(tablename)

    def exists(self, tablename):
        '''Checks if the given table exists'''
        return self.hadmin.tableExists(tablename)

    def show(self):
        '''
        Lists the names (only) of all currently defined tables.
        For full table definitions, see Admin.describeAll()
        '''
        return [x.getNameAsString() for x in self.describeAll()]

    def describeAll(self):
        '''
        Returns a list of org.apache.hadoop.hbase.HTableDescriptor
        instances for all tables.
        '''
        return self.hadmin.listTables()

    def enable(self, tablename):
        '''Brings a previously disabled table back online'''
        self.hadmin.enableTable(tablename)

    def disable(self, tablename):
        '''Takes a given table offline in HBase'''
        self.hadmin.disableTable(tablename)

    def delete(self, tablename):
        '''
        Completely removes the given table. This can only be called on
        tables that have already been disabled. To disable and delete
        in one step, see Admin.drop(tablename)
        '''
        self.hadmin.deleteTable(tablename)
class Admin(object):
    '''
    Utility class for administering HBase tables.

    This class basically just provides more convenient access to the
    commonly used org.apache.hadoop.hbase.client.HBaseAdmin methods.
    The wrapped HBaseAdmin instance is available as ``self.hadmin``.
    '''

    def __init__(self, conf=None):
        '''
        Wraps a new HBaseAdmin built from ``conf``. When ``conf`` is None
        a fresh HBaseConfiguration() is used instead.
        '''
        if conf is None:
            conf = HBaseConfiguration()
        self.hadmin = HBaseAdmin(conf)

    def create(self, tablename, cols, tableopts=None):
        '''
        Creates a new HBase table with the given name, column families
        and options.

        tablename -- name passed to HTableDescriptor.
        cols      -- dict mapping each column family name to a dict of
                     per-family options keyed by BLOCKCACHE, BLOOMFILTER,
                     COMPRESSION, MEMORY, INDEXINTERVAL, LENGTH, VERSIONS
                     and TTL. A value of None means "no options".
        tableopts -- optional dict of table-level options keyed by
                     MEMORY, FILESIZE, MEMCACHEFLUSH and READONLY.

        Option values are coerced with bool() / int() before being handed
        to the descriptor setters (COMPRESSION is passed through as-is).
        '''
        tdef = HTableDescriptor(tablename)

        # set any table options
        # (values are read from ``tableopts``; the previous code indexed an
        # undefined name ``tableopt`` and raised NameError here)
        if tableopts is not None:
            if MEMORY in tableopts:
                tdef.setInMemory(bool(tableopts[MEMORY]))
            if FILESIZE in tableopts:
                tdef.setMaxFileSize(int(tableopts[FILESIZE]))
            if MEMCACHEFLUSH in tableopts:
                tdef.setMemcacheFlushSize(int(tableopts[MEMCACHEFLUSH]))
            if READONLY in tableopts:
                tdef.setReadOnly(bool(tableopts[READONLY]))

        # setup column families and options
        for k, v in cols.items():
            colfamily = HColumnDescriptor(str(k))
            if v is None:
                # family declared without options: keep descriptor defaults
                v = {}
            if BLOCKCACHE in v:
                colfamily.setBlockCacheEnabled(bool(v[BLOCKCACHE]))
            if BLOOMFILTER in v:
                colfamily.setBloomfilter(bool(v[BLOOMFILTER]))
            if COMPRESSION in v:
                colfamily.setCompressionType(v[COMPRESSION])
            if MEMORY in v:
                colfamily.setInMemory(bool(v[MEMORY]))
            if INDEXINTERVAL in v:
                colfamily.setMapFileIndexInterval(int(v[INDEXINTERVAL]))
            if LENGTH in v:
                colfamily.setMaxValueLength(int(v[LENGTH]))
            if VERSIONS in v:
                colfamily.setMaxVersions(int(v[VERSIONS]))
            if TTL in v:
                colfamily.setTimeToLive(int(v[TTL]))
            tdef.addFamily(colfamily)

        self.hadmin.createTable(tdef)

    def drop(self, tablename):
        '''Convenience method to first disable and then delete a given table.'''
        self.disable(tablename)
        self.delete(tablename)

    def describe(self, tablename):
        '''
        Returns the full org.apache.hadoop.hbase.HTableDescriptor
        instance for the given table
        '''
        return self.hadmin.getTableDescriptor(tablename)

    def exists(self, tablename):
        '''Checks if the given table exists'''
        return self.hadmin.tableExists(tablename)

    def show(self):
        '''
        Lists the names (only) of all currently defined tables.
        For full table definitions, see Admin.describeAll()
        '''
        return [x.getNameAsString() for x in self.describeAll()]

    def describeAll(self):
        '''
        Returns a list of org.apache.hadoop.hbase.HTableDescriptor
        instances for all tables.
        '''
        return self.hadmin.listTables()

    def enable(self, tablename):
        '''Brings a previously disabled table back online'''
        self.hadmin.enableTable(tablename)

    def disable(self, tablename):
        '''Takes a given table offline in HBase'''
        self.hadmin.disableTable(tablename)

    def delete(self, tablename):
        '''
        Completely removes the given table. This can only be called on
        tables that have already been disabled. To disable and delete
        in one step, see Admin.drop(tablename)
        '''
        self.hadmin.deleteTable(tablename)