def sync_region_info(self, id_region=None):
    '''
    Refreshes the region metadata cached on this object from the HBase
    META table.

    The row keyed by _id_region_ is fetched from the catalog table, the
    region-info cell is decoded, and the start key, end key, region name,
    table handle, offline flag and the decoded region-info object are
    stored on self.

    id_region -- row key of the region in the META table. If None,
                 self.id_region is used.

    Raises PHBaseException if no region info can be decoded for the row.
    '''
    if id_region is None:
        id_region = self.id_region

    meta_table = HTable(self.conf, HConstants.META_TABLE_NAME)
    result = meta_table.get(Get(Bytes.toBytes(id_region)))
    raw_info = result.getValue(HConstants.CATALOG_FAMILY,
                               HConstants.REGIONINFO_QUALIFIER)

    # A missing row or cell comes back as None. Report it directly instead
    # of relying on the decoder to fail with a particular Java exception.
    if raw_info is None:
        raise PHBaseException(
            "could not retrieve region info for %s" % (id_region))

    try:
        hri = Writables.getHRegionInfo(raw_info)
    except java.lang.NullPointerException:
        raise PHBaseException(
            "could not retrieve region info for %s" % (id_region))

    # jls is a helper defined elsewhere; presumably it converts the Java
    # byte arrays to strings -- verify against its definition.
    self.start_key = jls(hri.getStartKey())
    self.end_key = jls(hri.getEndKey())
    self.name = jls(hri.getRegionName())
    # NOTE(review): despite the attribute name, this stores an HTable built
    # from the table's name, not the name itself -- confirm this is intended.
    self.table_name = HTable(hri.getTableDesc().getName())
    self.offline = hri.isOffline()
    self.obj = hri
class Table(object):
    '''
    Convenience wrapper around an HBase HTable offering dictionary-style
    reads, scans, writes and deletes.
    '''

    def __init__(self, tablename, conf=None):
        '''
        Opens the table named _tablename_. If _conf_ is None, a new
        HBaseConfiguration is created and used.
        '''
        if conf is None:
            conf = HBaseConfiguration()
        self.table = HTable(conf, tablename)

    def get(self, rowkey):
        '''
        Retrieves the specific table row as a dictionary, with full column
        names (column family:name) as keys, and deserialized (from protocol
        buffers) values as values. If the row does not exist, returns None.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        row = self.table.get(op)
        if row is not None and not row.isEmpty():
            return todict(row)
        return None

    def getVersions(self, rowkey, limit=None, ts=None):
        '''
        Retrieves up to _limit_ versions of the specified row, restricted
        to the time range (0, _ts_) when _ts_ is given. If _ts_ is None no
        time range is set on the request; if _limit_ is None no explicit
        version cap is set.

        Returns a list of (timestamp, {column: value}) tuples, one per
        cell; the list is empty if the row is missing or empty.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        if ts is not None:
            # NOTE(review): HBase documents time ranges as [min, max), so a
            # cell stamped exactly _ts_ may be excluded -- confirm intent.
            op.setTimeRange(0, ts)
        if limit is not None:
            op.setMaxVersions(limit)

        row = self.table.get(op)
        versions = []
        if row is not None and not row.isEmpty():
            for e in row.list():
                col = str(java.lang.String(e.getColumn()))
                colts = e.getTimestamp()
                val = PBUtil.toValue(e.getValue())
                versions.append((colts, {col: val}))
        return versions

    def scan(self, cols, startrow=None, limit=50, filter=None):
        '''
        Scans the table and returns up to _limit_ rows as a list of
        dictionaries (built with todict).

        cols     -- columns to request; each is passed to Scan.addColumn.
        startrow -- row to start from, handed to Scan as-is (no conversion
                    is done here); None starts at the beginning.
        limit    -- maximum number of rows to return; None means no cap.
        filter   -- optional filter set on the scan.
        '''
        rows = []
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        scan = Scan(startrow)
        if filter is not None:
            scan.setFilter(filter)
        for c in cols:
            scan.addColumn(c)

        scanner = None
        cnt = 0
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and cnt >= limit:
                    break
                rows.append(todict(rec))
                cnt += 1
        finally:
            if scanner is not None:
                # Closing is best effort: a failure here must not mask the
                # scan result or an exception raised by the scan itself.
                try:
                    scanner.close()
                except:
                    pass
        return rows

    def scan_apply(self, cols, startrow=None, limit=50, filter=None,
                   rowfunc=None):
        '''
        Scans the table and calls _rowfunc_ with each raw scanner record,
        stopping after _limit_ rows (no cap if _limit_ is None). Returns
        None.

        cols     -- columns to request; each is passed to Scan.addColumn.
        startrow -- row to start from, converted with java_str and
                    Bytes.toBytes; None starts at the beginning.
        filter   -- optional filter set on the scan.
        rowfunc  -- callable invoked once per record; it is called
                    unconditionally, so it must be supplied whenever the
                    scan yields rows.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        else:
            startrow = Bytes.toBytes(java_str(startrow))
        scan = Scan(startrow)
        if filter is not None:
            scan.setFilter(filter)
        for c in cols:
            scan.addColumn(c)

        cnt = 0
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and cnt >= limit:
                    break
                rowfunc(rec)
                cnt += 1
        finally:
            if scanner is not None:
                # Best-effort close; see scan().
                try:
                    scanner.close()
                except:
                    pass
        return

    def save(self, rowkey, vals, ts=None):
        '''
        Writes _vals_ to the row _rowkey_ in a single Put.

        vals -- dictionary mapping full column names (parsed with
                KeyValue.parseColumn) to values. Protocol buffer messages
                are stored via toByteArray(); any other value is
                serialized with PBUtil.toBytes.
        ts   -- optional timestamp set on the Put.
        '''
        key = java_str(rowkey)
        rowup = Put(Bytes.toBytes(key))
        if ts is not None:
            rowup.setTimestamp(ts)
        for k, v in vals.items():
            (fam, col) = KeyValue.parseColumn(Bytes.toBytes(java_str(k)))
            if isinstance(v, com.google.protobuf.Message):
                rowup.add(fam, col, v.toByteArray())
            else:
                rowup.add(fam, col, PBUtil.toBytes(v))
        self.table.put(rowup)

    def update(self, startrow, wherefilt, sets):
        '''
        Scans all column families starting at _startrow_ and commits the
        update that ColumnUpdate(sets) produces for each record.

        startrow  -- row to start from; None starts at the beginning, and
                     a java.lang.String is converted with Bytes.toBytes.
        wherefilt -- optional filter set on the scan.
        sets      -- passed to ColumnUpdate to build the per-row updater.

        Returns the number of rows for which an update was committed.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        elif isinstance(startrow, java.lang.String):
            startrow = Bytes.toBytes(startrow)

        updater = ColumnUpdate(sets)
        cols = self.families()
        upcnt = 0

        scan = Scan(startrow)
        for c in cols:
            scan.addColumn(c)
        if wherefilt is not None:
            # Apply the caller's filter. This previously passed the Python
            # builtin `filter`, so the requested filter was never used.
            scan.setFilter(wherefilt)

        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                rowup = updater.getUpdate(rec)
                if rowup is not None:
                    self.table.commit(rowup)
                    upcnt += 1
        finally:
            if scanner is not None:
                # Best-effort close; see scan().
                try:
                    scanner.close()
                except:
                    pass
        return upcnt

    def delete(self, rowkey):
        '''
        Deletes the row with the given key.
        '''
        op = Delete(Bytes.toBytes(java_str(rowkey)))
        self.table.delete(op)

    def deleteAll(self, rows):
        '''
        Deletes every row in _rows_ that carries a '__key__' entry;
        records without that entry are skipped.
        '''
        for rec in rows:
            if '__key__' in rec:
                op = Delete(Bytes.toBytes(java_str(rec['__key__'])))
                self.table.delete(op)

    def families(self):
        '''
        Returns the table's column family names, each suffixed with ":".
        '''
        return [coldesc.getNameAsString() + ":"
                for coldesc in self.table.getTableDescriptor().getFamilies()]
class Table(object):
    '''
    Convenience wrapper around an HBase HTable offering dictionary-style
    reads, scans, writes and deletes.
    '''

    def __init__(self, tablename, conf=None):
        '''
        Opens the table named _tablename_. If _conf_ is None, a new
        HBaseConfiguration is created and used.
        '''
        if conf is None:
            conf = HBaseConfiguration()
        self.table = HTable(conf, tablename)

    def get(self, rowkey):
        '''
        Retrieves the specific table row as a dictionary, with full column
        names (column family:name) as keys, and deserialized (from protocol
        buffers) values as values. If the row does not exist, returns None.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        row = self.table.get(op)
        if row is not None and not row.isEmpty():
            return todict(row)
        return None

    def getVersions(self, rowkey, limit=None, ts=None):
        '''
        Retrieves up to _limit_ versions of the specified row, restricted
        to the time range (0, _ts_) when _ts_ is given. If _ts_ is None no
        time range is set on the request; if _limit_ is None no explicit
        version cap is set.

        Returns a list of (timestamp, {column: value}) tuples, one per
        cell; the list is empty if the row is missing or empty.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        if ts is not None:
            # NOTE(review): HBase documents time ranges as [min, max), so a
            # cell stamped exactly _ts_ may be excluded -- confirm intent.
            op.setTimeRange(0, ts)
        if limit is not None:
            op.setMaxVersions(limit)

        row = self.table.get(op)
        versions = []
        if row is not None and not row.isEmpty():
            for e in row.list():
                col = str(java.lang.String(e.getColumn()))
                colts = e.getTimestamp()
                val = PBUtil.toValue(e.getValue())
                versions.append((colts, {col: val}))
        return versions

    def scan(self, cols, startrow=None, limit=50, filter=None):
        '''
        Scans the table and returns up to _limit_ rows as a list of
        dictionaries (built with todict).

        cols     -- columns to request; each is passed to Scan.addColumn.
        startrow -- row to start from, handed to Scan as-is (no conversion
                    is done here); None starts at the beginning.
        limit    -- maximum number of rows to return; None means no cap.
        filter   -- optional filter set on the scan.
        '''
        rows = []
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        scan = Scan(startrow)
        if filter is not None:
            scan.setFilter(filter)
        for c in cols:
            scan.addColumn(c)

        scanner = None
        cnt = 0
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and cnt >= limit:
                    break
                rows.append(todict(rec))
                cnt += 1
        finally:
            if scanner is not None:
                # Closing is best effort: a failure here must not mask the
                # scan result or an exception raised by the scan itself.
                try:
                    scanner.close()
                except:
                    pass
        return rows

    def scan_apply(self, cols, startrow=None, limit=50, filter=None,
                   rowfunc=None):
        '''
        Scans the table and calls _rowfunc_ with each raw scanner record,
        stopping after _limit_ rows (no cap if _limit_ is None). Returns
        None.

        cols     -- columns to request; each is passed to Scan.addColumn.
        startrow -- row to start from, converted with java_str and
                    Bytes.toBytes; None starts at the beginning.
        filter   -- optional filter set on the scan.
        rowfunc  -- callable invoked once per record; it is called
                    unconditionally, so it must be supplied whenever the
                    scan yields rows.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        else:
            startrow = Bytes.toBytes(java_str(startrow))
        scan = Scan(startrow)
        if filter is not None:
            scan.setFilter(filter)
        for c in cols:
            scan.addColumn(c)

        cnt = 0
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and cnt >= limit:
                    break
                rowfunc(rec)
                cnt += 1
        finally:
            if scanner is not None:
                # Best-effort close; see scan().
                try:
                    scanner.close()
                except:
                    pass
        return

    def save(self, rowkey, vals, ts=None):
        '''
        Writes _vals_ to the row _rowkey_ in a single Put.

        vals -- dictionary mapping full column names (parsed with
                KeyValue.parseColumn) to values. Protocol buffer messages
                are stored via toByteArray(); any other value is
                serialized with PBUtil.toBytes.
        ts   -- optional timestamp set on the Put.
        '''
        key = java_str(rowkey)
        rowup = Put(Bytes.toBytes(key))
        if ts is not None:
            rowup.setTimestamp(ts)
        for k, v in vals.items():
            (fam, col) = KeyValue.parseColumn(Bytes.toBytes(java_str(k)))
            if isinstance(v, com.google.protobuf.Message):
                rowup.add(fam, col, v.toByteArray())
            else:
                rowup.add(fam, col, PBUtil.toBytes(v))
        self.table.put(rowup)

    def update(self, startrow, wherefilt, sets):
        '''
        Scans all column families starting at _startrow_ and commits the
        update that ColumnUpdate(sets) produces for each record.

        startrow  -- row to start from; None starts at the beginning, and
                     a java.lang.String is converted with Bytes.toBytes.
        wherefilt -- optional filter set on the scan.
        sets      -- passed to ColumnUpdate to build the per-row updater.

        Returns the number of rows for which an update was committed.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        elif isinstance(startrow, java.lang.String):
            startrow = Bytes.toBytes(startrow)

        updater = ColumnUpdate(sets)
        cols = self.families()
        upcnt = 0

        scan = Scan(startrow)
        for c in cols:
            scan.addColumn(c)
        if wherefilt is not None:
            # Apply the caller's filter. This previously passed the Python
            # builtin `filter`, so the requested filter was never used.
            scan.setFilter(wherefilt)

        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                rowup = updater.getUpdate(rec)
                if rowup is not None:
                    self.table.commit(rowup)
                    upcnt += 1
        finally:
            if scanner is not None:
                # Best-effort close; see scan().
                try:
                    scanner.close()
                except:
                    pass
        return upcnt

    def delete(self, rowkey):
        '''
        Deletes the row with the given key.
        '''
        op = Delete(Bytes.toBytes(java_str(rowkey)))
        self.table.delete(op)

    def deleteAll(self, rows):
        '''
        Deletes every row in _rows_ that carries a '__key__' entry;
        records without that entry are skipped.
        '''
        for rec in rows:
            if '__key__' in rec:
                op = Delete(Bytes.toBytes(java_str(rec['__key__'])))
                self.table.delete(op)

    def families(self):
        '''
        Returns the table's column family names, each suffixed with ":".
        '''
        return [coldesc.getNameAsString() + ":"
                for coldesc in self.table.getTableDescriptor().getFamilies()]
from org.apache.hadoop.hbase import HBaseConfiguration
from org.apache.hadoop.hbase.client import HTable, Get
from org.apache.hadoop.hbase.util import Bytes

# Example: read the cell cf:col1 of row 'row1' from the table "test" and
# print its value decoded as a string.
conf = HBaseConfiguration.create()
table = HTable(conf, "test")
try:
    get = Get(Bytes.toBytes('row1'))
    result = table.get(get)
    value = result.getValue(Bytes.toBytes('cf'), Bytes.toBytes('col1'))
finally:
    # Release the table's resources even if the read fails.
    table.close()

# getValue returns None when the row or column is absent; calling len() on
# it would raise TypeError, so report the missing cell as None instead.
if value is None:
    print(None)
else:
    print(Bytes.toString(value))