def sync_region_info(self, id_region=None):
    '''
    Refresh this object's region attributes from the HBase catalog table.

    Reads the row keyed by id_region (self.id_region when omitted) from the
    META table, deserializes the HRegionInfo stored in its region-info cell
    and copies start key, end key, region name, table and offline flag onto
    self. The HRegionInfo itself is kept in self.obj.

    Raises PHBaseException when no region info can be read for id_region.
    '''
    if id_region is None:
        id_region = self.id_region

    meta_table = HTable(self.conf, HConstants.META_TABLE_NAME)
    try:
        result = meta_table.get(Get(Bytes.toBytes(id_region)))
        raw_info = result.getValue(HConstants.CATALOG_FAMILY,
                                   HConstants.REGIONINFO_QUALIFIER)
    finally:
        # The handle is only needed for this single lookup; release it so
        # repeated syncs do not accumulate open tables.
        meta_table.close()

    # A missing cell means the region is unknown; report it explicitly
    # instead of relying on the deserializer to fail.
    if raw_info is None:
        raise PHBaseException("could not retrieve region info for %s" % (id_region,))

    try:
        hri = Writables.getHRegionInfo(raw_info)
    except java.lang.NullPointerException:
        raise PHBaseException("could not retrieve region info for %s" % (id_region,))

    self.start_key = jls(hri.getStartKey())
    self.end_key = jls(hri.getEndKey())
    self.name = jls(hri.getRegionName())
    # NOTE(review): despite the attribute name this stores an HTable built
    # from the table name, not the name itself -- confirm this is intended.
    self.table_name = HTable(hri.getTableDesc().getName())
    self.offline = hri.isOffline()
    self.obj = hri
class HBaseTable(object):
    '''
    Represents an HBase table and the operations that can be performed on it.

    The underlying HTable handle is opened lazily, on the first call to
    row() or scan(), and reused afterwards.
    '''

    def __init__(self, name, conf, admin):
        '''
        Store the table name, configuration and admin handle.

        No connection is made here; the HTable is created on first use.
        '''
        self.logger = Logger.getLogger("HBaseTable")
        # self.logger.setLevel(Level.DEBUG)
        self.conf = conf
        self.admin = admin
        self.name = name
        self._table = None

    def _get_table(self):
        '''Return the shared HTable handle, creating it on first use.'''
        if not self._table:
            self._table = HTable(self.conf, self.name)
        return self._table

    def row(self, rowId):
        '''Return an HBaseRow bound to this table for the given row id.'''
        return HBaseRow(self._get_table(), rowId)

    def scan(self, start_row=None, end_row=None, filter=None):
        '''
        Generate the rows of the table, one result per iteration.

        start_row -- first row key to read; the table start when omitted.
        end_row   -- stop row key handed to the HBase Scan; the table end
                     when omitted.
        filter    -- optional HBase filter applied to the scan.

        The three arguments are independent: any combination may be given.
        The scanner is closed when the rows are exhausted or the generator
        is closed or discarded.
        '''
        table = self._get_table()

        if start_row and end_row:
            sc = Scan(start_row, end_row)
        elif start_row:
            sc = Scan(start_row)
        else:
            sc = Scan()
            if end_row:
                sc.setStopRow(end_row)
        # Set the filter separately so it is honoured whether or not a
        # start row was supplied, and never displaces end_row.
        if filter is not None:
            sc.setFilter(filter)

        s = table.getScanner(sc)
        try:
            while True:
                r = s.next()
                if r is None:
                    # A plain return ends the generator; raising
                    # StopIteration here is an error under PEP 479.
                    return
                yield r
        finally:
            s.close()
def scan(self, start_row=None, end_row=None, filter=None):
    '''
    Generate the rows of the table, one result per iteration.

    start_row -- first row key to read; the table start when omitted.
    end_row   -- stop row key handed to the HBase Scan; the table end
                 when omitted.
    filter    -- optional HBase filter applied to the scan.

    The three arguments are independent: any combination may be given.
    The HTable handle is opened on first use and cached on self._table.
    The scanner is closed when the rows are exhausted or the generator is
    closed or discarded.
    '''
    if not self._table:
        self._table = HTable(self.conf, self.name)

    if start_row and end_row:
        sc = Scan(start_row, end_row)
    elif start_row:
        sc = Scan(start_row)
    else:
        sc = Scan()
        if end_row:
            sc.setStopRow(end_row)
    # Set the filter separately so it is honoured whether or not a start
    # row was supplied, and never displaces end_row.
    if filter is not None:
        sc.setFilter(filter)

    s = self._table.getScanner(sc)
    try:
        while True:
            r = s.next()
            if r is None:
                # A plain return ends the generator; raising StopIteration
                # here is an error under PEP 479.
                return
            yield r
    finally:
        s.close()
def __init__(self, tablename, conf=None):
    '''
    Open the HBase table called tablename.

    A fresh HBaseConfiguration is created when conf is not supplied.
    '''
    configuration = HBaseConfiguration() if conf is None else conf
    self.table = HTable(configuration, tablename)
class Table(object):
    '''
    Convenience wrapper around a single HBase HTable.

    Row keys and column names are converted with java_str/Bytes.toBytes,
    and cell values are (de)serialized through PBUtil.
    '''

    def __init__(self, tablename, conf=None):
        '''
        Open tablename, using conf or a default HBaseConfiguration.
        '''
        if conf is None:
            conf = HBaseConfiguration()
        self.table = HTable(conf, tablename)

    def _new_scan(self, startrow, cols, scan_filter=None):
        '''Build a Scan from startrow restricted to cols, optionally filtered.'''
        scan = Scan(startrow)
        if scan_filter is not None:
            scan.setFilter(scan_filter)
        for c in cols:
            scan.addColumn(c)
        return scan

    def _close_scanner(self, scanner):
        '''Best-effort close of a scanner; None is ignored.'''
        if scanner is None:
            return
        try:
            scanner.close()
        except (Exception, java.lang.Throwable):
            # Cleanup only: a failure to close must not mask the result
            # (or the exception) of the scan itself.
            pass

    def get(self, rowkey):
        '''
        Retrieves the specific table row as a dictionary, with full column
        names (column family:name) as keys, and deserialized (from protocol
        buffers) values as values. If the row does not exist, returns None.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        row = self.table.get(op)
        if row is not None and not row.isEmpty():
            return todict(row)
        return None

    def getVersions(self, rowkey, limit=None, ts=None):
        '''
        Retrieves up to _limit_ versions of the specified row, bounded by
        the timestamp _ts_. When _ts_ is None no time range is applied;
        when _limit_ is None the Get's default version count is used.

        Returns a list of (timestamp, {column: value}) tuples, empty when
        the row does not exist.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        if ts is not None:
            # NOTE(review): HBase time ranges exclude the upper bound, so
            # cells stamped exactly ts may be left out -- confirm.
            op.setTimeRange(0, ts)
        if limit is not None:
            op.setMaxVersions(limit)
        row = self.table.get(op)
        versions = []
        if row is not None and not row.isEmpty():
            for e in row.list():
                col = str(java.lang.String(e.getColumn()))
                colts = e.getTimestamp()
                val = PBUtil.toValue(e.getValue())
                versions.append((colts, {col: val}))
        return versions

    def scan(self, cols, startrow=None, limit=50, filter=None):
        '''
        Return up to _limit_ rows (all rows when limit is None) as a list
        of dictionaries, reading the columns in _cols_ from _startrow_.

        NOTE(review): unlike scan_apply, startrow is passed to Scan as
        given, without java_str/Bytes conversion -- confirm callers rely
        on that.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        scan = self._new_scan(startrow, cols, filter)
        rows = []
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and len(rows) >= limit:
                    break
                rows.append(todict(rec))
        finally:
            self._close_scanner(scanner)
        return rows

    def scan_apply(self, cols, startrow=None, limit=50, filter=None, rowfunc=None):
        '''
        Call rowfunc(record) for up to _limit_ rows (all rows when limit is
        None), reading the columns in _cols_ from _startrow_.

        rowfunc must be callable; nothing is returned.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        else:
            startrow = Bytes.toBytes(java_str(startrow))
        scan = self._new_scan(startrow, cols, filter)
        cnt = 0
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and cnt >= limit:
                    break
                rowfunc(rec)
                cnt += 1
        finally:
            self._close_scanner(scanner)

    def save(self, rowkey, vals, ts=None):
        '''
        Write the columns in _vals_ (a dict keyed by "family:qualifier") to
        the row _rowkey_, optionally at timestamp _ts_.

        Protocol buffer messages are stored via toByteArray(); every other
        value is serialized with PBUtil.toBytes.
        '''
        key = java_str(rowkey)
        rowup = Put(Bytes.toBytes(key))
        if ts is not None:
            rowup.setTimestamp(ts)
        for k, v in vals.items():
            (fam, col) = KeyValue.parseColumn(Bytes.toBytes(java_str(k)))
            if isinstance(v, com.google.protobuf.Message):
                rowup.add(fam, col, v.toByteArray())
            else:
                rowup.add(fam, col, PBUtil.toBytes(v))
        self.table.put(rowup)

    def update(self, startrow, wherefilt, sets):
        '''
        Scan every column family from _startrow_, restricted by the filter
        _wherefilt_ when given, and commit the update that
        ColumnUpdate(sets) produces for each matching row.

        Returns the number of rows for which an update was committed.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        elif isinstance(startrow, java.lang.String):
            startrow = Bytes.toBytes(startrow)
        updater = ColumnUpdate(sets)
        # Pass the caller's filter through; the builtin `filter` function
        # was previously handed to the scan by mistake.
        scan = self._new_scan(startrow, self.families(), wherefilt)
        upcnt = 0
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                rowup = updater.getUpdate(rec)
                if rowup is not None:
                    # NOTE(review): commit() is kept as in the original;
                    # confirm it matches the type getUpdate() returns.
                    self.table.commit(rowup)
                    upcnt += 1
        finally:
            self._close_scanner(scanner)
        return upcnt

    def delete(self, rowkey):
        '''Delete the whole row identified by _rowkey_.'''
        op = Delete(Bytes.toBytes(java_str(rowkey)))
        self.table.delete(op)

    def deleteAll(self, rows):
        '''
        Delete every row in _rows_ that carries a '__key__' entry; records
        without one are skipped.
        '''
        for rec in rows:
            if '__key__' in rec:
                self.delete(rec['__key__'])

    def families(self):
        '''Return the table's column family names, each suffixed with ":".'''
        return [coldesc.getNameAsString() + ":"
                for coldesc in self.table.getTableDescriptor().getFamilies()]
class Table(object):
    '''
    Convenience wrapper around a single HBase HTable.

    Row keys and column names are converted with java_str/Bytes.toBytes,
    and cell values are (de)serialized through PBUtil.
    '''

    def __init__(self, tablename, conf=None):
        '''
        Open tablename, using conf or a default HBaseConfiguration.
        '''
        if conf is None:
            conf = HBaseConfiguration()
        self.table = HTable(conf, tablename)

    def _new_scan(self, startrow, cols, scan_filter=None):
        '''Build a Scan from startrow restricted to cols, optionally filtered.'''
        scan = Scan(startrow)
        if scan_filter is not None:
            scan.setFilter(scan_filter)
        for c in cols:
            scan.addColumn(c)
        return scan

    def _close_scanner(self, scanner):
        '''Best-effort close of a scanner; None is ignored.'''
        if scanner is None:
            return
        try:
            scanner.close()
        except (Exception, java.lang.Throwable):
            # Cleanup only: a failure to close must not mask the result
            # (or the exception) of the scan itself.
            pass

    def get(self, rowkey):
        '''
        Retrieves the specific table row as a dictionary, with full column
        names (column family:name) as keys, and deserialized (from protocol
        buffers) values as values. If the row does not exist, returns None.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        row = self.table.get(op)
        if row is not None and not row.isEmpty():
            return todict(row)
        return None

    def getVersions(self, rowkey, limit=None, ts=None):
        '''
        Retrieves up to _limit_ versions of the specified row, bounded by
        the timestamp _ts_. When _ts_ is None no time range is applied;
        when _limit_ is None the Get's default version count is used.

        Returns a list of (timestamp, {column: value}) tuples, empty when
        the row does not exist.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        if ts is not None:
            # NOTE(review): HBase time ranges exclude the upper bound, so
            # cells stamped exactly ts may be left out -- confirm.
            op.setTimeRange(0, ts)
        if limit is not None:
            op.setMaxVersions(limit)
        row = self.table.get(op)
        versions = []
        if row is not None and not row.isEmpty():
            for e in row.list():
                col = str(java.lang.String(e.getColumn()))
                colts = e.getTimestamp()
                val = PBUtil.toValue(e.getValue())
                versions.append((colts, {col: val}))
        return versions

    def scan(self, cols, startrow=None, limit=50, filter=None):
        '''
        Return up to _limit_ rows (all rows when limit is None) as a list
        of dictionaries, reading the columns in _cols_ from _startrow_.

        NOTE(review): unlike scan_apply, startrow is passed to Scan as
        given, without java_str/Bytes conversion -- confirm callers rely
        on that.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        scan = self._new_scan(startrow, cols, filter)
        rows = []
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and len(rows) >= limit:
                    break
                rows.append(todict(rec))
        finally:
            self._close_scanner(scanner)
        return rows

    def scan_apply(self, cols, startrow=None, limit=50, filter=None, rowfunc=None):
        '''
        Call rowfunc(record) for up to _limit_ rows (all rows when limit is
        None), reading the columns in _cols_ from _startrow_.

        rowfunc must be callable; nothing is returned.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        else:
            startrow = Bytes.toBytes(java_str(startrow))
        scan = self._new_scan(startrow, cols, filter)
        cnt = 0
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and cnt >= limit:
                    break
                rowfunc(rec)
                cnt += 1
        finally:
            self._close_scanner(scanner)

    def save(self, rowkey, vals, ts=None):
        '''
        Write the columns in _vals_ (a dict keyed by "family:qualifier") to
        the row _rowkey_, optionally at timestamp _ts_.

        Protocol buffer messages are stored via toByteArray(); every other
        value is serialized with PBUtil.toBytes.
        '''
        key = java_str(rowkey)
        rowup = Put(Bytes.toBytes(key))
        if ts is not None:
            rowup.setTimestamp(ts)
        for k, v in vals.items():
            (fam, col) = KeyValue.parseColumn(Bytes.toBytes(java_str(k)))
            if isinstance(v, com.google.protobuf.Message):
                rowup.add(fam, col, v.toByteArray())
            else:
                rowup.add(fam, col, PBUtil.toBytes(v))
        self.table.put(rowup)

    def update(self, startrow, wherefilt, sets):
        '''
        Scan every column family from _startrow_, restricted by the filter
        _wherefilt_ when given, and commit the update that
        ColumnUpdate(sets) produces for each matching row.

        Returns the number of rows for which an update was committed.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        elif isinstance(startrow, java.lang.String):
            startrow = Bytes.toBytes(startrow)
        updater = ColumnUpdate(sets)
        # Pass the caller's filter through; the builtin `filter` function
        # was previously handed to the scan by mistake.
        scan = self._new_scan(startrow, self.families(), wherefilt)
        upcnt = 0
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                rowup = updater.getUpdate(rec)
                if rowup is not None:
                    # NOTE(review): commit() is kept as in the original;
                    # confirm it matches the type getUpdate() returns.
                    self.table.commit(rowup)
                    upcnt += 1
        finally:
            self._close_scanner(scanner)
        return upcnt

    def delete(self, rowkey):
        '''Delete the whole row identified by _rowkey_.'''
        op = Delete(Bytes.toBytes(java_str(rowkey)))
        self.table.delete(op)

    def deleteAll(self, rows):
        '''
        Delete every row in _rows_ that carries a '__key__' entry; records
        without one are skipped.
        '''
        for rec in rows:
            if '__key__' in rec:
                self.delete(rec['__key__'])

    def families(self):
        '''Return the table's column family names, each suffixed with ":".'''
        return [coldesc.getNameAsString() + ":"
                for coldesc in self.table.getTableDescriptor().getFamilies()]
from org.apache.hadoop.hbase.util import Bytes # ---------------------------------------------------------------- def delete_row_proc (table,key_in): print "delete_row_proc" print key_in dd = Delete(Bytes.toBytes(key_in)) table.delete (dd) # ---------------------------------------------------------------- print ("*** 開始 ***") # id_in = sys.argv[1] print ("%s" % id_in) # conf = HBaseConfiguration() table = HTable(conf, "cities") delete_row_proc (table,id_in) ss = Scan () ss.addColumn ("name","") ss.addColumn ("population","") ss.addColumn ("date_mod","") scanner = table.getScanner(ss) while 1: result = scanner.next() if not result: break key = java.lang.String(result.row) print key,"\t",
from org.apache.hadoop.hbase import HBaseConfiguration from org.apache.hadoop.hbase.client import HTable, Get from org.apache.hadoop.hbase.util import Bytes conf = HBaseConfiguration.create() table = HTable(conf, "test") get = Get(Bytes.toBytes('row1')) result = table.get(get) value = result.getValue(Bytes.toBytes('cf'), Bytes.toBytes('col1')) print Bytes.toString(value, 0, len(value))
def row(self, rowId):
    '''
    Return an HBaseRow for rowId, opening the HTable handle on first use
    and caching it on self._table.
    '''
    table = self._table
    if not table:
        table = self._table = HTable(self.conf, self.name)
    return HBaseRow(table, rowId)
import os, sys
import java.lang
from org.apache.hadoop.hbase import HBaseConfiguration
from org.apache.hadoop.hbase.client import HTable
from org.apache.hadoop.hbase.client import MetaScanner
from org.apache.hadoop.hbase.util import Bytes
from org.apache.hadoop.hbase import HConstants
from org.apache.hadoop.hbase.client import Scan
from org.apache.hadoop.hbase.util import Writables
import hbaseutils

# setup table/connection
# Opens the catalog table named by HConstants.META_TABLE_NAME and starts an
# unrestricted scan (a Scan with no arguments) over it.
conf = HBaseConfiguration()
meta_table = HTable(conf, HConstants.META_TABLE_NAME)
scanner = meta_table.getScanner(Scan())

# utility funcs
# Short alias for java.lang.String.
jls = java.lang.String

# State initialised before the loop below, whose body is not fully visible
# here.
# NOTE(review): presumably these track consecutive regions and their
# start/end keys -- confirm against the rest of the loop.
prev_region = curr_region = None
regions = []
start_keys = {}
end_keys = {}

while True:
    # Error counter, reset at the top of every iteration.
    errors = 0