コード例 #1
0
class Table(object):
    '''
    Convenience wrapper around a single HBase table (an HTable instance
    kept in self.table), offering get / scan / save / update / delete
    helpers.
    '''

    def __init__(self, tablename, conf=None):
        '''
        Opens the table named _tablename_.  If _conf_ is None, a new
        HBaseConfiguration() is created and used instead.
        '''
        if conf is None:
            conf = HBaseConfiguration()

        self.table = HTable(conf, tablename)

    def _build_scan(self, startrow, cols, filter=None):
        '''
        Builds a Scan starting at _startrow_ (passed to Scan() exactly as
        given), restricted to the columns in _cols_, with _filter_
        attached when it is not None.
        '''
        scan = Scan(startrow)

        if filter is not None:
            scan.setFilter(filter)

        for c in cols:
            scan.addColumn(c)

        return scan

    def _close_scanner(self, scanner):
        '''
        Closes _scanner_ if one was opened.  Errors raised by close() are
        ignored so that cleanup never replaces the outcome of the scan.
        '''
        if scanner is None:
            return

        try:
            scanner.close()
        except:
            # Deliberately best-effort.  The bare except is kept from the
            # original code -- presumably so that Java exceptions raised
            # through Jython are swallowed as well (TODO confirm).
            pass

    def _for_each_row(self, scan, limit, func):
        '''
        Opens a scanner for _scan_ and calls func(rec) for each record,
        stopping once _limit_ records have been handled (None means no
        limit).  The scanner is always closed, even when func raises.
        '''
        cnt = 0
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and cnt >= limit:
                    break

                func(rec)
                cnt += 1
        finally:
            self._close_scanner(scanner)

    def get(self, rowkey):
        '''
        Retrieves the specific table row as a dictionary, with
        full column names (column family:name) as keys, and deserialized
        (from protocol buffers) values as values.

        If the row does not exist (the result is None or empty),
        returns None.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        row = self.table.get(op)
        if row is None or row.isEmpty():
            return None

        return todict(row)

    def getVersions(self, rowkey, limit=None, ts=None):
        '''
        Retrieves versions of the cells of the specified row as a list of
        (timestamp, {column: value}) tuples, one tuple per returned cell.
        An empty list is returned when the row is missing or empty.

        If _ts_ is not None, the Get is restricted to the time range
        (0, ts); if _limit_ is not None it is passed to setMaxVersions().
        When either is None the corresponding setting is left at the
        Get's default.

        NOTE(review): HBase documents the upper bound of setTimeRange()
        as exclusive, so versions stamped exactly _ts_ may be left out --
        confirm whether "at or before ts" was intended.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        if ts is not None:
            op.setTimeRange(0, ts)
        if limit is not None:
            op.setMaxVersions(limit)

        row = self.table.get(op)

        versions = []
        if row is not None and not row.isEmpty():
            for e in row.list():
                col = str(java.lang.String(e.getColumn()))
                colts = e.getTimestamp()
                val = PBUtil.toValue(e.getValue())
                versions.append((colts, {col: val}))

        return versions

    def scan(self, cols, startrow=None, limit=50, filter=None):
        '''
        Scans the table and returns a list with one todict() dictionary
        per record, at most _limit_ of them (None means no limit).

        _cols_ are the columns added to the scan; _filter_, when not
        None, is set as the scan filter.  A _startrow_ of None starts at
        HConstants.EMPTY_START_ROW.

        NOTE(review): unlike scan_apply(), a non-None _startrow_ is handed
        to Scan() unconverted -- callers presumably pass row-key bytes
        here; confirm before unifying the two methods.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW

        rows = []

        def collect(rec):
            rows.append(todict(rec))

        self._for_each_row(self._build_scan(startrow, cols, filter),
                           limit, collect)
        return rows

    def scan_apply(self,
                   cols,
                   startrow=None,
                   limit=50,
                   filter=None,
                   rowfunc=None):
        '''
        Scans the table like scan(), but instead of collecting rows calls
        rowfunc(rec) on each raw record, for at most _limit_ records
        (None means no limit).  Returns None.

        A non-None _startrow_ is converted with
        Bytes.toBytes(java_str(startrow)).  _rowfunc_ is called without a
        None check, so it is effectively required whenever the scan
        yields at least one record.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        else:
            startrow = Bytes.toBytes(java_str(startrow))

        self._for_each_row(self._build_scan(startrow, cols, filter),
                           limit, rowfunc)

        return

    def save(self, rowkey, vals, ts=None):
        '''
        Writes the columns in _vals_ to the row _rowkey_ with a single Put.

        _vals_ maps full column names (parsed with KeyValue.parseColumn
        into family and qualifier) to values.  protobuf Message values are
        stored via toByteArray(); anything else goes through
        PBUtil.toBytes().  If _ts_ is not None it is set as the Put's
        timestamp.
        '''
        key = java_str(rowkey)

        rowup = Put(Bytes.toBytes(key))
        if ts is not None:
            rowup.setTimestamp(ts)

        for k, v in vals.items():
            (fam, col) = KeyValue.parseColumn(Bytes.toBytes(java_str(k)))
            if isinstance(v, com.google.protobuf.Message):
                rowup.add(fam, col, v.toByteArray())
            else:
                rowup.add(fam, col, PBUtil.toBytes(v))

        self.table.put(rowup)

    def update(self, startrow, wherefilt, sets):
        '''
        Scans every column family of the table from _startrow_, asks a
        ColumnUpdate(sets) for an update of each scanned record and
        commits every non-None update.  Returns the number of updates
        committed.

        _wherefilt_, when not None, is set as the scan filter.  A
        _startrow_ of None starts at HConstants.EMPTY_START_ROW; a
        java.lang.String is converted with Bytes.toBytes(); any other
        value is passed to Scan() unchanged.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        elif isinstance(startrow, java.lang.String):
            startrow = Bytes.toBytes(startrow)

        updater = ColumnUpdate(sets)

        # The filter must be wherefilt: this method has no local named
        # "filter", so that name would resolve to the Python builtin.
        scan = self._build_scan(startrow, self.families(), wherefilt)

        upcnt = 0
        scanner = None
        try:
            scanner = self.table.getScanner(scan)

            for rec in scanner:
                rowup = updater.getUpdate(rec)
                if rowup is not None:
                    self.table.commit(rowup)
                    upcnt += 1
        finally:
            self._close_scanner(scanner)

        return upcnt

    def delete(self, rowkey):
        '''
        Deletes the row identified by _rowkey_.
        '''
        op = Delete(Bytes.toBytes(java_str(rowkey)))
        self.table.delete(op)

    def deleteAll(self, rows):
        '''
        Deletes the row of every record in _rows_ that carries a
        '__key__' entry; records without one are skipped.
        '''
        for rec in rows:
            if '__key__' in rec:
                self.delete(rec['__key__'])

    def families(self):
        '''
        Returns the table's column family names, each suffixed with ":".
        '''
        return [
            coldesc.getNameAsString() + ":"
            for coldesc in self.table.getTableDescriptor().getFamilies()
        ]
コード例 #2
0
ファイル: hbase.py プロジェクト: ScottWang/meetup.beeno
class Table(object):
	'''
	Convenience wrapper around a single HBase table (an HTable instance
	kept in self.table), offering get / scan / save / update / delete
	helpers.
	'''

	def __init__(self, tablename, conf=None):
		'''
		Opens the table named _tablename_.  If _conf_ is None, a new
		HBaseConfiguration() is created and used instead.
		'''
		if conf is None:
			conf = HBaseConfiguration()

		self.table = HTable(conf, tablename)

	def _build_scan(self, startrow, cols, filter=None):
		'''
		Builds a Scan starting at _startrow_ (passed to Scan() exactly as
		given), restricted to the columns in _cols_, with _filter_
		attached when it is not None.
		'''
		scan = Scan(startrow)

		if filter is not None:
			scan.setFilter(filter)

		for c in cols:
			scan.addColumn(c)

		return scan

	def _close_scanner(self, scanner):
		'''
		Closes _scanner_ if one was opened.  Errors raised by close() are
		ignored so that cleanup never replaces the outcome of the scan.
		'''
		if scanner is None:
			return

		try:
			scanner.close()
		except:
			# Deliberately best-effort.  The bare except is kept from the
			# original code -- presumably so that Java exceptions raised
			# through Jython are swallowed as well (TODO confirm).
			pass

	def _for_each_row(self, scan, limit, func):
		'''
		Opens a scanner for _scan_ and calls func(rec) for each record,
		stopping once _limit_ records have been handled (None means no
		limit).  The scanner is always closed, even when func raises.
		'''
		cnt = 0
		scanner = None
		try:
			scanner = self.table.getScanner(scan)
			for rec in scanner:
				if limit is not None and cnt >= limit:
					break

				func(rec)
				cnt += 1
		finally:
			self._close_scanner(scanner)

	def get(self, rowkey):
		'''
		Retrieves the specific table row as a dictionary, with
		full column names (column family:name) as keys, and deserialized
		(from protocol buffers) values as values.

		If the row does not exist (the result is None or empty),
		returns None.
		'''
		op = Get( Bytes.toBytes( java_str(rowkey) ) )
		row = self.table.get(op)
		if row is None or row.isEmpty():
			return None

		return todict(row)

	def getVersions(self, rowkey, limit=None, ts=None):
		'''
		Retrieves versions of the cells of the specified row as a list of
		(timestamp, {column: value}) tuples, one tuple per returned cell.
		An empty list is returned when the row is missing or empty.

		If _ts_ is not None, the Get is restricted to the time range
		(0, ts); if _limit_ is not None it is passed to setMaxVersions().
		When either is None the corresponding setting is left at the
		Get's default.

		NOTE(review): HBase documents the upper bound of setTimeRange()
		as exclusive, so versions stamped exactly _ts_ may be left out --
		confirm whether "at or before ts" was intended.
		'''
		op = Get( Bytes.toBytes(java_str(rowkey)) )
		if ts is not None:
			op.setTimeRange( 0, ts )
		if limit is not None:
			op.setMaxVersions(limit)

		row = self.table.get(op)

		versions = []
		if row is not None and not row.isEmpty():
			for e in row.list():
				col = str(java.lang.String(e.getColumn()))
				colts = e.getTimestamp()
				val = PBUtil.toValue( e.getValue() )
				versions.append( (colts, {col: val} ) )

		return versions

	def scan(self, cols, startrow=None, limit=50, filter=None):
		'''
		Scans the table and returns a list with one todict() dictionary
		per record, at most _limit_ of them (None means no limit).

		_cols_ are the columns added to the scan; _filter_, when not
		None, is set as the scan filter.  A _startrow_ of None starts at
		HConstants.EMPTY_START_ROW.

		NOTE(review): unlike scan_apply(), a non-None _startrow_ is handed
		to Scan() unconverted -- callers presumably pass row-key bytes
		here; confirm before unifying the two methods.
		'''
		if startrow is None:
			startrow = HConstants.EMPTY_START_ROW

		rows = []

		def collect(rec):
			rows.append(todict(rec))

		self._for_each_row(self._build_scan(startrow, cols, filter), limit, collect)
		return rows

	def scan_apply(self, cols, startrow=None, limit=50, filter=None, rowfunc=None):
		'''
		Scans the table like scan(), but instead of collecting rows calls
		rowfunc(rec) on each raw record, for at most _limit_ records
		(None means no limit).  Returns None.

		A non-None _startrow_ is converted with
		Bytes.toBytes(java_str(startrow)).  _rowfunc_ is called without a
		None check, so it is effectively required whenever the scan
		yields at least one record.
		'''
		if startrow is None:
			startrow = HConstants.EMPTY_START_ROW
		else:
			startrow = Bytes.toBytes( java_str(startrow) )

		self._for_each_row(self._build_scan(startrow, cols, filter), limit, rowfunc)

		return

	def save(self, rowkey, vals, ts=None):
		'''
		Writes the columns in _vals_ to the row _rowkey_ with a single Put.

		_vals_ maps full column names (parsed with KeyValue.parseColumn
		into family and qualifier) to values.  protobuf Message values are
		stored via toByteArray(); anything else goes through
		PBUtil.toBytes().  If _ts_ is not None it is set as the Put's
		timestamp.
		'''
		key = java_str(rowkey)

		rowup = Put( Bytes.toBytes(key) )
		if ts is not None:
			rowup.setTimestamp(ts)

		for k, v in vals.items():
			(fam, col) = KeyValue.parseColumn( Bytes.toBytes( java_str(k) ) )
			if isinstance(v, com.google.protobuf.Message):
				rowup.add(fam, col, v.toByteArray())
			else:
				rowup.add(fam, col, PBUtil.toBytes(v))

		self.table.put(rowup)

	def update(self, startrow, wherefilt, sets):
		'''
		Scans every column family of the table from _startrow_, asks a
		ColumnUpdate(sets) for an update of each scanned record and
		commits every non-None update.  Returns the number of updates
		committed.

		_wherefilt_, when not None, is set as the scan filter.  A
		_startrow_ of None starts at HConstants.EMPTY_START_ROW; a
		java.lang.String is converted with Bytes.toBytes(); any other
		value is passed to Scan() unchanged.
		'''
		if startrow is None:
			startrow = HConstants.EMPTY_START_ROW
		elif isinstance(startrow, java.lang.String):
			startrow = Bytes.toBytes(startrow)

		updater = ColumnUpdate(sets)

		# The filter must be wherefilt: this method has no local named
		# "filter", so that name would resolve to the Python builtin.
		scan = self._build_scan(startrow, self.families(), wherefilt)

		upcnt = 0
		scanner = None
		try:
			scanner = self.table.getScanner(scan)

			for rec in scanner:
				rowup = updater.getUpdate(rec)
				if rowup is not None:
					self.table.commit(rowup)
					upcnt += 1
		finally:
			self._close_scanner(scanner)

		return upcnt

	def delete(self, rowkey):
		'''
		Deletes the row identified by _rowkey_.
		'''
		op = Delete( Bytes.toBytes( java_str(rowkey) ) )
		self.table.delete( op )

	def deleteAll(self, rows):
		'''
		Deletes the row of every record in _rows_ that carries a
		'__key__' entry; records without one are skipped.
		'''
		for rec in rows:
			if '__key__' in rec:
				self.delete( rec['__key__'] )

	def families(self):
		'''
		Returns the table's column family names, each suffixed with ":".
		'''
		return [coldesc.getNameAsString()+":" for coldesc in self.table.getTableDescriptor().getFamilies()]