Python HTableの例、org.apache.hadoop.hbase.client.HTable Pythonの例

コード例 #1

0

ファイルを表示

ファイル: region.py プロジェクト: meyarivan/misc

    def sync_region_info(self, id_region=None):
        """Refresh this object's region attributes from the catalog table.

        Reads the row keyed by ``id_region`` from the table named by
        ``HConstants.META_TABLE_NAME``, decodes the stored region info and
        copies its start key, end key, region name, table and offline flag
        onto ``self``. The decoded region info object is kept in ``self.obj``.

        :param id_region: row key of the region to look up; when ``None``
            the value of ``self.id_region`` is used.
        :raises PHBaseException: when decoding the stored region info raises
            ``java.lang.NullPointerException``.
        """
        if id_region is None:
            id_region = self.id_region

        meta_table = HTable(self.conf, HConstants.META_TABLE_NAME)
        try:
            result = meta_table.get(Get(Bytes.toBytes(id_region)))
        finally:
            # The table handle is only needed for this single lookup.
            meta_table.close()

        # Named info_bytes (not ``bytes``) so the builtin is not shadowed.
        info_bytes = result.getValue(HConstants.CATALOG_FAMILY,
                                     HConstants.REGIONINFO_QUALIFIER)

        try:
            hri = Writables.getHRegionInfo(info_bytes)
        except java.lang.NullPointerException:
            raise PHBaseException("could not retrieve region info for %s" % (id_region))

        self.start_key = jls(hri.getStartKey())
        self.end_key = jls(hri.getEndKey())
        self.name = jls(hri.getRegionName())
        # NOTE(review): this stores an HTable built from the table name rather
        # than the name itself, despite the attribute name -- confirm intent.
        self.table_name = HTable(hri.getTableDesc().getName())
        self.offline = hri.isOffline()

        self.obj = hri

コード例 #2

0

ファイルを表示

class HBaseTable(object):
    '''
    Represents an HBase table and the operations that can be performed on it.

    The underlying HTable is created lazily, on the first call to row() or
    scan(), and is then reused for later calls.
    '''
    def __init__(self, name, conf, admin):
        '''
        Remember the table name, configuration and admin handle.
        No HTable is created here.
        '''
        self.logger = Logger.getLogger("HBaseTable")
        # self.logger.setLevel(Level.DEBUG)
        self.conf = conf
        self.admin = admin
        self.name = name
        self._table = None

    def row(self, rowId):
        '''
        Return an HBaseRow for rowId bound to this table's HTable,
        creating the HTable first if needed.
        '''
        if not self._table:
            self._table = HTable(self.conf, self.name)
        return HBaseRow(self._table, rowId)

    def scan(self, start_row=None, end_row=None, filter=None):
        '''
        Generator yielding the results of a scan over the table.

        The Scan is chosen from the arguments in this order:
          - start_row and filter:  Scan(start_row, filter)  (end_row is not used)
          - start_row and end_row: Scan(start_row, end_row)
          - start_row only:        Scan(start_row)
          - otherwise:             Scan()  (end_row and filter are not used)

        The scanner is closed when the generator finishes or is closed.
        '''
        if not self._table:
            self._table = HTable(self.conf, self.name)
        if start_row and filter:
            sc = Scan(start_row, filter)
        elif start_row and end_row:
            sc = Scan(start_row, end_row)
        elif start_row:
            sc = Scan(start_row)
        else:
            sc = Scan()
        s = self._table.getScanner(sc)
        try:
            while True:
                r = s.next()
                if r is None:
                    # A plain return ends the generator; raising StopIteration
                    # inside a generator is an error under PEP 479.
                    return
                yield r
        finally:
            s.close()

コード例 #3

0

ファイルを表示

ファイル: HBase.py プロジェクト: dsidlo/Jython4BigData

    def scan(self, start_row=None, end_row=None, filter=None):
        '''
        Generator yielding the results of a scan over the table.

        The Scan is chosen from the arguments in this order:
          - start_row and filter:  Scan(start_row, filter)  (end_row is not used)
          - start_row and end_row: Scan(start_row, end_row)
          - start_row only:        Scan(start_row)
          - otherwise:             Scan()  (end_row and filter are not used)

        The HTable is created on first use and cached in self._table.
        The scanner is closed when the generator finishes or is closed.
        '''
        if not self._table:
            self._table = HTable(self.conf, self.name)
        if start_row and filter:
            sc = Scan(start_row, filter)
        elif start_row and end_row:
            sc = Scan(start_row, end_row)
        elif start_row:
            sc = Scan(start_row)
        else:
            sc = Scan()
        s = self._table.getScanner(sc)
        try:
            while True:
                r = s.next()
                if r is None:
                    # End the generator with return rather than raising
                    # StopIteration (an error inside generators under PEP 479).
                    return
                yield r
        finally:
            s.close()

コード例 #4

0

ファイルを表示

ファイル: HBase.py プロジェクト: dsidlo/Jython4BigData

class HBaseTable(object):
    '''
    Represents an HBase table and the operations that can be performed on it.

    The wrapped HTable is not opened in the constructor; row() and scan()
    create it on demand and keep it in self._table.
    '''
    def __init__(self, name, conf, admin):
        '''
        Store name, conf and admin on the instance and set up the logger.
        '''
        self.logger = Logger.getLogger("HBaseTable")
        # self.logger.setLevel(Level.DEBUG)
        self.conf = conf
        self.admin = admin
        self.name = name
        self._table = None

    def row(self, rowId):
        '''
        Return HBaseRow(self._table, rowId), creating the HTable if it has
        not been created yet.
        '''
        if not self._table:
            self._table = HTable(self.conf, self.name)
        return HBaseRow(self._table, rowId)

    def scan(self, start_row=None, end_row=None, filter=None):
        '''
        Generator over the results of a table scan.

        Argument handling, first match wins:
          - start_row and filter  -> Scan(start_row, filter); end_row unused
          - start_row and end_row -> Scan(start_row, end_row)
          - start_row             -> Scan(start_row)
          - none of the above     -> Scan(); end_row and filter unused

        The scanner is closed once iteration ends or the generator is closed.
        '''
        if not self._table:
            self._table = HTable(self.conf, self.name)
        if start_row and filter:
            sc = Scan(start_row, filter)
        elif start_row and end_row:
            sc = Scan(start_row, end_row)
        elif start_row:
            sc = Scan(start_row)
        else:
            sc = Scan()
        s = self._table.getScanner(sc)
        try:
            while True:
                r = s.next()
                if r is None:
                    # return (not raise StopIteration) so the generator also
                    # terminates cleanly where PEP 479 is in effect.
                    return
                yield r
        finally:
            s.close()

コード例 #5

0

ファイルを表示

    def __init__(self, tablename, conf=None):
        '''
        Open the table called tablename and keep it in self.table.
        A new HBaseConfiguration is built when conf is None.
        '''
        config = HBaseConfiguration() if conf is None else conf
        self.table = HTable(config, tablename)

コード例 #6

0

ファイルを表示

class Table(object):
    '''
    Convenience wrapper around an HTable offering get/scan/save/update/delete
    helpers that convert rows with todict() and values with PBUtil.
    '''
    def __init__(self, tablename, conf=None):
        '''
        Open the table called tablename; a new HBaseConfiguration is created
        when conf is None.
        '''
        if conf is None:
            conf = HBaseConfiguration()

        self.table = HTable(conf, tablename)

    def get(self, rowkey):
        '''
        Retrieves the specific table row as a dictionary, with
        full column names (column family:name) as keys, and deserialized
        (from protocol buffers) values as values.

        If the row does not exist, returns None.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        row = self.table.get(op)
        if row is not None and not row.isEmpty():
            return todict(row)

        return None

    def getVersions(self, rowkey, limit=None, ts=None):
        '''
        Retrieves up to _limit_ versions of the specified row.

        When _ts_ is given the Get is restricted with setTimeRange(0, ts);
        when it is None no time range is set. When _limit_ is None the
        maximum number of versions is left at the Get's default.

        Returns a list of (timestamp, {column: value}) tuples, one per
        entry of the result; the list is empty if the row is missing.
        '''
        op = Get(Bytes.toBytes(java_str(rowkey)))
        if ts is not None:
            op.setTimeRange(0, ts)
        if limit is not None:
            op.setMaxVersions(limit)

        row = self.table.get(op)

        versions = []
        if row is not None and not row.isEmpty():
            for e in row.list():
                col = str(java.lang.String(e.getColumn()))
                colts = e.getTimestamp()
                val = PBUtil.toValue(e.getValue())
                versions.append((colts, {col: val}))

        return versions

    def scan(self, cols, startrow=None, limit=50, filter=None):
        '''
        Scan the columns in _cols_ starting at _startrow_ and return up to
        _limit_ rows (all rows when _limit_ is None) as todict() dictionaries.

        _startrow_ is handed to Scan unchanged; None means
        HConstants.EMPTY_START_ROW. _filter_, when given, is set on the Scan.
        '''
        rows = []
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW

        scan = Scan(startrow)

        if filter is not None:
            scan.setFilter(filter)

        for c in cols:
            scan.addColumn(c)

        scanner = None

        cnt = 0
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and cnt >= limit:
                    break

                rows.append(todict(rec))
                cnt += 1
        finally:
            if scanner is not None:
                # Best-effort close: a failure here must not mask the result
                # or an exception raised by the scan itself.
                try:
                    scanner.close()
                except:
                    pass

        return rows

    def scan_apply(self,
                   cols,
                   startrow=None,
                   limit=50,
                   filter=None,
                   rowfunc=None):
        '''
        Scan the columns in _cols_ and call rowfunc(rec) for each of the
        first _limit_ records (every record when _limit_ is None).

        _startrow_ is converted with Bytes.toBytes(java_str(startrow));
        None means HConstants.EMPTY_START_ROW. Returns None.
        rowfunc is called unconditionally, so it must be supplied whenever
        the scan yields at least one record.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        else:
            startrow = Bytes.toBytes(java_str(startrow))

        scan = Scan(startrow)

        if filter is not None:
            scan.setFilter(filter)

        for c in cols:
            scan.addColumn(c)

        cnt = 0
        scanner = None
        try:
            scanner = self.table.getScanner(scan)
            for rec in scanner:
                if limit is not None and cnt >= limit:
                    break

                rowfunc(rec)
                cnt += 1
        finally:
            if scanner is not None:
                # Best-effort close, see scan().
                try:
                    scanner.close()
                except:
                    pass

        return

    def save(self, rowkey, vals, ts=None):
        '''
        Write the columns in _vals_ ({'family:qualifier': value}) to the row
        _rowkey_ with a single Put, using timestamp _ts_ when it is given.

        protobuf Message values are stored via toByteArray(); every other
        value is serialized with PBUtil.toBytes().
        '''
        key = java_str(rowkey)

        rowup = Put(Bytes.toBytes(key))
        if ts is not None:
            rowup.setTimestamp(ts)

        for k, v in vals.items():
            (fam, col) = KeyValue.parseColumn(Bytes.toBytes(java_str(k)))
            if isinstance(v, com.google.protobuf.Message):
                rowup.add(fam, col, v.toByteArray())
            else:
                rowup.add(fam, col, PBUtil.toBytes(v))

        self.table.put(rowup)

    def update(self, startrow, wherefilt, sets):
        '''
        Scan every column family from _startrow_, restricted by _wherefilt_
        when it is not None, and commit the update that ColumnUpdate(sets)
        produces for each record.

        Returns the number of records for which an update was committed.
        '''
        if startrow is None:
            startrow = HConstants.EMPTY_START_ROW
        elif isinstance(startrow, java.lang.String):
            startrow = Bytes.toBytes(startrow)

        updater = ColumnUpdate(sets)
        cols = self.families()

        upcnt = 0

        scan = Scan(startrow)
        for c in cols:
            scan.addColumn(c)

        if wherefilt is not None:
            # Use the caller's filter; the previous code passed the Python
            # builtin ``filter`` here by mistake.
            scan.setFilter(wherefilt)

        scanner = None
        try:
            scanner = self.table.getScanner(scan)

            for rec in scanner:
                rowup = updater.getUpdate(rec)
                if rowup is not None:
                    # NOTE(review): save() writes with table.put(); confirm
                    # that commit() is available on the HTable in use.
                    self.table.commit(rowup)
                    upcnt += 1
        finally:
            if scanner is not None:
                # Best-effort close, see scan().
                try:
                    scanner.close()
                except:
                    pass

        return upcnt

    def delete(self, rowkey):
        '''
        Delete the row identified by _rowkey_.
        '''
        op = Delete(Bytes.toBytes(java_str(rowkey)))
        self.table.delete(op)

    def deleteAll(self, rows):
        '''
        Delete the row named by the '__key__' entry of every record in
        _rows_; records without a '__key__' entry are skipped.
        '''
        for rec in rows:
            if '__key__' in rec:
                op = Delete(Bytes.toBytes(java_str(rec['__key__'])))
                self.table.delete(op)

    def families(self):
        '''
        Return the table's column family names, each with a trailing ":".
        '''
        return [
            coldesc.getNameAsString() + ":"
            for coldesc in self.table.getTableDescriptor().getFamilies()
        ]

コード例 #7

0

ファイルを表示

ファイル: hbase.py プロジェクト: ScottWang/meetup.beeno

	def __init__(self, tablename, conf=None):
		'''
		Create the HTable for tablename and store it in self.table,
		falling back to a fresh HBaseConfiguration when conf is None.
		'''
		config = conf if conf is not None else HBaseConfiguration()
		self.table = HTable(config, tablename)

コード例 #8

0

ファイルを表示

ファイル: hbase.py プロジェクト: ScottWang/meetup.beeno

class Table(object):
	'''
	Wrapper around an HTable with helpers for reading, scanning, writing,
	updating and deleting rows; rows are converted with todict() and values
	with PBUtil.
	'''
	def __init__(self, tablename, conf=None):
		'''
		Open the table called tablename, building a new HBaseConfiguration
		when conf is None.
		'''
		if conf is None:
			conf = HBaseConfiguration()

		self.table = HTable(conf, tablename)

	def get(self, rowkey):
		'''
		Retrieves the specific table row as a dictionary, with
		full column names (column family:name) as keys, and deserialized
		(from protocol buffers) values as values.

		If the row does not exist, returns None.
		'''
		op = Get(Bytes.toBytes(java_str(rowkey)))
		row = self.table.get(op)
		if row is not None and not row.isEmpty():
			return todict(row)

		return None

	def getVersions(self, rowkey, limit=None, ts=None):
		'''
		Retrieves up to _limit_ versions of the specified row.

		A given _ts_ restricts the Get with setTimeRange(0, ts); with
		_ts_ None no time range is set. With _limit_ None the Get's
		default maximum number of versions applies.

		Returns a list of (timestamp, {column: value}) tuples; the list
		is empty when the row is missing or empty.
		'''
		op = Get(Bytes.toBytes(java_str(rowkey)))
		if ts is not None:
			op.setTimeRange(0, ts)
		if limit is not None:
			op.setMaxVersions(limit)

		row = self.table.get(op)

		versions = []
		if row is not None and not row.isEmpty():
			for e in row.list():
				col = str(java.lang.String(e.getColumn()))
				colts = e.getTimestamp()
				val = PBUtil.toValue(e.getValue())
				versions.append((colts, {col: val}))

		return versions

	def scan(self, cols, startrow=None, limit=50, filter=None):
		'''
		Scan the columns in _cols_ from _startrow_ and return at most
		_limit_ rows (no cap when _limit_ is None) as todict() dictionaries.

		_startrow_ is passed to Scan as given; None selects
		HConstants.EMPTY_START_ROW. A non-None _filter_ is set on the Scan.
		'''
		rows = []
		if startrow is None:
			startrow = HConstants.EMPTY_START_ROW

		scan = Scan(startrow)

		if filter is not None:
			scan.setFilter(filter)

		for c in cols:
			scan.addColumn(c)

		scanner = None

		cnt = 0
		try:
			scanner = self.table.getScanner(scan)
			for rec in scanner:
				if limit is not None and cnt >= limit:
					break

				rows.append(todict(rec))
				cnt += 1
		finally:
			if scanner is not None:
				# Closing is best-effort so that a close failure cannot
				# hide the rows collected or an error from the scan.
				try:
					scanner.close()
				except:
					pass

		return rows

	def scan_apply(self, cols, startrow=None, limit=50, filter=None, rowfunc=None):
		'''
		Scan the columns in _cols_ and invoke rowfunc(rec) on each of the
		first _limit_ records (all of them when _limit_ is None).

		_startrow_ is converted with Bytes.toBytes(java_str(startrow));
		None selects HConstants.EMPTY_START_ROW. Nothing is returned.
		rowfunc is invoked without a None check, so callers must pass one
		if the scan can produce records.
		'''
		if startrow is None:
			startrow = HConstants.EMPTY_START_ROW
		else:
			startrow = Bytes.toBytes(java_str(startrow))

		scan = Scan(startrow)

		if filter is not None:
			scan.setFilter(filter)

		for c in cols:
			scan.addColumn(c)

		cnt = 0
		scanner = None
		try:
			scanner = self.table.getScanner(scan)
			for rec in scanner:
				if limit is not None and cnt >= limit:
					break

				rowfunc(rec)
				cnt += 1
		finally:
			if scanner is not None:
				# Best-effort close, as in scan().
				try:
					scanner.close()
				except:
					pass

		return

	def save(self, rowkey, vals, ts=None):
		'''
		Store the columns of _vals_ ({'family:qualifier': value}) in row
		_rowkey_ through one Put, stamped with _ts_ when it is provided.

		Values that are protobuf Messages are written via toByteArray();
		all others go through PBUtil.toBytes().
		'''
		key = java_str(rowkey)

		rowup = Put(Bytes.toBytes(key))
		if ts is not None:
			rowup.setTimestamp(ts)

		for k, v in vals.items():
			(fam, col) = KeyValue.parseColumn(Bytes.toBytes(java_str(k)))
			if isinstance(v, com.google.protobuf.Message):
				rowup.add(fam, col, v.toByteArray())
			else:
				rowup.add(fam, col, PBUtil.toBytes(v))

		self.table.put(rowup)

	def update(self, startrow, wherefilt, sets):
		'''
		Scan all column families from _startrow_, filtered by _wherefilt_
		when it is not None, and commit whatever update ColumnUpdate(sets)
		returns for each record.

		Returns how many records had an update committed.
		'''
		if startrow is None:
			startrow = HConstants.EMPTY_START_ROW
		elif isinstance(startrow, java.lang.String):
			startrow = Bytes.toBytes(startrow)

		updater = ColumnUpdate(sets)
		cols = self.families()

		upcnt = 0

		scan = Scan(startrow)
		for c in cols:
			scan.addColumn(c)

		if wherefilt is not None:
			# Apply the caller's filter; this used to pass the Python
			# builtin ``filter`` instead of the wherefilt argument.
			scan.setFilter(wherefilt)

		scanner = None
		try:
			scanner = self.table.getScanner(scan)

			for rec in scanner:
				rowup = updater.getUpdate(rec)
				if rowup is not None:
					# NOTE(review): save() uses table.put(); confirm that
					# commit() exists on the HTable version in use.
					self.table.commit(rowup)
					upcnt += 1
		finally:
			if scanner is not None:
				# Best-effort close, as in scan().
				try:
					scanner.close()
				except:
					pass

		return upcnt

	def delete(self, rowkey):
		'''
		Delete the row identified by _rowkey_.
		'''
		op = Delete(Bytes.toBytes(java_str(rowkey)))
		self.table.delete(op)

	def deleteAll(self, rows):
		'''
		For every record in _rows_ that has a '__key__' entry, delete the
		row with that key; other records are ignored.
		'''
		for rec in rows:
			if '__key__' in rec:
				op = Delete(Bytes.toBytes(java_str(rec['__key__'])))
				self.table.delete(op)

	def families(self):
		'''
		Return the names of the table's column families, each followed by ":".
		'''
		return [coldesc.getNameAsString() + ":" for coldesc in self.table.getTableDescriptor().getFamilies()]

コード例 #9

0

ファイルを表示

ファイル: hbase_delete.py プロジェクト: ekzemplaro/data_base_language

from org.apache.hadoop.hbase.util import Bytes
# ----------------------------------------------------------------
def delete_row_proc(table, key_in):
	"""Delete the row keyed by key_in from table.

	Prints the function name and the key before issuing the Delete.
	"""
	# Single-argument print(...) matches the call style used by the rest of
	# this script and prints the same text as the print statement did.
	print("delete_row_proc")
	print(key_in)
	dd = Delete(Bytes.toBytes(key_in))
	table.delete(dd)
# ----------------------------------------------------------------
# Script body: delete the row whose key is the first command-line argument
# from the "cities" table, then scan the table.
# The banner text "開始" is Japanese for "start".
print ("*** 開始 ***")
#
id_in = sys.argv[1]
print ("%s" % id_in)
#
conf = HBaseConfiguration()

table = HTable(conf, "cities")

delete_row_proc (table,id_in)

# Build a scan over three columns; each is added with an empty second argument.
ss = Scan ()
ss.addColumn ("name","")
ss.addColumn ("population","")
ss.addColumn ("date_mod","")

scanner = table.getScanner(ss)
# Read results until next() returns a falsy value.
# NOTE(review): the scanner is not closed in the visible code, and the loop
# body appears to be cut off after the key is printed -- confirm against the
# full script.
while 1:
	result = scanner.next()
	if not result:
		break
	key = java.lang.String(result.row)
	# Python 2 print statement; the trailing comma suppresses the newline.
	print key,"\t",

コード例 #10

0

ファイルを表示

ファイル: test.py プロジェクト: akalyaev/vagrant-hbase

from org.apache.hadoop.hbase import HBaseConfiguration
from org.apache.hadoop.hbase.client import HTable, Get
from org.apache.hadoop.hbase.util import Bytes


# Read cell cf:col1 of row "row1" from the "test" table and print it as text.
conf = HBaseConfiguration.create()
table = HTable(conf, "test")

row_key = Bytes.toBytes('row1')
family = Bytes.toBytes('cf')
qualifier = Bytes.toBytes('col1')

get = Get(row_key)
result = table.get(get)
value = result.getValue(family, qualifier)
print(Bytes.toString(value, 0, len(value)))

コード例 #11

0

ファイルを表示

 def row(self, rowId):
     '''
     Return an HBaseRow for rowId on this table's HTable; the HTable is
     created and cached in self._table on first use.
     '''
     handle = self._table
     if not handle:
         handle = HTable(self.conf, self.name)
         self._table = handle
     return HBaseRow(handle, rowId)

コード例 #12

0

ファイルを表示

from org.apache.hadoop.hbase import HBaseConfiguration
from org.apache.hadoop.hbase.client import HTable, Get
from org.apache.hadoop.hbase.util import Bytes

# Look up row "row1" in table "test" and print the value stored at cf:col1.
to_bytes = Bytes.toBytes

conf = HBaseConfiguration.create()
table = HTable(conf, "test")
get = Get(to_bytes('row1'))
result = table.get(get)
value = result.getValue(to_bytes('cf'), to_bytes('col1'))
text = Bytes.toString(value, 0, len(value))
print(text)

コード例 #13

0

ファイルを表示

ファイル: check_meta_data.py プロジェクト: meyarivan/misc

import os, sys
import java.lang

from org.apache.hadoop.hbase import HBaseConfiguration
from org.apache.hadoop.hbase.client import HTable
from org.apache.hadoop.hbase.client import MetaScanner
from org.apache.hadoop.hbase.util import Bytes
from org.apache.hadoop.hbase import HConstants
from org.apache.hadoop.hbase.client import Scan
from org.apache.hadoop.hbase.util import Writables

import hbaseutils

# Set up the connection: open the table named by HConstants.META_TABLE_NAME
# and start a scan over it using a default Scan().
conf = HBaseConfiguration()
meta_table = HTable(conf, HConstants.META_TABLE_NAME)
scanner = meta_table.getScanner(Scan())

# Utility: short alias for java.lang.String.
jls = java.lang.String

# Scan state, all empty at start.
# NOTE(review): nothing visible here fills or reads these -- presumably the
# loop body tracks consecutive regions and their start/end keys; confirm
# against the full script.
prev_region = curr_region = None

regions = []
start_keys = {}
end_keys = {}

# NOTE(review): the loop body is cut off in this excerpt; only the reset of
# the error counter at the top of each iteration is visible.
while True:

    errors = 0

コード例 #14

0

ファイルを表示

ファイル: HBase.py プロジェクト: dsidlo/Jython4BigData

 def row(self, rowId):
     '''
     Wrap rowId in an HBaseRow backed by this table's HTable, opening the
     HTable first when self._table is not set yet.
     '''
     if self._table:
         return HBaseRow(self._table, rowId)
     self._table = HTable(self.conf, self.name)
     return HBaseRow(self._table, rowId)