Example #1
0
class HbaseUtil:
    """Convenience wrapper around the HBase Thrift client for one table.

    The constructor opens the Thrift connection and creates the table with
    the ``page:title`` and ``page:article`` columns if it does not exist yet.
    Call ``close()`` when finished to release the connection.
    """

    def __init__(self, tableName, host='localhost', port=9090):
        """Connect to the Thrift server and make sure ``tableName`` exists.

        Args:
            tableName: name of the table this helper operates on.
            host: Thrift server host name.
            port: Thrift server port.
        """
        self.tableName = tableName
        sock = TSocket.TSocket(host, port)
        self.transport = TTransport.TBufferedTransport(sock)
        # Build the protocol on the buffered wrapper, not on the raw socket;
        # otherwise every read/write bypasses the buffering layer.
        protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
        self.client = Client(protocol)
        self.transport.open()
        try:
            if tableName not in self.client.getTableNames():
                print('creating table %s' % tableName)
                self.client.createTable(tableName, self._defaultColumns())
        except Exception:
            # Do not leak the open connection when set-up fails.
            self.transport.close()
            raise

    @staticmethod
    def _defaultColumns():
        """Return the column descriptors used when the table is created."""
        title = ColumnDescriptor()
        title.name = 'page:title'
        title.maxVersions = 10
        article = ColumnDescriptor()
        article.name = 'page:article'
        return [title, article]

    @staticmethod
    def _toBytes(text):
        """Return ``text`` as UTF-8 bytes; byte strings pass through as-is.

        Calling ``.encode`` on a byte string fails: Python 2 first decodes it
        as ASCII (raising on non-ASCII data) and Python 3 has no such method.
        """
        if isinstance(text, bytes):
            return text
        return text.encode('utf-8')

    def close(self):
        """Close the Thrift transport."""
        self.transport.close()

    def insert(self, rowkey, title, content):
        """Store ``title`` and ``content`` under ``rowkey``.

        Args:
            rowkey: row key to write.
            title: page title (text or already-encoded bytes).
            content: page body (text or already-encoded bytes).
        """
        mutations = [
            Mutation(column='page:article', value=self._toBytes(content)),
            Mutation(column='page:title', value=self._toBytes(title)),
        ]
        self.client.mutateRow(self.tableName, rowkey, mutations, {})

    def existRowKey(self, rowkey):
        """Return True when ``getRow`` yields at least one result for ``rowkey``."""
        return bool(self.client.getRow(self.tableName, rowkey, {}))

    def printAll(self):
        """Print the ``page:title`` cells of every row in the table."""
        print('starting scanner...')
        scanner = self.client.scannerOpen(self.tableName, '', ['page:title'], {})
        try:
            batch = self.client.scannerGet(scanner)
            while batch:
                for result in batch:
                    print(result.row)
                    for name, cell in result.columns.items():
                        print('%s %s %s' % (name, cell.value, cell.timestamp))
                        print(cell.value)
                batch = self.client.scannerGet(scanner)
        finally:
            # Always release the server-side scanner, even on errors.
            self.client.scannerClose(scanner)
        print('scanner finished ')
Example #2
0
class HBaseClient:
    ''' Thin wrapper around the HBase Thrift client on localhost:9090 '''
    def __init__(self):
        ''' open a buffered Thrift connection and create the client '''
        sock = TSocket.TSocket('localhost', 9090)
        # keep the transport so that close() can release the connection
        self.__transport = TTransport.TBufferedTransport(sock)
        protocol = TBinaryProtocol.TBinaryProtocol(self.__transport)

        self.__client = Client(protocol)
        self.__transport.open()

    def close(self):
        ''' close the underlying Thrift transport '''
        self.__transport.close()

    def getTableNames(self):
        ''' get table names '''
        return self.__client.getTableNames()

    def deleteTable(self, tName):
        ''' delete table tName, disabling it first when it is enabled '''
        if self.__client.isTableEnabled(tName):
            self.__client.disableTable(tName)

        self.__client.deleteTable(tName)

    def createTable(self, tName, ColumnDescriptors):
        ''' create table tName; raise UfException(HBASE_CREATE_ERROR) if it already exists '''
        try:
            self.__client.createTable(tName, ColumnDescriptors)
        except ttypes.AlreadyExists as excp:
            raise UfException(Errors.HBASE_CREATE_ERROR,
                              "AlreadyExists Error when creating table %s with cols: %s): %s" % \
                              (tName, [col.name for col in ColumnDescriptors], excp.message))

    def getColumnDescriptors(self, tName):
        ''' get column descriptors of tName; raise UfException(UNKNOWN_ERROR) on failure '''
        try:
            return self.__client.getColumnDescriptors(tName)
        except Exception as excp:
            # "except Exception" lets SystemExit/KeyboardInterrupt propagate;
            # the cause is appended so the failure can be diagnosed
            raise UfException(Errors.UNKNOWN_ERROR,
                              "Error when getting column descriptors table %s: %s" % (tName, excp))

    def updateRow(self, tName, rowName, mutations, timestamp=None):
        ''' apply mutations to a row, with an explicit timestamp when one is given '''
        try:
            if timestamp is None:
                self.__client.mutateRow(tName, rowName, mutations)
            else:
                self.__client.mutateRowTs(tName, rowName, mutations, timestamp)
        except Exception as excp:
            raise UfException(Errors.HBASE_UPDATE_ERROR,
                              "Error when updating table %s - rowName %s - mutations %s: %s" % \
                              (tName, rowName, mutations, excp))

    def getRow(self, tName, rowName):
        ''' get row: first result if any, otherwise the empty result as returned '''
        result = self.__client.getRow(tName, rowName)
        if not result:
            return result
        return result[0]

    def scanTable(self, tName, columns, startRow="", endRow=None):
        ''' scan a table from startRow (up to endRow when given) and return the rows as a list '''
        if endRow is None:
            scanner = self.__client.scannerOpen(tName, startRow, columns)
        else:
            scanner = self.__client.scannerOpenWithStop(tName, startRow, endRow, columns)

        ret = []
        try:
            row = self.__client.scannerGet(scanner)
            while row:
                ret.append(row[0])
                row = self.__client.scannerGet(scanner)
        finally:
            # release the server-side scanner even when the scan fails
            self.__client.scannerClose(scanner)

        return ret

    def getClient(self):
        ''' return client, in case low level api is needed '''
        return self.__client
Example #3
0
# List each column family of table `t` together with its maxVersions.
cols = client.getColumnDescriptors(t)
print("column families in %s" % t)
for col_name, col in cols.items():
    print("  column: %s, maxVer: %d" % (col.name, col.maxVersions))

#
# UTF-8 handling checks
#
invalid = "foo-\xfc\xa1\xa1\xa1\xa1\xa1"
valid = "foo-\xE7\x94\x9F\xE3\x83\x93\xE3\x83\xBC\xE3\x83\xAB"

# Cell data may hold non-UTF-8 content.
mutations = [Mutation(column="entry:foo", value=invalid)]
print(str(mutations))
client.mutateRow(t, "foo", mutations)

# Empty row key, empty qualifier and empty value.
mutations = [Mutation(column="entry:", value="")]
client.mutateRow(t, "", mutations)

# A row key that is valid UTF-8.
mutations = [Mutation(column="entry:foo", value=valid)]
client.mutateRow(t, valid, mutations)

# A non-UTF-8 row key is expected to be rejected with an IOError.
try:
    mutations = [Mutation(column="entry:foo", value=invalid)]
    client.mutateRow(t, invalid, mutations)
except ttypes.IOError as e:
    print('expected exception: %s' % e.message)
Example #4
0
    print "WARN: " + ae.message

# List each column family of table `t` together with its maxVersions.
cols = client.getColumnDescriptors(t)
print("column families in %s" % t)
for col_name, col in cols.items():
    print("  column: %s, maxVer: %d" % (col.name, col.maxVersions))

#
# UTF-8 handling checks
#
invalid = "foo-\xfc\xa1\xa1\xa1\xa1\xa1"
valid = "foo-\xE7\x94\x9F\xE3\x83\x93\xE3\x83\xBC\xE3\x83\xAB"

# Cell data may hold non-UTF-8 content.
mutations = [Mutation(dict(column="entry:foo", value=invalid))]
client.mutateRow(t, "foo", mutations)

# Empty row key, empty qualifier and empty value.
mutations = [Mutation(dict(column="entry:", value=""))]
client.mutateRow(t, "", mutations)

# A row key that is valid UTF-8.
mutations = [Mutation(dict(column="entry:foo", value=valid))]
client.mutateRow(t, valid, mutations)

# A non-UTF-8 row key is expected to be rejected with an IOError.
try:
    mutations = [Mutation(dict(column="entry:foo", value=invalid))]
    client.mutateRow(t, invalid, mutations)
except ttypes.IOError as e:
    print("expected exception: %s" % e.message)
Example #5
0
# Create the table; any failure is reported as a warning and the script goes on.
try:
    print('creating table : % s' % t)
    client.createTable(t, columns)
except Exception as ae:
    print('Warn:' + ae.message)


# Insert data
invalid = 'foo-\xfc\xa1\xa1\xa1\xa1\xa1'
valid = 'foo-\xE7\x94\x9F\xE3\x83\x93\xE3\x83\xBC\xE3\x83\xAB'

# Cell data may hold non-UTF-8 content; 'foo' is the row key.
mutations = [Mutation(column='entry:foo', value=invalid)]
print(str(mutations))
client.mutateRow(t, 'foo', mutations)

# Empty strings: an empty cell value written under an empty row key.
mutations = [Mutation(column='entry:foo', value='')]
client.mutateRow(t, '', mutations)

# A row key that is valid UTF-8.
mutations = [Mutation(column='entry:foo', value=valid)]
client.mutateRow(t, valid, mutations)


# Run a scanner over the rows just created (full table scan).
print('starting scanner...')