コード例 #1
0
 def create_table_hbase(self, table_name, column_list=None):
     """Create an HBase table through ``self.client``.

     :param table_name: name of the table to create
     :param column_list: optional iterable of column family names; when it
         is empty or ``None`` the single family ``HBASE_COLUM_FAMILY`` is
         used instead
     """
     # Fall back to the module-level default family when none are supplied.
     family_names = column_list if column_list else [HBASE_COLUM_FAMILY]
     descriptors = [ColumnDescriptor(name=family) for family in family_names]
     self.client.createTable(table_name, descriptors)
コード例 #2
0
def put_hbase(dataframe, table, rowid):
    """Write every row of ``dataframe`` into the HBase table ``table``.

    Each DataFrame column becomes a column family and its value is stored
    under the qualifier ``a`` (cell ``<column>:a``), converted with ``str``.
    The table is created first when it is not listed by the server.

    :param dataframe: pandas DataFrame to store
        (NOTE(review): rows are read with ``dataframe.loc[i, ...]`` for
        ``i`` in ``range(len(dataframe))``, so a default 0..n-1 index is
        assumed -- confirm against callers)
    :param table: name of the target HBase table
    :param rowid: label of the column holding the row key; when no such
        column exists the positional row number is used as the key
    """
    head = dataframe.columns.values.tolist()
    # Track the presence of the key column explicitly: a falsy label such as
    # 0 is still a valid row-key column.
    has_rowid = rowid in head
    if has_rowid:
        head = [i for i in head if i != rowid]
    client = hbase_connect()
    alltable = client.getTableNames()
    print(alltable)
    if table not in alltable:
        columns = [ColumnDescriptor(name=i) for i in head]
        client.createTable(table, columns)
    for i in range(len(dataframe)):
        mutations = [
            Mutation(column=j + ':a', value=str(dataframe.loc[i, j]))
            for j in head
        ]
        row_key = str(dataframe.loc[i, rowid]) if has_rowid else str(i)
        batchMutation = [BatchMutation(row_key, mutations)]
        try:
            client.mutateRows(table, batchMutation)
        except Exception:
            # Reconnect once and retry; a second failure propagates.
            # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit
            # through instead of triggering a reconnect.
            client = hbase_connect()
            client.mutateRows(table, batchMutation)
コード例 #3
0
    def create_table(self, tablename, cflist):
        """Create an HBase table unless one named ``tablename`` already exists.

        Keeping a table to at most three column families is recommended.

        :param tablename: name of the table to create
        :param cflist: column family names as a list, e.g.
            ``["person:", "geo:"]``; every family is created with
            ``maxVersions=1``
        :return: ``(ok, message)`` -- ``(True, <success text>)`` when the
            table was created, ``(False, <reason>)`` when it already exists
            or when any exception was raised along the way

        Usage::

            ok, message = obj.create_table("people", ["person:", "geo:"])
            print(message)
        """
        try:
            # Skip creation when the table is already listed by the server.
            if tablename in self.client.getTableNames():
                return False, u'{}表已存在'.format(tablename)

            # Build a concrete list: map() would be a lazy iterator on
            # Python 3, and the client is handed a list of descriptors.
            cloumn_family_list = [
                ColumnDescriptor(name=cf, maxVersions=1) for cf in cflist
            ]

            self.client.createTable(tablename, cloumn_family_list)
        except Exception as ex:
            # Not every exception carries ``.message`` (none do by default on
            # Python 3); fall back to str() instead of raising in the handler.
            return False, getattr(ex, 'message', None) or str(ex)
        else:
            return True, u'{}表创建成功'.format(tablename)
コード例 #4
0
ファイル: HbaseHelper.py プロジェクト: zhijiay/HbaseHelper
 def createTable(self, tablename):
     """Create an HBase table with a single column family named ``default``.

     :param tablename: name of the table to create
     :return: None
     """
     self.client.createTable(tablename, [ColumnDescriptor(name='default')])
コード例 #5
0
ファイル: hbaseUtil.py プロジェクト: zero1997/movieRecommend
 def create_table(self, table, *columns):
     """Create a table with one column family per positional argument.

     :param table: table name
     :param columns: column family names
     """
     # A list comprehension instead of map(): on Python 3 map() returns a
     # lazy iterator, whereas the client is meant to receive a list.
     column_families = [ColumnDescriptor(col) for col in columns]
     self.__client.createTable(table, column_families)
コード例 #6
0
 def _initTable(cls, client):
     """Create the table ``_TABLE`` and write its seed cell.

     The table gets one column family, ``data`` (``maxVersions=1``), and the
     row ``_ROW`` receives ``_VALUE`` in column ``_DATA``.

     :param client: client object exposing ``createTable`` and ``mutateRow``
     :return: ``True`` when the table was created and seeded, and also when
         ``AlreadyExists`` was raised (the seed row is then not rewritten);
         any other exception propagates to the caller
     """
     dat = ColumnDescriptor(name='data', maxVersions=1)
     tmp = [Mutation(column=_DATA, value=_VALUE)]
     try:
         client.createTable(_TABLE, [dat])
         client.mutateRow(_TABLE, _ROW, tmp)
         dbg("Create Table For Thrift Test Success!")
     except AlreadyExists:
         # An existing table counts as success.
         pass
     # Every non-raising path ends here; the former trailing
     # ``return False`` could never be reached and was removed.
     return True
コード例 #7
0
 def __create_table(self, table):
     """Create ``table`` with the column families in ``self.columnFamilies``.

     Every family is created with ``maxVersions=1``. An ``AlreadyExists``
     error is reported on stdout and otherwise ignored; other exceptions
     propagate.

     :param table: name of the table to create
     """
     columnFamilies = [
         ColumnDescriptor(name=columnFamily, maxVersions=1)
         for columnFamily in self.columnFamilies
     ]
     try:
         self.client.createTable(table, columnFamilies)
     except AlreadyExists as tx:
         # ``except ... as`` and print() are valid on Python 2.6+ and 3,
         # unlike the former ``except X, tx`` / print-statement forms.
         print('Thrift exception')
         print('%s' % (tx.message))
コード例 #8
0
ファイル: hbase_storage.py プロジェクト: flaght/mcrawler
 def __create_table(self, table, columns_name):
     '''
     Create ``table`` unless the server already lists it.

     :param table: name of the table to create
     :param columns_name: iterable of column family names (without the
         trailing colon, which is appended here); each family is created
         with ``maxVersions=1``

     A failure of the create call is printed and swallowed (best effort).
     '''
     tables = self.client.getTableNames()
     if table not in tables:
         cols = [
             ColumnDescriptor(name='%s:' % column_name, maxVersions=1)
             for column_name in columns_name
         ]
         try:
             self.client.createTable(table, cols)
         except Exception as e:
             # Portable spelling of the former ``except Exception,e`` /
             # ``print e`` (Python 2 only).
             print(e)
コード例 #9
0
 def __create_table(self, table, columns_name):
     """
     Create ``table`` through ``self.hb`` unless it is already listed.

     :param table: name of the table to create
     :param columns_name: iterable of column family names

     NOTE(review): ``self.hb`` looks like a HappyBase ``Connection``
     (``tables()`` / ``create_table(name, families)``), whose ``families``
     argument is a dict mapping family name to an options dict -- confirm.
     The previous code passed only the last column name as ``families`` (and
     left it unbound for an empty ``columns_name``) while the descriptor
     list it built was never used.
     """
     if table in self.hb.tables():
         return
     # One entry per family, keeping the single-version setting of the
     # descriptors the old code built.
     families = {
         column_name: {'max_versions': 1} for column_name in columns_name
     }
     self.hb.create_table(table, families)
コード例 #10
0
def createTable(client, tableName, *colFamilys):
    '''
    Create a new table.

    :param client: client instance connected to HBase
    :param tableName: table name
    :param *colFamilys: any number of column family names (each is passed
        through ``str`` before use)
    '''
    descriptors = [ColumnDescriptor(name=str(family)) for family in colFamilys]
    client.createTable(tableName, descriptors)
    print('建表成功!')
コード例 #11
0
    def createTable(self):
        """(Re)create the table ``self.tablename`` with one column family.

        A table of that name already listed by the server is removed first
        through ``self.deleteTable()``. The table is then created with the
        family ``self.columnFamily`` (``maxVersions=1``).

        The create call used to sit inside the "table exists" branch, so a
        missing table was never created; it now runs in both cases.
        """
        # Drop the stale table, if any.
        if self.tablename in self.client.getTableNames():
            self.deleteTable()

        # Create the table.
        self.client.createTable(
            self.tablename,
            [ColumnDescriptor(name=self.columnFamily, maxVersions=1)])
コード例 #12
0
 def creat_table(self, table_name, all_columnFamily_name, *args):
     """Create ``table_name`` with one column family per given name.

     :param table_name: name of the table to create
     :param all_columnFamily_name: iterable of column family names
     :param args: accepted but not used
     :return: ``True`` when the table was created, ``False`` when creation
         failed and ``self.exception`` is falsy
     :raises Exception: the original error, when ``self.exception`` is truthy
     """
     try:
         table_column = [
             ColumnDescriptor(name=columnFamily)  # define a column family
             for columnFamily in all_columnFamily_name
         ]
         self.client.createTable(table_name, table_column)  # create the table
         # logging.info('creat table %s'%table_name)
         return True
     except Exception as e:
         # If the table is present but disabled, enable it.
         if not self.client.isTableEnabled(table_name):
             self.client.enableTable(table_name)
         if self.exception:
             # Bare ``raise`` hands the error to the caller with its original
             # traceback (``raise e`` resets it on Python 2).
             raise
         logging.error('creat table error: %s', e)  # the table may already exist
         return False
コード例 #13
0
 def creat_table(self, table_name, all_columnFamily_name, *args):
     """Create ``table_name`` with one column family per given name.

     :param table_name: name of the table to create
     :param all_columnFamily_name: column family names, e.g. ``['cf1', 'cf2']``
     :param args: accepted but not used
     :return: ``True`` when the table was created, ``False`` when creation
         failed and ``self.exception`` is falsy
     :raises Exception: the original error, when ``self.exception`` is truthy
     """
     try:
         table_column = [
             ColumnDescriptor(name=columnFamily)  # define a column family
             for columnFamily in all_columnFamily_name
         ]
         self.client.createTable(table_name, table_column)  # create the table
         logging.info('creat table %s', table_name)
         return True
     except Exception as e:
         logging.error('creat table error: %s', e)  # the table may already exist
         # If the table is present but disabled, enable it.
         if not self.client.isTableEnabled(table_name):
             self.client.enableTable(table_name)
         if self.exception:
             # Bare ``raise`` keeps the original traceback (``raise e`` resets
             # it on Python 2).
             raise
         return False
コード例 #14
0
ファイル: hbase_manager.py プロジェクト: flaght/mcrawler
 def __create_table(self, table, columns_name):
     '''
     Create ``table`` unless it is already known.

     ``self.tables`` serves as a cache of table names; it is refreshed from
     the server only when ``table`` is missing from it.

     :param table: name of the table to create
     :param columns_name: iterable of column family names (the trailing
         colon is appended here); each family uses ``maxVersions=1``
     :return: ``HbaseManager.CreateError`` when the create call fails,
         ``HbaseManager.ConnectError`` when anything else in the method
         raises, otherwise ``None``
     '''
     try:
         if table not in self.tables:
             self.tables = self.client.getTableNames()
         if table not in self.tables:
             cols = [
                 ColumnDescriptor(name='%s:' % column_name, maxVersions=1)
                 for column_name in columns_name
             ]
             try:
                 self.client.createTable(table, cols)
             except Exception as e:
                 # ``as`` replaces the Python-2-only ``except Exception, e``.
                 print_plus(content=e, level=1)
                 return HbaseManager.CreateError
     except Exception as e:
         print_plus(content=e, level=1)
         return HbaseManager.ConnectError
コード例 #15
0
ファイル: gettxt1.py プロジェクト: bug320/crawl
 def create_table(self, table, *columns):
     """Create ``table`` with one column family per positional argument.

     :param table: table name
     :param columns: column family names
     """
     # A concrete list rather than map(), which is a lazy iterator on
     # Python 3.
     self.client.createTable(
         table, [ColumnDescriptor(column) for column in columns])
コード例 #16
0
"""
创建一个HBase表 https://cloud.tencent.com/document/product/589/12309
"""
from thrift import Thrift
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import ColumnDescriptor, Mutation, BatchMutation, TRegionInfo
from hbase.ttypes import IOError, AlreadyExists

# Connect to the HBase Thrift server.
socket = TSocket.TSocket(host='192.168.40.188', port=9090)
socket.setTimeout(5000)

transport = TTransport.TBufferedTransport(socket)
protocol = TBinaryProtocol.TBinaryProtocol(transport)

client = Hbase.Client(protocol)
transport.open()

try:
    # Create table 'thrift_test_1' with the single column family 'cf'.
    new_table = ColumnDescriptor(name='cf:', maxVersions=1)
    client.createTable('thrift_test_1', [new_table])

    tables = client.getTableNames()
finally:
    # Close the connection even when a call above raises (for example when
    # the table already exists on a second run).
    transport.close()

print(tables)
コード例 #17
0
            TSocket(settings.hbase_thrift_host, settings.hbase_thrift_port))
transport.open()
protocol = TBinaryProtocol.TBinaryProtocol(transport)

client = Hbase.Client(protocol)

# FIXME: should use another table instead of the working table.
TABLE_NAME = "%s_item_similarities" % site_id

# print() with one argument behaves the same on Python 2 and 3.
print("About to Create the Table ...")
# Start from an empty table: an existing one is disabled and deleted first.
if TABLE_NAME in client.getTableNames():
    client.disableTable(TABLE_NAME)
    client.deleteTable(TABLE_NAME)

# Single column family "p".
client.createTable(TABLE_NAME,
    [ColumnDescriptor(name="p")]
    )

import md5

def doHash(id):
    return md5.md5(id).hexdigest()


# TODO: maybe better use multiple-column way and a compressed way. and compare.

def insertSimOneRow():
    """Write one row for the item held in the globals ``last_item1`` / ``last_rows``.

    The row key is ``doHash(last_item1)``. Column ``p:item_id1`` stores the
    item id and ``p:mostSimilarItems`` stores ``last_rows`` serialized as
    JSON.
    """
    global last_item1, last_rows
    row_key = doHash(last_item1)
    cells = [
        Mutation(column="p:item_id1", value=last_item1),
        Mutation(column="p:mostSimilarItems", value=json.dumps(last_rows)),
    ]
    client.mutateRow(TABLE_NAME, row_key, cells)
コード例 #18
0
ファイル: hbase_demo.py プロジェクト: npalgit/helloPython
# while(True):
#     result = client.scannerGet(scanid)
#     print(result)
#     if not result:
#         break

# Fetch up to 20 rows from the open scanner, print them, then release the
# scanner.
rows_data = client.scannerGetList(scanid, 20)
for data in rows_data:
    print(data)
client.scannerClose(scanid)

# NOTE(review): this prints ``row_data``, not the ``rows_data`` fetched
# above; presumably ``row_data`` is assigned earlier in the script -- confirm.
print(
    row_data
)  #[TRowResult(columns={'address:province': TCell(timestamp=1543680680608L, value='yunnan')}, row='row01')]
# Two column-family descriptors; the trailing comment shows the repr of the
# first one.
column01 = ColumnDescriptor(
    name='user_info'
)  # ColumnDescriptor(bloomFilterType='NONE', bloomFilterNbHashes=0, name='user_info', maxVersions=3, blockCacheEnabled=False, inMemory=False, timeToLive=-1, bloomFilterVectorSize=0, compression='NONE')
column02 = ColumnDescriptor('addr_info')
# HBase does not seem to support creating a pre-split table from Python
# client.createTable('tablepy',[column01,column02])
# print(client)
region_info = client.getTableRegions('tablepy')  # inspect the table's regions
table_info = client.getColumnDescriptors('tablepy')  # inspect the table's schema
print(
    region_info
)  # [TRegionInfo(startKey='', endKey='', version=1, id=1543752131747L, name='tablepy,,1543752131747.ccfa71e67b9732adb575129bf9e560eb.')]
print(
    table_info
)  # {'addr_info:': ColumnDescriptor(bloomFilterType='NONE', bloomFilterNbHashes=0, name='addr_info:', maxVersions=3, blockCacheEnabled=False, inMemory=False, timeToLive=2147483647, bloomFilterVectorSize=0, compression='NONE'), 'user_info:': ColumnDescriptor(bloomFilterType='NONE', bloomFilterNbHashes=0, name='user_info:', maxVersions=3, blockCacheEnabled=False, inMemory=False, timeToLive=2147483647, bloomFilterVectorSize=0, compression='NONE')}
# Insert data
mutation = Mutation(column='user_info:province', value='350000')
コード例 #19
0

if __name__ == "__main__":
    # --- Connect to the database ---
    # Server address and port.
    host = "localhost"
    port = "9090"
    transport = TSocket.TSocket(host, port)
    # A timeout can be set on the socket.
    transport.setTimeout(5000)
    # Choose the transport (TFramedTransport or TBufferedTransport).
    trans = TTransport.TBufferedTransport(transport)
    # Choose the wire protocol.
    protocol = TBinaryProtocol.TBinaryProtocol(trans)
    # Build the client.
    client = Hbase.Client(protocol)
    # Open the connection.
    transport.open()
    # print() with one argument behaves the same on Python 2 and 3.
    print("OK")

    # Fetch the table names (the result is not used).
    client.getTableNames()
    # Create a new table.
    _TABLE = "keyword"

    # Column family "data" keeps the 10 most recent versions; the family name
    # is written with a trailing colon.
    demo = ColumnDescriptor(name='data:',
                            maxVersions=10)
    # NOTE(review): this calls a module-level ``createTable`` rather than
    # ``client.createTable`` -- confirm that such a helper exists and takes
    # these arguments.
    createTable(_TABLE, [demo])
コード例 #20
0
# print() with a single argument behaves the same on Python 2 and 3,
# unlike the print statement.
print("[INFO] setup connection")
# Thrift over HTTPS on port 9090; the host is the first command-line
# argument.
transport = THttpClient.THttpClient("https://{0}:{1}".format(
    sys.argv[1], 9090))
client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))

table = 'test:thrift_proxy_demo'

print("[INFO] start client")
transport.open()

print("[INFO] list the current tables")
print(client.getTableNames())

print("[INFO] create a table, place some data")
client.createTable(table, [ColumnDescriptor(name='family1:')])
# Each row receives the same value in family1:cq1 and family1:cq2; the
# trailing {} is passed as mutateRow's fourth argument.
client.mutateRow(table, 'row1', [
    Mutation(column='family1:cq1', value='foo'),
    Mutation(column='family1:cq2', value='foo')
], {})
client.mutateRow(table, 'row2', [
    Mutation(column='family1:cq1', value='bar'),
    Mutation(column='family1:cq2', value='bar')
], {})
client.mutateRow(table, 'row3', [
    Mutation(column='family1:cq1', value='foo'),
    Mutation(column='family1:cq2', value='foo')
], {})
client.mutateRow(table, 'row4', [
    Mutation(column='family1:cq1', value='bar'),
    Mutation(column='family1:cq2', value='bar')
コード例 #21
0
def _station_table_name(station):
    """Return the HBase table name derived from a station name.

    The name is the ``repr`` of the UTF-8 encoded station with the ``b'...'``
    wrapper, backslashes and single quotes removed, so an ASCII station name
    maps to itself.
    """
    escaped = repr(station.encode('utf-8'))
    # Strip only the bytes-literal wrapper. Removing every "b" (as the code
    # used to) also deleted letters of the station name itself.
    return escaped[2:-1].replace('\\', '').replace("'", '')


def main():
    """Load every ``*.csv`` file of the current directory into HBase.

    The first row of each file is skipped as a header. The first data row
    names the station (column 1), which selects -- and, when missing,
    creates -- the target table with one column family per entry of
    ``Attributes``. Every data row is then stored under row key ``frame[0]``
    (the date) with one cell ``<attribute>:<hour>`` per hourly value, hours
    numbered from 1.
    """
    socket = TSocket.TSocket('127.0.0.1', 9090)
    socket.setTimeout(5000)
    transport = TTransport.TBufferedTransport(socket)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    socket.open()

    try:
        table_list = client.getTableNames()
        start = time.time()
        logging.basicConfig(format='%(asctime)s | %(levelname)s | %(message)s',
                            level=logging.INFO,
                            datefmt='%Y-%m-%d %H:%M:%S')

        logging.info('Initiating task: Taiwan Air Quality!')

        Attributes = [
            'AMB_TEMP', 'CO', 'NO', 'NO2', 'NOx', 'O3', 'PM10', 'PM2.5',
            'RAINFALL', 'RAIN_COND', 'UVB', 'RH', 'SO2', 'WD_HR', 'WIND_DIREC',
            'WIND_SPEED', 'WS_HR', 'CH4', 'NMHC', 'THC', 'PH_RAIN'
        ]

        csvfiles = [
            filename for filename in os.listdir(os.getcwd())
            if filename.endswith('.csv')
        ]
        logging.info(str(csvfiles))

        InsertCounts = 0

        for csv_path in csvfiles:
            with open(csv_path, newline='') as f:
                frames = csv.reader(f)
                table_Name = ''
                logging.info("Start reading {0}".format(csv_path))

                for row_number, frame in enumerate(tqdm(frames)):
                    if row_number == 0:
                        # Header row.
                        continue
                    if row_number == 1:
                        # The first data row decides the table for this file.
                        table_Name = _station_table_name(frame[1])
                        if table_Name not in table_list:
                            Column_Descriptors = [
                                ColumnDescriptor(name=attribute)
                                for attribute in Attributes
                            ]
                            client.createTable(table_Name, Column_Descriptors)
                            # Remember the new table so a later file for the
                            # same station does not try to create it again.
                            table_list.append(table_Name)
                            logging.info('Build Table : {0}'.format(table_Name))
                        else:
                            logging.info(
                                'Table {0} already exist, no need to create'.
                                format(table_Name))

                    # ['2018/01/02', 'iilan', 'NOx', '5.1', '4.4', '3.5', '2.1', '2.5', '3.2', '4.6', '15',
                    # '13', '11', '7', '6.8', '7.1', '13', '13', '12', '13', '16', '24', '23', '20', '24', '18', '13']

                    row = frame[0]  # date
                    column = frame[2]  # attr
                    # Columns 3..26 hold the 24 hourly values; the former
                    # range(3, 26) stopped one short and lost hour 24.
                    mutations = [
                        Mutation(column=column + ':' + str(qualifier),
                                 value=value)
                        for qualifier, value in enumerate(frame[3:27], start=1)
                    ]
                    if mutations:
                        # One call per CSV row instead of one per cell.
                        client.mutateRow(table_Name, row, mutations)
                    InsertCounts += len(mutations)

        end = time.time()

        logging.info("================Insert Done================\n")
        logging.info("totalInsertCount: {0}, totalTimeSpend: {1}\n".format(
            InsertCounts, end - start))
        logging.info(client.getTableNames())
    finally:
        # Release the connection whether or not the load succeeded.
        socket.close()
コード例 #22
0
 def createTable(self, tableName, columnFamilies):
     """Create ``tableName`` with one column family per given name.

     :param tableName: name of the table to create
     :param columnFamilies: iterable of column family names
     :return: whatever the client's ``createTable`` call returns
     """
     descriptors = []
     for family in columnFamilies:
         descriptors.append(ColumnDescriptor(family))
     return self.client.createTable(tableName, descriptors)
コード例 #23
0
 def __createTable(self):
     """Create ``self.tableName`` with the families person, contents and info.

     Each family is created with ``maxVersions=1``.
     """
     family_names = ("person:", "contents:", "info:")
     descriptors = [
         ColumnDescriptor(name=family, maxVersions=1)
         for family in family_names
     ]
     self.client.createTable(self.tableName, descriptors)
コード例 #24
0
ファイル: 3123123.py プロジェクト: linruohan/my_study
    def __createTable(self):
        """Create ``self.tableName`` with the column families ``name`` and ``foo``."""
        descriptors = [
            ColumnDescriptor(name=family) for family in ('name', 'foo')
        ]
        self.client.createTable(self.tableName, descriptors)
コード例 #25
0
# Open a buffered Thrift connection to the host/port given in ``settings``.
transport = TBufferedTransport(
    TSocket(settings.hbase_thrift_host, settings.hbase_thrift_port))
transport.open()
protocol = TBinaryProtocol.TBinaryProtocol(transport)

client = Hbase.Client(protocol)

# FIXME: should use another table instead of the working table.
TABLE_NAME = "%s_item_similarities" % site_id

# print() with one argument behaves the same on Python 2 and 3.
print("About to Create the Table ...")
# Start from an empty table: an existing one is disabled and deleted first.
if TABLE_NAME in client.getTableNames():
    client.disableTable(TABLE_NAME)
    client.deleteTable(TABLE_NAME)

# Single column family "p".
client.createTable(TABLE_NAME, [ColumnDescriptor(name="p")])

import md5


def doHash(id):
    return md5.md5(id).hexdigest()


# TODO: maybe better use multiple-column way and a compressed way. and compare.


def insertSimOneRow():
    global last_item1, last_rows
    client.mutateRow(TABLE_NAME, doHash(last_item1), [
        Mutation(column="p:item_id1", value=last_item1),
コード例 #26
0
    def createTable(self):
        """Create ``self.tablename`` with the single family ``self.columnFamily``.

        The family is created with ``maxVersions=1``.
        """
        # The table list is requested from the server but its result is not
        # used. NOTE(review): possibly a leftover existence check -- confirm.
        self.client.getTableNames()

        self.client.createTable(
            self.tablename,
            [ColumnDescriptor(name=self.columnFamily, maxVersions=1)],
        )