def create_table_hbase(self, table_name, column_list=None):
    """Create an HBase table with one column family per requested name.

    :param table_name: name of the table to create.
    :param column_list: optional collection of column-family names. When it
        is empty or ``None`` the module-level ``HBASE_COLUM_FAMILY`` is used
        as the only family.
    """
    # Fall back to the default family when the caller supplied none.
    family_names = column_list if column_list else [HBASE_COLUM_FAMILY]
    descriptors = [ColumnDescriptor(name=family) for family in family_names]
    # Create the table.
    self.client.createTable(table_name, descriptors)
def put_hbase(dataframe, table, rowid):
    """Write every row of a DataFrame into an HBase table over Thrift.

    Each DataFrame column (except the row-key column) becomes a column
    family, and the cell is stored under the fixed qualifier ``a``
    (``<column>:a``). The table is created first when the Thrift server does
    not list it.

    :param dataframe: frame to upload. Rows are read with
        ``dataframe.loc[i, column]`` for ``i`` in ``range(len(dataframe))``.
    :param table: name of the target HBase table.
    :param rowid: name of the column whose value is used as the row key.
        When it is not a column of ``dataframe`` the loop counter ``i`` is
        used as the row key instead.
    """
    families = dataframe.columns.values.tolist()
    # Decide once whether a row-key column exists. A boolean flag (rather
    # than overwriting ``rowid`` with False) keeps falsy column names such as
    # ``0`` or ``""`` usable as the row-key column.
    has_rowid_column = rowid in families
    if has_rowid_column:
        families = [name for name in families if name != rowid]

    client = hbase_connect()
    existing_tables = client.getTableNames()
    print(existing_tables)
    if table not in existing_tables:
        client.createTable(
            table, [ColumnDescriptor(name=name) for name in families])

    # NOTE(review): ``.loc[i, ...]`` with i in range(len(dataframe)) assumes
    # the frame carries the default 0..n-1 index - confirm with callers.
    for i in range(len(dataframe)):
        mutations = [
            Mutation(column=name + ':a', value=str(dataframe.loc[i, name]))
            for name in families
        ]
        if has_rowid_column:
            row_key = str(dataframe.loc[i, rowid])
        else:
            row_key = str(i)
        batch = [BatchMutation(row_key, mutations)]
        try:
            client.mutateRows(table, batch)
        except Exception:
            # Reconnect once and retry the same batch. ``Exception`` (not a
            # bare ``except``) so Ctrl-C / SystemExit are not swallowed.
            client = hbase_connect()
            client.mutateRows(table, batch)
def create_table(self, tablename, cflist):
    """Create an HBase table unless a table with that name already exists.

    It is recommended to use at most three column families.

    :param tablename: name of the table to create.
    :param cflist: list of column-family names, e.g. ``["person:", "geo:"]``.
        Every family is created with ``maxVersions=1``.
    :return: ``(status, message)``. ``status`` is ``True`` when the table was
        created, and ``False`` when it already exists or any exception was
        raised (the message then carries the error text).

    Usage::

        ok, message = obj.create_table("people", ["person:", "geo:"])
        if not ok:
            print(message)
    """
    try:
        # Skip creation when the table is already listed by the server.
        tables = self.client.getTableNames()
        if tablename in tables:
            return False, u'{}表已存在'.format(tablename)
        # Build a real list: on Python 3 ``map`` is lazy and the Thrift
        # serializer needs ``len()`` of the column-family list.
        column_family_list = [
            ColumnDescriptor(name=cf, maxVersions=1) for cf in cflist
        ]
        self.client.createTable(tablename, column_family_list)
    except Exception as ex:
        # Thrift exceptions carry ``message``; ordinary Python 3 exceptions
        # do not, so fall back to ``str(ex)`` instead of raising
        # AttributeError from inside the handler.
        return False, getattr(ex, 'message', None) or str(ex)
    else:
        return True, u'{}表创建成功'.format(tablename)
def createTable(self, tablename):
    '''
    Create a table in HBase with a single column family named ``default``.

    :param tablename: the table name
    :return: no return
    '''
    default_family = [ColumnDescriptor(name='default')]
    self.client.createTable(tablename, default_family)
def create_table(self, table, *columns):
    """
    Create a table.

    :param table: table name
    :param columns: column-family names, one positional argument each
    """
    # A list comprehension instead of ``map``: on Python 3 ``map`` returns a
    # lazy iterator, and the Thrift serializer needs ``len()`` of the list.
    column_families = [ColumnDescriptor(col) for col in columns]
    self.__client.createTable(table, column_families)
def _initTable(cls, client):
    """Create the Thrift test table and write one seed cell into it.

    The table ``_TABLE`` gets a single ``data`` column family with
    ``maxVersions=1``; afterwards the cell ``_DATA`` = ``_VALUE`` is written
    to row ``_ROW``.

    :param client: HBase Thrift client used for both calls.
    :return: ``True`` both when the table was created and when
        ``AlreadyExists`` was raised. Any other exception propagates.
    """
    data_family = ColumnDescriptor(name='data', maxVersions=1)
    seed_mutations = [Mutation(column=_DATA, value=_VALUE)]
    try:
        client.createTable(_TABLE, [data_family])
        client.mutateRow(_TABLE, _ROW, seed_mutations)
        dbg("Create Table For Thrift Test Success!")
    except AlreadyExists:
        # An existing table counts as success; the seed row is not rewritten.
        pass
    return True
def __create_table(self, table):
    """
    Create ``table`` in HBase with one column family (``maxVersions=1``) per
    entry of ``self.columnFamilies``.

    ``AlreadyExists`` is reported on stdout and otherwise ignored.
    """
    descriptors = [
        ColumnDescriptor(name=family, maxVersions=1)
        for family in self.columnFamilies
    ]
    try:
        self.client.createTable(table, descriptors)
    except AlreadyExists as tx:
        print('Thrift exception')
        print('%s' % (tx.message))
def __create_table(self, table, columns_name):
    '''
    Create ``table`` unless the server already lists it.

    Each entry of ``columns_name`` becomes the column family ``"<name>:"``
    with ``maxVersions=1``. Any error raised by the create call is printed
    and swallowed.
    '''
    if table in self.client.getTableNames():
        return
    descriptors = [
        ColumnDescriptor(name='%s:' % family, maxVersions=1)
        for family in columns_name
    ]
    try:
        self.client.createTable(table, descriptors)
    except Exception as e:
        print(e)
def __create_table(self, table, columns_name):
    """
    Create ``table`` through ``self.hb`` unless it is already listed.

    :param table: name of the table to create.
    :param columns_name: iterable of column-family names; every family is
        created with ``max_versions=1``.

    The previous body passed only the *last* column name to ``create_table``
    (and failed with ``NameError`` for an empty ``columns_name``) while the
    descriptor list it built was never used.
    """
    # NOTE(review): assumes ``self.hb`` is a happybase ``Connection``, whose
    # ``create_table(name, families)`` takes a dict mapping each family name
    # to its options - confirm against where ``self.hb`` is constructed.
    if table in self.hb.tables():
        return
    families = {
        column_name: dict(max_versions=1) for column_name in columns_name
    }
    self.hb.create_table(table, families)
def createTable(client, tableName, *colFamilys):
    '''
    Create a new table.

    :param client: client instance connected to HBase
    :param tableName: table name
    :param *colFamilys: any number of column-family names
    '''
    # One descriptor per variadic argument; names are coerced with str().
    descriptors = [ColumnDescriptor(name=str(family)) for family in colFamilys]
    # Create the table, then report success on stdout.
    client.createTable(tableName, descriptors)
    print('建表成功!')
def createTable(self):
    """Recreate ``self.tablename`` from scratch.

    When the server already lists the table, ``self.deleteTable()`` is called
    first; the table is then created with the single column family
    ``self.columnFamily`` (``maxVersions=1``).
    """
    # Delete the table if it is already there.
    if self.tablename in self.client.getTableNames():
        self.deleteTable()
    # Create the table.
    family = ColumnDescriptor(name=self.columnFamily, maxVersions=1)
    self.client.createTable(self.tablename, [family])
def creat_table(self, table_name, all_columnFamily_name, *args):
    """Create a table with one column family per name in the given list.

    :param table_name: name of the table to create.
    :param all_columnFamily_name: iterable of column-family names.
    :param args: accepted but not used.
    :return: ``True`` on success, ``False`` when creation failed and
        ``self.exception`` is falsy.
    :raises Exception: the original error, when ``self.exception`` is truthy.
    """
    try:
        # Define the column families, then create the table.
        descriptors = [
            ColumnDescriptor(name=family) for family in all_columnFamily_name
        ]
        self.client.createTable(table_name, descriptors)
        return True
    except Exception as e:
        # The table may already exist but be disabled: enable it.
        table_enabled = self.client.isTableEnabled(table_name)
        if not table_enabled:
            self.client.enableTable(table_name)
        # Let the caller handle the error when configured to do so.
        if self.exception:
            raise e
        logging.error('creat table error: %s' % e)
        return False
def creat_table(self, table_name, all_columnFamily_name, *args):
    """Create a table; ``all_columnFamily_name`` looks like ``['cf1', 'cf2']``.

    :param table_name: name of the table to create.
    :param all_columnFamily_name: iterable of column-family names.
    :param args: accepted but not used.
    :return: ``True`` on success, ``False`` when creation failed and
        ``self.exception`` is falsy.
    :raises Exception: the original error, when ``self.exception`` is truthy.
    """
    try:
        # Define the column families, then create the table.
        descriptors = [
            ColumnDescriptor(name=family) for family in all_columnFamily_name
        ]
        self.client.createTable(table_name, descriptors)
        logging.info('creat table %s' % table_name)
        return True
    except Exception as e:
        # Most likely the table already exists.
        logging.error('creat table error: %s' % e)
        # If it exists but is disabled, enable it.
        table_enabled = self.client.isTableEnabled(table_name)
        if not table_enabled:
            self.client.enableTable(table_name)
        if self.exception:
            raise e
        return False
def __create_table(self, table, columns_name):
    '''
    Create ``table`` when neither the cached nor the refreshed table list
    contains it.

    Each entry of ``columns_name`` becomes the column family ``"<name>:"``
    with ``maxVersions=1``.

    :return: ``HbaseManager.CreateError`` when the create call fails,
        ``HbaseManager.ConnectError`` when anything else in the body fails,
        otherwise ``None``.
    '''
    try:
        if table not in self.tables:
            # Cache miss: refresh the cached table list from the server.
            self.tables = self.client.getTableNames()
            if table not in self.tables:
                descriptors = [
                    ColumnDescriptor(name='%s:' % family, maxVersions=1)
                    for family in columns_name
                ]
                try:
                    self.client.createTable(table, descriptors)
                except Exception as e:
                    print_plus(content=e, level=1)
                    return HbaseManager.CreateError
    except Exception as e:
        print_plus(content=e, level=1)
        return HbaseManager.ConnectError
def create_table(self, table, *columns):
    """Create a table.

    :param table: table name
    :param columns: column-family names, one positional argument each
    """
    # A list comprehension instead of ``map``: on Python 3 ``map`` returns a
    # lazy iterator, and the Thrift serializer needs ``len()`` of the list.
    column_families = [ColumnDescriptor(column) for column in columns]
    self.client.createTable(table, column_families)
"""
Create an HBase table.
https://cloud.tencent.com/document/product/589/12309
"""
from thrift import Thrift
from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import ColumnDescriptor, Mutation, BatchMutation, TRegionInfo
from hbase.ttypes import IOError, AlreadyExists

# Buffered binary-protocol connection to the HBase Thrift server,
# with a 5000 ms socket timeout.
socket = TSocket.TSocket(host='192.168.40.188', port=9090)
socket.setTimeout(5000)
transport = TTransport.TBufferedTransport(socket)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

transport.open()
try:
    # One column family "cf" that keeps a single version per cell.
    new_table = ColumnDescriptor(name='cf:', maxVersions=1)
    client.createTable('thrift_test_1', [new_table])
    tables = client.getTableNames()
finally:
    # Always release the connection, even when a Thrift call raises
    # (for example AlreadyExists on a second run).
    transport.close()
print(tables)
    TSocket(settings.hbase_thrift_host, settings.hbase_thrift_port))
transport.open()
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

# FIXME: should use another table instead of the working table.
TABLE_NAME = "%s_item_similarities" % site_id
print "About to Create the Table ..."
# Drop an existing table of the same name so it is rebuilt from scratch.
if TABLE_NAME in client.getTableNames():
    client.disableTable(TABLE_NAME)
    client.deleteTable(TABLE_NAME)
# Single column family "p".
client.createTable(TABLE_NAME, [ColumnDescriptor(name="p")] )

import md5


def doHash(id):
    """Return the hexadecimal MD5 digest of ``id``."""
    return md5.md5(id).hexdigest()


# TODO: maybe better use multiple-column way and a compressed way. and compare.
def insertSimOneRow():
    """Write the globals ``last_item1`` / ``last_rows`` as one HBase row.

    The row key is ``doHash(last_item1)``; ``p:item_id1`` stores the raw item
    id and ``p:mostSimilarItems`` stores ``last_rows`` serialized as JSON.
    """
    global last_item1, last_rows
    client.mutateRow(TABLE_NAME, doHash(last_item1),
                     [Mutation(column="p:item_id1", value=last_item1),
                      Mutation(column="p:mostSimilarItems", value=json.dumps(last_rows))])
# while(True): # result = client.scannerGet(scanid) # print(result) # if not result: # break rows_data = client.scannerGetList(scanid, 20) for data in rows_data: print(data) client.scannerClose(scanid) print( row_data ) #[TRowResult(columns={'address:province': TCell(timestamp=1543680680608L, value='yunnan')}, row='row01')] column01 = ColumnDescriptor( name='user_info' ) # ColumnDescriptor(bloomFilterType='NONE', bloomFilterNbHashes=0, name='user_info', maxVersions=3, blockCacheEnabled=False, inMemory=False, timeToLive=-1, bloomFilterVectorSize=0, compression='NONE') column02 = ColumnDescriptor('addr_info') #hbase好像不支持使用Python创建预分区表 # client.createTable('tablepy',[column01,column02]) # print(client) region_info = client.getTableRegions('tablepy') #查看表分区 table_info = client.getColumnDescriptors('tablepy') #查看表结构 print( region_info ) # [TRegionInfo(startKey='', endKey='', version=1, id=1543752131747L, name='tablepy,,1543752131747.ccfa71e67b9732adb575129bf9e560eb.')] print( table_info ) # {'addr_info:': ColumnDescriptor(bloomFilterType='NONE', bloomFilterNbHashes=0, name='addr_info:', maxVersions=3, blockCacheEnabled=False, inMemory=False, timeToLive=2147483647, bloomFilterVectorSize=0, compression='NONE'), 'user_info:': ColumnDescriptor(bloomFilterType='NONE', bloomFilterNbHashes=0, name='user_info:', maxVersions=3, blockCacheEnabled=False, inMemory=False, timeToLive=2147483647, bloomFilterVectorSize=0, compression='NONE')} #插入数据 mutation = Mutation(column='user_info:province', value='350000')
if __name__ == "__main__": """ 链接数据库 """ # server端地址和端口 host = "localhost" port = "9090" transport = TSocket.TSocket(host, port) # 可以设置超时 transport.setTimeout(5000) # 设置传输方式(TFramedTransport或TBufferedTransport) trans = TTransport.TBufferedTransport(transport) # 设置传输协议 protocol = TBinaryProtocol.TBinaryProtocol(trans) # 确定客户端 client = Hbase.Client(protocol) # 打开连接 transport.open() print "OK" """ """ # 获取表名 client.getTableNames() # 创建新表 _TABLE = "keyword" demo = ColumnDescriptor(name='data:', maxVersions=10) # 列族data能保留最近的10个数据,每个列名后面要跟:号 createTable(_TABLE, [demo])
print "[INFO] setup connection"
# HTTPS Thrift transport to port 9090 on the host given as the first
# command-line argument.
transport = THttpClient.THttpClient("https://{0}:{1}".format(
    sys.argv[1], 9090))
client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
table = 'test:thrift_proxy_demo'
print "[INFO] start client"
transport.open()
print "[INFO] list the current tables"
print client.getTableNames()
print "[INFO] create a table, place some data"
# Single column family "family1".
client.createTable(table, [ColumnDescriptor(name='family1:')])
# Each call writes qualifiers cq1 and cq2 of one row; the values alternate
# between 'foo' and 'bar' from row to row.
# NOTE(review): the trailing ``{}`` is presumably the Thrift ``attributes``
# map of mutateRow - confirm against the generated Hbase client.
client.mutateRow(table, 'row1', [
    Mutation(column='family1:cq1', value='foo'),
    Mutation(column='family1:cq2', value='foo')
], {})
client.mutateRow(table, 'row2', [
    Mutation(column='family1:cq1', value='bar'),
    Mutation(column='family1:cq2', value='bar')
], {})
client.mutateRow(table, 'row3', [
    Mutation(column='family1:cq1', value='foo'),
    Mutation(column='family1:cq2', value='foo')
], {})
client.mutateRow(table, 'row4', [
    Mutation(column='family1:cq1', value='bar'),
    Mutation(column='family1:cq2', value='bar')
def main():
    """Load every ``*.csv`` file of the working directory into HBase.

    For each file the first row is skipped as a header. The station name in
    column 1 of the first data row determines the table name; the table is
    created with one column family per air-quality attribute when the server
    does not list it yet. Every data row is then written as

    * row key: column 0 (the date),
    * column family: column 2 (the attribute),
    * qualifiers ``1`` .. ``24``: the hourly values from column 3 onwards.

    The Thrift connection is closed when the function returns or raises.
    """
    # A data row is [date, station, attribute, v1 .. v24].
    first_value_column = 3
    hours_per_day = 24

    socket = TSocket.TSocket('127.0.0.1', 9090)
    socket.setTimeout(5000)
    transport = TTransport.TBufferedTransport(socket)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    socket.open()
    try:
        table_list = client.getTableNames()
        start = time.time()
        logging.basicConfig(format='%(asctime)s | %(levelname)s | %(message)s',
                            level=logging.INFO,
                            datefmt='%Y-%m-%d %H:%M:%S')
        logging.info('Initiating task: Taiwan Air Quality!')
        Attributes = [
            'AMB_TEMP', 'CO', 'NO', 'NO2', 'NOx', 'O3', 'PM10', 'PM2.5',
            'RAINFALL', 'RAIN_COND', 'UVB', 'RH', 'SO2', 'WD_HR',
            'WIND_DIREC', 'WIND_SPEED', 'WS_HR', 'CH4', 'NMHC', 'THC',
            'PH_RAIN'
        ]
        csvfiles = [
            filename for filename in os.listdir(os.getcwd())
            if filename.endswith('.csv')
        ]
        logging.info(str(csvfiles))
        InsertCounts = 0

        for csv_name in csvfiles:
            with open(csv_name, newline='') as f:
                frames = csv.reader(f)
                table_Name = ''
                logging.info("Start reading {0}".format(csv_name))
                for row_number, frame in enumerate(tqdm(frames)):
                    if row_number == 0:
                        # Header row.
                        continue
                    if row_number == 1:
                        # Derive an ASCII table name from the station name via
                        # the repr of its UTF-8 bytes, with backslashes, the
                        # letter "b" and quotes removed.
                        # NOTE(review): this strips *every* "b" in the name,
                        # not only the bytes-literal prefix; kept unchanged so
                        # existing table names stay the same.
                        table_Name = str(str.encode(frame[1],
                                                    'utf-8')).replace('\\', "")
                        table_Name = table_Name.replace("b", "")
                        table_Name = table_Name.replace("'", "")
                        if table_Name not in table_list:
                            Column_Descriptors = [
                                ColumnDescriptor(name=attribute)
                                for attribute in Attributes
                            ]
                            client.createTable(table_Name, Column_Descriptors)
                            # Remember the new table so that a later file for
                            # the same station does not try to create it again.
                            table_list.append(table_Name)
                            logging.info(
                                'Build Table : {0}'.format(table_Name))
                        else:
                            logging.info(
                                'Table {0} already exist, no need to create'.
                                format(table_Name))
                    # ['2018/01/02', 'iilan', 'NOx', '5.1', '4.4', '3.5', '2.1', '2.5', '3.2', '4.6', '15',
                    #  '13', '11', '7', '6.8', '7.1', '13', '13', '12', '13', '16', '24', '23', '20', '24', '18', '13']
                    row = frame[0]  # date
                    column = frame[2]  # attr
                    # All 24 hourly values. The previous ``range(3, 26)``
                    # stopped one column early and dropped the last hour.
                    hourly_values = frame[first_value_column:
                                          first_value_column + hours_per_day]
                    for qualifier, value in enumerate(hourly_values, start=1):
                        mutate = Mutation(column=column + ':' + str(qualifier),
                                          value=value)
                        client.mutateRow(table_Name, row, [mutate])
                        InsertCounts += 1

        end = time.time()
        logging.info("================Insert Done================\n")
        logging.info("totalInsertCount: {0}, totalTimeSpend: {1}\n".format(
            InsertCounts, end - start))
        logging.info(client.getTableNames())
    finally:
        # Release the Thrift connection on every exit path.
        transport.close()
def createTable(self, tableName, columnFamilies):
    """Create ``tableName`` with one column family per given name.

    :param tableName: name of the table to create.
    :param columnFamilies: iterable of column-family names; each is wrapped
        in a ``ColumnDescriptor``.
    :return: whatever the client's ``createTable`` call returns.
    """
    descriptors = []
    for family_name in columnFamilies:
        descriptors.append(ColumnDescriptor(family_name))
    return self.client.createTable(tableName, descriptors)
def __createTable(self):
    """Create ``self.tableName`` with the column families ``person``,
    ``contents`` and ``info``, each with ``maxVersions=1``."""
    families = [
        ColumnDescriptor(name=family_name, maxVersions=1)
        for family_name in ("person:", "contents:", "info:")
    ]
    self.client.createTable(self.tableName, families)
def __createTable(self):
    """Create ``self.tableName`` with the two column families ``name`` and
    ``foo``."""
    families = [
        ColumnDescriptor(name=family_name) for family_name in ('name', 'foo')
    ]
    self.client.createTable(self.tableName, families)
# Buffered Thrift transport to the HBase Thrift host/port taken from settings.
transport = TBufferedTransport(
    TSocket(settings.hbase_thrift_host, settings.hbase_thrift_port))
transport.open()
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)

# FIXME: should use another table instead of the working table.
TABLE_NAME = "%s_item_similarities" % site_id
print "About to Create the Table ..."
# Drop an existing table of the same name so it is rebuilt from scratch.
if TABLE_NAME in client.getTableNames():
    client.disableTable(TABLE_NAME)
    client.deleteTable(TABLE_NAME)
# Single column family "p".
client.createTable(TABLE_NAME, [ColumnDescriptor(name="p")])

import md5


def doHash(id):
    """Return the hexadecimal MD5 digest of ``id``."""
    return md5.md5(id).hexdigest()


# TODO: maybe better use multiple-column way and a compressed way. and compare.
def insertSimOneRow():
    """Write the global ``last_item1`` as one HBase row keyed by
    ``doHash(last_item1)``."""
    global last_item1, last_rows
    client.mutateRow(TABLE_NAME, doHash(last_item1), [
        Mutation(column="p:item_id1", value=last_item1),
def createTable(self):
    """Create ``self.tablename`` with the single column family
    ``self.columnFamily`` (``maxVersions=1``)."""
    # NOTE(review): the table list is fetched but its result is never used;
    # the call is kept so the sequence of Thrift requests is unchanged.
    self.client.getTableNames()
    descriptors = [ColumnDescriptor(name=self.columnFamily, maxVersions=1)]
    self.client.createTable(self.tablename, descriptors)