Esempio n. 1
0
    def puts(self, rowKeys, qualifier, values):
        """Put one cell per row for several rows, in a single batch call.

        Every value is written to the same column,
        ``<self.columnFamilies[0]>:<qualifier>``; ``values[i]`` goes into the
        row ``rowKeys[i]``.

        :param rowKeys: list of row keys (``str``), one per value; a non-list
            key is reused for every value
        :param qualifier: column qualifier (``str``) within the first column
            family
        :param values: flat list of cell values; ``str`` values are UTF-8
            encoded, ``int`` values go through ``encode()``
        :raises TypeError: if a value is neither ``str`` nor ``int``

        Usage::

        >>> HBaseTest('table').puts(rowKeys=['1', '2', '3'], qualifier="name", values=[1, 2, 3])

        """
        if not isinstance(rowKeys, list):
            rowKeys = [rowKeys] * len(values)

        mutationsBatch = []
        for i, value in enumerate(values):
            if isinstance(value, str):
                payload = value.encode('utf-8')
            elif isinstance(value, int):
                payload = encode(value)
            else:
                # Fail loudly instead of leaving the mutation unbound or
                # silently re-sending the previous row's value.
                raise TypeError('unsupported value type %s; expected str or int'
                                % type(value).__name__)
            column = (self.columnFamilies[0] + ':' + qualifier).encode('utf-8')
            mutationsBatch.append(
                Hbase.BatchMutation(
                    row=rowKeys[i].encode('utf-8'),
                    mutations=[Hbase.Mutation(column=column, value=payload)]))
        self.client.mutateRows(self.table, mutationsBatch, {})
Esempio n. 2
0
    def puts(self, rowKeys, values, qualifier='1'):
        """Put several rows in one batch call; the qualifier auto-increments.

        Cell ``values[i][j]`` is written to row ``rowKeys[i]`` under column
        ``<self.columnFamilies[j]>:<qualifier>``, and the qualifier is
        increased by one after every row.

        :param rowKeys: list of row keys, one per row, or a single key that
            is reused for every row
        :param values: 2-dimensional list, one inner list per row, e.g.
            ``[name, sex, age]``; each cell must be ``str`` or ``int``
            (``int`` cells go through ``encode()``)
        :param qualifier: starting column qualifier; must be a decimal
            string because it is incremented with ``int()`` after each row
        :raises TypeError: if a cell is neither ``str`` nor ``int``

        Usage::

        >>> HBaseTest().puts('test', [['lee', 'f', '27'], ['clark', 'm', 27], ['dan', 'f', '27']])

        """
        mutationsBatch = []
        if not isinstance(rowKeys, list):
            rowKeys = [rowKeys] * len(values)

        for i, value in enumerate(values):
            mutations = []
            for j, column in enumerate(value):
                if isinstance(column, str):
                    cell = column
                elif isinstance(column, int):
                    cell = encode(column)
                else:
                    # Fail loudly instead of leaving the mutation unbound or
                    # silently re-sending the previous cell.
                    raise TypeError(
                        'unsupported cell type %s; expected str or int'
                        % type(column).__name__)
                mutations.append(
                    Hbase.Mutation(column=self.columnFamilies[j] + ':' +
                                   qualifier,
                                   value=cell))

            qualifier = str(int(qualifier) + 1)
            mutationsBatch.append(
                Hbase.BatchMutation(row=rowKeys[i], mutations=mutations))
        self.client.mutateRows(self.table, mutationsBatch, {})
Esempio n. 3
0
 def puts(self, rowkey, columnFamilies, values):
     """Write several rows in one batch call and report success.

     Cell ``values[i][j]`` is stored in row ``i`` under column
     ``<columnFamilies[j]>:0``.

     :param rowkey: list of row keys, one per row, or a single key that is
         reused for every row
     :param columnFamilies: column family names, indexed like the cells of
         each row
     :param values: 2-dimensional list of cells; each cell must be ``str``
         or ``int`` (``int`` cells go through ``encode()``)
     :return: True on success; False when one of the caught HBase/Thrift
         errors occurred (it is logged and printed)
     :raises TypeError: if a cell is neither ``str`` nor ``int``
     """
     # Both shapes must define rowKeys: a list is used as given, a single
     # key is repeated once per row.
     if isinstance(rowkey, list):
         rowKeys = rowkey
     else:
         rowKeys = [rowkey] * len(values)

     mutationsBatch = []
     try:
         for i, value in enumerate(values):
             mutations = []
             for j, column in enumerate(value):
                 if isinstance(column, str):
                     cell = column
                 elif isinstance(column, int):
                     cell = encode(column)
                 else:
                     # Not an HBase error, so it is not swallowed below.
                     raise TypeError(
                         'unsupported cell type %s; expected str or int'
                         % type(column).__name__)
                 mutations.append(
                     Hbase.Mutation(column=columnFamilies[j] + ':' + '0',
                                    value=cell))
             mutationsBatch.append(
                 Hbase.BatchMutation(row=rowKeys[i], mutations=mutations))
         self.client.mutateRows(self.dbname, mutationsBatch)
         return True
     except (Hbase.IOError, Hbase.TException, Hbase.TApplicationException,
             Hbase.IllegalArgument) as e:
         logInfo('puts')
         logInfo(e)
         print(e)
     return False
Esempio n. 4
0
    def push(self, table):
        """
        Scan a whole MongoDB collection and write every document into HBase.

        Each document becomes one row of the HBase table named after the
        collection. The row key is ``md5(_id)[0:10] + ':' + _id`` and every
        field except ``_id`` is stored in column ``data:<field>`` as the
        UTF-8 bytes of ``str(value)``.

        :param table: MongoDB namespace in ``<database>.<collection>`` form
        :return: None
        """

        logger = logging.getLogger(table)
        # getLogger() returns the same logger for the same name, so attach
        # the file handler only once; otherwise every further call for this
        # table would duplicate each log line.
        if not any(isinstance(h, RotatingFileHandler) for h in logger.handlers):
            handle = RotatingFileHandler('./full_sync.log', maxBytes=50 * 1024 * 1024, backupCount=3)
            handle.setFormatter(logging.Formatter(
                '%(asctime)s %(name)-12s %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s'))
            logger.addHandler(handle)
        logger.setLevel(logging.INFO)
        logger.info('开始推送 ' + table + ' !')

        db_name = table.split('.')[0]
        table_name = table.split('.')[1]
        hbase_table = bytes(table_name, encoding="utf8")
        # Stored instead of a value that is too large for HBase.
        oversized_placeholder = bytes(str(None), encoding="utf8")

        client = pymongo.MongoClient(MONGODB_HOST, MONGODB_PORT, unicode_decode_error_handler='ignore')
        transport = TTransport.TBufferedTransport(TSocket.TSocket(THRIFT_IP, THRIFT_PORT))
        try:
            admin = client['admin']
            admin.authenticate(USER, PASSWORD)

            thrift_client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
            transport.open()

            count = 0
            cursor = client[db_name][table_name].find().sort('$natural', pymongo.ASCENDING)
            for record in cursor:
                count += 1
                mutations = []
                # The row key is md5(_id)[0:10]:_id
                _id = str(record['_id'])
                row_key = bytes(hashlib.md5(bytes(_id, encoding="utf-8")).hexdigest()[0:10] + ':' + _id, encoding="utf-8")
                for item in record:
                    if item == '_id':
                        continue
                    key = bytes('data:' + item, encoding="utf8")
                    var = bytes(str(record[item]), encoding="utf8")
                    # hbase.client.keyvalue.maxsize defaults to 10M; larger
                    # values are replaced by the placeholder.
                    if len(var) >= 10 * 1024 * 1024:
                        var = oversized_placeholder
                    mutations.append(Hbase.Mutation(column=key, value=var))

                thrift_client.mutateRow(hbase_table, row_key, mutations, {})

                # Progress report every 100000 documents.
                if count % 100000 == 0:
                    if 'create_time' in record:
                        logger.info(table + ' 已经读出 ' + str(count / 10000) + ' 万条数据'
                                    + '    ' + str(record['create_time']))
                    else:
                        logger.info(table + ' 已经读出 ' + str(count / 10000) + ' 万条数据')
        finally:
            # Release both connections even when the scan or a write fails.
            client.close()
            transport.close()
Esempio n. 5
0
 def put(self, rowKey, qualifier='0', *args):
     """Put one row: ``args[j]`` goes to ``<self.columnFamilies[j]>:<qualifier>``.

     :param rowKey: key of the row to write
     :param qualifier: column qualifier used for every column family
     :param args: one value per column family, in ``self.columnFamilies``
         order; each must be ``str`` or ``int`` (``int`` values go through
         ``encode()``)
     :raises TypeError: if a value is neither ``str`` nor ``int``
     """
     mutations = []
     for j, column in enumerate(args):
         if isinstance(column, str):
             value = column
         elif isinstance(column, int):
             value = encode(column)
         else:
             raise TypeError('unsupported value type %s; expected str or int'
                             % type(column).__name__)
         # Collect every column, str values included.
         mutations.append(
             Hbase.Mutation(column=self.columnFamilies[j] + ':' + qualifier,
                            value=value))
     # Send the row once, after all of its columns have been collected.
     self.client.mutateRow(self.table, rowKey, mutations, {})
Esempio n. 6
0
    def write_hbase(self, data):
        """
        Apply one operation to HBase. The row key is derived from the
        source id through ``self.generate_rowkey``.

        :param data: description of the operation, for example
        {
            # 'i' inserts, 'd' deletes (any other value does nothing)
            'op': 'i',
            # HBase table to write to
            'table_name': 'hb_charts',
            # id of the record
            '_id': '121314125_img2',
            # value of every field to write
            'columns': {
                'title' : 'This is a title'
            }
        }
        :return: None
        """

        operation = data['op']
        hbase_table = bytes(data['table_name'], "utf-8")
        hbase_row = bytes(self.generate_rowkey(data['_id']), "utf-8")
        if 'columns' in data:
            fields = data['columns']
        else:
            fields = []

        if operation == 'd':
            self.client.deleteAllRow(hbase_table, hbase_row, {})
            self.logger.debug(str(QUEUE.qsize()) + ' 删除到 HBase ' + str(data))
            return
        if operation != 'i':
            return

        # hbase.client.keyvalue.maxsize defaults to 10M; a larger value is
        # replaced by the text "None".
        size_limit = 10 * 1024 * 1024
        row_mutations = []
        for field_name in fields:
            if field_name == '_id':
                continue
            column = bytes(self.cf + ':' + field_name, encoding="utf8")
            payload = bytes(str(fields[field_name]), encoding="utf8")
            too_large = len(payload) >= size_limit
            if too_large:
                payload = bytes(str(None), encoding="utf8")
            row_mutations.append(Hbase.Mutation(column=column, value=payload))
            if too_large:
                self.logger.warning(self.getName() + ' ' +
                                    data['table_name'] + ' 的 _id为 ' +
                                    data['_id'] + ' 的数据的 ' + str(field_name) +
                                    ' 字段的值大小超过了' +
                                    ' HBase 默认规定的键值10M限制,先已经置 None 替代该值')
        self.client.mutateRow(hbase_table, hbase_row, row_mutations, {})
        self.logger.debug(str(QUEUE.qsize()) + ' 插入到 HBase ' + str(data))
Esempio n. 7
0
def updateRowsForDebugging(table_name, rows):
    """Copy the first row of ``rows`` to a debugging row key.

    The new key joins a fixed address with the second ``|``-separated part
    of the original key. All cells are copied unchanged except
    ``master_info_cf:updateDate``, which is set to the current time in
    RFC 3339 form. Only the first row is processed.

    :param table_name: HBase table to write to
    :param rows: scan results exposing ``.row`` and ``.columns``
    """
    for row in rows:
        key_suffix = row.row.split('|')[1]
        newrowid = '|'.join(['*****@*****.**', key_suffix])
        print(newrowid)

        mutatelist = []
        for column_name, cell in row.columns.iteritems():
            if column_name == 'master_info_cf:updateDate':
                new_value = rfc3339(time.time())
            else:
                new_value = cell.value
            mutatelist.append(Hbase.Mutation(column=column_name, value=new_value))

        print(mutatelist)
        client.mutateRow(table_name, newrowid, mutatelist)
        # Debug helper: stop after the first row.
        return
Esempio n. 8
0
    def putRow(self, rowName, rowKey, **keys):
        """Insert cells into one row; per the original note, existing cells are overwritten.

        Every keyword argument ``qualifier=value`` is written to column
        ``<rowName>:<qualifier>`` of row ``rowKey`` in ``self.tableName``.

        :param rowName: column family prefix placed before the ``:``
        :param rowKey: key of the row to write
        :param keys: qualifier/value pairs to store
        """
        # Mutation has to be built with keyword arguments
        # (column=..., value=...); the original author notes that the
        # positional form raises an error.
        cells = [
            Hbase.Mutation(column=rowName + ':' + qualifier, value=content)
            for qualifier, content in keys.items()
        ]

        self.client.mutateRow(self.tableName, rowKey, cells)
Esempio n. 9
0
 def put(self, rowkey, columnFamilies, *args):
     """Write one row and report success.

     ``args[j]`` is stored under column ``<columnFamilies[j]>:0``.

     :param rowkey: key of the row to write
     :param columnFamilies: column family names, indexed like ``args``
     :param args: one value per column family; each must be ``str`` or
         ``int`` (``int`` values go through ``encode()``)
     :return: True on success; False when one of the caught HBase/Thrift
         errors occurred (it is logged and printed)
     :raises TypeError: if a value is neither ``str`` nor ``int``
     """
     mutations = []
     try:
         for j, column in enumerate(args):
             if isinstance(column, str):
                 value = column
             elif isinstance(column, int):
                 value = encode(column)
             else:
                 # Not an HBase error, so it is not swallowed below. Without
                 # this branch the previous column's mutation was re-sent.
                 raise TypeError(
                     'unsupported value type %s; expected str or int'
                     % type(column).__name__)
             mutations.append(
                 Hbase.Mutation(column=columnFamilies[j] + ':' + '0',
                                value=value))
         self.client.mutateRow(self.dbname, rowkey, mutations)
         return True
     except (Hbase.IOError, Hbase.TException, Hbase.TApplicationException,
             Hbase.IllegalArgument) as e:
         logInfo('put')
         logInfo(e)
         print(e)
     return False
Esempio n. 10
0
 def delete_column(self, table, row_key, column):
     """ Delete one column from a row of the table.

     According to the original author's note, deleting the last remaining
     column also removes the row. Thrift errors are printed, not raised.

     :param table: The name of the table
     :param row_key: The key of the row the column is deleted from
     :param column: The column name including the column family with the colon format, such as 'cf:count'
     :return: None
     """
     try:
         delete_cell = Hbase.Mutation(column=column, isDelete=1)
         self.client.mutateRow(table, row_key, [delete_cell])
     except Thrift.TException as tx:
         print('%s' % tx.message)
Esempio n. 11
0
def put_datas_from_hdfs():
    """Load ``/user/xiongz/data.txt`` from HDFS into ``l_test_table``.

    Each non-blank line holds seven ``|``-separated fields: name, sex,
    phoneNo, birthDay, address, answer and the row key. The first six are
    written as one row; a running count is printed every 100 rows.

    :raises IndexError: if a non-blank line has fewer than seven fields
    """
    # Target column of each of the first six fields, in file order.
    columns = (
        "person:name",
        "person:sex",
        "person:phoneNo",
        "person:birthDay",
        "person:address",
        "content:answer",
    )
    transport.open()
    try:
        count = 0
        with hdfs_client.read("/user/xiongz/data.txt") as file:
            for line in file.read().split("\n"):
                # Splitting on "\n" yields an empty last piece when the file
                # ends with a newline; skip it instead of failing.
                if not line.strip():
                    continue
                fields = line.split('|')
                rowkey = fields[6].strip()
                mutations = [
                    Hbase.Mutation(column=column, value=value)
                    for column, value in zip(columns, fields)
                ]
                client.mutateRow('l_test_table', rowkey, mutations)
                count = count + 1
                if count % 100 == 0:
                    print(count)
    finally:
        # Close the connection even when a line or a write fails.
        transport.close()
Esempio n. 12
0
    def puts(self, rowKeys, values, qualifier='1'):
        """Put several rows in one batch call; the qualifier auto-increments.

        Cell ``values[i][j]`` is written to row ``rowKeys[i]`` under column
        ``<self.columnFamilies[j]>:<qualifier>``, and the qualifier is
        increased by one after every row.

        :param rowKeys: list of row keys, one per row, or a single key that
            is reused for every row
        :param values: 2-dimensional list, one inner list per row; each cell
            must be ``str`` or ``int`` (``int`` cells go through
            ``encode()``)
        :param qualifier: starting column qualifier; must be a decimal
            string because it is incremented with ``int()`` after each row
        :raises TypeError: if a cell is neither ``str`` nor ``int``
        """
        mutationsBatch = []
        if not isinstance(rowKeys, list):
            rowKeys = [rowKeys] * len(values)

        for i, value in enumerate(values):
            mutations = []
            for j, column in enumerate(value):
                if isinstance(column, str):
                    cell = column
                elif isinstance(column, int):
                    cell = encode(column)
                else:
                    # Fail loudly instead of leaving the mutation unbound or
                    # silently re-sending the previous cell.
                    raise TypeError(
                        'unsupported cell type %s; expected str or int'
                        % type(column).__name__)
                mutations.append(
                    Hbase.Mutation(column=self.columnFamilies[j] + ':' +
                                   qualifier,
                                   value=cell))

            qualifier = str(int(qualifier) + 1)
            mutationsBatch.append(
                Hbase.BatchMutation(row=rowKeys[i], mutations=mutations))
        self.client.mutateRows(self.table, mutationsBatch, {})
Esempio n. 13
0
 def put_row(self, table, row_key, column, value):
     """ Store a value in one cell, addressed by table name, row key and full column name.

     Thrift errors are printed, not raised.

     :param table: The name of the table
     :param row_key: The key of the row we want to put a value in
     :param column: The column name including the column family with the colon format, such as 'cf:count'
     :param value: The array of bytes (using Python's string type) to insert as the value for this cell
     :return: None
     """
     try:
         cell = Hbase.Mutation(column=column, value=value)
         self.client.mutateRow(table, row_key, [cell])
     except Thrift.TException as tx:
         print('%s' % tx.message)
Esempio n. 14
0
    def put(self, rowKey, qualifier='0', *args):
        """ put one row

        ``args[j]`` is written to column
        ``<self.columnFamilies[j]>:<qualifier>``.

        :param rowKey: key of the row to write
        :param qualifier: column qualifier used for every column family
        :param *args: all values correspond to column families,
            e.g. [name, sex, age]; each must be ``str`` or ``int``
            (``int`` values go through ``encode()``)
        :raises TypeError: if a value is neither ``str`` nor ``int``

        Usage::

        >>> HBaseTest().put('test', '0', 'john', 'male', '95')

        """
        mutations = []
        for j, column in enumerate(args):
            if isinstance(column, str):
                value = column
            elif isinstance(column, int):
                value = encode(column)
            else:
                # Fail loudly instead of leaving the mutation unbound or
                # silently re-sending the previous column's value.
                raise TypeError(
                    'unsupported value type %s; expected str or int'
                    % type(column).__name__)
            mutations.append(
                Hbase.Mutation(column=self.columnFamilies[j] + ':' +
                               qualifier,
                               value=value))
        self.client.mutateRow(self.table, rowKey, mutations, {})
Esempio n. 15
0
 def putByColumns(self, rowkey, args):
     """Write several cells of one row and report success.

     :param rowkey: key of the row to write
     :param args: mapping of column name to value. ``bytes`` values are
         decoded to ``str``, ``str`` values are stored as they are and
         ``int`` values go through ``encode()``
     :return: True on success; False when a value has any other type
         (for example ``datetime``; nothing is written in that case) or
         when one of the caught HBase/Thrift errors occurred
     """
     mutations = []
     try:
         for key, value in args.items():
             if isinstance(value, bytes):
                 cell = value.decode()
             elif isinstance(value, str):
                 cell = value
             elif isinstance(value, int):
                 cell = encode(value)
             else:
                 # Same outcome as the datetime case always had: refuse the
                 # whole row. Other unsupported types used to crash or
                 # silently re-send the previous column's mutation.
                 logInfo('putByColumns')
                 logInfo('unsupported value type %s for column %s'
                         % (type(value).__name__, key))
                 return False
             mutations.append(Hbase.Mutation(column=key, value=cell))
         self.client.mutateRow(self.dbname, rowkey, mutations)
         return True
     except (Hbase.IOError, Hbase.TException, Hbase.TApplicationException,
             Hbase.IllegalArgument) as e:
         logInfo('putByColumns')
         logInfo(e)
         print(e)
     return False
Esempio n. 16
0
    def write_hbase(self, data):
        """
        Apply one operation to HBase. The row key is
        ``md5(_id)[0:10] + ':' + _id``.

        :param data: description of the operation, for example
        {
            'op': 'i',  # 'i' inserts, 'd' deletes, anything else does nothing
            'table_name': 'hb_charts',
            '_id': '121314125_img2',
            'columns': {
                'title' : 'This is a title'
            }
        }
        :return: None
        """

        operation = data['op']
        hbase_table = bytes(data['table_name'], "utf-8")
        doc_id = data['_id']
        key_prefix = hashlib.md5(bytes(doc_id, "utf-8")).hexdigest()[0:10]
        hbase_row = bytes(key_prefix + ':' + doc_id, "utf-8")
        if 'columns' in data:
            fields = data['columns']
        else:
            fields = []

        if operation == 'd':
            self.client.deleteAllRow(hbase_table, hbase_row, {})
            self.logger.info(str(QUEUE.qsize()) + ' 删除到 Hbase ' + str(data))
            return
        if operation != 'i':
            return

        # hbase.client.keyvalue.maxsize defaults to 10M; a larger value is
        # replaced by the text "None".
        size_limit = 10 * 1024 * 1024
        row_mutations = []
        for field_name in fields:
            if field_name == '_id':
                continue
            column = bytes('data:' + field_name, encoding="utf8")
            payload = bytes(str(fields[field_name]), encoding="utf8")
            if len(payload) >= size_limit:
                payload = bytes(str(None), encoding="utf8")
            row_mutations.append(Hbase.Mutation(column=column, value=payload))
        self.client.mutateRow(hbase_table, hbase_row, row_mutations, {})
        self.logger.info(str(QUEUE.qsize()) + ' 插入到 Hbase ' + str(data))
Esempio n. 17
0
def execute():
    """Write one sample row (key ``00001``) to ``tablename`` over a SASL Thrift connection.

    Two cells, ``c:coluna1`` and ``c:coluna2``, are sent in a single batch
    and ``OK`` is printed on success. The transport is closed even when
    the write fails.
    """
    sock = TSocket.TSocket(thriftServer, thriftPort)
    transport = TTransport.TSaslClientTransport(sock, thriftServer,
                                                saslServiceName)
    #protocol = TCompactProtocol.TCompactProtocol(transport)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)

    mutations = [
        Hbase.Mutation(column="c:coluna1", value='Texto da coluna 1'),
        Hbase.Mutation(column="c:coluna2", value='Texto da coluna 2')
    ]
    mutationsbatch = [Hbase.BatchMutation(row='00001', mutations=mutations)]
    mutations_attributes = {}

    transport.open()
    try:
        client.mutateRows(tablename, mutationsbatch, mutations_attributes)
        print('OK')
    finally:
        # Release the connection on failure too; the locals need no
        # explicit ``del`` because they go away when the function returns.
        transport.close()
Esempio n. 18
0
 def putByColumn(self, rowkey, arg):
     """Write a single cell and report success.

     :param rowkey: key of the row to write
     :param arg: mapping with exactly one ``{column: value}`` entry. An
         object exposing a ``key()`` accessor is still honoured, as
         before.
     :return: True on success; False when one of the caught HBase/Thrift
         errors occurred (it is logged and printed)
     :raises ValueError: if a mapping does not hold exactly one entry
     """
     if hasattr(arg, 'key'):
         key = arg.key()
     else:
         # Plain dicts have no key() method, so take the only key directly.
         keys = list(arg)
         if len(keys) != 1:
             raise ValueError(
                 'putByColumn expects exactly one column, got %d' % len(keys))
         key = keys[0]
     value = arg[key]

     try:
         mutations = [Hbase.Mutation(column=key, value=value)]
         self.client.mutateRow(self.dbname, rowkey, mutations)
         return True
     except (Hbase.IOError, Hbase.TException, Hbase.TApplicationException,
             Hbase.IllegalArgument) as e:
         logInfo('putByColumn')
         logInfo(e)
         print(e)
     return False
Esempio n. 19
0
    def put(self, rowKey, qualifier, value):
        """
        put one cell of one row
        The cell is written to column ``<self.columnFamilies[0]>:<qualifier>``.
        :param rowKey: rowKey (``str``, UTF-8 encoded before sending)
        :param qualifier: column qualifier within the first column family
        :param value: column value; ``str`` is UTF-8 encoded, ``int`` goes
            through ``encode()``
        :raises TypeError: if ``value`` is neither ``str`` nor ``int``
        :description: HbaseApi(table).put('rowKey','qualifier','value')
        """

        rowKey = rowKey.encode('utf-8')
        if isinstance(value, str):
            payload = value.encode('utf-8')
        elif isinstance(value, int):
            payload = encode(value)
        else:
            # Without this branch the mutation variable stayed unbound.
            raise TypeError('unsupported value type %s; expected str or int'
                            % type(value).__name__)
        column = (self.columnFamilies[0] + ':' + qualifier).encode('utf-8')
        mutations = [Hbase.Mutation(column=column, value=payload)]
        self.client.mutateRow(self.table, rowKey, mutations, {})
Esempio n. 20
0
def updateUUID(table_name):
    """Assign the next entry of ``uuids`` to every row that has a visit count.

    Scans the first column family returned by ``getColumnDescriptors``.
    Each row containing ``email_info_family:visit_open_count`` is written
    back with all of its scanned cells plus
    ``email_info_family:lyrisUUID`` set to the next unused ``uuids`` entry.
    The scan stops when the scanner is exhausted or ``uuids`` runs out.
    Other errors are printed and end the scan; the scanner is always
    closed.

    :param table_name: HBase table to scan and update
    """
    coldesc = client.getColumnDescriptors(table_name)

    desc_name, _ = list(coldesc.items())[0]

    print(desc_name)

    scanner = client.scannerOpen(table_name, '', [desc_name])

    counter = 0
    indexOfUUID = 0
    try:
        while True:
            result = client.scannerGet(scanner)
            # An empty result marks the end of the scan.
            if not result:
                break
            row = result[0]

            fields = dict()
            valid = False
            for k, v in row.columns.items():
                fields[k] = v.value
                if k == "email_info_family:visit_open_count":
                    print(row.row)
                    valid = True

            if valid:
                # No UUID left to hand out: stop scanning.
                if indexOfUUID >= len(uuids):
                    break
                fields["email_info_family:lyrisUUID"] = uuids[indexOfUUID]
                indexOfUUID = indexOfUUID + 1

                mutatelist = [Hbase.Mutation(column=k, value=v)
                              for k, v in fields.items()]

                print(mutatelist)
                client.mutateRow(table_name, row.row, mutatelist)

            counter = counter + 1
            if (counter % 1000) == 0:
                print("scanning....%d" % (counter))
    except Exception as e:
        # Still best-effort, but the failure is no longer hidden.
        print("updateUUID aborted: %s" % (e,))
    finally:
        client.scannerClose(scanner)
Esempio n. 21
0
def updateColumn(table_name):
    """Set ``master_info_cf:optIn`` to ``"1"`` on every row where it differs.

    Scans the first column family returned by ``getColumnDescriptors`` and
    rewrites the ``optIn`` cell of each row whose current value is not
    ``"1"``, printing the key of every row it changes. Errors are printed
    and end the scan; the scanner is always closed.

    :param table_name: HBase table to scan and update
    """
    coldesc = client.getColumnDescriptors(table_name)

    desc_name, _ = list(coldesc.items())[0]

    print(desc_name)

    scanner = client.scannerOpen(table_name, '', [desc_name])

    counter = 0
    try:
        while True:
            result = client.scannerGet(scanner)
            # An empty result marks the end of the scan.
            if not result:
                break
            row = result[0]

            # Cell values are strings, so compare with "1" (the value that
            # is written below); comparing with the int 1 never matches and
            # would rewrite rows that are already correct.
            valid = any(
                k == "master_info_cf:optIn" and v.value != "1"
                for k, v in row.columns.items())

            if valid:
                mutatelist = [
                    Hbase.Mutation(column="master_info_cf:optIn", value="1")
                ]
                print(row.row)
                client.mutateRow(table_name, row.row, mutatelist)

            counter = counter + 1
            if (counter % 1000) == 0:
                print("scanning....%d" % (counter))
    except Exception as e:
        # Still best-effort, but the failure is no longer hidden.
        print("updateColumn aborted: %s" % (e,))
    finally:
        client.scannerClose(scanner)
def columnProcess(table_name):
    """Fill in a missing ``emailDomain`` cell from the row's email address.

    Scans the first column family returned by ``getColumnDescriptors``.
    For every row whose ``master_info_cf:emailDomain`` is empty and whose
    ``master_info_cf:email`` splits into exactly two parts at ``@``, the
    part after ``@`` is written to ``<family>emailDomain``. A failed write
    is reported and the scan continues; any other error is printed and
    ends the scan. The scanner is always closed.

    :param table_name: HBase table to scan and update
    """
    coldesc = client.getColumnDescriptors(table_name)

    desc_name, _ = list(coldesc.items())[0]

    scanner = client.scannerOpen(table_name, '', [desc_name])

    try:
        while True:
            result = client.scannerGet(scanner)
            # An empty result marks the end of the scan.
            if not result:
                break

            row = result[0].columns
            row_key = result[0].row

            email = ""
            emailDomain = ""
            for col, val in row.items():
                if col == "master_info_cf:email":
                    email = val.value
                elif col == "master_info_cf:emailDomain":
                    emailDomain = val.value

            if len(emailDomain) != 0:
                continue
            if len(email) == 0:
                print("There are risky row exist")
                continue

            two = email.split('@')
            if len(two) != 2:
                continue

            print(row_key)
            try:
                column = "%semailDomain" % (desc_name)
                client.mutateRow(
                    table_name, row_key,
                    [Hbase.Mutation(column=column, value=two[1])])
            except Exception:
                print("Error Mutation: %s" % (row_key))
    except Exception as e:
        # Still best-effort, but the failure is no longer hidden.
        print("columnProcess aborted: %s" % (e,))
    finally:
        # Must run on every exit path; a close placed after the endless
        # loop is never reached.
        client.scannerClose(scanner)
Esempio n. 23
0
def main(args):
    """Set ``master_info_cf:webSiteType`` to ``Commercial`` on one fixed row.

    :param args: command-line arguments (not used)
    """
    # Other tables used while debugging: lyris_uptilt_master_lyris,
    # lyris_fulcrumtech_master_lyris, visitor_by_hour, email_by_hour.
    table_name = 'lyris_uptiltallin1002_master_lyris'
    row_key = '[email protected]|079678-13795157-000'

    web_site_type = Hbase.Mutation(column='master_info_cf:webSiteType',
                                   value='Commercial')
    client.mutateRow(table_name, row_key, [web_site_type])
Esempio n. 24
0
def insert(table_name, row):
    """
    Insert one row into a table.

    :param table_name: table name (``str`` or ``bytes``)
    :param row: one row of data, in the form
        {'row_key':'ad97c74a38b6','cf1:field1':'data...', 'cf2:field1':'data...'};
        ``row_key`` may be ``str`` or ``bytes``, every other key is a
        column name and its value is stored as ``str(value)`` in UTF-8
    :raises IllegalArgument: if an encoded value is 10M or larger
    :return: None
    """

    row_key = bytes(row['row_key'], encoding='utf-8') if isinstance(row['row_key'], str) else row['row_key']
    table_name = bytes(table_name, encoding='utf-8') if isinstance(table_name, str) else table_name
    # hbase.client.keyvalue.maxsize defaults to 10M
    max_cell_size = 10 * 1024 * 1024

    mutations = []
    for item, value in row.items():
        if item == 'row_key':
            continue
        key = bytes(item, encoding="utf8")
        var = bytes(str(value), encoding="utf8")
        if len(var) >= max_cell_size:
            # The row key may be bytes; decode it so that building the
            # message cannot itself fail with a TypeError.
            if isinstance(row_key, bytes):
                readable_key = row_key.decode('utf-8', 'replace')
            else:
                readable_key = str(row_key)
            raise IllegalArgument("row_key: " + readable_key + ' 的数据的 ' + item + ' 字段的值大小超过了10M ')
        mutations.append(Hbase.Mutation(column=key, value=var))
    client.mutateRow(table_name, row_key, mutations, {})
Esempio n. 25
0
    def put_Item(self, item):
        """Store an Item produced by scrapy in HBase.

        The item's ``url`` (UTF-8 encoded) is the row key; an item without
        a url is skipped. Every field is written to column
        ``self.columnFamilies[0] + label``, with the label ``dtype``
        renamed to ``type``.

        :param item: mapping-like item whose values have an ``encode`` method
        :return: None
        """
        # NOTE(review): family and label are joined without a ':' here;
        # presumably self.columnFamilies[0] already ends with one -- confirm.
        columnFamily = self.columnFamilies[0]

        # Check the sentinel before encoding: once encoded, the key is
        # bytes on Python 3 and never equals the str 'not set'.
        url = item.get('url', 'not set')
        if url == 'not set':
            return
        rowKey = url.encode('utf8')

        mutation = []
        for label in item.keys():
            val = item.get(label).encode('utf8')
            if label == 'dtype':
                label = 'type'

            mutation.append(
                Hbase.Mutation(column=columnFamily + label, value=val))

        if not mutation:
            return

        self.client.mutateRow(self.table, rowKey, mutation, {})
def change_label(request):
    """Store a new label for one row of the ``drug`` table and echo it back.

    The request body is JSON with the keys ``new_label`` and ``index``;
    ``index`` is used as the row key and ``new_label`` is written to
    column ``label:``.

    :param request: HTTP request whose body holds the JSON payload
    :return: JSON ``HttpResponse`` ``{'modified_label': <new_label>}``
        with status 200
    """
    print(request.body)
    dict_from_req = json.loads(request.body.decode(encoding='utf-8'))
    new_label = dict_from_req['new_label']
    ind = dict_from_req['index']
    response_data = {'modified_label': new_label}

    # Connect to HBase Thrift server
    host = 'ai-master.sh.intel.com'
    port = 9090
    transport = TTransport.TBufferedTransport(TSocket.TSocket(host, port))
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = Hbase.Client(protocol)
    transport.open()
    try:
        # add/update rows with thrift
        mutations = [Hbase.Mutation(column='label:', value=new_label)]
        client.mutateRow('drug', ind, mutations)
    finally:
        # Do not leak the connection when the write fails.
        transport.close()

    return HttpResponse(json.dumps(response_data),
                        content_type='application/json',
                        status=200)
Esempio n. 27
0
def getUpdateColumn(table_name, column):
    """Scan a table and write each row's cells back, skipping the lyris_uuid column.

    Opens a scanner on the first column family returned by
    ``getColumnDescriptors`` and, for each scanned row, re-sends every
    cell except ``email_info_family:lyris_uuid`` to the same row key.

    NOTE(review): ``column`` is never used in the body.
    NOTE(review): the scanner is never closed.
    NOTE(review): every exception is swallowed by the bare ``except``, so
    the function returns True regardless of what happened.

    :param table_name: HBase table to scan and rewrite
    :param column: unused
    :return: always True
    """

    coldesc = client.getColumnDescriptors(table_name)

    # First (family name, descriptor) pair; relies on Python 2 where
    # dict.items() returns a list.
    desc_name, desc = coldesc.items()[0]

    print desc_name

    scanner = client.scannerOpen(table_name, '', [desc_name])

    counter = 0

    try:
        # The loop has no exit condition of its own; it only ends when a
        # statement inside it raises.
        while True:
            mutatelist = list()
            result = client.scannerGet(scanner)
            #print result[0]
            #print result[0].row

            for col, value in result[0].columns.items():

                if (col == "email_info_family:lyris_uuid"):
                    continue

                mutatelist.append(Hbase.Mutation(column=col,
                                                 value=value.value))

            client.mutateRow(table_name, result[0].row, mutatelist)
            # NOTE(review): bare attribute access that looks like an
            # unfinished statement. If the client has no attribute ``d``
            # this raises AttributeError, which the bare except below
            # swallows, ending the loop after the first row -- confirm the
            # intended call.
            client.d
            counter = counter + 1
            if ((counter % 1000) == 0):
                print "scanning....%d" % (counter)

    except:
        pass

    return True
Esempio n. 28
0
def put_batch_from_local(table_name='l_test_table', batch_size=500):
    """Load ``./data.txt`` into HBase in batches of ``batch_size`` rows.

    Each non-blank line holds seven ``|``-separated fields: name, sex,
    phoneNo, birthDay, address, answer and the row key. Rows are sent
    with ``mutateRows`` whenever ``batch_size`` of them have accumulated,
    and the remainder is sent in one final call.

    :param table_name: HBase table to write to
    :param batch_size: number of rows per ``mutateRows`` call
    :raises IndexError: if a non-blank line has fewer than seven fields
    """
    # Target column of each of the first six fields, in file order.
    columns = (
        "person:name",
        "person:sex",
        "person:phoneNo",
        "person:birthDay",
        "person:address",
        "content:answer",
    )
    transport.open()
    try:
        with open('./data.txt') as file:
            lines = file.readlines()
        len_file = len(lines)
        print('len_file : ' + str(len_file))

        mutations_batch = []
        for i, line in enumerate(lines, 1):
            # Skip blank lines (e.g. a trailing empty line).
            if not line.strip():
                continue
            fields = line.split('|')
            rowkey = fields[6].strip()
            mutations = [
                Hbase.Mutation(column=column, value=value)
                for column, value in zip(columns, fields)
            ]
            mutations_batch.append(
                BatchMutation(row=rowkey, mutations=mutations))
            if len(mutations_batch) >= batch_size:
                print("start process mutateRows ----- i: " + str(i))
                client.mutateRows(table_name, mutations_batch)
                mutations_batch = []

        # Send the tail in a single call rather than one call per row.
        if mutations_batch:
            print("start last mutateRows ----- i: " + str(len_file))
            client.mutateRows(table_name, mutations_batch)
    finally:
        # Close the connection even when a line or a write fails.
        transport.close()
Esempio n. 29
0
    tableName = 'AdventureWorks_Person_Contact'
    tableExists = False
    tableNames = client.getTableNames()
    for table in tableNames:
        if table == tableName:
            tableExists = True
    if tableExists != True:
        columnFamilies = []
        columnFamilies.append(Hbase.ColumnDescriptor(name='info'))
        columnFamilies.append(Hbase.ColumnDescriptor(name='contact'))
        columnFamilies.append(Hbase.ColumnDescriptor(name='others'))
        client.createTable(tableName, columnFamilies)
        mutationsbatch = []
        mutations = [
            Hbase.Mutation(column='info:FULLNAME', value='Gustavo Achong'),
            Hbase.Mutation(column='info:AGE', value='38'),
            Hbase.Mutation(column='contact:EMAILID',
                           value='*****@*****.**'),
            Hbase.Mutation(column='contact:PHONE', value='398-555-0132'),
            Hbase.Mutation(column='others:MODIFIEDDATE',
                           value='5/16/2005 4:33:33 PM')
        ]
        mutationsbatch.append(Hbase.BatchMutation(row='1',
                                                  mutations=mutations))
        mutations = [
            Hbase.Mutation(column='info:FULLNAME', value='Catherine Abel'),
            Hbase.Mutation(column='info:AGE', value='36'),
            Hbase.Mutation(column='contact:EMAILID',
                           value='*****@*****.**'),
            Hbase.Mutation(column='contact:PHONE', value='747-555-0171'),
Esempio n. 30
0
def safe_to_hbase():
    """Store the output of ``recived_process`` as one HBase row.

    The row key is ``md5(url) + timestamp + url`` where ``url`` is
    ``result_list[0]``. The row always gets ``recived:url_send``,
    ``recived:status`` and ``recived:time_recived``. When
    ``result_recived_list`` has more than two entries, ``result_list[i]``
    is additionally stored as ``recived:result_<i - 1>`` for every index
    ``i`` from 2 up to the last index of ``result_recived_list``.
    """
    hbase_client = Hbase.Client(protocol)
    transport.open()
    try:
        result_list, result_recived_list = recived_process(str)

        # One timestamp for both the row key and the time column, so the
        # two cannot differ across a second boundary.
        recived_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
        rowkey = hashlib.md5(result_list[0]).hexdigest() + recived_time \
                 + result_list[0]

        mutations = [
            Hbase.Mutation(column="recived:url_send", value=result_list[0]),
            Hbase.Mutation(column="recived:status", value=result_list[1]),
            Hbase.Mutation(column="recived:time_recived", value=recived_time),
        ]
        if len(result_recived_list) > 2:
            print(len(result_recived_list))
            # Build only the result cells that are needed, so short result
            # lists no longer fail on elements that would never be written.
            for i in range(2, len(result_recived_list)):
                mutations.append(
                    Hbase.Mutation(column="recived:result_%d" % (i - 1),
                                   value=result_list[i]))
                print(mutations)

        hbase_client.mutateRow(HBASE_TABLE_NAME, rowkey, mutations)
    finally:
        # Close the connection even when processing or the write fails.
        transport.close()