コード例 #1
0
ファイル: HbaseOperate.py プロジェクト: ZhangTiny1703/Hbase
    def scanner(self, numRows=100, startRow=None, stopRow=None):
        """Scan the table and return every row the scanner produces.

        Rows are pulled from the Thrift scanner ``numRows`` at a time until
        an empty batch comes back.

        :param numRows: batch size passed to each ``scannerGetList`` call.
        :param startRow: first positional argument given to ``Hbase.TScan``.
        :param stopRow: second positional argument given to ``Hbase.TScan``.
        :return: list with one dict per row, shaped as
            ``{'row': <row key>, <qualifier>: {<column family>: <value>}}``.
            Cells whose family type is ``str`` are stored raw, cells whose
            family type is ``int`` are passed through ``decode``; cells of
            any other family type are left out.
        :raises ValueError: if a column name has no ``:`` separator, or its
            family is not listed in ``self.columnFamilies``.
        """
        scan = Hbase.TScan(startRow, stopRow)
        scannerId = self.client.scannerOpenWithScan(self.table, scan, {})

        ret = []
        try:
            rowList = self.client.scannerGetList(scannerId, numRows)
            while rowList:
                for r in rowList:
                    # NOTE(review): a qualifier literally named 'row' would
                    # collide with this key; the result shape is kept as-is
                    # for existing callers.
                    rd = {'row': r.row}
                    for k, v in r.columns.items():
                        # Split on the first ':' only, so qualifiers that
                        # themselves contain ':' do not break unpacking.
                        cf, qualifier = k.split(':', 1)
                        cells = rd.setdefault(qualifier, {})

                        cfType = self.columnFamiliesType[
                            self.columnFamilies.index(cf)]
                        if cfType == str:
                            cells[cf] = v.value
                        elif cfType == int:
                            cells[cf] = decode(v.value)

                    ret.append(rd)

                rowList = self.client.scannerGetList(scannerId, numRows)
        finally:
            # Release the server-side scanner even when a fetch or a decode
            # fails part-way through.
            self.client.scannerClose(scannerId)
        return ret
コード例 #2
0
ファイル: source_hbase.py プロジェクト: wshuyi/miner
    def get_statuses(self, uid):
        """Yield one dict per status found for ``uid``.

        The scan covers the row-key range from ``pack_mid(uid, 0)`` to
        ``pack_mid(uid, 0x7fffffffffffffff)`` in the table named by
        ``self.cfg['table_status']``.

        Each yielded dict is a copy of the status object's ``__dict__`` with
        the ``'batches'`` entry removed. When the row also carries a repost,
        its ``__dict__`` (likewise without ``'batches'``) is attached under
        ``'retweeted_status'``. Rows for which ``load_status`` returns no
        status are skipped.

        :param uid: identifier passed to ``pack_mid`` to build the key range.
        :raises KeyError: if a loaded status or repost has no ``'batches'``
            attribute.
        """
        key_beg = pack_mid(uid, 0)
        key_end = pack_mid(uid, 0x7fffffffffffffff)
        scan = Hbase.TScan(startRow=key_beg, stopRow=key_end)

        client = self._get_client()
        scanner = client.scannerOpenWithScan(self.cfg['table_status'], scan,
                                             None)

        try:
            i = 0
            while True:
                # The requested batch size grows by one on every round trip
                # (1, 2, 3, ...).
                i += 1
                row_list = client.scannerGetList(scanner, i)
                if not row_list:
                    break

                for row in row_list:
                    (status, repost) = load_status(row.columns)
                    if status is None:
                        # Nothing to report for this row. Yielding here would
                        # hand out the previous row's dict again (or fail on
                        # the very first row).
                        continue

                    status.__dict__.pop('batches')
                    ret = {}
                    ret.update(status.__dict__)
                    if repost is not None:
                        repost.__dict__.pop('batches')
                        ret['retweeted_status'] = repost.__dict__
                    yield ret
        finally:
            # Also runs when the consumer stops iterating early and the
            # generator is closed, so the scanner is not leaked.
            client.scannerClose(scanner)
コード例 #3
0
 def scanWithKeyword(self, __filter):
     """Return up to 100 rows whose ``content:0`` value contains a keyword.

     The keyword is embedded in a ``ValueFilter`` substring comparator and
     the scan is restricted to the ``content:0`` column.

     :param __filter: keyword to search for.
     :return: the row list returned by a single ``scannerGetList`` call
         (at most 100 rows).
     """
     scan = Hbase.TScan()
     scan.columns = ['content:0']
     # NOTE(review): the keyword is interpolated as-is; a value containing a
     # single quote would presumably break the filter expression - confirm
     # whether callers pre-escape it.
     scan.filterString = "ValueFilter(=,'substring:%s')" % (__filter)
     scannerId = self.client.scannerOpenWithScan(self.table, scan, {})
     try:
         return self.client.scannerGetList(scannerId, 100)
     finally:
         # Always release the scanner; it was previously left open.
         self.client.scannerClose(scannerId)
コード例 #4
0
 def from_crawler(cls, crawler):
     """Create an instance configured from ``crawler.settings``.

     Class-level state is populated first: the HBase wrapper (built from
     ``HBASE_HOST``, ``HBASE_PORT`` and ``PROXY_TABLE``), a lock, the
     ``PROXIES_TIMEOUT`` value and the ``TScan`` template. Then
     ``cls._get_proxies()`` is called, and finally an instance is created
     and its ``spider_closed`` handler is connected to the crawler's
     ``spider_closed`` signal.

     :param crawler: object exposing ``settings`` and ``signals``.
     :return: the newly created instance.
     """
     settings = crawler.settings

     cls.hbase = HbaseWrapper(
         settings.get('HBASE_HOST'),
         settings.get('HBASE_PORT'),
         settings.get('PROXY_TABLE'),
     )
     cls.mutex = thread.allocate_lock()
     cls.timeout = settings.get('PROXIES_TIMEOUT')
     cls.tscan = Hbase.TScan(columns=['cf:0'], caching=True, batchSize=20)

     # Runs after the class attributes above have been assigned.
     cls._get_proxies()

     instance = cls()
     crawler.signals.connect(instance.spider_closed,
                             signal=signals.spider_closed)
     return instance
コード例 #5
0
    def scanner(self, numRows=100, startRow=None, stopRow=None):
        """Return up to ``numRows`` rows as ``{row key: name}`` dicts.

        A single batch is fetched from the scanner; each row contributes one
        single-entry dict mapping the UTF-8 decoded row key to the UTF-8
        decoded value of its ``info:name`` cell.

        :param numRows: maximum number of rows requested from the scanner.
        :param startRow: first positional argument given to ``Hbase.TScan``.
        :param stopRow: second positional argument given to ``Hbase.TScan``.
        :return: list of single-entry dicts, in the order the rows arrive.
        :raises KeyError: if a returned row has no ``info:name`` cell.
        """
        scan = Hbase.TScan(startRow, stopRow)
        scannerId = self.client.scannerOpenWithScan(self.table, scan, {})
        try:
            rowList = self.client.scannerGetList(scannerId, numRows)
        finally:
            # Only one batch is read, so the scanner can be released right
            # away; it was previously left open.
            self.client.scannerClose(scannerId)

        return [
            {
                r.row.decode('utf-8'):
                    r.columns[b'info:name'].value.decode('utf-8')
            }
            for r in rowList
        ]
コード例 #6
0
ファイル: Client.py プロジェクト: ds112/hbase-on-windows
        # Queue the current `mutations` list as the batch entry for row '9'.
        mutationsbatch.append(Hbase.BatchMutation(row='9',
                                                  mutations=mutations))
        # Cell values for row '10', spread over the info, contact and others
        # column families.
        mutations = [
            Hbase.Mutation(column='info:FULLNAME', value='Ronald Adina'),
            Hbase.Mutation(column='info:AGE', value='41'),
            Hbase.Mutation(column='contact:EMAILID',
                           value='*****@*****.**'),
            Hbase.Mutation(column='contact:PHONE', value='453-555-0165'),
            Hbase.Mutation(column='others:MODIFIEDDATE',
                           value='5/16/2005 4:33:33 PM')
        ]
        mutationsbatch.append(
            Hbase.BatchMutation(row='10', mutations=mutations))
        # Submit every queued row mutation in a single call.
        client.mutateRows(tableName, mutationsbatch, None)

    # Open a scanner with neither a start row nor a stop row set.
    scan = Hbase.TScan(startRow=None, stopRow=None)
    scannerId = client.scannerOpenWithScan(tableName, scan, None)
    # Fetch the first result; the loop below keeps going for as long as each
    # fetch returns exactly one row.
    scanValues = client.scannerGet(scannerId)
    if len(scanValues) == 1:
        while len(scanValues) == 1:
            for row in scanValues:
                print '\n'
                # Trailing commas (Python 2 print) keep the row key and its
                # cell values on the same output line.
                print '%s' % (row.row),
                column = row.columns
                # `values` is used as a lookup key into row.columns below.
                for values in column:
                    print '%s' % (row.columns.get(values).value),
                # Fetch the next result. This rebinds scanValues inside the
                # for loop; the while condition then re-checks its length.
                scanValues = client.scannerGet(scannerId)
    # NOTE(review): not reached if any call above raises - consider
    # try/finally.
    client.scannerClose(scannerId)

    transport.close()
except Thrift.TException, tx:
コード例 #7
0
# Look up a single row by key; the result is iterated like a list of rows.
rows = client.getRow(tablename, "shakespeare-comedies-000001")

for row in rows:
    cells = row.columns

    # Read the three cell values from the configured columns.
    message = cells.get(messagecolumncf).value
    username = cells.get(usernamecolumncf).value
    linenumber = decode(cells.get(linenumbercolumncf).value)

    rowKey = row.row

    # Colon-separated summary: key, line number, user name, message.
    summary = ":".join(
        ["Got row: " + rowKey, str(linenumber), username, message])
    print(summary)

# Open a scan over all comedy rows in Shakespeare
scan = Hbase.TScan(startRow="shakespeare-comedies-000001",
                   stopRow="shakespeare-comedies-999999")
scannerId = client.scannerOpenWithScan(tablename, scan)

# Fetch the first batch of rows. Do not call scannerGet() beforehand: it
# advances the scanner, so the first row would be consumed and never reach
# the batch loop that follows.
rowList = client.scannerGetList(scannerId, numRows)

while rowList:
    for row in rowList:
        # Pull out values in columns
        message = row.columns.get(messagecolumncf).value
        username = row.columns.get(usernamecolumncf).value
        linenumber = decode(row.columns.get(linenumbercolumncf).value)