def picarus_loader(prefix, dataset, email, picarus_server, api_key=None,
                   login_key=None, otp=None, download=False, test=False,
                   verbose=False):
    """Upload every image of a registered dataset to a Picarus server.

    Args:
        prefix: String prepended to each split name to form the prefix that
            is handed to ``hadoopy_hbase.hash_key``.
        dataset: Key into ``DATASETS``; the value found there is called with
            no arguments to build the dataset object.
        email: Account email passed to ``picarus.PicarusClient``.
        picarus_server: Server passed to ``picarus.PicarusClient``.
        api_key: API key for the upload client.  May be left as ``None``
            when ``otp`` is supplied.
        login_key: Login key used together with ``otp`` to obtain an API key.
        otp: When truthy, ``auth_yubikey(otp)`` is called and the ``'apiKey'``
            entry of its result replaces ``api_key``.
        download: When true, ``dataset.download()`` is called before loading.
        test: When true, each row is read back after the upload, compared
            with what was sent, and then deleted again.
        verbose: When true, print the row key and the length of the
            ``'data:image'`` column for each uploaded row.

    Raises:
        ValueError: If no API key is available after the optional OTP step.
        AssertionError: In test mode, if the columns read back from the
            server differ from the uploaded columns.
    """
    import hadoopy_hbase
    import picarus

    dataset = DATASETS[dataset]()
    if download:
        dataset.download()
    if otp:
        # Exchange login_key + one-time password for an API key.
        auth_client = picarus.PicarusClient(email=email, login_key=login_key, server=picarus_server)
        api_key = auth_client.auth_yubikey(otp)['apiKey']
    if api_key is None:
        raise ValueError('api_key or login_key/otp must be set!')
    client = picarus.PicarusClient(email=email, api_key=api_key, server=picarus_server, max_attempts=10)
    for split, name, columns in dataset.images():
        row = hadoopy_hbase.hash_key(name, prefix=prefix + split, suffix=name, hash_bytes=4)
        if verbose:
            print('row[%r] len(data:image)[%d]' % (repr(row), len(columns.get('data:image', ''))))
        client.patch_row(TABLE, row, columns)
        if test:
            remote_columns = client.get_row(TABLE, row)
            if remote_columns != columns:
                # Dump both sides (and per-column sizes) to aid debugging.
                print(remote_columns)
                print(columns)
                print({x: len(y) for x, y in remote_columns.items()})
                print({x: len(y) for x, y in columns.items()})
                # Raised explicitly rather than via ``assert`` so the check
                # still fires when Python runs with -O (asserts stripped).
                raise AssertionError('row %r: remote columns differ from uploaded columns' % (row,))
            client.delete_row(TABLE, row)
def hbase_loader(prefix, dataset, thrift_server, thrift_port, verbose=False):
    """Write every image of a registered dataset into HBase over Thrift.

    Args:
        prefix: String prepended to each split name to form the prefix that
            is handed to ``hadoopy_hbase.hash_key``.
        dataset: Key into ``DATASETS``; the value found there is called with
            no arguments to build the dataset object.
        thrift_server: Server argument for ``hadoopy_hbase.connect``.
        thrift_port: Port argument for ``hadoopy_hbase.connect``.
        verbose: When true, print the ``repr`` of each row key.
    """
    import hadoopy_hbase

    source = DATASETS[dataset]()
    connection = hadoopy_hbase.connect(thrift_server, thrift_port)
    for split, name, columns in source.images():
        row_key = hadoopy_hbase.hash_key(name, prefix=prefix + split, suffix=name, hash_bytes=4)
        if verbose:
            print(repr(row_key))
        # One Mutation per column/value pair of this image.
        cell_updates = []
        for column, value in columns.items():
            cell_updates.append(hadoopy_hbase.Mutation(column=column, value=value))
        connection.mutateRow(TABLE, row_key, cell_updates)
# Script: walk a directory tree of logo images (one sub-directory per entity)
# and write each file into the "images" HBase table with its class and
# file name as metadata columns.
import hadoopy_hbase
import glob
import os
import json

c = hadoopy_hbase.connect()
table_name = "images"
# One-time table setup, left disabled here:
# c.createTable(table_name, [hadoopy_hbase.ColumnDescriptor('data:'),
#                            hadoopy_hbase.ColumnDescriptor('meta:')])
for x in glob.glob("/mnt/brandyn_extra/goodlogo_entity_images/*"):
    # The directory name is used as the entity (class) label.
    entity = os.path.basename(x)
    for y in glob.glob(x + "/*"):
        fn = os.path.basename(y)
        print((entity, fn))
        # Read the image as raw bytes and close the handle promptly; text
        # mode would try to decode binary image data on Python 3.
        with open(y, "rb") as image_file:
            image_data = image_file.read()
        ms = [
            hadoopy_hbase.Mutation(column="data:image", value=image_data),
            hadoopy_hbase.Mutation(column="meta:class", value=entity),
            hadoopy_hbase.Mutation(column="meta:file", value=fn),
        ]
        entity_fn = "%s/%s" % (entity, fn)
        row = hadoopy_hbase.hash_key(entity_fn, prefix="logos:good", suffix=entity_fn, hash_bytes=4)
        c.mutateRow(table_name, row, ms)
# Optional follow-up, left disabled here:
# c.majorCompact(table_name)