Esempio n. 1
0
def picarus_loader(prefix, dataset, email, picarus_server, api_key=None, login_key=None, otp=None, download=False, test=False, verbose=False):
    """Upload every image of a registered dataset to a Picarus server.

    Each ``(split, name, columns)`` item yielded by ``dataset.images()`` is
    written with ``client.patch_row`` under a row key built by
    ``hadoopy_hbase.hash_key`` from ``prefix + split`` and the image name.

    Args:
        prefix: String prepended to the split name to form the row-key prefix.
        dataset: Key into the module-level ``DATASETS`` mapping; the mapped
            value is called with no arguments to build the dataset object.
        email: Account e-mail passed to ``picarus.PicarusClient``.
        picarus_server: Server passed to ``picarus.PicarusClient``.
        api_key: API key used for the upload client. Overwritten when ``otp``
            is truthy.
        login_key: Login key used together with ``otp`` to obtain an API key.
        otp: One-time password handed to ``auth_yubikey``; when truthy, the
            API key is taken from the ``'apiKey'`` field of the response.
        download: When true, ``dataset.download()`` is called first.
        test: When true, each row is read back, compared with what was sent,
            and then deleted again.
        verbose: When true, print the row key and the length of the
            ``data:image`` column for every row.

    Raises:
        ValueError: If no API key is available after the optional OTP login.
        AssertionError: In test mode, if the row read back from the server
            differs from the uploaded columns.
    """
    import hadoopy_hbase
    import picarus
    dataset = DATASETS[dataset]()
    if download:
        dataset.download()
    if otp:
        # Exchange login_key + one-time password for an API key.
        api_key = picarus.PicarusClient(email=email, login_key=login_key, server=picarus_server).auth_yubikey(otp)['apiKey']
    if api_key is None:
        raise ValueError('api_key or login_key/otp must be set!')
    client = picarus.PicarusClient(email=email, api_key=api_key, server=picarus_server, max_attempts=10)
    for split, name, columns in dataset.images():
        row = hadoopy_hbase.hash_key(name, prefix=prefix + split, suffix=name, hash_bytes=4)
        if verbose:
            print('row[%r] len(data:image)[%d]' % (repr(row), len(columns.get('data:image', ''))))
        client.patch_row(TABLE, row, columns)
        if test:
            remote_columns = client.get_row(TABLE, row)
            if remote_columns != columns:
                # Dump both sides (and their value lengths) before failing.
                print(remote_columns)
                print(columns)
                print({x: len(y) for x, y in remote_columns.items()})
                print({x: len(y) for x, y in columns.items()})
                # Raise explicitly instead of using ``assert`` so the check
                # still fails when Python runs with -O (asserts stripped).
                raise AssertionError('remote columns differ from uploaded columns for row %r' % (row,))
            client.delete_row(TABLE, row)
Esempio n. 2
0
def picarus_loader(prefix, dataset, email, picarus_server, api_key=None, login_key=None, otp=None, download=False, test=False, verbose=False):
    """Upload every image of a registered dataset to a Picarus server.

    Each ``(split, name, columns)`` item yielded by ``dataset.images()`` is
    written with ``client.patch_row`` under a row key built by
    ``hadoopy_hbase.hash_key`` from ``prefix + split`` and the image name.

    Args:
        prefix: String prepended to the split name to form the row-key prefix.
        dataset: Key into the module-level ``DATASETS`` mapping; the mapped
            value is called with no arguments to build the dataset object.
        email: Account e-mail passed to ``picarus.PicarusClient``.
        picarus_server: Server passed to ``picarus.PicarusClient``.
        api_key: API key used for the upload client. Overwritten when ``otp``
            is truthy.
        login_key: Login key used together with ``otp`` to obtain an API key.
        otp: One-time password handed to ``auth_yubikey``; when truthy, the
            API key is taken from the ``'apiKey'`` field of the response.
        download: When true, ``dataset.download()`` is called first.
        test: When true, each row is read back, compared with what was sent,
            and then deleted again.
        verbose: When true, print the row key and the length of the
            ``data:image`` column for every row.

    Raises:
        ValueError: If no API key is available after the optional OTP login.
        AssertionError: In test mode, if the row read back from the server
            differs from the uploaded columns.
    """
    import hadoopy_hbase
    import picarus
    dataset = DATASETS[dataset]()
    if download:
        dataset.download()
    if otp:
        # Exchange login_key + one-time password for an API key.
        api_key = picarus.PicarusClient(email=email, login_key=login_key, server=picarus_server).auth_yubikey(otp)['apiKey']
    if api_key is None:
        raise ValueError('api_key or login_key/otp must be set!')
    client = picarus.PicarusClient(email=email, api_key=api_key, server=picarus_server, max_attempts=10)
    for split, name, columns in dataset.images():
        row = hadoopy_hbase.hash_key(name, prefix=prefix + split, suffix=name, hash_bytes=4)
        if verbose:
            print('row[%r] len(data:image)[%d]' % (repr(row), len(columns.get('data:image', ''))))
        client.patch_row(TABLE, row, columns)
        if test:
            remote_columns = client.get_row(TABLE, row)
            if remote_columns != columns:
                # Dump both sides (and their value lengths) before failing.
                print(remote_columns)
                print(columns)
                print({x: len(y) for x, y in remote_columns.items()})
                print({x: len(y) for x, y in columns.items()})
                # Raise explicitly instead of using ``assert`` so the check
                # still fails when Python runs with -O (asserts stripped).
                raise AssertionError('remote columns differ from uploaded columns for row %r' % (row,))
            client.delete_row(TABLE, row)
Esempio n. 3
0
def hbase_loader(prefix, dataset, thrift_server, thrift_port, verbose=False):
    """Write every image of a registered dataset into HBase over Thrift.

    Args:
        prefix: String prepended to the split name to form the row-key prefix.
        dataset: Key into the module-level ``DATASETS`` mapping; the mapped
            value is called with no arguments to build the dataset object.
        thrift_server: First argument passed to ``hadoopy_hbase.connect``.
        thrift_port: Second argument passed to ``hadoopy_hbase.connect``.
        verbose: When true, print the ``repr`` of every row key.
    """
    import hadoopy_hbase
    source = DATASETS[dataset]()
    connection = hadoopy_hbase.connect(thrift_server, thrift_port)
    for split, name, columns in source.images():
        row_key = hadoopy_hbase.hash_key(name, prefix=prefix + split, suffix=name, hash_bytes=4)
        if verbose:
            print(repr(row_key))
        # One Mutation per column, applied to the row in a single call.
        cell_updates = []
        for column, value in columns.items():
            cell_updates.append(hadoopy_hbase.Mutation(column=column, value=value))
        connection.mutateRow(TABLE, row_key, cell_updates)
Esempio n. 4
0
def hbase_loader(prefix, dataset, thrift_server, thrift_port, verbose=False):
    """Load the images of a dataset from ``DATASETS`` into the HBase ``TABLE``.

    A connection is opened with ``hadoopy_hbase.connect(thrift_server,
    thrift_port)``. For each ``(split, name, columns)`` item of the dataset,
    a row key is derived with ``hadoopy_hbase.hash_key`` and all columns are
    written with one ``mutateRow`` call.

    Args:
        prefix: Combined with the split name as the row-key prefix.
        dataset: Name looked up in ``DATASETS``; the entry is instantiated
            by calling it without arguments.
        thrift_server: Thrift server passed to ``hadoopy_hbase.connect``.
        thrift_port: Thrift port passed to ``hadoopy_hbase.connect``.
        verbose: Print ``repr(row)`` for each row when true.
    """
    import hadoopy_hbase

    def _as_mutations(cols):
        # Turn a column -> value mapping into a list of Mutation objects.
        return [hadoopy_hbase.Mutation(column=col, value=val) for col, val in cols.items()]

    images = DATASETS[dataset]().images
    hbase = hadoopy_hbase.connect(thrift_server, thrift_port)
    for split, name, columns in images():
        key_prefix = prefix + split
        row_id = hadoopy_hbase.hash_key(name, prefix=key_prefix, suffix=name, hash_bytes=4)
        if verbose:
            print(repr(row_id))
        hbase.mutateRow(TABLE, row_id, _as_mutations(columns))
Esempio n. 5
0
import hadoopy_hbase
import glob
import os
import json

# Load every file under the per-entity image directories into the HBase
# "images" table, one row per file.
c = hadoopy_hbase.connect()
table_name = "images"
# Table creation is left commented out; presumably the table already exists
# with these two column families (verify before running).
# c.createTable(table_name, [hadoopy_hbase.ColumnDescriptor('data:'),
#                           hadoopy_hbase.ColumnDescriptor('meta:')])
for x in glob.glob("/mnt/brandyn_extra/goodlogo_entity_images/*"):
    # The directory name is used as the class label.
    entity = os.path.basename(x)
    for y in glob.glob(x + "/*"):
        fn = os.path.basename(y)
        print((entity, fn))
        # Read the image as raw bytes and close the handle promptly; the
        # original text-mode open(y).read() leaked the file object and is
        # unsafe for binary data.
        with open(y, "rb") as image_file:
            image_data = image_file.read()
        ms = [
            hadoopy_hbase.Mutation(column="data:image", value=image_data),
            hadoopy_hbase.Mutation(column="meta:class", value=entity),
            hadoopy_hbase.Mutation(column="meta:file", value=fn),
        ]
        # Row key is derived from "<entity>/<file>" under the "logos:good" prefix.
        entity_fn = "%s/%s" % (entity, fn)
        row = hadoopy_hbase.hash_key(entity_fn, prefix="logos:good", suffix=entity_fn, hash_bytes=4)
        c.mutateRow(table_name, row, ms)
# c.majorCompact(table_name)