def __init__(self, thrift=None):
    """Initialize manager configuration and the HBase Thrift connection.

    Args:
        thrift: Existing hadoopy_hbase client to reuse; when None a new
            connection with default host/port is opened.
    """
    # Image/data layout.
    self.image_orig_column = 'data:image'
    self.image_column = 'data:image_320'
    self.images_table = 'images'
    self.models_table = 'picarus_models'
    # Reuse the caller's client when provided.  BUGFIX: a later unconditional
    # `self.hb = hadoopy_hbase.connect()` clobbered this assignment, ignoring
    # the `thrift` argument and leaking a connection; it has been removed.
    self.hb = thrift if thrift is not None else hadoopy_hbase.connect()
    self.max_cell_size = 10 * 1024 * 1024  # 10MB
    # Feature Settings
    self.superpixel_column = 'feat:superpixel'
    # Feature Classifier settings
    self.feature_classifier_row = self.images_table
    # Mask Hasher settings
    self.texton_classes = json.load(open('class_colors.js'))
    self.texton_num_classes = len(self.texton_classes)
    # Index Settings
    self.class_column = 'meta:class_2'
    self.indoor_class_column = 'meta:class_0'
    self.num_mappers = 6
    #self.versions = self.get_versions()
    # Model columns
    self.model_chunks_column = 'data:model_chunks'
    self.model_column = 'data:model'
    self.input_column = 'data:input'
    self.input_type_column = 'data:input_type'
    self.output_type_column = 'data:output_type'
    self.model_type_column = 'data:model_type'
    self.creation_time_column = 'data:creation_time'
    self.notes_column = 'data:notes'
    self.name_column = 'data:name'
    self.tags_column = 'data:tags'
    self.factory_info_column = 'data:factory_info'
def _setup(start_stop_rows, inputs):
    """Open a Thrift connection, build a manager, and base64-encode job inputs.

    Returns:
        Tuple of (thrift_client, manager, slices, encoded_inputs) where each
        slice is a 'b64(start),b64(stop)' string and encoded_inputs maps the
        original keys to base64-encoded values.
    """
    connection = hadoopy_hbase.connect()  # TODO: Need to pass in thrift server/port
    manager = PicarusManager(thrift=connection)
    slices = []
    for begin_row, end_row in start_stop_rows:
        slices.append(base64.b64encode(begin_row) + ',' + base64.b64encode(end_row))
    # These are background tasks, so lower our priority to keep the CPU sane.
    os.nice(5)
    encoded_inputs = dict((key, base64.b64encode(value)) for key, value in inputs.items())
    return connection, manager, slices, encoded_inputs
def main():
    """Create the 'videos', 'models', and 'images' tables with the standard
    Picarus column families (SNAPPY compression on the bulky families)."""
    parser = argparse.ArgumentParser(description='Picarus user operations')
    parser.add_argument('--thrift_server', default='localhost')
    parser.add_argument('--thrift_port', default='9090')
    ARGS = parser.parse_args()
    hb = hadoopy_hbase.connect(ARGS.thrift_server, ARGS.thrift_port)
    hb.createTable('videos', [
        hadoopy_hbase.ColumnDescriptor('data:', maxVersions=1),
        hadoopy_hbase.ColumnDescriptor('meta:', maxVersions=1),
        hadoopy_hbase.ColumnDescriptor('pred:', maxVersions=1),
        hadoopy_hbase.ColumnDescriptor('thum:', maxVersions=1),
        hadoopy_hbase.ColumnDescriptor('feat:', maxVersions=1, compression='SNAPPY'),
        hadoopy_hbase.ColumnDescriptor('hash:', maxVersions=1)
    ])
    # NOTE(review): the source here was redacted ('user:'******'images');
    # reconstructed as a plain 'user:' family consistent with the sibling
    # tables — confirm against the original deployment script.
    hb.createTable('models', [
        hadoopy_hbase.ColumnDescriptor('data:', maxVersions=1, compression='SNAPPY'),
        hadoopy_hbase.ColumnDescriptor('meta:', maxVersions=1),
        hadoopy_hbase.ColumnDescriptor('user:', maxVersions=1)
    ])
    hb.createTable('images', [
        hadoopy_hbase.ColumnDescriptor('data:', maxVersions=1),
        hadoopy_hbase.ColumnDescriptor('meta:', maxVersions=1),
        hadoopy_hbase.ColumnDescriptor('pred:', maxVersions=1),
        hadoopy_hbase.ColumnDescriptor('thum:', maxVersions=1),
        hadoopy_hbase.ColumnDescriptor('feat:', maxVersions=1, compression='SNAPPY'),
        hadoopy_hbase.ColumnDescriptor('hash:', maxVersions=1)
    ])
def __init__(self, table, col, db=None, **kw):
    """Bind to one HBase table/column pair, connecting if no client is given.

    Args:
        table: HBase table name.
        col: Fully qualified column ('family:qualifier').
        db: Optional existing client; when None, hadoopy_hbase.connect(**kw)
            opens a fresh connection.
        **kw: Connection keyword arguments forwarded to connect().
    """
    self._db = db if db is not None else hadoopy_hbase.connect(**kw)
    self._table = table
    self._col = col
def test_hbase(self):
    """Smoke test: write 100 rows x 10 columns into 'testtable', then read
    them back through an HBaseDataSource and again after round-tripping the
    source through its URI (requires a local HBase Thrift server)."""
    import hadoopy_hbase
    client = hadoopy_hbase.connect()
    try:
        client.createTable('testtable', [hadoopy_hbase.ColumnDescriptor('colfam1:')])
    except:  # best-effort create: table likely already exists
        pass
    # Populate rows '0'..'99', each with columns colfam1:col0..col9.
    for x in xrange(100):
        client.mutateRow('testtable', str(x), [
            hadoopy_hbase.Mutation(column='colfam1:col%d' % y, value=str(x))
            for y in range(10)
        ])
    ds = data_sources.HBaseDataSource({'mydata': 'colfam1:col0'}, 'testtable')
    # Exercise every accessor on the populated table.
    print list(ds.rows())
    print list(ds.columns(list(ds.rows())[0]))
    print list(ds.column_values(list(ds.rows())[0]))
    print [(x, list(y)) for x, y in ds.row_columns()]
    print [(x, dict(y)) for x, y in ds.row_column_values()]
    print ds.uri
    # Rebuild the data source from its URI and repeat: output should match.
    ds = data_sources.data_source_from_uri(ds.uri)
    print list(ds.rows())
    print list(ds.columns(list(ds.rows())[0]))
    print list(ds.column_values(list(ds.rows())[0]))
    print [(x, list(y)) for x, y in ds.row_columns()]
    print [(x, dict(y)) for x, y in ds.row_column_values()]
    print ds.uri
def __init__(self, table, row, cf, db=None, **kw):
    """Bind to a single HBase row restricted to one column family.

    Args:
        table: HBase table name.
        row: Row key.
        cf: Column family name (the trailing ':' is appended here).
        db: Optional existing client; when None, hadoopy_hbase.connect(**kw)
            opens a fresh connection.
        **kw: Connection keyword arguments forwarded to connect().
    """
    self._db = db if db is not None else hadoopy_hbase.connect(**kw)
    self._table = table
    self._row = row
    self._cf = cf + ':'
def __init__(self, server, port, *args, **kw):
    """Connect to HBase over Thrift and record server/port on self.args.

    Args:
        server: Thrift server hostname.
        port: Thrift server port.
        *args, **kw: Forwarded to the next __init__ in the MRO.
    """
    # Cooperative-multiple-inheritance bookkeeping: append our connection
    # info if a sibling class already created self.args, otherwise start it.
    if hasattr(self, 'args'):
        self.args += [server, port]
    else:
        self.args = [server, port]
    self._thrift = hadoopy_hbase.connect(server, port, timeout=300000)  # 300s timeout (ms)
    self.num_mappers = 6
    super(HBaseDB, self).__init__(*args, **kw)
def hbase_loader(prefix, dataset, thrift_server, thrift_port, verbose=False):
    """Load every image of a named dataset into the global TABLE.

    Args:
        prefix: Row-key prefix; the dataset split name is appended to it.
        dataset: Key into the DATASETS registry.
        thrift_server: Thrift server hostname.
        thrift_port: Thrift server port.
        verbose: When True, print each generated row key.
    """
    import hadoopy_hbase
    source = DATASETS[dataset]()
    client = hadoopy_hbase.connect(thrift_server, thrift_port)
    for split, name, columns in source.images():
        # Hashed row key spreads writes across regions; the name suffix
        # keeps keys unique within a prefix.
        row_key = hadoopy_hbase.hash_key(name, prefix=prefix + split, suffix=name, hash_bytes=4)
        if verbose:
            print(repr(row_key))
        cells = []
        for column, value in columns.items():
            cells.append(hadoopy_hbase.Mutation(column=column, value=value))
        client.mutateRow(TABLE, row_key, cells)
def __init__(self):
    """Initialize experiment configuration: column layout, feature/hasher/
    classifier settings, and a single HBase Thrift connection."""
    # Image/data layout.
    self.image_orig_column = 'data:image'
    self.image_column = 'data:image_320'
    self.thumbnails_column = 'data:image_75sq'
    self.images_table = 'images'
    self.models_table = 'picarus_models'
    # BUGFIX: the connection was opened twice (a second unconditional
    # hadoopy_hbase.connect() appeared below, leaking the first socket);
    # a single shared connection is kept here.
    self.hb = hadoopy_hbase.connect()
    # Feature Settings
    #self.feature_dict = {'name': 'imfeat.GIST'}
    #self.feature_name = 'gist'
    #self.feature_dict = {'name': 'imfeat.PyramidHistogram', 'args': ['lab'], 'kw': {'levels': 2, 'num_bins': [4, 11, 11]}}
    #self.feature_name = 'lab_pyramid_histogram_2level_4_11_11'
    self.feature_dict = {'name': 'picarus._features.HOGBoVW', 'kw': {'clusters': json.load(open('clusters.js')), 'levels': 2, 'sbin': 16, 'blocks': 1}}
    self.feature_name = 'bovw_hog_levels2_sbin16_blocks1_clusters100'
    self.superpixel_column = 'feat:superpixel'
    self.feature_column = 'feat:' + self.feature_name
    # Feature Hasher settings
    self.feature_hasher_row = self.images_table
    self.feature_hasher_column = 'data:hasher_' + self.feature_name
    self.feature_hash_column = 'hash:' + self.feature_name
    # Feature Classifier settings
    self.feature_classifier_row = self.images_table
    self.feature_classifier_column = 'data:classifier_' + self.feature_name
    self.feature_prediction_column = 'hash:predict_' + self.feature_name
    self.feature_class_positive = 'indoor'
    # Mask Hasher settings
    self.texton_num_classes = 8
    self.texton_classes = json.load(open('../class_colors.js'))
    self.masks_hasher_row = 'masks'
    self.masks_hasher_column = 'data:hasher_masks'
    self.masks_hash_column = 'hash:masks'
    self.masks_ilp_column = 'hash:masks_ilp'
    # Index Settings
    self.feature_index_row = self.images_table
    self.feature_index_column = 'data:index_' + self.feature_name
    self.masks_index_row = 'masks'
    self.masks_index_column = 'data:index_masks'
    self.masks_column = 'feat:masks'
    self.masks_gt_column = 'feat:masks_gt'
    self.class_column = 'meta:class_2'
    self.indoor_class_column = 'meta:class_0'
    self.num_mappers = 10
def __init__(self, columns, table, host, port, slices, data_connection=None):
    """Data source backed by an HBase table reached over Thrift.

    Args:
        columns: Dict mapping logical names to 'family:qualifier' columns.
        table: HBase table name.
        host: Thrift server hostname.
        port: Thrift server port (int, formatted with %d).
        slices: Iterable of (start_row, stop_row) pairs limiting the scan.
        data_connection: Optional existing client; when falsy a new
            connection is opened.
    """
    # Encode the scan slices into the URI so an equivalent source can later
    # be reconstructed from the URI alone.
    suffix = '/'.join(base64.urlsafe_b64encode(x) + '/' + base64.urlsafe_b64encode(y) for x, y in slices)
    super(HBaseDataSource, self).__init__('hbase://%s:%d/%s/%s' % (urllib.quote(host), port, urllib.quote(table), suffix), columns)
    import hadoopy_hbase
    if data_connection:
        self._hbase = data_connection
    else:
        self._hbase = hadoopy_hbase.connect(host, port)
    self._table = table
    self._raw_columns = columns.values()
    self._slices = slices
def test_hbase(self):
    """Smoke test: write 100 rows x 10 columns into 'testtable' and read
    them back through an HBaseDataSource (requires a local Thrift server)."""
    import hadoopy_hbase
    client = hadoopy_hbase.connect()
    try:
        client.createTable('testtable', [hadoopy_hbase.ColumnDescriptor('colfam1:')])
    except:  # best-effort create: table likely already exists
        pass
    # Populate rows '0'..'99', each with columns colfam1:col0..col9.
    for x in xrange(100):
        client.mutateRow('testtable', str(x), [hadoopy_hbase.Mutation(column='colfam1:col%d' % y, value=str(x)) for y in range(10)])
    ds = data_sources.HBaseDataSource({'mydata': 'colfam1:col0'}, 'testtable')
    # Exercise the accessors on the populated table.
    print list(ds.rows())
    print list(ds.columns(list(ds.rows())[0]))
    print list(ds.column_values(list(ds.rows())[0]))
    print [(x, list(y)) for x, y in ds.row_columns()]
def main():
    """Continuously scrape Flickr metadata for a fixed list of landmark tags
    into the 'flickr' HBase table (runs forever; requires a local Thrift
    server and Flickr API access via vision_data)."""
    #tags = ' animals architecture art asia australia autumn baby band barcelona beach berlin bike bird birds birthday black blackandwhite blue bw california canada canon car cat chicago china christmas church city clouds color concert dance day de dog england europe fall family fashion festival film florida flower flowers food football france friends fun garden geotagged germany girl graffiti green halloween hawaii holiday house india instagramapp iphone iphoneography island italia italy japan kids la lake landscape light live london love macro me mexico model museum music nature new newyork newyorkcity night nikon nyc ocean old paris park party people photo photography photos portrait raw red river rock san sanfrancisco scotland sea seattle show sky snow spain spring square squareformat street summer sun sunset taiwan texas thailand tokyo travel tree trees trip uk unitedstates urban usa vacation vintage washington water wedding white winter woman yellow zoo '.strip().split()
    tags = [
        'Pyramids Of Giza', 'Great Wall Of China', 'Terracotta Warriors',
        'Statue Of Liberty', 'Edinburgh Castle', 'Stirling Castle',
        'Empire State Building', 'Stonehenge', 'Blackpool Tower',
        'London Bridge', 'Tower Bridge', 'Buckinghampalace', 'Sphinx',
        'Eiffle Tower', 'Arc Du Triomph', 'Louvre', 'Cristo Redentor',
        'CN Tower', 'Norte Dame', 'River Nile', 'Mount Rushmore', 'Pentagon',
        'White House', 'Lincoln Memorial', 'Grand Canyon',
        'Leaning Tower Of Piza', 'Easter Island Heads', 'Niagara Falls',
        'Abbey Road', 'Ayers Rock', 'Evangeline Oak', 'Lone Cyprus',
        'Golden Gate Bridge', 'Colosseum', 'Taj Mahal', 'Santorini'
    ]
    client = hadoopy_hbase.connect('localhost')
    # Randomize tag order so restarts don't always hammer the same tags first.
    random.shuffle(tags)
    flickr = vision_data.Flickr(max_iters=1)
    #remove_table(client, 'flickr')
    #client.createTable('flickr', [ColumnDescriptor('metadata:'), ColumnDescriptor('images:')])
    while True:
        for tag in tags:
            mutations = []
            try:
                # One BatchMutation per image URL, storing each metadata
                # field under metadata:<field> as UTF-8 bytes.
                for url_m, metadata in flickr.image_class_meta_url(tag):
                    mutations.append(
                        BatchMutation(row=url_m, mutations=[
                            Mutation(column='metadata:%s' % x, value=y.encode('utf-8'))
                            for x, y in metadata.items()
                        ]))
            except Exception, e:
                # Best-effort scrape: log and move on to the next tag.
                print(e)
                continue
            st = time.time()
            client.mutateRows('flickr', mutations)
            # Report (tag, seconds per mutation, mutation count).
            if mutations:
                print((tag, (time.time() - st) / len(mutations), len(mutations)))
            else:
                print((tag, 0., len(mutations)))
def main():
    """Create the 'videos', 'models', and 'images' tables with the standard
    Picarus column families (SNAPPY compression on the bulky families)."""
    parser = argparse.ArgumentParser(description='Picarus user operations')
    parser.add_argument('--thrift_server', default='localhost')
    parser.add_argument('--thrift_port', default='9090')
    ARGS = parser.parse_args()
    hb = hadoopy_hbase.connect(ARGS.thrift_server, ARGS.thrift_port)
    hb.createTable('videos', [hadoopy_hbase.ColumnDescriptor('data:', maxVersions=1),
                              hadoopy_hbase.ColumnDescriptor('meta:', maxVersions=1),
                              hadoopy_hbase.ColumnDescriptor('pred:', maxVersions=1),
                              hadoopy_hbase.ColumnDescriptor('thum:', maxVersions=1),
                              hadoopy_hbase.ColumnDescriptor('feat:', maxVersions=1, compression='SNAPPY'),
                              hadoopy_hbase.ColumnDescriptor('hash:', maxVersions=1)])
    # NOTE(review): the source here was redacted ('user:'******'images');
    # reconstructed as a plain 'user:' family consistent with the sibling
    # tables — confirm against the original deployment script.
    hb.createTable('models', [hadoopy_hbase.ColumnDescriptor('data:', maxVersions=1, compression='SNAPPY'),
                              hadoopy_hbase.ColumnDescriptor('meta:', maxVersions=1),
                              hadoopy_hbase.ColumnDescriptor('user:', maxVersions=1)])
    hb.createTable('images', [hadoopy_hbase.ColumnDescriptor('data:', maxVersions=1),
                              hadoopy_hbase.ColumnDescriptor('meta:', maxVersions=1),
                              hadoopy_hbase.ColumnDescriptor('pred:', maxVersions=1),
                              hadoopy_hbase.ColumnDescriptor('thum:', maxVersions=1),
                              hadoopy_hbase.ColumnDescriptor('feat:', maxVersions=1, compression='SNAPPY'),
                              hadoopy_hbase.ColumnDescriptor('hash:', maxVersions=1)])
def main():
    """Create the 'videos', 'models', and 'images' tables with the standard
    Picarus column families (SNAPPY compression on the bulky families)."""
    parser = argparse.ArgumentParser(description="Picarus user operations")
    parser.add_argument("--thrift_server", default="localhost")
    parser.add_argument("--thrift_port", default="9090")
    ARGS = parser.parse_args()
    hb = hadoopy_hbase.connect(ARGS.thrift_server, ARGS.thrift_port)
    hb.createTable(
        "videos",
        [
            hadoopy_hbase.ColumnDescriptor("data:", maxVersions=1),
            hadoopy_hbase.ColumnDescriptor("meta:", maxVersions=1),
            hadoopy_hbase.ColumnDescriptor("pred:", maxVersions=1),
            hadoopy_hbase.ColumnDescriptor("thum:", maxVersions=1),
            hadoopy_hbase.ColumnDescriptor("feat:", maxVersions=1, compression="SNAPPY"),
            hadoopy_hbase.ColumnDescriptor("hash:", maxVersions=1),
        ],
    )
    # NOTE(review): the source here was redacted ("user:"******"images");
    # reconstructed as a plain "user:" family consistent with the sibling
    # tables — confirm against the original deployment script.
    hb.createTable(
        "models",
        [
            hadoopy_hbase.ColumnDescriptor("data:", maxVersions=1, compression="SNAPPY"),
            hadoopy_hbase.ColumnDescriptor("meta:", maxVersions=1),
            hadoopy_hbase.ColumnDescriptor("user:", maxVersions=1),
        ],
    )
    hb.createTable(
        "images",
        [
            hadoopy_hbase.ColumnDescriptor("data:", maxVersions=1),
            hadoopy_hbase.ColumnDescriptor("meta:", maxVersions=1),
            hadoopy_hbase.ColumnDescriptor("pred:", maxVersions=1),
            hadoopy_hbase.ColumnDescriptor("thum:", maxVersions=1),
            hadoopy_hbase.ColumnDescriptor("feat:", maxVersions=1, compression="SNAPPY"),
            hadoopy_hbase.ColumnDescriptor("hash:", maxVersions=1),
        ],
    )
def main():
    """Continuously scrape Flickr metadata for a fixed list of landmark tags
    into the 'flickr' HBase table (runs forever; requires a local Thrift
    server and Flickr API access via vision_data)."""
    #tags = ' animals architecture art asia australia autumn baby band barcelona beach berlin bike bird birds birthday black blackandwhite blue bw california canada canon car cat chicago china christmas church city clouds color concert dance day de dog england europe fall family fashion festival film florida flower flowers food football france friends fun garden geotagged germany girl graffiti green halloween hawaii holiday house india instagramapp iphone iphoneography island italia italy japan kids la lake landscape light live london love macro me mexico model museum music nature new newyork newyorkcity night nikon nyc ocean old paris park party people photo photography photos portrait raw red river rock san sanfrancisco scotland sea seattle show sky snow spain spring square squareformat street summer sun sunset taiwan texas thailand tokyo travel tree trees trip uk unitedstates urban usa vacation vintage washington water wedding white winter woman yellow zoo '.strip().split()
    tags = ['Pyramids Of Giza', 'Great Wall Of China', 'Terracotta Warriors', 'Statue Of Liberty', 'Edinburgh Castle', 'Stirling Castle', 'Empire State Building', 'Stonehenge', 'Blackpool Tower', 'London Bridge', 'Tower Bridge', 'Buckinghampalace', 'Sphinx', 'Eiffle Tower', 'Arc Du Triomph', 'Louvre', 'Cristo Redentor', 'CN Tower', 'Norte Dame', 'River Nile', 'Mount Rushmore', 'Pentagon', 'White House', 'Lincoln Memorial', 'Grand Canyon', 'Leaning Tower Of Piza', 'Easter Island Heads', 'Niagara Falls', 'Abbey Road', 'Ayers Rock', 'Evangeline Oak', 'Lone Cyprus', 'Golden Gate Bridge', 'Colosseum', 'Taj Mahal', 'Santorini']
    client = hadoopy_hbase.connect('localhost')
    # Randomize tag order so restarts don't always hammer the same tags first.
    random.shuffle(tags)
    flickr = vision_data.Flickr(max_iters=1)
    #remove_table(client, 'flickr')
    #client.createTable('flickr', [ColumnDescriptor('metadata:'), ColumnDescriptor('images:')])
    while True:
        for tag in tags:
            mutations = []
            try:
                # One BatchMutation per image URL, storing each metadata
                # field under metadata:<field> as UTF-8 bytes.
                for url_m, metadata in flickr.image_class_meta_url(tag):
                    mutations.append(BatchMutation(row=url_m, mutations=[Mutation(column='metadata:%s' % x, value=y.encode('utf-8')) for x, y in metadata.items()]))
            except Exception, e:
                # Best-effort scrape: log and move on to the next tag.
                print(e)
                continue
            st = time.time()
            client.mutateRows('flickr', mutations)
            # Report (tag, seconds per mutation, mutation count).
            if mutations:
                print((tag, (time.time() - st) / len(mutations), len(mutations)))
            else:
                print((tag, 0., len(mutations)))
def display():
    """Print every scanner record of the flickr table's metadata:title column."""
    connection = hadoopy_hbase.connect('localhost')
    records = hadoopy_hbase.scanner(connection, 'flickr', ['metadata:title'])
    for record in records:
        print(record)
# Debug script: load a hash-retrieval classifier and dump stored image sizes,
# features, and hashes for the first few SUN397 training rows as a sanity check.
import hadoopy_hbase
import logging
import time
import tempfile
import zlib
import json
import os
import random
import numpy as np
import imfeat
import picarus.modules
import picarus.api

logging.basicConfig(level=logging.DEBUG)
a = hadoopy_hbase.connect()
hrc = picarus.modules.HashRetrievalClassifier()
# Serialized index; presumably a protobuf written by the training job — TODO confirm.
hrc.load(open('sun397_feature_index.pb').read())
for num, (row, cols) in enumerate(hadoopy_hbase.scanner(a, 'images', start_row='sun397train')):
    if num > 2:  # inspect only the first three rows
        break
    print cols['feat:superpixel'][:50]
    image = imfeat.image_fromstring(cols['data:image_320'])
    print imfeat.image_fromstring(cols['data:image']).shape
    print imfeat.image_fromstring(cols['data:image_320']).shape
    print('image_75sq[%d]' % len(cols['data:image_75sq']))
    print row
    cur_f = picarus.api.np_fromstring(cols['feat:gist'])
    cur_h = np.fromstring(cols['hash:gist'], dtype=np.uint8)
    print 'HOG', picarus.api.np_fromstring(cols['feat:bovw_hog_levels2_sbin16_blocks1_clusters100'])
# One-off maintenance script: delete every 'images' row under a key prefix.
import hadoopy_hbase

c = hadoopy_hbase.connect()


def delete_rows(prefix):
    """Delete every row in 'images' whose key starts with prefix.

    The scan's exclusive stop key is the prefix with its final byte
    incremented, so a prefix ending in 0xff is rejected up front.
    """
    assert ord(prefix[-1]) != 255  # can't form a stop key past 0xff
    stop_row = prefix[:-1] + chr(ord(prefix[-1]) + 1)
    for x, y in hadoopy_hbase.scanner(c, "images", start_row=prefix, stop_row=stop_row):
        assert x.startswith(prefix)  # scanner bounds should guarantee this
        print(repr(x))
        c.deleteAllRow("images", x)

# c.majorCompact('images')
delete_rows("restaurant:flickr")
# Response renderers for the supported content types.
render_xml = lambda message: '<message>%s</message>' % message
render_json = lambda **args: json.dumps(args)
render_html = lambda message: '<html><body>%s</body></html>' % message
render_txt = lambda message: message
render_xml_exception = lambda exception: '<exception>%s</exception>' % exception.message
render_json_exception = lambda exception: json.dumps({'exception': exception.message})

parser = argparse.ArgumentParser(description='Run Picarus REST Frontend')
parser.add_argument('--redis_host', help='Redis Host', default='localhost')
parser.add_argument('--redis_port', type=int, help='Redis Port', default=6380)
parser.add_argument('--redis_db', type=int, help='Redis DB', default=0)
parser.add_argument('--port', default='15000', type=int)
parser.add_argument('--thrift_server', default='localhost')
parser.add_argument('--thrift_port', default='9090')
ARGS = parser.parse_args()
# Single shared Thrift connection, serialized through a gevent lock.
THRIFT = hadoopy_hbase.connect(ARGS.thrift_server, ARGS.thrift_port)
THRIFT_LOCK = gevent.coros.RLock()
USERS = Users(ARGS.redis_host, ARGS.redis_port, ARGS.redis_db)


def print_request():
    """Dump the current bottle request's attributes for debugging."""
    ks = ['auth', 'content_length', 'content_type', 'environ', 'fullpath', 'is_ajax', 'is_xhr', 'method', 'path', 'query_string', 'remote_addr', 'remote_route', 'script_name', 'url', 'urlparts']
    for k in ks:
        print('%s: %s' % (k, str(getattr(bottle.request, k))))
    print('%s: %s' % ('files', (getattr(bottle.request, 'files')).keys()))
    # Mapping-like attributes are materialized as dicts before printing.
    ks = ['forms', 'params', 'query', 'cookies', 'headers']
    for k in ks:
        print('%s: %s' % (k, str(dict(getattr(bottle.request, k)))))
# REST frontend setup: CLI args, a pooled Thrift connection set, and the
# Redis-backed user/yubikey/annotation stores.
parser = argparse.ArgumentParser(description='Run Picarus REST Frontend')
parser.add_argument('--users_redis_host', help='Redis Host', default='localhost')
parser.add_argument('--users_redis_port', type=int, help='Redis Port', default=6380)
parser.add_argument('--users_redis_db', type=int, help='Redis DB', default=0)
parser.add_argument('--yubikey_redis_host', help='Redis Host', default='localhost')
parser.add_argument('--yubikey_redis_port', type=int, help='Redis Port', default=6380)
parser.add_argument('--yubikey_redis_db', type=int, help='Redis DB', default=1)
parser.add_argument('--annotations_redis_host', help='Annotations Host', default='localhost')
parser.add_argument('--annotations_redis_port', type=int, help='Annotations Port', default=6380)
parser.add_argument('--annotations_redis_db', type=int, help='Annotations DB', default=2)
parser.add_argument('--port', default='15000', type=int)
parser.add_argument('--thrift_server', default='localhost')
parser.add_argument('--thrift_port', default='9090')
ARGS = parser.parse_args()
# Pool of 10 pre-opened Thrift connections handed out through a gevent queue.
THRIFT_POOL = gevent.queue.Queue()
THRIFT_CONSTRUCTOR = lambda : hadoopy_hbase.connect(ARGS.thrift_server, ARGS.thrift_port)
for x in range(10):
    THRIFT_POOL.put(THRIFT_CONSTRUCTOR())
USERS = Users(ARGS.users_redis_host, ARGS.users_redis_port, ARGS.users_redis_db)
YUBIKEY = Yubikey(ARGS.yubikey_redis_host, ARGS.yubikey_redis_port, ARGS.yubikey_redis_db)
ANNOTATORS = annotators.Annotators(ARGS.annotations_redis_host, ARGS.annotations_redis_port, ARGS.annotations_redis_db)
# Set necessary globals in tables module
tables.VERSION = VERSION = 'a1'
# NOTE(review): thrift_lock is not defined in this chunk — presumably a
# module-level helper defined elsewhere in this file; verify.
tables.thrift_lock = thrift_lock
tables.ANNOTATORS = ANNOTATORS


def print_request():
    """Dump the current bottle request's attributes for debugging."""
    ks = ['auth', 'content_length', 'content_type', 'environ', 'fullpath', 'is_ajax', 'is_xhr', 'method', 'path', 'query_string', 'remote_addr', 'remote_route', 'script_name', 'url', 'urlparts']
    for k in ks:
        print('%s: %s' % (k, str(getattr(bottle.request, k))))
# Scan-throughput benchmark: stream one column of the flickr table and print
# (seconds per row, total rows scanned) every N rows.
import hadoopy_hbase
import time

c = hadoopy_hbase.connect('localhost')
cnt = 0
st = time.time()
N = 5000  # rows per Thrift scanner batch and per timing report
for x in hadoopy_hbase.scanner(c, 'flickr', per_call=N, columns=['metadata:license']):
    cnt += 1
    if cnt % N == 0:
        print(((time.time() - st) / N, cnt))
        st = time.time()  # restart the clock for the next batch
# Exception-body renderers (XML and JSON).
render_xml_exception = lambda exception: '<exception>%s</exception>' % exception.message
render_json_exception = lambda exception: json.dumps({'exception': exception.message})

parser = argparse.ArgumentParser(description='Run Picarus REST Frontend')
parser.add_argument('--redis_host', help='Redis Host', default='localhost')
parser.add_argument('--redis_port', type=int, help='Redis Port', default=6380)
parser.add_argument('--redis_db', type=int, help='Redis DB', default=0)
parser.add_argument('--port', default='15000', type=int)
parser.add_argument('--thrift_server', default='localhost')
parser.add_argument('--thrift_port', default='9090')
ARGS = parser.parse_args()
# Single shared Thrift connection, serialized through a gevent lock.
THRIFT = hadoopy_hbase.connect(ARGS.thrift_server, ARGS.thrift_port)
THRIFT_LOCK = gevent.coros.RLock()
USERS = Users(ARGS.redis_host, ARGS.redis_port, ARGS.redis_db)


def print_request():
    """Dump the current bottle request's attributes for debugging."""
    ks = ['auth', 'content_length', 'content_type', 'environ', 'fullpath', 'is_ajax', 'is_xhr', 'method', 'path', 'query_string', 'remote_addr', 'remote_route', 'script_name', 'url', 'urlparts']
    for k in ks:
        print('%s: %s' % (k, str(getattr(bottle.request, k))))
    print('%s: %s' % ('files', (getattr(bottle.request, 'files')).keys()))