def manager(**args):
    """Build and return the MTurk task manager selected by args['type'].

    Expects in ``args``: 'redis_address', 'redis_port', 'data', 'type',
    'sync', and optionally 'data_connection'.  Injects one StrictRedis
    client per logical store ('users_db', 'response_db', ...) into ``args``
    and resolves 'data_source' before instantiating the manager.

    Raises:
        ValueError: if args['type'] is not a recognized manager type.
    """
    db_names = ['users', 'response', 'state', 'key_to_path', 'path_to_key', 'tasks']
    db_nums = list(enumerate(db_names))
    logging.debug(db_nums)
    # One redis DB number per logical store, injected as '<name>_db' kwargs.
    args.update(dict((name + '_db',
                      redis.StrictRedis(host=args['redis_address'],
                                        port=args['redis_port'], db=num))
                     for num, name in db_nums))
    logging.debug(args)
    sp = lambda x: ROOT + '/static_private/' + x
    # Used if we want to use a specific data connection instead of creating
    # one; by setting 'data_connection' to None, data connections can be
    # disallowed (useful if one is not needed).  A membership test replaces
    # the original try/except KeyError: same semantics (missing key ->
    # default data source) with explicit control flow.
    if 'data_connection' in args:
        if args['data_connection']:
            args['data_source'] = data_source_from_uri(args['data'],
                                                       data_connection=args['data_connection'])
        else:
            args['data_source'] = None
    else:
        args['data_source'] = data_source_from_uri(args['data'])
    if args['type'] == 'image_class':
        m = mturk_vision.AMTImageClassManager(index_path=sp('image_label.html'),
                                              config_path=sp('image_class_config.js'),
                                              **args)
    elif args['type'] == 'image_qa':
        m = mturk_vision.AMTImageQAManager(index_path=sp('image_qa.html'),
                                           config_path=sp('image_qa_config.js'),
                                           **args)
    else:
        raise ValueError('Unknown type[%s]' % args['type'])
    if args['sync']:
        m.sync()
    return m
def test_dir(self): import tempfile import shutil import os temp_dir = None try: temp_dir = tempfile.mkdtemp() for x in xrange(100): d = os.path.join(temp_dir, str(x)) try: os.mkdir(d) except OSError: pass for y in range(10): open(os.path.join(d, 'colfam1:col%d' % y), 'w').write(str(x)) ds = data_sources.DirectoryDataSource({'mydata': 'colfam1:col0'}, temp_dir) print list(ds.rows()) print list(ds.columns(list(ds.rows())[0])) print list(ds.column_values(list(ds.rows())[0])) print[(x, list(y)) for x, y in ds.row_columns()] print[(x, dict(y)) for x, y in ds.row_column_values()] print ds.uri ds = data_sources.data_source_from_uri(ds.uri) print list(ds.rows()) print list(ds.columns(list(ds.rows())[0])) print list(ds.column_values(list(ds.rows())[0])) print[(x, list(y)) for x, y in ds.row_columns()] print[(x, dict(y)) for x, y in ds.row_column_values()] print ds.uri finally: if temp_dir is not None: shutil.rmtree(temp_dir)
def test_hbase(self): import hadoopy_hbase client = hadoopy_hbase.connect() try: client.createTable('testtable', [hadoopy_hbase.ColumnDescriptor('colfam1:')]) except: pass for x in xrange(100): client.mutateRow('testtable', str(x), [ hadoopy_hbase.Mutation(column='colfam1:col%d' % y, value=str(x)) for y in range(10) ]) ds = data_sources.HBaseDataSource({'mydata': 'colfam1:col0'}, 'testtable') print list(ds.rows()) print list(ds.columns(list(ds.rows())[0])) print list(ds.column_values(list(ds.rows())[0])) print[(x, list(y)) for x, y in ds.row_columns()] print[(x, dict(y)) for x, y in ds.row_column_values()] print ds.uri ds = data_sources.data_source_from_uri(ds.uri) print list(ds.rows()) print list(ds.columns(list(ds.rows())[0])) print list(ds.column_values(list(ds.rows())[0])) print[(x, list(y)) for x, y in ds.row_columns()] print[(x, dict(y)) for x, y in ds.row_column_values()] print ds.uri
def manager(**args):
    """Construct the MTurk manager named by args['type'].

    Wires six StrictRedis clients (one numbered logical DB each) into
    ``args``, resolves 'data_source' from args['data'], and instantiates
    the matching manager class, syncing it when args['sync'] is truthy.
    """
    store_names = ['users', 'response', 'state', 'key_to_path', 'path_to_key', 'tasks']
    numbered_stores = list(enumerate(store_names))
    logging.debug(numbered_stores)
    # Each logical store gets its own redis DB number, exposed as a
    # '<name>_db' keyword argument on the manager.
    for db_num, store in numbered_stores:
        args[store + '_db'] = redis.StrictRedis(host=args['redis_address'],
                                                port=args['redis_port'],
                                                db=db_num)
    logging.debug(args)

    def sp(rel_path):
        # Resolve a filename inside the private static asset directory.
        return ROOT + '/static_private/' + rel_path

    args['data_source'] = data_source_from_uri(args['data'])
    manager_type = args['type']
    if manager_type == 'image_class':
        m = mturk_vision.AMTImageClassManager(index_path=sp('image_label.html'),
                                              config_path=sp('image_class_config.js'),
                                              **args)
    elif manager_type == 'image_qa':
        m = mturk_vision.AMTImageQAManager(index_path=sp('image_qa.html'),
                                           config_path=sp('image_qa_config.js'),
                                           **args)
    else:
        raise ValueError('Unknown type[%s]' % manager_type)
    if args['sync']:
        m.sync()
    return m
# NOTE(review): flattened chunk — appears to be the body of an hbase test
# followed by the start of test_dir, which is truncated mid-definition in
# this view (the trailing `for` loop has no body), so the chunk is not
# runnable as-is.  Indentation reconstructed; tokens unchanged.
import hadoopy_hbase
client = hadoopy_hbase.connect()
try:
    # Create the test table; presumably fails if it already exists — the
    # bare except swallows that (and everything else).  TODO confirm.
    client.createTable('testtable', [hadoopy_hbase.ColumnDescriptor('colfam1:')])
except:
    pass
for x in xrange(100):
    # 100 rows of 10 columns each; every cell value is the row key.
    client.mutateRow('testtable', str(x),
                     [hadoopy_hbase.Mutation(column='colfam1:col%d' % y, value=str(x))
                      for y in range(10)])
ds = data_sources.HBaseDataSource({'mydata': 'colfam1:col0'}, 'testtable')
# Dump the data source contents, then round-trip it through its URI and
# dump again (output should match).
print list(ds.rows())
print list(ds.columns(list(ds.rows())[0]))
print list(ds.column_values(list(ds.rows())[0]))
print [(x, list(y)) for x, y in ds.row_columns()]
print [(x, dict(y)) for x, y in ds.row_column_values()]
print ds.uri
ds = data_sources.data_source_from_uri(ds.uri)
print list(ds.rows())
print list(ds.columns(list(ds.rows())[0]))
print list(ds.column_values(list(ds.rows())[0]))
print [(x, list(y)) for x, y in ds.row_columns()]
print [(x, dict(y)) for x, y in ds.row_column_values()]
print ds.uri


def test_dir(self):
    # NOTE(review): body truncated here — the rest of this method lies
    # outside this chunk.
    import tempfile
    import shutil
    import os
    temp_dir = None
    try:
        temp_dir = tempfile.mkdtemp()
        for x in xrange(100):
# NOTE(review): flattened chunk — begins mid-function inside an
# `if conv == ...` dispatch whose opening (and the enclosing try/for/def)
# lies outside this view; indentation reconstructed, tokens unchanged.
                import numpy as np
                # Render packed doubles as their Python list repr.
                y = str(np.fromstring(y, dtype=np.double).tolist())
            elif conv == 'str':
                pass
            elif conv == 'image':
                import base64
                # Inline the raw image bytes as a base64 data URI.
                y = '<img src="data:image/jpeg;base64,%s" />' % base64.b64encode(y)
            else:
                raise ValueError('Unsupported: ' + conv)
            render_columns.append([x, y])
        except KeyError:
            # No converter registered for this column: show the raw value's
            # length instead of its contents.
            render_columns.append([x, str(len(y))])
        render_rows.append(row + ' | ' + ' '.join([x + '|' + y for x, y in render_columns]))
    return '<br>'.join(render_rows)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Serve data source visualization")
    parser.add_argument('data_source')
    parser.add_argument('--port', default='8080')
    parser.add_argument('--rows', default=10, type=int)
    parser.add_argument('--convert', action='append')
    ARGS = parser.parse_args()
    print(ARGS.convert)
    CONVERT = {}
    if ARGS.convert:
        # Each --convert flag is 'column=converter'; split on the first '='
        # only so converter values may themselves contain '='.
        CONVERT = dict(x.split('=', 1) for x in ARGS.convert)
    DATA = data_sources.data_source_from_uri(ARGS.data_source)
    bottle.run(host='0.0.0.0', port=ARGS.port, server='gevent')
# NOTE(review): flattened chunk — opens with the tail of a call expression
# (presumably the print of the output path, cf. the sibling chunk) whose
# opening lies outside this view; indentation reconstructed, tokens
# unchanged.
          (dataset._name, start_time, hdfs_file_cnt))
    hdfs_buf.append(x)
    if len(hdfs_buf) >= 100:
        # Flush every 100 records into its own sequence file on HDFS.
        try:
            hadoopy.writetb(
                'spatial_queries/input/%s/%f/%d.tb.seq' %
                (dataset._name, start_time, hdfs_file_cnt), hdfs_buf)
        except IOError, e:
            # Best-effort: report and skip a failed write, keep streaming.
            print('Got IOError, skipping')
            print(e)
        hdfs_file_cnt += 1
        hdfs_buf = []
if hdfs_buf:
    # Flush the final partial buffer.
    hadoopy.writetb(
        'spatial_queries/input/%s/%f/%d.tb.seq' %
        (dataset._name, start_time, hdfs_file_cnt), hdfs_buf)
print('NumClasses[%d]' % len(classes))
print('Classes: %r' % classes)


if __name__ == '__main__':
    dataset = vision_data.MSRC()
    classes = msrc_classes
    if 1:
        # Hard-coded toggle: override the MSRC default with SUN397 backed
        # by an HBase data source.
        from data_sources import data_source_from_uri
        from sun397_dataset import SUN397
        uri = 'hbase://localhost:9090/images?image=data:image_320>=feat:masks_gt'
        dataset = SUN397(data_source_from_uri(uri))
        classes = json.load(open('classes.js'))
    write_texton_hadoop(dataset, classes)
# NOTE(review): flattened chunk — begins with the final yield of a nested
# generator (make_data) whose def, and the enclosing function, lie outside
# this view; indentation reconstructed, tokens unchanged.
        yield str(image_num), (image, label_points)
    hdfs_file_cnt = 0
    hdfs_buf = []
    start_time = time.time()
    # Stream (key, value) records into ~100-record sequence files on HDFS,
    # one file per flush, named by dataset/start-time/file-counter.
    for x in make_data():
        print('spatial_queries/input/%s/%f/%d.tb.seq' % (dataset._name, start_time, hdfs_file_cnt))
        hdfs_buf.append(x)
        if len(hdfs_buf) >= 100:
            try:
                hadoopy.writetb('spatial_queries/input/%s/%f/%d.tb.seq' % (dataset._name, start_time, hdfs_file_cnt), hdfs_buf)
            except IOError, e:
                # Best-effort: report and skip a failed write, keep going.
                print('Got IOError, skipping')
                print(e)
            hdfs_file_cnt += 1
            hdfs_buf = []
    if hdfs_buf:
        # Flush the final partial buffer.
        hadoopy.writetb('spatial_queries/input/%s/%f/%d.tb.seq' % (dataset._name, start_time, hdfs_file_cnt), hdfs_buf)
    print('NumClasses[%d]' % len(classes))
    print('Classes: %r' % classes)


if __name__ == '__main__':
    dataset = vision_data.MSRC()
    classes = msrc_classes
    if 1:
        # Hard-coded toggle: override the MSRC default with SUN397 backed
        # by an HBase data source.
        from data_sources import data_source_from_uri
        from sun397_dataset import SUN397
        uri = 'hbase://localhost:9090/images?image=data:image_320>=feat:masks_gt'
        dataset = SUN397(data_source_from_uri(uri))
        classes = json.load(open('classes.js'))
    write_texton_hadoop(dataset, classes)