Example #1
0
def manager(**args):
    """Build and return an AMT manager configured from ``args``.

    Creates one StrictRedis client per logical database (stored back into
    ``args`` as ``'<name>_db'``), resolves the data source, and instantiates
    the manager class selected by ``args['type']``.

    Recognized types: 'image_class' and 'image_qa'.

    Raises:
        ValueError: if ``args['type']`` is not a recognized manager type.
    """
    db_nums = list(enumerate(['users', 'response', 'state', 'key_to_path', 'path_to_key', 'tasks']))
    logging.debug(db_nums)
    args.update(dict((y + '_db', redis.StrictRedis(host=args['redis_address'], port=args['redis_port'], db=x))
                     for x, y in db_nums))
    logging.debug(args)
    sp = lambda x: ROOT + '/static_private/' + x
    # 'data_connection' may be supplied to reuse a specific data connection,
    # or set to a falsy value to disallow creating a data source (useful if
    # one is not needed).
    # BUGFIX: the previous try/except KeyError wrapped the
    # data_source_from_uri() calls themselves, so a KeyError raised *inside*
    # that helper was silently misread as "no 'data_connection' key" and
    # triggered a second call.  Only the key lookup is guarded now.
    if 'data_connection' in args:
        if args['data_connection']:
            args['data_source'] = data_source_from_uri(args['data'], data_connection=args['data_connection'])
        else:
            args['data_source'] = None
    else:
        args['data_source'] = data_source_from_uri(args['data'])

    if args['type'] == 'image_class':
        m = mturk_vision.AMTImageClassManager(index_path=sp('image_label.html'),
                                              config_path=sp('image_class_config.js'),
                                              **args)
    elif args['type'] == 'image_qa':
        m = mturk_vision.AMTImageQAManager(index_path=sp('image_qa.html'),
                                           config_path=sp('image_qa_config.js'),
                                           **args)
    else:
        raise ValueError('Unknown type[%s]' % args['type'])
    if args['sync']:
        m.sync()
    return m
Example #2
0
    def test_dir(self):
        import tempfile
        import shutil
        import os
        temp_dir = None
        try:
            temp_dir = tempfile.mkdtemp()
            for x in xrange(100):
                d = os.path.join(temp_dir, str(x))
                try:
                    os.mkdir(d)
                except OSError:
                    pass
                for y in range(10):
                    open(os.path.join(d, 'colfam1:col%d' % y),
                         'w').write(str(x))
            ds = data_sources.DirectoryDataSource({'mydata': 'colfam1:col0'},
                                                  temp_dir)
            print list(ds.rows())
            print list(ds.columns(list(ds.rows())[0]))
            print list(ds.column_values(list(ds.rows())[0]))
            print[(x, list(y)) for x, y in ds.row_columns()]
            print[(x, dict(y)) for x, y in ds.row_column_values()]
            print ds.uri
            ds = data_sources.data_source_from_uri(ds.uri)
            print list(ds.rows())
            print list(ds.columns(list(ds.rows())[0]))
            print list(ds.column_values(list(ds.rows())[0]))
            print[(x, list(y)) for x, y in ds.row_columns()]
            print[(x, dict(y)) for x, y in ds.row_column_values()]
            print ds.uri

        finally:
            if temp_dir is not None:
                shutil.rmtree(temp_dir)
Example #3
0
 def test_hbase(self):
     import hadoopy_hbase
     client = hadoopy_hbase.connect()
     try:
         client.createTable('testtable',
                            [hadoopy_hbase.ColumnDescriptor('colfam1:')])
     except:
         pass
     for x in xrange(100):
         client.mutateRow('testtable', str(x), [
             hadoopy_hbase.Mutation(column='colfam1:col%d' % y,
                                    value=str(x)) for y in range(10)
         ])
     ds = data_sources.HBaseDataSource({'mydata': 'colfam1:col0'},
                                       'testtable')
     print list(ds.rows())
     print list(ds.columns(list(ds.rows())[0]))
     print list(ds.column_values(list(ds.rows())[0]))
     print[(x, list(y)) for x, y in ds.row_columns()]
     print[(x, dict(y)) for x, y in ds.row_column_values()]
     print ds.uri
     ds = data_sources.data_source_from_uri(ds.uri)
     print list(ds.rows())
     print list(ds.columns(list(ds.rows())[0]))
     print list(ds.column_values(list(ds.rows())[0]))
     print[(x, list(y)) for x, y in ds.row_columns()]
     print[(x, dict(y)) for x, y in ds.row_column_values()]
     print ds.uri
Example #4
0
def manager(**args):
    """Construct the requested AMT manager from keyword configuration.

    Adds one StrictRedis client per logical database to ``args`` (keyed as
    '<name>_db'), builds the data source from args['data'], and dispatches
    on args['type'] to the matching mturk_vision manager class.

    Raises:
        ValueError: if args['type'] is not a recognized manager type.
    """
    names = ['users', 'response', 'state', 'key_to_path', 'path_to_key', 'tasks']
    db_nums = list(enumerate(names))
    logging.debug(db_nums)
    redis_dbs = {}
    for num, name in db_nums:
        redis_dbs[name + '_db'] = redis.StrictRedis(host=args['redis_address'],
                                                    port=args['redis_port'],
                                                    db=num)
    args.update(redis_dbs)
    logging.debug(args)

    def sp(rel_path):
        # Resolve a file name under the private static directory.
        return ROOT + '/static_private/' + rel_path

    args['data_source'] = data_source_from_uri(args['data'])
    manager_type = args['type']
    if manager_type == 'image_class':
        made = mturk_vision.AMTImageClassManager(index_path=sp('image_label.html'),
                                                 config_path=sp('image_class_config.js'),
                                                 **args)
    elif manager_type == 'image_qa':
        made = mturk_vision.AMTImageQAManager(index_path=sp('image_qa.html'),
                                              config_path=sp('image_qa_config.js'),
                                              **args)
    else:
        raise ValueError('Unknown type[%s]' % manager_type)
    if args['sync']:
        made.sync()
    return made
Example #5
0
        import hadoopy_hbase
        client = hadoopy_hbase.connect()
        try:
            client.createTable('testtable', [hadoopy_hbase.ColumnDescriptor('colfam1:')])
        except:
            pass
        for x in xrange(100):
            client.mutateRow('testtable', str(x), [hadoopy_hbase.Mutation(column='colfam1:col%d' % y, value=str(x)) for y in range(10)])
        ds = data_sources.HBaseDataSource({'mydata': 'colfam1:col0'}, 'testtable')
        print list(ds.rows())
        print list(ds.columns(list(ds.rows())[0]))
        print list(ds.column_values(list(ds.rows())[0]))
        print [(x, list(y)) for x, y in ds.row_columns()]
        print [(x, dict(y)) for x, y in ds.row_column_values()]
        print ds.uri
        ds = data_sources.data_source_from_uri(ds.uri)
        print list(ds.rows())
        print list(ds.columns(list(ds.rows())[0]))
        print list(ds.column_values(list(ds.rows())[0]))
        print [(x, list(y)) for x, y in ds.row_columns()]
        print [(x, dict(y)) for x, y in ds.row_column_values()]
        print ds.uri

    def test_dir(self):
        import tempfile
        import shutil
        import os
        temp_dir = None
        try:
            temp_dir = tempfile.mkdtemp()
            for x in xrange(100):
Example #6
0
                    import numpy as np
                    y = str(np.fromstring(y, dtype=np.double).tolist())
                elif conv == 'str':
                    pass
                elif conv == 'image':
                    import base64
                    y = '<img src="data:image/jpeg;base64,%s" />' % base64.b64encode(y)
                else:
                    raise ValueError('Unsupported: ' + conv)
                render_columns.append([x, y])
            except KeyError:
                render_columns.append([x, str(len(y))])
        render_rows.append(row + ' | ' + ' '.join([x + '|' + y for x, y in render_columns]))
    return '<br>'.join(render_rows)


if __name__ == "__main__":
    # CLI entry point: parse the options, build the column-converter map,
    # open the data source, and serve the visualization over HTTP.
    parser = argparse.ArgumentParser(description="Serve data source visualization")
    parser.add_argument('data_source')
    # Port is kept as a string; bottle.run accepts it as-is.
    parser.add_argument('--port', default='8080')
    parser.add_argument('--rows', default=10, type=int)
    parser.add_argument('--convert', action='append')
    ARGS = parser.parse_args()
    print(ARGS.convert)
    # Each --convert option is a 'column=converter' spec; split on the first
    # '=' so converter names may themselves contain '='.
    CONVERT = dict(spec.split('=', 1) for spec in ARGS.convert) if ARGS.convert else {}
    DATA = data_sources.data_source_from_uri(ARGS.data_source)
    bottle.run(host='0.0.0.0', port=ARGS.port, server='gevent')
Example #7
0
              (dataset._name, start_time, hdfs_file_cnt))
        hdfs_buf.append(x)
        if len(hdfs_buf) >= 100:
            try:
                hadoopy.writetb(
                    'spatial_queries/input/%s/%f/%d.tb.seq' %
                    (dataset._name, start_time, hdfs_file_cnt), hdfs_buf)
            except IOError, e:
                print('Got IOError, skipping')
                print(e)
            hdfs_file_cnt += 1
            hdfs_buf = []
    if hdfs_buf:
        hadoopy.writetb(
            'spatial_queries/input/%s/%f/%d.tb.seq' %
            (dataset._name, start_time, hdfs_file_cnt), hdfs_buf)
    print('NumClasses[%d]' % len(classes))
    print('Classes: %r' % classes)


if __name__ == '__main__':
    # Entry point: default to the MSRC dataset, but the always-on branch
    # below overrides it with SUN397 backed by an HBase data source.
    dataset = vision_data.MSRC()
    classes = msrc_classes
    if 1:
        from data_sources import data_source_from_uri
        from sun397_dataset import SUN397
        uri = 'hbase://localhost:9090/images?image=data:image_320&gt=feat:masks_gt'
        dataset = SUN397(data_source_from_uri(uri))
        # BUGFIX: json.load(open(...)) leaked the file handle; a with-block
        # closes it deterministically.
        with open('classes.js') as fp:
            classes = json.load(fp)
    write_texton_hadoop(dataset, classes)
Example #8
0
            yield str(image_num), (image, label_points)
    hdfs_file_cnt = 0
    hdfs_buf = []
    start_time = time.time()
    for x in make_data():
        print('spatial_queries/input/%s/%f/%d.tb.seq' % (dataset._name, start_time, hdfs_file_cnt))
        hdfs_buf.append(x)
        if len(hdfs_buf) >= 100:
            try:
                hadoopy.writetb('spatial_queries/input/%s/%f/%d.tb.seq' % (dataset._name, start_time, hdfs_file_cnt), hdfs_buf)
            except IOError, e:
                print('Got IOError, skipping')
                print(e)
            hdfs_file_cnt += 1
            hdfs_buf = []
    if hdfs_buf:
        hadoopy.writetb('spatial_queries/input/%s/%f/%d.tb.seq' % (dataset._name, start_time, hdfs_file_cnt), hdfs_buf)
    print('NumClasses[%d]' % len(classes))
    print('Classes: %r' % classes)

if __name__ == '__main__':
    # Entry point: default to the MSRC dataset, but the always-on branch
    # below overrides it with SUN397 backed by an HBase data source.
    dataset = vision_data.MSRC()
    classes = msrc_classes
    if 1:
        from data_sources import data_source_from_uri
        from sun397_dataset import SUN397
        uri = 'hbase://localhost:9090/images?image=data:image_320&gt=feat:masks_gt'
        dataset = SUN397(data_source_from_uri(uri))
        # BUGFIX: json.load(open(...)) leaked the file handle; a with-block
        # closes it deterministically.
        with open('classes.js') as fp:
            classes = json.load(fp)
    write_texton_hadoop(dataset, classes)