Example #1
0
def main():
    """Debugging entry point for inspecting the linker's OpenQuery cache.

    Parses ``action``, ``folder`` and ``subfolder`` from the command
    line and builds the cache key as the CBOR encoding of the
    ``(folder, subfolder)`` pair, with spaces replaced by underscores.

    * ``run``: calls ``traverse_extract_fetch`` for the key, stopping
      after extraction.
    * ``delete``: deletes the key from the ``openquery`` table.
    * ``cache``: scans the ``openquery`` table, prints every record
      whose first key component equals the key, then prints the total
      number of scanned records.

    Any other action is reported through the parser's ``error()``
    (usage message and non-zero exit) instead of silently doing
    nothing.
    """
    p = argparse.ArgumentParser('simple debugging tool for watching the linker and OpenQuery')
    p.add_argument('action', help='either `run` or `cache` or `delete`')
    p.add_argument('folder', help='folder name')
    p.add_argument('subfolder', help='subfolder name')
    args = yakonfig.parse_args(p, [kvlayer, yakonfig])

    if args.action not in ('run', 'cache', 'delete'):
        p.error('unknown action %r; expected `run`, `cache` or `delete`'
                % args.action)

    config = yakonfig.get_global_config()

    key = cbor.dumps((args.folder.replace(' ', '_'), args.subfolder.replace(' ', '_')))

    if args.action == 'run':
        web_conf = Config()
        with yakonfig.defaulted_config([kvlayer, dblogger, web_conf], config=config):
            traverse_extract_fetch(web_conf, key, stop_after_extraction=True)
        return

    # The two remaining actions both work directly on the ``openquery`` table.
    kvlclient = kvlayer.client()
    kvlclient.setup_namespace({'openquery': (str,)})

    if args.action == 'delete':
        kvlclient.delete('openquery', (key,))
        print('deleted %r' % key)
    else:  # 'cache'
        count = 0
        for rec in kvlclient.scan('openquery'):
            count += 1
            # rec[0] is the record's key tuple; compare its first component
            if rec[0][0] == key:
                print(rec)
        print('%d cached queries' % count)
Example #2
0
def main():
    """Command-line helper for watching the linker and OpenQuery.

    The positional arguments are ``action``, ``folder`` and
    ``subfolder``.  The lookup key is the CBOR encoding of the
    ``(folder, subfolder)`` tuple after replacing spaces with
    underscores.

    Supported actions:

    * ``run`` -- call ``traverse_extract_fetch`` for the key with
      ``stop_after_extraction=True``.
    * ``delete`` -- delete the key from the ``openquery`` table.
    * ``cache`` -- scan the ``openquery`` table, print the records
      whose first key component matches, and print the record count.

    An unrecognised action ends the program via ``parser.error()``
    rather than exiting silently with success.
    """
    p = argparse.ArgumentParser(
        'simple debugging tool for watching the linker and OpenQuery')
    p.add_argument('action', help='either `run` or `cache` or `delete`')
    p.add_argument('folder', help='folder name')
    p.add_argument('subfolder', help='subfolder name')
    args = yakonfig.parse_args(p, [kvlayer, yakonfig])

    if args.action not in ('run', 'cache', 'delete'):
        p.error('unknown action %r; expected `run`, `cache` or `delete`'
                % args.action)

    config = yakonfig.get_global_config()

    key = cbor.dumps(
        (args.folder.replace(' ', '_'), args.subfolder.replace(' ', '_')))

    if args.action == 'run':
        web_conf = Config()
        with yakonfig.defaulted_config([kvlayer, dblogger, web_conf],
                                       config=config):
            traverse_extract_fetch(web_conf, key, stop_after_extraction=True)
        return

    # ``delete`` and ``cache`` share the same table setup.
    kvlclient = kvlayer.client()
    kvlclient.setup_namespace({'openquery': (str, )})

    if args.action == 'delete':
        kvlclient.delete('openquery', (key, ))
        print('deleted %r' % key)
        return

    # args.action == 'cache'
    count = 0
    for rec in kvlclient.scan('openquery'):
        count += 1
        # rec[0] is the record's key tuple; compare its first component
        if rec[0][0] == key:
            print(rec)
    print('%d cached queries' % count)
Example #3
0
    def __init__(self, storage_client=None, table_name="log",
                 storage_config=None):
        """Set up a log handler that stores records through kvlayer.

        Supply either ``storage_client``, a ready-made kvlayer client,
        or ``storage_config``, a dictionary used as the ``kvlayer``
        configuration while a new client is built with
        ``kvlayer.client()``.  The table ``table_name`` is registered
        with the client through ``setup_namespace``.

        :param storage_client: existing storage client
        :type storage_client: :class:`kvlayer.AbstractStorage`
        :param str table_name: virtual table name
        :param dict storage_config: configuration for new storage client
        :raises RuntimeError: if neither ``storage_client`` nor
          ``storage_config`` is given

        """
        super(DatabaseLogHandler, self).__init__()

        if storage_client is None and storage_config is None:
            raise RuntimeError('must pass either storage_client or '
                               'storage_config')

        if storage_client is None:
            # No client supplied: build one from the given configuration.
            kvl_config = {'kvlayer': storage_config}
            with yakonfig.defaulted_config([kvlayer], config=kvl_config):
                storage_client = kvlayer.client()

        self.storage = storage_client
        self.table_name = table_name
        self.storage.setup_namespace({table_name: 1})
        self.sequence_number = 0
Example #4
0
def main(options):
    """Run the recommender system on a sequence of topics.
    """
    description = "System using LDA, Kmeans and Solr to optimize diversification and exploitation of different topics"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--overwrite", action="store_true")
    args = yakonfig.parse_args(parser, [yakonfig])

    logging.basicConfig(level=logging.DEBUG)

    config = yakonfig.get_global_config("harness")
    batch_size = config.get("batch_size", 5)
    run_file_path = config["run_file_path"]
    if os.path.exists(run_file_path):
        if args.overwrite:
            os.remove(run_file_path)
        else:
            os.remove(run_file_path)
            # sys.exit('%r already exists' % run_file_path)

    kvl_config = {"storage_type": "local", "namespace": "test", "app_name": "test"}
    kvl = kvlayer.client(kvl_config)
    method, feedback_options, poids, id_config = options[0], options[1], options[2], options[3]
    print method, poids
    system = SearchSystem([], method, feedback_options, poids)
    print args.config
    args.config = "config" + str(id_config) + ".yaml"
    print args.config
    ambassador = HarnessAmbassadorCLI(system, args.config, batch_size)
    ambassador.run()
Example #5
0
def main():
    """Command-line entry point of the SortingDesk report generator.

    Reads the YAML config named by ``--config``, builds a ``Store``
    through a ``Factory`` and a ``Folders`` object on a kvlayer client,
    then runs a ``ReportGenerator`` that writes to the file named by
    ``--output`` (described in the option help as an Excel workbook).
    """
    p = argparse.ArgumentParser(
        description='SortingDesk report generation tool')
    p.add_argument('-c', '--config', required=True,
                   help='dossier stack YAML config file')
    p.add_argument('-o', '--output', required=True,
                   help='path to write Excel workbook file')
    p.add_argument('-u', '--user', default='unknown',
                   help='user name (default=ALL)')
    p.add_argument('folder', help='folder name')
    p.add_argument('subfolder', nargs='?', default=None,
                   help='subfolder name (default=ALL)')
    args = p.parse_args()

    config = yakonfig.set_default_config([kvlayer], filename=args.config)
    store = Factory(config).create(Store)

    # Instantiate and run report generator.
    folders = Folders(kvlayer.client())
    generator = ReportGenerator(store, folders, args.folder,
                                subfolder_name=args.subfolder,
                                user=args.user)
    with open(args.output, 'wb+') as workbook_file:
        generator.run(workbook_file)
 def __init__(self, *args, **kwargs):
     '''Create the kvlayer client and register the tables to write.

     Besides ``stream_items``, one ``stream_items_<index>`` table is
     registered for every name in ``self.config['indexes']``, with the
     size taken from ``self.index_sizes``.
     '''
     super(to_kvlayer, self).__init__(*args, **kwargs)
     self.client = kvlayer.client()
     table_sizes = dict(stream_items=2)
     table_sizes.update(
         ('stream_items_' + index_name, self.index_sizes[index_name])
         for index_name in self.config['indexes'])
     self.client.setup_namespace(table_sizes)
Example #7
0
def client(backend, request, tmpdir, namespace_string):
    """Yield a kvlayer client configured for ``backend``.

    The configuration comes from ``config_<backend>.yaml`` in the
    directory of the requesting test file, overlaid with parameters
    built here.  The namespace is deleted before the client is yielded
    and again after the generator is resumed.
    """
    config_name = 'config_{0}.yaml'.format(backend)
    config_path = str(request.fspath.dirpath(config_name))
    statsfile = StringIO.StringIO()
    params = {
        'app_name': 'kvlayer',
        'namespace': namespace_string,
        'log_stats': statsfile,
        'log_stats_interval_ops': 1,
        'blagh': 'hoo haa',
    }

    # this is hacky but must go somewhere
    if backend == 'filestorage':
        storage_file = tmpdir.join('local')
        # open for writing and close again straight away
        storage_file.open('w').close()
        params['kvlayer_filename'] = str(storage_file)

    if backend == 'redis':
        params['storage_addresses'] = [redis_address(request)]

    with yakonfig.defaulted_config([kvlayer], filename=config_path,
                                   params=params):
        kvl_client = kvlayer.client()
        kvl_client.delete_namespace()
        yield kvl_client
        kvl_client.delete_namespace()
def test_kvlayer_simple(configurator, tmpdir):
    """Write one stream item with to_kvlayer and read it back.

    A single-item chunk file is written, pushed through ``to_kvlayer``,
    checked directly in the ``stream_items`` table and finally read
    back through ``from_kvlayer``.
    """
    item = streamcorpus.make_stream_item('2000-01-01T12:34:00.000123Z',
                                         'test://test.stream.item/')
    chunk_path = str(tmpdir.join('chunk.sc.xz'))
    with streamcorpus.Chunk(path=chunk_path, mode='wb') as chunk:
        chunk.add(item)

    with configurator():
        writer_config = yakonfig.get_global_config(
            'streamcorpus_pipeline', 'to_kvlayer')
        writer = to_kvlayer(writer_config)
        writer(chunk_path, {}, '')

        kvlclient = kvlayer.client()
        kvlclient.setup_namespace({'stream_items': 2})
        print(repr(list(kvlclient.scan_keys('stream_items'))))
        item_key = (uuid.UUID(int=946730040),
                    uuid.UUID(hex='985c1e3ed73256cd9a399919fe93cf76'))
        for _key, value in kvlclient.get('stream_items', item_key):
            assert value is not None

        reader_config = yakonfig.get_global_config(
            'streamcorpus_pipeline', 'from_kvlayer')
        reader = from_kvlayer(reader_config)
        read_back = list(reader(''))
        assert len(read_back) == 1
        first = read_back[0]
        assert first.stream_time.epoch_ticks == item.stream_time.epoch_ticks
        assert first.abs_url == item.abs_url
Example #9
0
 def __init__(self, *args, **kwargs):
     '''Build the kvlayer client and declare the tables this writer uses.

     The ``stream_items`` table is always declared; each name listed in
     ``self.config['indexes']`` adds a ``stream_items_<name>`` table
     whose size comes from ``self.index_sizes``.
     '''
     super(to_kvlayer, self).__init__(*args, **kwargs)
     self.client = kvlayer.client()
     namespace_tables = {'stream_items': 2}
     for index_name in self.config['indexes']:
         table_name = 'stream_items_' + index_name
         namespace_tables[table_name] = self.index_sizes[index_name]
     self.client.setup_namespace(namespace_tables)
def rejester_run(work_unit):
    '''Rejester entry point to run the elasticsearch load.

    The work unit key is looked up in the ``stream_items`` kvlayer
    table and the first value found is handed to
    ``elasticsearch_loader.index_stream_item``.  The work spec must
    carry the global configuration under the ``config`` key; its
    ``elasticsearch.cluster`` entry is used to build the
    elasticsearch client.

    :param work_unit: work unit to run
    :type work_unit: :class:`rejester.WorkUnit`
    :raises rejester.exceptions.ProgrammerError: if the work spec has
      no ``config`` entry

    '''
    spec = work_unit.spec
    if 'config' not in spec:
        raise rejester.exceptions.ProgrammerError(
            'could not run without global config')

    with yakonfig.defaulted_config([rejester, kvlayer, dblogger],
                                   config=spec['config']):

        ## Setup elasticsearch client
        ## http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch
        cluster = work_unit.spec['config']['elasticsearch']['cluster']
        es = elasticsearch.Elasticsearch(cluster)

        ## Setup kvlayer client
        kvl = kvlayer.client()
        kvl.setup_namespace({'stream_items': 2})

        ## Get data associated with work_unit (first result only)
        results = kvl.get('stream_items', work_unit.key)
        _key, data = next(results)

        ## Index an individual stream_item
        elasticsearch_loader.index_stream_item(es, kvl, data)
def client(namespace_string, request):
    """Yield a kvlayer client configured from ``config_cassandra.yaml``.

    The config file is looked up next to the requesting test file.  The
    namespace is deleted once the generator is resumed.
    """
    config_path = str(request.fspath.dirpath('config_cassandra.yaml'))
    overrides = dict(namespace=namespace_string, app_name='kvltest')
    with yakonfig.defaulted_config([kvlayer], filename=config_path,
                                   params=overrides):
        kvl_client = kvlayer.client()
        yield kvl_client
        kvl_client.delete_namespace()
Example #12
0
 def __init__(self, config):
     '''Read the test settings from ``config`` and prepare the client.

     ``config['client']`` is used when present and truthy; otherwise a
     new kvlayer client is created.  ``item_size``, ``num_batches`` and
     ``num_items_per_batch`` are popped from ``config``, falling back
     to defaults when missing.
     '''
     logger.info('thing init')
     supplied_client = config.get('client', None)
     if not supplied_client:
         supplied_client = kvlayer.client()
     self.client = supplied_client
     self.client.setup_namespace({'t1': 2})

     self.item_size = config.pop('item_size', fifteen_MB_minus_overhead)
     # payload written for every item: ``item_size`` space characters
     self.long_string = self.item_size * b' '

     self.num_batches = config.pop('num_batches', 10)
     self.num_items_per_batch = config.pop('num_items_per_batch', 1)
     self.num_items = self.num_items_per_batch * self.num_batches
def kvl():
    """Yield a ``local`` kvlayer client and close it afterwards."""
    settings = dict(
        storage_type='local',
        app_name='diffeo',
        namespace='memex_dossier.models.tests',
    )
    kvl_client = kvlayer.client(settings)
    yield kvl_client
    kvl_client.close()
def worker(config, num_records):
    """Log ``num_records`` messages through a ``DatabaseLogHandler``.

    ``config`` is installed as the global ``kvlayer`` configuration
    before the client is created.  A final ``finished`` message is
    logged after the numbered ones.
    """
    yakonfig.set_global_config({'kvlayer': config})
    handler = DatabaseLogHandler(kvlayer.client())
    log = logging.getLogger('foo')
    log.addHandler(handler)
    for record_number in xrange(num_records):
        log.critical('a message: %d', record_number)
    log.critical('finished')
def local_kvl():
    """Return a ``local`` kvlayer client filled by ``build_test_data``."""
    client_options = {
        'storage_type': 'local',
        'namespace': 'test',
        'app_name': 'test',
    }
    kvl = kvlayer.client(config={}, **client_options)
    build_test_data(kvl)
    return kvl
Example #16
0
def kvl():
    """Yield a ``local`` kvlayer client and close it afterwards."""
    settings = dict(
        storage_type='local',
        app_name='diffeo',
        namespace='dossier.models.tests',
    )
    kvl_client = kvlayer.client(settings)
    yield kvl_client
    kvl_client.close()
Example #17
0
 def label_store(self):
     '''Return the cached ``LabelStore``, building it on first use.

     If the ``memex_dossier.label`` configuration has a ``kvlayer``
     section, the store is built on a kvlayer client made from that
     section; otherwise it is built through ``self.create``.
     '''
     if self._label_store is not None:
         return self._label_store

     config = global_config('memex_dossier.label')
     if 'kvlayer' not in config:
         self._label_store = self.create(LabelStore, config=config)
     else:
         label_kvl = kvlayer.client(config=config['kvlayer'])
         self._label_store = LabelStore(label_kvl)
     return self._label_store
Example #18
0
 def label_store(self):
     '''Return the cached ``LabelStore``, building it on first use.

     If the ``dossier.label`` configuration has a ``kvlayer`` section,
     the store is built on a kvlayer client made from that section;
     otherwise it is built through ``self.create``.
     '''
     if self._label_store is not None:
         return self._label_store

     config = global_config('dossier.label')
     if 'kvlayer' not in config:
         self._label_store = self.create(LabelStore, config=config)
     else:
         label_kvl = kvlayer.client(config=config['kvlayer'])
         self._label_store = LabelStore(label_kvl)
     return self._label_store
Example #19
0
 def store(self):
     '''Return the cached ``Store``, creating it on first use.

     ``feature_indexes`` comes from the ``dossier.store`` global
     configuration; it is ``None`` when a ``KeyError`` is raised while
     looking it up.
     '''
     if self._store is not None:
         return self._store

     try:
         store_conf = yakonfig.get_global_config("dossier.store")
         feature_indexes = store_conf["feature_indexes"]
     except KeyError:
         feature_indexes = None
     self._store = Store(kvlayer.client(), feature_indexes=feature_indexes)
     return self._store
Example #20
0
def kvl():
    """Yield a ``local`` kvlayer client inside a defaulted config.

    The client's namespace is deleted once the generator is resumed.
    """
    params = dict(
        storage_type='local',
        app_name='diffeo',
        namespace='dossier.store.test',
    )
    with yakonfig.defaulted_config([kvlayer], params=params):
        kvl_client = kvlayer.client()
        yield kvl_client
        kvl_client.delete_namespace()
Example #21
0
def main():
    """Score a run file and write the scored run out as JSON.

    Loads the run from ``run_file_path``, applies every requested
    scorer (all entries of ``available_scorers`` when no ``--scorer``
    is given), prints the formatted scores and writes the run, with
    the scores inserted, to ``scored_run_file_output_path``.

    Exits with a message if the output file already exists and
    ``--overwrite`` was not given, or if an unknown scorer name was
    requested.
    """
    parser = argparse.ArgumentParser(__doc__, conflict_handler='resolve')
    parser.add_argument('run_file_path', help='path to run file to score.')
    parser.add_argument('scored_run_file_output_path',
                        help='path to file to create with scores inserted '
                        'into run file.')
    parser.add_argument('--overwrite',
                        action='store_true',
                        default=False,
                        help='overwrite any existing run file.')
    parser.add_argument('--verbose',
                        action='store_true',
                        default=False,
                        help='display verbose log messages.')
    parser.add_argument('--scorer',
                        action='append',
                        default=[],
                        dest='scorers',
                        help='names of scorer functions to run;'
                        ' if none are provided, it runs all of them')

    modules = [yakonfig, kvlayer]
    args = yakonfig.parse_args(parser, modules)

    # Reject bad scorer names up front, before the existing output file
    # is removed and before any scoring work is done.
    unknown = [name for name in args.scorers
               if name not in available_scorers]
    if unknown:
        sys.exit('unknown scorer(s): %s' % ', '.join(unknown))

    if os.path.exists(args.scored_run_file_output_path):
        if args.overwrite:
            os.remove(args.scored_run_file_output_path)
        else:
            sys.exit('%r already exists' % args.scored_run_file_output_path)

    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=level)

    kvl = kvlayer.client()
    label_store = LabelStore(kvl)

    run = load_run(args.run_file_path)

    if not args.scorers:
        args.scorers = list(available_scorers)

    for scorer_name in args.scorers:
        scorer = available_scorers[scorer_name]
        logger.info('running %s', scorer_name)
        # this modifies the run['scores'] object itself
        scorer(run, label_store)

    print(format_scores(run))

    # Use a context manager so the output file is flushed and closed.
    with open(args.scored_run_file_output_path, 'wb') as scored_file:
        scored_file.write(json.dumps(run, indent=4))
Example #22
0
 def store(self):
     '''Return the cached ``Store``, creating it on first use.

     ``feature_indexes`` comes from the ``dossier.store`` global
     configuration; it is ``None`` when a ``KeyError`` is raised while
     looking it up.
     '''
     if self._store is not None:
         return self._store

     try:
         store_conf = yakonfig.get_global_config('dossier.store')
         feature_indexes = store_conf['feature_indexes']
     except KeyError:
         feature_indexes = None
     kvl_client = kvlayer.client()
     self._store = Store(kvl_client, feature_indexes=feature_indexes)
     return self._store
Example #23
0
def main():
    '''Run the kvlayer performance tests for every requested setting.

    ``--num-workers``, ``--item-size`` and ``--num-items-per-batch``
    may each be given several times; ``run_perftests`` is called once
    for every combination.  Results go to the file named by ``--out``
    (opened in append mode) or to standard output.

    :returns: return code for ``sys.exit()``; the most recent non-zero
      code from ``run_perftests``, or ``0`` if every run returned a
      falsy value
    '''
    parser = argparse.ArgumentParser(
        description='Run kvlayer performance tests on a single backend.',
        conflict_handler='resolve')
    parser.add_argument('--num-workers', action='append', default=[], type=int)
    parser.add_argument('--item-size', action='append', default=[], type=int,
                        help='size of the items to push in the large writes test, '
                        'defaults to maximum size per record in thrift RPC server '
                        'example, i.e. 15MB minus a bit of overhead')
    parser.add_argument('--num-items-per-batch', action='append', default=[], type=int,
                        help='number of items per batch in the large writes test, '
                        'defaults to 1')
    parser.add_argument('--num-batches', default=10, type=int,
                        help='number of batches in the large writes test, '
                        'defaults to 10')
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--shutdown-proxies', action='store_true')
    parser.add_argument('--out', default=None, help='file to append results to')
    modules = [yakonfig]
    if dblogger:
        modules.append(dblogger)
    modules.append(kvlayer)
    args = yakonfig.parse_args(parser, modules)

    if not args.item_size:
        args.item_size = [fifteen_MB_minus_overhead]
    if not args.num_workers:
        args.num_workers = [1]
    if not args.num_items_per_batch:
        args.num_items_per_batch = [1]

    out = open(args.out, 'a') if args.out else sys.stdout

    # return code for sys.exit()
    rc = 0
    try:
        for num_workers in args.num_workers:
            for num_items_per_batch in args.num_items_per_batch:
                for item_size in args.item_size:
                    result = run_perftests(
                        num_workers=num_workers,
                        item_size=item_size,
                        num_items_per_batch=num_items_per_batch,
                        num_batches=args.num_batches,
                        profile=args.profile,
                        out=out)
                    # A later successful run must not hide an earlier
                    # failure, so only a non-zero result replaces rc.
                    rc = result or rc
    finally:
        # Close the results file we opened; never close sys.stdout.
        if out is not sys.stdout:
            out.close()

    if args.shutdown_proxies:
        # special feature of CBOR RPC proxy, really for testing only!
        client = kvlayer.client()
        client.shutdown_proxies()
    return rc
Example #24
0
 def __init__(self, *args, **kwargs):
     '''Create the ``Store`` and load the TF-IDF model.

     ``feature_indexes`` is read from the ``dossier.store`` global
     configuration and left as ``None`` if a ``KeyError`` is raised.
     The model is loaded from ``self.config['tfidf_path']``.
     '''
     super(to_dossier_store, self).__init__(*args, **kwargs)
     kvl = kvlayer.client()
     try:
         store_conf = yakonfig.get_global_config('dossier.store')
         feature_indexes = store_conf['feature_indexes']
     except KeyError:
         feature_indexes = None
     self.store = Store(kvl, feature_indexes=feature_indexes)
     self.tfidf = gensim.models.TfidfModel.load(
         self.config.get('tfidf_path'))
def main():
    """Build the demo data in a ``filestorage`` kvlayer backend.

    The single positional argument ``dest_path`` names the backing
    file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'dest_path', help='File into which you want to store test data')
    args = parser.parse_args()

    file_config = {'filename': args.dest_path}
    kvl = kvlayer.client(config=file_config,
                         storage_type='filestorage',
                         namespace='test',
                         app_name='test')
    build_demo_data(kvl)
Example #26
0
 def fin():
     '''Delete the kvlayer namespace ``namespace_string``.

     This is best effort: a missing ``kvlayer`` configuration is logged
     as a warning and any other failure is logged with its traceback;
     no exception is propagated.
     '''
     logger.info('tearing down %s...', namespace_string)
     try:
         # Copy the section so the process-wide configuration is not
         # modified by the override below.
         config = dict(yakonfig.get_global_config('kvlayer'))
         ## this is probably already in the config
         config['namespace'] = namespace_string
         client = kvlayer.client(config)
         client.delete_namespace()
         logger.info('finished tearing down %s.', namespace_string)
     except KeyError:
         logger.warning('%s not configured in this process; cannot guess config', namespace_string)
     except Exception:
         logger.error('failed to tear down %s', namespace_string, exc_info=True)
Example #27
0
 def __init__(self, *args, **kwargs):
     '''Set up the ``Store`` and the TF-IDF model used by this writer.

     ``feature_indexes`` is taken from the ``dossier.store`` global
     configuration when available (``None`` on ``KeyError``).  The
     model is loaded from the path under ``tfidf_path`` in
     ``self.config``.
     '''
     super(to_dossier_store, self).__init__(*args, **kwargs)
     kvl = kvlayer.client()
     try:
         feature_indexes = yakonfig.get_global_config(
             'dossier.store')['feature_indexes']
     except KeyError:
         feature_indexes = None
     self.store = Store(kvl, feature_indexes=feature_indexes)
     model_path = self.config.get('tfidf_path')
     self.tfidf = gensim.models.TfidfModel.load(model_path)
Example #28
0
def main():
    """Build the demo data in a ``filestorage`` kvlayer backend.

    The single positional argument ``dest_path`` names the backing
    file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'dest_path', help='File into which you want to store test data')
    dest_path = parser.parse_args().dest_path

    client_options = dict(storage_type='filestorage',
                          namespace='test',
                          app_name='test')
    kvl = kvlayer.client(config={'filename': dest_path}, **client_options)
    build_demo_data(kvl)
Example #29
0
def perftest_throughput_insert_random(num_workers=4,
                                      profile=False,
                                      item_size=fifteen_MB_minus_overhead,
                                      num_items_per_batch=1,
                                      num_batches=10,
                                      client=None,
                                      ):
    '''Measure concurrent write throughput writing data to a table.

    Runs ``random_inserts`` through ``run_many`` with ``num_workers``
    workers, each configured for ``num_batches`` batches of
    ``num_items_per_batch`` items of ``item_size`` bytes, then prints a
    summary line.

    :param int num_workers: number of parallel workers
    :param bool profile: passed through to ``run_many``
    :param int item_size: size in bytes of each inserted item
    :param int num_items_per_batch: items written per batch
    :param int num_batches: batches written per worker
    :param client: kvlayer client; a new one is created when ``None``.
      It is only handed to the workers when ``num_workers`` is 1.
    :returns: tuple of (list of values returned by ``run_many``,
      bytes written per second)
    '''
    if client is None:
        client = kvlayer.client()
    if client._config.get('storage_type') == 'accumulo':
        import struct
        client.setup_namespace(dict(t1=2))
        # add split points at regular intervals over the 32-bit range
        step = ((0x7fffffff // 20) * 2) + 1
        splits = [struct.pack('>I', i) for i in xrange(step, 0x0ffffffff, step)]
        logger.info('accumulo splits=%r', splits)
        client.conn.client.addSplits(client.conn.login, client._ns('t1'), splits)

    num_inserts = num_items_per_batch * num_batches
    total_inserts = num_workers * num_inserts
    task_generator = (uuid.uuid4() for x in xrange(num_workers))
    class_config = dict(
        item_size=item_size,
        num_items_per_batch=num_items_per_batch,
        num_batches=num_batches,
        )
    if num_workers == 1:
        class_config['client'] = client
    start_time = time.time()
    ret_vals = list(run_many(random_inserts, task_generator,
                             timeout=total_inserts * 5,
                             class_config=class_config,
                             num_workers=num_workers,
                             profile=profile))

    elapsed = time.time() - start_time
    assert len(ret_vals) == total_inserts, (len(ret_vals), num_workers, num_batches, num_items_per_batch)
    total_bytes = item_size * total_inserts
    rate = total_inserts / elapsed
    # Divide by a float: with two ints Python 2 truncates, which would
    # report whole megabytes only.
    megabyte = float(2 ** 20)
    total_megabytes = total_bytes / megabyte
    print(
        'parallel {0} workers, {1} batches, {2} items per batch, '
        '{3} bytes per item, '
        '{4} inserts ({5:.4f} MB) written in {6:.1f} seconds --> '
        '{7:.1f} items/sec, {8:.4f} MB/s'
        .format(
            num_workers, num_batches, num_items_per_batch, item_size,
            total_inserts, total_megabytes, elapsed,
            rate, total_megabytes / elapsed))
    sys.stdout.flush()
    return ret_vals, (total_bytes / elapsed)
Example #30
0
def main():
    """Load a truth data file into a ``LabelStore`` on kvlayer.

    After ``parse_truth_data`` has run, the active kvlayer
    configuration is logged at debug level.
    """
    tool_text = ('test tool for checking that we can load '
                 'the truth data as distributed by NIST for '
                 'TREC 2015')
    parser = argparse.ArgumentParser(tool_text)
    parser.add_argument('truth_data_path', help='path to truth data file')
    args = yakonfig.parse_args(parser, [yakonfig, kvlayer])
    logging.basicConfig(level=logging.DEBUG)

    label_store = LabelStore(kvlayer.client())
    parse_truth_data(label_store, args.truth_data_path)

    backend_dump = json.dumps(yakonfig.get_global_config('kvlayer'),
                              indent=4, sort_keys=True)
    logger.debug('Done!  The truth data was loaded into this kvlayer backend: %r',
                 backend_dump)
def main():
    """Create rejester jobs that load elasticsearch.

    Builds a ``rejester.TaskMaster`` from the ``rejester`` global
    configuration and passes it, a kvlayer client, the ``--source``
    values and the work spec name to ``make_rejester_jobs``.
    """
    parser = argparse.ArgumentParser(
        conflict_handler='resolve',
        description='create rejester jobs to load elasticsearch')
    parser.add_argument('--source', action='append',
                        help='source strings to consider')
    parser.add_argument('--work-spec-name', '-W',
                        metavar='NAME',
                        default='elasticsearch',
                        help='name of rejester work spec')
    modules = [yakonfig, rejester, kvlayer, dblogger]
    args = yakonfig.parse_args(parser, modules)

    rejester_config = yakonfig.get_global_config('rejester')
    task_master = rejester.TaskMaster(rejester_config)
    kvl = kvlayer.client()
    make_rejester_jobs(task_master, kvl, args.source, args.work_spec_name)
Example #32
0
def main():
    """Command line interface to the jig harness.

    The first positional argument is the command (``load``, ``init``,
    ``start``, ``step`` or ``stop``); the remaining arguments are the
    command's input:

    * ``load`` parses the truth data file named by the harness
      configuration's ``truth_data_path`` into the label store.
    * ``init`` and ``start`` call the matching harness method and
      print its response as JSON.
    * ``stop`` takes one argument, which is passed to
      ``harness.stop``.
    * ``step`` takes a topic id followed by further arguments, which
      are passed to ``harness.step``.

    Invalid commands and missing required input end the program with
    a message via ``sys.exit``.
    """
    parser = argparse.ArgumentParser(
        'Command line interface to the office TREC DD jig.',
        usage=usage,
        conflict_handler='resolve')
    parser.add_argument('command', help='must be "load", "init", "start", "step", or "stop"')
    parser.add_argument('args', help='input for given command',
                        nargs=argparse.REMAINDER)
    modules = [yakonfig, kvlayer, Harness]
    args = yakonfig.parse_args(parser, modules)

    logging.basicConfig(level=logging.DEBUG)

    if args.command not in ('load', 'init', 'start', 'step', 'stop'):
        sys.exit('The only valid commands are "load", "init", "start", "step", and "stop".')

    # Check required positional input before any backend is touched, so
    # a missing argument is a usage error and not a bare IndexError.
    if args.command == 'stop' and not args.args:
        sys.exit('The "stop" command requires an argument.')
    if args.command == 'step' and not args.args:
        sys.exit('The "step" command requires a topic id.')

    kvl = kvlayer.client()
    label_store = LabelStore(kvl)
    config = yakonfig.get_global_config('harness')
    harness = Harness(config, kvl, label_store)

    if args.command == 'load':
        if not config.get('truth_data_path'):
            sys.exit('Must provide --truth-data-path as an argument')
        if not os.path.exists(config['truth_data_path']):
            sys.exit('%r does not exist' % config['truth_data_path'])
        parse_truth_data(label_store, config['truth_data_path'])
        logger.info('Done!  The truth data was loaded into this '
                     'kvlayer backend:\n%s',
                    json.dumps(yakonfig.get_global_config('kvlayer'),
                               indent=4, sort_keys=True))

    elif args.command == 'init':
        response = harness.init()
        print(json.dumps(response))

    elif args.command == 'start':
        response = harness.start()
        print(json.dumps(response))

    elif args.command == 'stop':
        response = harness.stop(args.args[0])
        print(json.dumps(response))

    elif args.command == 'step':
        parts = args.args
        # first argument is the topic id; the rest is passed on as-is
        topic_id = parts.pop(0)
        feedback = harness.step(topic_id, parts)
        print(json.dumps(feedback))
def chunks(configurator, test_data_dir, overlay=None):
    """Write the v0.3.0 test chunk through ``to_kvlayer``.

    Yields the chunk path together with a kvlayer client whose
    namespace has the stream item tables set up.

    :param configurator: callable returning a context manager that
      installs the configuration; it is called with ``overlay``
    :param test_data_dir: passed to ``get_test_v0_3_0_chunk_path``
    :param dict overlay: configuration overlay; defaults to a fresh
      empty dictionary for every call
    """
    # Avoid a shared mutable default: build a new dict per call.
    if overlay is None:
        overlay = {}
    with configurator(overlay):
        path = get_test_v0_3_0_chunk_path(test_data_dir)
        config = yakonfig.get_global_config('streamcorpus_pipeline',
                                            'to_kvlayer')
        writer = to_kvlayer(config)

        ## name_info and i_str are not used by the writer
        i_str = ''
        name_info = {}
        writer(path, name_info, i_str)

        client = kvlayer.client()
        client.setup_namespace({'stream_items': 2,
                                'stream_items_doc_id_epoch_ticks': 2,
                                'stream_items_with_source': 2})
        yield path, client
Example #34
0
def main():
    """Run ``scan_batch_size`` against a configurable kvlayer backend.

    ``--storage_type`` and ``--storage_address`` override the
    ``storage_type`` and ``storage_addresses`` entries of the global
    ``kvlayer`` configuration before the client is created.  Without
    an address, ``redis.diffeo.com:6379`` is used.
    """
    import argparse
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--storage_type', default='redis')
    parser.add_argument('--storage_address', nargs='?', dest='storage_addresses')
    modules = [yakonfig, kvlayer]
    args = yakonfig.parse_args(parser, modules)
    config = yakonfig.get_global_config()

    if not args.storage_addresses:
        args.storage_addresses = ['redis.diffeo.com:6379']
    elif not isinstance(args.storage_addresses, list):
        # ``nargs='?'`` yields one bare string, but the config entry is
        # a list of addresses (as the default above is), so wrap it.
        args.storage_addresses = [args.storage_addresses]

    config['kvlayer'].update({
        'storage_type': args.storage_type,
        'storage_addresses': args.storage_addresses,
        })
    client = kvlayer.client()
    scan_batch_size(client)
Example #35
0
def client(namespace_string, config_path):
    """Yield a kvlayer client configured from ``config_path``.

    A ``_test_ns`` helper is attached to the client; it joins the app
    name, the namespace and a given name with underscores.  The
    namespace is deleted once the generator is resumed.
    """
    app_name = "kvlayer"
    params = {"app_name": app_name, "namespace": namespace_string}
    with yakonfig.defaulted_config([kvlayer], filename=config_path,
                                   params=params):
        logger.info("initializing client")
        kvl_client = kvlayer.client()

        def _test_ns(name):
            return "_".join((app_name, namespace_string, name))

        kvl_client._test_ns = _test_ns

        yield kvl_client

        logger.info("tearing down %s", _test_ns(""))
        kvl_client.delete_namespace()
        logger.info("done cleaning up")
Example #36
0
def client(backend, request, tmpdir, namespace_string):
    """Yield a kvlayer client for ``backend`` with stats captured.

    The configuration comes from ``_extension_test_configs`` when it
    has an entry for the backend, otherwise from ``config_<backend>.yaml``
    next to the requesting test file.  Stats are written to an
    in-memory buffer, which is logged after the generator is resumed
    if the client has a ``_log_stats`` object.  The namespace is
    deleted both before and after use.
    """
    if backend not in _extension_test_configs:
        config_name = 'config_{0}.yaml'.format(backend)
        config_path = str(request.fspath.dirpath(config_name))
        # read and parse the config file, insert an object
        with open(config_path, 'r') as config_file:
            file_config = yaml.load(config_file)
    else:
        file_config = yaml.load(_extension_test_configs[backend])

    # Insert an object into the config which stats will write to.
    # Below we can get the stats text and log it here.
    # (Normal stats flow logs to file.)
    kvlayer_section = file_config['kvlayer']
    kvlayer_section['log_stats'] = StringIO()
    kvlayer_section['encoder'] = 'packed'

    params = {
        'app_name': 'kvlayer',
        'namespace': namespace_string,
    }

    # this is hacky but must go somewhere
    if backend == 'filestorage':
        storage_file = tmpdir.join('local')
        # open for writing and close again straight away
        storage_file.open('w').close()
        params['kvlayer_filename'] = str(storage_file)

    if backend == 'redis':
        params['storage_addresses'] = [redis_address(request)]

    with yakonfig.defaulted_config([kvlayer], config=file_config,
                                   params=params):
        kvl_client = kvlayer.client()
        kvl_client.delete_namespace()
        yield kvl_client
        if kvl_client._log_stats is not None:
            kvl_client._log_stats.flush()
            stats_text = file_config['kvlayer']['log_stats'].getvalue()
            logger.info('storage stats (%s %s):\n%s',
                        backend, request.function.__name__,
                        stats_text)
        kvl_client.delete_namespace()
Example #37
0
def chunks(configurator, test_data_dir, overlay=None):
    """Write the v0.3.0 test chunk through ``to_kvlayer``.

    Yields the chunk path together with a kvlayer client whose
    namespace has the stream item tables set up.

    :param configurator: callable returning a context manager that
      installs the configuration; it is called with ``overlay``
    :param test_data_dir: passed to ``get_test_v0_3_0_chunk_path``
    :param dict overlay: configuration overlay; defaults to a fresh
      empty dictionary for every call
    """
    # Avoid a shared mutable default: build a new dict per call.
    if overlay is None:
        overlay = {}
    with configurator(overlay):
        path = get_test_v0_3_0_chunk_path(test_data_dir)
        config = yakonfig.get_global_config('streamcorpus_pipeline',
                                            'to_kvlayer')
        writer = to_kvlayer(config)

        ## name_info and i_str are not used by the writer
        i_str = ''
        name_info = {}
        writer(path, name_info, i_str)

        client = kvlayer.client()
        client.setup_namespace({
            'stream_items': 2,
            'stream_items_doc_id_epoch_ticks': 2,
            'stream_items_with_source': 2
        })
        yield path, client
Example #38
0
def client(request, namespace_string, redis_address):
    """Return a redis-backed kvlayer client with two tables set up.

    The configuration is printed and installed as the global
    ``kvlayer`` configuration.  A finalizer registered on ``request``
    deletes the namespace.
    """
    config = {
        'namespace': namespace_string,
        'storage_type': 'redis',
        'app_name': 'dbltest',
        'storage_addresses': [redis_address],
    }
    print(config)
    yakonfig.set_global_config({'kvlayer': config})
    kvl_client = kvlayer.client()

    kvl_client.setup_namespace({'existing_table_1': 2,
                                'existing_table_2': 2})

    def drop_namespace():
        kvl_client.delete_namespace()
    request.addfinalizer(drop_namespace)

    return kvl_client
Example #39
0
def main():
    '''Command-line entry point for the random baseline recommender.

    Reads the ``harness`` section of the global yakonfig configuration,
    refuses to continue if the run file already exists (unless
    ``--overwrite`` is given, in which case the file is removed), loads
    the truth data into a label store and runs the harness ambassador
    with a :class:`RandomSystem`.
    '''
    parser = argparse.ArgumentParser(description=(
        'A baseline recommender system that uses the truth data to'
        ' create output that has perfect recall and would also have'
        ' perfect precision if you ignore subtopic diversity/novelty.'
        ' This generates output directly from the truth data and'
        ' randomly shuffles the truth data per topic, so that'
        ' the ordering of passages does not attempt to optimize any'
        ' particular quality metric.'))
    parser.add_argument('--overwrite', action='store_true')
    args = yakonfig.parse_args(parser, [yakonfig])

    logging.basicConfig(level=logging.DEBUG)

    harness_config = yakonfig.get_global_config('harness')
    batch_size = harness_config.get('batch_size', 5)
    run_file_path = harness_config['run_file_path']
    if os.path.exists(run_file_path):
        # Never silently clobber an earlier run.
        if not args.overwrite:
            sys.exit('%r already exists' % run_file_path)
        os.remove(run_file_path)

    label_store = LabelStore(kvlayer.client(dict(
        storage_type='local',
        namespace='test',
        app_name='test',
    )))

    parse_truth_data(label_store, harness_config['truth_data_path'])

    # Set up the system
    system = RandomSystem(make_doc_store(label_store))
    HarnessAmbassadorCLI(system, args.config, batch_size).run()
def main():
    '''Run the random recommender system on a sequence of topics.

    Steps: parse the command line through yakonfig, enable DEBUG
    logging, read the ``harness`` configuration, deal with a
    pre-existing run file, load the truth data into a label store, and
    hand a :class:`RandomSystem` to the harness ambassador.
    '''
    description = (
        'A baseline recommender system that uses the truth data to'
        ' create output that has perfect recall and would also have'
        ' perfect precision if you ignore subtopic diversity/novelty.'
        ' This generates output directly from the truth data and'
        ' randomly shuffles the truth data per topic, so that'
        ' the ordering of passages does not attempt to optimize any'
        ' particular quality metric.'
    )
    arg_parser = argparse.ArgumentParser(description=description)
    arg_parser.add_argument('--overwrite', action='store_true')
    args = yakonfig.parse_args(arg_parser, [yakonfig])

    logging.basicConfig(level=logging.DEBUG)

    config = yakonfig.get_global_config('harness')
    batch_size = config.get('batch_size', 5)
    run_file_path = config['run_file_path']

    # An existing run file is fatal unless --overwrite was requested.
    run_file_exists = os.path.exists(run_file_path)
    if run_file_exists and not args.overwrite:
        sys.exit('%r already exists' % run_file_path)
    if run_file_exists:
        os.remove(run_file_path)

    kvl_config = {
        'storage_type': 'local',
        'namespace': 'test',
        'app_name': 'test',
    }
    label_store = LabelStore(kvlayer.client(kvl_config))

    parse_truth_data(label_store, config['truth_data_path'])

    # Set up the system
    doc_store = make_doc_store(label_store)
    ambassador = HarnessAmbassadorCLI(
        RandomSystem(doc_store), args.config, batch_size)
    ambassador.run()
Example #41
0
    def __init__(self, *args, **kwargs):
        '''Read the split_s3 settings, build the second backend, open S3.

        All arguments are forwarded to the parent constructor.  The
        ``bucket``, ``tables`` and ``kvlayer`` configuration entries
        are mandatory.

        :raises ConfigurationError: if ``bucket`` or ``tables`` is
          missing or empty, or if there is no ``kvlayer`` entry
        '''
        super(SplitS3Storage, self).__init__(*args, **kwargs)

        # Find some credentials
        access_key = self._value_or_path('aws_access_key_id')
        secret_key = self._value_or_path('aws_secret_access_key')

        config = self._config

        # Mandatory settings
        bucket_name = config.get('bucket', None)
        if not bucket_name:
            raise ConfigurationError('split_s3 storage requires bucket')
        self.tables = config.get('tables', None)
        if not self.tables:
            raise ConfigurationError('split_s3 storage requires tables')

        # Key prefix: optional fixed part, then (unless disabled via
        # ``kvlayer_prefix``) an "app_name/namespace/" component.
        prefix = config.get('path_prefix', '')
        if config.get('kvlayer_prefix', True):
            prefix += '{0}/{1}/'.format(self._app_name, self._namespace)
        self.prefix = prefix

        self.retries = config.get('retries', 5)
        self.retry_interval = config.get('retry_interval', 0.1)

        # Set up the other backend
        if 'kvlayer' not in config:
            raise ConfigurationError('split_s3 storage requires '
                                     'second kvlayer configuration')
        self.kvlclient = kvlayer.client(
            config=config['kvlayer'],
            app_name=self._app_name,
            namespace=self._namespace,
        )

        # Actually connect to S3.  Any sort of connection pooling
        # apparently fails for HTTPS, hence is_secure=False; see
        # https://github.com/boto/boto/issues/1934
        s3 = boto.connect_s3(
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            is_secure=False,
        )
        self.bucket = s3.get_bucket(bucket_name)
Example #42
0
 def kvlclient(self):
     '''Return the cached ``kvlayer`` client, creating it on first use.'''
     cached = self._kvlclient
     if cached is None:
         # First access: build the client and remember it on the instance.
         cached = self._kvlclient = kvlayer.client()
     return cached
Example #43
0
def kvl(config_local):
    '''Yield a ``kvlayer`` client; on resumption delete its namespace
    and close it.

    :param config_local: not used in the body; presumably requested so
      that configuration is in place first -- verify against callers
    '''
    kvl_client = kvlayer.client()
    yield kvl_client
    # Teardown: runs only when the generator is resumed after the yield.
    kvl_client.delete_namespace()
    kvl_client.close()
Example #44
0
 def make_config(overlay=None):
     '''Activate ``base_config`` overlaid with `overlay`, then clean up.

     This is a generator.  It yields once while a yakonfig
     configuration for ``kvlayer`` and ``MiniScp`` is in effect; when
     resumed it deletes the kvlayer namespace before leaving the
     configuration context.

     :param overlay: configuration merged over ``base_config``; a fresh
       empty dict is used when omitted
     '''
     # A literal ``{}`` default is one dict shared by every call; use a
     # sentinel so each call gets its own.
     if overlay is None:
         overlay = {}
     config = yakonfig.merge.overlay_config(base_config, overlay)
     with yakonfig.defaulted_config([kvlayer, MiniScp], config=config):
         yield
         client = kvlayer.client()
         client.delete_namespace()
Example #45
0
from dossier.fc import FeatureCollection, StringCounter
from dossier.store import Store
import kvlayer

# Use a configuration-free, in-memory database. This is ONLY useful for
# development, debugging, or testing.
conn = kvlayer.client(config={}, storage_type='local')

# Or you can use this to test with Redis.
# config = {
# 'storage_type': 'redis',
# 'storage_addresses': ['localhost:6379'],
# 'app_name': 'your-app-name',
# 'namespace': 'features',
# }
# conn = kvlayer.client(config=config)

# Use something like this for HBase.
# config = {
# 'storage_type': 'hbase',
# 'storage_addresses': ['127.0.0.1:17111'],
# 'username': '******',
# 'password': '******',
# 'dbname': 'database-name',
# 'app_name': 'your-app-name',
# 'namespace': 'features',
# }
# conn = kvlayer.client(config=config)

# There are more backends available like MySQL, PostgreSQL and Accumulo.
#
Example #46
0
class Factory(yakonfig.factory.AutoFactory):
    '''Auto-factory for the ``sortingdesk_report`` configuration block.'''

    config_name = 'sortingdesk_report'

    @property
    def kvlclient(self):
        '''A ``kvlayer`` client, created anew on every access.'''
        return kvlayer.client()

    def auto_config(self):
        '''Return an empty list.'''
        return []
Example #47
0
 def label_store(self):
     '''Return the cached ``LabelStore``, building it on first use.'''
     store = self._label_store
     if store is None:
         # First access: wrap a new kvlayer client and remember the store.
         store = self._label_store = LabelStore(kvlayer.client())
     return store
Example #48
0
 def __init__(self, *args, **kwargs):
     '''Initialize the parent class, then open a ``kvlayer`` client.

     All positional and keyword arguments are forwarded unchanged to
     the parent constructor.
     '''
     super(from_kvlayer, self).__init__(*args, **kwargs)
     self.client = kvlayer.client()
     # Register the ``stream_items`` table; the 2 is presumably the
     # number of key components -- TODO confirm against kvlayer docs.
     self.client.setup_namespace(dict(stream_items=2))