def main():
    p = argparse.ArgumentParser(
        'simple debugging tool for watching the linker and OpenQuery')
    p.add_argument('action', help='either `run` or `cache` or `delete`')
    p.add_argument('folder', help='folder name')
    p.add_argument('subfolder', help='subfolder name')
    args = yakonfig.parse_args(p, [kvlayer, yakonfig])
    config = yakonfig.get_global_config()

    key = cbor.dumps(
        (args.folder.replace(' ', '_'), args.subfolder.replace(' ', '_')))

    if args.action == 'run':
        web_conf = Config()
        with yakonfig.defaulted_config([kvlayer, dblogger, web_conf],
                                       config=config):
            traverse_extract_fetch(web_conf, key, stop_after_extraction=True)
    elif args.action == 'delete':
        kvlclient = kvlayer.client()
        kvlclient.setup_namespace({'openquery': (str,)})
        kvlclient.delete('openquery', (key,))
        print('deleted %r' % key)
    elif args.action == 'cache':
        kvlclient = kvlayer.client()
        kvlclient.setup_namespace({'openquery': (str,)})
        count = 0
        for rec in kvlclient.scan('openquery'):
            count += 1
            if rec[0][0] == key:
                print(rec)
        print('%d cached queries' % count)

def __init__(self, storage_client=None, table_name="log", storage_config=None): """Create a new database log handler. You must either pass in ``storage_client``, an actual kvlayer client object, or ``storage_config``, a dictionary which will be passed to ``kvlayer.client()``. Log messages will be stored in the table ``table_name``. :param storage_client: existing storage client :type storage_client: :class:`kvlayer.AbstractStorage` :param str table_name: virtual table name :param dict storage_config: configuration for new storage client """ super(DatabaseLogHandler, self).__init__() if storage_client is None: if storage_config is None: raise RuntimeError('must pass either storage_client or ' 'storage_config') with yakonfig.defaulted_config( [kvlayer], config=dict(kvlayer=storage_config)): storage_client = kvlayer.client() self.storage = storage_client self.table_name = table_name storage_client.setup_namespace({table_name: 1}) self.sequence_number = 0
def main(options): """Run the recommender system on a sequence of topics. """ description = "System using LDA, Kmeans and Solr to optimize diversification and exploitation of different topics" parser = argparse.ArgumentParser(description=description) parser.add_argument("--overwrite", action="store_true") args = yakonfig.parse_args(parser, [yakonfig]) logging.basicConfig(level=logging.DEBUG) config = yakonfig.get_global_config("harness") batch_size = config.get("batch_size", 5) run_file_path = config["run_file_path"] if os.path.exists(run_file_path): if args.overwrite: os.remove(run_file_path) else: os.remove(run_file_path) # sys.exit('%r already exists' % run_file_path) kvl_config = {"storage_type": "local", "namespace": "test", "app_name": "test"} kvl = kvlayer.client(kvl_config) method, feedback_options, poids, id_config = options[0], options[1], options[2], options[3] print method, poids system = SearchSystem([], method, feedback_options, poids) print args.config args.config = "config" + str(id_config) + ".yaml" print args.config ambassador = HarnessAmbassadorCLI(system, args.config, batch_size) ambassador.run()
def main():
    p = argparse.ArgumentParser(
        description='SortingDesk report generation tool')
    p.add_argument('-c', '--config', required=True,
                   help='dossier stack YAML config file')
    p.add_argument('-o', '--output', required=True,
                   help='path to write Excel workbook file')
    p.add_argument('-u', '--user', default='unknown',
                   help='user name (default=ALL)')
    p.add_argument('folder', help='folder name')
    p.add_argument('subfolder', nargs='?', default=None,
                   help='subfolder name (default=ALL)')
    args = p.parse_args()

    config = yakonfig.set_default_config([kvlayer], filename=args.config)
    factory = Factory(config)
    store = factory.create(Store)

    # Instantiate and run report generator.
    folders = Folders(kvlayer.client())
    gen = ReportGenerator(store, folders, args.folder,
                          subfolder_name=args.subfolder, user=args.user)
    with open(args.output, 'wb+') as out:
        gen.run(out)

def __init__(self, *args, **kwargs):
    super(to_kvlayer, self).__init__(*args, **kwargs)
    self.client = kvlayer.client()
    tables = {'stream_items': 2}
    for ndx in self.config['indexes']:
        tables['stream_items_' + ndx] = self.index_sizes[ndx]
    self.client.setup_namespace(tables)

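# A hedged sketch of this writer's configuration, inferred from the
# index table names used in the tests below
# ('stream_items_doc_id_epoch_ticks', 'stream_items_with_source'): each
# entry in `indexes` adds one kvlayer table alongside 'stream_items'.
example_config = {
    'streamcorpus_pipeline': {
        'to_kvlayer': {
            'indexes': ['doc_id_epoch_ticks', 'with_source'],
        },
    },
}
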
def client(backend, request, tmpdir, namespace_string):
    config_path = str(request.fspath.dirpath(
        'config_{0}.yaml'.format(backend)))
    statsfile = StringIO.StringIO()
    params = dict(
        app_name='kvlayer',
        namespace=namespace_string,
        log_stats=statsfile,
        log_stats_interval_ops=1,
        blagh='hoo haa',
    )
    # this is hacky but must go somewhere
    if backend == 'filestorage':
        local = tmpdir.join('local')
        with local.open('w') as f:
            pass
        params['kvlayer_filename'] = str(local)
    if backend == 'redis':
        params['storage_addresses'] = [redis_address(request)]
    with yakonfig.defaulted_config([kvlayer], filename=config_path,
                                   params=params):
        client = kvlayer.client()
        client.delete_namespace()
        yield client
        client.delete_namespace()

def test_kvlayer_simple(configurator, tmpdir):
    si = streamcorpus.make_stream_item('2000-01-01T12:34:00.000123Z',
                                       'test://test.stream.item/')
    chunkfile = str(tmpdir.join('chunk.sc.xz'))
    with streamcorpus.Chunk(path=chunkfile, mode='wb') as chunk:
        chunk.add(si)

    with configurator():
        writer = to_kvlayer(yakonfig.get_global_config(
            'streamcorpus_pipeline', 'to_kvlayer'))
        writer(chunkfile, {}, '')

        kvlclient = kvlayer.client()
        kvlclient.setup_namespace({'stream_items': 2})
        print(repr(list(kvlclient.scan_keys('stream_items'))))
        for (k, v) in kvlclient.get(
                'stream_items',
                (uuid.UUID(int=946730040),
                 uuid.UUID(hex='985c1e3ed73256cd9a399919fe93cf76'))):
            assert v is not None

        reader = from_kvlayer(yakonfig.get_global_config(
            'streamcorpus_pipeline', 'from_kvlayer'))
        sis = list(reader(''))
        assert len(sis) == 1
        assert sis[0].stream_time.epoch_ticks == si.stream_time.epoch_ticks
        assert sis[0].abs_url == si.abs_url

def rejester_run(work_unit):
    '''Rejester entry point to run the elasticsearch load.

    This uses the work unit key as the input filename string for the
    reader specified in the work unit.  If the work unit data includes
    the key ``output`` then that value is passed as the matching output
    filename string.

    :param work_unit: work unit to run
    :type work_unit: :class:`rejester.WorkUnit`

    '''
    if 'config' not in work_unit.spec:
        raise rejester.exceptions.ProgrammerError(
            'could not run without global config')

    with yakonfig.defaulted_config([rejester, kvlayer, dblogger],
                                   config=work_unit.spec['config']):
        ## Set up elasticsearch client
        ## http://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch
        es = elasticsearch.Elasticsearch(
            work_unit.spec['config']['elasticsearch']['cluster'])

        ## Set up kvlayer client
        kvl = kvlayer.client()
        kvl.setup_namespace({'stream_items': 2})

        ## Get the data associated with the work_unit key
        key, data = kvl.get('stream_items', work_unit.key).next()

        ## Index an individual stream_item
        elasticsearch_loader.index_stream_item(es, kvl, data)

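# A hedged illustration of the work unit spec rejester_run() expects:
# spec['config'] carries the global config, including an
# 'elasticsearch' block with a 'cluster' address, and the work unit key
# is a 'stream_items' kvlayer key.  All values here are placeholders.
example_spec = {
    'config': {
        'kvlayer': {'storage_type': 'local',
                    'namespace': 'test',
                    'app_name': 'test'},
        'elasticsearch': {'cluster': 'localhost:9200'},
    },
}
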
def client(namespace_string, request):
    config_path = str(request.fspath.dirpath('config_cassandra.yaml'))
    with yakonfig.defaulted_config([kvlayer], filename=config_path,
                                   params={'namespace': namespace_string,
                                           'app_name': 'kvltest'}):
        client = kvlayer.client()
        yield client
        client.delete_namespace()

def __init__(self, config):
    logger.info('thing init')
    self.client = config.get('client', None) or kvlayer.client()
    self.client.setup_namespace(dict(t1=2))
    self.item_size = config.pop('item_size', fifteen_MB_minus_overhead)
    self.long_string = b' ' * self.item_size
    self.num_batches = config.pop('num_batches', 10)
    self.num_items_per_batch = config.pop('num_items_per_batch', 1)
    self.num_items = self.num_batches * self.num_items_per_batch

def kvl():
    config = {
        'storage_type': 'local',
        'app_name': 'diffeo',
        'namespace': 'memex_dossier.models.tests',
    }
    client = kvlayer.client(config)
    yield client
    client.close()

def worker(config, num_records):
    yakonfig.set_global_config(dict(kvlayer=config))
    client = kvlayer.client()
    dbhandler = DatabaseLogHandler(client)
    logger = logging.getLogger('foo')
    logger.addHandler(dbhandler)
    for i in xrange(num_records):
        logger.critical('a message: %d', i)
    logger.critical('finished')

def local_kvl():
    kvl = kvlayer.client(config={}, storage_type='local',
                         namespace='test', app_name='test')
    build_test_data(kvl)
    return kvl

def kvl():
    config = {
        'storage_type': 'local',
        'app_name': 'diffeo',
        'namespace': 'dossier.models.tests',
    }
    client = kvlayer.client(config)
    yield client
    client.close()

def label_store(self):
    '''Return a thread local :class:`dossier.label.LabelStore` client.'''
    if self._label_store is None:
        config = global_config('memex_dossier.label')
        if 'kvlayer' in config:
            kvl = kvlayer.client(config=config['kvlayer'])
            self._label_store = LabelStore(kvl)
        else:
            self._label_store = self.create(LabelStore, config=config)
    return self._label_store

def label_store(self):
    '''Return a thread local :class:`dossier.label.LabelStore` client.'''
    if self._label_store is None:
        config = global_config('dossier.label')
        if 'kvlayer' in config:
            kvl = kvlayer.client(config=config['kvlayer'])
            self._label_store = LabelStore(kvl)
        else:
            self._label_store = self.create(LabelStore, config=config)
    return self._label_store

def store(self):
    if self._store is None:
        feature_indexes = None
        try:
            conf = yakonfig.get_global_config("dossier.store")
            feature_indexes = conf["feature_indexes"]
        except KeyError:
            pass
        self._store = Store(kvlayer.client(),
                            feature_indexes=feature_indexes)
    return self._store

def kvl():
    config = {
        'storage_type': 'local',
        'app_name': 'diffeo',
        'namespace': 'dossier.store.test',
    }
    with yakonfig.defaulted_config([kvlayer], params=config) as config:
        client = kvlayer.client()
        yield client
        client.delete_namespace()

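# A small round-trip sketch against the same local in-memory backend as
# the fixture above.  The put() call taking (key-tuple, value) pairs is
# an assumption; setup_namespace(), scan(), and delete_namespace()
# match the calls used throughout this collection.
import uuid

import kvlayer

client = kvlayer.client({'storage_type': 'local',
                         'app_name': 'diffeo',
                         'namespace': 'dossier.store.test'})
client.setup_namespace({'t1': 2})
key = (uuid.uuid4(), uuid.uuid4())
client.put('t1', (key, b'hello'))
assert list(client.scan('t1')) == [(key, b'hello')]
client.delete_namespace()
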
def main():
    parser = argparse.ArgumentParser(__doc__, conflict_handler='resolve')
    parser.add_argument('run_file_path', help='path to run file to score.')
    parser.add_argument('scored_run_file_output_path',
                        help='path to file to create with scores inserted '
                        'into run file.')
    parser.add_argument('--overwrite', action='store_true', default=False,
                        help='overwrite any existing run file.')
    parser.add_argument('--verbose', action='store_true', default=False,
                        help='display verbose log messages.')
    parser.add_argument('--scorer', action='append', default=[],
                        dest='scorers',
                        help='names of scorer functions to run;'
                        ' if none are provided, it runs all of them')
    modules = [yakonfig, kvlayer]
    args = yakonfig.parse_args(parser, modules)

    if os.path.exists(args.scored_run_file_output_path):
        if args.overwrite:
            os.remove(args.scored_run_file_output_path)
        else:
            sys.exit('%r already exists' % args.scored_run_file_output_path)

    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level)

    kvl = kvlayer.client()
    label_store = LabelStore(kvl)
    run = load_run(args.run_file_path)

    if len(args.scorers) == 0:
        args.scorers = available_scorers.keys()
    for scorer_name in args.scorers:
        scorer = available_scorers.get(scorer_name)
        logger.info('running %s', scorer_name)
        # this modifies the run['scores'] object itself
        scorer(run, label_store)

    print(format_scores(run))
    with open(args.scored_run_file_output_path, 'wb') as out:
        out.write(json.dumps(run, indent=4))

def main():
    parser = argparse.ArgumentParser(
        description='Run kvlayer performance tests on a single backend.',
        conflict_handler='resolve')
    parser.add_argument('--num-workers', action='append', default=[],
                        type=int)
    parser.add_argument('--item-size', action='append', default=[], type=int,
                        help='size of the items to push in the large writes '
                        'test, defaults to maximum size per record in thrift '
                        'RPC server example, i.e. 15MB minus a bit of '
                        'overhead')
    parser.add_argument('--num-items-per-batch', action='append', default=[],
                        type=int,
                        help='number of items per batch in the large writes '
                        'test, defaults to 1')
    parser.add_argument('--num-batches', default=10, type=int,
                        help='number of batches in the large writes test, '
                        'defaults to 10')
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--shutdown-proxies', action='store_true')
    parser.add_argument('--out', default=None,
                        help='file to append results to')
    modules = [yakonfig]
    if dblogger:
        modules.append(dblogger)
    modules.append(kvlayer)
    args = yakonfig.parse_args(parser, modules)

    if args.out:
        out = open(args.out, 'a')
    else:
        out = sys.stdout

    if not args.item_size:
        args.item_size = [fifteen_MB_minus_overhead]
    if not args.num_workers:
        args.num_workers = [1]
    if not args.num_items_per_batch:
        args.num_items_per_batch = [1]

    # return code for sys.exit()
    rc = 0
    for num_workers in args.num_workers:
        for num_items_per_batch in args.num_items_per_batch:
            for item_size in args.item_size:
                rc = run_perftests(
                    num_workers=num_workers,
                    item_size=item_size,
                    num_items_per_batch=num_items_per_batch,
                    num_batches=args.num_batches,
                    profile=args.profile,
                    out=out)

    if args.shutdown_proxies:
        # special feature of CBOR RPC proxy, really for testing only!
        client = kvlayer.client()
        client.shutdown_proxies()

    return rc

def __init__(self, *args, **kwargs):
    super(to_dossier_store, self).__init__(*args, **kwargs)
    kvl = kvlayer.client()
    feature_indexes = None
    try:
        conf = yakonfig.get_global_config('dossier.store')
        feature_indexes = conf['feature_indexes']
    except KeyError:
        pass
    self.store = Store(kvl, feature_indexes=feature_indexes)
    tfidf_path = self.config.get('tfidf_path')
    self.tfidf = gensim.models.TfidfModel.load(tfidf_path)

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('dest_path',
                        help='File into which you want to store test data')
    args = parser.parse_args()
    kvl = kvlayer.client(config={'filename': args.dest_path},
                         storage_type='filestorage',
                         namespace='test', app_name='test')
    build_demo_data(kvl)

def fin():
    logger.info('tearing down %s...', namespace_string)
    try:
        config = yakonfig.get_global_config('kvlayer')
        ## this is probably already in the config
        config['namespace'] = namespace_string
        client = kvlayer.client(config)
        client.delete_namespace()
        logger.info('finished tearing down %s.', namespace_string)
    except KeyError:
        logger.warn('%s not configured in this process; cannot guess config',
                    namespace_string)
    except Exception:
        logger.error('failed to tear down %s', namespace_string,
                     exc_info=True)

def perftest_throughput_insert_random(num_workers=4, profile=False,
                                      item_size=fifteen_MB_minus_overhead,
                                      num_items_per_batch=1,
                                      num_batches=10,
                                      client=None):
    '''Measure concurrent write throughput writing data to a table.'''
    if client is None:
        client = kvlayer.client()
    if client._config.get('storage_type') == 'accumulo':
        import struct
        client.setup_namespace(dict(t1=2))
        step = ((0x7fffffff // 20) * 2) + 1
        splits = [struct.pack('>I', i)
                  for i in xrange(step, 0x0ffffffff, step)]
        logger.info('accumulo splits=%r', splits)
        client.conn.client.addSplits(client.conn.login, client._ns('t1'),
                                     splits)

    num_inserts = num_items_per_batch * num_batches
    total_inserts = num_workers * num_inserts
    task_generator = (uuid.uuid4() for x in xrange(num_workers))
    class_config = dict(
        item_size=item_size,
        num_items_per_batch=num_items_per_batch,
        num_batches=num_batches,
    )
    if num_workers == 1:
        class_config['client'] = client

    start_time = time.time()
    ret_vals = list(run_many(random_inserts, task_generator,
                             timeout=total_inserts * 5,
                             class_config=class_config,
                             num_workers=num_workers,
                             profile=profile))
    elapsed = time.time() - start_time
    assert len(ret_vals) == total_inserts, (len(ret_vals), num_workers,
                                            num_batches, num_items_per_batch)

    total_bytes = item_size * total_inserts
    rate = total_inserts / elapsed
    print(
        'parallel {0} workers, {1} batches, {2} items per batch, '
        '{3} bytes per item, '
        '{4} inserts ({5:.4f} MB) written in {6:.1f} seconds --> '
        '{7:.1f} items/sec, {8:.4f} MB/s'
        .format(
            num_workers, num_batches, num_items_per_batch, item_size,
            total_inserts, total_bytes / 2**20, elapsed, rate,
            total_bytes / (2**20 * elapsed)))
    sys.stdout.flush()
    return ret_vals, (total_bytes / elapsed)

def main():
    parser = argparse.ArgumentParser('test tool for checking that we can '
                                     'load the truth data as distributed '
                                     'by NIST for TREC 2015')
    parser.add_argument('truth_data_path', help='path to truth data file')
    modules = [yakonfig, kvlayer]
    args = yakonfig.parse_args(parser, modules)
    logging.basicConfig(level=logging.DEBUG)

    kvl = kvlayer.client()
    label_store = LabelStore(kvl)
    parse_truth_data(label_store, args.truth_data_path)
    logger.debug('Done! The truth data was loaded into this kvlayer '
                 'backend: %r',
                 json.dumps(yakonfig.get_global_config('kvlayer'),
                            indent=4, sort_keys=True))

def main():
    parser = argparse.ArgumentParser(
        description='create rejester jobs to load elasticsearch',
        conflict_handler='resolve')
    parser.add_argument('--source', action='append',
                        help='source strings to consider')
    parser.add_argument('--work-spec-name', '-W', metavar='NAME',
                        default='elasticsearch',
                        help='name of rejester work spec')
    args = yakonfig.parse_args(parser,
                               [yakonfig, rejester, kvlayer, dblogger])

    task_master = rejester.TaskMaster(yakonfig.get_global_config('rejester'))
    kvl = kvlayer.client()
    make_rejester_jobs(task_master, kvl, args.source, args.work_spec_name)

def main():
    parser = argparse.ArgumentParser(
        'Command line interface to the office TREC DD jig.',
        usage=usage, conflict_handler='resolve')
    parser.add_argument('command',
                        help='must be "load", "init", "start", "step", '
                        'or "stop"')
    parser.add_argument('args', help='input for given command',
                        nargs=argparse.REMAINDER)
    modules = [yakonfig, kvlayer, Harness]
    args = yakonfig.parse_args(parser, modules)
    logging.basicConfig(level=logging.DEBUG)

    if args.command not in set(['load', 'init', 'start', 'step', 'stop']):
        sys.exit('The only valid commands are "load", "init", "start", '
                 '"step", and "stop".')

    kvl = kvlayer.client()
    label_store = LabelStore(kvl)
    config = yakonfig.get_global_config('harness')
    harness = Harness(config, kvl, label_store)

    if args.command == 'load':
        if not config.get('truth_data_path'):
            sys.exit('Must provide --truth-data-path as an argument')
        if not os.path.exists(config['truth_data_path']):
            sys.exit('%r does not exist' % config['truth_data_path'])
        parse_truth_data(label_store, config['truth_data_path'])
        logger.info('Done! The truth data was loaded into this '
                    'kvlayer backend:\n%s',
                    json.dumps(yakonfig.get_global_config('kvlayer'),
                               indent=4, sort_keys=True))
    elif args.command == 'init':
        response = harness.init()
        print(json.dumps(response))
    elif args.command == 'start':
        response = harness.start()
        print(json.dumps(response))
    elif args.command == 'stop':
        response = harness.stop(args.args[0])
        print(json.dumps(response))
    elif args.command == 'step':
        parts = args.args
        topic_id = parts.pop(0)
        feedback = harness.step(topic_id, parts)
        print(json.dumps(feedback))

def chunks(configurator, test_data_dir, overlay={}):
    with configurator(overlay):
        path = get_test_v0_3_0_chunk_path(test_data_dir)
        config = yakonfig.get_global_config('streamcorpus_pipeline',
                                            'to_kvlayer')
        writer = to_kvlayer(config)

        ## name_info and i_str are not used by the writer
        i_str = ''
        name_info = {}
        writer(path, name_info, i_str)

        client = kvlayer.client()
        client.setup_namespace({'stream_items': 2,
                                'stream_items_doc_id_epoch_ticks': 2,
                                'stream_items_with_source': 2})
        yield path, client

def main():
    import argparse
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--storage_type', default='redis')
    parser.add_argument('--storage_address', nargs='?',
                        dest='storage_addresses')
    modules = [yakonfig, kvlayer]
    args = yakonfig.parse_args(parser, modules)
    config = yakonfig.get_global_config()

    if not args.storage_addresses:
        args.storage_addresses = ['redis.diffeo.com:6379']
    config['kvlayer'].update({
        'storage_type': args.storage_type,
        'storage_addresses': args.storage_addresses,
    })

    client = kvlayer.client()
    scan_batch_size(client)

def client(namespace_string, config_path): app_name = "kvlayer" with yakonfig.defaulted_config( [kvlayer], filename=config_path, params={"app_name": app_name, "namespace": namespace_string} ): logger.info("initializing client") client = kvlayer.client() def _test_ns(name): return "_".join([app_name, namespace_string, name]) client._test_ns = _test_ns yield client logger.info("tearing down %s", _test_ns("")) client.delete_namespace() logger.info("done cleaning up")
def client(backend, request, tmpdir, namespace_string):
    if backend in _extension_test_configs:
        file_config = yaml.load(_extension_test_configs[backend])
    else:
        config_path = str(request.fspath.dirpath('config_{0}.yaml'
                                                 .format(backend)))
        # read and parse the config file, insert an object
        with open(config_path, 'r') as f:
            file_config = yaml.load(f)
    # Insert an object into the config which stats will write to.
    # Below we can get the stats text and log it here.
    # (Normal stats flow logs to file.)
    file_config['kvlayer']['log_stats'] = StringIO()
    file_config['kvlayer']['encoder'] = 'packed'

    params = dict(
        app_name='kvlayer',
        namespace=namespace_string,
    )
    # this is hacky but must go somewhere
    if backend == 'filestorage':
        local = tmpdir.join('local')
        with local.open('w') as f:
            pass
        params['kvlayer_filename'] = str(local)
    if backend == 'redis':
        params['storage_addresses'] = [redis_address(request)]

    with yakonfig.defaulted_config(
            [kvlayer], config=file_config, params=params):
        client = kvlayer.client()
        client.delete_namespace()
        yield client
        if client._log_stats is not None:
            client._log_stats.flush()
            logger.info('storage stats (%s %s):\n%s',
                        backend, request.function.__name__,
                        file_config['kvlayer']['log_stats'].getvalue())
        client.delete_namespace()

def client(request, namespace_string, redis_address):
    config = dict(
        namespace=namespace_string,
        storage_type='redis',
        app_name='dbltest',
        storage_addresses=[redis_address],
    )
    print(config)
    yakonfig.set_global_config(dict(kvlayer=config))
    client = kvlayer.client()
    client.setup_namespace(
        dict(existing_table_1=2, existing_table_2=2))

    def cleanup():
        client.delete_namespace()
    request.addfinalizer(cleanup)
    return client

def main():
    '''Run the random recommender system on a sequence of topics.
    '''
    description = (
        'A baseline recommender system that uses the truth data to'
        ' create output that has perfect recall and would also have'
        ' perfect precision if you ignore subtopic diversity/novelty.'
        ' This generates output directly from the truth data and'
        ' randomly shuffles the truth data per topic, so that'
        ' the ordering of passages does not attempt to optimize any'
        ' particular quality metric.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--overwrite', action='store_true')
    args = yakonfig.parse_args(parser, [yakonfig])
    logging.basicConfig(level=logging.DEBUG)

    config = yakonfig.get_global_config('harness')
    batch_size = config.get('batch_size', 5)
    run_file_path = config['run_file_path']
    if os.path.exists(run_file_path):
        if args.overwrite:
            os.remove(run_file_path)
        else:
            sys.exit('%r already exists' % run_file_path)

    kvl_config = {'storage_type': 'local',
                  'namespace': 'test',
                  'app_name': 'test'}
    kvl = kvlayer.client(kvl_config)
    label_store = LabelStore(kvl)
    parse_truth_data(label_store, config['truth_data_path'])

    # Set up the system
    doc_store = make_doc_store(label_store)
    system = RandomSystem(doc_store)
    ambassador = HarnessAmbassadorCLI(system, args.config, batch_size)
    ambassador.run()

def __init__(self, *args, **kwargs):
    super(SplitS3Storage, self).__init__(*args, **kwargs)

    # Find some credentials
    aws_access_key_id = self._value_or_path('aws_access_key_id')
    aws_secret_access_key = self._value_or_path('aws_secret_access_key')

    # Other things we need to know
    bucket_name = self._config.get('bucket', None)
    if not bucket_name:
        raise ConfigurationError('split_s3 storage requires bucket')
    self.tables = self._config.get('tables', None)
    if not self.tables:
        raise ConfigurationError('split_s3 storage requires tables')
    self.prefix = self._config.get('path_prefix', '')
    if self._config.get('kvlayer_prefix', True):
        self.prefix += '{0}/{1}/'.format(self._app_name, self._namespace)
    self.retries = self._config.get('retries', 5)
    self.retry_interval = self._config.get('retry_interval', 0.1)

    # Set up the other backend
    if 'kvlayer' not in self._config:
        raise ConfigurationError('split_s3 storage requires '
                                 'second kvlayer configuration')
    self.kvlclient = kvlayer.client(config=self._config['kvlayer'],
                                    app_name=self._app_name,
                                    namespace=self._namespace)

    # Actually connect to S3
    connection = boto.connect_s3(
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        # Any sort of connection pooling apparently fails for
        # HTTPS; see https://github.com/boto/boto/issues/1934
        is_secure=False,
    )
    self.bucket = connection.get_bucket(bucket_name)

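# A hedged sketch of the configuration SplitS3Storage reads above;
# every key mirrors a self._config lookup in __init__, and all values
# are placeholders.
example_split_s3_config = {
    'aws_access_key_id': 'AKIAEXAMPLE',    # literal value or file path
    'aws_secret_access_key': 'SECRETEXAMPLE',
    'bucket': 'my-bucket',                 # required
    'tables': ['big_blobs'],               # required: tables kept in S3
    'path_prefix': '',
    'kvlayer_prefix': True,
    'retries': 5,
    'retry_interval': 0.1,
    # required second backend for everything not stored in S3
    'kvlayer': {'storage_type': 'local'},
}
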
def kvlclient(self):
    '''Return a thread local ``kvlayer`` client.'''
    if self._kvlclient is None:
        self._kvlclient = kvlayer.client()
    return self._kvlclient

def kvl(config_local):
    client = kvlayer.client()
    yield client
    client.delete_namespace()
    client.close()

def make_config(overlay={}):
    config = yakonfig.merge.overlay_config(base_config, overlay)
    with yakonfig.defaulted_config([kvlayer, MiniScp], config=config):
        yield
        client = kvlayer.client()
        client.delete_namespace()

from dossier.fc import FeatureCollection, StringCounter
from dossier.store import Store
import kvlayer

# Uses a configuration-free in-memory database. This is ONLY useful for
# development, debugging or testing.
conn = kvlayer.client(config={}, storage_type='local')

# Or you can use this to test with Redis.
# config = {
#     'storage_type': 'redis',
#     'storage_addresses': ['localhost:6379'],
#     'app_name': 'your-app-name',
#     'namespace': 'features',
# }
# conn = kvlayer.client(config=config)

# Use something like this for HBase.
# config = {
#     'storage_type': 'hbase',
#     'storage_addresses': ['127.0.0.1:17111'],
#     'username': '******',
#     'password': '******',
#     'dbname': 'database-name',
#     'app_name': 'your-app-name',
#     'namespace': 'features',
# }
# conn = kvlayer.client(config=config)

# There are more backends available, like MySQL, PostgreSQL and Accumulo.

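# A hedged continuation of the example above: store one
# FeatureCollection and read it back.  Store.put() accepting a list of
# (content_id, FeatureCollection) pairs is an assumption about this
# dossier.store-era API.
store = Store(conn)
fc = FeatureCollection()
fc['NAME'] = StringCounter({'John Smith': 2})
store.put([('content_id_1', fc)])
assert store.get('content_id_1')['NAME']['John Smith'] == 2
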
class Factory(yakonfig.factory.AutoFactory):
    config_name = 'sortingdesk_report'
    kvlclient = property(lambda self: kvlayer.client())
    auto_config = lambda self: []

def label_store(self):
    if self._label_store is None:
        self._label_store = LabelStore(kvlayer.client())
    return self._label_store

def __init__(self, *args, **kwargs):
    super(from_kvlayer, self).__init__(*args, **kwargs)
    self.client = kvlayer.client()
    self.client.setup_namespace(dict(stream_items=2))