def build_demo_data(kvl):
    '''Populate *kvl* with a small hand-built set of demo labels.

    Builds one topic with three subtopics, maps each subtopic to a few
    pseudo-documents (stream id, offset-tagged passage, rating), and
    stores a positive :class:`Label` for every document.
    '''
    label_store = LabelStore(kvl)
    topic = 'where_are_aid_workers_housed_near_Monrovia'
    subtopics = ['Tanji_Fish_Curing_Site', 'Camp_Ramrod', 'Town_of_Wamba']
    # Each value is a list of (stream_id, 'offset|passage_text', rating).
    subtopic_to_documents = {
        0: [(random_sid(), '2100-%d|%s' % (len(subtopics[0]), subtopics[0]), 3),
            (random_sid(), '15-93|we_drove_out_to_the_other_side_' +
             'of_the_river_delta_to_a_small_fish_smoking_camp', 2)],
        1: [(random_sid(), '3120-%d|%s' % (len(subtopics[1]), subtopics[1]), 2),
            (random_sid(), '200-217|Ramrod_(Facility)', 3)],
        2: [(random_sid(), '3120-%d|%s' % (len(subtopics[2]), subtopics[2]), 3),
            (random_sid(), '53-63|Wamba_Town', 2),
            (random_sid(), '44-50|Woomba', 1)],
    }
    for idx, subtopic in enumerate(subtopics):
        for stream_id, subtopic_id2, rating in subtopic_to_documents[idx]:
            # BUG FIX: the original used the Python-2-only statement form
            # `print stream_id`; the call form works under both 2 and 3.
            print(stream_id)
            label = Label(topic, stream_id, 'John', CorefValue.Positive,
                          subtopic_id1=subtopic,
                          subtopic_id2=subtopic_id2,
                          rating=rating)
            label_store.put(label)
def build_demo_data(kvl):
    '''Populate *kvl* with a small hand-built set of demo labels.

    Builds one topic with three subtopics, maps each subtopic to a few
    pseudo-documents (stream id, offset-tagged passage, rating), and
    stores a positive :class:`Label` for every document.
    '''
    label_store = LabelStore(kvl)
    topic = 'where_are_aid_workers_housed_near_Monrovia'
    subtopics = ['Tanji_Fish_Curing_Site', 'Camp_Ramrod', 'Town_of_Wamba']
    # Each value is a list of (stream_id, 'offset|passage_text', rating).
    subtopic_to_documents = {
        0: [(random_sid(), '2100-%d|%s' % (len(subtopics[0]), subtopics[0]), 3),
            (random_sid(), '15-93|we_drove_out_to_the_other_side_' +
             'of_the_river_delta_to_a_small_fish_smoking_camp', 2)],
        1: [(random_sid(), '3120-%d|%s' % (len(subtopics[1]), subtopics[1]), 2),
            (random_sid(), '200-217|Ramrod_(Facility)', 3)],
        2: [(random_sid(), '3120-%d|%s' % (len(subtopics[2]), subtopics[2]), 3),
            (random_sid(), '53-63|Wamba_Town', 2),
            (random_sid(), '44-50|Woomba', 1)],
    }
    for idx, subtopic in enumerate(subtopics):
        for stream_id, subtopic_id2, rating in subtopic_to_documents[idx]:
            # BUG FIX: the original used the Python-2-only statement form
            # `print stream_id`; the call form works under both 2 and 3.
            print(stream_id)
            label = Label(topic, stream_id, 'John', CorefValue.Positive,
                          subtopic_id1=subtopic,
                          subtopic_id2=subtopic_id2,
                          rating=rating)
            label_store.put(label)
def build_test_data(kvl):
    '''Store a 3x3 grid of positive, relevance-scored labels in *kvl*.'''
    label_store = LabelStore(kvl)
    offset = '13-235'
    relevances = [[1, 2, 3]] * 3
    topics = ['topic1', 'topic2', 'topic3']
    subtopics = ['subtopic1', 'subtopic2', 'subtopic3']
    for t_idx, topic in enumerate(topics):
        for s_idx, subtopic in enumerate(subtopics):
            doc_id = 'doc%d%d' % (t_idx, s_idx)
            label_store.put(
                Label(topic, doc_id, 'me', CorefValue.Positive,
                      subtopic_id1=subtopic,
                      subtopic_id2=offset + '|' + 'some text',
                      relevance=relevances[t_idx][s_idx]))
def build_test_data(kvl):
    '''Write a 3x3 topic/subtopic grid of positive labels into *kvl*.'''
    store = LabelStore(kvl)
    passage_offset = '13-235'
    relevances = [[1, 2, 3]] * 3
    for t_idx in range(3):
        for s_idx in range(3):
            label = Label('topic%d' % (t_idx + 1),
                          'doc' + str(t_idx) + str(s_idx),
                          'me', CorefValue.Positive,
                          subtopic_id1='subtopic%d' % (s_idx + 1),
                          subtopic_id2=passage_offset + '|' + 'some text',
                          relevance=relevances[t_idx][s_idx])
            store.put(label)
def label_store(self):
    '''Return a thread local :class:`dossier.label.LabelStore` client.'''
    # Lazily build the client on first access, then reuse it.
    if self._label_store is not None:
        return self._label_store
    config = global_config('memex_dossier.label')
    if 'kvlayer' in config:
        # An explicit kvlayer sub-config wins; build the store directly.
        kvl = kvlayer.client(config=config['kvlayer'])
        self._label_store = LabelStore(kvl)
    else:
        self._label_store = self.create(LabelStore, config=config)
    return self._label_store
def main():
    '''Score a run file against truth data and write an annotated copy.

    Loads the run file, applies each requested scorer (or all of them),
    prints a formatted score summary, and writes the run — with scores
    inserted — to the requested output path.
    '''
    parser = argparse.ArgumentParser(__doc__, conflict_handler='resolve')
    parser.add_argument('run_file_path', help='path to run file to score.')
    parser.add_argument('scored_run_file_output_path',
                        # BUG FIX: the original adjacent string literals
                        # concatenated to "insertedinto"; add the space.
                        help='path to file to create with scores inserted '
                             'into run file.')
    parser.add_argument('--overwrite', action='store_true', default=False,
                        help='overwrite any existing run file.')
    parser.add_argument('--verbose', action='store_true', default=False,
                        help='display verbose log messages.')
    parser.add_argument('--scorer', action='append', default=[],
                        dest='scorers',
                        help='names of scorer functions to run;'
                             ' if none are provided, it runs all of them')
    modules = [yakonfig, kvlayer]
    args = yakonfig.parse_args(parser, modules)

    if os.path.exists(args.scored_run_file_output_path):
        if args.overwrite:
            os.remove(args.scored_run_file_output_path)
        else:
            sys.exit('%r already exists' % args.scored_run_file_output_path)

    if args.verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level)

    kvl = kvlayer.client()
    label_store = LabelStore(kvl)
    run = load_run(args.run_file_path)
    if len(args.scorers) == 0:
        args.scorers = available_scorers.keys()
    for scorer_name in args.scorers:
        scorer = available_scorers.get(scorer_name)
        logger.info('running %s', scorer_name)
        # this modifies the run['scores'] object itself
        scorer(run, label_store)
    print(format_scores(run))
    # BUG FIX: the original left the output file handle unclosed; a
    # context manager guarantees flush + close even on error.
    with open(args.scored_run_file_output_path, 'wb') as fout:
        fout.write(json.dumps(run, indent=4))
def build_test_data(kvl):
    '''Store a 3x3 grid of rated test labels, each carrying metadata.'''
    label_store = LabelStore(kvl)
    offset = '13,235'
    ratings = [[1, 2, 3]] * 3
    topic_names = ['topic1', 'topic2', 'topic3']
    subtopic_names = ['subtopic1', 'subtopic2', 'subtopic3']
    for t_idx, topic in enumerate(topic_names):
        for s_idx, subtopic in enumerate(subtopic_names):
            # Free-form metadata travels with the label.
            meta = {
                'topic_name': topic,
                'topic_id': str(t_idx),
                'passage_text': 'howdy',
                'subtopic_name': 'bye bye',
            }
            label_store.put(
                Label(str(t_idx), 'doc%d%d' % (t_idx, s_idx), 'me',
                      CorefValue.Positive,
                      subtopic_id1=subtopic,
                      subtopic_id2=offset,
                      rating=ratings[t_idx][s_idx],
                      meta=meta))
def main():
    '''Load the NIST TREC 2015 truth data into a kvlayer backend.'''
    parser = argparse.ArgumentParser('test tool for checking that we can load '
                                     'the truth data as distributed by NIST for '
                                     'TREC 2015')
    parser.add_argument('truth_data_path', help='path to truth data file')
    args = yakonfig.parse_args(parser, [yakonfig, kvlayer])
    logging.basicConfig(level=logging.DEBUG)
    # Parse the truth data straight into a LabelStore on the configured
    # kvlayer backend.
    label_store = LabelStore(kvlayer.client())
    parse_truth_data(label_store, args.truth_data_path)
    logger.debug(
        'Done! The truth data was loaded into this kvlayer backend: %r',
        json.dumps(yakonfig.get_global_config('kvlayer'),
                   indent=4, sort_keys=True))
def main():
    '''Command line driver for the TREC DD jig harness.

    Dispatches on a positional ``command`` ("load", "init", "start",
    "step", or "stop"), passing any remaining arguments through to the
    corresponding :class:`Harness` call.
    '''
    parser = argparse.ArgumentParser(
        'Command line interface to the office TREC DD jig.',
        usage=usage, conflict_handler='resolve')
    parser.add_argument('command',
                        help='must be "load", "init", "start", "step", or "stop"')
    parser.add_argument('args', help='input for given command',
                        nargs=argparse.REMAINDER)
    modules = [yakonfig, kvlayer, Harness]
    args = yakonfig.parse_args(parser, modules)
    logging.basicConfig(level=logging.DEBUG)

    if args.command not in set(['load', 'init', 'start', 'step', 'stop']):
        sys.exit('The only valid commands are "load", "init", "start", "step", and "stop".')

    kvl = kvlayer.client()
    label_store = LabelStore(kvl)
    config = yakonfig.get_global_config('harness')
    harness = Harness(config, kvl, label_store)

    if args.command == 'load':
        if not config.get('truth_data_path'):
            sys.exit('Must provide --truth-data-path as an argument')
        if not os.path.exists(config['truth_data_path']):
            sys.exit('%r does not exist' % config['truth_data_path'])
        parse_truth_data(label_store, config['truth_data_path'])
        logger.info('Done! The truth data was loaded into this '
                    'kvlayer backend:\n%s',
                    json.dumps(yakonfig.get_global_config('kvlayer'),
                               indent=4, sort_keys=True))
    elif args.command == 'init':
        response = harness.init()
        print(json.dumps(response))
    elif args.command == 'start':
        response = harness.start()
        print(json.dumps(response))
    elif args.command == 'stop':
        # BUG FIX: the original indexed args.args[0] unconditionally and
        # crashed with IndexError when no topic_id was given.
        if not args.args:
            sys.exit('"stop" requires a topic_id argument')
        response = harness.stop(args.args[0])
        print(json.dumps(response))
    elif args.command == 'step':
        parts = args.args
        # BUG FIX: same unguarded pop(0) crash as "stop" above.
        if not parts:
            sys.exit('"step" requires a topic_id argument')
        topic_id = parts.pop(0)
        feedback = harness.step(topic_id, parts)
        print(json.dumps(feedback))
def main():
    '''Run the random recommender system on a sequence of topics.
    '''
    description = (
        'A baseline recommender system that uses the truth data to'
        ' create output that has perfect recall and would also have'
        ' perfect precision if you ignore subtopic diversity/novelty.'
        ' This generates output directly from the truth data and'
        ' randomly shuffles the truth data per topic, so that'
        ' the ordering of passages does not attempt to optimize any'
        ' particular quality metric.')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--overwrite', action='store_true')
    args = yakonfig.parse_args(parser, [yakonfig])
    logging.basicConfig(level=logging.DEBUG)

    config = yakonfig.get_global_config('harness')
    batch_size = config.get('batch_size', 5)
    run_file_path = config['run_file_path']
    if os.path.exists(run_file_path):
        if not args.overwrite:
            sys.exit('%r already exists' % run_file_path)
        os.remove(run_file_path)

    # Local, throwaway kvlayer backend for loading the truth data.
    kvl = kvlayer.client({'storage_type': 'local',
                          'namespace': 'test',
                          'app_name': 'test'})
    label_store = LabelStore(kvl)
    parse_truth_data(label_store, config['truth_data_path'])

    # Set up the system and hand it to the harness CLI.
    doc_store = make_doc_store(label_store)
    system = RandomSystem(doc_store)
    HarnessAmbassadorCLI(system, args.config, batch_size).run()
def label_store(kvl):
    '''Fixture: yield a :class:`LabelStore` over *kvl*, cleaned at teardown.

    Removes all stored labels after the test so state does not leak into
    other tests sharing the same kvlayer backend.
    '''
    client = LabelStore(kvl)
    yield client
    # BUG FIX: the original fixture never cleaned up, unlike its sibling
    # label_store fixtures, leaking labels across tests.
    client.delete_all()
def label_store(kvl):
    '''Fixture: a fresh :class:`LabelStore` over *kvl*, emptied at teardown.'''
    store = LabelStore(kvl)
    yield store
    # Teardown: drop everything the test wrote.
    store.delete_all()
def label_store(self):
    '''Lazily build and cache a :class:`LabelStore` backed by kvlayer.'''
    if self._label_store is not None:
        return self._label_store
    self._label_store = LabelStore(kvlayer.client())
    return self._label_store
def label_store(kvlclient):
    '''Construct a :class:`LabelStore` over the given kvlayer client.'''
    store = LabelStore(kvlclient)
    return store
def label_store(kvl):
    '''Fixture: :class:`LabelStore` over *kvl*; all labels purged afterwards.'''
    client = LabelStore(kvl)
    yield client
    # Teardown: wipe the store so later tests start clean.
    client.delete_all()