def build_demo_data(kvl):
    '''Fill a label store with a small hand-written demo topic.

    A ``LabelStore`` is created on top of ``kvl``.  For each of the three
    demo subtopics, every listed document gets one ``CorefValue.Positive``
    label by annotator ``'John'``.  Each document id comes from
    ``random_sid()`` and is printed as its label is stored.

    :param kvl: storage client passed straight to ``LabelStore``
    '''
    label_store = LabelStore(kvl)

    topic = 'where_are_aid_workers_housed_near_Monrovia'
    subtopics = ['Tanji_Fish_Curing_Site',
                 'Camp_Ramrod',
                 'Town_of_Wamba']
    # Keyed by position in ``subtopics``.  Every entry is a
    # (stream_id, subtopic_id2, rating) triple.
    subtopic_to_documents = {
        0: [(random_sid(), '2100-%d|%s' % (len(subtopics[0]), subtopics[0]), 3),
            (random_sid(), '15-93|we_drove_out_to_the_other_side_' +
             'of_the_river_delta_to_a_small_fish_smoking_camp', 2)
        ],
        1: [(random_sid(), '3120-%d|%s' % (len(subtopics[1]), subtopics[1]), 2),
            (random_sid(), '200-217|Ramrod_(Facility)', 3)
        ],
        2: [(random_sid(), '3120-%d|%s' % (len(subtopics[2]), subtopics[2]), 3),
            (random_sid(), '53-63|Wamba_Town', 2),
            (random_sid(), '44-50|Woomba', 1)
        ]
    }

    for idx, subtopic in enumerate(subtopics):
        for stream_id, subtopic_id2, rating in subtopic_to_documents[idx]:

            # Parenthesised so the line parses on both Python 2 and 3;
            # with a single argument the output is the same on either.
            print(stream_id)

            label = Label(topic, stream_id, 'John', CorefValue.Positive,
                          subtopic_id1=subtopic,
                          subtopic_id2=subtopic_id2,
                          rating=rating)
            label_store.put(label)
Exemple #2
0
def build_demo_data(kvl):
    '''Store demo labels for one topic with three subtopics.

    Builds a ``LabelStore`` over ``kvl`` and puts one
    ``CorefValue.Positive`` label (annotator ``'John'``) into it for each
    document listed under each subtopic.  Document ids are produced by
    ``random_sid()`` and printed as the labels are written.

    :param kvl: storage client passed straight to ``LabelStore``
    '''
    label_store = LabelStore(kvl)

    topic = 'where_are_aid_workers_housed_near_Monrovia'
    subtopics = ['Tanji_Fish_Curing_Site', 'Camp_Ramrod', 'Town_of_Wamba']
    # Index into ``subtopics`` -> list of (stream_id, subtopic_id2, rating).
    subtopic_to_documents = {
        0:
        [(random_sid(), '2100-%d|%s' % (len(subtopics[0]), subtopics[0]), 3),
         (random_sid(), '15-93|we_drove_out_to_the_other_side_' +
          'of_the_river_delta_to_a_small_fish_smoking_camp', 2)],
        1:
        [(random_sid(), '3120-%d|%s' % (len(subtopics[1]), subtopics[1]), 2),
         (random_sid(), '200-217|Ramrod_(Facility)', 3)],
        2:
        [(random_sid(), '3120-%d|%s' % (len(subtopics[2]), subtopics[2]), 3),
         (random_sid(), '53-63|Wamba_Town', 2),
         (random_sid(), '44-50|Woomba', 1)]
    }

    for idx, subtopic in enumerate(subtopics):
        for stream_id, subtopic_id2, rating in subtopic_to_documents[idx]:

            # Function-call form: valid syntax on Python 2 and Python 3,
            # and prints the same text for a single argument.
            print(stream_id)

            label = Label(topic,
                          stream_id,
                          'John',
                          CorefValue.Positive,
                          subtopic_id1=subtopic,
                          subtopic_id2=subtopic_id2,
                          rating=rating)
            label_store.put(label)
def build_test_data(kvl):
    '''Store one positive label for every topic/subtopic pairing.

    Three topics are crossed with three subtopics; each pairing gets a
    ``CorefValue.Positive`` label by annotator ``'me'`` on a document
    named ``doc<topic index><subtopic index>``.

    :param kvl: storage client passed straight to ``LabelStore``
    '''
    topic_names = ['topic1', 'topic2', 'topic3']
    subtopic_names = ['subtopic1', 'subtopic2', 'subtopic3']
    # One row per topic, one column per subtopic.
    relevance_grid = [[1, 2, 3]]*3
    offset = '13-235'
    # Identical for every label, so it is assembled once up front.
    passage_id = offset+'|'+'some text'

    store = LabelStore(kvl)

    for topic_pos, topic_name in enumerate(topic_names):
        relevance_row = relevance_grid[topic_pos]
        for subtopic_pos, subtopic_name in enumerate(subtopic_names):
            doc_id = 'doc'+str(topic_pos)+str(subtopic_pos)
            store.put(Label(topic_name, doc_id,
                            'me', CorefValue.Positive,
                            subtopic_id1=subtopic_name,
                            subtopic_id2=passage_id,
                            relevance=relevance_row[subtopic_pos]))
Exemple #4
0
def build_test_data(kvl):
    '''Write a 3x3 grid of positive test labels into a fresh ``LabelStore``.

    Every (topic, subtopic) combination yields one label by annotator
    ``'me'`` whose relevance is read from a per-topic row of scores.

    :param kvl: storage client passed straight to ``LabelStore``
    '''
    topics = ['topic1', 'topic2', 'topic3']
    subtopics = ['subtopic1', 'subtopic2', 'subtopic3']
    relevances = [[1, 2, 3]] * 3
    offset = '13-235'

    label_store = LabelStore(kvl)

    for row, topic in enumerate(topics):
        for col, subtopic in enumerate(subtopics):
            # Collect the keyword part of the label before building it.
            details = dict(
                subtopic_id1=subtopic,
                subtopic_id2=offset + '|' + 'some text',
                relevance=relevances[row][col],
            )
            content_id = 'doc' + str(row) + str(col)
            label_store.put(
                Label(topic, content_id, 'me', CorefValue.Positive,
                      **details))
Exemple #5
0
 def label_store(self):
     '''Return the cached ``LabelStore``, building it on first use.

     Construction reads the ``memex_dossier.label`` global config.  If
     that config has a ``kvlayer`` entry, a kvlayer client is created
     from it and wrapped in a ``LabelStore``; otherwise the store is
     obtained from ``self.create``.
     '''
     # Fast path: a store was already built by an earlier call.
     if self._label_store is not None:
         return self._label_store

     config = global_config('memex_dossier.label')
     if 'kvlayer' not in config:
         self._label_store = self.create(LabelStore, config=config)
     else:
         kvl = kvlayer.client(config=config['kvlayer'])
         self._label_store = LabelStore(kvl)
     return self._label_store
Exemple #6
0
def main():
    '''Score a run file and write the scored run to a new file.

    Command-line arguments select the input run file, the output path,
    and optionally which scorers from ``available_scorers`` to apply
    (all of them when none are named).  Each scorer is called with the
    loaded run and a ``LabelStore``.  The formatted scores are printed
    and the run is dumped as indented JSON to the output path.

    Exits via ``sys.exit`` when the output file already exists without
    ``--overwrite``, or when an unknown scorer name is requested.
    '''
    # ``description=`` is explicit because argparse's first positional
    # parameter is ``prog``, not the description.
    parser = argparse.ArgumentParser(description=__doc__,
                                     conflict_handler='resolve')
    parser.add_argument('run_file_path', help='path to run file to score.')
    parser.add_argument('scored_run_file_output_path',
                        help='path to file to create with scores inserted '
                        'into run file.')
    parser.add_argument('--overwrite',
                        action='store_true',
                        default=False,
                        help='overwrite any existing run file.')
    parser.add_argument('--verbose',
                        action='store_true',
                        default=False,
                        help='display verbose log messages.')
    parser.add_argument('--scorer',
                        action='append',
                        default=[],
                        dest='scorers',
                        help='names of scorer functions to run;'
                        ' if none are provided, it runs all of them')

    modules = [yakonfig, kvlayer]
    args = yakonfig.parse_args(parser, modules)

    # Resolve scorer names before touching the filesystem so a typo
    # neither deletes an existing output file nor surfaces later as
    # "'NoneType' object is not callable".
    if not args.scorers:
        args.scorers = available_scorers.keys()
    scorers = []
    for scorer_name in args.scorers:
        scorer = available_scorers.get(scorer_name)
        if scorer is None:
            sys.exit('%r is not an available scorer' % scorer_name)
        scorers.append((scorer_name, scorer))

    output_path = args.scored_run_file_output_path
    if os.path.exists(output_path):
        if not args.overwrite:
            sys.exit('%r already exists' % output_path)
        os.remove(output_path)

    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=level)

    kvl = kvlayer.client()
    label_store = LabelStore(kvl)

    run = load_run(args.run_file_path)

    for scorer_name, scorer in scorers:
        logger.info('running %s', scorer_name)
        # this modifies the run['scores'] object itself
        scorer(run, label_store)

    print(format_scores(run))

    # NOTE(review): binary mode with a json.dumps() payload only works
    # where that payload is a byte string (Python 2) -- confirm the
    # target interpreter before changing the mode.
    with open(output_path, 'wb') as out:
        out.write(json.dumps(run, indent=4))
def build_test_data(kvl):
    '''Store rated test labels, each with metadata, for a 3x3 grid.

    Every (topic, subtopic) combination yields one
    ``CorefValue.Positive`` label by annotator ``'me'``.  The label is
    keyed by the topic's index as a string, and the topic's name goes
    into the ``meta`` dict.

    :param kvl: storage client passed straight to ``LabelStore``
    '''
    topic_names = ['topic1', 'topic2', 'topic3']
    subtopic_ids = ['subtopic1', 'subtopic2', 'subtopic3']
    offset = '13,235'
    # One row of ratings per topic, one column per subtopic.
    rating_grid = [[1, 2, 3]]*3

    store = LabelStore(kvl)

    for topic_pos, topic_name in enumerate(topic_names):
        topic_id = str(topic_pos)
        for subtopic_pos, subtopic_id in enumerate(subtopic_ids):
            # A fresh dict per label, so labels never share metadata.
            meta = {
                'topic_name': topic_name,
                'topic_id': topic_id,
                'passage_text': 'howdy',
                'subtopic_name': 'bye bye',
            }
            doc_id = 'doc'+str(topic_pos)+str(subtopic_pos)
            store.put(Label(topic_id, doc_id,
                            'me', CorefValue.Positive,
                            subtopic_id1=subtopic_id,
                            subtopic_id2=offset,
                            rating=rating_grid[topic_pos][subtopic_pos],
                            meta=meta))
Exemple #8
0
def main():
    '''Load a truth data file into the configured kvlayer label store.

    Parses the ``truth_data_path`` argument (plus the yakonfig and
    kvlayer options), loads that file into a ``LabelStore`` through
    ``parse_truth_data``, and logs the kvlayer configuration that
    received the data.
    '''
    # ``description=`` is explicit because argparse's first positional
    # parameter is ``prog``; this text is a description.
    parser = argparse.ArgumentParser(
        description='test tool for checking that we can load '
                    'the truth data as distributed by NIST for '
                    'TREC 2015')
    parser.add_argument('truth_data_path', help='path to truth data file')
    modules = [yakonfig, kvlayer]
    args = yakonfig.parse_args(parser, modules)
    logging.basicConfig(level=logging.DEBUG)
    kvl = kvlayer.client()
    label_store = LabelStore(kvl)
    parse_truth_data(label_store, args.truth_data_path)
    # '%s' after a newline keeps the indented JSON readable; '%r' would
    # log it as one line with escaped '\n' sequences.
    logger.debug('Done!  The truth data was loaded into this kvlayer backend:\n%s',
                 json.dumps(yakonfig.get_global_config('kvlayer'), indent=4,
                            sort_keys=True))
Exemple #9
0
def main():
    '''Run one jig command: "load", "init", "start", "step", or "stop".

    ``load`` reads the truth data file named by the ``harness`` config
    into the label store.  The other commands call the same-named
    ``Harness`` method and print its response as JSON.  ``stop`` passes
    its first trailing argument to the harness; ``step`` passes the
    first trailing argument as the topic id and the rest as a list.

    Exits via ``sys.exit`` on an unknown command, on ``step``/``stop``
    without trailing arguments, or on a missing truth data path for
    ``load``.
    '''
    # ``description=`` is explicit because argparse's first positional
    # parameter is ``prog``; this text is a description.
    parser = argparse.ArgumentParser(
        description='Command line interface to the office TREC DD jig.',
        usage=usage,
        conflict_handler='resolve')
    parser.add_argument('command', help='must be "load", "init", "start", "step", or "stop"')
    parser.add_argument('args', help='input for given command',
                        nargs=argparse.REMAINDER)
    modules = [yakonfig, kvlayer, Harness]
    args = yakonfig.parse_args(parser, modules)

    logging.basicConfig(level=logging.DEBUG)

    if args.command not in ('load', 'init', 'start', 'step', 'stop'):
        sys.exit('The only valid commands are "load", "init", "start", "step", and "stop".')

    # Both commands index into the trailing arguments; fail with a clear
    # message instead of an IndexError traceback.
    if args.command in ('step', 'stop') and not args.args:
        sys.exit('The "%s" command requires at least one argument.'
                 % args.command)

    kvl = kvlayer.client()
    label_store = LabelStore(kvl)
    config = yakonfig.get_global_config('harness')
    harness = Harness(config, kvl, label_store)

    if args.command == 'load':
        if not config.get('truth_data_path'):
            sys.exit('Must provide --truth-data-path as an argument')
        if not os.path.exists(config['truth_data_path']):
            sys.exit('%r does not exist' % config['truth_data_path'])
        parse_truth_data(label_store, config['truth_data_path'])
        logger.info('Done!  The truth data was loaded into this '
                    'kvlayer backend:\n%s',
                    json.dumps(yakonfig.get_global_config('kvlayer'),
                               indent=4, sort_keys=True))

    elif args.command == 'init':
        response = harness.init()
        print(json.dumps(response))

    elif args.command == 'start':
        response = harness.start()
        print(json.dumps(response))

    elif args.command == 'stop':
        response = harness.stop(args.args[0])
        print(json.dumps(response))

    elif args.command == 'step':
        # First trailing argument is the topic id; the rest go to the
        # harness as a list.
        topic_id = args.args[0]
        parts = args.args[1:]
        feedback = harness.step(topic_id, parts)
        print(json.dumps(feedback))
Exemple #10
0
def main():
    '''Drive ``RandomSystem`` through the harness command-line ambassador.

    Reads the ``harness`` global config, refuses to clobber an existing
    run file unless ``--overwrite`` is given, loads the truth data into
    a local kvlayer-backed ``LabelStore``, and runs a ``RandomSystem``
    built on a doc store made from that label store.
    '''
    parser = argparse.ArgumentParser(description=(
        'A baseline recommender system that uses the truth data to'
        ' create output that has perfect recall and would also have'
        ' perfect precision if you ignore subtopic diversity/novelty.'
        ' This generates output directly from the truth data and'
        ' randomly shuffles the truth data per topic, so that'
        ' the ordering of passages does not attempt to optimize any'
        ' particular quality metric.'))
    parser.add_argument('--overwrite', action='store_true')
    args = yakonfig.parse_args(parser, [yakonfig])

    logging.basicConfig(level=logging.DEBUG)

    harness_config = yakonfig.get_global_config('harness')
    batch_size = harness_config.get('batch_size', 5)
    run_file_path = harness_config['run_file_path']
    if os.path.exists(run_file_path):
        # Only remove a previous run file when explicitly asked to.
        if not args.overwrite:
            sys.exit('%r already exists' % run_file_path)
        os.remove(run_file_path)

    kvl = kvlayer.client({
        'storage_type': 'local',
        'namespace': 'test',
        'app_name': 'test',
    })
    label_store = LabelStore(kvl)

    parse_truth_data(label_store, harness_config['truth_data_path'])

    # Wire the system to the harness and run it.
    system = RandomSystem(make_doc_store(label_store))
    HarnessAmbassadorCLI(system, args.config, batch_size).run()
Exemple #11
0
def label_store(kvl):
    '''Yield a ``LabelStore`` built on top of ``kvl``.'''
    store = LabelStore(kvl)
    yield store
def label_store(kvl):
    '''Yield a ``LabelStore`` over ``kvl``, then empty it.

    Once the generator is resumed after the yield, ``delete_all()`` is
    called on the store.
    '''
    store = LabelStore(kvl)
    yield store
    store.delete_all()
Exemple #13
0
def label_store(kvl):
    '''Provide a ``LabelStore`` for ``kvl`` and clear it afterwards.

    The store is yielded to the consumer; ``delete_all()`` runs when the
    generator is resumed.
    '''
    labels = LabelStore(kvl)
    yield labels
    labels.delete_all()
Exemple #14
0
 def label_store(self):
     '''Return this object's ``LabelStore``, creating it on first use.

     The store wraps a default ``kvlayer.client()`` and is kept on
     ``self._label_store`` for later calls.
     '''
     if self._label_store is not None:
         return self._label_store
     self._label_store = LabelStore(kvlayer.client())
     return self._label_store
Exemple #15
0
def label_store(kvlclient):
    '''Wrap ``kvlclient`` in a new ``LabelStore`` and return it.'''
    store = LabelStore(kvlclient)
    return store
Exemple #16
0
def label_store(kvl):
    '''Yield a ``LabelStore`` backed by ``kvl``; wipe it when resumed.'''
    store = LabelStore(kvl)
    yield store
    # Runs once the consumer is done with the yielded store.
    store.delete_all()
def label_store(kvl):
    '''Hand out a ``LabelStore`` over ``kvl`` and delete its contents after.

    ``delete_all()`` is invoked on the store when the generator is
    resumed past the yield.
    '''
    client = LabelStore(kvl)
    yield client
    client.delete_all()