Ejemplo n.º 1
0
                        # reciprocal-rank score for this entity
                        # (enclosing loop/condition not visible in this chunk)
                        prediction[eid] = 1.0 / rank

                # score this document's predictions against its gold labels
                eva = self.evaluator.evaluate(prediction, labels)

                # one JSON record per document: doc id, per-entity predictions
                # for the current body field, and the evaluation result
                h_out = {
                    'docno': data['docno'],
                    body_field: {
                        'predict': zip(l_e, prediction),
                    },
                    'eval': eva,
                }

                # accumulate eval metrics, then scale by 1/p to get the mean
                # (presumably p is the running doc counter — TODO confirm;
                # "mutiply" is the helper's own spelling)
                h_total_eva = add_svm_feature(h_total_eva, eva)
                h_mean_eva = mutiply_svm_feature(h_total_eva, 1.0 / p)

                # Python 2 print-to-file redirection
                print >> out, json.dumps(h_out)

                # progress log every 1000 documents
                if not p % 1000:
                    logging.info('predicted [%d] docs, eva %s', p,
                                 json.dumps(h_mean_eva))

if __name__ == '__main__':
    import sys
    from knowledge4ir.utils import load_py_config, set_basic_log

    # enable INFO-level logging before running the baseline
    set_basic_log(logging.INFO)

    # first CLI argument is the .py config file to load
    baseline = SummarizationBaseline(config=load_py_config(sys.argv[1]))
    baseline.process()
Ejemplo n.º 2
0
        for p in xrange(len(l_entities_matched)):
            # NOTE(review): l_to_add_entities is indexed with the string 'id';
            # if it is a list this raises TypeError — possibly meant
            # l_entities_matched[p]['id']. Confirm against the full method.
            to_add = h_to_add_entities[l_to_add_entities['id']]
            # merge the extra feature dict into this matched entity's features
            l_entities_matched[p]['f'].update(to_add['f'])
        h_sf_matched['entities'] = l_entities_matched
        return h_sf_matched


class MatchMain(Configurable):
    """Configuration holder for the matching-feature extraction entry point."""
    # input: TREC-format ranking file to extract features for
    trec_rank_in = Unicode(help="trec rank in").tag(config=True)
    # output: path to write the extracted features to
    feature_out = Unicode(help='extract feature out').tag(config=True)


if __name__ == '__main__':
    from knowledge4ir.utils import (
        set_basic_log,
        load_py_config,
    )
    set_basic_log(logging.DEBUG)
    if 2 != len(sys.argv):
        print "I extract matching features"
        print "1 para: config"
        MatchCenter.class_print_help()
        MatchMain.class_print_help()
        sys.exit(-1)
    conf = load_py_config(sys.argv[1])

    main_para = MatchMain(config=conf)
    extractor = MatchCenter(config=conf)

    extractor.pipe_extract(main_para.trec_rank_in, main_para.feature_out)

def process(tagme_in, wiki_fb_dict_in, out_name, tagged_field):
    """Wrap tagme-annotated docs with the wiki->freebase dict, dump JSON lines.

    :param tagme_in: input file, one tagged document per line
    :param wiki_fb_dict_in: TSV file; only the first two columns
        (wiki id, freebase id) are used
    :param out_name: output path; one json.dumps'd record per line
    :param tagged_field: name of the tagged field
        (e.g. title|bodyText|paperAbstract|query)
    """
    # fix: the original leaked both input file handles and closed the
    # output without try/finally — use context managers throughout
    with open(wiki_fb_dict_in) as dict_in:
        h_wiki_fb = dict(line.strip().split('\t')[:2] for line in dict_in)
    logging.info('wiki fb dict loaded')

    with open(tagme_in) as in_f, open(out_name, 'w') as out:
        for cnt, line in enumerate(in_f):
            if not cnt % 1000:
                logging.info('process [%d] lines', cnt)
            h = wrap_doc(line.strip(), h_wiki_fb, tagged_field)
            print >> out, json.dumps(h)

    logging.info('finished')

if __name__ == '__main__':
    from knowledge4ir.utils import set_basic_log
    set_basic_log()
    if 5 != len(sys.argv):
        print "4 para: tag me out (docno+text) + wiki fb matching dict + out  + field name (title|bodyText|paperAbstract|query)"
        sys.exit(-1)

    process(*sys.argv[1:])





Ejemplo n.º 4
0
        # traitlets config fields of the enclosing class (class header is
        # outside this chunk) — I/O paths for prediction and alignment
        test_in = Unicode(help='test in').tag(config=True)
        test_out = Unicode(help='test res').tag(config=True)
        model_out = Unicode(help='model dump out name').tag(config=True)
        log_level = Unicode('INFO', help='log level').tag(config=True)
        raw_corpus_in = Unicode(help='corpus to align').tag(config=True)
        aligned_corpus_out = Unicode(help='aligned corpus output').tag(
            config=True)

    # print usage help unless at least one argument (the config) is given
    if 2 > len(sys.argv):
        print "unit test model train test"
        print "1 para, config with aligning config (optional, set if want to align to raw corpus)"
        SalienceModelCenter.class_print_help()
        Main.class_print_help()
        AlignPredicted.class_print_help()
        sys.exit(-1)

    conf = load_py_config(sys.argv[1])
    para = Main(config=conf)

    # log level is taken from the config (defaults to INFO above)
    set_basic_log(logging.getLevelName(para.log_level))

    # load a previously trained model and run prediction on the test set
    model = SalienceModelCenter(config=conf)
    model.load_model(para.model_out)
    model.predict(para.test_in, para.test_out)
    # optionally align predictions back onto the raw corpus when the
    # converter's entity-id pickle is configured
    converter = AlignPredicted(config=conf)
    if converter.entity_id_pickle_in:
        logging.info('aligning to [%s]', para.raw_corpus_in)
        converter.align_predict_to_corpus(para.raw_corpus_in, para.test_out,
                                          para.aligned_corpus_out)
        logging.info('alignment finished')