                # Reciprocal-rank salience score for the current entity.
                # NOTE(review): fragment — the enclosing method/loop headers
                # are above this view; all indentation here is reconstructed,
                # confirm against the full file. Assumes rank is 1-based
                # (rank == 0 would raise ZeroDivisionError).
                prediction[eid] = 1.0 / rank
            # Score this document's predictions against its gold labels.
            eva = self.evaluator.evaluate(prediction, labels)
            # Per-document output record: docno, per-entity predictions under
            # the configured body field, plus this doc's evaluation metrics.
            # NOTE(review): if prediction is a dict (it is written by entity
            # id above), zip(l_e, prediction) pairs l_e with its KEYS, not
            # its scores — verify this is intended.
            h_out = {
                'docno': data['docno'],
                body_field: {
                    'predict': zip(l_e, prediction),
                },
                'eval': eva,
            }
            # Accumulate per-doc metrics, then derive the running mean over
            # the p docs processed so far (p presumably a 1-based counter —
            # confirm; p == 0 would raise ZeroDivisionError).
            h_total_eva = add_svm_feature(h_total_eva, eva)
            h_mean_eva = mutiply_svm_feature(h_total_eva, 1.0 / p)
            # Python 2 print-to-file redirection: one JSON record per line.
            print >> out, json.dumps(h_out)
            if not p % 1000:
                # Periodic progress log with the running mean evaluation.
                logging.info('predicted [%d] docs, eva %s', p, json.dumps(h_mean_eva))


if __name__ == '__main__':
    import sys
    from knowledge4ir.utils import (load_py_config, set_basic_log)
    set_basic_log(logging.INFO)
    # The single CLI argument is the path to the Python config file.
    runner = SummarizationBaseline(config=load_py_config(sys.argv[1]))
    runner.process()
        # Merge features of the to-add entities into each matched entity.
        # NOTE(review): fragment — the enclosing function header is above
        # this view; indentation here is reconstructed, confirm.
        for p in xrange(len(l_entities_matched)):
            # NOTE(review): this lookup does not depend on the loop index p,
            # and l_to_add_entities['id'] string-indexes what its l_ prefix
            # suggests is a list (which would raise TypeError). It was
            # probably meant to be l_to_add_entities[p]['id'] or
            # l_entities_matched[p]['id'] — verify against the (unseen)
            # definitions above before relying on this merge.
            to_add = h_to_add_entities[l_to_add_entities['id']]
            l_entities_matched[p]['f'].update(to_add['f'])
        h_sf_matched['entities'] = l_entities_matched
        return h_sf_matched


class MatchMain(Configurable):
    """Configurable CLI parameters for the feature-extraction script below."""
    # Input: TREC-format ranking file to extract features for.
    trec_rank_in = Unicode(help="trec rank in").tag(config=True)
    # Output: path for the extracted matching features.
    feature_out = Unicode(help='extract feature out').tag(config=True)


if __name__ == '__main__':
    from knowledge4ir.utils import (
        set_basic_log,
        load_py_config,
    )
    set_basic_log(logging.DEBUG)
    # Exactly one CLI argument (the config file) is required.
    if 2 != len(sys.argv):
        print "I extract matching features"
        print "1 para: config"
        MatchCenter.class_print_help()
        MatchMain.class_print_help()
        sys.exit(-1)
    conf = load_py_config(sys.argv[1])
    main_para = MatchMain(config=conf)
    extractor = MatchCenter(config=conf)
    extractor.pipe_extract(main_para.trec_rank_in, main_para.feature_out)
def process(tagme_in, wiki_fb_dict_in, out_name, tagged_field):
    """Wrap tagme-tagged documents into per-line JSON with freebase ids.

    Loads a wiki-id -> freebase-id mapping (first two TSV columns of
    wiki_fb_dict_in), then wraps each line of tagme_in via wrap_doc() and
    dumps one JSON object per line to out_name.

    :param tagme_in: path to tagme output, one document (docno + text) per line
    :param wiki_fb_dict_in: path to a TSV file; columns 1-2 are wiki id, fb id
    :param out_name: output file path (one JSON document per line)
    :param tagged_field: name of the tagged field,
        e.g. title|bodyText|paperAbstract|query
    """
    # `with` blocks fix the original's leaked file handles: the dict file and
    # tagme_in were opened and never closed, and out was not closed if an
    # exception occurred mid-loop.
    with open(wiki_fb_dict_in) as dict_f:
        h_wiki_fb = dict(line.strip().split('\t')[:2] for line in dict_f)
    logging.info('wiki fb dict loaded')
    with open(tagme_in) as in_f, open(out_name, 'w') as out:
        for cnt, line in enumerate(in_f):
            if not cnt % 1000:
                logging.info('process [%d] lines', cnt)
            h = wrap_doc(line.strip(), h_wiki_fb, tagged_field)
            # One JSON document per line (equivalent to the py2-only
            # `print >> out, ...` the original used).
            out.write(json.dumps(h) + '\n')
    logging.info('finished')


if __name__ == '__main__':
    from knowledge4ir.utils import set_basic_log
    set_basic_log()
    # Four positional arguments are required (argv[0] is the script name).
    if 5 != len(sys.argv):
        sys.stdout.write(
            "4 para: tag me out (docno+text) + wiki fb matching dict"
            " + out + field name (title|bodyText|paperAbstract|query)\n"
        )
        sys.exit(-1)
    process(*sys.argv[1:])
    # Configuration traits (class-body fragment — the enclosing Configurable
    # subclass header, presumably `class Main(...)` given the usage below, is
    # above this view; indentation here is reconstructed, confirm).
    test_in = Unicode(help='test in').tag(config=True)
    test_out = Unicode(help='test res').tag(config=True)
    model_out = Unicode(help='model dump out name').tag(config=True)
    log_level = Unicode('INFO', help='log level').tag(config=True)
    # Only used when aligning predictions back onto a raw corpus.
    raw_corpus_in = Unicode(help='corpus to align').tag(config=True)
    aligned_corpus_out = Unicode(help='aligned corpus output').tag(
        config=True)

    # Script body — NOTE(review): presumably sits under an
    # `if __name__ == '__main__':` guard above this view; confirm.
    # At least one CLI argument (the config file) is required.
    if 2 > len(sys.argv):
        print "unit test model train test"
        print "1 para, config with aligning config (optional, set if want to align to raw corpus)"
        SalienceModelCenter.class_print_help()
        Main.class_print_help()
        AlignPredicted.class_print_help()
        sys.exit(-1)
    conf = load_py_config(sys.argv[1])
    para = Main(config=conf)
    # getLevelName maps the configured level NAME string (e.g. 'INFO') back
    # to its numeric logging level.
    set_basic_log(logging.getLevelName(para.log_level))
    # Load a previously trained salience model and run prediction.
    model = SalienceModelCenter(config=conf)
    model.load_model(para.model_out)
    model.predict(para.test_in, para.test_out)
    # Optionally align predictions back onto the raw corpus; gated on the
    # entity-id pickle being configured.
    converter = AlignPredicted(config=conf)
    if converter.entity_id_pickle_in:
        logging.info('aligning to [%s]', para.raw_corpus_in)
        converter.align_predict_to_corpus(para.raw_corpus_in,
                                          para.test_out,
                                          para.aligned_corpus_out)
        logging.info('alignment finished')