avg_run_time = 1.0 * sum(run_times.values()) / len(run_times)
std_run_time = scipy.array(run_times.values()).std()
sys.stderr.write('\nTiming results\n')
sys.stderr.write('Mapper time: total [%2.2fs] min [%1.2fs] max [%1.2fs]\n'
                 % (sum(map_times.values()), min(map_times.values()), max(map_times.values())))
sys.stderr.write('Run time: total [%2.2fs] min [%1.2fs] max [%1.2fs] avg [%1.4fs] std [%1.4fs]\n'
                 % (sum(run_times.values()), min(run_times.values()), max(run_times.values()),
                    avg_run_time, std_run_time))
sys.stderr.write('------\n')

if __name__ == '__main__':
    options, task = parse_options()

    ## create SummaryProblem instances
    setup_start_time = time.time()
    if options.task == 'u08':
        framework.setup_TAC08(task)
    else:
        framework.setup_DUC_basic(task, skip_updates=False)

    ## only run the parser if compression is required (this is not known by the pickle stuff)
    parser = None
    if options.compress:
        parser = berkeleyparser.CommandLineParser(BERKELEY_PARSER_CMD)
    framework.setup_DUC_sentences(task, parser, reload=options.reload)
    setup_time = time.time() - setup_start_time

    ## go!
    run_standard(options)
    sys.stderr.write('Setup time [%1.2fs]\n' % setup_time)

    ## evaluate
    if not options.manpath:
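# The timing report above assumes map_times and run_times are dicts mapping
# problem IDs to elapsed wall-clock seconds. The helper below is a minimal,
# hypothetical sketch (timed_call is not part of this codebase) of how such
# dicts could be populated.
import sys
import time

def timed_call(func, times, key):
    """Run func(), recording its wall-clock duration in times[key]."""
    start = time.time()
    result = func()
    times[key] = time.time() - start
    return result

run_times_demo = {}
for problem_id in ('d0801', 'd0802'):  # placeholder problem IDs
    timed_call(lambda: sum(range(10 ** 6)), run_times_demo, problem_id)
sys.stderr.write('avg [%1.4fs] over %d problems\n'
                 % (1.0 * sum(run_times_demo.values()) / len(run_times_demo),
                    len(run_times_demo)))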
bigram_path = 'dat/%s/features' % task_name
task = Task(task_name, topic_file, doc_path, man_path)

# Get documents, split into sentences, tokenize and stem
if args.load is not None:
    start_time = time.time()
    sys.stderr.write('Loading [%s] problem data in [%s]\n' % (task.name, task.data_pickle))
    task.problems = util.load_pickle(args.load)
    sys.stderr.write('Done [%.2f s]\n' % (time.time() - start_time))
else:
    text.text_processor.load_splitta_model('lib/splitta/model_nb/')
    # Skip update data
    if task_name[:3] == 'tac':
        framework.setup_TAC08(task, True)
    elif task_name[:3] == 'duc':
        framework.setup_DUC_basic(task, True)
    elif task_name[:3] == 'new':
        framework.setup_news(task)
    else:
        raise Exception('Unknown task %s' % task)
    if task_name[:3] != 'new':
        for problem in task.problems:
            problem.load_documents()

    ## save pickled version for faster loading later
    sys.stderr.write('Saving [%s] problem data in [%s]\n' % (task.name, task.data_pickle))
    util.save_pickle(task.problems, task.data_pickle)

# Tokenize for parser
tokenizer = nltk.tokenize.treebank.TreebankWordTokenizer()
for problem in task.problems:
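# util.load_pickle / util.save_pickle are not shown in this excerpt. A
# plausible minimal implementation, assuming they are thin wrappers around
# the standard pickle module (cPickle in Python 2), looks like this:
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle             # Python 3

def save_pickle(data, path):
    """Serialize data to path using the highest pickle protocol."""
    fh = open(path, 'wb')
    pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL)
    fh.close()

def load_pickle(path):
    """Inverse of save_pickle: deserialize and return the object at path."""
    fh = open(path, 'rb')
    data = pickle.load(fh)
    fh.close()
    return data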
    if options.dataroot:
        os.popen("mkdir -p " + options.dataroot)
    task.data_pickle = '%s/%s_data.pickle' % (options.dataroot, task.name)
    task.punkt_pickle = '%s/%s_punkt.pickle' % (options.dataroot, task.name)
    return options, task


if __name__ == '__main__':
    options, task = parse_options()

    ## create SummaryProblem instances
    if options.task == 'u08':
        framework.setup_TAC08(task)
    else:
        framework.setup_DUC_basic(task)

    # only run the parser if compression is required (this is not known by the pickle stuff)
    parser = None
    if options.compress:
        parser = berkeleyparser.CommandLineParser(BERKELEY_PARSER_CMD)
    framework.setup_DUC_sentences(task, parser, reload=options.reload)

    #for problem in task.problems:
    #    for sentence in problem.get_new_sentences():
    #        print sentence.parsed
    #sys.exit(0)

    ## create output directory
    try:
        os.popen('rm -rf %s' % options.output)
    except:
        pass
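# Note: shelling out via os.popen for `rm -rf` / `mkdir -p` works but swallows
# errors and breaks on paths containing spaces. A behavior-equivalent sketch
# using only the standard library (an alternative, not this repo's code):
import os
import shutil

def reset_output_dir(path):
    """Remove path if it exists, then recreate it empty.

    Equivalent to `rm -rf path && mkdir -p path`, but without invoking a
    shell, so unusual paths are safe and failures raise exceptions.
    """
    if os.path.isdir(path):
        shutil.rmtree(path)
    os.makedirs(path)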