def read_yaml_config(config_file): return yaml.load(open(config_file)) if __name__ == "__main__": args = setup_argparse() nlp = spacy.load('en_vectors_web_lg') nlp.add_pipe(nlp.create_pipe('sentencizer')) print("Reading config...") config = read_yaml_config(args.config_file) print("Reading corpus...") corpus = Corpus.from_config(config, nlp) print("Reading summarizer...") summarizer = Summarizer.from_config(config, nlp) print("Creating orderer...") information_orderer = setup_information_orderer() num_topics = len(corpus.topics) for i, topic in enumerate(corpus.topics, 1): candidates = summarizer.summarize(topic) summary = information_orderer.order_all(candidates) print("Summarized {0}/{1} topics".format(i, num_topics)) with open('{0}{1}'.format(args.output_dir, make_filename(topic.id(), config.get(Summarizer.WORD_LIMIT_KEY))), 'w') as outfile: