def get_top_news_from_topic(self, topic_csv, topic_id, threshold=0.9):
    """Build a corpus of the documents strongly associated with one topic.

    Reads the topic-mapping CSV ``topic_csv`` from ``self.models_folder`` and
    selects every row whose ``topic_id`` column equals ``topic_id`` and whose
    ``topic_prob`` column is strictly greater than ``threshold``. Each
    document of ``self.corpus`` whose ``document_id`` appears among the
    selected rows is then added, in corpus iteration order, to a new
    ``Corpus`` via ``add_document_from_element``.

    Args:
        topic_csv: File name of the CSV, joined onto ``self.models_folder``.
            It must provide ``document_id``, ``topic_id`` and ``topic_prob``
            columns.
        topic_id: Topic to select; compared with ``int(row["topic_id"])``.
        threshold: Exclusive lower bound for ``float(row["topic_prob"])``.

    Returns:
        A new ``Corpus`` containing the matching documents (empty when no
        row or no document matches).

    Raises:
        OSError: If the CSV file cannot be opened.
        KeyError: If a required column is missing from the CSV.
        ValueError: If a ``topic_id`` value, or the ``topic_prob`` value of a
            row with a matching topic, cannot be parsed as a number.
    """
    csv_path = os.path.join(self.models_folder, topic_csv)

    # newline="" is what the csv module expects for files handed to a reader,
    # so that newlines embedded in quoted fields are interpreted correctly.
    with open(csv_path, "r", newline="") as fin:
        # A set makes the per-document membership test below O(1) instead of
        # a linear scan over every selected id.
        valid_ids = {
            topic_map["document_id"]
            for topic_map in csv.DictReader(fin)
            if int(topic_map["topic_id"]) == topic_id
            and float(topic_map["topic_prob"]) > threshold
        }

    # NOTE(review): ids read from the CSV are strings, so a document only
    # matches if its document_id is an equal string - confirm against Corpus.
    new_corpus = Corpus()
    for document in self.corpus.iter_documents():
        if document.document_id in valid_ids:
            new_corpus.add_document_from_element(document)
    return new_corpus