def rank_sentences(self, dataset, document_column_name, **kwargs):
    """Score each document's sentences by position, earliest highest.

    Every document in ``dataset[document_column_name]`` is split with
    ``sent_tokenize``. A document with ``n`` sentences gets the scores
    ``n - 1, n - 2, ..., 0``, so the first sentence has the largest score.

    Args:
        dataset: Object indexable by column name; the selected column is
            iterated as a sequence of document strings.
        document_column_name: Name of the column holding the documents.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``Baseline.append_column`` returns when given the dataset,
        one ``{"sentences": ..., "scores": ...}`` record per document, and
        ``self.name``.
    """
    records = []
    for document in dataset[document_column_name]:
        doc_sentences = sent_tokenize(document)
        # Count down from the last index to zero (empty for no sentences).
        doc_scores = list(range(len(doc_sentences) - 1, -1, -1))
        records.append({"sentences": doc_sentences, "scores": doc_scores})
    return Baseline.append_column(dataset, records, self.name)
def rank_sentences(self, dataset, document_column_name, seed=42, **kwargs):
    """Assign every sentence a pseudo-random score in ``[0.0, 1.0)``.

    The module-level ``random`` generator is seeded with ``seed`` before any
    score is drawn, so repeated calls with the same seed and input produce
    the same scores. Note that this reseeds the shared global generator.

    Args:
        dataset: Object indexable by column name; the selected column is
            iterated as a sequence of document strings.
        document_column_name: Name of the column holding the documents.
        seed: Value passed to ``random.seed``. Defaults to 42.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``Baseline.append_column`` returns when given the dataset,
        one ``{"sentences": ..., "scores": ...}`` record per document, and
        ``self.name``.
    """
    random.seed(seed)
    tokenized_docs = [
        sent_tokenize(document) for document in dataset[document_column_name]
    ]
    records = []
    for doc_sentences in tokenized_docs:
        # One draw per sentence, in document order then sentence order.
        doc_scores = [random.random() for _ in doc_sentences]
        records.append({"sentences": doc_sentences, "scores": doc_scores})
    return Baseline.append_column(dataset, records, self.name)
def rank_sentences(self, dataset, document_column_name, **kwargs):
    """Score every document's sentences by delegating to ``run_single``.

    Each document in ``dataset[document_column_name]`` is passed to
    ``self.run_single``, which must return a ``(sentences, scores)`` pair.
    Iteration is wrapped in ``tqdm`` for progress reporting.

    Args:
        dataset: Object indexable by column name; the selected column is
            iterated as a sequence of documents.
        document_column_name: Name of the column holding the documents.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        Whatever ``Baseline.append_column`` returns when given the dataset,
        one ``{"sentences": ..., "scores": ...}`` record per document, and
        ``self.name``.
    """
    records = []
    for document in tqdm(dataset[document_column_name]):
        doc_sentences, doc_scores = self.run_single(document)
        records.append({"sentences": doc_sentences, "scores": doc_scores})
    return Baseline.append_column(dataset, records, self.name)