def most_frequent(self, filtered_tokens, processed_tokens, header=True):
    """Build word-frequency tables for the filtered and processed tokens.

    Each argument is a sequence of rows, where a row is a sequence of
    tokens.  All rows of one argument are concatenated into a single
    document and the occurrences of every token are counted.

    Args:
        filtered_tokens: Rows of tokens for the "filtered" document.
        processed_tokens: Rows of tokens for the "processed" document.
        header: When true, the first row of each argument is skipped.

    Returns:
        A ``(filtered_most_common, processed_most_common)`` tuple.  Each
        element is a list whose first entry is ``['word', 'frequency']``
        followed by ``(word, count)`` tuples ordered from most to least
        frequent.

    Notes:
        When ``self.id`` is non-empty, the first element of every row is
        dropped so that it is not counted as a word (presumably it holds
        the row identifier -- confirm against the caller).
    """
    # Counter.most_common() is what nltk's FreqDist (a Counter subclass)
    # exposes, so the stdlib class yields the same result.
    from collections import Counter

    # self.id does not change during the call, so evaluate the check once.
    skip_id = len(self.id) > 0

    def flatten(rows):
        # Concatenate all rows into one flat token list, dropping the
        # leading id column when one is present.
        document = []
        for row in rows:
            document.extend(row[1:] if skip_id else row)
        return document

    def frequency_table(rows):
        # Header row first, then (word, count) pairs by descending count.
        table = Counter(flatten(rows)).most_common()
        table.insert(0, ['word', 'frequency'])
        return table

    if header:
        filtered_tokens = filtered_tokens[1:]
        processed_tokens = processed_tokens[1:]

    # Both inputs go through the same helper; the original appended the
    # processed rows to the filtered document when self.id was empty.
    return frequency_table(filtered_tokens), frequency_table(processed_tokens)